Skip to content

Commit 466c626

Browse files
authored
Merge pull request #20974 from calixteman/bug2025674
Don't walk the children of a node having some attached MathML (bug 2025674)
2 parents 377f6d8 + eff11e8 commit 466c626

4 files changed

Lines changed: 98 additions & 92 deletions

File tree

test/integration/accessibility_spec.mjs

Lines changed: 81 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -320,37 +320,22 @@ describe("accessibility", () => {
320320
it("must check that the MathML is correctly inserted", async () => {
321321
await Promise.all(
322322
pages.map(async ([browserName, page]) => {
323-
const isSanitizerSupported = await page.evaluate(() => {
324-
try {
325-
// eslint-disable-next-line no-undef
326-
return typeof Sanitizer !== "undefined";
327-
} catch {
328-
return false;
329-
}
330-
});
331-
if (isSanitizerSupported) {
332-
const mathML = await page.$eval(
333-
"span.structTree span[aria-owns='p58R_mc13'] > math",
334-
el => el?.innerHTML ?? ""
335-
);
336-
expect(mathML)
337-
.withContext(`In ${browserName}`)
338-
.toEqual(
339-
` <msqrt><msup><mi>x</mi><mn>2</mn></msup></msqrt> <mo>=</mo> <mrow intent="absolute-value($x)"><mo>|</mo><mi arg="x">x</mi><mo>|</mo></mrow> `
340-
);
341-
342-
// Check that the math corresponding element is hidden in the text
343-
// layer.
344-
const ariaHidden = await page.$eval("span#p58R_mc13", el =>
345-
el.getAttribute("aria-hidden")
346-
);
347-
expect(ariaHidden).withContext(`In ${browserName}`).toEqual("true");
348-
} else {
349-
// eslint-disable-next-line no-console
350-
console.log(
351-
`Pending in Chrome: Sanitizer API (in ${browserName}) is not supported`
323+
const mathML = await page.$eval(
324+
"span.structTree span[aria-owns='p58R_mc13'] > math",
325+
el => el?.innerHTML ?? ""
326+
);
327+
expect(mathML)
328+
.withContext(`In ${browserName}`)
329+
.toEqual(
330+
` <msqrt><msup><mi>x</mi><mn>2</mn></msup></msqrt> <mo>=</mo> <mrow intent="absolute-value($x)"><mo>|</mo><mi arg="x">x</mi><mo>|</mo></mrow> `
352331
);
353-
}
332+
333+
// Check that the math corresponding element is hidden in the text
334+
// layer.
335+
const ariaHidden = await page.$eval("span#p58R_mc13", el =>
336+
el.getAttribute("aria-hidden")
337+
);
338+
expect(ariaHidden).withContext(`In ${browserName}`).toEqual("true");
354339
})
355340
);
356341
});
@@ -370,30 +355,15 @@ describe("accessibility", () => {
370355
it("must check that the MathML is correctly inserted", async () => {
371356
await Promise.all(
372357
pages.map(async ([browserName, page]) => {
373-
const isSanitizerSupported = await page.evaluate(() => {
374-
try {
375-
// eslint-disable-next-line no-undef
376-
return typeof Sanitizer !== "undefined";
377-
} catch {
378-
return false;
379-
}
380-
});
381-
if (isSanitizerSupported) {
382-
const mathML = await page.$eval(
383-
"span.structTree span[aria-owns='p21R_mc64']",
384-
el => el?.innerHTML ?? ""
385-
);
386-
expect(mathML)
387-
.withContext(`In ${browserName}`)
388-
.toEqual(
389-
'<math display="block"> <msup> <mi>𝑛</mi> <mi>𝑝</mi> </msup> <mo lspace="0.278em" rspace="0.278em">=</mo> <mi>𝑛</mi> <mspace width="1.000em"></mspace> <mi> mod </mi> <mspace width="0.167em"></mspace> <mspace width="0.167em"></mspace> <mi>𝑝</mi> </math>'
390-
);
391-
} else {
392-
// eslint-disable-next-line no-console
393-
console.log(
394-
`Pending in Chrome: Sanitizer API (in ${browserName}) is not supported`
358+
const mathML = await page.$eval(
359+
"span.structTree span[aria-owns='p21R_mc64']",
360+
el => el?.innerHTML ?? ""
361+
);
362+
expect(mathML)
363+
.withContext(`In ${browserName}`)
364+
.toEqual(
365+
'<math display="block"> <msup> <mi>𝑛</mi> <mi>𝑝</mi> </msup> <mo lspace="0.278em" rspace="0.278em">=</mo> <mi>𝑛</mi> <mspace width="1.000em"></mspace> <mi> mod </mi> <mspace width="0.167em"></mspace> <mspace width="0.167em"></mspace> <mi>𝑝</mi> </math>'
395366
);
396-
}
397367
})
398368
);
399369
});
@@ -475,25 +445,11 @@ describe("accessibility", () => {
475445
it("must check that there's no alt-text on the MathML node", async () => {
476446
await Promise.all(
477447
pages.map(async ([browserName, page]) => {
478-
const isSanitizerSupported = await page.evaluate(() => {
479-
try {
480-
// eslint-disable-next-line no-undef
481-
return typeof Sanitizer !== "undefined";
482-
} catch {
483-
return false;
484-
}
485-
});
486448
const ariaLabel = await page.$eval(
487449
"span[aria-owns='p3R_mc2']",
488450
el => el.getAttribute("aria-label") || ""
489451
);
490-
if (isSanitizerSupported) {
491-
expect(ariaLabel).withContext(`In ${browserName}`).toEqual("");
492-
} else {
493-
expect(ariaLabel)
494-
.withContext(`In ${browserName}`)
495-
.toEqual("cube root of , x plus y end cube root ");
496-
}
452+
expect(ariaLabel).withContext(`In ${browserName}`).toEqual("");
497453
})
498454
);
499455
});
@@ -513,14 +469,6 @@ describe("accessibility", () => {
513469
it("must check that the text in text layer is aria-hidden", async () => {
514470
await Promise.all(
515471
pages.map(async ([browserName, page]) => {
516-
const isSanitizerSupported = await page.evaluate(() => {
517-
try {
518-
// eslint-disable-next-line no-undef
519-
return typeof Sanitizer !== "undefined";
520-
} catch {
521-
return false;
522-
}
523-
});
524472
const ariaHidden = await page.evaluate(() =>
525473
Array.from(
526474
document.querySelectorAll(".structTree :has(> math)")
@@ -530,16 +478,64 @@ describe("accessibility", () => {
530478
.getAttribute("aria-hidden")
531479
)
532480
);
533-
if (isSanitizerSupported) {
534-
expect(ariaHidden)
535-
.withContext(`In ${browserName}`)
536-
.toEqual(["true", "true", "true"]);
537-
} else {
538-
// eslint-disable-next-line no-console
539-
console.log(
540-
`Pending in Chrome: Sanitizer API (in ${browserName}) is not supported`
481+
expect(ariaHidden)
482+
.withContext(`In ${browserName}`)
483+
.toEqual(["true", "true", "true"]);
484+
})
485+
);
486+
});
487+
});
488+
489+
describe("MathML in AF entry with struct tree children must not be duplicated", () => {
490+
let pages;
491+
492+
beforeEach(async () => {
493+
pages = await loadAndWait("bug2025674.pdf", ".textLayer");
494+
});
495+
496+
afterEach(async () => {
497+
await closePages(pages);
498+
});
499+
500+
it("must check that the MathML is not duplicated in the struct tree", async () => {
501+
await Promise.all(
502+
pages.map(async ([browserName, page]) => {
503+
// The Formula node has both AF MathML and struct tree children.
504+
// When AF MathML is present, children must not be walked to avoid
505+
// rendering the math content twice in the accessibility tree.
506+
const mathCount = await page.evaluate(
507+
() => document.querySelectorAll(".structTree math").length
508+
);
509+
expect(mathCount).withContext(`In ${browserName}`).toBe(1);
510+
511+
// All text layer elements referenced by the formula subtree must
512+
// be aria-hidden so screen readers don't read both the MathML and
513+
// the underlying text content.
514+
const allHidden = await page.evaluate(() => {
515+
const ids = [];
516+
for (const el of document.querySelectorAll(
517+
".structTree [aria-owns]"
518+
)) {
519+
if (el.closest("math")) {
520+
ids.push(el.getAttribute("aria-owns"));
521+
}
522+
}
523+
// Also collect ids from the formula span itself.
524+
for (const el of document.querySelectorAll(
525+
".structTree span:has(> math)"
526+
)) {
527+
const owned = el.getAttribute("aria-owns");
528+
if (owned) {
529+
ids.push(owned);
530+
}
531+
}
532+
return ids.every(
533+
id =>
534+
document.getElementById(id)?.getAttribute("aria-hidden") ===
535+
"true"
541536
);
542-
}
537+
});
538+
expect(allHidden).withContext(`In ${browserName}`).toBeTrue();
543539
})
544540
);
545541
});

test/pdfs/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -892,3 +892,4 @@
892892
!issue20930.pdf
893893
!text_rise_eol_bug.pdf
894894
!hello_world_rotated.pdf
895+
!bug2025674.pdf

test/pdfs/bug2025674.pdf

25.4 KB
Binary file not shown.

web/struct_tree_layer_builder.js

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -350,12 +350,25 @@ class StructTreeLayerBuilder {
350350
}
351351
}
352352

353+
#collectIds(node, ids) {
354+
if (!node) {
355+
return;
356+
}
357+
if ("id" in node) {
358+
ids.push(node.id);
359+
}
360+
for (const kid of node.children || []) {
361+
this.#collectIds(kid, ids);
362+
}
363+
}
364+
353365
#walk(node, parentNodes = []) {
354366
if (!node) {
355367
return null;
356368
}
357369

358370
let element;
371+
let visitChildren = true;
359372
if ("role" in node) {
360373
const { role } = node;
361374
if (MathMLElements.has(role)) {
@@ -389,18 +402,14 @@ class StructTreeLayerBuilder {
389402
}
390403
if (role === "Formula") {
391404
if (node.mathML && MathMLSanitizer.sanitizer) {
405+
visitChildren = false;
392406
element.setHTML(node.mathML, {
393407
sanitizer: MathMLSanitizer.sanitizer,
394408
});
395409
// Hide all the corresponding content elements in the text layer in
396410
// order to avoid screen readers reading both the MathML and the
397411
// text content.
398-
for (const { id } of node.children || []) {
399-
if (!id) {
400-
continue;
401-
}
402-
(this.#elementsToHideInTextLayer ||= []).push(id);
403-
}
412+
this.#collectIds(node, (this.#elementsToHideInTextLayer ||= []));
404413
// For now, we don't want to keep the alt text if there's valid
405414
// MathML (see https://github.com/w3c/mathml-aam/issues/37).
406415
// TODO: Revisit this decision in the future.
@@ -426,7 +435,7 @@ class StructTreeLayerBuilder {
426435
// Often there is only one content node so just set the values on the
427436
// parent node to avoid creating an extra span.
428437
this.#setAttributes(node.children[0], element);
429-
} else {
438+
} else if (visitChildren) {
430439
parentNodes.push(node);
431440
for (const kid of node.children) {
432441
element.append(this.#walk(kid, parentNodes));

0 commit comments

Comments
 (0)