Skip to content

Commit 9fa5cb9

Browse files
Merge pull request #20947 from calixteman/fix_superscript
Don't add an EOL after a superscript
2 parents 6c19752 + ec24053 commit 9fa5cb9

4 files changed

Lines changed: 37 additions & 1 deletion

File tree

src/core/evaluator.js

Lines changed: 15 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2458,6 +2458,7 @@ class PartialEvaluator {
24582458
height: 0,
24592459
vertical: false,
24602460
prevTransform: null,
2461+
prevTextRise: 0,
24612462
textAdvanceScale: 0,
24622463
spaceInFlowMin: 0,
24632464
spaceInFlowMax: 0,
@@ -2906,7 +2907,19 @@ class PartialEvaluator {
29062907
return true;
29072908
}
29082909

2909-
if (Math.abs(advanceY) > textContentItem.height) {
2910+
// Compensate for a textRise change (e.g. superscript/subscript dropping
2911+
// back to baseline): textRise is baked into posY/lastPosY via tsm[5] in
2912+
// getCurrentTextTransform(), scaled by the Y component of the CTM×TM
2913+
// product, which equals currentTransform[3] / textState.fontSize.
2914+
// Without this correction a superscript whose textRise exceeds the line
2915+
// height triggers a spurious EOL when the rise returns to 0.
2916+
const textRiseDelta = textState.textRise - textContentItem.prevTextRise;
2917+
const advanceYCorrected =
2918+
textRiseDelta === 0
2919+
? advanceY
2920+
: advanceY -
2921+
(currentTransform[3] / textState.fontSize) * textRiseDelta;
2922+
if (Math.abs(advanceYCorrected) > textContentItem.height) {
29102923
appendEOL();
29112924
return true;
29122925
}
@@ -3068,6 +3081,7 @@ class PartialEvaluator {
30683081
if (scaledDim) {
30693082
// Save the position of the last visible character.
30703083
textChunk.prevTransform = getCurrentTextTransform();
3084+
textChunk.prevTextRise = textState.textRise;
30713085
}
30723086

30733087
const glyphUnicode = glyph.unicode;

test/pdfs/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -890,3 +890,4 @@
890890
!acroform_calculation_order.pdf
891891
!extractPages_null_in_array.pdf
892892
!issue20930.pdf
893+
!text_rise_eol_bug.pdf

test/pdfs/text_rise_eol_bug.pdf

670 Bytes
Binary file not shown.

test/unit/api_spec.js

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3985,6 +3985,27 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`)
39853985
await loadingTask.destroy();
39863986
});
39873987

3988+
it("gets text content without spurious EOL after a superscript (text_rise_eol_bug.pdf)", async function () {
3989+
const loadingTask = getDocument(
3990+
buildGetDocumentParams("text_rise_eol_bug.pdf")
3991+
);
3992+
const pdfDoc = await loadingTask.promise;
3993+
const pdfPage = await pdfDoc.getPage(1);
3994+
const { items } = await pdfPage.getTextContent({
3995+
disableNormalization: true,
3996+
});
3997+
3998+
// No item should carry a hasEOL flag between the superscript and the
3999+
// text that follows it.
4000+
expect(items.every(i => !i.hasEOL)).toEqual(true);
4001+
4002+
// Full sentence must be reconstructable without a newline.
4003+
const text = mergeText(items);
4004+
expect(text).toEqual("E = mc2 is the mass-energy equivalence.");
4005+
4006+
await loadingTask.destroy();
4007+
});
4008+
39884009
it("gets text content with a specific view box", async function () {
39894010
const loadingTask = getDocument(buildGetDocumentParams("issue16316.pdf"));
39904011
const pdfDoc = await loadingTask.promise;

0 commit comments

Comments
 (0)