Skip to content

Commit a0f3528

Browse files
Merge pull request #20624 from calixteman/bug2013793
Flush the text content chunk only on real font changes (bug 2013793)
2 parents 222a24c + 22b97d1 commit a0f3528

4 files changed

Lines changed: 46 additions & 14 deletions

File tree

src/core/evaluator.js

Lines changed: 22 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -2538,7 +2538,7 @@ class PartialEvaluator {
25382538

25392539
const preprocessor = new EvaluatorPreprocessor(stream, xref, stateManager);
25402540

2541-
let textState;
2541+
let textState, currentTextState;
25422542

25432543
function pushWhitespace({
25442544
width = 0,
@@ -2800,7 +2800,9 @@ class PartialEvaluator {
28002800

28012801
// When the total height of the current chunk is negative
28022802
// then we're writing from bottom to top.
2803-
const textOrientation = Math.sign(textContentItem.height);
2803+
const textOrientation = Math.sign(
2804+
textContentItem.height || textContentItem.totalHeight
2805+
);
28042806
if (advanceY < textOrientation * textContentItem.negativeSpaceMax) {
28052807
if (
28062808
Math.abs(advanceX) >
@@ -2864,7 +2866,9 @@ class PartialEvaluator {
28642866

28652867
// When the total width of the current chunk is negative
28662868
// then we're writing from right to left.
2867-
const textOrientation = Math.sign(textContentItem.width);
2869+
const textOrientation = Math.sign(
2870+
textContentItem.width || textContentItem.totalWidth
2871+
);
28682872
if (advanceX < textOrientation * textContentItem.negativeSpaceMax) {
28692873
if (
28702874
Math.abs(advanceY) >
@@ -2922,6 +2926,15 @@ class PartialEvaluator {
29222926
}
29232927

29242928
function buildTextContentItem({ chars, extraSpacing }) {
2929+
if (
2930+
currentTextState !== textState &&
2931+
(currentTextState.fontName !== textState.fontName ||
2932+
currentTextState.fontSize !== textState.fontSize)
2933+
) {
2934+
flushTextContentItem();
2935+
currentTextState = textState.clone();
2936+
}
2937+
29252938
const font = textState.font;
29262939
if (!chars) {
29272940
// Just move according to the space we have.
@@ -3177,8 +3190,8 @@ class PartialEvaluator {
31773190
break;
31783191
}
31793192

3180-
const previousState = textState;
31813193
textState = stateManager.state;
3194+
currentTextState ||= textState.clone();
31823195
const fn = operation.fn;
31833196
args = operation.args;
31843197

@@ -3195,7 +3208,6 @@ class PartialEvaluator {
31953208
break;
31963209
}
31973210

3198-
flushTextContentItem();
31993211
textState.fontName = fontNameArg;
32003212
textState.fontSize = fontSizeArg;
32013213
next(handleSetFont(fontNameArg, null));
@@ -3552,14 +3564,10 @@ class PartialEvaluator {
35523564
}
35533565
break;
35543566
case OPS.restore:
3555-
if (
3556-
previousState &&
3557-
(previousState.font !== textState.font ||
3558-
previousState.fontSize !== textState.fontSize ||
3559-
previousState.fontName !== textState.fontName)
3560-
) {
3561-
flushTextContentItem();
3562-
}
3567+
stateManager.restore();
3568+
break;
3569+
case OPS.save:
3570+
stateManager.save();
35633571
break;
35643572
} // switch
35653573
if (textContent.items.length >= (sink?.desiredSize ?? 1)) {
@@ -5083,7 +5091,7 @@ class TextState {
50835091
}
50845092

50855093
clone() {
5086-
const clone = Object.create(this);
5094+
const clone = Object.assign(Object.create(this), this);
50875095
clone.textMatrix = this.textMatrix.slice();
50885096
clone.textLineMatrix = this.textLineMatrix.slice();
50895097
clone.fontMatrix = this.fontMatrix.slice();

test/pdfs/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -871,3 +871,4 @@
871871
!page_with_number.pdf
872872
!page_with_number_and_link.pdf
873873
!Brotli-Prototype-FileA.pdf
874+
!bug2013793.pdf

test/pdfs/bug2013793.pdf

70.7 KB
Binary file not shown.

test/unit/api_spec.js

Lines changed: 23 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4069,6 +4069,29 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`)
40694069
await loadingTask.destroy();
40704070
});
40714071

4072+
it("gets text content with some fake font changes (bug 2013793)", async function () {
4073+
const loadingTask = getDocument(buildGetDocumentParams("bug2013793.pdf"));
4074+
const pdfDoc = await loadingTask.promise;
4075+
const pdfPage = await pdfDoc.getPage(1);
4076+
const { items } = await pdfPage.getTextContent({
4077+
disableNormalization: true,
4078+
});
4079+
const text = mergeText(items);
4080+
expect(text)
4081+
.toEqual(`This is a great deal of nothing. The purpose is to help in identifying a bug when the PDF
4082+
is read by Firefox. I want to know whether any of the two words in this paragraph run
4083+
together. If they do, I will file a bug report. The problem seems to occur somewhere
4084+
between the 240th and 260th character in the paragraph. I should have written that much
4085+
by now. So, here’s to squashing bugs.
4086+
This is a great deal of nothing. The purpose is to help in identifying a bug when the
4087+
PDF is read by Firefox. I want to know whether any of the two words in this
4088+
paragraph run together. If they do, I will file a bug report. The problem seems to
4089+
occur somewhere between the 240th and 260th character in the paragraph. I should
4090+
have written that much by now. So, here’s to squashing bugs.`);
4091+
4092+
await loadingTask.destroy();
4093+
});
4094+
40724095
it("gets empty structure tree", async function () {
40734096
const tree = await page.getStructTree();
40744097

0 commit comments

Comments
 (0)