Skip to content

Commit 2e3d79e

Browse files
committed
Break text chunks only if the base font is different
It fixes #20956.
1 parent 4845186 commit 2e3d79e

File tree

2 files changed

+9
-22
lines changed

2 files changed

+9
-22
lines changed

src/core/evaluator.js

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2962,8 +2962,10 @@ class PartialEvaluator {
29622962
function buildTextContentItem({ chars, extraSpacing }) {
29632963
if (
29642964
currentTextState !== textState &&
2965-
(currentTextState.fontName !== textState.fontName ||
2966-
currentTextState.fontSize !== textState.fontSize)
2965+
(currentTextState.fontSize !== textState.fontSize ||
2966+
(currentTextState.fontName !== textState.fontName &&
2967+
(currentTextState.font.name !== textState.font.name ||
2968+
currentTextState.font.vertical !== textState.font.vertical)))
29672969
) {
29682970
flushTextContentItem();
29692971
currentTextState = textState.clone();

test/unit/api_spec.js

Lines changed: 5 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -4027,34 +4027,19 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`)
40274027
const { items } = await pdfPage.getTextContent({
40284028
disableNormalization: true,
40294029
});
4030+
// The PDF uses 3 different fonts that all share the same underlying font
4031+
// data, so the text ends up in a single chunk.
40304032
expect(items).toEqual([
40314033
jasmine.objectContaining({
4032-
str: "ABC",
4034+
str: "ABCDEFGHI",
40334035
dir: "ltr",
4034-
width: 20.56,
4036+
width: 57.779999999999994,
40354037
height: 10,
40364038
transform: [10, 0, 0, 10, 100, 100],
40374039
hasEOL: false,
40384040
}),
4039-
jasmine.objectContaining({
4040-
str: "DEF",
4041-
dir: "ltr",
4042-
width: 20,
4043-
height: 10,
4044-
transform: [10, 0, 0, 10, 120, 100],
4045-
hasEOL: false,
4046-
}),
4047-
jasmine.objectContaining({
4048-
str: "GHI",
4049-
dir: "ltr",
4050-
width: 17.78,
4051-
height: 10,
4052-
transform: [10, 0, 0, 10, 140, 100],
4053-
hasEOL: false,
4054-
}),
40554041
]);
4056-
expect(items[0].fontName).toEqual(items[2].fontName);
4057-
expect(items[1].fontName).not.toEqual(items[0].fontName);
4042+
await loadingTask.destroy();
40584043
});
40594044

40604045
it("gets text content with word spacing (issue 20319)", async function () {

0 commit comments

Comments
 (0)