Skip to content

Commit 9c6e2e6

Browse files
Merge pull request #19803 from Snuffleupagus/issue-19800
Catch circular references in /Form XObjects (issue 19800)
2 parents (53c5a53 + 1048508), commit 9c6e2e6

7 files changed

Lines changed: 111 additions & 25 deletions

File tree

src/core/annotation.js

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -49,6 +49,8 @@ import {
4949
lookupNormalRect,
5050
lookupRect,
5151
numberToString,
52+
RESOURCES_KEYS_OPERATOR_LIST,
53+
RESOURCES_KEYS_TEXT_CONTENT,
5254
stringToAsciiOrUTF16BE,
5355
stringToUTF16String,
5456
} from "./core_utils.js";
@@ -1196,7 +1198,7 @@ class Annotation {
11961198

11971199
const appearanceDict = appearance.dict;
11981200
const resources = await this.loadResources(
1199-
["ExtGState", "ColorSpace", "Pattern", "Shading", "XObject", "Font"],
1201+
RESOURCES_KEYS_OPERATOR_LIST,
12001202
appearance
12011203
);
12021204
const bbox = lookupRect(appearanceDict.getArray("BBox"), [0, 0, 1, 1]);
@@ -1257,7 +1259,7 @@ class Annotation {
12571259
}
12581260

12591261
const resources = await this.loadResources(
1260-
["ExtGState", "Font", "Properties", "XObject"],
1262+
RESOURCES_KEYS_TEXT_CONTENT,
12611263
this.appearance
12621264
);
12631265

src/core/core_utils.js

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,23 @@ const MIN_INT_32 = -(2 ** 31);
3232

3333
const IDENTITY_MATRIX = [1, 0, 0, 1, 0, 0];
3434

35+
const RESOURCES_KEYS_OPERATOR_LIST = [
36+
"ColorSpace",
37+
"ExtGState",
38+
"Font",
39+
"Pattern",
40+
"Properties",
41+
"Shading",
42+
"XObject",
43+
];
44+
45+
const RESOURCES_KEYS_TEXT_CONTENT = [
46+
"ExtGState",
47+
"Font",
48+
"Properties",
49+
"XObject",
50+
];
51+
3552
function getLookupTableFactory(initializer) {
3653
let lookup;
3754
return function () {
@@ -745,6 +762,8 @@ export {
745762
readUint16,
746763
readUint32,
747764
recoverJsURL,
765+
RESOURCES_KEYS_OPERATOR_LIST,
766+
RESOURCES_KEYS_TEXT_CONTENT,
748767
stringToAsciiOrUTF16BE,
749768
stringToUTF16HexString,
750769
stringToUTF16String,

src/core/document.js

Lines changed: 35 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ import {
4545
lookupNormalRect,
4646
MissingDataException,
4747
PDF_VERSION_REGEXP,
48+
RESOURCES_KEYS_OPERATOR_LIST,
49+
RESOURCES_KEYS_TEXT_CONTENT,
4850
validateCSSFont,
4951
XRefEntryException,
5052
XRefParseException,
@@ -419,6 +421,25 @@ class Page {
419421
await objectLoader.load();
420422
}
421423

424+
async #getMergedResources(streamDict, keys) {
425+
// In rare cases /Resources are also found in the /Contents stream-dict,
426+
// in addition to in the /Page dict, hence we need to prefer those when
427+
// available (see issue18894.pdf).
428+
const localResources = streamDict?.get("Resources");
429+
430+
if (!(localResources instanceof Dict)) {
431+
return this.resources;
432+
}
433+
const objectLoader = new ObjectLoader(localResources, keys, this.xref);
434+
await objectLoader.load();
435+
436+
return Dict.merge({
437+
xref: this.xref,
438+
dictArray: [localResources, this.resources],
439+
mergeSubDicts: true,
440+
});
441+
}
442+
422443
async getOperatorList({
423444
handler,
424445
sink,
@@ -429,15 +450,7 @@ class Page {
429450
modifiedIds = null,
430451
}) {
431452
const contentStreamPromise = this.getContentStream();
432-
const resourcesPromise = this.loadResources([
433-
"ColorSpace",
434-
"ExtGState",
435-
"Font",
436-
"Pattern",
437-
"Properties",
438-
"Shading",
439-
"XObject",
440-
]);
453+
const resourcesPromise = this.loadResources(RESOURCES_KEYS_OPERATOR_LIST);
441454

442455
const partialEvaluator = new PartialEvaluator({
443456
xref: this.xref,
@@ -525,11 +538,15 @@ class Page {
525538
contentStreamPromise,
526539
resourcesPromise,
527540
]).then(async ([contentStream]) => {
541+
const resources = await this.#getMergedResources(
542+
contentStream.dict,
543+
RESOURCES_KEYS_OPERATOR_LIST
544+
);
528545
const opList = new OperatorList(intent, sink);
529546

530547
handler.send("StartRenderPage", {
531548
transparency: partialEvaluator.hasBlendModes(
532-
this.resources,
549+
resources,
533550
this.nonBlendModesSet
534551
),
535552
pageIndex: this.pageIndex,
@@ -539,7 +556,7 @@ class Page {
539556
await partialEvaluator.getOperatorList({
540557
stream: contentStream,
541558
task,
542-
resources: this.resources,
559+
resources,
543560
operatorList: opList,
544561
});
545562
return opList;
@@ -642,19 +659,19 @@ class Page {
642659
sink,
643660
}) {
644661
const contentStreamPromise = this.getContentStream();
645-
const resourcesPromise = this.loadResources([
646-
"ExtGState",
647-
"Font",
648-
"Properties",
649-
"XObject",
650-
]);
662+
const resourcesPromise = this.loadResources(RESOURCES_KEYS_TEXT_CONTENT);
651663
const langPromise = this.pdfManager.ensureCatalog("lang");
652664

653665
const [contentStream, , lang] = await Promise.all([
654666
contentStreamPromise,
655667
resourcesPromise,
656668
langPromise,
657669
]);
670+
const resources = await this.#getMergedResources(
671+
contentStream.dict,
672+
RESOURCES_KEYS_TEXT_CONTENT
673+
);
674+
658675
const partialEvaluator = new PartialEvaluator({
659676
xref: this.xref,
660677
handler,
@@ -672,7 +689,7 @@ class Page {
672689
return partialEvaluator.getTextContent({
673690
stream: contentStream,
674691
task,
675-
resources: this.resources,
692+
resources,
676693
includeMarkedContent,
677694
disableNormalization,
678695
sink,

src/core/evaluator.js

Lines changed: 38 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -462,7 +462,8 @@ class PartialEvaluator {
462462
operatorList,
463463
task,
464464
initialState,
465-
localColorSpaceCache
465+
localColorSpaceCache,
466+
seenRefs
466467
) {
467468
const dict = xobj.dict;
468469
const matrix = lookupMatrix(dict.getArray("Matrix"), null);
@@ -526,6 +527,7 @@ class PartialEvaluator {
526527
resources: dict.get("Resources") || resources,
527528
operatorList,
528529
initialState,
530+
prevRefs: seenRefs,
529531
});
530532
operatorList.addOp(OPS.paintFormXObjectEnd, []);
531533

@@ -850,7 +852,8 @@ class PartialEvaluator {
850852
operatorList,
851853
task,
852854
stateManager,
853-
localColorSpaceCache
855+
localColorSpaceCache,
856+
seenRefs
854857
) {
855858
const smaskContent = smask.get("G");
856859
const smaskOptions = {
@@ -880,7 +883,8 @@ class PartialEvaluator {
880883
operatorList,
881884
task,
882885
stateManager.state.clone({ newPath: true }),
883-
localColorSpaceCache
886+
localColorSpaceCache,
887+
seenRefs
884888
);
885889
}
886890

@@ -1065,6 +1069,7 @@ class PartialEvaluator {
10651069
stateManager,
10661070
localGStateCache,
10671071
localColorSpaceCache,
1072+
seenRefs,
10681073
}) {
10691074
const gStateRef = gState.objId;
10701075
let isSimpleGState = true;
@@ -1127,7 +1132,8 @@ class PartialEvaluator {
11271132
operatorList,
11281133
task,
11291134
stateManager,
1130-
localColorSpaceCache
1135+
localColorSpaceCache,
1136+
seenRefs
11311137
)
11321138
);
11331139
gStateObj.push([key, true]);
@@ -1696,7 +1702,19 @@ class PartialEvaluator {
16961702
operatorList,
16971703
initialState = null,
16981704
fallbackFontDict = null,
1705+
prevRefs = null,
16991706
}) {
1707+
const objId = stream.dict?.objId;
1708+
const seenRefs = new RefSet(prevRefs);
1709+
1710+
if (objId) {
1711+
if (prevRefs?.has(objId)) {
1712+
throw new Error(
1713+
`getOperatorList - ignoring circular reference: ${objId}`
1714+
);
1715+
}
1716+
seenRefs.put(objId);
1717+
}
17001718
// Ensure that `resources`/`initialState` is correctly initialized,
17011719
// even if the provided parameter is e.g. `null`.
17021720
resources ||= Dict.empty;
@@ -1808,7 +1826,8 @@ class PartialEvaluator {
18081826
operatorList,
18091827
task,
18101828
stateManager.state.clone({ newPath: true }),
1811-
localColorSpaceCache
1829+
localColorSpaceCache,
1830+
seenRefs
18121831
)
18131832
.then(function () {
18141833
stateManager.restore();
@@ -2158,6 +2177,7 @@ class PartialEvaluator {
21582177
stateManager,
21592178
localGStateCache,
21602179
localColorSpaceCache,
2180+
seenRefs,
21612181
})
21622182
.then(resolveGState, rejectGState);
21632183
}).catch(function (reason) {
@@ -2339,7 +2359,19 @@ class PartialEvaluator {
23392359
markedContentData = null,
23402360
disableNormalization = false,
23412361
keepWhiteSpace = false,
2362+
prevRefs = null,
23422363
}) {
2364+
const objId = stream.dict?.objId;
2365+
const seenRefs = new RefSet(prevRefs);
2366+
2367+
if (objId) {
2368+
if (prevRefs?.has(objId)) {
2369+
throw new Error(
2370+
`getTextContent - ignoring circular reference: ${objId}`
2371+
);
2372+
}
2373+
seenRefs.put(objId);
2374+
}
23432375
// Ensure that `resources`/`stateManager` is correctly initialized,
23442376
// even if the provided parameter is e.g. `null`.
23452377
resources ||= Dict.empty;
@@ -3326,6 +3358,7 @@ class PartialEvaluator {
33263358
markedContentData,
33273359
disableNormalization,
33283360
keepWhiteSpace,
3361+
prevRefs: seenRefs,
33293362
})
33303363
.then(function () {
33313364
if (!sinkWrapper.enqueueInvoked) {

test/pdfs/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,7 @@
206206
!issue3928.pdf
207207
!issue8565.pdf
208208
!clippath.pdf
209+
!issue19800.pdf
209210
!issue8795_reduced.pdf
210211
!bug1755507.pdf
211212
!close-path-bug.pdf

test/pdfs/issue19800.pdf

1.08 KB
Binary file not shown.

test/test_manifest.json

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6897,6 +6897,20 @@
68976897
"lastPage": 2,
68986898
"type": "eq"
68996899
},
6900+
{
6901+
"id": "issue19800-eq",
6902+
"file": "pdfs/issue19800.pdf",
6903+
"md5": "92825d3178196bdd01096c4081609efd",
6904+
"rounds": 1,
6905+
"type": "eq"
6906+
},
6907+
{
6908+
"id": "issue19800-text",
6909+
"file": "pdfs/issue19800.pdf",
6910+
"md5": "92825d3178196bdd01096c4081609efd",
6911+
"rounds": 1,
6912+
"type": "text"
6913+
},
69006914
{
69016915
"id": "issue3438",
69026916
"file": "pdfs/issue3438.pdf",

0 commit comments

Comments
 (0)