Skip to content

Commit 7a94faf

Browse files
committed
Prefer /Resources from the /Contents stream-dict, if available
In rare cases /Resources are found in the /Contents stream-dict as well as in the /Page dict; when that happens, the /Resources from the stream-dict need to take precedence. See `issue18894.pdf`.
1 parent 53c5a53 commit 7a94faf

3 files changed

Lines changed: 58 additions & 20 deletions

File tree

src/core/annotation.js

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@ import {
4949
lookupNormalRect,
5050
lookupRect,
5151
numberToString,
52+
RESOURCES_KEYS_OPERATOR_LIST,
53+
RESOURCES_KEYS_TEXT_CONTENT,
5254
stringToAsciiOrUTF16BE,
5355
stringToUTF16String,
5456
} from "./core_utils.js";
@@ -1196,7 +1198,7 @@ class Annotation {
11961198

11971199
const appearanceDict = appearance.dict;
11981200
const resources = await this.loadResources(
1199-
["ExtGState", "ColorSpace", "Pattern", "Shading", "XObject", "Font"],
1201+
RESOURCES_KEYS_OPERATOR_LIST,
12001202
appearance
12011203
);
12021204
const bbox = lookupRect(appearanceDict.getArray("BBox"), [0, 0, 1, 1]);
@@ -1257,7 +1259,7 @@ class Annotation {
12571259
}
12581260

12591261
const resources = await this.loadResources(
1260-
["ExtGState", "Font", "Properties", "XObject"],
1262+
RESOURCES_KEYS_TEXT_CONTENT,
12611263
this.appearance
12621264
);
12631265

src/core/core_utils.js

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,23 @@ const MIN_INT_32 = -(2 ** 31);
3232

3333
const IDENTITY_MATRIX = [1, 0, 0, 1, 0, 0];
3434

35+
const RESOURCES_KEYS_OPERATOR_LIST = [
36+
"ColorSpace",
37+
"ExtGState",
38+
"Font",
39+
"Pattern",
40+
"Properties",
41+
"Shading",
42+
"XObject",
43+
];
44+
45+
const RESOURCES_KEYS_TEXT_CONTENT = [
46+
"ExtGState",
47+
"Font",
48+
"Properties",
49+
"XObject",
50+
];
51+
3552
function getLookupTableFactory(initializer) {
3653
let lookup;
3754
return function () {
@@ -745,6 +762,8 @@ export {
745762
readUint16,
746763
readUint32,
747764
recoverJsURL,
765+
RESOURCES_KEYS_OPERATOR_LIST,
766+
RESOURCES_KEYS_TEXT_CONTENT,
748767
stringToAsciiOrUTF16BE,
749768
stringToUTF16HexString,
750769
stringToUTF16String,

src/core/document.js

Lines changed: 35 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ import {
4545
lookupNormalRect,
4646
MissingDataException,
4747
PDF_VERSION_REGEXP,
48+
RESOURCES_KEYS_OPERATOR_LIST,
49+
RESOURCES_KEYS_TEXT_CONTENT,
4850
validateCSSFont,
4951
XRefEntryException,
5052
XRefParseException,
@@ -419,6 +421,25 @@ class Page {
419421
await objectLoader.load();
420422
}
421423

424+
async #getMergedResources(streamDict, keys) {
425+
// In rare cases /Resources are also found in the /Contents stream-dict,
426+
// in addition to in the /Page dict, hence we need to prefer those when
427+
// available (see issue18894.pdf).
428+
const localResources = streamDict?.get("Resources");
429+
430+
if (!(localResources instanceof Dict)) {
431+
return this.resources;
432+
}
433+
const objectLoader = new ObjectLoader(localResources, keys, this.xref);
434+
await objectLoader.load();
435+
436+
return Dict.merge({
437+
xref: this.xref,
438+
dictArray: [localResources, this.resources],
439+
mergeSubDicts: true,
440+
});
441+
}
442+
422443
async getOperatorList({
423444
handler,
424445
sink,
@@ -429,15 +450,7 @@ class Page {
429450
modifiedIds = null,
430451
}) {
431452
const contentStreamPromise = this.getContentStream();
432-
const resourcesPromise = this.loadResources([
433-
"ColorSpace",
434-
"ExtGState",
435-
"Font",
436-
"Pattern",
437-
"Properties",
438-
"Shading",
439-
"XObject",
440-
]);
453+
const resourcesPromise = this.loadResources(RESOURCES_KEYS_OPERATOR_LIST);
441454

442455
const partialEvaluator = new PartialEvaluator({
443456
xref: this.xref,
@@ -525,11 +538,15 @@ class Page {
525538
contentStreamPromise,
526539
resourcesPromise,
527540
]).then(async ([contentStream]) => {
541+
const resources = await this.#getMergedResources(
542+
contentStream.dict,
543+
RESOURCES_KEYS_OPERATOR_LIST
544+
);
528545
const opList = new OperatorList(intent, sink);
529546

530547
handler.send("StartRenderPage", {
531548
transparency: partialEvaluator.hasBlendModes(
532-
this.resources,
549+
resources,
533550
this.nonBlendModesSet
534551
),
535552
pageIndex: this.pageIndex,
@@ -539,7 +556,7 @@ class Page {
539556
await partialEvaluator.getOperatorList({
540557
stream: contentStream,
541558
task,
542-
resources: this.resources,
559+
resources,
543560
operatorList: opList,
544561
});
545562
return opList;
@@ -642,19 +659,19 @@ class Page {
642659
sink,
643660
}) {
644661
const contentStreamPromise = this.getContentStream();
645-
const resourcesPromise = this.loadResources([
646-
"ExtGState",
647-
"Font",
648-
"Properties",
649-
"XObject",
650-
]);
662+
const resourcesPromise = this.loadResources(RESOURCES_KEYS_TEXT_CONTENT);
651663
const langPromise = this.pdfManager.ensureCatalog("lang");
652664

653665
const [contentStream, , lang] = await Promise.all([
654666
contentStreamPromise,
655667
resourcesPromise,
656668
langPromise,
657669
]);
670+
const resources = await this.#getMergedResources(
671+
contentStream.dict,
672+
RESOURCES_KEYS_TEXT_CONTENT
673+
);
674+
658675
const partialEvaluator = new PartialEvaluator({
659676
xref: this.xref,
660677
handler,
@@ -672,7 +689,7 @@ class Page {
672689
return partialEvaluator.getTextContent({
673690
stream: contentStream,
674691
task,
675-
resources: this.resources,
692+
resources,
676693
includeMarkedContent,
677694
disableNormalization,
678695
sink,

0 commit comments

Comments
 (0)