Skip to content

Commit fc92e40

Browse files
Merge pull request #19884 from Snuffleupagus/issue-19835-2
Optionally allow keeping Unicode escape sequences in `stringToPDFString` (PR 17331 follow-up)
2 parents b8de9a3 + b629baf commit fc92e40

5 files changed

Lines changed: 68 additions & 16 deletions

File tree

src/core/catalog.js

Lines changed: 37 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ function fetchRemoteDest(action) {
7676
dest = dest.name;
7777
}
7878
if (typeof dest === "string") {
79-
return stringToPDFString(dest);
79+
return stringToPDFString(dest, /* keepEscapeSequence = */ true);
8080
} else if (isValidExplicitDest(dest)) {
8181
return JSON.stringify(dest);
8282
}
@@ -674,15 +674,17 @@ class Catalog {
674674
for (const [key, value] of obj.getAll()) {
675675
const dest = fetchDest(value);
676676
if (dest) {
677-
dests[stringToPDFString(key)] = dest;
677+
dests[stringToPDFString(key, /* keepEscapeSequence = */ true)] =
678+
dest;
678679
}
679680
}
680681
} else if (obj instanceof Dict) {
681682
for (const [key, value] of obj) {
682683
const dest = fetchDest(value);
683684
if (dest) {
684685
// Always let the NameTree take precedence.
685-
dests[stringToPDFString(key)] ||= dest;
686+
dests[stringToPDFString(key, /* keepEscapeSequence = */ true)] ||=
687+
dest;
686688
}
687689
}
688690
}
@@ -691,6 +693,11 @@ class Catalog {
691693
}
692694

693695
getDestination(id) {
696+
// Avoid extra lookup/parsing when all destinations are already available.
697+
if (this.hasOwnProperty("destinations")) {
698+
return this.destinations[id] ?? null;
699+
}
700+
694701
const rawDests = this.#readDests();
695702
for (const obj of rawDests) {
696703
if (obj instanceof NameTree || obj instanceof Dict) {
@@ -1041,7 +1048,8 @@ class Catalog {
10411048
for (const [key, value] of nameTree.getAll()) {
10421049
const fs = new FileSpec(value, this.xref);
10431050
attachments ??= Object.create(null);
1044-
attachments[stringToPDFString(key)] = fs.serializable;
1051+
attachments[stringToPDFString(key, /* keepEscapeSequence = */ true)] =
1052+
fs.serializable;
10451053
}
10461054
}
10471055
return shadow(this, "attachments", attachments);
@@ -1055,7 +1063,10 @@ class Catalog {
10551063
const nameTree = new NameTree(obj.getRaw("XFAImages"), this.xref);
10561064
for (const [key, value] of nameTree.getAll()) {
10571065
xfaImages ??= new Dict(this.xref);
1058-
xfaImages.set(stringToPDFString(key), value);
1066+
xfaImages.set(
1067+
stringToPDFString(key, /* keepEscapeSequence = */ true),
1068+
value
1069+
);
10591070
}
10601071
}
10611072
return shadow(this, "xfaImages", xfaImages);
@@ -1079,7 +1090,10 @@ class Catalog {
10791090
} else if (typeof js !== "string") {
10801091
return;
10811092
}
1082-
js = stringToPDFString(js).replaceAll("\x00", "");
1093+
js = stringToPDFString(js, /* keepEscapeSequence = */ true).replaceAll(
1094+
"\x00",
1095+
""
1096+
);
10831097
// Skip empty entries, similar to the `_collectJS` function.
10841098
if (js) {
10851099
(javaScript ||= new Map()).set(name, js);
@@ -1089,7 +1103,10 @@ class Catalog {
10891103
if (obj instanceof Dict && obj.has("JavaScript")) {
10901104
const nameTree = new NameTree(obj.getRaw("JavaScript"), this.xref);
10911105
for (const [key, value] of nameTree.getAll()) {
1092-
appendIfJavaScriptDict(stringToPDFString(key), value);
1106+
appendIfJavaScriptDict(
1107+
stringToPDFString(key, /* keepEscapeSequence = */ true),
1108+
value
1109+
);
10931110
}
10941111
}
10951112
// Append OpenAction "JavaScript" actions, if any, to the JavaScript map.
@@ -1628,7 +1645,10 @@ class Catalog {
16281645
const name = target.get("N");
16291646

16301647
if (isName(relationship, "C") && typeof name === "string") {
1631-
attachment = docAttachments[stringToPDFString(name)];
1648+
attachment =
1649+
docAttachments[
1650+
stringToPDFString(name, /* keepEscapeSequence = */ true)
1651+
];
16321652
}
16331653
}
16341654

@@ -1694,7 +1714,11 @@ class Catalog {
16941714
js = jsAction;
16951715
}
16961716

1697-
const jsURL = js && recoverJsURL(stringToPDFString(js));
1717+
const jsURL =
1718+
js &&
1719+
recoverJsURL(
1720+
stringToPDFString(js, /* keepEscapeSequence = */ true)
1721+
);
16981722
if (jsURL) {
16991723
url = jsURL.url;
17001724
resultObj.newWindow = jsURL.newWindow;
@@ -1730,7 +1754,10 @@ class Catalog {
17301754
dest = dest.name;
17311755
}
17321756
if (typeof dest === "string") {
1733-
resultObj.dest = stringToPDFString(dest);
1757+
resultObj.dest = stringToPDFString(
1758+
dest,
1759+
/* keepEscapeSequence = */ true
1760+
);
17341761
} else if (isValidExplicitDest(dest)) {
17351762
resultObj.dest = dest;
17361763
}

src/core/core_utils.js

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -424,7 +424,10 @@ function _collectJS(entry, xref, list, parents) {
424424
} else if (typeof js === "string") {
425425
code = js;
426426
}
427-
code &&= stringToPDFString(code).replaceAll("\x00", "");
427+
code &&= stringToPDFString(
428+
code,
429+
/* keepEscapeSequence = */ true
430+
).replaceAll("\x00", "");
428431
if (code) {
429432
list.push(code);
430433
}

src/core/file_spec.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ class FileSpec {
7777

7878
const item = pickPlatformItem(this.root);
7979
if (item && typeof item === "string") {
80-
filename = stringToPDFString(item)
80+
filename = stringToPDFString(item, /* keepEscapeSequence = */ true)
8181
.replaceAll("\\\\", "\\")
8282
.replaceAll("\\/", "/")
8383
.replaceAll("\\", "/");

src/shared/util.js

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1022,9 +1022,9 @@ const PDFStringTranslateTable = [
10221022
0x131, 0x142, 0x153, 0x161, 0x17e, 0, 0x20ac,
10231023
];
10241024

1025-
function stringToPDFString(str) {
1025+
function stringToPDFString(str, keepEscapeSequence = false) {
10261026
// See section 7.9.2.2 Text String Type.
1027-
// The string can contain some language codes bracketed with 0x0b,
1027+
// The string can contain some language codes bracketed with 0x1b,
10281028
// so we must remove them.
10291029
if (str[0] >= "\xEF") {
10301030
let encoding;
@@ -1047,7 +1047,7 @@ function stringToPDFString(str) {
10471047
const decoder = new TextDecoder(encoding, { fatal: true });
10481048
const buffer = stringToBytes(str);
10491049
const decoded = decoder.decode(buffer);
1050-
if (!decoded.includes("\x1b")) {
1050+
if (keepEscapeSequence || !decoded.includes("\x1b")) {
10511051
return decoded;
10521052
}
10531053
return decoded.replaceAll(/\x1b[^\x1b]*(?:\x1b|$)/g, "");
@@ -1060,7 +1060,7 @@ function stringToPDFString(str) {
10601060
const strBuf = [];
10611061
for (let i = 0, ii = str.length; i < ii; i++) {
10621062
const charCode = str.charCodeAt(i);
1063-
if (charCode === 0x1b) {
1063+
if (!keepEscapeSequence && charCode === 0x1b) {
10641064
// eslint-disable-next-line no-empty
10651065
while (++i < ii && str.charCodeAt(i) !== 0x1b) {}
10661066
continue;

test/unit/api_spec.js

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1421,6 +1421,28 @@ describe("api", function () {
14211421
await loadingTask.destroy();
14221422
});
14231423

1424+
it("gets a destination containing Unicode escape sequence (\x1b), from /Dests dictionary with keys using PDFDocEncoding", async function () {
1425+
if (isNodeJS) {
1426+
pending("Linked test-cases are not supported in Node.js.");
1427+
}
1428+
const loadingTask = getDocument(buildGetDocumentParams("issue19835.pdf"));
1429+
const pdfDoc = await loadingTask.promise;
1430+
1431+
const page3 = await pdfDoc.getPage(3);
1432+
const annots = await page3.getAnnotations();
1433+
1434+
const annot = annots.find(x => x.id === "55R");
1435+
// Sanity check to make sure that we found the "correct" annotation.
1436+
expect(annot.dest).toEqual(
1437+
"\u02d9\u0064\u002a\u0010\u000e\u0061\u00d6\u0002\u005b\u00b7\u201a\u0022\u00c5\u00da\u017e\u00bb\u00d5\u0062\u02dd\u00d1"
1438+
);
1439+
1440+
const dest = await pdfDoc.getDestination(annot.dest);
1441+
expect(dest).toEqual([28, { name: "XYZ" }, 34.0799999, 73.5199999, 0]);
1442+
1443+
await loadingTask.destroy();
1444+
});
1445+
14241446
it("gets non-string destination", async function () {
14251447
let numberPromise = pdfDocument.getDestination(4.3);
14261448
let booleanPromise = pdfDocument.getDestination(true);

0 commit comments

Comments
 (0)