Skip to content

Commit 91033c2

Browse files
committed
Fix the encoding for some missing Chinese fonts
It fixes #20489.
1 parent 9f4db38 commit 91033c2

File tree

5 files changed

+112
-1
lines changed

5 files changed

+112
-1
lines changed

src/core/evaluator.js

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3588,7 +3588,7 @@ class PartialEvaluator {
35883588
if (properties.composite) {
35893589
// CIDSystemInfo helps to match CID to glyphs
35903590
const cidSystemInfo = dict.get("CIDSystemInfo");
3591-
if (cidSystemInfo instanceof Dict) {
3591+
if (cidSystemInfo instanceof Dict && !properties.cidSystemInfo) {
35923592
properties.cidSystemInfo = {
35933593
registry: stringToPDFString(cidSystemInfo.get("Registry")),
35943594
ordering: stringToPDFString(cidSystemInfo.get("Ordering")),
@@ -3670,6 +3670,51 @@ class PartialEvaluator {
36703670
baseEncodingName = null;
36713671
}
36723672

3673+
// Ignore incorrectly specified WinAnsiEncoding for non-embedded CJK fonts
3674+
// (fixes issue20489). Some Chinese fonts specify WinAnsiEncoding in the
3675+
// PDF even though they should use Identity-H or GBK-EUC-H encoding.
3676+
if (
3677+
baseEncodingName === "WinAnsiEncoding" &&
3678+
nonEmbeddedFont &&
3679+
properties.name?.charCodeAt(0) >= 0xb7
3680+
) {
3681+
const fontName = properties.name;
3682+
// This list is built from some names from Pdfium and mupdf:
3683+
// - https://pdfium.googlesource.com/pdfium/+/master/core/fpdfapi/font/cpdf_font.cpp#41
3684+
// - https://fossies.org/linux/mupdf/source/pdf/pdf-font.c#l_820
3685+
const chineseFontNames = [
3686+
"\xCB\xCE\xCC\xE5", // SimSun
3687+
"\xBA\xDA\xCC\xE5", // SimHei
3688+
"\xBF\xAC\xCC\xE5", // SimKai
3689+
"\xB7\xC2\xCB\xCE", // SimFang
3690+
"\xBF\xAC\xCC\xE5_GB2312", // SimKai
3691+
"\xB7\xC2\xCB\xCE_GB2312", // SimFang
3692+
"\xC1\xA5\xCA\xE9", // SimLi
3693+
"\xD0\xC2\xCB\xCE", // SimSun
3694+
];
3695+
3696+
// Check for common Chinese font names and their GBK-encoded equivalents
3697+
// (which may appear as Latin-1 when incorrectly decoded).
3698+
if (chineseFontNames.includes(fontName)) {
3699+
baseEncodingName = null;
3700+
properties.defaultEncoding = "Adobe-GB1-UCS2";
3701+
properties.composite = true;
3702+
properties.cidEncoding = Name.get("GBK-EUC-H");
3703+
const cMap = await CMapFactory.create({
3704+
encoding: properties.cidEncoding,
3705+
fetchBuiltInCMap: this._fetchBuiltInCMapBound,
3706+
useCMap: null,
3707+
});
3708+
properties.cMap = cMap;
3709+
properties.vertical = properties.cMap.vertical;
3710+
properties.cidSystemInfo = {
3711+
registry: "Adobe",
3712+
ordering: "GB1",
3713+
supplement: 0,
3714+
};
3715+
}
3716+
}
3717+
36733718
if (baseEncodingName) {
36743719
properties.defaultEncoding = getEncoding(baseEncodingName);
36753720
} else {

src/core/font_substitutions.js

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,64 @@ const substitutionMap = new Map([
306306
alias: "Wingdings",
307307
},
308308
],
309+
[
310+
"\xCB\xCE\xCC\xE5",
311+
{
312+
local: ["SimSun", "SimSun Regular", "NSimSun"],
313+
style: NORMAL,
314+
ultimate: "serif",
315+
},
316+
],
317+
[
318+
"\xBA\xDA\xCC\xE5",
319+
{
320+
local: ["SimHei", "SimHei Regular"],
321+
style: NORMAL,
322+
ultimate: "sans-serif",
323+
},
324+
],
325+
[
326+
"\xBF\xAC\xCC\xE5",
327+
{
328+
local: ["KaiTi", "SimKai", "SimKai Regular"],
329+
style: NORMAL,
330+
ultimate: "sans-serif",
331+
},
332+
],
333+
[
334+
"\xB7\xC2\xCB\xCE",
335+
{
336+
local: ["FangSong", "SimFang", "SimFang Regular"],
337+
style: NORMAL,
338+
ultimate: "serif",
339+
},
340+
],
341+
[
342+
"\xBF\xAC\xCC\xE5_GB2312",
343+
{
344+
alias: "\xBF\xAC\xCC\xE5",
345+
},
346+
],
347+
[
348+
"\xB7\xC2\xCB\xCE_GB2312",
349+
{
350+
alias: "\xB7\xC2\xCB\xCE",
351+
},
352+
],
353+
[
354+
"\xC1\xA5\xCA\xE9",
355+
{
356+
local: ["SimLi", "SimLi Regular"],
357+
style: NORMAL,
358+
ultimate: "serif",
359+
},
360+
],
361+
[
362+
"\xD0\xC2\xCB\xCE",
363+
{
364+
alias: "\xCB\xCE\xCC\xE5",
365+
},
366+
],
309367
]);
310368

311369
const fontAliases = new Map([["Arial-Black", "ArialBlack"]]);

test/pdfs/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -767,3 +767,4 @@
767767
!issue20225.pdf
768768
!issue20513.pdf
769769
!issue20516.pdf
770+
!issue20489.pdf

test/pdfs/issue20489.pdf

7.17 KB
Binary file not shown.

test/test_manifest.json

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13106,5 +13106,12 @@
1310613106
"md5": "19a3a347773518242fa3cf1c04a9a1e4",
1310713107
"rounds": 1,
1310813108
"type": "eq"
13109+
},
13110+
{
13111+
"id": "issue20489",
13112+
"file": "pdfs/issue20489.pdf",
13113+
"md5": "b85c798b9a4cc2cd4337d335321cc612",
13114+
"rounds": 1,
13115+
"type": "eq"
1310913116
}
1311013117
]

0 commit comments

Comments
 (0)