Skip to content

Commit 88c2051

Browse files
committed
Use the ccittfax decoder from pdfium
The decoder is a dependency of the JBIG2 one and is already included in pdf.js, so we just need to wire it up. It improves decoding performance for documents that use CCITTFax images.
1 parent 471adfd commit 88c2051

9 files changed

Lines changed: 139 additions & 48 deletions

File tree

external/jbig2/jbig2.js

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

external/jbig2/jbig2.wasm

4.29 KB
Binary file not shown.

src/core/ccitt.js

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -465,20 +465,33 @@ const blackTable3 = [
465465
* @param {Object} [options] - Decoding options.
466466
*/
467467
class CCITTFaxDecoder {
468-
constructor(source, options = {}) {
468+
constructor(
469+
source,
470+
options = {
471+
K: 0,
472+
EndOfLine: false,
473+
EncodedByteAlign: false,
474+
Columns: 1728,
475+
Rows: 0,
476+
EndOfBlock: true,
477+
BlackIs1: false,
478+
}
479+
) {
469480
if (typeof source?.next !== "function") {
470481
throw new Error('CCITTFaxDecoder - invalid "source" parameter.');
471482
}
472483
this.source = source;
473484
this.eof = false;
474485

475-
this.encoding = options.K || 0;
476-
this.eoline = options.EndOfLine || false;
477-
this.byteAlign = options.EncodedByteAlign || false;
478-
this.columns = options.Columns || 1728;
479-
this.rows = options.Rows || 0;
480-
this.eoblock = options.EndOfBlock ?? true;
481-
this.black = options.BlackIs1 || false;
486+
({
487+
K: this.encoding,
488+
EndOfLine: this.eoline,
489+
EncodedByteAlign: this.byteAlign,
490+
Columns: this.columns,
491+
Rows: this.rows,
492+
EndOfBlock: this.eoblock,
493+
BlackIs1: this.black,
494+
} = options);
482495

483496
this.codingLine = new Uint32Array(this.columns + 1);
484497
this.refLine = new Uint32Array(this.columns + 2);

src/core/ccitt_stream.js

Lines changed: 82 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -13,51 +13,114 @@
1313
* limitations under the License.
1414
*/
1515

16+
import { shadow, warn } from "../shared/util.js";
1617
import { CCITTFaxDecoder } from "./ccitt.js";
1718
import { DecodeStream } from "./decode_stream.js";
1819
import { Dict } from "./primitives.js";
20+
import { JBig2CCITTFaxWasmImage } from "./jbig2_ccittFax_wasm.js";
1921

2022
class CCITTFaxStream extends DecodeStream {
2123
constructor(str, maybeLength, params) {
2224
super(maybeLength);
2325

2426
this.stream = str;
27+
this.maybeLength = maybeLength;
2528
this.dict = str.dict;
2629

2730
if (!(params instanceof Dict)) {
2831
params = Dict.empty;
2932
}
3033

31-
const source = {
32-
next() {
33-
return str.getByte();
34-
},
34+
this.params = {
35+
K: params.get("K") || 0,
36+
EndOfLine: !!params.get("EndOfLine"),
37+
EncodedByteAlign: !!params.get("EncodedByteAlign"),
38+
Columns: params.get("Columns") || 1728,
39+
Rows: params.get("Rows") || 0,
40+
EndOfBlock: !!(params.get("EndOfBlock") ?? true),
41+
BlackIs1: !!params.get("BlackIs1"),
3542
};
36-
this.ccittFaxDecoder = new CCITTFaxDecoder(source, {
37-
K: params.get("K"),
38-
EndOfLine: params.get("EndOfLine"),
39-
EncodedByteAlign: params.get("EncodedByteAlign"),
40-
Columns: params.get("Columns"),
41-
Rows: params.get("Rows"),
42-
EndOfBlock: params.get("EndOfBlock"),
43-
BlackIs1: params.get("BlackIs1"),
44-
});
43+
}
44+
45+
get bytes() {
46+
// If `this.maybeLength` is null, we'll get the entire stream.
47+
return shadow(this, "bytes", this.stream.getBytes(this.maybeLength));
4548
}
4649

4750
readBlock() {
51+
this.decodeImageFallback();
52+
}
53+
54+
get isImageStream() {
55+
return true;
56+
}
57+
58+
get isAsyncDecoder() {
59+
return true;
60+
}
61+
62+
async decodeImage(bytes, length, _decoderOptions) {
63+
if (this.eof) {
64+
return this.buffer;
65+
}
66+
if (!bytes) {
67+
bytes = this.stream.isAsync
68+
? (await this.stream.asyncGetBytes()) || this.bytes
69+
: this.bytes;
70+
}
71+
72+
try {
73+
this.buffer = await JBig2CCITTFaxWasmImage.decode(
74+
bytes,
75+
this.dict.get("W", "Width"),
76+
this.dict.get("H", "Height"),
77+
null,
78+
this.params
79+
);
80+
} catch {
81+
warn("CCITTFaxStream: Falling back to JS CCITTFax decoder.");
82+
return this.decodeImageFallback(bytes, length);
83+
}
84+
this.bufferLength = this.buffer.length;
85+
this.eof = true;
86+
87+
return this.buffer;
88+
}
89+
90+
decodeImageFallback(bytes, length) {
91+
if (this.eof) {
92+
return this.buffer;
93+
}
94+
const { params } = this;
95+
if (!bytes) {
96+
this.stream.reset();
97+
bytes = this.bytes;
98+
}
99+
let pos = 0;
100+
const source = {
101+
next() {
102+
return bytes[pos++] ?? -1;
103+
},
104+
};
105+
if (length && this.buffer.byteLength < length) {
106+
this.buffer = new Uint8Array(length);
107+
}
108+
this.ccittFaxDecoder = new CCITTFaxDecoder(source, params);
109+
let outPos = 0;
48110
while (!this.eof) {
49111
const c = this.ccittFaxDecoder.readNextChar();
50112
if (c === -1) {
51113
this.eof = true;
52-
return;
114+
break;
53115
}
54-
this.ensureBuffer(this.bufferLength + 1);
55-
this.buffer[this.bufferLength++] = c;
116+
if (!length) {
117+
this.ensureBuffer(outPos + 1);
118+
}
119+
this.buffer[outPos++] = c;
56120
}
57-
}
58121

59-
get isImageStream() {
60-
return true;
122+
this.bufferLength = this.buffer.length;
123+
return this.buffer.subarray(0, length || this.bufferLength);
61124
}
62125
}
63126

src/core/decode_stream.js

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -102,12 +102,12 @@ class DecodeStream extends BaseStream {
102102
async getImageData(length, decoderOptions) {
103103
if (!this.canAsyncDecodeImageFromBuffer) {
104104
if (this.isAsyncDecoder) {
105-
return this.decodeImage(null, decoderOptions);
105+
return this.decodeImage(null, length, decoderOptions);
106106
}
107107
return this.getBytes(length, decoderOptions);
108108
}
109109
const data = await this.stream.asyncGetBytes();
110-
return this.decodeImage(data, decoderOptions);
110+
return this.decodeImage(data, length, decoderOptions);
111111
}
112112

113113
reset() {
src/core/jbig2_wasm.js → src/core/jbig2_ccittFax_wasm.js

Lines changed: 24 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ class JBig2Error extends BaseException {
2323
}
2424
}
2525

26-
class JBig2WasmImage {
26+
class JBig2CCITTFaxWasmImage {
2727
static #buffer = null;
2828

2929
static #handler = null;
@@ -69,7 +69,7 @@ class JBig2WasmImage {
6969
}
7070
}
7171

72-
static async decode(bytes, width, height, globals) {
72+
static async decode(bytes, width, height, globals, CCITTOptions) {
7373
if (!this.#modulePromise) {
7474
const { promise, resolve } = Promise.withResolvers();
7575
const promises = [promise];
@@ -95,13 +95,28 @@ class JBig2WasmImage {
9595
const size = bytes.length;
9696
ptr = module._malloc(size);
9797
module.writeArrayToMemory(bytes, ptr);
98-
const globalsSize = globals ? globals.length : 0;
99-
if (globalsSize > 0) {
100-
globalsPtr = module._malloc(globalsSize);
101-
module.writeArrayToMemory(globals, globalsPtr);
102-
}
10398

104-
module._jbig2_decode(ptr, size, width, height, globalsPtr, globalsSize);
99+
if (CCITTOptions) {
100+
module._ccitt_decode(
101+
ptr,
102+
size,
103+
width,
104+
height,
105+
CCITTOptions.K,
106+
CCITTOptions.EndOfLine ? 1 : 0,
107+
CCITTOptions.EncodedByteAlign ? 1 : 0,
108+
CCITTOptions.BlackIs1 ? 1 : 0,
109+
CCITTOptions.Columns,
110+
CCITTOptions.Rows
111+
);
112+
} else {
113+
const globalsSize = globals ? globals.length : 0;
114+
if (globalsSize > 0) {
115+
globalsPtr = module._malloc(globalsSize);
116+
module.writeArrayToMemory(globals, globalsPtr);
117+
}
118+
module._jbig2_decode(ptr, size, width, height, globalsPtr, globalsSize);
119+
}
105120
if (!module.imageData) {
106121
throw new JBig2Error("Unknown error");
107122
}
@@ -124,4 +139,4 @@ class JBig2WasmImage {
124139
}
125140
}
126141

127-
export { JBig2Error, JBig2WasmImage };
142+
export { JBig2CCITTFaxWasmImage, JBig2Error };

src/core/jbig2_stream.js

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@ import { shadow, warn } from "../shared/util.js";
1717
import { BaseStream } from "./base_stream.js";
1818
import { DecodeStream } from "./decode_stream.js";
1919
import { Dict } from "./primitives.js";
20+
import { JBig2CCITTFaxWasmImage } from "./jbig2_ccittFax_wasm.js";
2021
import { Jbig2Image } from "./jbig2.js";
21-
import { JBig2WasmImage } from "./jbig2_wasm.js";
2222

2323
/**
2424
* For JBIG2's we use a library to decode these images and
@@ -45,7 +45,7 @@ class Jbig2Stream extends DecodeStream {
4545
}
4646

4747
readBlock() {
48-
this.decodeImage();
48+
this.decodeImageFallback();
4949
}
5050

5151
get isAsyncDecoder() {
@@ -56,7 +56,7 @@ class Jbig2Stream extends DecodeStream {
5656
return true;
5757
}
5858

59-
async decodeImage(bytes, _decoderOptions) {
59+
async decodeImage(bytes, length, _decoderOptions) {
6060
if (this.eof) {
6161
return this.buffer;
6262
}
@@ -69,23 +69,23 @@ class Jbig2Stream extends DecodeStream {
6969
globals = globalsStream.getBytes();
7070
}
7171
}
72-
this.buffer = await JBig2WasmImage.decode(
72+
this.buffer = await JBig2CCITTFaxWasmImage.decode(
7373
bytes,
7474
this.dict.get("Width"),
7575
this.dict.get("Height"),
7676
globals
7777
);
7878
} catch {
7979
warn("Jbig2Stream: Falling back to JS JBIG2 decoder.");
80-
return this.decodeImageFallback(bytes);
80+
return this.decodeImageFallback(bytes, length);
8181
}
8282
this.bufferLength = this.buffer.length;
8383
this.eof = true;
8484

8585
return this.buffer;
8686
}
8787

88-
async decodeImageFallback(bytes) {
88+
decodeImageFallback(bytes, _length) {
8989
if (this.eof) {
9090
return this.buffer;
9191
}

src/core/jpx_stream.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ class JpxStream extends DecodeStream {
4949
return true;
5050
}
5151

52-
async decodeImage(bytes, decoderOptions) {
52+
async decodeImage(bytes, _length, decoderOptions) {
5353
if (this.eof) {
5454
return this.buffer;
5555
}

src/core/pdf_manager.js

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ import {
2222
} from "../shared/util.js";
2323
import { ChunkedStreamManager } from "./chunked_stream.js";
2424
import { ImageResizer } from "./image_resizer.js";
25-
import { JBig2WasmImage } from "./jbig2_wasm.js";
25+
import { JBig2CCITTFaxWasmImage } from "./jbig2_ccittFax_wasm.js";
2626
import { JpegStream } from "./jpeg_stream.js";
2727
import { JpxImage } from "./jpx.js";
2828
import { MissingDataException } from "./core_utils.js";
@@ -82,7 +82,7 @@ class BasePdfManager {
8282
JpxImage.setOptions(options);
8383
IccColorSpace.setOptions(options);
8484
CmykICCBasedCS.setOptions(options);
85-
JBig2WasmImage.setOptions(options);
85+
JBig2CCITTFaxWasmImage.setOptions(options);
8686
}
8787

8888
get docId() {

0 commit comments

Comments
 (0)