Skip to content

Commit 2c0ba3c

Browse files
committed
Speed up TextDecoder.test.ts
- Reduce calls to decode and clear - Consolidate numeric checks - Increase batch size
1 parent e515647 commit 2c0ba3c

1 file changed

Lines changed: 75 additions & 43 deletions

File tree

src/common/input/TextDecoder.test.ts

Lines changed: 75 additions & 43 deletions
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,46 @@ function fromByteString(s: string): Uint8Array {
2929
return result;
3030
}
3131

32-
const BATCH_SIZE = 2048;
32+
function assertDecodedRange(
33+
min: number,
34+
max: number,
35+
skip: (codePoint: number) => boolean,
36+
buildChar: (codePoint: number) => string,
37+
decode: (input: string, target: Uint32Array) => number,
38+
outputToString: (data: Uint32Array, length: number) => string
39+
): void {
40+
if (max <= min) {
41+
return;
42+
}
43+
let input = '';
44+
let count = 0;
45+
for (let i = min; i < max; ++i) {
46+
if (skip(i)) {
47+
continue;
48+
}
49+
input += buildChar(i);
50+
count++;
51+
}
52+
const target = new Uint32Array(count);
53+
const length = decode(input, target);
54+
assert.equal(length, count);
55+
let mismatchIndex = -1;
56+
let index = 0;
57+
for (let i = min; i < max; ++i) {
58+
if (skip(i)) {
59+
continue;
60+
}
61+
if (target[index] !== i) {
62+
mismatchIndex = index;
63+
break;
64+
}
65+
index++;
66+
}
67+
assert.equal(mismatchIndex, -1);
68+
assert.equal(outputToString(target, length), input);
69+
}
70+
71+
const BATCH_SIZE = 8192;
3372

3473
const TEST_STRINGS = [
3574
'Лорем ипсум долор сит амет, ех сеа аццусам диссентиет. Ан еос стет еирмод витуперата. Иус дицерет урбанитас ет. Ан при алтера долорес сплендиде, цу яуо интегре денияуе, игнота волуптариа инструцтиор цу вим.',
@@ -60,34 +99,31 @@ describe('text encodings', () => {
6099
const max = Math.min(min + BATCH_SIZE, 65536);
61100
it(`${formatRange(min, max)}`, () => {
62101
const decoder = new StringToUtf32();
63-
const target = new Uint32Array(5);
64-
for (let i = min; i < max; ++i) {
65-
// skip surrogate pairs and a BOM
66-
if ((i >= 0xD800 && i <= 0xDFFF) || i === 0xFEFF) {
67-
continue;
68-
}
69-
const length = decoder.decode(String.fromCharCode(i), target);
70-
assert.equal(length, 1);
71-
assert.equal(target[0], i);
72-
assert.equal(utf32ToString(target, 0, length), String.fromCharCode(i));
73-
decoder.clear();
74-
}
102+
assertDecodedRange(
103+
min,
104+
max,
105+
(i) => (i >= 0xD800 && i <= 0xDFFF) || i === 0xFEFF,
106+
(i) => String.fromCharCode(i),
107+
(input, target) => decoder.decode(input, target),
108+
(data, length) => utf32ToString(data, 0, length)
109+
);
75110
});
76111
}
77112
for (let min = 65536; min < 0x10FFFF; min += BATCH_SIZE) {
78113
const max = Math.min(min + BATCH_SIZE, 0x10FFFF);
79114
it(`${formatRange(min, max)} (surrogates)`, () => {
80115
const decoder = new StringToUtf32();
81-
const target = new Uint32Array(5);
82-
for (let i = min; i < max; ++i) {
83-
const codePoint = i - 0x10000;
84-
const s = String.fromCharCode((codePoint >> 10) + 0xD800) + String.fromCharCode((codePoint % 0x400) + 0xDC00);
85-
const length = decoder.decode(s, target);
86-
assert.equal(length, 1);
87-
assert.equal(target[0], i);
88-
assert.equal(utf32ToString(target, 0, length), s);
89-
decoder.clear();
90-
}
116+
assertDecodedRange(
117+
min,
118+
max,
119+
() => false,
120+
(i) => {
121+
const codePoint = i - 0x10000;
122+
return String.fromCharCode((codePoint >> 10) + 0xD800, (codePoint % 0x400) + 0xDC00);
123+
},
124+
(input, target) => decoder.decode(input, target),
125+
(data, length) => utf32ToString(data, 0, length)
126+
);
91127
});
92128
}
93129

@@ -131,33 +167,29 @@ describe('text encodings', () => {
131167
const max = Math.min(min + BATCH_SIZE, 65536);
132168
it(`${formatRange(min, max)} (1/2/3 byte sequences)`, () => {
133169
const decoder = new Utf8ToUtf32();
134-
const target = new Uint32Array(5);
135-
for (let i = min; i < max; ++i) {
136-
// skip surrogate pairs and a BOM
137-
if ((i >= 0xD800 && i <= 0xDFFF) || i === 0xFEFF) {
138-
continue;
139-
}
140-
const utf8Data = fromByteString(encode(String.fromCharCode(i)));
141-
const length = decoder.decode(utf8Data, target);
142-
assert.equal(length, 1);
143-
assert.equal(toString(target, length), String.fromCharCode(i));
144-
decoder.clear();
145-
}
170+
assertDecodedRange(
171+
min,
172+
max,
173+
(i) => (i >= 0xD800 && i <= 0xDFFF) || i === 0xFEFF,
174+
(i) => String.fromCharCode(i),
175+
(input, target) => decoder.decode(fromByteString(encode(input)), target),
176+
(data, length) => toString(data, length)
177+
);
146178
});
147179
}
148180
for (let minRaw = 60000; minRaw < 0x10FFFF; minRaw += BATCH_SIZE) {
149181
const min = Math.max(minRaw, 65536);
150182
const max = Math.min(minRaw + BATCH_SIZE, 0x10FFFF);
151183
it(`${formatRange(min, max)} (4 byte sequences)`, function (): void {
152184
const decoder = new Utf8ToUtf32();
153-
const target = new Uint32Array(5);
154-
for (let i = min; i < max; ++i) {
155-
const utf8Data = fromByteString(encode(stringFromCodePoint(i)));
156-
const length = decoder.decode(utf8Data, target);
157-
assert.equal(length, 1);
158-
assert.equal(target[0], i);
159-
decoder.clear();
160-
}
185+
assertDecodedRange(
186+
min,
187+
max,
188+
() => false,
189+
(i) => stringFromCodePoint(i),
190+
(input, target) => decoder.decode(fromByteString(encode(input)), target),
191+
(data, length) => toString(data, length)
192+
);
161193
});
162194
}
163195

0 commit comments

Comments
 (0)