@@ -29,7 +29,46 @@ function fromByteString(s: string): Uint8Array {
2929 return result ;
3030}
3131
32- const BATCH_SIZE = 2048 ;
/**
 * Decodes all code points of the half-open range `[min, max)` in one batch
 * and asserts that the decoder reproduced every one of them.
 *
 * The input is the concatenation of `buildChar(cp)` for every code point that
 * is not rejected by `skip`. It is decoded with a single `decode` call into a
 * buffer holding exactly one slot per expected code point.
 *
 * @param min first code point of the range (inclusive).
 * @param max end of the range (exclusive); nothing is done when `max <= min`.
 * @param skip returns true for code points that must be left out of the batch.
 * @param buildChar produces the string representation of one code point.
 * @param decode decodes `input` into `target` and returns the number of
 *   code points written.
 * @param outputToString converts the first `length` entries of the decoded
 *   buffer back into a string, which must equal the batch input.
 * @throws an assertion error when the decoded length, any decoded code point
 *   or the round-tripped string does not match.
 */
function assertDecodedRange(
  min: number,
  max: number,
  skip: (codePoint: number) => boolean,
  buildChar: (codePoint: number) => string,
  decode: (input: string, target: Uint32Array) => number,
  outputToString: (data: Uint32Array, length: number) => string
): void {
  if (max <= min) {
    return;
  }

  // Collect the expected code points once, so verification below does not
  // have to replay the skip predicate and can name the offending code point.
  const expected: number[] = [];
  const pieces: string[] = [];
  for (let codePoint = min; codePoint < max; ++codePoint) {
    if (skip(codePoint)) {
      continue;
    }
    expected.push(codePoint);
    pieces.push(buildChar(codePoint));
  }
  const input = pieces.join('');

  const target = new Uint32Array(expected.length);
  const length = decode(input, target);
  assert.equal(
    length,
    expected.length,
    `expected ${expected.length} decoded code points for range [${min}, ${max}) but got ${length}`
  );

  // Compare with a plain loop and only go through the assertion library for
  // the first mismatch, keeping the common (passing) path cheap.
  for (let index = 0; index < expected.length; ++index) {
    if (target[index] !== expected[index]) {
      assert.equal(
        target[index],
        expected[index],
        `decoded value 0x${target[index].toString(16).toUpperCase()} at index ${index} ` +
        `does not match expected code point U+${expected[index].toString(16).toUpperCase()}`
      );
    }
  }

  assert.equal(
    outputToString(target, length),
    input,
    `round-tripped string differs from input for range [${min}, ${max})`
  );
}
70+
71+ const BATCH_SIZE = 8192 ;
3372
3473const TEST_STRINGS = [
3574 'Лорем ипсум долор сит амет, ех сеа аццусам диссентиет. Ан еос стет еирмод витуперата. Иус дицерет урбанитас ет. Ан при алтера долорес сплендиде, цу яуо интегре денияуе, игнота волуптариа инструцтиор цу вим.' ,
@@ -60,34 +99,31 @@ describe('text encodings', () => {
6099 const max = Math . min ( min + BATCH_SIZE , 65536 ) ;
61100 it ( `${ formatRange ( min , max ) } ` , ( ) => {
62101 const decoder = new StringToUtf32 ( ) ;
63- const target = new Uint32Array ( 5 ) ;
64- for ( let i = min ; i < max ; ++ i ) {
65- // skip surrogate pairs and a BOM
66- if ( ( i >= 0xD800 && i <= 0xDFFF ) || i === 0xFEFF ) {
67- continue ;
68- }
69- const length = decoder . decode ( String . fromCharCode ( i ) , target ) ;
70- assert . equal ( length , 1 ) ;
71- assert . equal ( target [ 0 ] , i ) ;
72- assert . equal ( utf32ToString ( target , 0 , length ) , String . fromCharCode ( i ) ) ;
73- decoder . clear ( ) ;
74- }
102+ assertDecodedRange (
103+ min ,
104+ max ,
105+ ( i ) => ( i >= 0xD800 && i <= 0xDFFF ) || i === 0xFEFF ,
106+ ( i ) => String . fromCharCode ( i ) ,
107+ ( input , target ) => decoder . decode ( input , target ) ,
108+ ( data , length ) => utf32ToString ( data , 0 , length )
109+ ) ;
75110 } ) ;
76111 }
77112 for ( let min = 65536 ; min < 0x10FFFF ; min += BATCH_SIZE ) {
78113 const max = Math . min ( min + BATCH_SIZE , 0x10FFFF ) ;
79114 it ( `${ formatRange ( min , max ) } (surrogates)` , ( ) => {
80115 const decoder = new StringToUtf32 ( ) ;
81- const target = new Uint32Array ( 5 ) ;
82- for ( let i = min ; i < max ; ++ i ) {
83- const codePoint = i - 0x10000 ;
84- const s = String . fromCharCode ( ( codePoint >> 10 ) + 0xD800 ) + String . fromCharCode ( ( codePoint % 0x400 ) + 0xDC00 ) ;
85- const length = decoder . decode ( s , target ) ;
86- assert . equal ( length , 1 ) ;
87- assert . equal ( target [ 0 ] , i ) ;
88- assert . equal ( utf32ToString ( target , 0 , length ) , s ) ;
89- decoder . clear ( ) ;
90- }
116+ assertDecodedRange (
117+ min ,
118+ max ,
119+ ( ) => false ,
120+ ( i ) => {
121+ const codePoint = i - 0x10000 ;
122+ return String . fromCharCode ( ( codePoint >> 10 ) + 0xD800 , ( codePoint % 0x400 ) + 0xDC00 ) ;
123+ } ,
124+ ( input , target ) => decoder . decode ( input , target ) ,
125+ ( data , length ) => utf32ToString ( data , 0 , length )
126+ ) ;
91127 } ) ;
92128 }
93129
@@ -131,33 +167,29 @@ describe('text encodings', () => {
131167 const max = Math . min ( min + BATCH_SIZE , 65536 ) ;
132168 it ( `${ formatRange ( min , max ) } (1/2/3 byte sequences)` , ( ) => {
133169 const decoder = new Utf8ToUtf32 ( ) ;
134- const target = new Uint32Array ( 5 ) ;
135- for ( let i = min ; i < max ; ++ i ) {
136- // skip surrogate pairs and a BOM
137- if ( ( i >= 0xD800 && i <= 0xDFFF ) || i === 0xFEFF ) {
138- continue ;
139- }
140- const utf8Data = fromByteString ( encode ( String . fromCharCode ( i ) ) ) ;
141- const length = decoder . decode ( utf8Data , target ) ;
142- assert . equal ( length , 1 ) ;
143- assert . equal ( toString ( target , length ) , String . fromCharCode ( i ) ) ;
144- decoder . clear ( ) ;
145- }
170+ assertDecodedRange (
171+ min ,
172+ max ,
173+ ( i ) => ( i >= 0xD800 && i <= 0xDFFF ) || i === 0xFEFF ,
174+ ( i ) => String . fromCharCode ( i ) ,
175+ ( input , target ) => decoder . decode ( fromByteString ( encode ( input ) ) , target ) ,
176+ ( data , length ) => toString ( data , length )
177+ ) ;
146178 } ) ;
147179 }
148180 for ( let minRaw = 60000 ; minRaw < 0x10FFFF ; minRaw += BATCH_SIZE ) {
149181 const min = Math . max ( minRaw , 65536 ) ;
150182 const max = Math . min ( minRaw + BATCH_SIZE , 0x10FFFF ) ;
151183 it ( `${ formatRange ( min , max ) } (4 byte sequences)` , function ( ) : void {
152184 const decoder = new Utf8ToUtf32 ( ) ;
153- const target = new Uint32Array ( 5 ) ;
154- for ( let i = min ; i < max ; ++ i ) {
155- const utf8Data = fromByteString ( encode ( stringFromCodePoint ( i ) ) ) ;
156- const length = decoder . decode ( utf8Data , target ) ;
157- assert . equal ( length , 1 ) ;
158- assert . equal ( target [ 0 ] , i ) ;
159- decoder . clear ( ) ;
160- }
185+ assertDecodedRange (
186+ min ,
187+ max ,
188+ ( ) => false ,
189+ ( i ) => stringFromCodePoint ( i ) ,
190+ ( input , target ) => decoder . decode ( fromByteString ( encode ( input ) ) , target ) ,
191+ ( data , length ) => toString ( data , length )
192+ ) ;
161193 } ) ;
162194 }
163195
0 commit comments