1+ /*
2+ * Source: Mozilla Rhino, org.mozilla.javascript.TokenStream
3+ *
4+ * This Source Code Form is subject to the terms of the Mozilla Public
5+ * License, v. 2.0. If a copy of the MPL was not distributed with this
6+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7+ *
8+ */
19package org .schabi .newpipe .extractor .utils .jsextractor ;
210
3- import org .mozilla .javascript .Context ;
411import org .mozilla .javascript .Kit ;
512import org .mozilla .javascript .ScriptRuntime ;
613import org .schabi .newpipe .extractor .exceptions .ParsingException ;
714
8- /* Source: Mozilla Rhino, org.mozilla.javascript.Token
9- *
10- * This Source Code Form is subject to the terms of the Mozilla Public
11- * License, v. 2.0. If a copy of the MPL was not distributed with this
12- * file, You can obtain one at http://mozilla.org/MPL/2.0/.
13- * */
14- class TokenStream {
15+ /**
16+ * Based on Mozilla Rhino's (v1.7.14) org.mozilla.javascript.TokenStream
17+ * <p>
18+ * Changes:
19+ * <ul>
20+ * <li>Tailored for {@link Lexer}</li>
21+ * <li>Removed all unneeded code to improve performance</li>
22+ * <li>Optimized for ECMAScript 6 (ES2015)</li>
23+ * </ul>
24+ */
25+ class EcmaScriptTokenStream {
1526 /*
1627 * For chars - because we need something out-of-range
1728 * to check. (And checking EOF by exception is annoying.)
@@ -28,125 +39,17 @@ class TokenStream {
2839 private static final char BYTE_ORDER_MARK = '\uFEFF' ;
2940 private static final char NUMERIC_SEPARATOR = '_' ;
3041
31- TokenStream (final String sourceString , final int lineno , final int languageVersion ) {
42+ EcmaScriptTokenStream (final String sourceString , final int lineno , final boolean strictMode ) {
3243 this .sourceString = sourceString ;
3344 this .sourceCursor = 0 ;
3445 this .cursor = 0 ;
3546
3647 this .lineno = lineno ;
37- this .languageVersion = languageVersion ;
48+ this .strictMode = strictMode ;
3849 }
3950
40- private static Token stringToKeyword (
41- final String name ,
42- final int version ,
43- final boolean isStrict ) {
44- if (version < Context .VERSION_ES6 ) {
45- return stringToKeywordForJS (name );
46- }
47- return stringToKeywordForES (name , isStrict );
48- }
49-
50- /** JavaScript 1.8 and earlier */
51- private static Token stringToKeywordForJS (final String name ) {
52- switch (name ) {
53- case "break" :
54- return Token .BREAK ;
55- case "case" :
56- return Token .CASE ;
57- case "continue" :
58- return Token .CONTINUE ;
59- case "default" :
60- return Token .DEFAULT ;
61- case "delete" :
62- return Token .DELPROP ;
63- case "do" :
64- return Token .DO ;
65- case "else" :
66- return Token .ELSE ;
67- case "export" :
68- return Token .EXPORT ;
69- case "false" :
70- return Token .FALSE ;
71- case "for" :
72- return Token .FOR ;
73- case "function" :
74- return Token .FUNCTION ;
75- case "if" :
76- return Token .IF ;
77- case "in" :
78- return Token .IN ;
79- case "let" :
80- return Token .LET ;
81- case "new" :
82- return Token .NEW ;
83- case "null" :
84- return Token .NULL ;
85- case "return" :
86- return Token .RETURN ;
87- case "switch" :
88- return Token .SWITCH ;
89- case "this" :
90- return Token .THIS ;
91- case "true" :
92- return Token .TRUE ;
93- case "typeof" :
94- return Token .TYPEOF ;
95- case "var" :
96- return Token .VAR ;
97- case "void" :
98- return Token .VOID ;
99- case "while" :
100- return Token .WHILE ;
101- case "with" :
102- return Token .WITH ;
103- case "yield" :
104- return Token .YIELD ;
105- case "throw" :
106- return Token .THROW ;
107- case "catch" :
108- return Token .CATCH ;
109- case "const" :
110- return Token .CONST ;
111- case "debugger" :
112- return Token .DEBUGGER ;
113- case "finally" :
114- return Token .FINALLY ;
115- case "instanceof" :
116- return Token .INSTANCEOF ;
117- case "try" :
118- return Token .TRY ;
119- case "abstract" :
120- case "boolean" :
121- case "byte" :
122- case "char" :
123- case "class" :
124- case "double" :
125- case "enum" :
126- case "extends" :
127- case "final" :
128- case "float" :
129- case "goto" :
130- case "implements" :
131- case "import" :
132- case "int" :
133- case "interface" :
134- case "long" :
135- case "native" :
136- case "package" :
137- case "private" :
138- case "protected" :
139- case "public" :
140- case "short" :
141- case "static" :
142- case "super" :
143- case "synchronized" :
144- case "throws" :
145- case "transient" :
146- case "volatile" :
147- return Token .RESERVED ;
148- }
149- return Token .EOF ;
51+ private Token stringToKeyword (final String name ) {
52+ return stringToKeywordForES (name , strictMode );
15053 }
15154
15255 /** ECMAScript 6. */
@@ -346,19 +249,9 @@ final Token getToken() throws ParsingException {
346249 // check if it's a keyword.
347250
348251 // Return the corresponding token if it's a keyword
349- Token result = stringToKeyword (str , languageVersion , STRICT_MODE );
252+ final Token result = stringToKeyword (str );
350253 if (result != Token .EOF ) {
351- if ((result == Token .LET || result == Token .YIELD )
352- && languageVersion < Context .VERSION_1_7 ) {
353- result = Token .NAME ;
354- }
355- // Save the string in case we need to use in
356- // object literal definitions.
357- if (result != Token .RESERVED
358- || languageVersion >= Context .VERSION_ES6
359- || !IS_RESERVED_KEYWORD_AS_IDENTIFIER ) {
360- return result ;
361- }
254+ return result ; // Always needed due to ECMAScript
362255 }
363256 }
364257 return Token .NAME ;
@@ -368,18 +261,17 @@ final Token getToken() throws ParsingException {
368261 if (isDigit (c ) || (c == '.' && isDigit (peekChar ()))) {
369262 stringBufferTop = 0 ;
370263 int base = 10 ;
371- final boolean es6 = languageVersion >= Context .VERSION_ES6 ;
372264 boolean isOldOctal = false ;
373265
374266 if (c == '0' ) {
375267 c = getChar ();
376268 if (c == 'x' || c == 'X' ) {
377269 base = 16 ;
378270 c = getChar ();
379- } else if (es6 && ( c == 'o' || c == 'O' ) ) {
271+ } else if (c == 'o' || c == 'O' ) {
380272 base = 8 ;
381273 c = getChar ();
382- } else if (es6 && ( c == 'b' || c == 'B' ) ) {
274+ } else if (c == 'b' || c == 'B' ) {
383275 base = 2 ;
384276 c = getChar ();
385277 } else if (isDigit (c )) {
@@ -422,7 +314,7 @@ final Token getToken() throws ParsingException {
422314 throw new ParsingException ("number format error" );
423315 }
424316
425- if (es6 && c == 'n' ) {
317+ if (c == 'n' ) {
426318 c = getChar ();
427319 } else if (base == 10 && (c == '.' || c == 'e' || c == 'E' )) {
428320 if (c == '.' ) {
@@ -705,7 +597,7 @@ final Token getToken() throws ParsingException {
705597 return Token .GT ;
706598
707599 case '*' :
708- if (languageVersion >= Context . VERSION_ES6 && matchChar ('*' )) {
600+ if (matchChar ('*' )) {
709601 if (matchChar ('=' )) {
710602 return Token .ASSIGN_EXP ;
711603 }
@@ -1080,18 +972,16 @@ public Token nextToken() throws ParsingException {
1080972
1081973 // sourceCursor is an index into a small buffer that keeps a
1082974 // sliding window of the source stream.
1083- int sourceCursor ;
975+ private int sourceCursor ;
1084976
1085977 // cursor is a monotonically increasing index into the original
1086978 // source stream, tracking exactly how far scanning has progressed.
1087979 // Its value is the index of the next character to be scanned.
1088- int cursor ;
980+ private int cursor ;
1089981
1090982 // Record start and end positions of last scanned token.
1091983 int tokenBeg ;
1092984 int tokenEnd ;
1093985
1094- private final int languageVersion ;
1095- private static final boolean IS_RESERVED_KEYWORD_AS_IDENTIFIER = true ;
1096- private static final boolean STRICT_MODE = false ;
986+ private final boolean strictMode ;
1097987}
0 commit comments