Skip to content

Commit 0c3ac0a

Browse files
committed
Rework TokenStream
1 parent 77ee25e commit 0c3ac0a

2 files changed

Lines changed: 35 additions & 156 deletions

File tree

extractor/src/main/java/org/schabi/newpipe/extractor/utils/jsextractor/TokenStream.java renamed to extractor/src/main/java/org/schabi/newpipe/extractor/utils/jsextractor/EcmaScriptTokenStream.java

Lines changed: 32 additions & 142 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,28 @@
1+
/*
2+
* Source: Mozilla Rhino, org.mozilla.javascript.TokenStream
3+
*
4+
* This Source Code Form is subject to the terms of the Mozilla Public
5+
* License, v. 2.0. If a copy of the MPL was not distributed with this
6+
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
7+
*
8+
*/
19
package org.schabi.newpipe.extractor.utils.jsextractor;
210

3-
import org.mozilla.javascript.Context;
411
import org.mozilla.javascript.Kit;
512
import org.mozilla.javascript.ScriptRuntime;
613
import org.schabi.newpipe.extractor.exceptions.ParsingException;
714

8-
/* Source: Mozilla Rhino, org.mozilla.javascript.Token
9-
*
10-
* This Source Code Form is subject to the terms of the Mozilla Public
11-
* License, v. 2.0. If a copy of the MPL was not distributed with this
12-
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
13-
* */
14-
class TokenStream {
15+
/**
16+
* Based on Mozilla Rhino's (v1.7.14) org.mozilla.javascript.TokenStream
17+
* <p>
18+
* Changes:
19+
* <ul>
20+
* <li>Tailored for {@link Lexer}</li>
21+
* <li>Removed all unneeded code to improve performance</li>
22+
* <li>Optimized for ECMAScript6/2015</li>
23+
* </ul>
24+
*/
25+
class EcmaScriptTokenStream {
1526
/*
1627
* For chars - because we need something out-of-range
1728
* to check. (And checking EOF by exception is annoying.)
@@ -28,125 +39,17 @@ class TokenStream {
2839
private static final char BYTE_ORDER_MARK = '\uFEFF';
2940
private static final char NUMERIC_SEPARATOR = '_';
3041

31-
TokenStream(final String sourceString, final int lineno, final int languageVersion) {
42+
EcmaScriptTokenStream(final String sourceString, final int lineno, final boolean strictMode) {
3243
this.sourceString = sourceString;
3344
this.sourceCursor = 0;
3445
this.cursor = 0;
3546

3647
this.lineno = lineno;
37-
this.languageVersion = languageVersion;
48+
this.strictMode = strictMode;
3849
}
3950

40-
private static Token stringToKeyword(
41-
final String name,
42-
final int version,
43-
final boolean isStrict) {
44-
if (version < Context.VERSION_ES6) {
45-
return stringToKeywordForJS(name);
46-
}
47-
return stringToKeywordForES(name, isStrict);
48-
}
49-
50-
/** JavaScript 1.8 and earlier */
51-
private static Token stringToKeywordForJS(final String name) {
52-
switch (name) {
53-
case "break":
54-
return Token.BREAK;
55-
case "case":
56-
return Token.CASE;
57-
case "continue":
58-
return Token.CONTINUE;
59-
case "default":
60-
return Token.DEFAULT;
61-
case "delete":
62-
return Token.DELPROP;
63-
case "do":
64-
return Token.DO;
65-
case "else":
66-
return Token.ELSE;
67-
case "export":
68-
return Token.EXPORT;
69-
case "false":
70-
return Token.FALSE;
71-
case "for":
72-
return Token.FOR;
73-
case "function":
74-
return Token.FUNCTION;
75-
case "if":
76-
return Token.IF;
77-
case "in":
78-
return Token.IN;
79-
case "let":
80-
return Token.LET;
81-
case "new":
82-
return Token.NEW;
83-
case "null":
84-
return Token.NULL;
85-
case "return":
86-
return Token.RETURN;
87-
case "switch":
88-
return Token.SWITCH;
89-
case "this":
90-
return Token.THIS;
91-
case "true":
92-
return Token.TRUE;
93-
case "typeof":
94-
return Token.TYPEOF;
95-
case "var":
96-
return Token.VAR;
97-
case "void":
98-
return Token.VOID;
99-
case "while":
100-
return Token.WHILE;
101-
case "with":
102-
return Token.WITH;
103-
case "yield":
104-
return Token.YIELD;
105-
case "throw":
106-
return Token.THROW;
107-
case "catch":
108-
return Token.CATCH;
109-
case "const":
110-
return Token.CONST;
111-
case "debugger":
112-
return Token.DEBUGGER;
113-
case "finally":
114-
return Token.FINALLY;
115-
case "instanceof":
116-
return Token.INSTANCEOF;
117-
case "try":
118-
return Token.TRY;
119-
case "abstract":
120-
case "boolean":
121-
case "byte":
122-
case "char":
123-
case "class":
124-
case "double":
125-
case "enum":
126-
case "extends":
127-
case "final":
128-
case "float":
129-
case "goto":
130-
case "implements":
131-
case "import":
132-
case "int":
133-
case "interface":
134-
case "long":
135-
case "native":
136-
case "package":
137-
case "private":
138-
case "protected":
139-
case "public":
140-
case "short":
141-
case "static":
142-
case "super":
143-
case "synchronized":
144-
case "throws":
145-
case "transient":
146-
case "volatile":
147-
return Token.RESERVED;
148-
}
149-
return Token.EOF;
51+
private Token stringToKeyword(final String name) {
52+
return stringToKeywordForES(name, strictMode);
15053
}
15154

15255
/** ECMAScript 6. */
@@ -346,19 +249,9 @@ final Token getToken() throws ParsingException {
346249
// check if it's a keyword.
347250

348251
// Return the corresponding token if it's a keyword
349-
Token result = stringToKeyword(str, languageVersion, STRICT_MODE);
252+
final Token result = stringToKeyword(str);
350253
if (result != Token.EOF) {
351-
if ((result == Token.LET || result == Token.YIELD)
352-
&& languageVersion < Context.VERSION_1_7) {
353-
result = Token.NAME;
354-
}
355-
// Save the string in case we need to use in
356-
// object literal definitions.
357-
if (result != Token.RESERVED
358-
|| languageVersion >= Context.VERSION_ES6
359-
|| !IS_RESERVED_KEYWORD_AS_IDENTIFIER) {
360-
return result;
361-
}
254+
return result; // Always needed due to ECMAScript
362255
}
363256
}
364257
return Token.NAME;
@@ -368,18 +261,17 @@ final Token getToken() throws ParsingException {
368261
if (isDigit(c) || (c == '.' && isDigit(peekChar()))) {
369262
stringBufferTop = 0;
370263
int base = 10;
371-
final boolean es6 = languageVersion >= Context.VERSION_ES6;
372264
boolean isOldOctal = false;
373265

374266
if (c == '0') {
375267
c = getChar();
376268
if (c == 'x' || c == 'X') {
377269
base = 16;
378270
c = getChar();
379-
} else if (es6 && (c == 'o' || c == 'O')) {
271+
} else if (c == 'o' || c == 'O') {
380272
base = 8;
381273
c = getChar();
382-
} else if (es6 && (c == 'b' || c == 'B')) {
274+
} else if (c == 'b' || c == 'B') {
383275
base = 2;
384276
c = getChar();
385277
} else if (isDigit(c)) {
@@ -422,7 +314,7 @@ final Token getToken() throws ParsingException {
422314
throw new ParsingException("number format error");
423315
}
424316

425-
if (es6 && c == 'n') {
317+
if (c == 'n') {
426318
c = getChar();
427319
} else if (base == 10 && (c == '.' || c == 'e' || c == 'E')) {
428320
if (c == '.') {
@@ -705,7 +597,7 @@ final Token getToken() throws ParsingException {
705597
return Token.GT;
706598

707599
case '*':
708-
if (languageVersion >= Context.VERSION_ES6 && matchChar('*')) {
600+
if (matchChar('*')) {
709601
if (matchChar('=')) {
710602
return Token.ASSIGN_EXP;
711603
}
@@ -1080,18 +972,16 @@ public Token nextToken() throws ParsingException {
1080972

1081973
// sourceCursor is an index into a small buffer that keeps a
1082974
// sliding window of the source stream.
1083-
int sourceCursor;
975+
private int sourceCursor;
1084976

1085977
// cursor is a monotonically increasing index into the original
1086978
// source stream, tracking exactly how far scanning has progressed.
1087979
// Its value is the index of the next character to be scanned.
1088-
int cursor;
980+
private int cursor;
1089981

1090982
// Record start and end positions of last scanned token.
1091983
int tokenBeg;
1092984
int tokenEnd;
1093985

1094-
private final int languageVersion;
1095-
private static final boolean IS_RESERVED_KEYWORD_AS_IDENTIFIER = true;
1096-
private static final boolean STRICT_MODE = false;
986+
private final boolean strictMode;
1097987
}

extractor/src/main/java/org/schabi/newpipe/extractor/utils/jsextractor/Lexer.java

Lines changed: 3 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
package org.schabi.newpipe.extractor.utils.jsextractor;
22

3-
import org.mozilla.javascript.Context;
43
import org.schabi.newpipe.extractor.exceptions.ParsingException;
54

65
import java.util.Stack;
@@ -119,7 +118,7 @@ public static class ParsedToken {
119118
}
120119
}
121120

122-
private final TokenStream stream;
121+
private final EcmaScriptTokenStream stream;
123122
private final LookBehind lastThree;
124123
private final Stack<Brace> braceStack;
125124
private final Stack<Paren> parenStack;
@@ -128,24 +127,14 @@ public static class ParsedToken {
128127
* Create a new JavaScript lexer with the given source code
129128
*
130129
* @param js JavaScript code
131-
* @param languageVersion JavaScript version (from Rhino)
132130
*/
133-
public Lexer(final String js, final int languageVersion) {
134-
stream = new TokenStream(js, 0, languageVersion);
131+
public Lexer(final String js) {
132+
stream = new EcmaScriptTokenStream(js, 0, false);
135133
lastThree = new LookBehind();
136134
braceStack = new Stack<>();
137135
parenStack = new Stack<>();
138136
}
139137

140-
/**
141-
* Create a new JavaScript lexer with the given source code
142-
*
143-
* @param js JavaScript code
144-
*/
145-
public Lexer(final String js) {
146-
this(js, Context.VERSION_DEFAULT);
147-
}
148-
149138
/**
150139
* Continue parsing and return the next token
151140
* @return next token

0 commit comments

Comments
 (0)