Skip to content

Commit 69ff271

Browse files
committed
[YouTube] Fix extraction of n param deobfuscation function name
This commit adds two new regular expressions to extract the name of the n parameter deobfuscation function. It also simplifies the existing regular expressions by using a shared constant that represents one or more identifier characters, instead of manually appending the one-or-more quantifier at each place it is needed; this constant is now used throughout the regexes, not only for function names.
1 parent eb30316 commit 69ff271

1 file changed

Lines changed: 41 additions & 18 deletions

File tree

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeThrottlingParameterUtils.java

Lines changed: 41 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -22,57 +22,80 @@ final class YoutubeThrottlingParameterUtils {
2222

2323
private static final String SINGLE_CHAR_VARIABLE_REGEX = "[a-zA-Z0-9$_]";
2424

25-
private static final String FUNCTION_NAME_REGEX = SINGLE_CHAR_VARIABLE_REGEX + "+";
25+
private static final String MULTIPLE_CHARS_REGEX = SINGLE_CHAR_VARIABLE_REGEX + "+";
2626

2727
private static final String ARRAY_ACCESS_REGEX = "\\[(\\d+)]";
2828

2929
// CHECKSTYLE:OFF
3030
private static final Pattern[] DEOBFUSCATION_FUNCTION_NAME_REGEXES = {
3131

3232
/*
33-
* The first regex matches the following text, where we want rDa and the array index
33+
* The first regex matches the following text, where we want Wma (the function called
3434
* with an empty string; this regex does not capture an array index):
3535
*
36+
* a.D&&(b="nn"[+a.D],WL(a),c=a.j[b]||null)&&(c=SDa[0](c),a.set(b,c),SDa.length||Wma("")
37+
*/
38+
Pattern.compile(SINGLE_CHAR_VARIABLE_REGEX + "=\"nn\"\\[\\+" + MULTIPLE_CHARS_REGEX
39+
+ "\\." + MULTIPLE_CHARS_REGEX + "]," + MULTIPLE_CHARS_REGEX + "\\("
40+
+ MULTIPLE_CHARS_REGEX + "\\)," + MULTIPLE_CHARS_REGEX + "="
41+
+ MULTIPLE_CHARS_REGEX + "\\." + MULTIPLE_CHARS_REGEX + "\\["
42+
+ MULTIPLE_CHARS_REGEX + "]\\|\\|null\\).+\\|\\|(" + MULTIPLE_CHARS_REGEX
43+
+ ")\\(\"\"\\)"),
44+
45+
/*
46+
* The second regex matches the following text, where we want SDa and the array index
47+
* accessed:
48+
*
49+
* a.D&&(b="nn"[+a.D],WL(a),c=a.j[b]||null)&&(c=SDa[0](c),a.set(b,c),SDa.length||Wma("")
50+
*/
51+
Pattern.compile(SINGLE_CHAR_VARIABLE_REGEX + "=\"nn\"\\[\\+" + MULTIPLE_CHARS_REGEX
52+
+ "\\." + MULTIPLE_CHARS_REGEX + "]," + MULTIPLE_CHARS_REGEX + "\\("
53+
+ MULTIPLE_CHARS_REGEX + "\\)," + MULTIPLE_CHARS_REGEX + "="
54+
+ MULTIPLE_CHARS_REGEX + "\\." + MULTIPLE_CHARS_REGEX + "\\["
55+
+ MULTIPLE_CHARS_REGEX + "]\\|\\|null\\)&&\\(" + MULTIPLE_CHARS_REGEX + "=("
56+
+ MULTIPLE_CHARS_REGEX + ")" + ARRAY_ACCESS_REGEX),
57+
58+
/*
59+
* The third regex matches the following text, where we want rma:
60+
*
3661
* a.D&&(b="nn"[+a.D],c=a.get(b))&&(c=rDa[0](c),a.set(b,c),rDa.length||rma("")
3762
*/
38-
Pattern.compile(SINGLE_CHAR_VARIABLE_REGEX + "+=\"nn\"\\[\\+"
39-
+ SINGLE_CHAR_VARIABLE_REGEX + "+\\." + SINGLE_CHAR_VARIABLE_REGEX + "+],"
40-
+ SINGLE_CHAR_VARIABLE_REGEX + "+=" + SINGLE_CHAR_VARIABLE_REGEX
41-
+ "+\\.get\\(" + SINGLE_CHAR_VARIABLE_REGEX + "+\\)\\)&&\\("
42-
+ SINGLE_CHAR_VARIABLE_REGEX + "+=(" + SINGLE_CHAR_VARIABLE_REGEX
43-
+ "+)\\[(\\d+)]"),
63+
Pattern.compile(SINGLE_CHAR_VARIABLE_REGEX + "=\"nn\"\\[\\+" + MULTIPLE_CHARS_REGEX
64+
+ "\\." + MULTIPLE_CHARS_REGEX + "]," + MULTIPLE_CHARS_REGEX + "="
65+
+ MULTIPLE_CHARS_REGEX + "\\.get\\(" + MULTIPLE_CHARS_REGEX + "\\)\\).+\\|\\|("
66+
+ MULTIPLE_CHARS_REGEX + ")\\(\"\"\\)"),
4467

4568
/*
46-
* The second regex matches the following text, where we want rma:
69+
* The fourth regex matches the following text, where we want rDa and the array index
70+
* accessed:
4771
*
4872
* a.D&&(b="nn"[+a.D],c=a.get(b))&&(c=rDa[0](c),a.set(b,c),rDa.length||rma("")
4973
*/
50-
Pattern.compile(SINGLE_CHAR_VARIABLE_REGEX + "+=\"nn\"\\[\\+"
51-
+ SINGLE_CHAR_VARIABLE_REGEX + "+\\." + SINGLE_CHAR_VARIABLE_REGEX + "+],"
52-
+ SINGLE_CHAR_VARIABLE_REGEX + "+=" + SINGLE_CHAR_VARIABLE_REGEX + "+\\.get\\("
53-
+ SINGLE_CHAR_VARIABLE_REGEX + "+\\)\\).+\\|\\|(" + SINGLE_CHAR_VARIABLE_REGEX
54-
+ "+)\\(\"\"\\)"),
74+
Pattern.compile(SINGLE_CHAR_VARIABLE_REGEX + "=\"nn\"\\[\\+" + MULTIPLE_CHARS_REGEX
75+
+ "\\." + MULTIPLE_CHARS_REGEX + "]," + MULTIPLE_CHARS_REGEX + "="
76+
+ MULTIPLE_CHARS_REGEX + "\\.get\\(" + MULTIPLE_CHARS_REGEX + "\\)\\)&&\\("
77+
+ MULTIPLE_CHARS_REGEX + "=(" + MULTIPLE_CHARS_REGEX + ")\\[(\\d+)]"),
5578

5679
/*
57-
* The third regex matches the following text, where we want BDa and the array index
80+
* The fifth regex matches the following text, where we want BDa and the array index
5881
* accessed:
5982
*
6083
* (b=String.fromCharCode(110),c=a.get(b))&&(c=BDa[0](c)
6184
*/
6285
Pattern.compile("\\(" + SINGLE_CHAR_VARIABLE_REGEX + "=String\\.fromCharCode\\(110\\),"
6386
+ SINGLE_CHAR_VARIABLE_REGEX + "=" + SINGLE_CHAR_VARIABLE_REGEX + "\\.get\\("
6487
+ SINGLE_CHAR_VARIABLE_REGEX + "\\)\\)" + "&&\\(" + SINGLE_CHAR_VARIABLE_REGEX
65-
+ "=(" + FUNCTION_NAME_REGEX + ")" + "(?:" + ARRAY_ACCESS_REGEX + ")?\\("
88+
+ "=(" + MULTIPLE_CHARS_REGEX + ")" + "(?:" + ARRAY_ACCESS_REGEX + ")?\\("
6689
+ SINGLE_CHAR_VARIABLE_REGEX + "\\)"),
6790

6891
/*
69-
* The fourth regex matches the following text, where we want Yva and the array index
92+
* The sixth regex matches the following text, where we want Yva and the array index
7093
* accessed:
7194
*
7295
* .get("n"))&&(b=Yva[0](b)
7396
*/
7497
Pattern.compile("\\.get\\(\"n\"\\)\\)&&\\(" + SINGLE_CHAR_VARIABLE_REGEX
75-
+ "=(" + FUNCTION_NAME_REGEX + ")(?:" + ARRAY_ACCESS_REGEX + ")?\\("
98+
+ "=(" + MULTIPLE_CHARS_REGEX + ")(?:" + ARRAY_ACCESS_REGEX + ")?\\("
7699
+ SINGLE_CHAR_VARIABLE_REGEX + "\\)")
77100
};
78101
// CHECKSTYLE:ON

0 commit comments

Comments
 (0)