Skip to content

Commit 6884d19

Browse files
committed
[YouTube] Add utility class around signatures and fix signature deobfuscation function extraction
The goal of this class is to decouple the extraction of the signature timestamp and the signature deobfuscation function from YoutubeStreamExtractor. The extraction of the signature deobfuscation function has also been adapted to support the latest YouTube player versions. This new class, YoutubeSignatureUtils, doesn't store anything temporary, such as a copy of the player code, which has to be passed where required. It is not public, as it will be used in the future by a JavaScript player manager class, in order to better handle fetching, caching and resetting the cache of the player code.
1 parent 3be76a6 commit 6884d19

1 file changed

Lines changed: 150 additions & 0 deletions

File tree

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
package org.schabi.newpipe.extractor.services.youtube;
2+
3+
import org.schabi.newpipe.extractor.exceptions.ParsingException;
4+
import org.schabi.newpipe.extractor.utils.JavaScript;
5+
import org.schabi.newpipe.extractor.utils.Parser;
6+
import org.schabi.newpipe.extractor.utils.jsextractor.JavaScriptExtractor;
7+
8+
import javax.annotation.Nonnull;
9+
10+
/**
11+
* Utility class to get the signature timestamp of YouTube's base JavaScript player and deobfuscate
12+
* signature of streaming URLs from HTML5 clients.
13+
*/
14+
final class YoutubeSignatureUtils {
15+
16+
/**
17+
* The name of the deobfuscation function which needs to be called inside the deobfuscation
18+
* code.
19+
*/
20+
static final String DEOBFUSCATION_FUNCTION_NAME = "deobfuscate";
21+
22+
private static final String[] FUNCTION_REGEXES = {
23+
"\\bm=([a-zA-Z0-9$]{2,})\\(decodeURIComponent\\(h\\.s\\)\\)",
24+
"\\bc&&\\(c=([a-zA-Z0-9$]{2,})\\(decodeURIComponent\\(c\\)\\)",
25+
// CHECKSTYLE:OFF
26+
"(?:\\b|[^a-zA-Z0-9$])([a-zA-Z0-9$]{2,})\\s*=\\s*function\\(\\s*a\\s*\\)\\s*\\{\\s*a\\s*=\\s*a\\.split\\(\\s*\"\"\\s*\\)",
27+
// CHECKSTYLE:ON
28+
"([\\w$]+)\\s*=\\s*function\\((\\w+)\\)\\{\\s*\\2=\\s*\\2\\.split\\(\"\"\\)\\s*;"
29+
};
30+
31+
private static final String STS_REGEX = "signatureTimestamp[=:](\\d+)";
32+
33+
private static final String DEOBF_FUNC_REGEX_START = "(";
34+
private static final String DEOBF_FUNC_REGEX_END = "=function\\([a-zA-Z0-9_]+\\)\\{.+?\\})";
35+
36+
private static final String SIG_DEOBF_HELPER_OBJ_NAME_REGEX = ";([A-Za-z0-9_\\$]{2,})\\...\\(";
37+
private static final String SIG_DEOBF_HELPER_OBJ_REGEX_START = "(var ";
38+
private static final String SIG_DEOBF_HELPER_OBJ_REGEX_END = "=\\{(?>.|\\n)+?\\}\\};)";
39+
40+
private YoutubeSignatureUtils() {
41+
}
42+
43+
/**
44+
* Get the signature timestamp property of YouTube's base JavaScript file.
45+
*
46+
* @param javaScriptPlayerCode the complete JavaScript base player code
47+
* @return the signature timestamp
48+
* @throws ParsingException if the signature timestamp couldn't be extracted
49+
*/
50+
@Nonnull
51+
static String getSignatureTimestamp(@Nonnull final String javaScriptPlayerCode)
52+
throws ParsingException {
53+
try {
54+
return Parser.matchGroup1(STS_REGEX, javaScriptPlayerCode);
55+
} catch (final ParsingException e) {
56+
throw new ParsingException(
57+
"Could not extract signature timestamp from JavaScript code", e);
58+
}
59+
}
60+
61+
/**
62+
* Get the signature deobfuscation code of YouTube's base JavaScript file.
63+
*
64+
* @param javaScriptPlayerCode the complete JavaScript base player code
65+
* @return the signature deobfuscation code
66+
* @throws ParsingException if the signature deobfuscation code couldn't be extracted
67+
*/
68+
@Nonnull
69+
static String getDeobfuscationCode(@Nonnull final String javaScriptPlayerCode)
70+
throws ParsingException {
71+
try {
72+
final String deobfuscationFunctionName = getDeobfuscationFunctionName(
73+
javaScriptPlayerCode);
74+
75+
String deobfuscationFunction;
76+
try {
77+
deobfuscationFunction = getDeobfuscateFunctionWithLexer(
78+
javaScriptPlayerCode, deobfuscationFunctionName);
79+
} catch (final Exception e) {
80+
deobfuscationFunction = getDeobfuscateFunctionWithRegex(
81+
javaScriptPlayerCode, deobfuscationFunctionName);
82+
}
83+
84+
// Assert the extracted deobfuscation function is valid
85+
JavaScript.compileOrThrow(deobfuscationFunction);
86+
87+
final String helperObjectName =
88+
Parser.matchGroup1(SIG_DEOBF_HELPER_OBJ_NAME_REGEX, deobfuscationFunction);
89+
90+
final String helperObject = getHelperObject(javaScriptPlayerCode, helperObjectName);
91+
92+
final String callerFunction = "function " + DEOBFUSCATION_FUNCTION_NAME
93+
+ "(a){return "
94+
+ deobfuscationFunctionName
95+
+ "(a);}";
96+
97+
return helperObject + deobfuscationFunction + callerFunction;
98+
} catch (final Exception e) {
99+
throw new ParsingException("Could not parse deobfuscation function", e);
100+
}
101+
}
102+
103+
@Nonnull
104+
private static String getDeobfuscationFunctionName(@Nonnull final String javaScriptPlayerCode)
105+
throws ParsingException {
106+
Parser.RegexException exception = null;
107+
for (final String regex : FUNCTION_REGEXES) {
108+
try {
109+
return Parser.matchGroup1(regex, javaScriptPlayerCode);
110+
} catch (final Parser.RegexException e) {
111+
if (exception == null) {
112+
exception = e;
113+
}
114+
}
115+
}
116+
117+
throw new ParsingException(
118+
"Could not find deobfuscation function with any of the known patterns", exception);
119+
}
120+
121+
@Nonnull
122+
private static String getDeobfuscateFunctionWithLexer(
123+
@Nonnull final String javaScriptPlayerCode,
124+
@Nonnull final String deobfuscationFunctionName) throws ParsingException {
125+
final String functionBase = deobfuscationFunctionName + "=function";
126+
return functionBase + JavaScriptExtractor.matchToClosingBrace(
127+
javaScriptPlayerCode, functionBase) + ";";
128+
}
129+
130+
@Nonnull
131+
private static String getDeobfuscateFunctionWithRegex(
132+
@Nonnull final String javaScriptPlayerCode,
133+
@Nonnull final String deobfuscationFunctionName) throws ParsingException {
134+
final String functionPattern = DEOBF_FUNC_REGEX_START
135+
+ deobfuscationFunctionName.replace("$", "\\$")
136+
+ DEOBF_FUNC_REGEX_END;
137+
return "var " + Parser.matchGroup1(functionPattern, javaScriptPlayerCode) + ";";
138+
}
139+
140+
@Nonnull
141+
private static String getHelperObject(@Nonnull final String javaScriptPlayerCode,
142+
@Nonnull final String helperObjectName)
143+
throws ParsingException {
144+
final String helperPattern = SIG_DEOBF_HELPER_OBJ_REGEX_START
145+
+ helperObjectName.replace("$", "\\$")
146+
+ SIG_DEOBF_HELPER_OBJ_REGEX_END;
147+
return Parser.matchGroup1(helperPattern, javaScriptPlayerCode)
148+
.replace("\n", "");
149+
}
150+
}

0 commit comments

Comments
 (0)