Skip to content

Commit 7de3753

Browse files
committed
[YouTube] Refactor JavaScript player management API
This commit introduces breaking changes. For clients, everything is now managed in a new class called YoutubeJavaScriptPlayerManager: - caching the JavaScript base player code and its extracted code (functions and variables); - getting the player signature timestamp; - getting deobfuscated signatures of streaming URLs; - getting streaming URLs with the throttling parameter deobfuscated, if applicable. The class delegates the extraction parts to external package-private classes: - YoutubeJavaScriptExtractor, to extract and download YouTube's JavaScript base player code: it was already present before and has been edited mainly to remove the previous caching system and to make it package-private; - YoutubeSignatureUtils, for the player signature timestamp and the signature deobfuscation function of streaming URLs, added in a recent commit; - YoutubeThrottlingParameterUtils, which was originally YoutubeThrottlingDecrypter, for the deobfuscation function of the throttling parameter of streaming URLs and for checking whether this parameter is present in a streaming URL. YoutubeJavaScriptPlayerManager caches and then runs the extracted code if it has been executed successfully. The cache system of deobfuscated throttling parameter values has been kept; its size can be obtained using the getThrottlingParametersCacheSize method and it can be cleared independently using the clearThrottlingParametersCache method. If an exception occurs during the extraction or the parsing of a function or property and it is not related to fetching the JavaScript base player code, it is stored until caches are cleared, making subsequent failing extraction calls of the requested function or property faster and less resource-consuming, as the result should be the same until the base player code changes. All caches can be reset using the clearAllCaches method of YoutubeJavaScriptPlayerManager. Classes using the JavaScript base player code and utilities directly (in the code and its tests) have also been updated in this commit.
1 parent 6884d19 commit 7de3753

9 files changed

Lines changed: 600 additions & 478 deletions

File tree

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeJavaScriptExtractor.java

Lines changed: 40 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -10,18 +10,14 @@
1010
import org.schabi.newpipe.extractor.utils.Parser;
1111

1212
import javax.annotation.Nonnull;
13+
import java.net.MalformedURLException;
14+
import java.net.URL;
1315
import java.util.regex.Pattern;
1416

1517
/**
1618
* The extractor of YouTube's base JavaScript player file.
1719
*
1820
* <p>
19-
* YouTube restrict streaming their media in multiple ways by requiring their HTML5 clients to use
20-
* a signature timestamp, and on streaming URLs a signature deobfuscation function for some
21-
* contents and a throttling parameter deobfuscation one for all contents.
22-
* </p>
23-
*
24-
* <p>
2521
* This class handles fetching of this base JavaScript player file in order to allow other classes
2622
* to extract the needed data.
2723
* </p>
@@ -31,7 +27,7 @@
3127
* watch page as a fallback.
3228
* </p>
3329
*/
34-
public final class YoutubeJavaScriptExtractor {
30+
final class YoutubeJavaScriptExtractor {
3531

3632
private static final String HTTPS = "https:";
3733
private static final String BASE_JS_PLAYER_URL_FORMAT =
@@ -40,49 +36,45 @@ public final class YoutubeJavaScriptExtractor {
4036
"player\\\\/([a-z0-9]{8})\\\\/");
4137
private static final Pattern EMBEDDED_WATCH_PAGE_JS_BASE_PLAYER_URL_PATTERN = Pattern.compile(
4238
"\"jsUrl\":\"(/s/player/[A-Za-z0-9]+/player_ias\\.vflset/[A-Za-z_-]+/base\\.js)\"");
43-
private static String cachedJavaScriptCode;
4439

4540
private YoutubeJavaScriptExtractor() {
4641
}
4742

4843
/**
49-
* Extracts the JavaScript file.
44+
* Extracts the JavaScript base player file.
5045
*
51-
* <p>
52-
* The result is cached, so subsequent calls use the result of previous calls.
53-
* </p>
54-
*
55-
* @param videoId a YouTube video ID, which doesn't influence the result, but it may help in
56-
* the chance that YouTube track it
57-
* @return the whole JavaScript file as a string
58-
* @throws ParsingException if the extraction failed
46+
* @param videoId the video ID used to get the JavaScript base player file (an empty one can be
47+
* passed, although this is not recommended, since a real ID better mimics official YouTube
48+
* clients)
49+
* @return the whole JavaScript base player file as a string
50+
* @throws ParsingException if the extraction of the file failed
5951
*/
6052
@Nonnull
61-
public static String extractJavaScriptCode(@Nonnull final String videoId)
53+
static String extractJavaScriptPlayerCode(@Nonnull final String videoId)
6254
throws ParsingException {
63-
if (cachedJavaScriptCode == null) {
64-
String url;
65-
try {
66-
url = YoutubeJavaScriptExtractor.extractJavaScriptUrlWithIframeResource();
67-
} catch (final Exception e) {
68-
url = YoutubeJavaScriptExtractor.extractJavaScriptUrlWithEmbedWatchPage(videoId);
69-
}
55+
String url;
56+
try {
57+
url = YoutubeJavaScriptExtractor.extractJavaScriptUrlWithIframeResource();
7058
final String playerJsUrl = YoutubeJavaScriptExtractor.cleanJavaScriptUrl(url);
71-
cachedJavaScriptCode = YoutubeJavaScriptExtractor.downloadJavaScriptCode(playerJsUrl);
72-
}
7359

74-
return cachedJavaScriptCode;
75-
}
60+
// Assert that the URL we extracted and built is valid
61+
new URL(playerJsUrl);
7662

77-
/**
78-
* Reset the cached JavaScript code.
79-
*
80-
* <p>
81-
* It will be fetched again the next time {@link #extractJavaScriptCode(String)} is called.
82-
* </p>
83-
*/
84-
public static void resetJavaScriptCode() {
85-
cachedJavaScriptCode = null;
63+
return YoutubeJavaScriptExtractor.downloadJavaScriptCode(playerJsUrl);
64+
} catch (final Exception e) {
65+
url = YoutubeJavaScriptExtractor.extractJavaScriptUrlWithEmbedWatchPage(videoId);
66+
final String playerJsUrl = YoutubeJavaScriptExtractor.cleanJavaScriptUrl(url);
67+
68+
try {
69+
// Assert that the URL we extracted and built is valid
70+
new URL(playerJsUrl);
71+
} catch (final MalformedURLException exception) {
72+
throw new ParsingException(
73+
"The extracted and built JavaScript URL is invalid", exception);
74+
}
75+
76+
return YoutubeJavaScriptExtractor.downloadJavaScriptCode(playerJsUrl);
77+
}
8678
}
8779

8880
@Nonnull
@@ -134,7 +126,7 @@ static String extractJavaScriptUrlWithEmbedWatchPage(@Nonnull final String video
134126
}
135127
}
136128

137-
// Use regexes to match the URL in a JavaScript embedded script of the HTML page
129+
// Use regexes to match the URL in an embedded script of the HTML page
138130
try {
139131
return Parser.matchGroup1(
140132
EMBEDDED_WATCH_PAGE_JS_BASE_PLAYER_URL_PATTERN, embedPageContent);
@@ -145,29 +137,28 @@ static String extractJavaScriptUrlWithEmbedWatchPage(@Nonnull final String video
145137
}
146138

147139
@Nonnull
148-
private static String cleanJavaScriptUrl(@Nonnull final String playerJsUrl) {
149-
if (playerJsUrl.startsWith("//")) {
140+
private static String cleanJavaScriptUrl(@Nonnull final String javaScriptPlayerUrl) {
141+
if (javaScriptPlayerUrl.startsWith("//")) {
150142
// https part has to be added manually if the URL is protocol-relative
151-
return HTTPS + playerJsUrl;
152-
} else if (playerJsUrl.startsWith("/")) {
143+
return HTTPS + javaScriptPlayerUrl;
144+
} else if (javaScriptPlayerUrl.startsWith("/")) {
153145
// https://www.youtube.com part has to be added manually if the URL is relative to
154146
// YouTube's domain
155-
return HTTPS + "//www.youtube.com" + playerJsUrl;
147+
return HTTPS + "//www.youtube.com" + javaScriptPlayerUrl;
156148
} else {
157-
return playerJsUrl;
149+
return javaScriptPlayerUrl;
158150
}
159151
}
160152

161153
@Nonnull
162-
private static String downloadJavaScriptCode(@Nonnull final String playerJsUrl)
154+
private static String downloadJavaScriptCode(@Nonnull final String javaScriptPlayerUrl)
163155
throws ParsingException {
164156
try {
165157
return NewPipe.getDownloader()
166-
.get(playerJsUrl, Localization.DEFAULT)
158+
.get(javaScriptPlayerUrl, Localization.DEFAULT)
167159
.responseBody();
168160
} catch (final Exception e) {
169-
throw new ParsingException(
170-
"Could not get JavaScript base player's code from URL: " + playerJsUrl, e);
161+
throw new ParsingException("Could not get JavaScript base player's code", e);
171162
}
172163
}
173164
}

0 commit comments

Comments
 (0)