Skip to content

Commit 289db11

Browse files
authored
Merge pull request #1108 from AudricV/yt_refactor-js-usage
[YouTube] Refactor JavaScript usage and fix extraction of obfuscated signature deobfuscation function
2 parents 3be76a6 + 6ed2209 commit 289db11

119 files changed

Lines changed: 1655 additions & 1319 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeJavaScriptExtractor.java

Lines changed: 40 additions & 49 deletions
Original file line number | Diff line number | Diff line change
@@ -10,18 +10,14 @@
1010
import org.schabi.newpipe.extractor.utils.Parser;
1111

1212
import javax.annotation.Nonnull;
13+
import java.net.MalformedURLException;
14+
import java.net.URL;
1315
import java.util.regex.Pattern;
1416

1517
/**
1618
* The extractor of YouTube's base JavaScript player file.
1719
*
1820
* <p>
19-
* YouTube restrict streaming their media in multiple ways by requiring their HTML5 clients to use
20-
* a signature timestamp, and on streaming URLs a signature deobfuscation function for some
21-
* contents and a throttling parameter deobfuscation one for all contents.
22-
* </p>
23-
*
24-
* <p>
2521
* This class handles fetching of this base JavaScript player file in order to allow other classes
2622
* to extract the needed data.
2723
* </p>
@@ -31,7 +27,7 @@
3127
* watch page as a fallback.
3228
* </p>
3329
*/
34-
public final class YoutubeJavaScriptExtractor {
30+
final class YoutubeJavaScriptExtractor {
3531

3632
private static final String HTTPS = "https:";
3733
private static final String BASE_JS_PLAYER_URL_FORMAT =
@@ -40,49 +36,45 @@ public final class YoutubeJavaScriptExtractor {
4036
"player\\\\/([a-z0-9]{8})\\\\/");
4137
private static final Pattern EMBEDDED_WATCH_PAGE_JS_BASE_PLAYER_URL_PATTERN = Pattern.compile(
4238
"\"jsUrl\":\"(/s/player/[A-Za-z0-9]+/player_ias\\.vflset/[A-Za-z_-]+/base\\.js)\"");
43-
private static String cachedJavaScriptCode;
4439

4540
private YoutubeJavaScriptExtractor() {
4641
}
4742

4843
/**
49-
* Extracts the JavaScript file.
44+
* Extracts the JavaScript base player file.
5045
*
51-
* <p>
52-
* The result is cached, so subsequent calls use the result of previous calls.
53-
* </p>
54-
*
55-
* @param videoId a YouTube video ID, which doesn't influence the result, but it may help in
56-
* the chance that YouTube track it
57-
* @return the whole JavaScript file as a string
58-
* @throws ParsingException if the extraction failed
46+
* @param videoId the video ID used to get the JavaScript base player file (an empty one can be
47+
* passed, although this is not recommended, as a real ID better spoofs official YouTube
48+
* clients)
49+
* @return the whole JavaScript base player file as a string
50+
* @throws ParsingException if the extraction of the file failed
5951
*/
6052
@Nonnull
61-
public static String extractJavaScriptCode(@Nonnull final String videoId)
53+
static String extractJavaScriptPlayerCode(@Nonnull final String videoId)
6254
throws ParsingException {
63-
if (cachedJavaScriptCode == null) {
64-
String url;
65-
try {
66-
url = YoutubeJavaScriptExtractor.extractJavaScriptUrlWithIframeResource();
67-
} catch (final Exception e) {
68-
url = YoutubeJavaScriptExtractor.extractJavaScriptUrlWithEmbedWatchPage(videoId);
69-
}
55+
String url;
56+
try {
57+
url = YoutubeJavaScriptExtractor.extractJavaScriptUrlWithIframeResource();
7058
final String playerJsUrl = YoutubeJavaScriptExtractor.cleanJavaScriptUrl(url);
71-
cachedJavaScriptCode = YoutubeJavaScriptExtractor.downloadJavaScriptCode(playerJsUrl);
72-
}
7359

74-
return cachedJavaScriptCode;
75-
}
60+
// Assert that the URL we extracted and built is valid
61+
new URL(playerJsUrl);
7662

77-
/**
78-
* Reset the cached JavaScript code.
79-
*
80-
* <p>
81-
* It will be fetched again the next time {@link #extractJavaScriptCode(String)} is called.
82-
* </p>
83-
*/
84-
public static void resetJavaScriptCode() {
85-
cachedJavaScriptCode = null;
63+
return YoutubeJavaScriptExtractor.downloadJavaScriptCode(playerJsUrl);
64+
} catch (final Exception e) {
65+
url = YoutubeJavaScriptExtractor.extractJavaScriptUrlWithEmbedWatchPage(videoId);
66+
final String playerJsUrl = YoutubeJavaScriptExtractor.cleanJavaScriptUrl(url);
67+
68+
try {
69+
// Assert that the URL we extracted and built is valid
70+
new URL(playerJsUrl);
71+
} catch (final MalformedURLException exception) {
72+
throw new ParsingException(
73+
"The extracted and built JavaScript URL is invalid", exception);
74+
}
75+
76+
return YoutubeJavaScriptExtractor.downloadJavaScriptCode(playerJsUrl);
77+
}
8678
}
8779

8880
@Nonnull
@@ -134,7 +126,7 @@ static String extractJavaScriptUrlWithEmbedWatchPage(@Nonnull final String video
134126
}
135127
}
136128

137-
// Use regexes to match the URL in a JavaScript embedded script of the HTML page
129+
// Use regexes to match the URL in an embedded script of the HTML page
138130
try {
139131
return Parser.matchGroup1(
140132
EMBEDDED_WATCH_PAGE_JS_BASE_PLAYER_URL_PATTERN, embedPageContent);
@@ -145,29 +137,28 @@ static String extractJavaScriptUrlWithEmbedWatchPage(@Nonnull final String video
145137
}
146138

147139
@Nonnull
148-
private static String cleanJavaScriptUrl(@Nonnull final String playerJsUrl) {
149-
if (playerJsUrl.startsWith("//")) {
140+
private static String cleanJavaScriptUrl(@Nonnull final String javaScriptPlayerUrl) {
141+
if (javaScriptPlayerUrl.startsWith("//")) {
150142
// https part has to be added manually if the URL is protocol-relative
151-
return HTTPS + playerJsUrl;
152-
} else if (playerJsUrl.startsWith("/")) {
143+
return HTTPS + javaScriptPlayerUrl;
144+
} else if (javaScriptPlayerUrl.startsWith("/")) {
153145
// https://www.youtube.com part has to be added manually if the URL is relative to
154146
// YouTube's domain
155-
return HTTPS + "//www.youtube.com" + playerJsUrl;
147+
return HTTPS + "//www.youtube.com" + javaScriptPlayerUrl;
156148
} else {
157-
return playerJsUrl;
149+
return javaScriptPlayerUrl;
158150
}
159151
}
160152

161153
@Nonnull
162-
private static String downloadJavaScriptCode(@Nonnull final String playerJsUrl)
154+
private static String downloadJavaScriptCode(@Nonnull final String javaScriptPlayerUrl)
163155
throws ParsingException {
164156
try {
165157
return NewPipe.getDownloader()
166-
.get(playerJsUrl, Localization.DEFAULT)
158+
.get(javaScriptPlayerUrl, Localization.DEFAULT)
167159
.responseBody();
168160
} catch (final Exception e) {
169-
throw new ParsingException(
170-
"Could not get JavaScript base player's code from URL: " + playerJsUrl, e);
161+
throw new ParsingException("Could not get JavaScript base player's code", e);
171162
}
172163
}
173164
}

0 commit comments

Comments
 (0)