Skip to content

Commit 027dc65

Browse files
committed
pull request #683 from XiangRongLin/yt_throttling
[YouTube] Fix buffering by decoding n parameter of stream urls
1 parent 6fd93cd commit 027dc65

6 files changed

Lines changed: 375 additions & 67 deletions

File tree

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
package org.schabi.newpipe.extractor.services.youtube;
2+
3+
import org.jsoup.Jsoup;
4+
import org.jsoup.nodes.Document;
5+
import org.jsoup.nodes.Element;
6+
import org.jsoup.select.Elements;
7+
import org.schabi.newpipe.extractor.NewPipe;
8+
import org.schabi.newpipe.extractor.exceptions.ParsingException;
9+
import org.schabi.newpipe.extractor.localization.Localization;
10+
import org.schabi.newpipe.extractor.utils.Parser;
11+
12+
import javax.annotation.Nonnull;
13+
14+
/**
15+
* YouTube restricts streaming their media in multiple ways by requiring clients to apply a cipher
16+
* function on parameters of requests.
17+
* The cipher function is sent alongside as a JavaScript function.
18+
* <p>
19+
* This class handles fetching the JavaScript file in order to allow other classes to extract the
20+
* needed functions.
21+
*/
22+
public class YoutubeJavaScriptExtractor {
23+
24+
private static final String HTTPS = "https:";
25+
private static String cachedJavaScriptCode;
26+
27+
private YoutubeJavaScriptExtractor() {
28+
}
29+
30+
/**
31+
* Extracts the JavaScript file. The result is cached, so subsequent calls use the result of
32+
* previous calls.
33+
*
34+
* @param videoId Does not influence the result, but a valid video id may help in the chance
35+
* that YouTube tracks it.
36+
* @return The whole JavaScript file as a string.
37+
* @throws ParsingException If the extraction failed.
38+
*/
39+
@Nonnull
40+
public static String extractJavaScriptCode(final String videoId) throws ParsingException {
41+
if (cachedJavaScriptCode == null) {
42+
final String playerJsUrl = YoutubeJavaScriptExtractor.cleanJavaScriptUrl(
43+
YoutubeJavaScriptExtractor.extractJavaScriptUrl(videoId));
44+
cachedJavaScriptCode = YoutubeJavaScriptExtractor.downloadJavaScriptCode(playerJsUrl);
45+
}
46+
47+
return cachedJavaScriptCode;
48+
}
49+
50+
/**
51+
* Same as {@link YoutubeJavaScriptExtractor#extractJavaScriptCode(String)} but with a constant
52+
* value for videoId.
53+
* Possible because the videoId has no influence on the result.
54+
* <p>
55+
* In the off chance that YouTube tracks with which video id the request is made, it may make
56+
* sense to pass in video ids.
57+
*/
58+
@Nonnull
59+
public static String extractJavaScriptCode() throws ParsingException {
60+
return extractJavaScriptCode("d4IGg5dqeO8");
61+
}
62+
63+
private static String extractJavaScriptUrl(final String videoId) throws ParsingException {
64+
try {
65+
final String embedUrl = "https://www.youtube.com/embed/" + videoId;
66+
final String embedPageContent = NewPipe.getDownloader()
67+
.get(embedUrl, Localization.DEFAULT).responseBody();
68+
69+
try {
70+
final String assetsPattern = "\"assets\":.+?\"js\":\\s*(\"[^\"]+\")";
71+
return Parser.matchGroup1(assetsPattern, embedPageContent)
72+
.replace("\\", "").replace("\"", "");
73+
} catch (final Parser.RegexException ex) {
74+
// playerJsUrl is still available in the file, just somewhere else TODO
75+
// it is ok not to find it, see how that's handled in getDeobfuscationCode()
76+
final Document doc = Jsoup.parse(embedPageContent);
77+
final Elements elems = doc.select("script").attr("name", "player_ias/base");
78+
for (final Element elem : elems) {
79+
if (elem.attr("src").contains("base.js")) {
80+
return elem.attr("src");
81+
}
82+
}
83+
}
84+
85+
} catch (final Exception i) {
86+
throw new ParsingException("Embedded info did not provide YouTube player js url");
87+
}
88+
throw new ParsingException("Embedded info did not provide YouTube player js url");
89+
}
90+
91+
@Nonnull
92+
private static String cleanJavaScriptUrl(@Nonnull final String playerJsUrl) {
93+
if (playerJsUrl.startsWith("//")) {
94+
return HTTPS + playerJsUrl;
95+
} else if (playerJsUrl.startsWith("/")) {
96+
// sometimes https://www.youtube.com part has to be added manually
97+
return HTTPS + "//www.youtube.com" + playerJsUrl;
98+
} else {
99+
return playerJsUrl;
100+
}
101+
}
102+
103+
@Nonnull
104+
private static String downloadJavaScriptCode(final String playerJsUrl)
105+
throws ParsingException {
106+
try {
107+
return NewPipe.getDownloader().get(playerJsUrl, Localization.DEFAULT).responseBody();
108+
} catch (final Exception e) {
109+
throw new ParsingException("Could not get player js code from url: " + playerJsUrl);
110+
}
111+
}
112+
}
Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
package org.schabi.newpipe.extractor.services.youtube;
2+
3+
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.utils.JavaScript;
import org.schabi.newpipe.extractor.utils.Parser;

import javax.annotation.Nonnull;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
11+
12+
/**
13+
* <p>
14+
* YouTube's media is protected with a cipher,
15+
* which modifies the "n" query parameter of its video playback urls.
16+
* This class handles extracting that "n" query parameter,
17+
* applying the cipher on it and returning the resulting url which is not throttled.
18+
* </p>
19+
*
20+
* <p>
21+
* https://r5---sn-4g5ednsz.googlevideo.com/videoplayback?n=VVF2xyZLVRZZxHXZ&other=other
22+
* </p>
23+
* becomes
24+
* <p>
25+
* https://r5---sn-4g5ednsz.googlevideo.com/videoplayback?n=iHywZkMipkszqA&other=other
26+
* </p>
27+
* <br>
28+
* <p>
29+
* Decoding the "n" parameter is time intensive. For this reason, the results are cached.
30+
* The cache can be cleared using {@link #clearCache()}
31+
* </p>
32+
*
33+
*/
34+
public class YoutubeThrottlingDecrypter {
35+
36+
private static final String N_PARAM_REGEX = "[&?]n=([^&]+)";
37+
private static final Map<String, String> nParams = new HashMap<>();
38+
39+
private final String functionName;
40+
private final String function;
41+
42+
/**
43+
* <p>
44+
* Use this if you care about the off chance that YouTube tracks with which videoId the cipher
45+
* is requested.
46+
* </p>
47+
* Otherwise use the no-arg constructor which uses a constant value.
48+
*/
49+
public YoutubeThrottlingDecrypter(final String videoId) throws ParsingException {
50+
final String playerJsCode = YoutubeJavaScriptExtractor.extractJavaScriptCode(videoId);
51+
52+
functionName = parseDecodeFunctionName(playerJsCode);
53+
function = parseDecodeFunction(playerJsCode, functionName);
54+
}
55+
56+
public YoutubeThrottlingDecrypter() throws ParsingException {
57+
final String playerJsCode = YoutubeJavaScriptExtractor.extractJavaScriptCode();
58+
59+
functionName = parseDecodeFunctionName(playerJsCode);
60+
function = parseDecodeFunction(playerJsCode, functionName);
61+
}
62+
63+
private String parseDecodeFunctionName(final String playerJsCode)
64+
throws Parser.RegexException {
65+
Pattern pattern = Pattern.compile(
66+
"b=a\\.get\\(\"n\"\\)\\)&&\\(b=(\\w+)\\(b\\),a\\.set\\(\"n\",b\\)");
67+
return Parser.matchGroup1(pattern, playerJsCode);
68+
}
69+
70+
@Nonnull
71+
private String parseDecodeFunction(final String playerJsCode, final String functionName)
72+
throws Parser.RegexException {
73+
Pattern functionPattern = Pattern.compile(functionName + "=function(.*?;)\n",
74+
Pattern.DOTALL);
75+
return "function " + functionName + Parser.matchGroup1(functionPattern, playerJsCode);
76+
}
77+
78+
public String apply(final String url) throws Parser.RegexException {
79+
if (containsNParam(url)) {
80+
String oldNParam = parseNParam(url);
81+
String newNParam = decryptNParam(oldNParam);
82+
return replaceNParam(url, oldNParam, newNParam);
83+
} else {
84+
return url;
85+
}
86+
}
87+
88+
private boolean containsNParam(final String url) {
89+
return Parser.isMatch(N_PARAM_REGEX, url);
90+
}
91+
92+
private String parseNParam(final String url) throws Parser.RegexException {
93+
Pattern nValuePattern = Pattern.compile(N_PARAM_REGEX);
94+
return Parser.matchGroup1(nValuePattern, url);
95+
}
96+
97+
private String decryptNParam(final String nParam) {
98+
if (nParams.containsKey(nParam)) {
99+
return nParams.get(nParam);
100+
}
101+
final String decryptedNParam = JavaScript.run(function, functionName, nParam);
102+
nParams.put(nParam, decryptedNParam);
103+
return decryptedNParam;
104+
}
105+
106+
@Nonnull
107+
private String replaceNParam(@Nonnull final String url,
108+
final String oldValue,
109+
final String newValue) {
110+
return url.replace(oldValue, newValue);
111+
}
112+
113+
/**
114+
* @return the number of the cached "n" query parameters.
115+
*/
116+
public static int getCacheSize() {
117+
return nParams.size();
118+
}
119+
120+
/**
121+
* Clears all stored "n" query parameters.
122+
*/
123+
public static void clearCache() {
124+
nParams.clear();
125+
}
126+
}

0 commit comments

Comments
 (0)