Skip to content

Commit a02ee2e

Browse files
committed
Rewrite youtube throttling solution and add tests
1 parent 6956b72 commit a02ee2e

6 files changed

Lines changed: 287 additions & 148 deletions

File tree

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
package org.schabi.newpipe.extractor.services.youtube;
2+
3+
import org.jsoup.Jsoup;
4+
import org.jsoup.nodes.Document;
5+
import org.jsoup.nodes.Element;
6+
import org.jsoup.select.Elements;
7+
import org.schabi.newpipe.extractor.NewPipe;
8+
import org.schabi.newpipe.extractor.exceptions.ParsingException;
9+
import org.schabi.newpipe.extractor.localization.Localization;
10+
import org.schabi.newpipe.extractor.utils.Parser;
11+
12+
import javax.annotation.Nonnull;
13+
14+
/**
15+
* Youtube restricts streaming their media in multiple ways by requiring clients to apply a cipher function
16+
* on parameters of requests.
17+
* The cipher function is sent alongside as a JavaScript function.
18+
* <p>
19+
* This class handling fetching the JavaScript file in order to allow other classes to extract the needed functions.
20+
*/
21+
public class YoutubeJavascriptExtractor {
22+
23+
private static final String HTTPS = "https:";
24+
private static String cachedJavascriptCode;
25+
26+
/**
27+
* Extracts the JavaScript file. The result is cached, so subsequent calls use the result of previous calls.
28+
*
29+
* @param videoId Does not influence the result, but a valid video id can prevent tracking
30+
* @return The whole javascript file as a string.
31+
* @throws ParsingException If the extraction failed.
32+
*/
33+
@Nonnull
34+
public static String extractJavascriptCode(String videoId) throws ParsingException {
35+
if (cachedJavascriptCode == null) {
36+
final YoutubeJavascriptExtractor extractor = new YoutubeJavascriptExtractor();
37+
String playerJsUrl = extractor.cleanJavascriptUrl(extractor.extractJavascriptUrl(videoId));
38+
cachedJavascriptCode = extractor.downloadJavascriptCode(playerJsUrl);
39+
}
40+
41+
return cachedJavascriptCode;
42+
}
43+
44+
/**
45+
* Same as {@link YoutubeJavascriptExtractor#extractJavascriptCode(String)} but with a constant value for videoId.
46+
* Possible because the videoId has no influence on the result.
47+
*
48+
* For tracking avoidance purposes it may make sense to pass in valid video ids.
49+
*/
50+
@Nonnull
51+
public static String extractJavascriptCode() throws ParsingException {
52+
return extractJavascriptCode("d4IGg5dqeO8");
53+
}
54+
55+
private String extractJavascriptUrl(String videoId) throws ParsingException {
56+
try {
57+
final String embedUrl = "https://www.youtube.com/embed/" + videoId;
58+
final String embedPageContent = NewPipe.getDownloader()
59+
.get(embedUrl, Localization.DEFAULT).responseBody();
60+
61+
try {
62+
final String assetsPattern = "\"assets\":.+?\"js\":\\s*(\"[^\"]+\")";
63+
return Parser.matchGroup1(assetsPattern, embedPageContent)
64+
.replace("\\", "").replace("\"", "");
65+
} catch (final Parser.RegexException ex) {
66+
// playerJsUrl is still available in the file, just somewhere else TODO
67+
// it is ok not to find it, see how that's handled in getDeobfuscationCode()
68+
final Document doc = Jsoup.parse(embedPageContent);
69+
final Elements elems = doc.select("script").attr("name", "player_ias/base");
70+
for (final Element elem : elems) {
71+
if (elem.attr("src").contains("base.js")) {
72+
return elem.attr("src");
73+
}
74+
}
75+
}
76+
77+
} catch (final Exception i) {
78+
throw new ParsingException("Embedded info did not provide YouTube player js url");
79+
}
80+
throw new ParsingException("Embedded info did not provide YouTube player js url");
81+
}
82+
83+
private String cleanJavascriptUrl(String playerJsUrl) {
84+
if (playerJsUrl.startsWith("//")) {
85+
return HTTPS + playerJsUrl;
86+
} else if (playerJsUrl.startsWith("/")) {
87+
// sometimes https://www.youtube.com part has to be added manually
88+
return HTTPS + "//www.youtube.com" + playerJsUrl;
89+
} else {
90+
return playerJsUrl;
91+
}
92+
}
93+
94+
private String downloadJavascriptCode(String playerJsUrl) throws ParsingException {
95+
try {
96+
return NewPipe.getDownloader().get(playerJsUrl, Localization.DEFAULT).responseBody();
97+
} catch (Exception e) {
98+
throw new ParsingException("Could not get player js code from url: " + playerJsUrl);
99+
}
100+
}
101+
}

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeThrottlingDecoder.java

Lines changed: 0 additions & 104 deletions
This file was deleted.
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
package org.schabi.newpipe.extractor.services.youtube;
2+
3+
import org.schabi.newpipe.extractor.exceptions.ParsingException;
4+
import org.schabi.newpipe.extractor.utils.Javascript;
5+
import org.schabi.newpipe.extractor.utils.Parser;
6+
7+
import java.util.regex.Pattern;
8+
9+
/**
10+
* <p>
11+
* YouTube's media is protected with a cipher, which modifies the "n" query parameter of it's video playback urls.
12+
* This class handles extracting that "n" query parameter, applying the cipher on it and returning the resulting url
13+
* which is not throttled.
14+
* </p>
15+
*
16+
* <p>
17+
* https://r5---sn-4g5ednsz.googlevideo.com/videoplayback?n=VVF2xyZLVRZZxHXZ&other=other
18+
* </p>
19+
* becomes
20+
* <p>
21+
* https://r5---sn-4g5ednsz.googlevideo.com/videoplayback?n=iHywZkMipkszqA&other=other
22+
* </p>
23+
*/
24+
public class YoutubeThrottlingDecrypter {
25+
26+
private static final String N_PARAM_REGEX = "[&?]n=([^&]+)";
27+
28+
private final String functionName;
29+
private final String function;
30+
31+
/**
32+
* <p>
33+
* Use this if you care about the off chance that YouTube tracks with which videoId the cipher is requested.
34+
* </p>
35+
* Otherwise use the no-arg constructor which uses a constant value.
36+
*/
37+
public YoutubeThrottlingDecrypter(String videoId) throws ParsingException {
38+
final String playerJsCode = YoutubeJavascriptExtractor.extractJavascriptCode(videoId);
39+
40+
functionName = parseDecodeFunctionName(playerJsCode);
41+
function = parseDecodeFunction(playerJsCode, functionName);
42+
}
43+
44+
public YoutubeThrottlingDecrypter() throws ParsingException {
45+
final String playerJsCode = YoutubeJavascriptExtractor.extractJavascriptCode();
46+
47+
functionName = parseDecodeFunctionName(playerJsCode);
48+
function = parseDecodeFunction(playerJsCode, functionName);
49+
}
50+
51+
private String parseDecodeFunctionName(String playerJsCode) throws Parser.RegexException {
52+
Pattern pattern = Pattern.compile("b=a\\.get\\(\"n\"\\)\\)&&\\(b=(\\w+)\\(b\\),a\\.set\\(\"n\",b\\)");
53+
return Parser.matchGroup1(pattern, playerJsCode);
54+
}
55+
56+
private String parseDecodeFunction(String playerJsCode, String functionName) throws Parser.RegexException {
57+
Pattern functionPattern = Pattern.compile(functionName + "=function(.*?;)\n", Pattern.DOTALL);
58+
return "function " + functionName + Parser.matchGroup1(functionPattern, playerJsCode);
59+
}
60+
61+
public String apply(String url) throws Parser.RegexException {
62+
if (containsNParam(url)) {
63+
String oldNParam = parseNParam(url);
64+
String newNParam = decryptNParam(oldNParam);
65+
return replaceNParam(url, oldNParam, newNParam);
66+
} else {
67+
return url;
68+
}
69+
}
70+
71+
private boolean containsNParam(String url) {
72+
return Parser.isMatch(N_PARAM_REGEX, url);
73+
}
74+
75+
private String parseNParam(String url) throws Parser.RegexException {
76+
Pattern nValuePattern = Pattern.compile(N_PARAM_REGEX);
77+
return Parser.matchGroup1(nValuePattern, url);
78+
}
79+
80+
private String decryptNParam(String nParam) {
81+
Javascript javascript = new Javascript();
82+
return javascript.run(function, functionName, nParam);
83+
}
84+
85+
private String replaceNParam(String url, String oldValue, String newValue) {
86+
return url.replace(oldValue, newValue);
87+
}
88+
}

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java

Lines changed: 11 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
import com.grack.nanojson.JsonObject;
55
import com.grack.nanojson.JsonParser;
66
import com.grack.nanojson.JsonParserException;
7-
87
import org.jsoup.Jsoup;
98
import org.jsoup.nodes.Document;
109
import org.jsoup.nodes.Element;
@@ -18,60 +17,31 @@
1817
import org.schabi.newpipe.extractor.StreamingService;
1918
import org.schabi.newpipe.extractor.downloader.Downloader;
2019
import org.schabi.newpipe.extractor.downloader.Response;
21-
import org.schabi.newpipe.extractor.exceptions.AgeRestrictedContentException;
22-
import org.schabi.newpipe.extractor.exceptions.ContentNotAvailableException;
23-
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
24-
import org.schabi.newpipe.extractor.exceptions.GeographicRestrictionException;
25-
import org.schabi.newpipe.extractor.exceptions.PaidContentException;
26-
import org.schabi.newpipe.extractor.exceptions.ParsingException;
27-
import org.schabi.newpipe.extractor.exceptions.PrivateContentException;
28-
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
29-
import org.schabi.newpipe.extractor.exceptions.YoutubeMusicPremiumContentException;
20+
import org.schabi.newpipe.extractor.exceptions.*;
3021
import org.schabi.newpipe.extractor.linkhandler.LinkHandler;
3122
import org.schabi.newpipe.extractor.localization.DateWrapper;
3223
import org.schabi.newpipe.extractor.localization.Localization;
3324
import org.schabi.newpipe.extractor.localization.TimeAgoParser;
3425
import org.schabi.newpipe.extractor.localization.TimeAgoPatternsManager;
3526
import org.schabi.newpipe.extractor.services.youtube.ItagItem;
3627
import org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper;
37-
import org.schabi.newpipe.extractor.services.youtube.YoutubeThrottlingDecoder;
28+
import org.schabi.newpipe.extractor.services.youtube.YoutubeThrottlingDecrypter;
3829
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeChannelLinkHandlerFactory;
39-
import org.schabi.newpipe.extractor.stream.AudioStream;
40-
import org.schabi.newpipe.extractor.stream.Description;
41-
import org.schabi.newpipe.extractor.stream.Frameset;
42-
import org.schabi.newpipe.extractor.stream.Stream;
43-
import org.schabi.newpipe.extractor.stream.StreamExtractor;
44-
import org.schabi.newpipe.extractor.stream.StreamInfoItemExtractor;
45-
import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
46-
import org.schabi.newpipe.extractor.stream.StreamSegment;
47-
import org.schabi.newpipe.extractor.stream.StreamType;
48-
import org.schabi.newpipe.extractor.stream.SubtitlesStream;
49-
import org.schabi.newpipe.extractor.stream.VideoStream;
30+
import org.schabi.newpipe.extractor.stream.*;
5031
import org.schabi.newpipe.extractor.utils.JsonUtils;
5132
import org.schabi.newpipe.extractor.utils.Parser;
5233
import org.schabi.newpipe.extractor.utils.Utils;
5334

35+
import javax.annotation.Nonnull;
36+
import javax.annotation.Nullable;
5437
import java.io.IOException;
5538
import java.io.UnsupportedEncodingException;
5639
import java.time.LocalDate;
5740
import java.time.OffsetDateTime;
5841
import java.time.format.DateTimeFormatter;
59-
import java.util.ArrayList;
60-
import java.util.Collections;
61-
import java.util.HashMap;
62-
import java.util.LinkedHashMap;
63-
import java.util.List;
64-
import java.util.Locale;
65-
import java.util.Map;
66-
import java.util.Objects;
42+
import java.util.*;
6743

68-
import javax.annotation.Nonnull;
69-
import javax.annotation.Nullable;
70-
71-
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.fixThumbnailUrl;
72-
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonResponse;
73-
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject;
74-
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getUrlFromNavigationEndpoint;
44+
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.*;
7545
import static org.schabi.newpipe.extractor.utils.Utils.EMPTY_STRING;
7646
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
7747

@@ -553,18 +523,15 @@ public List<AudioStream> getAudioStreams() throws ExtractionException {
553523
public List<VideoStream> getVideoStreams() throws ExtractionException {
554524
assertPageFetched();
555525
final List<VideoStream> videoStreams = new ArrayList<>();
556-
YoutubeThrottlingDecoder throttlingDecoder = new YoutubeThrottlingDecoder(getId(), getExtractorLocalization());
526+
YoutubeThrottlingDecrypter throttlingDecrypter = new YoutubeThrottlingDecrypter(getId());
557527

558528
try {
559529
for (final Map.Entry<String, ItagItem> entry : getItags(FORMATS, ItagItem.ItagType.VIDEO).entrySet()) {
560530
final ItagItem itag = entry.getValue();
561-
final String url = entry.getKey();
562-
563-
String oldNParam = throttlingDecoder.parseNParam(url);
564-
String newNParam = throttlingDecoder.decodeNParam(oldNParam);
565-
String newUrl = throttlingDecoder.replaceNParam(url, oldNParam, newNParam);
531+
String url = entry.getKey();
532+
url = throttlingDecrypter.apply(url);
566533

567-
final VideoStream videoStream = new VideoStream(newUrl, false, itag);
534+
final VideoStream videoStream = new VideoStream(url, false, itag);
568535
if (!Stream.containSimilarStream(videoStream, videoStreams)) {
569536
videoStreams.add(videoStream);
570537
}

0 commit comments

Comments
 (0)