Skip to content

Commit 80cf8b3

Browse files
committed
Extract separate YoutubeThrottlingDecoder
1 parent a86a301 commit 80cf8b3

3 files changed

Lines changed: 152 additions & 52 deletions

File tree

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
package org.schabi.newpipe.extractor.services.youtube;
2+
3+
import org.jsoup.Jsoup;
4+
import org.jsoup.nodes.Document;
5+
import org.jsoup.nodes.Element;
6+
import org.jsoup.select.Elements;
7+
import org.schabi.newpipe.extractor.NewPipe;
8+
import org.schabi.newpipe.extractor.exceptions.ParsingException;
9+
import org.schabi.newpipe.extractor.localization.Localization;
10+
import org.schabi.newpipe.extractor.utils.Javascript;
11+
import org.schabi.newpipe.extractor.utils.Parser;
12+
13+
import java.util.regex.Pattern;
14+
15+
public class YoutubeThrottlingDecoder {
16+
17+
private static final String HTTPS = "https:";
18+
19+
private final String functionName;
20+
private final String function;
21+
22+
public YoutubeThrottlingDecoder(String videoId, Localization localization) throws ParsingException {
23+
String playerJsUrl = cleanPlayerJsUrl(extractPlayerJsUrl(videoId, localization));
24+
String playerJsCode = downloadPlayerJsCode(localization, playerJsUrl);
25+
26+
functionName = parseDecodeFunctionName(playerJsCode);
27+
function = parseDecodeFunction(playerJsCode, functionName);
28+
}
29+
30+
private String extractPlayerJsUrl(String videoId, Localization localization) throws ParsingException {
31+
try {
32+
final String embedUrl = "https://www.youtube.com/embed/" + videoId;
33+
final String embedPageContent = NewPipe.getDownloader()
34+
.get(embedUrl, localization).responseBody();
35+
36+
try {
37+
final String assetsPattern = "\"assets\":.+?\"js\":\\s*(\"[^\"]+\")";
38+
return Parser.matchGroup1(assetsPattern, embedPageContent)
39+
.replace("\\", "").replace("\"", "");
40+
} catch (final Parser.RegexException ex) {
41+
// playerJsUrl is still available in the file, just somewhere else TODO
42+
// it is ok not to find it, see how that's handled in getDeobfuscationCode()
43+
final Document doc = Jsoup.parse(embedPageContent);
44+
final Elements elems = doc.select("script").attr("name", "player_ias/base");
45+
for (final Element elem : elems) {
46+
if (elem.attr("src").contains("base.js")) {
47+
return elem.attr("src");
48+
}
49+
}
50+
}
51+
52+
} catch (final Exception i) {
53+
throw new ParsingException("Embedded info did not provide YouTube player js url");
54+
}
55+
throw new ParsingException("Embedded info did not provide YouTube player js url");
56+
}
57+
58+
private String cleanPlayerJsUrl(String playerJsUrl) {
59+
if (playerJsUrl.startsWith("//")) {
60+
return HTTPS + playerJsUrl;
61+
} else if (playerJsUrl.startsWith("/")) {
62+
// sometimes https://www.youtube.com part has to be added manually
63+
return HTTPS + "//www.youtube.com" + playerJsUrl;
64+
} else {
65+
return playerJsUrl;
66+
}
67+
}
68+
69+
private String downloadPlayerJsCode(Localization localization, String playerJsUrl) throws ParsingException {
70+
try {
71+
return NewPipe.getDownloader().get(playerJsUrl, localization).responseBody();
72+
} catch (Exception e) {
73+
throw new ParsingException("Could not get player js code from url: " + playerJsUrl);
74+
}
75+
}
76+
77+
private String parseDecodeFunctionName(String playerJsCode) throws Parser.RegexException {
78+
Pattern pattern = Pattern.compile("b=a\\.get\\(\"n\"\\)\\)&&\\(b=(\\w+)\\(b\\),a\\.set\\(\"n\",b\\)");
79+
return Parser.matchGroup1(pattern, playerJsCode);
80+
}
81+
82+
private String parseDecodeFunction(String playerJsCode, String functionName) throws Parser.RegexException {
83+
Pattern functionPattern = Pattern.compile(functionName + "=function(.*?;)\n", Pattern.DOTALL);
84+
return "function " + functionName + Parser.matchGroup1(functionPattern, playerJsCode);
85+
}
86+
87+
public String parseNParam(String url) throws Parser.RegexException {
88+
Pattern nValuePattern = Pattern.compile("[&?]n=([^&]+)");
89+
return Parser.matchGroup1(nValuePattern, url);
90+
}
91+
92+
public String decodeNParam(String nParam) {
93+
Javascript javascript = new Javascript();
94+
return javascript.run(function, functionName, nParam);
95+
}
96+
97+
public String replaceNParam(String url, String newValue) {
98+
Pattern nValuePattern = Pattern.compile("[&?]n=([^&]+)");
99+
return nValuePattern.matcher(url).replaceFirst(newValue);
100+
}
101+
}

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java

Lines changed: 27 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import org.schabi.newpipe.extractor.localization.TimeAgoPatternsManager;
2626
import org.schabi.newpipe.extractor.services.youtube.ItagItem;
2727
import org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper;
28+
import org.schabi.newpipe.extractor.services.youtube.YoutubeThrottlingDecoder;
2829
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeChannelLinkHandlerFactory;
2930
import org.schabi.newpipe.extractor.stream.*;
3031
import org.schabi.newpipe.extractor.utils.JsonUtils;
@@ -39,7 +40,6 @@
3940
import java.time.OffsetDateTime;
4041
import java.time.format.DateTimeFormatter;
4142
import java.util.*;
42-
import java.util.regex.Pattern;
4343

4444
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.*;
4545
import static org.schabi.newpipe.extractor.utils.Utils.EMPTY_STRING;
@@ -80,13 +80,10 @@ public static class DeobfuscateException extends ParsingException {
8080

8181
@Nullable
8282
private static String cachedDeobfuscationCode = null;
83-
@Nullable
84-
private String playerJsUrl = null;
85-
86-
private JsonArray initialAjaxJson;
87-
private JsonObject initialData;
8883
@Nonnull
8984
private final Map<String, String> videoInfoPage = new HashMap<>();
85+
private JsonArray initialAjaxJson;
86+
private JsonObject initialData;
9087
private JsonObject playerResponse;
9188
private JsonObject videoPrimaryInfoRenderer;
9289
private JsonObject videoSecondaryInfoRenderer;
@@ -526,32 +523,18 @@ public List<AudioStream> getAudioStreams() throws ExtractionException {
526523
public List<VideoStream> getVideoStreams() throws ExtractionException {
527524
assertPageFetched();
528525
final List<VideoStream> videoStreams = new ArrayList<>();
526+
YoutubeThrottlingDecoder throttlingDecoder = new YoutubeThrottlingDecoder(getId(), getExtractorLocalization());
529527

530528
try {
531-
getDeobfuscationCode();
532-
final String playerCode = NewPipe.getDownloader()
533-
.get(playerJsUrl, getExtractorLocalization()).responseBody();
534-
Pattern pattern = Pattern.compile("b=a\\.get\\(\"n\"\\)\\)&&\\(b=(\\w+)\\(b\\),a\\.set\\(\"n\",b\\)");
535-
String functionName = Parser.matchGroup1(pattern, playerCode);
536-
Pattern functionPattern = Pattern.compile(functionName + "=function(.*?;)\n", Pattern.DOTALL);
537-
String function = "function " + functionName + Parser.matchGroup1(functionPattern, playerCode);
538-
539-
Context context = Context.enter();
540-
context.setOptimizationLevel(-1);
541-
ScriptableObject scope = context.initSafeStandardObjects();
542-
543529
for (final Map.Entry<String, ItagItem> entry : getItags(FORMATS, ItagItem.ItagType.VIDEO).entrySet()) {
544530
final ItagItem itag = entry.getValue();
545531
final String url = entry.getKey();
546-
Pattern nValuePattern = Pattern.compile("[&?]n=([^&]+)");
547-
String nValue = Parser.matchGroup1(nValuePattern, url);
548-
549-
context.evaluateString(scope, function, functionName, 1, null);
550-
final Function jsFunction = (Function) scope.get(functionName, scope);
551-
Object result = jsFunction.call(context, scope, scope, new Object[]{nValue});
552-
String newNValue = Objects.toString(result, nValue);
553-
String newUrl = nValuePattern.matcher(url).replaceFirst(newNValue);
554-
System.out.println("aaaaaa " + nValue + " - " + newNValue);
532+
533+
String oldNParam = throttlingDecoder.parseNParam(url);
534+
String newNParam = throttlingDecoder.decodeNParam(oldNParam);
535+
String newUrl = throttlingDecoder.replaceNParam(url, newNParam);
536+
537+
System.out.println("aaaaaa " + oldNParam + " - " + newNParam);
555538
final VideoStream videoStream = new VideoStream(newUrl, false, itag);
556539
if (!Stream.containSimilarStream(videoStream, videoStreams)) {
557540
videoStreams.add(videoStream);
@@ -820,16 +803,15 @@ private void fetchVideoInfoPage() throws ParsingException, ReCaptchaException, I
820803
}
821804
}
822805

823-
@Nonnull
824-
private String getEmbeddedInfoStsAndStorePlayerJsUrl() {
806+
private String extractPlayerJsUrl() throws ParsingException {
825807
try {
826808
final String embedUrl = "https://www.youtube.com/embed/" + getId();
827809
final String embedPageContent = NewPipe.getDownloader()
828810
.get(embedUrl, getExtractorLocalization()).responseBody();
829811

830812
try {
831813
final String assetsPattern = "\"assets\":.+?\"js\":\\s*(\"[^\"]+\")";
832-
playerJsUrl = Parser.matchGroup1(assetsPattern, embedPageContent)
814+
return Parser.matchGroup1(assetsPattern, embedPageContent)
833815
.replace("\\", "").replace("\"", "");
834816
} catch (final Parser.RegexException ex) {
835817
// playerJsUrl is still available in the file, just somewhere else TODO
@@ -838,17 +820,25 @@ private String getEmbeddedInfoStsAndStorePlayerJsUrl() {
838820
final Elements elems = doc.select("script").attr("name", "player_ias/base");
839821
for (final Element elem : elems) {
840822
if (elem.attr("src").contains("base.js")) {
841-
playerJsUrl = elem.attr("src");
842-
break;
823+
return elem.attr("src");
843824
}
844825
}
845826
}
846827

847-
// Get embed sts
848-
return Parser.matchGroup1("\"sts\"\\s*:\\s*(\\d+)", embedPageContent);
849828
} catch (final Exception i) {
850-
// if it fails we simply reply with no sts as then it does not seem to be necessary
851-
return "";
829+
throw new ParsingException("Embedded info did not provide YouTube player js url");
830+
}
831+
throw new ParsingException("Embedded info did not provide YouTube player js url");
832+
}
833+
834+
private String cleanPlayerJsUrl(String playerJsUrl) {
835+
if (playerJsUrl.startsWith("//")) {
836+
return HTTPS + playerJsUrl;
837+
} else if (playerJsUrl.startsWith("/")) {
838+
// sometimes https://www.youtube.com part has to be added manually
839+
return HTTPS + "//www.youtube.com" + playerJsUrl;
840+
} else {
841+
return playerJsUrl;
852842
}
853843
}
854844

@@ -899,22 +889,7 @@ private String loadDeobfuscationCode(@Nonnull final String playerJsUrl)
899889
@Nonnull
900890
private String getDeobfuscationCode() throws ParsingException {
901891
if (cachedDeobfuscationCode == null) {
902-
if (playerJsUrl == null) {
903-
// the currentPlayerJsUrl was not found in any page fetched so far and there is
904-
// nothing cached, so try fetching embedded info
905-
getEmbeddedInfoStsAndStorePlayerJsUrl();
906-
if (playerJsUrl == null) {
907-
throw new ParsingException(
908-
"Embedded info did not provide YouTube player js url");
909-
}
910-
}
911-
912-
if (playerJsUrl.startsWith("//")) {
913-
playerJsUrl = HTTPS + playerJsUrl;
914-
} else if (playerJsUrl.startsWith("/")) {
915-
// sometimes https://www.youtube.com part has to be added manually
916-
playerJsUrl = HTTPS + "//www.youtube.com" + playerJsUrl;
917-
}
892+
String playerJsUrl = cleanPlayerJsUrl(extractPlayerJsUrl());
918893

919894
cachedDeobfuscationCode = loadDeobfuscationCode(playerJsUrl);
920895
}
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
package org.schabi.newpipe.extractor.utils;
2+
3+
import org.mozilla.javascript.Context;
4+
import org.mozilla.javascript.Function;
5+
import org.mozilla.javascript.ScriptableObject;
6+
7+
public class Javascript {
8+
9+
public String run(String function, String functionName, String... parameters) {
10+
try {
11+
Context context = Context.enter();
12+
context.setOptimizationLevel(-1);
13+
ScriptableObject scope = context.initSafeStandardObjects();
14+
15+
context.evaluateString(scope, function, functionName, 1, null);
16+
Function jsFunction = (Function) scope.get(functionName, scope);
17+
Object result = jsFunction.call(context, scope, scope, parameters);
18+
return result.toString();
19+
} finally {
20+
Context.exit();
21+
}
22+
}
23+
24+
}

0 commit comments

Comments
 (0)