Skip to content

Commit 22d2f7e

Browse files
committed
[Youtube] Add cookies to youtube mix request
This way YouTube won't return duplicates when fetching more items of a mix (but it also allows YouTube to track us).
1 parent 4219354 commit 22d2f7e

8 files changed

Lines changed: 276 additions & 167 deletions

File tree

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java

Lines changed: 74 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,10 @@
55
import com.grack.nanojson.JsonParser;
66
import com.grack.nanojson.JsonParserException;
77
import com.grack.nanojson.JsonWriter;
8+
89
import org.jsoup.Jsoup;
910
import org.jsoup.nodes.Document;
11+
import org.schabi.newpipe.extractor.Page;
1012
import org.schabi.newpipe.extractor.downloader.Response;
1113
import org.schabi.newpipe.extractor.exceptions.ContentNotAvailableException;
1214
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
@@ -21,6 +23,7 @@
2123
import java.net.MalformedURLException;
2224
import java.net.URL;
2325
import java.net.URLDecoder;
26+
import java.nio.charset.StandardCharsets;
2427
import java.time.LocalDate;
2528
import java.time.OffsetDateTime;
2629
import java.time.ZoneOffset;
@@ -35,6 +38,7 @@
3538
import static org.schabi.newpipe.extractor.utils.Utils.HTTP;
3639
import static org.schabi.newpipe.extractor.utils.Utils.HTTPS;
3740
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
41+
import static org.schabi.newpipe.extractor.utils.Utils.join;
3842

3943
/*
4044
* Created by Christian Schabesberger on 02.03.16.
@@ -61,6 +65,12 @@ public class YoutubeParsingHelper {
6165
private YoutubeParsingHelper() {
6266
}
6367

68+
/**
69+
* The official youtube app supports intents in this format, where after the ':' is the videoId.
70+
* Accordingly there are other apps sharing streams in this format.
71+
*/
72+
public final static String BASE_YOUTUBE_INTENT_URL = "vnd.youtube";
73+
6474
private static final String HARDCODED_CLIENT_VERSION = "2.20200214.04.00";
6575
private static String clientVersion;
6676

@@ -193,22 +203,22 @@ public static OffsetDateTime parseDateFrom(String textualUploadDate) throws Pars
193203
}
194204

195205
/**
196-
* Checks if the given playlist id is a youtube mix (auto-generated playlist)
197-
* Ids from a youtube mix start with "RD"
206+
* Checks if the given playlist id is a YouTube Mix (auto-generated playlist)
207+
* Ids from a YouTube Mix start with "RD"
198208
* @param playlistId
199-
* @return Whether given id belongs to a youtube mix
209+
* @return Whether given id belongs to a YouTube Mix
200210
*/
201-
public static boolean isYoutubeMixId(String playlistId) {
211+
public static boolean isYoutubeMixId(final String playlistId) {
202212
return playlistId.startsWith("RD") && !isYoutubeMusicMixId(playlistId);
203213
}
204214

205215
/**
206-
* Checks if the given playlist id is a youtube music mix (auto-generated playlist)
207-
* Ids from a youtube music mix start with "RD"
216+
* Checks if the given playlist id is a YouTube Music Mix (auto-generated playlist)
217+
* Ids from a YouTube Music Mix start with "RD"
208218
* @param playlistId
209-
* @return Whether given id belongs to a youtube music mix
219+
* @return Whether given id belongs to a YouTube Music Mix
210220
*/
211-
public static boolean isYoutubeMusicMixId(String playlistId) {
221+
public static boolean isYoutubeMusicMixId(final String playlistId) {
212222
return playlistId.startsWith("RDAMVM");
213223
}
214224

@@ -352,7 +362,7 @@ public static boolean areHardcodedYoutubeMusicKeysValid() throws IOException, Re
352362
.end()
353363
.value("query", "test")
354364
.value("params", "Eg-KAQwIARAAGAAgACgAMABqChAEEAUQAxAKEAk%3D")
355-
.end().done().getBytes("UTF-8");
365+
.end().done().getBytes(StandardCharsets.UTF_8);
356366
// @formatter:on
357367

358368
Map<String, List<String>> headers = new HashMap<>();
@@ -436,10 +446,14 @@ public static String getUrlFromNavigationEndpoint(JsonObject navigationEndpoint)
436446
} else if (navigationEndpoint.has("watchEndpoint")) {
437447
StringBuilder url = new StringBuilder();
438448
url.append("https://www.youtube.com/watch?v=").append(navigationEndpoint.getObject("watchEndpoint").getString("videoId"));
439-
if (navigationEndpoint.getObject("watchEndpoint").has("playlistId"))
440-
url.append("&list=").append(navigationEndpoint.getObject("watchEndpoint").getString("playlistId"));
441-
if (navigationEndpoint.getObject("watchEndpoint").has("startTimeSeconds"))
442-
url.append("&t=").append(navigationEndpoint.getObject("watchEndpoint").getInt("startTimeSeconds"));
449+
if (navigationEndpoint.getObject("watchEndpoint").has("playlistId")) {
450+
url.append("&amp;list=").append(navigationEndpoint.getObject("watchEndpoint")
451+
.getString("playlistId"));
452+
}
453+
if (navigationEndpoint.getObject("watchEndpoint").has("startTimeSeconds")) {
454+
url.append("&amp;t=").append(navigationEndpoint.getObject("watchEndpoint")
455+
.getInt("startTimeSeconds"));
456+
}
443457
return url.toString();
444458
} else if (navigationEndpoint.has("watchPlaylistEndpoint")) {
445459
return "https://www.youtube.com/playlist?list=" +
@@ -467,7 +481,6 @@ public static String getTextFromObject(JsonObject textObject, boolean html) thro
467481
if (html && ((JsonObject) textPart).has("navigationEndpoint")) {
468482
String url = getUrlFromNavigationEndpoint(((JsonObject) textPart).getObject("navigationEndpoint"));
469483
if (!isNullOrEmpty(url)) {
470-
url = url.replaceAll("&", "&amp;");
471484
textBuilder.append("<a href=\"").append(url).append("\">").append(text).append("</a>");
472485
continue;
473486
}
@@ -506,8 +519,8 @@ public static String fixThumbnailUrl(String thumbnailUrl) {
506519
public static String getValidJsonResponseBody(final Response response)
507520
throws ParsingException, MalformedURLException {
508521
if (response.responseCode() == 404) {
509-
throw new ContentNotAvailableException("Not found" +
510-
" (\"" + response.responseCode() + " " + response.responseMessage() + "\")");
522+
throw new ContentNotAvailableException("Not found"
523+
+ " (\"" + response.responseCode() + " " + response.responseMessage() + "\")");
511524
}
512525

513526
final String responseBody = response.responseBody();
@@ -527,22 +540,64 @@ public static String getValidJsonResponseBody(final Response response)
527540
final String responseContentType = response.getHeader("Content-Type");
528541
if (responseContentType != null
529542
&& responseContentType.toLowerCase().contains("text/html")) {
530-
throw new ParsingException("Got HTML document, expected JSON response" +
531-
" (latest url was: \"" + response.latestUrl() + "\")");
543+
throw new ParsingException("Got HTML document, expected JSON response"
544+
+ " (latest url was: \"" + response.latestUrl() + "\")");
532545
}
533546

534547
return responseBody;
535548
}
536549

550+
public static Response getResponse(final String url, final Localization localization)
551+
throws IOException, ExtractionException {
552+
final Map<String, List<String>> headers = new HashMap<>();
553+
headers.put("X-YouTube-Client-Name", Collections.singletonList("1"));
554+
headers.put("X-YouTube-Client-Version", Collections.singletonList(getClientVersion()));
555+
556+
final Response response = getDownloader().get(url, headers, localization);
557+
getValidJsonResponseBody(response);
558+
559+
return response;
560+
}
561+
562+
public static String extractCookieValue(final String cookieName, final Response response) {
563+
final List<String> cookies = response.responseHeaders().get("Set-Cookie");
564+
int startIndex;
565+
String result = "";
566+
for (final String cookie : cookies) {
567+
startIndex = cookie.indexOf(cookieName);
568+
if (startIndex != -1) {
569+
result = cookie.substring(startIndex + cookieName.length() + "=".length(),
570+
cookie.indexOf(";", startIndex));
571+
}
572+
}
573+
return result;
574+
}
575+
537576
public static JsonArray getJsonResponse(final String url, final Localization localization)
538577
throws IOException, ExtractionException {
539578
Map<String, List<String>> headers = new HashMap<>();
540579
headers.put("X-YouTube-Client-Name", Collections.singletonList("1"));
541580
headers.put("X-YouTube-Client-Version", Collections.singletonList(getClientVersion()));
542581
final Response response = getDownloader().get(url, headers, localization);
543582

544-
final String responseBody = getValidJsonResponseBody(response);
583+
return toJsonArray(getValidJsonResponseBody(response));
584+
}
585+
586+
public static JsonArray getJsonResponse(final Page page, final Localization localization)
587+
throws IOException, ExtractionException {
588+
final Map<String, List<String>> headers = new HashMap<>();
589+
if (!isNullOrEmpty(page.getCookies())) {
590+
headers.put("Cookie", Collections.singletonList(join(";", "=", page.getCookies())));
591+
}
592+
headers.put("X-YouTube-Client-Name", Collections.singletonList("1"));
593+
headers.put("X-YouTube-Client-Version", Collections.singletonList(getClientVersion()));
594+
595+
final Response response = getDownloader().get(page.getUrl(), headers, localization);
596+
597+
return toJsonArray(getValidJsonResponseBody(response));
598+
}
545599

600+
public static JsonArray toJsonArray(final String responseBody) throws ParsingException {
546601
try {
547602
return JsonParser.array().from(responseBody);
548603
} catch (JsonParserException e) {

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeService.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ public ChannelExtractor getChannelExtractor(ListLinkHandler linkHandler) {
110110
}
111111

112112
@Override
113-
public PlaylistExtractor getPlaylistExtractor(ListLinkHandler linkHandler) {
113+
public PlaylistExtractor getPlaylistExtractor(final ListLinkHandler linkHandler) {
114114
if (YoutubeParsingHelper.isYoutubeMixId(linkHandler.getId())) {
115115
return new YoutubeMixPlaylistExtractor(this, linkHandler);
116116
} else {

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeMixPlaylistExtractor.java

Lines changed: 61 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,10 @@
44
import com.grack.nanojson.JsonObject;
55

66
import org.schabi.newpipe.extractor.ListExtractor;
7+
import org.schabi.newpipe.extractor.Page;
78
import org.schabi.newpipe.extractor.StreamingService;
89
import org.schabi.newpipe.extractor.downloader.Downloader;
10+
import org.schabi.newpipe.extractor.downloader.Response;
911
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
1012
import org.schabi.newpipe.extractor.exceptions.ParsingException;
1113
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
@@ -15,34 +17,50 @@
1517
import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
1618

1719
import java.io.IOException;
20+
import java.util.Collections;
21+
import java.util.List;
1822

1923
import javax.annotation.Nonnull;
2024
import javax.annotation.Nullable;
2125

26+
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.extractCookieValue;
2227
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonResponse;
28+
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getResponse;
2329
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getUrlFromNavigationEndpoint;
30+
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.toJsonArray;
2431

2532
/**
26-
* A YoutubePlaylistExtractor for a mix (auto-generated playlist). It handles urls in the format of
27-
* "youtube.com/watch?v=videoId&list=playlistId"
33+
* A {@link YoutubePlaylistExtractor} for a mix (auto-generated playlist).
34+
* It handles URLs in the format of
35+
* {@code youtube.com/watch?v=videoId&list=playlistId}
2836
*/
2937
public class YoutubeMixPlaylistExtractor extends PlaylistExtractor {
3038

39+
/**
40+
* YouTube identifies mixes based on this cookie. With this information it can generate
41+
* continuations without duplicates.
42+
*/
43+
private static final String COOKIE_NAME = "VISITOR_INFO1_LIVE";
44+
3145
private JsonObject initialData;
3246
private JsonObject playlistData;
47+
private String cookieValue;
3348

34-
public YoutubeMixPlaylistExtractor(StreamingService service, ListLinkHandler linkHandler) {
49+
/**
 * Creates a mix extractor; both arguments are handed to the superclass constructor.
 *
 * @param service     the streaming service this extractor belongs to
 * @param linkHandler the link handler describing the mix
 */
public YoutubeMixPlaylistExtractor(
        final StreamingService service,
        final ListLinkHandler linkHandler) {
    super(service, linkHandler);
}
3753

3854
@Override
39-
public void onFetchPage(@Nonnull Downloader downloader)
40-
throws IOException, ExtractionException {
55+
public void onFetchPage(@Nonnull final Downloader downloader)
56+
throws IOException, ExtractionException {
4157
final String url = getUrl() + "&pbj=1";
42-
final JsonArray ajaxJson = getJsonResponse(url, getExtractorLocalization());
58+
final Response response = getResponse(url, getExtractorLocalization());
59+
final JsonArray ajaxJson = toJsonArray(response.responseBody());
4360
initialData = ajaxJson.getObject(3).getObject("response");
4461
playlistData = initialData.getObject("contents").getObject("twoColumnWatchNextResults")
45-
.getObject("playlist").getObject("playlist");
62+
.getObject("playlist").getObject("playlist");
63+
cookieValue = extractCookieValue(COOKIE_NAME, response);
4664
}
4765

4866
@Nonnull
@@ -58,16 +76,15 @@ public String getName() throws ParsingException {
5876
@Override
5977
public String getThumbnailUrl() throws ParsingException {
6078
try {
61-
final String playlistId = playlistData.getString("playlistId");
79+
return getThumbnailUrlFromPlaylistId(playlistData.getString("playlistId"));
80+
} catch (final Exception e) {
6281
try {
63-
return getThumbnailUrlFromPlaylistId(playlistId);
64-
} catch (ParsingException e) {
6582
//fallback to thumbnail of current video. Always the case for channel mix
6683
return getThumbnailUrlFromVideoId(
6784
initialData.getObject("currentVideoEndpoint").getObject("watchEndpoint")
6885
.getString("videoId"));
86+
} catch (final Exception ignored) {
6987
}
70-
} catch (Exception e) {
7188
throw new ParsingException("Could not get playlist thumbnail", e);
7289
}
7390
}
@@ -104,63 +121,66 @@ public long getStreamCount() {
104121
@Nonnull
105122
@Override
106123
public InfoItemsPage<StreamInfoItem> getInitialPage() throws ExtractionException {
107-
StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
124+
final StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
108125
collectStreamsFrom(collector, playlistData.getArray("contents"));
109-
return new InfoItemsPage<>(collector, getNextPageUrl());
126+
return new InfoItemsPage<>(collector,
127+
new Page(getNextPageUrl(), Collections.singletonMap(COOKIE_NAME, cookieValue)));
110128
}
111129

112-
@Override
113-
public String getNextPageUrl() throws ExtractionException {
130+
private String getNextPageUrl() throws ExtractionException {
114131
return getNextPageUrlFrom(playlistData);
115132
}
116133

117-
private String getNextPageUrlFrom(JsonObject playlistData) throws ExtractionException {
118-
final JsonObject lastStream = ((JsonObject) playlistData.getArray("contents")
119-
.get(playlistData.getArray("contents").size() - 1));
134+
private String getNextPageUrlFrom(final JsonObject playlistJson) throws ExtractionException {
135+
final JsonObject lastStream = ((JsonObject) playlistJson.getArray("contents")
136+
.get(playlistJson.getArray("contents").size() - 1));
120137
if (lastStream == null || lastStream.getObject("playlistPanelVideoRenderer") == null) {
121138
throw new ExtractionException("Could not extract next page url");
122139
}
123-
//Index of video in mix is missing, but adding it doesn't appear to have any effect.
124-
//And since the index needs to be tracked by us, it is left out
140+
125141
return getUrlFromNavigationEndpoint(
126-
lastStream.getObject("playlistPanelVideoRenderer").getObject("navigationEndpoint"))
127-
+ "&pbj=1";
142+
lastStream.getObject("playlistPanelVideoRenderer").getObject("navigationEndpoint"))
143+
+ "&pbj=1";
128144
}
129145

130146
@Override
131-
public InfoItemsPage<StreamInfoItem> getPage(final String pageUrl)
147+
public InfoItemsPage<StreamInfoItem> getPage(final Page page)
132148
throws ExtractionException, IOException {
133-
if (pageUrl == null || pageUrl.isEmpty()) {
149+
if (page == null || page.getUrl().isEmpty()) {
134150
throw new ExtractionException(
135151
new IllegalArgumentException("Page url is empty or null"));
136152
}
137153

138-
StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
139-
final JsonArray ajaxJson = getJsonResponse(pageUrl, getExtractorLocalization());
140-
JsonObject playlistData =
141-
ajaxJson.getObject(3).getObject("response").getObject("contents")
142-
.getObject("twoColumnWatchNextResults").getObject("playlist")
143-
.getObject("playlist");
144-
final JsonArray streams = playlistData.getArray("contents");
145-
//Because continuation requests are created with the last video of previous request as start
146-
streams.remove(0);
147-
collectStreamsFrom(collector, streams);
148-
return new InfoItemsPage<>(collector, getNextPageUrlFrom(playlistData));
154+
final JsonArray ajaxJson = getJsonResponse(page, getExtractorLocalization());
155+
final JsonObject playlistJson =
156+
ajaxJson.getObject(3).getObject("response").getObject("contents")
157+
.getObject("twoColumnWatchNextResults").getObject("playlist")
158+
.getObject("playlist");
159+
final JsonArray allStreams = playlistJson.getArray("contents");
160+
// Sublist because youtube returns up to 24 previous streams in the mix
161+
// +1 because the stream of "currentIndex" was already extracted in previous request
162+
final List<Object> newStreams =
163+
allStreams.subList(playlistJson.getInt("currentIndex") + 1, allStreams.size());
164+
165+
final StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
166+
collectStreamsFrom(collector, newStreams);
167+
return new InfoItemsPage<>(collector,
168+
new Page(getNextPageUrlFrom(playlistJson), page.getCookies()));
149169
}
150170

151171
private void collectStreamsFrom(
152-
@Nonnull StreamInfoItemsCollector collector,
153-
@Nullable JsonArray streams) {
172+
@Nonnull final StreamInfoItemsCollector collector,
173+
@Nullable final List<Object> streams) {
154174

155175
if (streams == null) {
156176
return;
157177
}
158178

159179
final TimeAgoParser timeAgoParser = getTimeAgoParser();
160180

161-
for (Object stream : streams) {
181+
for (final Object stream : streams) {
162182
if (stream instanceof JsonObject) {
163-
JsonObject streamInfo = ((JsonObject) stream)
183+
final JsonObject streamInfo = ((JsonObject) stream)
164184
.getObject("playlistPanelVideoRenderer");
165185
if (streamInfo != null) {
166186
collector.commit(new YoutubeStreamInfoItemExtractor(streamInfo, timeAgoParser));
@@ -169,7 +189,7 @@ private void collectStreamsFrom(
169189
}
170190
}
171191

172-
private String getThumbnailUrlFromPlaylistId(String playlistId) throws ParsingException {
192+
private String getThumbnailUrlFromPlaylistId(final String playlistId) throws ParsingException {
173193
final String videoId;
174194
if (playlistId.startsWith("RDMM")) {
175195
videoId = playlistId.substring(4);
@@ -184,7 +204,7 @@ private String getThumbnailUrlFromPlaylistId(String playlistId) throws ParsingEx
184204
return getThumbnailUrlFromVideoId(videoId);
185205
}
186206

187-
private String getThumbnailUrlFromVideoId(String videoId) {
207+
private String getThumbnailUrlFromVideoId(final String videoId) {
188208
return "https://i.ytimg.com/vi/" + videoId + "/hqdefault.jpg";
189209
}
190210

0 commit comments

Comments
 (0)