Skip to content

Commit c0a8e01

Browse files
wb9688 authored and TobiGr committed
Implement pagination in YoutubePlaylistExtractor
1 parent 4039409 commit c0a8e01

2 files changed

Lines changed: 40 additions & 41 deletions

File tree

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -231,6 +231,10 @@ public InfoItemsPage<StreamInfoItem> getPage(String pageUrl) throws IOException,
231231

232232

233233
private String getNextPageUrlFrom(JsonArray continuations) {
234+
if (continuations == null) {
235+
return "";
236+
}
237+
234238
JsonObject nextContinuationData = continuations.getObject(0).getObject("nextContinuationData");
235239
String continuation = nextContinuationData.getString("continuation");
236240
String clickTrackingParams = nextContinuationData.getString("clickTrackingParams");

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java

Lines changed: 36 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -5,9 +5,7 @@
55
import com.grack.nanojson.JsonParser;
66
import com.grack.nanojson.JsonParserException;
77

8-
import org.jsoup.Jsoup;
98
import org.jsoup.nodes.Document;
10-
import org.jsoup.nodes.Element;
119
import org.schabi.newpipe.extractor.StreamingService;
1210
import org.schabi.newpipe.extractor.downloader.Downloader;
1311
import org.schabi.newpipe.extractor.downloader.Response;
@@ -22,9 +20,12 @@
2220
import org.schabi.newpipe.extractor.utils.Utils;
2321

2422
import java.io.IOException;
23+
import java.util.Collections;
24+
import java.util.HashMap;
25+
import java.util.List;
26+
import java.util.Map;
2527

2628
import javax.annotation.Nonnull;
27-
import javax.annotation.Nullable;
2829

2930
@SuppressWarnings("WeakerAccess")
3031
public class YoutubePlaylistExtractor extends PlaylistExtractor {
@@ -95,7 +96,11 @@ private JsonObject getPlaylistVideos() throws ParsingException {
9596

9697
@Override
9798
public String getNextPageUrl() throws ExtractionException {
98-
return getNextPageUrlFrom(doc);
99+
return getNextPageUrlFrom(initialData.getObject("contents").getObject("twoColumnBrowseResultsRenderer")
100+
.getArray("tabs").getObject(0).getObject("tabRenderer").getObject("content")
101+
.getObject("sectionListRenderer").getArray("contents").getObject(0)
102+
.getObject("itemSectionRenderer").getArray("contents").getObject(0)
103+
.getObject("playlistVideoListRenderer").getArray("continuations"));
99104
}
100105

101106
@Nonnull
@@ -174,8 +179,14 @@ public long getStreamCount() throws ParsingException {
174179
@Override
175180
public InfoItemsPage<StreamInfoItem> getInitialPage() throws ExtractionException {
176181
StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
177-
Element tbody = doc.select("tbody[id=\"pl-load-more-destination\"]").first();
178-
collectStreamsFrom(collector, tbody);
182+
183+
JsonArray videos = initialData.getObject("contents").getObject("twoColumnBrowseResultsRenderer")
184+
.getArray("tabs").getObject(0).getObject("tabRenderer").getObject("content")
185+
.getObject("sectionListRenderer").getArray("contents").getObject(0)
186+
.getObject("itemSectionRenderer").getArray("contents").getObject(0)
187+
.getObject("playlistVideoListRenderer").getArray("contents");
188+
189+
collectStreamsFrom(collector, videos);
179190
return new InfoItemsPage<>(collector, getNextPageUrl());
180191
}
181192

@@ -186,58 +197,42 @@ public InfoItemsPage<StreamInfoItem> getPage(final String pageUrl) throws IOExce
186197
}
187198

188199
StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
189-
JsonObject pageJson;
200+
JsonArray ajaxJson;
190201
try {
191-
final String responseBody = getDownloader().get(pageUrl, getExtractorLocalization()).responseBody();
192-
pageJson = JsonParser.object().from(responseBody);
202+
Map<String, List<String>> headers = new HashMap<>();
203+
headers.put("X-YouTube-Client-Name", Collections.singletonList("1"));
204+
headers.put("X-YouTube-Client-Version", Collections.singletonList("2.20200221.03.00")); // TODO: Automatically get YouTube client version somehow
205+
final String response = getDownloader().get(pageUrl, headers, getExtractorLocalization()).responseBody();
206+
ajaxJson = JsonParser.array().from(response);
193207
} catch (JsonParserException pe) {
194-
throw new ParsingException("Could not parse ajax json", pe);
208+
throw new ParsingException("Could not parse json data for next streams", pe);
195209
}
196210

197-
final Document pageHtml = Jsoup.parse("<table><tbody id=\"pl-load-more-destination\">"
198-
+ pageJson.getString("content_html")
199-
+ "</tbody></table>", pageUrl);
211+
JsonObject sectionListContinuation = ajaxJson.getObject(1).getObject("response")
212+
.getObject("continuationContents").getObject("playlistVideoListContinuation");
200213

201-
collectStreamsFrom(collector, pageHtml.select("tbody[id=\"pl-load-more-destination\"]").first());
214+
collectStreamsFrom(collector, sectionListContinuation.getArray("contents"));
202215

203-
return new InfoItemsPage<>(collector, getNextPageUrlFromAjax(pageJson, pageUrl));
216+
return new InfoItemsPage<>(collector, getNextPageUrlFrom(sectionListContinuation.getArray("continuations")));
204217
}
205218

206-
private String getNextPageUrlFromAjax(final JsonObject pageJson, final String pageUrl)
207-
throws ParsingException {
208-
String nextPageHtml = pageJson.getString("load_more_widget_html");
209-
if (!nextPageHtml.isEmpty()) {
210-
return getNextPageUrlFrom(Jsoup.parse(nextPageHtml, pageUrl));
211-
} else {
219+
private String getNextPageUrlFrom(JsonArray continuations) {
220+
if (continuations == null) {
212221
return "";
213222
}
214-
}
215223

216-
private String getNextPageUrlFrom(Document d) throws ParsingException {
217-
try {
218-
Element button = d.select("button[class*=\"yt-uix-load-more\"]").first();
219-
if (button != null) {
220-
return button.attr("abs:data-uix-load-more-href");
221-
} else {
222-
// Sometimes playlists are simply so small, they don't have a more streams/videos
223-
return "";
224-
}
225-
} catch (Exception e) {
226-
throw new ParsingException("could not get next streams' url", e);
227-
}
224+
JsonObject nextContinuationData = continuations.getObject(0).getObject("nextContinuationData");
225+
String continuation = nextContinuationData.getString("continuation");
226+
String clickTrackingParams = nextContinuationData.getString("clickTrackingParams");
227+
return "https://www.youtube.com/browse_ajax?ctoken=" + continuation + "&continuation=" + continuation
228+
+ "&itct=" + clickTrackingParams;
228229
}
229230

230-
private void collectStreamsFrom(@Nonnull StreamInfoItemsCollector collector, @Nullable Element element) {
231+
private void collectStreamsFrom(StreamInfoItemsCollector collector, JsonArray videos) {
231232
collector.reset();
232233

233234
final TimeAgoParser timeAgoParser = getTimeAgoParser();
234235

235-
JsonArray videos = initialData.getObject("contents").getObject("twoColumnBrowseResultsRenderer")
236-
.getArray("tabs").getObject(0).getObject("tabRenderer").getObject("content")
237-
.getObject("sectionListRenderer").getArray("contents").getObject(0)
238-
.getObject("itemSectionRenderer").getArray("contents").getObject(0)
239-
.getObject("playlistVideoListRenderer").getArray("contents");
240-
241236
for (Object video : videos) {
242237
if (((JsonObject) video).getObject("playlistVideoRenderer") != null) {
243238
collector.commit(new YoutubeStreamInfoItemExtractor(((JsonObject) video).getObject("playlistVideoRenderer"), timeAgoParser) {

0 commit comments

Comments
 (0)