Skip to content

Commit 2dfa218

Browse files
wb9688 authored and TobiGr committed
Implement pagination in YoutubeChannelExtractor
1 parent a38ab9b commit 2dfa218

1 file changed

Lines changed: 49 additions & 50 deletions

File tree

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java

Lines changed: 49 additions & 50 deletions
Original file line number · Diff line number · Diff line change
@@ -5,7 +5,6 @@
55
import com.grack.nanojson.JsonParser;
66
import com.grack.nanojson.JsonParserException;
77

8-
import org.jsoup.Jsoup;
98
import org.jsoup.nodes.Document;
109
import org.jsoup.nodes.Element;
1110
import org.schabi.newpipe.extractor.StreamingService;
@@ -22,6 +21,10 @@
2221
import org.schabi.newpipe.extractor.utils.Utils;
2322

2423
import java.io.IOException;
24+
import java.util.Collections;
25+
import java.util.HashMap;
26+
import java.util.List;
27+
import java.util.Map;
2528

2629
import javax.annotation.Nonnull;
2730

@@ -71,7 +74,7 @@ public void onFetchPage(@Nonnull Downloader downloader) throws IOException, Extr
7174

7275
@Override
7376
public String getNextPageUrl() throws ExtractionException {
74-
return getNextPageUrlFrom(doc);
77+
return getNextPageUrlFrom(getVideoTab().getObject("content").getObject("sectionListRenderer").getArray("continuations"));
7578
}
7679

7780
@Nonnull
@@ -189,8 +192,10 @@ public String getDescription() throws ParsingException {
189192
@Override
190193
public InfoItemsPage<StreamInfoItem> getInitialPage() throws ExtractionException {
191194
StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
192-
Element ul = doc.select("ul[id=\"browse-items-primary\"]").first();
193-
collectStreamsFrom(collector, ul);
195+
196+
JsonArray videos = getVideoTab().getObject("content").getObject("sectionListRenderer").getArray("contents");
197+
collectStreamsFrom(collector, videos);
198+
194199
return new InfoItemsPage<>(collector, getNextPageUrl());
195200
}
196201

@@ -203,71 +208,44 @@ public InfoItemsPage<StreamInfoItem> getPage(String pageUrl) throws IOException,
203208

204209
// Unfortunately, we have to fetch the page even if we are only getting next streams,
205210
// as they don't deliver enough information on their own (the channel name, for example).
206-
fetchPage();
211+
// fetchPage();
207212

208213
StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
209-
JsonObject ajaxJson;
214+
JsonArray ajaxJson;
210215
try {
211-
final String response = getDownloader().get(pageUrl, getExtractorLocalization()).responseBody();
212-
ajaxJson = JsonParser.object().from(response);
216+
Map<String, List<String>> headers = new HashMap<>();
217+
headers.put("X-YouTube-Client-Name", Collections.singletonList("1"));
218+
headers.put("X-YouTube-Client-Version", Collections.singletonList("2.20200221.03.00")); // TODO: Automatically get YouTube client version somehow
219+
final String response = getDownloader().get(pageUrl, headers, getExtractorLocalization()).responseBody();
220+
ajaxJson = JsonParser.array().from(response);
213221
} catch (JsonParserException pe) {
214222
throw new ParsingException("Could not parse json data for next streams", pe);
215223
}
216224

217-
final Document ajaxHtml = Jsoup.parse(ajaxJson.getString("content_html"), pageUrl);
218-
collectStreamsFrom(collector, ajaxHtml.select("body").first());
225+
JsonObject sectionListContinuation = ajaxJson.getObject(1).getObject("response")
226+
.getObject("continuationContents").getObject("sectionListContinuation");
219227

220-
return new InfoItemsPage<>(collector, getNextPageUrlFromAjaxPage(ajaxJson, pageUrl));
221-
}
228+
collectStreamsFrom(collector, sectionListContinuation.getArray("contents"));
222229

223-
private String getNextPageUrlFromAjaxPage(final JsonObject ajaxJson, final String pageUrl)
224-
throws ParsingException {
225-
String loadMoreHtmlDataRaw = ajaxJson.getString("load_more_widget_html");
226-
if (!loadMoreHtmlDataRaw.isEmpty()) {
227-
return getNextPageUrlFrom(Jsoup.parse(loadMoreHtmlDataRaw, pageUrl));
228-
} else {
229-
return "";
230-
}
230+
return new InfoItemsPage<>(collector, getNextPageUrlFrom(sectionListContinuation.getArray("continuations")));
231231
}
232232

233-
private String getNextPageUrlFrom(Document d) throws ParsingException {
234-
try {
235-
Element button = d.select("button[class*=\"yt-uix-load-more\"]").first();
236-
if (button != null) {
237-
return button.attr("abs:data-uix-load-more-href");
238-
} else {
239-
// Sometimes channels are simply so small, they don't have a more streams/videos
240-
return "";
241-
}
242-
} catch (Exception e) {
243-
throw new ParsingException("Could not get next page url", e);
244-
}
233+
234+
private String getNextPageUrlFrom(JsonArray continuations) {
235+
JsonObject nextContinuationData = continuations.getObject(0).getObject("nextContinuationData");
236+
String continuation = nextContinuationData.getString("continuation");
237+
String clickTrackingParams = nextContinuationData.getString("clickTrackingParams");
238+
return "https://www.youtube.com/browse_ajax?ctoken=" + continuation + "&continuation=" + continuation
239+
+ "&itct=" + clickTrackingParams;
245240
}
246241

247-
private void collectStreamsFrom(StreamInfoItemsCollector collector, Element element) throws ParsingException {
242+
private void collectStreamsFrom(StreamInfoItemsCollector collector, JsonArray videos) throws ParsingException {
248243
collector.reset();
249244

250245
final String uploaderName = getName();
251246
final String uploaderUrl = getUrl();
252247
final TimeAgoParser timeAgoParser = getTimeAgoParser();
253248

254-
JsonArray tabs = initialData.getObject("contents").getObject("twoColumnBrowseResultsRenderer")
255-
.getArray("tabs");
256-
JsonArray videos = null;
257-
258-
for (Object tab : tabs) {
259-
if (((JsonObject) tab).getObject("tabRenderer") != null) {
260-
if (((JsonObject) tab).getObject("tabRenderer").getString("title").equals("Videos")) {
261-
videos = ((JsonObject) tab).getObject("tabRenderer").getObject("content")
262-
.getObject("sectionListRenderer").getArray("contents");
263-
}
264-
}
265-
}
266-
267-
if (videos == null) {
268-
throw new ParsingException("Could not find Videos tab");
269-
}
270-
271249
for (Object video : videos) {
272250
JsonObject videoInfo = ((JsonObject) video).getObject("itemSectionRenderer")
273251
.getArray("contents").getObject(0);
@@ -286,4 +264,25 @@ public String getUploaderUrl() {
286264
}
287265
}
288266
}
267+
268+
private JsonObject getVideoTab() throws ParsingException {
269+
JsonArray tabs = initialData.getObject("contents").getObject("twoColumnBrowseResultsRenderer")
270+
.getArray("tabs");
271+
JsonObject videoTab = null;
272+
273+
for (Object tab : tabs) {
274+
if (((JsonObject) tab).getObject("tabRenderer") != null) {
275+
if (((JsonObject) tab).getObject("tabRenderer").getString("title").equals("Videos")) {
276+
videoTab = ((JsonObject) tab).getObject("tabRenderer");
277+
break;
278+
}
279+
}
280+
}
281+
282+
if (videoTab == null) {
283+
throw new ParsingException("Could not find Videos tab");
284+
}
285+
286+
return videoTab;
287+
}
289288
}

0 commit comments

Comments (0)