Skip to content

Commit 5d883d1

Browse files
wb9688 authored and TobiGr committed
Implement pagination in YoutubeSearchExtractor
1 parent c0a8e01 commit 5d883d1

2 files changed

Lines changed: 59 additions & 39 deletions

File tree

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java

Lines changed: 55 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -5,9 +5,7 @@
55
import com.grack.nanojson.JsonParser;
66
import com.grack.nanojson.JsonParserException;
77

8-
import org.jsoup.Jsoup;
98
import org.jsoup.nodes.Document;
10-
import org.jsoup.nodes.Element;
119
import org.schabi.newpipe.extractor.InfoItem;
1210
import org.schabi.newpipe.extractor.StreamingService;
1311
import org.schabi.newpipe.extractor.downloader.Downloader;
@@ -19,12 +17,12 @@
1917
import org.schabi.newpipe.extractor.search.InfoItemsSearchCollector;
2018
import org.schabi.newpipe.extractor.search.SearchExtractor;
2119
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper;
22-
import org.schabi.newpipe.extractor.utils.Parser;
2320

2421
import java.io.IOException;
25-
import java.io.UnsupportedEncodingException;
26-
import java.net.MalformedURLException;
27-
import java.net.URL;
22+
import java.util.Collections;
23+
import java.util.HashMap;
24+
import java.util.List;
25+
import java.util.Map;
2826

2927
import javax.annotation.Nonnull;
3028

@@ -73,58 +71,70 @@ public String getUrl() throws ParsingException {
7371

7472
@Override
7573
public String getSearchSuggestion() {
76-
final Element el = doc.select("div[class*=\"spell-correction\"]").first();
77-
if (el != null) {
78-
return el.select("a").first().text();
79-
} else {
74+
JsonObject showingResultsForRenderer = initialData.getObject("contents")
75+
.getObject("twoColumnSearchResultsRenderer").getObject("primaryContents")
76+
.getObject("sectionListRenderer").getArray("contents").getObject(0)
77+
.getObject("itemSectionRenderer").getArray("contents").getObject(0)
78+
.getObject("showingResultsForRenderer");
79+
if (showingResultsForRenderer == null) {
8080
return "";
81+
} else {
82+
return showingResultsForRenderer.getObject("correctedQuery").getArray("runs")
83+
.getObject(0).getString("text");
8184
}
8285
}
8386

8487
@Nonnull
8588
@Override
8689
public InfoItemsPage<InfoItem> getInitialPage() throws ExtractionException {
87-
return new InfoItemsPage<>(collectItems(doc), getNextPageUrl());
90+
InfoItemsSearchCollector collector = getInfoItemSearchCollector();
91+
JsonArray videos = initialData.getObject("contents").getObject("twoColumnSearchResultsRenderer")
92+
.getObject("primaryContents").getObject("sectionListRenderer").getArray("contents")
93+
.getObject(0).getObject("itemSectionRenderer").getArray("contents");
94+
95+
collectStreamsFrom(collector, videos);
96+
return new InfoItemsPage<>(collector, getNextPageUrl());
8897
}
8998

9099
@Override
91100
public String getNextPageUrl() throws ExtractionException {
92-
return getUrl() + "&page=" + 2;
101+
return getNextPageUrlFrom(initialData.getObject("contents").getObject("twoColumnSearchResultsRenderer")
102+
.getObject("primaryContents").getObject("sectionListRenderer").getArray("contents")
103+
.getObject(0).getObject("itemSectionRenderer").getArray("continuations"));
93104
}
94105

95106
@Override
96107
public InfoItemsPage<InfoItem> getPage(String pageUrl) throws IOException, ExtractionException {
97-
// TODO: Get extracting next pages working
98-
final String response = getDownloader().get(pageUrl, getExtractorLocalization()).responseBody();
99-
doc = Jsoup.parse(response, pageUrl);
108+
if (pageUrl == null || pageUrl.isEmpty()) {
109+
throw new ExtractionException(new IllegalArgumentException("Page url is empty or null"));
110+
}
100111

101-
return new InfoItemsPage<>(collectItems(doc), getNextPageUrlFromCurrentUrl(pageUrl));
102-
}
112+
InfoItemsSearchCollector collector = getInfoItemSearchCollector();
113+
JsonArray ajaxJson;
114+
try {
115+
Map<String, List<String>> headers = new HashMap<>();
116+
headers.put("X-YouTube-Client-Name", Collections.singletonList("1"));
117+
headers.put("X-YouTube-Client-Version", Collections.singletonList("2.20200221.03.00")); // TODO: Automatically get YouTube client version somehow
118+
final String response = getDownloader().get(pageUrl, headers, getExtractorLocalization()).responseBody();
119+
ajaxJson = JsonParser.array().from(response);
120+
} catch (JsonParserException pe) {
121+
throw new ParsingException("Could not parse json data for next streams", pe);
122+
}
123+
124+
JsonObject itemSectionRenderer = ajaxJson.getObject(1).getObject("response")
125+
.getObject("continuationContents").getObject("itemSectionContinuation");
103126

104-
private String getNextPageUrlFromCurrentUrl(String currentUrl)
105-
throws MalformedURLException, UnsupportedEncodingException {
106-
final int pageNr = Integer.parseInt(
107-
Parser.compatParseMap(
108-
new URL(currentUrl)
109-
.getQuery())
110-
.get("page"));
127+
collectStreamsFrom(collector, itemSectionRenderer.getArray("contents"));
111128

112-
return currentUrl.replace("&page=" + pageNr,
113-
"&page=" + Integer.toString(pageNr + 1));
129+
return new InfoItemsPage<>(collector, getNextPageUrlFrom(itemSectionRenderer.getArray("continuations")));
114130
}
115131

116-
private InfoItemsSearchCollector collectItems(Document doc) throws NothingFoundException, ParsingException {
117-
InfoItemsSearchCollector collector = getInfoItemSearchCollector();
132+
private void collectStreamsFrom(InfoItemsSearchCollector collector, JsonArray videos) throws NothingFoundException, ParsingException {
118133
collector.reset();
119134

120135
final TimeAgoParser timeAgoParser = getTimeAgoParser();
121136

122-
if (initialData == null) initialData = YoutubeParsingHelper.getInitialData(doc.toString());
123-
JsonArray list = initialData.getObject("contents").getObject("twoColumnSearchResultsRenderer")
124-
.getObject("primaryContents").getObject("sectionListRenderer").getArray("contents")
125-
.getObject(0).getObject("itemSectionRenderer").getArray("contents");
126-
127-
for (Object item : list) {
137+
for (Object item : videos) {
128138
if (((JsonObject) item).getObject("backgroundPromoRenderer") != null) {
129139
throw new NothingFoundException(((JsonObject) item).getObject("backgroundPromoRenderer")
130140
.getObject("bodyText").getArray("runs").getObject(0).getString("text"));
@@ -136,7 +146,17 @@ private InfoItemsSearchCollector collectItems(Document doc) throws NothingFoundE
136146
collector.commit(new YoutubePlaylistInfoItemExtractor(((JsonObject) item).getObject("playlistRenderer")));
137147
}
138148
}
139-
return collector;
140149
}
141150

151+
private String getNextPageUrlFrom(JsonArray continuations) throws ParsingException {
152+
if (continuations == null) {
153+
return "";
154+
}
155+
156+
JsonObject nextContinuationData = continuations.getObject(0).getObject("nextContinuationData");
157+
String continuation = nextContinuationData.getString("continuation");
158+
String clickTrackingParams = nextContinuationData.getString("clickTrackingParams");
159+
return getUrl() + "&pbj=1&ctoken=" + continuation + "&continuation=" + continuation
160+
+ "&itct=" + clickTrackingParams;
161+
}
142162
}

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeSearchQueryHandlerFactory.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,13 +24,13 @@ public static YoutubeSearchQueryHandlerFactory getInstance() {
2424
public String getUrl(String searchString, List<String> contentFilters, String sortFilter) throws ParsingException {
2525
try {
2626
final String url = "https://www.youtube.com/results"
27-
+ "?q=" + URLEncoder.encode(searchString, CHARSET_UTF_8);
27+
+ "?search_query=" + URLEncoder.encode(searchString, CHARSET_UTF_8);
2828

2929
if (contentFilters.size() > 0) {
3030
switch (contentFilters.get(0)) {
31-
case VIDEOS: return url + "&sp=EgIQAVAU";
32-
case CHANNELS: return url + "&sp=EgIQAlAU";
33-
case PLAYLISTS: return url + "&sp=EgIQA1AU";
31+
case VIDEOS: return url + "&sp=EgIQAQ%253D%253D";
32+
case CHANNELS: return url + "&sp=EgIQAg%253D%253D";
33+
case PLAYLISTS: return url + "&sp=EgIQAw%253D%253D";
3434
case ALL:
3535
default:
3636
}

0 commit comments

Comments (0)