55import com .grack .nanojson .JsonParser ;
66import com .grack .nanojson .JsonParserException ;
77
8- import org .jsoup .Jsoup ;
98import org .jsoup .nodes .Document ;
10- import org .jsoup .nodes .Element ;
119import org .schabi .newpipe .extractor .InfoItem ;
1210import org .schabi .newpipe .extractor .StreamingService ;
1311import org .schabi .newpipe .extractor .downloader .Downloader ;
1917import org .schabi .newpipe .extractor .search .InfoItemsSearchCollector ;
2018import org .schabi .newpipe .extractor .search .SearchExtractor ;
2119import org .schabi .newpipe .extractor .services .youtube .linkHandler .YoutubeParsingHelper ;
22- import org .schabi .newpipe .extractor .utils .Parser ;
2320
2421import java .io .IOException ;
25- import java .io .UnsupportedEncodingException ;
26- import java .net .MalformedURLException ;
27- import java .net .URL ;
22+ import java .util .Collections ;
23+ import java .util .HashMap ;
24+ import java .util .List ;
25+ import java .util .Map ;
2826
2927import javax .annotation .Nonnull ;
3028
@@ -73,58 +71,70 @@ public String getUrl() throws ParsingException {
7371
7472 @ Override
7573 public String getSearchSuggestion () {
76- final Element el = doc .select ("div[class*=\" spell-correction\" ]" ).first ();
77- if (el != null ) {
78- return el .select ("a" ).first ().text ();
79- } else {
74+ JsonObject showingResultsForRenderer = initialData .getObject ("contents" )
75+ .getObject ("twoColumnSearchResultsRenderer" ).getObject ("primaryContents" )
76+ .getObject ("sectionListRenderer" ).getArray ("contents" ).getObject (0 )
77+ .getObject ("itemSectionRenderer" ).getArray ("contents" ).getObject (0 )
78+ .getObject ("showingResultsForRenderer" );
79+ if (showingResultsForRenderer == null ) {
8080 return "" ;
81+ } else {
82+ return showingResultsForRenderer .getObject ("correctedQuery" ).getArray ("runs" )
83+ .getObject (0 ).getString ("text" );
8184 }
8285 }
8386
8487 @ Nonnull
8588 @ Override
8689 public InfoItemsPage <InfoItem > getInitialPage () throws ExtractionException {
87- return new InfoItemsPage <>(collectItems (doc ), getNextPageUrl ());
90+ InfoItemsSearchCollector collector = getInfoItemSearchCollector ();
91+ JsonArray videos = initialData .getObject ("contents" ).getObject ("twoColumnSearchResultsRenderer" )
92+ .getObject ("primaryContents" ).getObject ("sectionListRenderer" ).getArray ("contents" )
93+ .getObject (0 ).getObject ("itemSectionRenderer" ).getArray ("contents" );
94+
95+ collectStreamsFrom (collector , videos );
96+ return new InfoItemsPage <>(collector , getNextPageUrl ());
8897 }
8998
9099 @ Override
91100 public String getNextPageUrl () throws ExtractionException {
92- return getUrl () + "&page=" + 2 ;
101+ return getNextPageUrlFrom (initialData .getObject ("contents" ).getObject ("twoColumnSearchResultsRenderer" )
102+ .getObject ("primaryContents" ).getObject ("sectionListRenderer" ).getArray ("contents" )
103+ .getObject (0 ).getObject ("itemSectionRenderer" ).getArray ("continuations" ));
93104 }
94105
95106 @ Override
96107 public InfoItemsPage <InfoItem > getPage (String pageUrl ) throws IOException , ExtractionException {
97- // TODO: Get extracting next pages working
98- final String response = getDownloader (). get ( pageUrl , getExtractorLocalization ()). responseBody ( );
99- doc = Jsoup . parse ( response , pageUrl );
108+ if ( pageUrl == null || pageUrl . isEmpty ()) {
109+ throw new ExtractionException ( new IllegalArgumentException ( "Page url is empty or null" ) );
110+ }
100111
101- return new InfoItemsPage <>(collectItems (doc ), getNextPageUrlFromCurrentUrl (pageUrl ));
102- }
112+ InfoItemsSearchCollector collector = getInfoItemSearchCollector ();
113+ JsonArray ajaxJson ;
114+ try {
115+ Map <String , List <String >> headers = new HashMap <>();
116+ headers .put ("X-YouTube-Client-Name" , Collections .singletonList ("1" ));
117+ headers .put ("X-YouTube-Client-Version" , Collections .singletonList ("2.20200221.03.00" )); // TODO: Automatically get YouTube client version somehow
118+ final String response = getDownloader ().get (pageUrl , headers , getExtractorLocalization ()).responseBody ();
119+ ajaxJson = JsonParser .array ().from (response );
120+ } catch (JsonParserException pe ) {
121+ throw new ParsingException ("Could not parse json data for next streams" , pe );
122+ }
123+
124+ JsonObject itemSectionRenderer = ajaxJson .getObject (1 ).getObject ("response" )
125+ .getObject ("continuationContents" ).getObject ("itemSectionContinuation" );
103126
104- private String getNextPageUrlFromCurrentUrl (String currentUrl )
105- throws MalformedURLException , UnsupportedEncodingException {
106- final int pageNr = Integer .parseInt (
107- Parser .compatParseMap (
108- new URL (currentUrl )
109- .getQuery ())
110- .get ("page" ));
127+ collectStreamsFrom (collector , itemSectionRenderer .getArray ("contents" ));
111128
112- return currentUrl .replace ("&page=" + pageNr ,
113- "&page=" + Integer .toString (pageNr + 1 ));
129+ return new InfoItemsPage <>(collector , getNextPageUrlFrom (itemSectionRenderer .getArray ("continuations" )));
114130 }
115131
116- private InfoItemsSearchCollector collectItems (Document doc ) throws NothingFoundException , ParsingException {
117- InfoItemsSearchCollector collector = getInfoItemSearchCollector ();
132+ private void collectStreamsFrom (InfoItemsSearchCollector collector , JsonArray videos ) throws NothingFoundException , ParsingException {
118133 collector .reset ();
119134
120135 final TimeAgoParser timeAgoParser = getTimeAgoParser ();
121136
122- if (initialData == null ) initialData = YoutubeParsingHelper .getInitialData (doc .toString ());
123- JsonArray list = initialData .getObject ("contents" ).getObject ("twoColumnSearchResultsRenderer" )
124- .getObject ("primaryContents" ).getObject ("sectionListRenderer" ).getArray ("contents" )
125- .getObject (0 ).getObject ("itemSectionRenderer" ).getArray ("contents" );
126-
127- for (Object item : list ) {
137+ for (Object item : videos ) {
128138 if (((JsonObject ) item ).getObject ("backgroundPromoRenderer" ) != null ) {
129139 throw new NothingFoundException (((JsonObject ) item ).getObject ("backgroundPromoRenderer" )
130140 .getObject ("bodyText" ).getArray ("runs" ).getObject (0 ).getString ("text" ));
@@ -136,7 +146,17 @@ private InfoItemsSearchCollector collectItems(Document doc) throws NothingFoundE
136146 collector .commit (new YoutubePlaylistInfoItemExtractor (((JsonObject ) item ).getObject ("playlistRenderer" )));
137147 }
138148 }
139- return collector ;
140149 }
141150
151+ private String getNextPageUrlFrom (JsonArray continuations ) throws ParsingException {
152+ if (continuations == null ) {
153+ return "" ;
154+ }
155+
156+ JsonObject nextContinuationData = continuations .getObject (0 ).getObject ("nextContinuationData" );
157+ String continuation = nextContinuationData .getString ("continuation" );
158+ String clickTrackingParams = nextContinuationData .getString ("clickTrackingParams" );
159+ return getUrl () + "&pbj=1&ctoken=" + continuation + "&continuation=" + continuation
160+ + "&itct=" + clickTrackingParams ;
161+ }
142162}
0 commit comments