11package org .schabi .newpipe .extractor .services .youtube .extractors ;
22
3- import com .grack .nanojson .JsonArray ;
4- import com .grack .nanojson .JsonObject ;
5- import com .grack .nanojson .JsonParser ;
3+ import static org .schabi .newpipe .extractor .services .youtube .YoutubeParsingHelper .getJsonPostResponse ;
4+ import static org .schabi .newpipe .extractor .services .youtube .YoutubeParsingHelper .prepareDesktopJsonBuilder ;
5+ import static org .schabi .newpipe .extractor .utils .Utils .UTF_8 ;
6+ import static org .schabi .newpipe .extractor .utils .Utils .isNullOrEmpty ;
7+
8+ import java .io .IOException ;
9+ import java .util .Collections ;
10+ import java .util .List ;
11+ import java .util .Optional ;
12+
13+ import javax .annotation .Nonnull ;
14+ import javax .annotation .Nullable ;
15+
616import org .schabi .newpipe .extractor .Page ;
717import org .schabi .newpipe .extractor .StreamingService ;
818import org .schabi .newpipe .extractor .comments .CommentsExtractor ;
919import org .schabi .newpipe .extractor .comments .CommentsInfoItem ;
1020import org .schabi .newpipe .extractor .comments .CommentsInfoItemExtractor ;
1121import org .schabi .newpipe .extractor .comments .CommentsInfoItemsCollector ;
1222import org .schabi .newpipe .extractor .downloader .Downloader ;
13- import org .schabi .newpipe .extractor .downloader .Response ;
1423import org .schabi .newpipe .extractor .exceptions .ExtractionException ;
1524import org .schabi .newpipe .extractor .exceptions .ParsingException ;
16- import org .schabi .newpipe .extractor .exceptions .ReCaptchaException ;
1725import org .schabi .newpipe .extractor .linkhandler .ListLinkHandler ;
18- import org .schabi .newpipe .extractor .services . youtube . YoutubeParsingHelper ;
26+ import org .schabi .newpipe .extractor .localization . Localization ;
1927import org .schabi .newpipe .extractor .utils .JsonUtils ;
20- import org .schabi .newpipe .extractor .utils .Parser ;
21-
22- import javax .annotation .Nonnull ;
23- import java .io .IOException ;
24- import java .io .UnsupportedEncodingException ;
25- import java .net .URLEncoder ;
26- import java .util .Collections ;
27- import java .util .HashMap ;
28- import java .util .List ;
29- import java .util .Map ;
30- import java .util .Optional ;
31- import java .util .regex .Pattern ;
3228
33- import static java . util . Collections . singletonList ;
34- import static org . schabi . newpipe . extractor . utils . Utils . UTF_8 ;
35- import static org . schabi . newpipe . extractor . utils . Utils . isNullOrEmpty ;
29+ import com . grack . nanojson . JsonArray ;
30+ import com . grack . nanojson . JsonObject ;
31+ import com . grack . nanojson . JsonWriter ;
3632
3733public class YoutubeCommentsExtractor extends CommentsExtractor {
38- // using the mobile site for comments because it loads faster and uses get requests instead of post
39- private static final String USER_AGENT = "Mozilla/5.0 (Android 9; Mobile; rv:78.0) Gecko/20100101 Firefox/78.0" ;
40- private static final Pattern YT_CLIENT_NAME_PATTERN = Pattern .compile ("INNERTUBE_CONTEXT_CLIENT_NAME\\ \" :(.*?)[,}]" );
4134
42- private String ytClientVersion ;
43- private String ytClientName ;
44- private String responseBody ;
35+ private JsonObject nextResponse ;
4536
4637 /**
4738 * Caching mechanism and holder of the commentsDisabled value.
@@ -52,6 +43,7 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
5243 * If the method or another one that is depending on disabled comments
5344 * is now called again, the method execution can avoid unnecessary calls
5445 */
46+ @ SuppressWarnings ("OptionalUsedAsFieldOrParameterType" )
5547 private Optional <Boolean > optCommentsDisabled = Optional .empty ();
5648
5749 public YoutubeCommentsExtractor (
@@ -60,6 +52,7 @@ public YoutubeCommentsExtractor(
6052 super (service , uiHandler );
6153 }
6254
55+ @ Nonnull
6356 @ Override
6457 public InfoItemsPage <CommentsInfoItem > getInitialPage ()
6558 throws IOException , ExtractionException {
@@ -81,163 +74,177 @@ public InfoItemsPage<CommentsInfoItem> getInitialPage()
8174
8275 /**
8376 * Finds the initial comments token and initializes commentsDisabled.
77+ *
8478 * @return the continuation token or null if none was found
8579 */
86- private String findInitialCommentsToken () {
87- final String continuationStartPattern = "continuation\" :\" " ;
80+ @ Nullable
81+ private String findInitialCommentsToken () throws ExtractionException {
82+
83+ final JsonArray jArray = JsonUtils .getArray (nextResponse ,
84+ "contents.twoColumnWatchNextResults.results.results.contents" );
8885
89- String commentsTokenInside = findValue (responseBody , "sectionListRenderer" , "}" );
90- if (commentsTokenInside == null || !commentsTokenInside .contains (continuationStartPattern )) {
91- commentsTokenInside = findValue (responseBody , "commentSectionRenderer" , "}" );
86+ final Optional <Object > itemSectionRenderer = jArray .stream ().filter (o -> {
87+ JsonObject jObj = (JsonObject ) o ;
88+
89+ if (jObj .has ("itemSectionRenderer" )) {
90+ try {
91+ return JsonUtils .getString (jObj , "itemSectionRenderer.targetId" )
92+ .equals ("comments-section" );
93+ } catch (final ParsingException ignored ) {
94+ }
95+ }
96+
97+ return false ;
98+ }).findFirst ();
99+
100+ final String token ;
101+
102+ if (itemSectionRenderer .isPresent ()) {
103+ token = JsonUtils .getString (((JsonObject ) itemSectionRenderer .get ())
104+ .getObject ("itemSectionRenderer" ).getArray ("contents" ).getObject (0 ),
105+ "continuationItemRenderer.continuationEndpoint.continuationCommand.token" );
106+ } else {
107+ token = null ;
92108 }
93109
94- // If no continuation token is found the comments are disabled
95- if (commentsTokenInside == null || !commentsTokenInside .contains (continuationStartPattern )) {
110+ if (token == null ) {
96111 optCommentsDisabled = Optional .of (true );
97112 return null ;
98113 }
99114
100- // If a continuation token is found there are >= 0 comments
101- final String commentsToken = findValue (commentsTokenInside , continuationStartPattern , "\" " );
102-
103115 optCommentsDisabled = Optional .of (false );
104116
105- return commentsToken ;
117+ return token ;
106118 }
107119
120+ @ Nonnull
108121 private InfoItemsPage <CommentsInfoItem > getInfoItemsPageForDisabledComments () {
109122 return new InfoItemsPage <>(Collections .emptyList (), null , Collections .emptyList ());
110123 }
111124
112- private Page getNextPage (final JsonObject ajaxJson ) throws ParsingException {
113- final JsonArray arr ;
125+ @ Nullable
126+ private Page getNextPage (@ Nonnull final JsonObject ajaxJson ) throws ExtractionException {
127+ final JsonArray jsonArray ;
128+ final JsonArray onResponseReceivedEndpoints = ajaxJson .getArray (
129+ "onResponseReceivedEndpoints" );
130+ final JsonObject endpoint = onResponseReceivedEndpoints .getObject (
131+ onResponseReceivedEndpoints .size () - 1 );
132+
114133 try {
115- arr = JsonUtils .getArray (ajaxJson , "response.continuationContents.commentSectionContinuation.continuations" );
134+ jsonArray = endpoint .getObject ("reloadContinuationItemsCommand" , endpoint .getObject (
135+ "appendContinuationItemsAction" )).getArray ("continuationItems" );
116136 } catch (final Exception e ) {
117137 return null ;
118138 }
119- if (arr .isEmpty ()) {
139+ if (jsonArray .isEmpty ()) {
120140 return null ;
121141 }
142+
122143 final String continuation ;
123144 try {
124- continuation = JsonUtils .getString (arr .getObject (0 ), "nextContinuationData.continuation" );
145+ continuation = JsonUtils .getString (jsonArray .getObject (jsonArray .size () - 1 ),
146+ "continuationItemRenderer.continuationEndpoint.continuationCommand.token" );
125147 } catch (final Exception e ) {
126148 return null ;
127149 }
128150 return getNextPage (continuation );
129151 }
130152
153+ @ Nonnull
131154 private Page getNextPage (final String continuation ) throws ParsingException {
132- final Map <String , String > params = new HashMap <>();
133- params .put ("action_get_comments" , "1" );
134- params .put ("pbj" , "1" );
135- params .put ("ctoken" , continuation );
136- try {
137- return new Page ("https://m.youtube.com/watch_comment?" + getDataString (params ));
138- } catch (final UnsupportedEncodingException e ) {
139- throw new ParsingException ("Could not get next page url" , e );
140- }
155+ return new Page (getUrl (), continuation ); // URL is ignored tho
141156 }
142157
143158 @ Override
144- public InfoItemsPage <CommentsInfoItem > getPage (final Page page ) throws IOException , ExtractionException {
159+ public InfoItemsPage <CommentsInfoItem > getPage (final Page page )
160+ throws IOException , ExtractionException {
145161 if (optCommentsDisabled .orElse (false )) {
146162 return getInfoItemsPageForDisabledComments ();
147163 }
148- if (page == null || isNullOrEmpty (page .getUrl ())) {
149- throw new IllegalArgumentException ("Page doesn't contain an URL " );
164+ if (page == null || isNullOrEmpty (page .getId ())) {
165+ throw new IllegalArgumentException ("Page doesn't have the continuation. " );
150166 }
151167
152- final String ajaxResponse = makeAjaxRequest (page .getUrl ());
153- final JsonObject ajaxJson ;
154- try {
155- ajaxJson = JsonParser .array ().from (ajaxResponse ).getObject (1 );
156- } catch (final Exception e ) {
157- throw new ParsingException ("Could not parse json data for comments" , e );
158- }
159- final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector (getServiceId ());
168+ final Localization localization = getExtractorLocalization ();
169+ final byte [] body = JsonWriter .string (prepareDesktopJsonBuilder (localization ,
170+ getExtractorContentCountry ())
171+ .value ("continuation" , page .getId ())
172+ .done ())
173+ .getBytes (UTF_8 );
174+
175+ final JsonObject ajaxJson = getJsonPostResponse ("next" , body , localization );
176+
177+ final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector (
178+ getServiceId ());
160179 collectCommentsFrom (collector , ajaxJson );
161180 return new InfoItemsPage <>(collector , getNextPage (ajaxJson ));
162181 }
163182
164- private void collectCommentsFrom (final CommentsInfoItemsCollector collector , final JsonObject ajaxJson ) throws ParsingException {
183+ private void collectCommentsFrom (final CommentsInfoItemsCollector collector ,
184+ @ Nonnull final JsonObject ajaxJson ) throws ParsingException {
185+
186+ final JsonArray onResponseReceivedEndpoints = ajaxJson .getArray (
187+ "onResponseReceivedEndpoints" );
188+ final JsonObject commentsEndpoint = onResponseReceivedEndpoints .getObject (
189+ onResponseReceivedEndpoints .size () - 1 );
190+
191+ final String path ;
192+
193+ if (commentsEndpoint .has ("reloadContinuationItemsCommand" )) {
194+ path = "reloadContinuationItemsCommand.continuationItems" ;
195+ } else if (commentsEndpoint .has ("appendContinuationItemsAction" )) {
196+ path = "appendContinuationItemsAction.continuationItems" ;
197+ } else {
198+ // No comments
199+ return ;
200+ }
201+
165202 final JsonArray contents ;
166203 try {
167- contents = JsonUtils .getArray (ajaxJson , "response.continuationContents.commentSectionContinuation.items" );
204+ contents = ( JsonArray ) JsonUtils .getArray (commentsEndpoint , path ). clone ( );
168205 } catch (final Exception e ) {
169- //no comments
206+ // No comments
170207 return ;
171208 }
209+
210+ final int index = contents .size () - 1 ;
211+ if (contents .getObject (index ).has ("continuationItemRenderer" )) {
212+ contents .remove (index );
213+ }
214+
172215 final List <Object > comments ;
173216 try {
174- comments = JsonUtils .getValues (contents , "commentThreadRenderer.comment.commentRenderer" );
217+ comments = JsonUtils .getValues (contents ,
218+ "commentThreadRenderer.comment.commentRenderer" );
175219 } catch (final Exception e ) {
176- throw new ParsingException ("unable to get parse youtube comments" , e );
220+ throw new ParsingException ("Unable to get parse youtube comments" , e );
177221 }
178222
179223 for (final Object c : comments ) {
180224 if (c instanceof JsonObject ) {
181- final CommentsInfoItemExtractor extractor =
182- new YoutubeCommentsInfoItemExtractor ( (JsonObject ) c , getUrl (), getTimeAgoParser ());
225+ final CommentsInfoItemExtractor extractor = new YoutubeCommentsInfoItemExtractor (
226+ (JsonObject ) c , getUrl (), getTimeAgoParser ());
183227 collector .commit (extractor );
184228 }
185229 }
186230 }
187231
188232 @ Override
189- public void onFetchPage (@ Nonnull final Downloader downloader ) throws IOException , ExtractionException {
190- final Map <String , List <String >> requestHeaders = new HashMap <>();
191- requestHeaders .put ("User-Agent" , singletonList (USER_AGENT ));
192- final Response response = downloader .get (getUrl (), requestHeaders , getExtractorLocalization ());
193- responseBody = YoutubeParsingHelper .unescapeDocument (response .responseBody ());
194- ytClientVersion = findValue (responseBody , "INNERTUBE_CONTEXT_CLIENT_VERSION\" :\" " , "\" " );
195- ytClientName = Parser .matchGroup1 (YT_CLIENT_NAME_PATTERN , responseBody );
196- }
197-
233+ public void onFetchPage (@ Nonnull final Downloader downloader )
234+ throws IOException , ExtractionException {
235+ final Localization localization = getExtractorLocalization ();
236+ final byte [] body = JsonWriter .string (prepareDesktopJsonBuilder (localization ,
237+ getExtractorContentCountry ())
238+ .value ("videoId" , getId ())
239+ .done ())
240+ .getBytes (UTF_8 );
198241
199- private String makeAjaxRequest (final String siteUrl ) throws IOException , ReCaptchaException {
200- final Map <String , List <String >> requestHeaders = new HashMap <>();
201- requestHeaders .put ("Accept" , singletonList ("*/*" ));
202- requestHeaders .put ("User-Agent" , singletonList (USER_AGENT ));
203- requestHeaders .put ("X-YouTube-Client-Version" , singletonList (ytClientVersion ));
204- requestHeaders .put ("X-YouTube-Client-Name" , singletonList (ytClientName ));
205- return getDownloader ().get (siteUrl , requestHeaders , getExtractorLocalization ()).responseBody ();
242+ nextResponse = getJsonPostResponse ("next" , body , localization );
206243 }
207244
208- private String getDataString (final Map <String , String > params ) throws UnsupportedEncodingException {
209- final StringBuilder result = new StringBuilder ();
210- boolean first = true ;
211- for (final Map .Entry <String , String > entry : params .entrySet ()) {
212- if (first ) {
213- first = false ;
214- } else {
215- result .append ("&" );
216- }
217- result .append (URLEncoder .encode (entry .getKey (), UTF_8 ));
218- result .append ("=" );
219- result .append (URLEncoder .encode (entry .getValue (), UTF_8 ));
220- }
221- return result .toString ();
222- }
223-
224- private String findValue (final String doc , final String start , final String end ) {
225- int beginIndex = doc .indexOf (start );
226- // Start string was not found
227- if (beginIndex == -1 ) {
228- return null ;
229- }
230- beginIndex = beginIndex + start .length ();
231- final int endIndex = doc .indexOf (end , beginIndex );
232- // End string was not found
233- if (endIndex == -1 ) {
234- return null ;
235- }
236- return doc .substring (beginIndex , endIndex );
237- }
238245
239246 @ Override
240- public boolean isCommentsDisabled () {
247+ public boolean isCommentsDisabled () throws ExtractionException {
241248 // Check if commentsDisabled has to be initialized
242249 if (!optCommentsDisabled .isPresent ()) {
243250 // Initialize commentsDisabled
0 commit comments