2323import java .io .IOException ;
2424import java .io .UnsupportedEncodingException ;
2525import java .net .URLEncoder ;
26+ import java .util .Collections ;
2627import java .util .HashMap ;
2728import java .util .List ;
2829import java .util .Map ;
30+ import java .util .Optional ;
2931import java .util .regex .Pattern ;
3032
3133import static java .util .Collections .singletonList ;
@@ -41,53 +43,99 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
4143 private String ytClientName ;
4244 private String responseBody ;
4345
44- public YoutubeCommentsExtractor (StreamingService service , ListLinkHandler uiHandler ) {
46+ private Optional <Boolean > optCommentsDisabled = Optional .empty ();
47+
48+ public YoutubeCommentsExtractor (
49+ final StreamingService service ,
50+ final ListLinkHandler uiHandler ) {
4551 super (service , uiHandler );
4652 }
4753
4854 @ Override
49- public InfoItemsPage <CommentsInfoItem > getInitialPage () throws IOException , ExtractionException {
55+ public InfoItemsPage <CommentsInfoItem > getInitialPage ()
56+ throws IOException , ExtractionException {
57+
58+ // Check if the the findInitialCommentsToken was already called and initialized
59+ if (optCommentsDisabled .orElse (false )) {
60+ return getInfoItemsPageForDisabledComments ();
61+ }
62+
63+ // Get the token
64+ final String commentsToken = findInitialCommentsToken ();
65+ // Check if the comments have been disabled
66+ if (optCommentsDisabled .get ()) {
67+ return getInfoItemsPageForDisabledComments ();
68+ }
69+
70+ return getPage (getNextPage (commentsToken ));
71+ }
72+
73+ /**
74+ * Finds the initial comments token and initializes commentsDisabled.
75+ * @return
76+ */
77+ private String findInitialCommentsToken () {
78+ final String continuationStartPattern = "continuation\" :\" " ;
79+
5080 String commentsTokenInside = findValue (responseBody , "sectionListRenderer" , "}" );
51- if (!commentsTokenInside .contains ("continuation \" : \" " )) {
81+ if (commentsTokenInside == null || !commentsTokenInside .contains (continuationStartPattern )) {
5282 commentsTokenInside = findValue (responseBody , "commentSectionRenderer" , "}" );
5383 }
54- final String commentsToken = findValue (commentsTokenInside , "continuation\" :\" " , "\" " );
55- return getPage (getNextPage (commentsToken ));
84+
85+ // If no continuation token is found the comments are disabled
86+ if (commentsTokenInside == null || !commentsTokenInside .contains (continuationStartPattern )) {
87+ optCommentsDisabled = Optional .of (true );
88+ return null ;
89+ }
90+
91+ // If a continuation token is found there are >= 0 comments
92+ final String commentsToken = findValue (commentsTokenInside , continuationStartPattern , "\" " );
93+
94+ optCommentsDisabled = Optional .of (false );
95+
96+ return commentsToken ;
97+ }
98+
99+ private InfoItemsPage <CommentsInfoItem > getInfoItemsPageForDisabledComments () {
100+ return new InfoItemsPage <>(Collections .emptyList (), null , Collections .emptyList ());
56101 }
57102
58- private Page getNextPage (JsonObject ajaxJson ) throws ParsingException {
103+ private Page getNextPage (final JsonObject ajaxJson ) throws ParsingException {
59104 final JsonArray arr ;
60105 try {
61106 arr = JsonUtils .getArray (ajaxJson , "response.continuationContents.commentSectionContinuation.continuations" );
62- } catch (Exception e ) {
107+ } catch (final Exception e ) {
63108 return null ;
64109 }
65110 if (arr .isEmpty ()) {
66111 return null ;
67112 }
68- String continuation ;
113+ final String continuation ;
69114 try {
70115 continuation = JsonUtils .getString (arr .getObject (0 ), "nextContinuationData.continuation" );
71- } catch (Exception e ) {
116+ } catch (final Exception e ) {
72117 return null ;
73118 }
74119 return getNextPage (continuation );
75120 }
76121
77- private Page getNextPage (String continuation ) throws ParsingException {
78- Map <String , String > params = new HashMap <>();
122+ private Page getNextPage (final String continuation ) throws ParsingException {
123+ final Map <String , String > params = new HashMap <>();
79124 params .put ("action_get_comments" , "1" );
80125 params .put ("pbj" , "1" );
81126 params .put ("ctoken" , continuation );
82127 try {
83128 return new Page ("https://m.youtube.com/watch_comment?" + getDataString (params ));
84- } catch (UnsupportedEncodingException e ) {
129+ } catch (final UnsupportedEncodingException e ) {
85130 throw new ParsingException ("Could not get next page url" , e );
86131 }
87132 }
88133
89134 @ Override
90135 public InfoItemsPage <CommentsInfoItem > getPage (final Page page ) throws IOException , ExtractionException {
136+ if (optCommentsDisabled .orElse (false )) {
137+ return getInfoItemsPageForDisabledComments ();
138+ }
91139 if (page == null || isNullOrEmpty (page .getUrl ())) {
92140 throw new IllegalArgumentException ("Page doesn't contain an URL" );
93141 }
@@ -96,39 +144,40 @@ public InfoItemsPage<CommentsInfoItem> getPage(final Page page) throws IOExcepti
96144 final JsonObject ajaxJson ;
97145 try {
98146 ajaxJson = JsonParser .array ().from (ajaxResponse ).getObject (1 );
99- } catch (Exception e ) {
147+ } catch (final Exception e ) {
100148 throw new ParsingException ("Could not parse json data for comments" , e );
101149 }
102150 final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector (getServiceId ());
103151 collectCommentsFrom (collector , ajaxJson );
104152 return new InfoItemsPage <>(collector , getNextPage (ajaxJson ));
105153 }
106154
107- private void collectCommentsFrom (CommentsInfoItemsCollector collector , JsonObject ajaxJson ) throws ParsingException {
108- JsonArray contents ;
155+ private void collectCommentsFrom (final CommentsInfoItemsCollector collector , final JsonObject ajaxJson ) throws ParsingException {
156+ final JsonArray contents ;
109157 try {
110158 contents = JsonUtils .getArray (ajaxJson , "response.continuationContents.commentSectionContinuation.items" );
111- } catch (Exception e ) {
159+ } catch (final Exception e ) {
112160 //no comments
113161 return ;
114162 }
115- List <Object > comments ;
163+ final List <Object > comments ;
116164 try {
117165 comments = JsonUtils .getValues (contents , "commentThreadRenderer.comment.commentRenderer" );
118- } catch (Exception e ) {
166+ } catch (final Exception e ) {
119167 throw new ParsingException ("unable to get parse youtube comments" , e );
120168 }
121169
122- for (Object c : comments ) {
170+ for (final Object c : comments ) {
123171 if (c instanceof JsonObject ) {
124- CommentsInfoItemExtractor extractor = new YoutubeCommentsInfoItemExtractor ((JsonObject ) c , getUrl (), getTimeAgoParser ());
172+ final CommentsInfoItemExtractor extractor =
173+ new YoutubeCommentsInfoItemExtractor ((JsonObject ) c , getUrl (), getTimeAgoParser ());
125174 collector .commit (extractor );
126175 }
127176 }
128177 }
129178
130179 @ Override
131- public void onFetchPage (@ Nonnull Downloader downloader ) throws IOException , ExtractionException {
180+ public void onFetchPage (@ Nonnull final Downloader downloader ) throws IOException , ExtractionException {
132181 final Map <String , List <String >> requestHeaders = new HashMap <>();
133182 requestHeaders .put ("User-Agent" , singletonList (USER_AGENT ));
134183 final Response response = downloader .get (getUrl (), requestHeaders , getExtractorLocalization ());
@@ -138,23 +187,24 @@ public void onFetchPage(@Nonnull Downloader downloader) throws IOException, Extr
138187 }
139188
140189
141- private String makeAjaxRequest (String siteUrl ) throws IOException , ReCaptchaException {
142- Map <String , List <String >> requestHeaders = new HashMap <>();
190+ private String makeAjaxRequest (final String siteUrl ) throws IOException , ReCaptchaException {
191+ final Map <String , List <String >> requestHeaders = new HashMap <>();
143192 requestHeaders .put ("Accept" , singletonList ("*/*" ));
144193 requestHeaders .put ("User-Agent" , singletonList (USER_AGENT ));
145194 requestHeaders .put ("X-YouTube-Client-Version" , singletonList (ytClientVersion ));
146195 requestHeaders .put ("X-YouTube-Client-Name" , singletonList (ytClientName ));
147196 return getDownloader ().get (siteUrl , requestHeaders , getExtractorLocalization ()).responseBody ();
148197 }
149198
150- private String getDataString (Map <String , String > params ) throws UnsupportedEncodingException {
151- StringBuilder result = new StringBuilder ();
199+ private String getDataString (final Map <String , String > params ) throws UnsupportedEncodingException {
200+ final StringBuilder result = new StringBuilder ();
152201 boolean first = true ;
153- for (Map .Entry <String , String > entry : params .entrySet ()) {
154- if (first )
202+ for (final Map .Entry <String , String > entry : params .entrySet ()) {
203+ if (first ) {
155204 first = false ;
156- else
205+ } else {
157206 result .append ("&" );
207+ }
158208 result .append (URLEncoder .encode (entry .getKey (), UTF_8 ));
159209 result .append ("=" );
160210 result .append (URLEncoder .encode (entry .getValue (), UTF_8 ));
@@ -163,8 +213,28 @@ private String getDataString(Map<String, String> params) throws UnsupportedEncod
163213 }
164214
165215 private String findValue (final String doc , final String start , final String end ) {
166- final int beginIndex = doc .indexOf (start ) + start .length ();
216+ int beginIndex = doc .indexOf (start );
217+ // Start string was not found
218+ if (beginIndex == -1 ) {
219+ return null ;
220+ }
221+ beginIndex = beginIndex + start .length ();
167222 final int endIndex = doc .indexOf (end , beginIndex );
223+ // End string was not found
224+ if (endIndex == -1 ) {
225+ return null ;
226+ }
168227 return doc .substring (beginIndex , endIndex );
169228 }
229+
230+ @ Override
231+ public boolean isCommentsDisabled () {
232+ // Check if commentsDisabled has to be initialized
233+ if (!optCommentsDisabled .isPresent ()) {
234+ // Initialize commentsDisabled
235+ this .findInitialCommentsToken ();
236+ }
237+
238+ return optCommentsDisabled .get ();
239+ }
170240}
0 commit comments