2323import java .io .IOException ;
2424import java .io .UnsupportedEncodingException ;
2525import java .net .URLEncoder ;
26+ import java .util .Collections ;
2627import java .util .HashMap ;
2728import java .util .List ;
2829import java .util .Map ;
30+ import java .util .Optional ;
2931import java .util .regex .Pattern ;
3032
3133import static java .util .Collections .singletonList ;
@@ -41,53 +43,108 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
4143 private String ytClientName ;
4244 private String responseBody ;
4345
44- public YoutubeCommentsExtractor (StreamingService service , ListLinkHandler uiHandler ) {
46+ /**
47+ * Caching mechanism and holder of the commentsDisabled value.
48+ * <br/>
49+ * Initial value = empty -> unknown if comments are disabled or not<br/>
50+ * Some method calls {@link YoutubeCommentsExtractor#findInitialCommentsToken()}
51+ * -> value is set<br/>
52+ * If the method or another one that is depending on disabled comments
53+ * is now called again, the method execution can avoid unnecessary calls
54+ */
55+ private Optional <Boolean > optCommentsDisabled = Optional .empty ();
56+
57+ public YoutubeCommentsExtractor (
58+ final StreamingService service ,
59+ final ListLinkHandler uiHandler ) {
4560 super (service , uiHandler );
4661 }
4762
4863 @ Override
49- public InfoItemsPage <CommentsInfoItem > getInitialPage () throws IOException , ExtractionException {
64+ public InfoItemsPage <CommentsInfoItem > getInitialPage ()
65+ throws IOException , ExtractionException {
66+
67+ // Check if findInitialCommentsToken was already called and optCommentsDisabled initialized
68+ if (optCommentsDisabled .orElse (false )) {
69+ return getInfoItemsPageForDisabledComments ();
70+ }
71+
72+ // Get the token
73+ final String commentsToken = findInitialCommentsToken ();
74+ // Check if the comments have been disabled
75+ if (optCommentsDisabled .get ()) {
76+ return getInfoItemsPageForDisabledComments ();
77+ }
78+
79+ return getPage (getNextPage (commentsToken ));
80+ }
81+
82+ /**
83+ * Finds the initial comments token and initializes commentsDisabled.
84+ * @return the continuation token or null if none was found
85+ */
86+ private String findInitialCommentsToken () {
87+ final String continuationStartPattern = "continuation\" :\" " ;
88+
5089 String commentsTokenInside = findValue (responseBody , "sectionListRenderer" , "}" );
51- if (!commentsTokenInside .contains ("continuation \" : \" " )) {
90+ if (commentsTokenInside == null || !commentsTokenInside .contains (continuationStartPattern )) {
5291 commentsTokenInside = findValue (responseBody , "commentSectionRenderer" , "}" );
5392 }
54- final String commentsToken = findValue (commentsTokenInside , "continuation\" :\" " , "\" " );
55- return getPage (getNextPage (commentsToken ));
93+
94+ // If no continuation token is found the comments are disabled
95+ if (commentsTokenInside == null || !commentsTokenInside .contains (continuationStartPattern )) {
96+ optCommentsDisabled = Optional .of (true );
97+ return null ;
98+ }
99+
100+ // If a continuation token is found there are >= 0 comments
101+ final String commentsToken = findValue (commentsTokenInside , continuationStartPattern , "\" " );
102+
103+ optCommentsDisabled = Optional .of (false );
104+
105+ return commentsToken ;
106+ }
107+
108+ private InfoItemsPage <CommentsInfoItem > getInfoItemsPageForDisabledComments () {
109+ return new InfoItemsPage <>(Collections .emptyList (), null , Collections .emptyList ());
56110 }
57111
58- private Page getNextPage (JsonObject ajaxJson ) throws ParsingException {
112+ private Page getNextPage (final JsonObject ajaxJson ) throws ParsingException {
59113 final JsonArray arr ;
60114 try {
61115 arr = JsonUtils .getArray (ajaxJson , "response.continuationContents.commentSectionContinuation.continuations" );
62- } catch (Exception e ) {
116+ } catch (final Exception e ) {
63117 return null ;
64118 }
65119 if (arr .isEmpty ()) {
66120 return null ;
67121 }
68- String continuation ;
122+ final String continuation ;
69123 try {
70124 continuation = JsonUtils .getString (arr .getObject (0 ), "nextContinuationData.continuation" );
71- } catch (Exception e ) {
125+ } catch (final Exception e ) {
72126 return null ;
73127 }
74128 return getNextPage (continuation );
75129 }
76130
77- private Page getNextPage (String continuation ) throws ParsingException {
78- Map <String , String > params = new HashMap <>();
131+ private Page getNextPage (final String continuation ) throws ParsingException {
132+ final Map <String , String > params = new HashMap <>();
79133 params .put ("action_get_comments" , "1" );
80134 params .put ("pbj" , "1" );
81135 params .put ("ctoken" , continuation );
82136 try {
83137 return new Page ("https://m.youtube.com/watch_comment?" + getDataString (params ));
84- } catch (UnsupportedEncodingException e ) {
138+ } catch (final UnsupportedEncodingException e ) {
85139 throw new ParsingException ("Could not get next page url" , e );
86140 }
87141 }
88142
89143 @ Override
90144 public InfoItemsPage <CommentsInfoItem > getPage (final Page page ) throws IOException , ExtractionException {
145+ if (optCommentsDisabled .orElse (false )) {
146+ return getInfoItemsPageForDisabledComments ();
147+ }
91148 if (page == null || isNullOrEmpty (page .getUrl ())) {
92149 throw new IllegalArgumentException ("Page doesn't contain an URL" );
93150 }
@@ -96,39 +153,40 @@ public InfoItemsPage<CommentsInfoItem> getPage(final Page page) throws IOExcepti
96153 final JsonObject ajaxJson ;
97154 try {
98155 ajaxJson = JsonParser .array ().from (ajaxResponse ).getObject (1 );
99- } catch (Exception e ) {
156+ } catch (final Exception e ) {
100157 throw new ParsingException ("Could not parse json data for comments" , e );
101158 }
102159 final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector (getServiceId ());
103160 collectCommentsFrom (collector , ajaxJson );
104161 return new InfoItemsPage <>(collector , getNextPage (ajaxJson ));
105162 }
106163
107- private void collectCommentsFrom (CommentsInfoItemsCollector collector , JsonObject ajaxJson ) throws ParsingException {
108- JsonArray contents ;
164+ private void collectCommentsFrom (final CommentsInfoItemsCollector collector , final JsonObject ajaxJson ) throws ParsingException {
165+ final JsonArray contents ;
109166 try {
110167 contents = JsonUtils .getArray (ajaxJson , "response.continuationContents.commentSectionContinuation.items" );
111- } catch (Exception e ) {
168+ } catch (final Exception e ) {
112169 //no comments
113170 return ;
114171 }
115- List <Object > comments ;
172+ final List <Object > comments ;
116173 try {
117174 comments = JsonUtils .getValues (contents , "commentThreadRenderer.comment.commentRenderer" );
118- } catch (Exception e ) {
175+ } catch (final Exception e ) {
119176 throw new ParsingException ("unable to get parse youtube comments" , e );
120177 }
121178
122- for (Object c : comments ) {
179+ for (final Object c : comments ) {
123180 if (c instanceof JsonObject ) {
124- CommentsInfoItemExtractor extractor = new YoutubeCommentsInfoItemExtractor ((JsonObject ) c , getUrl (), getTimeAgoParser ());
181+ final CommentsInfoItemExtractor extractor =
182+ new YoutubeCommentsInfoItemExtractor ((JsonObject ) c , getUrl (), getTimeAgoParser ());
125183 collector .commit (extractor );
126184 }
127185 }
128186 }
129187
130188 @ Override
131- public void onFetchPage (@ Nonnull Downloader downloader ) throws IOException , ExtractionException {
189+ public void onFetchPage (@ Nonnull final Downloader downloader ) throws IOException , ExtractionException {
132190 final Map <String , List <String >> requestHeaders = new HashMap <>();
133191 requestHeaders .put ("User-Agent" , singletonList (USER_AGENT ));
134192 final Response response = downloader .get (getUrl (), requestHeaders , getExtractorLocalization ());
@@ -138,23 +196,24 @@ public void onFetchPage(@Nonnull Downloader downloader) throws IOException, Extr
138196 }
139197
140198
141- private String makeAjaxRequest (String siteUrl ) throws IOException , ReCaptchaException {
142- Map <String , List <String >> requestHeaders = new HashMap <>();
199+ private String makeAjaxRequest (final String siteUrl ) throws IOException , ReCaptchaException {
200+ final Map <String , List <String >> requestHeaders = new HashMap <>();
143201 requestHeaders .put ("Accept" , singletonList ("*/*" ));
144202 requestHeaders .put ("User-Agent" , singletonList (USER_AGENT ));
145203 requestHeaders .put ("X-YouTube-Client-Version" , singletonList (ytClientVersion ));
146204 requestHeaders .put ("X-YouTube-Client-Name" , singletonList (ytClientName ));
147205 return getDownloader ().get (siteUrl , requestHeaders , getExtractorLocalization ()).responseBody ();
148206 }
149207
150- private String getDataString (Map <String , String > params ) throws UnsupportedEncodingException {
151- StringBuilder result = new StringBuilder ();
208+ private String getDataString (final Map <String , String > params ) throws UnsupportedEncodingException {
209+ final StringBuilder result = new StringBuilder ();
152210 boolean first = true ;
153- for (Map .Entry <String , String > entry : params .entrySet ()) {
154- if (first )
211+ for (final Map .Entry <String , String > entry : params .entrySet ()) {
212+ if (first ) {
155213 first = false ;
156- else
214+ } else {
157215 result .append ("&" );
216+ }
158217 result .append (URLEncoder .encode (entry .getKey (), UTF_8 ));
159218 result .append ("=" );
160219 result .append (URLEncoder .encode (entry .getValue (), UTF_8 ));
@@ -163,8 +222,28 @@ private String getDataString(Map<String, String> params) throws UnsupportedEncod
163222 }
164223
165224 private String findValue (final String doc , final String start , final String end ) {
166- final int beginIndex = doc .indexOf (start ) + start .length ();
225+ int beginIndex = doc .indexOf (start );
226+ // Start string was not found
227+ if (beginIndex == -1 ) {
228+ return null ;
229+ }
230+ beginIndex = beginIndex + start .length ();
167231 final int endIndex = doc .indexOf (end , beginIndex );
232+ // End string was not found
233+ if (endIndex == -1 ) {
234+ return null ;
235+ }
168236 return doc .substring (beginIndex , endIndex );
169237 }
238+
239+ @ Override
240+ public boolean isCommentsDisabled () {
241+ // Check if commentsDisabled has to be initialized
242+ if (!optCommentsDisabled .isPresent ()) {
243+ // Initialize commentsDisabled
244+ this .findInitialCommentsToken ();
245+ }
246+
247+ return optCommentsDisabled .get ();
248+ }
170249}
0 commit comments