Skip to content

Commit b45bb41

Browse files
authored
Merge pull request #652 from litetex/fixYTCommentsAndAddDisabledComments
Fix yt comments and add disabled comments functionality
2 parents 6fd93cd + fdebf3c commit b45bb41

3 files changed

Lines changed: 161 additions & 47 deletions

File tree

extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsExtractor.java

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,16 @@
99

1010
public abstract class CommentsExtractor extends ListExtractor<CommentsInfoItem> {
1111

12-
public CommentsExtractor(StreamingService service, ListLinkHandler uiHandler) {
12+
public CommentsExtractor(final StreamingService service, final ListLinkHandler uiHandler) {
1313
super(service, uiHandler);
14-
// TODO Auto-generated constructor stub
14+
}
15+
16+
/**
17+
* @apiNote Warning: This method is experimental and may get removed in a future release.
18+
* @return <code>true</code> if the comments are disabled otherwise <code>false</code> (default)
19+
*/
20+
public boolean isCommentsDisabled() {
21+
return false;
1522
}
1623

1724
@Nonnull

extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfo.java

Lines changed: 44 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -13,45 +13,56 @@
1313

1414
public class CommentsInfo extends ListInfo<CommentsInfoItem> {
1515

16-
private CommentsInfo(int serviceId, ListLinkHandler listUrlIdHandler, String name) {
16+
private CommentsInfo(
17+
final int serviceId,
18+
final ListLinkHandler listUrlIdHandler,
19+
final String name) {
1720
super(serviceId, listUrlIdHandler, name);
1821
}
1922

20-
public static CommentsInfo getInfo(String url) throws IOException, ExtractionException {
23+
public static CommentsInfo getInfo(final String url) throws IOException, ExtractionException {
2124
return getInfo(NewPipe.getServiceByUrl(url), url);
2225
}
2326

24-
public static CommentsInfo getInfo(StreamingService serviceByUrl, String url) throws ExtractionException, IOException {
27+
public static CommentsInfo getInfo(final StreamingService serviceByUrl, final String url)
28+
throws ExtractionException, IOException {
2529
return getInfo(serviceByUrl.getCommentsExtractor(url));
2630
}
2731

28-
public static CommentsInfo getInfo(CommentsExtractor commentsExtractor) throws IOException, ExtractionException {
32+
public static CommentsInfo getInfo(final CommentsExtractor commentsExtractor)
33+
throws IOException, ExtractionException {
2934
// for services which do not have a comments extractor
30-
if (null == commentsExtractor) {
35+
if (commentsExtractor == null) {
3136
return null;
3237
}
3338

3439
commentsExtractor.fetchPage();
35-
String name = commentsExtractor.getName();
36-
int serviceId = commentsExtractor.getServiceId();
37-
ListLinkHandler listUrlIdHandler = commentsExtractor.getLinkHandler();
38-
CommentsInfo commentsInfo = new CommentsInfo(serviceId, listUrlIdHandler, name);
40+
41+
final String name = commentsExtractor.getName();
42+
final int serviceId = commentsExtractor.getServiceId();
43+
final ListLinkHandler listUrlIdHandler = commentsExtractor.getLinkHandler();
44+
45+
final CommentsInfo commentsInfo = new CommentsInfo(serviceId, listUrlIdHandler, name);
3946
commentsInfo.setCommentsExtractor(commentsExtractor);
40-
InfoItemsPage<CommentsInfoItem> initialCommentsPage = ExtractorHelper.getItemsPageOrLogError(commentsInfo,
41-
commentsExtractor);
47+
final InfoItemsPage<CommentsInfoItem> initialCommentsPage =
48+
ExtractorHelper.getItemsPageOrLogError(commentsInfo, commentsExtractor);
49+
commentsInfo.setCommentsDisabled(commentsExtractor.isCommentsDisabled());
4250
commentsInfo.setRelatedItems(initialCommentsPage.getItems());
4351
commentsInfo.setNextPage(initialCommentsPage.getNextPage());
4452

4553
return commentsInfo;
4654
}
4755

48-
public static InfoItemsPage<CommentsInfoItem> getMoreItems(CommentsInfo commentsInfo, Page page)
49-
throws ExtractionException, IOException {
56+
public static InfoItemsPage<CommentsInfoItem> getMoreItems(
57+
final CommentsInfo commentsInfo,
58+
final Page page) throws ExtractionException, IOException {
5059
return getMoreItems(NewPipe.getService(commentsInfo.getServiceId()), commentsInfo, page);
5160
}
5261

53-
public static InfoItemsPage<CommentsInfoItem> getMoreItems(StreamingService service, CommentsInfo commentsInfo,
54-
Page page) throws IOException, ExtractionException {
62+
public static InfoItemsPage<CommentsInfoItem> getMoreItems(
63+
final StreamingService service,
64+
final CommentsInfo commentsInfo,
65+
final Page page) throws IOException, ExtractionException {
5566
if (null == commentsInfo.getCommentsExtractor()) {
5667
commentsInfo.setCommentsExtractor(service.getCommentsExtractor(commentsInfo.getUrl()));
5768
commentsInfo.getCommentsExtractor().fetchPage();
@@ -60,13 +71,30 @@ public static InfoItemsPage<CommentsInfoItem> getMoreItems(StreamingService serv
6071
}
6172

6273
private transient CommentsExtractor commentsExtractor;
74+
private boolean commentsDisabled = false;
6375

6476
public CommentsExtractor getCommentsExtractor() {
6577
return commentsExtractor;
6678
}
6779

68-
public void setCommentsExtractor(CommentsExtractor commentsExtractor) {
80+
public void setCommentsExtractor(final CommentsExtractor commentsExtractor) {
6981
this.commentsExtractor = commentsExtractor;
7082
}
7183

84+
/**
85+
* @apiNote Warning: This method is experimental and may get removed in a future release.
86+
* @return <code>true</code> if the comments are disabled otherwise <code>false</code> (default)
87+
* @see CommentsExtractor#isCommentsDisabled()
88+
*/
89+
public boolean isCommentsDisabled() {
90+
return commentsDisabled;
91+
}
92+
93+
/**
94+
* @apiNote Warning: This method is experimental and may get removed in a future release.
95+
* @param commentsDisabled <code>true</code> if the comments are disabled otherwise <code>false</code>
96+
*/
97+
public void setCommentsDisabled(final boolean commentsDisabled) {
98+
this.commentsDisabled = commentsDisabled;
99+
}
72100
}

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java

Lines changed: 108 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,11 @@
2323
import java.io.IOException;
2424
import java.io.UnsupportedEncodingException;
2525
import java.net.URLEncoder;
26+
import java.util.Collections;
2627
import java.util.HashMap;
2728
import java.util.List;
2829
import java.util.Map;
30+
import java.util.Optional;
2931
import java.util.regex.Pattern;
3032

3133
import static java.util.Collections.singletonList;
@@ -41,53 +43,108 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
4143
private String ytClientName;
4244
private String responseBody;
4345

44-
public YoutubeCommentsExtractor(StreamingService service, ListLinkHandler uiHandler) {
46+
/**
47+
* Caching mechanism and holder of the commentsDisabled value.
48+
* <br/>
49+
* Initial value = empty -> unknown if comments are disabled or not<br/>
50+
* Some method calls {@link YoutubeCommentsExtractor#findInitialCommentsToken()}
51+
* -> value is set<br/>
52+
* If the method or another one that is depending on disabled comments
53+
* is now called again, the method execution can avoid unnecessary calls
54+
*/
55+
private Optional<Boolean> optCommentsDisabled = Optional.empty();
56+
57+
public YoutubeCommentsExtractor(
58+
final StreamingService service,
59+
final ListLinkHandler uiHandler) {
4560
super(service, uiHandler);
4661
}
4762

4863
@Override
49-
public InfoItemsPage<CommentsInfoItem> getInitialPage() throws IOException, ExtractionException {
64+
public InfoItemsPage<CommentsInfoItem> getInitialPage()
65+
throws IOException, ExtractionException {
66+
67+
// Check if findInitialCommentsToken was already called and optCommentsDisabled initialized
68+
if (optCommentsDisabled.orElse(false)) {
69+
return getInfoItemsPageForDisabledComments();
70+
}
71+
72+
// Get the token
73+
final String commentsToken = findInitialCommentsToken();
74+
// Check if the comments have been disabled
75+
if (optCommentsDisabled.get()) {
76+
return getInfoItemsPageForDisabledComments();
77+
}
78+
79+
return getPage(getNextPage(commentsToken));
80+
}
81+
82+
/**
83+
* Finds the initial comments token and initializes commentsDisabled.
84+
* @return the continuation token or null if none was found
85+
*/
86+
private String findInitialCommentsToken() {
87+
final String continuationStartPattern = "continuation\":\"";
88+
5089
String commentsTokenInside = findValue(responseBody, "sectionListRenderer", "}");
51-
if (!commentsTokenInside.contains("continuation\":\"")) {
90+
if (commentsTokenInside == null || !commentsTokenInside.contains(continuationStartPattern)) {
5291
commentsTokenInside = findValue(responseBody, "commentSectionRenderer", "}");
5392
}
54-
final String commentsToken = findValue(commentsTokenInside, "continuation\":\"", "\"");
55-
return getPage(getNextPage(commentsToken));
93+
94+
// If no continuation token is found the comments are disabled
95+
if (commentsTokenInside == null || !commentsTokenInside.contains(continuationStartPattern)) {
96+
optCommentsDisabled = Optional.of(true);
97+
return null;
98+
}
99+
100+
// If a continuation token is found there are >= 0 comments
101+
final String commentsToken = findValue(commentsTokenInside, continuationStartPattern, "\"");
102+
103+
optCommentsDisabled = Optional.of(false);
104+
105+
return commentsToken;
106+
}
107+
108+
private InfoItemsPage<CommentsInfoItem> getInfoItemsPageForDisabledComments() {
109+
return new InfoItemsPage<>(Collections.emptyList(), null, Collections.emptyList());
56110
}
57111

58-
private Page getNextPage(JsonObject ajaxJson) throws ParsingException {
112+
private Page getNextPage(final JsonObject ajaxJson) throws ParsingException {
59113
final JsonArray arr;
60114
try {
61115
arr = JsonUtils.getArray(ajaxJson, "response.continuationContents.commentSectionContinuation.continuations");
62-
} catch (Exception e) {
116+
} catch (final Exception e) {
63117
return null;
64118
}
65119
if (arr.isEmpty()) {
66120
return null;
67121
}
68-
String continuation;
122+
final String continuation;
69123
try {
70124
continuation = JsonUtils.getString(arr.getObject(0), "nextContinuationData.continuation");
71-
} catch (Exception e) {
125+
} catch (final Exception e) {
72126
return null;
73127
}
74128
return getNextPage(continuation);
75129
}
76130

77-
private Page getNextPage(String continuation) throws ParsingException {
78-
Map<String, String> params = new HashMap<>();
131+
private Page getNextPage(final String continuation) throws ParsingException {
132+
final Map<String, String> params = new HashMap<>();
79133
params.put("action_get_comments", "1");
80134
params.put("pbj", "1");
81135
params.put("ctoken", continuation);
82136
try {
83137
return new Page("https://m.youtube.com/watch_comment?" + getDataString(params));
84-
} catch (UnsupportedEncodingException e) {
138+
} catch (final UnsupportedEncodingException e) {
85139
throw new ParsingException("Could not get next page url", e);
86140
}
87141
}
88142

89143
@Override
90144
public InfoItemsPage<CommentsInfoItem> getPage(final Page page) throws IOException, ExtractionException {
145+
if (optCommentsDisabled.orElse(false)) {
146+
return getInfoItemsPageForDisabledComments();
147+
}
91148
if (page == null || isNullOrEmpty(page.getUrl())) {
92149
throw new IllegalArgumentException("Page doesn't contain an URL");
93150
}
@@ -96,39 +153,40 @@ public InfoItemsPage<CommentsInfoItem> getPage(final Page page) throws IOExcepti
96153
final JsonObject ajaxJson;
97154
try {
98155
ajaxJson = JsonParser.array().from(ajaxResponse).getObject(1);
99-
} catch (Exception e) {
156+
} catch (final Exception e) {
100157
throw new ParsingException("Could not parse json data for comments", e);
101158
}
102159
final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(getServiceId());
103160
collectCommentsFrom(collector, ajaxJson);
104161
return new InfoItemsPage<>(collector, getNextPage(ajaxJson));
105162
}
106163

107-
private void collectCommentsFrom(CommentsInfoItemsCollector collector, JsonObject ajaxJson) throws ParsingException {
108-
JsonArray contents;
164+
private void collectCommentsFrom(final CommentsInfoItemsCollector collector, final JsonObject ajaxJson) throws ParsingException {
165+
final JsonArray contents;
109166
try {
110167
contents = JsonUtils.getArray(ajaxJson, "response.continuationContents.commentSectionContinuation.items");
111-
} catch (Exception e) {
168+
} catch (final Exception e) {
112169
//no comments
113170
return;
114171
}
115-
List<Object> comments;
172+
final List<Object> comments;
116173
try {
117174
comments = JsonUtils.getValues(contents, "commentThreadRenderer.comment.commentRenderer");
118-
} catch (Exception e) {
175+
} catch (final Exception e) {
119176
throw new ParsingException("unable to get parse youtube comments", e);
120177
}
121178

122-
for (Object c : comments) {
179+
for (final Object c : comments) {
123180
if (c instanceof JsonObject) {
124-
CommentsInfoItemExtractor extractor = new YoutubeCommentsInfoItemExtractor((JsonObject) c, getUrl(), getTimeAgoParser());
181+
final CommentsInfoItemExtractor extractor =
182+
new YoutubeCommentsInfoItemExtractor((JsonObject) c, getUrl(), getTimeAgoParser());
125183
collector.commit(extractor);
126184
}
127185
}
128186
}
129187

130188
@Override
131-
public void onFetchPage(@Nonnull Downloader downloader) throws IOException, ExtractionException {
189+
public void onFetchPage(@Nonnull final Downloader downloader) throws IOException, ExtractionException {
132190
final Map<String, List<String>> requestHeaders = new HashMap<>();
133191
requestHeaders.put("User-Agent", singletonList(USER_AGENT));
134192
final Response response = downloader.get(getUrl(), requestHeaders, getExtractorLocalization());
@@ -138,23 +196,24 @@ public void onFetchPage(@Nonnull Downloader downloader) throws IOException, Extr
138196
}
139197

140198

141-
private String makeAjaxRequest(String siteUrl) throws IOException, ReCaptchaException {
142-
Map<String, List<String>> requestHeaders = new HashMap<>();
199+
private String makeAjaxRequest(final String siteUrl) throws IOException, ReCaptchaException {
200+
final Map<String, List<String>> requestHeaders = new HashMap<>();
143201
requestHeaders.put("Accept", singletonList("*/*"));
144202
requestHeaders.put("User-Agent", singletonList(USER_AGENT));
145203
requestHeaders.put("X-YouTube-Client-Version", singletonList(ytClientVersion));
146204
requestHeaders.put("X-YouTube-Client-Name", singletonList(ytClientName));
147205
return getDownloader().get(siteUrl, requestHeaders, getExtractorLocalization()).responseBody();
148206
}
149207

150-
private String getDataString(Map<String, String> params) throws UnsupportedEncodingException {
151-
StringBuilder result = new StringBuilder();
208+
private String getDataString(final Map<String, String> params) throws UnsupportedEncodingException {
209+
final StringBuilder result = new StringBuilder();
152210
boolean first = true;
153-
for (Map.Entry<String, String> entry : params.entrySet()) {
154-
if (first)
211+
for (final Map.Entry<String, String> entry : params.entrySet()) {
212+
if (first) {
155213
first = false;
156-
else
214+
} else {
157215
result.append("&");
216+
}
158217
result.append(URLEncoder.encode(entry.getKey(), UTF_8));
159218
result.append("=");
160219
result.append(URLEncoder.encode(entry.getValue(), UTF_8));
@@ -163,8 +222,28 @@ private String getDataString(Map<String, String> params) throws UnsupportedEncod
163222
}
164223

165224
private String findValue(final String doc, final String start, final String end) {
166-
final int beginIndex = doc.indexOf(start) + start.length();
225+
int beginIndex = doc.indexOf(start);
226+
// Start string was not found
227+
if (beginIndex == -1) {
228+
return null;
229+
}
230+
beginIndex = beginIndex + start.length();
167231
final int endIndex = doc.indexOf(end, beginIndex);
232+
// End string was not found
233+
if (endIndex == -1) {
234+
return null;
235+
}
168236
return doc.substring(beginIndex, endIndex);
169237
}
238+
239+
@Override
240+
public boolean isCommentsDisabled() {
241+
// Check if commentsDisabled has to be initialized
242+
if (!optCommentsDisabled.isPresent()) {
243+
// Initialize commentsDisabled
244+
this.findInitialCommentsToken();
245+
}
246+
247+
return optCommentsDisabled.get();
248+
}
170249
}

0 commit comments

Comments
 (0)