Skip to content

Commit c1040bc

Browse files
authored
Merge pull request #794 from FireMasterK/comments-count
[YouTube] Add support to extract total comment count
2 parents 6ccc43e + 22a47da commit c1040bc

4 files changed

Lines changed: 124 additions & 57 deletions

File tree

extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsExtractor.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,13 @@ public boolean isCommentsDisabled() throws ExtractionException {
2222
return false;
2323
}
2424

25+
/**
26+
* @return the total number of comments, or {@code -1} if the count is not available
27+
*/
28+
public int getCommentsCount() throws ExtractionException {
29+
return -1;
30+
}
31+
2532
@Nonnull
2633
@Override
2734
public String getName() throws ParsingException {

extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfo.java

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,11 @@ public static CommentsInfo getInfo(final CommentsExtractor commentsExtractor)
4848
ExtractorHelper.getItemsPageOrLogError(commentsInfo, commentsExtractor);
4949
commentsInfo.setCommentsDisabled(commentsExtractor.isCommentsDisabled());
5050
commentsInfo.setRelatedItems(initialCommentsPage.getItems());
51+
try {
52+
commentsInfo.setCommentsCount(commentsExtractor.getCommentsCount());
53+
} catch (final Exception e) {
54+
commentsInfo.addError(e);
55+
}
5156
commentsInfo.setNextPage(initialCommentsPage.getNextPage());
5257

5358
return commentsInfo;
@@ -76,6 +81,7 @@ public static InfoItemsPage<CommentsInfoItem> getMoreItems(
7681

7782
private transient CommentsExtractor commentsExtractor;
7883
private boolean commentsDisabled = false;
84+
private int commentsCount;
7985

8086
public CommentsExtractor getCommentsExtractor() {
8187
return commentsExtractor;
@@ -86,7 +92,6 @@ public void setCommentsExtractor(final CommentsExtractor commentsExtractor) {
8692
}
8793

8894
/**
89-
* @apiNote Warning: This method is experimental and may get removed in a future release.
9095
* @return {@code true} if the comments are disabled otherwise {@code false} (default)
9196
* @see CommentsExtractor#isCommentsDisabled()
9297
*/
@@ -95,10 +100,27 @@ public boolean isCommentsDisabled() {
95100
}
96101

97102
/**
98-
* @apiNote Warning: This method is experimental and may get removed in a future release.
99103
* @param commentsDisabled {@code true} if the comments are disabled otherwise {@code false}
100104
*/
101105
public void setCommentsDisabled(final boolean commentsDisabled) {
102106
this.commentsDisabled = commentsDisabled;
103107
}
108+
109+
/**
110+
* Returns the total number of comments.
111+
*
112+
* @return the total number of comments
113+
*/
114+
public int getCommentsCount() {
115+
return commentsCount;
116+
}
117+
118+
/**
119+
* Sets the total number of comments.
120+
*
121+
* @param commentsCount the total number of comments
122+
*/
123+
public void setCommentsCount(final int commentsCount) {
124+
this.commentsCount = commentsCount;
125+
}
104126
}

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java

Lines changed: 87 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,8 @@
11
package org.schabi.newpipe.extractor.services.youtube.extractors;
22

3-
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonPostResponse;
4-
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.prepareDesktopJsonBuilder;
5-
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
6-
7-
import java.io.IOException;
8-
import java.nio.charset.StandardCharsets;
9-
import java.util.Collections;
10-
import java.util.List;
11-
import java.util.Optional;
12-
13-
import javax.annotation.Nonnull;
14-
import javax.annotation.Nullable;
15-
3+
import com.grack.nanojson.JsonArray;
4+
import com.grack.nanojson.JsonObject;
5+
import com.grack.nanojson.JsonWriter;
166
import org.schabi.newpipe.extractor.Page;
177
import org.schabi.newpipe.extractor.StreamingService;
188
import org.schabi.newpipe.extractor.comments.CommentsExtractor;
@@ -24,26 +14,31 @@
2414
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
2515
import org.schabi.newpipe.extractor.localization.Localization;
2616
import org.schabi.newpipe.extractor.utils.JsonUtils;
17+
import org.schabi.newpipe.extractor.utils.Utils;
2718

28-
import com.grack.nanojson.JsonArray;
29-
import com.grack.nanojson.JsonObject;
30-
import com.grack.nanojson.JsonWriter;
19+
import javax.annotation.Nonnull;
20+
import javax.annotation.Nullable;
21+
import java.io.IOException;
22+
import java.nio.charset.StandardCharsets;
23+
import java.util.Collections;
24+
import java.util.List;
25+
26+
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonPostResponse;
27+
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject;
28+
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.prepareDesktopJsonBuilder;
29+
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
3130

3231
public class YoutubeCommentsExtractor extends CommentsExtractor {
3332

34-
private JsonObject nextResponse;
33+
/**
34+
* Whether comments are disabled on the video.
35+
*/
36+
private boolean commentsDisabled;
3537

3638
/**
37-
* Caching mechanism and holder of the commentsDisabled value.
38-
* <br/>
39-
* Initial value = empty -> unknown if comments are disabled or not<br/>
40-
* Some method calls {@link #findInitialCommentsToken()}
41-
* -> value is set<br/>
42-
* If the method or another one that is depending on disabled comments
43-
* is now called again, the method execution can avoid unnecessary calls
39+
* The response of the second <b>/next</b> request, which is sent with the initial comments continuation token.
4440
*/
45-
@SuppressWarnings("OptionalUsedAsFieldOrParameterType")
46-
private Optional<Boolean> optCommentsDisabled = Optional.empty();
41+
private JsonObject ajaxJson;
4742

4843
public YoutubeCommentsExtractor(
4944
final StreamingService service,
@@ -56,32 +51,25 @@ public YoutubeCommentsExtractor(
5651
public InfoItemsPage<CommentsInfoItem> getInitialPage()
5752
throws IOException, ExtractionException {
5853

59-
// Check if findInitialCommentsToken was already called and optCommentsDisabled initialized
60-
if (optCommentsDisabled.orElse(false)) {
61-
return getInfoItemsPageForDisabledComments();
62-
}
63-
64-
// Get the token
65-
final String commentsToken = findInitialCommentsToken();
66-
// Check if the comments have been disabled
67-
if (optCommentsDisabled.get()) {
54+
if (commentsDisabled) {
6855
return getInfoItemsPageForDisabledComments();
6956
}
7057

71-
return getPage(getNextPage(commentsToken));
58+
return extractComments(ajaxJson);
7259
}
7360

7461
/**
7562
* Finds the initial comments token and initializes commentsDisabled.
7663
* <br/>
77-
* Also sets {@link #optCommentsDisabled}.
64+
* Also sets {@link #commentsDisabled}.
7865
*
7966
* @return the continuation token or null if none was found
8067
*/
8168
@Nullable
82-
private String findInitialCommentsToken() throws ExtractionException {
69+
private String findInitialCommentsToken(final JsonObject nextResponse)
70+
throws ExtractionException {
8371
final String token = JsonUtils.getArray(nextResponse,
84-
"contents.twoColumnWatchNextResults.results.results.contents")
72+
"contents.twoColumnWatchNextResults.results.results.contents")
8573
.stream()
8674
// Only use JsonObjects
8775
.filter(JsonObject.class::isInstance)
@@ -112,7 +100,7 @@ private String findInitialCommentsToken() throws ExtractionException {
112100
.orElse(null);
113101

114102
// The comments are disabled if we couldn't get a token
115-
optCommentsDisabled = Optional.of(token == null);
103+
commentsDisabled = token == null;
116104

117105
return token;
118106
}
@@ -123,9 +111,9 @@ private InfoItemsPage<CommentsInfoItem> getInfoItemsPageForDisabledComments() {
123111
}
124112

125113
@Nullable
126-
private Page getNextPage(@Nonnull final JsonObject ajaxJson) throws ExtractionException {
114+
private Page getNextPage(@Nonnull final JsonObject jsonObject) throws ExtractionException {
127115
final JsonArray onResponseReceivedEndpoints =
128-
ajaxJson.getArray("onResponseReceivedEndpoints");
116+
jsonObject.getArray("onResponseReceivedEndpoints");
129117

130118
// Prevent ArrayIndexOutOfBoundsException
131119
if (onResponseReceivedEndpoints.isEmpty()) {
@@ -173,30 +161,39 @@ private Page getNextPage(final String continuation) throws ParsingException {
173161
@Override
174162
public InfoItemsPage<CommentsInfoItem> getPage(final Page page)
175163
throws IOException, ExtractionException {
176-
if (optCommentsDisabled.orElse(false)) {
164+
165+
if (commentsDisabled) {
177166
return getInfoItemsPageForDisabledComments();
178167
}
168+
179169
if (page == null || isNullOrEmpty(page.getId())) {
180170
throw new IllegalArgumentException("Page doesn't have the continuation.");
181171
}
182172

183173
final Localization localization = getExtractorLocalization();
174+
// @formatter:off
184175
final byte[] body = JsonWriter.string(
185176
prepareDesktopJsonBuilder(localization, getExtractorContentCountry())
186177
.value("continuation", page.getId())
187178
.done())
188179
.getBytes(StandardCharsets.UTF_8);
180+
// @formatter:on
189181

190-
final JsonObject ajaxJson = getJsonPostResponse("next", body, localization);
182+
final var jsonObject = getJsonPostResponse("next", body, localization);
191183

184+
return extractComments(jsonObject);
185+
}
186+
187+
private InfoItemsPage<CommentsInfoItem> extractComments(final JsonObject jsonObject)
188+
throws ExtractionException {
192189
final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(
193190
getServiceId());
194-
collectCommentsFrom(collector, ajaxJson);
195-
return new InfoItemsPage<>(collector, getNextPage(ajaxJson));
191+
collectCommentsFrom(collector);
192+
return new InfoItemsPage<>(collector, getNextPage(jsonObject));
196193
}
197194

198-
private void collectCommentsFrom(final CommentsInfoItemsCollector collector,
199-
@Nonnull final JsonObject ajaxJson) throws ParsingException {
195+
private void collectCommentsFrom(final CommentsInfoItemsCollector collector)
196+
throws ParsingException {
200197

201198
final JsonArray onResponseReceivedEndpoints =
202199
ajaxJson.getArray("onResponseReceivedEndpoints");
@@ -254,24 +251,59 @@ private void collectCommentsFrom(final CommentsInfoItemsCollector collector,
254251
public void onFetchPage(@Nonnull final Downloader downloader)
255252
throws IOException, ExtractionException {
256253
final Localization localization = getExtractorLocalization();
254+
// @formatter:off
257255
final byte[] body = JsonWriter.string(
258256
prepareDesktopJsonBuilder(localization, getExtractorContentCountry())
259257
.value("videoId", getId())
260258
.done())
261259
.getBytes(StandardCharsets.UTF_8);
260+
// @formatter:on
261+
262+
final String initialToken =
263+
findInitialCommentsToken(getJsonPostResponse("next", body, localization));
264+
265+
if (initialToken == null) {
266+
return;
267+
}
268+
269+
// @formatter:off
270+
final byte[] ajaxBody = JsonWriter.string(
271+
prepareDesktopJsonBuilder(localization, getExtractorContentCountry())
272+
.value("continuation", initialToken)
273+
.done())
274+
.getBytes(StandardCharsets.UTF_8);
275+
// @formatter:on
262276

263-
nextResponse = getJsonPostResponse("next", body, localization);
277+
ajaxJson = getJsonPostResponse("next", ajaxBody, localization);
264278
}
265279

266280

267281
@Override
268-
public boolean isCommentsDisabled() throws ExtractionException {
269-
// Check if commentsDisabled has to be initialized
270-
if (!optCommentsDisabled.isPresent()) {
271-
// Initialize commentsDisabled
272-
this.findInitialCommentsToken();
282+
public boolean isCommentsDisabled() {
283+
return commentsDisabled;
284+
}
285+
286+
@Override
287+
public int getCommentsCount() throws ExtractionException {
288+
assertPageFetched();
289+
290+
if (commentsDisabled) {
291+
return -1;
273292
}
274293

275-
return optCommentsDisabled.get();
294+
final JsonObject countText = ajaxJson
295+
.getArray("onResponseReceivedEndpoints").getObject(0)
296+
.getObject("reloadContinuationItemsCommand")
297+
.getArray("continuationItems").getObject(0)
298+
.getObject("commentsHeaderRenderer")
299+
.getObject("countText");
300+
301+
try {
302+
return Integer.parseInt(
303+
Utils.removeNonDigitCharacters(getTextFromObject(countText))
304+
);
305+
} catch (final Exception e) {
306+
throw new ExtractionException("Unable to get comments count", e);
307+
}
276308
}
277309
}

extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ private boolean getCommentsFromCommentsInfoHelper(String url) throws IOException
8989
@Test
9090
public void testGetCommentsAllData() throws IOException, ExtractionException {
9191
InfoItemsPage<CommentsInfoItem> comments = extractor.getInitialPage();
92+
assertTrue(extractor.getCommentsCount() > 5); // more than 5 comments
9293

9394
DefaultTests.defaultTestListOfItems(YouTube, comments.getItems(), comments.getErrors());
9495
for (CommentsInfoItem c : comments.getItems()) {
@@ -344,6 +345,11 @@ public void testGetCommentsReplyCount() throws IOException, ExtractionException
344345
assertNotEquals(UNKNOWN_REPLY_COUNT, firstComment.getReplyCount(), "Could not get the reply count of the first comment");
345346
assertGreater(300, firstComment.getReplyCount());
346347
}
348+
349+
@Test
350+
public void testCommentsCount() throws IOException, ExtractionException {
351+
assertTrue(extractor.getCommentsCount() > 18800);
352+
}
347353
}
348354

349355
public static class FormattingTest {

0 commit comments

Comments
 (0)