Skip to content

Commit f3e4c9d

Browse files
FireMasterK and AudricV committed
Use the youtubei API for YouTube comments
Migrate YouTube comments extraction to the desktop version by using the `next` endpoint of the internal InnerTube (youtubei) API. With the desktop version, we can get the exact like count of YouTube comments by parsing the accessibility data; the existing like-count extraction is kept as a fallback. We can now also determine whether the uploader of a comment is verified. Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
1 parent 286d839 commit f3e4c9d

4 files changed

Lines changed: 184 additions & 153 deletions

File tree

extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsExtractor.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import org.schabi.newpipe.extractor.ListExtractor;
44
import org.schabi.newpipe.extractor.StreamingService;
5+
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
56
import org.schabi.newpipe.extractor.exceptions.ParsingException;
67
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
78

@@ -17,7 +18,7 @@ public CommentsExtractor(final StreamingService service, final ListLinkHandler u
1718
* @apiNote Warning: This method is experimental and may get removed in a future release.
1819
* @return <code>true</code> if the comments are disabled otherwise <code>false</code> (default)
1920
*/
20-
public boolean isCommentsDisabled() {
21+
public boolean isCommentsDisabled() throws ExtractionException {
2122
return false;
2223
}
2324

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java

Lines changed: 124 additions & 117 deletions
Original file line numberDiff line numberDiff line change
@@ -1,47 +1,38 @@
11
package org.schabi.newpipe.extractor.services.youtube.extractors;
22

3-
import com.grack.nanojson.JsonArray;
4-
import com.grack.nanojson.JsonObject;
5-
import com.grack.nanojson.JsonParser;
3+
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonPostResponse;
4+
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.prepareDesktopJsonBuilder;
5+
import static org.schabi.newpipe.extractor.utils.Utils.UTF_8;
6+
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
7+
8+
import java.io.IOException;
9+
import java.util.Collections;
10+
import java.util.List;
11+
import java.util.Optional;
12+
13+
import javax.annotation.Nonnull;
14+
import javax.annotation.Nullable;
15+
616
import org.schabi.newpipe.extractor.Page;
717
import org.schabi.newpipe.extractor.StreamingService;
818
import org.schabi.newpipe.extractor.comments.CommentsExtractor;
919
import org.schabi.newpipe.extractor.comments.CommentsInfoItem;
1020
import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor;
1121
import org.schabi.newpipe.extractor.comments.CommentsInfoItemsCollector;
1222
import org.schabi.newpipe.extractor.downloader.Downloader;
13-
import org.schabi.newpipe.extractor.downloader.Response;
1423
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
1524
import org.schabi.newpipe.extractor.exceptions.ParsingException;
16-
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
1725
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
18-
import org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper;
26+
import org.schabi.newpipe.extractor.localization.Localization;
1927
import org.schabi.newpipe.extractor.utils.JsonUtils;
20-
import org.schabi.newpipe.extractor.utils.Parser;
21-
22-
import javax.annotation.Nonnull;
23-
import java.io.IOException;
24-
import java.io.UnsupportedEncodingException;
25-
import java.net.URLEncoder;
26-
import java.util.Collections;
27-
import java.util.HashMap;
28-
import java.util.List;
29-
import java.util.Map;
30-
import java.util.Optional;
31-
import java.util.regex.Pattern;
3228

33-
import static java.util.Collections.singletonList;
34-
import static org.schabi.newpipe.extractor.utils.Utils.UTF_8;
35-
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
29+
import com.grack.nanojson.JsonArray;
30+
import com.grack.nanojson.JsonObject;
31+
import com.grack.nanojson.JsonWriter;
3632

3733
public class YoutubeCommentsExtractor extends CommentsExtractor {
38-
// using the mobile site for comments because it loads faster and uses get requests instead of post
39-
private static final String USER_AGENT = "Mozilla/5.0 (Android 9; Mobile; rv:78.0) Gecko/20100101 Firefox/78.0";
40-
private static final Pattern YT_CLIENT_NAME_PATTERN = Pattern.compile("INNERTUBE_CONTEXT_CLIENT_NAME\\\":(.*?)[,}]");
4134

42-
private String ytClientVersion;
43-
private String ytClientName;
44-
private String responseBody;
35+
private JsonObject nextResponse;
4536

4637
/**
4738
* Caching mechanism and holder of the commentsDisabled value.
@@ -52,6 +43,7 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
5243
* If the method or another one that is depending on disabled comments
5344
* is now called again, the method execution can avoid unnecessary calls
5445
*/
46+
@SuppressWarnings("OptionalUsedAsFieldOrParameterType")
5547
private Optional<Boolean> optCommentsDisabled = Optional.empty();
5648

5749
public YoutubeCommentsExtractor(
@@ -60,6 +52,7 @@ public YoutubeCommentsExtractor(
6052
super(service, uiHandler);
6153
}
6254

55+
@Nonnull
6356
@Override
6457
public InfoItemsPage<CommentsInfoItem> getInitialPage()
6558
throws IOException, ExtractionException {
@@ -81,163 +74,177 @@ public InfoItemsPage<CommentsInfoItem> getInitialPage()
8174

8275
/**
8376
* Finds the initial comments token and initializes commentsDisabled.
77+
*
8478
* @return the continuation token or null if none was found
8579
*/
86-
private String findInitialCommentsToken() {
87-
final String continuationStartPattern = "continuation\":\"";
80+
@Nullable
81+
private String findInitialCommentsToken() throws ExtractionException {
82+
83+
final JsonArray jArray = JsonUtils.getArray(nextResponse,
84+
"contents.twoColumnWatchNextResults.results.results.contents");
8885

89-
String commentsTokenInside = findValue(responseBody, "sectionListRenderer", "}");
90-
if (commentsTokenInside == null || !commentsTokenInside.contains(continuationStartPattern)) {
91-
commentsTokenInside = findValue(responseBody, "commentSectionRenderer", "}");
86+
final Optional<Object> itemSectionRenderer = jArray.stream().filter(o -> {
87+
JsonObject jObj = (JsonObject) o;
88+
89+
if (jObj.has("itemSectionRenderer")) {
90+
try {
91+
return JsonUtils.getString(jObj, "itemSectionRenderer.targetId")
92+
.equals("comments-section");
93+
} catch (final ParsingException ignored) {
94+
}
95+
}
96+
97+
return false;
98+
}).findFirst();
99+
100+
final String token;
101+
102+
if (itemSectionRenderer.isPresent()) {
103+
token = JsonUtils.getString(((JsonObject) itemSectionRenderer.get())
104+
.getObject("itemSectionRenderer").getArray("contents").getObject(0),
105+
"continuationItemRenderer.continuationEndpoint.continuationCommand.token");
106+
} else {
107+
token = null;
92108
}
93109

94-
// If no continuation token is found the comments are disabled
95-
if (commentsTokenInside == null || !commentsTokenInside.contains(continuationStartPattern)) {
110+
if (token == null) {
96111
optCommentsDisabled = Optional.of(true);
97112
return null;
98113
}
99114

100-
// If a continuation token is found there are >= 0 comments
101-
final String commentsToken = findValue(commentsTokenInside, continuationStartPattern, "\"");
102-
103115
optCommentsDisabled = Optional.of(false);
104116

105-
return commentsToken;
117+
return token;
106118
}
107119

120+
@Nonnull
108121
private InfoItemsPage<CommentsInfoItem> getInfoItemsPageForDisabledComments() {
109122
return new InfoItemsPage<>(Collections.emptyList(), null, Collections.emptyList());
110123
}
111124

112-
private Page getNextPage(final JsonObject ajaxJson) throws ParsingException {
113-
final JsonArray arr;
125+
@Nullable
126+
private Page getNextPage(@Nonnull final JsonObject ajaxJson) throws ExtractionException {
127+
final JsonArray jsonArray;
128+
final JsonArray onResponseReceivedEndpoints = ajaxJson.getArray(
129+
"onResponseReceivedEndpoints");
130+
final JsonObject endpoint = onResponseReceivedEndpoints.getObject(
131+
onResponseReceivedEndpoints.size() - 1);
132+
114133
try {
115-
arr = JsonUtils.getArray(ajaxJson, "response.continuationContents.commentSectionContinuation.continuations");
134+
jsonArray = endpoint.getObject("reloadContinuationItemsCommand", endpoint.getObject(
135+
"appendContinuationItemsAction")).getArray("continuationItems");
116136
} catch (final Exception e) {
117137
return null;
118138
}
119-
if (arr.isEmpty()) {
139+
if (jsonArray.isEmpty()) {
120140
return null;
121141
}
142+
122143
final String continuation;
123144
try {
124-
continuation = JsonUtils.getString(arr.getObject(0), "nextContinuationData.continuation");
145+
continuation = JsonUtils.getString(jsonArray.getObject(jsonArray.size() - 1),
146+
"continuationItemRenderer.continuationEndpoint.continuationCommand.token");
125147
} catch (final Exception e) {
126148
return null;
127149
}
128150
return getNextPage(continuation);
129151
}
130152

153+
@Nonnull
131154
private Page getNextPage(final String continuation) throws ParsingException {
132-
final Map<String, String> params = new HashMap<>();
133-
params.put("action_get_comments", "1");
134-
params.put("pbj", "1");
135-
params.put("ctoken", continuation);
136-
try {
137-
return new Page("https://m.youtube.com/watch_comment?" + getDataString(params));
138-
} catch (final UnsupportedEncodingException e) {
139-
throw new ParsingException("Could not get next page url", e);
140-
}
155+
return new Page(getUrl(), continuation); // URL is ignored tho
141156
}
142157

143158
@Override
144-
public InfoItemsPage<CommentsInfoItem> getPage(final Page page) throws IOException, ExtractionException {
159+
public InfoItemsPage<CommentsInfoItem> getPage(final Page page)
160+
throws IOException, ExtractionException {
145161
if (optCommentsDisabled.orElse(false)) {
146162
return getInfoItemsPageForDisabledComments();
147163
}
148-
if (page == null || isNullOrEmpty(page.getUrl())) {
149-
throw new IllegalArgumentException("Page doesn't contain an URL");
164+
if (page == null || isNullOrEmpty(page.getId())) {
165+
throw new IllegalArgumentException("Page doesn't have the continuation.");
150166
}
151167

152-
final String ajaxResponse = makeAjaxRequest(page.getUrl());
153-
final JsonObject ajaxJson;
154-
try {
155-
ajaxJson = JsonParser.array().from(ajaxResponse).getObject(1);
156-
} catch (final Exception e) {
157-
throw new ParsingException("Could not parse json data for comments", e);
158-
}
159-
final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(getServiceId());
168+
final Localization localization = getExtractorLocalization();
169+
final byte[] body = JsonWriter.string(prepareDesktopJsonBuilder(localization,
170+
getExtractorContentCountry())
171+
.value("continuation", page.getId())
172+
.done())
173+
.getBytes(UTF_8);
174+
175+
final JsonObject ajaxJson = getJsonPostResponse("next", body, localization);
176+
177+
final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(
178+
getServiceId());
160179
collectCommentsFrom(collector, ajaxJson);
161180
return new InfoItemsPage<>(collector, getNextPage(ajaxJson));
162181
}
163182

164-
private void collectCommentsFrom(final CommentsInfoItemsCollector collector, final JsonObject ajaxJson) throws ParsingException {
183+
private void collectCommentsFrom(final CommentsInfoItemsCollector collector,
184+
@Nonnull final JsonObject ajaxJson) throws ParsingException {
185+
186+
final JsonArray onResponseReceivedEndpoints = ajaxJson.getArray(
187+
"onResponseReceivedEndpoints");
188+
final JsonObject commentsEndpoint = onResponseReceivedEndpoints.getObject(
189+
onResponseReceivedEndpoints.size() - 1);
190+
191+
final String path;
192+
193+
if (commentsEndpoint.has("reloadContinuationItemsCommand")) {
194+
path = "reloadContinuationItemsCommand.continuationItems";
195+
} else if (commentsEndpoint.has("appendContinuationItemsAction")) {
196+
path = "appendContinuationItemsAction.continuationItems";
197+
} else {
198+
// No comments
199+
return;
200+
}
201+
165202
final JsonArray contents;
166203
try {
167-
contents = JsonUtils.getArray(ajaxJson, "response.continuationContents.commentSectionContinuation.items");
204+
contents = (JsonArray) JsonUtils.getArray(commentsEndpoint, path).clone();
168205
} catch (final Exception e) {
169-
//no comments
206+
// No comments
170207
return;
171208
}
209+
210+
final int index = contents.size() - 1;
211+
if (contents.getObject(index).has("continuationItemRenderer")) {
212+
contents.remove(index);
213+
}
214+
172215
final List<Object> comments;
173216
try {
174-
comments = JsonUtils.getValues(contents, "commentThreadRenderer.comment.commentRenderer");
217+
comments = JsonUtils.getValues(contents,
218+
"commentThreadRenderer.comment.commentRenderer");
175219
} catch (final Exception e) {
176-
throw new ParsingException("unable to get parse youtube comments", e);
220+
throw new ParsingException("Unable to get parse youtube comments", e);
177221
}
178222

179223
for (final Object c : comments) {
180224
if (c instanceof JsonObject) {
181-
final CommentsInfoItemExtractor extractor =
182-
new YoutubeCommentsInfoItemExtractor((JsonObject) c, getUrl(), getTimeAgoParser());
225+
final CommentsInfoItemExtractor extractor = new YoutubeCommentsInfoItemExtractor(
226+
(JsonObject) c, getUrl(), getTimeAgoParser());
183227
collector.commit(extractor);
184228
}
185229
}
186230
}
187231

188232
@Override
189-
public void onFetchPage(@Nonnull final Downloader downloader) throws IOException, ExtractionException {
190-
final Map<String, List<String>> requestHeaders = new HashMap<>();
191-
requestHeaders.put("User-Agent", singletonList(USER_AGENT));
192-
final Response response = downloader.get(getUrl(), requestHeaders, getExtractorLocalization());
193-
responseBody = YoutubeParsingHelper.unescapeDocument(response.responseBody());
194-
ytClientVersion = findValue(responseBody, "INNERTUBE_CONTEXT_CLIENT_VERSION\":\"", "\"");
195-
ytClientName = Parser.matchGroup1(YT_CLIENT_NAME_PATTERN, responseBody);
196-
}
197-
233+
public void onFetchPage(@Nonnull final Downloader downloader)
234+
throws IOException, ExtractionException {
235+
final Localization localization = getExtractorLocalization();
236+
final byte[] body = JsonWriter.string(prepareDesktopJsonBuilder(localization,
237+
getExtractorContentCountry())
238+
.value("videoId", getId())
239+
.done())
240+
.getBytes(UTF_8);
198241

199-
private String makeAjaxRequest(final String siteUrl) throws IOException, ReCaptchaException {
200-
final Map<String, List<String>> requestHeaders = new HashMap<>();
201-
requestHeaders.put("Accept", singletonList("*/*"));
202-
requestHeaders.put("User-Agent", singletonList(USER_AGENT));
203-
requestHeaders.put("X-YouTube-Client-Version", singletonList(ytClientVersion));
204-
requestHeaders.put("X-YouTube-Client-Name", singletonList(ytClientName));
205-
return getDownloader().get(siteUrl, requestHeaders, getExtractorLocalization()).responseBody();
242+
nextResponse = getJsonPostResponse("next", body, localization);
206243
}
207244

208-
private String getDataString(final Map<String, String> params) throws UnsupportedEncodingException {
209-
final StringBuilder result = new StringBuilder();
210-
boolean first = true;
211-
for (final Map.Entry<String, String> entry : params.entrySet()) {
212-
if (first) {
213-
first = false;
214-
} else {
215-
result.append("&");
216-
}
217-
result.append(URLEncoder.encode(entry.getKey(), UTF_8));
218-
result.append("=");
219-
result.append(URLEncoder.encode(entry.getValue(), UTF_8));
220-
}
221-
return result.toString();
222-
}
223-
224-
private String findValue(final String doc, final String start, final String end) {
225-
int beginIndex = doc.indexOf(start);
226-
// Start string was not found
227-
if (beginIndex == -1) {
228-
return null;
229-
}
230-
beginIndex = beginIndex + start.length();
231-
final int endIndex = doc.indexOf(end, beginIndex);
232-
// End string was not found
233-
if (endIndex == -1) {
234-
return null;
235-
}
236-
return doc.substring(beginIndex, endIndex);
237-
}
238245

239246
@Override
240-
public boolean isCommentsDisabled() {
247+
public boolean isCommentsDisabled() throws ExtractionException {
241248
// Check if commentsDisabled has to be initialized
242249
if (!optCommentsDisabled.isPresent()) {
243250
// Initialize commentsDisabled

0 commit comments

Comments
 (0)