Skip to content

Commit a3c6fce

Browse files
authored
Merge pull request #573 from B0pol/comments-performance
[youtube] improve comments extraction performance
2 parents ec1127d + ff5273b commit a3c6fce

2 files changed

Lines changed: 18 additions & 16 deletions

File tree

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -824,4 +824,14 @@ public static boolean isVerified(final JsonArray badges) {
824824

825825
return false;
826826
}
827+
828+
public static String unescapeDocument(final String doc) {
829+
return doc
830+
.replaceAll("\\\\x22", "\"")
831+
.replaceAll("\\\\x7b", "{")
832+
.replaceAll("\\\\x7d", "}")
833+
.replaceAll("\\\\x5b", "[")
834+
.replaceAll("\\\\x5d", "]");
835+
}
836+
827837
}

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java

Lines changed: 8 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import org.schabi.newpipe.extractor.exceptions.ParsingException;
1616
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
1717
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
18+
import org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper;
1819
import org.schabi.newpipe.extractor.utils.JsonUtils;
1920
import org.schabi.newpipe.extractor.utils.Parser;
2021

@@ -46,11 +47,9 @@ public YoutubeCommentsExtractor(StreamingService service, ListLinkHandler uiHand
4647

4748
@Override
4849
public InfoItemsPage<CommentsInfoItem> getInitialPage() throws IOException, ExtractionException {
49-
final String commentsTokenInside;
50-
if (responseBody.contains("commentSectionRenderer")) {
50+
String commentsTokenInside = findValue(responseBody, "sectionListRenderer", "}");
51+
if (!commentsTokenInside.contains("continuation\":\"")) {
5152
commentsTokenInside = findValue(responseBody, "commentSectionRenderer", "}");
52-
} else {
53-
commentsTokenInside = findValue(responseBody, "sectionListRenderer", "}");
5453
}
5554
final String commentsToken = findValue(commentsTokenInside, "continuation\":\"", "\"");
5655
return getPage(getNextPage(commentsToken));
@@ -133,7 +132,7 @@ public void onFetchPage(@Nonnull Downloader downloader) throws IOException, Extr
133132
final Map<String, List<String>> requestHeaders = new HashMap<>();
134133
requestHeaders.put("User-Agent", singletonList(USER_AGENT));
135134
final Response response = downloader.get(getUrl(), requestHeaders, getExtractorLocalization());
136-
responseBody = response.responseBody();
135+
responseBody = YoutubeParsingHelper.unescapeDocument(response.responseBody());
137136
ytClientVersion = findValue(responseBody, "INNERTUBE_CONTEXT_CLIENT_VERSION\":\"", "\"");
138137
ytClientName = Parser.matchGroup1(YT_CLIENT_NAME_PATTERN, responseBody);
139138
}
@@ -163,16 +162,9 @@ private String getDataString(Map<String, String> params) throws UnsupportedEncod
163162
return result.toString();
164163
}
165164

166-
private String findValue(String doc, String start, String end) {
167-
final String unescaped = doc
168-
.replaceAll("\\\\x22", "\"")
169-
.replaceAll("\\\\x7b", "{")
170-
.replaceAll("\\\\x7d", "}")
171-
.replaceAll("\\\\x5b", "[")
172-
.replaceAll("\\\\x5d", "]");
173-
174-
final int beginIndex = unescaped.indexOf(start) + start.length();
175-
final int endIndex = unescaped.indexOf(end, beginIndex);
176-
return unescaped.substring(beginIndex, endIndex);
165+
private String findValue(final String doc, final String start, final String end) {
166+
final int beginIndex = doc.indexOf(start) + start.length();
167+
final int endIndex = doc.indexOf(end, beginIndex);
168+
return doc.substring(beginIndex, endIndex);
177169
}
178170
}

0 commit comments

Comments
 (0)