Skip to content

Commit 92a0024

Browse files
authored
Merge pull request #1052 from TeamNewPipe/peertube/fix/nested-comment-replies
[PeerTube] Fix multi level comment replies
2 parents 3c036a9 + d358ba1 commit 92a0024

4 files changed

Lines changed: 147 additions & 40 deletions

File tree

extractor/src/main/java/org/schabi/newpipe/extractor/Page.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,10 @@ public Page(final String url, final String id) {
3737
this(url, id, null, null, null);
3838
}
3939

40+
/**
 * Creates a page identified by a URL and an id that also carries a body.
 * Delegates to the five-argument constructor, passing {@code null} for the third and
 * fourth arguments.
 *
 * @param url  the URL of the page
 * @param id   the id of the page
 * @param body the raw body bytes attached to the page
 */
public Page(final String url, final String id, final byte[] body) {
41+
this(url, id, null, null, body);
42+
}
43+
4044
/**
 * Creates a page from a URL and a body, without an id.
 * Delegates to the five-argument constructor, passing {@code null} for the id and for the
 * third and fourth arguments.
 *
 * @param url  the URL of the page
 * @param body the raw body bytes attached to the page
 */
public Page(final String url, final byte[] body) {
4145
this(url, null, null, null, body);
4246
}

extractor/src/main/java/org/schabi/newpipe/extractor/services/peertube/extractors/PeertubeCommentsExtractor.java

Lines changed: 48 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import com.grack.nanojson.JsonArray;
44
import com.grack.nanojson.JsonObject;
55
import com.grack.nanojson.JsonParser;
6+
import com.grack.nanojson.JsonParserException;
67
import org.schabi.newpipe.extractor.Page;
78
import org.schabi.newpipe.extractor.StreamingService;
89
import org.schabi.newpipe.extractor.comments.CommentsExtractor;
@@ -17,6 +18,7 @@
1718
import org.schabi.newpipe.extractor.utils.Utils;
1819

1920
import java.io.IOException;
21+
import java.nio.charset.StandardCharsets;
2022

2123
import static org.schabi.newpipe.extractor.services.peertube.PeertubeParsingHelper.COUNT_KEY;
2224
import static org.schabi.newpipe.extractor.services.peertube.PeertubeParsingHelper.ITEMS_PER_PAGE;
@@ -26,6 +28,9 @@
2628
import javax.annotation.Nonnull;
2729

2830
public class PeertubeCommentsExtractor extends CommentsExtractor {
31+
static final String CHILDREN = "children";
32+
private static final String IS_DELETED = "isDeleted";
33+
private static final String TOTAL = "total";
2934

3035
/**
3136
* Use {@link #isReply()} to access this variable.
@@ -67,22 +72,26 @@ private void collectCommentsFrom(@Nonnull final CommentsInfoItemsCollector colle
6772
for (final Object c : contents) {
6873
if (c instanceof JsonObject) {
6974
final JsonObject item = (JsonObject) c;
70-
if (!item.getBoolean("isDeleted")) {
71-
collector.commit(new PeertubeCommentsInfoItemExtractor(item, this));
75+
if (!item.getBoolean(IS_DELETED)) {
76+
collector.commit(new PeertubeCommentsInfoItemExtractor(
77+
item, null, getUrl(), getBaseUrl(), isReply()));
7278
}
7379
}
7480
}
7581
}
7682

7783
private void collectRepliesFrom(@Nonnull final CommentsInfoItemsCollector collector,
7884
@Nonnull final JsonObject json) throws ParsingException {
79-
final JsonArray contents = json.getArray("children");
85+
final JsonArray contents = json.getArray(CHILDREN);
8086

8187
for (final Object c : contents) {
8288
if (c instanceof JsonObject) {
83-
final JsonObject item = ((JsonObject) c).getObject("comment");
84-
if (!item.getBoolean("isDeleted")) {
85-
collector.commit(new PeertubeCommentsInfoItemExtractor(item, this));
89+
final JsonObject content = (JsonObject) c;
90+
final JsonObject item = content.getObject("comment");
91+
final JsonArray children = content.getArray(CHILDREN);
92+
if (!item.getBoolean(IS_DELETED)) {
93+
collector.commit(new PeertubeCommentsInfoItemExtractor(
94+
item, children, getUrl(), getBaseUrl(), isReply()));
8695
}
8796
}
8897
}
@@ -95,36 +104,46 @@ public InfoItemsPage<CommentsInfoItem> getPage(final Page page)
95104
throw new IllegalArgumentException("Page doesn't contain an URL");
96105
}
97106

98-
final Response response = getDownloader().get(page.getUrl());
99-
100107
JsonObject json = null;
101-
if (response != null && !Utils.isBlank(response.responseBody())) {
108+
final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(getServiceId());
109+
final long total;
110+
if (page.getBody() == null) {
111+
final Response response = getDownloader().get(page.getUrl());
112+
if (response != null && !Utils.isBlank(response.responseBody())) {
113+
try {
114+
json = JsonParser.object().from(response.responseBody());
115+
} catch (final Exception e) {
116+
throw new ParsingException("Could not parse json data for comments info", e);
117+
}
118+
}
119+
if (json != null) {
120+
PeertubeParsingHelper.validate(json);
121+
if (isReply() || json.has(CHILDREN)) {
122+
total = json.getArray(CHILDREN).size();
123+
collectRepliesFrom(collector, json);
124+
} else {
125+
total = json.getLong(TOTAL);
126+
collectCommentsFrom(collector, json);
127+
}
128+
} else {
129+
throw new ExtractionException("Unable to get PeerTube kiosk info");
130+
}
131+
} else {
102132
try {
103-
json = JsonParser.object().from(response.responseBody());
104-
} catch (final Exception e) {
105-
throw new ParsingException("Could not parse json data for comments info", e);
133+
json = JsonParser.object().from(new String(page.getBody(), StandardCharsets.UTF_8));
134+
isReply = true;
135+
total = json.getArray(CHILDREN).size();
136+
collectRepliesFrom(collector, json);
137+
} catch (final JsonParserException e) {
138+
throw new ParsingException(
139+
"Could not parse json data for nested comments info", e);
106140
}
107141
}
108142

109-
if (json != null) {
110-
PeertubeParsingHelper.validate(json);
111-
final long total;
112-
final CommentsInfoItemsCollector collector
113-
= new CommentsInfoItemsCollector(getServiceId());
143+
return new InfoItemsPage<>(collector,
144+
PeertubeParsingHelper.getNextPage(page.getUrl(), total));
114145

115-
if (isReply() || json.has("children")) {
116-
total = json.getArray("children").size();
117-
collectRepliesFrom(collector, json);
118-
} else {
119-
total = json.getLong("total");
120-
collectCommentsFrom(collector, json);
121-
}
122146

123-
return new InfoItemsPage<>(collector,
124-
PeertubeParsingHelper.getNextPage(page.getUrl(), total));
125-
} else {
126-
throw new ExtractionException("Unable to get PeerTube kiosk info");
127-
}
128147
}
129148

130149
@Override

extractor/src/main/java/org/schabi/newpipe/extractor/services/peertube/extractors/PeertubeCommentsInfoItemExtractor.java

Lines changed: 45 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
package org.schabi.newpipe.extractor.services.peertube.extractors;
22

3+
import com.grack.nanojson.JsonArray;
34
import com.grack.nanojson.JsonObject;
45

6+
import com.grack.nanojson.JsonWriter;
57
import org.jsoup.Jsoup;
68
import org.jsoup.nodes.Document;
79
import org.schabi.newpipe.extractor.Page;
@@ -13,20 +15,36 @@
1315
import org.schabi.newpipe.extractor.stream.Description;
1416
import org.schabi.newpipe.extractor.utils.JsonUtils;
1517

18+
import javax.annotation.Nonnull;
1619
import javax.annotation.Nullable;
20+
import java.nio.charset.StandardCharsets;
1721
import java.util.Objects;
1822

23+
import static org.schabi.newpipe.extractor.services.peertube.extractors.PeertubeCommentsExtractor.CHILDREN;
24+
1925
public class PeertubeCommentsInfoItemExtractor implements CommentsInfoItemExtractor {
26+
@Nonnull
2027
private final JsonObject item;
28+
@Nullable
29+
private final JsonArray children;
30+
@Nonnull
2131
private final String url;
32+
@Nonnull
2233
private final String baseUrl;
34+
private final boolean isReply;
35+
36+
private Integer replyCount;
2337

24-
/*
 * Constructor, shown here as a diff: lines preceded by an "N-" gutter are the removed
 * two-argument form (which read url and baseUrl from the extractor), lines preceded by an
 * "N+" gutter are the new five-argument form.
 *
 * New form: stores the comment JSON, its optional array of nested replies, the url and
 * baseUrl, and whether the comment is itself a reply. Nothing is validated or parsed here;
 * the values are only assigned to the corresponding fields.
 */
public PeertubeCommentsInfoItemExtractor(final JsonObject item,
25-
final PeertubeCommentsExtractor extractor)
26-
throws ParsingException {
38+
public PeertubeCommentsInfoItemExtractor(@Nonnull final JsonObject item,
39+
@Nullable final JsonArray children,
40+
@Nonnull final String url,
41+
@Nonnull final String baseUrl,
42+
final boolean isReply) {
2743
this.item = item;
28-
this.url = extractor.getUrl();
29-
this.baseUrl = extractor.getBaseUrl();
44+
this.children = children;
45+
this.url = url;
46+
this.baseUrl = baseUrl;
47+
this.isReply = isReply;
3048
}
3149

3250
@Override
@@ -107,15 +125,34 @@ public String getUploaderUrl() throws ParsingException {
107125
/*
 * Builds the Page used to load the replies of this comment (diff view: "N-" gutters mark
 * removed lines, "N+" gutters mark added lines).
 *
 * New behavior:
 *  - returns null when getReplyCount() is 0;
 *  - otherwise the page URL is url + "/" + threadId and the page id is threadId;
 *  - if this comment is itself a reply and has a non-empty children array, the children are
 *    serialized as {"children": [...]} and attached to the page as a UTF-8 body;
 *  - in every other case the page carries no body.
 */
@Override
108126
@Nullable
109127
public Page getReplies() throws ParsingException {
110-
if (JsonUtils.getNumber(item, "totalReplies").intValue() == 0) {
128+
if (getReplyCount() == 0) {
111129
return null;
112130
}
113131
final String threadId = JsonUtils.getNumber(item, "threadId").toString();
114-
return new Page(url + "/" + threadId, threadId);
132+
final String repliesUrl = url + "/" + threadId;
133+
if (isReply && children != null && !children.isEmpty()) {
134+
// Nested replies are already included in the response to the original thread's request.
135+
// Wrap the replies into a JsonObject, because the original thread's response body
136+
// is also structured as a JsonObject with a "children" array.
137+
final JsonObject pageContent = new JsonObject();
138+
pageContent.put(CHILDREN, children);
139+
return new Page(repliesUrl, threadId,
140+
JsonWriter.string(pageContent).getBytes(StandardCharsets.UTF_8));
141+
}
142+
return new Page(repliesUrl, threadId);
115143
}
116144

117145
/*
 * Returns the number of replies to this comment (diff view: the "N-" gutter marks the
 * removed one-line implementation, "N+" gutters mark the added one).
 *
 * New behavior: the value is computed on the first call and cached in the replyCount field.
 * A non-empty children array takes precedence (its size is used); otherwise the value of
 * the "totalReplies" field of the comment JSON is used.
 */
@Override
118146
public int getReplyCount() throws ParsingException {
119-
return JsonUtils.getNumber(item, "totalReplies").intValue();
147+
if (replyCount == null) {
148+
if (children != null && !children.isEmpty()) {
149+
// The totalReplies field is inaccurate for nested replies and is sometimes 0
150+
// even though replies to this reply are present in children.
151+
replyCount = children.size();
152+
} else {
153+
replyCount = JsonUtils.getNumber(item, "totalReplies").intValue();
154+
}
155+
}
156+
return replyCount;
120157
}
121158
}

extractor/src/test/java/org/schabi/newpipe/extractor/services/peertube/PeertubeCommentsExtractorTest.java

Lines changed: 50 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,9 @@
1414

1515
import java.io.IOException;
1616
import java.util.List;
17+
import java.util.Optional;
1718

18-
import static org.junit.jupiter.api.Assertions.assertEquals;
19-
import static org.junit.jupiter.api.Assertions.assertFalse;
20-
import static org.junit.jupiter.api.Assertions.assertTrue;
19+
import static org.junit.jupiter.api.Assertions.*;
2120
import static org.schabi.newpipe.extractor.ServiceList.PeerTube;
2221

2322
public class PeertubeCommentsExtractorTest {
@@ -121,4 +120,52 @@ void testGetCommentsFromCommentsInfo() throws IOException, ExtractionException {
121120
assertTrue(commentsInfo.getErrors().isEmpty());
122121
}
123122
}
123+
124+
/**
125+
* Test a video that has comments with nested replies.
126+
*/
127+
public static class NestedComments {
128+
// Shared by all tests of this class; created once in setUp().
private static PeertubeCommentsExtractor extractor;
129+
130+
// Initializes NewPipe with the test downloader and creates the comments extractor
// for the video under test.
@BeforeAll
131+
public static void setUp() throws Exception {
132+
NewPipe.init(DownloaderTestImpl.getInstance());
133+
extractor = (PeertubeCommentsExtractor) PeerTube
134+
.getCommentsExtractor("https://share.tube/w/vxu4uTstUBAUromWwXGHrq");
135+
}
136+
137+
// Checks that comment "9773" can be reached by following the replies of the
// top-level comment "9770" found on the initial page.
@Test
138+
void testGetComments() throws IOException, ExtractionException {
139+
final InfoItemsPage<CommentsInfoItem> comments = extractor.getInitialPage();
140+
assertFalse(comments.getItems().isEmpty());
141+
final Optional<CommentsInfoItem> nestedCommentHeadOpt =
142+
comments.getItems()
143+
.stream()
144+
.filter(c -> c.getCommentId().equals("9770"))
145+
.findFirst();
146+
assertTrue(nestedCommentHeadOpt.isPresent());
147+
assertTrue(findNestedCommentWithId("9773", nestedCommentHeadOpt.get()), "The nested comment replies were not found");
148+
}
149+
}
150+
151+
/**
 * Recursively searches a comment and its replies for a comment with the given id.
 *
 * <p>Returns {@code true} immediately if {@code comment} itself has the id. Otherwise a
 * comments extractor is created for the comment's URL, the page returned by
 * {@code comment.getReplies()} is loaded, and every item of that page is searched the
 * same way.</p>
 *
 * <p>NOTE(review): {@code reduce((a, b) -> a || b)} evaluates every reply even after a
 * match was found; {@code anyMatch} would express the same result and stop early.</p>
 *
 * <p>NOTE(review): {@code comment.getReplies()} may be {@code null}; how
 * {@code getPage} reacts to that is not visible here. Confirm that a leaf comment is
 * handled as intended.</p>
 *
 * @param id      the comment id to look for
 * @param comment the comment at which the search starts
 * @return {@code true} if the comment or one of its (nested) replies has the given id
 * @throws IOException         if loading the replies of {@code comment} fails
 * @throws ExtractionException if extracting the replies of {@code comment} fails
 */
private static boolean findNestedCommentWithId(final String id, final CommentsInfoItem comment)
151+
throws IOException, ExtractionException {
152+
if (comment.getCommentId().equals(id)) {
153+
return true;
154+
}
155+
return PeerTube
156+
.getCommentsExtractor(comment.getUrl())
157+
.getPage(comment.getReplies())
158+
.getItems()
159+
.stream()
160+
.map(c -> {
161+
try {
162+
return findNestedCommentWithId(id, c);
163+
} catch (final Exception ignored) {
164+
// Any failure while descending into a reply is treated as "not found"
// for that branch instead of aborting the whole search.
return false;
165+
}
166+
})
167+
.reduce((a, b) -> a || b)
168+
.orElse(false);
169+
}
124171
}

0 commit comments

Comments
 (0)