Skip to content

Commit 5a9b6ed

Browse files
authored
[Bandcamp] Support loading additional comments (#1030)
1 parent 6bdd698 commit 5a9b6ed

3 files changed

Lines changed: 95 additions & 28 deletions

File tree

extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampCommentsExtractor.java

Lines changed: 83 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,36 @@
11
package org.schabi.newpipe.extractor.services.bandcamp.extractors;
22

3+
import static org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampExtractorHelper.BASE_API_URL;
4+
5+
import com.grack.nanojson.JsonArray;
6+
import com.grack.nanojson.JsonObject;
7+
import com.grack.nanojson.JsonWriter;
8+
39
import org.jsoup.Jsoup;
410
import org.jsoup.nodes.Document;
5-
import org.jsoup.nodes.Element;
6-
import org.jsoup.select.Elements;
711
import org.schabi.newpipe.extractor.Page;
812
import org.schabi.newpipe.extractor.StreamingService;
913
import org.schabi.newpipe.extractor.comments.CommentsExtractor;
1014
import org.schabi.newpipe.extractor.comments.CommentsInfoItem;
1115
import org.schabi.newpipe.extractor.comments.CommentsInfoItemsCollector;
1216
import org.schabi.newpipe.extractor.downloader.Downloader;
1317
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
18+
import org.schabi.newpipe.extractor.exceptions.ParsingException;
19+
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
1420
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
21+
import org.schabi.newpipe.extractor.utils.JsonUtils;
1522

16-
import javax.annotation.Nonnull;
1723
import java.io.IOException;
24+
import java.nio.charset.StandardCharsets;
25+
import java.util.Collections;
26+
import java.util.List;
27+
28+
import javax.annotation.Nonnull;
1829

1930
public class BandcampCommentsExtractor extends CommentsExtractor {
2031

32+
/** Endpoint that {@code fetchReviewsData} POSTs to when loading further reviews. */
private static final String REVIEWS_API_URL = BASE_API_URL + "/tralbumcollectors/2/reviews";
33+
2134
// Parsed HTML page; getInitialPage() reads the "collectors-data" element from it and
// getTrackId() reads the "bc-page-properties" meta tag.
// NOTE(review): presumably assigned in onFetchPage(), which is not visible here - confirm.
private Document document;
2235

2336

@@ -39,19 +52,81 @@ public InfoItemsPage<CommentsInfoItem> getInitialPage()
3952

4053
final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(getServiceId());
4154

42-
final Elements writings = document.getElementsByClass("writing");
55+
final JsonObject collectorsData = JsonUtils.toJsonObject(
56+
document.getElementById("collectors-data").attr("data-blob"));
57+
final JsonArray reviews = collectorsData.getArray("reviews");
4358

44-
for (final Element writing : writings) {
45-
collector.commit(new BandcampCommentsInfoItemExtractor(writing, getUrl()));
59+
for (final Object review : reviews) {
60+
collector.commit(
61+
new BandcampCommentsInfoItemExtractor((JsonObject) review, getUrl()));
4662
}
4763

48-
return new InfoItemsPage<>(collector, null);
64+
if (!collectorsData.getBoolean("more_reviews_available")) {
65+
return new InfoItemsPage<>(collector, null);
66+
}
67+
68+
final String trackId = getTrackId();
69+
final String token = getNextPageToken(reviews);
70+
return new InfoItemsPage<>(collector, new Page(List.of(trackId, token)));
4971
}
5072

5173
@Override
5274
public InfoItemsPage<CommentsInfoItem> getPage(final Page page)
5375
throws IOException, ExtractionException {
54-
return null;
76+
77+
final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(getServiceId());
78+
79+
final List<String> pageIds = page.getIds();
80+
final String trackId = pageIds.get(0);
81+
final String token = pageIds.get(1);
82+
final JsonObject reviewsData = fetchReviewsData(trackId, token);
83+
final JsonArray reviews = reviewsData.getArray("results");
84+
85+
for (final Object review : reviews) {
86+
collector.commit(
87+
new BandcampCommentsInfoItemExtractor((JsonObject) review, getUrl()));
88+
}
89+
90+
if (!reviewsData.getBoolean("more_available")) {
91+
return new InfoItemsPage<>(collector, null);
92+
}
93+
94+
return new InfoItemsPage<>(collector,
95+
new Page(List.of(trackId, getNextPageToken(reviews))));
96+
}
97+
98+
private JsonObject fetchReviewsData(final String trackId, final String token)
99+
throws ParsingException {
100+
try {
101+
return JsonUtils.toJsonObject(getDownloader().postWithContentTypeJson(
102+
REVIEWS_API_URL,
103+
Collections.emptyMap(),
104+
JsonWriter.string().object()
105+
.value("tralbum_type", "t")
106+
.value("tralbum_id", trackId)
107+
.value("token", token)
108+
.value("count", 7)
109+
.array("exclude_fan_ids").end()
110+
.end().done().getBytes(StandardCharsets.UTF_8)).responseBody());
111+
} catch (final IOException | ReCaptchaException e) {
112+
throw new ParsingException("Could not fetch reviews", e);
113+
}
114+
}
115+
116+
private String getNextPageToken(final JsonArray reviews) throws ParsingException {
117+
return reviews.stream()
118+
.filter(JsonObject.class::isInstance)
119+
.map(JsonObject.class::cast)
120+
.map(review -> review.getString("token"))
121+
.reduce((a, b) -> b) // keep only the last element
122+
.orElseThrow(() -> new ParsingException("Could not get token"));
123+
}
124+
125+
private String getTrackId() throws ParsingException {
126+
final JsonObject pageProperties = JsonUtils.toJsonObject(
127+
document.selectFirst("meta[name=bc-page-properties]")
128+
.attr("content"));
129+
return Long.toString(pageProperties.getLong("item_id"));
55130
}
56131

57132
@Override
Lines changed: 11 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,20 @@
11
package org.schabi.newpipe.extractor.services.bandcamp.extractors;
22

3-
import org.jsoup.nodes.Element;
3+
import static org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampExtractorHelper.getImageUrl;
4+
5+
import com.grack.nanojson.JsonObject;
6+
47
import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor;
58
import org.schabi.newpipe.extractor.exceptions.ParsingException;
69
import org.schabi.newpipe.extractor.stream.Description;
710

8-
import java.util.Objects;
9-
1011
public class BandcampCommentsInfoItemExtractor implements CommentsInfoItemExtractor {
1112

12-
private final Element writing;
13+
private final JsonObject review;
1314
private final String url;
1415

15-
public BandcampCommentsInfoItemExtractor(final Element writing, final String url) {
16-
this.writing = writing;
16+
/**
 * Creates an extractor backed by a single review JSON object.
 *
 * @param review the JSON object the getters of this extractor read from
 * @param url    stored as-is in the {@code url} field
 */
public BandcampCommentsInfoItemExtractor(final JsonObject review, final String url) {
    this.url = url;
    this.review = review;
}
1920

@@ -29,31 +30,21 @@ public String getUrl() {
2930

3031
@Override
3132
public String getThumbnailUrl() throws ParsingException {
32-
return writing.getElementsByClass("thumb").attr("src");
33+
return getUploaderAvatarUrl();
3334
}
3435

3536
@Override
3637
public Description getCommentText() throws ParsingException {
37-
final var text = writing.getElementsByClass("text").stream()
38-
.filter(Objects::nonNull)
39-
.map(Element::ownText)
40-
.findFirst()
41-
.orElseThrow(() -> new ParsingException("Could not get comment text"));
42-
43-
return new Description(text, Description.PLAIN_TEXT);
38+
return new Description(review.getString("why"), Description.PLAIN_TEXT);
4439
}
4540

4641
@Override
4742
public String getUploaderName() throws ParsingException {
48-
return writing.getElementsByClass("name").stream()
49-
.filter(Objects::nonNull)
50-
.map(Element::text)
51-
.findFirst()
52-
.orElseThrow(() -> new ParsingException("Could not get uploader name"));
43+
return review.getString("name");
5344
}
5445

5546
@Override
5647
public String getUploaderAvatarUrl() {
57-
return writing.getElementsByClass("thumb").attr("src");
48+
return getImageUrl(review.getLong("image_id"), false);
5849
}
5950
}

extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampCommentsExtractorTest.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ public void hasComments() throws IOException, ExtractionException {
3737
@Test
3838
public void testGetCommentsAllData() throws IOException, ExtractionException {
3939
ListExtractor.InfoItemsPage<CommentsInfoItem> comments = extractor.getInitialPage();
40+
assertTrue(comments.hasNextPage());
4041

4142
DefaultTests.defaultTestListOfItems(Bandcamp, comments.getItems(), comments.getErrors());
4243
for (CommentsInfoItem c : comments.getItems()) {

0 commit comments

Comments
 (0)