Skip to content

Commit 2e57a8f

Browse files
committed
[Bandcamp] Fix link handler acceptance behaviour
* Test for bandcamp footer instead of meta tag (which is not present on all pages)
* Accept links to /music, not just /releases
* Correctly handle uppercase URLs
1 parent 70d9e38 commit 2e57a8f

3 files changed

Lines changed: 21 additions & 11 deletions

File tree

extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampExtractorHelper.java

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,20 +7,15 @@
77
import com.grack.nanojson.JsonParserException;
88
import com.grack.nanojson.JsonWriter;
99
import org.jsoup.Jsoup;
10-
import org.jsoup.nodes.Document;
1110
import org.schabi.newpipe.extractor.NewPipe;
1211
import org.schabi.newpipe.extractor.exceptions.ParsingException;
1312
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
1413
import org.schabi.newpipe.extractor.localization.DateWrapper;
15-
import org.schabi.newpipe.extractor.utils.Utils;
1614

1715
import java.io.IOException;
1816
import java.time.DateTimeException;
1917
import java.time.ZonedDateTime;
2018
import java.time.format.DateTimeFormatter;
21-
import java.util.ArrayList;
22-
import java.util.Arrays;
23-
import java.util.List;
2419
import java.util.Locale;
2520

2621
public class BandcampExtractorHelper {
@@ -95,12 +90,18 @@ public static boolean isSupportedDomain(final String url) throws ParsingExceptio
9590
if (url.toLowerCase().matches("https?://.+\\.bandcamp\\.com(/.*)?")) return true;
9691

9792
try {
98-
// Accept all other URLs if they contain a <meta> tag that says they are generated by bandcamp
93+
// Test other URLs for whether they contain a footer that links to bandcamp
9994
return Jsoup.parse(
10095
NewPipe.getDownloader().get(url).responseBody()
10196
)
102-
.getElementsByAttributeValue("name", "generator")
103-
.attr("content").equals("Bandcamp");
97+
.getElementById("pgFt")
98+
.getElementById("pgFt-inner")
99+
.getElementById("footer-logo-wrapper")
100+
.getElementById("footer-logo")
101+
.getElementsByClass("hiddenAccess")
102+
.text().equals("Bandcamp");
103+
} catch (NullPointerException e) {
104+
return false;
104105
} catch (IOException | ReCaptchaException e) {
105106
throw new ParsingException("Could not determine whether URL is custom domain " +
106107
"(not available? network error?)");

extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/linkHandler/BandcampChannelLinkHandlerFactory.java

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,9 @@ public String getUrl(final String id, final List<String> contentFilter, final St
5555
* Accepts only pages that lead to the root of an artist profile. Supports external pages.
5656
*/
5757
@Override
58-
public boolean onAcceptUrl(final String url) throws ParsingException {
58+
public boolean onAcceptUrl(String url) throws ParsingException {
59+
60+
url = url.toLowerCase();
5961

6062
// https: | | artist.bandcamp.com | releases
6163
// 0 1 2 3
@@ -64,8 +66,10 @@ public boolean onAcceptUrl(final String url) throws ParsingException {
6466
// URL is too short
6567
if (splitUrl.length < 3) return false;
6668

67-
// Must have "releases" as segment after url or none at all
68-
if (splitUrl.length > 3 && !splitUrl[3].equals("releases")) {
69+
// Must have "releases" or "music" as segment after url or none at all
70+
if (splitUrl.length > 3 && !(
71+
splitUrl[3].equals("releases") || splitUrl[3].equals("music")
72+
)) {
6973

7074
return false;
7175

extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampChannelLinkHandlerFactoryTest.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,12 +37,16 @@ public void testAcceptUrl() throws ParsingException {
3737
assertFalse(linkHandler.acceptUrl("https://bandcamp.com"));
3838
assertFalse(linkHandler.acceptUrl("https://zachbenson.bandcamp.com/track/kitchen"));
3939
assertFalse(linkHandler.acceptUrl("https://daily.bandcamp.com/"));
40+
assertFalse(linkHandler.acceptUrl("https://DAILY.BANDCAMP.COM"));
4041
assertFalse(linkHandler.acceptUrl("https://daily.bandcamp.com/best-of-2020/bandcamp-daily-staffers-on-their-favorite-albums-of-2020"));
4142

4243
// External URLs
4344
assertTrue(linkHandler.acceptUrl("https://lobstertheremin.com"));
4445
assertTrue(linkHandler.acceptUrl("https://lobstertheremin.com/music"));
4546
assertTrue(linkHandler.acceptUrl("https://lobstertheremin.com/music/"));
47+
assertTrue(linkHandler.acceptUrl("https://diskak.usopop.com/"));
48+
assertTrue(linkHandler.acceptUrl("https://diskak.usopop.com/releases"));
49+
assertTrue(linkHandler.acceptUrl("https://diskak.usopop.com/RELEASES"));
4650

4751
assertFalse(linkHandler.acceptUrl("https://example.com/releases"));
4852
}
@@ -57,6 +61,7 @@ public void testGetId() throws ParsingException {
5761

5862
assertEquals("2735462545", linkHandler.getId("http://lobstertheremin.com/"));
5963
assertEquals("2735462545", linkHandler.getId("https://lobstertheremin.com/music/"));
64+
assertEquals("3826445168", linkHandler.getId("https://diskak.usopop.com/releases"));
6065
}
6166

6267
@Test

0 commit comments

Comments
 (0)