Skip to content

Commit 4b80d73

Browse files
committed
[Bandcamp] Add utility methods to get multiple images
Bandcamp images work with image IDs, which provide different resolutions. Images on Bandcamp are not always squares, and some IDs respect aspect ratios whereas others do not. The extractor will only use the ones which preserve aspect ratio and will not provide original images, for performance and size purposes. Because of this aspect ratio preservation constraint, only one dimension will be known at a time. The image IDs used, with their respective dimension, are: - 10: 1200w; - 101: 90h; - 170: 422h; - 171: 646h; - 20: 1024w; - 200: 420h; - 201: 280h; - 202: 140h; - 204: 360h; - 205: 240h; - 206: 180h; - 207: 120h; - 43: 100h; - 44: 200h. (Where w represents the width of the image and h the height of the image.) Note that these dimensions are theoretical because if the image size is less than the dimensions of the image ID, it will not be upscaled but kept at its original size. All these resolutions are stored in a private static list of ImageSuffix objects in BandcampExtractorHelper, in which the methods to get multiple images have been added: - getImagesFromImageUrl(String): public method to get images from an image URL; - getImagesFromImageId(long, boolean): public method to get images from an image ID; - getImagesFromImageBaseUrl(String): private utility method to get images from the static list of ImageSuffix objects from a given image base URL, containing the path to the image, the letter "a" if it comes from an album, its ID and an underscore. Some existing methods have also been edited: - the documentation of getImageUrl(long, boolean) has been changed to reflect the findings about Bandcamp images; - getThumbnailUrlFromSearchResult has been renamed to getImagesFromSearchResult, and documentation has been added to this method. The method replaceHttpWithHttps of the Utils class is now also used in BandcampExtractorHelper instead of manually doing what the method does.
1 parent 4e6fb36 commit 4b80d73

1 file changed

Lines changed: 162 additions & 16 deletions

File tree

extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampExtractorHelper.java

Lines changed: 162 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -6,25 +6,81 @@
66
import com.grack.nanojson.JsonParser;
77
import com.grack.nanojson.JsonParserException;
88
import com.grack.nanojson.JsonWriter;
9+
910
import org.jsoup.Jsoup;
1011
import org.jsoup.nodes.Element;
12+
import org.schabi.newpipe.extractor.Image;
13+
import org.schabi.newpipe.extractor.Image.ResolutionLevel;
1114
import org.schabi.newpipe.extractor.NewPipe;
1215
import org.schabi.newpipe.extractor.exceptions.ParsingException;
1316
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
1417
import org.schabi.newpipe.extractor.localization.DateWrapper;
15-
import org.schabi.newpipe.extractor.utils.Utils;
18+
import org.schabi.newpipe.extractor.utils.ImageSuffix;
1619

17-
import javax.annotation.Nullable;
1820
import java.io.IOException;
1921
import java.nio.charset.StandardCharsets;
2022
import java.time.DateTimeException;
2123
import java.time.ZonedDateTime;
2224
import java.time.format.DateTimeFormatter;
2325
import java.util.Collections;
26+
import java.util.List;
2427
import java.util.Locale;
28+
import java.util.stream.Collectors;
29+
30+
import javax.annotation.Nonnull;
31+
import javax.annotation.Nullable;
32+
33+
import static org.schabi.newpipe.extractor.Image.HEIGHT_UNKNOWN;
34+
import static org.schabi.newpipe.extractor.Image.WIDTH_UNKNOWN;
35+
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
36+
import static org.schabi.newpipe.extractor.utils.Utils.replaceHttpWithHttps;
2537

2638
public final class BandcampExtractorHelper {
2739

40+
/**
41+
* List of image IDs which preserve aspect ratio with their theoretical dimension known.
42+
*
43+
* <p>
44+
* Bandcamp images are not always squares, so images which preserve aspect ratio are only used.
45+
* </p>
46+
*
47+
* <p>
48+
* One of the direct consequences of this specificity is that only one dimension of images is
49+
* known at a time, depending on the image ID.
50+
* </p>
51+
*
52+
* <p>
53+
* Note also that dimensions are only theoretical because if the image size is less than the
54+
* dimensions of the image ID, it will not be upscaled but kept at its original size.
55+
* </p>
56+
*
57+
* <p>
58+
* IDs come from <a href="https://gist.github.com/f2k1de/06f5fd0ae9c919a7c3693a44ee522213">the
59+
* GitHub Gist "Bandcamp File Format Parameters" by f2k1de</a>
60+
* </p>
61+
*/
62+
private static final List<ImageSuffix> IMAGE_URL_SUFFIXES_AND_RESOLUTIONS = List.of(
63+
// ID | HEIGHT | WIDTH
64+
new ImageSuffix("10.jpg", HEIGHT_UNKNOWN, 1200, ResolutionLevel.HIGH),
65+
new ImageSuffix("101.jpg", 90, WIDTH_UNKNOWN, ResolutionLevel.LOW),
66+
new ImageSuffix("170.jpg", 422, WIDTH_UNKNOWN, ResolutionLevel.MEDIUM),
67+
// 180 returns the same image aspect ratio and size as 171
68+
new ImageSuffix("171.jpg", 646, WIDTH_UNKNOWN, ResolutionLevel.MEDIUM),
69+
new ImageSuffix("20.jpg", HEIGHT_UNKNOWN, 1024, ResolutionLevel.HIGH),
70+
// 203 returns the same image aspect ratio and size as 200
71+
new ImageSuffix("200.jpg", 420, WIDTH_UNKNOWN, ResolutionLevel.MEDIUM),
72+
new ImageSuffix("201.jpg", 280, WIDTH_UNKNOWN, ResolutionLevel.MEDIUM),
73+
new ImageSuffix("202.jpg", 140, WIDTH_UNKNOWN, ResolutionLevel.LOW),
74+
new ImageSuffix("204.jpg", 360, WIDTH_UNKNOWN, ResolutionLevel.MEDIUM),
75+
new ImageSuffix("205.jpg", 240, WIDTH_UNKNOWN, ResolutionLevel.MEDIUM),
76+
new ImageSuffix("206.jpg", 180, WIDTH_UNKNOWN, ResolutionLevel.MEDIUM),
77+
new ImageSuffix("207.jpg", 120, WIDTH_UNKNOWN, ResolutionLevel.LOW),
78+
new ImageSuffix("43.jpg", 100, WIDTH_UNKNOWN, ResolutionLevel.LOW),
79+
new ImageSuffix("44.jpg", 200, WIDTH_UNKNOWN, ResolutionLevel.MEDIUM));
80+
81+
private static final String IMAGE_URL_APPENDIX_AND_EXTENSION_REGEX = "_\\d+\\.\\w+";
82+
private static final String IMAGES_DOMAIN_AND_PATH = "https://f4.bcbits.com/img/";
83+
2884
public static final String BASE_URL = "https://bandcamp.com";
2985
public static final String BASE_API_URL = BASE_URL + "/api";
3086

@@ -44,7 +100,7 @@ public static String getStreamUrlFromIds(final long bandId,
44100
+ "&tralbum_id=" + itemId + "&tralbum_type=" + itemType.charAt(0))
45101
.responseBody();
46102

47-
return Utils.replaceHttpWithHttps(JsonParser.object().from(jsonString)
103+
return replaceHttpWithHttps(JsonParser.object().from(jsonString)
48104
.getString("bandcamp_url"));
49105

50106
} catch (final JsonParserException | ReCaptchaException | IOException e) {
@@ -76,17 +132,26 @@ public static JsonObject getArtistDetails(final String id) throws ParsingExcepti
76132
}
77133

78134
/**
79-
* Generate image url from image ID.
135+
* Generate an image url from an image ID.
136+
*
137+
* <p>
138+
* The image ID {@code 10} was chosen because it provides images up to 1200px wide (when
139+
* the original image width is greater than or equal to this resolution).
140+
* </p>
141+
*
80142
* <p>
81-
* The appendix "_10" was chosen because it provides images sized 1200x1200. Other integer
82-
* values are possible as well (e.g. 0 is a very large resolution, possibly the original).
143+
* Other integer values are possible as well (e.g. 0 is a very large resolution, possibly the
144+
* original); see {@link #IMAGE_URL_SUFFIXES_AND_RESOLUTIONS} for more details about image
145+
* resolution IDs.
146+
* </p>
83147
*
84-
* @param id The image ID
85-
* @param album True if this is the cover of an album or track
86-
* @return URL of image with this ID sized 1200x1200
148+
* @param id the image ID
149+
* @param isAlbum whether the image is the cover of an album or a track
150+
* @return a URL of the image with this ID with a width up to 1200px
87151
*/
88-
public static String getImageUrl(final long id, final boolean album) {
89-
return "https://f4.bcbits.com/img/" + (album ? 'a' : "") + id + "_10.jpg";
152+
@Nonnull
153+
public static String getImageUrl(final long id, final boolean isAlbum) {
154+
return IMAGES_DOMAIN_AND_PATH + (isAlbum ? 'a' : "") + id + "_10.jpg";
90155
}
91156

92157
/**
@@ -136,13 +201,94 @@ public static DateWrapper parseDate(final String textDate) throws ParsingExcepti
136201
}
137202
}
138203

139-
@Nullable
140-
public static String getThumbnailUrlFromSearchResult(final Element searchResult) {
141-
return searchResult.getElementsByClass("art").stream()
204+
/**
205+
* Get a list of images from a search result {@link Element}.
206+
*
207+
* <p>
208+
* This method will call {@link #getImagesFromImageUrl(String)} using the first non null and
209+
* non empty image URL found from the {@code src} attribute of {@code img} HTML elements, or an
210+
* empty string if no valid image URL was found.
211+
* </p>
212+
*
213+
* @param searchResult a search result {@link Element}
214+
* @return an unmodifiable list of {@link Image}s, which is never null but can be empty, in the
215+
* case where no valid image URL was found
216+
*/
217+
@Nonnull
218+
public static List<Image> getImagesFromSearchResult(@Nonnull final Element searchResult) {
219+
return getImagesFromImageUrl(searchResult.getElementsByClass("art")
220+
.stream()
142221
.flatMap(element -> element.getElementsByTag("img").stream())
143222
.map(element -> element.attr("src"))
144-
.filter(string -> !string.isEmpty())
223+
.filter(imageUrl -> !isNullOrEmpty(imageUrl))
145224
.findFirst()
146-
.orElse(null);
225+
.orElse(""));
226+
}
227+
228+
/**
229+
* Get all images which have resolutions preserving aspect ratio from an image URL.
230+
*
231+
* <p>
232+
* This method will remove the image ID and its extension from the end of the URL and then call
233+
* {@link #getImagesFromImageBaseUrl(String)}.
234+
* </p>
235+
*
236+
* @param imageUrl the full URL of an image provided by Bandcamp, such as in its HTML code
237+
* @return an unmodifiable list of {@link Image}s, which is never null but can be empty, in the
238+
* case where the image URL has not been extracted (and so is null or empty)
239+
*/
240+
@Nonnull
241+
public static List<Image> getImagesFromImageUrl(@Nullable final String imageUrl) {
242+
if (isNullOrEmpty(imageUrl)) {
243+
return List.of();
244+
}
245+
246+
return getImagesFromImageBaseUrl(
247+
imageUrl.replaceFirst(IMAGE_URL_APPENDIX_AND_EXTENSION_REGEX, "_"));
248+
}
249+
250+
/**
251+
* Get all images which have resolutions preserving aspect ratio from an image ID.
252+
*
253+
* <p>
254+
* This method will call {@link #getImagesFromImageBaseUrl(String)}.
255+
* </p>
256+
*
257+
* @param id the id of an image provided by Bandcamp
258+
* @param isAlbum whether the image is the cover of an album
259+
* @return an unmodifiable list of {@link Image}s, which is never null but can be empty, in the
260+
* case where the image ID has not been extracted (and so is equal to 0)
261+
*/
262+
@Nonnull
263+
public static List<Image> getImagesFromImageId(final long id, final boolean isAlbum) {
264+
if (id == 0) {
265+
return List.of();
266+
}
267+
268+
return getImagesFromImageBaseUrl(IMAGES_DOMAIN_AND_PATH + (isAlbum ? 'a' : "") + id + "_");
269+
}
270+
271+
/**
272+
* Get all image resolutions preserving aspect ratio from a base image URL.
273+
*
274+
* <p>
275+
* Base image URLs are URLs containing the image path, the letter {@code a} if it comes from an
276+
* album, its ID and an underscore.
277+
* </p>
278+
*
279+
* <p>
280+
* Image resolutions returned are the ones of {@link #IMAGE_URL_SUFFIXES_AND_RESOLUTIONS}.
281+
* </p>
282+
*
283+
* @param baseUrl the base URL of the image
284+
* @return an unmodifiable and non-empty list of {@link Image}s
285+
*/
286+
@Nonnull
287+
private static List<Image> getImagesFromImageBaseUrl(@Nonnull final String baseUrl) {
288+
return IMAGE_URL_SUFFIXES_AND_RESOLUTIONS.stream()
289+
.map(imageSuffix -> new Image(baseUrl + imageSuffix.getSuffix(),
290+
imageSuffix.getHeight(), imageSuffix.getWidth(),
291+
imageSuffix.getResolutionLevel()))
292+
.collect(Collectors.toUnmodifiableList());
147293
}
148294
}

0 commit comments

Comments
 (0)