66import com .grack .nanojson .JsonParser ;
77import com .grack .nanojson .JsonParserException ;
88import com .grack .nanojson .JsonWriter ;
9+
910import org .jsoup .Jsoup ;
1011import org .jsoup .nodes .Element ;
12+ import org .schabi .newpipe .extractor .Image ;
13+ import org .schabi .newpipe .extractor .Image .ResolutionLevel ;
1114import org .schabi .newpipe .extractor .NewPipe ;
1215import org .schabi .newpipe .extractor .exceptions .ParsingException ;
1316import org .schabi .newpipe .extractor .exceptions .ReCaptchaException ;
1417import org .schabi .newpipe .extractor .localization .DateWrapper ;
15- import org .schabi .newpipe .extractor .utils .Utils ;
18+ import org .schabi .newpipe .extractor .utils .ImageSuffix ;
1619
17- import javax .annotation .Nullable ;
1820import java .io .IOException ;
1921import java .nio .charset .StandardCharsets ;
2022import java .time .DateTimeException ;
2123import java .time .ZonedDateTime ;
2224import java .time .format .DateTimeFormatter ;
2325import java .util .Collections ;
26+ import java .util .List ;
2427import java .util .Locale ;
28+ import java .util .stream .Collectors ;
29+
30+ import javax .annotation .Nonnull ;
31+ import javax .annotation .Nullable ;
32+
33+ import static org .schabi .newpipe .extractor .Image .HEIGHT_UNKNOWN ;
34+ import static org .schabi .newpipe .extractor .Image .WIDTH_UNKNOWN ;
35+ import static org .schabi .newpipe .extractor .utils .Utils .isNullOrEmpty ;
36+ import static org .schabi .newpipe .extractor .utils .Utils .replaceHttpWithHttps ;
2537
2638public final class BandcampExtractorHelper {
2739
40+ /**
41+ * List of image IDs which preserve aspect ratio with their theoretical dimension known.
42+ *
43+ * <p>
44+ * Bandcamp images are not always squares, so only images which preserve aspect ratio are used.
45+ * </p>
46+ *
47+ * <p>
48+ * One of the direct consequences of this specificity is that only one dimension of images is
49+ * known at a time, depending on the image ID.
50+ * </p>
51+ *
52+ * <p>
53+ * Note also that dimensions are only theoretical because if the image size is less than the
54+ * dimensions of the image ID, it will not be upscaled but kept at its original size.
55+ * </p>
56+ *
57+ * <p>
58+ * IDs come from <a href="https://gist.github.com/f2k1de/06f5fd0ae9c919a7c3693a44ee522213">the
59+ * GitHub Gist "Bandcamp File Format Parameters" by f2k1de</a>
60+ * </p>
61+ */
62+ private static final List <ImageSuffix > IMAGE_URL_SUFFIXES_AND_RESOLUTIONS = List .of (
63+ // ID | HEIGHT | WIDTH
64+ new ImageSuffix ("10.jpg" , HEIGHT_UNKNOWN , 1200 , ResolutionLevel .HIGH ),
65+ new ImageSuffix ("101.jpg" , 90 , WIDTH_UNKNOWN , ResolutionLevel .LOW ),
66+ new ImageSuffix ("170.jpg" , 422 , WIDTH_UNKNOWN , ResolutionLevel .MEDIUM ),
67+ // 180 returns the same image aspect ratio and size as 171
68+ new ImageSuffix ("171.jpg" , 646 , WIDTH_UNKNOWN , ResolutionLevel .MEDIUM ),
69+ new ImageSuffix ("20.jpg" , HEIGHT_UNKNOWN , 1024 , ResolutionLevel .HIGH ),
70+ // 203 returns the same image aspect ratio and size as 200
71+ new ImageSuffix ("200.jpg" , 420 , WIDTH_UNKNOWN , ResolutionLevel .MEDIUM ),
72+ new ImageSuffix ("201.jpg" , 280 , WIDTH_UNKNOWN , ResolutionLevel .MEDIUM ),
73+ new ImageSuffix ("202.jpg" , 140 , WIDTH_UNKNOWN , ResolutionLevel .LOW ),
74+ new ImageSuffix ("204.jpg" , 360 , WIDTH_UNKNOWN , ResolutionLevel .MEDIUM ),
75+ new ImageSuffix ("205.jpg" , 240 , WIDTH_UNKNOWN , ResolutionLevel .MEDIUM ),
76+ new ImageSuffix ("206.jpg" , 180 , WIDTH_UNKNOWN , ResolutionLevel .MEDIUM ),
77+ new ImageSuffix ("207.jpg" , 120 , WIDTH_UNKNOWN , ResolutionLevel .LOW ),
78+ new ImageSuffix ("43.jpg" , 100 , WIDTH_UNKNOWN , ResolutionLevel .LOW ),
79+ new ImageSuffix ("44.jpg" , 200 , WIDTH_UNKNOWN , ResolutionLevel .MEDIUM ));
80+
81+ private static final String IMAGE_URL_APPENDIX_AND_EXTENSION_REGEX = "_\\ d+\\ .\\ w+" ;
82+ private static final String IMAGES_DOMAIN_AND_PATH = "https://f4.bcbits.com/img/" ;
83+
2884 public static final String BASE_URL = "https://bandcamp.com" ;
2985 public static final String BASE_API_URL = BASE_URL + "/api" ;
3086
@@ -44,7 +100,7 @@ public static String getStreamUrlFromIds(final long bandId,
44100 + "&tralbum_id=" + itemId + "&tralbum_type=" + itemType .charAt (0 ))
45101 .responseBody ();
46102
47- return Utils . replaceHttpWithHttps (JsonParser .object ().from (jsonString )
103+ return replaceHttpWithHttps (JsonParser .object ().from (jsonString )
48104 .getString ("bandcamp_url" ));
49105
50106 } catch (final JsonParserException | ReCaptchaException | IOException e ) {
@@ -76,17 +132,26 @@ public static JsonObject getArtistDetails(final String id) throws ParsingExcepti
76132 }
77133
78134 /**
79- * Generate image url from image ID.
135+ * Generate an image url from an image ID.
136+ *
137+ * <p>
138+ * The image ID {@code 10} was chosen because it provides images wide up to 1200px (when
139+ * the original image width is more than or equal this resolution).
140+ * </p>
141+ *
80142 * <p>
81- * The appendix "_10" was chosen because it provides images sized 1200x1200. Other integer
82- * values are possible as well (e.g. 0 is a very large resolution, possibly the original).
143+ * Other integer values are possible as well (e.g. 0 is a very large resolution, possibly the
144+ * original); see {@link #IMAGE_URL_SUFFIXES_AND_RESOLUTIONS} for more details about image
145+ * resolution IDs.
146+ * </p>
83147 *
84- * @param id The image ID
85- * @param album True if this is the cover of an album or track
86- * @return URL of image with this ID sized 1200x1200
148+ * @param id the image ID
149+ * @param isAlbum whether the image is the cover of an album or a track
150+ * @return a URL of the image with this ID with a width up to 1200px
87151 */
88- public static String getImageUrl (final long id , final boolean album ) {
89- return "https://f4.bcbits.com/img/" + (album ? 'a' : "" ) + id + "_10.jpg" ;
152+ @ Nonnull
153+ public static String getImageUrl (final long id , final boolean isAlbum ) {
154+ return IMAGES_DOMAIN_AND_PATH + (isAlbum ? 'a' : "" ) + id + "_10.jpg" ;
90155 }
91156
92157 /**
@@ -136,13 +201,94 @@ public static DateWrapper parseDate(final String textDate) throws ParsingExcepti
136201 }
137202 }
138203
139- @ Nullable
140- public static String getThumbnailUrlFromSearchResult (final Element searchResult ) {
141- return searchResult .getElementsByClass ("art" ).stream ()
204+ /**
205+ * Get a list of images from a search result {@link Element}.
206+ *
207+ * <p>
208+ * This method will call {@link #getImagesFromImageUrl(String)} using the first non null and
209+ * non empty image URL found from the {@code src} attribute of {@code img} HTML elements, or an
210+ * empty string if no valid image URL was found.
211+ * </p>
212+ *
213+ * @param searchResult a search result {@link Element}
214+ * @return an unmodifiable list of {@link Image}s, which is never null but can be empty, in the
215+ * case where no valid image URL was found
216+ */
217+ @ Nonnull
218+ public static List <Image > getImagesFromSearchResult (@ Nonnull final Element searchResult ) {
219+ return getImagesFromImageUrl (searchResult .getElementsByClass ("art" )
220+ .stream ()
142221 .flatMap (element -> element .getElementsByTag ("img" ).stream ())
143222 .map (element -> element .attr ("src" ))
144- .filter (string -> !string . isEmpty ( ))
223+ .filter (imageUrl -> !isNullOrEmpty ( imageUrl ))
145224 .findFirst ()
146- .orElse (null );
225+ .orElse ("" ));
226+ }
227+
228+ /**
229+ * Get all images which have resolutions preserving aspect ratio from an image URL.
230+ *
231+ * <p>
232+ * This method will remove the image ID and its extension from the end of the URL and then call
233+ * {@link #getImagesFromImageBaseUrl(String)}.
234+ * </p>
235+ *
236+ * @param imageUrl the full URL of an image provided by Bandcamp, such as in its HTML code
237+ * @return an unmodifiable list of {@link Image}s, which is never null but can be empty, in the
238+ * case where the image URL has been not extracted (and so is null or empty)
239+ */
240+ @ Nonnull
241+ public static List <Image > getImagesFromImageUrl (@ Nullable final String imageUrl ) {
242+ if (isNullOrEmpty (imageUrl )) {
243+ return List .of ();
244+ }
245+
246+ return getImagesFromImageBaseUrl (
247+ imageUrl .replaceFirst (IMAGE_URL_APPENDIX_AND_EXTENSION_REGEX , "_" ));
248+ }
249+
250+ /**
251+ * Get all images which have resolutions preserving aspect ratio from an image ID.
252+ *
253+ * <p>
254+ * This method will call {@link #getImagesFromImageBaseUrl(String)}.
255+ * </p>
256+ *
257+ * @param id the id of an image provided by Bandcamp
258+ * @param isAlbum whether the image is the cover of an album
259+ * @return an unmodifiable list of {@link Image}s, which is never null but can be empty, in the
260+ * case where the image ID has been not extracted (and so equal to 0)
261+ */
262+ @ Nonnull
263+ public static List <Image > getImagesFromImageId (final long id , final boolean isAlbum ) {
264+ if (id == 0 ) {
265+ return List .of ();
266+ }
267+
268+ return getImagesFromImageBaseUrl (IMAGES_DOMAIN_AND_PATH + (isAlbum ? 'a' : "" ) + id + "_" );
269+ }
270+
271+ /**
272+ * Get all images resolutions preserving aspect ratio from a base image URL.
273+ *
274+ * <p>
275+ * Base image URLs are images containing the image path, a {@code a} letter if it comes from an
276+ * album, its ID and an underscore.
277+ * </p>
278+ *
279+ * <p>
280+ * Images resolutions returned are the ones of {@link #IMAGE_URL_SUFFIXES_AND_RESOLUTIONS}.
281+ * </p>
282+ *
283+ * @param baseUrl the base URL of the image
284+ * @return an unmodifiable and non-empty list of {@link Image}s
285+ */
286+ @ Nonnull
287+ private static List <Image > getImagesFromImageBaseUrl (@ Nonnull final String baseUrl ) {
288+ return IMAGE_URL_SUFFIXES_AND_RESOLUTIONS .stream ()
289+ .map (imageSuffix -> new Image (baseUrl + imageSuffix .getSuffix (),
290+ imageSuffix .getHeight (), imageSuffix .getWidth (),
291+ imageSuffix .getResolutionLevel ()))
292+ .collect (Collectors .toUnmodifiableList ());
147293 }
148294}
0 commit comments