Skip to content

Commit 280b32d

Browse files
committed
Compress Image if it is too large to fit into single OPUS packet
1 parent d2a1c17 commit 280b32d

File tree

3 files changed

+158
-28
lines changed

3 files changed

+158
-28
lines changed

app/src/main/java/org/schabi/newpipe/streams/OggFromWebMWriter.java

Lines changed: 92 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@
2828
import java.util.List;
2929
import java.util.stream.Collectors;
3030

31+
import us.shandian.giga.postprocessing.ImageUtils;
32+
3133
/**
3234
* <p>
3335
* This class is used to convert a WebM stream containing Opus or Vorbis audio
@@ -50,15 +52,50 @@
5052
* @author tobigr
5153
*/
5254
public class OggFromWebMWriter implements Closeable {
55+
private static final String TAG = OggFromWebMWriter.class.getSimpleName();
56+
57+
/**
58+
* No flags set.
59+
*/
5360
private static final byte FLAG_UNSET = 0x00;
54-
//private static final byte FLAG_CONTINUED = 0x01;
61+
/**
62+
* The packet is continued from the previous page.
63+
*/
64+
private static final byte FLAG_CONTINUED = 0x01;
65+
/**
66+
* BOS (beginning of stream).
67+
*/
5568
private static final byte FLAG_FIRST = 0x02;
69+
/**
70+
* EOS (end of stream).
71+
*/
5672
private static final byte FLAG_LAST = 0x04;
5773

5874
private static final byte HEADER_CHECKSUM_OFFSET = 22;
5975
private static final byte HEADER_SIZE = 27;
6076

61-
private static final int TIME_SCALE_NS = 1000000000;
77+
private static final int TIME_SCALE_NS = 1_000_000_000;
78+
79+
/**
80+
* The maximum size of a segment in the Ogg page, in bytes.
81+
* This is a fixed value defined by the Ogg specification.
82+
*/
83+
private static final int OGG_SEGMENT_SIZE = 255;
84+
85+
/**
86+
* The maximum size of the Opus packet in bytes, to be included in the Ogg page.
87+
*/
88+
private static final int OPUS_MAX_PACKETS_SIZE = 61_140;
89+
90+
/**
91+
* <p>The maximum size of the compressed thumbnail image in bytes,
92+
* to be included in the Opus metadata.</p>
93+
*
94+
* This is a safe size to avoid creating metadata tags that are too large for the Ogg page,
95+
* since the metadata header and other tags can also take up space in the page.
96+
*/
97+
private static final int MAX_THUMBNAIL_SIZE = OPUS_MAX_PACKETS_SIZE - 4500;
98+
6299

63100
private boolean done = false;
64101
private boolean parsed = false;
@@ -80,7 +117,7 @@ public class OggFromWebMWriter implements Closeable {
80117
private long webmBlockNearDuration = 0;
81118

82119
private short segmentTableSize = 0;
83-
private final byte[] segmentTable = new byte[255];
120+
private final byte[] segmentTable = new byte[OGG_SEGMENT_SIZE];
84121
private long segmentTableNextTimestamp = TIME_SCALE_NS;
85122

86123
private final int[] crc32Table = new int[256];
@@ -323,12 +360,12 @@ private int makePacketHeader(final long granPos, @NonNull final ByteBuffer buffe
323360
* @ImplNote See <a href="https://datatracker.ietf.org/doc/html/rfc7845.html#section-5.2">
324361
* RFC7845 5.2</a>
325362
*
326-
* @return
363+
* @return The binary metadata header, or null if not implemented for the codec
327364
*/
328365
@Nullable
329366
private byte[] makeMetadata() {
330367
if (DEBUG) {
331-
Log.d("OggFromWebMWriter", "Downloading media with codec ID " + webmTrack.codecId);
368+
Log.d(TAG, "Downloading media with codec ID " + webmTrack.codecId);
332369
}
333370

334371
if ("A_OPUS".equals(webmTrack.codecId)) {
@@ -343,18 +380,21 @@ private byte[] makeMetadata() {
343380
.getLocalDateTime()
344381
.format(DateTimeFormatter.ISO_DATE)));
345382
if (thumbnail != null) {
346-
metadata.add(makeOpusPictureTag(thumbnail));
383+
final var pictureTag = makeOpusPictureTag(thumbnail, MAX_THUMBNAIL_SIZE);
384+
if (pictureTag != null) {
385+
metadata.add(pictureTag);
386+
}
347387
}
348388
}
349389

350390
if (DEBUG) {
351-
Log.d("OggFromWebMWriter", "Creating metadata header with this data:");
352-
metadata.forEach(p -> Log.d("OggFromWebMWriter", p.first + "=" + p.second));
391+
Log.d(TAG, "Creating metadata header with this data:");
392+
metadata.forEach(p -> Log.d(TAG, p.first + "=" + p.second));
353393
}
354394

355395
return makeOpusTagsHeader(metadata);
356396
} else if ("A_VORBIS".equals(webmTrack.codecId)) {
357-
/**
397+
/*
358398
* See <a href="https://datatracker.ietf.org/doc/html/rfc7845.html#section-5.2">
359399
* RFC7845 5.2</a>
360400
*/
@@ -399,43 +439,68 @@ private static byte[] makeOpusMetadataTag(final Pair<String, String> pair) {
399439
* </p>
400440
*
401441
* @param bitmap The bitmap to use as cover art
442+
* @param maxSize The maximum size of the compressed image in bytes.
443+
* If the compressed image exceeds this size,
444+
* it will be further compressed until it fits.
445+
* This is necessary to avoid creating metadata tags
446+
* that are too large for the Ogg page.
402447
* @return The key-value pair representing the tag
448+
* or null if the image cannot be compressed to the maxSize
403449
*/
404-
private static Pair<String, String> makeOpusPictureTag(final Bitmap bitmap) {
450+
@Nullable
451+
private static Pair<String, String> makeOpusPictureTag(final Bitmap bitmap, final int maxSize) {
405452
// FLAC picture block format (big-endian):
406453
// uint32 picture_type
407-
// uint32 mime_length, mime_string
408-
// uint32 desc_length, desc_string
454+
// uint32 mime_length,
455+
// mime_string
456+
// uint32 desc_length,
457+
// desc_string
409458
// uint32 width
410459
// uint32 height
411460
// uint32 color_depth
412461
// uint32 colors_indexed
413-
// uint32 data_length, data_bytes
462+
// uint32 data_length,
463+
// data_bytes
414464

415465
final ByteArrayOutputStream baos = new ByteArrayOutputStream();
416-
bitmap.compress(Bitmap.CompressFormat.JPEG, 100, baos);
466+
467+
final var compressedThumbnail = ImageUtils.INSTANCE.compressToSize(bitmap, maxSize);
468+
if (compressedThumbnail == null) {
469+
if (DEBUG) {
470+
Log.d(TAG, "failed to compress thumbnail to target size " + maxSize);
471+
}
472+
return null;
473+
}
474+
compressedThumbnail.getBitmap().compress(
475+
Bitmap.CompressFormat.JPEG, compressedThumbnail.getQuality(), baos);
417476

418477
final byte[] imageData = baos.toByteArray();
419478
final byte[] mimeBytes = "image/jpeg".getBytes(StandardCharsets.UTF_8);
420479
final byte[] descBytes = new byte[0]; // optional description
421480
// fixed ints + mime + desc
422481
final int headerSize = 4 * 8 + mimeBytes.length + descBytes.length;
423482
final ByteBuffer buf = ByteBuffer.allocate(headerSize + imageData.length);
424-
buf.putInt(3); // picture type: 3 = Cover (front)
483+
// See https://id3.org/id3v2.3.0#Attached_picture for a full list of picture types
484+
// TODO: allow specifying other picture types, i.e. cover (front) for music albums;
485+
// but this info needs to be provided by the extractor first.
486+
buf.putInt(3); // picture type: 3 = Cover (front); 0 would be "Other"
425487
buf.putInt(mimeBytes.length);
426488
buf.put(mimeBytes);
427489
buf.putInt(descBytes.length);
428-
// no description
429490
if (descBytes.length > 0) {
491+
// currently no description available, might be added later.
430492
buf.put(descBytes);
431493
}
432-
buf.putInt(bitmap.getWidth()); // width (unknown)
433-
buf.putInt(bitmap.getHeight()); // height (unknown)
434-
buf.putInt(0); // color depth
435-
buf.putInt(0); // colors indexed
494+
buf.putInt(compressedThumbnail.getBitmap().getWidth());
495+
buf.putInt(compressedThumbnail.getBitmap().getHeight());
496+
buf.putInt(24); // color depth for JPEG and PNG is usually 24 bits
497+
buf.putInt(0); // colors indexed (0 for non-indexed images, i.e. JPEG, PNG)
436498
buf.putInt(imageData.length);
437499
buf.put(imageData);
500+
438501
final String b64 = Base64.getEncoder().encodeToString(buf.array());
502+
Log.d(TAG, "Compressed thumbnail size: " + imageData.length
503+
+ " bytes, base64 metadata size: " + b64.length() + " characters");
439504
return Pair.create("METADATA_BLOCK_PICTURE", b64);
440505
}
441506

@@ -457,7 +522,7 @@ private static byte[] makeOpusTagsHeader(final List<Pair<String, String>> keyVal
457522
.stream()
458523
.filter(p -> !p.second.isBlank())
459524
.map(OggFromWebMWriter::makeOpusMetadataTag)
460-
.collect(Collectors.toUnmodifiableList());
525+
.toList();
461526

462527
final var tagsBytes = tags.stream().collect(Collectors.summingInt(arr -> arr.length));
463528

@@ -554,22 +619,22 @@ private boolean addPacketSegment(final int size) {
554619
String.format("page size is %s but cannot be larger than 65025", size));
555620
}
556621

557-
int available = (segmentTable.length - segmentTableSize) * 255;
558-
final boolean extra = (size % 255) == 0;
622+
int available = (segmentTable.length - segmentTableSize) * OGG_SEGMENT_SIZE;
623+
final boolean extra = (size % OGG_SEGMENT_SIZE) == 0;
559624

560625
if (extra) {
561626
// add a zero byte entry in the table
562-
// required to indicate the sample size is multiple of 255
563-
available -= 255;
627+
// required to indicate the sample size is a multiple of 255 / OGG_SEGMENT_SIZE
628+
available -= OGG_SEGMENT_SIZE;
564629
}
565630

566631
// check if possible add the segment, without overflow the table
567632
if (available < size) {
568633
return false; // not enough space on the page
569634
}
570635

571-
for (int seg = size; seg > 0; seg -= 255) {
572-
segmentTable[segmentTableSize++] = (byte) Math.min(seg, 255);
636+
for (int seg = size; seg > 0; seg -= OGG_SEGMENT_SIZE) {
637+
segmentTable[segmentTableSize++] = (byte) Math.min(seg, OGG_SEGMENT_SIZE);
573638
}
574639

575640
if (extra) {

app/src/main/java/us/shandian/giga/get/DownloadMission.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -864,7 +864,7 @@ public void fetchThumbnail(@NonNull final List<Image> images) {
864864
// Alternative approaches are to either downscale a high res image or
865865
// to download the correct size depending on the chosen post-processing algorithm.
866866
final String thumbnailUrl = ImageStrategy.choosePreferredImage(
867-
images, PreferredImageQuality.MEDIUM);
867+
images, PreferredImageQuality.HIGH);
868868
// TODO: get context from somewhere else
869869
thumbnail = CoilHelper.INSTANCE.loadBitmapBlocking(App.getInstance(), thumbnailUrl);
870870
thumbnailFetched = true;
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
/*
2+
* SPDX-FileCopyrightText: 2026 NewPipe contributors <https://newpipe.net>
3+
* SPDX-License-Identifier: GPL-3.0-or-later
4+
*/
5+
6+
package us.shandian.giga.postprocessing
7+
8+
import android.graphics.Bitmap
9+
import java.io.ByteArrayOutputStream
10+
import java.util.Base64
11+
import org.schabi.newpipe.ktx.scale
12+
13+
object ImageUtils {
14+
fun getImageTypeFromUrl(url: String): String {
15+
val extension = url.substringAfterLast('.', "")
16+
return when (extension.lowercase()) {
17+
"jpg", "jpeg" -> "image/jpeg"
18+
"png" -> "image/png"
19+
"gif" -> "image/gif"
20+
"bmp" -> "image/bmp"
21+
"webp" -> "image/webp"
22+
else -> "application/octet-stream" // Default binary type
23+
}
24+
}
25+
26+
data class CompressedImage(
27+
val bitmap: Bitmap,
28+
val quality: Int,
29+
val width: Int,
30+
val height: Int
31+
)
32+
33+
fun compressToSize(original: Bitmap, maxSizeBytes: Int): CompressedImage? {
34+
var quality = 100
35+
var scale = 1.0f
36+
var width = original.width
37+
var height = original.height
38+
var compressedSize: Int
39+
40+
do {
41+
var bitmap = original.copy(original.config ?: Bitmap.Config.ARGB_8888, false)
42+
if (scale < 1.0f) {
43+
bitmap = bitmap.scale(width = width, height = height)
44+
}
45+
do {
46+
val outputStream = ByteArrayOutputStream()
47+
bitmap.compress(Bitmap.CompressFormat.JPEG, quality, outputStream)
48+
compressedSize = Base64.getEncoder().encodeToString(outputStream.toByteArray()).length
49+
quality -= 5 // Decrease quality by 5% for the next iteration
50+
} while (compressedSize > maxSizeBytes && quality > 70)
51+
if (compressedSize <= maxSizeBytes) {
52+
return CompressedImage(bitmap, quality, width, height)
53+
}
54+
if (scale > 0.5f) {
55+
scale -= 0.1f
56+
} else {
57+
scale *= 0.9f
58+
}
59+
width = (original.width * scale).toInt()
60+
height = (original.height * scale).toInt()
61+
quality = 100 // Reset quality for the next size reduction
62+
} while (width > 50 && height > 50) // Prevent too much downscaling
63+
return null
64+
}
65+
}

0 commit comments

Comments
 (0)