2828import java .util .List ;
2929import java .util .stream .Collectors ;
3030
31+ import us .shandian .giga .postprocessing .ImageUtils ;
32+
3133/**
3234 * <p>
3335 * This class is used to convert a WebM stream containing Opus or Vorbis audio
5052 * @author tobigr
5153 */
5254public class OggFromWebMWriter implements Closeable {
55+ private static final String TAG = OggFromWebMWriter .class .getSimpleName ();
56+
57+ /**
58+ * No flags set.
59+ */
5360 private static final byte FLAG_UNSET = 0x00 ;
54- //private static final byte FLAG_CONTINUED = 0x01;
61+ /**
62+ * The packet is continued from the previous page.
63+ */
64+ private static final byte FLAG_CONTINUED = 0x01 ;
65+ /**
66+ * BOS (beginning of stream).
67+ */
5568 private static final byte FLAG_FIRST = 0x02 ;
69+ /**
70+ * EOS (end of stream).
71+ */
5672 private static final byte FLAG_LAST = 0x04 ;
5773
5874 private static final byte HEADER_CHECKSUM_OFFSET = 22 ;
5975 private static final byte HEADER_SIZE = 27 ;
6076
61- private static final int TIME_SCALE_NS = 1000000000 ;
77+ private static final int TIME_SCALE_NS = 1_000_000_000 ;
78+
79+ /**
80+ * The maximum size of a segment in the Ogg page, in bytes.
81+ * This is a fixed value defined by the Ogg specification.
82+ */
83+ private static final int OGG_SEGMENT_SIZE = 255 ;
84+
85+ /**
86+ * The maximum size of the Opus packet in bytes, to be included in the Ogg page.
87+ */
88+ private static final int OPUS_MAX_PACKETS_SIZE = 61_140 ;
89+
90+ /**
91+ * <p>The maximum size of the compressed thumbnail image in bytes,
92+ * to be included in the Opus metadata.</p>
93+ *
94+ * This is a safe size to avoid creating metadata tags that are too large for the Ogg page,
95+ * since the metadata header and other tags can also take up space in the page.
96+ */
97+ private static final int MAX_THUMBNAIL_SIZE = OPUS_MAX_PACKETS_SIZE - 4500 ;
98+
6299
63100 private boolean done = false ;
64101 private boolean parsed = false ;
@@ -80,7 +117,7 @@ public class OggFromWebMWriter implements Closeable {
80117 private long webmBlockNearDuration = 0 ;
81118
82119 private short segmentTableSize = 0 ;
83- private final byte [] segmentTable = new byte [255 ];
120+ private final byte [] segmentTable = new byte [OGG_SEGMENT_SIZE ];
84121 private long segmentTableNextTimestamp = TIME_SCALE_NS ;
85122
86123 private final int [] crc32Table = new int [256 ];
@@ -323,12 +360,12 @@ private int makePacketHeader(final long granPos, @NonNull final ByteBuffer buffe
323360 * @ImplNote See <a href="https://datatracker.ietf.org/doc/html/rfc7845.html#section-5.2">
324361 * RFC7845 5.2</a>
325362 *
326- * @return
363+ * @return The binary metadata header, or null if not implemented for the codec
327364 */
328365 @ Nullable
329366 private byte [] makeMetadata () {
330367 if (DEBUG ) {
331- Log .d ("OggFromWebMWriter" , "Downloading media with codec ID " + webmTrack .codecId );
368+ Log .d (TAG , "Downloading media with codec ID " + webmTrack .codecId );
332369 }
333370
334371 if ("A_OPUS" .equals (webmTrack .codecId )) {
@@ -343,18 +380,21 @@ private byte[] makeMetadata() {
343380 .getLocalDateTime ()
344381 .format (DateTimeFormatter .ISO_DATE )));
345382 if (thumbnail != null ) {
346- metadata .add (makeOpusPictureTag (thumbnail ));
383+ final var pictureTag = makeOpusPictureTag (thumbnail , MAX_THUMBNAIL_SIZE );
384+ if (pictureTag != null ) {
385+ metadata .add (pictureTag );
386+ }
347387 }
348388 }
349389
350390 if (DEBUG ) {
351- Log .d ("OggFromWebMWriter" , "Creating metadata header with this data:" );
352- metadata .forEach (p -> Log .d ("OggFromWebMWriter" , p .first + "=" + p .second ));
391+ Log .d (TAG , "Creating metadata header with this data:" );
392+ metadata .forEach (p -> Log .d (TAG , p .first + "=" + p .second ));
353393 }
354394
355395 return makeOpusTagsHeader (metadata );
356396 } else if ("A_VORBIS" .equals (webmTrack .codecId )) {
357- /**
397+ /*
358398 * See <a href="https://datatracker.ietf.org/doc/html/rfc7845.html#section-5.2">
359399 * RFC7845 5.2</a>
360400 */
@@ -399,43 +439,68 @@ private static byte[] makeOpusMetadataTag(final Pair<String, String> pair) {
399439 * </p>
400440 *
401441 * @param bitmap The bitmap to use as cover art
442+ * @param maxSize The maximum size of the compressed image in bytes.
443+ * If the compressed image exceeds this size,
444+ * it will be further compressed until it fits.
445+ * This is necessary to avoid creating metadata tags
446+ * that are too large for the Ogg page.
402447 * @return The key-value pair representing the tag
448+ * or null if the image cannot be compressed to the maxSize
403449 */
404- private static Pair <String , String > makeOpusPictureTag (final Bitmap bitmap ) {
450+ @ Nullable
451+ private static Pair <String , String > makeOpusPictureTag (final Bitmap bitmap , final int maxSize ) {
405452 // FLAC picture block format (big-endian):
406453 // uint32 picture_type
407- // uint32 mime_length, mime_string
408- // uint32 desc_length, desc_string
454+ // uint32 mime_length,
455+ // mime_string
456+ // uint32 desc_length,
457+ // desc_string
409458 // uint32 width
410459 // uint32 height
411460 // uint32 color_depth
412461 // uint32 colors_indexed
413- // uint32 data_length, data_bytes
462+ // uint32 data_length,
463+ // data_bytes
414464
415465 final ByteArrayOutputStream baos = new ByteArrayOutputStream ();
416- bitmap .compress (Bitmap .CompressFormat .JPEG , 100 , baos );
466+
467+ final var compressedThumbnail = ImageUtils .INSTANCE .compressToSize (bitmap , maxSize );
468+ if (compressedThumbnail == null ) {
469+ if (DEBUG ) {
470+ Log .d (TAG , "failed to compress thumbnail to target size " + maxSize );
471+ }
472+ return null ;
473+ }
474+ compressedThumbnail .getBitmap ().compress (
475+ Bitmap .CompressFormat .JPEG , compressedThumbnail .getQuality (), baos );
417476
418477 final byte [] imageData = baos .toByteArray ();
419478 final byte [] mimeBytes = "image/jpeg" .getBytes (StandardCharsets .UTF_8 );
420479 final byte [] descBytes = new byte [0 ]; // optional description
421480 // fixed ints + mime + desc
422481 final int headerSize = 4 * 8 + mimeBytes .length + descBytes .length ;
423482 final ByteBuffer buf = ByteBuffer .allocate (headerSize + imageData .length );
424- buf .putInt (3 ); // picture type: 3 = Cover (front)
483+ // See https://id3.org/id3v2.3.0#Attached_picture for a full list of picture types
484+ // TODO: allow specifying other picture types, i.e. cover (front) for music albums;
485+ // but this info needs to be provided by the extractor first.
486+ buf .putInt (3 ); // picture type: 3 = Cover (front); 0 would be "Other"
425487 buf .putInt (mimeBytes .length );
426488 buf .put (mimeBytes );
427489 buf .putInt (descBytes .length );
428- // no description
429490 if (descBytes .length > 0 ) {
491+ // currently no description available, might be added later.
430492 buf .put (descBytes );
431493 }
432- buf .putInt (bitmap . getWidth ()); // width (unknown)
433- buf .putInt (bitmap . getHeight ()); // height (unknown)
434- buf .putInt (0 ); // color depth
435- buf .putInt (0 ); // colors indexed
494+ buf .putInt (compressedThumbnail . getBitmap (). getWidth ());
495+ buf .putInt (compressedThumbnail . getBitmap (). getHeight ());
496+ buf .putInt (24 ); // color depth for JPEG and PNG is usually 24 bits
497+ buf .putInt (0 ); // colors indexed (0 for non-indexed images, i.e. JPEG, PNG)
436498 buf .putInt (imageData .length );
437499 buf .put (imageData );
500+
438501 final String b64 = Base64 .getEncoder ().encodeToString (buf .array ());
502+ Log .d (TAG , "Compressed thumbnail size: " + imageData .length
503+ + " bytes, base64 metadata size: " + b64 .length () + " characters" );
439504 return Pair .create ("METADATA_BLOCK_PICTURE" , b64 );
440505 }
441506
@@ -457,7 +522,7 @@ private static byte[] makeOpusTagsHeader(final List<Pair<String, String>> keyVal
457522 .stream ()
458523 .filter (p -> !p .second .isBlank ())
459524 .map (OggFromWebMWriter ::makeOpusMetadataTag )
460- .collect ( Collectors . toUnmodifiableList () );
525+ .toList ( );
461526
462527 final var tagsBytes = tags .stream ().collect (Collectors .summingInt (arr -> arr .length ));
463528
@@ -554,22 +619,22 @@ private boolean addPacketSegment(final int size) {
554619 String .format ("page size is %s but cannot be larger than 65025" , size ));
555620 }
556621
557- int available = (segmentTable .length - segmentTableSize ) * 255 ;
558- final boolean extra = (size % 255 ) == 0 ;
622+ int available = (segmentTable .length - segmentTableSize ) * OGG_SEGMENT_SIZE ;
623+ final boolean extra = (size % OGG_SEGMENT_SIZE ) == 0 ;
559624
560625 if (extra ) {
561626 // add a zero byte entry in the table
562- // required to indicate the sample size is multiple of 255
563- available -= 255 ;
627+ // required to indicate the sample size is a multiple of 255 (OGG_SEGMENT_SIZE)
628+ available -= OGG_SEGMENT_SIZE ;
564629 }
565630
566631 // check if possible add the segment, without overflow the table
567632 if (available < size ) {
568633 return false ; // not enough space on the page
569634 }
570635
571- for (int seg = size ; seg > 0 ; seg -= 255 ) {
572- segmentTable [segmentTableSize ++] = (byte ) Math .min (seg , 255 );
636+ for (int seg = size ; seg > 0 ; seg -= OGG_SEGMENT_SIZE ) {
637+ segmentTable [segmentTableSize ++] = (byte ) Math .min (seg , OGG_SEGMENT_SIZE );
573638 }
574639
575640 if (extra ) {
0 commit comments