From 558a2f325c7ecd8208d5e531cd9941b85de21842 Mon Sep 17 00:00:00 2001
From: tobigr
Date: Sat, 7 Feb 2026 16:47:34 +0100
Subject: [PATCH] Add SongMetadata for YouTube Music and Bandcamp

---
 .../extractors/BandcampStreamExtractor.java   |  28 +++
 .../youtube/YoutubeParsingHelper.java         |  77 +++++++
 .../extractors/YoutubeStreamExtractor.java    |  22 ++
 .../extractor/stream/SongMetadata.java        | 192 ++++++++++++++++++
 .../extractor/stream/StreamExtractor.java     |  19 ++
 .../newpipe/extractor/stream/StreamInfo.java  |  34 +++
 6 files changed, 372 insertions(+)
 create mode 100644 extractor/src/main/java/org/schabi/newpipe/extractor/stream/SongMetadata.java

diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampStreamExtractor.java
index dfdbb56965..dc41213623 100644
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampStreamExtractor.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampStreamExtractor.java
@@ -26,6 +26,7 @@
 import org.schabi.newpipe.extractor.playlist.PlaylistInfoItemsCollector;
 import org.schabi.newpipe.extractor.stream.AudioStream;
 import org.schabi.newpipe.extractor.stream.Description;
+import org.schabi.newpipe.extractor.stream.SongMetadata;
 import org.schabi.newpipe.extractor.stream.StreamExtractor;
 import org.schabi.newpipe.extractor.stream.StreamType;
 import org.schabi.newpipe.extractor.stream.VideoStream;
@@ -244,4 +245,31 @@ public List getTags() {
                 .map(Element::text)
                 .collect(Collectors.toList());
     }
+
+    /**
+     * Build the song metadata from the {@code current} JSON object and the stream getters.
+     *
+     * @return the song metadata with title, artist, album, track number, release date
+     *         and, if the first package has one, the label
+     * @throws ParsingException if extracting the name, uploader name or upload date fails
+     */
+    @Nullable
+    @Override
+    public SongMetadata getSongMetadata() throws ParsingException {
+        final SongMetadata.Builder builder = new SongMetadata.Builder(
+                getName(), getUploaderName())
+                .setAlbum(current.getString("album_title"))
+                // getInt(key) alone would report a missing track number as 0
+                .setTrack(current.getInt("track_number", SongMetadata.TRACK_UNKNOWN))
+                .setReleaseDate(getUploadDate());
+        if (!current.getArray("packages").isEmpty()) {
+            // getString returns null if the package has no label
+            final String label = current.getArray("packages").getObject(0).getString("label");
+            if (label != null && !label.isEmpty()) {
+                builder.setLabel(label);
+            }
+        }
+        return builder.build();
+    }
+
 }
diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java
index 19ae0ac0af..70e66168e3 100644
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java
@@ -44,6 +44,7 @@
 import com.grack.nanojson.JsonParserException;
 import com.grack.nanojson.JsonWriter;
 
+import org.jsoup.Jsoup;
 import org.jsoup.nodes.Entities;
 import org.schabi.newpipe.extractor.Image;
 import org.schabi.newpipe.extractor.Image.ResolutionLevel;
@@ -54,9 +55,12 @@
 import org.schabi.newpipe.extractor.exceptions.ParsingException;
 import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
 import org.schabi.newpipe.extractor.localization.ContentCountry;
+import org.schabi.newpipe.extractor.localization.DateWrapper;
 import org.schabi.newpipe.extractor.localization.Localization;
 import org.schabi.newpipe.extractor.playlist.PlaylistInfo;
 import org.schabi.newpipe.extractor.stream.AudioTrackType;
+import org.schabi.newpipe.extractor.stream.Description;
+import org.schabi.newpipe.extractor.stream.SongMetadata;
 import org.schabi.newpipe.extractor.utils.JsonUtils;
 import org.schabi.newpipe.extractor.utils.Parser;
 import org.schabi.newpipe.extractor.utils.RandomStringFromAlphabetGenerator;
@@ -66,6 +70,8 @@
 import java.net.MalformedURLException;
 import java.net.URL;
 import java.nio.charset.StandardCharsets;
+import java.time.Instant;
+import java.time.format.DateTimeParseException;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Locale;
@@ -73,6 +79,7 @@
 import java.util.Optional;
 import java.util.Random;
 import java.util.Set;
+import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import java.util.stream.Collectors;
 import java.util.stream.Stream;
@@ -1583,4 +1590,74 @@ public static JsonObject getFirstCollaborator(final JsonObject navigationEndpoin
             return null;
         }
     }
+
+    /**
+     * Pattern matching the description that YouTube auto-generates for music uploads.
+     * <p>
+     * Groups: 1 label, 2 title, 3 artist(s), 4 album, 5 publisher,
+     * 6 release date (absent if there is no "Released on" line),
+     * 7 credit lines such as performers and composers (may be empty).
+     * </p>
+     */
+    private static final Pattern SONG_METADATA_PATTERN = Pattern.compile(
+            "Provided to YouTube by (.+)\\n\\n" // label
+            // lazy title: with several artists, all of them end up in the artist group
+            + "(.+?)\\s\\u00b7\\s(.+)\\n\\n" // title and artist
+            + "(.+)\\n\\n" // album
+            + "(.+)\\n" // publisher
+            + "(?:\\nReleased on:\\s([\\d-]+)\\n)?" // release date
+            + "([\\s\\S]*?)\\n" // performers, composers, etc.
+            + "Auto-generated by YouTube\\."
+    );
+
+    /**
+     * Extract the song metadata from a description auto-generated by YouTube for music uploads.
+     *
+     * @param description the description of the stream
+     * @return the song metadata, or {@code null} if the description does not match the
+     *         auto-generated format
+     * @throws ParsingException declared for callers; not thrown by the current implementation
+     */
+    @Nullable
+    public static SongMetadata getSongMetadata(@Nonnull final Description description)
+            throws ParsingException {
+        final String descriptionText;
+        if (description.getType() == Description.PLAIN_TEXT) {
+            descriptionText = description.getContent();
+        } else {
+            // Element.text() normalizes whitespace and would drop the line breaks the pattern
+            // relies on, so turn <br> tags into newlines and read the text un-normalized
+            descriptionText = Jsoup.parse(
+                    description.getContent().replaceAll("(?i)<br\\s*/?>", "\n")).wholeText();
+        }
+        final Matcher matcher = SONG_METADATA_PATTERN.matcher(descriptionText);
+        if (!matcher.find()) {
+            return null;
+        }
+
+        final SongMetadata.Builder builder =
+                new SongMetadata.Builder(matcher.group(2), matcher.group(3))
+                        .setLabel(matcher.group(1))
+                        .setAlbum(matcher.group(4))
+                        .setCopyright(matcher.group(5).replace("℗", "").trim());
+
+        final String releaseDateString = matcher.group(6);
+        if (releaseDateString != null) {
+            try {
+                builder.setReleaseDate(new DateWrapper(
+                        Instant.parse(releaseDateString + "T00:00:00Z"), false));
+            } catch (final DateTimeParseException ignored) {
+                // The release date is not critical information: keep the other metadata
+            }
+        }
+
+        for (final String line : matcher.group(7).split("\\n")) {
+            // Credit lines look like "Role: Name"; the limit keeps names containing a colon
+            final String[] parts = line.split(":", 2);
+            if (parts.length == 2 && !parts[1].trim().isEmpty()) {
+                builder.addPerformer(parts[1].trim());
+            }
+        }
+        return builder.build();
+    }
 }
diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java
index 0fa6ac21d8..89004960bb 100644
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java
@@ -73,6 +73,7 @@
 import org.schabi.newpipe.extractor.stream.DeliveryMethod;
 import org.schabi.newpipe.extractor.stream.Description;
 import org.schabi.newpipe.extractor.stream.Frameset;
+import org.schabi.newpipe.extractor.stream.SongMetadata;
 import org.schabi.newpipe.extractor.stream.Stream;
 import org.schabi.newpipe.extractor.stream.StreamExtractor;
 import org.schabi.newpipe.extractor.stream.StreamSegment;
@@ -1621,6 +1622,27 @@ public List getMetaInfo() throws ParsingException {
                 .getArray("contents"));
     }
 
+    /**
+     * Get the song metadata parsed from the attributed description of the video.
+     *
+     * @return the song metadata, or {@code null} if there is no attributed description or
+     *         it is not in the format auto-generated by YouTube for music uploads
+     */
+    @Nullable
+    @Override
+    public SongMetadata getSongMetadata() throws ParsingException {
+        assertPageFetched();
+        // The song info is only available for music videos
+        final String attributedDescription = getVideoSecondaryInfoRenderer()
+                .getObject("attributedDescription")
+                .getString("content");
+        if (isNullOrEmpty(attributedDescription)) {
+            return null;
+        }
+        return YoutubeParsingHelper.getSongMetadata(
+                new Description(attributedDescription, Description.PLAIN_TEXT));
+    }
+
     /**
      * Set the {@link PoTokenProvider} instance to be used for fetching {@code poToken}s.
      *
diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/stream/SongMetadata.java b/extractor/src/main/java/org/schabi/newpipe/extractor/stream/SongMetadata.java
new file mode 100644
index 0000000000..dd22bfba2e
--- /dev/null
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/stream/SongMetadata.java
@@ -0,0 +1,192 @@
+package org.schabi.newpipe.extractor.stream;
+
+import org.schabi.newpipe.extractor.localization.DateWrapper;
+
+import javax.annotation.Nonnull;
+import javax.annotation.Nullable;
+
+import java.io.Serializable;
+import java.time.Duration;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * Class representing the information on a song or single.
+ * <p>
+ * All fields are final and the performer list is unmodifiable.
+ * Instances can be created with the {@link Builder}.
+ * </p>
+ */
+public final class SongMetadata implements Serializable {
+    /**
+     * Constant representing that the track number of a {@link SongMetadata} is unknown.
+     */
+    public static final int TRACK_UNKNOWN = -1;
+
+    @Nonnull
+    public final String title;
+    @Nonnull
+    public final String artist;
+    /**
+     * Unmodifiable list of the performers of the song.
+     */
+    @Nonnull
+    public final List<String> performer;
+    @Nullable
+    public final String composer;
+    @Nullable
+    public final String genre;
+    @Nullable
+    public final String album;
+    /**
+     * The track number, or {@link #TRACK_UNKNOWN} if it is unknown.
+     */
+    public final int track;
+    @Nullable
+    public final Duration duration;
+    @Nullable
+    public final DateWrapper releaseDate;
+    @Nullable
+    public final String label;
+    @Nullable
+    public final String copyright;
+    @Nullable
+    public final String location;
+
+    /**
+     * Create a new {@link SongMetadata}.
+     * <p>
+     * The performer list is copied, so later changes to the passed list do not affect
+     * the created instance.
+     * </p>
+     */
+    public SongMetadata(@Nonnull final String title, @Nonnull final String artist,
+                        @Nonnull final List<String> performer, @Nullable final String composer,
+                        @Nullable final String genre, @Nullable final String album,
+                        final int track, @Nullable final Duration duration,
+                        @Nullable final DateWrapper releaseDate, @Nullable final String label,
+                        @Nullable final String copyright, @Nullable final String location) {
+        this.title = title;
+        this.artist = artist;
+        this.performer = Collections.unmodifiableList(new ArrayList<>(performer));
+        this.composer = composer;
+        this.genre = genre;
+        this.album = album;
+        this.track = track;
+        this.duration = duration;
+        this.releaseDate = releaseDate;
+        this.label = label;
+        this.copyright = copyright;
+        this.location = location;
+    }
+
+    /**
+     * Builder for {@link SongMetadata}: title and artist are mandatory, everything else is
+     * optional.
+     */
+    public static final class Builder {
+        @Nonnull
+        private final String mTitle;
+        @Nonnull
+        private final String mArtist;
+        @Nonnull
+        private List<String> mPerformer = new ArrayList<>();
+        @Nullable
+        private String mComposer;
+        @Nullable
+        private String mGenre;
+        @Nullable
+        private String mAlbum;
+        private int mTrack = TRACK_UNKNOWN;
+        @Nullable
+        private Duration mDuration;
+        @Nullable
+        private DateWrapper mReleaseDate;
+        @Nullable
+        private String mLabel;
+        @Nullable
+        private String mCopyright;
+        @Nullable
+        private String mLocation;
+
+        public Builder(@Nonnull final String title, @Nonnull final String artist) {
+            this.mTitle = title;
+            this.mArtist = artist;
+        }
+
+        /**
+         * Replace the performers with a copy of the given list.
+         * <p>
+         * Copying keeps {@link #addPerformer(String)} working for immutable lists and
+         * prevents it from modifying the list of the caller.
+         * </p>
+         */
+        public Builder setPerformer(@Nonnull final List<String> performer) {
+            this.mPerformer = new ArrayList<>(performer);
+            return this;
+        }
+
+        public Builder addPerformer(@Nonnull final String performer) {
+            this.mPerformer.add(performer);
+            return this;
+        }
+
+        public Builder setComposer(@Nullable final String composer) {
+            this.mComposer = composer;
+            return this;
+        }
+
+        public Builder setGenre(@Nullable final String genre) {
+            this.mGenre = genre;
+            return this;
+        }
+
+        public Builder setAlbum(@Nullable final String album) {
+            this.mAlbum = album;
+            return this;
+        }
+
+        public Builder setTrack(final int track) {
+            this.mTrack = track;
+            return this;
+        }
+
+        public Builder setDuration(@Nullable final Duration duration) {
+            this.mDuration = duration;
+            return this;
+        }
+
+        public Builder setReleaseDate(@Nullable final DateWrapper releaseDate) {
+            this.mReleaseDate = releaseDate;
+            return this;
+        }
+
+        public Builder setLabel(@Nullable final String label) {
+            this.mLabel = label;
+            return this;
+        }
+
+        public Builder setCopyright(@Nullable final String copyright) {
+            this.mCopyright = copyright;
+            return this;
+        }
+
+        public Builder setLocation(@Nullable final String location) {
+            this.mLocation = location;
+            return this;
+        }
+
+        /**
+         * Build the {@link SongMetadata}.
+         * <p>
+         * The result holds its own copy of the performers, so the builder can be reused.
+         * </p>
+         */
+        public SongMetadata build() {
+            return new SongMetadata(
+                    mTitle, mArtist, mPerformer, mComposer, mGenre,
+                    mAlbum, mTrack, mDuration, mReleaseDate, mLabel, mCopyright, mLocation);
+        }
+    }
+}
diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/stream/StreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/stream/StreamExtractor.java
index 63650a7906..3411a7fcf0 100644
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/stream/StreamExtractor.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/stream/StreamExtractor.java
@@ -593,6 +593,25 @@ public ContentAvailability getContentAvailability() throws ParsingException {
         return ContentAvailability.UNKNOWN;
     }
 
+    /**
+     * Get the song metadata if this stream is a music stream and the service provides this
+     * information.
+     * <p>
+     * A {@link SongMetadata} is only provided if the information on the song does not match
+     * the {@link StreamInfo}'s title and uploader or if the service explicitly provides this
+     * information. Otherwise, the {@link StreamInfo}'s title and uploader are assumed to be
+     * the song's title and artist.
+     * </p>
+     *
+     * @return the song metadata or {@code null} if this stream is not a music stream
+     * or if the service does not provide this information
+     * @throws ParsingException if there is an error in the extraction
+     */
+    @Nullable
+    public SongMetadata getSongMetadata() throws ParsingException {
+        return null;
+    }
+
     public enum Privacy {
         PUBLIC,
         UNLISTED,
diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/stream/StreamInfo.java b/extractor/src/main/java/org/schabi/newpipe/extractor/stream/StreamInfo.java
index 62fb6bbf74..1b0f2f018a 100644
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/stream/StreamInfo.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/stream/StreamInfo.java
@@ -37,6 +37,7 @@
 import java.util.Locale;
 
 import javax.annotation.Nonnull;
+import javax.annotation.Nullable;
 
 import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
 
@@ -335,6 +336,11 @@ private static void extractOptionalData(final StreamInfo streamInfo,
         } catch (final Exception e) {
             streamInfo.addError(e);
         }
+        try {
+            streamInfo.setSongMetadata(extractor.getSongMetadata());
+        } catch (final Exception e) {
+            streamInfo.addError(e);
+        }
 
         streamInfo.setRelatedItems(ExtractorHelper.getRelatedItemsOrLogError(streamInfo,
                 extractor));
@@ -388,6 +394,8 @@ private static void extractOptionalData(final StreamInfo streamInfo,
     private boolean shortFormContent = false;
     @Nonnull
     private ContentAvailability contentAvailability = ContentAvailability.AVAILABLE;
+    @Nullable
+    private SongMetadata songMetadata = null;
 
     /**
      * Preview frames, e.g. for the storyboard / seekbar thumbnail preview
@@ -743,4 +751,30 @@ public ContentAvailability getContentAvailability() {
     public void setContentAvailability(@Nonnull final ContentAvailability availability) {
         this.contentAvailability = availability;
     }
+
+    /**
+     * Get the song metadata if this stream is a music stream
+     * and the service provides this information.
+     * <p>
+     * A {@link SongMetadata} is only provided if the information on the song does not match
+     * the {@link StreamInfo}'s title and uploader or if the service explicitly provides
+     * this information. Otherwise, the {@link StreamInfo}'s title and uploader are assumed
+     * to be the song's title and artist.
+     * </p>
+     *
+     * @return the song metadata or {@code null}
+     */
+    @Nullable
+    public SongMetadata getSongMetadata() {
+        return songMetadata;
+    }
+
+    /**
+     * Set the song metadata of this stream.
+     *
+     * @param songMetadata the song metadata, or {@code null} if there is none
+     */
+    public void setSongMetadata(@Nullable final SongMetadata songMetadata) {
+        this.songMetadata = songMetadata;
+    }
 }