Skip to content

Commit 2361e03

Browse files
committed
Add SongMetadata for YouTube Music
1 parent c1336db commit 2361e03

5 files changed

Lines changed: 271 additions & 0 deletions

File tree

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
import com.grack.nanojson.JsonParserException;
4545
import com.grack.nanojson.JsonWriter;
4646

47+
import org.jsoup.Jsoup;
4748
import org.jsoup.nodes.Entities;
4849
import org.schabi.newpipe.extractor.Image;
4950
import org.schabi.newpipe.extractor.Image.ResolutionLevel;
@@ -54,9 +55,12 @@
5455
import org.schabi.newpipe.extractor.exceptions.ParsingException;
5556
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
5657
import org.schabi.newpipe.extractor.localization.ContentCountry;
58+
import org.schabi.newpipe.extractor.localization.DateWrapper;
5759
import org.schabi.newpipe.extractor.localization.Localization;
5860
import org.schabi.newpipe.extractor.playlist.PlaylistInfo;
5961
import org.schabi.newpipe.extractor.stream.AudioTrackType;
62+
import org.schabi.newpipe.extractor.stream.Description;
63+
import org.schabi.newpipe.extractor.stream.SongMetadata;
6064
import org.schabi.newpipe.extractor.utils.JsonUtils;
6165
import org.schabi.newpipe.extractor.utils.Parser;
6266
import org.schabi.newpipe.extractor.utils.RandomStringFromAlphabetGenerator;
@@ -66,13 +70,16 @@
6670
import java.net.MalformedURLException;
6771
import java.net.URL;
6872
import java.nio.charset.StandardCharsets;
73+
import java.time.Instant;
74+
import java.time.format.DateTimeParseException;
6975
import java.util.HashMap;
7076
import java.util.List;
7177
import java.util.Locale;
7278
import java.util.Map;
7379
import java.util.Optional;
7480
import java.util.Random;
7581
import java.util.Set;
82+
import java.util.regex.Matcher;
7683
import java.util.regex.Pattern;
7784
import java.util.stream.Collectors;
7885
import java.util.stream.Stream;
@@ -1583,4 +1590,49 @@ public static JsonObject getFirstCollaborator(final JsonObject navigationEndpoin
15831590
return null;
15841591
}
15851592
}
1593+
1594+
private static final Pattern SONG_METADATA_PATTERN = Pattern.compile(
1595+
"Provided to YouTube by (.+)\\n\\n" // label
1596+
+ "(.+)\\s\\u00b7\\s([\\w\\s]+)\\n\\n" // title and artist
1597+
+ "(.+)\\n\\n" // album
1598+
+ "(.+)\\n\\n" // publisher
1599+
+ "Released on:\\s([\\d-]+)\\n" // release date
1600+
+ "([\\s\\S]+)\\n" // performers, composers, etc.
1601+
+ "Auto-generated by YouTube."
1602+
);
1603+
@Nullable
1604+
public static SongMetadata getSongMetadata(@Nonnull final Description description) throws ParsingException {
1605+
final String descriptionText;
1606+
if (description.getType() == Description.PLAIN_TEXT) {
1607+
descriptionText = description.getContent();
1608+
} else {
1609+
descriptionText = Jsoup.parse(description.getContent()).text();
1610+
}
1611+
final Matcher matcher = SONG_METADATA_PATTERN.matcher(descriptionText);
1612+
if (matcher.find()) {
1613+
final var builder = new SongMetadata.Builder(matcher.group(2), matcher.group(3))
1614+
.setAlbum(matcher.group(4))
1615+
.setCopyright(matcher.group(5).replace("℗", "").trim());
1616+
final String releaseDateString = matcher.group(6);
1617+
try {
1618+
builder.setReleaseDate(new DateWrapper(
1619+
Instant.parse(releaseDateString + "T00:00:00Z"), false));
1620+
} catch (final DateTimeParseException e) {
1621+
// Ignore parsing errors for the release date, as it's not critical information.
1622+
}
1623+
try {
1624+
final String collaborators = matcher.group(7);
1625+
for (final String line : collaborators.split("\\n")) {
1626+
final String[] parts = line.split(":");
1627+
if (parts.length == 2) {
1628+
builder.addPerformer(parts[1].trim());
1629+
}
1630+
}
1631+
} catch (Exception ignored) {
1632+
// Ignore parsing errors for collaborators, as it's not critical information.
1633+
}
1634+
return builder.build();
1635+
}
1636+
return null;
1637+
}
15861638
}

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@
7272
import org.schabi.newpipe.extractor.stream.DeliveryMethod;
7373
import org.schabi.newpipe.extractor.stream.Description;
7474
import org.schabi.newpipe.extractor.stream.Frameset;
75+
import org.schabi.newpipe.extractor.stream.SongMetadata;
7576
import org.schabi.newpipe.extractor.stream.Stream;
7677
import org.schabi.newpipe.extractor.stream.StreamExtractor;
7778
import org.schabi.newpipe.extractor.stream.StreamSegment;
@@ -1616,6 +1617,21 @@ public List<MetaInfo> getMetaInfo() throws ParsingException {
16161617
.getArray("contents"));
16171618
}
16181619

1620+
@Nullable
1621+
@Override
1622+
public SongMetadata getSongInfo() throws ParsingException {
1623+
assertPageFetched();
1624+
// The song info is only available for music videos
1625+
final String attributedDescription = getVideoSecondaryInfoRenderer()
1626+
.getObject("attributedDescription")
1627+
.getString("content");
1628+
if (isNullOrEmpty(attributedDescription)) {
1629+
return null;
1630+
}
1631+
return YoutubeParsingHelper.getSongMetadata(
1632+
new Description(attributedDescription, Description.PLAIN_TEXT));
1633+
}
1634+
16191635
/**
16201636
* Set the {@link PoTokenProvider} instance to be used for fetching {@code poToken}s.
16211637
*
Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
2+
package org.schabi.newpipe.extractor.stream;
3+
4+
import org.schabi.newpipe.extractor.localization.DateWrapper;
5+
6+
import javax.annotation.Nonnull;
7+
import javax.annotation.Nullable;
8+
9+
import java.io.Serializable;
10+
import java.time.Duration;
11+
import java.util.ArrayList;
12+
import java.util.Collections;
13+
import java.util.List;
14+
15+
/**
16+
* Class representing the information on a song or single.
17+
*/
18+
public final class SongMetadata implements Serializable {
19+
public static final int TRACK_UNKNOWN = -1;
20+
21+
@Nonnull
22+
public final String title;
23+
@Nonnull
24+
public final String artist;
25+
@Nonnull
26+
public final List<String> performer;
27+
@Nullable
28+
public final String composer;
29+
@Nullable
30+
public final String genre;
31+
@Nullable
32+
public final String album;
33+
public final int track;
34+
@Nullable
35+
public final Duration duration;
36+
@Nullable
37+
public final DateWrapper releaseDate;
38+
@Nullable
39+
public final String label;
40+
@Nullable
41+
public final String copyright;
42+
@Nullable
43+
public final String location;
44+
45+
public SongMetadata(@Nonnull String title, @Nonnull String artist,
46+
@Nonnull List<String> performer, @Nullable String composer,
47+
@Nullable String genre, @Nullable String album, int track,
48+
@Nullable Duration duration, @Nullable DateWrapper releaseDate,
49+
@Nullable String label, @Nullable String copyright,
50+
@Nullable String location) {
51+
this.title = title;
52+
this.artist = artist;
53+
this.performer = performer;
54+
this.composer = composer;
55+
this.genre = genre;
56+
this.album = album;
57+
this.track = track;
58+
this.duration = duration;
59+
this.releaseDate = releaseDate;
60+
this.label = label;
61+
this.copyright = copyright;
62+
this.location = location;
63+
}
64+
65+
public static class Builder {
66+
@Nonnull
67+
private final String title;
68+
@Nonnull
69+
private final String artist;
70+
@Nonnull
71+
private List<String> performer = new ArrayList<>();
72+
@Nullable
73+
private String composer;
74+
@Nullable
75+
private String genre;
76+
@Nullable
77+
private String album;
78+
private int track = TRACK_UNKNOWN;
79+
@Nullable
80+
private Duration duration;
81+
@Nullable
82+
private DateWrapper releaseDate;
83+
@Nullable
84+
private String label;
85+
@Nullable
86+
private String copyright;
87+
@Nullable
88+
private String location;
89+
90+
public Builder(@Nonnull String title, @Nonnull String artist) {
91+
this.title = title;
92+
this.artist = artist;
93+
}
94+
95+
public Builder setPerformer(@Nonnull List<String> performer) {
96+
this.performer = performer;
97+
return this;
98+
}
99+
100+
public Builder addPerformer(@Nonnull String performer) {
101+
this.performer.add(performer);
102+
return this;
103+
}
104+
105+
106+
public Builder setComposer(@Nullable String composer) {
107+
this.composer = composer;
108+
return this;
109+
}
110+
111+
public Builder setGenre(@Nullable String genre) {
112+
this.genre = genre;
113+
return this;
114+
}
115+
116+
public Builder setAlbum(@Nullable String album) {
117+
this.album = album;
118+
return this;
119+
}
120+
121+
public Builder setTrack(int track) {
122+
this.track = track;
123+
return this;
124+
}
125+
126+
public Builder setDuration(@Nullable Duration duration) {
127+
this.duration = duration;
128+
return this;
129+
}
130+
131+
public Builder setReleaseDate(@Nullable DateWrapper releaseDate) {
132+
this.releaseDate = releaseDate;
133+
return this;
134+
}
135+
136+
public Builder setLabel(@Nullable String label) {
137+
this.label = label;
138+
return this;
139+
}
140+
141+
public Builder setCopyright(@Nullable String copyright) {
142+
this.copyright = copyright;
143+
return this;
144+
}
145+
146+
public Builder setLocation(@Nullable String location) {
147+
this.location = location;
148+
return this;
149+
}
150+
151+
public SongMetadata build() {
152+
return new SongMetadata(
153+
title, artist, Collections.unmodifiableList(performer), composer, genre, album,
154+
track, duration, releaseDate, label, copyright, location);
155+
}
156+
}
157+
158+
}

extractor/src/main/java/org/schabi/newpipe/extractor/stream/StreamExtractor.java

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -593,6 +593,23 @@ public ContentAvailability getContentAvailability() throws ParsingException {
593593
return ContentAvailability.UNKNOWN;
594594
}
595595

596+
/**
597+
* Get the song info if this stream is a music stream and the service provides this information.
598+
* <p>
599+
* A SongInfo is only provided if the information on the song does not match the StreamInfo's title and uploader
600+
* or if the service explicitly provides this information.
601+
* Otherwise, the StreamInfo's title and uploader are assumed to be the song's title and artist.
602+
* </p>
603+
*
604+
* @return the song info or null if this stream is not a music stream
605+
* or if the service does not provide this information
606+
* @throws ParsingException if there is an error in the extraction
607+
*/
608+
@Nullable
609+
public SongMetadata getSongInfo() throws ParsingException {
610+
return null;
611+
}
612+
596613
public enum Privacy {
597614
PUBLIC,
598615
UNLISTED,

extractor/src/main/java/org/schabi/newpipe/extractor/stream/StreamInfo.java

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
import java.util.Locale;
3838

3939
import javax.annotation.Nonnull;
40+
import javax.annotation.Nullable;
4041

4142
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
4243

@@ -335,6 +336,11 @@ private static void extractOptionalData(final StreamInfo streamInfo,
335336
} catch (final Exception e) {
336337
streamInfo.addError(e);
337338
}
339+
try {
340+
streamInfo.setSongMetadata(extractor.getSongInfo());
341+
} catch (final Exception e) {
342+
streamInfo.addError(e);
343+
}
338344

339345
streamInfo.setRelatedItems(ExtractorHelper.getRelatedItemsOrLogError(streamInfo,
340346
extractor));
@@ -388,6 +394,7 @@ private static void extractOptionalData(final StreamInfo streamInfo,
388394
private boolean shortFormContent = false;
389395
@Nonnull
390396
private ContentAvailability contentAvailability = ContentAvailability.AVAILABLE;
397+
private SongMetadata songMetadata = null;
391398

392399
/**
393400
* Preview frames, e.g. for the storyboard / seekbar thumbnail preview
@@ -743,4 +750,25 @@ public ContentAvailability getContentAvailability() {
743750
public void setContentAvailability(@Nonnull final ContentAvailability availability) {
744751
this.contentAvailability = availability;
745752
}
753+
754+
/**
755+
* Get the song metadata if this stream is a music stream
756+
* and the service provides this information.
757+
* <p>
758+
* A {@link SongMetadata} is only provided if the information on the song does not match
759+
* the {@link StreamInfo}'s title and uploader or if the service explicitly provides
760+
* this information. Otherwise, the {@link StreamInfo}'s title and uploader are assumed
761+
* to be the song's title and artist.
762+
* </p>
763+
*
764+
* @return the song metadata or {@code null}
765+
*/
766+
@Nullable
767+
public SongMetadata getSongMetadata() {
768+
return songMetadata;
769+
}
770+
771+
public void setSongMetadata(final SongMetadata songMetadata) {
772+
this.songMetadata = songMetadata;
773+
}
746774
}

0 commit comments

Comments
 (0)