Skip to content

Commit c47cc54

Browse files
B0polStypox
authored and committed
Extract metadata for YouTube, SoundCloud & MediaCCC
1 parent f71cfd4 commit c47cc54

22 files changed

Lines changed: 440 additions & 100 deletions

extractor/src/main/java/org/schabi/newpipe/extractor/localization/DateWrapper.java

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
package org.schabi.newpipe.extractor.localization;
22

3-
import edu.umd.cs.findbugs.annotations.NonNull;
43

4+
import javax.annotation.Nonnull;
55
import java.io.Serializable;
66
import java.time.OffsetDateTime;
77
import java.time.ZoneOffset;
@@ -12,49 +12,49 @@
1212
* A wrapper class that provides a field to describe if the date/time is precise or just an approximation.
1313
*/
1414
public class DateWrapper implements Serializable {
15-
@NonNull private final OffsetDateTime offsetDateTime;
15+
@Nonnull
16+
private final OffsetDateTime offsetDateTime;
1617
private final boolean isApproximation;
1718

1819
/**
1920
* @deprecated Use {@link #DateWrapper(OffsetDateTime)} instead.
2021
*/
2122
@Deprecated
22-
public DateWrapper(@NonNull Calendar calendar) {
23+
public DateWrapper(@Nonnull Calendar calendar) {
2324
this(calendar, false);
2425
}
2526

2627
/**
2728
* @deprecated Use {@link #DateWrapper(OffsetDateTime, boolean)} instead.
2829
*/
2930
@Deprecated
30-
public DateWrapper(@NonNull Calendar calendar, boolean isApproximation) {
31+
public DateWrapper(@Nonnull Calendar calendar, boolean isApproximation) {
3132
this(OffsetDateTime.ofInstant(calendar.toInstant(), ZoneOffset.UTC), isApproximation);
3233
}
3334

34-
public DateWrapper(@NonNull OffsetDateTime offsetDateTime) {
35+
public DateWrapper(@Nonnull OffsetDateTime offsetDateTime) {
3536
this(offsetDateTime, false);
3637
}
3738

38-
public DateWrapper(@NonNull OffsetDateTime offsetDateTime, boolean isApproximation) {
39+
public DateWrapper(@Nonnull OffsetDateTime offsetDateTime, boolean isApproximation) {
3940
this.offsetDateTime = offsetDateTime.withOffsetSameInstant(ZoneOffset.UTC);
4041
this.isApproximation = isApproximation;
4142
}
4243

4344
/**
4445
* @return the wrapped date/time as a {@link Calendar}.
45-
*
4646
* @deprecated use {@link #offsetDateTime()} instead.
4747
*/
4848
@Deprecated
49-
@NonNull
49+
@Nonnull
5050
public Calendar date() {
5151
return GregorianCalendar.from(offsetDateTime.toZonedDateTime());
5252
}
5353

5454
/**
5555
* @return the wrapped date/time.
5656
*/
57-
@NonNull
57+
@Nonnull
5858
public OffsetDateTime offsetDateTime() {
5959
return offsetDateTime;
6060
}

extractor/src/main/java/org/schabi/newpipe/extractor/localization/Localization.java

Lines changed: 28 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,19 @@
11
package org.schabi.newpipe.extractor.localization;
22

3+
import org.schabi.newpipe.extractor.exceptions.ParsingException;
4+
35
import javax.annotation.Nonnull;
46
import javax.annotation.Nullable;
57
import java.io.Serializable;
6-
import java.util.ArrayList;
7-
import java.util.Collections;
8-
import java.util.List;
9-
import java.util.Locale;
10-
import java.util.Objects;
8+
import java.util.*;
119

1210
public class Localization implements Serializable {
1311
public static final Localization DEFAULT = new Localization("en", "GB");
1412

15-
@Nonnull private final String languageCode;
16-
@Nullable private final String countryCode;
13+
@Nonnull
14+
private final String languageCode;
15+
@Nullable
16+
private final String countryCode;
1717

1818
/**
1919
* @param localizationCodeList a list of localization code, formatted like {@link #getLocalizationCode()}
@@ -100,4 +100,25 @@ public int hashCode() {
100100
result = 31 * result + Objects.hashCode(countryCode);
101101
return result;
102102
}
103+
104+
/**
105+
* Converts a three letter language code (ISO 639-2/T) to a Locale
106+
* in the limit of Java Locale class.
107+
*
108+
* @param code a three letter language code
109+
* @return the Locale corresponding
110+
*/
111+
public static Locale getLocaleFromThreeLetterCode(@Nonnull String code) throws ParsingException {
112+
String[] languages = Locale.getISOLanguages();
113+
Map<String, Locale> localeMap = new HashMap<>(languages.length);
114+
for (String language : languages) {
115+
final Locale locale = new Locale(language);
116+
localeMap.put(locale.getISO3Language(), locale);
117+
}
118+
if (localeMap.containsKey(code)) {
119+
return localeMap.get(code);
120+
} else {
121+
throw new ParsingException("Could not get Locale from this three letter language code" + code);
122+
}
123+
}
103124
}

extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampRadioStreamExtractor.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,4 +140,10 @@ public String getCategory() {
140140
public List<String> getTags() {
141141
return Collections.emptyList();
142142
}
143+
144+
@Nonnull
145+
@Override
146+
public Privacy getPrivacy() {
147+
return Privacy.PUBLIC;
148+
}
143149
}

extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampStreamExtractor.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -262,8 +262,8 @@ public String getHost() {
262262

263263
@Nonnull
264264
@Override
265-
public String getPrivacy() {
266-
return "";
265+
public Privacy getPrivacy() {
266+
return Privacy.PUBLIC;
267267
}
268268

269269
@Nonnull

extractor/src/main/java/org/schabi/newpipe/extractor/services/media_ccc/extractors/MediaCCCLiveStreamExtractor.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -257,8 +257,8 @@ public String getHost() throws ParsingException {
257257

258258
@Nonnull
259259
@Override
260-
public String getPrivacy() {
261-
return "Public";
260+
public Privacy getPrivacy() {
261+
return Privacy.PUBLIC;
262262
}
263263

264264
@Nonnull

extractor/src/main/java/org/schabi/newpipe/extractor/services/media_ccc/extractors/MediaCCCStreamExtractor.java

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,19 @@
1212
import org.schabi.newpipe.extractor.exceptions.ParsingException;
1313
import org.schabi.newpipe.extractor.linkhandler.LinkHandler;
1414
import org.schabi.newpipe.extractor.localization.DateWrapper;
15+
import org.schabi.newpipe.extractor.localization.Localization;
1516
import org.schabi.newpipe.extractor.services.media_ccc.linkHandler.MediaCCCConferenceLinkHandlerFactory;
1617
import org.schabi.newpipe.extractor.services.media_ccc.linkHandler.MediaCCCStreamLinkHandlerFactory;
1718
import org.schabi.newpipe.extractor.stream.*;
19+
import org.schabi.newpipe.extractor.utils.JsonUtils;
1820

1921
import javax.annotation.Nonnull;
2022
import javax.annotation.Nullable;
2123
import java.io.IOException;
22-
import java.util.*;
24+
import java.util.ArrayList;
25+
import java.util.Collections;
26+
import java.util.List;
27+
import java.util.Locale;
2328

2429
public class MediaCCCStreamExtractor extends StreamExtractor {
2530
private JsonObject data;
@@ -256,8 +261,8 @@ public String getHost() {
256261

257262
@Nonnull
258263
@Override
259-
public String getPrivacy() {
260-
return "";
264+
public Privacy getPrivacy() {
265+
return Privacy.PUBLIC;
261266
}
262267

263268
@Nonnull
@@ -273,14 +278,14 @@ public String getLicence() {
273278
}
274279

275280
@Override
276-
public Locale getLanguageInfo() {
277-
return null;
281+
public Locale getLanguageInfo() throws ParsingException {
282+
return Localization.getLocaleFromThreeLetterCode(data.getString("original_language"));
278283
}
279284

280285
@Nonnull
281286
@Override
282287
public List<String> getTags() {
283-
return Arrays.asList(data.getArray("tags").toArray(new String[0]));
288+
return JsonUtils.getStringListFromJsonArray(data.getArray("tags"));
284289
}
285290

286291
@Nonnull

extractor/src/main/java/org/schabi/newpipe/extractor/services/peertube/extractors/PeertubeStreamExtractor.java

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -286,11 +286,7 @@ public StreamInfoItemsCollector getRelatedStreams() throws IOException, Extracti
286286
@Nonnull
287287
@Override
288288
public List<String> getTags() {
289-
try {
290-
return (List) JsonUtils.getArray(json, "tags");
291-
} catch (Exception e) {
292-
return Collections.emptyList();
293-
}
289+
return JsonUtils.getStringListFromJsonArray(json.getArray("tags"));
294290
}
295291

296292
@Nonnull
@@ -428,8 +424,19 @@ public String getHost() throws ParsingException {
428424

429425
@Nonnull
430426
@Override
431-
public String getPrivacy() throws ParsingException {
432-
return JsonUtils.getString(json, "privacy.label");
427+
public Privacy getPrivacy() {
428+
switch (json.getObject("privacy").getInt("id")) {
429+
case 1:
430+
return Privacy.PUBLIC;
431+
case 2:
432+
return Privacy.UNLISTED;
433+
case 3:
434+
return Privacy.PRIVATE;
435+
case 4:
436+
return Privacy.INTERNAL;
437+
default:
438+
return null;
439+
}
433440
}
434441

435442
@Nonnull

extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudStreamExtractor.java

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -374,22 +374,21 @@ public String getHost() {
374374
return "";
375375
}
376376

377-
@Nonnull
378377
@Override
379-
public String getPrivacy() {
380-
return "";
378+
public Privacy getPrivacy() {
379+
return track.getString("sharing").equals("public") ? Privacy.PUBLIC : Privacy.PRIVATE;
381380
}
382381

383382
@Nonnull
384383
@Override
385384
public String getCategory() {
386-
return "";
385+
return track.getString("genre");
387386
}
388387

389388
@Nonnull
390389
@Override
391390
public String getLicence() {
392-
return "";
391+
return track.getString("license");
393392
}
394393

395394
@Override
@@ -400,7 +399,29 @@ public Locale getLanguageInfo() {
400399
@Nonnull
401400
@Override
402401
public List<String> getTags() {
403-
return Collections.emptyList();
402+
// tags are separated by spaces, but they can be multiple words escaped by quotes "
403+
final String[] tag_list = track.getString("tag_list").split(" ");
404+
final List<String> tags = new ArrayList<>();
405+
String escapedTag = "";
406+
boolean isEscaped = false;
407+
for (int i = 0; i < tag_list.length; i++) {
408+
String tag = tag_list[i];
409+
if (tag.startsWith("\"")) {
410+
escapedTag += tag_list[i].replace("\"", "");
411+
isEscaped = true;
412+
} else if (isEscaped) {
413+
if (tag.endsWith("\"")) {
414+
escapedTag += " " + tag.replace("\"", "");
415+
isEscaped = false;
416+
tags.add(escapedTag);
417+
} else {
418+
escapedTag += " " + tag;
419+
}
420+
} else if (!tag.isEmpty()){
421+
tags.add(tag);
422+
}
423+
}
424+
return tags;
404425
}
405426

406427
@Nonnull

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
import org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper;
3535
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeChannelLinkHandlerFactory;
3636
import org.schabi.newpipe.extractor.stream.*;
37+
import org.schabi.newpipe.extractor.utils.JsonUtils;
3738
import org.schabi.newpipe.extractor.utils.Parser;
3839
import org.schabi.newpipe.extractor.utils.Utils;
3940

@@ -214,7 +215,7 @@ public Description getDescription() throws ParsingException {
214215
// description with more info on links
215216
try {
216217
String description = getTextFromObject(getVideoSecondaryInfoRenderer().getObject("description"), true);
217-
if (description != null && !description.isEmpty()) return new Description(description, Description.HTML);
218+
if (!isNullOrEmpty(description)) return new Description(description, Description.HTML);
218219
} catch (final ParsingException ignored) {
219220
// age-restricted videos cause a ParsingException here
220221
}
@@ -1107,20 +1108,32 @@ public String getHost() {
11071108

11081109
@Nonnull
11091110
@Override
1110-
public String getPrivacy() {
1111-
return "";
1111+
public Privacy getPrivacy() {
1112+
boolean isUnlisted = playerResponse
1113+
.getObject("microformat")
1114+
.getObject("playerMicroformatRenderer")
1115+
.getBoolean("isUnlisted");
1116+
return isUnlisted ? Privacy.UNLISTED : Privacy.PUBLIC;
11121117
}
11131118

11141119
@Nonnull
11151120
@Override
11161121
public String getCategory() {
1117-
return "";
1122+
return playerResponse.getObject("microformat")
1123+
.getObject("playerMicroformatRenderer")
1124+
.getString("category");
11181125
}
11191126

11201127
@Nonnull
11211128
@Override
1122-
public String getLicence() {
1123-
return "";
1129+
public String getLicence() throws ParsingException {
1130+
final JsonObject metadataRowRenderer = getVideoSecondaryInfoRenderer()
1131+
.getObject("metadataRowContainer").getObject("metadataRowContainerRenderer").getArray("rows")
1132+
.getObject(0).getObject("metadataRowRenderer");
1133+
1134+
final JsonArray contents = metadataRowRenderer.getArray("contents");
1135+
final String license = getTextFromObject(contents.getObject(0));
1136+
return license != null && "Licence".equals(getTextFromObject(metadataRowRenderer.getObject("title"))) ? license : "YouTube licence";
11241137
}
11251138

11261139
@Override
@@ -1131,7 +1144,7 @@ public Locale getLanguageInfo() {
11311144
@Nonnull
11321145
@Override
11331146
public List<String> getTags() {
1134-
return Collections.emptyList();
1147+
return JsonUtils.getStringListFromJsonArray(playerResponse.getObject("videoDetails").getArray("keywords"));
11351148
}
11361149

11371150
@Nonnull

extractor/src/main/java/org/schabi/newpipe/extractor/stream/StreamExtractor.java

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -428,8 +428,7 @@ protected long getTimestampSeconds(String regexPattern) throws ParsingException
428428
* @return the {@link Privacy} of the stream.
429429
* @throws ParsingException
430430
*/
431-
@Nonnull
432-
public abstract String getPrivacy() throws ParsingException;
431+
public abstract Privacy getPrivacy() throws ParsingException;
433432

434433
/**
435434
* The name of the category of the stream.
@@ -467,7 +466,7 @@ protected long getTimestampSeconds(String regexPattern) throws ParsingException
467466
* The list of tags of the stream.
468467
* If the tag list is not available you can simply return an empty list.
469468
*
470-
* @return the list of tags of the stream or an empty list.
469+
* @return the list of tags of the stream or Collections.emptyList().
471470
* @throws ParsingException
472471
*/
473472
@Nonnull
@@ -510,4 +509,10 @@ protected long getTimestampSeconds(String regexPattern) throws ParsingException
510509
*/
511510
@Nonnull
512511
public abstract List<MetaInfo> getMetaInfo() throws ParsingException;
512+
public enum Privacy {
513+
PUBLIC,
514+
UNLISTED,
515+
PRIVATE,
516+
INTERNAL
517+
}
513518
}

0 commit comments

Comments
 (0)