Skip to content

Commit 1ff529a

Browse files
Merge branch 'dev' into Refactor-date-parsing
# Conflicts: # extractor/src/main/java/org/schabi/newpipe/extractor/localization/DateWrapper.java
2 parents 8997187 + 8dfb0d3 commit 1ff529a

36 files changed

Lines changed: 2326 additions & 133 deletions

README.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@ If you're using Gradle, you could add NewPipe Extractor as a dependency with the
2020
-dontwarn org.mozilla.javascript.tools.**
2121
```
2222

23-
**Note:** To use NewPipe Extractor in Android projects with a `minSdk` below 33, [core library desugaring](https://developer.android.com/studio/write/java8-support#library-desugaring) with the `desugar_jdk_libs_nio` artifact is required.
23+
> [!NOTE]
24+
> To use NewPipe Extractor in Android projects with a `minSdk` below 33, [core library desugaring](https://developer.android.com/studio/write/java8-support#library-desugaring) with the `desugar_jdk_libs_nio` artifact is required.
2425
2526
### Testing changes
2627

@@ -41,6 +42,8 @@ Another approach would be to use the local Maven repository, here's a gist of ho
4142
3. Run gradle's `install` task to deploy this library to your local repository (using the wrapper, present in the root of this project: `./gradlew install`)
4243
4. Change the dependency version used in your project to match the one you chose in step 2 (`implementation 'com.github.teamnewpipe:NewPipeExtractor:LOCAL_SNAPSHOT'`)
4344

45+
46+
> [!TIP]
4447
> Tip for Android Studio users: After you make changes and run the `install` task, use the menu option `File → "Sync with File System"` to refresh the library in your project.
4548
4649
## Supported sites

build.gradle

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,8 @@ allprojects {
2929
ext {
3030
nanojsonVersion = "e9d656ddb49a412a5a0a5d5ef20ca7ef09549996"
3131
jsr305Version = "3.0.2"
32-
junitVersion = "5.13.4"
33-
checkstyleVersion = "10.4"
32+
junitVersion = "5.14.0"
33+
checkstyleVersion = "10.26.1"
3434
}
3535
}
3636

extractor/build.gradle

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ checkstyleTest {
2828

2929
ext {
3030
rhinoVersion = '1.8.0'
31-
protobufVersion = '4.32.0'
31+
protobufVersion = '4.32.1'
3232
}
3333

3434
dependencies {
@@ -51,7 +51,7 @@ dependencies {
5151
testImplementation 'org.junit.jupiter:junit-jupiter-params'
5252

5353
testImplementation "com.squareup.okhttp3:okhttp:4.12.0"
54-
testImplementation 'com.google.code.gson:gson:2.13.1'
54+
testImplementation 'com.google.code.gson:gson:2.13.2'
5555
}
5656

5757
protobuf {

extractor/src/main/java/org/schabi/newpipe/extractor/localization/DateWrapper.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,14 @@ public boolean isApproximation() {
8282
return isApproximation;
8383
}
8484

85+
@Override
86+
public String toString() {
87+
return "DateWrapper{" +
88+
"instant=" + instant +
89+
", isApproximation=" + isApproximation +
90+
'}';
91+
}
92+
8593
public static DateWrapper fromOffsetDateTime(final String date) throws ParsingException {
8694
if (date == null) {
8795
return null;

extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudStreamExtractor.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,8 @@ public long getLength() {
119119

120120
@Override
121121
public long getTimeStamp() throws ParsingException {
122-
return getTimestampSeconds("(#t=\\d{0,3}h?\\d{0,3}m?\\d{1,3}s?)");
122+
final var timestamp = getTimestampSeconds("(#t=\\d{0,3}h?\\d{0,3}m?\\d{1,3}s?)");
123+
return timestamp == -2 ? 0 : timestamp;
123124
}
124125

125126
@Override
@@ -168,7 +169,7 @@ public List<AudioStream> getAudioStreams() throws ExtractionException {
168169

169170
try {
170171
final JsonArray transcodings = track.getObject("media")
171-
.getArray("transcodings");
172+
.getArray("transcodings");
172173
if (!isNullOrEmpty(transcodings)) {
173174
// Get information about what stream formats are available
174175
extractAudioStreams(transcodings, audioStreams);

extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/linkHandler/SoundcloudStreamLinkHandlerFactory.java

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
package org.schabi.newpipe.extractor.services.soundcloud.linkHandler;
22

3+
import java.util.regex.Pattern;
4+
35
import org.schabi.newpipe.extractor.exceptions.ParsingException;
46
import org.schabi.newpipe.extractor.linkhandler.LinkHandlerFactory;
57
import org.schabi.newpipe.extractor.services.soundcloud.SoundcloudParsingHelper;
@@ -9,11 +11,18 @@
911
public final class SoundcloudStreamLinkHandlerFactory extends LinkHandlerFactory {
1012
private static final SoundcloudStreamLinkHandlerFactory INSTANCE
1113
= new SoundcloudStreamLinkHandlerFactory();
12-
private static final String URL_PATTERN = "^https?://(www\\.|m\\.|on\\.)?"
13-
+ "soundcloud.com/[0-9a-z_-]+"
14-
+ "/(?!(tracks|albums|sets|reposts|followers|following)/?$)[0-9a-z_-]+/?([#?].*)?$";
15-
private static final String API_URL_PATTERN = "^https?://api-v2\\.soundcloud.com"
16-
+ "/(tracks|albums|sets|reposts|followers|following)/([0-9a-z_-]+)/";
14+
15+
private static final Pattern URL_PATTERN = Pattern.compile(
16+
"^https?://(?:www\\.|m\\.|on\\.)?"
17+
+ "soundcloud.com/[0-9a-z_-]+"
18+
+ "/(?!(?:tracks|albums|sets|reposts|followers|following)/?$)[0-9a-z_-]+/?(?:[#?].*)?$"
19+
);
20+
21+
private static final Pattern API_URL_PATTERN = Pattern.compile(
22+
"^https?://api-v2\\.soundcloud.com"
23+
+ "/(tracks|albums|sets|reposts|followers|following)/([0-9a-z_-]+)/"
24+
);
25+
1726
private SoundcloudStreamLinkHandlerFactory() {
1827
}
1928

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeStreamLinkHandlerFactory;
3636
import org.schabi.newpipe.extractor.stream.StreamInfoItemExtractor;
3737
import org.schabi.newpipe.extractor.stream.StreamType;
38+
import org.schabi.newpipe.extractor.stream.ContentAvailability;
3839
import org.schabi.newpipe.extractor.utils.JsonUtils;
3940
import org.schabi.newpipe.extractor.utils.Parser;
4041
import org.schabi.newpipe.extractor.utils.Utils;
@@ -472,4 +473,33 @@ public boolean isShortFormContent() throws ParsingException {
472473
throw new ParsingException("Could not determine if this is short-form content", e);
473474
}
474475
}
476+
477+
private boolean isMembersOnly() throws ParsingException {
478+
return videoInfo.getArray("badges")
479+
.stream()
480+
.filter(JsonObject.class::isInstance)
481+
.map(JsonObject.class::cast)
482+
.map(badge -> badge.getObject("metadataBadgeRenderer").getString("style"))
483+
.anyMatch("BADGE_STYLE_TYPE_MEMBERS_ONLY"::equals);
484+
}
485+
486+
487+
@Nonnull
488+
@Override
489+
public ContentAvailability getContentAvailability() throws ParsingException {
490+
if (isPremiere()) {
491+
return ContentAvailability.UPCOMING;
492+
}
493+
494+
if (isMembersOnly()) {
495+
return ContentAvailability.MEMBERSHIP;
496+
}
497+
498+
if (isPremium()) {
499+
return ContentAvailability.PAID;
500+
}
501+
502+
return ContentAvailability.AVAILABLE;
503+
}
504+
475505
}

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemLockupExtractor.java

Lines changed: 65 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,11 @@
1717
import org.schabi.newpipe.extractor.utils.JsonUtils;
1818
import org.schabi.newpipe.extractor.utils.Utils;
1919

20+
import java.time.LocalDateTime;
21+
import java.time.OffsetDateTime;
22+
import java.time.ZoneOffset;
23+
import java.time.format.DateTimeFormatter;
24+
import java.time.format.DateTimeParseException;
2025
import java.util.List;
2126
import java.util.Optional;
2227
import java.util.stream.Collectors;
@@ -30,20 +35,24 @@
3035
* The following features are currently not implemented because they have never been observed:
3136
* <ul>
3237
* <li>Shorts</li>
33-
* <li>Premieres</li>
3438
* <li>Paid content (Premium, members first or only)</li>
3539
* </ul>
3640
*/
3741
public class YoutubeStreamInfoItemLockupExtractor implements StreamInfoItemExtractor {
3842

3943
private static final String NO_VIEWS_LOWERCASE = "no views";
44+
// This approach is language dependent (en-GB)
45+
// The trailing space is intentionally included
46+
private static final String PREMIERES_TEXT = "Premieres ";
47+
private static final DateTimeFormatter PREMIERES_DATE_FORMATTER =
48+
DateTimeFormatter.ofPattern("dd/MM/yyyy, HH:mm");
4049

4150
private final JsonObject lockupViewModel;
4251
private final TimeAgoParser timeAgoParser;
4352

4453
private StreamType cachedStreamType;
4554
private String cachedName;
46-
private Optional<String> cachedTextualUploadDate;
55+
private Optional<String> cachedDateText;
4756

4857
private ChannelImageViewModel cachedChannelImageViewModel;
4958
private JsonArray cachedMetadataRows;
@@ -137,7 +146,9 @@ public String getName() throws ParsingException {
137146
@Override
138147
public long getDuration() throws ParsingException {
139148
// Duration cannot be extracted for live streams, but only for normal videos
140-
if (isLive()) {
149+
// Exact duration cannot be extracted for premieres; only an approximation is available in
150+
// the accessibility context label
151+
if (isLive() || isPremiere()) {
141152
return -1;
142153
}
143154

@@ -237,20 +248,27 @@ public boolean isUploaderVerified() throws ParsingException {
237248
@Nullable
238249
@Override
239250
public String getTextualUploadDate() throws ParsingException {
240-
if (cachedTextualUploadDate != null) {
241-
return cachedTextualUploadDate.orElse(null);
242-
}
243-
244251
// Live streams have no upload date
245252
if (isLive()) {
246-
cachedTextualUploadDate = Optional.empty();
247253
return null;
248254
}
249255

250-
// This might be null e.g. for live streams
251-
this.cachedTextualUploadDate = metadataPart(1, 1)
252-
.map(this::getTextContentFromMetadataPart);
253-
return cachedTextualUploadDate.orElse(null);
256+
// Date string might be null e.g. for live streams
257+
final Optional<String> dateText = getDateText();
258+
259+
if (isPremiere()) {
260+
return getDateFromPremiere(dateText);
261+
}
262+
263+
return dateText.orElse(null);
264+
}
265+
266+
@Nullable
267+
private String getDateFromPremiere(final Optional<String> dateText) {
268+
// This approach is language dependent
269+
// Remove the premieres text from the upload date metadata part
270+
return dateText.map(str -> str.replace(PREMIERES_TEXT, ""))
271+
.orElse(null);
254272
}
255273

256274
@Nullable
@@ -265,11 +283,32 @@ public DateWrapper getUploadDate() throws ParsingException {
265283
if (textualUploadDate == null) {
266284
return null;
267285
}
286+
287+
if (isPremiere()) {
288+
final String premiereDate = getDateFromPremiere(getDateText());
289+
if (premiereDate == null) {
290+
throw new ParsingException("Could not get upload date from premiere");
291+
}
292+
293+
try {
294+
// As we request a UTC offset of 0 minutes, we get the UTC date
295+
return new DateWrapper(OffsetDateTime.of(LocalDateTime.parse(
296+
premiereDate, PREMIERES_DATE_FORMATTER), ZoneOffset.UTC));
297+
} catch (final DateTimeParseException e) {
298+
throw new ParsingException("Could not parse premiere upload date", e);
299+
}
300+
}
301+
268302
return timeAgoParser.parse(textualUploadDate);
269303
}
270304

271305
@Override
272306
public long getViewCount() throws ParsingException {
307+
if (isPremiere()) {
308+
// The number of people returned for premieres is the one currently waiting
309+
return -1;
310+
}
311+
273312
final Optional<String> optTextContent = metadataPart(1, 0)
274313
.map(this::getTextContentFromMetadataPart);
275314
// We could do this inline if the ParsingException would be a RuntimeException -.-
@@ -357,6 +396,20 @@ private boolean isLive() throws ParsingException {
357396
return getStreamType() != StreamType.VIDEO_STREAM;
358397
}
359398

399+
private Optional<String> getDateText() throws ParsingException {
400+
if (cachedDateText == null) {
401+
cachedDateText = metadataPart(1, 1)
402+
.map(this::getTextContentFromMetadataPart);
403+
}
404+
return cachedDateText;
405+
}
406+
407+
private boolean isPremiere() throws ParsingException {
408+
return getDateText().map(dateText -> dateText.contains(PREMIERES_TEXT))
409+
// If we can't get the date text, assume it is not a premiere; it should be a livestream
410+
.orElse(false);
411+
}
412+
360413
abstract static class ChannelImageViewModel {
361414
protected JsonObject viewModel;
362415

0 commit comments

Comments
 (0)