Skip to content

Commit 4cae66f

Browse files
authored
Merge pull request #946 from chowder/dev
Add ability to identify short-form `StreamInfoItem`s
2 parents eb40bb8 + b1a899f commit 4cae66f

11 files changed

Lines changed: 1591 additions & 8 deletions

File tree

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java

Lines changed: 47 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,7 @@
11
package org.schabi.newpipe.extractor.services.youtube.extractors;
22

3-
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject;
4-
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getThumbnailUrlFromInfoItem;
5-
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getUrlFromNavigationEndpoint;
6-
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
7-
83
import com.grack.nanojson.JsonArray;
94
import com.grack.nanojson.JsonObject;
10-
115
import org.schabi.newpipe.extractor.exceptions.ParsingException;
126
import org.schabi.newpipe.extractor.localization.DateWrapper;
137
import org.schabi.newpipe.extractor.localization.TimeAgoParser;
@@ -18,12 +12,16 @@
1812
import org.schabi.newpipe.extractor.utils.JsonUtils;
1913
import org.schabi.newpipe.extractor.utils.Utils;
2014

15+
import javax.annotation.Nullable;
2116
import java.time.Instant;
2217
import java.time.OffsetDateTime;
2318
import java.time.ZoneOffset;
2419
import java.time.format.DateTimeFormatter;
2520

26-
import javax.annotation.Nullable;
21+
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject;
22+
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getThumbnailUrlFromInfoItem;
23+
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getUrlFromNavigationEndpoint;
24+
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
2725

2826
/*
2927
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
@@ -324,4 +322,46 @@ public String getShortDescription() throws ParsingException {
324322

325323
return null;
326324
}
325+
326+
@Override
327+
public boolean isShortFormContent() throws ParsingException {
328+
try {
329+
final String webPageType = videoInfo.getObject("navigationEndpoint")
330+
.getObject("commandMetadata").getObject("webCommandMetadata")
331+
.getString("webPageType");
332+
333+
boolean isShort = !isNullOrEmpty(webPageType)
334+
&& webPageType.equals("WEB_PAGE_TYPE_SHORTS");
335+
336+
if (!isShort) {
337+
isShort = videoInfo.getObject("navigationEndpoint").has("reelWatchEndpoint");
338+
}
339+
340+
if (!isShort) {
341+
final JsonObject thumbnailTimeOverlay = videoInfo.getArray("thumbnailOverlays")
342+
.stream()
343+
.filter(JsonObject.class::isInstance)
344+
.map(JsonObject.class::cast)
345+
.filter(thumbnailOverlay -> thumbnailOverlay.has(
346+
"thumbnailOverlayTimeStatusRenderer"))
347+
.map(thumbnailOverlay -> thumbnailOverlay.getObject(
348+
"thumbnailOverlayTimeStatusRenderer"))
349+
.findFirst()
350+
.orElse(null);
351+
352+
if (!isNullOrEmpty(thumbnailTimeOverlay)) {
353+
isShort = thumbnailTimeOverlay.getString("style", "")
354+
.equalsIgnoreCase("SHORTS")
355+
|| thumbnailTimeOverlay.getObject("icon")
356+
.getString("iconType", "")
357+
.toLowerCase()
358+
.contains("shorts");
359+
}
360+
}
361+
362+
return isShort;
363+
} catch (final Exception e) {
364+
throw new ParsingException("Could not determine if this is short-form content", e);
365+
}
366+
}
327367
}

extractor/src/main/java/org/schabi/newpipe/extractor/stream/StreamExtractor.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -554,6 +554,20 @@ public List<MetaInfo> getMetaInfo() throws ParsingException {
554554
return Collections.emptyList();
555555
}
556556

557+
/**
558+
* Whether the stream is a short-form content.
559+
*
560+
* <p>
561+
* Short-form contents are contents in the style of TikTok, YouTube Shorts, or Instagram Reels
562+
* videos.
563+
* </p>
564+
*
565+
* @return whether the stream is a short-form content
566+
*/
567+
public boolean isShortFormContent() throws ParsingException {
568+
return false;
569+
}
570+
557571
public enum Privacy {
558572
PUBLIC,
559573
UNLISTED,

extractor/src/main/java/org/schabi/newpipe/extractor/stream/StreamInfo.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -342,6 +342,11 @@ private static void extractOptionalData(final StreamInfo streamInfo,
342342
} catch (final Exception e) {
343343
streamInfo.addError(e);
344344
}
345+
try {
346+
streamInfo.setShortFormContent(extractor.isShortFormContent());
347+
} catch (final Exception e) {
348+
streamInfo.addError(e);
349+
}
345350

346351
streamInfo.setRelatedItems(ExtractorHelper.getRelatedItemsOrLogError(streamInfo,
347352
extractor));
@@ -389,6 +394,7 @@ private static void extractOptionalData(final StreamInfo streamInfo,
389394
private List<String> tags = new ArrayList<>();
390395
private List<StreamSegment> streamSegments = new ArrayList<>();
391396
private List<MetaInfo> metaInfo = new ArrayList<>();
397+
private boolean shortFormContent = false;
392398

393399
/**
394400
* Preview frames, e.g. for the storyboard / seekbar thumbnail preview
@@ -724,4 +730,12 @@ public void setPreviewFrames(final List<Frameset> previewFrames) {
724730
public List<MetaInfo> getMetaInfo() {
725731
return this.metaInfo;
726732
}
733+
734+
public boolean isShortFormContent() {
735+
return shortFormContent;
736+
}
737+
738+
public void setShortFormContent(final boolean isShortFormContent) {
739+
this.shortFormContent = isShortFormContent;
740+
}
727741
}

extractor/src/main/java/org/schabi/newpipe/extractor/stream/StreamInfoItem.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ public class StreamInfoItem extends InfoItem {
4242
private String uploaderUrl = null;
4343
private String uploaderAvatarUrl = null;
4444
private boolean uploaderVerified = false;
45+
private boolean shortFormContent = false;
4546

4647
public StreamInfoItem(final int serviceId,
4748
final String url,
@@ -130,6 +131,14 @@ public void setUploaderVerified(final boolean uploaderVerified) {
130131
this.uploaderVerified = uploaderVerified;
131132
}
132133

134+
public boolean isShortFormContent() {
135+
return shortFormContent;
136+
}
137+
138+
public void setShortFormContent(final boolean shortFormContent) {
139+
this.shortFormContent = shortFormContent;
140+
}
141+
133142
@Override
134143
public String toString() {
135144
return "StreamInfoItem{"

extractor/src/main/java/org/schabi/newpipe/extractor/stream/StreamInfoItemExtractor.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,4 +127,18 @@ public interface StreamInfoItemExtractor extends InfoItemExtractor {
127127
default String getShortDescription() throws ParsingException {
128128
return null;
129129
}
130+
131+
/**
132+
* Whether the stream is a short-form content.
133+
*
134+
* <p>
135+
* Short-form contents are contents in the style of TikTok, YouTube Shorts, or Instagram Reels
136+
* videos.
137+
* </p>
138+
*
139+
* @return whether the stream is a short-form content
140+
*/
141+
default boolean isShortFormContent() throws ParsingException {
142+
return false;
143+
}
130144
}

extractor/src/main/java/org/schabi/newpipe/extractor/stream/StreamInfoItemsCollector.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,11 @@ public StreamInfoItem extract(final StreamInfoItemExtractor extractor) throws Pa
9898
} catch (final Exception e) {
9999
addError(e);
100100
}
101+
try {
102+
resultItem.setShortFormContent(extractor.isShortFormContent());
103+
} catch (final Exception e) {
104+
addError(e);
105+
}
101106

102107
return resultItem;
103108
}

extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/search/YoutubeSearchExtractorTest.java

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,10 @@
3030
import org.schabi.newpipe.extractor.stream.StreamInfoItem;
3131

3232
import java.io.IOException;
33+
import java.io.UnsupportedEncodingException;
3334
import java.net.MalformedURLException;
3435
import java.net.URL;
35-
import java.util.ArrayList;
36+
import java.net.URLEncoder;
3637
import java.util.Collections;
3738
import java.util.List;
3839

@@ -379,4 +380,45 @@ public void testVideoDescription() throws IOException, ExtractionException {
379380
assertNotNull(((StreamInfoItem) items.get(0)).getShortDescription());
380381
}
381382
}
383+
384+
public static class ShortFormContent extends DefaultSearchExtractorTest {
385+
private static SearchExtractor extractor;
386+
private static final String QUERY = "#shorts";
387+
388+
@BeforeAll
389+
public static void setUp() throws Exception {
390+
YoutubeTestsUtils.ensureStateless();
391+
NewPipe.init(DownloaderFactory.getDownloader(RESOURCE_PATH + "shorts"));
392+
extractor = YouTube.getSearchExtractor(QUERY, singletonList(VIDEOS), "");
393+
extractor.fetchPage();
394+
}
395+
396+
private String getUrlEncodedQuery() {
397+
try {
398+
return URLEncoder.encode(QUERY, "UTF-8");
399+
} catch (UnsupportedEncodingException e) {
400+
throw new RuntimeException(e);
401+
}
402+
}
403+
404+
@Override public SearchExtractor extractor() { return extractor; }
405+
@Override public StreamingService expectedService() { return YouTube; }
406+
@Override public String expectedName() { return QUERY; }
407+
@Override public String expectedId() { return QUERY; }
408+
@Override public String expectedUrlContains() { return "youtube.com/results?search_query=" + getUrlEncodedQuery(); }
409+
@Override public String expectedOriginalUrlContains() { return "youtube.com/results?search_query=" + getUrlEncodedQuery(); }
410+
@Override public String expectedSearchString() { return QUERY; }
411+
@Nullable @Override public String expectedSearchSuggestion() { return null; }
412+
@Override public InfoItem.InfoType expectedInfoItemType() { return InfoItem.InfoType.STREAM; }
413+
414+
@Test
415+
void testShortFormContent() throws IOException, ExtractionException {
416+
assertTrue(extractor.getInitialPage()
417+
.getItems()
418+
.stream()
419+
.filter(StreamInfoItem.class::isInstance)
420+
.map(StreamInfoItem.class::cast)
421+
.anyMatch(StreamInfoItem::isShortFormContent));
422+
}
423+
}
382424
}
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
{
2+
"request": {
3+
"httpMethod": "GET",
4+
"url": "https://www.youtube.com/sw.js",
5+
"headers": {
6+
"Origin": [
7+
"https://www.youtube.com"
8+
],
9+
"Referer": [
10+
"https://www.youtube.com"
11+
],
12+
"Accept-Language": [
13+
"en-GB, en;q\u003d0.9"
14+
]
15+
},
16+
"localization": {
17+
"languageCode": "en",
18+
"countryCode": "GB"
19+
}
20+
},
21+
"response": {
22+
"responseCode": 200,
23+
"responseMessage": "",
24+
"responseHeaders": {
25+
"access-control-allow-credentials": [
26+
"true"
27+
],
28+
"access-control-allow-origin": [
29+
"https://www.youtube.com"
30+
],
31+
"alt-svc": [
32+
"h3\u003d\":443\"; ma\u003d2592000,h3-29\u003d\":443\"; ma\u003d2592000,h3-Q050\u003d\":443\"; ma\u003d2592000,h3-Q046\u003d\":443\"; ma\u003d2592000,h3-Q043\u003d\":443\"; ma\u003d2592000,quic\u003d\":443\"; ma\u003d2592000; v\u003d\"46,43\""
33+
],
34+
"cache-control": [
35+
"private, max-age\u003d0"
36+
],
37+
"content-type": [
38+
"text/javascript; charset\u003dutf-8"
39+
],
40+
"cross-origin-opener-policy-report-only": [
41+
"same-origin; report-to\u003d\"youtube_main\""
42+
],
43+
"date": [
44+
"Tue, 18 Oct 2022 22:13:02 GMT"
45+
],
46+
"expires": [
47+
"Tue, 18 Oct 2022 22:13:02 GMT"
48+
],
49+
"p3p": [
50+
"CP\u003d\"This is not a P3P policy! See http://support.google.com/accounts/answer/151657?hl\u003den-GB for more info.\""
51+
],
52+
"permissions-policy": [
53+
"ch-ua-arch\u003d*, ch-ua-bitness\u003d*, ch-ua-full-version\u003d*, ch-ua-full-version-list\u003d*, ch-ua-model\u003d*, ch-ua-wow64\u003d*, ch-ua-platform\u003d*, ch-ua-platform-version\u003d*"
54+
],
55+
"report-to": [
56+
"{\"group\":\"youtube_main\",\"max_age\":2592000,\"endpoints\":[{\"url\":\"https://csp.withgoogle.com/csp/report-to/youtube_main\"}]}"
57+
],
58+
"server": [
59+
"ESF"
60+
],
61+
"set-cookie": [
62+
"YSC\u003dtvxMn34iTRM; Domain\u003d.youtube.com; Path\u003d/; Secure; HttpOnly; SameSite\u003dnone",
63+
"VISITOR_INFO1_LIVE\u003d; Domain\u003d.youtube.com; Expires\u003dWed, 22-Jan-2020 22:13:02 GMT; Path\u003d/; Secure; HttpOnly; SameSite\u003dnone",
64+
"CONSENT\u003dPENDING+265; expires\u003dThu, 17-Oct-2024 22:13:02 GMT; path\u003d/; domain\u003d.youtube.com; Secure"
65+
],
66+
"strict-transport-security": [
67+
"max-age\u003d31536000"
68+
],
69+
"x-content-type-options": [
70+
"nosniff"
71+
],
72+
"x-frame-options": [
73+
"SAMEORIGIN"
74+
],
75+
"x-xss-protection": [
76+
"0"
77+
]
78+
},
79+
"responseBody": "\n self.addEventListener(\u0027install\u0027, event \u003d\u003e {\n event.waitUntil(self.skipWaiting());\n });\n self.addEventListener(\u0027activate\u0027, event \u003d\u003e {\n event.waitUntil(\n self.clients.claim().then(() \u003d\u003e self.registration.unregister()));\n });\n ",
80+
"latestUrl": "https://www.youtube.com/sw.js"
81+
}
82+
}

0 commit comments

Comments
 (0)