Skip to content

Commit 596bce2

Browse files
authored
Merge pull request #1221 from AudricV/yt_support-new-shorts-ui-data
[YouTube] Fix extraction of Shorts in channels and remove visitor data usage
2 parents 6e3a4a6 + f9ffdd9 commit 596bce2

7 files changed

Lines changed: 1491 additions & 1349 deletions

File tree

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelTabExtractor.java

Lines changed: 30 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -42,30 +42,14 @@
4242
*/
4343
public class YoutubeChannelTabExtractor extends ChannelTabExtractor {
4444

45-
/**
46-
* Whether the visitor data extracted from the initial channel response is required to be used
47-
* for continuations.
48-
*
49-
* <p>
50-
* A valid {@code visitorData} is required to get continuations of shorts in channels.
51-
* </p>
52-
*
53-
* <p>
54-
* It should be not used when it is not needed, in order to reduce YouTube's tracking.
55-
* </p>
56-
*/
57-
private final boolean useVisitorData;
5845
private JsonObject jsonResponse;
5946
private String channelId;
60-
@Nullable
61-
private String visitorData;
6247
@SuppressWarnings("OptionalUsedAsFieldOrParameterType")
6348
protected Optional<YoutubeChannelHelper.ChannelHeader> channelHeader;
6449

6550
public YoutubeChannelTabExtractor(final StreamingService service,
6651
final ListLinkHandler linkHandler) {
6752
super(service, linkHandler);
68-
useVisitorData = getName().equals(ChannelTabs.SHORTS);
6953
}
7054

7155
@Nonnull
@@ -100,9 +84,6 @@ public void onFetchPage(@Nonnull final Downloader downloader) throws IOException
10084
jsonResponse = data.jsonResponse;
10185
channelHeader = YoutubeChannelHelper.getChannelHeader(jsonResponse);
10286
channelId = data.channelId;
103-
if (useVisitorData) {
104-
visitorData = jsonResponse.getObject("responseContext").getString("visitorData");
105-
}
10687
}
10788

10889
@Nonnull
@@ -176,10 +157,8 @@ public InfoItemsPage<InfoItem> getInitialPage() throws IOException, ExtractionEx
176157
channelName, channelUrl)
177158
.orElse(null);
178159

179-
final Page nextPage = getNextPageFrom(continuation,
180-
useVisitorData && !isNullOrEmpty(visitorData)
181-
? List.of(channelName, channelUrl, verifiedStatus.toString(), visitorData)
182-
: List.of(channelName, channelUrl, verifiedStatus.toString()));
160+
final Page nextPage = getNextPageFrom(
161+
continuation, List.of(channelName, channelUrl, verifiedStatus.toString()));
183162

184163
return new InfoItemsPage<>(collector, nextPage);
185164
}
@@ -299,6 +278,9 @@ private Optional<JsonObject> collectItem(@Nonnull final MultiInfoItemsCollector
299278
} else if (richItem.has("reelItemRenderer")) {
300279
commitReel(collector, richItem.getObject("reelItemRenderer"),
301280
channelVerifiedStatus, channelName, channelUrl);
281+
} else if (richItem.has("shortsLockupViewModel")) {
282+
commitShortsLockup(collector, richItem.getObject("shortsLockupViewModel"),
283+
channelVerifiedStatus, channelName, channelUrl);
302284
} else if (richItem.has("playlistRenderer")) {
303285
commitPlaylist(collector, richItem.getObject("playlistRenderer"),
304286
channelVerifiedStatus, channelName, channelUrl);
@@ -356,6 +338,30 @@ public boolean isUploaderVerified() {
356338
});
357339
}
358340

341+
private static void commitShortsLockup(@Nonnull final MultiInfoItemsCollector collector,
342+
@Nonnull final JsonObject shortsLockupViewModel,
343+
@Nonnull final VerifiedStatus channelVerifiedStatus,
344+
@Nullable final String channelName,
345+
@Nullable final String channelUrl) {
346+
collector.commit(
347+
new YoutubeShortsLockupInfoItemExtractor(shortsLockupViewModel) {
348+
@Override
349+
public String getUploaderName() throws ParsingException {
350+
return isNullOrEmpty(channelName) ? super.getUploaderName() : channelName;
351+
}
352+
353+
@Override
354+
public String getUploaderUrl() throws ParsingException {
355+
return isNullOrEmpty(channelUrl) ? super.getUploaderName() : channelUrl;
356+
}
357+
358+
@Override
359+
public boolean isUploaderVerified() {
360+
return channelVerifiedStatus == VerifiedStatus.VERIFIED;
361+
}
362+
});
363+
}
364+
359365
private void commitVideo(@Nonnull final MultiInfoItemsCollector collector,
360366
@Nonnull final TimeAgoParser timeAgoParser,
361367
@Nonnull final JsonObject jsonObject,
@@ -434,8 +440,7 @@ private Page getNextPageFrom(final JsonObject continuations,
434440
.getString("token");
435441

436442
final byte[] body = JsonWriter.string(prepareDesktopJsonBuilder(getExtractorLocalization(),
437-
getExtractorContentCountry(),
438-
useVisitorData && channelIds.size() >= 3 ? channelIds.get(2) : null)
443+
getExtractorContentCountry())
439444
.value("continuation", continuation)
440445
.done())
441446
.getBytes(StandardCharsets.UTF_8);

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeReelInfoItemExtractor.java

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,19 @@
2020
import javax.annotation.Nullable;
2121

2222
/**
23-
* A {@link StreamInfoItemExtractor} for YouTube's {@code reelItemRenderers}.
23+
* A {@link StreamInfoItemExtractor} for YouTube's {@code reelItemRenderer}s.
2424
*
2525
* <p>
26-
* {@code reelItemRenderers} are returned on YouTube for their short-form contents on almost every
26+
* {@code reelItemRenderer}s were returned on YouTube for their short-form contents on almost every
2727
* place and every major client. They provide a limited amount of information and do not provide
2828
* the exact view count, any uploader info (name, URL, avatar, verified status) and the upload date.
2929
* </p>
30+
*
31+
* <p>
32+
* At the time this documentation was last updated, they were being replaced by
33+
* {@code shortsLockupViewModel}s. See {@link YoutubeShortsLockupInfoItemExtractor} for an
34+
* extractor for this new UI data type.
35+
* </p>
3036
*/
3137
public class YoutubeReelInfoItemExtractor implements StreamInfoItemExtractor {
3238

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
package org.schabi.newpipe.extractor.services.youtube.extractors;
2+
3+
import com.grack.nanojson.JsonObject;
4+
import org.schabi.newpipe.extractor.Image;
5+
import org.schabi.newpipe.extractor.exceptions.ParsingException;
6+
import org.schabi.newpipe.extractor.localization.DateWrapper;
7+
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeStreamLinkHandlerFactory;
8+
import org.schabi.newpipe.extractor.stream.StreamInfoItemExtractor;
9+
import org.schabi.newpipe.extractor.stream.StreamType;
10+
import org.schabi.newpipe.extractor.utils.Utils;
11+
12+
import javax.annotation.Nonnull;
13+
import javax.annotation.Nullable;
14+
15+
import java.util.List;
16+
17+
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getThumbnailsFromInfoItem;
18+
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
19+
20+
/**
21+
* A {@link StreamInfoItemExtractor} for YouTube's {@code shortsLockupViewModel}s.
22+
*
23+
* <p>
24+
* {@code shortsLockupViewModel}s are returned on YouTube for their short-form contents on almost
25+
* every place and every major client. They provide a limited amount of information and do not
26+
* provide the exact view count, any uploader info (name, URL, avatar, verified status) and the
27+
* upload date.
28+
* </p>
29+
*
30+
* <p>
31+
* At the time this documentation has been written, this data UI type is not fully used (rolled
32+
* out), so {@code reelItemRenderer}s are also returned. See {@link YoutubeReelInfoItemExtractor}
33+
* for an extractor for this UI data type.
34+
* </p>
35+
*/
36+
public class YoutubeShortsLockupInfoItemExtractor implements StreamInfoItemExtractor {
37+
38+
@Nonnull
39+
private final JsonObject shortsLockupViewModel;
40+
41+
public YoutubeShortsLockupInfoItemExtractor(@Nonnull final JsonObject shortsLockupViewModel) {
42+
this.shortsLockupViewModel = shortsLockupViewModel;
43+
}
44+
45+
@Override
46+
public String getName() throws ParsingException {
47+
return shortsLockupViewModel.getObject("overlayMetadata")
48+
.getObject("primaryText")
49+
.getString("content");
50+
}
51+
52+
@Override
53+
public String getUrl() throws ParsingException {
54+
String videoId = shortsLockupViewModel.getObject("onTap")
55+
.getObject("innertubeCommand")
56+
.getObject("reelWatchEndpoint")
57+
.getString("videoId");
58+
59+
if (isNullOrEmpty(videoId)) {
60+
videoId = shortsLockupViewModel.getObject("inlinePlayerData")
61+
.getObject("onVisible")
62+
.getObject("innertubeCommand")
63+
.getObject("watchEndpoint")
64+
.getString("videoId");
65+
}
66+
67+
if (isNullOrEmpty(videoId)) {
68+
throw new ParsingException("Could not get video ID");
69+
}
70+
71+
try {
72+
return YoutubeStreamLinkHandlerFactory.getInstance().getUrl(videoId);
73+
} catch (final Exception e) {
74+
throw new ParsingException("Could not get URL", e);
75+
}
76+
}
77+
78+
@Nonnull
79+
@Override
80+
public List<Image> getThumbnails() throws ParsingException {
81+
return getThumbnailsFromInfoItem(shortsLockupViewModel.getObject("thumbnail")
82+
.getObject("sources"));
83+
}
84+
85+
@Override
86+
public StreamType getStreamType() throws ParsingException {
87+
return StreamType.VIDEO_STREAM;
88+
}
89+
90+
@Override
91+
public long getViewCount() throws ParsingException {
92+
final String viewCountText = shortsLockupViewModel.getObject("overlayMetadata")
93+
.getObject("secondaryText")
94+
.getString("content");
95+
if (!isNullOrEmpty(viewCountText)) {
96+
// This approach is language dependent
97+
if (viewCountText.toLowerCase().contains("no views")) {
98+
return 0;
99+
}
100+
101+
return Utils.mixedNumberWordToLong(viewCountText);
102+
}
103+
104+
throw new ParsingException("Could not get short view count");
105+
}
106+
107+
@Override
108+
public boolean isShortFormContent() {
109+
return true;
110+
}
111+
112+
// All the following properties cannot be obtained from shortsLockupViewModels
113+
114+
@Override
115+
public boolean isAd() throws ParsingException {
116+
return false;
117+
}
118+
119+
@Override
120+
public long getDuration() throws ParsingException {
121+
return -1;
122+
}
123+
124+
@Override
125+
public String getUploaderName() throws ParsingException {
126+
return null;
127+
}
128+
129+
@Override
130+
public String getUploaderUrl() throws ParsingException {
131+
return null;
132+
}
133+
134+
@Override
135+
public boolean isUploaderVerified() throws ParsingException {
136+
return false;
137+
}
138+
139+
@Nullable
140+
@Override
141+
public String getTextualUploadDate() throws ParsingException {
142+
return null;
143+
}
144+
145+
@Nullable
146+
@Override
147+
public DateWrapper getUploadDate() throws ParsingException {
148+
return null;
149+
}
150+
}

extractor/src/test/resources/org/schabi/newpipe/extractor/services/youtube/extractor/channelTabs/shorts/generated_mock_0.json

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,10 @@
33
"httpMethod": "GET",
44
"url": "https://www.youtube.com/sw.js",
55
"headers": {
6-
"Origin": [
6+
"Referer": [
77
"https://www.youtube.com"
88
],
9-
"Referer": [
9+
"Origin": [
1010
"https://www.youtube.com"
1111
],
1212
"Accept-Language": [
@@ -34,17 +34,20 @@
3434
"cache-control": [
3535
"private, max-age\u003d0"
3636
],
37+
"content-security-policy": [
38+
"require-trusted-types-for \u0027script\u0027"
39+
],
3740
"content-type": [
3841
"text/javascript; charset\u003dutf-8"
3942
],
4043
"cross-origin-opener-policy": [
4144
"same-origin; report-to\u003d\"youtube_main\""
4245
],
4346
"date": [
44-
"Wed, 24 Jul 2024 17:37:25 GMT"
47+
"Sun, 08 Sep 2024 15:45:44 GMT"
4548
],
4649
"expires": [
47-
"Wed, 24 Jul 2024 17:37:25 GMT"
50+
"Sun, 08 Sep 2024 15:45:44 GMT"
4851
],
4952
"origin-trial": [
5053
"AmhMBR6zCLzDDxpW+HfpP67BqwIknWnyMOXOQGfzYswFmJe+fgaI6XZgAzcxOrzNtP7hEDsOo1jdjFnVr2IdxQ4AAAB4eyJvcmlnaW4iOiJodHRwczovL3lvdXR1YmUuY29tOjQ0MyIsImZlYXR1cmUiOiJXZWJWaWV3WFJlcXVlc3RlZFdpdGhEZXByZWNhdGlvbiIsImV4cGlyeSI6MTc1ODA2NzE5OSwiaXNTdWJkb21haW4iOnRydWV9"
@@ -62,8 +65,8 @@
6265
"ESF"
6366
],
6467
"set-cookie": [
65-
"YSC\u003dQqImeZ_ECz4; Domain\u003d.youtube.com; Path\u003d/; Secure; HttpOnly; SameSite\u003dnone",
66-
"VISITOR_INFO1_LIVE\u003d; Domain\u003d.youtube.com; Expires\u003dThu, 28-Oct-2021 17:37:25 GMT; Path\u003d/; Secure; HttpOnly; SameSite\u003dnone"
68+
"YSC\u003dbsHskp20CKw; Domain\u003d.youtube.com; Path\u003d/; Secure; HttpOnly; SameSite\u003dnone",
69+
"VISITOR_INFO1_LIVE\u003d; Domain\u003d.youtube.com; Expires\u003dMon, 13-Dec-2021 15:45:44 GMT; Path\u003d/; Secure; HttpOnly; SameSite\u003dnone"
6770
],
6871
"strict-transport-security": [
6972
"max-age\u003d31536000"

extractor/src/test/resources/org/schabi/newpipe/extractor/services/youtube/extractor/channelTabs/shorts/generated_mock_1.json

Lines changed: 9 additions & 6 deletions
Large diffs are not rendered by default.

extractor/src/test/resources/org/schabi/newpipe/extractor/services/youtube/extractor/channelTabs/shorts/generated_mock_2.json

Lines changed: 9 additions & 9 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)