Skip to content

Commit dc03909

Browse files
committed
feat: yt featured channels extractor
1 parent a6e57cd commit dc03909

2 files changed

Lines changed: 262 additions & 1 deletion

File tree

Lines changed: 258 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,258 @@
1+
package org.schabi.newpipe.extractor.services.youtube.extractors;
2+
3+
import static org.schabi.newpipe.extractor.services.youtube.YoutubeChannelHelper.getChannelResponse;
4+
import static org.schabi.newpipe.extractor.services.youtube.YoutubeChannelHelper.resolveChannelId;
5+
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.DISABLE_PRETTY_PRINT_PARAMETER;
6+
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.YOUTUBEI_V1_URL;
7+
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonPostResponse;
8+
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject;
9+
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.prepareDesktopJsonBuilder;
10+
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
11+
12+
import com.grack.nanojson.JsonArray;
13+
import com.grack.nanojson.JsonObject;
14+
import com.grack.nanojson.JsonWriter;
15+
16+
import org.schabi.newpipe.extractor.Page;
17+
import org.schabi.newpipe.extractor.StreamingService;
18+
import org.schabi.newpipe.extractor.channel.ChannelInfoItem;
19+
import org.schabi.newpipe.extractor.channel.ChannelInfoItemsCollector;
20+
import org.schabi.newpipe.extractor.channel.list.ChannelListExtractor;
21+
import org.schabi.newpipe.extractor.channel.tabs.ChannelTabs;
22+
import org.schabi.newpipe.extractor.downloader.Downloader;
23+
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
24+
import org.schabi.newpipe.extractor.exceptions.ParsingException;
25+
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
26+
import org.schabi.newpipe.extractor.services.youtube.YoutubeChannelHelper;
27+
import org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper;
28+
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeChannelTabLinkHandlerFactory;
29+
30+
import java.io.IOException;
31+
import java.nio.charset.StandardCharsets;
32+
import java.util.List;
33+
import java.util.Optional;
34+
35+
import javax.annotation.Nonnull;
36+
import javax.annotation.Nullable;
37+
38+
public class YoutubeFeaturedChannelListExtractor extends ChannelListExtractor {
39+
40+
private YoutubeChannelHelper.ChannelHeader channelHeader;
41+
private JsonObject jsonResponse;
42+
43+
private JsonObject jsonRendererListData;
44+
45+
private String channelId;
46+
47+
private final int rendererListIndex;
48+
49+
/**
 * Creates an extractor for one featured-channels shelf of a channel's featured tab.
 *
 * <p>The shelf is selected through a content filter of the form
 * {@code rendererlist_index=<digits>}. The first content filter matching that form is handed
 * to {@code YoutubeParsingHelper.parseRendererListIndexParam} and the result is stored as the
 * renderer list index used when the page is fetched.</p>
 *
 * @param service     the streaming service this extractor belongs to
 * @param linkHandler the link handler whose content filters carry the renderer list index
 * @throws ExtractionException if no content filter has the {@code rendererlist_index=<digits>}
 *                             form
 */
public YoutubeFeaturedChannelListExtractor(final StreamingService service,
                                           final ListLinkHandler linkHandler)
        throws ExtractionException {
    super(service, linkHandler);

    final String indexFilter = linkHandler.getContentFilters()
            .stream()
            .filter(candidate -> candidate.matches("^rendererlist_index=(\\d+)$"))
            .findFirst()
            .orElseThrow(() -> new ExtractionException("content filter for featured channels "
                    + "must contain rendererlist_index={index}"
                    + " to extract featured channels from featured tab"));

    this.rendererListIndex = YoutubeParsingHelper.parseRendererListIndexParam(indexFilter);
}
67+
68+
@Nonnull
69+
@Override
70+
public String getUrl() throws ParsingException {
71+
try {
72+
return YoutubeChannelTabLinkHandlerFactory.getInstance()
73+
.getUrl("channel/" + getId(), List.of(ChannelTabs.FEATURED), "");
74+
} catch (final ParsingException e) {
75+
return super.getUrl();
76+
}
77+
}
78+
79+
@Nonnull
80+
@Override
81+
public String getId() throws ParsingException {
82+
return YoutubeChannelHelper.getChannelId(channelHeader, jsonResponse, channelId);
83+
}
84+
85+
@Override
86+
public void onFetchPage(@Nonnull final Downloader downloader)
87+
throws IOException, ExtractionException {
88+
final String channelIdFromId = resolveChannelId(super.getId());
89+
90+
final YoutubeChannelHelper.ChannelResponseData data = getChannelResponse(
91+
channelIdFromId,
92+
"EghmZWF0dXJlZPIGBAoCMgA%3D",
93+
getExtractorLocalization(),
94+
getExtractorContentCountry());
95+
96+
jsonResponse = data.jsonResponse;
97+
channelHeader = YoutubeChannelHelper.getChannelHeader(jsonResponse);
98+
channelId = data.channelId;
99+
jsonRendererListData = YoutubeParsingHelper
100+
.getRendererListData(this.jsonResponse, this.rendererListIndex);
101+
102+
if (!this.jsonRendererListData.getObject("shelfRenderer")
103+
.getObject("content")
104+
.getObject("horizontalListRenderer")
105+
.getArray("items")
106+
.getObject(0)
107+
.has("gridChannelRenderer")) {
108+
throw new ExtractionException("rendererlist index does not point to featured channels");
109+
}
110+
}
111+
112+
@Nonnull
113+
@Override
114+
public InfoItemsPage<ChannelInfoItem> getInitialPage() throws IOException, ExtractionException {
115+
final JsonObject continuation = this.jsonRendererListData
116+
.getObject("shelfRenderer")
117+
.getObject("endpoint")
118+
.getObject("showEngagementPanelEndpoint")
119+
.getObject("engagementPanel")
120+
.getObject("engagementPanelSectionListRenderer")
121+
.getObject("content")
122+
.getObject("sectionListRenderer")
123+
.getArray("contents")
124+
.getObject(0)
125+
.getObject("itemSectionRenderer")
126+
.getArray("contents")
127+
.getObject(0)
128+
.getObject("continuationItemRenderer");
129+
130+
final VerifiedStatus verifiedStatus;
131+
if (channelHeader == null) {
132+
verifiedStatus = VerifiedStatus.UNKNOWN;
133+
} else {
134+
verifiedStatus = YoutubeChannelHelper
135+
.isChannelVerified(channelHeader)
136+
? VerifiedStatus.VERIFIED
137+
: VerifiedStatus.UNVERIFIED;
138+
}
139+
140+
final Page firstPage = getNextPageFrom(
141+
continuation, List.of(getChannelName(),
142+
getLinkHandler().getOriginalUrl(),
143+
verifiedStatus.toString()));
144+
145+
return getPage(firstPage);
146+
}
147+
148+
@Override
149+
public InfoItemsPage<ChannelInfoItem> getPage(final Page page)
150+
throws IOException, ExtractionException {
151+
if (page == null || isNullOrEmpty(page.getUrl())) {
152+
throw new IllegalArgumentException("Page doesn't contain an URL");
153+
}
154+
155+
final List<String> channelIds = page.getIds();
156+
157+
final ChannelInfoItemsCollector collector = new ChannelInfoItemsCollector(getServiceId());
158+
159+
final JsonObject ajaxJson = getJsonPostResponse("browse", page.getBody(),
160+
getExtractorLocalization());
161+
162+
final JsonObject gridRendererContinuation = ajaxJson.getArray("onResponseReceivedEndpoints")
163+
.getObject(0)
164+
.getObject("appendContinuationItemsAction")
165+
.getArray("continuationItems")
166+
.getObject(0)
167+
.getObject("gridRenderer");
168+
169+
final JsonObject continuation = collectItemsFrom(collector,
170+
gridRendererContinuation.getArray("items"))
171+
.orElse(null);
172+
173+
return new InfoItemsPage<>(collector, getNextPageFrom(continuation, channelIds));
174+
}
175+
176+
private Optional<JsonObject> collectItemsFrom(
177+
@Nonnull final ChannelInfoItemsCollector collector,
178+
@Nonnull final JsonArray items) {
179+
return items.stream()
180+
.filter(JsonObject.class::isInstance)
181+
.map(JsonObject.class::cast)
182+
.map(item ->
183+
collectItem(collector, item)
184+
)
185+
.reduce(Optional.empty(), (c1, c2) -> c1.or(() -> c2));
186+
}
187+
188+
private Optional<JsonObject> collectItem(@Nonnull final ChannelInfoItemsCollector collector,
189+
@Nonnull final JsonObject item) {
190+
if (item.has("gridChannelRenderer")) {
191+
commitFeaturedChannel(collector, item.getObject("gridChannelRenderer"));
192+
} else if (item.has("continuationItemRenderer")) {
193+
return Optional.ofNullable(item.getObject("continuationItemRenderer"));
194+
}
195+
196+
return Optional.empty();
197+
}
198+
199+
public void commitFeaturedChannel(final ChannelInfoItemsCollector collector,
200+
final JsonObject featuredChannelInfoItem) {
201+
collector.commit(new YoutubeChannelInfoItemExtractor(featuredChannelInfoItem));
202+
}
203+
204+
@Nullable
205+
private Page getNextPageFrom(final JsonObject continuations,
206+
final List<String> channelIds) throws IOException,
207+
ExtractionException {
208+
if (isNullOrEmpty(continuations)) {
209+
return null;
210+
}
211+
212+
final JsonObject continuationEndpoint = continuations.getObject("continuationEndpoint");
213+
final String continuation = continuationEndpoint.getObject("continuationCommand")
214+
.getString("token");
215+
216+
final byte[] body = JsonWriter.string(prepareDesktopJsonBuilder(getExtractorLocalization(),
217+
getExtractorContentCountry())
218+
.value("continuation", continuation)
219+
.done())
220+
.getBytes(StandardCharsets.UTF_8);
221+
222+
return new Page(YOUTUBEI_V1_URL + "browse?" + DISABLE_PRETTY_PRINT_PARAMETER, null,
223+
channelIds, null, body);
224+
}
225+
226+
protected String getChannelName() throws ParsingException {
227+
return YoutubeChannelHelper.getChannelName(channelHeader,
228+
YoutubeChannelHelper.getChannelAgeGateRenderer(jsonResponse),
229+
jsonResponse);
230+
}
231+
232+
@Nonnull
233+
@Override
234+
public String getName() throws ParsingException {
235+
try {
236+
final JsonObject title = this.jsonRendererListData.getObject("shelfRenderer")
237+
.getObject("title");
238+
final String name = getTextFromObject(title);
239+
240+
if (name == null) {
241+
return "";
242+
}
243+
244+
return name;
245+
} catch (final Exception e) {
246+
throw new ParsingException("Could not get name", e);
247+
}
248+
}
249+
250+
/**
251+
* Enum representing the verified state of a channel
252+
*/
253+
private enum VerifiedStatus {
254+
VERIFIED,
255+
UNVERIFIED,
256+
UNKNOWN
257+
}
258+
}

extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,10 @@ public static long mixedNumberWordToLong(final String numberWord)
8686
throws NumberFormatException, ParsingException {
8787
String multiplier = "";
8888
try {
89-
multiplier = Parser.matchGroup("[\\d]+([\\.,][\\d]+)?([KMBkmb])+", numberWord, 2);
89+
multiplier = Parser.matchGroup("[\\d]+([\\.,][\\d]+)?([KMBkmb])*", numberWord, 2);
90+
if (multiplier == null) {
91+
multiplier = "";
92+
}
9093
} catch (final ParsingException ignored) {
9194
}
9295
final double count = Double.parseDouble(

0 commit comments

Comments
 (0)