Skip to content

Commit 62b81c3

Browse files
authored
Merge pull request #222 from mauriciocolli/feed-extractor
Introduce FeedExtractor to fetch from dedicated feeds when available
2 parents 033a9fb + 26234a1 commit 62b81c3

11 files changed

Lines changed: 372 additions & 28 deletions

File tree

extractor/src/main/java/org/schabi/newpipe/extractor/StreamingService.java

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import org.schabi.newpipe.extractor.comments.CommentsExtractor;
88
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
99
import org.schabi.newpipe.extractor.exceptions.ParsingException;
10+
import org.schabi.newpipe.extractor.feed.FeedExtractor;
1011
import org.schabi.newpipe.extractor.kiosk.KioskList;
1112
import org.schabi.newpipe.extractor.linkhandler.LinkHandler;
1213
import org.schabi.newpipe.extractor.linkhandler.LinkHandlerFactory;
@@ -24,6 +25,8 @@
2425
import org.schabi.newpipe.extractor.subscription.SubscriptionExtractor;
2526
import org.schabi.newpipe.extractor.suggestion.SuggestionExtractor;
2627

28+
import javax.annotation.Nullable;
29+
2730
/*
2831
* Copyright (C) Christian Schabesberger 2018 <chris.schabesberger@mailbox.org>
2932
* StreamingService.java is part of NewPipe.
@@ -65,7 +68,7 @@ public ServiceInfo(String name, List<MediaCapability> mediaCapabilities) {
6568
public String getName() {
6669
return name;
6770
}
68-
71+
6972
public List<MediaCapability> getMediaCapabilities() {
7073
return mediaCapabilities;
7174
}
@@ -116,7 +119,7 @@ public ServiceInfo getServiceInfo() {
116119
public String toString() {
117120
return serviceId + ":" + serviceInfo.getName();
118121
}
119-
122+
120123
public abstract String getBaseUrl();
121124

122125
/*//////////////////////////////////////////////////////////////////////////
@@ -173,6 +176,19 @@ public String toString() {
173176
*/
174177
public abstract SubscriptionExtractor getSubscriptionExtractor();
175178

179+
/**
180+
* This method decides which strategy will be chosen to fetch the feed. In YouTube, for example, a separate feed
181+
* exists which is lightweight and made specifically to be used like this.
182+
* <p>
183+
* For services that provide no alternative way to retrieve the feed, null should be returned.
184+
*
185+
* @return a {@link FeedExtractor} instance or null.
186+
*/
187+
@Nullable
188+
public FeedExtractor getFeedExtractor(String url) throws ExtractionException {
189+
return null;
190+
}
191+
176192
/**
177193
* Must create a new instance of a KioskList implementation.
178194
* @return a new KioskList instance
@@ -258,7 +274,7 @@ public CommentsExtractor getCommentsExtractor(String url) throws ExtractionExcep
258274
}
259275
return getCommentsExtractor(llhf.fromUrl(url));
260276
}
261-
277+
262278
/*//////////////////////////////////////////////////////////////////////////
263279
// Utils
264280
//////////////////////////////////////////////////////////////////////////*/

extractor/src/main/java/org/schabi/newpipe/extractor/channel/ChannelInfo.java

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,7 @@
55
import org.schabi.newpipe.extractor.NewPipe;
66
import org.schabi.newpipe.extractor.StreamingService;
77
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
8-
import org.schabi.newpipe.extractor.exceptions.ParsingException;
98
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
10-
import org.schabi.newpipe.extractor.localization.Localization;
119
import org.schabi.newpipe.extractor.stream.StreamInfoItem;
1210
import org.schabi.newpipe.extractor.utils.ExtractorHelper;
1311

@@ -35,8 +33,8 @@
3533

3634
public class ChannelInfo extends ListInfo<StreamInfoItem> {
3735

38-
public ChannelInfo(int serviceId, ListLinkHandler linkHandler, String name) throws ParsingException {
39-
super(serviceId, linkHandler, name);
36+
public ChannelInfo(int serviceId, String id, String url, String originalUrl, String name, ListLinkHandler listLinkHandler) {
37+
super(serviceId, id, url, originalUrl, name, listLinkHandler.getContentFilters(), listLinkHandler.getSortFilter());
4038
}
4139

4240
public static ChannelInfo getInfo(String url) throws IOException, ExtractionException {
@@ -57,15 +55,14 @@ public static InfoItemsPage<StreamInfoItem> getMoreItems(StreamingService servic
5755

5856
public static ChannelInfo getInfo(ChannelExtractor extractor) throws IOException, ExtractionException {
5957

60-
ChannelInfo info = new ChannelInfo(extractor.getServiceId(),
61-
extractor.getLinkHandler(),
62-
extractor.getName());
58+
final int serviceId = extractor.getServiceId();
59+
final String id = extractor.getId();
60+
final String url = extractor.getUrl();
61+
final String originalUrl = extractor.getOriginalUrl();
62+
final String name = extractor.getName();
63+
64+
final ChannelInfo info = new ChannelInfo(serviceId, id, url, originalUrl, name, extractor.getLinkHandler());
6365

64-
try {
65-
info.setOriginalUrl(extractor.getOriginalUrl());
66-
} catch (Exception e) {
67-
info.addError(e);
68-
}
6966
try {
7067
info.setAvatarUrl(extractor.getAvatarUrl());
7168
} catch (Exception e) {
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
package org.schabi.newpipe.extractor.feed;
2+
3+
import org.schabi.newpipe.extractor.ListExtractor;
4+
import org.schabi.newpipe.extractor.StreamingService;
5+
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
6+
import org.schabi.newpipe.extractor.stream.StreamInfoItem;
7+
8+
/**
9+
* This class helps to extract items from lightweight feeds that the services may provide.
10+
* <p>
11+
* YouTube is an example of a service that has this alternative available.
12+
*/
13+
public abstract class FeedExtractor extends ListExtractor<StreamInfoItem> {
14+
public FeedExtractor(StreamingService service, ListLinkHandler listLinkHandler) {
15+
super(service, listLinkHandler);
16+
}
17+
}
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
package org.schabi.newpipe.extractor.feed;
2+
3+
import org.schabi.newpipe.extractor.ListExtractor.InfoItemsPage;
4+
import org.schabi.newpipe.extractor.ListInfo;
5+
import org.schabi.newpipe.extractor.NewPipe;
6+
import org.schabi.newpipe.extractor.StreamingService;
7+
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
8+
import org.schabi.newpipe.extractor.stream.StreamInfoItem;
9+
import org.schabi.newpipe.extractor.utils.ExtractorHelper;
10+
11+
import java.io.IOException;
12+
import java.util.List;
13+
14+
public class FeedInfo extends ListInfo<StreamInfoItem> {
15+
16+
public FeedInfo(int serviceId, String id, String url, String originalUrl, String name, List<String> contentFilter, String sortFilter) {
17+
super(serviceId, id, url, originalUrl, name, contentFilter, sortFilter);
18+
}
19+
20+
public static FeedInfo getInfo(String url) throws IOException, ExtractionException {
21+
return getInfo(NewPipe.getServiceByUrl(url), url);
22+
}
23+
24+
public static FeedInfo getInfo(StreamingService service, String url) throws IOException, ExtractionException {
25+
final FeedExtractor extractor = service.getFeedExtractor(url);
26+
27+
if (extractor == null) {
28+
throw new IllegalArgumentException("Service \"" + service.getServiceInfo().getName() + "\" doesn't support FeedExtractor.");
29+
}
30+
31+
extractor.fetchPage();
32+
return getInfo(extractor);
33+
}
34+
35+
public static FeedInfo getInfo(FeedExtractor extractor) throws IOException, ExtractionException {
36+
extractor.fetchPage();
37+
38+
final int serviceId = extractor.getServiceId();
39+
final String id = extractor.getId();
40+
final String url = extractor.getUrl();
41+
final String originalUrl = extractor.getOriginalUrl();
42+
final String name = extractor.getName();
43+
44+
final FeedInfo info = new FeedInfo(serviceId, id, url, originalUrl, name, null, null);
45+
46+
final InfoItemsPage<StreamInfoItem> itemsPage = ExtractorHelper.getItemsPageOrLogError(info, extractor);
47+
info.setRelatedItems(itemsPage.getItems());
48+
info.setNextPageUrl(itemsPage.getNextPageUrl());
49+
50+
return info;
51+
}
52+
}

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeService.java

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import org.schabi.newpipe.extractor.channel.ChannelExtractor;
1313
import org.schabi.newpipe.extractor.comments.CommentsExtractor;
1414
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
15+
import org.schabi.newpipe.extractor.feed.FeedExtractor;
1516
import org.schabi.newpipe.extractor.kiosk.KioskExtractor;
1617
import org.schabi.newpipe.extractor.kiosk.KioskList;
1718
import org.schabi.newpipe.extractor.linkhandler.LinkHandler;
@@ -24,14 +25,7 @@
2425
import org.schabi.newpipe.extractor.localization.Localization;
2526
import org.schabi.newpipe.extractor.playlist.PlaylistExtractor;
2627
import org.schabi.newpipe.extractor.search.SearchExtractor;
27-
import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeChannelExtractor;
28-
import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeCommentsExtractor;
29-
import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubePlaylistExtractor;
30-
import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeSearchExtractor;
31-
import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeStreamExtractor;
32-
import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeSubscriptionExtractor;
33-
import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeSuggestionExtractor;
34-
import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeTrendingExtractor;
28+
import org.schabi.newpipe.extractor.services.youtube.extractors.*;
3529
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeChannelLinkHandlerFactory;
3630
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeCommentsLinkHandlerFactory;
3731
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubePlaylistLinkHandlerFactory;
@@ -42,6 +36,8 @@
4236
import org.schabi.newpipe.extractor.subscription.SubscriptionExtractor;
4337
import org.schabi.newpipe.extractor.suggestion.SuggestionExtractor;
4438

39+
import javax.annotation.Nonnull;
40+
4541
/*
4642
* Created by Christian Schabesberger on 23.08.15.
4743
*
@@ -72,7 +68,7 @@ public YoutubeService(int id) {
7268
public String getBaseUrl() {
7369
return "https://youtube.com";
7470
}
75-
71+
7672
@Override
7773
public LinkHandlerFactory getStreamLHFactory() {
7874
return YoutubeStreamLinkHandlerFactory.getInstance();
@@ -147,6 +143,12 @@ public SubscriptionExtractor getSubscriptionExtractor() {
147143
return new YoutubeSubscriptionExtractor(this);
148144
}
149145

146+
@Nonnull
147+
@Override
148+
public FeedExtractor getFeedExtractor(final String channelUrl) throws ExtractionException {
149+
return new YoutubeFeedExtractor(this, getChannelLHFactory().fromUrl(channelUrl));
150+
}
151+
150152
@Override
151153
public ListLinkHandlerFactory getCommentsLHFactory() {
152154
return YoutubeCommentsLinkHandlerFactory.getInstance();

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,6 @@
4646
@SuppressWarnings("WeakerAccess")
4747
public class YoutubeChannelExtractor extends ChannelExtractor {
4848
/*package-private*/ static final String CHANNEL_URL_BASE = "https://www.youtube.com/channel/";
49-
private static final String CHANNEL_FEED_BASE = "https://www.youtube.com/feeds/videos.xml?channel_id=";
5049
private static final String CHANNEL_URL_PARAMETERS = "/videos?view=0&flow=list&sort=dd&live_view=10000";
5150

5251
private Document doc;
@@ -130,7 +129,7 @@ public String getBannerUrl() throws ParsingException {
130129
@Override
131130
public String getFeedUrl() throws ParsingException {
132131
try {
133-
return CHANNEL_FEED_BASE + getId();
132+
return YoutubeParsingHelper.getFeedUrlFrom(getId());
134133
} catch (Exception e) {
135134
throw new ParsingException("Could not get feed url", e);
136135
}
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
package org.schabi.newpipe.extractor.services.youtube.extractors;
2+
3+
import org.jsoup.Jsoup;
4+
import org.jsoup.nodes.Document;
5+
import org.jsoup.nodes.Element;
6+
import org.jsoup.select.Elements;
7+
import org.schabi.newpipe.extractor.ListExtractor;
8+
import org.schabi.newpipe.extractor.StreamingService;
9+
import org.schabi.newpipe.extractor.downloader.Downloader;
10+
import org.schabi.newpipe.extractor.downloader.Response;
11+
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
12+
import org.schabi.newpipe.extractor.feed.FeedExtractor;
13+
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
14+
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper;
15+
import org.schabi.newpipe.extractor.stream.StreamInfoItem;
16+
import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
17+
18+
import javax.annotation.Nonnull;
19+
import java.io.IOException;
20+
21+
public class YoutubeFeedExtractor extends FeedExtractor {
22+
public YoutubeFeedExtractor(StreamingService service, ListLinkHandler linkHandler) {
23+
super(service, linkHandler);
24+
}
25+
26+
private Document document;
27+
28+
@Override
29+
public void onFetchPage(@Nonnull Downloader downloader) throws IOException, ExtractionException {
30+
final String channelIdOrUser = getLinkHandler().getId();
31+
final String feedUrl = YoutubeParsingHelper.getFeedUrlFrom(channelIdOrUser);
32+
33+
final Response response = downloader.get(feedUrl);
34+
document = Jsoup.parse(response.responseBody());
35+
}
36+
37+
@Nonnull
38+
@Override
39+
public ListExtractor.InfoItemsPage<StreamInfoItem> getInitialPage() {
40+
final Elements entries = document.select("feed > entry");
41+
final StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
42+
43+
for (Element entryElement : entries) {
44+
collector.commit(new YoutubeFeedInfoItemExtractor(entryElement));
45+
}
46+
47+
return new InfoItemsPage<>(collector, null);
48+
}
49+
50+
@Nonnull
51+
@Override
52+
public String getId() {
53+
return document.getElementsByTag("yt:channelId").first().text();
54+
}
55+
56+
@Nonnull
57+
@Override
58+
public String getUrl() {
59+
return document.select("feed > author > uri").first().text();
60+
}
61+
62+
@Nonnull
63+
@Override
64+
public String getName() {
65+
return document.select("feed > author > name").first().text();
66+
}
67+
68+
@Override
69+
public String getNextPageUrl() {
70+
return null;
71+
}
72+
73+
@Override
74+
public InfoItemsPage<StreamInfoItem> getPage(String pageUrl) {
75+
return null;
76+
}
77+
78+
@Override
79+
public boolean hasNextPage() {
80+
return false;
81+
}
82+
}

0 commit comments

Comments
 (0)