Skip to content

Commit 180836c

Browse files
wojcik-online authored and mauriciocolli committed
Base Implementation: Parse the upload date of StreamInfoItems
In the format '2 days ago' (in English) on a YouTube channel page. (Parser extensible to other pages.)
1 parent 514ed7b commit 180836c

16 files changed

Lines changed: 316 additions & 44 deletions

extractor/src/main/java/org/schabi/newpipe/extractor/StreamingService.java

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import org.schabi.newpipe.extractor.playlist.PlaylistExtractor;
1818
import org.schabi.newpipe.extractor.search.SearchExtractor;
1919
import org.schabi.newpipe.extractor.stream.StreamExtractor;
20+
import org.schabi.newpipe.extractor.stream.TimeAgoParser;
2021
import org.schabi.newpipe.extractor.subscription.SubscriptionExtractor;
2122
import org.schabi.newpipe.extractor.utils.Localization;
2223

@@ -222,15 +223,15 @@ public SuggestionExtractor getSuggestionExtractor() {
222223
/**
 * Convenience overload: delegates to the two-argument
 * {@code getChannelExtractor(linkHandler, localization)} using the value of
 * {@code NewPipe.getPreferredLocalization()} as the localization.
 *
 * @param linkHandler the link handler forwarded unchanged to the two-argument overload
 * @return whatever the two-argument overload returns
 * @throws ExtractionException propagated from the two-argument overload
 */
public ChannelExtractor getChannelExtractor(ListLinkHandler linkHandler) throws ExtractionException {
223224
return getChannelExtractor(linkHandler, NewPipe.getPreferredLocalization());
224225
}
225-
226+
226227
/**
 * Convenience overload: delegates to the two-argument
 * {@code getPlaylistExtractor(linkHandler, localization)} using the value of
 * {@code NewPipe.getPreferredLocalization()} as the localization.
 *
 * @param linkHandler the link handler forwarded unchanged to the two-argument overload
 * @return whatever the two-argument overload returns
 * @throws ExtractionException propagated from the two-argument overload
 */
public PlaylistExtractor getPlaylistExtractor(ListLinkHandler linkHandler) throws ExtractionException {
227228
return getPlaylistExtractor(linkHandler, NewPipe.getPreferredLocalization());
228229
}
229230

230231
/**
 * Convenience overload: delegates to the two-argument
 * {@code getStreamExtractor(linkHandler, localization)} using the value of
 * {@code NewPipe.getPreferredLocalization()} as the localization.
 *
 * @param linkHandler the link handler forwarded unchanged to the two-argument overload
 * @return whatever the two-argument overload returns
 * @throws ExtractionException propagated from the two-argument overload
 */
public StreamExtractor getStreamExtractor(LinkHandler linkHandler) throws ExtractionException {
231232
return getStreamExtractor(linkHandler, NewPipe.getPreferredLocalization());
232233
}
233-
234+
234235
/**
 * Convenience overload: delegates to the two-argument
 * {@code getCommentsExtractor(urlIdHandler, localization)} using the value of
 * {@code NewPipe.getPreferredLocalization()} as the localization.
 *
 * @param urlIdHandler the link handler forwarded unchanged to the two-argument overload
 * @return whatever the two-argument overload returns
 * @throws ExtractionException propagated from the two-argument overload
 */
public CommentsExtractor getCommentsExtractor(ListLinkHandler urlIdHandler) throws ExtractionException {
235236
return getCommentsExtractor(urlIdHandler, NewPipe.getPreferredLocalization());
236237
}
@@ -287,7 +288,7 @@ public PlaylistExtractor getPlaylistExtractor(String url) throws ExtractionExcep
287288
/**
 * Builds a stream extractor directly from a URL string.
 * <p>
 * The URL is first turned into a link handler via
 * {@code getStreamLHFactory().fromUrl(url)}; that handler and the value of
 * {@code NewPipe.getPreferredLocalization()} are then passed to the two-argument
 * {@code getStreamExtractor} overload.
 *
 * @param url the URL handed to the stream link-handler factory
 * @return whatever the two-argument overload returns
 * @throws ExtractionException propagated from the factory or the two-argument overload
 */
public StreamExtractor getStreamExtractor(String url) throws ExtractionException {
288289
return getStreamExtractor(getStreamLHFactory().fromUrl(url), NewPipe.getPreferredLocalization());
289290
}
290-
291+
291292
public CommentsExtractor getCommentsExtractor(String url) throws ExtractionException {
292293
ListLinkHandlerFactory llhf = getCommentsLHFactory();
293294
if(null == llhf) {
@@ -296,6 +297,9 @@ public CommentsExtractor getCommentsExtractor(String url) throws ExtractionExcep
296297
return getCommentsExtractor(llhf.fromUrl(url), NewPipe.getPreferredLocalization());
297298
}
298299

300+
/**
 * Creates a {@link TimeAgoParser} configured with
 * {@code TimeAgoParser.DEFAULT_AGO_PHRASES}.
 *
 * @return a newly constructed parser; a fresh instance is built on every call
 */
public TimeAgoParser getTimeAgoParser() {
301+
return new TimeAgoParser(TimeAgoParser.DEFAULT_AGO_PHRASES);
302+
}
299303

300304
/**
301305
* Figures out where the link is pointing to (a channel, a video, a playlist, etc.)

extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudParsingHelper.java

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -79,23 +79,22 @@ static boolean checkIfHardcodedClientIdIsValid(Downloader dl) throws IOException
7979
return dl.head(apiUrl).getResponseCode() == 200;
8080
}
8181

82-
public static String toDateString(String time) throws ParsingException {
82+
/**
 * Parses a timestamp string into a {@link Date}.
 * <p>
 * Two patterns are tried in order: {@code yyyy-MM-dd'T'HH:mm:ss'Z'} first and, only
 * if that fails, {@code yyyy/MM/dd HH:mm:ss +0000}.
 *
 * @param time the timestamp text to parse
 * @return the date produced by the first pattern that matches
 * @throws ParsingException if neither pattern matches; its message is taken from the
 *         first failure and its cause is the second failure
 */
// NOTE(review): "Z" (quoted) and "+0000" are literal text in these patterns, not zone
// fields, so SimpleDateFormat interprets the parsed fields in the JVM default time
// zone. TODO confirm whether the formatters should be switched to UTC explicitly.
static Date parseDate(String time) throws ParsingException {
8383
try {
84-
Date date;
85-
// Have two date formats, one for the 'api.soundc...' and the other 'api-v2.soundc...'.
84+
return new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'").parse(time);
85+
} catch (ParseException e1) {
8686
try {
87-
date = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'").parse(time);
88-
} catch (Exception e) {
89-
date = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss +0000").parse(time);
87+
return new SimpleDateFormat("yyyy/MM/dd HH:mm:ss +0000").parse(time);
88+
} catch (ParseException e2) {
89+
// Message comes from the first attempt (e1); the second attempt (e2) is kept as the cause.
throw new ParsingException(e1.getMessage(), e2);
9090
}
91-
92-
SimpleDateFormat newDateFormat = new SimpleDateFormat("yyyy-MM-dd");
93-
return newDateFormat.format(date);
94-
} catch (ParseException e) {
95-
throw new ParsingException(e.getMessage(), e);
9691
}
9792
}
9893

94+
/**
 * Parses {@code time} with {@code parseDate} and re-formats the result using the
 * pattern {@code yyyy-MM-dd}.
 *
 * @param time the timestamp text handed to {@code parseDate}
 * @return the date formatted as {@code yyyy-MM-dd}
 * @throws ParsingException propagated from {@code parseDate}
 */
static String toTextualDate(String time) throws ParsingException {
95+
return new SimpleDateFormat("yyyy-MM-dd").format(parseDate(time));
96+
}
97+
9998
/**
10099
* Call the endpoint "/resolve" of the api.<p>
101100
*

extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudStreamExtractor.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ public String getName() {
5151
@Nonnull
5252
@Override
5353
public String getUploadDate() throws ParsingException {
54-
return SoundcloudParsingHelper.toDateString(track.getString("created_at"));
54+
return SoundcloudParsingHelper.toTextualDate(track.getString("created_at"));
5555
}
5656

5757
@Nonnull

extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudStreamInfoItemExtractor.java

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
import org.schabi.newpipe.extractor.stream.StreamInfoItemExtractor;
66
import org.schabi.newpipe.extractor.stream.StreamType;
77

8+
import java.util.Calendar;
9+
810
import static org.schabi.newpipe.extractor.utils.Utils.replaceHttpWithHttps;
911

1012
public class SoundcloudStreamInfoItemExtractor implements StreamInfoItemExtractor {
@@ -41,8 +43,19 @@ public String getUploaderUrl() {
4143
}
4244

4345
/**
 * Returns the item's {@code created_at} value after it has been re-formatted by
 * {@code SoundcloudParsingHelper.toTextualDate}.
 *
 * @throws ParsingException propagated from {@code SoundcloudParsingHelper.toTextualDate}
 */
@Override
44-
public String getUploadDate() throws ParsingException {
45-
return SoundcloudParsingHelper.toDateString(itemObject.getString("created_at"));
46+
public String getTextualUploadDate() throws ParsingException {
47+
return SoundcloudParsingHelper.toTextualDate(getCreatedAt());
48+
}
49+
50+
/**
 * Returns the item's {@code created_at} value as a {@link Calendar}.
 * <p>
 * The calendar is obtained from {@link Calendar#getInstance()} and then set to the
 * date returned by {@code SoundcloudParsingHelper.parseDate}.
 *
 * @return a calendar set to the parsed {@code created_at} instant
 * @throws ParsingException propagated from {@code SoundcloudParsingHelper.parseDate}
 */
@Override
51+
public Calendar getUploadDate() throws ParsingException {
52+
Calendar uploadTime = Calendar.getInstance();
53+
uploadTime.setTime(SoundcloudParsingHelper.parseDate(getCreatedAt()));
54+
return uploadTime;
55+
}
56+
57+
/**
 * Reads the {@code created_at} string from {@code itemObject}.
 *
 * @return the value stored under the {@code created_at} key
 */
private String getCreatedAt() {
58+
return itemObject.getString("created_at");
4659
}
4760

4861
@Override

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper;
1919
import org.schabi.newpipe.extractor.stream.StreamInfoItem;
2020
import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
21+
import org.schabi.newpipe.extractor.stream.TimeAgoParser;
2122
import org.schabi.newpipe.extractor.utils.DonationLinkHelper;
2223
import org.schabi.newpipe.extractor.utils.Localization;
2324
import org.schabi.newpipe.extractor.utils.Parser;
@@ -53,6 +54,8 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
5354
private static final String CHANNEL_FEED_BASE = "https://www.youtube.com/feeds/videos.xml?channel_id=";
5455
private static final String CHANNEL_URL_PARAMETERS = "/videos?view=0&flow=list&sort=dd&live_view=10000&gl=US&hl=en";
5556

57+
private final TimeAgoParser timeAgoParser = getService().getTimeAgoParser();
58+
5659
private Document doc;
5760

5861
public YoutubeChannelExtractor(StreamingService service, ListLinkHandler linkHandler, Localization localization) {
@@ -230,7 +233,7 @@ private void collectStreamsFrom(StreamInfoItemsCollector collector, Element elem
230233
final String uploaderUrl = getUrl();
231234
for (final Element li : element.children()) {
232235
if (li.select("div[class=\"feed-item-dismissable\"]").first() != null) {
233-
collector.commit(new YoutubeStreamInfoItemExtractor(li) {
236+
collector.commit(new YoutubeStreamInfoItemExtractor(li, timeAgoParser) {
234237
@Override
235238
public String getUrl() throws ParsingException {
236239
try {

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import org.schabi.newpipe.extractor.stream.StreamInfoItem;
1919
import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
2020
import org.schabi.newpipe.extractor.stream.StreamType;
21+
import org.schabi.newpipe.extractor.stream.TimeAgoParser;
2122
import org.schabi.newpipe.extractor.utils.Localization;
2223
import org.schabi.newpipe.extractor.utils.Utils;
2324

@@ -28,6 +29,8 @@
2829
@SuppressWarnings("WeakerAccess")
2930
public class YoutubePlaylistExtractor extends PlaylistExtractor {
3031

32+
private final TimeAgoParser timeAgoParser = getService().getTimeAgoParser();
33+
3134
private Document doc;
3235

3336
public YoutubePlaylistExtractor(StreamingService service, ListLinkHandler linkHandler, Localization localization) {
@@ -192,7 +195,7 @@ private void collectStreamsFrom(@Nonnull StreamInfoItemsCollector collector, @Nu
192195
continue;
193196
}
194197

195-
collector.commit(new YoutubeStreamInfoItemExtractor(li) {
198+
collector.commit(new YoutubeStreamInfoItemExtractor(li, timeAgoParser) {
196199
public Element uploaderLink;
197200

198201
@Override
@@ -258,7 +261,7 @@ public String getUploaderUrl() throws ParsingException {
258261
}
259262

260263
/**
 * Always returns an empty string.
 * NOTE(review): presumably no upload date is available for items collected here — confirm.
 */
@Override
261-
public String getUploadDate() throws ParsingException {
264+
public String getTextualUploadDate() throws ParsingException {
262265
return "";
263266
}
264267

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import org.schabi.newpipe.extractor.StreamingService;
1010
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
1111
import org.schabi.newpipe.extractor.exceptions.ParsingException;
12+
import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandler;
1213
import org.schabi.newpipe.extractor.search.InfoItemsSearchCollector;
1314
import org.schabi.newpipe.extractor.search.SearchExtractor;
1415
import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandler;
@@ -129,7 +130,7 @@ private InfoItemsSearchCollector collectItems(Document doc) throws NothingFoundE
129130

130131
// video item type
131132
} else if ((el = item.select("div[class*=\"yt-lockup-video\"]").first()) != null) {
132-
collector.commit(new YoutubeStreamInfoItemExtractor(el));
133+
collector.commit(new YoutubeStreamInfoItemExtractor(el, getService().getTimeAgoParser()));
133134
} else if ((el = item.select("div[class*=\"yt-lockup-channel\"]").first()) != null) {
134135
collector.commit(new YoutubeChannelInfoItemExtractor(el));
135136
} else if ((el = item.select("div[class*=\"yt-lockup-playlist\"]").first()) != null &&

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,8 @@ public class SubtitlesException extends ContentNotAvailableException {
7575

7676
/*//////////////////////////////////////////////////////////////////////////*/
7777

78+
private final TimeAgoParser timeAgoParser = getService().getTimeAgoParser();
79+
7880
private Document doc;
7981
@Nullable
8082
private JsonObject playerArgs;
@@ -932,7 +934,7 @@ private Map<String, ItagItem> getItags(String streamingDataKey, ItagItem.ItagTyp
932934
* This is encapsulated in a StreamInfoItem object, which is a subset of the fields in a full StreamInfo.
933935
*/
934936
private StreamInfoItemExtractor extractVideoPreviewInfo(final Element li) {
935-
return new YoutubeStreamInfoItemExtractor(li) {
937+
return new YoutubeStreamInfoItemExtractor(li, timeAgoParser) {
936938

937939
@Override
938940
public String getUrl() throws ParsingException {
@@ -959,7 +961,7 @@ public String getUploaderUrl() throws ParsingException {
959961
}
960962

961963
/**
 * Always returns an empty string.
 * NOTE(review): presumably no upload date is available for these preview items — confirm.
 */
@Override
962-
public String getUploadDate() throws ParsingException {
964+
public String getTextualUploadDate() throws ParsingException {
963965
return "";
964966
}
965967

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java

Lines changed: 34 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,17 @@
11
package org.schabi.newpipe.extractor.services.youtube.extractors;
22

33
import org.jsoup.nodes.Element;
4+
import org.jsoup.select.Elements;
45
import org.schabi.newpipe.extractor.exceptions.ParsingException;
56
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper;
67
import org.schabi.newpipe.extractor.stream.StreamInfoItemExtractor;
78
import org.schabi.newpipe.extractor.stream.StreamType;
9+
import org.schabi.newpipe.extractor.stream.TimeAgoParser;
810
import org.schabi.newpipe.extractor.utils.Utils;
911

12+
import javax.annotation.Nullable;
13+
import java.util.Calendar;
14+
1015
/*
1116
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
1217
* YoutubeStreamInfoItemExtractor.java is part of NewPipe.
@@ -28,9 +33,18 @@
2833
public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor {
2934

3035
private final Element item;
36+
private final TimeAgoParser timeAgoParser;
37+
38+
private String cachedUploadDate;
3139

32-
public YoutubeStreamInfoItemExtractor(Element item) {
40+
/**
41+
* Creates an extractor of StreamInfoItems from a YouTube page.
42+
* @param item The page element
43+
* @param timeAgoParser A parser of the textual dates, or {@code null}; when it is {@code null}, {@code getUploadDate()} returns {@code null}.
44+
*/
45+
public YoutubeStreamInfoItemExtractor(Element item, @Nullable TimeAgoParser timeAgoParser) {
3346
this.item = item;
47+
this.timeAgoParser = timeAgoParser;
3448
}
3549

3650
@Override
@@ -126,20 +140,35 @@ public String getUploaderUrl() throws ParsingException {
126140
}
127141

128142
/**
 * Returns the textual upload date of the item.
 * <p>
 * The text is read from the first {@code li} inside the first
 * {@code div[class="yt-lockup-meta"]} of the item element, and is cached in
 * {@code cachedUploadDate} after the first successful lookup. When either element is
 * missing, an empty string is returned and nothing is cached.
 *
 * @return the cached or freshly read text, or an empty string if it cannot be found
 * @throws ParsingException wrapping any exception raised while reading the element
 */
@Override
129-
public String getUploadDate() throws ParsingException {
143+
public String getTextualUploadDate() throws ParsingException {
144+
if (cachedUploadDate != null) {
145+
return cachedUploadDate;
146+
}
147+
130148
try {
131149
Element meta = item.select("div[class=\"yt-lockup-meta\"]").first();
132150
if (meta == null) return "";
133151

134-
Element li = meta.select("li").first();
135-
if(li == null) return "";
152+
final Elements li = meta.select("li");
153+
if (li.isEmpty()) return "";
136154

137-
return meta.select("li").first().text();
155+
// Store the text so later calls return it without querying the element again.
return cachedUploadDate = li.first().text();
138156
} catch (Exception e) {
139157
throw new ParsingException("Could not get upload date", e);
140158
}
141159
}
142160

161+
/**
 * Returns the upload date obtained by passing the result of
 * {@code getTextualUploadDate()} to the {@code TimeAgoParser} given at construction.
 *
 * @return the value returned by {@code timeAgoParser.parse}, or {@code null} when no
 *         parser was supplied or the textual date is {@code null} or empty
 * @throws ParsingException propagated from {@code getTextualUploadDate()}
 */
@Override
162+
public Calendar getUploadDate() throws ParsingException {
163+
String textualUploadDate = getTextualUploadDate();
164+
if (timeAgoParser != null
165+
&& textualUploadDate != null && !"".equals(textualUploadDate)) {
166+
return timeAgoParser.parse(textualUploadDate);
167+
} else {
168+
return null;
169+
}
170+
}
171+
143172
@Override
144173
public long getViewCount() throws ParsingException {
145174
String input;

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeTrendingExtractor.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,12 +35,15 @@
3535
import org.schabi.newpipe.extractor.stream.StreamInfoItem;
3636
import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
3737
import org.schabi.newpipe.extractor.utils.Localization;
38+
import org.schabi.newpipe.extractor.stream.TimeAgoParser;
3839

3940
import javax.annotation.Nonnull;
4041
import java.io.IOException;
4142

4243
public class YoutubeTrendingExtractor extends KioskExtractor<StreamInfoItem> {
4344

45+
private final TimeAgoParser timeAgoParser = getService().getTimeAgoParser();
46+
4447
private Document doc;
4548

4649
public YoutubeTrendingExtractor(StreamingService service,
@@ -93,7 +96,7 @@ public InfoItemsPage<StreamInfoItem> getInitialPage() throws ParsingException {
9396
for(Element ul : uls) {
9497
for(final Element li : ul.children()) {
9598
final Element el = li.select("div[class*=\"yt-lockup-dismissable\"]").first();
96-
collector.commit(new YoutubeStreamInfoItemExtractor(li) {
99+
collector.commit(new YoutubeStreamInfoItemExtractor(li, timeAgoParser) {
97100
@Override
98101
public String getUrl() throws ParsingException {
99102
try {

0 commit comments

Comments
 (0)