Skip to content

Commit f13c028

Browse files
wb9688 authored and TobiGr committed
Reimplement some methods in YoutubeStreamExtractor
1 parent 02b5990 commit f13c028

1 file changed

Lines changed: 52 additions & 169 deletions

File tree

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java

Lines changed: 52 additions & 169 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,6 @@
44
import com.grack.nanojson.JsonObject;
55
import com.grack.nanojson.JsonParser;
66

7-
import org.jsoup.Jsoup;
87
import org.jsoup.nodes.Document;
98
import org.jsoup.nodes.Element;
109
import org.mozilla.javascript.Context;
@@ -39,17 +38,13 @@
3938

4039
import java.io.IOException;
4140
import java.io.UnsupportedEncodingException;
42-
import java.net.MalformedURLException;
43-
import java.net.URL;
4441
import java.util.ArrayList;
4542
import java.util.Collections;
4643
import java.util.HashMap;
4744
import java.util.LinkedHashMap;
4845
import java.util.List;
4946
import java.util.Locale;
5047
import java.util.Map;
51-
import java.util.regex.Matcher;
52-
import java.util.regex.Pattern;
5348

5449
import javax.annotation.Nonnull;
5550
import javax.annotation.Nullable;
@@ -75,8 +70,6 @@
7570
*/
7671

7772
public class YoutubeStreamExtractor extends StreamExtractor {
78-
private static final String TAG = YoutubeStreamExtractor.class.getSimpleName();
79-
8073
/*//////////////////////////////////////////////////////////////////////////
8174
// Exceptions
8275
//////////////////////////////////////////////////////////////////////////*/
@@ -87,12 +80,6 @@ public class DecryptException extends ParsingException {
8780
}
8881
}
8982

90-
public class SubtitlesException extends ContentNotAvailableException {
91-
SubtitlesException(String message, Throwable cause) {
92-
super(message, cause);
93-
}
94-
}
95-
9683
/*//////////////////////////////////////////////////////////////////////////*/
9784

9885
private Document doc;
@@ -120,22 +107,17 @@ public YoutubeStreamExtractor(StreamingService service, LinkHandler linkHandler)
120107
@Override
121108
public String getName() throws ParsingException {
122109
assertPageFetched();
110+
String title = null;
123111
try {
124-
return playerResponse.getObject("videoDetails").getString("title");
125-
126-
} catch (Exception e) {
127-
// fallback HTML method
128-
String name = null;
112+
title = getVideoPrimaryInfoRenderer().getObject("title").getArray("runs").getObject(0).getString("text");
113+
} catch (Exception ignored) {}
114+
if (title == null) {
129115
try {
130-
name = doc.select("meta[name=title]").attr(CONTENT);
131-
} catch (Exception ignored) {
132-
}
133-
134-
if (name == null) {
135-
throw new ParsingException("Could not get name", e);
136-
}
137-
return name;
116+
title = playerResponse.getObject("videoDetails").getString("title");
117+
} catch (Exception ignored) {}
138118
}
119+
if (title != null) return title;
120+
throw new ParsingException("Could not get name");
139121
}
140122

141123
@Override
@@ -144,19 +126,12 @@ public String getTextualUploadDate() throws ParsingException {
144126
return null;
145127
}
146128

129+
// TODO: try videoPrimaryInfoRenderer.dateText.simpleText
130+
147131
try {
148132
return playerResponse.getObject("microformat").getObject("playerMicroformatRenderer").getString("publishDate");
149133
} catch (Exception e) {
150-
String uploadDate = null;
151-
try {
152-
uploadDate = doc.select("meta[itemprop=datePublished]").attr(CONTENT);
153-
} catch (Exception ignored) {
154-
}
155-
156-
if (uploadDate == null) {
157-
throw new ParsingException("Could not get upload date", e);
158-
}
159-
return uploadDate;
134+
throw new ParsingException("Could not get upload date");
160135
}
161136
}
162137

@@ -181,15 +156,7 @@ public String getThumbnailUrl() throws ParsingException {
181156
return thumbnails.getObject(thumbnails.size() - 1).getString("url");
182157

183158
} catch (Exception e) {
184-
String url = null;
185-
try {
186-
url = doc.select("link[itemprop=\"thumbnailUrl\"]").first().attr("abs:href");
187-
} catch (Exception ignored) {}
188-
189-
if (url == null) {
190-
throw new ParsingException("Could not get thumbnail url", e);
191-
}
192-
return url;
159+
throw new ParsingException("Could not get thumbnail url");
193160
}
194161

195162
}
@@ -198,93 +165,19 @@ public String getThumbnailUrl() throws ParsingException {
198165
@Override
199166
public Description getDescription() throws ParsingException {
200167
assertPageFetched();
168+
// TODO: Parse videoSecondaryInfoRenderer.description
201169
try {
202-
// first try to get html-formatted description
203-
return new Description(parseHtmlAndGetFullLinks(doc.select("p[id=\"eow-description\"]").first().html()), Description.HTML);
204-
} catch (Exception e) {
205-
try {
206-
// fallback to raw non-html description
207-
return new Description(playerResponse.getObject("videoDetails").getString("shortDescription"), Description.PLAIN_TEXT);
208-
} catch (Exception ignored) {
209-
throw new ParsingException("Could not get the description", e);
210-
}
211-
}
212-
}
213-
214-
// onclick="yt.www.watch.player.seekTo(0*3600+00*60+00);return false;"
215-
// :00 is NOT recognized as a timestamp in description or comments.
216-
// 0:00 is recognized in both description and comments.
217-
// https://www.youtube.com/watch?v=4cccfDXu1vA
218-
private final static Pattern DESCRIPTION_TIMESTAMP_ONCLICK_REGEX = Pattern.compile(
219-
"seekTo\\("
220-
+ "(?:(\\d+)\\*3600\\+)?" // hours?
221-
+ "(\\d+)\\*60\\+" // minutes
222-
+ "(\\d+)" // seconds
223-
+ "\\)");
224-
225-
@SafeVarargs
226-
private static <T> T coalesce(T... args) {
227-
for (T arg : args) {
228-
if (arg != null) return arg;
170+
// raw non-html description
171+
return new Description(playerResponse.getObject("videoDetails").getString("shortDescription"), Description.PLAIN_TEXT);
172+
} catch (Exception ignored) {
173+
throw new ParsingException("Could not get the description");
229174
}
230-
throw new IllegalArgumentException("all arguments to coalesce() were null");
231-
}
232-
233-
private String parseHtmlAndGetFullLinks(String descriptionHtml)
234-
throws MalformedURLException, UnsupportedEncodingException, ParsingException {
235-
final Document description = Jsoup.parse(descriptionHtml, getUrl());
236-
for (Element a : description.select("a")) {
237-
final String rawUrl = a.attr("abs:href");
238-
final URL redirectLink = new URL(rawUrl);
239-
240-
final Matcher onClickTimestamp;
241-
final String queryString;
242-
if ((onClickTimestamp = DESCRIPTION_TIMESTAMP_ONCLICK_REGEX.matcher(a.attr("onclick")))
243-
.find()) {
244-
a.removeAttr("onclick");
245-
246-
String hours = coalesce(onClickTimestamp.group(1), "0");
247-
String minutes = onClickTimestamp.group(2);
248-
String seconds = onClickTimestamp.group(3);
249-
250-
int timestamp = 0;
251-
timestamp += Integer.parseInt(hours) * 3600;
252-
timestamp += Integer.parseInt(minutes) * 60;
253-
timestamp += Integer.parseInt(seconds);
254-
255-
String setTimestamp = "&t=" + timestamp;
256-
257-
// Even after clicking https://youtu.be/...?t=6,
258-
// getUrl() is https://www.youtube.com/watch?v=..., never youtu.be, never &t=.
259-
a.attr("href", getUrl() + setTimestamp);
260-
261-
} else if ((queryString = redirectLink.getQuery()) != null) {
262-
// if the query string is null we are not dealing with a redirect link,
263-
// so we don't need to override it.
264-
final String link =
265-
Parser.compatParseMap(queryString).get("q");
266-
267-
if (link != null) {
268-
// if link is null the a tag is a hashtag.
269-
// They refer to the youtube search. We do not handle them.
270-
a.text(link);
271-
a.attr("href", link);
272-
} else if (redirectLink.toString().contains("https://www.youtube.com/")) {
273-
a.text(redirectLink.toString());
274-
a.attr("href", redirectLink.toString());
275-
}
276-
} else if (redirectLink.toString().contains("https://www.youtube.com/")) {
277-
descriptionHtml = descriptionHtml.replace(rawUrl, redirectLink.toString());
278-
a.text(redirectLink.toString());
279-
a.attr("href", redirectLink.toString());
280-
}
281-
}
282-
return description.select("body").first().html();
283175
}
284176

285177
@Override
286178
public int getAgeLimit() throws ParsingException {
287179
assertPageFetched();
180+
// TODO: Find new way to get age limit
288181
if (!isAgeRestricted) {
289182
return NO_AGE_LIMIT;
290183
}
@@ -332,54 +225,25 @@ public long getTimeStamp() throws ParsingException {
332225
@Override
333226
public long getViewCount() throws ParsingException {
334227
assertPageFetched();
228+
String views = null;
335229
try {
336-
if (getStreamType().equals(StreamType.LIVE_STREAM)) {
337-
// The array index is variable, therefore we loop through the complete array.
338-
// videoPrimaryInfoRenderer is often stored at index 1
339-
JsonArray contents = initialData.getObject("contents").getObject("twoColumnWatchNextResults")
340-
.getObject("results").getObject("results").getArray("contents");
341-
for (Object c : contents) {
342-
try {
343-
// this gets current view count, but there is also an overall view count which is stored here:
344-
// contents.twoColumnWatchNextResults.secondaryResults.secondaryResults.results[0]
345-
// .compactAutoplayRenderer.contents[0].compactVideoRenderer.viewCountText.simpleText
346-
String views = ((JsonObject) c).getObject("videoPrimaryInfoRenderer")
347-
.getObject("viewCount").getObject("videoViewCountRenderer").getObject("viewCount")
348-
.getArray("runs").getObject(0).getString("text");
349-
return Long.parseLong(Utils.removeNonDigitCharacters(views));
350-
} catch (Exception ignored) {}
351-
}
352-
throw new ParsingException("Could not get view count from live stream");
353-
354-
} else {
355-
return Long.parseLong(playerResponse.getObject("videoDetails").getString("viewCount"));
356-
}
357-
} catch (Exception e) {
230+
views = getVideoPrimaryInfoRenderer().getObject("viewCount")
231+
.getObject("videoViewCountRenderer").getObject("viewCount")
232+
.getArray("runs").getObject(0).getString("text");
233+
} catch (Exception ignored) {}
234+
if (views == null) {
358235
try {
359-
return Long.parseLong(doc.select("meta[itemprop=interactionCount]").attr(CONTENT));
360-
} catch (Exception ignored) {
361-
throw new ParsingException("Could not get view count", e);
362-
}
363-
}
364-
}
365-
366-
private JsonObject getVideoPrimaryInfoRenderer() throws ParsingException {
367-
JsonArray contents = initialData.getObject("contents").getObject("twoColumnWatchNextResults")
368-
.getObject("results").getObject("results").getArray("contents");
369-
JsonObject videoPrimaryInfoRenderer = null;
370-
371-
for (Object content : contents) {
372-
if (((JsonObject) content).getObject("videoPrimaryInfoRenderer") != null) {
373-
videoPrimaryInfoRenderer = ((JsonObject) content).getObject("videoPrimaryInfoRenderer");
374-
break;
375-
}
236+
views = getVideoPrimaryInfoRenderer().getObject("viewCount")
237+
.getObject("videoViewCountRenderer").getObject("viewCount").getString("simpleText");
238+
} catch (Exception ignored) {}
376239
}
377-
378-
if (videoPrimaryInfoRenderer == null) {
379-
throw new ParsingException("Could not find videoPrimaryInfoRenderer");
240+
if (views == null) {
241+
try {
242+
views = playerResponse.getObject("videoDetails").getString("viewCount");
243+
} catch (Exception ignored) {}
380244
}
381-
382-
return videoPrimaryInfoRenderer;
245+
if (views != null) return Long.parseLong(views);
246+
throw new ParsingException("Could not get view count");
383247
}
384248

385249
@Override
@@ -993,6 +857,25 @@ public SubtitlesStream getSubtitle(final MediaFormat format) {
993857
// Utils
994858
//////////////////////////////////////////////////////////////////////////*/
995859

860+
private JsonObject getVideoPrimaryInfoRenderer() throws ParsingException {
861+
JsonArray contents = initialData.getObject("contents").getObject("twoColumnWatchNextResults")
862+
.getObject("results").getObject("results").getArray("contents");
863+
JsonObject videoPrimaryInfoRenderer = null;
864+
865+
for (Object content : contents) {
866+
if (((JsonObject) content).getObject("videoPrimaryInfoRenderer") != null) {
867+
videoPrimaryInfoRenderer = ((JsonObject) content).getObject("videoPrimaryInfoRenderer");
868+
break;
869+
}
870+
}
871+
872+
if (videoPrimaryInfoRenderer == null) {
873+
throw new ParsingException("Could not find videoPrimaryInfoRenderer");
874+
}
875+
876+
return videoPrimaryInfoRenderer;
877+
}
878+
996879
@Nonnull
997880
private static String getVideoInfoUrl(final String id, final String sts) {
998881
return "https://www.youtube.com/get_video_info?" + "video_id=" + id +

0 commit comments

Comments
 (0)