Skip to content

Commit 1ed89aa

Browse files
committed
Use playerResponse more often in yt stream extractor
This enhances performance and should make the extractor more reliable, since it gets info from a stable JSON structure that shouldn't be subject to many changes. Fallback HTML methods have been kept. In case of error, the thrown exception contains the data about the playerResponse failure, which should be clearer than an NPE caused by not-found HTML tags.
1 parent d83787a commit 1ed89aa

1 file changed

Lines changed: 100 additions & 79 deletions

File tree

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java

Lines changed: 100 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -106,19 +106,21 @@ public YoutubeStreamExtractor(StreamingService service, LinkHandler linkHandler)
106106
@Override
107107
public String getName() throws ParsingException {
108108
assertPageFetched();
109-
String name = getStringFromMetaData("title");
110-
if(name == null) {
111-
// Fallback to HTML method
109+
try {
110+
return playerResponse.getObject("videoDetails").getString("title");
111+
112+
} catch (Exception e) {
113+
// fallback HTML method
114+
String name = null;
112115
try {
113116
name = doc.select("meta[name=title]").attr(CONTENT);
114-
} catch (Exception e) {
115-
throw new ParsingException("Could not get the title", e);
117+
} catch (Exception ignored) {}
118+
119+
if (name == null) {
120+
throw new ParsingException("Could not get name", e);
116121
}
122+
return name;
117123
}
118-
if(name == null || name.isEmpty()) {
119-
throw new ParsingException("Could not get the title");
120-
}
121-
return name;
122124
}
123125

124126
@Override
@@ -128,9 +130,17 @@ public String getTextualUploadDate() throws ParsingException {
128130
}
129131

130132
try {
131-
return doc.select("meta[itemprop=datePublished]").attr(CONTENT);
132-
} catch (Exception e) {//todo: add fallback method
133-
throw new ParsingException("Could not get upload date", e);
133+
return playerResponse.getObject("microformat").getObject("playerMicroformatRenderer").getString("publishDate");
134+
} catch (Exception e) {
135+
String uploadDate = null;
136+
try {
137+
uploadDate = doc.select("meta[itemprop=datePublished]").attr(CONTENT);
138+
} catch (Exception ignored) {}
139+
140+
if (uploadDate == null) {
141+
throw new ParsingException("Could not get upload date", e);
142+
}
143+
return uploadDate;
134144
}
135145
}
136146

@@ -149,34 +159,39 @@ public DateWrapper getUploadDate() throws ParsingException {
149159
@Override
150160
public String getThumbnailUrl() throws ParsingException {
151161
assertPageFetched();
152-
// Try to get high resolution thumbnail first, if it fails, use low res from the player instead
153162
try {
154-
return doc.select("link[itemprop=\"thumbnailUrl\"]").first().attr("abs:href");
155-
} catch (Exception ignored) {
156-
// Try other method...
157-
}
163+
JsonArray thumbnails = playerResponse.getObject("videoDetails").getObject("thumbnail").getArray("thumbnails");
164+
// the last thumbnail is the one with the highest resolution
165+
return thumbnails.getObject(thumbnails.size()-1).getString("url");
158166

159-
try {
160-
if (playerArgs != null && playerArgs.isString("thumbnail_url")) return playerArgs.getString("thumbnail_url");
161-
} catch (Exception ignored) {
162-
// Try other method...
163-
}
164-
165-
try {
166-
return videoInfoPage.get("thumbnail_url");
167167
} catch (Exception e) {
168-
throw new ParsingException("Could not get thumbnail url", e);
168+
String url = null;
169+
try {
170+
url = doc.select("link[itemprop=\"thumbnailUrl\"]").first().attr("abs:href");
171+
} catch (Exception ignored) {}
172+
173+
if (url == null) {
174+
throw new ParsingException("Could not get thumbnail url", e);
175+
}
176+
return url;
169177
}
178+
170179
}
171180

172181
@Nonnull
173182
@Override
174183
public String getDescription() throws ParsingException {
175184
assertPageFetched();
176185
try {
186+
// first try to get html-formatted description
177187
return parseHtmlAndGetFullLinks(doc.select("p[id=\"eow-description\"]").first().html());
178188
} catch (Exception e) {
179-
throw new ParsingException("Could not get the description", e);
189+
try {
190+
// fallback to raw non-html description
191+
return playerResponse.getObject("videoDetails").getString("shortDescription");
192+
} catch (Exception ignored) {
193+
throw new ParsingException("Could not get the description", e);
194+
}
180195
}
181196
}
182197

@@ -269,25 +284,22 @@ public int getAgeLimit() throws ParsingException {
269284
public long getLength() throws ParsingException {
270285
assertPageFetched();
271286

272-
// try getting duration from playerargs
273-
try {
274-
String durationMs = playerResponse
275-
.getObject("streamingData")
276-
.getArray("formats")
277-
.getObject(0)
278-
.getString("approxDurationMs");
279-
return Long.parseLong(durationMs)/1000;
280-
} catch (Exception e) {
281-
}
282-
283-
//try getting value from age gated video
284287
try {
285288
String duration = playerResponse
286289
.getObject("videoDetails")
287290
.getString("lengthSeconds");
288291
return Long.parseLong(duration);
289292
} catch (Exception e) {
290-
throw new ParsingException("Every methode to get the duration has failed: ", e);
293+
try {
294+
String durationMs = playerResponse
295+
.getObject("streamingData")
296+
.getArray("formats")
297+
.getObject(0)
298+
.getString("approxDurationMs");
299+
return Math.round(Long.parseLong(durationMs)/1000.0f);
300+
} catch (Exception ignored) {
301+
throw new ParsingException("Could not get duration", e);
302+
}
291303
}
292304
}
293305

@@ -307,11 +319,15 @@ public long getViewCount() throws ParsingException {
307319
try {
308320
if (getStreamType().equals(StreamType.LIVE_STREAM)) {
309321
return getLiveStreamWatchingCount();
322+
} else {
323+
return Long.parseLong(playerResponse.getObject("videoDetails").getString("viewCount"));
324+
}
325+
} catch (Exception e) {
326+
try {
327+
return Long.parseLong(doc.select("meta[itemprop=interactionCount]").attr(CONTENT));
328+
} catch (Exception ignored) {
329+
throw new ParsingException("Could not get view count", e);
310330
}
311-
312-
return Long.parseLong(doc.select("meta[itemprop=interactionCount]").attr(CONTENT));
313-
} catch (Exception e) {//todo: find fallback method
314-
throw new ParsingException("Could not get number of views", e);
315331
}
316332
}
317333

@@ -373,7 +389,10 @@ public long getLikeCount() throws ParsingException {
373389
try {
374390
likesString = button.select("span.yt-uix-button-content").first().text();
375391
} catch (NullPointerException e) {
376-
//if this kicks in our button has no content and therefore likes/dislikes are disabled
392+
//if this kicks in our button has no content and therefore ratings must be disabled
393+
if (playerResponse.getObject("videoDetails").getBoolean("allowRatings")) {
394+
throw new ParsingException("Ratings are enabled even though the like button is missing", e);
395+
}
377396
return -1;
378397
}
379398
return Integer.parseInt(Utils.removeNonDigitCharacters(likesString));
@@ -393,7 +412,10 @@ public long getDislikeCount() throws ParsingException {
393412
try {
394413
dislikesString = button.select("span.yt-uix-button-content").first().text();
395414
} catch (NullPointerException e) {
396-
//if this kicks in our button has no content and therefore likes/dislikes are disabled
415+
//if this kicks in our button has no content and therefore ratings must be disabled
416+
if (playerResponse.getObject("videoDetails").getBoolean("allowRatings")) {
417+
throw new ParsingException("Ratings are enabled even though the dislike button is missing", e);
418+
}
397419
return -1;
398420
}
399421
return Integer.parseInt(Utils.removeNonDigitCharacters(dislikesString));
@@ -409,60 +431,59 @@ public long getDislikeCount() throws ParsingException {
409431
public String getUploaderUrl() throws ParsingException {
410432
assertPageFetched();
411433
try {
412-
return doc.select("div[class=\"yt-user-info\"]").first().children()
413-
.select("a").first().attr("abs:href");
434+
return "https://www.youtube.com/channel/" +
435+
playerResponse.getObject("videoDetails").getString("channelId");
414436
} catch (Exception e) {
415-
throw new ParsingException("Could not get channel link", e);
416-
}
417-
}
418-
437+
String uploaderUrl = null;
438+
try {
439+
uploaderUrl = doc.select("div[class=\"yt-user-info\"]").first().children()
440+
.select("a").first().attr("abs:href");
441+
} catch (Exception ignored) {}
419442

420-
@Nullable
421-
private String getStringFromMetaData(String field) {
422-
assertPageFetched();
423-
String value = null;
424-
if(playerArgs != null) {
425-
// This can not fail
426-
value = playerArgs.getString(field);
427-
}
428-
if(value == null) {
429-
// This can not fail too
430-
value = videoInfoPage.get(field);
443+
if (uploaderUrl == null) {
444+
throw new ParsingException("Could not get channel link", e);
445+
}
446+
return uploaderUrl;
431447
}
432-
return value;
433448
}
434449

435450
@Nonnull
436451
@Override
437452
public String getUploaderName() throws ParsingException {
438453
assertPageFetched();
439-
String name = getStringFromMetaData("author");
440-
441-
if(name == null) {
454+
try {
455+
return playerResponse.getObject("videoDetails").getString("author");
456+
} catch (Exception e) {
457+
String name = null;
442458
try {
443-
// Fallback to HTML method
444459
name = doc.select("div.yt-user-info").first().text();
445-
} catch (Exception e) {
446-
throw new ParsingException("Could not get uploader name", e);
460+
} catch (Exception ignored) {}
461+
462+
if (name == null) {
463+
throw new ParsingException("Could not get uploader name");
447464
}
465+
return name;
448466
}
449-
if(name == null || name.isEmpty()) {
450-
throw new ParsingException("Could not get uploader name");
451-
}
452-
return name;
453467
}
454468

455469
@Nonnull
456470
@Override
457471
public String getUploaderAvatarUrl() throws ParsingException {
458472
assertPageFetched();
473+
474+
String uploaderAvatarUrl = null;
459475
try {
460-
return doc.select("a[class*=\"yt-user-photo\"]").first()
476+
uploaderAvatarUrl = doc.select("a[class*=\"yt-user-photo\"]").first()
461477
.select("img").first()
462478
.attr("abs:data-thumb");
463479
} catch (Exception e) {//todo: add fallback method
464-
throw new ParsingException("Could not get uploader thumbnail URL.", e);
480+
throw new ParsingException("Could not get uploader avatar url", e);
481+
}
482+
483+
if (uploaderAvatarUrl == null) {
484+
throw new ParsingException("Could not get uploader avatar url");
465485
}
486+
return uploaderAvatarUrl;
466487
}
467488

468489
@Nonnull
@@ -590,12 +611,12 @@ public List<SubtitlesStream> getSubtitles(final MediaFormat format) throws IOExc
590611
public StreamType getStreamType() throws ParsingException {
591612
assertPageFetched();
592613
try {
593-
if (playerArgs != null && (playerArgs.has("ps") && playerArgs.get("ps").toString().equals("live") ||
594-
(!playerResponse.getObject("streamingData").has(FORMATS)))) {
614+
if (!playerResponse.getObject("streamingData").has(FORMATS) ||
615+
(playerArgs != null && playerArgs.has("ps") && playerArgs.get("ps").toString().equals("live"))) {
595616
return StreamType.LIVE_STREAM;
596617
}
597618
} catch (Exception e) {
598-
throw new ParsingException("Could not get hls manifest url", e);
619+
throw new ParsingException("Could not get stream type", e);
599620
}
600621
return StreamType.VIDEO_STREAM;
601622
}

0 commit comments

Comments
 (0)