Skip to content

Commit c7e9ad5

Browse files
authored
Merge branch 'dev' into patch-1
2 parents 7fb1768 + 7169bcf commit c7e9ad5

15 files changed

Lines changed: 127 additions & 38 deletions

extractor/src/main/java/org/schabi/newpipe/extractor/exceptions/ReCaptchaException.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,14 @@
2121
*/
2222

2323
public class ReCaptchaException extends ExtractionException {
24-
public ReCaptchaException(String message) {
24+
private String url;
25+
26+
public ReCaptchaException(String message, String url) {
2527
super(message);
28+
this.url = url;
29+
}
30+
31+
public String getUrl() {
32+
return url;
2633
}
2734
}

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747

4848
@SuppressWarnings("WeakerAccess")
4949
public class YoutubeChannelExtractor extends ChannelExtractor {
50+
/*package-private*/ static final String CHANNEL_URL_BASE = "https://www.youtube.com/channel/";
5051
private static final String CHANNEL_FEED_BASE = "https://www.youtube.com/feeds/videos.xml?channel_id=";
5152
private static final String CHANNEL_URL_PARAMETERS = "/videos?view=0&flow=list&sort=dd&live_view=10000";
5253

@@ -72,7 +73,7 @@ public String getNextPageUrl() throws ExtractionException {
7273
@Override
7374
public String getUrl() throws ParsingException {
7475
try {
75-
return "https://www.youtube.com/channel/" + getId();
76+
return CHANNEL_URL_BASE + getId();
7677
} catch (ParsingException e) {
7778
return super.getUrl();
7879
}

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelInfoItemExtractor.java

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@
55
import org.schabi.newpipe.extractor.exceptions.ParsingException;
66
import org.schabi.newpipe.extractor.utils.Utils;
77

8+
import java.util.regex.Matcher;
9+
import java.util.regex.Pattern;
10+
811
/*
912
* Created by Christian Schabesberger on 12.02.17.
1013
*
@@ -53,8 +56,20 @@ public String getName() throws ParsingException {
5356

5457
@Override
5558
public String getUrl() throws ParsingException {
56-
return el.select("a[class*=\"yt-uix-tile-link\"]").first()
57-
.attr("abs:href");
59+
String buttonTrackingUrl = el.select("button[class*=\"yt-uix-button\"]").first()
60+
.attr("abs:data-href");
61+
62+
Pattern channelIdPattern = Pattern.compile("(?:.*?)\\%252Fchannel\\%252F([A-Za-z0-9\\-\\_]+)(?:.*)");
63+
Matcher match = channelIdPattern.matcher(buttonTrackingUrl);
64+
65+
if (match.matches()) {
66+
return YoutubeChannelExtractor.CHANNEL_URL_BASE + match.group(1);
67+
} else {
68+
// fallback method just in case youtube changes things; it should never run and tests will fail
69+
// provides a url with "/user/NAME", which is inconsistent with the stream and channel extractors
70+
return el.select("a[class*=\"yt-uix-tile-link\"]").first()
71+
.attr("abs:href");
72+
}
5873
}
5974

6075
@Override

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ public String getName() throws ParsingException {
5050
try {
5151
return doc.select("div[id=pl-header] h1[class=pl-header-title]").first().text();
5252
} catch (Exception e) {
53-
throw new ParsingException("Could not get playlist name");
53+
throw new ParsingException("Could not get playlist name", e);
5454
}
5555
}
5656

@@ -59,7 +59,7 @@ public String getThumbnailUrl() throws ParsingException {
5959
try {
6060
return doc.select("div[id=pl-header] div[class=pl-header-thumb] img").first().attr("abs:src");
6161
} catch (Exception e) {
62-
throw new ParsingException("Could not get playlist thumbnail");
62+
throw new ParsingException("Could not get playlist thumbnail", e);
6363
}
6464
}
6565

@@ -72,9 +72,11 @@ public String getBannerUrl() {
7272
@Override
7373
public String getUploaderUrl() throws ParsingException {
7474
try {
75-
return doc.select("ul[class=\"pl-header-details\"] li").first().select("a").first().attr("abs:href");
75+
return YoutubeChannelExtractor.CHANNEL_URL_BASE +
76+
doc.select("button[class*=\"yt-uix-subscription-button\"]")
77+
.first().attr("data-channel-external-id");
7678
} catch (Exception e) {
77-
throw new ParsingException("Could not get playlist uploader name");
79+
throw new ParsingException("Could not get playlist uploader url", e);
7880
}
7981
}
8082

@@ -83,7 +85,7 @@ public String getUploaderName() throws ParsingException {
8385
try {
8486
return doc.select("span[class=\"qualified-channel-title-text\"]").first().select("a").first().text();
8587
} catch (Exception e) {
86-
throw new ParsingException("Could not get playlist uploader name");
88+
throw new ParsingException("Could not get playlist uploader name", e);
8789
}
8890
}
8991

@@ -92,7 +94,7 @@ public String getUploaderAvatarUrl() throws ParsingException {
9294
try {
9395
return doc.select("div[id=gh-banner] img[class=channel-header-profile-image]").first().attr("abs:src");
9496
} catch (Exception e) {
95-
throw new ParsingException("Could not get playlist uploader avatar");
97+
throw new ParsingException("Could not get playlist uploader avatar", e);
9698
}
9799
}
98100

@@ -248,6 +250,8 @@ public String getUploaderName() throws ParsingException {
248250

249251
@Override
250252
public String getUploaderUrl() throws ParsingException {
253+
// this url is not always in the form "/channel/..."
254+
// sometimes Youtube provides urls in the form "/user/..."
251255
return getUploaderLink().attr("abs:href");
252256
}
253257

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistInfoItemExtractor.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -49,10 +49,11 @@ public String getName() throws ParsingException {
4949
@Override
5050
public String getUrl() throws ParsingException {
5151
try {
52-
final Element div = el.select("div[class=\"yt-lockup-meta\"]").first();
52+
final Element a = el.select("div[class=\"yt-lockup-meta\"]")
53+
.select("ul[class=\"yt-lockup-meta-info\"]")
54+
.select("li").select("a").first();
5355

54-
if(div != null) {
55-
final Element a = div.select("a").first();
56+
if(a != null) {
5657
return a.attr("abs:href");
5758
}
5859

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java

Lines changed: 44 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@
3030
import java.net.MalformedURLException;
3131
import java.net.URL;
3232
import java.util.*;
33+
import java.util.regex.Matcher;
34+
import java.util.regex.Pattern;
3335

3436
/*
3537
* Created by Christian Schabesberger on 06.08.15.
@@ -162,14 +164,54 @@ public String getDescription() throws ParsingException {
162164
}
163165
}
164166

167+
// onclick="yt.www.watch.player.seekTo(0*3600+00*60+00);return false;"
168+
// :00 is NOT recognized as a timestamp in description or comments.
169+
// 0:00 is recognized in both description and comments.
170+
// https://www.youtube.com/watch?v=4cccfDXu1vA
171+
private final static Pattern DESCRIPTION_TIMESTAMP_ONCLICK_REGEX = Pattern.compile(
172+
"seekTo\\("
173+
+ "(?:(\\d+)\\*3600\\+)?" // hours?
174+
+ "(\\d+)\\*60\\+" // minutes
175+
+ "(\\d+)" // seconds
176+
+ "\\)");
177+
178+
@SafeVarargs
179+
private static <T> T coalesce(T... args) {
180+
for (T arg : args) {
181+
if (arg != null) return arg;
182+
}
183+
throw new IllegalArgumentException("all arguments to coalesce() were null");
184+
}
185+
165186
private String parseHtmlAndGetFullLinks(String descriptionHtml)
166187
throws MalformedURLException, UnsupportedEncodingException, ParsingException {
167188
final Document description = Jsoup.parse(descriptionHtml, getUrl());
168189
for(Element a : description.select("a")) {
169190
final String rawUrl = a.attr("abs:href");
170191
final URL redirectLink = new URL(rawUrl);
171-
final String queryString = redirectLink.getQuery();
172-
if(queryString != null) {
192+
193+
final Matcher onClickTimestamp;
194+
final String queryString;
195+
if ((onClickTimestamp = DESCRIPTION_TIMESTAMP_ONCLICK_REGEX.matcher(a.attr("onclick")))
196+
.find()) {
197+
a.removeAttr("onclick");
198+
199+
String hours = coalesce(onClickTimestamp.group(1), "0");
200+
String minutes = onClickTimestamp.group(2);
201+
String seconds = onClickTimestamp.group(3);
202+
203+
int timestamp = 0;
204+
timestamp += Integer.parseInt(hours) * 3600;
205+
timestamp += Integer.parseInt(minutes) * 60;
206+
timestamp += Integer.parseInt(seconds);
207+
208+
String setTimestamp = "&t=" + timestamp;
209+
210+
// Even after clicking https://youtu.be/...?t=6,
211+
// getUrl() is https://www.youtube.com/watch?v=..., never youtu.be, never &t=.
212+
a.attr("href", getUrl() + setTimestamp);
213+
214+
} else if((queryString = redirectLink.getQuery()) != null) {
173215
// if the query string is null we are not dealing with a redirect link,
174216
// so we don't need to override it.
175217
final String link =
@@ -714,8 +756,6 @@ private EmbeddedInfo getEmbeddedInfo() throws ParsingException, ReCaptchaExcepti
714756
} catch (IOException e) {
715757
throw new ParsingException(
716758
"Could load decryption code form restricted video for the Youtube service.", e);
717-
} catch (ReCaptchaException e) {
718-
throw new ReCaptchaException("reCaptcha Challenge requested");
719759
}
720760
}
721761

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ private boolean isPremiumVideo() {
6161
@Override
6262
public String getUrl() throws ParsingException {
6363
try {
64-
Element el = item.select("div[class*=\"yt-lockup-video\"").first();
64+
Element el = item.select("div[class*=\"yt-lockup-video\"]").first();
6565
Element dl = el.select("h3").first().select("a").first();
6666
return dl.attr("abs:href");
6767
} catch (Exception e) {
@@ -72,7 +72,7 @@ public String getUrl() throws ParsingException {
7272
@Override
7373
public String getName() throws ParsingException {
7474
try {
75-
Element el = item.select("div[class*=\"yt-lockup-video\"").first();
75+
Element el = item.select("div[class*=\"yt-lockup-video\"]").first();
7676
Element dl = el.select("h3").first().select("a").first();
7777
return dl.text();
7878
} catch (Exception e) {
@@ -107,6 +107,8 @@ public String getUploaderName() throws ParsingException {
107107

108108
@Override
109109
public String getUploaderUrl() throws ParsingException {
110+
// this url is not always in the form "/channel/..."
111+
// sometimes Youtube provides urls in the form "/user/..."
110112
try {
111113
try {
112114
return item.select("div[class=\"yt-lockup-byline\"]").first()
@@ -119,7 +121,7 @@ public String getUploaderUrl() throws ParsingException {
119121
.text().split(" - ")[0];
120122
} catch (Exception e) {
121123
System.out.println(item.html());
122-
throw new ParsingException("Could not get uploader", e);
124+
throw new ParsingException("Could not get uploader url", e);
123125
}
124126
}
125127

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeTrendingExtractor.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,8 @@ public String getUploaderUrl() throws ParsingException {
126126
}
127127

128128
private Element getUploaderLink() {
129+
// this url is not always in the form "/channel/..."
130+
// sometimes Youtube provides urls in the form "/user/..."
129131
Element uploaderEl = el.select("div[class*=\"yt-lockup-byline \"]").first();
130132
return uploaderEl.select("a").first();
131133
}

extractor/src/main/java/org/schabi/newpipe/extractor/utils/DashMpdParser.java

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -123,8 +123,6 @@ public static ParserResult getStreams(final StreamInfo streamInfo)
123123
dashDoc = downloader.download(streamInfo.getDashMpdUrl());
124124
} catch (IOException ioe) {
125125
throw new DashMpdParsingException("Could not get dash mpd: " + streamInfo.getDashMpdUrl(), ioe);
126-
} catch (ReCaptchaException e) {
127-
throw new ReCaptchaException("reCaptcha Challenge needed");
128126
}
129127

130128
try {

extractor/src/test/java/org/schabi/newpipe/Downloader.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ private static String dl(HttpsURLConnection con) throws IOException, ReCaptchaEx
129129
* request See : https://github.com/rg3/youtube-dl/issues/5138
130130
*/
131131
if (con.getResponseCode() == 429) {
132-
throw new ReCaptchaException("reCaptcha Challenge requested");
132+
throw new ReCaptchaException("reCaptcha Challenge requested", con.getURL().toString());
133133
}
134134

135135
throw new IOException(con.getResponseCode() + " " + con.getResponseMessage(), e);

0 commit comments

Comments
 (0)