Skip to content

Commit b242e1d

Browse files
authored
Merge pull request #362 from Stypox/google-search-urls
Add support for Google search redirect url
2 parents a9303b2 + 9e53cf0 commit b242e1d

9 files changed

Lines changed: 93 additions & 51 deletions

File tree

extractor/src/main/java/org/schabi/newpipe/extractor/StreamingService.java

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import org.schabi.newpipe.extractor.stream.StreamExtractor;
1717
import org.schabi.newpipe.extractor.subscription.SubscriptionExtractor;
1818
import org.schabi.newpipe.extractor.suggestion.SuggestionExtractor;
19+
import org.schabi.newpipe.extractor.utils.Utils;
1920

2021
import javax.annotation.Nullable;
2122
import java.util.Collections;
@@ -277,18 +278,19 @@ public CommentsExtractor getCommentsExtractor(String url) throws ExtractionExcep
277278
* Figures out where the link is pointing to (a channel, a video, a playlist, etc.)
278279
* @param url the url whose link type should be determined
279280
* @return the link type of url
280-
* @throws ParsingException
281281
*/
282-
public final LinkType getLinkTypeByUrl(String url) throws ParsingException {
283-
LinkHandlerFactory sH = getStreamLHFactory();
284-
LinkHandlerFactory cH = getChannelLHFactory();
285-
LinkHandlerFactory pH = getPlaylistLHFactory();
282+
public final LinkType getLinkTypeByUrl(final String url) throws ParsingException {
283+
final String polishedUrl = Utils.followGoogleRedirectIfNeeded(url);
286284

287-
if (sH != null && sH.acceptUrl(url)) {
285+
final LinkHandlerFactory sH = getStreamLHFactory();
286+
final LinkHandlerFactory cH = getChannelLHFactory();
287+
final LinkHandlerFactory pH = getPlaylistLHFactory();
288+
289+
if (sH != null && sH.acceptUrl(polishedUrl)) {
288290
return LinkType.STREAM;
289-
} else if (cH != null && cH.acceptUrl(url)) {
291+
} else if (cH != null && cH.acceptUrl(polishedUrl)) {
290292
return LinkType.CHANNEL;
291-
} else if (pH != null && pH.acceptUrl(url)) {
293+
} else if (pH != null && pH.acceptUrl(polishedUrl)) {
292294
return LinkType.PLAYLIST;
293295
} else {
294296
return LinkType.NONE;

extractor/src/main/java/org/schabi/newpipe/extractor/linkhandler/LinkHandlerFactory.java

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,12 +42,29 @@ public String getUrl(String id, String baseUrl) throws ParsingException {
4242
// Logic
4343
///////////////////////////////////
4444

45-
public LinkHandler fromUrl(String url) throws ParsingException {
46-
if (url == null) throw new IllegalArgumentException("url can not be null");
47-
final String baseUrl = Utils.getBaseUrl(url);
48-
return fromUrl(url, baseUrl);
45+
/**
46+
* Builds a {@link LinkHandler} from a url.<br>
47+
* Be sure to call {@link Utils#followGoogleRedirectIfNeeded(String)} on the url if overriding
48+
* this function.
49+
* @param url the url to extract path and id from
50+
* @return a {@link LinkHandler} complete with information
51+
*/
52+
public LinkHandler fromUrl(final String url) throws ParsingException {
53+
final String polishedUrl = Utils.followGoogleRedirectIfNeeded(url);
54+
final String baseUrl = Utils.getBaseUrl(polishedUrl);
55+
return fromUrl(polishedUrl, baseUrl);
4956
}
5057

58+
/**
59+
* Builds a {@link LinkHandler} from a url and a base url. The url is expected to be already
60+
* polished from google search redirects (otherwise how could {@code baseUrl} have been
61+
* extracted?).<br>
62+
* So do not call {@link Utils#followGoogleRedirectIfNeeded(String)} on the url if overriding
63+
* this function, since that should be done in {@link #fromUrl(String)}.
64+
* @param url the url without google search redirects to extract id from
65+
* @param baseUrl the base url
66+
* @return a {@link LinkHandler} complete with information
67+
*/
5168
public LinkHandler fromUrl(String url, String baseUrl) throws ParsingException {
5269
if (url == null) throw new IllegalArgumentException("url can not be null");
5370
if (!acceptUrl(url)) {

extractor/src/main/java/org/schabi/newpipe/extractor/linkhandler/ListLinkHandlerFactory.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,10 @@ public String getUrl(String id, List<String> contentFilter, String sortFilter, S
3131
///////////////////////////////////
3232

3333
@Override
34-
public ListLinkHandler fromUrl(String url) throws ParsingException {
35-
String baseUrl = Utils.getBaseUrl(url);
36-
return fromUrl(url, baseUrl);
34+
public ListLinkHandler fromUrl(final String url) throws ParsingException {
35+
final String polishedUrl = Utils.followGoogleRedirectIfNeeded(url);
36+
final String baseUrl = Utils.getBaseUrl(polishedUrl);
37+
return fromUrl(polishedUrl, baseUrl);
3738
}
3839

3940
@Override

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -55,12 +55,6 @@ public class YoutubeParsingHelper {
5555
private YoutubeParsingHelper() {
5656
}
5757

58-
/**
59-
* The official youtube app supports intents in this format, where after the ':' is the videoId.
60-
* Accordingly there are other apps sharing streams in this format.
61-
*/
62-
public final static String BASE_YOUTUBE_INTENT_URL = "vnd.youtube";
63-
6458
private static final String HARDCODED_CLIENT_VERSION = "2.20200214.04.00";
6559
private static String clientVersion;
6660

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeCommentsLinkHandlerFactory.java

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,7 @@
11
package org.schabi.newpipe.extractor.services.youtube.linkHandler;
22

3-
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.BASE_YOUTUBE_INTENT_URL;
4-
53
import org.schabi.newpipe.extractor.exceptions.FoundAdException;
64
import org.schabi.newpipe.extractor.exceptions.ParsingException;
7-
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
85
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory;
96

107
import java.util.List;
@@ -17,15 +14,6 @@ public static YoutubeCommentsLinkHandlerFactory getInstance() {
1714
return instance;
1815
}
1916

20-
@Override
21-
public ListLinkHandler fromUrl(String url) throws ParsingException {
22-
if (url.startsWith(BASE_YOUTUBE_INTENT_URL)){
23-
return super.fromUrl(url, BASE_YOUTUBE_INTENT_URL);
24-
} else {
25-
return super.fromUrl(url);
26-
}
27-
}
28-
2917
@Override
3018
public String getUrl(String id) {
3119
return "https://m.youtube.com/watch?v=" + id;

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeStreamLinkHandlerFactory.java

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22

33
import org.schabi.newpipe.extractor.exceptions.FoundAdException;
44
import org.schabi.newpipe.extractor.exceptions.ParsingException;
5-
import org.schabi.newpipe.extractor.linkhandler.LinkHandler;
65
import org.schabi.newpipe.extractor.linkhandler.LinkHandlerFactory;
76
import org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper;
87
import org.schabi.newpipe.extractor.utils.Utils;
@@ -15,8 +14,6 @@
1514
import java.util.regex.Matcher;
1615
import java.util.regex.Pattern;
1716

18-
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.BASE_YOUTUBE_INTENT_URL;
19-
2017
/*
2118
* Created by Christian Schabesberger on 02.02.16.
2219
*
@@ -67,15 +64,6 @@ private static String assertIsId(@Nullable final String id) throws ParsingExcept
6764
}
6865
}
6966

70-
@Override
71-
public LinkHandler fromUrl(String url) throws ParsingException {
72-
if (url.startsWith(BASE_YOUTUBE_INTENT_URL)) {
73-
return super.fromUrl(url, BASE_YOUTUBE_INTENT_URL);
74-
} else {
75-
return super.fromUrl(url);
76-
}
77-
}
78-
7967
@Override
8068
public String getUrl(String id) {
8169
return "https://www.youtube.com/watch?v=" + id;

extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java

Lines changed: 30 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -181,14 +181,39 @@ public static String removeUTF8BOM(String s) {
181181
return s;
182182
}
183183

184-
public static String getBaseUrl(String url) throws ParsingException {
185-
URL uri;
184+
public static String getBaseUrl(final String url) throws ParsingException {
186185
try {
187-
uri = stringToURL(url);
188-
} catch (MalformedURLException e) {
186+
final URL uri = stringToURL(url);
187+
return uri.getProtocol() + "://" + uri.getAuthority();
188+
} catch (final MalformedURLException e) {
189+
final String message = e.getMessage();
190+
if (message.startsWith("unknown protocol: ")) {
191+
// return just the protocol (e.g. vnd.youtube)
192+
return message.substring("unknown protocol: ".length());
193+
}
194+
189195
throw new ParsingException("Malformed url: " + url, e);
190196
}
191-
return uri.getProtocol() + "://" + uri.getAuthority();
197+
}
198+
199+
/**
200+
* If the provided url is a Google search redirect, then the actual url is extracted from the
201+
* {@code url=} query value and returned, otherwise the original url is returned.
202+
* @param url the url which can possibly be a Google search redirect
203+
* @return a url with no Google search redirects
204+
*/
205+
public static String followGoogleRedirectIfNeeded(final String url) {
206+
// if the url is a redirect from a Google search, extract the actual url
207+
try {
208+
final URL decoded = Utils.stringToURL(url);
209+
if (decoded.getHost().contains("google") && decoded.getPath().equals("/url")) {
210+
return URLDecoder.decode(Parser.matchGroup1("&url=([^&]+)(?:&|$)", url), "UTF-8");
211+
}
212+
} catch (final Exception ignored) {
213+
}
214+
215+
// url is not a google search redirect
216+
return url;
192217
}
193218

194219
public static boolean isNullOrEmpty(final String str) {

extractor/src/test/java/org/schabi/newpipe/extractor/NewPipeTest.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
import static org.junit.Assert.*;
88
import static org.schabi.newpipe.extractor.NewPipe.getServiceByUrl;
9+
import static org.schabi.newpipe.extractor.ServiceList.SoundCloud;
910
import static org.schabi.newpipe.extractor.ServiceList.YouTube;
1011

1112
public class NewPipeTest {
@@ -39,8 +40,10 @@ public void getServiceWithUrl() throws Exception {
3940
assertEquals(getServiceByUrl("https://www.youtube.com/watch?v=_r6CgaFNAGg"), YouTube);
4041
assertEquals(getServiceByUrl("https://www.youtube.com/channel/UCi2bIyFtz-JdI-ou8kaqsqg"), YouTube);
4142
assertEquals(getServiceByUrl("https://www.youtube.com/playlist?list=PLRqwX-V7Uu6ZiZxtDDRCi6uhfTH4FilpH"), YouTube);
43+
assertEquals(getServiceByUrl("https://www.google.it/url?sa=t&rct=j&q=&esrc=s&cd=&cad=rja&uact=8&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DHu80uDzh8RY&source=video"), YouTube);
4244

43-
assertNotEquals(getServiceByUrl("https://soundcloud.com/pegboardnerds"), YouTube);
45+
assertEquals(getServiceByUrl("https://soundcloud.com/pegboardnerds"), SoundCloud);
46+
assertEquals(getServiceByUrl("https://www.google.com/url?sa=t&url=https%3A%2F%2Fsoundcloud.com%2Fciaoproduction&rct=j&q=&esrc=s&source=web&cd="), SoundCloud);
4447
}
4548

4649
@Test

extractor/src/test/java/org/schabi/newpipe/extractor/utils/UtilsTest.java

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,4 +21,28 @@ public void testMixedNumberWordToLong() throws ParsingException {
2121
public void testJoin() {
2222
assertEquals("some,random,stuff", Utils.join(",", Arrays.asList("some", "random", "stuff")));
2323
}
24+
25+
@Test
26+
public void testGetBaseUrl() throws ParsingException {
27+
assertEquals("https://www.youtube.com", Utils.getBaseUrl("https://www.youtube.com/watch?v=Hu80uDzh8RY"));
28+
assertEquals("vnd.youtube", Utils.getBaseUrl("vnd.youtube://www.youtube.com/watch?v=jZViOEv90dI"));
29+
assertEquals("vnd.youtube", Utils.getBaseUrl("vnd.youtube:jZViOEv90dI"));
30+
assertEquals("vnd.youtube", Utils.getBaseUrl("vnd.youtube://n8X9_MgEdCg"));
31+
assertEquals("https://music.youtube.com", Utils.getBaseUrl("https://music.youtube.com/watch?v=O0EDx9WAelc"));
32+
}
33+
34+
@Test
35+
public void testFollowGoogleRedirect() {
36+
assertEquals("https://www.youtube.com/watch?v=Hu80uDzh8RY",
37+
Utils.followGoogleRedirectIfNeeded("https://www.google.it/url?sa=t&rct=j&q=&esrc=s&cd=&cad=rja&uact=8&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DHu80uDzh8RY&source=video"));
38+
assertEquals("https://www.youtube.com/watch?v=0b6cFWG45kA",
39+
Utils.followGoogleRedirectIfNeeded("https://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=video&cd=&cad=rja&uact=8&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3D0b6cFWG45kA"));
40+
assertEquals("https://soundcloud.com/ciaoproduction",
41+
Utils.followGoogleRedirectIfNeeded("https://www.google.com/url?sa=t&url=https%3A%2F%2Fsoundcloud.com%2Fciaoproduction&rct=j&q=&esrc=s&source=web&cd="));
42+
43+
assertEquals("https://www.youtube.com/watch?v=Hu80uDzh8RY&param=xyz",
44+
Utils.followGoogleRedirectIfNeeded("https://www.youtube.com/watch?v=Hu80uDzh8RY&param=xyz"));
45+
assertEquals("https://www.youtube.com/watch?v=Hu80uDzh8RY&url=hello",
46+
Utils.followGoogleRedirectIfNeeded("https://www.youtube.com/watch?v=Hu80uDzh8RY&url=hello"));
47+
}
2448
}

0 commit comments

Comments
 (0)