Skip to content

Commit a12c0e2

Browse files
committed
fix:invidious: getId and onAcceptUrl for comments
1 parent 4aaf123 commit a12c0e2

2 files changed

Lines changed: 154 additions & 66 deletions

File tree

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeCommentsLinkHandlerFactory.java

Lines changed: 152 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,15 @@
11
package org.schabi.newpipe.extractor.services.youtube.linkHandler;
22

3-
import java.io.UnsupportedEncodingException;
4-
import java.net.URLDecoder;
5-
import java.util.List;
6-
73
import org.schabi.newpipe.extractor.exceptions.FoundAdException;
84
import org.schabi.newpipe.extractor.exceptions.ParsingException;
95
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory;
10-
import org.schabi.newpipe.extractor.utils.Parser;
6+
import org.schabi.newpipe.extractor.utils.Utils;
7+
8+
import java.net.MalformedURLException;
9+
import java.net.URI;
10+
import java.net.URISyntaxException;
11+
import java.net.URL;
12+
import java.util.List;
1113

1214
public class YoutubeCommentsLinkHandlerFactory extends ListLinkHandlerFactory {
1315

@@ -18,84 +20,170 @@ public static YoutubeCommentsLinkHandlerFactory getInstance() {
1820
return instance;
1921
}
2022

23+
private static String assertIsID(String id) throws ParsingException {
24+
if (id == null || !id.matches("[a-zA-Z0-9_-]{11}")) {
25+
throw new ParsingException("The given string is not a Youtube-Video-ID");
26+
}
27+
28+
return id;
29+
}
30+
2131
@Override
2232
public String getUrl(String id) {
2333
return "https://m.youtube.com/watch?v=" + id;
2434
}
2535

2636
@Override
27-
public String getId(String url) throws ParsingException, IllegalArgumentException {
28-
if (url.isEmpty()) {
29-
throw new IllegalArgumentException("The url parameter should not be empty");
37+
public String getId(String urlString) throws ParsingException, IllegalArgumentException {
38+
try {
39+
URI uri = new URI(urlString);
40+
String scheme = uri.getScheme();
41+
42+
if (scheme != null && (scheme.equals("vnd.youtube") || scheme.equals("vnd.youtube.launch"))) {
43+
String schemeSpecificPart = uri.getSchemeSpecificPart();
44+
if (schemeSpecificPart.startsWith("//")) {
45+
urlString = "https:" + schemeSpecificPart;
46+
} else {
47+
return assertIsID(schemeSpecificPart);
48+
}
49+
}
50+
} catch (URISyntaxException ignored) {
51+
}
52+
53+
URL url;
54+
try {
55+
url = Utils.stringToURL(urlString);
56+
} catch (MalformedURLException e) {
57+
throw new IllegalArgumentException("The given URL is not valid");
58+
}
59+
60+
String host = url.getHost();
61+
String path = url.getPath();
62+
// remove leading "/" of URL-path if URL-path is given
63+
if (!path.isEmpty()) {
64+
path = path.substring(1);
65+
}
66+
67+
if (!Utils.isHTTP(url) || !(YoutubeParsingHelper.isYoutubeURL(url) ||
68+
YoutubeParsingHelper.isYoutubeServiceURL(url) || YoutubeParsingHelper.isHooktubeURL(url) ||
69+
YoutubeParsingHelper.isInvidioURL(url))) {
70+
if (host.equalsIgnoreCase("googleads.g.doubleclick.net")) {
71+
throw new FoundAdException("Error found ad: " + urlString);
72+
}
73+
74+
throw new ParsingException("The url is not a Youtube-URL");
75+
}
76+
77+
if (YoutubePlaylistLinkHandlerFactory.getInstance().acceptUrl(urlString)) {
78+
throw new ParsingException("Error no suitable url: " + urlString);
3079
}
3180

32-
String id;
33-
String lowercaseUrl = url.toLowerCase();
34-
if (lowercaseUrl.contains("youtube")) {
35-
if (url.contains("attribution_link")) {
36-
try {
37-
String escapedQuery = Parser.matchGroup1("u=(.[^&|$]*)", url);
38-
String query = URLDecoder.decode(escapedQuery, "UTF-8");
39-
id = Parser.matchGroup1("v=" + ID_PATTERN, query);
40-
} catch (UnsupportedEncodingException uee) {
41-
throw new ParsingException("Could not parse attribution_link", uee);
81+
// using uppercase instead of lowercase, because toLowerCase() replaces some Unicode characters
82+
// with their lowercase ASCII equivalent. Using toLowerCase() could result in incorrectly matching Unicode URLs.
83+
switch (host.toUpperCase()) {
84+
case "WWW.YOUTUBE-NOCOOKIE.COM": {
85+
if (path.startsWith("embed/")) {
86+
String id = path.split("/")[1];
87+
88+
return assertIsID(id);
4289
}
43-
} else if (url.contains("vnd.youtube")) {
44-
id = Parser.matchGroup1(ID_PATTERN, url);
45-
} else if (url.contains("embed")) {
46-
id = Parser.matchGroup1("embed/" + ID_PATTERN, url);
47-
} else if (url.contains("googleads")) {
48-
throw new FoundAdException("Error found add: " + url);
49-
} else {
50-
id = Parser.matchGroup1("[?&]v=" + ID_PATTERN, url);
90+
91+
break;
5192
}
52-
} else if (lowercaseUrl.contains("youtu.be")) {
53-
if (url.contains("v=")) {
54-
id = Parser.matchGroup1("v=" + ID_PATTERN, url);
55-
} else {
56-
id = Parser.matchGroup1("[Yy][Oo][Uu][Tt][Uu]\\.[Bb][Ee]/" + ID_PATTERN, url);
93+
94+
case "YOUTUBE.COM":
95+
case "WWW.YOUTUBE.COM":
96+
case "M.YOUTUBE.COM":
97+
case "MUSIC.YOUTUBE.COM": {
98+
if (path.equals("attribution_link")) {
99+
String uQueryValue = Utils.getQueryValue(url, "u");
100+
101+
URL decodedURL;
102+
try {
103+
decodedURL = Utils.stringToURL("http://www.youtube.com" + uQueryValue);
104+
} catch (MalformedURLException e) {
105+
throw new ParsingException("Error no suitable url: " + urlString);
106+
}
107+
108+
String viewQueryValue = Utils.getQueryValue(decodedURL, "v");
109+
return assertIsID(viewQueryValue);
110+
}
111+
112+
if (path.startsWith("embed/")) {
113+
String id = path.split("/")[1];
114+
115+
return assertIsID(id);
116+
}
117+
118+
String viewQueryValue = Utils.getQueryValue(url, "v");
119+
return assertIsID(viewQueryValue);
57120
}
58-
} else if(lowercaseUrl.contains("hooktube")) {
59-
if(lowercaseUrl.contains("&v=")
60-
|| lowercaseUrl.contains("?v=")) {
61-
id = Parser.matchGroup1("[?&]v=" + ID_PATTERN, url);
62-
} else if (url.contains("/embed/")) {
63-
id = Parser.matchGroup1("embed/" + ID_PATTERN, url);
64-
} else if (url.contains("/v/")) {
65-
id = Parser.matchGroup1("v/" + ID_PATTERN, url);
66-
} else if (url.contains("/watch/")) {
67-
id = Parser.matchGroup1("watch/" + ID_PATTERN, url);
68-
} else {
69-
throw new ParsingException("Error no suitable url: " + url);
121+
122+
case "YOUTU.BE": {
123+
String viewQueryValue = Utils.getQueryValue(url, "v");
124+
if (viewQueryValue != null) {
125+
return assertIsID(viewQueryValue);
126+
}
127+
128+
return assertIsID(path);
70129
}
71-
} else {
72-
throw new ParsingException("Error no suitable url: " + url);
73-
}
74130

131+
case "HOOKTUBE.COM": {
132+
if (path.startsWith("v/")) {
133+
String id = path.substring("v/".length());
75134

76-
if (!id.isEmpty()) {
77-
return id;
78-
} else {
79-
throw new ParsingException("Error could not parse url: " + url);
135+
return assertIsID(id);
136+
}
137+
if (path.startsWith("watch/")) {
138+
String id = path.substring("watch/".length());
139+
140+
return assertIsID(id);
141+
}
142+
// the break statement is omitted here on purpose, so that the next code block also runs for hooktube
143+
}
144+
145+
case "WWW.INVIDIO.US":
146+
case "DEV.INVIDIO.US":
147+
case "INVIDIO.US":
148+
case "INVIDIOUS.SNOPYTA.ORG":
149+
case "DE.INVIDIOUS.SNOPYTA.ORG":
150+
case "FI.INVIDIOUS.SNOPYTA.ORG":
151+
case "VID.WXZM.SX":
152+
case "INVIDIOUS.KABI.TK":
153+
case "INVIDIOU.SH":
154+
case "WWW.INVIDIOU.SH":
155+
case "NO.INVIDIOU.SH":
156+
case "INVIDIOUS.ENKIRTON.NET":
157+
case "TUBE.POAL.CO":
158+
case "INVIDIOUS.13AD.DE":
159+
case "YT.ELUKERIO.ORG": { // code-block for hooktube.com and Invidious instances
160+
if (path.equals("watch")) {
161+
String viewQueryValue = Utils.getQueryValue(url, "v");
162+
if (viewQueryValue != null) {
163+
return assertIsID(viewQueryValue);
164+
}
165+
}
166+
if (path.startsWith("embed/")) {
167+
String id = path.substring("embed/".length());
168+
169+
return assertIsID(id);
170+
}
171+
172+
break;
173+
}
80174
}
175+
176+
throw new ParsingException("Error no suitable url: " + urlString);
81177
}
82178

83179
@Override
84180
public boolean onAcceptUrl(final String url) throws FoundAdException {
85-
final String lowercaseUrl = url.toLowerCase();
86-
if (lowercaseUrl.contains("youtube")
87-
|| lowercaseUrl.contains("youtu.be")
88-
|| lowercaseUrl.contains("hooktube")) {
89-
// bad programming I know
90-
try {
91-
getId(url);
92-
return true;
93-
} catch (FoundAdException fe) {
94-
throw fe;
95-
} catch (ParsingException e) {
96-
return false;
97-
}
98-
} else {
181+
try {
182+
getId(url);
183+
return true;
184+
} catch (FoundAdException fe) {
185+
throw fe;
186+
} catch (ParsingException e) {
99187
return false;
100188
}
101189
}

extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ public class YoutubeCommentsExtractorTest {
2727
public static void setUp() throws Exception {
2828
NewPipe.init(DownloaderTestImpl.getInstance());
2929
extractor = (YoutubeCommentsExtractor) YouTube
30-
.getCommentsExtractor("https://www.youtube.com/watch?v=D00Au7k3i6o");
30+
.getCommentsExtractor("https://www.invidio.us/watch?v=D00Au7k3i6o");
3131
}
3232

3333
@Test
@@ -47,7 +47,7 @@ public void testGetComments() throws IOException, ExtractionException {
4747
@Test
4848
public void testGetCommentsFromCommentsInfo() throws IOException, ExtractionException {
4949
boolean result = false;
50-
CommentsInfo commentsInfo = CommentsInfo.getInfo("https://www.youtube.com/watch?v=D00Au7k3i6o");
50+
CommentsInfo commentsInfo = CommentsInfo.getInfo("https://www.invidio.us/watch?v=D00Au7k3i6o");
5151
assertTrue("what the fuck am i doing with my life".equals(commentsInfo.getName()));
5252
result = findInComments(commentsInfo.getRelatedItems(), "s1ck m3m3");
5353

0 commit comments

Comments
 (0)