11package org .schabi .newpipe .extractor .services .youtube .linkHandler ;
22
3- import java .io .UnsupportedEncodingException ;
4- import java .net .URLDecoder ;
5- import java .util .List ;
6-
73import org .schabi .newpipe .extractor .exceptions .FoundAdException ;
84import org .schabi .newpipe .extractor .exceptions .ParsingException ;
95import org .schabi .newpipe .extractor .linkhandler .ListLinkHandlerFactory ;
10- import org .schabi .newpipe .extractor .utils .Parser ;
6+ import org .schabi .newpipe .extractor .utils .Utils ;
7+
8+ import java .net .MalformedURLException ;
9+ import java .net .URI ;
10+ import java .net .URISyntaxException ;
11+ import java .net .URL ;
12+ import java .util .List ;
1113
1214public class YoutubeCommentsLinkHandlerFactory extends ListLinkHandlerFactory {
1315
@@ -18,84 +20,170 @@ public static YoutubeCommentsLinkHandlerFactory getInstance() {
1820 return instance ;
1921 }
2022
23+ private static String assertIsID (String id ) throws ParsingException {
24+ if (id == null || !id .matches ("[a-zA-Z0-9_-]{11}" )) {
25+ throw new ParsingException ("The given string is not a Youtube-Video-ID" );
26+ }
27+
28+ return id ;
29+ }
30+
2131 @ Override
2232 public String getUrl (String id ) {
2333 return "https://m.youtube.com/watch?v=" + id ;
2434 }
2535
2636 @ Override
27- public String getId (String url ) throws ParsingException , IllegalArgumentException {
28- if (url .isEmpty ()) {
29- throw new IllegalArgumentException ("The url parameter should not be empty" );
37+ public String getId (String urlString ) throws ParsingException , IllegalArgumentException {
38+ try {
39+ URI uri = new URI (urlString );
40+ String scheme = uri .getScheme ();
41+
42+ if (scheme != null && (scheme .equals ("vnd.youtube" ) || scheme .equals ("vnd.youtube.launch" ))) {
43+ String schemeSpecificPart = uri .getSchemeSpecificPart ();
44+ if (schemeSpecificPart .startsWith ("//" )) {
45+ urlString = "https:" + schemeSpecificPart ;
46+ } else {
47+ return assertIsID (schemeSpecificPart );
48+ }
49+ }
50+ } catch (URISyntaxException ignored ) {
51+ }
52+
53+ URL url ;
54+ try {
55+ url = Utils .stringToURL (urlString );
56+ } catch (MalformedURLException e ) {
57+ throw new IllegalArgumentException ("The given URL is not valid" );
58+ }
59+
60+ String host = url .getHost ();
61+ String path = url .getPath ();
62+ // remove leading "/" of URL-path if URL-path is given
63+ if (!path .isEmpty ()) {
64+ path = path .substring (1 );
65+ }
66+
67+ if (!Utils .isHTTP (url ) || !(YoutubeParsingHelper .isYoutubeURL (url ) ||
68+ YoutubeParsingHelper .isYoutubeServiceURL (url ) || YoutubeParsingHelper .isHooktubeURL (url ) ||
69+ YoutubeParsingHelper .isInvidioURL (url ))) {
70+ if (host .equalsIgnoreCase ("googleads.g.doubleclick.net" )) {
71+ throw new FoundAdException ("Error found ad: " + urlString );
72+ }
73+
74+ throw new ParsingException ("The url is not a Youtube-URL" );
75+ }
76+
77+ if (YoutubePlaylistLinkHandlerFactory .getInstance ().acceptUrl (urlString )) {
78+ throw new ParsingException ("Error no suitable url: " + urlString );
3079 }
3180
32- String id ;
33- String lowercaseUrl = url .toLowerCase ();
34- if (lowercaseUrl .contains ("youtube" )) {
35- if (url .contains ("attribution_link" )) {
36- try {
37- String escapedQuery = Parser .matchGroup1 ("u=(.[^&|$]*)" , url );
38- String query = URLDecoder .decode (escapedQuery , "UTF-8" );
39- id = Parser .matchGroup1 ("v=" + ID_PATTERN , query );
40- } catch (UnsupportedEncodingException uee ) {
41- throw new ParsingException ("Could not parse attribution_link" , uee );
81+ // using uppercase instead of lowercase, because toLowercase replaces some unicode characters
82+ // with their lowercase ASCII equivalent. Using toLowercase could result in faultily matching unicode urls.
83+ switch (host .toUpperCase ()) {
84+ case "WWW.YOUTUBE-NOCOOKIE.COM" : {
85+ if (path .startsWith ("embed/" )) {
86+ String id = path .split ("/" )[1 ];
87+
88+ return assertIsID (id );
4289 }
43- } else if (url .contains ("vnd.youtube" )) {
44- id = Parser .matchGroup1 (ID_PATTERN , url );
45- } else if (url .contains ("embed" )) {
46- id = Parser .matchGroup1 ("embed/" + ID_PATTERN , url );
47- } else if (url .contains ("googleads" )) {
48- throw new FoundAdException ("Error found add: " + url );
49- } else {
50- id = Parser .matchGroup1 ("[?&]v=" + ID_PATTERN , url );
90+
91+ break ;
5192 }
52- } else if (lowercaseUrl .contains ("youtu.be" )) {
53- if (url .contains ("v=" )) {
54- id = Parser .matchGroup1 ("v=" + ID_PATTERN , url );
55- } else {
56- id = Parser .matchGroup1 ("[Yy][Oo][Uu][Tt][Uu]\\ .[Bb][Ee]/" + ID_PATTERN , url );
93+
94+ case "YOUTUBE.COM" :
95+ case "WWW.YOUTUBE.COM" :
96+ case "M.YOUTUBE.COM" :
97+ case "MUSIC.YOUTUBE.COM" : {
98+ if (path .equals ("attribution_link" )) {
99+ String uQueryValue = Utils .getQueryValue (url , "u" );
100+
101+ URL decodedURL ;
102+ try {
103+ decodedURL = Utils .stringToURL ("http://www.youtube.com" + uQueryValue );
104+ } catch (MalformedURLException e ) {
105+ throw new ParsingException ("Error no suitable url: " + urlString );
106+ }
107+
108+ String viewQueryValue = Utils .getQueryValue (decodedURL , "v" );
109+ return assertIsID (viewQueryValue );
110+ }
111+
112+ if (path .startsWith ("embed/" )) {
113+ String id = path .split ("/" )[1 ];
114+
115+ return assertIsID (id );
116+ }
117+
118+ String viewQueryValue = Utils .getQueryValue (url , "v" );
119+ return assertIsID (viewQueryValue );
57120 }
58- } else if (lowercaseUrl .contains ("hooktube" )) {
59- if (lowercaseUrl .contains ("&v=" )
60- || lowercaseUrl .contains ("?v=" )) {
61- id = Parser .matchGroup1 ("[?&]v=" + ID_PATTERN , url );
62- } else if (url .contains ("/embed/" )) {
63- id = Parser .matchGroup1 ("embed/" + ID_PATTERN , url );
64- } else if (url .contains ("/v/" )) {
65- id = Parser .matchGroup1 ("v/" + ID_PATTERN , url );
66- } else if (url .contains ("/watch/" )) {
67- id = Parser .matchGroup1 ("watch/" + ID_PATTERN , url );
68- } else {
69- throw new ParsingException ("Error no suitable url: " + url );
121+
122+ case "YOUTU.BE" : {
123+ String viewQueryValue = Utils .getQueryValue (url , "v" );
124+ if (viewQueryValue != null ) {
125+ return assertIsID (viewQueryValue );
126+ }
127+
128+ return assertIsID (path );
70129 }
71- } else {
72- throw new ParsingException ("Error no suitable url: " + url );
73- }
74130
131+ case "HOOKTUBE.COM" : {
132+ if (path .startsWith ("v/" )) {
133+ String id = path .substring ("v/" .length ());
75134
76- if (!id .isEmpty ()) {
77- return id ;
78- } else {
79- throw new ParsingException ("Error could not parse url: " + url );
135+ return assertIsID (id );
136+ }
137+ if (path .startsWith ("watch/" )) {
138+ String id = path .substring ("watch/" .length ());
139+
140+ return assertIsID (id );
141+ }
142+ // there is no break-statement here on purpose so the next code-block gets also run for hooktube
143+ }
144+
145+ case "WWW.INVIDIO.US" :
146+ case "DEV.INVIDIO.US" :
147+ case "INVIDIO.US" :
148+ case "INVIDIOUS.SNOPYTA.ORG" :
149+ case "DE.INVIDIOUS.SNOPYTA.ORG" :
150+ case "FI.INVIDIOUS.SNOPYTA.ORG" :
151+ case "VID.WXZM.SX" :
152+ case "INVIDIOUS.KABI.TK" :
153+ case "INVIDIOU.SH" :
154+ case "WWW.INVIDIOU.SH" :
155+ case "NO.INVIDIOU.SH" :
156+ case "INVIDIOUS.ENKIRTON.NET" :
157+ case "TUBE.POAL.CO" :
158+ case "INVIDIOUS.13AD.DE" :
159+ case "YT.ELUKERIO.ORG" : { // code-block for hooktube.com and Invidious instances
160+ if (path .equals ("watch" )) {
161+ String viewQueryValue = Utils .getQueryValue (url , "v" );
162+ if (viewQueryValue != null ) {
163+ return assertIsID (viewQueryValue );
164+ }
165+ }
166+ if (path .startsWith ("embed/" )) {
167+ String id = path .substring ("embed/" .length ());
168+
169+ return assertIsID (id );
170+ }
171+
172+ break ;
173+ }
80174 }
175+
176+ throw new ParsingException ("Error no suitable url: " + urlString );
81177 }
82178
83179 @ Override
84180 public boolean onAcceptUrl (final String url ) throws FoundAdException {
85- final String lowercaseUrl = url .toLowerCase ();
86- if (lowercaseUrl .contains ("youtube" )
87- || lowercaseUrl .contains ("youtu.be" )
88- || lowercaseUrl .contains ("hooktube" )) {
89- // bad programming I know
90- try {
91- getId (url );
92- return true ;
93- } catch (FoundAdException fe ) {
94- throw fe ;
95- } catch (ParsingException e ) {
96- return false ;
97- }
98- } else {
181+ try {
182+ getId (url );
183+ return true ;
184+ } catch (FoundAdException fe ) {
185+ throw fe ;
186+ } catch (ParsingException e ) {
99187 return false ;
100188 }
101189 }
0 commit comments