1010import org .schabi .newpipe .extractor .utils .Parser ;
1111
1212import javax .annotation .Nonnull ;
13+ import java .net .MalformedURLException ;
14+ import java .net .URL ;
1315import java .util .regex .Pattern ;
1416
1517/**
1618 * The extractor of YouTube's base JavaScript player file.
1719 *
1820 * <p>
19- * YouTube restrict streaming their media in multiple ways by requiring their HTML5 clients to use
20- * a signature timestamp, and on streaming URLs a signature deobfuscation function for some
21- * contents and a throttling parameter deobfuscation one for all contents.
22- * </p>
23- *
24- * <p>
2521 * This class handles fetching of this base JavaScript player file in order to allow other classes
2622 * to extract the needed data.
2723 * </p>
3127 * watch page as a fallback.
3228 * </p>
3329 */
34- public final class YoutubeJavaScriptExtractor {
30+ final class YoutubeJavaScriptExtractor {
3531
3632 private static final String HTTPS = "https:" ;
3733 private static final String BASE_JS_PLAYER_URL_FORMAT =
@@ -40,49 +36,45 @@ public final class YoutubeJavaScriptExtractor {
4036 "player\\ \\ /([a-z0-9]{8})\\ \\ /" );
4137 private static final Pattern EMBEDDED_WATCH_PAGE_JS_BASE_PLAYER_URL_PATTERN = Pattern .compile (
4238 "\" jsUrl\" :\" (/s/player/[A-Za-z0-9]+/player_ias\\ .vflset/[A-Za-z_-]+/base\\ .js)\" " );
43- private static String cachedJavaScriptCode ;
4439
4540 private YoutubeJavaScriptExtractor () {
4641 }
4742
4843 /**
49- * Extracts the JavaScript file.
44+ * Extracts the JavaScript base player file.
5045 *
51- * <p>
52- * The result is cached, so subsequent calls use the result of previous calls.
53- * </p>
54- *
55- * @param videoId a YouTube video ID, which doesn't influence the result, but it may help in
56- * the chance that YouTube track it
57- * @return the whole JavaScript file as a string
58- * @throws ParsingException if the extraction failed
46+ * @param videoId the video ID used to get the JavaScript base player file (an empty one can be
47+ * passed, although this is not recommended, as a real ID spoofs official YouTube
48+ * clients better)
49+ * @return the whole JavaScript base player file as a string
50+ * @throws ParsingException if the extraction of the file failed
5951 */
6052 @ Nonnull
61- public static String extractJavaScriptCode (@ Nonnull final String videoId )
53+ static String extractJavaScriptPlayerCode (@ Nonnull final String videoId )
6254 throws ParsingException {
63- if (cachedJavaScriptCode == null ) {
64- String url ;
65- try {
66- url = YoutubeJavaScriptExtractor .extractJavaScriptUrlWithIframeResource ();
67- } catch (final Exception e ) {
68- url = YoutubeJavaScriptExtractor .extractJavaScriptUrlWithEmbedWatchPage (videoId );
69- }
55+ String url ;
56+ try {
57+ url = YoutubeJavaScriptExtractor .extractJavaScriptUrlWithIframeResource ();
7058 final String playerJsUrl = YoutubeJavaScriptExtractor .cleanJavaScriptUrl (url );
71- cachedJavaScriptCode = YoutubeJavaScriptExtractor .downloadJavaScriptCode (playerJsUrl );
72- }
7359
74- return cachedJavaScriptCode ;
75- }
60+ // Assert that the URL we extracted and built is valid
61+ new URL ( playerJsUrl );
7662
77- /**
78- * Reset the cached JavaScript code.
79- *
80- * <p>
81- * It will be fetched again the next time {@link #extractJavaScriptCode(String)} is called.
82- * </p>
83- */
84- public static void resetJavaScriptCode () {
85- cachedJavaScriptCode = null ;
63+ return YoutubeJavaScriptExtractor .downloadJavaScriptCode (playerJsUrl );
64+ } catch (final Exception e ) {
65+ url = YoutubeJavaScriptExtractor .extractJavaScriptUrlWithEmbedWatchPage (videoId );
66+ final String playerJsUrl = YoutubeJavaScriptExtractor .cleanJavaScriptUrl (url );
67+
68+ try {
69+ // Assert that the URL we extracted and built is valid
70+ new URL (playerJsUrl );
71+ } catch (final MalformedURLException exception ) {
72+ throw new ParsingException (
73+ "The extracted and built JavaScript URL is invalid" , exception );
74+ }
75+
76+ return YoutubeJavaScriptExtractor .downloadJavaScriptCode (playerJsUrl );
77+ }
8678 }
8779
8880 @ Nonnull
@@ -134,7 +126,7 @@ static String extractJavaScriptUrlWithEmbedWatchPage(@Nonnull final String video
134126 }
135127 }
136128
137- // Use regexes to match the URL in a JavaScript embedded script of the HTML page
129+ // Use regexes to match the URL in an embedded script of the HTML page
138130 try {
139131 return Parser .matchGroup1 (
140132 EMBEDDED_WATCH_PAGE_JS_BASE_PLAYER_URL_PATTERN , embedPageContent );
@@ -145,29 +137,28 @@ static String extractJavaScriptUrlWithEmbedWatchPage(@Nonnull final String video
145137 }
146138
147139 @ Nonnull
148- private static String cleanJavaScriptUrl (@ Nonnull final String playerJsUrl ) {
149- if (playerJsUrl .startsWith ("//" )) {
140+ private static String cleanJavaScriptUrl (@ Nonnull final String javaScriptPlayerUrl ) {
141+ if (javaScriptPlayerUrl .startsWith ("//" )) {
150142 // https part has to be added manually if the URL is protocol-relative
151- return HTTPS + playerJsUrl ;
152- } else if (playerJsUrl .startsWith ("/" )) {
143+ return HTTPS + javaScriptPlayerUrl ;
144+ } else if (javaScriptPlayerUrl .startsWith ("/" )) {
153145 // https://www.youtube.com part has to be added manually if the URL is relative to
154146 // YouTube's domain
155- return HTTPS + "//www.youtube.com" + playerJsUrl ;
147+ return HTTPS + "//www.youtube.com" + javaScriptPlayerUrl ;
156148 } else {
157- return playerJsUrl ;
149+ return javaScriptPlayerUrl ;
158150 }
159151 }
160152
161153 @ Nonnull
162- private static String downloadJavaScriptCode (@ Nonnull final String playerJsUrl )
154+ private static String downloadJavaScriptCode (@ Nonnull final String javaScriptPlayerUrl )
163155 throws ParsingException {
164156 try {
165157 return NewPipe .getDownloader ()
166- .get (playerJsUrl , Localization .DEFAULT )
158+ .get (javaScriptPlayerUrl , Localization .DEFAULT )
167159 .responseBody ();
168160 } catch (final Exception e ) {
169- throw new ParsingException (
170- "Could not get JavaScript base player's code from URL: " + playerJsUrl , e );
161+ throw new ParsingException ("Could not get JavaScript base player's code" , e );
171162 }
172163 }
173164}
0 commit comments