Skip to content

Commit 7d07924

Browse files
committed
[YouTube] Try to use lighter requests when extracting client version and key from YouTube and YouTube Music
This is done by fetching https://www.youtube.com/sw.js for YouTube and https://music.youtube.com/sw.js for YouTube Music. Two new methods have been added to the Utils class: they try to match the regular expressions of a String array, or of a Pattern array, against a given content, and return the result for a specific group index (or for index 0 when none is given). Some code in this class has also been refactored.
1 parent 05b7fee commit 7d07924

3 files changed

Lines changed: 264 additions & 115 deletions

File tree

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java

Lines changed: 82 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import static org.schabi.newpipe.extractor.utils.Utils.HTTP;
66
import static org.schabi.newpipe.extractor.utils.Utils.HTTPS;
77
import static org.schabi.newpipe.extractor.utils.Utils.UTF_8;
8+
import static org.schabi.newpipe.extractor.utils.Utils.getStringResultFromRegexArray;
89
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
910

1011
import com.grack.nanojson.JsonArray;
@@ -57,20 +58,20 @@
5758
* Created by Christian Schabesberger on 02.03.16.
5859
*
5960
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
60-
* YoutubeParsingHelper.java is part of NewPipe.
61+
* YoutubeParsingHelper.java is part of NewPipe Extractor.
6162
*
62-
* NewPipe is free software: you can redistribute it and/or modify
63+
* NewPipe Extractor is free software: you can redistribute it and/or modify
6364
* it under the terms of the GNU General Public License as published by
6465
* the Free Software Foundation, either version 3 of the License, or
6566
* (at your option) any later version.
6667
*
67-
* NewPipe is distributed in the hope that it will be useful,
68+
* NewPipe Extractor is distributed in the hope that it will be useful,
6869
* but WITHOUT ANY WARRANTY; without even the implied warranty of
69-
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
70+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
7071
* GNU General Public License for more details.
7172
*
7273
* You should have received a copy of the GNU General Public License
73-
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
74+
* along with NewPipe Extractor. If not, see <https://www.gnu.org/licenses/>.
7475
*/
7576

7677
public final class YoutubeParsingHelper {
@@ -98,6 +99,15 @@ private YoutubeParsingHelper() {
9899
private static boolean keyAndVersionExtracted = false;
99100
@SuppressWarnings("OptionalUsedAsFieldOrParameterType")
100101
private static Optional<Boolean> hardcodedClientVersionAndKeyValid = Optional.empty();
102+
private static final String[] INNERTUBE_CONTEXT_CLIENT_VERSION_REGEXES =
103+
{"INNERTUBE_CONTEXT_CLIENT_VERSION\":\"([0-9\\.]+?)\"",
104+
"innertube_context_client_version\":\"([0-9\\.]+?)\"",
105+
"client.version=([0-9\\.]+)"};
106+
private static final String[] INNERTUBE_API_KEY_REGEXES =
107+
{"INNERTUBE_API_KEY\":\"([0-9a-zA-Z_-]+?)\"",
108+
"innertubeApiKey\":\"([0-9a-zA-Z_-]+?)\""};
109+
private static final String INNERTUBE_CLIENT_NAME_REGEX =
110+
"INNERTUBE_CONTEXT_CLIENT_NAME\":([0-9]+?),";
101111

102112
private static final String CONTENT_PLAYBACK_NONCE_ALPHABET =
103113
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
@@ -484,12 +494,33 @@ public static boolean areHardcodedClientVersionAndKeyValid()
484494
return hardcodedClientVersionAndKeyValid.get();
485495
}
486496

487-
private static void extractClientVersionAndKey() throws IOException, ExtractionException {
488-
// Don't extract the client version and the InnerTube key if it has been already extracted
497+
498+
private static void extractClientVersionAndKeyFromSwJs()
499+
throws IOException, ExtractionException {
489500
if (keyAndVersionExtracted) {
490501
return;
491502
}
503+
final String url = "https://www.youtube.com/sw.js";
504+
final Map<String, List<String>> headers = new HashMap<>();
505+
headers.put("Origin", Collections.singletonList("https://www.youtube.com"));
506+
headers.put("Referer", Collections.singletonList("https://www.youtube.com"));
507+
final String response = getDownloader().get(url, headers).responseBody();
508+
try {
509+
clientVersion = getStringResultFromRegexArray(response,
510+
INNERTUBE_CONTEXT_CLIENT_VERSION_REGEXES, 1);
511+
key = getStringResultFromRegexArray(response, INNERTUBE_API_KEY_REGEXES, 1);
512+
} catch (final Parser.RegexException e) {
513+
throw new ParsingException("Could not extract YouTube WEB InnerTube client version and API key from sw.js", e);
514+
}
515+
keyAndVersionExtracted = true;
516+
}
492517

518+
private static void extractClientVersionAndKeyFromHtmlSearchResultsPage()
519+
throws IOException, ExtractionException {
520+
// Don't extract the client version and the InnerTube key if it has been already extracted
521+
if (keyAndVersionExtracted) {
522+
return;
523+
}
493524
// Don't provide a search term in order to have a smaller response
494525
final String url = "https://www.youtube.com/results?search_query=&ucbcb=1";
495526
final Map<String, List<String>> headers = new HashMap<>();
@@ -526,35 +557,21 @@ private static void extractClientVersionAndKey() throws IOException, ExtractionE
526557
}
527558
}
528559

529-
String contextClientVersion;
530-
final String[] patterns = {
531-
"INNERTUBE_CONTEXT_CLIENT_VERSION\":\"([0-9\\.]+?)\"",
532-
"innertube_context_client_version\":\"([0-9\\.]+?)\"",
533-
"client.version=([0-9\\.]+)"
534-
};
535-
for (final String pattern : patterns) {
536-
try {
537-
contextClientVersion = Parser.matchGroup1(pattern, html);
538-
if (!isNullOrEmpty(contextClientVersion)) {
539-
clientVersion = contextClientVersion;
540-
break;
541-
}
542-
} catch (final Parser.RegexException ignored) {
543-
}
560+
try {
561+
clientVersion = getStringResultFromRegexArray(html,
562+
INNERTUBE_CONTEXT_CLIENT_VERSION_REGEXES, 1);
563+
} catch (final Parser.RegexException ignored) {
544564
}
545565

546566
if (!isNullOrEmpty(clientVersion) && !isNullOrEmpty(shortClientVersion)) {
547567
clientVersion = shortClientVersion;
548568
}
549569

550570
try {
551-
key = Parser.matchGroup1("INNERTUBE_API_KEY\":\"([0-9a-zA-Z_-]+?)\"", html);
552-
} catch (final Parser.RegexException e1) {
553-
try {
554-
key = Parser.matchGroup1("innertubeApiKey\":\"([0-9a-zA-Z_-]+?)\"", html);
555-
} catch (final Parser.RegexException e2) {
556-
throw new ParsingException("Could not extract client version and key");
557-
}
571+
key = getStringResultFromRegexArray(html, INNERTUBE_API_KEY_REGEXES, 1);
572+
} catch (final Parser.RegexException e) {
573+
throw new ParsingException(
574+
"Could not extract YouTube WEB InnerTube client version and API key from HTML search results page");
558575
}
559576
keyAndVersionExtracted = true;
560577
}
@@ -567,7 +584,11 @@ public static String getClientVersion() throws IOException, ExtractionException
567584
return clientVersion;
568585
}
569586

570-
extractClientVersionAndKey();
587+
try {
588+
extractClientVersionAndKeyFromSwJs();
589+
} catch (final Exception e) {
590+
extractClientVersionAndKeyFromHtmlSearchResultsPage();
591+
}
571592

572593
if (keyAndVersionExtracted) {
573594
return clientVersion;
@@ -588,7 +609,11 @@ public static String getKey() throws IOException, ExtractionException {
588609
return key;
589610
}
590611

591-
extractClientVersionAndKey();
612+
try {
613+
extractClientVersionAndKeyFromSwJs();
614+
} catch (final Exception e) {
615+
extractClientVersionAndKeyFromHtmlSearchResultsPage();
616+
}
592617

593618
if (keyAndVersionExtracted) {
594619
return key;
@@ -682,8 +707,8 @@ public static boolean isHardcodedYoutubeMusicKeyValid() throws IOException,
682707
return response.responseBody().length() > 500 && response.responseCode() == 200;
683708
}
684709

685-
public static String[] getYoutubeMusicKey() throws IOException, ReCaptchaException,
686-
Parser.RegexException {
710+
public static String[] getYoutubeMusicKey()
711+
throws IOException, ReCaptchaException, Parser.RegexException {
687712
if (youtubeMusicKey != null && youtubeMusicKey.length == 3) {
688713
return youtubeMusicKey;
689714
}
@@ -692,40 +717,34 @@ public static String[] getYoutubeMusicKey() throws IOException, ReCaptchaExcepti
692717
return youtubeMusicKey;
693718
}
694719

695-
final String url = "https://music.youtube.com/";
696-
final Map<String, List<String>> headers = new HashMap<>();
697-
addCookieHeader(headers);
698-
final String html = getDownloader().get(url, headers).responseBody();
720+
String musicClientVersion = null;
721+
String musicKey = null;
722+
String musicClientName = null;
699723

700-
String innertubeApiKey;
701724
try {
702-
innertubeApiKey = Parser.matchGroup1("INNERTUBE_API_KEY\":\"([0-9a-zA-Z_-]+?)\"", html);
703-
} catch (final Parser.RegexException e) {
704-
innertubeApiKey = Parser.matchGroup1("innertube_api_key\":\"([0-9a-zA-Z_-]+?)\"", html);
705-
}
706-
707-
final String innertubeClientName
708-
= Parser.matchGroup1("INNERTUBE_CONTEXT_CLIENT_NAME\":([0-9]+?),", html);
709-
710-
String innertubeClientVersion;
711-
try {
712-
innertubeClientVersion = Parser.matchGroup1(
713-
"INNERTUBE_CONTEXT_CLIENT_VERSION\":\"([0-9\\.]+?)\"", html);
714-
} catch (final Parser.RegexException e) {
715-
try {
716-
innertubeClientVersion = Parser.matchGroup1(
717-
"INNERTUBE_CLIENT_VERSION\":\"([0-9\\.]+?)\"", html);
718-
} catch (final Parser.RegexException ee) {
719-
innertubeClientVersion = Parser.matchGroup1(
720-
"innertube_context_client_version\":\"([0-9\\.]+?)\"", html);
721-
}
725+
final String url = "https://music.youtube.com/sw.js";
726+
final Map<String, List<String>> headers = new HashMap<>();
727+
headers.put("Origin", Collections.singletonList("https://music.youtube.com"));
728+
headers.put("Referer", Collections.singletonList("https://music.youtube.com"));
729+
final String response = getDownloader().get(url, headers).responseBody();
730+
musicClientVersion = getStringResultFromRegexArray(response,
731+
INNERTUBE_CONTEXT_CLIENT_VERSION_REGEXES, 1);
732+
musicKey = getStringResultFromRegexArray(response,
733+
INNERTUBE_API_KEY_REGEXES, 1);
734+
musicClientName = Parser.matchGroup1(INNERTUBE_CLIENT_NAME_REGEX, response);
735+
} catch (final Exception e) {
736+
final String url = "https://music.youtube.com/";
737+
final Map<String, List<String>> headers = new HashMap<>();
738+
addCookieHeader(headers);
739+
final String html = getDownloader().get(url, headers).responseBody();
740+
741+
musicKey = getStringResultFromRegexArray(html, INNERTUBE_API_KEY_REGEXES, 1);
742+
musicClientVersion = getStringResultFromRegexArray(html,
743+
INNERTUBE_CONTEXT_CLIENT_VERSION_REGEXES);
744+
musicClientName = Parser.matchGroup1(INNERTUBE_CLIENT_NAME_REGEX, html);
722745
}
723746

724-
youtubeMusicKey = new String[]{
725-
innertubeApiKey,
726-
innertubeClientName,
727-
innertubeClientVersion
728-
};
747+
youtubeMusicKey = new String[] { musicKey, musicClientName, musicClientVersion };
729748
return youtubeMusicKey;
730749
}
731750

extractor/src/main/java/org/schabi/newpipe/extractor/utils/Parser.java

Lines changed: 34 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,43 +1,45 @@
1+
/*
2+
* Created by Christian Schabesberger on 02.02.16.
3+
*
4+
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
5+
* Parser.java is part of NewPipe Extractor.
6+
*
7+
* NewPipe Extractor is free software: you can redistribute it and/or modify
8+
* it under the terms of the GNU General Public License as published by
9+
* the Free Software Foundation, either version 3 of the License, or
10+
* (at your option) any later version.
11+
*
12+
* NewPipe Extractor is distributed in the hope that it will be useful,
13+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
14+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15+
* GNU General Public License for more details.
16+
*
17+
* You should have received a copy of the GNU General Public License
18+
* along with NewPipe Extractor. If not, see <https://www.gnu.org/licenses/>.
19+
*/
20+
121
package org.schabi.newpipe.extractor.utils;
222

323
import org.nibor.autolink.LinkExtractor;
424
import org.nibor.autolink.LinkSpan;
525
import org.nibor.autolink.LinkType;
626
import org.schabi.newpipe.extractor.exceptions.ParsingException;
727

28+
import javax.annotation.Nonnull;
829
import java.io.UnsupportedEncodingException;
930
import java.net.URLDecoder;
1031
import java.util.ArrayList;
1132
import java.util.EnumSet;
1233
import java.util.HashMap;
34+
import java.util.List;
1335
import java.util.Map;
1436
import java.util.regex.Matcher;
1537
import java.util.regex.Pattern;
1638

1739
import static org.schabi.newpipe.extractor.utils.Utils.UTF_8;
1840

19-
/*
20-
* Created by Christian Schabesberger on 02.02.16.
21-
*
22-
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
23-
* Parser.java is part of NewPipe.
24-
*
25-
* NewPipe is free software: you can redistribute it and/or modify
26-
* it under the terms of the GNU General Public License as published by
27-
* the Free Software Foundation, either version 3 of the License, or
28-
* (at your option) any later version.
29-
*
30-
* NewPipe is distributed in the hope that it will be useful,
31-
* but WITHOUT ANY WARRANTY; without even the implied warranty of
32-
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
33-
* GNU General Public License for more details.
34-
*
35-
* You should have received a copy of the GNU General Public License
36-
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
37-
*/
38-
3941
/**
40-
* avoid using regex !!!
42+
* Avoid using regex !!!
4143
*/
4244
public final class Parser {
4345

@@ -66,18 +68,19 @@ public static String matchGroup(final String pattern,
6668
return matchGroup(Pattern.compile(pattern), input, group);
6769
}
6870

69-
public static String matchGroup(final Pattern pat, final String input, final int group)
70-
throws RegexException {
71+
public static String matchGroup(@Nonnull final Pattern pat,
72+
final String input,
73+
final int group) throws RegexException {
7174
final Matcher matcher = pat.matcher(input);
7275
final boolean foundMatch = matcher.find();
7376
if (foundMatch) {
7477
return matcher.group(group);
7578
} else {
7679
// only pass input to exception message when it is not too long
7780
if (input.length() > 1024) {
78-
throw new RegexException("failed to find pattern \"" + pat.pattern() + "\"");
81+
throw new RegexException("Failed to find pattern \"" + pat.pattern() + "\"");
7982
} else {
80-
throw new RegexException("failed to find pattern \"" + pat.pattern()
83+
throw new RegexException("Failed to find pattern \"" + pat.pattern()
8184
+ "\" inside of \"" + input + "\"");
8285
}
8386
}
@@ -89,14 +92,15 @@ public static boolean isMatch(final String pattern, final String input) {
8992
return mat.find();
9093
}
9194

92-
public static boolean isMatch(final Pattern pattern, final String input) {
95+
public static boolean isMatch(@Nonnull final Pattern pattern, final String input) {
9396
final Matcher mat = pattern.matcher(input);
9497
return mat.find();
9598
}
9699

97-
public static Map<String, String> compatParseMap(final String input)
100+
@Nonnull
101+
public static Map<String, String> compatParseMap(@Nonnull final String input)
98102
throws UnsupportedEncodingException {
99-
final Map<String, String> map = new HashMap<>();
103+
final Map<String, String> map = new HashMap<>();
100104
for (final String arg : input.split("&")) {
101105
final String[] splitArg = arg.split("=");
102106
if (splitArg.length > 1) {
@@ -108,9 +112,10 @@ public static Map<String, String> compatParseMap(final String input)
108112
return map;
109113
}
110114

115+
@Nonnull
111116
public static String[] getLinksFromString(final String txt) throws ParsingException {
112117
try {
113-
final ArrayList<String> links = new ArrayList<>();
118+
final List<String> links = new ArrayList<>();
114119
final LinkExtractor linkExtractor = LinkExtractor.builder()
115120
.linkTypes(EnumSet.of(LinkType.URL, LinkType.WWW))
116121
.build();

0 commit comments

Comments
 (0)