Skip to content

Commit d0d91e6

Browse files
committed
Address requested changes
1 parent b6bc521 commit d0d91e6

2 files changed

Lines changed: 124 additions & 46 deletions

File tree

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java

Lines changed: 49 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import static org.schabi.newpipe.extractor.utils.Utils.UTF_8;
88
import static org.schabi.newpipe.extractor.utils.Utils.getStringResultFromRegexArray;
99
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
10+
import static org.schabi.newpipe.extractor.utils.Utils.randomStringFromAlphabet;
1011

1112
import com.grack.nanojson.JsonArray;
1213
import com.grack.nanojson.JsonBuilder;
@@ -36,7 +37,6 @@
3637
import java.net.URL;
3738
import java.net.URLDecoder;
3839
import java.nio.charset.StandardCharsets;
39-
import java.security.SecureRandom;
4040
import java.time.LocalDate;
4141
import java.time.OffsetDateTime;
4242
import java.time.ZoneOffset;
@@ -83,6 +83,11 @@ private YoutubeParsingHelper() {
8383
public static final String CPN = "cpn";
8484
public static final String VIDEO_ID = "videoId";
8585

86+
/**
87+
* Seed that will be used for video tests, in order to mock video requests.
88+
*/
89+
private static final long SEED_FOR_VIDEOS_TESTS = 3000;
90+
8691
private static final String HARDCODED_CLIENT_VERSION = "2.20220114.01.00";
8792
private static final String HARDCODED_KEY = "AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8";
8893

@@ -100,13 +105,17 @@ private YoutubeParsingHelper() {
100105
private static boolean keyAndVersionExtracted = false;
101106
@SuppressWarnings("OptionalUsedAsFieldOrParameterType")
102107
private static Optional<Boolean> hardcodedClientVersionAndKeyValid = Optional.empty();
108+
103109
private static final String[] INNERTUBE_CONTEXT_CLIENT_VERSION_REGEXES =
104110
{"INNERTUBE_CONTEXT_CLIENT_VERSION\":\"([0-9\\.]+?)\"",
105111
"innertube_context_client_version\":\"([0-9\\.]+?)\"",
106112
"client.version=([0-9\\.]+)"};
107113
private static final String[] INNERTUBE_API_KEY_REGEXES =
108114
{"INNERTUBE_API_KEY\":\"([0-9a-zA-Z_-]+?)\"",
109115
"innertubeApiKey\":\"([0-9a-zA-Z_-]+?)\""};
116+
private static final String[] INITIAL_DATA_REGEXES =
117+
{"window\\[\"ytInitialData\"\\]\\s*=\\s*(\\{.*?\\});",
118+
"var\\s*ytInitialData\\s*=\\s*(\\{.*?\\});"};
110119
private static final String INNERTUBE_CLIENT_NAME_REGEX =
111120
"INNERTUBE_CONTEXT_CLIENT_NAME\":([0-9]+?),";
112121

@@ -116,13 +125,24 @@ private YoutubeParsingHelper() {
116125
private static Random numberGenerator = new Random();
117126

118127
/**
119-
* <code>PENDING+</code> means that the user did not yet submit their choices.
128+
* {@code PENDING+} means that the user did not yet submit their choices.
129+
*
130+
* <p>
120131
* Therefore, YouTube & Google should not track the user, because they did not give consent.
132+
* </p>
133+
*
134+
* <p>
121135
* The three digits at the end can be random, but are required.
136+
* </p>
122137
*/
123138
private static final String CONSENT_COOKIE_VALUE = "PENDING+";
139+
124140
/**
125-
* Youtube <code>CONSENT</code> cookie. Should prevent redirect to consent.youtube.com
141+
* YouTube {@code CONSENT} cookie.
142+
*
143+
* <p>
144+
* Should prevent redirect to {@code consent.youtube.com}.
145+
* </p>
126146
*/
127147
private static final String CONSENT_COOKIE = "CONSENT=" + CONSENT_COOKIE_VALUE;
128148

@@ -439,17 +459,10 @@ public static PlaylistInfo.PlaylistType extractPlaylistTypeFromPlaylistUrl(
439459
}
440460
}
441461

442-
public static JsonObject getInitialData(final String html) throws ParsingException {
462+
private static JsonObject getInitialData(final String html) throws ParsingException {
443463
try {
444-
try {
445-
final String initialData = Parser.matchGroup1(
446-
"window\\[\"ytInitialData\"\\]\\s*=\\s*(\\{.*?\\});", html);
447-
return JsonParser.object().from(initialData);
448-
} catch (final Parser.RegexException e) {
449-
final String initialData = Parser.matchGroup1(
450-
"var\\s*ytInitialData\\s*=\\s*(\\{.*?\\});", html);
451-
return JsonParser.object().from(initialData);
452-
}
464+
return JsonParser.object().from(getStringResultFromRegexArray(html,
465+
INITIAL_DATA_REGEXES, 1));
453466
} catch (final JsonParserException | Parser.RegexException e) {
454467
throw new ParsingException("Could not get ytInitialData", e);
455468
}
@@ -572,7 +585,7 @@ private static void extractClientVersionAndKeyFromHtmlSearchResultsPage()
572585
key = getStringResultFromRegexArray(html, INNERTUBE_API_KEY_REGEXES, 1);
573586
} catch (final Parser.RegexException e) {
574587
throw new ParsingException(
575-
"Could not extract YouTube WEB InnerTube client version and API key from HTML search results page");
588+
"Could not extract YouTube WEB InnerTube client version and API key from HTML search results page", e);
576589
}
577590
keyAndVersionExtracted = true;
578591
}
@@ -730,8 +743,7 @@ public static String[] getYoutubeMusicKey()
730743
final String response = getDownloader().get(url, headers).responseBody();
731744
musicClientVersion = getStringResultFromRegexArray(response,
732745
INNERTUBE_CONTEXT_CLIENT_VERSION_REGEXES, 1);
733-
musicKey = getStringResultFromRegexArray(response,
734-
INNERTUBE_API_KEY_REGEXES, 1);
746+
musicKey = getStringResultFromRegexArray(response, INNERTUBE_API_KEY_REGEXES, 1);
735747
musicClientName = Parser.matchGroup1(INNERTUBE_CLIENT_NAME_REGEX, response);
736748
} catch (final Exception e) {
737749
final String url = "https://music.youtube.com/";
@@ -815,10 +827,11 @@ public static String getUrlFromNavigationEndpoint(@Nonnull final JsonObject navi
815827
}
816828

817829
/**
818-
* Get the text from a JSON object that has either a simpleText or a runs array.
830+
* Get the text from a JSON object that has either a {@code simpleText} or a {@code runs}
831+
* array.
819832
*
820833
* @param textObject JSON object to get the text from
821-
* @param html whether to return HTML, by parsing the navigationEndpoint
834+
* @param html whether to return HTML, by parsing the {@code navigationEndpoint}
822835
* @return text in the JSON object or {@code null}
823836
*/
824837
@Nullable
@@ -1495,15 +1508,7 @@ public static String unescapeDocument(@Nonnull final String doc) {
14951508
*/
14961509
@Nonnull
14971510
public static String generateContentPlaybackNonce() {
1498-
final SecureRandom random = new SecureRandom();
1499-
final StringBuilder stringBuilder = new StringBuilder();
1500-
1501-
for (int i = 0; i < 16; i++) {
1502-
stringBuilder.append(CONTENT_PLAYBACK_NONCE_ALPHABET.charAt(
1503-
(random.nextInt(128) + 1) & 63));
1504-
}
1505-
1506-
return stringBuilder.toString();
1511+
return randomStringFromAlphabet(CONTENT_PLAYBACK_NONCE_ALPHABET, 16);
15071512
}
15081513

15091514
/**
@@ -1519,14 +1524,23 @@ public static String generateContentPlaybackNonce() {
15191524
*/
15201525
@Nonnull
15211526
public static String generateTParameter() {
1522-
final SecureRandom random = new SecureRandom();
1523-
final StringBuilder stringBuilder = new StringBuilder();
1524-
1525-
for (int i = 0; i < 12; i++) {
1526-
stringBuilder.append(CONTENT_PLAYBACK_NONCE_ALPHABET.charAt(
1527-
(random.nextInt(128) + 1) & 63));
1528-
}
1527+
return randomStringFromAlphabet(CONTENT_PLAYBACK_NONCE_ALPHABET, 12);
1528+
}
15291529

1530-
return stringBuilder.toString();
1530+
/**
1531+
* Set the seed for video tests.
1532+
*
1533+
* <p>
1534+
* This seed will be used to generate the same {@code t} and {@code cpn} values between
1535+
* different executions of tests so mocks can be used for stream tests.
1536+
* </p>
1537+
*
1538+
* <p>
1539+
* This method will call {@link Utils#setSecureRandomSeed(long)} with the
1540+
* {@link #SEED_FOR_VIDEOS_TESTS value}.
1541+
* </p>
1542+
*/
1543+
public static void setSeedForVideoTests() {
1544+
Utils.setSecureRandomSeed(SEED_FOR_VIDEOS_TESTS);
15311545
}
15321546
}

extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java

Lines changed: 75 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import java.util.ArrayList;
1616
import java.util.List;
1717
import java.util.Map;
18+
import java.security.SecureRandom;
1819
import java.util.regex.Pattern;
1920

2021
public final class Utils {
@@ -25,16 +26,23 @@ public final class Utils {
2526
public static final String EMPTY_STRING = "";
2627
private static final Pattern M_PATTERN = Pattern.compile("(https?)?:\\/\\/m\\.");
2728
private static final Pattern WWW_PATTERN = Pattern.compile("(https?)?:\\/\\/www\\.");
29+
private static final SecureRandom random = new SecureRandom();
2830

2931
private Utils() {
3032
// no instance
3133
}
3234

3335
/**
34-
* Remove all non-digit characters from a string.<p>
35-
* Examples:<p>
36-
* <ul><li>1 234 567 views -&gt; 1234567</li>
37-
* <li>$31,133.124 -&gt; 31133124</li></ul>
36+
* Remove all non-digit characters from a string.
37+
*
38+
* <p>
39+
* Examples:
40+
* </p>
41+
*
42+
* <ul>
43+
* <li>1 234 567 views -&gt; 1234567</li>
44+
* <li>$31,133.124 -&gt; 31133124</li>
45+
* </ul>
3846
*
3947
* @param toRemove string to remove non-digit chars
4048
* @return a string that contains only digits
@@ -45,8 +53,12 @@ public static String removeNonDigitCharacters(@Nonnull final String toRemove) {
4553
}
4654

4755
/**
48-
* <p>Convert a mixed number word to a long.</p>
49-
* <p>Examples:</p>
56+
* Convert a mixed number word to a long.
57+
*
58+
* <p>
59+
* Examples:
60+
* </p>
61+
*
5062
* <ul>
5163
* <li>123 -&gt; 123</li>
5264
* <li>1.23K -&gt; 1230</li>
@@ -106,11 +118,15 @@ public static String replaceHttpWithHttps(final String url) {
106118

107119
/**
108120
* Get the value of a URL-query by name.
109-
* If a url-query is give multiple times, only the value of the first query is returned
121+
*
122+
* <p>
123+
* If a URL query is given multiple times, only the value of the first query is returned.
124+
* </p>
110125
*
111126
* @param url the url to be used
112127
* @param parameterName the pattern that will be used to check the url
113-
* @return a string that contains the value of the query parameter or null if nothing was found
128+
* @return a string that contains the value of the query parameter or {@code null} if nothing
129+
* was found
114130
*/
115131
@Nullable
116132
public static String getQueryValue(@Nonnull final URL url,
@@ -144,11 +160,14 @@ public static String getQueryValue(@Nonnull final URL url,
144160
}
145161

146162
/**
147-
* converts a string to a URL-Object.
148-
* defaults to HTTP if no protocol is given
163+
* Convert a string to a {@link URL URL object}.
164+
*
165+
* <p>
166+
* Defaults to HTTP if no protocol is given.
167+
* </p>
149168
*
150169
* @param url the string to be converted to a URL-Object
151-
* @return a URL-Object containing the url
170+
* @return a {@link URL URL object} containing the url
152171
*/
153172
@Nonnull
154173
public static URL stringToURL(final String url) throws MalformedURLException {
@@ -187,6 +206,7 @@ public static String removeMAndWWWFromUrl(final String url) {
187206
return url;
188207
}
189208

209+
@Nonnull
190210
public static String removeUTF8BOM(@Nonnull final String s) {
191211
String result = s;
192212
if (result.startsWith("\uFEFF")) {
@@ -198,6 +218,7 @@ public static String removeUTF8BOM(@Nonnull final String s) {
198218
return result;
199219
}
200220

221+
@Nonnull
201222
public static String getBaseUrl(final String url) throws ParsingException {
202223
try {
203224
final URL uri = stringToURL(url);
@@ -244,6 +265,7 @@ public static boolean isNullOrEmpty(final String str) {
244265
* <p>
245266
* This method can be also used for {@link com.grack.nanojson.JsonArray JsonArray}s.
246267
* </p>
268+
*
247269
* @param collection the collection to check for being null or empty
248270
* @return whether the collection is null or empty
249271
*/
@@ -257,6 +279,7 @@ public static boolean isNullOrEmpty(final Collection<?> collection) {
257279
* <p>
258280
* This method can be also used for {@link com.grack.nanojson.JsonObject JsonObject}s.
259281
* </p>
282+
*
260283
* @param map the {@link Map map} to check for being null or empty
261284
* @return whether the {@link Map map} is null or empty
262285
*/
@@ -380,6 +403,7 @@ public static String getStringResultFromRegexArray(@Nonnull final String input,
380403
} catch (final Parser.RegexException ignored) {
381404
}
382405
}
406+
383407
if (result == null) {
384408
throw new Parser.RegexException("No regex matched the input on group " + group);
385409
}
@@ -413,9 +437,49 @@ public static String getStringResultFromRegexArray(@Nonnull final String input,
413437
} catch (final Parser.RegexException ignored) {
414438
}
415439
}
440+
416441
if (result == null) {
417442
throw new Parser.RegexException("No regex matched the input on group " + group);
418443
}
419444
return result;
420445
}
446+
447+
/**
448+
* Generate a random string using the secure random device {@link #random}.
449+
*
450+
* <p>
451+
* {@link #setSecureRandomSeed(long)} might be useful when mocking tests.
452+
* </p>
453+
*
454+
* @param alphabet the characters' alphabet to use
455+
* @param length the length of the returned string
456+
* @return a random string of the requested length made of only characters from the provided
457+
* alphabet
458+
*/
459+
@Nonnull
460+
public static String randomStringFromAlphabet(final String alphabet, final int length) {
461+
final StringBuilder stringBuilder = new StringBuilder();
462+
for (int i = 0; i < length; ++i) {
463+
stringBuilder.append(alphabet.charAt(random.nextInt(alphabet.length())));
464+
}
465+
return stringBuilder.toString();
466+
}
467+
468+
/**
469+
* Seed the secure random device used for {@link #randomStringFromAlphabet(String, int)}.
470+
*
471+
* <p>
472+
* Use this in tests so that they can be mocked as the same random numbers are always
473+
* generated.
474+
* </p>
475+
*
476+
* <p>
477+
* This is not intended to be used outside of tests.
478+
* </p>
479+
*
480+
* @param seed the seed to pass to {@link SecureRandom#setSeed(long)}
481+
*/
482+
public static void setSecureRandomSeed(final long seed) {
483+
random.setSeed(seed);
484+
}
421485
}

0 commit comments

Comments
 (0)