Skip to content

Commit a9ca5c4

Browse files
authored
Merge pull request #1056 from AudricV/yt-improve-search-suggestions-extraction
[YouTube] Switch to new search suggestion domain and improve error handling
2 parents 00408db + cf6040d commit a9ca5c4

3 files changed

Lines changed: 117 additions & 66 deletions

File tree

Lines changed: 61 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,42 +1,46 @@
1+
/*
2+
* Created by Christian Schabesberger on 28.09.16.
3+
*
4+
* Copyright (C) Christian Schabesberger 2015 <chris.schabesberger@mailbox.org>
5+
* YoutubeSuggestionExtractor.java is part of NewPipe Extractor.
6+
*
7+
* NewPipe Extractor is free software: you can redistribute it and/or modify
8+
* it under the terms of the GNU General Public License as published by
9+
* the Free Software Foundation, either version 3 of the License, or
10+
* (at your option) any later version.
11+
*
12+
* NewPipe Extractor is distributed in the hope that it will be useful,
13+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
14+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15+
* GNU General Public License for more details.
16+
*
17+
* You should have received a copy of the GNU General Public License
18+
* along with NewPipe Extractor. If not, see <https://www.gnu.org/licenses/>.
19+
*/
20+
121
package org.schabi.newpipe.extractor.services.youtube.extractors;
222

3-
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getCookieHeader;
23+
import static org.schabi.newpipe.extractor.utils.Utils.isBlank;
24+
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
425

526
import com.grack.nanojson.JsonArray;
627
import com.grack.nanojson.JsonParser;
728
import com.grack.nanojson.JsonParserException;
829

930
import org.schabi.newpipe.extractor.NewPipe;
1031
import org.schabi.newpipe.extractor.StreamingService;
11-
import org.schabi.newpipe.extractor.downloader.Downloader;
32+
import org.schabi.newpipe.extractor.downloader.Response;
1233
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
1334
import org.schabi.newpipe.extractor.exceptions.ParsingException;
1435
import org.schabi.newpipe.extractor.suggestion.SuggestionExtractor;
1536
import org.schabi.newpipe.extractor.utils.Utils;
1637

1738
import java.io.IOException;
18-
import java.util.ArrayList;
39+
import java.util.Collections;
40+
import java.util.HashMap;
1941
import java.util.List;
20-
21-
/*
22-
* Created by Christian Schabesberger on 28.09.16.
23-
*
24-
* Copyright (C) Christian Schabesberger 2015 <chris.schabesberger@mailbox.org>
25-
* YoutubeSuggestionExtractor.java is part of NewPipe.
26-
*
27-
* NewPipe is free software: you can redistribute it and/or modify
28-
* it under the terms of the GNU General Public License as published by
29-
* the Free Software Foundation, either version 3 of the License, or
30-
* (at your option) any later version.
31-
*
32-
* NewPipe is distributed in the hope that it will be useful,
33-
* but WITHOUT ANY WARRANTY; without even the implied warranty of
34-
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
35-
* GNU General Public License for more details.
36-
*
37-
* You should have received a copy of the GNU General Public License
38-
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
39-
*/
42+
import java.util.Map;
43+
import java.util.stream.Collectors;
4044

4145
public class YoutubeSuggestionExtractor extends SuggestionExtractor {
4246

@@ -46,35 +50,45 @@ public YoutubeSuggestionExtractor(final StreamingService service) {
4650

4751
@Override
4852
public List<String> suggestionList(final String query) throws IOException, ExtractionException {
49-
final Downloader dl = NewPipe.getDownloader();
50-
final List<String> suggestions = new ArrayList<>();
51-
52-
final String url = "https://suggestqueries.google.com/complete/search"
53-
+ "?client=" + "youtube" //"firefox" for JSON, 'toolbar' for xml
54-
+ "&jsonp=" + "JP"
53+
final String url = "https://suggestqueries-clients6.youtube.com/complete/search"
54+
+ "?client=" + "youtube"
5555
+ "&ds=" + "yt"
5656
+ "&gl=" + Utils.encodeUrlUtf8(getExtractorContentCountry().getCountryCode())
57-
+ "&q=" + Utils.encodeUrlUtf8(query);
57+
+ "&q=" + Utils.encodeUrlUtf8(query)
58+
+ "&xhr=t";
5859

59-
String response = dl.get(url, getCookieHeader(), getExtractorLocalization()).responseBody();
60-
// trim JSONP part "JP(...)"
61-
response = response.substring(3, response.length() - 1);
62-
try {
63-
final JsonArray collection = JsonParser.array().from(response).getArray(1);
64-
for (final Object suggestion : collection) {
65-
if (!(suggestion instanceof JsonArray)) {
66-
continue;
67-
}
68-
final String suggestionStr = ((JsonArray) suggestion).getString(0);
69-
if (suggestionStr == null) {
70-
continue;
71-
}
72-
suggestions.add(suggestionStr);
73-
}
60+
final Map<String, List<String>> headers = new HashMap<>();
61+
headers.put("Origin", Collections.singletonList("https://www.youtube.com"));
62+
headers.put("Referer", Collections.singletonList("https://www.youtube.com"));
63+
64+
final Response response = NewPipe.getDownloader()
65+
.get(url, headers, getExtractorLocalization());
66+
67+
final String contentTypeHeader = response.getHeader("Content-Type");
68+
if (isNullOrEmpty(contentTypeHeader) || !contentTypeHeader.contains("application/json")) {
69+
throw new ExtractionException("Invalid response type (got \"" + contentTypeHeader
70+
+ "\", expected a JSON response) (response code "
71+
+ response.responseCode() + ")");
72+
}
73+
74+
final String responseBody = response.responseBody();
7475

75-
return suggestions;
76+
if (responseBody.isEmpty()) {
77+
throw new ExtractionException("Empty response received");
78+
}
79+
80+
try {
81+
final JsonArray suggestions = JsonParser.array()
82+
.from(responseBody)
83+
.getArray(1); // 0: search query, 1: search suggestions, 2: tracking data?
84+
return suggestions.stream()
85+
.filter(JsonArray.class::isInstance)
86+
.map(JsonArray.class::cast)
87+
.map(suggestion -> suggestion.getString(0)) // 0 is the search suggestion
88+
.filter(suggestion -> !isBlank(suggestion)) // Filter blank suggestions
89+
.collect(Collectors.toUnmodifiableList());
7690
} catch (final JsonParserException e) {
77-
throw new ParsingException("Could not parse json response", e);
91+
throw new ParsingException("Could not parse JSON response", e);
7892
}
7993
}
8094
}
Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,25 @@
1-
package org.schabi.newpipe.extractor.services.youtube;
2-
31
/*
42
* Created by Christian Schabesberger on 18.11.16.
53
*
64
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
7-
* YoutubeSuggestionExtractorTest.java is part of NewPipe.
5+
* YoutubeSuggestionExtractorTest.java is part of NewPipe Extractor.
86
*
9-
* NewPipe is free software: you can redistribute it and/or modify
7+
* NewPipe Extractor is free software: you can redistribute it and/or modify
108
* it under the terms of the GNU General Public License as published by
119
* the Free Software Foundation, either version 3 of the License, or
1210
* (at your option) any later version.
1311
*
14-
* NewPipe is distributed in the hope that it will be useful,
12+
* NewPipe Extractor is distributed in the hope that it will be useful,
1513
* but WITHOUT ANY WARRANTY; without even the implied warranty of
1614
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1715
* GNU General Public License for more details.
1816
*
1917
* You should have received a copy of the GNU General Public License
20-
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
18+
* along with NewPipe Extractor. If not, see <https://www.gnu.org/licenses/>.
2119
*/
2220

21+
package org.schabi.newpipe.extractor.services.youtube;
22+
2323
import static org.junit.jupiter.api.Assertions.assertFalse;
2424
import static org.schabi.newpipe.extractor.ServiceList.YouTube;
2525

@@ -29,14 +29,15 @@
2929
import org.schabi.newpipe.extractor.NewPipe;
3030
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
3131
import org.schabi.newpipe.extractor.localization.Localization;
32+
import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeSuggestionExtractor;
3233
import org.schabi.newpipe.extractor.suggestion.SuggestionExtractor;
3334

3435
import java.io.IOException;
3536

3637
/**
37-
* Test for {@link SuggestionExtractor}
38+
* Test for {@link YoutubeSuggestionExtractor}
3839
*/
39-
public class YoutubeSuggestionExtractorTest {
40+
class YoutubeSuggestionExtractorTest {
4041

4142
private static final String RESOURCE_PATH = DownloaderFactory.RESOURCE_PATH + "services/youtube/extractor/suggestions/";
4243

@@ -45,12 +46,12 @@ public class YoutubeSuggestionExtractorTest {
4546
@BeforeAll
4647
public static void setUp() throws Exception {
4748
YoutubeTestsUtils.ensureStateless();
48-
NewPipe.init(DownloaderFactory.getDownloader(RESOURCE_PATH + ""), new Localization("de", "DE"));
49+
NewPipe.init(DownloaderFactory.getDownloader(RESOURCE_PATH), new Localization("de", "DE"));
4950
suggestionExtractor = YouTube.getSuggestionExtractor();
5051
}
5152

5253
@Test
53-
public void testIfSuggestions() throws IOException, ExtractionException {
54+
void testIfSuggestions() throws IOException, ExtractionException {
5455
assertFalse(suggestionExtractor.suggestionList("hello").isEmpty());
5556
}
5657
}
Lines changed: 45 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
{
22
"request": {
33
"httpMethod": "GET",
4-
"url": "https://suggestqueries.google.com/complete/search?client\u003dyoutube\u0026jsonp\u003dJP\u0026ds\u003dyt\u0026gl\u003dDE\u0026q\u003dhello",
4+
"url": "https://suggestqueries-clients6.youtube.com/complete/search?client\u003dyoutube\u0026ds\u003dyt\u0026gl\u003dDE\u0026q\u003dhello\u0026xhr\u003dt",
55
"headers": {
6-
"Cookie": [
7-
"CONSENT\u003dPENDING+385"
6+
"Origin": [
7+
"https://www.youtube.com"
8+
],
9+
"Referer": [
10+
"https://www.youtube.com"
811
],
912
"Accept-Language": [
1013
"en-GB, en;q\u003d0.9"
@@ -19,35 +22,68 @@
1922
"responseCode": 200,
2023
"responseMessage": "",
2124
"responseHeaders": {
25+
"access-control-allow-credentials": [
26+
"true"
27+
],
28+
"access-control-allow-headers": [
29+
"Authorization, X-Goog-Visitor-Id, X-Goog-PageId"
30+
],
31+
"access-control-allow-origin": [
32+
"https://www.youtube.com"
33+
],
34+
"access-control-max-age": [
35+
"86400"
36+
],
2237
"alt-svc": [
23-
"h3\u003d\":443\"; ma\u003d2592000,h3-29\u003d\":443\"; ma\u003d2592000,h3-Q050\u003d\":443\"; ma\u003d2592000,h3-Q046\u003d\":443\"; ma\u003d2592000,h3-Q043\u003d\":443\"; ma\u003d2592000,quic\u003d\":443\"; ma\u003d2592000; v\u003d\"46,43\""
38+
"h3\u003d\":443\"; ma\u003d2592000,h3-29\u003d\":443\"; ma\u003d2592000"
2439
],
2540
"cache-control": [
2641
"private, max-age\u003d3600"
2742
],
2843
"content-disposition": [
2944
"attachment; filename\u003d\"f.txt\""
3045
],
46+
"content-security-policy": [
47+
"object-src \u0027none\u0027;base-uri \u0027self\u0027;script-src \u0027nonce-BWS-e4XX2c-fmiIRfypT7Q\u0027 \u0027strict-dynamic\u0027 \u0027report-sample\u0027 \u0027unsafe-eval\u0027 \u0027unsafe-inline\u0027 https: http:;report-uri https://csp.withgoogle.com/csp/gws/other"
48+
],
3149
"content-type": [
32-
"text/javascript; charset\u003dUTF-8"
50+
"application/json; charset\u003dUTF-8"
51+
],
52+
"cross-origin-opener-policy": [
53+
"same-origin-allow-popups; report-to\u003d\"gws\""
3354
],
3455
"date": [
35-
"Tue, 22 Nov 2022 10:40:53 GMT"
56+
"Sun, 30 Apr 2023 17:40:12 GMT"
3657
],
3758
"expires": [
38-
"Tue, 22 Nov 2022 10:40:53 GMT"
59+
"Sun, 30 Apr 2023 17:40:12 GMT"
60+
],
61+
"p3p": [
62+
"CP\u003d\"This is not a P3P policy! See g.co/p3phelp for more info.\""
63+
],
64+
"report-to": [
65+
"{\"group\":\"gws\",\"max_age\":2592000,\"endpoints\":[{\"url\":\"https://csp.withgoogle.com/csp/report-to/gws/other\"}]}"
3966
],
4067
"server": [
4168
"gws"
4269
],
70+
"set-cookie": [
71+
"CONSENT\u003dPENDING+465; expires\u003dTue, 29-Apr-2025 17:40:12 GMT; path\u003d/; domain\u003d.youtube.com; Secure"
72+
],
73+
"vary": [
74+
"Sec-Fetch-Site"
75+
],
76+
"x-content-type-options": [
77+
"nosniff"
78+
],
4379
"x-frame-options": [
4480
"SAMEORIGIN"
4581
],
4682
"x-xss-protection": [
4783
"0"
4884
]
4985
},
50-
"responseBody": "JP([\"hello\",[[\"hello\",0,[512,433]],[\"hello adele\",0,[512,433,131]],[\"hello kitty\",0,[512,433]],[\"hello neighbor\",0,[512,433,131]],[\"hello hello\",0,[512,433]],[\"hello darkness my old friend\",0,[512,433,131]],[\"hello song\",0,[512,433,131]],[\"hello treasure\",0,[512,433]],[\"hello lionel richie\",0,[512,433]],[\"hello darkness my old friend lyrics\",0,[433,131]],[\"hello karaoke\",0,[433,131]],[\"hello kitty song\",0,[512,433,131]],[\"hello there\",0,[512]],[\"hello everybody my name is markiplier\",0,[433,131]]],{\"k\":1,\"q\":\"hL1qy05h57rukpOvO6x7ykUZN_0\"}])",
51-
"latestUrl": "https://suggestqueries.google.com/complete/search?client\u003dyoutube\u0026jsonp\u003dJP\u0026ds\u003dyt\u0026gl\u003dDE\u0026q\u003dhello"
86+
"responseBody": "[\"hello\",[[\"hello kitty\",0,[512,433,131]],[\"hello\",0,[512,433]],[\"hello neighbor\",0,[512,433,131]],[\"hello adele\",0,[512,433]],[\"hello kitty and friends\",0,[512,433,131]],[\"hello movie\",0,[650,433,131]],[\"hello darkness my old friend\",0,[512,433]],[\"hello hello\",0,[512,433]],[\"hello brother songs\",0,[650,433,131]],[\"hello neighbor 2\",0,[512,433]],[\"hello mama\",0,[650,433,131]],[\"hello lionel richie\",0,[512,433,131]],[\"hello neighbor song\",0,[512,433,131]],[\"hello blueface\",0,[3]]],{\"k\":1,\"q\":\"NYmTwY0mojkxWaSAZEeA30TjXp4\"}]",
87+
"latestUrl": "https://suggestqueries-clients6.youtube.com/complete/search?client\u003dyoutube\u0026ds\u003dyt\u0026gl\u003dDE\u0026q\u003dhello\u0026xhr\u003dt"
5288
}
5389
}

0 commit comments

Comments
 (0)