Skip to content

Commit 430da57

Browse files
authored
Merge pull request #184 from Stypox/duplicate-subscription-fix
Fix inconsistency in youtube channel urls
2 parents f9da8bc + 58e3996 commit 430da57

10 files changed

Lines changed: 68 additions & 25 deletions

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747

4848
@SuppressWarnings("WeakerAccess")
4949
public class YoutubeChannelExtractor extends ChannelExtractor {
50+
/*package-private*/ static final String CHANNEL_URL_BASE = "https://www.youtube.com/channel/";
5051
private static final String CHANNEL_FEED_BASE = "https://www.youtube.com/feeds/videos.xml?channel_id=";
5152
private static final String CHANNEL_URL_PARAMETERS = "/videos?view=0&flow=list&sort=dd&live_view=10000";
5253

@@ -72,7 +73,7 @@ public String getNextPageUrl() throws ExtractionException {
7273
@Override
7374
public String getUrl() throws ParsingException {
7475
try {
75-
return "https://www.youtube.com/channel/" + getId();
76+
return CHANNEL_URL_BASE + getId();
7677
} catch (ParsingException e) {
7778
return super.getUrl();
7879
}

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelInfoItemExtractor.java

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@
55
import org.schabi.newpipe.extractor.exceptions.ParsingException;
66
import org.schabi.newpipe.extractor.utils.Utils;
77

8+
import java.util.regex.Matcher;
9+
import java.util.regex.Pattern;
10+
811
/*
912
* Created by Christian Schabesberger on 12.02.17.
1013
*
@@ -53,8 +56,20 @@ public String getName() throws ParsingException {
5356

5457
@Override
5558
public String getUrl() throws ParsingException {
56-
return el.select("a[class*=\"yt-uix-tile-link\"]").first()
57-
.attr("abs:href");
59+
String buttonTrackingUrl = el.select("button[class*=\"yt-uix-button\"]").first()
60+
.attr("abs:data-href");
61+
62+
Pattern channelIdPattern = Pattern.compile("(?:.*?)\\%252Fchannel\\%252F([A-Za-z0-9\\-\\_]+)(?:.*)");
63+
Matcher match = channelIdPattern.matcher(buttonTrackingUrl);
64+
65+
if (match.matches()) {
66+
return YoutubeChannelExtractor.CHANNEL_URL_BASE + match.group(1);
67+
} else {
68+
// fallback method just in case youtube changes things; it should never run, and tests will fail if it does
69+
// provides a URL with "/user/NAME", which is inconsistent with the stream and channel extractors
70+
return el.select("a[class*=\"yt-uix-tile-link\"]").first()
71+
.attr("abs:href");
72+
}
5873
}
5974

6075
@Override

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ public String getName() throws ParsingException {
5050
try {
5151
return doc.select("div[id=pl-header] h1[class=pl-header-title]").first().text();
5252
} catch (Exception e) {
53-
throw new ParsingException("Could not get playlist name");
53+
throw new ParsingException("Could not get playlist name", e);
5454
}
5555
}
5656

@@ -59,7 +59,7 @@ public String getThumbnailUrl() throws ParsingException {
5959
try {
6060
return doc.select("div[id=pl-header] div[class=pl-header-thumb] img").first().attr("abs:src");
6161
} catch (Exception e) {
62-
throw new ParsingException("Could not get playlist thumbnail");
62+
throw new ParsingException("Could not get playlist thumbnail", e);
6363
}
6464
}
6565

@@ -72,9 +72,11 @@ public String getBannerUrl() {
7272
@Override
7373
public String getUploaderUrl() throws ParsingException {
7474
try {
75-
return doc.select("ul[class=\"pl-header-details\"] li").first().select("a").first().attr("abs:href");
75+
return YoutubeChannelExtractor.CHANNEL_URL_BASE +
76+
doc.select("button[class*=\"yt-uix-subscription-button\"]")
77+
.first().attr("data-channel-external-id");
7678
} catch (Exception e) {
77-
throw new ParsingException("Could not get playlist uploader name");
79+
throw new ParsingException("Could not get playlist uploader url", e);
7880
}
7981
}
8082

@@ -83,7 +85,7 @@ public String getUploaderName() throws ParsingException {
8385
try {
8486
return doc.select("span[class=\"qualified-channel-title-text\"]").first().select("a").first().text();
8587
} catch (Exception e) {
86-
throw new ParsingException("Could not get playlist uploader name");
88+
throw new ParsingException("Could not get playlist uploader name", e);
8789
}
8890
}
8991

@@ -92,7 +94,7 @@ public String getUploaderAvatarUrl() throws ParsingException {
9294
try {
9395
return doc.select("div[id=gh-banner] img[class=channel-header-profile-image]").first().attr("abs:src");
9496
} catch (Exception e) {
95-
throw new ParsingException("Could not get playlist uploader avatar");
97+
throw new ParsingException("Could not get playlist uploader avatar", e);
9698
}
9799
}
98100

@@ -248,6 +250,8 @@ public String getUploaderName() throws ParsingException {
248250

249251
@Override
250252
public String getUploaderUrl() throws ParsingException {
253+
// this url is not always in the form "/channel/..."
254+
// sometimes Youtube provides urls in the form "/user/..."
251255
return getUploaderLink().attr("abs:href");
252256
}
253257

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,8 @@ public String getUploaderName() throws ParsingException {
107107

108108
@Override
109109
public String getUploaderUrl() throws ParsingException {
110+
// this url is not always in the form "/channel/..."
111+
// sometimes Youtube provides urls in the form "/user/..."
110112
try {
111113
try {
112114
return item.select("div[class=\"yt-lockup-byline\"]").first()
@@ -119,7 +121,7 @@ public String getUploaderUrl() throws ParsingException {
119121
.text().split(" - ")[0];
120122
} catch (Exception e) {
121123
System.out.println(item.html());
122-
throw new ParsingException("Could not get uploader", e);
124+
throw new ParsingException("Could not get uploader url", e);
123125
}
124126
}
125127

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeTrendingExtractor.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,8 @@ public String getUploaderUrl() throws ParsingException {
126126
}
127127

128128
private Element getUploaderLink() {
129+
// this url is not always in the form "/channel/..."
130+
// sometimes Youtube provides urls in the form "/user/..."
129131
Element uploaderEl = el.select("div[class*=\"yt-lockup-byline \"]").first();
130132
return uploaderEl.select("a").first();
131133
}

extractor/src/test/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudPlaylistExtractorTest.java

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package org.schabi.newpipe.extractor.services.soundcloud;
22

3+
import org.hamcrest.CoreMatchers;
34
import org.junit.BeforeClass;
45
import org.junit.Ignore;
56
import org.junit.Test;
@@ -119,14 +120,14 @@ public void testStreamCount() {
119120
}
120121
}
121122

122-
public static class RandomHouseDanceMusic implements BasePlaylistExtractorTest {
123+
public static class RandomHouseMusic implements BasePlaylistExtractorTest {
123124
private static SoundcloudPlaylistExtractor extractor;
124125

125126
@BeforeClass
126127
public static void setUp() throws Exception {
127128
NewPipe.init(Downloader.getInstance(), new Localization("GB", "en"));
128129
extractor = (SoundcloudPlaylistExtractor) SoundCloud
129-
.getPlaylistExtractor("https://soundcloud.com/hunter-leader/sets/house-electro-dance-music-2");
130+
.getPlaylistExtractor("https://soundcloud.com/micky96/sets/house");
130131
extractor.fetchPage();
131132
}
132133

@@ -141,22 +142,22 @@ public void testServiceId() {
141142

142143
@Test
143144
public void testName() {
144-
assertEquals("House, Electro , Dance Music 2", extractor.getName());
145+
assertEquals("House", extractor.getName());
145146
}
146147

147148
@Test
148149
public void testId() {
149-
assertEquals("310980722", extractor.getId());
150+
assertEquals("123062856", extractor.getId());
150151
}
151152

152153
@Test
153154
public void testUrl() throws Exception {
154-
assertEquals("https://soundcloud.com/hunter-leader/sets/house-electro-dance-music-2", extractor.getUrl());
155+
assertEquals("https://soundcloud.com/micky96/sets/house", extractor.getUrl());
155156
}
156157

157158
@Test
158159
public void testOriginalUrl() throws Exception {
159-
assertEquals("https://soundcloud.com/hunter-leader/sets/house-electro-dance-music-2", extractor.getOriginalUrl());
160+
assertEquals("https://soundcloud.com/micky96/sets/house", extractor.getOriginalUrl());
160161
}
161162

162163
/*//////////////////////////////////////////////////////////////////////////
@@ -182,7 +183,7 @@ public void testThumbnailUrl() {
182183
assertIsSecureUrl(extractor.getThumbnailUrl());
183184
}
184185

185-
@Ignore
186+
@Ignore("not implemented")
186187
@Test
187188
public void testBannerUrl() {
188189
assertIsSecureUrl(extractor.getBannerUrl());
@@ -192,12 +193,12 @@ public void testBannerUrl() {
192193
public void testUploaderUrl() {
193194
final String uploaderUrl = extractor.getUploaderUrl();
194195
assertIsSecureUrl(uploaderUrl);
195-
assertTrue(uploaderUrl, uploaderUrl.contains("hunter-leader"));
196+
assertThat(uploaderUrl, CoreMatchers.containsString("micky96"));
196197
}
197198

198199
@Test
199200
public void testUploaderName() {
200-
assertEquals("Gosu", extractor.getUploaderName());
201+
assertEquals("_mickyyy", extractor.getUploaderName());
201202
}
202203

203204
@Test
@@ -266,6 +267,7 @@ public void testOriginalUrl() throws Exception {
266267
// ListExtractor
267268
//////////////////////////////////////////////////////////////////////////*/
268269

270+
@Ignore
269271
@Test
270272
public void testRelatedItems() throws Exception {
271273
defaultTestRelatedItems(extractor, SoundCloud.getServiceId());
@@ -287,6 +289,7 @@ public void testMoreRelatedItems() throws Exception {
287289
// PlaylistExtractor
288290
//////////////////////////////////////////////////////////////////////////*/
289291

292+
@Ignore
290293
@Test
291294
public void testThumbnailUrl() {
292295
assertIsSecureUrl(extractor.getThumbnailUrl());

extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubePlaylistExtractorTest.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ public void testBannerUrl() throws Exception {
100100

101101
@Test
102102
public void testUploaderUrl() throws Exception {
103-
assertTrue(extractor.getUploaderUrl().contains("youtube.com"));
103+
assertEquals("https://www.youtube.com/channel/UCs72iRpTEuwV3y6pdWYLgiw", extractor.getUploaderUrl());
104104
}
105105

106106
@Test
@@ -185,8 +185,8 @@ public void testRelatedItems() throws Exception {
185185
public void testMoreRelatedItems() throws Exception {
186186
ListExtractor.InfoItemsPage<StreamInfoItem> currentPage
187187
= defaultTestMoreItems(extractor, ServiceList.YouTube.getServiceId());
188-
// Test for 2 more levels
189188

189+
// test for 2 more levels
190190
for (int i = 0; i < 2; i++) {
191191
currentPage = extractor.getPage(currentPage.getNextPageUrl());
192192
defaultTestListOfItems(YouTube.getServiceId(), currentPage.getItems(), currentPage.getErrors());
@@ -214,7 +214,7 @@ public void testBannerUrl() throws Exception {
214214

215215
@Test
216216
public void testUploaderUrl() throws Exception {
217-
assertTrue(extractor.getUploaderUrl().contains("youtube.com"));
217+
assertEquals("https://www.youtube.com/channel/UCHSPWoY1J5fbDVbcnyeqwdw", extractor.getUploaderUrl());
218218
}
219219

220220
@Test

extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractorDefaultTest.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ public void testGetDescription() throws ParsingException {
8181
}
8282

8383
@Test
84-
public void testGetFullLinksInDescriptlion() throws ParsingException {
84+
public void testGetFullLinksInDescription() throws ParsingException {
8585
assertTrue(extractor.getDescription().contains("http://adele.com"));
8686
assertFalse(extractor.getDescription().contains("http://smarturl.it/SubscribeAdele?IQi..."));
8787
}
@@ -111,7 +111,7 @@ public void testGetUploadDate() throws ParsingException {
111111

112112
@Test
113113
public void testGetUploaderUrl() throws ParsingException {
114-
assertTrue(extractor.getUploaderUrl().length() > 0);
114+
assertEquals("https://www.youtube.com/channel/UCsRM0YB_dabtEPGPTKo-gcw", extractor.getUploaderUrl());
115115
}
116116

117117
@Test

extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/search/YoutubeSearchExtractorChannelOnlyTest.java

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package org.schabi.newpipe.extractor.services.youtube.search;
22

3+
import org.hamcrest.CoreMatchers;
34
import org.junit.BeforeClass;
45
import org.junit.Ignore;
56
import org.junit.Test;
@@ -63,4 +64,19 @@ public void testOnlyContainChannels() {
6364
}
6465
}
6566
}
67+
68+
@Test
69+
public void testChannelUrl() {
70+
for(InfoItem item : itemsPage.getItems()) {
71+
if (item instanceof ChannelInfoItem) {
72+
ChannelInfoItem channel = (ChannelInfoItem) item;
73+
74+
if (channel.getSubscriberCount() > 5e7) { // the real PewDiePie
75+
assertEquals("https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw", item.getUrl());
76+
} else {
77+
assertThat(item.getUrl(), CoreMatchers.startsWith("https://www.youtube.com/channel/"));
78+
}
79+
}
80+
}
81+
}
6682
}

extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/search/YoutubeSearchExtractorDefaultTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ public void testResultList_FirstElement() {
7373
assertTrue((firstInfoItem instanceof ChannelInfoItem)
7474
|| (secondInfoItem instanceof ChannelInfoItem));
7575
assertEquals("name", "PewDiePie", channelItem.getName());
76-
assertEquals("url","https://www.youtube.com/user/PewDiePie", channelItem.getUrl());
76+
assertEquals("url", "https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw", channelItem.getUrl());
7777
}
7878

7979
@Test

0 commit comments

Comments
 (0)