File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change 4949public class YoutubeChannelExtractor extends ChannelExtractor {
5050 /*package-private*/ static final String CHANNEL_URL_BASE = "https://www.youtube.com/channel/" ;
5151 private static final String CHANNEL_FEED_BASE = "https://www.youtube.com/feeds/videos.xml?channel_id=" ;
52- private static final String CHANNEL_URL_PARAMETERS = "/videos?view=0&flow=list&sort=dd&live_view=10000" ;
52+ private static final String CHANNEL_URL_PARAMETERS = "/videos?view=0&flow=list&sort=dd&live_view=10000&gl=US&hl=en " ;
5353
5454 private Document doc ;
5555
@@ -135,10 +135,11 @@ public String getFeedUrl() throws ParsingException {
135135
136136 @ Override
137137 public long getSubscriberCount () throws ParsingException {
138- final Element el = doc .select ("span[class*=\" yt-subscription-button-subscriber-count\" ]" ).first ();
138+ final String el = doc .select ("span[class*=\" yt-subscription-button-subscriber-count\" ]" )
139+ .first ().attr ("title" );
139140 if (el != null ) {
140141 try {
141- return Long . parseLong ( Utils .removeNonDigitCharacters (el . text ()) );
142+ return Utils .mixedNumberWordToLong (el );
142143 } catch (NumberFormatException e ) {
143144 throw new ParsingException ("Could not get subscriber count" , e );
144145 }
Original file line number Diff line number Diff line change @@ -27,6 +27,35 @@ public static String removeNonDigitCharacters(String toRemove) {
2727 return toRemove .replaceAll ("\\ D+" , "" );
2828 }
2929
30+ /**
31+ * <p>Convert a mixed number word to a long.</p>
32+ * <p>Examples:</p>
33+ * <ul>
34+ * <li>123 -> 123</li>
35+ * <li>1.23K -> 1230</li>
36+ * <li>1.23M -> 1230000</li>
37+ * </ul>
38+ * @param numberWord string to be converted to a long
39+ * @return a long
40+ * @throws NumberFormatException
41+ * @throws ParsingException
42+ */
43+ public static long mixedNumberWordToLong (String numberWord ) throws NumberFormatException , ParsingException {
44+ String multiplier = "" ;
45+ try {
46+ multiplier = Parser .matchGroup ("[\\ d]+([\\ .,][\\ d]+)?([KMkm])+" , numberWord , 2 );
47+ } catch (ParsingException ignored ) {}
48+ double count = Double .parseDouble (Parser .matchGroup1 ("([\\ d]+([\\ .,][\\ d]+)?)" , numberWord ));
49+ switch (multiplier .toUpperCase ()) {
50+ case "K" :
51+ return (long ) (count * 1e3 );
52+ case "M" :
53+ return (long ) (count * 1e6 );
54+ default :
55+ return (long ) (count );
56+ }
57+ }
58+
3059 /**
3160 * Check if the url matches the pattern.
3261 *
Original file line number Diff line number Diff line change @@ -105,6 +105,7 @@ public void testFeedUrl() throws Exception {
105105 @ Test
106106 public void testSubscriberCount () throws Exception {
107107 assertTrue ("Wrong subscriber count" , extractor .getSubscriberCount () >= 0 );
108+ assertTrue ("Subscriber count too small" , extractor .getSubscriberCount () >= 4e6 );
108109 }
109110 }
110111
@@ -195,6 +196,7 @@ public void testFeedUrl() throws Exception {
195196 @ Test
196197 public void testSubscriberCount () throws Exception {
197198 assertTrue ("Wrong subscriber count" , extractor .getSubscriberCount () >= 0 );
199+ assertTrue ("Subscriber count too small" , extractor .getSubscriberCount () >= 10e6 );
198200 }
199201
200202 }
You can’t perform that action at this time.
0 commit comments