1010import org .schabi .newpipe .extractor .subscription .SubscriptionExtractor ;
1111import org .schabi .newpipe .extractor .subscription .SubscriptionItem ;
1212
13+ import java .io .BufferedReader ;
14+ import java .io .IOException ;
1315import java .io .InputStream ;
16+ import java .io .InputStreamReader ;
1417import java .util .ArrayList ;
1518import java .util .Collections ;
1619import java .util .List ;
20+ import java .util .zip .ZipEntry ;
21+ import java .util .zip .ZipInputStream ;
1722
1823import javax .annotation .Nonnull ;
1924
2025import static org .schabi .newpipe .extractor .subscription .SubscriptionExtractor .ContentSource .INPUT_STREAM ;
2126
2227/**
23- * Extract subscriptions from a Google takeout export (the user has to get the JSON out of the zip)
28+ * Extract subscriptions from a Google takeout export
2429 */
2530public class YoutubeSubscriptionExtractor extends SubscriptionExtractor {
2631 private static final String BASE_CHANNEL_URL = "https://www.youtube.com/channel/" ;
@@ -37,6 +42,30 @@ public String getRelatedUrl() {
3742 @ Override
3843 public List <SubscriptionItem > fromInputStream (@ Nonnull final InputStream contentInputStream )
3944 throws ExtractionException {
45+ return fromJsonInputStream (contentInputStream );
46+ }
47+
48+ @ Override
49+ public List <SubscriptionItem > fromInputStream (@ Nonnull final InputStream contentInputStream , String contentType )
50+ throws ExtractionException {
51+ switch (contentType ) {
52+ case "json" :
53+ case "application/json" :
54+ return fromJsonInputStream (contentInputStream );
55+ case "csv" :
56+ case "text/csv" :
57+ case "text/comma-separated-values" :
58+ return fromCsvInputStream (contentInputStream );
59+ case "zip" :
60+ case "application/zip" :
61+ return fromZipInputStream (contentInputStream );
62+ default :
63+ throw new InvalidSourceException ("Unsupported content type: " + contentType );
64+ }
65+ }
66+
67+ public List <SubscriptionItem > fromJsonInputStream (@ Nonnull final InputStream contentInputStream )
68+ throws ExtractionException {
4069 final JsonArray subscriptions ;
4170 try {
4271 subscriptions = JsonParser .array ().from (contentInputStream );
@@ -68,4 +97,109 @@ public List<SubscriptionItem> fromInputStream(@Nonnull final InputStream content
6897 }
6998 return subscriptionItems ;
7099 }
100+
101+ public List <SubscriptionItem > fromZipInputStream (@ Nonnull final InputStream contentInputStream )
102+ throws ExtractionException {
103+ final ZipInputStream zipInputStream = new ZipInputStream (contentInputStream );
104+
105+ try {
106+ ZipEntry zipEntry ;
107+ while ((zipEntry = zipInputStream .getNextEntry ()) != null ) {
108+ if (zipEntry .getName ().toLowerCase ().endsWith (".csv" )) {
109+ try {
110+ final List <SubscriptionItem > csvItems = fromCsvInputStream (zipInputStream );
111+
112+ // Return it only if it has items (it exits early if it's the wrong file format)
113+ // Otherwise try the next file
114+ if (csvItems .size () > 0 ) {
115+ return csvItems ;
116+ }
117+ } catch (ExtractionException e ) {
118+ // Ignore error and go to next file
119+ // (maybe log it?)
120+ }
121+ }
122+ }
123+ } catch (IOException e ) {
124+ throw new InvalidSourceException ("Error reading contents of zip file" , e );
125+ }
126+
127+ throw new InvalidSourceException ("Unable to find a valid subscriptions.csv file (try extracting and selecting the csv file)" );
128+ }
129+
130+ public List <SubscriptionItem > fromCsvInputStream (@ Nonnull final InputStream contentInputStream )
131+ throws ExtractionException {
132+ // Expected format of CSV file:
133+ // Channel Id,Channel Url,Channel Title
134+ // UC1JTQBa5QxZCpXrFSkMxmPw,http://www.youtube.com/channel/UC1JTQBa5QxZCpXrFSkMxmPw,Raycevick
135+ // UCFl7yKfcRcFmIUbKeCA-SJQ,http://www.youtube.com/channel/UCFl7yKfcRcFmIUbKeCA-SJQ,Joji
136+ //
137+ // Notes:
138+ // It's always 3 columns
139+ // The first line is always a header
140+ // Header names are different based on the locale
141+ // Fortunately the data is always the same order no matter what locale
142+
143+ int currentLine = 0 ;
144+ String line = "" ;
145+
146+ try (BufferedReader br = new BufferedReader (new InputStreamReader (contentInputStream ))) {
147+ final List <SubscriptionItem > subscriptionItems = new ArrayList <>();
148+
149+ // Ignore header
150+ currentLine = 1 ;
151+ line = br .readLine ();
152+
153+ while ((line = br .readLine ()) != null ) {
154+ currentLine ++;
155+
156+ // Exit early if we've read the first few lines and we haven't added any items
157+ // It's likely we're in the wrong file
158+ if (currentLine > 5 && subscriptionItems .size () == 0 ) {
159+ break ;
160+ }
161+
162+ // First comma
163+ int i1 = line .indexOf ("," );
164+ if (i1 == -1 ) {
165+ continue ;
166+ }
167+
168+ // Second comma
169+ int i2 = line .indexOf ("," , i1 + 1 );
170+ if (i2 == -1 ) {
171+ continue ;
172+ }
173+
174+ // Third comma or line length
175+ int i3 = line .indexOf ("," , i2 + 1 );
176+ if (i3 == -1 ) {
177+ i3 = line .length ();
178+ }
179+
180+ // Channel URL from second entry
181+ final String channelUrl = line
182+ .substring (i1 + 1 , i2 )
183+ .replace ("http://" , "https://" );
184+ if (!channelUrl .startsWith (BASE_CHANNEL_URL )) {
185+ continue ;
186+ }
187+
188+ // Channel title from third entry
189+ final String channelTitle = line .substring (i2 + 1 , i3 );
190+
191+ final SubscriptionItem newItem = new SubscriptionItem (service .getServiceId (), channelUrl , channelTitle );
192+ subscriptionItems .add (newItem );
193+ }
194+
195+ return subscriptionItems ;
196+ } catch (IOException e ) {
197+ if (line == null ) {
198+ line = "<null>" ;
199+ } else if (line .length () > 10 ) {
200+ line = line .substring (0 , 10 ) + "..." ;
201+ }
202+ throw new InvalidSourceException ("Error reading CSV file, line = '" + line + "', line number = " + currentLine );
203+ }
204+ }
71205}
0 commit comments