44import com .grack .nanojson .JsonObject ;
55import com .grack .nanojson .JsonParser ;
66
7- import org .jsoup .Jsoup ;
87import org .jsoup .nodes .Document ;
98import org .jsoup .nodes .Element ;
109import org .mozilla .javascript .Context ;
3938
4039import java .io .IOException ;
4140import java .io .UnsupportedEncodingException ;
42- import java .net .MalformedURLException ;
43- import java .net .URL ;
4441import java .util .ArrayList ;
4542import java .util .Collections ;
4643import java .util .HashMap ;
4744import java .util .LinkedHashMap ;
4845import java .util .List ;
4946import java .util .Locale ;
5047import java .util .Map ;
51- import java .util .regex .Matcher ;
52- import java .util .regex .Pattern ;
5348
5449import javax .annotation .Nonnull ;
5550import javax .annotation .Nullable ;
7570 */
7671
7772public class YoutubeStreamExtractor extends StreamExtractor {
78- private static final String TAG = YoutubeStreamExtractor .class .getSimpleName ();
79-
8073 /*//////////////////////////////////////////////////////////////////////////
8174 // Exceptions
8275 //////////////////////////////////////////////////////////////////////////*/
@@ -87,12 +80,6 @@ public class DecryptException extends ParsingException {
8780 }
8881 }
8982
90- public class SubtitlesException extends ContentNotAvailableException {
91- SubtitlesException (String message , Throwable cause ) {
92- super (message , cause );
93- }
94- }
95-
9683 /*//////////////////////////////////////////////////////////////////////////*/
9784
9885 private Document doc ;
@@ -120,22 +107,17 @@ public YoutubeStreamExtractor(StreamingService service, LinkHandler linkHandler)
120107 @ Override
121108 public String getName () throws ParsingException {
122109 assertPageFetched ();
110+ String title = null ;
123111 try {
124- return playerResponse .getObject ("videoDetails" ).getString ("title" );
125-
126- } catch (Exception e ) {
127- // fallback HTML method
128- String name = null ;
112+ title = getVideoPrimaryInfoRenderer ().getObject ("title" ).getArray ("runs" ).getObject (0 ).getString ("text" );
113+ } catch (Exception ignored ) {}
114+ if (title == null ) {
129115 try {
130- name = doc .select ("meta[name=title]" ).attr (CONTENT );
131- } catch (Exception ignored ) {
132- }
133-
134- if (name == null ) {
135- throw new ParsingException ("Could not get name" , e );
136- }
137- return name ;
116+ title = playerResponse .getObject ("videoDetails" ).getString ("title" );
117+ } catch (Exception ignored ) {}
138118 }
119+ if (title != null ) return title ;
120+ throw new ParsingException ("Could not get name" );
139121 }
140122
141123 @ Override
@@ -144,19 +126,12 @@ public String getTextualUploadDate() throws ParsingException {
144126 return null ;
145127 }
146128
129+ // TODO: try videoPrimaryInfoRenderer.dateText.simpleText
130+
147131 try {
148132 return playerResponse .getObject ("microformat" ).getObject ("playerMicroformatRenderer" ).getString ("publishDate" );
149133 } catch (Exception e ) {
150- String uploadDate = null ;
151- try {
152- uploadDate = doc .select ("meta[itemprop=datePublished]" ).attr (CONTENT );
153- } catch (Exception ignored ) {
154- }
155-
156- if (uploadDate == null ) {
157- throw new ParsingException ("Could not get upload date" , e );
158- }
159- return uploadDate ;
134+ throw new ParsingException ("Could not get upload date" );
160135 }
161136 }
162137
@@ -181,15 +156,7 @@ public String getThumbnailUrl() throws ParsingException {
181156 return thumbnails .getObject (thumbnails .size () - 1 ).getString ("url" );
182157
183158 } catch (Exception e ) {
184- String url = null ;
185- try {
186- url = doc .select ("link[itemprop=\" thumbnailUrl\" ]" ).first ().attr ("abs:href" );
187- } catch (Exception ignored ) {}
188-
189- if (url == null ) {
190- throw new ParsingException ("Could not get thumbnail url" , e );
191- }
192- return url ;
159+ throw new ParsingException ("Could not get thumbnail url" );
193160 }
194161
195162 }
@@ -198,93 +165,19 @@ public String getThumbnailUrl() throws ParsingException {
198165 @ Override
199166 public Description getDescription () throws ParsingException {
200167 assertPageFetched ();
168+ // TODO: Parse videoSecondaryInfoRenderer.description
201169 try {
202- // first try to get html-formatted description
203- return new Description (parseHtmlAndGetFullLinks (doc .select ("p[id=\" eow-description\" ]" ).first ().html ()), Description .HTML );
204- } catch (Exception e ) {
205- try {
206- // fallback to raw non-html description
207- return new Description (playerResponse .getObject ("videoDetails" ).getString ("shortDescription" ), Description .PLAIN_TEXT );
208- } catch (Exception ignored ) {
209- throw new ParsingException ("Could not get the description" , e );
210- }
211- }
212- }
213-
214- // onclick="yt.www.watch.player.seekTo(0*3600+00*60+00);return false;"
215- // :00 is NOT recognized as a timestamp in description or comments.
216- // 0:00 is recognized in both description and comments.
217- // https://www.youtube.com/watch?v=4cccfDXu1vA
218- private final static Pattern DESCRIPTION_TIMESTAMP_ONCLICK_REGEX = Pattern .compile (
219- "seekTo\\ ("
220- + "(?:(\\ d+)\\ *3600\\ +)?" // hours?
221- + "(\\ d+)\\ *60\\ +" // minutes
222- + "(\\ d+)" // seconds
223- + "\\ )" );
224-
225- @ SafeVarargs
226- private static <T > T coalesce (T ... args ) {
227- for (T arg : args ) {
228- if (arg != null ) return arg ;
170+ // raw non-html description
171+ return new Description (playerResponse .getObject ("videoDetails" ).getString ("shortDescription" ), Description .PLAIN_TEXT );
172+ } catch (Exception ignored ) {
173+ throw new ParsingException ("Could not get the description" );
229174 }
230- throw new IllegalArgumentException ("all arguments to coalesce() were null" );
231- }
232-
233- private String parseHtmlAndGetFullLinks (String descriptionHtml )
234- throws MalformedURLException , UnsupportedEncodingException , ParsingException {
235- final Document description = Jsoup .parse (descriptionHtml , getUrl ());
236- for (Element a : description .select ("a" )) {
237- final String rawUrl = a .attr ("abs:href" );
238- final URL redirectLink = new URL (rawUrl );
239-
240- final Matcher onClickTimestamp ;
241- final String queryString ;
242- if ((onClickTimestamp = DESCRIPTION_TIMESTAMP_ONCLICK_REGEX .matcher (a .attr ("onclick" )))
243- .find ()) {
244- a .removeAttr ("onclick" );
245-
246- String hours = coalesce (onClickTimestamp .group (1 ), "0" );
247- String minutes = onClickTimestamp .group (2 );
248- String seconds = onClickTimestamp .group (3 );
249-
250- int timestamp = 0 ;
251- timestamp += Integer .parseInt (hours ) * 3600 ;
252- timestamp += Integer .parseInt (minutes ) * 60 ;
253- timestamp += Integer .parseInt (seconds );
254-
255- String setTimestamp = "&t=" + timestamp ;
256-
257- // Even after clicking https://youtu.be/...?t=6,
258- // getUrl() is https://www.youtube.com/watch?v=..., never youtu.be, never &t=.
259- a .attr ("href" , getUrl () + setTimestamp );
260-
261- } else if ((queryString = redirectLink .getQuery ()) != null ) {
262- // if the query string is null we are not dealing with a redirect link,
263- // so we don't need to override it.
264- final String link =
265- Parser .compatParseMap (queryString ).get ("q" );
266-
267- if (link != null ) {
268- // if link is null the a tag is a hashtag.
269- // They refer to the youtube search. We do not handle them.
270- a .text (link );
271- a .attr ("href" , link );
272- } else if (redirectLink .toString ().contains ("https://www.youtube.com/" )) {
273- a .text (redirectLink .toString ());
274- a .attr ("href" , redirectLink .toString ());
275- }
276- } else if (redirectLink .toString ().contains ("https://www.youtube.com/" )) {
277- descriptionHtml = descriptionHtml .replace (rawUrl , redirectLink .toString ());
278- a .text (redirectLink .toString ());
279- a .attr ("href" , redirectLink .toString ());
280- }
281- }
282- return description .select ("body" ).first ().html ();
283175 }
284176
285177 @ Override
286178 public int getAgeLimit () throws ParsingException {
287179 assertPageFetched ();
180+ // TODO: Find new way to get age limit
288181 if (!isAgeRestricted ) {
289182 return NO_AGE_LIMIT ;
290183 }
@@ -332,54 +225,25 @@ public long getTimeStamp() throws ParsingException {
332225 @ Override
333226 public long getViewCount () throws ParsingException {
334227 assertPageFetched ();
228+ String views = null ;
335229 try {
336- if (getStreamType ().equals (StreamType .LIVE_STREAM )) {
337- // The array index is variable, therefore we loop throw the complete array.
338- // videoPrimaryInfoRenderer is often stored at index 1
339- JsonArray contents = initialData .getObject ("contents" ).getObject ("twoColumnWatchNextResults" )
340- .getObject ("results" ).getObject ("results" ).getArray ("contents" );
341- for (Object c : contents ) {
342- try {
343- // this gets current view count, but there is also an overall view count which is stored here:
344- // contents.twoColumnWatchNextResults.secondaryResults.secondaryResults.results[0]
345- // .compactAutoplayRenderer.contents[0].compactVideoRenderer.viewCountText.simpleText
346- String views = ((JsonObject ) c ).getObject ("videoPrimaryInfoRenderer" )
347- .getObject ("viewCount" ).getObject ("videoViewCountRenderer" ).getObject ("viewCount" )
348- .getArray ("runs" ).getObject (0 ).getString ("text" );
349- return Long .parseLong (Utils .removeNonDigitCharacters (views ));
350- } catch (Exception ignored ) {}
351- }
352- throw new ParsingException ("Could not get view count from live stream" );
353-
354- } else {
355- return Long .parseLong (playerResponse .getObject ("videoDetails" ).getString ("viewCount" ));
356- }
357- } catch (Exception e ) {
230+ views = getVideoPrimaryInfoRenderer ().getObject ("viewCount" )
231+ .getObject ("videoViewCountRenderer" ).getObject ("viewCount" )
232+ .getArray ("runs" ).getObject (0 ).getString ("text" );
233+ } catch (Exception ignored ) {}
234+ if (views == null ) {
358235 try {
359- return Long .parseLong (doc .select ("meta[itemprop=interactionCount]" ).attr (CONTENT ));
360- } catch (Exception ignored ) {
361- throw new ParsingException ("Could not get view count" , e );
362- }
363- }
364- }
365-
366- private JsonObject getVideoPrimaryInfoRenderer () throws ParsingException {
367- JsonArray contents = initialData .getObject ("contents" ).getObject ("twoColumnWatchNextResults" )
368- .getObject ("results" ).getObject ("results" ).getArray ("contents" );
369- JsonObject videoPrimaryInfoRenderer = null ;
370-
371- for (Object content : contents ) {
372- if (((JsonObject ) content ).getObject ("videoPrimaryInfoRenderer" ) != null ) {
373- videoPrimaryInfoRenderer = ((JsonObject ) content ).getObject ("videoPrimaryInfoRenderer" );
374- break ;
375- }
236+ views = getVideoPrimaryInfoRenderer ().getObject ("viewCount" )
237+ .getObject ("videoViewCountRenderer" ).getObject ("viewCount" ).getString ("simpleText" );
238+ } catch (Exception ignored ) {}
376239 }
377-
378- if (videoPrimaryInfoRenderer == null ) {
379- throw new ParsingException ("Could not find videoPrimaryInfoRenderer" );
240+ if (views == null ) {
241+ try {
242+ views = playerResponse .getObject ("videoDetails" ).getString ("viewCount" );
243+ } catch (Exception ignored ) {}
380244 }
381-
382- return videoPrimaryInfoRenderer ;
245+ if ( views != null ) return Long . parseLong ( views );
246+ throw new ParsingException ( "Could not get view count" ) ;
383247 }
384248
385249 @ Override
@@ -993,6 +857,25 @@ public SubtitlesStream getSubtitle(final MediaFormat format) {
993857 // Utils
994858 //////////////////////////////////////////////////////////////////////////*/
995859
860+ private JsonObject getVideoPrimaryInfoRenderer () throws ParsingException {
861+ JsonArray contents = initialData .getObject ("contents" ).getObject ("twoColumnWatchNextResults" )
862+ .getObject ("results" ).getObject ("results" ).getArray ("contents" );
863+ JsonObject videoPrimaryInfoRenderer = null ;
864+
865+ for (Object content : contents ) {
866+ if (((JsonObject ) content ).getObject ("videoPrimaryInfoRenderer" ) != null ) {
867+ videoPrimaryInfoRenderer = ((JsonObject ) content ).getObject ("videoPrimaryInfoRenderer" );
868+ break ;
869+ }
870+ }
871+
872+ if (videoPrimaryInfoRenderer == null ) {
873+ throw new ParsingException ("Could not find videoPrimaryInfoRenderer" );
874+ }
875+
876+ return videoPrimaryInfoRenderer ;
877+ }
878+
996879 @ Nonnull
997880 private static String getVideoInfoUrl (final String id , final String sts ) {
998881 return "https://www.youtube.com/get_video_info?" + "video_id=" + id +
0 commit comments