1313import org .schabi .newpipe .extractor .utils .Parser ;
1414import org .schabi .newpipe .extractor .utils .Utils ;
1515
16+ import javax .annotation .Nonnull ;
1617import javax .annotation .Nullable ;
1718import java .time .Instant ;
1819import java .time .OffsetDateTime ;
1920import java .time .ZoneOffset ;
2021import java .time .format .DateTimeFormatter ;
22+ import java .util .regex .Pattern ;
2123
2224import static org .schabi .newpipe .extractor .services .youtube .YoutubeParsingHelper .getTextFromObject ;
2325import static org .schabi .newpipe .extractor .services .youtube .YoutubeParsingHelper .getThumbnailUrlFromInfoItem ;
4345 */
4446
4547public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor {
48+
49+ private static final Pattern ACCESSIBILITY_DATA_VIEW_COUNT_REGEX =
50+ Pattern .compile ("([\\ d,]+) views$" );
51+ private static final String NO_VIEWS_LOWERCASE = "no views" ;
52+
4653 private final JsonObject videoInfo ;
4754 private final TimeAgoParser timeAgoParser ;
4855 private StreamType cachedStreamType ;
@@ -284,20 +291,14 @@ public long getViewCount() throws ParsingException {
284291 return -1 ;
285292 }
286293
287- final String viewCount = getTextFromObject (videoInfo .getObject ("viewCountText" ));
294+ // Ignore all exceptions, as the view count can be hidden by creators, and so cannot be
295+ // found in this case
288296
289- if (!isNullOrEmpty (viewCount )) {
297+ final String viewCountText = getTextFromObject (videoInfo .getObject ("viewCountText" ));
298+ if (!isNullOrEmpty (viewCountText )) {
290299 try {
291- // These approaches are language dependent
292- if (viewCount .toLowerCase ().contains ("no views" )) {
293- return 0 ;
294- } else if (viewCount .toLowerCase ().contains ("recommended" )) {
295- return -1 ;
296- }
297-
298- return Long .parseLong (Utils .removeNonDigitCharacters (viewCount ));
300+ return getViewCountFromViewCountText (viewCountText , false );
299301 } catch (final Exception ignored ) {
300- // Ignore all exceptions, as we can fallback to accessibility data
301302 }
302303 }
303304
@@ -306,45 +307,70 @@ public long getViewCount() throws ParsingException {
306307 // the livestream)
307308 if (getStreamType () != StreamType .LIVE_STREAM ) {
308309 try {
309- return Long .parseLong (Utils .removeNonDigitCharacters (
310- // This approach is language dependent
311- Parser .matchGroup1 ("([\\ d,]+) views$" ,
312- videoInfo .getObject ("title" )
313- .getObject ("accessibility" )
314- .getObject ("accessibilityData" )
315- .getString ("label" , "" ))));
310+ return getViewCountFromAccessibilityData ();
316311 } catch (final Exception ignored ) {
317- // Ignore all exceptions, as the view count can be hidden by creators, and so
318- // cannot be found in this case
319312 }
320313 }
321314
322315 // Fallback to a short view count, always used for livestreams (see why above)
323- try {
316+ if ( videoInfo . has ( "videoInfo" )) {
324317 // Returned in playlists, in the form: view count separator upload date
325- if ( videoInfo . has ( "videoInfo" )) {
326- return Utils . mixedNumberWordToLong (videoInfo .getObject ("videoInfo" )
318+ try {
319+ return getViewCountFromViewCountText (videoInfo .getObject ("videoInfo" )
327320 .getArray ("runs" )
328321 .getObject (0 )
329- .getString ("text" ));
322+ .getString ("text" , "" ), true );
323+ } catch (final Exception ignored ) {
330324 }
325+ }
331326
327+ if (videoInfo .has ("shortViewCountText" )) {
332328 // Returned everywhere but in playlists, used by the website to show view counts
333- if (videoInfo .has ("shortViewCountText" )) {
334- return Utils .mixedNumberWordToLong (videoInfo .getObject ("shortViewCountText" )
335- .getArray ("runs" )
336- .getObject (0 )
337- .getString ("text" ));
329+ try {
330+ final String shortViewCountText =
331+ getTextFromObject (videoInfo .getObject ("shortViewCountText" ));
332+ if (!isNullOrEmpty (shortViewCountText )) {
333+ return getViewCountFromViewCountText (shortViewCountText , true );
334+ }
335+ } catch (final Exception ignored ) {
338336 }
339- } catch (final Exception ignored ) {
340- // Ignore all exceptions, as the view count can be hidden by creators, and so cannot be
341- // found in this case
342337 }
343338
344339 // No view count extracted: return -1, as the view count can be hidden by creators on videos
345340 return -1 ;
346341 }
347342
343+ private long getViewCountFromViewCountText (@ Nonnull final String viewCountText ,
344+ final boolean isMixedNumber )
345+ throws NumberFormatException , ParsingException {
346+ // These approaches are language dependent
347+ if (viewCountText .toLowerCase ().contains (NO_VIEWS_LOWERCASE )) {
348+ return 0 ;
349+ } else if (viewCountText .toLowerCase ().contains ("recommended" )) {
350+ return -1 ;
351+ }
352+
353+ return isMixedNumber ? Utils .mixedNumberWordToLong (viewCountText )
354+ : Long .parseLong (Utils .removeNonDigitCharacters (viewCountText ));
355+ }
356+
357+ private long getViewCountFromAccessibilityData ()
358+ throws NumberFormatException , Parser .RegexException {
359+ // These approaches are language dependent
360+ final String videoInfoTitleAccessibilityData = videoInfo .getObject ("title" )
361+ .getObject ("accessibility" )
362+ .getObject ("accessibilityData" )
363+ .getString ("label" , "" );
364+
365+ if (videoInfoTitleAccessibilityData .toLowerCase ().endsWith (NO_VIEWS_LOWERCASE )) {
366+ return 0 ;
367+ }
368+
369+ return Long .parseLong (Utils .removeNonDigitCharacters (
370+ Parser .matchGroup1 (ACCESSIBILITY_DATA_VIEW_COUNT_REGEX ,
371+ videoInfoTitleAccessibilityData )));
372+ }
373+
348374 @ Override
349375 public String getThumbnailUrl () throws ParsingException {
350376 return getThumbnailUrlFromInfoItem (videoInfo );
0 commit comments