Skip to content

Commit b129849

Browse files
authored
Merge pull request #1029 from AudricV/yt_fix-no-views-extraction-playlist-items
[YouTube] Fix partial non-extraction of "No views" string in stream items
2 parents 3fdb6ee + bd79b92 commit b129849

1 file changed

Lines changed: 58 additions & 32 deletions

File tree

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java

Lines changed: 58 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -13,11 +13,13 @@
1313
import org.schabi.newpipe.extractor.utils.Parser;
1414
import org.schabi.newpipe.extractor.utils.Utils;
1515

16+
import javax.annotation.Nonnull;
1617
import javax.annotation.Nullable;
1718
import java.time.Instant;
1819
import java.time.OffsetDateTime;
1920
import java.time.ZoneOffset;
2021
import java.time.format.DateTimeFormatter;
22+
import java.util.regex.Pattern;
2123

2224
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject;
2325
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getThumbnailUrlFromInfoItem;
@@ -43,6 +45,11 @@
4345
*/
4446

4547
public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor {
48+
49+
private static final Pattern ACCESSIBILITY_DATA_VIEW_COUNT_REGEX =
50+
Pattern.compile("([\\d,]+) views$");
51+
private static final String NO_VIEWS_LOWERCASE = "no views";
52+
4653
private final JsonObject videoInfo;
4754
private final TimeAgoParser timeAgoParser;
4855
private StreamType cachedStreamType;
@@ -284,20 +291,14 @@ public long getViewCount() throws ParsingException {
284291
return -1;
285292
}
286293

287-
final String viewCount = getTextFromObject(videoInfo.getObject("viewCountText"));
294+
// Ignore all exceptions, as the view count can be hidden by creators, and so cannot be
295+
// found in this case
288296

289-
if (!isNullOrEmpty(viewCount)) {
297+
final String viewCountText = getTextFromObject(videoInfo.getObject("viewCountText"));
298+
if (!isNullOrEmpty(viewCountText)) {
290299
try {
291-
// These approaches are language dependent
292-
if (viewCount.toLowerCase().contains("no views")) {
293-
return 0;
294-
} else if (viewCount.toLowerCase().contains("recommended")) {
295-
return -1;
296-
}
297-
298-
return Long.parseLong(Utils.removeNonDigitCharacters(viewCount));
300+
return getViewCountFromViewCountText(viewCountText, false);
299301
} catch (final Exception ignored) {
300-
// Ignore all exceptions, as we can fallback to accessibility data
301302
}
302303
}
303304

@@ -306,45 +307,70 @@ public long getViewCount() throws ParsingException {
306307
// the livestream)
307308
if (getStreamType() != StreamType.LIVE_STREAM) {
308309
try {
309-
return Long.parseLong(Utils.removeNonDigitCharacters(
310-
// This approach is language dependent
311-
Parser.matchGroup1("([\\d,]+) views$",
312-
videoInfo.getObject("title")
313-
.getObject("accessibility")
314-
.getObject("accessibilityData")
315-
.getString("label", ""))));
310+
return getViewCountFromAccessibilityData();
316311
} catch (final Exception ignored) {
317-
// Ignore all exceptions, as the view count can be hidden by creators, and so
318-
// cannot be found in this case
319312
}
320313
}
321314

322315
// Fallback to a short view count, always used for livestreams (see why above)
323-
try {
316+
if (videoInfo.has("videoInfo")) {
324317
// Returned in playlists, in the form: view count separator upload date
325-
if (videoInfo.has("videoInfo")) {
326-
return Utils.mixedNumberWordToLong(videoInfo.getObject("videoInfo")
318+
try {
319+
return getViewCountFromViewCountText(videoInfo.getObject("videoInfo")
327320
.getArray("runs")
328321
.getObject(0)
329-
.getString("text"));
322+
.getString("text", ""), true);
323+
} catch (final Exception ignored) {
330324
}
325+
}
331326

327+
if (videoInfo.has("shortViewCountText")) {
332328
// Returned everywhere but in playlists, used by the website to show view counts
333-
if (videoInfo.has("shortViewCountText")) {
334-
return Utils.mixedNumberWordToLong(videoInfo.getObject("shortViewCountText")
335-
.getArray("runs")
336-
.getObject(0)
337-
.getString("text"));
329+
try {
330+
final String shortViewCountText =
331+
getTextFromObject(videoInfo.getObject("shortViewCountText"));
332+
if (!isNullOrEmpty(shortViewCountText)) {
333+
return getViewCountFromViewCountText(shortViewCountText, true);
334+
}
335+
} catch (final Exception ignored) {
338336
}
339-
} catch (final Exception ignored) {
340-
// Ignore all exceptions, as the view count can be hidden by creators, and so cannot be
341-
// found in this case
342337
}
343338

344339
// No view count extracted: return -1, as the view count can be hidden by creators on videos
345340
return -1;
346341
}
347342

343+
private long getViewCountFromViewCountText(@Nonnull final String viewCountText,
344+
final boolean isMixedNumber)
345+
throws NumberFormatException, ParsingException {
346+
// These approaches are language dependent
347+
if (viewCountText.toLowerCase().contains(NO_VIEWS_LOWERCASE)) {
348+
return 0;
349+
} else if (viewCountText.toLowerCase().contains("recommended")) {
350+
return -1;
351+
}
352+
353+
return isMixedNumber ? Utils.mixedNumberWordToLong(viewCountText)
354+
: Long.parseLong(Utils.removeNonDigitCharacters(viewCountText));
355+
}
356+
357+
private long getViewCountFromAccessibilityData()
358+
throws NumberFormatException, Parser.RegexException {
359+
// These approaches are language dependent
360+
final String videoInfoTitleAccessibilityData = videoInfo.getObject("title")
361+
.getObject("accessibility")
362+
.getObject("accessibilityData")
363+
.getString("label", "");
364+
365+
if (videoInfoTitleAccessibilityData.toLowerCase().endsWith(NO_VIEWS_LOWERCASE)) {
366+
return 0;
367+
}
368+
369+
return Long.parseLong(Utils.removeNonDigitCharacters(
370+
Parser.matchGroup1(ACCESSIBILITY_DATA_VIEW_COUNT_REGEX,
371+
videoInfoTitleAccessibilityData)));
372+
}
373+
348374
@Override
349375
public String getThumbnailUrl() throws ParsingException {
350376
return getThumbnailUrlFromInfoItem(videoInfo);

0 commit comments

Comments
 (0)