Skip to content

Commit ffffb04

Browse files
authored
Merge pull request #953 from Theta-Dev/attributed-text-desc
[YouTube] Add support for attributed text description
2 parents 31bf704 + 592e1d6 commit ffffb04

3 files changed

Lines changed: 92 additions & 1 deletion

File tree

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1009,6 +1009,90 @@ public static String getTextFromObject(final JsonObject textObject, final boolea
10091009
return text;
10101010
}
10111011

1012+
/**
1013+
* Parse a video description in the new "attributed" format, which contains the entire visible
1014+
* plaintext ({@code content}) and an array of {@code commandRuns}.
1015+
*
1016+
* <p>
1017+
* The {@code commandRuns} include the links and their position in the text.
1018+
* </p>
1019+
*
1020+
* @param attributedDescription the JSON object of the attributed description
1021+
* @return the parsed description, in HTML format, as a string
1022+
*/
1023+
@Nullable
1024+
public static String getAttributedDescription(
1025+
@Nullable final JsonObject attributedDescription) {
1026+
if (isNullOrEmpty(attributedDescription)) {
1027+
return null;
1028+
}
1029+
1030+
final String content = attributedDescription.getString("content");
1031+
final JsonArray commandRuns = attributedDescription.getArray("commandRuns");
1032+
if (content == null) {
1033+
return null;
1034+
}
1035+
1036+
final StringBuilder textBuilder = new StringBuilder();
1037+
int textStart = 0;
1038+
1039+
for (final Object commandRun: commandRuns) {
1040+
if (!(commandRun instanceof JsonObject)) {
1041+
continue;
1042+
}
1043+
1044+
final JsonObject run = ((JsonObject) commandRun);
1045+
final int startIndex = run.getInt("startIndex", -1);
1046+
final int length = run.getInt("length");
1047+
final JsonObject navigationEndpoint = run.getObject("onTap")
1048+
.getObject("innertubeCommand");
1049+
1050+
if (startIndex < 0 || length < 1 || navigationEndpoint == null) {
1051+
continue;
1052+
}
1053+
1054+
final String url;
1055+
try {
1056+
url = getUrlFromNavigationEndpoint(navigationEndpoint);
1057+
} catch (final ParsingException e) {
1058+
continue;
1059+
}
1060+
1061+
if (url == null) {
1062+
continue;
1063+
}
1064+
1065+
// Append text before the link
1066+
if (startIndex > textStart) {
1067+
textBuilder.append(content, textStart, startIndex);
1068+
}
1069+
1070+
// Trim and append link text
1071+
// Channel/Video format: 3xu00a0, (/ •), u00a0, <Name>, 2xu00a0
1072+
final String linkText = content.substring(startIndex, startIndex + length)
1073+
.replace('\u00a0', ' ')
1074+
.trim()
1075+
.replaceFirst("^[/•] *", "");
1076+
1077+
textBuilder.append("<a href=\"")
1078+
.append(url)
1079+
.append("\">")
1080+
.append(linkText)
1081+
.append("</a>");
1082+
1083+
textStart = startIndex + length;
1084+
}
1085+
1086+
// Append the remaining text
1087+
if (textStart < content.length()) {
1088+
textBuilder.append(content.substring(textStart));
1089+
}
1090+
1091+
return textBuilder.toString()
1092+
.replaceAll("\\n", "<br>")
1093+
.replaceAll(" {2}", " &nbsp;");
1094+
}
1095+
10121096
@Nullable
10131097
public static String getTextFromObject(final JsonObject textObject) throws ParsingException {
10141098
return getTextFromObject(textObject, false);

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonIosPostResponse;
3535
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonPostResponse;
3636
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject;
37+
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getAttributedDescription;
3738
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.prepareAndroidMobileJsonBuilder;
3839
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.prepareDesktopJsonBuilder;
3940
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.prepareIosMobileJsonBuilder;
@@ -288,6 +289,12 @@ public Description getDescription() throws ParsingException {
288289
if (!isNullOrEmpty(description)) {
289290
return new Description(description, Description.HTML);
290291
}
292+
293+
final String attributedDescription = getAttributedDescription(
294+
getVideoSecondaryInfoRenderer().getObject("attributedDescription"));
295+
if (!isNullOrEmpty(attributedDescription)) {
296+
return new Description(attributedDescription, Description.HTML);
297+
}
291298
} catch (final ParsingException ignored) {
292299
// Age-restricted videos cause a ParsingException here
293300
}

extractor/src/test/resources/org/schabi/newpipe/extractor/services/youtube/extractor/stream/unboxing/generated_mock_5.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)