Skip to content

Commit 5b59a1a

Browse files
committed
[YouTube] Move meta info extraction to separate file
YoutubeParsingHelper was longer than 2000 lines which caused checkstyle issues
1 parent b8e12dd commit 5b59a1a

4 files changed

Lines changed: 205 additions & 178 deletions

File tree

Lines changed: 200 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,200 @@
1+
package org.schabi.newpipe.extractor.services.youtube;
2+
3+
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.extractCachedUrlIfNeeded;
4+
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject;
5+
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObjectOrThrow;
6+
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getUrlFromNavigationEndpoint;
7+
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.isGoogleURL;
8+
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
9+
import static org.schabi.newpipe.extractor.utils.Utils.replaceHttpWithHttps;
10+
11+
import com.grack.nanojson.JsonArray;
12+
import com.grack.nanojson.JsonObject;
13+
14+
import org.schabi.newpipe.extractor.MetaInfo;
15+
import org.schabi.newpipe.extractor.exceptions.ParsingException;
16+
import org.schabi.newpipe.extractor.stream.Description;
17+
18+
import java.net.MalformedURLException;
19+
import java.net.URL;
20+
import java.util.ArrayList;
21+
import java.util.List;
22+
import java.util.Objects;
23+
import java.util.function.Consumer;
24+
import java.util.stream.Collectors;
25+
26+
import javax.annotation.Nonnull;
27+
28+
public final class YoutubeMetaInfoHelper {
29+
30+
private YoutubeMetaInfoHelper() {
31+
}
32+
33+
34+
@Nonnull
35+
public static List<MetaInfo> getMetaInfo(@Nonnull final JsonArray contents)
36+
throws ParsingException {
37+
final List<MetaInfo> metaInfo = new ArrayList<>();
38+
for (final Object content : contents) {
39+
final JsonObject resultObject = (JsonObject) content;
40+
if (resultObject.has("itemSectionRenderer")) {
41+
for (final Object sectionContentObject
42+
: resultObject.getObject("itemSectionRenderer").getArray("contents")) {
43+
44+
final JsonObject sectionContent = (JsonObject) sectionContentObject;
45+
if (sectionContent.has("infoPanelContentRenderer")) {
46+
metaInfo.add(getInfoPanelContent(sectionContent
47+
.getObject("infoPanelContentRenderer")));
48+
}
49+
if (sectionContent.has("clarificationRenderer")) {
50+
metaInfo.add(getClarificationRenderer(sectionContent
51+
.getObject("clarificationRenderer")
52+
));
53+
}
54+
if (sectionContent.has("emergencyOneboxRenderer")) {
55+
getEmergencyOneboxRenderer(
56+
sectionContent.getObject("emergencyOneboxRenderer"),
57+
metaInfo::add
58+
);
59+
}
60+
}
61+
}
62+
}
63+
return metaInfo;
64+
}
65+
66+
@Nonnull
67+
private static MetaInfo getInfoPanelContent(@Nonnull final JsonObject infoPanelContentRenderer)
68+
throws ParsingException {
69+
final MetaInfo metaInfo = new MetaInfo();
70+
final StringBuilder sb = new StringBuilder();
71+
for (final Object paragraph : infoPanelContentRenderer.getArray("paragraphs")) {
72+
if (sb.length() != 0) {
73+
sb.append("<br>");
74+
}
75+
sb.append(getTextFromObject((JsonObject) paragraph));
76+
}
77+
metaInfo.setContent(new Description(sb.toString(), Description.HTML));
78+
if (infoPanelContentRenderer.has("sourceEndpoint")) {
79+
final String metaInfoLinkUrl = getUrlFromNavigationEndpoint(
80+
infoPanelContentRenderer.getObject("sourceEndpoint"));
81+
try {
82+
metaInfo.addUrl(new URL(Objects.requireNonNull(extractCachedUrlIfNeeded(
83+
metaInfoLinkUrl))));
84+
} catch (final NullPointerException | MalformedURLException e) {
85+
throw new ParsingException("Could not get metadata info URL", e);
86+
}
87+
88+
final String metaInfoLinkText = getTextFromObject(
89+
infoPanelContentRenderer.getObject("inlineSource"));
90+
if (isNullOrEmpty(metaInfoLinkText)) {
91+
throw new ParsingException("Could not get metadata info link text.");
92+
}
93+
metaInfo.addUrlText(metaInfoLinkText);
94+
}
95+
96+
return metaInfo;
97+
}
98+
99+
@Nonnull
100+
private static MetaInfo getClarificationRenderer(
101+
@Nonnull final JsonObject clarificationRenderer) throws ParsingException {
102+
final MetaInfo metaInfo = new MetaInfo();
103+
104+
final String title = getTextFromObject(clarificationRenderer
105+
.getObject("contentTitle"));
106+
final String text = getTextFromObject(clarificationRenderer
107+
.getObject("text"));
108+
if (title == null || text == null) {
109+
throw new ParsingException("Could not extract clarification renderer content");
110+
}
111+
metaInfo.setTitle(title);
112+
metaInfo.setContent(new Description(text, Description.PLAIN_TEXT));
113+
114+
if (clarificationRenderer.has("actionButton")) {
115+
final JsonObject actionButton = clarificationRenderer.getObject("actionButton")
116+
.getObject("buttonRenderer");
117+
try {
118+
final String url = getUrlFromNavigationEndpoint(actionButton
119+
.getObject("command"));
120+
metaInfo.addUrl(new URL(Objects.requireNonNull(extractCachedUrlIfNeeded(url))));
121+
} catch (final NullPointerException | MalformedURLException e) {
122+
throw new ParsingException("Could not get metadata info URL", e);
123+
}
124+
125+
final String metaInfoLinkText = getTextFromObject(
126+
actionButton.getObject("text"));
127+
if (isNullOrEmpty(metaInfoLinkText)) {
128+
throw new ParsingException("Could not get metadata info link text.");
129+
}
130+
metaInfo.addUrlText(metaInfoLinkText);
131+
}
132+
133+
if (clarificationRenderer.has("secondaryEndpoint") && clarificationRenderer
134+
.has("secondarySource")) {
135+
final String url = getUrlFromNavigationEndpoint(clarificationRenderer
136+
.getObject("secondaryEndpoint"));
137+
// Ignore Google URLs, because those point to a Google search about "Covid-19"
138+
if (url != null && !isGoogleURL(url)) {
139+
try {
140+
metaInfo.addUrl(new URL(url));
141+
final String description = getTextFromObject(clarificationRenderer
142+
.getObject("secondarySource"));
143+
metaInfo.addUrlText(description == null ? url : description);
144+
} catch (final MalformedURLException e) {
145+
throw new ParsingException("Could not get metadata info secondary URL", e);
146+
}
147+
}
148+
}
149+
150+
return metaInfo;
151+
}
152+
153+
private static void getEmergencyOneboxRenderer(
154+
@Nonnull final JsonObject emergencyOneboxRenderer,
155+
final Consumer<MetaInfo> addMetaInfo
156+
) throws ParsingException {
157+
final List<JsonObject> supportRenderers = emergencyOneboxRenderer.values()
158+
.stream()
159+
.filter(o -> o instanceof JsonObject
160+
&& ((JsonObject) o).has("singleActionEmergencySupportRenderer"))
161+
.map(o -> ((JsonObject) o).getObject("singleActionEmergencySupportRenderer"))
162+
.collect(Collectors.toList());
163+
164+
if (supportRenderers.isEmpty()) {
165+
throw new ParsingException("Could not extract any meta info from emergency renderer");
166+
}
167+
168+
for (final JsonObject r : supportRenderers) {
169+
final MetaInfo metaInfo = new MetaInfo();
170+
171+
// usually an encouragement like "We are with you"
172+
final String title = getTextFromObjectOrThrow(r.getObject("title"), "title");
173+
// usually a phone number
174+
final String action = getTextFromObjectOrThrow(r.getObject("actionText"), "action");
175+
// usually details about the phone number
176+
final String details = getTextFromObjectOrThrow(r.getObject("detailsText"), "details");
177+
// usually the name of an association
178+
final String urlText = getTextFromObjectOrThrow(r.getObject("navigationText"),
179+
"urlText");
180+
181+
metaInfo.setTitle(title);
182+
metaInfo.setContent(new Description(details + "\n" + action, Description.PLAIN_TEXT));
183+
metaInfo.addUrlText(urlText);
184+
185+
// usually the webpage of the association
186+
final String url = getUrlFromNavigationEndpoint(r.getObject("navigationEndpoint"));
187+
if (url == null) {
188+
throw new ParsingException("Could not extract emergency renderer url");
189+
}
190+
191+
try {
192+
metaInfo.addUrl(new URL(replaceHttpWithHttps(url)));
193+
} catch (final MalformedURLException e) {
194+
throw new ParsingException("Could not parse emergency renderer url", e);
195+
}
196+
197+
addMetaInfo.accept(metaInfo);
198+
}
199+
}
200+
}

0 commit comments

Comments
 (0)