Skip to content

Commit ff030ad

Browse files
committed
[YouTube] Add support for extracting auto-translated captions
Closes #977 Based on and addresses TeamNewPipe/NewPipe#8023
1 parent fafd471 commit ff030ad

3 files changed

Lines changed: 64 additions & 8 deletions

File tree

extractor/src/main/java/org/schabi/newpipe/extractor/services/peertube/extractors/PeertubeStreamExtractor.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -476,6 +476,7 @@ private void loadSubtitles() {
476476
.setMediaFormat(fmt)
477477
.setLanguageCode(languageCode)
478478
.setAutoGenerated(false)
479+
.setAutoTranslated(false)
479480
.build());
480481
}
481482
}

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -665,21 +665,21 @@ public List<SubtitlesStream> getSubtitlesDefault() throws ParsingException {
665665

666666
@Override
667667
@Nonnull
668-
public List<SubtitlesStream> getSubtitles(final MediaFormat format) throws ParsingException {
668+
public List<SubtitlesStream> getSubtitles(@Nonnull final MediaFormat format)
669+
throws ParsingException {
669670
assertPageFetched();
670671

671672
// We cannot store the subtitles list because the media format may change
672673
final List<SubtitlesStream> subtitlesToReturn = new ArrayList<>();
673674
final JsonObject renderer = playerResponse.getObject("captions")
674675
.getObject("playerCaptionsTracklistRenderer");
675676
final JsonArray captionsArray = renderer.getArray("captionTracks");
676-
// TODO: use this to apply auto translation to different language from a source language
677-
// final JsonArray autoCaptionsArray = renderer.getArray("translationLanguages");
678677

679678
for (int i = 0; i < captionsArray.size(); i++) {
680-
final String languageCode = captionsArray.getObject(i).getString("languageCode");
681-
final String baseUrl = captionsArray.getObject(i).getString("baseUrl");
682-
final String vssId = captionsArray.getObject(i).getString("vssId");
679+
final JsonObject caption = captionsArray.getObject(i);
680+
final String languageCode = caption.getString("languageCode");
681+
final String baseUrl = caption.getString("baseUrl");
682+
final String vssId = caption.getString("vssId");
683683

684684
if (languageCode != null && baseUrl != null && vssId != null) {
685685
final boolean isAutoGenerated = vssId.startsWith("a.");
@@ -694,7 +694,24 @@ public List<SubtitlesStream> getSubtitles(final MediaFormat format) throws Parsi
694694
.setMediaFormat(format)
695695
.setLanguageCode(languageCode)
696696
.setAutoGenerated(isAutoGenerated)
697+
.setAutoTranslated(false)
697698
.build());
699+
if (i == 0 && caption.getBoolean("isTranslatable")
700+
&& renderer.has("translationLanguages")) {
701+
final JsonArray languages = renderer.getArray("translationLanguages");
702+
for (int j = 0; j < languages.size(); j++) {
703+
final JsonObject lang = languages.getObject(j);
704+
final String tLanguageCode = lang.getString("languageCode");
705+
subtitlesToReturn.add(new SubtitlesStream.Builder()
706+
.setContent(cleanUrl + "&fmt=" + format.getSuffix()
707+
+ "&tlang=" + tLanguageCode, true)
708+
.setMediaFormat(format)
709+
.setLanguageCode(tLanguageCode)
710+
.setAutoGenerated(isAutoGenerated)
711+
.setAutoTranslated(true)
712+
.build());
713+
}
714+
}
698715
}
699716
}
700717

extractor/src/main/java/org/schabi/newpipe/extractor/stream/SubtitlesStream.java

Lines changed: 40 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ public final class SubtitlesStream extends Stream {
1414
private final MediaFormat format;
1515
private final Locale locale;
1616
private final boolean autoGenerated;
17+
private final boolean autoTranslated;
1718
private final String code;
1819

1920
/**
@@ -32,6 +33,7 @@ public static final class Builder {
3233
private String languageCode;
3334
// Use of the Boolean class instead of the primitive type needed for setter call check
3435
private Boolean autoGenerated;
36+
private Boolean autoTranslated;
3537

3638
/**
3739
* Create a new {@link Builder} instance with default values.
@@ -152,6 +154,18 @@ public Builder setAutoGenerated(final boolean autoGenerated) {
152154
return this;
153155
}
154156

157+
/**
158+
* Set whether the subtitles have been automatically translated
159+
* (i.e. by a machine like Google Translator) by the streaming service.
160+
* @param autoTranslated whether the subtitles have been automatically translated by the
161+
* streaming service
162+
* @return this {@link Builder} instance
163+
*/
164+
public Builder setAutoTranslated(final boolean autoTranslated) {
165+
this.autoTranslated = autoTranslated;
166+
return this;
167+
}
168+
155169
/**
156170
* Build a {@link SubtitlesStream} using the builder's current values.
157171
*
@@ -196,13 +210,19 @@ public SubtitlesStream build() throws ParsingException {
196210
+ "with setIsAutoGenerated.");
197211
}
198212

213+
if (autoTranslated == null) {
214+
throw new IllegalStateException("The subtitles stream has been not set as an "
215+
+ "automatically translated subtitles stream or not. "
216+
+ "Please specify this information with setIsAutoTranslated.");
217+
}
218+
199219
if (id == null) {
200220
id = languageCode + (mediaFormat != null ? "." + mediaFormat.suffix
201221
: "");
202222
}
203223

204224
return new SubtitlesStream(id, content, isUrl, mediaFormat, deliveryMethod,
205-
languageCode, autoGenerated, manifestUrl);
225+
languageCode, autoGenerated, autoTranslated, manifestUrl);
206226
}
207227
}
208228

@@ -219,6 +239,7 @@ public SubtitlesStream build() throws ParsingException {
219239
* @param deliveryMethod the {@link DeliveryMethod} of the stream
220240
* @param languageCode the language code of the stream
221241
* @param autoGenerated whether the subtitles are auto-generated by the streaming service
242+
* @param autoTranslated whether the subtitles are auto-translated by the streaming service
222243
* @param manifestUrl the URL of the manifest this stream comes from (if applicable,
223244
* otherwise null)
224245
*/
@@ -230,6 +251,7 @@ private SubtitlesStream(@Nonnull final String id,
230251
@Nonnull final DeliveryMethod deliveryMethod,
231252
@Nonnull final String languageCode,
232253
final boolean autoGenerated,
254+
final boolean autoTranslated,
233255
@Nullable final String manifestUrl) throws ParsingException {
234256
super(id, content, isUrl, mediaFormat, deliveryMethod, manifestUrl);
235257
this.locale = LocaleCompat.forLanguageTag(languageCode).orElseThrow(
@@ -238,6 +260,7 @@ private SubtitlesStream(@Nonnull final String id,
238260
this.code = languageCode;
239261
this.format = mediaFormat;
240262
this.autoGenerated = autoGenerated;
263+
this.autoTranslated = autoTranslated;
241264
}
242265

243266
/**
@@ -250,7 +273,7 @@ public String getExtension() {
250273
}
251274

252275
/**
253-
* Return whether if the subtitles are auto-generated.
276+
* Return whether the subtitles are auto-generated.
254277
* <p>
255278
* Some streaming services can generate subtitles for their contents, like YouTube.
256279
* </p>
@@ -261,6 +284,21 @@ public boolean isAutoGenerated() {
261284
return autoGenerated;
262285
}
263286

287+
/**
288+
* Whether the subtitles are translated automatically by a machine.
289+
*
290+
* <p>
291+
* Some streaming services provide automatically translated subtitles.
292+
* YouTube, for example, uses Google translator to generate translated subtitles.
293+
* Automatically translated subtitles might not coincide completely with the original text.
294+
* </p>
295+
*
296+
* @return {@code true} if the subtitles are auto-translated, {@code false} otherwise
297+
*/
298+
public boolean isAutoTranslated() {
299+
return autoTranslated;
300+
}
301+
264302
/**
265303
* {@inheritDoc}
266304
*/

0 commit comments

Comments
 (0)