Skip to content

Commit 98358cb

Browse files
committed
Merge branch 'feature-YTsubtitles' of https://github.com/tonakriz/NewPipeExtractor into refactor
2 parents 3c9c5c1 + bd19146 commit 98358cb

10 files changed

Lines changed: 188 additions & 44 deletions

File tree

NewPipeExtractor.iml

Lines changed: 4 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,13 @@
11
<?xml version="1.0" encoding="UTF-8"?>
2-
<module external.linked.project.id=":NewPipeExtractor" external.linked.project.path="$MODULE_DIR$" external.root.project.path="$MODULE_DIR$/.." external.system.id="GRADLE" type="JAVA_MODULE" version="4">
3-
<component name="FacetManager">
4-
<facet type="java-gradle" name="Java-Gradle">
5-
<configuration>
6-
<option name="BUILD_FOLDER_PATH" value="$MODULE_DIR$/build" />
7-
<option name="BUILDABLE" value="true" />
8-
</configuration>
9-
</facet>
10-
</component>
11-
<component name="NewModuleRootManager" LANGUAGE_LEVEL="JDK_1_7" inherit-compiler-output="false">
12-
<output url="file://$MODULE_DIR$/build/classes/java/main" />
13-
<output-test url="file://$MODULE_DIR$/build/classes/java/test" />
2+
<module external.linked.project.id="NewPipeExtractor" external.linked.project.path="$MODULE_DIR$" external.root.project.path="$MODULE_DIR$" external.system.id="GRADLE" external.system.module.group="" external.system.module.version="unspecified" type="JAVA_MODULE" version="4">
3+
<component name="NewModuleRootManager" inherit-compiler-output="true">
144
<exclude-output />
155
<content url="file://$MODULE_DIR$">
16-
<sourceFolder url="file://$MODULE_DIR$/src/main/java" isTestSource="false" />
17-
<sourceFolder url="file://$MODULE_DIR$/src/main/resources" type="java-resource" />
18-
<sourceFolder url="file://$MODULE_DIR$/src/test/java" isTestSource="true" />
19-
<sourceFolder url="file://$MODULE_DIR$/src/test/resources" type="java-test-resource" />
206
<excludeFolder url="file://$MODULE_DIR$/.gradle" />
7+
<excludeFolder url="file://$MODULE_DIR$/build" />
8+
<excludeFolder url="file://$MODULE_DIR$/out" />
219
</content>
2210
<orderEntry type="inheritedJdk" />
2311
<orderEntry type="sourceFolder" forTests="false" />
24-
<orderEntry type="library" exported="" scope="PROVIDED" name="nanojson-1.1" level="project" />
25-
<orderEntry type="library" exported="" scope="PROVIDED" name="jsoup-1.9.2" level="project" />
26-
<orderEntry type="library" exported="" scope="PROVIDED" name="rhino-1.7.7.1" level="project" />
27-
<orderEntry type="library" exported="" scope="TEST" name="junit-4.12" level="project" />
28-
<orderEntry type="library" exported="" scope="TEST" name="hamcrest-core-1.3" level="project" />
2912
</component>
3013
</module>
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
package org.schabi.newpipe.extractor;
2+
3+
import org.schabi.newpipe.extractor.stream.SubtitlesFormat;
4+
5+
public class Subtitles {
6+
private SubtitlesFormat format;
7+
private String languageCode, URL;
8+
private boolean autoGenerated;
9+
10+
public Subtitles(SubtitlesFormat format, String languageCode, String URL, boolean autoGenerated) {
11+
this.format = format;
12+
this.languageCode = languageCode;
13+
this.URL = URL;
14+
this.autoGenerated = autoGenerated;
15+
}
16+
17+
public SubtitlesFormat getFileType() { return format; }
18+
19+
public String getLanguageCode() {
20+
return languageCode;
21+
}
22+
23+
public String getURL() {
24+
return URL;
25+
}
26+
27+
public boolean isAutoGenerated() {
28+
return autoGenerated;
29+
}
30+
}

src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudStreamExtractor.java

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,19 +3,16 @@
33
import com.grack.nanojson.JsonObject;
44
import com.grack.nanojson.JsonParser;
55
import com.grack.nanojson.JsonParserException;
6-
import org.schabi.newpipe.extractor.Downloader;
7-
import org.schabi.newpipe.extractor.MediaFormat;
8-
import org.schabi.newpipe.extractor.NewPipe;
9-
import org.schabi.newpipe.extractor.StreamingService;
6+
import org.schabi.newpipe.extractor.*;
107
import org.schabi.newpipe.extractor.exceptions.ContentNotAvailableException;
118
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
129
import org.schabi.newpipe.extractor.exceptions.ParsingException;
10+
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
1311
import org.schabi.newpipe.extractor.stream.*;
1412
import org.schabi.newpipe.extractor.utils.Parser;
1513

1614
import java.io.IOException;
17-
import java.util.ArrayList;
18-
import java.util.List;
15+
import java.util.*;
1916

2017
public class SoundcloudStreamExtractor extends StreamExtractor {
2118
private JsonObject track;
@@ -150,6 +147,16 @@ public List<VideoStream> getVideoOnlyStreams() throws IOException, ExtractionExc
150147
return null;
151148
}
152149

150+
/**
 * Subtitles are not extracted for SoundCloud streams.
 *
 * @return always {@code null}
 */
@Override
151+
public List<Subtitles> getSubtitlesDefault() throws IOException, ExtractionException {
152+
return null;
153+
}
154+
155+
/**
 * Subtitles are not extracted for SoundCloud streams; the requested format is ignored.
 *
 * @param format requested subtitle format (unused)
 * @return always {@code null}
 */
@Override
156+
public List<Subtitles> getSubtitles(SubtitlesFormat format) throws IOException, ExtractionException {
157+
return null;
158+
}
159+
153160
@Override
154161
public StreamType getStreamType() {
155162
return StreamType.AUDIO_STREAM;

src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractor.java

Lines changed: 60 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
package org.schabi.newpipe.extractor.services.youtube;
22

3+
import com.grack.nanojson.JsonArray;
34
import com.grack.nanojson.JsonObject;
45
import com.grack.nanojson.JsonParser;
6+
import com.grack.nanojson.JsonParserException;
57
import org.jsoup.Jsoup;
68
import org.jsoup.nodes.Document;
79
import org.jsoup.nodes.Element;
@@ -11,6 +13,7 @@
1113
import org.schabi.newpipe.extractor.Downloader;
1214
import org.schabi.newpipe.extractor.NewPipe;
1315
import org.schabi.newpipe.extractor.StreamingService;
16+
import org.schabi.newpipe.extractor.Subtitles;
1417
import org.schabi.newpipe.extractor.exceptions.ContentNotAvailableException;
1518
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
1619
import org.schabi.newpipe.extractor.exceptions.ParsingException;
@@ -20,10 +23,7 @@
2023
import org.schabi.newpipe.extractor.utils.Utils;
2124

2225
import java.io.IOException;
23-
import java.util.ArrayList;
24-
import java.util.LinkedHashMap;
25-
import java.util.List;
26-
import java.util.Map;
26+
import java.util.*;
2727
import java.util.regex.Matcher;
2828
import java.util.regex.Pattern;
2929

@@ -379,6 +379,49 @@ public List<VideoStream> getVideoOnlyStreams() throws IOException, ExtractionExc
379379
return videoOnlyStreams;
380380
}
381381

382+
/**
 * Returns the subtitles of this video in the default format, which is TTML.
 *
 * @return the result of {@code getSubtitles(SubtitlesFormat.TTML)}
 */
@Override
383+
public List<Subtitles> getSubtitlesDefault() throws IOException, ExtractionException {
384+
return getSubtitles(SubtitlesFormat.TTML);
385+
}
386+
387+
@Override
388+
public List<Subtitles> getSubtitles(SubtitlesFormat format) throws IOException, ExtractionException {
389+
JsonObject playerConfig = getPlayerConfig(getPageHtml());
390+
String playerResponse = playerConfig.getObject("args").getString("player_response");
391+
392+
JsonObject captions;
393+
try {
394+
// Captions does not exist, return null
395+
if (!JsonParser.object().from(playerResponse).has("captions")) return null;
396+
397+
captions = JsonParser.object().from(playerResponse).getObject("captions");
398+
} catch (JsonParserException e) {
399+
// Failed to parse subtitles
400+
return null;
401+
}
402+
JsonArray captionsArray = captions.getObject("playerCaptionsTracklistRenderer").getArray("captionTracks");
403+
404+
int captionsSize = captionsArray.size();
405+
// Should not happen, if there is the "captions" object, it should always has some captions in it
406+
if(captionsSize == 0) return null;
407+
408+
List<Subtitles> result = new ArrayList<>();
409+
for (int x = 0; x < captionsSize; x++) {
410+
String baseUrl = captionsArray.getObject(x).getString("baseUrl");
411+
412+
String extension = format.getExtension();
413+
414+
String URL = baseUrl.replaceAll("&fmt=[^&]*", "&fmt=" + extension);
415+
String captionsLangCode = captionsArray.getObject(x).getString("vssId");
416+
boolean isAutoGenerated = captionsLangCode.startsWith("a.");
417+
String languageCode = captionsLangCode.replaceFirst((isAutoGenerated) ? "a." : ".", "");
418+
419+
result.add(new Subtitles(format, languageCode, URL, isAutoGenerated));
420+
}
421+
422+
return result;
423+
}
424+
382425
@Override
383426
public StreamType getStreamType() throws ParsingException {
384427
//todo: if implementing livestream support this value should be generated dynamically
@@ -456,13 +499,24 @@ public String getErrorMessage() {
456499

457500
private static volatile String decryptionCode = "";
458501

502+
private static String pageHtml = null;
503+
504+
private String getPageHtml() throws IOException, ExtractionException{
505+
if (pageHtml == null) {
506+
Downloader dl = NewPipe.getDownloader();
507+
pageHtml = dl.download(getCleanUrl());
508+
}
509+
return pageHtml;
510+
}
511+
459512
@Override
460513
public void fetchPage() throws IOException, ExtractionException {
461514
Downloader dl = NewPipe.getDownloader();
462515

463-
String pageContent = dl.download(getCleanUrl());
516+
String pageContent = getPageHtml();
464517
doc = Jsoup.parse(pageContent, getCleanUrl());
465518

519+
466520
String playerUrl;
467521
// Check if the video is age restricted
468522
if (pageContent.contains("<meta property=\"og:restrictions:age")) {
@@ -616,7 +670,7 @@ private String loadDecryptionCode(String playerUrl) throws DecryptException {
616670
private String decryptSignature(String encryptedSig, String decryptionCode) throws DecryptException {
617671
Context context = Context.enter();
618672
context.setOptimizationLevel(-1);
619-
Object result = null;
673+
Object result;
620674
try {
621675
ScriptableObject scope = context.initStandardObjects();
622676
context.evaluateString(scope, decryptionCode, "decryptionCode", 1, null);

src/main/java/org/schabi/newpipe/extractor/stream/StreamExtractor.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,14 @@
2020
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
2121
*/
2222

23+
import com.grack.nanojson.JsonParserException;
2324
import org.schabi.newpipe.extractor.Extractor;
2425
import org.schabi.newpipe.extractor.StreamingService;
26+
import org.schabi.newpipe.extractor.Subtitles;
2527
import org.schabi.newpipe.extractor.UrlIdHandler;
2628
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
2729
import org.schabi.newpipe.extractor.exceptions.ParsingException;
30+
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
2831
import org.schabi.newpipe.extractor.utils.Parser;
2932

3033
import java.io.IOException;
@@ -109,6 +112,8 @@ protected long getTimestampSeconds(String regexPattern) throws ParsingException
109112
public abstract List<AudioStream> getAudioStreams() throws IOException, ExtractionException;
110113
public abstract List<VideoStream> getVideoStreams() throws IOException, ExtractionException;
111114
public abstract List<VideoStream> getVideoOnlyStreams() throws IOException, ExtractionException;
115+
/**
 * Returns the subtitle tracks of the stream in the service's default format.
 * NOTE(review): the implementations added with this method return {@code null}
 * when no subtitles are available, so callers need a null check.
 */
public abstract List<Subtitles> getSubtitlesDefault() throws IOException, ExtractionException;
116+
/**
 * Returns the subtitle tracks of the stream in the given format.
 * NOTE(review): the implementations added with this method return {@code null}
 * when no subtitles are available, so callers need a null check.
 *
 * @param format the subtitle format to request
 */
public abstract List<Subtitles> getSubtitles(SubtitlesFormat format) throws IOException, ExtractionException;
112117

113118
public abstract StreamType getStreamType() throws ParsingException;
114119
public abstract StreamInfoItem getNextVideo() throws IOException, ExtractionException;
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
package org.schabi.newpipe.extractor.stream;
2+
3+
import org.schabi.newpipe.extractor.Subtitles;
4+
5+
/**
 * Subtitle formats offered by YouTube, each with the file extension used to request it.
 * <p>
 * The TRANSCRIPT formats are YouTube specific: TRANSCRIPT3 is YouTube's default format,
 * it is based on TTML but, unlike VTT and TTML, is not a W3C standard.
 * ExoPlayer supports only VTT and TTML, not the TRANSCRIPT formats.
 */
public enum SubtitlesFormat {
    VTT (0x0, "vtt"),
    TTML (0x1, "ttml"),
    TRANSCRIPT1 (0x2, "srv1"),
    TRANSCRIPT2 (0x3, "srv2"),
    TRANSCRIPT3 (0x4, "srv3");

    // Enum constants are shared singletons, so their state must not be reassignable.
    private final int id;
    private final String extension;

    SubtitlesFormat(int id, String extension) {
        this.id = id;
        this.extension = extension;
    }

    /**
     * @return the file extension of this format, e.g. {@code "vtt"}
     */
    public String getExtension() {
        return extension;
    }
}

src/main/java/org/schabi/newpipe/extractor/utils/Parser.java

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,6 @@ public static String matchGroup(String pattern, String input, int group) throws
5454
if (foundMatch) {
5555
return mat.group(group);
5656
} else {
57-
//Log.e(TAG, "failed to find pattern \""+pattern+"\" inside of \""+input+"\"");
5857
if (input.length() > 1024) {
5958
throw new RegexException("failed to find pattern \"" + pattern);
6059
} else {
@@ -64,12 +63,9 @@ public static String matchGroup(String pattern, String input, int group) throws
6463
}
6564

6665
/**
 * Tells whether the regular expression {@code pattern} is found in {@code input}.
 * The lines marked "-" are the previous exception-based version removed by this
 * change; the lines marked "+" are its replacement using {@code Matcher.find()}.
 */
public static boolean isMatch(String pattern, String input) {
67-
try {
68-
matchGroup1(pattern, input);
69-
return true;
70-
} catch (RegexException e) {
71-
return false;
72-
}
66+
Pattern pat = Pattern.compile(pattern);
67+
Matcher mat = pat.matcher(input);
68+
return mat.find();
7369
}
7470

7571
public static Map<String, String> compatParseMap(final String input) throws UnsupportedEncodingException {

src/test/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudStreamExtractorDefaultTest.java

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,20 @@
11
package org.schabi.newpipe.extractor.services.soundcloud;
22

3+
import com.grack.nanojson.JsonParserException;
34
import org.junit.Before;
45
import org.junit.Test;
56
import org.schabi.newpipe.Downloader;
67
import org.schabi.newpipe.extractor.NewPipe;
8+
import org.schabi.newpipe.extractor.Subtitles;
79
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
810
import org.schabi.newpipe.extractor.exceptions.ParsingException;
911
import org.schabi.newpipe.extractor.stream.StreamExtractor;
1012
import org.schabi.newpipe.extractor.stream.StreamInfoItemCollector;
1113
import org.schabi.newpipe.extractor.stream.StreamType;
14+
import org.schabi.newpipe.extractor.stream.SubtitlesFormat;
1215

1316
import java.io.IOException;
17+
import java.util.List;
1418

1519
import static org.junit.Assert.*;
1620
import static org.schabi.newpipe.extractor.ServiceList.SoundCloud;
@@ -101,4 +105,16 @@ public void testGetRelatedVideos() throws ExtractionException, IOException {
101105
assertFalse(relatedVideos.getItemList().isEmpty());
102106
assertTrue(relatedVideos.getErrors().isEmpty());
103107
}
108+
109+
@Test
110+
public void testGetSubtitlesListDefault() throws IOException, ExtractionException {
111+
// Video (/view?v=YQHsXMglC9A) set in the setUp() method has no captions => null
112+
assertTrue(extractor.getSubtitlesDefault() == null);
113+
}
114+
115+
@Test
116+
public void testGetSubtitlesList() throws IOException, ExtractionException {
117+
// Video (/view?v=YQHsXMglC9A) set in the setUp() method has no captions => null
118+
assertTrue(extractor.getSubtitles(SubtitlesFormat.VTT) == null);
119+
}
104120
}

src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractorDefaultTest.java

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,16 @@
11
package org.schabi.newpipe.extractor.services.youtube;
22

3+
import com.grack.nanojson.JsonParserException;
34
import org.junit.Before;
45
import org.junit.Test;
56
import org.schabi.newpipe.Downloader;
67
import org.schabi.newpipe.extractor.NewPipe;
78
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
89
import org.schabi.newpipe.extractor.exceptions.ParsingException;
9-
import org.schabi.newpipe.extractor.stream.StreamExtractor;
10-
import org.schabi.newpipe.extractor.stream.StreamInfoItemCollector;
11-
import org.schabi.newpipe.extractor.stream.StreamType;
12-
import org.schabi.newpipe.extractor.stream.VideoStream;
10+
import org.schabi.newpipe.extractor.stream.*;
1311

1412
import java.io.IOException;
13+
import java.util.HashMap;
1514

1615
import static org.junit.Assert.*;
1716
import static org.schabi.newpipe.extractor.ServiceList.YouTube;
@@ -148,4 +147,16 @@ public void testGetRelatedVideos() throws ExtractionException, IOException {
148147
}
149148
assertTrue(relatedVideos.getErrors().isEmpty());
150149
}
150+
151+
@Test
152+
public void testGetSubtitlesListDefault() throws IOException, ExtractionException {
153+
// Video (/view?v=YQHsXMglC9A) set in the setUp() method has no captions => null
154+
assertTrue(extractor.getSubtitlesDefault() == null);
155+
}
156+
157+
@Test
158+
public void testGetSubtitlesList() throws IOException, ExtractionException {
159+
// Video (/view?v=YQHsXMglC9A) set in the setUp() method has no captions => null
160+
assertTrue(extractor.getSubtitles(SubtitlesFormat.VTT) == null);
161+
}
151162
}

0 commit comments

Comments
 (0)