Skip to content

Commit e662c97

Browse files
committed
make dash parser ignore segmented streams
1 parent c4e16c7 commit e662c97

5 files changed

Lines changed: 128 additions & 20 deletions

File tree

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -433,7 +433,7 @@ public List<VideoStream> getVideoStreams() throws IOException, ExtractionExcepti
433433
}
434434

435435
@Override
436-
public List<VideoStream> getVideoOnlyStreams() throws IOException, ExtractionException {
436+
public List<VideoStream> getVideoOnlyStreams() throws ExtractionException {
437437
assertPageFetched();
438438
List<VideoStream> videoOnlyStreams = new ArrayList<>();
439439
try {

extractor/src/main/java/org/schabi/newpipe/extractor/stream/StreamInfo.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,10 @@ private static StreamInfo extractStreams(StreamInfo streamInfo, StreamExtractor
147147
Exception dashMpdError = null;
148148
if (streamInfo.getDashMpdUrl() != null && !streamInfo.getDashMpdUrl().isEmpty()) {
149149
try {
150-
DashMpdParser.getStreams(streamInfo);
150+
DashMpdParser.ParserResult result = DashMpdParser.getStreams(streamInfo);
151+
streamInfo.getVideoOnlyStreams().addAll(result.getVideoOnlyStreams());
152+
streamInfo.getAudioStreams().addAll(result.getAudioStreams());
153+
streamInfo.getVideoStreams().addAll(result.getVideoStreams());
151154
} catch (Exception e) {
152155
// Sometimes we receive 403 (forbidden) error when trying to download the manifest (similar to what happens with youtube-dl),
153156
// just skip the exception (but store it somewhere), as we later check if we have streams anyway.

extractor/src/main/java/org/schabi/newpipe/extractor/utils/DashMpdParser.java

Lines changed: 60 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,16 @@
1212
import org.schabi.newpipe.extractor.stream.VideoStream;
1313
import org.w3c.dom.Document;
1414
import org.w3c.dom.Element;
15+
import org.w3c.dom.Node;
1516
import org.w3c.dom.NodeList;
1617

1718
import javax.xml.parsers.DocumentBuilder;
1819
import javax.xml.parsers.DocumentBuilderFactory;
1920
import java.io.ByteArrayInputStream;
2021
import java.io.IOException;
2122
import java.io.InputStream;
23+
import java.util.ArrayList;
24+
import java.util.List;
2225

2326
/*
2427
* Created by Christian Schabesberger on 02.02.16.
@@ -51,16 +54,43 @@ public static class DashMpdParsingException extends ParsingException {
5154
}
5255
}
5356

57+
public static class ParserResult {
58+
private final List<VideoStream> videoStreams;
59+
private final List<AudioStream> audioStreams;
60+
private final List<VideoStream> videoOnlyStreams;
61+
62+
public ParserResult(List<VideoStream> videoStreams, List<AudioStream> audioStreams, List<VideoStream> videoOnlyStreams) {
63+
this.videoStreams = videoStreams;
64+
this.audioStreams = audioStreams;
65+
this.videoOnlyStreams = videoOnlyStreams;
66+
}
67+
68+
public List<VideoStream> getVideoStreams() {
69+
return videoStreams;
70+
}
71+
72+
public List<AudioStream> getAudioStreams() {
73+
return audioStreams;
74+
}
75+
76+
public List<VideoStream> getVideoOnlyStreams() {
77+
return videoOnlyStreams;
78+
}
79+
}
80+
5481
/**
5582
* Will try to download (using {@link StreamInfo#dashMpdUrl}) and parse the dash manifest,
5683
* then it will search for any stream that the ItagItem has (by the id).
5784
* <p>
5885
* It has video, video only and audio streams and will only add to the list if it doesn't
5986
* find a similar stream in the respective lists (calling {@link Stream#equalStats}).
6087
*
88+
* Info about dash MPD can be found here
89+
* @see <a href="https://www.brendanlong.com/the-structure-of-an-mpeg-dash-mpd.html">www.brendanlong.com</a>
90+
*
6191
* @param streamInfo where the parsed streams will be added
6292
*/
63-
public static void getStreams(StreamInfo streamInfo) throws DashMpdParsingException, ReCaptchaException {
93+
public static ParserResult getStreams(final StreamInfo streamInfo) throws DashMpdParsingException, ReCaptchaException {
6494
String dashDoc;
6595
Downloader downloader = NewPipe.getDownloader();
6696
try {
@@ -72,45 +102,58 @@ public static void getStreams(StreamInfo streamInfo) throws DashMpdParsingExcept
72102
}
73103

74104
try {
75-
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
76-
DocumentBuilder builder = factory.newDocumentBuilder();
77-
InputStream stream = new ByteArrayInputStream(dashDoc.getBytes());
105+
final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
106+
final DocumentBuilder builder = factory.newDocumentBuilder();
107+
final InputStream stream = new ByteArrayInputStream(dashDoc.getBytes());
108+
109+
final Document doc = builder.parse(stream);
110+
final NodeList representationList = doc.getElementsByTagName("Representation");
78111

79-
Document doc = builder.parse(stream);
80-
NodeList representationList = doc.getElementsByTagName("Representation");
112+
final List<VideoStream> videoStreams = new ArrayList<>();
113+
final List<AudioStream> audioStreams = new ArrayList<>();
114+
final List<VideoStream> videoOnlyStreams = new ArrayList<>();
81115

82116
for (int i = 0; i < representationList.getLength(); i++) {
83-
Element representation = ((Element) representationList.item(i));
117+
final Element representation = (Element) representationList.item(i);
84118
try {
85-
String mimeType = ((Element) representation.getParentNode()).getAttribute("mimeType");
86-
String id = representation.getAttribute("id");
87-
String url = representation.getElementsByTagName("BaseURL").item(0).getTextContent();
88-
ItagItem itag = ItagItem.getItag(Integer.parseInt(id));
89-
if (itag != null) {
90-
MediaFormat mediaFormat = MediaFormat.getFromMimeType(mimeType);
119+
final String mimeType = ((Element) representation.getParentNode()).getAttribute("mimeType");
120+
final String id = representation.getAttribute("id");
121+
final String url = representation.getElementsByTagName("BaseURL").item(0).getTextContent();
122+
final ItagItem itag = ItagItem.getItag(Integer.parseInt(id));
123+
final Node segmentationList = representation.getElementsByTagName("SegmentList").item(0);
124+
125+
// if SegmentList is not null this means that BaseURL is not representing the url to the stream.
126+
// instead we need to add the "media=" value from the <SegmentURL/> tags inside the <SegmentList/>
127+
// tag in order to get a full working url. However each of these is just pointing to a part of the
128+
// video, so we can not return a URL with a working stream here.
129+
// We decided to ignore such streams for the moment.
130+
if (itag != null && segmentationList == null) {
131+
final MediaFormat mediaFormat = MediaFormat.getFromMimeType(mimeType);
91132

92133
if (itag.itagType.equals(ItagItem.ItagType.AUDIO)) {
93-
AudioStream audioStream = new AudioStream(url, mediaFormat, itag.avgBitrate);
134+
final AudioStream audioStream = new AudioStream(url, mediaFormat, itag.avgBitrate);
94135

95136
if (!Stream.containSimilarStream(audioStream, streamInfo.getAudioStreams())) {
96-
streamInfo.getAudioStreams().add(audioStream);
137+
audioStreams.add(audioStream);
97138
}
98139
} else {
99140
boolean isVideoOnly = itag.itagType.equals(ItagItem.ItagType.VIDEO_ONLY);
100-
VideoStream videoStream = new VideoStream(url, mediaFormat, itag.resolutionString, isVideoOnly);
141+
final VideoStream videoStream = new VideoStream(url, mediaFormat, itag.resolutionString, isVideoOnly);
101142

102143
if (isVideoOnly) {
103144
if (!Stream.containSimilarStream(videoStream, streamInfo.getVideoOnlyStreams())) {
104145
streamInfo.getVideoOnlyStreams().add(videoStream);
146+
videoOnlyStreams.add(videoStream);
105147
}
106148
} else if (!Stream.containSimilarStream(videoStream, streamInfo.getVideoStreams())) {
107-
streamInfo.getVideoStreams().add(videoStream);
149+
videoStreams.add(videoStream);
108150
}
109151
}
110152
}
111153
} catch (Exception ignored) {
112154
}
113155
}
156+
return new ParserResult(videoStreams, audioStreams, videoOnlyStreams);
114157
} catch (Exception e) {
115158
throw new DashMpdParsingException("Could not parse Dash mpd", e);
116159
}
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
package org.schabi.newpipe.extractor.services.youtube;
2+
3+
4+
/*
5+
* Created by Christian Schabesberger on 30.12.15.
6+
*
7+
* Copyright (C) Christian Schabesberger 2015 <chris.schabesberger@mailbox.org>
8+
* YoutubeVideoExtractorDefault.java is part of NewPipe.
9+
*
10+
* NewPipe is free software: you can redistribute it and/or modify
11+
* it under the terms of the GNU General Public License as published by
12+
* the Free Software Foundation, either version 3 of the License, or
13+
* (at your option) any later version.
14+
*
15+
* NewPipe is distributed in the hope that it will be useful,
16+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
17+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18+
* GNU General Public License for more details.
19+
*
20+
* You should have received a copy of the GNU General Public License
21+
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
22+
*/
23+
24+
import org.junit.BeforeClass;
25+
import org.junit.Test;
26+
import org.schabi.newpipe.Downloader;
27+
import org.schabi.newpipe.extractor.NewPipe;
28+
import org.schabi.newpipe.extractor.stream.StreamExtractor;
29+
import org.schabi.newpipe.extractor.stream.StreamInfo;
30+
31+
import static org.junit.Assert.assertEquals;
32+
import static org.junit.Assert.assertTrue;
33+
import static org.schabi.newpipe.extractor.ServiceList.YouTube;
34+
35+
/**
36+
* Test for {@link StreamExtractor}
37+
*/
38+
public class YoutubeStreamExtractorDASHText {
39+
private static StreamInfo info;
40+
41+
@BeforeClass
42+
public static void setUp() throws Exception {
43+
NewPipe.init(Downloader.getInstance());
44+
info = StreamInfo.getInfo(YouTube, "https://www.youtube.com/watch?v=00Q4SUnVQK4");
45+
}
46+
47+
@Test
48+
public void testGetDashMpd() {
49+
System.out.println(info.getDashMpdUrl());
50+
assertTrue(info.getDashMpdUrl(),
51+
info.getDashMpdUrl() != null && !info.getDashMpdUrl().isEmpty());
52+
}
53+
54+
@Test
55+
public void testDashMpdParser() {
56+
assertEquals(0, info.getAudioStreams().size());
57+
assertEquals(0, info.getVideoOnlyStreams().size());
58+
assertEquals(4, info.getVideoStreams().size());
59+
}
60+
}

extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractorDefaultTest.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import org.schabi.newpipe.extractor.exceptions.ParsingException;
99
import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeStreamExtractor;
1010
import org.schabi.newpipe.extractor.stream.*;
11+
import org.schabi.newpipe.extractor.utils.DashMpdParser;
1112
import org.schabi.newpipe.extractor.utils.Utils;
1213

1314
import java.io.IOException;
@@ -140,8 +141,9 @@ public void testStreamType() throws ParsingException {
140141

141142
@Test
142143
public void testGetDashMpd() throws ParsingException {
144+
// We don't expect this particular video to have a DASH file. For this purpose we use a different test class.
143145
assertTrue(extractor.getDashMpdUrl(),
144-
extractor.getDashMpdUrl() != null || !extractor.getDashMpdUrl().isEmpty());
146+
extractor.getDashMpdUrl() != null && extractor.getDashMpdUrl().isEmpty());
145147
}
146148

147149
@Test

0 commit comments

Comments
 (0)