Skip to content

Commit cfc72d8

Browse files
authored
Merge pull request #188 from jimbo1qaz/fix-description-timestamps
Fix timestamp links in Youtube video descriptions
2 parents 430da57 + e38d906 commit cfc72d8

1 file changed

Lines changed: 44 additions & 2 deletions

File tree

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java

Lines changed: 44 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,8 @@
3030
import java.net.MalformedURLException;
3131
import java.net.URL;
3232
import java.util.*;
33+
import java.util.regex.Matcher;
34+
import java.util.regex.Pattern;
3335

3436
/*
3537
* Created by Christian Schabesberger on 06.08.15.
@@ -162,14 +164,54 @@ public String getDescription() throws ParsingException {
162164
}
163165
}
164166

167+
// onclick="yt.www.watch.player.seekTo(0*3600+00*60+00);return false;"
168+
// :00 is NOT recognized as a timestamp in description or comments.
169+
// 0:00 is recognized in both description and comments.
170+
// https://www.youtube.com/watch?v=4cccfDXu1vA
171+
private final static Pattern DESCRIPTION_TIMESTAMP_ONCLICK_REGEX = Pattern.compile(
172+
"seekTo\\("
173+
+ "(?:(\\d+)\\*3600\\+)?" // hours?
174+
+ "(\\d+)\\*60\\+" // minutes
175+
+ "(\\d+)" // seconds
176+
+ "\\)");
177+
178+
@SafeVarargs
179+
private static <T> T coalesce(T... args) {
180+
for (T arg : args) {
181+
if (arg != null) return arg;
182+
}
183+
throw new IllegalArgumentException("all arguments to coalesce() were null");
184+
}
185+
165186
private String parseHtmlAndGetFullLinks(String descriptionHtml)
166187
throws MalformedURLException, UnsupportedEncodingException, ParsingException {
167188
final Document description = Jsoup.parse(descriptionHtml, getUrl());
168189
for(Element a : description.select("a")) {
169190
final String rawUrl = a.attr("abs:href");
170191
final URL redirectLink = new URL(rawUrl);
171-
final String queryString = redirectLink.getQuery();
172-
if(queryString != null) {
192+
193+
final Matcher onClickTimestamp;
194+
final String queryString;
195+
if ((onClickTimestamp = DESCRIPTION_TIMESTAMP_ONCLICK_REGEX.matcher(a.attr("onclick")))
196+
.find()) {
197+
a.removeAttr("onclick");
198+
199+
String hours = coalesce(onClickTimestamp.group(1), "0");
200+
String minutes = onClickTimestamp.group(2);
201+
String seconds = onClickTimestamp.group(3);
202+
203+
int timestamp = 0;
204+
timestamp += Integer.parseInt(hours) * 3600;
205+
timestamp += Integer.parseInt(minutes) * 60;
206+
timestamp += Integer.parseInt(seconds);
207+
208+
String setTimestamp = "&t=" + timestamp;
209+
210+
// Even after clicking https://youtu.be/...?t=6,
211+
// getUrl() is https://www.youtube.com/watch?v=..., never youtu.be, never &t=.
212+
a.attr("href", getUrl() + setTimestamp);
213+
214+
} else if((queryString = redirectLink.getQuery()) != null) {
173215
// if the query string is null we are not dealing with a redirect link,
174216
// so we don't need to override it.
175217
final String link =

0 commit comments

Comments
 (0)