forked from TeamNewPipe/NewPipe
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathSrtFromTtmlWriter.java
More file actions
108 lines (90 loc) · 3.46 KB
/
SrtFromTtmlWriter.java
File metadata and controls
108 lines (90 loc) · 3.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
package org.schabi.newpipe.streams;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.nodes.TextNode;
import org.jsoup.parser.Parser;
import org.jsoup.select.Elements;
import org.schabi.newpipe.streams.io.SharpStream;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
/**
 * Converts a TTML subtitle document into SubRip (.srt) text and writes it to a
 * {@link SharpStream}.
 *
 * <p>Only basic TTML is handled: every {@code body > div > p} element becomes one SRT frame
 * whose timestamps come from its {@code begin} and {@code end} attributes. Styling, multiple
 * cues, tag timestamps and language selection are not supported.</p>
 *
 * <p>The frame counter is instance state, so frames written by successive {@link #build}
 * calls on the same instance keep counting upwards.</p>
 *
 * @author kapodamy
 */
public class SrtFromTtmlWriter {
    private static final String NEW_LINE = "\r\n";

    private final SharpStream out;
    private final boolean ignoreEmptyFrames;
    private final Charset charset = StandardCharsets.UTF_8;

    // According to the SubRip (.srt) specification, subtitle
    // numbering must start from 1.
    // Some players accept 0 or even negative indices,
    // but to ensure compliance we start at 1.
    private int frameIndex = 1;

    /**
     * @param out               stream that receives the generated SRT bytes (UTF-8 encoded)
     * @param ignoreEmptyFrames if {@code true}, paragraphs without any text are skipped
     *                          and do not consume a frame number
     */
    public SrtFromTtmlWriter(final SharpStream out, final boolean ignoreEmptyFrames) {
        this.out = out;
        this.ignoreEmptyFrames = ignoreEmptyFrames;
    }

    /**
     * Reads a timestamp attribute from a TTML paragraph and converts it to SRT notation.
     *
     * @param frame the paragraph element
     * @param attr  the attribute name to read
     * @return the attribute value with every {@code '.'} replaced by {@code ','}
     */
    private static String getTimestamp(final Element frame, final String attr) {
        return frame
                .attr(attr)
                .replace('.', ','); // SRT subtitles uses comma as decimal separator
    }

    /**
     * Appends the textual content of {@code node}'s descendants to {@code text}.
     *
     * <p>Text nodes are appended as-is, {@code <br>} elements become a line break, and any
     * other element is descended into so that text wrapped in nested tags is not lost.</p>
     *
     * @param node the node whose children are traversed
     * @param text the accumulator that receives the extracted text
     */
    private static void appendText(final Node node, final StringBuilder text) {
        for (final Node child : node.childNodes()) {
            if (child instanceof TextNode) {
                text.append(((TextNode) child).text());
            } else if (child instanceof Element) {
                if (((Element) child).tagName().equalsIgnoreCase("br")) {
                    text.append(NEW_LINE);
                } else {
                    // the tag itself is ignored (no styling support), but its text is kept
                    appendText(child, text);
                }
            }
        }
    }

    /**
     * Fills {@code buffer} from {@code in}, reading repeatedly until the buffer is full or
     * the stream stops delivering data.
     *
     * @param in     the stream to read from
     * @param buffer the destination buffer
     * @return the number of bytes actually stored in {@code buffer}
     * @throws IOException if the underlying read fails
     */
    private static int readFully(final SharpStream in, final byte[] buffer) throws IOException {
        int filled = 0;
        while (filled < buffer.length) {
            final int read = in.read(buffer, filled, buffer.length - filled);
            if (read <= 0) {
                // end of stream (or no progress): stop instead of looping forever
                break;
            }
            filled += read;
        }
        return filled;
    }

    /**
     * Writes a single SRT frame: index line, timestamp line, text, then a blank line.
     * The frame counter is advanced afterwards.
     *
     * @param begin start timestamp, already in SRT notation
     * @param end   end timestamp, already in SRT notation
     * @param text  the frame text
     * @throws IOException if writing to the output stream fails
     */
    private void writeFrame(final String begin, final String end, final StringBuilder text)
            throws IOException {
        writeString(String.valueOf(frameIndex));
        frameIndex += 1;
        writeString(NEW_LINE);
        writeString(begin);
        writeString(" --> ");
        writeString(end);
        writeString(NEW_LINE);
        writeString(text.toString());
        writeString(NEW_LINE);
        writeString(NEW_LINE);
    }

    /**
     * Encodes {@code text} as UTF-8 and writes it to the output stream.
     *
     * @param text the text to write
     * @throws IOException if writing to the output stream fails
     */
    private void writeString(final String text) throws IOException {
        out.write(text.getBytes(charset));
    }

    /**
     * Parses the given TTML stream and writes the equivalent SRT frames to the output stream.
     *
     * <p>The whole input (as reported by {@code ttml.available()}) is loaded into memory
     * before parsing. If the document has no {@code body > div > p} elements nothing is
     * written.</p>
     *
     * @param ttml the stream containing the TTML document
     * @throws IOException if the reported input size cannot be held in a byte array, or if
     *                     reading the input, parsing it or writing the output fails
     */
    public void build(final SharpStream ttml) throws IOException {
        /*
         * TTML parser with BASIC support
         * multiple CUE is not supported
         * styling is not supported
         * tag timestamps (in auto-generated subtitles) are not supported, maybe in the future
         * also TimestampTagOption enum is not applicable
         * Language parsing is not supported
         */

        // refuse sizes that a plain (int) cast would silently truncate or turn negative
        final long available = ttml.available();
        if (available < 0 || available > Integer.MAX_VALUE) {
            throw new IOException("Unsupported TTML input size: " + available);
        }

        // parse XML; only the bytes that were really read are handed to the parser
        final byte[] buffer = new byte[(int) available];
        final int length = readFully(ttml, buffer);
        final Document doc = Jsoup.parse(new ByteArrayInputStream(buffer, 0, length),
                charset.name(), "", Parser.xmlParser());

        final StringBuilder text = new StringBuilder(128);
        final Elements paragraphList = doc.select("body > div > p");

        for (final Element paragraph : paragraphList) {
            text.setLength(0);
            appendText(paragraph, text);

            if (ignoreEmptyFrames && text.length() < 1) {
                continue;
            }

            final String begin = getTimestamp(paragraph, "begin");
            final String end = getTimestamp(paragraph, "end");

            writeFrame(begin, end, text);
        }
    }
}