Skip to content

Commit 2c35db7

Browse files
committed
[Bug] Fix missing subtitle text in manually downloaded *.SRT files. (issue #10030)
- Previously, *.SRT files contained only timestamps and sequence numbers, without the actual text content. - Added recursive text extraction to handle nested tags in TTML files (e.g. <span> tags).
1 parent 9bc8139 commit 2c35db7

1 file changed

Lines changed: 26 additions & 8 deletions

File tree

app/src/main/java/org/schabi/newpipe/streams/SrtFromTtmlWriter.java

Lines changed: 26 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,30 @@ private void writeString(final String text) throws IOException {
5454
out.write(text.getBytes(charset));
5555
}
5656

57+
/*
58+
* Recursive method to extract text from all nodes
59+
* - This method processes TextNode and <br> tags, recursively
60+
* extracting text from nested tags.
61+
* For example: extract text from nested <span> tags
62+
* - Appends newlines for <br> tags.
63+
*/
64+
private void extractText(final Node node, final StringBuilder text) {
65+
if (node instanceof TextNode) {
66+
text.append(((TextNode) node).text());
67+
} else if (node instanceof Element) {
68+
final Element element = (Element) node;
69+
// <br> is a self-closing HTML tag used to insert a line break.
70+
if (element.tagName().equalsIgnoreCase("br")) {
71+
// Add a newline for <br> tags
72+
text.append(NEW_LINE);
73+
}
74+
}
75+
// Recursively process child nodes
76+
for (final Node child : node.childNodes()) {
77+
extractText(child, text);
78+
}
79+
}
80+
5781
public void build(final SharpStream ttml) throws IOException {
5882
/*
5983
* TTML parser with BASIC support
@@ -81,14 +105,8 @@ public void build(final SharpStream ttml) throws IOException {
81105
for (final Element paragraph : paragraphList) {
82106
text.setLength(0);
83107

84-
for (final Node children : paragraph.childNodes()) {
85-
if (children instanceof TextNode) {
86-
text.append(((TextNode) children).text());
87-
} else if (children instanceof Element
88-
&& ((Element) children).tagName().equalsIgnoreCase("br")) {
89-
text.append(NEW_LINE);
90-
}
91-
}
108+
// Recursively extract text from all child nodes
109+
extractText(paragraph, text);
92110

93111
if (ignoreEmptyFrames && text.length() < 1) {
94112
continue;

0 commit comments

Comments
 (0)