Skip to content

Commit 6818991

Browse files
committed
[duplicated subtitle] Support subtitle deduplication for manual SRT downloads
- After remote subtitles (TTML format) are downloaded, the subtitle content is processed by SubtitleDeduplicator to remove duplicated segments. The cleaned content is then passed to SrtFromTtmlWriter to generate the final SRT subtitle file without duplicated entries.
- This logic is platform-independent and does not distinguish whether the video source is YouTube or another platform.
- A minimal ByteArraySharpStream implementation is used to adapt the deduplicated byte content back into the SharpStream interface without modifying existing stream APIs.
- Add a comment explaining why `> 0` is used when reading SharpStream.
1 parent 52e2808 commit 6818991

1 file changed

Lines changed: 115 additions & 3 deletions

File tree

app/src/main/java/us/shandian/giga/postprocessing/TtmlConverter.java

Lines changed: 115 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,12 @@
44

55
import org.schabi.newpipe.streams.SrtFromTtmlWriter;
66
import org.schabi.newpipe.streams.io.SharpStream;
7+
import org.schabi.newpipe.util.subtitle.SubtitleDeduplicator;
78

9+
import java.nio.charset.StandardCharsets;
810
import java.io.IOException;
11+
import java.io.ByteArrayInputStream;
12+
import java.io.ByteArrayOutputStream;
913

1014
/**
1115
* @author kapodamy
@@ -23,12 +27,17 @@ int process(SharpStream out, SharpStream... sources) throws IOException {
2327
// check if the subtitle is already in srt and copy, this should never happen
2428
String format = getArgumentAt(0, null);
2529
boolean ignoreEmptyFrames = getArgumentAt(1, "true").equals("true");
26-
2730
if (format == null || format.equals("ttml")) {
2831
SrtFromTtmlWriter writer = new SrtFromTtmlWriter(out, ignoreEmptyFrames);
29-
3032
try {
31-
writer.build(sources[0]);
33+
final String subtitleContent =
34+
readSharpStreamToString(sources[0]);
35+
final String deduplicated =
36+
SubtitleDeduplicator.deduplicateContent(subtitleContent);
37+
final SharpStream stream =
38+
new ByteArraySharpStream(
39+
deduplicated.getBytes(StandardCharsets.UTF_8));
40+
writer.build(stream);
3241
} catch (IOException err) {
3342
Log.e(TAG, "subtitle conversion failed due to I/O error", err);
3443
throw err;
@@ -50,4 +59,107 @@ int process(SharpStream out, SharpStream... sources) throws IOException {
5059
throw new UnsupportedOperationException("Can't convert this subtitle, unimplemented format: " + format);
5160
}
5261

62+
private static String readSharpStreamToString(final SharpStream stream) throws IOException {
63+
64+
final ByteArrayOutputStream out = new ByteArrayOutputStream();
65+
final byte[] buffer = new byte[8192];
66+
67+
int read;
68+
69+
// Note: `> 0` is required here because ChunkFileInputStream.read()
70+
// returns 0 at EOF instead of -1. Using `!= -1` would result in
71+
// an infinite loop in that case.
72+
//
73+
// Standard Java InputStream.read() returns -1 at EOF.
74+
//
75+
// Reference implementation:
76+
// - ChunkFileInputStream.java
77+
//
78+
// Future note:
79+
// - If ChunkFileInputStream changes to return -1 at EOF, this loop
80+
// can safely be switched back to `read != -1`. Keeping `> 0` is
81+
// also safe and will continue to work.
82+
while ((read = stream.read(buffer)) > 0) {
83+
out.write(buffer, 0, read);
84+
}
85+
86+
final String result = out.toString(StandardCharsets.UTF_8);
87+
88+
return result;
89+
}
90+
91+
/**
92+
* Minimal SharpStream backed by a byte array.
93+
*/
94+
private static final class ByteArraySharpStream extends SharpStream {
95+
private final ByteArrayInputStream in;
96+
97+
ByteArraySharpStream(byte[] data) {
98+
this.in = new ByteArrayInputStream(data);
99+
}
100+
101+
@Override
102+
public int read() {
103+
return in.read();
104+
}
105+
106+
@Override
107+
public int read(byte[] buffer) {
108+
return in.read(buffer, 0, buffer.length);
109+
}
110+
111+
@Override
112+
public int read(byte[] buffer, int offset, int count) {
113+
return in.read(buffer, offset, count);
114+
}
115+
116+
@Override
117+
public long skip(long amount) {
118+
return in.skip(amount);
119+
}
120+
121+
@Override
122+
public long available() {
123+
return in.available();
124+
}
125+
126+
@Override
127+
public void rewind() {
128+
in.reset();
129+
}
130+
131+
@Override
132+
public boolean isClosed() {
133+
return false;
134+
}
135+
136+
@Override
137+
public void close() {}
138+
139+
@Override
140+
public boolean canRewind() { return true; }
141+
142+
@Override
143+
public boolean canRead() { return true; }
144+
145+
@Override
146+
public boolean canWrite() { return false; }
147+
148+
@Override
149+
public void write(byte value) throws IOException {
150+
// This stream is read-only
151+
// and used only for reading subtitle data.
152+
throw new IOException("Stream is read-only");
153+
}
154+
155+
@Override
156+
public void write(byte[] buffer) throws IOException {
157+
throw new IOException("Stream is read-only");
158+
}
159+
160+
@Override
161+
public void write(byte[] buffer, int offset, int count) throws IOException {
162+
throw new IOException("Stream is read-only");
163+
}
164+
}
53165
}

0 commit comments

Comments
 (0)