Skip to content

Commit ec00c79

Browse files
committed
[duplicated subtitle] Add unit tests for SubtitleDeduplicator in SubtitleDeduplicatorTest.java.
1 parent 05e7b86 commit ec00c79

1 file changed

Lines changed: 98 additions & 0 deletions

File tree

Lines changed: 98 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,98 @@
1+
package org.schabi.newpipe.extractor.utils;
2+
3+
import org.junit.jupiter.api.Test;
4+
5+
import static org.junit.jupiter.api.Assertions.assertEquals;
6+
import static org.junit.jupiter.api.Assertions.assertTrue;
7+
import static org.junit.jupiter.api.Assertions.assertFalse;
8+
9+
public class SubtitleDeduplicatorTest {
10+
11+
@Test
12+
public void deduplicate_exactDuplicateEntries_shouldRemoveDuplicate() {
13+
String input =
14+
"<p begin=\"00:00:01.000\" end=\"00:00:02.000\">Hello</p>\n" +
15+
"<p begin=\"00:00:01.000\" end=\"00:00:02.000\">Hello</p>";
16+
17+
String output = SubtitleDeduplicator.deduplicateContent(input);
18+
19+
String expected =
20+
"<p begin=\"00:00:01.000\" end=\"00:00:02.000\">Hello</p>";
21+
22+
// The `strip()` method is used here to remove the trailing
23+
// newline character (\n, outside of <p> tags) at the end of the `output`.
24+
// Removing this (\n) does not affect the TTML subtitle paragraphs,
25+
// as only the content within <p> tags is considered valid for subtitles.
26+
assertEquals(expected, output.strip());
27+
}
28+
29+
@Test
30+
public void deduplicate_sameTimeDifferentText_shouldNotDeduplicate() {
31+
String input =
32+
"<p begin=\"00:00:01.000\" end=\"00:00:02.000\">Hello</p>\n" +
33+
"<p begin=\"00:00:01.000\" end=\"00:00:02.000\">World</p>";
34+
35+
String output = SubtitleDeduplicator.deduplicateContent(input);
36+
37+
String expected = input;
38+
39+
assertEquals(expected, output);
40+
}
41+
42+
@Test
43+
public void deduplicate_sameTextDifferentTime_shouldNotDeduplicate() {
44+
String input =
45+
"<p begin=\"00:00:01.000\" end=\"00:00:02.000\">Hello</p>\n" +
46+
"<p begin=\"00:00:02.000\" end=\"00:00:03.000\">Hello</p>";
47+
48+
String output = SubtitleDeduplicator.deduplicateContent(input);
49+
50+
String expected = input;
51+
52+
assertEquals(expected, output);
53+
}
54+
55+
@Test
56+
public void containsDuplicatedEntries_exactDuplicate_shouldReturnTrue() {
57+
String input =
58+
"<p begin=\"00:00:01.000\" end=\"00:00:02.000\">Hello</p>\n" +
59+
"<p begin=\"00:00:01.000\" end=\"00:00:02.000\">Hello</p>";
60+
61+
assertTrue(SubtitleDeduplicator.containsDuplicatedEntries(input));
62+
}
63+
64+
@Test
65+
public void containsDuplicatedEntries_noDuplicate_shouldReturnFalse() {
66+
String input =
67+
"<p begin=\"00:00:01.000\" end=\"00:00:02.000\">Hello</p>\n" +
68+
"<p begin=\"00:00:02.000\" end=\"00:00:03.000\">World</p>";
69+
70+
assertFalse(SubtitleDeduplicator.containsDuplicatedEntries(input));
71+
}
72+
73+
@Test
74+
public void containsDuplicatedEntries_normalizeLeadingAndTrailingWhitespace_shouldConsiderAsSame() {
75+
// Note:
76+
// This test verifies that the deduplication logic normalizes
77+
// leading and trailing whitespace, and considers the content
78+
// as the same after this normalization, without modifying
79+
// the original subtitle content.
80+
String input =
81+
"<p begin=\"00:00:01.000\" end=\"00:00:02.000\"> Hello world </p>\n" +
82+
"<p begin=\"00:00:01.000\" end=\"00:00:02.000\">Hello world</p>";
83+
assertTrue(SubtitleDeduplicator.containsDuplicatedEntries(input));
84+
}
85+
86+
@Test
87+
public void containsDuplicatedEntries_normalizeMultipleSpaces_shouldConsiderAsSingleSpace() {
88+
// Note:
89+
// This test verifies that the deduplication logic normalizes
90+
// multiple consecutive spaces into a single space,
91+
// considering the content as the same after this normalization,
92+
// without modifying the original subtitle content.
93+
String input =
94+
"<p begin=\"00:00:01.000\" end=\"00:00:02.000\">Hello world</p>\n" +
95+
"<p begin=\"00:00:01.000\" end=\"00:00:02.000\">Hello world</p>";
96+
assertTrue(SubtitleDeduplicator.containsDuplicatedEntries(input));
97+
}
98+
}

0 commit comments

Comments
 (0)