Skip to content

Commit 02274d5

Browse files
committed
[YouTube] Avoid XSS attacks in description or comments
1 parent 3f7b265 commit 02274d5

2 files changed

Lines changed: 20 additions & 15 deletions

File tree

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeDescriptionHelper.java

Lines changed: 19 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,7 @@ private YoutubeDescriptionHelper() {
3535
// special link chips (e.g. for YT videos, YT channels or social media accounts):
3636
// (u00a0) u00a0 u00a0 [/•] u00a0 <link content> u00a0 u00a0
3737
private static final Pattern LINK_CONTENT_CLEANER_REGEX
38-
= Pattern.compile("(?s)^\u00a0+[/•]\u00a0+(.*?)\u00a0+$");
38+
= Pattern.compile("(?s)^ +[/•] +(.*?) +$");
3939

4040
/**
4141
* Can be a command run, or a style run.
@@ -74,10 +74,13 @@ public boolean sameOpen(@Nonnull final Run other) {
7474

7575
/**
7676
* Parse a video description in the new "attributed" format, which contains the entire visible
77-
* plaintext ({@code content}) and an array of {@code commandRuns}.
77+
* plaintext ({@code content}) and an array of {@code commandRuns} and {@code styleRuns}.
78+
* Returns the formatted content in HTML format, and escapes the text to make sure there are no
79+
* XSS attacks.
7880
*
7981
* <p>
80-
* The {@code commandRuns} include the links and their position in the text.
82+
* {@code commandRuns} include the links and their range in the text, while {@code styleRuns}
83+
* include the styling to apply to various ranges in the text.
8184
* </p>
8285
*
8386
* @param attributedDescription the JSON object of the attributed description
@@ -119,25 +122,28 @@ public static String attributedDescriptionToHtml(
119122
* Applies the formatting specified by the intervals stored in {@code openers} and {@code
120123
* closers} to {@code content} in order to obtain valid HTML even when intervals overlap. For
121124
* example &lt;b&gt;b&lt;i&gt;b&i&lt;/b&gt;i&lt;/i&gt; would not be valid HTML, so this function
122-
* instead generates &lt;b&gt;b&lt;i&gt;b&i&lt;/i&gt;&lt;/b&gt;&lt;i&gt;i&lt;/i&gt;.
125+
* instead generates &lt;b&gt;b&lt;i&gt;b&i&lt;/i&gt;&lt;/b&gt;&lt;i&gt;i&lt;/i&gt;. Any HTML
126+
* special characters in {@code rawContent} are escaped to make sure there are no XSS attacks.
127+
*
123128
* <p>
124129
* Every opener in {@code openers} must have a corresponding closer in {@code closers}. Every
125130
* corresponding (opener, closer) pair must have a length of at least one (i.e. empty intervals
126131
* are not allowed).
127132
* </p>
128133
*
129-
* @param openers contains all of the places where a run begins, must have the same size of
130-
* closers, must be ordered by {@link Run#pos}
131-
* @param closers contains all of the places where a run ends, must have the same size of
132-
* openers, must be ordered by {@link Run#pos}
133-
* @param content the content to apply formatting to
134+
* @param openers contains all of the places where a run begins, must have the same size of
135+
* closers, must be ordered by {@link Run#pos}
136+
* @param closers contains all of the places where a run ends, must have the same size of
137+
* openers, must be ordered by {@link Run#pos}
138+
* @param rawContent the content to apply formatting to, and to escape to avoid XSS
134139
* @return the formatted content in HTML
135140
*/
136141
static String runsToHtml(
137142
@Nonnull final List<Run> openers,
138143
@Nonnull final List<Run> closers,
139-
@Nonnull final String content
144+
@Nonnull final String rawContent
140145
) {
146+
final String content = rawContent.replace('\u00a0', ' ');
141147
final Stack<Run> openRuns = new Stack<>();
142148
final Stack<Run> tempStack = new Stack<>();
143149
final StringBuilder textBuilder = new StringBuilder();
@@ -154,7 +160,7 @@ static String runsToHtml(
154160
: closers.get(closersIndex).pos;
155161

156162
// append piece of text until current index
157-
textBuilder.append(content, currentTextPos, minPos);
163+
textBuilder.append(Entities.escape(content.substring(currentTextPos, minPos)));
158164
currentTextPos = minPos;
159165

160166
if (closers.get(closersIndex).pos == minPos) {
@@ -205,12 +211,11 @@ static String runsToHtml(
205211
}
206212

207213
// append last piece of text
208-
textBuilder.append(content, currentTextPos, content.length());
214+
textBuilder.append(Entities.escape(content.substring(currentTextPos)));
209215

210216
return textBuilder.toString()
211217
.replace("\n", "<br>")
212-
.replace("  ", " &nbsp;")
213-
.replace('\u00a0', ' ');
218+
.replace("  ", " &nbsp;");
214219
}
215220

216221
private static void addAllCommandRuns(

extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeDescriptionHelperTest.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,7 @@ private static void assertRunsToHtml(final String expectedHtml,
3434
@Test
3535
public void testNoRuns() {
3636
assertRunsToHtml(
37-
"abc *a* _c_ <br> <br> <a href=\"#\">test</a> &nbsp;&amp;",
37+
"abc *a* _c_ &lt;br&gt; <br> &lt;a href=\"#\"&gt;test&lt;/a&gt; &nbsp;&amp;amp;",
3838
List.of(),
3939
List.of(),
4040
"abc *a* _c_ <br>\u00a0\n\u00a0<a href=\"#\">test</a>  &amp;"

0 commit comments

Comments
 (0)