Skip to content

Commit fc8eda8

Browse files
committed
perf: Optimize serialization of attributes and text nodes
1 parent 282cbda commit fc8eda8

5 files changed

Lines changed: 79 additions & 25 deletions

File tree

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,10 @@
22

33
## [Unreleased]
44

5+
### Performance
6+
7+
- Optimize serialization of attributes and text nodes.
8+
59
## [0.11.2] - 2023-12-09
610

711
### Performance

bindings/python/CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,10 @@
22

33
## [Unreleased]
44

5+
### Performance
6+
7+
- Optimize serialization of attributes and text nodes.
8+
59
## [0.11.2] - 2023-12-09
610

711
### Performance

bindings/ruby/CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,10 @@
22

33
## [Unreleased]
44

5+
### Performance
6+
7+
- Optimize serialization of attributes and text nodes.
8+
59
## [0.11.2] - 2023-12-09
610

711
### Performance

bindings/wasm/CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,10 @@
22

33
## [Unreleased]
44

5+
### Performance
6+
7+
- Optimize serialization of attributes and text nodes.
8+
59
## [0.11.2] - 2023-12-09
610

711
### Performance

css-inline/src/html/serializer.rs

Lines changed: 63 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -156,37 +156,51 @@ impl<'a, W: Write> HtmlSerializer<'a, W> {
156156
}
157157

158158
fn write_escaped(&mut self, text: &str) -> Result<(), InlineError> {
159-
// UTF-8 characters are maximum 4 bytes wide.
160-
let mut buffer = [0u8; 4];
161-
for c in text.chars() {
162-
match c {
163-
'&' => self.writer.write_all(b"&amp;"),
164-
'\u{00A0}' => self.writer.write_all(b"&nbsp;"),
165-
'<' => self.writer.write_all(b"&lt;"),
166-
'>' => self.writer.write_all(b"&gt;"),
167-
c => {
168-
let slice = c.encode_utf8(&mut buffer);
169-
self.writer.write_all(slice.as_bytes())
170-
}
171-
}?;
159+
let mut last_end = 0;
160+
for (start, part) in text.match_indices(['&', '\u{00A0}', '<', '>']) {
161+
self.writer.write_all(
162+
text.get(last_end..start)
163+
.expect("Invalid substring")
164+
.as_bytes(),
165+
)?;
166+
match part {
167+
"&" => self.writer.write_all(b"&amp;")?,
168+
"\u{00A0}" => self.writer.write_all(b"&nbsp;")?,
169+
"<" => self.writer.write_all(b"&lt;")?,
170+
">" => self.writer.write_all(b"&gt;")?,
171+
_ => unreachable!("Only the variants above are searched"),
172+
};
173+
last_end = start.checked_add(part.len()).expect("Size overflow");
172174
}
175+
self.writer.write_all(
176+
text.get(last_end..text.len())
177+
.expect("Invalid substring")
178+
.as_bytes(),
179+
)?;
173180
Ok(())
174181
}
175182

176183
fn write_attributes(&mut self, text: &str) -> Result<(), InlineError> {
177-
// UTF-8 characters are maximum 4 bytes wide.
178-
let mut buffer = [0u8; 4];
179-
for c in text.chars() {
180-
match c {
181-
'&' => self.writer.write_all(b"&amp;"),
182-
'\u{00A0}' => self.writer.write_all(b"&nbsp;"),
183-
'"' => self.writer.write_all(b"&quot;"),
184-
c => {
185-
let slice = c.encode_utf8(&mut buffer);
186-
self.writer.write_all(slice.as_bytes())
187-
}
188-
}?;
184+
let mut last_end = 0;
185+
for (start, part) in text.match_indices(['&', '\u{00A0}', '"']) {
186+
self.writer.write_all(
187+
text.get(last_end..start)
188+
.expect("Invalid substring")
189+
.as_bytes(),
190+
)?;
191+
match part {
192+
"&" => self.writer.write_all(b"&amp;")?,
193+
"\u{00A0}" => self.writer.write_all(b"&nbsp;")?,
194+
"\"" => self.writer.write_all(b"&quot;")?,
195+
_ => unreachable!("Only the variants above are searched"),
196+
};
197+
last_end = start.checked_add(part.len()).expect("Size overflow");
189198
}
199+
self.writer.write_all(
200+
text.get(last_end..text.len())
201+
.expect("Invalid substring")
202+
.as_bytes(),
203+
)?;
190204
Ok(())
191205
}
192206

@@ -549,4 +563,28 @@ mod tests {
549563
.expect("Should not fail");
550564
assert_eq!(buffer, b"<html><head></head><body></body></html>");
551565
}
566+
567+
#[test]
568+
fn test_escaped() {
569+
let doc = Document::parse_with_options(
570+
b"<!DOCTYPE html><html><head><title>& < > \xC2\xA0</title></head><body></body></html>",
571+
0,
572+
);
573+
let mut buffer = Vec::new();
574+
doc.serialize(&mut buffer, IndexMap::default(), false, false)
575+
.expect("Should not fail");
576+
assert_eq!(buffer, b"<!DOCTYPE html><html><head><title>&amp; &lt; &gt; &nbsp;</title></head><body></body></html>");
577+
}
578+
579+
#[test]
580+
fn test_attributes() {
581+
let doc = Document::parse_with_options(
582+
b"<!DOCTYPE html><html><head></head><body data-foo='& \xC2\xA0 \"'></body></html>",
583+
0,
584+
);
585+
let mut buffer = Vec::new();
586+
doc.serialize(&mut buffer, IndexMap::default(), false, false)
587+
.expect("Should not fail");
588+
assert_eq!(buffer, b"<!DOCTYPE html><html><head></head><body data-foo=\"&amp; &nbsp; &quot;\"></body></html>");
589+
}
552590
}

0 commit comments

Comments
 (0)