Skip to content

Commit 996b042

Browse files
committed
perf: Use element indexes for faster selector lookups in some cases
Signed-off-by: Dmitry Dygalo <dmitry@dygalo.dev>
1 parent 713416a commit 996b042

File tree

12 files changed

+468
-17
lines changed

12 files changed

+468
-17
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,10 @@
1212

1313
- CLI tests compatibility with future Cargo versions. [#602](https://github.com/Stranger6667/css-inline/issues/602)
1414

15+
### Performance
16+
17+
- Use element indexes for faster selector lookups on large documents.
18+
1519
## [0.18.0] - 2025-11-01
1620

1721
### Added

bindings/c/CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@
88
- Update `html5ever` to `0.36`.
99
- Update `selectors` to `0.33`.
1010

11+
### Performance
12+
13+
- Use element indexes for faster selector lookups on large documents.
14+
1115
## [0.18.0] - 2025-11-01
1216

1317
### Added

bindings/java/CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@
88
- Update `html5ever` to `0.36`.
99
- Update `selectors` to `0.33`.
1010

11+
### Performance
12+
13+
- Use element indexes for faster selector lookups on large documents.
14+
1115
## [0.18.0] - 2025-11-01
1216

1317
### Added

bindings/javascript/CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@
88
- Update `html5ever` to `0.36`.
99
- Update `selectors` to `0.33`.
1010

11+
### Performance
12+
13+
- Use element indexes for faster selector lookups on large documents.
14+
1115
## [0.18.0] - 2025-11-01
1216

1317
### Added

bindings/python/CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@
88
- Update `html5ever` to `0.36`.
99
- Update `selectors` to `0.33`.
1010

11+
### Performance
12+
13+
- Use element indexes for faster selector lookups on large documents.
14+
1115
## [0.18.0] - 2025-11-01
1216

1317
### Added

bindings/ruby/CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@
88
- Update `html5ever` to `0.36`.
99
- Update `selectors` to `0.33`.
1010

11+
### Performance
12+
13+
- Use element indexes for faster selector lookups on large documents.
14+
1115
## [0.18.0] - 2025-11-01
1216

1317
### Added

css-inline/src/html/document.rs

Lines changed: 75 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,15 @@ use super::{
88
InliningMode,
99
};
1010
use crate::{html::DocumentStyleMap, InlineError};
11-
use html5ever::local_name;
11+
use html5ever::{local_name, tendril::StrTendril, LocalName};
12+
use rustc_hash::FxHashMap;
1213
use selectors::context::SelectorCaches;
1314
use std::{fmt, fmt::Formatter, io::Write, iter::successors};
1415

16+
/// Minimum input size (bytes) to build element indexes.
17+
/// For smaller inputs, linear scan is faster than hash map operations.
18+
const INDEX_SIZE_THRESHOLD: usize = 1024;
19+
1520
/// HTML document representation.
1621
///
1722
/// A `Document` holds a collection of nodes, with each node representing an HTML element.
@@ -46,6 +51,14 @@ pub(crate) struct Document {
4651
/// Ids of `link` nodes, specifically those with the `rel` attribute value set as `stylesheet`.
4752
/// They represent the locations (URLs) of all linked stylesheet resources in the document.
4853
linked_stylesheets: Vec<NodeId>,
54+
/// Whether element indexes are enabled for this document.
55+
use_indexes: bool,
56+
/// Index: ID attribute value -> `NodeId` (IDs should be unique per document).
57+
by_id: FxHashMap<LocalName, NodeId>,
58+
/// Index: class name -> list of `NodeId`s with that class.
59+
by_class: FxHashMap<LocalName, Vec<NodeId>>,
60+
/// Index: tag name -> list of `NodeId`s with that tag.
61+
by_tag: FxHashMap<LocalName, Vec<NodeId>>,
4962
}
5063

5164
impl fmt::Debug for Document {
@@ -68,16 +81,22 @@ impl Document {
6881
parser::parse_with_options(bytes, preallocate_node_capacity, mode)
6982
}
7083

71-
pub(super) fn with_capacity(capacity: usize) -> Self {
84+
/// Create a document holding only its two initial `Document` nodes, reserving room for
/// `capacity` more nodes and `capacity` element ids.
///
/// `input_size` is compared against `INDEX_SIZE_THRESHOLD` to decide whether the
/// id / class / tag indexes are maintained for this document.
pub(super) fn with_capacity(capacity: usize, input_size: usize) -> Self {
7285
// Dummy node at index 0 so that other indices fit in NonZero
7386
let mut nodes = vec![Node::new(NodeData::Document), Node::new(NodeData::Document)];
7487
// Usually there are a lot of nodes, hence, reserve some space for them
7588
nodes.reserve(capacity);
89+
// Only build indexes once the input reaches `INDEX_SIZE_THRESHOLD` bytes (inclusive);
// below that the lookup benefit does not outweigh the overhead of maintaining the maps
90+
let use_indexes = input_size >= INDEX_SIZE_THRESHOLD;
7691
Document {
7792
nodes,
7893
elements: Vec::with_capacity(capacity),
7994
styles: Vec::new(),
8095
linked_stylesheets: Vec::new(),
96+
use_indexes,
97+
by_id: FxHashMap::default(),
98+
by_class: FxHashMap::default(),
99+
by_tag: FxHashMap::default(),
81100
}
82101
}
83102

@@ -130,6 +149,58 @@ impl Document {
130149
self.elements.push(node);
131150
}
132151

152+
/// Whether element indexes are enabled for this document.
153+
#[inline]
154+
pub(crate) fn has_indexes(&self) -> bool {
155+
self.use_indexes
156+
}
157+
158+
/// Index an element by its tag name.
///
/// Appends `node_id` to the list stored under `tag`, creating the list on first use.
/// Does nothing when indexes are disabled for this document.
159+
#[inline]
160+
pub(super) fn index_by_tag(&mut self, tag: LocalName, node_id: NodeId) {
161+
if self.use_indexes {
162+
self.by_tag.entry(tag).or_default().push(node_id);
163+
}
164+
}
165+
166+
/// Index an element by its ID attribute value.
///
/// Does nothing when indexes are disabled for this document or when `id` is empty.
167+
#[inline]
168+
pub(super) fn index_by_id(&mut self, id: &StrTendril, node_id: NodeId) {
169+
if self.use_indexes && !id.is_empty() {
170+
let id_atom = LocalName::from(&**id);
171+
// IDs should be unique; if a document repeats an ID, `insert` overwrites the previous
// entry, so only the last element with that ID stays in the index.
// NOTE(review): an ID-anchored lookup through this index will never visit the earlier
// duplicates, whereas a scan over all elements presumably would match them too -
// confirm this difference between indexed and non-indexed documents is intended.
172+
self.by_id.insert(id_atom, node_id);
173+
}
174+
}
175+
176+
/// Index an element by its class names (space-separated).
///
/// Every whitespace-separated token of `classes` gets `node_id` appended to its list.
/// Does nothing when indexes are disabled for this document.
177+
pub(super) fn index_by_classes(&mut self, classes: &StrTendril, node_id: NodeId) {
178+
if self.use_indexes {
179+
// NOTE(review): `split_whitespace` splits on Unicode whitespace, while HTML class
// lists are split on ASCII whitespace - confirm this agrees with how classes are
// matched elsewhere.
// NOTE(review): tokens are not de-duplicated, so a value such as "a a" pushes the
// same `node_id` twice under "a" - confirm consumers tolerate repeated ids.
for class in classes.split_whitespace() {
180+
let class_atom = LocalName::from(class);
181+
self.by_class.entry(class_atom).or_default().push(node_id);
182+
}
183+
}
184+
}
185+
186+
/// Lookup an element by its ID attribute value.
///
/// Returns `None` when no element is indexed under `id`. The index keeps a single
/// `NodeId` per ID, so at most one element is ever returned.
187+
#[inline]
188+
pub(crate) fn get_by_id(&self, id: &LocalName) -> Option<NodeId> {
189+
self.by_id.get(id).copied()
190+
}
191+
192+
/// Lookup elements by class name.
///
/// Returns the ids in the order they were indexed, or an empty slice when nothing is
/// indexed under `class`.
193+
#[inline]
194+
pub(crate) fn get_by_class(&self, class: &LocalName) -> &[NodeId] {
195+
self.by_class.get(class).map_or(&[], Vec::as_slice)
196+
}
197+
198+
/// Lookup elements by tag name.
///
/// Returns the ids in the order they were indexed, or an empty slice when nothing is
/// indexed under `tag`.
199+
#[inline]
200+
pub(crate) fn get_by_tag(&self, tag: &LocalName) -> &[NodeId] {
201+
self.by_tag.get(tag).map_or(&[], Vec::as_slice)
202+
}
203+
133204
/// Detach a node from its siblings and its parent.
134205
///
135206
/// Before:
@@ -402,7 +473,7 @@ mod tests {
402473

403474
#[test]
404475
fn test_insert_before() {
405-
let mut doc = Document::with_capacity(0);
476+
let mut doc = Document::with_capacity(0, 0);
406477

407478
let node1_id = doc.push_node(new_element());
408479
let node2_id = doc.push_node(new_element());
@@ -420,7 +491,7 @@ mod tests {
420491

421492
#[test]
422493
fn test_append() {
423-
let mut doc = Document::with_capacity(0);
494+
let mut doc = Document::with_capacity(0, 0);
424495

425496
let node1_id = doc.push_node(new_element());
426497
let node2_id = doc.push_node(new_element());

css-inline/src/html/iter.rs

Lines changed: 46 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,31 +2,67 @@ use super::{
22
document::Document,
33
element::Element,
44
node::{NodeData, NodeId},
5-
selectors::{ParseError, Selectors},
5+
selectors::{ParseError, SelectorAnchor, Selectors},
66
Specificity,
77
};
88
use selectors::context::SelectorCaches;
99

10+
/// Source of elements to iterate over.
11+
/// Allows using different indexes for faster lookup.
12+
enum ElementSource<'a> {
13+
/// Iterate over a slice of elements (all elements or indexed subset).
14+
Slice(std::slice::Iter<'a, NodeId>),
15+
/// Single element from ID lookup.
16+
Single(Option<NodeId>),
17+
}
18+
19+
impl ElementSource<'_> {
20+
/// Return the next candidate node id, or `None` once the source is exhausted.
///
/// `Slice` advances its underlying slice iterator; `Single` hands out its id once
/// (via `Option::take`) and yields `None` on every later call.
#[inline]
21+
fn next(&mut self) -> Option<NodeId> {
22+
match self {
23+
ElementSource::Slice(iter) => iter.next().copied(),
24+
ElementSource::Single(opt) => opt.take(),
25+
}
26+
}
27+
}
28+
1029
/// Compile selectors from a string and create an element iterator that yields elements matching these selectors.
///
/// When the document has indexes, the candidate set is narrowed by the compiled selectors'
/// anchor (ID, class or tag); with no anchor, or without indexes, every element of the
/// document is a candidate. Returns the parse error if `selectors` does not compile.
1130
#[inline]
1231
pub(crate) fn select<'a, 'b, 'c>(
1332
document: &'a Document,
1433
selectors: &'b str,
1534
caches: &'c mut SelectorCaches,
1635
) -> Result<Select<'a, 'c>, ParseError<'b>> {
17-
Selectors::compile(selectors).map(|selectors| Select {
18-
document,
19-
caches,
20-
iter: document.elements.iter(),
21-
selectors,
36+
Selectors::compile(selectors).map(|selectors| {
37+
// Only use indexes if they were built during parsing
38+
let source = if document.has_indexes() {
39+
match selectors.anchor() {
40+
// NOTE(review): the ID index stores one node per ID, so at most one candidate is
// visited here even if the document repeats that ID - confirm this is intended.
SelectorAnchor::Id(id) => ElementSource::Single(document.get_by_id(id.as_inner())),
41+
SelectorAnchor::Class(class) => {
42+
ElementSource::Slice(document.get_by_class(class.as_inner()).iter())
43+
}
44+
SelectorAnchor::Tag(tag) => {
45+
ElementSource::Slice(document.get_by_tag(tag.as_inner()).iter())
46+
}
47+
// No usable anchor: fall back to scanning every element
SelectorAnchor::None => ElementSource::Slice(document.elements.iter()),
48+
}
49+
} else {
50+
ElementSource::Slice(document.elements.iter())
51+
};
52+
Select {
53+
document,
54+
caches,
55+
source,
56+
selectors,
57+
}
2258
})
2359
}
2460

2561
/// An element iterator adaptor that yields elements matching given selectors.
2662
pub(crate) struct Select<'a, 'c> {
2763
document: &'a Document,
2864
caches: &'c mut SelectorCaches,
29-
iter: std::slice::Iter<'a, NodeId>,
65+
source: ElementSource<'a>,
3066
/// The selectors to be matched.
3167
selectors: Selectors,
3268
}
@@ -44,11 +80,11 @@ impl<'a> Iterator for Select<'a, '_> {
4480

4581
#[inline]
4682
fn next(&mut self) -> Option<Element<'a>> {
47-
for element_id in self.iter.by_ref() {
48-
let NodeData::Element { element, .. } = &self.document[*element_id].data else {
83+
while let Some(element_id) = self.source.next() {
84+
let NodeData::Element { element, .. } = &self.document[element_id].data else {
4985
unreachable!("Element ids always point to element nodes")
5086
};
51-
let element = Element::new(self.document, *element_id, element);
87+
let element = Element::new(self.document, element_id, element);
5288
for selector in self.selectors.iter() {
5389
if element.matches(selector, self.caches) {
5490
return Some(element);

css-inline/src/html/parser.rs

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,10 @@ pub(crate) fn parse_with_options(
2929
mode: InliningMode,
3030
) -> Document {
3131
let sink = Sink {
32-
document: RefCell::new(Document::with_capacity(preallocate_node_capacity)),
32+
document: RefCell::new(Document::with_capacity(
33+
preallocate_node_capacity,
34+
bytes.len(),
35+
)),
3336
};
3437
let options = html5ever::ParseOpts::default();
3538
match mode {
@@ -80,11 +83,32 @@ impl Sink {
8083
attributes: Vec<Attribute>,
8184
inlining_ignored: bool,
8285
) -> NodeId {
86+
// Extract ID and class values before moving attributes
87+
let mut id_value = None;
88+
let mut class_value = None;
89+
for attr in &attributes {
90+
if attr.name.local == local_name!("id") {
91+
id_value = Some(attr.value.clone());
92+
} else if attr.name.local == local_name!("class") {
93+
class_value = Some(attr.value.clone());
94+
}
95+
}
96+
let tag_name = name.local.clone();
8397
let node_id = self.push_node(NodeData::Element {
8498
element: ElementData::new(name, attributes),
8599
inlining_ignored,
86100
});
87-
self.document.borrow_mut().push_element_id(node_id);
101+
let mut document = self.document.borrow_mut();
102+
document.push_element_id(node_id);
103+
// Index by tag name
104+
document.index_by_tag(tag_name, node_id);
105+
// Index by ID and class attributes
106+
if let Some(id) = &id_value {
107+
document.index_by_id(id, node_id);
108+
}
109+
if let Some(class) = &class_value {
110+
document.index_by_classes(class, node_id);
111+
}
88112
node_id
89113
}
90114

css-inline/src/html/selectors/local_name.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,11 @@ impl LocalName {
1212
pub(crate) fn into_inner(self) -> html5ever::LocalName {
1313
self.0
1414
}
15+
/// Returns a reference to the inner `html5ever::LocalName`.
16+
#[inline]
17+
pub(crate) fn as_inner(&self) -> &html5ever::LocalName {
18+
&self.0
19+
}
1520
pub(crate) fn as_bytes(&self) -> &[u8] {
1621
self.0.as_bytes()
1722
}

0 commit comments

Comments
 (0)