@@ -8,10 +8,15 @@ use super::{
88 InliningMode ,
99} ;
1010use crate :: { html:: DocumentStyleMap , InlineError } ;
11- use html5ever:: local_name;
11+ use html5ever:: { local_name, tendril:: StrTendril , LocalName } ;
12+ use rustc_hash:: FxHashMap ;
1213use selectors:: context:: SelectorCaches ;
1314use std:: { fmt, fmt:: Formatter , io:: Write , iter:: successors} ;
1415
/// Smallest input size, in bytes, for which element indexes are built.
///
/// `Document::with_capacity` enables the id / class / tag indexes only when the
/// input size is greater than or equal to this value; below it, a linear scan is
/// expected to be cheaper than maintaining hash maps.
const INDEX_SIZE_THRESHOLD: usize = 1024;
19+
1520/// HTML document representation.
1621///
1722/// A `Document` holds a collection of nodes, with each node representing an HTML element.
@@ -46,6 +51,14 @@ pub(crate) struct Document {
4651 /// Ids of `link` nodes, specifically those with the `rel` attribute value set as `stylesheet`.
4752 /// They represent the locations (URLs) of all linked stylesheet resources in the document.
4853 linked_stylesheets : Vec < NodeId > ,
54+ /// Whether element indexes are enabled for this document.
55+ use_indexes : bool ,
56+ /// Index: ID attribute value -> `NodeId` (IDs should be unique per document).
57+ by_id : FxHashMap < LocalName , NodeId > ,
58+ /// Index: class name -> list of `NodeId`s with that class.
59+ by_class : FxHashMap < LocalName , Vec < NodeId > > ,
60+ /// Index: tag name -> list of `NodeId`s with that tag.
61+ by_tag : FxHashMap < LocalName , Vec < NodeId > > ,
4962}
5063
5164impl fmt:: Debug for Document {
@@ -68,16 +81,22 @@ impl Document {
6881 parser:: parse_with_options ( bytes, preallocate_node_capacity, mode)
6982 }
7083
71- pub ( super ) fn with_capacity ( capacity : usize ) -> Self {
84+ pub ( super ) fn with_capacity ( capacity : usize , input_size : usize ) -> Self {
7285 // Dummy node at index 0 so that other indices fit in NonZero
7386 let mut nodes = vec ! [ Node :: new( NodeData :: Document ) , Node :: new( NodeData :: Document ) ] ;
7487 // Usually there are a lot of nodes, hence, reserve some space for them
7588 nodes. reserve ( capacity) ;
89+ // Only build indexes for larger documents where the lookup benefit outweighs overhead
90+ let use_indexes = input_size >= INDEX_SIZE_THRESHOLD ;
7691 Document {
7792 nodes,
7893 elements : Vec :: with_capacity ( capacity) ,
7994 styles : Vec :: new ( ) ,
8095 linked_stylesheets : Vec :: new ( ) ,
96+ use_indexes,
97+ by_id : FxHashMap :: default ( ) ,
98+ by_class : FxHashMap :: default ( ) ,
99+ by_tag : FxHashMap :: default ( ) ,
81100 }
82101 }
83102
@@ -130,6 +149,58 @@ impl Document {
130149 self . elements . push ( node) ;
131150 }
132151
152+ /// Whether element indexes are enabled for this document.
153+ #[ inline]
154+ pub ( crate ) fn has_indexes ( & self ) -> bool {
155+ self . use_indexes
156+ }
157+
158+ /// Index an element by its tag name.
159+ #[ inline]
160+ pub ( super ) fn index_by_tag ( & mut self , tag : LocalName , node_id : NodeId ) {
161+ if self . use_indexes {
162+ self . by_tag . entry ( tag) . or_default ( ) . push ( node_id) ;
163+ }
164+ }
165+
166+ /// Index an element by its ID attribute value.
167+ #[ inline]
168+ pub ( super ) fn index_by_id ( & mut self , id : & StrTendril , node_id : NodeId ) {
169+ if self . use_indexes && !id. is_empty ( ) {
170+ let id_atom = LocalName :: from ( & * * id) ;
171+ // IDs should be unique; later declarations override earlier ones
172+ self . by_id . insert ( id_atom, node_id) ;
173+ }
174+ }
175+
176+ /// Index an element by its class names (space-separated).
177+ pub ( super ) fn index_by_classes ( & mut self , classes : & StrTendril , node_id : NodeId ) {
178+ if self . use_indexes {
179+ for class in classes. split_whitespace ( ) {
180+ let class_atom = LocalName :: from ( class) ;
181+ self . by_class . entry ( class_atom) . or_default ( ) . push ( node_id) ;
182+ }
183+ }
184+ }
185+
186+ /// Lookup an element by its ID attribute value.
187+ #[ inline]
188+ pub ( crate ) fn get_by_id ( & self , id : & LocalName ) -> Option < NodeId > {
189+ self . by_id . get ( id) . copied ( )
190+ }
191+
192+ /// Lookup elements by class name.
193+ #[ inline]
194+ pub ( crate ) fn get_by_class ( & self , class : & LocalName ) -> & [ NodeId ] {
195+ self . by_class . get ( class) . map_or ( & [ ] , Vec :: as_slice)
196+ }
197+
198+ /// Lookup elements by tag name.
199+ #[ inline]
200+ pub ( crate ) fn get_by_tag ( & self , tag : & LocalName ) -> & [ NodeId ] {
201+ self . by_tag . get ( tag) . map_or ( & [ ] , Vec :: as_slice)
202+ }
203+
133204 /// Detach a node from its siblings and its parent.
134205 ///
135206 /// Before:
@@ -402,7 +473,7 @@ mod tests {
402473
403474 #[ test]
404475 fn test_insert_before ( ) {
405- let mut doc = Document :: with_capacity ( 0 ) ;
476+ let mut doc = Document :: with_capacity ( 0 , 0 ) ;
406477
407478 let node1_id = doc. push_node ( new_element ( ) ) ;
408479 let node2_id = doc. push_node ( new_element ( ) ) ;
@@ -420,7 +491,7 @@ mod tests {
420491
421492 #[ test]
422493 fn test_append ( ) {
423- let mut doc = Document :: with_capacity ( 0 ) ;
494+ let mut doc = Document :: with_capacity ( 0 , 0 ) ;
424495
425496 let node1_id = doc. push_node ( new_element ( ) ) ;
426497 let node2_id = doc. push_node ( new_element ( ) ) ;
0 commit comments