77// option. This file may not be copied, modified, or distributed
88// except according to those terms.
99
10- use super :: codegen:: { resolve_unique_hash_value, Node , DAFSA_NODES } ;
10+ mod codegen;
11+
12+ use codegen:: { resolve_unique_hash_value, Node , DAFSA_NODES } ;
1113use super :: { CharRef , Status } ;
12- use crate :: tokenizer:: TokenSink ;
13- use crate :: tokenizer:: Tokenizer ;
14- use markup5ever:: buffer_queue:: BufferQueue ;
15- use markup5ever:: tendril:: StrTendril ;
14+ use crate :: buffer_queue:: BufferQueue ;
15+ use crate :: tendril:: StrTendril ;
1616
1717use std:: borrow:: Cow ;
1818use std:: mem;
1919
/// Callback through which the named character reference tokenizer reports parse errors.
///
/// The single argument is the human-readable error message.
///
/// The alias is a *borrowed* trait object: a bare `Fn(&str)` is an unsized type and
/// cannot be passed to a function by value. A shared reference is `Copy`, so a
/// callback received as a parameter can be forwarded to several helpers without
/// being moved. The lifetime may be elided wherever the alias is used in a function
/// signature (`error_callback: EmitErrorFn`).
type EmitErrorFn<'a> = &'a dyn Fn(&str);
21+
2022#[ derive( Clone , Debug ) ]
2123pub ( crate ) struct Match {
2224 hash_value : usize ,
2325 matched_text : StrTendril ,
2426}
2527
28+ impl CharRef {
29+ pub const EMPTY : CharRef = CharRef {
30+ chars : [ '\0' , '\0' ] ,
31+ num_chars : 0 ,
32+ } ;
33+ }
34+
2635#[ derive( Clone , Debug ) ]
2736pub ( crate ) struct NamedReferenceTokenizerState {
2837 current_node : & ' static Node ,
@@ -62,15 +71,15 @@ impl NamedReferenceTokenizerState {
6271 }
6372 }
6473
65- fn feed_character < Sink : TokenSink > (
74+ fn feed_character (
6675 & mut self ,
6776 c : char ,
68- tokenizer : & Tokenizer < Sink > ,
77+ error_callback : EmitErrorFn ,
6978 input : & BufferQueue ,
7079 ) -> NamedReferenceTokenizationResult {
7180 self . name_buffer . push_char ( c) ;
7281 if !c. is_ascii_alphanumeric ( ) && c != ';' {
73- return self . did_find_invalid_character ( tokenizer , input) ;
82+ return self . did_find_invalid_character ( error_callback , input) ;
7483 }
7584
7685 let code_point = c as u32 as u8 ;
@@ -85,7 +94,7 @@ impl NamedReferenceTokenizerState {
8594 }
8695
8796 let Some ( next_node) = next_node else {
88- return self . did_find_invalid_character ( tokenizer , input) ;
97+ return self . did_find_invalid_character ( error_callback , input) ;
8998 } ;
9099
91100 self . current_node = next_node;
@@ -101,24 +110,24 @@ impl NamedReferenceTokenizerState {
101110 NamedReferenceTokenizationResult :: Continue
102111 }
103112
104- fn did_find_invalid_character < Sink : TokenSink > (
113+ fn did_find_invalid_character (
105114 & mut self ,
106- tokenizer : & Tokenizer < Sink > ,
115+ error_callback : EmitErrorFn ,
107116 input : & BufferQueue ,
108117 ) -> NamedReferenceTokenizationResult {
109118 if let Some ( last_match) = self . last_match . take ( ) {
110119 input. push_front ( self . name_buffer . clone ( ) ) ;
111120 return NamedReferenceTokenizationResult :: Success {
112- reference : self . finish_matching_reference ( last_match, tokenizer , input) ,
121+ reference : self . finish_matching_reference ( last_match, error_callback , input) ,
113122 } ;
114123 }
115124
116125 NamedReferenceTokenizationResult :: Failed
117126 }
118127
119- pub ( crate ) fn step < Sink : TokenSink > (
128+ pub ( crate ) fn step (
120129 & mut self ,
121- tokenizer : & Tokenizer < Sink > ,
130+ error_callback : EmitErrorFn ,
122131 input : & BufferQueue ,
123132 ) -> Result < Status , StrTendril > {
124133 loop {
@@ -139,18 +148,18 @@ impl NamedReferenceTokenizerState {
139148 }
140149 }
141150
142- pub ( crate ) fn notify_end_of_file < Sink : TokenSink > (
151+ pub ( crate ) fn notify_end_of_file (
143152 & mut self ,
144- tokenizer : & Tokenizer < Sink > ,
153+ error_callback : EmitErrorFn ,
145154 input : & BufferQueue ,
146155 ) -> Option < CharRef > {
147156 input. push_front ( self . name_buffer . clone ( ) ) ;
148157 if let Some ( last_match) = self . last_match . take ( ) {
149- Some ( self . finish_matching_reference ( last_match, tokenizer , input) )
158+ Some ( self . finish_matching_reference ( last_match, error_callback , input) )
150159 } else {
151160 if self . name_buffer . ends_with ( ';' ) {
152161 println ! ( "end of file and last is semicolon" ) ;
153- emit_name_error ( mem:: take ( & mut self . name_buffer ) , tokenizer ) ;
162+ emit_name_error ( mem:: take ( & mut self . name_buffer ) , error_callback ) ;
154163 }
155164 None
156165 }
@@ -159,10 +168,10 @@ impl NamedReferenceTokenizerState {
159168 /// Called whenever the tokenizer has finished matching a named reference.
160169 ///
161170 /// This method takes care of emitting appropriate errors and implement some legacy quirks.
162- pub ( crate ) fn finish_matching_reference < Sink : TokenSink > (
171+ pub ( crate ) fn finish_matching_reference (
163172 & self ,
164173 matched : Match ,
165- tokenizer : & Tokenizer < Sink > ,
174+ error_callback : EmitErrorFn ,
166175 input : & BufferQueue ,
167176 ) -> CharRef {
168177 let char_ref = resolve_unique_hash_value ( matched. hash_value ) ;
@@ -190,19 +199,17 @@ impl NamedReferenceTokenizerState {
190199 // (;), then this is a missing-semicolon-after-character-reference parse
191200 // error.
192201 if last_matched_codepoint != ';' {
193- tokenizer. emit_error ( Cow :: Borrowed (
194- "Character reference does not end with semicolon" ,
195- ) ) ;
202+ error_callback ( "Character reference does not end with semicolon" ) ;
196203 }
197204 char_ref
198205 }
199206}
200207
201- pub ( crate ) fn emit_name_error < Sink : TokenSink > ( name : StrTendril , tokenizer : & Tokenizer < Sink > ) {
202- let msg = if tokenizer. opts . exact_errors {
203- Cow :: from ( format ! ( "Invalid character reference &{}" , name) )
204- } else {
205- Cow :: from ( "Invalid character reference" )
206- } ;
207- tokenizer. emit_error ( msg) ;
208- }
208+ // pub(crate) fn emit_name_error(name: StrTendril, tokenizer: &Tokenizer<Sink>) {
209+ // let msg = if tokenizer.opts.exact_errors {
210+ // Cow::from(format!("Invalid character reference &{}", name))
211+ // } else {
212+ // Cow::from("Invalid character reference")
213+ // };
214+ // tokenizer.emit_error(msg);
215+ // }
0 commit comments