1 | use crate::reader::error::SyntaxError; |
2 | use crate::common::{is_name_char, is_name_start_char, is_whitespace_char}; |
3 | use crate::reader::lexer::Token; |
4 | |
5 | use super::{DoctypeSubstate, PullParser, QuoteToken, Result, State}; |
6 | |
7 | impl PullParser { |
8 | pub fn inside_doctype(&mut self, t: Token, substate: DoctypeSubstate) -> Option<Result> { |
9 | match substate { |
10 | DoctypeSubstate::Outside => match t { |
11 | Token::TagEnd => self.into_state_continue(State::OutsideTag), |
12 | Token::MarkupDeclarationStart => { |
13 | self.buf.clear(); |
14 | self.into_state_continue(State::InsideDoctype(DoctypeSubstate::InsideName)) |
15 | }, |
16 | Token::Character('%' ) => { |
17 | self.data.ref_data.clear(); |
18 | self.data.ref_data.push('%' ); |
19 | self.into_state_continue(State::InsideDoctype(DoctypeSubstate::PEReferenceInDtd)) |
20 | }, |
21 | Token::CommentStart => { |
22 | self.into_state_continue(State::InsideDoctype(DoctypeSubstate::Comment)) |
23 | }, |
24 | Token::SingleQuote | Token::DoubleQuote => { |
25 | // just discard string literals |
26 | self.data.quote = Some(super::QuoteToken::from_token(&t)); |
27 | self.into_state_continue(State::InsideDoctype(DoctypeSubstate::String)) |
28 | }, |
29 | Token::CDataEnd | Token::CDataStart => Some(self.error(SyntaxError::UnexpectedToken(t))), |
30 | // TODO: parse SYSTEM, and [ |
31 | _ => None, |
32 | }, |
33 | DoctypeSubstate::String => match t { |
34 | Token::SingleQuote if self.data.quote != Some(QuoteToken::SingleQuoteToken) => None, |
35 | Token::DoubleQuote if self.data.quote != Some(QuoteToken::DoubleQuoteToken) => None, |
36 | Token::SingleQuote | Token::DoubleQuote => { |
37 | self.data.quote = None; |
38 | self.into_state_continue(State::InsideDoctype(DoctypeSubstate::Outside)) |
39 | }, |
40 | _ => None, |
41 | }, |
42 | DoctypeSubstate::Comment => match t { |
43 | Token::CommentEnd => { |
44 | self.into_state_continue(State::InsideDoctype(DoctypeSubstate::Outside)) |
45 | }, |
46 | _ => None, |
47 | }, |
48 | DoctypeSubstate::InsideName => match t { |
49 | Token::Character(c @ 'A' ..='Z' ) => { |
50 | self.buf.push(c); |
51 | None |
52 | }, |
53 | Token::Character(c) if is_whitespace_char(c) => { |
54 | let buf = self.take_buf(); |
55 | match buf.as_str() { |
56 | "ENTITY" => self.into_state_continue(State::InsideDoctype(DoctypeSubstate::BeforeEntityName)), |
57 | "NOTATION" | "ELEMENT" | "ATTLIST" => self.into_state_continue(State::InsideDoctype(DoctypeSubstate::SkipDeclaration)), |
58 | _ => Some(self.error(SyntaxError::UnknownMarkupDeclaration(buf.into()))), |
59 | } |
60 | }, |
61 | _ => Some(self.error(SyntaxError::UnexpectedToken(t))), |
62 | }, |
63 | DoctypeSubstate::BeforeEntityName => { |
64 | self.data.name.clear(); |
65 | match t { |
66 | Token::Character(c) if is_whitespace_char(c) => None, |
67 | Token::Character('%' ) => { // % is for PEDecl |
68 | self.data.name.push('%' ); |
69 | self.into_state_continue(State::InsideDoctype(DoctypeSubstate::PEReferenceDefinitionStart)) |
70 | }, |
71 | Token::Character(c) if is_name_start_char(c) => { |
72 | if self.data.name.len() > self.config.max_name_length { |
73 | return Some(self.error(SyntaxError::ExceededConfiguredLimit)); |
74 | } |
75 | self.data.name.push(c); |
76 | self.into_state_continue(State::InsideDoctype(DoctypeSubstate::EntityName)) |
77 | }, |
78 | _ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))), |
79 | } |
80 | }, |
81 | DoctypeSubstate::EntityName => match t { |
82 | Token::Character(c) if is_whitespace_char(c) => { |
83 | self.into_state_continue(State::InsideDoctype(DoctypeSubstate::BeforeEntityValue)) |
84 | }, |
85 | Token::Character(c) if is_name_char(c) => { |
86 | if self.data.name.len() > self.config.max_name_length { |
87 | return Some(self.error(SyntaxError::ExceededConfiguredLimit)); |
88 | } |
89 | self.data.name.push(c); |
90 | None |
91 | }, |
92 | _ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))), |
93 | }, |
94 | DoctypeSubstate::BeforeEntityValue => { |
95 | self.buf.clear(); |
96 | match t { |
97 | Token::Character(c) if is_whitespace_char(c) => None, |
98 | // SYSTEM/PUBLIC not supported |
99 | Token::Character('S' | 'P' ) => { |
100 | let name = self.data.take_name(); |
101 | self.entities.entry(name).or_insert_with(String::new); // Dummy value, but at least the name is recognized |
102 | |
103 | self.into_state_continue(State::InsideDoctype(DoctypeSubstate::SkipDeclaration)) |
104 | }, |
105 | Token::SingleQuote | Token::DoubleQuote => { |
106 | self.data.quote = Some(super::QuoteToken::from_token(&t)); |
107 | self.into_state_continue(State::InsideDoctype(DoctypeSubstate::EntityValue)) |
108 | }, |
109 | _ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))), |
110 | } |
111 | }, |
112 | DoctypeSubstate::EntityValue => match t { |
113 | Token::SingleQuote if self.data.quote != Some(QuoteToken::SingleQuoteToken) => { self.buf.push(' \'' ); None }, |
114 | Token::DoubleQuote if self.data.quote != Some(QuoteToken::DoubleQuoteToken) => { self.buf.push('"' ); None }, |
115 | Token::SingleQuote | Token::DoubleQuote => { |
116 | self.data.quote = None; |
117 | let name = self.data.take_name(); |
118 | let val = self.take_buf(); |
119 | self.entities.entry(name).or_insert(val); // First wins |
120 | self.into_state_continue(State::InsideDoctype(DoctypeSubstate::SkipDeclaration)) // FIXME |
121 | }, |
122 | Token::ReferenceStart | Token::Character('&' ) => { |
123 | self.data.ref_data.clear(); |
124 | self.into_state_continue(State::InsideDoctype(DoctypeSubstate::NumericReferenceStart)) |
125 | }, |
126 | Token::Character('%' ) => { |
127 | self.data.ref_data.clear(); |
128 | self.data.ref_data.push('%' ); // include literal % in the name to distinguish from regular entities |
129 | self.into_state_continue(State::InsideDoctype(DoctypeSubstate::PEReferenceInValue)) |
130 | }, |
131 | Token::Character(c) if !self.is_valid_xml_char(c) => { |
132 | Some(self.error(SyntaxError::InvalidCharacterEntity(c as u32))) |
133 | }, |
134 | Token::Character(c) => { |
135 | self.buf.push(c); |
136 | None |
137 | }, |
138 | _ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))), |
139 | }, |
140 | DoctypeSubstate::PEReferenceDefinitionStart => match t { |
141 | Token::Character(c) if is_whitespace_char(c) => { |
142 | None |
143 | }, |
144 | Token::Character(c) if is_name_start_char(c) => { |
145 | debug_assert_eq!(self.data.name, "%" ); |
146 | self.data.name.push(c); |
147 | self.into_state_continue(State::InsideDoctype(DoctypeSubstate::PEReferenceDefinition)) |
148 | }, |
149 | _ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))), |
150 | }, |
151 | DoctypeSubstate::PEReferenceDefinition => match t { |
152 | Token::Character(c) if is_name_char(c) => { |
153 | if self.data.name.len() > self.config.max_name_length { |
154 | return Some(self.error(SyntaxError::ExceededConfiguredLimit)); |
155 | } |
156 | self.data.name.push(c); |
157 | None |
158 | }, |
159 | Token::Character(c) if is_whitespace_char(c) => { |
160 | self.into_state_continue(State::InsideDoctype(DoctypeSubstate::BeforeEntityValue)) |
161 | }, |
162 | _ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))), |
163 | }, |
164 | DoctypeSubstate::PEReferenceInDtd => match t { |
165 | Token::Character(c) if is_name_char(c) => { |
166 | self.data.ref_data.push(c); |
167 | None |
168 | }, |
169 | Token::ReferenceEnd | Token::Character(';' ) => { |
170 | let name = self.data.take_ref_data(); |
171 | match self.entities.get(&name) { |
172 | Some(ent) => { |
173 | if let Err(e) = self.lexer.reparse(ent) { |
174 | return Some(Err(e)); |
175 | } |
176 | self.into_state_continue(State::InsideDoctype(DoctypeSubstate::Outside)) |
177 | }, |
178 | None => Some(self.error(SyntaxError::UndefinedEntity(name.into()))), |
179 | } |
180 | }, |
181 | _ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))), |
182 | }, |
183 | DoctypeSubstate::PEReferenceInValue => match t { |
184 | Token::Character(c) if is_name_char(c) => { |
185 | self.data.ref_data.push(c); |
186 | None |
187 | }, |
188 | Token::ReferenceEnd | Token::Character(';' ) => { |
189 | let name = self.data.take_ref_data(); |
190 | match self.entities.get(&name) { |
191 | Some(ent) => { |
192 | self.buf.push_str(ent); |
193 | self.into_state_continue(State::InsideDoctype(DoctypeSubstate::EntityValue)) |
194 | }, |
195 | None => Some(self.error(SyntaxError::UndefinedEntity(name.into()))), |
196 | } |
197 | }, |
198 | _ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))), |
199 | }, |
200 | DoctypeSubstate::NumericReferenceStart => match t { |
201 | Token::Character('#' ) => { |
202 | self.into_state_continue(State::InsideDoctype(DoctypeSubstate::NumericReference)) |
203 | }, |
204 | Token::Character(c) if !self.is_valid_xml_char(c) => { |
205 | Some(self.error(SyntaxError::InvalidCharacterEntity(c as u32))) |
206 | }, |
207 | Token::Character(c) => { |
208 | self.buf.push('&' ); |
209 | self.buf.push(c); |
210 | // named entities are not expanded inside doctype |
211 | self.into_state_continue(State::InsideDoctype(DoctypeSubstate::EntityValue)) |
212 | }, |
213 | _ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))), |
214 | }, |
215 | DoctypeSubstate::NumericReference => match t { |
216 | Token::ReferenceEnd | Token::Character(';' ) => { |
217 | let r = self.data.take_ref_data(); |
218 | // https://www.w3.org/TR/xml/#sec-entexpand |
219 | match self.numeric_reference_from_str(&r) { |
220 | Ok(c) => { |
221 | self.buf.push(c); |
222 | self.into_state_continue(State::InsideDoctype(DoctypeSubstate::EntityValue)) |
223 | } |
224 | Err(e) => Some(self.error(e)), |
225 | } |
226 | }, |
227 | Token::Character(c) if !self.is_valid_xml_char(c) => { |
228 | Some(self.error(SyntaxError::InvalidCharacterEntity(c as u32))) |
229 | }, |
230 | Token::Character(c) => { |
231 | self.data.ref_data.push(c); |
232 | None |
233 | }, |
234 | _ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))), |
235 | }, |
236 | DoctypeSubstate::SkipDeclaration => match t { |
237 | Token::TagEnd => { |
238 | self.into_state_continue(State::InsideDoctype(DoctypeSubstate::Outside)) |
239 | }, |
240 | _ => None, |
241 | }, |
242 | } |
243 | } |
244 | } |
245 | |