1use crate::reader::error::SyntaxError;
2use crate::common::{is_name_char, is_name_start_char, is_whitespace_char};
3use crate::reader::lexer::Token;
4
5use super::{DoctypeSubstate, PullParser, QuoteToken, Result, State};
6
7impl PullParser {
8 pub fn inside_doctype(&mut self, t: Token, substate: DoctypeSubstate) -> Option<Result> {
9 match substate {
10 DoctypeSubstate::Outside => match t {
11 Token::TagEnd => self.into_state_continue(State::OutsideTag),
12 Token::MarkupDeclarationStart => {
13 self.buf.clear();
14 self.into_state_continue(State::InsideDoctype(DoctypeSubstate::InsideName))
15 },
16 Token::Character('%') => {
17 self.data.ref_data.clear();
18 self.data.ref_data.push('%');
19 self.into_state_continue(State::InsideDoctype(DoctypeSubstate::PEReferenceInDtd))
20 },
21 Token::CommentStart => {
22 self.into_state_continue(State::InsideDoctype(DoctypeSubstate::Comment))
23 },
24 Token::SingleQuote | Token::DoubleQuote => {
25 // just discard string literals
26 self.data.quote = Some(super::QuoteToken::from_token(&t));
27 self.into_state_continue(State::InsideDoctype(DoctypeSubstate::String))
28 },
29 Token::CDataEnd | Token::CDataStart => Some(self.error(SyntaxError::UnexpectedToken(t))),
30 // TODO: parse SYSTEM, and [
31 _ => None,
32 },
33 DoctypeSubstate::String => match t {
34 Token::SingleQuote if self.data.quote != Some(QuoteToken::SingleQuoteToken) => None,
35 Token::DoubleQuote if self.data.quote != Some(QuoteToken::DoubleQuoteToken) => None,
36 Token::SingleQuote | Token::DoubleQuote => {
37 self.data.quote = None;
38 self.into_state_continue(State::InsideDoctype(DoctypeSubstate::Outside))
39 },
40 _ => None,
41 },
42 DoctypeSubstate::Comment => match t {
43 Token::CommentEnd => {
44 self.into_state_continue(State::InsideDoctype(DoctypeSubstate::Outside))
45 },
46 _ => None,
47 },
48 DoctypeSubstate::InsideName => match t {
49 Token::Character(c @ 'A'..='Z') => {
50 self.buf.push(c);
51 None
52 },
53 Token::Character(c) if is_whitespace_char(c) => {
54 let buf = self.take_buf();
55 match buf.as_str() {
56 "ENTITY" => self.into_state_continue(State::InsideDoctype(DoctypeSubstate::BeforeEntityName)),
57 "NOTATION" | "ELEMENT" | "ATTLIST" => self.into_state_continue(State::InsideDoctype(DoctypeSubstate::SkipDeclaration)),
58 _ => Some(self.error(SyntaxError::UnknownMarkupDeclaration(buf.into()))),
59 }
60 },
61 _ => Some(self.error(SyntaxError::UnexpectedToken(t))),
62 },
63 DoctypeSubstate::BeforeEntityName => {
64 self.data.name.clear();
65 match t {
66 Token::Character(c) if is_whitespace_char(c) => None,
67 Token::Character('%') => { // % is for PEDecl
68 self.data.name.push('%');
69 self.into_state_continue(State::InsideDoctype(DoctypeSubstate::PEReferenceDefinitionStart))
70 },
71 Token::Character(c) if is_name_start_char(c) => {
72 if self.data.name.len() > self.config.max_name_length {
73 return Some(self.error(SyntaxError::ExceededConfiguredLimit));
74 }
75 self.data.name.push(c);
76 self.into_state_continue(State::InsideDoctype(DoctypeSubstate::EntityName))
77 },
78 _ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))),
79 }
80 },
81 DoctypeSubstate::EntityName => match t {
82 Token::Character(c) if is_whitespace_char(c) => {
83 self.into_state_continue(State::InsideDoctype(DoctypeSubstate::BeforeEntityValue))
84 },
85 Token::Character(c) if is_name_char(c) => {
86 if self.data.name.len() > self.config.max_name_length {
87 return Some(self.error(SyntaxError::ExceededConfiguredLimit));
88 }
89 self.data.name.push(c);
90 None
91 },
92 _ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))),
93 },
94 DoctypeSubstate::BeforeEntityValue => {
95 self.buf.clear();
96 match t {
97 Token::Character(c) if is_whitespace_char(c) => None,
98 // SYSTEM/PUBLIC not supported
99 Token::Character('S' | 'P') => {
100 let name = self.data.take_name();
101 self.entities.entry(name).or_insert_with(String::new); // Dummy value, but at least the name is recognized
102
103 self.into_state_continue(State::InsideDoctype(DoctypeSubstate::SkipDeclaration))
104 },
105 Token::SingleQuote | Token::DoubleQuote => {
106 self.data.quote = Some(super::QuoteToken::from_token(&t));
107 self.into_state_continue(State::InsideDoctype(DoctypeSubstate::EntityValue))
108 },
109 _ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))),
110 }
111 },
112 DoctypeSubstate::EntityValue => match t {
113 Token::SingleQuote if self.data.quote != Some(QuoteToken::SingleQuoteToken) => { self.buf.push('\''); None },
114 Token::DoubleQuote if self.data.quote != Some(QuoteToken::DoubleQuoteToken) => { self.buf.push('"'); None },
115 Token::SingleQuote | Token::DoubleQuote => {
116 self.data.quote = None;
117 let name = self.data.take_name();
118 let val = self.take_buf();
119 self.entities.entry(name).or_insert(val); // First wins
120 self.into_state_continue(State::InsideDoctype(DoctypeSubstate::SkipDeclaration)) // FIXME
121 },
122 Token::ReferenceStart | Token::Character('&') => {
123 self.data.ref_data.clear();
124 self.into_state_continue(State::InsideDoctype(DoctypeSubstate::NumericReferenceStart))
125 },
126 Token::Character('%') => {
127 self.data.ref_data.clear();
128 self.data.ref_data.push('%'); // include literal % in the name to distinguish from regular entities
129 self.into_state_continue(State::InsideDoctype(DoctypeSubstate::PEReferenceInValue))
130 },
131 Token::Character(c) if !self.is_valid_xml_char(c) => {
132 Some(self.error(SyntaxError::InvalidCharacterEntity(c as u32)))
133 },
134 Token::Character(c) => {
135 self.buf.push(c);
136 None
137 },
138 _ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))),
139 },
140 DoctypeSubstate::PEReferenceDefinitionStart => match t {
141 Token::Character(c) if is_whitespace_char(c) => {
142 None
143 },
144 Token::Character(c) if is_name_start_char(c) => {
145 debug_assert_eq!(self.data.name, "%");
146 self.data.name.push(c);
147 self.into_state_continue(State::InsideDoctype(DoctypeSubstate::PEReferenceDefinition))
148 },
149 _ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))),
150 },
151 DoctypeSubstate::PEReferenceDefinition => match t {
152 Token::Character(c) if is_name_char(c) => {
153 if self.data.name.len() > self.config.max_name_length {
154 return Some(self.error(SyntaxError::ExceededConfiguredLimit));
155 }
156 self.data.name.push(c);
157 None
158 },
159 Token::Character(c) if is_whitespace_char(c) => {
160 self.into_state_continue(State::InsideDoctype(DoctypeSubstate::BeforeEntityValue))
161 },
162 _ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))),
163 },
164 DoctypeSubstate::PEReferenceInDtd => match t {
165 Token::Character(c) if is_name_char(c) => {
166 self.data.ref_data.push(c);
167 None
168 },
169 Token::ReferenceEnd | Token::Character(';') => {
170 let name = self.data.take_ref_data();
171 match self.entities.get(&name) {
172 Some(ent) => {
173 if let Err(e) = self.lexer.reparse(ent) {
174 return Some(Err(e));
175 }
176 self.into_state_continue(State::InsideDoctype(DoctypeSubstate::Outside))
177 },
178 None => Some(self.error(SyntaxError::UndefinedEntity(name.into()))),
179 }
180 },
181 _ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))),
182 },
183 DoctypeSubstate::PEReferenceInValue => match t {
184 Token::Character(c) if is_name_char(c) => {
185 self.data.ref_data.push(c);
186 None
187 },
188 Token::ReferenceEnd | Token::Character(';') => {
189 let name = self.data.take_ref_data();
190 match self.entities.get(&name) {
191 Some(ent) => {
192 self.buf.push_str(ent);
193 self.into_state_continue(State::InsideDoctype(DoctypeSubstate::EntityValue))
194 },
195 None => Some(self.error(SyntaxError::UndefinedEntity(name.into()))),
196 }
197 },
198 _ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))),
199 },
200 DoctypeSubstate::NumericReferenceStart => match t {
201 Token::Character('#') => {
202 self.into_state_continue(State::InsideDoctype(DoctypeSubstate::NumericReference))
203 },
204 Token::Character(c) if !self.is_valid_xml_char(c) => {
205 Some(self.error(SyntaxError::InvalidCharacterEntity(c as u32)))
206 },
207 Token::Character(c) => {
208 self.buf.push('&');
209 self.buf.push(c);
210 // named entities are not expanded inside doctype
211 self.into_state_continue(State::InsideDoctype(DoctypeSubstate::EntityValue))
212 },
213 _ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))),
214 },
215 DoctypeSubstate::NumericReference => match t {
216 Token::ReferenceEnd | Token::Character(';') => {
217 let r = self.data.take_ref_data();
218 // https://www.w3.org/TR/xml/#sec-entexpand
219 match self.numeric_reference_from_str(&r) {
220 Ok(c) => {
221 self.buf.push(c);
222 self.into_state_continue(State::InsideDoctype(DoctypeSubstate::EntityValue))
223 }
224 Err(e) => Some(self.error(e)),
225 }
226 },
227 Token::Character(c) if !self.is_valid_xml_char(c) => {
228 Some(self.error(SyntaxError::InvalidCharacterEntity(c as u32)))
229 },
230 Token::Character(c) => {
231 self.data.ref_data.push(c);
232 None
233 },
234 _ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))),
235 },
236 DoctypeSubstate::SkipDeclaration => match t {
237 Token::TagEnd => {
238 self.into_state_continue(State::InsideDoctype(DoctypeSubstate::Outside))
239 },
240 _ => None,
241 },
242 }
243 }
244}
245