1 | use crate::common::{is_whitespace_char, XmlVersion}; |
2 | use crate::reader::error::SyntaxError; |
3 | use crate::reader::events::XmlEvent; |
4 | use crate::reader::lexer::Token; |
5 | use crate::util::Encoding; |
6 | |
7 | use super::{ |
8 | DeclarationSubstate, Encountered, PullParser, QualifiedNameTarget, Result, State, |
9 | DEFAULT_VERSION, |
10 | }; |
11 | |
12 | impl PullParser { |
13 | #[inline (never)] |
14 | fn emit_start_document(&mut self) -> Option<Result> { |
15 | debug_assert!(self.encountered == Encountered::None); |
16 | self.encountered = Encountered::Declaration; |
17 | |
18 | let version = self.data.version; |
19 | let encoding = self.data.take_encoding(); |
20 | let standalone = self.data.standalone; |
21 | |
22 | if let Some(new_encoding) = encoding.as_deref() { |
23 | let new_encoding = match new_encoding.parse() { |
24 | Ok(e) => e, |
25 | Err(_) if self.config.ignore_invalid_encoding_declarations => Encoding::Latin1, |
26 | Err(_) => return Some(self.error(SyntaxError::UnsupportedEncoding(new_encoding.into()))), |
27 | }; |
28 | let current_encoding = self.lexer.encoding(); |
29 | if current_encoding != new_encoding { |
30 | let set = match (current_encoding, new_encoding) { |
31 | (Encoding::Unknown | Encoding::Default, new) if new != Encoding::Utf16 => new, |
32 | (Encoding::Utf16Be | Encoding::Utf16Le, Encoding::Utf16) => current_encoding, |
33 | _ if self.config.ignore_invalid_encoding_declarations => current_encoding, |
34 | _ => return Some(self.error(SyntaxError::ConflictingEncoding(new_encoding, current_encoding))), |
35 | }; |
36 | self.lexer.set_encoding(set); |
37 | } |
38 | } |
39 | |
40 | let current_encoding = self.lexer.encoding(); |
41 | self.into_state_emit(State::OutsideTag, Ok(XmlEvent::StartDocument { |
42 | version: version.unwrap_or(DEFAULT_VERSION), |
43 | encoding: encoding.unwrap_or_else(move || current_encoding.to_string()), |
44 | standalone |
45 | })) |
46 | } |
47 | |
48 | // TODO: remove redundancy via macros or extra methods |
49 | pub fn inside_declaration(&mut self, t: Token, s: DeclarationSubstate) -> Option<Result> { |
50 | |
51 | match s { |
52 | DeclarationSubstate::BeforeVersion => match t { |
53 | Token::Character('v' ) => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideVersion)), |
54 | Token::Character(c) if is_whitespace_char(c) => None, // continue |
55 | _ => Some(self.error(SyntaxError::UnexpectedToken(t))), |
56 | }, |
57 | |
58 | DeclarationSubstate::InsideVersion => self.read_qualified_name(t, QualifiedNameTarget::AttributeNameTarget, |this, token, name| { |
59 | match &*name.local_name { |
60 | "ersion" if name.namespace.is_none() => |
61 | this.into_state_continue(State::InsideDeclaration( |
62 | if token == Token::EqualsSign { |
63 | DeclarationSubstate::InsideVersionValue |
64 | } else { |
65 | DeclarationSubstate::AfterVersion |
66 | } |
67 | )), |
68 | _ => Some(this.error(SyntaxError::UnexpectedNameInsideXml(name.to_string().into()))), |
69 | } |
70 | }), |
71 | |
72 | DeclarationSubstate::AfterVersion => match t { |
73 | Token::EqualsSign => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideVersionValue)), |
74 | Token::Character(c) if is_whitespace_char(c) => None, |
75 | _ => Some(self.error(SyntaxError::UnexpectedToken(t))), |
76 | }, |
77 | |
78 | DeclarationSubstate::InsideVersionValue => self.read_attribute_value(t, |this, value| { |
79 | this.data.version = match &*value { |
80 | "1.0" => Some(XmlVersion::Version10), |
81 | "1.1" => Some(XmlVersion::Version11), |
82 | _ => None |
83 | }; |
84 | if this.data.version.is_some() { |
85 | this.into_state_continue(State::InsideDeclaration(DeclarationSubstate::AfterVersionValue)) |
86 | } else { |
87 | Some(this.error(SyntaxError::UnexpectedXmlVersion(value.into()))) |
88 | } |
89 | }), |
90 | |
91 | DeclarationSubstate::AfterVersionValue => match t { |
92 | Token::Character(c) if is_whitespace_char(c) => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::BeforeEncoding)), |
93 | Token::ProcessingInstructionEnd => self.emit_start_document(), |
94 | _ => Some(self.error(SyntaxError::UnexpectedToken(t))), |
95 | }, |
96 | |
97 | DeclarationSubstate::BeforeEncoding => match t { |
98 | Token::Character('e' ) => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideEncoding)), |
99 | Token::Character('s' ) => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideStandaloneDecl)), |
100 | Token::ProcessingInstructionEnd => self.emit_start_document(), |
101 | Token::Character(c) if is_whitespace_char(c) => None, // skip whitespace |
102 | _ => Some(self.error(SyntaxError::UnexpectedToken(t))), |
103 | }, |
104 | |
105 | DeclarationSubstate::InsideEncoding => self.read_qualified_name(t, QualifiedNameTarget::AttributeNameTarget, |this, token, name| { |
106 | match &*name.local_name { |
107 | "ncoding" if name.namespace.is_none() => |
108 | this.into_state_continue(State::InsideDeclaration( |
109 | if token == Token::EqualsSign { DeclarationSubstate::InsideEncodingValue } else { DeclarationSubstate::AfterEncoding } |
110 | )), |
111 | _ => Some(this.error(SyntaxError::UnexpectedName(name.to_string().into()))) |
112 | } |
113 | }), |
114 | |
115 | DeclarationSubstate::AfterEncoding => match t { |
116 | Token::EqualsSign => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideEncodingValue)), |
117 | Token::Character(c) if is_whitespace_char(c) => None, |
118 | _ => Some(self.error(SyntaxError::UnexpectedToken(t))), |
119 | }, |
120 | |
121 | DeclarationSubstate::InsideEncodingValue => self.read_attribute_value(t, |this, value| { |
122 | this.data.encoding = Some(value); |
123 | this.into_state_continue(State::InsideDeclaration(DeclarationSubstate::AfterEncodingValue)) |
124 | }), |
125 | |
126 | DeclarationSubstate::AfterEncodingValue => match t { |
127 | Token::Character(c) if is_whitespace_char(c) => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::BeforeStandaloneDecl)), |
128 | Token::ProcessingInstructionEnd => self.emit_start_document(), |
129 | _ => Some(self.error(SyntaxError::UnexpectedToken(t))), |
130 | }, |
131 | |
132 | DeclarationSubstate::BeforeStandaloneDecl => match t { |
133 | Token::Character('s' ) => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideStandaloneDecl)), |
134 | Token::ProcessingInstructionEnd => self.emit_start_document(), |
135 | Token::Character(c) if is_whitespace_char(c) => None, // skip whitespace |
136 | _ => Some(self.error(SyntaxError::UnexpectedToken(t))), |
137 | }, |
138 | |
139 | DeclarationSubstate::InsideStandaloneDecl => self.read_qualified_name(t, QualifiedNameTarget::AttributeNameTarget, |this, token, name| { |
140 | match &*name.local_name { |
141 | "tandalone" if name.namespace.is_none() => |
142 | this.into_state_continue(State::InsideDeclaration( |
143 | if token == Token::EqualsSign { |
144 | DeclarationSubstate::InsideStandaloneDeclValue |
145 | } else { |
146 | DeclarationSubstate::AfterStandaloneDecl |
147 | } |
148 | )), |
149 | _ => Some(this.error(SyntaxError::UnexpectedName(name.to_string().into()))), |
150 | } |
151 | }), |
152 | |
153 | DeclarationSubstate::AfterStandaloneDecl => match t { |
154 | Token::EqualsSign => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideStandaloneDeclValue)), |
155 | Token::Character(c) if is_whitespace_char(c) => None, |
156 | _ => Some(self.error(SyntaxError::UnexpectedToken(t))), |
157 | }, |
158 | |
159 | DeclarationSubstate::InsideStandaloneDeclValue => self.read_attribute_value(t, |this, value| { |
160 | let standalone = match &*value { |
161 | "yes" => Some(true), |
162 | "no" => Some(false), |
163 | _ => None |
164 | }; |
165 | if standalone.is_some() { |
166 | this.data.standalone = standalone; |
167 | this.into_state_continue(State::InsideDeclaration(DeclarationSubstate::AfterStandaloneDeclValue)) |
168 | } else { |
169 | Some(this.error(SyntaxError::InvalidStandaloneDeclaration(value.into()))) |
170 | } |
171 | }), |
172 | |
173 | DeclarationSubstate::AfterStandaloneDeclValue => match t { |
174 | Token::ProcessingInstructionEnd => self.emit_start_document(), |
175 | Token::Character(c) if is_whitespace_char(c) => None, // skip whitespace |
176 | _ => Some(self.error(SyntaxError::UnexpectedToken(t))), |
177 | }, |
178 | } |
179 | } |
180 | } |
181 | |