| 1 | use crate::common::{is_whitespace_char, XmlVersion}; |
| 2 | use crate::reader::error::SyntaxError; |
| 3 | use crate::reader::events::XmlEvent; |
| 4 | use crate::reader::lexer::Token; |
| 5 | use crate::util::Encoding; |
| 6 | |
| 7 | use super::{ |
| 8 | DeclarationSubstate, Encountered, PullParser, QualifiedNameTarget, Result, State, |
| 9 | DEFAULT_VERSION, |
| 10 | }; |
| 11 | |
| 12 | impl PullParser { |
| 13 | #[inline (never)] |
| 14 | fn emit_start_document(&mut self) -> Option<Result> { |
| 15 | debug_assert!(self.encountered == Encountered::None); |
| 16 | self.encountered = Encountered::Declaration; |
| 17 | |
| 18 | let version = self.data.version; |
| 19 | let encoding = self.data.take_encoding(); |
| 20 | let standalone = self.data.standalone; |
| 21 | |
| 22 | if let Some(new_encoding) = encoding.as_deref() { |
| 23 | let new_encoding = match new_encoding.parse() { |
| 24 | Ok(e) => e, |
| 25 | Err(_) if self.config.ignore_invalid_encoding_declarations => Encoding::Latin1, |
| 26 | Err(_) => return Some(self.error(SyntaxError::UnsupportedEncoding(new_encoding.into()))), |
| 27 | }; |
| 28 | let current_encoding = self.lexer.encoding(); |
| 29 | if current_encoding != new_encoding { |
| 30 | let set = match (current_encoding, new_encoding) { |
| 31 | (Encoding::Unknown | Encoding::Default, new) if new != Encoding::Utf16 => new, |
| 32 | (Encoding::Utf16Be | Encoding::Utf16Le, Encoding::Utf16) => current_encoding, |
| 33 | _ if self.config.ignore_invalid_encoding_declarations => current_encoding, |
| 34 | _ => return Some(self.error(SyntaxError::ConflictingEncoding(new_encoding, current_encoding))), |
| 35 | }; |
| 36 | self.lexer.set_encoding(set); |
| 37 | } |
| 38 | } |
| 39 | |
| 40 | let current_encoding = self.lexer.encoding(); |
| 41 | self.into_state_emit(State::OutsideTag, Ok(XmlEvent::StartDocument { |
| 42 | version: version.unwrap_or(DEFAULT_VERSION), |
| 43 | encoding: encoding.unwrap_or_else(move || current_encoding.to_string()), |
| 44 | standalone |
| 45 | })) |
| 46 | } |
| 47 | |
| 48 | // TODO: remove redundancy via macros or extra methods |
| 49 | pub fn inside_declaration(&mut self, t: Token, s: DeclarationSubstate) -> Option<Result> { |
| 50 | |
| 51 | match s { |
| 52 | DeclarationSubstate::BeforeVersion => match t { |
| 53 | Token::Character('v' ) => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideVersion)), |
| 54 | Token::Character(c) if is_whitespace_char(c) => None, // continue |
| 55 | _ => Some(self.error(SyntaxError::UnexpectedToken(t))), |
| 56 | }, |
| 57 | |
| 58 | DeclarationSubstate::InsideVersion => self.read_qualified_name(t, QualifiedNameTarget::AttributeNameTarget, |this, token, name| { |
| 59 | match &*name.local_name { |
| 60 | "ersion" if name.namespace.is_none() => |
| 61 | this.into_state_continue(State::InsideDeclaration( |
| 62 | if token == Token::EqualsSign { |
| 63 | DeclarationSubstate::InsideVersionValue |
| 64 | } else { |
| 65 | DeclarationSubstate::AfterVersion |
| 66 | } |
| 67 | )), |
| 68 | _ => Some(this.error(SyntaxError::UnexpectedNameInsideXml(name.to_string().into()))), |
| 69 | } |
| 70 | }), |
| 71 | |
| 72 | DeclarationSubstate::AfterVersion => match t { |
| 73 | Token::EqualsSign => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideVersionValue)), |
| 74 | Token::Character(c) if is_whitespace_char(c) => None, |
| 75 | _ => Some(self.error(SyntaxError::UnexpectedToken(t))), |
| 76 | }, |
| 77 | |
| 78 | DeclarationSubstate::InsideVersionValue => self.read_attribute_value(t, |this, value| { |
| 79 | this.data.version = match &*value { |
| 80 | "1.0" => Some(XmlVersion::Version10), |
| 81 | "1.1" => Some(XmlVersion::Version11), |
| 82 | _ => None |
| 83 | }; |
| 84 | if this.data.version.is_some() { |
| 85 | this.into_state_continue(State::InsideDeclaration(DeclarationSubstate::AfterVersionValue)) |
| 86 | } else { |
| 87 | Some(this.error(SyntaxError::UnexpectedXmlVersion(value.into()))) |
| 88 | } |
| 89 | }), |
| 90 | |
| 91 | DeclarationSubstate::AfterVersionValue => match t { |
| 92 | Token::Character(c) if is_whitespace_char(c) => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::BeforeEncoding)), |
| 93 | Token::ProcessingInstructionEnd => self.emit_start_document(), |
| 94 | _ => Some(self.error(SyntaxError::UnexpectedToken(t))), |
| 95 | }, |
| 96 | |
| 97 | DeclarationSubstate::BeforeEncoding => match t { |
| 98 | Token::Character('e' ) => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideEncoding)), |
| 99 | Token::Character('s' ) => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideStandaloneDecl)), |
| 100 | Token::ProcessingInstructionEnd => self.emit_start_document(), |
| 101 | Token::Character(c) if is_whitespace_char(c) => None, // skip whitespace |
| 102 | _ => Some(self.error(SyntaxError::UnexpectedToken(t))), |
| 103 | }, |
| 104 | |
| 105 | DeclarationSubstate::InsideEncoding => self.read_qualified_name(t, QualifiedNameTarget::AttributeNameTarget, |this, token, name| { |
| 106 | match &*name.local_name { |
| 107 | "ncoding" if name.namespace.is_none() => |
| 108 | this.into_state_continue(State::InsideDeclaration( |
| 109 | if token == Token::EqualsSign { DeclarationSubstate::InsideEncodingValue } else { DeclarationSubstate::AfterEncoding } |
| 110 | )), |
| 111 | _ => Some(this.error(SyntaxError::UnexpectedName(name.to_string().into()))) |
| 112 | } |
| 113 | }), |
| 114 | |
| 115 | DeclarationSubstate::AfterEncoding => match t { |
| 116 | Token::EqualsSign => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideEncodingValue)), |
| 117 | Token::Character(c) if is_whitespace_char(c) => None, |
| 118 | _ => Some(self.error(SyntaxError::UnexpectedToken(t))), |
| 119 | }, |
| 120 | |
| 121 | DeclarationSubstate::InsideEncodingValue => self.read_attribute_value(t, |this, value| { |
| 122 | this.data.encoding = Some(value); |
| 123 | this.into_state_continue(State::InsideDeclaration(DeclarationSubstate::AfterEncodingValue)) |
| 124 | }), |
| 125 | |
| 126 | DeclarationSubstate::AfterEncodingValue => match t { |
| 127 | Token::Character(c) if is_whitespace_char(c) => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::BeforeStandaloneDecl)), |
| 128 | Token::ProcessingInstructionEnd => self.emit_start_document(), |
| 129 | _ => Some(self.error(SyntaxError::UnexpectedToken(t))), |
| 130 | }, |
| 131 | |
| 132 | DeclarationSubstate::BeforeStandaloneDecl => match t { |
| 133 | Token::Character('s' ) => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideStandaloneDecl)), |
| 134 | Token::ProcessingInstructionEnd => self.emit_start_document(), |
| 135 | Token::Character(c) if is_whitespace_char(c) => None, // skip whitespace |
| 136 | _ => Some(self.error(SyntaxError::UnexpectedToken(t))), |
| 137 | }, |
| 138 | |
| 139 | DeclarationSubstate::InsideStandaloneDecl => self.read_qualified_name(t, QualifiedNameTarget::AttributeNameTarget, |this, token, name| { |
| 140 | match &*name.local_name { |
| 141 | "tandalone" if name.namespace.is_none() => |
| 142 | this.into_state_continue(State::InsideDeclaration( |
| 143 | if token == Token::EqualsSign { |
| 144 | DeclarationSubstate::InsideStandaloneDeclValue |
| 145 | } else { |
| 146 | DeclarationSubstate::AfterStandaloneDecl |
| 147 | } |
| 148 | )), |
| 149 | _ => Some(this.error(SyntaxError::UnexpectedName(name.to_string().into()))), |
| 150 | } |
| 151 | }), |
| 152 | |
| 153 | DeclarationSubstate::AfterStandaloneDecl => match t { |
| 154 | Token::EqualsSign => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideStandaloneDeclValue)), |
| 155 | Token::Character(c) if is_whitespace_char(c) => None, |
| 156 | _ => Some(self.error(SyntaxError::UnexpectedToken(t))), |
| 157 | }, |
| 158 | |
| 159 | DeclarationSubstate::InsideStandaloneDeclValue => self.read_attribute_value(t, |this, value| { |
| 160 | let standalone = match &*value { |
| 161 | "yes" => Some(true), |
| 162 | "no" => Some(false), |
| 163 | _ => None |
| 164 | }; |
| 165 | if standalone.is_some() { |
| 166 | this.data.standalone = standalone; |
| 167 | this.into_state_continue(State::InsideDeclaration(DeclarationSubstate::AfterStandaloneDeclValue)) |
| 168 | } else { |
| 169 | Some(this.error(SyntaxError::InvalidStandaloneDeclaration(value.into()))) |
| 170 | } |
| 171 | }), |
| 172 | |
| 173 | DeclarationSubstate::AfterStandaloneDeclValue => match t { |
| 174 | Token::ProcessingInstructionEnd => self.emit_start_document(), |
| 175 | Token::Character(c) if is_whitespace_char(c) => None, // skip whitespace |
| 176 | _ => Some(self.error(SyntaxError::UnexpectedToken(t))), |
| 177 | }, |
| 178 | } |
| 179 | } |
| 180 | } |
| 181 | |