| 1 | use super::{PullParser, Result, State}; |
| 2 | use crate::common::{is_name_char, is_name_start_char, is_whitespace_char}; |
| 3 | use crate::reader::error::SyntaxError; |
| 4 | use crate::reader::lexer::Token; |
| 5 | use std::char; |
| 6 | |
| 7 | impl PullParser { |
| 8 | pub fn inside_reference(&mut self, t: Token) -> Option<Result> { |
| 9 | match t { |
| 10 | Token::Character(c) if !self.data.ref_data.is_empty() && is_name_char(c) || |
| 11 | self.data.ref_data.is_empty() && (is_name_start_char(c) || c == '#' ) => { |
| 12 | self.data.ref_data.push(c); |
| 13 | None |
| 14 | }, |
| 15 | |
| 16 | Token::ReferenceEnd => { |
| 17 | let name = self.data.take_ref_data(); |
| 18 | if name.is_empty() { |
| 19 | return Some(self.error(SyntaxError::EmptyEntity)); |
| 20 | } |
| 21 | |
| 22 | let c = match &*name { |
| 23 | "lt" => Some('<' ), |
| 24 | "gt" => Some('>' ), |
| 25 | "amp" => Some('&' ), |
| 26 | "apos" => Some(' \'' ), |
| 27 | "quot" => Some('"' ), |
| 28 | _ if name.starts_with('#' ) => match self.numeric_reference_from_str(&name[1..]) { |
| 29 | Ok(c) => Some(c), |
| 30 | Err(e) => return Some(self.error(e)), |
| 31 | }, |
| 32 | _ => None, |
| 33 | }; |
| 34 | if let Some(c) = c { |
| 35 | self.buf.push(c); |
| 36 | } else if let Some(v) = self.config.c.extra_entities.get(&name) { |
| 37 | self.buf.push_str(v); |
| 38 | } else if let Some(v) = self.entities.get(&name) { |
| 39 | if self.state_after_reference == State::OutsideTag { |
| 40 | // an entity can expand to *elements*, so outside of a tag it needs a full reparse |
| 41 | if let Err(e) = self.lexer.reparse(v) { |
| 42 | return Some(Err(e)); |
| 43 | } |
| 44 | } else { |
| 45 | // however, inside attributes it's not allowed to affect attribute quoting, |
| 46 | // so it can't be fed to the lexer |
| 47 | self.buf.push_str(v); |
| 48 | } |
| 49 | } else { |
| 50 | return Some(self.error(SyntaxError::UnexpectedEntity(name.into()))); |
| 51 | } |
| 52 | let prev_st = self.state_after_reference; |
| 53 | if prev_st == State::OutsideTag && !is_whitespace_char(self.buf.chars().last().unwrap_or(' \0' )) { |
| 54 | self.inside_whitespace = false; |
| 55 | } |
| 56 | self.into_state_continue(prev_st) |
| 57 | }, |
| 58 | |
| 59 | _ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))), |
| 60 | } |
| 61 | } |
| 62 | |
| 63 | pub(crate) fn numeric_reference_from_str(&self, num_str: &str) -> std::result::Result<char, SyntaxError> { |
| 64 | let val = if let Some(hex) = num_str.strip_prefix('x' ) { |
| 65 | u32::from_str_radix(hex, 16).map_err(move |_| SyntaxError::InvalidNumericEntity(num_str.into()))? |
| 66 | } else { |
| 67 | num_str.parse::<u32>().map_err(move |_| SyntaxError::InvalidNumericEntity(num_str.into()))? |
| 68 | }; |
| 69 | match char::from_u32(val) { |
| 70 | Some(c) if self.is_valid_xml_char(c) => Ok(c), |
| 71 | Some(_) if self.config.c.replace_unknown_entity_references => Ok(' \u{fffd}' ), |
| 72 | None if self.config.c.replace_unknown_entity_references => Ok(' \u{fffd}' ), |
| 73 | _ => Err(SyntaxError::InvalidCharacterEntity(val)), |
| 74 | } |
| 75 | } |
| 76 | } |
| 77 | |