1 | use crate::reader::error::SyntaxError; |
2 | use std::char; |
3 | use crate::common::{is_name_char, is_name_start_char, is_whitespace_char}; |
4 | use crate::reader::lexer::Token; |
5 | use super::{PullParser, Result, State}; |
6 | |
7 | impl PullParser { |
8 | pub fn inside_reference(&mut self, t: Token) -> Option<Result> { |
9 | match t { |
10 | Token::Character(c) if !self.data.ref_data.is_empty() && is_name_char(c) || |
11 | self.data.ref_data.is_empty() && (is_name_start_char(c) || c == '#' ) => { |
12 | self.data.ref_data.push(c); |
13 | None |
14 | } |
15 | |
16 | Token::ReferenceEnd => { |
17 | let name = self.data.take_ref_data(); |
18 | if name.is_empty() { |
19 | return Some(self.error(SyntaxError::EmptyEntity)); |
20 | } |
21 | |
22 | let c = match &*name { |
23 | "lt" => Some('<' ), |
24 | "gt" => Some('>' ), |
25 | "amp" => Some('&' ), |
26 | "apos" => Some(' \'' ), |
27 | "quot" => Some('"' ), |
28 | _ if name.starts_with('#' ) => match self.numeric_reference_from_str(&name[1..]) { |
29 | Ok(c) => Some(c), |
30 | Err(e) => return Some(self.error(e)) |
31 | }, |
32 | _ => None, |
33 | }; |
34 | if let Some(c) = c { |
35 | self.buf.push(c); |
36 | } else if let Some(v) = self.config.c.extra_entities.get(&name) { |
37 | self.buf.push_str(v); |
38 | } else if let Some(v) = self.entities.get(&name) { |
39 | if self.state_after_reference == State::OutsideTag { |
40 | // an entity can expand to *elements*, so outside of a tag it needs a full reparse |
41 | if let Err(e) = self.lexer.reparse(v) { |
42 | return Some(Err(e)); |
43 | } |
44 | } else { |
45 | // however, inside attributes it's not allowed to affect attribute quoting, |
46 | // so it can't be fed to the lexer |
47 | self.buf.push_str(v); |
48 | } |
49 | } else { |
50 | return Some(self.error(SyntaxError::UnexpectedEntity(name.into()))); |
51 | } |
52 | let prev_st = self.state_after_reference; |
53 | if prev_st == State::OutsideTag && !is_whitespace_char(self.buf.chars().last().unwrap_or(' \0' )) { |
54 | self.inside_whitespace = false; |
55 | } |
56 | self.into_state_continue(prev_st) |
57 | } |
58 | |
59 | _ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))), |
60 | } |
61 | } |
62 | |
63 | pub(crate) fn numeric_reference_from_str(&self, num_str: &str) -> std::result::Result<char, SyntaxError> { |
64 | let val = if let Some(hex) = num_str.strip_prefix('x' ) { |
65 | u32::from_str_radix(hex, 16).map_err(move |_| SyntaxError::InvalidNumericEntity(num_str.into()))? |
66 | } else { |
67 | u32::from_str_radix(num_str, 10).map_err(move |_| SyntaxError::InvalidNumericEntity(num_str.into()))? |
68 | }; |
69 | match char::from_u32(val) { |
70 | Some(c) if self.is_valid_xml_char(c) => Ok(c), |
71 | Some(_) if self.config.c.replace_unknown_entity_references => Ok(' \u{fffd}' ), |
72 | None if self.config.c.replace_unknown_entity_references => { |
73 | Ok(' \u{fffd}' ) |
74 | }, |
75 | _ => Err(SyntaxError::InvalidCharacterEntity(val)), |
76 | } |
77 | } |
78 | } |
79 | |