1 | use crate::reader::error::SyntaxError; |
2 | use crate::common::{is_name_char, is_name_start_char, is_whitespace_char}; |
3 | |
4 | use crate::reader::events::XmlEvent; |
5 | use crate::reader::lexer::Token; |
6 | |
7 | use super::{DeclarationSubstate, ProcessingInstructionSubstate, PullParser, Result, State, Encountered}; |
8 | |
9 | impl PullParser { |
10 | pub fn inside_processing_instruction(&mut self, t: Token, s: ProcessingInstructionSubstate) -> Option<Result> { |
11 | match s { |
12 | ProcessingInstructionSubstate::PIInsideName => match t { |
13 | Token::Character(c) if self.buf.is_empty() && is_name_start_char(c) || |
14 | self.buf_has_data() && is_name_char(c) => { |
15 | if self.buf.len() > self.config.max_name_length { |
16 | return Some(self.error(SyntaxError::ExceededConfiguredLimit)); |
17 | } |
18 | self.buf.push(c); |
19 | None |
20 | }, |
21 | |
22 | Token::ProcessingInstructionEnd => { |
23 | // self.buf contains PI name |
24 | let name = self.take_buf(); |
25 | |
26 | // Don't need to check for declaration because it has mandatory attributes |
27 | // but there is none |
28 | match &*name { |
29 | // Name is empty, it is an error |
30 | "" => Some(self.error(SyntaxError::ProcessingInstructionWithoutName)), |
31 | |
32 | // Found <?xml-like PI not at the beginning of a document, |
33 | // it is an error - see section 2.6 of XML 1.1 spec |
34 | n if "xml" .eq_ignore_ascii_case(n) => |
35 | Some(self.error(SyntaxError::InvalidXmlProcessingInstruction(name.into()))), |
36 | |
37 | // All is ok, emitting event |
38 | _ => { |
39 | debug_assert!(self.next_event.is_none(), " {:?}" , self.next_event); |
40 | // can't have a PI before `<?xml` |
41 | let event1 = self.set_encountered(Encountered::Declaration); |
42 | let event2 = Some(Ok(XmlEvent::ProcessingInstruction { |
43 | name, |
44 | data: None |
45 | })); |
46 | // emitting two events at once is cumbersome |
47 | let event1 = if event1.is_some() { |
48 | self.next_event = event2; |
49 | event1 |
50 | } else { |
51 | event2 |
52 | }; |
53 | self.into_state(State::OutsideTag, event1) |
54 | } |
55 | } |
56 | } |
57 | |
58 | Token::Character(c) if is_whitespace_char(c) => { |
59 | // self.buf contains PI name |
60 | let name = self.take_buf(); |
61 | |
62 | match &*name { |
63 | // We have not ever encountered an element and have not parsed XML declaration |
64 | "xml" if self.encountered == Encountered::None => |
65 | self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::BeforeVersion)), |
66 | |
67 | // Found <?xml-like PI after the beginning of a document, |
68 | // it is an error - see section 2.6 of XML 1.1 spec |
69 | n if "xml" .eq_ignore_ascii_case(n) => |
70 | Some(self.error(SyntaxError::InvalidXmlProcessingInstruction(name.into()))), |
71 | |
72 | // All is ok, starting parsing PI data |
73 | _ => { |
74 | self.data.name = name; |
75 | // can't have a PI before `<?xml` |
76 | let next_event = self.set_encountered(Encountered::Declaration); |
77 | self.into_state(State::InsideProcessingInstruction(ProcessingInstructionSubstate::PIInsideData), next_event) |
78 | } |
79 | } |
80 | } |
81 | |
82 | _ => { |
83 | let buf = self.take_buf(); |
84 | Some(self.error(SyntaxError::UnexpectedProcessingInstruction(buf.into(), t))) |
85 | } |
86 | }, |
87 | |
88 | ProcessingInstructionSubstate::PIInsideData => match t { |
89 | Token::ProcessingInstructionEnd => { |
90 | let name = self.data.take_name(); |
91 | let data = self.take_buf(); |
92 | self.into_state_emit( |
93 | State::OutsideTag, |
94 | Ok(XmlEvent::ProcessingInstruction { |
95 | name, |
96 | data: Some(data), |
97 | }), |
98 | ) |
99 | }, |
100 | |
101 | Token::Character(c) if !self.is_valid_xml_char(c) => { |
102 | Some(self.error(SyntaxError::InvalidCharacterEntity(c as u32))) |
103 | }, |
104 | |
105 | // Any other token should be treated as plain characters |
106 | _ => { |
107 | if self.buf.len() > self.config.max_data_length { |
108 | return Some(self.error(SyntaxError::ExceededConfiguredLimit)); |
109 | } |
110 | t.push_to_string(&mut self.buf); |
111 | None |
112 | } |
113 | }, |
114 | } |
115 | } |
116 | } |
117 | |