1use crate::reader::error::SyntaxError;
2use crate::common::is_whitespace_char;
3use crate::reader::events::XmlEvent;
4use crate::reader::lexer::Token;
5
6use super::{
7 ClosingTagSubstate, DoctypeSubstate, Encountered, OpeningTagSubstate,
8 ProcessingInstructionSubstate, PullParser, Result, State,
9};
10
11impl PullParser {
12 pub fn outside_tag(&mut self, t: Token) -> Option<Result> {
13 match t {
14 Token::Character(c) => {
15 if is_whitespace_char(c) {
16 // skip whitespace outside of the root element
17 if (self.config.c.trim_whitespace && self.buf.is_empty()) ||
18 (self.depth() == 0 && self.config.c.ignore_root_level_whitespace) {
19 return None;
20 }
21 } else {
22 self.inside_whitespace = false;
23 if self.depth() == 0 {
24 return Some(self.error(SyntaxError::UnexpectedTokenOutsideRoot(t)));
25 }
26 }
27
28 if !self.is_valid_xml_char_not_restricted(c) {
29 return Some(self.error(SyntaxError::InvalidCharacterEntity(c as u32)));
30 }
31
32 if self.buf.is_empty() {
33 self.push_pos();
34 } else if self.buf.len() > self.config.max_data_length {
35 return Some(self.error(SyntaxError::ExceededConfiguredLimit));
36 }
37 self.buf.push(c);
38 None
39 },
40
41 Token::CommentEnd | Token::TagEnd | Token::EqualsSign |
42 Token::DoubleQuote | Token::SingleQuote |
43 Token::ProcessingInstructionEnd | Token::EmptyTagEnd => {
44 if self.depth() == 0 {
45 return Some(self.error(SyntaxError::UnexpectedTokenOutsideRoot(t)));
46 }
47 self.inside_whitespace = false;
48
49 if let Some(s) = t.as_static_str() {
50 if self.buf.is_empty() {
51 self.push_pos();
52 } else if self.buf.len() > self.config.max_data_length {
53 return Some(self.error(SyntaxError::ExceededConfiguredLimit));
54 }
55
56 self.buf.push_str(s);
57 }
58 None
59 },
60
61 Token::ReferenceStart if self.depth() > 0 => {
62 self.state_after_reference = State::OutsideTag;
63 self.into_state_continue(State::InsideReference)
64 },
65
66 Token::ReferenceEnd if self.depth() > 0 => { // Semi-colon in a text outside an entity
67 self.inside_whitespace = false;
68 if self.buf.len() > self.config.max_data_length {
69 return Some(self.error(SyntaxError::ExceededConfiguredLimit));
70 }
71 Token::ReferenceEnd.push_to_string(&mut self.buf);
72 None
73 },
74
75 Token::CommentStart if self.config.c.coalesce_characters && self.config.c.ignore_comments => {
76 let next_event = self.set_encountered(Encountered::Comment);
77 // We need to switch the lexer into a comment mode inside comments
78 self.into_state(State::InsideComment, next_event)
79 }
80
81 Token::CDataStart if self.depth() > 0 && self.config.c.coalesce_characters && self.config.c.cdata_to_characters => {
82 if self.buf.is_empty() {
83 self.push_pos();
84 }
85 self.into_state_continue(State::InsideCData)
86 },
87
88 _ => {
89 // Encountered some markup event, flush the buffer as characters
90 // or a whitespace
91 let mut next_event = if self.buf_has_data() {
92 let buf = self.take_buf();
93 if self.inside_whitespace && self.config.c.trim_whitespace {
94 None
95 } else if self.inside_whitespace && !self.config.c.whitespace_to_characters {
96 debug_assert!(buf.chars().all(|ch| ch.is_whitespace()), "ws={buf:?}");
97 Some(Ok(XmlEvent::Whitespace(buf)))
98 } else if self.config.c.trim_whitespace {
99 Some(Ok(XmlEvent::Characters(buf.trim_matches(is_whitespace_char).into())))
100 } else {
101 Some(Ok(XmlEvent::Characters(buf)))
102 }
103 } else { None };
104 self.inside_whitespace = true; // Reset inside_whitespace flag
105
106 // pos is popped whenever an event is emitted, so pushes must happen only if there will be an event to balance it
107 // and ignored comments don't pop
108 if t != Token::CommentStart || !self.config.c.ignore_comments {
109 self.push_pos();
110 }
111 match t {
112 Token::OpeningTagStart if self.depth() > 0 || self.encountered < Encountered::Element || self.config.allow_multiple_root_elements => {
113 if let Some(e) = self.set_encountered(Encountered::Element) {
114 next_event = Some(e);
115 }
116 self.nst.push_empty();
117 self.into_state(State::InsideOpeningTag(OpeningTagSubstate::InsideName), next_event)
118 },
119
120 Token::ClosingTagStart if self.depth() > 0 =>
121 self.into_state(State::InsideClosingTag(ClosingTagSubstate::CTInsideName), next_event),
122
123 Token::CommentStart => {
124 if let Some(e) = self.set_encountered(Encountered::Comment) {
125 next_event = Some(e);
126 }
127 // We need to switch the lexer into a comment mode inside comments
128 self.into_state(State::InsideComment, next_event)
129 },
130
131 Token::DoctypeStart if self.encountered < Encountered::Doctype => {
132 if let Some(e) = self.set_encountered(Encountered::Doctype) {
133 next_event = Some(e);
134 }
135
136 // We don't have a doctype event so skip this position
137 // FIXME: update when we have a doctype event
138 self.next_pos();
139 self.into_state(State::InsideDoctype(DoctypeSubstate::Outside), next_event)
140 },
141
142 Token::ProcessingInstructionStart =>
143 self.into_state(State::InsideProcessingInstruction(ProcessingInstructionSubstate::PIInsideName), next_event),
144
145 Token::CDataStart if self.depth() > 0 => {
146 self.into_state(State::InsideCData, next_event)
147 },
148
149 _ => Some(self.error(SyntaxError::UnexpectedToken(t)))
150 }
151 }
152 }
153 }
154
155 pub fn document_start(&mut self, t: Token) -> Option<Result> {
156 debug_assert!(self.encountered < Encountered::Declaration);
157
158 match t {
159 Token::Character(c) => {
160 let next_event = self.set_encountered(Encountered::AnyChars);
161
162 if !is_whitespace_char(c) {
163 return Some(self.error(SyntaxError::UnexpectedTokenOutsideRoot(t)));
164 }
165 self.inside_whitespace = true;
166
167 // skip whitespace outside of the root element
168 if (self.config.c.trim_whitespace && self.buf.is_empty()) ||
169 (self.depth() == 0 && self.config.c.ignore_root_level_whitespace) {
170 return self.into_state(State::OutsideTag, next_event);
171 }
172
173 self.push_pos();
174 self.buf.push(c);
175 self.into_state(State::OutsideTag, next_event)
176 },
177
178 Token::CommentStart => {
179 let next_event = self.set_encountered(Encountered::Comment);
180 self.into_state(State::InsideComment, next_event)
181 }
182
183 Token::OpeningTagStart => {
184 let next_event = self.set_encountered(Encountered::Element);
185 self.nst.push_empty();
186 self.into_state(State::InsideOpeningTag(OpeningTagSubstate::InsideName), next_event)
187 },
188
189 Token::DoctypeStart => {
190 let next_event = self.set_encountered(Encountered::Doctype);
191 // We don't have a doctype event so skip this position
192 // FIXME: update when we have a doctype event
193 self.next_pos();
194 self.into_state(State::InsideDoctype(DoctypeSubstate::Outside), next_event)
195 },
196
197 Token::ProcessingInstructionStart => {
198 self.push_pos();
199 self.into_state_continue(State::InsideProcessingInstruction(ProcessingInstructionSubstate::PIInsideName))
200 },
201
202 _ => Some(self.error(SyntaxError::UnexpectedToken(t))),
203 }
204 }
205}
206