1//! Contains an implementation of pull-based XML parser.
2
3use crate::common::{is_xml10_char, is_xml11_char, is_xml11_char_not_restricted, is_name_char, is_name_start_char, is_whitespace_char};
4use crate::common::{Position, TextPosition, XmlVersion};
5use crate::name::OwnedName;
6use crate::namespace::NamespaceStack;
7use crate::reader::config::ParserConfig2;
8use crate::reader::error::SyntaxError;
9use crate::reader::events::XmlEvent;
10use crate::reader::indexset::AttributesSet;
11use crate::reader::lexer::{Lexer, Token};
12use super::{Error, ErrorKind};
13
14use std::collections::HashMap;
15use std::io::Read;
16
/// Generates "take" accessors on `MarkupData`.
///
/// Every `field -> method, Type, default` entry (entries are separated by `;`)
/// expands to a method `fn method(&mut self) -> Type` which moves the current
/// value out of `self.field` and stores `default` in its place.
macro_rules! gen_takes {
    ($($field:ident -> $method:ident, $t:ty, $def:expr);+) => {
        impl MarkupData {
            $(
                #[inline]
                // Some defaults are `None`; a plain `replace` keeps the expansion uniform.
                #[allow(clippy::mem_replace_option_with_none)]
                fn $method(&mut self) -> $t {
                    std::mem::replace(&mut self.$field, $def)
                }
            )+
        }
    };
}
30
31gen_takes!(
32 name -> take_name, String, String::new();
33 ref_data -> take_ref_data, String, String::new();
34
35 encoding -> take_encoding, Option<String>, None;
36
37 element_name -> take_element_name, Option<OwnedName>, None;
38
39 attr_name -> take_attr_name, Option<OwnedName>, None;
40 attributes -> take_attributes, AttributesSet, AttributesSet::new()
41);
42
43mod inside_cdata;
44mod inside_closing_tag_name;
45mod inside_comment;
46mod inside_declaration;
47mod inside_doctype;
48mod inside_opening_tag;
49mod inside_processing_instruction;
50mod inside_reference;
51mod outside_tag;
52
53static DEFAULT_VERSION: XmlVersion = XmlVersion::Version10;
54static DEFAULT_STANDALONE: Option<bool> = None;
55
56type ElementStack = Vec<OwnedName>;
57pub type Result = super::Result<XmlEvent>;
58
59/// Pull-based XML parser.
60pub(crate) struct PullParser {
61 config: ParserConfig2,
62 lexer: Lexer,
63 st: State,
64 state_after_reference: State,
65 buf: String,
66
67 /// From DTD internal subset
68 entities: HashMap<String, String>,
69
70 nst: NamespaceStack,
71
72 data: MarkupData,
73 final_result: Option<Result>,
74 next_event: Option<Result>,
75 est: ElementStack,
76 pos: Vec<TextPosition>,
77
78 encountered: Encountered,
79 inside_whitespace: bool,
80 read_prefix_separator: bool,
81 pop_namespace: bool,
82}
83
/// Keeps track of when the XML declaration can still happen.
///
/// The variants are ordered: `set_encountered` only ever moves the parser
/// forward to a greater value, so the declaration order below is significant.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
enum Encountered {
    /// Nothing has been read yet.
    None = 0,
    /// Whitespace before `<?xml` is not allowed.
    AnyChars,
    Declaration,
    Comment,
    Doctype,
    Element,
}
94
95impl PullParser {
96 /// Returns a new parser using the given config.
97 #[inline]
98 pub fn new(config: impl Into<ParserConfig2>) -> PullParser {
99 let config = config.into();
100 Self::new_with_config2(config)
101 }
102
103 #[inline]
104 fn new_with_config2(config: ParserConfig2) -> PullParser {
105 let mut lexer = Lexer::new(&config);
106 if let Some(enc) = config.override_encoding {
107 lexer.set_encoding(enc);
108 }
109
110 let mut pos = Vec::with_capacity(16);
111 pos.push(TextPosition::new());
112
113 PullParser {
114 config,
115 lexer,
116 st: State::DocumentStart,
117 state_after_reference: State::OutsideTag,
118 buf: String::new(),
119 entities: HashMap::new(),
120 nst: NamespaceStack::default(),
121
122 data: MarkupData {
123 name: String::new(),
124 version: None,
125 encoding: None,
126 standalone: None,
127 ref_data: String::new(),
128 element_name: None,
129 quote: None,
130 attr_name: None,
131 attributes: AttributesSet::new(),
132 },
133 final_result: None,
134 next_event: None,
135 est: Vec::new(),
136 pos,
137
138 encountered: Encountered::None,
139 inside_whitespace: true,
140 read_prefix_separator: false,
141 pop_namespace: false,
142 }
143 }
144
145 /// Checks if this parser ignores the end of stream errors.
146 pub fn is_ignoring_end_of_stream(&self) -> bool { self.config.c.ignore_end_of_stream }
147
148 #[inline(never)]
149 fn set_encountered(&mut self, new_encounter: Encountered) -> Option<Result> {
150 if new_encounter <= self.encountered {
151 return None;
152 }
153 let prev_enc = self.encountered;
154 self.encountered = new_encounter;
155
156 // If declaration was not parsed and we have encountered an element,
157 // emit this declaration as the next event.
158 if prev_enc == Encountered::None {
159 self.push_pos();
160 Some(Ok(XmlEvent::StartDocument {
161 version: DEFAULT_VERSION,
162 encoding: self.lexer.encoding().to_string(),
163 standalone: DEFAULT_STANDALONE,
164 }))
165 } else {
166 None
167 }
168 }
169}
170
171impl Position for PullParser {
172 /// Returns the position of the last event produced by the parser
173 #[inline]
174 fn position(&self) -> TextPosition {
175 self.pos[0]
176 }
177}
178
179#[derive(Copy, Clone, PartialEq)]
180pub enum State {
181 OutsideTag,
182 InsideOpeningTag(OpeningTagSubstate),
183 InsideClosingTag(ClosingTagSubstate),
184 InsideProcessingInstruction(ProcessingInstructionSubstate),
185 InsideComment,
186 InsideCData,
187 InsideDeclaration(DeclarationSubstate),
188 InsideDoctype(DoctypeSubstate),
189 InsideReference,
190 DocumentStart,
191}
192
/// Sub-state carried by `State::InsideDoctype` and passed to the doctype handler.
#[derive(Clone, Copy, PartialEq)]
pub enum DoctypeSubstate {
    Outside,
    String,
    InsideName,
    BeforeEntityName,
    EntityName,
    BeforeEntityValue,
    EntityValue,
    NumericReferenceStart,
    NumericReference,
    /// expansion
    PEReferenceInValue,
    PEReferenceInDtd,
    /// name definition
    PEReferenceDefinitionStart,
    PEReferenceDefinition,
    SkipDeclaration,
    Comment,
}
213
/// Sub-state carried by `State::InsideOpeningTag` and passed to the opening-tag handler.
#[derive(Clone, Copy, PartialEq)]
pub enum OpeningTagSubstate {
    InsideName,

    InsideTag,

    InsideAttributeName,
    AfterAttributeName,

    InsideAttributeValue,
    AfterAttributeValue,
}
226
/// Sub-state carried by `State::InsideClosingTag` and passed to the closing-tag handler.
#[derive(Clone, Copy, PartialEq)]
pub enum ClosingTagSubstate {
    CTInsideName,
    CTAfterName,
}
232
/// Sub-state carried by `State::InsideProcessingInstruction` and passed to its handler.
#[derive(Clone, Copy, PartialEq)]
pub enum ProcessingInstructionSubstate {
    PIInsideName,
    PIInsideData,
}
238
/// Sub-state carried by `State::InsideDeclaration` and passed to the declaration handler.
///
/// The variants are grouped by the pseudo-attribute they belong to:
/// version, encoding and the standalone declaration.
#[derive(Clone, Copy, PartialEq)]
pub enum DeclarationSubstate {
    // version
    BeforeVersion,
    InsideVersion,
    AfterVersion,

    InsideVersionValue,
    AfterVersionValue,

    // encoding
    BeforeEncoding,
    InsideEncoding,
    AfterEncoding,

    InsideEncodingValue,
    AfterEncodingValue,

    // standalone
    BeforeStandaloneDecl,
    InsideStandaloneDecl,
    AfterStandaloneDecl,

    InsideStandaloneDeclValue,
    AfterStandaloneDeclValue,
}
262
/// Kind of qualified name `read_qualified_name` is reading; it decides which
/// tokens are accepted as the end of the name.
#[derive(PartialEq)]
enum QualifiedNameTarget {
    /// The name may be ended by `=`.
    AttributeNameTarget,
    /// The name may be ended by `>` or `/>`.
    OpeningTagNameTarget,
    /// The name may be ended by `>`.
    ClosingTagNameTarget,
}
269
/// The quote character that opened the attribute value currently being read.
#[derive(Clone, Copy, PartialEq, Eq)]
enum QuoteToken {
    /// Corresponds to `Token::SingleQuote`.
    SingleQuoteToken,
    /// Corresponds to `Token::DoubleQuote`.
    DoubleQuoteToken,
}
275
276impl QuoteToken {
277 fn from_token(t: &Token) -> QuoteToken {
278 match *t {
279 Token::SingleQuote => QuoteToken::SingleQuoteToken,
280 Token::DoubleQuote => QuoteToken::DoubleQuoteToken,
281 _ => panic!("Unexpected token: {t}"),
282 }
283 }
284
285 fn as_token(self) -> Token {
286 match self {
287 QuoteToken::SingleQuoteToken => Token::SingleQuote,
288 QuoteToken::DoubleQuoteToken => Token::DoubleQuote,
289 }
290 }
291}
292
293struct MarkupData {
294 name: String, // used for processing instruction name
295 ref_data: String, // used for reference content
296
297 version: Option<XmlVersion>, // used for XML declaration version
298 encoding: Option<String>, // used for XML declaration encoding
299 standalone: Option<bool>, // used for XML declaration standalone parameter
300
301 element_name: Option<OwnedName>, // used for element name
302
303 quote: Option<QuoteToken>, // used to hold opening quote for attribute value
304 attr_name: Option<OwnedName>, // used to hold attribute name
305 attributes: AttributesSet, // used to hold all accumulated attributes
306}
307
308impl PullParser {
309 /// Returns next event read from the given buffer.
310 ///
311 /// This method should be always called with the same buffer. If you call it
312 /// providing different buffers each time, the result will be undefined.
313 pub fn next<R: Read>(&mut self, r: &mut R) -> Result {
314 if let Some(ref ev) = self.final_result {
315 return ev.clone();
316 }
317
318 if let Some(ev) = self.next_event.take() {
319 return ev;
320 }
321
322 if self.pop_namespace {
323 self.pop_namespace = false;
324 self.nst.pop();
325 }
326
327 loop {
328 debug_assert!(self.next_event.is_none());
329 debug_assert!(!self.pop_namespace);
330
331 // While lexer gives us Ok(maybe_token) -- we loop.
332 // Upon having a complete XML-event -- we return from the whole function.
333 match self.lexer.next_token(r) {
334 Ok(Some(token)) => {
335 match self.dispatch_token(token) {
336 None => {} // continue
337 Some(Ok(xml_event)) => {
338 self.next_pos();
339 return Ok(xml_event)
340 },
341 Some(Err(xml_error)) => {
342 self.next_pos();
343 return self.set_final_result(Err(xml_error))
344 },
345 }
346 },
347 Ok(None) => break,
348 Err(lexer_error) => {
349 return self.set_final_result(Err(lexer_error))
350 },
351 }
352 }
353
354 self.handle_eof()
355 }
356
357 /// Handle end of stream
358 fn handle_eof(&mut self) -> std::result::Result<XmlEvent, super::Error> {
359 // Forward pos to the lexer head
360 self.next_pos();
361 let ev = if self.depth() == 0 {
362 if self.encountered == Encountered::Element && self.st == State::OutsideTag { // all is ok
363 Ok(XmlEvent::EndDocument)
364 } else if self.encountered < Encountered::Element {
365 self.error(SyntaxError::NoRootElement)
366 } else { // self.st != State::OutsideTag
367 self.error(SyntaxError::UnexpectedEof) // TODO: add expected hint?
368 }
369 } else if self.config.c.ignore_end_of_stream {
370 self.final_result = None;
371 self.lexer.reset_eof_handled();
372 return self.error(SyntaxError::UnbalancedRootElement);
373 } else {
374 self.error(SyntaxError::UnbalancedRootElement)
375 };
376 self.set_final_result(ev)
377 }
378
379 // This function is to be called when a terminal event is reached.
380 // The function sets up the `self.final_result` into `Some(result)` and return `result`.
381 #[inline]
382 fn set_final_result(&mut self, result: Result) -> Result {
383 self.final_result = Some(result.clone());
384 result
385 }
386
387 #[cold]
388 fn error(&self, e: SyntaxError) -> Result {
389 Err(Error {
390 pos: self.lexer.position(),
391 kind: ErrorKind::Syntax(e.to_cow()),
392 })
393 }
394
395 #[inline]
396 fn next_pos(&mut self) {
397 // unfortunately calls to next_pos will never be perfectly balanced with push_pos,
398 // at very least because parse errors and EOF can happen unexpectedly without a prior push.
399 if !self.pos.is_empty() {
400 if self.pos.len() > 1 {
401 self.pos.remove(0);
402 } else {
403 self.pos[0] = self.lexer.position();
404 }
405 }
406 }
407
408 #[inline]
409 #[track_caller]
410 fn push_pos(&mut self) {
411 debug_assert!(self.pos.len() != self.pos.capacity(), "You've found a bug in xml-rs, caused by calls to push_pos() in states that don't end up emitting events.
412 This case is ignored in release mode, and merely causes document positions to be out of sync.
413 Please file a bug and include the XML document that triggers this assert.");
414
415 // it has capacity preallocated for more than it ever needs, so this reduces code size
416 if self.pos.len() != self.pos.capacity() {
417 self.pos.push(self.lexer.position());
418 } else if self.pos.len() > 1 {
419 self.pos.remove(0); // this mitigates the excessive push_pos() call
420 }
421 }
422
423 #[inline(never)]
424 fn dispatch_token(&mut self, t: Token) -> Option<Result> {
425 match self.st {
426 State::OutsideTag => self.outside_tag(t),
427 State::InsideOpeningTag(s) => self.inside_opening_tag(t, s),
428 State::InsideClosingTag(s) => self.inside_closing_tag_name(t, s),
429 State::InsideReference => self.inside_reference(t),
430 State::InsideComment => self.inside_comment(t),
431 State::InsideCData => self.inside_cdata(t),
432 State::InsideProcessingInstruction(s) => self.inside_processing_instruction(t, s),
433 State::InsideDoctype(s) => self.inside_doctype(t, s),
434 State::InsideDeclaration(s) => self.inside_declaration(t, s),
435 State::DocumentStart => self.document_start(t),
436 }
437 }
438
439 #[inline]
440 fn depth(&self) -> usize {
441 self.est.len()
442 }
443
444 #[inline]
445 fn buf_has_data(&self) -> bool {
446 !self.buf.is_empty()
447 }
448
449 #[inline]
450 fn take_buf(&mut self) -> String {
451 std::mem::take(&mut self.buf)
452 }
453
454 #[inline]
455 fn into_state(&mut self, st: State, ev: Option<Result>) -> Option<Result> {
456 self.st = st;
457 ev
458 }
459
460 #[inline]
461 fn into_state_continue(&mut self, st: State) -> Option<Result> {
462 self.into_state(st, None)
463 }
464
465 #[inline]
466 fn into_state_emit(&mut self, st: State, ev: Result) -> Option<Result> {
467 self.into_state(st, Some(ev))
468 }
469
470 /// Dispatches tokens in order to process qualified name. If qualified name cannot be parsed,
471 /// an error is returned.
472 ///
473 /// # Parameters
474 /// * `t` --- next token;
475 /// * `on_name` --- a callback which is executed when whitespace is encountered.
476 fn read_qualified_name<F>(&mut self, t: Token, target: QualifiedNameTarget, on_name: F) -> Option<Result>
477 where F: Fn(&mut PullParser, Token, OwnedName) -> Option<Result> {
478 // We can get here for the first time only when self.data.name contains zero or one character,
479 // but first character cannot be a colon anyway
480 if self.buf.len() <= 1 {
481 self.read_prefix_separator = false;
482 }
483
484 let invoke_callback = move |this: &mut PullParser, t| {
485 let name = this.take_buf();
486 match name.parse() {
487 Ok(name) => on_name(this, t, name),
488 Err(_) => Some(this.error(SyntaxError::InvalidQualifiedName(name.into()))),
489 }
490 };
491
492 match t {
493 // There can be only one colon, and not as the first character
494 Token::Character(':') if self.buf_has_data() && !self.read_prefix_separator => {
495 self.buf.push(':');
496 self.read_prefix_separator = true;
497 None
498 }
499
500 Token::Character(c) if c != ':' && (self.buf.is_empty() && is_name_start_char(c) ||
501 self.buf_has_data() && is_name_char(c)) => {
502 if self.buf.len() > self.config.max_name_length {
503 return Some(self.error(SyntaxError::ExceededConfiguredLimit));
504 }
505 self.buf.push(c);
506 None
507 },
508
509 Token::EqualsSign if target == QualifiedNameTarget::AttributeNameTarget => invoke_callback(self, t),
510
511 Token::EmptyTagEnd if target == QualifiedNameTarget::OpeningTagNameTarget => invoke_callback(self, t),
512
513 Token::TagEnd if target == QualifiedNameTarget::OpeningTagNameTarget ||
514 target == QualifiedNameTarget::ClosingTagNameTarget => invoke_callback(self, t),
515
516 Token::Character(c) if is_whitespace_char(c) => invoke_callback(self, t),
517
518 _ => Some(self.error(SyntaxError::UnexpectedQualifiedName(t))),
519 }
520 }
521
522 /// Dispatches tokens in order to process attribute value.
523 ///
524 /// # Parameters
525 /// * `t` --- next token;
526 /// * `on_value` --- a callback which is called when terminating quote is encountered.
527 fn read_attribute_value<F>(&mut self, t: Token, on_value: F) -> Option<Result>
528 where F: Fn(&mut PullParser, String) -> Option<Result> {
529 match t {
530 Token::Character(c) if self.data.quote.is_none() && is_whitespace_char(c) => None, // skip leading whitespace
531
532 Token::DoubleQuote | Token::SingleQuote => match self.data.quote {
533 None => { // Entered attribute value
534 self.data.quote = Some(QuoteToken::from_token(&t));
535 None
536 }
537 Some(q) if q.as_token() == t => {
538 self.data.quote = None;
539 let value = self.take_buf();
540 on_value(self, value)
541 }
542 _ => {
543 if let Token::Character(c) = t {
544 if !self.is_valid_xml_char_not_restricted(c) {
545 return Some(self.error(SyntaxError::InvalidCharacterEntity(c as u32)));
546 }
547 }
548 if self.buf.len() > self.config.max_attribute_length {
549 return Some(self.error(SyntaxError::ExceededConfiguredLimit));
550 }
551 t.push_to_string(&mut self.buf);
552 None
553 }
554 },
555
556 Token::ReferenceStart if self.data.quote.is_some() => {
557 self.state_after_reference = self.st;
558 self.into_state_continue(State::InsideReference)
559 },
560
561 Token::OpeningTagStart => Some(self.error(SyntaxError::UnexpectedOpeningTag)),
562
563 Token::Character(c) if !self.is_valid_xml_char_not_restricted(c) => {
564 Some(self.error(SyntaxError::InvalidCharacterEntity(c as u32)))
565 },
566
567 // Every character except " and ' and < is okay
568 _ if self.data.quote.is_some() => {
569 if self.buf.len() > self.config.max_attribute_length {
570 return Some(self.error(SyntaxError::ExceededConfiguredLimit));
571 }
572 t.push_to_string(&mut self.buf);
573 None
574 }
575
576 _ => Some(self.error(SyntaxError::UnexpectedToken(t))),
577 }
578 }
579
580 fn emit_start_element(&mut self, emit_end_element: bool) -> Option<Result> {
581 let mut name = self.data.take_element_name()?;
582 let mut attributes = self.data.take_attributes().into_vec();
583
584 // check whether the name prefix is bound and fix its namespace
585 match self.nst.get(name.borrow().prefix_repr()) {
586 Some("") => name.namespace = None, // default namespace
587 Some(ns) => name.namespace = Some(ns.into()),
588 None => return Some(self.error(SyntaxError::UnboundElementPrefix(name.to_string().into())))
589 }
590
591 // check and fix accumulated attributes prefixes
592 for attr in &mut attributes {
593 if let Some(ref pfx) = attr.name.prefix {
594 let new_ns = match self.nst.get(pfx) {
595 Some("") => None, // default namespace
596 Some(ns) => Some(ns.into()),
597 None => return Some(self.error(SyntaxError::UnboundAttribute(attr.name.to_string().into())))
598 };
599 attr.name.namespace = new_ns;
600 }
601 }
602
603 if emit_end_element {
604 self.pop_namespace = true;
605 self.next_event = Some(Ok(XmlEvent::EndElement {
606 name: name.clone()
607 }));
608 } else {
609 self.est.push(name.clone());
610 }
611 let namespace = self.nst.squash();
612 self.into_state_emit(State::OutsideTag, Ok(XmlEvent::StartElement {
613 name,
614 attributes,
615 namespace
616 }))
617 }
618
619 fn emit_end_element(&mut self) -> Option<Result> {
620 let mut name = self.data.take_element_name()?;
621
622 // check whether the name prefix is bound and fix its namespace
623 match self.nst.get(name.borrow().prefix_repr()) {
624 Some("") => name.namespace = None, // default namespace
625 Some(ns) => name.namespace = Some(ns.into()),
626 None => return Some(self.error(SyntaxError::UnboundElementPrefix(name.to_string().into())))
627 }
628
629 let op_name = self.est.pop()?;
630
631 if name == op_name {
632 self.pop_namespace = true;
633 self.into_state_emit(State::OutsideTag, Ok(XmlEvent::EndElement { name }))
634 } else {
635 Some(self.error(SyntaxError::UnexpectedClosingTag(format!("{name} != {op_name}").into())))
636 }
637 }
638
639 #[inline]
640 fn is_valid_xml_char(&self, c: char) -> bool {
641 if Some(XmlVersion::Version11) == self.data.version {
642 is_xml11_char(c)
643 } else {
644 is_xml10_char(c)
645 }
646 }
647
648 #[inline]
649 fn is_valid_xml_char_not_restricted(&self, c: char) -> bool {
650 if Some(XmlVersion::Version11) == self.data.version {
651 is_xml11_char_not_restricted(c)
652 } else {
653 is_xml10_char(c)
654 }
655 }
656}
657
#[cfg(test)]
mod tests {
    use std::io::BufReader;
    use crate::attribute::OwnedAttribute;
    use crate::common::TextPosition;
    use crate::name::OwnedName;
    use crate::reader::events::XmlEvent;
    use crate::reader::parser::PullParser;
    use crate::reader::ParserConfig;

    /// Parser with the default configuration.
    fn new_parser() -> PullParser {
        PullParser::new(ParserConfig::new())
    }

    /// Pulls the next event from parser `$p` reading from `$r` and panics unless
    /// it matches the pattern `$t` (and, in the second form, the condition `$c`).
    macro_rules! expect_event {
        ($r:expr, $p:expr, $t:pat) => {
            match $p.next(&mut $r) {
                $t => {}
                e => panic!("Unexpected event: {e:?}\nExpected: {}", stringify!($t))
            }
        };
        ($r:expr, $p:expr, $t:pat => $c:expr ) => {
            match $p.next(&mut $r) {
                $t if $c => {}
                e => panic!("Unexpected event: {e:?}\nExpected: {} if {}", stringify!($t), stringify!($c))
            }
        };
    }

    /// Builds a `(reader, parser)` pair over the given document text.
    macro_rules! test_data {
        ($d:expr) => {{
            static DATA: &str = $d;
            let r = BufReader::new(DATA.as_bytes());
            let p = new_parser();
            (r, p)
        }};
    }

    #[test]
    fn issue_3_semicolon_in_attribute_value() {
        let (mut r, mut p) = test_data!(r#"
            <a attr="zzz;zzz" />
        "#);

        expect_event!(r, p, Ok(XmlEvent::StartDocument { .. }));
        expect_event!(r, p, Ok(XmlEvent::StartElement { ref name, ref attributes, ref namespace }) =>
            *name == OwnedName::local("a") &&
             attributes.len() == 1 &&
             attributes[0] == OwnedAttribute::new(OwnedName::local("attr"), "zzz;zzz") &&
             namespace.is_essentially_empty()
        );
        expect_event!(r, p, Ok(XmlEvent::EndElement { ref name }) => *name == OwnedName::local("a"));
        expect_event!(r, p, Ok(XmlEvent::EndDocument));
    }

    #[test]
    fn issue_140_entity_reference_inside_tag() {
        let (mut r, mut p) = test_data!(r#"
            <bla>&#9835;</bla>
        "#);

        expect_event!(r, p, Ok(XmlEvent::StartDocument { .. }));
        expect_event!(r, p, Ok(XmlEvent::StartElement { ref name, .. }) => *name == OwnedName::local("bla"));
        expect_event!(r, p, Ok(XmlEvent::Characters(ref s)) => s == "\u{266b}");
        expect_event!(r, p, Ok(XmlEvent::EndElement { ref name, .. }) => *name == OwnedName::local("bla"));
        expect_event!(r, p, Ok(XmlEvent::EndDocument));
    }

    #[test]
    fn issue_220_comment() {
        // `<!--` inside a comment is plain comment text.
        let (mut r, mut p) = test_data!(r#"<x><!-- <!--></x>"#);
        expect_event!(r, p, Ok(XmlEvent::StartDocument { .. }));
        expect_event!(r, p, Ok(XmlEvent::StartElement { .. }));
        expect_event!(r, p, Ok(XmlEvent::EndElement { .. }));
        expect_event!(r, p, Ok(XmlEvent::EndDocument));

        let (mut r, mut p) = test_data!(r#"<x><!-- <!---></x>"#);
        expect_event!(r, p, Ok(XmlEvent::StartDocument { .. }));
        expect_event!(r, p, Ok(XmlEvent::StartElement { .. }));
        expect_event!(r, p, Err(_)); // ---> is forbidden in comments

        // With comments reported, the comment text comes through verbatim.
        let (mut r, mut p) = test_data!(r#"<x><!--<text&x;> <!--></x>"#);
        p.config.c.ignore_comments = false;
        expect_event!(r, p, Ok(XmlEvent::StartDocument { .. }));
        expect_event!(r, p, Ok(XmlEvent::StartElement { .. }));
        expect_event!(r, p, Ok(XmlEvent::Comment(s)) => s == "<text&x;> <!");
        expect_event!(r, p, Ok(XmlEvent::EndElement { .. }));
        expect_event!(r, p, Ok(XmlEvent::EndDocument));
    }

    #[test]
    fn malformed_declaration_attrs() {
        let (mut r, mut p) = test_data!(r#"<?xml version x="1.0"?>"#);
        expect_event!(r, p, Err(_));

        let (mut r, mut p) = test_data!(r#"<?xml version="1.0" version="1.0"?>"#);
        expect_event!(r, p, Err(_));

        let (mut r, mut p) = test_data!(r#"<?xml version="1.0"encoding="utf-8"?>"#);
        expect_event!(r, p, Err(_));

        let (mut r, mut p) = test_data!(r#"<?xml version="1.0"standalone="yes"?>"#);
        expect_event!(r, p, Err(_));

        let (mut r, mut p) = test_data!(r#"<?xml version="1.0" encoding="utf-8"standalone="yes"?>"#);
        expect_event!(r, p, Err(_));
    }

    #[test]
    fn opening_tag_in_attribute_value() {
        use crate::reader::error::{SyntaxError, Error, ErrorKind};

        // The asserted column depends on the 12-space indentation of the tag below.
        let (mut r, mut p) = test_data!(r#"
            <a attr="zzz<zzz" />
        "#);

        expect_event!(r, p, Ok(XmlEvent::StartDocument { .. }));
        expect_event!(r, p, Err(ref e) =>
            *e == Error {
                kind: ErrorKind::Syntax(SyntaxError::UnexpectedOpeningTag.to_cow()),
                pos: TextPosition { row: 1, column: 24 }
            }
        );
    }

    #[test]
    fn reference_err() {
        let (mut r, mut p) = test_data!(r#"
            <a>&&amp;</a>
        "#);

        expect_event!(r, p, Ok(XmlEvent::StartDocument { .. }));
        expect_event!(r, p, Ok(XmlEvent::StartElement { .. }));
        expect_event!(r, p, Err(_));
    }

    #[test]
    fn state_size() {
        assert_eq!(2, std::mem::size_of::<super::State>());
        assert_eq!(1, std::mem::size_of::<super::DoctypeSubstate>());
    }
}
800