1 | //! Contains a parser for an XML element.
|
2 |
|
3 | use crate::errors::SyntaxError;
|
4 | use crate::parser::Parser;
|
5 |
|
/// A parser that searches a slice for a `>` symbol located outside of quoted
/// regions.
///
/// Two kinds of quoted regions are recognized: double-quoted (`"..."`) and
/// single-quoted (`'...'`). A `>` found inside such a region is not reported
/// as a result. A region starts and ends with its quote symbol, which cannot
/// be escaped (it can only be encoded as an XML character reference or a named
/// entity, and neither encoding contains a literal quote).
///
/// To use the parser, create an instance and [`feed`] data into it. When the
/// search succeeds, [`feed`] returns [`Some`] with the position of the found
/// symbol within the chunk that was just fed; otherwise it returns [`None`].
/// Since a match is normally expected, keep feeding new data until one is
/// reported.
///
/// NOTE: the parser is not explicitly reset after a successful match and
/// should not be used anymore. Create a new parser to perform a new search.
///
/// # Example
///
/// ```
/// # use pretty_assertions::assert_eq;
/// use quick_xml::parser::{ElementParser, Parser};
///
/// let mut parser = ElementParser::default();
///
/// // Parse `<my-element with = 'some > inside'>and the text follow...`
/// // split into three chunks
/// assert_eq!(parser.feed(b"<my-element"), None);
/// // ...get a new chunk of data
/// assert_eq!(parser.feed(b" with = 'some >"), None);
/// // ...get another chunk of data
/// assert_eq!(parser.feed(b" inside'>and the text follow..."), Some(8));
/// //                       ^       ^
/// //                       0       8
/// ```
///
/// [`feed`]: Self::feed()
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ElementParser {
    /// The initial state: inside an element, but outside of any quoted
    /// attribute value. This is the only state in which `>` is reported.
    Outside,
    /// Inside a single-quoted region (`'...'`); only `'` leaves this state.
    SingleQ,
    /// Inside a double-quoted region (`"..."`); only `"` leaves this state.
    DoubleQ,
}
|
53 |
|
54 | impl Parser for ElementParser {
|
55 | /// Returns number of consumed bytes or `None` if `>` was not found in `bytes`.
|
56 | #[inline ]
|
57 | fn feed(&mut self, bytes: &[u8]) -> Option<usize> {
|
58 | for i in memchr::memchr3_iter(b'>' , b' \'' , b'"' , bytes) {
|
59 | *self = match (*self, bytes[i]) {
|
60 | // only allowed to match `>` while we are in state `Outside`
|
61 | (Self::Outside, b'>' ) => return Some(i),
|
62 | (Self::Outside, b' \'' ) => Self::SingleQ,
|
63 | (Self::Outside, b' \"' ) => Self::DoubleQ,
|
64 |
|
65 | // the only end_byte that gets us out if the same character
|
66 | (Self::SingleQ, b' \'' ) | (Self::DoubleQ, b'"' ) => Self::Outside,
|
67 |
|
68 | // all other bytes: no state change
|
69 | _ => continue,
|
70 | };
|
71 | }
|
72 | None
|
73 | }
|
74 |
|
75 | #[inline ]
|
76 | fn eof_error() -> SyntaxError {
|
77 | SyntaxError::UnclosedTag
|
78 | }
|
79 | }
|
80 |
|
81 | impl Default for ElementParser {
|
82 | #[inline ]
|
83 | fn default() -> Self {
|
84 | Self::Outside
|
85 | }
|
86 | }
|
87 |
|
#[test]
fn parse() {
    use pretty_assertions::assert_eq;
    use ElementParser::*;

    /// Feeds `bytes` into `parser` once.
    ///
    /// Returns `Ok(pos)` with the position in the buffer where the element
    /// ends, or `Err(internal_state)` with the state the parser is left in
    /// when parsing is not done yet.
    fn parse_element(bytes: &[u8], mut parser: ElementParser) -> Result<usize, ElementParser> {
        let found = parser.feed(bytes);
        // `parser` is read only after `feed` has updated it.
        found.ok_or(parser)
    }

    // (input, starting state, expected outcome)
    let cases: &[(&[u8], ElementParser, Result<usize, ElementParser>)] = &[
        // Empty input never changes the state.
        (b"", Outside, Err(Outside)),
        (b"", SingleQ, Err(SingleQ)),
        (b"", DoubleQ, Err(DoubleQ)),
        // A single quote toggles only the single-quoted region.
        (b"'", Outside, Err(SingleQ)),
        (b"'", SingleQ, Err(Outside)),
        (b"'", DoubleQ, Err(DoubleQ)),
        // A double quote toggles only the double-quoted region.
        (b"\"", Outside, Err(DoubleQ)),
        (b"\"", SingleQ, Err(SingleQ)),
        (b"\"", DoubleQ, Err(Outside)),
        // `>` is reported only outside of quoted regions.
        (b">", Outside, Ok(0)),
        (b">", SingleQ, Err(SingleQ)),
        (b">", DoubleQ, Err(DoubleQ)),
        // An empty quoted value followed by `>`.
        (b"''>", Outside, Ok(2)),
        (b"''>", SingleQ, Err(SingleQ)),
        (b"''>", DoubleQ, Err(DoubleQ)),
    ];

    for &(input, start, expected) in cases.iter() {
        assert_eq!(
            parse_element(input, start),
            expected,
            "input: {:?}, initial state: {:?}",
            input,
            start
        );
    }
}
|
123 | |