1 | //! Contains a parser for an XML processing instruction.
|
2 |
|
3 | use crate::errors::SyntaxError;
|
4 | use crate::parser::Parser;
|
5 |
|
/// A parser that searches a slice for the `?>` sequence which terminates an XML
/// processing instruction.
///
/// To use the parser, create an instance and [`feed`] data into it chunk by chunk.
/// While the terminator has not been seen, [`feed`] returns [`None`]; you would
/// typically expect the search to succeed eventually, so keep feeding new data
/// until it does. On success [`feed`] returns [`Some`] with the position of the
/// `>` that closes the processing instruction, counted from the start of the
/// slice passed to that call. A `?>` that is split between two consecutive
/// slices is recognized as well.
///
/// NOTE: after a successful match the parser is not reset to its initial state
/// and should not be used anymore. Create a new parser if you want to perform
/// a new search.
///
/// # Example
///
/// ```
/// # use pretty_assertions::assert_eq;
/// use quick_xml::parser::{Parser, PiParser};
///
/// let mut parser = PiParser::default();
///
/// // Parse `<?instruction with = 'some > and ?' inside?>and the text follow...`
/// // split into three chunks
/// assert_eq!(parser.feed(b"<?instruction"), None);
/// // ...get new chunk of data
/// assert_eq!(parser.feed(b" with = 'some > and ?"), None);
/// // ...get another chunk of data
/// assert_eq!(parser.feed(b"' inside?>and the text follow..."), Some(9));
/// //                       ^        ^
/// //                       0        9
/// ```
///
/// [`feed`]: Self::feed()
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct PiParser(
    /// `true` when the slice given to the previous, unsuccessful `feed` call
    /// ended with `?`, so a `>` at the very start of the next slice completes
    /// the `?>` terminator. A default-constructed parser starts with `false`.
    pub bool,
);
|
44 |
|
45 | impl Parser for PiParser {
|
46 | /// Determines the end position of a processing instruction in the provided slice.
|
47 | /// Processing instruction ends on the first occurrence of `?>` which cannot be
|
48 | /// escaped.
|
49 | ///
|
50 | /// Returns position after the `?>` or `None` if such sequence was not found.
|
51 | ///
|
52 | /// [Section 2.6]: Parameter entity references MUST NOT be recognized within
|
53 | /// processing instructions, so parser do not search for them.
|
54 | ///
|
55 | /// # Parameters
|
56 | /// - `bytes`: a slice to find the end of a processing instruction.
|
57 | /// Should contain text in ASCII-compatible encoding
|
58 | ///
|
59 | /// [Section 2.6]: https://www.w3.org/TR/xml11/#sec-pi
|
60 | #[inline ]
|
61 | fn feed(&mut self, bytes: &[u8]) -> Option<usize> {
|
62 | for i in memchr::memchr_iter(b'>' , bytes) {
|
63 | match i {
|
64 | 0 if self.0 => return Some(0),
|
65 | // If the previous byte is `?`, then we found `?>`
|
66 | i if i > 0 && bytes[i - 1] == b'?' => return Some(i),
|
67 | _ => {}
|
68 | }
|
69 | }
|
70 | self.0 = bytes.last().copied() == Some(b'?' );
|
71 | None
|
72 | }
|
73 |
|
74 | #[inline ]
|
75 | fn eof_error() -> SyntaxError {
|
76 | SyntaxError::UnclosedPIOrXmlDecl
|
77 | }
|
78 | }
|
79 |
|
#[test]
fn pi() {
    use pretty_assertions::assert_eq;

    /// Runs a single `feed` on a parser whose flag is preset to
    /// `had_question_mark`.
    ///
    /// Returns `Ok(pos)` with the position reported by the parser when the end
    /// of the processing instruction was found.
    ///
    /// Returns `Err(internal_state)` with the parser flag after the call if
    /// parsing is not done yet.
    fn parse_pi(bytes: &[u8], had_question_mark: bool) -> Result<usize, bool> {
        let mut parser = PiParser(had_question_mark);
        let outcome = parser.feed(bytes);
        // The flag is read only after `feed` had a chance to update it.
        outcome.ok_or(parser.0)
    }

    // Each row is (input, flag before the call, expected result).
    // The trailing comment shows which character was seen last before calling
    // `feed`: `x` means any character, the pipe marks the start of the buffer
    // that is passed to `feed`.
    let cases: [(&[u8], bool, Result<usize, bool>); 10] = [
        (b"", false, Err(false)), // x|
        (b"", true, Err(false)),  // ?|
        (b"?", false, Err(true)), // x|?
        (b"?", true, Err(true)),  // ?|?
        (b">", false, Err(false)), // x|>
        (b">", true, Ok(0)),       // ?|>
        (b"?>", false, Ok(1)), // x|?>
        (b"?>", true, Ok(1)),  // ?|?>
        (b">?>", false, Ok(2)), // x|>?>
        (b">?>", true, Ok(0)),  // ?|>?>
    ];

    for &(bytes, had_question_mark, expected) in cases.iter() {
        assert_eq!(
            parse_pi(bytes, had_question_mark),
            expected,
            "input: {:?}, had_question_mark: {}",
            bytes,
            had_question_mark
        );
    }
}
|
114 | |