mod.rs source code [crates/quick-xml-0.31.0/src/reader/mod.rs]

1	//! Contains high-level interface for a pull-based XML parser.
2
3	#[cfg(feature = "encoding")]
4	use encoding_rs::Encoding;
5	use std::ops::Range;
6
7	use crate::encoding::Decoder;
8	use crate::errors::{Error, Result};
9	use crate::events::Event;
10	use crate::reader::state::ReaderState;
11
12	use memchr;
13
macro_rules! configure_methods {
    ($($holder:ident)?) => {
        /// Selects how self-closing tags such as `<tag/>` are reported.
        ///
        /// Passing `true` makes the reader report every such tag as a [`Start`]
        /// event immediately followed by an [`End`] event. Passing `false` (the
        /// default) keeps the single [`Empty`] event.
        ///
        /// Be aware that `true` costs extra allocations, because the tag name has
        /// to be stored for the [`End`] event. When [`check_end_names`] is enabled
        /// as well, one additional allocation serves both options.
        ///
        /// (`false` by default)
        ///
        /// [`Empty`]: Event::Empty
        /// [`Start`]: Event::Start
        /// [`End`]: Event::End
        /// [`check_end_names`]: Self::check_end_names
        pub fn expand_empty_elements(&mut self, val: bool) -> &mut Self {
            let state = &mut self $(.$holder)? .state;
            state.expand_empty_elements = val;
            self
        }

        /// Selects whether whitespace surrounding character data is stripped.
        ///
        /// With `true`, every [`Text`] event is trimmed on both sides; an event
        /// that becomes empty after trimming is not pushed at all.
        ///
        /// This setter also overwrites the [`trim_text_end`] option with the
        /// same value.
        ///
        /// (`false` by default).
        ///
        /// <div style="background:rgba(80, 240, 100, 0.20);padding:0.75em;">
        ///
        /// WARNING: This option trims every text event, which is incorrect when
        /// text events are delimited by comments, processing instructions or
        /// CDATA sections. To trim correctly, apply
        /// [`BytesText::inplace_trim_start`] and [`BytesText::inplace_trim_end`]
        /// manually, and only to the events that need it.
        /// </div>
        ///
        /// [`Text`]: Event::Text
        /// [`trim_text_end`]: Self::trim_text_end
        /// [`BytesText::inplace_trim_start`]: crate::events::BytesText::inplace_trim_start
        /// [`BytesText::inplace_trim_end`]: crate::events::BytesText::inplace_trim_end
        pub fn trim_text(&mut self, val: bool) -> &mut Self {
            let state = &mut self $(.$holder)? .state;
            state.trim_text_start = val;
            state.trim_text_end = val;
            self
        }

        /// Selects whether whitespace following character data is stripped.
        ///
        /// With `true`, trailing whitespace is removed from [`Text`] events; an
        /// event that becomes empty after trimming is not pushed at all.
        ///
        /// (`false` by default).
        ///
        /// <div style="background:rgba(80, 240, 100, 0.20);padding:0.75em;">
        ///
        /// WARNING: This option trims every text event, which is incorrect when
        /// text events are delimited by comments, processing instructions or
        /// CDATA sections. To trim correctly, apply
        /// [`BytesText::inplace_trim_start`] and [`BytesText::inplace_trim_end`]
        /// manually, and only to the events that need it.
        /// </div>
        ///
        /// [`Text`]: Event::Text
        /// [`BytesText::inplace_trim_start`]: crate::events::BytesText::inplace_trim_start
        /// [`BytesText::inplace_trim_end`]: crate::events::BytesText::inplace_trim_end
        pub fn trim_text_end(&mut self, val: bool) -> &mut Self {
            let state = &mut self $(.$holder)? .state;
            state.trim_text_end = val;
            self
        }

        /// Selects whether whitespace that follows the markup name inside a
        /// closing tag (`</a >`) is stripped.
        ///
        /// With `true`, the emitted [`End`] event has the trailing whitespace after
        /// the markup name removed.
        ///
        /// Beware: with `false` here and `check_end_names` set to `true`, the name
        /// comparison erroneously fails for closing tags that contain trailing
        /// whitespace.
        ///
        /// (`true` by default)
        ///
        /// [`End`]: Event::End
        pub fn trim_markup_names_in_closing_tags(&mut self, val: bool) -> &mut Self {
            let state = &mut self $(.$holder)? .state;
            state.trim_markup_names_in_closing_tags = val;
            self
        }

        /// Selects whether a closing tag whose name differs from the matching
        /// opening tag is detected.
        ///
        /// Start and end tags [should match literally][spec]: their prefixes must
        /// be the same even when different prefixes resolve to the same namespace.
        /// The XML
        ///
        /// ```xml
        /// <outer xmlns="namespace" xmlns:p="namespace">
        /// </p:outer>
        /// ```
        ///
        /// is therefore not valid, although the start and end tags are semantically
        /// the same. Namespaces are an extension of the original XML specification
        /// (which has no namespaces) and must remain backward-compatible with it.
        ///
        /// With `false`, closing tags are not compared with their opening tags, so
        /// `<mytag></different_tag>` is permitted.
        ///
        /// Disabling the check saves time when the XML is known to be sane
        /// (already processed, etc.).
        ///
        /// When the check is disabled the emitted [`End`] event is not modified,
        /// i.e. it carries the data of the mismatched end tag.
        ///
        /// Be aware that `true` costs extra allocations, because the tag name has
        /// to be stored for the [`End`] event. When [`expand_empty_elements`] is
        /// enabled as well, one additional allocation serves both options.
        ///
        /// (`true` by default)
        ///
        /// [spec]: https://www.w3.org/TR/xml11/#dt-etag
        /// [`End`]: Event::End
        /// [`expand_empty_elements`]: Self::expand_empty_elements
        pub fn check_end_names(&mut self, val: bool) -> &mut Self {
            let state = &mut self $(.$holder)? .state;
            state.check_end_names = val;
            self
        }

        /// Selects whether the content of comments is validated.
        ///
        /// With `true`, each [`Comment`] event is checked for the sequence `--`,
        /// which XML forbids inside comments. Comments are usually not wanted at
        /// all, so their correctness rarely matters; the default is `false` for
        /// better performance.
        ///
        /// (`false` by default)
        ///
        /// [`Comment`]: Event::Comment
        pub fn check_comments(&mut self, val: bool) -> &mut Self {
            let state = &mut self $(.$holder)? .state;
            state.check_comments = val;
            self
        }
    };
}
160
/// Shared body of the event-reading methods.
///
/// Runs the [`ParseState`] machine of `$self.state` until one event or an error
/// is produced, and evaluates to that `Result`. When the result is an error or
/// [`Event::Eof`], the state is switched to `ParseState::Exit`, so every later
/// invocation yields `Ok(Event::Eof)`.
///
/// # Parameters
/// - `$self`: reader whose `state` field is inspected and advanced
/// - `$buf`: buffer binding passed to the reading methods; it is re-bound when
///   `$read_until_open` hands the buffer back without producing an event
/// - `$reader`: expression for the underlying data source
/// - `$read_until_open`, `$read_until_close`: names of the methods of `$self`
///   that perform the actual reading
/// - `$await`: optional token appended as `.$await` to every reading call
macro_rules! read_event_impl {
    (
        $self:ident, $buf:ident,
        $reader:expr,
        $read_until_open:ident,
        $read_until_close:ident
        $(, $await:ident)?
    ) => {{
        let event = loop {
            match $self.state.state {
                ParseState::Init => { // Go to OpenedTag state
                    // If encoding set explicitly, we not need to detect it. For example,
                    // explicit UTF-8 set automatically if Reader was created using `from_str`.
                    // But we still need to remove BOM for consistency with no encoding
                    // feature enabled path
                    #[cfg(feature = "encoding")]
                    if let Some(encoding) = $reader.detect_encoding() $(.$await)? ? {
                        if $self.state.encoding.can_be_refined() {
                            $self.state.encoding = crate::reader::EncodingRef::BomDetected(encoding);
                        }
                    }

                    // Removes UTF-8 BOM if it is present
                    #[cfg(not(feature = "encoding"))]
                    $reader.remove_utf8_bom() $(.$await)? ?;

                    // Go to OpenedTag state
                    match $self.$read_until_open($buf) $(.$await)? {
                        Ok(Ok(ev)) => break Ok(ev),
                        // No event was produced: take the buffer back and loop again
                        Ok(Err(b)) => $buf = b,
                        Err(err) => break Err(err),
                    }
                },
                ParseState::ClosedTag => { // Go to OpenedTag state
                    match $self.$read_until_open($buf) $(.$await)? {
                        Ok(Ok(ev)) => break Ok(ev),
                        // No event was produced: take the buffer back and loop again
                        Ok(Err(b)) => $buf = b,
                        Err(err) => break Err(err),
                    }
                },
                // Go to ClosedTag state in next two arms
                ParseState::OpenedTag => break $self.$read_until_close($buf) $(.$await)?,
                ParseState::Empty => break $self.state.close_expanded_empty(),
                ParseState::Exit => break Ok(Event::Eof),
            };
        };
        // Errors and end of input are terminal: the reader stays in `Exit` forever
        match event {
            Err(_) | Ok(Event::Eof) => $self.state.state = ParseState::Exit,
            _ => {}
        }
        event
    }};
}
214
/// Consumes input up to and including the next `<`.
///
/// The parser is moved to the `OpenedTag` state first. Leading whitespace is
/// skipped when [`ReaderState::trim_text_start`] is `true`. If the current byte
/// is then already `<`, it is consumed and the enclosing function returns
/// `Ok(Err($buf))`, handing the buffer back so that the caller can read the next
/// event. Otherwise the bytes in front of `<` are turned into an event by
/// `emit_text`, or `Event::Eof` is produced when the input is exhausted.
///
/// This code is executed in two cases:
/// - at the start of parsing, right after the BOM (if any) was skipped
/// - after parsing `</tag>` or `<tag>`
macro_rules! read_until_open {
    (
        $self:ident, $buf:ident,
        $reader:expr,
        $read_event:ident
        $(, $await:ident)?
    ) => {{
        $self.state.state = ParseState::OpenedTag;

        if $self.state.trim_text_start {
            $reader.skip_whitespace(&mut $self.state.offset) $(.$await)? ?;
        }

        // Standing right at `<` means there is no text, so no empty Text event
        // is produced; the buffer goes back to the parsing loop instead
        let at_markup = $reader.skip_one(b'<', &mut $self.state.offset) $(.$await)? ?;
        if at_markup {
            return Ok(Err($buf));
        }

        let text = $reader
            .read_bytes_until(b'<', $buf, &mut $self.state.offset)
            $(.$await)?;
        match text {
            Err(e) => Err(e),
            Ok(None) => Ok(Ok(Event::Eof)),
            // Everything in front of `<` becomes the content of a Text event
            Ok(Some(bytes)) => $self.state.emit_text(bytes).map(Ok),
        }
    }};
}
254
/// Consumes input up to and including the closing `>` of a markup.
///
/// Must be invoked right after a `<` was seen and skipped. The next (current)
/// byte is peeked to choose the kind of [`Event`] to produce:
///
/// |Symbol |Event
/// |-------|-------------------------------------
/// |`!`    |[`Comment`], [`CData`] or [`DocType`]
/// |`/`    |[`End`]
/// |`?`    |[`PI`]
/// |_other_|[`Start`] or [`Empty`]
///
/// The parser is moved to the `ClosedTag` state. Exhausted input yields
/// `Event::Eof`.
///
/// [`Comment`]: Event::Comment
/// [`CData`]: Event::CData
/// [`DocType`]: Event::DocType
/// [`End`]: Event::End
/// [`PI`]: Event::PI
/// [`Start`]: Event::Start
/// [`Empty`]: Event::Empty
macro_rules! read_until_close {
    (
        $self:ident, $buf:ident,
        $reader:expr
        $(, $await:ident)?
    ) => {{
        $self.state.state = ParseState::ClosedTag;

        let next = $reader.peek_one() $(.$await)?;
        match next {
            Err(e) => Err(e),
            Ok(None) => Ok(Event::Eof),
            // `<!` - comment, CDATA or DOCTYPE declaration
            Ok(Some(b'!')) => {
                let read = $reader
                    .read_bang_element($buf, &mut $self.state.offset)
                    $(.$await)?;
                match read {
                    Err(e) => Err(e),
                    Ok(None) => Ok(Event::Eof),
                    Ok(Some((bang_type, bytes))) => $self.state.emit_bang(bang_type, bytes),
                }
            }
            // `</` - closing tag
            Ok(Some(b'/')) => {
                let read = $reader
                    .read_bytes_until(b'>', $buf, &mut $self.state.offset)
                    $(.$await)?;
                match read {
                    Err(e) => Err(e),
                    Ok(None) => Ok(Event::Eof),
                    Ok(Some(bytes)) => $self.state.emit_end(bytes),
                }
            }
            // `<?` - processing instruction
            Ok(Some(b'?')) => {
                let read = $reader
                    .read_bytes_until(b'>', $buf, &mut $self.state.offset)
                    $(.$await)?;
                match read {
                    Err(e) => Err(e),
                    Ok(None) => Ok(Event::Eof),
                    Ok(Some(bytes)) => $self.state.emit_question_mark(bytes),
                }
            }
            // `<...` - opening or self-closed tag
            Ok(Some(_)) => {
                let read = $reader
                    .read_element($buf, &mut $self.state.offset)
                    $(.$await)?;
                match read {
                    Err(e) => Err(e),
                    Ok(None) => Ok(Event::Eof),
                    Ok(Some(bytes)) => $self.state.emit_start(bytes),
                }
            }
        }
    }};
}
325
/// Generalization of `read_to_end` method for buffered and borrowed readers.
///
/// Reads events with `$read_event` until the `End` event that closes the element
/// named `$end` at the current nesting level, and evaluates to the span
/// `start..end`: `start` is the buffer position when the macro was entered and
/// `end` is the position just before the matching `End` event was read.
///
/// Nested `Start` events with the same name increase a depth counter, so only
/// the balancing `End` terminates the loop. Reaching `Event::Eof` first makes
/// the enclosing function return `Error::UnexpectedEof`; any read error is
/// returned from the enclosing function unchanged.
///
/// # Parameters
/// - `$self`: reader to read events from
/// - `$end`: name of the element whose end is searched
/// - `$buf`: buffer passed to every `$read_event` call
/// - `$read_event`: name of the event-reading method of `$self`
/// - `$clear`: block executed before each read
/// - `$await`: optional token appended as `.$await` to every read call
macro_rules! read_to_end {
    (
        $self:expr, $end:expr, $buf:expr,
        $read_event:ident,
        // Code block that performs clearing of internal buffer after read of each event
        $clear:block
        $(, $await:ident)?
    ) => {{
        let start = $self.buffer_position();
        // Number of currently open nested elements named `$end`
        let mut depth = 0;
        loop {
            $clear
            // Position before the event is read, i.e. where a matching `End` would start
            let end = $self.buffer_position();
            match $self.$read_event($buf) $(.$await)? {
                Err(e) => return Err(e),

                Ok(Event::Start(e)) if e.name() == $end => depth += 1,
                Ok(Event::End(e)) if e.name() == $end => {
                    if depth == 0 {
                        break start..end;
                    }
                    depth -= 1;
                }
                Ok(Event::Eof) => {
                    let name = $self.decoder().decode($end.as_ref());
                    return Err(Error::UnexpectedEof(format!("</{:?}>", name)));
                }
                _ => (),
            }
        }
    }};
}
359
360	#[cfg(feature = "async-tokio")]
361	mod async_tokio;
362	mod buffered_reader;
363	mod ns_reader;
364	mod slice_reader;
365	mod state;
366
367	pub use ns_reader::NsReader;
368
/// Half-open byte range of the input occupied by some piece of XML.
pub type Span = Range<usize>;
371
372	////////////////////////////////////////////////////////////////////////////////////////////////////
373
/// Possible reader states. The state transition diagram (`true` and `false` shows
/// value of [`Reader::expand_empty_elements()`] option):
///
/// ```mermaid
/// flowchart LR
///   subgraph _
///     direction LR
///
///     Init      -- "(no event)"\n                                       --> OpenedTag
///     OpenedTag -- Decl, DocType, PI\nComment, CData\nStart, Empty, End --> ClosedTag
///     ClosedTag -- "#lt;false#gt;\n(no event)"\nText                    --> OpenedTag
///   end
///   ClosedTag -- "#lt;true#gt;"\nStart --> Empty
///   Empty     -- End                   --> ClosedTag
///   _ -. Eof .-> Exit
/// ```
#[derive(Clone)]
enum ParseState {
    /// Initial state in which the reader stays after creation. The first read
    /// from that state could produce a `Text`, `Decl`, `Comment` or `Start`
    /// event. The next state is always `OpenedTag`. The reader never returns to
    /// this state. The event emitted during the transition to `OpenedTag` is a
    /// `Text` event if the first symbol is not `<`, otherwise no event is emitted.
    Init,
    /// State after seeing the `<` symbol. Depending on the next symbol all other
    /// events could be generated.
    ///
    /// After generating one event the reader moves to the `ClosedTag` state.
    OpenedTag,
    /// State in which the reader searches for the `<` symbol of a markup. All
    /// bytes before that symbol are returned in the [`Event::Text`] event. After
    /// that the reader moves to the `OpenedTag` state.
    ClosedTag,
    /// This state is used only if option [`expand_empty_elements`] is set to `true`.
    /// The reader enters this state when it is in a `ClosedTag` state and emits an
    /// [`Event::Start`] event. The next event emitted will be an [`Event::End`],
    /// after which the reader returns to the `ClosedTag` state.
    ///
    /// [`expand_empty_elements`]: ReaderState::expand_empty_elements
    Empty,
    /// The reader enters this state when an `Eof` event was generated or an
    /// error occurred. This is the last state, the reader stays in it forever.
    Exit,
}
418
/// An encoding paired with the way the reader learned about it.
///
/// The state transition diagram:
///
/// ```mermaid
/// flowchart LR
///   Implicit    -- from_str       --> Explicit
///   Implicit    -- BOM            --> BomDetected
///   Implicit    -- "encoding=..." --> XmlDetected
///   BomDetected -- "encoding=..." --> XmlDetected
/// ```
#[cfg(feature = "encoding")]
#[derive(Clone, Copy)]
enum EncodingRef {
    /// The value is only an assumption. A BOM or the XML declaration event
    /// (`<?xml encoding=... ?>`) may still refine it.
    Implicit(&'static Encoding),
    /// The value was requested explicitly. Neither a BOM nor the XML
    /// declaration (`<?xml encoding=... ?>`) can change it.
    Explicit(&'static Encoding),
    /// The value was derived from a byte order mark (BOM) or from the first
    /// bytes of the content. The XML declaration event
    /// (`<?xml encoding=... ?>`) may still refine it.
    BomDetected(&'static Encoding),
    /// The value was taken from the XML declaration event
    /// (`<?xml encoding=... ?>`). It can no longer change.
    XmlDetected(&'static Encoding),
}
#[cfg(feature = "encoding")]
impl EncodingRef {
    /// Returns the stored encoding, whichever way it was obtained.
    #[inline]
    fn encoding(&self) -> &'static Encoding {
        match self {
            Self::Implicit(e) => e,
            Self::Explicit(e) => e,
            Self::BomDetected(e) => e,
            Self::XmlDetected(e) => e,
        }
    }

    /// Returns `true` when the encoding may still be replaced by a more
    /// reliable one, i.e. for the `Implicit` and `BomDetected` variants.
    #[inline]
    fn can_be_refined(&self) -> bool {
        // Kept as an exhaustive `match` so that a new variant forces a decision here
        match self {
            Self::Implicit(_) | Self::BomDetected(_) => true,
            Self::Explicit(_) | Self::XmlDetected(_) => false,
        }
    }
}
465
466	////////////////////////////////////////////////////////////////////////////////////////////////////
467
468	/// A low level encoding-agnostic XML event reader.
469	///
470	/// Consumes bytes and streams XML [`Event`]s.
471	///
472	/// This reader does not manage namespace declarations and not able to resolve
473	/// prefixes. If you want these features, use the [`NsReader`].
474	///
475	/// # Examples
476	///
477	/// ```
478	/// use quick_xml::events::Event;
479	/// use quick_xml::reader::Reader;
480	///
481	/// let xml = r#"<tag1 att1 = "test">
482	/// <tag2><!--Test comment-->Test</tag2>
483	/// <tag2>Test 2</tag2>
484	/// </tag1>"#;
485	/// let mut reader = Reader::from_str(xml);
486	/// reader.trim_text(`true`);
487	///
488	/// let mut count = `0`;
489	/// let mut txt = Vec::new();
490	/// let mut buf = Vec::new();
491	///
492	/// // The `Reader` does not implement `Iterator` because it outputs borrowed data (`Cow`s)
493	/// loop {
494	/// // NOTE: this is the generic case when we don't know about the input BufRead.
495	/// // when the input is a &str or a &[u8], we don't actually need to use another
496	/// // buffer, we could directly call `reader.read_event()`
497	/// match reader.read_event_into(&mut buf) {
498	/// Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
499	/// // exits the loop when reaching end of file
500	/// Ok(Event::Eof) => break,
501	///
502	/// Ok(Event::Start(e)) => {
503	/// match e.name().as_ref() {
504	/// b"tag1" => println!("attributes values: {:?}",
505	/// e.attributes().map(\|a\| a.unwrap().value)
506	/// .collect::<Vec<_>>()),
507	/// b"tag2" => count += `1`,
508	/// _ => (),
509	/// }
510	/// }
511	/// Ok(Event::Text(e)) => txt.push(e.unescape().unwrap().into_owned()),
512	///
513	/// // There are several other `Event`s we do not consider here
514	/// _ => (),
515	/// }
516	/// // if we don't keep a borrow elsewhere, we can clear the buffer to keep memory usage low
517	/// buf.clear();
518	/// }
519	/// ```
520	///
521	/// [`NsReader`]: crate::reader::NsReader
522	#[derive(Clone)]
523	pub struct Reader<R> {
524	/// Source of data for parse
525	reader: R,
526	/// Configuration and current parse state
527	state: ReaderState,
528	}
529
530	/// Builder methods
531	impl<R> Reader<R> {
532	/// Creates a `Reader` that reads from a given reader.
533	pub fn from_reader(reader: R) -> Self {
534	Self {
535	reader,
536	state: ReaderState::default(),
537	}
538	}
539
540	configure_methods!();
541	}
542
543	/// Getters
544	impl<R> Reader<R> {
545	/// Consumes `Reader` returning the underlying reader
546	///
547	/// Can be used to compute line and column of a parsing error position
548	///
549	/// # Examples
550	///
551	/// ```
552	/// # use pretty_assertions::assert_eq;
553	/// use std::{str, io::Cursor};
554	/// use quick_xml::events::Event;
555	/// use quick_xml::reader::Reader;
556	///
557	/// let xml = r#"<tag1 att1 = "test">
558	/// <tag2><!--Test comment-->Test</tag2>
559	/// <tag3>Test 2</tag3>
560	/// </tag1>"#;
561	/// let mut reader = Reader::from_reader(Cursor::new(xml.as_bytes()));
562	/// let mut buf = Vec::new();
563	///
564	/// fn into_line_and_column(reader: Reader<Cursor<&[u8]>>) -> (usize, usize) {
565	/// let end_pos = reader.buffer_position();
566	/// let mut cursor = reader.into_inner();
567	/// let s = String::from_utf8(cursor.into_inner()[`0`..end_pos].to_owned())
568	/// .expect("can't make a string");
569	/// let mut line = `1`;
570	/// let mut column = `0`;
571	/// for c in s.chars() {
572	/// if c == '`\n`' {
573	/// line += `1`;
574	/// column = `0`;
575	/// } else {
576	/// column += `1`;
577	/// }
578	/// }
579	/// (line, column)
580	/// }
581	///
582	/// loop {
583	/// match reader.read_event_into(&mut buf) {
584	/// Ok(Event::Start(ref e)) => match e.name().as_ref() {
585	/// b"tag1" \| b"tag2" => (),
586	/// tag => {
587	/// assert_eq!(b"tag3", tag);
588	/// assert_eq!((`3`, `22`), into_line_and_column(reader));
589	/// break;
590	/// }
591	/// },
592	/// Ok(Event::Eof) => unreachable!(),
593	/// _ => (),
594	/// }
595	/// buf.clear();
596	/// }
597	/// ```
598	pub fn into_inner(self) -> R {
599	self.reader
600	}
601
602	/// Gets a reference to the underlying reader.
603	pub fn get_ref(&self) -> &R {
604	&self.reader
605	}
606
607	/// Gets a mutable reference to the underlying reader.
608	pub fn get_mut(&mut self) -> &mut R {
609	&mut self.reader
610	}
611
612	/// Gets the current byte position in the input data.
613	///
614	/// Useful when debugging errors.
615	pub fn buffer_position(&self) -> usize {
616	// when internal state is OpenedTag, we have actually read until '<',
617	// which we don't want to show
618	if let ParseState::OpenedTag = self.state.state {
619	self.state.offset - `1`
620	} else {
621	self.state.offset
622	}
623	}
624
625	/// Get the decoder, used to decode bytes, read by this reader, to the strings.
626	///
627	/// If [`encoding`] feature is enabled, the used encoding may change after
628	/// parsing the XML declaration, otherwise encoding is fixed to UTF-8.
629	///
630	/// If [`encoding`] feature is enabled and no encoding is specified in declaration,
631	/// defaults to UTF-8.
632	///
633	/// [`encoding`]: ../index.html#encoding
634	#[inline]
635	pub fn decoder(&self) -> Decoder {
636	self.state.decoder()
637	}
638	}
639
640	/// Private sync reading methods
641	impl<R> Reader<R> {
642	/// Read text into the given buffer, and return an event that borrows from
643	/// either that buffer or from the input itself, based on the type of the
644	/// reader.
645	fn read_event_impl<'i, B>(&mut self, mut buf: B) -> Result<Event<'i>>
646	where
647	R: XmlSource<'i, B>,
648	{
649	read_event_impl!(self, buf, self.reader, read_until_open, read_until_close)
650	}
651
652	/// Read until '<' is found, moves reader to an `OpenedTag` state and returns a `Text` event.
653	///
654	/// Returns inner `Ok` if the loop should be broken and an event returned.
655	/// Returns inner `Err` with the same `buf` because Rust borrowck stumbles upon this case in particular.
656	fn read_until_open<'i, B>(&mut self, buf: B) -> Result<std::result::Result<Event<'i>, B>>
657	where
658	R: XmlSource<'i, B>,
659	{
660	read_until_open!(self, buf, self.reader, read_event_impl)
661	}
662
663	/// Private function to read until `>` is found. This function expects that
664	/// it was called just after encounter a `<` symbol.
665	fn read_until_close<'i, B>(&mut self, buf: B) -> Result<Event<'i>>
666	where
667	R: XmlSource<'i, B>,
668	{
669	read_until_close!(self, buf, self.reader)
670	}
671	}
672
673	////////////////////////////////////////////////////////////////////////////////////////////////////
674
675	/// Represents an input for a reader that can return borrowed data.
676	///
677	/// There are two implementors of this trait: generic one that read data from
678	/// `Self`, copies some part of it into a provided buffer of type `B` and then
679	/// returns data that borrow from that buffer.
680	///
681	/// The other implementor is for `&[u8]` and instead of copying data returns
682	/// borrowed data from `Self` instead. This implementation allows zero-copy
683	/// deserialization.
684	///
685	/// # Parameters
686	/// - `'r`: lifetime of a buffer from which events will borrow
687	/// - `B`: a type of a buffer that can be used to store data read from `Self` and
688	/// from which events can borrow
689	trait XmlSource<'r, B> {
690	/// Removes UTF-8 BOM if it is present
691	#[cfg(not(feature = "encoding"))]
692	fn remove_utf8_bom(&mut self) -> Result<()>;
693
694	/// Determines encoding from the start of input and removes BOM if it is present
695	#[cfg(feature = "encoding")]
696	fn detect_encoding(&mut self) -> Result<Option<&'static Encoding>>;
697
698	/// Read input until `byte` is found or end of input is reached.
699	///
700	/// Returns a slice of data read up to `byte`, which does not include into result.
701	/// If input (`Self`) is exhausted, returns `None`.
702	///
703	/// # Example
704	///
705	/// ```ignore
706	/// let mut position = `0`;
707	/// let mut input = b"abc*def".as_ref();
708	/// // ^= 4
709	///
710	/// assert_eq!(
711	/// input.read_bytes_until(b'', (), &mut* position).unwrap(),
712	/// Some(b"abc".as_ref())
713	/// );
714	/// assert_eq!(position, `4`); // position after the symbol matched
715	/// ```
716	///
717	/// # Parameters
718	/// - `byte`: Byte for search
719	/// - `buf`: Buffer that could be filled from an input (`Self`) and
720	/// from which [events] could borrow their data
721	/// - `position`: Will be increased by amount of bytes consumed
722	///
723	/// [events]: crate::events::Event
724	fn read_bytes_until(
725	&mut self,
726	byte: u8,
727	buf: B,
728	position: &mut usize,
729	) -> Result<Option<&'r [u8]>>;
730
731	/// Read input until comment, CDATA or processing instruction is finished.
732	///
733	/// This method expect that `<` already was read.
734	///
735	/// Returns a slice of data read up to end of comment, CDATA or processing
736	/// instruction (`>`), which does not include into result.
737	///
738	/// If input (`Self`) is exhausted and nothing was read, returns `None`.
739	///
740	/// # Parameters
741	/// - `buf`: Buffer that could be filled from an input (`Self`) and
742	/// from which [events] could borrow their data
743	/// - `position`: Will be increased by amount of bytes consumed
744	///
745	/// [events]: crate::events::Event
746	fn read_bang_element(
747	&mut self,
748	buf: B,
749	position: &mut usize,
750	) -> Result<Option<(BangType, &'r [u8])>>;
751
752	/// Read input until XML element is closed by approaching a `>` symbol.
753	/// Returns `Some(buffer)` that contains a data between `<` and `>` or
754	/// `None` if end-of-input was reached and nothing was read.
755	///
756	/// Derived from `read_until`, but modified to handle XML attributes
757	/// using a minimal state machine.
758	///
759	/// Attribute values are [defined] as follows:
760	/// ```plain
761	/// AttValue := '"' (([^<&"]) \| Reference) '"'*
762	/// \| "'" (([^<&']) \| Reference) "'"*
763	/// ```
764	/// (`Reference` is something like `"`, but we don't care about
765	/// escaped characters at this level)
766	///
767	/// # Parameters
768	/// - `buf`: Buffer that could be filled from an input (`Self`) and
769	/// from which [events] could borrow their data
770	/// - `position`: Will be increased by amount of bytes consumed
771	///
772	/// [defined]: https://www.w3.org/TR/xml11/#NT-AttValue
773	/// [events]: crate::events::Event
774	fn read_element(&mut self, buf: B, position: &mut usize) -> Result<Option<&'r [u8]>>;
775
776	/// Consume and discard all the whitespace until the next non-whitespace
777	/// character or EOF.
778	///
779	/// # Parameters
780	/// - `position`: Will be increased by amount of bytes consumed
781	fn skip_whitespace(&mut self, position: &mut usize) -> Result<()>;
782
783	/// Consume and discard one character if it matches the given byte. Return
784	/// `true` if it matched.
785	///
786	/// # Parameters
787	/// - `position`: Will be increased by 1 if byte is matched
788	fn skip_one(&mut self, byte: u8, position: &mut usize) -> Result<bool>;
789
790	/// Return one character without consuming it, so that future `read_` calls*
791	/// will still include it. On EOF, return `None`.
792	fn peek_one(&mut self) -> Result<Option<u8>>;
793	}
794
/// Kinds of markup that begin with `<!`
#[derive(Debug, PartialEq)]
enum BangType {
    /// CDATA section: `<![CDATA[...]]>`
    CData,
    /// Comment: `<!--...-->`
    Comment,
    /// Document type declaration: `<!DOCTYPE...>`
    DocType,
}
805	impl BangType {
806	#[inline(always)]
807	fn new(byte: Option<u8>) -> Result<Self> {
808	Ok(match byte {
809	Some(b'[') => Self::CData,
810	Some(b'-') => Self::Comment,
811	Some(b'D') \| Some(b'd') => Self::DocType,
812	Some(b) => return Err(Error::UnexpectedBang(b)),
813	None => return Err(Error::UnexpectedEof("Bang".to_string())),
814	})
815	}
816
817	/// If element is finished, returns its content up to `>` symbol and
818	/// an index of this symbol, otherwise returns `None`
819	///
820	/// # Parameters
821	/// - `buf`: buffer with data consumed on previous iterations
822	/// - `chunk`: data read on current iteration and not yet consumed from reader
823	#[inline(always)]
824	fn parse<'b>(&self, buf: &[u8], chunk: &'b [u8]) -> Option<(&'b [u8], usize)> {
825	for i in memchr::memchr_iter(b'>', chunk) {
826	match self {
827	// Need to read at least 6 symbols (`!---->`) for properly finished comment
828	// <!----> - XML comment
829	// 012345 - i
830	Self::Comment if buf.len() + i > `4` => {
831	if chunk[..i].ends_with(b"--") {
832	// We cannot strip last `--` from the buffer because we need it in case of
833	// check_comments enabled option. XML standard requires that comment
834	// will not end with `--->` sequence because this is a special case of
835	// `--` in the comment (https://www.w3.org/TR/xml11/#sec-comments)
836	return Some((&chunk[..i], i + `1`)); // +1 for `>`
837	}
838	// End sequence `-\|->` was splitted at \|
839	// buf --/ \-- chunk
840	if i == `1` && buf.ends_with(b"-") && chunk[`0`] == b'-' {
841	return Some((&chunk[..i], i + `1`)); // +1 for `>`
842	}
843	// End sequence `--\|>` was splitted at \|
844	// buf --/ \-- chunk
845	if i == `0` && buf.ends_with(b"--") {
846	return Some((&[], i + `1`)); // +1 for `>`
847	}
848	}
849	Self::Comment => {}
850	Self::CData => {
851	if chunk[..i].ends_with(b"]]") {
852	return Some((&chunk[..i], i + `1`)); // +1 for `>`
853	}
854	// End sequence `]\|]>` was splitted at \|
855	// buf --/ \-- chunk
856	if i == `1` && buf.ends_with(b"]") && chunk[`0`] == b']' {
857	return Some((&chunk[..i], i + `1`)); // +1 for `>`
858	}
859	// End sequence `]]\|>` was splitted at \|
860	// buf --/ \-- chunk
861	if i == `0` && buf.ends_with(b"]]") {
862	return Some((&[], i + `1`)); // +1 for `>`
863	}
864	}
865	Self::DocType => {
866	let content = &chunk[..i];
867	let balance = memchr::memchr2_iter(b'<', b'>', content)
868	.map(\|p\| if content[p] == b'<' { `1i32` } else { `-1` })
869	.sum::<i32>();
870	if balance == `0` {
871	return Some((content, i + `1`)); // +1 for `>`
872	}
873	}
874	}
875	}
876	None
877	}
878	#[inline]
879	fn to_err(&self) -> Error {
880	let bang_str = match self {
881	Self::CData => "CData",
882	Self::Comment => "Comment",
883	Self::DocType => "DOCTYPE",
884	};
885	Error::UnexpectedEof(bang_str.to_string())
886	}
887	}
888
/// States of the scanner used by [`XmlSource::read_element`]
#[derive(Clone, Copy)]
enum ReadElementState {
    /// Starting state: within the element, but not within an attribute value
    Elem,
    /// Within an attribute value delimited by single quotes
    SingleQ,
    /// Within an attribute value delimited by double quotes
    DoubleQ,
}
899	impl ReadElementState {
900	/// Changes state by analyzing part of input.
901	/// Returns a tuple with part of chunk up to element closing symbol `>`
902	/// and a position after that symbol or `None` if such symbol was not found
903	#[inline(always)]
904	fn change<'b>(&mut self, chunk: &'b [u8]) -> Option<(&'b [u8], usize)> {
905	for i: usize in memchr::memchr3_iter(needle1:b'>', needle2:b'`\'`', needle3:b'"', haystack:chunk) {
906	self = match* (*self, chunk[i]) {
907	// only allowed to match `>` while we are in state `Elem`
908	(Self::Elem, b'>') => return Some((&chunk[..i], i + `1`)),
909	(Self::Elem, b'`\'`') => Self::SingleQ,
910	(Self::Elem, b'`\"`') => Self::DoubleQ,
911
912	// the only end_byte that gets us out if the same character
913	(Self::SingleQ, b'`\'`') \| (Self::DoubleQ, b'"') => Self::Elem,
914
915	// all other bytes: no state change
916	_ => *self,
917	};
918	}
919	None
920	}
921	}
922
/// Checks whether the byte is a whitespace: a blank (`b' '`), a carriage
/// return (`\r`), a new line (`\n`) or a tab (`\t`).
///
/// Any other byte, including other ASCII control characters, yields `false`.
#[inline]
pub(crate) const fn is_whitespace(b: u8) -> bool {
    matches!(b, b' ' | b'\r' | b'\n' | b'\t')
}
928
929	////////////////////////////////////////////////////////////////////////////////////////////////////
930
931	#[cfg(test)]
932	mod test {
933	/// Checks the internal implementation of the various reader methods
934	macro_rules! check {
935	(
936	#[$test:meta]
937	$read_event:ident,
938	$read_until_close:ident,
939	// constructor of the XML source on which internal functions will be called
940	$source:path,
941	// constructor of the buffer to which read data will stored
942	$buf:expr
943	$(, $async:ident, $await:ident)?
944	) => {
945	mod read_bytes_until {
946	use super::*;
947	// Use Bytes for printing bytes as strings for ASCII range
948	use crate::utils::Bytes;
949	use pretty_assertions::assert_eq;
950
951	/// Checks that search in the empty buffer returns `None`
952	#[$test]
953	$($async)? fn empty() {
954	let buf = $buf;
955	let mut position = `0`;
956	let mut input = b"".as_ref();
957	// ^= 0
958
959	assert_eq!(
960	$source(&mut input)
961	.read_bytes_until(b'', buf, &mut* position)
962	$(.$await)?
963	.unwrap()
964	.map(Bytes),
965	None
966	);
967	assert_eq!(position, `0`);
968	}
969
970	/// Checks that search in the buffer non-existent value returns entire buffer
971	/// as a result and set `position` to `len()`
972	#[$test]
973	$($async)? fn non_existent() {
974	let buf = $buf;
975	let mut position = `0`;
976	let mut input = b"abcdef".as_ref();
977	// ^= 6
978
979	assert_eq!(
980	$source(&mut input)
981	.read_bytes_until(b'', buf, &mut* position)
982	$(.$await)?
983	.unwrap()
984	.map(Bytes),
985	Some(Bytes(b"abcdef"))
986	);
987	assert_eq!(position, `6`);
988	}
989
990	/// Checks that search in the buffer an element that is located in the front of
991	/// buffer returns empty slice as a result and set `position` to one symbol
992	/// after match (`1`)
993	#[$test]
994	$($async)? fn at_the_start() {
995	let buf = $buf;
996	let mut position = `0`;
997	let mut input = b"*abcdef".as_ref();
998	// ^= 1
999
1000	assert_eq!(
1001	$source(&mut input)
1002	.read_bytes_until(b'', buf, &mut* position)
1003	$(.$await)?
1004	.unwrap()
1005	.map(Bytes),
1006	Some(Bytes(b""))
1007	);
1008	assert_eq!(position, `1`); // position after the symbol matched
1009	}
1010
1011	/// Checks that search in the buffer an element that is located in the middle of
1012	/// buffer returns slice before that symbol as a result and set `position` to one
1013	/// symbol after match
1014	#[$test]
1015	$($async)? fn inside() {
1016	let buf = $buf;
1017	let mut position = `0`;
1018	let mut input = b"abc*def".as_ref();
1019	// ^= 4
1020
1021	assert_eq!(
1022	$source(&mut input)
1023	.read_bytes_until(b'', buf, &mut* position)
1024	$(.$await)?
1025	.unwrap()
1026	.map(Bytes),
1027	Some(Bytes(b"abc"))
1028	);
1029	assert_eq!(position, `4`); // position after the symbol matched
1030	}
1031
1032	/// Checks that search in the buffer an element that is located in the end of
1033	/// buffer returns slice before that symbol as a result and set `position` to one
1034	/// symbol after match (`len()`)
1035	#[$test]
1036	$($async)? fn in_the_end() {
1037	let buf = $buf;
1038	let mut position = `0`;
1039	let mut input = b"abcdef*".as_ref();
1040	// ^= 7
1041
1042	assert_eq!(
1043	$source(&mut input)
1044	.read_bytes_until(b'', buf, &mut* position)
1045	$(.$await)?
1046	.unwrap()
1047	.map(Bytes),
1048	Some(Bytes(b"abcdef"))
1049	);
1050	assert_eq!(position, `7`); // position after the symbol matched
1051	}
1052	}
1053
1054	mod read_bang_element {
1055	use super::*;
1056
1057	/// Checks that reading CDATA content works correctly
1058	mod cdata {
1059	use super::*;
1060	use crate::errors::Error;
1061	use crate::reader::BangType;
1062	use crate::utils::Bytes;
1063	use pretty_assertions::assert_eq;
1064
1065	/// Checks that if input begins like CDATA element, but CDATA start sequence
1066	/// is not finished, parsing ends with an error
1067	#[$test]
1068	#[ignore = "start CDATA sequence fully checked outside of `read_bang_element`"]
1069	$($async)? fn not_properly_start() {
1070	let buf = $buf;
1071	let mut position = `0`;
1072	let mut input = b"![]]>other content".as_ref();
1073	// ^= 0
1074
1075	match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1076	Err(Error::UnexpectedEof(s)) if s == "CData" => {}
1077	x => assert!(
1078	`false`,
1079	r#"Expected `UnexpectedEof("CData")`, but result is: {:?}"#,
1080	x
1081	),
1082	}
1083	assert_eq!(position, `0`);
1084	}
1085
1086	/// Checks that if CDATA startup sequence was matched, but an end sequence
1087	/// is not found, parsing ends with an error
1088	#[$test]
1089	$($async)? fn not_closed() {
1090	let buf = $buf;
1091	let mut position = `0`;
1092	let mut input = b"![CDATA[other content".as_ref();
1093	// ^= 0
1094
1095	match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1096	Err(Error::UnexpectedEof(s)) if s == "CData" => {}
1097	x => assert!(
1098	`false`,
1099	r#"Expected `UnexpectedEof("CData")`, but result is: {:?}"#,
1100	x
1101	),
1102	}
1103	assert_eq!(position, `0`);
1104	}
1105
1106	/// Checks that CDATA element without content inside parsed successfully
1107	#[$test]
1108	$($async)? fn empty() {
1109	let buf = $buf;
1110	let mut position = `0`;
1111	let mut input = b"![CDATA[]]>other content".as_ref();
1112	// ^= 11
1113
1114	assert_eq!(
1115	$source(&mut input)
1116	.read_bang_element(buf, &mut position)
1117	$(.$await)?
1118	.unwrap()
1119	.map(\|(ty, data)\| (ty, Bytes(data))),
1120	Some((BangType::CData, Bytes(b"![CDATA[]]")))
1121	);
1122	assert_eq!(position, `11`);
1123	}
1124
1125	/// Checks that CDATA element with content parsed successfully.
1126	/// Additionally checks that sequences inside CDATA that may look like
1127	/// a CDATA end sequence do not interrupt CDATA parsing
1128	#[$test]
1129	$($async)? fn with_content() {
1130	let buf = $buf;
1131	let mut position = `0`;
1132	let mut input = b"![CDATA[cdata]] ]>content]]>other content]]>".as_ref();
1133	// ^= 28
1134
1135	assert_eq!(
1136	$source(&mut input)
1137	.read_bang_element(buf, &mut position)
1138	$(.$await)?
1139	.unwrap()
1140	.map(\|(ty, data)\| (ty, Bytes(data))),
1141	Some((BangType::CData, Bytes(b"![CDATA[cdata]] ]>content]]")))
1142	);
1143	assert_eq!(position, `28`);
1144	}
1145	}
1146
1147	/// Checks that reading XML comments works correctly. According to the [specification],
1148	/// comment data can contain any sequence except `--`:
1149	///
1150	/// ```peg
1151	/// comment = '<--' (!'--' char) '-->';*
1152	/// char = [#x1-#x2C]
1153	/// / [#x2E-#xD7FF]
1154	/// / [#xE000-#xFFFD]
1155	/// / [#x10000-#x10FFFF]
1156	/// ```
1157	///
1158	/// The presence of this limitation, however, is simply a poorly designed specification
1159	/// (maybe for purpose of building of LL(1) XML parser) and quick-xml does not check for
1160	/// presence of these sequences by default. This tests allow such content.
1161	///
1162	/// [specification]: https://www.w3.org/TR/xml11/#dt-comment
1163	mod comment {
1164	use super::*;
1165	use crate::errors::Error;
1166	use crate::reader::BangType;
1167	use crate::utils::Bytes;
1168	use pretty_assertions::assert_eq;
1169
1170	#[$test]
1171	#[ignore = "start comment sequence fully checked outside of `read_bang_element`"]
1172	$($async)? fn not_properly_start() {
1173	let buf = $buf;
1174	let mut position = `0`;
1175	let mut input = b"!- -->other content".as_ref();
1176	// ^= 0
1177
1178	match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1179	Err(Error::UnexpectedEof(s)) if s == "Comment" => {}
1180	x => assert!(
1181	`false`,
1182	r#"Expected `UnexpectedEof("Comment")`, but result is: {:?}"#,
1183	x
1184	),
1185	}
1186	assert_eq!(position, `0`);
1187	}
1188
1189	#[$test]
1190	$($async)? fn not_properly_end() {
1191	let buf = $buf;
1192	let mut position = `0`;
1193	let mut input = b"!->other content".as_ref();
1194	// ^= 0
1195
1196	match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1197	Err(Error::UnexpectedEof(s)) if s == "Comment" => {}
1198	x => assert!(
1199	`false`,
1200	r#"Expected `UnexpectedEof("Comment")`, but result is: {:?}"#,
1201	x
1202	),
1203	}
1204	assert_eq!(position, `0`);
1205	}
1206
1207	#[$test]
1208	$($async)? fn not_closed1() {
1209	let buf = $buf;
1210	let mut position = `0`;
1211	let mut input = b"!--other content".as_ref();
1212	// ^= 0
1213
1214	match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1215	Err(Error::UnexpectedEof(s)) if s == "Comment" => {}
1216	x => assert!(
1217	`false`,
1218	r#"Expected `UnexpectedEof("Comment")`, but result is: {:?}"#,
1219	x
1220	),
1221	}
1222	assert_eq!(position, `0`);
1223	}
1224
1225	#[$test]
1226	$($async)? fn not_closed2() {
1227	let buf = $buf;
1228	let mut position = `0`;
1229	let mut input = b"!-->other content".as_ref();
1230	// ^= 0
1231
1232	match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1233	Err(Error::UnexpectedEof(s)) if s == "Comment" => {}
1234	x => assert!(
1235	`false`,
1236	r#"Expected `UnexpectedEof("Comment")`, but result is: {:?}"#,
1237	x
1238	),
1239	}
1240	assert_eq!(position, `0`);
1241	}
1242
1243	#[$test]
1244	$($async)? fn not_closed3() {
1245	let buf = $buf;
1246	let mut position = `0`;
1247	let mut input = b"!--->other content".as_ref();
1248	// ^= 0
1249
1250	match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1251	Err(Error::UnexpectedEof(s)) if s == "Comment" => {}
1252	x => assert!(
1253	`false`,
1254	r#"Expected `UnexpectedEof("Comment")`, but result is: {:?}"#,
1255	x
1256	),
1257	}
1258	assert_eq!(position, `0`);
1259	}
1260
1261	#[$test]
1262	$($async)? fn empty() {
1263	let buf = $buf;
1264	let mut position = `0`;
1265	let mut input = b"!---->other content".as_ref();
1266	// ^= 6
1267
1268	assert_eq!(
1269	$source(&mut input)
1270	.read_bang_element(buf, &mut position)
1271	$(.$await)?
1272	.unwrap()
1273	.map(\|(ty, data)\| (ty, Bytes(data))),
1274	Some((BangType::Comment, Bytes(b"!----")))
1275	);
1276	assert_eq!(position, `6`);
1277	}
1278
1279	#[$test]
1280	$($async)? fn with_content() {
1281	let buf = $buf;
1282	let mut position = `0`;
1283	let mut input = b"!--->comment<--->other content".as_ref();
1284	// ^= 17
1285
1286	assert_eq!(
1287	$source(&mut input)
1288	.read_bang_element(buf, &mut position)
1289	$(.$await)?
1290	.unwrap()
1291	.map(\|(ty, data)\| (ty, Bytes(data))),
1292	Some((BangType::Comment, Bytes(b"!--->comment<---")))
1293	);
1294	assert_eq!(position, `17`);
1295	}
1296	}
1297
1298	/// Checks that reading DOCTYPE definition works correctly
1299	mod doctype {
1300	use super::*;
1301
1302	mod uppercase {
1303	use super::*;
1304	use crate::errors::Error;
1305	use crate::reader::BangType;
1306	use crate::utils::Bytes;
1307	use pretty_assertions::assert_eq;
1308
1309	#[$test]
1310	$($async)? fn not_properly_start() {
1311	let buf = $buf;
1312	let mut position = `0`;
1313	let mut input = b"!D other content".as_ref();
1314	// ^= 0
1315
1316	match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1317	Err(Error::UnexpectedEof(s)) if s == "DOCTYPE" => {}
1318	x => assert!(
1319	`false`,
1320	r#"Expected `UnexpectedEof("DOCTYPE")`, but result is: {:?}"#,
1321	x
1322	),
1323	}
1324	assert_eq!(position, `0`);
1325	}
1326
1327	#[$test]
1328	$($async)? fn without_space() {
1329	let buf = $buf;
1330	let mut position = `0`;
1331	let mut input = b"!DOCTYPEother content".as_ref();
1332	// ^= 0
1333
1334	match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1335	Err(Error::UnexpectedEof(s)) if s == "DOCTYPE" => {}
1336	x => assert!(
1337	`false`,
1338	r#"Expected `UnexpectedEof("DOCTYPE")`, but result is: {:?}"#,
1339	x
1340	),
1341	}
1342	assert_eq!(position, `0`);
1343	}
1344
1345	#[$test]
1346	$($async)? fn empty() {
1347	let buf = $buf;
1348	let mut position = `0`;
1349	let mut input = b"!DOCTYPE>other content".as_ref();
1350	// ^= 9
1351
1352	assert_eq!(
1353	$source(&mut input)
1354	.read_bang_element(buf, &mut position)
1355	$(.$await)?
1356	.unwrap()
1357	.map(\|(ty, data)\| (ty, Bytes(data))),
1358	Some((BangType::DocType, Bytes(b"!DOCTYPE")))
1359	);
1360	assert_eq!(position, `9`);
1361	}
1362
1363	#[$test]
1364	$($async)? fn not_closed() {
1365	let buf = $buf;
1366	let mut position = `0`;
1367	let mut input = b"!DOCTYPE other content".as_ref();
1368	// ^= 0
1369
1370	match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1371	Err(Error::UnexpectedEof(s)) if s == "DOCTYPE" => {}
1372	x => assert!(
1373	`false`,
1374	r#"Expected `UnexpectedEof("DOCTYPE")`, but result is: {:?}"#,
1375	x
1376	),
1377	}
1378	assert_eq!(position, `0`);
1379	}
1380	}
1381
1382	mod lowercase {
1383	use super::*;
1384	use crate::errors::Error;
1385	use crate::reader::BangType;
1386	use crate::utils::Bytes;
1387	use pretty_assertions::assert_eq;
1388
1389	#[$test]
1390	$($async)? fn not_properly_start() {
1391	let buf = $buf;
1392	let mut position = `0`;
1393	let mut input = b"!d other content".as_ref();
1394	// ^= 0
1395
1396	match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1397	Err(Error::UnexpectedEof(s)) if s == "DOCTYPE" => {}
1398	x => assert!(
1399	`false`,
1400	r#"Expected `UnexpectedEof("DOCTYPE")`, but result is: {:?}"#,
1401	x
1402	),
1403	}
1404	assert_eq!(position, `0`);
1405	}
1406
1407	#[$test]
1408	$($async)? fn without_space() {
1409	let buf = $buf;
1410	let mut position = `0`;
1411	let mut input = b"!doctypeother content".as_ref();
1412	// ^= 0
1413
1414	match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1415	Err(Error::UnexpectedEof(s)) if s == "DOCTYPE" => {}
1416	x => assert!(
1417	`false`,
1418	r#"Expected `UnexpectedEof("DOCTYPE")`, but result is: {:?}"#,
1419	x
1420	),
1421	}
1422	assert_eq!(position, `0`);
1423	}
1424
1425	#[$test]
1426	$($async)? fn empty() {
1427	let buf = $buf;
1428	let mut position = `0`;
1429	let mut input = b"!doctype>other content".as_ref();
1430	// ^= 9
1431
1432	assert_eq!(
1433	$source(&mut input)
1434	.read_bang_element(buf, &mut position)
1435	$(.$await)?
1436	.unwrap()
1437	.map(\|(ty, data)\| (ty, Bytes(data))),
1438	Some((BangType::DocType, Bytes(b"!doctype")))
1439	);
1440	assert_eq!(position, `9`);
1441	}
1442
1443	#[$test]
1444	$($async)? fn not_closed() {
1445	let buf = $buf;
1446	let mut position = `0`;
1447	let mut input = b"!doctype other content".as_ref();
1448	// ^= 0
1449
1450	match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1451	Err(Error::UnexpectedEof(s)) if s == "DOCTYPE" => {}
1452	x => assert!(
1453	`false`,
1454	r#"Expected `UnexpectedEof("DOCTYPE")`, but result is: {:?}"#,
1455	x
1456	),
1457	}
1458	assert_eq!(position, `0`);
1459	}
1460	}
1461	}
1462	}
1463
1464	mod read_element {
1465	use super::*;
1466	use crate::utils::Bytes;
1467	use pretty_assertions::assert_eq;
1468
1469	/// Checks that nothing was read from empty buffer
1470	#[$test]
1471	$($async)? fn empty() {
1472	let buf = $buf;
1473	let mut position = `0`;
1474	let mut input = b"".as_ref();
1475	// ^= 0
1476
1477	assert_eq!(
1478	$source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes),
1479	None
1480	);
1481	assert_eq!(position, `0`);
1482	}
1483
1484	mod open {
1485	use super::*;
1486	use crate::utils::Bytes;
1487	use pretty_assertions::assert_eq;
1488
1489	#[$test]
1490	$($async)? fn empty_tag() {
1491	let buf = $buf;
1492	let mut position = `0`;
1493	let mut input = b">".as_ref();
1494	// ^= 1
1495
1496	assert_eq!(
1497	$source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes),
1498	Some(Bytes(b""))
1499	);
1500	assert_eq!(position, `1`);
1501	}
1502
1503	#[$test]
1504	$($async)? fn normal() {
1505	let buf = $buf;
1506	let mut position = `0`;
1507	let mut input = b"tag>".as_ref();
1508	// ^= 4
1509
1510	assert_eq!(
1511	$source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes),
1512	Some(Bytes(b"tag"))
1513	);
1514	assert_eq!(position, `4`);
1515	}
1516
1517	#[$test]
1518	$($async)? fn empty_ns_empty_tag() {
1519	let buf = $buf;
1520	let mut position = `0`;
1521	let mut input = b":>".as_ref();
1522	// ^= 2
1523
1524	assert_eq!(
1525	$source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes),
1526	Some(Bytes(b":"))
1527	);
1528	assert_eq!(position, `2`);
1529	}
1530
1531	#[$test]
1532	$($async)? fn empty_ns() {
1533	let buf = $buf;
1534	let mut position = `0`;
1535	let mut input = b":tag>".as_ref();
1536	// ^= 5
1537
1538	assert_eq!(
1539	$source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes),
1540	Some(Bytes(b":tag"))
1541	);
1542	assert_eq!(position, `5`);
1543	}
1544
1545	#[$test]
1546	$($async)? fn with_attributes() {
1547	let buf = $buf;
1548	let mut position = `0`;
1549	let mut input = br#"tag attr-1=">" attr2 = '>' 3attr>"#.as_ref();
1550	// ^= 38
1551
1552	assert_eq!(
1553	$source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes),
1554	Some(Bytes(br#"tag attr-1=">" attr2 = '>' 3attr"#))
1555	);
1556	assert_eq!(position, `38`);
1557	}
1558	}
1559
1560	mod self_closed {
1561	use super::*;
1562	use crate::utils::Bytes;
1563	use pretty_assertions::assert_eq;
1564
1565	#[$test]
1566	$($async)? fn empty_tag() {
1567	let buf = $buf;
1568	let mut position = `0`;
1569	let mut input = b"/>".as_ref();
1570	// ^= 2
1571
1572	assert_eq!(
1573	$source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes),
1574	Some(Bytes(b"/"))
1575	);
1576	assert_eq!(position, `2`);
1577	}
1578
1579	#[$test]
1580	$($async)? fn normal() {
1581	let buf = $buf;
1582	let mut position = `0`;
1583	let mut input = b"tag/>".as_ref();
1584	// ^= 5
1585
1586	assert_eq!(
1587	$source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes),
1588	Some(Bytes(b"tag/"))
1589	);
1590	assert_eq!(position, `5`);
1591	}
1592
1593	#[$test]
1594	$($async)? fn empty_ns_empty_tag() {
1595	let buf = $buf;
1596	let mut position = `0`;
1597	let mut input = b":/>".as_ref();
1598	// ^= 3
1599
1600	assert_eq!(
1601	$source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes),
1602	Some(Bytes(b":/"))
1603	);
1604	assert_eq!(position, `3`);
1605	}
1606
1607	#[$test]
1608	$($async)? fn empty_ns() {
1609	let buf = $buf;
1610	let mut position = `0`;
1611	let mut input = b":tag/>".as_ref();
1612	// ^= 6
1613
1614	assert_eq!(
1615	$source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes),
1616	Some(Bytes(b":tag/"))
1617	);
1618	assert_eq!(position, `6`);
1619	}
1620
1621	#[$test]
1622	$($async)? fn with_attributes() {
1623	let buf = $buf;
1624	let mut position = `0`;
1625	let mut input = br#"tag attr-1="/>" attr2 = '/>' 3attr/>"#.as_ref();
1626	// ^= 41
1627
1628	assert_eq!(
1629	$source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes),
1630	Some(Bytes(br#"tag attr-1="/>" attr2 = '/>' 3attr/"#))
1631	);
1632	assert_eq!(position, `41`);
1633	}
1634	}
1635	}
1636
1637	mod issue_344 {
1638	use crate::errors::Error;
1639	use crate::reader::Reader;
1640
1641	#[$test]
1642	$($async)? fn cdata() {
1643	let mut reader = Reader::from_str("![]]>");
1644
1645	match reader.$read_until_close($buf) $(.$await)? {
1646	Err(Error::UnexpectedEof(s)) if s == "CData" => {}
1647	x => assert!(
1648	`false`,
1649	r#"Expected `UnexpectedEof("CData")`, but result is: {:?}"#,
1650	x
1651	),
1652	}
1653	}
1654
1655	#[$test]
1656	$($async)? fn comment() {
1657	let mut reader = Reader::from_str("!- -->");
1658
1659	match reader.$read_until_close($buf) $(.$await)? {
1660	Err(Error::UnexpectedEof(s)) if s == "Comment" => {}
1661	x => assert!(
1662	`false`,
1663	r#"Expected `UnexpectedEof("Comment")`, but result is: {:?}"#,
1664	x
1665	),
1666	}
1667	}
1668
1669	#[$test]
1670	$($async)? fn doctype_uppercase() {
1671	let mut reader = Reader::from_str("!D>");
1672
1673	match reader.$read_until_close($buf) $(.$await)? {
1674	Err(Error::UnexpectedEof(s)) if s == "DOCTYPE" => {}
1675	x => assert!(
1676	`false`,
1677	r#"Expected `UnexpectedEof("DOCTYPE")`, but result is: {:?}"#,
1678	x
1679	),
1680	}
1681	}
1682
1683	#[$test]
1684	$($async)? fn doctype_lowercase() {
1685	let mut reader = Reader::from_str("!d>");
1686
1687	match reader.$read_until_close($buf) $(.$await)? {
1688	Err(Error::UnexpectedEof(s)) if s == "DOCTYPE" => {}
1689	x => assert!(
1690	`false`,
1691	r#"Expected `UnexpectedEof("DOCTYPE")`, but result is: {:?}"#,
1692	x
1693	),
1694	}
1695	}
1696	}
1697
1698	/// Ensures, that no empty `Text` events are generated
1699	mod $read_event {
1700	use crate::events::{BytesCData, BytesDecl, BytesEnd, BytesStart, BytesText, Event};
1701	use crate::reader::Reader;
1702	use pretty_assertions::assert_eq;
1703
1704	/// When `encoding` feature is enabled, encoding should be detected
1705	/// from BOM (UTF-8) and BOM should be stripped.
1706	///
1707	/// When `encoding` feature is disabled, UTF-8 is assumed and BOM
1708	/// character should be stripped for consistency
1709	#[$test]
1710	$($async)? fn bom_from_reader() {
1711	let mut reader = Reader::from_reader("`\u{feff}\u{feff}`".as_bytes());
1712
1713	assert_eq!(
1714	reader.$read_event($buf) $(.$await)? .unwrap(),
1715	Event::Text(BytesText::from_escaped("`\u{feff}`"))
1716	);
1717
1718	assert_eq!(
1719	reader.$read_event($buf) $(.$await)? .unwrap(),
1720	Event::Eof
1721	);
1722	}
1723
1724	/// When parsing from &str, encoding is fixed (UTF-8), so
1725	/// - when `encoding` feature is disabled, the behavior the
1726	/// same as in `bom_from_reader` text
1727	/// - when `encoding` feature is enabled, the behavior should
1728	/// stay consistent, so the first BOM character is stripped
1729	#[$test]
1730	$($async)? fn bom_from_str() {
1731	let mut reader = Reader::from_str("`\u{feff}\u{feff}`");
1732
1733	assert_eq!(
1734	reader.$read_event($buf) $(.$await)? .unwrap(),
1735	Event::Text(BytesText::from_escaped("`\u{feff}`"))
1736	);
1737
1738	assert_eq!(
1739	reader.$read_event($buf) $(.$await)? .unwrap(),
1740	Event::Eof
1741	);
1742	}
1743
1744	#[$test]
1745	$($async)? fn declaration() {
1746	let mut reader = Reader::from_str("<?xml ?>");
1747
1748	assert_eq!(
1749	reader.$read_event($buf) $(.$await)? .unwrap(),
1750	Event::Decl(BytesDecl::from_start(BytesStart::from_content("xml ", `3`)))
1751	);
1752	}
1753
1754	#[$test]
1755	$($async)? fn doctype() {
1756	let mut reader = Reader::from_str("<!DOCTYPE x>");
1757
1758	assert_eq!(
1759	reader.$read_event($buf) $(.$await)? .unwrap(),
1760	Event::DocType(BytesText::from_escaped("x"))
1761	);
1762	}
1763
1764	#[$test]
1765	$($async)? fn processing_instruction() {
1766	let mut reader = Reader::from_str("<?xml-stylesheet?>");
1767
1768	assert_eq!(
1769	reader.$read_event($buf) $(.$await)? .unwrap(),
1770	Event::PI(BytesText::from_escaped("xml-stylesheet"))
1771	);
1772	}
1773
1774	#[$test]
1775	$($async)? fn start() {
1776	let mut reader = Reader::from_str("<tag>");
1777
1778	assert_eq!(
1779	reader.$read_event($buf) $(.$await)? .unwrap(),
1780	Event::Start(BytesStart::new("tag"))
1781	);
1782	}
1783
1784	#[$test]
1785	$($async)? fn end() {
1786	let mut reader = Reader::from_str("</tag>");
1787	// Because we expect invalid XML, do not check that
1788	// the end name paired with the start name
1789	reader.check_end_names(`false`);
1790
1791	assert_eq!(
1792	reader.$read_event($buf) $(.$await)? .unwrap(),
1793	Event::End(BytesEnd::new("tag"))
1794	);
1795	}
1796
1797	#[$test]
1798	$($async)? fn empty() {
1799	let mut reader = Reader::from_str("<tag/>");
1800
1801	assert_eq!(
1802	reader.$read_event($buf) $(.$await)? .unwrap(),
1803	Event::Empty(BytesStart::new("tag"))
1804	);
1805	}
1806
1807	#[$test]
1808	$($async)? fn text() {
1809	let mut reader = Reader::from_str("text");
1810
1811	assert_eq!(
1812	reader.$read_event($buf) $(.$await)? .unwrap(),
1813	Event::Text(BytesText::from_escaped("text"))
1814	);
1815	}
1816
1817	#[$test]
1818	$($async)? fn cdata() {
1819	let mut reader = Reader::from_str("<![CDATA[]]>");
1820
1821	assert_eq!(
1822	reader.$read_event($buf) $(.$await)? .unwrap(),
1823	Event::CData(BytesCData::new(""))
1824	);
1825	}
1826
1827	#[$test]
1828	$($async)? fn comment() {
1829	let mut reader = Reader::from_str("<!---->");
1830
1831	assert_eq!(
1832	reader.$read_event($buf) $(.$await)? .unwrap(),
1833	Event::Comment(BytesText::from_escaped(""))
1834	);
1835	}
1836
1837	#[$test]
1838	$($async)? fn eof() {
1839	let mut reader = Reader::from_str("");
1840
1841	assert_eq!(
1842	reader.$read_event($buf) $(.$await)? .unwrap(),
1843	Event::Eof
1844	);
1845	}
1846	}
1847	};
1848	}
1849
/// Tests for https://github.com/tafia/quick-xml/issues/469
///
/// Each test wraps the input in a buffered reader whose capacity is chosen so
/// that the buffer boundary falls inside the closing sequence of the
/// construct (`?>`, `/>`, `]]>` or `-->`), and checks that the construct is
/// nevertheless reported as one event followed by `Eof`.
macro_rules! small_buffers {
    (
        #[$test:meta]
        $read_event:ident: $BufReader:ty
        $(, $async:ident, $await:ident)?
    ) => {
        mod small_buffers {
            use crate::events::{BytesCData, BytesDecl, BytesStart, BytesText, Event};
            use crate::reader::Reader;
            use pretty_assertions::assert_eq;

            #[$test]
            $($async)? fn decl() {
                let xml = "<?xml ?>";
                //         ^^^^^^^ data that fit into buffer
                let size = xml.find("?>").unwrap() + 1;
                let br = <$BufReader>::with_capacity(size, xml.as_bytes());
                let mut reader = Reader::from_reader(br);
                let mut buf = Vec::new();

                assert_eq!(
                    reader.$read_event(&mut buf) $(.$await)? .unwrap(),
                    Event::Decl(BytesDecl::from_start(BytesStart::from_content("xml ", 3)))
                );
                assert_eq!(
                    reader.$read_event(&mut buf) $(.$await)? .unwrap(),
                    Event::Eof
                );
            }

            #[$test]
            $($async)? fn pi() {
                let xml = "<?pi?>";
                //         ^^^^^ data that fit into buffer
                let size = xml.find("?>").unwrap() + 1;
                let br = <$BufReader>::with_capacity(size, xml.as_bytes());
                let mut reader = Reader::from_reader(br);
                let mut buf = Vec::new();

                assert_eq!(
                    reader.$read_event(&mut buf) $(.$await)? .unwrap(),
                    Event::PI(BytesText::new("pi"))
                );
                assert_eq!(
                    reader.$read_event(&mut buf) $(.$await)? .unwrap(),
                    Event::Eof
                );
            }

            #[$test]
            $($async)? fn empty() {
                let xml = "<empty/>";
                //         ^^^^^^^ data that fit into buffer
                let size = xml.find("/>").unwrap() + 1;
                let br = <$BufReader>::with_capacity(size, xml.as_bytes());
                let mut reader = Reader::from_reader(br);
                let mut buf = Vec::new();

                assert_eq!(
                    reader.$read_event(&mut buf) $(.$await)? .unwrap(),
                    Event::Empty(BytesStart::new("empty"))
                );
                assert_eq!(
                    reader.$read_event(&mut buf) $(.$await)? .unwrap(),
                    Event::Eof
                );
            }

            /// The buffer ends after the first `]` of the `]]>` sequence
            #[$test]
            $($async)? fn cdata1() {
                let xml = "<![CDATA[cdata]]>";
                //         ^^^^^^^^^^^^^^^ data that fit into buffer
                let size = xml.find("]]>").unwrap() + 1;
                let br = <$BufReader>::with_capacity(size, xml.as_bytes());
                let mut reader = Reader::from_reader(br);
                let mut buf = Vec::new();

                assert_eq!(
                    reader.$read_event(&mut buf) $(.$await)? .unwrap(),
                    Event::CData(BytesCData::new("cdata"))
                );
                assert_eq!(
                    reader.$read_event(&mut buf) $(.$await)? .unwrap(),
                    Event::Eof
                );
            }

            /// The buffer ends after the second `]` of the `]]>` sequence
            #[$test]
            $($async)? fn cdata2() {
                let xml = "<![CDATA[cdata]]>";
                //         ^^^^^^^^^^^^^^^^ data that fit into buffer
                let size = xml.find("]]>").unwrap() + 2;
                let br = <$BufReader>::with_capacity(size, xml.as_bytes());
                let mut reader = Reader::from_reader(br);
                let mut buf = Vec::new();

                assert_eq!(
                    reader.$read_event(&mut buf) $(.$await)? .unwrap(),
                    Event::CData(BytesCData::new("cdata"))
                );
                assert_eq!(
                    reader.$read_event(&mut buf) $(.$await)? .unwrap(),
                    Event::Eof
                );
            }

            /// The buffer ends after the first `-` of the `-->` sequence
            #[$test]
            $($async)? fn comment1() {
                let xml = "<!--comment-->";
                //         ^^^^^^^^^^^^ data that fit into buffer
                let size = xml.find("-->").unwrap() + 1;
                let br = <$BufReader>::with_capacity(size, xml.as_bytes());
                let mut reader = Reader::from_reader(br);
                let mut buf = Vec::new();

                assert_eq!(
                    reader.$read_event(&mut buf) $(.$await)? .unwrap(),
                    Event::Comment(BytesText::new("comment"))
                );
                assert_eq!(
                    reader.$read_event(&mut buf) $(.$await)? .unwrap(),
                    Event::Eof
                );
            }

            /// The buffer ends after the second `-` of the `-->` sequence
            #[$test]
            $($async)? fn comment2() {
                let xml = "<!--comment-->";
                //         ^^^^^^^^^^^^^ data that fit into buffer
                let size = xml.find("-->").unwrap() + 2;
                let br = <$BufReader>::with_capacity(size, xml.as_bytes());
                let mut reader = Reader::from_reader(br);
                let mut buf = Vec::new();

                assert_eq!(
                    reader.$read_event(&mut buf) $(.$await)? .unwrap(),
                    Event::Comment(BytesText::new("comment"))
                );
                assert_eq!(
                    reader.$read_event(&mut buf) $(.$await)? .unwrap(),
                    Event::Eof
                );
            }
        }
    };
}
1997
1998	// Export macros for the child modules:
1999	// - buffered_reader
2000	// - slice_reader
2001	pub(super) use check;
2002	pub(super) use small_buffers;
2003	}
2004