1//! Serde `Deserializer` module.
2//!
3//! Due to the complexity of the XML standard and the fact that Serde was developed
4//! with JSON in mind, not all Serde concepts apply smoothly to XML. This leads to
5//! the fact that some XML concepts are inexpressible in terms of Serde derives
6//! and may require manual deserialization.
7//!
8//! The most notable restriction is the ability to distinguish between _elements_
9//! and _attributes_, as no other format used by serde has such a concept.
10//!
11//! Because of that, the mapping is performed on a best-effort basis.
12//!
13//!
14//!
15//! Table of Contents
16//! =================
17//! - [Mapping XML to Rust types](#mapping-xml-to-rust-types)
18//! - [Optional attributes and elements](#optional-attributes-and-elements)
19//! - [Choices (`xs:choice` XML Schema type)](#choices-xschoice-xml-schema-type)
20//! - [Sequences (`xs:all` and `xs:sequence` XML Schema types)](#sequences-xsall-and-xssequence-xml-schema-types)
21//! - [Composition Rules](#composition-rules)
22//! - [Difference between `$text` and `$value` special names](#difference-between-text-and-value-special-names)
23//! - [`$text`](#text)
24//! - [`$value`](#value)
25//! - [Primitives and sequences of primitives](#primitives-and-sequences-of-primitives)
26//! - [Structs and sequences of structs](#structs-and-sequences-of-structs)
27//! - [Enums and sequences of enums](#enums-and-sequences-of-enums)
28//! - [Frequently Used Patterns](#frequently-used-patterns)
29//! - [`<element>` lists](#element-lists)
30//! - [Enum::Unit Variants As a Text](#enumunit-variants-as-a-text)
31//! - [Internally Tagged Enums](#internally-tagged-enums)
32//!
33//!
34//!
35//! Mapping XML to Rust types
36//! =========================
37//!
38//! Type names are never considered when deserializing, so you can name your
39//! types as you wish. Other general rules:
40//! - `struct` field name could be represented in XML only as an attribute name
41//! or an element name;
42//! - `enum` variant name could be represented in XML only as an attribute name
43//! or an element name;
44//! - the unit struct, unit type `()` and unit enum variant can be deserialized
45//! from any valid XML content:
46//! - attribute and element names;
47//! - attribute and element values;
48//! - text or CDATA content (including mixed text and CDATA content).
49//!
50//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
51//!
52//! NOTE: All tests are marked with an `ignore` option, even though they do
53//! compile. This is because rustdoc marks such blocks with an information
54//! icon unlike `no_run` blocks.
55//!
56//! </div>
57//!
58//! <table>
59//! <thead>
60//! <tr><th>To parse all these XML's...</th><th>...use these Rust type(s)</th></tr>
61//! </thead>
62//! <tbody style="vertical-align:top;">
63//! <tr>
64//! <td>
65//! Content of attributes and text / CDATA content of elements (including mixed
66//! text and CDATA content):
67//!
68//! ```xml
69//! <... ...="content" />
70//! ```
71//! ```xml
72//! <...>content</...>
73//! ```
74//! ```xml
75//! <...><![CDATA[content]]></...>
76//! ```
77//! ```xml
78//! <...>text<![CDATA[cdata]]>text</...>
79//! ```
80//! Mixed text / CDATA content represents one logical string, `"textcdatatext"` in that case.
81//! </td>
82//! <td>
83//!
84//! You can use any type that can be deserialized from an `&str`, for example:
85//! - [`String`] and [`&str`]
86//! - [`Cow<str>`]
87//! - [`u32`], [`f32`] and other numeric types
88//! - `enum`s, like
89//! ```
90//! # use pretty_assertions::assert_eq;
91//! # use serde::Deserialize;
92//! # #[derive(Debug, PartialEq)]
93//! #[derive(Deserialize)]
94//! enum Language {
95//! Rust,
96//! Cpp,
97//! #[serde(other)]
98//! Other,
99//! }
100//! # #[derive(Debug, PartialEq, Deserialize)]
101//! # struct X { #[serde(rename = "$text")] x: Language }
102//! # assert_eq!(X { x: Language::Rust }, quick_xml::de::from_str("<x>Rust</x>").unwrap());
103//! # assert_eq!(X { x: Language::Cpp }, quick_xml::de::from_str("<x>C<![CDATA[p]]>p</x>").unwrap());
104//! # assert_eq!(X { x: Language::Other }, quick_xml::de::from_str("<x><![CDATA[other]]></x>").unwrap());
105//! ```
106//!
107//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
108//!
109//! NOTE: deserialization to non-owned types (i.e. types that borrow from the
110//! input), such as `&str`, is possible only if you parse a document in the UTF-8
111//! encoding, the content does not contain entity references such as `&amp;`
112//! or character references such as `&#xD;`, and the text content is represented
113//! by a single [text] or [CDATA] event.
114//! </div>
115//! <!-- TODO: document an error type returned -->
116//!
117//! [text]: Event::Text
118//! [CDATA]: Event::CData
119//! </td>
120//! </tr>
121//! <!-- 2 ===================================================================================== -->
122//! <tr>
123//! <td>
124//!
125//! Content of attributes and text / CDATA content of elements (including mixed
126//! text and CDATA content), which represents a space-delimited list, as
127//! specified in the XML Schema specification for [`xs:list`] `simpleType`:
128//!
129//! ```xml
130//! <... ...="element1 element2 ..." />
131//! ```
132//! ```xml
133//! <...>
134//! element1
135//! element2
136//! ...
137//! </...>
138//! ```
139//! ```xml
140//! <...><![CDATA[
141//! element1
142//! element2
143//! ...
144//! ]]></...>
145//! ```
146//!
147//! [`xs:list`]: https://www.w3.org/TR/xmlschema11-2/#list-datatypes
148//! </td>
149//! <td>
150//!
151//! Use any type that is deserialized using a [`deserialize_seq()`] call, for example:
152//!
153//! ```
154//! type List = Vec<u32>;
155//! ```
156//!
157//! See the next row to learn where in your struct definition you should
158//! use that type.
159//!
160//! According to the XML Schema specification, the delimiter between elements is one
161//! or more whitespace (`' '`, `'\r'`, `'\n'`, and `'\t'`) characters.
162//!
163//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
164//!
165//! NOTE: according to the XML Schema restrictions, you cannot escape those
166//! white-space characters, so list elements will _never_ contain them.
167//! In practice you will usually use `xs:list`s for lists of numbers or enumerated
168//! values which look like identifiers in many languages, for example, `item`,
169//! `some_item` or `some-item`, so that shouldn't be a problem.
170//!
171//! NOTE: according to the XML Schema specification, list elements can be
172//! delimited only by spaces. Other delimiters (for example, commas) are not
173//! allowed.
174//!
175//! </div>
176//!
177//! [`deserialize_seq()`]: de::Deserializer::deserialize_seq
178//! </td>
179//! </tr>
180//! <!-- 3 ===================================================================================== -->
181//! <tr>
182//! <td>
183//! A typical XML with attributes. The root tag name does not matter:
184//!
185//! ```xml
186//! <any-tag one="..." two="..."/>
187//! ```
188//! </td>
189//! <td>
190//!
191//! A structure where each XML attribute is mapped to a field with a name
192//! starting with `@`. Because Rust identifiers do not permit the `@` character,
193//! you should use the `#[serde(rename = "@...")]` attribute to rename it.
194//! The name of the struct itself does not matter:
195//!
196//! ```
197//! # use serde::Deserialize;
198//! # type T = ();
199//! # type U = ();
200//! // Get both attributes
201//! # #[derive(Debug, PartialEq)]
202//! #[derive(Deserialize)]
203//! struct AnyName {
204//! #[serde(rename = "@one")]
205//! one: T,
206//!
207//! #[serde(rename = "@two")]
208//! two: U,
209//! }
210//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..." two="..."/>"#).unwrap();
211//! ```
212//! ```
213//! # use serde::Deserialize;
214//! # type T = ();
215//! // Get only the one attribute, ignore the other
216//! # #[derive(Debug, PartialEq)]
217//! #[derive(Deserialize)]
218//! struct AnyName {
219//! #[serde(rename = "@one")]
220//! one: T,
221//! }
222//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..." two="..."/>"#).unwrap();
223//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."/>"#).unwrap();
224//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><one>...</one></any-tag>"#).unwrap();
225//! ```
226//! ```
227//! # use serde::Deserialize;
228//! // Ignore all attributes
229//! // You can also use the `()` type (unit type)
230//! # #[derive(Debug, PartialEq)]
231//! #[derive(Deserialize)]
232//! struct AnyName;
233//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..." two="..."/>"#).unwrap();
234//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><one>...</one></any-tag>"#).unwrap();
235//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag><one>...</one><two>...</two></any-tag>"#).unwrap();
236//! ```
237//!
238//! All these structs can be used to deserialize from an XML on the
239//! left side depending on the amount of information that you want to get.
240//! Of course, you can combine them with elements extractor structs (see below).
241//!
242//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
243//!
244//! NOTE: XML allows you to have an attribute and an element with the same name
245//! inside the one element. quick-xml deals with that by prepending a `@` prefix
246//! to the name of attributes.
247//! </div>
248//! </td>
249//! </tr>
250//! <!-- 4 ===================================================================================== -->
251//! <tr>
252//! <td>
253//! A typical XML with child elements. The root tag name does not matter:
254//!
255//! ```xml
256//! <any-tag>
257//! <one>...</one>
258//! <two>...</two>
259//! </any-tag>
260//! ```
261//! </td>
262//! <td>
263//! A structure where each XML child element is mapped to the field.
264//! Each element name becomes a name of field. The name of the struct itself
265//! does not matter:
266//!
267//! ```
268//! # use serde::Deserialize;
269//! # type T = ();
270//! # type U = ();
271//! // Get both elements
272//! # #[derive(Debug, PartialEq)]
273//! #[derive(Deserialize)]
274//! struct AnyName {
275//! one: T,
276//! two: U,
277//! }
278//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag><one>...</one><two>...</two></any-tag>"#).unwrap();
279//! #
280//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..." two="..."/>"#).unwrap_err();
281//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><two>...</two></any-tag>"#).unwrap_err();
282//! ```
283//! ```
284//! # use serde::Deserialize;
285//! # type T = ();
286//! // Get only the one element, ignore the other
287//! # #[derive(Debug, PartialEq)]
288//! #[derive(Deserialize)]
289//! struct AnyName {
290//! one: T,
291//! }
292//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag><one>...</one><two>...</two></any-tag>"#).unwrap();
293//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><one>...</one></any-tag>"#).unwrap();
294//! ```
295//! ```
296//! # use serde::Deserialize;
297//! // Ignore all elements
298//! // You can also use the `()` type (unit type)
299//! # #[derive(Debug, PartialEq)]
300//! #[derive(Deserialize)]
301//! struct AnyName;
302//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..." two="..."/>"#).unwrap();
303//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag><one>...</one><two>...</two></any-tag>"#).unwrap();
304//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><two>...</two></any-tag>"#).unwrap();
305//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><one>...</one></any-tag>"#).unwrap();
306//! ```
307//!
308//! All these structs can be used to deserialize from an XML on the
309//! left side depending on the amount of information that you want to get.
310//! Of course, you can combine them with attributes extractor structs (see above).
311//!
312//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
313//!
314//! NOTE: XML allows you to have an attribute and an element with the same name
315//! inside the one element. quick-xml deals with that by prepending a `@` prefix
316//! to the name of attributes.
317//! </div>
318//! </td>
319//! </tr>
320//! <!-- 5 ===================================================================================== -->
321//! <tr>
322//! <td>
323//! An XML with an attribute and a child element named equally:
324//!
325//! ```xml
326//! <any-tag field="...">
327//! <field>...</field>
328//! </any-tag>
329//! ```
330//! </td>
331//! <td>
332//!
333//! You MUST specify `#[serde(rename = "@field")]` on a field that will be used
334//! for an attribute:
335//!
336//! ```
337//! # use pretty_assertions::assert_eq;
338//! # use serde::Deserialize;
339//! # type T = ();
340//! # type U = ();
341//! # #[derive(Debug, PartialEq)]
342//! #[derive(Deserialize)]
343//! struct AnyName {
344//! #[serde(rename = "@field")]
345//! attribute: T,
346//! field: U,
347//! }
348//! # assert_eq!(
349//! # AnyName { attribute: (), field: () },
350//! # quick_xml::de::from_str(r#"
351//! # <any-tag field="...">
352//! # <field>...</field>
353//! # </any-tag>
354//! # "#).unwrap(),
355//! # );
356//! ```
357//! </td>
358//! </tr>
359//! <!-- ======================================================================================= -->
360//! <tr><th colspan="2">
361//!
362//! ## Optional attributes and elements
363//!
364//! </th></tr>
365//! <tr><th>To parse all these XML's...</th><th>...use these Rust type(s)</th></tr>
366//! <!-- 6 ===================================================================================== -->
367//! <tr>
368//! <td>
369//! An optional XML attribute that you want to capture.
370//! The root tag name does not matter:
371//!
372//! ```xml
373//! <any-tag optional="..."/>
374//! ```
375//! ```xml
376//! <any-tag/>
377//! ```
378//! </td>
379//! <td>
380//!
381//! A structure with an optional field, renamed according to the requirements
382//! for attributes:
383//!
384//! ```
385//! # use pretty_assertions::assert_eq;
386//! # use serde::Deserialize;
387//! # type T = ();
388//! # #[derive(Debug, PartialEq)]
389//! #[derive(Deserialize)]
390//! struct AnyName {
391//! #[serde(rename = "@optional")]
392//! optional: Option<T>,
393//! }
394//! # assert_eq!(AnyName { optional: Some(()) }, quick_xml::de::from_str(r#"<any-tag optional="..."/>"#).unwrap());
395//! # assert_eq!(AnyName { optional: None }, quick_xml::de::from_str(r#"<any-tag/>"#).unwrap());
396//! ```
397//! When the XML attribute is present, type `T` will be deserialized from
398//! an attribute value (which is a string). Note, that if `T = String` or other
399//! string type, the empty attribute is mapped to a `Some("")`, whereas `None`
400//! represents a missing attribute:
401//! ```xml
402//! <any-tag optional="..."/><!-- Some("...") -->
403//! <any-tag optional=""/> <!-- Some("") -->
404//! <any-tag/> <!-- None -->
405//! ```
406//! </td>
407//! </tr>
408//! <!-- 7 ===================================================================================== -->
409//! <tr>
410//! <td>
411//! An optional XML element that you want to capture.
412//! The root tag name does not matter:
413//!
414//! ```xml
415//! <any-tag>
416//! <optional>...</optional>
417//! </any-tag>
418//! ```
419//! ```xml
420//! <any-tag>
421//! <optional/>
422//! </any-tag>
423//! ```
424//! ```xml
425//! <any-tag/>
426//! ```
427//! </td>
428//! <td>
429//!
430//! A structure with an optional field:
431//!
432//! ```
433//! # use pretty_assertions::assert_eq;
434//! # use serde::Deserialize;
435//! # type T = ();
436//! # #[derive(Debug, PartialEq)]
437//! #[derive(Deserialize)]
438//! struct AnyName {
439//! optional: Option<T>,
440//! }
441//! # assert_eq!(AnyName { optional: Some(()) }, quick_xml::de::from_str(r#"<any-tag><optional>...</optional></any-tag>"#).unwrap());
442//! # assert_eq!(AnyName { optional: None }, quick_xml::de::from_str(r#"<any-tag/>"#).unwrap());
443//! ```
444//! When the XML element is present, type `T` will be deserialized from an
445//! element (which is a string or a multi-mapping -- i.e. mapping which can have
446//! duplicated keys).
447//! <div style="background:rgba(80, 240, 100, 0.20);padding:0.75em;">
448//!
449//! Currently some edge cases exist; they are described in issue [#497].
450//! </div>
451//! </td>
452//! </tr>
453//! <!-- ======================================================================================= -->
454//! <tr><th colspan="2">
455//!
456//! ## Choices (`xs:choice` XML Schema type)
457//!
458//! </th></tr>
459//! <tr><th>To parse all these XML's...</th><th>...use these Rust type(s)</th></tr>
460//! <!-- 8 ===================================================================================== -->
461//! <tr>
462//! <td>
463//! An XML with different root tag names, as well as text / CDATA content:
464//!
465//! ```xml
466//! <one field1="...">...</one>
467//! ```
468//! ```xml
469//! <two>
470//! <field2>...</field2>
471//! </two>
472//! ```
473//! ```xml
474//! Text <![CDATA[or (mixed)
475//! CDATA]]> content
476//! ```
477//! </td>
478//! <td>
479//!
480//! An enum where each variant has the name of a possible root tag. The name of
481//! the enum itself does not matter.
482//!
483//! If you need to get the textual content, mark a variant with `#[serde(rename = "$text")]`.
484//!
485//! All these enums can be used to deserialize from any XML on the
486//! left side depending on the amount of information that you want to get:
487//!
488//! ```
489//! # use pretty_assertions::assert_eq;
490//! # use serde::Deserialize;
491//! # type T = ();
492//! # type U = ();
493//! # #[derive(Debug, PartialEq)]
494//! #[derive(Deserialize)]
495//! #[serde(rename_all = "snake_case")]
496//! enum AnyName {
497//! One { #[serde(rename = "@field1")] field1: T },
498//! Two { field2: U },
499//!
500//! /// Use a unit variant if you do not care about the content.
501//! /// You can use tuple variant if you want to parse
502//! /// textual content as an xs:list.
503//! /// Struct variants are not supported and will return
504//! /// Err(Unsupported)
505//! #[serde(rename = "$text")]
506//! Text(String),
507//! }
508//! # assert_eq!(AnyName::One { field1: () }, quick_xml::de::from_str(r#"<one field1="...">...</one>"#).unwrap());
509//! # assert_eq!(AnyName::Two { field2: () }, quick_xml::de::from_str(r#"<two><field2>...</field2></two>"#).unwrap());
510//! # assert_eq!(AnyName::Text("text cdata ".into()), quick_xml::de::from_str(r#"text <![CDATA[ cdata ]]>"#).unwrap());
511//! ```
512//! ```
513//! # use pretty_assertions::assert_eq;
514//! # use serde::Deserialize;
515//! # type T = ();
516//! # #[derive(Debug, PartialEq)]
517//! #[derive(Deserialize)]
518//! struct Two {
519//! field2: T,
520//! }
521//! # #[derive(Debug, PartialEq)]
522//! #[derive(Deserialize)]
523//! #[serde(rename_all = "snake_case")]
524//! enum AnyName {
525//! // `field1` content discarded
526//! One,
527//! Two(Two),
528//! #[serde(rename = "$text")]
529//! Text,
530//! }
531//! # assert_eq!(AnyName::One, quick_xml::de::from_str(r#"<one field1="...">...</one>"#).unwrap());
532//! # assert_eq!(AnyName::Two(Two { field2: () }), quick_xml::de::from_str(r#"<two><field2>...</field2></two>"#).unwrap());
533//! # assert_eq!(AnyName::Text, quick_xml::de::from_str(r#"text <![CDATA[ cdata ]]>"#).unwrap());
534//! ```
535//! ```
536//! # use pretty_assertions::assert_eq;
537//! # use serde::Deserialize;
538//! # #[derive(Debug, PartialEq)]
539//! #[derive(Deserialize)]
540//! #[serde(rename_all = "snake_case")]
541//! enum AnyName {
542//! One,
543//! // the <two> and textual content will be mapped to this
544//! #[serde(other)]
545//! Other,
546//! }
547//! # assert_eq!(AnyName::One, quick_xml::de::from_str(r#"<one field1="...">...</one>"#).unwrap());
548//! # assert_eq!(AnyName::Other, quick_xml::de::from_str(r#"<two><field2>...</field2></two>"#).unwrap());
549//! # assert_eq!(AnyName::Other, quick_xml::de::from_str(r#"text <![CDATA[ cdata ]]>"#).unwrap());
550//! ```
551//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
552//!
553//! NOTE: You should have variants for all possible tag names in your enum
554//! or have an `#[serde(other)]` variant.
555//! <!-- TODO: document an error type if that requirement is violated -->
556//! </div>
557//! </td>
558//! </tr>
559//! <!-- 9 ===================================================================================== -->
560//! <tr>
561//! <td>
562//!
563//! `<xs:choice>` embedded in the other element, and at the same time you want
564//! to get access to other attributes that can appear in the same container
565//! (`<any-tag>`). This case can also be described as choosing a
566//! Rust enum variant based on a tag name:
567//!
568//! ```xml
569//! <any-tag field="...">
570//! <one>...</one>
571//! </any-tag>
572//! ```
573//! ```xml
574//! <any-tag field="...">
575//! <two>...</two>
576//! </any-tag>
577//! ```
578//! ```xml
579//! <any-tag field="...">
580//! Text <![CDATA[or (mixed)
581//! CDATA]]> content
582//! </any-tag>
583//! ```
584//! </td>
585//! <td>
586//!
587//! A structure with a field whose type is an `enum`.
588//!
589//! If you need to get a textual content, mark a variant with `#[serde(rename = "$text")]`.
590//!
591//! Names of the enum, struct, and struct field with `Choice` type do not matter:
592//!
593//! ```
594//! # use pretty_assertions::assert_eq;
595//! # use serde::Deserialize;
596//! # type T = ();
597//! # #[derive(Debug, PartialEq)]
598//! #[derive(Deserialize)]
599//! #[serde(rename_all = "snake_case")]
600//! enum Choice {
601//! One,
602//! Two,
603//!
604//! /// Use a unit variant if you do not care about the content.
605//! /// You can use tuple variant if you want to parse
606//! /// textual content as an xs:list.
607//! /// Struct variants are not supported and will return
608//! /// Err(Unsupported)
609//! #[serde(rename = "$text")]
610//! Text(String),
611//! }
612//! # #[derive(Debug, PartialEq)]
613//! #[derive(Deserialize)]
614//! struct AnyName {
615//! #[serde(rename = "@field")]
616//! field: T,
617//!
618//! #[serde(rename = "$value")]
619//! any_name: Choice,
620//! }
621//! # assert_eq!(
622//! # AnyName { field: (), any_name: Choice::One },
623//! # quick_xml::de::from_str(r#"<any-tag field="..."><one>...</one></any-tag>"#).unwrap(),
624//! # );
625//! # assert_eq!(
626//! # AnyName { field: (), any_name: Choice::Two },
627//! # quick_xml::de::from_str(r#"<any-tag field="..."><two>...</two></any-tag>"#).unwrap(),
628//! # );
629//! # assert_eq!(
630//! # AnyName { field: (), any_name: Choice::Text("text cdata ".into()) },
631//! # quick_xml::de::from_str(r#"<any-tag field="...">text <![CDATA[ cdata ]]></any-tag>"#).unwrap(),
632//! # );
633//! ```
634//! </td>
635//! </tr>
636//! <!-- 10 ==================================================================================== -->
637//! <tr>
638//! <td>
639//!
640//! `<xs:choice>` embedded in the other element, and at the same time you want
641//! to get access to other elements that can appear in the same container
642//! (`<any-tag>`). This case can also be described as choosing a
643//! Rust enum variant based on a tag name:
644//!
645//! ```xml
646//! <any-tag>
647//! <field>...</field>
648//! <one>...</one>
649//! </any-tag>
650//! ```
651//! ```xml
652//! <any-tag>
653//! <two>...</two>
654//! <field>...</field>
655//! </any-tag>
656//! ```
657//! </td>
658//! <td>
659//!
660//! A structure with a field whose type is an `enum`.
661//!
662//! Names of the enum, struct, and struct field with `Choice` type do not matter:
663//!
664//! ```
665//! # use pretty_assertions::assert_eq;
666//! # use serde::Deserialize;
667//! # type T = ();
668//! # #[derive(Debug, PartialEq)]
669//! #[derive(Deserialize)]
670//! #[serde(rename_all = "snake_case")]
671//! enum Choice {
672//! One,
673//! Two,
674//! }
675//! # #[derive(Debug, PartialEq)]
676//! #[derive(Deserialize)]
677//! struct AnyName {
678//! field: T,
679//!
680//! #[serde(rename = "$value")]
681//! any_name: Choice,
682//! }
683//! # assert_eq!(
684//! # AnyName { field: (), any_name: Choice::One },
685//! # quick_xml::de::from_str(r#"<any-tag><field>...</field><one>...</one></any-tag>"#).unwrap(),
686//! # );
687//! # assert_eq!(
688//! # AnyName { field: (), any_name: Choice::Two },
689//! # quick_xml::de::from_str(r#"<any-tag><two>...</two><field>...</field></any-tag>"#).unwrap(),
690//! # );
691//! ```
692//!
693//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
694//!
695//! NOTE: if your `Choice` enum would contain an `#[serde(other)]`
696//! variant, element `<field>` will be mapped to the `field` and not to the enum
697//! variant.
698//! </div>
699//!
700//! </td>
701//! </tr>
702//! <!-- 11 ==================================================================================== -->
703//! <tr>
704//! <td>
705//!
706//! `<xs:choice>` encapsulated in other element with a fixed name:
707//!
708//! ```xml
709//! <any-tag field="...">
710//! <choice>
711//! <one>...</one>
712//! </choice>
713//! </any-tag>
714//! ```
715//! ```xml
716//! <any-tag field="...">
717//! <choice>
718//! <two>...</two>
719//! </choice>
720//! </any-tag>
721//! ```
722//! </td>
723//! <td>
724//!
725//! A structure with a field of an intermediate type with one field of `enum` type.
726//! Actually, this example is not necessary, because you can construct it by yourself
727//! using the composition rules that were described above. However the XML construction
728//! described here is very common, so it is shown explicitly.
729//!
730//! Names of the enum and struct do not matter:
731//!
732//! ```
733//! # use pretty_assertions::assert_eq;
734//! # use serde::Deserialize;
735//! # type T = ();
736//! # #[derive(Debug, PartialEq)]
737//! #[derive(Deserialize)]
738//! #[serde(rename_all = "snake_case")]
739//! enum Choice {
740//! One,
741//! Two,
742//! }
743//! # #[derive(Debug, PartialEq)]
744//! #[derive(Deserialize)]
745//! struct Holder {
746//! #[serde(rename = "$value")]
747//! any_name: Choice,
748//! }
749//! # #[derive(Debug, PartialEq)]
750//! #[derive(Deserialize)]
751//! struct AnyName {
752//! #[serde(rename = "@field")]
753//! field: T,
754//!
755//! choice: Holder,
756//! }
757//! # assert_eq!(
758//! # AnyName { field: (), choice: Holder { any_name: Choice::One } },
759//! # quick_xml::de::from_str(r#"<any-tag field="..."><choice><one>...</one></choice></any-tag>"#).unwrap(),
760//! # );
761//! # assert_eq!(
762//! # AnyName { field: (), choice: Holder { any_name: Choice::Two } },
763//! # quick_xml::de::from_str(r#"<any-tag field="..."><choice><two>...</two></choice></any-tag>"#).unwrap(),
764//! # );
765//! ```
766//! </td>
767//! </tr>
768//! <!-- 12 ==================================================================================== -->
769//! <tr>
770//! <td>
771//!
772//! `<xs:choice>` encapsulated in other element with a fixed name:
773//!
774//! ```xml
775//! <any-tag>
776//! <field>...</field>
777//! <choice>
778//! <one>...</one>
779//! </choice>
780//! </any-tag>
781//! ```
782//! ```xml
783//! <any-tag>
784//! <choice>
785//! <two>...</two>
786//! </choice>
787//! <field>...</field>
788//! </any-tag>
789//! ```
790//! </td>
791//! <td>
792//!
793//! A structure with a field of an intermediate type with one field of `enum` type.
794//! Actually, this example is not necessary, because you can construct it by yourself
795//! using the composition rules that were described above. However the XML construction
796//! described here is very common, so it is shown explicitly.
797//!
798//! Names of the enum and struct do not matter:
799//!
800//! ```
801//! # use pretty_assertions::assert_eq;
802//! # use serde::Deserialize;
803//! # type T = ();
804//! # #[derive(Debug, PartialEq)]
805//! #[derive(Deserialize)]
806//! #[serde(rename_all = "snake_case")]
807//! enum Choice {
808//! One,
809//! Two,
810//! }
811//! # #[derive(Debug, PartialEq)]
812//! #[derive(Deserialize)]
813//! struct Holder {
814//! #[serde(rename = "$value")]
815//! any_name: Choice,
816//! }
817//! # #[derive(Debug, PartialEq)]
818//! #[derive(Deserialize)]
819//! struct AnyName {
820//! field: T,
821//!
822//! choice: Holder,
823//! }
824//! # assert_eq!(
825//! # AnyName { field: (), choice: Holder { any_name: Choice::One } },
826//! # quick_xml::de::from_str(r#"<any-tag><field>...</field><choice><one>...</one></choice></any-tag>"#).unwrap(),
827//! # );
828//! # assert_eq!(
829//! # AnyName { field: (), choice: Holder { any_name: Choice::Two } },
830//! # quick_xml::de::from_str(r#"<any-tag><choice><two>...</two></choice><field>...</field></any-tag>"#).unwrap(),
831//! # );
832//! ```
833//! </td>
834//! </tr>
835//! <!-- ======================================================================================== -->
836//! <tr><th colspan="2">
837//!
838//! ## Sequences (`xs:all` and `xs:sequence` XML Schema types)
839//!
840//! </th></tr>
841//! <tr><th>To parse all these XML's...</th><th>...use these Rust type(s)</th></tr>
842//! <!-- 13 ==================================================================================== -->
843//! <tr>
844//! <td>
845//! A sequence inside of a tag without a dedicated name:
846//!
847//! ```xml
848//! <any-tag/>
849//! ```
850//! ```xml
851//! <any-tag>
852//! <item/>
853//! </any-tag>
854//! ```
855//! ```xml
856//! <any-tag>
857//! <item/>
858//! <item/>
859//! <item/>
860//! </any-tag>
861//! ```
862//! </td>
863//! <td>
864//!
865//! A structure with a field which is a sequence type, for example, [`Vec`].
866//! Because XML syntax does not distinguish between empty sequences and missing
867//! elements, we should indicate that on the Rust side, because serde will require
868//! that field `item` exists. You can do that in two possible ways:
869//!
870//! Use the `#[serde(default)]` attribute for a [field] or the entire [struct]:
871//! ```
872//! # use pretty_assertions::assert_eq;
873//! # use serde::Deserialize;
874//! # type Item = ();
875//! # #[derive(Debug, PartialEq)]
876//! #[derive(Deserialize)]
877//! struct AnyName {
878//! #[serde(default)]
879//! item: Vec<Item>,
880//! }
881//! # assert_eq!(
882//! # AnyName { item: vec![] },
883//! # quick_xml::de::from_str(r#"<any-tag/>"#).unwrap(),
884//! # );
885//! # assert_eq!(
886//! # AnyName { item: vec![()] },
887//! # quick_xml::de::from_str(r#"<any-tag><item/></any-tag>"#).unwrap(),
888//! # );
889//! # assert_eq!(
890//! # AnyName { item: vec![(), (), ()] },
891//! # quick_xml::de::from_str(r#"<any-tag><item/><item/><item/></any-tag>"#).unwrap(),
892//! # );
893//! ```
894//!
895//! Use the [`Option`]. In that case the inner array will always contain at least one
896//! element after deserialization:
897//! ```ignore
898//! # use pretty_assertions::assert_eq;
899//! # use serde::Deserialize;
900//! # type Item = ();
901//! # #[derive(Debug, PartialEq)]
902//! #[derive(Deserialize)]
903//! struct AnyName {
904//! item: Option<Vec<Item>>,
905//! }
906//! # assert_eq!(
907//! # AnyName { item: None },
908//! # quick_xml::de::from_str(r#"<any-tag/>"#).unwrap(),
909//! # );
910//! # assert_eq!(
911//! # AnyName { item: Some(vec![()]) },
912//! # quick_xml::de::from_str(r#"<any-tag><item/></any-tag>"#).unwrap(),
913//! # );
914//! # assert_eq!(
915//! # AnyName { item: Some(vec![(), (), ()]) },
916//! # quick_xml::de::from_str(r#"<any-tag><item/><item/><item/></any-tag>"#).unwrap(),
917//! # );
918//! ```
919//!
920//! See also [Frequently Used Patterns](#element-lists).
921//!
922//! [field]: https://serde.rs/field-attrs.html#default
923//! [struct]: https://serde.rs/container-attrs.html#default
924//! </td>
925//! </tr>
926//! <!-- 14 ==================================================================================== -->
927//! <tr>
928//! <td>
929//! A sequence with a strict order, probably with mixed content
930//! (text / CDATA and tags):
931//!
932//! ```xml
933//! <one>...</one>
934//! text
935//! <![CDATA[cdata]]>
936//! <two>...</two>
937//! <one>...</one>
938//! ```
939//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
940//!
941//! NOTE: this is just an example for showing mapping. XML does not allow
942//! multiple root tags -- you should wrap the sequence into a tag.
943//! </div>
944//! </td>
945//! <td>
946//!
947//! All elements are mapped to a heterogeneous sequential type: a tuple or a named tuple.
948//! Each element of the tuple should be able to be deserialized from the nested
949//! element content (`...`), except the enum types which would be deserialized
950//! from the full element (`<one>...</one>`), so they could use the element name
951//! to choose the right variant:
952//!
953//! ```
954//! # use pretty_assertions::assert_eq;
955//! # use serde::Deserialize;
956//! # type One = ();
957//! # type Two = ();
958//! # /*
959//! type One = ...;
960//! type Two = ...;
961//! # */
962//! # #[derive(Debug, PartialEq)]
963//! #[derive(Deserialize)]
964//! struct AnyName(One, String, Two, One);
965//! # assert_eq!(
966//! # AnyName((), "text cdata".into(), (), ()),
967//! # quick_xml::de::from_str(r#"<one>...</one>text <![CDATA[cdata]]><two>...</two><one>...</one>"#).unwrap(),
968//! # );
969//! ```
970//! ```
971//! # use pretty_assertions::assert_eq;
972//! # use serde::Deserialize;
973//! # #[derive(Debug, PartialEq)]
974//! #[derive(Deserialize)]
975//! #[serde(rename_all = "snake_case")]
976//! enum Choice {
977//! One,
978//! }
979//! # type Two = ();
980//! # /*
981//! type Two = ...;
982//! # */
983//! type AnyName = (Choice, String, Two, Choice);
984//! # assert_eq!(
985//! # (Choice::One, "text cdata".to_string(), (), Choice::One),
986//! # quick_xml::de::from_str(r#"<one>...</one>text <![CDATA[cdata]]><two>...</two><one>...</one>"#).unwrap(),
987//! # );
988//! ```
989//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
990//!
991//! NOTE: consecutive text and CDATA nodes are merged into a single text node,
992//! so you cannot have two adjacent string types in your sequence.
993//! </div>
994//! </td>
995//! </tr>
996//! <!-- 15 ==================================================================================== -->
997//! <tr>
998//! <td>
999//! A sequence with a non-strict order, probably with a mixed content
1000//! (text / CDATA and tags).
1001//!
1002//! ```xml
1003//! <one>...</one>
1004//! text
1005//! <![CDATA[cdata]]>
1006//! <two>...</two>
1007//! <one>...</one>
1008//! ```
1009//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
1010//!
1011//! NOTE: this is just an example for showing mapping. XML does not allow
1012//! multiple root tags -- you should wrap the sequence into a tag.
1013//! </div>
1014//! </td>
1015//! <td>
1016//! A homogeneous sequence of elements with a fixed or dynamic size:
1017//!
1018//! ```
1019//! # use pretty_assertions::assert_eq;
1020//! # use serde::Deserialize;
1021//! # #[derive(Debug, PartialEq)]
1022//! #[derive(Deserialize)]
1023//! #[serde(rename_all = "snake_case")]
1024//! enum Choice {
1025//! One,
1026//! Two,
1027//! #[serde(other)]
1028//! Other,
1029//! }
1030//! type AnyName = [Choice; 4];
1031//! # assert_eq!(
1032//! # [Choice::One, Choice::Other, Choice::Two, Choice::One],
1033//! # quick_xml::de::from_str::<AnyName>(r#"<one>...</one>text <![CDATA[cdata]]><two>...</two><one>...</one>"#).unwrap(),
1034//! # );
1035//! ```
1036//! ```
1037//! # use pretty_assertions::assert_eq;
1038//! # use serde::Deserialize;
1039//! # #[derive(Debug, PartialEq)]
1040//! #[derive(Deserialize)]
1041//! #[serde(rename_all = "snake_case")]
1042//! enum Choice {
1043//! One,
1044//! Two,
1045//! #[serde(rename = "$text")]
1046//! Other(String),
1047//! }
1048//! type AnyName = Vec<Choice>;
1049//! # assert_eq!(
1050//! # vec![
1051//! # Choice::One,
1052//! # Choice::Other("text cdata".into()),
1053//! # Choice::Two,
1054//! # Choice::One,
1055//! # ],
1056//! # quick_xml::de::from_str::<AnyName>(r#"<one>...</one>text <![CDATA[cdata]]><two>...</two><one>...</one>"#).unwrap(),
1057//! # );
1058//! ```
1059//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
1060//!
1061//! NOTE: consecutive text and CDATA nodes are merged into a single text node,
1062//! so you cannot have two adjacent string types in your sequence.
1063//! </div>
1064//! </td>
1065//! </tr>
1066//! <!-- 16 ==================================================================================== -->
1067//! <tr>
1068//! <td>
1069//! A sequence with a strict order, probably with a mixed content,
1070//! (text and tags) inside of the other element:
1071//!
1072//! ```xml
1073//! <any-tag attribute="...">
1074//! <one>...</one>
1075//! text
1076//! <![CDATA[cdata]]>
1077//! <two>...</two>
1078//! <one>...</one>
1079//! </any-tag>
1080//! ```
1081//! </td>
1082//! <td>
1083//!
1084//! A structure where all child elements are mapped to one field which has
1085//! a heterogeneous sequential type: a tuple or a named tuple. Each element of the
1086//! tuple should be able to be deserialized from the full element (`<one>...</one>`).
1087//!
1088//! You MUST specify `#[serde(rename = "$value")]` on that field:
1089//!
1090//! ```
1091//! # use pretty_assertions::assert_eq;
1092//! # use serde::Deserialize;
1093//! # type One = ();
1094//! # type Two = ();
1095//! # /*
1096//! type One = ...;
1097//! type Two = ...;
1098//! # */
1099//!
1100//! # #[derive(Debug, PartialEq)]
1101//! #[derive(Deserialize)]
1102//! struct AnyName {
1103//! #[serde(rename = "@attribute")]
1104//! # attribute: (),
1105//! # /*
1106//! attribute: ...,
1107//! # */
1108//! // Does not (yet?) supported by the serde
1109//! // https://github.com/serde-rs/serde/issues/1905
1110//! // #[serde(flatten)]
1111//! #[serde(rename = "$value")]
1112//! any_name: (One, String, Two, One),
1113//! }
1114//! # assert_eq!(
1115//! # AnyName { attribute: (), any_name: ((), "text cdata".into(), (), ()) },
1116//! # quick_xml::de::from_str("\
1117//! # <any-tag attribute='...'>\
1118//! # <one>...</one>\
1119//! # text \
1120//! # <![CDATA[cdata]]>\
1121//! # <two>...</two>\
1122//! # <one>...</one>\
1123//! # </any-tag>"
1124//! # ).unwrap(),
1125//! # );
1126//! ```
1127//! ```
1128//! # use pretty_assertions::assert_eq;
1129//! # use serde::Deserialize;
1130//! # type One = ();
1131//! # type Two = ();
1132//! # /*
1133//! type One = ...;
1134//! type Two = ...;
1135//! # */
1136//!
1137//! # #[derive(Debug, PartialEq)]
1138//! #[derive(Deserialize)]
1139//! struct NamedTuple(One, String, Two, One);
1140//!
1141//! # #[derive(Debug, PartialEq)]
1142//! #[derive(Deserialize)]
1143//! struct AnyName {
1144//! #[serde(rename = "@attribute")]
1145//! # attribute: (),
1146//! # /*
1147//! attribute: ...,
1148//! # */
1149//! // Does not (yet?) supported by the serde
1150//! // https://github.com/serde-rs/serde/issues/1905
1151//! // #[serde(flatten)]
1152//! #[serde(rename = "$value")]
1153//! any_name: NamedTuple,
1154//! }
1155//! # assert_eq!(
1156//! # AnyName { attribute: (), any_name: NamedTuple((), "text cdata".into(), (), ()) },
1157//! # quick_xml::de::from_str("\
1158//! # <any-tag attribute='...'>\
1159//! # <one>...</one>\
1160//! # text \
1161//! # <![CDATA[cdata]]>\
1162//! # <two>...</two>\
1163//! # <one>...</one>\
1164//! # </any-tag>"
1165//! # ).unwrap(),
1166//! # );
1167//! ```
1168//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
1169//!
1170//! NOTE: consecutive text and CDATA nodes are merged into a single text node,
1171//! so you cannot have two adjacent string types in your sequence.
1172//! </div>
1173//! </td>
1174//! </tr>
1175//! <!-- 17 ==================================================================================== -->
1176//! <tr>
1177//! <td>
1178//! A sequence with a non-strict order, probably with a mixed content
1179//! (text / CDATA and tags) inside of the other element:
1180//!
1181//! ```xml
1182//! <any-tag>
1183//! <one>...</one>
1184//! text
1185//! <![CDATA[cdata]]>
1186//! <two>...</two>
1187//! <one>...</one>
1188//! </any-tag>
1189//! ```
1190//! </td>
1191//! <td>
1192//!
1193//! A structure where all child elements are mapped to one field which has
1194//! a homogeneous sequential type: an array-like container. A container type `T`
1195//! should be able to be deserialized from the nested element content (`...`),
1196//! except if it is an enum type which would be deserialized from the full
1197//! element (`<one>...</one>`).
1198//!
1199//! You MUST specify `#[serde(rename = "$value")]` on that field:
1200//!
1201//! ```
1202//! # use pretty_assertions::assert_eq;
1203//! # use serde::Deserialize;
1204//! # #[derive(Debug, PartialEq)]
1205//! #[derive(Deserialize)]
1206//! #[serde(rename_all = "snake_case")]
1207//! enum Choice {
1208//! One,
1209//! Two,
1210//! #[serde(rename = "$text")]
1211//! Other(String),
1212//! }
1213//! # #[derive(Debug, PartialEq)]
1214//! #[derive(Deserialize)]
1215//! struct AnyName {
1216//! #[serde(rename = "@attribute")]
1217//! # attribute: (),
1218//! # /*
1219//! attribute: ...,
1220//! # */
1221//! // Does not (yet?) supported by the serde
1222//! // https://github.com/serde-rs/serde/issues/1905
1223//! // #[serde(flatten)]
1224//! #[serde(rename = "$value")]
1225//! any_name: [Choice; 4],
1226//! }
1227//! # assert_eq!(
1228//! # AnyName { attribute: (), any_name: [
1229//! # Choice::One,
1230//! # Choice::Other("text cdata".into()),
1231//! # Choice::Two,
1232//! # Choice::One,
1233//! # ] },
1234//! # quick_xml::de::from_str("\
1235//! # <any-tag attribute='...'>\
1236//! # <one>...</one>\
1237//! # text \
1238//! # <![CDATA[cdata]]>\
1239//! # <two>...</two>\
1240//! # <one>...</one>\
1241//! # </any-tag>"
1242//! # ).unwrap(),
1243//! # );
1244//! ```
1245//! ```
1246//! # use pretty_assertions::assert_eq;
1247//! # use serde::Deserialize;
1248//! # #[derive(Debug, PartialEq)]
1249//! #[derive(Deserialize)]
1250//! #[serde(rename_all = "snake_case")]
1251//! enum Choice {
1252//! One,
1253//! Two,
1254//! #[serde(rename = "$text")]
1255//! Other(String),
1256//! }
1257//! # #[derive(Debug, PartialEq)]
1258//! #[derive(Deserialize)]
1259//! struct AnyName {
1260//! #[serde(rename = "@attribute")]
1261//! # attribute: (),
1262//! # /*
1263//! attribute: ...,
1264//! # */
1265//! // Does not (yet?) supported by the serde
1266//! // https://github.com/serde-rs/serde/issues/1905
1267//! // #[serde(flatten)]
1268//! #[serde(rename = "$value")]
1269//! any_name: Vec<Choice>,
1270//! }
1271//! # assert_eq!(
1272//! # AnyName { attribute: (), any_name: vec![
1273//! # Choice::One,
1274//! # Choice::Other("text cdata".into()),
1275//! # Choice::Two,
1276//! # Choice::One,
1277//! # ] },
1278//! # quick_xml::de::from_str("\
1279//! # <any-tag attribute='...'>\
1280//! # <one>...</one>\
1281//! # text \
1282//! # <![CDATA[cdata]]>\
1283//! # <two>...</two>\
1284//! # <one>...</one>\
1285//! # </any-tag>"
1286//! # ).unwrap(),
1287//! # );
1288//! ```
1289//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
1290//!
1291//! NOTE: consecutive text and CDATA nodes are merged into a single text node,
1292//! so you cannot have two adjacent string types in your sequence.
1293//! </div>
1294//! </td>
1295//! </tr>
1296//! </tbody>
1297//! </table>
1298//!
1299//!
1300//!
1301//! Composition Rules
1302//! =================
1303//!
1304//! The XML format is very different from other formats supported by `serde`.
1305//! One such difference is how data in the serialized form is related to
1306//! the Rust type. Usually each byte in the data can be associated only with
1307//! one field in the data structure. However, XML is an exception.
1308//!
1309//! For example, take this XML:
1310//!
1311//! ```xml
1312//! <any>
1313//! <key attr="value"/>
1314//! </any>
1315//! ```
1316//!
1317//! and try to deserialize it to the struct `AnyName`:
1318//!
1319//! ```no_run
1320//! # use serde::Deserialize;
1321//! #[derive(Deserialize)]
1322//! struct AnyName { // AnyName calls `deserialize_struct` on `<any><key attr="value"/></any>`
1323//! // Used data: ^^^^^^^^^^^^^^^^^^^
1324//! key: Inner, // Inner calls `deserialize_struct` on `<key attr="value"/>`
1325//! // Used data: ^^^^^^^^^^^^
1326//! }
1327//! #[derive(Deserialize)]
1328//! struct Inner {
1329//! #[serde(rename = "@attr")]
1330//! attr: String, // String calls `deserialize_string` on `value`
1331//! // Used data: ^^^^^
1332//! }
1333//! ```
1334//!
1335//! The comments show which methods of a [`Deserializer`] are called by each struct's
1336//! `deserialize` method and which input they see. **Used data** shows what
1337//! content is actually used for deserializing. As you can see, the name of the inner
1338//! `<key>` tag is used both as a map key / outer struct field name and as part
1339//! of the inner struct (although the _value_ of the tag, i.e. `key`, is not used
1340//! by it).
1341//!
1342//!
1343//!
1344//! Difference between `$text` and `$value` special names
1345//! =====================================================
1346//!
1347//! quick-xml supports two special names for fields -- `$text` and `$value`.
1348//! Although they may seem the same, there is a distinction. Two different
1349//! names are required mostly for serialization, because quick-xml should know
1350//! how you want to serialize certain constructs, which could be represented
1351//! through XML in multiple different ways.
1352//!
1353//! The only difference is in how complex types and sequences are serialized.
1354//! If you doubt which one you should select, begin with [`$value`](#value).
1355//!
1356//! ## `$text`
1357//! `$text` is used when you want to write your XML as a text or a CDATA content.
1358//! More formally, field with that name represents simple type definition with
1359//! `{variety} = atomic` or `{variety} = union` whose basic members are all atomic,
1360//! as described in the [specification].
1361//!
1362//! As a result, not all types of such fields can be serialized. Serialization
1363//! is supported only for the following types:
1364//! - all primitive types (strings, numbers, booleans)
1365//! - unit variants of enumerations (serializes to a name of a variant)
1366//! - newtypes (delegates serialization to inner type)
1367//! - [`Option`] of above (`None` serializes to nothing)
1368//! - sequences (including tuples and tuple variants of enumerations) of above,
1369//! excluding `None` and empty string elements (because it will not be possible
1370//! to deserialize them back). The elements are separated by space(s)
1371//! - unit type `()` and unit structs (serializes to nothing)
1372//!
1373//! Complex types, such as structs and maps, are not supported in this field.
1374//! If you want them, you should use `$value`.
1375//!
1376//! Sequences are serialized to a space-delimited string, which is why only certain
1377//! types are allowed in this mode:
1378//!
1379//! ```
1380//! # use serde::{Deserialize, Serialize};
1381//! # use quick_xml::de::from_str;
1382//! # use quick_xml::se::to_string;
1383//! #[derive(Deserialize, Serialize, PartialEq, Debug)]
1384//! struct AnyName {
1385//! #[serde(rename = "$text")]
1386//! field: Vec<usize>,
1387//! }
1388//!
1389//! let obj = AnyName { field: vec![1, 2, 3] };
1390//! let xml = to_string(&obj).unwrap();
1391//! assert_eq!(xml, "<AnyName>1 2 3</AnyName>");
1392//!
1393//! let object: AnyName = from_str(&xml).unwrap();
1394//! assert_eq!(object, obj);
1395//! ```
1396//!
1397//! ## `$value`
1398//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
1399//!
1400//! NOTE: a name `#content` would better explain the purpose of that field,
1401//! but `$value` is used for compatibility with other XML serde crates, which
1402//! use that name. This will allow you to switch XML crates more smoothly if required.
1403//! </div>
1404//!
1405//! Representation of primitive types in `$value` does not differ from their
1406//! representation in `$text` field. The difference is how sequences are serialized.
1407//! `$value` serializes each sequence item as a separate XML element. The name
1408//! of that element is taken from serialized type, and because only `enum`s provide
1409//! such name (their variant name), only they should be used for such fields.
1410//!
1411//! `$value` fields do not support `struct` types with fields; the serialization
1412//! of such types would end with an `Err(Unsupported)`. Unit structs and the unit
1413//! type `()` serialize to nothing and can be deserialized from any content.
1414//!
1415//! Serialization and deserialization of a `$value` field is performed as usual, except
1416//! that the name of an XML element will be given by the serialized type, instead of
1417//! the field. This allows serializing enumerated types, where the variant is encoded
1418//! as a tag name, and thus representing an XSD `xs:choice` schema by a Rust `enum`.
1419//!
1420//! In the example below, field will be serialized as `<field/>`, because elements
1421//! get their names from the field name. It cannot be deserialized, because `Enum`
1422//! expects elements `<A/>`, `<B/>` or `<C/>`, but `AnyName` looked only for `<field/>`:
1423//!
1424//! ```no_run
1425//! # use serde::{Deserialize, Serialize};
1426//! #[derive(Deserialize, Serialize)]
1427//! enum Enum { A, B, C }
1428//!
1429//! #[derive(Deserialize, Serialize)]
1430//! struct AnyName {
1431//! // <field/>
1432//! field: Enum,
1433//! }
1434//! ```
1435//!
1436//! If you rename the field to `$value`, then `field` would be serialized as `<A/>`,
1437//! `<B/>` or `<C/>`, depending on its content. It is also possible to
1438//! deserialize it from the same elements:
1439//!
1440//! ```no_run
1441//! # use serde::{Deserialize, Serialize};
1442//! # #[derive(Deserialize, Serialize)]
1443//! # enum Enum { A, B, C }
1444//! #
1445//! #[derive(Deserialize, Serialize)]
1446//! struct AnyName {
1447//! // <A/>, <B/> or <C/>
1448//! #[serde(rename = "$value")]
1449//! field: Enum,
1450//! }
1451//! ```
1452//!
1453//! ### Primitives and sequences of primitives
1454//!
1455//! Sequences are serialized to a list of elements. Note that types that do not
1456//! produce their own tag (i.e. primitives) are written as is, without delimiters:
1457//!
1458//! ```
1459//! # use serde::{Deserialize, Serialize};
1460//! # use quick_xml::de::from_str;
1461//! # use quick_xml::se::to_string;
1462//! #[derive(Deserialize, Serialize, PartialEq, Debug)]
1463//! struct AnyName {
1464//! #[serde(rename = "$value")]
1465//! field: Vec<usize>,
1466//! }
1467//!
1468//! let obj = AnyName { field: vec![1, 2, 3] };
1469//! let xml = to_string(&obj).unwrap();
1470//! // Note, that types that does not produce their own tag are written as is!
1471//! assert_eq!(xml, "<AnyName>123</AnyName>");
1472//!
1473//! let object: AnyName = from_str("<AnyName>123</AnyName>").unwrap();
1474//! assert_eq!(object, AnyName { field: vec![123] });
1475//!
1476//! // `1 2 3` is mapped to a single `usize` element
1477//! // It is impossible to deserialize list of primitives to such field
1478//! from_str::<AnyName>("<AnyName>1 2 3</AnyName>").unwrap_err();
1479//! ```
1480//!
1481//! A particular case of that example is a string `$value` field, which is probably
1482//! the most common use of that special name:
1483//!
1484//! ```
1485//! # use serde::{Deserialize, Serialize};
1486//! # use quick_xml::de::from_str;
1487//! # use quick_xml::se::to_string;
1488//! #[derive(Deserialize, Serialize, PartialEq, Debug)]
1489//! struct AnyName {
1490//! #[serde(rename = "$value")]
1491//! field: String,
1492//! }
1493//!
1494//! let obj = AnyName { field: "content".to_string() };
1495//! let xml = to_string(&obj).unwrap();
1496//! assert_eq!(xml, "<AnyName>content</AnyName>");
1497//! ```
1498//!
1499//! ### Structs and sequences of structs
1500//!
1501//! Note that structures do not have a serializable name either (the name of the
1502//! type is never used), so it is impossible to serialize a non-unit struct or
1503//! a sequence of non-unit structs in a `$value` field. (Sequences of) unit structs
1504//! are serialized as an empty string, because units themselves serialize
1505//! to nothing:
1506//!
1507//! ```
1508//! # use serde::{Deserialize, Serialize};
1509//! # use quick_xml::de::from_str;
1510//! # use quick_xml::se::to_string;
1511//! #[derive(Deserialize, Serialize, PartialEq, Debug)]
1512//! struct Unit;
1513//!
1514//! #[derive(Deserialize, Serialize, PartialEq, Debug)]
1515//! struct AnyName {
1516//! // #[serde(default)] is required to deserialization of empty lists
1517//! // This is a general note, not related to $value
1518//! #[serde(rename = "$value", default)]
1519//! field: Vec<Unit>,
1520//! }
1521//!
1522//! let obj = AnyName { field: vec![Unit, Unit, Unit] };
1523//! let xml = to_string(&obj).unwrap();
1524//! assert_eq!(xml, "<AnyName/>");
1525//!
1526//! let object: AnyName = from_str("<AnyName/>").unwrap();
1527//! assert_eq!(object, AnyName { field: vec![] });
1528//!
1529//! let object: AnyName = from_str("<AnyName></AnyName>").unwrap();
1530//! assert_eq!(object, AnyName { field: vec![] });
1531//!
1532//! let object: AnyName = from_str("<AnyName><A/><B/><C/></AnyName>").unwrap();
1533//! assert_eq!(object, AnyName { field: vec![Unit, Unit, Unit] });
1534//! ```
1535//!
1536//! ### Enums and sequences of enums
1537//!
1538//! Enumerations use the variant name as an element name:
1539//!
1540//! ```
1541//! # use serde::{Deserialize, Serialize};
1542//! # use quick_xml::de::from_str;
1543//! # use quick_xml::se::to_string;
1544//! #[derive(Deserialize, Serialize, PartialEq, Debug)]
1545//! struct AnyName {
1546//! #[serde(rename = "$value")]
1547//! field: Vec<Enum>,
1548//! }
1549//!
1550//! #[derive(Deserialize, Serialize, PartialEq, Debug)]
1551//! enum Enum { A, B, C }
1552//!
1553//! let obj = AnyName { field: vec![Enum::A, Enum::B, Enum::C] };
1554//! let xml = to_string(&obj).unwrap();
1555//! assert_eq!(
1556//! xml,
1557//! "<AnyName>\
1558//! <A/>\
1559//! <B/>\
1560//! <C/>\
1561//! </AnyName>"
1562//! );
1563//!
1564//! let object: AnyName = from_str(&xml).unwrap();
1565//! assert_eq!(object, obj);
1566//! ```
1567//!
1568//! ----------------------------------------------------------------------------
1569//!
1570//! You can have either `$text` or `$value` field in your structs. Unfortunately,
1571//! that is not enforced, so you can theoretically have both, but you should
1572//! avoid that.
1573//!
1574//!
1575//!
1576//! Frequently Used Patterns
1577//! ========================
1578//!
1579//! Some XML constructs are used so frequently that it is worth documenting the recommended
1580//! way to represent them in Rust. The sections below describe them.
1581//!
1582//! `<element>` lists
1583//! -----------------
1584//! Many XML formats wrap lists of elements in the additional container,
1585//! although this is not required by the XML rules:
1586//!
1587//! ```xml
1588//! <root>
1589//! <field1/>
1590//! <field2/>
1591//! <list><!-- Container -->
1592//! <element/>
1593//! <element/>
1594//! <element/>
1595//! </list>
1596//! <field3/>
1597//! </root>
1598//! ```
1599//! In this case, there is a great desire to describe this XML in this way:
1600//! ```
1601//! /// Represents <element/>
1602//! type Element = ();
1603//!
1604//! /// Represents <root>...</root>
1605//! struct AnyName {
1606//! // Incorrect
1607//! list: Vec<Element>,
1608//! }
1609//! ```
1610//! This will not work, because potentially `<list>` element can have attributes
1611//! and other elements inside. You should define the struct for the `<list>`
1612//! explicitly, as you do that in the XSD for that XML:
1613//! ```
1614//! /// Represents <element/>
1615//! type Element = ();
1616//!
1617//! /// Represents <root>...</root>
1618//! struct AnyName {
1619//! // Correct
1620//! list: List,
1621//! }
1622//! /// Represents <list>...</list>
1623//! struct List {
1624//! element: Vec<Element>,
1625//! }
1626//! ```
1627//!
1628//! If you want to simplify your API, you could write a simple function for unwrapping
1629//! inner list and apply it via [`deserialize_with`]:
1630//!
1631//! ```
1632//! # use pretty_assertions::assert_eq;
1633//! use quick_xml::de::from_str;
1634//! use serde::{Deserialize, Deserializer};
1635//!
1636//! /// Represents <element/>
1637//! type Element = ();
1638//!
1639//! /// Represents <root>...</root>
1640//! #[derive(Deserialize, Debug, PartialEq)]
1641//! struct AnyName {
1642//! #[serde(deserialize_with = "unwrap_list")]
1643//! list: Vec<Element>,
1644//! }
1645//!
1646//! fn unwrap_list<'de, D>(deserializer: D) -> Result<Vec<Element>, D::Error>
1647//! where
1648//! D: Deserializer<'de>,
1649//! {
1650//! /// Represents <list>...</list>
1651//! #[derive(Deserialize)]
1652//! struct List {
1653//! // default allows empty list
1654//! #[serde(default)]
1655//! element: Vec<Element>,
1656//! }
1657//! Ok(List::deserialize(deserializer)?.element)
1658//! }
1659//!
1660//! assert_eq!(
1661//! AnyName { list: vec![(), (), ()] },
1662//! from_str("
1663//! <root>
1664//! <list>
1665//! <element/>
1666//! <element/>
1667//! <element/>
1668//! </list>
1669//! </root>
1670//! ").unwrap(),
1671//! );
1672//! ```
1673//!
1674//! Instead of writing such functions manually, you also could try <https://lib.rs/crates/serde-query>.
1675//!
1676//! Enum::Unit Variants As a Text
1677//! -----------------------------
1678//! One frequent task, and a typical source of mistakes, is mapping the text
1679//! content of some tag to a Rust `enum`. For example, for the XML:
1680//!
1681//! ```xml
1682//! <some-container>
1683//! <field>EnumValue</field>
1684//! </some-container>
1685//! ```
1686//! one could create an _incorrect_ mapping
1687//!
1688//! ```
1689//! # use serde::{Deserialize, Serialize};
1690//! #
1691//! #[derive(Serialize, Deserialize)]
1692//! enum SomeEnum {
1693//! EnumValue,
1694//! # /*
1695//! ...
1696//! # */
1697//! }
1698//!
1699//! #[derive(Serialize, Deserialize)]
1700//! #[serde(rename = "some-container")]
1701//! struct SomeContainer {
1702//! field: SomeEnum,
1703//! }
1704//! ```
1705//!
1706//! Actually, those types will be serialized into:
1707//! ```xml
1708//! <some-container>
1709//! <EnumValue/>
1710//! </some-container>
1711//! ```
1712//! and will not be able to be deserialized.
1713//!
1714//! You can easily see what's wrong if you think about attributes, which could
1715//! be defined in the `<field>` tag:
1716//! ```xml
1717//! <some-container>
1718//! <field some="attribute">EnumValue</field>
1719//! </some-container>
1720//! ```
1721//!
1722//! After that you can find the correct solution, using the principles explained
1723//! above. You should wrap `SomeEnum` into wrapper struct under the [`$text`](#text)
1724//! name:
1725//! ```
1726//! # use serde::{Serialize, Deserialize};
1727//! # type SomeEnum = ();
1728//! #[derive(Serialize, Deserialize)]
1729//! struct Field {
1730//! // Use a special name `$text` to map field to the text content
1731//! #[serde(rename = "$text")]
1732//! content: SomeEnum,
1733//! }
1734//!
1735//! #[derive(Serialize, Deserialize)]
1736//! #[serde(rename = "some-container")]
1737//! struct SomeContainer {
1738//! field: Field,
1739//! }
1740//! ```
1741//!
1742//! If you still want to keep your struct untouched, you can instead use the
1743//! helper module [`text_content`].
1744//!
1745//!
1746//! Internally Tagged Enums
1747//! -----------------------
1748//! [Tagged enums] are currently not supported because of an issue in the Serde
1749//! design (see [serde#1183] and [quick-xml#586]) and missing optimizations in
1750//! Serde which could be useful for XML parsing ([serde#1495]). This can be worked
1751//! around by manually implementing deserialize with `#[serde(deserialize_with = "func")]`
1752//! or implementing [`Deserialize`], but this can get very tedious very fast for
1753//! files with large amounts of tagged enums. To help with this issue quick-xml
1754//! provides a macro [`impl_deserialize_for_internally_tagged_enum!`]. See the
1755//! macro documentation for details.
1756//!
1757//!
1758//! [specification]: https://www.w3.org/TR/xmlschema11-1/#Simple_Type_Definition
1759//! [`deserialize_with`]: https://serde.rs/field-attrs.html#deserialize_with
1760//! [#497]: https://github.com/tafia/quick-xml/issues/497
1761//! [`text_content`]: crate::serde_helpers::text_content
1762//! [Tagged enums]: https://serde.rs/enum-representations.html#internally-tagged
1763//! [serde#1183]: https://github.com/serde-rs/serde/issues/1183
1764//! [serde#1495]: https://github.com/serde-rs/serde/issues/1495
1765//! [quick-xml#586]: https://github.com/tafia/quick-xml/issues/586
1766//! [`impl_deserialize_for_internally_tagged_enum!`]: crate::impl_deserialize_for_internally_tagged_enum
1767
1768// Macros should be defined before the modules that use them
1769// Also, macros should be imported before using them
1770use serde::serde_if_integer128;
1771
/// Generates a single `deserialize_*` method for a type that is parsed from text.
///
/// Parameters:
/// - `$deserialize`: name of the generated method (for example, `deserialize_i8`);
/// - `$visit`: name of the visitor method that receives the parsed value
///   (for example, `visit_i8`);
/// - `$mut`: optional token (`mut`) that is put before the `self` receiver.
///
/// The generated method obtains the text with `self.read_string()`, calls
/// `parse()` on it and passes the parsed value to the visitor. Errors of both
/// steps are propagated to the caller with `?`.
macro_rules! deserialize_type {
    ($deserialize:ident => $visit:ident, $($mut:tt)?) => {
        fn $deserialize<V: Visitor<'de>>(
            $($mut)? self,
            visitor: V,
        ) -> Result<V::Value, DeError> {
            // The text is parsed as is: a valid representation of a number
            // cannot contain escaped characters, so unescaping is not needed
            let raw = self.read_string()?;
            let parsed = raw.parse()?;
            visitor.$visit(parsed)
        }
    };
}
1784
1785/// Implement deserialization methods for scalar types, such as numbers, strings,
1786/// byte arrays, booleans and identifiers.
1787macro_rules! deserialize_primitives {
1788 ($($mut:tt)?) => {
1789 deserialize_type!(deserialize_i8 => visit_i8, $($mut)?);
1790 deserialize_type!(deserialize_i16 => visit_i16, $($mut)?);
1791 deserialize_type!(deserialize_i32 => visit_i32, $($mut)?);
1792 deserialize_type!(deserialize_i64 => visit_i64, $($mut)?);
1793
1794 deserialize_type!(deserialize_u8 => visit_u8, $($mut)?);
1795 deserialize_type!(deserialize_u16 => visit_u16, $($mut)?);
1796 deserialize_type!(deserialize_u32 => visit_u32, $($mut)?);
1797 deserialize_type!(deserialize_u64 => visit_u64, $($mut)?);
1798
1799 serde_if_integer128! {
1800 deserialize_type!(deserialize_i128 => visit_i128, $($mut)?);
1801 deserialize_type!(deserialize_u128 => visit_u128, $($mut)?);
1802 }
1803
1804 deserialize_type!(deserialize_f32 => visit_f32, $($mut)?);
1805 deserialize_type!(deserialize_f64 => visit_f64, $($mut)?);
1806
1807 fn deserialize_bool<V>($($mut)? self, visitor: V) -> Result<V::Value, DeError>
1808 where
1809 V: Visitor<'de>,
1810 {
1811 let text = self.read_string()?;
1812
1813 str2bool(&text, visitor)
1814 }
1815
1816 /// Character represented as [strings](#method.deserialize_str).
1817 fn deserialize_char<V>(self, visitor: V) -> Result<V::Value, DeError>
1818 where
1819 V: Visitor<'de>,
1820 {
1821 self.deserialize_str(visitor)
1822 }
1823
1824 fn deserialize_str<V>($($mut)? self, visitor: V) -> Result<V::Value, DeError>
1825 where
1826 V: Visitor<'de>,
1827 {
1828 let text = self.read_string()?;
1829 match text {
1830 Cow::Borrowed(string) => visitor.visit_borrowed_str(string),
1831 Cow::Owned(string) => visitor.visit_string(string),
1832 }
1833 }
1834
1835 /// Representation of owned strings the same as [non-owned](#method.deserialize_str).
1836 fn deserialize_string<V>(self, visitor: V) -> Result<V::Value, DeError>
1837 where
1838 V: Visitor<'de>,
1839 {
1840 self.deserialize_str(visitor)
1841 }
1842
1843 /// Returns [`DeError::Unsupported`]
1844 fn deserialize_bytes<V>(self, _visitor: V) -> Result<V::Value, DeError>
1845 where
1846 V: Visitor<'de>,
1847 {
1848 Err(DeError::Unsupported("binary data content is not supported by XML format".into()))
1849 }
1850
1851 /// Forwards deserialization to the [`deserialize_bytes`](#method.deserialize_bytes).
1852 fn deserialize_byte_buf<V>(self, visitor: V) -> Result<V::Value, DeError>
1853 where
1854 V: Visitor<'de>,
1855 {
1856 self.deserialize_bytes(visitor)
1857 }
1858
/// Named units are represented the same way as [unnamed units](#method.deserialize_unit).
/// The type name is ignored.
fn deserialize_unit_struct<V>(
    self,
    _name: &'static str,
    visitor: V,
) -> Result<V::Value, DeError>
where
    V: Visitor<'de>,
{
    self.deserialize_unit(visitor)
}
1870
/// Newtypes are represented the same way as a one-element [tuple](#method.deserialize_tuple).
/// The type name is ignored.
fn deserialize_newtype_struct<V>(
    self,
    _name: &'static str,
    visitor: V,
) -> Result<V::Value, DeError>
where
    V: Visitor<'de>,
{
    self.deserialize_tuple(1, visitor)
}
1882
/// Tuples are represented the same way as [sequences](#method.deserialize_seq).
/// The requested length is not used by this method.
fn deserialize_tuple<V>(self, _len: usize, visitor: V) -> Result<V::Value, DeError>
where
    V: Visitor<'de>,
{
    self.deserialize_seq(visitor)
}
1890
/// Named tuples are represented the same way as [unnamed tuples](#method.deserialize_tuple).
/// The type name is ignored, the length is passed through unchanged.
fn deserialize_tuple_struct<V>(
    self,
    _name: &'static str,
    len: usize,
    visitor: V,
) -> Result<V::Value, DeError>
where
    V: Visitor<'de>,
{
    self.deserialize_tuple(len, visitor)
}
1903
/// Identifiers are represented as [strings](#method.deserialize_str): the call
/// is forwarded to `deserialize_str`.
fn deserialize_identifier<V>(self, visitor: V) -> Result<V::Value, DeError>
where
    V: Visitor<'de>,
{
    self.deserialize_str(visitor)
}
1911 };
1912}
1913
/// Deserializes an optional value.
///
/// Peeks at the next event of `$de`: an empty text event or the end of the
/// input results in `visit_none()`, any other event results in `visit_some()`
/// being called with `$deserializer`.
macro_rules! deserialize_option {
    ($de:expr, $deserializer:ident, $visitor:ident) => {{
        let is_absent = match $de.peek()? {
            DeEvent::Text(t) => t.is_empty(),
            DeEvent::Eof => true,
            _ => false,
        };
        if is_absent {
            $visitor.visit_none()
        } else {
            $visitor.visit_some($deserializer)
        }
    }};
}
1923
1924mod key;
1925mod map;
1926mod resolver;
1927mod simple_type;
1928mod var;
1929
1930pub use crate::errors::serialize::DeError;
1931pub use resolver::{EntityResolver, NoEntityResolver};
1932
1933use crate::{
1934 encoding::Decoder,
1935 errors::Error,
1936 events::{BytesCData, BytesEnd, BytesStart, BytesText, Event},
1937 name::QName,
1938 reader::Reader,
1939};
1940use serde::de::{self, Deserialize, DeserializeOwned, DeserializeSeed, SeqAccess, Visitor};
1941use std::borrow::Cow;
1942#[cfg(feature = "overlapped-lists")]
1943use std::collections::VecDeque;
1944use std::io::BufRead;
1945use std::mem::replace;
1946#[cfg(feature = "overlapped-lists")]
1947use std::num::NonZeroUsize;
1948use std::ops::Deref;
1949
/// Special name `$text`: data represented by a text node or a CDATA node.
/// XML markup is not expected.
pub(crate) const TEXT_KEY: &str = "$text";
/// Special name `$value`: data represented by any XML markup inside.
pub(crate) const VALUE_KEY: &str = "$value";
1954
/// Decoded and concatenated content of consecutive [`Text`] and [`CData`]
/// events. _Consecutive_ means that the events follow each other directly or
/// are delimited only by (any number of) [`Comment`] or [`PI`] events.
///
/// [`Text`]: Event::Text
/// [`CData`]: Event::CData
/// [`Comment`]: Event::Comment
/// [`PI`]: Event::PI
#[derive(Debug, PartialEq, Eq)]
pub struct Text<'a> {
    /// The textual content, either borrowed or owned
    text: Cow<'a, str>,
}
1967
1968impl<'a> Deref for Text<'a> {
1969 type Target = str;
1970
1971 #[inline]
1972 fn deref(&self) -> &Self::Target {
1973 self.text.deref()
1974 }
1975}
1976
1977impl<'a> From<&'a str> for Text<'a> {
1978 #[inline]
1979 fn from(text: &'a str) -> Self {
1980 Self {
1981 text: Cow::Borrowed(text),
1982 }
1983 }
1984}
1985
1986////////////////////////////////////////////////////////////////////////////////////////////////////
1987
/// Simplified event which contains only those variants that are used by the deserializer
#[derive(Debug, PartialEq, Eq)]
pub enum DeEvent<'a> {
    /// Start tag (with attributes) `<tag attr="value">`.
    Start(BytesStart<'a>),
    /// End tag `</tag>`.
    End(BytesEnd<'a>),
    /// Decoded and concatenated content of consecutive [`Text`] and [`CData`]
    /// events. _Consecutive_ means that the events follow each other directly or
    /// are delimited only by (any number of) [`Comment`] or [`PI`] events.
    ///
    /// [`Text`]: Event::Text
    /// [`CData`]: Event::CData
    /// [`Comment`]: Event::Comment
    /// [`PI`]: Event::PI
    Text(Text<'a>),
    /// End of XML document.
    Eof,
}
2007
2008////////////////////////////////////////////////////////////////////////////////////////////////////
2009
/// Simplified event which contains only those variants that are used by the
/// deserializer, but with [`Text`] events not yet fully processed.
///
/// [`Text`] events should be trimmed if they are not surrounded by other
/// [`Text`] or [`CData`] events. This event contains an intermediate state of a
/// [`Text`] event, where it is trimmed from the start, but not from the end. To
/// trim the trailing spaces we have to look ahead by one deserializer event
/// (i. e. skip all comments and processing instructions).
///
/// [`Text`]: Event::Text
/// [`CData`]: Event::CData
#[derive(Debug, PartialEq, Eq)]
pub enum PayloadEvent<'a> {
    /// Start tag (with attributes) `<tag attr="value">`.
    Start(BytesStart<'a>),
    /// End tag `</tag>`.
    End(BytesEnd<'a>),
    /// Escaped character data between tags.
    Text(BytesText<'a>),
    /// Unescaped character data stored in `<![CDATA[...]]>`.
    CData(BytesCData<'a>),
    /// Document type definition data (DTD) stored in `<!DOCTYPE ...>`.
    DocType(BytesText<'a>),
    /// End of XML document.
    Eof,
}
2036
2037impl<'a> PayloadEvent<'a> {
2038 /// Ensures that all data is owned to extend the object's lifetime if necessary.
2039 #[inline]
2040 fn into_owned(self) -> PayloadEvent<'static> {
2041 match self {
2042 PayloadEvent::Start(e: BytesStart<'_>) => PayloadEvent::Start(e.into_owned()),
2043 PayloadEvent::End(e: BytesEnd<'_>) => PayloadEvent::End(e.into_owned()),
2044 PayloadEvent::Text(e: BytesText<'_>) => PayloadEvent::Text(e.into_owned()),
2045 PayloadEvent::CData(e: BytesCData<'_>) => PayloadEvent::CData(e.into_owned()),
2046 PayloadEvent::DocType(e: BytesText<'_>) => PayloadEvent::DocType(e.into_owned()),
2047 PayloadEvent::Eof => PayloadEvent::Eof,
2048 }
2049 }
2050}
2051
/// An intermediate reader that consumes [`PayloadEvent`]s and produces final [`DeEvent`]s.
/// [`PayloadEvent::Text`] events that are followed by any event except
/// [`PayloadEvent::Text`] or [`PayloadEvent::CData`] are trimmed from the end.
struct XmlReader<'i, R: XmlRead<'i>, E: EntityResolver = NoEntityResolver> {
    /// A source of low-level XML events
    reader: R,
    /// Intermediate event that could be returned by the next call to `next()`.
    /// If that is a `Text` event then its leading spaces are already trimmed,
    /// but its trailing spaces are not. Trimming of the trailing spaces could
    /// be necessary before the event is returned
    lookahead: Result<PayloadEvent<'i>, DeError>,

    /// Used to resolve unknown entities that would otherwise cause the parser
    /// to return an [`EscapeError::UnrecognizedSymbol`] error.
    ///
    /// [`EscapeError::UnrecognizedSymbol`]: crate::escape::EscapeError::UnrecognizedSymbol
    entity_resolver: E,
}
2070
2071impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
2072 fn new(mut reader: R, entity_resolver: E) -> Self {
2073 // Lookahead by one event immediately, so we do not need to check in the
2074 // loop if we need lookahead or not
2075 let lookahead = reader.next();
2076
2077 Self {
2078 reader,
2079 lookahead,
2080 entity_resolver,
2081 }
2082 }
2083
2084 /// Read next event and put it in lookahead, return the current lookahead
2085 #[inline(always)]
2086 fn next_impl(&mut self) -> Result<PayloadEvent<'i>, DeError> {
2087 replace(&mut self.lookahead, self.reader.next())
2088 }
2089
2090 #[inline(always)]
2091 fn need_trim_end(&self) -> bool {
2092 // If next event is a text or CDATA, we should not trim trailing spaces
2093 !matches!(
2094 self.lookahead,
2095 Ok(PayloadEvent::Text(_)) | Ok(PayloadEvent::CData(_))
2096 )
2097 }
2098
2099 /// Read all consequent [`Text`] and [`CData`] events until non-text event
2100 /// occurs. Content of all events would be appended to `result` and returned
2101 /// as [`DeEvent::Text`].
2102 ///
2103 /// [`Text`]: PayloadEvent::Text
2104 /// [`CData`]: PayloadEvent::CData
2105 fn drain_text(&mut self, mut result: Cow<'i, str>) -> Result<DeEvent<'i>, DeError> {
2106 loop {
2107 match self.lookahead {
2108 Ok(PayloadEvent::Text(_) | PayloadEvent::CData(_)) => {
2109 let text = self.next_text()?;
2110
2111 let mut s = result.into_owned();
2112 s += &text;
2113 result = Cow::Owned(s);
2114 }
2115 _ => break,
2116 }
2117 }
2118 Ok(DeEvent::Text(Text { text: result }))
2119 }
2120
2121 /// Read one text event, panics if current event is not a text event
2122 ///
2123 /// |Event |XML |Handling
2124 /// |-----------------------|---------------------------|----------------------------------------
2125 /// |[`PayloadEvent::Start`]|`<tag>...</tag>` |Possible panic (unreachable)
2126 /// |[`PayloadEvent::End`] |`</any-tag>` |Possible panic (unreachable)
2127 /// |[`PayloadEvent::Text`] |`text content` |Unescapes `text content` and returns it
2128 /// |[`PayloadEvent::CData`]|`<![CDATA[cdata content]]>`|Returns `cdata content` unchanged
2129 /// |[`PayloadEvent::Eof`] | |Possible panic (unreachable)
2130 #[inline(always)]
2131 fn next_text(&mut self) -> Result<Cow<'i, str>, DeError> {
2132 match self.next_impl()? {
2133 PayloadEvent::Text(mut e) => {
2134 if self.need_trim_end() {
2135 e.inplace_trim_end();
2136 }
2137 Ok(e.unescape_with(|entity| self.entity_resolver.resolve(entity))?)
2138 }
2139 PayloadEvent::CData(e) => Ok(e.decode()?),
2140
2141 // SAFETY: this method is called only when we peeked Text or CData
2142 _ => unreachable!("Only `Text` and `CData` events can come here"),
2143 }
2144 }
2145
2146 /// Return an input-borrowing event.
2147 fn next(&mut self) -> Result<DeEvent<'i>, DeError> {
2148 loop {
2149 return match self.next_impl()? {
2150 PayloadEvent::Start(e) => Ok(DeEvent::Start(e)),
2151 PayloadEvent::End(e) => Ok(DeEvent::End(e)),
2152 PayloadEvent::Text(mut e) => {
2153 if self.need_trim_end() && e.inplace_trim_end() {
2154 continue;
2155 }
2156 self.drain_text(e.unescape_with(|entity| self.entity_resolver.resolve(entity))?)
2157 }
2158 PayloadEvent::CData(e) => self.drain_text(e.decode()?),
2159 PayloadEvent::DocType(e) => {
2160 self.entity_resolver
2161 .capture(e)
2162 .map_err(|err| DeError::Custom(format!("cannot parse DTD: {}", err)))?;
2163 continue;
2164 }
2165 PayloadEvent::Eof => Ok(DeEvent::Eof),
2166 };
2167 }
2168 }
2169
/// Skips events up to the end tag with the name `name`, taking the already
/// pre-read lookahead event into account, and refills the lookahead afterwards.
///
/// Errors of the underlying `read_to_end` calls are returned only after the
/// lookahead was refreshed.
#[inline]
fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
    match self.lookahead {
        // We pre-read a start event with the same name as the one that is
        // required to be skipped. The first call of `read_to_end` will end our
        // pre-read event, the second will consume other events
        Ok(PayloadEvent::Start(ref e)) if e.name() == name => {
            let result1 = self.reader.read_to_end(name);
            let result2 = self.reader.read_to_end(name);

            // In case of error `next` returns `Eof`
            self.lookahead = self.reader.next();
            result1?;
            result2?;
        }
        // We pre-read an end event with the same name as the one that is
        // required to be skipped. Because this is an end event, we already
        // consumed the whole tree, so nothing to do, just update the lookahead
        Ok(PayloadEvent::End(ref e)) if e.name() == name => {
            self.lookahead = self.reader.next();
        }
        // Any other pre-read event is dropped together with the skipped content
        Ok(_) => {
            let result = self.reader.read_to_end(name);

            // In case of error `next` returns `Eof`
            self.lookahead = self.reader.next();
            result?;
        }
        // Read next lookahead event, unpack error from the current lookahead
        Err(_) => {
            self.next_impl()?;
        }
    }
    Ok(())
}
2205
/// Returns the decoder of the underlying reader
#[inline]
fn decoder(&self) -> Decoder {
    self.reader.decoder()
}
2210}
2211
2212////////////////////////////////////////////////////////////////////////////////////////////////////
2213
2214/// Deserialize an instance of type `T` from a string of XML text.
2215pub fn from_str<'de, T>(s: &'de str) -> Result<T, DeError>
2216where
2217 T: Deserialize<'de>,
2218{
2219 let mut de: Deserializer<'_, SliceReader<'_>> = Deserializer::from_str(source:s);
2220 T::deserialize(&mut de)
2221}
2222
2223/// Deserialize from a reader. This method will do internal copies of data
2224/// readed from `reader`. If you want have a `&str` input and want to borrow
2225/// as much as possible, use [`from_str`].
2226pub fn from_reader<R, T>(reader: R) -> Result<T, DeError>
2227where
2228 R: BufRead,
2229 T: DeserializeOwned,
2230{
2231 let mut de: Deserializer<'_, IoReader<…>> = Deserializer::from_reader(reader);
2232 T::deserialize(&mut de)
2233}
2234
2235// TODO: According to the https://www.w3.org/TR/xmlschema11-2/#boolean,
2236// valid boolean representations are only "true", "false", "1", and "0"
2237fn str2bool<'de, V>(value: &str, visitor: V) -> Result<V::Value, DeError>
2238where
2239 V: de::Visitor<'de>,
2240{
2241 match value {
2242 "true" | "1" | "True" | "TRUE" | "t" | "Yes" | "YES" | "yes" | "y" => {
2243 visitor.visit_bool(true)
2244 }
2245 "false" | "0" | "False" | "FALSE" | "f" | "No" | "NO" | "no" | "n" => {
2246 visitor.visit_bool(false)
2247 }
2248 _ => Err(DeError::InvalidBoolean(value.into())),
2249 }
2250}
2251
2252fn deserialize_bool<'de, V>(value: &[u8], decoder: Decoder, visitor: V) -> Result<V::Value, DeError>
2253where
2254 V: Visitor<'de>,
2255{
2256 #[cfg(feature = "encoding")]
2257 {
2258 let value = decoder.decode(value)?;
2259 // No need to unescape because valid boolean representations cannot be escaped
2260 str2bool(value.as_ref(), visitor)
2261 }
2262
2263 #[cfg(not(feature = "encoding"))]
2264 {
2265 // No need to unescape because valid boolean representations cannot be escaped
2266 match value {
2267 b"true" | b"1" | b"True" | b"TRUE" | b"t" | b"Yes" | b"YES" | b"yes" | b"y" => {
2268 visitor.visit_bool(true)
2269 }
2270 b"false" | b"0" | b"False" | b"FALSE" | b"f" | b"No" | b"NO" | b"no" | b"n" => {
2271 visitor.visit_bool(false)
2272 }
2273 e: &[u8] => Err(DeError::InvalidBoolean(decoder.decode(bytes:e)?.into())),
2274 }
2275 }
2276}
2277
2278////////////////////////////////////////////////////////////////////////////////////////////////////
2279
/// A structure that deserializes XML into Rust values.
pub struct Deserializer<'de, R, E: EntityResolver = NoEntityResolver>
where
    R: XmlRead<'de>,
{
    /// An XML reader that streams events into this deserializer
    reader: XmlReader<'de, R, E>,

    /// When deserializing sequences we sometimes have to skip unwanted events.
    /// Those events should be stored and then replayed. This is a replay buffer
    /// that streams events while it is not empty. When it is exhausted, events
    /// will be requested from [`Self::reader`].
    #[cfg(feature = "overlapped-lists")]
    read: VecDeque<DeEvent<'de>>,
    /// When deserializing sequences we sometimes have to skip events, because XML
    /// is tolerant to the order of elements and, even if the order is strictly
    /// specified in the XSD (using `xs:sequence`), most XML parsers allow order
    /// violations. That means that elements forming a sequence could be
    /// overlapped with other elements that are not related to that sequence.
    ///
    /// In order to support this, the deserializer will scan events, skip the
    /// unwanted ones and store them here. After calling [`Self::start_replay()`]
    /// all events are moved from this buffer to [`Self::read`].
    #[cfg(feature = "overlapped-lists")]
    write: VecDeque<DeEvent<'de>>,
    /// Maximum number of events that can be skipped when processing sequences
    /// that occur out-of-order. This field is used to prevent potential
    /// denial-of-service (DoS) attacks which could cause infinite memory
    /// consumption when parsing a very large amount of XML into a sequence field.
    #[cfg(feature = "overlapped-lists")]
    limit: Option<NonZeroUsize>,

    /// An event that was already requested from [`Self::reader`] by `peek()`,
    /// but not yet consumed by `next()`
    #[cfg(not(feature = "overlapped-lists"))]
    peek: Option<DeEvent<'de>>,
}
2315
2316impl<'de, R, E> Deserializer<'de, R, E>
2317where
2318 R: XmlRead<'de>,
2319 E: EntityResolver,
2320{
/// Create an XML deserializer from one of the possible quick_xml input sources.
///
/// Typically it is more convenient to use one of these methods instead:
///
/// - [`Deserializer::from_str`]
/// - [`Deserializer::from_reader`]
fn new(reader: R, entity_resolver: E) -> Self {
    Self {
        reader: XmlReader::new(reader, entity_resolver),

        // Both event buffers start empty and no limit is set
        #[cfg(feature = "overlapped-lists")]
        read: VecDeque::new(),
        #[cfg(feature = "overlapped-lists")]
        write: VecDeque::new(),
        #[cfg(feature = "overlapped-lists")]
        limit: None,

        // Nothing was peeked yet
        #[cfg(not(feature = "overlapped-lists"))]
        peek: None,
    }
}
2342
2343 /// Set the maximum number of events that could be skipped during deserialization
2344 /// of sequences.
2345 ///
2346 /// If `<element>` contains more than specified nested elements, `$text` or
2347 /// CDATA nodes, then [`DeError::TooManyEvents`] will be returned during
2348 /// deserialization of sequence field (any type that uses [`deserialize_seq`]
2349 /// for the deserialization, for example, `Vec<T>`).
2350 ///
2351 /// This method can be used to prevent a [DoS] attack and infinite memory
2352 /// consumption when parsing a very large XML to a sequence field.
2353 ///
2354 /// It is strongly recommended to set limit to some value when you parse data
2355 /// from untrusted sources. You should choose a value that your typical XMLs
2356 /// can have _between_ different elements that corresponds to the same sequence.
2357 ///
2358 /// # Examples
2359 ///
2360 /// Let's imagine, that we deserialize such structure:
2361 /// ```
2362 /// struct List {
2363 /// item: Vec<()>,
2364 /// }
2365 /// ```
2366 ///
/// The XML that we try to parse looks like this:
2368 /// ```xml
2369 /// <any-name>
2370 /// <item/>
2371 /// <!-- Bufferization starts at this point -->
2372 /// <another-item>
2373 /// <some-element>with text</some-element>
2374 /// <yet-another-element/>
2375 /// </another-item>
2376 /// <!-- Buffer will be emptied at this point; 7 events were buffered -->
2377 /// <item/>
2378 /// <!-- There is nothing to buffer, because elements follows each other -->
2379 /// <item/>
2380 /// </any-name>
2381 /// ```
2382 ///
2383 /// There, when we deserialize the `item` field, we need to buffer 7 events,
2384 /// before we can deserialize the second `<item/>`:
2385 ///
2386 /// - `<another-item>`
2387 /// - `<some-element>`
2388 /// - `$text(with text)`
2389 /// - `</some-element>`
2390 /// - `<yet-another-element/>` (virtual start event)
2391 /// - `<yet-another-element/>` (virtual end event)
2392 /// - `</another-item>`
2393 ///
2394 /// Note, that `<yet-another-element/>` internally represented as 2 events:
2395 /// one for the start tag and one for the end tag. In the future this can be
2396 /// eliminated, but for now we use [auto-expanding feature] of a reader,
2397 /// because this simplifies deserializer code.
2398 ///
2399 /// [`deserialize_seq`]: serde::Deserializer::deserialize_seq
2400 /// [DoS]: https://en.wikipedia.org/wiki/Denial-of-service_attack
2401 /// [auto-expanding feature]: Reader::expand_empty_elements
#[cfg(feature = "overlapped-lists")]
pub fn event_buffer_size(&mut self, limit: Option<NonZeroUsize>) -> &mut Self {
    // `None` removes the limit (see the check in `skip_event`)
    self.limit = limit;
    // Returning `self` allows to chain further calls
    self
}
2407
/// Returns a reference to the next event without consuming it. If the replay
/// buffer is empty, one event is requested from the reader and stored there.
#[cfg(feature = "overlapped-lists")]
fn peek(&mut self) -> Result<&DeEvent<'de>, DeError> {
    if self.read.is_empty() {
        let event = self.reader.next()?;
        self.read.push_front(event);
    }
    match self.read.front() {
        Some(event) => Ok(event),
        // SAFETY: `self.read` was filled in the code above.
        // NOTE: Can be replaced with `unsafe { std::hint::unreachable_unchecked() }`
        //       if unsafe code will be allowed
        None => unreachable!(),
    }
}
2421 #[cfg(not(feature = "overlapped-lists"))]
2422 fn peek(&mut self) -> Result<&DeEvent<'de>, DeError> {
2423 if self.peek.is_none() {
2424 self.peek = Some(self.reader.next()?);
2425 }
2426 match self.peek.as_ref() {
2427 Some(v) => Ok(v),
2428 // SAFETY: a `None` variant for `self.peek` would have been replaced
2429 // by a `Some` variant in the code above.
2430 // TODO: Can be replaced with `unsafe { std::hint::unreachable_unchecked() }`
2431 // if unsafe code will be allowed
2432 None => unreachable!(),
2433 }
2434 }
2435
/// Returns the next event: a buffered one (replayed or previously peeked) if
/// there is any, otherwise a fresh one requested from the reader.
fn next(&mut self) -> Result<DeEvent<'de>, DeError> {
    // Replay skipped or peeked events
    #[cfg(feature = "overlapped-lists")]
    if let Some(event) = self.read.pop_front() {
        return Ok(event);
    }
    #[cfg(not(feature = "overlapped-lists"))]
    if let Some(e) = self.peek.take() {
        return Ok(e);
    }
    // Nothing is buffered, read from the source
    self.reader.next()
}
2448
/// Returns the mark after which all events, skipped by [`Self::skip()`] call,
/// should be replayed after calling [`Self::start_replay()`].
///
/// The mark is the current length of the [`Self::write`] buffer.
#[cfg(feature = "overlapped-lists")]
#[inline]
#[must_use = "returned checkpoint should be used in `start_replay`"]
fn skip_checkpoint(&self) -> usize {
    self.write.len()
}
2457
/// Takes the next event and, if it is a start tag, the whole XML tree up to
/// the matching end tag, and stores these events in the skipped events buffer
/// from which they can be retrieved later. You MUST call
/// [`Self::start_replay()`] after calling this to give access to the skipped
/// events and release internal buffers.
#[cfg(feature = "overlapped-lists")]
fn skip(&mut self) -> Result<(), DeError> {
    let first = self.next()?;
    self.skip_event(first)?;

    // Only a start event has a subtree that should be skipped as well
    let end = match self.write.back() {
        Some(DeEvent::Start(start)) => start.name().as_ref().to_owned(),
        _ => return Ok(()),
    };

    // Count of nested elements with the same name that are currently open
    let mut depth = 0;
    loop {
        let event = self.next()?;
        // Classify the event before it is moved into the buffer
        let opens = matches!(&event, DeEvent::Start(e) if e.name().as_ref() == end);
        let closes = matches!(&event, DeEvent::End(e) if e.name().as_ref() == end);
        let is_eof = matches!(event, DeEvent::Eof);

        self.skip_event(event)?;

        if is_eof || (closes && depth == 0) {
            break;
        }
        if opens {
            depth += 1;
        } else if closes {
            depth -= 1;
        }
    }
    Ok(())
}
2497
/// Appends `event` to the skipped events buffer. Returns
/// [`DeError::TooManyEvents`] instead if a limit is set and the buffer
/// already holds that many events.
#[cfg(feature = "overlapped-lists")]
#[inline]
fn skip_event(&mut self, event: DeEvent<'de>) -> Result<(), DeError> {
    match self.limit {
        Some(max) if self.write.len() >= max.get() => Err(DeError::TooManyEvents(max)),
        _ => {
            self.write.push_back(event);
            Ok(())
        }
    }
}
2509
/// Moves buffered events, skipped after the given `checkpoint`, from the
/// [`Self::write`] skip buffer to the [`Self::read`] buffer.
///
/// After calling this method, [`Self::peek()`] and [`Self::next()`] start to
/// return events that were skipped previously by calling [`Self::skip()`],
/// and only when all those events are consumed does the deserializer start
/// to drain events from the underlying reader.
///
/// This method MUST be called if [`Self::skip()`] was called any number of
/// times after [`Self::new()`] or `start_replay()`, otherwise you'll lose events.
#[cfg(feature = "overlapped-lists")]
fn start_replay(&mut self, checkpoint: usize) {
    if checkpoint == 0 {
        // Everything skipped is replayed: put the not yet consumed events
        // after the skipped ones and exchange the buffers
        self.write.append(&mut self.read);
        std::mem::swap(&mut self.read, &mut self.write);
    } else {
        // Only events skipped after the checkpoint are replayed, events
        // before it stay in `self.write`
        let mut read = self.write.split_off(checkpoint);
        read.append(&mut self.read);
        self.read = read;
    }
}
2531
/// Reads string content, allowing it to be wrapped in one level of tags
/// (calls [`Self::read_string_impl`] with `allow_start == true`).
#[inline]
fn read_string(&mut self) -> Result<Cow<'de, str>, DeError> {
    self.read_string_impl(true)
}
2536
2537 /// Consumes consequent [`Text`] and [`CData`] (both a referred below as a _text_)
2538 /// events, merge them into one string. If there are no such events, returns
2539 /// an empty string.
2540 ///
2541 /// If `allow_start` is `false`, then only text events is consumed, for other
2542 /// events an error is returned (see table below).
2543 ///
2544 /// If `allow_start` is `true`, then first [`DeEvent::Text`] event is returned
2545 /// and all other content is skipped until corresponding end tag will be consumed.
2546 ///
2547 /// # Handling events
2548 ///
2549 /// The table below shows how events is handled by this method:
2550 ///
2551 /// |Event |XML |Handling
2552 /// |------------------|---------------------------|----------------------------------------
2553 /// |[`DeEvent::Start`]|`<tag>...</tag>` |if `allow_start == true`, result determined by the second table, otherwise emits [`UnexpectedStart("tag")`](DeError::UnexpectedStart)
2554 /// |[`DeEvent::End`] |`</any-tag>` |Emits [`UnexpectedEnd("any-tag")`](DeError::UnexpectedEnd)
2555 /// |[`DeEvent::Text`] |`text content` or `<![CDATA[cdata content]]>` (probably mixed)|Returns event content unchanged
2556 /// |[`DeEvent::Eof`] | |Emits [`UnexpectedEof`](DeError::UnexpectedEof)
2557 ///
2558 /// Second event, consumed if [`DeEvent::Start`] was received and `allow_start == true`:
2559 ///
2560 /// |Event |XML |Handling
2561 /// |------------------|---------------------------|----------------------------------------------------------------------------------
2562 /// |[`DeEvent::Start`]|`<any-tag>...</any-tag>` |Emits [`UnexpectedStart("any-tag")`](DeError::UnexpectedStart)
2563 /// |[`DeEvent::End`] |`</tag>` |Returns an empty slice, if close tag matched the open one
2564 /// |[`DeEvent::End`] |`</any-tag>` |Emits [`UnexpectedEnd("any-tag")`](DeError::UnexpectedEnd)
2565 /// |[`DeEvent::Text`] |`text content` or `<![CDATA[cdata content]]>` (probably mixed)|Returns event content unchanged, consumes events up to `</tag>`
2566 /// |[`DeEvent::Eof`] | |Emits [`UnexpectedEof`](DeError::UnexpectedEof)
2567 ///
2568 /// [`Text`]: Event::Text
2569 /// [`CData`]: Event::CData
fn read_string_impl(&mut self, allow_start: bool) -> Result<Cow<'de, str>, DeError> {
    match self.next()? {
        // Plain text: return it as is
        DeEvent::Text(e) => Ok(e.text),
        // allow one nested level
        DeEvent::Start(e) if allow_start => match self.next()? {
            DeEvent::Text(t) => {
                // Drop everything that follows the text up to the end tag of `e`
                self.read_to_end(e.name())?;
                Ok(t.text)
            }
            DeEvent::Start(s) => Err(DeError::UnexpectedStart(s.name().as_ref().to_owned())),
            // We can get End event in case of `<tag></tag>` or `<tag/>` input
            // Return empty text in that case
            DeEvent::End(end) if end.name() == e.name() => Ok("".into()),
            DeEvent::End(end) => Err(DeError::UnexpectedEnd(end.name().as_ref().to_owned())),
            DeEvent::Eof => Err(DeError::UnexpectedEof),
        },
        // Reached only when `allow_start == false`
        DeEvent::Start(e) => Err(DeError::UnexpectedStart(e.name().as_ref().to_owned())),
        DeEvent::End(e) => Err(DeError::UnexpectedEnd(e.name().as_ref().to_owned())),
        DeEvent::Eof => Err(DeError::UnexpectedEof),
    }
}
2591
/// Drops all events up to and including the end event with
/// [name](BytesEnd::name()) `name` that closes the current element.
/// This method should be called after [`Self::next()`]
///
/// Buffered events from [`Self::read`] are dropped first; when the buffer is
/// exhausted, the rest is skipped directly in the reader.
#[cfg(feature = "overlapped-lists")]
fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
    // Count of nested elements with the same name that are currently open
    let mut depth = 0;
    loop {
        match self.read.pop_front() {
            Some(DeEvent::Start(e)) if e.name() == name => {
                depth += 1;
            }
            Some(DeEvent::End(e)) if e.name() == name => {
                if depth == 0 {
                    break;
                }
                depth -= 1;
            }

            // Drop all other skipped events
            Some(_) => continue,

            // If we do not have skipped events, use effective reading that will
            // not allocate memory for events
            None => {
                // We should close all opened tags, because we could buffer
                // Start events, but not the corresponding End events. So we
                // keep reading events until we exit all nested tags.
                // `read_to_end()` will return an error if an Eof was encountered
                // preliminary (in case of malformed XML).
                //
                // <tag><tag></tag></tag>
                // ^^^^^^^^^^             - buffered in `self.read`, when `self.read_to_end()` is called, depth = 2
                //           ^^^^^^       - read by the first call of `self.reader.read_to_end()`
                //                 ^^^^^^ - read by the second call of `self.reader.read_to_end()`
                loop {
                    self.reader.read_to_end(name)?;
                    if depth == 0 {
                        break;
                    }
                    depth -= 1;
                }
                break;
            }
        }
    }
    Ok(())
}
2638 #[cfg(not(feature = "overlapped-lists"))]
2639 fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
2640 // First one might be in self.peek
2641 match self.next()? {
2642 DeEvent::Start(e) => self.reader.read_to_end(e.name())?,
2643 DeEvent::End(e) if e.name() == name => return Ok(()),
2644 _ => (),
2645 }
2646 self.reader.read_to_end(name)
2647 }
2648}
2649
2650impl<'de> Deserializer<'de, SliceReader<'de>> {
2651 /// Create new deserializer that will borrow data from the specified string.
2652 ///
2653 /// Deserializer created with this method will not resolve custom entities.
2654 #[allow(clippy::should_implement_trait)]
2655 pub fn from_str(source: &'de str) -> Self {
2656 Self::from_str_with_resolver(source, entity_resolver:NoEntityResolver)
2657 }
2658}
2659
2660impl<'de, E> Deserializer<'de, SliceReader<'de>, E>
2661where
2662 E: EntityResolver,
2663{
2664 /// Create new deserializer that will borrow data from the specified string
2665 /// and use specified entity resolver.
2666 pub fn from_str_with_resolver(source: &'de str, entity_resolver: E) -> Self {
2667 let mut reader: Reader<&[u8]> = Reader::from_str(source);
2668 reader.expand_empty_elements(val:true);
2669
2670 Self::new(
2671 reader:SliceReader {
2672 reader,
2673 start_trimmer: StartTrimmer::default(),
2674 },
2675 entity_resolver,
2676 )
2677 }
2678}
2679
2680impl<'de, R> Deserializer<'de, IoReader<R>>
2681where
2682 R: BufRead,
2683{
2684 /// Create new deserializer that will copy data from the specified reader
2685 /// into internal buffer.
2686 ///
2687 /// If you already have a string use [`Self::from_str`] instead, because it
2688 /// will borrow instead of copy. If you have `&[u8]` which is known to represent
2689 /// UTF-8, you can decode it first before using [`from_str`].
2690 ///
2691 /// Deserializer created with this method will not resolve custom entities.
2692 pub fn from_reader(reader: R) -> Self {
2693 Self::with_resolver(reader, entity_resolver:NoEntityResolver)
2694 }
2695}
2696
2697impl<'de, R, E> Deserializer<'de, IoReader<R>, E>
2698where
2699 R: BufRead,
2700 E: EntityResolver,
2701{
2702 /// Create new deserializer that will copy data from the specified reader
2703 /// into internal buffer and use specified entity resolver.
2704 ///
2705 /// If you already have a string use [`Self::from_str`] instead, because it
2706 /// will borrow instead of copy. If you have `&[u8]` which is known to represent
2707 /// UTF-8, you can decode it first before using [`from_str`].
2708 pub fn with_resolver(reader: R, entity_resolver: E) -> Self {
2709 let mut reader: Reader = Reader::from_reader(reader);
2710 reader.expand_empty_elements(val:true);
2711
2712 Self::new(
2713 reader:IoReader {
2714 reader,
2715 start_trimmer: StartTrimmer::default(),
2716 buf: Vec::new(),
2717 },
2718 entity_resolver,
2719 )
2720 }
2721}
2722
2723impl<'de, 'a, R, E> de::Deserializer<'de> for &'a mut Deserializer<'de, R, E>
2724where
2725 R: XmlRead<'de>,
2726 E: EntityResolver,
2727{
2728 type Error = DeError;
2729
2730 deserialize_primitives!();
2731
2732 fn deserialize_struct<V>(
2733 self,
2734 _name: &'static str,
2735 fields: &'static [&'static str],
2736 visitor: V,
2737 ) -> Result<V::Value, DeError>
2738 where
2739 V: Visitor<'de>,
2740 {
2741 match self.next()? {
2742 DeEvent::Start(e) => {
2743 let name = e.name().as_ref().to_vec();
2744 let map = map::MapAccess::new(self, e, fields)?;
2745 let value = visitor.visit_map(map)?;
2746 self.read_to_end(QName(&name))?;
2747 Ok(value)
2748 }
2749 DeEvent::End(e) => Err(DeError::UnexpectedEnd(e.name().as_ref().to_owned())),
2750 DeEvent::Text(_) => Err(DeError::ExpectedStart),
2751 DeEvent::Eof => Err(DeError::UnexpectedEof),
2752 }
2753 }
2754
2755 /// Unit represented in XML as a `xs:element` or text/CDATA content.
2756 /// Any content inside `xs:element` is ignored and skipped.
2757 ///
2758 /// Produces unit struct from any of following inputs:
2759 /// - any `<tag ...>...</tag>`
2760 /// - any `<tag .../>`
2761 /// - any consequent text / CDATA content (can consist of several parts
2762 /// delimited by comments and processing instructions)
2763 ///
2764 /// # Events handling
2765 ///
2766 /// |Event |XML |Handling
2767 /// |------------------|---------------------------|-------------------------------------------
2768 /// |[`DeEvent::Start`]|`<tag>...</tag>` |Calls `visitor.visit_unit()`, consumes all events up to and including corresponding `End` event
2769 /// |[`DeEvent::End`] |`</tag>` |Emits [`UnexpectedEnd("tag")`](DeError::UnexpectedEnd)
2770 /// |[`DeEvent::Text`] |`text content` or `<![CDATA[cdata content]]>` (probably mixed)|Calls `visitor.visit_unit()`. The content is ignored
2771 /// |[`DeEvent::Eof`] | |Emits [`UnexpectedEof`](DeError::UnexpectedEof)
2772 fn deserialize_unit<V>(self, visitor: V) -> Result<V::Value, DeError>
2773 where
2774 V: Visitor<'de>,
2775 {
2776 match self.next()? {
2777 DeEvent::Start(s) => {
2778 self.read_to_end(s.name())?;
2779 visitor.visit_unit()
2780 }
2781 DeEvent::Text(_) => visitor.visit_unit(),
2782 DeEvent::End(e) => Err(DeError::UnexpectedEnd(e.name().as_ref().to_owned())),
2783 DeEvent::Eof => Err(DeError::UnexpectedEof),
2784 }
2785 }
2786
2787 fn deserialize_enum<V>(
2788 self,
2789 _name: &'static str,
2790 _variants: &'static [&'static str],
2791 visitor: V,
2792 ) -> Result<V::Value, DeError>
2793 where
2794 V: Visitor<'de>,
2795 {
2796 visitor.visit_enum(var::EnumAccess::new(self))
2797 }
2798
2799 fn deserialize_seq<V>(self, visitor: V) -> Result<V::Value, DeError>
2800 where
2801 V: Visitor<'de>,
2802 {
2803 visitor.visit_seq(self)
2804 }
2805
2806 fn deserialize_map<V>(self, visitor: V) -> Result<V::Value, DeError>
2807 where
2808 V: Visitor<'de>,
2809 {
2810 self.deserialize_struct("", &[], visitor)
2811 }
2812
2813 fn deserialize_option<V>(self, visitor: V) -> Result<V::Value, DeError>
2814 where
2815 V: Visitor<'de>,
2816 {
2817 deserialize_option!(self, self, visitor)
2818 }
2819
2820 /// Always call `visitor.visit_unit()` because returned value ignored in any case.
2821 ///
2822 /// This method consumes any single [event][DeEvent] except the [`Start`]
2823 /// event, in which case all events up to and including corresponding [`End`]
2824 /// event will be consumed.
2825 ///
2826 /// This method returns error if current event is [`End`] or [`Eof`].
2827 ///
2828 /// [`Start`]: DeEvent::Start
2829 /// [`End`]: DeEvent::End
2830 /// [`Eof`]: DeEvent::Eof
2831 fn deserialize_ignored_any<V>(self, visitor: V) -> Result<V::Value, DeError>
2832 where
2833 V: Visitor<'de>,
2834 {
2835 match self.next()? {
2836 DeEvent::Start(e) => self.read_to_end(e.name())?,
2837 DeEvent::End(e) => return Err(DeError::UnexpectedEnd(e.name().as_ref().to_owned())),
2838 DeEvent::Eof => return Err(DeError::UnexpectedEof),
2839 _ => (),
2840 }
2841 visitor.visit_unit()
2842 }
2843
2844 fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, DeError>
2845 where
2846 V: Visitor<'de>,
2847 {
2848 match self.peek()? {
2849 DeEvent::Start(_) => self.deserialize_map(visitor),
2850 // Redirect to deserialize_unit in order to consume an event and return an appropriate error
2851 DeEvent::End(_) | DeEvent::Eof => self.deserialize_unit(visitor),
2852 _ => self.deserialize_string(visitor),
2853 }
2854 }
2855}
2856
2857/// An accessor to sequence elements forming a value for top-level sequence of XML
2858/// elements.
2859///
2860/// Technically, multiple top-level elements violates XML rule of only one top-level
2861/// element, but we consider this as several concatenated XML documents.
2862impl<'de, 'a, R, E> SeqAccess<'de> for &'a mut Deserializer<'de, R, E>
2863where
2864 R: XmlRead<'de>,
2865 E: EntityResolver,
2866{
2867 type Error = DeError;
2868
2869 fn next_element_seed<T>(&mut self, seed: T) -> Result<Option<T::Value>, Self::Error>
2870 where
2871 T: DeserializeSeed<'de>,
2872 {
2873 match self.peek()? {
2874 DeEvent::Eof => Ok(None),
2875
2876 // Start(tag), End(tag), Text
2877 _ => seed.deserialize(&mut **self).map(op:Some),
2878 }
2879 }
2880}
2881
2882////////////////////////////////////////////////////////////////////////////////////////////////////
2883
/// Helper struct that holds the state of the algorithm which converts raw
/// reader events into semi-trimmed payload events. The algorithm does not
/// depend on the way the events are read.
struct StartTrimmer {
    /// If `true`, then leading whitespace will be removed from the next returned
    /// [`Event::Text`]. This field is set to `true` after each `DocType`, `Start`,
    /// `End` and `Eof` event and to `false` after each returned [`Event::Text`]
    /// or [`Event::CData`], so [`Event::Text`] events read right after a text or
    /// CDATA event are not trimmed. Skipped events leave the flag unchanged.
    trim_start: bool,
}
2894
2895impl StartTrimmer {
2896 /// Converts raw reader's event into a payload event.
2897 /// Returns `None`, if event should be skipped.
2898 #[inline(always)]
2899 fn trim<'a>(&mut self, event: Event<'a>) -> Option<PayloadEvent<'a>> {
2900 let (event, trim_next_event) = match event {
2901 Event::DocType(e) => (PayloadEvent::DocType(e), true),
2902 Event::Start(e) => (PayloadEvent::Start(e), true),
2903 Event::End(e) => (PayloadEvent::End(e), true),
2904 Event::Eof => (PayloadEvent::Eof, true),
2905
2906 // Do not trim next text event after Text or CDATA event
2907 Event::CData(e) => (PayloadEvent::CData(e), false),
2908 Event::Text(mut e) => {
2909 // If event is empty after trimming, skip it
2910 if self.trim_start && e.inplace_trim_start() {
2911 return None;
2912 }
2913 (PayloadEvent::Text(e), false)
2914 }
2915
2916 _ => return None,
2917 };
2918 self.trim_start = trim_next_event;
2919 Some(event)
2920 }
2921}
2922
2923impl Default for StartTrimmer {
2924 #[inline]
2925 fn default() -> Self {
2926 Self { trim_start: true }
2927 }
2928}
2929
2930////////////////////////////////////////////////////////////////////////////////////////////////////
2931
/// Trait used by the deserializer for iterating over input. This is manually
/// "specialized" for iterating over `&[u8]`.
///
/// You do not need to implement this trait. It exists to abstract over
/// [borrowing](SliceReader) and [copying](IoReader) data sources, so that the
/// deserializer code can be reused for both of them.
pub trait XmlRead<'i> {
    /// Returns the next payload event, which may borrow from the input `'i`.
    fn next(&mut self) -> Result<PayloadEvent<'i>, DeError>;

    /// Skips until end element is found. Unlike `next()` it will not allocate
    /// when it cannot satisfy the lifetime.
    fn read_to_end(&mut self, name: QName) -> Result<(), DeError>;

    /// A copy of the reader's decoder used to decode strings.
    fn decoder(&self) -> Decoder;
}
2949
/// XML input source that reads from a std::io input stream.
///
/// You cannot create it yourself; it is created automatically when you call
/// [`Deserializer::from_reader`].
pub struct IoReader<R: BufRead> {
    /// Underlying XML reader that produces the raw events.
    reader: Reader<R>,
    /// State that decides whether the next text event should be start-trimmed.
    start_trimmer: StartTrimmer,
    /// Scratch buffer handed to the reader; it is cleared before each event is read.
    buf: Vec<u8>,
}
2959
2960impl<'i, R: BufRead> XmlRead<'i> for IoReader<R> {
2961 fn next(&mut self) -> Result<PayloadEvent<'static>, DeError> {
2962 loop {
2963 self.buf.clear();
2964
2965 let event: Event<'_> = self.reader.read_event_into(&mut self.buf)?;
2966 if let Some(event: PayloadEvent<'_>) = self.start_trimmer.trim(event) {
2967 return Ok(event.into_owned());
2968 }
2969 }
2970 }
2971
2972 fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
2973 match self.reader.read_to_end_into(end:name, &mut self.buf) {
2974 Err(Error::UnexpectedEof(_)) => Err(DeError::UnexpectedEof),
2975 Err(e: Error) => Err(e.into()),
2976 Ok(_) => Ok(()),
2977 }
2978 }
2979
2980 fn decoder(&self) -> Decoder {
2981 self.reader.decoder()
2982 }
2983}
2984
/// XML input source that reads from a slice of bytes and can borrow from it.
///
/// You cannot create it yourself; it is created automatically when you call
/// [`Deserializer::from_str`].
pub struct SliceReader<'de> {
    /// Underlying XML reader over the borrowed input bytes.
    reader: Reader<&'de [u8]>,
    /// State that decides whether the next text event should be start-trimmed.
    start_trimmer: StartTrimmer,
}
2993
2994impl<'de> XmlRead<'de> for SliceReader<'de> {
2995 fn next(&mut self) -> Result<PayloadEvent<'de>, DeError> {
2996 loop {
2997 let event: Event<'de> = self.reader.read_event()?;
2998 if let Some(event: PayloadEvent<'de>) = self.start_trimmer.trim(event) {
2999 return Ok(event);
3000 }
3001 }
3002 }
3003
3004 fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
3005 match self.reader.read_to_end(name) {
3006 Err(Error::UnexpectedEof(_)) => Err(DeError::UnexpectedEof),
3007 Err(e: Error) => Err(e.into()),
3008 Ok(_) => Ok(()),
3009 }
3010 }
3011
3012 fn decoder(&self) -> Decoder {
3013 self.reader.decoder()
3014 }
3015}
3016
3017#[cfg(test)]
3018mod tests {
3019 use super::*;
3020 use pretty_assertions::assert_eq;
3021
3022 #[cfg(feature = "overlapped-lists")]
3023 mod skip {
3024 use super::*;
3025 use crate::de::DeEvent::*;
3026 use crate::events::BytesEnd;
3027 use pretty_assertions::assert_eq;
3028
/// Checks that `peek()` and `next()` behave correctly after `skip()`.
///
/// The test inspects the `read` and `write` event buffers of the
/// deserializer after every step.
#[test]
fn read_and_peek() {
    let mut de = Deserializer::from_str(
        r#"
        <root>
            <inner>
                text
                <inner/>
            </inner>
            <next/>
            <target/>
        </root>
        "#,
    );

    // Initial conditions - both are empty
    assert_eq!(de.read, vec![]);
    assert_eq!(de.write, vec![]);

    assert_eq!(de.next().unwrap(), Start(BytesStart::new("root")));
    assert_eq!(de.peek().unwrap(), &Start(BytesStart::new("inner")));

    // Mark that start_replay() should begin replay from this point
    let checkpoint = de.skip_checkpoint();
    assert_eq!(checkpoint, 0);

    // Should skip first <inner> tree
    de.skip().unwrap();
    assert_eq!(de.read, vec![]);
    assert_eq!(
        de.write,
        vec![
            Start(BytesStart::new("inner")),
            Text("text".into()),
            Start(BytesStart::new("inner")),
            End(BytesEnd::new("inner")),
            End(BytesEnd::new("inner")),
        ]
    );

    // Consume <next/>. Now unconsumed XML looks like:
    //
    //   <inner>
    //     text
    //     <inner/>
    //   </inner>
    //   <target/>
    // </root>
    assert_eq!(de.next().unwrap(), Start(BytesStart::new("next")));
    assert_eq!(de.next().unwrap(), End(BytesEnd::new("next")));

    // We finished writing. The next call to `next()` should start replaying
    // these events:
    //
    //   <inner>
    //     text
    //     <inner/>
    //   </inner>
    //
    // and after that stream these events:
    //
    //   <target/>
    // </root>
    de.start_replay(checkpoint);
    assert_eq!(
        de.read,
        vec![
            Start(BytesStart::new("inner")),
            Text("text".into()),
            Start(BytesStart::new("inner")),
            End(BytesEnd::new("inner")),
            End(BytesEnd::new("inner")),
        ]
    );
    assert_eq!(de.write, vec![]);
    assert_eq!(de.next().unwrap(), Start(BytesStart::new("inner")));

    // Mark that start_replay() should begin replay from this point
    let checkpoint = de.skip_checkpoint();
    assert_eq!(checkpoint, 0);

    // Skip `$text` node and consume <inner/> after it
    de.skip().unwrap();
    assert_eq!(
        de.read,
        vec![
            Start(BytesStart::new("inner")),
            End(BytesEnd::new("inner")),
            End(BytesEnd::new("inner")),
        ]
    );
    assert_eq!(
        de.write,
        vec![
            // This comment is here to keep the same formatting of both arrays,
            // otherwise rustfmt suggests to put it on one line
            Text("text".into()),
        ]
    );

    assert_eq!(de.next().unwrap(), Start(BytesStart::new("inner")));
    assert_eq!(de.next().unwrap(), End(BytesEnd::new("inner")));

    // We finished writing. The next call to `next()` should start replaying
    // these events:
    //
    //     text
    //   </inner>
    //
    // and after that stream these events:
    //
    //   <target/>
    // </root>
    de.start_replay(checkpoint);
    assert_eq!(
        de.read,
        vec![
            // This comment is here to keep the same formatting as others,
            // otherwise rustfmt suggests to put it on one line
            Text("text".into()),
            End(BytesEnd::new("inner")),
        ]
    );
    assert_eq!(de.write, vec![]);
    assert_eq!(de.next().unwrap(), Text("text".into()));
    assert_eq!(de.next().unwrap(), End(BytesEnd::new("inner")));
    assert_eq!(de.next().unwrap(), Start(BytesStart::new("target")));
    assert_eq!(de.next().unwrap(), End(BytesEnd::new("target")));
    assert_eq!(de.next().unwrap(), End(BytesEnd::new("root")));
    assert_eq!(de.next().unwrap(), Eof);
}
3159
/// Checks that `read_to_end()` behaves correctly after `skip()`:
/// it must not touch the events that were already buffered by `skip()`.
#[test]
fn read_to_end() {
    let mut de = Deserializer::from_str(
        r#"
        <root>
            <skip>
                text
                <skip/>
            </skip>
            <target>
                <target/>
            </target>
        </root>
        "#,
    );

    // Initial conditions - both are empty
    assert_eq!(de.read, vec![]);
    assert_eq!(de.write, vec![]);

    assert_eq!(de.next().unwrap(), Start(BytesStart::new("root")));

    // Mark that start_replay() should begin replay from this point
    let checkpoint = de.skip_checkpoint();
    assert_eq!(checkpoint, 0);

    // Skip the <skip> tree
    de.skip().unwrap();
    assert_eq!(de.read, vec![]);
    assert_eq!(
        de.write,
        vec![
            Start(BytesStart::new("skip")),
            Text("text".into()),
            Start(BytesStart::new("skip")),
            End(BytesEnd::new("skip")),
            End(BytesEnd::new("skip")),
        ]
    );

    // Drop all events that represent the <target> tree. Now unconsumed XML looks like:
    //
    //   <skip>
    //     text
    //     <skip/>
    //   </skip>
    // </root>
    assert_eq!(de.next().unwrap(), Start(BytesStart::new("target")));
    de.read_to_end(QName(b"target")).unwrap();
    assert_eq!(de.read, vec![]);
    assert_eq!(
        de.write,
        vec![
            Start(BytesStart::new("skip")),
            Text("text".into()),
            Start(BytesStart::new("skip")),
            End(BytesEnd::new("skip")),
            End(BytesEnd::new("skip")),
        ]
    );

    // We finished writing. The next call to `next()` should start replaying
    // these events:
    //
    //   <skip>
    //     text
    //     <skip/>
    //   </skip>
    //
    // and after that stream these events:
    //
    // </root>
    de.start_replay(checkpoint);
    assert_eq!(
        de.read,
        vec![
            Start(BytesStart::new("skip")),
            Text("text".into()),
            Start(BytesStart::new("skip")),
            End(BytesEnd::new("skip")),
            End(BytesEnd::new("skip")),
        ]
    );
    assert_eq!(de.write, vec![]);

    assert_eq!(de.next().unwrap(), Start(BytesStart::new("skip")));
    de.read_to_end(QName(b"skip")).unwrap();

    assert_eq!(de.next().unwrap(), End(BytesEnd::new("root")));
    assert_eq!(de.next().unwrap(), Eof);
}
3251
/// Checks that a replay started from a later checkpoint replays only the
/// events skipped after that checkpoint, leaving earlier ones buffered.
///
/// Test for https://github.com/tafia/quick-xml/issues/435
#[test]
fn partial_replay() {
    let mut de = Deserializer::from_str(
        r#"
        <root>
            <skipped-1/>
            <skipped-2/>
            <inner>
                <skipped-3/>
                <skipped-4/>
                <target-2/>
            </inner>
            <target-1/>
        </root>
        "#,
    );

    // Initial conditions - both are empty
    assert_eq!(de.read, vec![]);
    assert_eq!(de.write, vec![]);

    assert_eq!(de.next().unwrap(), Start(BytesStart::new("root")));

    // start_replay() should start replay from this point
    let checkpoint1 = de.skip_checkpoint();
    assert_eq!(checkpoint1, 0);

    // Should skip first and second <skipped-N/> elements
    de.skip().unwrap(); // skipped-1
    de.skip().unwrap(); // skipped-2
    assert_eq!(de.read, vec![]);
    assert_eq!(
        de.write,
        vec![
            Start(BytesStart::new("skipped-1")),
            End(BytesEnd::new("skipped-1")),
            Start(BytesStart::new("skipped-2")),
            End(BytesEnd::new("skipped-2")),
        ]
    );

    ////////////////////////////////////////////////////////////////////////////////////////

    assert_eq!(de.next().unwrap(), Start(BytesStart::new("inner")));
    assert_eq!(de.peek().unwrap(), &Start(BytesStart::new("skipped-3")));
    assert_eq!(
        de.read,
        vec![
            // This comment is here to keep the same formatting of both arrays,
            // otherwise rustfmt suggests to put it on one line
            Start(BytesStart::new("skipped-3")),
        ]
    );
    assert_eq!(
        de.write,
        vec![
            Start(BytesStart::new("skipped-1")),
            End(BytesEnd::new("skipped-1")),
            Start(BytesStart::new("skipped-2")),
            End(BytesEnd::new("skipped-2")),
        ]
    );

    // start_replay() should start replay from this point
    let checkpoint2 = de.skip_checkpoint();
    assert_eq!(checkpoint2, 4);

    // Should skip third and fourth <skipped-N/> elements
    de.skip().unwrap(); // skipped-3
    de.skip().unwrap(); // skipped-4
    assert_eq!(de.read, vec![]);
    assert_eq!(
        de.write,
        vec![
            // checkpoint 1
            Start(BytesStart::new("skipped-1")),
            End(BytesEnd::new("skipped-1")),
            Start(BytesStart::new("skipped-2")),
            End(BytesEnd::new("skipped-2")),
            // checkpoint 2
            Start(BytesStart::new("skipped-3")),
            End(BytesEnd::new("skipped-3")),
            Start(BytesStart::new("skipped-4")),
            End(BytesEnd::new("skipped-4")),
        ]
    );
    assert_eq!(de.next().unwrap(), Start(BytesStart::new("target-2")));
    assert_eq!(de.next().unwrap(), End(BytesEnd::new("target-2")));
    assert_eq!(de.peek().unwrap(), &End(BytesEnd::new("inner")));
    assert_eq!(
        de.read,
        vec![
            // This comment is here to keep the same formatting of both arrays,
            // otherwise rustfmt suggests to put it on one line
            End(BytesEnd::new("inner")),
        ]
    );
    assert_eq!(
        de.write,
        vec![
            // checkpoint 1
            Start(BytesStart::new("skipped-1")),
            End(BytesEnd::new("skipped-1")),
            Start(BytesStart::new("skipped-2")),
            End(BytesEnd::new("skipped-2")),
            // checkpoint 2
            Start(BytesStart::new("skipped-3")),
            End(BytesEnd::new("skipped-3")),
            Start(BytesStart::new("skipped-4")),
            End(BytesEnd::new("skipped-4")),
        ]
    );

    // Start replay events from checkpoint 2
    de.start_replay(checkpoint2);
    assert_eq!(
        de.read,
        vec![
            Start(BytesStart::new("skipped-3")),
            End(BytesEnd::new("skipped-3")),
            Start(BytesStart::new("skipped-4")),
            End(BytesEnd::new("skipped-4")),
            End(BytesEnd::new("inner")),
        ]
    );
    assert_eq!(
        de.write,
        vec![
            Start(BytesStart::new("skipped-1")),
            End(BytesEnd::new("skipped-1")),
            Start(BytesStart::new("skipped-2")),
            End(BytesEnd::new("skipped-2")),
        ]
    );

    // Replayed events
    assert_eq!(de.next().unwrap(), Start(BytesStart::new("skipped-3")));
    assert_eq!(de.next().unwrap(), End(BytesEnd::new("skipped-3")));
    assert_eq!(de.next().unwrap(), Start(BytesStart::new("skipped-4")));
    assert_eq!(de.next().unwrap(), End(BytesEnd::new("skipped-4")));

    assert_eq!(de.next().unwrap(), End(BytesEnd::new("inner")));
    assert_eq!(de.read, vec![]);
    assert_eq!(
        de.write,
        vec![
            Start(BytesStart::new("skipped-1")),
            End(BytesEnd::new("skipped-1")),
            Start(BytesStart::new("skipped-2")),
            End(BytesEnd::new("skipped-2")),
        ]
    );

    ////////////////////////////////////////////////////////////////////////////////////////

    // New events
    assert_eq!(de.next().unwrap(), Start(BytesStart::new("target-1")));
    assert_eq!(de.next().unwrap(), End(BytesEnd::new("target-1")));

    assert_eq!(de.read, vec![]);
    assert_eq!(
        de.write,
        vec![
            Start(BytesStart::new("skipped-1")),
            End(BytesEnd::new("skipped-1")),
            Start(BytesStart::new("skipped-2")),
            End(BytesEnd::new("skipped-2")),
        ]
    );

    // Start replay events from checkpoint 1
    de.start_replay(checkpoint1);
    assert_eq!(
        de.read,
        vec![
            Start(BytesStart::new("skipped-1")),
            End(BytesEnd::new("skipped-1")),
            Start(BytesStart::new("skipped-2")),
            End(BytesEnd::new("skipped-2")),
        ]
    );
    assert_eq!(de.write, vec![]);

    // Replayed events
    assert_eq!(de.next().unwrap(), Start(BytesStart::new("skipped-1")));
    assert_eq!(de.next().unwrap(), End(BytesEnd::new("skipped-1")));
    assert_eq!(de.next().unwrap(), Start(BytesStart::new("skipped-2")));
    assert_eq!(de.next().unwrap(), End(BytesEnd::new("skipped-2")));

    assert_eq!(de.read, vec![]);
    assert_eq!(de.write, vec![]);

    // New events
    assert_eq!(de.next().unwrap(), End(BytesEnd::new("root")));
    assert_eq!(de.next().unwrap(), Eof);
}
3450
/// Checks that the limit on the number of buffered events is enforced:
/// deserialization must fail with `TooManyEvents` carrying the limit.
#[test]
fn limit() {
    use serde::Deserialize;

    #[derive(Debug, Deserialize)]
    #[allow(unused)]
    struct List {
        item: Vec<()>,
    }

    let xml = r#"
        <any-name>
            <item/>
            <another-item>
                <some-element>with text</some-element>
                <yet-another-element/>
            </another-item>
            <item/>
            <item/>
        </any-name>
        "#;
    let mut de = Deserializer::from_str(xml);
    de.event_buffer_size(NonZeroUsize::new(3));

    let outcome = List::deserialize(&mut de);
    match outcome {
        Err(DeError::TooManyEvents(limit)) => assert_eq!(limit.get(), 3),
        other => panic!("Expected `Err(TooManyEvents(3))`, but found {:?}", other),
    }
}
3482
/// Regression test: when `skip` did not handle `Eof`, this input led to
/// a memory allocation failure.
#[test]
fn invalid_xml() {
    use crate::de::DeEvent::*;

    let mut deserializer = Deserializer::from_str("<root>");

    // Cache all events
    let mark = deserializer.skip_checkpoint();
    deserializer.skip().unwrap();
    deserializer.start_replay(mark);

    let expected = vec![Start(BytesStart::new("root")), Eof];
    assert_eq!(deserializer.read, expected);
}
3496 }
3497
3498 mod read_to_end {
3499 use super::*;
3500 use crate::de::DeEvent::*;
3501 use pretty_assertions::assert_eq;
3502
3503 #[test]
3504 fn complex() {
3505 let mut de = Deserializer::from_str(
3506 r#"
3507 <root>
3508 <tag a="1"><tag>text</tag>content</tag>
3509 <tag a="2"><![CDATA[cdata content]]></tag>
3510 <self-closed/>
3511 </root>
3512 "#,
3513 );
3514
3515 assert_eq!(de.next().unwrap(), Start(BytesStart::new("root")));
3516
3517 assert_eq!(
3518 de.next().unwrap(),
3519 Start(BytesStart::from_content(r#"tag a="1""#, 3))
3520 );
3521 assert_eq!(de.read_to_end(QName(b"tag")).unwrap(), ());
3522
3523 assert_eq!(
3524 de.next().unwrap(),
3525 Start(BytesStart::from_content(r#"tag a="2""#, 3))
3526 );
3527 assert_eq!(de.next().unwrap(), Text("cdata content".into()));
3528 assert_eq!(de.next().unwrap(), End(BytesEnd::new("tag")));
3529
3530 assert_eq!(de.next().unwrap(), Start(BytesStart::new("self-closed")));
3531 assert_eq!(de.read_to_end(QName(b"self-closed")).unwrap(), ());
3532
3533 assert_eq!(de.next().unwrap(), End(BytesEnd::new("root")));
3534 assert_eq!(de.next().unwrap(), Eof);
3535 }
3536
3537 #[test]
3538 fn invalid_xml1() {
3539 let mut de = Deserializer::from_str("<tag><tag></tag>");
3540
3541 assert_eq!(de.next().unwrap(), Start(BytesStart::new("tag")));
3542 assert_eq!(de.peek().unwrap(), &Start(BytesStart::new("tag")));
3543
3544 match de.read_to_end(QName(b"tag")) {
3545 Err(DeError::UnexpectedEof) => (),
3546 x => panic!("Expected `Err(UnexpectedEof)`, but found {:?}", x),
3547 }
3548 assert_eq!(de.next().unwrap(), Eof);
3549 }
3550
3551 #[test]
3552 fn invalid_xml2() {
3553 let mut de = Deserializer::from_str("<tag><![CDATA[]]><tag></tag>");
3554
3555 assert_eq!(de.next().unwrap(), Start(BytesStart::new("tag")));
3556 assert_eq!(de.peek().unwrap(), &Text("".into()));
3557
3558 match de.read_to_end(QName(b"tag")) {
3559 Err(DeError::UnexpectedEof) => (),
3560 x => panic!("Expected `Err(UnexpectedEof)`, but found {:?}", x),
3561 }
3562 assert_eq!(de.next().unwrap(), Eof);
3563 }
3564 }
3565
/// The copying and the borrowing readers must produce the same sequence
/// of events for the same input.
#[test]
fn borrowing_reader_parity() {
    let xml = r#"
        <item name="hello" source="world.rs">Some text</item>
        <item2/>
        <item3 value="world" />
    "#;

    let mut copying = IoReader {
        reader: Reader::from_reader(xml.as_bytes()),
        start_trimmer: StartTrimmer::default(),
        buf: Vec::new(),
    };
    let mut borrowing = SliceReader {
        reader: Reader::from_str(xml),
        start_trimmer: StartTrimmer::default(),
    };

    loop {
        let from_io = copying.next().unwrap();
        let from_slice = borrowing.next().unwrap();

        match (&from_io, &from_slice) {
            // Both inputs are exhausted simultaneously
            (PayloadEvent::Eof, PayloadEvent::Eof) => break,
            _ => assert_eq!(from_io, from_slice),
        }
    }
}
3595
/// Checks the exact list of events produced by the borrowing reader when
/// empty elements are expanded into `Start` / `End` pairs.
#[test]
fn borrowing_reader_events() {
    use crate::de::PayloadEvent::*;

    let xml = r#"
        <item name="hello" source="world.rs">Some text</item>
        <item2></item2>
        <item3/>
        <item4 value="world" />
    "#;

    let mut reader = SliceReader {
        reader: Reader::from_str(xml),
        start_trimmer: StartTrimmer::default(),
    };

    reader.reader.expand_empty_elements(true);

    // Read everything up to (but not including) the `Eof` event
    let events: Vec<_> = std::iter::from_fn(|| match reader.next().unwrap() {
        PayloadEvent::Eof => None,
        event => Some(event),
    })
    .collect();

    assert_eq!(
        events,
        vec![
            Start(BytesStart::from_content(
                r#"item name="hello" source="world.rs""#,
                4
            )),
            Text(BytesText::from_escaped("Some text")),
            End(BytesEnd::new("item")),
            Start(BytesStart::from_content("item2", 5)),
            End(BytesEnd::new("item2")),
            Start(BytesStart::from_content("item3", 5)),
            End(BytesEnd::new("item3")),
            Start(BytesStart::from_content(r#"item4 value="world" "#, 5)),
            End(BytesEnd::new("item4")),
        ]
    )
}
3642
/// Ensures that [`Deserializer::read_string()`] can never get an `End` event,
/// because the parser reports a mismatched end tag as an error earlier.
#[test]
fn read_string() {
    // An end tag without any start tag
    let only_end = from_str::<String>(r#"</root>"#);
    match only_end {
        Err(DeError::InvalidXml(Error::EndEventMismatch { expected, found })) => {
            assert_eq!(expected, "");
            assert_eq!(found, "root");
        }
        other => panic!(
            r#"Expected `Err(InvalidXml(EndEventMismatch("", "root")))`, but found {:?}"#,
            other
        ),
    }

    // A well-formed empty element gives an empty string
    let empty: String = from_str(r#"<root></root>"#).unwrap();
    assert_eq!(empty, "");

    // An end tag that does not match the open start tag
    let wrong_end = from_str::<String>(r#"<root></other>"#);
    match wrong_end {
        Err(DeError::InvalidXml(Error::EndEventMismatch { expected, found })) => {
            assert_eq!(expected, "root");
            assert_eq!(found, "other");
        }
        other => panic!(
            r#"Expected `Err(InvalidXml(EndEventMismatch("root", "other")))`, but found {:?}"#,
            other
        ),
    }
}
3672
3673 /// Tests for https://github.com/tafia/quick-xml/issues/474.
3674 ///
3675 /// That tests ensures that comments and processed instructions is ignored
3676 /// and can split one logical string in pieces.
3677 mod merge_text {
3678 use super::*;
3679 use pretty_assertions::assert_eq;
3680
3681 #[test]
3682 fn text() {
3683 let mut de = Deserializer::from_str("text");
3684 assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
3685 }
3686
3687 #[test]
3688 fn cdata() {
3689 let mut de = Deserializer::from_str("<![CDATA[cdata]]>");
3690 assert_eq!(de.next().unwrap(), DeEvent::Text("cdata".into()));
3691 }
3692
3693 #[test]
3694 fn text_and_cdata() {
3695 let mut de = Deserializer::from_str("text and <![CDATA[cdata]]>");
3696 assert_eq!(de.next().unwrap(), DeEvent::Text("text and cdata".into()));
3697 }
3698
3699 #[test]
3700 fn text_and_empty_cdata() {
3701 let mut de = Deserializer::from_str("text and <![CDATA[]]>");
3702 assert_eq!(de.next().unwrap(), DeEvent::Text("text and ".into()));
3703 }
3704
3705 #[test]
3706 fn cdata_and_text() {
3707 let mut de = Deserializer::from_str("<![CDATA[cdata]]> and text");
3708 assert_eq!(de.next().unwrap(), DeEvent::Text("cdata and text".into()));
3709 }
3710
3711 #[test]
3712 fn empty_cdata_and_text() {
3713 let mut de = Deserializer::from_str("<![CDATA[]]> and text");
3714 assert_eq!(de.next().unwrap(), DeEvent::Text(" and text".into()));
3715 }
3716
3717 #[test]
3718 fn cdata_and_cdata() {
3719 let mut de = Deserializer::from_str(
3720 "\
3721 <![CDATA[cdata]]]]>\
3722 <![CDATA[>cdata]]>\
3723 ",
3724 );
3725 assert_eq!(de.next().unwrap(), DeEvent::Text("cdata]]>cdata".into()));
3726 }
3727
3728 mod comment_between {
3729 use super::*;
3730 use pretty_assertions::assert_eq;
3731
3732 #[test]
3733 fn text() {
3734 let mut de = Deserializer::from_str(
3735 "\
3736 text \
3737 <!--comment 1--><!--comment 2--> \
3738 text\
3739 ",
3740 );
3741 assert_eq!(de.next().unwrap(), DeEvent::Text("text text".into()));
3742 }
3743
3744 #[test]
3745 fn cdata() {
3746 let mut de = Deserializer::from_str(
3747 "\
3748 <![CDATA[cdata]]]]>\
3749 <!--comment 1--><!--comment 2-->\
3750 <![CDATA[>cdata]]>\
3751 ",
3752 );
3753 assert_eq!(de.next().unwrap(), DeEvent::Text("cdata]]>cdata".into()));
3754 }
3755
3756 #[test]
3757 fn text_and_cdata() {
3758 let mut de = Deserializer::from_str(
3759 "\
3760 text \
3761 <!--comment 1--><!--comment 2-->\
3762 <![CDATA[ cdata]]>\
3763 ",
3764 );
3765 assert_eq!(de.next().unwrap(), DeEvent::Text("text cdata".into()));
3766 }
3767
3768 #[test]
3769 fn text_and_empty_cdata() {
3770 let mut de = Deserializer::from_str(
3771 "\
3772 text \
3773 <!--comment 1--><!--comment 2-->\
3774 <![CDATA[]]>\
3775 ",
3776 );
3777 assert_eq!(de.next().unwrap(), DeEvent::Text("text ".into()));
3778 }
3779
3780 #[test]
3781 fn cdata_and_text() {
3782 let mut de = Deserializer::from_str(
3783 "\
3784 <![CDATA[cdata ]]>\
3785 <!--comment 1--><!--comment 2--> \
3786 text \
3787 ",
3788 );
3789 assert_eq!(de.next().unwrap(), DeEvent::Text("cdata text".into()));
3790 }
3791
3792 #[test]
3793 fn empty_cdata_and_text() {
3794 let mut de = Deserializer::from_str(
3795 "\
3796 <![CDATA[]]>\
3797 <!--comment 1--><!--comment 2--> \
3798 text \
3799 ",
3800 );
3801 assert_eq!(de.next().unwrap(), DeEvent::Text(" text".into()));
3802 }
3803
3804 #[test]
3805 fn cdata_and_cdata() {
3806 let mut de = Deserializer::from_str(
3807 "\
3808 <![CDATA[cdata]]]>\
3809 <!--comment 1--><!--comment 2-->\
3810 <![CDATA[]>cdata]]>\
3811 ",
3812 );
3813 assert_eq!(de.next().unwrap(), DeEvent::Text("cdata]]>cdata".into()));
3814 }
3815 }
3816
3817 mod pi_between {
3818 use super::*;
3819 use pretty_assertions::assert_eq;
3820
3821 #[test]
3822 fn text() {
3823 let mut de = Deserializer::from_str(
3824 "\
3825 text \
3826 <?pi 1?><?pi 2?> \
3827 text\
3828 ",
3829 );
3830 assert_eq!(de.next().unwrap(), DeEvent::Text("text text".into()));
3831 }
3832
3833 #[test]
3834 fn cdata() {
3835 let mut de = Deserializer::from_str(
3836 "\
3837 <![CDATA[cdata]]]]>\
3838 <?pi 1?><?pi 2?>\
3839 <![CDATA[>cdata]]>\
3840 ",
3841 );
3842 assert_eq!(de.next().unwrap(), DeEvent::Text("cdata]]>cdata".into()));
3843 }
3844
3845 #[test]
3846 fn text_and_cdata() {
3847 let mut de = Deserializer::from_str(
3848 "\
3849 text \
3850 <?pi 1?><?pi 2?>\
3851 <![CDATA[ cdata]]>\
3852 ",
3853 );
3854 assert_eq!(de.next().unwrap(), DeEvent::Text("text cdata".into()));
3855 }
3856
3857 #[test]
3858 fn text_and_empty_cdata() {
3859 let mut de = Deserializer::from_str(
3860 "\
3861 text \
3862 <?pi 1?><?pi 2?>\
3863 <![CDATA[]]>\
3864 ",
3865 );
3866 assert_eq!(de.next().unwrap(), DeEvent::Text("text ".into()));
3867 }
3868
3869 #[test]
3870 fn cdata_and_text() {
3871 let mut de = Deserializer::from_str(
3872 "\
3873 <![CDATA[cdata ]]>\
3874 <?pi 1?><?pi 2?> \
3875 text \
3876 ",
3877 );
3878 assert_eq!(de.next().unwrap(), DeEvent::Text("cdata text".into()));
3879 }
3880
3881 #[test]
3882 fn empty_cdata_and_text() {
3883 let mut de = Deserializer::from_str(
3884 "\
3885 <![CDATA[]]>\
3886 <?pi 1?><?pi 2?> \
3887 text \
3888 ",
3889 );
3890 assert_eq!(de.next().unwrap(), DeEvent::Text(" text".into()));
3891 }
3892
3893 #[test]
3894 fn cdata_and_cdata() {
3895 let mut de = Deserializer::from_str(
3896 "\
3897 <![CDATA[cdata]]]>\
3898 <?pi 1?><?pi 2?>\
3899 <![CDATA[]>cdata]]>\
3900 ",
3901 );
3902 assert_eq!(de.next().unwrap(), DeEvent::Text("cdata]]>cdata".into()));
3903 }
3904 }
3905 }
3906
/// Tests for <https://github.com/tafia/quick-xml/issues/474>.
///
/// These tests ensure that any combination of payload data is processed
/// as expected.
3911 mod triples {
3912 use super::*;
3913 use pretty_assertions::assert_eq;
3914
3915 mod start {
3916 use super::*;
3917
3918 /// <tag1><tag2>...
3919 mod start {
3920 use super::*;
3921 use pretty_assertions::assert_eq;
3922
3923 #[test]
3924 fn start() {
3925 let mut de = Deserializer::from_str("<tag1><tag2><tag3>");
3926 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1")));
3927 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
3928 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag3")));
3929 assert_eq!(de.next().unwrap(), DeEvent::Eof);
3930 }
3931
3932 /// Not matching end tag will result to error
3933 #[test]
3934 fn end() {
3935 let mut de = Deserializer::from_str("<tag1><tag2></tag2>");
3936 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1")));
3937 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
3938 assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag2")));
3939 assert_eq!(de.next().unwrap(), DeEvent::Eof);
3940 }
3941
3942 #[test]
3943 fn text() {
3944 let mut de = Deserializer::from_str("<tag1><tag2> text ");
3945 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1")));
3946 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
3947 // Text is trimmed from both sides
3948 assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
3949 assert_eq!(de.next().unwrap(), DeEvent::Eof);
3950 }
3951
3952 #[test]
3953 fn cdata() {
3954 let mut de = Deserializer::from_str("<tag1><tag2><![CDATA[ cdata ]]>");
3955 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1")));
3956 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
3957 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
3958 assert_eq!(de.next().unwrap(), DeEvent::Eof);
3959 }
3960
3961 #[test]
3962 fn eof() {
3963 let mut de = Deserializer::from_str("<tag1><tag2>");
3964 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1")));
3965 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
3966 assert_eq!(de.next().unwrap(), DeEvent::Eof);
3967 assert_eq!(de.next().unwrap(), DeEvent::Eof);
3968 }
3969 }
3970
3971 /// <tag></tag>...
3972 mod end {
3973 use super::*;
3974 use pretty_assertions::assert_eq;
3975
3976 #[test]
3977 fn start() {
3978 let mut de = Deserializer::from_str("<tag></tag><tag2>");
3979 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
3980 assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
3981 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
3982 assert_eq!(de.next().unwrap(), DeEvent::Eof);
3983 }
3984
3985 #[test]
3986 fn end() {
3987 let mut de = Deserializer::from_str("<tag></tag></tag2>");
3988 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
3989 assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
3990 match de.next() {
3991 Err(DeError::InvalidXml(Error::EndEventMismatch { expected, found })) => {
3992 assert_eq!(expected, "");
3993 assert_eq!(found, "tag2");
3994 }
3995 x => panic!("Expected `InvalidXml(EndEventMismatch {{ expected = '', found = 'tag2' }})`, but got {:?}", x),
3996 }
3997 assert_eq!(de.next().unwrap(), DeEvent::Eof);
3998 }
3999
4000 #[test]
4001 fn text() {
4002 let mut de = Deserializer::from_str("<tag></tag> text ");
4003 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4004 assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4005 // Text is trimmed from both sides
4006 assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4007 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4008 }
4009
4010 #[test]
4011 fn cdata() {
4012 let mut de = Deserializer::from_str("<tag></tag><![CDATA[ cdata ]]>");
4013 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4014 assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4015 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4016 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4017 }
4018
4019 #[test]
4020 fn eof() {
4021 let mut de = Deserializer::from_str("<tag></tag>");
4022 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4023 assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4024 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4025 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4026 }
4027 }
4028
4029 /// <tag> text ...
4030 mod text {
4031 use super::*;
4032 use pretty_assertions::assert_eq;
4033
4034 #[test]
4035 fn start() {
4036 let mut de = Deserializer::from_str("<tag> text <tag2>");
4037 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4038 // Text is trimmed from both sides
4039 assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4040 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4041 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4042 }
4043
4044 #[test]
4045 fn end() {
4046 let mut de = Deserializer::from_str("<tag> text </tag>");
4047 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4048 // Text is trimmed from both sides
4049 assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4050 assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4051 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4052 }
4053
4054 // start::text::text has no difference from start::text
4055
4056 #[test]
4057 fn cdata() {
4058 let mut de = Deserializer::from_str("<tag> text <![CDATA[ cdata ]]>");
4059 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4060 // Text is trimmed from the start
4061 assert_eq!(de.next().unwrap(), DeEvent::Text("text cdata ".into()));
4062 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4063 }
4064
4065 #[test]
4066 fn eof() {
4067 let mut de = Deserializer::from_str("<tag> text ");
4068 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4069 // Text is trimmed from both sides
4070 assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4071 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4072 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4073 }
4074 }
4075
4076 /// <tag><![CDATA[ cdata ]]>...
4077 mod cdata {
4078 use super::*;
4079 use pretty_assertions::assert_eq;
4080
4081 #[test]
4082 fn start() {
4083 let mut de = Deserializer::from_str("<tag><![CDATA[ cdata ]]><tag2>");
4084 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4085 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4086 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4087 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4088 }
4089
4090 #[test]
4091 fn end() {
4092 let mut de = Deserializer::from_str("<tag><![CDATA[ cdata ]]></tag>");
4093 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4094 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4095 assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4096 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4097 }
4098
4099 #[test]
4100 fn text() {
4101 let mut de = Deserializer::from_str("<tag><![CDATA[ cdata ]]> text ");
4102 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4103 // Text is trimmed from the end
4104 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata text".into()));
4105 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4106 }
4107
4108 #[test]
4109 fn cdata() {
4110 let mut de =
4111 Deserializer::from_str("<tag><![CDATA[ cdata ]]><![CDATA[ cdata2 ]]>");
4112 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4113 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata cdata2 ".into()));
4114 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4115 }
4116
4117 #[test]
4118 fn eof() {
4119 let mut de = Deserializer::from_str("<tag><![CDATA[ cdata ]]>");
4120 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4121 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4122 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4123 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4124 }
4125 }
4126 }
4127
/// A document that begins with a closing tag always yields an error: the
/// first read reports `EndEventMismatch` with an empty `expected` name, and
/// the read after that returns `Eof`.
#[test]
fn end() {
    let mut de = Deserializer::from_str("</tag>");
    let first = de.next();
    match first {
        Err(DeError::InvalidXml(Error::EndEventMismatch { expected, found })) => {
            assert_eq!(expected, "");
            assert_eq!(found, "tag");
        }
        other => panic!("Expected `InvalidXml(EndEventMismatch {{ expected = '', found = 'tag' }})`, but got {:?}", other),
    }
    let last = de.next().unwrap();
    assert_eq!(last, DeEvent::Eof);
}
4141
4142 mod text {
4143 use super::*;
4144 use pretty_assertions::assert_eq;
4145
4146 mod start {
4147 use super::*;
4148 use pretty_assertions::assert_eq;
4149
4150 #[test]
4151 fn start() {
4152 let mut de = Deserializer::from_str(" text <tag1><tag2>");
4153 // Text is trimmed from both sides
4154 assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4155 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1")));
4156 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4157 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4158 }
4159
4160 /// Not matching end tag will result in error
4161 #[test]
4162 fn end() {
4163 let mut de = Deserializer::from_str(" text <tag></tag>");
4164 // Text is trimmed from both sides
4165 assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4166 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4167 assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4168 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4169 }
4170
4171 #[test]
4172 fn text() {
4173 let mut de = Deserializer::from_str(" text <tag> text2 ");
4174 // Text is trimmed from both sides
4175 assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4176 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4177 // Text is trimmed from both sides
4178 assert_eq!(de.next().unwrap(), DeEvent::Text("text2".into()));
4179 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4180 }
4181
4182 #[test]
4183 fn cdata() {
4184 let mut de = Deserializer::from_str(" text <tag><![CDATA[ cdata ]]>");
4185 // Text is trimmed from both sides
4186 assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4187 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4188 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4189 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4190 }
4191
4192 #[test]
4193 fn eof() {
4194 // Text is trimmed from both sides
4195 let mut de = Deserializer::from_str(" text <tag>");
4196 assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4197 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4198 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4199 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4200 }
4201 }
4202
/// A closing tag that has no corresponding opening tag always yields an
/// error, even when text precedes it.
#[test]
fn end() {
    let mut de = Deserializer::from_str(" text </tag>");
    // Text is trimmed from both sides
    let leading = de.next().unwrap();
    assert_eq!(leading, DeEvent::Text("text".into()));
    match de.next() {
        Err(DeError::InvalidXml(Error::EndEventMismatch { expected, found })) => {
            assert_eq!(expected, "");
            assert_eq!(found, "tag");
        }
        other => panic!("Expected `InvalidXml(EndEventMismatch {{ expected = '', found = 'tag' }})`, but got {:?}", other),
    }
    let last = de.next().unwrap();
    assert_eq!(last, DeEvent::Eof);
}
4218
4219 // text::text::something is equivalent to text::something
4220
4221 mod cdata {
4222 use super::*;
4223 use pretty_assertions::assert_eq;
4224
4225 #[test]
4226 fn start() {
4227 let mut de = Deserializer::from_str(" text <![CDATA[ cdata ]]><tag>");
4228 // Text is trimmed from the start
4229 assert_eq!(de.next().unwrap(), DeEvent::Text("text cdata ".into()));
4230 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4231 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4232 }
4233
4234 #[test]
4235 fn end() {
4236 let mut de = Deserializer::from_str(" text <![CDATA[ cdata ]]></tag>");
4237 // Text is trimmed from the start
4238 assert_eq!(de.next().unwrap(), DeEvent::Text("text cdata ".into()));
4239 match de.next() {
4240 Err(DeError::InvalidXml(Error::EndEventMismatch { expected, found })) => {
4241 assert_eq!(expected, "");
4242 assert_eq!(found, "tag");
4243 }
4244 x => panic!("Expected `InvalidXml(EndEventMismatch {{ expected = '', found = 'tag' }})`, but got {:?}", x),
4245 }
4246 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4247 }
4248
4249 #[test]
4250 fn text() {
4251 let mut de = Deserializer::from_str(" text <![CDATA[ cdata ]]> text2 ");
4252 // Text is trimmed from the start and from the end
4253 assert_eq!(
4254 de.next().unwrap(),
4255 DeEvent::Text("text cdata text2".into())
4256 );
4257 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4258 }
4259
4260 #[test]
4261 fn cdata() {
4262 let mut de =
4263 Deserializer::from_str(" text <![CDATA[ cdata ]]><![CDATA[ cdata2 ]]>");
4264 // Text is trimmed from the start
4265 assert_eq!(
4266 de.next().unwrap(),
4267 DeEvent::Text("text cdata cdata2 ".into())
4268 );
4269 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4270 }
4271
4272 #[test]
4273 fn eof() {
4274 let mut de = Deserializer::from_str(" text <![CDATA[ cdata ]]>");
4275 // Text is trimmed from the start
4276 assert_eq!(de.next().unwrap(), DeEvent::Text("text cdata ".into()));
4277 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4278 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4279 }
4280 }
4281 }
4282
4283 mod cdata {
4284 use super::*;
4285 use pretty_assertions::assert_eq;
4286
4287 mod start {
4288 use super::*;
4289 use pretty_assertions::assert_eq;
4290
4291 #[test]
4292 fn start() {
4293 let mut de = Deserializer::from_str("<![CDATA[ cdata ]]><tag1><tag2>");
4294 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4295 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1")));
4296 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4297 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4298 }
4299
4300 /// Not matching end tag will result in error
4301 #[test]
4302 fn end() {
4303 let mut de = Deserializer::from_str("<![CDATA[ cdata ]]><tag></tag>");
4304 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4305 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4306 assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4307 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4308 }
4309
4310 #[test]
4311 fn text() {
4312 let mut de = Deserializer::from_str("<![CDATA[ cdata ]]><tag> text ");
4313 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4314 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4315 // Text is trimmed from both sides
4316 assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4317 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4318 }
4319
4320 #[test]
4321 fn cdata() {
4322 let mut de =
4323 Deserializer::from_str("<![CDATA[ cdata ]]><tag><![CDATA[ cdata2 ]]>");
4324 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4325 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4326 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata2 ".into()));
4327 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4328 }
4329
4330 #[test]
4331 fn eof() {
4332 let mut de = Deserializer::from_str("<![CDATA[ cdata ]]><tag>");
4333 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4334 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4335 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4336 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4337 }
4338 }
4339
/// A closing tag that has no corresponding opening tag always yields an
/// error, even when CDATA precedes it.
#[test]
fn end() {
    let mut de = Deserializer::from_str("<![CDATA[ cdata ]]></tag>");
    let leading = de.next().unwrap();
    assert_eq!(leading, DeEvent::Text(" cdata ".into()));
    match de.next() {
        Err(DeError::InvalidXml(Error::EndEventMismatch { expected, found })) => {
            assert_eq!(expected, "");
            assert_eq!(found, "tag");
        }
        other => panic!("Expected `InvalidXml(EndEventMismatch {{ expected = '', found = 'tag' }})`, but got {:?}", other),
    }
    let last = de.next().unwrap();
    assert_eq!(last, DeEvent::Eof);
}
4354
4355 mod text {
4356 use super::*;
4357 use pretty_assertions::assert_eq;
4358
4359 #[test]
4360 fn start() {
4361 let mut de = Deserializer::from_str("<![CDATA[ cdata ]]> text <tag>");
4362 // Text is trimmed from the end
4363 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata text".into()));
4364 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4365 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4366 }
4367
4368 #[test]
4369 fn end() {
4370 let mut de = Deserializer::from_str("<![CDATA[ cdata ]]> text </tag>");
4371 // Text is trimmed from the end
4372 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata text".into()));
4373 match de.next() {
4374 Err(DeError::InvalidXml(Error::EndEventMismatch { expected, found })) => {
4375 assert_eq!(expected, "");
4376 assert_eq!(found, "tag");
4377 }
4378 x => panic!("Expected `InvalidXml(EndEventMismatch {{ expected = '', found = 'tag' }})`, but got {:?}", x),
4379 }
4380 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4381 }
4382
4383 // cdata::text::text is equivalent to cdata::text
4384
4385 #[test]
4386 fn cdata() {
4387 let mut de =
4388 Deserializer::from_str("<![CDATA[ cdata ]]> text <![CDATA[ cdata2 ]]>");
4389 assert_eq!(
4390 de.next().unwrap(),
4391 DeEvent::Text(" cdata text cdata2 ".into())
4392 );
4393 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4394 }
4395
4396 #[test]
4397 fn eof() {
4398 let mut de = Deserializer::from_str("<![CDATA[ cdata ]]> text ");
4399 // Text is trimmed from the end
4400 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata text".into()));
4401 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4402 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4403 }
4404 }
4405
4406 mod cdata {
4407 use super::*;
4408 use pretty_assertions::assert_eq;
4409
4410 #[test]
4411 fn start() {
4412 let mut de =
4413 Deserializer::from_str("<![CDATA[ cdata ]]><![CDATA[ cdata2 ]]><tag>");
4414 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata cdata2 ".into()));
4415 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4416 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4417 }
4418
4419 #[test]
4420 fn end() {
4421 let mut de =
4422 Deserializer::from_str("<![CDATA[ cdata ]]><![CDATA[ cdata2 ]]></tag>");
4423 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata cdata2 ".into()));
4424 match de.next() {
4425 Err(DeError::InvalidXml(Error::EndEventMismatch { expected, found })) => {
4426 assert_eq!(expected, "");
4427 assert_eq!(found, "tag");
4428 }
4429 x => panic!("Expected `InvalidXml(EndEventMismatch {{ expected = '', found = 'tag' }})`, but got {:?}", x),
4430 }
4431 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4432 }
4433
4434 #[test]
4435 fn text() {
4436 let mut de =
4437 Deserializer::from_str("<![CDATA[ cdata ]]><![CDATA[ cdata2 ]]> text ");
4438 // Text is trimmed from the end
4439 assert_eq!(
4440 de.next().unwrap(),
4441 DeEvent::Text(" cdata cdata2 text".into())
4442 );
4443 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4444 }
4445
4446 #[test]
4447 fn cdata() {
4448 let mut de = Deserializer::from_str(
4449 "<![CDATA[ cdata ]]><![CDATA[ cdata2 ]]><![CDATA[ cdata3 ]]>",
4450 );
4451 assert_eq!(
4452 de.next().unwrap(),
4453 DeEvent::Text(" cdata cdata2 cdata3 ".into())
4454 );
4455 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4456 }
4457
4458 #[test]
4459 fn eof() {
4460 let mut de = Deserializer::from_str("<![CDATA[ cdata ]]><![CDATA[ cdata2 ]]>");
4461 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata cdata2 ".into()));
4462 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4463 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4464 }
4465 }
4466 }
4467 }
4468}
4469