1 | //! Serde `Deserializer` module.
|
2 | //!
|
3 | //! Due to the complexity of the XML standard and the fact that Serde was developed
|
4 | //! with JSON in mind, not all Serde concepts apply smoothly to XML. This leads to
|
5 | //! the fact that some XML concepts are inexpressible in terms of Serde derives
|
6 | //! and may require manual deserialization.
|
7 | //!
|
8 | //! The most notable restriction is the ability to distinguish between _elements_
|
9 | //! and _attributes_, as no other format used by serde has such a concept.
|
10 | //!
|
11 | //! Due to that the mapping is performed in a best effort manner.
|
12 | //!
|
13 | //!
|
14 | //!
|
15 | //! Table of Contents
|
16 | //! =================
|
17 | //! - [Mapping XML to Rust types](#mapping-xml-to-rust-types)
|
18 | //! - [Optional attributes and elements](#optional-attributes-and-elements)
|
19 | //! - [Choices (`xs:choice` XML Schema type)](#choices-xschoice-xml-schema-type)
|
20 | //! - [Sequences (`xs:all` and `xs:sequence` XML Schema types)](#sequences-xsall-and-xssequence-xml-schema-types)
|
21 | //! - [Composition Rules](#composition-rules)
|
22 | //! - [Difference between `$text` and `$value` special names](#difference-between-text-and-value-special-names)
|
23 | //! - [`$text`](#text)
|
24 | //! - [`$value`](#value)
|
25 | //! - [Primitives and sequences of primitives](#primitives-and-sequences-of-primitives)
|
26 | //! - [Structs and sequences of structs](#structs-and-sequences-of-structs)
|
27 | //! - [Enums and sequences of enums](#enums-and-sequences-of-enums)
|
28 | //! - [Frequently Used Patterns](#frequently-used-patterns)
|
29 | //! - [`<element>` lists](#element-lists)
|
30 | //! - [Enum::Unit Variants As a Text](#enumunit-variants-as-a-text)
|
31 | //! - [Internally Tagged Enums](#internally-tagged-enums)
|
32 | //!
|
33 | //!
|
34 | //!
|
35 | //! Mapping XML to Rust types
|
36 | //! =========================
|
37 | //!
|
38 | //! Type names are never considered when deserializing, so you can name your
|
39 | //! types as you wish. Other general rules:
|
40 | //! - `struct` field name could be represented in XML only as an attribute name
|
41 | //! or an element name;
|
42 | //! - `enum` variant name could be represented in XML only as an attribute name
|
43 | //! or an element name;
|
44 | //! - the unit struct, unit type `()` and unit enum variant can be deserialized
|
45 | //! from any valid XML content:
|
46 | //! - attribute and element names;
|
47 | //! - attribute and element values;
|
48 | //! - text or CDATA content (including mixed text and CDATA content).
|
49 | //!
|
50 | //! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
|
51 | //!
|
52 | //! NOTE: All tests are marked with an `ignore` option, even though they do
|
53 | //! compile. This is because rustdoc marks such blocks with an information
|
54 | //! icon unlike `no_run` blocks.
|
55 | //!
|
56 | //! </div>
|
57 | //!
|
58 | //! <table>
|
59 | //! <thead>
|
60 | //! <tr><th>To parse all these XML's...</th><th>...use these Rust type(s)</th></tr>
|
61 | //! </thead>
|
62 | //! <tbody style="vertical-align:top;">
|
63 | //! <tr>
|
64 | //! <td>
|
65 | //! Content of attributes and text / CDATA content of elements (including mixed
|
66 | //! text and CDATA content):
|
67 | //!
|
68 | //! ```xml
|
69 | //! <... ...="content" />
|
70 | //! ```
|
71 | //! ```xml
|
72 | //! <...>content</...>
|
73 | //! ```
|
74 | //! ```xml
|
75 | //! <...><![CDATA[content]]></...>
|
76 | //! ```
|
77 | //! ```xml
|
78 | //! <...>text<![CDATA[cdata]]>text</...>
|
79 | //! ```
|
80 | //! Mixed text / CDATA content represents one logical string, `"textcdatatext"` in that case.
|
81 | //! </td>
|
82 | //! <td>
|
83 | //!
|
84 | //! You can use any type that can be deserialized from an `&str`, for example:
|
85 | //! - [`String`] and [`&str`]
|
86 | //! - [`Cow<str>`]
|
87 | //! - [`u32`], [`f32`] and other numeric types
|
88 | //! - `enum`s, like
|
89 | //! ```
|
90 | //! # use pretty_assertions::assert_eq;
|
91 | //! # use serde::Deserialize;
|
92 | //! # #[derive(Debug, PartialEq)]
|
93 | //! #[derive(Deserialize)]
|
94 | //! enum Language {
|
95 | //! Rust,
|
96 | //! Cpp,
|
97 | //! #[serde(other)]
|
98 | //! Other,
|
99 | //! }
|
100 | //! # #[derive(Debug, PartialEq, Deserialize)]
|
101 | //! # struct X { #[serde(rename = "$text" )] x: Language }
|
102 | //! # assert_eq!(X { x: Language::Rust }, quick_xml::de::from_str("<x>Rust</x>" ).unwrap());
|
103 | //! # assert_eq!(X { x: Language::Cpp }, quick_xml::de::from_str("<x>C<![CDATA[p]]>p</x>" ).unwrap());
|
104 | //! # assert_eq!(X { x: Language::Other }, quick_xml::de::from_str("<x><![CDATA[other]]></x>" ).unwrap());
|
105 | //! ```
|
106 | //!
|
107 | //! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
|
108 | //!
|
109 | //! NOTE: deserialization to non-owned types (i.e. borrow from the input),
|
110 | //! such as `&str`, is possible only if you parse document in the UTF-8
|
111 | //! encoding and content does not contain entity references such as `&amp;`,
|
112 | //! or character references such as `&#xD;`, as well as text content represented
|
113 | //! by one piece of [text] or [CDATA] element.
|
114 | //! </div>
|
115 | //! <!-- TODO: document an error type returned -->
|
116 | //!
|
117 | //! [text]: Event::Text
|
118 | //! [CDATA]: Event::CData
|
119 | //! </td>
|
120 | //! </tr>
|
121 | //! <!-- 2 ===================================================================================== -->
|
122 | //! <tr>
|
123 | //! <td>
|
124 | //!
|
125 | //! Content of attributes and text / CDATA content of elements (including mixed
|
126 | //! text and CDATA content), which represents a space-delimited list, as
|
127 | //! specified in the XML Schema specification for [`xs:list`] `simpleType`:
|
128 | //!
|
129 | //! ```xml
|
130 | //! <... ...="element1 element2 ..." />
|
131 | //! ```
|
132 | //! ```xml
|
133 | //! <...>
|
134 | //! element1
|
135 | //! element2
|
136 | //! ...
|
137 | //! </...>
|
138 | //! ```
|
139 | //! ```xml
|
140 | //! <...><![CDATA[
|
141 | //! element1
|
142 | //! element2
|
143 | //! ...
|
144 | //! ]]></...>
|
145 | //! ```
|
146 | //!
|
147 | //! [`xs:list`]: https://www.w3.org/TR/xmlschema11-2/#list-datatypes
|
148 | //! </td>
|
149 | //! <td>
|
150 | //!
|
151 | //! Use any type that is deserialized using a [`deserialize_seq()`] call, for example:
|
152 | //!
|
153 | //! ```
|
154 | //! type List = Vec<u32>;
|
155 | //! ```
|
156 | //!
|
157 | //! See the next row to learn where in your struct definition you should
|
158 | //! use that type.
|
159 | //!
|
160 | //! According to the XML Schema specification, delimiters for elements are one
|
161 | //! or more space (`' '`, `'\r'`, `'\n'`, and `'\t'`) character(s).
|
162 | //!
|
163 | //! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
|
164 | //!
|
165 | //! NOTE: according to the XML Schema restrictions, you cannot escape those
|
166 | //! white-space characters, so list elements will _never_ contain them.
|
167 | //! In practice you will usually use `xs:list`s for lists of numbers or enumerated
|
168 | //! values which look like identifiers in many languages, for example, `item`,
|
169 | //! `some_item` or `some-item`, so that shouldn't be a problem.
|
170 | //!
|
171 | //! NOTE: according to the XML Schema specification, list elements can be
|
172 | //! delimited only by spaces. Other delimiters (for example, commas) are not
|
173 | //! allowed.
|
174 | //!
|
175 | //! </div>
|
176 | //!
|
177 | //! [`deserialize_seq()`]: de::Deserializer::deserialize_seq
|
178 | //! </td>
|
179 | //! </tr>
|
180 | //! <!-- 3 ===================================================================================== -->
|
181 | //! <tr>
|
182 | //! <td>
|
183 | //! A typical XML with attributes. The root tag name does not matter:
|
184 | //!
|
185 | //! ```xml
|
186 | //! <any-tag one="..." two="..."/>
|
187 | //! ```
|
188 | //! </td>
|
189 | //! <td>
|
190 | //!
|
191 | //! A structure where each XML attribute is mapped to a field with a name
|
192 | //! starting with `@`. Because Rust identifiers do not permit the `@` character,
|
193 | //! you should use the `#[serde(rename = "@...")]` attribute to rename it.
|
194 | //! The name of the struct itself does not matter:
|
195 | //!
|
196 | //! ```
|
197 | //! # use serde::Deserialize;
|
198 | //! # type T = ();
|
199 | //! # type U = ();
|
200 | //! // Get both attributes
|
201 | //! # #[derive(Debug, PartialEq)]
|
202 | //! #[derive(Deserialize)]
|
203 | //! struct AnyName {
|
204 | //! #[serde(rename = "@one" )]
|
205 | //! one: T,
|
206 | //!
|
207 | //! #[serde(rename = "@two" )]
|
208 | //! two: U,
|
209 | //! }
|
210 | //! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..." two="..."/>"# ).unwrap();
|
211 | //! ```
|
212 | //! ```
|
213 | //! # use serde::Deserialize;
|
214 | //! # type T = ();
|
215 | //! // Get only the one attribute, ignore the other
|
216 | //! # #[derive(Debug, PartialEq)]
|
217 | //! #[derive(Deserialize)]
|
218 | //! struct AnyName {
|
219 | //! #[serde(rename = "@one" )]
|
220 | //! one: T,
|
221 | //! }
|
222 | //! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..." two="..."/>"# ).unwrap();
|
223 | //! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."/>"# ).unwrap();
|
224 | //! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><one>...</one></any-tag>"# ).unwrap();
|
225 | //! ```
|
226 | //! ```
|
227 | //! # use serde::Deserialize;
|
228 | //! // Ignore all attributes
|
229 | //! // You can also use the `()` type (unit type)
|
230 | //! # #[derive(Debug, PartialEq)]
|
231 | //! #[derive(Deserialize)]
|
232 | //! struct AnyName;
|
233 | //! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..." two="..."/>"# ).unwrap();
|
234 | //! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><one>...</one></any-tag>"# ).unwrap();
|
235 | //! # quick_xml::de::from_str::<AnyName>(r#"<any-tag><one>...</one><two>...</two></any-tag>"# ).unwrap();
|
236 | //! ```
|
237 | //!
|
238 | //! All these structs can be used to deserialize from an XML on the
|
239 | //! left side depending on amount of information that you want to get.
|
240 | //! Of course, you can combine them with elements extractor structs (see below).
|
241 | //!
|
242 | //! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
|
243 | //!
|
244 | //! NOTE: XML allows you to have an attribute and an element with the same name
|
245 | //! inside the one element. quick-xml deals with that by prepending a `@` prefix
|
246 | //! to the name of attributes.
|
247 | //! </div>
|
248 | //! </td>
|
249 | //! </tr>
|
250 | //! <!-- 4 ===================================================================================== -->
|
251 | //! <tr>
|
252 | //! <td>
|
253 | //! A typical XML with child elements. The root tag name does not matter:
|
254 | //!
|
255 | //! ```xml
|
256 | //! <any-tag>
|
257 | //! <one>...</one>
|
258 | //! <two>...</two>
|
259 | //! </any-tag>
|
260 | //! ```
|
261 | //! </td>
|
262 | //! <td>
|
263 | //! A structure where each XML child element is mapped to the field.
|
264 | //! Each element name becomes a name of field. The name of the struct itself
|
265 | //! does not matter:
|
266 | //!
|
267 | //! ```
|
268 | //! # use serde::Deserialize;
|
269 | //! # type T = ();
|
270 | //! # type U = ();
|
271 | //! // Get both elements
|
272 | //! # #[derive(Debug, PartialEq)]
|
273 | //! #[derive(Deserialize)]
|
274 | //! struct AnyName {
|
275 | //! one: T,
|
276 | //! two: U,
|
277 | //! }
|
278 | //! # quick_xml::de::from_str::<AnyName>(r#"<any-tag><one>...</one><two>...</two></any-tag>"# ).unwrap();
|
279 | //! #
|
280 | //! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..." two="..."/>"# ).unwrap_err();
|
281 | //! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><two>...</two></any-tag>"# ).unwrap_err();
|
282 | //! ```
|
283 | //! ```
|
284 | //! # use serde::Deserialize;
|
285 | //! # type T = ();
|
286 | //! // Get only the one element, ignore the other
|
287 | //! # #[derive(Debug, PartialEq)]
|
288 | //! #[derive(Deserialize)]
|
289 | //! struct AnyName {
|
290 | //! one: T,
|
291 | //! }
|
292 | //! # quick_xml::de::from_str::<AnyName>(r#"<any-tag><one>...</one><two>...</two></any-tag>"# ).unwrap();
|
293 | //! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><one>...</one></any-tag>"# ).unwrap();
|
294 | //! ```
|
295 | //! ```
|
296 | //! # use serde::Deserialize;
|
297 | //! // Ignore all elements
|
298 | //! // You can also use the `()` type (unit type)
|
299 | //! # #[derive(Debug, PartialEq)]
|
300 | //! #[derive(Deserialize)]
|
301 | //! struct AnyName;
|
302 | //! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..." two="..."/>"# ).unwrap();
|
303 | //! # quick_xml::de::from_str::<AnyName>(r#"<any-tag><one>...</one><two>...</two></any-tag>"# ).unwrap();
|
304 | //! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><two>...</two></any-tag>"# ).unwrap();
|
305 | //! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><one>...</one></any-tag>"# ).unwrap();
|
306 | //! ```
|
307 | //!
|
308 | //! All these structs can be used to deserialize from an XML on the
|
309 | //! left side depending on amount of information that you want to get.
|
310 | //! Of course, you can combine them with attributes extractor structs (see above).
|
311 | //!
|
312 | //! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
|
313 | //!
|
314 | //! NOTE: XML allows you to have an attribute and an element with the same name
|
315 | //! inside the one element. quick-xml deals with that by prepending a `@` prefix
|
316 | //! to the name of attributes.
|
317 | //! </div>
|
318 | //! </td>
|
319 | //! </tr>
|
320 | //! <!-- 5 ===================================================================================== -->
|
321 | //! <tr>
|
322 | //! <td>
|
323 | //! An XML with an attribute and a child element named equally:
|
324 | //!
|
325 | //! ```xml
|
326 | //! <any-tag field="...">
|
327 | //! <field>...</field>
|
328 | //! </any-tag>
|
329 | //! ```
|
330 | //! </td>
|
331 | //! <td>
|
332 | //!
|
333 | //! You MUST specify `#[serde(rename = "@field")]` on a field that will be used
|
334 | //! for an attribute:
|
335 | //!
|
336 | //! ```
|
337 | //! # use pretty_assertions::assert_eq;
|
338 | //! # use serde::Deserialize;
|
339 | //! # type T = ();
|
340 | //! # type U = ();
|
341 | //! # #[derive(Debug, PartialEq)]
|
342 | //! #[derive(Deserialize)]
|
343 | //! struct AnyName {
|
344 | //! #[serde(rename = "@field" )]
|
345 | //! attribute: T,
|
346 | //! field: U,
|
347 | //! }
|
348 | //! # assert_eq!(
|
349 | //! # AnyName { attribute: (), field: () },
|
350 | //! # quick_xml::de::from_str(r#"
|
351 | //! # <any-tag field="...">
|
352 | //! # <field>...</field>
|
353 | //! # </any-tag>
|
354 | //! # "# ).unwrap(),
|
355 | //! # );
|
356 | //! ```
|
357 | //! </td>
|
358 | //! </tr>
|
359 | //! <!-- ======================================================================================= -->
|
360 | //! <tr><th colspan="2">
|
361 | //!
|
362 | //! ## Optional attributes and elements
|
363 | //!
|
364 | //! </th></tr>
|
365 | //! <tr><th>To parse all these XML's...</th><th>...use these Rust type(s)</th></tr>
|
366 | //! <!-- 6 ===================================================================================== -->
|
367 | //! <tr>
|
368 | //! <td>
|
369 | //! An optional XML attribute that you want to capture.
|
370 | //! The root tag name does not matter:
|
371 | //!
|
372 | //! ```xml
|
373 | //! <any-tag optional="..."/>
|
374 | //! ```
|
375 | //! ```xml
|
376 | //! <any-tag/>
|
377 | //! ```
|
378 | //! </td>
|
379 | //! <td>
|
380 | //!
|
381 | //! A structure with an optional field, renamed according to the requirements
|
382 | //! for attributes:
|
383 | //!
|
384 | //! ```
|
385 | //! # use pretty_assertions::assert_eq;
|
386 | //! # use serde::Deserialize;
|
387 | //! # type T = ();
|
388 | //! # #[derive(Debug, PartialEq)]
|
389 | //! #[derive(Deserialize)]
|
390 | //! struct AnyName {
|
391 | //! #[serde(rename = "@optional" )]
|
392 | //! optional: Option<T>,
|
393 | //! }
|
394 | //! # assert_eq!(AnyName { optional: Some(()) }, quick_xml::de::from_str(r#"<any-tag optional="..."/>"# ).unwrap());
|
395 | //! # assert_eq!(AnyName { optional: None }, quick_xml::de::from_str(r#"<any-tag/>"# ).unwrap());
|
396 | //! ```
|
397 | //! When the XML attribute is present, type `T` will be deserialized from
|
398 | //! an attribute value (which is a string). Note, that if `T = String` or other
|
399 | //! string type, the empty attribute is mapped to a `Some("")`, whereas `None`
|
400 | //! represents the missed attribute:
|
401 | //! ```xml
|
402 | //! <any-tag optional="..."/><!-- Some("...") -->
|
403 | //! <any-tag optional=""/> <!-- Some("") -->
|
404 | //! <any-tag/> <!-- None -->
|
405 | //! ```
|
406 | //! </td>
|
407 | //! </tr>
|
408 | //! <!-- 7 ===================================================================================== -->
|
409 | //! <tr>
|
410 | //! <td>
|
411 | //! An optional XML element that you want to capture.
|
412 | //! The root tag name does not matter:
|
413 | //!
|
414 | //! ```xml
|
415 | //! <any-tag>
|
416 | //! <optional>...</optional>
|
417 | //! </any-tag>
|
418 | //! ```
|
419 | //! ```xml
|
420 | //! <any-tag>
|
421 | //! <optional/>
|
422 | //! </any-tag>
|
423 | //! ```
|
424 | //! ```xml
|
425 | //! <any-tag/>
|
426 | //! ```
|
427 | //! </td>
|
428 | //! <td>
|
429 | //!
|
430 | //! A structure with an optional field:
|
431 | //!
|
432 | //! ```
|
433 | //! # use pretty_assertions::assert_eq;
|
434 | //! # use serde::Deserialize;
|
435 | //! # type T = ();
|
436 | //! # #[derive(Debug, PartialEq)]
|
437 | //! #[derive(Deserialize)]
|
438 | //! struct AnyName {
|
439 | //! optional: Option<T>,
|
440 | //! }
|
441 | //! # assert_eq!(AnyName { optional: Some(()) }, quick_xml::de::from_str(r#"<any-tag><optional>...</optional></any-tag>"# ).unwrap());
|
442 | //! # assert_eq!(AnyName { optional: None }, quick_xml::de::from_str(r#"<any-tag/>"# ).unwrap());
|
443 | //! ```
|
444 | //! When the XML element is present, type `T` will be deserialized from an
|
445 | //! element (which is a string or a multi-mapping -- i.e. mapping which can have
|
446 | //! duplicated keys).
|
447 | //! <div style="background:rgba(80, 240, 100, 0.20);padding:0.75em;">
|
448 | //!
|
449 | //! Currently some edge cases exist, which are described in the issue [#497].
|
450 | //! </div>
|
451 | //! </td>
|
452 | //! </tr>
|
453 | //! <!-- ======================================================================================= -->
|
454 | //! <tr><th colspan="2">
|
455 | //!
|
456 | //! ## Choices (`xs:choice` XML Schema type)
|
457 | //!
|
458 | //! </th></tr>
|
459 | //! <tr><th>To parse all these XML's...</th><th>...use these Rust type(s)</th></tr>
|
460 | //! <!-- 8 ===================================================================================== -->
|
461 | //! <tr>
|
462 | //! <td>
|
463 | //! An XML with different root tag names, as well as text / CDATA content:
|
464 | //!
|
465 | //! ```xml
|
466 | //! <one field1="...">...</one>
|
467 | //! ```
|
468 | //! ```xml
|
469 | //! <two>
|
470 | //! <field2>...</field2>
|
471 | //! </two>
|
472 | //! ```
|
473 | //! ```xml
|
474 | //! Text <![CDATA[or (mixed)
|
475 | //! CDATA]]> content
|
476 | //! ```
|
477 | //! </td>
|
478 | //! <td>
|
479 | //!
|
480 | //! An enum where each variant has the name of a possible root tag. The name of
|
481 | //! the enum itself does not matter.
|
482 | //!
|
483 | //! If you need to get the textual content, mark a variant with `#[serde(rename = "$text")]`.
|
484 | //!
|
485 | //! All these structs can be used to deserialize from any XML on the
|
486 | //! left side depending on amount of information that you want to get:
|
487 | //!
|
488 | //! ```
|
489 | //! # use pretty_assertions::assert_eq;
|
490 | //! # use serde::Deserialize;
|
491 | //! # type T = ();
|
492 | //! # type U = ();
|
493 | //! # #[derive(Debug, PartialEq)]
|
494 | //! #[derive(Deserialize)]
|
495 | //! #[serde(rename_all = "snake_case" )]
|
496 | //! enum AnyName {
|
497 | //! One { #[serde(rename = "@field1" )] field1: T },
|
498 | //! Two { field2: U },
|
499 | //!
|
500 | //! /// Use unit variant, if you do not care about the content.
|
501 | //! /// You can use tuple variant if you want to parse
|
502 | //! /// textual content as an xs:list.
|
503 | //! /// Struct variants are not supported and will return
|
504 | //! /// Err(Unsupported)
|
505 | //! #[serde(rename = "$text" )]
|
506 | //! Text(String),
|
507 | //! }
|
508 | //! # assert_eq!(AnyName::One { field1: () }, quick_xml::de::from_str(r#"<one field1="...">...</one>"# ).unwrap());
|
509 | //! # assert_eq!(AnyName::Two { field2: () }, quick_xml::de::from_str(r#"<two><field2>...</field2></two>"# ).unwrap());
|
510 | //! # assert_eq!(AnyName::Text("text cdata " .into()), quick_xml::de::from_str(r#"text <![CDATA[ cdata ]]>"# ).unwrap());
|
511 | //! ```
|
512 | //! ```
|
513 | //! # use pretty_assertions::assert_eq;
|
514 | //! # use serde::Deserialize;
|
515 | //! # type T = ();
|
516 | //! # #[derive(Debug, PartialEq)]
|
517 | //! #[derive(Deserialize)]
|
518 | //! struct Two {
|
519 | //! field2: T,
|
520 | //! }
|
521 | //! # #[derive(Debug, PartialEq)]
|
522 | //! #[derive(Deserialize)]
|
523 | //! #[serde(rename_all = "snake_case" )]
|
524 | //! enum AnyName {
|
525 | //! // `field1` content discarded
|
526 | //! One,
|
527 | //! Two(Two),
|
528 | //! #[serde(rename = "$text" )]
|
529 | //! Text,
|
530 | //! }
|
531 | //! # assert_eq!(AnyName::One, quick_xml::de::from_str(r#"<one field1="...">...</one>"# ).unwrap());
|
532 | //! # assert_eq!(AnyName::Two(Two { field2: () }), quick_xml::de::from_str(r#"<two><field2>...</field2></two>"# ).unwrap());
|
533 | //! # assert_eq!(AnyName::Text, quick_xml::de::from_str(r#"text <![CDATA[ cdata ]]>"# ).unwrap());
|
534 | //! ```
|
535 | //! ```
|
536 | //! # use pretty_assertions::assert_eq;
|
537 | //! # use serde::Deserialize;
|
538 | //! # #[derive(Debug, PartialEq)]
|
539 | //! #[derive(Deserialize)]
|
540 | //! #[serde(rename_all = "snake_case" )]
|
541 | //! enum AnyName {
|
542 | //! One,
|
543 | //! // the <two> and textual content will be mapped to this
|
544 | //! #[serde(other)]
|
545 | //! Other,
|
546 | //! }
|
547 | //! # assert_eq!(AnyName::One, quick_xml::de::from_str(r#"<one field1="...">...</one>"# ).unwrap());
|
548 | //! # assert_eq!(AnyName::Other, quick_xml::de::from_str(r#"<two><field2>...</field2></two>"# ).unwrap());
|
549 | //! # assert_eq!(AnyName::Other, quick_xml::de::from_str(r#"text <![CDATA[ cdata ]]>"# ).unwrap());
|
550 | //! ```
|
551 | //! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
|
552 | //!
|
553 | //! NOTE: You should have variants for all possible tag names in your enum
|
554 | //! or have an `#[serde(other)]` variant.
|
555 | //! <!-- TODO: document an error type if that requirement is violated -->
|
556 | //! </div>
|
557 | //! </td>
|
558 | //! </tr>
|
559 | //! <!-- 9 ===================================================================================== -->
|
560 | //! <tr>
|
561 | //! <td>
|
562 | //!
|
563 | //! `<xs:choice>` embedded in the other element, and at the same time you want
|
564 | //! to get access to other attributes that can appear in the same container
|
565 | //! (`<any-tag>`). This case can also be described as choosing a
|
566 | //! Rust enum variant based on a tag name:
|
567 | //!
|
568 | //! ```xml
|
569 | //! <any-tag field="...">
|
570 | //! <one>...</one>
|
571 | //! </any-tag>
|
572 | //! ```
|
573 | //! ```xml
|
574 | //! <any-tag field="...">
|
575 | //! <two>...</two>
|
576 | //! </any-tag>
|
577 | //! ```
|
578 | //! ```xml
|
579 | //! <any-tag field="...">
|
580 | //! Text <![CDATA[or (mixed)
|
581 | //! CDATA]]> content
|
582 | //! </any-tag>
|
583 | //! ```
|
584 | //! </td>
|
585 | //! <td>
|
586 | //!
|
587 | //! A structure with a field whose type is an `enum`.
|
588 | //!
|
589 | //! If you need to get a textual content, mark a variant with `#[serde(rename = "$text")]`.
|
590 | //!
|
591 | //! Names of the enum, struct, and struct field with `Choice` type do not matter:
|
592 | //!
|
593 | //! ```
|
594 | //! # use pretty_assertions::assert_eq;
|
595 | //! # use serde::Deserialize;
|
596 | //! # type T = ();
|
597 | //! # #[derive(Debug, PartialEq)]
|
598 | //! #[derive(Deserialize)]
|
599 | //! #[serde(rename_all = "snake_case" )]
|
600 | //! enum Choice {
|
601 | //! One,
|
602 | //! Two,
|
603 | //!
|
604 | //! /// Use unit variant, if you do not care about the content.
|
605 | //! /// You can use tuple variant if you want to parse
|
606 | //! /// textual content as an xs:list.
|
607 | //! /// Struct variants are not supported and will return
|
608 | //! /// Err(Unsupported)
|
609 | //! #[serde(rename = "$text" )]
|
610 | //! Text(String),
|
611 | //! }
|
612 | //! # #[derive(Debug, PartialEq)]
|
613 | //! #[derive(Deserialize)]
|
614 | //! struct AnyName {
|
615 | //! #[serde(rename = "@field" )]
|
616 | //! field: T,
|
617 | //!
|
618 | //! #[serde(rename = "$value" )]
|
619 | //! any_name: Choice,
|
620 | //! }
|
621 | //! # assert_eq!(
|
622 | //! # AnyName { field: (), any_name: Choice::One },
|
623 | //! # quick_xml::de::from_str(r#"<any-tag field="..."><one>...</one></any-tag>"# ).unwrap(),
|
624 | //! # );
|
625 | //! # assert_eq!(
|
626 | //! # AnyName { field: (), any_name: Choice::Two },
|
627 | //! # quick_xml::de::from_str(r#"<any-tag field="..."><two>...</two></any-tag>"# ).unwrap(),
|
628 | //! # );
|
629 | //! # assert_eq!(
|
630 | //! # AnyName { field: (), any_name: Choice::Text("text cdata " .into()) },
|
631 | //! # quick_xml::de::from_str(r#"<any-tag field="...">text <![CDATA[ cdata ]]></any-tag>"# ).unwrap(),
|
632 | //! # );
|
633 | //! ```
|
634 | //! </td>
|
635 | //! </tr>
|
636 | //! <!-- 10 ==================================================================================== -->
|
637 | //! <tr>
|
638 | //! <td>
|
639 | //!
|
640 | //! `<xs:choice>` embedded in the other element, and at the same time you want
|
641 | //! to get access to other elements that can appear in the same container
|
642 | //! (`<any-tag>`). This case can also be described as choosing a
|
643 | //! Rust enum variant based on a tag name:
|
644 | //!
|
645 | //! ```xml
|
646 | //! <any-tag>
|
647 | //! <field>...</field>
|
648 | //! <one>...</one>
|
649 | //! </any-tag>
|
650 | //! ```
|
651 | //! ```xml
|
652 | //! <any-tag>
|
653 | //! <two>...</two>
|
654 | //! <field>...</field>
|
655 | //! </any-tag>
|
656 | //! ```
|
657 | //! </td>
|
658 | //! <td>
|
659 | //!
|
660 | //! A structure with a field whose type is an `enum`.
|
661 | //!
|
662 | //! Names of the enum, struct, and struct field with `Choice` type do not matter:
|
663 | //!
|
664 | //! ```
|
665 | //! # use pretty_assertions::assert_eq;
|
666 | //! # use serde::Deserialize;
|
667 | //! # type T = ();
|
668 | //! # #[derive(Debug, PartialEq)]
|
669 | //! #[derive(Deserialize)]
|
670 | //! #[serde(rename_all = "snake_case" )]
|
671 | //! enum Choice {
|
672 | //! One,
|
673 | //! Two,
|
674 | //! }
|
675 | //! # #[derive(Debug, PartialEq)]
|
676 | //! #[derive(Deserialize)]
|
677 | //! struct AnyName {
|
678 | //! field: T,
|
679 | //!
|
680 | //! #[serde(rename = "$value" )]
|
681 | //! any_name: Choice,
|
682 | //! }
|
683 | //! # assert_eq!(
|
684 | //! # AnyName { field: (), any_name: Choice::One },
|
685 | //! # quick_xml::de::from_str(r#"<any-tag><field>...</field><one>...</one></any-tag>"# ).unwrap(),
|
686 | //! # );
|
687 | //! # assert_eq!(
|
688 | //! # AnyName { field: (), any_name: Choice::Two },
|
689 | //! # quick_xml::de::from_str(r#"<any-tag><two>...</two><field>...</field></any-tag>"# ).unwrap(),
|
690 | //! # );
|
691 | //! ```
|
692 | //!
|
693 | //! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
|
694 | //!
|
695 | //! NOTE: if your `Choice` enum would contain an `#[serde(other)]`
|
696 | //! variant, element `<field>` will be mapped to the `field` and not to the enum
|
697 | //! variant.
|
698 | //! </div>
|
699 | //!
|
700 | //! </td>
|
701 | //! </tr>
|
702 | //! <!-- 11 ==================================================================================== -->
|
703 | //! <tr>
|
704 | //! <td>
|
705 | //!
|
706 | //! `<xs:choice>` encapsulated in other element with a fixed name:
|
707 | //!
|
708 | //! ```xml
|
709 | //! <any-tag field="...">
|
710 | //! <choice>
|
711 | //! <one>...</one>
|
712 | //! </choice>
|
713 | //! </any-tag>
|
714 | //! ```
|
715 | //! ```xml
|
716 | //! <any-tag field="...">
|
717 | //! <choice>
|
718 | //! <two>...</two>
|
719 | //! </choice>
|
720 | //! </any-tag>
|
721 | //! ```
|
722 | //! </td>
|
723 | //! <td>
|
724 | //!
|
725 | //! A structure with a field of an intermediate type with one field of `enum` type.
|
726 | //! Actually, this example is not necessary, because you can construct it by yourself
|
727 | //! using the composition rules that were described above. However the XML construction
|
728 | //! described here is very common, so it is shown explicitly.
|
729 | //!
|
730 | //! Names of the enum and struct do not matter:
|
731 | //!
|
732 | //! ```
|
733 | //! # use pretty_assertions::assert_eq;
|
734 | //! # use serde::Deserialize;
|
735 | //! # type T = ();
|
736 | //! # #[derive(Debug, PartialEq)]
|
737 | //! #[derive(Deserialize)]
|
738 | //! #[serde(rename_all = "snake_case" )]
|
739 | //! enum Choice {
|
740 | //! One,
|
741 | //! Two,
|
742 | //! }
|
743 | //! # #[derive(Debug, PartialEq)]
|
744 | //! #[derive(Deserialize)]
|
745 | //! struct Holder {
|
746 | //! #[serde(rename = "$value" )]
|
747 | //! any_name: Choice,
|
748 | //! }
|
749 | //! # #[derive(Debug, PartialEq)]
|
750 | //! #[derive(Deserialize)]
|
751 | //! struct AnyName {
|
752 | //! #[serde(rename = "@field" )]
|
753 | //! field: T,
|
754 | //!
|
755 | //! choice: Holder,
|
756 | //! }
|
757 | //! # assert_eq!(
|
758 | //! # AnyName { field: (), choice: Holder { any_name: Choice::One } },
|
759 | //! # quick_xml::de::from_str(r#"<any-tag field="..."><choice><one>...</one></choice></any-tag>"# ).unwrap(),
|
760 | //! # );
|
761 | //! # assert_eq!(
|
762 | //! # AnyName { field: (), choice: Holder { any_name: Choice::Two } },
|
763 | //! # quick_xml::de::from_str(r#"<any-tag field="..."><choice><two>...</two></choice></any-tag>"# ).unwrap(),
|
764 | //! # );
|
765 | //! ```
|
766 | //! </td>
|
767 | //! </tr>
|
768 | //! <!-- 12 ==================================================================================== -->
|
769 | //! <tr>
|
770 | //! <td>
|
771 | //!
|
772 | //! `<xs:choice>` encapsulated in other element with a fixed name:
|
773 | //!
|
774 | //! ```xml
|
775 | //! <any-tag>
|
776 | //! <field>...</field>
|
777 | //! <choice>
|
778 | //! <one>...</one>
|
779 | //! </choice>
|
780 | //! </any-tag>
|
781 | //! ```
|
782 | //! ```xml
|
783 | //! <any-tag>
|
784 | //! <choice>
|
785 | //! <two>...</two>
|
786 | //! </choice>
|
787 | //! <field>...</field>
|
788 | //! </any-tag>
|
789 | //! ```
|
790 | //! </td>
|
791 | //! <td>
|
792 | //!
|
793 | //! A structure with a field of an intermediate type with one field of `enum` type.
|
794 | //! Actually, this example is not necessary, because you can construct it by yourself
|
795 | //! using the composition rules that were described above. However the XML construction
|
796 | //! described here is very common, so it is shown explicitly.
|
797 | //!
|
798 | //! Names of the enum and struct do not matter:
|
799 | //!
|
800 | //! ```
|
801 | //! # use pretty_assertions::assert_eq;
|
802 | //! # use serde::Deserialize;
|
803 | //! # type T = ();
|
804 | //! # #[derive(Debug, PartialEq)]
|
805 | //! #[derive(Deserialize)]
|
806 | //! #[serde(rename_all = "snake_case" )]
|
807 | //! enum Choice {
|
808 | //! One,
|
809 | //! Two,
|
810 | //! }
|
811 | //! # #[derive(Debug, PartialEq)]
|
812 | //! #[derive(Deserialize)]
|
813 | //! struct Holder {
|
814 | //! #[serde(rename = "$value" )]
|
815 | //! any_name: Choice,
|
816 | //! }
|
817 | //! # #[derive(Debug, PartialEq)]
|
818 | //! #[derive(Deserialize)]
|
819 | //! struct AnyName {
|
820 | //! field: T,
|
821 | //!
|
822 | //! choice: Holder,
|
823 | //! }
|
824 | //! # assert_eq!(
|
825 | //! # AnyName { field: (), choice: Holder { any_name: Choice::One } },
|
826 | //! # quick_xml::de::from_str(r#"<any-tag><field>...</field><choice><one>...</one></choice></any-tag>"# ).unwrap(),
|
827 | //! # );
|
828 | //! # assert_eq!(
|
829 | //! # AnyName { field: (), choice: Holder { any_name: Choice::Two } },
|
830 | //! # quick_xml::de::from_str(r#"<any-tag><choice><two>...</two></choice><field>...</field></any-tag>"# ).unwrap(),
|
831 | //! # );
|
832 | //! ```
|
833 | //! </td>
|
834 | //! </tr>
|
835 | //! <!-- ======================================================================================== -->
|
836 | //! <tr><th colspan="2">
|
837 | //!
|
838 | //! ## Sequences (`xs:all` and `xs:sequence` XML Schema types)
|
839 | //!
|
840 | //! </th></tr>
|
841 | //! <tr><th>To parse all these XML's...</th><th>...use these Rust type(s)</th></tr>
|
842 | //! <!-- 13 ==================================================================================== -->
|
843 | //! <tr>
|
844 | //! <td>
|
845 | //! A sequence inside of a tag without a dedicated name:
|
846 | //!
|
847 | //! ```xml
|
848 | //! <any-tag/>
|
849 | //! ```
|
850 | //! ```xml
|
851 | //! <any-tag>
|
852 | //! <item/>
|
853 | //! </any-tag>
|
854 | //! ```
|
855 | //! ```xml
|
856 | //! <any-tag>
|
857 | //! <item/>
|
858 | //! <item/>
|
859 | //! <item/>
|
860 | //! </any-tag>
|
861 | //! ```
|
862 | //! </td>
|
863 | //! <td>
|
864 | //!
|
865 | //! A structure with a field which is a sequence type, for example, [`Vec`].
|
866 | //! Because XML syntax does not distinguish between empty sequences and missing
|
867 | //! elements, we should indicate that on the Rust side, because serde will require
|
868 | //! that field `item` exists. You can do that in two possible ways:
|
869 | //!
|
870 | //! Use the `#[serde(default)]` attribute for a [field] or the entire [struct]:
|
871 | //! ```
|
872 | //! # use pretty_assertions::assert_eq;
|
873 | //! # use serde::Deserialize;
|
874 | //! # type Item = ();
|
875 | //! # #[derive(Debug, PartialEq)]
|
876 | //! #[derive(Deserialize)]
|
877 | //! struct AnyName {
|
878 | //! #[serde(default)]
|
879 | //! item: Vec<Item>,
|
880 | //! }
|
881 | //! # assert_eq!(
|
882 | //! # AnyName { item: vec![] },
|
883 | //! # quick_xml::de::from_str(r#"<any-tag/>"# ).unwrap(),
|
884 | //! # );
|
885 | //! # assert_eq!(
|
886 | //! # AnyName { item: vec![()] },
|
887 | //! # quick_xml::de::from_str(r#"<any-tag><item/></any-tag>"# ).unwrap(),
|
888 | //! # );
|
889 | //! # assert_eq!(
|
890 | //! # AnyName { item: vec![(), (), ()] },
|
891 | //! # quick_xml::de::from_str(r#"<any-tag><item/><item/><item/></any-tag>"# ).unwrap(),
|
892 | //! # );
|
893 | //! ```
|
894 | //!
|
895 | //! Use the [`Option`]. In that case the inner array will always contain at least one
|
896 | //! element after deserialization:
|
897 | //! ```ignore
|
898 | //! # use pretty_assertions::assert_eq;
|
899 | //! # use serde::Deserialize;
|
900 | //! # type Item = ();
|
901 | //! # #[derive(Debug, PartialEq)]
|
902 | //! #[derive(Deserialize)]
|
903 | //! struct AnyName {
|
904 | //! item: Option<Vec<Item>>,
|
905 | //! }
|
906 | //! # assert_eq!(
|
907 | //! # AnyName { item: None },
|
908 | //! # quick_xml::de::from_str(r#"<any-tag/>"# ).unwrap(),
|
909 | //! # );
|
910 | //! # assert_eq!(
|
911 | //! # AnyName { item: Some(vec![()]) },
|
912 | //! # quick_xml::de::from_str(r#"<any-tag><item/></any-tag>"# ).unwrap(),
|
913 | //! # );
|
914 | //! # assert_eq!(
|
915 | //! # AnyName { item: Some(vec![(), (), ()]) },
|
916 | //! # quick_xml::de::from_str(r#"<any-tag><item/><item/><item/></any-tag>"# ).unwrap(),
|
917 | //! # );
|
918 | //! ```
|
919 | //!
|
920 | //! See also [Frequently Used Patterns](#element-lists).
|
921 | //!
|
922 | //! [field]: https://serde.rs/field-attrs.html#default
|
923 | //! [struct]: https://serde.rs/container-attrs.html#default
|
924 | //! </td>
|
925 | //! </tr>
|
926 | //! <!-- 14 ==================================================================================== -->
|
927 | //! <tr>
|
928 | //! <td>
|
929 | //! A sequence with a strict order, probably with mixed content
|
930 | //! (text / CDATA and tags):
|
931 | //!
|
932 | //! ```xml
|
933 | //! <one>...</one>
|
934 | //! text
|
935 | //! <![CDATA[cdata]]>
|
936 | //! <two>...</two>
|
937 | //! <one>...</one>
|
938 | //! ```
|
939 | //! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
|
940 | //!
|
941 | //! NOTE: this is just an example for showing mapping. XML does not allow
|
942 | //! multiple root tags -- you should wrap the sequence into a tag.
|
943 | //! </div>
|
944 | //! </td>
|
945 | //! <td>
|
946 | //!
|
947 | //! All elements are mapped to a heterogeneous sequential type: tuple or named tuple.
|
948 | //! Each element of the tuple should be able to be deserialized from the nested
|
949 | //! element content (`...`), except the enum types which would be deserialized
|
950 | //! from the full element (`<one>...</one>`), so they could use the element name
|
951 | //! to choose the right variant:
|
952 | //!
|
953 | //! ```
|
954 | //! # use pretty_assertions::assert_eq;
|
955 | //! # use serde::Deserialize;
|
956 | //! # type One = ();
|
957 | //! # type Two = ();
|
958 | //! # /*
|
959 | //! type One = ...;
|
960 | //! type Two = ...;
|
961 | //! # */
|
962 | //! # #[derive(Debug, PartialEq)]
|
963 | //! #[derive(Deserialize)]
|
964 | //! struct AnyName(One, String, Two, One);
|
965 | //! # assert_eq!(
|
966 | //! # AnyName((), "text cdata" .into(), (), ()),
|
967 | //! # quick_xml::de::from_str(r#"<one>...</one>text <![CDATA[cdata]]><two>...</two><one>...</one>"# ).unwrap(),
|
968 | //! # );
|
969 | //! ```
|
970 | //! ```
|
971 | //! # use pretty_assertions::assert_eq;
|
972 | //! # use serde::Deserialize;
|
973 | //! # #[derive(Debug, PartialEq)]
|
974 | //! #[derive(Deserialize)]
|
975 | //! #[serde(rename_all = "snake_case" )]
|
976 | //! enum Choice {
|
977 | //! One,
|
978 | //! }
|
979 | //! # type Two = ();
|
980 | //! # /*
|
981 | //! type Two = ...;
|
982 | //! # */
|
983 | //! type AnyName = (Choice, String, Two, Choice);
|
984 | //! # assert_eq!(
|
985 | //! # (Choice::One, "text cdata" .to_string(), (), Choice::One),
|
986 | //! # quick_xml::de::from_str(r#"<one>...</one>text <![CDATA[cdata]]><two>...</two><one>...</one>"# ).unwrap(),
|
987 | //! # );
|
988 | //! ```
|
989 | //! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
|
990 | //!
|
991 | //! NOTE: consecutive text and CDATA nodes are merged into one text node,
|
992 | //! so you cannot have two adjacent string types in your sequence.
|
993 | //! </div>
|
994 | //! </td>
|
995 | //! </tr>
|
996 | //! <!-- 15 ==================================================================================== -->
|
997 | //! <tr>
|
998 | //! <td>
|
999 | //! A sequence with a non-strict order, probably with a mixed content
|
1000 | //! (text / CDATA and tags).
|
1001 | //!
|
1002 | //! ```xml
|
1003 | //! <one>...</one>
|
1004 | //! text
|
1005 | //! <![CDATA[cdata]]>
|
1006 | //! <two>...</two>
|
1007 | //! <one>...</one>
|
1008 | //! ```
|
1009 | //! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
|
1010 | //!
|
1011 | //! NOTE: this is just an example for showing mapping. XML does not allow
|
1012 | //! multiple root tags -- you should wrap the sequence into a tag.
|
1013 | //! </div>
|
1014 | //! </td>
|
1015 | //! <td>
|
1016 | //! A homogeneous sequence of elements with a fixed or dynamic size:
|
1017 | //!
|
1018 | //! ```
|
1019 | //! # use pretty_assertions::assert_eq;
|
1020 | //! # use serde::Deserialize;
|
1021 | //! # #[derive(Debug, PartialEq)]
|
1022 | //! #[derive(Deserialize)]
|
1023 | //! #[serde(rename_all = "snake_case" )]
|
1024 | //! enum Choice {
|
1025 | //! One,
|
1026 | //! Two,
|
1027 | //! #[serde(other)]
|
1028 | //! Other,
|
1029 | //! }
|
1030 | //! type AnyName = [Choice; 4];
|
1031 | //! # assert_eq!(
|
1032 | //! # [Choice::One, Choice::Other, Choice::Two, Choice::One],
|
1033 | //! # quick_xml::de::from_str::<AnyName>(r#"<one>...</one>text <![CDATA[cdata]]><two>...</two><one>...</one>"# ).unwrap(),
|
1034 | //! # );
|
1035 | //! ```
|
1036 | //! ```
|
1037 | //! # use pretty_assertions::assert_eq;
|
1038 | //! # use serde::Deserialize;
|
1039 | //! # #[derive(Debug, PartialEq)]
|
1040 | //! #[derive(Deserialize)]
|
1041 | //! #[serde(rename_all = "snake_case" )]
|
1042 | //! enum Choice {
|
1043 | //! One,
|
1044 | //! Two,
|
1045 | //! #[serde(rename = "$text" )]
|
1046 | //! Other(String),
|
1047 | //! }
|
1048 | //! type AnyName = Vec<Choice>;
|
1049 | //! # assert_eq!(
|
1050 | //! # vec![
|
1051 | //! # Choice::One,
|
1052 | //! # Choice::Other("text cdata" .into()),
|
1053 | //! # Choice::Two,
|
1054 | //! # Choice::One,
|
1055 | //! # ],
|
1056 | //! # quick_xml::de::from_str::<AnyName>(r#"<one>...</one>text <![CDATA[cdata]]><two>...</two><one>...</one>"# ).unwrap(),
|
1057 | //! # );
|
1058 | //! ```
|
1059 | //! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
|
1060 | //!
|
1061 | //! NOTE: consecutive text and CDATA nodes are merged into one text node,
|
1062 | //! so you cannot have two adjacent string types in your sequence.
|
1063 | //! </div>
|
1064 | //! </td>
|
1065 | //! </tr>
|
1066 | //! <!-- 16 ==================================================================================== -->
|
1067 | //! <tr>
|
1068 | //! <td>
|
1069 | //! A sequence with a strict order, probably with a mixed content,
|
1070 | //! (text and tags) inside of the other element:
|
1071 | //!
|
1072 | //! ```xml
|
1073 | //! <any-tag attribute="...">
|
1074 | //! <one>...</one>
|
1075 | //! text
|
1076 | //! <![CDATA[cdata]]>
|
1077 | //! <two>...</two>
|
1078 | //! <one>...</one>
|
1079 | //! </any-tag>
|
1080 | //! ```
|
1081 | //! </td>
|
1082 | //! <td>
|
1083 | //!
|
1084 | //! A structure where all child elements are mapped to one field which has
|
1085 | //! a heterogeneous sequential type: tuple or named tuple. Each element of the
|
1086 | //! tuple should be able to be deserialized from the full element (`<one>...</one>`).
|
1087 | //!
|
1088 | //! You MUST specify `#[serde(rename = "$value")]` on that field:
|
1089 | //!
|
1090 | //! ```
|
1091 | //! # use pretty_assertions::assert_eq;
|
1092 | //! # use serde::Deserialize;
|
1093 | //! # type One = ();
|
1094 | //! # type Two = ();
|
1095 | //! # /*
|
1096 | //! type One = ...;
|
1097 | //! type Two = ...;
|
1098 | //! # */
|
1099 | //!
|
1100 | //! # #[derive(Debug, PartialEq)]
|
1101 | //! #[derive(Deserialize)]
|
1102 | //! struct AnyName {
|
1103 | //! #[serde(rename = "@attribute" )]
|
1104 | //! # attribute: (),
|
1105 | //! # /*
|
1106 | //! attribute: ...,
|
1107 | //! # */
|
1108 | //! // Does not (yet?) supported by the serde
|
1109 | //! // https://github.com/serde-rs/serde/issues/1905
|
1110 | //! // #[serde(flatten)]
|
1111 | //! #[serde(rename = "$value" )]
|
1112 | //! any_name: (One, String, Two, One),
|
1113 | //! }
|
1114 | //! # assert_eq!(
|
1115 | //! # AnyName { attribute: (), any_name: ((), "text cdata" .into(), (), ()) },
|
1116 | //! # quick_xml::de::from_str(" \
|
1117 | //! # <any-tag attribute='...'> \
|
1118 | //! # <one>...</one> \
|
1119 | //! # text \
|
1120 | //! # <![CDATA[cdata]]> \
|
1121 | //! # <two>...</two> \
|
1122 | //! # <one>...</one> \
|
1123 | //! # </any-tag>"
|
1124 | //! # ).unwrap(),
|
1125 | //! # );
|
1126 | //! ```
|
1127 | //! ```
|
1128 | //! # use pretty_assertions::assert_eq;
|
1129 | //! # use serde::Deserialize;
|
1130 | //! # type One = ();
|
1131 | //! # type Two = ();
|
1132 | //! # /*
|
1133 | //! type One = ...;
|
1134 | //! type Two = ...;
|
1135 | //! # */
|
1136 | //!
|
1137 | //! # #[derive(Debug, PartialEq)]
|
1138 | //! #[derive(Deserialize)]
|
1139 | //! struct NamedTuple(One, String, Two, One);
|
1140 | //!
|
1141 | //! # #[derive(Debug, PartialEq)]
|
1142 | //! #[derive(Deserialize)]
|
1143 | //! struct AnyName {
|
1144 | //! #[serde(rename = "@attribute" )]
|
1145 | //! # attribute: (),
|
1146 | //! # /*
|
1147 | //! attribute: ...,
|
1148 | //! # */
|
1149 | //! // Does not (yet?) supported by the serde
|
1150 | //! // https://github.com/serde-rs/serde/issues/1905
|
1151 | //! // #[serde(flatten)]
|
1152 | //! #[serde(rename = "$value" )]
|
1153 | //! any_name: NamedTuple,
|
1154 | //! }
|
1155 | //! # assert_eq!(
|
1156 | //! # AnyName { attribute: (), any_name: NamedTuple((), "text cdata" .into(), (), ()) },
|
1157 | //! # quick_xml::de::from_str(" \
|
1158 | //! # <any-tag attribute='...'> \
|
1159 | //! # <one>...</one> \
|
1160 | //! # text \
|
1161 | //! # <![CDATA[cdata]]> \
|
1162 | //! # <two>...</two> \
|
1163 | //! # <one>...</one> \
|
1164 | //! # </any-tag>"
|
1165 | //! # ).unwrap(),
|
1166 | //! # );
|
1167 | //! ```
|
1168 | //! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
|
1169 | //!
|
1170 | //! NOTE: consecutive text and CDATA nodes are merged into one text node,
|
1171 | //! so you cannot have two adjacent string types in your sequence.
|
1172 | //! </div>
|
1173 | //! </td>
|
1174 | //! </tr>
|
1175 | //! <!-- 17 ==================================================================================== -->
|
1176 | //! <tr>
|
1177 | //! <td>
|
1178 | //! A sequence with a non-strict order, probably with a mixed content
|
1179 | //! (text / CDATA and tags) inside of the other element:
|
1180 | //!
|
1181 | //! ```xml
|
1182 | //! <any-tag>
|
1183 | //! <one>...</one>
|
1184 | //! text
|
1185 | //! <![CDATA[cdata]]>
|
1186 | //! <two>...</two>
|
1187 | //! <one>...</one>
|
1188 | //! </any-tag>
|
1189 | //! ```
|
1190 | //! </td>
|
1191 | //! <td>
|
1192 | //!
|
1193 | //! A structure where all child elements are mapped to one field which has
|
1194 | //! a homogeneous sequential type: array-like container. A container type `T`
|
1195 | //! should be able to be deserialized from the nested element content (`...`),
|
1196 | //! except if it is an enum type which would be deserialized from the full
|
1197 | //! element (`<one>...</one>`).
|
1198 | //!
|
1199 | //! You MUST specify `#[serde(rename = "$value")]` on that field:
|
1200 | //!
|
1201 | //! ```
|
1202 | //! # use pretty_assertions::assert_eq;
|
1203 | //! # use serde::Deserialize;
|
1204 | //! # #[derive(Debug, PartialEq)]
|
1205 | //! #[derive(Deserialize)]
|
1206 | //! #[serde(rename_all = "snake_case" )]
|
1207 | //! enum Choice {
|
1208 | //! One,
|
1209 | //! Two,
|
1210 | //! #[serde(rename = "$text" )]
|
1211 | //! Other(String),
|
1212 | //! }
|
1213 | //! # #[derive(Debug, PartialEq)]
|
1214 | //! #[derive(Deserialize)]
|
1215 | //! struct AnyName {
|
1216 | //! #[serde(rename = "@attribute" )]
|
1217 | //! # attribute: (),
|
1218 | //! # /*
|
1219 | //! attribute: ...,
|
1220 | //! # */
|
1221 | //! // Does not (yet?) supported by the serde
|
1222 | //! // https://github.com/serde-rs/serde/issues/1905
|
1223 | //! // #[serde(flatten)]
|
1224 | //! #[serde(rename = "$value" )]
|
1225 | //! any_name: [Choice; 4],
|
1226 | //! }
|
1227 | //! # assert_eq!(
|
1228 | //! # AnyName { attribute: (), any_name: [
|
1229 | //! # Choice::One,
|
1230 | //! # Choice::Other("text cdata" .into()),
|
1231 | //! # Choice::Two,
|
1232 | //! # Choice::One,
|
1233 | //! # ] },
|
1234 | //! # quick_xml::de::from_str(" \
|
1235 | //! # <any-tag attribute='...'> \
|
1236 | //! # <one>...</one> \
|
1237 | //! # text \
|
1238 | //! # <![CDATA[cdata]]> \
|
1239 | //! # <two>...</two> \
|
1240 | //! # <one>...</one> \
|
1241 | //! # </any-tag>"
|
1242 | //! # ).unwrap(),
|
1243 | //! # );
|
1244 | //! ```
|
1245 | //! ```
|
1246 | //! # use pretty_assertions::assert_eq;
|
1247 | //! # use serde::Deserialize;
|
1248 | //! # #[derive(Debug, PartialEq)]
|
1249 | //! #[derive(Deserialize)]
|
1250 | //! #[serde(rename_all = "snake_case" )]
|
1251 | //! enum Choice {
|
1252 | //! One,
|
1253 | //! Two,
|
1254 | //! #[serde(rename = "$text" )]
|
1255 | //! Other(String),
|
1256 | //! }
|
1257 | //! # #[derive(Debug, PartialEq)]
|
1258 | //! #[derive(Deserialize)]
|
1259 | //! struct AnyName {
|
1260 | //! #[serde(rename = "@attribute" )]
|
1261 | //! # attribute: (),
|
1262 | //! # /*
|
1263 | //! attribute: ...,
|
1264 | //! # */
|
1265 | //! // Does not (yet?) supported by the serde
|
1266 | //! // https://github.com/serde-rs/serde/issues/1905
|
1267 | //! // #[serde(flatten)]
|
1268 | //! #[serde(rename = "$value" )]
|
1269 | //! any_name: Vec<Choice>,
|
1270 | //! }
|
1271 | //! # assert_eq!(
|
1272 | //! # AnyName { attribute: (), any_name: vec![
|
1273 | //! # Choice::One,
|
1274 | //! # Choice::Other("text cdata" .into()),
|
1275 | //! # Choice::Two,
|
1276 | //! # Choice::One,
|
1277 | //! # ] },
|
1278 | //! # quick_xml::de::from_str(" \
|
1279 | //! # <any-tag attribute='...'> \
|
1280 | //! # <one>...</one> \
|
1281 | //! # text \
|
1282 | //! # <![CDATA[cdata]]> \
|
1283 | //! # <two>...</two> \
|
1284 | //! # <one>...</one> \
|
1285 | //! # </any-tag>"
|
1286 | //! # ).unwrap(),
|
1287 | //! # );
|
1288 | //! ```
|
1289 | //! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
|
1290 | //!
|
1291 | //! NOTE: consecutive text and CDATA nodes are merged into one text node,
|
1292 | //! so you cannot have two adjacent string types in your sequence.
|
1293 | //! </div>
|
1294 | //! </td>
|
1295 | //! </tr>
|
1296 | //! </tbody>
|
1297 | //! </table>
|
1298 | //!
|
1299 | //!
|
1300 | //!
|
1301 | //! Composition Rules
|
1302 | //! =================
|
1303 | //!
|
1304 | //! The XML format is very different from other formats supported by `serde`.
|
1305 | //! One such difference is how data in the serialized form is related to
|
1306 | //! the Rust type. Usually each byte in the data can be associated only with
|
1307 | //! one field in the data structure. However, XML is an exception.
|
1308 | //!
|
1309 | //! For example, take this XML:
|
1310 | //!
|
1311 | //! ```xml
|
1312 | //! <any>
|
1313 | //! <key attr="value"/>
|
1314 | //! </any>
|
1315 | //! ```
|
1316 | //!
|
1317 | //! and try to deserialize it to the struct `AnyName`:
|
1318 | //!
|
1319 | //! ```no_run
|
1320 | //! # use serde::Deserialize;
|
1321 | //! #[derive(Deserialize)]
|
1322 | //! struct AnyName { // AnyName calls `deserialize_struct` on `<any><key attr="value"/></any>`
|
1323 | //! // Used data: ^^^^^^^^^^^^^^^^^^^
|
1324 | //! key: Inner, // Inner calls `deserialize_struct` on `<key attr="value"/>`
|
1325 | //! // Used data: ^^^^^^^^^^^^
|
1326 | //! }
|
1327 | //! #[derive(Deserialize)]
|
1328 | //! struct Inner {
|
1329 | //! #[serde(rename = "@attr" )]
|
1330 | //! attr: String, // String calls `deserialize_string` on `value`
|
1331 | //! // Used data: ^^^^^
|
1332 | //! }
|
1333 | //! ```
|
1334 | //!
|
1335 | //! Comments show which methods of a [`Deserializer`] are called by each struct's
|
1336 | //! `deserialize` method and which input they see. **Used data** shows what
|
1337 | //! content is actually used for deserializing. As you can see, the name of the inner
|
1338 | //! `<key>` tag is used both as a map key / outer struct field name and as part
|
1339 | //! of the inner struct (although the _value_ of the tag, i.e. `key`, is not used
|
1340 | //! by it).
|
1341 | //!
|
1342 | //!
|
1343 | //!
|
1344 | //! Difference between `$text` and `$value` special names
|
1345 | //! =====================================================
|
1346 | //!
|
1347 | //! quick-xml supports two special names for fields -- `$text` and `$value`.
|
1348 | //! Although they may seem the same, there is a distinction. Two different
|
1349 | //! names are required mostly for serialization, because quick-xml should know
|
1350 | //! how you want to serialize certain constructs, which could be represented
|
1351 | //! through XML in multiple different ways.
|
1352 | //!
|
1353 | //! The only difference is in how complex types and sequences are serialized.
|
1354 | //! If you doubt which one you should select, begin with [`$value`](#value).
|
1355 | //!
|
1356 | //! ## `$text`
|
1357 | //! `$text` is used when you want to write your XML as a text or a CDATA content.
|
1358 | //! More formally, field with that name represents simple type definition with
|
1359 | //! `{variety} = atomic` or `{variety} = union` whose basic members are all atomic,
|
1360 | //! as described in the [specification].
|
1361 | //!
|
1362 | //! As a result, not all types of such fields can be serialized. Only serialization
|
1363 | //! of the following types is supported:
|
1364 | //! - all primitive types (strings, numbers, booleans)
|
1365 | //! - unit variants of enumerations (serializes to a name of a variant)
|
1366 | //! - newtypes (delegates serialization to inner type)
|
1367 | //! - [`Option`] of above (`None` serializes to nothing)
|
1368 | //! - sequences (including tuples and tuple variants of enumerations) of above,
|
1369 | //! excluding `None` and empty string elements (because it will not be possible
|
1370 | //! to deserialize them back). The elements are separated by space(s)
|
1371 | //! - unit type `()` and unit structs (serializes to nothing)
|
1372 | //!
|
1373 | //! Complex types, such as structs and maps, are not supported in this field.
|
1374 | //! If you want them, you should use `$value`.
|
1375 | //!
|
1376 | //! Sequences are serialized to a space-delimited string, that is why only certain
|
1377 | //! types are allowed in this mode:
|
1378 | //!
|
1379 | //! ```
|
1380 | //! # use serde::{Deserialize, Serialize};
|
1381 | //! # use quick_xml::de::from_str;
|
1382 | //! # use quick_xml::se::to_string;
|
1383 | //! #[derive(Deserialize, Serialize, PartialEq, Debug)]
|
1384 | //! struct AnyName {
|
1385 | //! #[serde(rename = "$text" )]
|
1386 | //! field: Vec<usize>,
|
1387 | //! }
|
1388 | //!
|
1389 | //! let obj = AnyName { field: vec![1, 2, 3] };
|
1390 | //! let xml = to_string(&obj).unwrap();
|
1391 | //! assert_eq!(xml, "<AnyName>1 2 3</AnyName>" );
|
1392 | //!
|
1393 | //! let object: AnyName = from_str(&xml).unwrap();
|
1394 | //! assert_eq!(object, obj);
|
1395 | //! ```
|
1396 | //!
|
1397 | //! ## `$value`
|
1398 | //! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
|
1399 | //!
|
1400 | //! NOTE: a name `#content` would better explain the purpose of that field,
|
1401 | //! but `$value` is used for compatibility with other XML serde crates, which
|
1402 | //! use that name. This will allow you to switch XML crates more smoothly if required.
|
1403 | //! </div>
|
1404 | //!
|
1405 | //! Representation of primitive types in `$value` does not differ from their
|
1406 | //! representation in `$text` field. The difference is how sequences are serialized.
|
1407 | //! `$value` serializes each sequence item as a separate XML element. The name
|
1408 | //! of that element is taken from serialized type, and because only `enum`s provide
|
1409 | //! such name (their variant name), only they should be used for such fields.
|
1410 | //!
|
1411 | //! `$value` fields do not support `struct` types with fields; the serialization
|
1412 | //! of such types would end with an `Err(Unsupported)`. Unit structs and the unit
|
1413 | //! type `()` serialize to nothing and can be deserialized from any content.
|
1414 | //!
|
1415 | //! Serialization and deserialization of a `$value` field is performed as usual, except
|
1416 | //! that the name for an XML element will be given by the serialized type, instead of
|
1417 | //! the field. The latter allows serializing enumerated types, where the variant is encoded
|
1418 | //! as a tag name, and thus representing an XSD `xs:choice` schema by a Rust `enum`.
|
1419 | //!
|
1420 | //! In the example below, field will be serialized as `<field/>`, because elements
|
1421 | //! get their names from the field name. It cannot be deserialized, because `Enum`
|
1422 | //! expects elements `<A/>`, `<B/>` or `<C/>`, but `AnyName` looked only for `<field/>`:
|
1423 | //!
|
1424 | //! ```no_run
|
1425 | //! # use serde::{Deserialize, Serialize};
|
1426 | //! #[derive(Deserialize, Serialize)]
|
1427 | //! enum Enum { A, B, C }
|
1428 | //!
|
1429 | //! #[derive(Deserialize, Serialize)]
|
1430 | //! struct AnyName {
|
1431 | //! // <field/>
|
1432 | //! field: Enum,
|
1433 | //! }
|
1434 | //! ```
|
1435 | //!
|
1436 | //! If you rename field to `$value`, then `field` would be serialized as `<A/>`,
|
1437 | //! `<B/>` or `<C/>`, depending on its content. It is also possible to
|
1438 | //! deserialize it from the same elements:
|
1439 | //!
|
1440 | //! ```no_run
|
1441 | //! # use serde::{Deserialize, Serialize};
|
1442 | //! # #[derive(Deserialize, Serialize)]
|
1443 | //! # enum Enum { A, B, C }
|
1444 | //! #
|
1445 | //! #[derive(Deserialize, Serialize)]
|
1446 | //! struct AnyName {
|
1447 | //! // <A/>, <B/> or <C/>
|
1448 | //! #[serde(rename = "$value" )]
|
1449 | //! field: Enum,
|
1450 | //! }
|
1451 | //! ```
|
1452 | //!
|
1453 | //! ### Primitives and sequences of primitives
|
1454 | //!
|
1455 | //! Sequences are serialized to a list of elements. Note that types that do not
|
1456 | //! produce their own tag (i.e. primitives) are written as is, without delimiters:
|
1457 | //!
|
1458 | //! ```
|
1459 | //! # use serde::{Deserialize, Serialize};
|
1460 | //! # use quick_xml::de::from_str;
|
1461 | //! # use quick_xml::se::to_string;
|
1462 | //! #[derive(Deserialize, Serialize, PartialEq, Debug)]
|
1463 | //! struct AnyName {
|
1464 | //! #[serde(rename = "$value" )]
|
1465 | //! field: Vec<usize>,
|
1466 | //! }
|
1467 | //!
|
1468 | //! let obj = AnyName { field: vec![1, 2, 3] };
|
1469 | //! let xml = to_string(&obj).unwrap();
|
1470 | //! // Note, that types that does not produce their own tag are written as is!
|
1471 | //! assert_eq!(xml, "<AnyName>123</AnyName>" );
|
1472 | //!
|
1473 | //! let object: AnyName = from_str("<AnyName>123</AnyName>" ).unwrap();
|
1474 | //! assert_eq!(object, AnyName { field: vec![123] });
|
1475 | //!
|
1476 | //! // `1 2 3` is mapped to a single `usize` element
|
1477 | //! // It is impossible to deserialize list of primitives to such field
|
1478 | //! from_str::<AnyName>("<AnyName>1 2 3</AnyName>" ).unwrap_err();
|
1479 | //! ```
|
1480 | //!
|
1481 | //! A particular case of that example is a string `$value` field, which probably
|
1482 | //! would be the most used example of that attribute:
|
1483 | //!
|
1484 | //! ```
|
1485 | //! # use serde::{Deserialize, Serialize};
|
1486 | //! # use quick_xml::de::from_str;
|
1487 | //! # use quick_xml::se::to_string;
|
1488 | //! #[derive(Deserialize, Serialize, PartialEq, Debug)]
|
1489 | //! struct AnyName {
|
1490 | //! #[serde(rename = "$value" )]
|
1491 | //! field: String,
|
1492 | //! }
|
1493 | //!
|
1494 | //! let obj = AnyName { field: "content" .to_string() };
|
1495 | //! let xml = to_string(&obj).unwrap();
|
1496 | //! assert_eq!(xml, "<AnyName>content</AnyName>" );
|
1497 | //! ```
|
1498 | //!
|
1499 | //! ### Structs and sequences of structs
|
1500 | //!
|
1501 | //! Note that structures do not have a serializable name either (the name of the
|
1502 | //! type is never used), so it is impossible to serialize a non-unit struct or a
|
1503 | //! sequence of non-unit structs in a `$value` field. (Sequences of) unit structs
|
1504 | //! are serialized as an empty string, because units themselves serialize
|
1505 | //! to nothing:
|
1506 | //!
|
1507 | //! ```
|
1508 | //! # use serde::{Deserialize, Serialize};
|
1509 | //! # use quick_xml::de::from_str;
|
1510 | //! # use quick_xml::se::to_string;
|
1511 | //! #[derive(Deserialize, Serialize, PartialEq, Debug)]
|
1512 | //! struct Unit;
|
1513 | //!
|
1514 | //! #[derive(Deserialize, Serialize, PartialEq, Debug)]
|
1515 | //! struct AnyName {
|
1516 | //! // #[serde(default)] is required to deserialization of empty lists
|
1517 | //! // This is a general note, not related to $value
|
1518 | //! #[serde(rename = "$value" , default)]
|
1519 | //! field: Vec<Unit>,
|
1520 | //! }
|
1521 | //!
|
1522 | //! let obj = AnyName { field: vec![Unit, Unit, Unit] };
|
1523 | //! let xml = to_string(&obj).unwrap();
|
1524 | //! assert_eq!(xml, "<AnyName/>" );
|
1525 | //!
|
1526 | //! let object: AnyName = from_str("<AnyName/>" ).unwrap();
|
1527 | //! assert_eq!(object, AnyName { field: vec![] });
|
1528 | //!
|
1529 | //! let object: AnyName = from_str("<AnyName></AnyName>" ).unwrap();
|
1530 | //! assert_eq!(object, AnyName { field: vec![] });
|
1531 | //!
|
1532 | //! let object: AnyName = from_str("<AnyName><A/><B/><C/></AnyName>" ).unwrap();
|
1533 | //! assert_eq!(object, AnyName { field: vec![Unit, Unit, Unit] });
|
1534 | //! ```
|
1535 | //!
|
1536 | //! ### Enums and sequences of enums
|
1537 | //!
|
1538 | //! Enumerations use the variant name as an element name:
|
1539 | //!
|
1540 | //! ```
|
1541 | //! # use serde::{Deserialize, Serialize};
|
1542 | //! # use quick_xml::de::from_str;
|
1543 | //! # use quick_xml::se::to_string;
|
1544 | //! #[derive(Deserialize, Serialize, PartialEq, Debug)]
|
1545 | //! struct AnyName {
|
1546 | //! #[serde(rename = "$value" )]
|
1547 | //! field: Vec<Enum>,
|
1548 | //! }
|
1549 | //!
|
1550 | //! #[derive(Deserialize, Serialize, PartialEq, Debug)]
|
1551 | //! enum Enum { A, B, C }
|
1552 | //!
|
1553 | //! let obj = AnyName { field: vec![Enum::A, Enum::B, Enum::C] };
|
1554 | //! let xml = to_string(&obj).unwrap();
|
1555 | //! assert_eq!(
|
1556 | //! xml,
|
1557 | //! "<AnyName> \
|
1558 | //! <A/> \
|
1559 | //! <B/> \
|
1560 | //! <C/> \
|
1561 | //! </AnyName>"
|
1562 | //! );
|
1563 | //!
|
1564 | //! let object: AnyName = from_str(&xml).unwrap();
|
1565 | //! assert_eq!(object, obj);
|
1566 | //! ```
|
1567 | //!
|
1568 | //! ----------------------------------------------------------------------------
|
1569 | //!
|
1570 | //! You can have either `$text` or `$value` field in your structs. Unfortunately,
|
1571 | //! that is not enforced, so you can theoretically have both, but you should
|
1572 | //! avoid that.
|
1573 | //!
|
1574 | //!
|
1575 | //!
|
1576 | //! Frequently Used Patterns
|
1577 | //! ========================
|
1578 | //!
|
1579 | //! Some XML constructs are used so frequently that it is worth documenting the recommended
|
1580 | //! way to represent them in Rust. The sections below describe them.
|
1581 | //!
|
1582 | //! `<element>` lists
|
1583 | //! -----------------
|
1584 | //! Many XML formats wrap lists of elements in an additional container,
|
1585 | //! although this is not required by the XML rules:
|
1586 | //!
|
1587 | //! ```xml
|
1588 | //! <root>
|
1589 | //! <field1/>
|
1590 | //! <field2/>
|
1591 | //! <list><!-- Container -->
|
1592 | //! <element/>
|
1593 | //! <element/>
|
1594 | //! <element/>
|
1595 | //! </list>
|
1596 | //! <field3/>
|
1597 | //! </root>
|
1598 | //! ```
|
1599 | //! In this case, there is a great desire to describe this XML in this way:
|
1600 | //! ```
|
1601 | //! /// Represents <element/>
|
1602 | //! type Element = ();
|
1603 | //!
|
1604 | //! /// Represents <root>...</root>
|
1605 | //! struct AnyName {
|
1606 | //! // Incorrect
|
1607 | //! list: Vec<Element>,
|
1608 | //! }
|
1609 | //! ```
|
1610 | //! This will not work, because potentially `<list>` element can have attributes
|
1611 | //! and other elements inside. You should define the struct for the `<list>`
|
1612 | //! explicitly, as you do that in the XSD for that XML:
|
1613 | //! ```
|
1614 | //! /// Represents <element/>
|
1615 | //! type Element = ();
|
1616 | //!
|
1617 | //! /// Represents <root>...</root>
|
1618 | //! struct AnyName {
|
1619 | //! // Correct
|
1620 | //! list: List,
|
1621 | //! }
|
1622 | //! /// Represents <list>...</list>
|
1623 | //! struct List {
|
1624 | //! element: Vec<Element>,
|
1625 | //! }
|
1626 | //! ```
|
1627 | //!
|
1628 | //! If you want to simplify your API, you could write a simple function for unwrapping
|
1629 | //! inner list and apply it via [`deserialize_with`]:
|
1630 | //!
|
1631 | //! ```
|
1632 | //! # use pretty_assertions::assert_eq;
|
1633 | //! use quick_xml::de::from_str;
|
1634 | //! use serde::{Deserialize, Deserializer};
|
1635 | //!
|
1636 | //! /// Represents <element/>
|
1637 | //! type Element = ();
|
1638 | //!
|
1639 | //! /// Represents <root>...</root>
|
1640 | //! #[derive(Deserialize, Debug, PartialEq)]
|
1641 | //! struct AnyName {
|
1642 | //! #[serde(deserialize_with = "unwrap_list" )]
|
1643 | //! list: Vec<Element>,
|
1644 | //! }
|
1645 | //!
|
1646 | //! fn unwrap_list<'de, D>(deserializer: D) -> Result<Vec<Element>, D::Error>
|
1647 | //! where
|
1648 | //! D: Deserializer<'de>,
|
1649 | //! {
|
1650 | //! /// Represents <list>...</list>
|
1651 | //! #[derive(Deserialize)]
|
1652 | //! struct List {
|
1653 | //! // default allows empty list
|
1654 | //! #[serde(default)]
|
1655 | //! element: Vec<Element>,
|
1656 | //! }
|
1657 | //! Ok(List::deserialize(deserializer)?.element)
|
1658 | //! }
|
1659 | //!
|
1660 | //! assert_eq!(
|
1661 | //! AnyName { list: vec![(), (), ()] },
|
1662 | //! from_str("
|
1663 | //! <root>
|
1664 | //! <list>
|
1665 | //! <element/>
|
1666 | //! <element/>
|
1667 | //! <element/>
|
1668 | //! </list>
|
1669 | //! </root>
|
1670 | //! " ).unwrap(),
|
1671 | //! );
|
1672 | //! ```
|
1673 | //!
|
1674 | //! Instead of writing such functions manually, you also could try <https://lib.rs/crates/serde-query>.
|
1675 | //!
|
1676 | //! Enum::Unit Variants As a Text
|
1677 | //! -----------------------------
|
1678 | //! A frequent task, and a typical source of mistakes, is mapping the text
|
1679 | //! content of some tag to a Rust `enum`. For example, for the XML:
|
1680 | //!
|
1681 | //! ```xml
|
1682 | //! <some-container>
|
1683 | //! <field>EnumValue</field>
|
1684 | //! </some-container>
|
1685 | //! ```
|
1686 | //! one could create an _incorrect_ mapping
|
1687 | //!
|
1688 | //! ```
|
1689 | //! # use serde::{Deserialize, Serialize};
|
1690 | //! #
|
1691 | //! #[derive(Serialize, Deserialize)]
|
1692 | //! enum SomeEnum {
|
1693 | //! EnumValue,
|
1694 | //! # /*
|
1695 | //! ...
|
1696 | //! # */
|
1697 | //! }
|
1698 | //!
|
1699 | //! #[derive(Serialize, Deserialize)]
|
1700 | //! #[serde(rename = "some-container" )]
|
1701 | //! struct SomeContainer {
|
1702 | //! field: SomeEnum,
|
1703 | //! }
|
1704 | //! ```
|
1705 | //!
|
1706 | //! Actually, those types will be serialized into:
|
1707 | //! ```xml
|
1708 | //! <some-container>
|
1709 | //! <EnumValue/>
|
1710 | //! </some-container>
|
1711 | //! ```
|
1712 | //! and will not be able to be deserialized.
|
1713 | //!
|
1714 | //! You can easily see what's wrong if you think about attributes, which could
|
1715 | //! be defined in the `<field>` tag:
|
1716 | //! ```xml
|
1717 | //! <some-container>
|
1718 | //! <field some="attribute">EnumValue</field>
|
1719 | //! </some-container>
|
1720 | //! ```
|
1721 | //!
|
1722 | //! After that you can find the correct solution, using the principles explained
|
1723 | //! above. You should wrap `SomeEnum` into a wrapper struct under the [`$text`](#text)
|
1724 | //! name:
|
1725 | //! ```
|
1726 | //! # use serde::{Serialize, Deserialize};
|
1727 | //! # type SomeEnum = ();
|
1728 | //! #[derive(Serialize, Deserialize)]
|
1729 | //! struct Field {
|
1730 | //! // Use a special name `$text` to map field to the text content
|
1731 | //! #[serde(rename = "$text" )]
|
1732 | //! content: SomeEnum,
|
1733 | //! }
|
1734 | //!
|
1735 | //! #[derive(Serialize, Deserialize)]
|
1736 | //! #[serde(rename = "some-container" )]
|
1737 | //! struct SomeContainer {
|
1738 | //! field: Field,
|
1739 | //! }
|
1740 | //! ```
|
1741 | //!
|
1742 | //! If you still want to keep your struct untouched, you can instead use the
|
1743 | //! helper module [`text_content`].
|
1744 | //!
|
1745 | //!
|
1746 | //! Internally Tagged Enums
|
1747 | //! -----------------------
|
1748 | //! [Tagged enums] are currently not supported because of an issue in the Serde
|
1749 | //! design (see [serde#1183] and [quick-xml#586]) and missing optimizations in
|
1750 | //! Serde which could be useful for XML parsing ([serde#1495]). This can be worked
|
1751 | //! around by manually implementing deserialize with `#[serde(deserialize_with = "func")]`
|
1752 | //! or implementing [`Deserialize`], but this can get very tedious very fast for
|
1753 | //! files with large amounts of tagged enums. To help with this issue quick-xml
|
1754 | //! provides a macro [`impl_deserialize_for_internally_tagged_enum!`]. See the
|
1755 | //! macro documentation for details.
|
1756 | //!
|
1757 | //!
|
1758 | //! [specification]: https://www.w3.org/TR/xmlschema11-1/#Simple_Type_Definition
|
1759 | //! [`deserialize_with`]: https://serde.rs/field-attrs.html#deserialize_with
|
1760 | //! [#497]: https://github.com/tafia/quick-xml/issues/497
|
1761 | //! [`text_content`]: crate::serde_helpers::text_content
|
1762 | //! [Tagged enums]: https://serde.rs/enum-representations.html#internally-tagged
|
1763 | //! [serde#1183]: https://github.com/serde-rs/serde/issues/1183
|
1764 | //! [serde#1495]: https://github.com/serde-rs/serde/issues/1495
|
1765 | //! [quick-xml#586]: https://github.com/tafia/quick-xml/issues/586
|
1766 | //! [`impl_deserialize_for_internally_tagged_enum!`]: crate::impl_deserialize_for_internally_tagged_enum
|
1767 |
|
1768 | // Macros should be defined before the modules that use them
|
1769 | // Also, macros should be imported before using them
|
1770 | use serde::serde_if_integer128;
|
1771 |
|
/// Generates a deserializer method named `$deserialize` that reads the value
/// as a string, converts it with `str::parse` and passes the result to the
/// visitor's `$visit` method.
///
/// The optional trailing token (for example `mut`) is emitted right before
/// `self` in the generated signature.
macro_rules! deserialize_type {
    ($deserialize:ident => $visit:ident, $($mut:tt)?) => {
        fn $deserialize<V: Visitor<'de>>($($mut)? self, visitor: V) -> Result<V::Value, DeError> {
            // No need to unescape because valid numeric representations cannot be escaped
            let raw = self.read_string()?;
            visitor.$visit(raw.parse()?)
        }
    };
}
|
1784 |
|
/// Implement deserialization methods for scalar types, such as numbers, strings,
/// byte arrays, booleans and identifiers.
///
/// The optional token passed to the macro (for example `mut`) is forwarded in
/// front of `self` for every generated method that calls `read_string`.
macro_rules! deserialize_primitives {
    ($($mut:tt)?) => {
        // Signed integers
        deserialize_type!(deserialize_i8 => visit_i8, $($mut)?);
        deserialize_type!(deserialize_i16 => visit_i16, $($mut)?);
        deserialize_type!(deserialize_i32 => visit_i32, $($mut)?);
        deserialize_type!(deserialize_i64 => visit_i64, $($mut)?);

        // Unsigned integers
        deserialize_type!(deserialize_u8 => visit_u8, $($mut)?);
        deserialize_type!(deserialize_u16 => visit_u16, $($mut)?);
        deserialize_type!(deserialize_u32 => visit_u32, $($mut)?);
        deserialize_type!(deserialize_u64 => visit_u64, $($mut)?);

        // 128-bit integers, emitted only when serde provides them
        serde_if_integer128! {
            deserialize_type!(deserialize_i128 => visit_i128, $($mut)?);
            deserialize_type!(deserialize_u128 => visit_u128, $($mut)?);
        }

        // Floating point numbers
        deserialize_type!(deserialize_f32 => visit_f32, $($mut)?);
        deserialize_type!(deserialize_f64 => visit_f64, $($mut)?);

        /// Booleans are read as a string and interpreted by `str2bool`.
        fn deserialize_bool<V: Visitor<'de>>($($mut)? self, visitor: V) -> Result<V::Value, DeError> {
            str2bool(&self.read_string()?, visitor)
        }

        /// Character represented as [strings](#method.deserialize_str).
        fn deserialize_char<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, DeError> {
            self.deserialize_str(visitor)
        }

        /// Reads the string and hands it to the visitor: borrowed data goes to
        /// `visit_borrowed_str`, owned data goes to `visit_string`.
        fn deserialize_str<V: Visitor<'de>>($($mut)? self, visitor: V) -> Result<V::Value, DeError> {
            match self.read_string()? {
                Cow::Borrowed(borrowed) => visitor.visit_borrowed_str(borrowed),
                Cow::Owned(owned) => visitor.visit_string(owned),
            }
        }

        /// Representation of owned strings the same as [non-owned](#method.deserialize_str).
        fn deserialize_string<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, DeError> {
            self.deserialize_str(visitor)
        }

        /// Returns [`DeError::Unsupported`]
        fn deserialize_bytes<V: Visitor<'de>>(self, _visitor: V) -> Result<V::Value, DeError> {
            Err(DeError::Unsupported("binary data content is not supported by XML format".into()))
        }

        /// Forwards deserialization to the [`deserialize_bytes`](#method.deserialize_bytes).
        fn deserialize_byte_buf<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, DeError> {
            self.deserialize_bytes(visitor)
        }

        /// Representation of the named units the same as [unnamed units](#method.deserialize_unit).
        fn deserialize_unit_struct<V: Visitor<'de>>(
            self,
            _name: &'static str,
            visitor: V,
        ) -> Result<V::Value, DeError> {
            self.deserialize_unit(visitor)
        }

        /// Representation of the newtypes the same as one-element [tuple](#method.deserialize_tuple).
        fn deserialize_newtype_struct<V: Visitor<'de>>(
            self,
            _name: &'static str,
            visitor: V,
        ) -> Result<V::Value, DeError> {
            self.deserialize_tuple(1, visitor)
        }

        /// Representation of tuples the same as [sequences](#method.deserialize_seq).
        fn deserialize_tuple<V: Visitor<'de>>(
            self,
            _len: usize,
            visitor: V,
        ) -> Result<V::Value, DeError> {
            self.deserialize_seq(visitor)
        }

        /// Representation of named tuples the same as [unnamed tuples](#method.deserialize_tuple).
        fn deserialize_tuple_struct<V: Visitor<'de>>(
            self,
            _name: &'static str,
            len: usize,
            visitor: V,
        ) -> Result<V::Value, DeError> {
            self.deserialize_tuple(len, visitor)
        }

        /// Identifiers represented as [strings](#method.deserialize_str).
        fn deserialize_identifier<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, DeError> {
            self.deserialize_str(visitor)
        }
    };
}
|
1913 |
|
/// Shared implementation of `deserialize_option`.
///
/// Peeks the next event of `$de`: an empty text event or the end of the
/// document is reported to `$visitor` as `None`; any other event is reported
/// as `Some`, with `$deserializer` used to deserialize the inner value.
macro_rules! deserialize_option {
    ($de:expr, $deserializer:ident, $visitor:ident) => {{
        // Decide first, so the peeked event is no longer borrowed when the
        // deserializer is handed over to the visitor
        let is_absent = match $de.peek()? {
            DeEvent::Text(t) => t.is_empty(),
            DeEvent::Eof => true,
            _ => false,
        };
        if is_absent {
            $visitor.visit_none()
        } else {
            $visitor.visit_some($deserializer)
        }
    }};
}
|
1923 |
|
1924 | mod key;
|
1925 | mod map;
|
1926 | mod resolver;
|
1927 | mod simple_type;
|
1928 | mod var;
|
1929 |
|
1930 | pub use crate::errors::serialize::DeError;
|
1931 | pub use resolver::{EntityResolver, NoEntityResolver};
|
1932 |
|
1933 | use crate::{
|
1934 | encoding::Decoder,
|
1935 | errors::Error,
|
1936 | events::{BytesCData, BytesEnd, BytesStart, BytesText, Event},
|
1937 | name::QName,
|
1938 | reader::Reader,
|
1939 | };
|
1940 | use serde::de::{self, Deserialize, DeserializeOwned, DeserializeSeed, SeqAccess, Visitor};
|
1941 | use std::borrow::Cow;
|
1942 | #[cfg (feature = "overlapped-lists" )]
|
1943 | use std::collections::VecDeque;
|
1944 | use std::io::BufRead;
|
1945 | use std::mem::replace;
|
1946 | #[cfg (feature = "overlapped-lists" )]
|
1947 | use std::num::NonZeroUsize;
|
1948 | use std::ops::Deref;
|
1949 |
|
/// Special field name for data represented by a text node or a CDATA node.
/// XML markup is not expected there.
pub(crate) const TEXT_KEY: &str = "$text";
/// Special field name for data represented by any XML markup inside an element.
pub(crate) const VALUE_KEY: &str = "$value";
|
1954 |
|
/// Decoded and concatenated content of consecutive [`Text`] and [`CData`]
/// events. _Consecutive_ means that the events follow each other directly or
/// are separated only by (any number of) [`Comment`] or [`PI`] events.
///
/// [`Text`]: Event::Text
/// [`CData`]: Event::CData
/// [`Comment`]: Event::Comment
/// [`PI`]: Event::PI
#[derive(Debug, PartialEq, Eq)]
pub struct Text<'a> {
    /// The text content, either borrowed or owned
    text: Cow<'a, str>,
}

/// Gives read-only access to the content as a plain `str`.
impl<'a> Deref for Text<'a> {
    type Target = str;

    #[inline]
    fn deref(&self) -> &Self::Target {
        &self.text
    }
}

/// Wraps a string slice without copying it.
impl<'a> From<&'a str> for Text<'a> {
    #[inline]
    fn from(text: &'a str) -> Self {
        let text = Cow::Borrowed(text);
        Text { text }
    }
}
|
1985 |
|
1986 | ////////////////////////////////////////////////////////////////////////////////////////////////////
|
1987 |
|
1988 | /// Simplified event which contains only these variants that used by deserializer
|
1989 | #[derive (Debug, PartialEq, Eq)]
|
1990 | pub enum DeEvent<'a> {
|
1991 | /// Start tag (with attributes) `<tag attr="value">`.
|
1992 | Start(BytesStart<'a>),
|
1993 | /// End tag `</tag>`.
|
1994 | End(BytesEnd<'a>),
|
1995 | /// Decoded and concatenated content of consequent [`Text`] and [`CData`]
|
1996 | /// events. _Consequent_ means that events should follow each other or be
|
1997 | /// delimited only by (any count of) [`Comment`] or [`PI`] events.
|
1998 | ///
|
1999 | /// [`Text`]: Event::Text
|
2000 | /// [`CData`]: Event::CData
|
2001 | /// [`Comment`]: Event::Comment
|
2002 | /// [`PI`]: Event::PI
|
2003 | Text(Text<'a>),
|
2004 | /// End of XML document.
|
2005 | Eof,
|
2006 | }
|
2007 |
|
2008 | ////////////////////////////////////////////////////////////////////////////////////////////////////
|
2009 |
|
2010 | /// Simplified event which contains only these variants that used by deserializer,
|
2011 | /// but [`Text`] events not yet fully processed.
|
2012 | ///
|
2013 | /// [`Text`] events should be trimmed if they does not surrounded by the other
|
2014 | /// [`Text`] or [`CData`] events. This event contains intermediate state of [`Text`]
|
2015 | /// event, where they are trimmed from the start, but not from the end. To trim
|
2016 | /// end spaces we should lookahead by one deserializer event (i. e. skip all
|
2017 | /// comments and processing instructions).
|
2018 | ///
|
2019 | /// [`Text`]: Event::Text
|
2020 | /// [`CData`]: Event::CData
|
2021 | #[derive (Debug, PartialEq, Eq)]
|
2022 | pub enum PayloadEvent<'a> {
|
2023 | /// Start tag (with attributes) `<tag attr="value">`.
|
2024 | Start(BytesStart<'a>),
|
2025 | /// End tag `</tag>`.
|
2026 | End(BytesEnd<'a>),
|
2027 | /// Escaped character data between tags.
|
2028 | Text(BytesText<'a>),
|
2029 | /// Unescaped character data stored in `<![CDATA[...]]>`.
|
2030 | CData(BytesCData<'a>),
|
2031 | /// Document type definition data (DTD) stored in `<!DOCTYPE ...>`.
|
2032 | DocType(BytesText<'a>),
|
2033 | /// End of XML document.
|
2034 | Eof,
|
2035 | }
|
2036 |
|
2037 | impl<'a> PayloadEvent<'a> {
|
2038 | /// Ensures that all data is owned to extend the object's lifetime if necessary.
|
2039 | #[inline ]
|
2040 | fn into_owned(self) -> PayloadEvent<'static> {
|
2041 | match self {
|
2042 | PayloadEvent::Start(e: BytesStart<'_>) => PayloadEvent::Start(e.into_owned()),
|
2043 | PayloadEvent::End(e: BytesEnd<'_>) => PayloadEvent::End(e.into_owned()),
|
2044 | PayloadEvent::Text(e: BytesText<'_>) => PayloadEvent::Text(e.into_owned()),
|
2045 | PayloadEvent::CData(e: BytesCData<'_>) => PayloadEvent::CData(e.into_owned()),
|
2046 | PayloadEvent::DocType(e: BytesText<'_>) => PayloadEvent::DocType(e.into_owned()),
|
2047 | PayloadEvent::Eof => PayloadEvent::Eof,
|
2048 | }
|
2049 | }
|
2050 | }
|
2051 |
|
2052 | /// An intermediate reader that consumes [`PayloadEvent`]s and produces final [`DeEvent`]s.
|
2053 | /// [`PayloadEvent::Text`] events, that followed by any event except
|
2054 | /// [`PayloadEvent::Text`] or [`PayloadEvent::CData`], are trimmed from the end.
|
2055 | struct XmlReader<'i, R: XmlRead<'i>, E: EntityResolver = NoEntityResolver> {
|
2056 | /// A source of low-level XML events
|
2057 | reader: R,
|
2058 | /// Intermediate event, that could be returned by the next call to `next()`.
|
2059 | /// If that is the `Text` event then leading spaces already trimmed, but
|
2060 | /// trailing spaces is not. Before the event will be returned, trimming of
|
2061 | /// the spaces could be necessary
|
2062 | lookahead: Result<PayloadEvent<'i>, DeError>,
|
2063 |
|
2064 | /// Used to resolve unknown entities that would otherwise cause the parser
|
2065 | /// to return an [`EscapeError::UnrecognizedSymbol`] error.
|
2066 | ///
|
2067 | /// [`EscapeError::UnrecognizedSymbol`]: crate::escape::EscapeError::UnrecognizedSymbol
|
2068 | entity_resolver: E,
|
2069 | }
|
2070 |
|
2071 | impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
|
2072 | fn new(mut reader: R, entity_resolver: E) -> Self {
|
2073 | // Lookahead by one event immediately, so we do not need to check in the
|
2074 | // loop if we need lookahead or not
|
2075 | let lookahead = reader.next();
|
2076 |
|
2077 | Self {
|
2078 | reader,
|
2079 | lookahead,
|
2080 | entity_resolver,
|
2081 | }
|
2082 | }
|
2083 |
|
2084 | /// Read next event and put it in lookahead, return the current lookahead
|
2085 | #[inline (always)]
|
2086 | fn next_impl(&mut self) -> Result<PayloadEvent<'i>, DeError> {
|
2087 | replace(&mut self.lookahead, self.reader.next())
|
2088 | }
|
2089 |
|
2090 | #[inline (always)]
|
2091 | fn need_trim_end(&self) -> bool {
|
2092 | // If next event is a text or CDATA, we should not trim trailing spaces
|
2093 | !matches!(
|
2094 | self.lookahead,
|
2095 | Ok(PayloadEvent::Text(_)) | Ok(PayloadEvent::CData(_))
|
2096 | )
|
2097 | }
|
2098 |
|
2099 | /// Read all consequent [`Text`] and [`CData`] events until non-text event
|
2100 | /// occurs. Content of all events would be appended to `result` and returned
|
2101 | /// as [`DeEvent::Text`].
|
2102 | ///
|
2103 | /// [`Text`]: PayloadEvent::Text
|
2104 | /// [`CData`]: PayloadEvent::CData
|
2105 | fn drain_text(&mut self, mut result: Cow<'i, str>) -> Result<DeEvent<'i>, DeError> {
|
2106 | loop {
|
2107 | match self.lookahead {
|
2108 | Ok(PayloadEvent::Text(_) | PayloadEvent::CData(_)) => {
|
2109 | let text = self.next_text()?;
|
2110 |
|
2111 | let mut s = result.into_owned();
|
2112 | s += &text;
|
2113 | result = Cow::Owned(s);
|
2114 | }
|
2115 | _ => break,
|
2116 | }
|
2117 | }
|
2118 | Ok(DeEvent::Text(Text { text: result }))
|
2119 | }
|
2120 |
|
2121 | /// Read one text event, panics if current event is not a text event
|
2122 | ///
|
2123 | /// |Event |XML |Handling
|
2124 | /// |-----------------------|---------------------------|----------------------------------------
|
2125 | /// |[`PayloadEvent::Start`]|`<tag>...</tag>` |Possible panic (unreachable)
|
2126 | /// |[`PayloadEvent::End`] |`</any-tag>` |Possible panic (unreachable)
|
2127 | /// |[`PayloadEvent::Text`] |`text content` |Unescapes `text content` and returns it
|
2128 | /// |[`PayloadEvent::CData`]|`<![CDATA[cdata content]]>`|Returns `cdata content` unchanged
|
2129 | /// |[`PayloadEvent::Eof`] | |Possible panic (unreachable)
|
2130 | #[inline (always)]
|
2131 | fn next_text(&mut self) -> Result<Cow<'i, str>, DeError> {
|
2132 | match self.next_impl()? {
|
2133 | PayloadEvent::Text(mut e) => {
|
2134 | if self.need_trim_end() {
|
2135 | e.inplace_trim_end();
|
2136 | }
|
2137 | Ok(e.unescape_with(|entity| self.entity_resolver.resolve(entity))?)
|
2138 | }
|
2139 | PayloadEvent::CData(e) => Ok(e.decode()?),
|
2140 |
|
2141 | // SAFETY: this method is called only when we peeked Text or CData
|
2142 | _ => unreachable!("Only `Text` and `CData` events can come here" ),
|
2143 | }
|
2144 | }
|
2145 |
|
2146 | /// Return an input-borrowing event.
|
2147 | fn next(&mut self) -> Result<DeEvent<'i>, DeError> {
|
2148 | loop {
|
2149 | return match self.next_impl()? {
|
2150 | PayloadEvent::Start(e) => Ok(DeEvent::Start(e)),
|
2151 | PayloadEvent::End(e) => Ok(DeEvent::End(e)),
|
2152 | PayloadEvent::Text(mut e) => {
|
2153 | if self.need_trim_end() && e.inplace_trim_end() {
|
2154 | continue;
|
2155 | }
|
2156 | self.drain_text(e.unescape_with(|entity| self.entity_resolver.resolve(entity))?)
|
2157 | }
|
2158 | PayloadEvent::CData(e) => self.drain_text(e.decode()?),
|
2159 | PayloadEvent::DocType(e) => {
|
2160 | self.entity_resolver
|
2161 | .capture(e)
|
2162 | .map_err(|err| DeError::Custom(format!("cannot parse DTD: {}" , err)))?;
|
2163 | continue;
|
2164 | }
|
2165 | PayloadEvent::Eof => Ok(DeEvent::Eof),
|
2166 | };
|
2167 | }
|
2168 | }
|
2169 |
|
2170 | #[inline ]
|
2171 | fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
|
2172 | match self.lookahead {
|
2173 | // We pre-read event with the same name that is required to be skipped.
|
2174 | // First call of `read_to_end` will end out pre-read event, the second
|
2175 | // will consume other events
|
2176 | Ok(PayloadEvent::Start(ref e)) if e.name() == name => {
|
2177 | let result1 = self.reader.read_to_end(name);
|
2178 | let result2 = self.reader.read_to_end(name);
|
2179 |
|
2180 | // In case of error `next` returns `Eof`
|
2181 | self.lookahead = self.reader.next();
|
2182 | result1?;
|
2183 | result2?;
|
2184 | }
|
2185 | // We pre-read event with the same name that is required to be skipped.
|
2186 | // Because this is end event, we already consume the whole tree, so
|
2187 | // nothing to do, just update lookahead
|
2188 | Ok(PayloadEvent::End(ref e)) if e.name() == name => {
|
2189 | self.lookahead = self.reader.next();
|
2190 | }
|
2191 | Ok(_) => {
|
2192 | let result = self.reader.read_to_end(name);
|
2193 |
|
2194 | // In case of error `next` returns `Eof`
|
2195 | self.lookahead = self.reader.next();
|
2196 | result?;
|
2197 | }
|
2198 | // Read next lookahead event, unpack error from the current lookahead
|
2199 | Err(_) => {
|
2200 | self.next_impl()?;
|
2201 | }
|
2202 | }
|
2203 | Ok(())
|
2204 | }
|
2205 |
|
2206 | #[inline ]
|
2207 | fn decoder(&self) -> Decoder {
|
2208 | self.reader.decoder()
|
2209 | }
|
2210 | }
|
2211 |
|
2212 | ////////////////////////////////////////////////////////////////////////////////////////////////////
|
2213 |
|
2214 | /// Deserialize an instance of type `T` from a string of XML text.
|
2215 | pub fn from_str<'de, T>(s: &'de str) -> Result<T, DeError>
|
2216 | where
|
2217 | T: Deserialize<'de>,
|
2218 | {
|
2219 | let mut de: Deserializer<'_, SliceReader<'_>> = Deserializer::from_str(source:s);
|
2220 | T::deserialize(&mut de)
|
2221 | }
|
2222 |
|
2223 | /// Deserialize from a reader. This method will do internal copies of data
|
2224 | /// readed from `reader`. If you want have a `&str` input and want to borrow
|
2225 | /// as much as possible, use [`from_str`].
|
2226 | pub fn from_reader<R, T>(reader: R) -> Result<T, DeError>
|
2227 | where
|
2228 | R: BufRead,
|
2229 | T: DeserializeOwned,
|
2230 | {
|
2231 | let mut de: Deserializer<'_, IoReader<…>> = Deserializer::from_reader(reader);
|
2232 | T::deserialize(&mut de)
|
2233 | }
|
2234 |
|
2235 | // TODO: According to the https://www.w3.org/TR/xmlschema11-2/#boolean,
|
2236 | // valid boolean representations are only "true", "false", "1", and "0"
|
2237 | fn str2bool<'de, V>(value: &str, visitor: V) -> Result<V::Value, DeError>
|
2238 | where
|
2239 | V: de::Visitor<'de>,
|
2240 | {
|
2241 | match value {
|
2242 | "true" | "1" | "True" | "TRUE" | "t" | "Yes" | "YES" | "yes" | "y" => {
|
2243 | visitor.visit_bool(true)
|
2244 | }
|
2245 | "false" | "0" | "False" | "FALSE" | "f" | "No" | "NO" | "no" | "n" => {
|
2246 | visitor.visit_bool(false)
|
2247 | }
|
2248 | _ => Err(DeError::InvalidBoolean(value.into())),
|
2249 | }
|
2250 | }
|
2251 |
|
2252 | fn deserialize_bool<'de, V>(value: &[u8], decoder: Decoder, visitor: V) -> Result<V::Value, DeError>
|
2253 | where
|
2254 | V: Visitor<'de>,
|
2255 | {
|
2256 | #[cfg (feature = "encoding" )]
|
2257 | {
|
2258 | let value = decoder.decode(value)?;
|
2259 | // No need to unescape because valid boolean representations cannot be escaped
|
2260 | str2bool(value.as_ref(), visitor)
|
2261 | }
|
2262 |
|
2263 | #[cfg (not(feature = "encoding" ))]
|
2264 | {
|
2265 | // No need to unescape because valid boolean representations cannot be escaped
|
2266 | match value {
|
2267 | b"true" | b"1" | b"True" | b"TRUE" | b"t" | b"Yes" | b"YES" | b"yes" | b"y" => {
|
2268 | visitor.visit_bool(true)
|
2269 | }
|
2270 | b"false" | b"0" | b"False" | b"FALSE" | b"f" | b"No" | b"NO" | b"no" | b"n" => {
|
2271 | visitor.visit_bool(false)
|
2272 | }
|
2273 | e: &[u8] => Err(DeError::InvalidBoolean(decoder.decode(bytes:e)?.into())),
|
2274 | }
|
2275 | }
|
2276 | }
|
2277 |
|
2278 | ////////////////////////////////////////////////////////////////////////////////////////////////////
|
2279 |
|
2280 | /// A structure that deserializes XML into Rust values.
|
2281 | pub struct Deserializer<'de, R, E: EntityResolver = NoEntityResolver>
|
2282 | where
|
2283 | R: XmlRead<'de>,
|
2284 | {
|
2285 | /// An XML reader that streams events into this deserializer
|
2286 | reader: XmlReader<'de, R, E>,
|
2287 |
|
2288 | /// When deserializing sequences sometimes we have to skip unwanted events.
|
2289 | /// That events should be stored and then replayed. This is a replay buffer,
|
2290 | /// that streams events while not empty. When it exhausted, events will
|
2291 | /// requested from [`Self::reader`].
|
2292 | #[cfg (feature = "overlapped-lists" )]
|
2293 | read: VecDeque<DeEvent<'de>>,
|
2294 | /// When deserializing sequences sometimes we have to skip events, because XML
|
2295 | /// is tolerant to elements order and even if in the XSD order is strictly
|
2296 | /// specified (using `xs:sequence`) most of XML parsers allows order violations.
|
2297 | /// That means, that elements, forming a sequence, could be overlapped with
|
2298 | /// other elements, do not related to that sequence.
|
2299 | ///
|
2300 | /// In order to support this, deserializer will scan events and skip unwanted
|
2301 | /// events, store them here. After call [`Self::start_replay()`] all events
|
2302 | /// moved from this to [`Self::read`].
|
2303 | #[cfg (feature = "overlapped-lists" )]
|
2304 | write: VecDeque<DeEvent<'de>>,
|
2305 | /// Maximum number of events that can be skipped when processing sequences
|
2306 | /// that occur out-of-order. This field is used to prevent potential
|
2307 | /// denial-of-service (DoS) attacks which could cause infinite memory
|
2308 | /// consumption when parsing a very large amount of XML into a sequence field.
|
2309 | #[cfg (feature = "overlapped-lists" )]
|
2310 | limit: Option<NonZeroUsize>,
|
2311 |
|
2312 | #[cfg (not(feature = "overlapped-lists" ))]
|
2313 | peek: Option<DeEvent<'de>>,
|
2314 | }
|
2315 |
|
2316 | impl<'de, R, E> Deserializer<'de, R, E>
|
2317 | where
|
2318 | R: XmlRead<'de>,
|
2319 | E: EntityResolver,
|
2320 | {
|
2321 | /// Create an XML deserializer from one of the possible quick_xml input sources.
|
2322 | ///
|
2323 | /// Typically it is more convenient to use one of these methods instead:
|
2324 | ///
|
2325 | /// - [`Deserializer::from_str`]
|
2326 | /// - [`Deserializer::from_reader`]
|
2327 | fn new(reader: R, entity_resolver: E) -> Self {
|
2328 | Self {
|
2329 | reader: XmlReader::new(reader, entity_resolver),
|
2330 |
|
2331 | #[cfg (feature = "overlapped-lists" )]
|
2332 | read: VecDeque::new(),
|
2333 | #[cfg (feature = "overlapped-lists" )]
|
2334 | write: VecDeque::new(),
|
2335 | #[cfg (feature = "overlapped-lists" )]
|
2336 | limit: None,
|
2337 |
|
2338 | #[cfg (not(feature = "overlapped-lists" ))]
|
2339 | peek: None,
|
2340 | }
|
2341 | }
|
2342 |
|
2343 | /// Set the maximum number of events that could be skipped during deserialization
|
2344 | /// of sequences.
|
2345 | ///
|
2346 | /// If `<element>` contains more than specified nested elements, `$text` or
|
2347 | /// CDATA nodes, then [`DeError::TooManyEvents`] will be returned during
|
2348 | /// deserialization of sequence field (any type that uses [`deserialize_seq`]
|
2349 | /// for the deserialization, for example, `Vec<T>`).
|
2350 | ///
|
2351 | /// This method can be used to prevent a [DoS] attack and infinite memory
|
2352 | /// consumption when parsing a very large XML to a sequence field.
|
2353 | ///
|
2354 | /// It is strongly recommended to set limit to some value when you parse data
|
2355 | /// from untrusted sources. You should choose a value that your typical XMLs
|
2356 | /// can have _between_ different elements that corresponds to the same sequence.
|
2357 | ///
|
2358 | /// # Examples
|
2359 | ///
|
2360 | /// Let's imagine, that we deserialize such structure:
|
2361 | /// ```
|
2362 | /// struct List {
|
2363 | /// item: Vec<()>,
|
2364 | /// }
|
2365 | /// ```
|
2366 | ///
|
2367 | /// The XML that we try to parse look like this:
|
2368 | /// ```xml
|
2369 | /// <any-name>
|
2370 | /// <item/>
|
2371 | /// <!-- Bufferization starts at this point -->
|
2372 | /// <another-item>
|
2373 | /// <some-element>with text</some-element>
|
2374 | /// <yet-another-element/>
|
2375 | /// </another-item>
|
2376 | /// <!-- Buffer will be emptied at this point; 7 events were buffered -->
|
2377 | /// <item/>
|
2378 | /// <!-- There is nothing to buffer, because elements follows each other -->
|
2379 | /// <item/>
|
2380 | /// </any-name>
|
2381 | /// ```
|
2382 | ///
|
2383 | /// There, when we deserialize the `item` field, we need to buffer 7 events,
|
2384 | /// before we can deserialize the second `<item/>`:
|
2385 | ///
|
2386 | /// - `<another-item>`
|
2387 | /// - `<some-element>`
|
2388 | /// - `$text(with text)`
|
2389 | /// - `</some-element>`
|
2390 | /// - `<yet-another-element/>` (virtual start event)
|
2391 | /// - `<yet-another-element/>` (virtual end event)
|
2392 | /// - `</another-item>`
|
2393 | ///
|
2394 | /// Note, that `<yet-another-element/>` internally represented as 2 events:
|
2395 | /// one for the start tag and one for the end tag. In the future this can be
|
2396 | /// eliminated, but for now we use [auto-expanding feature] of a reader,
|
2397 | /// because this simplifies deserializer code.
|
2398 | ///
|
2399 | /// [`deserialize_seq`]: serde::Deserializer::deserialize_seq
|
2400 | /// [DoS]: https://en.wikipedia.org/wiki/Denial-of-service_attack
|
2401 | /// [auto-expanding feature]: Reader::expand_empty_elements
|
2402 | #[cfg (feature = "overlapped-lists" )]
|
2403 | pub fn event_buffer_size(&mut self, limit: Option<NonZeroUsize>) -> &mut Self {
|
2404 | self.limit = limit;
|
2405 | self
|
2406 | }
|
2407 |
|
2408 | #[cfg (feature = "overlapped-lists" )]
|
2409 | fn peek(&mut self) -> Result<&DeEvent<'de>, DeError> {
|
2410 | if self.read.is_empty() {
|
2411 | self.read.push_front(self.reader.next()?);
|
2412 | }
|
2413 | if let Some(event) = self.read.front() {
|
2414 | return Ok(event);
|
2415 | }
|
2416 | // SAFETY: `self.read` was filled in the code above.
|
2417 | // NOTE: Can be replaced with `unsafe { std::hint::unreachable_unchecked() }`
|
2418 | // if unsafe code will be allowed
|
2419 | unreachable!()
|
2420 | }
|
2421 | #[cfg (not(feature = "overlapped-lists" ))]
|
2422 | fn peek(&mut self) -> Result<&DeEvent<'de>, DeError> {
|
2423 | if self.peek.is_none() {
|
2424 | self.peek = Some(self.reader.next()?);
|
2425 | }
|
2426 | match self.peek.as_ref() {
|
2427 | Some(v) => Ok(v),
|
2428 | // SAFETY: a `None` variant for `self.peek` would have been replaced
|
2429 | // by a `Some` variant in the code above.
|
2430 | // TODO: Can be replaced with `unsafe { std::hint::unreachable_unchecked() }`
|
2431 | // if unsafe code will be allowed
|
2432 | None => unreachable!(),
|
2433 | }
|
2434 | }
|
2435 |
|
2436 | fn next(&mut self) -> Result<DeEvent<'de>, DeError> {
|
2437 | // Replay skipped or peeked events
|
2438 | #[cfg (feature = "overlapped-lists" )]
|
2439 | if let Some(event) = self.read.pop_front() {
|
2440 | return Ok(event);
|
2441 | }
|
2442 | #[cfg (not(feature = "overlapped-lists" ))]
|
2443 | if let Some(e) = self.peek.take() {
|
2444 | return Ok(e);
|
2445 | }
|
2446 | self.reader.next()
|
2447 | }
|
2448 |
|
/// Returns a mark for the current position in the skipped events buffer.
///
/// The mark is the current length of the buffer. Pass it to
/// [`Self::start_replay()`] to replay the events that [`Self::skip()`] stores
/// after this point.
#[cfg(feature = "overlapped-lists")]
#[inline]
#[must_use = "returned checkpoint should be used in `start_replay`"]
fn skip_checkpoint(&self) -> usize {
    self.write.len()
}
|
2457 |
|
/// Takes the next event and stores it in the skipped events buffer. If that
/// event is a start tag, the whole subtree up to and including the matching
/// end tag is stored as well.
///
/// You MUST call [`Self::start_replay()`] after calling this to give access
/// to the skipped events and release internal buffers.
///
/// # Errors
///
/// Propagates errors from [`Self::next()`] and from `skip_event` (for example
/// when the configured event limit would be exceeded).
#[cfg(feature = "overlapped-lists")]
fn skip(&mut self) -> Result<(), DeError> {
    let first = self.next()?;
    self.skip_event(first)?;

    // Only a start event opens a subtree that has to be skipped as a whole
    let end = match self.write.back() {
        Some(DeEvent::Start(tag)) => tag.name().as_ref().to_owned(),
        _ => return Ok(()),
    };

    // Number of currently open nested tags that have the same name
    let mut depth = 0;
    loop {
        let event = self.next()?;
        // Classify the event before it is moved into the buffer:
        // `Some(1)` / `Some(-1)` for an open / close tag with the tracked name,
        // `Some(0)` for anything else inside the subtree, `None` for `Eof`.
        let step = match &event {
            DeEvent::Start(e) if e.name().as_ref() == end => Some(1),
            DeEvent::End(e) if e.name().as_ref() == end => Some(-1),
            DeEvent::Eof => None,
            _ => Some(0),
        };
        self.skip_event(event)?;
        match step {
            // Input ended before the subtree was closed
            None => break,
            // The end tag that closes the skipped subtree
            Some(-1) if depth == 0 => break,
            Some(delta) => depth += delta,
        }
    }
    Ok(())
}
|
2497 |
|
/// Appends `event` to the skipped events buffer.
///
/// # Errors
///
/// Returns [`DeError::TooManyEvents`] without storing the event if a limit is
/// set and the buffer already holds that many events.
#[cfg(feature = "overlapped-lists")]
#[inline]
fn skip_event(&mut self, event: DeEvent<'de>) -> Result<(), DeError> {
    match self.limit {
        Some(max) if self.write.len() >= max.get() => Err(DeError::TooManyEvents(max)),
        _ => {
            self.write.push_back(event);
            Ok(())
        }
    }
}
|
2509 |
|
/// Moves the events that were buffered after the given `checkpoint` from the
/// [`Self::write`] skip buffer to the front of the [`Self::read`] buffer.
///
/// After calling this method, [`Self::peek()`] and [`Self::next()`] start
/// returning the events that were previously skipped by [`Self::skip()`].
/// Only when all of those events have been consumed does the deserializer go
/// back to draining events from the underlying reader.
///
/// This method MUST be called if [`Self::skip()`] was called any number of
/// times after [`Self::new()`] or `start_replay()`, otherwise events will be
/// lost.
#[cfg(feature = "overlapped-lists")]
fn start_replay(&mut self, checkpoint: usize) {
    if checkpoint != 0 {
        // Replay only the tail of the skip buffer; events stored before the
        // checkpoint stay in `self.write`
        let mut replay = self.write.split_off(checkpoint);
        replay.append(&mut self.read);
        self.read = replay;
        return;
    }

    // Everything that was skipped is replayed: queue the still pending events
    // after the skipped ones, then exchange the two buffers, which leaves
    // `self.write` empty
    self.write.append(&mut self.read);
    std::mem::swap(&mut self.read, &mut self.write);
}
|
2531 |
|
2532 | #[inline ]
|
2533 | fn read_string(&mut self) -> Result<Cow<'de, str>, DeError> {
|
2534 | self.read_string_impl(true)
|
2535 | }
|
2536 |
|
2537 | /// Consumes consequent [`Text`] and [`CData`] (both a referred below as a _text_)
|
2538 | /// events, merge them into one string. If there are no such events, returns
|
2539 | /// an empty string.
|
2540 | ///
|
2541 | /// If `allow_start` is `false`, then only text events is consumed, for other
|
2542 | /// events an error is returned (see table below).
|
2543 | ///
|
2544 | /// If `allow_start` is `true`, then first [`DeEvent::Text`] event is returned
|
2545 | /// and all other content is skipped until corresponding end tag will be consumed.
|
2546 | ///
|
2547 | /// # Handling events
|
2548 | ///
|
2549 | /// The table below shows how events is handled by this method:
|
2550 | ///
|
2551 | /// |Event |XML |Handling
|
2552 | /// |------------------|---------------------------|----------------------------------------
|
2553 | /// |[`DeEvent::Start`]|`<tag>...</tag>` |if `allow_start == true`, result determined by the second table, otherwise emits [`UnexpectedStart("tag")`](DeError::UnexpectedStart)
|
2554 | /// |[`DeEvent::End`] |`</any-tag>` |Emits [`UnexpectedEnd("any-tag")`](DeError::UnexpectedEnd)
|
2555 | /// |[`DeEvent::Text`] |`text content` or `<![CDATA[cdata content]]>` (probably mixed)|Returns event content unchanged
|
2556 | /// |[`DeEvent::Eof`] | |Emits [`UnexpectedEof`](DeError::UnexpectedEof)
|
2557 | ///
|
2558 | /// Second event, consumed if [`DeEvent::Start`] was received and `allow_start == true`:
|
2559 | ///
|
2560 | /// |Event |XML |Handling
|
2561 | /// |------------------|---------------------------|----------------------------------------------------------------------------------
|
2562 | /// |[`DeEvent::Start`]|`<any-tag>...</any-tag>` |Emits [`UnexpectedStart("any-tag")`](DeError::UnexpectedStart)
|
2563 | /// |[`DeEvent::End`] |`</tag>` |Returns an empty slice, if close tag matched the open one
|
2564 | /// |[`DeEvent::End`] |`</any-tag>` |Emits [`UnexpectedEnd("any-tag")`](DeError::UnexpectedEnd)
|
2565 | /// |[`DeEvent::Text`] |`text content` or `<![CDATA[cdata content]]>` (probably mixed)|Returns event content unchanged, consumes events up to `</tag>`
|
2566 | /// |[`DeEvent::Eof`] | |Emits [`UnexpectedEof`](DeError::UnexpectedEof)
|
2567 | ///
|
2568 | /// [`Text`]: Event::Text
|
2569 | /// [`CData`]: Event::CData
|
2570 | fn read_string_impl(&mut self, allow_start: bool) -> Result<Cow<'de, str>, DeError> {
|
2571 | match self.next()? {
|
2572 | DeEvent::Text(e) => Ok(e.text),
|
2573 | // allow one nested level
|
2574 | DeEvent::Start(e) if allow_start => match self.next()? {
|
2575 | DeEvent::Text(t) => {
|
2576 | self.read_to_end(e.name())?;
|
2577 | Ok(t.text)
|
2578 | }
|
2579 | DeEvent::Start(s) => Err(DeError::UnexpectedStart(s.name().as_ref().to_owned())),
|
2580 | // We can get End event in case of `<tag></tag>` or `<tag/>` input
|
2581 | // Return empty text in that case
|
2582 | DeEvent::End(end) if end.name() == e.name() => Ok("" .into()),
|
2583 | DeEvent::End(end) => Err(DeError::UnexpectedEnd(end.name().as_ref().to_owned())),
|
2584 | DeEvent::Eof => Err(DeError::UnexpectedEof),
|
2585 | },
|
2586 | DeEvent::Start(e) => Err(DeError::UnexpectedStart(e.name().as_ref().to_owned())),
|
2587 | DeEvent::End(e) => Err(DeError::UnexpectedEnd(e.name().as_ref().to_owned())),
|
2588 | DeEvent::Eof => Err(DeError::UnexpectedEof),
|
2589 | }
|
2590 | }
|
2591 |
|
/// Drops all events up to and including the end event with
/// [name](BytesEnd::name()) `name` that closes the currently open element.
/// This method should be called after [`Self::next()`].
///
/// Events waiting in the replay buffer are dropped first. Once the buffer is
/// exhausted, the rest is skipped directly in the underlying reader.
#[cfg(feature = "overlapped-lists")]
fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
    // Number of nested elements named `name` that were opened in the buffered
    // events and are not closed yet
    let mut depth = 0;

    while let Some(event) = self.read.pop_front() {
        match event {
            DeEvent::Start(e) if e.name() == name => depth += 1,
            DeEvent::End(e) if e.name() == name => {
                if depth == 0 {
                    // The closing tag was found among the buffered events
                    return Ok(());
                }
                depth -= 1;
            }
            // Drop all other skipped events
            _ => {}
        }
    }

    // There are no buffered events left, so use the reader's own skipping,
    // which does not allocate memory for events.
    //
    // We should close all opened tags, because we could buffer Start events,
    // but not the corresponding End events. So the reader is asked once for
    // the element itself plus once per nested tag that is still open.
    // `read_to_end()` will return an error if an Eof was encountered
    // prematurely (in case of malformed XML).
    //
    // <tag><tag></tag></tag>
    // ^^^^^^^^^^             - buffered in `self.read`, when `self.read_to_end()` is called, depth = 2
    //           ^^^^^^       - read by the first call of `self.reader.read_to_end()`
    //                 ^^^^^^ - read by the second call of `self.reader.read_to_end()`
    for _ in 0..=depth {
        self.reader.read_to_end(name)?;
    }
    Ok(())
}
|
2638 | #[cfg (not(feature = "overlapped-lists" ))]
|
2639 | fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
|
2640 | // First one might be in self.peek
|
2641 | match self.next()? {
|
2642 | DeEvent::Start(e) => self.reader.read_to_end(e.name())?,
|
2643 | DeEvent::End(e) if e.name() == name => return Ok(()),
|
2644 | _ => (),
|
2645 | }
|
2646 | self.reader.read_to_end(name)
|
2647 | }
|
2648 | }
|
2649 |
|
2650 | impl<'de> Deserializer<'de, SliceReader<'de>> {
|
2651 | /// Create new deserializer that will borrow data from the specified string.
|
2652 | ///
|
2653 | /// Deserializer created with this method will not resolve custom entities.
|
2654 | #[allow (clippy::should_implement_trait)]
|
2655 | pub fn from_str(source: &'de str) -> Self {
|
2656 | Self::from_str_with_resolver(source, entity_resolver:NoEntityResolver)
|
2657 | }
|
2658 | }
|
2659 |
|
2660 | impl<'de, E> Deserializer<'de, SliceReader<'de>, E>
|
2661 | where
|
2662 | E: EntityResolver,
|
2663 | {
|
2664 | /// Create new deserializer that will borrow data from the specified string
|
2665 | /// and use specified entity resolver.
|
2666 | pub fn from_str_with_resolver(source: &'de str, entity_resolver: E) -> Self {
|
2667 | let mut reader: Reader<&[u8]> = Reader::from_str(source);
|
2668 | reader.expand_empty_elements(val:true);
|
2669 |
|
2670 | Self::new(
|
2671 | reader:SliceReader {
|
2672 | reader,
|
2673 | start_trimmer: StartTrimmer::default(),
|
2674 | },
|
2675 | entity_resolver,
|
2676 | )
|
2677 | }
|
2678 | }
|
2679 |
|
2680 | impl<'de, R> Deserializer<'de, IoReader<R>>
|
2681 | where
|
2682 | R: BufRead,
|
2683 | {
|
2684 | /// Create new deserializer that will copy data from the specified reader
|
2685 | /// into internal buffer.
|
2686 | ///
|
2687 | /// If you already have a string use [`Self::from_str`] instead, because it
|
2688 | /// will borrow instead of copy. If you have `&[u8]` which is known to represent
|
2689 | /// UTF-8, you can decode it first before using [`from_str`].
|
2690 | ///
|
2691 | /// Deserializer created with this method will not resolve custom entities.
|
2692 | pub fn from_reader(reader: R) -> Self {
|
2693 | Self::with_resolver(reader, entity_resolver:NoEntityResolver)
|
2694 | }
|
2695 | }
|
2696 |
|
2697 | impl<'de, R, E> Deserializer<'de, IoReader<R>, E>
|
2698 | where
|
2699 | R: BufRead,
|
2700 | E: EntityResolver,
|
2701 | {
|
2702 | /// Create new deserializer that will copy data from the specified reader
|
2703 | /// into internal buffer and use specified entity resolver.
|
2704 | ///
|
2705 | /// If you already have a string use [`Self::from_str`] instead, because it
|
2706 | /// will borrow instead of copy. If you have `&[u8]` which is known to represent
|
2707 | /// UTF-8, you can decode it first before using [`from_str`].
|
2708 | pub fn with_resolver(reader: R, entity_resolver: E) -> Self {
|
2709 | let mut reader: Reader = Reader::from_reader(reader);
|
2710 | reader.expand_empty_elements(val:true);
|
2711 |
|
2712 | Self::new(
|
2713 | reader:IoReader {
|
2714 | reader,
|
2715 | start_trimmer: StartTrimmer::default(),
|
2716 | buf: Vec::new(),
|
2717 | },
|
2718 | entity_resolver,
|
2719 | )
|
2720 | }
|
2721 | }
|
2722 |
|
2723 | impl<'de, 'a, R, E> de::Deserializer<'de> for &'a mut Deserializer<'de, R, E>
|
2724 | where
|
2725 | R: XmlRead<'de>,
|
2726 | E: EntityResolver,
|
2727 | {
|
2728 | type Error = DeError;
|
2729 |
|
2730 | deserialize_primitives!();
|
2731 |
|
2732 | fn deserialize_struct<V>(
|
2733 | self,
|
2734 | _name: &'static str,
|
2735 | fields: &'static [&'static str],
|
2736 | visitor: V,
|
2737 | ) -> Result<V::Value, DeError>
|
2738 | where
|
2739 | V: Visitor<'de>,
|
2740 | {
|
2741 | match self.next()? {
|
2742 | DeEvent::Start(e) => {
|
2743 | let name = e.name().as_ref().to_vec();
|
2744 | let map = map::MapAccess::new(self, e, fields)?;
|
2745 | let value = visitor.visit_map(map)?;
|
2746 | self.read_to_end(QName(&name))?;
|
2747 | Ok(value)
|
2748 | }
|
2749 | DeEvent::End(e) => Err(DeError::UnexpectedEnd(e.name().as_ref().to_owned())),
|
2750 | DeEvent::Text(_) => Err(DeError::ExpectedStart),
|
2751 | DeEvent::Eof => Err(DeError::UnexpectedEof),
|
2752 | }
|
2753 | }
|
2754 |
|
2755 | /// Unit represented in XML as a `xs:element` or text/CDATA content.
|
2756 | /// Any content inside `xs:element` is ignored and skipped.
|
2757 | ///
|
2758 | /// Produces unit struct from any of following inputs:
|
2759 | /// - any `<tag ...>...</tag>`
|
2760 | /// - any `<tag .../>`
|
2761 | /// - any consequent text / CDATA content (can consist of several parts
|
2762 | /// delimited by comments and processing instructions)
|
2763 | ///
|
2764 | /// # Events handling
|
2765 | ///
|
2766 | /// |Event |XML |Handling
|
2767 | /// |------------------|---------------------------|-------------------------------------------
|
2768 | /// |[`DeEvent::Start`]|`<tag>...</tag>` |Calls `visitor.visit_unit()`, consumes all events up to and including corresponding `End` event
|
2769 | /// |[`DeEvent::End`] |`</tag>` |Emits [`UnexpectedEnd("tag")`](DeError::UnexpectedEnd)
|
2770 | /// |[`DeEvent::Text`] |`text content` or `<![CDATA[cdata content]]>` (probably mixed)|Calls `visitor.visit_unit()`. The content is ignored
|
2771 | /// |[`DeEvent::Eof`] | |Emits [`UnexpectedEof`](DeError::UnexpectedEof)
|
2772 | fn deserialize_unit<V>(self, visitor: V) -> Result<V::Value, DeError>
|
2773 | where
|
2774 | V: Visitor<'de>,
|
2775 | {
|
2776 | match self.next()? {
|
2777 | DeEvent::Start(s) => {
|
2778 | self.read_to_end(s.name())?;
|
2779 | visitor.visit_unit()
|
2780 | }
|
2781 | DeEvent::Text(_) => visitor.visit_unit(),
|
2782 | DeEvent::End(e) => Err(DeError::UnexpectedEnd(e.name().as_ref().to_owned())),
|
2783 | DeEvent::Eof => Err(DeError::UnexpectedEof),
|
2784 | }
|
2785 | }
|
2786 |
|
2787 | fn deserialize_enum<V>(
|
2788 | self,
|
2789 | _name: &'static str,
|
2790 | _variants: &'static [&'static str],
|
2791 | visitor: V,
|
2792 | ) -> Result<V::Value, DeError>
|
2793 | where
|
2794 | V: Visitor<'de>,
|
2795 | {
|
2796 | visitor.visit_enum(var::EnumAccess::new(self))
|
2797 | }
|
2798 |
|
2799 | fn deserialize_seq<V>(self, visitor: V) -> Result<V::Value, DeError>
|
2800 | where
|
2801 | V: Visitor<'de>,
|
2802 | {
|
2803 | visitor.visit_seq(self)
|
2804 | }
|
2805 |
|
2806 | fn deserialize_map<V>(self, visitor: V) -> Result<V::Value, DeError>
|
2807 | where
|
2808 | V: Visitor<'de>,
|
2809 | {
|
2810 | self.deserialize_struct("" , &[], visitor)
|
2811 | }
|
2812 |
|
2813 | fn deserialize_option<V>(self, visitor: V) -> Result<V::Value, DeError>
|
2814 | where
|
2815 | V: Visitor<'de>,
|
2816 | {
|
2817 | deserialize_option!(self, self, visitor)
|
2818 | }
|
2819 |
|
2820 | /// Always call `visitor.visit_unit()` because returned value ignored in any case.
|
2821 | ///
|
2822 | /// This method consumes any single [event][DeEvent] except the [`Start`]
|
2823 | /// event, in which case all events up to and including corresponding [`End`]
|
2824 | /// event will be consumed.
|
2825 | ///
|
2826 | /// This method returns error if current event is [`End`] or [`Eof`].
|
2827 | ///
|
2828 | /// [`Start`]: DeEvent::Start
|
2829 | /// [`End`]: DeEvent::End
|
2830 | /// [`Eof`]: DeEvent::Eof
|
2831 | fn deserialize_ignored_any<V>(self, visitor: V) -> Result<V::Value, DeError>
|
2832 | where
|
2833 | V: Visitor<'de>,
|
2834 | {
|
2835 | match self.next()? {
|
2836 | DeEvent::Start(e) => self.read_to_end(e.name())?,
|
2837 | DeEvent::End(e) => return Err(DeError::UnexpectedEnd(e.name().as_ref().to_owned())),
|
2838 | DeEvent::Eof => return Err(DeError::UnexpectedEof),
|
2839 | _ => (),
|
2840 | }
|
2841 | visitor.visit_unit()
|
2842 | }
|
2843 |
|
2844 | fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, DeError>
|
2845 | where
|
2846 | V: Visitor<'de>,
|
2847 | {
|
2848 | match self.peek()? {
|
2849 | DeEvent::Start(_) => self.deserialize_map(visitor),
|
2850 | // Redirect to deserialize_unit in order to consume an event and return an appropriate error
|
2851 | DeEvent::End(_) | DeEvent::Eof => self.deserialize_unit(visitor),
|
2852 | _ => self.deserialize_string(visitor),
|
2853 | }
|
2854 | }
|
2855 | }
|
2856 |
|
2857 | /// An accessor to sequence elements forming a value for top-level sequence of XML
|
2858 | /// elements.
|
2859 | ///
|
2860 | /// Technically, multiple top-level elements violates XML rule of only one top-level
|
2861 | /// element, but we consider this as several concatenated XML documents.
|
2862 | impl<'de, 'a, R, E> SeqAccess<'de> for &'a mut Deserializer<'de, R, E>
|
2863 | where
|
2864 | R: XmlRead<'de>,
|
2865 | E: EntityResolver,
|
2866 | {
|
2867 | type Error = DeError;
|
2868 |
|
2869 | fn next_element_seed<T>(&mut self, seed: T) -> Result<Option<T::Value>, Self::Error>
|
2870 | where
|
2871 | T: DeserializeSeed<'de>,
|
2872 | {
|
2873 | match self.peek()? {
|
2874 | DeEvent::Eof => Ok(None),
|
2875 |
|
2876 | // Start(tag), End(tag), Text
|
2877 | _ => seed.deserialize(&mut **self).map(op:Some),
|
2878 | }
|
2879 | }
|
2880 | }
|
2881 |
|
2882 | ////////////////////////////////////////////////////////////////////////////////////////////////////
|
2883 |
|
/// State of the algorithm that turns raw reader events into semi-trimmed
/// payload events. It is kept separate so that the algorithm does not depend
/// on the way in which events are read.
struct StartTrimmer {
    /// When `true`, leading whitespace is removed from the next returned
    /// [`Event::Text`]. The flag is set to `true` after every passed-through
    /// event except [`Event::Text`] and [`Event::CData`], so an
    /// [`Event::Text`] that directly follows one of those two is not trimmed.
    trim_start: bool,
}
|
2894 |
|
2895 | impl StartTrimmer {
|
2896 | /// Converts raw reader's event into a payload event.
|
2897 | /// Returns `None`, if event should be skipped.
|
2898 | #[inline (always)]
|
2899 | fn trim<'a>(&mut self, event: Event<'a>) -> Option<PayloadEvent<'a>> {
|
2900 | let (event, trim_next_event) = match event {
|
2901 | Event::DocType(e) => (PayloadEvent::DocType(e), true),
|
2902 | Event::Start(e) => (PayloadEvent::Start(e), true),
|
2903 | Event::End(e) => (PayloadEvent::End(e), true),
|
2904 | Event::Eof => (PayloadEvent::Eof, true),
|
2905 |
|
2906 | // Do not trim next text event after Text or CDATA event
|
2907 | Event::CData(e) => (PayloadEvent::CData(e), false),
|
2908 | Event::Text(mut e) => {
|
2909 | // If event is empty after trimming, skip it
|
2910 | if self.trim_start && e.inplace_trim_start() {
|
2911 | return None;
|
2912 | }
|
2913 | (PayloadEvent::Text(e), false)
|
2914 | }
|
2915 |
|
2916 | _ => return None,
|
2917 | };
|
2918 | self.trim_start = trim_next_event;
|
2919 | Some(event)
|
2920 | }
|
2921 | }
|
2922 |
|
2923 | impl Default for StartTrimmer {
|
2924 | #[inline ]
|
2925 | fn default() -> Self {
|
2926 | Self { trim_start: true }
|
2927 | }
|
2928 | }
|
2929 |
|
2930 | ////////////////////////////////////////////////////////////////////////////////////////////////////
|
2931 |
|
2932 | /// Trait used by the deserializer for iterating over input. This is manually
|
2933 | /// "specialized" for iterating over `&[u8]`.
|
2934 | ///
|
2935 | /// You do not need to implement this trait, it is needed to abstract from
|
2936 | /// [borrowing](SliceReader) and [copying](IoReader) data sources and reuse code in
|
2937 | /// deserializer
|
2938 | pub trait XmlRead<'i> {
|
2939 | /// Return an input-borrowing event.
|
2940 | fn next(&mut self) -> Result<PayloadEvent<'i>, DeError>;
|
2941 |
|
2942 | /// Skips until end element is found. Unlike `next()` it will not allocate
|
2943 | /// when it cannot satisfy the lifetime.
|
2944 | fn read_to_end(&mut self, name: QName) -> Result<(), DeError>;
|
2945 |
|
2946 | /// A copy of the reader's decoder used to decode strings.
|
2947 | fn decoder(&self) -> Decoder;
|
2948 | }
|
2949 |
|
2950 | /// XML input source that reads from a std::io input stream.
|
2951 | ///
|
2952 | /// You cannot create it, it is created automatically when you call
|
2953 | /// [`Deserializer::from_reader`]
|
2954 | pub struct IoReader<R: BufRead> {
|
2955 | reader: Reader<R>,
|
2956 | start_trimmer: StartTrimmer,
|
2957 | buf: Vec<u8>,
|
2958 | }
|
2959 |
|
2960 | impl<'i, R: BufRead> XmlRead<'i> for IoReader<R> {
|
2961 | fn next(&mut self) -> Result<PayloadEvent<'static>, DeError> {
|
2962 | loop {
|
2963 | self.buf.clear();
|
2964 |
|
2965 | let event: Event<'_> = self.reader.read_event_into(&mut self.buf)?;
|
2966 | if let Some(event: PayloadEvent<'_>) = self.start_trimmer.trim(event) {
|
2967 | return Ok(event.into_owned());
|
2968 | }
|
2969 | }
|
2970 | }
|
2971 |
|
2972 | fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
|
2973 | match self.reader.read_to_end_into(end:name, &mut self.buf) {
|
2974 | Err(Error::UnexpectedEof(_)) => Err(DeError::UnexpectedEof),
|
2975 | Err(e: Error) => Err(e.into()),
|
2976 | Ok(_) => Ok(()),
|
2977 | }
|
2978 | }
|
2979 |
|
2980 | fn decoder(&self) -> Decoder {
|
2981 | self.reader.decoder()
|
2982 | }
|
2983 | }
|
2984 |
|
2985 | /// XML input source that reads from a slice of bytes and can borrow from it.
|
2986 | ///
|
2987 | /// You cannot create it, it is created automatically when you call
|
2988 | /// [`Deserializer::from_str`].
|
2989 | pub struct SliceReader<'de> {
|
2990 | reader: Reader<&'de [u8]>,
|
2991 | start_trimmer: StartTrimmer,
|
2992 | }
|
2993 |
|
2994 | impl<'de> XmlRead<'de> for SliceReader<'de> {
|
2995 | fn next(&mut self) -> Result<PayloadEvent<'de>, DeError> {
|
2996 | loop {
|
2997 | let event: Event<'de> = self.reader.read_event()?;
|
2998 | if let Some(event: PayloadEvent<'de>) = self.start_trimmer.trim(event) {
|
2999 | return Ok(event);
|
3000 | }
|
3001 | }
|
3002 | }
|
3003 |
|
3004 | fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
|
3005 | match self.reader.read_to_end(name) {
|
3006 | Err(Error::UnexpectedEof(_)) => Err(DeError::UnexpectedEof),
|
3007 | Err(e: Error) => Err(e.into()),
|
3008 | Ok(_) => Ok(()),
|
3009 | }
|
3010 | }
|
3011 |
|
3012 | fn decoder(&self) -> Decoder {
|
3013 | self.reader.decoder()
|
3014 | }
|
3015 | }
|
3016 |
|
3017 | #[cfg (test)]
|
3018 | mod tests {
|
3019 | use super::*;
|
3020 | use pretty_assertions::assert_eq;
|
3021 |
|
3022 | #[cfg (feature = "overlapped-lists" )]
|
3023 | mod skip {
|
3024 | use super::*;
|
3025 | use crate::de::DeEvent::*;
|
3026 | use crate::events::BytesEnd;
|
3027 | use pretty_assertions::assert_eq;
|
3028 |
|
3029 | /// Checks that `peek()` and `read()` behaves correctly after `skip()`
|
3030 | #[test ]
|
3031 | fn read_and_peek() {
|
3032 | let mut de = Deserializer::from_str(
|
3033 | r#"
|
3034 | <root>
|
3035 | <inner>
|
3036 | text
|
3037 | <inner/>
|
3038 | </inner>
|
3039 | <next/>
|
3040 | <target/>
|
3041 | </root>
|
3042 | "# ,
|
3043 | );
|
3044 |
|
3045 | // Initial conditions - both are empty
|
3046 | assert_eq!(de.read, vec![]);
|
3047 | assert_eq!(de.write, vec![]);
|
3048 |
|
3049 | assert_eq!(de.next().unwrap(), Start(BytesStart::new("root" )));
|
3050 | assert_eq!(de.peek().unwrap(), &Start(BytesStart::new("inner" )));
|
3051 |
|
3052 | // Mark that start_replay() should begin replay from this point
|
3053 | let checkpoint = de.skip_checkpoint();
|
3054 | assert_eq!(checkpoint, 0);
|
3055 |
|
3056 | // Should skip first <inner> tree
|
3057 | de.skip().unwrap();
|
3058 | assert_eq!(de.read, vec![]);
|
3059 | assert_eq!(
|
3060 | de.write,
|
3061 | vec![
|
3062 | Start(BytesStart::new("inner" )),
|
3063 | Text("text" .into()),
|
3064 | Start(BytesStart::new("inner" )),
|
3065 | End(BytesEnd::new("inner" )),
|
3066 | End(BytesEnd::new("inner" )),
|
3067 | ]
|
3068 | );
|
3069 |
|
3070 | // Consume <next/>. Now unconsumed XML looks like:
|
3071 | //
|
3072 | // <inner>
|
3073 | // text
|
3074 | // <inner/>
|
3075 | // </inner>
|
3076 | // <target/>
|
3077 | // </root>
|
3078 | assert_eq!(de.next().unwrap(), Start(BytesStart::new("next" )));
|
3079 | assert_eq!(de.next().unwrap(), End(BytesEnd::new("next" )));
|
3080 |
|
3081 | // We finish writing. Next call to `next()` should start replay that messages:
|
3082 | //
|
3083 | // <inner>
|
3084 | // text
|
3085 | // <inner/>
|
3086 | // </inner>
|
3087 | //
|
3088 | // and after that stream that messages:
|
3089 | //
|
3090 | // <target/>
|
3091 | // </root>
|
3092 | de.start_replay(checkpoint);
|
3093 | assert_eq!(
|
3094 | de.read,
|
3095 | vec![
|
3096 | Start(BytesStart::new("inner" )),
|
3097 | Text("text" .into()),
|
3098 | Start(BytesStart::new("inner" )),
|
3099 | End(BytesEnd::new("inner" )),
|
3100 | End(BytesEnd::new("inner" )),
|
3101 | ]
|
3102 | );
|
3103 | assert_eq!(de.write, vec![]);
|
3104 | assert_eq!(de.next().unwrap(), Start(BytesStart::new("inner" )));
|
3105 |
|
3106 | // Mark that start_replay() should begin replay from this point
|
3107 | let checkpoint = de.skip_checkpoint();
|
3108 | assert_eq!(checkpoint, 0);
|
3109 |
|
3110 | // Skip `$text` node and consume <inner/> after it
|
3111 | de.skip().unwrap();
|
3112 | assert_eq!(
|
3113 | de.read,
|
3114 | vec![
|
3115 | Start(BytesStart::new("inner" )),
|
3116 | End(BytesEnd::new("inner" )),
|
3117 | End(BytesEnd::new("inner" )),
|
3118 | ]
|
3119 | );
|
3120 | assert_eq!(
|
3121 | de.write,
|
3122 | vec![
|
3123 | // This comment here to keep the same formatting of both arrays
|
3124 | // otherwise rustfmt suggest one-line it
|
3125 | Text("text" .into()),
|
3126 | ]
|
3127 | );
|
3128 |
|
3129 | assert_eq!(de.next().unwrap(), Start(BytesStart::new("inner" )));
|
3130 | assert_eq!(de.next().unwrap(), End(BytesEnd::new("inner" )));
|
3131 |
|
3132 | // We finish writing. Next call to `next()` should start replay messages:
|
3133 | //
|
3134 | // text
|
3135 | // </inner>
|
3136 | //
|
3137 | // and after that stream that messages:
|
3138 | //
|
3139 | // <target/>
|
3140 | // </root>
|
3141 | de.start_replay(checkpoint);
|
3142 | assert_eq!(
|
3143 | de.read,
|
3144 | vec![
|
3145 | // This comment here to keep the same formatting as others
|
3146 | // otherwise rustfmt suggest one-line it
|
3147 | Text("text" .into()),
|
3148 | End(BytesEnd::new("inner" )),
|
3149 | ]
|
3150 | );
|
3151 | assert_eq!(de.write, vec![]);
|
3152 | assert_eq!(de.next().unwrap(), Text("text" .into()));
|
3153 | assert_eq!(de.next().unwrap(), End(BytesEnd::new("inner" )));
|
3154 | assert_eq!(de.next().unwrap(), Start(BytesStart::new("target" )));
|
3155 | assert_eq!(de.next().unwrap(), End(BytesEnd::new("target" )));
|
3156 | assert_eq!(de.next().unwrap(), End(BytesEnd::new("root" )));
|
3157 | assert_eq!(de.next().unwrap(), Eof);
|
3158 | }
|
3159 |
|
3160 | /// Checks that `read_to_end()` behaves correctly after `skip()`
|
3161 | #[test ]
|
3162 | fn read_to_end() {
|
3163 | let mut de = Deserializer::from_str(
|
3164 | r#"
|
3165 | <root>
|
3166 | <skip>
|
3167 | text
|
3168 | <skip/>
|
3169 | </skip>
|
3170 | <target>
|
3171 | <target/>
|
3172 | </target>
|
3173 | </root>
|
3174 | "# ,
|
3175 | );
|
3176 |
|
3177 | // Initial conditions - both are empty
|
3178 | assert_eq!(de.read, vec![]);
|
3179 | assert_eq!(de.write, vec![]);
|
3180 |
|
3181 | assert_eq!(de.next().unwrap(), Start(BytesStart::new("root" )));
|
3182 |
|
3183 | // Mark that start_replay() should begin replay from this point
|
3184 | let checkpoint = de.skip_checkpoint();
|
3185 | assert_eq!(checkpoint, 0);
|
3186 |
|
3187 | // Skip the <skip> tree
|
3188 | de.skip().unwrap();
|
3189 | assert_eq!(de.read, vec![]);
|
3190 | assert_eq!(
|
3191 | de.write,
|
3192 | vec![
|
3193 | Start(BytesStart::new("skip" )),
|
3194 | Text("text" .into()),
|
3195 | Start(BytesStart::new("skip" )),
|
3196 | End(BytesEnd::new("skip" )),
|
3197 | End(BytesEnd::new("skip" )),
|
3198 | ]
|
3199 | );
|
3200 |
|
3201 | // Drop all events that represents <target> tree. Now unconsumed XML looks like:
|
3202 | //
|
3203 | // <skip>
|
3204 | // text
|
3205 | // <skip/>
|
3206 | // </skip>
|
3207 | // </root>
|
3208 | assert_eq!(de.next().unwrap(), Start(BytesStart::new("target" )));
|
3209 | de.read_to_end(QName(b"target" )).unwrap();
|
3210 | assert_eq!(de.read, vec![]);
|
3211 | assert_eq!(
|
3212 | de.write,
|
3213 | vec![
|
3214 | Start(BytesStart::new("skip" )),
|
3215 | Text("text" .into()),
|
3216 | Start(BytesStart::new("skip" )),
|
3217 | End(BytesEnd::new("skip" )),
|
3218 | End(BytesEnd::new("skip" )),
|
3219 | ]
|
3220 | );
|
3221 |
|
3222 | // We finish writing. Next call to `next()` should start replay that messages:
|
3223 | //
|
3224 | // <skip>
|
3225 | // text
|
3226 | // <skip/>
|
3227 | // </skip>
|
3228 | //
|
3229 | // and after that stream that messages:
|
3230 | //
|
3231 | // </root>
|
3232 | de.start_replay(checkpoint);
|
3233 | assert_eq!(
|
3234 | de.read,
|
3235 | vec![
|
3236 | Start(BytesStart::new("skip" )),
|
3237 | Text("text" .into()),
|
3238 | Start(BytesStart::new("skip" )),
|
3239 | End(BytesEnd::new("skip" )),
|
3240 | End(BytesEnd::new("skip" )),
|
3241 | ]
|
3242 | );
|
3243 | assert_eq!(de.write, vec![]);
|
3244 |
|
3245 | assert_eq!(de.next().unwrap(), Start(BytesStart::new("skip" )));
|
3246 | de.read_to_end(QName(b"skip" )).unwrap();
|
3247 |
|
3248 | assert_eq!(de.next().unwrap(), End(BytesEnd::new("root" )));
|
3249 | assert_eq!(de.next().unwrap(), Eof);
|
3250 | }
|
3251 |
|
/// Verifies that `start_replay()` replays only the events buffered since the
/// given checkpoint and keeps the earlier skipped events in the write buffer.
///
/// Regression test for https://github.com/tafia/quick-xml/issues/435
#[test]
fn partial_replay() {
    let xml = r#"
<root>
<skipped-1/>
<skipped-2/>
<inner>
<skipped-3/>
<skipped-4/>
<target-2/>
</inner>
<target-1/>
</root>
"#;
    let mut deser = Deserializer::from_str(xml);

    // Events buffered by the two skips made after checkpoint 1
    let outer_skipped = vec![
        Start(BytesStart::new("skipped-1")),
        End(BytesEnd::new("skipped-1")),
        Start(BytesStart::new("skipped-2")),
        End(BytesEnd::new("skipped-2")),
    ];
    // Events buffered by the two skips made after checkpoint 2
    let inner_skipped = vec![
        Start(BytesStart::new("skipped-3")),
        End(BytesEnd::new("skipped-3")),
        Start(BytesStart::new("skipped-4")),
        End(BytesEnd::new("skipped-4")),
    ];
    // Everything that was skipped, in the order it was buffered
    let all_skipped = vec![
        // checkpoint 1
        Start(BytesStart::new("skipped-1")),
        End(BytesEnd::new("skipped-1")),
        Start(BytesStart::new("skipped-2")),
        End(BytesEnd::new("skipped-2")),
        // checkpoint 2
        Start(BytesStart::new("skipped-3")),
        End(BytesEnd::new("skipped-3")),
        Start(BytesStart::new("skipped-4")),
        End(BytesEnd::new("skipped-4")),
    ];

    // Both buffers are empty before anything is read
    assert_eq!(deser.read, vec![]);
    assert_eq!(deser.write, vec![]);

    assert_eq!(deser.next().unwrap(), Start(BytesStart::new("root")));

    // A replay started with this checkpoint begins from this position
    let checkpoint1 = deser.skip_checkpoint();
    assert_eq!(checkpoint1, 0);

    // Skip <skipped-1/> and <skipped-2/>
    for _ in 0..2 {
        deser.skip().unwrap();
    }
    assert_eq!(deser.read, vec![]);
    assert_eq!(deser.write, outer_skipped);

    ////////////////////////////////////////////////////////////////////////////////////////

    assert_eq!(deser.next().unwrap(), Start(BytesStart::new("inner")));
    assert_eq!(
        deser.peek().unwrap(),
        &Start(BytesStart::new("skipped-3"))
    );
    // Peeking places the looked-ahead event into the read buffer
    let peeked_start = vec![Start(BytesStart::new("skipped-3"))];
    assert_eq!(deser.read, peeked_start);
    assert_eq!(deser.write, outer_skipped);

    // A replay started with this checkpoint begins from this position
    let checkpoint2 = deser.skip_checkpoint();
    assert_eq!(checkpoint2, 4);

    // Skip <skipped-3/> and <skipped-4/>
    for _ in 0..2 {
        deser.skip().unwrap();
    }
    assert_eq!(deser.read, vec![]);
    assert_eq!(deser.write, all_skipped);

    assert_eq!(deser.next().unwrap(), Start(BytesStart::new("target-2")));
    assert_eq!(deser.next().unwrap(), End(BytesEnd::new("target-2")));
    assert_eq!(deser.peek().unwrap(), &End(BytesEnd::new("inner")));
    let peeked_end = vec![End(BytesEnd::new("inner"))];
    assert_eq!(deser.read, peeked_end);
    assert_eq!(deser.write, all_skipped);

    // Replay from checkpoint 2: only the events skipped inside <inner> move
    // to the read buffer, ahead of the already peeked </inner>
    deser.start_replay(checkpoint2);
    assert_eq!(
        deser.read,
        vec![
            Start(BytesStart::new("skipped-3")),
            End(BytesEnd::new("skipped-3")),
            Start(BytesStart::new("skipped-4")),
            End(BytesEnd::new("skipped-4")),
            End(BytesEnd::new("inner")),
        ]
    );
    assert_eq!(deser.write, outer_skipped);

    // Replayed events
    for expected in inner_skipped {
        assert_eq!(deser.next().unwrap(), expected);
    }

    assert_eq!(deser.next().unwrap(), End(BytesEnd::new("inner")));
    assert_eq!(deser.read, vec![]);
    assert_eq!(deser.write, outer_skipped);

    ////////////////////////////////////////////////////////////////////////////////////////

    // New events
    assert_eq!(deser.next().unwrap(), Start(BytesStart::new("target-1")));
    assert_eq!(deser.next().unwrap(), End(BytesEnd::new("target-1")));

    assert_eq!(deser.read, vec![]);
    assert_eq!(deser.write, outer_skipped);

    // Replay from checkpoint 1: the remaining buffered events move to the read buffer
    deser.start_replay(checkpoint1);
    assert_eq!(deser.read, outer_skipped);
    assert_eq!(deser.write, vec![]);

    // Replayed events
    for expected in outer_skipped {
        assert_eq!(deser.next().unwrap(), expected);
    }

    assert_eq!(deser.read, vec![]);
    assert_eq!(deser.write, vec![]);

    // New events
    assert_eq!(deser.next().unwrap(), End(BytesEnd::new("root")));
    assert_eq!(deser.next().unwrap(), Eof);
}
|
3450 |
|
/// Checks that exceeding the configured event buffer size is reported as
/// `DeError::TooManyEvents` carrying the configured limit
#[test]
fn limit() {
    use serde::Deserialize;

    #[derive(Debug, Deserialize)]
    #[allow(unused)]
    struct List {
        item: Vec<()>,
    }

    let xml = r#"
<any-name>
<item/>
<another-item>
<some-element>with text</some-element>
<yet-another-element/>
</another-item>
<item/>
<item/>
</any-name>
"#;
    let mut deser = Deserializer::from_str(xml);
    // Allow at most 3 buffered events
    deser.event_buffer_size(NonZeroUsize::new(3));

    let outcome = List::deserialize(&mut deser);
    if let Err(DeError::TooManyEvents(count)) = &outcome {
        assert_eq!(count.get(), 3);
    } else {
        panic!("Expected `Err(TooManyEvents(3))`, but found {:?}", outcome);
    }
}
|
3482 |
|
/// Skips over an unclosed document and replays what was buffered.
///
/// Before `Eof` was handled in `skip` this test failed with memory allocation
#[test]
fn invalid_xml() {
    use crate::de::DeEvent::*;

    let mut deser = Deserializer::from_str("<root>");

    // Buffer every event, then move the buffered events back for reading
    let from = deser.skip_checkpoint();
    deser.skip().unwrap();
    deser.start_replay(from);

    let buffered = vec![Start(BytesStart::new("root")), Eof];
    assert_eq!(deser.read, buffered);
}
|
3496 | }
|
3497 |
|
3498 | mod read_to_end {
|
3499 | use super::*;
|
3500 | use crate::de::DeEvent::*;
|
3501 | use pretty_assertions::assert_eq;
|
3502 |
|
3503 | #[test ]
|
3504 | fn complex() {
|
3505 | let mut de = Deserializer::from_str(
|
3506 | r#"
|
3507 | <root>
|
3508 | <tag a="1"><tag>text</tag>content</tag>
|
3509 | <tag a="2"><![CDATA[cdata content]]></tag>
|
3510 | <self-closed/>
|
3511 | </root>
|
3512 | "# ,
|
3513 | );
|
3514 |
|
3515 | assert_eq!(de.next().unwrap(), Start(BytesStart::new("root" )));
|
3516 |
|
3517 | assert_eq!(
|
3518 | de.next().unwrap(),
|
3519 | Start(BytesStart::from_content(r#"tag a="1""# , 3))
|
3520 | );
|
3521 | assert_eq!(de.read_to_end(QName(b"tag" )).unwrap(), ());
|
3522 |
|
3523 | assert_eq!(
|
3524 | de.next().unwrap(),
|
3525 | Start(BytesStart::from_content(r#"tag a="2""# , 3))
|
3526 | );
|
3527 | assert_eq!(de.next().unwrap(), Text("cdata content" .into()));
|
3528 | assert_eq!(de.next().unwrap(), End(BytesEnd::new("tag" )));
|
3529 |
|
3530 | assert_eq!(de.next().unwrap(), Start(BytesStart::new("self-closed" )));
|
3531 | assert_eq!(de.read_to_end(QName(b"self-closed" )).unwrap(), ());
|
3532 |
|
3533 | assert_eq!(de.next().unwrap(), End(BytesEnd::new("root" )));
|
3534 | assert_eq!(de.next().unwrap(), Eof);
|
3535 | }
|
3536 |
|
3537 | #[test ]
|
3538 | fn invalid_xml1() {
|
3539 | let mut de = Deserializer::from_str("<tag><tag></tag>" );
|
3540 |
|
3541 | assert_eq!(de.next().unwrap(), Start(BytesStart::new("tag" )));
|
3542 | assert_eq!(de.peek().unwrap(), &Start(BytesStart::new("tag" )));
|
3543 |
|
3544 | match de.read_to_end(QName(b"tag" )) {
|
3545 | Err(DeError::UnexpectedEof) => (),
|
3546 | x => panic!("Expected `Err(UnexpectedEof)`, but found {:?}" , x),
|
3547 | }
|
3548 | assert_eq!(de.next().unwrap(), Eof);
|
3549 | }
|
3550 |
|
3551 | #[test ]
|
3552 | fn invalid_xml2() {
|
3553 | let mut de = Deserializer::from_str("<tag><![CDATA[]]><tag></tag>" );
|
3554 |
|
3555 | assert_eq!(de.next().unwrap(), Start(BytesStart::new("tag" )));
|
3556 | assert_eq!(de.peek().unwrap(), &Text("" .into()));
|
3557 |
|
3558 | match de.read_to_end(QName(b"tag" )) {
|
3559 | Err(DeError::UnexpectedEof) => (),
|
3560 | x => panic!("Expected `Err(UnexpectedEof)`, but found {:?}" , x),
|
3561 | }
|
3562 | assert_eq!(de.next().unwrap(), Eof);
|
3563 | }
|
3564 | }
|
3565 |
|
/// Checks that `IoReader` and `SliceReader` produce the same events, in the
/// same order, for the same input
#[test]
fn borrowing_reader_parity() {
    let xml = r#"
<item name="hello" source="world.rs">Some text</item>
<item2/>
<item3 value="world" />
"#;

    let mut io_reader = IoReader {
        reader: Reader::from_reader(xml.as_bytes()),
        start_trimmer: StartTrimmer::default(),
        buf: Vec::new(),
    };
    let mut slice_reader = SliceReader {
        reader: Reader::from_str(xml),
        start_trimmer: StartTrimmer::default(),
    };

    loop {
        let from_io = io_reader.next().unwrap();
        let from_slice = slice_reader.next().unwrap();

        match (&from_io, &from_slice) {
            // Stop only once both readers are exhausted
            (PayloadEvent::Eof, PayloadEvent::Eof) => break,
            _ => assert_eq!(from_io, from_slice),
        }
    }
}
|
3595 |
|
/// Checks the exact sequence of events that `SliceReader` produces when
/// expansion of empty elements is turned on
#[test]
fn borrowing_reader_events() {
    use crate::de::PayloadEvent::*;

    let xml = r#"
<item name="hello" source="world.rs">Some text</item>
<item2></item2>
<item3/>
<item4 value="world" />
"#;

    let mut slice_reader = SliceReader {
        reader: Reader::from_str(xml),
        start_trimmer: StartTrimmer::default(),
    };
    slice_reader.reader.expand_empty_elements(true);

    // Collect everything up to (but not including) `Eof`
    let mut seen = Vec::new();
    loop {
        match slice_reader.next().unwrap() {
            PayloadEvent::Eof => break,
            event => seen.push(event),
        }
    }

    let expected = vec![
        Start(BytesStart::from_content(
            r#"item name="hello" source="world.rs""#,
            4,
        )),
        Text(BytesText::from_escaped("Some text")),
        End(BytesEnd::new("item")),
        Start(BytesStart::from_content("item2", 5)),
        End(BytesEnd::new("item2")),
        Start(BytesStart::from_content("item3", 5)),
        End(BytesEnd::new("item3")),
        Start(BytesStart::from_content(r#"item4 value="world" "#, 5)),
        End(BytesEnd::new("item4")),
    ];
    assert_eq!(seen, expected);
}
|
3642 |
|
/// Ensures that [`Deserializer::read_string()`] can never receive an `End`
/// event, because the parser reports a mismatched end tag first
#[test]
fn read_string() {
    // End tag without any start tag
    let lone_end = from_str::<String>(r#"</root>"#);
    match lone_end {
        Err(DeError::InvalidXml(Error::EndEventMismatch { expected, found })) => {
            assert_eq!(expected, "");
            assert_eq!(found, "root");
        }
        x => panic!(
            r#"Expected `Err(InvalidXml(EndEventMismatch("", "root")))`, but found {:?}"#,
            x
        ),
    }

    // Empty element gives an empty string
    let empty = from_str::<String>(r#"<root></root>"#).unwrap();
    assert_eq!(empty, "");

    // End tag that does not match the open start tag
    let wrong_end = from_str::<String>(r#"<root></other>"#);
    match wrong_end {
        Err(DeError::InvalidXml(Error::EndEventMismatch { expected, found })) => {
            assert_eq!(expected, "root");
            assert_eq!(found, "other");
        }
        x => panic!(
            r#"Expected `Err(InvalidXml(EndEventMismatch("root", "other")))`, but found {:?}"#,
            x
        ),
    }
}
|
3672 |
|
/// Tests for https://github.com/tafia/quick-xml/issues/474.
///
/// These tests ensure that comments and processing instructions are ignored,
/// and that text and CDATA pieces separated by them are still reported as one
/// logical string (a single `DeEvent::Text`).
mod merge_text {
    use super::*;
    use pretty_assertions::assert_eq;

    /// Plain text alone is reported as one `Text` event
    #[test ]
    fn text() {
        let mut de = Deserializer::from_str("text" );
        assert_eq!(de.next().unwrap(), DeEvent::Text("text" .into()));
    }

    /// A CDATA section alone is reported as a `Text` event with its content
    #[test ]
    fn cdata() {
        let mut de = Deserializer::from_str("<![CDATA[cdata]]>" );
        assert_eq!(de.next().unwrap(), DeEvent::Text("cdata" .into()));
    }

    /// Text followed by CDATA is merged into one `Text` event
    #[test ]
    fn text_and_cdata() {
        let mut de = Deserializer::from_str("text and <![CDATA[cdata]]>" );
        assert_eq!(de.next().unwrap(), DeEvent::Text("text and cdata" .into()));
    }

    /// An empty CDATA section after text keeps the trailing space of the text
    #[test ]
    fn text_and_empty_cdata() {
        let mut de = Deserializer::from_str("text and <![CDATA[]]>" );
        assert_eq!(de.next().unwrap(), DeEvent::Text("text and " .into()));
    }

    /// CDATA followed by text is merged into one `Text` event
    #[test ]
    fn cdata_and_text() {
        let mut de = Deserializer::from_str("<![CDATA[cdata]]> and text" );
        assert_eq!(de.next().unwrap(), DeEvent::Text("cdata and text" .into()));
    }

    /// An empty CDATA section before text keeps the leading space of the text
    #[test ]
    fn empty_cdata_and_text() {
        let mut de = Deserializer::from_str("<![CDATA[]]> and text" );
        assert_eq!(de.next().unwrap(), DeEvent::Text(" and text" .into()));
    }

    /// Two CDATA sections that together carry a `]]>` sequence are merged
    /// into one `Text` event
    #[test ]
    fn cdata_and_cdata() {
        let mut de = Deserializer::from_str(
            " \
<![CDATA[cdata]]]]> \
<![CDATA[>cdata]]> \
" ,
        );
        assert_eq!(de.next().unwrap(), DeEvent::Text("cdata]]>cdata" .into()));
    }

    /// Same combinations as above, with two comments between the pieces
    mod comment_between {
        use super::*;
        use pretty_assertions::assert_eq;

        /// Text, comments, text: one merged `Text` event is expected
        #[test ]
        fn text() {
            let mut de = Deserializer::from_str(
                " \
text \
<!--comment 1--><!--comment 2--> \
text \
" ,
            );
            assert_eq!(de.next().unwrap(), DeEvent::Text("text text" .into()));
        }

        /// CDATA, comments, CDATA: one merged `Text` event is expected
        #[test ]
        fn cdata() {
            let mut de = Deserializer::from_str(
                " \
<![CDATA[cdata]]]]> \
<!--comment 1--><!--comment 2--> \
<![CDATA[>cdata]]> \
" ,
            );
            assert_eq!(de.next().unwrap(), DeEvent::Text("cdata]]>cdata" .into()));
        }

        /// Text, comments, CDATA: one merged `Text` event is expected
        #[test ]
        fn text_and_cdata() {
            let mut de = Deserializer::from_str(
                " \
text \
<!--comment 1--><!--comment 2--> \
<![CDATA[ cdata]]> \
" ,
            );
            assert_eq!(de.next().unwrap(), DeEvent::Text("text cdata" .into()));
        }

        /// Text, comments, empty CDATA: only the text content is expected
        #[test ]
        fn text_and_empty_cdata() {
            let mut de = Deserializer::from_str(
                " \
text \
<!--comment 1--><!--comment 2--> \
<![CDATA[]]> \
" ,
            );
            assert_eq!(de.next().unwrap(), DeEvent::Text("text " .into()));
        }

        /// CDATA, comments, text: one merged `Text` event is expected
        #[test ]
        fn cdata_and_text() {
            let mut de = Deserializer::from_str(
                " \
<![CDATA[cdata ]]> \
<!--comment 1--><!--comment 2--> \
text \
" ,
            );
            assert_eq!(de.next().unwrap(), DeEvent::Text("cdata text" .into()));
        }

        /// Empty CDATA, comments, text: only the text content is expected
        #[test ]
        fn empty_cdata_and_text() {
            let mut de = Deserializer::from_str(
                " \
<![CDATA[]]> \
<!--comment 1--><!--comment 2--> \
text \
" ,
            );
            assert_eq!(de.next().unwrap(), DeEvent::Text(" text" .into()));
        }

        /// Two CDATA sections around the comments, merged into one `Text` event
        #[test ]
        fn cdata_and_cdata() {
            let mut de = Deserializer::from_str(
                " \
<![CDATA[cdata]]]> \
<!--comment 1--><!--comment 2--> \
<![CDATA[]>cdata]]> \
" ,
            );
            assert_eq!(de.next().unwrap(), DeEvent::Text("cdata]]>cdata" .into()));
        }
    }

    /// Same combinations as above, with two processing instructions between
    /// the pieces
    mod pi_between {
        use super::*;
        use pretty_assertions::assert_eq;

        /// Text, processing instructions, text: one merged `Text` event is expected
        #[test ]
        fn text() {
            let mut de = Deserializer::from_str(
                " \
text \
<?pi 1?><?pi 2?> \
text \
" ,
            );
            assert_eq!(de.next().unwrap(), DeEvent::Text("text text" .into()));
        }

        /// CDATA, processing instructions, CDATA: one merged `Text` event is expected
        #[test ]
        fn cdata() {
            let mut de = Deserializer::from_str(
                " \
<![CDATA[cdata]]]]> \
<?pi 1?><?pi 2?> \
<![CDATA[>cdata]]> \
" ,
            );
            assert_eq!(de.next().unwrap(), DeEvent::Text("cdata]]>cdata" .into()));
        }

        /// Text, processing instructions, CDATA: one merged `Text` event is expected
        #[test ]
        fn text_and_cdata() {
            let mut de = Deserializer::from_str(
                " \
text \
<?pi 1?><?pi 2?> \
<![CDATA[ cdata]]> \
" ,
            );
            assert_eq!(de.next().unwrap(), DeEvent::Text("text cdata" .into()));
        }

        /// Text, processing instructions, empty CDATA: only the text content is expected
        #[test ]
        fn text_and_empty_cdata() {
            let mut de = Deserializer::from_str(
                " \
text \
<?pi 1?><?pi 2?> \
<![CDATA[]]> \
" ,
            );
            assert_eq!(de.next().unwrap(), DeEvent::Text("text " .into()));
        }

        /// CDATA, processing instructions, text: one merged `Text` event is expected
        #[test ]
        fn cdata_and_text() {
            let mut de = Deserializer::from_str(
                " \
<![CDATA[cdata ]]> \
<?pi 1?><?pi 2?> \
text \
" ,
            );
            assert_eq!(de.next().unwrap(), DeEvent::Text("cdata text" .into()));
        }

        /// Empty CDATA, processing instructions, text: only the text content is expected
        #[test ]
        fn empty_cdata_and_text() {
            let mut de = Deserializer::from_str(
                " \
<![CDATA[]]> \
<?pi 1?><?pi 2?> \
text \
" ,
            );
            assert_eq!(de.next().unwrap(), DeEvent::Text(" text" .into()));
        }

        /// Two CDATA sections around the processing instructions, merged into
        /// one `Text` event
        #[test ]
        fn cdata_and_cdata() {
            let mut de = Deserializer::from_str(
                " \
<![CDATA[cdata]]]> \
<?pi 1?><?pi 2?> \
<![CDATA[]>cdata]]> \
" ,
            );
            assert_eq!(de.next().unwrap(), DeEvent::Text("cdata]]>cdata" .into()));
        }
    }
}
|
3906 |
|
3907 | /// Tests for https://github.com/tafia/quick-xml/issues/474.
|
3908 | ///
|
/// These tests ensure that any combination of payload data is processed
|
3910 | /// as expected.
|
3911 | mod triples {
|
3912 | use super::*;
|
3913 | use pretty_assertions::assert_eq;
|
3914 |
|
/// Event triples whose first event is a start tag
mod start {
    use super::*;

    /// Inputs of the form `<tag1><tag2>...`: two start tags followed by a third event
    mod start {
        use super::*;
        use pretty_assertions::assert_eq;

        /// Three unclosed start tags are reported in order, followed by `Eof`
        #[test ]
        fn start() {
            let mut de = Deserializer::from_str("<tag1><tag2><tag3>" );
            assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1" )));
            assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2" )));
            assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag3" )));
            assert_eq!(de.next().unwrap(), DeEvent::Eof);
        }

        /// The end tag here matches the innermost `<tag2>`, so no error is
        /// expected; the still unclosed `<tag1>` is simply followed by `Eof`
        #[test ]
        fn end() {
            let mut de = Deserializer::from_str("<tag1><tag2></tag2>" );
            assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1" )));
            assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2" )));
            assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag2" )));
            assert_eq!(de.next().unwrap(), DeEvent::Eof);
        }

        /// Text after two start tags is reported as a trimmed `Text` event
        #[test ]
        fn text() {
            let mut de = Deserializer::from_str("<tag1><tag2> text " );
            assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1" )));
            assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2" )));
            // Text is trimmed from both sides
            assert_eq!(de.next().unwrap(), DeEvent::Text("text" .into()));
            assert_eq!(de.next().unwrap(), DeEvent::Eof);
        }

        /// CDATA after two start tags is reported as `Text` with its
        /// surrounding spaces kept
        #[test ]
        fn cdata() {
            let mut de = Deserializer::from_str("<tag1><tag2><![CDATA[ cdata ]]>" );
            assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1" )));
            assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2" )));
            assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata " .into()));
            assert_eq!(de.next().unwrap(), DeEvent::Eof);
        }

        /// Once the input is exhausted, `Eof` is returned on every further call
        #[test ]
        fn eof() {
            let mut de = Deserializer::from_str("<tag1><tag2>" );
            assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1" )));
            assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2" )));
            assert_eq!(de.next().unwrap(), DeEvent::Eof);
            assert_eq!(de.next().unwrap(), DeEvent::Eof);
        }
    }

    /// Inputs of the form `<tag></tag>...`: an empty element followed by a third event
    mod end {
        use super::*;
        use pretty_assertions::assert_eq;

        /// A start tag after a closed element is reported as usual
        #[test ]
        fn start() {
            let mut de = Deserializer::from_str("<tag></tag><tag2>" );
            assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag" )));
            assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag" )));
            assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2" )));
            assert_eq!(de.next().unwrap(), DeEvent::Eof);
        }

        /// An end tag with no open element is reported as `EndEventMismatch`
        /// with an empty `expected` name; the next call returns `Eof`
        #[test ]
        fn end() {
            let mut de = Deserializer::from_str("<tag></tag></tag2>" );
            assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag" )));
            assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag" )));
            match de.next() {
                Err(DeError::InvalidXml(Error::EndEventMismatch { expected, found })) => {
                    assert_eq!(expected, "" );
                    assert_eq!(found, "tag2" );
                }
                x => panic!("Expected `InvalidXml(EndEventMismatch {{ expected = '', found = 'tag2' }})`, but got {:?}" , x),
            }
            assert_eq!(de.next().unwrap(), DeEvent::Eof);
        }

        /// Text after a closed element is reported as a trimmed `Text` event
        #[test ]
        fn text() {
            let mut de = Deserializer::from_str("<tag></tag> text " );
            assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag" )));
            assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag" )));
            // Text is trimmed from both sides
            assert_eq!(de.next().unwrap(), DeEvent::Text("text" .into()));
            assert_eq!(de.next().unwrap(), DeEvent::Eof);
        }

        /// CDATA after a closed element is reported as `Text` with its
        /// surrounding spaces kept
        #[test ]
        fn cdata() {
            let mut de = Deserializer::from_str("<tag></tag><![CDATA[ cdata ]]>" );
            assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag" )));
            assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag" )));
            assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata " .into()));
            assert_eq!(de.next().unwrap(), DeEvent::Eof);
        }

        /// Once the input is exhausted, `Eof` is returned on every further call
        #[test ]
        fn eof() {
            let mut de = Deserializer::from_str("<tag></tag>" );
            assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag" )));
            assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag" )));
            assert_eq!(de.next().unwrap(), DeEvent::Eof);
            assert_eq!(de.next().unwrap(), DeEvent::Eof);
        }
    }

    /// Inputs of the form `<tag> text ...`: a start tag and text followed by a third event
    mod text {
        use super::*;
        use pretty_assertions::assert_eq;

        /// Text between two start tags is trimmed on both sides
        #[test ]
        fn start() {
            let mut de = Deserializer::from_str("<tag> text <tag2>" );
            assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag" )));
            // Text is trimmed from both sides
            assert_eq!(de.next().unwrap(), DeEvent::Text("text" .into()));
            assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2" )));
            assert_eq!(de.next().unwrap(), DeEvent::Eof);
        }

        /// Text between a start tag and its end tag is trimmed on both sides
        #[test ]
        fn end() {
            let mut de = Deserializer::from_str("<tag> text </tag>" );
            assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag" )));
            // Text is trimmed from both sides
            assert_eq!(de.next().unwrap(), DeEvent::Text("text" .into()));
            assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag" )));
            assert_eq!(de.next().unwrap(), DeEvent::Eof);
        }

        // start::text::text has no difference from start::text

        /// Text followed by CDATA is merged into one `Text` event
        #[test ]
        fn cdata() {
            let mut de = Deserializer::from_str("<tag> text <![CDATA[ cdata ]]>" );
            assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag" )));
            // Text is trimmed from the start
            assert_eq!(de.next().unwrap(), DeEvent::Text("text cdata " .into()));
            assert_eq!(de.next().unwrap(), DeEvent::Eof);
        }

        /// Once the input is exhausted, `Eof` is returned on every further call
        #[test ]
        fn eof() {
            let mut de = Deserializer::from_str("<tag> text " );
            assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag" )));
            // Text is trimmed from both sides
            assert_eq!(de.next().unwrap(), DeEvent::Text("text" .into()));
            assert_eq!(de.next().unwrap(), DeEvent::Eof);
            assert_eq!(de.next().unwrap(), DeEvent::Eof);
        }
    }

    /// Inputs of the form `<tag><![CDATA[ cdata ]]>...`: a start tag and CDATA
    /// followed by a third event
    mod cdata {
        use super::*;
        use pretty_assertions::assert_eq;

        /// CDATA between two start tags is reported with its spaces kept
        #[test ]
        fn start() {
            let mut de = Deserializer::from_str("<tag><![CDATA[ cdata ]]><tag2>" );
            assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag" )));
            assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata " .into()));
            assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2" )));
            assert_eq!(de.next().unwrap(), DeEvent::Eof);
        }

        /// CDATA between a start tag and its end tag is reported with its spaces kept
        #[test ]
        fn end() {
            let mut de = Deserializer::from_str("<tag><![CDATA[ cdata ]]></tag>" );
            assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag" )));
            assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata " .into()));
            assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag" )));
            assert_eq!(de.next().unwrap(), DeEvent::Eof);
        }

        /// CDATA followed by text is merged into one `Text` event
        #[test ]
        fn text() {
            let mut de = Deserializer::from_str("<tag><![CDATA[ cdata ]]> text " );
            assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag" )));
            // Text is trimmed from the end
            assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata text" .into()));
            assert_eq!(de.next().unwrap(), DeEvent::Eof);
        }

        /// Two adjacent CDATA sections are merged into one `Text` event
        #[test ]
        fn cdata() {
            let mut de =
                Deserializer::from_str("<tag><![CDATA[ cdata ]]><![CDATA[ cdata2 ]]>" );
            assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag" )));
            assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata cdata2 " .into()));
            assert_eq!(de.next().unwrap(), DeEvent::Eof);
        }

        /// Once the input is exhausted, `Eof` is returned on every further call
        #[test ]
        fn eof() {
            let mut de = Deserializer::from_str("<tag><![CDATA[ cdata ]]>" );
            assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag" )));
            assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata " .into()));
            assert_eq!(de.next().unwrap(), DeEvent::Eof);
            assert_eq!(de.next().unwrap(), DeEvent::Eof);
        }
    }
}
|
4127 |
|
/// A document that begins with an end tag always produces an error
#[test]
fn end() {
    let mut reader = Deserializer::from_str("</tag>");

    let first = reader.next();
    match first {
        Err(DeError::InvalidXml(Error::EndEventMismatch { expected, found })) => {
            assert_eq!(expected, "");
            assert_eq!(found, "tag");
        }
        x => panic!("Expected `InvalidXml(EndEventMismatch {{ expected = '', found = 'tag' }})`, but got {:?}", x),
    }

    // After the error the reader reports the end of input
    assert_eq!(reader.next().unwrap(), DeEvent::Eof);
}
|
4141 |
|
4142 | mod text {
|
4143 | use super::*;
|
4144 | use pretty_assertions::assert_eq;
|
4145 |
|
4146 | mod start {
|
4147 | use super::*;
|
4148 | use pretty_assertions::assert_eq;
|
4149 |
|
4150 | #[test ]
|
4151 | fn start() {
|
4152 | let mut de = Deserializer::from_str(" text <tag1><tag2>" );
|
4153 | // Text is trimmed from both sides
|
4154 | assert_eq!(de.next().unwrap(), DeEvent::Text("text" .into()));
|
4155 | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1" )));
|
4156 | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2" )));
|
4157 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
4158 | }
|
4159 |
|
4160 | /// Not matching end tag will result in error
|
4161 | #[test ]
|
4162 | fn end() {
|
4163 | let mut de = Deserializer::from_str(" text <tag></tag>" );
|
4164 | // Text is trimmed from both sides
|
4165 | assert_eq!(de.next().unwrap(), DeEvent::Text("text" .into()));
|
4166 | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag" )));
|
4167 | assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag" )));
|
4168 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
4169 | }
|
4170 |
|
4171 | #[test ]
|
4172 | fn text() {
|
4173 | let mut de = Deserializer::from_str(" text <tag> text2 " );
|
4174 | // Text is trimmed from both sides
|
4175 | assert_eq!(de.next().unwrap(), DeEvent::Text("text" .into()));
|
4176 | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag" )));
|
4177 | // Text is trimmed from both sides
|
4178 | assert_eq!(de.next().unwrap(), DeEvent::Text("text2" .into()));
|
4179 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
4180 | }
|
4181 |
|
4182 | #[test ]
|
4183 | fn cdata() {
|
4184 | let mut de = Deserializer::from_str(" text <tag><![CDATA[ cdata ]]>" );
|
4185 | // Text is trimmed from both sides
|
4186 | assert_eq!(de.next().unwrap(), DeEvent::Text("text" .into()));
|
4187 | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag" )));
|
4188 | assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata " .into()));
|
4189 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
4190 | }
|
4191 |
|
4192 | #[test ]
|
4193 | fn eof() {
|
4194 | // Text is trimmed from both sides
|
4195 | let mut de = Deserializer::from_str(" text <tag>" );
|
4196 | assert_eq!(de.next().unwrap(), DeEvent::Text("text" .into()));
|
4197 | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag" )));
|
4198 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
4199 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
4200 | }
|
4201 | }
|
4202 |
|
/// An end tag that has no corresponding start tag always produces an error
#[test]
fn end() {
    let mut reader = Deserializer::from_str(" text </tag>");

    // Text is trimmed from both sides
    let leading_text = DeEvent::Text("text".into());
    assert_eq!(reader.next().unwrap(), leading_text);

    let unmatched = reader.next();
    match unmatched {
        Err(DeError::InvalidXml(Error::EndEventMismatch { expected, found })) => {
            assert_eq!(expected, "");
            assert_eq!(found, "tag");
        }
        x => panic!("Expected `InvalidXml(EndEventMismatch {{ expected = '', found = 'tag' }})`, but got {:?}", x),
    }

    assert_eq!(reader.next().unwrap(), DeEvent::Eof);
}
|
4218 |
|
4219 | // text::text::something is equivalent to text::something
|
4220 |
|
4221 | mod cdata {
|
4222 | use super::*;
|
4223 | use pretty_assertions::assert_eq;
|
4224 |
|
4225 | #[test ]
|
4226 | fn start() {
|
4227 | let mut de = Deserializer::from_str(" text <![CDATA[ cdata ]]><tag>" );
|
4228 | // Text is trimmed from the start
|
4229 | assert_eq!(de.next().unwrap(), DeEvent::Text("text cdata " .into()));
|
4230 | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag" )));
|
4231 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
4232 | }
|
4233 |
|
4234 | #[test ]
|
4235 | fn end() {
|
4236 | let mut de = Deserializer::from_str(" text <![CDATA[ cdata ]]></tag>" );
|
4237 | // Text is trimmed from the start
|
4238 | assert_eq!(de.next().unwrap(), DeEvent::Text("text cdata " .into()));
|
4239 | match de.next() {
|
4240 | Err(DeError::InvalidXml(Error::EndEventMismatch { expected, found })) => {
|
4241 | assert_eq!(expected, "" );
|
4242 | assert_eq!(found, "tag" );
|
4243 | }
|
4244 | x => panic!("Expected `InvalidXml(EndEventMismatch {{ expected = '', found = 'tag' }})`, but got {:?}" , x),
|
4245 | }
|
4246 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
4247 | }
|
4248 |
|
4249 | #[test ]
|
4250 | fn text() {
|
4251 | let mut de = Deserializer::from_str(" text <![CDATA[ cdata ]]> text2 " );
|
4252 | // Text is trimmed from the start and from the end
|
4253 | assert_eq!(
|
4254 | de.next().unwrap(),
|
4255 | DeEvent::Text("text cdata text2" .into())
|
4256 | );
|
4257 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
4258 | }
|
4259 |
|
4260 | #[test ]
|
4261 | fn cdata() {
|
4262 | let mut de =
|
4263 | Deserializer::from_str(" text <![CDATA[ cdata ]]><![CDATA[ cdata2 ]]>" );
|
4264 | // Text is trimmed from the start
|
4265 | assert_eq!(
|
4266 | de.next().unwrap(),
|
4267 | DeEvent::Text("text cdata cdata2 " .into())
|
4268 | );
|
4269 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
4270 | }
|
4271 |
|
4272 | #[test ]
|
4273 | fn eof() {
|
4274 | let mut de = Deserializer::from_str(" text <![CDATA[ cdata ]]>" );
|
4275 | // Text is trimmed from the start
|
4276 | assert_eq!(de.next().unwrap(), DeEvent::Text("text cdata " .into()));
|
4277 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
4278 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
4279 | }
|
4280 | }
|
4281 | }
|
4282 |
|
4283 | mod cdata {
|
4284 | use super::*;
|
4285 | use pretty_assertions::assert_eq;
|
4286 |
|
4287 | mod start {
|
4288 | use super::*;
|
4289 | use pretty_assertions::assert_eq;
|
4290 |
|
4291 | #[test ]
|
4292 | fn start() {
|
4293 | let mut de = Deserializer::from_str("<![CDATA[ cdata ]]><tag1><tag2>" );
|
4294 | assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata " .into()));
|
4295 | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1" )));
|
4296 | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2" )));
|
4297 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
4298 | }
|
4299 |
|
4300 | /// Not matching end tag will result in error
|
4301 | #[test ]
|
4302 | fn end() {
|
4303 | let mut de = Deserializer::from_str("<![CDATA[ cdata ]]><tag></tag>" );
|
4304 | assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata " .into()));
|
4305 | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag" )));
|
4306 | assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag" )));
|
4307 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
4308 | }
|
4309 |
|
4310 | #[test ]
|
4311 | fn text() {
|
4312 | let mut de = Deserializer::from_str("<![CDATA[ cdata ]]><tag> text " );
|
4313 | assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata " .into()));
|
4314 | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag" )));
|
4315 | // Text is trimmed from both sides
|
4316 | assert_eq!(de.next().unwrap(), DeEvent::Text("text" .into()));
|
4317 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
4318 | }
|
4319 |
|
4320 | #[test ]
|
4321 | fn cdata() {
|
4322 | let mut de =
|
4323 | Deserializer::from_str("<![CDATA[ cdata ]]><tag><![CDATA[ cdata2 ]]>" );
|
4324 | assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata " .into()));
|
4325 | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag" )));
|
4326 | assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata2 " .into()));
|
4327 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
4328 | }
|
4329 |
|
4330 | #[test ]
|
4331 | fn eof() {
|
4332 | let mut de = Deserializer::from_str("<![CDATA[ cdata ]]><tag>" );
|
4333 | assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata " .into()));
|
4334 | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag" )));
|
4335 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
4336 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
4337 | }
|
4338 | }
|
4339 |
|
4340 | /// End event without corresponding start event will always generate an error
|
4341 | #[test ]
|
4342 | fn end() {
|
4343 | let mut de = Deserializer::from_str("<![CDATA[ cdata ]]></tag>" );
|
4344 | assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata " .into()));
|
4345 | match de.next() {
|
4346 | Err(DeError::InvalidXml(Error::EndEventMismatch { expected, found })) => {
|
4347 | assert_eq!(expected, "" );
|
4348 | assert_eq!(found, "tag" );
|
4349 | }
|
4350 | x => panic!("Expected `InvalidXml(EndEventMismatch {{ expected = '', found = 'tag' }})`, but got {:?}" , x),
|
4351 | }
|
4352 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
4353 | }
|
4354 |
|
4355 | mod text {
|
4356 | use super::*;
|
4357 | use pretty_assertions::assert_eq;
|
4358 |
|
4359 | #[test ]
|
4360 | fn start() {
|
4361 | let mut de = Deserializer::from_str("<![CDATA[ cdata ]]> text <tag>" );
|
4362 | // Text is trimmed from the end
|
4363 | assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata text" .into()));
|
4364 | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag" )));
|
4365 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
4366 | }
|
4367 |
|
4368 | #[test ]
|
4369 | fn end() {
|
4370 | let mut de = Deserializer::from_str("<![CDATA[ cdata ]]> text </tag>" );
|
4371 | // Text is trimmed from the end
|
4372 | assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata text" .into()));
|
4373 | match de.next() {
|
4374 | Err(DeError::InvalidXml(Error::EndEventMismatch { expected, found })) => {
|
4375 | assert_eq!(expected, "" );
|
4376 | assert_eq!(found, "tag" );
|
4377 | }
|
4378 | x => panic!("Expected `InvalidXml(EndEventMismatch {{ expected = '', found = 'tag' }})`, but got {:?}" , x),
|
4379 | }
|
4380 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
4381 | }
|
4382 |
|
4383 | // cdata::text::text is equivalent to cdata::text
|
4384 |
|
4385 | #[test ]
|
4386 | fn cdata() {
|
4387 | let mut de =
|
4388 | Deserializer::from_str("<![CDATA[ cdata ]]> text <![CDATA[ cdata2 ]]>" );
|
4389 | assert_eq!(
|
4390 | de.next().unwrap(),
|
4391 | DeEvent::Text(" cdata text cdata2 " .into())
|
4392 | );
|
4393 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
4394 | }
|
4395 |
|
4396 | #[test ]
|
4397 | fn eof() {
|
4398 | let mut de = Deserializer::from_str("<![CDATA[ cdata ]]> text " );
|
4399 | // Text is trimmed from the end
|
4400 | assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata text" .into()));
|
4401 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
4402 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
4403 | }
|
4404 | }
|
4405 |
|
4406 | mod cdata {
|
4407 | use super::*;
|
4408 | use pretty_assertions::assert_eq;
|
4409 |
|
4410 | #[test ]
|
4411 | fn start() {
|
4412 | let mut de =
|
4413 | Deserializer::from_str("<![CDATA[ cdata ]]><![CDATA[ cdata2 ]]><tag>" );
|
4414 | assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata cdata2 " .into()));
|
4415 | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag" )));
|
4416 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
4417 | }
|
4418 |
|
4419 | #[test ]
|
4420 | fn end() {
|
4421 | let mut de =
|
4422 | Deserializer::from_str("<![CDATA[ cdata ]]><![CDATA[ cdata2 ]]></tag>" );
|
4423 | assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata cdata2 " .into()));
|
4424 | match de.next() {
|
4425 | Err(DeError::InvalidXml(Error::EndEventMismatch { expected, found })) => {
|
4426 | assert_eq!(expected, "" );
|
4427 | assert_eq!(found, "tag" );
|
4428 | }
|
4429 | x => panic!("Expected `InvalidXml(EndEventMismatch {{ expected = '', found = 'tag' }})`, but got {:?}" , x),
|
4430 | }
|
4431 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
4432 | }
|
4433 |
|
4434 | #[test ]
|
4435 | fn text() {
|
4436 | let mut de =
|
4437 | Deserializer::from_str("<![CDATA[ cdata ]]><![CDATA[ cdata2 ]]> text " );
|
4438 | // Text is trimmed from the end
|
4439 | assert_eq!(
|
4440 | de.next().unwrap(),
|
4441 | DeEvent::Text(" cdata cdata2 text" .into())
|
4442 | );
|
4443 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
4444 | }
|
4445 |
|
4446 | #[test ]
|
4447 | fn cdata() {
|
4448 | let mut de = Deserializer::from_str(
|
4449 | "<![CDATA[ cdata ]]><![CDATA[ cdata2 ]]><![CDATA[ cdata3 ]]>" ,
|
4450 | );
|
4451 | assert_eq!(
|
4452 | de.next().unwrap(),
|
4453 | DeEvent::Text(" cdata cdata2 cdata3 " .into())
|
4454 | );
|
4455 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
4456 | }
|
4457 |
|
4458 | #[test ]
|
4459 | fn eof() {
|
4460 | let mut de = Deserializer::from_str("<![CDATA[ cdata ]]><![CDATA[ cdata2 ]]>" );
|
4461 | assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata cdata2 " .into()));
|
4462 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
4463 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
4464 | }
|
4465 | }
|
4466 | }
|
4467 | }
|
4468 | }
|
4469 | |