| 1 | //! Serde `Deserializer` module.
|
| 2 | //!
|
| 3 | //! Due to the complexity of the XML standard and the fact that Serde was developed
|
| 4 | //! with JSON in mind, not all Serde concepts apply smoothly to XML. This leads to
|
| 5 | //! the fact that some XML concepts are inexpressible in terms of Serde derives
|
| 6 | //! and may require manual deserialization.
|
| 7 | //!
|
| 8 | //! The most notable restriction is the ability to distinguish between _elements_
|
| 9 | //! and _attributes_, as no other format used by serde has such a concept.
|
| 10 | //!
|
| 11 | //! Due to that the mapping is performed in a best effort manner.
|
| 12 | //!
|
| 13 | //!
|
| 14 | //!
|
| 15 | //! Table of Contents
|
| 16 | //! =================
|
| 17 | //! - [Mapping XML to Rust types](#mapping-xml-to-rust-types)
|
| 18 | //! - [Optional attributes and elements](#optional-attributes-and-elements)
|
| 19 | //! - [Choices (`xs:choice` XML Schema type)](#choices-xschoice-xml-schema-type)
|
| 20 | //! - [Sequences (`xs:all` and `xs:sequence` XML Schema types)](#sequences-xsall-and-xssequence-xml-schema-types)
|
| 21 | //! - [Composition Rules](#composition-rules)
|
| 22 | //! - [Difference between `$text` and `$value` special names](#difference-between-text-and-value-special-names)
|
| 23 | //! - [`$text`](#text)
|
| 24 | //! - [`$value`](#value)
|
| 25 | //! - [Primitives and sequences of primitives](#primitives-and-sequences-of-primitives)
|
| 26 | //! - [Structs and sequences of structs](#structs-and-sequences-of-structs)
|
| 27 | //! - [Enums and sequences of enums](#enums-and-sequences-of-enums)
|
| 28 | //! - [Frequently Used Patterns](#frequently-used-patterns)
|
| 29 | //! - [`<element>` lists](#element-lists)
|
| 30 | //! - [Enum::Unit Variants As a Text](#enumunit-variants-as-a-text)
|
| 31 | //! - [Internally Tagged Enums](#internally-tagged-enums)
|
| 32 | //!
|
| 33 | //!
|
| 34 | //!
|
| 35 | //! Mapping XML to Rust types
|
| 36 | //! =========================
|
| 37 | //!
|
| 38 | //! Type names are never considered when deserializing, so you can name your
|
| 39 | //! types as you wish. Other general rules:
|
| 40 | //! - `struct` field name could be represented in XML only as an attribute name
|
| 41 | //! or an element name;
|
| 42 | //! - `enum` variant name could be represented in XML only as an attribute name
|
| 43 | //! or an element name;
|
| 44 | //! - the unit struct, unit type `()` and unit enum variant can be deserialized
|
| 45 | //! from any valid XML content:
|
| 46 | //! - attribute and element names;
|
| 47 | //! - attribute and element values;
|
| 48 | //! - text or CDATA content (including mixed text and CDATA content).
|
| 49 | //!
|
| 50 | //! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
|
| 51 | //!
|
| 52 | //! NOTE: All tests are marked with an `ignore` option, even though they do
|
| 53 | //! compile. This is because rustdoc marks such blocks with an information
|
| 54 | //! icon unlike `no_run` blocks.
|
| 55 | //!
|
| 56 | //! </div>
|
| 57 | //!
|
| 58 | //! <table>
|
| 59 | //! <thead>
|
| 60 | //! <tr><th>To parse all these XML's...</th><th>...use these Rust type(s)</th></tr>
|
| 61 | //! </thead>
|
| 62 | //! <tbody style="vertical-align:top;">
|
| 63 | //! <tr>
|
| 64 | //! <td>
|
| 65 | //! Content of attributes and text / CDATA content of elements (including mixed
|
| 66 | //! text and CDATA content):
|
| 67 | //!
|
| 68 | //! ```xml
|
| 69 | //! <... ...="content" />
|
| 70 | //! ```
|
| 71 | //! ```xml
|
| 72 | //! <...>content</...>
|
| 73 | //! ```
|
| 74 | //! ```xml
|
| 75 | //! <...><![CDATA[content]]></...>
|
| 76 | //! ```
|
| 77 | //! ```xml
|
| 78 | //! <...>text<![CDATA[cdata]]>text</...>
|
| 79 | //! ```
|
| 80 | //! Mixed text / CDATA content represents one logical string, `"textcdatatext"` in that case.
|
| 81 | //! </td>
|
| 82 | //! <td>
|
| 83 | //!
|
| 84 | //! You can use any type that can be deserialized from an `&str`, for example:
|
| 85 | //! - [`String`] and [`&str`]
|
| 86 | //! - [`Cow<str>`]
|
| 87 | //! - [`u32`], [`f32`] and other numeric types
|
| 88 | //! - `enum`s, like
|
| 89 | //! ```
|
| 90 | //! # use pretty_assertions::assert_eq;
|
| 91 | //! # use serde::Deserialize;
|
| 92 | //! # #[derive(Debug, PartialEq)]
|
| 93 | //! #[derive(Deserialize)]
|
| 94 | //! enum Language {
|
| 95 | //! Rust,
|
| 96 | //! Cpp,
|
| 97 | //! #[serde(other)]
|
| 98 | //! Other,
|
| 99 | //! }
|
| 100 | //! # #[derive(Debug, PartialEq, Deserialize)]
|
| 101 | //! # struct X { #[serde(rename = "$text" )] x: Language }
|
| 102 | //! # assert_eq!(X { x: Language::Rust }, quick_xml::de::from_str("<x>Rust</x>" ).unwrap());
|
| 103 | //! # assert_eq!(X { x: Language::Cpp }, quick_xml::de::from_str("<x>C<![CDATA[p]]>p</x>" ).unwrap());
|
| 104 | //! # assert_eq!(X { x: Language::Other }, quick_xml::de::from_str("<x><![CDATA[other]]></x>" ).unwrap());
|
| 105 | //! ```
|
| 106 | //!
|
| 107 | //! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
|
| 108 | //!
|
| 109 | //! NOTE: deserialization to non-owned types (i.e. borrow from the input),
|
| 110 | //! such as `&str`, is possible only if you parse document in the UTF-8
|
| 111 | //! encoding and content does not contain entity references such as `&amp;`,
|
| 112 | //! or character references such as `&#xD;`, as well as text content represented
|
| 113 | //! by one piece of [text] or [CDATA] element.
|
| 114 | //! </div>
|
| 115 | //! <!-- TODO: document an error type returned -->
|
| 116 | //!
|
| 117 | //! [text]: Event::Text
|
| 118 | //! [CDATA]: Event::CData
|
| 119 | //! </td>
|
| 120 | //! </tr>
|
| 121 | //! <!-- 2 ===================================================================================== -->
|
| 122 | //! <tr>
|
| 123 | //! <td>
|
| 124 | //!
|
| 125 | //! Content of attributes and text / CDATA content of elements (including mixed
|
| 126 | //! text and CDATA content), which represents a space-delimited list, as
|
| 127 | //! specified in the XML Schema specification for [`xs:list`] `simpleType`:
|
| 128 | //!
|
| 129 | //! ```xml
|
| 130 | //! <... ...="element1 element2 ..." />
|
| 131 | //! ```
|
| 132 | //! ```xml
|
| 133 | //! <...>
|
| 134 | //! element1
|
| 135 | //! element2
|
| 136 | //! ...
|
| 137 | //! </...>
|
| 138 | //! ```
|
| 139 | //! ```xml
|
| 140 | //! <...><![CDATA[
|
| 141 | //! element1
|
| 142 | //! element2
|
| 143 | //! ...
|
| 144 | //! ]]></...>
|
| 145 | //! ```
|
| 146 | //!
|
| 147 | //! [`xs:list`]: https://www.w3.org/TR/xmlschema11-2/#list-datatypes
|
| 148 | //! </td>
|
| 149 | //! <td>
|
| 150 | //!
|
| 151 | //! Use any type that is deserialized using a [`deserialize_seq()`] call, for example:
|
| 152 | //!
|
| 153 | //! ```
|
| 154 | //! type List = Vec<u32>;
|
| 155 | //! ```
|
| 156 | //!
|
| 157 | //! See the next row to learn where in your struct definition you should
|
| 158 | //! use that type.
|
| 159 | //!
|
| 160 | //! According to the XML Schema specification, the delimiter between elements is one
|
| 161 | //! or more whitespace (`' '`, `'\r'`, `'\n'`, and `'\t'`) characters.
|
| 162 | //!
|
| 163 | //! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
|
| 164 | //!
|
| 165 | //! NOTE: according to the XML Schema restrictions, you cannot escape those
|
| 166 | //! white-space characters, so list elements will _never_ contain them.
|
| 167 | //! In practice you will usually use `xs:list`s for lists of numbers or enumerated
|
| 168 | //! values which look like identifiers in many languages, for example, `item`,
|
| 169 | //! `some_item` or `some-item`, so that shouldn't be a problem.
|
| 170 | //!
|
| 171 | //! NOTE: according to the XML Schema specification, list elements can be
|
| 172 | //! delimited only by spaces. Other delimiters (for example, commas) are not
|
| 173 | //! allowed.
|
| 174 | //!
|
| 175 | //! </div>
|
| 176 | //!
|
| 177 | //! [`deserialize_seq()`]: de::Deserializer::deserialize_seq
|
| 178 | //! </td>
|
| 179 | //! </tr>
|
| 180 | //! <!-- 3 ===================================================================================== -->
|
| 181 | //! <tr>
|
| 182 | //! <td>
|
| 183 | //! A typical XML with attributes. The root tag name does not matter:
|
| 184 | //!
|
| 185 | //! ```xml
|
| 186 | //! <any-tag one="..." two="..."/>
|
| 187 | //! ```
|
| 188 | //! </td>
|
| 189 | //! <td>
|
| 190 | //!
|
| 191 | //! A structure where each XML attribute is mapped to a field with a name
|
| 192 | //! starting with `@`. Because Rust identifiers do not permit the `@` character,
|
| 193 | //! you should use the `#[serde(rename = "@...")]` attribute to rename it.
|
| 194 | //! The name of the struct itself does not matter:
|
| 195 | //!
|
| 196 | //! ```
|
| 197 | //! # use serde::Deserialize;
|
| 198 | //! # type T = ();
|
| 199 | //! # type U = ();
|
| 200 | //! // Get both attributes
|
| 201 | //! # #[derive(Debug, PartialEq)]
|
| 202 | //! #[derive(Deserialize)]
|
| 203 | //! struct AnyName {
|
| 204 | //! #[serde(rename = "@one" )]
|
| 205 | //! one: T,
|
| 206 | //!
|
| 207 | //! #[serde(rename = "@two" )]
|
| 208 | //! two: U,
|
| 209 | //! }
|
| 210 | //! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..." two="..."/>"# ).unwrap();
|
| 211 | //! ```
|
| 212 | //! ```
|
| 213 | //! # use serde::Deserialize;
|
| 214 | //! # type T = ();
|
| 215 | //! // Get only the one attribute, ignore the other
|
| 216 | //! # #[derive(Debug, PartialEq)]
|
| 217 | //! #[derive(Deserialize)]
|
| 218 | //! struct AnyName {
|
| 219 | //! #[serde(rename = "@one" )]
|
| 220 | //! one: T,
|
| 221 | //! }
|
| 222 | //! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..." two="..."/>"# ).unwrap();
|
| 223 | //! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."/>"# ).unwrap();
|
| 224 | //! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><one>...</one></any-tag>"# ).unwrap();
|
| 225 | //! ```
|
| 226 | //! ```
|
| 227 | //! # use serde::Deserialize;
|
| 228 | //! // Ignore all attributes
|
| 229 | //! // You can also use the `()` type (unit type)
|
| 230 | //! # #[derive(Debug, PartialEq)]
|
| 231 | //! #[derive(Deserialize)]
|
| 232 | //! struct AnyName;
|
| 233 | //! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..." two="..."/>"# ).unwrap();
|
| 234 | //! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><one>...</one></any-tag>"# ).unwrap();
|
| 235 | //! # quick_xml::de::from_str::<AnyName>(r#"<any-tag><one>...</one><two>...</two></any-tag>"# ).unwrap();
|
| 236 | //! ```
|
| 237 | //!
|
| 238 | //! All these structs can be used to deserialize from an XML on the
|
| 239 | //! left side depending on amount of information that you want to get.
|
| 240 | //! Of course, you can combine them with elements extractor structs (see below).
|
| 241 | //!
|
| 242 | //! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
|
| 243 | //!
|
| 244 | //! NOTE: XML allows you to have an attribute and an element with the same name
|
| 245 | //! inside the one element. quick-xml deals with that by prepending a `@` prefix
|
| 246 | //! to the name of attributes.
|
| 247 | //! </div>
|
| 248 | //! </td>
|
| 249 | //! </tr>
|
| 250 | //! <!-- 4 ===================================================================================== -->
|
| 251 | //! <tr>
|
| 252 | //! <td>
|
| 253 | //! A typical XML with child elements. The root tag name does not matter:
|
| 254 | //!
|
| 255 | //! ```xml
|
| 256 | //! <any-tag>
|
| 257 | //! <one>...</one>
|
| 258 | //! <two>...</two>
|
| 259 | //! </any-tag>
|
| 260 | //! ```
|
| 261 | //! </td>
|
| 262 | //! <td>
|
| 263 | //! A structure where each XML child element is mapped to the field.
|
| 264 | //! Each element name becomes a name of field. The name of the struct itself
|
| 265 | //! does not matter:
|
| 266 | //!
|
| 267 | //! ```
|
| 268 | //! # use serde::Deserialize;
|
| 269 | //! # type T = ();
|
| 270 | //! # type U = ();
|
| 271 | //! // Get both elements
|
| 272 | //! # #[derive(Debug, PartialEq)]
|
| 273 | //! #[derive(Deserialize)]
|
| 274 | //! struct AnyName {
|
| 275 | //! one: T,
|
| 276 | //! two: U,
|
| 277 | //! }
|
| 278 | //! # quick_xml::de::from_str::<AnyName>(r#"<any-tag><one>...</one><two>...</two></any-tag>"# ).unwrap();
|
| 279 | //! #
|
| 280 | //! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..." two="..."/>"# ).unwrap_err();
|
| 281 | //! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><two>...</two></any-tag>"# ).unwrap_err();
|
| 282 | //! ```
|
| 283 | //! ```
|
| 284 | //! # use serde::Deserialize;
|
| 285 | //! # type T = ();
|
| 286 | //! // Get only the one element, ignore the other
|
| 287 | //! # #[derive(Debug, PartialEq)]
|
| 288 | //! #[derive(Deserialize)]
|
| 289 | //! struct AnyName {
|
| 290 | //! one: T,
|
| 291 | //! }
|
| 292 | //! # quick_xml::de::from_str::<AnyName>(r#"<any-tag><one>...</one><two>...</two></any-tag>"# ).unwrap();
|
| 293 | //! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><one>...</one></any-tag>"# ).unwrap();
|
| 294 | //! ```
|
| 295 | //! ```
|
| 296 | //! # use serde::Deserialize;
|
| 297 | //! // Ignore all elements
|
| 298 | //! // You can also use the `()` type (unit type)
|
| 299 | //! # #[derive(Debug, PartialEq)]
|
| 300 | //! #[derive(Deserialize)]
|
| 301 | //! struct AnyName;
|
| 302 | //! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..." two="..."/>"# ).unwrap();
|
| 303 | //! # quick_xml::de::from_str::<AnyName>(r#"<any-tag><one>...</one><two>...</two></any-tag>"# ).unwrap();
|
| 304 | //! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><two>...</two></any-tag>"# ).unwrap();
|
| 305 | //! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><one>...</one></any-tag>"# ).unwrap();
|
| 306 | //! ```
|
| 307 | //!
|
| 308 | //! All these structs can be used to deserialize from an XML on the
|
| 309 | //! left side depending on amount of information that you want to get.
|
| 310 | //! Of course, you can combine them with attributes extractor structs (see above).
|
| 311 | //!
|
| 312 | //! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
|
| 313 | //!
|
| 314 | //! NOTE: XML allows you to have an attribute and an element with the same name
|
| 315 | //! inside the one element. quick-xml deals with that by prepending a `@` prefix
|
| 316 | //! to the name of attributes.
|
| 317 | //! </div>
|
| 318 | //! </td>
|
| 319 | //! </tr>
|
| 320 | //! <!-- 5 ===================================================================================== -->
|
| 321 | //! <tr>
|
| 322 | //! <td>
|
| 323 | //! An XML with an attribute and a child element named equally:
|
| 324 | //!
|
| 325 | //! ```xml
|
| 326 | //! <any-tag field="...">
|
| 327 | //! <field>...</field>
|
| 328 | //! </any-tag>
|
| 329 | //! ```
|
| 330 | //! </td>
|
| 331 | //! <td>
|
| 332 | //!
|
| 333 | //! You MUST specify `#[serde(rename = "@field")]` on a field that will be used
|
| 334 | //! for an attribute:
|
| 335 | //!
|
| 336 | //! ```
|
| 337 | //! # use pretty_assertions::assert_eq;
|
| 338 | //! # use serde::Deserialize;
|
| 339 | //! # type T = ();
|
| 340 | //! # type U = ();
|
| 341 | //! # #[derive(Debug, PartialEq)]
|
| 342 | //! #[derive(Deserialize)]
|
| 343 | //! struct AnyName {
|
| 344 | //! #[serde(rename = "@field" )]
|
| 345 | //! attribute: T,
|
| 346 | //! field: U,
|
| 347 | //! }
|
| 348 | //! # assert_eq!(
|
| 349 | //! # AnyName { attribute: (), field: () },
|
| 350 | //! # quick_xml::de::from_str(r#"
|
| 351 | //! # <any-tag field="...">
|
| 352 | //! # <field>...</field>
|
| 353 | //! # </any-tag>
|
| 354 | //! # "# ).unwrap(),
|
| 355 | //! # );
|
| 356 | //! ```
|
| 357 | //! </td>
|
| 358 | //! </tr>
|
| 359 | //! <!-- ======================================================================================= -->
|
| 360 | //! <tr><th colspan="2">
|
| 361 | //!
|
| 362 | //! ## Optional attributes and elements
|
| 363 | //!
|
| 364 | //! </th></tr>
|
| 365 | //! <tr><th>To parse all these XML's...</th><th>...use these Rust type(s)</th></tr>
|
| 366 | //! <!-- 6 ===================================================================================== -->
|
| 367 | //! <tr>
|
| 368 | //! <td>
|
| 369 | //! An optional XML attribute that you want to capture.
|
| 370 | //! The root tag name does not matter:
|
| 371 | //!
|
| 372 | //! ```xml
|
| 373 | //! <any-tag optional="..."/>
|
| 374 | //! ```
|
| 375 | //! ```xml
|
| 376 | //! <any-tag/>
|
| 377 | //! ```
|
| 378 | //! </td>
|
| 379 | //! <td>
|
| 380 | //!
|
| 381 | //! A structure with an optional field, renamed according to the requirements
|
| 382 | //! for attributes:
|
| 383 | //!
|
| 384 | //! ```
|
| 385 | //! # use pretty_assertions::assert_eq;
|
| 386 | //! # use serde::Deserialize;
|
| 387 | //! # type T = ();
|
| 388 | //! # #[derive(Debug, PartialEq)]
|
| 389 | //! #[derive(Deserialize)]
|
| 390 | //! struct AnyName {
|
| 391 | //! #[serde(rename = "@optional" )]
|
| 392 | //! optional: Option<T>,
|
| 393 | //! }
|
| 394 | //! # assert_eq!(AnyName { optional: Some(()) }, quick_xml::de::from_str(r#"<any-tag optional="..."/>"# ).unwrap());
|
| 395 | //! # assert_eq!(AnyName { optional: None }, quick_xml::de::from_str(r#"<any-tag/>"# ).unwrap());
|
| 396 | //! ```
|
| 397 | //! When the XML attribute is present, type `T` will be deserialized from
|
| 398 | //! an attribute value (which is a string). Note, that if `T = String` or other
|
| 399 | //! string type, the empty attribute is mapped to a `Some("")`, whereas `None`
|
| 400 | //! represents the missing attribute:
|
| 401 | //! ```xml
|
| 402 | //! <any-tag optional="..."/><!-- Some("...") -->
|
| 403 | //! <any-tag optional=""/> <!-- Some("") -->
|
| 404 | //! <any-tag/> <!-- None -->
|
| 405 | //! ```
|
| 406 | //! </td>
|
| 407 | //! </tr>
|
| 408 | //! <!-- 7 ===================================================================================== -->
|
| 409 | //! <tr>
|
| 410 | //! <td>
|
| 411 | //! An optional XML element that you want to capture.
|
| 412 | //! The root tag name does not matter:
|
| 413 | //!
|
| 414 | //! ```xml
|
| 415 | //! <any-tag>
|
| 416 | //! <optional>...</optional>
|
| 417 | //! </any-tag>
|
| 418 | //! ```
|
| 419 | //! ```xml
|
| 420 | //! <any-tag>
|
| 421 | //! <optional/>
|
| 422 | //! </any-tag>
|
| 423 | //! ```
|
| 424 | //! ```xml
|
| 425 | //! <any-tag/>
|
| 426 | //! ```
|
| 427 | //! </td>
|
| 428 | //! <td>
|
| 429 | //!
|
| 430 | //! A structure with an optional field:
|
| 431 | //!
|
| 432 | //! ```
|
| 433 | //! # use pretty_assertions::assert_eq;
|
| 434 | //! # use serde::Deserialize;
|
| 435 | //! # type T = ();
|
| 436 | //! # #[derive(Debug, PartialEq)]
|
| 437 | //! #[derive(Deserialize)]
|
| 438 | //! struct AnyName {
|
| 439 | //! optional: Option<T>,
|
| 440 | //! }
|
| 441 | //! # assert_eq!(AnyName { optional: Some(()) }, quick_xml::de::from_str(r#"<any-tag><optional>...</optional></any-tag>"# ).unwrap());
|
| 442 | //! # assert_eq!(AnyName { optional: None }, quick_xml::de::from_str(r#"<any-tag/>"# ).unwrap());
|
| 443 | //! ```
|
| 444 | //! When the XML element is present, type `T` will be deserialized from an
|
| 445 | //! element (which is a string or a multi-mapping -- i.e. mapping which can have
|
| 446 | //! duplicated keys).
|
| 447 | //! <div style="background:rgba(80, 240, 100, 0.20);padding:0.75em;">
|
| 448 | //!
|
| 449 | //! Currently there are some edge cases, described in the issue [#497].
|
| 450 | //! </div>
|
| 451 | //! </td>
|
| 452 | //! </tr>
|
| 453 | //! <!-- ======================================================================================= -->
|
| 454 | //! <tr><th colspan="2">
|
| 455 | //!
|
| 456 | //! ## Choices (`xs:choice` XML Schema type)
|
| 457 | //!
|
| 458 | //! </th></tr>
|
| 459 | //! <tr><th>To parse all these XML's...</th><th>...use these Rust type(s)</th></tr>
|
| 460 | //! <!-- 8 ===================================================================================== -->
|
| 461 | //! <tr>
|
| 462 | //! <td>
|
| 463 | //! An XML with different root tag names, as well as text / CDATA content:
|
| 464 | //!
|
| 465 | //! ```xml
|
| 466 | //! <one field1="...">...</one>
|
| 467 | //! ```
|
| 468 | //! ```xml
|
| 469 | //! <two>
|
| 470 | //! <field2>...</field2>
|
| 471 | //! </two>
|
| 472 | //! ```
|
| 473 | //! ```xml
|
| 474 | //! Text <![CDATA[or (mixed)
|
| 475 | //! CDATA]]> content
|
| 476 | //! ```
|
| 477 | //! </td>
|
| 478 | //! <td>
|
| 479 | //!
|
| 480 | //! An enum where each variant has the name of a possible root tag. The name of
|
| 481 | //! the enum itself does not matter.
|
| 482 | //!
|
| 483 | //! If you need to get the textual content, mark a variant with `#[serde(rename = "$text")]`.
|
| 484 | //!
|
| 485 | //! All these structs can be used to deserialize from any XML on the
|
| 486 | //! left side depending on amount of information that you want to get:
|
| 487 | //!
|
| 488 | //! ```
|
| 489 | //! # use pretty_assertions::assert_eq;
|
| 490 | //! # use serde::Deserialize;
|
| 491 | //! # type T = ();
|
| 492 | //! # type U = ();
|
| 493 | //! # #[derive(Debug, PartialEq)]
|
| 494 | //! #[derive(Deserialize)]
|
| 495 | //! #[serde(rename_all = "snake_case" )]
|
| 496 | //! enum AnyName {
|
| 497 | //! One { #[serde(rename = "@field1" )] field1: T },
|
| 498 | //! Two { field2: U },
|
| 499 | //!
|
| 500 | //! /// Use unit variant, if you do not care about the content.
|
| 501 | //! /// You can use tuple variant if you want to parse
|
| 502 | //! /// textual content as an xs:list.
|
| 503 | //! /// Struct variants are not supported and will return
|
| 504 | //! /// Err(Unsupported)
|
| 505 | //! #[serde(rename = "$text" )]
|
| 506 | //! Text(String),
|
| 507 | //! }
|
| 508 | //! # assert_eq!(AnyName::One { field1: () }, quick_xml::de::from_str(r#"<one field1="...">...</one>"# ).unwrap());
|
| 509 | //! # assert_eq!(AnyName::Two { field2: () }, quick_xml::de::from_str(r#"<two><field2>...</field2></two>"# ).unwrap());
|
| 510 | //! # assert_eq!(AnyName::Text("text cdata " .into()), quick_xml::de::from_str(r#"text <![CDATA[ cdata ]]>"# ).unwrap());
|
| 511 | //! ```
|
| 512 | //! ```
|
| 513 | //! # use pretty_assertions::assert_eq;
|
| 514 | //! # use serde::Deserialize;
|
| 515 | //! # type T = ();
|
| 516 | //! # #[derive(Debug, PartialEq)]
|
| 517 | //! #[derive(Deserialize)]
|
| 518 | //! struct Two {
|
| 519 | //! field2: T,
|
| 520 | //! }
|
| 521 | //! # #[derive(Debug, PartialEq)]
|
| 522 | //! #[derive(Deserialize)]
|
| 523 | //! #[serde(rename_all = "snake_case" )]
|
| 524 | //! enum AnyName {
|
| 525 | //! // `field1` content discarded
|
| 526 | //! One,
|
| 527 | //! Two(Two),
|
| 528 | //! #[serde(rename = "$text" )]
|
| 529 | //! Text,
|
| 530 | //! }
|
| 531 | //! # assert_eq!(AnyName::One, quick_xml::de::from_str(r#"<one field1="...">...</one>"# ).unwrap());
|
| 532 | //! # assert_eq!(AnyName::Two(Two { field2: () }), quick_xml::de::from_str(r#"<two><field2>...</field2></two>"# ).unwrap());
|
| 533 | //! # assert_eq!(AnyName::Text, quick_xml::de::from_str(r#"text <![CDATA[ cdata ]]>"# ).unwrap());
|
| 534 | //! ```
|
| 535 | //! ```
|
| 536 | //! # use pretty_assertions::assert_eq;
|
| 537 | //! # use serde::Deserialize;
|
| 538 | //! # #[derive(Debug, PartialEq)]
|
| 539 | //! #[derive(Deserialize)]
|
| 540 | //! #[serde(rename_all = "snake_case" )]
|
| 541 | //! enum AnyName {
|
| 542 | //! One,
|
| 543 | //! // the <two> and textual content will be mapped to this
|
| 544 | //! #[serde(other)]
|
| 545 | //! Other,
|
| 546 | //! }
|
| 547 | //! # assert_eq!(AnyName::One, quick_xml::de::from_str(r#"<one field1="...">...</one>"# ).unwrap());
|
| 548 | //! # assert_eq!(AnyName::Other, quick_xml::de::from_str(r#"<two><field2>...</field2></two>"# ).unwrap());
|
| 549 | //! # assert_eq!(AnyName::Other, quick_xml::de::from_str(r#"text <![CDATA[ cdata ]]>"# ).unwrap());
|
| 550 | //! ```
|
| 551 | //! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
|
| 552 | //!
|
| 553 | //! NOTE: You should have variants for all possible tag names in your enum
|
| 554 | //! or have an `#[serde(other)]` variant.
|
| 555 | //! <!-- TODO: document an error type if that requirement is violated -->
|
| 556 | //! </div>
|
| 557 | //! </td>
|
| 558 | //! </tr>
|
| 559 | //! <!-- 9 ===================================================================================== -->
|
| 560 | //! <tr>
|
| 561 | //! <td>
|
| 562 | //!
|
| 563 | //! `<xs:choice>` embedded in the other element, and at the same time you want
|
| 564 | //! to get access to other attributes that can appear in the same container
|
| 565 | //! (`<any-tag>`). Also this case can be described, as if you want to choose
|
| 566 | //! Rust enum variant based on a tag name:
|
| 567 | //!
|
| 568 | //! ```xml
|
| 569 | //! <any-tag field="...">
|
| 570 | //! <one>...</one>
|
| 571 | //! </any-tag>
|
| 572 | //! ```
|
| 573 | //! ```xml
|
| 574 | //! <any-tag field="...">
|
| 575 | //! <two>...</two>
|
| 576 | //! </any-tag>
|
| 577 | //! ```
|
| 578 | //! ```xml
|
| 579 | //! <any-tag field="...">
|
| 580 | //! Text <![CDATA[or (mixed)
|
| 581 | //! CDATA]]> content
|
| 582 | //! </any-tag>
|
| 583 | //! ```
|
| 584 | //! </td>
|
| 585 | //! <td>
|
| 586 | //!
|
| 587 | //! A structure with a field whose type is an `enum`.
|
| 588 | //!
|
| 589 | //! If you need to get a textual content, mark a variant with `#[serde(rename = "$text")]`.
|
| 590 | //!
|
| 591 | //! Names of the enum, struct, and struct field with `Choice` type do not matter:
|
| 592 | //!
|
| 593 | //! ```
|
| 594 | //! # use pretty_assertions::assert_eq;
|
| 595 | //! # use serde::Deserialize;
|
| 596 | //! # type T = ();
|
| 597 | //! # #[derive(Debug, PartialEq)]
|
| 598 | //! #[derive(Deserialize)]
|
| 599 | //! #[serde(rename_all = "snake_case" )]
|
| 600 | //! enum Choice {
|
| 601 | //! One,
|
| 602 | //! Two,
|
| 603 | //!
|
| 604 | //! /// Use unit variant, if you do not care about the content.
|
| 605 | //! /// You can use tuple variant if you want to parse
|
| 606 | //! /// textual content as an xs:list.
|
| 607 | //! /// Struct variants are not supported and will return
|
| 608 | //! /// Err(Unsupported)
|
| 609 | //! #[serde(rename = "$text" )]
|
| 610 | //! Text(String),
|
| 611 | //! }
|
| 612 | //! # #[derive(Debug, PartialEq)]
|
| 613 | //! #[derive(Deserialize)]
|
| 614 | //! struct AnyName {
|
| 615 | //! #[serde(rename = "@field" )]
|
| 616 | //! field: T,
|
| 617 | //!
|
| 618 | //! #[serde(rename = "$value" )]
|
| 619 | //! any_name: Choice,
|
| 620 | //! }
|
| 621 | //! # assert_eq!(
|
| 622 | //! # AnyName { field: (), any_name: Choice::One },
|
| 623 | //! # quick_xml::de::from_str(r#"<any-tag field="..."><one>...</one></any-tag>"# ).unwrap(),
|
| 624 | //! # );
|
| 625 | //! # assert_eq!(
|
| 626 | //! # AnyName { field: (), any_name: Choice::Two },
|
| 627 | //! # quick_xml::de::from_str(r#"<any-tag field="..."><two>...</two></any-tag>"# ).unwrap(),
|
| 628 | //! # );
|
| 629 | //! # assert_eq!(
|
| 630 | //! # AnyName { field: (), any_name: Choice::Text("text cdata " .into()) },
|
| 631 | //! # quick_xml::de::from_str(r#"<any-tag field="...">text <![CDATA[ cdata ]]></any-tag>"# ).unwrap(),
|
| 632 | //! # );
|
| 633 | //! ```
|
| 634 | //! </td>
|
| 635 | //! </tr>
|
| 636 | //! <!-- 10 ==================================================================================== -->
|
| 637 | //! <tr>
|
| 638 | //! <td>
|
| 639 | //!
|
| 640 | //! `<xs:choice>` embedded in the other element, and at the same time you want
|
| 641 | //! to get access to other elements that can appear in the same container
|
| 642 | //! (`<any-tag>`). Also this case can be described, as if you want to choose
|
| 643 | //! Rust enum variant based on a tag name:
|
| 644 | //!
|
| 645 | //! ```xml
|
| 646 | //! <any-tag>
|
| 647 | //! <field>...</field>
|
| 648 | //! <one>...</one>
|
| 649 | //! </any-tag>
|
| 650 | //! ```
|
| 651 | //! ```xml
|
| 652 | //! <any-tag>
|
| 653 | //! <two>...</two>
|
| 654 | //! <field>...</field>
|
| 655 | //! </any-tag>
|
| 656 | //! ```
|
| 657 | //! </td>
|
| 658 | //! <td>
|
| 659 | //!
|
| 660 | //! A structure with a field whose type is an `enum`.
|
| 661 | //!
|
| 662 | //! Names of the enum, struct, and struct field with `Choice` type do not matter:
|
| 663 | //!
|
| 664 | //! ```
|
| 665 | //! # use pretty_assertions::assert_eq;
|
| 666 | //! # use serde::Deserialize;
|
| 667 | //! # type T = ();
|
| 668 | //! # #[derive(Debug, PartialEq)]
|
| 669 | //! #[derive(Deserialize)]
|
| 670 | //! #[serde(rename_all = "snake_case" )]
|
| 671 | //! enum Choice {
|
| 672 | //! One,
|
| 673 | //! Two,
|
| 674 | //! }
|
| 675 | //! # #[derive(Debug, PartialEq)]
|
| 676 | //! #[derive(Deserialize)]
|
| 677 | //! struct AnyName {
|
| 678 | //! field: T,
|
| 679 | //!
|
| 680 | //! #[serde(rename = "$value" )]
|
| 681 | //! any_name: Choice,
|
| 682 | //! }
|
| 683 | //! # assert_eq!(
|
| 684 | //! # AnyName { field: (), any_name: Choice::One },
|
| 685 | //! # quick_xml::de::from_str(r#"<any-tag><field>...</field><one>...</one></any-tag>"# ).unwrap(),
|
| 686 | //! # );
|
| 687 | //! # assert_eq!(
|
| 688 | //! # AnyName { field: (), any_name: Choice::Two },
|
| 689 | //! # quick_xml::de::from_str(r#"<any-tag><two>...</two><field>...</field></any-tag>"# ).unwrap(),
|
| 690 | //! # );
|
| 691 | //! ```
|
| 692 | //!
|
| 693 | //! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
|
| 694 | //!
|
| 695 | //! NOTE: if your `Choice` enum would contain an `#[serde(other)]`
|
| 696 | //! variant, element `<field>` will be mapped to the `field` and not to the enum
|
| 697 | //! variant.
|
| 698 | //! </div>
|
| 699 | //!
|
| 700 | //! </td>
|
| 701 | //! </tr>
|
| 702 | //! <!-- 11 ==================================================================================== -->
|
| 703 | //! <tr>
|
| 704 | //! <td>
|
| 705 | //!
|
| 706 | //! `<xs:choice>` encapsulated in other element with a fixed name:
|
| 707 | //!
|
| 708 | //! ```xml
|
| 709 | //! <any-tag field="...">
|
| 710 | //! <choice>
|
| 711 | //! <one>...</one>
|
| 712 | //! </choice>
|
| 713 | //! </any-tag>
|
| 714 | //! ```
|
| 715 | //! ```xml
|
| 716 | //! <any-tag field="...">
|
| 717 | //! <choice>
|
| 718 | //! <two>...</two>
|
| 719 | //! </choice>
|
| 720 | //! </any-tag>
|
| 721 | //! ```
|
| 722 | //! </td>
|
| 723 | //! <td>
|
| 724 | //!
|
| 725 | //! A structure with a field of an intermediate type with one field of `enum` type.
|
| 726 | //! Actually, this example is not necessary, because you can construct it by yourself
|
| 727 | //! using the composition rules that were described above. However the XML construction
|
| 728 | //! described here is very common, so it is shown explicitly.
|
| 729 | //!
|
| 730 | //! Names of the enum and struct do not matter:
|
| 731 | //!
|
| 732 | //! ```
|
| 733 | //! # use pretty_assertions::assert_eq;
|
| 734 | //! # use serde::Deserialize;
|
| 735 | //! # type T = ();
|
| 736 | //! # #[derive(Debug, PartialEq)]
|
| 737 | //! #[derive(Deserialize)]
|
| 738 | //! #[serde(rename_all = "snake_case" )]
|
| 739 | //! enum Choice {
|
| 740 | //! One,
|
| 741 | //! Two,
|
| 742 | //! }
|
| 743 | //! # #[derive(Debug, PartialEq)]
|
| 744 | //! #[derive(Deserialize)]
|
| 745 | //! struct Holder {
|
| 746 | //! #[serde(rename = "$value" )]
|
| 747 | //! any_name: Choice,
|
| 748 | //! }
|
| 749 | //! # #[derive(Debug, PartialEq)]
|
| 750 | //! #[derive(Deserialize)]
|
| 751 | //! struct AnyName {
|
| 752 | //! #[serde(rename = "@field" )]
|
| 753 | //! field: T,
|
| 754 | //!
|
| 755 | //! choice: Holder,
|
| 756 | //! }
|
| 757 | //! # assert_eq!(
|
| 758 | //! # AnyName { field: (), choice: Holder { any_name: Choice::One } },
|
| 759 | //! # quick_xml::de::from_str(r#"<any-tag field="..."><choice><one>...</one></choice></any-tag>"# ).unwrap(),
|
| 760 | //! # );
|
| 761 | //! # assert_eq!(
|
| 762 | //! # AnyName { field: (), choice: Holder { any_name: Choice::Two } },
|
| 763 | //! # quick_xml::de::from_str(r#"<any-tag field="..."><choice><two>...</two></choice></any-tag>"# ).unwrap(),
|
| 764 | //! # );
|
| 765 | //! ```
|
| 766 | //! </td>
|
| 767 | //! </tr>
|
| 768 | //! <!-- 12 ==================================================================================== -->
|
| 769 | //! <tr>
|
| 770 | //! <td>
|
| 771 | //!
|
| 772 | //! `<xs:choice>` encapsulated in other element with a fixed name:
|
| 773 | //!
|
| 774 | //! ```xml
|
| 775 | //! <any-tag>
|
| 776 | //! <field>...</field>
|
| 777 | //! <choice>
|
| 778 | //! <one>...</one>
|
| 779 | //! </choice>
|
| 780 | //! </any-tag>
|
| 781 | //! ```
|
| 782 | //! ```xml
|
| 783 | //! <any-tag>
|
| 784 | //! <choice>
|
| 785 | //! <two>...</two>
|
| 786 | //! </choice>
|
| 787 | //! <field>...</field>
|
| 788 | //! </any-tag>
|
| 789 | //! ```
|
| 790 | //! </td>
|
| 791 | //! <td>
|
| 792 | //!
|
| 793 | //! A structure with a field of an intermediate type with one field of `enum` type.
|
| 794 | //! Actually, this example is not necessary, because you can construct it by yourself
|
| 795 | //! using the composition rules that were described above. However the XML construction
|
| 796 | //! described here is very common, so it is shown explicitly.
|
| 797 | //!
|
| 798 | //! Names of the enum and struct do not matter:
|
| 799 | //!
|
| 800 | //! ```
|
| 801 | //! # use pretty_assertions::assert_eq;
|
| 802 | //! # use serde::Deserialize;
|
| 803 | //! # type T = ();
|
| 804 | //! # #[derive(Debug, PartialEq)]
|
| 805 | //! #[derive(Deserialize)]
|
| 806 | //! #[serde(rename_all = "snake_case" )]
|
| 807 | //! enum Choice {
|
| 808 | //! One,
|
| 809 | //! Two,
|
| 810 | //! }
|
| 811 | //! # #[derive(Debug, PartialEq)]
|
| 812 | //! #[derive(Deserialize)]
|
| 813 | //! struct Holder {
|
| 814 | //! #[serde(rename = "$value" )]
|
| 815 | //! any_name: Choice,
|
| 816 | //! }
|
| 817 | //! # #[derive(Debug, PartialEq)]
|
| 818 | //! #[derive(Deserialize)]
|
| 819 | //! struct AnyName {
|
| 820 | //! field: T,
|
| 821 | //!
|
| 822 | //! choice: Holder,
|
| 823 | //! }
|
| 824 | //! # assert_eq!(
|
| 825 | //! # AnyName { field: (), choice: Holder { any_name: Choice::One } },
|
| 826 | //! # quick_xml::de::from_str(r#"<any-tag><field>...</field><choice><one>...</one></choice></any-tag>"# ).unwrap(),
|
| 827 | //! # );
|
| 828 | //! # assert_eq!(
|
| 829 | //! # AnyName { field: (), choice: Holder { any_name: Choice::Two } },
|
| 830 | //! # quick_xml::de::from_str(r#"<any-tag><choice><two>...</two></choice><field>...</field></any-tag>"# ).unwrap(),
|
| 831 | //! # );
|
| 832 | //! ```
|
| 833 | //! </td>
|
| 834 | //! </tr>
|
| 835 | //! <!-- ======================================================================================== -->
|
| 836 | //! <tr><th colspan="2">
|
| 837 | //!
|
| 838 | //! ## Sequences (`xs:all` and `xs:sequence` XML Schema types)
|
| 839 | //!
|
| 840 | //! </th></tr>
|
| 841 | //! <tr><th>To parse all these XML's...</th><th>...use these Rust type(s)</th></tr>
|
| 842 | //! <!-- 13 ==================================================================================== -->
|
| 843 | //! <tr>
|
| 844 | //! <td>
|
| 845 | //! A sequence inside of a tag without a dedicated name:
|
| 846 | //!
|
| 847 | //! ```xml
|
| 848 | //! <any-tag/>
|
| 849 | //! ```
|
| 850 | //! ```xml
|
| 851 | //! <any-tag>
|
| 852 | //! <item/>
|
| 853 | //! </any-tag>
|
| 854 | //! ```
|
| 855 | //! ```xml
|
| 856 | //! <any-tag>
|
| 857 | //! <item/>
|
| 858 | //! <item/>
|
| 859 | //! <item/>
|
| 860 | //! </any-tag>
|
| 861 | //! ```
|
| 862 | //! </td>
|
| 863 | //! <td>
|
| 864 | //!
|
| 865 | //! A structure with a field which is a sequence type, for example, [`Vec`].
|
| 866 | //! Because XML syntax does not distinguish between empty sequences and missing
|
| 867 | //! elements, we should indicate that on the Rust side, because serde will require
|
| 868 | //! that field `item` exists. You can do that in two possible ways:
|
| 869 | //!
|
| 870 | //! Use the `#[serde(default)]` attribute for a [field] or the entire [struct]:
|
| 871 | //! ```
|
| 872 | //! # use pretty_assertions::assert_eq;
|
| 873 | //! # use serde::Deserialize;
|
| 874 | //! # type Item = ();
|
| 875 | //! # #[derive(Debug, PartialEq)]
|
| 876 | //! #[derive(Deserialize)]
|
| 877 | //! struct AnyName {
|
| 878 | //! #[serde(default)]
|
| 879 | //! item: Vec<Item>,
|
| 880 | //! }
|
| 881 | //! # assert_eq!(
|
| 882 | //! # AnyName { item: vec![] },
|
| 883 | //! # quick_xml::de::from_str(r#"<any-tag/>"# ).unwrap(),
|
| 884 | //! # );
|
| 885 | //! # assert_eq!(
|
| 886 | //! # AnyName { item: vec![()] },
|
| 887 | //! # quick_xml::de::from_str(r#"<any-tag><item/></any-tag>"# ).unwrap(),
|
| 888 | //! # );
|
| 889 | //! # assert_eq!(
|
| 890 | //! # AnyName { item: vec![(), (), ()] },
|
| 891 | //! # quick_xml::de::from_str(r#"<any-tag><item/><item/><item/></any-tag>"# ).unwrap(),
|
| 892 | //! # );
|
| 893 | //! ```
|
| 894 | //!
|
| 895 | //! Use the [`Option`]. In that case the inner array will always contain at least one
|
| 896 | //! element after deserialization:
|
| 897 | //! ```ignore
|
| 898 | //! # use pretty_assertions::assert_eq;
|
| 899 | //! # use serde::Deserialize;
|
| 900 | //! # type Item = ();
|
| 901 | //! # #[derive(Debug, PartialEq)]
|
| 902 | //! #[derive(Deserialize)]
|
| 903 | //! struct AnyName {
|
| 904 | //! item: Option<Vec<Item>>,
|
| 905 | //! }
|
| 906 | //! # assert_eq!(
|
| 907 | //! # AnyName { item: None },
|
| 908 | //! # quick_xml::de::from_str(r#"<any-tag/>"# ).unwrap(),
|
| 909 | //! # );
|
| 910 | //! # assert_eq!(
|
| 911 | //! # AnyName { item: Some(vec![()]) },
|
| 912 | //! # quick_xml::de::from_str(r#"<any-tag><item/></any-tag>"# ).unwrap(),
|
| 913 | //! # );
|
| 914 | //! # assert_eq!(
|
| 915 | //! # AnyName { item: Some(vec![(), (), ()]) },
|
| 916 | //! # quick_xml::de::from_str(r#"<any-tag><item/><item/><item/></any-tag>"# ).unwrap(),
|
| 917 | //! # );
|
| 918 | //! ```
|
| 919 | //!
|
| 920 | //! See also [Frequently Used Patterns](#element-lists).
|
| 921 | //!
|
| 922 | //! [field]: https://serde.rs/field-attrs.html#default
|
| 923 | //! [struct]: https://serde.rs/container-attrs.html#default
|
| 924 | //! </td>
|
| 925 | //! </tr>
|
| 926 | //! <!-- 14 ==================================================================================== -->
|
| 927 | //! <tr>
|
| 928 | //! <td>
|
| 929 | //! A sequence with a strict order, probably with mixed content
|
| 930 | //! (text / CDATA and tags):
|
| 931 | //!
|
| 932 | //! ```xml
|
| 933 | //! <one>...</one>
|
| 934 | //! text
|
| 935 | //! <![CDATA[cdata]]>
|
| 936 | //! <two>...</two>
|
| 937 | //! <one>...</one>
|
| 938 | //! ```
|
| 939 | //! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
|
| 940 | //!
|
| 941 | //! NOTE: this is just an example for showing mapping. XML does not allow
|
| 942 | //! multiple root tags -- you should wrap the sequence into a tag.
|
| 943 | //! </div>
|
| 944 | //! </td>
|
| 945 | //! <td>
|
| 946 | //!
|
| 947 | //! All elements are mapped to a heterogeneous sequential type: a tuple or a named tuple.
|
| 948 | //! Each element of the tuple should be able to be deserialized from the nested
|
| 949 | //! element content (`...`), except the enum types which would be deserialized
|
| 950 | //! from the full element (`<one>...</one>`), so they could use the element name
|
| 951 | //! to choose the right variant:
|
| 952 | //!
|
| 953 | //! ```
|
| 954 | //! # use pretty_assertions::assert_eq;
|
| 955 | //! # use serde::Deserialize;
|
| 956 | //! # type One = ();
|
| 957 | //! # type Two = ();
|
| 958 | //! # /*
|
| 959 | //! type One = ...;
|
| 960 | //! type Two = ...;
|
| 961 | //! # */
|
| 962 | //! # #[derive(Debug, PartialEq)]
|
| 963 | //! #[derive(Deserialize)]
|
| 964 | //! struct AnyName(One, String, Two, One);
|
| 965 | //! # assert_eq!(
|
| 966 | //! # AnyName((), "text cdata" .into(), (), ()),
|
| 967 | //! # quick_xml::de::from_str(r#"<one>...</one>text <![CDATA[cdata]]><two>...</two><one>...</one>"# ).unwrap(),
|
| 968 | //! # );
|
| 969 | //! ```
|
| 970 | //! ```
|
| 971 | //! # use pretty_assertions::assert_eq;
|
| 972 | //! # use serde::Deserialize;
|
| 973 | //! # #[derive(Debug, PartialEq)]
|
| 974 | //! #[derive(Deserialize)]
|
| 975 | //! #[serde(rename_all = "snake_case" )]
|
| 976 | //! enum Choice {
|
| 977 | //! One,
|
| 978 | //! }
|
| 979 | //! # type Two = ();
|
| 980 | //! # /*
|
| 981 | //! type Two = ...;
|
| 982 | //! # */
|
| 983 | //! type AnyName = (Choice, String, Two, Choice);
|
| 984 | //! # assert_eq!(
|
| 985 | //! # (Choice::One, "text cdata" .to_string(), (), Choice::One),
|
| 986 | //! # quick_xml::de::from_str(r#"<one>...</one>text <![CDATA[cdata]]><two>...</two><one>...</one>"# ).unwrap(),
|
| 987 | //! # );
|
| 988 | //! ```
|
| 989 | //! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
|
| 990 | //!
|
| 991 | //! NOTE: consecutive text and CDATA nodes are merged into one text node,
|
| 992 | //! so you cannot have two adjacent string types in your sequence.
|
| 993 | //! </div>
|
| 994 | //! </td>
|
| 995 | //! </tr>
|
| 996 | //! <!-- 15 ==================================================================================== -->
|
| 997 | //! <tr>
|
| 998 | //! <td>
|
| 999 | //! A sequence with a non-strict order, probably with a mixed content
|
| 1000 | //! (text / CDATA and tags).
|
| 1001 | //!
|
| 1002 | //! ```xml
|
| 1003 | //! <one>...</one>
|
| 1004 | //! text
|
| 1005 | //! <![CDATA[cdata]]>
|
| 1006 | //! <two>...</two>
|
| 1007 | //! <one>...</one>
|
| 1008 | //! ```
|
| 1009 | //! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
|
| 1010 | //!
|
| 1011 | //! NOTE: this is just an example for showing mapping. XML does not allow
|
| 1012 | //! multiple root tags -- you should wrap the sequence into a tag.
|
| 1013 | //! </div>
|
| 1014 | //! </td>
|
| 1015 | //! <td>
|
| 1016 | //! A homogeneous sequence of elements with a fixed or dynamic size:
|
| 1017 | //!
|
| 1018 | //! ```
|
| 1019 | //! # use pretty_assertions::assert_eq;
|
| 1020 | //! # use serde::Deserialize;
|
| 1021 | //! # #[derive(Debug, PartialEq)]
|
| 1022 | //! #[derive(Deserialize)]
|
| 1023 | //! #[serde(rename_all = "snake_case" )]
|
| 1024 | //! enum Choice {
|
| 1025 | //! One,
|
| 1026 | //! Two,
|
| 1027 | //! #[serde(other)]
|
| 1028 | //! Other,
|
| 1029 | //! }
|
| 1030 | //! type AnyName = [Choice; 4];
|
| 1031 | //! # assert_eq!(
|
| 1032 | //! # [Choice::One, Choice::Other, Choice::Two, Choice::One],
|
| 1033 | //! # quick_xml::de::from_str::<AnyName>(r#"<one>...</one>text <![CDATA[cdata]]><two>...</two><one>...</one>"# ).unwrap(),
|
| 1034 | //! # );
|
| 1035 | //! ```
|
| 1036 | //! ```
|
| 1037 | //! # use pretty_assertions::assert_eq;
|
| 1038 | //! # use serde::Deserialize;
|
| 1039 | //! # #[derive(Debug, PartialEq)]
|
| 1040 | //! #[derive(Deserialize)]
|
| 1041 | //! #[serde(rename_all = "snake_case" )]
|
| 1042 | //! enum Choice {
|
| 1043 | //! One,
|
| 1044 | //! Two,
|
| 1045 | //! #[serde(rename = "$text" )]
|
| 1046 | //! Other(String),
|
| 1047 | //! }
|
| 1048 | //! type AnyName = Vec<Choice>;
|
| 1049 | //! # assert_eq!(
|
| 1050 | //! # vec![
|
| 1051 | //! # Choice::One,
|
| 1052 | //! # Choice::Other("text cdata" .into()),
|
| 1053 | //! # Choice::Two,
|
| 1054 | //! # Choice::One,
|
| 1055 | //! # ],
|
| 1056 | //! # quick_xml::de::from_str::<AnyName>(r#"<one>...</one>text <![CDATA[cdata]]><two>...</two><one>...</one>"# ).unwrap(),
|
| 1057 | //! # );
|
| 1058 | //! ```
|
| 1059 | //! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
|
| 1060 | //!
|
| 1061 | //! NOTE: consecutive text and CDATA nodes are merged into one text node,
|
| 1062 | //! so you cannot have two adjacent string types in your sequence.
|
| 1063 | //! </div>
|
| 1064 | //! </td>
|
| 1065 | //! </tr>
|
| 1066 | //! <!-- 16 ==================================================================================== -->
|
| 1067 | //! <tr>
|
| 1068 | //! <td>
|
| 1069 | //! A sequence with a strict order, probably with a mixed content,
|
| 1070 | //! (text and tags) inside of the other element:
|
| 1071 | //!
|
| 1072 | //! ```xml
|
| 1073 | //! <any-tag attribute="...">
|
| 1074 | //! <one>...</one>
|
| 1075 | //! text
|
| 1076 | //! <![CDATA[cdata]]>
|
| 1077 | //! <two>...</two>
|
| 1078 | //! <one>...</one>
|
| 1079 | //! </any-tag>
|
| 1080 | //! ```
|
| 1081 | //! </td>
|
| 1082 | //! <td>
|
| 1083 | //!
|
| 1084 | //! A structure where all child elements are mapped to one field which has
|
| 1085 | //! a heterogeneous sequential type: tuple or named tuple. Each element of the
|
| 1086 | //! tuple should be able to be deserialized from the full element (`<one>...</one>`).
|
| 1087 | //!
|
| 1088 | //! You MUST specify `#[serde(rename = "$value")]` on that field:
|
| 1089 | //!
|
| 1090 | //! ```
|
| 1091 | //! # use pretty_assertions::assert_eq;
|
| 1092 | //! # use serde::Deserialize;
|
| 1093 | //! # type One = ();
|
| 1094 | //! # type Two = ();
|
| 1095 | //! # /*
|
| 1096 | //! type One = ...;
|
| 1097 | //! type Two = ...;
|
| 1098 | //! # */
|
| 1099 | //!
|
| 1100 | //! # #[derive(Debug, PartialEq)]
|
| 1101 | //! #[derive(Deserialize)]
|
| 1102 | //! struct AnyName {
|
| 1103 | //! #[serde(rename = "@attribute" )]
|
| 1104 | //! # attribute: (),
|
| 1105 | //! # /*
|
| 1106 | //! attribute: ...,
|
| 1107 | //! # */
|
| 1108 | //! // Not (yet?) supported by serde
|
| 1109 | //! // https://github.com/serde-rs/serde/issues/1905
|
| 1110 | //! // #[serde(flatten)]
|
| 1111 | //! #[serde(rename = "$value" )]
|
| 1112 | //! any_name: (One, String, Two, One),
|
| 1113 | //! }
|
| 1114 | //! # assert_eq!(
|
| 1115 | //! # AnyName { attribute: (), any_name: ((), "text cdata" .into(), (), ()) },
|
| 1116 | //! # quick_xml::de::from_str(" \
|
| 1117 | //! # <any-tag attribute='...'> \
|
| 1118 | //! # <one>...</one> \
|
| 1119 | //! # text \
|
| 1120 | //! # <![CDATA[cdata]]> \
|
| 1121 | //! # <two>...</two> \
|
| 1122 | //! # <one>...</one> \
|
| 1123 | //! # </any-tag>"
|
| 1124 | //! # ).unwrap(),
|
| 1125 | //! # );
|
| 1126 | //! ```
|
| 1127 | //! ```
|
| 1128 | //! # use pretty_assertions::assert_eq;
|
| 1129 | //! # use serde::Deserialize;
|
| 1130 | //! # type One = ();
|
| 1131 | //! # type Two = ();
|
| 1132 | //! # /*
|
| 1133 | //! type One = ...;
|
| 1134 | //! type Two = ...;
|
| 1135 | //! # */
|
| 1136 | //!
|
| 1137 | //! # #[derive(Debug, PartialEq)]
|
| 1138 | //! #[derive(Deserialize)]
|
| 1139 | //! struct NamedTuple(One, String, Two, One);
|
| 1140 | //!
|
| 1141 | //! # #[derive(Debug, PartialEq)]
|
| 1142 | //! #[derive(Deserialize)]
|
| 1143 | //! struct AnyName {
|
| 1144 | //! #[serde(rename = "@attribute" )]
|
| 1145 | //! # attribute: (),
|
| 1146 | //! # /*
|
| 1147 | //! attribute: ...,
|
| 1148 | //! # */
|
| 1149 | //! // Not (yet?) supported by serde
|
| 1150 | //! // https://github.com/serde-rs/serde/issues/1905
|
| 1151 | //! // #[serde(flatten)]
|
| 1152 | //! #[serde(rename = "$value" )]
|
| 1153 | //! any_name: NamedTuple,
|
| 1154 | //! }
|
| 1155 | //! # assert_eq!(
|
| 1156 | //! # AnyName { attribute: (), any_name: NamedTuple((), "text cdata" .into(), (), ()) },
|
| 1157 | //! # quick_xml::de::from_str(" \
|
| 1158 | //! # <any-tag attribute='...'> \
|
| 1159 | //! # <one>...</one> \
|
| 1160 | //! # text \
|
| 1161 | //! # <![CDATA[cdata]]> \
|
| 1162 | //! # <two>...</two> \
|
| 1163 | //! # <one>...</one> \
|
| 1164 | //! # </any-tag>"
|
| 1165 | //! # ).unwrap(),
|
| 1166 | //! # );
|
| 1167 | //! ```
|
| 1168 | //! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
|
| 1169 | //!
|
| 1170 | //! NOTE: consecutive text and CDATA nodes are merged into one text node,
|
| 1171 | //! so you cannot have two adjacent string types in your sequence.
|
| 1172 | //! </div>
|
| 1173 | //! </td>
|
| 1174 | //! </tr>
|
| 1175 | //! <!-- 17 ==================================================================================== -->
|
| 1176 | //! <tr>
|
| 1177 | //! <td>
|
| 1178 | //! A sequence with a non-strict order, probably with a mixed content
|
| 1179 | //! (text / CDATA and tags) inside of the other element:
|
| 1180 | //!
|
| 1181 | //! ```xml
|
| 1182 | //! <any-tag>
|
| 1183 | //! <one>...</one>
|
| 1184 | //! text
|
| 1185 | //! <![CDATA[cdata]]>
|
| 1186 | //! <two>...</two>
|
| 1187 | //! <one>...</one>
|
| 1188 | //! </any-tag>
|
| 1189 | //! ```
|
| 1190 | //! </td>
|
| 1191 | //! <td>
|
| 1192 | //!
|
| 1193 | //! A structure where all child elements are mapped to one field which has
|
| 1194 | //! a homogeneous sequential type: array-like container. A container type `T`
|
| 1195 | //! should be able to be deserialized from the nested element content (`...`),
|
| 1196 | //! except if it is an enum type which would be deserialized from the full
|
| 1197 | //! element (`<one>...</one>`).
|
| 1198 | //!
|
| 1199 | //! You MUST specify `#[serde(rename = "$value")]` on that field:
|
| 1200 | //!
|
| 1201 | //! ```
|
| 1202 | //! # use pretty_assertions::assert_eq;
|
| 1203 | //! # use serde::Deserialize;
|
| 1204 | //! # #[derive(Debug, PartialEq)]
|
| 1205 | //! #[derive(Deserialize)]
|
| 1206 | //! #[serde(rename_all = "snake_case" )]
|
| 1207 | //! enum Choice {
|
| 1208 | //! One,
|
| 1209 | //! Two,
|
| 1210 | //! #[serde(rename = "$text" )]
|
| 1211 | //! Other(String),
|
| 1212 | //! }
|
| 1213 | //! # #[derive(Debug, PartialEq)]
|
| 1214 | //! #[derive(Deserialize)]
|
| 1215 | //! struct AnyName {
|
| 1216 | //! #[serde(rename = "@attribute" )]
|
| 1217 | //! # attribute: (),
|
| 1218 | //! # /*
|
| 1219 | //! attribute: ...,
|
| 1220 | //! # */
|
| 1221 | //! // Not (yet?) supported by serde
|
| 1222 | //! // https://github.com/serde-rs/serde/issues/1905
|
| 1223 | //! // #[serde(flatten)]
|
| 1224 | //! #[serde(rename = "$value" )]
|
| 1225 | //! any_name: [Choice; 4],
|
| 1226 | //! }
|
| 1227 | //! # assert_eq!(
|
| 1228 | //! # AnyName { attribute: (), any_name: [
|
| 1229 | //! # Choice::One,
|
| 1230 | //! # Choice::Other("text cdata" .into()),
|
| 1231 | //! # Choice::Two,
|
| 1232 | //! # Choice::One,
|
| 1233 | //! # ] },
|
| 1234 | //! # quick_xml::de::from_str(" \
|
| 1235 | //! # <any-tag attribute='...'> \
|
| 1236 | //! # <one>...</one> \
|
| 1237 | //! # text \
|
| 1238 | //! # <![CDATA[cdata]]> \
|
| 1239 | //! # <two>...</two> \
|
| 1240 | //! # <one>...</one> \
|
| 1241 | //! # </any-tag>"
|
| 1242 | //! # ).unwrap(),
|
| 1243 | //! # );
|
| 1244 | //! ```
|
| 1245 | //! ```
|
| 1246 | //! # use pretty_assertions::assert_eq;
|
| 1247 | //! # use serde::Deserialize;
|
| 1248 | //! # #[derive(Debug, PartialEq)]
|
| 1249 | //! #[derive(Deserialize)]
|
| 1250 | //! #[serde(rename_all = "snake_case" )]
|
| 1251 | //! enum Choice {
|
| 1252 | //! One,
|
| 1253 | //! Two,
|
| 1254 | //! #[serde(rename = "$text" )]
|
| 1255 | //! Other(String),
|
| 1256 | //! }
|
| 1257 | //! # #[derive(Debug, PartialEq)]
|
| 1258 | //! #[derive(Deserialize)]
|
| 1259 | //! struct AnyName {
|
| 1260 | //! #[serde(rename = "@attribute" )]
|
| 1261 | //! # attribute: (),
|
| 1262 | //! # /*
|
| 1263 | //! attribute: ...,
|
| 1264 | //! # */
|
| 1265 | //! // Not (yet?) supported by serde
|
| 1266 | //! // https://github.com/serde-rs/serde/issues/1905
|
| 1267 | //! // #[serde(flatten)]
|
| 1268 | //! #[serde(rename = "$value" )]
|
| 1269 | //! any_name: Vec<Choice>,
|
| 1270 | //! }
|
| 1271 | //! # assert_eq!(
|
| 1272 | //! # AnyName { attribute: (), any_name: vec![
|
| 1273 | //! # Choice::One,
|
| 1274 | //! # Choice::Other("text cdata" .into()),
|
| 1275 | //! # Choice::Two,
|
| 1276 | //! # Choice::One,
|
| 1277 | //! # ] },
|
| 1278 | //! # quick_xml::de::from_str(" \
|
| 1279 | //! # <any-tag attribute='...'> \
|
| 1280 | //! # <one>...</one> \
|
| 1281 | //! # text \
|
| 1282 | //! # <![CDATA[cdata]]> \
|
| 1283 | //! # <two>...</two> \
|
| 1284 | //! # <one>...</one> \
|
| 1285 | //! # </any-tag>"
|
| 1286 | //! # ).unwrap(),
|
| 1287 | //! # );
|
| 1288 | //! ```
|
| 1289 | //! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
|
| 1290 | //!
|
| 1291 | //! NOTE: consecutive text and CDATA nodes are merged into one text node,
|
| 1292 | //! so you cannot have two adjacent string types in your sequence.
|
| 1293 | //! </div>
|
| 1294 | //! </td>
|
| 1295 | //! </tr>
|
| 1296 | //! </tbody>
|
| 1297 | //! </table>
|
| 1298 | //!
|
| 1299 | //!
|
| 1300 | //!
|
| 1301 | //! Composition Rules
|
| 1302 | //! =================
|
| 1303 | //!
|
| 1304 | //! The XML format is very different from other formats supported by `serde`.
|
| 1305 | //! One such difference is how data in the serialized form is related to
|
| 1306 | //! the Rust type. Usually each byte in the data can be associated only with
|
| 1307 | //! one field in the data structure. However, XML is an exception.
|
| 1308 | //!
|
| 1309 | //! For example, take this XML:
|
| 1310 | //!
|
| 1311 | //! ```xml
|
| 1312 | //! <any>
|
| 1313 | //! <key attr="value"/>
|
| 1314 | //! </any>
|
| 1315 | //! ```
|
| 1316 | //!
|
| 1317 | //! and try to deserialize it to the struct `AnyName`:
|
| 1318 | //!
|
| 1319 | //! ```no_run
|
| 1320 | //! # use serde::Deserialize;
|
| 1321 | //! #[derive(Deserialize)]
|
| 1322 | //! struct AnyName { // AnyName calls `deserialize_struct` on `<any><key attr="value"/></any>`
|
| 1323 | //! // Used data: ^^^^^^^^^^^^^^^^^^^
|
| 1324 | //! key: Inner, // Inner calls `deserialize_struct` on `<key attr="value"/>`
|
| 1325 | //! // Used data: ^^^^^^^^^^^^
|
| 1326 | //! }
|
| 1327 | //! #[derive(Deserialize)]
|
| 1328 | //! struct Inner {
|
| 1329 | //! #[serde(rename = "@attr" )]
|
| 1330 | //! attr: String, // String calls `deserialize_string` on `value`
|
| 1331 | //! // Used data: ^^^^^
|
| 1332 | //! }
|
| 1333 | //! ```
|
| 1334 | //!
|
| 1335 | //! The comments show which methods of a [`Deserializer`] are called by each struct's
|
| 1336 | //! `deserialize` method and which input they see. **Used data** shows what
|
| 1337 | //! content is actually used for deserializing. As you can see, the name of the inner
|
| 1338 | //! `<key>` tag is used both as a map key / outer struct field name and as part
|
| 1339 | //! of the inner struct (although the _value_ of the tag, i.e. `key`, is not used
|
| 1340 | //! by it).
|
| 1341 | //!
|
| 1342 | //!
|
| 1343 | //!
|
| 1344 | //! Difference between `$text` and `$value` special names
|
| 1345 | //! =====================================================
|
| 1346 | //!
|
| 1347 | //! quick-xml supports two special names for fields -- `$text` and `$value`.
|
| 1348 | //! Although they may seem the same, there is a distinction. Two different
|
| 1349 | //! names are required mostly for serialization, because quick-xml should know
|
| 1350 | //! how you want to serialize certain constructs, which could be represented
|
| 1351 | //! through XML in multiple different ways.
|
| 1352 | //!
|
| 1353 | //! The only difference is in how complex types and sequences are serialized.
|
| 1354 | //! If you doubt which one you should select, begin with [`$value`](#value).
|
| 1355 | //!
|
| 1356 | //! ## `$text`
|
| 1357 | //! `$text` is used when you want to write your XML as a text or a CDATA content.
|
| 1358 | //! More formally, a field with that name represents a simple type definition with
|
| 1359 | //! `{variety} = atomic` or `{variety} = union` whose basic members are all atomic,
|
| 1360 | //! as described in the [specification].
|
| 1361 | //!
|
| 1362 | //! As a result, not all types of such fields can be serialized. Only serialization
|
| 1363 | //! of the following types is supported:
|
| 1364 | //! - all primitive types (strings, numbers, booleans)
|
| 1365 | //! - unit variants of enumerations (serializes to a name of a variant)
|
| 1366 | //! - newtypes (delegates serialization to inner type)
|
| 1367 | //! - [`Option`] of above (`None` serializes to nothing)
|
| 1368 | //! - sequences (including tuples and tuple variants of enumerations) of above,
|
| 1369 | //! excluding `None` and empty string elements (because it will not be possible
|
| 1370 | //! to deserialize them back). The elements are separated by space(s)
|
| 1371 | //! - unit type `()` and unit structs (serializes to nothing)
|
| 1372 | //!
|
| 1373 | //! Complex types, such as structs and maps, are not supported in this field.
|
| 1374 | //! If you want them, you should use `$value`.
|
| 1375 | //!
|
| 1376 | //! Sequences are serialized to a space-delimited string, which is why only certain
|
| 1377 | //! types are allowed in this mode:
|
| 1378 | //!
|
| 1379 | //! ```
|
| 1380 | //! # use serde::{Deserialize, Serialize};
|
| 1381 | //! # use quick_xml::de::from_str;
|
| 1382 | //! # use quick_xml::se::to_string;
|
| 1383 | //! #[derive(Deserialize, Serialize, PartialEq, Debug)]
|
| 1384 | //! struct AnyName {
|
| 1385 | //! #[serde(rename = "$text" )]
|
| 1386 | //! field: Vec<usize>,
|
| 1387 | //! }
|
| 1388 | //!
|
| 1389 | //! let obj = AnyName { field: vec![1, 2, 3] };
|
| 1390 | //! let xml = to_string(&obj).unwrap();
|
| 1391 | //! assert_eq!(xml, "<AnyName>1 2 3</AnyName>" );
|
| 1392 | //!
|
| 1393 | //! let object: AnyName = from_str(&xml).unwrap();
|
| 1394 | //! assert_eq!(object, obj);
|
| 1395 | //! ```
|
| 1396 | //!
|
| 1397 | //! ## `$value`
|
| 1398 | //! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
|
| 1399 | //!
|
| 1400 | //! NOTE: a name `#content` would better explain the purpose of that field,
|
| 1401 | //! but `$value` is used for compatibility with other XML serde crates, which
|
| 1402 | //! use that name. This will allow you to switch XML crates more smoothly if required.
|
| 1403 | //! </div>
|
| 1404 | //!
|
| 1405 | //! Representation of primitive types in `$value` does not differ from their
|
| 1406 | //! representation in `$text` field. The difference is how sequences are serialized.
|
| 1407 | //! `$value` serializes each sequence item as a separate XML element. The name
|
| 1408 | //! of that element is taken from serialized type, and because only `enum`s provide
|
| 1409 | //! such name (their variant name), only they should be used for such fields.
|
| 1410 | //!
|
| 1411 | //! `$value` fields do not support `struct` types with fields; the serialization
|
| 1412 | //! of such types would end with an `Err(Unsupported)`. Unit structs and the unit
|
| 1413 | //! type `()` serialize to nothing and can be deserialized from any content.
|
| 1414 | //!
|
| 1415 | //! Serialization and deserialization of a `$value` field are performed as usual, except
|
| 1416 | //! that the name for an XML element will be given by the serialized type, instead of the
|
| 1417 | //! field. The latter allows serializing enumerated types, where the variant is encoded
|
| 1418 | //! as a tag name, and so representing an XSD `xs:choice` schema by a Rust `enum`.
|
| 1419 | //!
|
| 1420 | //! In the example below, field will be serialized as `<field/>`, because elements
|
| 1421 | //! get their names from the field name. It cannot be deserialized, because `Enum`
|
| 1422 | //! expects elements `<A/>`, `<B/>` or `<C/>`, but `AnyName` looked only for `<field/>`:
|
| 1423 | //!
|
| 1424 | //! ```no_run
|
| 1425 | //! # use serde::{Deserialize, Serialize};
|
| 1426 | //! #[derive(Deserialize, Serialize)]
|
| 1427 | //! enum Enum { A, B, C }
|
| 1428 | //!
|
| 1429 | //! #[derive(Deserialize, Serialize)]
|
| 1430 | //! struct AnyName {
|
| 1431 | //! // <field/>
|
| 1432 | //! field: Enum,
|
| 1433 | //! }
|
| 1434 | //! ```
|
| 1435 | //!
|
| 1436 | //! If you rename field to `$value`, then `field` would be serialized as `<A/>`,
|
| 1437 | //! `<B/>` or `<C/>`, depending on its content. It is also possible to
|
| 1438 | //! deserialize it from the same elements:
|
| 1439 | //!
|
| 1440 | //! ```no_run
|
| 1441 | //! # use serde::{Deserialize, Serialize};
|
| 1442 | //! # #[derive(Deserialize, Serialize)]
|
| 1443 | //! # enum Enum { A, B, C }
|
| 1444 | //! #
|
| 1445 | //! #[derive(Deserialize, Serialize)]
|
| 1446 | //! struct AnyName {
|
| 1447 | //! // <A/>, <B/> or <C/>
|
| 1448 | //! #[serde(rename = "$value" )]
|
| 1449 | //! field: Enum,
|
| 1450 | //! }
|
| 1451 | //! ```
|
| 1452 | //!
|
| 1453 | //! ### Primitives and sequences of primitives
|
| 1454 | //!
|
| 1455 | //! Sequences are serialized to a list of elements. Note that types that do not
|
| 1456 | //! produce their own tag (i.e. primitives) are written as is, without delimiters:
|
| 1457 | //!
|
| 1458 | //! ```
|
| 1459 | //! # use serde::{Deserialize, Serialize};
|
| 1460 | //! # use quick_xml::de::from_str;
|
| 1461 | //! # use quick_xml::se::to_string;
|
| 1462 | //! #[derive(Deserialize, Serialize, PartialEq, Debug)]
|
| 1463 | //! struct AnyName {
|
| 1464 | //! #[serde(rename = "$value" )]
|
| 1465 | //! field: Vec<usize>,
|
| 1466 | //! }
|
| 1467 | //!
|
| 1468 | //! let obj = AnyName { field: vec![1, 2, 3] };
|
| 1469 | //! let xml = to_string(&obj).unwrap();
|
| 1470 | //! // Note that types that do not produce their own tag are written as is!
|
| 1471 | //! assert_eq!(xml, "<AnyName>123</AnyName>" );
|
| 1472 | //!
|
| 1473 | //! let object: AnyName = from_str("<AnyName>123</AnyName>" ).unwrap();
|
| 1474 | //! assert_eq!(object, AnyName { field: vec![123] });
|
| 1475 | //!
|
| 1476 | //! // `1 2 3` is mapped to a single `usize` element
|
| 1477 | //! // It is impossible to deserialize list of primitives to such field
|
| 1478 | //! from_str::<AnyName>("<AnyName>1 2 3</AnyName>" ).unwrap_err();
|
| 1479 | //! ```
|
| 1480 | //!
|
| 1481 | //! A particular case of that example is a string `$value` field, which probably
|
| 1482 | //! would be the most used example of that attribute:
|
| 1483 | //!
|
| 1484 | //! ```
|
| 1485 | //! # use serde::{Deserialize, Serialize};
|
| 1486 | //! # use quick_xml::de::from_str;
|
| 1487 | //! # use quick_xml::se::to_string;
|
| 1488 | //! #[derive(Deserialize, Serialize, PartialEq, Debug)]
|
| 1489 | //! struct AnyName {
|
| 1490 | //! #[serde(rename = "$value" )]
|
| 1491 | //! field: String,
|
| 1492 | //! }
|
| 1493 | //!
|
| 1494 | //! let obj = AnyName { field: "content" .to_string() };
|
| 1495 | //! let xml = to_string(&obj).unwrap();
|
| 1496 | //! assert_eq!(xml, "<AnyName>content</AnyName>" );
|
| 1497 | //! ```
|
| 1498 | //!
|
| 1499 | //! ### Structs and sequences of structs
|
| 1500 | //!
|
| 1501 | //! Note that structures do not have a serializable name either (the name of the
|
| 1502 | //! type is never used), so it is impossible to serialize a non-unit struct or a
|
| 1503 | //! sequence of non-unit structs in a `$value` field. (Sequences of) unit structs
|
| 1504 | //! are serialized as an empty string, because units themselves serialize
|
| 1505 | //! to nothing:
|
| 1506 | //!
|
| 1507 | //! ```
|
| 1508 | //! # use serde::{Deserialize, Serialize};
|
| 1509 | //! # use quick_xml::de::from_str;
|
| 1510 | //! # use quick_xml::se::to_string;
|
| 1511 | //! #[derive(Deserialize, Serialize, PartialEq, Debug)]
|
| 1512 | //! struct Unit;
|
| 1513 | //!
|
| 1514 | //! #[derive(Deserialize, Serialize, PartialEq, Debug)]
|
| 1515 | //! struct AnyName {
|
| 1516 | //! // #[serde(default)] is required for deserialization of empty lists
|
| 1517 | //! // This is a general note, not related to $value
|
| 1518 | //! #[serde(rename = "$value" , default)]
|
| 1519 | //! field: Vec<Unit>,
|
| 1520 | //! }
|
| 1521 | //!
|
| 1522 | //! let obj = AnyName { field: vec![Unit, Unit, Unit] };
|
| 1523 | //! let xml = to_string(&obj).unwrap();
|
| 1524 | //! assert_eq!(xml, "<AnyName/>" );
|
| 1525 | //!
|
| 1526 | //! let object: AnyName = from_str("<AnyName/>" ).unwrap();
|
| 1527 | //! assert_eq!(object, AnyName { field: vec![] });
|
| 1528 | //!
|
| 1529 | //! let object: AnyName = from_str("<AnyName></AnyName>" ).unwrap();
|
| 1530 | //! assert_eq!(object, AnyName { field: vec![] });
|
| 1531 | //!
|
| 1532 | //! let object: AnyName = from_str("<AnyName><A/><B/><C/></AnyName>" ).unwrap();
|
| 1533 | //! assert_eq!(object, AnyName { field: vec![Unit, Unit, Unit] });
|
| 1534 | //! ```
|
| 1535 | //!
|
| 1536 | //! ### Enums and sequences of enums
|
| 1537 | //!
|
| 1538 | //! Enumerations use the variant name as an element name:
|
| 1539 | //!
|
| 1540 | //! ```
|
| 1541 | //! # use serde::{Deserialize, Serialize};
|
| 1542 | //! # use quick_xml::de::from_str;
|
| 1543 | //! # use quick_xml::se::to_string;
|
| 1544 | //! #[derive(Deserialize, Serialize, PartialEq, Debug)]
|
| 1545 | //! struct AnyName {
|
| 1546 | //! #[serde(rename = "$value" )]
|
| 1547 | //! field: Vec<Enum>,
|
| 1548 | //! }
|
| 1549 | //!
|
| 1550 | //! #[derive(Deserialize, Serialize, PartialEq, Debug)]
|
| 1551 | //! enum Enum { A, B, C }
|
| 1552 | //!
|
| 1553 | //! let obj = AnyName { field: vec![Enum::A, Enum::B, Enum::C] };
|
| 1554 | //! let xml = to_string(&obj).unwrap();
|
| 1555 | //! assert_eq!(
|
| 1556 | //! xml,
|
| 1557 | //! "<AnyName> \
|
| 1558 | //! <A/> \
|
| 1559 | //! <B/> \
|
| 1560 | //! <C/> \
|
| 1561 | //! </AnyName>"
|
| 1562 | //! );
|
| 1563 | //!
|
| 1564 | //! let object: AnyName = from_str(&xml).unwrap();
|
| 1565 | //! assert_eq!(object, obj);
|
| 1566 | //! ```
|
| 1567 | //!
|
| 1568 | //! ----------------------------------------------------------------------------
|
| 1569 | //!
|
| 1570 | //! You can have either `$text` or `$value` field in your structs. Unfortunately,
|
| 1571 | //! that is not enforced, so you can theoretically have both, but you should
|
| 1572 | //! avoid that.
|
| 1573 | //!
|
| 1574 | //!
|
| 1575 | //!
|
| 1576 | //! Frequently Used Patterns
|
| 1577 | //! ========================
|
| 1578 | //!
|
| 1579 | //! Some XML constructs are used so frequently that it is worth documenting the recommended
|
| 1580 | //! way to represent them in Rust. The sections below describe them.
|
| 1581 | //!
|
| 1582 | //! `<element>` lists
|
| 1583 | //! -----------------
|
| 1584 | //! Many XML formats wrap lists of elements in an additional container,
|
| 1585 | //! although this is not required by the XML rules:
|
| 1586 | //!
|
| 1587 | //! ```xml
|
| 1588 | //! <root>
|
| 1589 | //! <field1/>
|
| 1590 | //! <field2/>
|
| 1591 | //! <list><!-- Container -->
|
| 1592 | //! <element/>
|
| 1593 | //! <element/>
|
| 1594 | //! <element/>
|
| 1595 | //! </list>
|
| 1596 | //! <field3/>
|
| 1597 | //! </root>
|
| 1598 | //! ```
|
| 1599 | //! In this case, there is a great desire to describe this XML in this way:
|
| 1600 | //! ```
|
| 1601 | //! /// Represents <element/>
|
| 1602 | //! type Element = ();
|
| 1603 | //!
|
| 1604 | //! /// Represents <root>...</root>
|
| 1605 | //! struct AnyName {
|
| 1606 | //! // Incorrect
|
| 1607 | //! list: Vec<Element>,
|
| 1608 | //! }
|
| 1609 | //! ```
|
| 1610 | //! This will not work, because potentially `<list>` element can have attributes
|
| 1611 | //! and other elements inside. You should define the struct for the `<list>`
|
| 1612 | //! explicitly, as you do that in the XSD for that XML:
|
| 1613 | //! ```
|
| 1614 | //! /// Represents <element/>
|
| 1615 | //! type Element = ();
|
| 1616 | //!
|
| 1617 | //! /// Represents <root>...</root>
|
| 1618 | //! struct AnyName {
|
| 1619 | //! // Correct
|
| 1620 | //! list: List,
|
| 1621 | //! }
|
| 1622 | //! /// Represents <list>...</list>
|
| 1623 | //! struct List {
|
| 1624 | //! element: Vec<Element>,
|
| 1625 | //! }
|
| 1626 | //! ```
|
| 1627 | //!
|
| 1628 | //! If you want to simplify your API, you could write a simple function for unwrapping
|
| 1629 | //! inner list and apply it via [`deserialize_with`]:
|
| 1630 | //!
|
| 1631 | //! ```
|
| 1632 | //! # use pretty_assertions::assert_eq;
|
| 1633 | //! use quick_xml::de::from_str;
|
| 1634 | //! use serde::{Deserialize, Deserializer};
|
| 1635 | //!
|
| 1636 | //! /// Represents <element/>
|
| 1637 | //! type Element = ();
|
| 1638 | //!
|
| 1639 | //! /// Represents <root>...</root>
|
| 1640 | //! #[derive(Deserialize, Debug, PartialEq)]
|
| 1641 | //! struct AnyName {
|
| 1642 | //! #[serde(deserialize_with = "unwrap_list" )]
|
| 1643 | //! list: Vec<Element>,
|
| 1644 | //! }
|
| 1645 | //!
|
| 1646 | //! fn unwrap_list<'de, D>(deserializer: D) -> Result<Vec<Element>, D::Error>
|
| 1647 | //! where
|
| 1648 | //! D: Deserializer<'de>,
|
| 1649 | //! {
|
| 1650 | //! /// Represents <list>...</list>
|
| 1651 | //! #[derive(Deserialize)]
|
| 1652 | //! struct List {
|
| 1653 | //! // default allows empty list
|
| 1654 | //! #[serde(default)]
|
| 1655 | //! element: Vec<Element>,
|
| 1656 | //! }
|
| 1657 | //! Ok(List::deserialize(deserializer)?.element)
|
| 1658 | //! }
|
| 1659 | //!
|
| 1660 | //! assert_eq!(
|
| 1661 | //! AnyName { list: vec![(), (), ()] },
|
| 1662 | //! from_str("
|
| 1663 | //! <root>
|
| 1664 | //! <list>
|
| 1665 | //! <element/>
|
| 1666 | //! <element/>
|
| 1667 | //! <element/>
|
| 1668 | //! </list>
|
| 1669 | //! </root>
|
| 1670 | //! " ).unwrap(),
|
| 1671 | //! );
|
| 1672 | //! ```
|
| 1673 | //!
|
| 1674 | //! Instead of writing such functions manually, you also could try <https://lib.rs/crates/serde-query>.
|
| 1675 | //!
|
| 1676 | //! Enum::Unit Variants As a Text
|
| 1677 | //! -----------------------------
|
| 1678 | //! One frequent task, and a typical source of mistakes, is mapping the text
|
| 1679 | //! content of some tag to a Rust `enum`. For example, for the XML:
|
| 1680 | //!
|
| 1681 | //! ```xml
|
| 1682 | //! <some-container>
|
| 1683 | //! <field>EnumValue</field>
|
| 1684 | //! </some-container>
|
| 1685 | //! ```
|
| 1686 | //! one could create an _incorrect_ mapping
|
| 1687 | //!
|
| 1688 | //! ```
|
| 1689 | //! # use serde::{Deserialize, Serialize};
|
| 1690 | //! #
|
| 1691 | //! #[derive(Serialize, Deserialize)]
|
| 1692 | //! enum SomeEnum {
|
| 1693 | //! EnumValue,
|
| 1694 | //! # /*
|
| 1695 | //! ...
|
| 1696 | //! # */
|
| 1697 | //! }
|
| 1698 | //!
|
| 1699 | //! #[derive(Serialize, Deserialize)]
|
| 1700 | //! #[serde(rename = "some-container" )]
|
| 1701 | //! struct SomeContainer {
|
| 1702 | //! field: SomeEnum,
|
| 1703 | //! }
|
| 1704 | //! ```
|
| 1705 | //!
|
| 1706 | //! Actually, those types will be serialized into:
|
| 1707 | //! ```xml
|
| 1708 | //! <some-container>
|
| 1709 | //! <EnumValue/>
|
| 1710 | //! </some-container>
|
| 1711 | //! ```
|
| 1712 | //! and will not be able to be deserialized.
|
| 1713 | //!
|
| 1714 | //! You can easily see what's wrong if you think about attributes, which could
|
| 1715 | //! be defined in the `<field>` tag:
|
| 1716 | //! ```xml
|
| 1717 | //! <some-container>
|
| 1718 | //! <field some="attribute">EnumValue</field>
|
| 1719 | //! </some-container>
|
| 1720 | //! ```
|
| 1721 | //!
|
| 1722 | //! After that you can find the correct solution, using the principles explained
|
| 1723 | //! above. You should wrap `SomeEnum` into wrapper struct under the [`$text`](#text)
|
| 1724 | //! name:
|
| 1725 | //! ```
|
| 1726 | //! # use serde::{Serialize, Deserialize};
|
| 1727 | //! # type SomeEnum = ();
|
| 1728 | //! #[derive(Serialize, Deserialize)]
|
| 1729 | //! struct Field {
|
| 1730 | //! // Use a special name `$text` to map field to the text content
|
| 1731 | //! #[serde(rename = "$text" )]
|
| 1732 | //! content: SomeEnum,
|
| 1733 | //! }
|
| 1734 | //!
|
| 1735 | //! #[derive(Serialize, Deserialize)]
|
| 1736 | //! #[serde(rename = "some-container" )]
|
| 1737 | //! struct SomeContainer {
|
| 1738 | //! field: Field,
|
| 1739 | //! }
|
| 1740 | //! ```
|
| 1741 | //!
|
| 1742 | //! If you still want to keep your struct untouched, you can instead use the
|
| 1743 | //! helper module [`text_content`].
|
| 1744 | //!
|
| 1745 | //!
|
| 1746 | //! Internally Tagged Enums
|
| 1747 | //! -----------------------
|
| 1748 | //! [Tagged enums] are currently not supported because of an issue in the Serde
|
| 1749 | //! design (see [serde#1183] and [quick-xml#586]) and missing optimizations in
|
| 1750 | //! Serde which could be useful for XML parsing ([serde#1495]). This can be worked
|
| 1751 | //! around by manually implementing deserialize with `#[serde(deserialize_with = "func")]`
|
| 1752 | //! or implementing [`Deserialize`], but this can get very tedious very fast for
|
| 1753 | //! files with large amounts of tagged enums. To help with this issue quick-xml
|
| 1754 | //! provides a macro [`impl_deserialize_for_internally_tagged_enum!`]. See the
|
| 1755 | //! macro documentation for details.
|
| 1756 | //!
|
| 1757 | //!
|
| 1758 | //! [specification]: https://www.w3.org/TR/xmlschema11-1/#Simple_Type_Definition
|
| 1759 | //! [`deserialize_with`]: https://serde.rs/field-attrs.html#deserialize_with
|
| 1760 | //! [#497]: https://github.com/tafia/quick-xml/issues/497
|
| 1761 | //! [`text_content`]: crate::serde_helpers::text_content
|
| 1762 | //! [Tagged enums]: https://serde.rs/enum-representations.html#internally-tagged
|
| 1763 | //! [serde#1183]: https://github.com/serde-rs/serde/issues/1183
|
| 1764 | //! [serde#1495]: https://github.com/serde-rs/serde/issues/1495
|
| 1765 | //! [quick-xml#586]: https://github.com/tafia/quick-xml/issues/586
|
| 1766 | //! [`impl_deserialize_for_internally_tagged_enum!`]: crate::impl_deserialize_for_internally_tagged_enum
|
| 1767 |
|
| 1768 | // Macros should be defined before the modules that use them
|
| 1769 | // Also, macros should be imported before using them
|
| 1770 | use serde::serde_if_integer128;
|
| 1771 |
|
/// Generates a deserializer method named `$deserialize` that reads the current
/// text content with `self.read_string()`, parses it with [`str::parse`] and
/// passes the parsed value to the visitor's `$visit` method.
///
/// The optional trailing token (for example `mut`) is placed in front of `self`
/// in the generated signature.
macro_rules! deserialize_type {
    ($deserialize:ident => $visit:ident, $($mut:tt)?) => {
        fn $deserialize<V: Visitor<'de>>($($mut)? self, visitor: V) -> Result<V::Value, DeError> {
            // The raw text is parsed as is: a valid textual representation of
            // a number contains no characters that would have to be escaped
            let raw = self.read_string()?;
            let parsed = raw.parse()?;
            visitor.$visit(parsed)
        }
    };
}
|
| 1784 |
|
/// Expands to the deserializer methods for scalar-like types: numbers, booleans,
/// characters, strings, byte arrays and identifiers, together with the
/// unit-struct, newtype and tuple methods that simply forward to other methods.
///
/// The optional token (for example `mut`) is placed in front of `self` in the
/// methods that call `read_string()`.
macro_rules! deserialize_primitives {
    ($($mut:tt)?) => {
        // Signed integers
        deserialize_type!(deserialize_i8 => visit_i8, $($mut)?);
        deserialize_type!(deserialize_i16 => visit_i16, $($mut)?);
        deserialize_type!(deserialize_i32 => visit_i32, $($mut)?);
        deserialize_type!(deserialize_i64 => visit_i64, $($mut)?);

        // Unsigned integers
        deserialize_type!(deserialize_u8 => visit_u8, $($mut)?);
        deserialize_type!(deserialize_u16 => visit_u16, $($mut)?);
        deserialize_type!(deserialize_u32 => visit_u32, $($mut)?);
        deserialize_type!(deserialize_u64 => visit_u64, $($mut)?);

        // 128-bit integers, emitted through serde's helper macro
        serde_if_integer128! {
            deserialize_type!(deserialize_i128 => visit_i128, $($mut)?);
            deserialize_type!(deserialize_u128 => visit_u128, $($mut)?);
        }

        // Floating point numbers
        deserialize_type!(deserialize_f32 => visit_f32, $($mut)?);
        deserialize_type!(deserialize_f64 => visit_f64, $($mut)?);

        /// Reads the text content and interprets it with `str2bool`.
        fn deserialize_bool<V: Visitor<'de>>($($mut)? self, visitor: V) -> Result<V::Value, DeError> {
            let content = self.read_string()?;

            str2bool(&content, visitor)
        }

        /// A character is deserialized in the same way as a [string](#method.deserialize_str).
        fn deserialize_char<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, DeError> {
            self.deserialize_str(visitor)
        }

        /// Reads the text content and hands it to the visitor: borrowed content
        /// goes to `visit_borrowed_str`, owned content goes to `visit_string`.
        fn deserialize_str<V: Visitor<'de>>($($mut)? self, visitor: V) -> Result<V::Value, DeError> {
            match self.read_string()? {
                Cow::Owned(owned) => visitor.visit_string(owned),
                Cow::Borrowed(borrowed) => visitor.visit_borrowed_str(borrowed),
            }
        }

        /// An owned string is deserialized in the same way as a
        /// [non-owned](#method.deserialize_str) one.
        fn deserialize_string<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, DeError> {
            self.deserialize_str(visitor)
        }

        /// Always fails with [`DeError::Unsupported`].
        fn deserialize_bytes<V: Visitor<'de>>(self, _visitor: V) -> Result<V::Value, DeError> {
            Err(DeError::Unsupported("binary data content is not supported by XML format".into()))
        }

        /// Delegates to [`deserialize_bytes`](#method.deserialize_bytes).
        fn deserialize_byte_buf<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, DeError> {
            self.deserialize_bytes(visitor)
        }

        /// A named unit is deserialized in the same way as an
        /// [unnamed unit](#method.deserialize_unit); the name is ignored.
        fn deserialize_unit_struct<V: Visitor<'de>>(
            self,
            _name: &'static str,
            visitor: V,
        ) -> Result<V::Value, DeError> {
            self.deserialize_unit(visitor)
        }

        /// A newtype is deserialized in the same way as a one-element
        /// [tuple](#method.deserialize_tuple); the name is ignored.
        fn deserialize_newtype_struct<V: Visitor<'de>>(
            self,
            _name: &'static str,
            visitor: V,
        ) -> Result<V::Value, DeError> {
            self.deserialize_tuple(1, visitor)
        }

        /// A tuple is deserialized in the same way as a
        /// [sequence](#method.deserialize_seq); the length is ignored.
        fn deserialize_tuple<V: Visitor<'de>>(
            self,
            _len: usize,
            visitor: V,
        ) -> Result<V::Value, DeError> {
            self.deserialize_seq(visitor)
        }

        /// A named tuple is deserialized in the same way as an
        /// [unnamed tuple](#method.deserialize_tuple); the name is ignored.
        fn deserialize_tuple_struct<V: Visitor<'de>>(
            self,
            _name: &'static str,
            len: usize,
            visitor: V,
        ) -> Result<V::Value, DeError> {
            self.deserialize_tuple(len, visitor)
        }

        /// An identifier is deserialized in the same way as a
        /// [string](#method.deserialize_str).
        fn deserialize_identifier<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, DeError> {
            self.deserialize_str(visitor)
        }
    };
}
|
| 1913 |
|
/// Deserializes an optional value: peeks at the next event of `$de` and calls
/// `visit_none()` on the visitor when that event is an empty text or the end
/// of the document; in every other case calls `visit_some($deserializer)`.
macro_rules! deserialize_option {
    ($de:expr, $deserializer:ident, $visitor:ident) => {{
        // Decide first, so the peeked event is no longer needed when the
        // deserializer is handed over to the visitor
        let is_absent = match $de.peek()? {
            DeEvent::Eof => true,
            DeEvent::Text(text) => text.is_empty(),
            _ => false,
        };

        if is_absent {
            $visitor.visit_none()
        } else {
            $visitor.visit_some($deserializer)
        }
    }};
}
|
| 1923 |
|
| 1924 | mod key;
|
| 1925 | mod map;
|
| 1926 | mod resolver;
|
| 1927 | mod simple_type;
|
| 1928 | mod var;
|
| 1929 |
|
| 1930 | pub use crate::errors::serialize::DeError;
|
| 1931 | pub use resolver::{EntityResolver, NoEntityResolver};
|
| 1932 |
|
| 1933 | use crate::{
|
| 1934 | encoding::Decoder,
|
| 1935 | errors::Error,
|
| 1936 | events::{BytesCData, BytesEnd, BytesStart, BytesText, Event},
|
| 1937 | name::QName,
|
| 1938 | reader::Reader,
|
| 1939 | };
|
| 1940 | use serde::de::{self, Deserialize, DeserializeOwned, DeserializeSeed, SeqAccess, Visitor};
|
| 1941 | use std::borrow::Cow;
|
| 1942 | #[cfg (feature = "overlapped-lists" )]
|
| 1943 | use std::collections::VecDeque;
|
| 1944 | use std::io::BufRead;
|
| 1945 | use std::mem::replace;
|
| 1946 | #[cfg (feature = "overlapped-lists" )]
|
| 1947 | use std::num::NonZeroUsize;
|
| 1948 | use std::ops::Deref;
|
| 1949 |
|
/// Special field name (`$text`) for data represented by a text node or a CDATA
/// node. XML markup is not expected in such a field
pub(crate) const TEXT_KEY: &str = "$text";
/// Special field name (`$value`) for data represented by any XML markup inside
/// an element
pub(crate) const VALUE_KEY: &str = "$value";
|
| 1954 |
|
/// Decoded and concatenated content of consecutive [`Text`] and [`CData`]
/// events. _Consecutive_ means that the events follow each other directly or
/// are separated only by (any number of) [`Comment`] or [`PI`] events.
///
/// [`Text`]: Event::Text
/// [`CData`]: Event::CData
/// [`Comment`]: Event::Comment
/// [`PI`]: Event::PI
#[derive(Debug, PartialEq, Eq)]
pub struct Text<'a> {
    /// The decoded and concatenated text content
    text: Cow<'a, str>,
}
|
| 1967 |
|
| 1968 | impl<'a> Deref for Text<'a> {
|
| 1969 | type Target = str;
|
| 1970 |
|
| 1971 | #[inline ]
|
| 1972 | fn deref(&self) -> &Self::Target {
|
| 1973 | self.text.deref()
|
| 1974 | }
|
| 1975 | }
|
| 1976 |
|
| 1977 | impl<'a> From<&'a str> for Text<'a> {
|
| 1978 | #[inline ]
|
| 1979 | fn from(text: &'a str) -> Self {
|
| 1980 | Self {
|
| 1981 | text: Cow::Borrowed(text),
|
| 1982 | }
|
| 1983 | }
|
| 1984 | }
|
| 1985 |
|
| 1986 | ////////////////////////////////////////////////////////////////////////////////////////////////////
|
| 1987 |
|
/// Simplified event that contains only the variants used by the deserializer.
#[derive(Debug, PartialEq, Eq)]
pub enum DeEvent<'a> {
    /// Start tag (with attributes) `<tag attr="value">`.
    Start(BytesStart<'a>),
    /// End tag `</tag>`.
    End(BytesEnd<'a>),
    /// Decoded and concatenated content of consecutive [`Text`] and [`CData`]
    /// events. _Consecutive_ means that the events follow each other directly
    /// or are separated only by (any number of) [`Comment`] or [`PI`] events.
    ///
    /// [`Text`]: Event::Text
    /// [`CData`]: Event::CData
    /// [`Comment`]: Event::Comment
    /// [`PI`]: Event::PI
    Text(Text<'a>),
    /// End of XML document.
    Eof,
}
|
| 2007 |
|
| 2008 | ////////////////////////////////////////////////////////////////////////////////////////////////////
|
| 2009 |
|
/// Simplified event that contains only the variants used by the deserializer,
/// but whose [`Text`] events are not yet fully processed.
///
/// [`Text`] events should be trimmed if they are not surrounded by other
/// [`Text`] or [`CData`] events. This event holds the intermediate state of a
/// [`Text`] event, in which it is trimmed from the start, but not from the end.
/// To trim the trailing spaces we have to look ahead by one deserializer event
/// (i.e. skip all comments and processing instructions).
///
/// [`Text`]: Event::Text
/// [`CData`]: Event::CData
#[derive(Debug, PartialEq, Eq)]
pub enum PayloadEvent<'a> {
    /// Start tag (with attributes) `<tag attr="value">`.
    Start(BytesStart<'a>),
    /// End tag `</tag>`.
    End(BytesEnd<'a>),
    /// Escaped character data between tags.
    Text(BytesText<'a>),
    /// Unescaped character data stored in `<![CDATA[...]]>`.
    CData(BytesCData<'a>),
    /// Document type definition data (DTD) stored in `<!DOCTYPE ...>`.
    DocType(BytesText<'a>),
    /// End of XML document.
    Eof,
}
|
| 2036 |
|
| 2037 | impl<'a> PayloadEvent<'a> {
|
| 2038 | /// Ensures that all data is owned to extend the object's lifetime if necessary.
|
| 2039 | #[inline ]
|
| 2040 | fn into_owned(self) -> PayloadEvent<'static> {
|
| 2041 | match self {
|
| 2042 | PayloadEvent::Start(e: BytesStart<'_>) => PayloadEvent::Start(e.into_owned()),
|
| 2043 | PayloadEvent::End(e: BytesEnd<'_>) => PayloadEvent::End(e.into_owned()),
|
| 2044 | PayloadEvent::Text(e: BytesText<'_>) => PayloadEvent::Text(e.into_owned()),
|
| 2045 | PayloadEvent::CData(e: BytesCData<'_>) => PayloadEvent::CData(e.into_owned()),
|
| 2046 | PayloadEvent::DocType(e: BytesText<'_>) => PayloadEvent::DocType(e.into_owned()),
|
| 2047 | PayloadEvent::Eof => PayloadEvent::Eof,
|
| 2048 | }
|
| 2049 | }
|
| 2050 | }
|
| 2051 |
|
/// An intermediate reader that consumes [`PayloadEvent`]s and produces final [`DeEvent`]s.
/// [`PayloadEvent::Text`] events that are followed by any event except
/// [`PayloadEvent::Text`] or [`PayloadEvent::CData`] are trimmed from the end.
struct XmlReader<'i, R: XmlRead<'i>, E: EntityResolver = NoEntityResolver> {
    /// A source of low-level XML events
    reader: R,
    /// Intermediate event that could be returned by the next call to `next()`.
    /// If it is a `Text` event, its leading spaces are already trimmed, but its
    /// trailing spaces are not. Before the event is returned, trimming of
    /// the trailing spaces may be necessary
    lookahead: Result<PayloadEvent<'i>, DeError>,

    /// Used to resolve unknown entities that would otherwise cause the parser
    /// to return an [`EscapeError::UnrecognizedSymbol`] error.
    ///
    /// [`EscapeError::UnrecognizedSymbol`]: crate::escape::EscapeError::UnrecognizedSymbol
    entity_resolver: E,
}
|
| 2070 |
|
| 2071 | impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
|
| 2072 | fn new(mut reader: R, entity_resolver: E) -> Self {
|
| 2073 | // Lookahead by one event immediately, so we do not need to check in the
|
| 2074 | // loop if we need lookahead or not
|
| 2075 | let lookahead = reader.next();
|
| 2076 |
|
| 2077 | Self {
|
| 2078 | reader,
|
| 2079 | lookahead,
|
| 2080 | entity_resolver,
|
| 2081 | }
|
| 2082 | }
|
| 2083 |
|
| 2084 | /// Read next event and put it in lookahead, return the current lookahead
|
| 2085 | #[inline (always)]
|
| 2086 | fn next_impl(&mut self) -> Result<PayloadEvent<'i>, DeError> {
|
| 2087 | replace(&mut self.lookahead, self.reader.next())
|
| 2088 | }
|
| 2089 |
|
| 2090 | #[inline (always)]
|
| 2091 | fn need_trim_end(&self) -> bool {
|
| 2092 | // If next event is a text or CDATA, we should not trim trailing spaces
|
| 2093 | !matches!(
|
| 2094 | self.lookahead,
|
| 2095 | Ok(PayloadEvent::Text(_)) | Ok(PayloadEvent::CData(_))
|
| 2096 | )
|
| 2097 | }
|
| 2098 |
|
| 2099 | /// Read all consequent [`Text`] and [`CData`] events until non-text event
|
| 2100 | /// occurs. Content of all events would be appended to `result` and returned
|
| 2101 | /// as [`DeEvent::Text`].
|
| 2102 | ///
|
| 2103 | /// [`Text`]: PayloadEvent::Text
|
| 2104 | /// [`CData`]: PayloadEvent::CData
|
| 2105 | fn drain_text(&mut self, mut result: Cow<'i, str>) -> Result<DeEvent<'i>, DeError> {
|
| 2106 | loop {
|
| 2107 | match self.lookahead {
|
| 2108 | Ok(PayloadEvent::Text(_) | PayloadEvent::CData(_)) => {
|
| 2109 | let text = self.next_text()?;
|
| 2110 |
|
| 2111 | let mut s = result.into_owned();
|
| 2112 | s += &text;
|
| 2113 | result = Cow::Owned(s);
|
| 2114 | }
|
| 2115 | _ => break,
|
| 2116 | }
|
| 2117 | }
|
| 2118 | Ok(DeEvent::Text(Text { text: result }))
|
| 2119 | }
|
| 2120 |
|
| 2121 | /// Read one text event, panics if current event is not a text event
|
| 2122 | ///
|
| 2123 | /// |Event |XML |Handling
|
| 2124 | /// |-----------------------|---------------------------|----------------------------------------
|
| 2125 | /// |[`PayloadEvent::Start`]|`<tag>...</tag>` |Possible panic (unreachable)
|
| 2126 | /// |[`PayloadEvent::End`] |`</any-tag>` |Possible panic (unreachable)
|
| 2127 | /// |[`PayloadEvent::Text`] |`text content` |Unescapes `text content` and returns it
|
| 2128 | /// |[`PayloadEvent::CData`]|`<![CDATA[cdata content]]>`|Returns `cdata content` unchanged
|
| 2129 | /// |[`PayloadEvent::Eof`] | |Possible panic (unreachable)
|
| 2130 | #[inline (always)]
|
| 2131 | fn next_text(&mut self) -> Result<Cow<'i, str>, DeError> {
|
| 2132 | match self.next_impl()? {
|
| 2133 | PayloadEvent::Text(mut e) => {
|
| 2134 | if self.need_trim_end() {
|
| 2135 | e.inplace_trim_end();
|
| 2136 | }
|
| 2137 | Ok(e.unescape_with(|entity| self.entity_resolver.resolve(entity))?)
|
| 2138 | }
|
| 2139 | PayloadEvent::CData(e) => Ok(e.decode()?),
|
| 2140 |
|
| 2141 | // SAFETY: this method is called only when we peeked Text or CData
|
| 2142 | _ => unreachable!("Only `Text` and `CData` events can come here" ),
|
| 2143 | }
|
| 2144 | }
|
| 2145 |
|
| 2146 | /// Return an input-borrowing event.
|
| 2147 | fn next(&mut self) -> Result<DeEvent<'i>, DeError> {
|
| 2148 | loop {
|
| 2149 | return match self.next_impl()? {
|
| 2150 | PayloadEvent::Start(e) => Ok(DeEvent::Start(e)),
|
| 2151 | PayloadEvent::End(e) => Ok(DeEvent::End(e)),
|
| 2152 | PayloadEvent::Text(mut e) => {
|
| 2153 | if self.need_trim_end() && e.inplace_trim_end() {
|
| 2154 | continue;
|
| 2155 | }
|
| 2156 | self.drain_text(e.unescape_with(|entity| self.entity_resolver.resolve(entity))?)
|
| 2157 | }
|
| 2158 | PayloadEvent::CData(e) => self.drain_text(e.decode()?),
|
| 2159 | PayloadEvent::DocType(e) => {
|
| 2160 | self.entity_resolver
|
| 2161 | .capture(e)
|
| 2162 | .map_err(|err| DeError::Custom(format!("cannot parse DTD: {}" , err)))?;
|
| 2163 | continue;
|
| 2164 | }
|
| 2165 | PayloadEvent::Eof => Ok(DeEvent::Eof),
|
| 2166 | };
|
| 2167 | }
|
| 2168 | }
|
| 2169 |
|
| 2170 | #[inline ]
|
| 2171 | fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
|
| 2172 | match self.lookahead {
|
| 2173 | // We pre-read event with the same name that is required to be skipped.
|
| 2174 | // First call of `read_to_end` will end out pre-read event, the second
|
| 2175 | // will consume other events
|
| 2176 | Ok(PayloadEvent::Start(ref e)) if e.name() == name => {
|
| 2177 | let result1 = self.reader.read_to_end(name);
|
| 2178 | let result2 = self.reader.read_to_end(name);
|
| 2179 |
|
| 2180 | // In case of error `next` returns `Eof`
|
| 2181 | self.lookahead = self.reader.next();
|
| 2182 | result1?;
|
| 2183 | result2?;
|
| 2184 | }
|
| 2185 | // We pre-read event with the same name that is required to be skipped.
|
| 2186 | // Because this is end event, we already consume the whole tree, so
|
| 2187 | // nothing to do, just update lookahead
|
| 2188 | Ok(PayloadEvent::End(ref e)) if e.name() == name => {
|
| 2189 | self.lookahead = self.reader.next();
|
| 2190 | }
|
| 2191 | Ok(_) => {
|
| 2192 | let result = self.reader.read_to_end(name);
|
| 2193 |
|
| 2194 | // In case of error `next` returns `Eof`
|
| 2195 | self.lookahead = self.reader.next();
|
| 2196 | result?;
|
| 2197 | }
|
| 2198 | // Read next lookahead event, unpack error from the current lookahead
|
| 2199 | Err(_) => {
|
| 2200 | self.next_impl()?;
|
| 2201 | }
|
| 2202 | }
|
| 2203 | Ok(())
|
| 2204 | }
|
| 2205 |
|
| 2206 | #[inline ]
|
| 2207 | fn decoder(&self) -> Decoder {
|
| 2208 | self.reader.decoder()
|
| 2209 | }
|
| 2210 | }
|
| 2211 |
|
| 2212 | ////////////////////////////////////////////////////////////////////////////////////////////////////
|
| 2213 |
|
| 2214 | /// Deserialize an instance of type `T` from a string of XML text.
|
| 2215 | pub fn from_str<'de, T>(s: &'de str) -> Result<T, DeError>
|
| 2216 | where
|
| 2217 | T: Deserialize<'de>,
|
| 2218 | {
|
| 2219 | let mut de: Deserializer<'_, SliceReader<'_>> = Deserializer::from_str(source:s);
|
| 2220 | T::deserialize(&mut de)
|
| 2221 | }
|
| 2222 |
|
| 2223 | /// Deserialize from a reader. This method will do internal copies of data
|
| 2224 | /// readed from `reader`. If you want have a `&str` input and want to borrow
|
| 2225 | /// as much as possible, use [`from_str`].
|
| 2226 | pub fn from_reader<R, T>(reader: R) -> Result<T, DeError>
|
| 2227 | where
|
| 2228 | R: BufRead,
|
| 2229 | T: DeserializeOwned,
|
| 2230 | {
|
| 2231 | let mut de: Deserializer<'_, IoReader<…>> = Deserializer::from_reader(reader);
|
| 2232 | T::deserialize(&mut de)
|
| 2233 | }
|
| 2234 |
|
| 2235 | // TODO: According to the https://www.w3.org/TR/xmlschema11-2/#boolean,
|
| 2236 | // valid boolean representations are only "true", "false", "1", and "0"
|
| 2237 | fn str2bool<'de, V>(value: &str, visitor: V) -> Result<V::Value, DeError>
|
| 2238 | where
|
| 2239 | V: de::Visitor<'de>,
|
| 2240 | {
|
| 2241 | match value {
|
| 2242 | "true" | "1" | "True" | "TRUE" | "t" | "Yes" | "YES" | "yes" | "y" => {
|
| 2243 | visitor.visit_bool(true)
|
| 2244 | }
|
| 2245 | "false" | "0" | "False" | "FALSE" | "f" | "No" | "NO" | "no" | "n" => {
|
| 2246 | visitor.visit_bool(false)
|
| 2247 | }
|
| 2248 | _ => Err(DeError::InvalidBoolean(value.into())),
|
| 2249 | }
|
| 2250 | }
|
| 2251 |
|
| 2252 | fn deserialize_bool<'de, V>(value: &[u8], decoder: Decoder, visitor: V) -> Result<V::Value, DeError>
|
| 2253 | where
|
| 2254 | V: Visitor<'de>,
|
| 2255 | {
|
| 2256 | #[cfg (feature = "encoding" )]
|
| 2257 | {
|
| 2258 | let value = decoder.decode(value)?;
|
| 2259 | // No need to unescape because valid boolean representations cannot be escaped
|
| 2260 | str2bool(value.as_ref(), visitor)
|
| 2261 | }
|
| 2262 |
|
| 2263 | #[cfg (not(feature = "encoding" ))]
|
| 2264 | {
|
| 2265 | // No need to unescape because valid boolean representations cannot be escaped
|
| 2266 | match value {
|
| 2267 | b"true" | b"1" | b"True" | b"TRUE" | b"t" | b"Yes" | b"YES" | b"yes" | b"y" => {
|
| 2268 | visitor.visit_bool(true)
|
| 2269 | }
|
| 2270 | b"false" | b"0" | b"False" | b"FALSE" | b"f" | b"No" | b"NO" | b"no" | b"n" => {
|
| 2271 | visitor.visit_bool(false)
|
| 2272 | }
|
| 2273 | e: &[u8] => Err(DeError::InvalidBoolean(decoder.decode(bytes:e)?.into())),
|
| 2274 | }
|
| 2275 | }
|
| 2276 | }
|
| 2277 |
|
| 2278 | ////////////////////////////////////////////////////////////////////////////////////////////////////
|
| 2279 |
|
/// A structure that deserializes XML into Rust values.
pub struct Deserializer<'de, R, E: EntityResolver = NoEntityResolver>
where
    R: XmlRead<'de>,
{
    /// An XML reader that streams events into this deserializer
    reader: XmlReader<'de, R, E>,

    /// When deserializing sequences, sometimes we have to skip unwanted events.
    /// Those events are stored and replayed later. This is the replay buffer,
    /// which streams events while it is not empty. When it is exhausted, events
    /// are requested from [`Self::reader`].
    #[cfg(feature = "overlapped-lists")]
    read: VecDeque<DeEvent<'de>>,
    /// When deserializing sequences, sometimes we have to skip events, because XML
    /// is tolerant of element order: even if the order is strictly specified in
    /// the XSD (using `xs:sequence`), most XML parsers allow order violations.
    /// That means that the elements forming a sequence could be interleaved with
    /// other elements that are not related to that sequence.
    ///
    /// In order to support this, the deserializer scans events, skips the
    /// unwanted ones and stores them here. After a call to [`Self::start_replay()`]
    /// all events are moved from this buffer to [`Self::read`].
    #[cfg(feature = "overlapped-lists")]
    write: VecDeque<DeEvent<'de>>,
    /// Maximum number of events that can be skipped when processing sequences
    /// that occur out-of-order. This field is used to prevent potential
    /// denial-of-service (DoS) attacks which could cause infinite memory
    /// consumption when parsing a very large amount of XML into a sequence field.
    #[cfg(feature = "overlapped-lists")]
    limit: Option<NonZeroUsize>,

    /// Storage for one event when the `overlapped-lists` feature is disabled.
    // NOTE(review): presumably the event returned by a `peek()` that has not
    // been consumed yet - confirm against the `peek`/`next` implementation
    #[cfg(not(feature = "overlapped-lists"))]
    peek: Option<DeEvent<'de>>,
}
|
| 2315 |
|
| 2316 | impl<'de, R, E> Deserializer<'de, R, E>
|
| 2317 | where
|
| 2318 | R: XmlRead<'de>,
|
| 2319 | E: EntityResolver,
|
| 2320 | {
|
| 2321 | /// Create an XML deserializer from one of the possible quick_xml input sources.
|
| 2322 | ///
|
| 2323 | /// Typically it is more convenient to use one of these methods instead:
|
| 2324 | ///
|
| 2325 | /// - [`Deserializer::from_str`]
|
| 2326 | /// - [`Deserializer::from_reader`]
|
| 2327 | fn new(reader: R, entity_resolver: E) -> Self {
|
| 2328 | Self {
|
| 2329 | reader: XmlReader::new(reader, entity_resolver),
|
| 2330 |
|
| 2331 | #[cfg (feature = "overlapped-lists" )]
|
| 2332 | read: VecDeque::new(),
|
| 2333 | #[cfg (feature = "overlapped-lists" )]
|
| 2334 | write: VecDeque::new(),
|
| 2335 | #[cfg (feature = "overlapped-lists" )]
|
| 2336 | limit: None,
|
| 2337 |
|
| 2338 | #[cfg (not(feature = "overlapped-lists" ))]
|
| 2339 | peek: None,
|
| 2340 | }
|
| 2341 | }
|
| 2342 |
|
/// Limits the number of events that may be skipped (buffered) while a
/// sequence is deserialized. Passing `None` removes the limit.
///
/// If `<element>` contains more nested elements, `$text` or CDATA nodes than
/// the limit allows, deserialization of a sequence field (any type that is
/// deserialized through [`deserialize_seq`], for example `Vec<T>`) fails with
/// [`DeError::TooManyEvents`].
///
/// Use this method to guard against a [DoS] attack and unbounded memory
/// consumption when a very large XML is parsed into a sequence field.
///
/// Setting a limit is strongly recommended whenever the data comes from an
/// untrusted source. Pick a value that covers the number of events your
/// typical documents have _between_ two elements of the same sequence.
///
/// Returns `self`, so calls can be chained.
///
/// # Examples
///
/// Suppose the following structure is deserialized:
/// ```
/// struct List {
///   item: Vec<()>,
/// }
/// ```
///
/// from an XML that looks like this:
/// ```xml
/// <any-name>
///   <item/>
///   <!-- Bufferization starts at this point -->
///   <another-item>
///     <some-element>with text</some-element>
///     <yet-another-element/>
///   </another-item>
///   <!-- Buffer will be emptied at this point; 7 events were buffered -->
///   <item/>
///   <!-- There is nothing to buffer, because elements follows each other -->
///   <item/>
/// </any-name>
/// ```
///
/// While the `item` field is deserialized, 7 events have to be buffered
/// before the second `<item/>` can be reached:
///
/// - `<another-item>`
/// - `<some-element>`
/// - `$text(with text)`
/// - `</some-element>`
/// - `<yet-another-element/>` (virtual start event)
/// - `<yet-another-element/>` (virtual end event)
/// - `</another-item>`
///
/// Note that `<yet-another-element/>` is represented internally by 2 events:
/// one for the start tag and one for the end tag. This may be eliminated in
/// the future, but for now the [auto-expanding feature] of the reader is
/// used, because it simplifies the deserializer code.
///
/// [`deserialize_seq`]: serde::Deserializer::deserialize_seq
/// [DoS]: https://en.wikipedia.org/wiki/Denial-of-service_attack
/// [auto-expanding feature]: Reader::expand_empty_elements
#[cfg(feature = "overlapped-lists")]
pub fn event_buffer_size(&mut self, limit: Option<NonZeroUsize>) -> &mut Self {
    self.limit = limit;
    self
}
|
| 2407 |
|
/// Returns a reference to the next event without consuming it.
///
/// If the replay buffer is empty, one event is read from the underlying
/// reader and stored in the buffer first.
#[cfg(feature = "overlapped-lists")]
fn peek(&mut self) -> Result<&DeEvent<'de>, DeError> {
    if self.read.is_empty() {
        let event = self.reader.next()?;
        self.read.push_front(event);
    }
    match self.read.front() {
        Some(event) => Ok(event),
        // `self.read` was filled in the code above, so it cannot be empty.
        // NOTE: Can be replaced with `unsafe { std::hint::unreachable_unchecked() }`
        //       if unsafe code will be allowed
        None => unreachable!(),
    }
}
|
| 2421 | #[cfg (not(feature = "overlapped-lists" ))]
|
| 2422 | fn peek(&mut self) -> Result<&DeEvent<'de>, DeError> {
|
| 2423 | if self.peek.is_none() {
|
| 2424 | self.peek = Some(self.reader.next()?);
|
| 2425 | }
|
| 2426 | match self.peek.as_ref() {
|
| 2427 | Some(v) => Ok(v),
|
| 2428 | // SAFETY: a `None` variant for `self.peek` would have been replaced
|
| 2429 | // by a `Some` variant in the code above.
|
| 2430 | // TODO: Can be replaced with `unsafe { std::hint::unreachable_unchecked() }`
|
| 2431 | // if unsafe code will be allowed
|
| 2432 | None => unreachable!(),
|
| 2433 | }
|
| 2434 | }
|
| 2435 |
|
| 2436 | fn next(&mut self) -> Result<DeEvent<'de>, DeError> {
|
| 2437 | // Replay skipped or peeked events
|
| 2438 | #[cfg (feature = "overlapped-lists" )]
|
| 2439 | if let Some(event) = self.read.pop_front() {
|
| 2440 | return Ok(event);
|
| 2441 | }
|
| 2442 | #[cfg (not(feature = "overlapped-lists" ))]
|
| 2443 | if let Some(e) = self.peek.take() {
|
| 2444 | return Ok(e);
|
| 2445 | }
|
| 2446 | self.reader.next()
|
| 2447 | }
|
| 2448 |
|
/// Returns a mark for the current position in the buffer of skipped events
/// (its current length).
///
/// Events stored by [`Self::skip()`] after this mark are the ones that become
/// available again once [`Self::start_replay()`] is called with the mark.
#[cfg(feature = "overlapped-lists")]
#[inline]
#[must_use = "returned checkpoint should be used in `start_replay`"]
fn skip_checkpoint(&self) -> usize {
    self.write.len()
}
|
| 2457 |
|
/// Takes the next event and, if it is a start tag, the whole XML subtree that
/// it opens, and stores everything in the buffer of skipped events, from
/// which it can be retrieved later. You MUST call [`Self::start_replay()`]
/// afterwards to get access to the skipped events and to release the
/// internal buffers.
///
/// Fails if reading an event fails or if storing an event is rejected by
/// [`Self::skip_event()`].
#[cfg(feature = "overlapped-lists")]
fn skip(&mut self) -> Result<(), DeError> {
    let first = self.next()?;
    self.skip_event(first)?;

    // Only a start event requires skipping of the whole subtree
    let name = match self.write.back() {
        Some(DeEvent::Start(e)) => e.name().as_ref().to_owned(),
        _ => return Ok(()),
    };

    // Number of nested, still open elements that have the same name
    let mut depth = 0;
    loop {
        let event = self.next()?;
        // Classify the event before it is moved into the buffer
        let opens = matches!(&event, DeEvent::Start(e) if e.name().as_ref() == name);
        let closes = matches!(&event, DeEvent::End(e) if e.name().as_ref() == name);
        let eof = matches!(&event, DeEvent::Eof);

        // Every event, including the terminating one, is stored
        self.skip_event(event)?;

        if opens {
            depth += 1;
        } else if closes {
            if depth == 0 {
                break;
            }
            depth -= 1;
        } else if eof {
            break;
        }
    }
    Ok(())
}
|
| 2497 |
|
/// Appends `event` to the buffer of skipped events.
///
/// Returns [`DeError::TooManyEvents`] without storing the event if a limit is
/// configured and the buffer already holds that many events.
#[cfg(feature = "overlapped-lists")]
#[inline]
fn skip_event(&mut self, event: DeEvent<'de>) -> Result<(), DeError> {
    match self.limit {
        Some(max) if self.write.len() >= max.get() => Err(DeError::TooManyEvents(max)),
        _ => {
            self.write.push_back(event);
            Ok(())
        }
    }
}
|
| 2509 |
|
/// Moves the events that were skipped after the given `checkpoint` from the
/// [`Self::write`] skip buffer to the front of the [`Self::read`] buffer.
///
/// After this call [`Self::peek()`] and [`Self::next()`] start to return the
/// events that were previously skipped by [`Self::skip()`]. Only when all of
/// them are consumed does the deserializer continue to take events from the
/// underlying reader.
///
/// This method MUST be called if [`Self::skip()`] was called any number of
/// times after [`Self::new()`] or after the previous `start_replay()`,
/// otherwise the skipped events are lost.
#[cfg(feature = "overlapped-lists")]
fn start_replay(&mut self, checkpoint: usize) {
    if checkpoint != 0 {
        // Replay only the tail of the skip buffer; events before the
        // checkpoint stay in `self.write`
        let mut replay = self.write.split_off(checkpoint);
        replay.append(&mut self.read);
        self.read = replay;
        return;
    }

    // Replay the whole skip buffer: put the not yet consumed events after
    // the skipped ones and exchange the buffers
    self.write.append(&mut self.read);
    std::mem::swap(&mut self.read, &mut self.write);
}
|
| 2531 |
|
| 2532 | #[inline ]
|
| 2533 | fn read_string(&mut self) -> Result<Cow<'de, str>, DeError> {
|
| 2534 | self.read_string_impl(true)
|
| 2535 | }
|
| 2536 |
|
| 2537 | /// Consumes consequent [`Text`] and [`CData`] (both a referred below as a _text_)
|
| 2538 | /// events, merge them into one string. If there are no such events, returns
|
| 2539 | /// an empty string.
|
| 2540 | ///
|
| 2541 | /// If `allow_start` is `false`, then only text events is consumed, for other
|
| 2542 | /// events an error is returned (see table below).
|
| 2543 | ///
|
| 2544 | /// If `allow_start` is `true`, then first [`DeEvent::Text`] event is returned
|
| 2545 | /// and all other content is skipped until corresponding end tag will be consumed.
|
| 2546 | ///
|
| 2547 | /// # Handling events
|
| 2548 | ///
|
| 2549 | /// The table below shows how events is handled by this method:
|
| 2550 | ///
|
| 2551 | /// |Event |XML |Handling
|
| 2552 | /// |------------------|---------------------------|----------------------------------------
|
| 2553 | /// |[`DeEvent::Start`]|`<tag>...</tag>` |if `allow_start == true`, result determined by the second table, otherwise emits [`UnexpectedStart("tag")`](DeError::UnexpectedStart)
|
| 2554 | /// |[`DeEvent::End`] |`</any-tag>` |Emits [`UnexpectedEnd("any-tag")`](DeError::UnexpectedEnd)
|
| 2555 | /// |[`DeEvent::Text`] |`text content` or `<![CDATA[cdata content]]>` (probably mixed)|Returns event content unchanged
|
| 2556 | /// |[`DeEvent::Eof`] | |Emits [`UnexpectedEof`](DeError::UnexpectedEof)
|
| 2557 | ///
|
| 2558 | /// Second event, consumed if [`DeEvent::Start`] was received and `allow_start == true`:
|
| 2559 | ///
|
| 2560 | /// |Event |XML |Handling
|
| 2561 | /// |------------------|---------------------------|----------------------------------------------------------------------------------
|
| 2562 | /// |[`DeEvent::Start`]|`<any-tag>...</any-tag>` |Emits [`UnexpectedStart("any-tag")`](DeError::UnexpectedStart)
|
| 2563 | /// |[`DeEvent::End`] |`</tag>` |Returns an empty slice, if close tag matched the open one
|
| 2564 | /// |[`DeEvent::End`] |`</any-tag>` |Emits [`UnexpectedEnd("any-tag")`](DeError::UnexpectedEnd)
|
| 2565 | /// |[`DeEvent::Text`] |`text content` or `<![CDATA[cdata content]]>` (probably mixed)|Returns event content unchanged, consumes events up to `</tag>`
|
| 2566 | /// |[`DeEvent::Eof`] | |Emits [`UnexpectedEof`](DeError::UnexpectedEof)
|
| 2567 | ///
|
| 2568 | /// [`Text`]: Event::Text
|
| 2569 | /// [`CData`]: Event::CData
|
| 2570 | fn read_string_impl(&mut self, allow_start: bool) -> Result<Cow<'de, str>, DeError> {
|
| 2571 | match self.next()? {
|
| 2572 | DeEvent::Text(e) => Ok(e.text),
|
| 2573 | // allow one nested level
|
| 2574 | DeEvent::Start(e) if allow_start => match self.next()? {
|
| 2575 | DeEvent::Text(t) => {
|
| 2576 | self.read_to_end(e.name())?;
|
| 2577 | Ok(t.text)
|
| 2578 | }
|
| 2579 | DeEvent::Start(s) => Err(DeError::UnexpectedStart(s.name().as_ref().to_owned())),
|
| 2580 | // We can get End event in case of `<tag></tag>` or `<tag/>` input
|
| 2581 | // Return empty text in that case
|
| 2582 | DeEvent::End(end) if end.name() == e.name() => Ok("" .into()),
|
| 2583 | DeEvent::End(end) => Err(DeError::UnexpectedEnd(end.name().as_ref().to_owned())),
|
| 2584 | DeEvent::Eof => Err(DeError::UnexpectedEof),
|
| 2585 | },
|
| 2586 | DeEvent::Start(e) => Err(DeError::UnexpectedStart(e.name().as_ref().to_owned())),
|
| 2587 | DeEvent::End(e) => Err(DeError::UnexpectedEnd(e.name().as_ref().to_owned())),
|
| 2588 | DeEvent::Eof => Err(DeError::UnexpectedEof),
|
| 2589 | }
|
| 2590 | }
|
| 2591 |
|
/// Drops events up to and including the end event with the
/// [name](BytesEnd::name()) `name` that closes the currently open element.
/// Elements with the same name nested inside are taken into account.
/// This method should be called after [`Self::next()`].
#[cfg(feature = "overlapped-lists")]
fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
    // Number of nested, still open elements that have the same name
    let mut depth = 0;

    // Consume the buffered (skipped) events first
    while let Some(event) = self.read.pop_front() {
        match event {
            DeEvent::Start(e) if e.name() == name => depth += 1,
            DeEvent::End(e) if e.name() == name => {
                if depth == 0 {
                    return Ok(());
                }
                depth -= 1;
            }
            // Drop all other skipped events
            _ => {}
        }
    }

    // There are no more skipped events, so use the effective reading that
    // does not allocate memory for events.
    //
    // All opened tags have to be closed, because Start events could have
    // been buffered without the corresponding End events. One call is needed
    // for the element itself plus one for every nesting level that is still
    // open. `read_to_end()` returns an error if an Eof is encountered
    // prematurely (in case of malformed XML).
    //
    // <tag><tag></tag></tag>
    // ^^^^^^^^^^             - buffered in `self.read`, when `self.read_to_end()` is called, depth = 2
    //           ^^^^^^       - read by the first call of `self.reader.read_to_end()`
    //                 ^^^^^^ - read by the second call of `self.reader.read_to_end()`
    for _ in 0..=depth {
        self.reader.read_to_end(name)?;
    }
    Ok(())
}
|
| 2638 | #[cfg (not(feature = "overlapped-lists" ))]
|
| 2639 | fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
|
| 2640 | // First one might be in self.peek
|
| 2641 | match self.next()? {
|
| 2642 | DeEvent::Start(e) => self.reader.read_to_end(e.name())?,
|
| 2643 | DeEvent::End(e) if e.name() == name => return Ok(()),
|
| 2644 | _ => (),
|
| 2645 | }
|
| 2646 | self.reader.read_to_end(name)
|
| 2647 | }
|
| 2648 | }
|
| 2649 |
|
| 2650 | impl<'de> Deserializer<'de, SliceReader<'de>> {
|
| 2651 | /// Create new deserializer that will borrow data from the specified string.
|
| 2652 | ///
|
| 2653 | /// Deserializer created with this method will not resolve custom entities.
|
| 2654 | #[allow (clippy::should_implement_trait)]
|
| 2655 | pub fn from_str(source: &'de str) -> Self {
|
| 2656 | Self::from_str_with_resolver(source, entity_resolver:NoEntityResolver)
|
| 2657 | }
|
| 2658 | }
|
| 2659 |
|
| 2660 | impl<'de, E> Deserializer<'de, SliceReader<'de>, E>
|
| 2661 | where
|
| 2662 | E: EntityResolver,
|
| 2663 | {
|
| 2664 | /// Create new deserializer that will borrow data from the specified string
|
| 2665 | /// and use specified entity resolver.
|
| 2666 | pub fn from_str_with_resolver(source: &'de str, entity_resolver: E) -> Self {
|
| 2667 | let mut reader: Reader<&[u8]> = Reader::from_str(source);
|
| 2668 | reader.expand_empty_elements(val:true);
|
| 2669 |
|
| 2670 | Self::new(
|
| 2671 | reader:SliceReader {
|
| 2672 | reader,
|
| 2673 | start_trimmer: StartTrimmer::default(),
|
| 2674 | },
|
| 2675 | entity_resolver,
|
| 2676 | )
|
| 2677 | }
|
| 2678 | }
|
| 2679 |
|
| 2680 | impl<'de, R> Deserializer<'de, IoReader<R>>
|
| 2681 | where
|
| 2682 | R: BufRead,
|
| 2683 | {
|
| 2684 | /// Create new deserializer that will copy data from the specified reader
|
| 2685 | /// into internal buffer.
|
| 2686 | ///
|
| 2687 | /// If you already have a string use [`Self::from_str`] instead, because it
|
| 2688 | /// will borrow instead of copy. If you have `&[u8]` which is known to represent
|
| 2689 | /// UTF-8, you can decode it first before using [`from_str`].
|
| 2690 | ///
|
| 2691 | /// Deserializer created with this method will not resolve custom entities.
|
| 2692 | pub fn from_reader(reader: R) -> Self {
|
| 2693 | Self::with_resolver(reader, entity_resolver:NoEntityResolver)
|
| 2694 | }
|
| 2695 | }
|
| 2696 |
|
| 2697 | impl<'de, R, E> Deserializer<'de, IoReader<R>, E>
|
| 2698 | where
|
| 2699 | R: BufRead,
|
| 2700 | E: EntityResolver,
|
| 2701 | {
|
| 2702 | /// Create new deserializer that will copy data from the specified reader
|
| 2703 | /// into internal buffer and use specified entity resolver.
|
| 2704 | ///
|
| 2705 | /// If you already have a string use [`Self::from_str`] instead, because it
|
| 2706 | /// will borrow instead of copy. If you have `&[u8]` which is known to represent
|
| 2707 | /// UTF-8, you can decode it first before using [`from_str`].
|
| 2708 | pub fn with_resolver(reader: R, entity_resolver: E) -> Self {
|
| 2709 | let mut reader: Reader = Reader::from_reader(reader);
|
| 2710 | reader.expand_empty_elements(val:true);
|
| 2711 |
|
| 2712 | Self::new(
|
| 2713 | reader:IoReader {
|
| 2714 | reader,
|
| 2715 | start_trimmer: StartTrimmer::default(),
|
| 2716 | buf: Vec::new(),
|
| 2717 | },
|
| 2718 | entity_resolver,
|
| 2719 | )
|
| 2720 | }
|
| 2721 | }
|
| 2722 |
|
| 2723 | impl<'de, 'a, R, E> de::Deserializer<'de> for &'a mut Deserializer<'de, R, E>
|
| 2724 | where
|
| 2725 | R: XmlRead<'de>,
|
| 2726 | E: EntityResolver,
|
| 2727 | {
|
| 2728 | type Error = DeError;
|
| 2729 |
|
| 2730 | deserialize_primitives!();
|
| 2731 |
|
| 2732 | fn deserialize_struct<V>(
|
| 2733 | self,
|
| 2734 | _name: &'static str,
|
| 2735 | fields: &'static [&'static str],
|
| 2736 | visitor: V,
|
| 2737 | ) -> Result<V::Value, DeError>
|
| 2738 | where
|
| 2739 | V: Visitor<'de>,
|
| 2740 | {
|
| 2741 | match self.next()? {
|
| 2742 | DeEvent::Start(e) => {
|
| 2743 | let name = e.name().as_ref().to_vec();
|
| 2744 | let map = map::MapAccess::new(self, e, fields)?;
|
| 2745 | let value = visitor.visit_map(map)?;
|
| 2746 | self.read_to_end(QName(&name))?;
|
| 2747 | Ok(value)
|
| 2748 | }
|
| 2749 | DeEvent::End(e) => Err(DeError::UnexpectedEnd(e.name().as_ref().to_owned())),
|
| 2750 | DeEvent::Text(_) => Err(DeError::ExpectedStart),
|
| 2751 | DeEvent::Eof => Err(DeError::UnexpectedEof),
|
| 2752 | }
|
| 2753 | }
|
| 2754 |
|
| 2755 | /// Unit represented in XML as a `xs:element` or text/CDATA content.
|
| 2756 | /// Any content inside `xs:element` is ignored and skipped.
|
| 2757 | ///
|
| 2758 | /// Produces unit struct from any of following inputs:
|
| 2759 | /// - any `<tag ...>...</tag>`
|
| 2760 | /// - any `<tag .../>`
|
| 2761 | /// - any consequent text / CDATA content (can consist of several parts
|
| 2762 | /// delimited by comments and processing instructions)
|
| 2763 | ///
|
| 2764 | /// # Events handling
|
| 2765 | ///
|
| 2766 | /// |Event |XML |Handling
|
| 2767 | /// |------------------|---------------------------|-------------------------------------------
|
| 2768 | /// |[`DeEvent::Start`]|`<tag>...</tag>` |Calls `visitor.visit_unit()`, consumes all events up to and including corresponding `End` event
|
| 2769 | /// |[`DeEvent::End`] |`</tag>` |Emits [`UnexpectedEnd("tag")`](DeError::UnexpectedEnd)
|
| 2770 | /// |[`DeEvent::Text`] |`text content` or `<![CDATA[cdata content]]>` (probably mixed)|Calls `visitor.visit_unit()`. The content is ignored
|
| 2771 | /// |[`DeEvent::Eof`] | |Emits [`UnexpectedEof`](DeError::UnexpectedEof)
|
| 2772 | fn deserialize_unit<V>(self, visitor: V) -> Result<V::Value, DeError>
|
| 2773 | where
|
| 2774 | V: Visitor<'de>,
|
| 2775 | {
|
| 2776 | match self.next()? {
|
| 2777 | DeEvent::Start(s) => {
|
| 2778 | self.read_to_end(s.name())?;
|
| 2779 | visitor.visit_unit()
|
| 2780 | }
|
| 2781 | DeEvent::Text(_) => visitor.visit_unit(),
|
| 2782 | DeEvent::End(e) => Err(DeError::UnexpectedEnd(e.name().as_ref().to_owned())),
|
| 2783 | DeEvent::Eof => Err(DeError::UnexpectedEof),
|
| 2784 | }
|
| 2785 | }
|
| 2786 |
|
| 2787 | fn deserialize_enum<V>(
|
| 2788 | self,
|
| 2789 | _name: &'static str,
|
| 2790 | _variants: &'static [&'static str],
|
| 2791 | visitor: V,
|
| 2792 | ) -> Result<V::Value, DeError>
|
| 2793 | where
|
| 2794 | V: Visitor<'de>,
|
| 2795 | {
|
| 2796 | visitor.visit_enum(var::EnumAccess::new(self))
|
| 2797 | }
|
| 2798 |
|
| 2799 | fn deserialize_seq<V>(self, visitor: V) -> Result<V::Value, DeError>
|
| 2800 | where
|
| 2801 | V: Visitor<'de>,
|
| 2802 | {
|
| 2803 | visitor.visit_seq(self)
|
| 2804 | }
|
| 2805 |
|
| 2806 | fn deserialize_map<V>(self, visitor: V) -> Result<V::Value, DeError>
|
| 2807 | where
|
| 2808 | V: Visitor<'de>,
|
| 2809 | {
|
| 2810 | self.deserialize_struct("" , &[], visitor)
|
| 2811 | }
|
| 2812 |
|
| 2813 | fn deserialize_option<V>(self, visitor: V) -> Result<V::Value, DeError>
|
| 2814 | where
|
| 2815 | V: Visitor<'de>,
|
| 2816 | {
|
| 2817 | deserialize_option!(self, self, visitor)
|
| 2818 | }
|
| 2819 |
|
| 2820 | /// Always call `visitor.visit_unit()` because returned value ignored in any case.
|
| 2821 | ///
|
| 2822 | /// This method consumes any single [event][DeEvent] except the [`Start`]
|
| 2823 | /// event, in which case all events up to and including corresponding [`End`]
|
| 2824 | /// event will be consumed.
|
| 2825 | ///
|
| 2826 | /// This method returns error if current event is [`End`] or [`Eof`].
|
| 2827 | ///
|
| 2828 | /// [`Start`]: DeEvent::Start
|
| 2829 | /// [`End`]: DeEvent::End
|
| 2830 | /// [`Eof`]: DeEvent::Eof
|
| 2831 | fn deserialize_ignored_any<V>(self, visitor: V) -> Result<V::Value, DeError>
|
| 2832 | where
|
| 2833 | V: Visitor<'de>,
|
| 2834 | {
|
| 2835 | match self.next()? {
|
| 2836 | DeEvent::Start(e) => self.read_to_end(e.name())?,
|
| 2837 | DeEvent::End(e) => return Err(DeError::UnexpectedEnd(e.name().as_ref().to_owned())),
|
| 2838 | DeEvent::Eof => return Err(DeError::UnexpectedEof),
|
| 2839 | _ => (),
|
| 2840 | }
|
| 2841 | visitor.visit_unit()
|
| 2842 | }
|
| 2843 |
|
| 2844 | fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, DeError>
|
| 2845 | where
|
| 2846 | V: Visitor<'de>,
|
| 2847 | {
|
| 2848 | match self.peek()? {
|
| 2849 | DeEvent::Start(_) => self.deserialize_map(visitor),
|
| 2850 | // Redirect to deserialize_unit in order to consume an event and return an appropriate error
|
| 2851 | DeEvent::End(_) | DeEvent::Eof => self.deserialize_unit(visitor),
|
| 2852 | _ => self.deserialize_string(visitor),
|
| 2853 | }
|
| 2854 | }
|
| 2855 | }
|
| 2856 |
|
| 2857 | /// An accessor to sequence elements forming a value for top-level sequence of XML
|
| 2858 | /// elements.
|
| 2859 | ///
|
| 2860 | /// Technically, multiple top-level elements violates XML rule of only one top-level
|
| 2861 | /// element, but we consider this as several concatenated XML documents.
|
| 2862 | impl<'de, 'a, R, E> SeqAccess<'de> for &'a mut Deserializer<'de, R, E>
|
| 2863 | where
|
| 2864 | R: XmlRead<'de>,
|
| 2865 | E: EntityResolver,
|
| 2866 | {
|
| 2867 | type Error = DeError;
|
| 2868 |
|
| 2869 | fn next_element_seed<T>(&mut self, seed: T) -> Result<Option<T::Value>, Self::Error>
|
| 2870 | where
|
| 2871 | T: DeserializeSeed<'de>,
|
| 2872 | {
|
| 2873 | match self.peek()? {
|
| 2874 | DeEvent::Eof => Ok(None),
|
| 2875 |
|
| 2876 | // Start(tag), End(tag), Text
|
| 2877 | _ => seed.deserialize(&mut **self).map(op:Some),
|
| 2878 | }
|
| 2879 | }
|
| 2880 | }
|
| 2881 |
|
| 2882 | ////////////////////////////////////////////////////////////////////////////////////////////////////
|
| 2883 |
|
/// Helper that keeps the state of the algorithm which converts raw events
/// into semi-trimmed events. The algorithm does not depend on the way the
/// events are read.
struct StartTrimmer {
    /// If `true`, leading whitespace is removed from the next returned
    /// [`Event::Text`]. The flag is set to `true` after each returned event
    /// except [`Event::Text`] and [`Event::CData`], so an [`Event::Text`]
    /// that is read right after one of those two is not trimmed.
    trim_start: bool,
}
|
| 2894 |
|
| 2895 | impl StartTrimmer {
|
| 2896 | /// Converts raw reader's event into a payload event.
|
| 2897 | /// Returns `None`, if event should be skipped.
|
| 2898 | #[inline (always)]
|
| 2899 | fn trim<'a>(&mut self, event: Event<'a>) -> Option<PayloadEvent<'a>> {
|
| 2900 | let (event, trim_next_event) = match event {
|
| 2901 | Event::DocType(e) => (PayloadEvent::DocType(e), true),
|
| 2902 | Event::Start(e) => (PayloadEvent::Start(e), true),
|
| 2903 | Event::End(e) => (PayloadEvent::End(e), true),
|
| 2904 | Event::Eof => (PayloadEvent::Eof, true),
|
| 2905 |
|
| 2906 | // Do not trim next text event after Text or CDATA event
|
| 2907 | Event::CData(e) => (PayloadEvent::CData(e), false),
|
| 2908 | Event::Text(mut e) => {
|
| 2909 | // If event is empty after trimming, skip it
|
| 2910 | if self.trim_start && e.inplace_trim_start() {
|
| 2911 | return None;
|
| 2912 | }
|
| 2913 | (PayloadEvent::Text(e), false)
|
| 2914 | }
|
| 2915 |
|
| 2916 | _ => return None,
|
| 2917 | };
|
| 2918 | self.trim_start = trim_next_event;
|
| 2919 | Some(event)
|
| 2920 | }
|
| 2921 | }
|
| 2922 |
|
| 2923 | impl Default for StartTrimmer {
|
| 2924 | #[inline ]
|
| 2925 | fn default() -> Self {
|
| 2926 | Self { trim_start: true }
|
| 2927 | }
|
| 2928 | }
|
| 2929 |
|
| 2930 | ////////////////////////////////////////////////////////////////////////////////////////////////////
|
| 2931 |
|
| 2932 | /// Trait used by the deserializer for iterating over input. This is manually
|
| 2933 | /// "specialized" for iterating over `&[u8]`.
|
| 2934 | ///
|
| 2935 | /// You do not need to implement this trait, it is needed to abstract from
|
| 2936 | /// [borrowing](SliceReader) and [copying](IoReader) data sources and reuse code in
|
| 2937 | /// deserializer
|
| 2938 | pub trait XmlRead<'i> {
|
| 2939 | /// Return an input-borrowing event.
|
| 2940 | fn next(&mut self) -> Result<PayloadEvent<'i>, DeError>;
|
| 2941 |
|
| 2942 | /// Skips until end element is found. Unlike `next()` it will not allocate
|
| 2943 | /// when it cannot satisfy the lifetime.
|
| 2944 | fn read_to_end(&mut self, name: QName) -> Result<(), DeError>;
|
| 2945 |
|
| 2946 | /// A copy of the reader's decoder used to decode strings.
|
| 2947 | fn decoder(&self) -> Decoder;
|
| 2948 | }
|
| 2949 |
|
| 2950 | /// XML input source that reads from a std::io input stream.
|
| 2951 | ///
|
| 2952 | /// You cannot create it, it is created automatically when you call
|
| 2953 | /// [`Deserializer::from_reader`]
|
| 2954 | pub struct IoReader<R: BufRead> {
|
| 2955 | reader: Reader<R>,
|
| 2956 | start_trimmer: StartTrimmer,
|
| 2957 | buf: Vec<u8>,
|
| 2958 | }
|
| 2959 |
|
| 2960 | impl<'i, R: BufRead> XmlRead<'i> for IoReader<R> {
|
| 2961 | fn next(&mut self) -> Result<PayloadEvent<'static>, DeError> {
|
| 2962 | loop {
|
| 2963 | self.buf.clear();
|
| 2964 |
|
| 2965 | let event: Event<'_> = self.reader.read_event_into(&mut self.buf)?;
|
| 2966 | if let Some(event: PayloadEvent<'_>) = self.start_trimmer.trim(event) {
|
| 2967 | return Ok(event.into_owned());
|
| 2968 | }
|
| 2969 | }
|
| 2970 | }
|
| 2971 |
|
| 2972 | fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
|
| 2973 | match self.reader.read_to_end_into(end:name, &mut self.buf) {
|
| 2974 | Err(Error::UnexpectedEof(_)) => Err(DeError::UnexpectedEof),
|
| 2975 | Err(e: Error) => Err(e.into()),
|
| 2976 | Ok(_) => Ok(()),
|
| 2977 | }
|
| 2978 | }
|
| 2979 |
|
| 2980 | fn decoder(&self) -> Decoder {
|
| 2981 | self.reader.decoder()
|
| 2982 | }
|
| 2983 | }
|
| 2984 |
|
| 2985 | /// XML input source that reads from a slice of bytes and can borrow from it.
|
| 2986 | ///
|
| 2987 | /// You cannot create it, it is created automatically when you call
|
| 2988 | /// [`Deserializer::from_str`].
|
| 2989 | pub struct SliceReader<'de> {
|
| 2990 | reader: Reader<&'de [u8]>,
|
| 2991 | start_trimmer: StartTrimmer,
|
| 2992 | }
|
| 2993 |
|
| 2994 | impl<'de> XmlRead<'de> for SliceReader<'de> {
|
| 2995 | fn next(&mut self) -> Result<PayloadEvent<'de>, DeError> {
|
| 2996 | loop {
|
| 2997 | let event: Event<'de> = self.reader.read_event()?;
|
| 2998 | if let Some(event: PayloadEvent<'de>) = self.start_trimmer.trim(event) {
|
| 2999 | return Ok(event);
|
| 3000 | }
|
| 3001 | }
|
| 3002 | }
|
| 3003 |
|
| 3004 | fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
|
| 3005 | match self.reader.read_to_end(name) {
|
| 3006 | Err(Error::UnexpectedEof(_)) => Err(DeError::UnexpectedEof),
|
| 3007 | Err(e: Error) => Err(e.into()),
|
| 3008 | Ok(_) => Ok(()),
|
| 3009 | }
|
| 3010 | }
|
| 3011 |
|
| 3012 | fn decoder(&self) -> Decoder {
|
| 3013 | self.reader.decoder()
|
| 3014 | }
|
| 3015 | }
|
| 3016 |
|
| 3017 | #[cfg (test)]
|
| 3018 | mod tests {
|
| 3019 | use super::*;
|
| 3020 | use pretty_assertions::assert_eq;
|
| 3021 |
|
| 3022 | #[cfg (feature = "overlapped-lists" )]
|
| 3023 | mod skip {
|
| 3024 | use super::*;
|
| 3025 | use crate::de::DeEvent::*;
|
| 3026 | use crate::events::BytesEnd;
|
| 3027 | use pretty_assertions::assert_eq;
|
| 3028 |
|
| 3029 | /// Checks that `peek()` and `read()` behaves correctly after `skip()`
|
| 3030 | #[test ]
|
| 3031 | fn read_and_peek() {
|
| 3032 | let mut de = Deserializer::from_str(
|
| 3033 | r#"
|
| 3034 | <root>
|
| 3035 | <inner>
|
| 3036 | text
|
| 3037 | <inner/>
|
| 3038 | </inner>
|
| 3039 | <next/>
|
| 3040 | <target/>
|
| 3041 | </root>
|
| 3042 | "# ,
|
| 3043 | );
|
| 3044 |
|
| 3045 | // Initial conditions - both are empty
|
| 3046 | assert_eq!(de.read, vec![]);
|
| 3047 | assert_eq!(de.write, vec![]);
|
| 3048 |
|
| 3049 | assert_eq!(de.next().unwrap(), Start(BytesStart::new("root" )));
|
| 3050 | assert_eq!(de.peek().unwrap(), &Start(BytesStart::new("inner" )));
|
| 3051 |
|
| 3052 | // Mark that start_replay() should begin replay from this point
|
| 3053 | let checkpoint = de.skip_checkpoint();
|
| 3054 | assert_eq!(checkpoint, 0);
|
| 3055 |
|
| 3056 | // Should skip first <inner> tree
|
| 3057 | de.skip().unwrap();
|
| 3058 | assert_eq!(de.read, vec![]);
|
| 3059 | assert_eq!(
|
| 3060 | de.write,
|
| 3061 | vec![
|
| 3062 | Start(BytesStart::new("inner" )),
|
| 3063 | Text("text" .into()),
|
| 3064 | Start(BytesStart::new("inner" )),
|
| 3065 | End(BytesEnd::new("inner" )),
|
| 3066 | End(BytesEnd::new("inner" )),
|
| 3067 | ]
|
| 3068 | );
|
| 3069 |
|
| 3070 | // Consume <next/>. Now unconsumed XML looks like:
|
| 3071 | //
|
| 3072 | // <inner>
|
| 3073 | // text
|
| 3074 | // <inner/>
|
| 3075 | // </inner>
|
| 3076 | // <target/>
|
| 3077 | // </root>
|
| 3078 | assert_eq!(de.next().unwrap(), Start(BytesStart::new("next" )));
|
| 3079 | assert_eq!(de.next().unwrap(), End(BytesEnd::new("next" )));
|
| 3080 |
|
| 3081 | // We finish writing. Next call to `next()` should start replay that messages:
|
| 3082 | //
|
| 3083 | // <inner>
|
| 3084 | // text
|
| 3085 | // <inner/>
|
| 3086 | // </inner>
|
| 3087 | //
|
| 3088 | // and after that stream that messages:
|
| 3089 | //
|
| 3090 | // <target/>
|
| 3091 | // </root>
|
| 3092 | de.start_replay(checkpoint);
|
| 3093 | assert_eq!(
|
| 3094 | de.read,
|
| 3095 | vec![
|
| 3096 | Start(BytesStart::new("inner" )),
|
| 3097 | Text("text" .into()),
|
| 3098 | Start(BytesStart::new("inner" )),
|
| 3099 | End(BytesEnd::new("inner" )),
|
| 3100 | End(BytesEnd::new("inner" )),
|
| 3101 | ]
|
| 3102 | );
|
| 3103 | assert_eq!(de.write, vec![]);
|
| 3104 | assert_eq!(de.next().unwrap(), Start(BytesStart::new("inner" )));
|
| 3105 |
|
| 3106 | // Mark that start_replay() should begin replay from this point
|
| 3107 | let checkpoint = de.skip_checkpoint();
|
| 3108 | assert_eq!(checkpoint, 0);
|
| 3109 |
|
| 3110 | // Skip `$text` node and consume <inner/> after it
|
| 3111 | de.skip().unwrap();
|
| 3112 | assert_eq!(
|
| 3113 | de.read,
|
| 3114 | vec![
|
| 3115 | Start(BytesStart::new("inner" )),
|
| 3116 | End(BytesEnd::new("inner" )),
|
| 3117 | End(BytesEnd::new("inner" )),
|
| 3118 | ]
|
| 3119 | );
|
| 3120 | assert_eq!(
|
| 3121 | de.write,
|
| 3122 | vec![
|
| 3123 | // This comment here to keep the same formatting of both arrays
|
| 3124 | // otherwise rustfmt suggest one-line it
|
| 3125 | Text("text" .into()),
|
| 3126 | ]
|
| 3127 | );
|
| 3128 |
|
| 3129 | assert_eq!(de.next().unwrap(), Start(BytesStart::new("inner" )));
|
| 3130 | assert_eq!(de.next().unwrap(), End(BytesEnd::new("inner" )));
|
| 3131 |
|
| 3132 | // We finish writing. Next call to `next()` should start replay messages:
|
| 3133 | //
|
| 3134 | // text
|
| 3135 | // </inner>
|
| 3136 | //
|
| 3137 | // and after that stream that messages:
|
| 3138 | //
|
| 3139 | // <target/>
|
| 3140 | // </root>
|
| 3141 | de.start_replay(checkpoint);
|
| 3142 | assert_eq!(
|
| 3143 | de.read,
|
| 3144 | vec![
|
| 3145 | // This comment here to keep the same formatting as others
|
| 3146 | // otherwise rustfmt suggest one-line it
|
| 3147 | Text("text" .into()),
|
| 3148 | End(BytesEnd::new("inner" )),
|
| 3149 | ]
|
| 3150 | );
|
| 3151 | assert_eq!(de.write, vec![]);
|
| 3152 | assert_eq!(de.next().unwrap(), Text("text" .into()));
|
| 3153 | assert_eq!(de.next().unwrap(), End(BytesEnd::new("inner" )));
|
| 3154 | assert_eq!(de.next().unwrap(), Start(BytesStart::new("target" )));
|
| 3155 | assert_eq!(de.next().unwrap(), End(BytesEnd::new("target" )));
|
| 3156 | assert_eq!(de.next().unwrap(), End(BytesEnd::new("root" )));
|
| 3157 | assert_eq!(de.next().unwrap(), Eof);
|
| 3158 | }
|
| 3159 |
|
/// Checks that `read_to_end()` behaves correctly after `skip()`: events that
/// were buffered while skipping the `<skip>` tree must stay in the write
/// buffer while the `<target>` tree is dropped, and must be replayed afterwards.
#[test]
fn read_to_end() {
    let xml = r#"
        <root>
            <skip>
                text
                <skip/>
            </skip>
            <target>
                <target/>
            </target>
        </root>
        "#;
    let mut deser = Deserializer::from_str(xml);

    // All events of the `<skip>` tree, in document order
    let skip_tree = || {
        vec![
            Start(BytesStart::new("skip")),
            Text("text".into()),
            Start(BytesStart::new("skip")),
            End(BytesEnd::new("skip")),
            End(BytesEnd::new("skip")),
        ]
    };

    // Nothing is buffered before the first event is requested
    assert_eq!(deser.read, vec![]);
    assert_eq!(deser.write, vec![]);

    assert_eq!(deser.next().unwrap(), Start(BytesStart::new("root")));

    // `start_replay()` below will begin replaying from this position
    let mark = deser.skip_checkpoint();
    assert_eq!(mark, 0);

    // Skipping moves the whole `<skip>` tree into the write buffer
    deser.skip().unwrap();
    assert_eq!(deser.read, vec![]);
    assert_eq!(deser.write, skip_tree());

    // Drop every event of the `<target>` tree. The still unconsumed XML is then:
    //
    //   <skip>
    //     text
    //     <skip/>
    //   </skip>
    // </root>
    assert_eq!(deser.next().unwrap(), Start(BytesStart::new("target")));
    deser.read_to_end(QName(b"target")).unwrap();
    assert_eq!(deser.read, vec![]);
    assert_eq!(deser.write, skip_tree());

    // Writing is finished. After `start_replay()` the buffered events
    //
    //   <skip>
    //     text
    //     <skip/>
    //   </skip>
    //
    // are served first, followed by the rest of the stream:
    //
    // </root>
    deser.start_replay(mark);
    assert_eq!(deser.read, skip_tree());
    assert_eq!(deser.write, vec![]);

    assert_eq!(deser.next().unwrap(), Start(BytesStart::new("skip")));
    deser.read_to_end(QName(b"skip")).unwrap();

    assert_eq!(deser.next().unwrap(), End(BytesEnd::new("root")));
    assert_eq!(deser.next().unwrap(), Eof);
}
|
| 3251 |
|
/// Checks that a replay replays only the events recorded after the given
/// checkpoint and leaves earlier buffered events in place.
///
/// Test for <https://github.com/tafia/quick-xml/issues/435>
#[test]
fn partial_replay() {
    let xml = r#"
        <root>
            <skipped-1/>
            <skipped-2/>
            <inner>
                <skipped-3/>
                <skipped-4/>
                <target-2/>
            </inner>
            <target-1/>
        </root>
        "#;
    let mut deser = Deserializer::from_str(xml);

    // Events skipped after checkpoint 1
    let outer_skipped = || {
        vec![
            Start(BytesStart::new("skipped-1")),
            End(BytesEnd::new("skipped-1")),
            Start(BytesStart::new("skipped-2")),
            End(BytesEnd::new("skipped-2")),
        ]
    };
    // Events skipped after checkpoint 2
    let inner_skipped = || {
        vec![
            Start(BytesStart::new("skipped-3")),
            End(BytesEnd::new("skipped-3")),
            Start(BytesStart::new("skipped-4")),
            End(BytesEnd::new("skipped-4")),
        ]
    };
    // Everything skipped so far: checkpoint 1 events followed by checkpoint 2 events
    let all_skipped = || {
        let mut events = outer_skipped();
        events.extend(inner_skipped());
        events
    };

    // Nothing is buffered before the first event is requested
    assert_eq!(deser.read, vec![]);
    assert_eq!(deser.write, vec![]);

    assert_eq!(deser.next().unwrap(), Start(BytesStart::new("root")));

    // A replay from checkpoint 1 starts at this position
    let checkpoint1 = deser.skip_checkpoint();
    assert_eq!(checkpoint1, 0);

    // Skip the first and the second <skipped-N/> elements
    deser.skip().unwrap(); // skipped-1
    deser.skip().unwrap(); // skipped-2
    assert_eq!(deser.read, vec![]);
    assert_eq!(deser.write, outer_skipped());

    ////////////////////////////////////////////////////////////////////////////////////////

    assert_eq!(deser.next().unwrap(), Start(BytesStart::new("inner")));
    assert_eq!(
        deser.peek().unwrap(),
        &Start(BytesStart::new("skipped-3"))
    );
    // The peeked event sits in the read buffer; the write buffer is unchanged
    assert_eq!(deser.read, vec![Start(BytesStart::new("skipped-3"))]);
    assert_eq!(deser.write, outer_skipped());

    // A replay from checkpoint 2 starts at this position
    let checkpoint2 = deser.skip_checkpoint();
    assert_eq!(checkpoint2, 4);

    // Skip the third and the fourth <skipped-N/> elements
    deser.skip().unwrap(); // skipped-3
    deser.skip().unwrap(); // skipped-4
    assert_eq!(deser.read, vec![]);
    assert_eq!(deser.write, all_skipped());

    assert_eq!(deser.next().unwrap(), Start(BytesStart::new("target-2")));
    assert_eq!(deser.next().unwrap(), End(BytesEnd::new("target-2")));
    assert_eq!(deser.peek().unwrap(), &End(BytesEnd::new("inner")));
    assert_eq!(deser.read, vec![End(BytesEnd::new("inner"))]);
    assert_eq!(deser.write, all_skipped());

    // Replay from checkpoint 2: only the events recorded after it move to the
    // read buffer, in front of the already peeked `</inner>`
    deser.start_replay(checkpoint2);
    let mut replayed = inner_skipped();
    replayed.push(End(BytesEnd::new("inner")));
    assert_eq!(deser.read, replayed);
    assert_eq!(deser.write, outer_skipped());

    // Replayed events
    for event in inner_skipped() {
        assert_eq!(deser.next().unwrap(), event);
    }

    assert_eq!(deser.next().unwrap(), End(BytesEnd::new("inner")));
    assert_eq!(deser.read, vec![]);
    assert_eq!(deser.write, outer_skipped());

    ////////////////////////////////////////////////////////////////////////////////////////

    // New events
    assert_eq!(deser.next().unwrap(), Start(BytesStart::new("target-1")));
    assert_eq!(deser.next().unwrap(), End(BytesEnd::new("target-1")));

    assert_eq!(deser.read, vec![]);
    assert_eq!(deser.write, outer_skipped());

    // Replay from checkpoint 1
    deser.start_replay(checkpoint1);
    assert_eq!(deser.read, outer_skipped());
    assert_eq!(deser.write, vec![]);

    // Replayed events
    for event in outer_skipped() {
        assert_eq!(deser.next().unwrap(), event);
    }

    assert_eq!(deser.read, vec![]);
    assert_eq!(deser.write, vec![]);

    // New events
    assert_eq!(deser.next().unwrap(), End(BytesEnd::new("root")));
    assert_eq!(deser.next().unwrap(), Eof);
}
|
| 3450 |
|
/// Checks that limiting the event buffer size works: with the limit set to
/// 3 events, deserializing `List` from the document below is expected to fail
/// with `DeError::TooManyEvents(3)`.
#[test]
fn limit() {
    use serde::Deserialize;

    #[derive(Debug, Deserialize)]
    // The field is only filled by the deserializer and never read in this test
    #[allow(unused)]
    struct List {
        item: Vec<()>,
    }

    let xml = r#"
        <any-name>
            <item/>
            <another-item>
                <some-element>with text</some-element>
                <yet-another-element/>
            </another-item>
            <item/>
            <item/>
        </any-name>
        "#;
    let mut deser = Deserializer::from_str(xml);
    deser.event_buffer_size(NonZeroUsize::new(3));

    let outcome = List::deserialize(&mut deser);
    match outcome {
        Err(DeError::TooManyEvents(limit)) => assert_eq!(limit.get(), 3),
        other => panic!("Expected `Err(TooManyEvents(3))`, but found {:?}", other),
    }
}
|
| 3482 |
|
/// Regression test: without handling `Eof` in `skip` this test failed with
/// memory allocation.
#[test]
fn invalid_xml() {
    use crate::de::DeEvent::*;

    let mut deser = Deserializer::from_str("<root>");

    // Cache all events: the unclosed `<root>` is skipped and then replayed
    let mark = deser.skip_checkpoint();
    deser.skip().unwrap();
    deser.start_replay(mark);

    let expected = vec![Start(BytesStart::new("root")), Eof];
    assert_eq!(deser.read, expected);
}
|
| 3496 | }
|
| 3497 |
|
| 3498 | mod read_to_end {
|
| 3499 | use super::*;
|
| 3500 | use crate::de::DeEvent::*;
|
| 3501 | use pretty_assertions::assert_eq;
|
| 3502 |
|
| 3503 | #[test ]
|
| 3504 | fn complex() {
|
| 3505 | let mut de = Deserializer::from_str(
|
| 3506 | r#"
|
| 3507 | <root>
|
| 3508 | <tag a="1"><tag>text</tag>content</tag>
|
| 3509 | <tag a="2"><![CDATA[cdata content]]></tag>
|
| 3510 | <self-closed/>
|
| 3511 | </root>
|
| 3512 | "# ,
|
| 3513 | );
|
| 3514 |
|
| 3515 | assert_eq!(de.next().unwrap(), Start(BytesStart::new("root" )));
|
| 3516 |
|
| 3517 | assert_eq!(
|
| 3518 | de.next().unwrap(),
|
| 3519 | Start(BytesStart::from_content(r#"tag a="1""# , 3))
|
| 3520 | );
|
| 3521 | assert_eq!(de.read_to_end(QName(b"tag" )).unwrap(), ());
|
| 3522 |
|
| 3523 | assert_eq!(
|
| 3524 | de.next().unwrap(),
|
| 3525 | Start(BytesStart::from_content(r#"tag a="2""# , 3))
|
| 3526 | );
|
| 3527 | assert_eq!(de.next().unwrap(), Text("cdata content" .into()));
|
| 3528 | assert_eq!(de.next().unwrap(), End(BytesEnd::new("tag" )));
|
| 3529 |
|
| 3530 | assert_eq!(de.next().unwrap(), Start(BytesStart::new("self-closed" )));
|
| 3531 | assert_eq!(de.read_to_end(QName(b"self-closed" )).unwrap(), ());
|
| 3532 |
|
| 3533 | assert_eq!(de.next().unwrap(), End(BytesEnd::new("root" )));
|
| 3534 | assert_eq!(de.next().unwrap(), Eof);
|
| 3535 | }
|
| 3536 |
|
| 3537 | #[test ]
|
| 3538 | fn invalid_xml1() {
|
| 3539 | let mut de = Deserializer::from_str("<tag><tag></tag>" );
|
| 3540 |
|
| 3541 | assert_eq!(de.next().unwrap(), Start(BytesStart::new("tag" )));
|
| 3542 | assert_eq!(de.peek().unwrap(), &Start(BytesStart::new("tag" )));
|
| 3543 |
|
| 3544 | match de.read_to_end(QName(b"tag" )) {
|
| 3545 | Err(DeError::UnexpectedEof) => (),
|
| 3546 | x => panic!("Expected `Err(UnexpectedEof)`, but found {:?}" , x),
|
| 3547 | }
|
| 3548 | assert_eq!(de.next().unwrap(), Eof);
|
| 3549 | }
|
| 3550 |
|
| 3551 | #[test ]
|
| 3552 | fn invalid_xml2() {
|
| 3553 | let mut de = Deserializer::from_str("<tag><![CDATA[]]><tag></tag>" );
|
| 3554 |
|
| 3555 | assert_eq!(de.next().unwrap(), Start(BytesStart::new("tag" )));
|
| 3556 | assert_eq!(de.peek().unwrap(), &Text("" .into()));
|
| 3557 |
|
| 3558 | match de.read_to_end(QName(b"tag" )) {
|
| 3559 | Err(DeError::UnexpectedEof) => (),
|
| 3560 | x => panic!("Expected `Err(UnexpectedEof)`, but found {:?}" , x),
|
| 3561 | }
|
| 3562 | assert_eq!(de.next().unwrap(), Eof);
|
| 3563 | }
|
| 3564 | }
|
| 3565 |
|
/// Checks that `IoReader` and `SliceReader` yield the same sequence of events
/// for the same input.
#[test]
fn borrowing_reader_parity() {
    let xml = r#"
        <item name="hello" source="world.rs">Some text</item>
        <item2/>
        <item3 value="world" />
        "#;

    let mut io_reader = IoReader {
        reader: Reader::from_reader(xml.as_bytes()),
        start_trimmer: StartTrimmer::default(),
        buf: Vec::new(),
    };
    let mut slice_reader = SliceReader {
        reader: Reader::from_str(xml),
        start_trimmer: StartTrimmer::default(),
    };

    loop {
        let from_io = io_reader.next().unwrap();
        let from_slice = slice_reader.next().unwrap();

        // Stop only when both readers are exhausted at the same time; if just
        // one of them reports `Eof`, the comparison below fails the test
        let both_finished = matches!(
            (&from_io, &from_slice),
            (PayloadEvent::Eof, PayloadEvent::Eof)
        );
        if both_finished {
            break;
        }

        assert_eq!(from_io, from_slice);
    }
}
|
| 3595 |
|
/// Checks the exact events produced by `SliceReader` when
/// `expand_empty_elements(true)` is set: every element, including the
/// self-closed ones, is expected as a `Start`/`End` pair.
#[test]
fn borrowing_reader_events() {
    use crate::de::PayloadEvent::*;

    let xml = r#"
        <item name="hello" source="world.rs">Some text</item>
        <item2></item2>
        <item3/>
        <item4 value="world" />
        "#;

    let mut reader = SliceReader {
        reader: Reader::from_str(xml),
        start_trimmer: StartTrimmer::default(),
    };

    reader.reader.expand_empty_elements(true);

    // Pull events until the first `Eof`, which itself is not collected
    let events: Vec<_> = std::iter::from_fn(|| match reader.next().unwrap() {
        PayloadEvent::Eof => None,
        event => Some(event),
    })
    .collect();

    let expected = vec![
        Start(BytesStart::from_content(
            r#"item name="hello" source="world.rs""#,
            4,
        )),
        Text(BytesText::from_escaped("Some text")),
        End(BytesEnd::new("item")),
        Start(BytesStart::from_content("item2", 5)),
        End(BytesEnd::new("item2")),
        Start(BytesStart::from_content("item3", 5)),
        End(BytesEnd::new("item3")),
        Start(BytesStart::from_content(r#"item4 value="world" "#, 5)),
        End(BytesEnd::new("item4")),
    ];
    assert_eq!(events, expected);
}
|
| 3642 |
|
/// Ensures that [`Deserializer::read_string()`] can never get an `End` event,
/// because the parser reports an `EndEventMismatch` error before that.
#[test]
fn read_string() {
    // A closing tag without any opening tag
    let stray_end = from_str::<String>(r#"</root>"#);
    match stray_end {
        Err(DeError::InvalidXml(Error::EndEventMismatch { expected, found })) => {
            assert_eq!(expected, "");
            assert_eq!(found, "root");
        }
        other => panic!(
            r#"Expected `Err(InvalidXml(EndEventMismatch("", "root")))`, but found {:?}"#,
            other
        ),
    }

    // A properly closed empty element gives an empty string
    let empty: String = from_str(r#"<root></root>"#).unwrap();
    assert_eq!(empty, "");

    // A closing tag whose name differs from the opening one
    let wrong_end = from_str::<String>(r#"<root></other>"#);
    match wrong_end {
        Err(DeError::InvalidXml(Error::EndEventMismatch { expected, found })) => {
            assert_eq!(expected, "root");
            assert_eq!(found, "other");
        }
        other => panic!(
            r#"Expected `Err(InvalidXml(EndEventMismatch("root", "other")))`, but found {:?}"#,
            other
        ),
    }
}
|
| 3672 |
|
| 3673 | /// Tests for https://github.com/tafia/quick-xml/issues/474.
|
| 3674 | ///
|
| 3675 | /// That tests ensures that comments and processed instructions is ignored
|
| 3676 | /// and can split one logical string in pieces.
|
| 3677 | mod merge_text {
|
| 3678 | use super::*;
|
| 3679 | use pretty_assertions::assert_eq;
|
| 3680 |
|
| 3681 | #[test ]
|
| 3682 | fn text() {
|
| 3683 | let mut de = Deserializer::from_str("text" );
|
| 3684 | assert_eq!(de.next().unwrap(), DeEvent::Text("text" .into()));
|
| 3685 | }
|
| 3686 |
|
| 3687 | #[test ]
|
| 3688 | fn cdata() {
|
| 3689 | let mut de = Deserializer::from_str("<![CDATA[cdata]]>" );
|
| 3690 | assert_eq!(de.next().unwrap(), DeEvent::Text("cdata" .into()));
|
| 3691 | }
|
| 3692 |
|
| 3693 | #[test ]
|
| 3694 | fn text_and_cdata() {
|
| 3695 | let mut de = Deserializer::from_str("text and <![CDATA[cdata]]>" );
|
| 3696 | assert_eq!(de.next().unwrap(), DeEvent::Text("text and cdata" .into()));
|
| 3697 | }
|
| 3698 |
|
| 3699 | #[test ]
|
| 3700 | fn text_and_empty_cdata() {
|
| 3701 | let mut de = Deserializer::from_str("text and <![CDATA[]]>" );
|
| 3702 | assert_eq!(de.next().unwrap(), DeEvent::Text("text and " .into()));
|
| 3703 | }
|
| 3704 |
|
| 3705 | #[test ]
|
| 3706 | fn cdata_and_text() {
|
| 3707 | let mut de = Deserializer::from_str("<![CDATA[cdata]]> and text" );
|
| 3708 | assert_eq!(de.next().unwrap(), DeEvent::Text("cdata and text" .into()));
|
| 3709 | }
|
| 3710 |
|
| 3711 | #[test ]
|
| 3712 | fn empty_cdata_and_text() {
|
| 3713 | let mut de = Deserializer::from_str("<![CDATA[]]> and text" );
|
| 3714 | assert_eq!(de.next().unwrap(), DeEvent::Text(" and text" .into()));
|
| 3715 | }
|
| 3716 |
|
| 3717 | #[test ]
|
| 3718 | fn cdata_and_cdata() {
|
| 3719 | let mut de = Deserializer::from_str(
|
| 3720 | " \
|
| 3721 | <![CDATA[cdata]]]]> \
|
| 3722 | <![CDATA[>cdata]]> \
|
| 3723 | " ,
|
| 3724 | );
|
| 3725 | assert_eq!(de.next().unwrap(), DeEvent::Text("cdata]]>cdata" .into()));
|
| 3726 | }
|
| 3727 |
|
| 3728 | mod comment_between {
|
| 3729 | use super::*;
|
| 3730 | use pretty_assertions::assert_eq;
|
| 3731 |
|
| 3732 | #[test ]
|
| 3733 | fn text() {
|
| 3734 | let mut de = Deserializer::from_str(
|
| 3735 | " \
|
| 3736 | text \
|
| 3737 | <!--comment 1--><!--comment 2--> \
|
| 3738 | text \
|
| 3739 | " ,
|
| 3740 | );
|
| 3741 | assert_eq!(de.next().unwrap(), DeEvent::Text("text text" .into()));
|
| 3742 | }
|
| 3743 |
|
| 3744 | #[test ]
|
| 3745 | fn cdata() {
|
| 3746 | let mut de = Deserializer::from_str(
|
| 3747 | " \
|
| 3748 | <![CDATA[cdata]]]]> \
|
| 3749 | <!--comment 1--><!--comment 2--> \
|
| 3750 | <![CDATA[>cdata]]> \
|
| 3751 | " ,
|
| 3752 | );
|
| 3753 | assert_eq!(de.next().unwrap(), DeEvent::Text("cdata]]>cdata" .into()));
|
| 3754 | }
|
| 3755 |
|
| 3756 | #[test ]
|
| 3757 | fn text_and_cdata() {
|
| 3758 | let mut de = Deserializer::from_str(
|
| 3759 | " \
|
| 3760 | text \
|
| 3761 | <!--comment 1--><!--comment 2--> \
|
| 3762 | <![CDATA[ cdata]]> \
|
| 3763 | " ,
|
| 3764 | );
|
| 3765 | assert_eq!(de.next().unwrap(), DeEvent::Text("text cdata" .into()));
|
| 3766 | }
|
| 3767 |
|
| 3768 | #[test ]
|
| 3769 | fn text_and_empty_cdata() {
|
| 3770 | let mut de = Deserializer::from_str(
|
| 3771 | " \
|
| 3772 | text \
|
| 3773 | <!--comment 1--><!--comment 2--> \
|
| 3774 | <![CDATA[]]> \
|
| 3775 | " ,
|
| 3776 | );
|
| 3777 | assert_eq!(de.next().unwrap(), DeEvent::Text("text " .into()));
|
| 3778 | }
|
| 3779 |
|
| 3780 | #[test ]
|
| 3781 | fn cdata_and_text() {
|
| 3782 | let mut de = Deserializer::from_str(
|
| 3783 | " \
|
| 3784 | <![CDATA[cdata ]]> \
|
| 3785 | <!--comment 1--><!--comment 2--> \
|
| 3786 | text \
|
| 3787 | " ,
|
| 3788 | );
|
| 3789 | assert_eq!(de.next().unwrap(), DeEvent::Text("cdata text" .into()));
|
| 3790 | }
|
| 3791 |
|
| 3792 | #[test ]
|
| 3793 | fn empty_cdata_and_text() {
|
| 3794 | let mut de = Deserializer::from_str(
|
| 3795 | " \
|
| 3796 | <![CDATA[]]> \
|
| 3797 | <!--comment 1--><!--comment 2--> \
|
| 3798 | text \
|
| 3799 | " ,
|
| 3800 | );
|
| 3801 | assert_eq!(de.next().unwrap(), DeEvent::Text(" text" .into()));
|
| 3802 | }
|
| 3803 |
|
| 3804 | #[test ]
|
| 3805 | fn cdata_and_cdata() {
|
| 3806 | let mut de = Deserializer::from_str(
|
| 3807 | " \
|
| 3808 | <![CDATA[cdata]]]> \
|
| 3809 | <!--comment 1--><!--comment 2--> \
|
| 3810 | <![CDATA[]>cdata]]> \
|
| 3811 | " ,
|
| 3812 | );
|
| 3813 | assert_eq!(de.next().unwrap(), DeEvent::Text("cdata]]>cdata" .into()));
|
| 3814 | }
|
| 3815 | }
|
| 3816 |
|
| 3817 | mod pi_between {
|
| 3818 | use super::*;
|
| 3819 | use pretty_assertions::assert_eq;
|
| 3820 |
|
| 3821 | #[test ]
|
| 3822 | fn text() {
|
| 3823 | let mut de = Deserializer::from_str(
|
| 3824 | " \
|
| 3825 | text \
|
| 3826 | <?pi 1?><?pi 2?> \
|
| 3827 | text \
|
| 3828 | " ,
|
| 3829 | );
|
| 3830 | assert_eq!(de.next().unwrap(), DeEvent::Text("text text" .into()));
|
| 3831 | }
|
| 3832 |
|
| 3833 | #[test ]
|
| 3834 | fn cdata() {
|
| 3835 | let mut de = Deserializer::from_str(
|
| 3836 | " \
|
| 3837 | <![CDATA[cdata]]]]> \
|
| 3838 | <?pi 1?><?pi 2?> \
|
| 3839 | <![CDATA[>cdata]]> \
|
| 3840 | " ,
|
| 3841 | );
|
| 3842 | assert_eq!(de.next().unwrap(), DeEvent::Text("cdata]]>cdata" .into()));
|
| 3843 | }
|
| 3844 |
|
| 3845 | #[test ]
|
| 3846 | fn text_and_cdata() {
|
| 3847 | let mut de = Deserializer::from_str(
|
| 3848 | " \
|
| 3849 | text \
|
| 3850 | <?pi 1?><?pi 2?> \
|
| 3851 | <![CDATA[ cdata]]> \
|
| 3852 | " ,
|
| 3853 | );
|
| 3854 | assert_eq!(de.next().unwrap(), DeEvent::Text("text cdata" .into()));
|
| 3855 | }
|
| 3856 |
|
| 3857 | #[test ]
|
| 3858 | fn text_and_empty_cdata() {
|
| 3859 | let mut de = Deserializer::from_str(
|
| 3860 | " \
|
| 3861 | text \
|
| 3862 | <?pi 1?><?pi 2?> \
|
| 3863 | <![CDATA[]]> \
|
| 3864 | " ,
|
| 3865 | );
|
| 3866 | assert_eq!(de.next().unwrap(), DeEvent::Text("text " .into()));
|
| 3867 | }
|
| 3868 |
|
| 3869 | #[test ]
|
| 3870 | fn cdata_and_text() {
|
| 3871 | let mut de = Deserializer::from_str(
|
| 3872 | " \
|
| 3873 | <![CDATA[cdata ]]> \
|
| 3874 | <?pi 1?><?pi 2?> \
|
| 3875 | text \
|
| 3876 | " ,
|
| 3877 | );
|
| 3878 | assert_eq!(de.next().unwrap(), DeEvent::Text("cdata text" .into()));
|
| 3879 | }
|
| 3880 |
|
| 3881 | #[test ]
|
| 3882 | fn empty_cdata_and_text() {
|
| 3883 | let mut de = Deserializer::from_str(
|
| 3884 | " \
|
| 3885 | <![CDATA[]]> \
|
| 3886 | <?pi 1?><?pi 2?> \
|
| 3887 | text \
|
| 3888 | " ,
|
| 3889 | );
|
| 3890 | assert_eq!(de.next().unwrap(), DeEvent::Text(" text" .into()));
|
| 3891 | }
|
| 3892 |
|
| 3893 | #[test ]
|
| 3894 | fn cdata_and_cdata() {
|
| 3895 | let mut de = Deserializer::from_str(
|
| 3896 | " \
|
| 3897 | <![CDATA[cdata]]]> \
|
| 3898 | <?pi 1?><?pi 2?> \
|
| 3899 | <![CDATA[]>cdata]]> \
|
| 3900 | " ,
|
| 3901 | );
|
| 3902 | assert_eq!(de.next().unwrap(), DeEvent::Text("cdata]]>cdata" .into()));
|
| 3903 | }
|
| 3904 | }
|
| 3905 | }
|
| 3906 |
|
| 3907 | /// Tests for https://github.com/tafia/quick-xml/issues/474.
|
| 3908 | ///
|
| 3909 | /// These tests ensure that any combination of payload data is processed
|
| 3910 | /// as expected.
|
| 3911 | mod triples {
|
| 3912 | use super::*;
|
| 3913 | use pretty_assertions::assert_eq;
|
| 3914 |
|
| 3915 | mod start {
|
| 3916 | use super::*;
|
| 3917 |
|
| 3918 | /// <tag1><tag2>...
|
| 3919 | mod start {
|
| 3920 | use super::*;
|
| 3921 | use pretty_assertions::assert_eq;
|
| 3922 |
|
| 3923 | #[test ]
|
| 3924 | fn start() {
|
| 3925 | let mut de = Deserializer::from_str("<tag1><tag2><tag3>" );
|
| 3926 | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1" )));
|
| 3927 | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2" )));
|
| 3928 | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag3" )));
|
| 3929 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
| 3930 | }
|
| 3931 |
|
| 3932 | /// Not matching end tag will result to error
|
| 3933 | #[test ]
|
| 3934 | fn end() {
|
| 3935 | let mut de = Deserializer::from_str("<tag1><tag2></tag2>" );
|
| 3936 | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1" )));
|
| 3937 | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2" )));
|
| 3938 | assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag2" )));
|
| 3939 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
| 3940 | }
|
| 3941 |
|
| 3942 | #[test ]
|
| 3943 | fn text() {
|
| 3944 | let mut de = Deserializer::from_str("<tag1><tag2> text " );
|
| 3945 | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1" )));
|
| 3946 | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2" )));
|
| 3947 | // Text is trimmed from both sides
|
| 3948 | assert_eq!(de.next().unwrap(), DeEvent::Text("text" .into()));
|
| 3949 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
| 3950 | }
|
| 3951 |
|
| 3952 | #[test ]
|
| 3953 | fn cdata() {
|
| 3954 | let mut de = Deserializer::from_str("<tag1><tag2><![CDATA[ cdata ]]>" );
|
| 3955 | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1" )));
|
| 3956 | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2" )));
|
| 3957 | assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata " .into()));
|
| 3958 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
| 3959 | }
|
| 3960 |
|
| 3961 | #[test ]
|
| 3962 | fn eof() {
|
| 3963 | let mut de = Deserializer::from_str("<tag1><tag2>" );
|
| 3964 | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1" )));
|
| 3965 | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2" )));
|
| 3966 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
| 3967 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
| 3968 | }
|
| 3969 | }
|
| 3970 |
|
| 3971 | /// <tag></tag>...
|
| 3972 | mod end {
|
| 3973 | use super::*;
|
| 3974 | use pretty_assertions::assert_eq;
|
| 3975 |
|
| 3976 | #[test ]
|
| 3977 | fn start() {
|
| 3978 | let mut de = Deserializer::from_str("<tag></tag><tag2>" );
|
| 3979 | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag" )));
|
| 3980 | assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag" )));
|
| 3981 | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2" )));
|
| 3982 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
| 3983 | }
|
| 3984 |
|
| 3985 | #[test ]
|
| 3986 | fn end() {
|
| 3987 | let mut de = Deserializer::from_str("<tag></tag></tag2>" );
|
| 3988 | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag" )));
|
| 3989 | assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag" )));
|
| 3990 | match de.next() {
|
| 3991 | Err(DeError::InvalidXml(Error::EndEventMismatch { expected, found })) => {
|
| 3992 | assert_eq!(expected, "" );
|
| 3993 | assert_eq!(found, "tag2" );
|
| 3994 | }
|
| 3995 | x => panic!("Expected `InvalidXml(EndEventMismatch {{ expected = '', found = 'tag2' }})`, but got {:?}" , x),
|
| 3996 | }
|
| 3997 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
| 3998 | }
|
| 3999 |
|
| 4000 | #[test ]
|
| 4001 | fn text() {
|
| 4002 | let mut de = Deserializer::from_str("<tag></tag> text " );
|
| 4003 | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag" )));
|
| 4004 | assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag" )));
|
| 4005 | // Text is trimmed from both sides
|
| 4006 | assert_eq!(de.next().unwrap(), DeEvent::Text("text" .into()));
|
| 4007 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
| 4008 | }
|
| 4009 |
|
| 4010 | #[test ]
|
| 4011 | fn cdata() {
|
| 4012 | let mut de = Deserializer::from_str("<tag></tag><![CDATA[ cdata ]]>" );
|
| 4013 | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag" )));
|
| 4014 | assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag" )));
|
| 4015 | assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata " .into()));
|
| 4016 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
| 4017 | }
|
| 4018 |
|
| 4019 | #[test ]
|
| 4020 | fn eof() {
|
| 4021 | let mut de = Deserializer::from_str("<tag></tag>" );
|
| 4022 | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag" )));
|
| 4023 | assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag" )));
|
| 4024 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
| 4025 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
| 4026 | }
|
| 4027 | }
|
| 4028 |
|
| 4029 | /// <tag> text ...
|
| 4030 | mod text {
|
| 4031 | use super::*;
|
| 4032 | use pretty_assertions::assert_eq;
|
| 4033 |
|
| 4034 | #[test ]
|
| 4035 | fn start() {
|
| 4036 | let mut de = Deserializer::from_str("<tag> text <tag2>" );
|
| 4037 | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag" )));
|
| 4038 | // Text is trimmed from both sides
|
| 4039 | assert_eq!(de.next().unwrap(), DeEvent::Text("text" .into()));
|
| 4040 | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2" )));
|
| 4041 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
| 4042 | }
|
| 4043 |
|
| 4044 | #[test ]
|
| 4045 | fn end() {
|
| 4046 | let mut de = Deserializer::from_str("<tag> text </tag>" );
|
| 4047 | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag" )));
|
| 4048 | // Text is trimmed from both sides
|
| 4049 | assert_eq!(de.next().unwrap(), DeEvent::Text("text" .into()));
|
| 4050 | assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag" )));
|
| 4051 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
| 4052 | }
|
| 4053 |
|
| 4054 | // start::text::text has no difference from start::text
|
| 4055 |
|
| 4056 | #[test ]
|
| 4057 | fn cdata() {
|
| 4058 | let mut de = Deserializer::from_str("<tag> text <![CDATA[ cdata ]]>" );
|
| 4059 | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag" )));
|
| 4060 | // Text is trimmed from the start
|
| 4061 | assert_eq!(de.next().unwrap(), DeEvent::Text("text cdata " .into()));
|
| 4062 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
| 4063 | }
|
| 4064 |
|
| 4065 | #[test ]
|
| 4066 | fn eof() {
|
| 4067 | let mut de = Deserializer::from_str("<tag> text " );
|
| 4068 | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag" )));
|
| 4069 | // Text is trimmed from both sides
|
| 4070 | assert_eq!(de.next().unwrap(), DeEvent::Text("text" .into()));
|
| 4071 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
| 4072 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
| 4073 | }
|
| 4074 | }
|
| 4075 |
|
| 4076 | /// <tag><![CDATA[ cdata ]]>...
|
| 4077 | mod cdata {
|
| 4078 | use super::*;
|
| 4079 | use pretty_assertions::assert_eq;
|
| 4080 |
|
| 4081 | #[test ]
|
| 4082 | fn start() {
|
| 4083 | let mut de = Deserializer::from_str("<tag><![CDATA[ cdata ]]><tag2>" );
|
| 4084 | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag" )));
|
| 4085 | assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata " .into()));
|
| 4086 | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2" )));
|
| 4087 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
| 4088 | }
|
| 4089 |
|
| 4090 | #[test ]
|
| 4091 | fn end() {
|
| 4092 | let mut de = Deserializer::from_str("<tag><![CDATA[ cdata ]]></tag>" );
|
| 4093 | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag" )));
|
| 4094 | assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata " .into()));
|
| 4095 | assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag" )));
|
| 4096 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
| 4097 | }
|
| 4098 |
|
| 4099 | #[test ]
|
| 4100 | fn text() {
|
| 4101 | let mut de = Deserializer::from_str("<tag><![CDATA[ cdata ]]> text " );
|
| 4102 | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag" )));
|
| 4103 | // Text is trimmed from the end
|
| 4104 | assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata text" .into()));
|
| 4105 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
| 4106 | }
|
| 4107 |
|
| 4108 | #[test ]
|
| 4109 | fn cdata() {
|
| 4110 | let mut de =
|
| 4111 | Deserializer::from_str("<tag><![CDATA[ cdata ]]><![CDATA[ cdata2 ]]>" );
|
| 4112 | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag" )));
|
| 4113 | assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata cdata2 " .into()));
|
| 4114 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
| 4115 | }
|
| 4116 |
|
| 4117 | #[test ]
|
| 4118 | fn eof() {
|
| 4119 | let mut de = Deserializer::from_str("<tag><![CDATA[ cdata ]]>" );
|
| 4120 | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag" )));
|
| 4121 | assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata " .into()));
|
| 4122 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
| 4123 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
| 4124 | }
|
| 4125 | }
|
| 4126 | }
|
| 4127 |
|
/// A stream that opens with a closing tag is rejected with an
/// `EndEventMismatch` error; the call after that returns `Eof`.
#[test]
fn end() {
    let mut de = Deserializer::from_str("</tag>");

    let first = de.next();
    match first {
        Err(DeError::InvalidXml(Error::EndEventMismatch { expected, found })) => {
            // No element is open, so nothing was expected to be closed
            assert_eq!(expected, "");
            assert_eq!(found, "tag");
        }
        unexpected => panic!("Expected `InvalidXml(EndEventMismatch {{ expected = '', found = 'tag' }})`, but got {:?}", unexpected),
    }

    let second = de.next().unwrap();
    assert_eq!(second, DeEvent::Eof);
}
|
| 4141 |
|
| 4142 | mod text {
|
| 4143 | use super::*;
|
| 4144 | use pretty_assertions::assert_eq;
|
| 4145 |
|
| 4146 | mod start {
|
| 4147 | use super::*;
|
| 4148 | use pretty_assertions::assert_eq;
|
| 4149 |
|
| 4150 | #[test ]
|
| 4151 | fn start() {
|
| 4152 | let mut de = Deserializer::from_str(" text <tag1><tag2>" );
|
| 4153 | // Text is trimmed from both sides
|
| 4154 | assert_eq!(de.next().unwrap(), DeEvent::Text("text" .into()));
|
| 4155 | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1" )));
|
| 4156 | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2" )));
|
| 4157 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
| 4158 | }
|
| 4159 |
|
| 4160 | /// Not matching end tag will result in error
|
| 4161 | #[test ]
|
| 4162 | fn end() {
|
| 4163 | let mut de = Deserializer::from_str(" text <tag></tag>" );
|
| 4164 | // Text is trimmed from both sides
|
| 4165 | assert_eq!(de.next().unwrap(), DeEvent::Text("text" .into()));
|
| 4166 | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag" )));
|
| 4167 | assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag" )));
|
| 4168 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
| 4169 | }
|
| 4170 |
|
| 4171 | #[test ]
|
| 4172 | fn text() {
|
| 4173 | let mut de = Deserializer::from_str(" text <tag> text2 " );
|
| 4174 | // Text is trimmed from both sides
|
| 4175 | assert_eq!(de.next().unwrap(), DeEvent::Text("text" .into()));
|
| 4176 | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag" )));
|
| 4177 | // Text is trimmed from both sides
|
| 4178 | assert_eq!(de.next().unwrap(), DeEvent::Text("text2" .into()));
|
| 4179 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
| 4180 | }
|
| 4181 |
|
| 4182 | #[test ]
|
| 4183 | fn cdata() {
|
| 4184 | let mut de = Deserializer::from_str(" text <tag><![CDATA[ cdata ]]>" );
|
| 4185 | // Text is trimmed from both sides
|
| 4186 | assert_eq!(de.next().unwrap(), DeEvent::Text("text" .into()));
|
| 4187 | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag" )));
|
| 4188 | assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata " .into()));
|
| 4189 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
| 4190 | }
|
| 4191 |
|
| 4192 | #[test ]
|
| 4193 | fn eof() {
|
| 4194 | // Text is trimmed from both sides
|
| 4195 | let mut de = Deserializer::from_str(" text <tag>" );
|
| 4196 | assert_eq!(de.next().unwrap(), DeEvent::Text("text" .into()));
|
| 4197 | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag" )));
|
| 4198 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
| 4199 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
| 4200 | }
|
| 4201 | }
|
| 4202 |
|
| 4203 | /// End event without corresponding start event will always generate an error
|
| 4204 | #[test ]
|
| 4205 | fn end() {
|
| 4206 | let mut de = Deserializer::from_str(" text </tag>" );
|
| 4207 | // Text is trimmed from both sides
|
| 4208 | assert_eq!(de.next().unwrap(), DeEvent::Text("text" .into()));
|
| 4209 | match de.next() {
|
| 4210 | Err(DeError::InvalidXml(Error::EndEventMismatch { expected, found })) => {
|
| 4211 | assert_eq!(expected, "" );
|
| 4212 | assert_eq!(found, "tag" );
|
| 4213 | }
|
| 4214 | x => panic!("Expected `InvalidXml(EndEventMismatch {{ expected = '', found = 'tag' }})`, but got {:?}" , x),
|
| 4215 | }
|
| 4216 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
| 4217 | }
|
| 4218 |
|
| 4219 | // text::text::something is equivalent to text::something
|
| 4220 |
|
| 4221 | mod cdata {
|
| 4222 | use super::*;
|
| 4223 | use pretty_assertions::assert_eq;
|
| 4224 |
|
| 4225 | #[test ]
|
| 4226 | fn start() {
|
| 4227 | let mut de = Deserializer::from_str(" text <![CDATA[ cdata ]]><tag>" );
|
| 4228 | // Text is trimmed from the start
|
| 4229 | assert_eq!(de.next().unwrap(), DeEvent::Text("text cdata " .into()));
|
| 4230 | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag" )));
|
| 4231 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
| 4232 | }
|
| 4233 |
|
| 4234 | #[test ]
|
| 4235 | fn end() {
|
| 4236 | let mut de = Deserializer::from_str(" text <![CDATA[ cdata ]]></tag>" );
|
| 4237 | // Text is trimmed from the start
|
| 4238 | assert_eq!(de.next().unwrap(), DeEvent::Text("text cdata " .into()));
|
| 4239 | match de.next() {
|
| 4240 | Err(DeError::InvalidXml(Error::EndEventMismatch { expected, found })) => {
|
| 4241 | assert_eq!(expected, "" );
|
| 4242 | assert_eq!(found, "tag" );
|
| 4243 | }
|
| 4244 | x => panic!("Expected `InvalidXml(EndEventMismatch {{ expected = '', found = 'tag' }})`, but got {:?}" , x),
|
| 4245 | }
|
| 4246 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
| 4247 | }
|
| 4248 |
|
| 4249 | #[test ]
|
| 4250 | fn text() {
|
| 4251 | let mut de = Deserializer::from_str(" text <![CDATA[ cdata ]]> text2 " );
|
| 4252 | // Text is trimmed from the start and from the end
|
| 4253 | assert_eq!(
|
| 4254 | de.next().unwrap(),
|
| 4255 | DeEvent::Text("text cdata text2" .into())
|
| 4256 | );
|
| 4257 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
| 4258 | }
|
| 4259 |
|
| 4260 | #[test ]
|
| 4261 | fn cdata() {
|
| 4262 | let mut de =
|
| 4263 | Deserializer::from_str(" text <![CDATA[ cdata ]]><![CDATA[ cdata2 ]]>" );
|
| 4264 | // Text is trimmed from the start
|
| 4265 | assert_eq!(
|
| 4266 | de.next().unwrap(),
|
| 4267 | DeEvent::Text("text cdata cdata2 " .into())
|
| 4268 | );
|
| 4269 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
| 4270 | }
|
| 4271 |
|
| 4272 | #[test ]
|
| 4273 | fn eof() {
|
| 4274 | let mut de = Deserializer::from_str(" text <![CDATA[ cdata ]]>" );
|
| 4275 | // Text is trimmed from the start
|
| 4276 | assert_eq!(de.next().unwrap(), DeEvent::Text("text cdata " .into()));
|
| 4277 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
| 4278 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
| 4279 | }
|
| 4280 | }
|
| 4281 | }
|
| 4282 |
|
| 4283 | mod cdata {
|
| 4284 | use super::*;
|
| 4285 | use pretty_assertions::assert_eq;
|
| 4286 |
|
| 4287 | mod start {
|
| 4288 | use super::*;
|
| 4289 | use pretty_assertions::assert_eq;
|
| 4290 |
|
| 4291 | #[test ]
|
| 4292 | fn start() {
|
| 4293 | let mut de = Deserializer::from_str("<![CDATA[ cdata ]]><tag1><tag2>" );
|
| 4294 | assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata " .into()));
|
| 4295 | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1" )));
|
| 4296 | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2" )));
|
| 4297 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
| 4298 | }
|
| 4299 |
|
| 4300 | /// Not matching end tag will result in error
|
| 4301 | #[test ]
|
| 4302 | fn end() {
|
| 4303 | let mut de = Deserializer::from_str("<![CDATA[ cdata ]]><tag></tag>" );
|
| 4304 | assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata " .into()));
|
| 4305 | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag" )));
|
| 4306 | assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag" )));
|
| 4307 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
| 4308 | }
|
| 4309 |
|
| 4310 | #[test ]
|
| 4311 | fn text() {
|
| 4312 | let mut de = Deserializer::from_str("<![CDATA[ cdata ]]><tag> text " );
|
| 4313 | assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata " .into()));
|
| 4314 | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag" )));
|
| 4315 | // Text is trimmed from both sides
|
| 4316 | assert_eq!(de.next().unwrap(), DeEvent::Text("text" .into()));
|
| 4317 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
| 4318 | }
|
| 4319 |
|
| 4320 | #[test ]
|
| 4321 | fn cdata() {
|
| 4322 | let mut de =
|
| 4323 | Deserializer::from_str("<![CDATA[ cdata ]]><tag><![CDATA[ cdata2 ]]>" );
|
| 4324 | assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata " .into()));
|
| 4325 | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag" )));
|
| 4326 | assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata2 " .into()));
|
| 4327 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
| 4328 | }
|
| 4329 |
|
| 4330 | #[test ]
|
| 4331 | fn eof() {
|
| 4332 | let mut de = Deserializer::from_str("<![CDATA[ cdata ]]><tag>" );
|
| 4333 | assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata " .into()));
|
| 4334 | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag" )));
|
| 4335 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
| 4336 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
| 4337 | }
|
| 4338 | }
|
| 4339 |
|
| 4340 | /// End event without corresponding start event will always generate an error
|
| 4341 | #[test ]
|
| 4342 | fn end() {
|
| 4343 | let mut de = Deserializer::from_str("<![CDATA[ cdata ]]></tag>" );
|
| 4344 | assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata " .into()));
|
| 4345 | match de.next() {
|
| 4346 | Err(DeError::InvalidXml(Error::EndEventMismatch { expected, found })) => {
|
| 4347 | assert_eq!(expected, "" );
|
| 4348 | assert_eq!(found, "tag" );
|
| 4349 | }
|
| 4350 | x => panic!("Expected `InvalidXml(EndEventMismatch {{ expected = '', found = 'tag' }})`, but got {:?}" , x),
|
| 4351 | }
|
| 4352 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
| 4353 | }
|
| 4354 |
|
| 4355 | mod text {
|
| 4356 | use super::*;
|
| 4357 | use pretty_assertions::assert_eq;
|
| 4358 |
|
| 4359 | #[test ]
|
| 4360 | fn start() {
|
| 4361 | let mut de = Deserializer::from_str("<![CDATA[ cdata ]]> text <tag>" );
|
| 4362 | // Text is trimmed from the end
|
| 4363 | assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata text" .into()));
|
| 4364 | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag" )));
|
| 4365 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
| 4366 | }
|
| 4367 |
|
| 4368 | #[test ]
|
| 4369 | fn end() {
|
| 4370 | let mut de = Deserializer::from_str("<![CDATA[ cdata ]]> text </tag>" );
|
| 4371 | // Text is trimmed from the end
|
| 4372 | assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata text" .into()));
|
| 4373 | match de.next() {
|
| 4374 | Err(DeError::InvalidXml(Error::EndEventMismatch { expected, found })) => {
|
| 4375 | assert_eq!(expected, "" );
|
| 4376 | assert_eq!(found, "tag" );
|
| 4377 | }
|
| 4378 | x => panic!("Expected `InvalidXml(EndEventMismatch {{ expected = '', found = 'tag' }})`, but got {:?}" , x),
|
| 4379 | }
|
| 4380 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
| 4381 | }
|
| 4382 |
|
| 4383 | // cdata::text::text is equivalent to cdata::text
|
| 4384 |
|
| 4385 | #[test ]
|
| 4386 | fn cdata() {
|
| 4387 | let mut de =
|
| 4388 | Deserializer::from_str("<![CDATA[ cdata ]]> text <![CDATA[ cdata2 ]]>" );
|
| 4389 | assert_eq!(
|
| 4390 | de.next().unwrap(),
|
| 4391 | DeEvent::Text(" cdata text cdata2 " .into())
|
| 4392 | );
|
| 4393 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
| 4394 | }
|
| 4395 |
|
| 4396 | #[test ]
|
| 4397 | fn eof() {
|
| 4398 | let mut de = Deserializer::from_str("<![CDATA[ cdata ]]> text " );
|
| 4399 | // Text is trimmed from the end
|
| 4400 | assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata text" .into()));
|
| 4401 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
| 4402 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
| 4403 | }
|
| 4404 | }
|
| 4405 |
|
| 4406 | mod cdata {
|
| 4407 | use super::*;
|
| 4408 | use pretty_assertions::assert_eq;
|
| 4409 |
|
| 4410 | #[test ]
|
| 4411 | fn start() {
|
| 4412 | let mut de =
|
| 4413 | Deserializer::from_str("<![CDATA[ cdata ]]><![CDATA[ cdata2 ]]><tag>" );
|
| 4414 | assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata cdata2 " .into()));
|
| 4415 | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag" )));
|
| 4416 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
| 4417 | }
|
| 4418 |
|
| 4419 | #[test ]
|
| 4420 | fn end() {
|
| 4421 | let mut de =
|
| 4422 | Deserializer::from_str("<![CDATA[ cdata ]]><![CDATA[ cdata2 ]]></tag>" );
|
| 4423 | assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata cdata2 " .into()));
|
| 4424 | match de.next() {
|
| 4425 | Err(DeError::InvalidXml(Error::EndEventMismatch { expected, found })) => {
|
| 4426 | assert_eq!(expected, "" );
|
| 4427 | assert_eq!(found, "tag" );
|
| 4428 | }
|
| 4429 | x => panic!("Expected `InvalidXml(EndEventMismatch {{ expected = '', found = 'tag' }})`, but got {:?}" , x),
|
| 4430 | }
|
| 4431 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
| 4432 | }
|
| 4433 |
|
| 4434 | #[test ]
|
| 4435 | fn text() {
|
| 4436 | let mut de =
|
| 4437 | Deserializer::from_str("<![CDATA[ cdata ]]><![CDATA[ cdata2 ]]> text " );
|
| 4438 | // Text is trimmed from the end
|
| 4439 | assert_eq!(
|
| 4440 | de.next().unwrap(),
|
| 4441 | DeEvent::Text(" cdata cdata2 text" .into())
|
| 4442 | );
|
| 4443 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
| 4444 | }
|
| 4445 |
|
| 4446 | #[test ]
|
| 4447 | fn cdata() {
|
| 4448 | let mut de = Deserializer::from_str(
|
| 4449 | "<![CDATA[ cdata ]]><![CDATA[ cdata2 ]]><![CDATA[ cdata3 ]]>" ,
|
| 4450 | );
|
| 4451 | assert_eq!(
|
| 4452 | de.next().unwrap(),
|
| 4453 | DeEvent::Text(" cdata cdata2 cdata3 " .into())
|
| 4454 | );
|
| 4455 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
| 4456 | }
|
| 4457 |
|
| 4458 | #[test ]
|
| 4459 | fn eof() {
|
| 4460 | let mut de = Deserializer::from_str("<![CDATA[ cdata ]]><![CDATA[ cdata2 ]]>" );
|
| 4461 | assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata cdata2 " .into()));
|
| 4462 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
| 4463 | assert_eq!(de.next().unwrap(), DeEvent::Eof);
|
| 4464 | }
|
| 4465 | }
|
| 4466 | }
|
| 4467 | }
|
| 4468 | }
|
| 4469 | |