1 | use crate::de::str2bool;
|
2 | use crate::encoding::Decoder;
|
3 | use crate::errors::serialize::DeError;
|
4 | use crate::name::QName;
|
5 | use crate::utils::CowRef;
|
6 | use serde::de::{DeserializeSeed, Deserializer, EnumAccess, VariantAccess, Visitor};
|
7 | use serde::{forward_to_deserialize_any, serde_if_integer128};
|
8 | use std::borrow::Cow;
|
9 |
|
/// Generates a `Deserializer` method called `$method` that parses the stored
/// name as a number and hands the result to the visitor's `$visit` method.
///
/// The numeric type to parse is inferred from the parameter type of `$visit`.
/// A parse failure is propagated with `?` and converted into `Self::Error`.
macro_rules! deserialize_num {
    ($method:ident, $visit:ident) => {
        fn $method<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Self::Error> {
            let number = self.name.parse()?;
            visitor.$visit(number)
        }
    };
}
|
20 |
|
21 | /// Decodes raw bytes using the deserializer encoding.
|
22 | /// The method will borrow if encoding is UTF-8 compatible and `name` contains
|
23 | /// only UTF-8 compatible characters (usually only ASCII characters).
|
24 | #[inline ]
|
25 | fn decode_name<'n>(name: QName<'n>, decoder: Decoder) -> Result<Cow<'n, str>, DeError> {
|
26 | let local: LocalName<'_> = name.local_name();
|
27 | Ok(decoder.decode(bytes:local.into_inner())?)
|
28 | }
|
29 |
|
/// A deserializer for XML names of elements and attributes.
///
/// Used for deserializing values from:
/// - attribute names (`<... name="..." ...>`)
/// - element names (`<name>...</name>`)
///
/// Converts a name to an identifier string using the following rules:
///
/// - if it is an [`attribute`] name, put `@` in front of the identifier
/// - if it is a namespace binding (`xmlns` or `xmlns:xxx`), put the whole
///   decoded name into the identifier
/// - otherwise put the decoded [`local_name()`] of a name into the identifier
///
/// The final identifier looks like `[@]local_name`, or `@xmlns`, or `@xmlns:binding`
/// (where `[]` means optional element).
///
/// The deserializer also supports deserializing names as other primitive types:
/// - numbers
/// - booleans
/// - unit (`()`) and unit structs
/// - unit variants of the enumerations
///
/// Because `serde` does not define on which side type conversion should be
/// performed, and because the [`Deserialize`] implementations for those
/// primitives in serde do not accept strings, the deserializer performs the
/// conversion by itself.
///
/// The deserializer is able to deserialize unit and unit structs, but any name
/// will be converted to the same unit instance. This is asymmetric with the
/// serializer, which is not able to serialize those types, because empty names
/// are impossible in XML.
///
/// `deserialize_any()` returns the same result as `deserialize_identifier()`.
///
/// # Lifetimes
///
/// - `'i`: lifetime of the data that the deserializer borrows from the parsed input
/// - `'d`: lifetime of a deserializer that holds a buffer with content of events
///
/// [`attribute`]: Self::from_attr
/// [`local_name()`]: QName::local_name
/// [`Deserialize`]: serde::Deserialize
pub struct QNameDeserializer<'i, 'd> {
    /// The already decoded identifier that is handed to visitors
    name: CowRef<'i, 'd, str>,
}
|
75 |
|
76 | impl<'i, 'd> QNameDeserializer<'i, 'd> {
|
77 | /// Creates deserializer from name of an attribute
|
78 | pub fn from_attr(name: QName<'d>, decoder: Decoder) -> Result<Self, DeError> {
|
79 | // https://github.com/tafia/quick-xml/issues/537
|
80 | // Namespace bindings (xmlns:xxx) map to `@xmlns:xxx` instead of `@xxx`
|
81 | let field = if name.as_namespace_binding().is_some() {
|
82 | decoder.decode(name.into_inner())?
|
83 | } else {
|
84 | decode_name(name, decoder)?
|
85 | };
|
86 |
|
87 | Ok(Self {
|
88 | name: CowRef::Owned(format!("@ {field}" )),
|
89 | })
|
90 | }
|
91 |
|
92 | /// Creates deserializer from name of an element
|
93 | pub fn from_elem(name: CowRef<'i, 'd, [u8]>, decoder: Decoder) -> Result<Self, DeError> {
|
94 | let local = match name {
|
95 | CowRef::Input(borrowed) => match decode_name(QName(borrowed), decoder)? {
|
96 | Cow::Borrowed(borrowed) => CowRef::Input(borrowed),
|
97 | Cow::Owned(owned) => CowRef::Owned(owned),
|
98 | },
|
99 | CowRef::Slice(borrowed) => match decode_name(QName(borrowed), decoder)? {
|
100 | Cow::Borrowed(borrowed) => CowRef::Slice(borrowed),
|
101 | Cow::Owned(owned) => CowRef::Owned(owned),
|
102 | },
|
103 | CowRef::Owned(owned) => match decode_name(QName(&owned), decoder)? {
|
104 | // SAFETY: Because result is borrowed, no changes was done
|
105 | // and we can safely unwrap here
|
106 | Cow::Borrowed(_) => CowRef::Owned(String::from_utf8(owned).unwrap()),
|
107 | Cow::Owned(owned) => CowRef::Owned(owned),
|
108 | },
|
109 | };
|
110 |
|
111 | Ok(Self { name: local })
|
112 | }
|
113 | }
|
114 |
|
115 | impl<'de, 'd> Deserializer<'de> for QNameDeserializer<'de, 'd> {
|
116 | type Error = DeError;
|
117 |
|
118 | forward_to_deserialize_any! {
|
119 | char str string
|
120 | bytes byte_buf
|
121 | seq tuple tuple_struct
|
122 | map struct
|
123 | ignored_any
|
124 | }
|
125 |
|
126 | /// According to the <https://www.w3.org/TR/xmlschema11-2/#boolean>,
|
127 | /// valid boolean representations are only `"true"`, `"false"`, `"1"`,
|
128 | /// and `"0"`. But this method also handles following:
|
129 | ///
|
130 | /// |`bool` |XML content
|
131 | /// |-------|-------------------------------------------------------------
|
132 | /// |`true` |`"True"`, `"TRUE"`, `"t"`, `"Yes"`, `"YES"`, `"yes"`, `"y"`
|
133 | /// |`false`|`"False"`, `"FALSE"`, `"f"`, `"No"`, `"NO"`, `"no"`, `"n"`
|
134 | fn deserialize_bool<V>(self, visitor: V) -> Result<V::Value, Self::Error>
|
135 | where
|
136 | V: Visitor<'de>,
|
137 | {
|
138 | str2bool(self.name.as_ref(), visitor)
|
139 | }
|
140 |
|
141 | deserialize_num!(deserialize_i8, visit_i8);
|
142 | deserialize_num!(deserialize_i16, visit_i16);
|
143 | deserialize_num!(deserialize_i32, visit_i32);
|
144 | deserialize_num!(deserialize_i64, visit_i64);
|
145 |
|
146 | deserialize_num!(deserialize_u8, visit_u8);
|
147 | deserialize_num!(deserialize_u16, visit_u16);
|
148 | deserialize_num!(deserialize_u32, visit_u32);
|
149 | deserialize_num!(deserialize_u64, visit_u64);
|
150 |
|
151 | serde_if_integer128! {
|
152 | deserialize_num!(deserialize_i128, visit_i128);
|
153 | deserialize_num!(deserialize_u128, visit_u128);
|
154 | }
|
155 |
|
156 | deserialize_num!(deserialize_f32, visit_f32);
|
157 | deserialize_num!(deserialize_f64, visit_f64);
|
158 |
|
159 | /// Calls [`Visitor::visit_unit`]
|
160 | fn deserialize_unit<V>(self, visitor: V) -> Result<V::Value, Self::Error>
|
161 | where
|
162 | V: Visitor<'de>,
|
163 | {
|
164 | visitor.visit_unit()
|
165 | }
|
166 |
|
167 | /// Forwards deserialization to the [`Self::deserialize_unit`]
|
168 | fn deserialize_unit_struct<V>(
|
169 | self,
|
170 | _name: &'static str,
|
171 | visitor: V,
|
172 | ) -> Result<V::Value, Self::Error>
|
173 | where
|
174 | V: Visitor<'de>,
|
175 | {
|
176 | self.deserialize_unit(visitor)
|
177 | }
|
178 |
|
179 | /// Forwards deserialization to the [`Self::deserialize_identifier`]
|
180 | #[inline ]
|
181 | fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error>
|
182 | where
|
183 | V: Visitor<'de>,
|
184 | {
|
185 | self.deserialize_identifier(visitor)
|
186 | }
|
187 |
|
188 | /// If `name` is an empty string then calls [`Visitor::visit_none`],
|
189 | /// otherwise calls [`Visitor::visit_some`] with itself
|
190 | fn deserialize_option<V>(self, visitor: V) -> Result<V::Value, Self::Error>
|
191 | where
|
192 | V: Visitor<'de>,
|
193 | {
|
194 | if self.name.is_empty() {
|
195 | visitor.visit_none()
|
196 | } else {
|
197 | visitor.visit_some(self)
|
198 | }
|
199 | }
|
200 |
|
201 | fn deserialize_newtype_struct<V>(
|
202 | self,
|
203 | _name: &'static str,
|
204 | visitor: V,
|
205 | ) -> Result<V::Value, Self::Error>
|
206 | where
|
207 | V: Visitor<'de>,
|
208 | {
|
209 | visitor.visit_newtype_struct(self)
|
210 | }
|
211 |
|
212 | /// Calls a [`Visitor::visit_str`] if [`name`] contains only UTF-8
|
213 | /// compatible encoded characters and represents an element name and
|
214 | /// a [`Visitor::visit_string`] in all other cases.
|
215 | ///
|
216 | /// [`name`]: Self::name
|
217 | fn deserialize_identifier<V>(self, visitor: V) -> Result<V::Value, Self::Error>
|
218 | where
|
219 | V: Visitor<'de>,
|
220 | {
|
221 | match self.name {
|
222 | CowRef::Input(name) => visitor.visit_borrowed_str(name),
|
223 | CowRef::Slice(name) => visitor.visit_str(name),
|
224 | CowRef::Owned(name) => visitor.visit_string(name),
|
225 | }
|
226 | }
|
227 |
|
228 | fn deserialize_enum<V>(
|
229 | self,
|
230 | _name: &str,
|
231 | _variants: &'static [&'static str],
|
232 | visitor: V,
|
233 | ) -> Result<V::Value, Self::Error>
|
234 | where
|
235 | V: Visitor<'de>,
|
236 | {
|
237 | visitor.visit_enum(self)
|
238 | }
|
239 | }
|
240 |
|
241 | impl<'de, 'd> EnumAccess<'de> for QNameDeserializer<'de, 'd> {
|
242 | type Error = DeError;
|
243 | type Variant = QNameUnitOnly;
|
244 |
|
245 | fn variant_seed<V>(self, seed: V) -> Result<(V::Value, Self::Variant), Self::Error>
|
246 | where
|
247 | V: DeserializeSeed<'de>,
|
248 | {
|
249 | let name: >::Value = seed.deserialize(self)?;
|
250 | Ok((name, QNameUnitOnly))
|
251 | }
|
252 | }
|
253 |
|
254 | ////////////////////////////////////////////////////////////////////////////////////////////////////
|
255 |
|
/// Deserializer of variant data that supports only unit variants.
///
/// An attempt to deserialize a newtype, tuple or struct variant will return
/// a [`DeError::Unsupported`] error.
pub struct QNameUnitOnly;
|
260 | impl<'de> VariantAccess<'de> for QNameUnitOnly {
|
261 | type Error = DeError;
|
262 |
|
263 | #[inline ]
|
264 | fn unit_variant(self) -> Result<(), DeError> {
|
265 | Ok(())
|
266 | }
|
267 |
|
268 | fn newtype_variant_seed<T>(self, _seed: T) -> Result<T::Value, DeError>
|
269 | where
|
270 | T: DeserializeSeed<'de>,
|
271 | {
|
272 | Err(DeError::Unsupported(
|
273 | "enum newtype variants are not supported as an XML names" .into(),
|
274 | ))
|
275 | }
|
276 |
|
277 | fn tuple_variant<V>(self, _len: usize, _visitor: V) -> Result<V::Value, DeError>
|
278 | where
|
279 | V: Visitor<'de>,
|
280 | {
|
281 | Err(DeError::Unsupported(
|
282 | "enum tuple variants are not supported as an XML names" .into(),
|
283 | ))
|
284 | }
|
285 |
|
286 | fn struct_variant<V>(
|
287 | self,
|
288 | _fields: &'static [&'static str],
|
289 | _visitor: V,
|
290 | ) -> Result<V::Value, DeError>
|
291 | where
|
292 | V: Visitor<'de>,
|
293 | {
|
294 | Err(DeError::Unsupported(
|
295 | "enum struct variants are not supported as an XML names" .into(),
|
296 | ))
|
297 | }
|
298 | }
|
299 |
|
300 | ////////////////////////////////////////////////////////////////////////////////////////////////////
|
301 |
|
/// Tests that construct [`QNameDeserializer`] directly from an already
/// decoded name and check the result of deserialization into various types.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::se::key::QNameSerializer;
    use crate::utils::{ByteBuf, Bytes};
    use pretty_assertions::assert_eq;
    use serde::de::IgnoredAny;
    use serde::{Deserialize, Serialize};
    use std::collections::HashMap;

    #[derive(Debug, Deserialize, Serialize, PartialEq)]
    struct Unit;

    #[derive(Debug, Deserialize, Serialize, PartialEq)]
    struct Newtype(String);

    #[derive(Debug, Deserialize, Serialize, PartialEq)]
    struct Struct {
        key: String,
        val: usize,
    }

    #[derive(Debug, Deserialize, Serialize, PartialEq)]
    enum Enum {
        Unit,
        #[serde(rename = "@Attr")]
        Attr,
        Newtype(String),
        Tuple(String, usize),
        Struct {
            key: String,
            val: usize,
        },
    }

    #[derive(Debug, Deserialize, PartialEq)]
    #[serde(field_identifier)]
    enum Id {
        Field,
    }

    #[derive(Debug, Deserialize)]
    #[serde(transparent)]
    struct Any(IgnoredAny);
    impl PartialEq for Any {
        // `IgnoredAny` carries no data, so all instances are considered equal
        fn eq(&self, _other: &Any) -> bool {
            true
        }
    }

    /// Checks that given `$input` successfully deserializing into given `$result`
    /// (without a roundtrip through the serializer)
    macro_rules! deserialized_to_only {
        ($name:ident: $type:ty = $input:literal => $result:expr) => {
            #[test]
            fn $name() {
                let de = QNameDeserializer {
                    name: CowRef::Input($input),
                };
                let data: $type = Deserialize::deserialize(de).unwrap();

                assert_eq!(data, $result);
            }
        };
    }

    /// Checks that given `$input` successfully deserializing into given `$result`
    /// and that serializing the result gives `$input` back
    macro_rules! deserialized_to {
        ($name:ident: $type:ty = $input:literal => $result:expr) => {
            #[test]
            fn $name() {
                let de = QNameDeserializer {
                    name: CowRef::Input($input),
                };
                let data: $type = Deserialize::deserialize(de).unwrap();

                assert_eq!(data, $result);

                // Roundtrip to ensure that serializer corresponds to deserializer
                assert_eq!(
                    data.serialize(QNameSerializer {
                        writer: String::new()
                    })
                    .unwrap(),
                    $input
                );
            }
        };
    }

    /// Checks that attempt to deserialize given `$input` as a `$type` results to a
    /// deserialization error `$kind` with `$reason`
    macro_rules! err {
        ($name:ident: $type:ty = $input:literal => $kind:ident($reason:literal)) => {
            #[test]
            fn $name() {
                let de = QNameDeserializer {
                    name: CowRef::Input($input),
                };
                let err = <$type as Deserialize>::deserialize(de).unwrap_err();

                match err {
                    DeError::$kind(e) => assert_eq!(e, $reason),
                    _ => panic!(
                        "Expected `{}({})`, found `{:?}`",
                        stringify!($kind),
                        $reason,
                        err
                    ),
                }
            }
        };
    }

    deserialized_to!(false_: bool = "false" => false);
    deserialized_to!(true_: bool = "true" => true);

    deserialized_to!(i8_: i8 = "-2" => -2);
    deserialized_to!(i16_: i16 = "-2" => -2);
    deserialized_to!(i32_: i32 = "-2" => -2);
    deserialized_to!(i64_: i64 = "-2" => -2);

    deserialized_to!(u8_: u8 = "3" => 3);
    deserialized_to!(u16_: u16 = "3" => 3);
    deserialized_to!(u32_: u32 = "3" => 3);
    deserialized_to!(u64_: u64 = "3" => 3);

    serde_if_integer128! {
        deserialized_to!(i128_: i128 = "-2" => -2);
        deserialized_to!(u128_: u128 = "2" => 2);
    }

    deserialized_to!(f32_: f32 = "1.23" => 1.23);
    deserialized_to!(f64_: f64 = "1.23" => 1.23);

    deserialized_to!(char_unescaped: char = "h" => 'h');
    err!(char_escaped: char = "<"
        => Custom("invalid value: string \"<\", expected a character"));

    deserialized_to!(string: String = "<escaped string" => "<escaped string");
    deserialized_to!(borrowed_str: &str = "name" => "name");

    err!(byte_buf: ByteBuf = "<escaped string"
        => Custom("invalid type: string \"<escaped string\", expected byte data"));
    err!(borrowed_bytes: Bytes = "name"
        => Custom("invalid type: string \"name\", expected borrowed bytes"));

    deserialized_to!(option_none: Option<String> = "" => None);
    deserialized_to!(option_some: Option<String> = "name" => Some("name".into()));

    // Unit structs cannot be represented in some meaningful way, but it meaningful
    // to use them as a placeholder when we want to deserialize _something_
    deserialized_to_only!(unit: () = "anything" => ());
    deserialized_to_only!(unit_struct: Unit = "anything" => Unit);

    deserialized_to!(newtype: Newtype = "<escaped string" => Newtype("<escaped string".into()));

    err!(seq: Vec<()> = "name"
        => Custom("invalid type: string \"name\", expected a sequence"));
    err!(tuple: ((), ()) = "name"
        => Custom("invalid type: string \"name\", expected a tuple of size 2"));
    err!(tuple_struct: ((), ()) = "name"
        => Custom("invalid type: string \"name\", expected a tuple of size 2"));

    err!(map: HashMap<(), ()> = "name"
        => Custom("invalid type: string \"name\", expected a map"));
    err!(struct_: Struct = "name"
        => Custom("invalid type: string \"name\", expected struct Struct"));

    deserialized_to!(enum_unit: Enum = "Unit" => Enum::Unit);
    deserialized_to!(enum_unit_for_attr: Enum = "@Attr" => Enum::Attr);
    err!(enum_newtype: Enum = "Newtype"
        => Unsupported("enum newtype variants are not supported as an XML names"));
    err!(enum_tuple: Enum = "Tuple"
        => Unsupported("enum tuple variants are not supported as an XML names"));
    err!(enum_struct: Enum = "Struct"
        => Unsupported("enum struct variants are not supported as an XML names"));

    // Field identifiers cannot be serialized, and IgnoredAny represented _something_
    // which is not concrete
    deserialized_to_only!(identifier: Id = "Field" => Id::Field);
    deserialized_to_only!(ignored_any: Any = "any-name" => Any(IgnoredAny));
}
|
484 | |