1use crate::de::str2bool;
2use crate::encoding::Decoder;
3use crate::errors::serialize::DeError;
4use crate::name::QName;
5use crate::utils::CowRef;
6use serde::de::{DeserializeSeed, Deserializer, EnumAccess, VariantAccess, Visitor};
7use serde::{forward_to_deserialize_any, serde_if_integer128};
8use std::borrow::Cow;
9
/// Generates a `Deserializer` method called `$deserialize` which parses the
/// stored name via `parse()` and hands the parsed number over to the
/// `$visitor_method` method of the supplied visitor.
///
/// A parsing failure is propagated with `?`, i.e. it is converted into the
/// error type of the surrounding implementation.
macro_rules! deserialize_num {
    ($deserialize:ident, $visitor_method:ident) => {
        fn $deserialize<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Self::Error> {
            // The target numeric type is inferred from the visitor method
            let number = self.name.parse()?;
            visitor.$visitor_method(number)
        }
    };
}
20
21/// Decodes raw bytes using the deserializer encoding.
22/// The method will borrow if encoding is UTF-8 compatible and `name` contains
23/// only UTF-8 compatible characters (usually only ASCII characters).
24#[inline]
25fn decode_name<'n>(name: QName<'n>, decoder: Decoder) -> Result<Cow<'n, str>, DeError> {
26 let local: LocalName<'_> = name.local_name();
27 Ok(decoder.decode(bytes:local.into_inner())?)
28}
29
/// A deserializer for XML names of elements and attributes.
///
/// Used for deserializing values from:
/// - attribute names (`<... name="..." ...>`)
/// - element names (`<name>...</name>`)
///
/// Converts a name to an identifier string using the following rules:
///
/// - if it is an [`attribute`] name, put `@` in front of the identifier
/// - if it is a namespace binding (`xmlns` or `xmlns:xxx`), put the whole
///   decoded name into the identifier
/// - otherwise put the decoded [`local_name()`] of a name into the identifier
///
/// The final identifier looks like `[@]local_name`, or `@xmlns`, or `@xmlns:binding`
/// (where `[]` means optional element).
///
/// The deserializer also supports deserializing names as other primitive types:
/// - numbers
/// - booleans
/// - unit (`()`) and unit structs
/// - unit variants of the enumerations
///
/// Because `serde` does not define on which side the type conversion should be
/// performed, and because the [`Deserialize`] implementations for those primitives
/// in serde do not accept strings, the deserializer performs the conversion
/// by itself.
///
/// The deserializer is able to deserialize unit and unit structs, but any name
/// will be converted to the same unit instance. This is an asymmetry with the
/// serializer, which is not able to serialize those types, because empty names
/// are impossible in XML.
///
/// `deserialize_any()` returns the same result as `deserialize_identifier()`.
///
/// # Lifetimes
///
/// - `'i`: lifetime of the data that the deserializer borrows from the parsed input
/// - `'d`: lifetime of a deserializer that holds a buffer with content of events
///
/// [`attribute`]: Self::from_attr
/// [`local_name()`]: QName::local_name
/// [`Deserialize`]: serde::Deserialize
pub struct QNameDeserializer<'i, 'd> {
    /// The identifier built from the XML name; all `deserialize_*` methods
    /// operate on this string.
    name: CowRef<'i, 'd, str>,
}
75
76impl<'i, 'd> QNameDeserializer<'i, 'd> {
77 /// Creates deserializer from name of an attribute
78 pub fn from_attr(name: QName<'d>, decoder: Decoder) -> Result<Self, DeError> {
79 // https://github.com/tafia/quick-xml/issues/537
80 // Namespace bindings (xmlns:xxx) map to `@xmlns:xxx` instead of `@xxx`
81 let field = if name.as_namespace_binding().is_some() {
82 decoder.decode(name.into_inner())?
83 } else {
84 decode_name(name, decoder)?
85 };
86
87 Ok(Self {
88 name: CowRef::Owned(format!("@{field}")),
89 })
90 }
91
92 /// Creates deserializer from name of an element
93 pub fn from_elem(name: CowRef<'i, 'd, [u8]>, decoder: Decoder) -> Result<Self, DeError> {
94 let local = match name {
95 CowRef::Input(borrowed) => match decode_name(QName(borrowed), decoder)? {
96 Cow::Borrowed(borrowed) => CowRef::Input(borrowed),
97 Cow::Owned(owned) => CowRef::Owned(owned),
98 },
99 CowRef::Slice(borrowed) => match decode_name(QName(borrowed), decoder)? {
100 Cow::Borrowed(borrowed) => CowRef::Slice(borrowed),
101 Cow::Owned(owned) => CowRef::Owned(owned),
102 },
103 CowRef::Owned(owned) => match decode_name(QName(&owned), decoder)? {
104 // SAFETY: Because result is borrowed, no changes was done
105 // and we can safely unwrap here
106 Cow::Borrowed(_) => CowRef::Owned(String::from_utf8(owned).unwrap()),
107 Cow::Owned(owned) => CowRef::Owned(owned),
108 },
109 };
110
111 Ok(Self { name: local })
112 }
113}
114
115impl<'de, 'd> Deserializer<'de> for QNameDeserializer<'de, 'd> {
116 type Error = DeError;
117
118 forward_to_deserialize_any! {
119 char str string
120 bytes byte_buf
121 seq tuple tuple_struct
122 map struct
123 ignored_any
124 }
125
126 /// According to the <https://www.w3.org/TR/xmlschema11-2/#boolean>,
127 /// valid boolean representations are only `"true"`, `"false"`, `"1"`,
128 /// and `"0"`. But this method also handles following:
129 ///
130 /// |`bool` |XML content
131 /// |-------|-------------------------------------------------------------
132 /// |`true` |`"True"`, `"TRUE"`, `"t"`, `"Yes"`, `"YES"`, `"yes"`, `"y"`
133 /// |`false`|`"False"`, `"FALSE"`, `"f"`, `"No"`, `"NO"`, `"no"`, `"n"`
134 fn deserialize_bool<V>(self, visitor: V) -> Result<V::Value, Self::Error>
135 where
136 V: Visitor<'de>,
137 {
138 str2bool(self.name.as_ref(), visitor)
139 }
140
141 deserialize_num!(deserialize_i8, visit_i8);
142 deserialize_num!(deserialize_i16, visit_i16);
143 deserialize_num!(deserialize_i32, visit_i32);
144 deserialize_num!(deserialize_i64, visit_i64);
145
146 deserialize_num!(deserialize_u8, visit_u8);
147 deserialize_num!(deserialize_u16, visit_u16);
148 deserialize_num!(deserialize_u32, visit_u32);
149 deserialize_num!(deserialize_u64, visit_u64);
150
151 serde_if_integer128! {
152 deserialize_num!(deserialize_i128, visit_i128);
153 deserialize_num!(deserialize_u128, visit_u128);
154 }
155
156 deserialize_num!(deserialize_f32, visit_f32);
157 deserialize_num!(deserialize_f64, visit_f64);
158
159 /// Calls [`Visitor::visit_unit`]
160 fn deserialize_unit<V>(self, visitor: V) -> Result<V::Value, Self::Error>
161 where
162 V: Visitor<'de>,
163 {
164 visitor.visit_unit()
165 }
166
167 /// Forwards deserialization to the [`Self::deserialize_unit`]
168 fn deserialize_unit_struct<V>(
169 self,
170 _name: &'static str,
171 visitor: V,
172 ) -> Result<V::Value, Self::Error>
173 where
174 V: Visitor<'de>,
175 {
176 self.deserialize_unit(visitor)
177 }
178
179 /// Forwards deserialization to the [`Self::deserialize_identifier`]
180 #[inline]
181 fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error>
182 where
183 V: Visitor<'de>,
184 {
185 self.deserialize_identifier(visitor)
186 }
187
188 /// If `name` is an empty string then calls [`Visitor::visit_none`],
189 /// otherwise calls [`Visitor::visit_some`] with itself
190 fn deserialize_option<V>(self, visitor: V) -> Result<V::Value, Self::Error>
191 where
192 V: Visitor<'de>,
193 {
194 if self.name.is_empty() {
195 visitor.visit_none()
196 } else {
197 visitor.visit_some(self)
198 }
199 }
200
201 fn deserialize_newtype_struct<V>(
202 self,
203 _name: &'static str,
204 visitor: V,
205 ) -> Result<V::Value, Self::Error>
206 where
207 V: Visitor<'de>,
208 {
209 visitor.visit_newtype_struct(self)
210 }
211
212 /// Calls a [`Visitor::visit_str`] if [`name`] contains only UTF-8
213 /// compatible encoded characters and represents an element name and
214 /// a [`Visitor::visit_string`] in all other cases.
215 ///
216 /// [`name`]: Self::name
217 fn deserialize_identifier<V>(self, visitor: V) -> Result<V::Value, Self::Error>
218 where
219 V: Visitor<'de>,
220 {
221 match self.name {
222 CowRef::Input(name) => visitor.visit_borrowed_str(name),
223 CowRef::Slice(name) => visitor.visit_str(name),
224 CowRef::Owned(name) => visitor.visit_string(name),
225 }
226 }
227
228 fn deserialize_enum<V>(
229 self,
230 _name: &str,
231 _variants: &'static [&'static str],
232 visitor: V,
233 ) -> Result<V::Value, Self::Error>
234 where
235 V: Visitor<'de>,
236 {
237 visitor.visit_enum(self)
238 }
239}
240
241impl<'de, 'd> EnumAccess<'de> for QNameDeserializer<'de, 'd> {
242 type Error = DeError;
243 type Variant = QNameUnitOnly;
244
245 fn variant_seed<V>(self, seed: V) -> Result<(V::Value, Self::Variant), Self::Error>
246 where
247 V: DeserializeSeed<'de>,
248 {
249 let name: >::Value = seed.deserialize(self)?;
250 Ok((name, QNameUnitOnly))
251 }
252}
253
254////////////////////////////////////////////////////////////////////////////////////////////////////
255
/// Deserializer of variant data that supports only unit variants.
/// An attempt to deserialize a newtype, tuple or struct variant returns a
/// [`DeError::Unsupported`] error.
pub struct QNameUnitOnly;
260impl<'de> VariantAccess<'de> for QNameUnitOnly {
261 type Error = DeError;
262
263 #[inline]
264 fn unit_variant(self) -> Result<(), DeError> {
265 Ok(())
266 }
267
268 fn newtype_variant_seed<T>(self, _seed: T) -> Result<T::Value, DeError>
269 where
270 T: DeserializeSeed<'de>,
271 {
272 Err(DeError::Unsupported(
273 "enum newtype variants are not supported as an XML names".into(),
274 ))
275 }
276
277 fn tuple_variant<V>(self, _len: usize, _visitor: V) -> Result<V::Value, DeError>
278 where
279 V: Visitor<'de>,
280 {
281 Err(DeError::Unsupported(
282 "enum tuple variants are not supported as an XML names".into(),
283 ))
284 }
285
286 fn struct_variant<V>(
287 self,
288 _fields: &'static [&'static str],
289 _visitor: V,
290 ) -> Result<V::Value, DeError>
291 where
292 V: Visitor<'de>,
293 {
294 Err(DeError::Unsupported(
295 "enum struct variants are not supported as an XML names".into(),
296 ))
297 }
298}
299
300////////////////////////////////////////////////////////////////////////////////////////////////////
301
/// Tests that deserialize values directly from a `QNameDeserializer` and,
/// where possible, serialize them back with `QNameSerializer`.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::se::key::QNameSerializer;
    use crate::utils::{ByteBuf, Bytes};
    use pretty_assertions::assert_eq;
    use serde::de::IgnoredAny;
    use serde::{Deserialize, Serialize};
    use std::collections::HashMap;

    // Unit struct used as a deserialization target
    #[derive(Debug, Deserialize, Serialize, PartialEq)]
    struct Unit;

    // Newtype struct around a string used as a deserialization target
    #[derive(Debug, Deserialize, Serialize, PartialEq)]
    struct Newtype(String);

    // Struct with named fields; deserializing it from a name is expected to fail
    #[derive(Debug, Deserialize, Serialize, PartialEq)]
    struct Struct {
        key: String,
        val: usize,
    }

    // Enumeration with every kind of variant; only the unit variants are
    // expected to be deserializable from a name
    #[derive(Debug, Deserialize, Serialize, PartialEq)]
    enum Enum {
        Unit,
        #[serde(rename = "@Attr")]
        Attr,
        Newtype(String),
        Tuple(String, usize),
        Struct {
            key: String,
            val: usize,
        },
    }

    // Field identifier, deserialized through `deserialize_identifier`
    #[derive(Debug, Deserialize, PartialEq)]
    #[serde(field_identifier)]
    enum Id {
        Field,
    }

    // Wrapper around `IgnoredAny` that can be compared in `assert_eq!`
    #[derive(Debug, Deserialize)]
    #[serde(transparent)]
    struct Any(IgnoredAny);
    impl PartialEq for Any {
        // Every two instances are considered equal
        fn eq(&self, _other: &Any) -> bool {
            true
        }
    }

    /// Checks that the given `$input` is successfully deserialized into the
    /// given `$result`. Unlike `deserialized_to!`, no serialization roundtrip
    /// is performed.
    macro_rules! deserialized_to_only {
        ($name:ident: $type:ty = $input:literal => $result:expr) => {
            #[test]
            fn $name() {
                let de = QNameDeserializer {
                    name: CowRef::Input($input),
                };
                let data: $type = Deserialize::deserialize(de).unwrap();

                assert_eq!(data, $result);
            }
        };
    }

    /// Checks that the given `$input` is successfully deserialized into the
    /// given `$result` and that serializing the deserialized value gives
    /// `$input` back.
    macro_rules! deserialized_to {
        ($name:ident: $type:ty = $input:literal => $result:expr) => {
            #[test]
            fn $name() {
                let de = QNameDeserializer {
                    name: CowRef::Input($input),
                };
                let data: $type = Deserialize::deserialize(de).unwrap();

                assert_eq!(data, $result);

                // Roundtrip to ensure that serializer corresponds to deserializer
                assert_eq!(
                    data.serialize(QNameSerializer {
                        writer: String::new()
                    })
                    .unwrap(),
                    $input
                );
            }
        };
    }

    /// Checks that an attempt to deserialize the given `$input` as a `$type`
    /// results in a deserialization error `$kind` with `$reason`
    macro_rules! err {
        ($name:ident: $type:ty = $input:literal => $kind:ident($reason:literal)) => {
            #[test]
            fn $name() {
                let de = QNameDeserializer {
                    name: CowRef::Input($input),
                };
                let err = <$type as Deserialize>::deserialize(de).unwrap_err();

                match err {
                    DeError::$kind(e) => assert_eq!(e, $reason),
                    _ => panic!(
                        "Expected `{}({})`, found `{:?}`",
                        stringify!($kind),
                        $reason,
                        err
                    ),
                }
            }
        };
    }

    deserialized_to!(false_: bool = "false" => false);
    deserialized_to!(true_: bool = "true" => true);

    deserialized_to!(i8_: i8 = "-2" => -2);
    deserialized_to!(i16_: i16 = "-2" => -2);
    deserialized_to!(i32_: i32 = "-2" => -2);
    deserialized_to!(i64_: i64 = "-2" => -2);

    deserialized_to!(u8_: u8 = "3" => 3);
    deserialized_to!(u16_: u16 = "3" => 3);
    deserialized_to!(u32_: u32 = "3" => 3);
    deserialized_to!(u64_: u64 = "3" => 3);

    serde_if_integer128! {
        deserialized_to!(i128_: i128 = "-2" => -2);
        deserialized_to!(u128_: u128 = "2" => 2);
    }

    deserialized_to!(f32_: f32 = "1.23" => 1.23);
    deserialized_to!(f64_: f64 = "1.23" => 1.23);

    // Names are used as is: character references in them are not unescaped
    deserialized_to!(char_unescaped: char = "h" => 'h');
    err!(char_escaped: char = "&lt;"
        => Custom("invalid value: string \"&lt;\", expected a character"));

    deserialized_to!(string: String = "&lt;escaped&#x20;string" => "&lt;escaped&#x20;string");
    deserialized_to!(borrowed_str: &str = "name" => "name");

    err!(byte_buf: ByteBuf = "&lt;escaped&#x20;string"
        => Custom("invalid type: string \"&lt;escaped&#x20;string\", expected byte data"));
    err!(borrowed_bytes: Bytes = "name"
        => Custom("invalid type: string \"name\", expected borrowed bytes"));

    deserialized_to!(option_none: Option<String> = "" => None);
    deserialized_to!(option_some: Option<String> = "name" => Some("name".into()));

    // Unit structs cannot be represented in a meaningful way, but it is useful
    // to have them as a placeholder when we want to deserialize _something_
    deserialized_to_only!(unit: () = "anything" => ());
    deserialized_to_only!(unit_struct: Unit = "anything" => Unit);

    deserialized_to!(newtype: Newtype = "&lt;escaped&#x20;string" => Newtype("&lt;escaped&#x20;string".into()));

    err!(seq: Vec<()> = "name"
        => Custom("invalid type: string \"name\", expected a sequence"));
    err!(tuple: ((), ()) = "name"
        => Custom("invalid type: string \"name\", expected a tuple of size 2"));
    // NOTE(review): this test uses the same plain tuple type as `tuple` above,
    // so no tuple struct is actually exercised here; confirm whether that is intended
    err!(tuple_struct: ((), ()) = "name"
        => Custom("invalid type: string \"name\", expected a tuple of size 2"));

    err!(map: HashMap<(), ()> = "name"
        => Custom("invalid type: string \"name\", expected a map"));
    err!(struct_: Struct = "name"
        => Custom("invalid type: string \"name\", expected struct Struct"));

    deserialized_to!(enum_unit: Enum = "Unit" => Enum::Unit);
    deserialized_to!(enum_unit_for_attr: Enum = "@Attr" => Enum::Attr);
    err!(enum_newtype: Enum = "Newtype"
        => Unsupported("enum newtype variants are not supported as an XML names"));
    err!(enum_tuple: Enum = "Tuple"
        => Unsupported("enum tuple variants are not supported as an XML names"));
    err!(enum_struct: Enum = "Struct"
        => Unsupported("enum struct variants are not supported as an XML names"));

    // Field identifiers cannot be serialized, and `IgnoredAny` represents
    // _something_ which is not concrete, so no roundtrip is checked for them
    deserialized_to_only!(identifier: Id = "Field" => Id::Field);
    deserialized_to_only!(ignored_any: Any = "any-name" => Any(IgnoredAny));
}
484