| 1 | use crate::de::str2bool;
|
| 2 | use crate::encoding::Decoder;
|
| 3 | use crate::errors::serialize::DeError;
|
| 4 | use crate::name::QName;
|
| 5 | use crate::utils::CowRef;
|
| 6 | use serde::de::{DeserializeSeed, Deserializer, EnumAccess, VariantAccess, Visitor};
|
| 7 | use serde::{forward_to_deserialize_any, serde_if_integer128};
|
| 8 | use std::borrow::Cow;
|
| 9 |
|
/// Generates a `Deserializer` method `$method` which parses the stored name
/// as a number and hands the parsed value to the `$visit` method of the visitor.
macro_rules! deserialize_num {
    ($method:ident, $visit:ident) => {
        fn $method<V>(self, visitor: V) -> Result<V::Value, Self::Error>
        where
            V: Visitor<'de>,
        {
            // The numeric type to parse is inferred from the parameter
            // of the visitor method that receives the value
            let number = self.name.parse()?;
            visitor.$visit(number)
        }
    };
}
|
| 20 |
|
| 21 | /// Decodes raw bytes using the deserializer encoding.
|
| 22 | /// The method will borrow if encoding is UTF-8 compatible and `name` contains
|
| 23 | /// only UTF-8 compatible characters (usually only ASCII characters).
|
| 24 | #[inline ]
|
| 25 | fn decode_name<'n>(name: QName<'n>, decoder: Decoder) -> Result<Cow<'n, str>, DeError> {
|
| 26 | let local: LocalName<'_> = name.local_name();
|
| 27 | Ok(decoder.decode(bytes:local.into_inner())?)
|
| 28 | }
|
| 29 |
|
| 30 | /// A deserializer for xml names of elements and attributes.
|
| 31 | ///
|
| 32 | /// Used for deserializing values from:
|
| 33 | /// - attribute names (`<... name="..." ...>`)
|
| 34 | /// - element names (`<name>...</name>`)
|
| 35 | ///
|
| 36 | /// Converts a name to an identifier string using the following rules:
|
| 37 | ///
|
| 38 | /// - if it is an [`attribute`] name, put `@` in front of the identifier
|
| 39 | /// - if it is a namespace binding (`xmlns` or `xmlns:xxx`) put the decoded name
|
| 40 | /// to the identifier
|
| 41 | /// - put the decoded [`local_name()`] of a name to the identifier
|
| 42 | ///
|
| 43 | /// The final identifier looks like `[@]local_name`, or `@xmlns`, or `@xmlns:binding`
|
| 44 | /// (where `[]` means optional element).
|
| 45 | ///
|
| 46 | /// The deserializer also supports deserializing names as other primitive types:
|
| 47 | /// - numbers
|
| 48 | /// - booleans
|
| 49 | /// - unit (`()`) and unit structs
|
| 50 | /// - unit variants of the enumerations
|
| 51 | ///
|
| 52 | /// Because `serde` does not define on which side type conversion should be
|
| 53 | /// performed, and because [`Deserialize`] implementation for that primitives
|
| 54 | /// in serde does not accept strings, the deserializer will perform conversion
|
| 55 | /// by itself.
|
| 56 | ///
|
| 57 | /// The deserializer is able to deserialize unit and unit structs, but any name
|
| 58 | /// will be converted to the same unit instance. This is asymmetry with a serializer,
|
| 59 | /// which not able to serialize those types, because empty names are impossible
|
| 60 | /// in XML.
|
| 61 | ///
|
| 62 | /// `deserialize_any()` returns the same result as `deserialize_identifier()`.
|
| 63 | ///
|
| 64 | /// # Lifetimes
|
| 65 | ///
|
| 66 | /// - `'i`: lifetime of the data that the deserializer borrows from the parsed input
|
| 67 | /// - `'d`: lifetime of a deserializer that holds a buffer with content of events
|
| 68 | ///
|
| 69 | /// [`attribute`]: Self::from_attr
|
| 70 | /// [`local_name()`]: QName::local_name
|
| 71 | /// [`Deserialize`]: serde::Deserialize
|
| 72 | pub struct QNameDeserializer<'i, 'd> {
|
| 73 | name: CowRef<'i, 'd, str>,
|
| 74 | }
|
| 75 |
|
| 76 | impl<'i, 'd> QNameDeserializer<'i, 'd> {
|
| 77 | /// Creates deserializer from name of an attribute
|
| 78 | pub fn from_attr(name: QName<'d>, decoder: Decoder) -> Result<Self, DeError> {
|
| 79 | // https://github.com/tafia/quick-xml/issues/537
|
| 80 | // Namespace bindings (xmlns:xxx) map to `@xmlns:xxx` instead of `@xxx`
|
| 81 | let field = if name.as_namespace_binding().is_some() {
|
| 82 | decoder.decode(name.into_inner())?
|
| 83 | } else {
|
| 84 | decode_name(name, decoder)?
|
| 85 | };
|
| 86 |
|
| 87 | Ok(Self {
|
| 88 | name: CowRef::Owned(format!("@ {field}" )),
|
| 89 | })
|
| 90 | }
|
| 91 |
|
| 92 | /// Creates deserializer from name of an element
|
| 93 | pub fn from_elem(name: CowRef<'i, 'd, [u8]>, decoder: Decoder) -> Result<Self, DeError> {
|
| 94 | let local = match name {
|
| 95 | CowRef::Input(borrowed) => match decode_name(QName(borrowed), decoder)? {
|
| 96 | Cow::Borrowed(borrowed) => CowRef::Input(borrowed),
|
| 97 | Cow::Owned(owned) => CowRef::Owned(owned),
|
| 98 | },
|
| 99 | CowRef::Slice(borrowed) => match decode_name(QName(borrowed), decoder)? {
|
| 100 | Cow::Borrowed(borrowed) => CowRef::Slice(borrowed),
|
| 101 | Cow::Owned(owned) => CowRef::Owned(owned),
|
| 102 | },
|
| 103 | CowRef::Owned(owned) => match decode_name(QName(&owned), decoder)? {
|
| 104 | // SAFETY: Because result is borrowed, no changes was done
|
| 105 | // and we can safely unwrap here
|
| 106 | Cow::Borrowed(_) => CowRef::Owned(String::from_utf8(owned).unwrap()),
|
| 107 | Cow::Owned(owned) => CowRef::Owned(owned),
|
| 108 | },
|
| 109 | };
|
| 110 |
|
| 111 | Ok(Self { name: local })
|
| 112 | }
|
| 113 | }
|
| 114 |
|
| 115 | impl<'de, 'd> Deserializer<'de> for QNameDeserializer<'de, 'd> {
|
| 116 | type Error = DeError;
|
| 117 |
|
| 118 | forward_to_deserialize_any! {
|
| 119 | char str string
|
| 120 | bytes byte_buf
|
| 121 | seq tuple tuple_struct
|
| 122 | map struct
|
| 123 | ignored_any
|
| 124 | }
|
| 125 |
|
| 126 | /// According to the <https://www.w3.org/TR/xmlschema11-2/#boolean>,
|
| 127 | /// valid boolean representations are only `"true"`, `"false"`, `"1"`,
|
| 128 | /// and `"0"`. But this method also handles following:
|
| 129 | ///
|
| 130 | /// |`bool` |XML content
|
| 131 | /// |-------|-------------------------------------------------------------
|
| 132 | /// |`true` |`"True"`, `"TRUE"`, `"t"`, `"Yes"`, `"YES"`, `"yes"`, `"y"`
|
| 133 | /// |`false`|`"False"`, `"FALSE"`, `"f"`, `"No"`, `"NO"`, `"no"`, `"n"`
|
| 134 | fn deserialize_bool<V>(self, visitor: V) -> Result<V::Value, Self::Error>
|
| 135 | where
|
| 136 | V: Visitor<'de>,
|
| 137 | {
|
| 138 | str2bool(self.name.as_ref(), visitor)
|
| 139 | }
|
| 140 |
|
| 141 | deserialize_num!(deserialize_i8, visit_i8);
|
| 142 | deserialize_num!(deserialize_i16, visit_i16);
|
| 143 | deserialize_num!(deserialize_i32, visit_i32);
|
| 144 | deserialize_num!(deserialize_i64, visit_i64);
|
| 145 |
|
| 146 | deserialize_num!(deserialize_u8, visit_u8);
|
| 147 | deserialize_num!(deserialize_u16, visit_u16);
|
| 148 | deserialize_num!(deserialize_u32, visit_u32);
|
| 149 | deserialize_num!(deserialize_u64, visit_u64);
|
| 150 |
|
| 151 | serde_if_integer128! {
|
| 152 | deserialize_num!(deserialize_i128, visit_i128);
|
| 153 | deserialize_num!(deserialize_u128, visit_u128);
|
| 154 | }
|
| 155 |
|
| 156 | deserialize_num!(deserialize_f32, visit_f32);
|
| 157 | deserialize_num!(deserialize_f64, visit_f64);
|
| 158 |
|
| 159 | /// Calls [`Visitor::visit_unit`]
|
| 160 | fn deserialize_unit<V>(self, visitor: V) -> Result<V::Value, Self::Error>
|
| 161 | where
|
| 162 | V: Visitor<'de>,
|
| 163 | {
|
| 164 | visitor.visit_unit()
|
| 165 | }
|
| 166 |
|
| 167 | /// Forwards deserialization to the [`Self::deserialize_unit`]
|
| 168 | fn deserialize_unit_struct<V>(
|
| 169 | self,
|
| 170 | _name: &'static str,
|
| 171 | visitor: V,
|
| 172 | ) -> Result<V::Value, Self::Error>
|
| 173 | where
|
| 174 | V: Visitor<'de>,
|
| 175 | {
|
| 176 | self.deserialize_unit(visitor)
|
| 177 | }
|
| 178 |
|
| 179 | /// Forwards deserialization to the [`Self::deserialize_identifier`]
|
| 180 | #[inline ]
|
| 181 | fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error>
|
| 182 | where
|
| 183 | V: Visitor<'de>,
|
| 184 | {
|
| 185 | self.deserialize_identifier(visitor)
|
| 186 | }
|
| 187 |
|
| 188 | /// If `name` is an empty string then calls [`Visitor::visit_none`],
|
| 189 | /// otherwise calls [`Visitor::visit_some`] with itself
|
| 190 | fn deserialize_option<V>(self, visitor: V) -> Result<V::Value, Self::Error>
|
| 191 | where
|
| 192 | V: Visitor<'de>,
|
| 193 | {
|
| 194 | if self.name.is_empty() {
|
| 195 | visitor.visit_none()
|
| 196 | } else {
|
| 197 | visitor.visit_some(self)
|
| 198 | }
|
| 199 | }
|
| 200 |
|
| 201 | fn deserialize_newtype_struct<V>(
|
| 202 | self,
|
| 203 | _name: &'static str,
|
| 204 | visitor: V,
|
| 205 | ) -> Result<V::Value, Self::Error>
|
| 206 | where
|
| 207 | V: Visitor<'de>,
|
| 208 | {
|
| 209 | visitor.visit_newtype_struct(self)
|
| 210 | }
|
| 211 |
|
| 212 | /// Calls a [`Visitor::visit_str`] if [`name`] contains only UTF-8
|
| 213 | /// compatible encoded characters and represents an element name and
|
| 214 | /// a [`Visitor::visit_string`] in all other cases.
|
| 215 | ///
|
| 216 | /// [`name`]: Self::name
|
| 217 | fn deserialize_identifier<V>(self, visitor: V) -> Result<V::Value, Self::Error>
|
| 218 | where
|
| 219 | V: Visitor<'de>,
|
| 220 | {
|
| 221 | match self.name {
|
| 222 | CowRef::Input(name) => visitor.visit_borrowed_str(name),
|
| 223 | CowRef::Slice(name) => visitor.visit_str(name),
|
| 224 | CowRef::Owned(name) => visitor.visit_string(name),
|
| 225 | }
|
| 226 | }
|
| 227 |
|
| 228 | fn deserialize_enum<V>(
|
| 229 | self,
|
| 230 | _name: &str,
|
| 231 | _variants: &'static [&'static str],
|
| 232 | visitor: V,
|
| 233 | ) -> Result<V::Value, Self::Error>
|
| 234 | where
|
| 235 | V: Visitor<'de>,
|
| 236 | {
|
| 237 | visitor.visit_enum(self)
|
| 238 | }
|
| 239 | }
|
| 240 |
|
| 241 | impl<'de, 'd> EnumAccess<'de> for QNameDeserializer<'de, 'd> {
|
| 242 | type Error = DeError;
|
| 243 | type Variant = QNameUnitOnly;
|
| 244 |
|
| 245 | fn variant_seed<V>(self, seed: V) -> Result<(V::Value, Self::Variant), Self::Error>
|
| 246 | where
|
| 247 | V: DeserializeSeed<'de>,
|
| 248 | {
|
| 249 | let name: >::Value = seed.deserialize(self)?;
|
| 250 | Ok((name, QNameUnitOnly))
|
| 251 | }
|
| 252 | }
|
| 253 |
|
| 254 | ////////////////////////////////////////////////////////////////////////////////////////////////////
|
| 255 |
|
| 256 | /// Deserializer of variant data, that supports only unit variants.
|
| 257 | /// Attempt to deserialize newtype, tuple or struct variant will return a
|
| 258 | /// [`DeError::Unsupported`] error.
|
| 259 | pub struct QNameUnitOnly;
|
| 260 | impl<'de> VariantAccess<'de> for QNameUnitOnly {
|
| 261 | type Error = DeError;
|
| 262 |
|
| 263 | #[inline ]
|
| 264 | fn unit_variant(self) -> Result<(), DeError> {
|
| 265 | Ok(())
|
| 266 | }
|
| 267 |
|
| 268 | fn newtype_variant_seed<T>(self, _seed: T) -> Result<T::Value, DeError>
|
| 269 | where
|
| 270 | T: DeserializeSeed<'de>,
|
| 271 | {
|
| 272 | Err(DeError::Unsupported(
|
| 273 | "enum newtype variants are not supported as an XML names" .into(),
|
| 274 | ))
|
| 275 | }
|
| 276 |
|
| 277 | fn tuple_variant<V>(self, _len: usize, _visitor: V) -> Result<V::Value, DeError>
|
| 278 | where
|
| 279 | V: Visitor<'de>,
|
| 280 | {
|
| 281 | Err(DeError::Unsupported(
|
| 282 | "enum tuple variants are not supported as an XML names" .into(),
|
| 283 | ))
|
| 284 | }
|
| 285 |
|
| 286 | fn struct_variant<V>(
|
| 287 | self,
|
| 288 | _fields: &'static [&'static str],
|
| 289 | _visitor: V,
|
| 290 | ) -> Result<V::Value, DeError>
|
| 291 | where
|
| 292 | V: Visitor<'de>,
|
| 293 | {
|
| 294 | Err(DeError::Unsupported(
|
| 295 | "enum struct variants are not supported as an XML names" .into(),
|
| 296 | ))
|
| 297 | }
|
| 298 | }
|
| 299 |
|
| 300 | ////////////////////////////////////////////////////////////////////////////////////////////////////
|
| 301 |
|
#[cfg(test)]
mod tests {
    use super::*;
    use crate::se::key::QNameSerializer;
    use crate::utils::{ByteBuf, Bytes};
    use pretty_assertions::assert_eq;
    use serde::de::IgnoredAny;
    use serde::{Deserialize, Serialize};
    use std::collections::HashMap;

    #[derive(Debug, Deserialize, Serialize, PartialEq)]
    struct Unit;

    #[derive(Debug, Deserialize, Serialize, PartialEq)]
    struct Newtype(String);

    #[derive(Debug, Deserialize, Serialize, PartialEq)]
    struct Struct {
        key: String,
        val: usize,
    }

    #[derive(Debug, Deserialize, Serialize, PartialEq)]
    enum Enum {
        Unit,
        #[serde(rename = "@Attr")]
        Attr,
        Newtype(String),
        Tuple(String, usize),
        Struct {
            key: String,
            val: usize,
        },
    }

    #[derive(Debug, Deserialize, PartialEq)]
    #[serde(field_identifier)]
    enum Id {
        Field,
    }

    /// Wrapper over `IgnoredAny` which compares equal to any other instance,
    /// so it can be used in `assert_eq!`
    #[derive(Debug, Deserialize)]
    #[serde(transparent)]
    struct Any(IgnoredAny);
    impl PartialEq for Any {
        fn eq(&self, _other: &Any) -> bool {
            true
        }
    }

    /// Checks that given `$input` successfully deserializing into given `$result`
    /// (without a roundtrip through the serializer)
    macro_rules! deserialized_to_only {
        ($name:ident: $type:ty = $input:literal => $result:expr) => {
            #[test]
            fn $name() {
                let de = QNameDeserializer {
                    name: CowRef::Input($input),
                };
                let data: $type = Deserialize::deserialize(de).unwrap();

                assert_eq!(data, $result);
            }
        };
    }

    /// Checks that given `$input` successfully deserializing into given `$result`
    /// and that the result is serialized back to the `$input`
    macro_rules! deserialized_to {
        ($name:ident: $type:ty = $input:literal => $result:expr) => {
            #[test]
            fn $name() {
                let de = QNameDeserializer {
                    name: CowRef::Input($input),
                };
                let data: $type = Deserialize::deserialize(de).unwrap();

                assert_eq!(data, $result);

                // Roundtrip to ensure that serializer corresponds to deserializer
                assert_eq!(
                    data.serialize(QNameSerializer {
                        writer: String::new()
                    })
                    .unwrap(),
                    $input
                );
            }
        };
    }

    /// Checks that attempt to deserialize given `$input` as a `$type` results to a
    /// deserialization error `$kind` with `$reason`
    macro_rules! err {
        ($name:ident: $type:ty = $input:literal => $kind:ident($reason:literal)) => {
            #[test]
            fn $name() {
                let de = QNameDeserializer {
                    name: CowRef::Input($input),
                };
                let err = <$type as Deserialize>::deserialize(de).unwrap_err();

                match err {
                    DeError::$kind(e) => assert_eq!(e, $reason),
                    _ => panic!(
                        "Expected `{}({})`, found `{:?}`",
                        stringify!($kind),
                        $reason,
                        err
                    ),
                }
            }
        };
    }

    deserialized_to!(false_: bool = "false" => false);
    deserialized_to!(true_: bool = "true" => true);

    deserialized_to!(i8_: i8 = "-2" => -2);
    deserialized_to!(i16_: i16 = "-2" => -2);
    deserialized_to!(i32_: i32 = "-2" => -2);
    deserialized_to!(i64_: i64 = "-2" => -2);

    deserialized_to!(u8_: u8 = "3" => 3);
    deserialized_to!(u16_: u16 = "3" => 3);
    deserialized_to!(u32_: u32 = "3" => 3);
    deserialized_to!(u64_: u64 = "3" => 3);

    serde_if_integer128! {
        deserialized_to!(i128_: i128 = "-2" => -2);
        deserialized_to!(u128_: u128 = "2" => 2);
    }

    deserialized_to!(f32_: f32 = "1.23" => 1.23);
    deserialized_to!(f64_: f64 = "1.23" => 1.23);

    deserialized_to!(char_unescaped: char = "h" => 'h');
    // Names are not unescaped, so an entity stays a multi-character string
    // which cannot be deserialized as a single character
    err!(char_escaped: char = "&lt;"
        => Custom("invalid value: string \"&lt;\", expected a character"));

    deserialized_to!(string: String = "<escaped string" => "<escaped string");
    deserialized_to!(borrowed_str: &str = "name" => "name");

    err!(byte_buf: ByteBuf = "<escaped string"
        => Custom("invalid type: string \"<escaped string\", expected byte data"));
    err!(borrowed_bytes: Bytes = "name"
        => Custom("invalid type: string \"name\", expected borrowed bytes"));

    deserialized_to!(option_none: Option<String> = "" => None);
    deserialized_to!(option_some: Option<String> = "name" => Some("name".into()));

    // Unit structs cannot be represented in some meaningful way, but it meaningful
    // to use them as a placeholder when we want to deserialize _something_
    deserialized_to_only!(unit: () = "anything" => ());
    deserialized_to_only!(unit_struct: Unit = "anything" => Unit);

    deserialized_to!(newtype: Newtype = "<escaped string" => Newtype("<escaped string".into()));

    err!(seq: Vec<()> = "name"
        => Custom("invalid type: string \"name\", expected a sequence"));
    err!(tuple: ((), ()) = "name"
        => Custom("invalid type: string \"name\", expected a tuple of size 2"));
    err!(tuple_struct: ((), ()) = "name"
        => Custom("invalid type: string \"name\", expected a tuple of size 2"));

    err!(map: HashMap<(), ()> = "name"
        => Custom("invalid type: string \"name\", expected a map"));
    err!(struct_: Struct = "name"
        => Custom("invalid type: string \"name\", expected struct Struct"));

    deserialized_to!(enum_unit: Enum = "Unit" => Enum::Unit);
    deserialized_to!(enum_unit_for_attr: Enum = "@Attr" => Enum::Attr);
    err!(enum_newtype: Enum = "Newtype"
        => Unsupported("enum newtype variants are not supported as an XML names"));
    err!(enum_tuple: Enum = "Tuple"
        => Unsupported("enum tuple variants are not supported as an XML names"));
    err!(enum_struct: Enum = "Struct"
        => Unsupported("enum struct variants are not supported as an XML names"));

    // Field identifiers cannot be serialized, and IgnoredAny represented _something_
    // which is not concrete
    deserialized_to_only!(identifier: Id = "Field" => Id::Field);
    deserialized_to_only!(ignored_any: Any = "any-name" => Any(IgnoredAny));
}
|
| 484 | |