| 1 | //! Entity resolver module
|
| 2 |
|
| 3 | use std::convert::Infallible;
|
| 4 | use std::error::Error;
|
| 5 |
|
| 6 | use crate::events::BytesText;
|
| 7 |
|
| 8 | /// Used to resolve unknown entities while parsing
|
| 9 | ///
|
| 10 | /// # Example
|
| 11 | ///
|
| 12 | /// ```
|
| 13 | /// # use serde::Deserialize;
|
| 14 | /// # use pretty_assertions::assert_eq;
|
| 15 | /// use regex::bytes::Regex;
|
| 16 | /// use std::collections::BTreeMap;
|
| 17 | /// use std::string::FromUtf8Error;
|
| 18 | /// use quick_xml::de::{Deserializer, EntityResolver};
|
| 19 | /// use quick_xml::events::BytesText;
|
| 20 | ///
|
| 21 | /// struct DocTypeEntityResolver {
|
| 22 | /// re: Regex,
|
| 23 | /// map: BTreeMap<String, String>,
|
| 24 | /// }
|
| 25 | ///
|
| 26 | /// impl Default for DocTypeEntityResolver {
|
| 27 | /// fn default() -> Self {
|
| 28 | /// Self {
|
| 29 | /// // We do not focus on true parsing in this example
|
| 30 | /// // You should use special libraries to parse DTD
|
| 31 | /// re: Regex::new(r#"<!ENTITY\s+([^ \t\r\n]+)\s+"([^"]*)"\s*>"# ).unwrap(),
|
| 32 | /// map: BTreeMap::new(),
|
| 33 | /// }
|
| 34 | /// }
|
| 35 | /// }
|
| 36 | ///
|
| 37 | /// impl EntityResolver for DocTypeEntityResolver {
|
| 38 | /// type Error = FromUtf8Error;
|
| 39 | ///
|
| 40 | /// fn capture(&mut self, doctype: BytesText) -> Result<(), Self::Error> {
|
| 41 | /// for cap in self.re.captures_iter(&doctype) {
|
| 42 | /// self.map.insert(
|
| 43 | /// String::from_utf8(cap[1].to_vec())?,
|
| 44 | /// String::from_utf8(cap[2].to_vec())?,
|
| 45 | /// );
|
| 46 | /// }
|
| 47 | /// Ok(())
|
| 48 | /// }
|
| 49 | ///
|
| 50 | /// fn resolve(&self, entity: &str) -> Option<&str> {
|
| 51 | /// self.map.get(entity).map(|s| s.as_str())
|
| 52 | /// }
|
| 53 | /// }
|
| 54 | ///
|
| 55 | /// let xml_reader = br#"
|
| 56 | /// <!DOCTYPE dict[ <!ENTITY e1 "entity 1"> ]>
|
| 57 | /// <root>
|
| 58 | /// <entity_one>&e1;</entity_one>
|
| 59 | /// </root>
|
| 60 | /// "# .as_ref();
|
| 61 | ///
|
| 62 | /// let mut de = Deserializer::with_resolver(
|
| 63 | /// xml_reader,
|
| 64 | /// DocTypeEntityResolver::default(),
|
| 65 | /// );
|
| 66 | /// let data: BTreeMap<String, String> = BTreeMap::deserialize(&mut de).unwrap();
|
| 67 | ///
|
| 68 | /// assert_eq!(data.get("entity_one" ), Some(&"entity 1" .to_string()));
|
| 69 | /// ```
|
| 70 | pub trait EntityResolver {
|
| 71 | /// The error type that represents DTD parse error
|
| 72 | type Error: Error;
|
| 73 |
|
| 74 | /// Called on contents of [`Event::DocType`] to capture declared entities.
|
| 75 | /// Can be called multiple times, for each parsed `<!DOCTYPE >` declaration.
|
| 76 | ///
|
| 77 | /// [`Event::DocType`]: crate::events::Event::DocType
|
| 78 | fn capture(&mut self, doctype: BytesText) -> Result<(), Self::Error>;
|
| 79 |
|
| 80 | /// Called when an entity needs to be resolved.
|
| 81 | ///
|
| 82 | /// `None` is returned if a suitable value can not be found.
|
| 83 | /// In that case an [`EscapeError::UnrecognizedSymbol`] will be returned by
|
| 84 | /// a deserializer.
|
| 85 | ///
|
| 86 | /// [`EscapeError::UnrecognizedSymbol`]: crate::escape::EscapeError::UnrecognizedSymbol
|
| 87 | fn resolve(&self, entity: &str) -> Option<&str>;
|
| 88 | }
|
| 89 |
|
/// An `EntityResolver` that does nothing and always returns `None`.
///
/// The type is a zero-sized unit struct, so it can be copied freely and
/// created with [`Default::default`]. `Debug` is derived so that the resolver
/// can be printed with `{:?}` and embedded in containers that derive `Debug`.
#[derive(Debug, Default, Copy, Clone)]
pub struct NoEntityResolver;
|
| 93 |
|
| 94 | impl EntityResolver for NoEntityResolver {
|
| 95 | type Error = Infallible;
|
| 96 |
|
| 97 | fn capture(&mut self, _doctype: BytesText) -> Result<(), Self::Error> {
|
| 98 | Ok(())
|
| 99 | }
|
| 100 |
|
| 101 | fn resolve(&self, _entity: &str) -> Option<&str> {
|
| 102 | None
|
| 103 | }
|
| 104 | }
|
| 105 | |