1 | //! Entity resolver module
|
2 |
|
3 | use std::convert::Infallible;
|
4 | use std::error::Error;
|
5 |
|
6 | use crate::events::BytesText;
|
7 |
|
8 | /// Used to resolve unknown entities while parsing
|
9 | ///
|
10 | /// # Example
|
11 | ///
|
12 | /// ```
|
13 | /// # use serde::Deserialize;
|
14 | /// # use pretty_assertions::assert_eq;
|
15 | /// use regex::bytes::Regex;
|
16 | /// use std::collections::BTreeMap;
|
17 | /// use std::string::FromUtf8Error;
|
18 | /// use quick_xml::de::{Deserializer, EntityResolver};
|
19 | /// use quick_xml::events::BytesText;
|
20 | ///
|
21 | /// struct DocTypeEntityResolver {
|
22 | /// re: Regex,
|
23 | /// map: BTreeMap<String, String>,
|
24 | /// }
|
25 | ///
|
26 | /// impl Default for DocTypeEntityResolver {
|
27 | /// fn default() -> Self {
|
28 | /// Self {
|
29 | /// // We do not focus on true parsing in this example
|
30 | /// // You should use special libraries to parse DTD
|
31 | /// re: Regex::new(r#"<!ENTITY\s+([^ \t\r\n]+)\s+"([^"]*)"\s*>"# ).unwrap(),
|
32 | /// map: BTreeMap::new(),
|
33 | /// }
|
34 | /// }
|
35 | /// }
|
36 | ///
|
37 | /// impl EntityResolver for DocTypeEntityResolver {
|
38 | /// type Error = FromUtf8Error;
|
39 | ///
|
40 | /// fn capture(&mut self, doctype: BytesText) -> Result<(), Self::Error> {
|
41 | /// for cap in self.re.captures_iter(&doctype) {
|
42 | /// self.map.insert(
|
43 | /// String::from_utf8(cap[1].to_vec())?,
|
44 | /// String::from_utf8(cap[2].to_vec())?,
|
45 | /// );
|
46 | /// }
|
47 | /// Ok(())
|
48 | /// }
|
49 | ///
|
50 | /// fn resolve(&self, entity: &str) -> Option<&str> {
|
51 | /// self.map.get(entity).map(|s| s.as_str())
|
52 | /// }
|
53 | /// }
|
54 | ///
|
55 | /// let xml_reader = br#"
|
56 | /// <!DOCTYPE dict[ <!ENTITY e1 "entity 1"> ]>
|
57 | /// <root>
|
58 | /// <entity_one>&e1;</entity_one>
|
59 | /// </root>
|
60 | /// "# .as_ref();
|
61 | ///
|
62 | /// let mut de = Deserializer::with_resolver(
|
63 | /// xml_reader,
|
64 | /// DocTypeEntityResolver::default(),
|
65 | /// );
|
66 | /// let data: BTreeMap<String, String> = BTreeMap::deserialize(&mut de).unwrap();
|
67 | ///
|
68 | /// assert_eq!(data.get("entity_one" ), Some(&"entity 1" .to_string()));
|
69 | /// ```
|
70 | pub trait EntityResolver {
|
71 | /// The error type that represents DTD parse error
|
72 | type Error: Error;
|
73 |
|
74 | /// Called on contents of [`Event::DocType`] to capture declared entities.
|
75 | /// Can be called multiple times, for each parsed `<!DOCTYPE >` declaration.
|
76 | ///
|
77 | /// [`Event::DocType`]: crate::events::Event::DocType
|
78 | fn capture(&mut self, doctype: BytesText) -> Result<(), Self::Error>;
|
79 |
|
80 | /// Called when an entity needs to be resolved.
|
81 | ///
|
82 | /// `None` is returned if a suitable value can not be found.
|
83 | /// In that case an [`EscapeError::UnrecognizedSymbol`] will be returned by
|
84 | /// a deserializer.
|
85 | ///
|
86 | /// [`EscapeError::UnrecognizedSymbol`]: crate::escape::EscapeError::UnrecognizedSymbol
|
87 | fn resolve(&self, entity: &str) -> Option<&str>;
|
88 | }
|
89 |
|
/// An [`EntityResolver`] that does nothing and always returns `None`.
///
/// `Debug` is derived so that the resolver can be printed with `{:?}` and
/// embedded in types that themselves derive `Debug`.
#[derive(Debug, Default, Copy, Clone)]
pub struct NoEntityResolver;
|
93 |
|
94 | impl EntityResolver for NoEntityResolver {
|
95 | type Error = Infallible;
|
96 |
|
97 | fn capture(&mut self, _doctype: BytesText) -> Result<(), Self::Error> {
|
98 | Ok(())
|
99 | }
|
100 |
|
101 | fn resolve(&self, _entity: &str) -> Option<&str> {
|
102 | None
|
103 | }
|
104 | }
|
105 | |