1 | //! Contains `XmlEvent` datatype, instances of which are emitted by the parser. |
2 | |
3 | use std::fmt; |
4 | use crate::attribute::OwnedAttribute; |
5 | use crate::common::XmlVersion; |
6 | use crate::name::OwnedName; |
7 | use crate::namespace::Namespace; |
8 | |
/// An element of an XML input stream.
///
/// Items of this enum are emitted by `reader::EventReader`. They correspond to different
/// elements of an XML document.
#[derive(PartialEq, Clone)]
pub enum XmlEvent {
    /// Corresponds to the XML document declaration.
    ///
    /// This event is always emitted before any other event. It is emitted
    /// even if the actual declaration is not present in the document.
    StartDocument {
        /// XML version.
        ///
        /// If the XML declaration is not present, defaults to `Version10`.
        version: XmlVersion,

        /// XML document encoding.
        ///
        /// If the XML declaration is not present or does not contain the `encoding`
        /// attribute, defaults to `"UTF-8"`. This field is currently purely informational.
        encoding: String,

        /// XML standalone declaration.
        ///
        /// If the XML declaration is not present or does not contain the `standalone`
        /// attribute, defaults to `None`. This field is currently purely informational.
        standalone: Option<bool>,
    },

    /// Denotes the end of the document stream.
    ///
    /// This event is always emitted after any other event (except `Error`). After it
    /// is emitted for the first time, it will always be emitted on next event pull attempts.
    EndDocument,

    /// Denotes an XML processing instruction.
    ///
    /// This event contains a processing instruction target (`name`) and opaque `data`. It
    /// is up to the application to process them.
    ProcessingInstruction {
        /// Processing instruction target.
        name: String,

        /// Processing instruction content, if any.
        data: Option<String>,
    },

    /// Denotes the beginning of an XML element.
    ///
    /// This event is emitted after parsing opening tags or after parsing bodiless tags. In the
    /// latter case an `EndElement` event immediately follows.
    StartElement {
        /// Qualified name of the element.
        name: OwnedName,

        /// A list of attributes associated with the element.
        ///
        /// Currently attributes are not checked for duplicates (TODO).
        attributes: Vec<OwnedAttribute>,

        /// Contents of the namespace mapping at this point of the document.
        namespace: Namespace,
    },

    /// Denotes the end of an XML element.
    ///
    /// This event is emitted after parsing closing tags or after parsing bodiless tags. In the
    /// latter case it is emitted immediately after the corresponding `StartElement` event.
    EndElement {
        /// Qualified name of the element.
        name: OwnedName,
    },

    /// Denotes CDATA content.
    ///
    /// This event contains unparsed data. No unescaping will be performed.
    ///
    /// It is possible to configure a parser to emit a `Characters` event instead of `CData`. See
    /// `pull::ParserConfiguration` structure for more information.
    CData(String),

    /// Denotes a comment.
    ///
    /// It is possible to configure a parser to ignore comments, in which case this event will
    /// never be emitted. See `pull::ParserConfiguration` structure for more information.
    Comment(String),

    /// Denotes character data outside of tags.
    ///
    /// Contents of this event will always be unescaped, so no entities like `&lt;` or `&amp;`
    /// or `&#123;` will appear in it.
    ///
    /// It is possible to configure a parser to trim leading and trailing whitespace for this event.
    /// See `pull::ParserConfiguration` structure for more information.
    Characters(String),

    /// Denotes a chunk of whitespace outside of tags.
    ///
    /// It is possible to configure a parser to emit a `Characters` event instead of `Whitespace`.
    /// See `pull::ParserConfiguration` structure for more information. When combined with whitespace
    /// trimming, it will eliminate standalone whitespace from the event stream completely.
    Whitespace(String),
}
114 | |
115 | impl fmt::Debug for XmlEvent { |
116 | #[cold ] |
117 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
118 | match *self { |
119 | XmlEvent::StartDocument { ref version, ref encoding, standalone } => |
120 | write!(f, "StartDocument( {}, {}, {:?})" , version, *encoding, standalone), |
121 | XmlEvent::EndDocument => |
122 | write!(f, "EndDocument" ), |
123 | XmlEvent::ProcessingInstruction { ref name, ref data } => |
124 | write!(f, "ProcessingInstruction( {}{})" , *name, match *data { |
125 | Some(ref data) => format!(", {data}" ), |
126 | None => String::new() |
127 | }), |
128 | XmlEvent::StartElement { ref name, ref attributes, namespace: Namespace(ref namespace) } => |
129 | write!(f, "StartElement( {}, {:?}{})" , name, namespace, if attributes.is_empty() { |
130 | String::new() |
131 | } else { |
132 | let attributes: Vec<String> = attributes.iter().map( |
133 | |a| format!(" {} -> {}" , a.name, a.value) |
134 | ).collect(); |
135 | format!(", [ {}]" , attributes.join(", " )) |
136 | }), |
137 | XmlEvent::EndElement { ref name } => |
138 | write!(f, "EndElement( {name})" ), |
139 | XmlEvent::Comment(ref data) => |
140 | write!(f, "Comment( {data})" ), |
141 | XmlEvent::CData(ref data) => |
142 | write!(f, "CData( {data})" ), |
143 | XmlEvent::Characters(ref data) => |
144 | write!(f, "Characters( {data})" ), |
145 | XmlEvent::Whitespace(ref data) => |
146 | write!(f, "Whitespace( {data})" ) |
147 | } |
148 | } |
149 | } |
150 | |
151 | impl XmlEvent { |
152 | /// Obtains a writer event from this reader event. |
153 | /// |
154 | /// This method is useful for streaming processing of XML documents where the output |
155 | /// is also an XML document. With this method it is possible to process some events |
156 | /// while passing other events through to the writer unchanged: |
157 | /// |
158 | /// ```rust |
159 | /// use std::str; |
160 | /// |
161 | /// use xml::{EventReader, EventWriter}; |
162 | /// use xml::reader::XmlEvent as ReaderEvent; |
163 | /// use xml::writer::XmlEvent as WriterEvent; |
164 | /// |
165 | /// let mut input: &[u8] = b"<hello>world</hello>" ; |
166 | /// let mut output: Vec<u8> = Vec::new(); |
167 | /// |
168 | /// { |
169 | /// let mut reader = EventReader::new(&mut input); |
170 | /// let mut writer = EventWriter::new(&mut output); |
171 | /// |
172 | /// for e in reader { |
173 | /// match e.unwrap() { |
174 | /// ReaderEvent::Characters(s) => |
175 | /// writer.write(WriterEvent::characters(&s.to_uppercase())).unwrap(), |
176 | /// e => if let Some(e) = e.as_writer_event() { |
177 | /// writer.write(e).unwrap() |
178 | /// } |
179 | /// } |
180 | /// } |
181 | /// } |
182 | /// |
183 | /// assert_eq!( |
184 | /// str::from_utf8(&output).unwrap(), |
185 | /// r#"<?xml version="1.0" encoding="UTF-8"?><hello>WORLD</hello>"# |
186 | /// ); |
187 | /// ``` |
188 | /// |
189 | /// Note that this API may change or get additions in future to improve its ergonomics. |
190 | #[must_use ] |
191 | pub fn as_writer_event(&self) -> Option<crate::writer::events::XmlEvent<'_>> { |
192 | match *self { |
193 | XmlEvent::StartDocument { version, ref encoding, standalone } => |
194 | Some(crate::writer::events::XmlEvent::StartDocument { |
195 | version, |
196 | encoding: Some(encoding), |
197 | standalone |
198 | }), |
199 | XmlEvent::ProcessingInstruction { ref name, ref data } => |
200 | Some(crate::writer::events::XmlEvent::ProcessingInstruction { |
201 | name, |
202 | data: data.as_ref().map(|s| &**s) |
203 | }), |
204 | XmlEvent::StartElement { ref name, ref attributes, ref namespace } => |
205 | Some(crate::writer::events::XmlEvent::StartElement { |
206 | name: name.borrow(), |
207 | attributes: attributes.iter().map(|a| a.borrow()).collect(), |
208 | namespace: namespace.borrow(), |
209 | }), |
210 | XmlEvent::EndElement { ref name } => |
211 | Some(crate::writer::events::XmlEvent::EndElement { name: Some(name.borrow()) }), |
212 | XmlEvent::Comment(ref data) => Some(crate::writer::events::XmlEvent::Comment(data)), |
213 | XmlEvent::CData(ref data) => Some(crate::writer::events::XmlEvent::CData(data)), |
214 | XmlEvent::Characters(ref data) | |
215 | XmlEvent::Whitespace(ref data) => Some(crate::writer::events::XmlEvent::Characters(data)), |
216 | XmlEvent::EndDocument => None, |
217 | } |
218 | } |
219 | } |
220 | |