| 1 | //! Contains `XmlEvent` datatype, instances of which are emitted by the parser. |
| 2 | |
| 3 | use crate::attribute::OwnedAttribute; |
| 4 | use crate::common::XmlVersion; |
| 5 | use crate::name::OwnedName; |
| 6 | use crate::namespace::Namespace; |
| 7 | use std::fmt; |
| 8 | |
/// An element of an XML input stream.
///
/// Items of this enum are emitted by `reader::EventReader`. They correspond to different
/// elements of an XML document.
///
/// Every variant owns its payload (`String`, `OwnedName`, `Vec<OwnedAttribute>`, ...), so an
/// event does not borrow from the input it was parsed from.
#[derive (PartialEq, Clone)]
pub enum XmlEvent {
    /// Corresponds to XML document declaration.
    ///
    /// This event is always emitted before any other event. It is emitted
    /// even if the actual declaration is not present in the document.
    StartDocument {
        /// XML version.
        ///
        /// If XML declaration is not present, defaults to `Version10`.
        version: XmlVersion,

        /// XML document encoding.
        ///
        /// If XML declaration is not present or does not contain `encoding` attribute,
        /// defaults to `"UTF-8"`. This field is currently informational only.
        encoding: String,

        /// XML standalone declaration.
        ///
        /// If XML declaration is not present or does not contain `standalone` attribute,
        /// defaults to `None`. This field is currently informational only.
        standalone: Option<bool>,
    },

    /// Denotes the end of the document stream.
    ///
    /// This event is always emitted after any other event (except `Error`). After it
    /// is emitted for the first time, it will always be emitted on next event pull attempts.
    EndDocument,

    /// Denotes an XML processing instruction.
    ///
    /// This event contains a processing instruction target (`name`) and opaque `data`. It
    /// is up to the application to process them.
    ProcessingInstruction {
        /// Processing instruction target.
        name: String,

        /// Processing instruction content, if any.
        data: Option<String>,
    },

    /// Denotes a beginning of an XML element.
    ///
    /// This event is emitted after parsing opening tags or after parsing bodiless tags. In the
    /// latter case `EndElement` event immediately follows.
    StartElement {
        /// Qualified name of the element.
        name: OwnedName,

        /// A list of attributes associated with the element.
        ///
        /// Currently attributes are not checked for duplicates (TODO)
        attributes: Vec<OwnedAttribute>,

        /// Contents of the namespace mapping at this point of the document.
        namespace: Namespace,
    },

    /// Denotes an end of an XML element.
    ///
    /// This event is emitted after parsing closing tags or after parsing bodiless tags. In the
    /// latter case it is emitted immediately after corresponding `StartElement` event.
    EndElement {
        /// Qualified name of the element.
        name: OwnedName,
    },

    /// Denotes CDATA content.
    ///
    /// This event contains unparsed data. No unescaping will be performed.
    ///
    /// It is possible to configure a parser to emit `Characters` event instead of `CData`. See
    /// `pull::ParserConfiguration` structure for more information.
    CData(String),

    /// Denotes a comment.
    ///
    /// It is possible to configure a parser to ignore comments, so this event will never be emitted.
    /// See `pull::ParserConfiguration` structure for more information.
    Comment(String),

    /// Denotes character data outside of tags.
    ///
    /// Contents of this event will always be unescaped, so no entities like `&lt;` or `&amp;`
    /// or `&#123;` will appear in it.
    ///
    /// It is possible to configure a parser to trim leading and trailing whitespace for this event.
    /// See `pull::ParserConfiguration` structure for more information.
    Characters(String),

    /// Denotes a chunk of whitespace outside of tags.
    ///
    /// It is possible to configure a parser to emit `Characters` event instead of `Whitespace`.
    /// See `pull::ParserConfiguration` structure for more information. When combined with whitespace
    /// trimming, it will eliminate standalone whitespace from the event stream completely.
    Whitespace(String),
}
| 114 | |
| 115 | impl fmt::Debug for XmlEvent { |
| 116 | #[cold ] |
| 117 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
| 118 | match self { |
| 119 | Self::StartDocument { version, encoding, standalone } => |
| 120 | write!(f, "StartDocument( {}, {}, {:?})" , version, *encoding, standalone), |
| 121 | Self::EndDocument => |
| 122 | write!(f, "EndDocument" ), |
| 123 | Self::ProcessingInstruction { name, data } => |
| 124 | write!(f, "ProcessingInstruction( {}{})" , *name, match data { |
| 125 | Some(data) => format!(", {data}" ), |
| 126 | None => String::new() |
| 127 | }), |
| 128 | Self::StartElement { name, attributes, namespace: Namespace(namespace) } => |
| 129 | write!(f, "StartElement( {}, {:?}{})" , name, namespace, if attributes.is_empty() { |
| 130 | String::new() |
| 131 | } else { |
| 132 | let attributes: Vec<String> = attributes.iter().map( |
| 133 | |a| format!(" {} -> {}" , a.name, a.value) |
| 134 | ).collect(); |
| 135 | format!(", [ {}]" , attributes.join(", " )) |
| 136 | }), |
| 137 | Self::EndElement { name } => |
| 138 | write!(f, "EndElement( {name})" ), |
| 139 | Self::Comment(data) => |
| 140 | write!(f, "Comment( {data})" ), |
| 141 | Self::CData(data) => |
| 142 | write!(f, "CData( {data})" ), |
| 143 | Self::Characters(data) => |
| 144 | write!(f, "Characters( {data})" ), |
| 145 | Self::Whitespace(data) => |
| 146 | write!(f, "Whitespace( {data})" ) |
| 147 | } |
| 148 | } |
| 149 | } |
| 150 | |
| 151 | impl XmlEvent { |
| 152 | /// Obtains a writer event from this reader event. |
| 153 | /// |
| 154 | /// This method is useful for streaming processing of XML documents where the output |
| 155 | /// is also an XML document. With this method it is possible to process some events |
| 156 | /// while passing other events through to the writer unchanged: |
| 157 | /// |
| 158 | /// ```rust |
| 159 | /// use std::str; |
| 160 | /// |
| 161 | /// use xml::reader::XmlEvent as ReaderEvent; |
| 162 | /// use xml::writer::XmlEvent as WriterEvent; |
| 163 | /// use xml::{EventReader, EventWriter}; |
| 164 | /// |
| 165 | /// let mut input: &[u8] = b"<hello>world</hello>" ; |
| 166 | /// let mut output: Vec<u8> = Vec::new(); |
| 167 | /// |
| 168 | /// { |
| 169 | /// let mut reader = EventReader::new(&mut input); |
| 170 | /// let mut writer = EventWriter::new(&mut output); |
| 171 | /// |
| 172 | /// for e in reader { |
| 173 | /// match e.unwrap() { |
| 174 | /// ReaderEvent::Characters(s) => |
| 175 | /// writer.write(WriterEvent::characters(&s.to_uppercase())).unwrap(), |
| 176 | /// e => if let Some(e) = e.as_writer_event() { |
| 177 | /// writer.write(e).unwrap() |
| 178 | /// } |
| 179 | /// } |
| 180 | /// } |
| 181 | /// } |
| 182 | /// |
| 183 | /// assert_eq!( |
| 184 | /// str::from_utf8(&output).unwrap(), |
| 185 | /// r#"<?xml version="1.0" encoding="UTF-8"?><hello>WORLD</hello>"# |
| 186 | /// ); |
| 187 | /// ``` |
| 188 | /// |
| 189 | /// Note that this API may change or get additions in future to improve its ergonomics. |
| 190 | #[must_use ] |
| 191 | pub fn as_writer_event(&self) -> Option<crate::writer::events::XmlEvent<'_>> { |
| 192 | match self { |
| 193 | Self::StartDocument { version, encoding, standalone } => |
| 194 | Some(crate::writer::events::XmlEvent::StartDocument { |
| 195 | version: *version, |
| 196 | encoding: Some(encoding), |
| 197 | standalone: *standalone |
| 198 | }), |
| 199 | Self::ProcessingInstruction { name, data } => |
| 200 | Some(crate::writer::events::XmlEvent::ProcessingInstruction { |
| 201 | name, |
| 202 | data: data.as_ref().map(|s| &**s) |
| 203 | }), |
| 204 | Self::StartElement { name, attributes, namespace } => |
| 205 | Some(crate::writer::events::XmlEvent::StartElement { |
| 206 | name: name.borrow(), |
| 207 | attributes: attributes.iter().map(|a| a.borrow()).collect(), |
| 208 | namespace: namespace.borrow(), |
| 209 | }), |
| 210 | Self::EndElement { name } => |
| 211 | Some(crate::writer::events::XmlEvent::EndElement { name: Some(name.borrow()) }), |
| 212 | Self::Comment(data) => Some(crate::writer::events::XmlEvent::Comment(data)), |
| 213 | Self::CData(data) => Some(crate::writer::events::XmlEvent::CData(data)), |
| 214 | Self::Characters(data) | |
| 215 | Self::Whitespace(data) => Some(crate::writer::events::XmlEvent::Characters(data)), |
| 216 | Self::EndDocument => None, |
| 217 | } |
| 218 | } |
| 219 | } |
| 220 | |