| 1 | use std::error::Error; |
| 2 | use std::io::prelude::*; |
| 3 | use std::{fmt, io, result}; |
| 4 | |
| 5 | use crate::attribute::Attribute; |
| 6 | use crate::common; |
| 7 | use crate::common::XmlVersion; |
| 8 | use crate::escape::{AttributeEscapes, Escaped, PcDataEscapes}; |
| 9 | use crate::name::{Name, OwnedName}; |
| 10 | use crate::namespace::{NamespaceStack, NS_EMPTY_URI, NS_NO_PREFIX, NS_XMLNS_PREFIX, NS_XML_PREFIX}; |
| 11 | |
| 12 | use crate::writer::config::EmitterConfig; |
| 13 | |
/// Errors that `XmlWriter` can report while emitting XML events.
#[derive(Debug)]
pub enum EmitterError {
    /// The underlying `Write` instance failed with an I/O error.
    Io(io::Error),

    /// The document declaration was already written to the output stream.
    DocumentStartAlreadyEmitted,

    /// The name of the most recently opened element is not available.
    LastElementNameNotAvailable,

    /// The provided closing element name differs from the name of the most
    /// recently opened element.
    EndElementNameIsNotEqualToLastStartElementName,

    /// No end element name was given although one is required, for example
    /// when automatic closing is not enabled in the configuration.
    EndElementNameIsNotSpecified,
}
| 34 | |
| 35 | impl From<io::Error> for EmitterError { |
| 36 | #[cold ] |
| 37 | fn from(err: io::Error) -> Self { |
| 38 | Self::Io(err) |
| 39 | } |
| 40 | } |
| 41 | |
| 42 | impl fmt::Display for EmitterError { |
| 43 | #[cold ] |
| 44 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
| 45 | f.write_str(data:"emitter error: " )?; |
| 46 | match self { |
| 47 | Self::Io(e: &Error) => write!(f, "I/O error: {e}" ), |
| 48 | Self::DocumentStartAlreadyEmitted => f.write_str(data:"document start event has already been emitted" ), |
| 49 | Self::LastElementNameNotAvailable => f.write_str(data:"last element name is not available" ), |
| 50 | Self::EndElementNameIsNotEqualToLastStartElementName => f.write_str(data:"end element name is not equal to last start element name" ), |
| 51 | Self::EndElementNameIsNotSpecified => f.write_str(data:"end element name is not specified and can't be inferred" ), |
| 52 | } |
| 53 | } |
| 54 | } |
| 55 | |
| 56 | impl Error for EmitterError { |
| 57 | } |
| 58 | |
| 59 | /// A result type yielded by `XmlWriter`. |
| 60 | pub type Result<T, E = EmitterError> = result::Result<T, E>; |
| 61 | |
| 62 | // TODO: split into a low-level fast writer without any checks and formatting logic and a |
| 63 | // high-level indenting validating writer |
| 64 | pub struct Emitter { |
| 65 | config: EmitterConfig, |
| 66 | |
| 67 | nst: NamespaceStack, |
| 68 | |
| 69 | indent_level: usize, |
| 70 | indent_stack: Vec<IndentFlags>, |
| 71 | |
| 72 | element_names: Vec<OwnedName>, |
| 73 | |
| 74 | start_document_emitted: bool, |
| 75 | just_wrote_start_element: bool, |
| 76 | } |
| 77 | |
| 78 | impl Emitter { |
| 79 | pub fn new(config: EmitterConfig) -> Self { |
| 80 | let mut indent_stack: Vec = Vec::with_capacity(16); |
| 81 | indent_stack.push(IndentFlags::WroteNothing); |
| 82 | |
| 83 | Self { |
| 84 | config, |
| 85 | |
| 86 | nst: NamespaceStack::empty(), |
| 87 | |
| 88 | indent_level: 0, |
| 89 | indent_stack, |
| 90 | |
| 91 | element_names: Vec::new(), |
| 92 | |
| 93 | start_document_emitted: false, |
| 94 | just_wrote_start_element: false, |
| 95 | } |
| 96 | } |
| 97 | } |
| 98 | |
/// What was most recently written at one nesting level; drives the decision
/// whether a line break and indentation go before the next piece of markup.
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
enum IndentFlags {
    /// Nothing has been written at this level yet.
    WroteNothing,
    /// The last thing written at this level was markup.
    WroteMarkup,
    /// The last thing written at this level was text.
    WroteText,
}
| 105 | |
| 106 | impl Emitter { |
| 107 | /// Returns the current state of namespaces. |
| 108 | #[inline ] |
| 109 | pub fn namespace_stack_mut(&mut self) -> &mut NamespaceStack { |
| 110 | &mut self.nst |
| 111 | } |
| 112 | |
| 113 | #[inline ] |
| 114 | fn wrote_text(&self) -> bool { |
| 115 | self.indent_stack.last().map_or(false, |&e| e == IndentFlags::WroteText) |
| 116 | } |
| 117 | |
| 118 | #[inline ] |
| 119 | fn wrote_markup(&self) -> bool { |
| 120 | self.indent_stack.last().map_or(false, |&e| e == IndentFlags::WroteMarkup) |
| 121 | } |
| 122 | |
| 123 | #[inline ] |
| 124 | fn set_wrote_text(&mut self) { |
| 125 | if let Some(e) = self.indent_stack.last_mut() { |
| 126 | *e = IndentFlags::WroteText; |
| 127 | } |
| 128 | } |
| 129 | |
| 130 | #[inline ] |
| 131 | fn set_wrote_markup(&mut self) { |
| 132 | if let Some(e) = self.indent_stack.last_mut() { |
| 133 | *e = IndentFlags::WroteMarkup; |
| 134 | } |
| 135 | } |
| 136 | |
| 137 | fn write_newline<W: Write>(&self, target: &mut W, level: usize) -> Result<()> { |
| 138 | target.write_all(self.config.line_separator.as_bytes())?; |
| 139 | for _ in 0..level { |
| 140 | target.write_all(self.config.indent_string.as_bytes())?; |
| 141 | } |
| 142 | Ok(()) |
| 143 | } |
| 144 | |
| 145 | fn before_markup<W: Write>(&mut self, target: &mut W) -> Result<()> { |
| 146 | if self.config.perform_indent && !self.wrote_text() && |
| 147 | (self.indent_level > 0 || self.wrote_markup()) { |
| 148 | let indent_level = self.indent_level; |
| 149 | self.write_newline(target, indent_level)?; |
| 150 | if self.indent_level > 0 && self.config.indent_string.len() > 0 { |
| 151 | self.after_markup(); |
| 152 | } |
| 153 | } |
| 154 | Ok(()) |
| 155 | } |
| 156 | |
| 157 | fn after_markup(&mut self) { |
| 158 | self.set_wrote_markup(); |
| 159 | } |
| 160 | |
| 161 | fn before_start_element<W: Write>(&mut self, target: &mut W) -> Result<()> { |
| 162 | self.before_markup(target)?; |
| 163 | self.indent_stack.push(IndentFlags::WroteNothing); |
| 164 | Ok(()) |
| 165 | } |
| 166 | |
| 167 | fn after_start_element(&mut self) { |
| 168 | self.after_markup(); |
| 169 | self.indent_level += 1; |
| 170 | } |
| 171 | |
| 172 | fn before_end_element<W: Write>(&mut self, target: &mut W) -> Result<()> { |
| 173 | if self.config.perform_indent && self.indent_level > 0 && self.wrote_markup() && |
| 174 | !self.wrote_text() { |
| 175 | let indent_level = self.indent_level; |
| 176 | self.write_newline(target, indent_level - 1) |
| 177 | } else { |
| 178 | Ok(()) |
| 179 | } |
| 180 | } |
| 181 | |
| 182 | fn after_end_element(&mut self) { |
| 183 | if self.indent_level > 0 { |
| 184 | self.indent_level -= 1; |
| 185 | self.indent_stack.pop(); |
| 186 | } |
| 187 | self.set_wrote_markup(); |
| 188 | } |
| 189 | |
| 190 | fn after_text(&mut self) { |
| 191 | self.set_wrote_text(); |
| 192 | } |
| 193 | |
| 194 | pub fn emit_start_document<W: Write>(&mut self, target: &mut W, |
| 195 | version: XmlVersion, |
| 196 | encoding: &str, |
| 197 | standalone: Option<bool>) -> Result<()> { |
| 198 | if self.start_document_emitted { |
| 199 | return Err(EmitterError::DocumentStartAlreadyEmitted); |
| 200 | } |
| 201 | self.start_document_emitted = true; |
| 202 | |
| 203 | self.before_markup(target)?; |
| 204 | let result = { |
| 205 | let mut write = move || { |
| 206 | write!(target, "<?xml version= \"{version}\" encoding= \"{encoding}\"" )?; |
| 207 | |
| 208 | if let Some(standalone) = standalone { |
| 209 | write!(target, " standalone= \"{}\"" , if standalone { "yes" } else { "no" })?; |
| 210 | } |
| 211 | |
| 212 | write!(target, "?>" )?; |
| 213 | |
| 214 | Ok(()) |
| 215 | }; |
| 216 | write() |
| 217 | }; |
| 218 | self.after_markup(); |
| 219 | |
| 220 | result |
| 221 | } |
| 222 | |
| 223 | fn check_document_started<W: Write>(&mut self, target: &mut W) -> Result<()> { |
| 224 | if !self.start_document_emitted && self.config.write_document_declaration { |
| 225 | self.emit_start_document(target, common::XmlVersion::Version10, "UTF-8" , None) |
| 226 | } else { |
| 227 | Ok(()) |
| 228 | } |
| 229 | } |
| 230 | |
| 231 | fn fix_non_empty_element<W: Write>(&mut self, target: &mut W) -> Result<()> { |
| 232 | if self.config.normalize_empty_elements && self.just_wrote_start_element { |
| 233 | self.just_wrote_start_element = false; |
| 234 | target.write_all(b">" ).map_err(From::from) |
| 235 | } else { |
| 236 | Ok(()) |
| 237 | } |
| 238 | } |
| 239 | |
| 240 | pub fn emit_processing_instruction<W: Write>(&mut self, |
| 241 | target: &mut W, |
| 242 | name: &str, |
| 243 | data: Option<&str>) -> Result<()> { |
| 244 | self.check_document_started(target)?; |
| 245 | self.fix_non_empty_element(target)?; |
| 246 | |
| 247 | self.before_markup(target)?; |
| 248 | |
| 249 | let result = { |
| 250 | let mut write = move || { |
| 251 | write!(target, "<? {name}" )?; |
| 252 | |
| 253 | if let Some(data) = data { |
| 254 | write!(target, " {data}" )?; |
| 255 | } |
| 256 | |
| 257 | write!(target, "?>" )?; |
| 258 | |
| 259 | Ok(()) |
| 260 | }; |
| 261 | write() |
| 262 | }; |
| 263 | |
| 264 | self.after_markup(); |
| 265 | |
| 266 | result |
| 267 | } |
| 268 | |
| 269 | #[track_caller ] |
| 270 | fn emit_start_element_initial<W>(&mut self, target: &mut W, |
| 271 | name: Name<'_>, |
| 272 | attributes: &[Attribute<'_>]) -> Result<()> |
| 273 | where W: Write |
| 274 | { |
| 275 | self.check_document_started(target)?; |
| 276 | self.fix_non_empty_element(target)?; |
| 277 | self.before_start_element(target)?; |
| 278 | write!(target, "< {}" , name.repr_display())?; |
| 279 | self.emit_current_namespace_attributes(target)?; |
| 280 | self.emit_attributes(target, attributes)?; |
| 281 | self.after_start_element(); |
| 282 | Ok(()) |
| 283 | } |
| 284 | |
| 285 | #[track_caller ] |
| 286 | pub fn emit_start_element<W>(&mut self, target: &mut W, |
| 287 | name: Name<'_>, |
| 288 | attributes: &[Attribute<'_>]) -> Result<()> |
| 289 | where W: Write |
| 290 | { |
| 291 | if self.config.keep_element_names_stack { |
| 292 | self.element_names.push(name.to_owned()); |
| 293 | } |
| 294 | |
| 295 | self.emit_start_element_initial(target, name, attributes)?; |
| 296 | self.just_wrote_start_element = true; |
| 297 | |
| 298 | if !self.config.normalize_empty_elements { |
| 299 | write!(target, ">" )?; |
| 300 | } |
| 301 | |
| 302 | Ok(()) |
| 303 | } |
| 304 | |
| 305 | #[track_caller ] |
| 306 | pub fn emit_current_namespace_attributes<W>(&self, target: &mut W) -> Result<()> |
| 307 | where W: Write |
| 308 | { |
| 309 | for (prefix, uri) in self.nst.peek() { |
| 310 | match prefix { |
| 311 | // internal namespaces are not emitted |
| 312 | NS_XMLNS_PREFIX | NS_XML_PREFIX => Ok(()), |
| 313 | //// there is already a namespace binding with this prefix in scope |
| 314 | //prefix if self.nst.get(prefix) == Some(uri) => Ok(()), |
| 315 | // emit xmlns only if it is overridden |
| 316 | NS_NO_PREFIX => if uri != NS_EMPTY_URI { |
| 317 | write!(target, " xmlns= \"{uri}\"" ) |
| 318 | } else { Ok(()) }, |
| 319 | // everything else |
| 320 | prefix => write!(target, " xmlns: {prefix}= \"{uri}\"" ), |
| 321 | }?; |
| 322 | } |
| 323 | Ok(()) |
| 324 | } |
| 325 | |
| 326 | pub fn emit_attributes<W: Write>(&self, target: &mut W, |
| 327 | attributes: &[Attribute<'_>]) -> Result<()> { |
| 328 | for attr in attributes { |
| 329 | write!(target, " {}= \"" , attr.name.repr_display())?; |
| 330 | if self.config.perform_escaping { |
| 331 | write!(target, " {}" , Escaped::<AttributeEscapes>::new(attr.value))?; |
| 332 | } else { |
| 333 | write!(target, " {}" , attr.value)?; |
| 334 | } |
| 335 | write!(target, " \"" )?; |
| 336 | } |
| 337 | Ok(()) |
| 338 | } |
| 339 | |
| 340 | pub fn emit_end_element<W: Write>(&mut self, target: &mut W, |
| 341 | name: Option<Name<'_>>) -> Result<()> { |
| 342 | let owned_name = if self.config.keep_element_names_stack { |
| 343 | Some(self.element_names.pop().ok_or(EmitterError::LastElementNameNotAvailable)?) |
| 344 | } else { |
| 345 | None |
| 346 | }; |
| 347 | |
| 348 | // Check that last started element name equals to the provided name, if there are both |
| 349 | if let Some(ref last_name) = owned_name { |
| 350 | if let Some(ref name) = name { |
| 351 | if last_name.borrow() != *name { |
| 352 | return Err(EmitterError::EndElementNameIsNotEqualToLastStartElementName); |
| 353 | } |
| 354 | } |
| 355 | } |
| 356 | |
| 357 | if let Some(name) = owned_name.as_ref().map(|n| n.borrow()).or(name) { |
| 358 | if self.config.normalize_empty_elements && self.just_wrote_start_element { |
| 359 | self.just_wrote_start_element = false; |
| 360 | let termination = if self.config.pad_self_closing { " />" } else { "/>" }; |
| 361 | let result = target.write_all(termination.as_bytes()).map_err(From::from); |
| 362 | self.after_end_element(); |
| 363 | result |
| 364 | } else { |
| 365 | self.just_wrote_start_element = false; |
| 366 | |
| 367 | self.before_end_element(target)?; |
| 368 | let result = write!(target, "</ {}>" , name.repr_display()).map_err(From::from); |
| 369 | self.after_end_element(); |
| 370 | |
| 371 | result |
| 372 | } |
| 373 | } else { |
| 374 | Err(EmitterError::EndElementNameIsNotSpecified) |
| 375 | } |
| 376 | } |
| 377 | |
| 378 | pub fn emit_cdata<W: Write>(&mut self, target: &mut W, content: &str) -> Result<()> { |
| 379 | self.fix_non_empty_element(target)?; |
| 380 | if self.config.cdata_to_characters { |
| 381 | self.emit_characters(target, content) |
| 382 | } else { |
| 383 | // TODO: escape ']]>' characters in CDATA as two adjacent CDATA blocks |
| 384 | target.write_all(b"<![CDATA[" )?; |
| 385 | target.write_all(content.as_bytes())?; |
| 386 | target.write_all(b"]]>" )?; |
| 387 | |
| 388 | self.after_text(); |
| 389 | |
| 390 | Ok(()) |
| 391 | } |
| 392 | } |
| 393 | |
| 394 | pub fn emit_characters<W: Write>(&mut self, target: &mut W, content: &str) -> Result<()> { |
| 395 | self.check_document_started(target)?; |
| 396 | self.fix_non_empty_element(target)?; |
| 397 | |
| 398 | if self.config.perform_escaping { |
| 399 | write!(target, " {}" , Escaped::<PcDataEscapes>::new(content))?; |
| 400 | } else { |
| 401 | target.write_all(content.as_bytes())?; |
| 402 | } |
| 403 | |
| 404 | self.after_text(); |
| 405 | Ok(()) |
| 406 | } |
| 407 | |
| 408 | pub fn emit_comment<W: Write>(&mut self, target: &mut W, content: &str) -> Result<()> { |
| 409 | self.fix_non_empty_element(target)?; |
| 410 | |
| 411 | // TODO: add escaping dashes at the end of the comment |
| 412 | |
| 413 | let autopad_comments = self.config.autopad_comments; |
| 414 | let write = move |target: &mut W| -> Result<()> { |
| 415 | target.write_all(b"<!--" )?; |
| 416 | |
| 417 | if autopad_comments && !content.starts_with(char::is_whitespace) { |
| 418 | target.write_all(b" " )?; |
| 419 | } |
| 420 | |
| 421 | target.write_all(content.as_bytes())?; |
| 422 | |
| 423 | if autopad_comments && !content.ends_with(char::is_whitespace) { |
| 424 | target.write_all(b" " )?; |
| 425 | } |
| 426 | |
| 427 | target.write_all(b"-->" )?; |
| 428 | |
| 429 | Ok(()) |
| 430 | }; |
| 431 | |
| 432 | self.before_markup(target)?; |
| 433 | let result = write(target); |
| 434 | self.after_markup(); |
| 435 | |
| 436 | result |
| 437 | } |
| 438 | } |
| 439 | |