1//! Defines zero-copy XML events used throughout this library.
2//!
//! An XML event often represents part of an XML element.
4//! They occur both during reading and writing and are
5//! usually used with the stream-oriented API.
6//!
7//! For example, the XML element
8//! ```xml
9//! <name attr="value">Inner text</name>
10//! ```
11//! consists of the three events `Start`, `Text` and `End`.
12//! They can also represent other parts in an XML document like the
13//! XML declaration. Each Event usually contains further information,
14//! like the tag name, the attribute or the inner text.
15//!
16//! See [`Event`] for a list of all possible events.
17//!
18//! # Reading
//! When reading an XML stream, the events are emitted by [`Reader::read_event`]
20//! and [`Reader::read_event_into`]. You must listen
21//! for the different types of events you are interested in.
22//!
23//! See [`Reader`] for further information.
24//!
25//! # Writing
26//! When writing the XML document, you must create the XML element
27//! by constructing the events it consists of and pass them to the writer
28//! sequentially.
29//!
30//! See [`Writer`] for further information.
31//!
32//! [`Reader::read_event`]: crate::reader::Reader::read_event
33//! [`Reader::read_event_into`]: crate::reader::Reader::read_event_into
34//! [`Reader`]: crate::reader::Reader
35//! [`Writer`]: crate::writer::Writer
36//! [`Event`]: crate::events::Event
37
38pub mod attributes;
39
40#[cfg(feature = "encoding")]
41use encoding_rs::Encoding;
42use std::borrow::Cow;
43use std::fmt::{self, Debug, Formatter};
44use std::ops::Deref;
45use std::str::from_utf8;
46
47use crate::encoding::Decoder;
48use crate::errors::{Error, Result};
49use crate::escape::{escape, partial_escape, unescape_with};
50use crate::name::{LocalName, QName};
51use crate::reader::is_whitespace;
52use crate::utils::write_cow_string;
53#[cfg(feature = "serialize")]
54use crate::utils::CowRef;
55use attributes::{Attribute, Attributes};
56use std::mem::replace;
57
/// Opening tag data (`Event::Start`), with optional attributes.
///
/// `<name attr="value">`.
///
/// The name can be accessed using the [`name`] or [`local_name`] methods.
/// An iterator over the attributes is returned by the [`attributes`] method.
///
/// [`name`]: Self::name
/// [`local_name`]: Self::local_name
/// [`attributes`]: Self::attributes
#[derive(Clone, Eq, PartialEq)]
pub struct BytesStart<'a> {
    /// Raw content of the tag (the name followed by the attributes, if any),
    /// before any UTF-8 conversion.
    pub(crate) buf: Cow<'a, [u8]>,
    /// Length of the element name in bytes. The name always starts at the
    /// beginning of `buf`, so this is also the offset at which the name ends.
    pub(crate) name_len: usize,
}
75
76impl<'a> BytesStart<'a> {
77 /// Internal constructor, used by `Reader`. Supplies data in reader's encoding
78 #[inline]
79 pub(crate) fn wrap(content: &'a [u8], name_len: usize) -> Self {
80 BytesStart {
81 buf: Cow::Borrowed(content),
82 name_len,
83 }
84 }
85
86 /// Creates a new `BytesStart` from the given name.
87 ///
88 /// # Warning
89 ///
90 /// `name` must be a valid name.
91 #[inline]
92 pub fn new<C: Into<Cow<'a, str>>>(name: C) -> Self {
93 let buf = str_cow_to_bytes(name);
94 BytesStart {
95 name_len: buf.len(),
96 buf,
97 }
98 }
99
100 /// Creates a new `BytesStart` from the given content (name + attributes).
101 ///
102 /// # Warning
103 ///
104 /// `&content[..name_len]` must be a valid name, and the remainder of `content`
105 /// must be correctly-formed attributes. Neither are checked, it is possible
106 /// to generate invalid XML if `content` or `name_len` are incorrect.
107 #[inline]
108 pub fn from_content<C: Into<Cow<'a, str>>>(content: C, name_len: usize) -> Self {
109 BytesStart {
110 buf: str_cow_to_bytes(content),
111 name_len,
112 }
113 }
114
115 /// Converts the event into an owned event.
116 pub fn into_owned(self) -> BytesStart<'static> {
117 BytesStart {
118 buf: Cow::Owned(self.buf.into_owned()),
119 name_len: self.name_len,
120 }
121 }
122
123 /// Converts the event into an owned event without taking ownership of Event
124 pub fn to_owned(&self) -> BytesStart<'static> {
125 BytesStart {
126 buf: Cow::Owned(self.buf.clone().into_owned()),
127 name_len: self.name_len,
128 }
129 }
130
131 /// Converts the event into a borrowed event. Most useful when paired with [`to_end`].
132 ///
133 /// # Example
134 ///
135 /// ```
136 /// use quick_xml::events::{BytesStart, Event};
137 /// # use quick_xml::writer::Writer;
138 /// # use quick_xml::Error;
139 ///
140 /// struct SomeStruct<'a> {
141 /// attrs: BytesStart<'a>,
142 /// // ...
143 /// }
144 /// # impl<'a> SomeStruct<'a> {
145 /// # fn example(&self) -> Result<(), Error> {
146 /// # let mut writer = Writer::new(Vec::new());
147 ///
148 /// writer.write_event(Event::Start(self.attrs.borrow()))?;
149 /// // ...
150 /// writer.write_event(Event::End(self.attrs.to_end()))?;
151 /// # Ok(())
152 /// # }}
153 /// ```
154 ///
155 /// [`to_end`]: Self::to_end
156 pub fn borrow(&self) -> BytesStart {
157 BytesStart {
158 buf: Cow::Borrowed(&self.buf),
159 name_len: self.name_len,
160 }
161 }
162
163 /// Creates new paired close tag
164 pub fn to_end(&self) -> BytesEnd {
165 BytesEnd::wrap(self.name().into_inner().into())
166 }
167
168 /// Gets the undecoded raw tag name, as present in the input stream.
169 #[inline]
170 pub fn name(&self) -> QName {
171 QName(&self.buf[..self.name_len])
172 }
173
174 /// Gets the undecoded raw local tag name (excluding namespace) as present
175 /// in the input stream.
176 ///
177 /// All content up to and including the first `:` character is removed from the tag name.
178 #[inline]
179 pub fn local_name(&self) -> LocalName {
180 self.name().into()
181 }
182
183 /// Edit the name of the BytesStart in-place
184 ///
185 /// # Warning
186 ///
187 /// `name` must be a valid name.
188 pub fn set_name(&mut self, name: &[u8]) -> &mut BytesStart<'a> {
189 let bytes = self.buf.to_mut();
190 bytes.splice(..self.name_len, name.iter().cloned());
191 self.name_len = name.len();
192 self
193 }
194
195 /// Gets the undecoded raw tag name, as present in the input stream, which
196 /// is borrowed either to the input, or to the event.
197 ///
198 /// # Lifetimes
199 ///
200 /// - `'a`: Lifetime of the input data from which this event is borrow
201 /// - `'e`: Lifetime of the concrete event instance
202 // TODO: We should made this is a part of public API, but with safe wrapped for a name
203 #[cfg(feature = "serialize")]
204 pub(crate) fn raw_name<'e>(&'e self) -> CowRef<'a, 'e, [u8]> {
205 match self.buf {
206 Cow::Borrowed(b) => CowRef::Input(&b[..self.name_len]),
207 Cow::Owned(ref o) => CowRef::Slice(&o[..self.name_len]),
208 }
209 }
210}
211
212/// Attribute-related methods
213impl<'a> BytesStart<'a> {
214 /// Consumes `self` and yield a new `BytesStart` with additional attributes from an iterator.
215 ///
216 /// The yielded items must be convertible to [`Attribute`] using `Into`.
217 pub fn with_attributes<'b, I>(mut self, attributes: I) -> Self
218 where
219 I: IntoIterator,
220 I::Item: Into<Attribute<'b>>,
221 {
222 self.extend_attributes(attributes);
223 self
224 }
225
226 /// Add additional attributes to this tag using an iterator.
227 ///
228 /// The yielded items must be convertible to [`Attribute`] using `Into`.
229 pub fn extend_attributes<'b, I>(&mut self, attributes: I) -> &mut BytesStart<'a>
230 where
231 I: IntoIterator,
232 I::Item: Into<Attribute<'b>>,
233 {
234 for attr in attributes {
235 self.push_attribute(attr);
236 }
237 self
238 }
239
240 /// Adds an attribute to this element.
241 pub fn push_attribute<'b, A>(&mut self, attr: A)
242 where
243 A: Into<Attribute<'b>>,
244 {
245 let a = attr.into();
246 let bytes = self.buf.to_mut();
247 bytes.push(b' ');
248 bytes.extend_from_slice(a.key.as_ref());
249 bytes.extend_from_slice(b"=\"");
250 bytes.extend_from_slice(a.value.as_ref());
251 bytes.push(b'"');
252 }
253
254 /// Remove all attributes from the ByteStart
255 pub fn clear_attributes(&mut self) -> &mut BytesStart<'a> {
256 self.buf.to_mut().truncate(self.name_len);
257 self
258 }
259
260 /// Returns an iterator over the attributes of this tag.
261 pub fn attributes(&self) -> Attributes {
262 Attributes::wrap(&self.buf, self.name_len, false)
263 }
264
265 /// Returns an iterator over the HTML-like attributes of this tag (no mandatory quotes or `=`).
266 pub fn html_attributes(&self) -> Attributes {
267 Attributes::wrap(&self.buf, self.name_len, true)
268 }
269
270 /// Gets the undecoded raw string with the attributes of this tag as a `&[u8]`,
271 /// including the whitespace after the tag name if there is any.
272 #[inline]
273 pub fn attributes_raw(&self) -> &[u8] {
274 &self.buf[self.name_len..]
275 }
276
277 /// Try to get an attribute
278 pub fn try_get_attribute<N: AsRef<[u8]> + Sized>(
279 &'a self,
280 attr_name: N,
281 ) -> Result<Option<Attribute<'a>>> {
282 for a in self.attributes().with_checks(false) {
283 let a = a?;
284 if a.key.as_ref() == attr_name.as_ref() {
285 return Ok(Some(a));
286 }
287 }
288 Ok(None)
289 }
290}
291
292impl<'a> Debug for BytesStart<'a> {
293 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
294 write!(f, "BytesStart {{ buf: ")?;
295 write_cow_string(f, &self.buf)?;
296 write!(f, ", name_len: {} }}", self.name_len)
297 }
298}
299
300impl<'a> Deref for BytesStart<'a> {
301 type Target = [u8];
302
303 fn deref(&self) -> &[u8] {
304 &self.buf
305 }
306}
307
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesStart<'a> {
    /// Generates a start tag with a non-empty alphanumeric name and an
    /// arbitrary list of attributes.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let s = <&str>::arbitrary(u)?;
        // Reject inputs that cannot serve as a tag name
        if s.is_empty() || !s.chars().all(char::is_alphanumeric) {
            return Err(arbitrary::Error::IncorrectFormat);
        }
        let mut result = Self::new(s);
        // `Vec` is already `IntoIterator`, no explicit `.into_iter()` is needed
        result.extend_attributes(Vec::<(&str, &str)>::arbitrary(u)?);
        Ok(result)
    }

    /// Delegates to the size hint of `&str`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
324////////////////////////////////////////////////////////////////////////////////////////////////////
325
326/// An XML declaration (`Event::Decl`).
327///
328/// [W3C XML 1.1 Prolog and Document Type Declaration](http://w3.org/TR/xml11/#sec-prolog-dtd)
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct BytesDecl<'a> {
    /// The declaration stored as a start tag, so that its pseudo-attributes
    /// (`version`, `encoding`, `standalone`) can be read with the attribute API.
    content: BytesStart<'a>,
}
333
334impl<'a> BytesDecl<'a> {
335 /// Constructs a new `XmlDecl` from the (mandatory) _version_ (should be `1.0` or `1.1`),
336 /// the optional _encoding_ (e.g., `UTF-8`) and the optional _standalone_ (`yes` or `no`)
337 /// attribute.
338 ///
339 /// Does not escape any of its inputs. Always uses double quotes to wrap the attribute values.
340 /// The caller is responsible for escaping attribute values. Shouldn't usually be relevant since
341 /// the double quote character is not allowed in any of the attribute values.
342 pub fn new(
343 version: &str,
344 encoding: Option<&str>,
345 standalone: Option<&str>,
346 ) -> BytesDecl<'static> {
347 // Compute length of the buffer based on supplied attributes
348 // ' encoding=""' => 12
349 let encoding_attr_len = if let Some(xs) = encoding {
350 12 + xs.len()
351 } else {
352 0
353 };
354 // ' standalone=""' => 14
355 let standalone_attr_len = if let Some(xs) = standalone {
356 14 + xs.len()
357 } else {
358 0
359 };
360 // 'xml version=""' => 14
361 let mut buf = String::with_capacity(14 + encoding_attr_len + standalone_attr_len);
362
363 buf.push_str("xml version=\"");
364 buf.push_str(version);
365
366 if let Some(encoding_val) = encoding {
367 buf.push_str("\" encoding=\"");
368 buf.push_str(encoding_val);
369 }
370
371 if let Some(standalone_val) = standalone {
372 buf.push_str("\" standalone=\"");
373 buf.push_str(standalone_val);
374 }
375 buf.push('"');
376
377 BytesDecl {
378 content: BytesStart::from_content(buf, 3),
379 }
380 }
381
382 /// Creates a `BytesDecl` from a `BytesStart`
383 pub fn from_start(start: BytesStart<'a>) -> Self {
384 Self { content: start }
385 }
386
387 /// Gets xml version, excluding quotes (`'` or `"`).
388 ///
389 /// According to the [grammar], the version *must* be the first thing in the declaration.
390 /// This method tries to extract the first thing in the declaration and return it.
391 /// In case of multiple attributes value of the first one is returned.
392 ///
393 /// If version is missed in the declaration, or the first thing is not a version,
394 /// [`Error::XmlDeclWithoutVersion`] will be returned.
395 ///
396 /// # Examples
397 ///
398 /// ```
399 /// use quick_xml::Error;
400 /// use quick_xml::events::{BytesDecl, BytesStart};
401 ///
402 /// // <?xml version='1.1'?>
403 /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0));
404 /// assert_eq!(decl.version().unwrap(), b"1.1".as_ref());
405 ///
406 /// // <?xml version='1.0' version='1.1'?>
407 /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.0' version='1.1'", 0));
408 /// assert_eq!(decl.version().unwrap(), b"1.0".as_ref());
409 ///
410 /// // <?xml encoding='utf-8'?>
411 /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8'", 0));
412 /// match decl.version() {
413 /// Err(Error::XmlDeclWithoutVersion(Some(key))) => assert_eq!(key, "encoding"),
414 /// _ => assert!(false),
415 /// }
416 ///
417 /// // <?xml encoding='utf-8' version='1.1'?>
418 /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8' version='1.1'", 0));
419 /// match decl.version() {
420 /// Err(Error::XmlDeclWithoutVersion(Some(key))) => assert_eq!(key, "encoding"),
421 /// _ => assert!(false),
422 /// }
423 ///
424 /// // <?xml?>
425 /// let decl = BytesDecl::from_start(BytesStart::from_content("", 0));
426 /// match decl.version() {
427 /// Err(Error::XmlDeclWithoutVersion(None)) => {},
428 /// _ => assert!(false),
429 /// }
430 /// ```
431 ///
432 /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
433 pub fn version(&self) -> Result<Cow<[u8]>> {
434 // The version *must* be the first thing in the declaration.
435 match self.content.attributes().with_checks(false).next() {
436 Some(Ok(a)) if a.key.as_ref() == b"version" => Ok(a.value),
437 // first attribute was not "version"
438 Some(Ok(a)) => {
439 let found = from_utf8(a.key.as_ref())?.to_string();
440 Err(Error::XmlDeclWithoutVersion(Some(found)))
441 }
442 // error parsing attributes
443 Some(Err(e)) => Err(e.into()),
444 // no attributes
445 None => Err(Error::XmlDeclWithoutVersion(None)),
446 }
447 }
448
449 /// Gets xml encoding, excluding quotes (`'` or `"`).
450 ///
451 /// Although according to the [grammar] encoding must appear before `"standalone"`
452 /// and after `"version"`, this method does not check that. The first occurrence
453 /// of the attribute will be returned even if there are several. Also, method does
454 /// not restrict symbols that can forming the encoding, so the returned encoding
455 /// name may not correspond to the grammar.
456 ///
457 /// # Examples
458 ///
459 /// ```
460 /// use std::borrow::Cow;
461 /// use quick_xml::Error;
462 /// use quick_xml::events::{BytesDecl, BytesStart};
463 ///
464 /// // <?xml version='1.1'?>
465 /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0));
466 /// assert!(decl.encoding().is_none());
467 ///
468 /// // <?xml encoding='utf-8'?>
469 /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8'", 0));
470 /// match decl.encoding() {
471 /// Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"utf-8"),
472 /// _ => assert!(false),
473 /// }
474 ///
475 /// // <?xml encoding='something_WRONG' encoding='utf-8'?>
476 /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='something_WRONG' encoding='utf-8'", 0));
477 /// match decl.encoding() {
478 /// Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"something_WRONG"),
479 /// _ => assert!(false),
480 /// }
481 /// ```
482 ///
483 /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
484 pub fn encoding(&self) -> Option<Result<Cow<[u8]>>> {
485 self.content
486 .try_get_attribute("encoding")
487 .map(|a| a.map(|a| a.value))
488 .transpose()
489 }
490
491 /// Gets xml standalone, excluding quotes (`'` or `"`).
492 ///
493 /// Although according to the [grammar] standalone flag must appear after `"version"`
494 /// and `"encoding"`, this method does not check that. The first occurrence of the
495 /// attribute will be returned even if there are several. Also, method does not
496 /// restrict symbols that can forming the value, so the returned flag name may not
497 /// correspond to the grammar.
498 ///
499 /// # Examples
500 ///
501 /// ```
502 /// use std::borrow::Cow;
503 /// use quick_xml::Error;
504 /// use quick_xml::events::{BytesDecl, BytesStart};
505 ///
506 /// // <?xml version='1.1'?>
507 /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0));
508 /// assert!(decl.standalone().is_none());
509 ///
510 /// // <?xml standalone='yes'?>
511 /// let decl = BytesDecl::from_start(BytesStart::from_content(" standalone='yes'", 0));
512 /// match decl.standalone() {
513 /// Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"yes"),
514 /// _ => assert!(false),
515 /// }
516 ///
517 /// // <?xml standalone='something_WRONG' encoding='utf-8'?>
518 /// let decl = BytesDecl::from_start(BytesStart::from_content(" standalone='something_WRONG' encoding='utf-8'", 0));
519 /// match decl.standalone() {
520 /// Some(Ok(Cow::Borrowed(flag))) => assert_eq!(flag, b"something_WRONG"),
521 /// _ => assert!(false),
522 /// }
523 /// ```
524 ///
525 /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
526 pub fn standalone(&self) -> Option<Result<Cow<[u8]>>> {
527 self.content
528 .try_get_attribute("standalone")
529 .map(|a| a.map(|a| a.value))
530 .transpose()
531 }
532
533 /// Gets the actual encoding using [_get an encoding_](https://encoding.spec.whatwg.org/#concept-encoding-get)
534 /// algorithm.
535 ///
536 /// If encoding in not known, or `encoding` key was not found, returns `None`.
537 /// In case of duplicated `encoding` key, encoding, corresponding to the first
538 /// one, is returned.
539 #[cfg(feature = "encoding")]
540 pub fn encoder(&self) -> Option<&'static Encoding> {
541 self.encoding()
542 .and_then(|e| e.ok())
543 .and_then(|e| Encoding::for_label(&e))
544 }
545
546 /// Converts the event into an owned event.
547 pub fn into_owned(self) -> BytesDecl<'static> {
548 BytesDecl {
549 content: self.content.into_owned(),
550 }
551 }
552
553 /// Converts the event into a borrowed event.
554 #[inline]
555 pub fn borrow(&self) -> BytesDecl {
556 BytesDecl {
557 content: self.content.borrow(),
558 }
559 }
560}
561
562impl<'a> Deref for BytesDecl<'a> {
563 type Target = [u8];
564
565 fn deref(&self) -> &[u8] {
566 &self.content
567 }
568}
569
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesDecl<'a> {
    /// Generates a declaration from an arbitrary version and optional
    /// encoding and standalone values.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        Ok(Self::new(
            <&str>::arbitrary(u)?,
            Option::<&str>::arbitrary(u)?,
            Option::<&str>::arbitrary(u)?,
        ))
    }

    /// Delegates to the size hint of `&str`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
584
585////////////////////////////////////////////////////////////////////////////////////////////////////
586
/// Closing tag data (`Event::End`): `</name>`.
#[derive(Clone, Eq, PartialEq)]
pub struct BytesEnd<'a> {
    /// Raw (undecoded) name of the tag being closed
    name: Cow<'a, [u8]>,
}
592
593impl<'a> BytesEnd<'a> {
594 /// Internal constructor, used by `Reader`. Supplies data in reader's encoding
595 #[inline]
596 pub(crate) fn wrap(name: Cow<'a, [u8]>) -> Self {
597 BytesEnd { name }
598 }
599
600 /// Creates a new `BytesEnd` borrowing a slice.
601 ///
602 /// # Warning
603 ///
604 /// `name` must be a valid name.
605 #[inline]
606 pub fn new<C: Into<Cow<'a, str>>>(name: C) -> Self {
607 Self::wrap(str_cow_to_bytes(name))
608 }
609
610 /// Converts the event into an owned event.
611 pub fn into_owned(self) -> BytesEnd<'static> {
612 BytesEnd {
613 name: Cow::Owned(self.name.into_owned()),
614 }
615 }
616
617 /// Converts the event into a borrowed event.
618 #[inline]
619 pub fn borrow(&self) -> BytesEnd {
620 BytesEnd {
621 name: Cow::Borrowed(&self.name),
622 }
623 }
624
625 /// Gets the undecoded raw tag name, as present in the input stream.
626 #[inline]
627 pub fn name(&self) -> QName {
628 QName(&self.name)
629 }
630
631 /// Gets the undecoded raw local tag name (excluding namespace) as present
632 /// in the input stream.
633 ///
634 /// All content up to and including the first `:` character is removed from the tag name.
635 #[inline]
636 pub fn local_name(&self) -> LocalName {
637 self.name().into()
638 }
639}
640
641impl<'a> Debug for BytesEnd<'a> {
642 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
643 write!(f, "BytesEnd {{ name: ")?;
644 write_cow_string(f, &self.name)?;
645 write!(f, " }}")
646 }
647}
648
649impl<'a> Deref for BytesEnd<'a> {
650 type Target = [u8];
651
652 fn deref(&self) -> &[u8] {
653 &self.name
654 }
655}
656
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesEnd<'a> {
    /// Generates an end tag whose name is an arbitrary string.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        Ok(Self::new(<&str>::arbitrary(u)?))
    }

    /// Delegates to the size hint of `&str`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
666
667////////////////////////////////////////////////////////////////////////////////////////////////////
668
/// Data from various events (most notably, `Event::Text`) that is stored in XML
/// in escaped form. Internally the data is kept in escaped form as well.
#[derive(Clone, Eq, PartialEq)]
pub struct BytesText<'a> {
    /// Escaped then encoded content of the event. Content is encoded in the XML
    /// document encoding when the event comes from the reader and should be in the
    /// document encoding when the event is passed to the writer
    content: Cow<'a, [u8]>,
    /// Encoding in which the `content` is stored inside the event
    decoder: Decoder,
}
680
681impl<'a> BytesText<'a> {
682 /// Creates a new `BytesText` from an escaped byte sequence in the specified encoding.
683 #[inline]
684 pub(crate) fn wrap<C: Into<Cow<'a, [u8]>>>(content: C, decoder: Decoder) -> Self {
685 Self {
686 content: content.into(),
687 decoder,
688 }
689 }
690
691 /// Creates a new `BytesText` from an escaped string.
692 #[inline]
693 pub fn from_escaped<C: Into<Cow<'a, str>>>(content: C) -> Self {
694 Self::wrap(str_cow_to_bytes(content), Decoder::utf8())
695 }
696
697 /// Creates a new `BytesText` from a string. The string is expected not to
698 /// be escaped.
699 #[inline]
700 pub fn new(content: &'a str) -> Self {
701 Self::from_escaped(escape(content))
702 }
703
704 /// Ensures that all data is owned to extend the object's lifetime if
705 /// necessary.
706 #[inline]
707 pub fn into_owned(self) -> BytesText<'static> {
708 BytesText {
709 content: self.content.into_owned().into(),
710 decoder: self.decoder,
711 }
712 }
713
714 /// Extracts the inner `Cow` from the `BytesText` event container.
715 #[inline]
716 pub fn into_inner(self) -> Cow<'a, [u8]> {
717 self.content
718 }
719
720 /// Converts the event into a borrowed event.
721 #[inline]
722 pub fn borrow(&self) -> BytesText {
723 BytesText {
724 content: Cow::Borrowed(&self.content),
725 decoder: self.decoder,
726 }
727 }
728
729 /// Decodes then unescapes the content of the event.
730 ///
731 /// This will allocate if the value contains any escape sequences or in
732 /// non-UTF-8 encoding.
733 pub fn unescape(&self) -> Result<Cow<'a, str>> {
734 self.unescape_with(|_| None)
735 }
736
737 /// Decodes then unescapes the content of the event with custom entities.
738 ///
739 /// This will allocate if the value contains any escape sequences or in
740 /// non-UTF-8 encoding.
741 pub fn unescape_with<'entity>(
742 &self,
743 resolve_entity: impl FnMut(&str) -> Option<&'entity str>,
744 ) -> Result<Cow<'a, str>> {
745 let decoded = match &self.content {
746 Cow::Borrowed(bytes) => self.decoder.decode(bytes)?,
747 // Convert to owned, because otherwise Cow will be bound with wrong lifetime
748 Cow::Owned(bytes) => self.decoder.decode(bytes)?.into_owned().into(),
749 };
750
751 match unescape_with(&decoded, resolve_entity)? {
752 // Because result is borrowed, no replacements was done and we can use original string
753 Cow::Borrowed(_) => Ok(decoded),
754 Cow::Owned(s) => Ok(s.into()),
755 }
756 }
757
758 /// Removes leading XML whitespace bytes from text content.
759 ///
760 /// Returns `true` if content is empty after that
761 pub fn inplace_trim_start(&mut self) -> bool {
762 self.content = trim_cow(
763 replace(&mut self.content, Cow::Borrowed(b"")),
764 trim_xml_start,
765 );
766 self.content.is_empty()
767 }
768
769 /// Removes trailing XML whitespace bytes from text content.
770 ///
771 /// Returns `true` if content is empty after that
772 pub fn inplace_trim_end(&mut self) -> bool {
773 self.content = trim_cow(replace(&mut self.content, Cow::Borrowed(b"")), trim_xml_end);
774 self.content.is_empty()
775 }
776}
777
778impl<'a> Debug for BytesText<'a> {
779 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
780 write!(f, "BytesText {{ content: ")?;
781 write_cow_string(f, &self.content)?;
782 write!(f, " }}")
783 }
784}
785
786impl<'a> Deref for BytesText<'a> {
787 type Target = [u8];
788
789 fn deref(&self) -> &[u8] {
790 &self.content
791 }
792}
793
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesText<'a> {
    /// Generates a text event from an arbitrary alphanumeric string.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let s = <&str>::arbitrary(u)?;
        // Only alphanumeric strings are accepted as text
        if !s.chars().all(char::is_alphanumeric) {
            return Err(arbitrary::Error::IncorrectFormat);
        }
        Ok(Self::new(s))
    }

    /// Delegates to the size hint of `&str`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
808
809////////////////////////////////////////////////////////////////////////////////////////////////////
810
/// CDATA content contains unescaped data from the reader. If you want to write it as text,
/// [convert](Self::escape) it to [`BytesText`].
#[derive(Clone, Eq, PartialEq)]
pub struct BytesCData<'a> {
    /// Unescaped content of the CDATA section, stored in the encoding
    /// described by `decoder`
    content: Cow<'a, [u8]>,
    /// Encoding in which the `content` is stored inside the event
    decoder: Decoder,
}
819
820impl<'a> BytesCData<'a> {
821 /// Creates a new `BytesCData` from a byte sequence in the specified encoding.
822 #[inline]
823 pub(crate) fn wrap<C: Into<Cow<'a, [u8]>>>(content: C, decoder: Decoder) -> Self {
824 Self {
825 content: content.into(),
826 decoder,
827 }
828 }
829
830 /// Creates a new `BytesCData` from a string.
831 ///
832 /// # Warning
833 ///
834 /// `content` must not contain the `]]>` sequence.
835 #[inline]
836 pub fn new<C: Into<Cow<'a, str>>>(content: C) -> Self {
837 Self::wrap(str_cow_to_bytes(content), Decoder::utf8())
838 }
839
840 /// Ensures that all data is owned to extend the object's lifetime if
841 /// necessary.
842 #[inline]
843 pub fn into_owned(self) -> BytesCData<'static> {
844 BytesCData {
845 content: self.content.into_owned().into(),
846 decoder: self.decoder,
847 }
848 }
849
850 /// Extracts the inner `Cow` from the `BytesCData` event container.
851 #[inline]
852 pub fn into_inner(self) -> Cow<'a, [u8]> {
853 self.content
854 }
855
856 /// Converts the event into a borrowed event.
857 #[inline]
858 pub fn borrow(&self) -> BytesCData {
859 BytesCData {
860 content: Cow::Borrowed(&self.content),
861 decoder: self.decoder,
862 }
863 }
864
865 /// Converts this CDATA content to an escaped version, that can be written
866 /// as an usual text in XML.
867 ///
868 /// This function performs following replacements:
869 ///
870 /// | Character | Replacement
871 /// |-----------|------------
872 /// | `<` | `&lt;`
873 /// | `>` | `&gt;`
874 /// | `&` | `&amp;`
875 /// | `'` | `&apos;`
876 /// | `"` | `&quot;`
877 pub fn escape(self) -> Result<BytesText<'a>> {
878 let decoded = self.decode()?;
879 Ok(BytesText::wrap(
880 match escape(&decoded) {
881 // Because result is borrowed, no replacements was done and we can use original content
882 Cow::Borrowed(_) => self.content,
883 Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
884 },
885 Decoder::utf8(),
886 ))
887 }
888
889 /// Converts this CDATA content to an escaped version, that can be written
890 /// as an usual text in XML.
891 ///
892 /// In XML text content, it is allowed (though not recommended) to leave
893 /// the quote special characters `"` and `'` unescaped.
894 ///
895 /// This function performs following replacements:
896 ///
897 /// | Character | Replacement
898 /// |-----------|------------
899 /// | `<` | `&lt;`
900 /// | `>` | `&gt;`
901 /// | `&` | `&amp;`
902 pub fn partial_escape(self) -> Result<BytesText<'a>> {
903 let decoded = self.decode()?;
904 Ok(BytesText::wrap(
905 match partial_escape(&decoded) {
906 // Because result is borrowed, no replacements was done and we can use original content
907 Cow::Borrowed(_) => self.content,
908 Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
909 },
910 Decoder::utf8(),
911 ))
912 }
913
914 /// Gets content of this text buffer in the specified encoding
915 pub(crate) fn decode(&self) -> Result<Cow<'a, str>> {
916 Ok(match &self.content {
917 Cow::Borrowed(bytes) => self.decoder.decode(bytes)?,
918 // Convert to owned, because otherwise Cow will be bound with wrong lifetime
919 Cow::Owned(bytes) => self.decoder.decode(bytes)?.into_owned().into(),
920 })
921 }
922}
923
924impl<'a> Debug for BytesCData<'a> {
925 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
926 write!(f, "BytesCData {{ content: ")?;
927 write_cow_string(f, &self.content)?;
928 write!(f, " }}")
929 }
930}
931
932impl<'a> Deref for BytesCData<'a> {
933 type Target = [u8];
934
935 fn deref(&self) -> &[u8] {
936 &self.content
937 }
938}
939
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesCData<'a> {
    /// Generates a CDATA event from an arbitrary string.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        Ok(Self::new(<&str>::arbitrary(u)?))
    }

    /// Delegates to the size hint of `&str`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
949
950////////////////////////////////////////////////////////////////////////////////////////////////////
951
/// Event emitted by [`Reader::read_event_into`].
///
/// Every variant except [`Eof`](Self::Eof) carries the raw bytes of the
/// corresponding part of the document.
///
/// [`Reader::read_event_into`]: crate::reader::Reader::read_event_into
#[derive(Clone, Debug, Eq, PartialEq)]
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub enum Event<'a> {
    /// Start tag (with attributes) `<tag attr="value">`.
    Start(BytesStart<'a>),
    /// End tag `</tag>`.
    End(BytesEnd<'a>),
    /// Empty element tag (with attributes) `<tag attr="value" />`.
    Empty(BytesStart<'a>),
    /// Escaped character data between tags.
    Text(BytesText<'a>),
    /// Unescaped character data stored in `<![CDATA[...]]>`.
    CData(BytesCData<'a>),
    /// Comment `<!-- ... -->`.
    Comment(BytesText<'a>),
    /// XML declaration `<?xml ...?>`.
    Decl(BytesDecl<'a>),
    /// Processing instruction `<?...?>`.
    PI(BytesText<'a>),
    /// Document type definition data (DTD) stored in `<!DOCTYPE ...>`.
    DocType(BytesText<'a>),
    /// End of XML document. Carries no data.
    Eof,
}
979
980impl<'a> Event<'a> {
981 /// Converts the event to an owned version, untied to the lifetime of
982 /// buffer used when reading but incurring a new, separate allocation.
983 pub fn into_owned(self) -> Event<'static> {
984 match self {
985 Event::Start(e) => Event::Start(e.into_owned()),
986 Event::End(e) => Event::End(e.into_owned()),
987 Event::Empty(e) => Event::Empty(e.into_owned()),
988 Event::Text(e) => Event::Text(e.into_owned()),
989 Event::Comment(e) => Event::Comment(e.into_owned()),
990 Event::CData(e) => Event::CData(e.into_owned()),
991 Event::Decl(e) => Event::Decl(e.into_owned()),
992 Event::PI(e) => Event::PI(e.into_owned()),
993 Event::DocType(e) => Event::DocType(e.into_owned()),
994 Event::Eof => Event::Eof,
995 }
996 }
997
998 /// Converts the event into a borrowed event.
999 #[inline]
1000 pub fn borrow(&self) -> Event {
1001 match self {
1002 Event::Start(e) => Event::Start(e.borrow()),
1003 Event::End(e) => Event::End(e.borrow()),
1004 Event::Empty(e) => Event::Empty(e.borrow()),
1005 Event::Text(e) => Event::Text(e.borrow()),
1006 Event::Comment(e) => Event::Comment(e.borrow()),
1007 Event::CData(e) => Event::CData(e.borrow()),
1008 Event::Decl(e) => Event::Decl(e.borrow()),
1009 Event::PI(e) => Event::PI(e.borrow()),
1010 Event::DocType(e) => Event::DocType(e.borrow()),
1011 Event::Eof => Event::Eof,
1012 }
1013 }
1014}
1015
1016impl<'a> Deref for Event<'a> {
1017 type Target = [u8];
1018
1019 fn deref(&self) -> &[u8] {
1020 match *self {
1021 Event::Start(ref e: &BytesStart<'_>) | Event::Empty(ref e: &BytesStart<'_>) => e,
1022 Event::End(ref e: &BytesEnd<'_>) => e,
1023 Event::Text(ref e: &BytesText<'_>) => e,
1024 Event::Decl(ref e: &BytesDecl<'_>) => e,
1025 Event::PI(ref e: &BytesText<'_>) => e,
1026 Event::CData(ref e: &BytesCData<'_>) => e,
1027 Event::Comment(ref e: &BytesText<'_>) => e,
1028 Event::DocType(ref e: &BytesText<'_>) => e,
1029 Event::Eof => &[],
1030 }
1031 }
1032}
1033
1034impl<'a> AsRef<Event<'a>> for Event<'a> {
1035 fn as_ref(&self) -> &Event<'a> {
1036 self
1037 }
1038}
1039
1040////////////////////////////////////////////////////////////////////////////////////////////////////
1041
/// Converts string-like content into its byte representation.
///
/// A borrowed string stays borrowed (its bytes are re-borrowed) and an owned
/// string hands its buffer over via `String::into_bytes`.
#[inline]
fn str_cow_to_bytes<'a, C: Into<Cow<'a, str>>>(content: C) -> Cow<'a, [u8]> {
    match content.into() {
        Cow::Borrowed(s) => Cow::Borrowed(s.as_bytes()),
        Cow::Owned(s) => Cow::Owned(s.into_bytes()),
    }
}
1049
1050/// Returns a byte slice with leading XML whitespace bytes removed.
1051///
1052/// 'Whitespace' refers to the definition used by [`is_whitespace`].
1053const fn trim_xml_start(mut bytes: &[u8]) -> &[u8] {
1054 // Note: A pattern matching based approach (instead of indexing) allows
1055 // making the function const.
1056 while let [first: &u8, rest: &[u8] @ ..] = bytes {
1057 if is_whitespace(*first) {
1058 bytes = rest;
1059 } else {
1060 break;
1061 }
1062 }
1063 bytes
1064}
1065
1066/// Returns a byte slice with trailing XML whitespace bytes removed.
1067///
1068/// 'Whitespace' refers to the definition used by [`is_whitespace`].
1069const fn trim_xml_end(mut bytes: &[u8]) -> &[u8] {
1070 // Note: A pattern matching based approach (instead of indexing) allows
1071 // making the function const.
1072 while let [rest: &[u8] @ .., last: &u8] = bytes {
1073 if is_whitespace(*last) {
1074 bytes = rest;
1075 } else {
1076 break;
1077 }
1078 }
1079 bytes
1080}
1081
/// Applies `trim` to the bytes held by `value`, preserving whether the data
/// is borrowed or owned.
///
/// - Borrowed input: the result borrows the sub-slice returned by `trim`.
/// - Owned input: if `trim` returned a slice of a different length, that
///   slice is copied into a new vector; otherwise the original vector is
///   returned untouched.
fn trim_cow<'a, F>(value: Cow<'a, [u8]>, trim: F) -> Cow<'a, [u8]>
where
    F: FnOnce(&[u8]) -> &[u8],
{
    match value {
        Cow::Borrowed(bytes) => Cow::Borrowed(trim(bytes)),
        Cow::Owned(bytes) => {
            let trimmed = trim(&bytes);
            if trimmed.len() == bytes.len() {
                // Nothing was removed: hand the existing buffer back.
                Cow::Owned(bytes)
            } else {
                Cow::Owned(trimmed.to_vec())
            }
        }
    }
}
1097
#[cfg(test)]
mod test {
    use super::*;
    use pretty_assertions::assert_eq;

    /// A freshly created start tag reports the length and name it was built from.
    #[test]
    fn bytestart_create() {
        let start = BytesStart::new("test");
        assert_eq!(start.len(), 4);
        assert_eq!(start.name(), QName(b"test"));
    }

    /// Pushing an attribute grows the tag; renaming it afterwards changes the
    /// reported length and name accordingly.
    #[test]
    fn bytestart_set_name() {
        let mut start = BytesStart::new("test");
        assert_eq!(start.len(), 4);
        assert_eq!(start.name(), QName(b"test"));
        assert_eq!(start.attributes_raw(), b"");

        start.push_attribute(("x", "a"));
        assert_eq!(start.len(), 10);
        assert_eq!(start.attributes_raw(), b" x=\"a\"");

        start.set_name(b"g");
        assert_eq!(start.len(), 7);
        assert_eq!(start.name(), QName(b"g"));
    }

    /// Clearing attributes leaves only the tag name behind.
    #[test]
    fn bytestart_clear_attributes() {
        let mut start = BytesStart::new("test");
        start.push_attribute(("x", "y\"z"));
        start.push_attribute(("x", "y\"z"));

        start.clear_attributes();
        assert!(start.attributes().next().is_none());
        assert_eq!(start.len(), 4);
        assert_eq!(start.name(), QName(b"test"));
    }
}
1135