1 | //! Defines zero-copy XML events used throughout this library.
|
2 | //!
|
//! An XML event often represents part of an XML element.
|
4 | //! They occur both during reading and writing and are
|
5 | //! usually used with the stream-oriented API.
|
6 | //!
|
7 | //! For example, the XML element
|
8 | //! ```xml
|
9 | //! <name attr="value">Inner text</name>
|
10 | //! ```
|
11 | //! consists of the three events `Start`, `Text` and `End`.
|
12 | //! They can also represent other parts in an XML document like the
|
13 | //! XML declaration. Each Event usually contains further information,
|
14 | //! like the tag name, the attribute or the inner text.
|
15 | //!
|
16 | //! See [`Event`] for a list of all possible events.
|
17 | //!
|
18 | //! # Reading
|
//! When reading an XML stream, the events are emitted by [`Reader::read_event`]
|
20 | //! and [`Reader::read_event_into`]. You must listen
|
21 | //! for the different types of events you are interested in.
|
22 | //!
|
23 | //! See [`Reader`] for further information.
|
24 | //!
|
25 | //! # Writing
|
26 | //! When writing the XML document, you must create the XML element
|
27 | //! by constructing the events it consists of and pass them to the writer
|
28 | //! sequentially.
|
29 | //!
|
30 | //! See [`Writer`] for further information.
|
31 | //!
|
32 | //! [`Reader::read_event`]: crate::reader::Reader::read_event
|
33 | //! [`Reader::read_event_into`]: crate::reader::Reader::read_event_into
|
34 | //! [`Reader`]: crate::reader::Reader
|
35 | //! [`Writer`]: crate::writer::Writer
|
36 | //! [`Event`]: crate::events::Event
|
37 |
|
38 | pub mod attributes;
|
39 |
|
40 | #[cfg (feature = "encoding" )]
|
41 | use encoding_rs::Encoding;
|
42 | use std::borrow::Cow;
|
43 | use std::fmt::{self, Debug, Formatter};
|
44 | use std::iter::FusedIterator;
|
45 | use std::mem::replace;
|
46 | use std::ops::Deref;
|
47 | use std::str::from_utf8;
|
48 |
|
49 | use crate::encoding::{Decoder, EncodingError};
|
50 | use crate::errors::{Error, IllFormedError};
|
51 | use crate::escape::{
|
52 | escape, minimal_escape, partial_escape, resolve_predefined_entity, unescape_with,
|
53 | };
|
54 | use crate::name::{LocalName, QName};
|
55 | #[cfg (feature = "serialize" )]
|
56 | use crate::utils::CowRef;
|
57 | use crate::utils::{name_len, trim_xml_end, trim_xml_start, write_cow_string, Bytes};
|
58 | use attributes::{AttrError, Attribute, Attributes};
|
59 |
|
/// Opening tag data (`Event::Start`), with optional attributes: `<name attr="value">`.
///
/// Use the [`name`] or [`local_name`] methods to read the tag name and the
/// [`attributes`] method to iterate over the attributes.
///
/// This event implements `Deref<Target = [u8]>`. Dereferencing yields the raw
/// content of the tag found between `<` and `>` (or `/>`):
///
/// ```
/// # use quick_xml::events::{BytesStart, Event};
/// # use quick_xml::reader::Reader;
/// # use pretty_assertions::assert_eq;
/// // Remember, that \ at the end of string literal strips
/// // all space characters to the first non-space character
/// let mut reader = Reader::from_str("\
///     <element a1 = 'val1' a2=\"val2\" />\
///     <element a1 = 'val1' a2=\"val2\" >"
/// );
/// let content = "element a1 = 'val1' a2=\"val2\" ";
/// let event = BytesStart::from_content(content, 7);
///
/// assert_eq!(reader.read_event().unwrap(), Event::Empty(event.borrow()));
/// assert_eq!(reader.read_event().unwrap(), Event::Start(event.borrow()));
/// // deref coercion of &BytesStart to &[u8]
/// assert_eq!(&event as &[u8], content.as_bytes());
/// // AsRef<[u8]> for &T + deref coercion
/// assert_eq!(event.as_ref(), content.as_bytes());
/// ```
///
/// [`name`]: Self::name
/// [`local_name`]: Self::local_name
/// [`attributes`]: Self::attributes
#[derive(Clone, Eq, PartialEq)]
pub struct BytesStart<'a> {
    /// Raw content of the element (name followed by attributes), before any
    /// UTF-8 conversion
    pub(crate) buf: Cow<'a, [u8]>,
    /// Length of the element name in bytes; the name starts at the beginning
    /// of `buf` and ends at this offset
    pub(crate) name_len: usize,
}
|
99 |
|
100 | impl<'a> BytesStart<'a> {
|
101 | /// Internal constructor, used by `Reader`. Supplies data in reader's encoding
|
102 | #[inline ]
|
103 | pub(crate) const fn wrap(content: &'a [u8], name_len: usize) -> Self {
|
104 | BytesStart {
|
105 | buf: Cow::Borrowed(content),
|
106 | name_len,
|
107 | }
|
108 | }
|
109 |
|
110 | /// Creates a new `BytesStart` from the given name.
|
111 | ///
|
112 | /// # Warning
|
113 | ///
|
114 | /// `name` must be a valid name.
|
115 | #[inline ]
|
116 | pub fn new<C: Into<Cow<'a, str>>>(name: C) -> Self {
|
117 | let buf = str_cow_to_bytes(name);
|
118 | BytesStart {
|
119 | name_len: buf.len(),
|
120 | buf,
|
121 | }
|
122 | }
|
123 |
|
124 | /// Creates a new `BytesStart` from the given content (name + attributes).
|
125 | ///
|
126 | /// # Warning
|
127 | ///
|
128 | /// `&content[..name_len]` must be a valid name, and the remainder of `content`
|
129 | /// must be correctly-formed attributes. Neither are checked, it is possible
|
130 | /// to generate invalid XML if `content` or `name_len` are incorrect.
|
131 | #[inline ]
|
132 | pub fn from_content<C: Into<Cow<'a, str>>>(content: C, name_len: usize) -> Self {
|
133 | BytesStart {
|
134 | buf: str_cow_to_bytes(content),
|
135 | name_len,
|
136 | }
|
137 | }
|
138 |
|
139 | /// Converts the event into an owned event.
|
140 | pub fn into_owned(self) -> BytesStart<'static> {
|
141 | BytesStart {
|
142 | buf: Cow::Owned(self.buf.into_owned()),
|
143 | name_len: self.name_len,
|
144 | }
|
145 | }
|
146 |
|
147 | /// Converts the event into an owned event without taking ownership of Event
|
148 | pub fn to_owned(&self) -> BytesStart<'static> {
|
149 | BytesStart {
|
150 | buf: Cow::Owned(self.buf.clone().into_owned()),
|
151 | name_len: self.name_len,
|
152 | }
|
153 | }
|
154 |
|
155 | /// Converts the event into a borrowed event. Most useful when paired with [`to_end`].
|
156 | ///
|
157 | /// # Example
|
158 | ///
|
159 | /// ```
|
160 | /// use quick_xml::events::{BytesStart, Event};
|
161 | /// # use quick_xml::writer::Writer;
|
162 | /// # use quick_xml::Error;
|
163 | ///
|
164 | /// struct SomeStruct<'a> {
|
165 | /// attrs: BytesStart<'a>,
|
166 | /// // ...
|
167 | /// }
|
168 | /// # impl<'a> SomeStruct<'a> {
|
169 | /// # fn example(&self) -> Result<(), Error> {
|
170 | /// # let mut writer = Writer::new(Vec::new());
|
171 | ///
|
172 | /// writer.write_event(Event::Start(self.attrs.borrow()))?;
|
173 | /// // ...
|
174 | /// writer.write_event(Event::End(self.attrs.to_end()))?;
|
175 | /// # Ok(())
|
176 | /// # }}
|
177 | /// ```
|
178 | ///
|
179 | /// [`to_end`]: Self::to_end
|
180 | pub fn borrow(&self) -> BytesStart {
|
181 | BytesStart {
|
182 | buf: Cow::Borrowed(&self.buf),
|
183 | name_len: self.name_len,
|
184 | }
|
185 | }
|
186 |
|
187 | /// Creates new paired close tag
|
188 | #[inline ]
|
189 | pub fn to_end(&self) -> BytesEnd {
|
190 | BytesEnd::from(self.name())
|
191 | }
|
192 |
|
193 | /// Gets the undecoded raw tag name, as present in the input stream.
|
194 | #[inline ]
|
195 | pub fn name(&self) -> QName {
|
196 | QName(&self.buf[..self.name_len])
|
197 | }
|
198 |
|
199 | /// Gets the undecoded raw local tag name (excluding namespace) as present
|
200 | /// in the input stream.
|
201 | ///
|
202 | /// All content up to and including the first `:` character is removed from the tag name.
|
203 | #[inline ]
|
204 | pub fn local_name(&self) -> LocalName {
|
205 | self.name().into()
|
206 | }
|
207 |
|
208 | /// Edit the name of the BytesStart in-place
|
209 | ///
|
210 | /// # Warning
|
211 | ///
|
212 | /// `name` must be a valid name.
|
213 | pub fn set_name(&mut self, name: &[u8]) -> &mut BytesStart<'a> {
|
214 | let bytes = self.buf.to_mut();
|
215 | bytes.splice(..self.name_len, name.iter().cloned());
|
216 | self.name_len = name.len();
|
217 | self
|
218 | }
|
219 |
|
220 | /// Gets the undecoded raw tag name, as present in the input stream, which
|
221 | /// is borrowed either to the input, or to the event.
|
222 | ///
|
223 | /// # Lifetimes
|
224 | ///
|
225 | /// - `'a`: Lifetime of the input data from which this event is borrow
|
226 | /// - `'e`: Lifetime of the concrete event instance
|
227 | // TODO: We should made this is a part of public API, but with safe wrapped for a name
|
228 | #[cfg (feature = "serialize" )]
|
229 | pub(crate) fn raw_name<'e>(&'e self) -> CowRef<'a, 'e, [u8]> {
|
230 | match self.buf {
|
231 | Cow::Borrowed(b) => CowRef::Input(&b[..self.name_len]),
|
232 | Cow::Owned(ref o) => CowRef::Slice(&o[..self.name_len]),
|
233 | }
|
234 | }
|
235 | }
|
236 |
|
237 | /// Attribute-related methods
|
238 | impl<'a> BytesStart<'a> {
|
239 | /// Consumes `self` and yield a new `BytesStart` with additional attributes from an iterator.
|
240 | ///
|
241 | /// The yielded items must be convertible to [`Attribute`] using `Into`.
|
242 | pub fn with_attributes<'b, I>(mut self, attributes: I) -> Self
|
243 | where
|
244 | I: IntoIterator,
|
245 | I::Item: Into<Attribute<'b>>,
|
246 | {
|
247 | self.extend_attributes(attributes);
|
248 | self
|
249 | }
|
250 |
|
251 | /// Add additional attributes to this tag using an iterator.
|
252 | ///
|
253 | /// The yielded items must be convertible to [`Attribute`] using `Into`.
|
254 | pub fn extend_attributes<'b, I>(&mut self, attributes: I) -> &mut BytesStart<'a>
|
255 | where
|
256 | I: IntoIterator,
|
257 | I::Item: Into<Attribute<'b>>,
|
258 | {
|
259 | for attr in attributes {
|
260 | self.push_attribute(attr);
|
261 | }
|
262 | self
|
263 | }
|
264 |
|
265 | /// Adds an attribute to this element.
|
266 | pub fn push_attribute<'b, A>(&mut self, attr: A)
|
267 | where
|
268 | A: Into<Attribute<'b>>,
|
269 | {
|
270 | self.buf.to_mut().push(b' ' );
|
271 | self.push_attr(attr.into());
|
272 | }
|
273 |
|
274 | /// Remove all attributes from the ByteStart
|
275 | pub fn clear_attributes(&mut self) -> &mut BytesStart<'a> {
|
276 | self.buf.to_mut().truncate(self.name_len);
|
277 | self
|
278 | }
|
279 |
|
280 | /// Returns an iterator over the attributes of this tag.
|
281 | pub fn attributes(&self) -> Attributes {
|
282 | Attributes::wrap(&self.buf, self.name_len, false)
|
283 | }
|
284 |
|
285 | /// Returns an iterator over the HTML-like attributes of this tag (no mandatory quotes or `=`).
|
286 | pub fn html_attributes(&self) -> Attributes {
|
287 | Attributes::wrap(&self.buf, self.name_len, true)
|
288 | }
|
289 |
|
290 | /// Gets the undecoded raw string with the attributes of this tag as a `&[u8]`,
|
291 | /// including the whitespace after the tag name if there is any.
|
292 | #[inline ]
|
293 | pub fn attributes_raw(&self) -> &[u8] {
|
294 | &self.buf[self.name_len..]
|
295 | }
|
296 |
|
297 | /// Try to get an attribute
|
298 | pub fn try_get_attribute<N: AsRef<[u8]> + Sized>(
|
299 | &'a self,
|
300 | attr_name: N,
|
301 | ) -> Result<Option<Attribute<'a>>, AttrError> {
|
302 | for a in self.attributes().with_checks(false) {
|
303 | let a = a?;
|
304 | if a.key.as_ref() == attr_name.as_ref() {
|
305 | return Ok(Some(a));
|
306 | }
|
307 | }
|
308 | Ok(None)
|
309 | }
|
310 |
|
311 | /// Adds an attribute to this element.
|
312 | pub(crate) fn push_attr<'b>(&mut self, attr: Attribute<'b>) {
|
313 | let bytes = self.buf.to_mut();
|
314 | bytes.extend_from_slice(attr.key.as_ref());
|
315 | bytes.extend_from_slice(b"= \"" );
|
316 | // FIXME: need to escape attribute content
|
317 | bytes.extend_from_slice(attr.value.as_ref());
|
318 | bytes.push(b'"' );
|
319 | }
|
320 |
|
321 | /// Adds new line in existing element
|
322 | pub(crate) fn push_newline(&mut self) {
|
323 | self.buf.to_mut().push(b' \n' );
|
324 | }
|
325 |
|
326 | /// Adds indentation bytes in existing element
|
327 | pub(crate) fn push_indent(&mut self, indent: &[u8]) {
|
328 | self.buf.to_mut().extend_from_slice(indent);
|
329 | }
|
330 | }
|
331 |
|
332 | impl<'a> Debug for BytesStart<'a> {
|
333 | fn fmt(&self, f: &mut Formatter) -> fmt::Result {
|
334 | write!(f, "BytesStart {{ buf: " )?;
|
335 | write_cow_string(f, &self.buf)?;
|
336 | write!(f, ", name_len: {} }}" , self.name_len)
|
337 | }
|
338 | }
|
339 |
|
340 | impl<'a> Deref for BytesStart<'a> {
|
341 | type Target = [u8];
|
342 |
|
343 | fn deref(&self) -> &[u8] {
|
344 | &self.buf
|
345 | }
|
346 | }
|
347 |
|
348 | impl<'a> From<QName<'a>> for BytesStart<'a> {
|
349 | #[inline ]
|
350 | fn from(name: QName<'a>) -> Self {
|
351 | let name: &[u8] = name.into_inner();
|
352 | Self::wrap(content:name, name.len())
|
353 | }
|
354 | }
|
355 |
|
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesStart<'a> {
    /// Generates an opening tag with an arbitrary non-empty alphanumeric name
    /// and an arbitrary list of `(key, value)` attributes. Any other name is
    /// rejected with `IncorrectFormat`.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let name = <&str>::arbitrary(u)?;
        let usable = !name.is_empty() && name.chars().all(char::is_alphanumeric);
        if !usable {
            return Err(arbitrary::Error::IncorrectFormat);
        }
        let tag = Self::new(name);
        let attrs = Vec::<(&str, &str)>::arbitrary(u)?;
        Ok(tag.with_attributes(attrs))
    }

    /// Reports the size hint of the `&str` used for the name.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
|
372 | ////////////////////////////////////////////////////////////////////////////////////////////////////
|
373 |
|
/// Closing tag data (`Event::End`): `</name>`.
///
/// Use the [`name`] or [`local_name`] methods to read the tag name.
///
/// This event implements `Deref<Target = [u8]>`. Dereferencing yields the
/// content of this event between `</` and `>`.
///
/// Note, that inner text will not contain `>` character inside:
///
/// ```
/// # use quick_xml::events::{BytesEnd, Event};
/// # use quick_xml::reader::Reader;
/// # use pretty_assertions::assert_eq;
/// let mut reader = Reader::from_str(r#"<element></element a1 = 'val1' a2="val2" >"#);
/// // Note, that this entire string considered as a .name()
/// let content = "element a1 = 'val1' a2=\"val2\" ";
/// let event = BytesEnd::new(content);
///
/// reader.config_mut().trim_markup_names_in_closing_tags = false;
/// reader.config_mut().check_end_names = false;
/// reader.read_event().unwrap(); // Skip `<element>`
///
/// assert_eq!(reader.read_event().unwrap(), Event::End(event.borrow()));
/// assert_eq!(event.name().as_ref(), content.as_bytes());
/// // deref coercion of &BytesEnd to &[u8]
/// assert_eq!(&event as &[u8], content.as_bytes());
/// // AsRef<[u8]> for &T + deref coercion
/// assert_eq!(event.as_ref(), content.as_bytes());
/// ```
///
/// [`name`]: Self::name
/// [`local_name`]: Self::local_name
#[derive(Clone, Eq, PartialEq)]
pub struct BytesEnd<'a> {
    /// Raw bytes of the closing tag, i.e. everything between `</` and `>`
    name: Cow<'a, [u8]>,
}
|
410 |
|
411 | impl<'a> BytesEnd<'a> {
|
412 | /// Internal constructor, used by `Reader`. Supplies data in reader's encoding
|
413 | #[inline ]
|
414 | pub(crate) const fn wrap(name: Cow<'a, [u8]>) -> Self {
|
415 | BytesEnd { name }
|
416 | }
|
417 |
|
418 | /// Creates a new `BytesEnd` borrowing a slice.
|
419 | ///
|
420 | /// # Warning
|
421 | ///
|
422 | /// `name` must be a valid name.
|
423 | #[inline ]
|
424 | pub fn new<C: Into<Cow<'a, str>>>(name: C) -> Self {
|
425 | Self::wrap(str_cow_to_bytes(name))
|
426 | }
|
427 |
|
428 | /// Converts the event into an owned event.
|
429 | pub fn into_owned(self) -> BytesEnd<'static> {
|
430 | BytesEnd {
|
431 | name: Cow::Owned(self.name.into_owned()),
|
432 | }
|
433 | }
|
434 |
|
435 | /// Converts the event into a borrowed event.
|
436 | #[inline ]
|
437 | pub fn borrow(&self) -> BytesEnd {
|
438 | BytesEnd {
|
439 | name: Cow::Borrowed(&self.name),
|
440 | }
|
441 | }
|
442 |
|
443 | /// Gets the undecoded raw tag name, as present in the input stream.
|
444 | #[inline ]
|
445 | pub fn name(&self) -> QName {
|
446 | QName(&self.name)
|
447 | }
|
448 |
|
449 | /// Gets the undecoded raw local tag name (excluding namespace) as present
|
450 | /// in the input stream.
|
451 | ///
|
452 | /// All content up to and including the first `:` character is removed from the tag name.
|
453 | #[inline ]
|
454 | pub fn local_name(&self) -> LocalName {
|
455 | self.name().into()
|
456 | }
|
457 | }
|
458 |
|
459 | impl<'a> Debug for BytesEnd<'a> {
|
460 | fn fmt(&self, f: &mut Formatter) -> fmt::Result {
|
461 | write!(f, "BytesEnd {{ name: " )?;
|
462 | write_cow_string(f, &self.name)?;
|
463 | write!(f, " }}" )
|
464 | }
|
465 | }
|
466 |
|
467 | impl<'a> Deref for BytesEnd<'a> {
|
468 | type Target = [u8];
|
469 |
|
470 | fn deref(&self) -> &[u8] {
|
471 | &self.name
|
472 | }
|
473 | }
|
474 |
|
475 | impl<'a> From<QName<'a>> for BytesEnd<'a> {
|
476 | #[inline ]
|
477 | fn from(name: QName<'a>) -> Self {
|
478 | Self::wrap(name.into_inner().into())
|
479 | }
|
480 | }
|
481 |
|
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesEnd<'a> {
    /// Generates a closing tag whose name is an arbitrary string.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let name = <&str>::arbitrary(u)?;
        Ok(Self::new(name))
    }

    /// Reports the size hint of the `&str` used for the name.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
|
491 |
|
492 | ////////////////////////////////////////////////////////////////////////////////////////////////////
|
493 |
|
494 | /// Data from various events (most notably, `Event::Text`) that stored in XML
|
495 | /// in escaped form. Internally data is stored in escaped form.
|
496 | ///
|
497 | /// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
|
498 | /// returns the content of this event. In case of comment this is everything
|
499 | /// between `<!--` and `-->` and the text of comment will not contain `-->` inside.
|
500 | /// In case of DTD this is everything between `<!DOCTYPE` + spaces and closing `>`
|
501 | /// (i.e. in case of DTD the first character is never space):
|
502 | ///
|
503 | /// ```
|
504 | /// # use quick_xml::events::{BytesText, Event};
|
505 | /// # use quick_xml::reader::Reader;
|
506 | /// # use pretty_assertions::assert_eq;
|
507 | /// // Remember, that \ at the end of string literal strips
|
508 | /// // all space characters to the first non-space character
|
509 | /// let mut reader = Reader::from_str(" \
|
510 | /// <!DOCTYPE comment or text > \
|
511 | /// comment or text \
|
512 | /// <!--comment or text -->"
|
513 | /// );
|
514 | /// let content = "comment or text " ;
|
515 | /// let event = BytesText::new(content);
|
516 | ///
|
517 | /// assert_eq!(reader.read_event().unwrap(), Event::DocType(event.borrow()));
|
518 | /// assert_eq!(reader.read_event().unwrap(), Event::Text(event.borrow()));
|
519 | /// assert_eq!(reader.read_event().unwrap(), Event::Comment(event.borrow()));
|
520 | /// // deref coercion of &BytesText to &[u8]
|
521 | /// assert_eq!(&event as &[u8], content.as_bytes());
|
522 | /// // AsRef<[u8]> for &T + deref coercion
|
523 | /// assert_eq!(event.as_ref(), content.as_bytes());
|
524 | /// ```
|
525 | #[derive (Clone, Eq, PartialEq)]
|
526 | pub struct BytesText<'a> {
|
527 | /// Escaped then encoded content of the event. Content is encoded in the XML
|
528 | /// document encoding when event comes from the reader and should be in the
|
529 | /// document encoding when event passed to the writer
|
530 | content: Cow<'a, [u8]>,
|
531 | /// Encoding in which the `content` is stored inside the event
|
532 | decoder: Decoder,
|
533 | }
|
534 |
|
535 | impl<'a> BytesText<'a> {
|
536 | /// Creates a new `BytesText` from an escaped byte sequence in the specified encoding.
|
537 | #[inline ]
|
538 | pub(crate) fn wrap<C: Into<Cow<'a, [u8]>>>(content: C, decoder: Decoder) -> Self {
|
539 | Self {
|
540 | content: content.into(),
|
541 | decoder,
|
542 | }
|
543 | }
|
544 |
|
545 | /// Creates a new `BytesText` from an escaped string.
|
546 | #[inline ]
|
547 | pub fn from_escaped<C: Into<Cow<'a, str>>>(content: C) -> Self {
|
548 | Self::wrap(str_cow_to_bytes(content), Decoder::utf8())
|
549 | }
|
550 |
|
551 | /// Creates a new `BytesText` from a string. The string is expected not to
|
552 | /// be escaped.
|
553 | #[inline ]
|
554 | pub fn new(content: &'a str) -> Self {
|
555 | Self::from_escaped(escape(content))
|
556 | }
|
557 |
|
558 | /// Ensures that all data is owned to extend the object's lifetime if
|
559 | /// necessary.
|
560 | #[inline ]
|
561 | pub fn into_owned(self) -> BytesText<'static> {
|
562 | BytesText {
|
563 | content: self.content.into_owned().into(),
|
564 | decoder: self.decoder,
|
565 | }
|
566 | }
|
567 |
|
568 | /// Extracts the inner `Cow` from the `BytesText` event container.
|
569 | #[inline ]
|
570 | pub fn into_inner(self) -> Cow<'a, [u8]> {
|
571 | self.content
|
572 | }
|
573 |
|
574 | /// Converts the event into a borrowed event.
|
575 | #[inline ]
|
576 | pub fn borrow(&self) -> BytesText {
|
577 | BytesText {
|
578 | content: Cow::Borrowed(&self.content),
|
579 | decoder: self.decoder,
|
580 | }
|
581 | }
|
582 |
|
583 | /// Decodes then unescapes the content of the event.
|
584 | ///
|
585 | /// This will allocate if the value contains any escape sequences or in
|
586 | /// non-UTF-8 encoding.
|
587 | pub fn unescape(&self) -> Result<Cow<'a, str>, Error> {
|
588 | self.unescape_with(resolve_predefined_entity)
|
589 | }
|
590 |
|
591 | /// Decodes then unescapes the content of the event with custom entities.
|
592 | ///
|
593 | /// This will allocate if the value contains any escape sequences or in
|
594 | /// non-UTF-8 encoding.
|
595 | pub fn unescape_with<'entity>(
|
596 | &self,
|
597 | resolve_entity: impl FnMut(&str) -> Option<&'entity str>,
|
598 | ) -> Result<Cow<'a, str>, Error> {
|
599 | let decoded = self.decoder.decode_cow(&self.content)?;
|
600 |
|
601 | match unescape_with(&decoded, resolve_entity)? {
|
602 | // Because result is borrowed, no replacements was done and we can use original string
|
603 | Cow::Borrowed(_) => Ok(decoded),
|
604 | Cow::Owned(s) => Ok(s.into()),
|
605 | }
|
606 | }
|
607 |
|
608 | /// Removes leading XML whitespace bytes from text content.
|
609 | ///
|
610 | /// Returns `true` if content is empty after that
|
611 | pub fn inplace_trim_start(&mut self) -> bool {
|
612 | self.content = trim_cow(
|
613 | replace(&mut self.content, Cow::Borrowed(b"" )),
|
614 | trim_xml_start,
|
615 | );
|
616 | self.content.is_empty()
|
617 | }
|
618 |
|
619 | /// Removes trailing XML whitespace bytes from text content.
|
620 | ///
|
621 | /// Returns `true` if content is empty after that
|
622 | pub fn inplace_trim_end(&mut self) -> bool {
|
623 | self.content = trim_cow(replace(&mut self.content, Cow::Borrowed(b"" )), trim_xml_end);
|
624 | self.content.is_empty()
|
625 | }
|
626 | }
|
627 |
|
628 | impl<'a> Debug for BytesText<'a> {
|
629 | fn fmt(&self, f: &mut Formatter) -> fmt::Result {
|
630 | write!(f, "BytesText {{ content: " )?;
|
631 | write_cow_string(f, &self.content)?;
|
632 | write!(f, " }}" )
|
633 | }
|
634 | }
|
635 |
|
636 | impl<'a> Deref for BytesText<'a> {
|
637 | type Target = [u8];
|
638 |
|
639 | fn deref(&self) -> &[u8] {
|
640 | &self.content
|
641 | }
|
642 | }
|
643 |
|
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesText<'a> {
    /// Generates a text event from an arbitrary string that consists only of
    /// alphanumeric characters. Any other string is rejected with
    /// `IncorrectFormat`.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let text = <&str>::arbitrary(u)?;
        if text.chars().all(char::is_alphanumeric) {
            Ok(Self::new(text))
        } else {
            Err(arbitrary::Error::IncorrectFormat)
        }
    }

    /// Reports the size hint of the `&str` used for the content.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
|
658 |
|
659 | ////////////////////////////////////////////////////////////////////////////////////////////////////
|
660 |
|
661 | /// CDATA content contains unescaped data from the reader. If you want to write them as a text,
|
662 | /// [convert](Self::escape) it to [`BytesText`].
|
663 | ///
|
664 | /// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
|
665 | /// returns the content of this event between `<![CDATA[` and `]]>`.
|
666 | ///
|
667 | /// Note, that inner text will not contain `]]>` sequence inside:
|
668 | ///
|
669 | /// ```
|
670 | /// # use quick_xml::events::{BytesCData, Event};
|
671 | /// # use quick_xml::reader::Reader;
|
672 | /// # use pretty_assertions::assert_eq;
|
673 | /// let mut reader = Reader::from_str("<![CDATA[ CDATA section ]]>" );
|
674 | /// let content = " CDATA section " ;
|
675 | /// let event = BytesCData::new(content);
|
676 | ///
|
677 | /// assert_eq!(reader.read_event().unwrap(), Event::CData(event.borrow()));
|
678 | /// // deref coercion of &BytesCData to &[u8]
|
679 | /// assert_eq!(&event as &[u8], content.as_bytes());
|
680 | /// // AsRef<[u8]> for &T + deref coercion
|
681 | /// assert_eq!(event.as_ref(), content.as_bytes());
|
682 | /// ```
|
683 | #[derive (Clone, Eq, PartialEq)]
|
684 | pub struct BytesCData<'a> {
|
685 | content: Cow<'a, [u8]>,
|
686 | /// Encoding in which the `content` is stored inside the event
|
687 | decoder: Decoder,
|
688 | }
|
689 |
|
690 | impl<'a> BytesCData<'a> {
|
691 | /// Creates a new `BytesCData` from a byte sequence in the specified encoding.
|
692 | #[inline ]
|
693 | pub(crate) fn wrap<C: Into<Cow<'a, [u8]>>>(content: C, decoder: Decoder) -> Self {
|
694 | Self {
|
695 | content: content.into(),
|
696 | decoder,
|
697 | }
|
698 | }
|
699 |
|
700 | /// Creates a new `BytesCData` from a string.
|
701 | ///
|
702 | /// # Warning
|
703 | ///
|
704 | /// `content` must not contain the `]]>` sequence. You can use
|
705 | /// [`BytesCData::escaped`] to escape the content instead.
|
706 | #[inline ]
|
707 | pub fn new<C: Into<Cow<'a, str>>>(content: C) -> Self {
|
708 | Self::wrap(str_cow_to_bytes(content), Decoder::utf8())
|
709 | }
|
710 |
|
711 | /// Creates an iterator of `BytesCData` from a string.
|
712 | ///
|
713 | /// If a string contains `]]>`, it needs to be split into multiple `CDATA`
|
714 | /// sections, splitting the `]]` and `>` characters, because the CDATA closing
|
715 | /// sequence cannot be escaped. This iterator yields a `BytesCData` instance
|
716 | /// for each of those sections.
|
717 | ///
|
718 | /// # Examples
|
719 | ///
|
720 | /// ```
|
721 | /// # use quick_xml::events::BytesCData;
|
722 | /// # use pretty_assertions::assert_eq;
|
723 | /// let content = "" ;
|
724 | /// let cdata = BytesCData::escaped(content).collect::<Vec<_>>();
|
725 | /// assert_eq!(cdata, &[BytesCData::new("" )]);
|
726 | ///
|
727 | /// let content = "Certain tokens like ]]> can be difficult and <invalid>" ;
|
728 | /// let cdata = BytesCData::escaped(content).collect::<Vec<_>>();
|
729 | /// assert_eq!(cdata, &[
|
730 | /// BytesCData::new("Certain tokens like ]]" ),
|
731 | /// BytesCData::new("> can be difficult and <invalid>" ),
|
732 | /// ]);
|
733 | ///
|
734 | /// let content = "foo]]>bar]]>baz]]>quux" ;
|
735 | /// let cdata = BytesCData::escaped(content).collect::<Vec<_>>();
|
736 | /// assert_eq!(cdata, &[
|
737 | /// BytesCData::new("foo]]" ),
|
738 | /// BytesCData::new(">bar]]" ),
|
739 | /// BytesCData::new(">baz]]" ),
|
740 | /// BytesCData::new(">quux" ),
|
741 | /// ]);
|
742 | /// ```
|
743 | #[inline ]
|
744 | pub fn escaped(content: &'a str) -> CDataIterator<'a> {
|
745 | CDataIterator {
|
746 | unprocessed: content.as_bytes(),
|
747 | finished: false,
|
748 | }
|
749 | }
|
750 |
|
751 | /// Ensures that all data is owned to extend the object's lifetime if
|
752 | /// necessary.
|
753 | #[inline ]
|
754 | pub fn into_owned(self) -> BytesCData<'static> {
|
755 | BytesCData {
|
756 | content: self.content.into_owned().into(),
|
757 | decoder: self.decoder,
|
758 | }
|
759 | }
|
760 |
|
761 | /// Extracts the inner `Cow` from the `BytesCData` event container.
|
762 | #[inline ]
|
763 | pub fn into_inner(self) -> Cow<'a, [u8]> {
|
764 | self.content
|
765 | }
|
766 |
|
767 | /// Converts the event into a borrowed event.
|
768 | #[inline ]
|
769 | pub fn borrow(&self) -> BytesCData {
|
770 | BytesCData {
|
771 | content: Cow::Borrowed(&self.content),
|
772 | decoder: self.decoder,
|
773 | }
|
774 | }
|
775 |
|
776 | /// Converts this CDATA content to an escaped version, that can be written
|
777 | /// as an usual text in XML.
|
778 | ///
|
779 | /// This function performs following replacements:
|
780 | ///
|
781 | /// | Character | Replacement
|
782 | /// |-----------|------------
|
783 | /// | `<` | `<`
|
784 | /// | `>` | `>`
|
785 | /// | `&` | `&`
|
786 | /// | `'` | `'`
|
787 | /// | `"` | `"`
|
788 | pub fn escape(self) -> Result<BytesText<'a>, EncodingError> {
|
789 | let decoded = self.decode()?;
|
790 | Ok(BytesText::wrap(
|
791 | match escape(decoded) {
|
792 | Cow::Borrowed(escaped) => Cow::Borrowed(escaped.as_bytes()),
|
793 | Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
|
794 | },
|
795 | Decoder::utf8(),
|
796 | ))
|
797 | }
|
798 |
|
799 | /// Converts this CDATA content to an escaped version, that can be written
|
800 | /// as an usual text in XML.
|
801 | ///
|
802 | /// In XML text content, it is allowed (though not recommended) to leave
|
803 | /// the quote special characters `"` and `'` unescaped.
|
804 | ///
|
805 | /// This function performs following replacements:
|
806 | ///
|
807 | /// | Character | Replacement
|
808 | /// |-----------|------------
|
809 | /// | `<` | `<`
|
810 | /// | `>` | `>`
|
811 | /// | `&` | `&`
|
812 | pub fn partial_escape(self) -> Result<BytesText<'a>, EncodingError> {
|
813 | let decoded = self.decode()?;
|
814 | Ok(BytesText::wrap(
|
815 | match partial_escape(decoded) {
|
816 | Cow::Borrowed(escaped) => Cow::Borrowed(escaped.as_bytes()),
|
817 | Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
|
818 | },
|
819 | Decoder::utf8(),
|
820 | ))
|
821 | }
|
822 |
|
823 | /// Converts this CDATA content to an escaped version, that can be written
|
824 | /// as an usual text in XML. This method escapes only those characters that
|
825 | /// must be escaped according to the [specification].
|
826 | ///
|
827 | /// This function performs following replacements:
|
828 | ///
|
829 | /// | Character | Replacement
|
830 | /// |-----------|------------
|
831 | /// | `<` | `<`
|
832 | /// | `&` | `&`
|
833 | ///
|
834 | /// [specification]: https://www.w3.org/TR/xml11/#syntax
|
835 | pub fn minimal_escape(self) -> Result<BytesText<'a>, EncodingError> {
|
836 | let decoded = self.decode()?;
|
837 | Ok(BytesText::wrap(
|
838 | match minimal_escape(decoded) {
|
839 | Cow::Borrowed(escaped) => Cow::Borrowed(escaped.as_bytes()),
|
840 | Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
|
841 | },
|
842 | Decoder::utf8(),
|
843 | ))
|
844 | }
|
845 |
|
846 | /// Gets content of this text buffer in the specified encoding
|
847 | pub(crate) fn decode(&self) -> Result<Cow<'a, str>, EncodingError> {
|
848 | Ok(self.decoder.decode_cow(&self.content)?)
|
849 | }
|
850 | }
|
851 |
|
852 | impl<'a> Debug for BytesCData<'a> {
|
853 | fn fmt(&self, f: &mut Formatter) -> fmt::Result {
|
854 | write!(f, "BytesCData {{ content: " )?;
|
855 | write_cow_string(f, &self.content)?;
|
856 | write!(f, " }}" )
|
857 | }
|
858 | }
|
859 |
|
860 | impl<'a> Deref for BytesCData<'a> {
|
861 | type Target = [u8];
|
862 |
|
863 | fn deref(&self) -> &[u8] {
|
864 | &self.content
|
865 | }
|
866 | }
|
867 |
|
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesCData<'a> {
    /// Generates a CDATA event whose content is an arbitrary string.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let content = <&str>::arbitrary(u)?;
        Ok(Self::new(content))
    }

    /// Reports the size hint of the `&str` used for the content.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
|
877 |
|
/// Iterator over `CDATA` sections in a string.
///
/// This iterator is created by the [`BytesCData::escaped`] method.
#[derive(Clone)]
pub struct CDataIterator<'a> {
    /// The data that has not yet been emitted as `BytesCData` events.
    /// Every iteration cuts the part it emitted off the front of this slice.
    unprocessed: &'a [u8],
    /// Set once the final chunk has been emitted; afterwards the iterator
    /// only yields `None`.
    finished: bool,
}
|
888 |
|
889 | impl<'a> Debug for CDataIterator<'a> {
|
890 | fn fmt(&self, f: &mut Formatter) -> fmt::Result {
|
891 | f&mut DebugStruct<'_, '_>.debug_struct("CDataIterator" )
|
892 | .field("unprocessed" , &Bytes(self.unprocessed))
|
893 | .field(name:"finished" , &self.finished)
|
894 | .finish()
|
895 | }
|
896 | }
|
897 |
|
898 | impl<'a> Iterator for CDataIterator<'a> {
|
899 | type Item = BytesCData<'a>;
|
900 |
|
901 | fn next(&mut self) -> Option<BytesCData<'a>> {
|
902 | if self.finished {
|
903 | return None;
|
904 | }
|
905 |
|
906 | for gt: usize in memchr::memchr_iter(needle:b'>' , self.unprocessed) {
|
907 | if self.unprocessed[..gt].ends_with(needle:b"]]" ) {
|
908 | let (slice: &[u8], rest: &[u8]) = self.unprocessed.split_at(mid:gt);
|
909 | self.unprocessed = rest;
|
910 | return Some(BytesCData::wrap(content:slice, Decoder::utf8()));
|
911 | }
|
912 | }
|
913 |
|
914 | self.finished = true;
|
915 | Some(BytesCData::wrap(self.unprocessed, Decoder::utf8()))
|
916 | }
|
917 | }
|
918 |
|
919 | impl FusedIterator for CDataIterator<'_> {}
|
920 |
|
921 | ////////////////////////////////////////////////////////////////////////////////////////////////////
|
922 |
|
923 | /// [Processing instructions][PI] (PIs) allow documents to contain instructions for applications.
|
924 | ///
|
925 | /// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
|
926 | /// returns the content of this event between `<?` and `?>`.
|
927 | ///
|
928 | /// Note, that inner text will not contain `?>` sequence inside:
|
929 | ///
|
930 | /// ```
|
931 | /// # use quick_xml::events::{BytesPI, Event};
|
932 | /// # use quick_xml::reader::Reader;
|
933 | /// # use pretty_assertions::assert_eq;
|
934 | /// let mut reader = Reader::from_str("<?processing instruction >:-<~ ?>" );
|
935 | /// let content = "processing instruction >:-<~ " ;
|
936 | /// let event = BytesPI::new(content);
|
937 | ///
|
938 | /// assert_eq!(reader.read_event().unwrap(), Event::PI(event.borrow()));
|
939 | /// // deref coercion of &BytesPI to &[u8]
|
940 | /// assert_eq!(&event as &[u8], content.as_bytes());
|
941 | /// // AsRef<[u8]> for &T + deref coercion
|
942 | /// assert_eq!(event.as_ref(), content.as_bytes());
|
943 | /// ```
|
944 | ///
|
945 | /// [PI]: https://www.w3.org/TR/xml11/#sec-pi
|
946 | #[derive (Clone, Eq, PartialEq)]
|
947 | pub struct BytesPI<'a> {
|
948 | content: BytesStart<'a>,
|
949 | }
|
950 |
|
951 | impl<'a> BytesPI<'a> {
|
952 | /// Creates a new `BytesPI` from a byte sequence in the specified encoding.
|
953 | #[inline ]
|
954 | pub(crate) const fn wrap(content: &'a [u8], target_len: usize) -> Self {
|
955 | Self {
|
956 | content: BytesStart::wrap(content, target_len),
|
957 | }
|
958 | }
|
959 |
|
960 | /// Creates a new `BytesPI` from a string.
|
961 | ///
|
962 | /// # Warning
|
963 | ///
|
964 | /// `content` must not contain the `?>` sequence.
|
965 | #[inline ]
|
966 | pub fn new<C: Into<Cow<'a, str>>>(content: C) -> Self {
|
967 | let buf = str_cow_to_bytes(content);
|
968 | let name_len = name_len(&buf);
|
969 | Self {
|
970 | content: BytesStart { buf, name_len },
|
971 | }
|
972 | }
|
973 |
|
974 | /// Ensures that all data is owned to extend the object's lifetime if
|
975 | /// necessary.
|
976 | #[inline ]
|
977 | pub fn into_owned(self) -> BytesPI<'static> {
|
978 | BytesPI {
|
979 | content: self.content.into_owned().into(),
|
980 | }
|
981 | }
|
982 |
|
983 | /// Extracts the inner `Cow` from the `BytesPI` event container.
|
984 | #[inline ]
|
985 | pub fn into_inner(self) -> Cow<'a, [u8]> {
|
986 | self.content.buf
|
987 | }
|
988 |
|
989 | /// Converts the event into a borrowed event.
|
990 | #[inline ]
|
991 | pub fn borrow(&self) -> BytesPI {
|
992 | BytesPI {
|
993 | content: self.content.borrow(),
|
994 | }
|
995 | }
|
996 |
|
997 | /// A target used to identify the application to which the instruction is directed.
|
998 | ///
|
999 | /// # Example
|
1000 | ///
|
1001 | /// ```
|
1002 | /// # use pretty_assertions::assert_eq;
|
1003 | /// use quick_xml::events::BytesPI;
|
1004 | ///
|
1005 | /// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""# );
|
1006 | /// assert_eq!(instruction.target(), b"xml-stylesheet" );
|
1007 | /// ```
|
1008 | #[inline ]
|
1009 | pub fn target(&self) -> &[u8] {
|
1010 | self.content.name().0
|
1011 | }
|
1012 |
|
1013 | /// Content of the processing instruction. Contains everything between target
|
1014 | /// name and the end of the instruction. A direct consequence is that the first
|
1015 | /// character is always a space character.
|
1016 | ///
|
1017 | /// # Example
|
1018 | ///
|
1019 | /// ```
|
1020 | /// # use pretty_assertions::assert_eq;
|
1021 | /// use quick_xml::events::BytesPI;
|
1022 | ///
|
1023 | /// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""# );
|
1024 | /// assert_eq!(instruction.content(), br#" href="style.css""# );
|
1025 | /// ```
|
1026 | #[inline ]
|
1027 | pub fn content(&self) -> &[u8] {
|
1028 | self.content.attributes_raw()
|
1029 | }
|
1030 |
|
1031 | /// A view of the processing instructions' content as a list of key-value pairs.
|
1032 | ///
|
1033 | /// Key-value pairs are used in some processing instructions, for example in
|
1034 | /// `<?xml-stylesheet?>`.
|
1035 | ///
|
1036 | /// Returned iterator does not validate attribute values as may required by
|
1037 | /// target's rules. For example, it doesn't check that substring `?>` is not
|
1038 | /// present in the attribute value. That shouldn't be the problem when event
|
1039 | /// is produced by the reader, because reader detects end of processing instruction
|
1040 | /// by the first `?>` sequence, as required by the specification, and therefore
|
1041 | /// this sequence cannot appear inside it.
|
1042 | ///
|
1043 | /// # Example
|
1044 | ///
|
1045 | /// ```
|
1046 | /// # use pretty_assertions::assert_eq;
|
1047 | /// use std::borrow::Cow;
|
1048 | /// use quick_xml::events::attributes::Attribute;
|
1049 | /// use quick_xml::events::BytesPI;
|
1050 | /// use quick_xml::name::QName;
|
1051 | ///
|
1052 | /// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""# );
|
1053 | /// for attr in instruction.attributes() {
|
1054 | /// assert_eq!(attr, Ok(Attribute {
|
1055 | /// key: QName(b"href" ),
|
1056 | /// value: Cow::Borrowed(b"style.css" ),
|
1057 | /// }));
|
1058 | /// }
|
1059 | /// ```
|
1060 | #[inline ]
|
1061 | pub fn attributes(&self) -> Attributes {
|
1062 | self.content.attributes()
|
1063 | }
|
1064 | }
|
1065 |
|
1066 | impl<'a> Debug for BytesPI<'a> {
|
1067 | fn fmt(&self, f: &mut Formatter) -> fmt::Result {
|
1068 | write!(f, "BytesPI {{ content: " )?;
|
1069 | write_cow_string(f, &self.content.buf)?;
|
1070 | write!(f, " }}" )
|
1071 | }
|
1072 | }
|
1073 |
|
1074 | impl<'a> Deref for BytesPI<'a> {
|
1075 | type Target = [u8];
|
1076 |
|
1077 | fn deref(&self) -> &[u8] {
|
1078 | &self.content
|
1079 | }
|
1080 | }
|
1081 |
|
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesPI<'a> {
    /// Builds an instruction from an arbitrary string slice.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let content = <&str>::arbitrary(u)?;
        Ok(Self::new(content))
    }

    /// The only data consumed is a single `&str`, so its size hint is reused.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
|
1091 |
|
1092 | ////////////////////////////////////////////////////////////////////////////////////////////////////
|
1093 |
|
1094 | /// An XML declaration (`Event::Decl`).
|
1095 | ///
|
1096 | /// [W3C XML 1.1 Prolog and Document Type Declaration](http://w3.org/TR/xml11/#sec-prolog-dtd)
|
1097 | ///
|
1098 | /// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
|
1099 | /// returns the content of this event between `<?` and `?>`.
|
1100 | ///
|
1101 | /// Note, that inner text will not contain `?>` sequence inside:
|
1102 | ///
|
1103 | /// ```
|
1104 | /// # use quick_xml::events::{BytesDecl, BytesStart, Event};
|
1105 | /// # use quick_xml::reader::Reader;
|
1106 | /// # use pretty_assertions::assert_eq;
|
1107 | /// let mut reader = Reader::from_str("<?xml version = '1.0' ?>" );
|
1108 | /// let content = "xml version = '1.0' " ;
|
1109 | /// let event = BytesDecl::from_start(BytesStart::from_content(content, 3));
|
1110 | ///
|
1111 | /// assert_eq!(reader.read_event().unwrap(), Event::Decl(event.borrow()));
|
1112 | /// // deref coercion of &BytesDecl to &[u8]
|
1113 | /// assert_eq!(&event as &[u8], content.as_bytes());
|
1114 | /// // AsRef<[u8]> for &T + deref coercion
|
1115 | /// assert_eq!(event.as_ref(), content.as_bytes());
|
1116 | /// ```
|
1117 | #[derive (Clone, Debug, Eq, PartialEq)]
|
1118 | pub struct BytesDecl<'a> {
|
1119 | content: BytesStart<'a>,
|
1120 | }
|
1121 |
|
1122 | impl<'a> BytesDecl<'a> {
|
1123 | /// Constructs a new `XmlDecl` from the (mandatory) _version_ (should be `1.0` or `1.1`),
|
1124 | /// the optional _encoding_ (e.g., `UTF-8`) and the optional _standalone_ (`yes` or `no`)
|
1125 | /// attribute.
|
1126 | ///
|
1127 | /// Does not escape any of its inputs. Always uses double quotes to wrap the attribute values.
|
1128 | /// The caller is responsible for escaping attribute values. Shouldn't usually be relevant since
|
1129 | /// the double quote character is not allowed in any of the attribute values.
|
1130 | pub fn new(
|
1131 | version: &str,
|
1132 | encoding: Option<&str>,
|
1133 | standalone: Option<&str>,
|
1134 | ) -> BytesDecl<'static> {
|
1135 | // Compute length of the buffer based on supplied attributes
|
1136 | // ' encoding=""' => 12
|
1137 | let encoding_attr_len = if let Some(xs) = encoding {
|
1138 | 12 + xs.len()
|
1139 | } else {
|
1140 | 0
|
1141 | };
|
1142 | // ' standalone=""' => 14
|
1143 | let standalone_attr_len = if let Some(xs) = standalone {
|
1144 | 14 + xs.len()
|
1145 | } else {
|
1146 | 0
|
1147 | };
|
1148 | // 'xml version=""' => 14
|
1149 | let mut buf = String::with_capacity(14 + encoding_attr_len + standalone_attr_len);
|
1150 |
|
1151 | buf.push_str("xml version= \"" );
|
1152 | buf.push_str(version);
|
1153 |
|
1154 | if let Some(encoding_val) = encoding {
|
1155 | buf.push_str(" \" encoding= \"" );
|
1156 | buf.push_str(encoding_val);
|
1157 | }
|
1158 |
|
1159 | if let Some(standalone_val) = standalone {
|
1160 | buf.push_str(" \" standalone= \"" );
|
1161 | buf.push_str(standalone_val);
|
1162 | }
|
1163 | buf.push('"' );
|
1164 |
|
1165 | BytesDecl {
|
1166 | content: BytesStart::from_content(buf, 3),
|
1167 | }
|
1168 | }
|
1169 |
|
1170 | /// Creates a `BytesDecl` from a `BytesStart`
|
1171 | pub const fn from_start(start: BytesStart<'a>) -> Self {
|
1172 | Self { content: start }
|
1173 | }
|
1174 |
|
1175 | /// Gets xml version, excluding quotes (`'` or `"`).
|
1176 | ///
|
1177 | /// According to the [grammar], the version *must* be the first thing in the declaration.
|
1178 | /// This method tries to extract the first thing in the declaration and return it.
|
1179 | /// In case of multiple attributes value of the first one is returned.
|
1180 | ///
|
1181 | /// If version is missed in the declaration, or the first thing is not a version,
|
1182 | /// [`IllFormedError::MissingDeclVersion`] will be returned.
|
1183 | ///
|
1184 | /// # Examples
|
1185 | ///
|
1186 | /// ```
|
1187 | /// use quick_xml::errors::{Error, IllFormedError};
|
1188 | /// use quick_xml::events::{BytesDecl, BytesStart};
|
1189 | ///
|
1190 | /// // <?xml version='1.1'?>
|
1191 | /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'" , 0));
|
1192 | /// assert_eq!(decl.version().unwrap(), b"1.1" .as_ref());
|
1193 | ///
|
1194 | /// // <?xml version='1.0' version='1.1'?>
|
1195 | /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.0' version='1.1'" , 0));
|
1196 | /// assert_eq!(decl.version().unwrap(), b"1.0" .as_ref());
|
1197 | ///
|
1198 | /// // <?xml encoding='utf-8'?>
|
1199 | /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8'" , 0));
|
1200 | /// match decl.version() {
|
1201 | /// Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(key)))) => assert_eq!(key, "encoding" ),
|
1202 | /// _ => assert!(false),
|
1203 | /// }
|
1204 | ///
|
1205 | /// // <?xml encoding='utf-8' version='1.1'?>
|
1206 | /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8' version='1.1'" , 0));
|
1207 | /// match decl.version() {
|
1208 | /// Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(key)))) => assert_eq!(key, "encoding" ),
|
1209 | /// _ => assert!(false),
|
1210 | /// }
|
1211 | ///
|
1212 | /// // <?xml?>
|
1213 | /// let decl = BytesDecl::from_start(BytesStart::from_content("" , 0));
|
1214 | /// match decl.version() {
|
1215 | /// Err(Error::IllFormed(IllFormedError::MissingDeclVersion(None))) => {},
|
1216 | /// _ => assert!(false),
|
1217 | /// }
|
1218 | /// ```
|
1219 | ///
|
1220 | /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
|
1221 | pub fn version(&self) -> Result<Cow<[u8]>, Error> {
|
1222 | // The version *must* be the first thing in the declaration.
|
1223 | match self.content.attributes().with_checks(false).next() {
|
1224 | Some(Ok(a)) if a.key.as_ref() == b"version" => Ok(a.value),
|
1225 | // first attribute was not "version"
|
1226 | Some(Ok(a)) => {
|
1227 | let found = from_utf8(a.key.as_ref())
|
1228 | .map_err(|_| IllFormedError::MissingDeclVersion(None))?
|
1229 | .to_string();
|
1230 | Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(
|
1231 | found,
|
1232 | ))))
|
1233 | }
|
1234 | // error parsing attributes
|
1235 | Some(Err(e)) => Err(e.into()),
|
1236 | // no attributes
|
1237 | None => Err(Error::IllFormed(IllFormedError::MissingDeclVersion(None))),
|
1238 | }
|
1239 | }
|
1240 |
|
1241 | /// Gets xml encoding, excluding quotes (`'` or `"`).
|
1242 | ///
|
1243 | /// Although according to the [grammar] encoding must appear before `"standalone"`
|
1244 | /// and after `"version"`, this method does not check that. The first occurrence
|
1245 | /// of the attribute will be returned even if there are several. Also, method does
|
1246 | /// not restrict symbols that can forming the encoding, so the returned encoding
|
1247 | /// name may not correspond to the grammar.
|
1248 | ///
|
1249 | /// # Examples
|
1250 | ///
|
1251 | /// ```
|
1252 | /// use std::borrow::Cow;
|
1253 | /// use quick_xml::Error;
|
1254 | /// use quick_xml::events::{BytesDecl, BytesStart};
|
1255 | ///
|
1256 | /// // <?xml version='1.1'?>
|
1257 | /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'" , 0));
|
1258 | /// assert!(decl.encoding().is_none());
|
1259 | ///
|
1260 | /// // <?xml encoding='utf-8'?>
|
1261 | /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8'" , 0));
|
1262 | /// match decl.encoding() {
|
1263 | /// Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"utf-8" ),
|
1264 | /// _ => assert!(false),
|
1265 | /// }
|
1266 | ///
|
1267 | /// // <?xml encoding='something_WRONG' encoding='utf-8'?>
|
1268 | /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='something_WRONG' encoding='utf-8'" , 0));
|
1269 | /// match decl.encoding() {
|
1270 | /// Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"something_WRONG" ),
|
1271 | /// _ => assert!(false),
|
1272 | /// }
|
1273 | /// ```
|
1274 | ///
|
1275 | /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
|
1276 | pub fn encoding(&self) -> Option<Result<Cow<[u8]>, AttrError>> {
|
1277 | self.content
|
1278 | .try_get_attribute("encoding" )
|
1279 | .map(|a| a.map(|a| a.value))
|
1280 | .transpose()
|
1281 | }
|
1282 |
|
1283 | /// Gets xml standalone, excluding quotes (`'` or `"`).
|
1284 | ///
|
1285 | /// Although according to the [grammar] standalone flag must appear after `"version"`
|
1286 | /// and `"encoding"`, this method does not check that. The first occurrence of the
|
1287 | /// attribute will be returned even if there are several. Also, method does not
|
1288 | /// restrict symbols that can forming the value, so the returned flag name may not
|
1289 | /// correspond to the grammar.
|
1290 | ///
|
1291 | /// # Examples
|
1292 | ///
|
1293 | /// ```
|
1294 | /// use std::borrow::Cow;
|
1295 | /// use quick_xml::Error;
|
1296 | /// use quick_xml::events::{BytesDecl, BytesStart};
|
1297 | ///
|
1298 | /// // <?xml version='1.1'?>
|
1299 | /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'" , 0));
|
1300 | /// assert!(decl.standalone().is_none());
|
1301 | ///
|
1302 | /// // <?xml standalone='yes'?>
|
1303 | /// let decl = BytesDecl::from_start(BytesStart::from_content(" standalone='yes'" , 0));
|
1304 | /// match decl.standalone() {
|
1305 | /// Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"yes" ),
|
1306 | /// _ => assert!(false),
|
1307 | /// }
|
1308 | ///
|
1309 | /// // <?xml standalone='something_WRONG' encoding='utf-8'?>
|
1310 | /// let decl = BytesDecl::from_start(BytesStart::from_content(" standalone='something_WRONG' encoding='utf-8'" , 0));
|
1311 | /// match decl.standalone() {
|
1312 | /// Some(Ok(Cow::Borrowed(flag))) => assert_eq!(flag, b"something_WRONG" ),
|
1313 | /// _ => assert!(false),
|
1314 | /// }
|
1315 | /// ```
|
1316 | ///
|
1317 | /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
|
1318 | pub fn standalone(&self) -> Option<Result<Cow<[u8]>, AttrError>> {
|
1319 | self.content
|
1320 | .try_get_attribute("standalone" )
|
1321 | .map(|a| a.map(|a| a.value))
|
1322 | .transpose()
|
1323 | }
|
1324 |
|
1325 | /// Gets the actual encoding using [_get an encoding_](https://encoding.spec.whatwg.org/#concept-encoding-get)
|
1326 | /// algorithm.
|
1327 | ///
|
1328 | /// If encoding in not known, or `encoding` key was not found, returns `None`.
|
1329 | /// In case of duplicated `encoding` key, encoding, corresponding to the first
|
1330 | /// one, is returned.
|
1331 | #[cfg (feature = "encoding" )]
|
1332 | pub fn encoder(&self) -> Option<&'static Encoding> {
|
1333 | self.encoding()
|
1334 | .and_then(|e| e.ok())
|
1335 | .and_then(|e| Encoding::for_label(&e))
|
1336 | }
|
1337 |
|
1338 | /// Converts the event into an owned event.
|
1339 | pub fn into_owned(self) -> BytesDecl<'static> {
|
1340 | BytesDecl {
|
1341 | content: self.content.into_owned(),
|
1342 | }
|
1343 | }
|
1344 |
|
1345 | /// Converts the event into a borrowed event.
|
1346 | #[inline ]
|
1347 | pub fn borrow(&self) -> BytesDecl {
|
1348 | BytesDecl {
|
1349 | content: self.content.borrow(),
|
1350 | }
|
1351 | }
|
1352 | }
|
1353 |
|
1354 | impl<'a> Deref for BytesDecl<'a> {
|
1355 | type Target = [u8];
|
1356 |
|
1357 | fn deref(&self) -> &[u8] {
|
1358 | &self.content
|
1359 | }
|
1360 | }
|
1361 |
|
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesDecl<'a> {
    /// Builds a declaration from an arbitrary version string and optional
    /// encoding and standalone strings, drawn in that order.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let version = <&str>::arbitrary(u)?;
        let encoding = Option::<&str>::arbitrary(u)?;
        let standalone = Option::<&str>::arbitrary(u)?;
        Ok(Self::new(version, encoding, standalone))
    }

    /// Reports the size hint of a single `&str`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
|
1376 |
|
1377 | ////////////////////////////////////////////////////////////////////////////////////////////////////
|
1378 |
|
1379 | /// Event emitted by [`Reader::read_event_into`].
|
1380 | ///
|
1381 | /// [`Reader::read_event_into`]: crate::reader::Reader::read_event_into
|
1382 | #[derive (Clone, Debug, Eq, PartialEq)]
|
1383 | #[cfg_attr (feature = "arbitrary" , derive(arbitrary::Arbitrary))]
|
1384 | pub enum Event<'a> {
|
1385 | /// Start tag (with attributes) `<tag attr="value">`.
|
1386 | Start(BytesStart<'a>),
|
1387 | /// End tag `</tag>`.
|
1388 | End(BytesEnd<'a>),
|
1389 | /// Empty element tag (with attributes) `<tag attr="value" />`.
|
1390 | Empty(BytesStart<'a>),
|
1391 | /// Escaped character data between tags.
|
1392 | Text(BytesText<'a>),
|
1393 | /// Unescaped character data stored in `<![CDATA[...]]>`.
|
1394 | CData(BytesCData<'a>),
|
1395 | /// Comment `<!-- ... -->`.
|
1396 | Comment(BytesText<'a>),
|
1397 | /// XML declaration `<?xml ...?>`.
|
1398 | Decl(BytesDecl<'a>),
|
1399 | /// Processing instruction `<?...?>`.
|
1400 | PI(BytesPI<'a>),
|
1401 | /// Document type definition data (DTD) stored in `<!DOCTYPE ...>`.
|
1402 | DocType(BytesText<'a>),
|
1403 | /// End of XML document.
|
1404 | Eof,
|
1405 | }
|
1406 |
|
1407 | impl<'a> Event<'a> {
|
1408 | /// Converts the event to an owned version, untied to the lifetime of
|
1409 | /// buffer used when reading but incurring a new, separate allocation.
|
1410 | pub fn into_owned(self) -> Event<'static> {
|
1411 | match self {
|
1412 | Event::Start(e) => Event::Start(e.into_owned()),
|
1413 | Event::End(e) => Event::End(e.into_owned()),
|
1414 | Event::Empty(e) => Event::Empty(e.into_owned()),
|
1415 | Event::Text(e) => Event::Text(e.into_owned()),
|
1416 | Event::Comment(e) => Event::Comment(e.into_owned()),
|
1417 | Event::CData(e) => Event::CData(e.into_owned()),
|
1418 | Event::Decl(e) => Event::Decl(e.into_owned()),
|
1419 | Event::PI(e) => Event::PI(e.into_owned()),
|
1420 | Event::DocType(e) => Event::DocType(e.into_owned()),
|
1421 | Event::Eof => Event::Eof,
|
1422 | }
|
1423 | }
|
1424 |
|
1425 | /// Converts the event into a borrowed event.
|
1426 | #[inline ]
|
1427 | pub fn borrow(&self) -> Event {
|
1428 | match self {
|
1429 | Event::Start(e) => Event::Start(e.borrow()),
|
1430 | Event::End(e) => Event::End(e.borrow()),
|
1431 | Event::Empty(e) => Event::Empty(e.borrow()),
|
1432 | Event::Text(e) => Event::Text(e.borrow()),
|
1433 | Event::Comment(e) => Event::Comment(e.borrow()),
|
1434 | Event::CData(e) => Event::CData(e.borrow()),
|
1435 | Event::Decl(e) => Event::Decl(e.borrow()),
|
1436 | Event::PI(e) => Event::PI(e.borrow()),
|
1437 | Event::DocType(e) => Event::DocType(e.borrow()),
|
1438 | Event::Eof => Event::Eof,
|
1439 | }
|
1440 | }
|
1441 | }
|
1442 |
|
1443 | impl<'a> Deref for Event<'a> {
|
1444 | type Target = [u8];
|
1445 |
|
1446 | fn deref(&self) -> &[u8] {
|
1447 | match *self {
|
1448 | Event::Start(ref e: &BytesStart<'_>) | Event::Empty(ref e: &BytesStart<'_>) => e,
|
1449 | Event::End(ref e: &BytesEnd<'_>) => e,
|
1450 | Event::Text(ref e: &BytesText<'_>) => e,
|
1451 | Event::Decl(ref e: &BytesDecl<'_>) => e,
|
1452 | Event::PI(ref e: &BytesPI<'_>) => e,
|
1453 | Event::CData(ref e: &BytesCData<'_>) => e,
|
1454 | Event::Comment(ref e: &BytesText<'_>) => e,
|
1455 | Event::DocType(ref e: &BytesText<'_>) => e,
|
1456 | Event::Eof => &[],
|
1457 | }
|
1458 | }
|
1459 | }
|
1460 |
|
1461 | impl<'a> AsRef<Event<'a>> for Event<'a> {
|
1462 | fn as_ref(&self) -> &Event<'a> {
|
1463 | self
|
1464 | }
|
1465 | }
|
1466 |
|
1467 | ////////////////////////////////////////////////////////////////////////////////////////////////////
|
1468 |
|
/// Reinterprets a (possibly owned) string as its UTF-8 bytes.
///
/// A borrowed string stays borrowed and an owned string hands over its
/// buffer, so this conversion never copies the data.
#[inline]
fn str_cow_to_bytes<'a, C: Into<Cow<'a, str>>>(content: C) -> Cow<'a, [u8]> {
    match content.into() {
        Cow::Borrowed(s) => Cow::Borrowed(s.as_bytes()),
        Cow::Owned(s) => Cow::Owned(s.into_bytes()),
    }
}
|
1476 |
|
/// Applies `trim` to the bytes of `value`, keeping its borrowed/owned state.
///
/// `trim` receives the whole content and returns the part that should be
/// kept. Borrowed data is simply re-sliced. Owned data is re-allocated
/// only when `trim` returned a slice of a different length.
fn trim_cow<'a, F>(value: Cow<'a, [u8]>, trim: F) -> Cow<'a, [u8]>
where
    F: FnOnce(&[u8]) -> &[u8],
{
    match value {
        Cow::Borrowed(bytes) => Cow::Borrowed(trim(bytes)),
        Cow::Owned(mut bytes) => {
            let trimmed = trim(&bytes);
            // Same length means nothing was cut off: reuse the existing buffer.
            if trimmed.len() != bytes.len() {
                bytes = trimmed.to_vec();
            }
            Cow::Owned(bytes)
        }
    }
}
|
1492 |
|
#[cfg(test)]
mod test {
    use super::*;
    use pretty_assertions::assert_eq;

    /// A freshly created start tag consists of its name only.
    #[test]
    fn bytestart_create() {
        let b = BytesStart::new("test");
        assert_eq!(b.len(), 4);
        assert_eq!(b.name(), QName(b"test"));
    }

    /// Renaming a tag keeps its attributes and adjusts the total length.
    #[test]
    fn bytestart_set_name() {
        let mut b = BytesStart::new("test");
        assert_eq!(b.len(), 4);
        assert_eq!(b.name(), QName(b"test"));
        assert_eq!(b.attributes_raw(), b"");
        b.push_attribute(("x", "a"));
        // `test` (4) + ` x="a"` (6)
        assert_eq!(b.len(), 10);
        assert_eq!(b.attributes_raw(), b" x=\"a\"");
        b.set_name(b"g");
        // `g` (1) + ` x="a"` (6)
        assert_eq!(b.len(), 7);
        assert_eq!(b.name(), QName(b"g"));
    }

    /// Clearing attributes leaves only the tag name behind.
    #[test]
    fn bytestart_clear_attributes() {
        let mut b = BytesStart::new("test");
        b.push_attribute(("x", "y\"z"));
        b.push_attribute(("x", "y\"z"));
        b.clear_attributes();
        assert!(b.attributes().next().is_none());
        assert_eq!(b.len(), 4);
        assert_eq!(b.name(), QName(b"test"));
    }
}
|
1530 | |