//! A reader that manages namespace declarations found in the input and is able
//! to resolve [qualified names] to [expanded names].
3//!
4//! [qualified names]: https://www.w3.org/TR/xml-names11/#dt-qualname
5//! [expanded names]: https://www.w3.org/TR/xml-names11/#dt-expname
6
7use std::borrow::Cow;
8use std::fs::File;
9use std::io::{BufRead, BufReader};
10use std::ops::Deref;
11use std::path::Path;
12
13use crate::errors::Result;
14use crate::events::Event;
15use crate::name::{LocalName, NamespaceResolver, QName, ResolveResult};
16use crate::reader::{Reader, Span, XmlSource};
17
18/// A low level encoding-agnostic XML event reader that performs namespace resolution.
19///
20/// Consumes a [`BufRead`] and streams XML `Event`s.
21pub struct NsReader<R> {
22 /// An XML reader
23 pub(super) reader: Reader<R>,
24 /// A buffer to manage namespaces
25 ns_resolver: NamespaceResolver,
26 /// We cannot pop data from the namespace stack until returned `Empty` or `End`
27 /// event will be processed by the user, so we only mark that we should that
28 /// in the next [`Self::read_event_impl()`] call.
29 pending_pop: bool,
30}
31
32/// Builder methods
33impl<R> NsReader<R> {
34 /// Creates a `NsReader` that reads from a reader.
35 #[inline]
36 pub fn from_reader(reader: R) -> Self {
37 Self::new(Reader::from_reader(reader))
38 }
39
40 configure_methods!(reader);
41}
42
43/// Private methods
44impl<R> NsReader<R> {
45 #[inline]
46 fn new(reader: Reader<R>) -> Self {
47 Self {
48 reader,
49 ns_resolver: NamespaceResolver::default(),
50 pending_pop: false,
51 }
52 }
53
54 fn read_event_impl<'i, B>(&mut self, buf: B) -> Result<Event<'i>>
55 where
56 R: XmlSource<'i, B>,
57 {
58 self.pop();
59 let event = self.reader.read_event_impl(buf);
60 self.process_event(event)
61 }
62
63 pub(super) fn pop(&mut self) {
64 if self.pending_pop {
65 self.ns_resolver.pop();
66 self.pending_pop = false;
67 }
68 }
69
70 pub(super) fn process_event<'i>(&mut self, event: Result<Event<'i>>) -> Result<Event<'i>> {
71 match event {
72 Ok(Event::Start(e)) => {
73 self.ns_resolver.push(&e)?;
74 Ok(Event::Start(e))
75 }
76 Ok(Event::Empty(e)) => {
77 self.ns_resolver.push(&e)?;
78 // notify next `read_event_impl()` invocation that it needs to pop this
79 // namespace scope
80 self.pending_pop = true;
81 Ok(Event::Empty(e))
82 }
83 Ok(Event::End(e)) => {
84 // notify next `read_event_impl()` invocation that it needs to pop this
85 // namespace scope
86 self.pending_pop = true;
87 Ok(Event::End(e))
88 }
89 e => e,
90 }
91 }
92
93 pub(super) fn resolve_event<'i>(
94 &mut self,
95 event: Result<Event<'i>>,
96 ) -> Result<(ResolveResult, Event<'i>)> {
97 match event {
98 Ok(Event::Start(e)) => Ok((self.ns_resolver.find(e.name()), Event::Start(e))),
99 Ok(Event::Empty(e)) => Ok((self.ns_resolver.find(e.name()), Event::Empty(e))),
100 Ok(Event::End(e)) => Ok((self.ns_resolver.find(e.name()), Event::End(e))),
101 Ok(e) => Ok((ResolveResult::Unbound, e)),
102 Err(e) => Err(e),
103 }
104 }
105}
106
107/// Getters
108impl<R> NsReader<R> {
109 /// Consumes `NsReader` returning the underlying reader
110 ///
111 /// See the [`Reader::into_inner`] for examples
112 #[inline]
113 pub fn into_inner(self) -> R {
114 self.reader.into_inner()
115 }
116
117 /// Gets a mutable reference to the underlying reader.
118 pub fn get_mut(&mut self) -> &mut R {
119 self.reader.get_mut()
120 }
121
122 /// Resolves a potentially qualified **element name** or **attribute name**
123 /// into _(namespace name, local name)_.
124 ///
125 /// _Qualified_ names have the form `prefix:local-name` where the `prefix`
126 /// is defined on any containing XML element via `xmlns:prefix="the:namespace:uri"`.
127 /// The namespace prefix can be defined on the same element as the name in question.
128 ///
129 /// The method returns following results depending on the `name` shape,
130 /// `attribute` flag and the presence of the default namespace:
131 ///
132 /// |attribute|`xmlns="..."`|QName |ResolveResult |LocalName
133 /// |---------|-------------|-------------------|-----------------------|------------
134 /// |`true` |Not defined |`local-name` |[`Unbound`] |`local-name`
135 /// |`true` |Defined |`local-name` |[`Unbound`] |`local-name`
136 /// |`true` |_any_ |`prefix:local-name`|[`Bound`] / [`Unknown`]|`local-name`
137 /// |`false` |Not defined |`local-name` |[`Unbound`] |`local-name`
138 /// |`false` |Defined |`local-name` |[`Bound`] (default) |`local-name`
139 /// |`false` |_any_ |`prefix:local-name`|[`Bound`] / [`Unknown`]|`local-name`
140 ///
    /// If you want to clearly indicate whether the name that you resolve is an
    /// element or an attribute name, you can use the [`resolve_element()`] or
    /// [`resolve_attribute()`] methods.
144 ///
145 /// # Lifetimes
146 ///
147 /// - `'n`: lifetime of a name. Returned local name will be bound to the same
148 /// lifetime as the name in question.
149 /// - returned namespace name will be bound to the reader itself
150 ///
151 /// [`Bound`]: ResolveResult::Bound
152 /// [`Unbound`]: ResolveResult::Unbound
153 /// [`Unknown`]: ResolveResult::Unknown
154 /// [`resolve_attribute()`]: Self::resolve_attribute()
155 /// [`resolve_element()`]: Self::resolve_element()
156 #[inline]
157 pub fn resolve<'n>(&self, name: QName<'n>, attribute: bool) -> (ResolveResult, LocalName<'n>) {
158 self.ns_resolver.resolve(name, !attribute)
159 }
160
161 /// Resolves a potentially qualified **element name** into _(namespace name, local name)_.
162 ///
163 /// _Qualified_ element names have the form `prefix:local-name` where the
164 /// `prefix` is defined on any containing XML element via `xmlns:prefix="the:namespace:uri"`.
165 /// The namespace prefix can be defined on the same element as the element
166 /// in question.
167 ///
    /// _Unqualified_ elements inherit the current _default namespace_.
169 ///
170 /// The method returns following results depending on the `name` shape and
171 /// the presence of the default namespace:
172 ///
173 /// |`xmlns="..."`|QName |ResolveResult |LocalName
174 /// |-------------|-------------------|-----------------------|------------
175 /// |Not defined |`local-name` |[`Unbound`] |`local-name`
176 /// |Defined |`local-name` |[`Bound`] (default) |`local-name`
177 /// |_any_ |`prefix:local-name`|[`Bound`] / [`Unknown`]|`local-name`
178 ///
179 /// # Lifetimes
180 ///
181 /// - `'n`: lifetime of an element name. Returned local name will be bound
182 /// to the same lifetime as the name in question.
183 /// - returned namespace name will be bound to the reader itself
184 ///
185 /// # Examples
186 ///
    /// This example shows how you can resolve a qualified name into a namespace.
    /// Note that in code like this you do not need to do that manually,
    /// because the namespace resolution result is returned by [`read_resolved_event()`].
190 ///
191 /// ```
192 /// # use pretty_assertions::assert_eq;
193 /// use quick_xml::events::Event;
194 /// use quick_xml::name::{Namespace, QName, ResolveResult::*};
195 /// use quick_xml::reader::NsReader;
196 ///
197 /// let mut reader = NsReader::from_str("<tag xmlns='root namespace'/>");
198 ///
199 /// match reader.read_event().unwrap() {
200 /// Event::Empty(e) => assert_eq!(
201 /// reader.resolve_element(e.name()),
202 /// (Bound(Namespace(b"root namespace")), QName(b"tag").into())
203 /// ),
204 /// _ => unreachable!(),
205 /// }
206 /// ```
207 ///
208 /// [`Bound`]: ResolveResult::Bound
209 /// [`Unbound`]: ResolveResult::Unbound
210 /// [`Unknown`]: ResolveResult::Unknown
211 /// [`read_resolved_event()`]: Self::read_resolved_event
212 #[inline]
213 pub fn resolve_element<'n>(&self, name: QName<'n>) -> (ResolveResult, LocalName<'n>) {
214 self.ns_resolver.resolve(name, true)
215 }
216
217 /// Resolves a potentially qualified **attribute name** into _(namespace name, local name)_.
218 ///
219 /// _Qualified_ attribute names have the form `prefix:local-name` where the
220 /// `prefix` is defined on any containing XML element via `xmlns:prefix="the:namespace:uri"`.
221 /// The namespace prefix can be defined on the same element as the attribute
222 /// in question.
223 ///
224 /// _Unqualified_ attribute names do *not* inherit the current _default namespace_.
225 ///
226 /// The method returns following results depending on the `name` shape and
227 /// the presence of the default namespace:
228 ///
229 /// |`xmlns="..."`|QName |ResolveResult |LocalName
230 /// |-------------|-------------------|-----------------------|------------
231 /// |Not defined |`local-name` |[`Unbound`] |`local-name`
232 /// |Defined |`local-name` |[`Unbound`] |`local-name`
233 /// |_any_ |`prefix:local-name`|[`Bound`] / [`Unknown`]|`local-name`
234 ///
235 /// # Lifetimes
236 ///
237 /// - `'n`: lifetime of an attribute name. Returned local name will be bound
238 /// to the same lifetime as the name in question.
239 /// - returned namespace name will be bound to the reader itself
240 ///
241 /// # Examples
242 ///
243 /// ```
244 /// # use pretty_assertions::assert_eq;
245 /// use quick_xml::events::Event;
246 /// use quick_xml::events::attributes::Attribute;
247 /// use quick_xml::name::{Namespace, QName, ResolveResult::*};
248 /// use quick_xml::reader::NsReader;
249 ///
250 /// let mut reader = NsReader::from_str("
251 /// <tag one='1'
252 /// p:two='2'
253 /// xmlns='root namespace'
254 /// xmlns:p='other namespace'/>
255 /// ");
256 /// reader.trim_text(true);
257 ///
258 /// match reader.read_event().unwrap() {
259 /// Event::Empty(e) => {
260 /// let mut iter = e.attributes();
261 ///
262 /// // Unlike elements, attributes without explicit namespace
263 /// // not bound to any namespace
264 /// let one = iter.next().unwrap().unwrap();
265 /// assert_eq!(
266 /// reader.resolve_attribute(one.key),
267 /// (Unbound, QName(b"one").into())
268 /// );
269 ///
270 /// let two = iter.next().unwrap().unwrap();
271 /// assert_eq!(
272 /// reader.resolve_attribute(two.key),
273 /// (Bound(Namespace(b"other namespace")), QName(b"two").into())
274 /// );
275 /// }
276 /// _ => unreachable!(),
277 /// }
278 /// ```
279 ///
280 /// [`Bound`]: ResolveResult::Bound
281 /// [`Unbound`]: ResolveResult::Unbound
282 /// [`Unknown`]: ResolveResult::Unknown
283 #[inline]
284 pub fn resolve_attribute<'n>(&self, name: QName<'n>) -> (ResolveResult, LocalName<'n>) {
285 self.ns_resolver.resolve(name, false)
286 }
287}
288
289impl<R: BufRead> NsReader<R> {
290 /// Reads the next event into given buffer.
291 ///
292 /// This method manages namespaces but doesn't resolve them automatically.
293 /// You should call [`resolve_element()`] if you want to get a namespace.
294 ///
295 /// You also can use [`read_resolved_event_into()`] instead if you want to resolve
296 /// namespace as soon as you get an event.
297 ///
298 /// # Examples
299 ///
300 /// ```
301 /// # use pretty_assertions::assert_eq;
302 /// use quick_xml::events::Event;
303 /// use quick_xml::name::{Namespace, ResolveResult::*};
304 /// use quick_xml::reader::NsReader;
305 ///
306 /// let mut reader = NsReader::from_str(r#"
307 /// <x:tag1 xmlns:x="www.xxxx" xmlns:y="www.yyyy" att1 = "test">
308 /// <y:tag2><!--Test comment-->Test</y:tag2>
309 /// <y:tag2>Test 2</y:tag2>
310 /// </x:tag1>
311 /// "#);
312 /// reader.trim_text(true);
313 ///
314 /// let mut count = 0;
315 /// let mut buf = Vec::new();
316 /// let mut txt = Vec::new();
317 /// loop {
318 /// match reader.read_event_into(&mut buf).unwrap() {
319 /// Event::Start(e) => {
320 /// count += 1;
321 /// let (ns, local) = reader.resolve_element(e.name());
322 /// match local.as_ref() {
323 /// b"tag1" => assert_eq!(ns, Bound(Namespace(b"www.xxxx"))),
324 /// b"tag2" => assert_eq!(ns, Bound(Namespace(b"www.yyyy"))),
325 /// _ => unreachable!(),
326 /// }
327 /// }
328 /// Event::Text(e) => {
329 /// txt.push(e.unescape().unwrap().into_owned())
330 /// }
331 /// Event::Eof => break,
332 /// _ => (),
333 /// }
334 /// buf.clear();
335 /// }
336 /// assert_eq!(count, 3);
337 /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]);
338 /// ```
339 ///
340 /// [`resolve_element()`]: Self::resolve_element
341 /// [`read_resolved_event_into()`]: Self::read_resolved_event_into
342 #[inline]
343 pub fn read_event_into<'b>(&mut self, buf: &'b mut Vec<u8>) -> Result<Event<'b>> {
344 self.read_event_impl(buf)
345 }
346
347 /// Reads the next event into given buffer and resolves its namespace (if applicable).
348 ///
349 /// Namespace is resolved only for [`Start`], [`Empty`] and [`End`] events.
350 /// For all other events the concept of namespace is not defined, so
351 /// a [`ResolveResult::Unbound`] is returned.
352 ///
353 /// If you are not interested in namespaces, you can use [`read_event_into()`]
354 /// which will not automatically resolve namespaces for you.
355 ///
356 /// # Examples
357 ///
358 /// ```
359 /// # use pretty_assertions::assert_eq;
360 /// use quick_xml::events::Event;
361 /// use quick_xml::name::{Namespace, QName, ResolveResult::*};
362 /// use quick_xml::reader::NsReader;
363 ///
364 /// let mut reader = NsReader::from_str(r#"
365 /// <x:tag1 xmlns:x="www.xxxx" xmlns:y="www.yyyy" att1 = "test">
366 /// <y:tag2><!--Test comment-->Test</y:tag2>
367 /// <y:tag2>Test 2</y:tag2>
368 /// </x:tag1>
369 /// "#);
370 /// reader.trim_text(true);
371 ///
372 /// let mut count = 0;
373 /// let mut buf = Vec::new();
374 /// let mut txt = Vec::new();
375 /// loop {
376 /// match reader.read_resolved_event_into(&mut buf).unwrap() {
377 /// (Bound(Namespace(b"www.xxxx")), Event::Start(e)) => {
378 /// count += 1;
379 /// assert_eq!(e.local_name(), QName(b"tag1").into());
380 /// }
381 /// (Bound(Namespace(b"www.yyyy")), Event::Start(e)) => {
382 /// count += 1;
383 /// assert_eq!(e.local_name(), QName(b"tag2").into());
384 /// }
385 /// (_, Event::Start(_)) => unreachable!(),
386 ///
387 /// (_, Event::Text(e)) => {
388 /// txt.push(e.unescape().unwrap().into_owned())
389 /// }
390 /// (_, Event::Eof) => break,
391 /// _ => (),
392 /// }
393 /// buf.clear();
394 /// }
395 /// assert_eq!(count, 3);
396 /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]);
397 /// ```
398 ///
399 /// [`Start`]: Event::Start
400 /// [`Empty`]: Event::Empty
401 /// [`End`]: Event::End
402 /// [`read_event_into()`]: Self::read_event_into
403 #[inline]
404 pub fn read_resolved_event_into<'b>(
405 &mut self,
406 buf: &'b mut Vec<u8>,
407 ) -> Result<(ResolveResult, Event<'b>)> {
408 let event = self.read_event_impl(buf);
409 self.resolve_event(event)
410 }
411
412 /// Reads until end element is found using provided buffer as intermediate
413 /// storage for events content. This function is supposed to be called after
414 /// you already read a [`Start`] event.
415 ///
    /// Returns a span that covers the content between `>` of an opening tag and `<` of
    /// a closing tag, or an empty slice if [`expand_empty_elements`] is set and
    /// this method was called after reading an expanded [`Start`] event.
419 ///
420 /// Manages nested cases where parent and child elements have the _literally_
421 /// same name.
422 ///
    /// If the corresponding [`End`] event is not found, the [`UnexpectedEof`]
    /// error will be returned. In particular, that error will be returned if you call
    /// this method without consuming the corresponding [`Start`] event first.
    ///
    /// If your reader was created from a string slice or byte array slice, it is
    /// better to use the [`read_to_end()`] method, because it will not copy bytes
    /// into an intermediate buffer.
430 ///
431 /// The provided `buf` buffer will be filled only by one event content at time.
432 /// Before reading of each event the buffer will be cleared. If you know an
433 /// appropriate size of each event, you can preallocate the buffer to reduce
434 /// number of reallocations.
435 ///
436 /// The `end` parameter should contain name of the end element _in the reader
437 /// encoding_. It is good practice to always get that parameter using
438 /// [`BytesStart::to_end()`] method.
439 ///
440 /// # Namespaces
441 ///
    /// While the `NsReader` does namespace resolution, namespaces do not
    /// change the algorithm for comparing names. Although the names `a:name`
    /// and `b:name`, where both prefixes `a` and `b` resolve to the same namespace,
    /// are semantically equivalent, `</b:name>` cannot close `<a:name>`, because
    /// according to [the specification]
447 ///
448 /// > The end of every element that begins with a **start-tag** MUST be marked
449 /// > by an **end-tag** containing a name that echoes the element's type as
450 /// > given in the **start-tag**
451 ///
452 /// # Examples
453 ///
454 /// This example shows, how you can skip XML content after you read the
455 /// start event.
456 ///
457 /// ```
458 /// # use pretty_assertions::assert_eq;
459 /// use quick_xml::events::{BytesStart, Event};
460 /// use quick_xml::name::{Namespace, ResolveResult};
461 /// use quick_xml::reader::NsReader;
462 ///
463 /// let mut reader = NsReader::from_str(r#"
464 /// <outer xmlns="namespace 1">
465 /// <inner xmlns="namespace 2">
466 /// <outer></outer>
467 /// </inner>
468 /// <inner>
469 /// <inner></inner>
470 /// <inner/>
471 /// <outer></outer>
472 /// <p:outer xmlns:p="ns"></p:outer>
473 /// <outer/>
474 /// </inner>
475 /// </outer>
476 /// "#);
477 /// reader.trim_text(true);
478 /// let mut buf = Vec::new();
479 ///
480 /// let ns = Namespace(b"namespace 1");
481 /// let start = BytesStart::from_content(r#"outer xmlns="namespace 1""#, 5);
482 /// let end = start.to_end().into_owned();
483 ///
484 /// // First, we read a start event...
485 /// assert_eq!(
486 /// reader.read_resolved_event_into(&mut buf).unwrap(),
487 /// (ResolveResult::Bound(ns), Event::Start(start))
488 /// );
489 ///
490 /// // ...then, we could skip all events to the corresponding end event.
491 /// // This call will correctly handle nested <outer> elements.
492 /// // Note, however, that this method does not handle namespaces.
493 /// reader.read_to_end_into(end.name(), &mut buf).unwrap();
494 ///
495 /// // At the end we should get an Eof event, because we ate the whole XML
496 /// assert_eq!(
497 /// reader.read_resolved_event_into(&mut buf).unwrap(),
498 /// (ResolveResult::Unbound, Event::Eof)
499 /// );
500 /// ```
501 ///
502 /// [`Start`]: Event::Start
503 /// [`End`]: Event::End
504 /// [`UnexpectedEof`]: crate::errors::Error::UnexpectedEof
505 /// [`read_to_end()`]: Self::read_to_end
506 /// [`BytesStart::to_end()`]: crate::events::BytesStart::to_end
507 /// [`expand_empty_elements`]: Self::expand_empty_elements
508 /// [the specification]: https://www.w3.org/TR/xml11/#dt-etag
509 #[inline]
510 pub fn read_to_end_into(&mut self, end: QName, buf: &mut Vec<u8>) -> Result<Span> {
511 // According to the https://www.w3.org/TR/xml11/#dt-etag, end name should
512 // match literally the start name. See `Self::check_end_names` documentation
513 self.reader.read_to_end_into(end, buf)
514 }
515}
516
517impl NsReader<BufReader<File>> {
518 /// Creates an XML reader from a file path.
519 pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
520 Ok(Self::new(reader:Reader::from_file(path)?))
521 }
522}
523
524impl<'i> NsReader<&'i [u8]> {
525 /// Creates an XML reader from a string slice.
526 #[inline]
527 #[allow(clippy::should_implement_trait)]
528 pub fn from_str(s: &'i str) -> Self {
529 Self::new(Reader::from_str(s))
530 }
531
    /// Reads the next event, borrowing its content from the input buffer.
533 ///
534 /// This method manages namespaces but doesn't resolve them automatically.
535 /// You should call [`resolve_element()`] if you want to get a namespace.
536 ///
537 /// You also can use [`read_resolved_event()`] instead if you want to resolve namespace
538 /// as soon as you get an event.
539 ///
540 /// There is no asynchronous `read_event_async()` version of this function,
541 /// because it is not necessary -- the contents are already in memory and no IO
542 /// is needed, therefore there is no potential for blocking.
543 ///
544 /// # Examples
545 ///
546 /// ```
547 /// # use pretty_assertions::assert_eq;
548 /// use quick_xml::events::Event;
549 /// use quick_xml::name::{Namespace, ResolveResult::*};
550 /// use quick_xml::reader::NsReader;
551 ///
552 /// let mut reader = NsReader::from_str(r#"
553 /// <x:tag1 xmlns:x="www.xxxx" xmlns:y="www.yyyy" att1 = "test">
554 /// <y:tag2><!--Test comment-->Test</y:tag2>
555 /// <y:tag2>Test 2</y:tag2>
556 /// </x:tag1>
557 /// "#);
558 /// reader.trim_text(true);
559 ///
560 /// let mut count = 0;
561 /// let mut txt = Vec::new();
562 /// loop {
563 /// match reader.read_event().unwrap() {
564 /// Event::Start(e) => {
565 /// count += 1;
566 /// let (ns, local) = reader.resolve_element(e.name());
567 /// match local.as_ref() {
568 /// b"tag1" => assert_eq!(ns, Bound(Namespace(b"www.xxxx"))),
569 /// b"tag2" => assert_eq!(ns, Bound(Namespace(b"www.yyyy"))),
570 /// _ => unreachable!(),
571 /// }
572 /// }
573 /// Event::Text(e) => {
574 /// txt.push(e.unescape().unwrap().into_owned())
575 /// }
576 /// Event::Eof => break,
577 /// _ => (),
578 /// }
579 /// }
580 /// assert_eq!(count, 3);
581 /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]);
582 /// ```
583 ///
584 /// [`resolve_element()`]: Self::resolve_element
585 /// [`read_resolved_event()`]: Self::read_resolved_event
586 #[inline]
587 pub fn read_event(&mut self) -> Result<Event<'i>> {
588 self.read_event_impl(())
589 }
590
    /// Reads the next event, borrowing its content from the input buffer, and resolves
    /// its namespace (if applicable).
593 ///
594 /// Namespace is resolved only for [`Start`], [`Empty`] and [`End`] events.
595 /// For all other events the concept of namespace is not defined, so
596 /// a [`ResolveResult::Unbound`] is returned.
597 ///
598 /// If you are not interested in namespaces, you can use [`read_event()`]
599 /// which will not automatically resolve namespaces for you.
600 ///
601 /// There is no asynchronous `read_resolved_event_async()` version of this function,
602 /// because it is not necessary -- the contents are already in memory and no IO
603 /// is needed, therefore there is no potential for blocking.
604 ///
605 /// # Examples
606 ///
607 /// ```
608 /// # use pretty_assertions::assert_eq;
609 /// use quick_xml::events::Event;
610 /// use quick_xml::name::{Namespace, QName, ResolveResult::*};
611 /// use quick_xml::reader::NsReader;
612 ///
613 /// let mut reader = NsReader::from_str(r#"
614 /// <x:tag1 xmlns:x="www.xxxx" xmlns:y="www.yyyy" att1 = "test">
615 /// <y:tag2><!--Test comment-->Test</y:tag2>
616 /// <y:tag2>Test 2</y:tag2>
617 /// </x:tag1>
618 /// "#);
619 /// reader.trim_text(true);
620 ///
621 /// let mut count = 0;
622 /// let mut txt = Vec::new();
623 /// loop {
624 /// match reader.read_resolved_event().unwrap() {
625 /// (Bound(Namespace(b"www.xxxx")), Event::Start(e)) => {
626 /// count += 1;
627 /// assert_eq!(e.local_name(), QName(b"tag1").into());
628 /// }
629 /// (Bound(Namespace(b"www.yyyy")), Event::Start(e)) => {
630 /// count += 1;
631 /// assert_eq!(e.local_name(), QName(b"tag2").into());
632 /// }
633 /// (_, Event::Start(_)) => unreachable!(),
634 ///
635 /// (_, Event::Text(e)) => {
636 /// txt.push(e.unescape().unwrap().into_owned())
637 /// }
638 /// (_, Event::Eof) => break,
639 /// _ => (),
640 /// }
641 /// }
642 /// assert_eq!(count, 3);
643 /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]);
644 /// ```
645 ///
646 /// [`Start`]: Event::Start
647 /// [`Empty`]: Event::Empty
648 /// [`End`]: Event::End
649 /// [`read_event()`]: Self::read_event
650 #[inline]
651 pub fn read_resolved_event(&mut self) -> Result<(ResolveResult, Event<'i>)> {
652 let event = self.read_event_impl(());
653 self.resolve_event(event)
654 }
655
656 /// Reads until end element is found. This function is supposed to be called
657 /// after you already read a [`Start`] event.
658 ///
    /// Returns a span that covers the content between `>` of an opening tag and `<` of
    /// a closing tag, or an empty slice if [`expand_empty_elements`] is set and
    /// this method was called after reading an expanded [`Start`] event.
662 ///
663 /// Manages nested cases where parent and child elements have the _literally_
664 /// same name.
665 ///
    /// If the corresponding [`End`] event is not found, the [`UnexpectedEof`]
    /// error will be returned. In particular, that error will be returned if you call
    /// this method without consuming the corresponding [`Start`] event first.
669 ///
670 /// The `end` parameter should contain name of the end element _in the reader
671 /// encoding_. It is good practice to always get that parameter using
672 /// [`BytesStart::to_end()`] method.
673 ///
674 /// There is no asynchronous `read_to_end_async()` version of this function,
675 /// because it is not necessary -- the contents are already in memory and no IO
676 /// is needed, therefore there is no potential for blocking.
677 ///
678 /// # Namespaces
679 ///
    /// While the `NsReader` does namespace resolution, namespaces do not
    /// change the algorithm for comparing names. Although the names `a:name`
    /// and `b:name`, where both prefixes `a` and `b` resolve to the same namespace,
    /// are semantically equivalent, `</b:name>` cannot close `<a:name>`, because
    /// according to [the specification]
685 ///
686 /// > The end of every element that begins with a **start-tag** MUST be marked
687 /// > by an **end-tag** containing a name that echoes the element's type as
688 /// > given in the **start-tag**
689 ///
690 /// # Examples
691 ///
692 /// This example shows, how you can skip XML content after you read the
693 /// start event.
694 ///
695 /// ```
696 /// # use pretty_assertions::assert_eq;
697 /// use quick_xml::events::{BytesStart, Event};
698 /// use quick_xml::name::{Namespace, ResolveResult};
699 /// use quick_xml::reader::NsReader;
700 ///
701 /// let mut reader = NsReader::from_str(r#"
702 /// <outer xmlns="namespace 1">
703 /// <inner xmlns="namespace 2">
704 /// <outer></outer>
705 /// </inner>
706 /// <inner>
707 /// <inner></inner>
708 /// <inner/>
709 /// <outer></outer>
710 /// <p:outer xmlns:p="ns"></p:outer>
711 /// <outer/>
712 /// </inner>
713 /// </outer>
714 /// "#);
715 /// reader.trim_text(true);
716 ///
717 /// let ns = Namespace(b"namespace 1");
718 /// let start = BytesStart::from_content(r#"outer xmlns="namespace 1""#, 5);
719 /// let end = start.to_end().into_owned();
720 ///
721 /// // First, we read a start event...
722 /// assert_eq!(
723 /// reader.read_resolved_event().unwrap(),
724 /// (ResolveResult::Bound(ns), Event::Start(start))
725 /// );
726 ///
727 /// // ...then, we could skip all events to the corresponding end event.
728 /// // This call will correctly handle nested <outer> elements.
729 /// // Note, however, that this method does not handle namespaces.
730 /// reader.read_to_end(end.name()).unwrap();
731 ///
732 /// // At the end we should get an Eof event, because we ate the whole XML
733 /// assert_eq!(
734 /// reader.read_resolved_event().unwrap(),
735 /// (ResolveResult::Unbound, Event::Eof)
736 /// );
737 /// ```
738 ///
739 /// [`Start`]: Event::Start
740 /// [`End`]: Event::End
741 /// [`UnexpectedEof`]: crate::errors::Error::UnexpectedEof
742 /// [`BytesStart::to_end()`]: crate::events::BytesStart::to_end
743 /// [`expand_empty_elements`]: Self::expand_empty_elements
744 /// [the specification]: https://www.w3.org/TR/xml11/#dt-etag
745 #[inline]
746 pub fn read_to_end(&mut self, end: QName) -> Result<Span> {
747 // According to the https://www.w3.org/TR/xml11/#dt-etag, end name should
748 // match literally the start name. See `Self::check_end_names` documentation
749 self.reader.read_to_end(end)
750 }
751
752 /// Reads content between start and end tags, including any markup. This
753 /// function is supposed to be called after you already read a [`Start`] event.
754 ///
755 /// Manages nested cases where parent and child elements have the _literally_
756 /// same name.
757 ///
    /// This method does not unescape the data it reads; instead it returns the
    /// content of the XML document "as is". This is because it has no idea what
    /// text it reads, and if, for example, it contains a CDATA section, an attempt
    /// to unescape its content would corrupt the data.
762 ///
    /// Any text will be decoded using the current [`decoder()`] of the XML reader.
    ///
    /// In effect, this method performs the following code:
766 ///
767 /// ```ignore
768 /// let span = reader.read_to_end(end)?;
769 /// let text = reader.decoder().decode(&reader.inner_slice[span]);
770 /// ```
771 ///
772 /// # Examples
773 ///
774 /// This example shows, how you can read a HTML content from your XML document.
775 ///
776 /// ```
777 /// # use pretty_assertions::assert_eq;
778 /// # use std::borrow::Cow;
779 /// use quick_xml::events::{BytesStart, Event};
780 /// use quick_xml::reader::NsReader;
781 ///
782 /// let mut reader = NsReader::from_str(r#"
783 /// <html>
784 /// <title>This is a HTML text</title>
785 /// <p>Usual XML rules does not apply inside it
786 /// <p>For example, elements not needed to be &quot;closed&quot;
787 /// </html>
788 /// "#);
789 /// reader.trim_text(true);
790 ///
791 /// let start = BytesStart::new("html");
792 /// let end = start.to_end().into_owned();
793 ///
794 /// // First, we read a start event...
795 /// assert_eq!(reader.read_event().unwrap(), Event::Start(start));
796 /// // ...and disable checking of end names because we expect HTML further...
797 /// reader.check_end_names(false);
798 ///
799 /// // ...then, we could read text content until close tag.
800 /// // This call will correctly handle nested <html> elements.
801 /// let text = reader.read_text(end.name()).unwrap();
802 /// assert_eq!(text, Cow::Borrowed(r#"
803 /// <title>This is a HTML text</title>
804 /// <p>Usual XML rules does not apply inside it
805 /// <p>For example, elements not needed to be &quot;closed&quot;
806 /// "#));
807 ///
808 /// // Now we can enable checks again
809 /// reader.check_end_names(true);
810 ///
811 /// // At the end we should get an Eof event, because we ate the whole XML
812 /// assert_eq!(reader.read_event().unwrap(), Event::Eof);
813 /// ```
814 ///
815 /// [`Start`]: Event::Start
816 /// [`decoder()`]: Reader::decoder()
817 #[inline]
818 pub fn read_text(&mut self, end: QName) -> Result<Cow<'i, str>> {
819 self.reader.read_text(end)
820 }
821}
822
823impl<R> Deref for NsReader<R> {
824 type Target = Reader<R>;
825
826 #[inline]
827 fn deref(&self) -> &Self::Target {
828 &self.reader
829 }
830}
831