//! A reader that manages namespace declarations found in the input and is able
//! to resolve [qualified names] to [expanded names].
|
3 | //!
|
4 | //! [qualified names]: https://www.w3.org/TR/xml-names11/#dt-qualname
|
5 | //! [expanded names]: https://www.w3.org/TR/xml-names11/#dt-expname
|
6 |
|
7 | use std::borrow::Cow;
|
8 | use std::fs::File;
|
9 | use std::io::{BufRead, BufReader};
|
10 | use std::ops::Deref;
|
11 | use std::path::Path;
|
12 |
|
13 | use crate::errors::Result;
|
14 | use crate::events::Event;
|
15 | use crate::name::{LocalName, NamespaceResolver, PrefixIter, QName, ResolveResult};
|
16 | use crate::reader::{Config, Reader, Span, XmlSource};
|
17 |
|
18 | /// A low level encoding-agnostic XML event reader that performs namespace resolution.
|
19 | ///
|
20 | /// Consumes a [`BufRead`] and streams XML `Event`s.
|
21 | #[derive (Debug, Clone)]
|
22 | pub struct NsReader<R> {
|
23 | /// An XML reader
|
24 | pub(super) reader: Reader<R>,
|
25 | /// A buffer to manage namespaces
|
26 | ns_resolver: NamespaceResolver,
|
27 | /// We cannot pop data from the namespace stack until returned `Empty` or `End`
|
28 | /// event will be processed by the user, so we only mark that we should that
|
29 | /// in the next [`Self::read_event_impl()`] call.
|
30 | pending_pop: bool,
|
31 | }
|
32 |
|
33 | /// Builder methods
|
34 | impl<R> NsReader<R> {
|
35 | /// Creates a `NsReader` that reads from a reader.
|
36 | #[inline ]
|
37 | pub fn from_reader(reader: R) -> Self {
|
38 | Self::new(Reader::from_reader(reader))
|
39 | }
|
40 |
|
41 | /// Returns reference to the parser configuration
|
42 | #[inline ]
|
43 | pub const fn config(&self) -> &Config {
|
44 | self.reader.config()
|
45 | }
|
46 |
|
47 | /// Returns mutable reference to the parser configuration
|
48 | #[inline ]
|
49 | pub fn config_mut(&mut self) -> &mut Config {
|
50 | self.reader.config_mut()
|
51 | }
|
52 |
|
53 | /// Returns all the prefixes currently declared except the default `xml` and `xmlns` namespaces.
|
54 | ///
|
55 | /// # Examples
|
56 | ///
|
57 | /// This example shows what results the returned iterator would return after
|
58 | /// reading each event of a simple XML.
|
59 | ///
|
60 | /// ```
|
61 | /// # use pretty_assertions::assert_eq;
|
62 | /// use quick_xml::name::{Namespace, PrefixDeclaration};
|
63 | /// use quick_xml::NsReader;
|
64 | ///
|
/// let src = "<root>
///   <a xmlns=\"a1\" xmlns:a=\"a2\">
///     <b xmlns=\"b1\" xmlns:b=\"b2\">
///       <c/>
///     </b>
///     <d/>
///   </a>
/// </root>";
|
73 | /// let mut reader = NsReader::from_str(src);
|
74 | /// reader.config_mut().trim_text(true);
|
75 | /// // No prefixes at the beginning
|
76 | /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), vec![]);
|
77 | ///
|
78 | /// reader.read_resolved_event()?; // <root>
|
79 | /// // No prefixes declared on root
|
80 | /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), vec![]);
|
81 | ///
|
82 | /// reader.read_resolved_event()?; // <a>
|
83 | /// // Two prefixes declared on "a"
|
84 | /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), vec![
|
85 | /// (PrefixDeclaration::Default, Namespace(b"a1" )),
|
86 | /// (PrefixDeclaration::Named(b"a" ), Namespace(b"a2" ))
|
87 | /// ]);
|
88 | ///
|
89 | /// reader.read_resolved_event()?; // <b>
|
90 | /// // The default prefix got overridden and new "b" prefix
|
91 | /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), vec![
|
92 | /// (PrefixDeclaration::Named(b"a" ), Namespace(b"a2" )),
|
93 | /// (PrefixDeclaration::Default, Namespace(b"b1" )),
|
94 | /// (PrefixDeclaration::Named(b"b" ), Namespace(b"b2" ))
|
95 | /// ]);
|
96 | ///
|
97 | /// reader.read_resolved_event()?; // <c/>
|
98 | /// // Still the same
|
99 | /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), vec![
|
100 | /// (PrefixDeclaration::Named(b"a" ), Namespace(b"a2" )),
|
101 | /// (PrefixDeclaration::Default, Namespace(b"b1" )),
|
102 | /// (PrefixDeclaration::Named(b"b" ), Namespace(b"b2" ))
|
103 | /// ]);
|
104 | ///
|
105 | /// reader.read_resolved_event()?; // </b>
|
106 | /// // Still the same
|
107 | /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), vec![
|
108 | /// (PrefixDeclaration::Named(b"a" ), Namespace(b"a2" )),
|
109 | /// (PrefixDeclaration::Default, Namespace(b"b1" )),
|
110 | /// (PrefixDeclaration::Named(b"b" ), Namespace(b"b2" ))
|
111 | /// ]);
|
112 | ///
|
113 | /// reader.read_resolved_event()?; // <d/>
|
114 | /// // </b> got closed so back to the prefixes declared on <a>
|
115 | /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), vec![
|
116 | /// (PrefixDeclaration::Default, Namespace(b"a1" )),
|
117 | /// (PrefixDeclaration::Named(b"a" ), Namespace(b"a2" ))
|
118 | /// ]);
|
119 | ///
|
120 | /// reader.read_resolved_event()?; // </a>
|
121 | /// // Still the same
|
122 | /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), vec![
|
123 | /// (PrefixDeclaration::Default, Namespace(b"a1" )),
|
124 | /// (PrefixDeclaration::Named(b"a" ), Namespace(b"a2" ))
|
125 | /// ]);
|
126 | ///
|
127 | /// reader.read_resolved_event()?; // </root>
|
128 | /// // <a> got closed
|
129 | /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), vec![]);
|
130 | /// # quick_xml::Result::Ok(())
|
131 | /// ```
|
132 | #[inline ]
|
133 | pub const fn prefixes(&self) -> PrefixIter {
|
134 | self.ns_resolver.iter()
|
135 | }
|
136 | }
|
137 |
|
138 | /// Private methods
|
139 | impl<R> NsReader<R> {
|
140 | #[inline ]
|
141 | fn new(reader: Reader<R>) -> Self {
|
142 | Self {
|
143 | reader,
|
144 | ns_resolver: NamespaceResolver::default(),
|
145 | pending_pop: false,
|
146 | }
|
147 | }
|
148 |
|
149 | fn read_event_impl<'i, B>(&mut self, buf: B) -> Result<Event<'i>>
|
150 | where
|
151 | R: XmlSource<'i, B>,
|
152 | {
|
153 | self.pop();
|
154 | let event = self.reader.read_event_impl(buf);
|
155 | self.process_event(event)
|
156 | }
|
157 |
|
158 | pub(super) fn pop(&mut self) {
|
159 | if self.pending_pop {
|
160 | self.ns_resolver.pop();
|
161 | self.pending_pop = false;
|
162 | }
|
163 | }
|
164 |
|
165 | pub(super) fn process_event<'i>(&mut self, event: Result<Event<'i>>) -> Result<Event<'i>> {
|
166 | match event {
|
167 | Ok(Event::Start(e)) => {
|
168 | self.ns_resolver.push(&e)?;
|
169 | Ok(Event::Start(e))
|
170 | }
|
171 | Ok(Event::Empty(e)) => {
|
172 | self.ns_resolver.push(&e)?;
|
173 | // notify next `read_event_impl()` invocation that it needs to pop this
|
174 | // namespace scope
|
175 | self.pending_pop = true;
|
176 | Ok(Event::Empty(e))
|
177 | }
|
178 | Ok(Event::End(e)) => {
|
179 | // notify next `read_event_impl()` invocation that it needs to pop this
|
180 | // namespace scope
|
181 | self.pending_pop = true;
|
182 | Ok(Event::End(e))
|
183 | }
|
184 | e => e,
|
185 | }
|
186 | }
|
187 |
|
188 | pub(super) fn resolve_event<'i>(
|
189 | &mut self,
|
190 | event: Result<Event<'i>>,
|
191 | ) -> Result<(ResolveResult, Event<'i>)> {
|
192 | match event {
|
193 | Ok(Event::Start(e)) => Ok((self.ns_resolver.find(e.name()), Event::Start(e))),
|
194 | Ok(Event::Empty(e)) => Ok((self.ns_resolver.find(e.name()), Event::Empty(e))),
|
195 | Ok(Event::End(e)) => Ok((self.ns_resolver.find(e.name()), Event::End(e))),
|
196 | Ok(e) => Ok((ResolveResult::Unbound, e)),
|
197 | Err(e) => Err(e),
|
198 | }
|
199 | }
|
200 | }
|
201 |
|
202 | /// Getters
|
203 | impl<R> NsReader<R> {
|
204 | /// Consumes `NsReader` returning the underlying reader
|
205 | ///
|
206 | /// See the [`Reader::into_inner`] for examples
|
207 | #[inline ]
|
208 | pub fn into_inner(self) -> R {
|
209 | self.reader.into_inner()
|
210 | }
|
211 |
|
212 | /// Gets a mutable reference to the underlying reader.
|
213 | pub fn get_mut(&mut self) -> &mut R {
|
214 | self.reader.get_mut()
|
215 | }
|
216 |
|
217 | /// Resolves a potentially qualified **element name** or **attribute name**
|
218 | /// into _(namespace name, local name)_.
|
219 | ///
|
220 | /// _Qualified_ names have the form `prefix:local-name` where the `prefix`
|
221 | /// is defined on any containing XML element via `xmlns:prefix="the:namespace:uri"`.
|
222 | /// The namespace prefix can be defined on the same element as the name in question.
|
223 | ///
|
224 | /// The method returns following results depending on the `name` shape,
|
225 | /// `attribute` flag and the presence of the default namespace:
|
226 | ///
|
227 | /// |attribute|`xmlns="..."`|QName |ResolveResult |LocalName
|
228 | /// |---------|-------------|-------------------|-----------------------|------------
|
229 | /// |`true` |Not defined |`local-name` |[`Unbound`] |`local-name`
|
230 | /// |`true` |Defined |`local-name` |[`Unbound`] |`local-name`
|
231 | /// |`true` |_any_ |`prefix:local-name`|[`Bound`] / [`Unknown`]|`local-name`
|
232 | /// |`false` |Not defined |`local-name` |[`Unbound`] |`local-name`
|
233 | /// |`false` |Defined |`local-name` |[`Bound`] (default) |`local-name`
|
234 | /// |`false` |_any_ |`prefix:local-name`|[`Bound`] / [`Unknown`]|`local-name`
|
235 | ///
|
236 | /// If you want to clearly indicate that name that you resolve is an element
|
237 | /// or an attribute name, you could use [`resolve_attribute()`] or [`resolve_element()`]
|
238 | /// methods.
|
239 | ///
|
240 | /// # Lifetimes
|
241 | ///
|
242 | /// - `'n`: lifetime of a name. Returned local name will be bound to the same
|
243 | /// lifetime as the name in question.
|
244 | /// - returned namespace name will be bound to the reader itself
|
245 | ///
|
246 | /// [`Bound`]: ResolveResult::Bound
|
247 | /// [`Unbound`]: ResolveResult::Unbound
|
248 | /// [`Unknown`]: ResolveResult::Unknown
|
249 | /// [`resolve_attribute()`]: Self::resolve_attribute()
|
250 | /// [`resolve_element()`]: Self::resolve_element()
|
251 | #[inline ]
|
252 | pub fn resolve<'n>(&self, name: QName<'n>, attribute: bool) -> (ResolveResult, LocalName<'n>) {
|
253 | self.ns_resolver.resolve(name, !attribute)
|
254 | }
|
255 |
|
256 | /// Resolves a potentially qualified **element name** into _(namespace name, local name)_.
|
257 | ///
|
258 | /// _Qualified_ element names have the form `prefix:local-name` where the
|
259 | /// `prefix` is defined on any containing XML element via `xmlns:prefix="the:namespace:uri"`.
|
260 | /// The namespace prefix can be defined on the same element as the element
|
261 | /// in question.
|
262 | ///
|
/// _Unqualified_ elements inherit the current _default namespace_.
|
264 | ///
|
265 | /// The method returns following results depending on the `name` shape and
|
266 | /// the presence of the default namespace:
|
267 | ///
|
268 | /// |`xmlns="..."`|QName |ResolveResult |LocalName
|
269 | /// |-------------|-------------------|-----------------------|------------
|
270 | /// |Not defined |`local-name` |[`Unbound`] |`local-name`
|
271 | /// |Defined |`local-name` |[`Bound`] (default) |`local-name`
|
272 | /// |_any_ |`prefix:local-name`|[`Bound`] / [`Unknown`]|`local-name`
|
273 | ///
|
274 | /// # Lifetimes
|
275 | ///
|
276 | /// - `'n`: lifetime of an element name. Returned local name will be bound
|
277 | /// to the same lifetime as the name in question.
|
278 | /// - returned namespace name will be bound to the reader itself
|
279 | ///
|
280 | /// # Examples
|
281 | ///
|
/// This example shows how you can resolve a qualified name into a namespace.
/// Note that in code like this you do not need to do that manually, because
/// the namespace resolution result is already returned by [`read_resolved_event()`].
|
285 | ///
|
286 | /// ```
|
287 | /// # use pretty_assertions::assert_eq;
|
288 | /// use quick_xml::events::Event;
|
289 | /// use quick_xml::name::{Namespace, QName, ResolveResult::*};
|
290 | /// use quick_xml::reader::NsReader;
|
291 | ///
|
292 | /// let mut reader = NsReader::from_str("<tag xmlns='root namespace'/>" );
|
293 | ///
|
294 | /// match reader.read_event().unwrap() {
|
295 | /// Event::Empty(e) => assert_eq!(
|
296 | /// reader.resolve_element(e.name()),
|
297 | /// (Bound(Namespace(b"root namespace" )), QName(b"tag" ).into())
|
298 | /// ),
|
299 | /// _ => unreachable!(),
|
300 | /// }
|
301 | /// ```
|
302 | ///
|
303 | /// [`Bound`]: ResolveResult::Bound
|
304 | /// [`Unbound`]: ResolveResult::Unbound
|
305 | /// [`Unknown`]: ResolveResult::Unknown
|
306 | /// [`read_resolved_event()`]: Self::read_resolved_event
|
307 | #[inline ]
|
308 | pub fn resolve_element<'n>(&self, name: QName<'n>) -> (ResolveResult, LocalName<'n>) {
|
309 | self.ns_resolver.resolve(name, true)
|
310 | }
|
311 |
|
312 | /// Resolves a potentially qualified **attribute name** into _(namespace name, local name)_.
|
313 | ///
|
314 | /// _Qualified_ attribute names have the form `prefix:local-name` where the
|
315 | /// `prefix` is defined on any containing XML element via `xmlns:prefix="the:namespace:uri"`.
|
316 | /// The namespace prefix can be defined on the same element as the attribute
|
317 | /// in question.
|
318 | ///
|
319 | /// _Unqualified_ attribute names do *not* inherit the current _default namespace_.
|
320 | ///
|
321 | /// The method returns following results depending on the `name` shape and
|
322 | /// the presence of the default namespace:
|
323 | ///
|
324 | /// |`xmlns="..."`|QName |ResolveResult |LocalName
|
325 | /// |-------------|-------------------|-----------------------|------------
|
326 | /// |Not defined |`local-name` |[`Unbound`] |`local-name`
|
327 | /// |Defined |`local-name` |[`Unbound`] |`local-name`
|
328 | /// |_any_ |`prefix:local-name`|[`Bound`] / [`Unknown`]|`local-name`
|
329 | ///
|
330 | /// # Lifetimes
|
331 | ///
|
332 | /// - `'n`: lifetime of an attribute name. Returned local name will be bound
|
333 | /// to the same lifetime as the name in question.
|
334 | /// - returned namespace name will be bound to the reader itself
|
335 | ///
|
336 | /// # Examples
|
337 | ///
|
338 | /// ```
|
339 | /// # use pretty_assertions::assert_eq;
|
340 | /// use quick_xml::events::Event;
|
341 | /// use quick_xml::name::{Namespace, QName, ResolveResult::*};
|
342 | /// use quick_xml::reader::NsReader;
|
343 | ///
|
344 | /// let mut reader = NsReader::from_str("
|
345 | /// <tag one='1'
|
346 | /// p:two='2'
|
347 | /// xmlns='root namespace'
|
348 | /// xmlns:p='other namespace'/>
|
349 | /// " );
|
350 | /// reader.config_mut().trim_text(true);
|
351 | ///
|
352 | /// match reader.read_event().unwrap() {
|
353 | /// Event::Empty(e) => {
|
354 | /// let mut iter = e.attributes();
|
355 | ///
|
356 | /// // Unlike elements, attributes without explicit namespace
|
357 | /// // not bound to any namespace
|
358 | /// let one = iter.next().unwrap().unwrap();
|
359 | /// assert_eq!(
|
360 | /// reader.resolve_attribute(one.key),
|
361 | /// (Unbound, QName(b"one" ).into())
|
362 | /// );
|
363 | ///
|
364 | /// let two = iter.next().unwrap().unwrap();
|
365 | /// assert_eq!(
|
366 | /// reader.resolve_attribute(two.key),
|
367 | /// (Bound(Namespace(b"other namespace" )), QName(b"two" ).into())
|
368 | /// );
|
369 | /// }
|
370 | /// _ => unreachable!(),
|
371 | /// }
|
372 | /// ```
|
373 | ///
|
374 | /// [`Bound`]: ResolveResult::Bound
|
375 | /// [`Unbound`]: ResolveResult::Unbound
|
376 | /// [`Unknown`]: ResolveResult::Unknown
|
377 | #[inline ]
|
378 | pub fn resolve_attribute<'n>(&self, name: QName<'n>) -> (ResolveResult, LocalName<'n>) {
|
379 | self.ns_resolver.resolve(name, false)
|
380 | }
|
381 | }
|
382 |
|
383 | impl<R: BufRead> NsReader<R> {
|
384 | /// Reads the next event into given buffer.
|
385 | ///
|
386 | /// This method manages namespaces but doesn't resolve them automatically.
|
387 | /// You should call [`resolve_element()`] if you want to get a namespace.
|
388 | ///
|
389 | /// You also can use [`read_resolved_event_into()`] instead if you want to resolve
|
390 | /// namespace as soon as you get an event.
|
391 | ///
|
392 | /// # Examples
|
393 | ///
|
394 | /// ```
|
395 | /// # use pretty_assertions::assert_eq;
|
396 | /// use quick_xml::events::Event;
|
397 | /// use quick_xml::name::{Namespace, ResolveResult::*};
|
398 | /// use quick_xml::reader::NsReader;
|
399 | ///
|
400 | /// let mut reader = NsReader::from_str(r#"
|
401 | /// <x:tag1 xmlns:x="www.xxxx" xmlns:y="www.yyyy" att1 = "test">
|
402 | /// <y:tag2><!--Test comment-->Test</y:tag2>
|
403 | /// <y:tag2>Test 2</y:tag2>
|
404 | /// </x:tag1>
|
405 | /// "# );
|
406 | /// reader.config_mut().trim_text(true);
|
407 | ///
|
408 | /// let mut count = 0;
|
409 | /// let mut buf = Vec::new();
|
410 | /// let mut txt = Vec::new();
|
411 | /// loop {
|
412 | /// match reader.read_event_into(&mut buf).unwrap() {
|
413 | /// Event::Start(e) => {
|
414 | /// count += 1;
|
415 | /// let (ns, local) = reader.resolve_element(e.name());
|
416 | /// match local.as_ref() {
|
417 | /// b"tag1" => assert_eq!(ns, Bound(Namespace(b"www.xxxx" ))),
|
418 | /// b"tag2" => assert_eq!(ns, Bound(Namespace(b"www.yyyy" ))),
|
419 | /// _ => unreachable!(),
|
420 | /// }
|
421 | /// }
|
422 | /// Event::Text(e) => {
|
423 | /// txt.push(e.unescape().unwrap().into_owned())
|
424 | /// }
|
425 | /// Event::Eof => break,
|
426 | /// _ => (),
|
427 | /// }
|
428 | /// buf.clear();
|
429 | /// }
|
430 | /// assert_eq!(count, 3);
|
431 | /// assert_eq!(txt, vec!["Test" .to_string(), "Test 2" .to_string()]);
|
432 | /// ```
|
433 | ///
|
434 | /// [`resolve_element()`]: Self::resolve_element
|
435 | /// [`read_resolved_event_into()`]: Self::read_resolved_event_into
|
436 | #[inline ]
|
437 | pub fn read_event_into<'b>(&mut self, buf: &'b mut Vec<u8>) -> Result<Event<'b>> {
|
438 | self.read_event_impl(buf)
|
439 | }
|
440 |
|
441 | /// Reads the next event into given buffer and resolves its namespace (if applicable).
|
442 | ///
|
443 | /// Namespace is resolved only for [`Start`], [`Empty`] and [`End`] events.
|
444 | /// For all other events the concept of namespace is not defined, so
|
445 | /// a [`ResolveResult::Unbound`] is returned.
|
446 | ///
|
447 | /// If you are not interested in namespaces, you can use [`read_event_into()`]
|
448 | /// which will not automatically resolve namespaces for you.
|
449 | ///
|
450 | /// # Examples
|
451 | ///
|
452 | /// ```
|
453 | /// # use pretty_assertions::assert_eq;
|
454 | /// use quick_xml::events::Event;
|
455 | /// use quick_xml::name::{Namespace, QName, ResolveResult::*};
|
456 | /// use quick_xml::reader::NsReader;
|
457 | ///
|
458 | /// let mut reader = NsReader::from_str(r#"
|
459 | /// <x:tag1 xmlns:x="www.xxxx" xmlns:y="www.yyyy" att1 = "test">
|
460 | /// <y:tag2><!--Test comment-->Test</y:tag2>
|
461 | /// <y:tag2>Test 2</y:tag2>
|
462 | /// </x:tag1>
|
463 | /// "# );
|
464 | /// reader.config_mut().trim_text(true);
|
465 | ///
|
466 | /// let mut count = 0;
|
467 | /// let mut buf = Vec::new();
|
468 | /// let mut txt = Vec::new();
|
469 | /// loop {
|
470 | /// match reader.read_resolved_event_into(&mut buf).unwrap() {
|
471 | /// (Bound(Namespace(b"www.xxxx" )), Event::Start(e)) => {
|
472 | /// count += 1;
|
473 | /// assert_eq!(e.local_name(), QName(b"tag1" ).into());
|
474 | /// }
|
475 | /// (Bound(Namespace(b"www.yyyy" )), Event::Start(e)) => {
|
476 | /// count += 1;
|
477 | /// assert_eq!(e.local_name(), QName(b"tag2" ).into());
|
478 | /// }
|
479 | /// (_, Event::Start(_)) => unreachable!(),
|
480 | ///
|
481 | /// (_, Event::Text(e)) => {
|
482 | /// txt.push(e.unescape().unwrap().into_owned())
|
483 | /// }
|
484 | /// (_, Event::Eof) => break,
|
485 | /// _ => (),
|
486 | /// }
|
487 | /// buf.clear();
|
488 | /// }
|
489 | /// assert_eq!(count, 3);
|
490 | /// assert_eq!(txt, vec!["Test" .to_string(), "Test 2" .to_string()]);
|
491 | /// ```
|
492 | ///
|
493 | /// [`Start`]: Event::Start
|
494 | /// [`Empty`]: Event::Empty
|
495 | /// [`End`]: Event::End
|
496 | /// [`read_event_into()`]: Self::read_event_into
|
497 | #[inline ]
|
498 | pub fn read_resolved_event_into<'b>(
|
499 | &mut self,
|
500 | buf: &'b mut Vec<u8>,
|
501 | ) -> Result<(ResolveResult, Event<'b>)> {
|
502 | let event = self.read_event_impl(buf);
|
503 | self.resolve_event(event)
|
504 | }
|
505 |
|
506 | /// Reads until end element is found using provided buffer as intermediate
|
507 | /// storage for events content. This function is supposed to be called after
|
508 | /// you already read a [`Start`] event.
|
509 | ///
|
/// Returns a span that covers the content between `>` of an opening tag and `<` of
/// a closing tag, or an empty slice if [`expand_empty_elements`] is set and
/// this method was called after reading an expanded [`Start`] event.
|
513 | ///
|
514 | /// Manages nested cases where parent and child elements have the _literally_
|
515 | /// same name.
|
516 | ///
|
/// If a corresponding [`End`] event is not found, an error of type [`IllFormed`]
/// will be returned. In particular, that error will be returned if you call
/// this method without consuming the corresponding [`Start`] event first.
|
520 | ///
|
/// If your reader was created from a string slice or byte array slice, it is
/// better to use the [`read_to_end()`] method, because it will not copy bytes
/// into an intermediate buffer.
|
524 | ///
|
525 | /// The provided `buf` buffer will be filled only by one event content at time.
|
526 | /// Before reading of each event the buffer will be cleared. If you know an
|
527 | /// appropriate size of each event, you can preallocate the buffer to reduce
|
528 | /// number of reallocations.
|
529 | ///
|
530 | /// The `end` parameter should contain name of the end element _in the reader
|
531 | /// encoding_. It is good practice to always get that parameter using
|
532 | /// [`BytesStart::to_end()`] method.
|
533 | ///
|
534 | /// # Namespaces
|
535 | ///
|
/// While the `NsReader` does namespace resolution, namespaces do not
/// change the algorithm for comparing names. Although the names `a:name`
/// and `b:name`, where both prefixes `a` and `b` resolve to the same namespace,
/// are semantically equivalent, `</b:name>` cannot close `<a:name>`, because
/// according to [the specification]
|
541 | ///
|
542 | /// > The end of every element that begins with a **start-tag** MUST be marked
|
543 | /// > by an **end-tag** containing a name that echoes the element's type as
|
544 | /// > given in the **start-tag**
|
545 | ///
|
546 | /// # Examples
|
547 | ///
|
548 | /// This example shows, how you can skip XML content after you read the
|
549 | /// start event.
|
550 | ///
|
551 | /// ```
|
552 | /// # use pretty_assertions::assert_eq;
|
553 | /// use quick_xml::events::{BytesStart, Event};
|
554 | /// use quick_xml::name::{Namespace, ResolveResult};
|
555 | /// use quick_xml::reader::NsReader;
|
556 | ///
|
557 | /// let mut reader = NsReader::from_str(r#"
|
558 | /// <outer xmlns="namespace 1">
|
559 | /// <inner xmlns="namespace 2">
|
560 | /// <outer></outer>
|
561 | /// </inner>
|
562 | /// <inner>
|
563 | /// <inner></inner>
|
564 | /// <inner/>
|
565 | /// <outer></outer>
|
566 | /// <p:outer xmlns:p="ns"></p:outer>
|
567 | /// <outer/>
|
568 | /// </inner>
|
569 | /// </outer>
|
570 | /// "# );
|
571 | /// reader.config_mut().trim_text(true);
|
572 | /// let mut buf = Vec::new();
|
573 | ///
|
574 | /// let ns = Namespace(b"namespace 1" );
|
575 | /// let start = BytesStart::from_content(r#"outer xmlns="namespace 1""# , 5);
|
576 | /// let end = start.to_end().into_owned();
|
577 | ///
|
578 | /// // First, we read a start event...
|
579 | /// assert_eq!(
|
580 | /// reader.read_resolved_event_into(&mut buf).unwrap(),
|
581 | /// (ResolveResult::Bound(ns), Event::Start(start))
|
582 | /// );
|
583 | ///
|
584 | /// // ...then, we could skip all events to the corresponding end event.
|
585 | /// // This call will correctly handle nested <outer> elements.
|
586 | /// // Note, however, that this method does not handle namespaces.
|
587 | /// reader.read_to_end_into(end.name(), &mut buf).unwrap();
|
588 | ///
|
589 | /// // At the end we should get an Eof event, because we ate the whole XML
|
590 | /// assert_eq!(
|
591 | /// reader.read_resolved_event_into(&mut buf).unwrap(),
|
592 | /// (ResolveResult::Unbound, Event::Eof)
|
593 | /// );
|
594 | /// ```
|
595 | ///
|
596 | /// [`Start`]: Event::Start
|
597 | /// [`End`]: Event::End
|
598 | /// [`IllFormed`]: crate::errors::Error::IllFormed
|
599 | /// [`read_to_end()`]: Self::read_to_end
|
600 | /// [`BytesStart::to_end()`]: crate::events::BytesStart::to_end
|
601 | /// [`expand_empty_elements`]: Config::expand_empty_elements
|
602 | /// [the specification]: https://www.w3.org/TR/xml11/#dt-etag
|
603 | #[inline ]
|
604 | pub fn read_to_end_into(&mut self, end: QName, buf: &mut Vec<u8>) -> Result<Span> {
|
605 | // According to the https://www.w3.org/TR/xml11/#dt-etag, end name should
|
606 | // match literally the start name. See `Config::check_end_names` documentation
|
607 | self.reader.read_to_end_into(end, buf)
|
608 | }
|
609 | }
|
610 |
|
611 | impl NsReader<BufReader<File>> {
|
612 | /// Creates an XML reader from a file path.
|
613 | pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
|
614 | Ok(Self::new(Reader::from_file(path)?))
|
615 | }
|
616 | }
|
617 |
|
618 | impl<'i> NsReader<&'i [u8]> {
|
619 | /// Creates an XML reader from a string slice.
|
620 | #[inline ]
|
621 | #[allow (clippy::should_implement_trait)]
|
622 | pub fn from_str(s: &'i str) -> Self {
|
623 | Self::new(Reader::from_str(s))
|
624 | }
|
625 |
|
626 | /// Reads the next event, borrow its content from the input buffer.
|
627 | ///
|
628 | /// This method manages namespaces but doesn't resolve them automatically.
|
629 | /// You should call [`resolve_element()`] if you want to get a namespace.
|
630 | ///
|
631 | /// You also can use [`read_resolved_event()`] instead if you want to resolve namespace
|
632 | /// as soon as you get an event.
|
633 | ///
|
634 | /// There is no asynchronous `read_event_async()` version of this function,
|
635 | /// because it is not necessary -- the contents are already in memory and no IO
|
636 | /// is needed, therefore there is no potential for blocking.
|
637 | ///
|
638 | /// # Examples
|
639 | ///
|
640 | /// ```
|
641 | /// # use pretty_assertions::assert_eq;
|
642 | /// use quick_xml::events::Event;
|
643 | /// use quick_xml::name::{Namespace, ResolveResult::*};
|
644 | /// use quick_xml::reader::NsReader;
|
645 | ///
|
646 | /// let mut reader = NsReader::from_str(r#"
|
647 | /// <x:tag1 xmlns:x="www.xxxx" xmlns:y="www.yyyy" att1 = "test">
|
648 | /// <y:tag2><!--Test comment-->Test</y:tag2>
|
649 | /// <y:tag2>Test 2</y:tag2>
|
650 | /// </x:tag1>
|
651 | /// "# );
|
652 | /// reader.config_mut().trim_text(true);
|
653 | ///
|
654 | /// let mut count = 0;
|
655 | /// let mut txt = Vec::new();
|
656 | /// loop {
|
657 | /// match reader.read_event().unwrap() {
|
658 | /// Event::Start(e) => {
|
659 | /// count += 1;
|
660 | /// let (ns, local) = reader.resolve_element(e.name());
|
661 | /// match local.as_ref() {
|
662 | /// b"tag1" => assert_eq!(ns, Bound(Namespace(b"www.xxxx" ))),
|
663 | /// b"tag2" => assert_eq!(ns, Bound(Namespace(b"www.yyyy" ))),
|
664 | /// _ => unreachable!(),
|
665 | /// }
|
666 | /// }
|
667 | /// Event::Text(e) => {
|
668 | /// txt.push(e.unescape().unwrap().into_owned())
|
669 | /// }
|
670 | /// Event::Eof => break,
|
671 | /// _ => (),
|
672 | /// }
|
673 | /// }
|
674 | /// assert_eq!(count, 3);
|
675 | /// assert_eq!(txt, vec!["Test" .to_string(), "Test 2" .to_string()]);
|
676 | /// ```
|
677 | ///
|
678 | /// [`resolve_element()`]: Self::resolve_element
|
679 | /// [`read_resolved_event()`]: Self::read_resolved_event
|
680 | #[inline ]
|
681 | pub fn read_event(&mut self) -> Result<Event<'i>> {
|
682 | self.read_event_impl(())
|
683 | }
|
684 |
|
685 | /// Reads the next event, borrow its content from the input buffer, and resolves
|
686 | /// its namespace (if applicable).
|
687 | ///
|
688 | /// Namespace is resolved only for [`Start`], [`Empty`] and [`End`] events.
|
689 | /// For all other events the concept of namespace is not defined, so
|
690 | /// a [`ResolveResult::Unbound`] is returned.
|
691 | ///
|
692 | /// If you are not interested in namespaces, you can use [`read_event()`]
|
693 | /// which will not automatically resolve namespaces for you.
|
694 | ///
|
695 | /// There is no asynchronous `read_resolved_event_async()` version of this function,
|
696 | /// because it is not necessary -- the contents are already in memory and no IO
|
697 | /// is needed, therefore there is no potential for blocking.
|
698 | ///
|
699 | /// # Examples
|
700 | ///
|
701 | /// ```
|
702 | /// # use pretty_assertions::assert_eq;
|
703 | /// use quick_xml::events::Event;
|
704 | /// use quick_xml::name::{Namespace, QName, ResolveResult::*};
|
705 | /// use quick_xml::reader::NsReader;
|
706 | ///
|
707 | /// let mut reader = NsReader::from_str(r#"
|
708 | /// <x:tag1 xmlns:x="www.xxxx" xmlns:y="www.yyyy" att1 = "test">
|
709 | /// <y:tag2><!--Test comment-->Test</y:tag2>
|
710 | /// <y:tag2>Test 2</y:tag2>
|
711 | /// </x:tag1>
|
712 | /// "# );
|
713 | /// reader.config_mut().trim_text(true);
|
714 | ///
|
715 | /// let mut count = 0;
|
716 | /// let mut txt = Vec::new();
|
717 | /// loop {
|
718 | /// match reader.read_resolved_event().unwrap() {
|
719 | /// (Bound(Namespace(b"www.xxxx" )), Event::Start(e)) => {
|
720 | /// count += 1;
|
721 | /// assert_eq!(e.local_name(), QName(b"tag1" ).into());
|
722 | /// }
|
723 | /// (Bound(Namespace(b"www.yyyy" )), Event::Start(e)) => {
|
724 | /// count += 1;
|
725 | /// assert_eq!(e.local_name(), QName(b"tag2" ).into());
|
726 | /// }
|
727 | /// (_, Event::Start(_)) => unreachable!(),
|
728 | ///
|
729 | /// (_, Event::Text(e)) => {
|
730 | /// txt.push(e.unescape().unwrap().into_owned())
|
731 | /// }
|
732 | /// (_, Event::Eof) => break,
|
733 | /// _ => (),
|
734 | /// }
|
735 | /// }
|
736 | /// assert_eq!(count, 3);
|
737 | /// assert_eq!(txt, vec!["Test" .to_string(), "Test 2" .to_string()]);
|
738 | /// ```
|
739 | ///
|
740 | /// [`Start`]: Event::Start
|
741 | /// [`Empty`]: Event::Empty
|
742 | /// [`End`]: Event::End
|
743 | /// [`read_event()`]: Self::read_event
|
744 | #[inline ]
|
745 | pub fn read_resolved_event(&mut self) -> Result<(ResolveResult, Event<'i>)> {
|
746 | let event = self.read_event_impl(());
|
747 | self.resolve_event(event)
|
748 | }
|
749 |
|
750 | /// Reads until end element is found. This function is supposed to be called
|
751 | /// after you already read a [`Start`] event.
|
752 | ///
|
753 | /// Returns a span that cover content between `>` of an opening tag and `<` of
|
754 | /// a closing tag or an empty slice, if [`expand_empty_elements`] is set and
|
755 | /// this method was called after reading expanded [`Start`] event.
|
756 | ///
|
757 | /// Manages nested cases where parent and child elements have the _literally_
|
758 | /// same name.
|
759 | ///
|
760 | /// If a corresponding [`End`] event is not found, an error of type [`IllFormed`]
|
761 | /// will be returned. In particularly, that error will be returned if you call
|
762 | /// this method without consuming the corresponding [`Start`] event first.
|
763 | ///
|
764 | /// The `end` parameter should contain name of the end element _in the reader
|
765 | /// encoding_. It is good practice to always get that parameter using
|
766 | /// [`BytesStart::to_end()`] method.
|
767 | ///
|
768 | /// There is no asynchronous `read_to_end_async()` version of this function,
|
769 | /// because it is not necessary -- the contents are already in memory and no IO
|
770 | /// is needed, therefore there is no potential for blocking.
|
771 | ///
|
772 | /// # Namespaces
|
773 | ///
|
774 | /// While the `NsReader` does namespace resolution, namespaces do not
|
775 | /// change the algorithm for comparing names. Although the names `a:name`
|
776 | /// and `b:name`, where both prefixes `a` and `b` resolve to the same namespace,
|
777 | /// are semantically equivalent, `</b:name>` cannot close `<a:name>`, because
|
778 | /// according to [the specification]:
|
779 | ///
|
780 | /// > The end of every element that begins with a **start-tag** MUST be marked
|
781 | /// > by an **end-tag** containing a name that echoes the element's type as
|
782 | /// > given in the **start-tag**
|
783 | ///
|
784 | /// # Examples
|
785 | ///
|
786 | /// This example shows how you can skip XML content after you read the
|
787 | /// start event.
|
788 | ///
|
789 | /// ```
|
790 | /// # use pretty_assertions::assert_eq;
|
791 | /// use quick_xml::events::{BytesStart, Event};
|
792 | /// use quick_xml::name::{Namespace, ResolveResult};
|
793 | /// use quick_xml::reader::NsReader;
|
794 | ///
|
795 | /// let mut reader = NsReader::from_str(r#"
|
796 | /// <outer xmlns="namespace 1">
|
797 | /// <inner xmlns="namespace 2">
|
798 | /// <outer></outer>
|
799 | /// </inner>
|
800 | /// <inner>
|
801 | /// <inner></inner>
|
802 | /// <inner/>
|
803 | /// <outer></outer>
|
804 | /// <p:outer xmlns:p="ns"></p:outer>
|
805 | /// <outer/>
|
806 | /// </inner>
|
807 | /// </outer>
|
808 | /// "# );
|
809 | /// reader.config_mut().trim_text(true);
|
810 | ///
|
811 | /// let ns = Namespace(b"namespace 1" );
|
812 | /// let start = BytesStart::from_content(r#"outer xmlns="namespace 1""# , 5);
|
813 | /// let end = start.to_end().into_owned();
|
814 | ///
|
815 | /// // First, we read a start event...
|
816 | /// assert_eq!(
|
817 | /// reader.read_resolved_event().unwrap(),
|
818 | /// (ResolveResult::Bound(ns), Event::Start(start))
|
819 | /// );
|
820 | ///
|
821 | /// // ...then, we could skip all events to the corresponding end event.
|
822 | /// // This call will correctly handle nested <outer> elements.
|
823 | /// // Note, however, that this method does not handle namespaces.
|
824 | /// reader.read_to_end(end.name()).unwrap();
|
825 | ///
|
826 | /// // At the end we should get an Eof event, because we ate the whole XML
|
827 | /// assert_eq!(
|
828 | /// reader.read_resolved_event().unwrap(),
|
829 | /// (ResolveResult::Unbound, Event::Eof)
|
830 | /// );
|
831 | /// ```
|
832 | ///
|
833 | /// [`Start`]: Event::Start
|
834 | /// [`End`]: Event::End
|
835 | /// [`IllFormed`]: crate::errors::Error::IllFormed
|
836 | /// [`BytesStart::to_end()`]: crate::events::BytesStart::to_end
|
837 | /// [`expand_empty_elements`]: Config::expand_empty_elements
|
838 | /// [the specification]: https://www.w3.org/TR/xml11/#dt-etag
|
839 | #[inline ]
|
840 | pub fn read_to_end(&mut self, end: QName) -> Result<Span> {
|
841 | // According to the https://www.w3.org/TR/xml11/#dt-etag, end name should
|
842 | // match literally the start name. See `Config::check_end_names` documentation
|
843 | self.reader.read_to_end(end)
|
844 | }
|
845 |
|
846 | /// Reads content between start and end tags, including any markup. This
|
847 | /// function is supposed to be called after you already read a [`Start`] event.
|
848 | ///
|
849 | /// Manages nested cases where parent and child elements have the _literally_
|
850 | /// same name.
|
851 | ///
|
852 | /// This method does not unescape read data, instead it returns content
|
853 | /// "as is" of the XML document. This is because it has no idea what text
|
854 | /// it reads, and if, for example, it contains a CDATA section, an attempt to
|
855 | /// unescape its content will spoil the data.
|
856 | ///
|
857 | /// Any text will be decoded using the current XML [`decoder()`].
|
858 | ///
|
859 | /// Actually, this method performs the following code:
|
860 | ///
|
861 | /// ```ignore
|
862 | /// let span = reader.read_to_end(end)?;
|
863 | /// let text = reader.decoder().decode(&reader.inner_slice[span]);
|
864 | /// ```
|
865 | ///
|
866 | /// # Examples
|
867 | ///
|
868 | /// This example shows how you can read HTML content from your XML document.
|
869 | ///
|
870 | /// ```
|
871 | /// # use pretty_assertions::assert_eq;
|
872 | /// # use std::borrow::Cow;
|
873 | /// use quick_xml::events::{BytesStart, Event};
|
874 | /// use quick_xml::reader::NsReader;
|
875 | ///
|
876 | /// let mut reader = NsReader::from_str(r#"
|
877 | /// <html>
|
878 | /// <title>This is a HTML text</title>
|
879 | /// <p>Usual XML rules does not apply inside it
|
880 | /// <p>For example, elements not needed to be "closed"
|
881 | /// </html>
|
882 | /// "# );
|
883 | /// reader.config_mut().trim_text(true);
|
884 | ///
|
885 | /// let start = BytesStart::new("html" );
|
886 | /// let end = start.to_end().into_owned();
|
887 | ///
|
888 | /// // First, we read a start event...
|
889 | /// assert_eq!(reader.read_event().unwrap(), Event::Start(start));
|
890 | /// // ...and disable checking of end names because we expect HTML further...
|
891 | /// reader.config_mut().check_end_names = false;
|
892 | ///
|
893 | /// // ...then, we could read text content until close tag.
|
894 | /// // This call will correctly handle nested <html> elements.
|
895 | /// let text = reader.read_text(end.name()).unwrap();
|
896 | /// assert_eq!(text, Cow::Borrowed(r#"
|
897 | /// <title>This is a HTML text</title>
|
898 | /// <p>Usual XML rules does not apply inside it
|
899 | /// <p>For example, elements not needed to be "closed"
|
900 | /// "# ));
|
901 | ///
|
902 | /// // Now we can enable checks again
|
903 | /// reader.config_mut().check_end_names = true;
|
904 | ///
|
905 | /// // At the end we should get an Eof event, because we ate the whole XML
|
906 | /// assert_eq!(reader.read_event().unwrap(), Event::Eof);
|
907 | /// ```
|
908 | ///
|
909 | /// [`Start`]: Event::Start
|
910 | /// [`decoder()`]: Reader::decoder()
|
911 | #[inline ]
|
912 | pub fn read_text(&mut self, end: QName) -> Result<Cow<'i, str>> {
|
913 | self.reader.read_text(end)
|
914 | }
|
915 | }
|
916 |
|
917 | impl<R> Deref for NsReader<R> {
|
918 | type Target = Reader<R>;
|
919 |
|
920 | #[inline ]
|
921 | fn deref(&self) -> &Self::Target {
|
922 | &self.reader
|
923 | }
|
924 | }
|
925 | |