//! A reader that manages namespace declarations found in the input and is able
//! to resolve [qualified names] to [expanded names].
|
3 | //!
|
4 | //! [qualified names]: https://www.w3.org/TR/xml-names11/#dt-qualname
|
5 | //! [expanded names]: https://www.w3.org/TR/xml-names11/#dt-expname
|
6 |
|
7 | use std::borrow::Cow;
|
8 | use std::fs::File;
|
9 | use std::io::{BufRead, BufReader};
|
10 | use std::ops::Deref;
|
11 | use std::path::Path;
|
12 |
|
13 | use crate::errors::Result;
|
14 | use crate::events::Event;
|
15 | use crate::name::{LocalName, NamespaceResolver, QName, ResolveResult};
|
16 | use crate::reader::{Reader, Span, XmlSource};
|
17 |
|
18 | /// A low level encoding-agnostic XML event reader that performs namespace resolution.
|
19 | ///
|
20 | /// Consumes a [`BufRead`] and streams XML `Event`s.
|
21 | pub struct NsReader<R> {
|
22 | /// An XML reader
|
23 | pub(super) reader: Reader<R>,
|
24 | /// A buffer to manage namespaces
|
25 | ns_resolver: NamespaceResolver,
|
26 | /// We cannot pop data from the namespace stack until returned `Empty` or `End`
|
27 | /// event will be processed by the user, so we only mark that we should that
|
28 | /// in the next [`Self::read_event_impl()`] call.
|
29 | pending_pop: bool,
|
30 | }
|
31 |
|
32 | /// Builder methods
|
33 | impl<R> NsReader<R> {
|
34 | /// Creates a `NsReader` that reads from a reader.
|
35 | #[inline ]
|
36 | pub fn from_reader(reader: R) -> Self {
|
37 | Self::new(Reader::from_reader(reader))
|
38 | }
|
39 |
|
40 | configure_methods!(reader);
|
41 | }
|
42 |
|
43 | /// Private methods
|
44 | impl<R> NsReader<R> {
|
45 | #[inline ]
|
46 | fn new(reader: Reader<R>) -> Self {
|
47 | Self {
|
48 | reader,
|
49 | ns_resolver: NamespaceResolver::default(),
|
50 | pending_pop: false,
|
51 | }
|
52 | }
|
53 |
|
54 | fn read_event_impl<'i, B>(&mut self, buf: B) -> Result<Event<'i>>
|
55 | where
|
56 | R: XmlSource<'i, B>,
|
57 | {
|
58 | self.pop();
|
59 | let event = self.reader.read_event_impl(buf);
|
60 | self.process_event(event)
|
61 | }
|
62 |
|
63 | pub(super) fn pop(&mut self) {
|
64 | if self.pending_pop {
|
65 | self.ns_resolver.pop();
|
66 | self.pending_pop = false;
|
67 | }
|
68 | }
|
69 |
|
70 | pub(super) fn process_event<'i>(&mut self, event: Result<Event<'i>>) -> Result<Event<'i>> {
|
71 | match event {
|
72 | Ok(Event::Start(e)) => {
|
73 | self.ns_resolver.push(&e)?;
|
74 | Ok(Event::Start(e))
|
75 | }
|
76 | Ok(Event::Empty(e)) => {
|
77 | self.ns_resolver.push(&e)?;
|
78 | // notify next `read_event_impl()` invocation that it needs to pop this
|
79 | // namespace scope
|
80 | self.pending_pop = true;
|
81 | Ok(Event::Empty(e))
|
82 | }
|
83 | Ok(Event::End(e)) => {
|
84 | // notify next `read_event_impl()` invocation that it needs to pop this
|
85 | // namespace scope
|
86 | self.pending_pop = true;
|
87 | Ok(Event::End(e))
|
88 | }
|
89 | e => e,
|
90 | }
|
91 | }
|
92 |
|
93 | pub(super) fn resolve_event<'i>(
|
94 | &mut self,
|
95 | event: Result<Event<'i>>,
|
96 | ) -> Result<(ResolveResult, Event<'i>)> {
|
97 | match event {
|
98 | Ok(Event::Start(e)) => Ok((self.ns_resolver.find(e.name()), Event::Start(e))),
|
99 | Ok(Event::Empty(e)) => Ok((self.ns_resolver.find(e.name()), Event::Empty(e))),
|
100 | Ok(Event::End(e)) => Ok((self.ns_resolver.find(e.name()), Event::End(e))),
|
101 | Ok(e) => Ok((ResolveResult::Unbound, e)),
|
102 | Err(e) => Err(e),
|
103 | }
|
104 | }
|
105 | }
|
106 |
|
107 | /// Getters
|
108 | impl<R> NsReader<R> {
|
109 | /// Consumes `NsReader` returning the underlying reader
|
110 | ///
|
111 | /// See the [`Reader::into_inner`] for examples
|
112 | #[inline ]
|
113 | pub fn into_inner(self) -> R {
|
114 | self.reader.into_inner()
|
115 | }
|
116 |
|
117 | /// Gets a mutable reference to the underlying reader.
|
118 | pub fn get_mut(&mut self) -> &mut R {
|
119 | self.reader.get_mut()
|
120 | }
|
121 |
|
122 | /// Resolves a potentially qualified **element name** or **attribute name**
|
123 | /// into _(namespace name, local name)_.
|
124 | ///
|
125 | /// _Qualified_ names have the form `prefix:local-name` where the `prefix`
|
126 | /// is defined on any containing XML element via `xmlns:prefix="the:namespace:uri"`.
|
127 | /// The namespace prefix can be defined on the same element as the name in question.
|
128 | ///
|
129 | /// The method returns following results depending on the `name` shape,
|
130 | /// `attribute` flag and the presence of the default namespace:
|
131 | ///
|
132 | /// |attribute|`xmlns="..."`|QName |ResolveResult |LocalName
|
133 | /// |---------|-------------|-------------------|-----------------------|------------
|
134 | /// |`true` |Not defined |`local-name` |[`Unbound`] |`local-name`
|
135 | /// |`true` |Defined |`local-name` |[`Unbound`] |`local-name`
|
136 | /// |`true` |_any_ |`prefix:local-name`|[`Bound`] / [`Unknown`]|`local-name`
|
137 | /// |`false` |Not defined |`local-name` |[`Unbound`] |`local-name`
|
138 | /// |`false` |Defined |`local-name` |[`Bound`] (default) |`local-name`
|
139 | /// |`false` |_any_ |`prefix:local-name`|[`Bound`] / [`Unknown`]|`local-name`
|
140 | ///
|
/// If you want to clearly indicate that the name you resolve is an element
/// or an attribute name, you could use the [`resolve_attribute()`] or
/// [`resolve_element()`] methods.
|
144 | ///
|
145 | /// # Lifetimes
|
146 | ///
|
147 | /// - `'n`: lifetime of a name. Returned local name will be bound to the same
|
148 | /// lifetime as the name in question.
|
149 | /// - returned namespace name will be bound to the reader itself
|
150 | ///
|
151 | /// [`Bound`]: ResolveResult::Bound
|
152 | /// [`Unbound`]: ResolveResult::Unbound
|
153 | /// [`Unknown`]: ResolveResult::Unknown
|
154 | /// [`resolve_attribute()`]: Self::resolve_attribute()
|
155 | /// [`resolve_element()`]: Self::resolve_element()
|
156 | #[inline ]
|
157 | pub fn resolve<'n>(&self, name: QName<'n>, attribute: bool) -> (ResolveResult, LocalName<'n>) {
|
158 | self.ns_resolver.resolve(name, !attribute)
|
159 | }
|
160 |
|
161 | /// Resolves a potentially qualified **element name** into _(namespace name, local name)_.
|
162 | ///
|
163 | /// _Qualified_ element names have the form `prefix:local-name` where the
|
164 | /// `prefix` is defined on any containing XML element via `xmlns:prefix="the:namespace:uri"`.
|
165 | /// The namespace prefix can be defined on the same element as the element
|
166 | /// in question.
|
167 | ///
|
/// _Unqualified_ elements inherit the current _default namespace_.
|
169 | ///
|
170 | /// The method returns following results depending on the `name` shape and
|
171 | /// the presence of the default namespace:
|
172 | ///
|
173 | /// |`xmlns="..."`|QName |ResolveResult |LocalName
|
174 | /// |-------------|-------------------|-----------------------|------------
|
175 | /// |Not defined |`local-name` |[`Unbound`] |`local-name`
|
176 | /// |Defined |`local-name` |[`Bound`] (default) |`local-name`
|
177 | /// |_any_ |`prefix:local-name`|[`Bound`] / [`Unknown`]|`local-name`
|
178 | ///
|
179 | /// # Lifetimes
|
180 | ///
|
181 | /// - `'n`: lifetime of an element name. Returned local name will be bound
|
182 | /// to the same lifetime as the name in question.
|
183 | /// - returned namespace name will be bound to the reader itself
|
184 | ///
|
185 | /// # Examples
|
186 | ///
|
/// This example shows how you can resolve a qualified name into a namespace.
/// Note that in code like this you do not need to do that manually,
/// because the namespace resolution result is returned by [`read_resolved_event()`].
|
190 | ///
|
191 | /// ```
|
192 | /// # use pretty_assertions::assert_eq;
|
193 | /// use quick_xml::events::Event;
|
194 | /// use quick_xml::name::{Namespace, QName, ResolveResult::*};
|
195 | /// use quick_xml::reader::NsReader;
|
196 | ///
|
197 | /// let mut reader = NsReader::from_str("<tag xmlns='root namespace'/>" );
|
198 | ///
|
199 | /// match reader.read_event().unwrap() {
|
200 | /// Event::Empty(e) => assert_eq!(
|
201 | /// reader.resolve_element(e.name()),
|
202 | /// (Bound(Namespace(b"root namespace" )), QName(b"tag" ).into())
|
203 | /// ),
|
204 | /// _ => unreachable!(),
|
205 | /// }
|
206 | /// ```
|
207 | ///
|
208 | /// [`Bound`]: ResolveResult::Bound
|
209 | /// [`Unbound`]: ResolveResult::Unbound
|
210 | /// [`Unknown`]: ResolveResult::Unknown
|
211 | /// [`read_resolved_event()`]: Self::read_resolved_event
|
212 | #[inline ]
|
213 | pub fn resolve_element<'n>(&self, name: QName<'n>) -> (ResolveResult, LocalName<'n>) {
|
214 | self.ns_resolver.resolve(name, true)
|
215 | }
|
216 |
|
217 | /// Resolves a potentially qualified **attribute name** into _(namespace name, local name)_.
|
218 | ///
|
219 | /// _Qualified_ attribute names have the form `prefix:local-name` where the
|
220 | /// `prefix` is defined on any containing XML element via `xmlns:prefix="the:namespace:uri"`.
|
221 | /// The namespace prefix can be defined on the same element as the attribute
|
222 | /// in question.
|
223 | ///
|
224 | /// _Unqualified_ attribute names do *not* inherit the current _default namespace_.
|
225 | ///
|
226 | /// The method returns following results depending on the `name` shape and
|
227 | /// the presence of the default namespace:
|
228 | ///
|
229 | /// |`xmlns="..."`|QName |ResolveResult |LocalName
|
230 | /// |-------------|-------------------|-----------------------|------------
|
231 | /// |Not defined |`local-name` |[`Unbound`] |`local-name`
|
232 | /// |Defined |`local-name` |[`Unbound`] |`local-name`
|
233 | /// |_any_ |`prefix:local-name`|[`Bound`] / [`Unknown`]|`local-name`
|
234 | ///
|
235 | /// # Lifetimes
|
236 | ///
|
237 | /// - `'n`: lifetime of an attribute name. Returned local name will be bound
|
238 | /// to the same lifetime as the name in question.
|
239 | /// - returned namespace name will be bound to the reader itself
|
240 | ///
|
241 | /// # Examples
|
242 | ///
|
243 | /// ```
|
244 | /// # use pretty_assertions::assert_eq;
|
245 | /// use quick_xml::events::Event;
|
246 | /// use quick_xml::events::attributes::Attribute;
|
247 | /// use quick_xml::name::{Namespace, QName, ResolveResult::*};
|
248 | /// use quick_xml::reader::NsReader;
|
249 | ///
|
250 | /// let mut reader = NsReader::from_str("
|
251 | /// <tag one='1'
|
252 | /// p:two='2'
|
253 | /// xmlns='root namespace'
|
254 | /// xmlns:p='other namespace'/>
|
255 | /// " );
|
256 | /// reader.trim_text(true);
|
257 | ///
|
258 | /// match reader.read_event().unwrap() {
|
259 | /// Event::Empty(e) => {
|
260 | /// let mut iter = e.attributes();
|
261 | ///
|
262 | /// // Unlike elements, attributes without explicit namespace
|
263 | /// // not bound to any namespace
|
264 | /// let one = iter.next().unwrap().unwrap();
|
265 | /// assert_eq!(
|
266 | /// reader.resolve_attribute(one.key),
|
267 | /// (Unbound, QName(b"one" ).into())
|
268 | /// );
|
269 | ///
|
270 | /// let two = iter.next().unwrap().unwrap();
|
271 | /// assert_eq!(
|
272 | /// reader.resolve_attribute(two.key),
|
273 | /// (Bound(Namespace(b"other namespace" )), QName(b"two" ).into())
|
274 | /// );
|
275 | /// }
|
276 | /// _ => unreachable!(),
|
277 | /// }
|
278 | /// ```
|
279 | ///
|
280 | /// [`Bound`]: ResolveResult::Bound
|
281 | /// [`Unbound`]: ResolveResult::Unbound
|
282 | /// [`Unknown`]: ResolveResult::Unknown
|
283 | #[inline ]
|
284 | pub fn resolve_attribute<'n>(&self, name: QName<'n>) -> (ResolveResult, LocalName<'n>) {
|
285 | self.ns_resolver.resolve(name, false)
|
286 | }
|
287 | }
|
288 |
|
289 | impl<R: BufRead> NsReader<R> {
|
290 | /// Reads the next event into given buffer.
|
291 | ///
|
292 | /// This method manages namespaces but doesn't resolve them automatically.
|
293 | /// You should call [`resolve_element()`] if you want to get a namespace.
|
294 | ///
|
295 | /// You also can use [`read_resolved_event_into()`] instead if you want to resolve
|
296 | /// namespace as soon as you get an event.
|
297 | ///
|
298 | /// # Examples
|
299 | ///
|
300 | /// ```
|
301 | /// # use pretty_assertions::assert_eq;
|
302 | /// use quick_xml::events::Event;
|
303 | /// use quick_xml::name::{Namespace, ResolveResult::*};
|
304 | /// use quick_xml::reader::NsReader;
|
305 | ///
|
306 | /// let mut reader = NsReader::from_str(r#"
|
307 | /// <x:tag1 xmlns:x="www.xxxx" xmlns:y="www.yyyy" att1 = "test">
|
308 | /// <y:tag2><!--Test comment-->Test</y:tag2>
|
309 | /// <y:tag2>Test 2</y:tag2>
|
310 | /// </x:tag1>
|
311 | /// "# );
|
312 | /// reader.trim_text(true);
|
313 | ///
|
314 | /// let mut count = 0;
|
315 | /// let mut buf = Vec::new();
|
316 | /// let mut txt = Vec::new();
|
317 | /// loop {
|
318 | /// match reader.read_event_into(&mut buf).unwrap() {
|
319 | /// Event::Start(e) => {
|
320 | /// count += 1;
|
321 | /// let (ns, local) = reader.resolve_element(e.name());
|
322 | /// match local.as_ref() {
|
323 | /// b"tag1" => assert_eq!(ns, Bound(Namespace(b"www.xxxx" ))),
|
324 | /// b"tag2" => assert_eq!(ns, Bound(Namespace(b"www.yyyy" ))),
|
325 | /// _ => unreachable!(),
|
326 | /// }
|
327 | /// }
|
328 | /// Event::Text(e) => {
|
329 | /// txt.push(e.unescape().unwrap().into_owned())
|
330 | /// }
|
331 | /// Event::Eof => break,
|
332 | /// _ => (),
|
333 | /// }
|
334 | /// buf.clear();
|
335 | /// }
|
336 | /// assert_eq!(count, 3);
|
337 | /// assert_eq!(txt, vec!["Test" .to_string(), "Test 2" .to_string()]);
|
338 | /// ```
|
339 | ///
|
340 | /// [`resolve_element()`]: Self::resolve_element
|
341 | /// [`read_resolved_event_into()`]: Self::read_resolved_event_into
|
342 | #[inline ]
|
343 | pub fn read_event_into<'b>(&mut self, buf: &'b mut Vec<u8>) -> Result<Event<'b>> {
|
344 | self.read_event_impl(buf)
|
345 | }
|
346 |
|
347 | /// Reads the next event into given buffer and resolves its namespace (if applicable).
|
348 | ///
|
349 | /// Namespace is resolved only for [`Start`], [`Empty`] and [`End`] events.
|
350 | /// For all other events the concept of namespace is not defined, so
|
351 | /// a [`ResolveResult::Unbound`] is returned.
|
352 | ///
|
353 | /// If you are not interested in namespaces, you can use [`read_event_into()`]
|
354 | /// which will not automatically resolve namespaces for you.
|
355 | ///
|
356 | /// # Examples
|
357 | ///
|
358 | /// ```
|
359 | /// # use pretty_assertions::assert_eq;
|
360 | /// use quick_xml::events::Event;
|
361 | /// use quick_xml::name::{Namespace, QName, ResolveResult::*};
|
362 | /// use quick_xml::reader::NsReader;
|
363 | ///
|
364 | /// let mut reader = NsReader::from_str(r#"
|
365 | /// <x:tag1 xmlns:x="www.xxxx" xmlns:y="www.yyyy" att1 = "test">
|
366 | /// <y:tag2><!--Test comment-->Test</y:tag2>
|
367 | /// <y:tag2>Test 2</y:tag2>
|
368 | /// </x:tag1>
|
369 | /// "# );
|
370 | /// reader.trim_text(true);
|
371 | ///
|
372 | /// let mut count = 0;
|
373 | /// let mut buf = Vec::new();
|
374 | /// let mut txt = Vec::new();
|
375 | /// loop {
|
376 | /// match reader.read_resolved_event_into(&mut buf).unwrap() {
|
377 | /// (Bound(Namespace(b"www.xxxx" )), Event::Start(e)) => {
|
378 | /// count += 1;
|
379 | /// assert_eq!(e.local_name(), QName(b"tag1" ).into());
|
380 | /// }
|
381 | /// (Bound(Namespace(b"www.yyyy" )), Event::Start(e)) => {
|
382 | /// count += 1;
|
383 | /// assert_eq!(e.local_name(), QName(b"tag2" ).into());
|
384 | /// }
|
385 | /// (_, Event::Start(_)) => unreachable!(),
|
386 | ///
|
387 | /// (_, Event::Text(e)) => {
|
388 | /// txt.push(e.unescape().unwrap().into_owned())
|
389 | /// }
|
390 | /// (_, Event::Eof) => break,
|
391 | /// _ => (),
|
392 | /// }
|
393 | /// buf.clear();
|
394 | /// }
|
395 | /// assert_eq!(count, 3);
|
396 | /// assert_eq!(txt, vec!["Test" .to_string(), "Test 2" .to_string()]);
|
397 | /// ```
|
398 | ///
|
399 | /// [`Start`]: Event::Start
|
400 | /// [`Empty`]: Event::Empty
|
401 | /// [`End`]: Event::End
|
402 | /// [`read_event_into()`]: Self::read_event_into
|
403 | #[inline ]
|
404 | pub fn read_resolved_event_into<'b>(
|
405 | &mut self,
|
406 | buf: &'b mut Vec<u8>,
|
407 | ) -> Result<(ResolveResult, Event<'b>)> {
|
408 | let event = self.read_event_impl(buf);
|
409 | self.resolve_event(event)
|
410 | }
|
411 |
|
412 | /// Reads until end element is found using provided buffer as intermediate
|
413 | /// storage for events content. This function is supposed to be called after
|
414 | /// you already read a [`Start`] event.
|
415 | ///
|
416 | /// Returns a span that cover content between `>` of an opening tag and `<` of
|
417 | /// a closing tag or an empty slice, if [`expand_empty_elements`] is set and
|
418 | /// this method was called after reading expanded [`Start`] event.
|
419 | ///
|
420 | /// Manages nested cases where parent and child elements have the _literally_
|
421 | /// same name.
|
422 | ///
|
/// If the corresponding [`End`] event is not found, the [`UnexpectedEof`] error
/// will be returned. In particular, that error will be returned if you call
/// this method without consuming the corresponding [`Start`] event first.
|
426 | ///
|
/// If your reader is created from a string slice or byte array slice, it is
/// better to use the [`read_to_end()`] method, because it will not copy bytes
/// into an intermediate buffer.
|
430 | ///
|
431 | /// The provided `buf` buffer will be filled only by one event content at time.
|
432 | /// Before reading of each event the buffer will be cleared. If you know an
|
433 | /// appropriate size of each event, you can preallocate the buffer to reduce
|
434 | /// number of reallocations.
|
435 | ///
|
436 | /// The `end` parameter should contain name of the end element _in the reader
|
437 | /// encoding_. It is good practice to always get that parameter using
|
438 | /// [`BytesStart::to_end()`] method.
|
439 | ///
|
440 | /// # Namespaces
|
441 | ///
|
/// While the `NsReader` does namespace resolution, namespaces do not
/// change the algorithm for comparing names. Although the names `a:name`
/// and `b:name`, where both prefixes `a` and `b` resolve to the same namespace,
/// are semantically equivalent, `</b:name>` cannot close `<a:name>`, because
/// according to [the specification]
|
447 | ///
|
448 | /// > The end of every element that begins with a **start-tag** MUST be marked
|
449 | /// > by an **end-tag** containing a name that echoes the element's type as
|
450 | /// > given in the **start-tag**
|
451 | ///
|
452 | /// # Examples
|
453 | ///
|
454 | /// This example shows, how you can skip XML content after you read the
|
455 | /// start event.
|
456 | ///
|
457 | /// ```
|
458 | /// # use pretty_assertions::assert_eq;
|
459 | /// use quick_xml::events::{BytesStart, Event};
|
460 | /// use quick_xml::name::{Namespace, ResolveResult};
|
461 | /// use quick_xml::reader::NsReader;
|
462 | ///
|
463 | /// let mut reader = NsReader::from_str(r#"
|
464 | /// <outer xmlns="namespace 1">
|
465 | /// <inner xmlns="namespace 2">
|
466 | /// <outer></outer>
|
467 | /// </inner>
|
468 | /// <inner>
|
469 | /// <inner></inner>
|
470 | /// <inner/>
|
471 | /// <outer></outer>
|
472 | /// <p:outer xmlns:p="ns"></p:outer>
|
473 | /// <outer/>
|
474 | /// </inner>
|
475 | /// </outer>
|
476 | /// "# );
|
477 | /// reader.trim_text(true);
|
478 | /// let mut buf = Vec::new();
|
479 | ///
|
480 | /// let ns = Namespace(b"namespace 1" );
|
481 | /// let start = BytesStart::from_content(r#"outer xmlns="namespace 1""# , 5);
|
482 | /// let end = start.to_end().into_owned();
|
483 | ///
|
484 | /// // First, we read a start event...
|
485 | /// assert_eq!(
|
486 | /// reader.read_resolved_event_into(&mut buf).unwrap(),
|
487 | /// (ResolveResult::Bound(ns), Event::Start(start))
|
488 | /// );
|
489 | ///
|
490 | /// // ...then, we could skip all events to the corresponding end event.
|
491 | /// // This call will correctly handle nested <outer> elements.
|
492 | /// // Note, however, that this method does not handle namespaces.
|
493 | /// reader.read_to_end_into(end.name(), &mut buf).unwrap();
|
494 | ///
|
495 | /// // At the end we should get an Eof event, because we ate the whole XML
|
496 | /// assert_eq!(
|
497 | /// reader.read_resolved_event_into(&mut buf).unwrap(),
|
498 | /// (ResolveResult::Unbound, Event::Eof)
|
499 | /// );
|
500 | /// ```
|
501 | ///
|
502 | /// [`Start`]: Event::Start
|
503 | /// [`End`]: Event::End
|
504 | /// [`UnexpectedEof`]: crate::errors::Error::UnexpectedEof
|
505 | /// [`read_to_end()`]: Self::read_to_end
|
506 | /// [`BytesStart::to_end()`]: crate::events::BytesStart::to_end
|
507 | /// [`expand_empty_elements`]: Self::expand_empty_elements
|
508 | /// [the specification]: https://www.w3.org/TR/xml11/#dt-etag
|
509 | #[inline ]
|
510 | pub fn read_to_end_into(&mut self, end: QName, buf: &mut Vec<u8>) -> Result<Span> {
|
511 | // According to the https://www.w3.org/TR/xml11/#dt-etag, end name should
|
512 | // match literally the start name. See `Self::check_end_names` documentation
|
513 | self.reader.read_to_end_into(end, buf)
|
514 | }
|
515 | }
|
516 |
|
517 | impl NsReader<BufReader<File>> {
|
518 | /// Creates an XML reader from a file path.
|
519 | pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
|
520 | Ok(Self::new(reader:Reader::from_file(path)?))
|
521 | }
|
522 | }
|
523 |
|
524 | impl<'i> NsReader<&'i [u8]> {
|
525 | /// Creates an XML reader from a string slice.
|
526 | #[inline ]
|
527 | #[allow (clippy::should_implement_trait)]
|
528 | pub fn from_str(s: &'i str) -> Self {
|
529 | Self::new(Reader::from_str(s))
|
530 | }
|
531 |
|
532 | /// Reads the next event, borrow its content from the input buffer.
|
533 | ///
|
534 | /// This method manages namespaces but doesn't resolve them automatically.
|
535 | /// You should call [`resolve_element()`] if you want to get a namespace.
|
536 | ///
|
537 | /// You also can use [`read_resolved_event()`] instead if you want to resolve namespace
|
538 | /// as soon as you get an event.
|
539 | ///
|
540 | /// There is no asynchronous `read_event_async()` version of this function,
|
541 | /// because it is not necessary -- the contents are already in memory and no IO
|
542 | /// is needed, therefore there is no potential for blocking.
|
543 | ///
|
544 | /// # Examples
|
545 | ///
|
546 | /// ```
|
547 | /// # use pretty_assertions::assert_eq;
|
548 | /// use quick_xml::events::Event;
|
549 | /// use quick_xml::name::{Namespace, ResolveResult::*};
|
550 | /// use quick_xml::reader::NsReader;
|
551 | ///
|
552 | /// let mut reader = NsReader::from_str(r#"
|
553 | /// <x:tag1 xmlns:x="www.xxxx" xmlns:y="www.yyyy" att1 = "test">
|
554 | /// <y:tag2><!--Test comment-->Test</y:tag2>
|
555 | /// <y:tag2>Test 2</y:tag2>
|
556 | /// </x:tag1>
|
557 | /// "# );
|
558 | /// reader.trim_text(true);
|
559 | ///
|
560 | /// let mut count = 0;
|
561 | /// let mut txt = Vec::new();
|
562 | /// loop {
|
563 | /// match reader.read_event().unwrap() {
|
564 | /// Event::Start(e) => {
|
565 | /// count += 1;
|
566 | /// let (ns, local) = reader.resolve_element(e.name());
|
567 | /// match local.as_ref() {
|
568 | /// b"tag1" => assert_eq!(ns, Bound(Namespace(b"www.xxxx" ))),
|
569 | /// b"tag2" => assert_eq!(ns, Bound(Namespace(b"www.yyyy" ))),
|
570 | /// _ => unreachable!(),
|
571 | /// }
|
572 | /// }
|
573 | /// Event::Text(e) => {
|
574 | /// txt.push(e.unescape().unwrap().into_owned())
|
575 | /// }
|
576 | /// Event::Eof => break,
|
577 | /// _ => (),
|
578 | /// }
|
579 | /// }
|
580 | /// assert_eq!(count, 3);
|
581 | /// assert_eq!(txt, vec!["Test" .to_string(), "Test 2" .to_string()]);
|
582 | /// ```
|
583 | ///
|
584 | /// [`resolve_element()`]: Self::resolve_element
|
585 | /// [`read_resolved_event()`]: Self::read_resolved_event
|
586 | #[inline ]
|
587 | pub fn read_event(&mut self) -> Result<Event<'i>> {
|
588 | self.read_event_impl(())
|
589 | }
|
590 |
|
591 | /// Reads the next event, borrow its content from the input buffer, and resolves
|
592 | /// its namespace (if applicable).
|
593 | ///
|
594 | /// Namespace is resolved only for [`Start`], [`Empty`] and [`End`] events.
|
595 | /// For all other events the concept of namespace is not defined, so
|
596 | /// a [`ResolveResult::Unbound`] is returned.
|
597 | ///
|
598 | /// If you are not interested in namespaces, you can use [`read_event()`]
|
599 | /// which will not automatically resolve namespaces for you.
|
600 | ///
|
601 | /// There is no asynchronous `read_resolved_event_async()` version of this function,
|
602 | /// because it is not necessary -- the contents are already in memory and no IO
|
603 | /// is needed, therefore there is no potential for blocking.
|
604 | ///
|
605 | /// # Examples
|
606 | ///
|
607 | /// ```
|
608 | /// # use pretty_assertions::assert_eq;
|
609 | /// use quick_xml::events::Event;
|
610 | /// use quick_xml::name::{Namespace, QName, ResolveResult::*};
|
611 | /// use quick_xml::reader::NsReader;
|
612 | ///
|
613 | /// let mut reader = NsReader::from_str(r#"
|
614 | /// <x:tag1 xmlns:x="www.xxxx" xmlns:y="www.yyyy" att1 = "test">
|
615 | /// <y:tag2><!--Test comment-->Test</y:tag2>
|
616 | /// <y:tag2>Test 2</y:tag2>
|
617 | /// </x:tag1>
|
618 | /// "# );
|
619 | /// reader.trim_text(true);
|
620 | ///
|
621 | /// let mut count = 0;
|
622 | /// let mut txt = Vec::new();
|
623 | /// loop {
|
624 | /// match reader.read_resolved_event().unwrap() {
|
625 | /// (Bound(Namespace(b"www.xxxx" )), Event::Start(e)) => {
|
626 | /// count += 1;
|
627 | /// assert_eq!(e.local_name(), QName(b"tag1" ).into());
|
628 | /// }
|
629 | /// (Bound(Namespace(b"www.yyyy" )), Event::Start(e)) => {
|
630 | /// count += 1;
|
631 | /// assert_eq!(e.local_name(), QName(b"tag2" ).into());
|
632 | /// }
|
633 | /// (_, Event::Start(_)) => unreachable!(),
|
634 | ///
|
635 | /// (_, Event::Text(e)) => {
|
636 | /// txt.push(e.unescape().unwrap().into_owned())
|
637 | /// }
|
638 | /// (_, Event::Eof) => break,
|
639 | /// _ => (),
|
640 | /// }
|
641 | /// }
|
642 | /// assert_eq!(count, 3);
|
643 | /// assert_eq!(txt, vec!["Test" .to_string(), "Test 2" .to_string()]);
|
644 | /// ```
|
645 | ///
|
646 | /// [`Start`]: Event::Start
|
647 | /// [`Empty`]: Event::Empty
|
648 | /// [`End`]: Event::End
|
649 | /// [`read_event()`]: Self::read_event
|
650 | #[inline ]
|
651 | pub fn read_resolved_event(&mut self) -> Result<(ResolveResult, Event<'i>)> {
|
652 | let event = self.read_event_impl(());
|
653 | self.resolve_event(event)
|
654 | }
|
655 |
|
656 | /// Reads until end element is found. This function is supposed to be called
|
657 | /// after you already read a [`Start`] event.
|
658 | ///
|
659 | /// Returns a span that cover content between `>` of an opening tag and `<` of
|
660 | /// a closing tag or an empty slice, if [`expand_empty_elements`] is set and
|
661 | /// this method was called after reading expanded [`Start`] event.
|
662 | ///
|
663 | /// Manages nested cases where parent and child elements have the _literally_
|
664 | /// same name.
|
665 | ///
|
/// If the corresponding [`End`] event is not found, the [`UnexpectedEof`] error
/// will be returned. In particular, that error will be returned if you call
/// this method without consuming the corresponding [`Start`] event first.
|
669 | ///
|
670 | /// The `end` parameter should contain name of the end element _in the reader
|
671 | /// encoding_. It is good practice to always get that parameter using
|
672 | /// [`BytesStart::to_end()`] method.
|
673 | ///
|
674 | /// There is no asynchronous `read_to_end_async()` version of this function,
|
675 | /// because it is not necessary -- the contents are already in memory and no IO
|
676 | /// is needed, therefore there is no potential for blocking.
|
677 | ///
|
678 | /// # Namespaces
|
679 | ///
|
/// While the `NsReader` does namespace resolution, namespaces do not
/// change the algorithm for comparing names. Although the names `a:name`
/// and `b:name`, where both prefixes `a` and `b` resolve to the same namespace,
/// are semantically equivalent, `</b:name>` cannot close `<a:name>`, because
/// according to [the specification]
|
685 | ///
|
686 | /// > The end of every element that begins with a **start-tag** MUST be marked
|
687 | /// > by an **end-tag** containing a name that echoes the element's type as
|
688 | /// > given in the **start-tag**
|
689 | ///
|
690 | /// # Examples
|
691 | ///
|
692 | /// This example shows, how you can skip XML content after you read the
|
693 | /// start event.
|
694 | ///
|
695 | /// ```
|
696 | /// # use pretty_assertions::assert_eq;
|
697 | /// use quick_xml::events::{BytesStart, Event};
|
698 | /// use quick_xml::name::{Namespace, ResolveResult};
|
699 | /// use quick_xml::reader::NsReader;
|
700 | ///
|
701 | /// let mut reader = NsReader::from_str(r#"
|
702 | /// <outer xmlns="namespace 1">
|
703 | /// <inner xmlns="namespace 2">
|
704 | /// <outer></outer>
|
705 | /// </inner>
|
706 | /// <inner>
|
707 | /// <inner></inner>
|
708 | /// <inner/>
|
709 | /// <outer></outer>
|
710 | /// <p:outer xmlns:p="ns"></p:outer>
|
711 | /// <outer/>
|
712 | /// </inner>
|
713 | /// </outer>
|
714 | /// "# );
|
715 | /// reader.trim_text(true);
|
716 | ///
|
717 | /// let ns = Namespace(b"namespace 1" );
|
718 | /// let start = BytesStart::from_content(r#"outer xmlns="namespace 1""# , 5);
|
719 | /// let end = start.to_end().into_owned();
|
720 | ///
|
721 | /// // First, we read a start event...
|
722 | /// assert_eq!(
|
723 | /// reader.read_resolved_event().unwrap(),
|
724 | /// (ResolveResult::Bound(ns), Event::Start(start))
|
725 | /// );
|
726 | ///
|
727 | /// // ...then, we could skip all events to the corresponding end event.
|
728 | /// // This call will correctly handle nested <outer> elements.
|
729 | /// // Note, however, that this method does not handle namespaces.
|
730 | /// reader.read_to_end(end.name()).unwrap();
|
731 | ///
|
732 | /// // At the end we should get an Eof event, because we ate the whole XML
|
733 | /// assert_eq!(
|
734 | /// reader.read_resolved_event().unwrap(),
|
735 | /// (ResolveResult::Unbound, Event::Eof)
|
736 | /// );
|
737 | /// ```
|
738 | ///
|
739 | /// [`Start`]: Event::Start
|
740 | /// [`End`]: Event::End
|
741 | /// [`UnexpectedEof`]: crate::errors::Error::UnexpectedEof
|
742 | /// [`BytesStart::to_end()`]: crate::events::BytesStart::to_end
|
743 | /// [`expand_empty_elements`]: Self::expand_empty_elements
|
744 | /// [the specification]: https://www.w3.org/TR/xml11/#dt-etag
|
745 | #[inline ]
|
746 | pub fn read_to_end(&mut self, end: QName) -> Result<Span> {
|
747 | // According to the https://www.w3.org/TR/xml11/#dt-etag, end name should
|
748 | // match literally the start name. See `Self::check_end_names` documentation
|
749 | self.reader.read_to_end(end)
|
750 | }
|
751 |
|
752 | /// Reads content between start and end tags, including any markup. This
|
753 | /// function is supposed to be called after you already read a [`Start`] event.
|
754 | ///
|
755 | /// Manages nested cases where parent and child elements have the _literally_
|
756 | /// same name.
|
757 | ///
|
/// This method does not unescape the read data; instead it returns the content
/// of the XML document "as is". This is because it has no idea what text
/// it reads, and if, for example, it contains a CDATA section, an attempt to
/// unescape its content will spoil the data.
|
762 | ///
|
763 | /// Any text will be decoded using the XML current [`decoder()`].
|
764 | ///
|
/// Actually, this method performs the following code:
|
766 | ///
|
767 | /// ```ignore
|
768 | /// let span = reader.read_to_end(end)?;
|
769 | /// let text = reader.decoder().decode(&reader.inner_slice[span]);
|
770 | /// ```
|
771 | ///
|
772 | /// # Examples
|
773 | ///
|
774 | /// This example shows, how you can read a HTML content from your XML document.
|
775 | ///
|
776 | /// ```
|
777 | /// # use pretty_assertions::assert_eq;
|
778 | /// # use std::borrow::Cow;
|
779 | /// use quick_xml::events::{BytesStart, Event};
|
780 | /// use quick_xml::reader::NsReader;
|
781 | ///
|
782 | /// let mut reader = NsReader::from_str(r#"
|
783 | /// <html>
|
784 | /// <title>This is a HTML text</title>
|
785 | /// <p>Usual XML rules does not apply inside it
|
786 | /// <p>For example, elements not needed to be "closed"
|
787 | /// </html>
|
788 | /// "# );
|
789 | /// reader.trim_text(true);
|
790 | ///
|
791 | /// let start = BytesStart::new("html" );
|
792 | /// let end = start.to_end().into_owned();
|
793 | ///
|
794 | /// // First, we read a start event...
|
795 | /// assert_eq!(reader.read_event().unwrap(), Event::Start(start));
|
796 | /// // ...and disable checking of end names because we expect HTML further...
|
797 | /// reader.check_end_names(false);
|
798 | ///
|
799 | /// // ...then, we could read text content until close tag.
|
800 | /// // This call will correctly handle nested <html> elements.
|
801 | /// let text = reader.read_text(end.name()).unwrap();
|
802 | /// assert_eq!(text, Cow::Borrowed(r#"
|
803 | /// <title>This is a HTML text</title>
|
804 | /// <p>Usual XML rules does not apply inside it
|
805 | /// <p>For example, elements not needed to be "closed"
|
806 | /// "# ));
|
807 | ///
|
808 | /// // Now we can enable checks again
|
809 | /// reader.check_end_names(true);
|
810 | ///
|
811 | /// // At the end we should get an Eof event, because we ate the whole XML
|
812 | /// assert_eq!(reader.read_event().unwrap(), Event::Eof);
|
813 | /// ```
|
814 | ///
|
815 | /// [`Start`]: Event::Start
|
816 | /// [`decoder()`]: Reader::decoder()
|
817 | #[inline ]
|
818 | pub fn read_text(&mut self, end: QName) -> Result<Cow<'i, str>> {
|
819 | self.reader.read_text(end)
|
820 | }
|
821 | }
|
822 |
|
823 | impl<R> Deref for NsReader<R> {
|
824 | type Target = Reader<R>;
|
825 |
|
826 | #[inline ]
|
827 | fn deref(&self) -> &Self::Target {
|
828 | &self.reader
|
829 | }
|
830 | }
|
831 | |