//! A reader that manages namespace declarations found in the input and is able
//! to resolve [qualified names] to [expanded names].
|
| 3 | //!
|
| 4 | //! [qualified names]: https://www.w3.org/TR/xml-names11/#dt-qualname
|
| 5 | //! [expanded names]: https://www.w3.org/TR/xml-names11/#dt-expname
|
| 6 |
|
| 7 | use std::borrow::Cow;
|
| 8 | use std::fs::File;
|
| 9 | use std::io::{BufRead, BufReader};
|
| 10 | use std::ops::Deref;
|
| 11 | use std::path::Path;
|
| 12 |
|
| 13 | use crate::errors::Result;
|
| 14 | use crate::events::Event;
|
| 15 | use crate::name::{LocalName, NamespaceResolver, QName, ResolveResult};
|
| 16 | use crate::reader::{Reader, Span, XmlSource};
|
| 17 |
|
| 18 | /// A low level encoding-agnostic XML event reader that performs namespace resolution.
|
| 19 | ///
|
| 20 | /// Consumes a [`BufRead`] and streams XML `Event`s.
|
| 21 | pub struct NsReader<R> {
|
| 22 | /// An XML reader
|
| 23 | pub(super) reader: Reader<R>,
|
| 24 | /// Buffer that contains names of namespace prefixes (the part between `xmlns:`
|
| 25 | /// and an `=`) and namespace values.
|
| 26 | buffer: Vec<u8>,
|
| 27 | /// A buffer to manage namespaces
|
| 28 | ns_resolver: NamespaceResolver,
|
| 29 | /// We cannot pop data from the namespace stack until returned `Empty` or `End`
|
| 30 | /// event will be processed by the user, so we only mark that we should that
|
| 31 | /// in the next [`Self::read_event_impl()`] call.
|
| 32 | pending_pop: bool,
|
| 33 | }
|
| 34 |
|
| 35 | /// Builder methods
|
| 36 | impl<R> NsReader<R> {
|
| 37 | /// Creates a `NsReader` that reads from a reader.
|
| 38 | #[inline ]
|
| 39 | pub fn from_reader(reader: R) -> Self {
|
| 40 | Self::new(Reader::from_reader(reader))
|
| 41 | }
|
| 42 |
|
| 43 | configure_methods!(reader);
|
| 44 | }
|
| 45 |
|
| 46 | /// Private methods
|
| 47 | impl<R> NsReader<R> {
|
| 48 | #[inline ]
|
| 49 | fn new(reader: Reader<R>) -> Self {
|
| 50 | Self {
|
| 51 | reader,
|
| 52 | buffer: Vec::new(),
|
| 53 | ns_resolver: NamespaceResolver::default(),
|
| 54 | pending_pop: false,
|
| 55 | }
|
| 56 | }
|
| 57 |
|
| 58 | fn read_event_impl<'i, B>(&mut self, buf: B) -> Result<Event<'i>>
|
| 59 | where
|
| 60 | R: XmlSource<'i, B>,
|
| 61 | {
|
| 62 | self.pop();
|
| 63 | let event = self.reader.read_event_impl(buf);
|
| 64 | self.process_event(event)
|
| 65 | }
|
| 66 |
|
| 67 | pub(super) fn pop(&mut self) {
|
| 68 | if self.pending_pop {
|
| 69 | self.ns_resolver.pop(&mut self.buffer);
|
| 70 | self.pending_pop = false;
|
| 71 | }
|
| 72 | }
|
| 73 |
|
| 74 | pub(super) fn process_event<'i>(&mut self, event: Result<Event<'i>>) -> Result<Event<'i>> {
|
| 75 | match event {
|
| 76 | Ok(Event::Start(e)) => {
|
| 77 | self.ns_resolver.push(&e, &mut self.buffer);
|
| 78 | Ok(Event::Start(e))
|
| 79 | }
|
| 80 | Ok(Event::Empty(e)) => {
|
| 81 | self.ns_resolver.push(&e, &mut self.buffer);
|
| 82 | // notify next `read_event_impl()` invocation that it needs to pop this
|
| 83 | // namespace scope
|
| 84 | self.pending_pop = true;
|
| 85 | Ok(Event::Empty(e))
|
| 86 | }
|
| 87 | Ok(Event::End(e)) => {
|
| 88 | // notify next `read_event_impl()` invocation that it needs to pop this
|
| 89 | // namespace scope
|
| 90 | self.pending_pop = true;
|
| 91 | Ok(Event::End(e))
|
| 92 | }
|
| 93 | e => e,
|
| 94 | }
|
| 95 | }
|
| 96 |
|
| 97 | pub(super) fn resolve_event<'i>(
|
| 98 | &mut self,
|
| 99 | event: Result<Event<'i>>,
|
| 100 | ) -> Result<(ResolveResult, Event<'i>)> {
|
| 101 | match event {
|
| 102 | Ok(Event::Start(e)) => Ok((
|
| 103 | self.ns_resolver.find(e.name(), &self.buffer),
|
| 104 | Event::Start(e),
|
| 105 | )),
|
| 106 | Ok(Event::Empty(e)) => Ok((
|
| 107 | self.ns_resolver.find(e.name(), &self.buffer),
|
| 108 | Event::Empty(e),
|
| 109 | )),
|
| 110 | Ok(Event::End(e)) => Ok((
|
| 111 | // Comment that prevent cargo rmt
|
| 112 | self.ns_resolver.find(e.name(), &self.buffer),
|
| 113 | Event::End(e),
|
| 114 | )),
|
| 115 | Ok(e) => Ok((ResolveResult::Unbound, e)),
|
| 116 | Err(e) => Err(e),
|
| 117 | }
|
| 118 | }
|
| 119 | }
|
| 120 |
|
| 121 | /// Getters
|
| 122 | impl<R> NsReader<R> {
|
| 123 | /// Consumes `NsReader` returning the underlying reader
|
| 124 | ///
|
| 125 | /// See the [`Reader::into_inner`] for examples
|
| 126 | #[inline ]
|
| 127 | pub fn into_inner(self) -> R {
|
| 128 | self.reader.into_inner()
|
| 129 | }
|
| 130 |
|
| 131 | /// Gets a mutable reference to the underlying reader.
|
| 132 | pub fn get_mut(&mut self) -> &mut R {
|
| 133 | self.reader.get_mut()
|
| 134 | }
|
| 135 |
|
| 136 | /// Resolves a potentially qualified **element name** or **attribute name**
|
| 137 | /// into _(namespace name, local name)_.
|
| 138 | ///
|
| 139 | /// _Qualified_ names have the form `prefix:local-name` where the `prefix`
|
| 140 | /// is defined on any containing XML element via `xmlns:prefix="the:namespace:uri"`.
|
| 141 | /// The namespace prefix can be defined on the same element as the name in question.
|
| 142 | ///
|
| 143 | /// The method returns following results depending on the `name` shape,
|
| 144 | /// `attribute` flag and the presence of the default namespace:
|
| 145 | ///
|
| 146 | /// |attribute|`xmlns="..."`|QName |ResolveResult |LocalName
|
| 147 | /// |---------|-------------|-------------------|-----------------------|------------
|
| 148 | /// |`true` |Not defined |`local-name` |[`Unbound`] |`local-name`
|
| 149 | /// |`true` |Defined |`local-name` |[`Unbound`] |`local-name`
|
| 150 | /// |`true` |_any_ |`prefix:local-name`|[`Bound`] / [`Unknown`]|`local-name`
|
| 151 | /// |`false` |Not defined |`local-name` |[`Unbound`] |`local-name`
|
| 152 | /// |`false` |Defined |`local-name` |[`Bound`] (default) |`local-name`
|
| 153 | /// |`false` |_any_ |`prefix:local-name`|[`Bound`] / [`Unknown`]|`local-name`
|
| 154 | ///
|
| 155 | /// If you want to clearly indicate that name that you resolve is an element
|
| 156 | /// or an attribute name, you could use [`resolve_attribute()`] or [`resolve_element()`]
|
| 157 | /// methods.
|
| 158 | ///
|
| 159 | /// # Lifetimes
|
| 160 | ///
|
| 161 | /// - `'n`: lifetime of a name. Returned local name will be bound to the same
|
| 162 | /// lifetime as the name in question.
|
| 163 | /// - returned namespace name will be bound to the reader itself
|
| 164 | ///
|
| 165 | /// [`Bound`]: ResolveResult::Bound
|
| 166 | /// [`Unbound`]: ResolveResult::Unbound
|
| 167 | /// [`Unknown`]: ResolveResult::Unknown
|
| 168 | /// [`resolve_attribute()`]: Self::resolve_attribute()
|
| 169 | /// [`resolve_element()`]: Self::resolve_element()
|
| 170 | #[inline ]
|
| 171 | pub fn resolve<'n>(&self, name: QName<'n>, attribute: bool) -> (ResolveResult, LocalName<'n>) {
|
| 172 | self.ns_resolver.resolve(name, &self.buffer, !attribute)
|
| 173 | }
|
| 174 |
|
| 175 | /// Resolves a potentially qualified **element name** into _(namespace name, local name)_.
|
| 176 | ///
|
| 177 | /// _Qualified_ element names have the form `prefix:local-name` where the
|
| 178 | /// `prefix` is defined on any containing XML element via `xmlns:prefix="the:namespace:uri"`.
|
| 179 | /// The namespace prefix can be defined on the same element as the element
|
| 180 | /// in question.
|
| 181 | ///
|
/// _Unqualified_ elements inherit the current _default namespace_.
|
| 183 | ///
|
| 184 | /// The method returns following results depending on the `name` shape and
|
| 185 | /// the presence of the default namespace:
|
| 186 | ///
|
| 187 | /// |`xmlns="..."`|QName |ResolveResult |LocalName
|
| 188 | /// |-------------|-------------------|-----------------------|------------
|
| 189 | /// |Not defined |`local-name` |[`Unbound`] |`local-name`
|
| 190 | /// |Defined |`local-name` |[`Bound`] (default) |`local-name`
|
| 191 | /// |_any_ |`prefix:local-name`|[`Bound`] / [`Unknown`]|`local-name`
|
| 192 | ///
|
| 193 | /// # Lifetimes
|
| 194 | ///
|
| 195 | /// - `'n`: lifetime of an element name. Returned local name will be bound
|
| 196 | /// to the same lifetime as the name in question.
|
| 197 | /// - returned namespace name will be bound to the reader itself
|
| 198 | ///
|
| 199 | /// # Examples
|
| 200 | ///
|
| 201 | /// This example shows how you can resolve qualified name into a namespace.
|
| 202 | /// Note, that in the code like this you do not need to do that manually,
|
/// because the namespace resolution result is returned by the [`read_resolved_event()`].
|
| 204 | ///
|
| 205 | /// ```
|
| 206 | /// # use pretty_assertions::assert_eq;
|
| 207 | /// use quick_xml::events::Event;
|
| 208 | /// use quick_xml::name::{Namespace, QName, ResolveResult::*};
|
| 209 | /// use quick_xml::reader::NsReader;
|
| 210 | ///
|
| 211 | /// let mut reader = NsReader::from_str("<tag xmlns='root namespace'/>" );
|
| 212 | ///
|
| 213 | /// match reader.read_event().unwrap() {
|
| 214 | /// Event::Empty(e) => assert_eq!(
|
| 215 | /// reader.resolve_element(e.name()),
|
| 216 | /// (Bound(Namespace(b"root namespace" )), QName(b"tag" ).into())
|
| 217 | /// ),
|
| 218 | /// _ => unreachable!(),
|
| 219 | /// }
|
| 220 | /// ```
|
| 221 | ///
|
| 222 | /// [`Bound`]: ResolveResult::Bound
|
| 223 | /// [`Unbound`]: ResolveResult::Unbound
|
| 224 | /// [`Unknown`]: ResolveResult::Unknown
|
| 225 | /// [`read_resolved_event()`]: Self::read_resolved_event
|
| 226 | #[inline ]
|
| 227 | pub fn resolve_element<'n>(&self, name: QName<'n>) -> (ResolveResult, LocalName<'n>) {
|
| 228 | self.ns_resolver.resolve(name, &self.buffer, true)
|
| 229 | }
|
| 230 |
|
| 231 | /// Resolves a potentially qualified **attribute name** into _(namespace name, local name)_.
|
| 232 | ///
|
| 233 | /// _Qualified_ attribute names have the form `prefix:local-name` where the
|
| 234 | /// `prefix` is defined on any containing XML element via `xmlns:prefix="the:namespace:uri"`.
|
| 235 | /// The namespace prefix can be defined on the same element as the attribute
|
| 236 | /// in question.
|
| 237 | ///
|
| 238 | /// _Unqualified_ attribute names do *not* inherit the current _default namespace_.
|
| 239 | ///
|
| 240 | /// The method returns following results depending on the `name` shape and
|
| 241 | /// the presence of the default namespace:
|
| 242 | ///
|
| 243 | /// |`xmlns="..."`|QName |ResolveResult |LocalName
|
| 244 | /// |-------------|-------------------|-----------------------|------------
|
| 245 | /// |Not defined |`local-name` |[`Unbound`] |`local-name`
|
| 246 | /// |Defined |`local-name` |[`Unbound`] |`local-name`
|
| 247 | /// |_any_ |`prefix:local-name`|[`Bound`] / [`Unknown`]|`local-name`
|
| 248 | ///
|
| 249 | /// # Lifetimes
|
| 250 | ///
|
| 251 | /// - `'n`: lifetime of an attribute name. Returned local name will be bound
|
| 252 | /// to the same lifetime as the name in question.
|
| 253 | /// - returned namespace name will be bound to the reader itself
|
| 254 | ///
|
| 255 | /// # Examples
|
| 256 | ///
|
| 257 | /// ```
|
| 258 | /// # use pretty_assertions::assert_eq;
|
| 259 | /// use quick_xml::events::Event;
|
| 260 | /// use quick_xml::events::attributes::Attribute;
|
| 261 | /// use quick_xml::name::{Namespace, QName, ResolveResult::*};
|
| 262 | /// use quick_xml::reader::NsReader;
|
| 263 | ///
|
| 264 | /// let mut reader = NsReader::from_str("
|
| 265 | /// <tag one='1'
|
| 266 | /// p:two='2'
|
| 267 | /// xmlns='root namespace'
|
| 268 | /// xmlns:p='other namespace'/>
|
| 269 | /// " );
|
| 270 | /// reader.trim_text(true);
|
| 271 | ///
|
| 272 | /// match reader.read_event().unwrap() {
|
| 273 | /// Event::Empty(e) => {
|
| 274 | /// let mut iter = e.attributes();
|
| 275 | ///
|
| 276 | /// // Unlike elements, attributes without explicit namespace
|
| 277 | /// // not bound to any namespace
|
| 278 | /// let one = iter.next().unwrap().unwrap();
|
| 279 | /// assert_eq!(
|
| 280 | /// reader.resolve_attribute(one.key),
|
| 281 | /// (Unbound, QName(b"one" ).into())
|
| 282 | /// );
|
| 283 | ///
|
| 284 | /// let two = iter.next().unwrap().unwrap();
|
| 285 | /// assert_eq!(
|
| 286 | /// reader.resolve_attribute(two.key),
|
| 287 | /// (Bound(Namespace(b"other namespace" )), QName(b"two" ).into())
|
| 288 | /// );
|
| 289 | /// }
|
| 290 | /// _ => unreachable!(),
|
| 291 | /// }
|
| 292 | /// ```
|
| 293 | ///
|
| 294 | /// [`Bound`]: ResolveResult::Bound
|
| 295 | /// [`Unbound`]: ResolveResult::Unbound
|
| 296 | /// [`Unknown`]: ResolveResult::Unknown
|
| 297 | #[inline ]
|
| 298 | pub fn resolve_attribute<'n>(&self, name: QName<'n>) -> (ResolveResult, LocalName<'n>) {
|
| 299 | self.ns_resolver.resolve(name, &self.buffer, false)
|
| 300 | }
|
| 301 | }
|
| 302 |
|
| 303 | impl<R: BufRead> NsReader<R> {
|
| 304 | /// Reads the next event into given buffer.
|
| 305 | ///
|
| 306 | /// This method manages namespaces but doesn't resolve them automatically.
|
| 307 | /// You should call [`resolve_element()`] if you want to get a namespace.
|
| 308 | ///
|
| 309 | /// You also can use [`read_resolved_event_into()`] instead if you want to resolve
|
| 310 | /// namespace as soon as you get an event.
|
| 311 | ///
|
| 312 | /// # Examples
|
| 313 | ///
|
| 314 | /// ```
|
| 315 | /// # use pretty_assertions::assert_eq;
|
| 316 | /// use quick_xml::events::Event;
|
| 317 | /// use quick_xml::name::{Namespace, ResolveResult::*};
|
| 318 | /// use quick_xml::reader::NsReader;
|
| 319 | ///
|
| 320 | /// let mut reader = NsReader::from_str(r#"
|
| 321 | /// <x:tag1 xmlns:x="www.xxxx" xmlns:y="www.yyyy" att1 = "test">
|
| 322 | /// <y:tag2><!--Test comment-->Test</y:tag2>
|
| 323 | /// <y:tag2>Test 2</y:tag2>
|
| 324 | /// </x:tag1>
|
| 325 | /// "# );
|
| 326 | /// reader.trim_text(true);
|
| 327 | ///
|
| 328 | /// let mut count = 0;
|
| 329 | /// let mut buf = Vec::new();
|
| 330 | /// let mut txt = Vec::new();
|
| 331 | /// loop {
|
| 332 | /// match reader.read_event_into(&mut buf).unwrap() {
|
| 333 | /// Event::Start(e) => {
|
| 334 | /// count += 1;
|
| 335 | /// let (ns, local) = reader.resolve_element(e.name());
|
| 336 | /// match local.as_ref() {
|
| 337 | /// b"tag1" => assert_eq!(ns, Bound(Namespace(b"www.xxxx" ))),
|
| 338 | /// b"tag2" => assert_eq!(ns, Bound(Namespace(b"www.yyyy" ))),
|
| 339 | /// _ => unreachable!(),
|
| 340 | /// }
|
| 341 | /// }
|
| 342 | /// Event::Text(e) => {
|
| 343 | /// txt.push(e.unescape().unwrap().into_owned())
|
| 344 | /// }
|
| 345 | /// Event::Eof => break,
|
| 346 | /// _ => (),
|
| 347 | /// }
|
| 348 | /// buf.clear();
|
| 349 | /// }
|
| 350 | /// assert_eq!(count, 3);
|
| 351 | /// assert_eq!(txt, vec!["Test" .to_string(), "Test 2" .to_string()]);
|
| 352 | /// ```
|
| 353 | ///
|
| 354 | /// [`resolve_element()`]: Self::resolve_element
|
| 355 | /// [`read_resolved_event_into()`]: Self::read_resolved_event_into
|
| 356 | #[inline ]
|
| 357 | pub fn read_event_into<'b>(&mut self, buf: &'b mut Vec<u8>) -> Result<Event<'b>> {
|
| 358 | self.read_event_impl(buf)
|
| 359 | }
|
| 360 |
|
| 361 | /// Reads the next event into given buffer and resolves its namespace (if applicable).
|
| 362 | ///
|
| 363 | /// Namespace is resolved only for [`Start`], [`Empty`] and [`End`] events.
|
| 364 | /// For all other events the concept of namespace is not defined, so
|
| 365 | /// a [`ResolveResult::Unbound`] is returned.
|
| 366 | ///
|
| 367 | /// If you are not interested in namespaces, you can use [`read_event_into()`]
|
| 368 | /// which will not automatically resolve namespaces for you.
|
| 369 | ///
|
| 370 | /// # Examples
|
| 371 | ///
|
| 372 | /// ```
|
| 373 | /// # use pretty_assertions::assert_eq;
|
| 374 | /// use quick_xml::events::Event;
|
| 375 | /// use quick_xml::name::{Namespace, QName, ResolveResult::*};
|
| 376 | /// use quick_xml::reader::NsReader;
|
| 377 | ///
|
| 378 | /// let mut reader = NsReader::from_str(r#"
|
| 379 | /// <x:tag1 xmlns:x="www.xxxx" xmlns:y="www.yyyy" att1 = "test">
|
| 380 | /// <y:tag2><!--Test comment-->Test</y:tag2>
|
| 381 | /// <y:tag2>Test 2</y:tag2>
|
| 382 | /// </x:tag1>
|
| 383 | /// "# );
|
| 384 | /// reader.trim_text(true);
|
| 385 | ///
|
| 386 | /// let mut count = 0;
|
| 387 | /// let mut buf = Vec::new();
|
| 388 | /// let mut txt = Vec::new();
|
| 389 | /// loop {
|
| 390 | /// match reader.read_resolved_event_into(&mut buf).unwrap() {
|
| 391 | /// (Bound(Namespace(b"www.xxxx" )), Event::Start(e)) => {
|
| 392 | /// count += 1;
|
| 393 | /// assert_eq!(e.local_name(), QName(b"tag1" ).into());
|
| 394 | /// }
|
| 395 | /// (Bound(Namespace(b"www.yyyy" )), Event::Start(e)) => {
|
| 396 | /// count += 1;
|
| 397 | /// assert_eq!(e.local_name(), QName(b"tag2" ).into());
|
| 398 | /// }
|
| 399 | /// (_, Event::Start(_)) => unreachable!(),
|
| 400 | ///
|
| 401 | /// (_, Event::Text(e)) => {
|
| 402 | /// txt.push(e.unescape().unwrap().into_owned())
|
| 403 | /// }
|
| 404 | /// (_, Event::Eof) => break,
|
| 405 | /// _ => (),
|
| 406 | /// }
|
| 407 | /// buf.clear();
|
| 408 | /// }
|
| 409 | /// assert_eq!(count, 3);
|
| 410 | /// assert_eq!(txt, vec!["Test" .to_string(), "Test 2" .to_string()]);
|
| 411 | /// ```
|
| 412 | ///
|
| 413 | /// [`Start`]: Event::Start
|
| 414 | /// [`Empty`]: Event::Empty
|
| 415 | /// [`End`]: Event::End
|
| 416 | /// [`read_event_into()`]: Self::read_event_into
|
| 417 | #[inline ]
|
| 418 | pub fn read_resolved_event_into<'b>(
|
| 419 | &mut self,
|
| 420 | buf: &'b mut Vec<u8>,
|
| 421 | ) -> Result<(ResolveResult, Event<'b>)> {
|
| 422 | let event = self.read_event_impl(buf);
|
| 423 | self.resolve_event(event)
|
| 424 | }
|
| 425 |
|
| 426 | /// Reads until end element is found using provided buffer as intermediate
|
| 427 | /// storage for events content. This function is supposed to be called after
|
| 428 | /// you already read a [`Start`] event.
|
| 429 | ///
|
| 430 | /// Returns a span that cover content between `>` of an opening tag and `<` of
|
| 431 | /// a closing tag or an empty slice, if [`expand_empty_elements`] is set and
|
| 432 | /// this method was called after reading expanded [`Start`] event.
|
| 433 | ///
|
| 434 | /// Manages nested cases where parent and child elements have the _literally_
|
| 435 | /// same name.
|
| 436 | ///
|
| 437 | /// If corresponding [`End`] event will not be found, the [`UnexpectedEof`]
|
/// will be returned. In particular, that error will be returned if you call
|
| 439 | /// this method without consuming the corresponding [`Start`] event first.
|
| 440 | ///
|
| 441 | /// If your reader created from a string slice or byte array slice, it is
|
| 442 | /// better to use [`read_to_end()`] method, because it will not copy bytes
|
| 443 | /// into intermediate buffer.
|
| 444 | ///
|
| 445 | /// The provided `buf` buffer will be filled only by one event content at time.
|
| 446 | /// Before reading of each event the buffer will be cleared. If you know an
|
| 447 | /// appropriate size of each event, you can preallocate the buffer to reduce
|
| 448 | /// number of reallocations.
|
| 449 | ///
|
| 450 | /// The `end` parameter should contain name of the end element _in the reader
|
| 451 | /// encoding_. It is good practice to always get that parameter using
|
| 452 | /// [`BytesStart::to_end()`] method.
|
| 453 | ///
|
| 454 | /// # Namespaces
|
| 455 | ///
|
/// While the `NsReader` does namespace resolution, namespaces do not
|
| 457 | /// change the algorithm for comparing names. Although the names `a:name`
|
| 458 | /// and `b:name` where both prefixes `a` and `b` resolves to the same namespace,
|
| 459 | /// are semantically equivalent, `</b:name>` cannot close `<a:name>`, because
|
| 460 | /// according to [the specification]
|
| 461 | ///
|
| 462 | /// > The end of every element that begins with a **start-tag** MUST be marked
|
| 463 | /// > by an **end-tag** containing a name that echoes the element's type as
|
| 464 | /// > given in the **start-tag**
|
| 465 | ///
|
| 466 | /// # Examples
|
| 467 | ///
|
| 468 | /// This example shows, how you can skip XML content after you read the
|
| 469 | /// start event.
|
| 470 | ///
|
| 471 | /// ```
|
| 472 | /// # use pretty_assertions::assert_eq;
|
| 473 | /// use quick_xml::events::{BytesStart, Event};
|
| 474 | /// use quick_xml::name::{Namespace, ResolveResult};
|
| 475 | /// use quick_xml::reader::NsReader;
|
| 476 | ///
|
| 477 | /// let mut reader = NsReader::from_str(r#"
|
| 478 | /// <outer xmlns="namespace 1">
|
| 479 | /// <inner xmlns="namespace 2">
|
| 480 | /// <outer></outer>
|
| 481 | /// </inner>
|
| 482 | /// <inner>
|
| 483 | /// <inner></inner>
|
| 484 | /// <inner/>
|
| 485 | /// <outer></outer>
|
| 486 | /// <p:outer xmlns:p="ns"></p:outer>
|
| 487 | /// <outer/>
|
| 488 | /// </inner>
|
| 489 | /// </outer>
|
| 490 | /// "# );
|
| 491 | /// reader.trim_text(true);
|
| 492 | /// let mut buf = Vec::new();
|
| 493 | ///
|
| 494 | /// let ns = Namespace(b"namespace 1" );
|
| 495 | /// let start = BytesStart::from_content(r#"outer xmlns="namespace 1""# , 5);
|
| 496 | /// let end = start.to_end().into_owned();
|
| 497 | ///
|
| 498 | /// // First, we read a start event...
|
| 499 | /// assert_eq!(
|
| 500 | /// reader.read_resolved_event_into(&mut buf).unwrap(),
|
| 501 | /// (ResolveResult::Bound(ns), Event::Start(start))
|
| 502 | /// );
|
| 503 | ///
|
| 504 | /// // ...then, we could skip all events to the corresponding end event.
|
| 505 | /// // This call will correctly handle nested <outer> elements.
|
| 506 | /// // Note, however, that this method does not handle namespaces.
|
| 507 | /// reader.read_to_end_into(end.name(), &mut buf).unwrap();
|
| 508 | ///
|
| 509 | /// // At the end we should get an Eof event, because we ate the whole XML
|
| 510 | /// assert_eq!(
|
| 511 | /// reader.read_resolved_event_into(&mut buf).unwrap(),
|
| 512 | /// (ResolveResult::Unbound, Event::Eof)
|
| 513 | /// );
|
| 514 | /// ```
|
| 515 | ///
|
| 516 | /// [`Start`]: Event::Start
|
| 517 | /// [`End`]: Event::End
|
| 518 | /// [`UnexpectedEof`]: crate::errors::Error::UnexpectedEof
|
| 519 | /// [`read_to_end()`]: Self::read_to_end
|
| 520 | /// [`BytesStart::to_end()`]: crate::events::BytesStart::to_end
|
| 521 | /// [`expand_empty_elements`]: Self::expand_empty_elements
|
| 522 | /// [the specification]: https://www.w3.org/TR/xml11/#dt-etag
|
| 523 | #[inline ]
|
| 524 | pub fn read_to_end_into(&mut self, end: QName, buf: &mut Vec<u8>) -> Result<Span> {
|
| 525 | // According to the https://www.w3.org/TR/xml11/#dt-etag, end name should
|
| 526 | // match literally the start name. See `Self::check_end_names` documentation
|
| 527 | self.reader.read_to_end_into(end, buf)
|
| 528 | }
|
| 529 | }
|
| 530 |
|
| 531 | impl NsReader<BufReader<File>> {
|
| 532 | /// Creates an XML reader from a file path.
|
| 533 | pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
|
| 534 | Ok(Self::new(Reader::from_file(path)?))
|
| 535 | }
|
| 536 | }
|
| 537 |
|
| 538 | impl<'i> NsReader<&'i [u8]> {
|
| 539 | /// Creates an XML reader from a string slice.
|
| 540 | #[inline ]
|
| 541 | #[allow (clippy::should_implement_trait)]
|
| 542 | pub fn from_str(s: &'i str) -> Self {
|
| 543 | Self::new(Reader::from_str(s))
|
| 544 | }
|
| 545 |
|
| 546 | /// Reads the next event, borrow its content from the input buffer.
|
| 547 | ///
|
| 548 | /// This method manages namespaces but doesn't resolve them automatically.
|
| 549 | /// You should call [`resolve_element()`] if you want to get a namespace.
|
| 550 | ///
|
| 551 | /// You also can use [`read_resolved_event()`] instead if you want to resolve namespace
|
| 552 | /// as soon as you get an event.
|
| 553 | ///
|
| 554 | /// There is no asynchronous `read_event_async()` version of this function,
|
| 555 | /// because it is not necessary -- the contents are already in memory and no IO
|
| 556 | /// is needed, therefore there is no potential for blocking.
|
| 557 | ///
|
| 558 | /// # Examples
|
| 559 | ///
|
| 560 | /// ```
|
| 561 | /// # use pretty_assertions::assert_eq;
|
| 562 | /// use quick_xml::events::Event;
|
| 563 | /// use quick_xml::name::{Namespace, ResolveResult::*};
|
| 564 | /// use quick_xml::reader::NsReader;
|
| 565 | ///
|
| 566 | /// let mut reader = NsReader::from_str(r#"
|
| 567 | /// <x:tag1 xmlns:x="www.xxxx" xmlns:y="www.yyyy" att1 = "test">
|
| 568 | /// <y:tag2><!--Test comment-->Test</y:tag2>
|
| 569 | /// <y:tag2>Test 2</y:tag2>
|
| 570 | /// </x:tag1>
|
| 571 | /// "# );
|
| 572 | /// reader.trim_text(true);
|
| 573 | ///
|
| 574 | /// let mut count = 0;
|
| 575 | /// let mut txt = Vec::new();
|
| 576 | /// loop {
|
| 577 | /// match reader.read_event().unwrap() {
|
| 578 | /// Event::Start(e) => {
|
| 579 | /// count += 1;
|
| 580 | /// let (ns, local) = reader.resolve_element(e.name());
|
| 581 | /// match local.as_ref() {
|
| 582 | /// b"tag1" => assert_eq!(ns, Bound(Namespace(b"www.xxxx" ))),
|
| 583 | /// b"tag2" => assert_eq!(ns, Bound(Namespace(b"www.yyyy" ))),
|
| 584 | /// _ => unreachable!(),
|
| 585 | /// }
|
| 586 | /// }
|
| 587 | /// Event::Text(e) => {
|
| 588 | /// txt.push(e.unescape().unwrap().into_owned())
|
| 589 | /// }
|
| 590 | /// Event::Eof => break,
|
| 591 | /// _ => (),
|
| 592 | /// }
|
| 593 | /// }
|
| 594 | /// assert_eq!(count, 3);
|
| 595 | /// assert_eq!(txt, vec!["Test" .to_string(), "Test 2" .to_string()]);
|
| 596 | /// ```
|
| 597 | ///
|
| 598 | /// [`resolve_element()`]: Self::resolve_element
|
| 599 | /// [`read_resolved_event()`]: Self::read_resolved_event
|
| 600 | #[inline ]
|
| 601 | pub fn read_event(&mut self) -> Result<Event<'i>> {
|
| 602 | self.read_event_impl(())
|
| 603 | }
|
| 604 |
|
| 605 | /// Reads the next event, borrow its content from the input buffer, and resolves
|
| 606 | /// its namespace (if applicable).
|
| 607 | ///
|
| 608 | /// Namespace is resolved only for [`Start`], [`Empty`] and [`End`] events.
|
| 609 | /// For all other events the concept of namespace is not defined, so
|
| 610 | /// a [`ResolveResult::Unbound`] is returned.
|
| 611 | ///
|
| 612 | /// If you are not interested in namespaces, you can use [`read_event()`]
|
| 613 | /// which will not automatically resolve namespaces for you.
|
| 614 | ///
|
| 615 | /// There is no asynchronous `read_resolved_event_async()` version of this function,
|
| 616 | /// because it is not necessary -- the contents are already in memory and no IO
|
| 617 | /// is needed, therefore there is no potential for blocking.
|
| 618 | ///
|
| 619 | /// # Examples
|
| 620 | ///
|
| 621 | /// ```
|
| 622 | /// # use pretty_assertions::assert_eq;
|
| 623 | /// use quick_xml::events::Event;
|
| 624 | /// use quick_xml::name::{Namespace, QName, ResolveResult::*};
|
| 625 | /// use quick_xml::reader::NsReader;
|
| 626 | ///
|
| 627 | /// let mut reader = NsReader::from_str(r#"
|
| 628 | /// <x:tag1 xmlns:x="www.xxxx" xmlns:y="www.yyyy" att1 = "test">
|
| 629 | /// <y:tag2><!--Test comment-->Test</y:tag2>
|
| 630 | /// <y:tag2>Test 2</y:tag2>
|
| 631 | /// </x:tag1>
|
| 632 | /// "# );
|
| 633 | /// reader.trim_text(true);
|
| 634 | ///
|
| 635 | /// let mut count = 0;
|
| 636 | /// let mut txt = Vec::new();
|
| 637 | /// loop {
|
| 638 | /// match reader.read_resolved_event().unwrap() {
|
| 639 | /// (Bound(Namespace(b"www.xxxx" )), Event::Start(e)) => {
|
| 640 | /// count += 1;
|
| 641 | /// assert_eq!(e.local_name(), QName(b"tag1" ).into());
|
| 642 | /// }
|
| 643 | /// (Bound(Namespace(b"www.yyyy" )), Event::Start(e)) => {
|
| 644 | /// count += 1;
|
| 645 | /// assert_eq!(e.local_name(), QName(b"tag2" ).into());
|
| 646 | /// }
|
| 647 | /// (_, Event::Start(_)) => unreachable!(),
|
| 648 | ///
|
| 649 | /// (_, Event::Text(e)) => {
|
| 650 | /// txt.push(e.unescape().unwrap().into_owned())
|
| 651 | /// }
|
| 652 | /// (_, Event::Eof) => break,
|
| 653 | /// _ => (),
|
| 654 | /// }
|
| 655 | /// }
|
| 656 | /// assert_eq!(count, 3);
|
| 657 | /// assert_eq!(txt, vec!["Test" .to_string(), "Test 2" .to_string()]);
|
| 658 | /// ```
|
| 659 | ///
|
| 660 | /// [`Start`]: Event::Start
|
| 661 | /// [`Empty`]: Event::Empty
|
| 662 | /// [`End`]: Event::End
|
| 663 | /// [`read_event()`]: Self::read_event
|
| 664 | #[inline ]
|
| 665 | pub fn read_resolved_event(&mut self) -> Result<(ResolveResult, Event<'i>)> {
|
| 666 | let event = self.read_event_impl(());
|
| 667 | self.resolve_event(event)
|
| 668 | }
|
| 669 |
|
| 670 | /// Reads until end element is found. This function is supposed to be called
|
| 671 | /// after you already read a [`Start`] event.
|
| 672 | ///
|
| 673 | /// Returns a span that cover content between `>` of an opening tag and `<` of
|
| 674 | /// a closing tag or an empty slice, if [`expand_empty_elements`] is set and
|
| 675 | /// this method was called after reading expanded [`Start`] event.
|
| 676 | ///
|
| 677 | /// Manages nested cases where parent and child elements have the _literally_
|
| 678 | /// same name.
|
| 679 | ///
|
| 680 | /// If corresponding [`End`] event will not be found, the [`UnexpectedEof`]
|
/// will be returned. In particular, that error will be returned if you call
|
| 682 | /// this method without consuming the corresponding [`Start`] event first.
|
| 683 | ///
|
| 684 | /// The `end` parameter should contain name of the end element _in the reader
|
| 685 | /// encoding_. It is good practice to always get that parameter using
|
| 686 | /// [`BytesStart::to_end()`] method.
|
| 687 | ///
|
| 688 | /// There is no asynchronous `read_to_end_async()` version of this function,
|
| 689 | /// because it is not necessary -- the contents are already in memory and no IO
|
| 690 | /// is needed, therefore there is no potential for blocking.
|
| 691 | ///
|
| 692 | /// # Namespaces
|
| 693 | ///
|
/// While the `NsReader` does namespace resolution, namespaces do not
|
| 695 | /// change the algorithm for comparing names. Although the names `a:name`
|
| 696 | /// and `b:name` where both prefixes `a` and `b` resolves to the same namespace,
|
| 697 | /// are semantically equivalent, `</b:name>` cannot close `<a:name>`, because
|
| 698 | /// according to [the specification]
|
| 699 | ///
|
| 700 | /// > The end of every element that begins with a **start-tag** MUST be marked
|
| 701 | /// > by an **end-tag** containing a name that echoes the element's type as
|
| 702 | /// > given in the **start-tag**
|
| 703 | ///
|
| 704 | /// # Examples
|
| 705 | ///
|
| 706 | /// This example shows, how you can skip XML content after you read the
|
| 707 | /// start event.
|
| 708 | ///
|
| 709 | /// ```
|
| 710 | /// # use pretty_assertions::assert_eq;
|
| 711 | /// use quick_xml::events::{BytesStart, Event};
|
| 712 | /// use quick_xml::name::{Namespace, ResolveResult};
|
| 713 | /// use quick_xml::reader::NsReader;
|
| 714 | ///
|
| 715 | /// let mut reader = NsReader::from_str(r#"
|
| 716 | /// <outer xmlns="namespace 1">
|
| 717 | /// <inner xmlns="namespace 2">
|
| 718 | /// <outer></outer>
|
| 719 | /// </inner>
|
| 720 | /// <inner>
|
| 721 | /// <inner></inner>
|
| 722 | /// <inner/>
|
| 723 | /// <outer></outer>
|
| 724 | /// <p:outer xmlns:p="ns"></p:outer>
|
| 725 | /// <outer/>
|
| 726 | /// </inner>
|
| 727 | /// </outer>
|
| 728 | /// "# );
|
| 729 | /// reader.trim_text(true);
|
| 730 | ///
|
| 731 | /// let ns = Namespace(b"namespace 1" );
|
| 732 | /// let start = BytesStart::from_content(r#"outer xmlns="namespace 1""# , 5);
|
| 733 | /// let end = start.to_end().into_owned();
|
| 734 | ///
|
| 735 | /// // First, we read a start event...
|
| 736 | /// assert_eq!(
|
| 737 | /// reader.read_resolved_event().unwrap(),
|
| 738 | /// (ResolveResult::Bound(ns), Event::Start(start))
|
| 739 | /// );
|
| 740 | ///
|
| 741 | /// // ...then, we could skip all events to the corresponding end event.
|
| 742 | /// // This call will correctly handle nested <outer> elements.
|
| 743 | /// // Note, however, that this method does not handle namespaces.
|
| 744 | /// reader.read_to_end(end.name()).unwrap();
|
| 745 | ///
|
| 746 | /// // At the end we should get an Eof event, because we ate the whole XML
|
| 747 | /// assert_eq!(
|
| 748 | /// reader.read_resolved_event().unwrap(),
|
| 749 | /// (ResolveResult::Unbound, Event::Eof)
|
| 750 | /// );
|
| 751 | /// ```
|
| 752 | ///
|
| 753 | /// [`Start`]: Event::Start
|
| 754 | /// [`End`]: Event::End
|
| 755 | /// [`UnexpectedEof`]: crate::errors::Error::UnexpectedEof
|
| 756 | /// [`BytesStart::to_end()`]: crate::events::BytesStart::to_end
|
| 757 | /// [`expand_empty_elements`]: Self::expand_empty_elements
|
| 758 | /// [the specification]: https://www.w3.org/TR/xml11/#dt-etag
|
| 759 | #[inline ]
|
| 760 | pub fn read_to_end(&mut self, end: QName) -> Result<Span> {
|
| 761 | // According to the https://www.w3.org/TR/xml11/#dt-etag, end name should
|
| 762 | // match literally the start name. See `Self::check_end_names` documentation
|
| 763 | self.reader.read_to_end(end)
|
| 764 | }
|
| 765 |
|
| 766 | /// Reads content between start and end tags, including any markup. This
|
| 767 | /// function is supposed to be called after you already read a [`Start`] event.
|
| 768 | ///
|
| 769 | /// Manages nested cases where parent and child elements have the _literally_
|
| 770 | /// same name.
|
| 771 | ///
|
| 772 | /// This method does not unescape read data, instead it returns content
|
| 773 | /// "as is" of the XML document. This is because it has no idea what text
|
| 774 | /// it reads, and if, for example, it contains a CDATA section, an attempt to
|
| 775 | /// unescape its content will spoil the data.
|
| 776 | ///
|
| 777 | /// Any text will be decoded using the XML current [`decoder()`].
|
| 778 | ///
|
| 779 | /// In effect, this method performs the following code:
|
| 780 | ///
|
| 781 | /// ```ignore
|
| 782 | /// let span = reader.read_to_end(end)?;
|
| 783 | /// let text = reader.decoder().decode(&reader.inner_slice[span]);
|
| 784 | /// ```
|
| 785 | ///
|
| 786 | /// # Examples
|
| 787 | ///
|
| 788 | /// This example shows how you can read HTML content from your XML document.
|
| 789 | ///
|
| 790 | /// ```
|
| 791 | /// # use pretty_assertions::assert_eq;
|
| 792 | /// # use std::borrow::Cow;
|
| 793 | /// use quick_xml::events::{BytesStart, Event};
|
| 794 | /// use quick_xml::reader::NsReader;
|
| 795 | ///
|
| 796 | /// let mut reader = NsReader::from_str(r#"
|
| 797 | /// <html>
|
| 798 | /// <title>This is a HTML text</title>
|
| 799 | /// <p>Usual XML rules does not apply inside it
|
| 800 | /// <p>For example, elements not needed to be "closed"
|
| 801 | /// </html>
|
| 802 | /// "# );
|
| 803 | /// reader.trim_text(true);
|
| 804 | ///
|
| 805 | /// let start = BytesStart::new("html" );
|
| 806 | /// let end = start.to_end().into_owned();
|
| 807 | ///
|
| 808 | /// // First, we read a start event...
|
| 809 | /// assert_eq!(reader.read_event().unwrap(), Event::Start(start));
|
| 810 | /// // ...and disable checking of end names because we expect HTML further...
|
| 811 | /// reader.check_end_names(false);
|
| 812 | ///
|
| 813 | /// // ...then, we could read text content until close tag.
|
| 814 | /// // This call will correctly handle nested <html> elements.
|
| 815 | /// let text = reader.read_text(end.name()).unwrap();
|
| 816 | /// assert_eq!(text, Cow::Borrowed(r#"
|
| 817 | /// <title>This is a HTML text</title>
|
| 818 | /// <p>Usual XML rules does not apply inside it
|
| 819 | /// <p>For example, elements not needed to be "closed"
|
| 820 | /// "# ));
|
| 821 | ///
|
| 822 | /// // Now we can enable checks again
|
| 823 | /// reader.check_end_names(true);
|
| 824 | ///
|
| 825 | /// // At the end we should get an Eof event, because we ate the whole XML
|
| 826 | /// assert_eq!(reader.read_event().unwrap(), Event::Eof);
|
| 827 | /// ```
|
| 828 | ///
|
| 829 | /// [`Start`]: Event::Start
|
| 830 | /// [`decoder()`]: Reader::decoder()
|
| 831 | #[inline ]
|
| 832 | pub fn read_text(&mut self, end: QName) -> Result<Cow<'i, str>> {
|
| 833 | self.reader.read_text(end)
|
| 834 | }
|
| 835 | }
|
| 836 |
|
| 837 | impl<R> Deref for NsReader<R> {
|
| 838 | type Target = Reader<R>;
|
| 839 |
|
| 840 | #[inline ]
|
| 841 | fn deref(&self) -> &Self::Target {
|
| 842 | &self.reader
|
| 843 | }
|
| 844 | }
|
| 845 | |