//! Basic types to build the parsers
| 2 | |
use self::Needed::*;
use crate::error::{self, ErrorKind};
use crate::lib::std::fmt;
use core::num::NonZeroUsize;
| 7 | |
| 8 | /// Holds the result of parsing functions |
| 9 | /// |
| 10 | /// It depends on the input type `I`, the output type `O`, and the error type `E` |
| 11 | /// (by default `(I, nom::ErrorKind)`) |
| 12 | /// |
| 13 | /// The `Ok` side is a pair containing the remainder of the input (the part of the data that |
| 14 | /// was not parsed) and the produced value. The `Err` side contains an instance of `nom::Err`. |
| 15 | /// |
| 16 | /// Outside of the parsing code, you can use the [Finish::finish] method to convert |
| 17 | /// it to a more common result type |
| 18 | pub type IResult<I, O, E = error::Error<I>> = Result<(I, O), Err<E>>; |
| 19 | |
/// Helper trait to convert a parser's result to a more manageable type
pub trait Finish<I, O, E> {
    /// Converts the parser's result to a type that is more consumable by error
    /// management libraries. It keeps the same `Ok` branch, and merges `Err::Error`
    /// and `Err::Failure` into the `Err` side.
    ///
    /// # Panics
    ///
    /// If the result is `Err(Err::Incomplete(_))`, this method will panic.
    /// - "complete" parsers: It will not be an issue, `Incomplete` is never used
    /// - "streaming" parsers: `Incomplete` will be returned if there's not enough data
    ///   for the parser to decide, and you should gather more data before parsing again.
    ///   Once the parser returns either `Ok(_)`, `Err(Err::Error(_))` or `Err(Err::Failure(_))`,
    ///   you can get out of the parsing loop and call `finish()` on the parser's result
    fn finish(self) -> Result<(I, O), E>;
}
| 34 | |
| 35 | impl<I, O, E> Finish<I, O, E> for IResult<I, O, E> { |
| 36 | fn finish(self) -> Result<(I, O), E> { |
| 37 | match self { |
| 38 | Ok(res: (I, O)) => Ok(res), |
| 39 | Err(Err::Error(e: E)) | Err(Err::Failure(e: E)) => Err(e), |
| 40 | Err(Err::Incomplete(_)) => { |
| 41 | panic!("Cannot call `finish()` on `Err(Err::Incomplete(_))`: this result means that the parser does not have enough data to decide, you should gather more data and try to reapply the parser instead" ) |
| 42 | } |
| 43 | } |
| 44 | } |
| 45 | } |
| 46 | |
/// Contains information on needed data if a parser returned `Incomplete`
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))]
pub enum Needed {
    /// Needs more data, but we do not know how much
    Unknown,
    /// Contains the required data size in bytes
    Size(NonZeroUsize),
}
| 56 | |
| 57 | impl Needed { |
| 58 | /// Creates `Needed` instance, returns `Needed::Unknown` if the argument is zero |
| 59 | pub fn new(s: usize) -> Self { |
| 60 | match NonZeroUsize::new(s) { |
| 61 | Some(sz: NonZero) => Needed::Size(sz), |
| 62 | None => Needed::Unknown, |
| 63 | } |
| 64 | } |
| 65 | |
| 66 | /// Indicates if we know how many bytes we need |
| 67 | pub fn is_known(&self) -> bool { |
| 68 | *self != Unknown |
| 69 | } |
| 70 | |
| 71 | /// Maps a `Needed` to `Needed` by applying a function to a contained `Size` value. |
| 72 | #[inline ] |
| 73 | pub fn map<F: Fn(NonZeroUsize) -> usize>(self, f: F) -> Needed { |
| 74 | match self { |
| 75 | Unknown => Unknown, |
| 76 | Size(n: NonZero) => Needed::new(f(n)), |
| 77 | } |
| 78 | } |
| 79 | } |
| 80 | |
| 81 | /// The `Err` enum indicates the parser was not successful |
| 82 | /// |
| 83 | /// It has three cases: |
| 84 | /// |
| 85 | /// * `Incomplete` indicates that more data is needed to decide. The `Needed` enum |
| 86 | /// can contain how many additional bytes are necessary. If you are sure your parser |
| 87 | /// is working on full data, you can wrap your parser with the `complete` combinator |
| 88 | /// to transform that case in `Error` |
| 89 | /// * `Error` means some parser did not succeed, but another one might (as an example, |
| 90 | /// when testing different branches of an `alt` combinator) |
| 91 | /// * `Failure` indicates an unrecoverable error. As an example, if you recognize a prefix |
| 92 | /// to decide on the next parser to apply, and that parser fails, you know there's no need |
| 93 | /// to try other parsers, you were already in the right branch, so the data is invalid |
| 94 | /// |
| 95 | #[derive (Debug, Clone, PartialEq)] |
| 96 | #[cfg_attr (nightly, warn(rustdoc::missing_doc_code_examples))] |
| 97 | pub enum Err<E> { |
| 98 | /// There was not enough data |
| 99 | Incomplete(Needed), |
| 100 | /// The parser had an error (recoverable) |
| 101 | Error(E), |
| 102 | /// The parser had an unrecoverable error: we got to the right |
| 103 | /// branch and we know other branches won't work, so backtrack |
| 104 | /// as fast as possible |
| 105 | Failure(E), |
| 106 | } |
| 107 | |
| 108 | impl<E> Err<E> { |
| 109 | /// Tests if the result is Incomplete |
| 110 | pub fn is_incomplete(&self) -> bool { |
| 111 | if let Err::Incomplete(_) = self { |
| 112 | true |
| 113 | } else { |
| 114 | false |
| 115 | } |
| 116 | } |
| 117 | |
| 118 | /// Applies the given function to the inner error |
| 119 | pub fn map<E2, F>(self, f: F) -> Err<E2> |
| 120 | where |
| 121 | F: FnOnce(E) -> E2, |
| 122 | { |
| 123 | match self { |
| 124 | Err::Incomplete(n) => Err::Incomplete(n), |
| 125 | Err::Failure(t) => Err::Failure(f(t)), |
| 126 | Err::Error(t) => Err::Error(f(t)), |
| 127 | } |
| 128 | } |
| 129 | |
| 130 | /// Automatically converts between errors if the underlying type supports it |
| 131 | pub fn convert<F>(e: Err<F>) -> Self |
| 132 | where |
| 133 | E: From<F>, |
| 134 | { |
| 135 | e.map(crate::lib::std::convert::Into::into) |
| 136 | } |
| 137 | } |
| 138 | |
| 139 | impl<T> Err<(T, ErrorKind)> { |
| 140 | /// Maps `Err<(T, ErrorKind)>` to `Err<(U, ErrorKind)>` with the given `F: T -> U` |
| 141 | pub fn map_input<U, F>(self, f: F) -> Err<(U, ErrorKind)> |
| 142 | where |
| 143 | F: FnOnce(T) -> U, |
| 144 | { |
| 145 | match self { |
| 146 | Err::Incomplete(n: Needed) => Err::Incomplete(n), |
| 147 | Err::Failure((input: T, k: ErrorKind)) => Err::Failure((f(input), k)), |
| 148 | Err::Error((input: T, k: ErrorKind)) => Err::Error((f(input), k)), |
| 149 | } |
| 150 | } |
| 151 | } |
| 152 | |
| 153 | impl<T> Err<error::Error<T>> { |
| 154 | /// Maps `Err<error::Error<T>>` to `Err<error::Error<U>>` with the given `F: T -> U` |
| 155 | pub fn map_input<U, F>(self, f: F) -> Err<error::Error<U>> |
| 156 | where |
| 157 | F: FnOnce(T) -> U, |
| 158 | { |
| 159 | match self { |
| 160 | Err::Incomplete(n: Needed) => Err::Incomplete(n), |
| 161 | Err::Failure(error::Error { input: T, code: ErrorKind }) => Err::Failure(error::Error { |
| 162 | input: f(input), |
| 163 | code, |
| 164 | }), |
| 165 | Err::Error(error::Error { input: T, code: ErrorKind }) => Err::Error(error::Error { |
| 166 | input: f(input), |
| 167 | code, |
| 168 | }), |
| 169 | } |
| 170 | } |
| 171 | } |
| 172 | |
#[cfg(feature = "alloc")]
use crate::lib::std::{borrow::ToOwned, string::String, vec::Vec};
#[cfg(feature = "alloc")]
impl Err<(&[u8], ErrorKind)> {
    /// Obtaining ownership
    ///
    /// Converts the borrowed byte-slice input into a `Vec<u8>` via `map_input`.
    #[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))]
    pub fn to_owned(self) -> Err<(Vec<u8>, ErrorKind)> {
        self.map_input(ToOwned::to_owned)
    }
}
| 183 | |
#[cfg(feature = "alloc")]
impl Err<(&str, ErrorKind)> {
    /// Obtaining ownership
    ///
    /// Converts the borrowed string input into a `String` via `map_input`.
    #[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))]
    pub fn to_owned(self) -> Err<(String, ErrorKind)> {
        self.map_input(ToOwned::to_owned)
    }
}
| 192 | |
#[cfg(feature = "alloc")]
impl Err<error::Error<&[u8]>> {
    /// Obtaining ownership
    ///
    /// Converts the borrowed byte-slice input into a `Vec<u8>` via `map_input`.
    #[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))]
    pub fn to_owned(self) -> Err<error::Error<Vec<u8>>> {
        self.map_input(ToOwned::to_owned)
    }
}
| 201 | |
#[cfg(feature = "alloc")]
impl Err<error::Error<&str>> {
    /// Obtaining ownership
    ///
    /// Converts the borrowed string input into a `String` via `map_input`.
    #[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))]
    pub fn to_owned(self) -> Err<error::Error<String>> {
        self.map_input(ToOwned::to_owned)
    }
}
| 210 | |
| 211 | impl<E: Eq> Eq for Err<E> {} |
| 212 | |
| 213 | impl<E> fmt::Display for Err<E> |
| 214 | where |
| 215 | E: fmt::Debug, |
| 216 | { |
| 217 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
| 218 | match self { |
| 219 | Err::Incomplete(Needed::Size(u: &NonZero)) => write!(f, "Parsing requires {} bytes/chars" , u), |
| 220 | Err::Incomplete(Needed::Unknown) => write!(f, "Parsing requires more data" ), |
| 221 | Err::Failure(c: &E) => write!(f, "Parsing Failure: {:?}" , c), |
| 222 | Err::Error(c: &E) => write!(f, "Parsing Error: {:?}" , c), |
| 223 | } |
| 224 | } |
| 225 | } |
| 226 | |
#[cfg(feature = "std")]
use std::error::Error;

/// Lets `Err<E>` be used as a standard error when the `std` feature is on.
#[cfg(feature = "std")]
impl<E> Error for Err<E>
where
    E: fmt::Debug,
{
    fn source(&self) -> Option<&(dyn Error + 'static)> {
        None // no underlying error
    }
}
| 239 | |
| 240 | /// All nom parsers implement this trait |
| 241 | pub trait Parser<I, O, E> { |
| 242 | /// A parser takes in input type, and returns a `Result` containing |
| 243 | /// either the remaining input and the output value, or an error |
| 244 | fn parse(&mut self, input: I) -> IResult<I, O, E>; |
| 245 | |
| 246 | /// Maps a function over the result of a parser |
| 247 | fn map<G, O2>(self, g: G) -> Map<Self, G, O> |
| 248 | where |
| 249 | G: Fn(O) -> O2, |
| 250 | Self: core::marker::Sized, |
| 251 | { |
| 252 | Map { |
| 253 | f: self, |
| 254 | g, |
| 255 | phantom: core::marker::PhantomData, |
| 256 | } |
| 257 | } |
| 258 | |
| 259 | /// Creates a second parser from the output of the first one, then apply over the rest of the input |
| 260 | fn flat_map<G, H, O2>(self, g: G) -> FlatMap<Self, G, O> |
| 261 | where |
| 262 | G: FnMut(O) -> H, |
| 263 | H: Parser<I, O2, E>, |
| 264 | Self: core::marker::Sized, |
| 265 | { |
| 266 | FlatMap { |
| 267 | f: self, |
| 268 | g, |
| 269 | phantom: core::marker::PhantomData, |
| 270 | } |
| 271 | } |
| 272 | |
| 273 | /// Applies a second parser over the output of the first one |
| 274 | fn and_then<G, O2>(self, g: G) -> AndThen<Self, G, O> |
| 275 | where |
| 276 | G: Parser<O, O2, E>, |
| 277 | Self: core::marker::Sized, |
| 278 | { |
| 279 | AndThen { |
| 280 | f: self, |
| 281 | g, |
| 282 | phantom: core::marker::PhantomData, |
| 283 | } |
| 284 | } |
| 285 | |
| 286 | /// Applies a second parser after the first one, return their results as a tuple |
| 287 | fn and<G, O2>(self, g: G) -> And<Self, G> |
| 288 | where |
| 289 | G: Parser<I, O2, E>, |
| 290 | Self: core::marker::Sized, |
| 291 | { |
| 292 | And { f: self, g } |
| 293 | } |
| 294 | |
| 295 | /// Applies a second parser over the input if the first one failed |
| 296 | fn or<G>(self, g: G) -> Or<Self, G> |
| 297 | where |
| 298 | G: Parser<I, O, E>, |
| 299 | Self: core::marker::Sized, |
| 300 | { |
| 301 | Or { f: self, g } |
| 302 | } |
| 303 | |
| 304 | /// automatically converts the parser's output and error values to another type, as long as they |
| 305 | /// implement the `From` trait |
| 306 | fn into<O2: From<O>, E2: From<E>>(self) -> Into<Self, O, O2, E, E2> |
| 307 | where |
| 308 | Self: core::marker::Sized, |
| 309 | { |
| 310 | Into { |
| 311 | f: self, |
| 312 | phantom_out1: core::marker::PhantomData, |
| 313 | phantom_err1: core::marker::PhantomData, |
| 314 | phantom_out2: core::marker::PhantomData, |
| 315 | phantom_err2: core::marker::PhantomData, |
| 316 | } |
| 317 | } |
| 318 | } |
| 319 | |
| 320 | impl<'a, I, O, E, F> Parser<I, O, E> for F |
| 321 | where |
| 322 | F: FnMut(I) -> IResult<I, O, E> + 'a, |
| 323 | { |
| 324 | fn parse(&mut self, i: I) -> IResult<I, O, E> { |
| 325 | self(i) |
| 326 | } |
| 327 | } |
| 328 | |
#[cfg(feature = "alloc")]
use alloc::boxed::Box;

/// Boxed trait-object parsers forward `parse` to the parser they own.
#[cfg(feature = "alloc")]
impl<'a, I, O, E> Parser<I, O, E> for Box<dyn Parser<I, O, E> + 'a> {
    fn parse(&mut self, input: I) -> IResult<I, O, E> {
        (**self).parse(input)
    }
}
| 338 | |
| 339 | /// Implementation of `Parser::map` |
| 340 | #[cfg_attr (nightly, warn(rustdoc::missing_doc_code_examples))] |
| 341 | pub struct Map<F, G, O1> { |
| 342 | f: F, |
| 343 | g: G, |
| 344 | phantom: core::marker::PhantomData<O1>, |
| 345 | } |
| 346 | |
| 347 | impl<'a, I, O1, O2, E, F: Parser<I, O1, E>, G: Fn(O1) -> O2> Parser<I, O2, E> for Map<F, G, O1> { |
| 348 | fn parse(&mut self, i: I) -> IResult<I, O2, E> { |
| 349 | match self.f.parse(input:i) { |
| 350 | Err(e: Err) => Err(e), |
| 351 | Ok((i: I, o: O1)) => Ok((i, (self.g)(o))), |
| 352 | } |
| 353 | } |
| 354 | } |
| 355 | |
| 356 | /// Implementation of `Parser::flat_map` |
| 357 | #[cfg_attr (nightly, warn(rustdoc::missing_doc_code_examples))] |
| 358 | pub struct FlatMap<F, G, O1> { |
| 359 | f: F, |
| 360 | g: G, |
| 361 | phantom: core::marker::PhantomData<O1>, |
| 362 | } |
| 363 | |
| 364 | impl<'a, I, O1, O2, E, F: Parser<I, O1, E>, G: Fn(O1) -> H, H: Parser<I, O2, E>> Parser<I, O2, E> |
| 365 | for FlatMap<F, G, O1> |
| 366 | { |
| 367 | fn parse(&mut self, i: I) -> IResult<I, O2, E> { |
| 368 | let (i: I, o1: O1) = self.f.parse(input:i)?; |
| 369 | (self.g)(o1).parse(input:i) |
| 370 | } |
| 371 | } |
| 372 | |
| 373 | /// Implementation of `Parser::and_then` |
| 374 | #[cfg_attr (nightly, warn(rustdoc::missing_doc_code_examples))] |
| 375 | pub struct AndThen<F, G, O1> { |
| 376 | f: F, |
| 377 | g: G, |
| 378 | phantom: core::marker::PhantomData<O1>, |
| 379 | } |
| 380 | |
| 381 | impl<'a, I, O1, O2, E, F: Parser<I, O1, E>, G: Parser<O1, O2, E>> Parser<I, O2, E> |
| 382 | for AndThen<F, G, O1> |
| 383 | { |
| 384 | fn parse(&mut self, i: I) -> IResult<I, O2, E> { |
| 385 | let (i: I, o1: O1) = self.f.parse(input:i)?; |
| 386 | let (_, o2: O2) = self.g.parse(input:o1)?; |
| 387 | Ok((i, o2)) |
| 388 | } |
| 389 | } |
| 390 | |
| 391 | /// Implementation of `Parser::and` |
| 392 | #[cfg_attr (nightly, warn(rustdoc::missing_doc_code_examples))] |
| 393 | pub struct And<F, G> { |
| 394 | f: F, |
| 395 | g: G, |
| 396 | } |
| 397 | |
| 398 | impl<'a, I, O1, O2, E, F: Parser<I, O1, E>, G: Parser<I, O2, E>> Parser<I, (O1, O2), E> |
| 399 | for And<F, G> |
| 400 | { |
| 401 | fn parse(&mut self, i: I) -> IResult<I, (O1, O2), E> { |
| 402 | let (i: I, o1: O1) = self.f.parse(input:i)?; |
| 403 | let (i: I, o2: O2) = self.g.parse(input:i)?; |
| 404 | Ok((i, (o1, o2))) |
| 405 | } |
| 406 | } |
| 407 | |
| 408 | /// Implementation of `Parser::or` |
| 409 | #[cfg_attr (nightly, warn(rustdoc::missing_doc_code_examples))] |
| 410 | pub struct Or<F, G> { |
| 411 | f: F, |
| 412 | g: G, |
| 413 | } |
| 414 | |
| 415 | impl<'a, I: Clone, O, E: crate::error::ParseError<I>, F: Parser<I, O, E>, G: Parser<I, O, E>> |
| 416 | Parser<I, O, E> for Or<F, G> |
| 417 | { |
| 418 | fn parse(&mut self, i: I) -> IResult<I, O, E> { |
| 419 | match self.f.parse(input:i.clone()) { |
| 420 | Err(Err::Error(e1: E)) => match self.g.parse(input:i) { |
| 421 | Err(Err::Error(e2: E)) => Err(Err::Error(e1.or(e2))), |
| 422 | res: Result<(I, O), Err> => res, |
| 423 | }, |
| 424 | res: Result<(I, O), Err> => res, |
| 425 | } |
| 426 | } |
| 427 | } |
| 428 | |
| 429 | /// Implementation of `Parser::into` |
| 430 | #[cfg_attr (nightly, warn(rustdoc::missing_doc_code_examples))] |
| 431 | pub struct Into<F, O1, O2: From<O1>, E1, E2: From<E1>> { |
| 432 | f: F, |
| 433 | phantom_out1: core::marker::PhantomData<O1>, |
| 434 | phantom_err1: core::marker::PhantomData<E1>, |
| 435 | phantom_out2: core::marker::PhantomData<O2>, |
| 436 | phantom_err2: core::marker::PhantomData<E2>, |
| 437 | } |
| 438 | |
| 439 | impl< |
| 440 | 'a, |
| 441 | I: Clone, |
| 442 | O1, |
| 443 | O2: From<O1>, |
| 444 | E1, |
| 445 | E2: crate::error::ParseError<I> + From<E1>, |
| 446 | F: Parser<I, O1, E1>, |
| 447 | > Parser<I, O2, E2> for Into<F, O1, O2, E1, E2> |
| 448 | { |
| 449 | fn parse(&mut self, i: I) -> IResult<I, O2, E2> { |
| 450 | match self.f.parse(input:i) { |
| 451 | Ok((i: I, o: O1)) => Ok((i, o.into())), |
| 452 | Err(Err::Error(e: E1)) => Err(Err::Error(e.into())), |
| 453 | Err(Err::Failure(e: E1)) => Err(Err::Failure(e.into())), |
| 454 | Err(Err::Incomplete(e: Needed)) => Err(Err::Incomplete(e)), |
| 455 | } |
| 456 | } |
| 457 | } |
| 458 | |
#[cfg(test)]
mod tests {
    use super::*;
    use crate::error::ErrorKind;

    // Asserts that `size_of::<$t>()` equals `$sz`.
    #[doc(hidden)]
    #[macro_export]
    macro_rules! assert_size (
        ($t:ty, $sz:expr) => (
            assert_eq!(crate::lib::std::mem::size_of::<$t>(), $sz);
        );
    );

    // Pins the in-memory size of the core result/error types on 64-bit targets.
    #[test]
    #[cfg(target_pointer_width = "64")]
    fn size_test() {
        assert_size!(IResult<&[u8], &[u8], (&[u8], u32)>, 40);
        //FIXME: since rust 1.65, this is now 32 bytes, likely thanks to https://github.com/rust-lang/rust/pull/94075
        // deactivating that test for now because it'll have different values depending on the rust version
        // assert_size!(IResult<&str, &str, u32>, 40);
        assert_size!(Needed, 8);
        assert_size!(Err<u32>, 16);
        assert_size!(ErrorKind, 1);
    }

    // `Err::map` transforms the payload and keeps the variant.
    #[test]
    fn err_map_test() {
        let e = Err::Error(1);
        assert_eq!(e.map(|v| v + 1), Err::Error(2));
    }
}
| 490 | |