//! # nom, eating data byte by byte
//!
//! nom is a parser combinator library with a focus on safe parsing,
//! streaming patterns, and zero-copy parsing wherever possible.
//!
//! ## Example
//!
//! ```rust
//! use nom::{
//!   IResult,
//!   bytes::complete::{tag, take_while_m_n},
//!   combinator::map_res,
//!   sequence::tuple};
//!
//! #[derive(Debug, PartialEq)]
//! pub struct Color {
//!   pub red: u8,
//!   pub green: u8,
//!   pub blue: u8,
//! }
//!
//! fn from_hex(input: &str) -> Result<u8, std::num::ParseIntError> {
//!   u8::from_str_radix(input, 16)
//! }
//!
//! fn is_hex_digit(c: char) -> bool {
//!   c.is_digit(16)
//! }
//!
//! fn hex_primary(input: &str) -> IResult<&str, u8> {
//!   map_res(
//!     take_while_m_n(2, 2, is_hex_digit),
//!     from_hex
//!   )(input)
//! }
//!
//! fn hex_color(input: &str) -> IResult<&str, Color> {
//!   let (input, _) = tag("#")(input)?;
//!   let (input, (red, green, blue)) = tuple((hex_primary, hex_primary, hex_primary))(input)?;
//!
//!   Ok((input, Color { red, green, blue }))
//! }
//!
//! fn main() {
//!   assert_eq!(hex_color("#2F14DF"), Ok(("", Color {
//!     red: 47,
//!     green: 20,
//!     blue: 223,
//!   })));
//! }
//! ```
//!
//!
//! The code is available on [Github](https://github.com/Geal/nom)
//!
//! There are a few [guides](https://github.com/Geal/nom/tree/main/doc) with more details
//! about [how to write parsers](https://github.com/Geal/nom/blob/main/doc/making_a_new_parser_from_scratch.md),
//! or the [error management system](https://github.com/Geal/nom/blob/main/doc/error_management.md).
//! You can also check out the [recipes] module that contains examples of common patterns.
//!
//! **Looking for a specific combinator? Read the
//! ["choose a combinator" guide](https://github.com/Geal/nom/blob/main/doc/choosing_a_combinator.md)**
//!
//! If you are upgrading to nom 5.0, please read the
//! [migration document](https://github.com/Geal/nom/blob/main/doc/upgrading_to_nom_5.md).
//!
//! ## Parser combinators
//!
//! Parser combinators are an approach to parsers that is very different from
//! software like [lex](https://en.wikipedia.org/wiki/Lex_(software)) and
//! [yacc](https://en.wikipedia.org/wiki/Yacc). Instead of writing the grammar
//! in a separate syntax and generating the corresponding code, you use very small
//! functions with very specific purposes, like "take 5 bytes", or "recognize the
//! word 'HTTP'", and assemble them in meaningful patterns like "recognize
//! 'HTTP', then a space, then a version".
//! The resulting code is small, and looks like the grammar you would have
//! written with other parser approaches.
//!
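//! As a minimal sketch of that "recognize 'HTTP', then a space, then a version" idea,
//! assembled from such small functions (the parser name and input below are made up
//! for illustration):
//!
//! ```rust
//! use nom::{
//!   IResult,
//!   bytes::complete::{tag, take_while1},
//!   character::complete::char,
//!   sequence::separated_pair,
//! };
//!
//! // "HTTP", then a space, then a version like "1.1"
//! fn http_and_version(input: &str) -> IResult<&str, (&str, &str)> {
//!   separated_pair(
//!     tag("HTTP"),
//!     char(' '),
//!     take_while1(|c: char| c.is_ascii_digit() || c == '.'),
//!   )(input)
//! }
//!
//! assert_eq!(http_and_version("HTTP 1.1"), Ok(("", ("HTTP", "1.1"))));
//! ```
//!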
//! This gives us a few advantages:
//!
//! - The parsers are small and easy to write
//! - The parser components are easy to reuse (if they're general enough, please add them to nom!)
//! - The parser components are easy to test separately (unit tests and property-based tests)
//! - The parser combination code looks close to the grammar you would have written
//! - You can build partial parsers, specific to the data you need at the moment, and ignore the rest
//!
//! Here is an example of one such parser, to recognize text between parentheses:
//!
//! ```rust
//! use nom::{
//!   IResult,
//!   sequence::delimited,
//!   // see the "streaming/complete" paragraph lower for an explanation of these submodules
//!   character::complete::char,
//!   bytes::complete::is_not
//! };
//!
//! fn parens(input: &str) -> IResult<&str, &str> {
//!   delimited(char('('), is_not(")"), char(')'))(input)
//! }
//! ```
//!
//! It defines a function named `parens` which will recognize a sequence made of the
//! character `(`, the longest string slice not containing `)`, then the character
//! `)`, and will return the slice in the middle.
//!
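//! As a quick illustration (the input here is made up), calling it returns the
//! remaining input along with the text between the parentheses:
//!
//! ```rust
//! # use nom::{IResult, sequence::delimited, character::complete::char, bytes::complete::is_not};
//! # fn parens(input: &str) -> IResult<&str, &str> {
//! #   delimited(char('('), is_not(")"), char(')'))(input)
//! # }
//! assert_eq!(parens("(hello) world"), Ok((" world", "hello")));
//! ```
//!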
//! Here is another parser, written without using nom's combinators this time:
//!
//! ```rust
//! use nom::{IResult, Err, Needed};
//!
//! # fn main() {
//! fn take4(i: &[u8]) -> IResult<&[u8], &[u8]> {
//!   if i.len() < 4 {
//!     Err(Err::Incomplete(Needed::new(4)))
//!   } else {
//!     Ok((&i[4..], &i[0..4]))
//!   }
//! }
//! # }
//! ```
//!
//! This function takes a byte slice as input and tries to consume 4 bytes.
//! Writing every parser by hand like this is error-prone, even with Rust's
//! safety features: there are still plenty of mistakes to make. That's why
//! nom provides a set of functions to help in developing parsers.
//!
//! With nom's functions, you would write it like this:
//!
//! ```rust
//! use nom::{IResult, bytes::streaming::take};
//! fn take4(input: &str) -> IResult<&str, &str> {
//!   take(4u8)(input)
//! }
//! ```
//!
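//! For example, a short illustrative check on that parser (the inputs are made up;
//! note it uses the streaming variant of `take`, so a too-short input asks for more data):
//!
//! ```rust
//! use nom::{Err, IResult, bytes::streaming::take};
//!
//! fn take4(input: &str) -> IResult<&str, &str> {
//!   take(4u8)(input)
//! }
//!
//! assert_eq!(take4("abcdefg"), Ok(("efg", "abcd")));
//! // with the streaming variant, a short input reports `Incomplete` instead of an error
//! assert!(matches!(take4("ab"), Err(Err::Incomplete(_))));
//! ```
//!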
//! A parser in nom is a function which, for an input type `I`, an output type `O`
//! and an optional error type `E`, will have the following signature:
//!
//! ```rust,compile_fail
//! fn parser(input: I) -> IResult<I, O, E>;
//! ```
//!
//! Or like this, if you don't want to specify a custom error type (it will be `nom::error::Error<I>` by default):
//!
//! ```rust,compile_fail
//! fn parser(input: I) -> IResult<I, O>;
//! ```
//!
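//! As a small sketch, here is such a parser with the error parameter spelled out
//! explicitly (using the default `nom::error::Error` type, so it is equivalent to
//! `IResult<&str, &str>`):
//!
//! ```rust
//! use nom::{IResult, error::Error, bytes::complete::tag};
//!
//! fn parser(input: &str) -> IResult<&str, &str, Error<&str>> {
//!   tag("hello")(input)
//! }
//!
//! assert_eq!(parser("hello world"), Ok((" world", "hello")));
//! ```
//!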
//! `IResult` is an alias for the `Result` type:
//!
//! ```rust
//! use nom::{Needed, error::Error};
//!
//! type IResult<I, O, E = Error<I>> = Result<(I, O), Err<E>>;
//!
//! enum Err<E> {
//!   Incomplete(Needed),
//!   Error(E),
//!   Failure(E),
//! }
//! ```
//!
//! It can have the following values:
//!
//! - A correct result `Ok((I, O))`, with the first element being the remaining input (not yet parsed), and the second the output value;
//! - An error `Err(Err::Error(c))`, with `c` an error that can be built from the input position and a parser-specific error
//! - An error `Err(Err::Incomplete(Needed))`, indicating that more input is necessary. `Needed` can indicate how much data is needed
//! - An error `Err(Err::Failure(c))`. It works like the `Error` case, except it indicates an unrecoverable error: we cannot backtrack and test another parser
//!
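//! A short sketch of matching on these variants (the parser and input below are made
//! up for illustration; it uses the streaming `tag`, so a partial match reports `Incomplete`):
//!
//! ```rust
//! use nom::{Err, IResult, bytes::streaming::tag};
//!
//! fn parser(input: &str) -> IResult<&str, &str> {
//!   tag("hello")(input)
//! }
//!
//! match parser("hel") {
//!   Ok((remaining, output)) => println!("parsed {:?}, {:?} left", output, remaining),
//!   Err(Err::Incomplete(needed)) => println!("more input needed: {:?}", needed),
//!   Err(Err::Error(e)) | Err(Err::Failure(e)) => println!("parse error: {:?}", e),
//! }
//! ```
//!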
//! Please refer to the ["choose a combinator" guide](https://github.com/Geal/nom/blob/main/doc/choosing_a_combinator.md) for an exhaustive list of parsers.
//! See also the rest of the documentation [here](https://github.com/Geal/nom/blob/main/doc).
//!
//! ## Making new parsers with function combinators
//!
//! nom is based on functions that generate parsers, with a signature like
//! this: `(arguments) -> impl Fn(Input) -> IResult<Input, Output, Error>`.
//! The arguments of a combinator can be direct values (like `take`, which takes a
//! number of bytes or characters as argument) or even other parsers (like
//! `delimited`, which takes three parsers as arguments and returns the result of
//! the second one if all of them are successful).
//!
//! Here are some examples:
//!
//! ```rust
//! use nom::IResult;
//! use nom::bytes::complete::{tag, take};
//! fn abcd_parser(i: &str) -> IResult<&str, &str> {
//!   tag("abcd")(i) // will consume bytes if the input begins with "abcd"
//! }
//!
//! fn take_10(i: &[u8]) -> IResult<&[u8], &[u8]> {
//!   take(10u8)(i) // will consume and return 10 bytes of input
//! }
//! ```
//!
//! ## Combining parsers
//!
//! There are higher-level patterns, like the **`alt`** combinator, which
//! provides a choice between multiple parsers. If one branch fails, it tries
//! the next, and returns the result of the first parser that succeeds:
//!
//! ```rust
//! use nom::IResult;
//! use nom::branch::alt;
//! use nom::bytes::complete::tag;
//!
//! let mut alt_tags = alt((tag("abcd"), tag("efgh")));
//!
//! assert_eq!(alt_tags(&b"abcdxxx"[..]), Ok((&b"xxx"[..], &b"abcd"[..])));
//! assert_eq!(alt_tags(&b"efghxxx"[..]), Ok((&b"xxx"[..], &b"efgh"[..])));
//! assert_eq!(alt_tags(&b"ijklxxx"[..]), Err(nom::Err::Error((&b"ijklxxx"[..], nom::error::ErrorKind::Tag))));
//! ```
//!
//! The **`opt`** combinator makes a parser optional. If the child parser returns
//! an error, **`opt`** will still succeed and return `None`:
//!
//! ```rust
//! use nom::{IResult, combinator::opt, bytes::complete::tag};
//! fn abcd_opt(i: &[u8]) -> IResult<&[u8], Option<&[u8]>> {
//!   opt(tag("abcd"))(i)
//! }
//!
//! assert_eq!(abcd_opt(&b"abcdxxx"[..]), Ok((&b"xxx"[..], Some(&b"abcd"[..]))));
//! assert_eq!(abcd_opt(&b"efghxxx"[..]), Ok((&b"efghxxx"[..], None)));
//! ```
//!
//! **`many0`** applies a parser 0 or more times, and returns a vector of the aggregated results:
//!
//! ```rust
//! # #[cfg(feature = "alloc")]
//! # fn main() {
//! use nom::{IResult, multi::many0, bytes::complete::tag};
//! use std::str;
//!
//! fn multi(i: &str) -> IResult<&str, Vec<&str>> {
//!   many0(tag("abcd"))(i)
//! }
//!
//! let a = "abcdef";
//! let b = "abcdabcdef";
//! let c = "azerty";
//! assert_eq!(multi(a), Ok(("ef", vec!["abcd"])));
//! assert_eq!(multi(b), Ok(("ef", vec!["abcd", "abcd"])));
//! assert_eq!(multi(c), Ok(("azerty", Vec::new())));
//! # }
//! # #[cfg(not(feature = "alloc"))]
//! # fn main() {}
//! ```
//!
//! Here are some basic combinators available:
//!
//! - **`opt`**: Will make the parser optional (if it returns the `O` type, the new parser returns `Option<O>`)
//! - **`many0`**: Will apply the parser 0 or more times (if it returns the `O` type, the new parser returns `Vec<O>`)
//! - **`many1`**: Will apply the parser 1 or more times
//!
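//! For example, `many1` behaves like `many0` above, except that zero matches is an
//! error (a small sketch, gated on the `alloc` feature since it returns a `Vec`):
//!
//! ```rust
//! # #[cfg(feature = "alloc")]
//! # fn main() {
//! use nom::{IResult, multi::many1, bytes::complete::tag};
//!
//! fn at_least_one(i: &str) -> IResult<&str, Vec<&str>> {
//!   many1(tag("abcd"))(i)
//! }
//!
//! assert_eq!(at_least_one("abcdabcdef"), Ok(("ef", vec!["abcd", "abcd"])));
//! // unlike `many0`, zero matches is an error
//! assert!(at_least_one("azerty").is_err());
//! # }
//! # #[cfg(not(feature = "alloc"))]
//! # fn main() {}
//! ```
//!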
//! There are more complex (and more useful) parsers like `tuple`, which is
//! used to apply a series of parsers then assemble their results.
//!
//! Example with `tuple`:
//!
//! ```rust
//! # fn main() {
//! use nom::{error::ErrorKind, Needed,
//!   number::streaming::be_u16,
//!   bytes::streaming::{tag, take},
//!   sequence::tuple};
//!
//! let mut tpl = tuple((be_u16, take(3u8), tag("fg")));
//!
//! assert_eq!(
//!   tpl(&b"abcdefgh"[..]),
//!   Ok((
//!     &b"h"[..],
//!     (0x6162u16, &b"cde"[..], &b"fg"[..])
//!   ))
//! );
//! assert_eq!(tpl(&b"abcde"[..]), Err(nom::Err::Incomplete(Needed::new(2))));
//! let input = &b"abcdejk"[..];
//! assert_eq!(tpl(input), Err(nom::Err::Error((&input[5..], ErrorKind::Tag))));
//! # }
//! ```
//!
//! But you can also use a sequence of combinators written in imperative style,
//! thanks to the `?` operator:
//!
//! ```rust
//! # fn main() {
//! use nom::{IResult, bytes::complete::tag};
//!
//! #[derive(Debug, PartialEq)]
//! struct A {
//!   a: u8,
//!   b: u8
//! }
//!
//! fn ret_int1(i: &[u8]) -> IResult<&[u8], u8> { Ok((i, 1)) }
//! fn ret_int2(i: &[u8]) -> IResult<&[u8], u8> { Ok((i, 2)) }
//!
//! fn f(i: &[u8]) -> IResult<&[u8], A> {
//!   // if successful, the parser returns `Ok((remaining_input, output_value))` that we can destructure
//!   let (i, _) = tag("abcd")(i)?;
//!   let (i, a) = ret_int1(i)?;
//!   let (i, _) = tag("efgh")(i)?;
//!   let (i, b) = ret_int2(i)?;
//!
//!   Ok((i, A { a, b }))
//! }
//!
//! let r = f(b"abcdefghX");
//! assert_eq!(r, Ok((&b"X"[..], A { a: 1, b: 2 })));
//! # }
//! ```
//!
//! ## Streaming / Complete
//!
//! Some of nom's modules have `streaming` or `complete` submodules. They hold
//! different variants of the same combinators.
//!
//! A streaming parser assumes that we might not have all of the input data.
//! This can happen with network protocols or large file parsers, where the
//! input buffer can fill up and need to be resized or refilled.
//!
//! A complete parser assumes that we already have all of the input data.
//! This will be the common case with small files that can be read entirely
//! into memory.
//!
//! Here is how it works in practice:
//!
//! ```rust
//! use nom::{IResult, Err, Needed, error::{Error, ErrorKind}, bytes, character};
//!
//! fn take_streaming(i: &[u8]) -> IResult<&[u8], &[u8]> {
//!   bytes::streaming::take(4u8)(i)
//! }
//!
//! fn take_complete(i: &[u8]) -> IResult<&[u8], &[u8]> {
//!   bytes::complete::take(4u8)(i)
//! }
//!
//! // both parsers will take 4 bytes as expected
//! assert_eq!(take_streaming(&b"abcde"[..]), Ok((&b"e"[..], &b"abcd"[..])));
//! assert_eq!(take_complete(&b"abcde"[..]), Ok((&b"e"[..], &b"abcd"[..])));
//!
//! // if the input is smaller than 4 bytes, the streaming parser
//! // will return `Incomplete` to indicate that we need more data
//! assert_eq!(take_streaming(&b"abc"[..]), Err(Err::Incomplete(Needed::new(1))));
//!
//! // but the complete parser will return an error
//! assert_eq!(take_complete(&b"abc"[..]), Err(Err::Error(Error::new(&b"abc"[..], ErrorKind::Eof))));
//!
//! // the alpha0 function recognizes 0 or more alphabetic characters
//! fn alpha0_streaming(i: &str) -> IResult<&str, &str> {
//!   character::streaming::alpha0(i)
//! }
//!
//! fn alpha0_complete(i: &str) -> IResult<&str, &str> {
//!   character::complete::alpha0(i)
//! }
//!
//! // if there's a clear limit to the recognized characters, both parsers work the same way
//! assert_eq!(alpha0_streaming("abcd;"), Ok((";", "abcd")));
//! assert_eq!(alpha0_complete("abcd;"), Ok((";", "abcd")));
//!
//! // but when there's no limit, the streaming version returns `Incomplete`, because it cannot
//! // know if more input data should be recognized. The whole input could be "abcd;", or
//! // "abcde;"
//! assert_eq!(alpha0_streaming("abcd"), Err(Err::Incomplete(Needed::new(1))));
//!
//! // while the complete version knows that all of the data is there
//! assert_eq!(alpha0_complete("abcd"), Ok(("", "abcd")));
//! ```
//! **Going further:** Read the [guides](https://github.com/Geal/nom/tree/main/doc),
//! check out the [recipes]!
#![cfg_attr(not(feature = "std"), no_std)]
#![cfg_attr(feature = "cargo-clippy", allow(clippy::doc_markdown))]
#![cfg_attr(feature = "docsrs", feature(doc_cfg))]
#![cfg_attr(feature = "docsrs", feature(extended_key_value_attributes))]
#![deny(missing_docs)]
#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))]
#[cfg(feature = "alloc")]
#[macro_use]
extern crate alloc;
#[cfg(doctest)]
extern crate doc_comment;

#[cfg(doctest)]
doc_comment::doctest!("../README.md");

/// Lib module to re-export everything needed from `std` or `core`/`alloc`. This is how `serde` does
/// it, although there it is not public.
#[cfg_attr(nightly, allow(rustdoc::missing_doc_code_examples))]
pub mod lib {
  /// `std` facade allowing `std`/`core` to be interchangeable. Reexports `alloc` crate optionally,
  /// as well as `core` or `std`
  #[cfg(not(feature = "std"))]
  #[cfg_attr(nightly, allow(rustdoc::missing_doc_code_examples))]
  /// internal std exports for no_std compatibility
  pub mod std {
    #[doc(hidden)]
    #[cfg(not(feature = "alloc"))]
    pub use core::borrow;

    #[cfg(feature = "alloc")]
    #[doc(hidden)]
    pub use alloc::{borrow, boxed, string, vec};

    #[doc(hidden)]
    pub use core::{cmp, convert, fmt, iter, mem, ops, option, result, slice, str};

    /// internal reproduction of std prelude
    #[doc(hidden)]
    pub mod prelude {
      pub use core::prelude as v1;
    }
  }

  #[cfg(feature = "std")]
  #[cfg_attr(nightly, allow(rustdoc::missing_doc_code_examples))]
  /// internal std exports for no_std compatibility
  pub mod std {
    #[doc(hidden)]
    pub use std::{
      alloc, borrow, boxed, cmp, collections, convert, fmt, hash, iter, mem, ops, option, result,
      slice, str, string, vec,
    };

    /// internal reproduction of std prelude
    #[doc(hidden)]
    pub mod prelude {
      pub use std::prelude as v1;
    }
  }
}

pub use self::bits::*;
pub use self::internal::*;
pub use self::traits::*;

pub use self::str::*;

#[macro_use]
mod macros;
#[macro_use]
pub mod error;

pub mod branch;
pub mod combinator;
mod internal;
pub mod multi;
pub mod sequence;
mod traits;

pub mod bits;
pub mod bytes;

pub mod character;

mod str;

pub mod number;

#[cfg(feature = "docsrs")]
#[cfg_attr(feature = "docsrs", cfg_attr(feature = "docsrs", doc = include_str!("../doc/nom_recipes.md")))]
pub mod recipes {}