lib.rs source code [crates/nom/src/lib.rs]

1	//! # nom, eating data byte by byte
2	//!
3	//! nom is a parser combinator library with a focus on safe parsing,
4	//! streaming patterns, and as much as possible zero copy.
5	//!
6	//! ## Example
7	//!
8	//! ```rust
9	//! use nom::{
10	//! IResult,
11	//! bytes::complete::{tag, take_while_m_n},
12	//! combinator::map_res,
13	//! sequence::tuple};
14	//!
15	//! #[derive(Debug,PartialEq)]
16	//! pub struct Color {
17	//! pub red: u8,
18	//! pub green: u8,
19	//! pub blue: u8,
20	//! }
21	//!
22	//! fn from_hex(input: &str) -> Result<u8, std::num::ParseIntError> {
23	//! u8::from_str_radix(input, 16)
24	//! }
25	//!
26	//! fn is_hex_digit(c: char) -> bool {
27	//! c.is_digit(16)
28	//! }
29	//!
30	//! fn hex_primary(input: &str) -> IResult<&str, u8> {
31	//! map_res(
32	//! take_while_m_n(2, 2, is_hex_digit),
33	//! from_hex
34	//! )(input)
35	//! }
36	//!
37	//! fn hex_color(input: &str) -> IResult<&str, Color> {
38	//! let (input, _) = tag("#")(input)?;
39	//! let (input, (red, green, blue)) = tuple((hex_primary, hex_primary, hex_primary))(input)?;
40	//!
41	//! Ok((input, Color { red, green, blue }))
42	//! }
43	//!
44	//! fn main() {
45	//! assert_eq!(hex_color("#2F14DF"), Ok(("", Color {
46	//! red: 47,
47	//! green: 20,
48	//! blue: 223,
49	//! })));
50	//! }
51	//! ```
52	//!
53	//! The code is available on [Github](https://github.com/Geal/nom)
54	//!
55	//! There are a few [guides](https://github.com/Geal/nom/tree/main/doc) with more details
56	//! about [how to write parsers](https://github.com/Geal/nom/blob/main/doc/making_a_new_parser_from_scratch.md),
57	//! or the [error management system](https://github.com/Geal/nom/blob/main/doc/error_management.md).
58	//! You can also check out the [recipes] module that contains examples of common patterns.
59	//!
60	//! **Looking for a specific combinator? Read the
61	//! ["choose a combinator" guide](https://github.com/Geal/nom/blob/main/doc/choosing_a_combinator.md)**
62	//!
63	//! If you are upgrading to nom 5.0, please read the
64	//! [migration document](https://github.com/Geal/nom/blob/main/doc/upgrading_to_nom_5.md).
65	//!
66	//! ## Parser combinators
67	//!
68	//! Parser combinators are an approach to parsers that is very different from
69	//! software like [lex](https://en.wikipedia.org/wiki/Lex_(software)) and
70	//! [yacc](https://en.wikipedia.org/wiki/Yacc). Instead of writing the grammar
71	//! in a separate syntax and generating the corresponding code, you use very small
72	//! functions with very specific purposes, like "take 5 bytes", or "recognize the
73	//! word 'HTTP'", and assemble them in meaningful patterns like "recognize
74	//! 'HTTP', then a space, then a version".
75	//! The resulting code is small, and looks like the grammar you would have
76	//! written with other parser approaches.
77	//!
78	//! This gives us a few advantages:
79	//!
80	//! - The parsers are small and easy to write
81	//! - The parser components are easy to reuse (if they're general enough, please add them to nom!)
82	//! - The parser components are easy to test separately (unit tests and property-based tests)
83	//! - The parser combination code looks close to the grammar you would have written
84	//! - You can build partial parsers, specific to the data you need at the moment, and ignore the rest
85	//!
86	//! Here is an example of one such parser, to recognize text between parentheses:
87	//!
88	//! ```rust
89	//! use nom::{
90	//! IResult,
91	//! sequence::delimited,
92	//! // see the "streaming/complete" paragraph lower for an explanation of these submodules
93	//! character::complete::char,
94	//! bytes::complete::is_not
95	//! };
96	//!
97	//! fn parens(input: &str) -> IResult<&str, &str> {
98	//! delimited(char('('), is_not(")"), char(')'))(input)
99	//! }
100	//! ```
101	//!
102	//! It defines a function named `parens` which will recognize a sequence of the
103	//! character `(`, the longest byte array not containing `)`, then the character
104	//! `)`, and will return the byte array in the middle.
105	//!
106	//! Here is another parser, written without using nom's combinators this time:
107	//!
108	//! ```rust
109	//! use nom::{IResult, Err, Needed};
110	//!
111	//! # fn main() {
112	//! fn take4(i: &[u8]) -> IResult<&[u8], &[u8]>{
113	//! if i.len() < 4 {
114	//! Err(Err::Incomplete(Needed::new(4)))
115	//! } else {
116	//! Ok((&i[4..], &i[0..4]))
117	//! }
118	//! }
119	//! # }
120	//! ```
121	//!
122	//! This function takes a byte array as input, and tries to consume 4 bytes.
123	//! Writing all the parsers manually, like this, is dangerous, despite Rust's
124	//! safety features. There are still a lot of mistakes one can make. That's why
125	//! nom provides a list of functions to help in developing parsers.
126	//!
127	//! With functions, you would write it like this:
128	//!
129	//! ```rust
130	//! use nom::{IResult, bytes::streaming::take};
131	//! fn take4(input: &str) -> IResult<&str, &str> {
132	//! take(4u8)(input)
133	//! }
134	//! ```
135	//!
136	//! A parser in nom is a function which, for an input type `I`, an output type `O`
137	//! and an optional error type `E`, will have the following signature:
138	//!
139	//! ```rust,compile_fail
140	//! fn parser(input: I) -> IResult<I, O, E>;
141	//! ```
142	//!
143	//! Or like this, if you don't want to specify a custom error type (it will be `nom::error::Error<I>` by default):
144	//!
145	//! ```rust,compile_fail
146	//! fn parser(input: I) -> IResult<I, O>;
147	//! ```
148	//!
149	//! `IResult` is an alias for the `Result` type:
150	//!
151	//! ```rust
152	//! use nom::{Needed, error::Error};
153	//!
154	//! type IResult<I, O, E = Error<I>> = Result<(I, O), Err<E>>;
155	//!
156	//! enum Err<E> {
157	//! Incomplete(Needed),
158	//! Error(E),
159	//! Failure(E),
160	//! }
161	//! ```
162	//!
163	//! It can have the following values:
164	//!
165	//! - A correct result `Ok((I,O))` with the first element being the remainder of the input (not parsed yet), and the second the output value;
166	//! - An error `Err(Err::Error(c))` with `c` an error that can be built from the input position and a parser specific error
167	//! - An error `Err(Err::Incomplete(Needed))` indicating that more input is necessary. `Needed` can indicate how much data is needed
168	//! - An error `Err(Err::Failure(c))`. It works like the `Error` case, except it indicates an unrecoverable error: We cannot backtrack and test another parser
169	//!
170	//! Please refer to the ["choose a combinator" guide](https://github.com/Geal/nom/blob/main/doc/choosing_a_combinator.md) for an exhaustive list of parsers.
171	//! See also the rest of the documentation [here](https://github.com/Geal/nom/blob/main/doc).
172	//!
173	//! ## Making new parsers with function combinators
174	//!
175	//! nom is based on functions that generate parsers, with a signature like
176	//! this: `(arguments) -> impl Fn(Input) -> IResult<Input, Output, Error>`.
177	//! The arguments of a combinator can be direct values (like `take` which uses
178	//! a number of bytes or character as argument) or even other parsers (like
179	//! `delimited` which takes as argument 3 parsers, and returns the result of
180	//! the second one if all are successful).
181	//!
182	//! Here are some examples:
183	//!
184	//! ```rust
185	//! use nom::IResult;
186	//! use nom::bytes::complete::{tag, take};
187	//! fn abcd_parser(i: &str) -> IResult<&str, &str> {
188	//! tag("abcd")(i) // will consume bytes if the input begins with "abcd"
189	//! }
190	//!
191	//! fn take_10(i: &[u8]) -> IResult<&[u8], &[u8]> {
192	//! take(10u8)(i) // will consume and return 10 bytes of input
193	//! }
194	//! ```
195	//!
196	//! ## Combining parsers
197	//!
198	//! There are higher level patterns, like the **`alt`** combinator, which
199	//! provides a choice between multiple parsers. If one branch fails, it tries
200	//! the next, and returns the result of the first parser that succeeds:
201	//!
202	//! ```rust
203	//! use nom::IResult;
204	//! use nom::branch::alt;
205	//! use nom::bytes::complete::tag;
206	//!
207	//! let mut alt_tags = alt((tag("abcd"), tag("efgh")));
208	//!
209	//! assert_eq!(alt_tags(&b"abcdxxx"[..]), Ok((&b"xxx"[..], &b"abcd"[..])));
210	//! assert_eq!(alt_tags(&b"efghxxx"[..]), Ok((&b"xxx"[..], &b"efgh"[..])));
211	//! assert_eq!(alt_tags(&b"ijklxxx"[..]), Err(nom::Err::Error((&b"ijklxxx"[..], nom::error::ErrorKind::Tag))));
212	//! ```
213	//!
214	//! The **`opt`** combinator makes a parser optional. If the child parser returns
215	//! an error, **`opt`** will still succeed and return None:
216	//!
217	//! ```rust
218	//! use nom::{IResult, combinator::opt, bytes::complete::tag};
219	//! fn abcd_opt(i: &[u8]) -> IResult<&[u8], Option<&[u8]>> {
220	//! opt(tag("abcd"))(i)
221	//! }
222	//!
223	//! assert_eq!(abcd_opt(&b"abcdxxx"[..]), Ok((&b"xxx"[..], Some(&b"abcd"[..]))));
224	//! assert_eq!(abcd_opt(&b"efghxxx"[..]), Ok((&b"efghxxx"[..], None)));
225	//! ```
226	//!
227	//! **`many0`** applies a parser 0 or more times, and returns a vector of the aggregated results:
228	//!
229	//! ```rust
230	//! # #[cfg(feature = "alloc")]
231	//! # fn main() {
232	//! use nom::{IResult, multi::many0, bytes::complete::tag};
233	//! use std::str;
234	//!
235	//! fn multi(i: &str) -> IResult<&str, Vec<&str>> {
236	//! many0(tag("abcd"))(i)
237	//! }
238	//!
239	//! let a = "abcdef";
240	//! let b = "abcdabcdef";
241	//! let c = "azerty";
242	//! assert_eq!(multi(a), Ok(("ef", vec!["abcd"])));
243	//! assert_eq!(multi(b), Ok(("ef", vec!["abcd", "abcd"])));
244	//! assert_eq!(multi(c), Ok(("azerty", Vec::new())));
245	//! # }
246	//! # #[cfg(not(feature = "alloc"))]
247	//! # fn main() {}
248	//! ```
249	//!
250	//! Here are some basic combinators available:
251	//!
252	//! - `opt`: Will make the parser optional (if it returns the `O` type, the new parser returns `Option<O>`)
253	//! - `many0`: Will apply the parser 0 or more times (if it returns the `O` type, the new parser returns `Vec<O>`)
254	//! - `many1`: Will apply the parser 1 or more times
255	//!
256	//! There are more complex (and more useful) parsers like `tuple`, which is
257	//! used to apply a series of parsers then assemble their results.
258	//!
259	//! Example with `tuple`:
260	//!
261	//! ```rust
262	//! # fn main() {
263	//! use nom::{error::ErrorKind, Needed,
264	//! number::streaming::be_u16,
265	//! bytes::streaming::{tag, take},
266	//! sequence::tuple};
267	//!
268	//! let mut tpl = tuple((be_u16, take(3u8), tag("fg")));
269	//!
270	//! assert_eq!(
271	//! tpl(&b"abcdefgh"[..]),
272	//! Ok((
273	//! &b"h"[..],
274	//! (0x6162u16, &b"cde"[..], &b"fg"[..])
275	//! ))
276	//! );
277	//! assert_eq!(tpl(&b"abcde"[..]), Err(nom::Err::Incomplete(Needed::new(2))));
278	//! let input = &b"abcdejk"[..];
279	//! assert_eq!(tpl(input), Err(nom::Err::Error((&input[5..], ErrorKind::Tag))));
280	//! # }
281	//! ```
282	//!
283	//! But you can also use a sequence of combinators written in imperative style,
284	//! thanks to the `?` operator:
285	//!
286	//! ```rust
287	//! # fn main() {
288	//! use nom::{IResult, bytes::complete::tag};
289	//!
290	//! #[derive(Debug, PartialEq)]
291	//! struct A {
292	//! a: u8,
293	//! b: u8
294	//! }
295	//!
296	//! fn ret_int1(i:&[u8]) -> IResult<&[u8], u8> { Ok((i,1)) }
297	//! fn ret_int2(i:&[u8]) -> IResult<&[u8], u8> { Ok((i,2)) }
298	//!
299	//! fn f(i: &[u8]) -> IResult<&[u8], A> {
300	//! // if successful, the parser returns `Ok((remaining_input, output_value))` that we can destructure
301	//! let (i, _) = tag("abcd")(i)?;
302	//! let (i, a) = ret_int1(i)?;
303	//! let (i, _) = tag("efgh")(i)?;
304	//! let (i, b) = ret_int2(i)?;
305	//!
306	//! Ok((i, A { a, b }))
307	//! }
308	//!
309	//! let r = f(b"abcdefghX");
310	//! assert_eq!(r, Ok((&b"X"[..], A{a: 1, b: 2})));
311	//! # }
312	//! ```
313	//!
314	//! ## Streaming / Complete
315	//!
316	//! Some of nom's modules have `streaming` or `complete` submodules. They hold
317	//! different variants of the same combinators.
318	//!
319	//! A streaming parser assumes that we might not have all of the input data.
320	//! This can happen with some network protocol or large file parsers, where the
321	//! input buffer can be full and need to be resized or refilled.
322	//!
323	//! A complete parser assumes that we already have all of the input data.
324	//! This will be the common case with small files that can be read entirely to
325	//! memory.
326	//!
327	//! Here is how it works in practice:
328	//!
329	//! ```rust
330	//! use nom::{IResult, Err, Needed, error::{Error, ErrorKind}, bytes, character};
331	//!
332	//! fn take_streaming(i: &[u8]) -> IResult<&[u8], &[u8]> {
333	//! bytes::streaming::take(4u8)(i)
334	//! }
335	//!
336	//! fn take_complete(i: &[u8]) -> IResult<&[u8], &[u8]> {
337	//! bytes::complete::take(4u8)(i)
338	//! }
339	//!
340	//! // both parsers will take 4 bytes as expected
341	//! assert_eq!(take_streaming(&b"abcde"[..]), Ok((&b"e"[..], &b"abcd"[..])));
342	//! assert_eq!(take_complete(&b"abcde"[..]), Ok((&b"e"[..], &b"abcd"[..])));
343	//!
344	//! // if the input is smaller than 4 bytes, the streaming parser
345	//! // will return `Incomplete` to indicate that we need more data
346	//! assert_eq!(take_streaming(&b"abc"[..]), Err(Err::Incomplete(Needed::new(1))));
347	//!
348	//! // but the complete parser will return an error
349	//! assert_eq!(take_complete(&b"abc"[..]), Err(Err::Error(Error::new(&b"abc"[..], ErrorKind::Eof))));
350	//!
351	//! // the alpha0 function recognizes 0 or more alphabetic characters
352	//! fn alpha0_streaming(i: &str) -> IResult<&str, &str> {
353	//! character::streaming::alpha0(i)
354	//! }
355	//!
356	//! fn alpha0_complete(i: &str) -> IResult<&str, &str> {
357	//! character::complete::alpha0(i)
358	//! }
359	//!
360	//! // if there's a clear limit to the recognized characters, both parsers work the same way
361	//! assert_eq!(alpha0_streaming("abcd;"), Ok((";", "abcd")));
362	//! assert_eq!(alpha0_complete("abcd;"), Ok((";", "abcd")));
363	//!
364	//! // but when there's no limit, the streaming version returns `Incomplete`, because it cannot
365	//! // know if more input data should be recognized. The whole input could be "abcd;", or
366	//! // "abcde;"
367	//! assert_eq!(alpha0_streaming("abcd"), Err(Err::Incomplete(Needed::new(1))));
368	//!
369	//! // while the complete version knows that all of the data is there
370	//! assert_eq!(alpha0_complete("abcd"), Ok(("", "abcd")));
371	//! ```
372	//! **Going further:** Read the [guides](https://github.com/Geal/nom/tree/main/doc),
373	//! check out the [recipes]!
374	#![cfg_attr(not(feature = "std"), no_std)]
375	#![cfg_attr(feature = "cargo-clippy", allow(clippy::doc_markdown))]
376	#![cfg_attr(feature = "docsrs", feature(doc_cfg))]
377	#![cfg_attr(feature = "docsrs", feature(extended_key_value_attributes))]
378	#![deny(missing_docs)]
379	#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))]
380	#[cfg(feature = "alloc")]
381	#[macro_use]
382	extern crate alloc;
383	#[cfg(doctest)]
384	extern crate doc_comment;
385
386	#[cfg(doctest)]
387	doc_comment::doctest!("../README.md");
388
/// Lib module to re-export everything needed from `std` or `core`/`alloc`. This is how `serde` does
/// it, albeit there it is not public.
///
/// Exactly one of the two `std` submodules below is compiled, selected by the `std` feature, so
/// the rest of the crate can name items through `lib::std::...` in either configuration.
#[cfg_attr(nightly, allow(rustdoc::missing_doc_code_examples))]
pub mod lib {
    /// `std` facade allowing `std`/`core` to be interchangeable. Reexports `alloc` crate optionally,
    /// as well as `core` or `std`.
    ///
    /// Internal std exports for no_std compatibility: this variant is built when the `std`
    /// feature is disabled.
    #[cfg(not(feature = "std"))]
    #[cfg_attr(nightly, allow(rustdoc::missing_doc_code_examples))]
    pub mod std {
        // Without `alloc`, only the `core` flavour of `borrow` is available.
        #[doc(hidden)]
        #[cfg(not(feature = "alloc"))]
        pub use core::borrow;

        // With `alloc`, `borrow` comes from `alloc` together with the heap-backed modules.
        #[cfg(feature = "alloc")]
        #[doc(hidden)]
        pub use alloc::{borrow, boxed, string, vec};

        #[doc(hidden)]
        pub use core::{cmp, convert, fmt, iter, mem, ops, option, result, slice, str};

        /// internal reproduction of std prelude
        #[doc(hidden)]
        pub mod prelude {
            // NOTE(review): `v1` aliases the whole `core::prelude` module, not `core::prelude::v1`.
            pub use core::prelude as v1;
        }
    }

    /// `std` facade allowing `std`/`core` to be interchangeable.
    ///
    /// Internal std exports for no_std compatibility: this variant is built when the `std`
    /// feature is enabled and forwards straight to the standard library.
    #[cfg(feature = "std")]
    #[cfg_attr(nightly, allow(rustdoc::missing_doc_code_examples))]
    pub mod std {
        #[doc(hidden)]
        pub use std::{
            alloc, borrow, boxed, cmp, collections, convert, fmt, hash, iter, mem, ops, option,
            result, slice, str, string, vec,
        };

        /// internal reproduction of std prelude
        #[doc(hidden)]
        pub mod prelude {
            // NOTE(review): `v1` aliases the whole `std::prelude` module, not `std::prelude::v1`.
            pub use std::prelude as v1;
        }
    }
}
434
435	pub use self::bits::*;
436	pub use self::internal::*;
437	pub use self::traits::*;
438
439	pub use self::str::*;
440
441	#[macro_use]
442	mod macros;
443	#[macro_use]
444	pub mod error;
445
446	pub mod branch;
447	pub mod combinator;
448	mod internal;
449	pub mod multi;
450	pub mod sequence;
451	mod traits;
452
453	pub mod bits;
454	pub mod bytes;
455
456	pub mod character;
457
458	mod str;
459
460	pub mod number;
461
// Documentation-only module: it has an empty body, is compiled only when the `docsrs` feature is
// enabled, and takes its rustdoc text from `../doc/nom_recipes.md` instead of inline comments.
#[cfg(feature = "docsrs")]
// NOTE(review): the nested `cfg_attr` repeats the same condition. Presumably this keeps compilers
// without extended key-value attributes from parsing `doc = include_str!(..)` unless `docsrs` is
// enabled; confirm before flattening it.
#[cfg_attr(feature = "docsrs", cfg_attr(feature = "docsrs", doc = include_str!("../doc/nom_recipes.md")))]
pub mod recipes {}
465