internal.rs source code [crates/nom/src/internal.rs]

1	//! Basic types to build the parsers
2
3	use self::Needed::*;
4	use crate::error::{self, ErrorKind};
5	use crate::lib::std::fmt;
6	use core::num::NonZeroUsize;
7
8	/// Holds the result of parsing functions
9	///
10	/// It depends on the input type `I`, the output type `O`, and the error type `E`
11	/// (by default `(I, nom::ErrorKind)`)
12	///
13	/// The `Ok` side is a pair containing the remainder of the input (the part of the data that
14	/// was not parsed) and the produced value. The `Err` side contains an instance of `nom::Err`.
15	///
16	/// Outside of the parsing code, you can use the [Finish::finish] method to convert
17	/// it to a more common result type
18	pub type IResult<I, O, E = error::Error<I>> = Result<(I, O), Err<E>>;
19
/// Helper trait to convert a parser's result to a more manageable type
pub trait Finish<I, O, E> {
    /// Converts the parser's result to a type that is more consumable by error
    /// management libraries. It keeps the same `Ok` branch, and merges `Err::Error`
    /// and `Err::Failure` into the `Err` side.
    ///
    /// # Panics
    ///
    /// If the result is `Err(Err::Incomplete(_))`, this method will panic.
    ///
    /// - "complete" parsers: It will not be an issue, `Incomplete` is never used
    /// - "streaming" parsers: `Incomplete` will be returned if there's not enough data
    ///   for the parser to decide, and you should gather more data before parsing again.
    ///   Once the parser returns either `Ok(_)`, `Err(Err::Error(_))` or `Err(Err::Failure(_))`,
    ///   you can get out of the parsing loop and call `finish()` on the parser's result
    fn finish(self) -> Result<(I, O), E>;
}
34
35	impl<I, O, E> Finish<I, O, E> for IResult<I, O, E> {
36	fn finish(self) -> Result<(I, O), E> {
37	match self {
38	Ok(res: (I, O)) => Ok(res),
39	Err(Err::Error(e: E)) \| Err(Err::Failure(e: E)) => Err(e),
40	Err(Err::Incomplete(_)) => {
41	panic!("Cannot call `finish()` on `Err(Err::Incomplete(_))`: this result means that the parser does not have enough data to decide, you should gather more data and try to reapply the parser instead")
42	}
43	}
44	}
45	}
46
/// Contains information on needed data if a parser returned `Incomplete`
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))]
pub enum Needed {
    /// Needs more data, but we do not know how much
    Unknown,
    /// Contains the required data size in bytes
    Size(NonZeroUsize),
}

impl Needed {
    /// Creates `Needed` instance, returns `Needed::Unknown` if the argument is zero
    pub fn new(s: usize) -> Self {
        // `NonZeroUsize::new` yields `None` exactly when `s == 0`.
        NonZeroUsize::new(s).map_or(Needed::Unknown, Needed::Size)
    }

    /// Indicates if we know how many bytes we need
    pub fn is_known(&self) -> bool {
        *self != Needed::Unknown
    }

    /// Maps a `Needed` to `Needed` by applying a function to a contained `Size` value.
    ///
    /// `Unknown` is returned unchanged. The value produced by `f` goes through
    /// [`Needed::new`], so a result of zero becomes `Needed::Unknown`.
    #[inline]
    pub fn map<F: Fn(NonZeroUsize) -> usize>(self, f: F) -> Needed {
        match self {
            Needed::Unknown => Needed::Unknown,
            Needed::Size(n) => Needed::new(f(n)),
        }
    }
}
80
81	/// The `Err` enum indicates the parser was not successful
82	///
83	/// It has three cases:
84	///
85	/// `Incomplete` indicates that more data is needed to decide. The `Needed` enum*
86	/// can contain how many additional bytes are necessary. If you are sure your parser
87	/// is working on full data, you can wrap your parser with the `complete` combinator
88	/// to transform that case in `Error`
89	/// `Error` means some parser did not succeed, but another one might (as an example,*
90	/// when testing different branches of an `alt` combinator)
91	/// `Failure` indicates an unrecoverable error. As an example, if you recognize a prefix*
92	/// to decide on the next parser to apply, and that parser fails, you know there's no need
93	/// to try other parsers, you were already in the right branch, so the data is invalid
94	///
95	#[derive(Debug, Clone, PartialEq)]
96	#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))]
97	pub enum Err<E> {
98	/// There was not enough data
99	Incomplete(Needed),
100	/// The parser had an error (recoverable)
101	Error(E),
102	/// The parser had an unrecoverable error: we got to the right
103	/// branch and we know other branches won't work, so backtrack
104	/// as fast as possible
105	Failure(E),
106	}
107
108	impl<E> Err<E> {
109	/// Tests if the result is Incomplete
110	pub fn is_incomplete(&self) -> bool {
111	if let Err::Incomplete(_) = self {
112	`true`
113	} else {
114	`false`
115	}
116	}
117
118	/// Applies the given function to the inner error
119	pub fn map<E2, F>(self, f: F) -> Err<E2>
120	where
121	F: FnOnce(E) -> E2,
122	{
123	match self {
124	Err::Incomplete(n) => Err::Incomplete(n),
125	Err::Failure(t) => Err::Failure(f(t)),
126	Err::Error(t) => Err::Error(f(t)),
127	}
128	}
129
130	/// Automatically converts between errors if the underlying type supports it
131	pub fn convert<F>(e: Err<F>) -> Self
132	where
133	E: From<F>,
134	{
135	e.map(crate::lib::std::convert::Into::into)
136	}
137	}
138
139	impl<T> Err<(T, ErrorKind)> {
140	/// Maps `Err<(T, ErrorKind)>` to `Err<(U, ErrorKind)>` with the given `F: T -> U`
141	pub fn map_input<U, F>(self, f: F) -> Err<(U, ErrorKind)>
142	where
143	F: FnOnce(T) -> U,
144	{
145	match self {
146	Err::Incomplete(n: Needed) => Err::Incomplete(n),
147	Err::Failure((input: T, k: ErrorKind)) => Err::Failure((f(input), k)),
148	Err::Error((input: T, k: ErrorKind)) => Err::Error((f(input), k)),
149	}
150	}
151	}
152
153	impl<T> Err<error::Error<T>> {
154	/// Maps `Err<error::Error<T>>` to `Err<error::Error<U>>` with the given `F: T -> U`
155	pub fn map_input<U, F>(self, f: F) -> Err<error::Error<U>>
156	where
157	F: FnOnce(T) -> U,
158	{
159	match self {
160	Err::Incomplete(n: Needed) => Err::Incomplete(n),
161	Err::Failure(error::Error { input: T, code: ErrorKind }) => Err::Failure(error::Error {
162	input: f(input),
163	code,
164	}),
165	Err::Error(error::Error { input: T, code: ErrorKind }) => Err::Error(error::Error {
166	input: f(input),
167	code,
168	}),
169	}
170	}
171	}
172
173	#[cfg(feature = "alloc")]
174	use crate::lib::std::{borrow::ToOwned, string::String, vec::Vec};
#[cfg(feature = "alloc")]
impl Err<(&[u8], ErrorKind)> {
    /// Obtains ownership of the error by copying the borrowed input into a `Vec<u8>`
    #[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))]
    pub fn to_owned(self) -> Err<(Vec<u8>, ErrorKind)> {
        self.map_input(ToOwned::to_owned)
    }
}
183
#[cfg(feature = "alloc")]
impl Err<(&str, ErrorKind)> {
    /// Obtains ownership of the error by copying the borrowed input into a `String`
    #[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))]
    pub fn to_owned(self) -> Err<(String, ErrorKind)> {
        self.map_input(ToOwned::to_owned)
    }
}
192
#[cfg(feature = "alloc")]
impl Err<error::Error<&[u8]>> {
    /// Obtains ownership of the error by copying the borrowed input into a `Vec<u8>`
    #[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))]
    pub fn to_owned(self) -> Err<error::Error<Vec<u8>>> {
        self.map_input(ToOwned::to_owned)
    }
}
201
#[cfg(feature = "alloc")]
impl Err<error::Error<&str>> {
    /// Obtains ownership of the error by copying the borrowed input into a `String`
    #[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))]
    pub fn to_owned(self) -> Err<error::Error<String>> {
        self.map_input(ToOwned::to_owned)
    }
}
210
211	impl<E: Eq> Eq for Err<E> {}
212
213	impl<E> fmt::Display for Err<E>
214	where
215	E: fmt::Debug,
216	{
217	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
218	match self {
219	Err::Incomplete(Needed::Size(u: &NonZero)) => write!(f, "Parsing requires {} bytes/chars", u),
220	Err::Incomplete(Needed::Unknown) => write!(f, "Parsing requires more data"),
221	Err::Failure(c: &E) => write!(f, "Parsing Failure: {:?}", c),
222	Err::Error(c: &E) => write!(f, "Parsing Error: {:?}", c),
223	}
224	}
225	}
226
227	#[cfg(feature = "std")]
228	use std::error::Error;
229
#[cfg(feature = "std")]
impl<E> Error for Err<E>
where
    E: fmt::Debug,
{
    /// Always returns `None`: the wrapped value is only required to be `Debug`,
    /// so it cannot be exposed as an underlying error.
    fn source(&self) -> Option<&(dyn Error + 'static)> {
        None // no underlying error
    }
}
239
240	/// All nom parsers implement this trait
241	pub trait Parser<I, O, E> {
242	/// A parser takes in input type, and returns a `Result` containing
243	/// either the remaining input and the output value, or an error
244	fn parse(&mut self, input: I) -> IResult<I, O, E>;
245
246	/// Maps a function over the result of a parser
247	fn map<G, O2>(self, g: G) -> Map<Self, G, O>
248	where
249	G: Fn(O) -> O2,
250	Self: core::marker::Sized,
251	{
252	Map {
253	f: self,
254	g,
255	phantom: core::marker::PhantomData,
256	}
257	}
258
259	/// Creates a second parser from the output of the first one, then apply over the rest of the input
260	fn flat_map<G, H, O2>(self, g: G) -> FlatMap<Self, G, O>
261	where
262	G: FnMut(O) -> H,
263	H: Parser<I, O2, E>,
264	Self: core::marker::Sized,
265	{
266	FlatMap {
267	f: self,
268	g,
269	phantom: core::marker::PhantomData,
270	}
271	}
272
273	/// Applies a second parser over the output of the first one
274	fn and_then<G, O2>(self, g: G) -> AndThen<Self, G, O>
275	where
276	G: Parser<O, O2, E>,
277	Self: core::marker::Sized,
278	{
279	AndThen {
280	f: self,
281	g,
282	phantom: core::marker::PhantomData,
283	}
284	}
285
286	/// Applies a second parser after the first one, return their results as a tuple
287	fn and<G, O2>(self, g: G) -> And<Self, G>
288	where
289	G: Parser<I, O2, E>,
290	Self: core::marker::Sized,
291	{
292	And { f: self, g }
293	}
294
295	/// Applies a second parser over the input if the first one failed
296	fn or<G>(self, g: G) -> Or<Self, G>
297	where
298	G: Parser<I, O, E>,
299	Self: core::marker::Sized,
300	{
301	Or { f: self, g }
302	}
303
304	/// automatically converts the parser's output and error values to another type, as long as they
305	/// implement the `From` trait
306	fn into<O2: From<O>, E2: From<E>>(self) -> Into<Self, O, O2, E, E2>
307	where
308	Self: core::marker::Sized,
309	{
310	Into {
311	f: self,
312	phantom_out1: core::marker::PhantomData,
313	phantom_err1: core::marker::PhantomData,
314	phantom_out2: core::marker::PhantomData,
315	phantom_err2: core::marker::PhantomData,
316	}
317	}
318	}
319
320	impl<'a, I, O, E, F> Parser<I, O, E> for F
321	where
322	F: FnMut(I) -> IResult<I, O, E> + 'a,
323	{
324	fn parse(&mut self, i: I) -> IResult<I, O, E> {
325	self(i)
326	}
327	}
328
329	#[cfg(feature = "alloc")]
330	use alloc::boxed::Box;
331
/// A boxed parser trait object is itself a parser; it forwards to the boxed value.
#[cfg(feature = "alloc")]
impl<'a, I, O, E> Parser<I, O, E> for Box<dyn Parser<I, O, E> + 'a> {
    fn parse(&mut self, input: I) -> IResult<I, O, E> {
        (**self).parse(input)
    }
}
338
339	/// Implementation of `Parser::map`
340	#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))]
341	pub struct Map<F, G, O1> {
342	f: F,
343	g: G,
344	phantom: core::marker::PhantomData<O1>,
345	}
346
347	impl<'a, I, O1, O2, E, F: Parser<I, O1, E>, G: Fn(O1) -> O2> Parser<I, O2, E> for Map<F, G, O1> {
348	fn parse(&mut self, i: I) -> IResult<I, O2, E> {
349	match self.f.parse(input:i) {
350	Err(e: Err) => Err(e),
351	Ok((i: I, o: O1)) => Ok((i, (self.g)(o))),
352	}
353	}
354	}
355
356	/// Implementation of `Parser::flat_map`
357	#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))]
358	pub struct FlatMap<F, G, O1> {
359	f: F,
360	g: G,
361	phantom: core::marker::PhantomData<O1>,
362	}
363
364	impl<'a, I, O1, O2, E, F: Parser<I, O1, E>, G: Fn(O1) -> H, H: Parser<I, O2, E>> Parser<I, O2, E>
365	for FlatMap<F, G, O1>
366	{
367	fn parse(&mut self, i: I) -> IResult<I, O2, E> {
368	let (i: I, o1: O1) = self.f.parse(input:i)?;
369	(self.g)(o1).parse(input:i)
370	}
371	}
372
373	/// Implementation of `Parser::and_then`
374	#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))]
375	pub struct AndThen<F, G, O1> {
376	f: F,
377	g: G,
378	phantom: core::marker::PhantomData<O1>,
379	}
380
381	impl<'a, I, O1, O2, E, F: Parser<I, O1, E>, G: Parser<O1, O2, E>> Parser<I, O2, E>
382	for AndThen<F, G, O1>
383	{
384	fn parse(&mut self, i: I) -> IResult<I, O2, E> {
385	let (i: I, o1: O1) = self.f.parse(input:i)?;
386	let (_, o2: O2) = self.g.parse(input:o1)?;
387	Ok((i, o2))
388	}
389	}
390
391	/// Implementation of `Parser::and`
392	#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))]
393	pub struct And<F, G> {
394	f: F,
395	g: G,
396	}
397
398	impl<'a, I, O1, O2, E, F: Parser<I, O1, E>, G: Parser<I, O2, E>> Parser<I, (O1, O2), E>
399	for And<F, G>
400	{
401	fn parse(&mut self, i: I) -> IResult<I, (O1, O2), E> {
402	let (i: I, o1: O1) = self.f.parse(input:i)?;
403	let (i: I, o2: O2) = self.g.parse(input:i)?;
404	Ok((i, (o1, o2)))
405	}
406	}
407
408	/// Implementation of `Parser::or`
409	#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))]
410	pub struct Or<F, G> {
411	f: F,
412	g: G,
413	}
414
415	impl<'a, I: Clone, O, E: crate::error::ParseError<I>, F: Parser<I, O, E>, G: Parser<I, O, E>>
416	Parser<I, O, E> for Or<F, G>
417	{
418	fn parse(&mut self, i: I) -> IResult<I, O, E> {
419	match self.f.parse(input:i.clone()) {
420	Err(Err::Error(e1: E)) => match self.g.parse(input:i) {
421	Err(Err::Error(e2: E)) => Err(Err::Error(e1.or(e2))),
422	res: Result<(I, O), Err> => res,
423	},
424	res: Result<(I, O), Err> => res,
425	}
426	}
427	}
428
429	/// Implementation of `Parser::into`
430	#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))]
431	pub struct Into<F, O1, O2: From<O1>, E1, E2: From<E1>> {
432	f: F,
433	phantom_out1: core::marker::PhantomData<O1>,
434	phantom_err1: core::marker::PhantomData<E1>,
435	phantom_out2: core::marker::PhantomData<O2>,
436	phantom_err2: core::marker::PhantomData<E2>,
437	}
438
439	impl<
440	'a,
441	I: Clone,
442	O1,
443	O2: From<O1>,
444	E1,
445	E2: crate::error::ParseError<I> + From<E1>,
446	F: Parser<I, O1, E1>,
447	> Parser<I, O2, E2> for Into<F, O1, O2, E1, E2>
448	{
449	fn parse(&mut self, i: I) -> IResult<I, O2, E2> {
450	match self.f.parse(input:i) {
451	Ok((i: I, o: O1)) => Ok((i, o.into())),
452	Err(Err::Error(e: E1)) => Err(Err::Error(e.into())),
453	Err(Err::Failure(e: E1)) => Err(Err::Failure(e.into())),
454	Err(Err::Incomplete(e: Needed)) => Err(Err::Incomplete(e)),
455	}
456	}
457	}
458
#[cfg(test)]
mod tests {
    use super::*;
    use crate::error::ErrorKind;

    // Asserts that a type has the expected size in bytes.
    #[doc(hidden)]
    #[macro_export]
    macro_rules! assert_size (
        ($t:ty, $sz:expr) => (
            assert_eq!(crate::lib::std::mem::size_of::<$t>(), $sz);
        );
    );

    #[test]
    #[cfg(target_pointer_width = "64")]
    fn size_test() {
        assert_size!(IResult<&[u8], &[u8], (&[u8], u32)>, 40);
        //FIXME: since rust 1.65, this is now 32 bytes, likely thanks to https://github.com/rust-lang/rust/pull/94075
        // deactivating that test for now because it'll have different values depending on the rust version
        // assert_size!(IResult<&str, &str, u32>, 40);
        assert_size!(Needed, 8);
        assert_size!(Err<u32>, 16);
        assert_size!(ErrorKind, 1);
    }

    #[test]
    fn err_map_test() {
        let e = Err::Error(1);
        assert_eq!(e.map(|v| v + 1), Err::Error(2));
    }
}
490