1 | //! Parsers recognizing bytes streams, complete input version |
2 | |
3 | use crate::error::ErrorKind; |
4 | use crate::error::ParseError; |
5 | use crate::internal::{Err, IResult, Parser}; |
6 | use crate::lib::std::ops::RangeFrom; |
7 | use crate::lib::std::result::Result::*; |
8 | use crate::traits::{ |
9 | Compare, CompareResult, FindSubstring, FindToken, InputIter, InputLength, InputTake, |
10 | InputTakeAtPosition, Slice, ToUsize, |
11 | }; |
12 | |
13 | /// Recognizes a pattern |
14 | /// |
15 | /// The input data will be compared to the tag combinator's argument and will return the part of |
16 | /// the input that matches the argument |
17 | /// |
18 | /// It will return `Err(Err::Error((_, ErrorKind::Tag)))` if the input doesn't match the pattern |
19 | /// # Example |
20 | /// ```rust |
21 | /// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult}; |
22 | /// use nom::bytes::complete::tag; |
23 | /// |
24 | /// fn parser(s: &str) -> IResult<&str, &str> { |
25 | /// tag("Hello" )(s) |
26 | /// } |
27 | /// |
28 | /// assert_eq!(parser("Hello, World!" ), Ok((", World!" , "Hello" ))); |
29 | /// assert_eq!(parser("Something" ), Err(Err::Error(Error::new("Something" , ErrorKind::Tag)))); |
30 | /// assert_eq!(parser("" ), Err(Err::Error(Error::new("" , ErrorKind::Tag)))); |
31 | /// ``` |
32 | pub fn tag<T, Input, Error: ParseError<Input>>( |
33 | tag: T, |
34 | ) -> impl Fn(Input) -> IResult<Input, Input, Error> |
35 | where |
36 | Input: InputTake + Compare<T>, |
37 | T: InputLength + Clone, |
38 | { |
39 | move |i: Input| { |
40 | let tag_len: usize = tag.input_len(); |
41 | let t: T = tag.clone(); |
42 | let res: IResult<_, _, Error> = match i.compare(t) { |
43 | CompareResult::Ok => Ok(i.take_split(count:tag_len)), |
44 | _ => { |
45 | let e: ErrorKind = ErrorKind::Tag; |
46 | Err(Err::Error(Error::from_error_kind(input:i, kind:e))) |
47 | } |
48 | }; |
49 | res |
50 | } |
51 | } |
52 | |
53 | /// Recognizes a case insensitive pattern. |
54 | /// |
55 | /// The input data will be compared to the tag combinator's argument and will return the part of |
56 | /// the input that matches the argument with no regard to case. |
57 | /// |
58 | /// It will return `Err(Err::Error((_, ErrorKind::Tag)))` if the input doesn't match the pattern. |
59 | /// # Example |
60 | /// ```rust |
61 | /// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult}; |
62 | /// use nom::bytes::complete::tag_no_case; |
63 | /// |
64 | /// fn parser(s: &str) -> IResult<&str, &str> { |
65 | /// tag_no_case("hello" )(s) |
66 | /// } |
67 | /// |
68 | /// assert_eq!(parser("Hello, World!" ), Ok((", World!" , "Hello" ))); |
69 | /// assert_eq!(parser("hello, World!" ), Ok((", World!" , "hello" ))); |
70 | /// assert_eq!(parser("HeLlO, World!" ), Ok((", World!" , "HeLlO" ))); |
71 | /// assert_eq!(parser("Something" ), Err(Err::Error(Error::new("Something" , ErrorKind::Tag)))); |
72 | /// assert_eq!(parser("" ), Err(Err::Error(Error::new("" , ErrorKind::Tag)))); |
73 | /// ``` |
74 | pub fn tag_no_case<T, Input, Error: ParseError<Input>>( |
75 | tag: T, |
76 | ) -> impl Fn(Input) -> IResult<Input, Input, Error> |
77 | where |
78 | Input: InputTake + Compare<T>, |
79 | T: InputLength + Clone, |
80 | { |
81 | move |i: Input| { |
82 | let tag_len: usize = tag.input_len(); |
83 | let t: T = tag.clone(); |
84 | |
85 | let res: IResult<_, _, Error> = match (i).compare_no_case(t) { |
86 | CompareResult::Ok => Ok(i.take_split(count:tag_len)), |
87 | _ => { |
88 | let e: ErrorKind = ErrorKind::Tag; |
89 | Err(Err::Error(Error::from_error_kind(input:i, kind:e))) |
90 | } |
91 | }; |
92 | res |
93 | } |
94 | } |
95 | |
96 | /// Parse till certain characters are met. |
97 | /// |
98 | /// The parser will return the longest slice till one of the characters of the combinator's argument are met. |
99 | /// |
100 | /// It doesn't consume the matched character. |
101 | /// |
102 | /// It will return a `Err::Error(("", ErrorKind::IsNot))` if the pattern wasn't met. |
103 | /// # Example |
104 | /// ```rust |
105 | /// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult}; |
106 | /// use nom::bytes::complete::is_not; |
107 | /// |
108 | /// fn not_space(s: &str) -> IResult<&str, &str> { |
109 | /// is_not(" \t\r\n" )(s) |
110 | /// } |
111 | /// |
112 | /// assert_eq!(not_space("Hello, World!" ), Ok((" World!" , "Hello," ))); |
113 | /// assert_eq!(not_space("Sometimes \t" ), Ok((" \t" , "Sometimes" ))); |
114 | /// assert_eq!(not_space("Nospace" ), Ok(("" , "Nospace" ))); |
115 | /// assert_eq!(not_space("" ), Err(Err::Error(Error::new("" , ErrorKind::IsNot)))); |
116 | /// ``` |
117 | pub fn is_not<T, Input, Error: ParseError<Input>>( |
118 | arr: T, |
119 | ) -> impl Fn(Input) -> IResult<Input, Input, Error> |
120 | where |
121 | Input: InputTakeAtPosition, |
122 | T: FindToken<<Input as InputTakeAtPosition>::Item>, |
123 | { |
124 | move |i: Input| { |
125 | let e: ErrorKind = ErrorKind::IsNot; |
126 | i.split_at_position1_complete(|c: ::Item| arr.find_token(c), e) |
127 | } |
128 | } |
129 | |
130 | /// Returns the longest slice of the matches the pattern. |
131 | /// |
132 | /// The parser will return the longest slice consisting of the characters in provided in the |
133 | /// combinator's argument. |
134 | /// |
135 | /// It will return a `Err(Err::Error((_, ErrorKind::IsA)))` if the pattern wasn't met. |
136 | /// # Example |
137 | /// ```rust |
138 | /// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult}; |
139 | /// use nom::bytes::complete::is_a; |
140 | /// |
141 | /// fn hex(s: &str) -> IResult<&str, &str> { |
142 | /// is_a("1234567890ABCDEF" )(s) |
143 | /// } |
144 | /// |
145 | /// assert_eq!(hex("123 and voila" ), Ok((" and voila" , "123" ))); |
146 | /// assert_eq!(hex("DEADBEEF and others" ), Ok((" and others" , "DEADBEEF" ))); |
147 | /// assert_eq!(hex("BADBABEsomething" ), Ok(("something" , "BADBABE" ))); |
148 | /// assert_eq!(hex("D15EA5E" ), Ok(("" , "D15EA5E" ))); |
149 | /// assert_eq!(hex("" ), Err(Err::Error(Error::new("" , ErrorKind::IsA)))); |
150 | /// ``` |
151 | pub fn is_a<T, Input, Error: ParseError<Input>>( |
152 | arr: T, |
153 | ) -> impl Fn(Input) -> IResult<Input, Input, Error> |
154 | where |
155 | Input: InputTakeAtPosition, |
156 | T: FindToken<<Input as InputTakeAtPosition>::Item>, |
157 | { |
158 | move |i: Input| { |
159 | let e: ErrorKind = ErrorKind::IsA; |
160 | i.split_at_position1_complete(|c: ::Item| !arr.find_token(c), e) |
161 | } |
162 | } |
163 | |
164 | /// Returns the longest input slice (if any) that matches the predicate. |
165 | /// |
166 | /// The parser will return the longest slice that matches the given predicate *(a function that |
167 | /// takes the input and returns a bool)*. |
168 | /// # Example |
169 | /// ```rust |
170 | /// # use nom::{Err, error::ErrorKind, Needed, IResult}; |
171 | /// use nom::bytes::complete::take_while; |
172 | /// use nom::character::is_alphabetic; |
173 | /// |
174 | /// fn alpha(s: &[u8]) -> IResult<&[u8], &[u8]> { |
175 | /// take_while(is_alphabetic)(s) |
176 | /// } |
177 | /// |
178 | /// assert_eq!(alpha(b"latin123" ), Ok((&b"123" [..], &b"latin" [..]))); |
179 | /// assert_eq!(alpha(b"12345" ), Ok((&b"12345" [..], &b"" [..]))); |
180 | /// assert_eq!(alpha(b"latin" ), Ok((&b"" [..], &b"latin" [..]))); |
181 | /// assert_eq!(alpha(b"" ), Ok((&b"" [..], &b"" [..]))); |
182 | /// ``` |
183 | pub fn take_while<F, Input, Error: ParseError<Input>>( |
184 | cond: F, |
185 | ) -> impl Fn(Input) -> IResult<Input, Input, Error> |
186 | where |
187 | Input: InputTakeAtPosition, |
188 | F: Fn(<Input as InputTakeAtPosition>::Item) -> bool, |
189 | { |
190 | move |i: Input| i.split_at_position_complete(|c: ::Item| !cond(c)) |
191 | } |
192 | |
193 | /// Returns the longest (at least 1) input slice that matches the predicate. |
194 | /// |
195 | /// The parser will return the longest slice that matches the given predicate *(a function that |
196 | /// takes the input and returns a bool)*. |
197 | /// |
198 | /// It will return an `Err(Err::Error((_, ErrorKind::TakeWhile1)))` if the pattern wasn't met. |
199 | /// # Example |
200 | /// ```rust |
201 | /// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult}; |
202 | /// use nom::bytes::complete::take_while1; |
203 | /// use nom::character::is_alphabetic; |
204 | /// |
205 | /// fn alpha(s: &[u8]) -> IResult<&[u8], &[u8]> { |
206 | /// take_while1(is_alphabetic)(s) |
207 | /// } |
208 | /// |
209 | /// assert_eq!(alpha(b"latin123" ), Ok((&b"123" [..], &b"latin" [..]))); |
210 | /// assert_eq!(alpha(b"latin" ), Ok((&b"" [..], &b"latin" [..]))); |
211 | /// assert_eq!(alpha(b"12345" ), Err(Err::Error(Error::new(&b"12345" [..], ErrorKind::TakeWhile1)))); |
212 | /// ``` |
213 | pub fn take_while1<F, Input, Error: ParseError<Input>>( |
214 | cond: F, |
215 | ) -> impl Fn(Input) -> IResult<Input, Input, Error> |
216 | where |
217 | Input: InputTakeAtPosition, |
218 | F: Fn(<Input as InputTakeAtPosition>::Item) -> bool, |
219 | { |
220 | move |i: Input| { |
221 | let e: ErrorKind = ErrorKind::TakeWhile1; |
222 | i.split_at_position1_complete(|c: ::Item| !cond(c), e) |
223 | } |
224 | } |
225 | |
226 | /// Returns the longest (m <= len <= n) input slice that matches the predicate. |
227 | /// |
228 | /// The parser will return the longest slice that matches the given predicate *(a function that |
229 | /// takes the input and returns a bool)*. |
230 | /// |
231 | /// It will return an `Err::Error((_, ErrorKind::TakeWhileMN))` if the pattern wasn't met or is out |
232 | /// of range (m <= len <= n). |
233 | /// # Example |
234 | /// ```rust |
235 | /// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult}; |
236 | /// use nom::bytes::complete::take_while_m_n; |
237 | /// use nom::character::is_alphabetic; |
238 | /// |
239 | /// fn short_alpha(s: &[u8]) -> IResult<&[u8], &[u8]> { |
240 | /// take_while_m_n(3, 6, is_alphabetic)(s) |
241 | /// } |
242 | /// |
243 | /// assert_eq!(short_alpha(b"latin123" ), Ok((&b"123" [..], &b"latin" [..]))); |
244 | /// assert_eq!(short_alpha(b"lengthy" ), Ok((&b"y" [..], &b"length" [..]))); |
245 | /// assert_eq!(short_alpha(b"latin" ), Ok((&b"" [..], &b"latin" [..]))); |
246 | /// assert_eq!(short_alpha(b"ed" ), Err(Err::Error(Error::new(&b"ed" [..], ErrorKind::TakeWhileMN)))); |
247 | /// assert_eq!(short_alpha(b"12345" ), Err(Err::Error(Error::new(&b"12345" [..], ErrorKind::TakeWhileMN)))); |
248 | /// ``` |
249 | pub fn take_while_m_n<F, Input, Error: ParseError<Input>>( |
250 | m: usize, |
251 | n: usize, |
252 | cond: F, |
253 | ) -> impl Fn(Input) -> IResult<Input, Input, Error> |
254 | where |
255 | Input: InputTake + InputIter + InputLength + Slice<RangeFrom<usize>>, |
256 | F: Fn(<Input as InputIter>::Item) -> bool, |
257 | { |
258 | move |i: Input| { |
259 | let input = i; |
260 | |
261 | match input.position(|c| !cond(c)) { |
262 | Some(idx) => { |
263 | if idx >= m { |
264 | if idx <= n { |
265 | let res: IResult<_, _, Error> = if let Ok(index) = input.slice_index(idx) { |
266 | Ok(input.take_split(index)) |
267 | } else { |
268 | Err(Err::Error(Error::from_error_kind( |
269 | input, |
270 | ErrorKind::TakeWhileMN, |
271 | ))) |
272 | }; |
273 | res |
274 | } else { |
275 | let res: IResult<_, _, Error> = if let Ok(index) = input.slice_index(n) { |
276 | Ok(input.take_split(index)) |
277 | } else { |
278 | Err(Err::Error(Error::from_error_kind( |
279 | input, |
280 | ErrorKind::TakeWhileMN, |
281 | ))) |
282 | }; |
283 | res |
284 | } |
285 | } else { |
286 | let e = ErrorKind::TakeWhileMN; |
287 | Err(Err::Error(Error::from_error_kind(input, e))) |
288 | } |
289 | } |
290 | None => { |
291 | let len = input.input_len(); |
292 | if len >= n { |
293 | match input.slice_index(n) { |
294 | Ok(index) => Ok(input.take_split(index)), |
295 | Err(_needed) => Err(Err::Error(Error::from_error_kind( |
296 | input, |
297 | ErrorKind::TakeWhileMN, |
298 | ))), |
299 | } |
300 | } else if len >= m && len <= n { |
301 | let res: IResult<_, _, Error> = Ok((input.slice(len..), input)); |
302 | res |
303 | } else { |
304 | let e = ErrorKind::TakeWhileMN; |
305 | Err(Err::Error(Error::from_error_kind(input, e))) |
306 | } |
307 | } |
308 | } |
309 | } |
310 | } |
311 | |
312 | /// Returns the longest input slice (if any) till a predicate is met. |
313 | /// |
314 | /// The parser will return the longest slice till the given predicate *(a function that |
315 | /// takes the input and returns a bool)*. |
316 | /// # Example |
317 | /// ```rust |
318 | /// # use nom::{Err, error::ErrorKind, Needed, IResult}; |
319 | /// use nom::bytes::complete::take_till; |
320 | /// |
321 | /// fn till_colon(s: &str) -> IResult<&str, &str> { |
322 | /// take_till(|c| c == ':' )(s) |
323 | /// } |
324 | /// |
325 | /// assert_eq!(till_colon("latin:123" ), Ok((":123" , "latin" ))); |
326 | /// assert_eq!(till_colon(":empty matched" ), Ok((":empty matched" , "" ))); //allowed |
327 | /// assert_eq!(till_colon("12345" ), Ok(("" , "12345" ))); |
328 | /// assert_eq!(till_colon("" ), Ok(("" , "" ))); |
329 | /// ``` |
330 | pub fn take_till<F, Input, Error: ParseError<Input>>( |
331 | cond: F, |
332 | ) -> impl Fn(Input) -> IResult<Input, Input, Error> |
333 | where |
334 | Input: InputTakeAtPosition, |
335 | F: Fn(<Input as InputTakeAtPosition>::Item) -> bool, |
336 | { |
337 | move |i: Input| i.split_at_position_complete(|c: ::Item| cond(c)) |
338 | } |
339 | |
340 | /// Returns the longest (at least 1) input slice till a predicate is met. |
341 | /// |
342 | /// The parser will return the longest slice till the given predicate *(a function that |
343 | /// takes the input and returns a bool)*. |
344 | /// |
345 | /// It will return `Err(Err::Error((_, ErrorKind::TakeTill1)))` if the input is empty or the |
346 | /// predicate matches the first input. |
347 | /// # Example |
348 | /// ```rust |
349 | /// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult}; |
350 | /// use nom::bytes::complete::take_till1; |
351 | /// |
352 | /// fn till_colon(s: &str) -> IResult<&str, &str> { |
353 | /// take_till1(|c| c == ':' )(s) |
354 | /// } |
355 | /// |
356 | /// assert_eq!(till_colon("latin:123" ), Ok((":123" , "latin" ))); |
357 | /// assert_eq!(till_colon(":empty matched" ), Err(Err::Error(Error::new(":empty matched" , ErrorKind::TakeTill1)))); |
358 | /// assert_eq!(till_colon("12345" ), Ok(("" , "12345" ))); |
359 | /// assert_eq!(till_colon("" ), Err(Err::Error(Error::new("" , ErrorKind::TakeTill1)))); |
360 | /// ``` |
361 | pub fn take_till1<F, Input, Error: ParseError<Input>>( |
362 | cond: F, |
363 | ) -> impl Fn(Input) -> IResult<Input, Input, Error> |
364 | where |
365 | Input: InputTakeAtPosition, |
366 | F: Fn(<Input as InputTakeAtPosition>::Item) -> bool, |
367 | { |
368 | move |i: Input| { |
369 | let e: ErrorKind = ErrorKind::TakeTill1; |
370 | i.split_at_position1_complete(|c: ::Item| cond(c), e) |
371 | } |
372 | } |
373 | |
374 | /// Returns an input slice containing the first N input elements (Input[..N]). |
375 | /// |
376 | /// It will return `Err(Err::Error((_, ErrorKind::Eof)))` if the input is shorter than the argument. |
377 | /// # Example |
378 | /// ```rust |
379 | /// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult}; |
380 | /// use nom::bytes::complete::take; |
381 | /// |
382 | /// fn take6(s: &str) -> IResult<&str, &str> { |
383 | /// take(6usize)(s) |
384 | /// } |
385 | /// |
386 | /// assert_eq!(take6("1234567" ), Ok(("7" , "123456" ))); |
387 | /// assert_eq!(take6("things" ), Ok(("" , "things" ))); |
388 | /// assert_eq!(take6("short" ), Err(Err::Error(Error::new("short" , ErrorKind::Eof)))); |
389 | /// assert_eq!(take6("" ), Err(Err::Error(Error::new("" , ErrorKind::Eof)))); |
390 | /// ``` |
391 | /// |
392 | /// The units that are taken will depend on the input type. For example, for a |
393 | /// `&str` it will take a number of `char`'s, whereas for a `&[u8]` it will |
394 | /// take that many `u8`'s: |
395 | /// |
396 | /// ```rust |
397 | /// use nom::error::Error; |
398 | /// use nom::bytes::complete::take; |
399 | /// |
400 | /// assert_eq!(take::<_, _, Error<_>>(1usize)("💙" ), Ok(("" , "💙" ))); |
401 | /// assert_eq!(take::<_, _, Error<_>>(1usize)("💙" .as_bytes()), Ok((b" \x9F\x92\x99" .as_ref(), b" \xF0" .as_ref()))); |
402 | /// ``` |
403 | pub fn take<C, Input, Error: ParseError<Input>>( |
404 | count: C, |
405 | ) -> impl Fn(Input) -> IResult<Input, Input, Error> |
406 | where |
407 | Input: InputIter + InputTake, |
408 | C: ToUsize, |
409 | { |
410 | let c: usize = count.to_usize(); |
411 | move |i: Input| match i.slice_index(count:c) { |
412 | Err(_needed: Needed) => Err(Err::Error(Error::from_error_kind(input:i, kind:ErrorKind::Eof))), |
413 | Ok(index: usize) => Ok(i.take_split(count:index)), |
414 | } |
415 | } |
416 | |
417 | /// Returns the input slice up to the first occurrence of the pattern. |
418 | /// |
419 | /// It doesn't consume the pattern. It will return `Err(Err::Error((_, ErrorKind::TakeUntil)))` |
420 | /// if the pattern wasn't met. |
421 | /// # Example |
422 | /// ```rust |
423 | /// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult}; |
424 | /// use nom::bytes::complete::take_until; |
425 | /// |
426 | /// fn until_eof(s: &str) -> IResult<&str, &str> { |
427 | /// take_until("eof" )(s) |
428 | /// } |
429 | /// |
430 | /// assert_eq!(until_eof("hello, worldeof" ), Ok(("eof" , "hello, world" ))); |
431 | /// assert_eq!(until_eof("hello, world" ), Err(Err::Error(Error::new("hello, world" , ErrorKind::TakeUntil)))); |
432 | /// assert_eq!(until_eof("" ), Err(Err::Error(Error::new("" , ErrorKind::TakeUntil)))); |
433 | /// assert_eq!(until_eof("1eof2eof" ), Ok(("eof2eof" , "1" ))); |
434 | /// ``` |
435 | pub fn take_until<T, Input, Error: ParseError<Input>>( |
436 | tag: T, |
437 | ) -> impl Fn(Input) -> IResult<Input, Input, Error> |
438 | where |
439 | Input: InputTake + FindSubstring<T>, |
440 | T: InputLength + Clone, |
441 | { |
442 | move |i: Input| { |
443 | let t: T = tag.clone(); |
444 | let res: IResult<_, _, Error> = match i.find_substring(substr:t) { |
445 | None => Err(Err::Error(Error::from_error_kind(input:i, kind:ErrorKind::TakeUntil))), |
446 | Some(index: usize) => Ok(i.take_split(count:index)), |
447 | }; |
448 | res |
449 | } |
450 | } |
451 | |
452 | /// Returns the non empty input slice up to the first occurrence of the pattern. |
453 | /// |
454 | /// It doesn't consume the pattern. It will return `Err(Err::Error((_, ErrorKind::TakeUntil)))` |
455 | /// if the pattern wasn't met. |
456 | /// # Example |
457 | /// ```rust |
458 | /// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult}; |
459 | /// use nom::bytes::complete::take_until1; |
460 | /// |
461 | /// fn until_eof(s: &str) -> IResult<&str, &str> { |
462 | /// take_until1("eof" )(s) |
463 | /// } |
464 | /// |
465 | /// assert_eq!(until_eof("hello, worldeof" ), Ok(("eof" , "hello, world" ))); |
466 | /// assert_eq!(until_eof("hello, world" ), Err(Err::Error(Error::new("hello, world" , ErrorKind::TakeUntil)))); |
467 | /// assert_eq!(until_eof("" ), Err(Err::Error(Error::new("" , ErrorKind::TakeUntil)))); |
468 | /// assert_eq!(until_eof("1eof2eof" ), Ok(("eof2eof" , "1" ))); |
469 | /// assert_eq!(until_eof("eof" ), Err(Err::Error(Error::new("eof" , ErrorKind::TakeUntil)))); |
470 | /// ``` |
471 | pub fn take_until1<T, Input, Error: ParseError<Input>>( |
472 | tag: T, |
473 | ) -> impl Fn(Input) -> IResult<Input, Input, Error> |
474 | where |
475 | Input: InputTake + FindSubstring<T>, |
476 | T: InputLength + Clone, |
477 | { |
478 | move |i: Input| { |
479 | let t: T = tag.clone(); |
480 | let res: IResult<_, _, Error> = match i.find_substring(substr:t) { |
481 | None => Err(Err::Error(Error::from_error_kind(input:i, kind:ErrorKind::TakeUntil))), |
482 | Some(0) => Err(Err::Error(Error::from_error_kind(input:i, kind:ErrorKind::TakeUntil))), |
483 | Some(index: usize) => Ok(i.take_split(count:index)), |
484 | }; |
485 | res |
486 | } |
487 | } |
488 | |
489 | /// Matches a byte string with escaped characters. |
490 | /// |
491 | /// * The first argument matches the normal characters (it must not accept the control character) |
492 | /// * The second argument is the control character (like `\` in most languages) |
493 | /// * The third argument matches the escaped characters |
494 | /// # Example |
495 | /// ``` |
496 | /// # use nom::{Err, error::ErrorKind, Needed, IResult}; |
497 | /// # use nom::character::complete::digit1; |
498 | /// use nom::bytes::complete::escaped; |
499 | /// use nom::character::complete::one_of; |
500 | /// |
501 | /// fn esc(s: &str) -> IResult<&str, &str> { |
502 | /// escaped(digit1, ' \\' , one_of(r#""n\"# ))(s) |
503 | /// } |
504 | /// |
505 | /// assert_eq!(esc("123;" ), Ok((";" , "123" ))); |
506 | /// assert_eq!(esc(r#"12\"34;"# ), Ok((";" , r#"12\"34"# ))); |
507 | /// ``` |
508 | /// |
509 | pub fn escaped<'a, Input: 'a, Error, F, G, O1, O2>( |
510 | mut normal: F, |
511 | control_char: char, |
512 | mut escapable: G, |
513 | ) -> impl FnMut(Input) -> IResult<Input, Input, Error> |
514 | where |
515 | Input: Clone |
516 | + crate::traits::Offset |
517 | + InputLength |
518 | + InputTake |
519 | + InputTakeAtPosition |
520 | + Slice<RangeFrom<usize>> |
521 | + InputIter, |
522 | <Input as InputIter>::Item: crate::traits::AsChar, |
523 | F: Parser<Input, O1, Error>, |
524 | G: Parser<Input, O2, Error>, |
525 | Error: ParseError<Input>, |
526 | { |
527 | use crate::traits::AsChar; |
528 | |
529 | move |input: Input| { |
530 | let mut i = input.clone(); |
531 | |
532 | while i.input_len() > 0 { |
533 | let current_len = i.input_len(); |
534 | |
535 | match normal.parse(i.clone()) { |
536 | Ok((i2, _)) => { |
537 | // return if we consumed everything or if the normal parser |
538 | // does not consume anything |
539 | if i2.input_len() == 0 { |
540 | return Ok((input.slice(input.input_len()..), input)); |
541 | } else if i2.input_len() == current_len { |
542 | let index = input.offset(&i2); |
543 | return Ok(input.take_split(index)); |
544 | } else { |
545 | i = i2; |
546 | } |
547 | } |
548 | Err(Err::Error(_)) => { |
549 | // unwrap() should be safe here since index < $i.input_len() |
550 | if i.iter_elements().next().unwrap().as_char() == control_char { |
551 | let next = control_char.len_utf8(); |
552 | if next >= i.input_len() { |
553 | return Err(Err::Error(Error::from_error_kind( |
554 | input, |
555 | ErrorKind::Escaped, |
556 | ))); |
557 | } else { |
558 | match escapable.parse(i.slice(next..)) { |
559 | Ok((i2, _)) => { |
560 | if i2.input_len() == 0 { |
561 | return Ok((input.slice(input.input_len()..), input)); |
562 | } else { |
563 | i = i2; |
564 | } |
565 | } |
566 | Err(e) => return Err(e), |
567 | } |
568 | } |
569 | } else { |
570 | let index = input.offset(&i); |
571 | if index == 0 { |
572 | return Err(Err::Error(Error::from_error_kind( |
573 | input, |
574 | ErrorKind::Escaped, |
575 | ))); |
576 | } |
577 | return Ok(input.take_split(index)); |
578 | } |
579 | } |
580 | Err(e) => { |
581 | return Err(e); |
582 | } |
583 | } |
584 | } |
585 | |
586 | Ok((input.slice(input.input_len()..), input)) |
587 | } |
588 | } |
589 | |
/// Matches a byte string with escaped characters.
///
/// * The first argument matches the normal characters (it must not match the control character)
/// * The second argument is the control character (like `\` in most languages)
/// * The third argument matches the escaped characters and transforms them
///
/// As an example, the escape sequence in `abc\tdef` could be replaced by an actual tab
/// character (the control character itself is consumed and not part of the output).
///
/// If the normal parser succeeds without consuming anything, parsing stops at that point and
/// the output accumulated so far is returned along with the unparsed remainder.
///
/// ```
/// # use nom::{Err, error::ErrorKind, Needed, IResult};
/// # use std::str::from_utf8;
/// use nom::bytes::complete::{escaped_transform, tag};
/// use nom::character::complete::alpha1;
/// use nom::branch::alt;
/// use nom::combinator::value;
///
/// fn parser(input: &str) -> IResult<&str, String> {
///   escaped_transform(
///     alpha1,
///     '\\',
///     alt((
///       value("\\", tag("\\")),
///       value("\"", tag("\"")),
///       value("\n", tag("n")),
///     ))
///   )(input)
/// }
///
/// assert_eq!(parser("ab\\\"cd"), Ok(("", String::from("ab\"cd"))));
/// assert_eq!(parser("ab\\ncd"), Ok(("", String::from("ab\ncd"))));
/// ```
#[cfg(feature = "alloc")]
#[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))]
pub fn escaped_transform<Input, Error, F, G, O1, O2, ExtendItem, Output>(
  mut normal: F,
  control_char: char,
  mut transform: G,
) -> impl FnMut(Input) -> IResult<Input, Output, Error>
where
  Input: Clone
    + crate::traits::Offset
    + InputLength
    + InputTake
    + InputTakeAtPosition
    + Slice<RangeFrom<usize>>
    + InputIter,
  Input: crate::traits::ExtendInto<Item = ExtendItem, Extender = Output>,
  O1: crate::traits::ExtendInto<Item = ExtendItem, Extender = Output>,
  O2: crate::traits::ExtendInto<Item = ExtendItem, Extender = Output>,
  <Input as InputIter>::Item: crate::traits::AsChar,
  F: Parser<Input, O1, Error>,
  G: Parser<Input, O2, Error>,
  Error: ParseError<Input>,
{
  use crate::traits::AsChar;

  move |input: Input| {
    // Offset into `input` of the first element that has not been processed yet.
    let mut index = 0;
    let mut res = input.new_builder();

    let i = input.clone();

    while index < i.input_len() {
      let remainder = i.slice(index..);
      // Measure the *unparsed* remainder, not the whole input: comparing against the full
      // input length only detects a non-consuming `normal` parser at index 0 and would
      // otherwise loop forever once some input has been consumed.
      let current_len = remainder.input_len();
      match normal.parse(remainder.clone()) {
        Ok((i2, o)) => {
          o.extend_into(&mut res);
          if i2.input_len() == 0 {
            return Ok((i.slice(i.input_len()..), res));
          } else if i2.input_len() == current_len {
            // `normal` succeeded without consuming anything: stop instead of spinning.
            return Ok((remainder, res));
          } else {
            index = input.offset(&i2);
          }
        }
        Err(Err::Error(_)) => {
          // `unwrap()` cannot fail: the loop condition `index < i.input_len()` means
          // `remainder` still holds at least one element.
          if remainder.iter_elements().next().unwrap().as_char() == control_char {
            let next = index + control_char.len_utf8();
            let input_len = input.input_len();

            if next >= input_len {
              // The control character is the last thing in the input: nothing to transform.
              return Err(Err::Error(Error::from_error_kind(
                remainder,
                ErrorKind::EscapedTransform,
              )));
            } else {
              match transform.parse(i.slice(next..)) {
                Ok((i2, o)) => {
                  o.extend_into(&mut res);
                  if i2.input_len() == 0 {
                    return Ok((i.slice(i.input_len()..), res));
                  } else {
                    index = input.offset(&i2);
                  }
                }
                Err(e) => return Err(e),
              }
            }
          } else {
            // Neither normal nor an escape: fail if nothing was recognized at all,
            // otherwise return what has been accumulated so far.
            if index == 0 {
              return Err(Err::Error(Error::from_error_kind(
                remainder,
                ErrorKind::EscapedTransform,
              )));
            }
            return Ok((remainder, res));
          }
        }
        Err(e) => return Err(e),
      }
    }
    Ok((input.slice(index..), res))
  }
}
706 | |
#[cfg(test)]
mod tests {
  use super::*;

  // `take_while_m_n` on multi-byte UTF-8 input where every character matches
  // and the input is shorter than the upper bound.
  #[test]
  fn complete_take_while_m_n_utf8_all_matching() {
    let result: IResult<&str, &str> =
      super::take_while_m_n(1, 4, |c: char| c.is_alphabetic())("øn");
    assert_eq!(result, Ok(("", "øn")));
  }

  // Same input, but the upper bound cuts the match short after one character.
  #[test]
  fn complete_take_while_m_n_utf8_all_matching_substring() {
    let result: IResult<&str, &str> =
      super::take_while_m_n(1, 1, |c: char| c.is_alphabetic())("øn");
    assert_eq!(result, Ok(("n", "ø")));
  }

  // issue #1336 "escaped hangs if normal parser accepts empty"
  fn escaped_string(input: &str) -> IResult<&str, &str> {
    use crate::character::complete::{alpha0, one_of};
    escaped(alpha0, '\\', one_of("n"))(input)
  }

  // issue #1336 "escaped hangs if normal parser accepts empty"
  #[test]
  fn escaped_hang() {
    escaped_string("7").unwrap();
    escaped_string("a7").unwrap();
  }

  // issue #1118 "escaped does not work with empty string"
  fn unquote<'a>(input: &'a str) -> IResult<&'a str, &'a str> {
    use crate::bytes::complete::*;
    use crate::character::complete::*;
    use crate::combinator::opt;
    use crate::sequence::delimited;

    delimited(
      char('"'),
      escaped(opt(none_of(r#"\""#)), '\\', one_of(r#"\"rnt"#)),
      char('"'),
    )(input)
  }

  #[test]
  fn escaped_hang_1118() {
    assert_eq!(unquote(r#""""#), Ok(("", "")));
  }
}
757 | |