1 | use std::ops::RangeInclusive; |
2 | |
3 | use winnow::combinator::alt; |
4 | use winnow::combinator::cut_err; |
5 | use winnow::combinator::opt; |
6 | use winnow::combinator::peek; |
7 | use winnow::combinator::preceded; |
8 | use winnow::combinator::repeat; |
9 | use winnow::combinator::rest; |
10 | use winnow::token::one_of; |
11 | use winnow::token::tag; |
12 | use winnow::token::take; |
13 | |
14 | use crate::parser::prelude::*; |
15 | use crate::parser::trivia::from_utf8_unchecked; |
16 | |
// ;; Boolean
18 | |
19 | // boolean = true / false |
20 | #[allow (dead_code)] // directly define in `fn value` |
21 | pub(crate) fn boolean(input: Input<'_>) -> IResult<Input<'_>, bool, ParserError<'_>> { |
22 | alt((true_, false_)).parse_next(input) |
23 | } |
24 | |
25 | pub(crate) fn true_(input: Input<'_>) -> IResult<Input<'_>, bool, ParserError<'_>> { |
26 | (peek(TRUE[0]), cut_err(TRUE)).value(val:true).parse_next(input) |
27 | } |
28 | const TRUE: &[u8] = b"true" ; |
29 | |
30 | pub(crate) fn false_(input: Input<'_>) -> IResult<Input<'_>, bool, ParserError<'_>> { |
31 | (peek(FALSE[0]), cut_err(FALSE)) |
32 | .value(val:false) |
33 | .parse_next(input) |
34 | } |
35 | const FALSE: &[u8] = b"false" ; |
36 | |
// ;; Integer
38 | |
39 | // integer = dec-int / hex-int / oct-int / bin-int |
40 | pub(crate) fn integer(input: Input<'_>) -> IResult<Input<'_>, i64, ParserError<'_>> { |
41 | dispatchimpl Fn(Located<&BStr>) -> …! {peek(opt::<_, &[u8], _, _>(take(2usize))); |
42 | Some(b"0x" ) => cut_err(hex_int.try_map(|s| i64::from_str_radix(&s.replace('_' , "" ), 16))), |
43 | Some(b"0o" ) => cut_err(oct_int.try_map(|s| i64::from_str_radix(&s.replace('_' , "" ), 8))), |
44 | Some(b"0b" ) => cut_err(bin_int.try_map(|s| i64::from_str_radix(&s.replace('_' , "" ), 2))), |
45 | _ => dec_int.and_then(cut_err(rest |
46 | .try_map(|s: &str| s.replace('_' , "" ).parse()))) |
47 | } |
48 | .parse_next(input) |
49 | } |
50 | |
51 | // dec-int = [ minus / plus ] unsigned-dec-int |
52 | // unsigned-dec-int = DIGIT / digit1-9 1*( DIGIT / underscore DIGIT ) |
53 | pub(crate) fn dec_int(input: Input<'_>) -> IResult<Input<'_>, &str, ParserError<'_>> { |
54 | ( |
55 | opt(one_of((b'+' , b'-' ))), |
56 | alt(( |
57 | ( |
58 | one_of(DIGIT1_9), |
59 | repeat( |
60 | 0.., |
61 | alt(( |
62 | digit.value(()), |
63 | ( |
64 | one_of(b'_' ), |
65 | cut_err(digit) |
66 | .context(Context::Expected(ParserValue::Description("digit" ))), |
67 | ) |
68 | .value(()), |
69 | )), |
70 | ) |
71 | .map(|()| ()), |
72 | ) |
73 | .value(()), |
74 | digit.value(()), |
75 | )), |
76 | ) |
77 | .recognize() |
78 | .map(|b: &[u8]| unsafe { from_utf8_unchecked(b, "`digit` and `_` filter out non-ASCII" ) }) |
79 | .context(Context::Expression("integer" )) |
80 | .parse_next(input) |
81 | } |
82 | const DIGIT1_9: RangeInclusive<u8> = b'1' ..=b'9' ; |
83 | |
84 | // hex-prefix = %x30.78 ; 0x |
85 | // hex-int = hex-prefix HEXDIG *( HEXDIG / underscore HEXDIG ) |
86 | pub(crate) fn hex_int(input: Input<'_>) -> IResult<Input<'_>, &str, ParserError<'_>> { |
87 | preceded( |
88 | HEX_PREFIX, |
89 | cut_err(( |
90 | hexdig, |
91 | repeat( |
92 | 0.., |
93 | alt(( |
94 | hexdig.value(()), |
95 | ( |
96 | one_of(b'_' ), |
97 | cut_err(hexdig) |
98 | .context(Context::Expected(ParserValue::Description("digit" ))), |
99 | ) |
100 | .value(()), |
101 | )), |
102 | ) |
103 | .map(|()| ()), |
104 | )) |
105 | .recognize(), |
106 | ) |
107 | .map(|b| unsafe { from_utf8_unchecked(b, "`hexdig` and `_` filter out non-ASCII" ) }) |
108 | .context(Context::Expression("hexadecimal integer" )) |
109 | .parse_next(input) |
110 | } |
111 | const HEX_PREFIX: &[u8] = b"0x" ; |
112 | |
113 | // oct-prefix = %x30.6F ; 0o |
114 | // oct-int = oct-prefix digit0-7 *( digit0-7 / underscore digit0-7 ) |
115 | pub(crate) fn oct_int(input: Input<'_>) -> IResult<Input<'_>, &str, ParserError<'_>> { |
116 | preceded( |
117 | OCT_PREFIX, |
118 | cut_err(( |
119 | one_of(DIGIT0_7), |
120 | repeat( |
121 | 0.., |
122 | alt(( |
123 | one_of(DIGIT0_7).value(()), |
124 | ( |
125 | one_of(b'_' ), |
126 | cut_err(one_of(DIGIT0_7)) |
127 | .context(Context::Expected(ParserValue::Description("digit" ))), |
128 | ) |
129 | .value(()), |
130 | )), |
131 | ) |
132 | .map(|()| ()), |
133 | )) |
134 | .recognize(), |
135 | ) |
136 | .map(|b| unsafe { from_utf8_unchecked(b, "`DIGIT0_7` and `_` filter out non-ASCII" ) }) |
137 | .context(Context::Expression("octal integer" )) |
138 | .parse_next(input) |
139 | } |
140 | const OCT_PREFIX: &[u8] = b"0o" ; |
141 | const DIGIT0_7: RangeInclusive<u8> = b'0' ..=b'7' ; |
142 | |
143 | // bin-prefix = %x30.62 ; 0b |
144 | // bin-int = bin-prefix digit0-1 *( digit0-1 / underscore digit0-1 ) |
145 | pub(crate) fn bin_int(input: Input<'_>) -> IResult<Input<'_>, &str, ParserError<'_>> { |
146 | preceded( |
147 | BIN_PREFIX, |
148 | cut_err(( |
149 | one_of(DIGIT0_1), |
150 | repeat( |
151 | 0.., |
152 | alt(( |
153 | one_of(DIGIT0_1).value(()), |
154 | ( |
155 | one_of(b'_' ), |
156 | cut_err(one_of(DIGIT0_1)) |
157 | .context(Context::Expected(ParserValue::Description("digit" ))), |
158 | ) |
159 | .value(()), |
160 | )), |
161 | ) |
162 | .map(|()| ()), |
163 | )) |
164 | .recognize(), |
165 | ) |
166 | .map(|b| unsafe { from_utf8_unchecked(b, "`DIGIT0_1` and `_` filter out non-ASCII" ) }) |
167 | .context(Context::Expression("binary integer" )) |
168 | .parse_next(input) |
169 | } |
170 | const BIN_PREFIX: &[u8] = b"0b" ; |
171 | const DIGIT0_1: RangeInclusive<u8> = b'0' ..=b'1' ; |
172 | |
// ;; Float
174 | |
175 | // float = float-int-part ( exp / frac [ exp ] ) |
176 | // float =/ special-float |
177 | // float-int-part = dec-int |
178 | pub(crate) fn float(input: Input<'_>) -> IResult<Input<'_>, f64, ParserError<'_>> { |
179 | altContext, …>, …, …, …, …>(( |
180 | float_.and_then(inner:cut_err( |
181 | parser:rest.try_map(|s: &str| s.replace('_' , "" ).parse()) |
182 | .verify(|f: &f64| *f != f64::INFINITY), |
183 | )), |
184 | special_float, |
185 | )) |
186 | .context(Context::Expression("floating-point number" )) |
187 | .parse_next(input) |
188 | } |
189 | |
190 | pub(crate) fn float_(input: Input<'_>) -> IResult<Input<'_>, &str, ParserError<'_>> { |
191 | (dec_int, alt((exp, (frac, opt(exp)).map(|_| "" )))) |
192 | .recognize() |
193 | .map(|b: &[u8]| unsafe { |
194 | from_utf8_unchecked( |
195 | bytes:b, |
196 | safety_justification:"`dec_int`, `one_of`, `exp`, and `frac` filter out non-ASCII" , |
197 | ) |
198 | }) |
199 | .parse_next(input) |
200 | } |
201 | |
202 | // frac = decimal-point zero-prefixable-int |
203 | // decimal-point = %x2E ; . |
204 | pub(crate) fn frac(input: Input<'_>) -> IResult<Input<'_>, &str, ParserError<'_>> { |
205 | ( |
206 | b'.' , |
207 | cut_err(parser:zero_prefixable_int).context(Context::Expected(ParserValue::Description("digit" ))), |
208 | ) |
209 | .recognize() |
210 | .map(|b: &[u8]| unsafe { |
211 | from_utf8_unchecked( |
212 | bytes:b, |
213 | safety_justification:"`.` and `parse_zero_prefixable_int` filter out non-ASCII" , |
214 | ) |
215 | }) |
216 | .parse_next(input) |
217 | } |
218 | |
219 | // zero-prefixable-int = DIGIT *( DIGIT / underscore DIGIT ) |
220 | pub(crate) fn zero_prefixable_int(input: Input<'_>) -> IResult<Input<'_>, &str, ParserError<'_>> { |
221 | ( |
222 | digit, |
223 | repeatimpl Parser, …>( |
224 | range:0.., |
225 | f:alt(( |
226 | digit.value(()), |
227 | ( |
228 | one_of(b'_' ), |
229 | cut_err(digit).context(Context::Expected(ParserValue::Description("digit" ))), |
230 | ) |
231 | .value(()), |
232 | )), |
233 | ) |
234 | .map(|()| ()), |
235 | ) |
236 | .recognize() |
237 | .map(|b: &[u8]| unsafe { from_utf8_unchecked(bytes:b, safety_justification:"`digit` and `_` filter out non-ASCII" ) }) |
238 | .parse_next(input) |
239 | } |
240 | |
241 | // exp = "e" float-exp-part |
242 | // float-exp-part = [ minus / plus ] zero-prefixable-int |
243 | pub(crate) fn exp(input: Input<'_>) -> IResult<Input<'_>, &str, ParserError<'_>> { |
244 | ( |
245 | one_of((b'e' , b'E' )), |
246 | opt(one_of([b'+' , b'-' ])), |
247 | cut_err(parser:zero_prefixable_int), |
248 | ) |
249 | .recognize() |
250 | .map(|b: &[u8]| unsafe { |
251 | from_utf8_unchecked( |
252 | bytes:b, |
253 | safety_justification:"`one_of` and `parse_zero_prefixable_int` filter out non-ASCII" , |
254 | ) |
255 | }) |
256 | .parse_next(input) |
257 | } |
258 | |
259 | // special-float = [ minus / plus ] ( inf / nan ) |
260 | pub(crate) fn special_float(input: Input<'_>) -> IResult<Input<'_>, f64, ParserError<'_>> { |
261 | (opt(one_of((b'+' , b'-' ))), alt((inf, nan))) |
262 | .map(|(s: Option, f: f64)| match s { |
263 | Some(b'+' ) | None => f, |
264 | Some(b'-' ) => -f, |
265 | _ => unreachable!("one_of should prevent this" ), |
266 | }) |
267 | .parse_next(input) |
268 | } |
269 | // inf = %x69.6e.66 ; inf |
270 | pub(crate) fn inf(input: Input<'_>) -> IResult<Input<'_>, f64, ParserError<'_>> { |
271 | tag(INF).value(val:f64::INFINITY).parse_next(input) |
272 | } |
273 | const INF: &[u8] = b"inf" ; |
274 | // nan = %x6e.61.6e ; nan |
275 | pub(crate) fn nan(input: Input<'_>) -> IResult<Input<'_>, f64, ParserError<'_>> { |
276 | tag(NAN).value(val:f64::NAN).parse_next(input) |
277 | } |
278 | const NAN: &[u8] = b"nan" ; |
279 | |
280 | // DIGIT = %x30-39 ; 0-9 |
281 | pub(crate) fn digit(input: Input<'_>) -> IResult<Input<'_>, u8, ParserError<'_>> { |
282 | one_of(DIGIT).parse_next(input) |
283 | } |
284 | const DIGIT: RangeInclusive<u8> = b'0' ..=b'9' ; |
285 | |
286 | // HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F" |
287 | pub(crate) fn hexdig(input: Input<'_>) -> IResult<Input<'_>, u8, ParserError<'_>> { |
288 | one_of(HEXDIG).parse_next(input) |
289 | } |
290 | pub(crate) const HEXDIG: (RangeInclusive<u8>, RangeInclusive<u8>, RangeInclusive<u8>) = |
291 | (DIGIT, b'A' ..=b'F' , b'a' ..=b'f' ); |
292 | |
#[cfg(test)]
mod test {
    use super::*;

    /// Valid integers in every supported radix parse to the expected value,
    /// and a literal too large for `i64` is rejected.
    #[test]
    fn integers() {
        // Bound to locals so the borrowed `&str`s outlive the case table.
        let min = i64::MIN.to_string();
        let max = i64::MAX.to_string();
        let cases = [
            ("+99", 99),
            ("42", 42),
            ("0", 0),
            ("-17", -17),
            ("1_000", 1_000),
            ("5_349_221", 5_349_221),
            ("1_2_3_4_5", 1_2_3_4_5),
            ("0xF", 15),
            ("0o0_755", 493),
            ("0b1_0_1", 5),
            (min.as_str(), i64::MIN),
            (max.as_str(), i64::MAX),
        ];
        for &(input, expected) in &cases {
            let parsed = integer.parse(new_input(input));
            assert_eq!(parsed, Ok(expected), "Parsing {input:?}");
        }

        let overflow = "1000000000000000000000000000000000";
        let parsed = integer.parse(new_input(overflow));
        assert!(parsed.is_err());
    }

    /// Compares floats, treating any two NaNs as equal and comparing
    /// infinities by sign only.
    #[track_caller]
    fn assert_float_eq(actual: f64, expected: f64) {
        if expected.is_nan() {
            assert!(actual.is_nan());
        } else if expected.is_infinite() {
            assert!(actual.is_infinite());
            assert_eq!(expected.is_sign_positive(), actual.is_sign_positive());
        } else {
            assert!(
                (expected - actual).abs() < f64::EPSILON,
                "expected {expected:?}, got {actual:?}"
            );
        }
    }

    /// Valid float literals and special values parse to the expected value,
    /// and a literal that overflows `f64` is rejected.
    #[test]
    fn floats() {
        let cases = [
            ("+1.0", 1.0),
            ("3.1419", 3.1419),
            ("-0.01", -0.01),
            ("5e+22", 5e+22),
            ("1e6", 1e6),
            ("-2E-2", -2E-2),
            ("6.626e-34", 6.626e-34),
            ("9_224_617.445_991_228_313", 9_224_617.445_991_227),
            ("-1.7976931348623157e+308", f64::MIN),
            ("1.7976931348623157e+308", f64::MAX),
            ("nan", f64::NAN),
            ("+nan", f64::NAN),
            ("-nan", f64::NAN),
            ("inf", f64::INFINITY),
            ("+inf", f64::INFINITY),
            ("-inf", f64::NEG_INFINITY),
            // ("1e+400", std::f64::INFINITY),
        ];
        for &(input, expected) in &cases {
            let parsed = float
                .parse(new_input(input))
                .unwrap_or_else(|err| panic!("Parsing {input:?} failed: {err:?}"));
            assert_float_eq(parsed, expected);
        }

        // Independent of the table above, so checked once rather than per case.
        let overflow = "9e99999";
        let parsed = float.parse(new_input(overflow));
        assert!(parsed.is_err(), " {:?}", parsed);
    }
}
370 | |