1 | use std::ops::RangeInclusive; |
2 | |
3 | use winnow::combinator::alt; |
4 | use winnow::combinator::cut_err; |
5 | use winnow::combinator::opt; |
6 | use winnow::combinator::peek; |
7 | use winnow::combinator::preceded; |
8 | use winnow::combinator::repeat; |
9 | use winnow::combinator::rest; |
10 | use winnow::token::one_of; |
11 | use winnow::token::tag; |
12 | use winnow::token::take; |
13 | use winnow::trace::trace; |
14 | |
15 | use crate::parser::prelude::*; |
16 | use crate::parser::trivia::from_utf8_unchecked; |
17 | |
// ;; Boolean
19 | |
20 | // boolean = true / false |
21 | #[allow (dead_code)] // directly define in `fn value` |
22 | pub(crate) fn boolean(input: &mut Input<'_>) -> PResult<bool> { |
23 | trace(name:"boolean" , parser:alt((true_, false_))).parse_next(input) |
24 | } |
25 | |
26 | pub(crate) fn true_(input: &mut Input<'_>) -> PResult<bool> { |
27 | (peek(TRUE[0]), cut_err(TRUE)).value(val:true).parse_next(input) |
28 | } |
29 | const TRUE: &[u8] = b"true" ; |
30 | |
31 | pub(crate) fn false_(input: &mut Input<'_>) -> PResult<bool> { |
32 | (peek(FALSE[0]), cut_err(FALSE)) |
33 | .value(val:false) |
34 | .parse_next(input) |
35 | } |
36 | const FALSE: &[u8] = b"false" ; |
37 | |
// ;; Integer
39 | |
40 | // integer = dec-int / hex-int / oct-int / bin-int |
41 | pub(crate) fn integer(input: &mut Input<'_>) -> PResult<i64> { |
42 | traceimpl Parser, …>(name:"integer" , |
43 | parser:dispatch! {peek(opt::<_, &[u8], _, _>(take(2usize))); |
44 | Some(b"0x" ) => cut_err(hex_int.try_map(|s| i64::from_str_radix(&s.replace('_' , "" ), 16))), |
45 | Some(b"0o" ) => cut_err(oct_int.try_map(|s| i64::from_str_radix(&s.replace('_' , "" ), 8))), |
46 | Some(b"0b" ) => cut_err(bin_int.try_map(|s| i64::from_str_radix(&s.replace('_' , "" ), 2))), |
47 | _ => dec_int.and_then(cut_err(rest |
48 | .try_map(|s: &str| s.replace('_' , "" ).parse()))) |
49 | }) |
50 | .parse_next(input) |
51 | } |
52 | |
53 | // dec-int = [ minus / plus ] unsigned-dec-int |
54 | // unsigned-dec-int = DIGIT / digit1-9 1*( DIGIT / underscore DIGIT ) |
55 | pub(crate) fn dec_int<'i>(input: &mut Input<'i>) -> PResult<&'i str> { |
56 | trace( |
57 | "dec-int" , |
58 | ( |
59 | opt(one_of((b'+' , b'-' ))), |
60 | alt(( |
61 | ( |
62 | one_of(DIGIT1_9), |
63 | repeat( |
64 | 0.., |
65 | alt(( |
66 | digit.value(()), |
67 | ( |
68 | one_of(b'_' ), |
69 | cut_err(digit).context(StrContext::Expected( |
70 | StrContextValue::Description("digit" ), |
71 | )), |
72 | ) |
73 | .value(()), |
74 | )), |
75 | ) |
76 | .map(|()| ()), |
77 | ) |
78 | .value(()), |
79 | digit.value(()), |
80 | )), |
81 | ) |
82 | .recognize() |
83 | .map(|b: &[u8]| unsafe { |
84 | from_utf8_unchecked(b, "`digit` and `_` filter out non-ASCII" ) |
85 | }) |
86 | .context(StrContext::Label("integer" )), |
87 | ) |
88 | .parse_next(input) |
89 | } |
90 | const DIGIT1_9: RangeInclusive<u8> = b'1' ..=b'9' ; |
91 | |
92 | // hex-prefix = %x30.78 ; 0x |
93 | // hex-int = hex-prefix HEXDIG *( HEXDIG / underscore HEXDIG ) |
94 | pub(crate) fn hex_int<'i>(input: &mut Input<'i>) -> PResult<&'i str> { |
95 | trace( |
96 | "hex-int" , |
97 | preceded( |
98 | HEX_PREFIX, |
99 | cut_err(( |
100 | hexdig, |
101 | repeat( |
102 | 0.., |
103 | alt(( |
104 | hexdig.value(()), |
105 | ( |
106 | one_of(b'_' ), |
107 | cut_err(hexdig).context(StrContext::Expected( |
108 | StrContextValue::Description("digit" ), |
109 | )), |
110 | ) |
111 | .value(()), |
112 | )), |
113 | ) |
114 | .map(|()| ()), |
115 | )) |
116 | .recognize(), |
117 | ) |
118 | .map(|b| unsafe { from_utf8_unchecked(b, "`hexdig` and `_` filter out non-ASCII" ) }) |
119 | .context(StrContext::Label("hexadecimal integer" )), |
120 | ) |
121 | .parse_next(input) |
122 | } |
123 | const HEX_PREFIX: &[u8] = b"0x" ; |
124 | |
125 | // oct-prefix = %x30.6F ; 0o |
126 | // oct-int = oct-prefix digit0-7 *( digit0-7 / underscore digit0-7 ) |
127 | pub(crate) fn oct_int<'i>(input: &mut Input<'i>) -> PResult<&'i str> { |
128 | trace( |
129 | "oct-int" , |
130 | preceded( |
131 | OCT_PREFIX, |
132 | cut_err(( |
133 | one_of(DIGIT0_7), |
134 | repeat( |
135 | 0.., |
136 | alt(( |
137 | one_of(DIGIT0_7).value(()), |
138 | ( |
139 | one_of(b'_' ), |
140 | cut_err(one_of(DIGIT0_7)).context(StrContext::Expected( |
141 | StrContextValue::Description("digit" ), |
142 | )), |
143 | ) |
144 | .value(()), |
145 | )), |
146 | ) |
147 | .map(|()| ()), |
148 | )) |
149 | .recognize(), |
150 | ) |
151 | .map(|b| unsafe { from_utf8_unchecked(b, "`DIGIT0_7` and `_` filter out non-ASCII" ) }) |
152 | .context(StrContext::Label("octal integer" )), |
153 | ) |
154 | .parse_next(input) |
155 | } |
156 | const OCT_PREFIX: &[u8] = b"0o" ; |
157 | const DIGIT0_7: RangeInclusive<u8> = b'0' ..=b'7' ; |
158 | |
159 | // bin-prefix = %x30.62 ; 0b |
160 | // bin-int = bin-prefix digit0-1 *( digit0-1 / underscore digit0-1 ) |
161 | pub(crate) fn bin_int<'i>(input: &mut Input<'i>) -> PResult<&'i str> { |
162 | trace( |
163 | "bin-int" , |
164 | preceded( |
165 | BIN_PREFIX, |
166 | cut_err(( |
167 | one_of(DIGIT0_1), |
168 | repeat( |
169 | 0.., |
170 | alt(( |
171 | one_of(DIGIT0_1).value(()), |
172 | ( |
173 | one_of(b'_' ), |
174 | cut_err(one_of(DIGIT0_1)).context(StrContext::Expected( |
175 | StrContextValue::Description("digit" ), |
176 | )), |
177 | ) |
178 | .value(()), |
179 | )), |
180 | ) |
181 | .map(|()| ()), |
182 | )) |
183 | .recognize(), |
184 | ) |
185 | .map(|b| unsafe { from_utf8_unchecked(b, "`DIGIT0_1` and `_` filter out non-ASCII" ) }) |
186 | .context(StrContext::Label("binary integer" )), |
187 | ) |
188 | .parse_next(input) |
189 | } |
190 | const BIN_PREFIX: &[u8] = b"0b" ; |
191 | const DIGIT0_1: RangeInclusive<u8> = b'0' ..=b'1' ; |
192 | |
// ;; Float
194 | |
195 | // float = float-int-part ( exp / frac [ exp ] ) |
196 | // float =/ special-float |
197 | // float-int-part = dec-int |
198 | pub(crate) fn float(input: &mut Input<'_>) -> PResult<f64> { |
199 | traceimpl Parser, …>( |
200 | name:"float" , |
201 | parser:alt(( |
202 | float_.and_then(inner:cut_err( |
203 | parser:rest.try_map(|s: &str| s.replace('_' , "" ).parse()) |
204 | .verify(|f: &f64| *f != f64::INFINITY), |
205 | )), |
206 | special_float, |
207 | )) |
208 | .context(StrContext::Label("floating-point number" )), |
209 | ) |
210 | .parse_next(input) |
211 | } |
212 | |
213 | pub(crate) fn float_<'i>(input: &mut Input<'i>) -> PResult<&'i str> { |
214 | ( |
215 | dec_int, |
216 | alt((exp.void(), (frac.void(), opt(exp.void())).void())), |
217 | ) |
218 | .recognize() |
219 | .map(|b: &[u8]| unsafe { |
220 | from_utf8_unchecked( |
221 | bytes:b, |
222 | safety_justification:"`dec_int`, `one_of`, `exp`, and `frac` filter out non-ASCII" , |
223 | ) |
224 | }) |
225 | .parse_next(input) |
226 | } |
227 | |
228 | // frac = decimal-point zero-prefixable-int |
229 | // decimal-point = %x2E ; . |
230 | pub(crate) fn frac<'i>(input: &mut Input<'i>) -> PResult<&'i str> { |
231 | ( |
232 | b'.' , |
233 | cut_errimpl Parser, …>(parser:zero_prefixable_int) |
234 | .context(StrContext::Expected(StrContextValue::Description("digit" ))), |
235 | ) |
236 | .recognize() |
237 | .map(|b: &[u8]| unsafe { |
238 | from_utf8_unchecked( |
239 | bytes:b, |
240 | safety_justification:"`.` and `parse_zero_prefixable_int` filter out non-ASCII" , |
241 | ) |
242 | }) |
243 | .parse_next(input) |
244 | } |
245 | |
246 | // zero-prefixable-int = DIGIT *( DIGIT / underscore DIGIT ) |
247 | pub(crate) fn zero_prefixable_int<'i>(input: &mut Input<'i>) -> PResult<&'i str> { |
248 | ( |
249 | digit, |
250 | repeatRepeat, …>, …, …, …, …>( |
251 | range:0.., |
252 | parser:alt(( |
253 | digit.value(()), |
254 | ( |
255 | one_of(b'_' ), |
256 | cut_err(digit) |
257 | .context(StrContext::Expected(StrContextValue::Description("digit" ))), |
258 | ) |
259 | .value(()), |
260 | )), |
261 | ) |
262 | .map(|()| ()), |
263 | ) |
264 | .recognize() |
265 | .map(|b: &[u8]| unsafe { from_utf8_unchecked(bytes:b, safety_justification:"`digit` and `_` filter out non-ASCII" ) }) |
266 | .parse_next(input) |
267 | } |
268 | |
269 | // exp = "e" float-exp-part |
270 | // float-exp-part = [ minus / plus ] zero-prefixable-int |
271 | pub(crate) fn exp<'i>(input: &mut Input<'i>) -> PResult<&'i str> { |
272 | ( |
273 | one_of((b'e' , b'E' )), |
274 | opt(one_of([b'+' , b'-' ])), |
275 | cut_err(parser:zero_prefixable_int), |
276 | ) |
277 | .recognize() |
278 | .map(|b: &[u8]| unsafe { |
279 | from_utf8_unchecked( |
280 | bytes:b, |
281 | safety_justification:"`one_of` and `parse_zero_prefixable_int` filter out non-ASCII" , |
282 | ) |
283 | }) |
284 | .parse_next(input) |
285 | } |
286 | |
287 | // special-float = [ minus / plus ] ( inf / nan ) |
288 | pub(crate) fn special_float(input: &mut Input<'_>) -> PResult<f64> { |
289 | (opt(one_of((b'+' , b'-' ))), alt((inf, nan))) |
290 | .map(|(s: Option, f: f64)| match s { |
291 | Some(b'+' ) | None => f, |
292 | Some(b'-' ) => -f, |
293 | _ => unreachable!("one_of should prevent this" ), |
294 | }) |
295 | .parse_next(input) |
296 | } |
297 | // inf = %x69.6e.66 ; inf |
298 | pub(crate) fn inf(input: &mut Input<'_>) -> PResult<f64> { |
299 | tag(INF).value(val:f64::INFINITY).parse_next(input) |
300 | } |
301 | const INF: &[u8] = b"inf" ; |
302 | // nan = %x6e.61.6e ; nan |
303 | pub(crate) fn nan(input: &mut Input<'_>) -> PResult<f64> { |
304 | tag(NAN).value(val:f64::NAN).parse_next(input) |
305 | } |
306 | const NAN: &[u8] = b"nan" ; |
307 | |
308 | // DIGIT = %x30-39 ; 0-9 |
309 | pub(crate) fn digit(input: &mut Input<'_>) -> PResult<u8> { |
310 | one_of(DIGIT).parse_next(input) |
311 | } |
312 | const DIGIT: RangeInclusive<u8> = b'0' ..=b'9' ; |
313 | |
314 | // HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F" |
315 | pub(crate) fn hexdig(input: &mut Input<'_>) -> PResult<u8> { |
316 | one_of(HEXDIG).parse_next(input) |
317 | } |
318 | pub(crate) const HEXDIG: (RangeInclusive<u8>, RangeInclusive<u8>, RangeInclusive<u8>) = |
319 | (DIGIT, b'A' ..=b'F' , b'a' ..=b'f' ); |
320 | |
#[cfg(test)]
mod test {
    use super::*;

    /// Valid integers in every supported radix parse to the expected value,
    /// and a decimal literal that does not fit in `i64` is rejected.
    #[test]
    fn integers() {
        let cases = [
            ("+99", 99),
            ("42", 42),
            ("0", 0),
            ("-17", -17),
            ("1_000", 1_000),
            ("5_349_221", 5_349_221),
            ("1_2_3_4_5", 1_2_3_4_5),
            ("0xF", 15),
            ("0o0_755", 493),
            ("0b1_0_1", 5),
            (&i64::MIN.to_string()[..], i64::MIN),
            (&i64::MAX.to_string()[..], i64::MAX),
        ];
        for &(input, expected) in &cases {
            dbg!(input);
            let parsed = integer.parse(new_input(input));
            assert_eq!(parsed, Ok(expected), "Parsing {input:?}");
        }

        let overflow = "1000000000000000000000000000000000";
        let parsed = integer.parse(new_input(overflow));
        assert!(parsed.is_err());
    }

    /// Compares two floats: NaN matches NaN, infinities must agree in sign,
    /// and finite values must differ by less than `f64::EPSILON`.
    #[track_caller]
    fn assert_float_eq(actual: f64, expected: f64) {
        if expected.is_nan() {
            assert!(actual.is_nan());
        } else if expected.is_infinite() {
            assert!(actual.is_infinite());
            assert_eq!(expected.is_sign_positive(), actual.is_sign_positive());
        } else {
            dbg!(expected);
            dbg!(actual);
            assert!((expected - actual).abs() < f64::EPSILON);
        }
    }

    /// Valid floats (including `inf` / `nan` keywords) parse to the expected
    /// value, and a numeric literal that overflows `f64` is rejected.
    #[test]
    fn floats() {
        let cases = [
            ("+1.0", 1.0),
            ("3.1419", 3.1419),
            ("-0.01", -0.01),
            ("5e+22", 5e+22),
            ("1e6", 1e6),
            ("-2E-2", -2E-2),
            ("6.626e-34", 6.626e-34),
            ("9_224_617.445_991_228_313", 9_224_617.445_991_227),
            ("-1.7976931348623157e+308", f64::MIN),
            ("1.7976931348623157e+308", f64::MAX),
            ("nan", f64::NAN),
            ("+nan", f64::NAN),
            ("-nan", f64::NAN),
            ("inf", f64::INFINITY),
            ("+inf", f64::INFINITY),
            ("-inf", f64::NEG_INFINITY),
            // No overflowing literal (e.g. `1e+400`) is listed here: such
            // input is rejected rather than mapped to infinity, which the
            // check after the loop covers.
        ];
        for &(input, expected) in &cases {
            dbg!(input);
            let parsed = float.parse(new_input(input)).unwrap();
            assert_float_eq(parsed, expected);
        }

        // Loop-invariant, so checked once rather than once per case.
        let overflow = "9e99999";
        let parsed = float.parse(new_input(overflow));
        assert!(parsed.is_err(), " {:?}", parsed);
    }
}
398 | |