1 | use std::ops::RangeInclusive; |
2 | |
3 | use winnow::combinator::alt; |
4 | use winnow::combinator::cut_err; |
5 | use winnow::combinator::opt; |
6 | use winnow::combinator::peek; |
7 | use winnow::combinator::preceded; |
8 | use winnow::combinator::repeat; |
9 | use winnow::combinator::rest; |
10 | use winnow::combinator::trace; |
11 | use winnow::token::one_of; |
12 | use winnow::token::take; |
13 | |
14 | use crate::parser::prelude::*; |
15 | use crate::parser::trivia::from_utf8_unchecked; |
16 | |
17 | // ;; Boolean |
18 | |
19 | // boolean = true / false |
20 | #[allow (dead_code)] // directly define in `fn value` |
21 | pub(crate) fn boolean(input: &mut Input<'_>) -> PResult<bool> { |
22 | trace(name:"boolean" , parser:alt((true_, false_))).parse_next(input) |
23 | } |
24 | |
25 | pub(crate) fn true_(input: &mut Input<'_>) -> PResult<bool> { |
26 | (peek(TRUE[0]), cut_err(TRUE)).value(val:true).parse_next(input) |
27 | } |
28 | const TRUE: &[u8] = b"true" ; |
29 | |
30 | pub(crate) fn false_(input: &mut Input<'_>) -> PResult<bool> { |
31 | (peek(FALSE[0]), cut_err(FALSE)) |
32 | .value(val:false) |
33 | .parse_next(input) |
34 | } |
35 | const FALSE: &[u8] = b"false" ; |
36 | |
37 | // ;; Integer |
38 | |
39 | // integer = dec-int / hex-int / oct-int / bin-int |
40 | pub(crate) fn integer(input: &mut Input<'_>) -> PResult<i64> { |
41 | traceimpl Parser, …>(name:"integer" , |
42 | parser:dispatch! {peek(opt::<_, &[u8], _, _>(take(2usize))); |
43 | Some(b"0x" ) => cut_err(hex_int.try_map(|s| i64::from_str_radix(&s.replace('_' , "" ), 16))), |
44 | Some(b"0o" ) => cut_err(oct_int.try_map(|s| i64::from_str_radix(&s.replace('_' , "" ), 8))), |
45 | Some(b"0b" ) => cut_err(bin_int.try_map(|s| i64::from_str_radix(&s.replace('_' , "" ), 2))), |
46 | _ => dec_int.and_then(cut_err(rest |
47 | .try_map(|s: &str| s.replace('_' , "" ).parse()))) |
48 | }) |
49 | .parse_next(input) |
50 | } |
51 | |
52 | // dec-int = [ minus / plus ] unsigned-dec-int |
53 | // unsigned-dec-int = DIGIT / digit1-9 1*( DIGIT / underscore DIGIT ) |
54 | pub(crate) fn dec_int<'i>(input: &mut Input<'i>) -> PResult<&'i str> { |
55 | trace( |
56 | "dec-int" , |
57 | ( |
58 | opt(one_of((b'+' , b'-' ))), |
59 | alt(( |
60 | ( |
61 | one_of(DIGIT1_9), |
62 | repeat( |
63 | 0.., |
64 | alt(( |
65 | digit.void(), |
66 | ( |
67 | one_of(b'_' ), |
68 | cut_err(digit).context(StrContext::Expected( |
69 | StrContextValue::Description("digit" ), |
70 | )), |
71 | ) |
72 | .void(), |
73 | )), |
74 | ) |
75 | .map(|()| ()), |
76 | ) |
77 | .void(), |
78 | digit.void(), |
79 | )), |
80 | ) |
81 | .recognize() |
82 | .map(|b: &[u8]| unsafe { |
83 | from_utf8_unchecked(b, "`digit` and `_` filter out non-ASCII" ) |
84 | }) |
85 | .context(StrContext::Label("integer" )), |
86 | ) |
87 | .parse_next(input) |
88 | } |
89 | const DIGIT1_9: RangeInclusive<u8> = b'1' ..=b'9' ; |
90 | |
91 | // hex-prefix = %x30.78 ; 0x |
92 | // hex-int = hex-prefix HEXDIG *( HEXDIG / underscore HEXDIG ) |
93 | pub(crate) fn hex_int<'i>(input: &mut Input<'i>) -> PResult<&'i str> { |
94 | trace( |
95 | "hex-int" , |
96 | preceded( |
97 | HEX_PREFIX, |
98 | cut_err(( |
99 | hexdig, |
100 | repeat( |
101 | 0.., |
102 | alt(( |
103 | hexdig.void(), |
104 | ( |
105 | one_of(b'_' ), |
106 | cut_err(hexdig).context(StrContext::Expected( |
107 | StrContextValue::Description("digit" ), |
108 | )), |
109 | ) |
110 | .void(), |
111 | )), |
112 | ) |
113 | .map(|()| ()), |
114 | )) |
115 | .recognize(), |
116 | ) |
117 | .map(|b| unsafe { from_utf8_unchecked(b, "`hexdig` and `_` filter out non-ASCII" ) }) |
118 | .context(StrContext::Label("hexadecimal integer" )), |
119 | ) |
120 | .parse_next(input) |
121 | } |
122 | const HEX_PREFIX: &[u8] = b"0x" ; |
123 | |
124 | // oct-prefix = %x30.6F ; 0o |
125 | // oct-int = oct-prefix digit0-7 *( digit0-7 / underscore digit0-7 ) |
126 | pub(crate) fn oct_int<'i>(input: &mut Input<'i>) -> PResult<&'i str> { |
127 | trace( |
128 | "oct-int" , |
129 | preceded( |
130 | OCT_PREFIX, |
131 | cut_err(( |
132 | one_of(DIGIT0_7), |
133 | repeat( |
134 | 0.., |
135 | alt(( |
136 | one_of(DIGIT0_7).void(), |
137 | ( |
138 | one_of(b'_' ), |
139 | cut_err(one_of(DIGIT0_7)).context(StrContext::Expected( |
140 | StrContextValue::Description("digit" ), |
141 | )), |
142 | ) |
143 | .void(), |
144 | )), |
145 | ) |
146 | .map(|()| ()), |
147 | )) |
148 | .recognize(), |
149 | ) |
150 | .map(|b| unsafe { from_utf8_unchecked(b, "`DIGIT0_7` and `_` filter out non-ASCII" ) }) |
151 | .context(StrContext::Label("octal integer" )), |
152 | ) |
153 | .parse_next(input) |
154 | } |
155 | const OCT_PREFIX: &[u8] = b"0o" ; |
156 | const DIGIT0_7: RangeInclusive<u8> = b'0' ..=b'7' ; |
157 | |
158 | // bin-prefix = %x30.62 ; 0b |
159 | // bin-int = bin-prefix digit0-1 *( digit0-1 / underscore digit0-1 ) |
160 | pub(crate) fn bin_int<'i>(input: &mut Input<'i>) -> PResult<&'i str> { |
161 | trace( |
162 | "bin-int" , |
163 | preceded( |
164 | BIN_PREFIX, |
165 | cut_err(( |
166 | one_of(DIGIT0_1), |
167 | repeat( |
168 | 0.., |
169 | alt(( |
170 | one_of(DIGIT0_1).void(), |
171 | ( |
172 | one_of(b'_' ), |
173 | cut_err(one_of(DIGIT0_1)).context(StrContext::Expected( |
174 | StrContextValue::Description("digit" ), |
175 | )), |
176 | ) |
177 | .void(), |
178 | )), |
179 | ) |
180 | .map(|()| ()), |
181 | )) |
182 | .recognize(), |
183 | ) |
184 | .map(|b| unsafe { from_utf8_unchecked(b, "`DIGIT0_1` and `_` filter out non-ASCII" ) }) |
185 | .context(StrContext::Label("binary integer" )), |
186 | ) |
187 | .parse_next(input) |
188 | } |
189 | const BIN_PREFIX: &[u8] = b"0b" ; |
190 | const DIGIT0_1: RangeInclusive<u8> = b'0' ..=b'1' ; |
191 | |
192 | // ;; Float |
193 | |
194 | // float = float-int-part ( exp / frac [ exp ] ) |
195 | // float =/ special-float |
196 | // float-int-part = dec-int |
197 | pub(crate) fn float(input: &mut Input<'_>) -> PResult<f64> { |
198 | traceimpl Parser, …>( |
199 | name:"float" , |
200 | parser:alt(( |
201 | float_.and_then(inner:cut_err( |
202 | parser:rest.try_map(|s: &str| s.replace('_' , "" ).parse()) |
203 | .verify(|f: &f64| *f != f64::INFINITY), |
204 | )), |
205 | special_float, |
206 | )) |
207 | .context(StrContext::Label("floating-point number" )), |
208 | ) |
209 | .parse_next(input) |
210 | } |
211 | |
212 | pub(crate) fn float_<'i>(input: &mut Input<'i>) -> PResult<&'i str> { |
213 | ( |
214 | dec_int, |
215 | alt((exp.void(), (frac.void(), opt(parser:exp.void())).void())), |
216 | ) |
217 | .recognize() |
218 | .map(|b: &[u8]| unsafe { |
219 | from_utf8_unchecked( |
220 | bytes:b, |
221 | safety_justification:"`dec_int`, `one_of`, `exp`, and `frac` filter out non-ASCII" , |
222 | ) |
223 | }) |
224 | .parse_next(input) |
225 | } |
226 | |
227 | // frac = decimal-point zero-prefixable-int |
228 | // decimal-point = %x2E ; . |
229 | pub(crate) fn frac<'i>(input: &mut Input<'i>) -> PResult<&'i str> { |
230 | ( |
231 | b'.' , |
232 | cut_errimpl Parser, …>(parser:zero_prefixable_int) |
233 | .context(StrContext::Expected(StrContextValue::Description("digit" ))), |
234 | ) |
235 | .recognize() |
236 | .map(|b: &[u8]| unsafe { |
237 | from_utf8_unchecked( |
238 | bytes:b, |
239 | safety_justification:"`.` and `parse_zero_prefixable_int` filter out non-ASCII" , |
240 | ) |
241 | }) |
242 | .parse_next(input) |
243 | } |
244 | |
245 | // zero-prefixable-int = DIGIT *( DIGIT / underscore DIGIT ) |
246 | pub(crate) fn zero_prefixable_int<'i>(input: &mut Input<'i>) -> PResult<&'i str> { |
247 | ( |
248 | digit, |
249 | repeatRepeat, …>, …, …, …, …>( |
250 | occurrences:0.., |
251 | parser:alt(( |
252 | digit.void(), |
253 | ( |
254 | one_of(set:b'_' ), |
255 | cut_errimpl Parser, …>(parser:digit) |
256 | .context(StrContext::Expected(StrContextValue::Description("digit" ))), |
257 | ) |
258 | .void(), |
259 | )), |
260 | ) |
261 | .map(|()| ()), |
262 | ) |
263 | .recognize() |
264 | .map(|b: &[u8]| unsafe { from_utf8_unchecked(bytes:b, safety_justification:"`digit` and `_` filter out non-ASCII" ) }) |
265 | .parse_next(input) |
266 | } |
267 | |
268 | // exp = "e" float-exp-part |
269 | // float-exp-part = [ minus / plus ] zero-prefixable-int |
270 | pub(crate) fn exp<'i>(input: &mut Input<'i>) -> PResult<&'i str> { |
271 | ( |
272 | one_of((b'e' , b'E' )), |
273 | opt(parser:one_of([b'+' , b'-' ])), |
274 | cut_err(parser:zero_prefixable_int), |
275 | ) |
276 | .recognize() |
277 | .map(|b: &[u8]| unsafe { |
278 | from_utf8_unchecked( |
279 | bytes:b, |
280 | safety_justification:"`one_of` and `parse_zero_prefixable_int` filter out non-ASCII" , |
281 | ) |
282 | }) |
283 | .parse_next(input) |
284 | } |
285 | |
286 | // special-float = [ minus / plus ] ( inf / nan ) |
287 | pub(crate) fn special_float(input: &mut Input<'_>) -> PResult<f64> { |
288 | (opt(parser:one_of((b'+' , b'-' ))), alt((inf, nan))) |
289 | .map(|(s: Option, f: f64)| match s { |
290 | Some(b'+' ) | None => f, |
291 | Some(b'-' ) => -f, |
292 | _ => unreachable!("one_of should prevent this" ), |
293 | }) |
294 | .parse_next(input) |
295 | } |
296 | // inf = %x69.6e.66 ; inf |
297 | pub(crate) fn inf(input: &mut Input<'_>) -> PResult<f64> { |
298 | INF.value(val:f64::INFINITY).parse_next(input) |
299 | } |
300 | const INF: &[u8] = b"inf" ; |
301 | // nan = %x6e.61.6e ; nan |
302 | pub(crate) fn nan(input: &mut Input<'_>) -> PResult<f64> { |
303 | NAN.value(val:f64::NAN.copysign(sign:1.0)).parse_next(input) |
304 | } |
305 | const NAN: &[u8] = b"nan" ; |
306 | |
307 | // DIGIT = %x30-39 ; 0-9 |
308 | pub(crate) fn digit(input: &mut Input<'_>) -> PResult<u8> { |
309 | one_of(DIGIT).parse_next(input) |
310 | } |
311 | const DIGIT: RangeInclusive<u8> = b'0' ..=b'9' ; |
312 | |
313 | // HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F" |
314 | pub(crate) fn hexdig(input: &mut Input<'_>) -> PResult<u8> { |
315 | one_of(HEXDIG).parse_next(input) |
316 | } |
317 | pub(crate) const HEXDIG: (RangeInclusive<u8>, RangeInclusive<u8>, RangeInclusive<u8>) = |
318 | (DIGIT, b'A' ..=b'F' , b'a' ..=b'f' ); |
319 | |
#[cfg(test)]
#[cfg(feature = "parse")]
#[cfg(feature = "display")]
mod test {
    use super::*;

    /// Valid integers in every supported radix parse to the expected value,
    /// and a literal too large for `i64` is rejected.
    #[test]
    fn integers() {
        let cases = [
            ("+99", 99),
            ("42", 42),
            ("0", 0),
            ("-17", -17),
            ("1_000", 1_000),
            ("5_349_221", 5_349_221),
            ("1_2_3_4_5", 1_2_3_4_5),
            ("0xF", 15),
            ("0o0_755", 493),
            ("0b1_0_1", 5),
            (&i64::MIN.to_string()[..], i64::MIN),
            (&i64::MAX.to_string()[..], i64::MAX),
        ];
        for &(input, expected) in &cases {
            dbg!(input);
            let parsed = integer.parse(new_input(input));
            assert_eq!(parsed, Ok(expected), "Parsing {input:?}");
        }

        let overflow = "1000000000000000000000000000000000";
        let parsed = integer.parse(new_input(overflow));
        assert!(parsed.is_err());
    }

    /// Compares two floats: NaN and infinities must agree in kind and sign,
    /// finite values must differ by less than `f64::EPSILON`.
    #[track_caller]
    fn assert_float_eq(actual: f64, expected: f64) {
        if expected.is_nan() {
            assert!(actual.is_nan());
            assert_eq!(expected.is_sign_positive(), actual.is_sign_positive());
        } else if expected.is_infinite() {
            assert!(actual.is_infinite());
            assert_eq!(expected.is_sign_positive(), actual.is_sign_positive());
        } else {
            dbg!(expected);
            dbg!(actual);
            assert!((expected - actual).abs() < f64::EPSILON);
        }
    }

    /// Valid floats (including the special values) parse to the expected
    /// value, and a literal that overflows `f64` is rejected.
    #[test]
    fn floats() {
        let cases = [
            ("+1.0", 1.0),
            ("3.1419", 3.1419),
            ("-0.01", -0.01),
            ("5e+22", 5e+22),
            ("1e6", 1e6),
            ("-2E-2", -2E-2),
            ("6.626e-34", 6.626e-34),
            ("9_224_617.445_991_228_313", 9_224_617.445_991_227),
            ("-1.7976931348623157e+308", f64::MIN),
            ("1.7976931348623157e+308", f64::MAX),
            ("nan", f64::NAN.copysign(1.0)),
            ("+nan", f64::NAN.copysign(1.0)),
            ("-nan", f64::NAN.copysign(-1.0)),
            ("inf", f64::INFINITY),
            ("+inf", f64::INFINITY),
            ("-inf", f64::NEG_INFINITY),
        ];
        for &(input, expected) in &cases {
            dbg!(input);
            let parsed = float.parse(new_input(input)).unwrap();
            assert_float_eq(parsed, expected);
        }

        // Checked once, outside the loop: it does not depend on the case table.
        let overflow = "9e99999";
        let parsed = float.parse(new_input(overflow));
        assert!(parsed.is_err(), "{parsed:?}");
    }
}
400 | |