1 | use crate::{ |
2 | BoolLit, |
3 | Buffer, |
4 | ByteLit, |
5 | ByteStringLit, |
6 | CharLit, |
7 | ParseError, |
8 | FloatLit, |
9 | IntegerLit, |
10 | Literal, |
11 | StringLit, |
12 | err::{perr, ParseErrorKind::{*, self}}, |
13 | }; |
14 | |
15 | |
16 | pub fn parse<B: Buffer>(input: B) -> Result<Literal<B>, ParseError> { |
17 | let (first, rest) = input.as_bytes().split_first().ok_or(perr(None, Empty))?; |
18 | let second = input.as_bytes().get(1).copied(); |
19 | |
20 | match first { |
21 | b'f' if &*input == "false" => Ok(Literal::Bool(BoolLit::False)), |
22 | b't' if &*input == "true" => Ok(Literal::Bool(BoolLit::True)), |
23 | |
24 | // A number literal (integer or float). |
25 | b'0' ..=b'9' => { |
26 | // To figure out whether this is a float or integer, we do some |
27 | // quick inspection here. Yes, this is technically duplicate |
28 | // work with what is happening in the integer/float parse |
29 | // methods, but it makes the code way easier for now and won't |
30 | // be a huge performance loss. |
31 | // |
32 | // The first non-decimal char in a float literal must |
33 | // be '.', 'e' or 'E'. |
34 | match input.as_bytes().get(1 + end_dec_digits(rest)) { |
35 | Some(b'.' ) | Some(b'e' ) | Some(b'E' ) |
36 | => FloatLit::parse(input).map(Literal::Float), |
37 | |
38 | _ => IntegerLit::parse(input).map(Literal::Integer), |
39 | } |
40 | }, |
41 | |
42 | b' \'' => CharLit::parse(input).map(Literal::Char), |
43 | b'"' | b'r' => StringLit::parse(input).map(Literal::String), |
44 | |
45 | b'b' if second == Some(b' \'' ) => ByteLit::parse(input).map(Literal::Byte), |
46 | b'b' if second == Some(b'r' ) || second == Some(b'"' ) |
47 | => ByteStringLit::parse(input).map(Literal::ByteString), |
48 | |
49 | _ => Err(perr(None, InvalidLiteral)), |
50 | } |
51 | } |
52 | |
53 | |
54 | pub(crate) fn first_byte_or_empty(s: &str) -> Result<u8, ParseError> { |
55 | s.as_bytes().get(0).copied().ok_or(err:perr(span:None, kind:Empty)) |
56 | } |
57 | |
/// Returns the index of the first byte in `input` that is neither an
/// underscore nor an ASCII decimal digit, or `input.len()` if every byte is
/// an underscore or a decimal digit (which includes the empty input).
pub(crate) fn end_dec_digits(input: &[u8]) -> usize {
    input
        .iter()
        .position(|&b| !(b == b'_' || b.is_ascii_digit()))
        .unwrap_or(input.len())
}
65 | |
/// Returns the numeric value (0 to 15) of the ASCII hexadecimal digit
/// `digit`, accepting both lowercase and uppercase letters, or `None` if the
/// byte is not a hexadecimal digit.
pub(crate) fn hex_digit_value(digit: u8) -> Option<u8> {
    // Bytes >= 0x80 become non-ASCII chars, which `to_digit` rejects. A
    // successful result is always below 16, so the narrowing cast is lossless.
    char::from(digit).to_digit(16).map(|value| value as u8)
}
74 | |
75 | /// Makes sure that `s` is a valid literal suffix. |
76 | pub(crate) fn check_suffix(s: &str) -> Result<(), ParseErrorKind> { |
77 | if s.is_empty() { |
78 | return Ok(()); |
79 | } |
80 | |
81 | let mut chars = s.chars(); |
82 | let first = chars.next().unwrap(); |
83 | let rest = chars.as_str(); |
84 | if first == '_' && rest.is_empty() { |
85 | return Err(InvalidSuffix); |
86 | } |
87 | |
88 | // This is just an extra check to improve the error message. If the first |
89 | // character of the "suffix" is already some invalid ASCII |
90 | // char, "unexpected character" seems like the more fitting error. |
91 | if first.is_ascii() && !(first.is_ascii_alphabetic() || first == '_' ) { |
92 | return Err(UnexpectedChar); |
93 | } |
94 | |
95 | // Proper check is optional as it's not really necessary in proc macro |
96 | // context. |
97 | #[cfg (feature = "check_suffix" )] |
98 | fn is_valid_suffix(first: char, rest: &str) -> bool { |
99 | use unicode_xid::UnicodeXID; |
100 | |
101 | (first == '_' || first.is_xid_start()) |
102 | && rest.chars().all(|c| c.is_xid_continue()) |
103 | } |
104 | |
105 | // When avoiding the dependency on `unicode_xid`, we just do a best effort |
106 | // to catch the most common errors. |
107 | #[cfg (not(feature = "check_suffix" ))] |
108 | fn is_valid_suffix(first: char, rest: &str) -> bool { |
109 | if first.is_ascii() && !(first.is_ascii_alphabetic() || first == '_' ) { |
110 | return false; |
111 | } |
112 | for c in rest.chars() { |
113 | if c.is_ascii() && !(c.is_ascii_alphanumeric() || c == '_' ) { |
114 | return false; |
115 | } |
116 | } |
117 | true |
118 | } |
119 | |
120 | if is_valid_suffix(first, rest) { |
121 | Ok(()) |
122 | } else { |
123 | Err(InvalidSuffix) |
124 | } |
125 | } |
126 | |