1 | use std::{fmt, str::FromStr}; |
2 | |
3 | use crate::{ |
4 | Buffer, ParseError, |
5 | err::{perr, ParseErrorKind::*}, |
6 | parse::{first_byte_or_empty, hex_digit_value, check_suffix}, |
7 | }; |
8 | |
9 | |
/// An integer literal, e.g. `27`, `0x7F`, `0b101010u8` or `5_000_000i64`.
///
/// An integer literal consists of an optional base prefix (`0b`, `0o`, `0x`),
/// the main part (digits and underscores), and an optional type suffix
/// (e.g. `u64` or `i8`). See [the reference][ref] for more information.
///
/// Note that integer literals are always positive: the grammar does not contain
/// the minus sign at all. The minus sign is just the unary negate operator,
/// not part of the literal. This matters for cases like `- 128i8`: here, the
/// literal itself would overflow the specified type (`i8` cannot represent
/// 128). That's why in rustc, the literal overflow check is performed as a
/// lint after parsing, not during the lexing stage. Similarly,
/// [`IntegerLit::parse`] does not perform an overflow check.
///
/// [ref]: https://doc.rust-lang.org/reference/tokens.html#integer-literals
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub struct IntegerLit<B: Buffer> {
    /// The raw literal. Grammar: `<prefix?><main part><suffix?>`.
    raw: B,
    /// Byte index into `raw` of the first character of the main number part
    /// (i.e. the length of the base prefix; 0 if there is no prefix).
    start_main_part: usize,
    /// Byte index into `raw` of the first character that is not part of the
    /// main number part. The suffix (possibly empty) starts here.
    end_main_part: usize,
    /// The base indicated by the prefix, i.e. parsed `raw[..start_main_part]`.
    base: IntegerBase,
}
37 | |
38 | impl<B: Buffer> IntegerLit<B> { |
39 | /// Parses the input as an integer literal. Returns an error if the input is |
40 | /// invalid or represents a different kind of literal. |
41 | pub fn parse(input: B) -> Result<Self, ParseError> { |
42 | match first_byte_or_empty(&input)? { |
43 | digit @ b'0' ..=b'9' => { |
44 | // TODO: simplify once RFC 2528 is stabilized |
45 | let IntegerLit { |
46 | start_main_part, |
47 | end_main_part, |
48 | base, |
49 | .. |
50 | } = parse_impl(&input, digit)?; |
51 | |
52 | Ok(Self { raw: input, start_main_part, end_main_part, base }) |
53 | }, |
54 | _ => Err(perr(0, DoesNotStartWithDigit)), |
55 | } |
56 | } |
57 | |
58 | /// Performs the actual string to int conversion to obtain the integer |
59 | /// value. The optional type suffix of the literal **is ignored by this |
60 | /// method**. This means `N` does not need to match the type suffix! |
61 | /// |
62 | /// Returns `None` if the literal overflows `N`. |
63 | /// |
64 | /// Hint: `u128` can represent all possible values integer literal values, |
65 | /// as there are no negative literals (see type docs). Thus you can, for |
66 | /// example, safely use `lit.value::<u128>().to_string()` to get a decimal |
67 | /// string. (Technically, Rust integer literals can represent arbitrarily |
68 | /// large numbers, but those would be rejected at a later stage by the Rust |
69 | /// compiler). |
70 | pub fn value<N: FromIntegerLiteral>(&self) -> Option<N> { |
71 | let base = N::from_small_number(self.base.value()); |
72 | |
73 | let mut acc = N::from_small_number(0); |
74 | for digit in self.raw_main_part().bytes() { |
75 | if digit == b'_' { |
76 | continue; |
77 | } |
78 | |
79 | // We don't actually need the base here: we already know this main |
80 | // part only contains digits valid for the specified base. |
81 | let digit = hex_digit_value(digit) |
82 | .unwrap_or_else(|| unreachable!("bug: integer main part contains non-digit" )); |
83 | |
84 | acc = acc.checked_mul(base)?; |
85 | acc = acc.checked_add(N::from_small_number(digit))?; |
86 | } |
87 | |
88 | Some(acc) |
89 | } |
90 | |
91 | /// The base of this integer literal. |
92 | pub fn base(&self) -> IntegerBase { |
93 | self.base |
94 | } |
95 | |
96 | /// The main part containing the digits and potentially `_`. Do not try to |
97 | /// parse this directly as that would ignore the base! |
98 | pub fn raw_main_part(&self) -> &str { |
99 | &(*self.raw)[self.start_main_part..self.end_main_part] |
100 | } |
101 | |
102 | /// The optional suffix. Returns `""` if the suffix is empty/does not exist. |
103 | /// |
104 | /// If you want the type, try `IntegerType::from_suffix(lit.suffix())`. |
105 | pub fn suffix(&self) -> &str { |
106 | &(*self.raw)[self.end_main_part..] |
107 | } |
108 | |
109 | /// Returns the raw input that was passed to `parse`. |
110 | pub fn raw_input(&self) -> &str { |
111 | &self.raw |
112 | } |
113 | |
114 | /// Returns the raw input that was passed to `parse`, potentially owned. |
115 | pub fn into_raw_input(self) -> B { |
116 | self.raw |
117 | } |
118 | } |
119 | |
120 | impl IntegerLit<&str> { |
121 | /// Makes a copy of the underlying buffer and returns the owned version of |
122 | /// `Self`. |
123 | pub fn to_owned(&self) -> IntegerLit<String> { |
124 | IntegerLit { |
125 | raw: self.raw.to_owned(), |
126 | start_main_part: self.start_main_part, |
127 | end_main_part: self.end_main_part, |
128 | base: self.base, |
129 | } |
130 | } |
131 | } |
132 | |
133 | impl<B: Buffer> fmt::Display for IntegerLit<B> { |
134 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
135 | write!(f, " {}" , &*self.raw) |
136 | } |
137 | } |
138 | |
/// Integer literal types. *Implementation detail*.
///
/// Implemented for all integer literal types. This trait is sealed and cannot
/// be implemented outside of this crate. The trait's methods are implementation
/// detail of this library and are not subject to semver.
pub trait FromIntegerLiteral: self::sealed::Sealed + Copy {
    /// Creates itself from the given number. `n` is guaranteed to be `<= 16`.
    #[doc(hidden)]
    fn from_small_number(n: u8) -> Self;

    /// Adds `rhs` to `self`, returning `None` if the result does not fit into
    /// `Self`.
    #[doc(hidden)]
    fn checked_add(self, rhs: Self) -> Option<Self>;

    /// Multiplies `self` by `rhs`, returning `None` if the result does not fit
    /// into `Self`.
    #[doc(hidden)]
    fn checked_mul(self, rhs: Self) -> Option<Self>;

    /// Returns the [`IntegerType`] variant corresponding to the implementing
    /// type.
    #[doc(hidden)]
    fn ty() -> IntegerType;
}
158 | |
159 | macro_rules! impl_from_int_literal { |
160 | ($( $ty:ty => $variant:ident ,)* ) => { |
161 | $( |
162 | impl self::sealed::Sealed for $ty {} |
163 | impl FromIntegerLiteral for $ty { |
164 | fn from_small_number(n: u8) -> Self { |
165 | n as Self |
166 | } |
167 | fn checked_add(self, rhs: Self) -> Option<Self> { |
168 | self.checked_add(rhs) |
169 | } |
170 | fn checked_mul(self, rhs: Self) -> Option<Self> { |
171 | self.checked_mul(rhs) |
172 | } |
173 | fn ty() -> IntegerType { |
174 | IntegerType::$variant |
175 | } |
176 | } |
177 | )* |
178 | }; |
179 | } |
180 | |
181 | impl_from_int_literal!( |
182 | u8 => U8, u16 => U16, u32 => U32, u64 => U64, u128 => U128, usize => Usize, |
183 | i8 => I8, i16 => I16, i32 => I32, i64 => I64, i128 => I128, isize => Isize, |
184 | ); |
185 | |
/// Private module holding the supertrait used to seal [`FromIntegerLiteral`].
mod sealed {
    /// Marker supertrait of `FromIntegerLiteral`. It lives in a private
    /// module, which is what prevents other crates from implementing
    /// `FromIntegerLiteral` themselves.
    pub trait Sealed {}
}
189 | |
190 | /// Precondition: first byte of string has to be in `b'0'..=b'9'`. |
191 | #[inline (never)] |
192 | pub(crate) fn parse_impl(input: &str, first: u8) -> Result<IntegerLit<&str>, ParseError> { |
193 | // Figure out base and strip prefix base, if it exists. |
194 | let (end_prefix, base) = match (first, input.as_bytes().get(1)) { |
195 | (b'0' , Some(b'b' )) => (2, IntegerBase::Binary), |
196 | (b'0' , Some(b'o' )) => (2, IntegerBase::Octal), |
197 | (b'0' , Some(b'x' )) => (2, IntegerBase::Hexadecimal), |
198 | |
199 | // Everything else is treated as decimal. Several cases are caught |
200 | // by this: |
201 | // - "123" |
202 | // - "0" |
203 | // - "0u8" |
204 | // - "0r" -> this will error later |
205 | _ => (0, IntegerBase::Decimal), |
206 | }; |
207 | let without_prefix = &input[end_prefix..]; |
208 | |
209 | |
210 | // Scan input to find the first character that's not a valid digit. |
211 | let is_valid_digit = match base { |
212 | IntegerBase::Binary => |b| matches!(b, b'0' | b'1' | b'_' ), |
213 | IntegerBase::Octal => |b| matches!(b, b'0' ..=b'7' | b'_' ), |
214 | IntegerBase::Decimal => |b| matches!(b, b'0' ..=b'9' | b'_' ), |
215 | IntegerBase::Hexadecimal => |b| matches!(b, b'0' ..=b'9' | b'a' ..=b'f' | b'A' ..=b'F' | b'_' ), |
216 | }; |
217 | let end_main = without_prefix.bytes() |
218 | .position(|b| !is_valid_digit(b)) |
219 | .unwrap_or(without_prefix.len()); |
220 | let (main_part, suffix) = without_prefix.split_at(end_main); |
221 | |
222 | check_suffix(suffix).map_err(|kind| { |
223 | // This is just to have a nicer error kind for this special case. If the |
224 | // suffix is invalid, it is non-empty -> unwrap ok. |
225 | let first = suffix.as_bytes()[0]; |
226 | if !is_valid_digit(first) && first.is_ascii_digit() { |
227 | perr(end_main + end_prefix, InvalidDigit) |
228 | } else { |
229 | perr(end_main + end_prefix..input.len(), kind) |
230 | } |
231 | })?; |
232 | if suffix.starts_with('e' ) || suffix.starts_with('E' ) { |
233 | return Err(perr(end_main, IntegerSuffixStartingWithE)); |
234 | } |
235 | |
236 | // Make sure main number part is not empty. |
237 | if main_part.bytes().filter(|&b| b != b'_' ).count() == 0 { |
238 | return Err(perr(end_prefix..end_prefix + end_main, NoDigits)); |
239 | } |
240 | |
241 | Ok(IntegerLit { |
242 | raw: input, |
243 | start_main_part: end_prefix, |
244 | end_main_part: end_main + end_prefix, |
245 | base, |
246 | }) |
247 | } |
248 | |
249 | |
/// The bases in which an integer can be specified.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum IntegerBase {
    /// Base 2, indicated by the prefix `0b`.
    Binary,
    /// Base 8, indicated by the prefix `0o`.
    Octal,
    /// Base 10, which has no prefix.
    Decimal,
    /// Base 16, indicated by the prefix `0x`.
    Hexadecimal,
}

impl IntegerBase {
    /// Returns the literal prefix that indicates this base, i.e. `"0b"`,
    /// `"0o"`, `""` and `"0x"`.
    pub fn prefix(self) -> &'static str {
        match self {
            // Decimal is the only base without a prefix.
            IntegerBase::Decimal => "",
            IntegerBase::Binary => "0b",
            IntegerBase::Octal => "0o",
            IntegerBase::Hexadecimal => "0x",
        }
    }

    /// Returns the base value, i.e. 2, 8, 10 or 16.
    pub fn value(self) -> u8 {
        match self {
            IntegerBase::Decimal => 10,
            IntegerBase::Binary => 2,
            IntegerBase::Octal => 8,
            IntegerBase::Hexadecimal => 16,
        }
    }
}
281 | |
/// All possible integer type suffixes.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum IntegerType {
    U8,
    U16,
    U32,
    U64,
    U128,
    Usize,
    I8,
    I16,
    I32,
    I64,
    I128,
    Isize,
}

impl IntegerType {
    /// Every variant of this enum; used to look up a variant by its suffix.
    const ALL: [IntegerType; 12] = [
        IntegerType::U8,
        IntegerType::U16,
        IntegerType::U32,
        IntegerType::U64,
        IntegerType::U128,
        IntegerType::Usize,
        IntegerType::I8,
        IntegerType::I16,
        IntegerType::I32,
        IntegerType::I64,
        IntegerType::I128,
        IntegerType::Isize,
    ];

    /// Returns the type corresponding to the given suffix (e.g. `"u8"` is
    /// mapped to `Self::U8`). If the suffix is not a valid integer type,
    /// `None` is returned.
    pub fn from_suffix(suffix: &str) -> Option<Self> {
        // `suffix()` is the single source of truth for the spelling of each
        // type, so the reverse lookup is derived from it.
        Self::ALL.iter().copied().find(|ty| ty.suffix() == suffix)
    }

    /// Returns the suffix for this type, e.g. `"u8"` for `Self::U8`.
    pub fn suffix(self) -> &'static str {
        match self {
            IntegerType::U8 => "u8",
            IntegerType::U16 => "u16",
            IntegerType::U32 => "u32",
            IntegerType::U64 => "u64",
            IntegerType::U128 => "u128",
            IntegerType::Usize => "usize",
            IntegerType::I8 => "i8",
            IntegerType::I16 => "i16",
            IntegerType::I32 => "i32",
            IntegerType::I64 => "i64",
            IntegerType::I128 => "i128",
            IntegerType::Isize => "isize",
        }
    }
}

/// Parses a type suffix such as `"u8"`; equivalent to
/// [`IntegerType::from_suffix`], with `Err(())` for unknown suffixes.
impl FromStr for IntegerType {
    type Err = ();

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match Self::from_suffix(s) {
            Some(ty) => Ok(ty),
            None => Err(()),
        }
    }
}

/// Formats the type as its suffix, e.g. `u8`.
impl fmt::Display for IntegerType {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Delegate to `str`'s implementation, passing the formatter through.
        fmt::Display::fmt(self.suffix(), f)
    }
}
353 | |
354 | |
// Unit tests for this module live in the `tests` submodule, which is only
// compiled for test builds.
#[cfg(test)]
mod tests;
357 | |