1use std::{fmt, str::FromStr};
2
3use crate::{
4 Buffer, ParseError,
5 err::{perr, ParseErrorKind::*},
6 parse::{first_byte_or_empty, hex_digit_value, check_suffix},
7};
8
9
10/// An integer literal, e.g. `27`, `0x7F`, `0b101010u8` or `5_000_000i64`.
11///
12/// An integer literal consists of an optional base prefix (`0b`, `0o`, `0x`),
13/// the main part (digits and underscores), and an optional type suffix
14/// (e.g. `u64` or `i8`). See [the reference][ref] for more information.
15///
16/// Note that integer literals are always positive: the grammar does not contain
17/// the minus sign at all. The minus sign is just the unary negate operator,
18/// not part of the literal. Which is interesting for cases like `- 128i8`:
19/// here, the literal itself would overflow the specified type (`i8` cannot
20/// represent 128). That's why in rustc, the literal overflow check is
21/// performed as a lint after parsing, not during the lexing stage. Similarly,
22/// [`IntegerLit::parse`] does not perform an overflow check.
23///
24/// [ref]: https://doc.rust-lang.org/reference/tokens.html#integer-literals
25#[derive(Debug, Clone, Copy, PartialEq, Eq)]
26#[non_exhaustive]
27pub struct IntegerLit<B: Buffer> {
28 /// The raw literal. Grammar: `<prefix?><main part><suffix?>`.
29 raw: B,
30 /// First index of the main number part (after the base prefix).
31 start_main_part: usize,
32 /// First index not part of the main number part.
33 end_main_part: usize,
34 /// Parsed `raw[..start_main_part]`.
35 base: IntegerBase,
36}
37
38impl<B: Buffer> IntegerLit<B> {
39 /// Parses the input as an integer literal. Returns an error if the input is
40 /// invalid or represents a different kind of literal.
41 pub fn parse(input: B) -> Result<Self, ParseError> {
42 match first_byte_or_empty(&input)? {
43 digit @ b'0'..=b'9' => {
44 // TODO: simplify once RFC 2528 is stabilized
45 let IntegerLit {
46 start_main_part,
47 end_main_part,
48 base,
49 ..
50 } = parse_impl(&input, digit)?;
51
52 Ok(Self { raw: input, start_main_part, end_main_part, base })
53 },
54 _ => Err(perr(0, DoesNotStartWithDigit)),
55 }
56 }
57
58 /// Performs the actual string to int conversion to obtain the integer
59 /// value. The optional type suffix of the literal **is ignored by this
60 /// method**. This means `N` does not need to match the type suffix!
61 ///
62 /// Returns `None` if the literal overflows `N`.
63 ///
64 /// Hint: `u128` can represent all possible values integer literal values,
65 /// as there are no negative literals (see type docs). Thus you can, for
66 /// example, safely use `lit.value::<u128>().to_string()` to get a decimal
67 /// string. (Technically, Rust integer literals can represent arbitrarily
68 /// large numbers, but those would be rejected at a later stage by the Rust
69 /// compiler).
70 pub fn value<N: FromIntegerLiteral>(&self) -> Option<N> {
71 let base = N::from_small_number(self.base.value());
72
73 let mut acc = N::from_small_number(0);
74 for digit in self.raw_main_part().bytes() {
75 if digit == b'_' {
76 continue;
77 }
78
79 // We don't actually need the base here: we already know this main
80 // part only contains digits valid for the specified base.
81 let digit = hex_digit_value(digit)
82 .unwrap_or_else(|| unreachable!("bug: integer main part contains non-digit"));
83
84 acc = acc.checked_mul(base)?;
85 acc = acc.checked_add(N::from_small_number(digit))?;
86 }
87
88 Some(acc)
89 }
90
91 /// The base of this integer literal.
92 pub fn base(&self) -> IntegerBase {
93 self.base
94 }
95
96 /// The main part containing the digits and potentially `_`. Do not try to
97 /// parse this directly as that would ignore the base!
98 pub fn raw_main_part(&self) -> &str {
99 &(*self.raw)[self.start_main_part..self.end_main_part]
100 }
101
102 /// The optional suffix. Returns `""` if the suffix is empty/does not exist.
103 ///
104 /// If you want the type, try `IntegerType::from_suffix(lit.suffix())`.
105 pub fn suffix(&self) -> &str {
106 &(*self.raw)[self.end_main_part..]
107 }
108
109 /// Returns the raw input that was passed to `parse`.
110 pub fn raw_input(&self) -> &str {
111 &self.raw
112 }
113
114 /// Returns the raw input that was passed to `parse`, potentially owned.
115 pub fn into_raw_input(self) -> B {
116 self.raw
117 }
118}
119
120impl IntegerLit<&str> {
121 /// Makes a copy of the underlying buffer and returns the owned version of
122 /// `Self`.
123 pub fn to_owned(&self) -> IntegerLit<String> {
124 IntegerLit {
125 raw: self.raw.to_owned(),
126 start_main_part: self.start_main_part,
127 end_main_part: self.end_main_part,
128 base: self.base,
129 }
130 }
131}
132
133impl<B: Buffer> fmt::Display for IntegerLit<B> {
134 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
135 write!(f, "{}", &*self.raw)
136 }
137}
138
139/// Integer literal types. *Implementation detail*.
140///
141/// Implemented for all integer literal types. This trait is sealed and cannot
142/// be implemented outside of this crate. The trait's methods are implementation
143/// detail of this library and are not subject to semver.
144pub trait FromIntegerLiteral: self::sealed::Sealed + Copy {
145 /// Creates itself from the given number. `n` is guaranteed to be `<= 16`.
146 #[doc(hidden)]
147 fn from_small_number(n: u8) -> Self;
148
149 #[doc(hidden)]
150 fn checked_add(self, rhs: Self) -> Option<Self>;
151
152 #[doc(hidden)]
153 fn checked_mul(self, rhs: Self) -> Option<Self>;
154
155 #[doc(hidden)]
156 fn ty() -> IntegerType;
157}
158
// Implements `Sealed` and `FromIntegerLiteral` for a list of primitive integer
// types. Each entry has the form `<type> => <IntegerType variant>,` (the
// trailing comma is required after every entry).
macro_rules! impl_from_int_literal {
    ($( $int:ty => $kind:ident ,)* ) => {
        $(
            impl self::sealed::Sealed for $int {}

            impl FromIntegerLiteral for $int {
                fn from_small_number(n: u8) -> Self {
                    // `n` is at most 16 (see trait docs), so this cast is
                    // lossless for every integer type.
                    n as $int
                }

                fn checked_add(self, rhs: Self) -> Option<Self> {
                    // Explicitly the primitive's inherent method, not this
                    // trait method.
                    <$int>::checked_add(self, rhs)
                }

                fn checked_mul(self, rhs: Self) -> Option<Self> {
                    // Explicitly the primitive's inherent method, not this
                    // trait method.
                    <$int>::checked_mul(self, rhs)
                }

                fn ty() -> IntegerType {
                    IntegerType::$kind
                }
            }
        )*
    };
}
180
181impl_from_int_literal!(
182 u8 => U8, u16 => U16, u32 => U32, u64 => U64, u128 => U128, usize => Usize,
183 i8 => I8, i16 => I16, i32 => I32, i64 => I64, i128 => I128, isize => Isize,
184);
185
// Private module holding the supertrait of `FromIntegerLiteral`. As the module
// is not public, code outside of this crate cannot name `Sealed` and thus
// cannot implement `FromIntegerLiteral`.
mod sealed {
    pub trait Sealed {}
}
189
190/// Precondition: first byte of string has to be in `b'0'..=b'9'`.
191#[inline(never)]
192pub(crate) fn parse_impl(input: &str, first: u8) -> Result<IntegerLit<&str>, ParseError> {
193 // Figure out base and strip prefix base, if it exists.
194 let (end_prefix, base) = match (first, input.as_bytes().get(1)) {
195 (b'0', Some(b'b')) => (2, IntegerBase::Binary),
196 (b'0', Some(b'o')) => (2, IntegerBase::Octal),
197 (b'0', Some(b'x')) => (2, IntegerBase::Hexadecimal),
198
199 // Everything else is treated as decimal. Several cases are caught
200 // by this:
201 // - "123"
202 // - "0"
203 // - "0u8"
204 // - "0r" -> this will error later
205 _ => (0, IntegerBase::Decimal),
206 };
207 let without_prefix = &input[end_prefix..];
208
209
210 // Scan input to find the first character that's not a valid digit.
211 let is_valid_digit = match base {
212 IntegerBase::Binary => |b| matches!(b, b'0' | b'1' | b'_'),
213 IntegerBase::Octal => |b| matches!(b, b'0'..=b'7' | b'_'),
214 IntegerBase::Decimal => |b| matches!(b, b'0'..=b'9' | b'_'),
215 IntegerBase::Hexadecimal => |b| matches!(b, b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F' | b'_'),
216 };
217 let end_main = without_prefix.bytes()
218 .position(|b| !is_valid_digit(b))
219 .unwrap_or(without_prefix.len());
220 let (main_part, suffix) = without_prefix.split_at(end_main);
221
222 check_suffix(suffix).map_err(|kind| {
223 // This is just to have a nicer error kind for this special case. If the
224 // suffix is invalid, it is non-empty -> unwrap ok.
225 let first = suffix.as_bytes()[0];
226 if !is_valid_digit(first) && first.is_ascii_digit() {
227 perr(end_main + end_prefix, InvalidDigit)
228 } else {
229 perr(end_main + end_prefix..input.len(), kind)
230 }
231 })?;
232 if suffix.starts_with('e') || suffix.starts_with('E') {
233 return Err(perr(end_main, IntegerSuffixStartingWithE));
234 }
235
236 // Make sure main number part is not empty.
237 if main_part.bytes().filter(|&b| b != b'_').count() == 0 {
238 return Err(perr(end_prefix..end_prefix + end_main, NoDigits));
239 }
240
241 Ok(IntegerLit {
242 raw: input,
243 start_main_part: end_prefix,
244 end_main_part: end_main + end_prefix,
245 base,
246 })
247}
248
249
/// The bases in which an integer can be specified.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum IntegerBase {
    /// Base 2, written with the `0b` prefix.
    Binary,
    /// Base 8, written with the `0o` prefix.
    Octal,
    /// Base 10, written without any prefix.
    Decimal,
    /// Base 16, written with the `0x` prefix.
    Hexadecimal,
}

impl IntegerBase {
    /// Returns the literal prefix that indicates this base, i.e. `"0b"`,
    /// `"0o"`, `""` and `"0x"`.
    pub fn prefix(self) -> &'static str {
        match self {
            IntegerBase::Decimal => "",
            IntegerBase::Binary => "0b",
            IntegerBase::Octal => "0o",
            IntegerBase::Hexadecimal => "0x",
        }
    }

    /// Returns the base value, i.e. 2, 8, 10 or 16.
    pub fn value(self) -> u8 {
        match self {
            IntegerBase::Decimal => 10,
            IntegerBase::Binary => 2,
            IntegerBase::Octal => 8,
            IntegerBase::Hexadecimal => 16,
        }
    }
}
281
/// All possible integer type suffixes.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum IntegerType {
    U8,
    U16,
    U32,
    U64,
    U128,
    Usize,
    I8,
    I16,
    I32,
    I64,
    I128,
    Isize,
}

impl IntegerType {
    /// Returns the type corresponding to the given suffix (e.g. `"u8"` is
    /// mapped to `Self::U8`). If the suffix is not a valid integer type,
    /// `None` is returned.
    ///
    /// The comparison is exact: it is case sensitive and the empty string is
    /// not a valid suffix.
    pub fn from_suffix(suffix: &str) -> Option<Self> {
        let ty = match suffix {
            "u8" => IntegerType::U8,
            "u16" => IntegerType::U16,
            "u32" => IntegerType::U32,
            "u64" => IntegerType::U64,
            "u128" => IntegerType::U128,
            "usize" => IntegerType::Usize,
            "i8" => IntegerType::I8,
            "i16" => IntegerType::I16,
            "i32" => IntegerType::I32,
            "i64" => IntegerType::I64,
            "i128" => IntegerType::I128,
            "isize" => IntegerType::Isize,
            _ => return None,
        };

        Some(ty)
    }

    /// Returns the suffix for this type, e.g. `"u8"` for `Self::U8`.
    ///
    /// This is the inverse of [`IntegerType::from_suffix`].
    pub fn suffix(self) -> &'static str {
        match self {
            IntegerType::U8 => "u8",
            IntegerType::U16 => "u16",
            IntegerType::U32 => "u32",
            IntegerType::U64 => "u64",
            IntegerType::U128 => "u128",
            IntegerType::Usize => "usize",
            IntegerType::I8 => "i8",
            IntegerType::I16 => "i16",
            IntegerType::I32 => "i32",
            IntegerType::I64 => "i64",
            IntegerType::I128 => "i128",
            IntegerType::Isize => "isize",
        }
    }
}

/// Parses a type suffix such as `"u8"`; see [`IntegerType::from_suffix`].
/// Any string that is not a valid suffix results in `Err(())`.
impl FromStr for IntegerType {
    type Err = ();

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match Self::from_suffix(s) {
            Some(ty) => Ok(ty),
            None => Err(()),
        }
    }
}

/// Formats the type as its suffix (e.g. `u8`), honoring width, fill and
/// precision flags of the formatter.
impl fmt::Display for IntegerType {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.pad(self.suffix())
    }
}
353
354
355#[cfg(test)]
356mod tests;
357