| 1 | use std::{fmt, str::FromStr}; |
| 2 | |
| 3 | use crate::{ |
| 4 | Buffer, ParseError, |
| 5 | err::{perr, ParseErrorKind::*}, |
| 6 | parse::{first_byte_or_empty, hex_digit_value, check_suffix}, |
| 7 | }; |
| 8 | |
| 9 | |
/// An integer literal, e.g. `27`, `0x7F`, `0b101010u8` or `5_000_000i64`.
///
/// An integer literal consists of an optional base prefix (`0b`, `0o`, `0x`),
/// the main part (digits and underscores), and an optional type suffix
/// (e.g. `u64` or `i8`). See [the reference][ref] for more information.
///
/// Note that integer literals are always positive: the grammar does not contain
/// the minus sign at all. The minus sign is just the unary negate operator,
/// not part of the literal. This matters for cases like `- 128i8`: here, the
/// literal itself would overflow the specified type (`i8` cannot represent
/// 128). That's why in rustc, the literal overflow check is performed as a
/// lint after parsing, not during the lexing stage. Similarly,
/// [`IntegerLit::parse`] does not perform an overflow check.
///
/// [ref]: https://doc.rust-lang.org/reference/tokens.html#integer-literals
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub struct IntegerLit<B: Buffer> {
    /// The raw literal. Grammar: `<prefix?><main part><suffix?>`.
    raw: B,
    /// Byte index into `raw` of the first byte of the main number part, i.e.
    /// the position directly after the base prefix (if there is one).
    start_main_part: usize,
    /// Byte index into `raw` of the first byte that is no longer part of the
    /// main number part. The (possibly empty) suffix starts here.
    end_main_part: usize,
    /// The base indicated by the prefix, i.e. the parsed
    /// `raw[..start_main_part]`.
    base: IntegerBase,
}
| 37 | |
| 38 | impl<B: Buffer> IntegerLit<B> { |
| 39 | /// Parses the input as an integer literal. Returns an error if the input is |
| 40 | /// invalid or represents a different kind of literal. |
| 41 | pub fn parse(input: B) -> Result<Self, ParseError> { |
| 42 | match first_byte_or_empty(&input)? { |
| 43 | digit @ b'0' ..=b'9' => { |
| 44 | // TODO: simplify once RFC 2528 is stabilized |
| 45 | let IntegerLit { |
| 46 | start_main_part, |
| 47 | end_main_part, |
| 48 | base, |
| 49 | .. |
| 50 | } = parse_impl(&input, digit)?; |
| 51 | |
| 52 | Ok(Self { raw: input, start_main_part, end_main_part, base }) |
| 53 | }, |
| 54 | _ => Err(perr(0, DoesNotStartWithDigit)), |
| 55 | } |
| 56 | } |
| 57 | |
| 58 | /// Performs the actual string to int conversion to obtain the integer |
| 59 | /// value. The optional type suffix of the literal **is ignored by this |
| 60 | /// method**. This means `N` does not need to match the type suffix! |
| 61 | /// |
| 62 | /// Returns `None` if the literal overflows `N`. |
| 63 | /// |
| 64 | /// Hint: `u128` can represent all possible values integer literal values, |
| 65 | /// as there are no negative literals (see type docs). Thus you can, for |
| 66 | /// example, safely use `lit.value::<u128>().to_string()` to get a decimal |
| 67 | /// string. (Technically, Rust integer literals can represent arbitrarily |
| 68 | /// large numbers, but those would be rejected at a later stage by the Rust |
| 69 | /// compiler). |
| 70 | pub fn value<N: FromIntegerLiteral>(&self) -> Option<N> { |
| 71 | let base = N::from_small_number(self.base.value()); |
| 72 | |
| 73 | let mut acc = N::from_small_number(0); |
| 74 | for digit in self.raw_main_part().bytes() { |
| 75 | if digit == b'_' { |
| 76 | continue; |
| 77 | } |
| 78 | |
| 79 | // We don't actually need the base here: we already know this main |
| 80 | // part only contains digits valid for the specified base. |
| 81 | let digit = hex_digit_value(digit) |
| 82 | .unwrap_or_else(|| unreachable!("bug: integer main part contains non-digit" )); |
| 83 | |
| 84 | acc = acc.checked_mul(base)?; |
| 85 | acc = acc.checked_add(N::from_small_number(digit))?; |
| 86 | } |
| 87 | |
| 88 | Some(acc) |
| 89 | } |
| 90 | |
| 91 | /// The base of this integer literal. |
| 92 | pub fn base(&self) -> IntegerBase { |
| 93 | self.base |
| 94 | } |
| 95 | |
| 96 | /// The main part containing the digits and potentially `_`. Do not try to |
| 97 | /// parse this directly as that would ignore the base! |
| 98 | pub fn raw_main_part(&self) -> &str { |
| 99 | &(*self.raw)[self.start_main_part..self.end_main_part] |
| 100 | } |
| 101 | |
| 102 | /// The optional suffix. Returns `""` if the suffix is empty/does not exist. |
| 103 | /// |
| 104 | /// If you want the type, try `IntegerType::from_suffix(lit.suffix())`. |
| 105 | pub fn suffix(&self) -> &str { |
| 106 | &(*self.raw)[self.end_main_part..] |
| 107 | } |
| 108 | |
| 109 | /// Returns the raw input that was passed to `parse`. |
| 110 | pub fn raw_input(&self) -> &str { |
| 111 | &self.raw |
| 112 | } |
| 113 | |
| 114 | /// Returns the raw input that was passed to `parse`, potentially owned. |
| 115 | pub fn into_raw_input(self) -> B { |
| 116 | self.raw |
| 117 | } |
| 118 | } |
| 119 | |
| 120 | impl IntegerLit<&str> { |
| 121 | /// Makes a copy of the underlying buffer and returns the owned version of |
| 122 | /// `Self`. |
| 123 | pub fn to_owned(&self) -> IntegerLit<String> { |
| 124 | IntegerLit { |
| 125 | raw: self.raw.to_owned(), |
| 126 | start_main_part: self.start_main_part, |
| 127 | end_main_part: self.end_main_part, |
| 128 | base: self.base, |
| 129 | } |
| 130 | } |
| 131 | } |
| 132 | |
| 133 | impl<B: Buffer> fmt::Display for IntegerLit<B> { |
| 134 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
| 135 | write!(f, " {}" , &*self.raw) |
| 136 | } |
| 137 | } |
| 138 | |
/// Integer literal types. *Implementation detail*.
///
/// Implemented for all integer literal types. This trait is sealed and cannot
/// be implemented outside of this crate. The trait's methods are implementation
/// detail of this library and are not subject to semver.
pub trait FromIntegerLiteral: self::sealed::Sealed + Copy {
    /// Creates itself from the given number. `n` is guaranteed to be `<= 16`.
    #[doc(hidden)]
    fn from_small_number(n: u8) -> Self;

    /// Adds `rhs` to `self`, returning `None` instead of overflowing.
    #[doc(hidden)]
    fn checked_add(self, rhs: Self) -> Option<Self>;

    /// Multiplies `self` by `rhs`, returning `None` instead of overflowing.
    #[doc(hidden)]
    fn checked_mul(self, rhs: Self) -> Option<Self>;

    /// Returns the [`IntegerType`] describing the implementing type.
    #[doc(hidden)]
    fn ty() -> IntegerType;
}
| 158 | |
| 159 | macro_rules! impl_from_int_literal { |
| 160 | ($( $ty:ty => $variant:ident ,)* ) => { |
| 161 | $( |
| 162 | impl self::sealed::Sealed for $ty {} |
| 163 | impl FromIntegerLiteral for $ty { |
| 164 | fn from_small_number(n: u8) -> Self { |
| 165 | n as Self |
| 166 | } |
| 167 | fn checked_add(self, rhs: Self) -> Option<Self> { |
| 168 | self.checked_add(rhs) |
| 169 | } |
| 170 | fn checked_mul(self, rhs: Self) -> Option<Self> { |
| 171 | self.checked_mul(rhs) |
| 172 | } |
| 173 | fn ty() -> IntegerType { |
| 174 | IntegerType::$variant |
| 175 | } |
| 176 | } |
| 177 | )* |
| 178 | }; |
| 179 | } |
| 180 | |
| 181 | impl_from_int_literal!( |
| 182 | u8 => U8, u16 => U16, u32 => U32, u64 => U64, u128 => U128, usize => Usize, |
| 183 | i8 => I8, i16 => I16, i32 => I32, i64 => I64, i128 => I128, isize => Isize, |
| 184 | ); |
| 185 | |
// Private module holding the marker trait that seals `FromIntegerLiteral`:
// the module is not `pub`, so `Sealed` cannot be named (and thus not
// implemented) outside of this crate.
mod sealed {
    /// Marker supertrait of `FromIntegerLiteral`.
    pub trait Sealed {}
}
| 189 | |
| 190 | /// Precondition: first byte of string has to be in `b'0'..=b'9'`. |
| 191 | #[inline (never)] |
| 192 | pub(crate) fn parse_impl(input: &str, first: u8) -> Result<IntegerLit<&str>, ParseError> { |
| 193 | // Figure out base and strip prefix base, if it exists. |
| 194 | let (end_prefix, base) = match (first, input.as_bytes().get(1)) { |
| 195 | (b'0' , Some(b'b' )) => (2, IntegerBase::Binary), |
| 196 | (b'0' , Some(b'o' )) => (2, IntegerBase::Octal), |
| 197 | (b'0' , Some(b'x' )) => (2, IntegerBase::Hexadecimal), |
| 198 | |
| 199 | // Everything else is treated as decimal. Several cases are caught |
| 200 | // by this: |
| 201 | // - "123" |
| 202 | // - "0" |
| 203 | // - "0u8" |
| 204 | // - "0r" -> this will error later |
| 205 | _ => (0, IntegerBase::Decimal), |
| 206 | }; |
| 207 | let without_prefix = &input[end_prefix..]; |
| 208 | |
| 209 | |
| 210 | // Scan input to find the first character that's not a valid digit. |
| 211 | let is_valid_digit = match base { |
| 212 | IntegerBase::Binary => |b| matches!(b, b'0' | b'1' | b'_' ), |
| 213 | IntegerBase::Octal => |b| matches!(b, b'0' ..=b'7' | b'_' ), |
| 214 | IntegerBase::Decimal => |b| matches!(b, b'0' ..=b'9' | b'_' ), |
| 215 | IntegerBase::Hexadecimal => |b| matches!(b, b'0' ..=b'9' | b'a' ..=b'f' | b'A' ..=b'F' | b'_' ), |
| 216 | }; |
| 217 | let end_main = without_prefix.bytes() |
| 218 | .position(|b| !is_valid_digit(b)) |
| 219 | .unwrap_or(without_prefix.len()); |
| 220 | let (main_part, suffix) = without_prefix.split_at(end_main); |
| 221 | |
| 222 | check_suffix(suffix).map_err(|kind| { |
| 223 | // This is just to have a nicer error kind for this special case. If the |
| 224 | // suffix is invalid, it is non-empty -> unwrap ok. |
| 225 | let first = suffix.as_bytes()[0]; |
| 226 | if !is_valid_digit(first) && first.is_ascii_digit() { |
| 227 | perr(end_main + end_prefix, InvalidDigit) |
| 228 | } else { |
| 229 | perr(end_main + end_prefix..input.len(), kind) |
| 230 | } |
| 231 | })?; |
| 232 | if suffix.starts_with('e' ) || suffix.starts_with('E' ) { |
| 233 | return Err(perr(end_main, IntegerSuffixStartingWithE)); |
| 234 | } |
| 235 | |
| 236 | // Make sure main number part is not empty. |
| 237 | if main_part.bytes().filter(|&b| b != b'_' ).count() == 0 { |
| 238 | return Err(perr(end_prefix..end_prefix + end_main, NoDigits)); |
| 239 | } |
| 240 | |
| 241 | Ok(IntegerLit { |
| 242 | raw: input, |
| 243 | start_main_part: end_prefix, |
| 244 | end_main_part: end_main + end_prefix, |
| 245 | base, |
| 246 | }) |
| 247 | } |
| 248 | |
| 249 | |
/// The bases in which an integer can be specified.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum IntegerBase {
    /// Base 2, written with the prefix `0b`.
    Binary,
    /// Base 8, written with the prefix `0o`.
    Octal,
    /// Base 10, written without any prefix.
    Decimal,
    /// Base 16, written with the prefix `0x`.
    Hexadecimal,
}

impl IntegerBase {
    /// Returns the prefix a literal of this base starts with: `"0b"`, `"0o"`
    /// or `"0x"`, and the empty string for decimal literals.
    pub fn prefix(self) -> &'static str {
        match self {
            IntegerBase::Decimal => "",
            IntegerBase::Binary => "0b",
            IntegerBase::Octal => "0o",
            IntegerBase::Hexadecimal => "0x",
        }
    }

    /// Returns the numeric value of this base, i.e. 2, 8, 10 or 16.
    pub fn value(self) -> u8 {
        match self {
            IntegerBase::Decimal => 10,
            IntegerBase::Binary => 2,
            IntegerBase::Octal => 8,
            IntegerBase::Hexadecimal => 16,
        }
    }
}
| 281 | |
/// All possible integer type suffixes.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum IntegerType {
    U8,
    U16,
    U32,
    U64,
    U128,
    Usize,
    I8,
    I16,
    I32,
    I64,
    I128,
    Isize,
}

impl IntegerType {
    /// Every variant of this enum. Drives the reverse lookup in
    /// `from_suffix`, so it has to be extended whenever a variant is added.
    const ALL: [Self; 12] = [
        Self::U8,
        Self::U16,
        Self::U32,
        Self::U64,
        Self::U128,
        Self::Usize,
        Self::I8,
        Self::I16,
        Self::I32,
        Self::I64,
        Self::I128,
        Self::Isize,
    ];

    /// Looks up the type denoted by the given suffix, e.g. `"u8"` yields
    /// `Self::U8`. The comparison is exact (case sensitive). Returns `None`
    /// if `suffix` is not the name of an integer type.
    pub fn from_suffix(suffix: &str) -> Option<Self> {
        // Suffixes are unique per variant, so the first match is the only one.
        Self::ALL.iter().copied().find(|ty| ty.suffix() == suffix)
    }

    /// Returns the suffix for this type, e.g. `"u8"` for `Self::U8`.
    pub fn suffix(self) -> &'static str {
        match self {
            IntegerType::U8 => "u8",
            IntegerType::U16 => "u16",
            IntegerType::U32 => "u32",
            IntegerType::U64 => "u64",
            IntegerType::U128 => "u128",
            IntegerType::Usize => "usize",
            IntegerType::I8 => "i8",
            IntegerType::I16 => "i16",
            IntegerType::I32 => "i32",
            IntegerType::I64 => "i64",
            IntegerType::I128 => "i128",
            IntegerType::Isize => "isize",
        }
    }
}

/// Parses a type suffix; same mapping as [`IntegerType::from_suffix`], with
/// `Err(())` for unknown suffixes.
impl FromStr for IntegerType {
    type Err = ();

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match Self::from_suffix(s) {
            Some(ty) => Ok(ty),
            None => Err(()),
        }
    }
}

/// Writes the suffix of the type, e.g. `u8`.
impl fmt::Display for IntegerType {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Delegate to the `str` implementation so that formatting options of
        // `f` are handled by it.
        fmt::Display::fmt(self.suffix(), f)
    }
}
| 353 | |
| 354 | |
// The unit tests for this module live in the separate `tests` submodule,
// which is only compiled for test builds.
#[cfg(test)]
mod tests;
| 357 | |