| 1 | //! Parsing and inspecting Rust literal tokens. |
| 2 | //! |
| 3 | //! This library offers functionality to parse Rust literals, i.e. tokens in the |
| 4 | //! Rust programming language that represent fixed values. The grammar for |
| 5 | //! those is defined [here][ref]. |
| 6 | //! |
| 7 | //! This kind of functionality already exists in the crate `syn`. However, as |
| 8 | //! you oftentimes don't need (nor want) the full power of `syn`, `litrs` was |
| 9 | //! built. This crate also offers a bit more flexibility compared to `syn` |
| 10 | //! (only regarding literals, of course). |
| 11 | //! |
| 12 | //! |
| 13 | //! # Quick start |
| 14 | //! |
| 15 | //! | **`StringLit::try_from(tt)?.value()`** | |
| 16 | //! | - | |
| 17 | //! |
| 18 | //! ... where `tt` is a `proc_macro::TokenTree` and where [`StringLit`] can be |
| 19 | //! replaced with [`Literal`] or other types of literals (e.g. [`FloatLit`]). |
| 20 | //! Calling `value()` returns the value that is represented by the literal. |
| 21 | //! |
| 22 | //! **Mini Example** |
| 23 | //! |
| 24 | //! ```ignore |
| 25 | //! use proc_macro::TokenStream; |
| 26 | //! |
| 27 | //! #[proc_macro] |
| 28 | //! pub fn foo(input: TokenStream) -> TokenStream { |
| 29 | //! let first_token = input.into_iter().next().unwrap(); // Do proper error handling! |
| 30 | //! let string_value = match litrs::StringLit::try_from(first_token) { |
| 31 | //! Ok(string_lit) => string_lit.value(), |
| 32 | //! Err(e) => return e.to_compile_error(), |
| 33 | //! }; |
| 34 | //! |
| 35 | //! // `string_value` is the string value with all escapes resolved. |
| 36 | //! todo!() |
| 37 | //! } |
| 38 | //! ``` |
| 39 | //! |
| 40 | //! # Overview |
| 41 | //! |
| 42 | //! The main types of this library are [`Literal`], representing any kind of |
| 43 | //! literal, and `*Lit`, like [`StringLit`] or [`FloatLit`], representing a |
| 44 | //! specific kind of literal. |
| 45 | //! |
| 46 | //! There are different ways to obtain such a literal type: |
| 47 | //! |
| 48 | //! - **`parse`**: parses a `&str` or `String` and returns `Result<_, |
| 49 | //! ParseError>`. For example: [`Literal::parse`] and |
| 50 | //! [`IntegerLit::parse`]. |
| 51 | //! |
| 52 | //! - **`From<proc_macro::Literal> for Literal`**: turns a `Literal` value from |
| 53 | //! the `proc_macro` crate into a `Literal` from this crate. |
| 54 | //! |
| 55 | //! - **`TryFrom<proc_macro::Literal> for *Lit`**: tries to turn a |
| 56 | //! `proc_macro::Literal` into a specific literal type of this crate. If |
| 57 | //! the input is a literal of a different kind, `Err(InvalidToken)` is |
| 58 | //! returned. |
| 59 | //! |
| 60 | //! - **`TryFrom<proc_macro::TokenTree>`**: attempts to turn a token tree into a |
| 61 | //! literal type of this crate. An error is returned if the token tree is |
| 62 | //! not a literal, or if you are trying to turn it into a specific kind of |
| 63 | //! literal and the token tree is a different kind of literal. |
| 64 | //! |
| 65 | //! All of the `From` and `TryFrom` conversions also work for references to |
| 66 | //! `proc_macro` types. Additionally, if the crate feature `proc-macro2` is |
| 67 | //! enabled (which it is by default), all these `From` and `TryFrom` impls also |
| 68 | //! exist for the corresponding `proc_macro2` types. |
| 69 | //! |
| 70 | //! **Note**: `true` and `false` are `Ident`s when passed to your proc macro. |
| 71 | //! The `TryFrom<TokenTree>` impls check for those two special idents and |
| 72 | //! return a [`BoolLit`] appropriately. For that reason, there is also no |
| 73 | //! `TryFrom<proc_macro::Literal>` impl for [`BoolLit`]. The `proc_macro::Literal` |
| 74 | //! simply cannot represent bool literals. |
| 75 | //! |
| 76 | //! |
| 77 | //! # Examples |
| 78 | //! |
| 79 | //! In a proc-macro: |
| 80 | //! |
| 81 | //! ```ignore |
| 82 | //! use std::convert::TryFrom; |
| 83 | //! use proc_macro::TokenStream; |
| 84 | //! use litrs::FloatLit; |
| 85 | //! |
| 86 | //! #[proc_macro] |
| 87 | //! pub fn foo(input: TokenStream) -> TokenStream { |
| 88 | //! let mut input = input.into_iter().collect::<Vec<_>>(); |
| 89 | //! if input.len() != 1 { |
| 90 | //! // Please do proper error handling in your real code! |
| 91 | //! panic!("expected exactly one token as input" ); |
| 92 | //! } |
| 93 | //! let token = input.remove(0); |
| 94 | //! |
| 95 | //! match FloatLit::try_from(token) { |
| 96 | //! Ok(float_lit) => { /* do something */ } |
| 97 | //! Err(e) => return e.to_compile_error(), |
| 98 | //! } |
| 99 | //! |
| 100 | //! // Dummy output |
| 101 | //! TokenStream::new() |
| 102 | //! } |
| 103 | //! ``` |
| 104 | //! |
| 105 | //! Parsing from string: |
| 106 | //! |
| 107 | //! ``` |
| 108 | //! use litrs::{FloatLit, Literal}; |
| 109 | //! |
| 110 | //! // Parse a specific kind of literal (float in this case): |
| 111 | //! let float_lit = FloatLit::parse("3.14f32" ); |
| 112 | //! assert!(float_lit.is_ok()); |
| 113 | //! assert_eq!(float_lit.unwrap().suffix(), "f32" ); |
| 114 | //! assert!(FloatLit::parse("'c'" ).is_err()); |
| 115 | //! |
| 116 | //! // Parse any kind of literal. After parsing, you can inspect the literal |
| 117 | //! // and decide what to do in each case. |
| 118 | //! let lit = Literal::parse("0xff80" ).expect("failed to parse literal" ); |
| 119 | //! match lit { |
| 120 | //! Literal::Integer(lit) => { /* ... */ } |
| 121 | //! Literal::Float(lit) => { /* ... */ } |
| 122 | //! Literal::Bool(lit) => { /* ... */ } |
| 123 | //! Literal::Char(lit) => { /* ... */ } |
| 124 | //! Literal::String(lit) => { /* ... */ } |
| 125 | //! Literal::Byte(lit) => { /* ... */ } |
| 126 | //! Literal::ByteString(lit) => { /* ... */ } |
| 127 | //! } |
| 128 | //! ``` |
| 129 | //! |
| 130 | //! |
| 131 | //! |
| 132 | //! # Crate features |
| 133 | //! |
| 134 | //! - `proc-macro2` (**default**): adds the dependency `proc_macro2`, a bunch of |
| 135 | //! `From` and `TryFrom` impls, and [`InvalidToken::to_compile_error2`]. |
| 136 | //! - `check_suffix`: if enabled, `parse` functions will exactly verify that the |
| 137 | //! literal suffix is valid. Adds the dependency `unicode-xid`. If disabled, |
| 138 | //! only an approximate check (only in ASCII range) is done. If you are |
| 139 | //! writing a proc macro, you don't need to enable this as the suffix is |
| 140 | //! already checked by the compiler. |
| 141 | //! |
| 142 | //! |
| 143 | //! [ref]: https://doc.rust-lang.org/reference/tokens.html#literals |
| 144 | //! |
| 145 | |
| 146 | #![deny (missing_debug_implementations)] |
| 147 | |
| 148 | extern crate proc_macro; |
| 149 | |
| 150 | #[cfg (test)] |
| 151 | #[macro_use ] |
| 152 | mod test_util; |
| 153 | |
| 154 | #[cfg (test)] |
| 155 | mod tests; |
| 156 | |
| 157 | mod bool; |
| 158 | mod byte; |
| 159 | mod bytestr; |
| 160 | mod char; |
| 161 | mod err; |
| 162 | mod escape; |
| 163 | mod float; |
| 164 | mod impls; |
| 165 | mod integer; |
| 166 | mod parse; |
| 167 | mod string; |
| 168 | |
| 169 | |
| 170 | use std::{borrow::{Borrow, Cow}, fmt, ops::{Deref, Range}}; |
| 171 | |
| 172 | pub use self::{ |
| 173 | bool::BoolLit, |
| 174 | byte::ByteLit, |
| 175 | bytestr::ByteStringLit, |
| 176 | char::CharLit, |
| 177 | err::{InvalidToken, ParseError}, |
| 178 | float::{FloatLit, FloatType}, |
| 179 | integer::{FromIntegerLiteral, IntegerLit, IntegerBase, IntegerType}, |
| 180 | string::StringLit, |
| 181 | }; |
| 182 | |
| 183 | |
| 184 | // ============================================================================================== |
| 185 | // ===== `Literal` and type defs |
| 186 | // ============================================================================================== |
| 187 | |
| 188 | /// A literal. This is the main type of this library. |
| 189 | /// |
| 190 | /// This type is generic over the underlying buffer `B`, which can be `&str` or |
| 191 | /// `String`. |
| 192 | /// |
| 193 | /// To create this type, you have to either call [`Literal::parse`] with an |
| 194 | /// input string or use the `From<_>` impls of this type. The impls are only |
| 195 | /// available of the corresponding crate features are enabled (they are enabled |
| 196 | /// by default). |
| 197 | #[derive (Debug, Clone, PartialEq, Eq)] |
| 198 | pub enum Literal<B: Buffer> { |
| 199 | Bool(BoolLit), |
| 200 | Integer(IntegerLit<B>), |
| 201 | Float(FloatLit<B>), |
| 202 | Char(CharLit<B>), |
| 203 | String(StringLit<B>), |
| 204 | Byte(ByteLit<B>), |
| 205 | ByteString(ByteStringLit<B>), |
| 206 | } |
| 207 | |
| 208 | impl<B: Buffer> Literal<B> { |
| 209 | /// Parses the given input as a Rust literal. |
| 210 | pub fn parse(input: B) -> Result<Self, ParseError> { |
| 211 | parse::parse(input) |
| 212 | } |
| 213 | |
| 214 | /// Returns the suffix of this literal or `""` if it doesn't have one. |
| 215 | /// |
| 216 | /// Rust token grammar actually allows suffixes for all kinds of tokens. |
| 217 | /// Most Rust programmer only know the type suffixes for integer and |
| 218 | /// floats, e.g. `0u32`. And in normal Rust code, everything else causes an |
| 219 | /// error. But it is possible to pass literals with arbitrary suffixes to |
| 220 | /// proc macros, for example: |
| 221 | /// |
| 222 | /// ```ignore |
| 223 | /// some_macro!(3.14f33 16px '🦊'good_boy "toph"beifong ); |
| 224 | /// ``` |
| 225 | /// |
| 226 | /// Boolean literals, not actually being literals, but idents, cannot have |
| 227 | /// suffixes and this method always returns `""` for those. |
| 228 | /// |
| 229 | /// There are some edge cases to be aware of: |
| 230 | /// - Integer suffixes must not start with `e` or `E` as that conflicts with |
| 231 | /// the exponent grammar for floats. `0e1` is a float; `0eel` is also |
| 232 | /// parsed as a float and results in an error. |
| 233 | /// - Hexadecimal integers eagerly parse digits, so `0x5abcdefgh` has a |
| 234 | /// suffix von `gh`. |
| 235 | /// - Suffixes can contain and start with `_`, but for integer and number |
| 236 | /// literals, `_` is eagerly parsed as part of the number, so `1_x` has |
| 237 | /// the suffix `x`. |
| 238 | /// - The input `55f32` is regarded as integer literal with suffix `f32`. |
| 239 | /// |
| 240 | /// # Example |
| 241 | /// |
| 242 | /// ``` |
| 243 | /// use litrs::Literal; |
| 244 | /// |
| 245 | /// assert_eq!(Literal::parse(r##"3.14f33"## ).unwrap().suffix(), "f33" ); |
| 246 | /// assert_eq!(Literal::parse(r##"123hackerman"## ).unwrap().suffix(), "hackerman" ); |
| 247 | /// assert_eq!(Literal::parse(r##"0x0fuck"## ).unwrap().suffix(), "uck" ); |
| 248 | /// assert_eq!(Literal::parse(r##"'🦊'good_boy"## ).unwrap().suffix(), "good_boy" ); |
| 249 | /// assert_eq!(Literal::parse(r##""toph"beifong"## ).unwrap().suffix(), "beifong" ); |
| 250 | /// ``` |
| 251 | pub fn suffix(&self) -> &str { |
| 252 | match self { |
| 253 | Literal::Bool(_) => "" , |
| 254 | Literal::Integer(l) => l.suffix(), |
| 255 | Literal::Float(l) => l.suffix(), |
| 256 | Literal::Char(l) => l.suffix(), |
| 257 | Literal::String(l) => l.suffix(), |
| 258 | Literal::Byte(l) => l.suffix(), |
| 259 | Literal::ByteString(l) => l.suffix(), |
| 260 | } |
| 261 | } |
| 262 | } |
| 263 | |
| 264 | impl Literal<&str> { |
| 265 | /// Makes a copy of the underlying buffer and returns the owned version of |
| 266 | /// `Self`. |
| 267 | pub fn into_owned(self) -> Literal<String> { |
| 268 | match self { |
| 269 | Literal::Bool(l: BoolLit) => Literal::Bool(l.to_owned()), |
| 270 | Literal::Integer(l: IntegerLit<&str>) => Literal::Integer(l.to_owned()), |
| 271 | Literal::Float(l: FloatLit<&str>) => Literal::Float(l.to_owned()), |
| 272 | Literal::Char(l: CharLit<&str>) => Literal::Char(l.to_owned()), |
| 273 | Literal::String(l: StringLit<&str>) => Literal::String(l.into_owned()), |
| 274 | Literal::Byte(l: ByteLit<&str>) => Literal::Byte(l.to_owned()), |
| 275 | Literal::ByteString(l: ByteStringLit<&str>) => Literal::ByteString(l.into_owned()), |
| 276 | } |
| 277 | } |
| 278 | } |
| 279 | |
| 280 | impl<B: Buffer> fmt::Display for Literal<B> { |
| 281 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
| 282 | match self { |
| 283 | Literal::Bool(l: &BoolLit) => l.fmt(f), |
| 284 | Literal::Integer(l: &IntegerLit) => l.fmt(f), |
| 285 | Literal::Float(l: &FloatLit) => l.fmt(f), |
| 286 | Literal::Char(l: &CharLit) => l.fmt(f), |
| 287 | Literal::String(l: &StringLit) => l.fmt(f), |
| 288 | Literal::Byte(l: &ByteLit) => l.fmt(f), |
| 289 | Literal::ByteString(l: &ByteStringLit) => l.fmt(f), |
| 290 | } |
| 291 | } |
| 292 | } |
| 293 | |
| 294 | |
| 295 | // ============================================================================================== |
| 296 | // ===== Buffer |
| 297 | // ============================================================================================== |
| 298 | |
/// A shared or owned string buffer. Implemented for `String` and `&str`. *Implementation detail*.
///
/// This trait is an implementation detail of this library, cannot be
/// implemented in other crates and is not subject to semantic versioning.
/// `litrs` only guarantees that this trait is implemented for `String` and
/// `for<'a> &'a str`.
pub trait Buffer: sealed::Sealed + Deref<Target = str> {
    /// This is `Cow<'static, str>` for `String`, and `Cow<'a, str>` for `&'a str`.
    type Cow: From<String> + AsRef<str> + Borrow<str> + Deref<Target = str>;

    /// Converts the buffer into its `Cow` form without copying the data.
    #[doc(hidden)]
    fn into_cow(self) -> Self::Cow;

    /// This is `Cow<'static, [u8]>` for `String`, and `Cow<'a, [u8]>` for `&'a str`.
    type ByteCow: From<Vec<u8>> + AsRef<[u8]> + Borrow<[u8]> + Deref<Target = [u8]>;

    /// Converts the buffer into its byte `Cow` form without copying the data.
    #[doc(hidden)]
    fn into_byte_cow(self) -> Self::ByteCow;

    /// Cuts away everything before `range.start` and everything from
    /// `range.end` on, keeping only the bytes inside `range`.
    ///
    /// `range` is given in bytes, has to be in bounds and has to fall on
    /// `char` boundaries.
    #[doc(hidden)]
    fn cut(self, range: Range<usize>) -> Self;
}

/// Private module holding the sealing trait: as `Sealed` cannot be named from
/// outside this crate, `Buffer` cannot be implemented by other crates.
mod sealed {
    pub trait Sealed {}
}

impl<'a> sealed::Sealed for &'a str {}
impl<'a> Buffer for &'a str {
    type Cow = Cow<'a, str>;
    type ByteCow = Cow<'a, [u8]>;

    #[doc(hidden)]
    fn into_cow(self) -> Self::Cow {
        Cow::Borrowed(self)
    }

    #[doc(hidden)]
    fn into_byte_cow(self) -> Self::ByteCow {
        Cow::Borrowed(self.as_bytes())
    }

    /// Plain sub-slicing; panics if `range` is out of bounds or not on `char`
    /// boundaries.
    #[doc(hidden)]
    fn cut(self, range: Range<usize>) -> Self {
        &self[range]
    }
}

impl sealed::Sealed for String {}
impl Buffer for String {
    type Cow = Cow<'static, str>;
    type ByteCow = Cow<'static, [u8]>;

    #[doc(hidden)]
    fn into_cow(self) -> Self::Cow {
        Cow::Owned(self)
    }

    #[doc(hidden)]
    fn into_byte_cow(self) -> Self::ByteCow {
        Cow::Owned(self.into_bytes())
    }

    /// Shrinks the string in place, reusing its allocation.
    #[doc(hidden)]
    fn cut(mut self, range: Range<usize>) -> Self {
        // This is not the most efficient way, but it works. First we cut the
        // end, then the beginning (in that order, so that `range.start` is
        // still a valid index). Note that `drain` also removes the range if
        // the iterator is not consumed.
        self.truncate(range.end);
        self.drain(..range.start);
        self
    }
}
| 371 | |