1 | //! Parsing and inspecting Rust literal tokens. |
2 | //! |
3 | //! This library offers functionality to parse Rust literals, i.e. tokens in the |
4 | //! Rust programming language that represent fixed values. The grammar for |
5 | //! those is defined [here][ref]. |
6 | //! |
7 | //! This kind of functionality already exists in the crate `syn`. However, as |
8 | //! you oftentimes don't need (nor want) the full power of `syn`, `litrs` was |
9 | //! built. This crate also offers a bit more flexibility compared to `syn` |
10 | //! (only regarding literals, of course). |
11 | //! |
12 | //! |
13 | //! # Quick start |
14 | //! |
15 | //! | **`StringLit::try_from(tt)?.value()`** | |
16 | //! | - | |
17 | //! |
18 | //! ... where `tt` is a `proc_macro::TokenTree` and where [`StringLit`] can be |
19 | //! replaced with [`Literal`] or other types of literals (e.g. [`FloatLit`]). |
20 | //! Calling `value()` returns the value that is represented by the literal. |
21 | //! |
22 | //! **Mini Example** |
23 | //! |
//! ```ignore
//! use proc_macro::TokenStream;
//!
//! #[proc_macro]
//! pub fn foo(input: TokenStream) -> TokenStream {
//!     let first_token = input.into_iter().next().unwrap(); // Do proper error handling!
//!     let string_value = match litrs::StringLit::try_from(first_token) {
//!         Ok(string_lit) => string_lit.value(),
//!         Err(e) => return e.to_compile_error(),
//!     };
//!
//!     // `string_value` is the string value with all escapes resolved.
//!     todo!()
//! }
//! ```
39 | //! |
40 | //! # Overview |
41 | //! |
42 | //! The main types of this library are [`Literal`], representing any kind of |
43 | //! literal, and `*Lit`, like [`StringLit`] or [`FloatLit`], representing a |
44 | //! specific kind of literal. |
45 | //! |
46 | //! There are different ways to obtain such a literal type: |
47 | //! |
48 | //! - **`parse`**: parses a `&str` or `String` and returns `Result<_, |
49 | //! ParseError>`. For example: [`Literal::parse`] and |
50 | //! [`IntegerLit::parse`]. |
51 | //! |
52 | //! - **`From<proc_macro::Literal> for Literal`**: turns a `Literal` value from |
53 | //! the `proc_macro` crate into a `Literal` from this crate. |
54 | //! |
55 | //! - **`TryFrom<proc_macro::Literal> for *Lit`**: tries to turn a |
56 | //! `proc_macro::Literal` into a specific literal type of this crate. If |
57 | //! the input is a literal of a different kind, `Err(InvalidToken)` is |
58 | //! returned. |
59 | //! |
60 | //! - **`TryFrom<proc_macro::TokenTree>`**: attempts to turn a token tree into a |
61 | //! literal type of this crate. An error is returned if the token tree is |
62 | //! not a literal, or if you are trying to turn it into a specific kind of |
63 | //! literal and the token tree is a different kind of literal. |
64 | //! |
//! All of the `From` and `TryFrom` conversions also work for references to
//! `proc_macro` types. Additionally, if the crate feature `proc-macro2` is
//! enabled (which it is by default), all these `From` and `TryFrom` impls also
//! exist for the corresponding `proc_macro2` types.
69 | //! |
70 | //! **Note**: `true` and `false` are `Ident`s when passed to your proc macro. |
71 | //! The `TryFrom<TokenTree>` impls check for those two special idents and |
72 | //! return a [`BoolLit`] appropriately. For that reason, there is also no |
73 | //! `TryFrom<proc_macro::Literal>` impl for [`BoolLit`]. The `proc_macro::Literal` |
74 | //! simply cannot represent bool literals. |
75 | //! |
76 | //! |
77 | //! # Examples |
78 | //! |
79 | //! In a proc-macro: |
80 | //! |
//! ```ignore
//! use std::convert::TryFrom;
//! use proc_macro::TokenStream;
//! use litrs::FloatLit;
//!
//! #[proc_macro]
//! pub fn foo(input: TokenStream) -> TokenStream {
//!     let mut input = input.into_iter().collect::<Vec<_>>();
//!     if input.len() != 1 {
//!         // Please do proper error handling in your real code!
//!         panic!("expected exactly one token as input");
//!     }
//!     let token = input.remove(0);
//!
//!     match FloatLit::try_from(token) {
//!         Ok(float_lit) => { /* do something */ }
//!         Err(e) => return e.to_compile_error(),
//!     }
//!
//!     // Dummy output
//!     TokenStream::new()
//! }
//! ```
104 | //! |
105 | //! Parsing from string: |
106 | //! |
//! ```
//! use litrs::{FloatLit, Literal};
//!
//! // Parse a specific kind of literal (float in this case):
//! let float_lit = FloatLit::parse("3.14f32");
//! assert!(float_lit.is_ok());
//! assert_eq!(float_lit.unwrap().suffix(), "f32");
//! assert!(FloatLit::parse("'c'").is_err());
//!
//! // Parse any kind of literal. After parsing, you can inspect the literal
//! // and decide what to do in each case.
//! let lit = Literal::parse("0xff80").expect("failed to parse literal");
//! match lit {
//!     Literal::Integer(lit) => { /* ... */ }
//!     Literal::Float(lit) => { /* ... */ }
//!     Literal::Bool(lit) => { /* ... */ }
//!     Literal::Char(lit) => { /* ... */ }
//!     Literal::String(lit) => { /* ... */ }
//!     Literal::Byte(lit) => { /* ... */ }
//!     Literal::ByteString(lit) => { /* ... */ }
//! }
//! ```
129 | //! |
130 | //! |
131 | //! |
132 | //! # Crate features |
133 | //! |
134 | //! - `proc-macro2` (**default**): adds the dependency `proc_macro2`, a bunch of |
135 | //! `From` and `TryFrom` impls, and [`InvalidToken::to_compile_error2`]. |
136 | //! - `check_suffix`: if enabled, `parse` functions will exactly verify that the |
137 | //! literal suffix is valid. Adds the dependency `unicode-xid`. If disabled, |
138 | //! only an approximate check (only in ASCII range) is done. If you are |
139 | //! writing a proc macro, you don't need to enable this as the suffix is |
140 | //! already checked by the compiler. |
141 | //! |
142 | //! |
143 | //! [ref]: https://doc.rust-lang.org/reference/tokens.html#literals |
144 | //! |
145 | |
146 | #![deny (missing_debug_implementations)] |
147 | |
148 | extern crate proc_macro; |
149 | |
150 | #[cfg (test)] |
151 | #[macro_use ] |
152 | mod test_util; |
153 | |
154 | #[cfg (test)] |
155 | mod tests; |
156 | |
157 | mod bool; |
158 | mod byte; |
159 | mod bytestr; |
160 | mod char; |
161 | mod err; |
162 | mod escape; |
163 | mod float; |
164 | mod impls; |
165 | mod integer; |
166 | mod parse; |
167 | mod string; |
168 | |
169 | |
170 | use std::{borrow::{Borrow, Cow}, fmt, ops::{Deref, Range}}; |
171 | |
172 | pub use self::{ |
173 | bool::BoolLit, |
174 | byte::ByteLit, |
175 | bytestr::ByteStringLit, |
176 | char::CharLit, |
177 | err::{InvalidToken, ParseError}, |
178 | float::{FloatLit, FloatType}, |
179 | integer::{FromIntegerLiteral, IntegerLit, IntegerBase, IntegerType}, |
180 | string::StringLit, |
181 | }; |
182 | |
183 | |
184 | // ============================================================================================== |
185 | // ===== `Literal` and type defs |
186 | // ============================================================================================== |
187 | |
188 | /// A literal. This is the main type of this library. |
189 | /// |
190 | /// This type is generic over the underlying buffer `B`, which can be `&str` or |
191 | /// `String`. |
192 | /// |
193 | /// To create this type, you have to either call [`Literal::parse`] with an |
194 | /// input string or use the `From<_>` impls of this type. The impls are only |
195 | /// available of the corresponding crate features are enabled (they are enabled |
196 | /// by default). |
197 | #[derive (Debug, Clone, PartialEq, Eq)] |
198 | pub enum Literal<B: Buffer> { |
199 | Bool(BoolLit), |
200 | Integer(IntegerLit<B>), |
201 | Float(FloatLit<B>), |
202 | Char(CharLit<B>), |
203 | String(StringLit<B>), |
204 | Byte(ByteLit<B>), |
205 | ByteString(ByteStringLit<B>), |
206 | } |
207 | |
208 | impl<B: Buffer> Literal<B> { |
209 | /// Parses the given input as a Rust literal. |
210 | pub fn parse(input: B) -> Result<Self, ParseError> { |
211 | parse::parse(input) |
212 | } |
213 | |
214 | /// Returns the suffix of this literal or `""` if it doesn't have one. |
215 | /// |
216 | /// Rust token grammar actually allows suffixes for all kinds of tokens. |
217 | /// Most Rust programmer only know the type suffixes for integer and |
218 | /// floats, e.g. `0u32`. And in normal Rust code, everything else causes an |
219 | /// error. But it is possible to pass literals with arbitrary suffixes to |
220 | /// proc macros, for example: |
221 | /// |
222 | /// ```ignore |
223 | /// some_macro!(3.14f33 16px '🦊'good_boy "toph"beifong ); |
224 | /// ``` |
225 | /// |
226 | /// Boolean literals, not actually being literals, but idents, cannot have |
227 | /// suffixes and this method always returns `""` for those. |
228 | /// |
229 | /// There are some edge cases to be aware of: |
230 | /// - Integer suffixes must not start with `e` or `E` as that conflicts with |
231 | /// the exponent grammar for floats. `0e1` is a float; `0eel` is also |
232 | /// parsed as a float and results in an error. |
233 | /// - Hexadecimal integers eagerly parse digits, so `0x5abcdefgh` has a |
234 | /// suffix von `gh`. |
235 | /// - Suffixes can contain and start with `_`, but for integer and number |
236 | /// literals, `_` is eagerly parsed as part of the number, so `1_x` has |
237 | /// the suffix `x`. |
238 | /// - The input `55f32` is regarded as integer literal with suffix `f32`. |
239 | /// |
240 | /// # Example |
241 | /// |
242 | /// ``` |
243 | /// use litrs::Literal; |
244 | /// |
245 | /// assert_eq!(Literal::parse(r##"3.14f33"## ).unwrap().suffix(), "f33" ); |
246 | /// assert_eq!(Literal::parse(r##"123hackerman"## ).unwrap().suffix(), "hackerman" ); |
247 | /// assert_eq!(Literal::parse(r##"0x0fuck"## ).unwrap().suffix(), "uck" ); |
248 | /// assert_eq!(Literal::parse(r##"'🦊'good_boy"## ).unwrap().suffix(), "good_boy" ); |
249 | /// assert_eq!(Literal::parse(r##""toph"beifong"## ).unwrap().suffix(), "beifong" ); |
250 | /// ``` |
251 | pub fn suffix(&self) -> &str { |
252 | match self { |
253 | Literal::Bool(_) => "" , |
254 | Literal::Integer(l) => l.suffix(), |
255 | Literal::Float(l) => l.suffix(), |
256 | Literal::Char(l) => l.suffix(), |
257 | Literal::String(l) => l.suffix(), |
258 | Literal::Byte(l) => l.suffix(), |
259 | Literal::ByteString(l) => l.suffix(), |
260 | } |
261 | } |
262 | } |
263 | |
264 | impl Literal<&str> { |
265 | /// Makes a copy of the underlying buffer and returns the owned version of |
266 | /// `Self`. |
267 | pub fn into_owned(self) -> Literal<String> { |
268 | match self { |
269 | Literal::Bool(l: BoolLit) => Literal::Bool(l.to_owned()), |
270 | Literal::Integer(l: IntegerLit<&str>) => Literal::Integer(l.to_owned()), |
271 | Literal::Float(l: FloatLit<&str>) => Literal::Float(l.to_owned()), |
272 | Literal::Char(l: CharLit<&str>) => Literal::Char(l.to_owned()), |
273 | Literal::String(l: StringLit<&str>) => Literal::String(l.into_owned()), |
274 | Literal::Byte(l: ByteLit<&str>) => Literal::Byte(l.to_owned()), |
275 | Literal::ByteString(l: ByteStringLit<&str>) => Literal::ByteString(l.into_owned()), |
276 | } |
277 | } |
278 | } |
279 | |
280 | impl<B: Buffer> fmt::Display for Literal<B> { |
281 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
282 | match self { |
283 | Literal::Bool(l: &BoolLit) => l.fmt(f), |
284 | Literal::Integer(l: &IntegerLit) => l.fmt(f), |
285 | Literal::Float(l: &FloatLit) => l.fmt(f), |
286 | Literal::Char(l: &CharLit) => l.fmt(f), |
287 | Literal::String(l: &StringLit) => l.fmt(f), |
288 | Literal::Byte(l: &ByteLit) => l.fmt(f), |
289 | Literal::ByteString(l: &ByteStringLit) => l.fmt(f), |
290 | } |
291 | } |
292 | } |
293 | |
294 | |
295 | // ============================================================================================== |
296 | // ===== Buffer |
297 | // ============================================================================================== |
298 | |
299 | /// A shared or owned string buffer. Implemented for `String` and `&str`. *Implementation detail*. |
300 | /// |
301 | /// This is trait is implementation detail of this library, cannot be |
302 | /// implemented in other crates and is not subject to semantic versioning. |
303 | /// `litrs` only guarantees that this trait is implemented for `String` and |
304 | /// `for<'a> &'a str`. |
305 | pub trait Buffer: sealed::Sealed + Deref<Target = str> { |
306 | /// This is `Cow<'static, str>` for `String`, and `Cow<'a, str>` for `&'a str`. |
307 | type Cow: From<String> + AsRef<str> + Borrow<str> + Deref<Target = str>; |
308 | |
309 | #[doc (hidden)] |
310 | fn into_cow(self) -> Self::Cow; |
311 | |
312 | /// This is `Cow<'static, [u8]>` for `String`, and `Cow<'a, [u8]>` for `&'a str`. |
313 | type ByteCow: From<Vec<u8>> + AsRef<[u8]> + Borrow<[u8]> + Deref<Target = [u8]>; |
314 | |
315 | #[doc (hidden)] |
316 | fn into_byte_cow(self) -> Self::ByteCow; |
317 | |
318 | /// Cuts away some characters at the beginning and some at the end. Given |
319 | /// range has to be in bounds. |
320 | #[doc (hidden)] |
321 | fn cut(self, range: Range<usize>) -> Self; |
322 | } |
323 | |
/// Private home of the [`Sealed`](sealed::Sealed) marker trait.
///
/// `Buffer` names `Sealed` as a supertrait. The trait itself is `pub`, but it
/// lives in this private module.
mod sealed {
    /// Marker trait without any items; a type must implement it in order to
    /// implement `Buffer`.
    pub trait Sealed {}
}
327 | |
328 | impl<'a> sealed::Sealed for &'a str {} |
329 | impl<'a> Buffer for &'a str { |
330 | #[doc (hidden)] |
331 | fn cut(self, range: Range<usize>) -> Self { |
332 | &self[range] |
333 | } |
334 | |
335 | type Cow = Cow<'a, str>; |
336 | #[doc (hidden)] |
337 | fn into_cow(self) -> Self::Cow { |
338 | self.into() |
339 | } |
340 | type ByteCow = Cow<'a, [u8]>; |
341 | #[doc (hidden)] |
342 | fn into_byte_cow(self) -> Self::ByteCow { |
343 | self.as_bytes().into() |
344 | } |
345 | } |
346 | |
347 | impl sealed::Sealed for String {} |
348 | impl Buffer for String { |
349 | #[doc (hidden)] |
350 | fn cut(mut self, range: Range<usize>) -> Self { |
351 | // This is not the most efficient way, but it works. First we cut the |
352 | // end, then the beginning. Note that `drain` also removes the range if |
353 | // the iterator is not consumed. |
354 | self.truncate(new_len:range.end); |
355 | self.drain(..range.start); |
356 | self |
357 | } |
358 | |
359 | type Cow = Cow<'static, str>; |
360 | #[doc (hidden)] |
361 | fn into_cow(self) -> Self::Cow { |
362 | self.into() |
363 | } |
364 | |
365 | type ByteCow = Cow<'static, [u8]>; |
366 | #[doc (hidden)] |
367 | fn into_byte_cow(self) -> Self::ByteCow { |
368 | self.into_bytes().into() |
369 | } |
370 | } |
371 | |