lib.rs source code [crates/litrs/src/lib.rs]

1	//! Parsing and inspecting Rust literal tokens.
2	//!
3	//! This library offers functionality to parse Rust literals, i.e. tokens in the
4	//! Rust programming language that represent fixed values. The grammar for
5	//! those is defined [here][ref].
6	//!
7	//! This kind of functionality already exists in the crate `syn`. However, as
8	//! you oftentimes don't need (nor want) the full power of `syn`, `litrs` was
9	//! built. This crate also offers a bit more flexibility compared to `syn`
10	//! (only regarding literals, of course).
11	//!
12	//!
13	//! # Quick start
14	//!
15	//! | **`StringLit::try_from(tt)?.value()`** |
16	//! | - |
17	//!
18	//! ... where `tt` is a `proc_macro::TokenTree` and where [`StringLit`] can be
19	//! replaced with [`Literal`] or other types of literals (e.g. [`FloatLit`]).
20	//! Calling `value()` returns the value that is represented by the literal.
21	//!
22	//! Mini Example
23	//!
24	//! ```ignore
25	//! use proc_macro::TokenStream;
26	//!
27	//! #[proc_macro]
28	//! pub fn foo(input: TokenStream) -> TokenStream {
29	//! let first_token = input.into_iter().next().unwrap(); // Do proper error handling!
30	//! let string_value = match litrs::StringLit::try_from(first_token) {
31	//! Ok(string_lit) => string_lit.value(),
32	//! Err(e) => return e.to_compile_error(),
33	//! };
34	//!
35	//! // `string_value` is the string value with all escapes resolved.
36	//! todo!()
37	//! }
38	//! ```
39	//!
40	//! # Overview
41	//!
42	//! The main types of this library are [`Literal`], representing any kind of
43	//! literal, and `*Lit`, like [`StringLit`] or [`FloatLit`], representing a
44	//! specific kind of literal.
45	//!
46	//! There are different ways to obtain such a literal type:
47	//!
48	//! - `parse`: parses a `&str` or `String` and returns `Result<_,
49	//! ParseError>`. For example: [`Literal::parse`] and
50	//! [`IntegerLit::parse`].
51	//!
52	//! - `From<proc_macro::Literal> for Literal`: turns a `Literal` value from
53	//! the `proc_macro` crate into a `Literal` from this crate.
54	//!
55	//! - `TryFrom<proc_macro::Literal> for *Lit`: tries to turn a
56	//! `proc_macro::Literal` into a specific literal type of this crate. If
57	//! the input is a literal of a different kind, `Err(InvalidToken)` is
58	//! returned.
59	//!
60	//! - `TryFrom<proc_macro::TokenTree>`: attempts to turn a token tree into a
61	//! literal type of this crate. An error is returned if the token tree is
62	//! not a literal, or if you are trying to turn it into a specific kind of
63	//! literal and the token tree is a different kind of literal.
64	//!
65	//! All of the `From` and `TryFrom` conversions also work for references to
66	//! `proc_macro` types. Additionally, if the crate feature `proc-macro2` is
67	//! enabled (which it is by default), all these `From` and `TryFrom` impls also
68	//! exist for the corresponding `proc_macro2` types.
69	//!
70	//! Note: `true` and `false` are `Ident`s when passed to your proc macro.
71	//! The `TryFrom<TokenTree>` impls check for those two special idents and
72	//! return a [`BoolLit`] appropriately. For that reason, there is also no
73	//! `TryFrom<proc_macro::Literal>` impl for [`BoolLit`]. The `proc_macro::Literal`
74	//! simply cannot represent bool literals.
75	//!
76	//!
77	//! # Examples
78	//!
79	//! In a proc-macro:
80	//!
81	//! ```ignore
82	//! use std::convert::TryFrom;
83	//! use proc_macro::TokenStream;
84	//! use litrs::FloatLit;
85	//!
86	//! #[proc_macro]
87	//! pub fn foo(input: TokenStream) -> TokenStream {
88	//! let mut input = input.into_iter().collect::<Vec<_>>();
89	//! if input.len() != 1 {
90	//! // Please do proper error handling in your real code!
91	//! panic!("expected exactly one token as input");
92	//! }
93	//! let token = input.remove(0);
94	//!
95	//! match FloatLit::try_from(token) {
96	//! Ok(float_lit) => { /* do something */ }
97	//! Err(e) => return e.to_compile_error(),
98	//! }
99	//!
100	//! // Dummy output
101	//! TokenStream::new()
102	//! }
103	//! ```
104	//!
105	//! Parsing from string:
106	//!
107	//! ```
108	//! use litrs::{FloatLit, Literal};
109	//!
110	//! // Parse a specific kind of literal (float in this case):
111	//! let float_lit = FloatLit::parse("3.14f32");
112	//! assert!(float_lit.is_ok());
113	//! assert_eq!(float_lit.unwrap().suffix(), "f32");
114	//! assert!(FloatLit::parse("'c'").is_err());
115	//!
116	//! // Parse any kind of literal. After parsing, you can inspect the literal
117	//! // and decide what to do in each case.
118	//! let lit = Literal::parse("0xff80").expect("failed to parse literal");
119	//! match lit {
120	//! Literal::Integer(lit) => { /* ... */ }
121	//! Literal::Float(lit) => { /* ... */ }
122	//! Literal::Bool(lit) => { /* ... */ }
123	//! Literal::Char(lit) => { /* ... */ }
124	//! Literal::String(lit) => { /* ... */ }
125	//! Literal::Byte(lit) => { /* ... */ }
126	//! Literal::ByteString(lit) => { /* ... */ }
127	//! }
128	//! ```
129	//!
130	//!
131	//!
132	//! # Crate features
133	//!
134	//! - `proc-macro2` (default): adds the dependency `proc_macro2`, a bunch of
135	//! `From` and `TryFrom` impls, and [`InvalidToken::to_compile_error2`].
136	//! - `check_suffix`: if enabled, `parse` functions will exactly verify that the
137	//! literal suffix is valid. Adds the dependency `unicode-xid`. If disabled,
138	//! only an approximate check (only in ASCII range) is done. If you are
139	//! writing a proc macro, you don't need to enable this as the suffix is
140	//! already checked by the compiler.
141	//!
142	//!
143	//! [ref]: https://doc.rust-lang.org/reference/tokens.html#literals
144	//!
145
146	#![deny(missing_debug_implementations)]
147
148	extern crate proc_macro;
149
150	#[cfg(test)]
151	#[macro_use]
152	mod test_util;
153
154	#[cfg(test)]
155	mod tests;
156
157	mod bool;
158	mod byte;
159	mod bytestr;
160	mod char;
161	mod err;
162	mod escape;
163	mod float;
164	mod impls;
165	mod integer;
166	mod parse;
167	mod string;
168
169
170	use std::{borrow::{Borrow, Cow}, fmt, ops::{Deref, Range}};
171
172	pub use self::{
173	bool::BoolLit,
174	byte::ByteLit,
175	bytestr::ByteStringLit,
176	char::CharLit,
177	err::{InvalidToken, ParseError},
178	float::{FloatLit, FloatType},
179	integer::{FromIntegerLiteral, IntegerLit, IntegerBase, IntegerType},
180	string::StringLit,
181	};
182
183
184	// ==============================================================================================
185	// ===== `Literal` and type defs
186	// ==============================================================================================
187
188	/// A literal. This is the main type of this library.
189	///
190	/// This type is generic over the underlying buffer `B`, which can be `&str` or
191	/// `String`.
192	///
193	/// To create this type, you have to either call [`Literal::parse`] with an
194	/// input string or use the `From<_>` impls of this type. The impls are only
195	/// available of the corresponding crate features are enabled (they are enabled
196	/// by default).
197	#[derive(Debug, Clone, PartialEq, Eq)]
198	pub enum Literal<B: Buffer> {
199	Bool(BoolLit),
200	Integer(IntegerLit<B>),
201	Float(FloatLit<B>),
202	Char(CharLit<B>),
203	String(StringLit<B>),
204	Byte(ByteLit<B>),
205	ByteString(ByteStringLit<B>),
206	}
207
208	impl<B: Buffer> Literal<B> {
209	/// Parses the given input as a Rust literal.
210	pub fn parse(input: B) -> Result<Self, ParseError> {
211	parse::parse(input)
212	}
213
214	/// Returns the suffix of this literal or `""` if it doesn't have one.
215	///
216	/// Rust token grammar actually allows suffixes for all kinds of tokens.
217	/// Most Rust programmer only know the type suffixes for integer and
218	/// floats, e.g. `0u32`. And in normal Rust code, everything else causes an
219	/// error. But it is possible to pass literals with arbitrary suffixes to
220	/// proc macros, for example:
221	///
222	/// ```ignore
223	/// some_macro!(`3.14f33` `16px` '🦊'good_boy "toph"beifong);
224	/// ```
225	///
226	/// Boolean literals, not actually being literals, but idents, cannot have
227	/// suffixes and this method always returns `""` for those.
228	///
229	/// There are some edge cases to be aware of:
230	/// - Integer suffixes must not start with `e` or `E` as that conflicts with
231	/// the exponent grammar for floats. `0e1` is a float; `0eel` is also
232	/// parsed as a float and results in an error.
233	/// - Hexadecimal integers eagerly parse digits, so `0x5abcdefgh` has a
234	/// suffix von `gh`.
235	/// - Suffixes can contain and start with `_`, but for integer and number
236	/// literals, `_` is eagerly parsed as part of the number, so `1_x` has
237	/// the suffix `x`.
238	/// - The input `55f32` is regarded as integer literal with suffix `f32`.
239	///
240	/// # Example
241	///
242	/// ```
243	/// use litrs::Literal;
244	///
245	/// assert_eq!(Literal::parse(r##"3.14f33"##).unwrap().suffix(), "f33");
246	/// assert_eq!(Literal::parse(r##"123hackerman"##).unwrap().suffix(), "hackerman");
247	/// assert_eq!(Literal::parse(r##"0x0fuck"##).unwrap().suffix(), "uck");
248	/// assert_eq!(Literal::parse(r##"'🦊'good_boy"##).unwrap().suffix(), "good_boy");
249	/// assert_eq!(Literal::parse(r##""toph"beifong"##).unwrap().suffix(), "beifong");
250	/// ```
251	pub fn suffix(&self) -> &str {
252	match self {
253	Literal::Bool(_) => "",
254	Literal::Integer(l) => l.suffix(),
255	Literal::Float(l) => l.suffix(),
256	Literal::Char(l) => l.suffix(),
257	Literal::String(l) => l.suffix(),
258	Literal::Byte(l) => l.suffix(),
259	Literal::ByteString(l) => l.suffix(),
260	}
261	}
262	}
263
264	impl Literal<&str> {
265	/// Makes a copy of the underlying buffer and returns the owned version of
266	/// `Self`.
267	pub fn into_owned(self) -> Literal<String> {
268	match self {
269	Literal::Bool(l: BoolLit) => Literal::Bool(l.to_owned()),
270	Literal::Integer(l: IntegerLit<&str>) => Literal::Integer(l.to_owned()),
271	Literal::Float(l: FloatLit<&str>) => Literal::Float(l.to_owned()),
272	Literal::Char(l: CharLit<&str>) => Literal::Char(l.to_owned()),
273	Literal::String(l: StringLit<&str>) => Literal::String(l.into_owned()),
274	Literal::Byte(l: ByteLit<&str>) => Literal::Byte(l.to_owned()),
275	Literal::ByteString(l: ByteStringLit<&str>) => Literal::ByteString(l.into_owned()),
276	}
277	}
278	}
279
280	impl<B: Buffer> fmt::Display for Literal<B> {
281	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
282	match self {
283	Literal::Bool(l: &BoolLit) => l.fmt(f),
284	Literal::Integer(l: &IntegerLit) => l.fmt(f),
285	Literal::Float(l: &FloatLit) => l.fmt(f),
286	Literal::Char(l: &CharLit) => l.fmt(f),
287	Literal::String(l: &StringLit) => l.fmt(f),
288	Literal::Byte(l: &ByteLit) => l.fmt(f),
289	Literal::ByteString(l: &ByteStringLit) => l.fmt(f),
290	}
291	}
292	}
293
294
295	// ==============================================================================================
296	// ===== Buffer
297	// ==============================================================================================
298
/// A string buffer that is either shared or owned. Implemented for `String`
/// and `&str`.
///
/// This trait is an implementation detail of this library: it cannot be
/// implemented in other crates and is not subject to semantic versioning.
/// `litrs` only guarantees that it is implemented for `String` and
/// `for<'a> &'a str`.
pub trait Buffer: sealed::Sealed + Deref<Target = str> {
    /// `Cow<'static, str>` for `String`, and `Cow<'a, str>` for `&'a str`.
    type Cow: From<String> + AsRef<str> + Borrow<str> + Deref<Target = str>;

    /// `Cow<'static, [u8]>` for `String`, and `Cow<'a, [u8]>` for `&'a str`.
    type ByteCow: From<Vec<u8>> + AsRef<[u8]> + Borrow<[u8]> + Deref<Target = [u8]>;

    /// Cuts away some characters at the beginning and some at the end,
    /// keeping only `range`. The given range has to be in bounds.
    #[doc(hidden)]
    fn cut(self, range: Range<usize>) -> Self;

    #[doc(hidden)]
    fn into_cow(self) -> Self::Cow;

    #[doc(hidden)]
    fn into_byte_cow(self) -> Self::ByteCow;
}

// `Sealed` is a supertrait of `Buffer` but lives in a private module, so code
// outside this crate cannot name it and therefore cannot implement `Buffer`.
mod sealed {
    pub trait Sealed {}
}
327
328	impl<'a> sealed::Sealed for &'a str {}
329	impl<'a> Buffer for &'a str {
330	#[doc(hidden)]
331	fn cut(self, range: Range<usize>) -> Self {
332	&self[range]
333	}
334
335	type Cow = Cow<'a, str>;
336	#[doc(hidden)]
337	fn into_cow(self) -> Self::Cow {
338	self.into()
339	}
340	type ByteCow = Cow<'a, [u8]>;
341	#[doc(hidden)]
342	fn into_byte_cow(self) -> Self::ByteCow {
343	self.as_bytes().into()
344	}
345	}
346
347	impl sealed::Sealed for String {}
348	impl Buffer for String {
349	#[doc(hidden)]
350	fn cut(mut self, range: Range<usize>) -> Self {
351	// This is not the most efficient way, but it works. First we cut the
352	// end, then the beginning. Note that `drain` also removes the range if
353	// the iterator is not consumed.
354	self.truncate(new_len:range.end);
355	self.drain(..range.start);
356	self
357	}
358
359	type Cow = Cow<'static, str>;
360	#[doc(hidden)]
361	fn into_cow(self) -> Self::Cow {
362	self.into()
363	}
364
365	type ByteCow = Cow<'static, [u8]>;
366	#[doc(hidden)]
367	fn into_byte_cow(self) -> Self::ByteCow {
368	self.into_bytes().into()
369	}
370	}
371