parse.rs source code [crates/litrs/src/parse.rs]

1	use crate::{
2	BoolLit,
3	Buffer,
4	ByteLit,
5	ByteStringLit,
6	CharLit,
7	ParseError,
8	FloatLit,
9	IntegerLit,
10	Literal,
11	StringLit,
12	err::{perr, ParseErrorKind::{*, self}},
13	};
14
15
16	pub fn parse<B: Buffer>(input: B) -> Result<Literal<B>, ParseError> {
17	let (first, rest) = input.as_bytes().split_first().ok_or(perr(None, Empty))?;
18	let second = input.as_bytes().get(`1`).copied();
19
20	match first {
21	b'f' if &*input == "false" => Ok(Literal::Bool(BoolLit::False)),
22	b't' if &*input == "true" => Ok(Literal::Bool(BoolLit::True)),
23
24	// A number literal (integer or float).
25	b'0'..=b'9' => {
26	// To figure out whether this is a float or integer, we do some
27	// quick inspection here. Yes, this is technically duplicate
28	// work with what is happening in the integer/float parse
29	// methods, but it makes the code way easier for now and won't
30	// be a huge performance loss.
31	//
32	// The first non-decimal char in a float literal must
33	// be '.', 'e' or 'E'.
34	match input.as_bytes().get(`1` + end_dec_digits(rest)) {
35	Some(b'.') \| Some(b'e') \| Some(b'E')
36	=> FloatLit::parse(input).map(Literal::Float),
37
38	_ => IntegerLit::parse(input).map(Literal::Integer),
39	}
40	},
41
42	b'`\'`' => CharLit::parse(input).map(Literal::Char),
43	b'"' \| b'r' => StringLit::parse(input).map(Literal::String),
44
45	b'b' if second == Some(b'`\'`') => ByteLit::parse(input).map(Literal::Byte),
46	b'b' if second == Some(b'r') \|\| second == Some(b'"')
47	=> ByteStringLit::parse(input).map(Literal::ByteString),
48
49	_ => Err(perr(None, InvalidLiteral)),
50	}
51	}
52
53
54	pub(crate) fn first_byte_or_empty(s: &str) -> Result<u8, ParseError> {
55	s.as_bytes().get(`0`).copied().ok_or(err:perr(span:None, kind:Empty))
56	}
57
/// Returns the length of the leading run of decimal digits and underscores
/// in `input`.
///
/// Equivalently: the index of the first byte that is neither `_` nor an
/// ASCII decimal digit, or `input.len()` when there is no such byte.
pub(crate) fn end_dec_digits(input: &[u8]) -> usize {
    input
        .iter()
        .take_while(|&&byte| byte == b'_' || byte.is_ascii_digit())
        .count()
}
65
/// Converts an ASCII hexadecimal digit to its numeric value.
///
/// Accepts `0`-`9`, `a`-`f` and `A`-`F` and returns the value `0..=15`.
/// Every other byte yields `None`.
pub(crate) fn hex_digit_value(digit: u8) -> Option<u8> {
    // `to_digit(16)` recognises exactly the ASCII hex digits (both cases).
    // The result is always below 16, so narrowing to `u8` cannot truncate.
    char::from(digit).to_digit(16).map(|value| value as u8)
}
74
75	/// Makes sure that `s` is a valid literal suffix.
76	pub(crate) fn check_suffix(s: &str) -> Result<(), ParseErrorKind> {
77	if s.is_empty() {
78	return Ok(());
79	}
80
81	let mut chars = s.chars();
82	let first = chars.next().unwrap();
83	let rest = chars.as_str();
84	if first == '_' && rest.is_empty() {
85	return Err(InvalidSuffix);
86	}
87
88	// This is just an extra check to improve the error message. If the first
89	// character of the "suffix" is already some invalid ASCII
90	// char, "unexpected character" seems like the more fitting error.
91	if first.is_ascii() && !(first.is_ascii_alphabetic() \|\| first == '_') {
92	return Err(UnexpectedChar);
93	}
94
95	// Proper check is optional as it's not really necessary in proc macro
96	// context.
97	#[cfg(feature = "check_suffix")]
98	fn is_valid_suffix(first: char, rest: &str) -> bool {
99	use unicode_xid::UnicodeXID;
100
101	(first == '_' \|\| first.is_xid_start())
102	&& rest.chars().all(\|c\| c.is_xid_continue())
103	}
104
105	// When avoiding the dependency on `unicode_xid`, we just do a best effort
106	// to catch the most common errors.
107	#[cfg(not(feature = "check_suffix"))]
108	fn is_valid_suffix(first: char, rest: &str) -> bool {
109	if first.is_ascii() && !(first.is_ascii_alphabetic() \|\| first == '_') {
110	return `false`;
111	}
112	for c in rest.chars() {
113	if c.is_ascii() && !(c.is_ascii_alphanumeric() \|\| c == '_') {
114	return `false`;
115	}
116	}
117	`true`
118	}
119
120	if is_valid_suffix(first, rest) {
121	Ok(())
122	} else {
123	Err(InvalidSuffix)
124	}
125	}
126