1 | use core::iter; |
2 | |
3 | use super::{Error, Location, Spanned, SpannedValue}; |
4 | |
5 | pub(super) struct Lexed<I: Iterator> { |
6 | iter: iter::Peekable<I>, |
7 | } |
8 | |
9 | impl<I: Iterator> Iterator for Lexed<I> { |
10 | type Item = I::Item; |
11 | |
12 | fn next(&mut self) -> Option<Self::Item> { |
13 | self.iter.next() |
14 | } |
15 | } |
16 | |
17 | impl<'iter, 'token: 'iter, I: Iterator<Item = Result<Token<'token>, Error>> + 'iter> Lexed<I> { |
18 | pub(super) fn peek(&mut self) -> Option<&I::Item> { |
19 | self.iter.peek() |
20 | } |
21 | |
22 | pub(super) fn next_if_whitespace(&mut self) -> Option<Spanned<&'token [u8]>> { |
23 | if let Some(&Ok(Token::ComponentPart { |
24 | kind: ComponentKind::Whitespace, |
25 | value, |
26 | })) = self.peek() |
27 | { |
28 | self.next(); // consume |
29 | Some(value) |
30 | } else { |
31 | None |
32 | } |
33 | } |
34 | |
35 | pub(super) fn next_if_not_whitespace(&mut self) -> Option<Spanned<&'token [u8]>> { |
36 | if let Some(&Ok(Token::ComponentPart { |
37 | kind: ComponentKind::NotWhitespace, |
38 | value, |
39 | })) = self.peek() |
40 | { |
41 | self.next(); |
42 | Some(value) |
43 | } else { |
44 | None |
45 | } |
46 | } |
47 | |
48 | pub(super) fn next_if_opening_bracket(&mut self) -> Option<Location> { |
49 | if let Some(&Ok(Token::Bracket { |
50 | kind: BracketKind::Opening, |
51 | location, |
52 | })) = self.peek() |
53 | { |
54 | self.next(); |
55 | Some(location) |
56 | } else { |
57 | None |
58 | } |
59 | } |
60 | |
61 | pub(super) fn peek_closing_bracket(&'iter mut self) -> Option<&'iter Location> { |
62 | if let Some(Ok(Token::Bracket { |
63 | kind: BracketKind::Closing, |
64 | location, |
65 | })) = self.peek() |
66 | { |
67 | Some(location) |
68 | } else { |
69 | None |
70 | } |
71 | } |
72 | |
73 | pub(super) fn next_if_closing_bracket(&mut self) -> Option<Location> { |
74 | if let Some(&Ok(Token::Bracket { |
75 | kind: BracketKind::Closing, |
76 | location, |
77 | })) = self.peek() |
78 | { |
79 | self.next(); |
80 | Some(location) |
81 | } else { |
82 | None |
83 | } |
84 | } |
85 | } |
86 | |
87 | pub(super) enum Token<'a> { |
88 | Literal(Spanned<&'a [u8]>), |
89 | Bracket { |
90 | kind: BracketKind, |
91 | location: Location, |
92 | }, |
93 | ComponentPart { |
94 | kind: ComponentKind, |
95 | value: Spanned<&'a [u8]>, |
96 | }, |
97 | } |
98 | |
99 | pub(super) enum BracketKind { |
100 | Opening, |
101 | Closing, |
102 | } |
103 | |
104 | pub(super) enum ComponentKind { |
105 | #[allow (clippy::missing_docs_in_private_items)] |
106 | Whitespace, |
107 | #[allow (clippy::missing_docs_in_private_items)] |
108 | NotWhitespace, |
109 | } |
110 | |
111 | fn attach_location<'item>( |
112 | iter: impl Iterator<Item = &'item u8>, |
113 | proc_span: proc_macro::Span, |
114 | ) -> impl Iterator<Item = (&'item u8, Location)> { |
115 | let mut byte_pos: u32 = 0; |
116 | |
117 | iter.map(move |byte: &'item u8| { |
118 | let location: Location = Location { |
119 | byte: byte_pos, |
120 | proc_span, |
121 | }; |
122 | byte_pos += 1; |
123 | (byte, location) |
124 | }) |
125 | } |
126 | |
127 | #[allow (clippy::unused_peekable)] // false positive |
128 | pub(super) fn lex<const VERSION: u8>( |
129 | mut input: &[u8], |
130 | proc_span: proc_macro::Span, |
131 | ) -> Lexed<impl Iterator<Item = Result<Token<'_>, Error>>> { |
132 | assert!(version!(1..=2)); |
133 | |
134 | let mut depth: u8 = 0; |
135 | let mut iter = attach_location(input.iter(), proc_span).peekable(); |
136 | let mut second_bracket_location = None; |
137 | |
138 | let iter = iter::from_fn(move || { |
139 | if version!(..=1) { |
140 | if let Some(location) = second_bracket_location.take() { |
141 | return Some(Ok(Token::Bracket { |
142 | kind: BracketKind::Opening, |
143 | location, |
144 | })); |
145 | } |
146 | } |
147 | |
148 | Some(Ok(match iter.next()? { |
149 | (b' \\' , backslash_loc) if version!(2..) => match iter.next() { |
150 | Some((b' \\' | b'[' | b']' , char_loc)) => { |
151 | let char = &input[1..2]; |
152 | input = &input[2..]; |
153 | if depth == 0 { |
154 | Token::Literal(char.spanned(backslash_loc.to(char_loc))) |
155 | } else { |
156 | Token::ComponentPart { |
157 | kind: ComponentKind::NotWhitespace, |
158 | value: char.spanned(backslash_loc.to(char_loc)), |
159 | } |
160 | } |
161 | } |
162 | Some((_, loc)) => { |
163 | return Some(Err(loc.error("invalid escape sequence" ))); |
164 | } |
165 | None => { |
166 | return Some(Err(backslash_loc.error("unexpected end of input" ))); |
167 | } |
168 | }, |
169 | (b'[' , location) if version!(..=1) => { |
170 | if let Some((_, second_location)) = iter.next_if(|&(&byte, _)| byte == b'[' ) { |
171 | second_bracket_location = Some(second_location); |
172 | input = &input[2..]; |
173 | } else { |
174 | depth += 1; |
175 | input = &input[1..]; |
176 | } |
177 | |
178 | Token::Bracket { |
179 | kind: BracketKind::Opening, |
180 | location, |
181 | } |
182 | } |
183 | (b'[' , location) => { |
184 | depth += 1; |
185 | input = &input[1..]; |
186 | |
187 | Token::Bracket { |
188 | kind: BracketKind::Opening, |
189 | location, |
190 | } |
191 | } |
192 | (b']' , location) if depth > 0 => { |
193 | depth -= 1; |
194 | input = &input[1..]; |
195 | |
196 | Token::Bracket { |
197 | kind: BracketKind::Closing, |
198 | location, |
199 | } |
200 | } |
201 | (_, start_location) if depth == 0 => { |
202 | let mut bytes = 1; |
203 | let mut end_location = start_location; |
204 | |
205 | while let Some((_, location)) = |
206 | iter.next_if(|&(&byte, _)| !((version!(2..) && byte == b' \\' ) || byte == b'[' )) |
207 | { |
208 | end_location = location; |
209 | bytes += 1; |
210 | } |
211 | |
212 | let value = &input[..bytes]; |
213 | input = &input[bytes..]; |
214 | |
215 | Token::Literal(value.spanned(start_location.to(end_location))) |
216 | } |
217 | (byte, start_location) => { |
218 | let mut bytes = 1; |
219 | let mut end_location = start_location; |
220 | let is_whitespace = byte.is_ascii_whitespace(); |
221 | |
222 | while let Some((_, location)) = iter.next_if(|&(byte, _)| { |
223 | !matches!(byte, b' \\' | b'[' | b']' ) |
224 | && is_whitespace == byte.is_ascii_whitespace() |
225 | }) { |
226 | end_location = location; |
227 | bytes += 1; |
228 | } |
229 | |
230 | let value = &input[..bytes]; |
231 | input = &input[bytes..]; |
232 | |
233 | Token::ComponentPart { |
234 | kind: if is_whitespace { |
235 | ComponentKind::Whitespace |
236 | } else { |
237 | ComponentKind::NotWhitespace |
238 | }, |
239 | value: value.spanned(start_location.to(end_location)), |
240 | } |
241 | } |
242 | })) |
243 | }); |
244 | |
245 | Lexed { |
246 | iter: iter.peekable(), |
247 | } |
248 | } |
249 | |