//! Fork of the equivalent file from the proc-macro2 crate.
//! Modified to support line number counting in `Cursor`.
//! Also contains some functions from `stable.rs` of proc-macro2.
4 | |
5 | #![allow (dead_code)] // Why is this needed ? |
6 | |
7 | use std::str::{Bytes, CharIndices, Chars}; |
8 | |
9 | use unicode_xid::UnicodeXID; |
10 | |
/// Error produced by the lexing helpers in this file.
///
/// It carries nothing but the line number taken from the [`Cursor`] that the
/// failing function was looking at.
#[derive(Debug)]
pub struct LexError {
    /// Line number copied from the cursor at the point of failure.
    pub line: u32,
}
15 | |
/// A lightweight, copyable view over the not-yet-consumed part of the input,
/// together with bookkeeping about how far lexing has progressed.
#[derive(Copy, Clone, Eq, PartialEq)]
pub struct Cursor<'a> {
    /// Text that has not been consumed yet.
    pub rest: &'a str,
    /// Number of bytes consumed so far (grows by `amt` on every `advance`).
    pub off: u32,
    /// Current line; incremented once per `'\n'` that has been consumed.
    pub line: u32,
    /// Number of bytes consumed since the last `'\n'`; when no newline has
    /// been consumed yet it is the starting column plus the bytes consumed.
    pub column: u32,
}

impl<'a> Cursor<'a> {
    /// Returns a new cursor moved `amt` bytes forward.
    ///
    /// `line` is increased by the number of newlines in the consumed text and
    /// `column` is reset to the number of bytes following the last consumed
    /// newline (or simply increased by `amt` when no newline was consumed).
    ///
    /// # Panics
    ///
    /// Panics if `amt` is larger than `self.rest.len()` or does not fall on a
    /// UTF-8 character boundary.
    pub fn advance(&self, amt: usize) -> Cursor<'a> {
        let (consumed, rest) = self.rest.split_at(amt);
        // '\n' is a single ASCII byte, so counting bytes is exact.
        let newlines = consumed.bytes().filter(|&b| b == b'\n').count() as u32;
        let column = match consumed.rfind('\n') {
            None => self.column + (amt as u32),
            // Bytes consumed after the last newline; `i < amt`, so no underflow.
            Some(i) => (amt - i) as u32 - 1,
        };
        Cursor {
            rest,
            off: self.off + (amt as u32),
            line: self.line + newlines,
            column,
        }
    }

    /// Byte index of the first occurrence of `p` in the remaining text.
    pub fn find(&self, p: char) -> Option<usize> {
        self.rest.find(p)
    }

    /// Whether the remaining text begins with `s`.
    pub fn starts_with(&self, s: &str) -> bool {
        self.rest.starts_with(s)
    }

    /// Whether all input has been consumed.
    pub fn is_empty(&self) -> bool {
        self.rest.is_empty()
    }

    /// Length in bytes of the remaining text.
    pub fn len(&self) -> usize {
        self.rest.len()
    }

    /// The remaining text as raw bytes.
    pub fn as_bytes(&self) -> &'a [u8] {
        self.rest.as_bytes()
    }

    /// Iterator over the bytes of the remaining text.
    pub fn bytes(&self) -> Bytes<'a> {
        self.rest.bytes()
    }

    /// Iterator over the characters of the remaining text.
    pub fn chars(&self) -> Chars<'a> {
        self.rest.chars()
    }

    /// Iterator over `(byte index, char)` pairs of the remaining text.
    pub fn char_indices(&self) -> CharIndices<'a> {
        self.rest.char_indices()
    }
}
78 | |
/// Result type shared by the lexing functions in this file: on success, the
/// cursor positioned after the consumed input together with the parsed value
/// `O`; on failure, a [`LexError`].
pub type PResult<'a, O> = Result<(Cursor<'a>, O), LexError>;
80 | |
81 | pub fn whitespace(input: Cursor) -> PResult<()> { |
82 | if input.is_empty() { |
83 | return Err(LexError { line: input.line }); |
84 | } |
85 | |
86 | let bytes = input.as_bytes(); |
87 | let mut i = 0; |
88 | while i < bytes.len() { |
89 | let s = input.advance(i); |
90 | if bytes[i] == b'/' { |
91 | if s.starts_with("//" ) |
92 | // && (!s.starts_with("///") || s.starts_with("////")) |
93 | // && !s.starts_with("//!") |
94 | { |
95 | if let Some(len) = s.find(' \n' ) { |
96 | i += len + 1; |
97 | continue; |
98 | } |
99 | break; |
100 | } else if s.starts_with("/**/" ) { |
101 | i += 4; |
102 | continue; |
103 | } else if s.starts_with("/*" ) |
104 | // && (!s.starts_with("/**") || s.starts_with("/***")) |
105 | // && !s.starts_with("/*!") |
106 | { |
107 | let (_, com) = block_comment(s)?; |
108 | i += com.len(); |
109 | continue; |
110 | } |
111 | } |
112 | match bytes[i] { |
113 | b' ' | 0x09..=0x0d => { |
114 | i += 1; |
115 | continue; |
116 | } |
117 | b if b <= 0x7f => {} |
118 | _ => { |
119 | let ch = s.chars().next().unwrap(); |
120 | if is_whitespace(ch) { |
121 | i += ch.len_utf8(); |
122 | continue; |
123 | } |
124 | } |
125 | } |
126 | return if i > 0 { Ok((s, ())) } else { Err(LexError { line: s.line }) }; |
127 | } |
128 | Ok((input.advance(input.len()), ())) |
129 | } |
130 | |
131 | pub fn block_comment(input: Cursor) -> PResult<&str> { |
132 | if !input.starts_with("/*" ) { |
133 | return Err(LexError { line: input.line }); |
134 | } |
135 | |
136 | let mut depth: i32 = 0; |
137 | let bytes: &[u8] = input.as_bytes(); |
138 | let mut i: usize = 0; |
139 | let upper: usize = bytes.len() - 1; |
140 | while i < upper { |
141 | if bytes[i] == b'/' && bytes[i + 1] == b'*' { |
142 | depth += 1; |
143 | i += 1; // eat '*' |
144 | } else if bytes[i] == b'*' && bytes[i + 1] == b'/' { |
145 | depth -= 1; |
146 | if depth == 0 { |
147 | return Ok((input.advance(amt:i + 2), &input.rest[..i + 2])); |
148 | } |
149 | i += 1; // eat '/' |
150 | } |
151 | i += 1; |
152 | } |
153 | Err(LexError { line: input.line }) |
154 | } |
155 | |
156 | pub fn skip_whitespace(input: Cursor) -> Cursor { |
157 | match whitespace(input) { |
158 | Ok((rest: Cursor<'_>, _)) => rest, |
159 | Err(_) => input, |
160 | } |
161 | } |
162 | |
/// Whether `ch` counts as whitespace for the lexer: anything
/// `char::is_whitespace` accepts, plus U+200E and U+200F.
fn is_whitespace(ch: char) -> bool {
    // Rust treats left-to-right mark and right-to-left mark as whitespace
    ch.is_whitespace() || matches!(ch, '\u{200e}' | '\u{200f}')
}
167 | |
168 | // --- functions from stable.rs |
169 | |
170 | #[inline ] |
171 | fn is_ident_start(c: char) -> bool { |
172 | c.is_ascii_alphabetic() || c == '_' || (c > ' \x7f' && UnicodeXID::is_xid_start(self:c)) |
173 | } |
174 | |
175 | #[inline ] |
176 | fn is_ident_continue(c: char) -> bool { |
177 | c.is_ascii_alphanumeric() || c == '_' || (c > ' \x7f' && UnicodeXID::is_xid_continue(self:c)) |
178 | } |
179 | |
180 | pub fn symbol(input: Cursor) -> PResult<&str> { |
181 | let mut chars = input.char_indices(); |
182 | |
183 | let raw = input.starts_with("r#" ); |
184 | if raw { |
185 | chars.next(); |
186 | chars.next(); |
187 | } |
188 | |
189 | match chars.next() { |
190 | Some((_, ch)) if is_ident_start(ch) => {} |
191 | _ => return Err(LexError { line: input.line }), |
192 | } |
193 | |
194 | let mut end = input.len(); |
195 | for (i, ch) in chars { |
196 | if !is_ident_continue(ch) { |
197 | end = i; |
198 | break; |
199 | } |
200 | } |
201 | |
202 | let a = &input.rest[..end]; |
203 | if a == "r#_" { |
204 | Err(LexError { line: input.line }) |
205 | } else { |
206 | let ident = if raw { &a[2..] } else { a }; |
207 | Ok((input.advance(end), ident)) |
208 | } |
209 | } |
210 | |
211 | pub fn cooked_string(input: Cursor) -> PResult<()> { |
212 | let mut chars = input.char_indices().peekable(); |
213 | while let Some((byte_offset, ch)) = chars.next() { |
214 | match ch { |
215 | '"' => { |
216 | return Ok((input.advance(byte_offset), ())); |
217 | } |
218 | ' \r' => { |
219 | if let Some((_, ' \n' )) = chars.next() { |
220 | // ... |
221 | } else { |
222 | break; |
223 | } |
224 | } |
225 | ' \\' => match chars.next() { |
226 | Some((_, 'x' )) => { |
227 | if !backslash_x_char(&mut chars) { |
228 | break; |
229 | } |
230 | } |
231 | Some((_, 'n' )) | Some((_, 'r' )) | Some((_, 't' )) | Some((_, ' \\' )) |
232 | | Some((_, ' \'' )) | Some((_, '"' )) | Some((_, '0' )) => {} |
233 | Some((_, 'u' )) => { |
234 | if !backslash_u(&mut chars) { |
235 | break; |
236 | } |
237 | } |
238 | Some((_, ' \n' )) | Some((_, ' \r' )) => { |
239 | while let Some(&(_, ch)) = chars.peek() { |
240 | if ch.is_whitespace() { |
241 | chars.next(); |
242 | } else { |
243 | break; |
244 | } |
245 | } |
246 | } |
247 | _ => break, |
248 | }, |
249 | _ch => {} |
250 | } |
251 | } |
252 | Err(LexError { line: input.line }) |
253 | } |
254 | |
255 | pub fn cooked_byte_string(mut input: Cursor) -> PResult<()> { |
256 | let mut bytes = input.bytes().enumerate(); |
257 | 'outer: while let Some((offset, b)) = bytes.next() { |
258 | match b { |
259 | b'"' => { |
260 | return Ok((input.advance(offset), ())); |
261 | } |
262 | b' \r' => { |
263 | if let Some((_, b' \n' )) = bytes.next() { |
264 | // ... |
265 | } else { |
266 | break; |
267 | } |
268 | } |
269 | b' \\' => match bytes.next() { |
270 | Some((_, b'x' )) => { |
271 | if !backslash_x_byte(&mut bytes) { |
272 | break; |
273 | } |
274 | } |
275 | Some((_, b'n' )) | Some((_, b'r' )) | Some((_, b't' )) | Some((_, b' \\' )) |
276 | | Some((_, b'0' )) | Some((_, b' \'' )) | Some((_, b'"' )) => {} |
277 | Some((newline, b' \n' )) | Some((newline, b' \r' )) => { |
278 | let rest = input.advance(newline + 1); |
279 | for (offset, ch) in rest.char_indices() { |
280 | if !ch.is_whitespace() { |
281 | input = rest.advance(offset); |
282 | bytes = input.bytes().enumerate(); |
283 | continue 'outer; |
284 | } |
285 | } |
286 | break; |
287 | } |
288 | _ => break, |
289 | }, |
290 | b if b < 0x80 => {} |
291 | _ => break, |
292 | } |
293 | } |
294 | Err(LexError { line: input.line }) |
295 | } |
296 | |
297 | pub fn raw_string(input: Cursor) -> PResult<()> { |
298 | let mut chars = input.char_indices(); |
299 | let mut n = 0; |
300 | #[allow (clippy::while_let_on_iterator)] //chars is used in the next loop |
301 | while let Some((byte_offset, ch)) = chars.next() { |
302 | match ch { |
303 | '"' => { |
304 | n = byte_offset; |
305 | break; |
306 | } |
307 | '#' => {} |
308 | _ => return Err(LexError { line: input.line }), |
309 | } |
310 | } |
311 | for (byte_offset, ch) in chars { |
312 | match ch { |
313 | '"' if input.advance(byte_offset + 1).starts_with(&input.rest[..n]) => { |
314 | let rest = input.advance(byte_offset + 1 + n); |
315 | return Ok((rest, ())); |
316 | } |
317 | ' \r' => {} |
318 | _ => {} |
319 | } |
320 | } |
321 | Err(LexError { line: input.line }) |
322 | } |
323 | |
324 | pub fn cooked_byte(input: Cursor) -> PResult<()> { |
325 | let mut bytes = input.bytes().enumerate(); |
326 | let ok = match bytes.next().map(|(_, b)| b) { |
327 | Some(b' \\' ) => match bytes.next().map(|(_, b)| b) { |
328 | Some(b'x' ) => backslash_x_byte(&mut bytes), |
329 | Some(b'n' ) | Some(b'r' ) | Some(b't' ) | Some(b' \\' ) | Some(b'0' ) | Some(b' \'' ) |
330 | | Some(b'"' ) => true, |
331 | _ => false, |
332 | }, |
333 | b => b.is_some(), |
334 | }; |
335 | if ok { |
336 | match bytes.next() { |
337 | Some((offset, _)) => { |
338 | if input.chars().as_str().is_char_boundary(offset) { |
339 | Ok((input.advance(offset), ())) |
340 | } else { |
341 | Err(LexError { line: input.line }) |
342 | } |
343 | } |
344 | None => Ok((input.advance(input.len()), ())), |
345 | } |
346 | } else { |
347 | Err(LexError { line: input.line }) |
348 | } |
349 | } |
350 | |
351 | pub fn cooked_char(input: Cursor) -> PResult<()> { |
352 | let mut chars: CharIndices<'_> = input.char_indices(); |
353 | let ok: bool = match chars.next().map(|(_, ch: char)| ch) { |
354 | Some(' \\' ) => match chars.next().map(|(_, ch: char)| ch) { |
355 | Some('x' ) => backslash_x_char(&mut chars), |
356 | Some('u' ) => backslash_u(&mut chars), |
357 | Some('n' ) | Some('r' ) | Some('t' ) | Some(' \\' ) | Some('0' ) | Some(' \'' ) | Some('"' ) => { |
358 | true |
359 | } |
360 | _ => false, |
361 | }, |
362 | ch: Option => ch.is_some(), |
363 | }; |
364 | if ok { |
365 | match chars.next() { |
366 | Some((idx: usize, _)) => Ok((input.advance(amt:idx), ())), |
367 | None => Ok((input.advance(amt:input.len()), ())), |
368 | } |
369 | } else { |
370 | Err(LexError { line: input.line }) |
371 | } |
372 | } |
373 | |
// Takes the next `(index, item)` pair from the iterator `$chars` and evaluates
// to the item when it matches one of the given patterns. If the item does not
// match, or the iterator is exhausted, the *enclosing function* returns
// `false`, so this can only be used inside functions returning `bool`.
macro_rules! next_ch {
    ($chars:ident @ $pat:pat $(| $rest:pat)*) => {
        match $chars.next() {
            Some((_, ch)) => match ch {
                $pat $(| $rest)* => ch,
                _ => return false,
            },
            None => return false
        }
    };
}
385 | |
/// Checks the two characters following `\x` in a char or string literal.
///
/// The first must be an octal digit (`0`-`7`) and the second a hex digit.
/// Consumes items from `chars` only up to and including the first one that
/// fails the check.
fn backslash_x_char<I>(chars: &mut I) -> bool
where
    I: Iterator<Item = (usize, char)>,
{
    // `&&` short-circuits, so the second item is left untouched when the
    // first one is rejected.
    matches!(chars.next(), Some((_, '0'..='7')))
        && matches!(chars.next(), Some((_, ch)) if ch.is_ascii_hexdigit())
}
394 | |
/// Checks the two bytes following `\x` in a byte or byte-string literal.
///
/// Both must be ASCII hex digits. Consumes items from `chars` only up to and
/// including the first one that fails the check.
fn backslash_x_byte<I>(chars: &mut I) -> bool
where
    I: Iterator<Item = (usize, u8)>,
{
    // `&&` short-circuits, so the second item is left untouched when the
    // first one is rejected.
    matches!(chars.next(), Some((_, b)) if b.is_ascii_hexdigit())
        && matches!(chars.next(), Some((_, b)) if b.is_ascii_hexdigit())
}
403 | |
404 | fn backslash_u<I>(chars: &mut I) -> bool |
405 | where |
406 | I: Iterator<Item = (usize, char)>, |
407 | { |
408 | next_ch!(chars @ '{' ); |
409 | next_ch!(chars @ '0' ..='9' | 'a' ..='f' | 'A' ..='F' ); |
410 | loop { |
411 | let c: char = next_ch!(chars @ '0' ..='9' | 'a' ..='f' | 'A' ..='F' | '_' | '}' ); |
412 | if c == '}' { |
413 | return true; |
414 | } |
415 | } |
416 | } |
417 | |