//! Fork of the equivalent file from the proc-macro2 crate.
//! Modified to support line number counting in `Cursor`.
//! Also contains some functions from `stable.rs` of proc-macro2.
4
5#![allow(dead_code)] // Why is this needed ?
6
7use std::str::{Bytes, CharIndices, Chars};
8
9use unicode_xid::UnicodeXID;
10
/// Error returned by the lexing helpers in this module.
///
/// The only information carried is a line number; every function in this
/// module fills it from the `line` field of a `Cursor`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LexError {
    /// Line number taken from the cursor of the function that failed.
    pub line: u32,
}

impl std::fmt::Display for LexError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "lex error at line {}", self.line)
    }
}

// Lets callers use `?` into `Box<dyn Error>` and similar error containers.
impl std::error::Error for LexError {}
15
/// A position inside a source string together with its offset, line and
/// column bookkeeping.
///
/// A cursor is a cheap `Copy` value; [`Cursor::advance`] returns a new cursor
/// instead of mutating the existing one.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub struct Cursor<'a> {
    /// The part of the input that has not been consumed yet.
    pub rest: &'a str,
    /// Running offset; grows by `amt` (a byte count) on every `advance(amt)`.
    pub off: u32,
    /// Running line number; grows by one for every `'\n'` that is consumed.
    pub line: u32,
    /// Number of bytes consumed since the last `'\n'`, or since the initial
    /// column when no newline has been consumed yet.
    pub column: u32,
}

impl<'a> Cursor<'a> {
    /// Returns the cursor that results from consuming the first `amt` bytes
    /// of `rest`.
    ///
    /// `off` grows by `amt`, `line` grows by the number of `'\n'` bytes in
    /// the consumed text, and `column` becomes the number of bytes that
    /// follow the last consumed newline (or `column + amt` when the consumed
    /// text contains no newline).
    ///
    /// # Panics
    ///
    /// Panics if `amt` is greater than `rest.len()` or does not fall on a
    /// UTF-8 character boundary.
    pub fn advance(&self, amt: usize) -> Cursor<'a> {
        let (consumed, rest) = self.rest.split_at(amt);

        // `'\n'` is a single byte that never occurs inside a multi-byte
        // UTF-8 sequence, so counting bytes equals counting characters.
        let newlines = consumed.bytes().filter(|&b| b == b'\n').count();

        let column = match consumed.rfind('\n') {
            // Bytes consumed after the last newline.
            Some(last_newline) => (amt - last_newline - 1) as u32,
            None => self.column + amt as u32,
        };

        Cursor {
            rest,
            // Offsets are stored as `u32`; the cast truncates for inputs of
            // 4 GiB or more.
            off: self.off + amt as u32,
            line: self.line + newlines as u32,
            column,
        }
    }

    /// Byte index of the first occurrence of `p` in the remaining input.
    pub fn find(&self, p: char) -> Option<usize> {
        self.rest.find(p)
    }

    /// Whether the remaining input begins with `s`.
    pub fn starts_with(&self, s: &str) -> bool {
        self.rest.starts_with(s)
    }

    /// Whether no input remains.
    pub fn is_empty(&self) -> bool {
        self.rest.is_empty()
    }

    /// Number of bytes that remain.
    pub fn len(&self) -> usize {
        self.rest.len()
    }

    /// The remaining input as raw bytes.
    pub fn as_bytes(&self) -> &'a [u8] {
        self.rest.as_bytes()
    }

    /// Iterator over the bytes of the remaining input.
    pub fn bytes(&self) -> Bytes<'a> {
        self.rest.bytes()
    }

    /// Iterator over the characters of the remaining input.
    pub fn chars(&self) -> Chars<'a> {
        self.rest.chars()
    }

    /// Iterator over `(byte index, character)` pairs of the remaining input.
    pub fn char_indices(&self) -> CharIndices<'a> {
        self.rest.char_indices()
    }
}
78
79pub type PResult<'a, O> = Result<(Cursor<'a>, O), LexError>;
80
81pub fn whitespace(input: Cursor) -> PResult<()> {
82 if input.is_empty() {
83 return Err(LexError { line: input.line });
84 }
85
86 let bytes = input.as_bytes();
87 let mut i = 0;
88 while i < bytes.len() {
89 let s = input.advance(i);
90 if bytes[i] == b'/' {
91 if s.starts_with("//")
92 // && (!s.starts_with("///") || s.starts_with("////"))
93 // && !s.starts_with("//!")
94 {
95 if let Some(len) = s.find('\n') {
96 i += len + 1;
97 continue;
98 }
99 break;
100 } else if s.starts_with("/**/") {
101 i += 4;
102 continue;
103 } else if s.starts_with("/*")
104 // && (!s.starts_with("/**") || s.starts_with("/***"))
105 // && !s.starts_with("/*!")
106 {
107 let (_, com) = block_comment(s)?;
108 i += com.len();
109 continue;
110 }
111 }
112 match bytes[i] {
113 b' ' | 0x09..=0x0d => {
114 i += 1;
115 continue;
116 }
117 b if b <= 0x7f => {}
118 _ => {
119 let ch = s.chars().next().unwrap();
120 if is_whitespace(ch) {
121 i += ch.len_utf8();
122 continue;
123 }
124 }
125 }
126 return if i > 0 { Ok((s, ())) } else { Err(LexError { line: s.line }) };
127 }
128 Ok((input.advance(input.len()), ()))
129}
130
131pub fn block_comment(input: Cursor) -> PResult<&str> {
132 if !input.starts_with("/*") {
133 return Err(LexError { line: input.line });
134 }
135
136 let mut depth: i32 = 0;
137 let bytes: &[u8] = input.as_bytes();
138 let mut i: usize = 0;
139 let upper: usize = bytes.len() - 1;
140 while i < upper {
141 if bytes[i] == b'/' && bytes[i + 1] == b'*' {
142 depth += 1;
143 i += 1; // eat '*'
144 } else if bytes[i] == b'*' && bytes[i + 1] == b'/' {
145 depth -= 1;
146 if depth == 0 {
147 return Ok((input.advance(amt:i + 2), &input.rest[..i + 2]));
148 }
149 i += 1; // eat '/'
150 }
151 i += 1;
152 }
153 Err(LexError { line: input.line })
154}
155
156pub fn skip_whitespace(input: Cursor) -> Cursor {
157 match whitespace(input) {
158 Ok((rest: Cursor<'_>, _)) => rest,
159 Err(_) => input,
160 }
161}
162
/// Whether `ch` counts as whitespace for the lexer: anything
/// `char::is_whitespace` accepts, plus the two directional marks below.
fn is_whitespace(ch: char) -> bool {
    match ch {
        // Rust treats left-to-right mark and right-to-left mark as whitespace.
        '\u{200e}' | '\u{200f}' => true,
        _ => ch.is_whitespace(),
    }
}
167
168// --- functions from stable.rs
169
170#[inline]
171fn is_ident_start(c: char) -> bool {
172 c.is_ascii_alphabetic() || c == '_' || (c > '\x7f' && UnicodeXID::is_xid_start(self:c))
173}
174
175#[inline]
176fn is_ident_continue(c: char) -> bool {
177 c.is_ascii_alphanumeric() || c == '_' || (c > '\x7f' && UnicodeXID::is_xid_continue(self:c))
178}
179
180pub fn symbol(input: Cursor) -> PResult<&str> {
181 let mut chars = input.char_indices();
182
183 let raw = input.starts_with("r#");
184 if raw {
185 chars.next();
186 chars.next();
187 }
188
189 match chars.next() {
190 Some((_, ch)) if is_ident_start(ch) => {}
191 _ => return Err(LexError { line: input.line }),
192 }
193
194 let mut end = input.len();
195 for (i, ch) in chars {
196 if !is_ident_continue(ch) {
197 end = i;
198 break;
199 }
200 }
201
202 let a = &input.rest[..end];
203 if a == "r#_" {
204 Err(LexError { line: input.line })
205 } else {
206 let ident = if raw { &a[2..] } else { a };
207 Ok((input.advance(end), ident))
208 }
209}
210
211pub fn cooked_string(input: Cursor) -> PResult<()> {
212 let mut chars = input.char_indices().peekable();
213 while let Some((byte_offset, ch)) = chars.next() {
214 match ch {
215 '"' => {
216 return Ok((input.advance(byte_offset), ()));
217 }
218 '\r' => {
219 if let Some((_, '\n')) = chars.next() {
220 // ...
221 } else {
222 break;
223 }
224 }
225 '\\' => match chars.next() {
226 Some((_, 'x')) => {
227 if !backslash_x_char(&mut chars) {
228 break;
229 }
230 }
231 Some((_, 'n')) | Some((_, 'r')) | Some((_, 't')) | Some((_, '\\'))
232 | Some((_, '\'')) | Some((_, '"')) | Some((_, '0')) => {}
233 Some((_, 'u')) => {
234 if !backslash_u(&mut chars) {
235 break;
236 }
237 }
238 Some((_, '\n')) | Some((_, '\r')) => {
239 while let Some(&(_, ch)) = chars.peek() {
240 if ch.is_whitespace() {
241 chars.next();
242 } else {
243 break;
244 }
245 }
246 }
247 _ => break,
248 },
249 _ch => {}
250 }
251 }
252 Err(LexError { line: input.line })
253}
254
255pub fn cooked_byte_string(mut input: Cursor) -> PResult<()> {
256 let mut bytes = input.bytes().enumerate();
257 'outer: while let Some((offset, b)) = bytes.next() {
258 match b {
259 b'"' => {
260 return Ok((input.advance(offset), ()));
261 }
262 b'\r' => {
263 if let Some((_, b'\n')) = bytes.next() {
264 // ...
265 } else {
266 break;
267 }
268 }
269 b'\\' => match bytes.next() {
270 Some((_, b'x')) => {
271 if !backslash_x_byte(&mut bytes) {
272 break;
273 }
274 }
275 Some((_, b'n')) | Some((_, b'r')) | Some((_, b't')) | Some((_, b'\\'))
276 | Some((_, b'0')) | Some((_, b'\'')) | Some((_, b'"')) => {}
277 Some((newline, b'\n')) | Some((newline, b'\r')) => {
278 let rest = input.advance(newline + 1);
279 for (offset, ch) in rest.char_indices() {
280 if !ch.is_whitespace() {
281 input = rest.advance(offset);
282 bytes = input.bytes().enumerate();
283 continue 'outer;
284 }
285 }
286 break;
287 }
288 _ => break,
289 },
290 b if b < 0x80 => {}
291 _ => break,
292 }
293 }
294 Err(LexError { line: input.line })
295}
296
297pub fn raw_string(input: Cursor) -> PResult<()> {
298 let mut chars = input.char_indices();
299 let mut n = 0;
300 #[allow(clippy::while_let_on_iterator)] //chars is used in the next loop
301 while let Some((byte_offset, ch)) = chars.next() {
302 match ch {
303 '"' => {
304 n = byte_offset;
305 break;
306 }
307 '#' => {}
308 _ => return Err(LexError { line: input.line }),
309 }
310 }
311 for (byte_offset, ch) in chars {
312 match ch {
313 '"' if input.advance(byte_offset + 1).starts_with(&input.rest[..n]) => {
314 let rest = input.advance(byte_offset + 1 + n);
315 return Ok((rest, ()));
316 }
317 '\r' => {}
318 _ => {}
319 }
320 }
321 Err(LexError { line: input.line })
322}
323
324pub fn cooked_byte(input: Cursor) -> PResult<()> {
325 let mut bytes = input.bytes().enumerate();
326 let ok = match bytes.next().map(|(_, b)| b) {
327 Some(b'\\') => match bytes.next().map(|(_, b)| b) {
328 Some(b'x') => backslash_x_byte(&mut bytes),
329 Some(b'n') | Some(b'r') | Some(b't') | Some(b'\\') | Some(b'0') | Some(b'\'')
330 | Some(b'"') => true,
331 _ => false,
332 },
333 b => b.is_some(),
334 };
335 if ok {
336 match bytes.next() {
337 Some((offset, _)) => {
338 if input.chars().as_str().is_char_boundary(offset) {
339 Ok((input.advance(offset), ()))
340 } else {
341 Err(LexError { line: input.line })
342 }
343 }
344 None => Ok((input.advance(input.len()), ())),
345 }
346 } else {
347 Err(LexError { line: input.line })
348 }
349}
350
351pub fn cooked_char(input: Cursor) -> PResult<()> {
352 let mut chars: CharIndices<'_> = input.char_indices();
353 let ok: bool = match chars.next().map(|(_, ch: char)| ch) {
354 Some('\\') => match chars.next().map(|(_, ch: char)| ch) {
355 Some('x') => backslash_x_char(&mut chars),
356 Some('u') => backslash_u(&mut chars),
357 Some('n') | Some('r') | Some('t') | Some('\\') | Some('0') | Some('\'') | Some('"') => {
358 true
359 }
360 _ => false,
361 },
362 ch: Option => ch.is_some(),
363 };
364 if ok {
365 match chars.next() {
366 Some((idx: usize, _)) => Ok((input.advance(amt:idx), ())),
367 None => Ok((input.advance(amt:input.len()), ())),
368 }
369 } else {
370 Err(LexError { line: input.line })
371 }
372}
373
/// Pulls the next `(index, item)` pair from `$chars` and evaluates to the
/// item when it matches the given pattern(s); otherwise makes the *enclosing
/// function* `return false`. The same happens when the iterator is exhausted.
///
/// The pattern is captured as raw tokens rather than as `pat` fragments: the
/// 2021 edition rejects a `pat` fragment followed by `|`, while a token-tree
/// matcher accepts `a | b | c` on every edition.
macro_rules! next_ch {
    ($chars:ident @ $($pat:tt)+) => {
        match $chars.next() {
            Some((_, ch)) => match ch {
                $($pat)+ => ch,
                _ => return false,
            },
            None => return false,
        }
    };
}
385
386fn backslash_x_char<I>(chars: &mut I) -> bool
387where
388 I: Iterator<Item = (usize, char)>,
389{
390 next_ch!(chars @ '0'..='7');
391 next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F');
392 true
393}
394
395fn backslash_x_byte<I>(chars: &mut I) -> bool
396where
397 I: Iterator<Item = (usize, u8)>,
398{
399 next_ch!(chars @ b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F');
400 next_ch!(chars @ b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F');
401 true
402}
403
404fn backslash_u<I>(chars: &mut I) -> bool
405where
406 I: Iterator<Item = (usize, char)>,
407{
408 next_ch!(chars @ '{');
409 next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F');
410 loop {
411 let c: char = next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F' | '_' | '}');
412 if c == '}' {
413 return true;
414 }
415 }
416}
417