1use crate::fallback::{
2 is_ident_continue, is_ident_start, Group, LexError, Literal, Span, TokenStream,
3 TokenStreamBuilder,
4};
5use crate::{Delimiter, Punct, Spacing, TokenTree};
6use core::char;
7use core::str::{Bytes, CharIndices, Chars};
8
9#[derive(Copy, Clone, Eq, PartialEq)]
10pub(crate) struct Cursor<'a> {
11 pub rest: &'a str,
12 #[cfg(span_locations)]
13 pub off: u32,
14}
15
16impl<'a> Cursor<'a> {
17 pub fn advance(&self, bytes: usize) -> Cursor<'a> {
18 let (_front, rest) = self.rest.split_at(bytes);
19 Cursor {
20 rest,
21 #[cfg(span_locations)]
22 off: self.off + _front.chars().count() as u32,
23 }
24 }
25
26 pub fn starts_with(&self, s: &str) -> bool {
27 self.rest.starts_with(s)
28 }
29
30 pub fn starts_with_char(&self, ch: char) -> bool {
31 self.rest.starts_with(ch)
32 }
33
34 pub fn starts_with_fn<Pattern>(&self, f: Pattern) -> bool
35 where
36 Pattern: FnMut(char) -> bool,
37 {
38 self.rest.starts_with(f)
39 }
40
41 pub fn is_empty(&self) -> bool {
42 self.rest.is_empty()
43 }
44
45 fn len(&self) -> usize {
46 self.rest.len()
47 }
48
49 fn as_bytes(&self) -> &'a [u8] {
50 self.rest.as_bytes()
51 }
52
53 fn bytes(&self) -> Bytes<'a> {
54 self.rest.bytes()
55 }
56
57 fn chars(&self) -> Chars<'a> {
58 self.rest.chars()
59 }
60
61 fn char_indices(&self) -> CharIndices<'a> {
62 self.rest.char_indices()
63 }
64
65 fn parse(&self, tag: &str) -> Result<Cursor<'a>, Reject> {
66 if self.starts_with(tag) {
67 Ok(self.advance(tag.len()))
68 } else {
69 Err(Reject)
70 }
71 }
72}
73
74pub(crate) struct Reject;
75type PResult<'a, O> = Result<(Cursor<'a>, O), Reject>;
76
77fn skip_whitespace(input: Cursor) -> Cursor {
78 let mut s = input;
79
80 while !s.is_empty() {
81 let byte = s.as_bytes()[0];
82 if byte == b'/' {
83 if s.starts_with("//")
84 && (!s.starts_with("///") || s.starts_with("////"))
85 && !s.starts_with("//!")
86 {
87 let (cursor, _) = take_until_newline_or_eof(s);
88 s = cursor;
89 continue;
90 } else if s.starts_with("/**/") {
91 s = s.advance(4);
92 continue;
93 } else if s.starts_with("/*")
94 && (!s.starts_with("/**") || s.starts_with("/***"))
95 && !s.starts_with("/*!")
96 {
97 match block_comment(s) {
98 Ok((rest, _)) => {
99 s = rest;
100 continue;
101 }
102 Err(Reject) => return s,
103 }
104 }
105 }
106 match byte {
107 b' ' | 0x09..=0x0c => {
108 s = s.advance(1);
109 continue;
110 }
111 b'\r' if s.as_bytes().get(1) == Some(&b'\n') => {
112 s = s.advance(2);
113 continue;
114 }
115 b if b <= 0x7f => {}
116 _ => {
117 let ch = s.chars().next().unwrap();
118 if is_whitespace(ch) {
119 s = s.advance(ch.len_utf8());
120 continue;
121 }
122 }
123 }
124 return s;
125 }
126 s
127}
128
129fn block_comment(input: Cursor) -> PResult<&str> {
130 if !input.starts_with("/*") {
131 return Err(Reject);
132 }
133
134 let mut depth = 0usize;
135 let bytes = input.as_bytes();
136 let mut i = 0usize;
137 let upper = bytes.len() - 1;
138
139 while i < upper {
140 if bytes[i] == b'/' && bytes[i + 1] == b'*' {
141 depth += 1;
142 i += 1; // eat '*'
143 } else if bytes[i] == b'*' && bytes[i + 1] == b'/' {
144 depth -= 1;
145 if depth == 0 {
146 return Ok((input.advance(i + 2), &input.rest[..i + 2]));
147 }
148 i += 1; // eat '/'
149 }
150 i += 1;
151 }
152
153 Err(Reject)
154}
155
/// Whether `ch` counts as whitespace for the lexer.
fn is_whitespace(ch: char) -> bool {
    match ch {
        // Rust treats left-to-right mark and right-to-left mark as whitespace
        '\u{200e}' | '\u{200f}' => true,
        _ => ch.is_whitespace(),
    }
}
160
161fn word_break(input: Cursor) -> Result<Cursor, Reject> {
162 match input.chars().next() {
163 Some(ch: char) if is_ident_continue(ch) => Err(Reject),
164 Some(_) | None => Ok(input),
165 }
166}
167
168pub(crate) fn token_stream(mut input: Cursor) -> Result<TokenStream, LexError> {
169 let mut trees = TokenStreamBuilder::new();
170 let mut stack = Vec::new();
171
172 loop {
173 input = skip_whitespace(input);
174
175 if let Ok((rest, ())) = doc_comment(input, &mut trees) {
176 input = rest;
177 continue;
178 }
179
180 #[cfg(span_locations)]
181 let lo = input.off;
182
183 let first = match input.bytes().next() {
184 Some(first) => first,
185 None => match stack.last() {
186 None => return Ok(trees.build()),
187 #[cfg(span_locations)]
188 Some((lo, _frame)) => {
189 return Err(LexError {
190 span: Span { lo: *lo, hi: *lo },
191 })
192 }
193 #[cfg(not(span_locations))]
194 Some(_frame) => return Err(LexError { span: Span {} }),
195 },
196 };
197
198 if let Some(open_delimiter) = match first {
199 b'(' => Some(Delimiter::Parenthesis),
200 b'[' => Some(Delimiter::Bracket),
201 b'{' => Some(Delimiter::Brace),
202 _ => None,
203 } {
204 input = input.advance(1);
205 let frame = (open_delimiter, trees);
206 #[cfg(span_locations)]
207 let frame = (lo, frame);
208 stack.push(frame);
209 trees = TokenStreamBuilder::new();
210 } else if let Some(close_delimiter) = match first {
211 b')' => Some(Delimiter::Parenthesis),
212 b']' => Some(Delimiter::Bracket),
213 b'}' => Some(Delimiter::Brace),
214 _ => None,
215 } {
216 let frame = match stack.pop() {
217 Some(frame) => frame,
218 None => return Err(lex_error(input)),
219 };
220 #[cfg(span_locations)]
221 let (lo, frame) = frame;
222 let (open_delimiter, outer) = frame;
223 if open_delimiter != close_delimiter {
224 return Err(lex_error(input));
225 }
226 input = input.advance(1);
227 let mut g = Group::new(open_delimiter, trees.build());
228 g.set_span(Span {
229 #[cfg(span_locations)]
230 lo,
231 #[cfg(span_locations)]
232 hi: input.off,
233 });
234 trees = outer;
235 trees.push_token_from_parser(TokenTree::Group(crate::Group::_new_fallback(g)));
236 } else {
237 let (rest, mut tt) = match leaf_token(input) {
238 Ok((rest, tt)) => (rest, tt),
239 Err(Reject) => return Err(lex_error(input)),
240 };
241 tt.set_span(crate::Span::_new_fallback(Span {
242 #[cfg(span_locations)]
243 lo,
244 #[cfg(span_locations)]
245 hi: rest.off,
246 }));
247 trees.push_token_from_parser(tt);
248 input = rest;
249 }
250 }
251}
252
253fn lex_error(cursor: Cursor) -> LexError {
254 #[cfg(not(span_locations))]
255 let _ = cursor;
256 LexError {
257 span: Span {
258 #[cfg(span_locations)]
259 lo: cursor.off,
260 #[cfg(span_locations)]
261 hi: cursor.off,
262 },
263 }
264}
265
266fn leaf_token(input: Cursor) -> PResult<TokenTree> {
267 if let Ok((input: Cursor<'_>, l: Literal)) = literal(input) {
268 // must be parsed before ident
269 Ok((input, TokenTree::Literal(crate::Literal::_new_fallback(inner:l))))
270 } else if let Ok((input: Cursor<'_>, p: Punct)) = punct(input) {
271 Ok((input, TokenTree::Punct(p)))
272 } else if let Ok((input: Cursor<'_>, i: Ident)) = ident(input) {
273 Ok((input, TokenTree::Ident(i)))
274 } else {
275 Err(Reject)
276 }
277}
278
279fn ident(input: Cursor) -> PResult<crate::Ident> {
280 if [
281 "r\"", "r#\"", "r##", "b\"", "b\'", "br\"", "br#", "c\"", "cr\"", "cr#",
282 ]
283 .iter()
284 .any(|prefix: &&str| input.starts_with(prefix))
285 {
286 Err(Reject)
287 } else {
288 ident_any(input)
289 }
290}
291
292fn ident_any(input: Cursor) -> PResult<crate::Ident> {
293 let raw: bool = input.starts_with("r#");
294 let rest: Cursor<'_> = input.advance((raw as usize) << 1);
295
296 let (rest: Cursor<'_>, sym: &str) = ident_not_raw(input:rest)?;
297
298 if !raw {
299 let ident: Ident = crate::Ident::new(string:sym, crate::Span::call_site());
300 return Ok((rest, ident));
301 }
302
303 match sym {
304 "_" | "super" | "self" | "Self" | "crate" => return Err(Reject),
305 _ => {}
306 }
307
308 let ident: Ident = crate::Ident::_new_raw(string:sym, crate::Span::call_site());
309 Ok((rest, ident))
310}
311
312fn ident_not_raw(input: Cursor) -> PResult<&str> {
313 let mut chars: CharIndices<'_> = input.char_indices();
314
315 match chars.next() {
316 Some((_, ch: char)) if is_ident_start(ch) => {}
317 _ => return Err(Reject),
318 }
319
320 let mut end: usize = input.len();
321 for (i: usize, ch: char) in chars {
322 if !is_ident_continue(ch) {
323 end = i;
324 break;
325 }
326 }
327
328 Ok((input.advance(bytes:end), &input.rest[..end]))
329}
330
331pub(crate) fn literal(input: Cursor) -> PResult<Literal> {
332 let rest: Cursor<'_> = literal_nocapture(input)?;
333 let end: usize = input.len() - rest.len();
334 Ok((rest, Literal::_new(repr:input.rest[..end].to_string())))
335}
336
337fn literal_nocapture(input: Cursor) -> Result<Cursor, Reject> {
338 if let Ok(ok: Cursor<'_>) = string(input) {
339 Ok(ok)
340 } else if let Ok(ok: Cursor<'_>) = byte_string(input) {
341 Ok(ok)
342 } else if let Ok(ok: Cursor<'_>) = c_string(input) {
343 Ok(ok)
344 } else if let Ok(ok: Cursor<'_>) = byte(input) {
345 Ok(ok)
346 } else if let Ok(ok: Cursor<'_>) = character(input) {
347 Ok(ok)
348 } else if let Ok(ok: Cursor<'_>) = float(input) {
349 Ok(ok)
350 } else if let Ok(ok: Cursor<'_>) = int(input) {
351 Ok(ok)
352 } else {
353 Err(Reject)
354 }
355}
356
357fn literal_suffix(input: Cursor) -> Cursor {
358 match ident_not_raw(input) {
359 Ok((input: Cursor<'_>, _)) => input,
360 Err(Reject) => input,
361 }
362}
363
364fn string(input: Cursor) -> Result<Cursor, Reject> {
365 if let Ok(input: Cursor<'_>) = input.parse(tag:"\"") {
366 cooked_string(input)
367 } else if let Ok(input: Cursor<'_>) = input.parse(tag:"r") {
368 raw_string(input)
369 } else {
370 Err(Reject)
371 }
372}
373
374fn cooked_string(mut input: Cursor) -> Result<Cursor, Reject> {
375 let mut chars = input.char_indices();
376
377 while let Some((i, ch)) = chars.next() {
378 match ch {
379 '"' => {
380 let input = input.advance(i + 1);
381 return Ok(literal_suffix(input));
382 }
383 '\r' => match chars.next() {
384 Some((_, '\n')) => {}
385 _ => break,
386 },
387 '\\' => match chars.next() {
388 Some((_, 'x')) => {
389 backslash_x_char(&mut chars)?;
390 }
391 Some((_, 'n')) | Some((_, 'r')) | Some((_, 't')) | Some((_, '\\'))
392 | Some((_, '\'')) | Some((_, '"')) | Some((_, '0')) => {}
393 Some((_, 'u')) => {
394 backslash_u(&mut chars)?;
395 }
396 Some((newline, ch @ '\n')) | Some((newline, ch @ '\r')) => {
397 input = input.advance(newline + 1);
398 trailing_backslash(&mut input, ch as u8)?;
399 chars = input.char_indices();
400 }
401 _ => break,
402 },
403 _ch => {}
404 }
405 }
406 Err(Reject)
407}
408
409fn raw_string(input: Cursor) -> Result<Cursor, Reject> {
410 let (input: Cursor<'_>, delimiter: &str) = delimiter_of_raw_string(input)?;
411 let mut bytes: impl Iterator = input.bytes().enumerate();
412 while let Some((i: usize, byte: u8)) = bytes.next() {
413 match byte {
414 b'"' if input.rest[i + 1..].starts_with(delimiter) => {
415 let rest: Cursor<'_> = input.advance(bytes:i + 1 + delimiter.len());
416 return Ok(literal_suffix(input:rest));
417 }
418 b'\r' => match bytes.next() {
419 Some((_, b'\n')) => {}
420 _ => break,
421 },
422 _ => {}
423 }
424 }
425 Err(Reject)
426}
427
428fn byte_string(input: Cursor) -> Result<Cursor, Reject> {
429 if let Ok(input: Cursor<'_>) = input.parse(tag:"b\"") {
430 cooked_byte_string(input)
431 } else if let Ok(input: Cursor<'_>) = input.parse(tag:"br") {
432 raw_byte_string(input)
433 } else {
434 Err(Reject)
435 }
436}
437
438fn cooked_byte_string(mut input: Cursor) -> Result<Cursor, Reject> {
439 let mut bytes = input.bytes().enumerate();
440 while let Some((offset, b)) = bytes.next() {
441 match b {
442 b'"' => {
443 let input = input.advance(offset + 1);
444 return Ok(literal_suffix(input));
445 }
446 b'\r' => match bytes.next() {
447 Some((_, b'\n')) => {}
448 _ => break,
449 },
450 b'\\' => match bytes.next() {
451 Some((_, b'x')) => {
452 backslash_x_byte(&mut bytes)?;
453 }
454 Some((_, b'n')) | Some((_, b'r')) | Some((_, b't')) | Some((_, b'\\'))
455 | Some((_, b'0')) | Some((_, b'\'')) | Some((_, b'"')) => {}
456 Some((newline, b @ b'\n')) | Some((newline, b @ b'\r')) => {
457 input = input.advance(newline + 1);
458 trailing_backslash(&mut input, b)?;
459 bytes = input.bytes().enumerate();
460 }
461 _ => break,
462 },
463 b if b < 0x80 => {}
464 _ => break,
465 }
466 }
467 Err(Reject)
468}
469
470fn delimiter_of_raw_string(input: Cursor) -> PResult<&str> {
471 for (i: usize, byte: u8) in input.bytes().enumerate() {
472 match byte {
473 b'"' => {
474 if i > 255 {
475 // https://github.com/rust-lang/rust/pull/95251
476 return Err(Reject);
477 }
478 return Ok((input.advance(bytes:i + 1), &input.rest[..i]));
479 }
480 b'#' => {}
481 _ => break,
482 }
483 }
484 Err(Reject)
485}
486
487fn raw_byte_string(input: Cursor) -> Result<Cursor, Reject> {
488 let (input: Cursor<'_>, delimiter: &str) = delimiter_of_raw_string(input)?;
489 let mut bytes: impl Iterator = input.bytes().enumerate();
490 while let Some((i: usize, byte: u8)) = bytes.next() {
491 match byte {
492 b'"' if input.rest[i + 1..].starts_with(delimiter) => {
493 let rest: Cursor<'_> = input.advance(bytes:i + 1 + delimiter.len());
494 return Ok(literal_suffix(input:rest));
495 }
496 b'\r' => match bytes.next() {
497 Some((_, b'\n')) => {}
498 _ => break,
499 },
500 other: u8 => {
501 if !other.is_ascii() {
502 break;
503 }
504 }
505 }
506 }
507 Err(Reject)
508}
509
510fn c_string(input: Cursor) -> Result<Cursor, Reject> {
511 if let Ok(input: Cursor<'_>) = input.parse(tag:"c\"") {
512 cooked_c_string(input)
513 } else if let Ok(input: Cursor<'_>) = input.parse(tag:"cr") {
514 raw_c_string(input)
515 } else {
516 Err(Reject)
517 }
518}
519
520fn raw_c_string(input: Cursor) -> Result<Cursor, Reject> {
521 let (input: Cursor<'_>, delimiter: &str) = delimiter_of_raw_string(input)?;
522 let mut bytes: impl Iterator = input.bytes().enumerate();
523 while let Some((i: usize, byte: u8)) = bytes.next() {
524 match byte {
525 b'"' if input.rest[i + 1..].starts_with(delimiter) => {
526 let rest: Cursor<'_> = input.advance(bytes:i + 1 + delimiter.len());
527 return Ok(literal_suffix(input:rest));
528 }
529 b'\r' => match bytes.next() {
530 Some((_, b'\n')) => {}
531 _ => break,
532 },
533 b'\0' => break,
534 _ => {}
535 }
536 }
537 Err(Reject)
538}
539
540fn cooked_c_string(mut input: Cursor) -> Result<Cursor, Reject> {
541 let mut chars = input.char_indices();
542
543 while let Some((i, ch)) = chars.next() {
544 match ch {
545 '"' => {
546 let input = input.advance(i + 1);
547 return Ok(literal_suffix(input));
548 }
549 '\r' => match chars.next() {
550 Some((_, '\n')) => {}
551 _ => break,
552 },
553 '\\' => match chars.next() {
554 Some((_, 'x')) => {
555 backslash_x_nonzero(&mut chars)?;
556 }
557 Some((_, 'n')) | Some((_, 'r')) | Some((_, 't')) | Some((_, '\\'))
558 | Some((_, '\'')) | Some((_, '"')) => {}
559 Some((_, 'u')) => {
560 if backslash_u(&mut chars)? == '\0' {
561 break;
562 }
563 }
564 Some((newline, ch @ '\n')) | Some((newline, ch @ '\r')) => {
565 input = input.advance(newline + 1);
566 trailing_backslash(&mut input, ch as u8)?;
567 chars = input.char_indices();
568 }
569 _ => break,
570 },
571 '\0' => break,
572 _ch => {}
573 }
574 }
575 Err(Reject)
576}
577
578fn byte(input: Cursor) -> Result<Cursor, Reject> {
579 let input: Cursor<'_> = input.parse(tag:"b'")?;
580 let mut bytes: impl Iterator = input.bytes().enumerate();
581 let ok: bool = match bytes.next().map(|(_, b: u8)| b) {
582 Some(b'\\') => match bytes.next().map(|(_, b: u8)| b) {
583 Some(b'x') => backslash_x_byte(&mut bytes).is_ok(),
584 Some(b'n') | Some(b'r') | Some(b't') | Some(b'\\') | Some(b'0') | Some(b'\'')
585 | Some(b'"') => true,
586 _ => false,
587 },
588 b: Option => b.is_some(),
589 };
590 if !ok {
591 return Err(Reject);
592 }
593 let (offset: usize, _) = bytes.next().ok_or(err:Reject)?;
594 if !input.chars().as_str().is_char_boundary(index:offset) {
595 return Err(Reject);
596 }
597 let input: Cursor<'_> = input.advance(offset).parse(tag:"'")?;
598 Ok(literal_suffix(input))
599}
600
601fn character(input: Cursor) -> Result<Cursor, Reject> {
602 let input: Cursor<'_> = input.parse(tag:"'")?;
603 let mut chars: CharIndices<'_> = input.char_indices();
604 let ok: bool = match chars.next().map(|(_, ch: char)| ch) {
605 Some('\\') => match chars.next().map(|(_, ch: char)| ch) {
606 Some('x') => backslash_x_char(&mut chars).is_ok(),
607 Some('u') => backslash_u(&mut chars).is_ok(),
608 Some('n') | Some('r') | Some('t') | Some('\\') | Some('0') | Some('\'') | Some('"') => {
609 true
610 }
611 _ => false,
612 },
613 ch: Option => ch.is_some(),
614 };
615 if !ok {
616 return Err(Reject);
617 }
618 let (idx: usize, _) = chars.next().ok_or(err:Reject)?;
619 let input: Cursor<'_> = input.advance(idx).parse(tag:"'")?;
620 Ok(literal_suffix(input))
621}
622
623macro_rules! next_ch {
624 ($chars:ident @ $pat:pat $(| $rest:pat)*) => {
625 match $chars.next() {
626 Some((_, ch)) => match ch {
627 $pat $(| $rest)* => ch,
628 _ => return Err(Reject),
629 },
630 None => return Err(Reject),
631 }
632 };
633}
634
635fn backslash_x_char<I>(chars: &mut I) -> Result<(), Reject>
636where
637 I: Iterator<Item = (usize, char)>,
638{
639 next_ch!(chars @ '0'..='7');
640 next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F');
641 Ok(())
642}
643
644fn backslash_x_byte<I>(chars: &mut I) -> Result<(), Reject>
645where
646 I: Iterator<Item = (usize, u8)>,
647{
648 next_ch!(chars @ b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F');
649 next_ch!(chars @ b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F');
650 Ok(())
651}
652
653fn backslash_x_nonzero<I>(chars: &mut I) -> Result<(), Reject>
654where
655 I: Iterator<Item = (usize, char)>,
656{
657 let first: char = next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F');
658 let second: char = next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F');
659 if first == '0' && second == '0' {
660 Err(Reject)
661 } else {
662 Ok(())
663 }
664}
665
666fn backslash_u<I>(chars: &mut I) -> Result<char, Reject>
667where
668 I: Iterator<Item = (usize, char)>,
669{
670 next_ch!(chars @ '{');
671 let mut value: u32 = 0;
672 let mut len: i32 = 0;
673 for (_, ch: char) in chars {
674 let digit: u8 = match ch {
675 '0'..='9' => ch as u8 - b'0',
676 'a'..='f' => 10 + ch as u8 - b'a',
677 'A'..='F' => 10 + ch as u8 - b'A',
678 '_' if len > 0 => continue,
679 '}' if len > 0 => return char::from_u32(value).ok_or(err:Reject),
680 _ => break,
681 };
682 if len == 6 {
683 break;
684 }
685 value *= 0x10;
686 value += u32::from(digit);
687 len += 1;
688 }
689 Err(Reject)
690}
691
692fn trailing_backslash(input: &mut Cursor, mut last: u8) -> Result<(), Reject> {
693 let mut whitespace: impl Iterator = input.bytes().enumerate();
694 loop {
695 if last == b'\r' && whitespace.next().map_or(default:true, |(_, b: u8)| b != b'\n') {
696 return Err(Reject);
697 }
698 match whitespace.next() {
699 Some((_, b: u8 @ b' ')) | Some((_, b: u8 @ b'\t')) | Some((_, b: u8 @ b'\n'))
700 | Some((_, b: u8 @ b'\r')) => {
701 last = b;
702 }
703 Some((offset: usize, _)) => {
704 *input = input.advance(bytes:offset);
705 return Ok(());
706 }
707 None => return Err(Reject),
708 }
709 }
710}
711
712fn float(input: Cursor) -> Result<Cursor, Reject> {
713 let mut rest: Cursor<'_> = float_digits(input)?;
714 if let Some(ch: char) = rest.chars().next() {
715 if is_ident_start(ch) {
716 rest = ident_not_raw(input:rest)?.0;
717 }
718 }
719 word_break(input:rest)
720}
721
722fn float_digits(input: Cursor) -> Result<Cursor, Reject> {
723 let mut chars = input.chars().peekable();
724 match chars.next() {
725 Some(ch) if ch >= '0' && ch <= '9' => {}
726 _ => return Err(Reject),
727 }
728
729 let mut len = 1;
730 let mut has_dot = false;
731 let mut has_exp = false;
732 while let Some(&ch) = chars.peek() {
733 match ch {
734 '0'..='9' | '_' => {
735 chars.next();
736 len += 1;
737 }
738 '.' => {
739 if has_dot {
740 break;
741 }
742 chars.next();
743 if chars
744 .peek()
745 .map_or(false, |&ch| ch == '.' || is_ident_start(ch))
746 {
747 return Err(Reject);
748 }
749 len += 1;
750 has_dot = true;
751 }
752 'e' | 'E' => {
753 chars.next();
754 len += 1;
755 has_exp = true;
756 break;
757 }
758 _ => break,
759 }
760 }
761
762 if !(has_dot || has_exp) {
763 return Err(Reject);
764 }
765
766 if has_exp {
767 let token_before_exp = if has_dot {
768 Ok(input.advance(len - 1))
769 } else {
770 Err(Reject)
771 };
772 let mut has_sign = false;
773 let mut has_exp_value = false;
774 while let Some(&ch) = chars.peek() {
775 match ch {
776 '+' | '-' => {
777 if has_exp_value {
778 break;
779 }
780 if has_sign {
781 return token_before_exp;
782 }
783 chars.next();
784 len += 1;
785 has_sign = true;
786 }
787 '0'..='9' => {
788 chars.next();
789 len += 1;
790 has_exp_value = true;
791 }
792 '_' => {
793 chars.next();
794 len += 1;
795 }
796 _ => break,
797 }
798 }
799 if !has_exp_value {
800 return token_before_exp;
801 }
802 }
803
804 Ok(input.advance(len))
805}
806
807fn int(input: Cursor) -> Result<Cursor, Reject> {
808 let mut rest: Cursor<'_> = digits(input)?;
809 if let Some(ch: char) = rest.chars().next() {
810 if is_ident_start(ch) {
811 rest = ident_not_raw(input:rest)?.0;
812 }
813 }
814 word_break(input:rest)
815}
816
817fn digits(mut input: Cursor) -> Result<Cursor, Reject> {
818 let base = if input.starts_with("0x") {
819 input = input.advance(2);
820 16
821 } else if input.starts_with("0o") {
822 input = input.advance(2);
823 8
824 } else if input.starts_with("0b") {
825 input = input.advance(2);
826 2
827 } else {
828 10
829 };
830
831 let mut len = 0;
832 let mut empty = true;
833 for b in input.bytes() {
834 match b {
835 b'0'..=b'9' => {
836 let digit = (b - b'0') as u64;
837 if digit >= base {
838 return Err(Reject);
839 }
840 }
841 b'a'..=b'f' => {
842 let digit = 10 + (b - b'a') as u64;
843 if digit >= base {
844 break;
845 }
846 }
847 b'A'..=b'F' => {
848 let digit = 10 + (b - b'A') as u64;
849 if digit >= base {
850 break;
851 }
852 }
853 b'_' => {
854 if empty && base == 10 {
855 return Err(Reject);
856 }
857 len += 1;
858 continue;
859 }
860 _ => break,
861 };
862 len += 1;
863 empty = false;
864 }
865 if empty {
866 Err(Reject)
867 } else {
868 Ok(input.advance(len))
869 }
870}
871
872fn punct(input: Cursor) -> PResult<Punct> {
873 let (rest: Cursor<'_>, ch: char) = punct_char(input)?;
874 if ch == '\'' {
875 if ident_any(rest)?.0.starts_with_char(ch:'\'') {
876 Err(Reject)
877 } else {
878 Ok((rest, Punct::new(ch:'\'', Spacing::Joint)))
879 }
880 } else {
881 let kind: Spacing = match punct_char(input:rest) {
882 Ok(_) => Spacing::Joint,
883 Err(Reject) => Spacing::Alone,
884 };
885 Ok((rest, Punct::new(ch, spacing:kind)))
886 }
887}
888
889fn punct_char(input: Cursor) -> PResult<char> {
890 if input.starts_with("//") || input.starts_with("/*") {
891 // Do not accept `/` of a comment as a punct.
892 return Err(Reject);
893 }
894
895 let mut chars: Chars<'_> = input.chars();
896 let first: char = match chars.next() {
897 Some(ch: char) => ch,
898 None => {
899 return Err(Reject);
900 }
901 };
902 let recognized: &str = "~!@#$%^&*-=+|;:,<.>/?'";
903 if recognized.contains(first) {
904 Ok((input.advance(bytes:first.len_utf8()), first))
905 } else {
906 Err(Reject)
907 }
908}
909
910fn doc_comment<'a>(input: Cursor<'a>, trees: &mut TokenStreamBuilder) -> PResult<'a, ()> {
911 #[cfg(span_locations)]
912 let lo = input.off;
913 let (rest, (comment, inner)) = doc_comment_contents(input)?;
914 let span = crate::Span::_new_fallback(Span {
915 #[cfg(span_locations)]
916 lo,
917 #[cfg(span_locations)]
918 hi: rest.off,
919 });
920
921 let mut scan_for_bare_cr = comment;
922 while let Some(cr) = scan_for_bare_cr.find('\r') {
923 let rest = &scan_for_bare_cr[cr + 1..];
924 if !rest.starts_with('\n') {
925 return Err(Reject);
926 }
927 scan_for_bare_cr = rest;
928 }
929
930 let mut pound = Punct::new('#', Spacing::Alone);
931 pound.set_span(span);
932 trees.push_token_from_parser(TokenTree::Punct(pound));
933
934 if inner {
935 let mut bang = Punct::new('!', Spacing::Alone);
936 bang.set_span(span);
937 trees.push_token_from_parser(TokenTree::Punct(bang));
938 }
939
940 let doc_ident = crate::Ident::new("doc", span);
941 let mut equal = Punct::new('=', Spacing::Alone);
942 equal.set_span(span);
943 let mut literal = crate::Literal::string(comment);
944 literal.set_span(span);
945 let mut bracketed = TokenStreamBuilder::with_capacity(3);
946 bracketed.push_token_from_parser(TokenTree::Ident(doc_ident));
947 bracketed.push_token_from_parser(TokenTree::Punct(equal));
948 bracketed.push_token_from_parser(TokenTree::Literal(literal));
949 let group = Group::new(Delimiter::Bracket, bracketed.build());
950 let mut group = crate::Group::_new_fallback(group);
951 group.set_span(span);
952 trees.push_token_from_parser(TokenTree::Group(group));
953
954 Ok((rest, ()))
955}
956
957fn doc_comment_contents(input: Cursor) -> PResult<(&str, bool)> {
958 if input.starts_with("//!") {
959 let input: Cursor<'_> = input.advance(bytes:3);
960 let (input: Cursor<'_>, s: &str) = take_until_newline_or_eof(input);
961 Ok((input, (s, true)))
962 } else if input.starts_with("/*!") {
963 let (input: Cursor<'_>, s: &str) = block_comment(input)?;
964 Ok((input, (&s[3..s.len() - 2], true)))
965 } else if input.starts_with("///") {
966 let input: Cursor<'_> = input.advance(bytes:3);
967 if input.starts_with_char(ch:'/') {
968 return Err(Reject);
969 }
970 let (input: Cursor<'_>, s: &str) = take_until_newline_or_eof(input);
971 Ok((input, (s, false)))
972 } else if input.starts_with("/**") && !input.rest[3..].starts_with('*') {
973 let (input: Cursor<'_>, s: &str) = block_comment(input)?;
974 Ok((input, (&s[3..s.len() - 2], false)))
975 } else {
976 Err(Reject)
977 }
978}
979
980fn take_until_newline_or_eof(input: Cursor) -> (Cursor, &str) {
981 let chars: CharIndices<'_> = input.char_indices();
982
983 for (i: usize, ch: char) in chars {
984 if ch == '\n' {
985 return (input.advance(bytes:i), &input.rest[..i]);
986 } else if ch == '\r' && input.rest[i + 1..].starts_with('\n') {
987 return (input.advance(bytes:i + 1), &input.rest[..i]);
988 }
989 }
990
991 (input.advance(bytes:input.len()), input.rest)
992}
993