parse.rs - Codebrowser

1	use crate::fallback::{
2	self, is_ident_continue, is_ident_start, Group, LexError, Literal, Span, TokenStream,
3	TokenStreamBuilder,
4	};
5	use crate::{Delimiter, Punct, Spacing, TokenTree};
6	use core::char;
7	use core::str::{Bytes, CharIndices, Chars};
8
/// A position in the text being lexed.
///
/// `rest` holds the input that has not been consumed yet. When the
/// `span_locations` cfg is enabled, `off` is bumped by the number of
/// characters (not bytes) consumed each time the cursor advances.
#[derive(Clone, Copy, PartialEq, Eq)]
pub(crate) struct Cursor<'a> {
    pub rest: &'a str,
    #[cfg(span_locations)]
    pub off: u32,
}

impl<'a> Cursor<'a> {
    /// Returns a new cursor positioned `bytes` bytes further into the input.
    ///
    /// `bytes` must lie on a char boundary within `rest`; `str::split_at`
    /// panics otherwise.
    pub fn advance(&self, bytes: usize) -> Cursor<'a> {
        // The underscore keeps the binding warning-free when span tracking
        // is compiled out and the consumed text is not inspected.
        let (_consumed, remaining) = self.rest.split_at(bytes);
        Cursor {
            rest: remaining,
            #[cfg(span_locations)]
            off: self.off + _consumed.chars().count() as u32,
        }
    }

    /// Whether the remaining input begins with the string `s`.
    pub fn starts_with(&self, s: &str) -> bool {
        self.rest.starts_with(s)
    }

    /// Whether the remaining input begins with the character `ch`.
    pub fn starts_with_char(&self, ch: char) -> bool {
        self.rest.starts_with(ch)
    }

    /// Whether the first remaining character satisfies the predicate `f`.
    pub fn starts_with_fn<Pattern>(&self, f: Pattern) -> bool
    where
        Pattern: FnMut(char) -> bool,
    {
        self.rest.starts_with(f)
    }

    /// Whether all input has been consumed.
    pub fn is_empty(&self) -> bool {
        self.rest.is_empty()
    }

    /// Number of bytes of input remaining.
    fn len(&self) -> usize {
        self.rest.len()
    }

    /// The remaining input as raw bytes.
    fn as_bytes(&self) -> &'a [u8] {
        self.rest.as_bytes()
    }

    /// Iterator over the remaining bytes.
    fn bytes(&self) -> Bytes<'a> {
        self.rest.bytes()
    }

    /// Iterator over the remaining characters.
    fn chars(&self) -> Chars<'a> {
        self.rest.chars()
    }

    /// Iterator over the remaining characters paired with their byte offsets.
    fn char_indices(&self) -> CharIndices<'a> {
        self.rest.char_indices()
    }

    /// Consumes the literal `tag` from the front of the input, or rejects if
    /// the input does not start with it.
    fn parse(&self, tag: &str) -> Result<Cursor<'a>, Reject> {
        if !self.starts_with(tag) {
            return Err(Reject);
        }
        Ok(self.advance(tag.len()))
    }
}

/// Error value returned by the parsers in this file when the input does not
/// match.
pub(crate) struct Reject;

/// Result of a parser: the cursor after the parsed item plus the item itself.
type PResult<'a, O> = Result<(Cursor<'a>, O), Reject>;
76
77	fn skip_whitespace(input: Cursor) -> Cursor {
78	let mut s = input;
79
80	while !s.is_empty() {
81	let byte = s.as_bytes()[`0`];
82	if byte == b'/' {
83	if s.starts_with("//")
84	&& (!s.starts_with("///") \|\| s.starts_with("////"))
85	&& !s.starts_with("//!")
86	{
87	let (cursor, _) = take_until_newline_or_eof(s);
88	s = cursor;
89	continue;
90	} else if s.starts_with("/**/") {
91	s = s.advance(`4`);
92	continue;
93	} else if s.starts_with("/*")
94	&& (!s.starts_with("/") \|\| s.starts_with("/*"))
95	&& !s.starts_with("/*!")
96	{
97	match block_comment(s) {
98	Ok((rest, _)) => {
99	s = rest;
100	continue;
101	}
102	Err(Reject) => return s,
103	}
104	}
105	}
106	match byte {
107	b' ' \| `0x09`..=`0x0d` => {
108	s = s.advance(`1`);
109	continue;
110	}
111	b if b.is_ascii() => {}
112	_ => {
113	let ch = s.chars().next().unwrap();
114	if is_whitespace(ch) {
115	s = s.advance(ch.len_utf8());
116	continue;
117	}
118	}
119	}
120	return s;
121	}
122	s
123	}
124
125	fn block_comment(input: Cursor) -> PResult<&str> {
126	if !input.starts_with("/*") {
127	return Err(Reject);
128	}
129
130	let mut depth = `0usize`;
131	let bytes = input.as_bytes();
132	let mut i = `0usize`;
133	let upper = bytes.len() - `1`;
134
135	while i < upper {
136	if bytes[i] == b'/' && bytes[i + `1`] == b'*' {
137	depth += `1`;
138	i += `1`; // eat ''*
139	} else if bytes[i] == b'*' && bytes[i + `1`] == b'/' {
140	depth -= `1`;
141	if depth == `0` {
142	return Ok((input.advance(i + `2`), &input.rest[..i + `2`]));
143	}
144	i += `1`; // eat '/'
145	}
146	i += `1`;
147	}
148
149	Err(Reject)
150	}
151
/// Whether `ch` counts as whitespace for the lexer.
fn is_whitespace(ch: char) -> bool {
    // Rust treats left-to-right mark and right-to-left mark as whitespace,
    // in addition to everything `char::is_whitespace` accepts.
    matches!(ch, '\u{200e}' | '\u{200f}') || ch.is_whitespace()
}
156
157	fn word_break(input: Cursor) -> Result<Cursor, Reject> {
158	match input.chars().next() {
159	Some(ch) if is_ident_continue(ch) => Err(Reject),
160	Some(_) \| None => Ok(input),
161	}
162	}
163
/// Rustc's representation of a macro expansion error in expression position
/// or type position.
const ERROR: &str = "(/ERROR/)";
167
168	pub(crate) fn token_stream(mut input: Cursor) -> Result<TokenStream, LexError> {
169	let mut trees = TokenStreamBuilder::new();
170	let mut stack = Vec::new();
171
172	loop {
173	input = skip_whitespace(input);
174
175	if let Ok((rest, ())) = doc_comment(input, &mut trees) {
176	input = rest;
177	continue;
178	}
179
180	#[cfg(span_locations)]
181	let lo = input.off;
182
183	let first = match input.bytes().next() {
184	Some(first) => first,
185	None => match stack.last() {
186	None => return Ok(trees.build()),
187	#[cfg(span_locations)]
188	Some((lo, _frame)) => {
189	return Err(LexError {
190	span: Span { lo: lo, hi: lo },
191	})
192	}
193	#[cfg(not(span_locations))]
194	Some(_frame) => return Err(LexError { span: Span {} }),
195	},
196	};
197
198	if let Some(open_delimiter) = match first {
199	b'(' if !input.starts_with(ERROR) => Some(Delimiter::Parenthesis),
200	b'[' => Some(Delimiter::Bracket),
201	b'{' => Some(Delimiter::Brace),
202	_ => None,
203	} {
204	input = input.advance(`1`);
205	let frame = (open_delimiter, trees);
206	#[cfg(span_locations)]
207	let frame = (lo, frame);
208	stack.push(frame);
209	trees = TokenStreamBuilder::new();
210	} else if let Some(close_delimiter) = match first {
211	b')' => Some(Delimiter::Parenthesis),
212	b']' => Some(Delimiter::Bracket),
213	b'}' => Some(Delimiter::Brace),
214	_ => None,
215	} {
216	let frame = match stack.pop() {
217	Some(frame) => frame,
218	None => return Err(lex_error(input)),
219	};
220	#[cfg(span_locations)]
221	let (lo, frame) = frame;
222	let (open_delimiter, outer) = frame;
223	if open_delimiter != close_delimiter {
224	return Err(lex_error(input));
225	}
226	input = input.advance(`1`);
227	let mut g = Group::new(open_delimiter, trees.build());
228	g.set_span(Span {
229	#[cfg(span_locations)]
230	lo,
231	#[cfg(span_locations)]
232	hi: input.off,
233	});
234	trees = outer;
235	trees.push_token_from_parser(TokenTree::Group(crate::Group::_new_fallback(g)));
236	} else {
237	let (rest, mut tt) = match leaf_token(input) {
238	Ok((rest, tt)) => (rest, tt),
239	Err(Reject) => return Err(lex_error(input)),
240	};
241	tt.set_span(crate::Span::_new_fallback(Span {
242	#[cfg(span_locations)]
243	lo,
244	#[cfg(span_locations)]
245	hi: rest.off,
246	}));
247	trees.push_token_from_parser(tt);
248	input = rest;
249	}
250	}
251	}
252
253	fn lex_error(cursor: Cursor) -> LexError {
254	#[cfg(not(span_locations))]
255	let _ = cursor;
256	LexError {
257	span: Span {
258	#[cfg(span_locations)]
259	lo: cursor.off,
260	#[cfg(span_locations)]
261	hi: cursor.off,
262	},
263	}
264	}
265
266	fn leaf_token(input: Cursor) -> PResult<TokenTree> {
267	if let Ok((input, l)) = literal(input) {
268	// must be parsed before ident
269	Ok((input, TokenTree::Literal(crate::Literal::_new_fallback(l))))
270	} else if let Ok((input, p)) = punct(input) {
271	Ok((input, TokenTree::Punct(p)))
272	} else if let Ok((input, i)) = ident(input) {
273	Ok((input, TokenTree::Ident(i)))
274	} else if input.starts_with(ERROR) {
275	let rest = input.advance(ERROR.len());
276	let repr = crate::Literal::_new_fallback(Literal::_new(ERROR.to_owned()));
277	Ok((rest, TokenTree::Literal(repr)))
278	} else {
279	Err(Reject)
280	}
281	}
282
283	fn ident(input: Cursor) -> PResult<crate::Ident> {
284	if [
285	"r`\"`", "r#`\"`", "r##", "b`\"`", "b`\'`", "br`\"`", "br#", "c`\"`", "cr`\"`", "cr#",
286	]
287	.iter()
288	.any(\|prefix\| input.starts_with(prefix))
289	{
290	Err(Reject)
291	} else {
292	ident_any(input)
293	}
294	}
295
296	fn ident_any(input: Cursor) -> PResult<crate::Ident> {
297	let raw = input.starts_with("r#");
298	let rest = input.advance((raw as usize) << `1`);
299
300	let (rest, sym) = ident_not_raw(rest)?;
301
302	if !raw {
303	let ident = crate::Ident::_new(crate::imp::Ident::new_unchecked(
304	sym,
305	fallback::Span::call_site(),
306	));
307	return Ok((rest, ident));
308	}
309
310	match sym {
311	"_" \| "super" \| "self" \| "Self" \| "crate" => return Err(Reject),
312	_ => {}
313	}
314
315	let ident = crate::Ident::_new(crate::imp::Ident::new_raw_unchecked(
316	sym,
317	fallback::Span::call_site(),
318	));
319	Ok((rest, ident))
320	}
321
322	fn ident_not_raw(input: Cursor) -> PResult<&str> {
323	let mut chars = input.char_indices();
324
325	match chars.next() {
326	Some((_, ch)) if is_ident_start(ch) => {}
327	_ => return Err(Reject),
328	}
329
330	let mut end = input.len();
331	for (i, ch) in chars {
332	if !is_ident_continue(ch) {
333	end = i;
334	break;
335	}
336	}
337
338	Ok((input.advance(end), &input.rest[..end]))
339	}
340
341	pub(crate) fn literal(input: Cursor) -> PResult<Literal> {
342	let rest = literal_nocapture(input)?;
343	let end = input.len() - rest.len();
344	Ok((rest, Literal::_new(input.rest[..end].to_string())))
345	}
346
347	fn literal_nocapture(input: Cursor) -> Result<Cursor, Reject> {
348	if let Ok(ok) = string(input) {
349	Ok(ok)
350	} else if let Ok(ok) = byte_string(input) {
351	Ok(ok)
352	} else if let Ok(ok) = c_string(input) {
353	Ok(ok)
354	} else if let Ok(ok) = byte(input) {
355	Ok(ok)
356	} else if let Ok(ok) = character(input) {
357	Ok(ok)
358	} else if let Ok(ok) = float(input) {
359	Ok(ok)
360	} else if let Ok(ok) = int(input) {
361	Ok(ok)
362	} else {
363	Err(Reject)
364	}
365	}
366
367	fn literal_suffix(input: Cursor) -> Cursor {
368	match ident_not_raw(input) {
369	Ok((input, _)) => input,
370	Err(Reject) => input,
371	}
372	}
373
374	fn string(input: Cursor) -> Result<Cursor, Reject> {
375	if let Ok(input) = input.parse("`\"`") {
376	cooked_string(input)
377	} else if let Ok(input) = input.parse("r") {
378	raw_string(input)
379	} else {
380	Err(Reject)
381	}
382	}
383
384	fn cooked_string(mut input: Cursor) -> Result<Cursor, Reject> {
385	let mut chars = input.char_indices();
386
387	while let Some((i, ch)) = chars.next() {
388	match ch {
389	'"' => {
390	let input = input.advance(i + `1`);
391	return Ok(literal_suffix(input));
392	}
393	'`\r`' => match chars.next() {
394	Some((_, '`\n`')) => {}
395	_ => break,
396	},
397	'`\\`' => match chars.next() {
398	Some((_, 'x')) => {
399	backslash_x_char(&mut chars)?;
400	}
401	Some((_, 'n' \| 'r' \| 't' \| '`\\`' \| '`\'`' \| '"' \| '0')) => {}
402	Some((_, 'u')) => {
403	backslash_u(&mut chars)?;
404	}
405	Some((newline, ch @ ('`\n`' \| '`\r`'))) => {
406	input = input.advance(newline + `1`);
407	trailing_backslash(&mut input, ch as u8)?;
408	chars = input.char_indices();
409	}
410	_ => break,
411	},
412	_ch => {}
413	}
414	}
415	Err(Reject)
416	}
417
418	fn raw_string(input: Cursor) -> Result<Cursor, Reject> {
419	let (input, delimiter) = delimiter_of_raw_string(input)?;
420	let mut bytes = input.bytes().enumerate();
421	while let Some((i, byte)) = bytes.next() {
422	match byte {
423	b'"' if input.rest[i + `1`..].starts_with(delimiter) => {
424	let rest = input.advance(i + `1` + delimiter.len());
425	return Ok(literal_suffix(rest));
426	}
427	b'`\r`' => match bytes.next() {
428	Some((_, b'`\n`')) => {}
429	_ => break,
430	},
431	_ => {}
432	}
433	}
434	Err(Reject)
435	}
436
437	fn byte_string(input: Cursor) -> Result<Cursor, Reject> {
438	if let Ok(input) = input.parse("b`\"`") {
439	cooked_byte_string(input)
440	} else if let Ok(input) = input.parse("br") {
441	raw_byte_string(input)
442	} else {
443	Err(Reject)
444	}
445	}
446
447	fn cooked_byte_string(mut input: Cursor) -> Result<Cursor, Reject> {
448	let mut bytes = input.bytes().enumerate();
449	while let Some((offset, b)) = bytes.next() {
450	match b {
451	b'"' => {
452	let input = input.advance(offset + `1`);
453	return Ok(literal_suffix(input));
454	}
455	b'`\r`' => match bytes.next() {
456	Some((_, b'`\n`')) => {}
457	_ => break,
458	},
459	b'`\\`' => match bytes.next() {
460	Some((_, b'x')) => {
461	backslash_x_byte(&mut bytes)?;
462	}
463	Some((_, b'n' \| b'r' \| b't' \| b'`\\`' \| b'0' \| b'`\'`' \| b'"')) => {}
464	Some((newline, b @ (b'`\n`' \| b'`\r`'))) => {
465	input = input.advance(newline + `1`);
466	trailing_backslash(&mut input, b)?;
467	bytes = input.bytes().enumerate();
468	}
469	_ => break,
470	},
471	b if b.is_ascii() => {}
472	_ => break,
473	}
474	}
475	Err(Reject)
476	}
477
478	fn delimiter_of_raw_string(input: Cursor) -> PResult<&str> {
479	for (i, byte) in input.bytes().enumerate() {
480	match byte {
481	b'"' => {
482	if i > `255` {
483	// https://github.com/rust-lang/rust/pull/95251
484	return Err(Reject);
485	}
486	return Ok((input.advance(i + `1`), &input.rest[..i]));
487	}
488	b'#' => {}
489	_ => break,
490	}
491	}
492	Err(Reject)
493	}
494
495	fn raw_byte_string(input: Cursor) -> Result<Cursor, Reject> {
496	let (input, delimiter) = delimiter_of_raw_string(input)?;
497	let mut bytes = input.bytes().enumerate();
498	while let Some((i, byte)) = bytes.next() {
499	match byte {
500	b'"' if input.rest[i + `1`..].starts_with(delimiter) => {
501	let rest = input.advance(i + `1` + delimiter.len());
502	return Ok(literal_suffix(rest));
503	}
504	b'`\r`' => match bytes.next() {
505	Some((_, b'`\n`')) => {}
506	_ => break,
507	},
508	other => {
509	if !other.is_ascii() {
510	break;
511	}
512	}
513	}
514	}
515	Err(Reject)
516	}
517
518	fn c_string(input: Cursor) -> Result<Cursor, Reject> {
519	if let Ok(input) = input.parse("c`\"`") {
520	cooked_c_string(input)
521	} else if let Ok(input) = input.parse("cr") {
522	raw_c_string(input)
523	} else {
524	Err(Reject)
525	}
526	}
527
528	fn raw_c_string(input: Cursor) -> Result<Cursor, Reject> {
529	let (input, delimiter) = delimiter_of_raw_string(input)?;
530	let mut bytes = input.bytes().enumerate();
531	while let Some((i, byte)) = bytes.next() {
532	match byte {
533	b'"' if input.rest[i + `1`..].starts_with(delimiter) => {
534	let rest = input.advance(i + `1` + delimiter.len());
535	return Ok(literal_suffix(rest));
536	}
537	b'`\r`' => match bytes.next() {
538	Some((_, b'`\n`')) => {}
539	_ => break,
540	},
541	b'`\0`' => break,
542	_ => {}
543	}
544	}
545	Err(Reject)
546	}
547
548	fn cooked_c_string(mut input: Cursor) -> Result<Cursor, Reject> {
549	let mut chars = input.char_indices();
550
551	while let Some((i, ch)) = chars.next() {
552	match ch {
553	'"' => {
554	let input = input.advance(i + `1`);
555	return Ok(literal_suffix(input));
556	}
557	'`\r`' => match chars.next() {
558	Some((_, '`\n`')) => {}
559	_ => break,
560	},
561	'`\\`' => match chars.next() {
562	Some((_, 'x')) => {
563	backslash_x_nonzero(&mut chars)?;
564	}
565	Some((_, 'n' \| 'r' \| 't' \| '`\\`' \| '`\'`' \| '"')) => {}
566	Some((_, 'u')) => {
567	if backslash_u(&mut chars)? == '`\0`' {
568	break;
569	}
570	}
571	Some((newline, ch @ ('`\n`' \| '`\r`'))) => {
572	input = input.advance(newline + `1`);
573	trailing_backslash(&mut input, ch as u8)?;
574	chars = input.char_indices();
575	}
576	_ => break,
577	},
578	'`\0`' => break,
579	_ch => {}
580	}
581	}
582	Err(Reject)
583	}
584
585	fn byte(input: Cursor) -> Result<Cursor, Reject> {
586	let input = input.parse("b'")?;
587	let mut bytes = input.bytes().enumerate();
588	let ok = match bytes.next().map(\|(_, b)\| b) {
589	Some(b'`\\`') => match bytes.next().map(\|(_, b)\| b) {
590	Some(b'x') => backslash_x_byte(&mut bytes).is_ok(),
591	Some(b'n' \| b'r' \| b't' \| b'`\\`' \| b'0' \| b'`\'`' \| b'"') => `true`,
592	_ => `false`,
593	},
594	b => b.is_some(),
595	};
596	if !ok {
597	return Err(Reject);
598	}
599	let (offset, _) = bytes.next().ok_or(Reject)?;
600	if !input.chars().as_str().is_char_boundary(offset) {
601	return Err(Reject);
602	}
603	let input = input.advance(offset).parse("'")?;
604	Ok(literal_suffix(input))
605	}
606
607	fn character(input: Cursor) -> Result<Cursor, Reject> {
608	let input = input.parse("'")?;
609	let mut chars = input.char_indices();
610	let ok = match chars.next().map(\|(_, ch)\| ch) {
611	Some('`\\`') => match chars.next().map(\|(_, ch)\| ch) {
612	Some('x') => backslash_x_char(&mut chars).is_ok(),
613	Some('u') => backslash_u(&mut chars).is_ok(),
614	Some('n' \| 'r' \| 't' \| '`\\`' \| '0' \| '`\'`' \| '"') => `true`,
615	_ => `false`,
616	},
617	ch => ch.is_some(),
618	};
619	if !ok {
620	return Err(Reject);
621	}
622	let (idx, _) = chars.next().ok_or(Reject)?;
623	let input = input.advance(idx).parse("'")?;
624	Ok(literal_suffix(input))
625	}
626
// Pulls the next `(index, item)` pair from `$chars` and evaluates to the item
// if it matches `$pat`. Otherwise (no match, or iterator exhausted) returns
// `Err(Reject)` from the enclosing function.
macro_rules! next_ch {
    ($chars:ident @ $pat:pat) => {
        match $chars.next().map(|(_, ch)| ch) {
            None => return Err(Reject),
            Some(ch) => match ch {
                $pat => ch,
                _ => return Err(Reject),
            },
        }
    };
}
638
639	fn backslash_x_char<I>(chars: &mut I) -> Result<(), Reject>
640	where
641	I: Iterator<Item = (usize, char)>,
642	{
643	next_ch!(chars @ '0'..='7');
644	next_ch!(chars @ '0'..='9' \| 'a'..='f' \| 'A'..='F');
645	Ok(())
646	}
647
648	fn backslash_x_byte<I>(chars: &mut I) -> Result<(), Reject>
649	where
650	I: Iterator<Item = (usize, u8)>,
651	{
652	next_ch!(chars @ b'0'..=b'9' \| b'a'..=b'f' \| b'A'..=b'F');
653	next_ch!(chars @ b'0'..=b'9' \| b'a'..=b'f' \| b'A'..=b'F');
654	Ok(())
655	}
656
657	fn backslash_x_nonzero<I>(chars: &mut I) -> Result<(), Reject>
658	where
659	I: Iterator<Item = (usize, char)>,
660	{
661	let first = next_ch!(chars @ '0'..='9' \| 'a'..='f' \| 'A'..='F');
662	let second = next_ch!(chars @ '0'..='9' \| 'a'..='f' \| 'A'..='F');
663	if first == '0' && second == '0' {
664	Err(Reject)
665	} else {
666	Ok(())
667	}
668	}
669
670	fn backslash_u<I>(chars: &mut I) -> Result<char, Reject>
671	where
672	I: Iterator<Item = (usize, char)>,
673	{
674	next_ch!(chars @ '{');
675	let mut value = `0`;
676	let mut len = `0`;
677	for (_, ch) in chars {
678	let digit = match ch {
679	'0'..='9' => ch as u8 - b'0',
680	'a'..='f' => `10` + ch as u8 - b'a',
681	'A'..='F' => `10` + ch as u8 - b'A',
682	'_' if len > `0` => continue,
683	'}' if len > `0` => return char::from_u32(value).ok_or(Reject),
684	_ => break,
685	};
686	if len == `6` {
687	break;
688	}
689	value *= `0x10`;
690	value += u32::from(digit);
691	len += `1`;
692	}
693	Err(Reject)
694	}
695
696	fn trailing_backslash(input: &mut Cursor, mut last: u8) -> Result<(), Reject> {
697	let mut whitespace = input.bytes().enumerate();
698	loop {
699	if last == b'`\r`' && whitespace.next().map_or(`true`, \|(_, b)\| b != b'`\n`') {
700	return Err(Reject);
701	}
702	match whitespace.next() {
703	Some((_, b @ (b' ' \| b'`\t`' \| b'`\n`' \| b'`\r`'))) => {
704	last = b;
705	}
706	Some((offset, _)) => {
707	*input = input.advance(offset);
708	return Ok(());
709	}
710	None => return Err(Reject),
711	}
712	}
713	}
714
715	fn float(input: Cursor) -> Result<Cursor, Reject> {
716	let mut rest = float_digits(input)?;
717	if let Some(ch) = rest.chars().next() {
718	if is_ident_start(ch) {
719	rest = ident_not_raw(rest)?.0;
720	}
721	}
722	word_break(rest)
723	}
724
725	fn float_digits(input: Cursor) -> Result<Cursor, Reject> {
726	let mut chars = input.chars().peekable();
727	match chars.next() {
728	Some(ch) if '0' <= ch && ch <= '9' => {}
729	_ => return Err(Reject),
730	}
731
732	let mut len = `1`;
733	let mut has_dot = `false`;
734	let mut has_exp = `false`;
735	while let Some(&ch) = chars.peek() {
736	match ch {
737	'0'..='9' \| '_' => {
738	chars.next();
739	len += `1`;
740	}
741	'.' => {
742	if has_dot {
743	break;
744	}
745	chars.next();
746	if chars
747	.peek()
748	.map_or(`false`, \|&ch\| ch == '.' \|\| is_ident_start(ch))
749	{
750	return Err(Reject);
751	}
752	len += `1`;
753	has_dot = `true`;
754	}
755	'e' \| 'E' => {
756	chars.next();
757	len += `1`;
758	has_exp = `true`;
759	break;
760	}
761	_ => break,
762	}
763	}
764
765	if !(has_dot \|\| has_exp) {
766	return Err(Reject);
767	}
768
769	if has_exp {
770	let token_before_exp = if has_dot {
771	Ok(input.advance(len - `1`))
772	} else {
773	Err(Reject)
774	};
775	let mut has_sign = `false`;
776	let mut has_exp_value = `false`;
777	while let Some(&ch) = chars.peek() {
778	match ch {
779	'+' \| '-' => {
780	if has_exp_value {
781	break;
782	}
783	if has_sign {
784	return token_before_exp;
785	}
786	chars.next();
787	len += `1`;
788	has_sign = `true`;
789	}
790	'0'..='9' => {
791	chars.next();
792	len += `1`;
793	has_exp_value = `true`;
794	}
795	'_' => {
796	chars.next();
797	len += `1`;
798	}
799	_ => break,
800	}
801	}
802	if !has_exp_value {
803	return token_before_exp;
804	}
805	}
806
807	Ok(input.advance(len))
808	}
809
810	fn int(input: Cursor) -> Result<Cursor, Reject> {
811	let mut rest = digits(input)?;
812	if let Some(ch) = rest.chars().next() {
813	if is_ident_start(ch) {
814	rest = ident_not_raw(rest)?.0;
815	}
816	}
817	word_break(rest)
818	}
819
820	fn digits(mut input: Cursor) -> Result<Cursor, Reject> {
821	let base = if input.starts_with("0x") {
822	input = input.advance(`2`);
823	`16`
824	} else if input.starts_with("0o") {
825	input = input.advance(`2`);
826	`8`
827	} else if input.starts_with("0b") {
828	input = input.advance(`2`);
829	`2`
830	} else {
831	`10`
832	};
833
834	let mut len = `0`;
835	let mut empty = `true`;
836	for b in input.bytes() {
837	match b {
838	b'0'..=b'9' => {
839	let digit = (b - b'0') as u64;
840	if digit >= base {
841	return Err(Reject);
842	}
843	}
844	b'a'..=b'f' => {
845	let digit = `10` + (b - b'a') as u64;
846	if digit >= base {
847	break;
848	}
849	}
850	b'A'..=b'F' => {
851	let digit = `10` + (b - b'A') as u64;
852	if digit >= base {
853	break;
854	}
855	}
856	b'_' => {
857	if empty && base == `10` {
858	return Err(Reject);
859	}
860	len += `1`;
861	continue;
862	}
863	_ => break,
864	};
865	len += `1`;
866	empty = `false`;
867	}
868	if empty {
869	Err(Reject)
870	} else {
871	Ok(input.advance(len))
872	}
873	}
874
875	fn punct(input: Cursor) -> PResult<Punct> {
876	let (rest, ch) = punct_char(input)?;
877	if ch == '`\'`' {
878	if ident_any(rest)?.0.starts_with_char('`\'`') {
879	Err(Reject)
880	} else {
881	Ok((rest, Punct::new('`\'`', Spacing::Joint)))
882	}
883	} else {
884	let kind = match punct_char(rest) {
885	Ok(_) => Spacing::Joint,
886	Err(Reject) => Spacing::Alone,
887	};
888	Ok((rest, Punct::new(ch, kind)))
889	}
890	}
891
892	fn punct_char(input: Cursor) -> PResult<char> {
893	if input.starts_with("//") \|\| input.starts_with("/*") {
894	// Do not accept `/` of a comment as a punct.
895	return Err(Reject);
896	}
897
898	let mut chars = input.chars();
899	let first = match chars.next() {
900	Some(ch) => ch,
901	None => {
902	return Err(Reject);
903	}
904	};
905	let recognized = "~!@#$%^&*-=+\|;:,<.>/?'";
906	if recognized.contains(first) {
907	Ok((input.advance(first.len_utf8()), first))
908	} else {
909	Err(Reject)
910	}
911	}
912
913	fn doc_comment<'a>(input: Cursor<'a>, trees: &mut TokenStreamBuilder) -> PResult<'a, ()> {
914	#[cfg(span_locations)]
915	let lo = input.off;
916	let (rest, (comment, inner)) = doc_comment_contents(input)?;
917	let fallback_span = Span {
918	#[cfg(span_locations)]
919	lo,
920	#[cfg(span_locations)]
921	hi: rest.off,
922	};
923	let span = crate::Span::_new_fallback(fallback_span);
924
925	let mut scan_for_bare_cr = comment;
926	while let Some(cr) = scan_for_bare_cr.find('`\r`') {
927	let rest = &scan_for_bare_cr[cr + `1`..];
928	if !rest.starts_with('`\n`') {
929	return Err(Reject);
930	}
931	scan_for_bare_cr = rest;
932	}
933
934	let mut pound = Punct::new('#', Spacing::Alone);
935	pound.set_span(span);
936	trees.push_token_from_parser(TokenTree::Punct(pound));
937
938	if inner {
939	let mut bang = Punct::new('!', Spacing::Alone);
940	bang.set_span(span);
941	trees.push_token_from_parser(TokenTree::Punct(bang));
942	}
943
944	let doc_ident = crate::Ident::_new(crate::imp::Ident::new_unchecked("doc", fallback_span));
945	let mut equal = Punct::new('=', Spacing::Alone);
946	equal.set_span(span);
947	let mut literal = crate::Literal::string(comment);
948	literal.set_span(span);
949	let mut bracketed = TokenStreamBuilder::with_capacity(`3`);
950	bracketed.push_token_from_parser(TokenTree::Ident(doc_ident));
951	bracketed.push_token_from_parser(TokenTree::Punct(equal));
952	bracketed.push_token_from_parser(TokenTree::Literal(literal));
953	let group = Group::new(Delimiter::Bracket, bracketed.build());
954	let mut group = crate::Group::_new_fallback(group);
955	group.set_span(span);
956	trees.push_token_from_parser(TokenTree::Group(group));
957
958	Ok((rest, ()))
959	}
960
961	fn doc_comment_contents(input: Cursor) -> PResult<(&str, bool)> {
962	if input.starts_with("//!") {
963	let input = input.advance(`3`);
964	let (input, s) = take_until_newline_or_eof(input);
965	Ok((input, (s, `true`)))
966	} else if input.starts_with("/*!") {
967	let (input, s) = block_comment(input)?;
968	Ok((input, (&s[`3`..s.len() - `2`], `true`)))
969	} else if input.starts_with("///") {
970	let input = input.advance(`3`);
971	if input.starts_with_char('/') {
972	return Err(Reject);
973	}
974	let (input, s) = take_until_newline_or_eof(input);
975	Ok((input, (s, `false`)))
976	} else if input.starts_with("/*") && !input.rest[`3`..].starts_with('') {
977	let (input, s) = block_comment(input)?;
978	Ok((input, (&s[`3`..s.len() - `2`], `false`)))
979	} else {
980	Err(Reject)
981	}
982	}
983
984	fn take_until_newline_or_eof(input: Cursor) -> (Cursor, &str) {
985	let chars = input.char_indices();
986
987	for (i, ch) in chars {
988	if ch == '`\n`' {
989	return (input.advance(i), &input.rest[..i]);
990	} else if ch == '`\r`' && input.rest[i + `1`..].starts_with('`\n`') {
991	return (input.advance(i + `1`), &input.rest[..i]);
992	}
993	}
994
995	(input.advance(input.len()), input.rest)
996	}
997