1use std::borrow::Cow;
2use std::fmt;
3use std::sync::Arc;
4
5pub use LitKind::*;
6pub use Nonterminal::*;
7pub use NtExprKind::*;
8pub use NtPatKind::*;
9pub use TokenKind::*;
10use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
11use rustc_macros::{Decodable, Encodable, HashStable_Generic};
12use rustc_span::edition::Edition;
13use rustc_span::{DUMMY_SP, ErrorGuaranteed, Span, kw, sym};
14#[allow(clippy::useless_attribute)] // FIXME: following use of `hidden_glob_reexports` incorrectly triggers `useless_attribute` lint.
15#[allow(hidden_glob_reexports)]
16use rustc_span::{Ident, Symbol};
17
18use crate::ast;
19use crate::ptr::P;
20use crate::util::case::Case;
21
/// The syntactic form of a comment.
///
/// Carried by `TokenKind::DocComment` together with the attribute style and
/// the comment's text.
#[derive(Clone, Copy, PartialEq, Encodable, Decodable, Debug, HashStable_Generic)]
pub enum CommentKind {
    /// A line comment. NOTE(review): presumably the `//` family — confirm against the lexer.
    Line,
    /// A block comment. NOTE(review): presumably the `/* */` family — confirm against the lexer.
    Block,
}
27
/// Records where an invisible delimiter (`Delimiter::Invisible`) came from.
// This type must not implement `Hash` due to the unusual `PartialEq` impl below
// (that impl reports every pair of values as unequal).
#[derive(Copy, Clone, Debug, Encodable, Decodable, HashStable_Generic)]
pub enum InvisibleOrigin {
    // From the expansion of a metavariable in a declarative macro. The payload
    // records which kind of metavariable was expanded.
    MetaVar(MetaVarKind),

    // Converted from `proc_macro::Delimiter` in
    // `proc_macro::Delimiter::to_internal`, i.e. returned by a proc macro.
    ProcMacro,

    // Converted from `TokenKind::Interpolated` in
    // `TokenStream::flatten_token`. Treated similarly to `ProcMacro`.
    FlattenToken,
}
42
43impl PartialEq for InvisibleOrigin {
44 #[inline]
45 fn eq(&self, _other: &InvisibleOrigin) -> bool {
46 // When we had AST-based nonterminals we couldn't compare them, and the
47 // old `Nonterminal` type had an `eq` that always returned false,
48 // resulting in this restriction:
49 // https://doc.rust-lang.org/nightly/reference/macros-by-example.html#forwarding-a-matched-fragment
50 // This `eq` emulates that behaviour. We could consider lifting this
51 // restriction now but there are still cases involving invisible
52 // delimiters that make it harder than it first appears.
53 false
54 }
55}
56
/// The kind of declarative-macro metavariable whose expansion is wrapped in
/// invisible delimiters (see `InvisibleOrigin::MetaVar`).
///
/// Annoyingly similar to `NonterminalKind`, but the slight differences are important.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Encodable, Decodable, Hash, HashStable_Generic)]
pub enum MetaVarKind {
    Item,
    Block,
    Stmt,
    // Displayed as `pat` or `pat_param` depending on the `NtPatKind`.
    Pat(NtPatKind),
    Expr {
        // Displayed as `expr` or `expr_2021` depending on this kind.
        kind: NtExprKind,
        // This field is needed for `Token::can_begin_literal_maybe_minus`.
        can_begin_literal_maybe_minus: bool,
        // This field is needed for `Token::can_begin_string_literal`.
        can_begin_string_literal: bool,
    },
    Ty {
        // NOTE(review): presumably whether the matched type is a plain path — confirm at the
        // construction site.
        is_path: bool,
    },
    Ident,
    Lifetime,
    Literal,
    Meta {
        /// Will `AttrItem::meta` succeed on this, if reparsed?
        has_meta_form: bool,
    },
    Path,
    Vis,
    TT,
}
85
86impl fmt::Display for MetaVarKind {
87 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
88 let sym = match self {
89 MetaVarKind::Item => sym::item,
90 MetaVarKind::Block => sym::block,
91 MetaVarKind::Stmt => sym::stmt,
92 MetaVarKind::Pat(PatParam { inferred: true } | PatWithOr) => sym::pat,
93 MetaVarKind::Pat(PatParam { inferred: false }) => sym::pat_param,
94 MetaVarKind::Expr { kind: Expr2021 { inferred: true } | Expr, .. } => sym::expr,
95 MetaVarKind::Expr { kind: Expr2021 { inferred: false }, .. } => sym::expr_2021,
96 MetaVarKind::Ty { .. } => sym::ty,
97 MetaVarKind::Ident => sym::ident,
98 MetaVarKind::Lifetime => sym::lifetime,
99 MetaVarKind::Literal => sym::literal,
100 MetaVarKind::Meta { .. } => sym::meta,
101 MetaVarKind::Path => sym::path,
102 MetaVarKind::Vis => sym::vis,
103 MetaVarKind::TT => sym::tt,
104 };
105 write!(f, "{sym}")
106 }
107}
108
/// Describes how a sequence of token trees is delimited.
/// Cannot use `proc_macro::Delimiter` directly because this
/// structure should implement some additional traits.
///
/// Note that the derived `PartialEq` never reports two `Invisible` delimiters
/// as equal, because `InvisibleOrigin`'s `eq` always returns `false`; use
/// `Delimiter::eq_ignoring_invisible_origin` when the origin is irrelevant.
#[derive(Copy, Clone, Debug, PartialEq, Encodable, Decodable, HashStable_Generic)]
pub enum Delimiter {
    /// `( ... )`
    Parenthesis,
    /// `{ ... }`
    Brace,
    /// `[ ... ]`
    Bracket,
    /// `∅ ... ∅`
    /// An invisible delimiter, that may, for example, appear around tokens coming from a
    /// "macro variable" `$var`. It is important to preserve operator priorities in cases like
    /// `$var * 3` where `$var` is `1 + 2`.
    /// Invisible delimiters might not survive roundtrip of a token stream through a string.
    /// The payload records where the delimiter came from.
    Invisible(InvisibleOrigin),
}
127
128impl Delimiter {
129 // Should the parser skip these delimiters? Only happens for certain kinds
130 // of invisible delimiters. Ideally this function will eventually disappear
131 // and no invisible delimiters will be skipped.
132 #[inline]
133 pub fn skip(&self) -> bool {
134 match self {
135 Delimiter::Parenthesis | Delimiter::Bracket | Delimiter::Brace => false,
136 Delimiter::Invisible(InvisibleOrigin::MetaVar(_)) => false,
137 Delimiter::Invisible(InvisibleOrigin::FlattenToken | InvisibleOrigin::ProcMacro) => {
138 true
139 }
140 }
141 }
142
143 // This exists because `InvisibleOrigin`s should be compared. It is only used for assertions.
144 pub fn eq_ignoring_invisible_origin(&self, other: &Delimiter) -> bool {
145 match (self, other) {
146 (Delimiter::Parenthesis, Delimiter::Parenthesis) => true,
147 (Delimiter::Brace, Delimiter::Brace) => true,
148 (Delimiter::Bracket, Delimiter::Bracket) => true,
149 (Delimiter::Invisible(_), Delimiter::Invisible(_)) => true,
150 _ => false,
151 }
152 }
153}
154
// The lexical kind of a literal token.
//
// Note that the suffix is *not* considered when deciding the `LitKind` in this
// type. This means that float literals like `1f32` are classified by this type
// as `Integer`. Only upon conversion to `ast::LitKind` will such a literal be
// given the `Float` kind.
#[derive(Clone, Copy, PartialEq, Encodable, Decodable, Debug, HashStable_Generic)]
pub enum LitKind {
    Bool, // AST only, must never appear in a `Token`
    Byte,
    Char,
    Integer, // e.g. `1`, `1u8`, `1f32`
    Float,   // e.g. `1.`, `1.0`, `1e3f32`
    Str,
    StrRaw(u8), // raw string delimited by `n` hash symbols
    ByteStr,
    ByteStrRaw(u8), // raw byte string delimited by `n` hash symbols
    CStr,
    CStrRaw(u8), // raw C string delimited by `n` hash symbols
    Err(ErrorGuaranteed), // a literal for which an error has already been reported
}
174
/// A literal token.
///
/// `Display` reassembles the source form from these parts: the kind supplies
/// the prefix and quotes, followed by `symbol` and then `suffix`.
#[derive(Clone, Copy, PartialEq, Encodable, Decodable, Debug, HashStable_Generic)]
pub struct Lit {
    /// The lexical kind; the suffix is not taken into account (see `LitKind`).
    pub kind: LitKind,
    /// The literal's text, without the prefix, quotes, hashes or suffix that
    /// `Display` adds back around it.
    pub symbol: Symbol,
    /// The suffix, if any (e.g. `f32` in `1f32`).
    pub suffix: Option<Symbol>,
}
182
183impl Lit {
184 pub fn new(kind: LitKind, symbol: Symbol, suffix: Option<Symbol>) -> Lit {
185 Lit { kind, symbol, suffix }
186 }
187
188 /// Returns `true` if this is semantically a float literal. This includes
189 /// ones like `1f32` that have an `Integer` kind but a float suffix.
190 pub fn is_semantic_float(&self) -> bool {
191 match self.kind {
192 LitKind::Float => true,
193 LitKind::Integer => match self.suffix {
194 Some(sym) => sym == sym::f32 || sym == sym::f64,
195 None => false,
196 },
197 _ => false,
198 }
199 }
200
201 /// Keep this in sync with `Token::can_begin_literal_maybe_minus` excluding unary negation.
202 pub fn from_token(token: &Token) -> Option<Lit> {
203 match token.uninterpolate().kind {
204 Ident(name, IdentIsRaw::No) if name.is_bool_lit() => Some(Lit::new(Bool, name, None)),
205 Literal(token_lit) => Some(token_lit),
206 Interpolated(ref nt)
207 if let NtExpr(expr) | NtLiteral(expr) = &**nt
208 && let ast::ExprKind::Lit(token_lit) = expr.kind =>
209 {
210 Some(token_lit)
211 }
212 _ => None,
213 }
214 }
215}
216
217impl fmt::Display for Lit {
218 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
219 let Lit { kind, symbol, suffix } = *self;
220 match kind {
221 Byte => write!(f, "b'{symbol}'")?,
222 Char => write!(f, "'{symbol}'")?,
223 Str => write!(f, "\"{symbol}\"")?,
224 StrRaw(n) => write!(
225 f,
226 "r{delim}\"{string}\"{delim}",
227 delim = "#".repeat(n as usize),
228 string = symbol
229 )?,
230 ByteStr => write!(f, "b\"{symbol}\"")?,
231 ByteStrRaw(n) => write!(
232 f,
233 "br{delim}\"{string}\"{delim}",
234 delim = "#".repeat(n as usize),
235 string = symbol
236 )?,
237 CStr => write!(f, "c\"{symbol}\"")?,
238 CStrRaw(n) => {
239 write!(f, "cr{delim}\"{symbol}\"{delim}", delim = "#".repeat(n as usize))?
240 }
241 Integer | Float | Bool | Err(_) => write!(f, "{symbol}")?,
242 }
243
244 if let Some(suffix) = suffix {
245 write!(f, "{suffix}")?;
246 }
247
248 Ok(())
249 }
250}
251
252impl LitKind {
253 /// An English article for the literal token kind.
254 pub fn article(self) -> &'static str {
255 match self {
256 Integer | Err(_) => "an",
257 _ => "a",
258 }
259 }
260
261 pub fn descr(self) -> &'static str {
262 match self {
263 Bool => "boolean",
264 Byte => "byte",
265 Char => "char",
266 Integer => "integer",
267 Float => "float",
268 Str | StrRaw(..) => "string",
269 ByteStr | ByteStrRaw(..) => "byte string",
270 CStr | CStrRaw(..) => "C string",
271 Err(_) => "error",
272 }
273 }
274
275 pub(crate) fn may_have_suffix(self) -> bool {
276 matches!(self, Integer | Float | Err(_))
277 }
278}
279
280pub fn ident_can_begin_expr(name: Symbol, span: Span, is_raw: IdentIsRaw) -> bool {
281 let ident_token = Token::new(Ident(name, is_raw), span);
282
283 !ident_token.is_reserved_ident()
284 || ident_token.is_path_segment_keyword()
285 || [
286 kw::Async,
287 kw::Do,
288 kw::Box,
289 kw::Break,
290 kw::Const,
291 kw::Continue,
292 kw::False,
293 kw::For,
294 kw::Gen,
295 kw::If,
296 kw::Let,
297 kw::Loop,
298 kw::Match,
299 kw::Move,
300 kw::Return,
301 kw::True,
302 kw::Try,
303 kw::Unsafe,
304 kw::While,
305 kw::Yield,
306 kw::Safe,
307 kw::Static,
308 ]
309 .contains(&name)
310}
311
312fn ident_can_begin_type(name: Symbol, span: Span, is_raw: IdentIsRaw) -> bool {
313 let ident_token: Token = Token::new(kind:Ident(name, is_raw), span);
314
315 !ident_token.is_reserved_ident()
316 || ident_token.is_path_segment_keyword()
317 || [kw::Underscore, kw::For, kw::Impl, kw::Fn, kw::Unsafe, kw::Extern, kw::Typeof, kw::Dyn]
318 .contains(&name)
319}
320
/// Whether an identifier or lifetime token is raw.
///
/// Converts to and from `bool` (`Yes` <-> `true`) via the `From` impls below.
#[derive(PartialEq, Encodable, Decodable, Debug, Copy, Clone, HashStable_Generic)]
pub enum IdentIsRaw {
    /// A regular, non-raw identifier.
    No,
    /// A raw identifier.
    Yes,
}
326
327impl From<bool> for IdentIsRaw {
328 fn from(b: bool) -> Self {
329 if b { Self::Yes } else { Self::No }
330 }
331}
332
333impl From<IdentIsRaw> for bool {
334 fn from(is_raw: IdentIsRaw) -> bool {
335 matches!(is_raw, IdentIsRaw::Yes)
336 }
337}
338
// SAFETY: due to the `Clone` impl below, all fields of all variants other than
// `Interpolated` must impl `Copy`.
#[derive(PartialEq, Encodable, Decodable, Debug, HashStable_Generic)]
pub enum TokenKind {
    /* Expression-operator symbols. */
    /// `=`
    Eq,
    /// `<`
    Lt,
    /// `<=`
    Le,
    /// `==`
    EqEq,
    /// `!=`
    Ne,
    /// `>=`
    Ge,
    /// `>`
    Gt,
    /// `&&`
    AndAnd,
    /// `||`
    OrOr,
    /// `!`
    Bang,
    /// `~`
    Tilde,
    /// `+`
    Plus,
    /// `-`
    Minus,
    /// `*`
    Star,
    /// `/`
    Slash,
    /// `%`
    Percent,
    /// `^`
    Caret,
    /// `&`
    And,
    /// `|`
    Or,
    /// `<<`
    Shl,
    /// `>>`
    Shr,
    /// `+=`
    PlusEq,
    /// `-=`
    MinusEq,
    /// `*=`
    StarEq,
    /// `/=`
    SlashEq,
    /// `%=`
    PercentEq,
    /// `^=`
    CaretEq,
    /// `&=`
    AndEq,
    /// `|=`
    OrEq,
    /// `<<=`
    ShlEq,
    /// `>>=`
    ShrEq,

    /* Structural symbols */
    /// `@`
    At,
    /// `.`
    Dot,
    /// `..`
    DotDot,
    /// `...`
    DotDotDot,
    /// `..=`
    DotDotEq,
    /// `,`
    Comma,
    /// `;`
    Semi,
    /// `:`
    Colon,
    /// `::`
    PathSep,
    /// `->`
    RArrow,
    /// `<-`
    LArrow,
    /// `=>`
    FatArrow,
    /// `#`
    Pound,
    /// `$`
    Dollar,
    /// `?`
    Question,
    /// Used by proc macros for representing lifetimes, not generated by lexer right now.
    SingleQuote,
    /// An opening delimiter (e.g., `{`).
    OpenDelim(Delimiter),
    /// A closing delimiter (e.g., `}`).
    CloseDelim(Delimiter),

    /* Literals */
    /// A literal token; see `Lit`.
    Literal(Lit),

    /// Identifier token.
    /// Do not forget about `NtIdent` when you want to match on identifiers.
    /// It's recommended to use `Token::(ident,uninterpolate,uninterpolated_span)` to
    /// treat regular and interpolated identifiers in the same way.
    Ident(Symbol, IdentIsRaw),
    /// This identifier (and its span) is the identifier passed to the
    /// declarative macro. The span in the surrounding `Token` is the span of
    /// the `ident` metavariable in the macro's RHS.
    NtIdent(Ident, IdentIsRaw),

    /// Lifetime identifier token.
    /// Do not forget about `NtLifetime` when you want to match on lifetime identifiers.
    /// It's recommended to use `Token::(lifetime,uninterpolate,uninterpolated_span)` to
    /// treat regular and interpolated lifetime identifiers in the same way.
    Lifetime(Symbol, IdentIsRaw),
    /// This identifier (and its span) is the lifetime passed to the
    /// declarative macro. The span in the surrounding `Token` is the span of
    /// the `lifetime` metavariable in the macro's RHS.
    NtLifetime(Ident, IdentIsRaw),

    /// An embedded AST node, as produced by a macro. This only exists for
    /// historical reasons. We'd like to get rid of it, for multiple reasons.
    /// - It's conceptually very strange. Saying a token can contain an AST
    ///   node is like saying, in natural language, that a word can contain a
    ///   sentence.
    /// - It requires special handling in a bunch of places in the parser.
    /// - It prevents `Token` from implementing `Copy`.
    /// It adds complexity and likely slows things down. Please don't add new
    /// occurrences of this token kind!
    ///
    /// The span in the surrounding `Token` is that of the metavariable in the
    /// macro's RHS. The span within the Nonterminal is that of the fragment
    /// passed to the macro at the call site.
    Interpolated(Arc<Nonterminal>),

    /// A doc comment token.
    /// `Symbol` is the doc comment's data excluding its "quotes" (`///`, `/**`, etc)
    /// similarly to symbols in string literal tokens.
    DocComment(CommentKind, ast::AttrStyle, Symbol),

    /// End Of File
    Eof,
}
491
492impl Clone for TokenKind {
493 fn clone(&self) -> Self {
494 // `TokenKind` would impl `Copy` if it weren't for `Interpolated`. So
495 // for all other variants, this implementation of `clone` is just like
496 // a copy. This is faster than the `derive(Clone)` version which has a
497 // separate path for every variant.
498 match self {
499 Interpolated(nt: &Arc) => Interpolated(Arc::clone(self:nt)),
500 _ => unsafe { std::ptr::read(self) },
501 }
502 }
503}
504
/// A token: what it is (`kind`) together with where it is (`span`).
///
/// For interpolated tokens the `span` is that of the metavariable in the
/// macro's RHS; see `Token::uninterpolated_span` for the span of the
/// fragment itself.
#[derive(Clone, PartialEq, Encodable, Decodable, Debug, HashStable_Generic)]
pub struct Token {
    pub kind: TokenKind,
    pub span: Span,
}
510
511impl TokenKind {
512 pub fn lit(kind: LitKind, symbol: Symbol, suffix: Option<Symbol>) -> TokenKind {
513 Literal(Lit::new(kind, symbol, suffix))
514 }
515
516 /// An approximation to proc-macro-style single-character operators used by
517 /// rustc parser. If the operator token can be broken into two tokens, the
518 /// first of which has `n` (1 or 2) chars, then this function performs that
519 /// operation, otherwise it returns `None`.
520 pub fn break_two_token_op(&self, n: u32) -> Option<(TokenKind, TokenKind)> {
521 assert!(n == 1 || n == 2);
522 Some(match (self, n) {
523 (Le, 1) => (Lt, Eq),
524 (EqEq, 1) => (Eq, Eq),
525 (Ne, 1) => (Bang, Eq),
526 (Ge, 1) => (Gt, Eq),
527 (AndAnd, 1) => (And, And),
528 (OrOr, 1) => (Or, Or),
529 (Shl, 1) => (Lt, Lt),
530 (Shr, 1) => (Gt, Gt),
531 (PlusEq, 1) => (Plus, Eq),
532 (MinusEq, 1) => (Minus, Eq),
533 (StarEq, 1) => (Star, Eq),
534 (SlashEq, 1) => (Slash, Eq),
535 (PercentEq, 1) => (Percent, Eq),
536 (CaretEq, 1) => (Caret, Eq),
537 (AndEq, 1) => (And, Eq),
538 (OrEq, 1) => (Or, Eq),
539 (ShlEq, 1) => (Lt, Le), // `<` + `<=`
540 (ShlEq, 2) => (Shl, Eq), // `<<` + `=`
541 (ShrEq, 1) => (Gt, Ge), // `>` + `>=`
542 (ShrEq, 2) => (Shr, Eq), // `>>` + `=`
543 (DotDot, 1) => (Dot, Dot),
544 (DotDotDot, 1) => (Dot, DotDot), // `.` + `..`
545 (DotDotDot, 2) => (DotDot, Dot), // `..` + `.`
546 (DotDotEq, 2) => (DotDot, Eq),
547 (PathSep, 1) => (Colon, Colon),
548 (RArrow, 1) => (Minus, Gt),
549 (LArrow, 1) => (Lt, Minus),
550 (FatArrow, 1) => (Eq, Gt),
551 _ => return None,
552 })
553 }
554
555 /// Returns tokens that are likely to be typed accidentally instead of the current token.
556 /// Enables better error recovery when the wrong token is found.
557 pub fn similar_tokens(&self) -> &[TokenKind] {
558 match self {
559 Comma => &[Dot, Lt, Semi],
560 Semi => &[Colon, Comma],
561 Colon => &[Semi],
562 FatArrow => &[Eq, RArrow, Ge, Gt],
563 _ => &[],
564 }
565 }
566
567 pub fn should_end_const_arg(&self) -> bool {
568 matches!(self, Gt | Ge | Shr | ShrEq)
569 }
570}
571
572impl Token {
573 pub fn new(kind: TokenKind, span: Span) -> Self {
574 Token { kind, span }
575 }
576
577 /// Some token that will be thrown away later.
578 pub fn dummy() -> Self {
579 Token::new(TokenKind::Question, DUMMY_SP)
580 }
581
582 /// Recovers a `Token` from an `Ident`. This creates a raw identifier if necessary.
583 pub fn from_ast_ident(ident: Ident) -> Self {
584 Token::new(Ident(ident.name, ident.is_raw_guess().into()), ident.span)
585 }
586
587 /// For interpolated tokens, returns a span of the fragment to which the interpolated
588 /// token refers. For all other tokens this is just a regular span.
589 /// It is particularly important to use this for identifiers and lifetimes
590 /// for which spans affect name resolution and edition checks.
591 /// Note that keywords are also identifiers, so they should use this
592 /// if they keep spans or perform edition checks.
593 pub fn uninterpolated_span(&self) -> Span {
594 match self.kind {
595 NtIdent(ident, _) | NtLifetime(ident, _) => ident.span,
596 Interpolated(ref nt) => nt.use_span(),
597 _ => self.span,
598 }
599 }
600
601 pub fn is_range_separator(&self) -> bool {
602 [DotDot, DotDotDot, DotDotEq].contains(&self.kind)
603 }
604
605 pub fn is_punct(&self) -> bool {
606 match self.kind {
607 Eq | Lt | Le | EqEq | Ne | Ge | Gt | AndAnd | OrOr | Bang | Tilde | Plus | Minus
608 | Star | Slash | Percent | Caret | And | Or | Shl | Shr | PlusEq | MinusEq | StarEq
609 | SlashEq | PercentEq | CaretEq | AndEq | OrEq | ShlEq | ShrEq | At | Dot | DotDot
610 | DotDotDot | DotDotEq | Comma | Semi | Colon | PathSep | RArrow | LArrow
611 | FatArrow | Pound | Dollar | Question | SingleQuote => true,
612
613 OpenDelim(..) | CloseDelim(..) | Literal(..) | DocComment(..) | Ident(..)
614 | NtIdent(..) | Lifetime(..) | NtLifetime(..) | Interpolated(..) | Eof => false,
615 }
616 }
617
618 pub fn is_like_plus(&self) -> bool {
619 matches!(self.kind, Plus | PlusEq)
620 }
621
    /// Returns `true` if the token can appear at the start of an expression.
    ///
    /// **NB**: Take care when modifying this function, since it will change
    /// the stable set of tokens that are allowed to match an expr nonterminal.
    pub fn can_begin_expr(&self) -> bool {
        use Delimiter::*;
        // `uninterpolate` maps `NtIdent`/`NtLifetime` to their plain
        // `Ident`/`Lifetime` forms, so those need no arms of their own.
        match self.uninterpolate().kind {
            Ident(name, is_raw)              =>
                ident_can_begin_expr(name, self.span, is_raw), // value name or keyword
            OpenDelim(Parenthesis | Brace | Bracket) | // tuple, array or block
            Literal(..)                       | // literal
            Bang                              | // operator not
            Minus                             | // unary minus
            Star                              | // dereference
            Or | OrOr                         | // closure
            And                               | // reference
            AndAnd                            | // double reference
            // DotDotDot is no longer supported, but we need some way to display the error
            DotDot | DotDotDot | DotDotEq     | // range notation
            Lt | Shl                          | // associated path
            PathSep                           | // global path
            Lifetime(..)                      | // labeled loop
            Pound                             => true, // expression attributes
            // Pre-parsed AST fragments that are themselves expressions.
            Interpolated(ref nt) =>
                matches!(&**nt,
                    NtBlock(..)   |
                    NtExpr(..)    |
                    NtLiteral(..)
                ),
            // Metavariable expansions wrapped in invisible delimiters.
            OpenDelim(Delimiter::Invisible(InvisibleOrigin::MetaVar(
                MetaVarKind::Block |
                MetaVarKind::Expr { .. } |
                MetaVarKind::Literal |
                MetaVarKind::Path
            ))) => true,
            _ => false,
        }
    }
660
    /// Returns `true` if the token can appear at the start of a pattern.
    ///
    /// `pat_kind` only influences whether a leading `|` is accepted: it is
    /// for `PatWithOr` and is not otherwise.
    ///
    /// Shamelessly borrowed from `can_begin_expr`, only used for diagnostics right now.
    pub fn can_begin_pattern(&self, pat_kind: NtPatKind) -> bool {
        match &self.uninterpolate().kind {
            // box, ref, mut, and other identifiers (could be made stricter)
            Ident(..) | NtIdent(..) |
            OpenDelim(Delimiter::Parenthesis) |  // tuple pattern
            OpenDelim(Delimiter::Bracket) |      // slice pattern
            And |                                // reference
            Minus |                              // negative literal
            AndAnd |                             // double reference
            Literal(_) |                         // literal
            DotDot |                             // range pattern (future compat)
            DotDotDot |                          // range pattern (future compat)
            PathSep |                            // path
            Lt |                                 // path (UFCS constant)
            Shl => true,                         // path (double UFCS)
            Or => matches!(pat_kind, PatWithOr), // leading vert `|` or-pattern
            // Pre-parsed expression or literal fragments.
            Interpolated(nt) =>
                matches!(&**nt,
                    | NtExpr(..)
                    | NtLiteral(..)
                ),
            // Metavariable expansions wrapped in invisible delimiters.
            OpenDelim(Delimiter::Invisible(InvisibleOrigin::MetaVar(
                MetaVarKind::Expr { .. } |
                MetaVarKind::Literal |
                MetaVarKind::Meta { .. } |
                MetaVarKind::Pat(_) |
                MetaVarKind::Path |
                MetaVarKind::Ty { .. }
            ))) => true,
            _ => false,
        }
    }
696
    /// Returns `true` if the token can appear at the start of a type.
    ///
    /// Identifiers are classified by `ident_can_begin_type`; interpolated
    /// identifiers and lifetimes are first mapped to their plain forms by
    /// `uninterpolate`.
    pub fn can_begin_type(&self) -> bool {
        match self.uninterpolate().kind {
            Ident(name, is_raw)        =>
                ident_can_begin_type(name, self.span, is_raw), // type name or keyword
            OpenDelim(Delimiter::Parenthesis) | // tuple
            OpenDelim(Delimiter::Bracket)     | // array
            Bang                        | // never
            Star                        | // raw pointer
            And                         | // reference
            AndAnd                      | // double reference
            Question                    | // maybe bound in trait object
            Lifetime(..)                | // lifetime bound in trait object
            Lt | Shl                    | // associated path
            PathSep => true,              // global path
            // Metavariable expansions wrapped in invisible delimiters.
            OpenDelim(Delimiter::Invisible(InvisibleOrigin::MetaVar(
                MetaVarKind::Ty { .. } |
                MetaVarKind::Path
            ))) => true,
            // For anonymous structs or unions, which only appear in specific positions
            // (type of struct fields or union fields), we don't consider them as regular types
            _ => false,
        }
    }
721
722 /// Returns `true` if the token can appear at the start of a const param.
723 pub fn can_begin_const_arg(&self) -> bool {
724 match self.kind {
725 OpenDelim(Delimiter::Brace) | Literal(..) | Minus => true,
726 Ident(name, IdentIsRaw::No) if name.is_bool_lit() => true,
727 Interpolated(ref nt) => matches!(&**nt, NtExpr(..) | NtBlock(..) | NtLiteral(..)),
728 OpenDelim(Delimiter::Invisible(InvisibleOrigin::MetaVar(
729 MetaVarKind::Expr { .. } | MetaVarKind::Block | MetaVarKind::Literal,
730 ))) => true,
731 _ => false,
732 }
733 }
734
735 /// Returns `true` if the token can appear at the start of an item.
736 pub fn can_begin_item(&self) -> bool {
737 match self.kind {
738 Ident(name, _) => [
739 kw::Fn,
740 kw::Use,
741 kw::Struct,
742 kw::Enum,
743 kw::Pub,
744 kw::Trait,
745 kw::Extern,
746 kw::Impl,
747 kw::Unsafe,
748 kw::Const,
749 kw::Safe,
750 kw::Static,
751 kw::Union,
752 kw::Macro,
753 kw::Mod,
754 kw::Type,
755 ]
756 .contains(&name),
757 _ => false,
758 }
759 }
760
761 /// Returns `true` if the token is any literal.
762 pub fn is_lit(&self) -> bool {
763 matches!(self.kind, Literal(..))
764 }
765
    /// Returns `true` if the token is any literal, a minus (which can prefix a literal,
    /// for example a '-42'), or one of the boolean idents.
    ///
    /// In other words, would this token be a valid start of `parse_literal_maybe_minus`?
    ///
    /// Keep this in sync with `Lit::from_token`, excluding unary negation.
    pub fn can_begin_literal_maybe_minus(&self) -> bool {
        match self.uninterpolate().kind {
            Literal(..) | Minus => true,
            Ident(name, IdentIsRaw::No) if name.is_bool_lit() => true,
            Interpolated(ref nt) => match &**nt {
                NtLiteral(_) => true,
                // An interpolated expression qualifies if it is a literal or
                // a negated literal.
                NtExpr(e) => match &e.kind {
                    ast::ExprKind::Lit(_) => true,
                    ast::ExprKind::Unary(ast::UnOp::Neg, e) => {
                        matches!(&e.kind, ast::ExprKind::Lit(_))
                    }
                    _ => false,
                },
                _ => false,
            },
            OpenDelim(Delimiter::Invisible(InvisibleOrigin::MetaVar(mv_kind))) => match mv_kind {
                MetaVarKind::Literal => true,
                // For `expr` metavariables the answer was recorded in the
                // `MetaVarKind` itself.
                MetaVarKind::Expr { can_begin_literal_maybe_minus, .. } => {
                    can_begin_literal_maybe_minus
                }
                _ => false,
            },
            _ => false,
        }
    }
797
798 pub fn can_begin_string_literal(&self) -> bool {
799 match self.uninterpolate().kind {
800 Literal(..) => true,
801 Interpolated(ref nt) => match &**nt {
802 NtLiteral(_) => true,
803 NtExpr(e) => match &e.kind {
804 ast::ExprKind::Lit(_) => true,
805 _ => false,
806 },
807 _ => false,
808 },
809 OpenDelim(Delimiter::Invisible(InvisibleOrigin::MetaVar(mv_kind))) => match mv_kind {
810 MetaVarKind::Literal => true,
811 MetaVarKind::Expr { can_begin_string_literal, .. } => can_begin_string_literal,
812 _ => false,
813 },
814 _ => false,
815 }
816 }
817
818 /// A convenience function for matching on identifiers during parsing.
819 /// Turns interpolated identifier (`$i: ident`) or lifetime (`$l: lifetime`) token
820 /// into the regular identifier or lifetime token it refers to,
821 /// otherwise returns the original token.
822 pub fn uninterpolate(&self) -> Cow<'_, Token> {
823 match self.kind {
824 NtIdent(ident, is_raw) => Cow::Owned(Token::new(Ident(ident.name, is_raw), ident.span)),
825 NtLifetime(ident, is_raw) => {
826 Cow::Owned(Token::new(Lifetime(ident.name, is_raw), ident.span))
827 }
828 _ => Cow::Borrowed(self),
829 }
830 }
831
832 /// Returns an identifier if this token is an identifier.
833 #[inline]
834 pub fn ident(&self) -> Option<(Ident, IdentIsRaw)> {
835 // We avoid using `Token::uninterpolate` here because it's slow.
836 match self.kind {
837 Ident(name, is_raw) => Some((Ident::new(name, self.span), is_raw)),
838 NtIdent(ident, is_raw) => Some((ident, is_raw)),
839 _ => None,
840 }
841 }
842
843 /// Returns a lifetime identifier if this token is a lifetime.
844 #[inline]
845 pub fn lifetime(&self) -> Option<(Ident, IdentIsRaw)> {
846 // We avoid using `Token::uninterpolate` here because it's slow.
847 match self.kind {
848 Lifetime(name, is_raw) => Some((Ident::new(name, self.span), is_raw)),
849 NtLifetime(ident, is_raw) => Some((ident, is_raw)),
850 _ => None,
851 }
852 }
853
854 /// Returns `true` if the token is an identifier.
855 pub fn is_ident(&self) -> bool {
856 self.ident().is_some()
857 }
858
859 /// Returns `true` if the token is a lifetime.
860 pub fn is_lifetime(&self) -> bool {
861 self.lifetime().is_some()
862 }
863
864 /// Returns `true` if the token is an identifier whose name is the given
865 /// string slice.
866 pub fn is_ident_named(&self, name: Symbol) -> bool {
867 self.ident().is_some_and(|(ident, _)| ident.name == name)
868 }
869
    /// Is this a pre-parsed expression dropped into the token stream
    /// (which happens while parsing the result of macro expansion)?
    ///
    /// True for an interpolated expression, literal or block, and for the
    /// invisible open delimiter of an expanded `path` metavariable.
    pub fn is_whole_expr(&self) -> bool {
        #[allow(irrefutable_let_patterns)] // FIXME: temporary
        if let Interpolated(nt) = &self.kind
            && let NtExpr(_) | NtLiteral(_) | NtBlock(_) = &**nt
        {
            true
        } else {
            // Not an interpolated AST node; `path` metavariables count too.
            matches!(self.is_metavar_seq(), Some(MetaVarKind::Path))
        }
    }
882
883 /// Is the token an interpolated block (`$b:block`)?
884 pub fn is_whole_block(&self) -> bool {
885 if let Interpolated(nt) = &self.kind
886 && let NtBlock(..) = &**nt
887 {
888 return true;
889 }
890
891 false
892 }
893
894 /// Returns `true` if the token is either the `mut` or `const` keyword.
895 pub fn is_mutability(&self) -> bool {
896 self.is_keyword(kw::Mut) || self.is_keyword(kw::Const)
897 }
898
899 pub fn is_qpath_start(&self) -> bool {
900 self == &Lt || self == &Shl
901 }
902
903 pub fn is_path_start(&self) -> bool {
904 self == &PathSep
905 || self.is_qpath_start()
906 || matches!(self.is_metavar_seq(), Some(MetaVarKind::Path))
907 || self.is_path_segment_keyword()
908 || self.is_ident() && !self.is_reserved_ident()
909 }
910
911 /// Returns `true` if the token is a given keyword, `kw`.
912 pub fn is_keyword(&self, kw: Symbol) -> bool {
913 self.is_non_raw_ident_where(|id| id.name == kw)
914 }
915
916 /// Returns `true` if the token is a given keyword, `kw` or if `case` is `Insensitive` and this
917 /// token is an identifier equal to `kw` ignoring the case.
918 pub fn is_keyword_case(&self, kw: Symbol, case: Case) -> bool {
919 self.is_keyword(kw)
920 || (case == Case::Insensitive
921 && self.is_non_raw_ident_where(|id| {
922 // Do an ASCII case-insensitive match, because all keywords are ASCII.
923 id.name.as_str().eq_ignore_ascii_case(kw.as_str())
924 }))
925 }
926
927 pub fn is_path_segment_keyword(&self) -> bool {
928 self.is_non_raw_ident_where(Ident::is_path_segment_keyword)
929 }
930
931 /// Returns true for reserved identifiers used internally for elided lifetimes,
932 /// unnamed method parameters, crate root module, error recovery etc.
933 pub fn is_special_ident(&self) -> bool {
934 self.is_non_raw_ident_where(Ident::is_special)
935 }
936
937 /// Returns `true` if the token is a keyword used in the language.
938 pub fn is_used_keyword(&self) -> bool {
939 self.is_non_raw_ident_where(Ident::is_used_keyword)
940 }
941
942 /// Returns `true` if the token is a keyword reserved for possible future use.
943 pub fn is_unused_keyword(&self) -> bool {
944 self.is_non_raw_ident_where(Ident::is_unused_keyword)
945 }
946
947 /// Returns `true` if the token is either a special identifier or a keyword.
948 pub fn is_reserved_ident(&self) -> bool {
949 self.is_non_raw_ident_where(Ident::is_reserved)
950 }
951
952 /// Returns `true` if the token is the identifier `true` or `false`.
953 pub fn is_bool_lit(&self) -> bool {
954 self.is_non_raw_ident_where(|id| id.name.is_bool_lit())
955 }
956
957 pub fn is_numeric_lit(&self) -> bool {
958 matches!(
959 self.kind,
960 Literal(Lit { kind: LitKind::Integer, .. }) | Literal(Lit { kind: LitKind::Float, .. })
961 )
962 }
963
964 /// Returns `true` if the token is the integer literal.
965 pub fn is_integer_lit(&self) -> bool {
966 matches!(self.kind, Literal(Lit { kind: LitKind::Integer, .. }))
967 }
968
969 /// Returns `true` if the token is a non-raw identifier for which `pred` holds.
970 pub fn is_non_raw_ident_where(&self, pred: impl FnOnce(Ident) -> bool) -> bool {
971 match self.ident() {
972 Some((id, IdentIsRaw::No)) => pred(id),
973 _ => false,
974 }
975 }
976
977 /// Is this an invisible open delimiter at the start of a token sequence
978 /// from an expanded metavar?
979 pub fn is_metavar_seq(&self) -> Option<MetaVarKind> {
980 match self.kind {
981 OpenDelim(Delimiter::Invisible(InvisibleOrigin::MetaVar(kind))) => Some(kind),
982 _ => None,
983 }
984 }
985
    /// Tries to glue `self` and the token `joint` that follows it into a
    /// single multi-character token (e.g. `=` + `=` into `==`).
    ///
    /// Returns `None` if the two kinds do not combine. On success the new
    /// token's span is `self.span.to(joint.span)`.
    pub fn glue(&self, joint: &Token) -> Option<Token> {
        // Each group lists the valid continuations of one first token,
        // followed by a catch-all for that first token. The final arm lists
        // every kind that can never be the first half of a glued token; there
        // is no wildcard on the first component, so the match is exhaustive
        // over `TokenKind`.
        let kind = match (&self.kind, &joint.kind) {
            (Eq, Eq) => EqEq,
            (Eq, Gt) => FatArrow,
            (Eq, _) => return None,

            (Lt, Eq) => Le,
            (Lt, Lt) => Shl,
            (Lt, Le) => ShlEq,
            (Lt, Minus) => LArrow,
            (Lt, _) => return None,

            (Gt, Eq) => Ge,
            (Gt, Gt) => Shr,
            (Gt, Ge) => ShrEq,
            (Gt, _) => return None,

            (Bang, Eq) => Ne,
            (Bang, _) => return None,

            (Plus, Eq) => PlusEq,
            (Plus, _) => return None,

            (Minus, Eq) => MinusEq,
            (Minus, Gt) => RArrow,
            (Minus, _) => return None,

            (Star, Eq) => StarEq,
            (Star, _) => return None,

            (Slash, Eq) => SlashEq,
            (Slash, _) => return None,

            (Percent, Eq) => PercentEq,
            (Percent, _) => return None,

            (Caret, Eq) => CaretEq,
            (Caret, _) => return None,

            (And, Eq) => AndEq,
            (And, And) => AndAnd,
            (And, _) => return None,

            (Or, Eq) => OrEq,
            (Or, Or) => OrOr,
            (Or, _) => return None,

            (Shl, Eq) => ShlEq,
            (Shl, _) => return None,

            (Shr, Eq) => ShrEq,
            (Shr, _) => return None,

            (Dot, Dot) => DotDot,
            (Dot, DotDot) => DotDotDot,
            (Dot, _) => return None,

            (DotDot, Dot) => DotDotDot,
            (DotDot, Eq) => DotDotEq,
            (DotDot, _) => return None,

            (Colon, Colon) => PathSep,
            (Colon, _) => return None,

            // `'` followed by an identifier becomes a lifetime whose name
            // includes the leading quote.
            (SingleQuote, Ident(name, is_raw)) => {
                Lifetime(Symbol::intern(&format!("'{name}")), *is_raw)
            }
            (SingleQuote, _) => return None,

            (
                Le | EqEq | Ne | Ge | AndAnd | OrOr | Tilde | PlusEq | MinusEq | StarEq | SlashEq
                | PercentEq | CaretEq | AndEq | OrEq | ShlEq | ShrEq | At | DotDotDot | DotDotEq
                | Comma | Semi | PathSep | RArrow | LArrow | FatArrow | Pound | Dollar | Question
                | OpenDelim(..) | CloseDelim(..) | Literal(..) | Ident(..) | NtIdent(..)
                | Lifetime(..) | NtLifetime(..) | Interpolated(..) | DocComment(..) | Eof,
                _,
            ) => {
                return None;
            }
        };

        Some(Token::new(kind, self.span.to(joint.span)))
    }
1069}
1070
1071impl PartialEq<TokenKind> for Token {
1072 #[inline]
1073 fn eq(&self, rhs: &TokenKind) -> bool {
1074 self.kind == *rhs
1075 }
1076}
1077
/// The flavour of a pattern fragment specifier (`pat` or `pat_param`); it is the
/// payload of `NonterminalKind::Pat`.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Encodable, Decodable, Hash, HashStable_Generic)]
pub enum NtPatKind {
    // Matches or-patterns. Was written using `pat` in edition 2021 or later.
    PatWithOr,
    // Doesn't match or-patterns.
    // - `inferred`: was written using `pat` in edition 2015 or 2018.
    // - `!inferred`: was written using `pat_param`.
    PatParam { inferred: bool },
}
1087
/// The flavour of an expression fragment specifier (`expr` or `expr_2021`); it is
/// the payload of `NonterminalKind::Expr`.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Encodable, Decodable, Hash, HashStable_Generic)]
pub enum NtExprKind {
    // Matches expressions using the post-edition 2024 rules. Was written using
    // `expr` in edition 2024 or later.
    Expr,
    // Matches expressions using the pre-edition 2024 rules.
    // - `inferred`: was written using `expr` in edition 2021 or earlier.
    // - `!inferred`: was written using `expr_2021`.
    Expr2021 { inferred: bool },
}
1098
#[derive(Clone, Encodable, Decodable)]
/// For interpolation during macro expansion.
///
/// Each variant holds a `P` pointer to an AST node.
pub enum Nonterminal {
    // Holds a block node.
    NtBlock(P<ast::Block>),
    // Holds an expression node.
    NtExpr(P<ast::Expr>),
    // Also holds an expression node, but is described as a "literal" by `descr`.
    NtLiteral(P<ast::Expr>),
}
1106
/// The kind named by a fragment specifier symbol such as `item`, `expr` or `tt`.
///
/// `NonterminalKind::from_symbol` maps a symbol to a kind; for `pat` and `expr` the
/// resulting payload depends on the edition.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Encodable, Decodable, Hash, HashStable_Generic)]
pub enum NonterminalKind {
    Item,
    Block,
    Stmt,
    // Covers both `pat` and `pat_param`; the payload tells them apart.
    Pat(NtPatKind),
    // Covers both `expr` and `expr_2021`; the payload tells them apart.
    Expr(NtExprKind),
    Ty,
    Ident,
    Lifetime,
    Literal,
    Meta,
    Path,
    Vis,
    TT,
}
1123
1124impl NonterminalKind {
1125 /// The `edition` closure is used to get the edition for the given symbol. Doing
1126 /// `span.edition()` is expensive, so we do it lazily.
1127 pub fn from_symbol(
1128 symbol: Symbol,
1129 edition: impl FnOnce() -> Edition,
1130 ) -> Option<NonterminalKind> {
1131 Some(match symbol {
1132 sym::item => NonterminalKind::Item,
1133 sym::block => NonterminalKind::Block,
1134 sym::stmt => NonterminalKind::Stmt,
1135 sym::pat => {
1136 if edition().at_least_rust_2021() {
1137 NonterminalKind::Pat(PatWithOr)
1138 } else {
1139 NonterminalKind::Pat(PatParam { inferred: true })
1140 }
1141 }
1142 sym::pat_param => NonterminalKind::Pat(PatParam { inferred: false }),
1143 sym::expr => {
1144 if edition().at_least_rust_2024() {
1145 NonterminalKind::Expr(Expr)
1146 } else {
1147 NonterminalKind::Expr(Expr2021 { inferred: true })
1148 }
1149 }
1150 sym::expr_2021 => NonterminalKind::Expr(Expr2021 { inferred: false }),
1151 sym::ty => NonterminalKind::Ty,
1152 sym::ident => NonterminalKind::Ident,
1153 sym::lifetime => NonterminalKind::Lifetime,
1154 sym::literal => NonterminalKind::Literal,
1155 sym::meta => NonterminalKind::Meta,
1156 sym::path => NonterminalKind::Path,
1157 sym::vis => NonterminalKind::Vis,
1158 sym::tt => NonterminalKind::TT,
1159 _ => return None,
1160 })
1161 }
1162
1163 fn symbol(self) -> Symbol {
1164 match self {
1165 NonterminalKind::Item => sym::item,
1166 NonterminalKind::Block => sym::block,
1167 NonterminalKind::Stmt => sym::stmt,
1168 NonterminalKind::Pat(PatParam { inferred: true } | PatWithOr) => sym::pat,
1169 NonterminalKind::Pat(PatParam { inferred: false }) => sym::pat_param,
1170 NonterminalKind::Expr(Expr2021 { inferred: true } | Expr) => sym::expr,
1171 NonterminalKind::Expr(Expr2021 { inferred: false }) => sym::expr_2021,
1172 NonterminalKind::Ty => sym::ty,
1173 NonterminalKind::Ident => sym::ident,
1174 NonterminalKind::Lifetime => sym::lifetime,
1175 NonterminalKind::Literal => sym::literal,
1176 NonterminalKind::Meta => sym::meta,
1177 NonterminalKind::Path => sym::path,
1178 NonterminalKind::Vis => sym::vis,
1179 NonterminalKind::TT => sym::tt,
1180 }
1181 }
1182}
1183
1184impl fmt::Display for NonterminalKind {
1185 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1186 write!(f, "{}", self.symbol())
1187 }
1188}
1189
1190impl Nonterminal {
1191 pub fn use_span(&self) -> Span {
1192 match self {
1193 NtBlock(block: &P) => block.span,
1194 NtExpr(expr: &P) | NtLiteral(expr: &P) => expr.span,
1195 }
1196 }
1197
1198 pub fn descr(&self) -> &'static str {
1199 match self {
1200 NtBlock(..) => "block",
1201 NtExpr(..) => "expression",
1202 NtLiteral(..) => "literal",
1203 }
1204 }
1205}
1206
1207impl PartialEq for Nonterminal {
1208 fn eq(&self, _rhs: &Self) -> bool {
1209 // FIXME: Assume that all nonterminals are not equal, we can't compare them
1210 // correctly based on data from AST. This will prevent them from matching each other
1211 // in macros. The comparison will become possible only when each nonterminal has an
1212 // attached token stream from which it was parsed.
1213 false
1214 }
1215}
1216
1217impl fmt::Debug for Nonterminal {
1218 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1219 match *self {
1220 NtBlock(..) => f.pad("NtBlock(..)"),
1221 NtExpr(..) => f.pad("NtExpr(..)"),
1222 NtLiteral(..) => f.pad("NtLiteral(..)"),
1223 }
1224 }
1225}
1226
1227impl<CTX> HashStable<CTX> for Nonterminal
1228where
1229 CTX: crate::HashStableContext,
1230{
1231 fn hash_stable(&self, _hcx: &mut CTX, _hasher: &mut StableHasher) {
1232 panic!("interpolated tokens should not be present in the HIR")
1233 }
1234}
1235
// Some types are used a lot. Make sure they don't unintentionally get bigger.
// Only compiled on targets with 64-bit pointers; the expected sizes below are
// presumably in bytes for such targets (confirm against `static_assert_size!`).
#[cfg(target_pointer_width = "64")]
mod size_asserts {
    use rustc_data_structures::static_assert_size;

    use super::*;
    // Keep the entries between the tidy markers in alphabetical order.
    // tidy-alphabetical-start
    static_assert_size!(Lit, 12);
    static_assert_size!(LitKind, 2);
    static_assert_size!(Nonterminal, 16);
    static_assert_size!(Token, 24);
    static_assert_size!(TokenKind, 16);
    // tidy-alphabetical-end
}
1250

Provided by KDAB

Privacy Policy
Learn Rust with the experts
Find out more