use std::borrow::Cow;
use std::fmt;

pub use LitKind::*;
pub use NtExprKind::*;
pub use NtPatKind::*;
pub use TokenKind::*;
use rustc_macros::{Decodable, Encodable, StableHash};
use rustc_span::edition::Edition;
use rustc_span::symbol::IdentPrintMode;
use rustc_span::{DUMMY_SP, ErrorGuaranteed, Span, kw, sym};
#[allow(clippy::useless_attribute)] // FIXME: following use of `hidden_glob_reexports` incorrectly triggers `useless_attribute` lint.
#[allow(hidden_glob_reexports)]
use rustc_span::{Ident, Symbol};

use crate::ast;
use crate::util::case::Case;

/// Represents the kind of doc comment it is, ie `///` or `#[doc = ""]`.
#[derive(Clone, Copy, PartialEq, Eq, Encodable, Decodable, Debug, StableHash)]
pub enum DocFragmentKind {
    /// A sugared doc comment: `///` or `//!` or `/**` or `/*!`.
    Sugared(CommentKind),
    /// A "raw" doc comment: `#[doc = ""]`. The `Span` represents the string literal.
    Raw(Span),
}

impl DocFragmentKind {
    /// Returns `true` if this is the `Sugared` variant.
    pub fn is_sugared(self) -> bool {
        matches!(self, Self::Sugared(_))
    }

    /// If it is `Sugared`, it will return its associated `CommentKind`, otherwise it will return
    /// `CommentKind::Line`.
    pub fn comment_kind(self) -> CommentKind {
        match self {
            Self::Sugared(kind) => kind,
            Self::Raw(_) => CommentKind::Line,
        }
    }
}

/// Distinguishes line comments from block comments.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Encodable, Decodable, Debug, StableHash)]
pub enum CommentKind {
    Line,
    Block,
}

/// Where an invisible delimiter (see `Delimiter::Invisible`) came from.
#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug, Encodable, Decodable, StableHash)]
pub enum InvisibleOrigin {
    /// From the expansion of a metavariable in a declarative macro.
    MetaVar(MetaVarKind),

    /// Converted from `proc_macro::Delimiter` in
    /// `proc_macro::Delimiter::to_internal`, i.e. returned by a proc macro.
    ProcMacro,
}

impl InvisibleOrigin {
    /// Should the parser skip these invisible delimiters? Ideally this function
    /// will eventually disappear and no invisible delimiters will be skipped.
    ///
    /// Currently only `ProcMacro`-originated delimiters are skipped.
    #[inline]
    pub fn skip(&self) -> bool {
        match self {
            InvisibleOrigin::MetaVar(_) => false,
            InvisibleOrigin::ProcMacro => true,
        }
    }
}

/// Annoyingly similar to `NonterminalKind`, but the slight differences are important.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Encodable, Decodable, Hash, StableHash)]
pub enum MetaVarKind {
    Item,
    Block,
    Stmt,
    Pat(NtPatKind),
    Expr {
        kind: NtExprKind,
        // This field is needed for `Token::can_begin_literal_maybe_minus`.
        can_begin_literal_maybe_minus: bool,
        // This field is needed for `Token::can_begin_string_literal`.
        can_begin_string_literal: bool,
    },
    Ty {
        is_path: bool,
    },
    Ident,
    Lifetime,
    Literal,
    Meta {
        /// Will `AttrItem::meta` succeed on this, if reparsed?
        has_meta_form: bool,
    },
    Path,
    Vis,
    Guard,
    TT,
}

/// Prints the symbol associated with the metavariable kind (`sym::item`, `sym::pat`, ...).
///
/// The extra payload of `Expr`, `Ty` and `Meta` is ignored; only the `inferred` flag of
/// `PatParam`/`Expr2021` influences which symbol is chosen.
impl fmt::Display for MetaVarKind {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let sym = match self {
            MetaVarKind::Item => sym::item,
            MetaVarKind::Block => sym::block,
            MetaVarKind::Stmt => sym::stmt,
            // An inferred `PatParam` was written as `pat` (see `NtPatKind`).
            MetaVarKind::Pat(PatParam { inferred: true } | PatWithOr) => sym::pat,
            MetaVarKind::Pat(PatParam { inferred: false }) => sym::pat_param,
            // An inferred `Expr2021` was written as `expr` (see `NtExprKind`).
            MetaVarKind::Expr { kind: Expr2021 { inferred: true } | Expr, .. } => sym::expr,
            MetaVarKind::Expr { kind: Expr2021 { inferred: false }, .. } => sym::expr_2021,
            MetaVarKind::Ty { .. } => sym::ty,
            MetaVarKind::Ident => sym::ident,
            MetaVarKind::Lifetime => sym::lifetime,
            MetaVarKind::Literal => sym::literal,
            MetaVarKind::Meta { .. } => sym::meta,
            MetaVarKind::Path => sym::path,
            MetaVarKind::Vis => sym::vis,
            MetaVarKind::Guard => sym::guard,
            MetaVarKind::TT => sym::tt,
        };
        write!(f, "{sym}")
    }
}

/// Describes how a sequence of token trees is delimited.
/// Cannot use `proc_macro::Delimiter` directly because this
/// structure should implement some additional traits.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, Encodable, Decodable, StableHash)]
pub enum Delimiter {
    /// `( ... )`
    Parenthesis,
    /// `{ ... }`
    Brace,
    /// `[ ... ]`
    Bracket,
    /// `∅ ... ∅`
    /// An invisible delimiter, that may, for example, appear around tokens coming from a
    /// "macro variable" `$var`. It is important to preserve operator priorities in cases like
    /// `$var * 3` where `$var` is `1 + 2`.
    /// Invisible delimiters might not survive roundtrip of a token stream through a string.
    Invisible(InvisibleOrigin),
}

impl Delimiter {
    /// Should the parser skip these delimiters? Only happens for certain kinds
    /// of invisible delimiters. Ideally this function will eventually disappear
    /// and no invisible delimiters will be skipped.
    #[inline]
    pub fn skip(&self) -> bool {
        match self {
            Delimiter::Parenthesis | Delimiter::Bracket | Delimiter::Brace => false,
            Delimiter::Invisible(origin) => origin.skip(),
        }
    }

    /// Compares two delimiters, treating all `Invisible` delimiters as equal
    /// regardless of their origin.
    ///
    /// This exists because `InvisibleOrigin`s should not be compared. It is only used for
    /// assertions.
    pub fn eq_ignoring_invisible_origin(&self, other: &Delimiter) -> bool {
        match (self, other) {
            (Delimiter::Parenthesis, Delimiter::Parenthesis) => true,
            (Delimiter::Brace, Delimiter::Brace) => true,
            (Delimiter::Bracket, Delimiter::Bracket) => true,
            (Delimiter::Invisible(_), Delimiter::Invisible(_)) => true,
            _ => false,
        }
    }

    /// Returns the `TokenKind` of the opening token for this delimiter.
    pub fn as_open_token_kind(&self) -> TokenKind {
        match *self {
            Delimiter::Parenthesis => OpenParen,
            Delimiter::Brace => OpenBrace,
            Delimiter::Bracket => OpenBracket,
            Delimiter::Invisible(origin) => OpenInvisible(origin),
        }
    }

    /// Returns the `TokenKind` of the closing token for this delimiter.
    pub fn as_close_token_kind(&self) -> TokenKind {
        match *self {
            Delimiter::Parenthesis => CloseParen,
            Delimiter::Brace => CloseBrace,
            Delimiter::Bracket => CloseBracket,
            Delimiter::Invisible(origin) => CloseInvisible(origin),
        }
    }
}

// Note that the suffix is *not* considered when deciding the `LitKind` in this
// type. This means that float literals like `1f32` are classified by this type
// as `Integer`. Only upon conversion to `ast::LitKind` will such a literal be
// given the `Float` kind.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Encodable, Decodable, Debug, StableHash)]
pub enum LitKind {
    Bool, // AST only, must never appear in a `Token`
    Byte,
    Char,
    Integer, // e.g. `1`, `1u8`, `1f32`
    Float,   // e.g. `1.`, `1.0`, `1e3f32`
    Str,
    StrRaw(u8), // raw string delimited by `n` hash symbols
    ByteStr,
    ByteStrRaw(u8), // raw byte string delimited by `n` hash symbols
    CStr,
    CStrRaw(u8),
    Err(ErrorGuaranteed),
}

/// A literal token.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Encodable, Decodable, Debug, StableHash)]
pub struct Lit {
    pub kind: LitKind,
    pub symbol: Symbol,
    pub suffix: Option<Symbol>,
}

impl Lit {
    /// Creates a literal from its kind, its symbol and its optional suffix.
    pub fn new(kind: LitKind, symbol: Symbol, suffix: Option<Symbol>) -> Lit {
        Lit { kind, symbol, suffix }
    }

    /// Returns `true` if this is semantically a float literal. This includes
    /// ones like `1f32` that have an `Integer` kind but a float suffix.
    ///
    /// Only the `f32` and `f64` suffixes are recognized here.
    pub fn is_semantic_float(&self) -> bool {
        match self.kind {
            LitKind::Float => true,
            LitKind::Integer => match self.suffix {
                Some(sym) => sym == sym::f32 || sym == sym::f64,
                None => false,
            },
            _ => false,
        }
    }

    /// Extracts the literal from a token, if the token is a literal or an unraw `true`/`false`
    /// identifier.
    ///
    /// Panics if the token opens an invisible-delimited `literal` or `expr` metavariable.
    ///
    /// Keep this in sync with `Token::can_begin_literal_maybe_minus` and
    /// `Parser::eat_token_lit` (excluding unary negation).
    pub fn from_token(token: &Token) -> Option<Lit> {
        match token.uninterpolate().kind {
            Ident(name, IdentIsRaw::No) if name.is_bool_lit() => Some(Lit::new(Bool, name, None)),
            Literal(token_lit) => Some(token_lit),
            OpenInvisible(InvisibleOrigin::MetaVar(
                MetaVarKind::Literal | MetaVarKind::Expr { .. },
            )) => {
                // Unreachable with the current test suite.
                panic!("from_token metavar");
            }
            _ => None,
        }
    }
}

/// Prints the literal with the quoting/prefix implied by its kind, followed by the suffix
/// (if any). The symbol itself is printed verbatim.
impl fmt::Display for Lit {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Lit { kind, symbol, suffix } = *self;
        match kind {
            Byte => write!(f, "b'{symbol}'")?,
            Char => write!(f, "'{symbol}'")?,
            Str => write!(f, "\"{symbol}\"")?,
            // Raw strings are wrapped in `n` hash symbols on each side.
            StrRaw(n) => write!(
                f,
                "r{delim}\"{string}\"{delim}",
                delim = "#".repeat(n as usize),
                string = symbol
            )?,
            ByteStr => write!(f, "b\"{symbol}\"")?,
            ByteStrRaw(n) => write!(
                f,
                "br{delim}\"{string}\"{delim}",
                delim = "#".repeat(n as usize),
                string = symbol
            )?,
            CStr => write!(f, "c\"{symbol}\"")?,
            CStrRaw(n) => {
                write!(f, "cr{delim}\"{symbol}\"{delim}", delim = "#".repeat(n as usize))?
            }
            Integer | Float | Bool | Err(_) => write!(f, "{symbol}")?,
        }

        if let Some(suffix) = suffix {
            write!(f, "{suffix}")?;
        }

        Ok(())
    }
}

impl LitKind {
    /// An English article for the literal token kind.
    pub fn article(self) -> &'static str {
        match self {
            // "an integer", "an error"; everything else takes "a".
            Integer | Err(_) => "an",
            _ => "a",
        }
    }

    /// A short English description of the literal kind. Raw and non-raw
    /// string variants share the same description.
    pub fn descr(self) -> &'static str {
        match self {
            Bool => "boolean",
            Byte => "byte",
            Char => "char",
            Integer => "integer",
            Float => "float",
            Str | StrRaw(..) => "string",
            ByteStr | ByteStrRaw(..) => "byte string",
            CStr | CStrRaw(..) => "C string",
            Err(_) => "error",
        }
    }

    /// Returns `true` for `Integer`, `Float` and `Err` literals.
    pub(crate) fn may_have_suffix(self) -> bool {
        matches!(self, Integer | Float | Err(_))
    }
}

/// Returns `true` if an identifier token built from `name`/`is_raw` can start an expression:
/// it is not a reserved identifier, or it is a path segment keyword, or it is one of the
/// keywords listed below.
pub fn ident_can_begin_expr(name: Symbol, span: Span, is_raw: IdentIsRaw) -> bool {
    let ident_token = Token::new(Ident(name, is_raw), span);

    !ident_token.is_reserved_ident()
        || ident_token.is_path_segment_keyword()
        || [
            kw::Async,
            kw::Do,
            kw::Box,
            kw::Break,
            kw::Const,
            kw::Continue,
            kw::False,
            kw::For,
            kw::Gen,
            kw::If,
            kw::Let,
            kw::Loop,
            kw::Match,
            kw::Move,
            kw::Return,
            kw::True,
            kw::Try,
            kw::Unsafe,
            kw::While,
            kw::Yield,
            kw::Safe,
            kw::Static,
        ]
        .contains(&name)
}

/// Returns `true` if an identifier token built from `name`/`is_raw` can start a type:
/// it is not a reserved identifier, or it is a path segment keyword, or it is one of the
/// keywords listed below.
fn ident_can_begin_type(name: Symbol, span: Span, is_raw: IdentIsRaw) -> bool {
    let ident_token = Token::new(Ident(name, is_raw), span);

    !ident_token.is_reserved_ident()
        || ident_token.is_path_segment_keyword()
        || [kw::Underscore, kw::For, kw::Impl, kw::Fn, kw::Unsafe, kw::Extern, kw::Typeof, kw::Dyn]
            .contains(&name)
}

/// Whether an identifier (or lifetime) token is raw.
#[derive(PartialEq, Eq, Encodable, Decodable, Hash, Debug, Copy, Clone, StableHash)]
pub enum IdentIsRaw {
    No,
    Yes,
}

impl IdentIsRaw {
    /// The print mode to use when this flag belongs to an identifier.
    pub fn to_print_mode_ident(self) -> IdentPrintMode {
        match self {
            IdentIsRaw::No => IdentPrintMode::Normal,
            IdentIsRaw::Yes => IdentPrintMode::RawIdent,
        }
    }
    /// The print mode to use when this flag belongs to a lifetime.
    pub fn to_print_mode_lifetime(self) -> IdentPrintMode {
        match self {
            IdentIsRaw::No => IdentPrintMode::Normal,
            IdentIsRaw::Yes => IdentPrintMode::RawLifetime,
        }
    }
}

/// `true` maps to `Yes`, `false` maps to `No`.
impl From<bool> for IdentIsRaw {
    fn from(b: bool) -> Self {
        if b { Self::Yes } else { Self::No }
    }
}

#[derive(Clone, Copy, PartialEq, Eq, Hash, Encodable, Decodable, Debug, StableHash)]
pub enum TokenKind {
    /* Expression-operator symbols. */
    /// `=`
    Eq,
    /// `<`
    Lt,
    /// `<=`
    Le,
    /// `==`
    EqEq,
    /// `!=`
    Ne,
    /// `>=`
    Ge,
    /// `>`
    Gt,
    /// `&&`
    AndAnd,
    /// `||`
    OrOr,
    /// `!`
    Bang,
    /// `~`
    Tilde,
    /// `+`
    Plus,
    /// `-`
    Minus,
    /// `*`
    Star,
    /// `/`
    Slash,
    /// `%`
    Percent,
    /// `^`
    Caret,
    /// `&`
    And,
    /// `|`
    Or,
    /// `<<`
    Shl,
    /// `>>`
    Shr,
    /// `+=`
    PlusEq,
    /// `-=`
    MinusEq,
    /// `*=`
    StarEq,
    /// `/=`
    SlashEq,
    /// `%=`
    PercentEq,
    /// `^=`
    CaretEq,
    /// `&=`
    AndEq,
    /// `|=`
    OrEq,
    /// `<<=`
    ShlEq,
    /// `>>=`
    ShrEq,

    /* Structural symbols */
    /// `@`
    At,
    /// `.`
    Dot,
    /// `..`
    DotDot,
    /// `...`
    DotDotDot,
    /// `..=`
    DotDotEq,
    /// `,`
    Comma,
    /// `;`
    Semi,
    /// `:`
    Colon,
    /// `::`
    PathSep,
    /// `->`
    RArrow,
    /// `<-`
    LArrow,
    /// `=>`
    FatArrow,
    /// `#`
    Pound,
    /// `$`
    Dollar,
    /// `?`
    Question,
    /// Used by proc macros for representing lifetimes, not generated by lexer right now.
    SingleQuote,
    /// `(`
    OpenParen,
    /// `)`
    CloseParen,
    /// `{`
    OpenBrace,
    /// `}`
    CloseBrace,
    /// `[`
    OpenBracket,
    /// `]`
    CloseBracket,
    /// Invisible opening delimiter, produced by a macro.
    OpenInvisible(InvisibleOrigin),
    /// Invisible closing delimiter, produced by a macro.
    CloseInvisible(InvisibleOrigin),

    /* Literals */
    Literal(Lit),

    /// Identifier token.
    /// Do not forget about `NtIdent` when you want to match on identifiers.
    /// It's recommended to use `Token::{ident,uninterpolate}` and
    /// `Parser::token_uninterpolated_span` to treat regular and interpolated
    /// identifiers in the same way.
    Ident(Symbol, IdentIsRaw),
    /// This identifier (and its span) is the identifier passed to the
    /// declarative macro. The span in the surrounding `Token` is the span of
    /// the `ident` metavariable in the macro's RHS.
    NtIdent(Ident, IdentIsRaw),

    /// Lifetime identifier token.
    /// Do not forget about `NtLifetime` when you want to match on lifetime identifiers.
    /// It's recommended to use `Token::{lifetime,uninterpolate}` and
    /// `Parser::token_uninterpolated_span` to treat regular and interpolated
    /// lifetime identifiers in the same way.
    Lifetime(Symbol, IdentIsRaw),
    /// This identifier (and its span) is the lifetime passed to the
    /// declarative macro. The span in the surrounding `Token` is the span of
    /// the `lifetime` metavariable in the macro's RHS.
    NtLifetime(Ident, IdentIsRaw),

    /// A doc comment token.
    /// `Symbol` is the doc comment's data excluding its "quotes" (`///`, `/**`, etc)
    /// similarly to symbols in string literal tokens.
    DocComment(CommentKind, ast::AttrStyle, Symbol),

    /// End Of File
    Eof,
}

/// A token: its kind together with its span.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Encodable, Decodable, Debug, StableHash)]
pub struct Token {
    pub kind: TokenKind,
    pub span: Span,
}

impl TokenKind {
    /// Shorthand for building a `Literal` token kind from the parts of a `Lit`.
    pub fn lit(kind: LitKind, symbol: Symbol, suffix: Option<Symbol>) -> TokenKind {
        Literal(Lit::new(kind, symbol, suffix))
    }

    /// An approximation to proc-macro-style single-character operators used by
    /// rustc parser. If the operator token can be broken into two tokens, the
    /// first of which has `n` (1 or 2) chars, then this function performs that
    /// operation, otherwise it returns `None`.
    ///
    /// Panics if `n` is neither 1 nor 2.
    pub fn break_two_token_op(&self, n: u32) -> Option<(TokenKind, TokenKind)> {
        assert!(n == 1 || n == 2);
        Some(match (self, n) {
            (Le, 1) => (Lt, Eq),
            (EqEq, 1) => (Eq, Eq),
            (Ne, 1) => (Bang, Eq),
            (Ge, 1) => (Gt, Eq),
            (AndAnd, 1) => (And, And),
            (OrOr, 1) => (Or, Or),
            (Shl, 1) => (Lt, Lt),
            (Shr, 1) => (Gt, Gt),
            (PlusEq, 1) => (Plus, Eq),
            (MinusEq, 1) => (Minus, Eq),
            (StarEq, 1) => (Star, Eq),
            (SlashEq, 1) => (Slash, Eq),
            (PercentEq, 1) => (Percent, Eq),
            (CaretEq, 1) => (Caret, Eq),
            (AndEq, 1) => (And, Eq),
            (OrEq, 1) => (Or, Eq),
            (ShlEq, 1) => (Lt, Le),  // `<` + `<=`
            (ShlEq, 2) => (Shl, Eq), // `<<` + `=`
            (ShrEq, 1) => (Gt, Ge),  // `>` + `>=`
            (ShrEq, 2) => (Shr, Eq), // `>>` + `=`
            (DotDot, 1) => (Dot, Dot),
            (DotDotDot, 1) => (Dot, DotDot), // `.` + `..`
            (DotDotDot, 2) => (DotDot, Dot), // `..` + `.`
            // Only the `..` + `=` split is listed for `..=`.
            (DotDotEq, 2) => (DotDot, Eq),
            (PathSep, 1) => (Colon, Colon),
            (RArrow, 1) => (Minus, Gt),
            (LArrow, 1) => (Lt, Minus),
            (FatArrow, 1) => (Eq, Gt),
            _ => return None,
        })
    }

    /// Returns tokens that are likely to be typed accidentally instead of the current token.
    /// Enables better error recovery when the wrong token is found.
    pub fn similar_tokens(&self) -> &[TokenKind] {
        match self {
            Comma => &[Dot, Lt, Semi],
            Semi => &[Colon, Comma],
            Colon => &[Semi],
            FatArrow => &[Eq, RArrow, Ge, Gt],
            _ => &[],
        }
    }

    /// Returns `true` for the tokens that start with `>`: `>`, `>=`, `>>` and `>>=`.
    pub fn should_end_const_arg(&self) -> bool {
        matches!(self, Gt | Ge | Shr | ShrEq)
    }

    /// Returns `true` if this is an opening or closing delimiter (invisible ones included).
    pub fn is_delim(&self) -> bool {
        self.open_delim().is_some() || self.close_delim().is_some()
    }

    /// Returns the delimiter this token opens, if it is an opening delimiter.
    pub fn open_delim(&self) -> Option<Delimiter> {
        match *self {
            OpenParen => Some(Delimiter::Parenthesis),
            OpenBrace => Some(Delimiter::Brace),
            OpenBracket => Some(Delimiter::Bracket),
            OpenInvisible(origin) => Some(Delimiter::Invisible(origin)),
            _ => None,
        }
    }

    /// Returns the delimiter this token closes, if it is a closing delimiter.
    pub fn close_delim(&self) -> Option<Delimiter> {
        match *self {
            CloseParen => Some(Delimiter::Parenthesis),
            CloseBrace => Some(Delimiter::Brace),
            CloseBracket => Some(Delimiter::Bracket),
            CloseInvisible(origin) => Some(Delimiter::Invisible(origin)),
            _ => None,
        }
    }

    /// Returns `true` if this is a closing delimiter (invisible ones included) or `Eof`.
    pub fn is_close_delim_or_eof(&self) -> bool {
        match self {
            CloseParen | CloseBrace | CloseBracket | CloseInvisible(_) | Eof => true,
            _ => false,
        }
    }
}

impl Token {
    /// Creates a token from its kind and span.
    pub const fn new(kind: TokenKind, span: Span) -> Self {
        Token { kind, span }
    }

    /// Some token that will be thrown away later.
    pub const fn dummy() -> Self {
        Token::new(TokenKind::Question, DUMMY_SP)
    }

    /// Recovers a `Token` from an `Ident`. This creates a raw identifier if necessary.
    pub fn from_ast_ident(ident: Ident) -> Self {
        Token::new(Ident(ident.name, ident.is_raw_guess().into()), ident.span)
    }

    /// Returns `true` if the token is `..`, `...` or `..=`.
    pub fn is_range_separator(&self) -> bool {
        [DotDot, DotDotDot, DotDotEq].contains(&self.kind)
    }

    /// Returns `true` for operator and structural symbol tokens, and `false` for delimiters,
    /// literals, doc comments, identifiers, lifetimes and `Eof`.
    ///
    /// The match is deliberately exhaustive, so a new `TokenKind` variant must be classified here.
    pub fn is_punct(&self) -> bool {
        match self.kind {
            Eq | Lt | Le | EqEq | Ne | Ge | Gt | AndAnd | OrOr | Bang | Tilde | Plus | Minus
            | Star | Slash | Percent | Caret | And | Or | Shl | Shr | PlusEq | MinusEq | StarEq
            | SlashEq | PercentEq | CaretEq | AndEq | OrEq | ShlEq | ShrEq | At | Dot | DotDot
            | DotDotDot | DotDotEq | Comma | Semi | Colon | PathSep | RArrow | LArrow
            | FatArrow | Pound | Dollar | Question | SingleQuote => true,

            OpenParen | CloseParen | OpenBrace | CloseBrace | OpenBracket | CloseBracket
            | OpenInvisible(_) | CloseInvisible(_) | Literal(..) | DocComment(..) | Ident(..)
            | NtIdent(..) | Lifetime(..) | NtLifetime(..) | Eof => false,
        }
    }

    /// Returns `true` if the token is `+` or `+=`.
    pub fn is_like_plus(&self) -> bool {
        matches!(self.kind, Plus | PlusEq)
    }

    /// Returns `true` if the token can appear at the start of an expression.
    ///
    /// **NB**: Take care when modifying this function, since it will change
    /// the stable set of tokens that are allowed to match an expr nonterminal.
    pub fn can_begin_expr(&self) -> bool {
        match self.uninterpolate().kind {
            Ident(name, is_raw) =>
                ident_can_begin_expr(name, self.span, is_raw), // value name or keyword
            OpenParen | // tuple
            OpenBrace | // block
            OpenBracket | // array
            Literal(..) | // literal
            Bang | // operator not
            Minus | // unary minus
            Star | // dereference
            Or | OrOr | // closure
            And | // reference
            AndAnd | // double reference
            // DotDotDot is no longer supported, but we need some way to display the error
            DotDot | DotDotDot | DotDotEq | // range notation
            Lt | Shl | // associated path
            PathSep | // global path
            Lifetime(..) | // labeled loop
            Pound => true, // expression attributes
            OpenInvisible(InvisibleOrigin::MetaVar(
                MetaVarKind::Block |
                MetaVarKind::Expr { .. } |
                MetaVarKind::Literal |
                MetaVarKind::Path
            )) => true,
            _ => false,
        }
    }

    /// Returns `true` if the token can appear at the start of a pattern.
    ///
    /// Shamelessly borrowed from `can_begin_expr`, only used for diagnostics right now.
    pub fn can_begin_pattern(&self, pat_kind: NtPatKind) -> bool {
        match &self.uninterpolate().kind {
            // box, ref, mut, and other identifiers (can stricten)
            Ident(..) | NtIdent(..) |
            OpenParen | // tuple pattern
            OpenBracket | // slice pattern
            And | // reference
            Minus | // negative literal
            AndAnd | // double reference
            Literal(_) | // literal
            DotDot | // range pattern (future compat)
            DotDotDot | // range pattern (future compat)
            PathSep | // path
            Lt | // path (UFCS constant)
            Shl => true, // path (double UFCS)
            Or => matches!(pat_kind, PatWithOr), // leading vert `|` or-pattern
            OpenInvisible(InvisibleOrigin::MetaVar(
                MetaVarKind::Expr { .. } |
                MetaVarKind::Literal |
                MetaVarKind::Meta { .. } |
                MetaVarKind::Pat(_) |
                MetaVarKind::Path |
                MetaVarKind::Ty { .. }
            )) => true,
            _ => false,
        }
    }

    /// Returns `true` if the token can appear at the start of a type.
    pub fn can_begin_type(&self) -> bool {
        match self.uninterpolate().kind {
            Ident(name, is_raw) =>
                ident_can_begin_type(name, self.span, is_raw), // type name or keyword
            OpenParen | // tuple
            OpenBracket | // array
            Bang | // never
            Star | // raw pointer
            And | // reference
            AndAnd | // double reference
            Question | // maybe bound in trait object
            Lifetime(..) | // lifetime bound in trait object
            Lt | Shl | // associated path
            PathSep => true, // global path
            OpenInvisible(InvisibleOrigin::MetaVar(
                MetaVarKind::Ty { .. } |
                MetaVarKind::Path
            )) => true,
            // For anonymous structs or unions, which only appear in specific positions
            // (type of struct fields or union fields), we don't consider them as regular types
            _ => false,
        }
    }

    /// Returns `true` if the token can appear at the start of a const argument.
    ///
    /// Unlike the other `can_begin_*` helpers this matches on `self.kind` directly,
    /// without calling `uninterpolate` first.
    pub fn can_begin_const_arg(&self) -> bool {
        match self.kind {
            OpenBrace | Literal(..) | Minus => true,
            Ident(name, IdentIsRaw::No) if name.is_bool_lit() => true,
            OpenInvisible(InvisibleOrigin::MetaVar(
                MetaVarKind::Expr { .. } | MetaVarKind::Block | MetaVarKind::Literal,
            )) => true,
            _ => false,
        }
    }

    /// Returns `true` if the token can appear at the start of an item.
    ///
    /// Only plain `Ident` tokens are considered, and the raw-ness of the identifier is ignored.
    pub fn can_begin_item(&self) -> bool {
        match self.kind {
            Ident(name, _) => [
                kw::Fn,
                kw::Use,
                kw::Struct,
                kw::Enum,
                kw::Pub,
                kw::Trait,
                kw::Extern,
                kw::Impl,
                kw::Unsafe,
                kw::Const,
                kw::Safe,
                kw::Static,
                kw::Union,
                kw::Macro,
                kw::Mod,
                kw::Type,
            ]
            .contains(&name),
            _ => false,
        }
    }

    /// Returns `true` if the token is any literal.
    pub fn is_lit(&self) -> bool {
        matches!(self.kind, Literal(..))
    }

    /// Returns `true` if the token is any literal, a minus (which can prefix a literal,
    /// for example a '-42', or one of the boolean idents).
    ///
    /// In other words, would this token be a valid start of `parse_literal_maybe_minus`?
    ///
    /// Keep this in sync with `Lit::from_token` and `Parser::eat_token_lit`
    /// (excluding unary negation).
    pub fn can_begin_literal_maybe_minus(&self) -> bool {
        match self.uninterpolate().kind {
            Literal(..) | Minus => true,
            Ident(name, IdentIsRaw::No) if name.is_bool_lit() => true,
            OpenInvisible(InvisibleOrigin::MetaVar(mv_kind)) => match mv_kind {
                MetaVarKind::Literal => true,
                // For `expr` metavars the answer was recorded in the `MetaVarKind` itself.
                MetaVarKind::Expr { can_begin_literal_maybe_minus, .. } => {
                    can_begin_literal_maybe_minus
                }
                _ => false,
            },
            _ => false,
        }
    }

    /// Returns `true` if the token is a literal (of any kind), a `literal` metavar, or an
    /// `expr` metavar whose `can_begin_string_literal` flag is set.
    pub fn can_begin_string_literal(&self) -> bool {
        match self.uninterpolate().kind {
            Literal(..) => true,
            OpenInvisible(InvisibleOrigin::MetaVar(mv_kind)) => match mv_kind {
                MetaVarKind::Literal => true,
                MetaVarKind::Expr { can_begin_string_literal, .. } => can_begin_string_literal,
                _ => false,
            },
            _ => false,
        }
    }

    /// A convenience function for matching on identifiers during parsing.
    /// Turns interpolated identifier (`$i: ident`) or lifetime (`$l: lifetime`) token
    /// into the regular identifier or lifetime token it refers to,
    /// otherwise returns the original token.
    ///
    /// The converted token carries the span of the interpolated identifier, not `self.span`.
    pub fn uninterpolate(&self) -> Cow<'_, Token> {
        match self.kind {
            NtIdent(ident, is_raw) => Cow::Owned(Token::new(Ident(ident.name, is_raw), ident.span)),
            NtLifetime(ident, is_raw) => {
                Cow::Owned(Token::new(Lifetime(ident.name, is_raw), ident.span))
            }
            _ => Cow::Borrowed(self),
        }
    }

    /// Returns an identifier if this token is an identifier.
    #[inline]
    pub fn ident(&self) -> Option<(Ident, IdentIsRaw)> {
        // We avoid using `Token::uninterpolate` here because it's slow.
        match self.kind {
            Ident(name, is_raw) => Some((Ident::new(name, self.span), is_raw)),
            NtIdent(ident, is_raw) => Some((ident, is_raw)),
            _ => None,
        }
    }

    /// Returns a lifetime identifier if this token is a lifetime.
    #[inline]
    pub fn lifetime(&self) -> Option<(Ident, IdentIsRaw)> {
        // We avoid using `Token::uninterpolate` here because it's slow.
        match self.kind {
            Lifetime(name, is_raw) => Some((Ident::new(name, self.span), is_raw)),
            NtLifetime(ident, is_raw) => Some((ident, is_raw)),
            _ => None,
        }
    }

    /// Returns `true` if the token is an identifier.
    pub fn is_ident(&self) -> bool {
        self.ident().is_some()
    }

    /// Returns `true` if the token is a lifetime.
    pub fn is_lifetime(&self) -> bool {
        self.lifetime().is_some()
    }

    /// Returns `true` if the token is an identifier (raw or not) whose name is the given
    /// symbol.
    pub fn is_ident_named(&self, name: Symbol) -> bool {
        self.ident().is_some_and(|(ident, _)| ident.name == name)
    }

    /// Is this a pre-parsed expression dropped into the token stream
    /// (which happens while parsing the result of macro expansion)?
    pub fn is_metavar_expr(&self) -> bool {
        matches!(
            self.is_metavar_seq(),
            Some(
                MetaVarKind::Expr { .. }
                    | MetaVarKind::Literal
                    | MetaVarKind::Path
                    | MetaVarKind::Block
            )
        )
    }

    /// Are we at a block from a metavar (`$b:block`)?
    pub fn is_metavar_block(&self) -> bool {
        matches!(self.is_metavar_seq(), Some(MetaVarKind::Block))
    }

    /// Returns `true` if the token is either the `mut` or `const` keyword.
    pub fn is_mutability(&self) -> bool {
        self.is_keyword(kw::Mut) || self.is_keyword(kw::Const)
    }

    /// Returns `true` if the token is `<` or `<<`.
    pub fn is_qpath_start(&self) -> bool {
        matches!(self.kind, Lt | Shl)
    }

    /// Returns `true` if the token is `::`, `<` or `<<`, a `path` metavar, a path segment
    /// keyword, or a non-reserved identifier.
    pub fn is_path_start(&self) -> bool {
        self.kind == PathSep
            || self.is_qpath_start()
            || matches!(self.is_metavar_seq(), Some(MetaVarKind::Path))
            || self.is_path_segment_keyword()
            || self.is_non_reserved_ident()
    }

    /// Returns `true` if the token is a given keyword, `kw`.
    pub fn is_keyword(&self, kw: Symbol) -> bool {
        self.is_non_raw_ident_where(|id| id.name == kw)
    }

    /// Returns `true` if the token is a given keyword, `kw` or if `case` is `Insensitive` and this
    /// token is an identifier equal to `kw` ignoring the case.
    pub fn is_keyword_case(&self, kw: Symbol, case: Case) -> bool {
        self.is_keyword(kw)
            || (case == Case::Insensitive
                && self.is_non_raw_ident_where(|id| {
                    // Do an ASCII case-insensitive match, because all keywords are ASCII.
                    id.name.as_str().eq_ignore_ascii_case(kw.as_str())
                }))
    }

    /// Returns `true` if the token is a non-raw identifier for which
    /// `Ident::is_path_segment_keyword` holds.
    pub fn is_path_segment_keyword(&self) -> bool {
        self.is_non_raw_ident_where(Ident::is_path_segment_keyword)
    }

    /// Returns true for reserved identifiers used internally for elided lifetimes,
    /// unnamed method parameters, crate root module, error recovery etc.
    pub fn is_special_ident(&self) -> bool {
        self.is_non_raw_ident_where(Ident::is_special)
    }

    /// Returns `true` if the token is a keyword used in the language.
    pub fn is_used_keyword(&self) -> bool {
        self.is_non_raw_ident_where(Ident::is_used_keyword)
    }

    /// Returns `true` if the token is a keyword reserved for possible future use.
    pub fn is_unused_keyword(&self) -> bool {
        self.is_non_raw_ident_where(Ident::is_unused_keyword)
    }

    /// Returns `true` if the token is either a special identifier or a keyword.
    pub fn is_reserved_ident(&self) -> bool {
        self.is_non_raw_ident_where(Ident::is_reserved)
    }

    /// Returns `true` if the token is an identifier that is either raw or not reserved.
    pub fn is_non_reserved_ident(&self) -> bool {
        self.ident().is_some_and(|(id, raw)| raw == IdentIsRaw::Yes || !Ident::is_reserved(id))
    }

    /// Returns `true` if the token is the identifier `true` or `false`.
    pub fn is_bool_lit(&self) -> bool {
        self.is_non_raw_ident_where(|id| id.name.is_bool_lit())
    }

    /// Returns `true` if the token is a literal of kind `Integer` or `Float`.
    pub fn is_numeric_lit(&self) -> bool {
        matches!(
            self.kind,
            Literal(Lit { kind: LitKind::Integer, .. }) | Literal(Lit { kind: LitKind::Float, .. })
        )
    }

    /// Returns `true` if the token is a literal of kind `Integer`.
    pub fn is_integer_lit(&self) -> bool {
        matches!(self.kind, Literal(Lit { kind: LitKind::Integer, .. }))
    }

    /// Returns `true` if the token is a non-raw identifier for which `pred` holds.
    pub fn is_non_raw_ident_where(&self, pred: impl FnOnce(Ident) -> bool) -> bool {
        match self.ident() {
            Some((id, IdentIsRaw::No)) => pred(id),
            _ => false,
        }
    }

    /// Is this an invisible open delimiter at the start of a token sequence
    /// from an expanded metavar?
    pub fn is_metavar_seq(&self) -> Option<MetaVarKind> {
        match self.kind {
            OpenInvisible(InvisibleOrigin::MetaVar(kind)) => Some(kind),
            _ => None,
        }
    }

    /// Tries to combine `self` and the following token `joint` into a single token
    /// (e.g. `<` followed by `=` becomes `<=`). Returns `None` if the pair does not form
    /// a token. The resulting span is `self.span.to(joint.span)`.
    ///
    /// Every first-token kind is listed explicitly (there is no top-level wildcard arm), so
    /// a new `TokenKind` variant must be handled here.
    pub fn glue(&self, joint: &Token) -> Option<Token> {
        let kind = match (&self.kind, &joint.kind) {
            (Eq, Eq) => EqEq,
            (Eq, Gt) => FatArrow,
            (Eq, _) => return None,

            (Lt, Eq) => Le,
            (Lt, Lt) => Shl,
            (Lt, Le) => ShlEq,
            (Lt, Minus) => LArrow,
            (Lt, _) => return None,

            (Gt, Eq) => Ge,
            (Gt, Gt) => Shr,
            (Gt, Ge) => ShrEq,
            (Gt, _) => return None,

            (Bang, Eq) => Ne,
            (Bang, _) => return None,

            (Plus, Eq) => PlusEq,
            (Plus, _) => return None,

            (Minus, Eq) => MinusEq,
            (Minus, Gt) => RArrow,
            (Minus, _) => return None,

            (Star, Eq) => StarEq,
            (Star, _) => return None,

            (Slash, Eq) => SlashEq,
            (Slash, _) => return None,

            (Percent, Eq) => PercentEq,
            (Percent, _) => return None,

            (Caret, Eq) => CaretEq,
            (Caret, _) => return None,

            (And, Eq) => AndEq,
            (And, And) => AndAnd,
            (And, _) => return None,

            (Or, Eq) => OrEq,
            (Or, Or) => OrOr,
            (Or, _) => return None,

            (Shl, Eq) => ShlEq,
            (Shl, _) => return None,

            (Shr, Eq) => ShrEq,
            (Shr, _) => return None,

            (Dot, Dot) => DotDot,
            (Dot, DotDot) => DotDotDot,
            (Dot, _) => return None,

            (DotDot, Dot) => DotDotDot,
            (DotDot, Eq) => DotDotEq,
            (DotDot, _) => return None,

            (Colon, Colon) => PathSep,
            (Colon, _) => return None,

            // `'` followed by an identifier becomes a lifetime whose symbol includes the quote.
            (SingleQuote, Ident(name, is_raw)) => {
                Lifetime(Symbol::intern(&format!("'{name}")), *is_raw)
            }
            (SingleQuote, _) => return None,

            // None of these can be the first half of a glued token.
            (
                Le | EqEq | Ne | Ge | AndAnd | OrOr | Tilde | PlusEq | MinusEq | StarEq | SlashEq
                | PercentEq | CaretEq | AndEq | OrEq | ShlEq | ShrEq | At | DotDotDot | DotDotEq
                | Comma | Semi | PathSep | RArrow | LArrow | FatArrow | Pound | Dollar | Question
                | OpenParen | CloseParen | OpenBrace | CloseBrace | OpenBracket | CloseBracket
                | OpenInvisible(_) | CloseInvisible(_) | Literal(..) | Ident(..) | NtIdent(..)
                | Lifetime(..) | NtLifetime(..) | DocComment(..) | Eof,
                _,
            ) => {
                return None;
            }
        };

        Some(Token::new(kind, self.span.to(joint.span)))
    }
}

/// Allows comparing a `Token` directly against a `TokenKind`; the span is ignored.
impl PartialEq<TokenKind> for Token {
    #[inline]
    fn eq(&self, rhs: &TokenKind) -> bool {
        self.kind == *rhs
    }
}

#[derive(Debug, Copy, Clone, PartialEq, Eq, Encodable, Decodable, Hash, StableHash)]
pub enum NtPatKind {
    // Matches or-patterns. Was written using `pat` in edition 2021 or later.
    PatWithOr,
    // Doesn't match or-patterns.
    // - `inferred`: was written using `pat` in edition 2015 or 2018.
    // - `!inferred`: was written using `pat_param`.
    PatParam { inferred: bool },
}

#[derive(Debug, Copy, Clone, PartialEq, Eq, Encodable, Decodable, Hash, StableHash)]
pub enum NtExprKind {
    // Matches expressions using the post-edition 2024 rules. Was written using
    // `expr` in edition 2024 or later.
    Expr,
    // Matches expressions using the pre-edition 2024 rules.
    // - `inferred`: was written using `expr` in edition 2021 or earlier.
    // - `!inferred`: was written using `expr_2021`.
    Expr2021 { inferred: bool },
}

/// A macro nonterminal, known in documentation as a fragment specifier.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Encodable, Decodable, Hash, StableHash)]
pub enum NonterminalKind {
    Item,
    Block,
    Stmt,
    Pat(NtPatKind),
    Expr(NtExprKind),
    Ty,
    Ident,
    Lifetime,
    Literal,
    Meta,
    Path,
    Vis,
    Guard,
    TT,
}

impl NonterminalKind {
    /// Maps a fragment-specifier symbol to its `NonterminalKind`, or returns `None` if the
    /// symbol is not a known fragment specifier.
    ///
    /// The `edition` closure is used to get the edition for the given symbol. Doing
    /// `span.edition()` is expensive, so we do it lazily. It is only called for `pat`
    /// and `expr`, whose meaning depends on the edition.
    pub fn from_symbol(
        symbol: Symbol,
        edition: impl FnOnce() -> Edition,
    ) -> Option<NonterminalKind> {
        Some(match symbol {
            sym::item => NonterminalKind::Item,
            sym::block => NonterminalKind::Block,
            sym::stmt => NonterminalKind::Stmt,
            sym::pat => {
                if edition().at_least_rust_2021() {
                    NonterminalKind::Pat(PatWithOr)
                } else {
                    NonterminalKind::Pat(PatParam { inferred: true })
                }
            }
            sym::pat_param => NonterminalKind::Pat(PatParam { inferred: false }),
            sym::expr => {
                if edition().at_least_rust_2024() {
                    NonterminalKind::Expr(Expr)
                } else {
                    NonterminalKind::Expr(Expr2021 { inferred: true })
                }
            }
            sym::expr_2021 => NonterminalKind::Expr(Expr2021 { inferred: false }),
            sym::ty => NonterminalKind::Ty,
            sym::ident => NonterminalKind::Ident,
            sym::lifetime => NonterminalKind::Lifetime,
            sym::literal => NonterminalKind::Literal,
            sym::meta => NonterminalKind::Meta,
            sym::path => NonterminalKind::Path,
            sym::vis => NonterminalKind::Vis,
            sym::guard => NonterminalKind::Guard,
            sym::tt => NonterminalKind::TT,
            _ => return None,
        })
    }

    /// The symbol the fragment specifier was written with. Inferred `PatParam`/`Expr2021`
    /// kinds map back to `pat`/`expr` respectively.
    fn symbol(self) -> Symbol {
        match self {
            NonterminalKind::Item => sym::item,
            NonterminalKind::Block => sym::block,
            NonterminalKind::Stmt => sym::stmt,
            NonterminalKind::Pat(PatParam { inferred: true } | PatWithOr) => sym::pat,
            NonterminalKind::Pat(PatParam { inferred: false }) => sym::pat_param,
            NonterminalKind::Expr(Expr2021 { inferred: true } | Expr) => sym::expr,
            NonterminalKind::Expr(Expr2021 { inferred: false }) => sym::expr_2021,
            NonterminalKind::Ty => sym::ty,
            NonterminalKind::Ident => sym::ident,
            NonterminalKind::Lifetime => sym::lifetime,
            NonterminalKind::Literal => sym::literal,
            NonterminalKind::Meta => sym::meta,
            NonterminalKind::Path => sym::path,
            NonterminalKind::Vis => sym::vis,
            NonterminalKind::Guard => sym::guard,
            NonterminalKind::TT => sym::tt,
        }
    }
}

/// Prints the symbol returned by `NonterminalKind::symbol`.
impl fmt::Display for NonterminalKind {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}", self.symbol())
    }
}

// Some types are used a lot. Make sure they don't unintentionally get bigger.
#[cfg(target_pointer_width = "64")]
mod size_asserts {
    use rustc_data_structures::static_assert_size;

    use super::*;
    // tidy-alphabetical-start
    static_assert_size!(Lit, 12);
    static_assert_size!(LitKind, 2);
    static_assert_size!(Token, 24);
    static_assert_size!(TokenKind, 16);
    // tidy-alphabetical-end
}