//===--- LexerUtils.cpp - clang-tidy---------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "LexerUtils.h"
#include "clang/AST/AST.h"
#include "clang/Basic/SourceManager.h"
#include <optional>
#include <utility>

namespace clang::tidy::utils::lexer {

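// Returns the previous token together with the location where it begins. If no
// previous token can be lexed, returns a tok::unknown token and an invalid
// location.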
std::pair<Token, SourceLocation>
getPreviousTokenAndStart(SourceLocation Location, const SourceManager &SM,
                         const LangOptions &LangOpts, bool SkipComments) {
  const std::optional<Token> Tok =
      Lexer::findPreviousToken(Location, SM, LangOpts, !SkipComments);

  if (Tok.has_value()) {
    return {*Tok, Lexer::GetBeginningOfToken(Tok->getLocation(), SM, LangOpts)};
  }

  Token Token;
  Token.setKind(tok::unknown);
  return {Token, SourceLocation()};
}

Token getPreviousToken(SourceLocation Location, const SourceManager &SM,
                       const LangOptions &LangOpts, bool SkipComments) {
  auto [Token, Start] =
      getPreviousTokenAndStart(Location, SM, LangOpts, SkipComments);
  return Token;
}

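// Returns the start of the token containing the character immediately before
// Start, or an invalid location if Start is invalid or inside a macro
// expansion.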
SourceLocation findPreviousTokenStart(SourceLocation Start,
                                      const SourceManager &SM,
                                      const LangOptions &LangOpts) {
  if (Start.isInvalid() || Start.isMacroID())
    return {};

  SourceLocation BeforeStart = Start.getLocWithOffset(-1);
  if (BeforeStart.isInvalid() || BeforeStart.isMacroID())
    return {};

  return Lexer::GetBeginningOfToken(BeforeStart, SM, LangOpts);
}

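// Walks backwards from Start one token at a time and returns the location of
// the first token of kind TK, or an invalid location if an invalid or macro
// location is reached first.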
SourceLocation findPreviousTokenKind(SourceLocation Start,
                                     const SourceManager &SM,
                                     const LangOptions &LangOpts,
                                     tok::TokenKind TK) {
  if (Start.isInvalid() || Start.isMacroID())
    return {};

  while (true) {
    SourceLocation L = findPreviousTokenStart(Start, SM, LangOpts);
    if (L.isInvalid() || L.isMacroID())
      return {};

    Token T;
    if (Lexer::getRawToken(L, T, SM, LangOpts, /*IgnoreWhiteSpace=*/true))
      return {};

    if (T.is(TK))
      return T.getLocation();

    Start = L;
  }
}

SourceLocation findNextTerminator(SourceLocation Start, const SourceManager &SM,
                                  const LangOptions &LangOpts) {
  return findNextAnyTokenKind(Start, SM, LangOpts, tok::comma, tok::semi);
}

std::optional<Token>
findNextTokenSkippingComments(SourceLocation Start, const SourceManager &SM,
                              const LangOptions &LangOpts) {
  while (Start.isValid()) {
    std::optional<Token> CurrentToken =
        Lexer::findNextToken(Start, SM, LangOpts);
    if (!CurrentToken || !CurrentToken->is(tok::comment))
      return CurrentToken;

    Start = CurrentToken->getLocation();
  }

  return std::nullopt;
}

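// Re-lexes Range and returns true if it contains a macro expansion or a
// preprocessor directive; also returns true conservatively when re-lexing
// fails.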
bool rangeContainsExpansionsOrDirectives(SourceRange Range,
                                         const SourceManager &SM,
                                         const LangOptions &LangOpts) {
  assert(Range.isValid() && "Invalid Range for relexing provided");
  SourceLocation Loc = Range.getBegin();

  while (Loc <= Range.getEnd()) {
    if (Loc.isMacroID())
      return true;

    std::optional<Token> Tok = Lexer::findNextToken(Loc, SM, LangOpts);

    if (!Tok)
      return true;

    if (Tok->is(tok::hash))
      return true;

    Loc = Tok->getLocation();
  }

  return false;
}

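// Re-lexes Range and returns the token of kind TK that qualifies the outer
// type: a match lexed after a template argument list is preferred over one
// found inside it (e.g. the trailing 'const' in 'std::vector<const int>
// const'), and a match inside the template arguments is discarded once the
// closing '>' is seen.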
std::optional<Token> getQualifyingToken(tok::TokenKind TK,
                                        CharSourceRange Range,
                                        const ASTContext &Context,
                                        const SourceManager &SM) {
  assert((TK == tok::kw_const || TK == tok::kw_volatile ||
          TK == tok::kw_restrict) &&
         "TK is not a qualifier keyword");
  std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Range.getBegin());
  StringRef File = SM.getBufferData(LocInfo.first);
  Lexer RawLexer(SM.getLocForStartOfFile(LocInfo.first), Context.getLangOpts(),
                 File.begin(), File.data() + LocInfo.second, File.end());
  std::optional<Token> LastMatchBeforeTemplate;
  std::optional<Token> LastMatchAfterTemplate;
  bool SawTemplate = false;
  Token Tok;
  while (!RawLexer.LexFromRawLexer(Tok) &&
         Range.getEnd() != Tok.getLocation() &&
         !SM.isBeforeInTranslationUnit(Range.getEnd(), Tok.getLocation())) {
    if (Tok.is(tok::raw_identifier)) {
      IdentifierInfo &Info = Context.Idents.get(
          StringRef(SM.getCharacterData(Tok.getLocation()), Tok.getLength()));
      Tok.setIdentifierInfo(&Info);
      Tok.setKind(Info.getTokenID());
    }
    if (Tok.is(tok::less))
      SawTemplate = true;
    else if (Tok.isOneOf(tok::greater, tok::greatergreater))
      LastMatchAfterTemplate = std::nullopt;
    else if (Tok.is(TK)) {
      if (SawTemplate)
        LastMatchAfterTemplate = Tok;
      else
        LastMatchBeforeTemplate = Tok;
    }
  }
  return LastMatchAfterTemplate != std::nullopt ? LastMatchAfterTemplate
                                                : LastMatchBeforeTemplate;
}

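// Statement kinds whose getEndLoc() already covers their terminating token.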
static bool breakAndReturnEnd(const Stmt &S) {
  return isa<CompoundStmt, DeclStmt, NullStmt>(S);
}

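// Statement kinds that are terminated by a semicolon which is not part of
// their source range.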
static bool breakAndReturnEndPlus1Token(const Stmt &S) {
  return isa<Expr, DoStmt, ReturnStmt, BreakStmt, ContinueStmt, GotoStmt,
             SEHLeaveStmt>(S);
}

// Given a Stmt that does not include its semicolon, this method returns the
// SourceLocation of the semicolon.
static SourceLocation getSemicolonAfterStmtEndLoc(const SourceLocation &EndLoc,
                                                  const SourceManager &SM,
                                                  const LangOptions &LangOpts) {

  if (EndLoc.isMacroID()) {
    // Assuming EndLoc points to a function call foo within macro F.
    // This method is supposed to return location of the semicolon within
    // those macro arguments:
    //  F     (      foo()            ;   )
    //  ^ EndLoc     ^ SpellingLoc    ^ next token of SpellingLoc
    const SourceLocation SpellingLoc = SM.getSpellingLoc(EndLoc);
    std::optional<Token> NextTok =
        findNextTokenSkippingComments(SpellingLoc, SM, LangOpts);

    // Was the next token found successfully?
    // All macro issues are simply resolved by ensuring it's a semicolon.
    if (NextTok && NextTok->is(tok::TokenKind::semi)) {
      // Ideally this would return `F` with spelling location `;` (NextTok)
      // following the example above. For now simply return NextTok location.
      return NextTok->getLocation();
    }

    // Fallthrough to 'normal handling'.
    //  F     (      foo()            )   ;
    //  ^ EndLoc     ^ SpellingLoc        ^ next token of EndLoc
  }

  std::optional<Token> NextTok =
      findNextTokenSkippingComments(EndLoc, SM, LangOpts);

  // Testing for semicolon again avoids some issues with macros.
  if (NextTok && NextTok->is(tok::TokenKind::semi))
    return NextTok->getLocation();

  return {};
}

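// Returns the end location of S, extended to the trailing semicolon when the
// innermost terminating statement does not include the semicolon in its own
// source range.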
SourceLocation getUnifiedEndLoc(const Stmt &S, const SourceManager &SM,
                                const LangOptions &LangOpts) {

  const Stmt *LastChild = &S;
  while (!LastChild->children().empty() && !breakAndReturnEnd(*LastChild) &&
         !breakAndReturnEndPlus1Token(*LastChild)) {
    for (const Stmt *Child : LastChild->children())
      LastChild = Child;
  }

  if (!breakAndReturnEnd(*LastChild) && breakAndReturnEndPlus1Token(*LastChild))
    return getSemicolonAfterStmtEndLoc(S.getEndLoc(), SM, LangOpts);

  return S.getEndLoc();
}

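// Returns the location immediately after the closing parenthesis of FuncDecl's
// parameter list, i.e. where a noexcept specifier could be inserted; returns
// an invalid location on failure.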
SourceLocation getLocationForNoexceptSpecifier(const FunctionDecl *FuncDecl,
                                               const SourceManager &SM) {
  if (!FuncDecl)
    return {};

  const LangOptions &LangOpts = FuncDecl->getLangOpts();

  if (FuncDecl->getNumParams() == 0) {
    // Start at the beginning of the function declaration, and find the closing
    // parenthesis after which we would place the noexcept specifier.
    Token CurrentToken;
    SourceLocation CurrentLocation = FuncDecl->getBeginLoc();
    while (!Lexer::getRawToken(CurrentLocation, CurrentToken, SM, LangOpts,
                               /*IgnoreWhiteSpace=*/true)) {
      if (CurrentToken.is(tok::r_paren))
        return CurrentLocation.getLocWithOffset(1);

      CurrentLocation = CurrentToken.getEndLoc();
    }

    // Failed to find the closing parenthesis, so just return an invalid
    // SourceLocation.
    return {};
  }

  // FunctionDecl with parameters
  const SourceLocation NoexceptLoc =
      FuncDecl->getParamDecl(FuncDecl->getNumParams() - 1)->getEndLoc();
  if (NoexceptLoc.isValid())
    return Lexer::findLocationAfterToken(
        NoexceptLoc, tok::r_paren, SM, LangOpts,
        /*SkipTrailingWhitespaceAndNewLine=*/true);

  return {};
}

} // namespace clang::tidy::utils::lexer