1 | //===--- LexerUtils.cpp - clang-tidy---------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "LexerUtils.h" |
10 | #include "clang/AST/AST.h" |
11 | #include "clang/Basic/SourceManager.h" |
12 | #include <optional> |
13 | #include <utility> |
14 | |
15 | namespace clang::tidy::utils::lexer { |
16 | |
17 | std::pair<Token, SourceLocation> |
18 | getPreviousTokenAndStart(SourceLocation Location, const SourceManager &SM, |
19 | const LangOptions &LangOpts, bool ) { |
20 | const std::optional<Token> Tok = |
21 | Lexer::findPreviousToken(Loc: Location, SM, LangOpts, IncludeComments: !SkipComments); |
22 | |
23 | if (Tok.has_value()) { |
24 | return {*Tok, Lexer::GetBeginningOfToken(Loc: Tok->getLocation(), SM, LangOpts)}; |
25 | } |
26 | |
27 | Token Token; |
28 | Token.setKind(tok::unknown); |
29 | return {Token, SourceLocation()}; |
30 | } |
31 | |
32 | Token getPreviousToken(SourceLocation Location, const SourceManager &SM, |
33 | const LangOptions &LangOpts, bool ) { |
34 | auto [Token, Start] = |
35 | getPreviousTokenAndStart(Location, SM, LangOpts, SkipComments); |
36 | return Token; |
37 | } |
38 | |
39 | SourceLocation findPreviousTokenStart(SourceLocation Start, |
40 | const SourceManager &SM, |
41 | const LangOptions &LangOpts) { |
42 | if (Start.isInvalid() || Start.isMacroID()) |
43 | return {}; |
44 | |
45 | SourceLocation BeforeStart = Start.getLocWithOffset(Offset: -1); |
46 | if (BeforeStart.isInvalid() || BeforeStart.isMacroID()) |
47 | return {}; |
48 | |
49 | return Lexer::GetBeginningOfToken(Loc: BeforeStart, SM, LangOpts); |
50 | } |
51 | |
52 | SourceLocation findPreviousTokenKind(SourceLocation Start, |
53 | const SourceManager &SM, |
54 | const LangOptions &LangOpts, |
55 | tok::TokenKind TK) { |
56 | if (Start.isInvalid() || Start.isMacroID()) |
57 | return {}; |
58 | |
59 | while (true) { |
60 | SourceLocation L = findPreviousTokenStart(Start, SM, LangOpts); |
61 | if (L.isInvalid() || L.isMacroID()) |
62 | return {}; |
63 | |
64 | Token T; |
65 | if (Lexer::getRawToken(Loc: L, Result&: T, SM, LangOpts, /*IgnoreWhiteSpace=*/true)) |
66 | return {}; |
67 | |
68 | if (T.is(K: TK)) |
69 | return T.getLocation(); |
70 | |
71 | Start = L; |
72 | } |
73 | } |
74 | |
75 | SourceLocation findNextTerminator(SourceLocation Start, const SourceManager &SM, |
76 | const LangOptions &LangOpts) { |
77 | return findNextAnyTokenKind(Start, SM, LangOpts, TK: tok::comma, TKs: tok::semi); |
78 | } |
79 | |
80 | std::optional<Token> |
81 | (SourceLocation Start, const SourceManager &SM, |
82 | const LangOptions &LangOpts) { |
83 | while (Start.isValid()) { |
84 | std::optional<Token> CurrentToken = |
85 | Lexer::findNextToken(Loc: Start, SM, LangOpts); |
86 | if (!CurrentToken || !CurrentToken->is(K: tok::comment)) |
87 | return CurrentToken; |
88 | |
89 | Start = CurrentToken->getLocation(); |
90 | } |
91 | |
92 | return std::nullopt; |
93 | } |
94 | |
95 | bool rangeContainsExpansionsOrDirectives(SourceRange Range, |
96 | const SourceManager &SM, |
97 | const LangOptions &LangOpts) { |
98 | assert(Range.isValid() && "Invalid Range for relexing provided" ); |
99 | SourceLocation Loc = Range.getBegin(); |
100 | |
101 | while (Loc <= Range.getEnd()) { |
102 | if (Loc.isMacroID()) |
103 | return true; |
104 | |
105 | std::optional<Token> Tok = Lexer::findNextToken(Loc, SM, LangOpts); |
106 | |
107 | if (!Tok) |
108 | return true; |
109 | |
110 | if (Tok->is(K: tok::hash)) |
111 | return true; |
112 | |
113 | Loc = Tok->getLocation(); |
114 | } |
115 | |
116 | return false; |
117 | } |
118 | |
119 | std::optional<Token> getQualifyingToken(tok::TokenKind TK, |
120 | CharSourceRange Range, |
121 | const ASTContext &Context, |
122 | const SourceManager &SM) { |
123 | assert((TK == tok::kw_const || TK == tok::kw_volatile || |
124 | TK == tok::kw_restrict) && |
125 | "TK is not a qualifier keyword" ); |
126 | std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Loc: Range.getBegin()); |
127 | StringRef File = SM.getBufferData(FID: LocInfo.first); |
128 | Lexer RawLexer(SM.getLocForStartOfFile(FID: LocInfo.first), Context.getLangOpts(), |
129 | File.begin(), File.data() + LocInfo.second, File.end()); |
130 | std::optional<Token> LastMatchBeforeTemplate; |
131 | std::optional<Token> LastMatchAfterTemplate; |
132 | bool SawTemplate = false; |
133 | Token Tok; |
134 | while (!RawLexer.LexFromRawLexer(Result&: Tok) && |
135 | Range.getEnd() != Tok.getLocation() && |
136 | !SM.isBeforeInTranslationUnit(LHS: Range.getEnd(), RHS: Tok.getLocation())) { |
137 | if (Tok.is(K: tok::raw_identifier)) { |
138 | IdentifierInfo &Info = Context.Idents.get( |
139 | Name: StringRef(SM.getCharacterData(SL: Tok.getLocation()), Tok.getLength())); |
140 | Tok.setIdentifierInfo(&Info); |
141 | Tok.setKind(Info.getTokenID()); |
142 | } |
143 | if (Tok.is(K: tok::less)) |
144 | SawTemplate = true; |
145 | else if (Tok.isOneOf(K1: tok::greater, K2: tok::greatergreater)) |
146 | LastMatchAfterTemplate = std::nullopt; |
147 | else if (Tok.is(K: TK)) { |
148 | if (SawTemplate) |
149 | LastMatchAfterTemplate = Tok; |
150 | else |
151 | LastMatchBeforeTemplate = Tok; |
152 | } |
153 | } |
154 | return LastMatchAfterTemplate != std::nullopt ? LastMatchAfterTemplate |
155 | : LastMatchBeforeTemplate; |
156 | } |
157 | |
158 | static bool breakAndReturnEnd(const Stmt &S) { |
159 | return isa<CompoundStmt, DeclStmt, NullStmt>(Val: S); |
160 | } |
161 | |
162 | static bool breakAndReturnEndPlus1Token(const Stmt &S) { |
163 | return isa<Expr, DoStmt, ReturnStmt, BreakStmt, ContinueStmt, GotoStmt, |
164 | SEHLeaveStmt>(Val: S); |
165 | } |
166 | |
167 | // Given a Stmt which does not include it's semicolon this method returns the |
168 | // SourceLocation of the semicolon. |
169 | static SourceLocation getSemicolonAfterStmtEndLoc(const SourceLocation &EndLoc, |
170 | const SourceManager &SM, |
171 | const LangOptions &LangOpts) { |
172 | |
173 | if (EndLoc.isMacroID()) { |
174 | // Assuming EndLoc points to a function call foo within macro F. |
175 | // This method is supposed to return location of the semicolon within |
176 | // those macro arguments: |
177 | // F ( foo() ; ) |
178 | // ^ EndLoc ^ SpellingLoc ^ next token of SpellingLoc |
179 | const SourceLocation SpellingLoc = SM.getSpellingLoc(Loc: EndLoc); |
180 | std::optional<Token> NextTok = |
181 | findNextTokenSkippingComments(Start: SpellingLoc, SM, LangOpts); |
182 | |
183 | // Was the next token found successfully? |
184 | // All macro issues are simply resolved by ensuring it's a semicolon. |
185 | if (NextTok && NextTok->is(K: tok::TokenKind::semi)) { |
186 | // Ideally this would return `F` with spelling location `;` (NextTok) |
187 | // following the example above. For now simply return NextTok location. |
188 | return NextTok->getLocation(); |
189 | } |
190 | |
191 | // Fallthrough to 'normal handling'. |
192 | // F ( foo() ) ; |
193 | // ^ EndLoc ^ SpellingLoc ) ^ next token of EndLoc |
194 | } |
195 | |
196 | std::optional<Token> NextTok = |
197 | findNextTokenSkippingComments(Start: EndLoc, SM, LangOpts); |
198 | |
199 | // Testing for semicolon again avoids some issues with macros. |
200 | if (NextTok && NextTok->is(K: tok::TokenKind::semi)) |
201 | return NextTok->getLocation(); |
202 | |
203 | return {}; |
204 | } |
205 | |
206 | SourceLocation getUnifiedEndLoc(const Stmt &S, const SourceManager &SM, |
207 | const LangOptions &LangOpts) { |
208 | |
209 | const Stmt *LastChild = &S; |
210 | while (!LastChild->children().empty() && !breakAndReturnEnd(S: *LastChild) && |
211 | !breakAndReturnEndPlus1Token(S: *LastChild)) { |
212 | for (const Stmt *Child : LastChild->children()) |
213 | LastChild = Child; |
214 | } |
215 | |
216 | if (!breakAndReturnEnd(S: *LastChild) && breakAndReturnEndPlus1Token(S: *LastChild)) |
217 | return getSemicolonAfterStmtEndLoc(EndLoc: S.getEndLoc(), SM, LangOpts); |
218 | |
219 | return S.getEndLoc(); |
220 | } |
221 | |
222 | SourceLocation getLocationForNoexceptSpecifier(const FunctionDecl *FuncDecl, |
223 | const SourceManager &SM) { |
224 | if (!FuncDecl) |
225 | return {}; |
226 | |
227 | const LangOptions &LangOpts = FuncDecl->getLangOpts(); |
228 | |
229 | if (FuncDecl->getNumParams() == 0) { |
230 | // Start at the beginning of the function declaration, and find the closing |
231 | // parenthesis after which we would place the noexcept specifier. |
232 | Token CurrentToken; |
233 | SourceLocation CurrentLocation = FuncDecl->getBeginLoc(); |
234 | while (!Lexer::getRawToken(Loc: CurrentLocation, Result&: CurrentToken, SM, LangOpts, |
235 | IgnoreWhiteSpace: true)) { |
236 | if (CurrentToken.is(K: tok::r_paren)) |
237 | return CurrentLocation.getLocWithOffset(Offset: 1); |
238 | |
239 | CurrentLocation = CurrentToken.getEndLoc(); |
240 | } |
241 | |
242 | // Failed to find the closing parenthesis, so just return an invalid |
243 | // SourceLocation. |
244 | return {}; |
245 | } |
246 | |
247 | // FunctionDecl with parameters |
248 | const SourceLocation NoexceptLoc = |
249 | FuncDecl->getParamDecl(i: FuncDecl->getNumParams() - 1)->getEndLoc(); |
250 | if (NoexceptLoc.isValid()) |
251 | return Lexer::findLocationAfterToken( |
252 | loc: NoexceptLoc, TKind: tok::r_paren, SM, LangOpts, |
253 | /*SkipTrailingWhitespaceAndNewLine=*/true); |
254 | |
255 | return {}; |
256 | } |
257 | |
258 | } // namespace clang::tidy::utils::lexer |
259 | |