1 | //===--- LexerUtils.cpp - clang-tidy---------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "LexerUtils.h" |
10 | #include "clang/AST/AST.h" |
11 | #include "clang/Basic/SourceManager.h" |
12 | #include <optional> |
13 | #include <utility> |
14 | |
15 | namespace clang::tidy::utils::lexer { |
16 | |
17 | std::pair<Token, SourceLocation> |
18 | getPreviousTokenAndStart(SourceLocation Location, const SourceManager &SM, |
19 | const LangOptions &LangOpts, bool ) { |
20 | Token Token; |
21 | Token.setKind(tok::unknown); |
22 | |
23 | Location = Location.getLocWithOffset(Offset: -1); |
24 | if (Location.isInvalid()) |
25 | return {Token, Location}; |
26 | |
27 | auto StartOfFile = SM.getLocForStartOfFile(FID: SM.getFileID(SpellingLoc: Location)); |
28 | while (Location != StartOfFile) { |
29 | Location = Lexer::GetBeginningOfToken(Loc: Location, SM, LangOpts); |
30 | if (!Lexer::getRawToken(Loc: Location, Result&: Token, SM, LangOpts) && |
31 | (!SkipComments || !Token.is(K: tok::comment))) { |
32 | break; |
33 | } |
34 | Location = Location.getLocWithOffset(Offset: -1); |
35 | } |
36 | return {Token, Location}; |
37 | } |
38 | |
39 | Token getPreviousToken(SourceLocation Location, const SourceManager &SM, |
40 | const LangOptions &LangOpts, bool ) { |
41 | auto [Token, Start] = |
42 | getPreviousTokenAndStart(Location, SM, LangOpts, SkipComments); |
43 | return Token; |
44 | } |
45 | |
46 | SourceLocation findPreviousTokenStart(SourceLocation Start, |
47 | const SourceManager &SM, |
48 | const LangOptions &LangOpts) { |
49 | if (Start.isInvalid() || Start.isMacroID()) |
50 | return {}; |
51 | |
52 | SourceLocation BeforeStart = Start.getLocWithOffset(Offset: -1); |
53 | if (BeforeStart.isInvalid() || BeforeStart.isMacroID()) |
54 | return {}; |
55 | |
56 | return Lexer::GetBeginningOfToken(Loc: BeforeStart, SM, LangOpts); |
57 | } |
58 | |
59 | SourceLocation findPreviousTokenKind(SourceLocation Start, |
60 | const SourceManager &SM, |
61 | const LangOptions &LangOpts, |
62 | tok::TokenKind TK) { |
63 | if (Start.isInvalid() || Start.isMacroID()) |
64 | return {}; |
65 | |
66 | while (true) { |
67 | SourceLocation L = findPreviousTokenStart(Start, SM, LangOpts); |
68 | if (L.isInvalid() || L.isMacroID()) |
69 | return {}; |
70 | |
71 | Token T; |
72 | if (Lexer::getRawToken(Loc: L, Result&: T, SM, LangOpts, /*IgnoreWhiteSpace=*/true)) |
73 | return {}; |
74 | |
75 | if (T.is(K: TK)) |
76 | return T.getLocation(); |
77 | |
78 | Start = L; |
79 | } |
80 | } |
81 | |
82 | SourceLocation findNextTerminator(SourceLocation Start, const SourceManager &SM, |
83 | const LangOptions &LangOpts) { |
84 | return findNextAnyTokenKind(Start, SM, LangOpts, TK: tok::comma, TKs: tok::semi); |
85 | } |
86 | |
87 | std::optional<Token> |
88 | (SourceLocation Start, const SourceManager &SM, |
89 | const LangOptions &LangOpts) { |
90 | // `Lexer::findNextToken` will ignore comment |
91 | if (Start.isMacroID()) |
92 | return std::nullopt; |
93 | Start = Lexer::getLocForEndOfToken(Loc: Start, Offset: 0, SM, LangOpts); |
94 | // Break down the source location. |
95 | std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Loc: Start); |
96 | bool InvalidTemp = false; |
97 | StringRef File = SM.getBufferData(FID: LocInfo.first, Invalid: &InvalidTemp); |
98 | if (InvalidTemp) |
99 | return std::nullopt; |
100 | // Lex from the start of the given location. |
101 | Lexer L(SM.getLocForStartOfFile(FID: LocInfo.first), LangOpts, File.begin(), |
102 | File.data() + LocInfo.second, File.end()); |
103 | L.SetCommentRetentionState(true); |
104 | // Find the token. |
105 | Token Tok; |
106 | L.LexFromRawLexer(Result&: Tok); |
107 | return Tok; |
108 | } |
109 | |
110 | std::optional<Token> |
111 | (SourceLocation Start, const SourceManager &SM, |
112 | const LangOptions &LangOpts) { |
113 | while (Start.isValid()) { |
114 | std::optional<Token> CurrentToken = |
115 | Lexer::findNextToken(Loc: Start, SM, LangOpts); |
116 | if (!CurrentToken || !CurrentToken->is(K: tok::comment)) |
117 | return CurrentToken; |
118 | |
119 | Start = CurrentToken->getLocation(); |
120 | } |
121 | |
122 | return std::nullopt; |
123 | } |
124 | |
125 | bool rangeContainsExpansionsOrDirectives(SourceRange Range, |
126 | const SourceManager &SM, |
127 | const LangOptions &LangOpts) { |
128 | assert(Range.isValid() && "Invalid Range for relexing provided" ); |
129 | SourceLocation Loc = Range.getBegin(); |
130 | |
131 | while (Loc <= Range.getEnd()) { |
132 | if (Loc.isMacroID()) |
133 | return true; |
134 | |
135 | std::optional<Token> Tok = Lexer::findNextToken(Loc, SM, LangOpts); |
136 | |
137 | if (!Tok) |
138 | return true; |
139 | |
140 | if (Tok->is(K: tok::hash)) |
141 | return true; |
142 | |
143 | Loc = Tok->getLocation(); |
144 | } |
145 | |
146 | return false; |
147 | } |
148 | |
149 | std::optional<Token> getQualifyingToken(tok::TokenKind TK, |
150 | CharSourceRange Range, |
151 | const ASTContext &Context, |
152 | const SourceManager &SM) { |
153 | assert((TK == tok::kw_const || TK == tok::kw_volatile || |
154 | TK == tok::kw_restrict) && |
155 | "TK is not a qualifier keyword" ); |
156 | std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Loc: Range.getBegin()); |
157 | StringRef File = SM.getBufferData(FID: LocInfo.first); |
158 | Lexer RawLexer(SM.getLocForStartOfFile(FID: LocInfo.first), Context.getLangOpts(), |
159 | File.begin(), File.data() + LocInfo.second, File.end()); |
160 | std::optional<Token> LastMatchBeforeTemplate; |
161 | std::optional<Token> LastMatchAfterTemplate; |
162 | bool SawTemplate = false; |
163 | Token Tok; |
164 | while (!RawLexer.LexFromRawLexer(Result&: Tok) && |
165 | Range.getEnd() != Tok.getLocation() && |
166 | !SM.isBeforeInTranslationUnit(LHS: Range.getEnd(), RHS: Tok.getLocation())) { |
167 | if (Tok.is(K: tok::raw_identifier)) { |
168 | IdentifierInfo &Info = Context.Idents.get( |
169 | Name: StringRef(SM.getCharacterData(SL: Tok.getLocation()), Tok.getLength())); |
170 | Tok.setIdentifierInfo(&Info); |
171 | Tok.setKind(Info.getTokenID()); |
172 | } |
173 | if (Tok.is(K: tok::less)) |
174 | SawTemplate = true; |
175 | else if (Tok.isOneOf(K1: tok::greater, K2: tok::greatergreater)) |
176 | LastMatchAfterTemplate = std::nullopt; |
177 | else if (Tok.is(K: TK)) { |
178 | if (SawTemplate) |
179 | LastMatchAfterTemplate = Tok; |
180 | else |
181 | LastMatchBeforeTemplate = Tok; |
182 | } |
183 | } |
184 | return LastMatchAfterTemplate != std::nullopt ? LastMatchAfterTemplate |
185 | : LastMatchBeforeTemplate; |
186 | } |
187 | |
188 | static bool breakAndReturnEnd(const Stmt &S) { |
189 | return isa<CompoundStmt, DeclStmt, NullStmt>(Val: S); |
190 | } |
191 | |
192 | static bool breakAndReturnEndPlus1Token(const Stmt &S) { |
193 | return isa<Expr, DoStmt, ReturnStmt, BreakStmt, ContinueStmt, GotoStmt, |
194 | SEHLeaveStmt>(Val: S); |
195 | } |
196 | |
197 | // Given a Stmt which does not include it's semicolon this method returns the |
198 | // SourceLocation of the semicolon. |
199 | static SourceLocation getSemicolonAfterStmtEndLoc(const SourceLocation &EndLoc, |
200 | const SourceManager &SM, |
201 | const LangOptions &LangOpts) { |
202 | |
203 | if (EndLoc.isMacroID()) { |
204 | // Assuming EndLoc points to a function call foo within macro F. |
205 | // This method is supposed to return location of the semicolon within |
206 | // those macro arguments: |
207 | // F ( foo() ; ) |
208 | // ^ EndLoc ^ SpellingLoc ^ next token of SpellingLoc |
209 | const SourceLocation SpellingLoc = SM.getSpellingLoc(Loc: EndLoc); |
210 | std::optional<Token> NextTok = |
211 | findNextTokenSkippingComments(Start: SpellingLoc, SM, LangOpts); |
212 | |
213 | // Was the next token found successfully? |
214 | // All macro issues are simply resolved by ensuring it's a semicolon. |
215 | if (NextTok && NextTok->is(K: tok::TokenKind::semi)) { |
216 | // Ideally this would return `F` with spelling location `;` (NextTok) |
217 | // following the example above. For now simply return NextTok location. |
218 | return NextTok->getLocation(); |
219 | } |
220 | |
221 | // Fallthrough to 'normal handling'. |
222 | // F ( foo() ) ; |
223 | // ^ EndLoc ^ SpellingLoc ) ^ next token of EndLoc |
224 | } |
225 | |
226 | std::optional<Token> NextTok = |
227 | findNextTokenSkippingComments(Start: EndLoc, SM, LangOpts); |
228 | |
229 | // Testing for semicolon again avoids some issues with macros. |
230 | if (NextTok && NextTok->is(K: tok::TokenKind::semi)) |
231 | return NextTok->getLocation(); |
232 | |
233 | return {}; |
234 | } |
235 | |
236 | SourceLocation getUnifiedEndLoc(const Stmt &S, const SourceManager &SM, |
237 | const LangOptions &LangOpts) { |
238 | |
239 | const Stmt *LastChild = &S; |
240 | while (!LastChild->children().empty() && !breakAndReturnEnd(S: *LastChild) && |
241 | !breakAndReturnEndPlus1Token(S: *LastChild)) { |
242 | for (const Stmt *Child : LastChild->children()) |
243 | LastChild = Child; |
244 | } |
245 | |
246 | if (!breakAndReturnEnd(S: *LastChild) && breakAndReturnEndPlus1Token(S: *LastChild)) |
247 | return getSemicolonAfterStmtEndLoc(EndLoc: S.getEndLoc(), SM, LangOpts); |
248 | |
249 | return S.getEndLoc(); |
250 | } |
251 | |
252 | SourceLocation getLocationForNoexceptSpecifier(const FunctionDecl *FuncDecl, |
253 | const SourceManager &SM) { |
254 | if (!FuncDecl) |
255 | return {}; |
256 | |
257 | const LangOptions &LangOpts = FuncDecl->getLangOpts(); |
258 | |
259 | if (FuncDecl->getNumParams() == 0) { |
260 | // Start at the beginning of the function declaration, and find the closing |
261 | // parenthesis after which we would place the noexcept specifier. |
262 | Token CurrentToken; |
263 | SourceLocation CurrentLocation = FuncDecl->getBeginLoc(); |
264 | while (!Lexer::getRawToken(Loc: CurrentLocation, Result&: CurrentToken, SM, LangOpts, |
265 | IgnoreWhiteSpace: true)) { |
266 | if (CurrentToken.is(K: tok::r_paren)) |
267 | return CurrentLocation.getLocWithOffset(Offset: 1); |
268 | |
269 | CurrentLocation = CurrentToken.getEndLoc(); |
270 | } |
271 | |
272 | // Failed to find the closing parenthesis, so just return an invalid |
273 | // SourceLocation. |
274 | return {}; |
275 | } |
276 | |
277 | // FunctionDecl with parameters |
278 | const SourceLocation NoexceptLoc = |
279 | FuncDecl->getParamDecl(i: FuncDecl->getNumParams() - 1)->getEndLoc(); |
280 | if (NoexceptLoc.isValid()) |
281 | return Lexer::findLocationAfterToken( |
282 | loc: NoexceptLoc, TKind: tok::r_paren, SM, LangOpts, |
283 | /*SkipTrailingWhitespaceAndNewLine=*/true); |
284 | |
285 | return {}; |
286 | } |
287 | |
288 | } // namespace clang::tidy::utils::lexer |
289 | |