1 | //===--- LoopConvertUtils.h - clang-tidy ------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_LOOP_CONVERT_UTILS_H |
10 | #define |
11 | |
12 | #include "clang/AST/ASTContext.h" |
13 | #include "clang/AST/RecursiveASTVisitor.h" |
14 | #include "clang/ASTMatchers/ASTMatchFinder.h" |
15 | #include "clang/Basic/SourceLocation.h" |
16 | #include "llvm/ADT/DenseMap.h" |
17 | #include "llvm/ADT/SmallSet.h" |
18 | #include "llvm/ADT/SmallVector.h" |
19 | #include "llvm/ADT/StringRef.h" |
20 | #include <algorithm> |
21 | #include <memory> |
22 | #include <string> |
23 | #include <utility> |
24 | |
25 | namespace clang::tidy::modernize { |
26 | |
27 | enum LoopFixerKind { |
28 | LFK_Array, |
29 | LFK_Iterator, |
30 | LFK_ReverseIterator, |
31 | LFK_PseudoArray |
32 | }; |
33 | |
34 | /// A map used to walk the AST in reverse: maps child Stmt to parent Stmt. |
35 | using StmtParentMap = llvm::DenseMap<const clang::Stmt *, const clang::Stmt *>; |
36 | |
37 | /// A map used to walk the AST in reverse: |
38 | /// maps VarDecl to the to parent DeclStmt. |
39 | using DeclParentMap = |
40 | llvm::DenseMap<const clang::VarDecl *, const clang::DeclStmt *>; |
41 | |
42 | /// A map used to track which variables have been removed by a refactoring pass. |
43 | /// It maps the parent ForStmt to the removed index variable's VarDecl. |
44 | using ReplacedVarsMap = |
45 | llvm::DenseMap<const clang::ForStmt *, const clang::VarDecl *>; |
46 | |
47 | /// A map used to remember the variable names generated in a Stmt |
48 | using StmtGeneratedVarNameMap = |
49 | llvm::DenseMap<const clang::Stmt *, std::string>; |
50 | |
51 | /// A vector used to store the AST subtrees of an Expr. |
52 | using ComponentVector = llvm::SmallVector<const clang::Expr *, 16>; |
53 | |
54 | /// Class used build the reverse AST properties needed to detect |
55 | /// name conflicts and free variables. |
56 | class StmtAncestorASTVisitor |
57 | : public clang::RecursiveASTVisitor<StmtAncestorASTVisitor> { |
58 | public: |
59 | StmtAncestorASTVisitor() { StmtStack.push_back(Elt: nullptr); } |
60 | |
61 | /// Run the analysis on the AST. |
62 | /// |
63 | /// In case we're running this analysis multiple times, don't repeat the work. |
64 | void gatherAncestors(ASTContext &Ctx) { |
65 | if (StmtAncestors.empty()) |
66 | TraverseAST(AST&: Ctx); |
67 | } |
68 | |
69 | /// Accessor for StmtAncestors. |
70 | const StmtParentMap &getStmtToParentStmtMap() { return StmtAncestors; } |
71 | |
72 | /// Accessor for DeclParents. |
73 | const DeclParentMap &getDeclToParentStmtMap() { return DeclParents; } |
74 | |
75 | friend class clang::RecursiveASTVisitor<StmtAncestorASTVisitor>; |
76 | |
77 | private: |
78 | StmtParentMap StmtAncestors; |
79 | DeclParentMap DeclParents; |
80 | llvm::SmallVector<const clang::Stmt *, 16> StmtStack; |
81 | |
82 | bool TraverseStmt(clang::Stmt *Statement); |
83 | bool VisitDeclStmt(clang::DeclStmt *Statement); |
84 | }; |
85 | |
86 | /// Class used to find the variables and member expressions on which an |
87 | /// arbitrary expression depends. |
88 | class ComponentFinderASTVisitor |
89 | : public clang::RecursiveASTVisitor<ComponentFinderASTVisitor> { |
90 | public: |
91 | ComponentFinderASTVisitor() = default; |
92 | |
93 | /// Find the components of an expression and place them in a ComponentVector. |
94 | void findExprComponents(const clang::Expr *SourceExpr) { |
95 | TraverseStmt(const_cast<clang::Expr *>(SourceExpr)); |
96 | } |
97 | |
98 | /// Accessor for Components. |
99 | const ComponentVector &getComponents() { return Components; } |
100 | |
101 | friend class clang::RecursiveASTVisitor<ComponentFinderASTVisitor>; |
102 | |
103 | private: |
104 | ComponentVector Components; |
105 | |
106 | bool VisitDeclRefExpr(clang::DeclRefExpr *E); |
107 | bool VisitMemberExpr(clang::MemberExpr *Member); |
108 | }; |
109 | |
110 | /// Class used to determine if an expression is dependent on a variable declared |
111 | /// inside of the loop where it would be used. |
112 | class DependencyFinderASTVisitor |
113 | : public clang::RecursiveASTVisitor<DependencyFinderASTVisitor> { |
114 | public: |
115 | DependencyFinderASTVisitor(const StmtParentMap *StmtParents, |
116 | const DeclParentMap *DeclParents, |
117 | const ReplacedVarsMap *ReplacedVars, |
118 | const clang::Stmt *ContainingStmt) |
119 | : StmtParents(StmtParents), DeclParents(DeclParents), |
120 | ContainingStmt(ContainingStmt), ReplacedVars(ReplacedVars) {} |
121 | |
122 | /// Run the analysis on Body, and return true iff the expression |
123 | /// depends on some variable declared within ContainingStmt. |
124 | /// |
125 | /// This is intended to protect against hoisting the container expression |
126 | /// outside of an inner context if part of that expression is declared in that |
127 | /// inner context. |
128 | /// |
129 | /// For example, |
130 | /// \code |
131 | /// const int N = 10, M = 20; |
132 | /// int arr[N][M]; |
133 | /// int getRow(); |
134 | /// |
135 | /// for (int i = 0; i < M; ++i) { |
136 | /// int k = getRow(); |
137 | /// printf("%d:", arr[k][i]); |
138 | /// } |
139 | /// \endcode |
140 | /// At first glance, this loop looks like it could be changed to |
141 | /// \code |
142 | /// for (int elem : arr[k]) { |
143 | /// int k = getIndex(); |
144 | /// printf("%d:", elem); |
145 | /// } |
146 | /// \endcode |
147 | /// But this is malformed, since `k` is used before it is defined! |
148 | /// |
149 | /// In order to avoid this, this class looks at the container expression |
150 | /// `arr[k]` and decides whether or not it contains a sub-expression declared |
151 | /// within the loop body. |
152 | bool dependsOnInsideVariable(const clang::Stmt *Body) { |
153 | DependsOnInsideVariable = false; |
154 | TraverseStmt(S: const_cast<clang::Stmt *>(Body)); |
155 | return DependsOnInsideVariable; |
156 | } |
157 | |
158 | friend class clang::RecursiveASTVisitor<DependencyFinderASTVisitor>; |
159 | |
160 | private: |
161 | const StmtParentMap *StmtParents; |
162 | const DeclParentMap *DeclParents; |
163 | const clang::Stmt *ContainingStmt; |
164 | const ReplacedVarsMap *ReplacedVars; |
165 | bool DependsOnInsideVariable; |
166 | |
167 | bool VisitVarDecl(clang::VarDecl *V); |
168 | bool VisitDeclRefExpr(clang::DeclRefExpr *D); |
169 | }; |
170 | |
171 | /// Class used to determine if any declarations used in a Stmt would conflict |
172 | /// with a particular identifier. This search includes the names that don't |
173 | /// actually appear in the AST (i.e. created by a refactoring tool) by including |
174 | /// a map from Stmts to generated names associated with those stmts. |
175 | class DeclFinderASTVisitor |
176 | : public clang::RecursiveASTVisitor<DeclFinderASTVisitor> { |
177 | public: |
178 | DeclFinderASTVisitor(const StringRef &Name, |
179 | const StmtGeneratedVarNameMap *GeneratedDecls) |
180 | : Name(Name), GeneratedDecls(GeneratedDecls) {} |
181 | |
182 | /// Attempts to find any usages of variables name Name in Body, returning |
183 | /// true when it is used in Body. This includes the generated loop variables |
184 | /// of ForStmts which have already been transformed. |
185 | bool findUsages(const clang::Stmt *Body) { |
186 | Found = false; |
187 | TraverseStmt(S: const_cast<clang::Stmt *>(Body)); |
188 | return Found; |
189 | } |
190 | |
191 | friend class clang::RecursiveASTVisitor<DeclFinderASTVisitor>; |
192 | |
193 | private: |
194 | std::string Name; |
195 | /// GeneratedDecls keeps track of ForStmts which have been transformed, |
196 | /// mapping each modified ForStmt to the variable generated in the loop. |
197 | const StmtGeneratedVarNameMap *GeneratedDecls; |
198 | bool Found = false; |
199 | |
200 | bool VisitForStmt(clang::ForStmt *); |
201 | bool VisitNamedDecl(clang::NamedDecl *); |
202 | bool VisitDeclRefExpr(clang::DeclRefExpr *); |
203 | bool VisitTypeLoc(clang::TypeLoc); |
204 | }; |
205 | |
206 | /// The information needed to describe a valid convertible usage |
207 | /// of an array index or iterator. |
208 | struct Usage { |
209 | enum UsageKind { |
210 | // Regular usages of the loop index (the ones not specified below). Some |
211 | // examples: |
212 | // \code |
213 | // int X = 8 * Arr[i]; |
214 | // ^~~~~~ |
215 | // f(param1, param2, *It); |
216 | // ^~~ |
217 | // if (Vec[i].SomeBool) {} |
218 | // ^~~~~~ |
219 | // \endcode |
220 | UK_Default, |
221 | // Indicates whether this is an access to a member through the arrow |
222 | // operator on pointers or iterators. |
223 | UK_MemberThroughArrow, |
224 | // If the variable is being captured by a lambda, indicates whether the |
225 | // capture was done by value or by reference. |
226 | UK_CaptureByCopy, |
227 | UK_CaptureByRef |
228 | }; |
229 | // The expression that is going to be converted. Null in case of lambda |
230 | // captures. |
231 | const Expr *Expression; |
232 | |
233 | UsageKind Kind; |
234 | |
235 | // Range that covers this usage. |
236 | SourceRange Range; |
237 | |
238 | explicit Usage(const Expr *E) |
239 | : Expression(E), Kind(UK_Default), Range(Expression->getSourceRange()) {} |
240 | Usage(const Expr *E, UsageKind Kind, SourceRange Range) |
241 | : Expression(E), Kind(Kind), Range(std::move(Range)) {} |
242 | }; |
243 | |
244 | /// A class to encapsulate lowering of the tool's confidence level. |
245 | class Confidence { |
246 | public: |
247 | enum Level { |
248 | // Transformations that are likely to change semantics. |
249 | CL_Risky, |
250 | |
251 | // Transformations that might change semantics. |
252 | CL_Reasonable, |
253 | |
254 | // Transformations that will not change semantics. |
255 | CL_Safe |
256 | }; |
257 | /// Initialize confidence level. |
258 | explicit Confidence(Confidence::Level Level) : CurrentLevel(Level) {} |
259 | |
260 | /// Lower the internal confidence level to Level, but do not raise it. |
261 | void lowerTo(Confidence::Level Level) { |
262 | CurrentLevel = std::min(a: Level, b: CurrentLevel); |
263 | } |
264 | |
265 | /// Return the internal confidence level. |
266 | Level getLevel() const { return CurrentLevel; } |
267 | |
268 | private: |
269 | Level CurrentLevel; |
270 | }; |
271 | |
272 | // The main computational result of ForLoopIndexVisitor. |
273 | using UsageResult = llvm::SmallVector<Usage, 8>; |
274 | |
275 | // General functions used by ForLoopIndexUseVisitor and LoopConvertCheck. |
276 | const Expr *digThroughConstructorsConversions(const Expr *E); |
277 | bool areSameExpr(ASTContext *Context, const Expr *First, const Expr *Second); |
278 | const DeclRefExpr *getDeclRef(const Expr *E); |
279 | bool areSameVariable(const ValueDecl *First, const ValueDecl *Second); |
280 | |
281 | /// Discover usages of expressions consisting of index or iterator |
282 | /// access. |
283 | /// |
284 | /// Given an index variable, recursively crawls a for loop to discover if the |
285 | /// index variable is used in a way consistent with range-based for loop access. |
286 | class ForLoopIndexUseVisitor |
287 | : public RecursiveASTVisitor<ForLoopIndexUseVisitor> { |
288 | public: |
289 | ForLoopIndexUseVisitor(ASTContext *Context, const VarDecl *IndexVar, |
290 | const VarDecl *EndVar, const Expr *ContainerExpr, |
291 | const Expr *ArrayBoundExpr, |
292 | bool ContainerNeedsDereference); |
293 | |
294 | /// Finds all uses of IndexVar in Body, placing all usages in Usages, |
295 | /// and returns true if IndexVar was only used in a way consistent with a |
296 | /// range-based for loop. |
297 | /// |
298 | /// The general strategy is to reject any DeclRefExprs referencing IndexVar, |
299 | /// with the exception of certain acceptable patterns. |
300 | /// For arrays, the DeclRefExpr for IndexVar must appear as the index of an |
301 | /// ArraySubscriptExpression. Iterator-based loops may dereference |
302 | /// IndexVar or call methods through operator-> (builtin or overloaded). |
303 | /// Array-like containers may use IndexVar as a parameter to the at() member |
304 | /// function and in overloaded operator[]. |
305 | bool findAndVerifyUsages(const Stmt *Body); |
306 | |
307 | /// Add a set of components that we should consider relevant to the |
308 | /// container. |
309 | void addComponents(const ComponentVector &Components); |
310 | |
311 | /// Accessor for Usages. |
312 | const UsageResult &getUsages() const { return Usages; } |
313 | |
314 | /// Adds the Usage if it was not added before. |
315 | void addUsage(const Usage &U); |
316 | |
317 | /// Get the container indexed by IndexVar, if any. |
318 | const Expr *getContainerIndexed() const { return ContainerExpr; } |
319 | |
320 | /// Returns the statement declaring the variable created as an alias |
321 | /// for the loop element, if any. |
322 | const DeclStmt *getAliasDecl() const { return AliasDecl; } |
323 | |
324 | /// Accessor for ConfidenceLevel. |
325 | Confidence::Level getConfidenceLevel() const { |
326 | return ConfidenceLevel.getLevel(); |
327 | } |
328 | |
329 | /// Indicates if the alias declaration was in a place where it cannot |
330 | /// simply be removed but rather replaced with a use of the alias variable. |
331 | /// For example, variables declared in the condition of an if, switch, or for |
332 | /// stmt. |
333 | bool aliasUseRequired() const { return ReplaceWithAliasUse; } |
334 | |
335 | /// Indicates if the alias declaration came from the init clause of a |
336 | /// nested for loop. SourceRanges provided by Clang for DeclStmts in this |
337 | /// case need to be adjusted. |
338 | bool aliasFromForInit() const { return AliasFromForInit; } |
339 | |
340 | private: |
341 | /// Typedef used in CRTP functions. |
342 | using VisitorBase = RecursiveASTVisitor<ForLoopIndexUseVisitor>; |
343 | friend class RecursiveASTVisitor<ForLoopIndexUseVisitor>; |
344 | |
345 | /// Overriden methods for RecursiveASTVisitor's traversal. |
346 | bool TraverseArraySubscriptExpr(ArraySubscriptExpr *E); |
347 | bool TraverseCXXMemberCallExpr(CXXMemberCallExpr *MemberCall); |
348 | bool TraverseCXXOperatorCallExpr(CXXOperatorCallExpr *OpCall); |
349 | bool TraverseLambdaCapture(LambdaExpr *LE, const LambdaCapture *C, |
350 | Expr *Init); |
351 | bool TraverseMemberExpr(MemberExpr *Member); |
352 | bool TraverseUnaryOperator(UnaryOperator *Uop); |
353 | bool VisitDeclRefExpr(DeclRefExpr *E); |
354 | bool VisitDeclStmt(DeclStmt *S); |
355 | bool TraverseStmt(Stmt *S); |
356 | |
357 | /// Add an expression to the list of expressions on which the container |
358 | /// expression depends. |
359 | void addComponent(const Expr *E); |
360 | |
361 | // Input member variables: |
362 | ASTContext *Context; |
363 | /// The index variable's VarDecl. |
364 | const VarDecl *IndexVar; |
365 | /// The loop's 'end' variable, which cannot be mentioned at all. |
366 | const VarDecl *EndVar; |
367 | /// The Expr which refers to the container. |
368 | const Expr *ContainerExpr; |
369 | /// The Expr which refers to the terminating condition for array-based loops. |
370 | const Expr *ArrayBoundExpr; |
371 | bool ContainerNeedsDereference; |
372 | |
373 | // Output member variables: |
374 | /// A container which holds all usages of IndexVar as the index of |
375 | /// ArraySubscriptExpressions. |
376 | UsageResult Usages; |
377 | llvm::SmallSet<SourceLocation, 8> UsageLocations; |
378 | bool OnlyUsedAsIndex = true; |
379 | /// The DeclStmt for an alias to the container element. |
380 | const DeclStmt *AliasDecl = nullptr; |
381 | Confidence ConfidenceLevel; |
382 | /// A list of expressions on which ContainerExpr depends. |
383 | /// |
384 | /// If any of these expressions are encountered outside of an acceptable usage |
385 | /// of the loop element, lower our confidence level. |
386 | llvm::SmallVector<std::pair<const Expr *, llvm::FoldingSetNodeID>, 16> |
387 | DependentExprs; |
388 | |
389 | /// The parent-in-waiting. Will become the real parent once we traverse down |
390 | /// one level in the AST. |
391 | const Stmt *NextStmtParent = nullptr; |
392 | /// The actual parent of a node when Visit*() calls are made. Only the |
393 | /// parentage of DeclStmt's to possible iteration/selection statements is of |
394 | /// importance. |
395 | const Stmt *CurrStmtParent = nullptr; |
396 | |
397 | /// \see aliasUseRequired(). |
398 | bool ReplaceWithAliasUse = false; |
399 | /// \see aliasFromForInit(). |
400 | bool AliasFromForInit = false; |
401 | }; |
402 | |
403 | struct TUTrackingInfo { |
404 | /// Reset and initialize per-TU tracking information. |
405 | /// |
406 | /// Must be called before using container accessors. |
407 | TUTrackingInfo() : ParentFinder(new StmtAncestorASTVisitor) {} |
408 | |
409 | StmtAncestorASTVisitor &getParentFinder() { return *ParentFinder; } |
410 | StmtGeneratedVarNameMap &getGeneratedDecls() { return GeneratedDecls; } |
411 | ReplacedVarsMap &getReplacedVars() { return ReplacedVars; } |
412 | |
413 | private: |
414 | std::unique_ptr<StmtAncestorASTVisitor> ParentFinder; |
415 | StmtGeneratedVarNameMap GeneratedDecls; |
416 | ReplacedVarsMap ReplacedVars; |
417 | }; |
418 | |
419 | /// Create names for generated variables within a particular statement. |
420 | /// |
421 | /// VariableNamer uses a DeclContext as a reference point, checking for any |
422 | /// conflicting declarations higher up in the context or within SourceStmt. |
423 | /// It creates a variable name using hints from a source container and the old |
424 | /// index, if they exist. |
425 | class VariableNamer { |
426 | public: |
427 | // Supported naming styles. |
428 | enum NamingStyle { |
429 | NS_CamelBack, |
430 | NS_CamelCase, |
431 | NS_LowerCase, |
432 | NS_UpperCase, |
433 | }; |
434 | |
435 | VariableNamer(StmtGeneratedVarNameMap *GeneratedDecls, |
436 | const StmtParentMap *ReverseAST, const clang::Stmt *SourceStmt, |
437 | const clang::VarDecl *OldIndex, |
438 | const clang::ValueDecl *TheContainer, |
439 | const clang::ASTContext *Context, NamingStyle Style) |
440 | : GeneratedDecls(GeneratedDecls), ReverseAST(ReverseAST), |
441 | SourceStmt(SourceStmt), OldIndex(OldIndex), TheContainer(TheContainer), |
442 | Context(Context), Style(Style) {} |
443 | |
444 | /// Generate a new index name. |
445 | /// |
446 | /// Generates the name to be used for an inserted iterator. It relies on |
447 | /// declarationExists() to determine that there are no naming conflicts, and |
448 | /// tries to use some hints from the container name and the old index name. |
449 | std::string createIndexName(); |
450 | |
451 | private: |
452 | StmtGeneratedVarNameMap *GeneratedDecls; |
453 | const StmtParentMap *ReverseAST; |
454 | const clang::Stmt *SourceStmt; |
455 | const clang::VarDecl *OldIndex; |
456 | const clang::ValueDecl *TheContainer; |
457 | const clang::ASTContext *Context; |
458 | const NamingStyle Style; |
459 | |
460 | // Determine whether or not a declaration that would conflict with Symbol |
461 | // exists in an outer context or in any statement contained in SourceStmt. |
462 | bool declarationExists(llvm::StringRef Symbol); |
463 | }; |
464 | |
465 | } // namespace clang::tidy::modernize |
466 | |
467 | #endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_LOOP_CONVERT_UTILS_H |
468 | |