1 | //===--- InefficientVectorOperationCheck.cpp - clang-tidy------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "InefficientVectorOperationCheck.h" |
10 | #include "../utils/DeclRefExprUtils.h" |
11 | #include "../utils/OptionsUtils.h" |
12 | #include "clang/AST/ASTContext.h" |
13 | #include "clang/ASTMatchers/ASTMatchFinder.h" |
14 | #include "clang/Lex/Lexer.h" |
15 | |
16 | using namespace clang::ast_matchers; |
17 | |
18 | namespace clang::tidy::performance { |
19 | |
20 | namespace { |
21 | |
22 | // Matcher names. Given the code: |
23 | // |
24 | // \code |
25 | // void f() { |
26 | // vector<T> v; |
27 | // for (int i = 0; i < 10 + 1; ++i) { |
28 | // v.push_back(i); |
29 | // } |
30 | // |
31 | // SomeProto p; |
32 | // for (int i = 0; i < 10 + 1; ++i) { |
33 | // p.add_xxx(i); |
34 | // } |
35 | // } |
36 | // \endcode |
37 | // |
38 | // The matcher names are bound to following parts of the AST: |
39 | // - LoopCounterName: The entire for loop (as ForStmt). |
40 | // - LoopParentName: The body of function f (as CompoundStmt). |
41 | // - VectorVarDeclName: 'v' (as VarDecl). |
42 | // - VectorVarDeclStmatName: The entire 'std::vector<T> v;' statement (as |
43 | // DeclStmt). |
44 | // - PushBackOrEmplaceBackCallName: 'v.push_back(i)' (as cxxMemberCallExpr). |
45 | // - LoopInitVarName: 'i' (as VarDecl). |
46 | // - LoopEndExpr: '10+1' (as Expr). |
47 | // If EnableProto, the proto related names are bound to the following parts: |
48 | // - ProtoVarDeclName: 'p' (as VarDecl). |
49 | // - ProtoVarDeclStmtName: The entire 'SomeProto p;' statement (as DeclStmt). |
50 | // - ProtoAddFieldCallName: 'p.add_xxx(i)' (as cxxMemberCallExpr). |
51 | static const char LoopCounterName[] = "for_loop_counter" ; |
52 | static const char LoopParentName[] = "loop_parent" ; |
53 | static const char VectorVarDeclName[] = "vector_var_decl" ; |
54 | static const char VectorVarDeclStmtName[] = "vector_var_decl_stmt" ; |
55 | static const char PushBackOrEmplaceBackCallName[] = "append_call" ; |
56 | static const char ProtoVarDeclName[] = "proto_var_decl" ; |
57 | static const char ProtoVarDeclStmtName[] = "proto_var_decl_stmt" ; |
58 | static const char ProtoAddFieldCallName[] = "proto_add_field" ; |
59 | static const char LoopInitVarName[] = "loop_init_var" ; |
60 | static const char LoopEndExprName[] = "loop_end_expr" ; |
61 | static const char RangeLoopName[] = "for_range_loop" ; |
62 | |
63 | ast_matchers::internal::Matcher<Expr> supportedContainerTypesMatcher() { |
64 | return hasType(InnerMatcher: cxxRecordDecl(hasAnyName( |
65 | "::std::vector" , "::std::set" , "::std::unordered_set" , "::std::map" , |
66 | "::std::unordered_map" , "::std::array" , "::std::deque" ))); |
67 | } |
68 | |
69 | AST_MATCHER(Expr, hasSideEffects) { |
70 | return Node.HasSideEffects(Ctx: Finder->getASTContext()); |
71 | } |
72 | |
73 | } // namespace |
74 | |
75 | InefficientVectorOperationCheck::InefficientVectorOperationCheck( |
76 | StringRef Name, ClangTidyContext *Context) |
77 | : ClangTidyCheck(Name, Context), |
78 | VectorLikeClasses(utils::options::parseStringList( |
79 | Option: Options.get(LocalName: "VectorLikeClasses" , Default: "::std::vector" ))), |
80 | EnableProto(Options.getLocalOrGlobal(LocalName: "EnableProto" , Default: false)) {} |
81 | |
82 | void InefficientVectorOperationCheck::storeOptions( |
83 | ClangTidyOptions::OptionMap &Opts) { |
84 | Options.store(Options&: Opts, LocalName: "VectorLikeClasses" , |
85 | Value: utils::options::serializeStringList(Strings: VectorLikeClasses)); |
86 | Options.store(Options&: Opts, LocalName: "EnableProto" , Value: EnableProto); |
87 | } |
88 | |
89 | void InefficientVectorOperationCheck::addMatcher( |
90 | const DeclarationMatcher &TargetRecordDecl, StringRef VarDeclName, |
91 | StringRef VarDeclStmtName, const DeclarationMatcher &AppendMethodDecl, |
92 | StringRef AppendCallName, MatchFinder *Finder) { |
93 | const auto DefaultConstructorCall = cxxConstructExpr( |
94 | hasType(InnerMatcher: TargetRecordDecl), |
95 | hasDeclaration(InnerMatcher: cxxConstructorDecl(isDefaultConstructor()))); |
96 | const auto TargetVarDecl = |
97 | varDecl(hasInitializer(InnerMatcher: DefaultConstructorCall)).bind(ID: VarDeclName); |
98 | const auto TargetVarDefStmt = |
99 | declStmt(hasSingleDecl(InnerMatcher: equalsBoundNode(ID: std::string(VarDeclName)))) |
100 | .bind(ID: VarDeclStmtName); |
101 | |
102 | const auto AppendCallExpr = |
103 | cxxMemberCallExpr( |
104 | callee(InnerMatcher: AppendMethodDecl), on(InnerMatcher: hasType(InnerMatcher: TargetRecordDecl)), |
105 | onImplicitObjectArgument(InnerMatcher: declRefExpr(to(InnerMatcher: TargetVarDecl)))) |
106 | .bind(ID: AppendCallName); |
107 | const auto AppendCall = expr(ignoringImplicit(InnerMatcher: AppendCallExpr)); |
108 | const auto LoopVarInit = |
109 | declStmt(hasSingleDecl(InnerMatcher: varDecl(hasInitializer(InnerMatcher: integerLiteral(equals(Value: 0)))) |
110 | .bind(ID: LoopInitVarName))); |
111 | const auto RefersToLoopVar = ignoringParenImpCasts( |
112 | InnerMatcher: declRefExpr(to(InnerMatcher: varDecl(equalsBoundNode(ID: LoopInitVarName))))); |
113 | |
114 | // Matchers for the loop whose body has only 1 push_back/emplace_back calling |
115 | // statement. |
116 | const auto HasInterestingLoopBody = hasBody( |
117 | InnerMatcher: anyOf(compoundStmt(statementCountIs(N: 1), has(AppendCall)), AppendCall)); |
118 | const auto InInterestingCompoundStmt = |
119 | hasParent(compoundStmt(has(TargetVarDefStmt)).bind(ID: LoopParentName)); |
120 | |
121 | // Match counter-based for loops: |
122 | // for (int i = 0; i < n; ++i) { |
123 | // v.push_back(...); |
124 | // // Or: proto.add_xxx(...); |
125 | // } |
126 | // |
127 | // FIXME: Support more types of counter-based loops like decrement loops. |
128 | Finder->addMatcher( |
129 | NodeMatch: forStmt( |
130 | hasLoopInit(InnerMatcher: LoopVarInit), |
131 | hasCondition(InnerMatcher: binaryOperator( |
132 | hasOperatorName(Name: "<" ), hasLHS(InnerMatcher: RefersToLoopVar), |
133 | hasRHS(InnerMatcher: expr(unless(hasDescendant(expr(RefersToLoopVar)))) |
134 | .bind(ID: LoopEndExprName)))), |
135 | hasIncrement(InnerMatcher: unaryOperator(hasOperatorName(Name: "++" ), |
136 | hasUnaryOperand(InnerMatcher: RefersToLoopVar))), |
137 | HasInterestingLoopBody, InInterestingCompoundStmt) |
138 | .bind(ID: LoopCounterName), |
139 | Action: this); |
140 | |
141 | // Match for-range loops: |
142 | // for (const auto& E : data) { |
143 | // v.push_back(...); |
144 | // // Or: proto.add_xxx(...); |
145 | // } |
146 | // |
147 | // FIXME: Support more complex range-expressions. |
148 | Finder->addMatcher( |
149 | NodeMatch: cxxForRangeStmt( |
150 | hasRangeInit( |
151 | InnerMatcher: anyOf(declRefExpr(supportedContainerTypesMatcher()), |
152 | memberExpr(hasObjectExpression(InnerMatcher: unless(hasSideEffects())), |
153 | supportedContainerTypesMatcher()))), |
154 | HasInterestingLoopBody, InInterestingCompoundStmt) |
155 | .bind(ID: RangeLoopName), |
156 | Action: this); |
157 | } |
158 | |
159 | void InefficientVectorOperationCheck::registerMatchers(MatchFinder *Finder) { |
160 | const auto VectorDecl = cxxRecordDecl(hasAnyName(VectorLikeClasses)); |
161 | const auto AppendMethodDecl = |
162 | cxxMethodDecl(hasAnyName("push_back" , "emplace_back" )); |
163 | addMatcher(TargetRecordDecl: VectorDecl, VarDeclName: VectorVarDeclName, VarDeclStmtName: VectorVarDeclStmtName, |
164 | AppendMethodDecl, AppendCallName: PushBackOrEmplaceBackCallName, Finder); |
165 | |
166 | if (EnableProto) { |
167 | const auto ProtoDecl = |
168 | cxxRecordDecl(isDerivedFrom(BaseName: "::proto2::MessageLite" )); |
169 | |
170 | // A method's name starts with "add_" might not mean it's an add field |
171 | // call; it could be the getter for a proto field of which the name starts |
172 | // with "add_". So we exclude const methods. |
173 | const auto AddFieldMethodDecl = |
174 | cxxMethodDecl(matchesName(RegExp: "::add_" ), unless(isConst())); |
175 | addMatcher(TargetRecordDecl: ProtoDecl, VarDeclName: ProtoVarDeclName, VarDeclStmtName: ProtoVarDeclStmtName, |
176 | AppendMethodDecl: AddFieldMethodDecl, AppendCallName: ProtoAddFieldCallName, Finder); |
177 | } |
178 | } |
179 | |
180 | void InefficientVectorOperationCheck::check( |
181 | const MatchFinder::MatchResult &Result) { |
182 | auto* Context = Result.Context; |
183 | if (Context->getDiagnostics().hasUncompilableErrorOccurred()) |
184 | return; |
185 | |
186 | const SourceManager &SM = *Result.SourceManager; |
187 | const auto *VectorVarDecl = |
188 | Result.Nodes.getNodeAs<VarDecl>(ID: VectorVarDeclName); |
189 | const auto *ForLoop = Result.Nodes.getNodeAs<ForStmt>(ID: LoopCounterName); |
190 | const auto *RangeLoop = |
191 | Result.Nodes.getNodeAs<CXXForRangeStmt>(ID: RangeLoopName); |
192 | const auto *VectorAppendCall = |
193 | Result.Nodes.getNodeAs<CXXMemberCallExpr>(ID: PushBackOrEmplaceBackCallName); |
194 | const auto *ProtoVarDecl = Result.Nodes.getNodeAs<VarDecl>(ID: ProtoVarDeclName); |
195 | const auto *ProtoAddFieldCall = |
196 | Result.Nodes.getNodeAs<CXXMemberCallExpr>(ID: ProtoAddFieldCallName); |
197 | const auto *LoopEndExpr = Result.Nodes.getNodeAs<Expr>(ID: LoopEndExprName); |
198 | const auto *LoopParent = Result.Nodes.getNodeAs<CompoundStmt>(ID: LoopParentName); |
199 | |
200 | const CXXMemberCallExpr *AppendCall = |
201 | VectorAppendCall ? VectorAppendCall : ProtoAddFieldCall; |
202 | assert(AppendCall && "no append call expression" ); |
203 | |
204 | const Stmt *LoopStmt = ForLoop; |
205 | if (!LoopStmt) |
206 | LoopStmt = RangeLoop; |
207 | |
208 | const auto *TargetVarDecl = VectorVarDecl; |
209 | if (!TargetVarDecl) |
210 | TargetVarDecl = ProtoVarDecl; |
211 | |
212 | llvm::SmallPtrSet<const DeclRefExpr *, 16> AllVarRefs = |
213 | utils::decl_ref_expr::allDeclRefExprs(*TargetVarDecl, *LoopParent, |
214 | *Context); |
215 | for (const auto *Ref : AllVarRefs) { |
216 | // Skip cases where there are usages (defined as DeclRefExpr that refers |
217 | // to "v") of vector variable / proto variable `v` before the for loop. We |
218 | // consider these usages are operations causing memory preallocation (e.g. |
219 | // "v.resize(n)", "v.reserve(n)"). |
220 | // |
221 | // FIXME: make it more intelligent to identify the pre-allocating |
222 | // operations before the for loop. |
223 | if (SM.isBeforeInTranslationUnit(Ref->getLocation(), |
224 | LoopStmt->getBeginLoc())) { |
225 | return; |
226 | } |
227 | } |
228 | |
229 | std::string PartialReserveStmt; |
230 | if (VectorAppendCall != nullptr) { |
231 | PartialReserveStmt = ".reserve" ; |
232 | } else { |
233 | llvm::StringRef FieldName = ProtoAddFieldCall->getMethodDecl()->getName(); |
234 | FieldName.consume_front(Prefix: "add_" ); |
235 | std::string MutableFieldName = ("mutable_" + FieldName).str(); |
236 | PartialReserveStmt = "." + MutableFieldName + |
237 | "()->Reserve" ; // e.g., ".mutable_xxx()->Reserve" |
238 | } |
239 | |
240 | llvm::StringRef VarName = Lexer::getSourceText( |
241 | Range: CharSourceRange::getTokenRange( |
242 | AppendCall->getImplicitObjectArgument()->getSourceRange()), |
243 | SM, LangOpts: Context->getLangOpts()); |
244 | |
245 | std::string ReserveSize; |
246 | // Handle for-range loop cases. |
247 | if (RangeLoop) { |
248 | // Get the range-expression in a for-range statement represented as |
249 | // `for (range-declarator: range-expression)`. |
250 | StringRef RangeInitExpName = |
251 | Lexer::getSourceText(Range: CharSourceRange::getTokenRange( |
252 | RangeLoop->getRangeInit()->getSourceRange()), |
253 | SM, LangOpts: Context->getLangOpts()); |
254 | ReserveSize = (RangeInitExpName + ".size()" ).str(); |
255 | } else if (ForLoop) { |
256 | // Handle counter-based loop cases. |
257 | StringRef LoopEndSource = Lexer::getSourceText( |
258 | Range: CharSourceRange::getTokenRange(LoopEndExpr->getSourceRange()), SM, |
259 | LangOpts: Context->getLangOpts()); |
260 | ReserveSize = std::string(LoopEndSource); |
261 | } |
262 | |
263 | auto Diag = diag(AppendCall->getBeginLoc(), |
264 | "%0 is called inside a loop; consider pre-allocating the " |
265 | "container capacity before the loop" ) |
266 | << AppendCall->getMethodDecl()->getDeclName(); |
267 | if (!ReserveSize.empty()) { |
268 | std::string ReserveStmt = |
269 | (VarName + PartialReserveStmt + "(" + ReserveSize + ");\n" ).str(); |
270 | Diag << FixItHint::CreateInsertion(InsertionLoc: LoopStmt->getBeginLoc(), Code: ReserveStmt); |
271 | } |
272 | } |
273 | |
274 | } // namespace clang::tidy::performance |
275 | |