1 | //===--- InefficientVectorOperationCheck.cpp - clang-tidy------------------===// |
---|---|
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "InefficientVectorOperationCheck.h" |
10 | #include "../utils/DeclRefExprUtils.h" |
11 | #include "../utils/OptionsUtils.h" |
12 | #include "clang/AST/ASTContext.h" |
13 | #include "clang/ASTMatchers/ASTMatchFinder.h" |
14 | #include "clang/Lex/Lexer.h" |
15 | |
16 | using namespace clang::ast_matchers; |
17 | |
18 | namespace clang::tidy::performance { |
19 | |
20 | namespace { |
21 | |
22 | // Matcher names. Given the code: |
23 | // |
24 | // \code |
25 | // void f() { |
26 | // vector<T> v; |
27 | // for (int i = 0; i < 10 + 1; ++i) { |
28 | // v.push_back(i); |
29 | // } |
30 | // |
31 | // SomeProto p; |
32 | // for (int i = 0; i < 10 + 1; ++i) { |
33 | // p.add_xxx(i); |
34 | // } |
35 | // } |
36 | // \endcode |
37 | // |
38 | // The matcher names are bound to following parts of the AST: |
39 | // - LoopCounterName: The entire for loop (as ForStmt). |
40 | // - LoopParentName: The body of function f (as CompoundStmt). |
41 | // - VectorVarDeclName: 'v' (as VarDecl). |
42 | // - VectorVarDeclStmatName: The entire 'std::vector<T> v;' statement (as |
43 | // DeclStmt). |
44 | // - PushBackOrEmplaceBackCallName: 'v.push_back(i)' (as cxxMemberCallExpr). |
45 | // - LoopInitVarName: 'i' (as VarDecl). |
46 | // - LoopEndExpr: '10+1' (as Expr). |
47 | // If EnableProto, the proto related names are bound to the following parts: |
48 | // - ProtoVarDeclName: 'p' (as VarDecl). |
49 | // - ProtoVarDeclStmtName: The entire 'SomeProto p;' statement (as DeclStmt). |
50 | // - ProtoAddFieldCallName: 'p.add_xxx(i)' (as cxxMemberCallExpr). |
51 | static const char LoopCounterName[] = "for_loop_counter"; |
52 | static const char LoopParentName[] = "loop_parent"; |
53 | static const char VectorVarDeclName[] = "vector_var_decl"; |
54 | static const char VectorVarDeclStmtName[] = "vector_var_decl_stmt"; |
55 | static const char PushBackOrEmplaceBackCallName[] = "append_call"; |
56 | static const char ProtoVarDeclName[] = "proto_var_decl"; |
57 | static const char ProtoVarDeclStmtName[] = "proto_var_decl_stmt"; |
58 | static const char ProtoAddFieldCallName[] = "proto_add_field"; |
59 | static const char LoopInitVarName[] = "loop_init_var"; |
60 | static const char LoopEndExprName[] = "loop_end_expr"; |
61 | static const char RangeLoopName[] = "for_range_loop"; |
62 | |
63 | ast_matchers::internal::Matcher<Expr> supportedContainerTypesMatcher() { |
64 | return hasType(InnerMatcher: cxxRecordDecl(hasAnyName( |
65 | "::std::vector", "::std::set", "::std::unordered_set", "::std::map", |
66 | "::std::unordered_map", "::std::array", "::std::deque"))); |
67 | } |
68 | |
69 | AST_MATCHER(Expr, hasSideEffects) { |
70 | return Node.HasSideEffects(Ctx: Finder->getASTContext()); |
71 | } |
72 | |
73 | } // namespace |
74 | |
75 | InefficientVectorOperationCheck::InefficientVectorOperationCheck( |
76 | StringRef Name, ClangTidyContext *Context) |
77 | : ClangTidyCheck(Name, Context), |
78 | VectorLikeClasses(utils::options::parseStringList( |
79 | Option: Options.get(LocalName: "VectorLikeClasses", Default: "::std::vector"))), |
80 | EnableProto(Options.get(LocalName: "EnableProto", Default: false)) {} |
81 | |
82 | void InefficientVectorOperationCheck::storeOptions( |
83 | ClangTidyOptions::OptionMap &Opts) { |
84 | Options.store(Options&: Opts, LocalName: "VectorLikeClasses", |
85 | Value: utils::options::serializeStringList(Strings: VectorLikeClasses)); |
86 | Options.store(Options&: Opts, LocalName: "EnableProto", Value: EnableProto); |
87 | } |
88 | |
89 | void InefficientVectorOperationCheck::addMatcher( |
90 | const DeclarationMatcher &TargetRecordDecl, StringRef VarDeclName, |
91 | StringRef VarDeclStmtName, const DeclarationMatcher &AppendMethodDecl, |
92 | StringRef AppendCallName, MatchFinder *Finder) { |
93 | const auto DefaultConstructorCall = cxxConstructExpr( |
94 | hasType(InnerMatcher: TargetRecordDecl), |
95 | hasDeclaration(InnerMatcher: cxxConstructorDecl(isDefaultConstructor()))); |
96 | const auto TargetVarDecl = |
97 | varDecl(hasInitializer(InnerMatcher: DefaultConstructorCall)).bind(ID: VarDeclName); |
98 | const auto TargetVarDefStmt = |
99 | declStmt(hasSingleDecl(InnerMatcher: equalsBoundNode(ID: std::string(VarDeclName)))) |
100 | .bind(ID: VarDeclStmtName); |
101 | |
102 | const auto AppendCallExpr = |
103 | cxxMemberCallExpr( |
104 | callee(InnerMatcher: AppendMethodDecl), on(InnerMatcher: hasType(InnerMatcher: TargetRecordDecl)), |
105 | onImplicitObjectArgument(InnerMatcher: declRefExpr(to(InnerMatcher: TargetVarDecl)))) |
106 | .bind(ID: AppendCallName); |
107 | const auto AppendCall = expr(ignoringImplicit(InnerMatcher: AppendCallExpr)); |
108 | const auto LoopVarInit = declStmt(hasSingleDecl( |
109 | InnerMatcher: varDecl(hasInitializer(InnerMatcher: ignoringParenImpCasts(InnerMatcher: integerLiteral(equals(Value: 0))))) |
110 | .bind(ID: LoopInitVarName))); |
111 | const auto RefersToLoopVar = ignoringParenImpCasts( |
112 | InnerMatcher: declRefExpr(to(InnerMatcher: varDecl(equalsBoundNode(ID: LoopInitVarName))))); |
113 | |
114 | // Matchers for the loop whose body has only 1 push_back/emplace_back calling |
115 | // statement. |
116 | const auto HasInterestingLoopBody = hasBody( |
117 | InnerMatcher: anyOf(compoundStmt(statementCountIs(N: 1), has(AppendCall)), AppendCall)); |
118 | const auto InInterestingCompoundStmt = |
119 | hasParent(compoundStmt(has(TargetVarDefStmt)).bind(ID: LoopParentName)); |
120 | |
121 | // Match counter-based for loops: |
122 | // for (int i = 0; i < n; ++i) { |
123 | // v.push_back(...); |
124 | // // Or: proto.add_xxx(...); |
125 | // } |
126 | // |
127 | // FIXME: Support more types of counter-based loops like decrement loops. |
128 | Finder->addMatcher( |
129 | NodeMatch: forStmt(hasLoopInit(InnerMatcher: LoopVarInit), |
130 | hasCondition(InnerMatcher: binaryOperator( |
131 | hasOperatorName(Name: "<"), hasLHS(InnerMatcher: RefersToLoopVar), |
132 | hasRHS(InnerMatcher: expr(unless(hasDescendant(expr(RefersToLoopVar)))) |
133 | .bind(ID: LoopEndExprName)))), |
134 | hasIncrement(InnerMatcher: unaryOperator(hasOperatorName(Name: "++"), |
135 | hasUnaryOperand(InnerMatcher: RefersToLoopVar))), |
136 | HasInterestingLoopBody, InInterestingCompoundStmt) |
137 | .bind(ID: LoopCounterName), |
138 | Action: this); |
139 | |
140 | // Match for-range loops: |
141 | // for (const auto& E : data) { |
142 | // v.push_back(...); |
143 | // // Or: proto.add_xxx(...); |
144 | // } |
145 | // |
146 | // FIXME: Support more complex range-expressions. |
147 | Finder->addMatcher( |
148 | NodeMatch: cxxForRangeStmt( |
149 | hasRangeInit( |
150 | InnerMatcher: anyOf(declRefExpr(supportedContainerTypesMatcher()), |
151 | memberExpr(hasObjectExpression(InnerMatcher: unless(hasSideEffects())), |
152 | supportedContainerTypesMatcher()))), |
153 | HasInterestingLoopBody, InInterestingCompoundStmt) |
154 | .bind(ID: RangeLoopName), |
155 | Action: this); |
156 | } |
157 | |
158 | void InefficientVectorOperationCheck::registerMatchers(MatchFinder *Finder) { |
159 | const auto VectorDecl = cxxRecordDecl(hasAnyName(VectorLikeClasses)); |
160 | const auto AppendMethodDecl = |
161 | cxxMethodDecl(hasAnyName("push_back", "emplace_back")); |
162 | addMatcher(TargetRecordDecl: VectorDecl, VarDeclName: VectorVarDeclName, VarDeclStmtName: VectorVarDeclStmtName, |
163 | AppendMethodDecl, AppendCallName: PushBackOrEmplaceBackCallName, Finder); |
164 | |
165 | if (EnableProto) { |
166 | const auto ProtoDecl = |
167 | cxxRecordDecl(isDerivedFrom(BaseName: "::proto2::MessageLite")); |
168 | |
169 | // A method's name starts with "add_" might not mean it's an add field |
170 | // call; it could be the getter for a proto field of which the name starts |
171 | // with "add_". So we exclude const methods. |
172 | const auto AddFieldMethodDecl = |
173 | cxxMethodDecl(matchesName(RegExp: "::add_"), unless(isConst())); |
174 | addMatcher(TargetRecordDecl: ProtoDecl, VarDeclName: ProtoVarDeclName, VarDeclStmtName: ProtoVarDeclStmtName, |
175 | AppendMethodDecl: AddFieldMethodDecl, AppendCallName: ProtoAddFieldCallName, Finder); |
176 | } |
177 | } |
178 | |
179 | void InefficientVectorOperationCheck::check( |
180 | const MatchFinder::MatchResult &Result) { |
181 | auto *Context = Result.Context; |
182 | if (Context->getDiagnostics().hasUncompilableErrorOccurred()) |
183 | return; |
184 | |
185 | const SourceManager &SM = *Result.SourceManager; |
186 | const auto *VectorVarDecl = |
187 | Result.Nodes.getNodeAs<VarDecl>(ID: VectorVarDeclName); |
188 | const auto *ForLoop = Result.Nodes.getNodeAs<ForStmt>(ID: LoopCounterName); |
189 | const auto *RangeLoop = |
190 | Result.Nodes.getNodeAs<CXXForRangeStmt>(ID: RangeLoopName); |
191 | const auto *VectorAppendCall = |
192 | Result.Nodes.getNodeAs<CXXMemberCallExpr>(ID: PushBackOrEmplaceBackCallName); |
193 | const auto *ProtoVarDecl = Result.Nodes.getNodeAs<VarDecl>(ID: ProtoVarDeclName); |
194 | const auto *ProtoAddFieldCall = |
195 | Result.Nodes.getNodeAs<CXXMemberCallExpr>(ID: ProtoAddFieldCallName); |
196 | const auto *LoopEndExpr = Result.Nodes.getNodeAs<Expr>(ID: LoopEndExprName); |
197 | const auto *LoopParent = Result.Nodes.getNodeAs<CompoundStmt>(ID: LoopParentName); |
198 | |
199 | const CXXMemberCallExpr *AppendCall = |
200 | VectorAppendCall ? VectorAppendCall : ProtoAddFieldCall; |
201 | assert(AppendCall && "no append call expression"); |
202 | |
203 | const Stmt *LoopStmt = ForLoop; |
204 | if (!LoopStmt) |
205 | LoopStmt = RangeLoop; |
206 | |
207 | const auto *TargetVarDecl = VectorVarDecl; |
208 | if (!TargetVarDecl) |
209 | TargetVarDecl = ProtoVarDecl; |
210 | |
211 | llvm::SmallPtrSet<const DeclRefExpr *, 16> AllVarRefs = |
212 | utils::decl_ref_expr::allDeclRefExprs(*TargetVarDecl, *LoopParent, |
213 | *Context); |
214 | for (const auto *Ref : AllVarRefs) { |
215 | // Skip cases where there are usages (defined as DeclRefExpr that refers |
216 | // to "v") of vector variable / proto variable `v` before the for loop. We |
217 | // consider these usages are operations causing memory preallocation (e.g. |
218 | // "v.resize(n)", "v.reserve(n)"). |
219 | // |
220 | // FIXME: make it more intelligent to identify the pre-allocating |
221 | // operations before the for loop. |
222 | if (SM.isBeforeInTranslationUnit(Ref->getLocation(), |
223 | LoopStmt->getBeginLoc())) { |
224 | return; |
225 | } |
226 | } |
227 | |
228 | std::string PartialReserveStmt; |
229 | if (VectorAppendCall != nullptr) { |
230 | PartialReserveStmt = ".reserve"; |
231 | } else { |
232 | llvm::StringRef FieldName = ProtoAddFieldCall->getMethodDecl()->getName(); |
233 | FieldName.consume_front(Prefix: "add_"); |
234 | std::string MutableFieldName = ("mutable_"+ FieldName).str(); |
235 | PartialReserveStmt = "."+ MutableFieldName + |
236 | "()->Reserve"; // e.g., ".mutable_xxx()->Reserve" |
237 | } |
238 | |
239 | llvm::StringRef VarName = Lexer::getSourceText( |
240 | Range: CharSourceRange::getTokenRange( |
241 | AppendCall->getImplicitObjectArgument()->getSourceRange()), |
242 | SM, LangOpts: Context->getLangOpts()); |
243 | |
244 | std::string ReserveSize; |
245 | // Handle for-range loop cases. |
246 | if (RangeLoop) { |
247 | // Get the range-expression in a for-range statement represented as |
248 | // `for (range-declarator: range-expression)`. |
249 | StringRef RangeInitExpName = |
250 | Lexer::getSourceText(Range: CharSourceRange::getTokenRange( |
251 | RangeLoop->getRangeInit()->getSourceRange()), |
252 | SM, LangOpts: Context->getLangOpts()); |
253 | ReserveSize = (RangeInitExpName + ".size()").str(); |
254 | } else if (ForLoop) { |
255 | // Handle counter-based loop cases. |
256 | StringRef LoopEndSource = Lexer::getSourceText( |
257 | Range: CharSourceRange::getTokenRange(LoopEndExpr->getSourceRange()), SM, |
258 | LangOpts: Context->getLangOpts()); |
259 | ReserveSize = std::string(LoopEndSource); |
260 | } |
261 | |
262 | auto Diag = diag(AppendCall->getBeginLoc(), |
263 | "%0 is called inside a loop; consider pre-allocating the " |
264 | "container capacity before the loop") |
265 | << AppendCall->getMethodDecl()->getDeclName(); |
266 | if (!ReserveSize.empty()) { |
267 | std::string ReserveStmt = |
268 | (VarName + PartialReserveStmt + "("+ ReserveSize + ");\n").str(); |
269 | Diag << FixItHint::CreateInsertion(InsertionLoc: LoopStmt->getBeginLoc(), Code: ReserveStmt); |
270 | } |
271 | } |
272 | |
273 | } // namespace clang::tidy::performance |
274 |
Definitions
- LoopCounterName
- LoopParentName
- VectorVarDeclName
- VectorVarDeclStmtName
- PushBackOrEmplaceBackCallName
- ProtoVarDeclName
- ProtoVarDeclStmtName
- ProtoAddFieldCallName
- LoopInitVarName
- LoopEndExprName
- RangeLoopName
- supportedContainerTypesMatcher
- hasSideEffects
- InefficientVectorOperationCheck
- storeOptions
- addMatcher
- registerMatchers
Update your C++ knowledge – Modern C++11/14/17 Training
Find out more