| 1 | //===--- InefficientVectorOperationCheck.cpp - clang-tidy------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "InefficientVectorOperationCheck.h" |
| 10 | #include "../utils/DeclRefExprUtils.h" |
| 11 | #include "../utils/OptionsUtils.h" |
| 12 | #include "clang/AST/ASTContext.h" |
| 13 | #include "clang/ASTMatchers/ASTMatchFinder.h" |
| 14 | #include "clang/Lex/Lexer.h" |
| 15 | |
| 16 | using namespace clang::ast_matchers; |
| 17 | |
| 18 | namespace clang::tidy::performance { |
| 19 | |
| 20 | namespace { |
| 21 | |
| 22 | // Matcher names. Given the code: |
| 23 | // |
| 24 | // \code |
| 25 | // void f() { |
| 26 | // vector<T> v; |
| 27 | // for (int i = 0; i < 10 + 1; ++i) { |
| 28 | // v.push_back(i); |
| 29 | // } |
| 30 | // |
| 31 | // SomeProto p; |
| 32 | // for (int i = 0; i < 10 + 1; ++i) { |
| 33 | // p.add_xxx(i); |
| 34 | // } |
| 35 | // } |
| 36 | // \endcode |
| 37 | // |
| 38 | // The matcher names are bound to following parts of the AST: |
| 39 | // - LoopCounterName: The entire for loop (as ForStmt). |
| 40 | // - LoopParentName: The body of function f (as CompoundStmt). |
| 41 | // - VectorVarDeclName: 'v' (as VarDecl). |
| 42 | // - VectorVarDeclStmatName: The entire 'std::vector<T> v;' statement (as |
| 43 | // DeclStmt). |
| 44 | // - PushBackOrEmplaceBackCallName: 'v.push_back(i)' (as cxxMemberCallExpr). |
| 45 | // - LoopInitVarName: 'i' (as VarDecl). |
| 46 | // - LoopEndExpr: '10+1' (as Expr). |
| 47 | // If EnableProto, the proto related names are bound to the following parts: |
| 48 | // - ProtoVarDeclName: 'p' (as VarDecl). |
| 49 | // - ProtoVarDeclStmtName: The entire 'SomeProto p;' statement (as DeclStmt). |
| 50 | // - ProtoAddFieldCallName: 'p.add_xxx(i)' (as cxxMemberCallExpr). |
| 51 | static const char LoopCounterName[] = "for_loop_counter" ; |
| 52 | static const char LoopParentName[] = "loop_parent" ; |
| 53 | static const char VectorVarDeclName[] = "vector_var_decl" ; |
| 54 | static const char VectorVarDeclStmtName[] = "vector_var_decl_stmt" ; |
| 55 | static const char PushBackOrEmplaceBackCallName[] = "append_call" ; |
| 56 | static const char ProtoVarDeclName[] = "proto_var_decl" ; |
| 57 | static const char ProtoVarDeclStmtName[] = "proto_var_decl_stmt" ; |
| 58 | static const char ProtoAddFieldCallName[] = "proto_add_field" ; |
| 59 | static const char LoopInitVarName[] = "loop_init_var" ; |
| 60 | static const char LoopEndExprName[] = "loop_end_expr" ; |
| 61 | static const char RangeLoopName[] = "for_range_loop" ; |
| 62 | |
| 63 | ast_matchers::internal::Matcher<Expr> supportedContainerTypesMatcher() { |
| 64 | return hasType(InnerMatcher: cxxRecordDecl(hasAnyName( |
| 65 | "::std::vector" , "::std::set" , "::std::unordered_set" , "::std::map" , |
| 66 | "::std::unordered_map" , "::std::array" , "::std::deque" ))); |
| 67 | } |
| 68 | |
| 69 | AST_MATCHER(Expr, hasSideEffects) { |
| 70 | return Node.HasSideEffects(Ctx: Finder->getASTContext()); |
| 71 | } |
| 72 | |
| 73 | } // namespace |
| 74 | |
| 75 | InefficientVectorOperationCheck::InefficientVectorOperationCheck( |
| 76 | StringRef Name, ClangTidyContext *Context) |
| 77 | : ClangTidyCheck(Name, Context), |
| 78 | VectorLikeClasses(utils::options::parseStringList( |
| 79 | Option: Options.get(LocalName: "VectorLikeClasses" , Default: "::std::vector" ))), |
| 80 | EnableProto(Options.get(LocalName: "EnableProto" , Default: false)) {} |
| 81 | |
| 82 | void InefficientVectorOperationCheck::storeOptions( |
| 83 | ClangTidyOptions::OptionMap &Opts) { |
| 84 | Options.store(Options&: Opts, LocalName: "VectorLikeClasses" , |
| 85 | Value: utils::options::serializeStringList(Strings: VectorLikeClasses)); |
| 86 | Options.store(Options&: Opts, LocalName: "EnableProto" , Value: EnableProto); |
| 87 | } |
| 88 | |
| 89 | void InefficientVectorOperationCheck::addMatcher( |
| 90 | const DeclarationMatcher &TargetRecordDecl, StringRef VarDeclName, |
| 91 | StringRef VarDeclStmtName, const DeclarationMatcher &AppendMethodDecl, |
| 92 | StringRef AppendCallName, MatchFinder *Finder) { |
| 93 | const auto DefaultConstructorCall = cxxConstructExpr( |
| 94 | hasType(InnerMatcher: TargetRecordDecl), |
| 95 | hasDeclaration(InnerMatcher: cxxConstructorDecl(isDefaultConstructor()))); |
| 96 | const auto TargetVarDecl = |
| 97 | varDecl(hasInitializer(InnerMatcher: DefaultConstructorCall)).bind(ID: VarDeclName); |
| 98 | const auto TargetVarDefStmt = |
| 99 | declStmt(hasSingleDecl(InnerMatcher: equalsBoundNode(ID: std::string(VarDeclName)))) |
| 100 | .bind(ID: VarDeclStmtName); |
| 101 | |
| 102 | const auto AppendCallExpr = |
| 103 | cxxMemberCallExpr( |
| 104 | callee(InnerMatcher: AppendMethodDecl), on(InnerMatcher: hasType(InnerMatcher: TargetRecordDecl)), |
| 105 | onImplicitObjectArgument(InnerMatcher: declRefExpr(to(InnerMatcher: TargetVarDecl)))) |
| 106 | .bind(ID: AppendCallName); |
| 107 | const auto AppendCall = expr(ignoringImplicit(InnerMatcher: AppendCallExpr)); |
| 108 | const auto LoopVarInit = declStmt(hasSingleDecl( |
| 109 | InnerMatcher: varDecl(hasInitializer(InnerMatcher: ignoringParenImpCasts(InnerMatcher: integerLiteral(equals(Value: 0))))) |
| 110 | .bind(ID: LoopInitVarName))); |
| 111 | const auto RefersToLoopVar = ignoringParenImpCasts( |
| 112 | InnerMatcher: declRefExpr(to(InnerMatcher: varDecl(equalsBoundNode(ID: LoopInitVarName))))); |
| 113 | |
| 114 | // Matchers for the loop whose body has only 1 push_back/emplace_back calling |
| 115 | // statement. |
| 116 | const auto HasInterestingLoopBody = hasBody( |
| 117 | InnerMatcher: anyOf(compoundStmt(statementCountIs(N: 1), has(AppendCall)), AppendCall)); |
| 118 | const auto InInterestingCompoundStmt = |
| 119 | hasParent(compoundStmt(has(TargetVarDefStmt)).bind(ID: LoopParentName)); |
| 120 | |
| 121 | // Match counter-based for loops: |
| 122 | // for (int i = 0; i < n; ++i) { |
| 123 | // v.push_back(...); |
| 124 | // // Or: proto.add_xxx(...); |
| 125 | // } |
| 126 | // |
| 127 | // FIXME: Support more types of counter-based loops like decrement loops. |
| 128 | Finder->addMatcher( |
| 129 | NodeMatch: forStmt(hasLoopInit(InnerMatcher: LoopVarInit), |
| 130 | hasCondition(InnerMatcher: binaryOperator( |
| 131 | hasOperatorName(Name: "<" ), hasLHS(InnerMatcher: RefersToLoopVar), |
| 132 | hasRHS(InnerMatcher: expr(unless(hasDescendant(expr(RefersToLoopVar)))) |
| 133 | .bind(ID: LoopEndExprName)))), |
| 134 | hasIncrement(InnerMatcher: unaryOperator(hasOperatorName(Name: "++" ), |
| 135 | hasUnaryOperand(InnerMatcher: RefersToLoopVar))), |
| 136 | HasInterestingLoopBody, InInterestingCompoundStmt) |
| 137 | .bind(ID: LoopCounterName), |
| 138 | Action: this); |
| 139 | |
| 140 | // Match for-range loops: |
| 141 | // for (const auto& E : data) { |
| 142 | // v.push_back(...); |
| 143 | // // Or: proto.add_xxx(...); |
| 144 | // } |
| 145 | // |
| 146 | // FIXME: Support more complex range-expressions. |
| 147 | Finder->addMatcher( |
| 148 | NodeMatch: cxxForRangeStmt( |
| 149 | hasRangeInit( |
| 150 | InnerMatcher: anyOf(declRefExpr(supportedContainerTypesMatcher()), |
| 151 | memberExpr(hasObjectExpression(InnerMatcher: unless(hasSideEffects())), |
| 152 | supportedContainerTypesMatcher()))), |
| 153 | HasInterestingLoopBody, InInterestingCompoundStmt) |
| 154 | .bind(ID: RangeLoopName), |
| 155 | Action: this); |
| 156 | } |
| 157 | |
| 158 | void InefficientVectorOperationCheck::registerMatchers(MatchFinder *Finder) { |
| 159 | const auto VectorDecl = cxxRecordDecl(hasAnyName(VectorLikeClasses)); |
| 160 | const auto AppendMethodDecl = |
| 161 | cxxMethodDecl(hasAnyName("push_back" , "emplace_back" )); |
| 162 | addMatcher(TargetRecordDecl: VectorDecl, VarDeclName: VectorVarDeclName, VarDeclStmtName: VectorVarDeclStmtName, |
| 163 | AppendMethodDecl, AppendCallName: PushBackOrEmplaceBackCallName, Finder); |
| 164 | |
| 165 | if (EnableProto) { |
| 166 | const auto ProtoDecl = |
| 167 | cxxRecordDecl(isDerivedFrom(BaseName: "::proto2::MessageLite" )); |
| 168 | |
| 169 | // A method's name starts with "add_" might not mean it's an add field |
| 170 | // call; it could be the getter for a proto field of which the name starts |
| 171 | // with "add_". So we exclude const methods. |
| 172 | const auto AddFieldMethodDecl = |
| 173 | cxxMethodDecl(matchesName(RegExp: "::add_" ), unless(isConst())); |
| 174 | addMatcher(TargetRecordDecl: ProtoDecl, VarDeclName: ProtoVarDeclName, VarDeclStmtName: ProtoVarDeclStmtName, |
| 175 | AppendMethodDecl: AddFieldMethodDecl, AppendCallName: ProtoAddFieldCallName, Finder); |
| 176 | } |
| 177 | } |
| 178 | |
| 179 | void InefficientVectorOperationCheck::check( |
| 180 | const MatchFinder::MatchResult &Result) { |
| 181 | auto *Context = Result.Context; |
| 182 | if (Context->getDiagnostics().hasUncompilableErrorOccurred()) |
| 183 | return; |
| 184 | |
| 185 | const SourceManager &SM = *Result.SourceManager; |
| 186 | const auto *VectorVarDecl = |
| 187 | Result.Nodes.getNodeAs<VarDecl>(ID: VectorVarDeclName); |
| 188 | const auto *ForLoop = Result.Nodes.getNodeAs<ForStmt>(ID: LoopCounterName); |
| 189 | const auto *RangeLoop = |
| 190 | Result.Nodes.getNodeAs<CXXForRangeStmt>(ID: RangeLoopName); |
| 191 | const auto *VectorAppendCall = |
| 192 | Result.Nodes.getNodeAs<CXXMemberCallExpr>(ID: PushBackOrEmplaceBackCallName); |
| 193 | const auto *ProtoVarDecl = Result.Nodes.getNodeAs<VarDecl>(ID: ProtoVarDeclName); |
| 194 | const auto *ProtoAddFieldCall = |
| 195 | Result.Nodes.getNodeAs<CXXMemberCallExpr>(ID: ProtoAddFieldCallName); |
| 196 | const auto *LoopEndExpr = Result.Nodes.getNodeAs<Expr>(ID: LoopEndExprName); |
| 197 | const auto *LoopParent = Result.Nodes.getNodeAs<CompoundStmt>(ID: LoopParentName); |
| 198 | |
| 199 | const CXXMemberCallExpr *AppendCall = |
| 200 | VectorAppendCall ? VectorAppendCall : ProtoAddFieldCall; |
| 201 | assert(AppendCall && "no append call expression" ); |
| 202 | |
| 203 | const Stmt *LoopStmt = ForLoop; |
| 204 | if (!LoopStmt) |
| 205 | LoopStmt = RangeLoop; |
| 206 | |
| 207 | const auto *TargetVarDecl = VectorVarDecl; |
| 208 | if (!TargetVarDecl) |
| 209 | TargetVarDecl = ProtoVarDecl; |
| 210 | |
| 211 | llvm::SmallPtrSet<const DeclRefExpr *, 16> AllVarRefs = |
| 212 | utils::decl_ref_expr::allDeclRefExprs(*TargetVarDecl, *LoopParent, |
| 213 | *Context); |
| 214 | for (const auto *Ref : AllVarRefs) { |
| 215 | // Skip cases where there are usages (defined as DeclRefExpr that refers |
| 216 | // to "v") of vector variable / proto variable `v` before the for loop. We |
| 217 | // consider these usages are operations causing memory preallocation (e.g. |
| 218 | // "v.resize(n)", "v.reserve(n)"). |
| 219 | // |
| 220 | // FIXME: make it more intelligent to identify the pre-allocating |
| 221 | // operations before the for loop. |
| 222 | if (SM.isBeforeInTranslationUnit(Ref->getLocation(), |
| 223 | LoopStmt->getBeginLoc())) { |
| 224 | return; |
| 225 | } |
| 226 | } |
| 227 | |
| 228 | std::string PartialReserveStmt; |
| 229 | if (VectorAppendCall != nullptr) { |
| 230 | PartialReserveStmt = ".reserve" ; |
| 231 | } else { |
| 232 | llvm::StringRef FieldName = ProtoAddFieldCall->getMethodDecl()->getName(); |
| 233 | FieldName.consume_front(Prefix: "add_" ); |
| 234 | std::string MutableFieldName = ("mutable_" + FieldName).str(); |
| 235 | PartialReserveStmt = "." + MutableFieldName + |
| 236 | "()->Reserve" ; // e.g., ".mutable_xxx()->Reserve" |
| 237 | } |
| 238 | |
| 239 | llvm::StringRef VarName = Lexer::getSourceText( |
| 240 | Range: CharSourceRange::getTokenRange( |
| 241 | AppendCall->getImplicitObjectArgument()->getSourceRange()), |
| 242 | SM, LangOpts: Context->getLangOpts()); |
| 243 | |
| 244 | std::string ReserveSize; |
| 245 | // Handle for-range loop cases. |
| 246 | if (RangeLoop) { |
| 247 | // Get the range-expression in a for-range statement represented as |
| 248 | // `for (range-declarator: range-expression)`. |
| 249 | StringRef RangeInitExpName = |
| 250 | Lexer::getSourceText(Range: CharSourceRange::getTokenRange( |
| 251 | RangeLoop->getRangeInit()->getSourceRange()), |
| 252 | SM, LangOpts: Context->getLangOpts()); |
| 253 | ReserveSize = (RangeInitExpName + ".size()" ).str(); |
| 254 | } else if (ForLoop) { |
| 255 | // Handle counter-based loop cases. |
| 256 | StringRef LoopEndSource = Lexer::getSourceText( |
| 257 | Range: CharSourceRange::getTokenRange(LoopEndExpr->getSourceRange()), SM, |
| 258 | LangOpts: Context->getLangOpts()); |
| 259 | ReserveSize = std::string(LoopEndSource); |
| 260 | } |
| 261 | |
| 262 | auto Diag = diag(AppendCall->getBeginLoc(), |
| 263 | "%0 is called inside a loop; consider pre-allocating the " |
| 264 | "container capacity before the loop" ) |
| 265 | << AppendCall->getMethodDecl()->getDeclName(); |
| 266 | if (!ReserveSize.empty()) { |
| 267 | std::string ReserveStmt = |
| 268 | (VarName + PartialReserveStmt + "(" + ReserveSize + ");\n" ).str(); |
| 269 | Diag << FixItHint::CreateInsertion(InsertionLoc: LoopStmt->getBeginLoc(), Code: ReserveStmt); |
| 270 | } |
| 271 | } |
| 272 | |
| 273 | } // namespace clang::tidy::performance |
| 274 | |