1 | //===--- BranchCloneCheck.cpp - clang-tidy --------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "BranchCloneCheck.h" |
10 | #include "../utils/ASTUtils.h" |
11 | #include "clang/AST/ASTContext.h" |
12 | #include "clang/AST/RecursiveASTVisitor.h" |
13 | #include "clang/ASTMatchers/ASTMatchFinder.h" |
14 | #include "clang/Analysis/CloneDetection.h" |
15 | #include "clang/Lex/Lexer.h" |
16 | #include "llvm/Support/Casting.h" |
17 | |
18 | using namespace clang; |
19 | using namespace clang::ast_matchers; |
20 | |
21 | namespace { |
22 | /// A branch in a switch may consist of several statements; while a branch in |
23 | /// an if/else if/else chain is one statement (which may be a CompoundStmt). |
24 | using SwitchBranch = llvm::SmallVector<const Stmt *, 2>; |
25 | } // anonymous namespace |
26 | |
27 | /// Determines if the bodies of two branches in a switch statements are Type I |
28 | /// clones of each other. This function only examines the body of the branch |
29 | /// and ignores the `case X:` or `default:` at the start of the branch. |
30 | static bool areSwitchBranchesIdentical(const SwitchBranch &LHS, |
31 | const SwitchBranch &RHS, |
32 | const ASTContext &Context) { |
33 | if (LHS.size() != RHS.size()) |
34 | return false; |
35 | |
36 | for (size_t I = 0, Size = LHS.size(); I < Size; I++) { |
37 | // NOTE: We strip goto labels and annotations in addition to stripping |
38 | // the `case X:` or `default:` labels, but it is very unlikely that this |
39 | // would cause false positives in real-world code. |
40 | if (!tidy::utils::areStatementsIdentical(FirstStmt: LHS[I]->stripLabelLikeStatements(), |
41 | SecondStmt: RHS[I]->stripLabelLikeStatements(), |
42 | Context)) { |
43 | return false; |
44 | } |
45 | } |
46 | |
47 | return true; |
48 | } |
49 | |
50 | static bool isFallthroughSwitchBranch(const SwitchBranch &Branch) { |
51 | struct SwitchCaseVisitor : RecursiveASTVisitor<SwitchCaseVisitor> { |
52 | using RecursiveASTVisitor<SwitchCaseVisitor>::DataRecursionQueue; |
53 | |
54 | bool TraverseLambdaExpr(LambdaExpr *, DataRecursionQueue * = nullptr) { |
55 | return true; // Ignore lambdas |
56 | } |
57 | |
58 | bool TraverseDecl(Decl *) { |
59 | return true; // No need to check declarations |
60 | } |
61 | |
62 | bool TraverseSwitchStmt(SwitchStmt *, DataRecursionQueue * = nullptr) { |
63 | return true; // Ignore sub-switches |
64 | } |
65 | |
66 | bool TraverseSwitchCase(SwitchCase *, DataRecursionQueue * = nullptr) { |
67 | return true; // Ignore cases |
68 | } |
69 | |
70 | bool TraverseDefaultStmt(DefaultStmt *, DataRecursionQueue * = nullptr) { |
71 | return true; // Ignore defaults |
72 | } |
73 | |
74 | bool TraverseAttributedStmt(AttributedStmt *S) { |
75 | if (!S) |
76 | return true; |
77 | |
78 | for (const Attr *A : S->getAttrs()) { |
79 | if (isa<FallThroughAttr>(A)) |
80 | return false; |
81 | } |
82 | |
83 | return true; |
84 | } |
85 | } Visitor; |
86 | |
87 | for (const Stmt *Elem : Branch) { |
88 | if (!Visitor.TraverseStmt(S: const_cast<Stmt *>(Elem))) |
89 | return true; |
90 | } |
91 | return false; |
92 | } |
93 | |
94 | namespace clang::tidy::bugprone { |
95 | |
96 | void BranchCloneCheck::registerMatchers(MatchFinder *Finder) { |
97 | Finder->addMatcher( |
98 | NodeMatch: ifStmt(unless(allOf(isConstexpr(), isInTemplateInstantiation())), |
99 | stmt().bind(ID: "if" ), |
100 | hasParent(stmt(unless(ifStmt(hasElse(InnerMatcher: equalsBoundNode(ID: "if" )))))), |
101 | hasElse(InnerMatcher: stmt().bind(ID: "else" ))), |
102 | Action: this); |
103 | Finder->addMatcher(NodeMatch: switchStmt().bind(ID: "switch" ), Action: this); |
104 | Finder->addMatcher(NodeMatch: conditionalOperator().bind(ID: "condOp" ), Action: this); |
105 | } |
106 | |
107 | void BranchCloneCheck::check(const MatchFinder::MatchResult &Result) { |
108 | const ASTContext &Context = *Result.Context; |
109 | |
110 | if (const auto *IS = Result.Nodes.getNodeAs<IfStmt>(ID: "if" )) { |
111 | const Stmt *Then = IS->getThen(); |
112 | assert(Then && "An IfStmt must have a `then` branch!" ); |
113 | |
114 | const Stmt *Else = Result.Nodes.getNodeAs<Stmt>(ID: "else" ); |
115 | assert(Else && "We only look for `if` statements with an `else` branch!" ); |
116 | |
117 | if (!isa<IfStmt>(Val: Else)) { |
118 | // Just a simple if with no `else if` branch. |
119 | if (utils::areStatementsIdentical(FirstStmt: Then->IgnoreContainers(), |
120 | SecondStmt: Else->IgnoreContainers(), Context)) { |
121 | diag(Loc: IS->getBeginLoc(), Description: "if with identical then and else branches" ); |
122 | diag(Loc: IS->getElseLoc(), Description: "else branch starts here" , Level: DiagnosticIDs::Note); |
123 | } |
124 | return; |
125 | } |
126 | |
127 | // This is the complicated case when we start an if/else if/else chain. |
128 | // To find all the duplicates, we collect all the branches into a vector. |
129 | llvm::SmallVector<const Stmt *, 4> Branches; |
130 | const IfStmt *Cur = IS; |
131 | while (true) { |
132 | // Store the `then` branch. |
133 | Branches.push_back(Elt: Cur->getThen()); |
134 | |
135 | Else = Cur->getElse(); |
136 | // The chain ends if there is no `else` branch. |
137 | if (!Else) |
138 | break; |
139 | |
140 | // Check if there is another `else if`... |
141 | Cur = dyn_cast<IfStmt>(Val: Else); |
142 | if (!Cur) { |
143 | // ...this is just a plain `else` branch at the end of the chain. |
144 | Branches.push_back(Elt: Else); |
145 | break; |
146 | } |
147 | } |
148 | |
149 | size_t N = Branches.size(); |
150 | llvm::BitVector KnownAsClone(N); |
151 | |
152 | for (size_t I = 0; I + 1 < N; I++) { |
153 | // We have already seen Branches[i] as a clone of an earlier branch. |
154 | if (KnownAsClone[I]) |
155 | continue; |
156 | |
157 | int NumCopies = 1; |
158 | |
159 | for (size_t J = I + 1; J < N; J++) { |
160 | if (KnownAsClone[J] || !utils::areStatementsIdentical( |
161 | FirstStmt: Branches[I]->IgnoreContainers(), |
162 | SecondStmt: Branches[J]->IgnoreContainers(), Context)) |
163 | continue; |
164 | |
165 | NumCopies++; |
166 | KnownAsClone[J] = true; |
167 | |
168 | if (NumCopies == 2) { |
169 | // We report the first occurrence only when we find the second one. |
170 | diag(Loc: Branches[I]->getBeginLoc(), |
171 | Description: "repeated branch body in conditional chain" ); |
172 | SourceLocation End = |
173 | Lexer::getLocForEndOfToken(Loc: Branches[I]->getEndLoc(), Offset: 0, |
174 | SM: *Result.SourceManager, LangOpts: getLangOpts()); |
175 | if (End.isValid()) { |
176 | diag(Loc: End, Description: "end of the original" , Level: DiagnosticIDs::Note); |
177 | } |
178 | } |
179 | |
180 | diag(Loc: Branches[J]->getBeginLoc(), Description: "clone %0 starts here" , |
181 | Level: DiagnosticIDs::Note) |
182 | << (NumCopies - 1); |
183 | } |
184 | } |
185 | return; |
186 | } |
187 | |
188 | if (const auto *CO = Result.Nodes.getNodeAs<ConditionalOperator>(ID: "condOp" )) { |
189 | // We do not try to detect chains of ?: operators. |
190 | if (utils::areStatementsIdentical(CO->getTrueExpr(), CO->getFalseExpr(), |
191 | Context)) |
192 | diag(CO->getQuestionLoc(), |
193 | "conditional operator with identical true and false expressions" ); |
194 | |
195 | return; |
196 | } |
197 | |
198 | if (const auto *SS = Result.Nodes.getNodeAs<SwitchStmt>(ID: "switch" )) { |
199 | const auto *Body = dyn_cast_or_null<CompoundStmt>(Val: SS->getBody()); |
200 | |
201 | // Code like |
202 | // switch (x) case 0: case 1: foobar(); |
203 | // is legal and calls foobar() if and only if x is either 0 or 1; |
204 | // but we do not try to distinguish branches in such code. |
205 | if (!Body) |
206 | return; |
207 | |
208 | // We will first collect the branches of the switch statements. For the |
209 | // sake of simplicity we say that branches are delimited by the SwitchCase |
210 | // (`case:` or `default:`) children of Body; that is, we ignore `case:` or |
211 | // `default:` labels embedded inside other statements and we do not follow |
212 | // the effects of `break` and other manipulation of the control-flow. |
213 | llvm::SmallVector<SwitchBranch, 4> Branches; |
214 | for (const Stmt *S : Body->body()) { |
215 | // If this is a `case` or `default`, we start a new, empty branch. |
216 | if (isa<SwitchCase>(Val: S)) |
217 | Branches.emplace_back(); |
218 | |
219 | // There may be code before the first branch (which can be dead code |
220 | // and can be code reached either through goto or through case labels |
221 | // that are embedded inside e.g. inner compound statements); we do not |
222 | // store those statements in branches. |
223 | if (!Branches.empty()) |
224 | Branches.back().push_back(Elt: S); |
225 | } |
226 | |
227 | auto *End = Branches.end(); |
228 | auto *BeginCurrent = Branches.begin(); |
229 | while (BeginCurrent < End) { |
230 | if (isFallthroughSwitchBranch(Branch: *BeginCurrent)) { |
231 | ++BeginCurrent; |
232 | continue; |
233 | } |
234 | |
235 | auto *EndCurrent = BeginCurrent + 1; |
236 | while (EndCurrent < End && |
237 | areSwitchBranchesIdentical(LHS: *BeginCurrent, RHS: *EndCurrent, Context)) { |
238 | ++EndCurrent; |
239 | } |
240 | // At this point the iterator range {BeginCurrent, EndCurrent} contains a |
241 | // complete family of consecutive identical branches. |
242 | |
243 | if (EndCurrent == (BeginCurrent + 1)) { |
244 | // No consecutive identical branches that start on BeginCurrent |
245 | BeginCurrent = EndCurrent; |
246 | continue; |
247 | } |
248 | |
249 | diag(Loc: BeginCurrent->front()->getBeginLoc(), |
250 | Description: "switch has %0 consecutive identical branches" ) |
251 | << static_cast<int>(std::distance(first: BeginCurrent, last: EndCurrent)); |
252 | |
253 | SourceLocation EndLoc = (EndCurrent - 1)->back()->getEndLoc(); |
254 | // If the case statement is generated from a macro, it's SourceLocation |
255 | // may be invalid, resulting in an assertion failure down the line. |
256 | // While not optimal, try the begin location in this case, it's still |
257 | // better then nothing. |
258 | if (EndLoc.isInvalid()) |
259 | EndLoc = (EndCurrent - 1)->back()->getBeginLoc(); |
260 | if (EndLoc.isMacroID()) |
261 | EndLoc = Context.getSourceManager().getExpansionLoc(Loc: EndLoc); |
262 | EndLoc = Lexer::getLocForEndOfToken(Loc: EndLoc, Offset: 0, SM: *Result.SourceManager, |
263 | LangOpts: getLangOpts()); |
264 | if (EndLoc.isValid()) { |
265 | diag(Loc: EndLoc, Description: "last of these clones ends here" , Level: DiagnosticIDs::Note); |
266 | } |
267 | BeginCurrent = EndCurrent; |
268 | } |
269 | return; |
270 | } |
271 | |
272 | llvm_unreachable("No if statement and no switch statement." ); |
273 | } |
274 | |
275 | } // namespace clang::tidy::bugprone |
276 | |