1//===--- BranchCloneCheck.cpp - clang-tidy --------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "BranchCloneCheck.h"
10#include "../utils/ASTUtils.h"
11#include "clang/AST/ASTContext.h"
12#include "clang/AST/RecursiveASTVisitor.h"
13#include "clang/ASTMatchers/ASTMatchFinder.h"
14#include "clang/Analysis/CloneDetection.h"
15#include "clang/Lex/Lexer.h"
16#include "llvm/Support/Casting.h"
17
18using namespace clang;
19using namespace clang::ast_matchers;
20
21namespace {
22/// A branch in a switch may consist of several statements; while a branch in
23/// an if/else if/else chain is one statement (which may be a CompoundStmt).
24using SwitchBranch = llvm::SmallVector<const Stmt *, 2>;
25} // anonymous namespace
26
27/// Determines if the bodies of two branches in a switch statements are Type I
28/// clones of each other. This function only examines the body of the branch
29/// and ignores the `case X:` or `default:` at the start of the branch.
30static bool areSwitchBranchesIdentical(const SwitchBranch &LHS,
31 const SwitchBranch &RHS,
32 const ASTContext &Context) {
33 if (LHS.size() != RHS.size())
34 return false;
35
36 for (size_t I = 0, Size = LHS.size(); I < Size; I++) {
37 // NOTE: We strip goto labels and annotations in addition to stripping
38 // the `case X:` or `default:` labels, but it is very unlikely that this
39 // would cause false positives in real-world code.
40 if (!tidy::utils::areStatementsIdentical(FirstStmt: LHS[I]->stripLabelLikeStatements(),
41 SecondStmt: RHS[I]->stripLabelLikeStatements(),
42 Context)) {
43 return false;
44 }
45 }
46
47 return true;
48}
49
50static bool isFallthroughSwitchBranch(const SwitchBranch &Branch) {
51 struct SwitchCaseVisitor : RecursiveASTVisitor<SwitchCaseVisitor> {
52 using RecursiveASTVisitor<SwitchCaseVisitor>::DataRecursionQueue;
53
54 bool TraverseLambdaExpr(LambdaExpr *, DataRecursionQueue * = nullptr) {
55 return true; // Ignore lambdas
56 }
57
58 bool TraverseDecl(Decl *) {
59 return true; // No need to check declarations
60 }
61
62 bool TraverseSwitchStmt(SwitchStmt *, DataRecursionQueue * = nullptr) {
63 return true; // Ignore sub-switches
64 }
65
66 bool TraverseSwitchCase(SwitchCase *, DataRecursionQueue * = nullptr) {
67 return true; // Ignore cases
68 }
69
70 bool TraverseDefaultStmt(DefaultStmt *, DataRecursionQueue * = nullptr) {
71 return true; // Ignore defaults
72 }
73
74 bool TraverseAttributedStmt(AttributedStmt *S) {
75 if (!S)
76 return true;
77
78 for (const Attr *A : S->getAttrs()) {
79 if (isa<FallThroughAttr>(A))
80 return false;
81 }
82
83 return true;
84 }
85 } Visitor;
86
87 for (const Stmt *Elem : Branch) {
88 if (!Visitor.TraverseStmt(S: const_cast<Stmt *>(Elem)))
89 return true;
90 }
91 return false;
92}
93
94namespace clang::tidy::bugprone {
95
96void BranchCloneCheck::registerMatchers(MatchFinder *Finder) {
97 Finder->addMatcher(
98 NodeMatch: ifStmt(unless(allOf(isConstexpr(), isInTemplateInstantiation())),
99 stmt().bind(ID: "if"),
100 hasParent(stmt(unless(ifStmt(hasElse(InnerMatcher: equalsBoundNode(ID: "if")))))),
101 hasElse(InnerMatcher: stmt().bind(ID: "else"))),
102 Action: this);
103 Finder->addMatcher(NodeMatch: switchStmt().bind(ID: "switch"), Action: this);
104 Finder->addMatcher(NodeMatch: conditionalOperator().bind(ID: "condOp"), Action: this);
105}
106
107void BranchCloneCheck::check(const MatchFinder::MatchResult &Result) {
108 const ASTContext &Context = *Result.Context;
109
110 if (const auto *IS = Result.Nodes.getNodeAs<IfStmt>(ID: "if")) {
111 const Stmt *Then = IS->getThen();
112 assert(Then && "An IfStmt must have a `then` branch!");
113
114 const Stmt *Else = Result.Nodes.getNodeAs<Stmt>(ID: "else");
115 assert(Else && "We only look for `if` statements with an `else` branch!");
116
117 if (!isa<IfStmt>(Val: Else)) {
118 // Just a simple if with no `else if` branch.
119 if (utils::areStatementsIdentical(FirstStmt: Then->IgnoreContainers(),
120 SecondStmt: Else->IgnoreContainers(), Context)) {
121 diag(Loc: IS->getBeginLoc(), Description: "if with identical then and else branches");
122 diag(Loc: IS->getElseLoc(), Description: "else branch starts here", Level: DiagnosticIDs::Note);
123 }
124 return;
125 }
126
127 // This is the complicated case when we start an if/else if/else chain.
128 // To find all the duplicates, we collect all the branches into a vector.
129 llvm::SmallVector<const Stmt *, 4> Branches;
130 const IfStmt *Cur = IS;
131 while (true) {
132 // Store the `then` branch.
133 Branches.push_back(Elt: Cur->getThen());
134
135 Else = Cur->getElse();
136 // The chain ends if there is no `else` branch.
137 if (!Else)
138 break;
139
140 // Check if there is another `else if`...
141 Cur = dyn_cast<IfStmt>(Val: Else);
142 if (!Cur) {
143 // ...this is just a plain `else` branch at the end of the chain.
144 Branches.push_back(Elt: Else);
145 break;
146 }
147 }
148
149 size_t N = Branches.size();
150 llvm::BitVector KnownAsClone(N);
151
152 for (size_t I = 0; I + 1 < N; I++) {
153 // We have already seen Branches[i] as a clone of an earlier branch.
154 if (KnownAsClone[I])
155 continue;
156
157 int NumCopies = 1;
158
159 for (size_t J = I + 1; J < N; J++) {
160 if (KnownAsClone[J] || !utils::areStatementsIdentical(
161 FirstStmt: Branches[I]->IgnoreContainers(),
162 SecondStmt: Branches[J]->IgnoreContainers(), Context))
163 continue;
164
165 NumCopies++;
166 KnownAsClone[J] = true;
167
168 if (NumCopies == 2) {
169 // We report the first occurrence only when we find the second one.
170 diag(Loc: Branches[I]->getBeginLoc(),
171 Description: "repeated branch body in conditional chain");
172 SourceLocation End =
173 Lexer::getLocForEndOfToken(Loc: Branches[I]->getEndLoc(), Offset: 0,
174 SM: *Result.SourceManager, LangOpts: getLangOpts());
175 if (End.isValid()) {
176 diag(Loc: End, Description: "end of the original", Level: DiagnosticIDs::Note);
177 }
178 }
179
180 diag(Loc: Branches[J]->getBeginLoc(), Description: "clone %0 starts here",
181 Level: DiagnosticIDs::Note)
182 << (NumCopies - 1);
183 }
184 }
185 return;
186 }
187
188 if (const auto *CO = Result.Nodes.getNodeAs<ConditionalOperator>(ID: "condOp")) {
189 // We do not try to detect chains of ?: operators.
190 if (utils::areStatementsIdentical(CO->getTrueExpr(), CO->getFalseExpr(),
191 Context))
192 diag(CO->getQuestionLoc(),
193 "conditional operator with identical true and false expressions");
194
195 return;
196 }
197
198 if (const auto *SS = Result.Nodes.getNodeAs<SwitchStmt>(ID: "switch")) {
199 const auto *Body = dyn_cast_or_null<CompoundStmt>(Val: SS->getBody());
200
201 // Code like
202 // switch (x) case 0: case 1: foobar();
203 // is legal and calls foobar() if and only if x is either 0 or 1;
204 // but we do not try to distinguish branches in such code.
205 if (!Body)
206 return;
207
208 // We will first collect the branches of the switch statements. For the
209 // sake of simplicity we say that branches are delimited by the SwitchCase
210 // (`case:` or `default:`) children of Body; that is, we ignore `case:` or
211 // `default:` labels embedded inside other statements and we do not follow
212 // the effects of `break` and other manipulation of the control-flow.
213 llvm::SmallVector<SwitchBranch, 4> Branches;
214 for (const Stmt *S : Body->body()) {
215 // If this is a `case` or `default`, we start a new, empty branch.
216 if (isa<SwitchCase>(Val: S))
217 Branches.emplace_back();
218
219 // There may be code before the first branch (which can be dead code
220 // and can be code reached either through goto or through case labels
221 // that are embedded inside e.g. inner compound statements); we do not
222 // store those statements in branches.
223 if (!Branches.empty())
224 Branches.back().push_back(Elt: S);
225 }
226
227 auto *End = Branches.end();
228 auto *BeginCurrent = Branches.begin();
229 while (BeginCurrent < End) {
230 if (isFallthroughSwitchBranch(Branch: *BeginCurrent)) {
231 ++BeginCurrent;
232 continue;
233 }
234
235 auto *EndCurrent = BeginCurrent + 1;
236 while (EndCurrent < End &&
237 areSwitchBranchesIdentical(LHS: *BeginCurrent, RHS: *EndCurrent, Context)) {
238 ++EndCurrent;
239 }
240 // At this point the iterator range {BeginCurrent, EndCurrent} contains a
241 // complete family of consecutive identical branches.
242
243 if (EndCurrent == (BeginCurrent + 1)) {
244 // No consecutive identical branches that start on BeginCurrent
245 BeginCurrent = EndCurrent;
246 continue;
247 }
248
249 diag(Loc: BeginCurrent->front()->getBeginLoc(),
250 Description: "switch has %0 consecutive identical branches")
251 << static_cast<int>(std::distance(first: BeginCurrent, last: EndCurrent));
252
253 SourceLocation EndLoc = (EndCurrent - 1)->back()->getEndLoc();
254 // If the case statement is generated from a macro, it's SourceLocation
255 // may be invalid, resulting in an assertion failure down the line.
256 // While not optimal, try the begin location in this case, it's still
257 // better then nothing.
258 if (EndLoc.isInvalid())
259 EndLoc = (EndCurrent - 1)->back()->getBeginLoc();
260 if (EndLoc.isMacroID())
261 EndLoc = Context.getSourceManager().getExpansionLoc(Loc: EndLoc);
262 EndLoc = Lexer::getLocForEndOfToken(Loc: EndLoc, Offset: 0, SM: *Result.SourceManager,
263 LangOpts: getLangOpts());
264 if (EndLoc.isValid()) {
265 diag(Loc: EndLoc, Description: "last of these clones ends here", Level: DiagnosticIDs::Note);
266 }
267 BeginCurrent = EndCurrent;
268 }
269 return;
270 }
271
272 llvm_unreachable("No if statement and no switch statement.");
273}
274
275} // namespace clang::tidy::bugprone
276

source code of clang-tools-extra/clang-tidy/bugprone/BranchCloneCheck.cpp