1//===--- BranchCloneCheck.cpp - clang-tidy --------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "BranchCloneCheck.h"
10#include "../utils/ASTUtils.h"
11#include "clang/AST/ASTContext.h"
12#include "clang/AST/RecursiveASTVisitor.h"
13#include "clang/ASTMatchers/ASTMatchFinder.h"
14#include "clang/Analysis/CloneDetection.h"
15#include "clang/Lex/Lexer.h"
16
17using namespace clang;
18using namespace clang::ast_matchers;
19
20namespace {
21/// A branch in a switch may consist of several statements; while a branch in
22/// an if/else if/else chain is one statement (which may be a CompoundStmt).
23using SwitchBranch = llvm::SmallVector<const Stmt *, 2>;
24} // anonymous namespace
25
26/// Determines if the bodies of two branches in a switch statements are Type I
27/// clones of each other. This function only examines the body of the branch
28/// and ignores the `case X:` or `default:` at the start of the branch.
29static bool areSwitchBranchesIdentical(const SwitchBranch &LHS,
30 const SwitchBranch &RHS,
31 const ASTContext &Context) {
32 if (LHS.size() != RHS.size())
33 return false;
34
35 for (size_t I = 0, Size = LHS.size(); I < Size; I++) {
36 // NOTE: We strip goto labels and annotations in addition to stripping
37 // the `case X:` or `default:` labels, but it is very unlikely that this
38 // would cause false positives in real-world code.
39 if (!tidy::utils::areStatementsIdentical(FirstStmt: LHS[I]->stripLabelLikeStatements(),
40 SecondStmt: RHS[I]->stripLabelLikeStatements(),
41 Context)) {
42 return false;
43 }
44 }
45
46 return true;
47}
48
49static bool isFallthroughSwitchBranch(const SwitchBranch &Branch) {
50 struct SwitchCaseVisitor : RecursiveASTVisitor<SwitchCaseVisitor> {
51 using RecursiveASTVisitor<SwitchCaseVisitor>::DataRecursionQueue;
52
53 bool TraverseLambdaExpr(LambdaExpr *, DataRecursionQueue * = nullptr) {
54 return true; // Ignore lambdas
55 }
56
57 bool TraverseDecl(Decl *) {
58 return true; // No need to check declarations
59 }
60
61 bool TraverseSwitchStmt(SwitchStmt *, DataRecursionQueue * = nullptr) {
62 return true; // Ignore sub-switches
63 }
64
65 // NOLINTNEXTLINE(readability-identifier-naming) - FIXME
66 bool TraverseSwitchCase(SwitchCase *, DataRecursionQueue * = nullptr) {
67 return true; // Ignore cases
68 }
69
70 bool TraverseDefaultStmt(DefaultStmt *, DataRecursionQueue * = nullptr) {
71 return true; // Ignore defaults
72 }
73
74 bool TraverseAttributedStmt(AttributedStmt *S) {
75 if (!S)
76 return true;
77
78 for (const Attr *A : S->getAttrs()) {
79 if (isa<FallThroughAttr>(A))
80 return false;
81 }
82
83 return true;
84 }
85 } Visitor;
86
87 for (const Stmt *Elem : Branch) {
88 if (!Visitor.TraverseStmt(S: const_cast<Stmt *>(Elem)))
89 return true;
90 }
91 return false;
92}
93
94namespace clang::tidy::bugprone {
95
96void BranchCloneCheck::registerMatchers(MatchFinder *Finder) {
97 Finder->addMatcher(
98 NodeMatch: ifStmt(unless(allOf(isConstexpr(), isInTemplateInstantiation())),
99 stmt().bind(ID: "if"),
100 hasParent(stmt(unless(ifStmt(hasElse(InnerMatcher: equalsBoundNode(ID: "if")))))),
101 hasElse(InnerMatcher: stmt().bind(ID: "else"))),
102 Action: this);
103 Finder->addMatcher(NodeMatch: switchStmt().bind(ID: "switch"), Action: this);
104 Finder->addMatcher(NodeMatch: conditionalOperator().bind(ID: "condOp"), Action: this);
105 Finder->addMatcher(
106 NodeMatch: ifStmt((hasThen(InnerMatcher: hasDescendant(ifStmt())))).bind(ID: "ifWithDescendantIf"),
107 Action: this);
108}
109
110/// Determines whether two statement trees are identical regarding
111/// operators and symbols.
112///
113/// Exceptions: expressions containing macros or functions with possible side
114/// effects are never considered identical.
115/// Limitations: (t + u) and (u + t) are not considered identical.
116/// t*(u + t) and t*u + t*t are not considered identical.
117///
118static bool isIdenticalStmt(const ASTContext &Ctx, const Stmt *Stmt1,
119 const Stmt *Stmt2, bool IgnoreSideEffects) {
120
121 if (!Stmt1 || !Stmt2)
122 return !Stmt1 && !Stmt2;
123
124 // If Stmt1 & Stmt2 are of different class then they are not
125 // identical statements.
126 if (Stmt1->getStmtClass() != Stmt2->getStmtClass())
127 return false;
128
129 const auto *Expr1 = dyn_cast<Expr>(Val: Stmt1);
130 const auto *Expr2 = dyn_cast<Expr>(Val: Stmt2);
131
132 if (Expr1 && Expr2) {
133 // If Stmt1 has side effects then don't warn even if expressions
134 // are identical.
135 if (!IgnoreSideEffects && Expr1->HasSideEffects(Ctx) &&
136 Expr2->HasSideEffects(Ctx))
137 return false;
138 // If either expression comes from a macro then don't warn even if
139 // the expressions are identical.
140 if ((Expr1->getExprLoc().isMacroID()) || (Expr2->getExprLoc().isMacroID()))
141 return false;
142
143 // If all children of two expressions are identical, return true.
144 Expr::const_child_iterator I1 = Expr1->child_begin();
145 Expr::const_child_iterator I2 = Expr2->child_begin();
146 while (I1 != Expr1->child_end() && I2 != Expr2->child_end()) {
147 if (!isIdenticalStmt(Ctx, Stmt1: *I1, Stmt2: *I2, IgnoreSideEffects))
148 return false;
149 ++I1;
150 ++I2;
151 }
152 // If there are different number of children in the statements, return
153 // false.
154 if (I1 != Expr1->child_end())
155 return false;
156 if (I2 != Expr2->child_end())
157 return false;
158 }
159
160 switch (Stmt1->getStmtClass()) {
161 default:
162 return false;
163 case Stmt::CallExprClass:
164 case Stmt::ArraySubscriptExprClass:
165 case Stmt::ArraySectionExprClass:
166 case Stmt::OMPArrayShapingExprClass:
167 case Stmt::OMPIteratorExprClass:
168 case Stmt::ImplicitCastExprClass:
169 case Stmt::ParenExprClass:
170 case Stmt::BreakStmtClass:
171 case Stmt::ContinueStmtClass:
172 case Stmt::NullStmtClass:
173 return true;
174 case Stmt::CStyleCastExprClass: {
175 const auto *CastExpr1 = cast<CStyleCastExpr>(Val: Stmt1);
176 const auto *CastExpr2 = cast<CStyleCastExpr>(Val: Stmt2);
177
178 return CastExpr1->getTypeAsWritten() == CastExpr2->getTypeAsWritten();
179 }
180 case Stmt::ReturnStmtClass: {
181 const auto *ReturnStmt1 = cast<ReturnStmt>(Val: Stmt1);
182 const auto *ReturnStmt2 = cast<ReturnStmt>(Val: Stmt2);
183
184 return isIdenticalStmt(Ctx, ReturnStmt1->getRetValue(),
185 ReturnStmt2->getRetValue(), IgnoreSideEffects);
186 }
187 case Stmt::ForStmtClass: {
188 const auto *ForStmt1 = cast<ForStmt>(Val: Stmt1);
189 const auto *ForStmt2 = cast<ForStmt>(Val: Stmt2);
190
191 if (!isIdenticalStmt(Ctx, Stmt1: ForStmt1->getInit(), Stmt2: ForStmt2->getInit(),
192 IgnoreSideEffects))
193 return false;
194 if (!isIdenticalStmt(Ctx, ForStmt1->getCond(), ForStmt2->getCond(),
195 IgnoreSideEffects))
196 return false;
197 if (!isIdenticalStmt(Ctx, ForStmt1->getInc(), ForStmt2->getInc(),
198 IgnoreSideEffects))
199 return false;
200 if (!isIdenticalStmt(Ctx, Stmt1: ForStmt1->getBody(), Stmt2: ForStmt2->getBody(),
201 IgnoreSideEffects))
202 return false;
203 return true;
204 }
205 case Stmt::DoStmtClass: {
206 const auto *DStmt1 = cast<DoStmt>(Val: Stmt1);
207 const auto *DStmt2 = cast<DoStmt>(Val: Stmt2);
208
209 if (!isIdenticalStmt(Ctx, DStmt1->getCond(), DStmt2->getCond(),
210 IgnoreSideEffects))
211 return false;
212 if (!isIdenticalStmt(Ctx, Stmt1: DStmt1->getBody(), Stmt2: DStmt2->getBody(),
213 IgnoreSideEffects))
214 return false;
215 return true;
216 }
217 case Stmt::WhileStmtClass: {
218 const auto *WStmt1 = cast<WhileStmt>(Val: Stmt1);
219 const auto *WStmt2 = cast<WhileStmt>(Val: Stmt2);
220
221 if (!isIdenticalStmt(Ctx, WStmt1->getCond(), WStmt2->getCond(),
222 IgnoreSideEffects))
223 return false;
224 if (!isIdenticalStmt(Ctx, Stmt1: WStmt1->getBody(), Stmt2: WStmt2->getBody(),
225 IgnoreSideEffects))
226 return false;
227 return true;
228 }
229 case Stmt::IfStmtClass: {
230 const auto *IStmt1 = cast<IfStmt>(Val: Stmt1);
231 const auto *IStmt2 = cast<IfStmt>(Val: Stmt2);
232
233 if (!isIdenticalStmt(Ctx, IStmt1->getCond(), IStmt2->getCond(),
234 IgnoreSideEffects))
235 return false;
236 if (!isIdenticalStmt(Ctx, Stmt1: IStmt1->getThen(), Stmt2: IStmt2->getThen(),
237 IgnoreSideEffects))
238 return false;
239 if (!isIdenticalStmt(Ctx, Stmt1: IStmt1->getElse(), Stmt2: IStmt2->getElse(),
240 IgnoreSideEffects))
241 return false;
242 return true;
243 }
244 case Stmt::CompoundStmtClass: {
245 const auto *CompStmt1 = cast<CompoundStmt>(Val: Stmt1);
246 const auto *CompStmt2 = cast<CompoundStmt>(Val: Stmt2);
247
248 if (CompStmt1->size() != CompStmt2->size())
249 return false;
250
251 if (!llvm::all_of(Range: llvm::zip(t: CompStmt1->body(), u: CompStmt2->body()),
252 P: [&Ctx, IgnoreSideEffects](
253 std::tuple<const Stmt *, const Stmt *> StmtPair) {
254 const Stmt *Stmt0 = std::get<0>(t&: StmtPair);
255 const Stmt *Stmt1 = std::get<1>(t&: StmtPair);
256 return isIdenticalStmt(Ctx, Stmt1: Stmt0, Stmt2: Stmt1,
257 IgnoreSideEffects);
258 })) {
259 return false;
260 }
261
262 return true;
263 }
264 case Stmt::CompoundAssignOperatorClass:
265 case Stmt::BinaryOperatorClass: {
266 const auto *BinOp1 = cast<BinaryOperator>(Val: Stmt1);
267 const auto *BinOp2 = cast<BinaryOperator>(Val: Stmt2);
268 return BinOp1->getOpcode() == BinOp2->getOpcode();
269 }
270 case Stmt::CharacterLiteralClass: {
271 const auto *CharLit1 = cast<CharacterLiteral>(Val: Stmt1);
272 const auto *CharLit2 = cast<CharacterLiteral>(Val: Stmt2);
273 return CharLit1->getValue() == CharLit2->getValue();
274 }
275 case Stmt::DeclRefExprClass: {
276 const auto *DeclRef1 = cast<DeclRefExpr>(Val: Stmt1);
277 const auto *DeclRef2 = cast<DeclRefExpr>(Val: Stmt2);
278 return DeclRef1->getDecl() == DeclRef2->getDecl();
279 }
280 case Stmt::IntegerLiteralClass: {
281 const auto *IntLit1 = cast<IntegerLiteral>(Val: Stmt1);
282 const auto *IntLit2 = cast<IntegerLiteral>(Val: Stmt2);
283
284 llvm::APInt I1 = IntLit1->getValue();
285 llvm::APInt I2 = IntLit2->getValue();
286 if (I1.getBitWidth() != I2.getBitWidth())
287 return false;
288 return I1 == I2;
289 }
290 case Stmt::FloatingLiteralClass: {
291 const auto *FloatLit1 = cast<FloatingLiteral>(Val: Stmt1);
292 const auto *FloatLit2 = cast<FloatingLiteral>(Val: Stmt2);
293 return FloatLit1->getValue().bitwiseIsEqual(RHS: FloatLit2->getValue());
294 }
295 case Stmt::StringLiteralClass: {
296 const auto *StringLit1 = cast<StringLiteral>(Val: Stmt1);
297 const auto *StringLit2 = cast<StringLiteral>(Val: Stmt2);
298 return StringLit1->getBytes() == StringLit2->getBytes();
299 }
300 case Stmt::MemberExprClass: {
301 const auto *MemberStmt1 = cast<MemberExpr>(Val: Stmt1);
302 const auto *MemberStmt2 = cast<MemberExpr>(Val: Stmt2);
303 return MemberStmt1->getMemberDecl() == MemberStmt2->getMemberDecl();
304 }
305 case Stmt::UnaryOperatorClass: {
306 const auto *UnaryOp1 = cast<UnaryOperator>(Val: Stmt1);
307 const auto *UnaryOp2 = cast<UnaryOperator>(Val: Stmt2);
308 return UnaryOp1->getOpcode() == UnaryOp2->getOpcode();
309 }
310 }
311}
312
313void BranchCloneCheck::check(const MatchFinder::MatchResult &Result) {
314 const ASTContext &Context = *Result.Context;
315
316 if (const auto *IS = Result.Nodes.getNodeAs<IfStmt>(ID: "if")) {
317 const Stmt *Then = IS->getThen();
318 assert(Then && "An IfStmt must have a `then` branch!");
319
320 const Stmt *Else = Result.Nodes.getNodeAs<Stmt>(ID: "else");
321 assert(Else && "We only look for `if` statements with an `else` branch!");
322
323 if (!isa<IfStmt>(Val: Else)) {
324 // Just a simple if with no `else if` branch.
325 if (utils::areStatementsIdentical(FirstStmt: Then->IgnoreContainers(),
326 SecondStmt: Else->IgnoreContainers(), Context)) {
327 diag(Loc: IS->getBeginLoc(), Description: "if with identical then and else branches");
328 diag(Loc: IS->getElseLoc(), Description: "else branch starts here", Level: DiagnosticIDs::Note);
329 }
330 return;
331 }
332
333 // This is the complicated case when we start an if/else if/else chain.
334 // To find all the duplicates, we collect all the branches into a vector.
335 llvm::SmallVector<const Stmt *, 4> Branches;
336 const IfStmt *Cur = IS;
337 while (true) {
338 // Store the `then` branch.
339 Branches.push_back(Elt: Cur->getThen());
340
341 Else = Cur->getElse();
342 // The chain ends if there is no `else` branch.
343 if (!Else)
344 break;
345
346 // Check if there is another `else if`...
347 Cur = dyn_cast<IfStmt>(Val: Else);
348 if (!Cur) {
349 // ...this is just a plain `else` branch at the end of the chain.
350 Branches.push_back(Elt: Else);
351 break;
352 }
353 }
354
355 size_t N = Branches.size();
356 llvm::BitVector KnownAsClone(N);
357
358 for (size_t I = 0; I + 1 < N; I++) {
359 // We have already seen Branches[i] as a clone of an earlier branch.
360 if (KnownAsClone[I])
361 continue;
362
363 int NumCopies = 1;
364
365 for (size_t J = I + 1; J < N; J++) {
366 if (KnownAsClone[J] || !utils::areStatementsIdentical(
367 FirstStmt: Branches[I]->IgnoreContainers(),
368 SecondStmt: Branches[J]->IgnoreContainers(), Context))
369 continue;
370
371 NumCopies++;
372 KnownAsClone[J] = true;
373
374 if (NumCopies == 2) {
375 // We report the first occurrence only when we find the second one.
376 diag(Loc: Branches[I]->getBeginLoc(),
377 Description: "repeated branch body in conditional chain");
378 SourceLocation End =
379 Lexer::getLocForEndOfToken(Loc: Branches[I]->getEndLoc(), Offset: 0,
380 SM: *Result.SourceManager, LangOpts: getLangOpts());
381 if (End.isValid()) {
382 diag(Loc: End, Description: "end of the original", Level: DiagnosticIDs::Note);
383 }
384 }
385
386 diag(Loc: Branches[J]->getBeginLoc(), Description: "clone %0 starts here",
387 Level: DiagnosticIDs::Note)
388 << (NumCopies - 1);
389 }
390 }
391 return;
392 }
393
394 if (const auto *CO = Result.Nodes.getNodeAs<ConditionalOperator>(ID: "condOp")) {
395 // We do not try to detect chains of ?: operators.
396 if (utils::areStatementsIdentical(CO->getTrueExpr(), CO->getFalseExpr(),
397 Context))
398 diag(CO->getQuestionLoc(),
399 "conditional operator with identical true and false expressions");
400
401 return;
402 }
403
404 if (const auto *SS = Result.Nodes.getNodeAs<SwitchStmt>(ID: "switch")) {
405 const auto *Body = dyn_cast_or_null<CompoundStmt>(Val: SS->getBody());
406
407 // Code like
408 // switch (x) case 0: case 1: foobar();
409 // is legal and calls foobar() if and only if x is either 0 or 1;
410 // but we do not try to distinguish branches in such code.
411 if (!Body)
412 return;
413
414 // We will first collect the branches of the switch statements. For the
415 // sake of simplicity we say that branches are delimited by the SwitchCase
416 // (`case:` or `default:`) children of Body; that is, we ignore `case:` or
417 // `default:` labels embedded inside other statements and we do not follow
418 // the effects of `break` and other manipulation of the control-flow.
419 llvm::SmallVector<SwitchBranch, 4> Branches;
420 for (const Stmt *S : Body->body()) {
421 // If this is a `case` or `default`, we start a new, empty branch.
422 if (isa<SwitchCase>(Val: S))
423 Branches.emplace_back();
424
425 // There may be code before the first branch (which can be dead code
426 // and can be code reached either through goto or through case labels
427 // that are embedded inside e.g. inner compound statements); we do not
428 // store those statements in branches.
429 if (!Branches.empty())
430 Branches.back().push_back(Elt: S);
431 }
432
433 auto *End = Branches.end();
434 auto *BeginCurrent = Branches.begin();
435 while (BeginCurrent < End) {
436 if (isFallthroughSwitchBranch(Branch: *BeginCurrent)) {
437 ++BeginCurrent;
438 continue;
439 }
440
441 auto *EndCurrent = BeginCurrent + 1;
442 while (EndCurrent < End &&
443 areSwitchBranchesIdentical(LHS: *BeginCurrent, RHS: *EndCurrent, Context)) {
444 ++EndCurrent;
445 }
446 // At this point the iterator range {BeginCurrent, EndCurrent} contains a
447 // complete family of consecutive identical branches.
448
449 if (EndCurrent == (BeginCurrent + 1)) {
450 // No consecutive identical branches that start on BeginCurrent
451 BeginCurrent = EndCurrent;
452 continue;
453 }
454
455 diag(Loc: BeginCurrent->front()->getBeginLoc(),
456 Description: "switch has %0 consecutive identical branches")
457 << static_cast<int>(std::distance(first: BeginCurrent, last: EndCurrent));
458
459 SourceLocation EndLoc = (EndCurrent - 1)->back()->getEndLoc();
460 // If the case statement is generated from a macro, it's SourceLocation
461 // may be invalid, resulting in an assertion failure down the line.
462 // While not optimal, try the begin location in this case, it's still
463 // better then nothing.
464 if (EndLoc.isInvalid())
465 EndLoc = (EndCurrent - 1)->back()->getBeginLoc();
466 if (EndLoc.isMacroID())
467 EndLoc = Context.getSourceManager().getExpansionLoc(Loc: EndLoc);
468 EndLoc = Lexer::getLocForEndOfToken(Loc: EndLoc, Offset: 0, SM: *Result.SourceManager,
469 LangOpts: getLangOpts());
470 if (EndLoc.isValid()) {
471 diag(Loc: EndLoc, Description: "last of these clones ends here", Level: DiagnosticIDs::Note);
472 }
473 BeginCurrent = EndCurrent;
474 }
475 return;
476 }
477
478 if (const auto *IS = Result.Nodes.getNodeAs<IfStmt>(ID: "ifWithDescendantIf")) {
479 const Stmt *Then = IS->getThen();
480 const auto *CS = dyn_cast<CompoundStmt>(Val: Then);
481 if (CS && (!CS->body_empty())) {
482 const auto *InnerIf = dyn_cast<IfStmt>(Val: *CS->body_begin());
483 if (InnerIf && isIdenticalStmt(Context, IS->getCond(), InnerIf->getCond(),
484 /*IgnoreSideEffects=*/false)) {
485 diag(Loc: IS->getBeginLoc(), Description: "if with identical inner if statement");
486 diag(Loc: InnerIf->getBeginLoc(), Description: "inner if starts here",
487 Level: DiagnosticIDs::Note);
488 }
489 }
490 return;
491 }
492
493 llvm_unreachable("No if statement and no switch statement.");
494}
495
496} // namespace clang::tidy::bugprone
497

Provided by KDAB

Privacy Policy
Update your C++ knowledge – Modern C++11/14/17 Training
Find out more

source code of clang-tools-extra/clang-tidy/bugprone/BranchCloneCheck.cpp