BranchCloneCheck.cpp source code [clang-tools-extra/clang-tidy/bugprone/BranchCloneCheck.cpp]

1	//===--- BranchCloneCheck.cpp - clang-tidy --------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include "BranchCloneCheck.h"
10	#include "../utils/ASTUtils.h"
11	#include "clang/AST/ASTContext.h"
12	#include "clang/AST/RecursiveASTVisitor.h"
13	#include "clang/ASTMatchers/ASTMatchFinder.h"
14	#include "clang/Analysis/CloneDetection.h"
15	#include "clang/Lex/Lexer.h"
16	#include "llvm/Support/Casting.h"
17
18	using namespace clang;
19	using namespace clang::ast_matchers;
20
21	namespace {
22	/// A branch in a switch may consist of several statements; while a branch in
23	/// an if/else if/else chain is one statement (which may be a CompoundStmt).
24	using SwitchBranch = llvm::SmallVector<const Stmt *, `2`>;
25	} // anonymous namespace
26
27	/// Determines if the bodies of two branches in a switch statements are Type I
28	/// clones of each other. This function only examines the body of the branch
29	/// and ignores the `case X:` or `default:` at the start of the branch.
30	static bool areSwitchBranchesIdentical(const SwitchBranch &LHS,
31	const SwitchBranch &RHS,
32	const ASTContext &Context) {
33	if (LHS.size() != RHS.size())
34	return false;
35
36	for (size_t I = `0`, Size = LHS.size(); I < Size; I++) {
37	// NOTE: We strip goto labels and annotations in addition to stripping
38	// the `case X:` or `default:` labels, but it is very unlikely that this
39	// would cause false positives in real-world code.
40	if (!tidy::utils::areStatementsIdentical(FirstStmt: LHS [I]->stripLabelLikeStatements(),
41	SecondStmt: RHS [I]->stripLabelLikeStatements(),
42	Context)) {
43	return false;
44	}
45	}
46
47	return true;
48	}
49
50	static bool isFallthroughSwitchBranch(const SwitchBranch &Branch) {
51	struct SwitchCaseVisitor : RecursiveASTVisitor<SwitchCaseVisitor> {
52	using RecursiveASTVisitor<SwitchCaseVisitor>::DataRecursionQueue;
53
54	bool TraverseLambdaExpr(LambdaExpr , DataRecursionQueue = nullptr) {
55	return true; // Ignore lambdas
56	}
57
58	bool TraverseDecl(Decl *) {
59	return true; // No need to check declarations
60	}
61
62	bool TraverseSwitchStmt(SwitchStmt , DataRecursionQueue = nullptr) {
63	return true; // Ignore sub-switches
64	}
65
66	bool TraverseSwitchCase(SwitchCase , DataRecursionQueue = nullptr) {
67	return true; // Ignore cases
68	}
69
70	bool TraverseDefaultStmt(DefaultStmt , DataRecursionQueue = nullptr) {
71	return true; // Ignore defaults
72	}
73
74	bool TraverseAttributedStmt(AttributedStmt *S) {
75	if (!S)
76	return true;
77
78	for (const Attr *A : S->getAttrs()) {
79	if (isa<FallThroughAttr>(A))
80	return false;
81	}
82
83	return true;
84	}
85	} Visitor;
86
87	for (const Stmt *Elem : Branch) {
88	if (!Visitor.TraverseStmt(S: const_cast<Stmt *>(Elem)))
89	return true;
90	}
91	return false;
92	}
93
94	namespace clang::tidy::bugprone {
95
96	void BranchCloneCheck::registerMatchers(MatchFinder *Finder) {
97	Finder->addMatcher(
98	NodeMatch: ifStmt (unless (allOf (isConstexpr(), isInTemplateInstantiation())),
99	stmt ().bind(ID: "if"),
100	hasParent (stmt (unless (ifStmt (hasElse(InnerMatcher: equalsBoundNode(ID: "if")))))),
101	hasElse(InnerMatcher: stmt ().bind(ID: "else"))),
102	Action: this);
103	Finder->addMatcher(NodeMatch: switchStmt ().bind(ID: "switch"), Action: this);
104	Finder->addMatcher(NodeMatch: conditionalOperator ().bind(ID: "condOp"), Action: this);
105	}
106
107	void BranchCloneCheck::check(const MatchFinder::MatchResult &Result) {
108	const ASTContext &Context = *Result.Context;
109
110	if (const auto *IS = Result.Nodes.getNodeAs<IfStmt>(ID: "if")) {
111	const Stmt *Then = IS->getThen();
112	assert(Then && "An IfStmt must have a `then` branch!");
113
114	const Stmt *Else = Result.Nodes.getNodeAs<Stmt>(ID: "else");
115	assert(Else && "We only look for `if` statements with an `else` branch!");
116
117	if (!isa<IfStmt>(Val: Else)) {
118	// Just a simple if with no `else if` branch.
119	if (utils::areStatementsIdentical(FirstStmt: Then->IgnoreContainers(),
120	SecondStmt: Else->IgnoreContainers(), Context)) {
121	diag(Loc: IS->getBeginLoc(), Description: "if with identical then and else branches");
122	diag(Loc: IS->getElseLoc(), Description: "else branch starts here", Level: DiagnosticIDs::Note);
123	}
124	return;
125	}
126
127	// This is the complicated case when we start an if/else if/else chain.
128	// To find all the duplicates, we collect all the branches into a vector.
129	llvm::SmallVector<const Stmt *, `4`> Branches;
130	const IfStmt *Cur = IS;
131	while (true) {
132	// Store the `then` branch.
133	Branches.push_back(Elt: Cur->getThen());
134
135	Else = Cur->getElse();
136	// The chain ends if there is no `else` branch.
137	if (!Else)
138	break;
139
140	// Check if there is another `else if`...
141	Cur = dyn_cast<IfStmt>(Val: Else);
142	if (!Cur) {
143	// ...this is just a plain `else` branch at the end of the chain.
144	Branches.push_back(Elt: Else);
145	break;
146	}
147	}
148
149	size_t N = Branches.size();
150	llvm::BitVector KnownAsClone(N);
151
152	for (size_t I = `0`; I + `1` < N; I++) {
153	// We have already seen Branches[i] as a clone of an earlier branch.
154	if (KnownAsClone [I])
155	continue;
156
157	int NumCopies = `1`;
158
159	for (size_t J = I + `1`; J < N; J++) {
160	if (KnownAsClone [J] \|\| !utils::areStatementsIdentical(
161	FirstStmt: Branches [I]->IgnoreContainers(),
162	SecondStmt: Branches [J]->IgnoreContainers(), Context))
163	continue;
164
165	NumCopies++;
166	KnownAsClone [J] = true;
167
168	if (NumCopies == `2`) {
169	// We report the first occurrence only when we find the second one.
170	diag(Loc: Branches [I]->getBeginLoc(),
171	Description: "repeated branch body in conditional chain");
172	SourceLocation End =
173	Lexer::getLocForEndOfToken(Loc: Branches [I]->getEndLoc(), Offset: `0`,
174	SM: *Result.SourceManager, LangOpts: getLangOpts());
175	if (End.isValid()) {
176	diag(Loc: End, Description: "end of the original", Level: DiagnosticIDs::Note);
177	}
178	}
179
180	diag(Loc: Branches [J]->getBeginLoc(), Description: "clone %0 starts here",
181	Level: DiagnosticIDs::Note)
182	<< (NumCopies - `1`);
183	}
184	}
185	return;
186	}
187
188	if (const auto *CO = Result.Nodes.getNodeAs<ConditionalOperator>(ID: "condOp")) {
189	// We do not try to detect chains of ?: operators.
190	if (utils::areStatementsIdentical(CO->getTrueExpr(), CO->getFalseExpr(),
191	Context))
192	diag(CO->getQuestionLoc(),
193	"conditional operator with identical true and false expressions");
194
195	return;
196	}
197
198	if (const auto *SS = Result.Nodes.getNodeAs<SwitchStmt>(ID: "switch")) {
199	const auto *Body = dyn_cast_or_null<CompoundStmt>(Val: SS->getBody());
200
201	// Code like
202	// switch (x) case 0: case 1: foobar();
203	// is legal and calls foobar() if and only if x is either 0 or 1;
204	// but we do not try to distinguish branches in such code.
205	if (!Body)
206	return;
207
208	// We will first collect the branches of the switch statements. For the
209	// sake of simplicity we say that branches are delimited by the SwitchCase
210	// (`case:` or `default:`) children of Body; that is, we ignore `case:` or
211	// `default:` labels embedded inside other statements and we do not follow
212	// the effects of `break` and other manipulation of the control-flow.
213	llvm::SmallVector<SwitchBranch, `4`> Branches;
214	for (const Stmt *S : Body->body()) {
215	// If this is a `case` or `default`, we start a new, empty branch.
216	if (isa<SwitchCase>(Val: S))
217	Branches.emplace_back();
218
219	// There may be code before the first branch (which can be dead code
220	// and can be code reached either through goto or through case labels
221	// that are embedded inside e.g. inner compound statements); we do not
222	// store those statements in branches.
223	if (!Branches.empty())
224	Branches.back().push_back(Elt: S);
225	}
226
227	auto *End = Branches.end();
228	auto *BeginCurrent = Branches.begin();
229	while (BeginCurrent < End) {
230	if (isFallthroughSwitchBranch(Branch: *BeginCurrent)) {
231	++BeginCurrent;
232	continue;
233	}
234
235	auto *EndCurrent = BeginCurrent + `1`;
236	while (EndCurrent < End &&
237	areSwitchBranchesIdentical(LHS: BeginCurrent, RHS: EndCurrent, Context)) {
238	++EndCurrent;
239	}
240	// At this point the iterator range {BeginCurrent, EndCurrent} contains a
241	// complete family of consecutive identical branches.
242
243	if (EndCurrent == (BeginCurrent + `1`)) {
244	// No consecutive identical branches that start on BeginCurrent
245	BeginCurrent = EndCurrent;
246	continue;
247	}
248
249	diag(Loc: BeginCurrent->front()->getBeginLoc(),
250	Description: "switch has %0 consecutive identical branches")
251	<< static_cast<int>(std::distance(first: BeginCurrent, last: EndCurrent));
252
253	SourceLocation EndLoc = (EndCurrent - `1`)->back()->getEndLoc();
254	// If the case statement is generated from a macro, it's SourceLocation
255	// may be invalid, resulting in an assertion failure down the line.
256	// While not optimal, try the begin location in this case, it's still
257	// better then nothing.
258	if (EndLoc.isInvalid())
259	EndLoc = (EndCurrent - `1`)->back()->getBeginLoc();
260	if (EndLoc.isMacroID())
261	EndLoc = Context.getSourceManager().getExpansionLoc(Loc: EndLoc);
262	EndLoc = Lexer::getLocForEndOfToken(Loc: EndLoc, Offset: `0`, SM: *Result.SourceManager,
263	LangOpts: getLangOpts());
264	if (EndLoc.isValid()) {
265	diag(Loc: EndLoc, Description: "last of these clones ends here", Level: DiagnosticIDs::Note);
266	}
267	BeginCurrent = EndCurrent;
268	}
269	return;
270	}
271
272	llvm_unreachable("No if statement and no switch statement.");
273	}
274
275	} // namespace clang::tidy::bugprone
276

source code of clang-tools-extra/clang-tidy/bugprone/BranchCloneCheck.cpp