1 | //===--- NoRecursionCheck.cpp - clang-tidy --------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "NoRecursionCheck.h" |
10 | #include "clang/AST/ASTContext.h" |
11 | #include "clang/ASTMatchers/ASTMatchFinder.h" |
12 | #include "clang/Analysis/CallGraph.h" |
13 | #include "llvm/ADT/DenseMapInfo.h" |
14 | #include "llvm/ADT/SCCIterator.h" |
15 | |
16 | using namespace clang::ast_matchers; |
17 | |
18 | namespace clang::tidy::misc { |
19 | |
20 | namespace { |
21 | |
22 | /// Much like SmallSet, with two differences: |
23 | /// 1. It can *only* be constructed from an ArrayRef<>. If the element count |
24 | /// is small, there is no copy and said storage *must* outlive us. |
25 | /// 2. it is immutable, the way it was constructed it will stay. |
26 | template <typename T, unsigned SmallSize> class ImmutableSmallSet { |
27 | ArrayRef<T> Vector; |
28 | llvm::DenseSet<T> Set; |
29 | |
30 | static_assert(SmallSize <= 32, "N should be small" ); |
31 | |
32 | bool isSmall() const { return Set.empty(); } |
33 | |
34 | public: |
35 | using size_type = size_t; |
36 | |
37 | ImmutableSmallSet() = delete; |
38 | ImmutableSmallSet(const ImmutableSmallSet &) = delete; |
39 | ImmutableSmallSet(ImmutableSmallSet &&) = delete; |
40 | T &operator=(const ImmutableSmallSet &) = delete; |
41 | T &operator=(ImmutableSmallSet &&) = delete; |
42 | |
43 | // WARNING: Storage *must* outlive us if we decide that the size is small. |
44 | ImmutableSmallSet(ArrayRef<T> Storage) { |
45 | // Is size small-enough to just keep using the existing storage? |
46 | if (Storage.size() <= SmallSize) { |
47 | Vector = Storage; |
48 | return; |
49 | } |
50 | |
51 | // We've decided that it isn't performant to keep using vector. |
52 | // Let's migrate the data into Set. |
53 | Set.reserve(Storage.size()); |
54 | Set.insert(Storage.begin(), Storage.end()); |
55 | } |
56 | |
57 | /// count - Return 1 if the element is in the set, 0 otherwise. |
58 | size_type count(const T &V) const { |
59 | if (isSmall()) { |
60 | // Since the collection is small, just do a linear search. |
61 | return llvm::is_contained(Vector, V) ? 1 : 0; |
62 | } |
63 | |
64 | return Set.count(V); |
65 | } |
66 | }; |
67 | |
68 | /// Much like SmallSetVector, but with one difference: |
69 | /// when the size is \p SmallSize or less, when checking whether an element is |
70 | /// already in the set or not, we perform linear search over the vector, |
71 | /// but if the size is larger than \p SmallSize, we look in set. |
72 | /// FIXME: upstream this into SetVector/SmallSetVector itself. |
73 | template <typename T, unsigned SmallSize> class SmartSmallSetVector { |
74 | public: |
75 | using size_type = size_t; |
76 | |
77 | private: |
78 | SmallVector<T, SmallSize> Vector; |
79 | llvm::DenseSet<T> Set; |
80 | |
81 | static_assert(SmallSize <= 32, "N should be small" ); |
82 | |
83 | // Are we still using Vector for uniqness tracking? |
84 | bool isSmall() const { return Set.empty(); } |
85 | |
86 | // Will one more entry cause Vector to switch away from small-size storage? |
87 | bool entiretyOfVectorSmallSizeIsOccupied() const { |
88 | assert(isSmall() && Vector.size() <= SmallSize && |
89 | "Shouldn't ask if we have already [should have] migrated into Set." ); |
90 | return Vector.size() == SmallSize; |
91 | } |
92 | |
93 | void populateSet() { |
94 | assert(Set.empty() && "Should not have already utilized the Set." ); |
95 | // Magical growth factor prediction - to how many elements do we expect to |
96 | // sanely grow after switching away from small-size storage? |
97 | const size_t NewMaxElts = 4 * Vector.size(); |
98 | Vector.reserve(NewMaxElts); |
99 | Set.reserve(NewMaxElts); |
100 | Set.insert(Vector.begin(), Vector.end()); |
101 | } |
102 | |
103 | /// count - Return 1 if the element is in the set, 0 otherwise. |
104 | size_type count(const T &V) const { |
105 | if (isSmall()) { |
106 | // Since the collection is small, just do a linear search. |
107 | return llvm::is_contained(Vector, V) ? 1 : 0; |
108 | } |
109 | // Look-up in the Set. |
110 | return Set.count(V); |
111 | } |
112 | |
113 | bool setInsert(const T &V) { |
114 | if (count(V) != 0) |
115 | return false; // Already exists. |
116 | // Does not exist, Can/need to record it. |
117 | if (isSmall()) { // Are we still using Vector for uniqness tracking? |
118 | // Will one more entry fit within small-sized Vector? |
119 | if (!entiretyOfVectorSmallSizeIsOccupied()) |
120 | return true; // We'll insert into vector right afterwards anyway. |
121 | // Time to switch to Set. |
122 | populateSet(); |
123 | } |
124 | // Set time! |
125 | // Note that this must be after `populateSet()` might have been called. |
126 | bool SetInsertionSucceeded = Set.insert(V).second; |
127 | (void)SetInsertionSucceeded; |
128 | assert(SetInsertionSucceeded && "We did check that no such value existed" ); |
129 | return true; |
130 | } |
131 | |
132 | public: |
133 | /// Insert a new element into the SmartSmallSetVector. |
134 | /// \returns true if the element was inserted into the SmartSmallSetVector. |
135 | bool insert(const T &X) { |
136 | bool Result = setInsert(X); |
137 | if (Result) |
138 | Vector.push_back(X); |
139 | return Result; |
140 | } |
141 | |
142 | /// Clear the SmartSmallSetVector and return the underlying vector. |
143 | decltype(Vector) takeVector() { |
144 | Set.clear(); |
145 | return std::move(Vector); |
146 | } |
147 | }; |
148 | |
149 | constexpr unsigned SmallCallStackSize = 16; |
150 | constexpr unsigned SmallSCCSize = 32; |
151 | |
152 | using CallStackTy = |
153 | llvm::SmallVector<CallGraphNode::CallRecord, SmallCallStackSize>; |
154 | |
155 | // In given SCC, find *some* call stack that will be cyclic. |
156 | // This will only find *one* such stack, it might not be the smallest one, |
157 | // and there may be other loops. |
158 | CallStackTy pathfindSomeCycle(ArrayRef<CallGraphNode *> SCC) { |
159 | // We'll need to be able to performantly look up whether some CallGraphNode |
160 | // is in SCC or not, so cache all the SCC elements in a set. |
161 | const ImmutableSmallSet<CallGraphNode *, SmallSCCSize> SCCElts(SCC); |
162 | |
163 | // Is node N part if the current SCC? |
164 | auto NodeIsPartOfSCC = [&SCCElts](CallGraphNode *N) { |
165 | return SCCElts.count(V: N) != 0; |
166 | }; |
167 | |
168 | // Track the call stack that will cause a cycle. |
169 | SmartSmallSetVector<CallGraphNode::CallRecord, SmallCallStackSize> |
170 | CallStackSet; |
171 | |
172 | // Arbitrarily take the first element of SCC as entry point. |
173 | CallGraphNode::CallRecord EntryNode(SCC.front(), /*CallExpr=*/nullptr); |
174 | // Continue recursing into subsequent callees that are part of this SCC, |
175 | // and are thus known to be part of the call graph loop, until loop forms. |
176 | CallGraphNode::CallRecord *Node = &EntryNode; |
177 | while (true) { |
178 | // Did we see this node before? |
179 | if (!CallStackSet.insert(X: *Node)) |
180 | break; // Cycle completed! Note that didn't insert the node into stack! |
181 | // Else, perform depth-first traversal: out of all callees, pick first one |
182 | // that is part of this SCC. This is not guaranteed to yield shortest cycle. |
183 | Node = llvm::find_if(Range: Node->Callee->callees(), P: NodeIsPartOfSCC); |
184 | } |
185 | |
186 | // Note that we failed to insert the last node, that completes the cycle. |
187 | // But we really want to have it. So insert it manually into stack only. |
188 | CallStackTy CallStack = CallStackSet.takeVector(); |
189 | CallStack.emplace_back(Args&: *Node); |
190 | |
191 | return CallStack; |
192 | } |
193 | |
194 | } // namespace |
195 | |
196 | void NoRecursionCheck::registerMatchers(MatchFinder *Finder) { |
197 | Finder->addMatcher(NodeMatch: translationUnitDecl().bind(ID: "TUDecl" ), Action: this); |
198 | } |
199 | |
200 | void NoRecursionCheck::handleSCC(ArrayRef<CallGraphNode *> SCC) { |
201 | assert(!SCC.empty() && "Empty SCC does not make sense." ); |
202 | |
203 | // First of all, call out every strongly connected function. |
204 | for (CallGraphNode *N : SCC) { |
205 | FunctionDecl *D = N->getDefinition(); |
206 | diag(D->getLocation(), "function %0 is within a recursive call chain" ) << D; |
207 | } |
208 | |
209 | // Now, SCC only tells us about strongly connected function declarations in |
210 | // the call graph. It doesn't *really* tell us about the cycles they form. |
211 | // And there may be more than one cycle in SCC. |
212 | // So let's form a call stack that eventually exposes *some* cycle. |
213 | const CallStackTy EventuallyCyclicCallStack = pathfindSomeCycle(SCC); |
214 | assert(!EventuallyCyclicCallStack.empty() && "We should've found the cycle" ); |
215 | |
216 | // While last node of the call stack does cause a loop, due to the way we |
217 | // pathfind the cycle, the loop does not necessarily begin at the first node |
218 | // of the call stack, so drop front nodes of the call stack until it does. |
219 | const auto CyclicCallStack = |
220 | ArrayRef<CallGraphNode::CallRecord>(EventuallyCyclicCallStack) |
221 | .drop_until(Pred: [LastNode = EventuallyCyclicCallStack.back()]( |
222 | CallGraphNode::CallRecord FrontNode) { |
223 | return FrontNode == LastNode; |
224 | }); |
225 | assert(CyclicCallStack.size() >= 2 && "Cycle requires at least 2 frames" ); |
226 | |
227 | // Which function we decided to be the entry point that lead to the recursion? |
228 | FunctionDecl *CycleEntryFn = CyclicCallStack.front().Callee->getDefinition(); |
229 | // And now, for ease of understanding, let's print the call sequence that |
230 | // forms the cycle in question. |
231 | diag(CycleEntryFn->getLocation(), |
232 | "example recursive call chain, starting from function %0" , |
233 | DiagnosticIDs::Note) |
234 | << CycleEntryFn; |
235 | for (int CurFrame = 1, NumFrames = CyclicCallStack.size(); |
236 | CurFrame != NumFrames; ++CurFrame) { |
237 | CallGraphNode::CallRecord PrevNode = CyclicCallStack[CurFrame - 1]; |
238 | CallGraphNode::CallRecord CurrNode = CyclicCallStack[CurFrame]; |
239 | |
240 | Decl *PrevDecl = PrevNode.Callee->getDecl(); |
241 | Decl *CurrDecl = CurrNode.Callee->getDecl(); |
242 | |
243 | diag(CurrNode.CallExpr->getBeginLoc(), |
244 | "Frame #%0: function %1 calls function %2 here:" , DiagnosticIDs::Note) |
245 | << CurFrame << cast<NamedDecl>(Val: PrevDecl) << cast<NamedDecl>(Val: CurrDecl); |
246 | } |
247 | |
248 | diag(CyclicCallStack.back().CallExpr->getBeginLoc(), |
249 | "... which was the starting point of the recursive call chain; there " |
250 | "may be other cycles" , |
251 | DiagnosticIDs::Note); |
252 | } |
253 | |
254 | void NoRecursionCheck::check(const MatchFinder::MatchResult &Result) { |
255 | // Build call graph for the entire translation unit. |
256 | const auto *TU = Result.Nodes.getNodeAs<TranslationUnitDecl>(ID: "TUDecl" ); |
257 | CallGraph CG; |
258 | CG.addToCallGraph(const_cast<TranslationUnitDecl *>(TU)); |
259 | |
260 | // Look for cycles in call graph, |
261 | // by looking for Strongly Connected Components (SCC's) |
262 | for (llvm::scc_iterator<CallGraph *> SCCI = llvm::scc_begin(G: &CG), |
263 | SCCE = llvm::scc_end(G: &CG); |
264 | SCCI != SCCE; ++SCCI) { |
265 | if (!SCCI.hasCycle()) // We only care about cycles, not standalone nodes. |
266 | continue; |
267 | handleSCC(SCC: *SCCI); |
268 | } |
269 | } |
270 | |
271 | } // namespace clang::tidy::misc |
272 | |