1 | //===--- NoRecursionCheck.cpp - clang-tidy --------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "NoRecursionCheck.h" |
10 | #include "clang/AST/ASTContext.h" |
11 | #include "clang/ASTMatchers/ASTMatchFinder.h" |
12 | #include "clang/Analysis/CallGraph.h" |
13 | #include "llvm/ADT/SCCIterator.h" |
14 | |
15 | using namespace clang::ast_matchers; |
16 | |
17 | namespace clang::tidy::misc { |
18 | |
19 | namespace { |
20 | |
21 | /// Much like SmallSet, with two differences: |
22 | /// 1. It can *only* be constructed from an ArrayRef<>. If the element count |
23 | /// is small, there is no copy and said storage *must* outlive us. |
24 | /// 2. it is immutable, the way it was constructed it will stay. |
25 | template <typename T, unsigned SmallSize> class ImmutableSmallSet { |
26 | ArrayRef<T> Vector; |
27 | llvm::DenseSet<T> Set; |
28 | |
29 | static_assert(SmallSize <= 32, "N should be small" ); |
30 | |
31 | bool isSmall() const { return Set.empty(); } |
32 | |
33 | public: |
34 | using size_type = size_t; |
35 | |
36 | ImmutableSmallSet() = delete; |
37 | ImmutableSmallSet(const ImmutableSmallSet &) = delete; |
38 | ImmutableSmallSet(ImmutableSmallSet &&) = delete; |
39 | T &operator=(const ImmutableSmallSet &) = delete; |
40 | T &operator=(ImmutableSmallSet &&) = delete; |
41 | |
42 | // WARNING: Storage *must* outlive us if we decide that the size is small. |
43 | ImmutableSmallSet(ArrayRef<T> Storage) { |
44 | // Is size small-enough to just keep using the existing storage? |
45 | if (Storage.size() <= SmallSize) { |
46 | Vector = Storage; |
47 | return; |
48 | } |
49 | |
50 | // We've decided that it isn't performant to keep using vector. |
51 | // Let's migrate the data into Set. |
52 | Set.reserve(Storage.size()); |
53 | Set.insert_range(Storage); |
54 | } |
55 | |
56 | /// count - Return 1 if the element is in the set, 0 otherwise. |
57 | size_type count(const T &V) const { |
58 | if (isSmall()) { |
59 | // Since the collection is small, just do a linear search. |
60 | return llvm::is_contained(Vector, V) ? 1 : 0; |
61 | } |
62 | |
63 | return Set.count(V); |
64 | } |
65 | }; |
66 | |
67 | /// Much like SmallSetVector, but with one difference: |
68 | /// when the size is \p SmallSize or less, when checking whether an element is |
69 | /// already in the set or not, we perform linear search over the vector, |
70 | /// but if the size is larger than \p SmallSize, we look in set. |
71 | /// FIXME: upstream this into SetVector/SmallSetVector itself. |
72 | template <typename T, unsigned SmallSize> class SmartSmallSetVector { |
73 | public: |
74 | using size_type = size_t; |
75 | |
76 | private: |
77 | SmallVector<T, SmallSize> Vector; |
78 | llvm::DenseSet<T> Set; |
79 | |
80 | static_assert(SmallSize <= 32, "N should be small" ); |
81 | |
82 | // Are we still using Vector for uniqness tracking? |
83 | bool isSmall() const { return Set.empty(); } |
84 | |
85 | // Will one more entry cause Vector to switch away from small-size storage? |
86 | bool entiretyOfVectorSmallSizeIsOccupied() const { |
87 | assert(isSmall() && Vector.size() <= SmallSize && |
88 | "Shouldn't ask if we have already [should have] migrated into Set." ); |
89 | return Vector.size() == SmallSize; |
90 | } |
91 | |
92 | void populateSet() { |
93 | assert(Set.empty() && "Should not have already utilized the Set." ); |
94 | // Magical growth factor prediction - to how many elements do we expect to |
95 | // sanely grow after switching away from small-size storage? |
96 | const size_t NewMaxElts = 4 * Vector.size(); |
97 | Vector.reserve(NewMaxElts); |
98 | Set.reserve(NewMaxElts); |
99 | Set.insert_range(Vector); |
100 | } |
101 | |
102 | /// count - Return 1 if the element is in the set, 0 otherwise. |
103 | size_type count(const T &V) const { |
104 | if (isSmall()) { |
105 | // Since the collection is small, just do a linear search. |
106 | return llvm::is_contained(Vector, V) ? 1 : 0; |
107 | } |
108 | // Look-up in the Set. |
109 | return Set.count(V); |
110 | } |
111 | |
112 | bool setInsert(const T &V) { |
113 | if (count(V) != 0) |
114 | return false; // Already exists. |
115 | // Does not exist, Can/need to record it. |
116 | if (isSmall()) { // Are we still using Vector for uniqness tracking? |
117 | // Will one more entry fit within small-sized Vector? |
118 | if (!entiretyOfVectorSmallSizeIsOccupied()) |
119 | return true; // We'll insert into vector right afterwards anyway. |
120 | // Time to switch to Set. |
121 | populateSet(); |
122 | } |
123 | // Set time! |
124 | // Note that this must be after `populateSet()` might have been called. |
125 | bool SetInsertionSucceeded = Set.insert(V).second; |
126 | (void)SetInsertionSucceeded; |
127 | assert(SetInsertionSucceeded && "We did check that no such value existed" ); |
128 | return true; |
129 | } |
130 | |
131 | public: |
132 | /// Insert a new element into the SmartSmallSetVector. |
133 | /// \returns true if the element was inserted into the SmartSmallSetVector. |
134 | bool insert(const T &X) { |
135 | bool Result = setInsert(X); |
136 | if (Result) |
137 | Vector.push_back(X); |
138 | return Result; |
139 | } |
140 | |
141 | /// Clear the SmartSmallSetVector and return the underlying vector. |
142 | decltype(Vector) takeVector() { |
143 | Set.clear(); |
144 | return std::move(Vector); |
145 | } |
146 | }; |
147 | |
148 | constexpr unsigned SmallCallStackSize = 16; |
149 | constexpr unsigned SmallSCCSize = 32; |
150 | |
151 | using CallStackTy = |
152 | llvm::SmallVector<CallGraphNode::CallRecord, SmallCallStackSize>; |
153 | |
154 | // In given SCC, find *some* call stack that will be cyclic. |
155 | // This will only find *one* such stack, it might not be the smallest one, |
156 | // and there may be other loops. |
157 | CallStackTy pathfindSomeCycle(ArrayRef<CallGraphNode *> SCC) { |
158 | // We'll need to be able to performantly look up whether some CallGraphNode |
159 | // is in SCC or not, so cache all the SCC elements in a set. |
160 | const ImmutableSmallSet<CallGraphNode *, SmallSCCSize> SCCElts(SCC); |
161 | |
162 | // Is node N part if the current SCC? |
163 | auto NodeIsPartOfSCC = [&SCCElts](CallGraphNode *N) { |
164 | return SCCElts.count(V: N) != 0; |
165 | }; |
166 | |
167 | // Track the call stack that will cause a cycle. |
168 | SmartSmallSetVector<CallGraphNode::CallRecord, SmallCallStackSize> |
169 | CallStackSet; |
170 | |
171 | // Arbitrarily take the first element of SCC as entry point. |
172 | CallGraphNode::CallRecord EntryNode(SCC.front(), /*CallExpr=*/nullptr); |
173 | // Continue recursing into subsequent callees that are part of this SCC, |
174 | // and are thus known to be part of the call graph loop, until loop forms. |
175 | CallGraphNode::CallRecord *Node = &EntryNode; |
176 | while (true) { |
177 | // Did we see this node before? |
178 | if (!CallStackSet.insert(X: *Node)) |
179 | break; // Cycle completed! Note that didn't insert the node into stack! |
180 | // Else, perform depth-first traversal: out of all callees, pick first one |
181 | // that is part of this SCC. This is not guaranteed to yield shortest cycle. |
182 | Node = llvm::find_if(Range: Node->Callee->callees(), P: NodeIsPartOfSCC); |
183 | } |
184 | |
185 | // Note that we failed to insert the last node, that completes the cycle. |
186 | // But we really want to have it. So insert it manually into stack only. |
187 | CallStackTy CallStack = CallStackSet.takeVector(); |
188 | CallStack.emplace_back(Args&: *Node); |
189 | |
190 | return CallStack; |
191 | } |
192 | |
193 | } // namespace |
194 | |
195 | void NoRecursionCheck::registerMatchers(MatchFinder *Finder) { |
196 | Finder->addMatcher(NodeMatch: translationUnitDecl().bind(ID: "TUDecl" ), Action: this); |
197 | } |
198 | |
199 | void NoRecursionCheck::handleSCC(ArrayRef<CallGraphNode *> SCC) { |
200 | assert(!SCC.empty() && "Empty SCC does not make sense." ); |
201 | |
202 | // First of all, call out every strongly connected function. |
203 | for (CallGraphNode *N : SCC) { |
204 | FunctionDecl *D = N->getDefinition(); |
205 | diag(D->getLocation(), "function %0 is within a recursive call chain" ) << D; |
206 | } |
207 | |
208 | // Now, SCC only tells us about strongly connected function declarations in |
209 | // the call graph. It doesn't *really* tell us about the cycles they form. |
210 | // And there may be more than one cycle in SCC. |
211 | // So let's form a call stack that eventually exposes *some* cycle. |
212 | const CallStackTy EventuallyCyclicCallStack = pathfindSomeCycle(SCC); |
213 | assert(!EventuallyCyclicCallStack.empty() && "We should've found the cycle" ); |
214 | |
215 | // While last node of the call stack does cause a loop, due to the way we |
216 | // pathfind the cycle, the loop does not necessarily begin at the first node |
217 | // of the call stack, so drop front nodes of the call stack until it does. |
218 | const auto CyclicCallStack = |
219 | ArrayRef<CallGraphNode::CallRecord>(EventuallyCyclicCallStack) |
220 | .drop_until(Pred: [LastNode = EventuallyCyclicCallStack.back()]( |
221 | CallGraphNode::CallRecord FrontNode) { |
222 | return FrontNode == LastNode; |
223 | }); |
224 | assert(CyclicCallStack.size() >= 2 && "Cycle requires at least 2 frames" ); |
225 | |
226 | // Which function we decided to be the entry point that lead to the recursion? |
227 | FunctionDecl *CycleEntryFn = CyclicCallStack.front().Callee->getDefinition(); |
228 | // And now, for ease of understanding, let's print the call sequence that |
229 | // forms the cycle in question. |
230 | diag(CycleEntryFn->getLocation(), |
231 | "example recursive call chain, starting from function %0" , |
232 | DiagnosticIDs::Note) |
233 | << CycleEntryFn; |
234 | for (int CurFrame = 1, NumFrames = CyclicCallStack.size(); |
235 | CurFrame != NumFrames; ++CurFrame) { |
236 | CallGraphNode::CallRecord PrevNode = CyclicCallStack[CurFrame - 1]; |
237 | CallGraphNode::CallRecord CurrNode = CyclicCallStack[CurFrame]; |
238 | |
239 | Decl *PrevDecl = PrevNode.Callee->getDecl(); |
240 | Decl *CurrDecl = CurrNode.Callee->getDecl(); |
241 | |
242 | diag(CurrNode.CallExpr->getBeginLoc(), |
243 | "Frame #%0: function %1 calls function %2 here:" , DiagnosticIDs::Note) |
244 | << CurFrame << cast<NamedDecl>(Val: PrevDecl) << cast<NamedDecl>(Val: CurrDecl); |
245 | } |
246 | |
247 | diag(CyclicCallStack.back().CallExpr->getBeginLoc(), |
248 | "... which was the starting point of the recursive call chain; there " |
249 | "may be other cycles" , |
250 | DiagnosticIDs::Note); |
251 | } |
252 | |
253 | void NoRecursionCheck::check(const MatchFinder::MatchResult &Result) { |
254 | // Build call graph for the entire translation unit. |
255 | const auto *TU = Result.Nodes.getNodeAs<TranslationUnitDecl>(ID: "TUDecl" ); |
256 | CallGraph CG; |
257 | CG.addToCallGraph(const_cast<TranslationUnitDecl *>(TU)); |
258 | |
259 | // Look for cycles in call graph, |
260 | // by looking for Strongly Connected Components (SCC's) |
261 | for (llvm::scc_iterator<CallGraph *> SCCI = llvm::scc_begin(G: &CG), |
262 | SCCE = llvm::scc_end(G: &CG); |
263 | SCCI != SCCE; ++SCCI) { |
264 | if (!SCCI.hasCycle()) // We only care about cycles, not standalone nodes. |
265 | continue; |
266 | handleSCC(SCC: *SCCI); |
267 | } |
268 | } |
269 | |
270 | } // namespace clang::tidy::misc |
271 | |