| 1 | //===--- NoRecursionCheck.cpp - clang-tidy --------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "NoRecursionCheck.h" |
| 10 | #include "clang/AST/ASTContext.h" |
| 11 | #include "clang/ASTMatchers/ASTMatchFinder.h" |
| 12 | #include "clang/Analysis/CallGraph.h" |
| 13 | #include "llvm/ADT/SCCIterator.h" |
| 14 | |
| 15 | using namespace clang::ast_matchers; |
| 16 | |
| 17 | namespace clang::tidy::misc { |
| 18 | |
| 19 | namespace { |
| 20 | |
| 21 | /// Much like SmallSet, with two differences: |
| 22 | /// 1. It can *only* be constructed from an ArrayRef<>. If the element count |
| 23 | /// is small, there is no copy and said storage *must* outlive us. |
| 24 | /// 2. it is immutable, the way it was constructed it will stay. |
| 25 | template <typename T, unsigned SmallSize> class ImmutableSmallSet { |
| 26 | ArrayRef<T> Vector; |
| 27 | llvm::DenseSet<T> Set; |
| 28 | |
| 29 | static_assert(SmallSize <= 32, "N should be small" ); |
| 30 | |
| 31 | bool isSmall() const { return Set.empty(); } |
| 32 | |
| 33 | public: |
| 34 | using size_type = size_t; |
| 35 | |
| 36 | ImmutableSmallSet() = delete; |
| 37 | ImmutableSmallSet(const ImmutableSmallSet &) = delete; |
| 38 | ImmutableSmallSet(ImmutableSmallSet &&) = delete; |
| 39 | T &operator=(const ImmutableSmallSet &) = delete; |
| 40 | T &operator=(ImmutableSmallSet &&) = delete; |
| 41 | |
| 42 | // WARNING: Storage *must* outlive us if we decide that the size is small. |
| 43 | ImmutableSmallSet(ArrayRef<T> Storage) { |
| 44 | // Is size small-enough to just keep using the existing storage? |
| 45 | if (Storage.size() <= SmallSize) { |
| 46 | Vector = Storage; |
| 47 | return; |
| 48 | } |
| 49 | |
| 50 | // We've decided that it isn't performant to keep using vector. |
| 51 | // Let's migrate the data into Set. |
| 52 | Set.reserve(Storage.size()); |
| 53 | Set.insert_range(Storage); |
| 54 | } |
| 55 | |
| 56 | /// count - Return 1 if the element is in the set, 0 otherwise. |
| 57 | size_type count(const T &V) const { |
| 58 | if (isSmall()) { |
| 59 | // Since the collection is small, just do a linear search. |
| 60 | return llvm::is_contained(Vector, V) ? 1 : 0; |
| 61 | } |
| 62 | |
| 63 | return Set.count(V); |
| 64 | } |
| 65 | }; |
| 66 | |
| 67 | /// Much like SmallSetVector, but with one difference: |
| 68 | /// when the size is \p SmallSize or less, when checking whether an element is |
| 69 | /// already in the set or not, we perform linear search over the vector, |
| 70 | /// but if the size is larger than \p SmallSize, we look in set. |
| 71 | /// FIXME: upstream this into SetVector/SmallSetVector itself. |
| 72 | template <typename T, unsigned SmallSize> class SmartSmallSetVector { |
| 73 | public: |
| 74 | using size_type = size_t; |
| 75 | |
| 76 | private: |
| 77 | SmallVector<T, SmallSize> Vector; |
| 78 | llvm::DenseSet<T> Set; |
| 79 | |
| 80 | static_assert(SmallSize <= 32, "N should be small" ); |
| 81 | |
| 82 | // Are we still using Vector for uniqness tracking? |
| 83 | bool isSmall() const { return Set.empty(); } |
| 84 | |
| 85 | // Will one more entry cause Vector to switch away from small-size storage? |
| 86 | bool entiretyOfVectorSmallSizeIsOccupied() const { |
| 87 | assert(isSmall() && Vector.size() <= SmallSize && |
| 88 | "Shouldn't ask if we have already [should have] migrated into Set." ); |
| 89 | return Vector.size() == SmallSize; |
| 90 | } |
| 91 | |
| 92 | void populateSet() { |
| 93 | assert(Set.empty() && "Should not have already utilized the Set." ); |
| 94 | // Magical growth factor prediction - to how many elements do we expect to |
| 95 | // sanely grow after switching away from small-size storage? |
| 96 | const size_t NewMaxElts = 4 * Vector.size(); |
| 97 | Vector.reserve(NewMaxElts); |
| 98 | Set.reserve(NewMaxElts); |
| 99 | Set.insert_range(Vector); |
| 100 | } |
| 101 | |
| 102 | /// count - Return 1 if the element is in the set, 0 otherwise. |
| 103 | size_type count(const T &V) const { |
| 104 | if (isSmall()) { |
| 105 | // Since the collection is small, just do a linear search. |
| 106 | return llvm::is_contained(Vector, V) ? 1 : 0; |
| 107 | } |
| 108 | // Look-up in the Set. |
| 109 | return Set.count(V); |
| 110 | } |
| 111 | |
| 112 | bool setInsert(const T &V) { |
| 113 | if (count(V) != 0) |
| 114 | return false; // Already exists. |
| 115 | // Does not exist, Can/need to record it. |
| 116 | if (isSmall()) { // Are we still using Vector for uniqness tracking? |
| 117 | // Will one more entry fit within small-sized Vector? |
| 118 | if (!entiretyOfVectorSmallSizeIsOccupied()) |
| 119 | return true; // We'll insert into vector right afterwards anyway. |
| 120 | // Time to switch to Set. |
| 121 | populateSet(); |
| 122 | } |
| 123 | // Set time! |
| 124 | // Note that this must be after `populateSet()` might have been called. |
| 125 | bool SetInsertionSucceeded = Set.insert(V).second; |
| 126 | (void)SetInsertionSucceeded; |
| 127 | assert(SetInsertionSucceeded && "We did check that no such value existed" ); |
| 128 | return true; |
| 129 | } |
| 130 | |
| 131 | public: |
| 132 | /// Insert a new element into the SmartSmallSetVector. |
| 133 | /// \returns true if the element was inserted into the SmartSmallSetVector. |
| 134 | bool insert(const T &X) { |
| 135 | bool Result = setInsert(X); |
| 136 | if (Result) |
| 137 | Vector.push_back(X); |
| 138 | return Result; |
| 139 | } |
| 140 | |
| 141 | /// Clear the SmartSmallSetVector and return the underlying vector. |
| 142 | decltype(Vector) takeVector() { |
| 143 | Set.clear(); |
| 144 | return std::move(Vector); |
| 145 | } |
| 146 | }; |
| 147 | |
| 148 | constexpr unsigned SmallCallStackSize = 16; |
| 149 | constexpr unsigned SmallSCCSize = 32; |
| 150 | |
| 151 | using CallStackTy = |
| 152 | llvm::SmallVector<CallGraphNode::CallRecord, SmallCallStackSize>; |
| 153 | |
| 154 | // In given SCC, find *some* call stack that will be cyclic. |
| 155 | // This will only find *one* such stack, it might not be the smallest one, |
| 156 | // and there may be other loops. |
| 157 | CallStackTy pathfindSomeCycle(ArrayRef<CallGraphNode *> SCC) { |
| 158 | // We'll need to be able to performantly look up whether some CallGraphNode |
| 159 | // is in SCC or not, so cache all the SCC elements in a set. |
| 160 | const ImmutableSmallSet<CallGraphNode *, SmallSCCSize> SCCElts(SCC); |
| 161 | |
| 162 | // Is node N part if the current SCC? |
| 163 | auto NodeIsPartOfSCC = [&SCCElts](CallGraphNode *N) { |
| 164 | return SCCElts.count(V: N) != 0; |
| 165 | }; |
| 166 | |
| 167 | // Track the call stack that will cause a cycle. |
| 168 | SmartSmallSetVector<CallGraphNode::CallRecord, SmallCallStackSize> |
| 169 | CallStackSet; |
| 170 | |
| 171 | // Arbitrarily take the first element of SCC as entry point. |
| 172 | CallGraphNode::CallRecord EntryNode(SCC.front(), /*CallExpr=*/nullptr); |
| 173 | // Continue recursing into subsequent callees that are part of this SCC, |
| 174 | // and are thus known to be part of the call graph loop, until loop forms. |
| 175 | CallGraphNode::CallRecord *Node = &EntryNode; |
| 176 | while (true) { |
| 177 | // Did we see this node before? |
| 178 | if (!CallStackSet.insert(X: *Node)) |
| 179 | break; // Cycle completed! Note that didn't insert the node into stack! |
| 180 | // Else, perform depth-first traversal: out of all callees, pick first one |
| 181 | // that is part of this SCC. This is not guaranteed to yield shortest cycle. |
| 182 | Node = llvm::find_if(Range: Node->Callee->callees(), P: NodeIsPartOfSCC); |
| 183 | } |
| 184 | |
| 185 | // Note that we failed to insert the last node, that completes the cycle. |
| 186 | // But we really want to have it. So insert it manually into stack only. |
| 187 | CallStackTy CallStack = CallStackSet.takeVector(); |
| 188 | CallStack.emplace_back(Args&: *Node); |
| 189 | |
| 190 | return CallStack; |
| 191 | } |
| 192 | |
| 193 | } // namespace |
| 194 | |
| 195 | void NoRecursionCheck::registerMatchers(MatchFinder *Finder) { |
| 196 | Finder->addMatcher(NodeMatch: translationUnitDecl().bind(ID: "TUDecl" ), Action: this); |
| 197 | } |
| 198 | |
| 199 | void NoRecursionCheck::handleSCC(ArrayRef<CallGraphNode *> SCC) { |
| 200 | assert(!SCC.empty() && "Empty SCC does not make sense." ); |
| 201 | |
| 202 | // First of all, call out every strongly connected function. |
| 203 | for (CallGraphNode *N : SCC) { |
| 204 | FunctionDecl *D = N->getDefinition(); |
| 205 | diag(D->getLocation(), "function %0 is within a recursive call chain" ) << D; |
| 206 | } |
| 207 | |
| 208 | // Now, SCC only tells us about strongly connected function declarations in |
| 209 | // the call graph. It doesn't *really* tell us about the cycles they form. |
| 210 | // And there may be more than one cycle in SCC. |
| 211 | // So let's form a call stack that eventually exposes *some* cycle. |
| 212 | const CallStackTy EventuallyCyclicCallStack = pathfindSomeCycle(SCC); |
| 213 | assert(!EventuallyCyclicCallStack.empty() && "We should've found the cycle" ); |
| 214 | |
| 215 | // While last node of the call stack does cause a loop, due to the way we |
| 216 | // pathfind the cycle, the loop does not necessarily begin at the first node |
| 217 | // of the call stack, so drop front nodes of the call stack until it does. |
| 218 | const auto CyclicCallStack = |
| 219 | ArrayRef<CallGraphNode::CallRecord>(EventuallyCyclicCallStack) |
| 220 | .drop_until(Pred: [LastNode = EventuallyCyclicCallStack.back()]( |
| 221 | CallGraphNode::CallRecord FrontNode) { |
| 222 | return FrontNode == LastNode; |
| 223 | }); |
| 224 | assert(CyclicCallStack.size() >= 2 && "Cycle requires at least 2 frames" ); |
| 225 | |
| 226 | // Which function we decided to be the entry point that lead to the recursion? |
| 227 | FunctionDecl *CycleEntryFn = CyclicCallStack.front().Callee->getDefinition(); |
| 228 | // And now, for ease of understanding, let's print the call sequence that |
| 229 | // forms the cycle in question. |
| 230 | diag(CycleEntryFn->getLocation(), |
| 231 | "example recursive call chain, starting from function %0" , |
| 232 | DiagnosticIDs::Note) |
| 233 | << CycleEntryFn; |
| 234 | for (int CurFrame = 1, NumFrames = CyclicCallStack.size(); |
| 235 | CurFrame != NumFrames; ++CurFrame) { |
| 236 | CallGraphNode::CallRecord PrevNode = CyclicCallStack[CurFrame - 1]; |
| 237 | CallGraphNode::CallRecord CurrNode = CyclicCallStack[CurFrame]; |
| 238 | |
| 239 | Decl *PrevDecl = PrevNode.Callee->getDecl(); |
| 240 | Decl *CurrDecl = CurrNode.Callee->getDecl(); |
| 241 | |
| 242 | diag(CurrNode.CallExpr->getBeginLoc(), |
| 243 | "Frame #%0: function %1 calls function %2 here:" , DiagnosticIDs::Note) |
| 244 | << CurFrame << cast<NamedDecl>(Val: PrevDecl) << cast<NamedDecl>(Val: CurrDecl); |
| 245 | } |
| 246 | |
| 247 | diag(CyclicCallStack.back().CallExpr->getBeginLoc(), |
| 248 | "... which was the starting point of the recursive call chain; there " |
| 249 | "may be other cycles" , |
| 250 | DiagnosticIDs::Note); |
| 251 | } |
| 252 | |
| 253 | void NoRecursionCheck::check(const MatchFinder::MatchResult &Result) { |
| 254 | // Build call graph for the entire translation unit. |
| 255 | const auto *TU = Result.Nodes.getNodeAs<TranslationUnitDecl>(ID: "TUDecl" ); |
| 256 | CallGraph CG; |
| 257 | CG.addToCallGraph(const_cast<TranslationUnitDecl *>(TU)); |
| 258 | |
| 259 | // Look for cycles in call graph, |
| 260 | // by looking for Strongly Connected Components (SCC's) |
| 261 | for (llvm::scc_iterator<CallGraph *> SCCI = llvm::scc_begin(G: &CG), |
| 262 | SCCE = llvm::scc_end(G: &CG); |
| 263 | SCCI != SCCE; ++SCCI) { |
| 264 | if (!SCCI.hasCycle()) // We only care about cycles, not standalone nodes. |
| 265 | continue; |
| 266 | handleSCC(SCC: *SCCI); |
| 267 | } |
| 268 | } |
| 269 | |
| 270 | } // namespace clang::tidy::misc |
| 271 | |