1 | //===- ExplodedGraph.cpp - Local, Path-Sens. "Exploded Graph" -------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines the template classes ExplodedNode and ExplodedGraph, |
10 | // which represent a path-sensitive, intra-procedural "exploded graph." |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "clang/StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h" |
15 | #include "clang/AST/Expr.h" |
16 | #include "clang/AST/ExprObjC.h" |
17 | #include "clang/AST/ParentMap.h" |
18 | #include "clang/AST/Stmt.h" |
19 | #include "clang/Analysis/CFGStmtMap.h" |
20 | #include "clang/Analysis/ProgramPoint.h" |
21 | #include "clang/Analysis/Support/BumpVector.h" |
22 | #include "clang/Basic/LLVM.h" |
23 | #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" |
24 | #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" |
25 | #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState_Fwd.h" |
26 | #include "llvm/ADT/DenseSet.h" |
27 | #include "llvm/ADT/FoldingSet.h" |
28 | #include "llvm/ADT/PointerUnion.h" |
29 | #include "llvm/ADT/SmallVector.h" |
30 | #include "llvm/Support/Casting.h" |
31 | #include <cassert> |
32 | #include <memory> |
33 | #include <optional> |
34 | |
35 | using namespace clang; |
36 | using namespace ento; |
37 | |
38 | //===----------------------------------------------------------------------===// |
39 | // Cleanup. |
40 | //===----------------------------------------------------------------------===// |
41 | |
42 | ExplodedGraph::ExplodedGraph() = default; |
43 | |
44 | ExplodedGraph::~ExplodedGraph() = default; |
45 | |
46 | //===----------------------------------------------------------------------===// |
47 | // Node reclamation. |
48 | //===----------------------------------------------------------------------===// |
49 | |
50 | bool ExplodedGraph::isInterestingLValueExpr(const Expr *Ex) { |
51 | if (!Ex->isLValue()) |
52 | return false; |
53 | return isa<DeclRefExpr, MemberExpr, ObjCIvarRefExpr, ArraySubscriptExpr>(Val: Ex); |
54 | } |
55 | |
56 | bool ExplodedGraph::shouldCollect(const ExplodedNode *node) { |
57 | // First, we only consider nodes for reclamation of the following |
58 | // conditions apply: |
59 | // |
60 | // (1) 1 predecessor (that has one successor) |
61 | // (2) 1 successor (that has one predecessor) |
62 | // |
63 | // If a node has no successor it is on the "frontier", while a node |
64 | // with no predecessor is a root. |
65 | // |
66 | // After these prerequisites, we discard all "filler" nodes that |
67 | // are used only for intermediate processing, and are not essential |
68 | // for analyzer history: |
69 | // |
70 | // (a) PreStmtPurgeDeadSymbols |
71 | // |
72 | // We then discard all other nodes where *all* of the following conditions |
73 | // apply: |
74 | // |
75 | // (3) The ProgramPoint is for a PostStmt, but not a PostStore. |
76 | // (4) There is no 'tag' for the ProgramPoint. |
77 | // (5) The 'store' is the same as the predecessor. |
78 | // (6) The 'GDM' is the same as the predecessor. |
79 | // (7) The LocationContext is the same as the predecessor. |
80 | // (8) Expressions that are *not* lvalue expressions. |
81 | // (9) The PostStmt isn't for a non-consumed Stmt or Expr. |
82 | // (10) The successor is neither a CallExpr StmtPoint nor a CallEnter or |
83 | // PreImplicitCall (so that we would be able to find it when retrying a |
84 | // call with no inlining). |
85 | // FIXME: It may be safe to reclaim PreCall and PostCall nodes as well. |
86 | |
87 | // Conditions 1 and 2. |
88 | if (node->pred_size() != 1 || node->succ_size() != 1) |
89 | return false; |
90 | |
91 | const ExplodedNode *pred = *(node->pred_begin()); |
92 | if (pred->succ_size() != 1) |
93 | return false; |
94 | |
95 | const ExplodedNode *succ = *(node->succ_begin()); |
96 | if (succ->pred_size() != 1) |
97 | return false; |
98 | |
99 | // Now reclaim any nodes that are (by definition) not essential to |
100 | // analysis history and are not consulted by any client code. |
101 | ProgramPoint progPoint = node->getLocation(); |
102 | if (progPoint.getAs<PreStmtPurgeDeadSymbols>()) |
103 | return !progPoint.getTag(); |
104 | |
105 | // Condition 3. |
106 | if (!progPoint.getAs<PostStmt>() || progPoint.getAs<PostStore>()) |
107 | return false; |
108 | |
109 | // Condition 4. |
110 | if (progPoint.getTag()) |
111 | return false; |
112 | |
113 | // Conditions 5, 6, and 7. |
114 | ProgramStateRef state = node->getState(); |
115 | ProgramStateRef pred_state = pred->getState(); |
116 | if (state->store != pred_state->store || state->GDM != pred_state->GDM || |
117 | progPoint.getLocationContext() != pred->getLocationContext()) |
118 | return false; |
119 | |
120 | // All further checks require expressions. As per #3, we know that we have |
121 | // a PostStmt. |
122 | const Expr *Ex = dyn_cast<Expr>(Val: progPoint.castAs<PostStmt>().getStmt()); |
123 | if (!Ex) |
124 | return false; |
125 | |
126 | // Condition 8. |
127 | // Do not collect nodes for "interesting" lvalue expressions since they are |
128 | // used extensively for generating path diagnostics. |
129 | if (isInterestingLValueExpr(Ex)) |
130 | return false; |
131 | |
132 | // Condition 9. |
133 | // Do not collect nodes for non-consumed Stmt or Expr to ensure precise |
134 | // diagnostic generation; specifically, so that we could anchor arrows |
135 | // pointing to the beginning of statements (as written in code). |
136 | const ParentMap &PM = progPoint.getLocationContext()->getParentMap(); |
137 | if (!PM.isConsumedExpr(E: Ex)) |
138 | return false; |
139 | |
140 | // Condition 10. |
141 | const ProgramPoint SuccLoc = succ->getLocation(); |
142 | if (std::optional<StmtPoint> SP = SuccLoc.getAs<StmtPoint>()) |
143 | if (CallEvent::isCallStmt(S: SP->getStmt())) |
144 | return false; |
145 | |
146 | // Condition 10, continuation. |
147 | if (SuccLoc.getAs<CallEnter>() || SuccLoc.getAs<PreImplicitCall>()) |
148 | return false; |
149 | |
150 | return true; |
151 | } |
152 | |
153 | void ExplodedGraph::collectNode(ExplodedNode *node) { |
154 | // Removing a node means: |
155 | // (a) changing the predecessors successor to the successor of this node |
156 | // (b) changing the successors predecessor to the predecessor of this node |
157 | // (c) Putting 'node' onto freeNodes. |
158 | assert(node->pred_size() == 1 || node->succ_size() == 1); |
159 | ExplodedNode *pred = *(node->pred_begin()); |
160 | ExplodedNode *succ = *(node->succ_begin()); |
161 | pred->replaceSuccessor(node: succ); |
162 | succ->replacePredecessor(node: pred); |
163 | FreeNodes.push_back(x: node); |
164 | Nodes.RemoveNode(N: node); |
165 | --NumNodes; |
166 | node->~ExplodedNode(); |
167 | } |
168 | |
169 | void ExplodedGraph::reclaimRecentlyAllocatedNodes() { |
170 | if (ChangedNodes.empty()) |
171 | return; |
172 | |
173 | // Only periodically reclaim nodes so that we can build up a set of |
174 | // nodes that meet the reclamation criteria. Freshly created nodes |
175 | // by definition have no successor, and thus cannot be reclaimed (see below). |
176 | assert(ReclaimCounter > 0); |
177 | if (--ReclaimCounter != 0) |
178 | return; |
179 | ReclaimCounter = ReclaimNodeInterval; |
180 | |
181 | for (const auto node : ChangedNodes) |
182 | if (shouldCollect(node)) |
183 | collectNode(node); |
184 | ChangedNodes.clear(); |
185 | } |
186 | |
187 | //===----------------------------------------------------------------------===// |
188 | // ExplodedNode. |
189 | //===----------------------------------------------------------------------===// |
190 | |
191 | // An NodeGroup's storage type is actually very much like a TinyPtrVector: |
192 | // it can be either a pointer to a single ExplodedNode, or a pointer to a |
193 | // BumpVector allocated with the ExplodedGraph's allocator. This allows the |
194 | // common case of single-node NodeGroups to be implemented with no extra memory. |
195 | // |
196 | // Consequently, each of the NodeGroup methods have up to four cases to handle: |
197 | // 1. The flag is set and this group does not actually contain any nodes. |
198 | // 2. The group is empty, in which case the storage value is null. |
199 | // 3. The group contains a single node. |
200 | // 4. The group contains more than one node. |
201 | using ExplodedNodeVector = BumpVector<ExplodedNode *>; |
202 | using GroupStorage = llvm::PointerUnion<ExplodedNode *, ExplodedNodeVector *>; |
203 | |
204 | void ExplodedNode::addPredecessor(ExplodedNode *V, ExplodedGraph &G) { |
205 | assert(!V->isSink()); |
206 | Preds.addNode(N: V, G); |
207 | V->Succs.addNode(N: this, G); |
208 | } |
209 | |
210 | void ExplodedNode::NodeGroup::replaceNode(ExplodedNode *node) { |
211 | assert(!getFlag()); |
212 | |
213 | GroupStorage &Storage = reinterpret_cast<GroupStorage&>(P); |
214 | assert(Storage.is<ExplodedNode *>()); |
215 | Storage = node; |
216 | assert(Storage.is<ExplodedNode *>()); |
217 | } |
218 | |
219 | void ExplodedNode::NodeGroup::addNode(ExplodedNode *N, ExplodedGraph &G) { |
220 | assert(!getFlag()); |
221 | |
222 | GroupStorage &Storage = reinterpret_cast<GroupStorage&>(P); |
223 | if (Storage.isNull()) { |
224 | Storage = N; |
225 | assert(Storage.is<ExplodedNode *>()); |
226 | return; |
227 | } |
228 | |
229 | ExplodedNodeVector *V = Storage.dyn_cast<ExplodedNodeVector *>(); |
230 | |
231 | if (!V) { |
232 | // Switch from single-node to multi-node representation. |
233 | ExplodedNode *Old = Storage.get<ExplodedNode *>(); |
234 | |
235 | BumpVectorContext &Ctx = G.getNodeAllocator(); |
236 | V = new (G.getAllocator()) ExplodedNodeVector(Ctx, 4); |
237 | V->push_back(Elt: Old, C&: Ctx); |
238 | |
239 | Storage = V; |
240 | assert(!getFlag()); |
241 | assert(Storage.is<ExplodedNodeVector *>()); |
242 | } |
243 | |
244 | V->push_back(Elt: N, C&: G.getNodeAllocator()); |
245 | } |
246 | |
247 | unsigned ExplodedNode::NodeGroup::size() const { |
248 | if (getFlag()) |
249 | return 0; |
250 | |
251 | const GroupStorage &Storage = reinterpret_cast<const GroupStorage &>(P); |
252 | if (Storage.isNull()) |
253 | return 0; |
254 | if (ExplodedNodeVector *V = Storage.dyn_cast<ExplodedNodeVector *>()) |
255 | return V->size(); |
256 | return 1; |
257 | } |
258 | |
259 | ExplodedNode * const *ExplodedNode::NodeGroup::begin() const { |
260 | if (getFlag()) |
261 | return nullptr; |
262 | |
263 | const GroupStorage &Storage = reinterpret_cast<const GroupStorage &>(P); |
264 | if (Storage.isNull()) |
265 | return nullptr; |
266 | if (ExplodedNodeVector *V = Storage.dyn_cast<ExplodedNodeVector *>()) |
267 | return V->begin(); |
268 | return Storage.getAddrOfPtr1(); |
269 | } |
270 | |
271 | ExplodedNode * const *ExplodedNode::NodeGroup::end() const { |
272 | if (getFlag()) |
273 | return nullptr; |
274 | |
275 | const GroupStorage &Storage = reinterpret_cast<const GroupStorage &>(P); |
276 | if (Storage.isNull()) |
277 | return nullptr; |
278 | if (ExplodedNodeVector *V = Storage.dyn_cast<ExplodedNodeVector *>()) |
279 | return V->end(); |
280 | return Storage.getAddrOfPtr1() + 1; |
281 | } |
282 | |
283 | bool ExplodedNode::isTrivial() const { |
284 | return pred_size() == 1 && succ_size() == 1 && |
285 | getFirstPred()->getState()->getID() == getState()->getID() && |
286 | getFirstPred()->succ_size() == 1; |
287 | } |
288 | |
289 | const CFGBlock *ExplodedNode::getCFGBlock() const { |
290 | ProgramPoint P = getLocation(); |
291 | if (auto BEP = P.getAs<BlockEntrance>()) |
292 | return BEP->getBlock(); |
293 | |
294 | // Find the node's current statement in the CFG. |
295 | // FIXME: getStmtForDiagnostics() does nasty things in order to provide |
296 | // a valid statement for body farms, do we need this behavior here? |
297 | if (const Stmt *S = getStmtForDiagnostics()) |
298 | return getLocationContext() |
299 | ->getAnalysisDeclContext() |
300 | ->getCFGStmtMap() |
301 | ->getBlock(S); |
302 | |
303 | return nullptr; |
304 | } |
305 | |
306 | static const LocationContext * |
307 | findTopAutosynthesizedParentContext(const LocationContext *LC) { |
308 | assert(LC->getAnalysisDeclContext()->isBodyAutosynthesized()); |
309 | const LocationContext *ParentLC = LC->getParent(); |
310 | assert(ParentLC && "We don't start analysis from autosynthesized code" ); |
311 | while (ParentLC->getAnalysisDeclContext()->isBodyAutosynthesized()) { |
312 | LC = ParentLC; |
313 | ParentLC = LC->getParent(); |
314 | assert(ParentLC && "We don't start analysis from autosynthesized code" ); |
315 | } |
316 | return LC; |
317 | } |
318 | |
319 | const Stmt *ExplodedNode::getStmtForDiagnostics() const { |
320 | // We cannot place diagnostics on autosynthesized code. |
321 | // Put them onto the call site through which we jumped into autosynthesized |
322 | // code for the first time. |
323 | const LocationContext *LC = getLocationContext(); |
324 | if (LC->getAnalysisDeclContext()->isBodyAutosynthesized()) { |
325 | // It must be a stack frame because we only autosynthesize functions. |
326 | return cast<StackFrameContext>(Val: findTopAutosynthesizedParentContext(LC)) |
327 | ->getCallSite(); |
328 | } |
329 | // Otherwise, see if the node's program point directly points to a statement. |
330 | // FIXME: Refactor into a ProgramPoint method? |
331 | ProgramPoint P = getLocation(); |
332 | if (auto SP = P.getAs<StmtPoint>()) |
333 | return SP->getStmt(); |
334 | if (auto BE = P.getAs<BlockEdge>()) |
335 | return BE->getSrc()->getTerminatorStmt(); |
336 | if (auto CE = P.getAs<CallEnter>()) |
337 | return CE->getCallExpr(); |
338 | if (auto CEE = P.getAs<CallExitEnd>()) |
339 | return CEE->getCalleeContext()->getCallSite(); |
340 | if (auto PIPP = P.getAs<PostInitializer>()) |
341 | return PIPP->getInitializer()->getInit(); |
342 | if (auto CEB = P.getAs<CallExitBegin>()) |
343 | return CEB->getReturnStmt(); |
344 | if (auto FEP = P.getAs<FunctionExitPoint>()) |
345 | return FEP->getStmt(); |
346 | |
347 | return nullptr; |
348 | } |
349 | |
350 | const Stmt *ExplodedNode::getNextStmtForDiagnostics() const { |
351 | for (const ExplodedNode *N = getFirstSucc(); N; N = N->getFirstSucc()) { |
352 | if (const Stmt *S = N->getStmtForDiagnostics()) { |
353 | // Check if the statement is '?' or '&&'/'||'. These are "merges", |
354 | // not actual statement points. |
355 | switch (S->getStmtClass()) { |
356 | case Stmt::ChooseExprClass: |
357 | case Stmt::BinaryConditionalOperatorClass: |
358 | case Stmt::ConditionalOperatorClass: |
359 | continue; |
360 | case Stmt::BinaryOperatorClass: { |
361 | BinaryOperatorKind Op = cast<BinaryOperator>(Val: S)->getOpcode(); |
362 | if (Op == BO_LAnd || Op == BO_LOr) |
363 | continue; |
364 | break; |
365 | } |
366 | default: |
367 | break; |
368 | } |
369 | // We found the statement, so return it. |
370 | return S; |
371 | } |
372 | } |
373 | |
374 | return nullptr; |
375 | } |
376 | |
377 | const Stmt *ExplodedNode::getPreviousStmtForDiagnostics() const { |
378 | for (const ExplodedNode *N = getFirstPred(); N; N = N->getFirstPred()) |
379 | if (const Stmt *S = N->getStmtForDiagnostics()) |
380 | return S; |
381 | |
382 | return nullptr; |
383 | } |
384 | |
385 | const Stmt *ExplodedNode::getCurrentOrPreviousStmtForDiagnostics() const { |
386 | if (const Stmt *S = getStmtForDiagnostics()) |
387 | return S; |
388 | |
389 | return getPreviousStmtForDiagnostics(); |
390 | } |
391 | |
392 | ExplodedNode *ExplodedGraph::getNode(const ProgramPoint &L, |
393 | ProgramStateRef State, |
394 | bool IsSink, |
395 | bool* IsNew) { |
396 | // Profile 'State' to determine if we already have an existing node. |
397 | llvm::FoldingSetNodeID profile; |
398 | void *InsertPos = nullptr; |
399 | |
400 | NodeTy::Profile(ID&: profile, Loc: L, state: State, IsSink); |
401 | NodeTy* V = Nodes.FindNodeOrInsertPos(ID: profile, InsertPos); |
402 | |
403 | if (!V) { |
404 | if (!FreeNodes.empty()) { |
405 | V = FreeNodes.back(); |
406 | FreeNodes.pop_back(); |
407 | } |
408 | else { |
409 | // Allocate a new node. |
410 | V = getAllocator().Allocate<NodeTy>(); |
411 | } |
412 | |
413 | ++NumNodes; |
414 | new (V) NodeTy(L, State, NumNodes, IsSink); |
415 | |
416 | if (ReclaimNodeInterval) |
417 | ChangedNodes.push_back(x: V); |
418 | |
419 | // Insert the node into the node set and return it. |
420 | Nodes.InsertNode(N: V, InsertPos); |
421 | |
422 | if (IsNew) *IsNew = true; |
423 | } |
424 | else |
425 | if (IsNew) *IsNew = false; |
426 | |
427 | return V; |
428 | } |
429 | |
430 | ExplodedNode *ExplodedGraph::createUncachedNode(const ProgramPoint &L, |
431 | ProgramStateRef State, |
432 | int64_t Id, |
433 | bool IsSink) { |
434 | NodeTy *V = getAllocator().Allocate<NodeTy>(); |
435 | new (V) NodeTy(L, State, Id, IsSink); |
436 | return V; |
437 | } |
438 | |
439 | std::unique_ptr<ExplodedGraph> |
440 | ExplodedGraph::trim(ArrayRef<const NodeTy *> Sinks, |
441 | InterExplodedGraphMap *ForwardMap, |
442 | InterExplodedGraphMap *InverseMap) const { |
443 | if (Nodes.empty()) |
444 | return nullptr; |
445 | |
446 | using Pass1Ty = llvm::DenseSet<const ExplodedNode *>; |
447 | Pass1Ty Pass1; |
448 | |
449 | using Pass2Ty = InterExplodedGraphMap; |
450 | InterExplodedGraphMap Pass2Scratch; |
451 | Pass2Ty &Pass2 = ForwardMap ? *ForwardMap : Pass2Scratch; |
452 | |
453 | SmallVector<const ExplodedNode*, 10> WL1, WL2; |
454 | |
455 | // ===- Pass 1 (reverse DFS) -=== |
456 | for (const auto Sink : Sinks) |
457 | if (Sink) |
458 | WL1.push_back(Elt: Sink); |
459 | |
460 | // Process the first worklist until it is empty. |
461 | while (!WL1.empty()) { |
462 | const ExplodedNode *N = WL1.pop_back_val(); |
463 | |
464 | // Have we already visited this node? If so, continue to the next one. |
465 | if (!Pass1.insert(V: N).second) |
466 | continue; |
467 | |
468 | // If this is a root enqueue it to the second worklist. |
469 | if (N->Preds.empty()) { |
470 | WL2.push_back(Elt: N); |
471 | continue; |
472 | } |
473 | |
474 | // Visit our predecessors and enqueue them. |
475 | WL1.append(in_start: N->Preds.begin(), in_end: N->Preds.end()); |
476 | } |
477 | |
478 | // We didn't hit a root? Return with a null pointer for the new graph. |
479 | if (WL2.empty()) |
480 | return nullptr; |
481 | |
482 | // Create an empty graph. |
483 | std::unique_ptr<ExplodedGraph> G = MakeEmptyGraph(); |
484 | |
485 | // ===- Pass 2 (forward DFS to construct the new graph) -=== |
486 | while (!WL2.empty()) { |
487 | const ExplodedNode *N = WL2.pop_back_val(); |
488 | |
489 | // Skip this node if we have already processed it. |
490 | if (Pass2.contains(Val: N)) |
491 | continue; |
492 | |
493 | // Create the corresponding node in the new graph and record the mapping |
494 | // from the old node to the new node. |
495 | ExplodedNode *NewN = G->createUncachedNode(L: N->getLocation(), State: N->State, |
496 | Id: N->getID(), IsSink: N->isSink()); |
497 | Pass2[N] = NewN; |
498 | |
499 | // Also record the reverse mapping from the new node to the old node. |
500 | if (InverseMap) (*InverseMap)[NewN] = N; |
501 | |
502 | // If this node is a root, designate it as such in the graph. |
503 | if (N->Preds.empty()) |
504 | G->addRoot(V: NewN); |
505 | |
506 | // In the case that some of the intended predecessors of NewN have already |
507 | // been created, we should hook them up as predecessors. |
508 | |
509 | // Walk through the predecessors of 'N' and hook up their corresponding |
510 | // nodes in the new graph (if any) to the freshly created node. |
511 | for (const ExplodedNode *Pred : N->Preds) { |
512 | Pass2Ty::iterator PI = Pass2.find(Val: Pred); |
513 | if (PI == Pass2.end()) |
514 | continue; |
515 | |
516 | NewN->addPredecessor(V: const_cast<ExplodedNode *>(PI->second), G&: *G); |
517 | } |
518 | |
519 | // In the case that some of the intended successors of NewN have already |
520 | // been created, we should hook them up as successors. Otherwise, enqueue |
521 | // the new nodes from the original graph that should have nodes created |
522 | // in the new graph. |
523 | for (const ExplodedNode *Succ : N->Succs) { |
524 | Pass2Ty::iterator PI = Pass2.find(Val: Succ); |
525 | if (PI != Pass2.end()) { |
526 | const_cast<ExplodedNode *>(PI->second)->addPredecessor(V: NewN, G&: *G); |
527 | continue; |
528 | } |
529 | |
530 | // Enqueue nodes to the worklist that were marked during pass 1. |
531 | if (Pass1.count(V: Succ)) |
532 | WL2.push_back(Elt: Succ); |
533 | } |
534 | } |
535 | |
536 | return G; |
537 | } |
538 | |