1 | //===- ExplodedGraph.cpp - Local, Path-Sens. "Exploded Graph" -------------===// |
---|---|

2 | // |

3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |

4 | // See https://llvm.org/LICENSE.txt for license information. |

5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |

6 | // |

7 | //===----------------------------------------------------------------------===// |

8 | // |

9 | // This file defines the template classes ExplodedNode and ExplodedGraph, |

10 | // which represent a path-sensitive, intra-procedural "exploded graph." |

11 | // |

12 | //===----------------------------------------------------------------------===// |

13 | |

14 | #include "clang/StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h" |

15 | #include "clang/AST/Expr.h" |

16 | #include "clang/AST/ExprObjC.h" |

17 | #include "clang/AST/ParentMap.h" |

18 | #include "clang/AST/Stmt.h" |

19 | #include "clang/Analysis/CFGStmtMap.h" |

20 | #include "clang/Analysis/ProgramPoint.h" |

21 | #include "clang/Analysis/Support/BumpVector.h" |

22 | #include "clang/Basic/LLVM.h" |

23 | #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" |

24 | #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" |

25 | #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState_Fwd.h" |

26 | #include "llvm/ADT/DenseSet.h" |

27 | #include "llvm/ADT/FoldingSet.h" |

28 | #include "llvm/ADT/Optional.h" |

29 | #include "llvm/ADT/PointerUnion.h" |

30 | #include "llvm/ADT/SmallVector.h" |

31 | #include "llvm/Support/Casting.h" |

32 | #include <cassert> |

33 | #include <memory> |

34 | |

35 | using namespace clang; |

36 | using namespace ento; |

37 | |

38 | //===----------------------------------------------------------------------===// |

39 | // Cleanup. |

40 | //===----------------------------------------------------------------------===// |

41 | |

42 | ExplodedGraph::ExplodedGraph() = default; |

43 | |

44 | ExplodedGraph::~ExplodedGraph() = default; |

45 | |

46 | //===----------------------------------------------------------------------===// |

47 | // Node reclamation. |

48 | //===----------------------------------------------------------------------===// |

49 | |

50 | bool ExplodedGraph::isInterestingLValueExpr(const Expr *Ex) { |

51 | if (!Ex->isLValue()) |

52 | return false; |

53 | return isa<DeclRefExpr, MemberExpr, ObjCIvarRefExpr, ArraySubscriptExpr>(Ex); |

54 | } |

55 | |

56 | bool ExplodedGraph::shouldCollect(const ExplodedNode *node) { |

57 | // First, we only consider nodes for reclamation of the following |

58 | // conditions apply: |

59 | // |

60 | // (1) 1 predecessor (that has one successor) |

61 | // (2) 1 successor (that has one predecessor) |

62 | // |

63 | // If a node has no successor it is on the "frontier", while a node |

64 | // with no predecessor is a root. |

65 | // |

66 | // After these prerequisites, we discard all "filler" nodes that |

67 | // are used only for intermediate processing, and are not essential |

68 | // for analyzer history: |

69 | // |

70 | // (a) PreStmtPurgeDeadSymbols |

71 | // |

72 | // We then discard all other nodes where *all* of the following conditions |

73 | // apply: |

74 | // |

75 | // (3) The ProgramPoint is for a PostStmt, but not a PostStore. |

76 | // (4) There is no 'tag' for the ProgramPoint. |

77 | // (5) The 'store' is the same as the predecessor. |

78 | // (6) The 'GDM' is the same as the predecessor. |

79 | // (7) The LocationContext is the same as the predecessor. |

80 | // (8) Expressions that are *not* lvalue expressions. |

81 | // (9) The PostStmt isn't for a non-consumed Stmt or Expr. |

82 | // (10) The successor is neither a CallExpr StmtPoint nor a CallEnter or |

83 | // PreImplicitCall (so that we would be able to find it when retrying a |

84 | // call with no inlining). |

85 | // FIXME: It may be safe to reclaim PreCall and PostCall nodes as well. |

86 | |

87 | // Conditions 1 and 2. |

88 | if (node->pred_size() != 1 || node->succ_size() != 1) |

89 | return false; |

90 | |

91 | const ExplodedNode *pred = *(node->pred_begin()); |

92 | if (pred->succ_size() != 1) |

93 | return false; |

94 | |

95 | const ExplodedNode *succ = *(node->succ_begin()); |

96 | if (succ->pred_size() != 1) |

97 | return false; |

98 | |

99 | // Now reclaim any nodes that are (by definition) not essential to |

100 | // analysis history and are not consulted by any client code. |

101 | ProgramPoint progPoint = node->getLocation(); |

102 | if (progPoint.getAs<PreStmtPurgeDeadSymbols>()) |

103 | return !progPoint.getTag(); |

104 | |

105 | // Condition 3. |

106 | if (!progPoint.getAs<PostStmt>() || progPoint.getAs<PostStore>()) |

107 | return false; |

108 | |

109 | // Condition 4. |

110 | if (progPoint.getTag()) |

111 | return false; |

112 | |

113 | // Conditions 5, 6, and 7. |

114 | ProgramStateRef state = node->getState(); |

115 | ProgramStateRef pred_state = pred->getState(); |

116 | if (state->store != pred_state->store || state->GDM != pred_state->GDM || |

117 | progPoint.getLocationContext() != pred->getLocationContext()) |

118 | return false; |

119 | |

120 | // All further checks require expressions. As per #3, we know that we have |

121 | // a PostStmt. |

122 | const Expr *Ex = dyn_cast<Expr>(progPoint.castAs<PostStmt>().getStmt()); |

123 | if (!Ex) |

124 | return false; |

125 | |

126 | // Condition 8. |

127 | // Do not collect nodes for "interesting" lvalue expressions since they are |

128 | // used extensively for generating path diagnostics. |

129 | if (isInterestingLValueExpr(Ex)) |

130 | return false; |

131 | |

132 | // Condition 9. |

133 | // Do not collect nodes for non-consumed Stmt or Expr to ensure precise |

134 | // diagnostic generation; specifically, so that we could anchor arrows |

135 | // pointing to the beginning of statements (as written in code). |

136 | const ParentMap &PM = progPoint.getLocationContext()->getParentMap(); |

137 | if (!PM.isConsumedExpr(Ex)) |

138 | return false; |

139 | |

140 | // Condition 10. |

141 | const ProgramPoint SuccLoc = succ->getLocation(); |

142 | if (Optional<StmtPoint> SP = SuccLoc.getAs<StmtPoint>()) |

143 | if (CallEvent::isCallStmt(SP->getStmt())) |

144 | return false; |

145 | |

146 | // Condition 10, continuation. |

147 | if (SuccLoc.getAs<CallEnter>() || SuccLoc.getAs<PreImplicitCall>()) |

148 | return false; |

149 | |

150 | return true; |

151 | } |

152 | |

153 | void ExplodedGraph::collectNode(ExplodedNode *node) { |

154 | // Removing a node means: |

155 | // (a) changing the predecessors successor to the successor of this node |

156 | // (b) changing the successors predecessor to the predecessor of this node |

157 | // (c) Putting 'node' onto freeNodes. |

158 | assert(node->pred_size() == 1 || node->succ_size() == 1); |

159 | ExplodedNode *pred = *(node->pred_begin()); |

160 | ExplodedNode *succ = *(node->succ_begin()); |

161 | pred->replaceSuccessor(succ); |

162 | succ->replacePredecessor(pred); |

163 | FreeNodes.push_back(node); |

164 | Nodes.RemoveNode(node); |

165 | --NumNodes; |

166 | node->~ExplodedNode(); |

167 | } |

168 | |

169 | void ExplodedGraph::reclaimRecentlyAllocatedNodes() { |

170 | if (ChangedNodes.empty()) |

171 | return; |

172 | |

173 | // Only periodically reclaim nodes so that we can build up a set of |

174 | // nodes that meet the reclamation criteria. Freshly created nodes |

175 | // by definition have no successor, and thus cannot be reclaimed (see below). |

176 | assert(ReclaimCounter > 0); |

177 | if (--ReclaimCounter != 0) |

178 | return; |

179 | ReclaimCounter = ReclaimNodeInterval; |

180 | |

181 | for (const auto node : ChangedNodes) |

182 | if (shouldCollect(node)) |

183 | collectNode(node); |

184 | ChangedNodes.clear(); |

185 | } |

186 | |

187 | //===----------------------------------------------------------------------===// |

188 | // ExplodedNode. |

189 | //===----------------------------------------------------------------------===// |

190 | |

191 | // An NodeGroup's storage type is actually very much like a TinyPtrVector: |

192 | // it can be either a pointer to a single ExplodedNode, or a pointer to a |

193 | // BumpVector allocated with the ExplodedGraph's allocator. This allows the |

194 | // common case of single-node NodeGroups to be implemented with no extra memory. |

195 | // |

196 | // Consequently, each of the NodeGroup methods have up to four cases to handle: |

197 | // 1. The flag is set and this group does not actually contain any nodes. |

198 | // 2. The group is empty, in which case the storage value is null. |

199 | // 3. The group contains a single node. |

200 | // 4. The group contains more than one node. |

201 | using ExplodedNodeVector = BumpVector<ExplodedNode *>; |

202 | using GroupStorage = llvm::PointerUnion<ExplodedNode *, ExplodedNodeVector *>; |

203 | |

204 | void ExplodedNode::addPredecessor(ExplodedNode *V, ExplodedGraph &G) { |

205 | assert(!V->isSink()); |

206 | Preds.addNode(V, G); |

207 | V->Succs.addNode(this, G); |

208 | } |

209 | |

210 | void ExplodedNode::NodeGroup::replaceNode(ExplodedNode *node) { |

211 | assert(!getFlag()); |

212 | |

213 | GroupStorage &Storage = reinterpret_cast<GroupStorage&>(P); |

214 | assert(Storage.is<ExplodedNode *>()); |

215 | Storage = node; |

216 | assert(Storage.is<ExplodedNode *>()); |

217 | } |

218 | |

219 | void ExplodedNode::NodeGroup::addNode(ExplodedNode *N, ExplodedGraph &G) { |

220 | assert(!getFlag()); |

221 | |

222 | GroupStorage &Storage = reinterpret_cast<GroupStorage&>(P); |

223 | if (Storage.isNull()) { |

224 | Storage = N; |

225 | assert(Storage.is<ExplodedNode *>()); |

226 | return; |

227 | } |

228 | |

229 | ExplodedNodeVector *V = Storage.dyn_cast<ExplodedNodeVector *>(); |

230 | |

231 | if (!V) { |

232 | // Switch from single-node to multi-node representation. |

233 | ExplodedNode *Old = Storage.get<ExplodedNode *>(); |

234 | |

235 | BumpVectorContext &Ctx = G.getNodeAllocator(); |

236 | V = G.getAllocator().Allocate<ExplodedNodeVector>(); |

237 | new (V) ExplodedNodeVector(Ctx, 4); |

238 | V->push_back(Old, Ctx); |

239 | |

240 | Storage = V; |

241 | assert(!getFlag()); |

242 | assert(Storage.is<ExplodedNodeVector *>()); |

243 | } |

244 | |

245 | V->push_back(N, G.getNodeAllocator()); |

246 | } |

247 | |

248 | unsigned ExplodedNode::NodeGroup::size() const { |

249 | if (getFlag()) |

250 | return 0; |

251 | |

252 | const GroupStorage &Storage = reinterpret_cast<const GroupStorage &>(P); |

253 | if (Storage.isNull()) |

254 | return 0; |

255 | if (ExplodedNodeVector *V = Storage.dyn_cast<ExplodedNodeVector *>()) |

256 | return V->size(); |

257 | return 1; |

258 | } |

259 | |

260 | ExplodedNode * const *ExplodedNode::NodeGroup::begin() const { |

261 | if (getFlag()) |

262 | return nullptr; |

263 | |

264 | const GroupStorage &Storage = reinterpret_cast<const GroupStorage &>(P); |

265 | if (Storage.isNull()) |

266 | return nullptr; |

267 | if (ExplodedNodeVector *V = Storage.dyn_cast<ExplodedNodeVector *>()) |

268 | return V->begin(); |

269 | return Storage.getAddrOfPtr1(); |

270 | } |

271 | |

272 | ExplodedNode * const *ExplodedNode::NodeGroup::end() const { |

273 | if (getFlag()) |

274 | return nullptr; |

275 | |

276 | const GroupStorage &Storage = reinterpret_cast<const GroupStorage &>(P); |

277 | if (Storage.isNull()) |

278 | return nullptr; |

279 | if (ExplodedNodeVector *V = Storage.dyn_cast<ExplodedNodeVector *>()) |

280 | return V->end(); |

281 | return Storage.getAddrOfPtr1() + 1; |

282 | } |

283 | |

284 | bool ExplodedNode::isTrivial() const { |

285 | return pred_size() == 1 && succ_size() == 1 && |

286 | getFirstPred()->getState()->getID() == getState()->getID() && |

287 | getFirstPred()->succ_size() == 1; |

288 | } |

289 | |

290 | const CFGBlock *ExplodedNode::getCFGBlock() const { |

291 | ProgramPoint P = getLocation(); |

292 | if (auto BEP = P.getAs<BlockEntrance>()) |

293 | return BEP->getBlock(); |

294 | |

295 | // Find the node's current statement in the CFG. |

296 | // FIXME: getStmtForDiagnostics() does nasty things in order to provide |

297 | // a valid statement for body farms, do we need this behavior here? |

298 | if (const Stmt *S = getStmtForDiagnostics()) |

299 | return getLocationContext() |

300 | ->getAnalysisDeclContext() |

301 | ->getCFGStmtMap() |

302 | ->getBlock(S); |

303 | |

304 | return nullptr; |

305 | } |

306 | |

307 | static const LocationContext * |

308 | findTopAutosynthesizedParentContext(const LocationContext *LC) { |

309 | assert(LC->getAnalysisDeclContext()->isBodyAutosynthesized()); |

310 | const LocationContext *ParentLC = LC->getParent(); |

311 | assert(ParentLC && "We don't start analysis from autosynthesized code"); |

312 | while (ParentLC->getAnalysisDeclContext()->isBodyAutosynthesized()) { |

313 | LC = ParentLC; |

314 | ParentLC = LC->getParent(); |

315 | assert(ParentLC && "We don't start analysis from autosynthesized code"); |

316 | } |

317 | return LC; |

318 | } |

319 | |

320 | const Stmt *ExplodedNode::getStmtForDiagnostics() const { |

321 | // We cannot place diagnostics on autosynthesized code. |

322 | // Put them onto the call site through which we jumped into autosynthesized |

323 | // code for the first time. |

324 | const LocationContext *LC = getLocationContext(); |

325 | if (LC->getAnalysisDeclContext()->isBodyAutosynthesized()) { |

326 | // It must be a stack frame because we only autosynthesize functions. |

327 | return cast<StackFrameContext>(findTopAutosynthesizedParentContext(LC)) |

328 | ->getCallSite(); |

329 | } |

330 | // Otherwise, see if the node's program point directly points to a statement. |

331 | // FIXME: Refactor into a ProgramPoint method? |

332 | ProgramPoint P = getLocation(); |

333 | if (auto SP = P.getAs<StmtPoint>()) |

334 | return SP->getStmt(); |

335 | if (auto BE = P.getAs<BlockEdge>()) |

336 | return BE->getSrc()->getTerminatorStmt(); |

337 | if (auto CE = P.getAs<CallEnter>()) |

338 | return CE->getCallExpr(); |

339 | if (auto CEE = P.getAs<CallExitEnd>()) |

340 | return CEE->getCalleeContext()->getCallSite(); |

341 | if (auto PIPP = P.getAs<PostInitializer>()) |

342 | return PIPP->getInitializer()->getInit(); |

343 | if (auto CEB = P.getAs<CallExitBegin>()) |

344 | return CEB->getReturnStmt(); |

345 | if (auto FEP = P.getAs<FunctionExitPoint>()) |

346 | return FEP->getStmt(); |

347 | |

348 | return nullptr; |

349 | } |

350 | |

351 | const Stmt *ExplodedNode::getNextStmtForDiagnostics() const { |

352 | for (const ExplodedNode *N = getFirstSucc(); N; N = N->getFirstSucc()) { |

353 | if (const Stmt *S = N->getStmtForDiagnostics()) { |

354 | // Check if the statement is '?' or '&&'/'||'. These are "merges", |

355 | // not actual statement points. |

356 | switch (S->getStmtClass()) { |

357 | case Stmt::ChooseExprClass: |

358 | case Stmt::BinaryConditionalOperatorClass: |

359 | case Stmt::ConditionalOperatorClass: |

360 | continue; |

361 | case Stmt::BinaryOperatorClass: { |

362 | BinaryOperatorKind Op = cast<BinaryOperator>(S)->getOpcode(); |

363 | if (Op == BO_LAnd || Op == BO_LOr) |

364 | continue; |

365 | break; |

366 | } |

367 | default: |

368 | break; |

369 | } |

370 | // We found the statement, so return it. |

371 | return S; |

372 | } |

373 | } |

374 | |

375 | return nullptr; |

376 | } |

377 | |

378 | const Stmt *ExplodedNode::getPreviousStmtForDiagnostics() const { |

379 | for (const ExplodedNode *N = getFirstPred(); N; N = N->getFirstPred()) |

380 | if (const Stmt *S = N->getStmtForDiagnostics()) |

381 | return S; |

382 | |

383 | return nullptr; |

384 | } |

385 | |

386 | const Stmt *ExplodedNode::getCurrentOrPreviousStmtForDiagnostics() const { |

387 | if (const Stmt *S = getStmtForDiagnostics()) |

388 | return S; |

389 | |

390 | return getPreviousStmtForDiagnostics(); |

391 | } |

392 | |

393 | ExplodedNode *ExplodedGraph::getNode(const ProgramPoint &L, |

394 | ProgramStateRef State, |

395 | bool IsSink, |

396 | bool* IsNew) { |

397 | // Profile 'State' to determine if we already have an existing node. |

398 | llvm::FoldingSetNodeID profile; |

399 | void *InsertPos = nullptr; |

400 | |

401 | NodeTy::Profile(profile, L, State, IsSink); |

402 | NodeTy* V = Nodes.FindNodeOrInsertPos(profile, InsertPos); |

403 | |

404 | if (!V) { |

405 | if (!FreeNodes.empty()) { |

406 | V = FreeNodes.back(); |

407 | FreeNodes.pop_back(); |

408 | } |

409 | else { |

410 | // Allocate a new node. |

411 | V = (NodeTy*) getAllocator().Allocate<NodeTy>(); |

412 | } |

413 | |

414 | ++NumNodes; |

415 | new (V) NodeTy(L, State, NumNodes, IsSink); |

416 | |

417 | if (ReclaimNodeInterval) |

418 | ChangedNodes.push_back(V); |

419 | |

420 | // Insert the node into the node set and return it. |

421 | Nodes.InsertNode(V, InsertPos); |

422 | |

423 | if (IsNew) *IsNew = true; |

424 | } |

425 | else |

426 | if (IsNew) *IsNew = false; |

427 | |

428 | return V; |

429 | } |

430 | |

431 | ExplodedNode *ExplodedGraph::createUncachedNode(const ProgramPoint &L, |

432 | ProgramStateRef State, |

433 | int64_t Id, |

434 | bool IsSink) { |

435 | NodeTy *V = (NodeTy *) getAllocator().Allocate<NodeTy>(); |

436 | new (V) NodeTy(L, State, Id, IsSink); |

437 | return V; |

438 | } |

439 | |

440 | std::unique_ptr<ExplodedGraph> |

441 | ExplodedGraph::trim(ArrayRef<const NodeTy *> Sinks, |

442 | InterExplodedGraphMap *ForwardMap, |

443 | InterExplodedGraphMap *InverseMap) const { |

444 | if (Nodes.empty()) |

445 | return nullptr; |

446 | |

447 | using Pass1Ty = llvm::DenseSet<const ExplodedNode *>; |

448 | Pass1Ty Pass1; |

449 | |

450 | using Pass2Ty = InterExplodedGraphMap; |

451 | InterExplodedGraphMap Pass2Scratch; |

452 | Pass2Ty &Pass2 = ForwardMap ? *ForwardMap : Pass2Scratch; |

453 | |

454 | SmallVector<const ExplodedNode*, 10> WL1, WL2; |

455 | |

456 | // ===- Pass 1 (reverse DFS) -=== |

457 | for (const auto Sink : Sinks) |

458 | if (Sink) |

459 | WL1.push_back(Sink); |

460 | |

461 | // Process the first worklist until it is empty. |

462 | while (!WL1.empty()) { |

463 | const ExplodedNode *N = WL1.pop_back_val(); |

464 | |

465 | // Have we already visited this node? If so, continue to the next one. |

466 | if (!Pass1.insert(N).second) |

467 | continue; |

468 | |

469 | // If this is a root enqueue it to the second worklist. |

470 | if (N->Preds.empty()) { |

471 | WL2.push_back(N); |

472 | continue; |

473 | } |

474 | |

475 | // Visit our predecessors and enqueue them. |

476 | WL1.append(N->Preds.begin(), N->Preds.end()); |

477 | } |

478 | |

479 | // We didn't hit a root? Return with a null pointer for the new graph. |

480 | if (WL2.empty()) |

481 | return nullptr; |

482 | |

483 | // Create an empty graph. |

484 | std::unique_ptr<ExplodedGraph> G = MakeEmptyGraph(); |

485 | |

486 | // ===- Pass 2 (forward DFS to construct the new graph) -=== |

487 | while (!WL2.empty()) { |

488 | const ExplodedNode *N = WL2.pop_back_val(); |

489 | |

490 | // Skip this node if we have already processed it. |

491 | if (Pass2.find(N) != Pass2.end()) |

492 | continue; |

493 | |

494 | // Create the corresponding node in the new graph and record the mapping |

495 | // from the old node to the new node. |

496 | ExplodedNode *NewN = G->createUncachedNode(N->getLocation(), N->State, |

497 | N->getID(), N->isSink()); |

498 | Pass2[N] = NewN; |

499 | |

500 | // Also record the reverse mapping from the new node to the old node. |

501 | if (InverseMap) (*InverseMap)[NewN] = N; |

502 | |

503 | // If this node is a root, designate it as such in the graph. |

504 | if (N->Preds.empty()) |

505 | G->addRoot(NewN); |

506 | |

507 | // In the case that some of the intended predecessors of NewN have already |

508 | // been created, we should hook them up as predecessors. |

509 | |

510 | // Walk through the predecessors of 'N' and hook up their corresponding |

511 | // nodes in the new graph (if any) to the freshly created node. |

512 | for (ExplodedNode::pred_iterator I = N->Preds.begin(), E = N->Preds.end(); |

513 | I != E; ++I) { |

514 | Pass2Ty::iterator PI = Pass2.find(*I); |

515 | if (PI == Pass2.end()) |

516 | continue; |

517 | |

518 | NewN->addPredecessor(const_cast<ExplodedNode *>(PI->second), *G); |

519 | } |

520 | |

521 | // In the case that some of the intended successors of NewN have already |

522 | // been created, we should hook them up as successors. Otherwise, enqueue |

523 | // the new nodes from the original graph that should have nodes created |

524 | // in the new graph. |

525 | for (ExplodedNode::succ_iterator I = N->Succs.begin(), E = N->Succs.end(); |

526 | I != E; ++I) { |

527 | Pass2Ty::iterator PI = Pass2.find(*I); |

528 | if (PI != Pass2.end()) { |

529 | const_cast<ExplodedNode *>(PI->second)->addPredecessor(NewN, *G); |

530 | continue; |

531 | } |

532 | |

533 | // Enqueue nodes to the worklist that were marked during pass 1. |

534 | if (Pass1.count(*I)) |

535 | WL2.push_back(*I); |

536 | } |

537 | } |

538 | |

539 | return G; |

540 | } |

541 |