| 1 | //===- LifetimeSafety.cpp - C++ Lifetime Safety Analysis -*--------- C++-*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | #include "clang/Analysis/Analyses/LifetimeSafety.h" |
| 9 | #include "clang/AST/Decl.h" |
| 10 | #include "clang/AST/Expr.h" |
| 11 | #include "clang/AST/StmtVisitor.h" |
| 12 | #include "clang/AST/Type.h" |
| 13 | #include "clang/Analysis/Analyses/PostOrderCFGView.h" |
| 14 | #include "clang/Analysis/AnalysisDeclContext.h" |
| 15 | #include "clang/Analysis/CFG.h" |
| 16 | #include "clang/Analysis/FlowSensitive/DataflowWorklist.h" |
| 17 | #include "llvm/ADT/FoldingSet.h" |
| 18 | #include "llvm/ADT/ImmutableMap.h" |
| 19 | #include "llvm/ADT/ImmutableSet.h" |
| 20 | #include "llvm/ADT/PointerUnion.h" |
| 21 | #include "llvm/ADT/SmallVector.h" |
| 22 | #include "llvm/Support/Debug.h" |
| 23 | #include "llvm/Support/TimeProfiler.h" |
| 24 | #include <cstdint> |
| 25 | |
| 26 | namespace clang { |
| 27 | namespace { |
| 28 | |
| 29 | /// Represents the storage location being borrowed, e.g., a specific stack |
| 30 | /// variable. |
| 31 | /// TODO: Model access paths of other types, e.g., s.field, heap and globals. |
| 32 | struct AccessPath { |
| 33 | const clang::ValueDecl *D; |
| 34 | |
| 35 | AccessPath(const clang::ValueDecl *D) : D(D) {} |
| 36 | }; |
| 37 | |
| 38 | /// A generic, type-safe wrapper for an ID, distinguished by its `Tag` type. |
| 39 | /// Used for giving ID to loans and origins. |
| 40 | template <typename Tag> struct ID { |
| 41 | uint32_t Value = 0; |
| 42 | |
| 43 | bool operator==(const ID<Tag> &Other) const { return Value == Other.Value; } |
| 44 | bool operator!=(const ID<Tag> &Other) const { return !(*this == Other); } |
| 45 | bool operator<(const ID<Tag> &Other) const { return Value < Other.Value; } |
| 46 | ID<Tag> operator++(int) { |
| 47 | ID<Tag> Tmp = *this; |
| 48 | ++Value; |
| 49 | return Tmp; |
| 50 | } |
| 51 | void Profile(llvm::FoldingSetNodeID &IDBuilder) const { |
| 52 | IDBuilder.AddInteger(I: Value); |
| 53 | } |
| 54 | }; |
| 55 | |
| 56 | template <typename Tag> |
| 57 | inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, ID<Tag> ID) { |
| 58 | return OS << ID.Value; |
| 59 | } |
| 60 | |
| 61 | using LoanID = ID<struct LoanTag>; |
| 62 | using OriginID = ID<struct OriginTag>; |
| 63 | |
| 64 | /// Information about a single borrow, or "Loan". A loan is created when a |
| 65 | /// reference or pointer is created. |
| 66 | struct Loan { |
| 67 | /// TODO: Represent opaque loans. |
| 68 | /// TODO: Represent nullptr: loans to no path. Accessing it UB! Currently it |
| 69 | /// is represented as empty LoanSet |
| 70 | LoanID ID; |
| 71 | AccessPath Path; |
| 72 | SourceLocation IssueLoc; |
| 73 | |
| 74 | Loan(LoanID id, AccessPath path, SourceLocation loc) |
| 75 | : ID(id), Path(path), IssueLoc(loc) {} |
| 76 | }; |
| 77 | |
| 78 | /// An Origin is a symbolic identifier that represents the set of possible |
| 79 | /// loans a pointer-like object could hold at any given time. |
| 80 | /// TODO: Enhance the origin model to handle complex types, pointer |
| 81 | /// indirection and reborrowing. The plan is to move from a single origin per |
| 82 | /// variable/expression to a "list of origins" governed by the Type. |
| 83 | /// For example, the type 'int**' would have two origins. |
| 84 | /// See discussion: |
| 85 | /// https://github.com/llvm/llvm-project/pull/142313/commits/0cd187b01e61b200d92ca0b640789c1586075142#r2137644238 |
| 86 | struct Origin { |
| 87 | OriginID ID; |
| 88 | /// A pointer to the AST node that this origin represents. This union |
| 89 | /// distinguishes between origins from declarations (variables or parameters) |
| 90 | /// and origins from expressions. |
| 91 | llvm::PointerUnion<const clang::ValueDecl *, const clang::Expr *> Ptr; |
| 92 | |
| 93 | Origin(OriginID ID, const clang::ValueDecl *D) : ID(ID), Ptr(D) {} |
| 94 | Origin(OriginID ID, const clang::Expr *E) : ID(ID), Ptr(E) {} |
| 95 | |
| 96 | const clang::ValueDecl *getDecl() const { |
| 97 | return Ptr.dyn_cast<const clang::ValueDecl *>(); |
| 98 | } |
| 99 | const clang::Expr *getExpr() const { |
| 100 | return Ptr.dyn_cast<const clang::Expr *>(); |
| 101 | } |
| 102 | }; |
| 103 | |
| 104 | /// Manages the creation, storage and retrieval of loans. |
| 105 | class LoanManager { |
| 106 | public: |
| 107 | LoanManager() = default; |
| 108 | |
| 109 | Loan &addLoan(AccessPath Path, SourceLocation Loc) { |
| 110 | AllLoans.emplace_back(Args: getNextLoanID(), Args&: Path, Args&: Loc); |
| 111 | return AllLoans.back(); |
| 112 | } |
| 113 | |
| 114 | const Loan &getLoan(LoanID ID) const { |
| 115 | assert(ID.Value < AllLoans.size()); |
| 116 | return AllLoans[ID.Value]; |
| 117 | } |
| 118 | llvm::ArrayRef<Loan> getLoans() const { return AllLoans; } |
| 119 | |
| 120 | private: |
| 121 | LoanID getNextLoanID() { return NextLoanID++; } |
| 122 | |
| 123 | LoanID NextLoanID{.Value: 0}; |
| 124 | /// TODO(opt): Profile and evaluate the usefullness of small buffer |
| 125 | /// optimisation. |
| 126 | llvm::SmallVector<Loan> AllLoans; |
| 127 | }; |
| 128 | |
| 129 | /// Manages the creation, storage, and retrieval of origins for pointer-like |
| 130 | /// variables and expressions. |
| 131 | class OriginManager { |
| 132 | public: |
| 133 | OriginManager() = default; |
| 134 | |
| 135 | Origin &addOrigin(OriginID ID, const clang::ValueDecl &D) { |
| 136 | AllOrigins.emplace_back(Args&: ID, Args: &D); |
| 137 | return AllOrigins.back(); |
| 138 | } |
| 139 | Origin &addOrigin(OriginID ID, const clang::Expr &E) { |
| 140 | AllOrigins.emplace_back(Args&: ID, Args: &E); |
| 141 | return AllOrigins.back(); |
| 142 | } |
| 143 | |
| 144 | OriginID get(const Expr &E) { |
| 145 | // Origin of DeclRefExpr is that of the declaration it refers to. |
| 146 | if (const auto *DRE = dyn_cast<DeclRefExpr>(Val: &E)) |
| 147 | return get(D: *DRE->getDecl()); |
| 148 | auto It = ExprToOriginID.find(Val: &E); |
| 149 | // TODO: This should be an assert(It != ExprToOriginID.end()). The current |
| 150 | // implementation falls back to getOrCreate to avoid crashing on |
| 151 | // yet-unhandled pointer expressions, creating an empty origin for them. |
| 152 | if (It == ExprToOriginID.end()) |
| 153 | return getOrCreate(E); |
| 154 | |
| 155 | return It->second; |
| 156 | } |
| 157 | |
| 158 | OriginID get(const ValueDecl &D) { |
| 159 | auto It = DeclToOriginID.find(Val: &D); |
| 160 | // TODO: This should be an assert(It != DeclToOriginID.end()). The current |
| 161 | // implementation falls back to getOrCreate to avoid crashing on |
| 162 | // yet-unhandled pointer expressions, creating an empty origin for them. |
| 163 | if (It == DeclToOriginID.end()) |
| 164 | return getOrCreate(D); |
| 165 | |
| 166 | return It->second; |
| 167 | } |
| 168 | |
| 169 | OriginID getOrCreate(const Expr &E) { |
| 170 | auto It = ExprToOriginID.find(Val: &E); |
| 171 | if (It != ExprToOriginID.end()) |
| 172 | return It->second; |
| 173 | |
| 174 | if (const auto *DRE = dyn_cast<DeclRefExpr>(Val: &E)) { |
| 175 | // Origin of DeclRefExpr is that of the declaration it refers to. |
| 176 | return getOrCreate(D: *DRE->getDecl()); |
| 177 | } |
| 178 | OriginID NewID = getNextOriginID(); |
| 179 | addOrigin(ID: NewID, E); |
| 180 | ExprToOriginID[&E] = NewID; |
| 181 | return NewID; |
| 182 | } |
| 183 | |
| 184 | const Origin &getOrigin(OriginID ID) const { |
| 185 | assert(ID.Value < AllOrigins.size()); |
| 186 | return AllOrigins[ID.Value]; |
| 187 | } |
| 188 | |
| 189 | llvm::ArrayRef<Origin> getOrigins() const { return AllOrigins; } |
| 190 | |
| 191 | OriginID getOrCreate(const ValueDecl &D) { |
| 192 | auto It = DeclToOriginID.find(Val: &D); |
| 193 | if (It != DeclToOriginID.end()) |
| 194 | return It->second; |
| 195 | OriginID NewID = getNextOriginID(); |
| 196 | addOrigin(ID: NewID, D); |
| 197 | DeclToOriginID[&D] = NewID; |
| 198 | return NewID; |
| 199 | } |
| 200 | |
| 201 | private: |
| 202 | OriginID getNextOriginID() { return NextOriginID++; } |
| 203 | |
| 204 | OriginID NextOriginID{.Value: 0}; |
| 205 | /// TODO(opt): Profile and evaluate the usefullness of small buffer |
| 206 | /// optimisation. |
| 207 | llvm::SmallVector<Origin> AllOrigins; |
| 208 | llvm::DenseMap<const clang::ValueDecl *, OriginID> DeclToOriginID; |
| 209 | llvm::DenseMap<const clang::Expr *, OriginID> ExprToOriginID; |
| 210 | }; |
| 211 | |
| 212 | /// An abstract base class for a single, atomic lifetime-relevant event. |
| 213 | class Fact { |
| 214 | |
| 215 | public: |
| 216 | enum class Kind : uint8_t { |
| 217 | /// A new loan is issued from a borrow expression (e.g., &x). |
| 218 | Issue, |
| 219 | /// A loan expires as its underlying storage is freed (e.g., variable goes |
| 220 | /// out of scope). |
| 221 | Expire, |
| 222 | /// An origin is propagated from a source to a destination (e.g., p = q). |
| 223 | AssignOrigin, |
| 224 | /// An origin escapes the function by flowing into the return value. |
| 225 | ReturnOfOrigin |
| 226 | }; |
| 227 | |
| 228 | private: |
| 229 | Kind K; |
| 230 | |
| 231 | protected: |
| 232 | Fact(Kind K) : K(K) {} |
| 233 | |
| 234 | public: |
| 235 | virtual ~Fact() = default; |
| 236 | Kind getKind() const { return K; } |
| 237 | |
| 238 | template <typename T> const T *getAs() const { |
| 239 | if (T::classof(this)) |
| 240 | return static_cast<const T *>(this); |
| 241 | return nullptr; |
| 242 | } |
| 243 | |
| 244 | virtual void dump(llvm::raw_ostream &OS) const { |
| 245 | OS << "Fact (Kind: " << static_cast<int>(K) << ")\n" ; |
| 246 | } |
| 247 | }; |
| 248 | |
| 249 | class IssueFact : public Fact { |
| 250 | LoanID LID; |
| 251 | OriginID OID; |
| 252 | |
| 253 | public: |
| 254 | static bool classof(const Fact *F) { return F->getKind() == Kind::Issue; } |
| 255 | |
| 256 | IssueFact(LoanID LID, OriginID OID) : Fact(Kind::Issue), LID(LID), OID(OID) {} |
| 257 | LoanID getLoanID() const { return LID; } |
| 258 | OriginID getOriginID() const { return OID; } |
| 259 | void dump(llvm::raw_ostream &OS) const override { |
| 260 | OS << "Issue (LoanID: " << getLoanID() << ", OriginID: " << getOriginID() |
| 261 | << ")\n" ; |
| 262 | } |
| 263 | }; |
| 264 | |
| 265 | class ExpireFact : public Fact { |
| 266 | LoanID LID; |
| 267 | |
| 268 | public: |
| 269 | static bool classof(const Fact *F) { return F->getKind() == Kind::Expire; } |
| 270 | |
| 271 | ExpireFact(LoanID LID) : Fact(Kind::Expire), LID(LID) {} |
| 272 | LoanID getLoanID() const { return LID; } |
| 273 | void dump(llvm::raw_ostream &OS) const override { |
| 274 | OS << "Expire (LoanID: " << getLoanID() << ")\n" ; |
| 275 | } |
| 276 | }; |
| 277 | |
| 278 | class AssignOriginFact : public Fact { |
| 279 | OriginID OIDDest; |
| 280 | OriginID OIDSrc; |
| 281 | |
| 282 | public: |
| 283 | static bool classof(const Fact *F) { |
| 284 | return F->getKind() == Kind::AssignOrigin; |
| 285 | } |
| 286 | |
| 287 | AssignOriginFact(OriginID OIDDest, OriginID OIDSrc) |
| 288 | : Fact(Kind::AssignOrigin), OIDDest(OIDDest), OIDSrc(OIDSrc) {} |
| 289 | OriginID getDestOriginID() const { return OIDDest; } |
| 290 | OriginID getSrcOriginID() const { return OIDSrc; } |
| 291 | void dump(llvm::raw_ostream &OS) const override { |
| 292 | OS << "AssignOrigin (DestID: " << getDestOriginID() |
| 293 | << ", SrcID: " << getSrcOriginID() << ")\n" ; |
| 294 | } |
| 295 | }; |
| 296 | |
| 297 | class ReturnOfOriginFact : public Fact { |
| 298 | OriginID OID; |
| 299 | |
| 300 | public: |
| 301 | static bool classof(const Fact *F) { |
| 302 | return F->getKind() == Kind::ReturnOfOrigin; |
| 303 | } |
| 304 | |
| 305 | ReturnOfOriginFact(OriginID OID) : Fact(Kind::ReturnOfOrigin), OID(OID) {} |
| 306 | OriginID getReturnedOriginID() const { return OID; } |
| 307 | void dump(llvm::raw_ostream &OS) const override { |
| 308 | OS << "ReturnOfOrigin (OriginID: " << getReturnedOriginID() << ")\n" ; |
| 309 | } |
| 310 | }; |
| 311 | |
| 312 | class FactManager { |
| 313 | public: |
| 314 | llvm::ArrayRef<const Fact *> getFacts(const CFGBlock *B) const { |
| 315 | auto It = BlockToFactsMap.find(Val: B); |
| 316 | if (It != BlockToFactsMap.end()) |
| 317 | return It->second; |
| 318 | return {}; |
| 319 | } |
| 320 | |
| 321 | void addBlockFacts(const CFGBlock *B, llvm::ArrayRef<Fact *> NewFacts) { |
| 322 | if (!NewFacts.empty()) |
| 323 | BlockToFactsMap[B].assign(in_start: NewFacts.begin(), in_end: NewFacts.end()); |
| 324 | } |
| 325 | |
| 326 | template <typename FactType, typename... Args> |
| 327 | FactType *createFact(Args &&...args) { |
| 328 | void *Mem = FactAllocator.Allocate<FactType>(); |
| 329 | return new (Mem) FactType(std::forward<Args>(args)...); |
| 330 | } |
| 331 | |
| 332 | void dump(const CFG &Cfg, AnalysisDeclContext &AC) const { |
| 333 | llvm::dbgs() << "==========================================\n" ; |
| 334 | llvm::dbgs() << " Lifetime Analysis Facts:\n" ; |
| 335 | llvm::dbgs() << "==========================================\n" ; |
| 336 | if (const Decl *D = AC.getDecl()) |
| 337 | if (const auto *ND = dyn_cast<NamedDecl>(Val: D)) |
| 338 | llvm::dbgs() << "Function: " << ND->getQualifiedNameAsString() << "\n" ; |
| 339 | // Print blocks in the order as they appear in code for a stable ordering. |
| 340 | for (const CFGBlock *B : *AC.getAnalysis<PostOrderCFGView>()) { |
| 341 | llvm::dbgs() << " Block B" << B->getBlockID() << ":\n" ; |
| 342 | auto It = BlockToFactsMap.find(Val: B); |
| 343 | if (It != BlockToFactsMap.end()) { |
| 344 | for (const Fact *F : It->second) { |
| 345 | llvm::dbgs() << " " ; |
| 346 | F->dump(OS&: llvm::dbgs()); |
| 347 | } |
| 348 | } |
| 349 | llvm::dbgs() << " End of Block\n" ; |
| 350 | } |
| 351 | } |
| 352 | |
| 353 | LoanManager &getLoanMgr() { return LoanMgr; } |
| 354 | OriginManager &getOriginMgr() { return OriginMgr; } |
| 355 | |
| 356 | private: |
| 357 | LoanManager LoanMgr; |
| 358 | OriginManager OriginMgr; |
| 359 | llvm::DenseMap<const clang::CFGBlock *, llvm::SmallVector<const Fact *>> |
| 360 | BlockToFactsMap; |
| 361 | llvm::BumpPtrAllocator FactAllocator; |
| 362 | }; |
| 363 | |
| 364 | class FactGenerator : public ConstStmtVisitor<FactGenerator> { |
| 365 | |
| 366 | public: |
| 367 | FactGenerator(FactManager &FactMgr, AnalysisDeclContext &AC) |
| 368 | : FactMgr(FactMgr), AC(AC) {} |
| 369 | |
| 370 | void run() { |
| 371 | llvm::TimeTraceScope TimeProfile("FactGenerator" ); |
| 372 | // Iterate through the CFG blocks in reverse post-order to ensure that |
| 373 | // initializations and destructions are processed in the correct sequence. |
| 374 | for (const CFGBlock *Block : *AC.getAnalysis<PostOrderCFGView>()) { |
| 375 | CurrentBlockFacts.clear(); |
| 376 | for (unsigned I = 0; I < Block->size(); ++I) { |
| 377 | const CFGElement &Element = Block->Elements[I]; |
| 378 | if (std::optional<CFGStmt> CS = Element.getAs<CFGStmt>()) |
| 379 | Visit(S: CS->getStmt()); |
| 380 | else if (std::optional<CFGAutomaticObjDtor> DtorOpt = |
| 381 | Element.getAs<CFGAutomaticObjDtor>()) |
| 382 | handleDestructor(DtorOpt: *DtorOpt); |
| 383 | } |
| 384 | FactMgr.addBlockFacts(B: Block, NewFacts: CurrentBlockFacts); |
| 385 | } |
| 386 | } |
| 387 | |
| 388 | void VisitDeclStmt(const DeclStmt *DS) { |
| 389 | for (const Decl *D : DS->decls()) |
| 390 | if (const auto *VD = dyn_cast<VarDecl>(Val: D)) |
| 391 | if (hasOrigin(QT: VD->getType())) |
| 392 | if (const Expr *InitExpr = VD->getInit()) |
| 393 | addAssignOriginFact(D: *VD, S: *InitExpr); |
| 394 | } |
| 395 | |
| 396 | void VisitCXXNullPtrLiteralExpr(const CXXNullPtrLiteralExpr *N) { |
| 397 | /// TODO: Handle nullptr expr as a special 'null' loan. Uninitialized |
| 398 | /// pointers can use the same type of loan. |
| 399 | FactMgr.getOriginMgr().getOrCreate(E: *N); |
| 400 | } |
| 401 | |
| 402 | void VisitImplicitCastExpr(const ImplicitCastExpr *ICE) { |
| 403 | if (!hasOrigin(QT: ICE->getType())) |
| 404 | return; |
| 405 | Visit(S: ICE->getSubExpr()); |
| 406 | // An ImplicitCastExpr node itself gets an origin, which flows from the |
| 407 | // origin of its sub-expression (after stripping its own parens/casts). |
| 408 | // TODO: Consider if this is actually useful in practice. Alternatively, we |
| 409 | // could directly use the sub-expression's OriginID instead of creating a |
| 410 | // new one. |
| 411 | addAssignOriginFact(D: *ICE, S: *ICE->getSubExpr()); |
| 412 | } |
| 413 | |
| 414 | void VisitUnaryOperator(const UnaryOperator *UO) { |
| 415 | if (UO->getOpcode() == UO_AddrOf) { |
| 416 | const Expr *SubExpr = UO->getSubExpr(); |
| 417 | if (const auto *DRE = dyn_cast<DeclRefExpr>(Val: SubExpr)) { |
| 418 | if (const auto *VD = dyn_cast<VarDecl>(Val: DRE->getDecl())) { |
| 419 | // Check if it's a local variable. |
| 420 | if (VD->hasLocalStorage()) { |
| 421 | OriginID OID = FactMgr.getOriginMgr().getOrCreate(E: *UO); |
| 422 | AccessPath AddrOfLocalVarPath(VD); |
| 423 | const Loan &L = FactMgr.getLoanMgr().addLoan(Path: AddrOfLocalVarPath, |
| 424 | Loc: UO->getOperatorLoc()); |
| 425 | CurrentBlockFacts.push_back( |
| 426 | Elt: FactMgr.createFact<IssueFact>(args: L.ID, args&: OID)); |
| 427 | } |
| 428 | } |
| 429 | } |
| 430 | } |
| 431 | } |
| 432 | |
| 433 | void VisitReturnStmt(const ReturnStmt *RS) { |
| 434 | if (const Expr *RetExpr = RS->getRetValue()) { |
| 435 | if (hasOrigin(QT: RetExpr->getType())) { |
| 436 | OriginID OID = FactMgr.getOriginMgr().getOrCreate(E: *RetExpr); |
| 437 | CurrentBlockFacts.push_back( |
| 438 | Elt: FactMgr.createFact<ReturnOfOriginFact>(args&: OID)); |
| 439 | } |
| 440 | } |
| 441 | } |
| 442 | |
| 443 | void VisitBinaryOperator(const BinaryOperator *BO) { |
| 444 | if (BO->isAssignmentOp()) { |
| 445 | const Expr *LHSExpr = BO->getLHS(); |
| 446 | const Expr *RHSExpr = BO->getRHS(); |
| 447 | |
| 448 | // We are interested in assignments like `ptr1 = ptr2` or `ptr = &var` |
| 449 | // LHS must be a pointer/reference type that can be an origin. |
| 450 | // RHS must also represent an origin (either another pointer/ref or an |
| 451 | // address-of). |
| 452 | if (const auto *DRE_LHS = dyn_cast<DeclRefExpr>(Val: LHSExpr)) |
| 453 | if (const auto *VD_LHS = |
| 454 | dyn_cast<ValueDecl>(Val: DRE_LHS->getDecl()->getCanonicalDecl()); |
| 455 | VD_LHS && hasOrigin(QT: VD_LHS->getType())) |
| 456 | addAssignOriginFact(D: *VD_LHS, S: *RHSExpr); |
| 457 | } |
| 458 | } |
| 459 | |
| 460 | private: |
| 461 | // Check if a type has an origin. |
| 462 | bool hasOrigin(QualType QT) { return QT->isPointerOrReferenceType(); } |
| 463 | |
| 464 | template <typename Destination, typename Source> |
| 465 | void addAssignOriginFact(const Destination &D, const Source &S) { |
| 466 | OriginID DestOID = FactMgr.getOriginMgr().getOrCreate(D); |
| 467 | OriginID SrcOID = FactMgr.getOriginMgr().get(S); |
| 468 | CurrentBlockFacts.push_back( |
| 469 | Elt: FactMgr.createFact<AssignOriginFact>(args&: DestOID, args&: SrcOID)); |
| 470 | } |
| 471 | |
| 472 | void handleDestructor(const CFGAutomaticObjDtor &DtorOpt) { |
| 473 | /// TODO: Also handle trivial destructors (e.g., for `int` |
| 474 | /// variables) which will never have a CFGAutomaticObjDtor node. |
| 475 | /// TODO: Handle loans to temporaries. |
| 476 | /// TODO: Consider using clang::CFG::BuildOptions::AddLifetime to reuse the |
| 477 | /// lifetime ends. |
| 478 | const VarDecl *DestructedVD = DtorOpt.getVarDecl(); |
| 479 | if (!DestructedVD) |
| 480 | return; |
| 481 | // Iterate through all loans to see if any expire. |
| 482 | /// TODO(opt): Do better than a linear search to find loans associated with |
| 483 | /// 'DestructedVD'. |
| 484 | for (const Loan &L : FactMgr.getLoanMgr().getLoans()) { |
| 485 | const AccessPath &LoanPath = L.Path; |
| 486 | // Check if the loan is for a stack variable and if that variable |
| 487 | // is the one being destructed. |
| 488 | if (LoanPath.D == DestructedVD) |
| 489 | CurrentBlockFacts.push_back(Elt: FactMgr.createFact<ExpireFact>(args: L.ID)); |
| 490 | } |
| 491 | } |
| 492 | |
| 493 | FactManager &FactMgr; |
| 494 | AnalysisDeclContext &AC; |
| 495 | llvm::SmallVector<Fact *> CurrentBlockFacts; |
| 496 | }; |
| 497 | |
| 498 | // ========================================================================= // |
| 499 | // The Dataflow Lattice |
| 500 | // ========================================================================= // |
| 501 | |
| 502 | // Using LLVM's immutable collections is efficient for dataflow analysis |
| 503 | // as it avoids deep copies during state transitions. |
| 504 | // TODO(opt): Consider using a bitset to represent the set of loans. |
| 505 | using LoanSet = llvm::ImmutableSet<LoanID>; |
| 506 | using OriginLoanMap = llvm::ImmutableMap<OriginID, LoanSet>; |
| 507 | |
| 508 | /// An object to hold the factories for immutable collections, ensuring |
| 509 | /// that all created states share the same underlying memory management. |
| 510 | struct LifetimeFactory { |
| 511 | OriginLoanMap::Factory OriginMapFactory; |
| 512 | LoanSet::Factory LoanSetFact; |
| 513 | |
| 514 | /// Creates a singleton set containing only the given loan ID. |
| 515 | LoanSet createLoanSet(LoanID LID) { |
| 516 | return LoanSetFact.add(Old: LoanSetFact.getEmptySet(), V: LID); |
| 517 | } |
| 518 | }; |
| 519 | |
| 520 | /// LifetimeLattice represents the state of our analysis at a given program |
| 521 | /// point. It is an immutable object, and all operations produce a new |
| 522 | /// instance rather than modifying the existing one. |
| 523 | struct LifetimeLattice { |
| 524 | /// The map from an origin to the set of loans it contains. |
| 525 | /// The lattice has a finite height: An origin's loan set is bounded by the |
| 526 | /// total number of loans in the function. |
| 527 | /// TODO(opt): To reduce the lattice size, propagate origins of declarations, |
| 528 | /// not expressions, because expressions are not visible across blocks. |
| 529 | OriginLoanMap Origins = OriginLoanMap(nullptr); |
| 530 | |
| 531 | explicit LifetimeLattice(const OriginLoanMap &S) : Origins(S) {} |
| 532 | LifetimeLattice() = default; |
| 533 | |
| 534 | bool operator==(const LifetimeLattice &Other) const { |
| 535 | return Origins == Other.Origins; |
| 536 | } |
| 537 | bool operator!=(const LifetimeLattice &Other) const { |
| 538 | return !(*this == Other); |
| 539 | } |
| 540 | |
| 541 | LoanSet getLoans(OriginID OID) const { |
| 542 | if (auto *Loans = Origins.lookup(K: OID)) |
| 543 | return *Loans; |
| 544 | return LoanSet(nullptr); |
| 545 | } |
| 546 | |
| 547 | /// Computes the union of two lattices by performing a key-wise join of |
| 548 | /// their OriginLoanMaps. |
| 549 | // TODO(opt): This key-wise join is a performance bottleneck. A more |
| 550 | // efficient merge could be implemented using a Patricia Trie or HAMT |
| 551 | // instead of the current AVL-tree-based ImmutableMap. |
| 552 | // TODO(opt): Keep the state small by removing origins which become dead. |
| 553 | LifetimeLattice join(const LifetimeLattice &Other, |
| 554 | LifetimeFactory &Factory) const { |
| 555 | /// Merge the smaller map into the larger one ensuring we iterate over the |
| 556 | /// smaller map. |
| 557 | if (Origins.getHeight() < Other.Origins.getHeight()) |
| 558 | return Other.join(Other: *this, Factory); |
| 559 | |
| 560 | OriginLoanMap JoinedState = Origins; |
| 561 | // For each origin in the other map, union its loan set with ours. |
| 562 | for (const auto &Entry : Other.Origins) { |
| 563 | OriginID OID = Entry.first; |
| 564 | LoanSet OtherLoanSet = Entry.second; |
| 565 | JoinedState = Factory.OriginMapFactory.add( |
| 566 | Old: JoinedState, K: OID, D: join(a: getLoans(OID), b: OtherLoanSet, Factory)); |
| 567 | } |
| 568 | return LifetimeLattice(JoinedState); |
| 569 | } |
| 570 | |
| 571 | LoanSet join(LoanSet a, LoanSet b, LifetimeFactory &Factory) const { |
| 572 | /// Merge the smaller set into the larger one ensuring we iterate over the |
| 573 | /// smaller set. |
| 574 | if (a.getHeight() < b.getHeight()) |
| 575 | std::swap(a&: a, b&: b); |
| 576 | LoanSet Result = a; |
| 577 | for (LoanID LID : b) { |
| 578 | /// TODO(opt): Profiling shows that this loop is a major performance |
| 579 | /// bottleneck. Investigate using a BitVector to represent the set of |
| 580 | /// loans for improved join performance. |
| 581 | Result = Factory.LoanSetFact.add(Old: Result, V: LID); |
| 582 | } |
| 583 | return Result; |
| 584 | } |
| 585 | |
| 586 | void dump(llvm::raw_ostream &OS) const { |
| 587 | OS << "LifetimeLattice State:\n" ; |
| 588 | if (Origins.isEmpty()) |
| 589 | OS << " <empty>\n" ; |
| 590 | for (const auto &Entry : Origins) { |
| 591 | if (Entry.second.isEmpty()) |
| 592 | OS << " Origin " << Entry.first << " contains no loans\n" ; |
| 593 | for (const LoanID &LID : Entry.second) |
| 594 | OS << " Origin " << Entry.first << " contains Loan " << LID << "\n" ; |
| 595 | } |
| 596 | } |
| 597 | }; |
| 598 | |
| 599 | // ========================================================================= // |
| 600 | // The Transfer Function |
| 601 | // ========================================================================= // |
| 602 | class Transferer { |
| 603 | FactManager &AllFacts; |
| 604 | LifetimeFactory &Factory; |
| 605 | |
| 606 | public: |
| 607 | explicit Transferer(FactManager &F, LifetimeFactory &Factory) |
| 608 | : AllFacts(F), Factory(Factory) {} |
| 609 | |
| 610 | /// Computes the exit state of a block by applying all its facts sequentially |
| 611 | /// to a given entry state. |
| 612 | /// TODO: We might need to store intermediate states per-fact in the block for |
| 613 | /// later analysis. |
| 614 | LifetimeLattice transferBlock(const CFGBlock *Block, |
| 615 | LifetimeLattice EntryState) { |
| 616 | LifetimeLattice BlockState = EntryState; |
| 617 | llvm::ArrayRef<const Fact *> Facts = AllFacts.getFacts(B: Block); |
| 618 | |
| 619 | for (const Fact *F : Facts) { |
| 620 | BlockState = transferFact(In: BlockState, F); |
| 621 | } |
| 622 | return BlockState; |
| 623 | } |
| 624 | |
| 625 | private: |
| 626 | LifetimeLattice transferFact(LifetimeLattice In, const Fact *F) { |
| 627 | switch (F->getKind()) { |
| 628 | case Fact::Kind::Issue: |
| 629 | return transfer(In, F: *F->getAs<IssueFact>()); |
| 630 | case Fact::Kind::AssignOrigin: |
| 631 | return transfer(InState: In, F: *F->getAs<AssignOriginFact>()); |
| 632 | // Expire and ReturnOfOrigin facts don't modify the Origins and the State. |
| 633 | case Fact::Kind::Expire: |
| 634 | case Fact::Kind::ReturnOfOrigin: |
| 635 | return In; |
| 636 | } |
| 637 | llvm_unreachable("Unknown fact kind" ); |
| 638 | } |
| 639 | |
| 640 | /// A new loan is issued to the origin. Old loans are erased. |
| 641 | LifetimeLattice transfer(LifetimeLattice In, const IssueFact &F) { |
| 642 | OriginID OID = F.getOriginID(); |
| 643 | LoanID LID = F.getLoanID(); |
| 644 | return LifetimeLattice(Factory.OriginMapFactory.add( |
| 645 | Old: In.Origins, K: OID, D: Factory.createLoanSet(LID))); |
| 646 | } |
| 647 | |
| 648 | /// The destination origin's loan set is replaced by the source's. |
| 649 | /// This implicitly "resets" the old loans of the destination. |
| 650 | LifetimeLattice transfer(LifetimeLattice InState, const AssignOriginFact &F) { |
| 651 | OriginID DestOID = F.getDestOriginID(); |
| 652 | OriginID SrcOID = F.getSrcOriginID(); |
| 653 | LoanSet SrcLoans = InState.getLoans(OID: SrcOID); |
| 654 | return LifetimeLattice( |
| 655 | Factory.OriginMapFactory.add(Old: InState.Origins, K: DestOID, D: SrcLoans)); |
| 656 | } |
| 657 | }; |
| 658 | |
| 659 | // ========================================================================= // |
| 660 | // Dataflow analysis |
| 661 | // ========================================================================= // |
| 662 | |
| 663 | /// Drives the intra-procedural dataflow analysis. |
| 664 | /// |
| 665 | /// Orchestrates the analysis by iterating over the CFG using a worklist |
| 666 | /// algorithm. It computes a fixed point by propagating the LifetimeLattice |
| 667 | /// state through each block until the state no longer changes. |
| 668 | /// TODO: Maybe use the dataflow framework! The framework might need changes |
| 669 | /// to support the current comparison done at block-entry. |
| 670 | class LifetimeDataflow { |
| 671 | const CFG &Cfg; |
| 672 | AnalysisDeclContext &AC; |
| 673 | LifetimeFactory LifetimeFact; |
| 674 | |
| 675 | Transferer Xfer; |
| 676 | |
| 677 | /// Stores the merged analysis state at the entry of each CFG block. |
| 678 | llvm::DenseMap<const CFGBlock *, LifetimeLattice> BlockEntryStates; |
| 679 | /// Stores the analysis state at the exit of each CFG block, after the |
| 680 | /// transfer function has been applied. |
| 681 | llvm::DenseMap<const CFGBlock *, LifetimeLattice> BlockExitStates; |
| 682 | |
| 683 | public: |
| 684 | LifetimeDataflow(const CFG &C, FactManager &FS, AnalysisDeclContext &AC) |
| 685 | : Cfg(C), AC(AC), Xfer(FS, LifetimeFact) {} |
| 686 | |
| 687 | void run() { |
| 688 | llvm::TimeTraceScope TimeProfile("Lifetime Dataflow" ); |
| 689 | ForwardDataflowWorklist Worklist(Cfg, AC); |
| 690 | const CFGBlock *Entry = &Cfg.getEntry(); |
| 691 | BlockEntryStates[Entry] = LifetimeLattice{}; |
| 692 | Worklist.enqueueBlock(Block: Entry); |
| 693 | while (const CFGBlock *B = Worklist.dequeue()) { |
| 694 | LifetimeLattice EntryState = getEntryState(B); |
| 695 | LifetimeLattice ExitState = Xfer.transferBlock(Block: B, EntryState); |
| 696 | BlockExitStates[B] = ExitState; |
| 697 | |
| 698 | for (const CFGBlock *Successor : B->succs()) { |
| 699 | auto SuccIt = BlockEntryStates.find(Val: Successor); |
| 700 | LifetimeLattice OldSuccEntryState = (SuccIt != BlockEntryStates.end()) |
| 701 | ? SuccIt->second |
| 702 | : LifetimeLattice{}; |
| 703 | LifetimeLattice NewSuccEntryState = |
| 704 | OldSuccEntryState.join(Other: ExitState, Factory&: LifetimeFact); |
| 705 | // Enqueue the successor if its entry state has changed. |
| 706 | // TODO(opt): Consider changing 'join' to report a change if != |
| 707 | // comparison is found expensive. |
| 708 | if (SuccIt == BlockEntryStates.end() || |
| 709 | NewSuccEntryState != OldSuccEntryState) { |
| 710 | BlockEntryStates[Successor] = NewSuccEntryState; |
| 711 | Worklist.enqueueBlock(Block: Successor); |
| 712 | } |
| 713 | } |
| 714 | } |
| 715 | } |
| 716 | |
| 717 | void dump() const { |
| 718 | llvm::dbgs() << "==========================================\n" ; |
| 719 | llvm::dbgs() << " Dataflow results:\n" ; |
| 720 | llvm::dbgs() << "==========================================\n" ; |
| 721 | const CFGBlock &B = Cfg.getExit(); |
| 722 | getExitState(B: &B).dump(OS&: llvm::dbgs()); |
| 723 | } |
| 724 | |
| 725 | LifetimeLattice getEntryState(const CFGBlock *B) const { |
| 726 | return BlockEntryStates.lookup(Val: B); |
| 727 | } |
| 728 | |
| 729 | LifetimeLattice getExitState(const CFGBlock *B) const { |
| 730 | return BlockExitStates.lookup(Val: B); |
| 731 | } |
| 732 | }; |
| 733 | |
| 734 | // ========================================================================= // |
| 735 | // TODO: Analysing dataflow results and error reporting. |
| 736 | // ========================================================================= // |
| 737 | } // anonymous namespace |
| 738 | |
| 739 | void runLifetimeSafetyAnalysis(const DeclContext &DC, const CFG &Cfg, |
| 740 | AnalysisDeclContext &AC) { |
| 741 | llvm::TimeTraceScope TimeProfile("LifetimeSafetyAnalysis" ); |
| 742 | DEBUG_WITH_TYPE("PrintCFG" , Cfg.dump(AC.getASTContext().getLangOpts(), |
| 743 | /*ShowColors=*/true)); |
| 744 | FactManager FactMgr; |
| 745 | FactGenerator FactGen(FactMgr, AC); |
| 746 | FactGen.run(); |
| 747 | DEBUG_WITH_TYPE("LifetimeFacts" , FactMgr.dump(Cfg, AC)); |
| 748 | |
| 749 | /// TODO(opt): Consider optimizing individual blocks before running the |
| 750 | /// dataflow analysis. |
| 751 | /// 1. Expression Origins: These are assigned once and read at most once, |
| 752 | /// forming simple chains. These chains can be compressed into a single |
| 753 | /// assignment. |
| 754 | /// 2. Block-Local Loans: Origins of expressions are never read by other |
| 755 | /// blocks; only Decls are visible. Therefore, loans in a block that |
| 756 | /// never reach an Origin associated with a Decl can be safely dropped by |
| 757 | /// the analysis. |
| 758 | LifetimeDataflow Dataflow(Cfg, FactMgr, AC); |
| 759 | Dataflow.run(); |
| 760 | DEBUG_WITH_TYPE("LifetimeDataflow" , Dataflow.dump()); |
| 761 | } |
| 762 | } // namespace clang |
| 763 | |