| 1 | //===- StackArrays.cpp ----------------------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "flang/Optimizer/Builder/FIRBuilder.h" |
| 10 | #include "flang/Optimizer/Builder/LowLevelIntrinsics.h" |
| 11 | #include "flang/Optimizer/Dialect/FIRAttr.h" |
| 12 | #include "flang/Optimizer/Dialect/FIRDialect.h" |
| 13 | #include "flang/Optimizer/Dialect/FIROps.h" |
| 14 | #include "flang/Optimizer/Dialect/FIRType.h" |
| 15 | #include "flang/Optimizer/Dialect/Support/FIRContext.h" |
| 16 | #include "flang/Optimizer/Support/DataLayout.h" |
| 17 | #include "flang/Optimizer/Transforms/Passes.h" |
| 18 | #include "mlir/Analysis/DataFlow/ConstantPropagationAnalysis.h" |
| 19 | #include "mlir/Analysis/DataFlow/DeadCodeAnalysis.h" |
| 20 | #include "mlir/Analysis/DataFlow/DenseAnalysis.h" |
| 21 | #include "mlir/Analysis/DataFlowFramework.h" |
| 22 | #include "mlir/Dialect/DLTI/DLTI.h" |
| 23 | #include "mlir/Dialect/Func/IR/FuncOps.h" |
| 24 | #include "mlir/Dialect/LLVMIR/LLVMDialect.h" |
| 25 | #include "mlir/Dialect/OpenMP/OpenMPDialect.h" |
| 26 | #include "mlir/IR/Builders.h" |
| 27 | #include "mlir/IR/Diagnostics.h" |
| 28 | #include "mlir/IR/Value.h" |
| 29 | #include "mlir/Interfaces/LoopLikeInterface.h" |
| 30 | #include "mlir/Pass/Pass.h" |
| 31 | #include "mlir/Transforms/GreedyPatternRewriteDriver.h" |
| 32 | #include "mlir/Transforms/Passes.h" |
| 33 | #include "llvm/ADT/DenseMap.h" |
| 34 | #include "llvm/ADT/DenseSet.h" |
| 35 | #include "llvm/ADT/PointerUnion.h" |
| 36 | #include "llvm/Support/Casting.h" |
| 37 | #include "llvm/Support/raw_ostream.h" |
| 38 | #include <optional> |
| 39 | |
| 40 | namespace fir { |
| 41 | #define GEN_PASS_DEF_STACKARRAYS |
| 42 | #include "flang/Optimizer/Transforms/Passes.h.inc" |
| 43 | } // namespace fir |
| 44 | |
| 45 | #define DEBUG_TYPE "stack-arrays" |
| 46 | |
// Command-line knobs for this pass (hidden: intended for testing/tuning).

/// Upper bound on the number of fir.allocmem ops considered per function;
/// analyseFunction bails out early above this limit. 0 disables the limit.
static llvm::cl::opt<std::size_t> maxAllocsPerFunc(
    "stack-arrays-max-allocs",
    llvm::cl::desc("The maximum number of heap allocations to consider in one "
                   "function before skipping (to save compilation time). Set "
                   "to 0 for no limit."),
    llvm::cl::init(1000), llvm::cl::Hidden);

/// When enabled, lifetime markers are emitted for the generated
/// constant-size allocas (see AllocMemConversion::insertLifetimeMarkers).
static llvm::cl::opt<bool> emitLifetimeMarkers(
    "stack-arrays-lifetime",
    llvm::cl::desc("Add lifetime markers to generated constant size allocas"),
    llvm::cl::init(false), llvm::cl::Hidden);
| 58 | |
| 59 | namespace { |
| 60 | |
| 61 | /// The state of an SSA value at each program point |
/// The state of an SSA value at each program point.
/// States arriving from different control-flow paths are combined by ::join
/// below: any disagreement degrades to Unknown.
enum class AllocationState {
  /// This means that the allocation state of a variable cannot be determined
  /// at this program point, e.g. because one route through a conditional freed
  /// the variable and the other route didn't.
  /// This asserts a known-unknown: different from the unknown-unknown of having
  /// no AllocationState stored for a particular SSA value
  Unknown,
  /// Means this SSA value was allocated on the heap in this function and has
  /// now been freed
  Freed,
  /// Means this SSA value was allocated on the heap in this function and is a
  /// candidate for moving to the stack
  Allocated,
};
| 76 | |
| 77 | /// Stores where an alloca should be inserted. If the PointerUnion is an |
| 78 | /// Operation the alloca should be inserted /after/ the operation. If it is a |
| 79 | /// block, the alloca can be placed anywhere in that block. |
/// Stores where an alloca should be inserted. If the PointerUnion is an
/// Operation the alloca should be inserted /after/ the operation. If it is a
/// block, the alloca can be placed anywhere in that block.
class InsertionPoint {
  // Null location means "no valid insertion point was found"
  llvm::PointerUnion<mlir::Operation *, mlir::Block *> location;
  // Whether llvm.stacksave/llvm.stackrestore must bracket the new alloca's
  // live range (set when the alloca has to stay inside a loop)
  bool saveRestoreStack;

  /// Get contained pointer type or nullptr
  template <class T>
  T *tryGetPtr() const {
    // Use llvm::dyn_cast_if_present because location may be null here.
    if (T *ptr = llvm::dyn_cast_if_present<T *>(location))
      return ptr;
    return nullptr;
  }

public:
  template <class T>
  InsertionPoint(T *ptr, bool saveRestoreStack = false)
      : location(ptr), saveRestoreStack{saveRestoreStack} {}
  /// Construct an invalid (null) insertion point
  InsertionPoint(std::nullptr_t null)
      : location(null), saveRestoreStack{false} {}

  /// Get contained operation, or nullptr
  mlir::Operation *tryGetOperation() const {
    return tryGetPtr<mlir::Operation>();
  }

  /// Get contained block, or nullptr
  mlir::Block *tryGetBlock() const { return tryGetPtr<mlir::Block>(); }

  /// Get whether the stack should be saved/restored. If yes, an llvm.stacksave
  /// intrinsic should be added before the alloca, and an llvm.stackrestore
  /// intrinsic should be added where the freemem is
  bool shouldSaveRestoreStack() const { return saveRestoreStack; }

  /// True when this holds a valid (non-null) location
  operator bool() const { return tryGetOperation() || tryGetBlock(); }

  bool operator==(const InsertionPoint &rhs) const {
    return (location == rhs.location) &&
           (saveRestoreStack == rhs.saveRestoreStack);
  }

  bool operator!=(const InsertionPoint &rhs) const { return !(*this == rhs); }
};
| 122 | |
| 123 | /// Maps SSA values to their AllocationState at a particular program point. |
| 124 | /// Also caches the insertion points for the new alloca operations |
/// Maps SSA values to their AllocationState at a particular program point.
/// Also caches the insertion points for the new alloca operations
class LatticePoint : public mlir::dataflow::AbstractDenseLattice {
  // Maps all values we are interested in to states
  llvm::SmallDenseMap<mlir::Value, AllocationState, 1> stateMap;

public:
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(LatticePoint)
  using AbstractDenseLattice::AbstractDenseLattice;

  bool operator==(const LatticePoint &rhs) const {
    return stateMap == rhs.stateMap;
  }

  /// Join the lattice across control-flow edges
  mlir::ChangeResult join(const AbstractDenseLattice &lattice) override;

  /// Print each tracked value and its state (for debugging)
  void print(llvm::raw_ostream &os) const override;

  /// Clear all modifications
  mlir::ChangeResult reset();

  /// Set the state of an SSA value
  mlir::ChangeResult set(mlir::Value value, AllocationState state);

  /// Get fir.allocmem ops which were allocated in this function and always
  /// freed before the function returns, plus where to insert replacement
  /// fir.alloca ops
  void appendFreedValues(llvm::DenseSet<mlir::Value> &out) const;

  /// Look up the state of \p val, or std::nullopt if it is not tracked
  std::optional<AllocationState> get(mlir::Value val) const;
};
| 155 | |
/// Forward dense dataflow analysis tracking the AllocationState of heap
/// allocations through the function (see visitOperation for the transfer
/// function).
class AllocationAnalysis
    : public mlir::dataflow::DenseForwardDataFlowAnalysis<LatticePoint> {
public:
  using DenseForwardDataFlowAnalysis::DenseForwardDataFlowAnalysis;

  /// Transfer function: updates the lattice for allocmem/freemem/result ops
  mlir::LogicalResult visitOperation(mlir::Operation *op,
                                     const LatticePoint &before,
                                     LatticePoint *after) override;

  /// At an entry point, the last modifications of all memory resources are
  /// yet to be determined
  void setToEntryState(LatticePoint *lattice) override;

protected:
  /// Visit control flow operations and decide whether to call visitOperation
  /// to apply the transfer function
  mlir::LogicalResult processOperation(mlir::Operation *op) override;
};
| 174 | |
| 175 | /// Drives analysis to find candidate fir.allocmem operations which could be |
| 176 | /// moved to the stack. Intended to be used with mlir::Pass::getAnalysis |
/// Drives analysis to find candidate fir.allocmem operations which could be
/// moved to the stack. Intended to be used with mlir::Pass::getAnalysis
class StackArraysAnalysisWrapper {
public:
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(StackArraysAnalysisWrapper)

  // Maps fir.allocmem -> place to insert alloca
  using AllocMemMap = llvm::DenseMap<mlir::Operation *, InsertionPoint>;

  // The operation is unused; analysis runs lazily per function through
  // getCandidateOps (the parameter is presumably required by
  // mlir::Pass::getAnalysis construction — see the class comment above)
  StackArraysAnalysisWrapper(mlir::Operation *op) {}

  // returns nullptr if analysis failed
  const AllocMemMap *getCandidateOps(mlir::Operation *func);

private:
  // Per-function result cache so each function is analysed at most once
  llvm::DenseMap<mlir::Operation *, AllocMemMap> funcMaps;

  // Runs the dataflow solver on one func.func and records candidates
  llvm::LogicalResult analyseFunction(mlir::Operation *func);
};
| 194 | |
| 195 | /// Converts a fir.allocmem to a fir.alloca |
/// Converts a fir.allocmem to a fir.alloca
class AllocMemConversion : public mlir::OpRewritePattern<fir::AllocMemOp> {
public:
  explicit AllocMemConversion(
      mlir::MLIRContext *ctx,
      const StackArraysAnalysisWrapper::AllocMemMap &candidateOps,
      std::optional<mlir::DataLayout> &dl,
      std::optional<fir::KindMapping> &kindMap)
      : OpRewritePattern(ctx), candidateOps{candidateOps}, dl{dl},
        kindMap{kindMap} {}

  /// Rewrite one candidate allocmem: insert the alloca, erase the matching
  /// freemem ops, and replace all uses of the heap value
  llvm::LogicalResult
  matchAndRewrite(fir::AllocMemOp allocmem,
                  mlir::PatternRewriter &rewriter) const override;

  /// Determine where to insert the alloca operation. The returned value should
  /// be checked to see if it is inside a loop
  static InsertionPoint
  findAllocaInsertionPoint(fir::AllocMemOp &oldAlloc,
                           const llvm::SmallVector<mlir::Operation *> &freeOps);

private:
  /// Handle to the DFA (already run)
  const StackArraysAnalysisWrapper::AllocMemMap &candidateOps;

  const std::optional<mlir::DataLayout> &dl;
  const std::optional<fir::KindMapping> &kindMap;

  /// If we failed to find an insertion point not inside a loop, see if it would
  /// be safe to use an llvm.stacksave/llvm.stackrestore inside the loop
  static InsertionPoint findAllocaLoopInsertionPoint(
      fir::AllocMemOp &oldAlloc,
      const llvm::SmallVector<mlir::Operation *> &freeOps);

  /// Returns the alloca if it was successfully inserted, otherwise {}
  std::optional<fir::AllocaOp>
  insertAlloca(fir::AllocMemOp &oldAlloc,
               mlir::PatternRewriter &rewriter) const;

  /// Inserts a stacksave before oldAlloc and a stackrestore after each freemem
  void insertStackSaveRestore(fir::AllocMemOp oldAlloc,
                              mlir::PatternRewriter &rewriter) const;
  /// Emit lifetime markers for newAlloc between oldAlloc and each freemem.
  /// If the allocation is dynamic, no lifetime markers are emitted.
  void insertLifetimeMarkers(fir::AllocMemOp oldAlloc, fir::AllocaOp newAlloc,
                             mlir::PatternRewriter &rewriter) const;
};
| 242 | |
/// The pass itself: runs the analysis and applies AllocMemConversion to the
/// candidate operations it finds
class StackArraysPass : public fir::impl::StackArraysBase<StackArraysPass> {
public:
  StackArraysPass() = default;
  StackArraysPass(const StackArraysPass &pass);

  llvm::StringRef getDescription() const override;

  void runOnOperation() override;

private:
  // Pass statistic: number of allocations moved from the heap to the stack
  Statistic runCount{this, "stackArraysRunCount",
                     "Number of heap allocations moved to the stack"};
};
| 256 | |
| 257 | } // namespace |
| 258 | |
| 259 | static void print(llvm::raw_ostream &os, AllocationState state) { |
| 260 | switch (state) { |
| 261 | case AllocationState::Unknown: |
| 262 | os << "Unknown" ; |
| 263 | break; |
| 264 | case AllocationState::Freed: |
| 265 | os << "Freed" ; |
| 266 | break; |
| 267 | case AllocationState::Allocated: |
| 268 | os << "Allocated" ; |
| 269 | break; |
| 270 | } |
| 271 | } |
| 272 | |
| 273 | /// Join two AllocationStates for the same value coming from different CFG |
| 274 | /// blocks |
| 275 | static AllocationState join(AllocationState lhs, AllocationState rhs) { |
| 276 | // | Allocated | Freed | Unknown |
| 277 | // ========= | ========= | ========= | ========= |
| 278 | // Allocated | Allocated | Unknown | Unknown |
| 279 | // Freed | Unknown | Freed | Unknown |
| 280 | // Unknown | Unknown | Unknown | Unknown |
| 281 | if (lhs == rhs) |
| 282 | return lhs; |
| 283 | return AllocationState::Unknown; |
| 284 | } |
| 285 | |
| 286 | mlir::ChangeResult LatticePoint::join(const AbstractDenseLattice &lattice) { |
| 287 | const auto &rhs = static_cast<const LatticePoint &>(lattice); |
| 288 | mlir::ChangeResult changed = mlir::ChangeResult::NoChange; |
| 289 | |
| 290 | // add everything from rhs to map, handling cases where values are in both |
| 291 | for (const auto &[value, rhsState] : rhs.stateMap) { |
| 292 | auto it = stateMap.find(value); |
| 293 | if (it != stateMap.end()) { |
| 294 | // value is present in both maps |
| 295 | AllocationState myState = it->second; |
| 296 | AllocationState newState = ::join(myState, rhsState); |
| 297 | if (newState != myState) { |
| 298 | changed = mlir::ChangeResult::Change; |
| 299 | it->getSecond() = newState; |
| 300 | } |
| 301 | } else { |
| 302 | // value not present in current map: add it |
| 303 | stateMap.insert({value, rhsState}); |
| 304 | changed = mlir::ChangeResult::Change; |
| 305 | } |
| 306 | } |
| 307 | |
| 308 | return changed; |
| 309 | } |
| 310 | |
| 311 | void LatticePoint::print(llvm::raw_ostream &os) const { |
| 312 | for (const auto &[value, state] : stateMap) { |
| 313 | os << "\n * " << value << ": " ; |
| 314 | ::print(os, state); |
| 315 | } |
| 316 | } |
| 317 | |
| 318 | mlir::ChangeResult LatticePoint::reset() { |
| 319 | if (stateMap.empty()) |
| 320 | return mlir::ChangeResult::NoChange; |
| 321 | stateMap.clear(); |
| 322 | return mlir::ChangeResult::Change; |
| 323 | } |
| 324 | |
| 325 | mlir::ChangeResult LatticePoint::set(mlir::Value value, AllocationState state) { |
| 326 | if (stateMap.count(value)) { |
| 327 | // already in map |
| 328 | AllocationState &oldState = stateMap[value]; |
| 329 | if (oldState != state) { |
| 330 | stateMap[value] = state; |
| 331 | return mlir::ChangeResult::Change; |
| 332 | } |
| 333 | return mlir::ChangeResult::NoChange; |
| 334 | } |
| 335 | stateMap.insert({value, state}); |
| 336 | return mlir::ChangeResult::Change; |
| 337 | } |
| 338 | |
| 339 | /// Get values which were allocated in this function and always freed before |
| 340 | /// the function returns |
| 341 | void LatticePoint::appendFreedValues(llvm::DenseSet<mlir::Value> &out) const { |
| 342 | for (auto &[value, state] : stateMap) { |
| 343 | if (state == AllocationState::Freed) |
| 344 | out.insert(value); |
| 345 | } |
| 346 | } |
| 347 | |
| 348 | std::optional<AllocationState> LatticePoint::get(mlir::Value val) const { |
| 349 | auto it = stateMap.find(val); |
| 350 | if (it == stateMap.end()) |
| 351 | return {}; |
| 352 | return it->second; |
| 353 | } |
| 354 | |
| 355 | static mlir::Value lookThroughDeclaresAndConverts(mlir::Value value) { |
| 356 | while (mlir::Operation *op = value.getDefiningOp()) { |
| 357 | if (auto declareOp = llvm::dyn_cast<fir::DeclareOp>(op)) |
| 358 | value = declareOp.getMemref(); |
| 359 | else if (auto convertOp = llvm::dyn_cast<fir::ConvertOp>(op)) |
| 360 | value = convertOp->getOperand(0); |
| 361 | else |
| 362 | return value; |
| 363 | } |
| 364 | return value; |
| 365 | } |
| 366 | |
/// Transfer function: propagates `before` into `after`, then records the
/// effect of allocmem (-> Allocated), freemem (-> Freed) and fir.result
/// (forward region state to the parent op) on the lattice.
mlir::LogicalResult AllocationAnalysis::visitOperation(
    mlir::Operation *op, const LatticePoint &before, LatticePoint *after) {
  LLVM_DEBUG(llvm::dbgs() << "StackArrays: Visiting operation: " << *op
                          << "\n");
  LLVM_DEBUG(llvm::dbgs() << "--Lattice in: " << before << "\n");

  // propagate before -> after
  mlir::ChangeResult changed = after->join(before);

  if (auto allocmem = mlir::dyn_cast<fir::AllocMemOp>(op)) {
    assert(op->getNumResults() == 1 && "fir.allocmem has one result");
    // fir.must_be_heap marks allocations that must not be moved to the stack
    auto attr = op->getAttrOfType<fir::MustBeHeapAttr>(
        fir::MustBeHeapAttr::getAttrName());
    if (attr && attr.getValue()) {
      LLVM_DEBUG(llvm::dbgs() << "--Found fir.must_be_heap: skipping\n");
      // skip allocation marked not to be moved
      return mlir::success();
    }

    // only array (fir.SequenceType) allocations are candidates
    auto retTy = allocmem.getAllocatedType();
    if (!mlir::isa<fir::SequenceType>(retTy)) {
      LLVM_DEBUG(llvm::dbgs()
                 << "--Allocation is not for an array: skipping\n");
      return mlir::success();
    }

    mlir::Value result = op->getResult(0);
    changed |= after->set(result, AllocationState::Allocated);
  } else if (mlir::isa<fir::FreeMemOp>(op)) {
    assert(op->getNumOperands() == 1 && "fir.freemem has one operand");
    mlir::Value operand = op->getOperand(0);

    // Note: StackArrays is scheduled in the pass pipeline after lowering hlfir
    // to fir. Therefore, we only need to handle `fir::DeclareOp`s. Also look
    // past converts in case the pointer was changed between different pointer
    // types.
    operand = lookThroughDeclaresAndConverts(operand);

    std::optional<AllocationState> operandState = before.get(operand);
    if (operandState && *operandState == AllocationState::Allocated) {
      // don't tag things not allocated in this function as freed, so that we
      // don't think they are candidates for moving to the stack
      changed |= after->set(operand, AllocationState::Freed);
    }
  } else if (mlir::isa<fir::ResultOp>(op)) {
    // forward the state at the end of a region to the lattice after the
    // parent operation, so it survives past the region
    mlir::Operation *parent = op->getParentOp();
    LatticePoint *parentLattice = getLattice(getProgramPointAfter(parent));
    assert(parentLattice);
    mlir::ChangeResult parentChanged = parentLattice->join(*after);
    propagateIfChanged(parentLattice, parentChanged);
  }

  // we pass lattices straight through fir.call because called functions should
  // not deallocate flang-generated array temporaries

  LLVM_DEBUG(llvm::dbgs() << "--Lattice out: " << *after << "\n");
  propagateIfChanged(after, changed);
  return mlir::success();
}
| 426 | |
/// At a function entry nothing is known about any value: clear the lattice
/// (the "unknown-unknown" state of having no entry in the map at all)
void AllocationAnalysis::setToEntryState(LatticePoint *lattice) {
  propagateIfChanged(lattice, lattice->reset());
}
| 430 | |
/// Mostly a copy of AbstractDenseLattice::processOperation - the difference
/// being that call operations are passed through to the transfer function
/// (so fir.call does not invalidate the tracked allocation states)
mlir::LogicalResult AllocationAnalysis::processOperation(mlir::Operation *op) {
  mlir::ProgramPoint *point = getProgramPointAfter(op);
  // If the containing block is not executable, bail out.
  if (op->getBlock() != nullptr &&
      !getOrCreateFor<mlir::dataflow::Executable>(
           point, getProgramPointBefore(op->getBlock()))
           ->isLive())
    return mlir::success();

  // Get the dense lattice to update
  mlir::dataflow::AbstractDenseLattice *after = getLattice(point);

  // If this op implements region control-flow, then control-flow dictates its
  // transfer function.
  if (auto branch = mlir::dyn_cast<mlir::RegionBranchOpInterface>(op)) {
    visitRegionBranchOperation(point, branch, after);
    return mlir::success();
  }

  // pass call operations through to the transfer function
  // (deliberately no special handling of CallOpInterface here)

  // Get the dense state before the execution of the op.
  const mlir::dataflow::AbstractDenseLattice *before =
      getLatticeFor(point, getProgramPointBefore(op));

  // Invoke the operation transfer function
  return visitOperationImpl(op, *before, after);
}
| 461 | |
/// Run the dataflow analysis over one func.func, then record every
/// fir.allocmem that is freed on all paths (and where to place its
/// replacement fir.alloca) in funcMaps[func].
llvm::LogicalResult
StackArraysAnalysisWrapper::analyseFunction(mlir::Operation *func) {
  assert(mlir::isa<mlir::func::FuncOp>(func));
  size_t nAllocs = 0;
  func->walk([&nAllocs](fir::AllocMemOp) { nAllocs++; });
  // don't bother with the analysis if there are no heap allocations
  if (nAllocs == 0)
    return mlir::success();
  if ((maxAllocsPerFunc != 0) && (nAllocs > maxAllocsPerFunc)) {
    LLVM_DEBUG(llvm::dbgs() << "Skipping stack arrays for function with "
                            << nAllocs << " heap allocations");
    return mlir::success();
  }

  mlir::DataFlowSolver solver;
  // constant propagation is required for dead code analysis, dead code analysis
  // is required to mark blocks live (required for mlir dense dfa)
  solver.load<mlir::dataflow::SparseConstantPropagation>();
  solver.load<mlir::dataflow::DeadCodeAnalysis>();

  auto [it, inserted] = funcMaps.try_emplace(func);
  AllocMemMap &candidateOps = it->second;

  solver.load<AllocationAnalysis>();
  if (failed(solver.initializeAndRun(func))) {
    llvm::errs() << "DataFlowSolver failed!";
    return mlir::failure();
  }

  // join the lattices at every exit from the function so `point` only keeps
  // "Freed" for values freed on all control-flow paths
  LatticePoint point{solver.getProgramPointAfter(func)};
  auto joinOperationLattice = [&](mlir::Operation *op) {
    const LatticePoint *lattice =
        solver.lookupState<LatticePoint>(solver.getProgramPointAfter(op));
    // there will be no lattice for an unreachable block
    if (lattice)
      (void)point.join(*lattice);
  };

  func->walk([&](mlir::func::ReturnOp child) { joinOperationLattice(child); });
  func->walk([&](fir::UnreachableOp child) { joinOperationLattice(child); });
  func->walk(
      [&](mlir::omp::TerminatorOp child) { joinOperationLattice(child); });
  func->walk([&](mlir::omp::YieldOp child) { joinOperationLattice(child); });

  llvm::DenseSet<mlir::Value> freedValues;
  point.appendFreedValues(freedValues);

  // Find all fir.freemem operations corresponding to fir.allocmem
  // in freedValues. It is best to find the association going back
  // from fir.freemem to fir.allocmem through the def-use chains,
  // so that we can use lookThroughDeclaresAndConverts same way
  // the AllocationAnalysis is handling them.
  llvm::DenseMap<mlir::Operation *, llvm::SmallVector<mlir::Operation *>>
      allocToFreeMemMap;
  func->walk([&](fir::FreeMemOp freeOp) {
    mlir::Value memref = lookThroughDeclaresAndConverts(freeOp.getHeapref());
    if (!freedValues.count(memref))
      return;

    auto allocMem = memref.getDefiningOp<fir::AllocMemOp>();
    allocToFreeMemMap[allocMem].push_back(freeOp);
  });

  // We only replace allocations which are definitely freed on all routes
  // through the function because otherwise the allocation may have an intended
  // lifetime longer than the current stack frame (e.g. a heap allocation which
  // is then freed by another function).
  for (mlir::Value freedValue : freedValues) {
    fir::AllocMemOp allocmem = freedValue.getDefiningOp<fir::AllocMemOp>();
    InsertionPoint insertionPoint =
        AllocMemConversion::findAllocaInsertionPoint(
            allocmem, allocToFreeMemMap[allocmem]);
    if (insertionPoint)
      candidateOps.insert({allocmem, insertionPoint});
  }

  LLVM_DEBUG(for (auto [allocMemOp, _]
                  : candidateOps) {
    llvm::dbgs() << "StackArrays: Found candidate op: " << *allocMemOp << '\n';
  });
  return mlir::success();
}
| 544 | |
| 545 | const StackArraysAnalysisWrapper::AllocMemMap * |
| 546 | StackArraysAnalysisWrapper::getCandidateOps(mlir::Operation *func) { |
| 547 | if (!funcMaps.contains(func)) |
| 548 | if (mlir::failed(analyseFunction(func))) |
| 549 | return nullptr; |
| 550 | return &funcMaps[func]; |
| 551 | } |
| 552 | |
/// Restore the old allocation type expected by existing code: the new
/// fir.alloca yields a !fir.ref<...> whereas the old fir.allocmem yielded a
/// !fir.heap<...>, so insert a fir.convert back to the heap type when needed
static mlir::Value convertAllocationType(mlir::PatternRewriter &rewriter,
                                         const mlir::Location &loc,
                                         mlir::Value heap, mlir::Value stack) {
  mlir::Type heapTy = heap.getType();
  mlir::Type stackTy = stack.getType();

  // nothing to do if the types already agree
  if (heapTy == stackTy)
    return stack;

  fir::HeapType firHeapTy = mlir::cast<fir::HeapType>(heapTy);
  LLVM_ATTRIBUTE_UNUSED fir::ReferenceType firRefTy =
      mlir::cast<fir::ReferenceType>(stackTy);
  assert(firHeapTy.getElementType() == firRefTy.getElementType() &&
         "Allocations must have the same type");

  // place the convert right after the alloca, then restore the rewriter's
  // previous insertion point so callers are unaffected
  auto insertionPoint = rewriter.saveInsertionPoint();
  rewriter.setInsertionPointAfter(stack.getDefiningOp());
  mlir::Value conv =
      rewriter.create<fir::ConvertOp>(loc, firHeapTy, stack).getResult();
  rewriter.restoreInsertionPoint(insertionPoint);
  return conv;
}
| 576 | |
/// Rewrite one candidate fir.allocmem: insert a fir.alloca at the
/// pre-computed insertion point, erase the matching fir.freemem ops, and
/// redirect all uses of the heap value to the stack value.
llvm::LogicalResult
AllocMemConversion::matchAndRewrite(fir::AllocMemOp allocmem,
                                    mlir::PatternRewriter &rewriter) const {
  auto oldInsertionPt = rewriter.saveInsertionPoint();
  // add alloca operation
  std::optional<fir::AllocaOp> alloca = insertAlloca(allocmem, rewriter);
  rewriter.restoreInsertionPoint(oldInsertionPt);
  // insertAlloca returns {} when this allocmem is not a candidate or has no
  // valid insertion point; the pattern then does not apply
  if (!alloca)
    return mlir::failure();

  // remove freemem operations
  llvm::SmallVector<mlir::Operation *> erases;
  mlir::Operation *parent = allocmem->getParentOp();
  // TODO: this shouldn't need to be re-calculated for every allocmem
  parent->walk([&](fir::FreeMemOp freeOp) {
    if (lookThroughDeclaresAndConverts(freeOp->getOperand(0)) == allocmem)
      erases.push_back(freeOp);
  });

  // now we are done iterating the users, it is safe to mutate them
  for (mlir::Operation *erase : erases)
    rewriter.eraseOp(erase);

  // replace references to heap allocation with references to stack allocation
  mlir::Value newValue = convertAllocationType(
      rewriter, allocmem.getLoc(), allocmem.getResult(), alloca->getResult());
  rewriter.replaceAllUsesWith(allocmem.getResult(), newValue);

  // remove allocmem operation
  rewriter.eraseOp(allocmem.getOperation());

  return mlir::success();
}
| 610 | |
/// True when \p block is part of a CFG-level loop (per LoopLikeOpInterface)
static bool isInLoop(mlir::Block *block) {
  return mlir::LoopLikeOpInterface::blockIsInLoop(block);
}
| 614 | |
/// True when \p op is inside a loop: either its block is in a CFG loop or
/// it is nested under an op implementing LoopLikeOpInterface
static bool isInLoop(mlir::Operation *op) {
  return isInLoop(op->getBlock()) ||
         op->getParentOfType<mlir::LoopLikeOpInterface>();
}
| 619 | |
InsertionPoint AllocMemConversion::findAllocaInsertionPoint(
    fir::AllocMemOp &oldAlloc,
    const llvm::SmallVector<mlir::Operation *> &freeOps) {
  // Ideally the alloca should be inserted at the end of the function entry
  // block so that we do not allocate stack space in a loop. However,
  // the operands to the alloca may not be available that early, so insert it
  // after the last operand becomes available
  // If the old allocmem op was in an openmp region then it should not be moved
  // outside of that
  LLVM_DEBUG(llvm::dbgs() << "StackArrays: findAllocaInsertionPoint: "
                          << oldAlloc << "\n");

  // check that an Operation or Block we are about to return is not in a loop
  auto checkReturn = [&](auto *point) -> InsertionPoint {
    if (isInLoop(point)) {
      mlir::Operation *oldAllocOp = oldAlloc.getOperation();
      if (isInLoop(oldAllocOp)) {
        // where we want to put it is in a loop, and even the old location is in
        // a loop. Give up.
        return findAllocaLoopInsertionPoint(oldAlloc, freeOps);
      }
      // fall back to the old allocation's own (loop-free) location
      return {oldAllocOp};
    }
    return {point};
  };

  auto oldOmpRegion =
      oldAlloc->getParentOfType<mlir::omp::OutlineableOpenMPOpInterface>();

  // Find when the last operand value becomes available
  mlir::Block *operandsBlock = nullptr;
  mlir::Operation *lastOperand = nullptr;
  for (mlir::Value operand : oldAlloc.getOperands()) {
    LLVM_DEBUG(llvm::dbgs() << "--considering operand " << operand << "\n");
    mlir::Operation *op = operand.getDefiningOp();
    // block arguments have no defining op; give up and use the old location
    if (!op)
      return checkReturn(oldAlloc.getOperation());
    if (!operandsBlock)
      operandsBlock = op->getBlock();
    else if (operandsBlock != op->getBlock()) {
      LLVM_DEBUG(llvm::dbgs()
                 << "----operand declared in a different block!\n");
      // Operation::isBeforeInBlock requires the operations to be in the same
      // block. The best we can do is the location of the allocmem.
      return checkReturn(oldAlloc.getOperation());
    }
    if (!lastOperand || lastOperand->isBeforeInBlock(op))
      lastOperand = op;
  }

  if (lastOperand) {
    // there were value operands to the allocmem so insert after the last one
    LLVM_DEBUG(llvm::dbgs()
               << "--Placing after last operand: " << *lastOperand << "\n");
    // check we aren't moving out of an omp region
    auto lastOpOmpRegion =
        lastOperand->getParentOfType<mlir::omp::OutlineableOpenMPOpInterface>();
    if (lastOpOmpRegion == oldOmpRegion)
      return checkReturn(lastOperand);
    // Presumably this happened because the operands became ready before the
    // start of this openmp region. (lastOpOmpRegion != oldOmpRegion) should
    // imply that oldOmpRegion comes after lastOpOmpRegion.
    return checkReturn(oldOmpRegion.getAllocaBlock());
  }

  // There were no value operands to the allocmem so we are safe to insert it
  // as early as we want

  // handle openmp case
  if (oldOmpRegion)
    return checkReturn(oldOmpRegion.getAllocaBlock());

  // fall back to the function entry block
  mlir::func::FuncOp func = oldAlloc->getParentOfType<mlir::func::FuncOp>();
  assert(func && "This analysis is run on func.func");
  mlir::Block &entryBlock = func.getBlocks().front();
  LLVM_DEBUG(llvm::dbgs() << "--Placing at the start of func entry block\n");
  return checkReturn(&entryBlock);
}
| 699 | |
/// Last resort when both the preferred and original locations are inside a
/// loop: keep the alloca at the old allocation site, but bracket it with
/// llvm.stacksave/llvm.stackrestore so the stack does not grow per iteration.
/// Returns a null InsertionPoint when that would not be safe.
InsertionPoint AllocMemConversion::findAllocaLoopInsertionPoint(
    fir::AllocMemOp &oldAlloc,
    const llvm::SmallVector<mlir::Operation *> &freeOps) {
  mlir::Operation *oldAllocOp = oldAlloc;
  // This is only called as a last resort. We should try to insert at the
  // location of the old allocation, which is inside of a loop, using
  // llvm.stacksave/llvm.stackrestore

  assert(freeOps.size() && "DFA should only return freed memory");

  // Don't attempt to reason about a stacksave/stackrestore between different
  // blocks
  for (mlir::Operation *free : freeOps)
    if (free->getBlock() != oldAllocOp->getBlock())
      return {nullptr};

  // Check that there aren't any other stack allocations in between the
  // stack save and stack restore
  // note: for flang generated temporaries there should only be one free op
  for (mlir::Operation *free : freeOps) {
    // walk forward from the allocation to each free in the same block
    for (mlir::Operation *op = oldAlloc; op && op != free;
         op = op->getNextNode()) {
      // an intervening alloca would be wiped out by the stackrestore
      if (mlir::isa<fir::AllocaOp>(op))
        return {nullptr};
    }
  }

  return InsertionPoint{oldAllocOp, /*saveRestoreStack=*/true};
}
| 729 | |
| 730 | std::optional<fir::AllocaOp> |
| 731 | AllocMemConversion::insertAlloca(fir::AllocMemOp &oldAlloc, |
| 732 | mlir::PatternRewriter &rewriter) const { |
| 733 | auto it = candidateOps.find(oldAlloc.getOperation()); |
| 734 | if (it == candidateOps.end()) |
| 735 | return {}; |
| 736 | InsertionPoint insertionPoint = it->second; |
| 737 | if (!insertionPoint) |
| 738 | return {}; |
| 739 | |
| 740 | if (insertionPoint.shouldSaveRestoreStack()) |
| 741 | insertStackSaveRestore(oldAlloc, rewriter); |
| 742 | |
| 743 | mlir::Location loc = oldAlloc.getLoc(); |
| 744 | mlir::Type varTy = oldAlloc.getInType(); |
| 745 | if (mlir::Operation *op = insertionPoint.tryGetOperation()) { |
| 746 | rewriter.setInsertionPointAfter(op); |
| 747 | } else { |
| 748 | mlir::Block *block = insertionPoint.tryGetBlock(); |
| 749 | assert(block && "There must be a valid insertion point" ); |
| 750 | rewriter.setInsertionPointToStart(block); |
| 751 | } |
| 752 | |
| 753 | auto unpackName = [](std::optional<llvm::StringRef> opt) -> llvm::StringRef { |
| 754 | if (opt) |
| 755 | return *opt; |
| 756 | return {}; |
| 757 | }; |
| 758 | |
| 759 | llvm::StringRef uniqName = unpackName(oldAlloc.getUniqName()); |
| 760 | llvm::StringRef bindcName = unpackName(oldAlloc.getBindcName()); |
| 761 | auto alloca = rewriter.create<fir::AllocaOp>(loc, varTy, uniqName, bindcName, |
| 762 | oldAlloc.getTypeparams(), |
| 763 | oldAlloc.getShape()); |
| 764 | if (emitLifetimeMarkers) |
| 765 | insertLifetimeMarkers(oldAlloc, alloca, rewriter); |
| 766 | |
| 767 | return alloca; |
| 768 | } |
| 769 | |
| 770 | static void |
| 771 | visitFreeMemOp(fir::AllocMemOp oldAlloc, |
| 772 | const std::function<void(mlir::Operation *)> &callBack) { |
| 773 | for (mlir::Operation *user : oldAlloc->getUsers()) { |
| 774 | if (auto declareOp = mlir::dyn_cast_if_present<fir::DeclareOp>(user)) { |
| 775 | for (mlir::Operation *user : declareOp->getUsers()) { |
| 776 | if (mlir::isa<fir::FreeMemOp>(user)) |
| 777 | callBack(user); |
| 778 | } |
| 779 | } |
| 780 | |
| 781 | if (mlir::isa<fir::FreeMemOp>(user)) |
| 782 | callBack(user); |
| 783 | } |
| 784 | } |
| 785 | |
| 786 | void AllocMemConversion::insertStackSaveRestore( |
| 787 | fir::AllocMemOp oldAlloc, mlir::PatternRewriter &rewriter) const { |
| 788 | mlir::OpBuilder::InsertionGuard insertGuard(rewriter); |
| 789 | auto mod = oldAlloc->getParentOfType<mlir::ModuleOp>(); |
| 790 | fir::FirOpBuilder builder{rewriter, mod}; |
| 791 | |
| 792 | builder.setInsertionPoint(oldAlloc); |
| 793 | mlir::Value sp = builder.genStackSave(oldAlloc.getLoc()); |
| 794 | |
| 795 | auto createStackRestoreCall = [&](mlir::Operation *user) { |
| 796 | builder.setInsertionPoint(user); |
| 797 | builder.genStackRestore(user->getLoc(), sp); |
| 798 | }; |
| 799 | visitFreeMemOp(oldAlloc, createStackRestoreCall); |
| 800 | } |
| 801 | |
| 802 | void AllocMemConversion::insertLifetimeMarkers( |
| 803 | fir::AllocMemOp oldAlloc, fir::AllocaOp newAlloc, |
| 804 | mlir::PatternRewriter &rewriter) const { |
| 805 | if (!dl || !kindMap) |
| 806 | return; |
| 807 | llvm::StringRef attrName = fir::getHasLifetimeMarkerAttrName(); |
| 808 | // Do not add lifetime markers if the alloca already has any. |
| 809 | if (newAlloc->hasAttr(attrName)) |
| 810 | return; |
| 811 | if (std::optional<int64_t> size = |
| 812 | fir::getAllocaByteSize(newAlloc, *dl, *kindMap)) { |
| 813 | mlir::OpBuilder::InsertionGuard insertGuard(rewriter); |
| 814 | rewriter.setInsertionPoint(oldAlloc); |
| 815 | mlir::Value ptr = fir::factory::genLifetimeStart( |
| 816 | rewriter, newAlloc.getLoc(), newAlloc, *size, &*dl); |
| 817 | visitFreeMemOp(oldAlloc, [&](mlir::Operation *op) { |
| 818 | rewriter.setInsertionPoint(op); |
| 819 | fir::factory::genLifetimeEnd(rewriter, op->getLoc(), ptr, *size); |
| 820 | }); |
| 821 | newAlloc->setAttr(attrName, rewriter.getUnitAttr()); |
| 822 | } |
| 823 | } |
| 824 | |
/// Copy constructor required by the MLIR pass infrastructure (passes must be
/// copyable); delegates to the TableGen-generated base class.
StackArraysPass::StackArraysPass(const StackArraysPass &pass)
    : fir::impl::StackArraysBase<StackArraysPass>(pass) {}
| 827 | |
/// One-line summary of the pass, reported by the pass infrastructure.
llvm::StringRef StackArraysPass::getDescription() const {
  return "Move heap allocated array temporaries to the stack" ;
}
| 831 | |
| 832 | void StackArraysPass::runOnOperation() { |
| 833 | mlir::func::FuncOp func = getOperation(); |
| 834 | |
| 835 | auto &analysis = getAnalysis<StackArraysAnalysisWrapper>(); |
| 836 | const StackArraysAnalysisWrapper::AllocMemMap *candidateOps = |
| 837 | analysis.getCandidateOps(func); |
| 838 | if (!candidateOps) { |
| 839 | signalPassFailure(); |
| 840 | return; |
| 841 | } |
| 842 | |
| 843 | if (candidateOps->empty()) |
| 844 | return; |
| 845 | runCount += candidateOps->size(); |
| 846 | |
| 847 | llvm::SmallVector<mlir::Operation *> opsToConvert; |
| 848 | opsToConvert.reserve(candidateOps->size()); |
| 849 | for (auto [op, _] : *candidateOps) |
| 850 | opsToConvert.push_back(op); |
| 851 | |
| 852 | mlir::MLIRContext &context = getContext(); |
| 853 | mlir::RewritePatternSet patterns(&context); |
| 854 | mlir::GreedyRewriteConfig config; |
| 855 | // prevent the pattern driver form merging blocks |
| 856 | config.setRegionSimplificationLevel( |
| 857 | mlir::GreedySimplifyRegionLevel::Disabled); |
| 858 | |
| 859 | auto module = func->getParentOfType<mlir::ModuleOp>(); |
| 860 | std::optional<mlir::DataLayout> dl = |
| 861 | module ? fir::support::getOrSetMLIRDataLayout( |
| 862 | module, /*allowDefaultLayout=*/false) |
| 863 | : std::nullopt; |
| 864 | std::optional<fir::KindMapping> kindMap; |
| 865 | if (module) |
| 866 | kindMap = fir::getKindMapping(module); |
| 867 | |
| 868 | patterns.insert<AllocMemConversion>(&context, *candidateOps, dl, kindMap); |
| 869 | if (mlir::failed(mlir::applyOpPatternsGreedily( |
| 870 | opsToConvert, std::move(patterns), config))) { |
| 871 | mlir::emitError(func->getLoc(), "error in stack arrays optimization\n" ); |
| 872 | signalPassFailure(); |
| 873 | } |
| 874 | } |
| 875 | |