1 | //===- StackArrays.cpp ----------------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "flang/Optimizer/Builder/FIRBuilder.h" |
10 | #include "flang/Optimizer/Builder/LowLevelIntrinsics.h" |
11 | #include "flang/Optimizer/Dialect/FIRAttr.h" |
12 | #include "flang/Optimizer/Dialect/FIRDialect.h" |
13 | #include "flang/Optimizer/Dialect/FIROps.h" |
14 | #include "flang/Optimizer/Dialect/FIRType.h" |
15 | #include "flang/Optimizer/Dialect/Support/FIRContext.h" |
16 | #include "flang/Optimizer/Support/DataLayout.h" |
17 | #include "flang/Optimizer/Transforms/Passes.h" |
18 | #include "mlir/Analysis/DataFlow/ConstantPropagationAnalysis.h" |
19 | #include "mlir/Analysis/DataFlow/DeadCodeAnalysis.h" |
20 | #include "mlir/Analysis/DataFlow/DenseAnalysis.h" |
21 | #include "mlir/Analysis/DataFlowFramework.h" |
22 | #include "mlir/Dialect/DLTI/DLTI.h" |
23 | #include "mlir/Dialect/Func/IR/FuncOps.h" |
24 | #include "mlir/Dialect/LLVMIR/LLVMDialect.h" |
25 | #include "mlir/Dialect/OpenMP/OpenMPDialect.h" |
26 | #include "mlir/IR/Builders.h" |
27 | #include "mlir/IR/Diagnostics.h" |
28 | #include "mlir/IR/Value.h" |
29 | #include "mlir/Interfaces/LoopLikeInterface.h" |
30 | #include "mlir/Pass/Pass.h" |
31 | #include "mlir/Transforms/GreedyPatternRewriteDriver.h" |
32 | #include "mlir/Transforms/Passes.h" |
33 | #include "llvm/ADT/DenseMap.h" |
34 | #include "llvm/ADT/DenseSet.h" |
35 | #include "llvm/ADT/PointerUnion.h" |
36 | #include "llvm/Support/Casting.h" |
37 | #include "llvm/Support/raw_ostream.h" |
38 | #include <optional> |
39 | |
40 | namespace fir { |
41 | #define GEN_PASS_DEF_STACKARRAYS |
42 | #include "flang/Optimizer/Transforms/Passes.h.inc" |
43 | } // namespace fir |
44 | |
45 | #define DEBUG_TYPE "stack-arrays" |
46 | |
/// Cap on the number of fir.allocmem ops analysed per function; analysing
/// very large functions can be slow, so functions over the limit are skipped.
/// A value of 0 disables the limit.
static llvm::cl::opt<std::size_t> maxAllocsPerFunc(
    "stack-arrays-max-allocs",
    llvm::cl::desc("The maximum number of heap allocations to consider in one "
                   "function before skipping (to save compilation time). Set "
                   "to 0 for no limit."),
    llvm::cl::init(1000), llvm::cl::Hidden);
53 | |
/// When set, annotate new constant-size allocas so lifetime markers are
/// emitted for them (see insertLifetimeMarkers below). Off by default.
static llvm::cl::opt<bool> emitLifetimeMarkers(
    "stack-arrays-lifetime",
    llvm::cl::desc("Add lifetime markers to generated constant size allocas"),
    llvm::cl::init(false), llvm::cl::Hidden);
58 | |
59 | namespace { |
60 | |
/// The state of an SSA value at each program point, as tracked by the dense
/// dataflow analysis below
enum class AllocationState {
  /// This means that the allocation state of a variable cannot be determined
  /// at this program point, e.g. because one route through a conditional freed
  /// the variable and the other route didn't.
  /// This asserts a known-unknown: different from the unknown-unknown of having
  /// no AllocationState stored for a particular SSA value
  Unknown,
  /// Means this SSA value was allocated on the heap in this function and has
  /// now been freed
  Freed,
  /// Means this SSA value was allocated on the heap in this function and is a
  /// candidate for moving to the stack
  Allocated,
};
76 | |
77 | /// Stores where an alloca should be inserted. If the PointerUnion is an |
78 | /// Operation the alloca should be inserted /after/ the operation. If it is a |
79 | /// block, the alloca can be placed anywhere in that block. |
80 | class InsertionPoint { |
81 | llvm::PointerUnion<mlir::Operation *, mlir::Block *> location; |
82 | bool saveRestoreStack; |
83 | |
84 | /// Get contained pointer type or nullptr |
85 | template <class T> |
86 | T *tryGetPtr() const { |
87 | // Use llvm::dyn_cast_if_present because location may be null here. |
88 | if (T *ptr = llvm::dyn_cast_if_present<T *>(location)) |
89 | return ptr; |
90 | return nullptr; |
91 | } |
92 | |
93 | public: |
94 | template <class T> |
95 | InsertionPoint(T *ptr, bool saveRestoreStack = false) |
96 | : location(ptr), saveRestoreStack{saveRestoreStack} {} |
97 | InsertionPoint(std::nullptr_t null) |
98 | : location(null), saveRestoreStack{false} {} |
99 | |
100 | /// Get contained operation, or nullptr |
101 | mlir::Operation *tryGetOperation() const { |
102 | return tryGetPtr<mlir::Operation>(); |
103 | } |
104 | |
105 | /// Get contained block, or nullptr |
106 | mlir::Block *tryGetBlock() const { return tryGetPtr<mlir::Block>(); } |
107 | |
108 | /// Get whether the stack should be saved/restored. If yes, an llvm.stacksave |
109 | /// intrinsic should be added before the alloca, and an llvm.stackrestore |
110 | /// intrinsic should be added where the freemem is |
111 | bool shouldSaveRestoreStack() const { return saveRestoreStack; } |
112 | |
113 | operator bool() const { return tryGetOperation() || tryGetBlock(); } |
114 | |
115 | bool operator==(const InsertionPoint &rhs) const { |
116 | return (location == rhs.location) && |
117 | (saveRestoreStack == rhs.saveRestoreStack); |
118 | } |
119 | |
120 | bool operator!=(const InsertionPoint &rhs) const { return !(*this == rhs); } |
121 | }; |
122 | |
/// Maps SSA values to their AllocationState at a particular program point.
/// Also caches the insertion points for the new alloca operations
class LatticePoint : public mlir::dataflow::AbstractDenseLattice {
  // Maps all values we are interested in to states
  llvm::SmallDenseMap<mlir::Value, AllocationState, 1> stateMap;

public:
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(LatticePoint)
  using AbstractDenseLattice::AbstractDenseLattice;

  bool operator==(const LatticePoint &rhs) const {
    return stateMap == rhs.stateMap;
  }

  /// Join the lattice across control-flow edges
  mlir::ChangeResult join(const AbstractDenseLattice &lattice) override;

  void print(llvm::raw_ostream &os) const override;

  /// Clear all modifications
  mlir::ChangeResult reset();

  /// Set the state of an SSA value
  mlir::ChangeResult set(mlir::Value value, AllocationState state);

  /// Get fir.allocmem ops which were allocated in this function and always
  /// freed before the function returns, plus where to insert replacement
  /// fir.alloca ops
  void appendFreedValues(llvm::DenseSet<mlir::Value> &out) const;

  /// State of \p val at this program point, or std::nullopt if it is not
  /// tracked in this lattice
  std::optional<AllocationState> get(mlir::Value val) const;
};
155 | |
/// Forward dense dataflow analysis tracking the AllocationState of heap
/// allocations through the function
class AllocationAnalysis
    : public mlir::dataflow::DenseForwardDataFlowAnalysis<LatticePoint> {
public:
  using DenseForwardDataFlowAnalysis::DenseForwardDataFlowAnalysis;

  /// Transfer function: derives \p after from \p before, recording
  /// allocations and frees performed by \p op
  mlir::LogicalResult visitOperation(mlir::Operation *op,
                                     const LatticePoint &before,
                                     LatticePoint *after) override;

  /// At an entry point, the last modifications of all memory resources are
  /// yet to be determined
  void setToEntryState(LatticePoint *lattice) override;

protected:
  /// Visit control flow operations and decide whether to call visitOperation
  /// to apply the transfer function
  mlir::LogicalResult processOperation(mlir::Operation *op) override;
};
174 | |
/// Drives analysis to find candidate fir.allocmem operations which could be
/// moved to the stack. Intended to be used with mlir::Pass::getAnalysis
class StackArraysAnalysisWrapper {
public:
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(StackArraysAnalysisWrapper)

  // Maps fir.allocmem -> place to insert alloca
  using AllocMemMap = llvm::DenseMap<mlir::Operation *, InsertionPoint>;

  StackArraysAnalysisWrapper(mlir::Operation *op) {}

  // Returns the candidate map for \p func, running the analysis on first
  // use and caching the result; returns nullptr if analysis failed
  const AllocMemMap *getCandidateOps(mlir::Operation *func);

private:
  // Per-function results cache for analyseFunction
  llvm::DenseMap<mlir::Operation *, AllocMemMap> funcMaps;

  // Run the dataflow analysis over \p func and record candidates in funcMaps
  llvm::LogicalResult analyseFunction(mlir::Operation *func);
};
194 | |
/// Converts a fir.allocmem to a fir.alloca
class AllocMemConversion : public mlir::OpRewritePattern<fir::AllocMemOp> {
public:
  explicit AllocMemConversion(
      mlir::MLIRContext *ctx,
      const StackArraysAnalysisWrapper::AllocMemMap &candidateOps,
      std::optional<mlir::DataLayout> &dl,
      std::optional<fir::KindMapping> &kindMap)
      : OpRewritePattern(ctx), candidateOps{candidateOps}, dl{dl},
        kindMap{kindMap} {}

  llvm::LogicalResult
  matchAndRewrite(fir::AllocMemOp allocmem,
                  mlir::PatternRewriter &rewriter) const override;

  /// Determine where to insert the alloca operation. The returned value should
  /// be checked to see if it is inside a loop
  static InsertionPoint
  findAllocaInsertionPoint(fir::AllocMemOp &oldAlloc,
                           const llvm::SmallVector<mlir::Operation *> &freeOps);

private:
  /// Handle to the DFA (already run)
  const StackArraysAnalysisWrapper::AllocMemMap &candidateOps;

  // Optional data layout / kind mapping used when emitting lifetime markers
  const std::optional<mlir::DataLayout> &dl;
  const std::optional<fir::KindMapping> &kindMap;

  /// If we failed to find an insertion point not inside a loop, see if it would
  /// be safe to use an llvm.stacksave/llvm.stackrestore inside the loop
  static InsertionPoint findAllocaLoopInsertionPoint(
      fir::AllocMemOp &oldAlloc,
      const llvm::SmallVector<mlir::Operation *> &freeOps);

  /// Returns the alloca if it was successfully inserted, otherwise {}
  std::optional<fir::AllocaOp>
  insertAlloca(fir::AllocMemOp &oldAlloc,
               mlir::PatternRewriter &rewriter) const;

  /// Inserts a stacksave before oldAlloc and a stackrestore after each freemem
  void insertStackSaveRestore(fir::AllocMemOp oldAlloc,
                              mlir::PatternRewriter &rewriter) const;
  /// Emit lifetime markers for newAlloc between oldAlloc and each freemem.
  /// If the allocation is dynamic, no lifetime markers are emitted.
  void insertLifetimeMarkers(fir::AllocMemOp oldAlloc, fir::AllocaOp newAlloc,
                             mlir::PatternRewriter &rewriter) const;
};
242 | |
/// The pass itself: runs StackArraysAnalysisWrapper then applies
/// AllocMemConversion to each candidate allocation
class StackArraysPass : public fir::impl::StackArraysBase<StackArraysPass> {
public:
  StackArraysPass() = default;
  StackArraysPass(const StackArraysPass &pass);

  llvm::StringRef getDescription() const override;

  void runOnOperation() override;

private:
  // Pass statistic reported with -stats
  Statistic runCount{this, "stackArraysRunCount",
                     "Number of heap allocations moved to the stack"};
};
256 | |
257 | } // namespace |
258 | |
259 | static void print(llvm::raw_ostream &os, AllocationState state) { |
260 | switch (state) { |
261 | case AllocationState::Unknown: |
262 | os << "Unknown" ; |
263 | break; |
264 | case AllocationState::Freed: |
265 | os << "Freed" ; |
266 | break; |
267 | case AllocationState::Allocated: |
268 | os << "Allocated" ; |
269 | break; |
270 | } |
271 | } |
272 | |
273 | /// Join two AllocationStates for the same value coming from different CFG |
274 | /// blocks |
275 | static AllocationState join(AllocationState lhs, AllocationState rhs) { |
276 | // | Allocated | Freed | Unknown |
277 | // ========= | ========= | ========= | ========= |
278 | // Allocated | Allocated | Unknown | Unknown |
279 | // Freed | Unknown | Freed | Unknown |
280 | // Unknown | Unknown | Unknown | Unknown |
281 | if (lhs == rhs) |
282 | return lhs; |
283 | return AllocationState::Unknown; |
284 | } |
285 | |
286 | mlir::ChangeResult LatticePoint::join(const AbstractDenseLattice &lattice) { |
287 | const auto &rhs = static_cast<const LatticePoint &>(lattice); |
288 | mlir::ChangeResult changed = mlir::ChangeResult::NoChange; |
289 | |
290 | // add everything from rhs to map, handling cases where values are in both |
291 | for (const auto &[value, rhsState] : rhs.stateMap) { |
292 | auto it = stateMap.find(value); |
293 | if (it != stateMap.end()) { |
294 | // value is present in both maps |
295 | AllocationState myState = it->second; |
296 | AllocationState newState = ::join(myState, rhsState); |
297 | if (newState != myState) { |
298 | changed = mlir::ChangeResult::Change; |
299 | it->getSecond() = newState; |
300 | } |
301 | } else { |
302 | // value not present in current map: add it |
303 | stateMap.insert({value, rhsState}); |
304 | changed = mlir::ChangeResult::Change; |
305 | } |
306 | } |
307 | |
308 | return changed; |
309 | } |
310 | |
311 | void LatticePoint::print(llvm::raw_ostream &os) const { |
312 | for (const auto &[value, state] : stateMap) { |
313 | os << "\n * " << value << ": " ; |
314 | ::print(os, state); |
315 | } |
316 | } |
317 | |
318 | mlir::ChangeResult LatticePoint::reset() { |
319 | if (stateMap.empty()) |
320 | return mlir::ChangeResult::NoChange; |
321 | stateMap.clear(); |
322 | return mlir::ChangeResult::Change; |
323 | } |
324 | |
325 | mlir::ChangeResult LatticePoint::set(mlir::Value value, AllocationState state) { |
326 | if (stateMap.count(value)) { |
327 | // already in map |
328 | AllocationState &oldState = stateMap[value]; |
329 | if (oldState != state) { |
330 | stateMap[value] = state; |
331 | return mlir::ChangeResult::Change; |
332 | } |
333 | return mlir::ChangeResult::NoChange; |
334 | } |
335 | stateMap.insert({value, state}); |
336 | return mlir::ChangeResult::Change; |
337 | } |
338 | |
339 | /// Get values which were allocated in this function and always freed before |
340 | /// the function returns |
341 | void LatticePoint::appendFreedValues(llvm::DenseSet<mlir::Value> &out) const { |
342 | for (auto &[value, state] : stateMap) { |
343 | if (state == AllocationState::Freed) |
344 | out.insert(value); |
345 | } |
346 | } |
347 | |
348 | std::optional<AllocationState> LatticePoint::get(mlir::Value val) const { |
349 | auto it = stateMap.find(val); |
350 | if (it == stateMap.end()) |
351 | return {}; |
352 | return it->second; |
353 | } |
354 | |
355 | static mlir::Value lookThroughDeclaresAndConverts(mlir::Value value) { |
356 | while (mlir::Operation *op = value.getDefiningOp()) { |
357 | if (auto declareOp = llvm::dyn_cast<fir::DeclareOp>(op)) |
358 | value = declareOp.getMemref(); |
359 | else if (auto convertOp = llvm::dyn_cast<fir::ConvertOp>(op)) |
360 | value = convertOp->getOperand(0); |
361 | else |
362 | return value; |
363 | } |
364 | return value; |
365 | } |
366 | |
/// Transfer function for the dense forward analysis: propagates the lattice
/// through \p op, recording fir.allocmem results as Allocated and fir.freemem
/// operands (when previously Allocated here) as Freed.
mlir::LogicalResult AllocationAnalysis::visitOperation(
    mlir::Operation *op, const LatticePoint &before, LatticePoint *after) {
  LLVM_DEBUG(llvm::dbgs() << "StackArrays: Visiting operation: " << *op
                          << "\n");
  LLVM_DEBUG(llvm::dbgs() << "--Lattice in: " << before << "\n");

  // propagate before -> after
  mlir::ChangeResult changed = after->join(before);

  if (auto allocmem = mlir::dyn_cast<fir::AllocMemOp>(op)) {
    assert(op->getNumResults() == 1 && "fir.allocmem has one result");
    auto attr = op->getAttrOfType<fir::MustBeHeapAttr>(
        fir::MustBeHeapAttr::getAttrName());
    if (attr && attr.getValue()) {
      LLVM_DEBUG(llvm::dbgs() << "--Found fir.must_be_heap: skipping\n");
      // skip allocation marked not to be moved
      return mlir::success();
    }

    // only array (sequence type) allocations are candidates for this pass
    auto retTy = allocmem.getAllocatedType();
    if (!mlir::isa<fir::SequenceType>(retTy)) {
      LLVM_DEBUG(llvm::dbgs()
                 << "--Allocation is not for an array: skipping\n");
      return mlir::success();
    }

    mlir::Value result = op->getResult(0);
    changed |= after->set(result, AllocationState::Allocated);
  } else if (mlir::isa<fir::FreeMemOp>(op)) {
    assert(op->getNumOperands() == 1 && "fir.freemem has one operand");
    mlir::Value operand = op->getOperand(0);

    // Note: StackArrays is scheduled in the pass pipeline after lowering hlfir
    // to fir. Therefore, we only need to handle `fir::DeclareOp`s. Also look
    // past converts in case the pointer was changed between different pointer
    // types.
    operand = lookThroughDeclaresAndConverts(operand);

    std::optional<AllocationState> operandState = before.get(operand);
    if (operandState && *operandState == AllocationState::Allocated) {
      // don't tag things not allocated in this function as freed, so that we
      // don't think they are candidates for moving to the stack
      changed |= after->set(operand, AllocationState::Freed);
    }
  } else if (mlir::isa<fir::ResultOp>(op)) {
    // propagate the state at the end of a region to the parent operation so
    // that frees inside nested regions are visible at the function's exits
    mlir::Operation *parent = op->getParentOp();
    LatticePoint *parentLattice = getLattice(getProgramPointAfter(parent));
    assert(parentLattice);
    mlir::ChangeResult parentChanged = parentLattice->join(*after);
    propagateIfChanged(parentLattice, parentChanged);
  }

  // we pass lattices straight through fir.call because called functions should
  // not deallocate flang-generated array temporaries

  LLVM_DEBUG(llvm::dbgs() << "--Lattice out: " << *after << "\n");
  propagateIfChanged(after, changed);
  return mlir::success();
}
426 | |
427 | void AllocationAnalysis::setToEntryState(LatticePoint *lattice) { |
428 | propagateIfChanged(lattice, lattice->reset()); |
429 | } |
430 | |
/// Mostly a copy of the base class's processOperation - the difference
/// being that call operations are passed through to the transfer function
/// instead of receiving special call-site handling
mlir::LogicalResult AllocationAnalysis::processOperation(mlir::Operation *op) {
  mlir::ProgramPoint *point = getProgramPointAfter(op);
  // If the containing block is not executable, bail out.
  if (op->getBlock() != nullptr &&
      !getOrCreateFor<mlir::dataflow::Executable>(
           point, getProgramPointBefore(op->getBlock()))
           ->isLive())
    return mlir::success();

  // Get the dense lattice to update
  mlir::dataflow::AbstractDenseLattice *after = getLattice(point);

  // If this op implements region control-flow, then control-flow dictates its
  // transfer function.
  if (auto branch = mlir::dyn_cast<mlir::RegionBranchOpInterface>(op)) {
    visitRegionBranchOperation(point, branch, after);
    return mlir::success();
  }

  // pass call operations through to the transfer function

  // Get the dense state before the execution of the op.
  const mlir::dataflow::AbstractDenseLattice *before =
      getLatticeFor(point, getProgramPointBefore(op));

  /// Invoke the operation transfer function
  return visitOperationImpl(op, *before, after);
}
461 | |
/// Run the allocation dataflow analysis over \p func, then record every
/// fir.allocmem that is freed on all paths to the function's exits (plus
/// where to insert its replacement alloca) in funcMaps.
llvm::LogicalResult
StackArraysAnalysisWrapper::analyseFunction(mlir::Operation *func) {
  assert(mlir::isa<mlir::func::FuncOp>(func));
  size_t nAllocs = 0;
  func->walk([&nAllocs](fir::AllocMemOp) { nAllocs++; });
  // don't bother with the analysis if there are no heap allocations
  if (nAllocs == 0)
    return mlir::success();
  if ((maxAllocsPerFunc != 0) && (nAllocs > maxAllocsPerFunc)) {
    LLVM_DEBUG(llvm::dbgs() << "Skipping stack arrays for function with "
                            << nAllocs << " heap allocations");
    return mlir::success();
  }

  mlir::DataFlowSolver solver;
  // constant propagation is required for dead code analysis, dead code analysis
  // is required to mark blocks live (required for mlir dense dfa)
  solver.load<mlir::dataflow::SparseConstantPropagation>();
  solver.load<mlir::dataflow::DeadCodeAnalysis>();

  auto [it, inserted] = funcMaps.try_emplace(func);
  AllocMemMap &candidateOps = it->second;

  solver.load<AllocationAnalysis>();
  if (failed(solver.initializeAndRun(func))) {
    llvm::errs() << "DataFlowSolver failed!";
    return mlir::failure();
  }

  // Join the lattices at every function exit so `point` reflects what is
  // known on all return paths combined.
  LatticePoint point{solver.getProgramPointAfter(func)};
  auto joinOperationLattice = [&](mlir::Operation *op) {
    const LatticePoint *lattice =
        solver.lookupState<LatticePoint>(solver.getProgramPointAfter(op));
    // there will be no lattice for an unreachable block
    if (lattice)
      (void)point.join(*lattice);
  };

  func->walk([&](mlir::func::ReturnOp child) { joinOperationLattice(child); });
  func->walk([&](fir::UnreachableOp child) { joinOperationLattice(child); });
  func->walk(
      [&](mlir::omp::TerminatorOp child) { joinOperationLattice(child); });
  func->walk([&](mlir::omp::YieldOp child) { joinOperationLattice(child); });

  llvm::DenseSet<mlir::Value> freedValues;
  point.appendFreedValues(freedValues);

  // Find all fir.freemem operations corresponding to fir.allocmem
  // in freedValues. It is best to find the association going back
  // from fir.freemem to fir.allocmem through the def-use chains,
  // so that we can use lookThroughDeclaresAndConverts same way
  // the AllocationAnalysis is handling them.
  llvm::DenseMap<mlir::Operation *, llvm::SmallVector<mlir::Operation *>>
      allocToFreeMemMap;
  func->walk([&](fir::FreeMemOp freeOp) {
    mlir::Value memref = lookThroughDeclaresAndConverts(freeOp.getHeapref());
    if (!freedValues.count(memref))
      return;

    auto allocMem = memref.getDefiningOp<fir::AllocMemOp>();
    allocToFreeMemMap[allocMem].push_back(freeOp);
  });

  // We only replace allocations which are definitely freed on all routes
  // through the function because otherwise the allocation may have an intended
  // lifetime longer than the current stack frame (e.g. a heap allocation which
  // is then freed by another function).
  for (mlir::Value freedValue : freedValues) {
    fir::AllocMemOp allocmem = freedValue.getDefiningOp<fir::AllocMemOp>();
    InsertionPoint insertionPoint =
        AllocMemConversion::findAllocaInsertionPoint(
            allocmem, allocToFreeMemMap[allocmem]);
    if (insertionPoint)
      candidateOps.insert({allocmem, insertionPoint});
  }

  LLVM_DEBUG(for (auto [allocMemOp, _]
                  : candidateOps) {
    llvm::dbgs() << "StackArrays: Found candidate op: " << *allocMemOp << '\n';
  });
  return mlir::success();
}
544 | |
545 | const StackArraysAnalysisWrapper::AllocMemMap * |
546 | StackArraysAnalysisWrapper::getCandidateOps(mlir::Operation *func) { |
547 | if (!funcMaps.contains(func)) |
548 | if (mlir::failed(analyseFunction(func))) |
549 | return nullptr; |
550 | return &funcMaps[func]; |
551 | } |
552 | |
553 | /// Restore the old allocation type exected by existing code |
554 | static mlir::Value convertAllocationType(mlir::PatternRewriter &rewriter, |
555 | const mlir::Location &loc, |
556 | mlir::Value heap, mlir::Value stack) { |
557 | mlir::Type heapTy = heap.getType(); |
558 | mlir::Type stackTy = stack.getType(); |
559 | |
560 | if (heapTy == stackTy) |
561 | return stack; |
562 | |
563 | fir::HeapType firHeapTy = mlir::cast<fir::HeapType>(heapTy); |
564 | LLVM_ATTRIBUTE_UNUSED fir::ReferenceType firRefTy = |
565 | mlir::cast<fir::ReferenceType>(stackTy); |
566 | assert(firHeapTy.getElementType() == firRefTy.getElementType() && |
567 | "Allocations must have the same type" ); |
568 | |
569 | auto insertionPoint = rewriter.saveInsertionPoint(); |
570 | rewriter.setInsertionPointAfter(stack.getDefiningOp()); |
571 | mlir::Value conv = |
572 | rewriter.create<fir::ConvertOp>(loc, firHeapTy, stack).getResult(); |
573 | rewriter.restoreInsertionPoint(insertionPoint); |
574 | return conv; |
575 | } |
576 | |
/// Replace one candidate fir.allocmem with a fir.alloca: create the alloca at
/// the pre-computed insertion point, erase all matching fir.freemem ops, and
/// redirect users of the heap value to the (type-converted) stack value.
llvm::LogicalResult
AllocMemConversion::matchAndRewrite(fir::AllocMemOp allocmem,
                                    mlir::PatternRewriter &rewriter) const {
  auto oldInsertionPt = rewriter.saveInsertionPoint();
  // add alloca operation
  std::optional<fir::AllocaOp> alloca = insertAlloca(allocmem, rewriter);
  rewriter.restoreInsertionPoint(oldInsertionPt);
  if (!alloca)
    return mlir::failure();

  // remove freemem operations
  llvm::SmallVector<mlir::Operation *> erases;
  mlir::Operation *parent = allocmem->getParentOp();
  // TODO: this shouldn't need to be re-calculated for every allocmem
  parent->walk([&](fir::FreeMemOp freeOp) {
    if (lookThroughDeclaresAndConverts(freeOp->getOperand(0)) == allocmem)
      erases.push_back(freeOp);
  });

  // now we are done iterating the users, it is safe to mutate them
  for (mlir::Operation *erase : erases)
    rewriter.eraseOp(erase);

  // replace references to heap allocation with references to stack allocation
  mlir::Value newValue = convertAllocationType(
      rewriter, allocmem.getLoc(), allocmem.getResult(), alloca->getResult());
  rewriter.replaceAllUsesWith(allocmem.getResult(), newValue);

  // remove allocmem operation
  rewriter.eraseOp(allocmem.getOperation());

  return mlir::success();
}
610 | |
/// True when \p block is part of a CFG loop
static bool isInLoop(mlir::Block *block) {
  return mlir::LoopLikeOpInterface::blockIsInLoop(block);
}
614 | |
/// True when \p op is inside a CFG loop or nested inside any operation
/// implementing LoopLikeOpInterface
static bool isInLoop(mlir::Operation *op) {
  return isInLoop(op->getBlock()) ||
         op->getParentOfType<mlir::LoopLikeOpInterface>();
}
619 | |
InsertionPoint AllocMemConversion::findAllocaInsertionPoint(
    fir::AllocMemOp &oldAlloc,
    const llvm::SmallVector<mlir::Operation *> &freeOps) {
  // Ideally the alloca should be inserted at the end of the function entry
  // block so that we do not allocate stack space in a loop. However,
  // the operands to the alloca may not be available that early, so insert it
  // after the last operand becomes available
  // If the old allocmem op was in an openmp region then it should not be moved
  // outside of that
  LLVM_DEBUG(llvm::dbgs() << "StackArrays: findAllocaInsertionPoint: "
                          << oldAlloc << "\n");

  // check that an Operation or Block we are about to return is not in a loop
  auto checkReturn = [&](auto *point) -> InsertionPoint {
    if (isInLoop(point)) {
      mlir::Operation *oldAllocOp = oldAlloc.getOperation();
      if (isInLoop(oldAllocOp)) {
        // where we want to put it is in a loop, and even the old location is in
        // a loop. Give up.
        return findAllocaLoopInsertionPoint(oldAlloc, freeOps);
      }
      // the desired point is in a loop but the old location is not: fall back
      // to the old location
      return {oldAllocOp};
    }
    return {point};
  };

  auto oldOmpRegion =
      oldAlloc->getParentOfType<mlir::omp::OutlineableOpenMPOpInterface>();

  // Find when the last operand value becomes available
  mlir::Block *operandsBlock = nullptr;
  mlir::Operation *lastOperand = nullptr;
  for (mlir::Value operand : oldAlloc.getOperands()) {
    LLVM_DEBUG(llvm::dbgs() << "--considering operand " << operand << "\n");
    mlir::Operation *op = operand.getDefiningOp();
    // a block argument has no defining op: keep the allocmem's own location
    if (!op)
      return checkReturn(oldAlloc.getOperation());
    if (!operandsBlock)
      operandsBlock = op->getBlock();
    else if (operandsBlock != op->getBlock()) {
      LLVM_DEBUG(llvm::dbgs()
                 << "----operand declared in a different block!\n");
      // Operation::isBeforeInBlock requires the operations to be in the same
      // block. The best we can do is the location of the allocmem.
      return checkReturn(oldAlloc.getOperation());
    }
    if (!lastOperand || lastOperand->isBeforeInBlock(op))
      lastOperand = op;
  }

  if (lastOperand) {
    // there were value operands to the allocmem so insert after the last one
    LLVM_DEBUG(llvm::dbgs()
               << "--Placing after last operand: " << *lastOperand << "\n");
    // check we aren't moving out of an omp region
    auto lastOpOmpRegion =
        lastOperand->getParentOfType<mlir::omp::OutlineableOpenMPOpInterface>();
    if (lastOpOmpRegion == oldOmpRegion)
      return checkReturn(lastOperand);
    // Presumably this happened because the operands became ready before the
    // start of this openmp region. (lastOpOmpRegion != oldOmpRegion) should
    // imply that oldOmpRegion comes after lastOpOmpRegion.
    return checkReturn(oldOmpRegion.getAllocaBlock());
  }

  // There were no value operands to the allocmem so we are safe to insert it
  // as early as we want

  // handle openmp case
  if (oldOmpRegion)
    return checkReturn(oldOmpRegion.getAllocaBlock());

  // fall back to the function entry block
  mlir::func::FuncOp func = oldAlloc->getParentOfType<mlir::func::FuncOp>();
  assert(func && "This analysis is run on func.func");
  mlir::Block &entryBlock = func.getBlocks().front();
  LLVM_DEBUG(llvm::dbgs() << "--Placing at the start of func entry block\n");
  return checkReturn(&entryBlock);
}
699 | |
InsertionPoint AllocMemConversion::findAllocaLoopInsertionPoint(
    fir::AllocMemOp &oldAlloc,
    const llvm::SmallVector<mlir::Operation *> &freeOps) {
  mlir::Operation *oldAllocOp = oldAlloc;
  // This is only called as a last resort. We should try to insert at the
  // location of the old allocation, which is inside of a loop, using
  // llvm.stacksave/llvm.stackrestore

  assert(freeOps.size() && "DFA should only return freed memory");

  // Don't attempt to reason about a stacksave/stackrestore between different
  // blocks
  for (mlir::Operation *free : freeOps)
    if (free->getBlock() != oldAllocOp->getBlock())
      return {nullptr};

  // Check that there aren't any other stack allocations in between the
  // stack save and stack restore, as they would be wiped by the restore.
  // note: for flang generated temporaries there should only be one free op
  for (mlir::Operation *free : freeOps) {
    // walk forward from the allocation to this free within the shared block
    for (mlir::Operation *op = oldAlloc; op && op != free;
         op = op->getNextNode()) {
      if (mlir::isa<fir::AllocaOp>(op))
        return {nullptr};
    }
  }

  return InsertionPoint{oldAllocOp, /*shouldStackSaveRestore=*/true};
}
729 | |
730 | std::optional<fir::AllocaOp> |
731 | AllocMemConversion::insertAlloca(fir::AllocMemOp &oldAlloc, |
732 | mlir::PatternRewriter &rewriter) const { |
733 | auto it = candidateOps.find(oldAlloc.getOperation()); |
734 | if (it == candidateOps.end()) |
735 | return {}; |
736 | InsertionPoint insertionPoint = it->second; |
737 | if (!insertionPoint) |
738 | return {}; |
739 | |
740 | if (insertionPoint.shouldSaveRestoreStack()) |
741 | insertStackSaveRestore(oldAlloc, rewriter); |
742 | |
743 | mlir::Location loc = oldAlloc.getLoc(); |
744 | mlir::Type varTy = oldAlloc.getInType(); |
745 | if (mlir::Operation *op = insertionPoint.tryGetOperation()) { |
746 | rewriter.setInsertionPointAfter(op); |
747 | } else { |
748 | mlir::Block *block = insertionPoint.tryGetBlock(); |
749 | assert(block && "There must be a valid insertion point" ); |
750 | rewriter.setInsertionPointToStart(block); |
751 | } |
752 | |
753 | auto unpackName = [](std::optional<llvm::StringRef> opt) -> llvm::StringRef { |
754 | if (opt) |
755 | return *opt; |
756 | return {}; |
757 | }; |
758 | |
759 | llvm::StringRef uniqName = unpackName(oldAlloc.getUniqName()); |
760 | llvm::StringRef bindcName = unpackName(oldAlloc.getBindcName()); |
761 | auto alloca = rewriter.create<fir::AllocaOp>(loc, varTy, uniqName, bindcName, |
762 | oldAlloc.getTypeparams(), |
763 | oldAlloc.getShape()); |
764 | if (emitLifetimeMarkers) |
765 | insertLifetimeMarkers(oldAlloc, alloca, rewriter); |
766 | |
767 | return alloca; |
768 | } |
769 | |
770 | static void |
771 | visitFreeMemOp(fir::AllocMemOp oldAlloc, |
772 | const std::function<void(mlir::Operation *)> &callBack) { |
773 | for (mlir::Operation *user : oldAlloc->getUsers()) { |
774 | if (auto declareOp = mlir::dyn_cast_if_present<fir::DeclareOp>(user)) { |
775 | for (mlir::Operation *user : declareOp->getUsers()) { |
776 | if (mlir::isa<fir::FreeMemOp>(user)) |
777 | callBack(user); |
778 | } |
779 | } |
780 | |
781 | if (mlir::isa<fir::FreeMemOp>(user)) |
782 | callBack(user); |
783 | } |
784 | } |
785 | |
786 | void AllocMemConversion::insertStackSaveRestore( |
787 | fir::AllocMemOp oldAlloc, mlir::PatternRewriter &rewriter) const { |
788 | mlir::OpBuilder::InsertionGuard insertGuard(rewriter); |
789 | auto mod = oldAlloc->getParentOfType<mlir::ModuleOp>(); |
790 | fir::FirOpBuilder builder{rewriter, mod}; |
791 | |
792 | builder.setInsertionPoint(oldAlloc); |
793 | mlir::Value sp = builder.genStackSave(oldAlloc.getLoc()); |
794 | |
795 | auto createStackRestoreCall = [&](mlir::Operation *user) { |
796 | builder.setInsertionPoint(user); |
797 | builder.genStackRestore(user->getLoc(), sp); |
798 | }; |
799 | visitFreeMemOp(oldAlloc, createStackRestoreCall); |
800 | } |
801 | |
802 | void AllocMemConversion::insertLifetimeMarkers( |
803 | fir::AllocMemOp oldAlloc, fir::AllocaOp newAlloc, |
804 | mlir::PatternRewriter &rewriter) const { |
805 | if (!dl || !kindMap) |
806 | return; |
807 | llvm::StringRef attrName = fir::getHasLifetimeMarkerAttrName(); |
808 | // Do not add lifetime markers if the alloca already has any. |
809 | if (newAlloc->hasAttr(attrName)) |
810 | return; |
811 | if (std::optional<int64_t> size = |
812 | fir::getAllocaByteSize(newAlloc, *dl, *kindMap)) { |
813 | mlir::OpBuilder::InsertionGuard insertGuard(rewriter); |
814 | rewriter.setInsertionPoint(oldAlloc); |
815 | mlir::Value ptr = fir::factory::genLifetimeStart( |
816 | rewriter, newAlloc.getLoc(), newAlloc, *size, &*dl); |
817 | visitFreeMemOp(oldAlloc, [&](mlir::Operation *op) { |
818 | rewriter.setInsertionPoint(op); |
819 | fir::factory::genLifetimeEnd(rewriter, op->getLoc(), ptr, *size); |
820 | }); |
821 | newAlloc->setAttr(attrName, rewriter.getUnitAttr()); |
822 | } |
823 | } |
824 | |
// Copy constructor required by the MLIR pass infrastructure (passes are
// cloned); delegates to the tablegen-generated base class.
StackArraysPass::StackArraysPass(const StackArraysPass &pass)
    : fir::impl::StackArraysBase<StackArraysPass>(pass) {}
827 | |
// One-line pass summary shown by the pass registry (e.g. in tool --help).
llvm::StringRef StackArraysPass::getDescription() const {
  return "Move heap allocated array temporaries to the stack" ;
}
831 | |
832 | void StackArraysPass::runOnOperation() { |
833 | mlir::func::FuncOp func = getOperation(); |
834 | |
835 | auto &analysis = getAnalysis<StackArraysAnalysisWrapper>(); |
836 | const StackArraysAnalysisWrapper::AllocMemMap *candidateOps = |
837 | analysis.getCandidateOps(func); |
838 | if (!candidateOps) { |
839 | signalPassFailure(); |
840 | return; |
841 | } |
842 | |
843 | if (candidateOps->empty()) |
844 | return; |
845 | runCount += candidateOps->size(); |
846 | |
847 | llvm::SmallVector<mlir::Operation *> opsToConvert; |
848 | opsToConvert.reserve(candidateOps->size()); |
849 | for (auto [op, _] : *candidateOps) |
850 | opsToConvert.push_back(op); |
851 | |
852 | mlir::MLIRContext &context = getContext(); |
853 | mlir::RewritePatternSet patterns(&context); |
854 | mlir::GreedyRewriteConfig config; |
855 | // prevent the pattern driver form merging blocks |
856 | config.setRegionSimplificationLevel( |
857 | mlir::GreedySimplifyRegionLevel::Disabled); |
858 | |
859 | auto module = func->getParentOfType<mlir::ModuleOp>(); |
860 | std::optional<mlir::DataLayout> dl = |
861 | module ? fir::support::getOrSetMLIRDataLayout( |
862 | module, /*allowDefaultLayout=*/false) |
863 | : std::nullopt; |
864 | std::optional<fir::KindMapping> kindMap; |
865 | if (module) |
866 | kindMap = fir::getKindMapping(module); |
867 | |
868 | patterns.insert<AllocMemConversion>(&context, *candidateOps, dl, kindMap); |
869 | if (mlir::failed(mlir::applyOpPatternsGreedily( |
870 | opsToConvert, std::move(patterns), config))) { |
871 | mlir::emitError(func->getLoc(), "error in stack arrays optimization\n" ); |
872 | signalPassFailure(); |
873 | } |
874 | } |
875 | |