1//===- StackArrays.cpp ----------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "flang/Optimizer/Builder/FIRBuilder.h"
10#include "flang/Optimizer/Builder/LowLevelIntrinsics.h"
11#include "flang/Optimizer/Dialect/FIRAttr.h"
12#include "flang/Optimizer/Dialect/FIRDialect.h"
13#include "flang/Optimizer/Dialect/FIROps.h"
14#include "flang/Optimizer/Dialect/FIRType.h"
15#include "flang/Optimizer/Dialect/Support/FIRContext.h"
16#include "flang/Optimizer/Support/DataLayout.h"
17#include "flang/Optimizer/Transforms/Passes.h"
18#include "mlir/Analysis/DataFlow/ConstantPropagationAnalysis.h"
19#include "mlir/Analysis/DataFlow/DeadCodeAnalysis.h"
20#include "mlir/Analysis/DataFlow/DenseAnalysis.h"
21#include "mlir/Analysis/DataFlowFramework.h"
22#include "mlir/Dialect/DLTI/DLTI.h"
23#include "mlir/Dialect/Func/IR/FuncOps.h"
24#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
25#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
26#include "mlir/IR/Builders.h"
27#include "mlir/IR/Diagnostics.h"
28#include "mlir/IR/Value.h"
29#include "mlir/Interfaces/LoopLikeInterface.h"
30#include "mlir/Pass/Pass.h"
31#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
32#include "mlir/Transforms/Passes.h"
33#include "llvm/ADT/DenseMap.h"
34#include "llvm/ADT/DenseSet.h"
35#include "llvm/ADT/PointerUnion.h"
36#include "llvm/Support/Casting.h"
37#include "llvm/Support/raw_ostream.h"
38#include <optional>
39
40namespace fir {
41#define GEN_PASS_DEF_STACKARRAYS
42#include "flang/Optimizer/Transforms/Passes.h.inc"
43} // namespace fir
44
45#define DEBUG_TYPE "stack-arrays"
46
// Cap on the number of fir.allocmem ops analysed per function; the dataflow
// analysis cost grows with the allocation count, so very large functions are
// skipped entirely to bound compile time.
static llvm::cl::opt<std::size_t> maxAllocsPerFunc(
    "stack-arrays-max-allocs",
    llvm::cl::desc("The maximum number of heap allocations to consider in one "
                   "function before skipping (to save compilation time). Set "
                   "to 0 for no limit."),
    llvm::cl::init(1000), llvm::cl::Hidden);

// When set, emit llvm.lifetime.start/end markers around the new allocas
// (constant-size only) so later LLVM passes can reuse the stack slots.
static llvm::cl::opt<bool> emitLifetimeMarkers(
    "stack-arrays-lifetime",
    llvm::cl::desc("Add lifetime markers to generated constant size allocas"),
    llvm::cl::init(false), llvm::cl::Hidden);
58
59namespace {
60
/// The state of an SSA value at each program point
enum class AllocationState {
  /// This means that the allocation state of a variable cannot be determined
  /// at this program point, e.g. because one route through a conditional freed
  /// the variable and the other route didn't.
  /// This asserts a known-unknown: different from the unknown-unknown of having
  /// no AllocationState stored for a particular SSA value
  Unknown,
  /// Means this SSA value was allocated on the heap in this function and has
  /// now been freed (the property that makes it safe to move to the stack)
  Freed,
  /// Means this SSA value was allocated on the heap in this function and is a
  /// candidate for moving to the stack, but has not (yet) been seen to be freed
  Allocated,
};
76
77/// Stores where an alloca should be inserted. If the PointerUnion is an
78/// Operation the alloca should be inserted /after/ the operation. If it is a
79/// block, the alloca can be placed anywhere in that block.
80class InsertionPoint {
81 llvm::PointerUnion<mlir::Operation *, mlir::Block *> location;
82 bool saveRestoreStack;
83
84 /// Get contained pointer type or nullptr
85 template <class T>
86 T *tryGetPtr() const {
87 // Use llvm::dyn_cast_if_present because location may be null here.
88 if (T *ptr = llvm::dyn_cast_if_present<T *>(location))
89 return ptr;
90 return nullptr;
91 }
92
93public:
94 template <class T>
95 InsertionPoint(T *ptr, bool saveRestoreStack = false)
96 : location(ptr), saveRestoreStack{saveRestoreStack} {}
97 InsertionPoint(std::nullptr_t null)
98 : location(null), saveRestoreStack{false} {}
99
100 /// Get contained operation, or nullptr
101 mlir::Operation *tryGetOperation() const {
102 return tryGetPtr<mlir::Operation>();
103 }
104
105 /// Get contained block, or nullptr
106 mlir::Block *tryGetBlock() const { return tryGetPtr<mlir::Block>(); }
107
108 /// Get whether the stack should be saved/restored. If yes, an llvm.stacksave
109 /// intrinsic should be added before the alloca, and an llvm.stackrestore
110 /// intrinsic should be added where the freemem is
111 bool shouldSaveRestoreStack() const { return saveRestoreStack; }
112
113 operator bool() const { return tryGetOperation() || tryGetBlock(); }
114
115 bool operator==(const InsertionPoint &rhs) const {
116 return (location == rhs.location) &&
117 (saveRestoreStack == rhs.saveRestoreStack);
118 }
119
120 bool operator!=(const InsertionPoint &rhs) const { return !(*this == rhs); }
121};
122
/// Maps SSA values to their AllocationState at a particular program point.
/// Also caches the insertion points for the new alloca operations
class LatticePoint : public mlir::dataflow::AbstractDenseLattice {
  // Maps all values we are interested in to states
  llvm::SmallDenseMap<mlir::Value, AllocationState, 1> stateMap;

public:
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(LatticePoint)
  using AbstractDenseLattice::AbstractDenseLattice;

  bool operator==(const LatticePoint &rhs) const {
    return stateMap == rhs.stateMap;
  }

  /// Join the lattice across control-flow edges
  mlir::ChangeResult join(const AbstractDenseLattice &lattice) override;

  void print(llvm::raw_ostream &os) const override;

  /// Clear all modifications
  mlir::ChangeResult reset();

  /// Set the state of an SSA value
  mlir::ChangeResult set(mlir::Value value, AllocationState state);

  /// Get fir.allocmem ops which were allocated in this function and always
  /// freed before the function returns, plus where to insert replacement
  /// fir.alloca ops
  void appendFreedValues(llvm::DenseSet<mlir::Value> &out) const;

  /// Get the recorded state of \p val, or std::nullopt if it is not tracked
  std::optional<AllocationState> get(mlir::Value val) const;
};
155
/// Forward dense dataflow analysis tracking the AllocationState of each
/// fir.allocmem result through the function
class AllocationAnalysis
    : public mlir::dataflow::DenseForwardDataFlowAnalysis<LatticePoint> {
public:
  using DenseForwardDataFlowAnalysis::DenseForwardDataFlowAnalysis;

  /// Transfer function: compute \p after from \p before accounting for the
  /// effect of \p op (allocations, frees, region results)
  mlir::LogicalResult visitOperation(mlir::Operation *op,
                                     const LatticePoint &before,
                                     LatticePoint *after) override;

  /// At an entry point, the last modifications of all memory resources are
  /// yet to be determined
  void setToEntryState(LatticePoint *lattice) override;

protected:
  /// Visit control flow operations and decide whether to call visitOperation
  /// to apply the transfer function
  mlir::LogicalResult processOperation(mlir::Operation *op) override;
};
174
/// Drives analysis to find candidate fir.allocmem operations which could be
/// moved to the stack. Intended to be used with mlir::Pass::getAnalysis
class StackArraysAnalysisWrapper {
public:
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(StackArraysAnalysisWrapper)

  // Maps fir.allocmem -> place to insert alloca
  using AllocMemMap = llvm::DenseMap<mlir::Operation *, InsertionPoint>;

  // The operation argument is unused: the analysis is run lazily per function
  // from getCandidateOps rather than at construction time
  StackArraysAnalysisWrapper(mlir::Operation *op) {}

  // returns nullptr if analysis failed
  const AllocMemMap *getCandidateOps(mlir::Operation *func);

private:
  // Cached per-function candidate maps, populated by analyseFunction
  llvm::DenseMap<mlir::Operation *, AllocMemMap> funcMaps;

  // Run the dataflow analysis for \p func and record candidates in funcMaps
  llvm::LogicalResult analyseFunction(mlir::Operation *func);
};
194
/// Converts a fir.allocmem to a fir.alloca
class AllocMemConversion : public mlir::OpRewritePattern<fir::AllocMemOp> {
public:
  explicit AllocMemConversion(
      mlir::MLIRContext *ctx,
      const StackArraysAnalysisWrapper::AllocMemMap &candidateOps,
      std::optional<mlir::DataLayout> &dl,
      std::optional<fir::KindMapping> &kindMap)
      : OpRewritePattern(ctx), candidateOps{candidateOps}, dl{dl},
        kindMap{kindMap} {}

  llvm::LogicalResult
  matchAndRewrite(fir::AllocMemOp allocmem,
                  mlir::PatternRewriter &rewriter) const override;

  /// Determine where to insert the alloca operation. The returned value should
  /// be checked to see if it is inside a loop
  static InsertionPoint
  findAllocaInsertionPoint(fir::AllocMemOp &oldAlloc,
                           const llvm::SmallVector<mlir::Operation *> &freeOps);

private:
  /// Handle to the DFA (already run)
  const StackArraysAnalysisWrapper::AllocMemMap &candidateOps;

  // Data layout and kind mapping, needed to compute alloca byte sizes for
  // lifetime markers; may be empty if the module does not provide them
  const std::optional<mlir::DataLayout> &dl;
  const std::optional<fir::KindMapping> &kindMap;

  /// If we failed to find an insertion point not inside a loop, see if it would
  /// be safe to use an llvm.stacksave/llvm.stackrestore inside the loop
  static InsertionPoint findAllocaLoopInsertionPoint(
      fir::AllocMemOp &oldAlloc,
      const llvm::SmallVector<mlir::Operation *> &freeOps);

  /// Returns the alloca if it was successfully inserted, otherwise {}
  std::optional<fir::AllocaOp>
  insertAlloca(fir::AllocMemOp &oldAlloc,
               mlir::PatternRewriter &rewriter) const;

  /// Inserts a stacksave before oldAlloc and a stackrestore after each freemem
  void insertStackSaveRestore(fir::AllocMemOp oldAlloc,
                              mlir::PatternRewriter &rewriter) const;
  /// Emit lifetime markers for newAlloc between oldAlloc and each freemem.
  /// If the allocation is dynamic, no lifetime markers are emitted.
  void insertLifetimeMarkers(fir::AllocMemOp oldAlloc, fir::AllocaOp newAlloc,
                             mlir::PatternRewriter &rewriter) const;
};
242
/// The stack-arrays pass: rewrites heap-allocated array temporaries that are
/// always freed before the function returns into stack allocations
class StackArraysPass : public fir::impl::StackArraysBase<StackArraysPass> {
public:
  StackArraysPass() = default;
  StackArraysPass(const StackArraysPass &pass);

  llvm::StringRef getDescription() const override;

  void runOnOperation() override;

private:
  // Number of allocations rewritten, reported via pass statistics
  Statistic runCount{this, "stackArraysRunCount",
                     "Number of heap allocations moved to the stack"};
};
256
257} // namespace
258
259static void print(llvm::raw_ostream &os, AllocationState state) {
260 switch (state) {
261 case AllocationState::Unknown:
262 os << "Unknown";
263 break;
264 case AllocationState::Freed:
265 os << "Freed";
266 break;
267 case AllocationState::Allocated:
268 os << "Allocated";
269 break;
270 }
271}
272
273/// Join two AllocationStates for the same value coming from different CFG
274/// blocks
275static AllocationState join(AllocationState lhs, AllocationState rhs) {
276 // | Allocated | Freed | Unknown
277 // ========= | ========= | ========= | =========
278 // Allocated | Allocated | Unknown | Unknown
279 // Freed | Unknown | Freed | Unknown
280 // Unknown | Unknown | Unknown | Unknown
281 if (lhs == rhs)
282 return lhs;
283 return AllocationState::Unknown;
284}
285
286mlir::ChangeResult LatticePoint::join(const AbstractDenseLattice &lattice) {
287 const auto &rhs = static_cast<const LatticePoint &>(lattice);
288 mlir::ChangeResult changed = mlir::ChangeResult::NoChange;
289
290 // add everything from rhs to map, handling cases where values are in both
291 for (const auto &[value, rhsState] : rhs.stateMap) {
292 auto it = stateMap.find(value);
293 if (it != stateMap.end()) {
294 // value is present in both maps
295 AllocationState myState = it->second;
296 AllocationState newState = ::join(myState, rhsState);
297 if (newState != myState) {
298 changed = mlir::ChangeResult::Change;
299 it->getSecond() = newState;
300 }
301 } else {
302 // value not present in current map: add it
303 stateMap.insert({value, rhsState});
304 changed = mlir::ChangeResult::Change;
305 }
306 }
307
308 return changed;
309}
310
311void LatticePoint::print(llvm::raw_ostream &os) const {
312 for (const auto &[value, state] : stateMap) {
313 os << "\n * " << value << ": ";
314 ::print(os, state);
315 }
316}
317
318mlir::ChangeResult LatticePoint::reset() {
319 if (stateMap.empty())
320 return mlir::ChangeResult::NoChange;
321 stateMap.clear();
322 return mlir::ChangeResult::Change;
323}
324
325mlir::ChangeResult LatticePoint::set(mlir::Value value, AllocationState state) {
326 if (stateMap.count(value)) {
327 // already in map
328 AllocationState &oldState = stateMap[value];
329 if (oldState != state) {
330 stateMap[value] = state;
331 return mlir::ChangeResult::Change;
332 }
333 return mlir::ChangeResult::NoChange;
334 }
335 stateMap.insert({value, state});
336 return mlir::ChangeResult::Change;
337}
338
339/// Get values which were allocated in this function and always freed before
340/// the function returns
341void LatticePoint::appendFreedValues(llvm::DenseSet<mlir::Value> &out) const {
342 for (auto &[value, state] : stateMap) {
343 if (state == AllocationState::Freed)
344 out.insert(value);
345 }
346}
347
348std::optional<AllocationState> LatticePoint::get(mlir::Value val) const {
349 auto it = stateMap.find(val);
350 if (it == stateMap.end())
351 return {};
352 return it->second;
353}
354
355static mlir::Value lookThroughDeclaresAndConverts(mlir::Value value) {
356 while (mlir::Operation *op = value.getDefiningOp()) {
357 if (auto declareOp = llvm::dyn_cast<fir::DeclareOp>(op))
358 value = declareOp.getMemref();
359 else if (auto convertOp = llvm::dyn_cast<fir::ConvertOp>(op))
360 value = convertOp->getOperand(0);
361 else
362 return value;
363 }
364 return value;
365}
366
/// Transfer function. Starting from `before`, records heap array allocations
/// as Allocated, marks locally-allocated values as Freed at their
/// fir.freemem, and forwards region-exit state to the parent operation.
mlir::LogicalResult AllocationAnalysis::visitOperation(
    mlir::Operation *op, const LatticePoint &before, LatticePoint *after) {
  LLVM_DEBUG(llvm::dbgs() << "StackArrays: Visiting operation: " << *op
                          << "\n");
  LLVM_DEBUG(llvm::dbgs() << "--Lattice in: " << before << "\n");

  // propagate before -> after
  mlir::ChangeResult changed = after->join(before);

  if (auto allocmem = mlir::dyn_cast<fir::AllocMemOp>(op)) {
    assert(op->getNumResults() == 1 && "fir.allocmem has one result");
    auto attr = op->getAttrOfType<fir::MustBeHeapAttr>(
        fir::MustBeHeapAttr::getAttrName());
    if (attr && attr.getValue()) {
      LLVM_DEBUG(llvm::dbgs() << "--Found fir.must_be_heap: skipping\n");
      // skip allocation marked not to be moved
      return mlir::success();
    }

    // only array allocations are of interest to this pass
    auto retTy = allocmem.getAllocatedType();
    if (!mlir::isa<fir::SequenceType>(retTy)) {
      LLVM_DEBUG(llvm::dbgs()
                 << "--Allocation is not for an array: skipping\n");
      return mlir::success();
    }

    mlir::Value result = op->getResult(0);
    changed |= after->set(result, AllocationState::Allocated);
  } else if (mlir::isa<fir::FreeMemOp>(op)) {
    assert(op->getNumOperands() == 1 && "fir.freemem has one operand");
    mlir::Value operand = op->getOperand(0);

    // Note: StackArrays is scheduled in the pass pipeline after lowering hlfir
    // to fir. Therefore, we only need to handle `fir::DeclareOp`s. Also look
    // past converts in case the pointer was changed between different pointer
    // types.
    operand = lookThroughDeclaresAndConverts(operand);

    std::optional<AllocationState> operandState = before.get(operand);
    if (operandState && *operandState == AllocationState::Allocated) {
      // don't tag things not allocated in this function as freed, so that we
      // don't think they are candidates for moving to the stack
      changed |= after->set(operand, AllocationState::Freed);
    }
  } else if (mlir::isa<fir::ResultOp>(op)) {
    // a region is exiting: merge this state into the lattice of the parent
    // operation so it survives past the region
    mlir::Operation *parent = op->getParentOp();
    LatticePoint *parentLattice = getLattice(getProgramPointAfter(parent));
    assert(parentLattice);
    mlir::ChangeResult parentChanged = parentLattice->join(*after);
    propagateIfChanged(parentLattice, parentChanged);
  }

  // we pass lattices straight through fir.call because called functions should
  // not deallocate flang-generated array temporaries

  LLVM_DEBUG(llvm::dbgs() << "--Lattice out: " << *after << "\n");
  propagateIfChanged(after, changed);
  return mlir::success();
}
426
427void AllocationAnalysis::setToEntryState(LatticePoint *lattice) {
428 propagateIfChanged(lattice, lattice->reset());
429}
430
/// Mostly a copy of AbstractDenseLattice::processOperation - the difference
/// being that call operations are passed through to the transfer function
/// (this pass relies on calls not freeing flang-generated temporaries)
mlir::LogicalResult AllocationAnalysis::processOperation(mlir::Operation *op) {
  mlir::ProgramPoint *point = getProgramPointAfter(op);
  // If the containing block is not executable, bail out.
  if (op->getBlock() != nullptr &&
      !getOrCreateFor<mlir::dataflow::Executable>(
           point, getProgramPointBefore(op->getBlock()))
           ->isLive())
    return mlir::success();

  // Get the dense lattice to update
  mlir::dataflow::AbstractDenseLattice *after = getLattice(point);

  // If this op implements region control-flow, then control-flow dictates its
  // transfer function.
  if (auto branch = mlir::dyn_cast<mlir::RegionBranchOpInterface>(op)) {
    visitRegionBranchOperation(point, branch, after);
    return mlir::success();
  }

  // pass call operations through to the transfer function
  // (the upstream implementation would dispatch them via
  // CallOpInterface/visitCallOperation instead)

  // Get the dense state before the execution of the op.
  const mlir::dataflow::AbstractDenseLattice *before =
      getLatticeFor(point, getProgramPointBefore(op));

  /// Invoke the operation transfer function
  return visitOperationImpl(op, *before, after);
}
461
462llvm::LogicalResult
463StackArraysAnalysisWrapper::analyseFunction(mlir::Operation *func) {
464 assert(mlir::isa<mlir::func::FuncOp>(func));
465 size_t nAllocs = 0;
466 func->walk([&nAllocs](fir::AllocMemOp) { nAllocs++; });
467 // don't bother with the analysis if there are no heap allocations
468 if (nAllocs == 0)
469 return mlir::success();
470 if ((maxAllocsPerFunc != 0) && (nAllocs > maxAllocsPerFunc)) {
471 LLVM_DEBUG(llvm::dbgs() << "Skipping stack arrays for function with "
472 << nAllocs << " heap allocations");
473 return mlir::success();
474 }
475
476 mlir::DataFlowSolver solver;
477 // constant propagation is required for dead code analysis, dead code analysis
478 // is required to mark blocks live (required for mlir dense dfa)
479 solver.load<mlir::dataflow::SparseConstantPropagation>();
480 solver.load<mlir::dataflow::DeadCodeAnalysis>();
481
482 auto [it, inserted] = funcMaps.try_emplace(func);
483 AllocMemMap &candidateOps = it->second;
484
485 solver.load<AllocationAnalysis>();
486 if (failed(solver.initializeAndRun(func))) {
487 llvm::errs() << "DataFlowSolver failed!";
488 return mlir::failure();
489 }
490
491 LatticePoint point{solver.getProgramPointAfter(func)};
492 auto joinOperationLattice = [&](mlir::Operation *op) {
493 const LatticePoint *lattice =
494 solver.lookupState<LatticePoint>(solver.getProgramPointAfter(op));
495 // there will be no lattice for an unreachable block
496 if (lattice)
497 (void)point.join(*lattice);
498 };
499
500 func->walk([&](mlir::func::ReturnOp child) { joinOperationLattice(child); });
501 func->walk([&](fir::UnreachableOp child) { joinOperationLattice(child); });
502 func->walk(
503 [&](mlir::omp::TerminatorOp child) { joinOperationLattice(child); });
504 func->walk([&](mlir::omp::YieldOp child) { joinOperationLattice(child); });
505
506 llvm::DenseSet<mlir::Value> freedValues;
507 point.appendFreedValues(freedValues);
508
509 // Find all fir.freemem operations corresponding to fir.allocmem
510 // in freedValues. It is best to find the association going back
511 // from fir.freemem to fir.allocmem through the def-use chains,
512 // so that we can use lookThroughDeclaresAndConverts same way
513 // the AllocationAnalysis is handling them.
514 llvm::DenseMap<mlir::Operation *, llvm::SmallVector<mlir::Operation *>>
515 allocToFreeMemMap;
516 func->walk([&](fir::FreeMemOp freeOp) {
517 mlir::Value memref = lookThroughDeclaresAndConverts(freeOp.getHeapref());
518 if (!freedValues.count(memref))
519 return;
520
521 auto allocMem = memref.getDefiningOp<fir::AllocMemOp>();
522 allocToFreeMemMap[allocMem].push_back(freeOp);
523 });
524
525 // We only replace allocations which are definately freed on all routes
526 // through the function because otherwise the allocation may have an intende
527 // lifetime longer than the current stack frame (e.g. a heap allocation which
528 // is then freed by another function).
529 for (mlir::Value freedValue : freedValues) {
530 fir::AllocMemOp allocmem = freedValue.getDefiningOp<fir::AllocMemOp>();
531 InsertionPoint insertionPoint =
532 AllocMemConversion::findAllocaInsertionPoint(
533 allocmem, allocToFreeMemMap[allocmem]);
534 if (insertionPoint)
535 candidateOps.insert({allocmem, insertionPoint});
536 }
537
538 LLVM_DEBUG(for (auto [allocMemOp, _]
539 : candidateOps) {
540 llvm::dbgs() << "StackArrays: Found candidate op: " << *allocMemOp << '\n';
541 });
542 return mlir::success();
543}
544
545const StackArraysAnalysisWrapper::AllocMemMap *
546StackArraysAnalysisWrapper::getCandidateOps(mlir::Operation *func) {
547 if (!funcMaps.contains(func))
548 if (mlir::failed(analyseFunction(func)))
549 return nullptr;
550 return &funcMaps[func];
551}
552
553/// Restore the old allocation type exected by existing code
554static mlir::Value convertAllocationType(mlir::PatternRewriter &rewriter,
555 const mlir::Location &loc,
556 mlir::Value heap, mlir::Value stack) {
557 mlir::Type heapTy = heap.getType();
558 mlir::Type stackTy = stack.getType();
559
560 if (heapTy == stackTy)
561 return stack;
562
563 fir::HeapType firHeapTy = mlir::cast<fir::HeapType>(heapTy);
564 LLVM_ATTRIBUTE_UNUSED fir::ReferenceType firRefTy =
565 mlir::cast<fir::ReferenceType>(stackTy);
566 assert(firHeapTy.getElementType() == firRefTy.getElementType() &&
567 "Allocations must have the same type");
568
569 auto insertionPoint = rewriter.saveInsertionPoint();
570 rewriter.setInsertionPointAfter(stack.getDefiningOp());
571 mlir::Value conv =
572 rewriter.create<fir::ConvertOp>(loc, firHeapTy, stack).getResult();
573 rewriter.restoreInsertionPoint(insertionPoint);
574 return conv;
575}
576
/// Replace one candidate fir.allocmem: create the fir.alloca at the
/// pre-computed insertion point, erase the corresponding fir.freemem ops,
/// redirect all uses, then erase the fir.allocmem itself.
llvm::LogicalResult
AllocMemConversion::matchAndRewrite(fir::AllocMemOp allocmem,
                                    mlir::PatternRewriter &rewriter) const {
  auto oldInsertionPt = rewriter.saveInsertionPoint();
  // add alloca operation
  std::optional<fir::AllocaOp> alloca = insertAlloca(allocmem, rewriter);
  rewriter.restoreInsertionPoint(oldInsertionPt);
  if (!alloca)
    return mlir::failure();

  // remove freemem operations
  llvm::SmallVector<mlir::Operation *> erases;
  mlir::Operation *parent = allocmem->getParentOp();
  // TODO: this shouldn't need to be re-calculated for every allocmem
  parent->walk([&](fir::FreeMemOp freeOp) {
    // look through declares/converts so frees of a wrapped pointer are found
    if (lookThroughDeclaresAndConverts(freeOp->getOperand(0)) == allocmem)
      erases.push_back(freeOp);
  });

  // now we are done iterating the users, it is safe to mutate them
  for (mlir::Operation *erase : erases)
    rewriter.eraseOp(erase);

  // replace references to heap allocation with references to stack allocation
  mlir::Value newValue = convertAllocationType(
      rewriter, allocmem.getLoc(), allocmem.getResult(), alloca->getResult());
  rewriter.replaceAllUsesWith(allocmem.getResult(), newValue);

  // remove allocmem operation
  rewriter.eraseOp(allocmem.getOperation());

  return mlir::success();
}
610
/// Whether \p block belongs to a CFG cycle (an unstructured loop of blocks).
static bool isInLoop(mlir::Block *block) {
  return mlir::LoopLikeOpInterface::blockIsInLoop(block);
}
614
615static bool isInLoop(mlir::Operation *op) {
616 return isInLoop(op->getBlock()) ||
617 op->getParentOfType<mlir::LoopLikeOpInterface>();
618}
619
/// Decide where the replacement fir.alloca should go for \p oldAlloc, given
/// the fir.freemem ops (\p freeOps) that release it.
InsertionPoint AllocMemConversion::findAllocaInsertionPoint(
    fir::AllocMemOp &oldAlloc,
    const llvm::SmallVector<mlir::Operation *> &freeOps) {
  // Ideally the alloca should be inserted at the end of the function entry
  // block so that we do not allocate stack space in a loop. However,
  // the operands to the alloca may not be available that early, so insert it
  // after the last operand becomes available
  // If the old allocmem op was in an openmp region then it should not be moved
  // outside of that
  LLVM_DEBUG(llvm::dbgs() << "StackArrays: findAllocaInsertionPoint: "
                          << oldAlloc << "\n");

  // check that an Operation or Block we are about to return is not in a loop
  auto checkReturn = [&](auto *point) -> InsertionPoint {
    if (isInLoop(point)) {
      mlir::Operation *oldAllocOp = oldAlloc.getOperation();
      if (isInLoop(oldAllocOp)) {
        // where we want to put it is in a loop, and even the old location is in
        // a loop. Give up.
        return findAllocaLoopInsertionPoint(oldAlloc, freeOps);
      }
      // the original location is not in a loop, so fall back to it
      return {oldAllocOp};
    }
    return {point};
  };

  auto oldOmpRegion =
      oldAlloc->getParentOfType<mlir::omp::OutlineableOpenMPOpInterface>();

  // Find when the last operand value becomes available
  mlir::Block *operandsBlock = nullptr;
  mlir::Operation *lastOperand = nullptr;
  for (mlir::Value operand : oldAlloc.getOperands()) {
    LLVM_DEBUG(llvm::dbgs() << "--considering operand " << operand << "\n");
    mlir::Operation *op = operand.getDefiningOp();
    // a block argument has no defining op; keep the allocation where it is
    if (!op)
      return checkReturn(oldAlloc.getOperation());
    if (!operandsBlock)
      operandsBlock = op->getBlock();
    else if (operandsBlock != op->getBlock()) {
      LLVM_DEBUG(llvm::dbgs()
                 << "----operand declared in a different block!\n");
      // Operation::isBeforeInBlock requires the operations to be in the same
      // block. The best we can do is the location of the allocmem.
      return checkReturn(oldAlloc.getOperation());
    }
    if (!lastOperand || lastOperand->isBeforeInBlock(op))
      lastOperand = op;
  }

  if (lastOperand) {
    // there were value operands to the allocmem so insert after the last one
    LLVM_DEBUG(llvm::dbgs()
               << "--Placing after last operand: " << *lastOperand << "\n");
    // check we aren't moving out of an omp region
    auto lastOpOmpRegion =
        lastOperand->getParentOfType<mlir::omp::OutlineableOpenMPOpInterface>();
    if (lastOpOmpRegion == oldOmpRegion)
      return checkReturn(lastOperand);
    // Presumably this happened because the operands became ready before the
    // start of this openmp region. (lastOpOmpRegion != oldOmpRegion) should
    // imply that oldOmpRegion comes after lastOpOmpRegion.
    return checkReturn(oldOmpRegion.getAllocaBlock());
  }

  // There were no value operands to the allocmem so we are safe to insert it
  // as early as we want

  // handle openmp case
  if (oldOmpRegion)
    return checkReturn(oldOmpRegion.getAllocaBlock());

  // fall back to the function entry block
  mlir::func::FuncOp func = oldAlloc->getParentOfType<mlir::func::FuncOp>();
  assert(func && "This analysis is run on func.func");
  mlir::Block &entryBlock = func.getBlocks().front();
  LLVM_DEBUG(llvm::dbgs() << "--Placing at the start of func entry block\n");
  return checkReturn(&entryBlock);
}
699
700InsertionPoint AllocMemConversion::findAllocaLoopInsertionPoint(
701 fir::AllocMemOp &oldAlloc,
702 const llvm::SmallVector<mlir::Operation *> &freeOps) {
703 mlir::Operation *oldAllocOp = oldAlloc;
704 // This is only called as a last resort. We should try to insert at the
705 // location of the old allocation, which is inside of a loop, using
706 // llvm.stacksave/llvm.stackrestore
707
708 assert(freeOps.size() && "DFA should only return freed memory");
709
710 // Don't attempt to reason about a stacksave/stackrestore between different
711 // blocks
712 for (mlir::Operation *free : freeOps)
713 if (free->getBlock() != oldAllocOp->getBlock())
714 return {nullptr};
715
716 // Check that there aren't any other stack allocations in between the
717 // stack save and stack restore
718 // note: for flang generated temporaries there should only be one free op
719 for (mlir::Operation *free : freeOps) {
720 for (mlir::Operation *op = oldAlloc; op && op != free;
721 op = op->getNextNode()) {
722 if (mlir::isa<fir::AllocaOp>(op))
723 return {nullptr};
724 }
725 }
726
727 return InsertionPoint{oldAllocOp, /*shouldStackSaveRestore=*/true};
728}
729
/// Create the fir.alloca replacing \p oldAlloc at its pre-computed insertion
/// point, emitting stacksave/stackrestore and lifetime markers as required.
/// Returns {} if \p oldAlloc is not a candidate or has no insertion point.
std::optional<fir::AllocaOp>
AllocMemConversion::insertAlloca(fir::AllocMemOp &oldAlloc,
                                 mlir::PatternRewriter &rewriter) const {
  auto it = candidateOps.find(oldAlloc.getOperation());
  if (it == candidateOps.end())
    return {};
  InsertionPoint insertionPoint = it->second;
  if (!insertionPoint)
    return {};

  // the alloca stays inside a loop: bracket it with stacksave/stackrestore
  if (insertionPoint.shouldSaveRestoreStack())
    insertStackSaveRestore(oldAlloc, rewriter);

  mlir::Location loc = oldAlloc.getLoc();
  mlir::Type varTy = oldAlloc.getInType();
  // an Operation insertion point means "after that op"; a Block means
  // "anywhere in the block", so the start is used
  if (mlir::Operation *op = insertionPoint.tryGetOperation()) {
    rewriter.setInsertionPointAfter(op);
  } else {
    mlir::Block *block = insertionPoint.tryGetBlock();
    assert(block && "There must be a valid insertion point");
    rewriter.setInsertionPointToStart(block);
  }

  // convert the optional names from the allocmem to plain (possibly empty)
  // StringRefs for the alloca builder
  auto unpackName = [](std::optional<llvm::StringRef> opt) -> llvm::StringRef {
    if (opt)
      return *opt;
    return {};
  };

  llvm::StringRef uniqName = unpackName(oldAlloc.getUniqName());
  llvm::StringRef bindcName = unpackName(oldAlloc.getBindcName());
  auto alloca = rewriter.create<fir::AllocaOp>(loc, varTy, uniqName, bindcName,
                                               oldAlloc.getTypeparams(),
                                               oldAlloc.getShape());
  if (emitLifetimeMarkers)
    insertLifetimeMarkers(oldAlloc, alloca, rewriter);

  return alloca;
}
769
770static void
771visitFreeMemOp(fir::AllocMemOp oldAlloc,
772 const std::function<void(mlir::Operation *)> &callBack) {
773 for (mlir::Operation *user : oldAlloc->getUsers()) {
774 if (auto declareOp = mlir::dyn_cast_if_present<fir::DeclareOp>(user)) {
775 for (mlir::Operation *user : declareOp->getUsers()) {
776 if (mlir::isa<fir::FreeMemOp>(user))
777 callBack(user);
778 }
779 }
780
781 if (mlir::isa<fir::FreeMemOp>(user))
782 callBack(user);
783 }
784}
785
786void AllocMemConversion::insertStackSaveRestore(
787 fir::AllocMemOp oldAlloc, mlir::PatternRewriter &rewriter) const {
788 mlir::OpBuilder::InsertionGuard insertGuard(rewriter);
789 auto mod = oldAlloc->getParentOfType<mlir::ModuleOp>();
790 fir::FirOpBuilder builder{rewriter, mod};
791
792 builder.setInsertionPoint(oldAlloc);
793 mlir::Value sp = builder.genStackSave(oldAlloc.getLoc());
794
795 auto createStackRestoreCall = [&](mlir::Operation *user) {
796 builder.setInsertionPoint(user);
797 builder.genStackRestore(user->getLoc(), sp);
798 };
799 visitFreeMemOp(oldAlloc, createStackRestoreCall);
800}
801
/// Emit llvm.lifetime.start before \p oldAlloc and llvm.lifetime.end at each
/// freemem for \p newAlloc. Skipped when the alloca's byte size cannot be
/// computed (dynamic size, or no data layout / kind map available).
void AllocMemConversion::insertLifetimeMarkers(
    fir::AllocMemOp oldAlloc, fir::AllocaOp newAlloc,
    mlir::PatternRewriter &rewriter) const {
  if (!dl || !kindMap)
    return;
  llvm::StringRef attrName = fir::getHasLifetimeMarkerAttrName();
  // Do not add lifetime markers if the alloca already has any.
  if (newAlloc->hasAttr(attrName))
    return;
  // getAllocaByteSize only succeeds for constant-size allocas
  if (std::optional<int64_t> size =
          fir::getAllocaByteSize(newAlloc, *dl, *kindMap)) {
    mlir::OpBuilder::InsertionGuard insertGuard(rewriter);
    rewriter.setInsertionPoint(oldAlloc);
    mlir::Value ptr = fir::factory::genLifetimeStart(
        rewriter, newAlloc.getLoc(), newAlloc, *size, &*dl);
    visitFreeMemOp(oldAlloc, [&](mlir::Operation *op) {
      rewriter.setInsertionPoint(op);
      fir::factory::genLifetimeEnd(rewriter, op->getLoc(), ptr, *size);
    });
    // mark the alloca so a second run does not emit duplicate markers
    newAlloc->setAttr(attrName, rewriter.getUnitAttr());
  }
}
824
// Copy constructor required by the MLIR pass infrastructure (passes may be
// cloned, e.g. for multithreaded pass managers)
StackArraysPass::StackArraysPass(const StackArraysPass &pass)
    : fir::impl::StackArraysBase<StackArraysPass>(pass) {}
827
// One-line summary shown in pass documentation / -help output
llvm::StringRef StackArraysPass::getDescription() const {
  return "Move heap allocated array temporaries to the stack";
}
831
/// Pass entry point: query the analysis for candidate fir.allocmem ops in
/// this function and greedily apply AllocMemConversion to each of them.
void StackArraysPass::runOnOperation() {
  mlir::func::FuncOp func = getOperation();

  auto &analysis = getAnalysis<StackArraysAnalysisWrapper>();
  const StackArraysAnalysisWrapper::AllocMemMap *candidateOps =
      analysis.getCandidateOps(func);
  // nullptr means the dataflow analysis itself failed
  if (!candidateOps) {
    signalPassFailure();
    return;
  }

  if (candidateOps->empty())
    return;
  runCount += candidateOps->size();

  // apply the pattern only to the known candidates, not the whole function
  llvm::SmallVector<mlir::Operation *> opsToConvert;
  opsToConvert.reserve(candidateOps->size());
  for (auto [op, _] : *candidateOps)
    opsToConvert.push_back(op);

  mlir::MLIRContext &context = getContext();
  mlir::RewritePatternSet patterns(&context);
  mlir::GreedyRewriteConfig config;
  // prevent the pattern driver from merging blocks
  config.setRegionSimplificationLevel(
      mlir::GreedySimplifyRegionLevel::Disabled);

  // data layout and kind mapping are optional; only lifetime marker emission
  // needs them
  auto module = func->getParentOfType<mlir::ModuleOp>();
  std::optional<mlir::DataLayout> dl =
      module ? fir::support::getOrSetMLIRDataLayout(
                   module, /*allowDefaultLayout=*/false)
             : std::nullopt;
  std::optional<fir::KindMapping> kindMap;
  if (module)
    kindMap = fir::getKindMapping(module);

  patterns.insert<AllocMemConversion>(&context, *candidateOps, dl, kindMap);
  if (mlir::failed(mlir::applyOpPatternsGreedily(
          opsToConvert, std::move(patterns), config))) {
    mlir::emitError(func->getLoc(), "error in stack arrays optimization\n");
    signalPassFailure();
  }
}
875

// source code of flang/lib/Optimizer/Transforms/StackArrays.cpp