| 1 | //===- StackArrays.cpp ----------------------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "flang/Optimizer/Builder/FIRBuilder.h" |
| 10 | #include "flang/Optimizer/Builder/LowLevelIntrinsics.h" |
| 11 | #include "flang/Optimizer/Dialect/FIRAttr.h" |
| 12 | #include "flang/Optimizer/Dialect/FIRDialect.h" |
| 13 | #include "flang/Optimizer/Dialect/FIROps.h" |
| 14 | #include "flang/Optimizer/Dialect/FIRType.h" |
| 15 | #include "flang/Optimizer/Dialect/Support/FIRContext.h" |
| 16 | #include "flang/Optimizer/Support/DataLayout.h" |
| 17 | #include "flang/Optimizer/Transforms/Passes.h" |
| 18 | #include "mlir/Analysis/DataFlow/ConstantPropagationAnalysis.h" |
| 19 | #include "mlir/Analysis/DataFlow/DeadCodeAnalysis.h" |
| 20 | #include "mlir/Analysis/DataFlow/DenseAnalysis.h" |
| 21 | #include "mlir/Analysis/DataFlowFramework.h" |
| 22 | #include "mlir/Dialect/DLTI/DLTI.h" |
| 23 | #include "mlir/Dialect/Func/IR/FuncOps.h" |
| 24 | #include "mlir/Dialect/LLVMIR/LLVMDialect.h" |
| 25 | #include "mlir/Dialect/OpenMP/OpenMPDialect.h" |
| 26 | #include "mlir/IR/Builders.h" |
| 27 | #include "mlir/IR/Diagnostics.h" |
| 28 | #include "mlir/IR/Value.h" |
| 29 | #include "mlir/Interfaces/LoopLikeInterface.h" |
| 30 | #include "mlir/Pass/Pass.h" |
| 31 | #include "mlir/Transforms/GreedyPatternRewriteDriver.h" |
| 32 | #include "mlir/Transforms/Passes.h" |
| 33 | #include "llvm/ADT/DenseMap.h" |
| 34 | #include "llvm/ADT/DenseSet.h" |
| 35 | #include "llvm/ADT/PointerUnion.h" |
| 36 | #include "llvm/Support/Casting.h" |
| 37 | #include "llvm/Support/raw_ostream.h" |
| 38 | #include <optional> |
| 39 | |
| 40 | namespace fir { |
| 41 | #define GEN_PASS_DEF_STACKARRAYS |
| 42 | #include "flang/Optimizer/Transforms/Passes.h.inc" |
| 43 | } // namespace fir |
| 44 | |
| 45 | #define DEBUG_TYPE "stack-arrays" |
| 46 | |
// Command-line knobs for this pass (hidden: intended for testing/tuning).

/// Upper bound on the number of fir.allocmem ops considered per function;
/// analyseFunction bails out early above this limit. 0 disables the limit.
static llvm::cl::opt<std::size_t> maxAllocsPerFunc(
    "stack-arrays-max-allocs",
    llvm::cl::desc("The maximum number of heap allocations to consider in one "
                   "function before skipping (to save compilation time). Set "
                   "to 0 for no limit."),
    llvm::cl::init(1000), llvm::cl::Hidden);

/// When enabled, lifetime markers are emitted for the generated
/// constant-size allocas (see AllocMemConversion::insertLifetimeMarkers).
static llvm::cl::opt<bool> emitLifetimeMarkers(
    "stack-arrays-lifetime",
    llvm::cl::desc("Add lifetime markers to generated constant size allocas"),
    llvm::cl::init(false), llvm::cl::Hidden);
| 58 | |
| 59 | namespace { |
| 60 | |
| 61 | /// The state of an SSA value at each program point |
/// The state of an SSA value at each program point.
/// States arriving from different control-flow paths are combined by ::join
/// below: any disagreement degrades to Unknown.
enum class AllocationState {
  /// This means that the allocation state of a variable cannot be determined
  /// at this program point, e.g. because one route through a conditional freed
  /// the variable and the other route didn't.
  /// This asserts a known-unknown: different from the unknown-unknown of having
  /// no AllocationState stored for a particular SSA value
  Unknown,
  /// Means this SSA value was allocated on the heap in this function and has
  /// now been freed
  Freed,
  /// Means this SSA value was allocated on the heap in this function and is a
  /// candidate for moving to the stack
  Allocated,
};
| 76 | |
| 77 | /// Stores where an alloca should be inserted. If the PointerUnion is an |
| 78 | /// Operation the alloca should be inserted /after/ the operation. If it is a |
| 79 | /// block, the alloca can be placed anywhere in that block. |
/// Stores where an alloca should be inserted. If the PointerUnion is an
/// Operation the alloca should be inserted /after/ the operation. If it is a
/// block, the alloca can be placed anywhere in that block.
class InsertionPoint {
  // Null location means "no valid insertion point was found"
  llvm::PointerUnion<mlir::Operation *, mlir::Block *> location;
  // Whether llvm.stacksave/llvm.stackrestore must bracket the new alloca's
  // live range (set when the alloca has to stay inside a loop)
  bool saveRestoreStack;

  /// Get contained pointer type or nullptr
  template <class T>
  T *tryGetPtr() const {
    // Use llvm::dyn_cast_if_present because location may be null here.
    if (T *ptr = llvm::dyn_cast_if_present<T *>(location))
      return ptr;
    return nullptr;
  }

public:
  template <class T>
  InsertionPoint(T *ptr, bool saveRestoreStack = false)
      : location(ptr), saveRestoreStack{saveRestoreStack} {}
  /// Construct an invalid (null) insertion point
  InsertionPoint(std::nullptr_t null)
      : location(null), saveRestoreStack{false} {}

  /// Get contained operation, or nullptr
  mlir::Operation *tryGetOperation() const {
    return tryGetPtr<mlir::Operation>();
  }

  /// Get contained block, or nullptr
  mlir::Block *tryGetBlock() const { return tryGetPtr<mlir::Block>(); }

  /// Get whether the stack should be saved/restored. If yes, an llvm.stacksave
  /// intrinsic should be added before the alloca, and an llvm.stackrestore
  /// intrinsic should be added where the freemem is
  bool shouldSaveRestoreStack() const { return saveRestoreStack; }

  /// True when this holds a valid (non-null) location
  operator bool() const { return tryGetOperation() || tryGetBlock(); }

  bool operator==(const InsertionPoint &rhs) const {
    return (location == rhs.location) &&
           (saveRestoreStack == rhs.saveRestoreStack);
  }

  bool operator!=(const InsertionPoint &rhs) const { return !(*this == rhs); }
};
| 122 | |
| 123 | /// Maps SSA values to their AllocationState at a particular program point. |
| 124 | /// Also caches the insertion points for the new alloca operations |
/// Maps SSA values to their AllocationState at a particular program point.
/// Also caches the insertion points for the new alloca operations
class LatticePoint : public mlir::dataflow::AbstractDenseLattice {
  // Maps all values we are interested in to states
  llvm::SmallDenseMap<mlir::Value, AllocationState, 1> stateMap;

public:
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(LatticePoint)
  using AbstractDenseLattice::AbstractDenseLattice;

  bool operator==(const LatticePoint &rhs) const {
    return stateMap == rhs.stateMap;
  }

  /// Join the lattice across control-flow edges
  mlir::ChangeResult join(const AbstractDenseLattice &lattice) override;

  /// Print each tracked value and its state (for debugging)
  void print(llvm::raw_ostream &os) const override;

  /// Clear all modifications
  mlir::ChangeResult reset();

  /// Set the state of an SSA value
  mlir::ChangeResult set(mlir::Value value, AllocationState state);

  /// Get fir.allocmem ops which were allocated in this function and always
  /// freed before the function returns, plus where to insert replacement
  /// fir.alloca ops
  void appendFreedValues(llvm::DenseSet<mlir::Value> &out) const;

  /// Look up the state of \p val, or std::nullopt if it is not tracked
  std::optional<AllocationState> get(mlir::Value val) const;
};
| 155 | |
/// Forward dense dataflow analysis tracking the AllocationState of heap
/// allocations through the function (see visitOperation for the transfer
/// function).
class AllocationAnalysis
    : public mlir::dataflow::DenseForwardDataFlowAnalysis<LatticePoint> {
public:
  using DenseForwardDataFlowAnalysis::DenseForwardDataFlowAnalysis;

  /// Transfer function: updates the lattice for allocmem/freemem/result ops
  mlir::LogicalResult visitOperation(mlir::Operation *op,
                                     const LatticePoint &before,
                                     LatticePoint *after) override;

  /// At an entry point, the last modifications of all memory resources are
  /// yet to be determined
  void setToEntryState(LatticePoint *lattice) override;

protected:
  /// Visit control flow operations and decide whether to call visitOperation
  /// to apply the transfer function
  mlir::LogicalResult processOperation(mlir::Operation *op) override;
};
| 174 | |
| 175 | /// Drives analysis to find candidate fir.allocmem operations which could be |
| 176 | /// moved to the stack. Intended to be used with mlir::Pass::getAnalysis |
/// Drives analysis to find candidate fir.allocmem operations which could be
/// moved to the stack. Intended to be used with mlir::Pass::getAnalysis
class StackArraysAnalysisWrapper {
public:
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(StackArraysAnalysisWrapper)

  // Maps fir.allocmem -> place to insert alloca
  using AllocMemMap = llvm::DenseMap<mlir::Operation *, InsertionPoint>;

  // The operation is unused; analysis runs lazily per function through
  // getCandidateOps (the parameter is presumably required by
  // mlir::Pass::getAnalysis construction — see the class comment above)
  StackArraysAnalysisWrapper(mlir::Operation *op) {}

  // returns nullptr if analysis failed
  const AllocMemMap *getCandidateOps(mlir::Operation *func);

private:
  // Per-function result cache so each function is analysed at most once
  llvm::DenseMap<mlir::Operation *, AllocMemMap> funcMaps;

  // Runs the dataflow solver on one func.func and records candidates
  llvm::LogicalResult analyseFunction(mlir::Operation *func);
};
| 194 | |
| 195 | /// Converts a fir.allocmem to a fir.alloca |
/// Converts a fir.allocmem to a fir.alloca
class AllocMemConversion : public mlir::OpRewritePattern<fir::AllocMemOp> {
public:
  explicit AllocMemConversion(
      mlir::MLIRContext *ctx,
      const StackArraysAnalysisWrapper::AllocMemMap &candidateOps,
      std::optional<mlir::DataLayout> &dl,
      std::optional<fir::KindMapping> &kindMap)
      : OpRewritePattern(ctx), candidateOps{candidateOps}, dl{dl},
        kindMap{kindMap} {}

  /// Rewrite one candidate allocmem: insert the alloca, erase the matching
  /// freemem ops, and replace all uses of the heap value
  llvm::LogicalResult
  matchAndRewrite(fir::AllocMemOp allocmem,
                  mlir::PatternRewriter &rewriter) const override;

  /// Determine where to insert the alloca operation. The returned value should
  /// be checked to see if it is inside a loop
  static InsertionPoint
  findAllocaInsertionPoint(fir::AllocMemOp &oldAlloc,
                           const llvm::SmallVector<mlir::Operation *> &freeOps);

private:
  /// Handle to the DFA (already run)
  const StackArraysAnalysisWrapper::AllocMemMap &candidateOps;

  const std::optional<mlir::DataLayout> &dl;
  const std::optional<fir::KindMapping> &kindMap;

  /// If we failed to find an insertion point not inside a loop, see if it would
  /// be safe to use an llvm.stacksave/llvm.stackrestore inside the loop
  static InsertionPoint findAllocaLoopInsertionPoint(
      fir::AllocMemOp &oldAlloc,
      const llvm::SmallVector<mlir::Operation *> &freeOps);

  /// Returns the alloca if it was successfully inserted, otherwise {}
  std::optional<fir::AllocaOp>
  insertAlloca(fir::AllocMemOp &oldAlloc,
               mlir::PatternRewriter &rewriter) const;

  /// Inserts a stacksave before oldAlloc and a stackrestore after each freemem
  void insertStackSaveRestore(fir::AllocMemOp oldAlloc,
                              mlir::PatternRewriter &rewriter) const;
  /// Emit lifetime markers for newAlloc between oldAlloc and each freemem.
  /// If the allocation is dynamic, no lifetime markers are emitted.
  void insertLifetimeMarkers(fir::AllocMemOp oldAlloc, fir::AllocaOp newAlloc,
                             mlir::PatternRewriter &rewriter) const;
};
| 242 | |
/// The pass itself: runs the analysis and applies AllocMemConversion to the
/// candidate operations it finds
class StackArraysPass : public fir::impl::StackArraysBase<StackArraysPass> {
public:
  StackArraysPass() = default;
  StackArraysPass(const StackArraysPass &pass);

  llvm::StringRef getDescription() const override;

  void runOnOperation() override;

private:
  // Pass statistic: number of allocations moved from the heap to the stack
  Statistic runCount{this, "stackArraysRunCount",
                     "Number of heap allocations moved to the stack"};
};
| 256 | |
| 257 | } // namespace |
| 258 | |
| 259 | static void print(llvm::raw_ostream &os, AllocationState state) { |
| 260 | switch (state) { |
| 261 | case AllocationState::Unknown: |
| 262 | os << "Unknown" ; |
| 263 | break; |
| 264 | case AllocationState::Freed: |
| 265 | os << "Freed" ; |
| 266 | break; |
| 267 | case AllocationState::Allocated: |
| 268 | os << "Allocated" ; |
| 269 | break; |
| 270 | } |
| 271 | } |
| 272 | |
| 273 | /// Join two AllocationStates for the same value coming from different CFG |
| 274 | /// blocks |
| 275 | static AllocationState join(AllocationState lhs, AllocationState rhs) { |
| 276 | // | Allocated | Freed | Unknown |
| 277 | // ========= | ========= | ========= | ========= |
| 278 | // Allocated | Allocated | Unknown | Unknown |
| 279 | // Freed | Unknown | Freed | Unknown |
| 280 | // Unknown | Unknown | Unknown | Unknown |
| 281 | if (lhs == rhs) |
| 282 | return lhs; |
| 283 | return AllocationState::Unknown; |
| 284 | } |
| 285 | |
| 286 | mlir::ChangeResult LatticePoint::join(const AbstractDenseLattice &lattice) { |
| 287 | const auto &rhs = static_cast<const LatticePoint &>(lattice); |
| 288 | mlir::ChangeResult changed = mlir::ChangeResult::NoChange; |
| 289 | |
| 290 | // add everything from rhs to map, handling cases where values are in both |
| 291 | for (const auto &[value, rhsState] : rhs.stateMap) { |
| 292 | auto it = stateMap.find(value); |
| 293 | if (it != stateMap.end()) { |
| 294 | // value is present in both maps |
| 295 | AllocationState myState = it->second; |
| 296 | AllocationState newState = ::join(myState, rhsState); |
| 297 | if (newState != myState) { |
| 298 | changed = mlir::ChangeResult::Change; |
| 299 | it->getSecond() = newState; |
| 300 | } |
| 301 | } else { |
| 302 | // value not present in current map: add it |
| 303 | stateMap.insert({value, rhsState}); |
| 304 | changed = mlir::ChangeResult::Change; |
| 305 | } |
| 306 | } |
| 307 | |
| 308 | return changed; |
| 309 | } |
| 310 | |
| 311 | void LatticePoint::print(llvm::raw_ostream &os) const { |
| 312 | for (const auto &[value, state] : stateMap) { |
| 313 | os << "\n * " << value << ": " ; |
| 314 | ::print(os, state); |
| 315 | } |
| 316 | } |
| 317 | |
| 318 | mlir::ChangeResult LatticePoint::reset() { |
| 319 | if (stateMap.empty()) |
| 320 | return mlir::ChangeResult::NoChange; |
| 321 | stateMap.clear(); |
| 322 | return mlir::ChangeResult::Change; |
| 323 | } |
| 324 | |
| 325 | mlir::ChangeResult LatticePoint::set(mlir::Value value, AllocationState state) { |
| 326 | if (stateMap.count(value)) { |
| 327 | // already in map |
| 328 | AllocationState &oldState = stateMap[value]; |
| 329 | if (oldState != state) { |
| 330 | stateMap[value] = state; |
| 331 | return mlir::ChangeResult::Change; |
| 332 | } |
| 333 | return mlir::ChangeResult::NoChange; |
| 334 | } |
| 335 | stateMap.insert({value, state}); |
| 336 | return mlir::ChangeResult::Change; |
| 337 | } |
| 338 | |
| 339 | /// Get values which were allocated in this function and always freed before |
| 340 | /// the function returns |
| 341 | void LatticePoint::appendFreedValues(llvm::DenseSet<mlir::Value> &out) const { |
| 342 | for (auto &[value, state] : stateMap) { |
| 343 | if (state == AllocationState::Freed) |
| 344 | out.insert(value); |
| 345 | } |
| 346 | } |
| 347 | |
| 348 | std::optional<AllocationState> LatticePoint::get(mlir::Value val) const { |
| 349 | auto it = stateMap.find(val); |
| 350 | if (it == stateMap.end()) |
| 351 | return {}; |
| 352 | return it->second; |
| 353 | } |
| 354 | |
| 355 | static mlir::Value lookThroughDeclaresAndConverts(mlir::Value value) { |
| 356 | while (mlir::Operation *op = value.getDefiningOp()) { |
| 357 | if (auto declareOp = llvm::dyn_cast<fir::DeclareOp>(op)) |
| 358 | value = declareOp.getMemref(); |
| 359 | else if (auto convertOp = llvm::dyn_cast<fir::ConvertOp>(op)) |
| 360 | value = convertOp->getOperand(0); |
| 361 | else |
| 362 | return value; |
| 363 | } |
| 364 | return value; |
| 365 | } |
| 366 | |
/// Transfer function: propagates `before` into `after`, then records the
/// effect of allocmem (-> Allocated), freemem (-> Freed) and fir.result
/// (forward region state to the parent op) on the lattice.
mlir::LogicalResult AllocationAnalysis::visitOperation(
    mlir::Operation *op, const LatticePoint &before, LatticePoint *after) {
  LLVM_DEBUG(llvm::dbgs() << "StackArrays: Visiting operation: " << *op
                          << "\n");
  LLVM_DEBUG(llvm::dbgs() << "--Lattice in: " << before << "\n");

  // propagate before -> after
  mlir::ChangeResult changed = after->join(before);

  if (auto allocmem = mlir::dyn_cast<fir::AllocMemOp>(op)) {
    assert(op->getNumResults() == 1 && "fir.allocmem has one result");
    // fir.must_be_heap marks allocations that must not be moved to the stack
    auto attr = op->getAttrOfType<fir::MustBeHeapAttr>(
        fir::MustBeHeapAttr::getAttrName());
    if (attr && attr.getValue()) {
      LLVM_DEBUG(llvm::dbgs() << "--Found fir.must_be_heap: skipping\n");
      // skip allocation marked not to be moved
      return mlir::success();
    }

    // only array (fir.SequenceType) allocations are candidates
    auto retTy = allocmem.getAllocatedType();
    if (!mlir::isa<fir::SequenceType>(retTy)) {
      LLVM_DEBUG(llvm::dbgs()
                 << "--Allocation is not for an array: skipping\n");
      return mlir::success();
    }

    mlir::Value result = op->getResult(0);
    changed |= after->set(result, AllocationState::Allocated);
  } else if (mlir::isa<fir::FreeMemOp>(op)) {
    assert(op->getNumOperands() == 1 && "fir.freemem has one operand");
    mlir::Value operand = op->getOperand(0);

    // Note: StackArrays is scheduled in the pass pipeline after lowering hlfir
    // to fir. Therefore, we only need to handle `fir::DeclareOp`s. Also look
    // past converts in case the pointer was changed between different pointer
    // types.
    operand = lookThroughDeclaresAndConverts(operand);

    std::optional<AllocationState> operandState = before.get(operand);
    if (operandState && *operandState == AllocationState::Allocated) {
      // don't tag things not allocated in this function as freed, so that we
      // don't think they are candidates for moving to the stack
      changed |= after->set(operand, AllocationState::Freed);
    }
  } else if (mlir::isa<fir::ResultOp>(op)) {
    // forward the state at the end of a region to the lattice after the
    // parent operation, so it survives past the region
    mlir::Operation *parent = op->getParentOp();
    LatticePoint *parentLattice = getLattice(getProgramPointAfter(parent));
    assert(parentLattice);
    mlir::ChangeResult parentChanged = parentLattice->join(*after);
    propagateIfChanged(parentLattice, parentChanged);
  }

  // we pass lattices straight through fir.call because called functions should
  // not deallocate flang-generated array temporaries

  LLVM_DEBUG(llvm::dbgs() << "--Lattice out: " << *after << "\n");
  propagateIfChanged(after, changed);
  return mlir::success();
}
| 426 | |
/// At a function entry nothing is known about any value: clear the lattice
/// (the "unknown-unknown" state of having no entry in the map at all)
void AllocationAnalysis::setToEntryState(LatticePoint *lattice) {
  propagateIfChanged(lattice, lattice->reset());
}
| 430 | |
/// Mostly a copy of AbstractDenseLattice::processOperation - the difference
/// being that call operations are passed through to the transfer function
/// (so fir.call does not invalidate the tracked allocation states)
mlir::LogicalResult AllocationAnalysis::processOperation(mlir::Operation *op) {
  mlir::ProgramPoint *point = getProgramPointAfter(op);
  // If the containing block is not executable, bail out.
  if (op->getBlock() != nullptr &&
      !getOrCreateFor<mlir::dataflow::Executable>(
           point, getProgramPointBefore(op->getBlock()))
           ->isLive())
    return mlir::success();

  // Get the dense lattice to update
  mlir::dataflow::AbstractDenseLattice *after = getLattice(point);

  // If this op implements region control-flow, then control-flow dictates its
  // transfer function.
  if (auto branch = mlir::dyn_cast<mlir::RegionBranchOpInterface>(op)) {
    visitRegionBranchOperation(point, branch, after);
    return mlir::success();
  }

  // pass call operations through to the transfer function
  // (deliberately no special handling of CallOpInterface here)

  // Get the dense state before the execution of the op.
  const mlir::dataflow::AbstractDenseLattice *before =
      getLatticeFor(point, getProgramPointBefore(op));

  // Invoke the operation transfer function
  return visitOperationImpl(op, *before, after);
}
| 461 | |
/// Run the dataflow analysis over one func.func, then record every
/// fir.allocmem that is freed on all paths (and where to place its
/// replacement fir.alloca) in funcMaps[func].
llvm::LogicalResult
StackArraysAnalysisWrapper::analyseFunction(mlir::Operation *func) {
  assert(mlir::isa<mlir::func::FuncOp>(func));
  size_t nAllocs = 0;
  func->walk([&nAllocs](fir::AllocMemOp) { nAllocs++; });
  // don't bother with the analysis if there are no heap allocations
  if (nAllocs == 0)
    return mlir::success();
  if ((maxAllocsPerFunc != 0) && (nAllocs > maxAllocsPerFunc)) {
    LLVM_DEBUG(llvm::dbgs() << "Skipping stack arrays for function with "
                            << nAllocs << " heap allocations");
    return mlir::success();
  }

  mlir::DataFlowSolver solver;
  // constant propagation is required for dead code analysis, dead code analysis
  // is required to mark blocks live (required for mlir dense dfa)
  solver.load<mlir::dataflow::SparseConstantPropagation>();
  solver.load<mlir::dataflow::DeadCodeAnalysis>();

  auto [it, inserted] = funcMaps.try_emplace(func);
  AllocMemMap &candidateOps = it->second;

  solver.load<AllocationAnalysis>();
  if (failed(solver.initializeAndRun(func))) {
    llvm::errs() << "DataFlowSolver failed!";
    return mlir::failure();
  }

  // join the lattices at every exit from the function so `point` only keeps
  // "Freed" for values freed on all control-flow paths
  LatticePoint point{solver.getProgramPointAfter(func)};
  auto joinOperationLattice = [&](mlir::Operation *op) {
    const LatticePoint *lattice =
        solver.lookupState<LatticePoint>(solver.getProgramPointAfter(op));
    // there will be no lattice for an unreachable block
    if (lattice)
      (void)point.join(*lattice);
  };

  func->walk([&](mlir::func::ReturnOp child) { joinOperationLattice(child); });
  func->walk([&](fir::UnreachableOp child) { joinOperationLattice(child); });
  func->walk(
      [&](mlir::omp::TerminatorOp child) { joinOperationLattice(child); });
  func->walk([&](mlir::omp::YieldOp child) { joinOperationLattice(child); });

  llvm::DenseSet<mlir::Value> freedValues;
  point.appendFreedValues(freedValues);

  // Find all fir.freemem operations corresponding to fir.allocmem
  // in freedValues. It is best to find the association going back
  // from fir.freemem to fir.allocmem through the def-use chains,
  // so that we can use lookThroughDeclaresAndConverts same way
  // the AllocationAnalysis is handling them.
  llvm::DenseMap<mlir::Operation *, llvm::SmallVector<mlir::Operation *>>
      allocToFreeMemMap;
  func->walk([&](fir::FreeMemOp freeOp) {
    mlir::Value memref = lookThroughDeclaresAndConverts(freeOp.getHeapref());
    if (!freedValues.count(memref))
      return;

    auto allocMem = memref.getDefiningOp<fir::AllocMemOp>();
    allocToFreeMemMap[allocMem].push_back(freeOp);
  });

  // We only replace allocations which are definitely freed on all routes
  // through the function because otherwise the allocation may have an intended
  // lifetime longer than the current stack frame (e.g. a heap allocation which
  // is then freed by another function).
  for (mlir::Value freedValue : freedValues) {
    fir::AllocMemOp allocmem = freedValue.getDefiningOp<fir::AllocMemOp>();
    InsertionPoint insertionPoint =
        AllocMemConversion::findAllocaInsertionPoint(
            allocmem, allocToFreeMemMap[allocmem]);
    if (insertionPoint)
      candidateOps.insert({allocmem, insertionPoint});
  }

  LLVM_DEBUG(for (auto [allocMemOp, _]
                  : candidateOps) {
    llvm::dbgs() << "StackArrays: Found candidate op: " << *allocMemOp << '\n';
  });
  return mlir::success();
}
| 544 | |
| 545 | const StackArraysAnalysisWrapper::AllocMemMap * |
| 546 | StackArraysAnalysisWrapper::getCandidateOps(mlir::Operation *func) { |
| 547 | if (!funcMaps.contains(func)) |
| 548 | if (mlir::failed(analyseFunction(func))) |
| 549 | return nullptr; |
| 550 | return &funcMaps[func]; |
| 551 | } |
| 552 | |
/// Restore the old allocation type expected by existing code: the new
/// fir.alloca yields a !fir.ref<...> whereas the old fir.allocmem yielded a
/// !fir.heap<...>, so insert a fir.convert back to the heap type when needed
static mlir::Value convertAllocationType(mlir::PatternRewriter &rewriter,
                                         const mlir::Location &loc,
                                         mlir::Value heap, mlir::Value stack) {
  mlir::Type heapTy = heap.getType();
  mlir::Type stackTy = stack.getType();

  // nothing to do if the types already agree
  if (heapTy == stackTy)
    return stack;

  fir::HeapType firHeapTy = mlir::cast<fir::HeapType>(heapTy);
  LLVM_ATTRIBUTE_UNUSED fir::ReferenceType firRefTy =
      mlir::cast<fir::ReferenceType>(stackTy);
  assert(firHeapTy.getElementType() == firRefTy.getElementType() &&
         "Allocations must have the same type");

  // place the convert right after the alloca, then restore the rewriter's
  // previous insertion point so callers are unaffected
  auto insertionPoint = rewriter.saveInsertionPoint();
  rewriter.setInsertionPointAfter(stack.getDefiningOp());
  mlir::Value conv =
      rewriter.create<fir::ConvertOp>(loc, firHeapTy, stack).getResult();
  rewriter.restoreInsertionPoint(insertionPoint);
  return conv;
}
| 576 | |
/// Rewrite one candidate fir.allocmem: insert a fir.alloca at the
/// pre-computed insertion point, erase the matching fir.freemem ops, and
/// redirect all uses of the heap value to the stack value.
llvm::LogicalResult
AllocMemConversion::matchAndRewrite(fir::AllocMemOp allocmem,
                                    mlir::PatternRewriter &rewriter) const {
  auto oldInsertionPt = rewriter.saveInsertionPoint();
  // add alloca operation
  std::optional<fir::AllocaOp> alloca = insertAlloca(allocmem, rewriter);
  rewriter.restoreInsertionPoint(oldInsertionPt);
  // insertAlloca returns {} when this allocmem is not a candidate or has no
  // valid insertion point; the pattern then does not apply
  if (!alloca)
    return mlir::failure();

  // remove freemem operations
  llvm::SmallVector<mlir::Operation *> erases;
  mlir::Operation *parent = allocmem->getParentOp();
  // TODO: this shouldn't need to be re-calculated for every allocmem
  parent->walk([&](fir::FreeMemOp freeOp) {
    if (lookThroughDeclaresAndConverts(freeOp->getOperand(0)) == allocmem)
      erases.push_back(freeOp);
  });

  // now we are done iterating the users, it is safe to mutate them
  for (mlir::Operation *erase : erases)
    rewriter.eraseOp(erase);

  // replace references to heap allocation with references to stack allocation
  mlir::Value newValue = convertAllocationType(
      rewriter, allocmem.getLoc(), allocmem.getResult(), alloca->getResult());
  rewriter.replaceAllUsesWith(allocmem.getResult(), newValue);

  // remove allocmem operation
  rewriter.eraseOp(allocmem.getOperation());

  return mlir::success();
}
| 610 | |
/// True when \p block is part of a CFG-level loop (per LoopLikeOpInterface)
static bool isInLoop(mlir::Block *block) {
  return mlir::LoopLikeOpInterface::blockIsInLoop(block);
}
| 614 | |
/// True when \p op is inside a loop: either its block is in a CFG loop or
/// it is nested under an op implementing LoopLikeOpInterface
static bool isInLoop(mlir::Operation *op) {
  return isInLoop(op->getBlock()) ||
         op->getParentOfType<mlir::LoopLikeOpInterface>();
}
| 619 | |
InsertionPoint AllocMemConversion::findAllocaInsertionPoint(
    fir::AllocMemOp &oldAlloc,
    const llvm::SmallVector<mlir::Operation *> &freeOps) {
  // Ideally the alloca should be inserted at the end of the function entry
  // block so that we do not allocate stack space in a loop. However,
  // the operands to the alloca may not be available that early, so insert it
  // after the last operand becomes available
  // If the old allocmem op was in an openmp region then it should not be moved
  // outside of that
  LLVM_DEBUG(llvm::dbgs() << "StackArrays: findAllocaInsertionPoint: "
                          << oldAlloc << "\n");

  // check that an Operation or Block we are about to return is not in a loop
  auto checkReturn = [&](auto *point) -> InsertionPoint {
    if (isInLoop(point)) {
      mlir::Operation *oldAllocOp = oldAlloc.getOperation();
      if (isInLoop(oldAllocOp)) {
        // where we want to put it is in a loop, and even the old location is in
        // a loop. Give up.
        return findAllocaLoopInsertionPoint(oldAlloc, freeOps);
      }
      // fall back to the old allocation's own (loop-free) location
      return {oldAllocOp};
    }
    return {point};
  };

  auto oldOmpRegion =
      oldAlloc->getParentOfType<mlir::omp::OutlineableOpenMPOpInterface>();

  // Find when the last operand value becomes available
  mlir::Block *operandsBlock = nullptr;
  mlir::Operation *lastOperand = nullptr;
  for (mlir::Value operand : oldAlloc.getOperands()) {
    LLVM_DEBUG(llvm::dbgs() << "--considering operand " << operand << "\n");
    mlir::Operation *op = operand.getDefiningOp();
    // block arguments have no defining op; give up and use the old location
    if (!op)
      return checkReturn(oldAlloc.getOperation());
    if (!operandsBlock)
      operandsBlock = op->getBlock();
    else if (operandsBlock != op->getBlock()) {
      LLVM_DEBUG(llvm::dbgs()
                 << "----operand declared in a different block!\n");
      // Operation::isBeforeInBlock requires the operations to be in the same
      // block. The best we can do is the location of the allocmem.
      return checkReturn(oldAlloc.getOperation());
    }
    if (!lastOperand || lastOperand->isBeforeInBlock(op))
      lastOperand = op;
  }

  if (lastOperand) {
    // there were value operands to the allocmem so insert after the last one
    LLVM_DEBUG(llvm::dbgs()
               << "--Placing after last operand: " << *lastOperand << "\n");
    // check we aren't moving out of an omp region
    auto lastOpOmpRegion =
        lastOperand->getParentOfType<mlir::omp::OutlineableOpenMPOpInterface>();
    if (lastOpOmpRegion == oldOmpRegion)
      return checkReturn(lastOperand);
    // Presumably this happened because the operands became ready before the
    // start of this openmp region. (lastOpOmpRegion != oldOmpRegion) should
    // imply that oldOmpRegion comes after lastOpOmpRegion.
    return checkReturn(oldOmpRegion.getAllocaBlock());
  }

  // There were no value operands to the allocmem so we are safe to insert it
  // as early as we want

  // handle openmp case
  if (oldOmpRegion)
    return checkReturn(oldOmpRegion.getAllocaBlock());

  // fall back to the function entry block
  mlir::func::FuncOp func = oldAlloc->getParentOfType<mlir::func::FuncOp>();
  assert(func && "This analysis is run on func.func");
  mlir::Block &entryBlock = func.getBlocks().front();
  LLVM_DEBUG(llvm::dbgs() << "--Placing at the start of func entry block\n");
  return checkReturn(&entryBlock);
}
| 699 | |
/// Last resort when both the preferred and original locations are inside a
/// loop: keep the alloca at the old allocation site, but bracket it with
/// llvm.stacksave/llvm.stackrestore so the stack does not grow per iteration.
/// Returns a null InsertionPoint when that would not be safe.
InsertionPoint AllocMemConversion::findAllocaLoopInsertionPoint(
    fir::AllocMemOp &oldAlloc,
    const llvm::SmallVector<mlir::Operation *> &freeOps) {
  mlir::Operation *oldAllocOp = oldAlloc;
  // This is only called as a last resort. We should try to insert at the
  // location of the old allocation, which is inside of a loop, using
  // llvm.stacksave/llvm.stackrestore

  assert(freeOps.size() && "DFA should only return freed memory");

  // Don't attempt to reason about a stacksave/stackrestore between different
  // blocks
  for (mlir::Operation *free : freeOps)
    if (free->getBlock() != oldAllocOp->getBlock())
      return {nullptr};

  // Check that there aren't any other stack allocations in between the
  // stack save and stack restore
  // note: for flang generated temporaries there should only be one free op
  for (mlir::Operation *free : freeOps) {
    // walk forward from the allocation to each free in the same block
    for (mlir::Operation *op = oldAlloc; op && op != free;
         op = op->getNextNode()) {
      // an intervening alloca would be wiped out by the stackrestore
      if (mlir::isa<fir::AllocaOp>(op))
        return {nullptr};
    }
  }

  return InsertionPoint{oldAllocOp, /*saveRestoreStack=*/true};
}
| 729 | |
| 730 | std::optional<fir::AllocaOp> |
| 731 | AllocMemConversion::insertAlloca(fir::AllocMemOp &oldAlloc, |
| 732 | mlir::PatternRewriter &rewriter) const { |
| 733 | auto it = candidateOps.find(oldAlloc.getOperation()); |
| 734 | if (it == candidateOps.end()) |
| 735 | return {}; |
| 736 | InsertionPoint insertionPoint = it->second; |
| 737 | if (!insertionPoint) |
| 738 | return {}; |
| 739 | |
| 740 | if (insertionPoint.shouldSaveRestoreStack()) |
| 741 | insertStackSaveRestore(oldAlloc, rewriter); |
| 742 | |
| 743 | mlir::Location loc = oldAlloc.getLoc(); |
| 744 | mlir::Type varTy = oldAlloc.getInType(); |
| 745 | if (mlir::Operation *op = insertionPoint.tryGetOperation()) { |
| 746 | rewriter.setInsertionPointAfter(op); |
| 747 | } else { |
| 748 | mlir::Block *block = insertionPoint.tryGetBlock(); |
| 749 | assert(block && "There must be a valid insertion point" ); |
| 750 | rewriter.setInsertionPointToStart(block); |
| 751 | } |
| 752 | |
| 753 | auto unpackName = [](std::optional<llvm::StringRef> opt) -> llvm::StringRef { |
| 754 | if (opt) |
| 755 | return *opt; |
| 756 | return {}; |
| 757 | }; |
| 758 | |
| 759 | llvm::StringRef uniqName = unpackName(oldAlloc.getUniqName()); |
| 760 | llvm::StringRef bindcName = unpackName(oldAlloc.getBindcName()); |
| 761 | auto alloca = rewriter.create<fir::AllocaOp>(loc, varTy, uniqName, bindcName, |
| 762 | oldAlloc.getTypeparams(), |
| 763 | oldAlloc.getShape()); |
| 764 | if (emitLifetimeMarkers) |
| 765 | insertLifetimeMarkers(oldAlloc, alloca, rewriter); |
| 766 | |
| 767 | return alloca; |
| 768 | } |
| 769 | |
| 770 | static void |
| 771 | visitFreeMemOp(fir::AllocMemOp oldAlloc, |
| 772 | const std::function<void(mlir::Operation *)> &callBack) { |
| 773 | for (mlir::Operation *user : oldAlloc->getUsers()) { |
| 774 | if (auto declareOp = mlir::dyn_cast_if_present<fir::DeclareOp>(user)) { |
| 775 | for (mlir::Operation *user : declareOp->getUsers()) { |
| 776 | if (mlir::isa<fir::FreeMemOp>(user)) |
| 777 | callBack(user); |
| 778 | } |
| 779 | } |
| 780 | |
| 781 | if (mlir::isa<fir::FreeMemOp>(user)) |
| 782 | callBack(user); |
| 783 | } |
| 784 | } |
| 785 | |
| 786 | void AllocMemConversion::insertStackSaveRestore( |
| 787 | fir::AllocMemOp oldAlloc, mlir::PatternRewriter &rewriter) const { |
| 788 | mlir::OpBuilder::InsertionGuard insertGuard(rewriter); |
| 789 | auto mod = oldAlloc->getParentOfType<mlir::ModuleOp>(); |
| 790 | fir::FirOpBuilder builder{rewriter, mod}; |
| 791 | |
| 792 | builder.setInsertionPoint(oldAlloc); |
| 793 | mlir::Value sp = builder.genStackSave(oldAlloc.getLoc()); |
| 794 | |
| 795 | auto createStackRestoreCall = [&](mlir::Operation *user) { |
| 796 | builder.setInsertionPoint(user); |
| 797 | builder.genStackRestore(user->getLoc(), sp); |
| 798 | }; |
| 799 | visitFreeMemOp(oldAlloc, createStackRestoreCall); |
| 800 | } |
| 801 | |
| 802 | void AllocMemConversion::insertLifetimeMarkers( |
| 803 | fir::AllocMemOp oldAlloc, fir::AllocaOp newAlloc, |
| 804 | mlir::PatternRewriter &rewriter) const { |
| 805 | if (!dl || !kindMap) |
| 806 | return; |
| 807 | llvm::StringRef attrName = fir::getHasLifetimeMarkerAttrName(); |
| 808 | // Do not add lifetime markers if the alloca already has any. |
| 809 | if (newAlloc->hasAttr(attrName)) |
| 810 | return; |
| 811 | if (std::optional<int64_t> size = |
| 812 | fir::getAllocaByteSize(newAlloc, *dl, *kindMap)) { |
| 813 | mlir::OpBuilder::InsertionGuard insertGuard(rewriter); |
| 814 | rewriter.setInsertionPoint(oldAlloc); |
| 815 | mlir::Value ptr = fir::factory::genLifetimeStart( |
| 816 | rewriter, newAlloc.getLoc(), newAlloc, *size, &*dl); |
| 817 | visitFreeMemOp(oldAlloc, [&](mlir::Operation *op) { |
| 818 | rewriter.setInsertionPoint(op); |
| 819 | fir::factory::genLifetimeEnd(rewriter, op->getLoc(), ptr, *size); |
| 820 | }); |
| 821 | newAlloc->setAttr(attrName, rewriter.getUnitAttr()); |
| 822 | } |
| 823 | } |
| 824 | |
/// Copy constructor required by the MLIR pass infrastructure (passes must be
/// copyable); delegates to the TableGen-generated base class.
StackArraysPass::StackArraysPass(const StackArraysPass &pass)
    : fir::impl::StackArraysBase<StackArraysPass>(pass) {}
| 827 | |
/// One-line summary of the pass, reported by the pass infrastructure.
llvm::StringRef StackArraysPass::getDescription() const {
  return "Move heap allocated array temporaries to the stack" ;
}
| 831 | |
| 832 | void StackArraysPass::runOnOperation() { |
| 833 | mlir::func::FuncOp func = getOperation(); |
| 834 | |
| 835 | auto &analysis = getAnalysis<StackArraysAnalysisWrapper>(); |
| 836 | const StackArraysAnalysisWrapper::AllocMemMap *candidateOps = |
| 837 | analysis.getCandidateOps(func); |
| 838 | if (!candidateOps) { |
| 839 | signalPassFailure(); |
| 840 | return; |
| 841 | } |
| 842 | |
| 843 | if (candidateOps->empty()) |
| 844 | return; |
| 845 | runCount += candidateOps->size(); |
| 846 | |
| 847 | llvm::SmallVector<mlir::Operation *> opsToConvert; |
| 848 | opsToConvert.reserve(candidateOps->size()); |
| 849 | for (auto [op, _] : *candidateOps) |
| 850 | opsToConvert.push_back(op); |
| 851 | |
| 852 | mlir::MLIRContext &context = getContext(); |
| 853 | mlir::RewritePatternSet patterns(&context); |
| 854 | mlir::GreedyRewriteConfig config; |
| 855 | // prevent the pattern driver form merging blocks |
| 856 | config.setRegionSimplificationLevel( |
| 857 | mlir::GreedySimplifyRegionLevel::Disabled); |
| 858 | |
| 859 | auto module = func->getParentOfType<mlir::ModuleOp>(); |
| 860 | std::optional<mlir::DataLayout> dl = |
| 861 | module ? fir::support::getOrSetMLIRDataLayout( |
| 862 | module, /*allowDefaultLayout=*/false) |
| 863 | : std::nullopt; |
| 864 | std::optional<fir::KindMapping> kindMap; |
| 865 | if (module) |
| 866 | kindMap = fir::getKindMapping(module); |
| 867 | |
| 868 | patterns.insert<AllocMemConversion>(&context, *candidateOps, dl, kindMap); |
| 869 | if (mlir::failed(mlir::applyOpPatternsGreedily( |
| 870 | opsToConvert, std::move(patterns), config))) { |
| 871 | mlir::emitError(func->getLoc(), "error in stack arrays optimization\n" ); |
| 872 | signalPassFailure(); |
| 873 | } |
| 874 | } |
| 875 | |