1 | //===- MemoryAllocation.cpp -----------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "flang/Optimizer/Dialect/FIRDialect.h" |
10 | #include "flang/Optimizer/Dialect/FIROps.h" |
11 | #include "flang/Optimizer/Dialect/FIRType.h" |
12 | #include "flang/Optimizer/Transforms/Passes.h" |
13 | #include "mlir/Dialect/Func/IR/FuncOps.h" |
14 | #include "mlir/IR/Diagnostics.h" |
15 | #include "mlir/Pass/Pass.h" |
16 | #include "mlir/Transforms/DialectConversion.h" |
17 | #include "mlir/Transforms/Passes.h" |
18 | #include "llvm/ADT/TypeSwitch.h" |
19 | |
20 | namespace fir { |
21 | #define GEN_PASS_DEF_MEMORYALLOCATIONOPT |
22 | #include "flang/Optimizer/Transforms/Passes.h.inc" |
23 | } // namespace fir |
24 | |
25 | #define DEBUG_TYPE "flang-memory-allocation-opt" |
26 | |
27 | // Number of elements in an array does not determine where it is allocated. |
28 | static constexpr std::size_t unlimitedArraySize = ~static_cast<std::size_t>(0); |
29 | |
30 | namespace { |
31 | struct MemoryAllocationOptions { |
32 | // Always move dynamic array allocations to the heap. This may result in more |
33 | // heap fragmentation, so may impact performance negatively. |
34 | bool dynamicArrayOnHeap = false; |
35 | |
36 | // Number of elements in array threshold for moving to heap. In environments |
37 | // with limited stack size, moving large arrays to the heap can avoid running |
38 | // out of stack space. |
39 | std::size_t maxStackArraySize = unlimitedArraySize; |
40 | }; |
41 | |
42 | class ReturnAnalysis { |
43 | public: |
44 | MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(ReturnAnalysis) |
45 | |
46 | ReturnAnalysis(mlir::Operation *op) { |
47 | if (auto func = mlir::dyn_cast<mlir::func::FuncOp>(op)) |
48 | for (mlir::Block &block : func) |
49 | for (mlir::Operation &i : block) |
50 | if (mlir::isa<mlir::func::ReturnOp>(i)) { |
51 | returnMap[op].push_back(&i); |
52 | break; |
53 | } |
54 | } |
55 | |
56 | llvm::SmallVector<mlir::Operation *> getReturns(mlir::Operation *func) const { |
57 | auto iter = returnMap.find(func); |
58 | if (iter != returnMap.end()) |
59 | return iter->second; |
60 | return {}; |
61 | } |
62 | |
63 | private: |
64 | llvm::DenseMap<mlir::Operation *, llvm::SmallVector<mlir::Operation *>> |
65 | returnMap; |
66 | }; |
67 | } // namespace |
68 | |
69 | /// Return `true` if this allocation is to remain on the stack (`fir.alloca`). |
70 | /// Otherwise the allocation should be moved to the heap (`fir.allocmem`). |
71 | static inline bool keepStackAllocation(fir::AllocaOp alloca, mlir::Block *entry, |
72 | const MemoryAllocationOptions &options) { |
73 | // Limitation: only arrays allocated on the stack in the entry block are |
74 | // considered for now. |
75 | // TODO: Generalize the algorithm and placement of the freemem nodes. |
76 | if (alloca->getBlock() != entry) |
77 | return true; |
78 | if (auto seqTy = alloca.getInType().dyn_cast<fir::SequenceType>()) { |
79 | if (fir::hasDynamicSize(seqTy)) { |
80 | // Move all arrays with runtime determined size to the heap. |
81 | if (options.dynamicArrayOnHeap) |
82 | return false; |
83 | } else { |
84 | std::int64_t numberOfElements = 1; |
85 | for (std::int64_t i : seqTy.getShape()) { |
86 | numberOfElements *= i; |
87 | // If the count is suspicious, then don't change anything here. |
88 | if (numberOfElements <= 0) |
89 | return true; |
90 | } |
91 | // If the number of elements exceeds the threshold, move the allocation to |
92 | // the heap. |
93 | if (static_cast<std::size_t>(numberOfElements) > |
94 | options.maxStackArraySize) { |
95 | LLVM_DEBUG(llvm::dbgs() |
96 | << "memory allocation opt: found " << alloca << '\n'); |
97 | return false; |
98 | } |
99 | } |
100 | } |
101 | return true; |
102 | } |
103 | |
104 | namespace { |
105 | class AllocaOpConversion : public mlir::OpRewritePattern<fir::AllocaOp> { |
106 | public: |
107 | using OpRewritePattern::OpRewritePattern; |
108 | |
109 | AllocaOpConversion(mlir::MLIRContext *ctx, |
110 | llvm::ArrayRef<mlir::Operation *> rets) |
111 | : OpRewritePattern(ctx), returnOps(rets) {} |
112 | |
113 | mlir::LogicalResult |
114 | matchAndRewrite(fir::AllocaOp alloca, |
115 | mlir::PatternRewriter &rewriter) const override { |
116 | auto loc = alloca.getLoc(); |
117 | mlir::Type varTy = alloca.getInType(); |
118 | auto unpackName = |
119 | [](std::optional<llvm::StringRef> opt) -> llvm::StringRef { |
120 | if (opt) |
121 | return *opt; |
122 | return {}; |
123 | }; |
124 | auto uniqName = unpackName(alloca.getUniqName()); |
125 | auto bindcName = unpackName(alloca.getBindcName()); |
126 | auto heap = rewriter.create<fir::AllocMemOp>( |
127 | loc, varTy, uniqName, bindcName, alloca.getTypeparams(), |
128 | alloca.getShape()); |
129 | auto insPt = rewriter.saveInsertionPoint(); |
130 | for (mlir::Operation *retOp : returnOps) { |
131 | rewriter.setInsertionPoint(retOp); |
132 | [[maybe_unused]] auto free = rewriter.create<fir::FreeMemOp>(loc, heap); |
133 | LLVM_DEBUG(llvm::dbgs() << "memory allocation opt: add free " << free |
134 | << " for " << heap << '\n'); |
135 | } |
136 | rewriter.restoreInsertionPoint(insPt); |
137 | rewriter.replaceOpWithNewOp<fir::ConvertOp>( |
138 | alloca, fir::ReferenceType::get(varTy), heap); |
139 | LLVM_DEBUG(llvm::dbgs() << "memory allocation opt: replaced " << alloca |
140 | << " with " << heap << '\n'); |
141 | return mlir::success(); |
142 | } |
143 | |
144 | private: |
145 | llvm::ArrayRef<mlir::Operation *> returnOps; |
146 | }; |
147 | |
148 | /// This pass can reclassify memory allocations (fir.alloca, fir.allocmem) based |
149 | /// on heuristics and settings. The intention is to allow better performance and |
150 | /// workarounds for conditions such as environments with limited stack space. |
151 | /// |
152 | /// Currently, implements two conversions from stack to heap allocation. |
153 | /// 1. If a stack allocation is an array larger than some threshold value |
154 | /// make it a heap allocation. |
155 | /// 2. If a stack allocation is an array with a runtime evaluated size make |
156 | /// it a heap allocation. |
157 | class MemoryAllocationOpt |
158 | : public fir::impl::MemoryAllocationOptBase<MemoryAllocationOpt> { |
159 | public: |
160 | MemoryAllocationOpt() { |
161 | // Set options with default values. (See Passes.td.) Note that the |
162 | // command-line options, e.g. dynamicArrayOnHeap, are not set yet. |
163 | options = {dynamicArrayOnHeap, maxStackArraySize}; |
164 | } |
165 | |
166 | MemoryAllocationOpt(bool dynOnHeap, std::size_t maxStackSize) { |
167 | // Set options with default values. (See Passes.td.) |
168 | options = {.dynamicArrayOnHeap: dynOnHeap, .maxStackArraySize: maxStackSize}; |
169 | } |
170 | |
171 | /// Override `options` if command-line options have been set. |
172 | inline void useCommandLineOptions() { |
173 | if (dynamicArrayOnHeap) |
174 | options.dynamicArrayOnHeap = dynamicArrayOnHeap; |
175 | if (maxStackArraySize != unlimitedArraySize) |
176 | options.maxStackArraySize = maxStackArraySize; |
177 | } |
178 | |
179 | void runOnOperation() override { |
180 | auto *context = &getContext(); |
181 | auto func = getOperation(); |
182 | mlir::RewritePatternSet patterns(context); |
183 | mlir::ConversionTarget target(*context); |
184 | |
185 | useCommandLineOptions(); |
186 | LLVM_DEBUG(llvm::dbgs() |
187 | << "dynamic arrays on heap: " << options.dynamicArrayOnHeap |
188 | << "\nmaximum number of elements of array on stack: " |
189 | << options.maxStackArraySize << '\n'); |
190 | |
191 | // If func is a declaration, skip it. |
192 | if (func.empty()) |
193 | return; |
194 | |
195 | const auto &analysis = getAnalysis<ReturnAnalysis>(); |
196 | |
197 | target.addLegalDialect<fir::FIROpsDialect, mlir::arith::ArithDialect, |
198 | mlir::func::FuncDialect>(); |
199 | target.addDynamicallyLegalOp<fir::AllocaOp>([&](fir::AllocaOp alloca) { |
200 | return keepStackAllocation(alloca, &func.front(), options); |
201 | }); |
202 | |
203 | llvm::SmallVector<mlir::Operation *> returnOps = analysis.getReturns(func); |
204 | patterns.insert<AllocaOpConversion>(context, returnOps); |
205 | if (mlir::failed( |
206 | mlir::applyPartialConversion(func, target, std::move(patterns)))) { |
207 | mlir::emitError(func.getLoc(), |
208 | "error in memory allocation optimization\n" ); |
209 | signalPassFailure(); |
210 | } |
211 | } |
212 | |
213 | private: |
214 | MemoryAllocationOptions options; |
215 | }; |
216 | } // namespace |
217 | |
218 | std::unique_ptr<mlir::Pass> fir::createMemoryAllocationPass() { |
219 | return std::make_unique<MemoryAllocationOpt>(); |
220 | } |
221 | |
222 | std::unique_ptr<mlir::Pass> |
223 | fir::createMemoryAllocationPass(bool dynOnHeap, std::size_t maxStackSize) { |
224 | return std::make_unique<MemoryAllocationOpt>(args&: dynOnHeap, args&: maxStackSize); |
225 | } |
226 | |