//===- MemoryUtils.cpp ----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "flang/Optimizer/Transforms/MemoryUtils.h"
#include "flang/Optimizer/Builder/FIRBuilder.h"
#include "flang/Optimizer/Builder/Todo.h"
#include "mlir/Dialect/OpenACC/OpenACC.h"
#include "mlir/IR/Dominance.h"
#include "llvm/ADT/STLExtras.h"

namespace {
/// Helper class to detect if an alloca is inside an mlir::Block that can be
/// reached again before its deallocation points via block successors. This
/// analysis is only valid if the deallocation points are inside (or nested
/// inside) the same region as the alloca because it does not consider the
/// region CFG (for instance, the block inside a fir.do_loop is obviously
/// inside a loop, but it is not a loop formed by blocks). The dominance of the
/// alloca over its deallocation points implies this pre-condition (although it
/// is more restrictive).
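///
/// For illustration, a schematic example (FIR shown loosely, block names and
/// operands are made up): the alloca below is considered to be in a block
/// cycle because ^bb1 can be reached again from itself before hitting the
/// region exit:
///
///   ^bb1:
///     %x = fir.alloca !fir.array<?xf32>, %n
///     ...
///     cf.cond_br %cond, ^bb1, ^bb2  // back edge re-entering the alloca block
///   ^bb2:
///     return
///
/// In contrast, an alloca in the body of a fir.do_loop executes repeatedly,
/// but its block has no successor edge forming a cycle, so it is not flagged
/// by this detector.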
class BlockCycleDetector {
public:
  bool allocaIsInCycle(fir::AllocaOp alloca,
                       llvm::ArrayRef<mlir::Operation *> deallocationPoints);

private:
  // Cache of the analysis result for blocks owning an alloca. In many Fortran
  // programs, most allocas are made in the same few blocks and those blocks
  // are not part of any block cycle, so getting a fast "no" is beneficial.
  llvm::DenseMap<mlir::Block *, /*isInCycle*/ bool> analyzed;
};
} // namespace

namespace {
class AllocaReplaceImpl {
public:
  AllocaReplaceImpl(fir::AllocaRewriterCallBack allocaRewriter,
                    fir::DeallocCallBack deallocGenerator)
      : allocaRewriter{allocaRewriter}, deallocGenerator{deallocGenerator} {}
  bool replace(mlir::RewriterBase &, fir::AllocaOp);

private:
  mlir::Region *findDeallocationPointsAndOwner(
      fir::AllocaOp alloca,
      llvm::SmallVectorImpl<mlir::Operation *> &deallocationPoints);
  bool
  allocDominatesDealloc(fir::AllocaOp alloca,
                        llvm::ArrayRef<mlir::Operation *> deallocationPoints) {
    return llvm::all_of(deallocationPoints, [&](mlir::Operation *deallocPoint) {
      return this->dominanceInfo.properlyDominates(alloca.getOperation(),
                                                   deallocPoint);
    });
  }
  void
  genIndirectDeallocation(mlir::RewriterBase &, fir::AllocaOp,
                          llvm::ArrayRef<mlir::Operation *> deallocationPoints,
                          mlir::Value replacement, mlir::Region &owningRegion);

private:
  fir::AllocaRewriterCallBack allocaRewriter;
  fir::DeallocCallBack deallocGenerator;
  mlir::DominanceInfo dominanceInfo;
  BlockCycleDetector blockCycleDetector;
};
} // namespace

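/// Return true if \p allocaBlock can be reached again from itself via block
/// successors without first going through one of the blocks holding the
/// deallocation points.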
static bool
allocaIsInCycleImpl(mlir::Block *allocaBlock,
                    llvm::ArrayRef<mlir::Operation *> deallocationPoints) {
  llvm::DenseSet<mlir::Block *> seen;
  // Insert the deallocation point blocks as "seen" so that the block
  // traversal will stop at them.
  for (mlir::Operation *deallocPoint : deallocationPoints)
    seen.insert(deallocPoint->getBlock());
  if (seen.contains(allocaBlock))
    return false;
  // Traverse the block successor graph starting from the alloca block.
  llvm::SmallVector<mlir::Block *> successors{allocaBlock};
  while (!successors.empty())
    for (mlir::Block *next : successors.pop_back_val()->getSuccessors()) {
      if (next == allocaBlock)
        return true;
      if (auto pair = seen.insert(next); pair.second)
        successors.push_back(next);
    }
  // The traversal did not reach the alloca block again.
  return false;
}

bool BlockCycleDetector::allocaIsInCycle(
    fir::AllocaOp alloca,
    llvm::ArrayRef<mlir::Operation *> deallocationPoints) {
  mlir::Block *allocaBlock = alloca->getBlock();
  auto analyzedPair = analyzed.try_emplace(allocaBlock, /*isInCycle=*/false);
  bool alreadyAnalyzed = !analyzedPair.second;
  bool &isInCycle = analyzedPair.first->second;
  // Fast exit if the block was already analyzed and no cycle was found.
  if (alreadyAnalyzed && !isInCycle)
    return false;
  // If the generic analysis (which ignores the deallocation points) has not
  // been run for this block yet, run it and cache the result.
  if (!alreadyAnalyzed)
    isInCycle = allocaIsInCycleImpl(allocaBlock, /*deallocationPoints=*/{});
  if (!isInCycle)
    return false;
  // The generic analysis found a block loop; check whether a deallocation
  // point would be reached before reaching the block again. Do not cache that
  // result, since it is specific to the deallocation points found for this
  // alloca.
  return allocaIsInCycleImpl(allocaBlock, deallocationPoints);
}

static bool terminatorYieldsMemory(mlir::Operation &terminator) {
  return llvm::any_of(terminator.getResults(), [](mlir::OpResult res) {
    return fir::conformsWithPassByRef(res.getType());
  });
}

static bool isRegionTerminator(mlir::Operation &terminator) {
  // Using the ReturnLike trait is tempting, but it is not set on all region
  // terminators that matter (like omp::TerminatorOp, which has no results).
  // This may return true for dead code. That is not a correctness issue, and
  // dead code can be eliminated by running region simplification before this
  // utility is used.
  // It may also return true for unreachable-like terminators (e.g., after an
  // abort call related to Fortran STOP). This is also OK: the inserted
  // deallocation will simply never be reached. It is easier for the rest of
  // the code here to assume there is always at least one deallocation point,
  // so keep unreachable terminators.
  return !terminator.hasSuccessors();
}

mlir::Region *AllocaReplaceImpl::findDeallocationPointsAndOwner(
    fir::AllocaOp alloca,
    llvm::SmallVectorImpl<mlir::Operation *> &deallocationPoints) {
  // Step 1: Identify the operation and region owning the alloca.
  mlir::Region *owningRegion = alloca.getOwnerRegion();
  if (!owningRegion)
    return nullptr;
  mlir::Operation *owningOp = owningRegion->getParentOp();
  assert(owningOp && "region expected to be owned");
  // Step 2: Identify the exit points of the owning region; they are the
  // default deallocation points. TODO: detect and use lifetime markers to get
  // earlier deallocation points.
  bool isOpenACCMPRecipe = mlir::isa<mlir::accomp::RecipeInterface>(owningOp);
  for (mlir::Block &block : owningRegion->getBlocks())
    if (mlir::Operation *terminator = block.getTerminator();
        isRegionTerminator(*terminator)) {
      // FIXME: OpenACC and OpenMP privatization recipes are stand-alone
      // operations meant to be "inlined" later; the value they return may be
      // the address of a local alloca. It would be incorrect to insert a
      // deallocation before the terminator (this would introduce a
      // use-after-free once the recipe is inlined).
      // This probably requires a redesign or special handling on the
      // OpenACC/OpenMP side.
      if (isOpenACCMPRecipe && terminatorYieldsMemory(*terminator))
        return nullptr;
      deallocationPoints.push_back(terminator);
    }
  // If no block terminator without successors has been found, this is an odd
  // region we cannot reason about (not seen yet in FIR and mainstream
  // dialects, but MLIR does not really prevent it).
  if (deallocationPoints.empty())
    return nullptr;

  // Step 3: Detect block-based loops between the allocation and deallocation
  // points, and add a deallocation point on the back edge to avoid memory
  // leaks.
  // The detection avoids doing region CFG analysis by assuming that there may
  // be cycles whenever the deallocation points are not dominated by the
  // alloca. This leaves the cases where the deallocation points are in the
  // same region as the alloca (or nested inside it), in which there may be a
  // back edge between the alloca and a deallocation point via block
  // successors. An analysis is run to detect those cases.
  // When a loop is detected, the easiest way to deallocate on the back edge is
  // to store the allocated memory address in a variable (that dominates the
  // loop) and, right before executing the allocation, to deallocate the
  // address held in that variable if it is set. This strategy still leads to
  // correct execution in the "false positive" cases (a schematic sketch of
  // the generated pattern precedes genIndirectDeallocation below).
  // Hence, the alloca is added as a deallocation point when there is no
  // dominance. Note that using the lifetime markers mentioned above would
  // reduce the false positives.
  if (!allocDominatesDealloc(alloca, deallocationPoints) ||
      blockCycleDetector.allocaIsInCycle(alloca, deallocationPoints))
    deallocationPoints.push_back(alloca.getOperation());
  return owningRegion;
}

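// Schematic sketch of the pattern generated below (illustrative FIR only;
// types, conversions, and names are simplified):
//
//   // In the entry block of the owning region:
//   %ptr_var = fir.alloca !fir.heap<T>
//   %null = fir.zero_bits !fir.heap<T>
//   fir.store %null to %ptr_var
//   ...
//   // Right after the replacement allocation:
//   fir.store %replacement to %ptr_var
//   ...
//   // At each deallocation point:
//   %addr = fir.load %ptr_var
//   %is_allocated = arith.cmpi ne, <%addr converted to an integer>, %c0
//   fir.if %is_allocated {
//     // deallocGenerator output freeing %addr
//   }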
void AllocaReplaceImpl::genIndirectDeallocation(
    mlir::RewriterBase &rewriter, fir::AllocaOp alloca,
    llvm::ArrayRef<mlir::Operation *> deallocationPoints,
    mlir::Value replacement, mlir::Region &owningRegion) {
  mlir::Location loc = alloca.getLoc();
  auto replacementInsertPoint = rewriter.saveInsertionPoint();
  // Create a pointer variable in the entry block to store the address of the
  // new storage and access it indirectly at the deallocation points, which
  // are not all dominated by the allocation.
  rewriter.setInsertionPointToStart(&owningRegion.front());
  mlir::Type heapType = fir::HeapType::get(alloca.getInType());
  mlir::Value ptrVar = rewriter.create<fir::AllocaOp>(loc, heapType);
  mlir::Value nullPtr = rewriter.create<fir::ZeroOp>(loc, heapType);
  rewriter.create<fir::StoreOp>(loc, nullPtr, ptrVar);
  // TODO: introducing a pointer compare op in FIR would help generate less IR
  // here.
  mlir::Type intPtrTy = fir::getIntPtrType(rewriter);
  mlir::Value c0 = rewriter.create<mlir::arith::ConstantOp>(
      loc, intPtrTy, rewriter.getIntegerAttr(intPtrTy, 0));

  // Store the new storage address right after its creation.
  rewriter.restoreInsertionPoint(replacementInsertPoint);
  mlir::Value castReplacement =
      fir::factory::createConvert(rewriter, loc, heapType, replacement);
  rewriter.create<fir::StoreOp>(loc, castReplacement, ptrVar);

  // Generate conditional deallocation at every deallocation point.
  auto genConditionalDealloc = [&](mlir::Location loc) {
    mlir::Value ptrVal = rewriter.create<fir::LoadOp>(loc, ptrVar);
    mlir::Value ptrToInt =
        rewriter.create<fir::ConvertOp>(loc, intPtrTy, ptrVal);
    mlir::Value isAllocated = rewriter.create<mlir::arith::CmpIOp>(
        loc, mlir::arith::CmpIPredicate::ne, ptrToInt, c0);
    auto ifOp = rewriter.create<fir::IfOp>(loc, std::nullopt, isAllocated,
                                           /*withElseRegion=*/false);
    rewriter.setInsertionPointToStart(&ifOp.getThenRegion().front());
    mlir::Value cast = fir::factory::createConvert(
        rewriter, loc, replacement.getType(), ptrVal);
    deallocGenerator(loc, rewriter, cast);
    // Currently there is no need to reset the pointer var because two
    // deallocation points can never be reached without going through the
    // alloca.
    rewriter.setInsertionPointAfter(ifOp);
  };
  for (mlir::Operation *deallocPoint : deallocationPoints) {
    rewriter.setInsertionPoint(deallocPoint);
    genConditionalDealloc(deallocPoint->getLoc());
  }
}

bool AllocaReplaceImpl::replace(mlir::RewriterBase &rewriter,
                                fir::AllocaOp alloca) {
  llvm::SmallVector<mlir::Operation *> deallocationPoints;
  mlir::Region *owningRegion =
      findDeallocationPointsAndOwner(alloca, deallocationPoints);
  if (!owningRegion)
    return false;
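  // Create the replacement storage right after the original alloca so that it
  // is available at the same program points as the original allocation.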
  rewriter.setInsertionPointAfter(alloca.getOperation());
  bool deallocPointsDominateAlloc =
      allocDominatesDealloc(alloca, deallocationPoints);
  if (mlir::Value replacement =
          allocaRewriter(rewriter, alloca, deallocPointsDominateAlloc)) {
    mlir::Value castReplacement = fir::factory::createConvert(
        rewriter, alloca.getLoc(), alloca.getType(), replacement);
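    // If the new storage dominates all deallocation points, deallocate it
    // directly at those points; otherwise, its address must be retrieved
    // through a pointer variable (see genIndirectDeallocation).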
    if (deallocPointsDominateAlloc)
      for (mlir::Operation *deallocPoint : deallocationPoints) {
        rewriter.setInsertionPoint(deallocPoint);
        deallocGenerator(deallocPoint->getLoc(), rewriter, replacement);
      }
    else
      genIndirectDeallocation(rewriter, alloca, deallocationPoints, replacement,
                              *owningRegion);
    rewriter.replaceOp(alloca, castReplacement);
  }
  return true;
}

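// Illustrative usage sketch (the callback bodies and helper functions below
// are hypothetical, shown only to clarify how the callbacks plug together):
//
//   bool ok = fir::replaceAllocas(
//       rewriter, func.getOperation(),
//       /*mustReplace=*/[](fir::AllocaOp) { return true; },
//       /*allocaRewriter=*/
//       [](mlir::OpBuilder &builder, fir::AllocaOp alloca,
//          bool allocaDominatesDeallocs) -> mlir::Value {
//         // Create and return the replacement storage (e.g., a heap
//         // allocation of alloca.getInType()).
//         return createHeapStorage(builder, alloca); // hypothetical helper
//       },
//       /*deallocGenerator=*/
//       [](mlir::Location loc, mlir::OpBuilder &builder, mlir::Value addr) {
//         // Free the storage created above.
//         genFree(builder, loc, addr); // hypothetical helper
//       });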
bool fir::replaceAllocas(mlir::RewriterBase &rewriter,
                         mlir::Operation *parentOp,
                         MustRewriteCallBack mustReplace,
                         AllocaRewriterCallBack allocaRewriter,
                         DeallocCallBack deallocGenerator) {
  // If the parent operation is not an alloca owner, the code below would risk
  // modifying IR outside of parentOp.
  if (!fir::AllocaOp::ownsNestedAlloca(parentOp))
    return false;
  auto insertPoint = rewriter.saveInsertionPoint();
  bool replacedAllRequestedAlloca = true;
  AllocaReplaceImpl impl(allocaRewriter, deallocGenerator);
  parentOp->walk([&](fir::AllocaOp alloca) {
    if (mustReplace(alloca))
      replacedAllRequestedAlloca &= impl.replace(rewriter, alloca);
  });
  rewriter.restoreInsertionPoint(insertPoint);
  return replacedAllRequestedAlloca;
}