//===- MemoryUtils.cpp ----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "flang/Optimizer/Transforms/MemoryUtils.h"
#include "flang/Optimizer/Builder/FIRBuilder.h"
#include "flang/Optimizer/Builder/Todo.h"
#include "mlir/Dialect/OpenACC/OpenACC.h"
#include "mlir/IR/Dominance.h"
#include "llvm/ADT/STLExtras.h"

namespace {
/// Helper class to detect if an alloca is inside an mlir::Block that can be
/// reached again before its deallocation points via block successors. This
/// analysis is only valid if the deallocation points are inside (or nested
/// inside) the same region as the alloca, because it does not consider the
/// region CFG (for instance, the block inside a fir.do_loop is obviously
/// inside a loop, but that loop is not formed by blocks). Dominance of the
/// alloca over its deallocation points implies this precondition (although it
/// is more restrictive).
class BlockCycleDetector {
public:
  bool allocaIsInCycle(fir::AllocaOp alloca,
                       llvm::ArrayRef<mlir::Operation *> deallocationPoints);

private:
  // Cache for the blocks owning allocas that have already been analyzed. In
  // many Fortran programs, allocas are created in the same few blocks, and
  // those blocks are not part of any block cycle, so getting a fast "no" is
  // beneficial.
  llvm::DenseMap<mlir::Block *, /*isInCycle*/ bool> analyzed;
};
} // namespace

namespace {
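/// Helper class implementing the alloca replacement for a single fir.alloca:
/// it finds the deallocation points, invokes the user callbacks to create the
/// new storage and the deallocations, and handles the cases where the new
/// storage does not dominate its deallocation points.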
class AllocaReplaceImpl {
public:
  AllocaReplaceImpl(fir::AllocaRewriterCallBack allocaRewriter,
                    fir::DeallocCallBack deallocGenerator)
      : allocaRewriter{allocaRewriter}, deallocGenerator{deallocGenerator} {}
  bool replace(mlir::RewriterBase &, fir::AllocaOp);

private:
  mlir::Region *findDeallocationPointsAndOwner(
      fir::AllocaOp alloca,
      llvm::SmallVectorImpl<mlir::Operation *> &deallocationPoints);
  bool
  allocDominatesDealloc(fir::AllocaOp alloca,
                        llvm::ArrayRef<mlir::Operation *> deallocationPoints) {
    return llvm::all_of(deallocationPoints, [&](mlir::Operation *deallocPoint) {
      return this->dominanceInfo.properlyDominates(alloca.getOperation(),
                                                   deallocPoint);
    });
  }
  void
  genIndirectDeallocation(mlir::RewriterBase &, fir::AllocaOp,
                          llvm::ArrayRef<mlir::Operation *> deallocationPoints,
                          mlir::Value replacement, mlir::Region &owningRegion);

private:
  fir::AllocaRewriterCallBack allocaRewriter;
  fir::DeallocCallBack deallocGenerator;
  mlir::DominanceInfo dominanceInfo;
  BlockCycleDetector blockCycleDetector;
};
} // namespace

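/// Return true if \p allocaBlock can be reached again from itself via block
/// successors without going through a block that contains one of the
/// \p deallocationPoints. The traversal is a depth-first search over the
/// block successor graph, seeded with the deallocation point blocks marked as
/// already seen.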
static bool
allocaIsInCycleImpl(mlir::Block *allocaBlock,
                    llvm::ArrayRef<mlir::Operation *> deallocationPoints) {
  llvm::DenseSet<mlir::Block *> seen;
  // Insert the deallocation point blocks as "seen" so that the block
  // traversal will stop at them.
  for (mlir::Operation *deallocPoint : deallocationPoints)
    seen.insert(deallocPoint->getBlock());
  if (seen.contains(allocaBlock))
    return false;
  // Traverse the block successor graph starting from the alloca block.
  llvm::SmallVector<mlir::Block *> successors{allocaBlock};
  while (!successors.empty())
    for (mlir::Block *next : successors.pop_back_val()->getSuccessors()) {
      if (next == allocaBlock)
        return true;
      if (auto pair = seen.insert(next); pair.second)
        successors.push_back(next);
    }
  // The traversal did not reach the alloca block again.
  return false;
}
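
/// Return true if \p alloca may be executed more than once before one of its
/// \p deallocationPoints is reached, because its block belongs to a cycle
/// that does not go through a deallocation point. The deallocation-point
/// independent part of the analysis is cached per block.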
bool BlockCycleDetector::allocaIsInCycle(
    fir::AllocaOp alloca,
    llvm::ArrayRef<mlir::Operation *> deallocationPoints) {
  mlir::Block *allocaBlock = alloca->getBlock();
  auto analyzedPair = analyzed.try_emplace(allocaBlock, /*isInCycle=*/false);
  bool alreadyAnalyzed = !analyzedPair.second;
  bool &isInCycle = analyzedPair.first->second;
  // Fast exit if the block was already analyzed and no cycle was found.
  if (alreadyAnalyzed && !isInCycle)
    return false;
  // If the generic analysis (ignoring deallocation points) has not been run
  // for this block yet, run it and cache the result.
  if (!alreadyAnalyzed)
    isInCycle = allocaIsInCycleImpl(allocaBlock, /*deallocationPoints=*/{});
  if (!isInCycle)
    return false;
  // The generic analysis found a block loop. Check whether the alloca block
  // can still be reached again without going through one of the deallocation
  // points. Do not cache this result: it is specific to the deallocation
  // points found for this alloca.
  return allocaIsInCycleImpl(allocaBlock, deallocationPoints);
}

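/// Return true if \p terminator yields at least one memory reference, that
/// is, a value whose type conforms with pass-by-reference semantics.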
static bool terminatorYieldsMemory(mlir::Operation &terminator) {
  return llvm::any_of(terminator.getResults(), [](mlir::OpResult res) {
    return fir::conformsWithPassByRef(res.getType());
  });
}

static bool isRegionTerminator(mlir::Operation &terminator) {
  // Using the ReturnLike trait is tempting, but it is not set on all region
  // terminators that matter here (e.g., omp::TerminatorOp, which has no
  // results).
  // This may be true for dead code. That is not a correctness issue, and dead
  // code can be eliminated by running region simplification before using this
  // utility.
  // It may also be true for unreachable-like terminators (e.g., after an
  // abort call related to Fortran STOP). This is also OK: the inserted
  // deallocation will simply never be reached. It is easier for the rest of
  // the code here to assume there is always at least one deallocation point,
  // so keep unreachable terminators.
  return !terminator.hasSuccessors();
}

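/// Find the places where the storage that will replace \p alloca must be
/// deallocated, and the region owning that storage. On success, append the
/// deallocation points to \p deallocationPoints and return the owning region.
/// Return nullptr when the alloca cannot be safely handled (no region exit
/// could be identified, or the owning operation is an OpenACC/OpenMP recipe
/// whose terminator yields memory).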
mlir::Region *AllocaReplaceImpl::findDeallocationPointsAndOwner(
    fir::AllocaOp alloca,
    llvm::SmallVectorImpl<mlir::Operation *> &deallocationPoints) {
  // Step 1: Identify the operation and region owning the alloca.
  mlir::Region *owningRegion = alloca.getOwnerRegion();
  if (!owningRegion)
    return nullptr;
  mlir::Operation *owningOp = owningRegion->getParentOp();
  assert(owningOp && "region expected to be owned");
  // Step 2: Identify the exit points of the owning region; they are the
  // default deallocation points. TODO: detect and use lifetime markers to get
  // earlier deallocation points.
  bool isOpenACCMPRecipe = mlir::isa<mlir::accomp::RecipeInterface>(owningOp);
  for (mlir::Block &block : owningRegion->getBlocks())
    if (mlir::Operation *terminator = block.getTerminator();
        isRegionTerminator(*terminator)) {
      // FIXME: OpenACC and OpenMP privatization recipes are stand-alone
      // operations meant to be later "inlined", and the value they return may
      // be the address of a local alloca. It would be incorrect to insert a
      // deallocation before the terminator (this would introduce a use after
      // free once the recipe is inlined).
      // This probably requires a redesign or special handling on the
      // OpenACC/OpenMP side.
      if (isOpenACCMPRecipe && terminatorYieldsMemory(*terminator))
        return nullptr;
      deallocationPoints.push_back(terminator);
    }
  // If no block terminators without successors have been found, this is
  // an odd region we cannot reason about (never seen yet in FIR and
  // mainstream dialects, but MLIR does not really prevent it).
  if (deallocationPoints.empty())
    return nullptr;

  // Step 3: Detect block-based loops between the allocation and deallocation
  // points, and add a deallocation point on the back edge to avoid memory
  // leaks.
  // The detection avoids region CFG analysis by assuming that there may be
  // cycles whenever the deallocation points are not dominated by the alloca.
  // This leaves the cases where the deallocation points are in the same
  // region as the alloca (or nested inside it), in which case there may be a
  // back edge between the alloca and a deallocation point via block
  // successors; an analysis is run to detect those cases.
  // When a loop is detected, the easiest way to deallocate on the back edge
  // is to store the allocated memory address in a variable (that dominates
  // the loop) and to deallocate the address held in that variable, if any,
  // before executing the allocation. This strategy still leads to correct
  // execution in the "false positive" cases.
  // Hence, the alloca is added as a deallocation point when there is no
  // dominance. Note that implementing the lifetime marker TODO above would
  // reduce the false positives.
  if (!allocDominatesDealloc(alloca, deallocationPoints) ||
      blockCycleDetector.allocaIsInCycle(alloca, deallocationPoints))
    deallocationPoints.push_back(alloca.getOperation());
  return owningRegion;
}

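/// Generate the deallocation of \p replacement when the new storage does not
/// dominate all of its \p deallocationPoints (which may include the alloca
/// itself when it sits in a block cycle). The storage address is saved in a
/// pointer variable created in the entry block of \p owningRegion, and each
/// deallocation point only deallocates the saved address if it is set.
/// Roughly, the generated FIR looks like the sketch below, where T stands for
/// the allocated type, the integer type depends on the target, and the actual
/// deallocation code is produced by the deallocGenerator callback:
///
///   // In the entry block of the owning region:
///   %ptr_var = fir.alloca !fir.heap<T>
///   %null = fir.zero_bits !fir.heap<T>
///   fir.store %null to %ptr_var : !fir.ref<!fir.heap<T>>
///   %c0 = arith.constant 0 : i64
///   ...
///   // Right after the new storage is created:
///   fir.store %new_storage to %ptr_var : !fir.ref<!fir.heap<T>>
///   ...
///   // At every deallocation point:
///   %addr = fir.load %ptr_var : !fir.ref<!fir.heap<T>>
///   %addr_int = fir.convert %addr : (!fir.heap<T>) -> i64
///   %is_allocated = arith.cmpi ne, %addr_int, %c0 : i64
///   fir.if %is_allocated {
///     // deallocation of %addr generated by the deallocGenerator callback
///   }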
void AllocaReplaceImpl::genIndirectDeallocation(
    mlir::RewriterBase &rewriter, fir::AllocaOp alloca,
    llvm::ArrayRef<mlir::Operation *> deallocationPoints,
    mlir::Value replacement, mlir::Region &owningRegion) {
  mlir::Location loc = alloca.getLoc();
  auto replacementInsertPoint = rewriter.saveInsertionPoint();
  // Create a pointer variable in the entry block to store the new storage
  // address and access it indirectly at the deallocation points that the
  // allocation does not dominate.
  rewriter.setInsertionPointToStart(&owningRegion.front());
  mlir::Type heapType = fir::HeapType::get(alloca.getInType());
  mlir::Value ptrVar = rewriter.create<fir::AllocaOp>(loc, heapType);
  mlir::Value nullPtr = rewriter.create<fir::ZeroOp>(loc, heapType);
  rewriter.create<fir::StoreOp>(loc, nullPtr, ptrVar);
  // TODO: introducing a pointer compare operation in FIR would help generate
  // less IR here.
  mlir::Type intPtrTy = fir::getIntPtrType(rewriter);
  mlir::Value c0 = rewriter.create<mlir::arith::ConstantOp>(
      loc, intPtrTy, rewriter.getIntegerAttr(intPtrTy, 0));

  // Store the new storage address right after its creation.
  rewriter.restoreInsertionPoint(replacementInsertPoint);
  mlir::Value castReplacement =
      fir::factory::createConvert(rewriter, loc, heapType, replacement);
  rewriter.create<fir::StoreOp>(loc, castReplacement, ptrVar);

  // Generate conditional deallocation at every deallocation point.
  auto genConditionalDealloc = [&](mlir::Location loc) {
    mlir::Value ptrVal = rewriter.create<fir::LoadOp>(loc, ptrVar);
    mlir::Value ptrToInt =
        rewriter.create<fir::ConvertOp>(loc, intPtrTy, ptrVal);
    mlir::Value isAllocated = rewriter.create<mlir::arith::CmpIOp>(
        loc, mlir::arith::CmpIPredicate::ne, ptrToInt, c0);
    auto ifOp = rewriter.create<fir::IfOp>(loc, std::nullopt, isAllocated,
                                           /*withElseRegion=*/false);
    rewriter.setInsertionPointToStart(&ifOp.getThenRegion().front());
    mlir::Value cast = fir::factory::createConvert(
        rewriter, loc, replacement.getType(), ptrVal);
    deallocGenerator(loc, rewriter, cast);
    // Currently there is no need to reset the pointer var because two
    // deallocation points can never be reached without going through the
    // alloca.
    rewriter.setInsertionPointAfter(ifOp);
  };
  for (mlir::Operation *deallocPoint : deallocationPoints) {
    rewriter.setInsertionPoint(deallocPoint);
    genConditionalDealloc(deallocPoint->getLoc());
  }
}

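/// Rewrite a single alloca: find its deallocation points, let the
/// allocaRewriter callback create the new storage, generate the matching
/// deallocations, and replace the uses of the original alloca. Return false
/// if the deallocation points and owning region could not be identified.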
bool AllocaReplaceImpl::replace(mlir::RewriterBase &rewriter,
                                fir::AllocaOp alloca) {
  llvm::SmallVector<mlir::Operation *> deallocationPoints;
  mlir::Region *owningRegion =
      findDeallocationPointsAndOwner(alloca, deallocationPoints);
  if (!owningRegion)
    return false;
  rewriter.setInsertionPointAfter(alloca.getOperation());
  bool allocDominatesDeallocPoints =
      allocDominatesDealloc(alloca, deallocationPoints);
  if (mlir::Value replacement =
          allocaRewriter(rewriter, alloca, allocDominatesDeallocPoints)) {
    mlir::Value castReplacement = fir::factory::createConvert(
        rewriter, alloca.getLoc(), alloca.getType(), replacement);
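    // If the new storage dominates all the deallocation points, deallocate it
    // directly at each of them. Otherwise, go through a pointer variable that
    // dominates them (see genIndirectDeallocation).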
    if (allocDominatesDeallocPoints)
      for (mlir::Operation *deallocPoint : deallocationPoints) {
        rewriter.setInsertionPoint(deallocPoint);
        deallocGenerator(deallocPoint->getLoc(), rewriter, replacement);
      }
    else
      genIndirectDeallocation(rewriter, alloca, deallocationPoints, replacement,
                              *owningRegion);
    rewriter.replaceOp(alloca, castReplacement);
  }
  return true;
}

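/// Walk all fir.alloca operations nested inside \p parentOp and rewrite the
/// ones selected by \p mustReplace. Return false if \p parentOp cannot own
/// allocas or if some selected alloca could not be rewritten. The callback
/// semantics are described in flang/Optimizer/Transforms/MemoryUtils.h.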
bool fir::replaceAllocas(mlir::RewriterBase &rewriter,
                         mlir::Operation *parentOp,
                         MustRewriteCallBack mustReplace,
                         AllocaRewriterCallBack allocaRewriter,
                         DeallocCallBack deallocGenerator) {
  // If the parent operation is not an alloca owner, the code below would risk
  // modifying IR outside of parentOp.
  if (!fir::AllocaOp::ownsNestedAlloca(parentOp))
    return false;
  auto insertPoint = rewriter.saveInsertionPoint();
  bool replacedAllRequestedAlloca = true;
  AllocaReplaceImpl impl(allocaRewriter, deallocGenerator);
  parentOp->walk([&](fir::AllocaOp alloca) {
    if (mustReplace(alloca))
      replacedAllRequestedAlloca &= impl.replace(rewriter, alloca);
  });
  rewriter.restoreInsertionPoint(insertPoint);
  return replacedAllRequestedAlloca;
}