| 1 | //===-- LowerRepackArrays.cpp ---------------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | /// \file |
| 9 | /// This pass expands fir.pack_array and fir.unpack_array operations |
| 10 | /// into sequences of other FIR operations and Fortran runtime calls. |
| 11 | /// This pass is using structured control flow FIR operations such |
| 12 | /// as fir.if, so its placement in the pipeline should guarantee |
| 13 | /// further lowering of these operations. |
| 14 | /// |
| 15 | /// A fir.pack_array operation is converted into a sequence of checks |
| 16 | /// identifying whether an array needs to be copied into a contiguous |
| 17 | /// temporary. When the checks pass, a new memory allocation is done |
| 18 | /// for the temporary array (in either stack or heap memory). |
| 19 | /// If `fir.pack_array` does not have no_copy attribute, then |
| 20 | /// the original array is shallow-copied into the temporary. |
| 21 | /// |
| 22 | /// A fir.unpack_array operation is converted into a check |
| 23 | /// of whether the original and the temporary arrays are different |
| 24 | /// memory. When the check passes, the temporary array might be |
| 25 | /// shallow-copied into the original array, and then the temporary |
| 26 | /// array is deallocated (if it was allocated in stack memory, |
| 27 | /// then there is no explicit deallocation). |
| 28 | //===----------------------------------------------------------------------===// |
| 29 | |
| 30 | #include "flang/Optimizer/CodeGen/CodeGen.h" |
| 31 | |
| 32 | #include "flang/Optimizer/Builder/Character.h" |
| 33 | #include "flang/Optimizer/Builder/FIRBuilder.h" |
| 34 | #include "flang/Optimizer/Builder/MutableBox.h" |
| 35 | #include "flang/Optimizer/Builder/Runtime/Allocatable.h" |
| 36 | #include "flang/Optimizer/Builder/Runtime/Transformational.h" |
| 37 | #include "flang/Optimizer/Builder/Todo.h" |
| 38 | #include "flang/Optimizer/Dialect/FIRDialect.h" |
| 39 | #include "flang/Optimizer/Dialect/FIROps.h" |
| 40 | #include "flang/Optimizer/Dialect/FIRType.h" |
| 41 | #include "flang/Optimizer/OpenACC/RegisterOpenACCExtensions.h" |
| 42 | #include "flang/Optimizer/OpenMP/Support/RegisterOpenMPExtensions.h" |
| 43 | #include "mlir/Pass/Pass.h" |
| 44 | #include "mlir/Transforms/GreedyPatternRewriteDriver.h" |
| 45 | |
| 46 | namespace fir { |
| 47 | #define GEN_PASS_DEF_LOWERREPACKARRAYSPASS |
| 48 | #include "flang/Optimizer/CodeGen/CGPasses.h.inc" |
| 49 | } // namespace fir |
| 50 | |
| 51 | #define DEBUG_TYPE "lower-repack-arrays" |
| 52 | |
| 53 | namespace { |
| 54 | class PackArrayConversion : public mlir::OpRewritePattern<fir::PackArrayOp> { |
| 55 | public: |
| 56 | using OpRewritePattern::OpRewritePattern; |
| 57 | |
| 58 | mlir::LogicalResult |
| 59 | matchAndRewrite(fir::PackArrayOp op, |
| 60 | mlir::PatternRewriter &rewriter) const override; |
| 61 | |
| 62 | private: |
| 63 | static constexpr llvm::StringRef bufferName = ".repacked" ; |
| 64 | |
| 65 | // Return value of fir::BaseBoxType that represents a temporary |
| 66 | // array created for the original box with given extents and |
| 67 | // type parameters. The new box has the default lower bounds. |
| 68 | // If useStack is true, then the temporary will be allocated |
| 69 | // in stack memory (when possible). |
| 70 | static mlir::Value allocateTempBuffer(fir::FirOpBuilder &builder, |
| 71 | mlir::Location loc, bool useStack, |
| 72 | mlir::Value origBox, |
| 73 | llvm::ArrayRef<mlir::Value> extents, |
| 74 | llvm::ArrayRef<mlir::Value> typeParams); |
| 75 | |
| 76 | // Generate value of fir::BaseBoxType that represents the result |
| 77 | // of the given fir.pack_array operation. The original box |
| 78 | // is assumed to be present (though, it may represent an empty array). |
| 79 | static mlir::FailureOr<mlir::Value> genRepackedBox(fir::FirOpBuilder &builder, |
| 80 | mlir::Location loc, |
| 81 | fir::PackArrayOp packOp); |
| 82 | }; |
| 83 | |
| 84 | class UnpackArrayConversion |
| 85 | : public mlir::OpRewritePattern<fir::UnpackArrayOp> { |
| 86 | public: |
| 87 | using OpRewritePattern::OpRewritePattern; |
| 88 | |
| 89 | mlir::LogicalResult |
| 90 | matchAndRewrite(fir::UnpackArrayOp op, |
| 91 | mlir::PatternRewriter &rewriter) const override; |
| 92 | }; |
| 93 | } // anonymous namespace |
| 94 | |
| 95 | // Return true iff for the given original boxed array we can |
| 96 | // allocate temporary memory in stack memory. |
| 97 | // This function is used to synchronize allocation/deallocation |
| 98 | // implied by fir.pack_array and fir.unpack_array, because |
| 99 | // the presence of the stack attribute does not automatically |
| 100 | // mean that the allocation is actually done in stack memory. |
| 101 | // For example, we always do the heap allocation for polymorphic |
| 102 | // types using Fortran runtime. |
| 103 | // Adding the polymorpic mold to fir.alloca and then using |
| 104 | // Fortran runtime to compute the allocation size could probably |
| 105 | // resolve this limitation. |
| 106 | static bool canAllocateTempOnStack(mlir::Value box) { |
| 107 | return !fir::isPolymorphicType(box.getType()); |
| 108 | } |
| 109 | |
| 110 | /// Return true if array repacking is safe either statically |
| 111 | /// (there are no 'is_safe' attributes) or dynamically |
| 112 | /// (neither of the 'is_safe' attributes claims 'isDynamicallySafe() == false'). |
| 113 | /// \p op is either fir.pack_array or fir.unpack_array. |
| 114 | template <typename OP> |
| 115 | static bool repackIsSafe(OP op) { |
| 116 | bool isSafe = true; |
| 117 | if (auto isSafeAttrs = op.getIsSafe()) { |
| 118 | // We currently support only the attributes for which |
| 119 | // isDynamicallySafe() returns false. |
| 120 | for (auto attr : *isSafeAttrs) { |
| 121 | auto iface = mlir::cast<fir::SafeTempArrayCopyAttrInterface>(attr); |
| 122 | if (iface.isDynamicallySafe()) |
| 123 | TODO(op.getLoc(), "dynamically safe array repacking" ); |
| 124 | else |
| 125 | isSafe = false; |
| 126 | } |
| 127 | } |
| 128 | return isSafe; |
| 129 | } |
| 130 | |
| 131 | mlir::LogicalResult |
| 132 | PackArrayConversion::matchAndRewrite(fir::PackArrayOp op, |
| 133 | mlir::PatternRewriter &rewriter) const { |
| 134 | mlir::Value box = op.getArray(); |
| 135 | // If repacking is not safe, then just use the original box. |
| 136 | if (!repackIsSafe(op)) { |
| 137 | rewriter.replaceOp(op, box); |
| 138 | return mlir::success(); |
| 139 | } |
| 140 | |
| 141 | mlir::Location loc = op.getLoc(); |
| 142 | fir::FirOpBuilder builder(rewriter, op.getOperation()); |
| 143 | if (op.getMaxSize() || op.getMaxElementSize() || op.getMinStride()) |
| 144 | TODO(loc, "fir.pack_array with constraints" ); |
| 145 | if (op.getHeuristics() != fir::PackArrayHeuristics::None) |
| 146 | TODO(loc, "fir.pack_array with heuristics" ); |
| 147 | |
| 148 | auto boxType = mlir::cast<fir::BaseBoxType>(box.getType()); |
| 149 | |
| 150 | // For now we have to always check if the box is present. |
| 151 | auto isPresent = |
| 152 | builder.create<fir::IsPresentOp>(loc, builder.getI1Type(), box); |
| 153 | |
| 154 | fir::IfOp ifOp = builder.create<fir::IfOp>(loc, boxType, isPresent, |
| 155 | /*withElseRegion=*/true); |
| 156 | builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); |
| 157 | // The box is present. |
| 158 | auto newBox = genRepackedBox(builder, loc, op); |
| 159 | if (mlir::failed(newBox)) |
| 160 | return newBox; |
| 161 | builder.create<fir::ResultOp>(loc, *newBox); |
| 162 | |
| 163 | // The box is not present. Return original box. |
| 164 | builder.setInsertionPointToStart(&ifOp.getElseRegion().front()); |
| 165 | builder.create<fir::ResultOp>(loc, box); |
| 166 | |
| 167 | rewriter.replaceOp(op, ifOp.getResult(0)); |
| 168 | return mlir::success(); |
| 169 | } |
| 170 | |
| 171 | mlir::Value PackArrayConversion::allocateTempBuffer( |
| 172 | fir::FirOpBuilder &builder, mlir::Location loc, bool useStack, |
| 173 | mlir::Value origBox, llvm::ArrayRef<mlir::Value> extents, |
| 174 | llvm::ArrayRef<mlir::Value> typeParams) { |
| 175 | auto tempType = mlir::cast<fir::SequenceType>( |
| 176 | fir::extractSequenceType(origBox.getType())); |
| 177 | assert(tempType.getDimension() == extents.size() && |
| 178 | "number of extents does not match the rank" ); |
| 179 | |
| 180 | mlir::Value shape = builder.genShape(loc, extents); |
| 181 | auto [base, isHeapAllocation] = builder.createArrayTemp( |
| 182 | loc, tempType, shape, extents, typeParams, |
| 183 | fir::FirOpBuilder::genTempDeclareOp, |
| 184 | fir::isPolymorphicType(origBox.getType()) ? origBox : nullptr, useStack, |
| 185 | bufferName); |
| 186 | // Make sure canAllocateTempOnStack() can recognize when |
| 187 | // the temporary is actually allocated on the stack |
| 188 | // by createArrayTemp(). Otherwise, we may miss dynamic |
| 189 | // deallocation when lowering fir.unpack_array. |
| 190 | if (useStack && canAllocateTempOnStack(origBox)) |
| 191 | assert(!isHeapAllocation && "temp must have been allocated on the stack" ); |
| 192 | |
| 193 | mlir::Type ptrType = base.getType(); |
| 194 | if (llvm::isa<fir::BaseBoxType>(ptrType)) |
| 195 | return base; |
| 196 | |
| 197 | mlir::Type tempBoxType = fir::BoxType::get(mlir::isa<fir::HeapType>(ptrType) |
| 198 | ? ptrType |
| 199 | : fir::unwrapRefType(ptrType)); |
| 200 | mlir::Value newBox = |
| 201 | builder.createBox(loc, tempBoxType, base, shape, /*slice=*/nullptr, |
| 202 | typeParams, /*tdesc=*/nullptr); |
| 203 | return newBox; |
| 204 | } |
| 205 | |
| 206 | mlir::FailureOr<mlir::Value> |
| 207 | PackArrayConversion::genRepackedBox(fir::FirOpBuilder &builder, |
| 208 | mlir::Location loc, fir::PackArrayOp op) { |
| 209 | mlir::OpBuilder::InsertionGuard guard(builder); |
| 210 | mlir::Value box = op.getArray(); |
| 211 | |
| 212 | llvm::SmallVector<mlir::Value> typeParams(op.getTypeparams().begin(), |
| 213 | op.getTypeparams().end()); |
| 214 | auto boxType = mlir::cast<fir::BaseBoxType>(box.getType()); |
| 215 | mlir::Type indexType = builder.getIndexType(); |
| 216 | |
| 217 | // If type parameters are not specified by fir.pack_array, |
| 218 | // figure out how many of them we need to read from the box. |
| 219 | unsigned numTypeParams = 0; |
| 220 | if (typeParams.size() == 0) { |
| 221 | if (auto recordType = |
| 222 | mlir::dyn_cast<fir::RecordType>(boxType.unwrapInnerType())) |
| 223 | if (recordType.getNumLenParams() != 0) |
| 224 | TODO(loc, |
| 225 | "allocating temporary for a parameterized derived type array" ); |
| 226 | |
| 227 | if (auto charType = |
| 228 | mlir::dyn_cast<fir::CharacterType>(boxType.unwrapInnerType())) { |
| 229 | if (charType.hasDynamicLen()) { |
| 230 | // Read one length parameter from the box. |
| 231 | numTypeParams = 1; |
| 232 | } else { |
| 233 | // Place the constant length into typeParams. |
| 234 | mlir::Value length = |
| 235 | builder.createIntegerConstant(loc, indexType, charType.getLen()); |
| 236 | typeParams.push_back(length); |
| 237 | } |
| 238 | } |
| 239 | } |
| 240 | |
| 241 | // Create a temporay iff the original is not contigous and is not empty. |
| 242 | auto isNotContiguous = builder.genNot( |
| 243 | loc, builder.create<fir::IsContiguousBoxOp>(loc, box, op.getInnermost())); |
| 244 | auto dataAddr = |
| 245 | builder.create<fir::BoxAddrOp>(loc, fir::boxMemRefType(boxType), box); |
| 246 | auto isNotEmpty = |
| 247 | builder.create<fir::IsPresentOp>(loc, builder.getI1Type(), dataAddr); |
| 248 | auto doPack = |
| 249 | builder.create<mlir::arith::AndIOp>(loc, isNotContiguous, isNotEmpty); |
| 250 | |
| 251 | fir::IfOp ifOp = |
| 252 | builder.create<fir::IfOp>(loc, boxType, doPack, /*withElseRegion=*/true); |
| 253 | |
| 254 | // Return original box. |
| 255 | builder.setInsertionPointToStart(&ifOp.getElseRegion().front()); |
| 256 | builder.create<fir::ResultOp>(loc, box); |
| 257 | |
| 258 | // Create a new box. |
| 259 | builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); |
| 260 | |
| 261 | // Get lower bounds and extents from the box. |
| 262 | llvm::SmallVector<mlir::Value, Fortran::common::maxRank> lbounds, extents; |
| 263 | fir::factory::genDimInfoFromBox(builder, loc, box, &lbounds, &extents, |
| 264 | /*strides=*/nullptr); |
| 265 | // Get the type parameters from the box, if needed. |
| 266 | llvm::SmallVector<mlir::Value> assumedTypeParams; |
| 267 | if (numTypeParams != 0) { |
| 268 | if (auto charType = |
| 269 | mlir::dyn_cast<fir::CharacterType>(boxType.unwrapInnerType())) |
| 270 | if (charType.hasDynamicLen()) { |
| 271 | fir::factory::CharacterExprHelper charHelper(builder, loc); |
| 272 | mlir::Value len = charHelper.readLengthFromBox(box, charType); |
| 273 | typeParams.push_back(builder.createConvert(loc, indexType, len)); |
| 274 | } |
| 275 | |
| 276 | if (numTypeParams != typeParams.size()) |
| 277 | return emitError(loc) << "failed to compute the type parameters for " |
| 278 | << op.getOperation() << '\n'; |
| 279 | } |
| 280 | |
| 281 | mlir::Value tempBox = |
| 282 | allocateTempBuffer(builder, loc, op.getStack(), box, extents, typeParams); |
| 283 | if (!op.getNoCopy()) |
| 284 | fir::runtime::genShallowCopy(builder, loc, tempBox, box, |
| 285 | /*resultIsAllocated=*/true); |
| 286 | |
| 287 | // Set lower bounds after the original box. |
| 288 | mlir::Value shift = builder.genShift(loc, lbounds); |
| 289 | tempBox = builder.create<fir::ReboxOp>(loc, boxType, tempBox, shift, |
| 290 | /*slice=*/nullptr); |
| 291 | builder.create<fir::ResultOp>(loc, tempBox); |
| 292 | |
| 293 | return ifOp.getResult(0); |
| 294 | } |
| 295 | |
| 296 | mlir::LogicalResult |
| 297 | UnpackArrayConversion::matchAndRewrite(fir::UnpackArrayOp op, |
| 298 | mlir::PatternRewriter &rewriter) const { |
| 299 | // If repacking is not safe, then just remove the operation. |
| 300 | if (!repackIsSafe(op)) { |
| 301 | rewriter.eraseOp(op); |
| 302 | return mlir::success(); |
| 303 | } |
| 304 | |
| 305 | mlir::Location loc = op.getLoc(); |
| 306 | fir::FirOpBuilder builder(rewriter, op.getOperation()); |
| 307 | mlir::Type predicateType = builder.getI1Type(); |
| 308 | mlir::Value tempBox = op.getTemp(); |
| 309 | mlir::Value originalBox = op.getOriginal(); |
| 310 | |
| 311 | // For now we have to always check if the box is present. |
| 312 | auto isPresent = |
| 313 | builder.create<fir::IsPresentOp>(loc, predicateType, originalBox); |
| 314 | |
| 315 | builder.genIfThen(loc, isPresent).genThen([&]() { |
| 316 | mlir::Type addrType = |
| 317 | fir::HeapType::get(fir::extractSequenceType(tempBox.getType())); |
| 318 | mlir::Value tempAddr = |
| 319 | builder.create<fir::BoxAddrOp>(loc, addrType, tempBox); |
| 320 | mlir::Value originalAddr = |
| 321 | builder.create<fir::BoxAddrOp>(loc, addrType, originalBox); |
| 322 | |
| 323 | auto isNotSame = builder.genPtrCompare(loc, mlir::arith::CmpIPredicate::ne, |
| 324 | tempAddr, originalAddr); |
| 325 | builder.genIfThen(loc, isNotSame).genThen([&]() {}); |
| 326 | // Copy from temporary to the original. |
| 327 | if (!op.getNoCopy()) |
| 328 | fir::runtime::genShallowCopy(builder, loc, originalBox, tempBox, |
| 329 | /*resultIsAllocated=*/true); |
| 330 | |
| 331 | // Deallocate, if it was allocated in heap. |
| 332 | // Note that the stack attribute does not always mean |
| 333 | // that the allocation was actually done in stack memory. |
| 334 | // There are currently cases where we delegate the allocation |
| 335 | // to the runtime that uses heap memory, even when the stack |
| 336 | // attribute is set on fir.pack_array. |
| 337 | if (!op.getStack() || !canAllocateTempOnStack(originalBox)) |
| 338 | builder.create<fir::FreeMemOp>(loc, tempAddr); |
| 339 | }); |
| 340 | rewriter.eraseOp(op); |
| 341 | return mlir::success(); |
| 342 | } |
| 343 | |
| 344 | namespace { |
| 345 | class LowerRepackArraysPass |
| 346 | : public fir::impl::LowerRepackArraysPassBase<LowerRepackArraysPass> { |
| 347 | public: |
| 348 | using LowerRepackArraysPassBase< |
| 349 | LowerRepackArraysPass>::LowerRepackArraysPassBase; |
| 350 | |
| 351 | void runOnOperation() override final { |
| 352 | auto *context = &getContext(); |
| 353 | mlir::ModuleOp module = getOperation(); |
| 354 | mlir::RewritePatternSet patterns(context); |
| 355 | patterns.insert<PackArrayConversion>(context); |
| 356 | patterns.insert<UnpackArrayConversion>(context); |
| 357 | mlir::GreedyRewriteConfig config; |
| 358 | config.setRegionSimplificationLevel( |
| 359 | mlir::GreedySimplifyRegionLevel::Disabled); |
| 360 | (void)applyPatternsGreedily(module, std::move(patterns), config); |
| 361 | } |
| 362 | |
| 363 | void getDependentDialects(mlir::DialectRegistry ®istry) const override { |
| 364 | fir::acc::registerTransformationalAttrsDependentDialects(registry); |
| 365 | fir::omp::registerTransformationalAttrsDependentDialects(registry); |
| 366 | } |
| 367 | }; |
| 368 | |
| 369 | } // anonymous namespace |
| 370 | |