1 | //===-- LowerRepackArrays.cpp ---------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// \file |
9 | /// This pass expands fir.pack_array and fir.unpack_array operations |
10 | /// into sequences of other FIR operations and Fortran runtime calls. |
11 | /// This pass is using structured control flow FIR operations such |
12 | /// as fir.if, so its placement in the pipeline should guarantee |
13 | /// further lowering of these operations. |
14 | /// |
15 | /// A fir.pack_array operation is converted into a sequence of checks |
16 | /// identifying whether an array needs to be copied into a contiguous |
17 | /// temporary. When the checks pass, a new memory allocation is done |
18 | /// for the temporary array (in either stack or heap memory). |
19 | /// If `fir.pack_array` does not have no_copy attribute, then |
20 | /// the original array is shallow-copied into the temporary. |
21 | /// |
/// A fir.unpack_array operation is converted into a check
23 | /// of whether the original and the temporary arrays are different |
24 | /// memory. When the check passes, the temporary array might be |
25 | /// shallow-copied into the original array, and then the temporary |
26 | /// array is deallocated (if it was allocated in stack memory, |
27 | /// then there is no explicit deallocation). |
28 | //===----------------------------------------------------------------------===// |
29 | |
30 | #include "flang/Optimizer/CodeGen/CodeGen.h" |
31 | |
32 | #include "flang/Optimizer/Builder/Character.h" |
33 | #include "flang/Optimizer/Builder/FIRBuilder.h" |
34 | #include "flang/Optimizer/Builder/MutableBox.h" |
35 | #include "flang/Optimizer/Builder/Runtime/Allocatable.h" |
36 | #include "flang/Optimizer/Builder/Runtime/Transformational.h" |
37 | #include "flang/Optimizer/Builder/Todo.h" |
38 | #include "flang/Optimizer/Dialect/FIRDialect.h" |
39 | #include "flang/Optimizer/Dialect/FIROps.h" |
40 | #include "flang/Optimizer/Dialect/FIRType.h" |
41 | #include "flang/Optimizer/OpenACC/RegisterOpenACCExtensions.h" |
42 | #include "flang/Optimizer/OpenMP/Support/RegisterOpenMPExtensions.h" |
43 | #include "mlir/Pass/Pass.h" |
44 | #include "mlir/Transforms/GreedyPatternRewriteDriver.h" |
45 | |
46 | namespace fir { |
47 | #define GEN_PASS_DEF_LOWERREPACKARRAYSPASS |
48 | #include "flang/Optimizer/CodeGen/CGPasses.h.inc" |
49 | } // namespace fir |
50 | |
51 | #define DEBUG_TYPE "lower-repack-arrays" |
52 | |
53 | namespace { |
54 | class PackArrayConversion : public mlir::OpRewritePattern<fir::PackArrayOp> { |
55 | public: |
56 | using OpRewritePattern::OpRewritePattern; |
57 | |
58 | mlir::LogicalResult |
59 | matchAndRewrite(fir::PackArrayOp op, |
60 | mlir::PatternRewriter &rewriter) const override; |
61 | |
62 | private: |
63 | static constexpr llvm::StringRef bufferName = ".repacked" ; |
64 | |
65 | // Return value of fir::BaseBoxType that represents a temporary |
66 | // array created for the original box with given extents and |
67 | // type parameters. The new box has the default lower bounds. |
68 | // If useStack is true, then the temporary will be allocated |
69 | // in stack memory (when possible). |
70 | static mlir::Value allocateTempBuffer(fir::FirOpBuilder &builder, |
71 | mlir::Location loc, bool useStack, |
72 | mlir::Value origBox, |
73 | llvm::ArrayRef<mlir::Value> extents, |
74 | llvm::ArrayRef<mlir::Value> typeParams); |
75 | |
76 | // Generate value of fir::BaseBoxType that represents the result |
77 | // of the given fir.pack_array operation. The original box |
78 | // is assumed to be present (though, it may represent an empty array). |
79 | static mlir::FailureOr<mlir::Value> genRepackedBox(fir::FirOpBuilder &builder, |
80 | mlir::Location loc, |
81 | fir::PackArrayOp packOp); |
82 | }; |
83 | |
84 | class UnpackArrayConversion |
85 | : public mlir::OpRewritePattern<fir::UnpackArrayOp> { |
86 | public: |
87 | using OpRewritePattern::OpRewritePattern; |
88 | |
89 | mlir::LogicalResult |
90 | matchAndRewrite(fir::UnpackArrayOp op, |
91 | mlir::PatternRewriter &rewriter) const override; |
92 | }; |
93 | } // anonymous namespace |
94 | |
95 | // Return true iff for the given original boxed array we can |
96 | // allocate temporary memory in stack memory. |
97 | // This function is used to synchronize allocation/deallocation |
98 | // implied by fir.pack_array and fir.unpack_array, because |
99 | // the presence of the stack attribute does not automatically |
100 | // mean that the allocation is actually done in stack memory. |
101 | // For example, we always do the heap allocation for polymorphic |
102 | // types using Fortran runtime. |
103 | // Adding the polymorpic mold to fir.alloca and then using |
104 | // Fortran runtime to compute the allocation size could probably |
105 | // resolve this limitation. |
106 | static bool canAllocateTempOnStack(mlir::Value box) { |
107 | return !fir::isPolymorphicType(box.getType()); |
108 | } |
109 | |
110 | /// Return true if array repacking is safe either statically |
111 | /// (there are no 'is_safe' attributes) or dynamically |
112 | /// (neither of the 'is_safe' attributes claims 'isDynamicallySafe() == false'). |
113 | /// \p op is either fir.pack_array or fir.unpack_array. |
114 | template <typename OP> |
115 | static bool repackIsSafe(OP op) { |
116 | bool isSafe = true; |
117 | if (auto isSafeAttrs = op.getIsSafe()) { |
118 | // We currently support only the attributes for which |
119 | // isDynamicallySafe() returns false. |
120 | for (auto attr : *isSafeAttrs) { |
121 | auto iface = mlir::cast<fir::SafeTempArrayCopyAttrInterface>(attr); |
122 | if (iface.isDynamicallySafe()) |
123 | TODO(op.getLoc(), "dynamically safe array repacking" ); |
124 | else |
125 | isSafe = false; |
126 | } |
127 | } |
128 | return isSafe; |
129 | } |
130 | |
131 | mlir::LogicalResult |
132 | PackArrayConversion::matchAndRewrite(fir::PackArrayOp op, |
133 | mlir::PatternRewriter &rewriter) const { |
134 | mlir::Value box = op.getArray(); |
135 | // If repacking is not safe, then just use the original box. |
136 | if (!repackIsSafe(op)) { |
137 | rewriter.replaceOp(op, box); |
138 | return mlir::success(); |
139 | } |
140 | |
141 | mlir::Location loc = op.getLoc(); |
142 | fir::FirOpBuilder builder(rewriter, op.getOperation()); |
143 | if (op.getMaxSize() || op.getMaxElementSize() || op.getMinStride()) |
144 | TODO(loc, "fir.pack_array with constraints" ); |
145 | if (op.getHeuristics() != fir::PackArrayHeuristics::None) |
146 | TODO(loc, "fir.pack_array with heuristics" ); |
147 | |
148 | auto boxType = mlir::cast<fir::BaseBoxType>(box.getType()); |
149 | |
150 | // For now we have to always check if the box is present. |
151 | auto isPresent = |
152 | builder.create<fir::IsPresentOp>(loc, builder.getI1Type(), box); |
153 | |
154 | fir::IfOp ifOp = builder.create<fir::IfOp>(loc, boxType, isPresent, |
155 | /*withElseRegion=*/true); |
156 | builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); |
157 | // The box is present. |
158 | auto newBox = genRepackedBox(builder, loc, op); |
159 | if (mlir::failed(newBox)) |
160 | return newBox; |
161 | builder.create<fir::ResultOp>(loc, *newBox); |
162 | |
163 | // The box is not present. Return original box. |
164 | builder.setInsertionPointToStart(&ifOp.getElseRegion().front()); |
165 | builder.create<fir::ResultOp>(loc, box); |
166 | |
167 | rewriter.replaceOp(op, ifOp.getResult(0)); |
168 | return mlir::success(); |
169 | } |
170 | |
171 | mlir::Value PackArrayConversion::allocateTempBuffer( |
172 | fir::FirOpBuilder &builder, mlir::Location loc, bool useStack, |
173 | mlir::Value origBox, llvm::ArrayRef<mlir::Value> extents, |
174 | llvm::ArrayRef<mlir::Value> typeParams) { |
175 | auto tempType = mlir::cast<fir::SequenceType>( |
176 | fir::extractSequenceType(origBox.getType())); |
177 | assert(tempType.getDimension() == extents.size() && |
178 | "number of extents does not match the rank" ); |
179 | |
180 | mlir::Value shape = builder.genShape(loc, extents); |
181 | auto [base, isHeapAllocation] = builder.createArrayTemp( |
182 | loc, tempType, shape, extents, typeParams, |
183 | fir::FirOpBuilder::genTempDeclareOp, |
184 | fir::isPolymorphicType(origBox.getType()) ? origBox : nullptr, useStack, |
185 | bufferName); |
186 | // Make sure canAllocateTempOnStack() can recognize when |
187 | // the temporary is actually allocated on the stack |
188 | // by createArrayTemp(). Otherwise, we may miss dynamic |
189 | // deallocation when lowering fir.unpack_array. |
190 | if (useStack && canAllocateTempOnStack(origBox)) |
191 | assert(!isHeapAllocation && "temp must have been allocated on the stack" ); |
192 | |
193 | mlir::Type ptrType = base.getType(); |
194 | if (llvm::isa<fir::BaseBoxType>(ptrType)) |
195 | return base; |
196 | |
197 | mlir::Type tempBoxType = fir::BoxType::get(mlir::isa<fir::HeapType>(ptrType) |
198 | ? ptrType |
199 | : fir::unwrapRefType(ptrType)); |
200 | mlir::Value newBox = |
201 | builder.createBox(loc, tempBoxType, base, shape, /*slice=*/nullptr, |
202 | typeParams, /*tdesc=*/nullptr); |
203 | return newBox; |
204 | } |
205 | |
206 | mlir::FailureOr<mlir::Value> |
207 | PackArrayConversion::genRepackedBox(fir::FirOpBuilder &builder, |
208 | mlir::Location loc, fir::PackArrayOp op) { |
209 | mlir::OpBuilder::InsertionGuard guard(builder); |
210 | mlir::Value box = op.getArray(); |
211 | |
212 | llvm::SmallVector<mlir::Value> typeParams(op.getTypeparams().begin(), |
213 | op.getTypeparams().end()); |
214 | auto boxType = mlir::cast<fir::BaseBoxType>(box.getType()); |
215 | mlir::Type indexType = builder.getIndexType(); |
216 | |
217 | // If type parameters are not specified by fir.pack_array, |
218 | // figure out how many of them we need to read from the box. |
219 | unsigned numTypeParams = 0; |
220 | if (typeParams.size() == 0) { |
221 | if (auto recordType = |
222 | mlir::dyn_cast<fir::RecordType>(boxType.unwrapInnerType())) |
223 | if (recordType.getNumLenParams() != 0) |
224 | TODO(loc, |
225 | "allocating temporary for a parameterized derived type array" ); |
226 | |
227 | if (auto charType = |
228 | mlir::dyn_cast<fir::CharacterType>(boxType.unwrapInnerType())) { |
229 | if (charType.hasDynamicLen()) { |
230 | // Read one length parameter from the box. |
231 | numTypeParams = 1; |
232 | } else { |
233 | // Place the constant length into typeParams. |
234 | mlir::Value length = |
235 | builder.createIntegerConstant(loc, indexType, charType.getLen()); |
236 | typeParams.push_back(length); |
237 | } |
238 | } |
239 | } |
240 | |
241 | // Create a temporay iff the original is not contigous and is not empty. |
242 | auto isNotContiguous = builder.genNot( |
243 | loc, builder.create<fir::IsContiguousBoxOp>(loc, box, op.getInnermost())); |
244 | auto dataAddr = |
245 | builder.create<fir::BoxAddrOp>(loc, fir::boxMemRefType(boxType), box); |
246 | auto isNotEmpty = |
247 | builder.create<fir::IsPresentOp>(loc, builder.getI1Type(), dataAddr); |
248 | auto doPack = |
249 | builder.create<mlir::arith::AndIOp>(loc, isNotContiguous, isNotEmpty); |
250 | |
251 | fir::IfOp ifOp = |
252 | builder.create<fir::IfOp>(loc, boxType, doPack, /*withElseRegion=*/true); |
253 | |
254 | // Return original box. |
255 | builder.setInsertionPointToStart(&ifOp.getElseRegion().front()); |
256 | builder.create<fir::ResultOp>(loc, box); |
257 | |
258 | // Create a new box. |
259 | builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); |
260 | |
261 | // Get lower bounds and extents from the box. |
262 | llvm::SmallVector<mlir::Value, Fortran::common::maxRank> lbounds, extents; |
263 | fir::factory::genDimInfoFromBox(builder, loc, box, &lbounds, &extents, |
264 | /*strides=*/nullptr); |
265 | // Get the type parameters from the box, if needed. |
266 | llvm::SmallVector<mlir::Value> assumedTypeParams; |
267 | if (numTypeParams != 0) { |
268 | if (auto charType = |
269 | mlir::dyn_cast<fir::CharacterType>(boxType.unwrapInnerType())) |
270 | if (charType.hasDynamicLen()) { |
271 | fir::factory::CharacterExprHelper charHelper(builder, loc); |
272 | mlir::Value len = charHelper.readLengthFromBox(box, charType); |
273 | typeParams.push_back(builder.createConvert(loc, indexType, len)); |
274 | } |
275 | |
276 | if (numTypeParams != typeParams.size()) |
277 | return emitError(loc) << "failed to compute the type parameters for " |
278 | << op.getOperation() << '\n'; |
279 | } |
280 | |
281 | mlir::Value tempBox = |
282 | allocateTempBuffer(builder, loc, op.getStack(), box, extents, typeParams); |
283 | if (!op.getNoCopy()) |
284 | fir::runtime::genShallowCopy(builder, loc, tempBox, box, |
285 | /*resultIsAllocated=*/true); |
286 | |
287 | // Set lower bounds after the original box. |
288 | mlir::Value shift = builder.genShift(loc, lbounds); |
289 | tempBox = builder.create<fir::ReboxOp>(loc, boxType, tempBox, shift, |
290 | /*slice=*/nullptr); |
291 | builder.create<fir::ResultOp>(loc, tempBox); |
292 | |
293 | return ifOp.getResult(0); |
294 | } |
295 | |
296 | mlir::LogicalResult |
297 | UnpackArrayConversion::matchAndRewrite(fir::UnpackArrayOp op, |
298 | mlir::PatternRewriter &rewriter) const { |
299 | // If repacking is not safe, then just remove the operation. |
300 | if (!repackIsSafe(op)) { |
301 | rewriter.eraseOp(op); |
302 | return mlir::success(); |
303 | } |
304 | |
305 | mlir::Location loc = op.getLoc(); |
306 | fir::FirOpBuilder builder(rewriter, op.getOperation()); |
307 | mlir::Type predicateType = builder.getI1Type(); |
308 | mlir::Value tempBox = op.getTemp(); |
309 | mlir::Value originalBox = op.getOriginal(); |
310 | |
311 | // For now we have to always check if the box is present. |
312 | auto isPresent = |
313 | builder.create<fir::IsPresentOp>(loc, predicateType, originalBox); |
314 | |
315 | builder.genIfThen(loc, isPresent).genThen([&]() { |
316 | mlir::Type addrType = |
317 | fir::HeapType::get(fir::extractSequenceType(tempBox.getType())); |
318 | mlir::Value tempAddr = |
319 | builder.create<fir::BoxAddrOp>(loc, addrType, tempBox); |
320 | mlir::Value originalAddr = |
321 | builder.create<fir::BoxAddrOp>(loc, addrType, originalBox); |
322 | |
323 | auto isNotSame = builder.genPtrCompare(loc, mlir::arith::CmpIPredicate::ne, |
324 | tempAddr, originalAddr); |
325 | builder.genIfThen(loc, isNotSame).genThen([&]() {}); |
326 | // Copy from temporary to the original. |
327 | if (!op.getNoCopy()) |
328 | fir::runtime::genShallowCopy(builder, loc, originalBox, tempBox, |
329 | /*resultIsAllocated=*/true); |
330 | |
331 | // Deallocate, if it was allocated in heap. |
332 | // Note that the stack attribute does not always mean |
333 | // that the allocation was actually done in stack memory. |
334 | // There are currently cases where we delegate the allocation |
335 | // to the runtime that uses heap memory, even when the stack |
336 | // attribute is set on fir.pack_array. |
337 | if (!op.getStack() || !canAllocateTempOnStack(originalBox)) |
338 | builder.create<fir::FreeMemOp>(loc, tempAddr); |
339 | }); |
340 | rewriter.eraseOp(op); |
341 | return mlir::success(); |
342 | } |
343 | |
344 | namespace { |
345 | class LowerRepackArraysPass |
346 | : public fir::impl::LowerRepackArraysPassBase<LowerRepackArraysPass> { |
347 | public: |
348 | using LowerRepackArraysPassBase< |
349 | LowerRepackArraysPass>::LowerRepackArraysPassBase; |
350 | |
351 | void runOnOperation() override final { |
352 | auto *context = &getContext(); |
353 | mlir::ModuleOp module = getOperation(); |
354 | mlir::RewritePatternSet patterns(context); |
355 | patterns.insert<PackArrayConversion>(context); |
356 | patterns.insert<UnpackArrayConversion>(context); |
357 | mlir::GreedyRewriteConfig config; |
358 | config.setRegionSimplificationLevel( |
359 | mlir::GreedySimplifyRegionLevel::Disabled); |
360 | (void)applyPatternsGreedily(module, std::move(patterns), config); |
361 | } |
362 | |
363 | void getDependentDialects(mlir::DialectRegistry ®istry) const override { |
364 | fir::acc::registerTransformationalAttrsDependentDialects(registry); |
365 | fir::omp::registerTransformationalAttrsDependentDialects(registry); |
366 | } |
367 | }; |
368 | |
369 | } // anonymous namespace |
370 | |