1 | //===- LoopVersioning.cpp -------------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | //===----------------------------------------------------------------------===// |
10 | /// \file |
11 | /// This pass looks for loops iterating over assumed-shape arrays, that can |
12 | /// be optimized by "guessing" that the stride is element-sized. |
13 | /// |
14 | /// This is done by creating two versions of the same loop: one which assumes |
15 | /// that the elements are contiguous (stride == size of element), and one that |
16 | /// is the original generic loop. |
17 | /// |
18 | /// As a side-effect of the assumed element size stride, the array is also |
19 | /// flattened to make it a 1D array - this is because the internal array |
20 | /// structure must be either 1D or have known sizes in all dimensions - and at |
21 | /// least one of the dimensions here is already unknown. |
22 | /// |
23 | /// There are two distinct benefits here: |
24 | /// 1. The loop that iterates over the elements is somewhat simplified by the |
25 | /// constant stride calculation. |
26 | /// 2. Since the compiler can understand the size of the stride, it can use |
27 | /// vector instructions, where an unknown (at compile time) stride does often |
28 | /// prevent vector operations from being used. |
29 | /// |
/// A known drawback is that the code size is increased, in some cases quite
/// substantially - 3-4x is quite plausible. This includes the effect of the
/// loop getting vectorized, which in itself often more than doubles the size
/// of the code because, unless the loop size is known, there will be a modulo
/// vector-size remainder to deal with.
35 | /// |
/// TODO: Do we need some size limit where loops no longer get duplicated?
///       Maybe some sort of cost analysis.
/// TODO: Should some loop content - for example calls to functions and
///       subroutines - inhibit the versioning of the loops? Plausibly, this
///       could be part of the cost analysis above.
41 | //===----------------------------------------------------------------------===// |
42 | |
43 | #include "flang/ISO_Fortran_binding_wrapper.h" |
44 | #include "flang/Optimizer/Builder/BoxValue.h" |
45 | #include "flang/Optimizer/Builder/FIRBuilder.h" |
46 | #include "flang/Optimizer/Builder/Runtime/Inquiry.h" |
47 | #include "flang/Optimizer/Dialect/FIRDialect.h" |
48 | #include "flang/Optimizer/Dialect/FIROps.h" |
49 | #include "flang/Optimizer/Dialect/FIRType.h" |
50 | #include "flang/Optimizer/Dialect/Support/FIRContext.h" |
51 | #include "flang/Optimizer/Dialect/Support/KindMapping.h" |
52 | #include "flang/Optimizer/Support/DataLayout.h" |
53 | #include "flang/Optimizer/Transforms/Passes.h" |
54 | #include "mlir/Dialect/LLVMIR/LLVMDialect.h" |
55 | #include "mlir/IR/Dominance.h" |
56 | #include "mlir/IR/Matchers.h" |
57 | #include "mlir/IR/TypeUtilities.h" |
58 | #include "mlir/Pass/Pass.h" |
59 | #include "mlir/Transforms/DialectConversion.h" |
60 | #include "mlir/Transforms/GreedyPatternRewriteDriver.h" |
61 | #include "mlir/Transforms/RegionUtils.h" |
62 | #include "llvm/Support/Debug.h" |
63 | #include "llvm/Support/raw_ostream.h" |
64 | |
65 | #include <algorithm> |
66 | |
67 | namespace fir { |
68 | #define GEN_PASS_DEF_LOOPVERSIONING |
69 | #include "flang/Optimizer/Transforms/Passes.h.inc" |
70 | } // namespace fir |
71 | |
72 | #define DEBUG_TYPE "flang-loop-versioning" |
73 | |
74 | namespace { |
75 | |
76 | class LoopVersioningPass |
77 | : public fir::impl::LoopVersioningBase<LoopVersioningPass> { |
78 | public: |
79 | void runOnOperation() override; |
80 | }; |
81 | |
82 | /// @struct ArgInfo |
83 | /// A structure to hold an argument, the size of the argument and dimension |
84 | /// information. |
85 | struct ArgInfo { |
86 | mlir::Value arg; |
87 | size_t size; |
88 | unsigned rank; |
89 | fir::BoxDimsOp dims[CFI_MAX_RANK]; |
90 | }; |
91 | |
92 | /// @struct ArgsUsageInLoop |
93 | /// A structure providing information about the function arguments |
94 | /// usage by the instructions immediately nested in a loop. |
95 | struct ArgsUsageInLoop { |
96 | /// Mapping between the memref operand of an array indexing |
97 | /// operation (e.g. fir.coordinate_of) and the argument information. |
98 | llvm::DenseMap<mlir::Value, ArgInfo> usageInfo; |
99 | /// Some array indexing operations inside a loop cannot be transformed. |
100 | /// This vector holds the memref operands of such operations. |
101 | /// The vector is used to make sure that we do not try to transform |
102 | /// any outer loop, since this will imply the operation rewrite |
103 | /// in this loop. |
104 | llvm::SetVector<mlir::Value> cannotTransform; |
105 | |
106 | // Debug dump of the structure members assuming that |
107 | // the information has been collected for the given loop. |
108 | void dump(fir::DoLoopOp loop) const { |
109 | LLVM_DEBUG({ |
110 | mlir::OpPrintingFlags printFlags; |
111 | printFlags.skipRegions(); |
112 | llvm::dbgs() << "Arguments usage info for loop:\n" ; |
113 | loop.print(llvm::dbgs(), printFlags); |
114 | llvm::dbgs() << "\nUsed args:\n" ; |
115 | for (auto &use : usageInfo) { |
116 | mlir::Value v = use.first; |
117 | v.print(llvm::dbgs(), printFlags); |
118 | llvm::dbgs() << "\n" ; |
119 | } |
120 | llvm::dbgs() << "\nCannot transform args:\n" ; |
121 | for (mlir::Value arg : cannotTransform) { |
122 | arg.print(llvm::dbgs(), printFlags); |
123 | llvm::dbgs() << "\n" ; |
124 | } |
125 | llvm::dbgs() << "====\n" ; |
126 | }); |
127 | } |
128 | |
129 | // Erase usageInfo and cannotTransform entries for a set |
130 | // of given arguments. |
131 | void eraseUsage(const llvm::SetVector<mlir::Value> &args) { |
132 | for (auto &arg : args) |
133 | usageInfo.erase(arg); |
134 | cannotTransform.set_subtract(args); |
135 | } |
136 | |
137 | // Erase usageInfo and cannotTransform entries for a set |
138 | // of given arguments provided in the form of usageInfo map. |
139 | void eraseUsage(const llvm::DenseMap<mlir::Value, ArgInfo> &args) { |
140 | for (auto &arg : args) { |
141 | usageInfo.erase(arg.first); |
142 | cannotTransform.remove(arg.first); |
143 | } |
144 | } |
145 | }; |
146 | } // namespace |
147 | |
148 | static fir::SequenceType getAsSequenceType(mlir::Value *v) { |
149 | mlir::Type argTy = fir::unwrapPassByRefType(fir::unwrapRefType(v->getType())); |
150 | return argTy.dyn_cast<fir::SequenceType>(); |
151 | } |
152 | |
153 | /// if a value comes from a fir.declare, follow it to the original source, |
154 | /// otherwise return the value |
155 | static mlir::Value unwrapFirDeclare(mlir::Value val) { |
156 | // fir.declare is for source code variables. We don't have declares of |
157 | // declares |
158 | if (fir::DeclareOp declare = val.getDefiningOp<fir::DeclareOp>()) |
159 | return declare.getMemref(); |
160 | return val; |
161 | } |
162 | |
163 | /// if a value comes from a fir.rebox, follow the rebox to the original source, |
164 | /// of the value, otherwise return the value |
165 | static mlir::Value unwrapReboxOp(mlir::Value val) { |
166 | // don't support reboxes of reboxes |
167 | if (fir::ReboxOp rebox = val.getDefiningOp<fir::ReboxOp>()) |
168 | val = rebox.getBox(); |
169 | return val; |
170 | } |
171 | |
172 | /// normalize a value (removing fir.declare and fir.rebox) so that we can |
173 | /// more conveniently spot values which came from function arguments |
174 | static mlir::Value normaliseVal(mlir::Value val) { |
175 | return unwrapFirDeclare(unwrapReboxOp(val)); |
176 | } |
177 | |
178 | /// some FIR operations accept a fir.shape, a fir.shift or a fir.shapeshift. |
179 | /// fir.shift and fir.shapeshift allow us to extract lower bounds |
180 | /// if lowerbounds cannot be found, return nullptr |
181 | static mlir::Value tryGetLowerBoundsFromShapeLike(mlir::Value shapeLike, |
182 | unsigned dim) { |
183 | mlir::Value lowerBound{nullptr}; |
184 | if (auto shift = shapeLike.getDefiningOp<fir::ShiftOp>()) |
185 | lowerBound = shift.getOrigins()[dim]; |
186 | if (auto shapeShift = shapeLike.getDefiningOp<fir::ShapeShiftOp>()) |
187 | lowerBound = shapeShift.getOrigins()[dim]; |
188 | return lowerBound; |
189 | } |
190 | |
191 | /// attempt to get the array lower bounds of dimension dim of the memref |
192 | /// argument to a fir.array_coor op |
193 | /// 0 <= dim < rank |
194 | /// May return nullptr if no lower bounds can be determined |
195 | static mlir::Value getLowerBound(fir::ArrayCoorOp coop, unsigned dim) { |
196 | // 1) try to get from the shape argument to fir.array_coor |
197 | if (mlir::Value shapeLike = coop.getShape()) |
198 | if (mlir::Value lb = tryGetLowerBoundsFromShapeLike(shapeLike, dim)) |
199 | return lb; |
200 | |
201 | // It is important not to try to read the lower bound from the box, because |
202 | // in the FIR lowering, boxes will sometimes contain incorrect lower bound |
203 | // information |
204 | |
205 | // out of ideas |
206 | return {}; |
207 | } |
208 | |
209 | /// gets the i'th index from array coordinate operation op |
210 | /// dim should range between 0 and rank - 1 |
211 | static mlir::Value getIndex(fir::FirOpBuilder &builder, mlir::Operation *op, |
212 | unsigned dim) { |
213 | if (fir::CoordinateOp coop = mlir::dyn_cast<fir::CoordinateOp>(op)) |
214 | return coop.getCoor()[dim]; |
215 | |
216 | fir::ArrayCoorOp coop = mlir::dyn_cast<fir::ArrayCoorOp>(op); |
217 | assert(coop && |
218 | "operation must be either fir.coordiante_of or fir.array_coor" ); |
219 | |
220 | // fir.coordinate_of indices start at 0: adjust these indices to match by |
221 | // subtracting the lower bound |
222 | mlir::Value index = coop.getIndices()[dim]; |
223 | mlir::Value lb = getLowerBound(coop, dim); |
224 | if (!lb) |
225 | // assume a default lower bound of one |
226 | lb = builder.createIntegerConstant(coop.getLoc(), index.getType(), 1); |
227 | |
228 | // index_0 = index - lb; |
229 | if (lb.getType() != index.getType()) |
230 | lb = builder.createConvert(coop.getLoc(), index.getType(), lb); |
231 | return builder.create<mlir::arith::SubIOp>(coop.getLoc(), index, lb); |
232 | } |
233 | |
234 | void LoopVersioningPass::runOnOperation() { |
235 | LLVM_DEBUG(llvm::dbgs() << "=== Begin " DEBUG_TYPE " ===\n" ); |
236 | mlir::func::FuncOp func = getOperation(); |
237 | |
238 | // First look for arguments with assumed shape = unknown extent in the lowest |
239 | // dimension. |
240 | LLVM_DEBUG(llvm::dbgs() << "Func-name:" << func.getSymName() << "\n" ); |
241 | mlir::Block::BlockArgListType args = func.getArguments(); |
242 | mlir::ModuleOp module = func->getParentOfType<mlir::ModuleOp>(); |
243 | fir::KindMapping kindMap = fir::getKindMapping(module); |
244 | mlir::SmallVector<ArgInfo, 4> argsOfInterest; |
245 | std::optional<mlir::DataLayout> dl = |
246 | fir::support::getOrSetDataLayout(module, /*allowDefaultLayout=*/false); |
247 | if (!dl) |
248 | mlir::emitError(module.getLoc(), |
249 | "data layout attribute is required to perform " DEBUG_TYPE |
250 | "pass" ); |
251 | for (auto &arg : args) { |
252 | // Optional arguments must be checked for IsPresent before |
253 | // looking for the bounds. They are unsupported for the time being. |
254 | if (func.getArgAttrOfType<mlir::UnitAttr>(arg.getArgNumber(), |
255 | fir::getOptionalAttrName())) { |
256 | LLVM_DEBUG(llvm::dbgs() << "OPTIONAL is not supported\n" ); |
257 | continue; |
258 | } |
259 | |
260 | if (auto seqTy = getAsSequenceType(&arg)) { |
261 | unsigned rank = seqTy.getDimension(); |
262 | if (rank > 0 && |
263 | seqTy.getShape()[0] == fir::SequenceType::getUnknownExtent()) { |
264 | size_t typeSize = 0; |
265 | mlir::Type elementType = fir::unwrapSeqOrBoxedSeqType(arg.getType()); |
266 | if (mlir::isa<mlir::FloatType>(elementType) || |
267 | mlir::isa<mlir::IntegerType>(elementType) || |
268 | mlir::isa<fir::ComplexType>(elementType)) { |
269 | auto [eleSize, eleAlign] = fir::getTypeSizeAndAlignment( |
270 | arg.getLoc(), elementType, *dl, kindMap); |
271 | typeSize = llvm::alignTo(eleSize, eleAlign); |
272 | } |
273 | if (typeSize) |
274 | argsOfInterest.push_back({arg, typeSize, rank, {}}); |
275 | else |
276 | LLVM_DEBUG(llvm::dbgs() << "Type not supported\n" ); |
277 | } |
278 | } |
279 | } |
280 | |
281 | if (argsOfInterest.empty()) { |
282 | LLVM_DEBUG(llvm::dbgs() |
283 | << "No suitable arguments.\n=== End " DEBUG_TYPE " ===\n" ); |
284 | return; |
285 | } |
286 | |
287 | // A list of all loops in the function in post-order. |
288 | mlir::SmallVector<fir::DoLoopOp> originalLoops; |
289 | // Information about the arguments usage by the instructions |
290 | // immediately nested in a loop. |
291 | llvm::DenseMap<fir::DoLoopOp, ArgsUsageInLoop> argsInLoops; |
292 | |
293 | auto &domInfo = getAnalysis<mlir::DominanceInfo>(); |
294 | |
295 | // Traverse the loops in post-order and see |
296 | // if those arguments are used inside any loop. |
297 | func.walk([&](fir::DoLoopOp loop) { |
298 | mlir::Block &body = *loop.getBody(); |
299 | auto &argsInLoop = argsInLoops[loop]; |
300 | originalLoops.push_back(loop); |
301 | body.walk([&](mlir::Operation *op) { |
302 | // Support either fir.array_coor or fir.coordinate_of. |
303 | if (!mlir::isa<fir::ArrayCoorOp, fir::CoordinateOp>(op)) |
304 | return; |
305 | // Process only operations immediately nested in the current loop. |
306 | if (op->getParentOfType<fir::DoLoopOp>() != loop) |
307 | return; |
308 | mlir::Value operand = op->getOperand(0); |
309 | for (auto a : argsOfInterest) { |
310 | if (a.arg == normaliseVal(operand)) { |
311 | // Use the reboxed value, not the block arg when re-creating the loop. |
312 | a.arg = operand; |
313 | |
314 | // Check that the operand dominates the loop? |
315 | // If this is the case, record such operands in argsInLoop.cannot- |
316 | // Transform, so that they disable the transformation for the parent |
317 | /// loops as well. |
318 | if (!domInfo.dominates(a.arg, loop)) |
319 | argsInLoop.cannotTransform.insert(a.arg); |
320 | |
321 | // No support currently for sliced arrays. |
322 | // This means that we cannot transform properly |
323 | // instructions referencing a.arg in the whole loop |
324 | // nest this loop is located in. |
325 | if (auto arrayCoor = mlir::dyn_cast<fir::ArrayCoorOp>(op)) |
326 | if (arrayCoor.getSlice()) |
327 | argsInLoop.cannotTransform.insert(a.arg); |
328 | |
329 | if (argsInLoop.cannotTransform.contains(a.arg)) { |
330 | // Remove any previously recorded usage, if any. |
331 | argsInLoop.usageInfo.erase(a.arg); |
332 | break; |
333 | } |
334 | |
335 | // Record the a.arg usage, if not recorded yet. |
336 | argsInLoop.usageInfo.try_emplace(a.arg, a); |
337 | break; |
338 | } |
339 | } |
340 | }); |
341 | }); |
342 | |
343 | // Dump loops info after initial collection. |
344 | LLVM_DEBUG({ |
345 | llvm::dbgs() << "Initial usage info:\n" ; |
346 | for (fir::DoLoopOp loop : originalLoops) { |
347 | auto &argsInLoop = argsInLoops[loop]; |
348 | argsInLoop.dump(loop); |
349 | } |
350 | }); |
351 | |
352 | // Clear argument usage for parent loops if an inner loop |
353 | // contains a non-transformable usage. |
354 | for (fir::DoLoopOp loop : originalLoops) { |
355 | auto &argsInLoop = argsInLoops[loop]; |
356 | if (argsInLoop.cannotTransform.empty()) |
357 | continue; |
358 | |
359 | fir::DoLoopOp parent = loop; |
360 | while ((parent = parent->getParentOfType<fir::DoLoopOp>())) |
361 | argsInLoops[parent].eraseUsage(argsInLoop.cannotTransform); |
362 | } |
363 | |
364 | // If an argument access can be optimized in a loop and |
365 | // its descendant loop, then it does not make sense to |
366 | // generate the contiguity check for the descendant loop. |
367 | // The check will be produced as part of the ancestor |
368 | // loop's transformation. So we can clear the argument |
369 | // usage for all descendant loops. |
370 | for (fir::DoLoopOp loop : originalLoops) { |
371 | auto &argsInLoop = argsInLoops[loop]; |
372 | if (argsInLoop.usageInfo.empty()) |
373 | continue; |
374 | |
375 | loop.getBody()->walk([&](fir::DoLoopOp dloop) { |
376 | argsInLoops[dloop].eraseUsage(argsInLoop.usageInfo); |
377 | }); |
378 | } |
379 | |
380 | LLVM_DEBUG({ |
381 | llvm::dbgs() << "Final usage info:\n" ; |
382 | for (fir::DoLoopOp loop : originalLoops) { |
383 | auto &argsInLoop = argsInLoops[loop]; |
384 | argsInLoop.dump(loop); |
385 | } |
386 | }); |
387 | |
388 | // Reduce the collected information to a list of loops |
389 | // with attached arguments usage information. |
390 | // The list must hold the loops in post order, so that |
391 | // the inner loops are transformed before the outer loops. |
392 | struct OpsWithArgs { |
393 | mlir::Operation *op; |
394 | mlir::SmallVector<ArgInfo, 4> argsAndDims; |
395 | }; |
396 | mlir::SmallVector<OpsWithArgs, 4> loopsOfInterest; |
397 | for (fir::DoLoopOp loop : originalLoops) { |
398 | auto &argsInLoop = argsInLoops[loop]; |
399 | if (argsInLoop.usageInfo.empty()) |
400 | continue; |
401 | OpsWithArgs info; |
402 | info.op = loop; |
403 | for (auto &arg : argsInLoop.usageInfo) |
404 | info.argsAndDims.push_back(arg.second); |
405 | loopsOfInterest.emplace_back(std::move(info)); |
406 | } |
407 | |
408 | if (loopsOfInterest.empty()) { |
409 | LLVM_DEBUG(llvm::dbgs() |
410 | << "No loops to transform.\n=== End " DEBUG_TYPE " ===\n" ); |
411 | return; |
412 | } |
413 | |
414 | // If we get here, there are loops to process. |
415 | fir::FirOpBuilder builder{module, std::move(kindMap)}; |
416 | mlir::Location loc = builder.getUnknownLoc(); |
417 | mlir::IndexType idxTy = builder.getIndexType(); |
418 | |
419 | LLVM_DEBUG(llvm::dbgs() << "Module Before transformation:" ); |
420 | LLVM_DEBUG(module->dump()); |
421 | |
422 | LLVM_DEBUG(llvm::dbgs() << "loopsOfInterest: " << loopsOfInterest.size() |
423 | << "\n" ); |
424 | for (auto op : loopsOfInterest) { |
425 | LLVM_DEBUG(op.op->dump()); |
426 | builder.setInsertionPoint(op.op); |
427 | |
428 | mlir::Value allCompares = nullptr; |
429 | // Ensure all of the arrays are unit-stride. |
430 | for (auto &arg : op.argsAndDims) { |
431 | // Fetch all the dimensions of the array, except the last dimension. |
432 | // Always fetch the first dimension, however, so set ndims = 1 if |
433 | // we have one dim |
434 | unsigned ndims = arg.rank; |
435 | for (unsigned i = 0; i < ndims; i++) { |
436 | mlir::Value dimIdx = builder.createIntegerConstant(loc, idxTy, i); |
437 | arg.dims[i] = builder.create<fir::BoxDimsOp>(loc, idxTy, idxTy, idxTy, |
438 | arg.arg, dimIdx); |
439 | } |
440 | // We only care about lowest order dimension, here. |
441 | mlir::Value elemSize = |
442 | builder.createIntegerConstant(loc, idxTy, arg.size); |
443 | mlir::Value cmp = builder.create<mlir::arith::CmpIOp>( |
444 | loc, mlir::arith::CmpIPredicate::eq, arg.dims[0].getResult(2), |
445 | elemSize); |
446 | if (!allCompares) { |
447 | allCompares = cmp; |
448 | } else { |
449 | allCompares = |
450 | builder.create<mlir::arith::AndIOp>(loc, cmp, allCompares); |
451 | } |
452 | } |
453 | |
454 | auto ifOp = |
455 | builder.create<fir::IfOp>(loc, op.op->getResultTypes(), allCompares, |
456 | /*withElse=*/true); |
457 | builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); |
458 | |
459 | LLVM_DEBUG(llvm::dbgs() << "Creating cloned loop\n" ); |
460 | mlir::Operation *clonedLoop = op.op->clone(); |
461 | bool changed = false; |
462 | for (auto &arg : op.argsAndDims) { |
463 | fir::SequenceType::Shape newShape; |
464 | newShape.push_back(fir::SequenceType::getUnknownExtent()); |
465 | auto elementType = fir::unwrapSeqOrBoxedSeqType(arg.arg.getType()); |
466 | mlir::Type arrTy = fir::SequenceType::get(newShape, elementType); |
467 | mlir::Type boxArrTy = fir::BoxType::get(arrTy); |
468 | mlir::Type refArrTy = builder.getRefType(arrTy); |
469 | auto carg = builder.create<fir::ConvertOp>(loc, boxArrTy, arg.arg); |
470 | auto caddr = builder.create<fir::BoxAddrOp>(loc, refArrTy, carg); |
471 | auto insPt = builder.saveInsertionPoint(); |
472 | // Use caddr instead of arg. |
473 | clonedLoop->walk([&](mlir::Operation *coop) { |
474 | if (!mlir::isa<fir::CoordinateOp, fir::ArrayCoorOp>(coop)) |
475 | return; |
476 | // Reduce the multi-dimensioned index to a single index. |
477 | // This is required becase fir arrays do not support multiple dimensions |
478 | // with unknown dimensions at compile time. |
479 | // We then calculate the multidimensional array like this: |
480 | // arr(x, y, z) bedcomes arr(z * stride(2) + y * stride(1) + x) |
481 | // where stride is the distance between elements in the dimensions |
482 | // 0, 1 and 2 or x, y and z. |
483 | if (coop->getOperand(0) == arg.arg && coop->getOperands().size() >= 2) { |
484 | builder.setInsertionPoint(coop); |
485 | mlir::Value totalIndex; |
486 | for (unsigned i = arg.rank - 1; i > 0; i--) { |
487 | mlir::Value curIndex = |
488 | builder.createConvert(loc, idxTy, getIndex(builder, coop, i)); |
489 | // Multiply by the stride of this array. Later we'll divide by the |
490 | // element size. |
491 | mlir::Value scale = |
492 | builder.createConvert(loc, idxTy, arg.dims[i].getResult(2)); |
493 | curIndex = |
494 | builder.create<mlir::arith::MulIOp>(loc, scale, curIndex); |
495 | totalIndex = (totalIndex) ? builder.create<mlir::arith::AddIOp>( |
496 | loc, curIndex, totalIndex) |
497 | : curIndex; |
498 | } |
499 | // This is the lowest dimension - which doesn't need scaling |
500 | mlir::Value finalIndex = |
501 | builder.createConvert(loc, idxTy, getIndex(builder, coop, 0)); |
502 | if (totalIndex) { |
503 | assert(llvm::isPowerOf2_32(arg.size) && |
504 | "Expected power of two here" ); |
505 | unsigned bits = llvm::Log2_32(arg.size); |
506 | mlir::Value elemShift = |
507 | builder.createIntegerConstant(loc, idxTy, bits); |
508 | totalIndex = builder.create<mlir::arith::AddIOp>( |
509 | loc, |
510 | builder.create<mlir::arith::ShRSIOp>(loc, totalIndex, |
511 | elemShift), |
512 | finalIndex); |
513 | } else { |
514 | totalIndex = finalIndex; |
515 | } |
516 | auto newOp = builder.create<fir::CoordinateOp>( |
517 | loc, builder.getRefType(elementType), caddr, |
518 | mlir::ValueRange{totalIndex}); |
519 | LLVM_DEBUG(newOp->dump()); |
520 | coop->getResult(0).replaceAllUsesWith(newOp->getResult(0)); |
521 | coop->erase(); |
522 | changed = true; |
523 | } |
524 | }); |
525 | |
526 | builder.restoreInsertionPoint(insPt); |
527 | } |
528 | assert(changed && "Expected operations to have changed" ); |
529 | |
530 | builder.insert(clonedLoop); |
531 | // Forward the result(s), if any, from the loop operation to the |
532 | // |
533 | mlir::ResultRange results = clonedLoop->getResults(); |
534 | bool hasResults = (results.size() > 0); |
535 | if (hasResults) |
536 | builder.create<fir::ResultOp>(loc, results); |
537 | |
538 | // Add the original loop in the else-side of the if operation. |
539 | builder.setInsertionPointToStart(&ifOp.getElseRegion().front()); |
540 | op.op->replaceAllUsesWith(ifOp); |
541 | op.op->remove(); |
542 | builder.insert(op.op); |
543 | // Rely on "cloned loop has results, so original loop also has results". |
544 | if (hasResults) { |
545 | builder.create<fir::ResultOp>(loc, op.op->getResults()); |
546 | } else { |
547 | // Use an assert to check this. |
548 | assert(op.op->getResults().size() == 0 && |
549 | "Weird, the cloned loop doesn't have results, but the original " |
550 | "does?" ); |
551 | } |
552 | } |
553 | |
554 | LLVM_DEBUG(llvm::dbgs() << "After transform:\n" ); |
555 | LLVM_DEBUG(module->dump()); |
556 | |
557 | LLVM_DEBUG(llvm::dbgs() << "=== End " DEBUG_TYPE " ===\n" ); |
558 | } |
559 | |
560 | std::unique_ptr<mlir::Pass> fir::createLoopVersioningPass() { |
561 | return std::make_unique<LoopVersioningPass>(); |
562 | } |
563 | |