1//===- LoopVersioning.cpp -------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9//===----------------------------------------------------------------------===//
10/// \file
11/// This pass looks for loops iterating over assumed-shape arrays, that can
12/// be optimized by "guessing" that the stride is element-sized.
13///
14/// This is done by creating two versions of the same loop: one which assumes
15/// that the elements are contiguous (stride == size of element), and one that
16/// is the original generic loop.
17///
18/// As a side-effect of the assumed element size stride, the array is also
19/// flattened to make it a 1D array - this is because the internal array
20/// structure must be either 1D or have known sizes in all dimensions - and at
21/// least one of the dimensions here is already unknown.
22///
23/// There are two distinct benefits here:
24/// 1. The loop that iterates over the elements is somewhat simplified by the
25/// constant stride calculation.
26/// 2. Since the compiler can understand the size of the stride, it can use
27/// vector instructions, where an unknown (at compile time) stride does often
28/// prevent vector operations from being used.
29///
/// A known drawback is that the code-size is increased, in some cases that can
/// be quite substantial - 3-4x is quite plausible (this includes that the loop
/// gets vectorized, which in itself often more than doubles the size of the
/// code, because unless the loop size is known, there will be a modulo
/// vector-size remainder to deal with).
35///
/// TODO: Do we need some size limit where loops no longer get duplicated?
/// Maybe some sort of cost analysis.
/// TODO: Should some loop content - for example calls to functions and
/// subroutines - inhibit the versioning of the loops? Plausibly, this
/// could be part of the cost analysis above.
41//===----------------------------------------------------------------------===//
42
43#include "flang/Common/ISO_Fortran_binding_wrapper.h"
44#include "flang/Optimizer/Builder/BoxValue.h"
45#include "flang/Optimizer/Builder/FIRBuilder.h"
46#include "flang/Optimizer/Builder/Runtime/Inquiry.h"
47#include "flang/Optimizer/Dialect/FIRDialect.h"
48#include "flang/Optimizer/Dialect/FIROps.h"
49#include "flang/Optimizer/Dialect/FIRType.h"
50#include "flang/Optimizer/Dialect/Support/FIRContext.h"
51#include "flang/Optimizer/Dialect/Support/KindMapping.h"
52#include "flang/Optimizer/Support/DataLayout.h"
53#include "flang/Optimizer/Transforms/Passes.h"
54#include "mlir/Dialect/DLTI/DLTI.h"
55#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
56#include "mlir/IR/Dominance.h"
57#include "mlir/IR/Matchers.h"
58#include "mlir/IR/TypeUtilities.h"
59#include "mlir/Pass/Pass.h"
60#include "mlir/Transforms/DialectConversion.h"
61#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
62#include "mlir/Transforms/RegionUtils.h"
63#include "llvm/Support/Debug.h"
64#include "llvm/Support/raw_ostream.h"
65
66#include <algorithm>
67
68namespace fir {
69#define GEN_PASS_DEF_LOOPVERSIONING
70#include "flang/Optimizer/Transforms/Passes.h.inc"
71} // namespace fir
72
73#define DEBUG_TYPE "flang-loop-versioning"
74
75namespace {
76
77class LoopVersioningPass
78 : public fir::impl::LoopVersioningBase<LoopVersioningPass> {
79public:
80 void runOnOperation() override;
81};
82
83/// @struct ArgInfo
84/// A structure to hold an argument, the size of the argument and dimension
85/// information.
86struct ArgInfo {
87 mlir::Value arg;
88 size_t size;
89 unsigned rank;
90 fir::BoxDimsOp dims[CFI_MAX_RANK];
91};
92
93/// @struct ArgsUsageInLoop
94/// A structure providing information about the function arguments
95/// usage by the instructions immediately nested in a loop.
96struct ArgsUsageInLoop {
97 /// Mapping between the memref operand of an array indexing
98 /// operation (e.g. fir.coordinate_of) and the argument information.
99 llvm::DenseMap<mlir::Value, ArgInfo> usageInfo;
100 /// Some array indexing operations inside a loop cannot be transformed.
101 /// This vector holds the memref operands of such operations.
102 /// The vector is used to make sure that we do not try to transform
103 /// any outer loop, since this will imply the operation rewrite
104 /// in this loop.
105 llvm::SetVector<mlir::Value> cannotTransform;
106
107 // Debug dump of the structure members assuming that
108 // the information has been collected for the given loop.
109 void dump(fir::DoLoopOp loop) const {
110 LLVM_DEBUG({
111 mlir::OpPrintingFlags printFlags;
112 printFlags.skipRegions();
113 llvm::dbgs() << "Arguments usage info for loop:\n";
114 loop.print(llvm::dbgs(), printFlags);
115 llvm::dbgs() << "\nUsed args:\n";
116 for (auto &use : usageInfo) {
117 mlir::Value v = use.first;
118 v.print(llvm::dbgs(), printFlags);
119 llvm::dbgs() << "\n";
120 }
121 llvm::dbgs() << "\nCannot transform args:\n";
122 for (mlir::Value arg : cannotTransform) {
123 arg.print(llvm::dbgs(), printFlags);
124 llvm::dbgs() << "\n";
125 }
126 llvm::dbgs() << "====\n";
127 });
128 }
129
130 // Erase usageInfo and cannotTransform entries for a set
131 // of given arguments.
132 void eraseUsage(const llvm::SetVector<mlir::Value> &args) {
133 for (auto &arg : args)
134 usageInfo.erase(arg);
135 cannotTransform.set_subtract(args);
136 }
137
138 // Erase usageInfo and cannotTransform entries for a set
139 // of given arguments provided in the form of usageInfo map.
140 void eraseUsage(const llvm::DenseMap<mlir::Value, ArgInfo> &args) {
141 for (auto &arg : args) {
142 usageInfo.erase(arg.first);
143 cannotTransform.remove(arg.first);
144 }
145 }
146};
147} // namespace
148
149static fir::SequenceType getAsSequenceType(mlir::Value v) {
150 mlir::Type argTy = fir::unwrapPassByRefType(fir::unwrapRefType(v.getType()));
151 return mlir::dyn_cast<fir::SequenceType>(argTy);
152}
153
154/// Return the rank and the element size (in bytes) of the given
155/// value \p v. If it is not an array or the element type is not
156/// supported, then return <0, 0>. Only trivial data types
157/// are currently supported.
158/// When \p isArgument is true, \p v is assumed to be a function
159/// argument. If \p v's type does not look like a type of an assumed
160/// shape array, then the function returns <0, 0>.
161/// When \p isArgument is false, array types with known innermost
162/// dimension are allowed to proceed.
163static std::pair<unsigned, size_t>
164getRankAndElementSize(const fir::KindMapping &kindMap,
165 const mlir::DataLayout &dl, mlir::Value v,
166 bool isArgument = false) {
167 if (auto seqTy = getAsSequenceType(v)) {
168 unsigned rank = seqTy.getDimension();
169 if (rank > 0 &&
170 (!isArgument ||
171 seqTy.getShape()[0] == fir::SequenceType::getUnknownExtent())) {
172 size_t typeSize = 0;
173 mlir::Type elementType = fir::unwrapSeqOrBoxedSeqType(v.getType());
174 if (fir::isa_trivial(elementType)) {
175 auto [eleSize, eleAlign] = fir::getTypeSizeAndAlignmentOrCrash(
176 v.getLoc(), elementType, dl, kindMap);
177 typeSize = llvm::alignTo(eleSize, eleAlign);
178 }
179 if (typeSize)
180 return {rank, typeSize};
181 }
182 }
183
184 LLVM_DEBUG(llvm::dbgs() << "Unsupported rank/type: " << v << '\n');
185 return {0, 0};
186}
187
188/// If a value comes from a fir.declare of fir.pack_array,
189/// follow it to the original source, otherwise return the value.
190static mlir::Value unwrapPassThroughOps(mlir::Value val) {
191 // Instead of unwrapping fir.declare, we may try to start
192 // the analysis in this pass from fir.declare's instead
193 // of the function entry block arguments. This way the loop
194 // versioning would work even after FIR inlining.
195 while (true) {
196 if (fir::DeclareOp declare = val.getDefiningOp<fir::DeclareOp>()) {
197 val = declare.getMemref();
198 continue;
199 }
200 // fir.pack_array might be met before fir.declare - this is how
201 // it is orifinally generated.
202 // It might also be met after fir.declare - after the optimization
203 // passes that sink fir.pack_array closer to the uses.
204 if (auto packArray = val.getDefiningOp<fir::PackArrayOp>()) {
205 val = packArray.getArray();
206 continue;
207 }
208 break;
209 }
210 return val;
211}
212
213/// if a value comes from a fir.rebox, follow the rebox to the original source,
214/// of the value, otherwise return the value
215static mlir::Value unwrapReboxOp(mlir::Value val) {
216 while (fir::ReboxOp rebox = val.getDefiningOp<fir::ReboxOp>()) {
217 if (!fir::reboxPreservesContinuity(rebox,
218 /*mayHaveNonDefaultLowerBounds=*/true,
219 /*checkWhole=*/false)) {
220 LLVM_DEBUG(llvm::dbgs() << "REBOX may produce non-contiguous array: "
221 << rebox << '\n');
222 break;
223 }
224 val = rebox.getBox();
225 }
226 return val;
227}
228
229/// normalize a value (removing fir.declare and fir.rebox) so that we can
230/// more conveniently spot values which came from function arguments
231static mlir::Value normaliseVal(mlir::Value val) {
232 return unwrapPassThroughOps(unwrapReboxOp(val));
233}
234
235/// some FIR operations accept a fir.shape, a fir.shift or a fir.shapeshift.
236/// fir.shift and fir.shapeshift allow us to extract lower bounds
237/// if lowerbounds cannot be found, return nullptr
238static mlir::Value tryGetLowerBoundsFromShapeLike(mlir::Value shapeLike,
239 unsigned dim) {
240 mlir::Value lowerBound{nullptr};
241 if (auto shift = shapeLike.getDefiningOp<fir::ShiftOp>())
242 lowerBound = shift.getOrigins()[dim];
243 if (auto shapeShift = shapeLike.getDefiningOp<fir::ShapeShiftOp>())
244 lowerBound = shapeShift.getOrigins()[dim];
245 return lowerBound;
246}
247
248/// attempt to get the array lower bounds of dimension dim of the memref
249/// argument to a fir.array_coor op
250/// 0 <= dim < rank
251/// May return nullptr if no lower bounds can be determined
252static mlir::Value getLowerBound(fir::ArrayCoorOp coop, unsigned dim) {
253 // 1) try to get from the shape argument to fir.array_coor
254 if (mlir::Value shapeLike = coop.getShape())
255 if (mlir::Value lb = tryGetLowerBoundsFromShapeLike(shapeLike, dim))
256 return lb;
257
258 // It is important not to try to read the lower bound from the box, because
259 // in the FIR lowering, boxes will sometimes contain incorrect lower bound
260 // information
261
262 // out of ideas
263 return {};
264}
265
266/// gets the i'th index from array coordinate operation op
267/// dim should range between 0 and rank - 1
268static mlir::Value getIndex(fir::FirOpBuilder &builder, mlir::Operation *op,
269 unsigned dim) {
270 if (fir::CoordinateOp coop = mlir::dyn_cast<fir::CoordinateOp>(op))
271 return coop.getCoor()[dim];
272
273 fir::ArrayCoorOp coop = mlir::dyn_cast<fir::ArrayCoorOp>(op);
274 assert(coop &&
275 "operation must be either fir.coordiante_of or fir.array_coor");
276
277 // fir.coordinate_of indices start at 0: adjust these indices to match by
278 // subtracting the lower bound
279 mlir::Value index = coop.getIndices()[dim];
280 mlir::Value lb = getLowerBound(coop, dim);
281 if (!lb)
282 // assume a default lower bound of one
283 lb = builder.createIntegerConstant(coop.getLoc(), index.getType(), 1);
284
285 // index_0 = index - lb;
286 if (lb.getType() != index.getType())
287 lb = builder.createConvert(coop.getLoc(), index.getType(), lb);
288 return builder.create<mlir::arith::SubIOp>(coop.getLoc(), index, lb);
289}
290
291void LoopVersioningPass::runOnOperation() {
292 LLVM_DEBUG(llvm::dbgs() << "=== Begin " DEBUG_TYPE " ===\n");
293 mlir::func::FuncOp func = getOperation();
294
295 // First look for arguments with assumed shape = unknown extent in the lowest
296 // dimension.
297 LLVM_DEBUG(llvm::dbgs() << "Func-name:" << func.getSymName() << "\n");
298 mlir::Block::BlockArgListType args = func.getArguments();
299 mlir::ModuleOp module = func->getParentOfType<mlir::ModuleOp>();
300 fir::KindMapping kindMap = fir::getKindMapping(module);
301 mlir::SmallVector<ArgInfo, 4> argsOfInterest;
302 std::optional<mlir::DataLayout> dl = fir::support::getOrSetMLIRDataLayout(
303 module, /*allowDefaultLayout=*/false);
304 if (!dl)
305 mlir::emitError(module.getLoc(),
306 "data layout attribute is required to perform " DEBUG_TYPE
307 "pass");
308 for (auto &arg : args) {
309 // Optional arguments must be checked for IsPresent before
310 // looking for the bounds. They are unsupported for the time being.
311 if (func.getArgAttrOfType<mlir::UnitAttr>(arg.getArgNumber(),
312 fir::getOptionalAttrName())) {
313 LLVM_DEBUG(llvm::dbgs() << "OPTIONAL is not supported\n");
314 continue;
315 }
316
317 auto [rank, typeSize] =
318 getRankAndElementSize(kindMap, *dl, arg, /*isArgument=*/true);
319 if (rank != 0 && typeSize != 0)
320 argsOfInterest.push_back({arg, typeSize, rank, {}});
321 }
322
323 if (argsOfInterest.empty()) {
324 LLVM_DEBUG(llvm::dbgs()
325 << "No suitable arguments.\n=== End " DEBUG_TYPE " ===\n");
326 return;
327 }
328
329 // A list of all loops in the function in post-order.
330 mlir::SmallVector<fir::DoLoopOp> originalLoops;
331 // Information about the arguments usage by the instructions
332 // immediately nested in a loop.
333 llvm::DenseMap<fir::DoLoopOp, ArgsUsageInLoop> argsInLoops;
334
335 auto &domInfo = getAnalysis<mlir::DominanceInfo>();
336
337 // Traverse the loops in post-order and see
338 // if those arguments are used inside any loop.
339 func.walk([&](fir::DoLoopOp loop) {
340 mlir::Block &body = *loop.getBody();
341 auto &argsInLoop = argsInLoops[loop];
342 originalLoops.push_back(loop);
343 body.walk([&](mlir::Operation *op) {
344 // Support either fir.array_coor or fir.coordinate_of.
345 if (!mlir::isa<fir::ArrayCoorOp, fir::CoordinateOp>(op))
346 return;
347 // Process only operations immediately nested in the current loop.
348 if (op->getParentOfType<fir::DoLoopOp>() != loop)
349 return;
350 mlir::Value operand = op->getOperand(0);
351 for (auto a : argsOfInterest) {
352 if (a.arg == normaliseVal(operand)) {
353 // Use the reboxed value, not the block arg when re-creating the loop.
354 a.arg = operand;
355
356 // Check that the operand dominates the loop?
357 // If this is the case, record such operands in argsInLoop.cannot-
358 // Transform, so that they disable the transformation for the parent
359 /// loops as well.
360 if (!domInfo.dominates(a.arg, loop))
361 argsInLoop.cannotTransform.insert(a.arg);
362
363 // No support currently for sliced arrays.
364 // This means that we cannot transform properly
365 // instructions referencing a.arg in the whole loop
366 // nest this loop is located in.
367 if (auto arrayCoor = mlir::dyn_cast<fir::ArrayCoorOp>(op))
368 if (arrayCoor.getSlice())
369 argsInLoop.cannotTransform.insert(a.arg);
370
371 // We need to compute the rank and element size
372 // based on the operand, not the original argument,
373 // because array slicing may affect it.
374 std::tie(a.rank, a.size) = getRankAndElementSize(kindMap, *dl, a.arg);
375 if (a.rank == 0 || a.size == 0)
376 argsInLoop.cannotTransform.insert(a.arg);
377
378 if (argsInLoop.cannotTransform.contains(a.arg)) {
379 // Remove any previously recorded usage, if any.
380 argsInLoop.usageInfo.erase(a.arg);
381 break;
382 }
383
384 // Record the a.arg usage, if not recorded yet.
385 argsInLoop.usageInfo.try_emplace(a.arg, a);
386 break;
387 }
388 }
389 });
390 });
391
392 // Dump loops info after initial collection.
393 LLVM_DEBUG({
394 llvm::dbgs() << "Initial usage info:\n";
395 for (fir::DoLoopOp loop : originalLoops) {
396 auto &argsInLoop = argsInLoops[loop];
397 argsInLoop.dump(loop);
398 }
399 });
400
401 // Clear argument usage for parent loops if an inner loop
402 // contains a non-transformable usage.
403 for (fir::DoLoopOp loop : originalLoops) {
404 auto &argsInLoop = argsInLoops[loop];
405 if (argsInLoop.cannotTransform.empty())
406 continue;
407
408 fir::DoLoopOp parent = loop;
409 while ((parent = parent->getParentOfType<fir::DoLoopOp>()))
410 argsInLoops[parent].eraseUsage(argsInLoop.cannotTransform);
411 }
412
413 // If an argument access can be optimized in a loop and
414 // its descendant loop, then it does not make sense to
415 // generate the contiguity check for the descendant loop.
416 // The check will be produced as part of the ancestor
417 // loop's transformation. So we can clear the argument
418 // usage for all descendant loops.
419 for (fir::DoLoopOp loop : originalLoops) {
420 auto &argsInLoop = argsInLoops[loop];
421 if (argsInLoop.usageInfo.empty())
422 continue;
423
424 loop.getBody()->walk([&](fir::DoLoopOp dloop) {
425 argsInLoops[dloop].eraseUsage(argsInLoop.usageInfo);
426 });
427 }
428
429 LLVM_DEBUG({
430 llvm::dbgs() << "Final usage info:\n";
431 for (fir::DoLoopOp loop : originalLoops) {
432 auto &argsInLoop = argsInLoops[loop];
433 argsInLoop.dump(loop);
434 }
435 });
436
437 // Reduce the collected information to a list of loops
438 // with attached arguments usage information.
439 // The list must hold the loops in post order, so that
440 // the inner loops are transformed before the outer loops.
441 struct OpsWithArgs {
442 mlir::Operation *op;
443 mlir::SmallVector<ArgInfo, 4> argsAndDims;
444 };
445 mlir::SmallVector<OpsWithArgs, 4> loopsOfInterest;
446 for (fir::DoLoopOp loop : originalLoops) {
447 auto &argsInLoop = argsInLoops[loop];
448 if (argsInLoop.usageInfo.empty())
449 continue;
450 OpsWithArgs info;
451 info.op = loop;
452 for (auto &arg : argsInLoop.usageInfo)
453 info.argsAndDims.push_back(arg.second);
454 loopsOfInterest.emplace_back(std::move(info));
455 }
456
457 if (loopsOfInterest.empty()) {
458 LLVM_DEBUG(llvm::dbgs()
459 << "No loops to transform.\n=== End " DEBUG_TYPE " ===\n");
460 return;
461 }
462
463 // If we get here, there are loops to process.
464 fir::FirOpBuilder builder{module, std::move(kindMap)};
465 mlir::Location loc = builder.getUnknownLoc();
466 mlir::IndexType idxTy = builder.getIndexType();
467
468 LLVM_DEBUG(llvm::dbgs() << "Func Before transformation:\n");
469 LLVM_DEBUG(func->dump());
470
471 LLVM_DEBUG(llvm::dbgs() << "loopsOfInterest: " << loopsOfInterest.size()
472 << "\n");
473 for (auto op : loopsOfInterest) {
474 LLVM_DEBUG(op.op->dump());
475 builder.setInsertionPoint(op.op);
476
477 mlir::Value allCompares = nullptr;
478 // Ensure all of the arrays are unit-stride.
479 for (auto &arg : op.argsAndDims) {
480 // Fetch all the dimensions of the array, except the last dimension.
481 // Always fetch the first dimension, however, so set ndims = 1 if
482 // we have one dim
483 unsigned ndims = arg.rank;
484 for (unsigned i = 0; i < ndims; i++) {
485 mlir::Value dimIdx = builder.createIntegerConstant(loc, idxTy, i);
486 arg.dims[i] = builder.create<fir::BoxDimsOp>(loc, idxTy, idxTy, idxTy,
487 arg.arg, dimIdx);
488 }
489 // We only care about lowest order dimension, here.
490 mlir::Value elemSize =
491 builder.createIntegerConstant(loc, idxTy, arg.size);
492 mlir::Value cmp = builder.create<mlir::arith::CmpIOp>(
493 loc, mlir::arith::CmpIPredicate::eq, arg.dims[0].getResult(2),
494 elemSize);
495 if (!allCompares) {
496 allCompares = cmp;
497 } else {
498 allCompares =
499 builder.create<mlir::arith::AndIOp>(loc, cmp, allCompares);
500 }
501 }
502
503 auto ifOp =
504 builder.create<fir::IfOp>(loc, op.op->getResultTypes(), allCompares,
505 /*withElse=*/true);
506 builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
507
508 LLVM_DEBUG(llvm::dbgs() << "Creating cloned loop\n");
509 mlir::Operation *clonedLoop = op.op->clone();
510 bool changed = false;
511 for (auto &arg : op.argsAndDims) {
512 fir::SequenceType::Shape newShape;
513 newShape.push_back(fir::SequenceType::getUnknownExtent());
514 auto elementType = fir::unwrapSeqOrBoxedSeqType(arg.arg.getType());
515 mlir::Type arrTy = fir::SequenceType::get(newShape, elementType);
516 mlir::Type boxArrTy = fir::BoxType::get(arrTy);
517 mlir::Type refArrTy = builder.getRefType(arrTy);
518 auto carg = builder.create<fir::ConvertOp>(loc, boxArrTy, arg.arg);
519 auto caddr = builder.create<fir::BoxAddrOp>(loc, refArrTy, carg);
520 auto insPt = builder.saveInsertionPoint();
521 // Use caddr instead of arg.
522 clonedLoop->walk([&](mlir::Operation *coop) {
523 if (!mlir::isa<fir::CoordinateOp, fir::ArrayCoorOp>(coop))
524 return;
525 // Reduce the multi-dimensioned index to a single index.
526 // This is required becase fir arrays do not support multiple dimensions
527 // with unknown dimensions at compile time.
528 // We then calculate the multidimensional array like this:
529 // arr(x, y, z) bedcomes arr(z * stride(2) + y * stride(1) + x)
530 // where stride is the distance between elements in the dimensions
531 // 0, 1 and 2 or x, y and z.
532 if (coop->getOperand(0) == arg.arg && coop->getOperands().size() >= 2) {
533 builder.setInsertionPoint(coop);
534 mlir::Value totalIndex;
535 for (unsigned i = arg.rank - 1; i > 0; i--) {
536 mlir::Value curIndex =
537 builder.createConvert(loc, idxTy, getIndex(builder, coop, i));
538 // Multiply by the stride of this array. Later we'll divide by the
539 // element size.
540 mlir::Value scale =
541 builder.createConvert(loc, idxTy, arg.dims[i].getResult(2));
542 curIndex =
543 builder.create<mlir::arith::MulIOp>(loc, scale, curIndex);
544 totalIndex = (totalIndex) ? builder.create<mlir::arith::AddIOp>(
545 loc, curIndex, totalIndex)
546 : curIndex;
547 }
548 // This is the lowest dimension - which doesn't need scaling
549 mlir::Value finalIndex =
550 builder.createConvert(loc, idxTy, getIndex(builder, coop, 0));
551 if (totalIndex) {
552 assert(llvm::isPowerOf2_32(arg.size) &&
553 "Expected power of two here");
554 unsigned bits = llvm::Log2_32(arg.size);
555 mlir::Value elemShift =
556 builder.createIntegerConstant(loc, idxTy, bits);
557 totalIndex = builder.create<mlir::arith::AddIOp>(
558 loc,
559 builder.create<mlir::arith::ShRSIOp>(loc, totalIndex,
560 elemShift),
561 finalIndex);
562 } else {
563 totalIndex = finalIndex;
564 }
565 auto newOp = builder.create<fir::CoordinateOp>(
566 loc, builder.getRefType(elementType), caddr,
567 mlir::ValueRange{totalIndex});
568 LLVM_DEBUG(newOp->dump());
569 coop->getResult(0).replaceAllUsesWith(newOp->getResult(0));
570 coop->erase();
571 changed = true;
572 }
573 });
574
575 builder.restoreInsertionPoint(insPt);
576 }
577 assert(changed && "Expected operations to have changed");
578
579 builder.insert(clonedLoop);
580 // Forward the result(s), if any, from the loop operation to the
581 //
582 mlir::ResultRange results = clonedLoop->getResults();
583 bool hasResults = (results.size() > 0);
584 if (hasResults)
585 builder.create<fir::ResultOp>(loc, results);
586
587 // Add the original loop in the else-side of the if operation.
588 builder.setInsertionPointToStart(&ifOp.getElseRegion().front());
589 op.op->replaceAllUsesWith(ifOp);
590 op.op->remove();
591 builder.insert(op.op);
592 // Rely on "cloned loop has results, so original loop also has results".
593 if (hasResults) {
594 builder.create<fir::ResultOp>(loc, op.op->getResults());
595 } else {
596 // Use an assert to check this.
597 assert(op.op->getResults().size() == 0 &&
598 "Weird, the cloned loop doesn't have results, but the original "
599 "does?");
600 }
601 }
602
603 LLVM_DEBUG(llvm::dbgs() << "Func After transform:\n");
604 LLVM_DEBUG(func->dump());
605
606 LLVM_DEBUG(llvm::dbgs() << "=== End " DEBUG_TYPE " ===\n");
607}
608

// Source: flang/lib/Optimizer/Transforms/LoopVersioning.cpp