//===- Utils.cpp ---- Utilities for affine dialect transformation ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements miscellaneous transformation utilities for the Affine
// dialect.
//
//===----------------------------------------------------------------------===//
13
14#include "mlir/Dialect/Affine/Utils.h"
15
16#include "mlir/Dialect/Affine/Analysis/Utils.h"
17#include "mlir/Dialect/Affine/IR/AffineOps.h"
18#include "mlir/Dialect/Affine/IR/AffineValueMap.h"
19#include "mlir/Dialect/Affine/LoopUtils.h"
20#include "mlir/Dialect/Arith/Utils/Utils.h"
21#include "mlir/Dialect/Func/IR/FuncOps.h"
22#include "mlir/Dialect/MemRef/IR/MemRef.h"
23#include "mlir/Dialect/Utils/IndexingUtils.h"
24#include "mlir/IR/AffineExprVisitor.h"
25#include "mlir/IR/Dominance.h"
26#include "mlir/IR/IRMapping.h"
27#include "mlir/IR/ImplicitLocOpBuilder.h"
28#include "mlir/IR/IntegerSet.h"
29#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
30#include <optional>
31
32#define DEBUG_TYPE "affine-utils"
33
34using namespace mlir;
35using namespace affine;
36using namespace presburger;
37
38namespace {
39/// Visit affine expressions recursively and build the sequence of operations
40/// that correspond to it. Visitation functions return an Value of the
41/// expression subtree they visited or `nullptr` on error.
42class AffineApplyExpander
43 : public AffineExprVisitor<AffineApplyExpander, Value> {
44public:
45 /// This internal class expects arguments to be non-null, checks must be
46 /// performed at the call site.
47 AffineApplyExpander(OpBuilder &builder, ValueRange dimValues,
48 ValueRange symbolValues, Location loc)
49 : builder(builder), dimValues(dimValues), symbolValues(symbolValues),
50 loc(loc) {}
51
52 template <typename OpTy>
53 Value buildBinaryExpr(AffineBinaryOpExpr expr,
54 arith::IntegerOverflowFlags overflowFlags =
55 arith::IntegerOverflowFlags::none) {
56 auto lhs = visit(expr: expr.getLHS());
57 auto rhs = visit(expr: expr.getRHS());
58 if (!lhs || !rhs)
59 return nullptr;
60 auto op = builder.create<OpTy>(loc, lhs, rhs, overflowFlags);
61 return op.getResult();
62 }
63
64 Value visitAddExpr(AffineBinaryOpExpr expr) {
65 return buildBinaryExpr<arith::AddIOp>(expr);
66 }
67
68 Value visitMulExpr(AffineBinaryOpExpr expr) {
69 return buildBinaryExpr<arith::MulIOp>(expr,
70 overflowFlags: arith::IntegerOverflowFlags::nsw);
71 }
72
73 /// Euclidean modulo operation: negative RHS is not allowed.
74 /// Remainder of the euclidean integer division is always non-negative.
75 ///
76 /// Implemented as
77 ///
78 /// a mod b =
79 /// let remainder = srem a, b;
80 /// negative = a < 0 in
81 /// select negative, remainder + b, remainder.
82 Value visitModExpr(AffineBinaryOpExpr expr) {
83 if (auto rhsConst = dyn_cast<AffineConstantExpr>(Val: expr.getRHS())) {
84 if (rhsConst.getValue() <= 0) {
85 emitError(loc, message: "modulo by non-positive value is not supported");
86 return nullptr;
87 }
88 }
89
90 auto lhs = visit(expr: expr.getLHS());
91 auto rhs = visit(expr: expr.getRHS());
92 assert(lhs && rhs && "unexpected affine expr lowering failure");
93
94 Value remainder = builder.create<arith::RemSIOp>(location: loc, args&: lhs, args&: rhs);
95 Value zeroCst = builder.create<arith::ConstantIndexOp>(location: loc, args: 0);
96 Value isRemainderNegative = builder.create<arith::CmpIOp>(
97 location: loc, args: arith::CmpIPredicate::slt, args&: remainder, args&: zeroCst);
98 Value correctedRemainder =
99 builder.create<arith::AddIOp>(location: loc, args&: remainder, args&: rhs);
100 Value result = builder.create<arith::SelectOp>(
101 location: loc, args&: isRemainderNegative, args&: correctedRemainder, args&: remainder);
102 return result;
103 }
104
105 /// Floor division operation (rounds towards negative infinity).
106 ///
107 /// For positive divisors, it can be implemented without branching and with a
108 /// single division operation as
109 ///
110 /// a floordiv b =
111 /// let negative = a < 0 in
112 /// let absolute = negative ? -a - 1 : a in
113 /// let quotient = absolute / b in
114 /// negative ? -quotient - 1 : quotient
115 ///
116 /// Note: this lowering does not use arith.floordivsi because the lowering of
117 /// that to arith.divsi (see populateCeilFloorDivExpandOpsPatterns) generates
118 /// not one but two arith.divsi. That could be changed to one divsi, but one
119 /// way or another, going through arith.floordivsi will result in more complex
120 /// IR because arith.floordivsi is more general than affine floordiv in that
121 /// it supports negative RHS.
122 Value visitFloorDivExpr(AffineBinaryOpExpr expr) {
123 if (auto rhsConst = dyn_cast<AffineConstantExpr>(Val: expr.getRHS())) {
124 if (rhsConst.getValue() <= 0) {
125 emitError(loc, message: "division by non-positive value is not supported");
126 return nullptr;
127 }
128 }
129 auto lhs = visit(expr: expr.getLHS());
130 auto rhs = visit(expr: expr.getRHS());
131 assert(lhs && rhs && "unexpected affine expr lowering failure");
132
133 Value zeroCst = builder.create<arith::ConstantIndexOp>(location: loc, args: 0);
134 Value noneCst = builder.create<arith::ConstantIndexOp>(location: loc, args: -1);
135 Value negative = builder.create<arith::CmpIOp>(
136 location: loc, args: arith::CmpIPredicate::slt, args&: lhs, args&: zeroCst);
137 Value negatedDecremented = builder.create<arith::SubIOp>(location: loc, args&: noneCst, args&: lhs);
138 Value dividend =
139 builder.create<arith::SelectOp>(location: loc, args&: negative, args&: negatedDecremented, args&: lhs);
140 Value quotient = builder.create<arith::DivSIOp>(location: loc, args&: dividend, args&: rhs);
141 Value correctedQuotient =
142 builder.create<arith::SubIOp>(location: loc, args&: noneCst, args&: quotient);
143 Value result = builder.create<arith::SelectOp>(location: loc, args&: negative,
144 args&: correctedQuotient, args&: quotient);
145 return result;
146 }
147
148 /// Ceiling division operation (rounds towards positive infinity).
149 ///
150 /// For positive divisors, it can be implemented without branching and with a
151 /// single division operation as
152 ///
153 /// a ceildiv b =
154 /// let negative = a <= 0 in
155 /// let absolute = negative ? -a : a - 1 in
156 /// let quotient = absolute / b in
157 /// negative ? -quotient : quotient + 1
158 ///
159 /// Note: not using arith.ceildivsi for the same reason as explained in the
160 /// visitFloorDivExpr comment.
161 Value visitCeilDivExpr(AffineBinaryOpExpr expr) {
162 if (auto rhsConst = dyn_cast<AffineConstantExpr>(Val: expr.getRHS())) {
163 if (rhsConst.getValue() <= 0) {
164 emitError(loc, message: "division by non-positive value is not supported");
165 return nullptr;
166 }
167 }
168 auto lhs = visit(expr: expr.getLHS());
169 auto rhs = visit(expr: expr.getRHS());
170 assert(lhs && rhs && "unexpected affine expr lowering failure");
171
172 Value zeroCst = builder.create<arith::ConstantIndexOp>(location: loc, args: 0);
173 Value oneCst = builder.create<arith::ConstantIndexOp>(location: loc, args: 1);
174 Value nonPositive = builder.create<arith::CmpIOp>(
175 location: loc, args: arith::CmpIPredicate::sle, args&: lhs, args&: zeroCst);
176 Value negated = builder.create<arith::SubIOp>(location: loc, args&: zeroCst, args&: lhs);
177 Value decremented = builder.create<arith::SubIOp>(location: loc, args&: lhs, args&: oneCst);
178 Value dividend =
179 builder.create<arith::SelectOp>(location: loc, args&: nonPositive, args&: negated, args&: decremented);
180 Value quotient = builder.create<arith::DivSIOp>(location: loc, args&: dividend, args&: rhs);
181 Value negatedQuotient =
182 builder.create<arith::SubIOp>(location: loc, args&: zeroCst, args&: quotient);
183 Value incrementedQuotient =
184 builder.create<arith::AddIOp>(location: loc, args&: quotient, args&: oneCst);
185 Value result = builder.create<arith::SelectOp>(
186 location: loc, args&: nonPositive, args&: negatedQuotient, args&: incrementedQuotient);
187 return result;
188 }
189
190 Value visitConstantExpr(AffineConstantExpr expr) {
191 auto op = builder.create<arith::ConstantIndexOp>(location: loc, args: expr.getValue());
192 return op.getResult();
193 }
194
195 Value visitDimExpr(AffineDimExpr expr) {
196 assert(expr.getPosition() < dimValues.size() &&
197 "affine dim position out of range");
198 return dimValues[expr.getPosition()];
199 }
200
201 Value visitSymbolExpr(AffineSymbolExpr expr) {
202 assert(expr.getPosition() < symbolValues.size() &&
203 "symbol dim position out of range");
204 return symbolValues[expr.getPosition()];
205 }
206
207private:
208 OpBuilder &builder;
209 ValueRange dimValues;
210 ValueRange symbolValues;
211
212 Location loc;
213};
214} // namespace
215
216/// Create a sequence of operations that implement the `expr` applied to the
217/// given dimension and symbol values.
218mlir::Value mlir::affine::expandAffineExpr(OpBuilder &builder, Location loc,
219 AffineExpr expr,
220 ValueRange dimValues,
221 ValueRange symbolValues) {
222 return AffineApplyExpander(builder, dimValues, symbolValues, loc).visit(expr);
223}
224
225/// Create a sequence of operations that implement the `affineMap` applied to
226/// the given `operands` (as it it were an AffineApplyOp).
227std::optional<SmallVector<Value, 8>>
228mlir::affine::expandAffineMap(OpBuilder &builder, Location loc,
229 AffineMap affineMap, ValueRange operands) {
230 auto numDims = affineMap.getNumDims();
231 auto expanded = llvm::to_vector<8>(
232 Range: llvm::map_range(C: affineMap.getResults(),
233 F: [numDims, &builder, loc, operands](AffineExpr expr) {
234 return expandAffineExpr(builder, loc, expr,
235 dimValues: operands.take_front(n: numDims),
236 symbolValues: operands.drop_front(n: numDims));
237 }));
238 if (llvm::all_of(Range&: expanded, P: [](Value v) { return v; }))
239 return expanded;
240 return std::nullopt;
241}
242
243/// Promotes the `then` or the `else` block of `ifOp` (depending on whether
244/// `elseBlock` is false or true) into `ifOp`'s containing block, and discards
245/// the rest of the op.
246static void promoteIfBlock(AffineIfOp ifOp, bool elseBlock) {
247 if (elseBlock)
248 assert(ifOp.hasElse() && "else block expected");
249
250 Block *destBlock = ifOp->getBlock();
251 Block *srcBlock = elseBlock ? ifOp.getElseBlock() : ifOp.getThenBlock();
252 destBlock->getOperations().splice(
253 where: Block::iterator(ifOp), L2&: srcBlock->getOperations(), first: srcBlock->begin(),
254 last: std::prev(x: srcBlock->end()));
255 ifOp.erase();
256}
257
258/// Returns the outermost affine.for/parallel op that the `ifOp` is invariant
259/// on. The `ifOp` could be hoisted and placed right before such an operation.
260/// This method assumes that the ifOp has been canonicalized (to be correct and
261/// effective).
262static Operation *getOutermostInvariantForOp(AffineIfOp ifOp) {
263 // Walk up the parents past all for op that this conditional is invariant on.
264 auto ifOperands = ifOp.getOperands();
265 Operation *res = ifOp;
266 while (!res->getParentOp()->hasTrait<OpTrait::IsIsolatedFromAbove>()) {
267 auto *parentOp = res->getParentOp();
268 if (auto forOp = dyn_cast<AffineForOp>(Val: parentOp)) {
269 if (llvm::is_contained(Range&: ifOperands, Element: forOp.getInductionVar()))
270 break;
271 } else if (auto parallelOp = dyn_cast<AffineParallelOp>(Val: parentOp)) {
272 if (llvm::any_of(Range: parallelOp.getIVs(), P: [&](Value iv) {
273 return llvm::is_contained(Range&: ifOperands, Element: iv);
274 }))
275 break;
276 } else if (!isa<AffineIfOp>(Val: parentOp)) {
277 // Won't walk up past anything other than affine.for/if ops.
278 break;
279 }
280 // You can always hoist up past any affine.if ops.
281 res = parentOp;
282 }
283 return res;
284}
285
286/// A helper for the mechanics of mlir::hoistAffineIfOp. Hoists `ifOp` just over
287/// `hoistOverOp`. Returns the new hoisted op if any hoisting happened,
288/// otherwise the same `ifOp`.
289static AffineIfOp hoistAffineIfOp(AffineIfOp ifOp, Operation *hoistOverOp) {
290 // No hoisting to do.
291 if (hoistOverOp == ifOp)
292 return ifOp;
293
294 // Create the hoisted 'if' first. Then, clone the op we are hoisting over for
295 // the else block. Then drop the else block of the original 'if' in the 'then'
296 // branch while promoting its then block, and analogously drop the 'then'
297 // block of the original 'if' from the 'else' branch while promoting its else
298 // block.
299 IRMapping operandMap;
300 OpBuilder b(hoistOverOp);
301 auto hoistedIfOp = b.create<AffineIfOp>(location: ifOp.getLoc(), args: ifOp.getIntegerSet(),
302 args: ifOp.getOperands(),
303 /*elseBlock=*/args: true);
304
305 // Create a clone of hoistOverOp to use for the else branch of the hoisted
306 // conditional. The else block may get optimized away if empty.
307 Operation *hoistOverOpClone = nullptr;
308 // We use this unique name to identify/find `ifOp`'s clone in the else
309 // version.
310 StringAttr idForIfOp = b.getStringAttr(bytes: "__mlir_if_hoisting");
311 operandMap.clear();
312 b.setInsertionPointAfter(hoistOverOp);
313 // We'll set an attribute to identify this op in a clone of this sub-tree.
314 ifOp->setAttr(name: idForIfOp, value: b.getBoolAttr(value: true));
315 hoistOverOpClone = b.clone(op&: *hoistOverOp, mapper&: operandMap);
316
317 // Promote the 'then' block of the original affine.if in the then version.
318 promoteIfBlock(ifOp, /*elseBlock=*/false);
319
320 // Move the then version to the hoisted if op's 'then' block.
321 auto *thenBlock = hoistedIfOp.getThenBlock();
322 thenBlock->getOperations().splice(where: thenBlock->begin(),
323 L2&: hoistOverOp->getBlock()->getOperations(),
324 first: Block::iterator(hoistOverOp));
325
326 // Find the clone of the original affine.if op in the else version.
327 AffineIfOp ifCloneInElse;
328 hoistOverOpClone->walk(callback: [&](AffineIfOp ifClone) {
329 if (!ifClone->getAttr(name: idForIfOp))
330 return WalkResult::advance();
331 ifCloneInElse = ifClone;
332 return WalkResult::interrupt();
333 });
334 assert(ifCloneInElse && "if op clone should exist");
335 // For the else block, promote the else block of the original 'if' if it had
336 // one; otherwise, the op itself is to be erased.
337 if (!ifCloneInElse.hasElse())
338 ifCloneInElse.erase();
339 else
340 promoteIfBlock(ifOp: ifCloneInElse, /*elseBlock=*/true);
341
342 // Move the else version into the else block of the hoisted if op.
343 auto *elseBlock = hoistedIfOp.getElseBlock();
344 elseBlock->getOperations().splice(
345 where: elseBlock->begin(), L2&: hoistOverOpClone->getBlock()->getOperations(),
346 first: Block::iterator(hoistOverOpClone));
347
348 return hoistedIfOp;
349}
350
351LogicalResult
352mlir::affine::affineParallelize(AffineForOp forOp,
353 ArrayRef<LoopReduction> parallelReductions,
354 AffineParallelOp *resOp) {
355 // Fail early if there are iter arguments that are not reductions.
356 unsigned numReductions = parallelReductions.size();
357 if (numReductions != forOp.getNumIterOperands())
358 return failure();
359
360 Location loc = forOp.getLoc();
361 OpBuilder outsideBuilder(forOp);
362 AffineMap lowerBoundMap = forOp.getLowerBoundMap();
363 ValueRange lowerBoundOperands = forOp.getLowerBoundOperands();
364 AffineMap upperBoundMap = forOp.getUpperBoundMap();
365 ValueRange upperBoundOperands = forOp.getUpperBoundOperands();
366
367 // Creating empty 1-D affine.parallel op.
368 auto reducedValues = llvm::to_vector<4>(Range: llvm::map_range(
369 C&: parallelReductions, F: [](const LoopReduction &red) { return red.value; }));
370 auto reductionKinds = llvm::to_vector<4>(Range: llvm::map_range(
371 C&: parallelReductions, F: [](const LoopReduction &red) { return red.kind; }));
372 AffineParallelOp newPloop = outsideBuilder.create<AffineParallelOp>(
373 location: loc, args: ValueRange(reducedValues).getTypes(), args&: reductionKinds,
374 args: llvm::ArrayRef(lowerBoundMap), args&: lowerBoundOperands,
375 args: llvm::ArrayRef(upperBoundMap), args&: upperBoundOperands,
376 args: llvm::ArrayRef(forOp.getStepAsInt()));
377 // Steal the body of the old affine for op.
378 newPloop.getRegion().takeBody(other&: forOp.getRegion());
379 Operation *yieldOp = &newPloop.getBody()->back();
380
381 // Handle the initial values of reductions because the parallel loop always
382 // starts from the neutral value.
383 SmallVector<Value> newResults;
384 newResults.reserve(N: numReductions);
385 for (unsigned i = 0; i < numReductions; ++i) {
386 Value init = forOp.getInits()[i];
387 // This works because we are only handling single-op reductions at the
388 // moment. A switch on reduction kind or a mechanism to collect operations
389 // participating in the reduction will be necessary for multi-op reductions.
390 Operation *reductionOp = yieldOp->getOperand(idx: i).getDefiningOp();
391 assert(reductionOp && "yielded value is expected to be produced by an op");
392 outsideBuilder.getInsertionBlock()->getOperations().splice(
393 where: outsideBuilder.getInsertionPoint(), L2&: newPloop.getBody()->getOperations(),
394 N: reductionOp);
395 reductionOp->setOperands({init, newPloop->getResult(idx: i)});
396 forOp->getResult(idx: i).replaceAllUsesWith(newValue: reductionOp->getResult(idx: 0));
397 }
398
399 // Update the loop terminator to yield reduced values bypassing the reduction
400 // operation itself (now moved outside of the loop) and erase the block
401 // arguments that correspond to reductions. Note that the loop always has one
402 // "main" induction variable whenc coming from a non-parallel for.
403 unsigned numIVs = 1;
404 yieldOp->setOperands(reducedValues);
405 newPloop.getBody()->eraseArguments(start: numIVs, num: numReductions);
406
407 forOp.erase();
408 if (resOp)
409 *resOp = newPloop;
410 return success();
411}
412
413// Returns success if any hoisting happened.
414LogicalResult mlir::affine::hoistAffineIfOp(AffineIfOp ifOp, bool *folded) {
415 // Bail out early if the ifOp returns a result. TODO: Consider how to
416 // properly support this case.
417 if (ifOp.getNumResults() != 0)
418 return failure();
419
420 // Apply canonicalization patterns and folding - this is necessary for the
421 // hoisting check to be correct (operands should be composed), and to be more
422 // effective (no unused operands). Since the pattern rewriter's folding is
423 // entangled with application of patterns, we may fold/end up erasing the op,
424 // in which case we return with `folded` being set.
425 RewritePatternSet patterns(ifOp.getContext());
426 AffineIfOp::getCanonicalizationPatterns(results&: patterns, context: ifOp.getContext());
427 FrozenRewritePatternSet frozenPatterns(std::move(patterns));
428 bool erased;
429 (void)applyOpPatternsGreedily(
430 ops: ifOp.getOperation(), patterns: frozenPatterns,
431 config: GreedyRewriteConfig().setStrictness(GreedyRewriteStrictness::ExistingOps),
432 /*changed=*/nullptr, allErased: &erased);
433 if (erased) {
434 if (folded)
435 *folded = true;
436 return failure();
437 }
438 if (folded)
439 *folded = false;
440
441 // The folding above should have ensured this.
442 assert(llvm::all_of(ifOp.getOperands(),
443 [](Value v) {
444 return isTopLevelValue(v) || isAffineInductionVar(v);
445 }) &&
446 "operands not composed");
447
448 // We are going hoist as high as possible.
449 // TODO: this could be customized in the future.
450 auto *hoistOverOp = getOutermostInvariantForOp(ifOp);
451
452 AffineIfOp hoistedIfOp = ::hoistAffineIfOp(ifOp, hoistOverOp);
453 // Nothing to hoist over.
454 if (hoistedIfOp == ifOp)
455 return failure();
456
457 // Canonicalize to remove dead else blocks (happens whenever an 'if' moves up
458 // a sequence of affine.fors that are all perfectly nested).
459 (void)applyPatternsGreedily(
460 op: hoistedIfOp->getParentWithTrait<OpTrait::IsIsolatedFromAbove>(),
461 patterns: frozenPatterns);
462
463 return success();
464}
465
466// Return the min expr after replacing the given dim.
467AffineExpr mlir::affine::substWithMin(AffineExpr e, AffineExpr dim,
468 AffineExpr min, AffineExpr max,
469 bool positivePath) {
470 if (e == dim)
471 return positivePath ? min : max;
472 if (auto bin = dyn_cast<AffineBinaryOpExpr>(Val&: e)) {
473 AffineExpr lhs = bin.getLHS();
474 AffineExpr rhs = bin.getRHS();
475 if (bin.getKind() == mlir::AffineExprKind::Add)
476 return substWithMin(e: lhs, dim, min, max, positivePath) +
477 substWithMin(e: rhs, dim, min, max, positivePath);
478
479 auto c1 = dyn_cast<AffineConstantExpr>(Val: bin.getLHS());
480 auto c2 = dyn_cast<AffineConstantExpr>(Val: bin.getRHS());
481 if (c1 && c1.getValue() < 0)
482 return getAffineBinaryOpExpr(
483 kind: bin.getKind(), lhs: c1, rhs: substWithMin(e: rhs, dim, min, max, positivePath: !positivePath));
484 if (c2 && c2.getValue() < 0)
485 return getAffineBinaryOpExpr(
486 kind: bin.getKind(), lhs: substWithMin(e: lhs, dim, min, max, positivePath: !positivePath), rhs: c2);
487 return getAffineBinaryOpExpr(
488 kind: bin.getKind(), lhs: substWithMin(e: lhs, dim, min, max, positivePath),
489 rhs: substWithMin(e: rhs, dim, min, max, positivePath));
490 }
491 return e;
492}
493
494void mlir::affine::normalizeAffineParallel(AffineParallelOp op) {
495 // Loops with min/max in bounds are not normalized at the moment.
496 if (op.hasMinMaxBounds())
497 return;
498
499 AffineMap lbMap = op.getLowerBoundsMap();
500 SmallVector<int64_t, 8> steps = op.getSteps();
501 // No need to do any work if the parallel op is already normalized.
502 bool isAlreadyNormalized =
503 llvm::all_of(Range: llvm::zip(t&: steps, u: lbMap.getResults()), P: [](auto tuple) {
504 int64_t step = std::get<0>(tuple);
505 auto lbExpr = dyn_cast<AffineConstantExpr>(std::get<1>(tuple));
506 return lbExpr && lbExpr.getValue() == 0 && step == 1;
507 });
508 if (isAlreadyNormalized)
509 return;
510
511 AffineValueMap ranges;
512 AffineValueMap::difference(a: op.getUpperBoundsValueMap(),
513 b: op.getLowerBoundsValueMap(), res: &ranges);
514 auto builder = OpBuilder::atBlockBegin(block: op.getBody());
515 auto zeroExpr = builder.getAffineConstantExpr(constant: 0);
516 SmallVector<AffineExpr, 8> lbExprs;
517 SmallVector<AffineExpr, 8> ubExprs;
518 for (unsigned i = 0, e = steps.size(); i < e; ++i) {
519 int64_t step = steps[i];
520
521 // Adjust the lower bound to be 0.
522 lbExprs.push_back(Elt: zeroExpr);
523
524 // Adjust the upper bound expression: 'range / step'.
525 AffineExpr ubExpr = ranges.getResult(i).ceilDiv(v: step);
526 ubExprs.push_back(Elt: ubExpr);
527
528 // Adjust the corresponding IV: 'lb + i * step'.
529 BlockArgument iv = op.getBody()->getArgument(i);
530 AffineExpr lbExpr = lbMap.getResult(idx: i);
531 unsigned nDims = lbMap.getNumDims();
532 auto expr = lbExpr + builder.getAffineDimExpr(position: nDims) * step;
533 auto map = AffineMap::get(/*dimCount=*/nDims + 1,
534 /*symbolCount=*/lbMap.getNumSymbols(), result: expr);
535
536 // Use an 'affine.apply' op that will be simplified later in subsequent
537 // canonicalizations.
538 OperandRange lbOperands = op.getLowerBoundsOperands();
539 OperandRange dimOperands = lbOperands.take_front(n: nDims);
540 OperandRange symbolOperands = lbOperands.drop_front(n: nDims);
541 SmallVector<Value, 8> applyOperands{dimOperands};
542 applyOperands.push_back(Elt: iv);
543 applyOperands.append(in_start: symbolOperands.begin(), in_end: symbolOperands.end());
544 auto apply = builder.create<AffineApplyOp>(location: op.getLoc(), args&: map, args&: applyOperands);
545 iv.replaceAllUsesExcept(newValue: apply, exceptedUser: apply);
546 }
547
548 SmallVector<int64_t, 8> newSteps(op.getNumDims(), 1);
549 op.setSteps(newSteps);
550 auto newLowerMap = AffineMap::get(
551 /*dimCount=*/0, /*symbolCount=*/0, results: lbExprs, context: op.getContext());
552 op.setLowerBounds(operands: {}, map: newLowerMap);
553 auto newUpperMap = AffineMap::get(dimCount: ranges.getNumDims(), symbolCount: ranges.getNumSymbols(),
554 results: ubExprs, context: op.getContext());
555 op.setUpperBounds(operands: ranges.getOperands(), map: newUpperMap);
556}
557
558LogicalResult mlir::affine::normalizeAffineFor(AffineForOp op,
559 bool promoteSingleIter) {
560 if (promoteSingleIter && succeeded(Result: promoteIfSingleIteration(forOp: op)))
561 return success();
562
563 // Check if the forop is already normalized.
564 if (op.hasConstantLowerBound() && (op.getConstantLowerBound() == 0) &&
565 (op.getStep() == 1))
566 return success();
567
568 // Check if the lower bound has a single result only. Loops with a max lower
569 // bound can't be normalized without additional support like
570 // affine.execute_region's. If the lower bound does not have a single result
571 // then skip this op.
572 if (op.getLowerBoundMap().getNumResults() != 1)
573 return failure();
574
575 Location loc = op.getLoc();
576 OpBuilder opBuilder(op);
577 int64_t origLoopStep = op.getStepAsInt();
578
579 // Construct the new upper bound value map.
580 AffineMap oldLbMap = op.getLowerBoundMap();
581 // The upper bound can have multiple results. To use
582 // AffineValueMap::difference, we need to have the same number of results in
583 // both lower and upper bound maps. So, we just create a value map for the
584 // lower bound with the only available lower bound result repeated to pad up
585 // to the number of upper bound results.
586 SmallVector<AffineExpr> lbExprs(op.getUpperBoundMap().getNumResults(),
587 op.getLowerBoundMap().getResult(idx: 0));
588 AffineValueMap lbMap(oldLbMap, op.getLowerBoundOperands());
589 AffineMap paddedLbMap =
590 AffineMap::get(dimCount: oldLbMap.getNumDims(), symbolCount: oldLbMap.getNumSymbols(), results: lbExprs,
591 context: op.getContext());
592 AffineValueMap paddedLbValueMap(paddedLbMap, op.getLowerBoundOperands());
593 AffineValueMap ubValueMap(op.getUpperBoundMap(), op.getUpperBoundOperands());
594 AffineValueMap newUbValueMap;
595 // Compute the `upper bound - lower bound`.
596 AffineValueMap::difference(a: ubValueMap, b: paddedLbValueMap, res: &newUbValueMap);
597 (void)newUbValueMap.canonicalize();
598
599 // Scale down the upper bound value map by the loop step.
600 unsigned numResult = newUbValueMap.getNumResults();
601 SmallVector<AffineExpr> scaleDownExprs(numResult);
602 for (unsigned i = 0; i < numResult; ++i)
603 scaleDownExprs[i] = opBuilder.getAffineDimExpr(position: i).ceilDiv(v: origLoopStep);
604 // `scaleDownMap` is (d0, d1, ..., d_n) -> (d0 / step, d1 / step, ..., d_n /
605 // step). Where `n` is the number of results in the upper bound map.
606 AffineMap scaleDownMap =
607 AffineMap::get(dimCount: numResult, symbolCount: 0, results: scaleDownExprs, context: op.getContext());
608 AffineMap newUbMap = scaleDownMap.compose(map: newUbValueMap.getAffineMap());
609
610 // Set the newly create upper bound map and operands.
611 op.setUpperBound(operands: newUbValueMap.getOperands(), map: newUbMap);
612 op.setLowerBound(operands: {}, map: opBuilder.getConstantAffineMap(val: 0));
613 op.setStep(1);
614
615 // Calculate the Value of new loopIV. Create affine.apply for the value of
616 // the loopIV in normalized loop.
617 opBuilder.setInsertionPointToStart(op.getBody());
618 // Construct an affine.apply op mapping the new IV to the old IV.
619 AffineMap scaleIvMap =
620 AffineMap::get(dimCount: 1, symbolCount: 0, result: -opBuilder.getAffineDimExpr(position: 0) * origLoopStep);
621 AffineValueMap scaleIvValueMap(scaleIvMap, ValueRange{op.getInductionVar()});
622 AffineValueMap newIvToOldIvMap;
623 AffineValueMap::difference(a: lbMap, b: scaleIvValueMap, res: &newIvToOldIvMap);
624 (void)newIvToOldIvMap.canonicalize();
625 auto newIV = opBuilder.create<AffineApplyOp>(
626 location: loc, args: newIvToOldIvMap.getAffineMap(), args: newIvToOldIvMap.getOperands());
627 op.getInductionVar().replaceAllUsesExcept(newValue: newIV->getResult(idx: 0), exceptedUser: newIV);
628 return success();
629}
630
631/// Returns true if the memory operation of `destAccess` depends on `srcAccess`
632/// inside of the innermost common surrounding affine loop between the two
633/// accesses.
634static bool mustReachAtInnermost(const MemRefAccess &srcAccess,
635 const MemRefAccess &destAccess) {
636 // Affine dependence analysis is possible only if both ops in the same
637 // AffineScope.
638 if (getAffineAnalysisScope(op: srcAccess.opInst) !=
639 getAffineAnalysisScope(op: destAccess.opInst))
640 return false;
641
642 unsigned nsLoops =
643 getNumCommonSurroundingLoops(a&: *srcAccess.opInst, b&: *destAccess.opInst);
644 DependenceResult result =
645 checkMemrefAccessDependence(srcAccess, dstAccess: destAccess, loopDepth: nsLoops + 1);
646 return hasDependence(result);
647}
648
649/// Returns true if `srcMemOp` may have an effect on `destMemOp` within the
650/// scope of the outermost `minSurroundingLoops` loops that surround them.
651/// `srcMemOp` and `destMemOp` are expected to be affine read/write ops.
652static bool mayHaveEffect(Operation *srcMemOp, Operation *destMemOp,
653 unsigned minSurroundingLoops) {
654 MemRefAccess srcAccess(srcMemOp);
655 MemRefAccess destAccess(destMemOp);
656
657 // Affine dependence analysis here is applicable only if both ops operate on
658 // the same memref and if `srcMemOp` and `destMemOp` are in the same
659 // AffineScope. Also, we can only check if our affine scope is isolated from
660 // above; otherwise, values can from outside of the affine scope that the
661 // check below cannot analyze.
662 Region *srcScope = getAffineAnalysisScope(op: srcMemOp);
663 if (srcAccess.memref == destAccess.memref &&
664 srcScope == getAffineAnalysisScope(op: destMemOp)) {
665 unsigned nsLoops = getNumCommonSurroundingLoops(a&: *srcMemOp, b&: *destMemOp);
666 FlatAffineValueConstraints dependenceConstraints;
667 for (unsigned d = nsLoops + 1; d > minSurroundingLoops; d--) {
668 DependenceResult result = checkMemrefAccessDependence(
669 srcAccess, dstAccess: destAccess, loopDepth: d, dependenceConstraints: &dependenceConstraints,
670 /*dependenceComponents=*/nullptr);
671 // A dependence failure or the presence of a dependence implies a
672 // side effect.
673 if (!noDependence(result))
674 return true;
675 }
676 // No side effect was seen.
677 return false;
678 }
679 // TODO: Check here if the memrefs alias: there is no side effect if
680 // `srcAccess.memref` and `destAccess.memref` don't alias.
681 return true;
682}
683
template <typename EffectType, typename T>
bool mlir::affine::hasNoInterveningEffect(
    Operation *start, T memOp,
    llvm::function_ref<bool(Value, Value)> mayAlias) {
  // A boolean representing whether an intervening operation could have impacted
  // memOp.
  bool hasSideEffect = false;

  // Check whether the effect on memOp can be caused by a given operation op.
  // Sets `hasSideEffect` to true if it can; once set, it is never reset.
  Value memref = memOp.getMemRef();
  std::function<void(Operation *)> checkOperation = [&](Operation *op) {
    // If the effect has already been found, early exit.
    if (hasSideEffect)
      return;

    if (auto memEffect = dyn_cast<MemoryEffectOpInterface>(Val: op)) {
      SmallVector<MemoryEffects::EffectInstance, 1> effects;
      memEffect.getEffects(effects);

      bool opMayHaveEffect = false;
      for (auto effect : effects) {
        // If op causes EffectType on a potentially aliasing location for
        // memOp, mark as having the effect.
        if (isa<EffectType>(effect.getEffect())) {
          // An effect on a different memref that provably does not alias
          // `memref` is irrelevant.
          if (effect.getValue() && effect.getValue() != memref &&
              !mayAlias(effect.getValue(), memref))
            continue;
          opMayHaveEffect = true;
          break;
        }
      }

      if (!opMayHaveEffect)
        return;

      // If the side effect comes from an affine read or write, try to
      // prove the side effecting `op` cannot reach `memOp`.
      if (isa<AffineReadOpInterface, AffineWriteOpInterface>(Val: op)) {
        // For ease, let's consider the case that `op` is a store and
        // we're looking for other potential stores that overwrite memory after
        // `start`, and before being read in `memOp`. In this case, we only
        // need to consider other potential stores with depth >
        // minSurroundingLoops since `start` would overwrite any store with a
        // smaller number of surrounding loops before.
        unsigned minSurroundingLoops =
            getNumCommonSurroundingLoops(*start, *memOp);
        if (mayHaveEffect(op, memOp, minSurroundingLoops))
          hasSideEffect = true;
        return;
      }

      // We have an op with a memory effect and we cannot prove if it
      // intervenes.
      hasSideEffect = true;
      return;
    }

    if (op->hasTrait<OpTrait::HasRecursiveMemoryEffects>()) {
      // Recurse into the regions for this op and check whether the internal
      // operations may have the side effect `EffectType` on memOp.
      for (Region &region : op->getRegions())
        for (Block &block : region)
          for (Operation &op : block)
            checkOperation(&op);
      return;
    }

    // Otherwise, conservatively assume generic operations have the effect
    // on the operation.
    hasSideEffect = true;
  };

  // Check all paths from ancestor op `parent` to the operation `to` for the
  // effect. It is known that `to` must be contained within `parent`.
  auto until = [&](Operation *parent, Operation *to) {
    // TODO check only the paths from `parent` to `to`.
    // Currently we fallback and check the entire parent op, rather than
    // just the paths from the parent path, stopping after reaching `to`.
    // This is conservatively correct, but could be made more aggressive.
    assert(parent->isAncestor(to));
    checkOperation(parent);
  };

  // Check for all paths from operation `from` to operation `untilOp` for the
  // given memory effect.
  std::function<void(Operation *, Operation *)> recur =
      [&](Operation *from, Operation *untilOp) {
        assert(
            from->getParentRegion()->isAncestor(untilOp->getParentRegion()) &&
            "Checking for side effect between two operations without a common "
            "ancestor");

        // If the operations are in different regions, recursively consider all
        // paths from `from` to the parent of `to` and all paths from the
        // parent of `to` to `to`.
        if (from->getParentRegion() != untilOp->getParentRegion()) {
          recur(from, untilOp->getParentOp());
          until(untilOp->getParentOp(), untilOp);
          return;
        }

        // Now, assuming that `from` and `to` exist in the same region, perform
        // a CFG traversal to check all the relevant operations.

        // Additional blocks to consider.
        SmallVector<Block *, 2> todoBlocks;
        {
          // First consider the parent block of `from` and check all operations
          // after `from`.
          for (auto iter = ++from->getIterator(), end = from->getBlock()->end();
               iter != end && &*iter != untilOp; ++iter) {
            checkOperation(&*iter);
          }

          // If the parent of `from` doesn't contain `to`, add the successors
          // to the list of blocks to check.
          if (untilOp->getBlock() != from->getBlock())
            for (Block *succ : from->getBlock()->getSuccessors())
              todoBlocks.push_back(Elt: succ);
        }

        SmallPtrSet<Block *, 4> done;
        // Traverse the CFG until hitting `to`.
        while (!todoBlocks.empty()) {
          Block *blk = todoBlocks.pop_back_val();
          if (done.count(Ptr: blk))
            continue;
          done.insert(Ptr: blk);
          for (auto &op : *blk) {
            if (&op == untilOp)
              break;
            checkOperation(&op);
            if (&op == blk->getTerminator())
              for (Block *succ : blk->getSuccessors())
                todoBlocks.push_back(Elt: succ);
          }
        }
      };
  recur(start, memOp);
  return !hasSideEffect;
}
825
826/// Attempt to eliminate loadOp by replacing it with a value stored into memory
827/// which the load is guaranteed to retrieve. This check involves three
828/// components: 1) The store and load must be on the same location 2) The store
829/// must dominate (and therefore must always occur prior to) the load 3) No
830/// other operations will overwrite the memory loaded between the given load
831/// and store. If such a value exists, the replaced `loadOp` will be added to
832/// `loadOpsToErase` and its memref will be added to `memrefsToErase`.
833static void forwardStoreToLoad(
834 AffineReadOpInterface loadOp, SmallVectorImpl<Operation *> &loadOpsToErase,
835 SmallPtrSetImpl<Value> &memrefsToErase, DominanceInfo &domInfo,
836 llvm::function_ref<bool(Value, Value)> mayAlias) {
837
838 // The store op candidate for forwarding that satisfies all conditions
839 // to replace the load, if any.
840 Operation *lastWriteStoreOp = nullptr;
841
842 for (auto *user : loadOp.getMemRef().getUsers()) {
843 auto storeOp = dyn_cast<AffineWriteOpInterface>(Val: user);
844 if (!storeOp)
845 continue;
846 MemRefAccess srcAccess(storeOp);
847 MemRefAccess destAccess(loadOp);
848
849 // 1. Check if the store and the load have mathematically equivalent
850 // affine access functions; this implies that they statically refer to the
851 // same single memref element. As an example this filters out cases like:
852 // store %A[%i0 + 1]
853 // load %A[%i0]
854 // store %A[%M]
855 // load %A[%N]
856 // Use the AffineValueMap difference based memref access equality checking.
857 if (srcAccess != destAccess)
858 continue;
859
860 // 2. The store has to dominate the load op to be candidate.
861 if (!domInfo.dominates(a: storeOp, b: loadOp))
862 continue;
863
864 // 3. The store must reach the load. Access function equivalence only
865 // guarantees this for accesses in the same block. The load could be in a
866 // nested block that is unreachable.
867 if (!mustReachAtInnermost(srcAccess, destAccess))
868 continue;
869
870 // 4. Ensure there is no intermediate operation which could replace the
871 // value in memory.
872 if (!affine::hasNoInterveningEffect<MemoryEffects::Write>(start: storeOp, memOp: loadOp,
873 mayAlias))
874 continue;
875
876 // We now have a candidate for forwarding.
877 assert(lastWriteStoreOp == nullptr &&
878 "multiple simultaneous replacement stores");
879 lastWriteStoreOp = storeOp;
880 }
881
882 if (!lastWriteStoreOp)
883 return;
884
885 // Perform the actual store to load forwarding.
886 Value storeVal =
887 cast<AffineWriteOpInterface>(Val: lastWriteStoreOp).getValueToStore();
888 // Check if 2 values have the same shape. This is needed for affine vector
889 // loads and stores.
890 if (storeVal.getType() != loadOp.getValue().getType())
891 return;
892 loadOp.getValue().replaceAllUsesWith(newValue: storeVal);
893 // Record the memref for a later sweep to optimize away.
894 memrefsToErase.insert(Ptr: loadOp.getMemRef());
895 // Record this to erase later.
896 loadOpsToErase.push_back(Elt: loadOp);
897}
898
// Explicit instantiation used by `findUnusedStore` / `loadCSE` style queries:
// checks that no read effect intervenes before an affine read op.
template bool
mlir::affine::hasNoInterveningEffect<mlir::MemoryEffects::Read,
                                     affine::AffineReadOpInterface>(
    mlir::Operation *, affine::AffineReadOpInterface,
    llvm::function_ref<bool(Value, Value)>);
904
905// This attempts to find stores which have no impact on the final result.
906// A writing op writeA will be eliminated if there exists an op writeB if
907// 1) writeA and writeB have mathematically equivalent affine access functions.
908// 2) writeB postdominates writeA.
909// 3) There is no potential read between writeA and writeB.
910static void findUnusedStore(AffineWriteOpInterface writeA,
911 SmallVectorImpl<Operation *> &opsToErase,
912 PostDominanceInfo &postDominanceInfo,
913 llvm::function_ref<bool(Value, Value)> mayAlias) {
914
915 for (Operation *user : writeA.getMemRef().getUsers()) {
916 // Only consider writing operations.
917 auto writeB = dyn_cast<AffineWriteOpInterface>(Val: user);
918 if (!writeB)
919 continue;
920
921 // The operations must be distinct.
922 if (writeB == writeA)
923 continue;
924
925 // Both operations must lie in the same region.
926 if (writeB->getParentRegion() != writeA->getParentRegion())
927 continue;
928
929 // Both operations must write to the same memory.
930 MemRefAccess srcAccess(writeB);
931 MemRefAccess destAccess(writeA);
932
933 if (srcAccess != destAccess)
934 continue;
935
936 // writeB must postdominate writeA.
937 if (!postDominanceInfo.postDominates(a: writeB, b: writeA))
938 continue;
939
940 // There cannot be an operation which reads from memory between
941 // the two writes.
942 if (!affine::hasNoInterveningEffect<MemoryEffects::Read>(start: writeA, memOp: writeB,
943 mayAlias))
944 continue;
945
946 opsToErase.push_back(Elt: writeA);
947 break;
948 }
949}
950
951// The load to load forwarding / redundant load elimination is similar to the
952// store to load forwarding.
953// loadA will be be replaced with loadB if:
954// 1) loadA and loadB have mathematically equivalent affine access functions.
955// 2) loadB dominates loadA.
956// 3) There is no write between loadA and loadB.
957static void loadCSE(AffineReadOpInterface loadA,
958 SmallVectorImpl<Operation *> &loadOpsToErase,
959 DominanceInfo &domInfo,
960 llvm::function_ref<bool(Value, Value)> mayAlias) {
961 SmallVector<AffineReadOpInterface, 4> loadCandidates;
962 for (auto *user : loadA.getMemRef().getUsers()) {
963 auto loadB = dyn_cast<AffineReadOpInterface>(Val: user);
964 if (!loadB || loadB == loadA)
965 continue;
966
967 MemRefAccess srcAccess(loadB);
968 MemRefAccess destAccess(loadA);
969
970 // 1. The accesses should be to be to the same location.
971 if (srcAccess != destAccess) {
972 continue;
973 }
974
975 // 2. loadB should dominate loadA.
976 if (!domInfo.dominates(a: loadB, b: loadA))
977 continue;
978
979 // 3. There should not be a write between loadA and loadB.
980 if (!affine::hasNoInterveningEffect<MemoryEffects::Write>(
981 start: loadB.getOperation(), memOp: loadA, mayAlias))
982 continue;
983
984 // Check if two values have the same shape. This is needed for affine vector
985 // loads.
986 if (loadB.getValue().getType() != loadA.getValue().getType())
987 continue;
988
989 loadCandidates.push_back(Elt: loadB);
990 }
991
992 // Of the legal load candidates, use the one that dominates all others
993 // to minimize the subsequent need to loadCSE
994 Value loadB;
995 for (AffineReadOpInterface option : loadCandidates) {
996 if (llvm::all_of(Range&: loadCandidates, P: [&](AffineReadOpInterface depStore) {
997 return depStore == option ||
998 domInfo.dominates(a: option.getOperation(),
999 b: depStore.getOperation());
1000 })) {
1001 loadB = option.getValue();
1002 break;
1003 }
1004 }
1005
1006 if (loadB) {
1007 loadA.getValue().replaceAllUsesWith(newValue: loadB);
1008 // Record this to erase later.
1009 loadOpsToErase.push_back(Elt: loadA);
1010 }
1011}
1012
1013// The store to load forwarding and load CSE rely on three conditions:
1014//
1015// 1) store/load providing a replacement value and load being replaced need to
1016// have mathematically equivalent affine access functions (checked after full
1017// composition of load/store operands); this implies that they access the same
1018// single memref element for all iterations of the common surrounding loop,
1019//
1020// 2) the store/load op should dominate the load op,
1021//
1022// 3) no operation that may write to memory read by the load being replaced can
1023// occur after executing the instruction (load or store) providing the
1024// replacement value and before the load being replaced (thus potentially
1025// allowing overwriting the memory read by the load).
1026//
1027// The above conditions are simple to check, sufficient, and powerful for most
1028// cases in practice - they are sufficient, but not necessary --- since they
1029// don't reason about loops that are guaranteed to execute at least once or
1030// multiple sources to forward from.
1031//
1032// TODO: more forwarding can be done when support for
1033// loop/conditional live-out SSA values is available.
1034// TODO: do general dead store elimination for memref's. This pass
1035// currently only eliminates the stores only if no other loads/uses (other
1036// than dealloc) remain.
1037//
1038void mlir::affine::affineScalarReplace(func::FuncOp f, DominanceInfo &domInfo,
1039 PostDominanceInfo &postDomInfo,
1040 AliasAnalysis &aliasAnalysis) {
1041 // Load op's whose results were replaced by those forwarded from stores.
1042 SmallVector<Operation *, 8> opsToErase;
1043
1044 // A list of memref's that are potentially dead / could be eliminated.
1045 SmallPtrSet<Value, 4> memrefsToErase;
1046
1047 auto mayAlias = [&](Value val1, Value val2) -> bool {
1048 return !aliasAnalysis.alias(lhs: val1, rhs: val2).isNo();
1049 };
1050
1051 // Walk all load's and perform store to load forwarding.
1052 f.walk(callback: [&](AffineReadOpInterface loadOp) {
1053 forwardStoreToLoad(loadOp, loadOpsToErase&: opsToErase, memrefsToErase, domInfo, mayAlias);
1054 });
1055 for (auto *op : opsToErase)
1056 op->erase();
1057 opsToErase.clear();
1058
1059 // Walk all store's and perform unused store elimination
1060 f.walk(callback: [&](AffineWriteOpInterface storeOp) {
1061 findUnusedStore(writeA: storeOp, opsToErase, postDominanceInfo&: postDomInfo, mayAlias);
1062 });
1063 for (auto *op : opsToErase)
1064 op->erase();
1065 opsToErase.clear();
1066
1067 // Check if the store fwd'ed memrefs are now left with only stores and
1068 // deallocs and can thus be completely deleted. Note: the canonicalize pass
1069 // should be able to do this as well, but we'll do it here since we collected
1070 // these anyway.
1071 for (auto memref : memrefsToErase) {
1072 // If the memref hasn't been locally alloc'ed, skip.
1073 Operation *defOp = memref.getDefiningOp();
1074 if (!defOp || !hasSingleEffect<MemoryEffects::Allocate>(op: defOp, value: memref))
1075 // TODO: if the memref was returned by a 'call' operation, we
1076 // could still erase it if the call had no side-effects.
1077 continue;
1078 if (llvm::any_of(Range: memref.getUsers(), P: [&](Operation *ownerOp) {
1079 return !isa<AffineWriteOpInterface>(Val: ownerOp) &&
1080 !hasSingleEffect<MemoryEffects::Free>(op: ownerOp, value: memref);
1081 }))
1082 continue;
1083
1084 // Erase all stores, the dealloc, and the alloc on the memref.
1085 for (auto *user : llvm::make_early_inc_range(Range: memref.getUsers()))
1086 user->erase();
1087 defOp->erase();
1088 }
1089
1090 // To eliminate as many loads as possible, run load CSE after eliminating
1091 // stores. Otherwise, some stores are wrongly seen as having an intervening
1092 // effect.
1093 f.walk(callback: [&](AffineReadOpInterface loadOp) {
1094 loadCSE(loadA: loadOp, loadOpsToErase&: opsToErase, domInfo, mayAlias);
1095 });
1096 for (auto *op : opsToErase)
1097 op->erase();
1098}
1099
// Returns true if `op` dereferences a memref: either it implements
// AffineMapAccessInterface or it is a plain memref load/store.
// TODO: This hardcoded check will be removed once the right interface is added.
static bool isDereferencingOp(Operation *op) {
  return isa<AffineMapAccessInterface, memref::LoadOp, memref::StoreOp>(Val: op);
}
1105
/// Replaces all uses of `oldMemRef` in `op` with `newMemRef`, rewriting access
/// indices using `extraIndices`, `indexRemap`, `extraOperands` and
/// `symbolOperands`. For a dereferencing `op`, the memref's indices are
/// rewritten and the op is rebuilt; for a non-dereferencing `op`, the memref
/// operand is substituted directly (only when `allowNonDereferencingOps` is
/// set). Returns failure if no replacement was possible.
LogicalResult mlir::affine::replaceAllMemRefUsesWith(
    Value oldMemRef, Value newMemRef, Operation *op,
    ArrayRef<Value> extraIndices, AffineMap indexRemap,
    ArrayRef<Value> extraOperands, ArrayRef<Value> symbolOperands,
    bool allowNonDereferencingOps) {
  unsigned newMemRefRank = cast<MemRefType>(Val: newMemRef.getType()).getRank();
  (void)newMemRefRank; // unused in opt mode
  unsigned oldMemRefRank = cast<MemRefType>(Val: oldMemRef.getType()).getRank();
  (void)oldMemRefRank; // unused in opt mode
  if (indexRemap) {
    assert(indexRemap.getNumSymbols() == symbolOperands.size() &&
           "symbolic operand count mismatch");
    assert(indexRemap.getNumInputs() ==
           extraOperands.size() + oldMemRefRank + symbolOperands.size());
    assert(indexRemap.getNumResults() + extraIndices.size() == newMemRefRank);
  } else {
    assert(oldMemRefRank + extraIndices.size() == newMemRefRank);
  }

  // Assert same elemental type.
  assert(cast<MemRefType>(oldMemRef.getType()).getElementType() ==
         cast<MemRefType>(newMemRef.getType()).getElementType());

  // Gather the operand positions at which `op` uses `oldMemRef`.
  SmallVector<unsigned, 2> usePositions;
  for (const auto &opEntry : llvm::enumerate(First: op->getOperands())) {
    if (opEntry.value() == oldMemRef)
      usePositions.push_back(Elt: opEntry.index());
  }

  // If memref doesn't appear, nothing to do.
  if (usePositions.empty())
    return success();

  unsigned memRefOperandPos = usePositions.front();

  OpBuilder builder(op);
  // The following checks if op is dereferencing memref and performs the access
  // index rewrites.
  if (!isDereferencingOp(op)) {
    if (!allowNonDereferencingOps) {
      // Failure: memref used in a non-dereferencing context (potentially
      // escapes); no replacement in these cases unless allowNonDereferencingOps
      // is set.
      return failure();
    }
    // Non-dereferencing use: plain operand substitution suffices, no index
    // rewriting is needed.
    for (unsigned pos : usePositions)
      op->setOperand(idx: pos, value: newMemRef);
    return success();
  }

  if (usePositions.size() > 1) {
    // TODO: extend it for this case when needed (rare).
    LLVM_DEBUG(llvm::dbgs()
               << "multiple dereferencing uses in a single op not supported");
    return failure();
  }

  // Perform index rewrites for the dereferencing op and then replace the op.
  SmallVector<Value, 4> oldMapOperands;
  AffineMap oldMap;
  unsigned oldMemRefNumIndices = oldMemRefRank;
  // The indices of a memref come right after the memref operand itself.
  auto startIdx = op->operand_begin() + memRefOperandPos + 1;
  auto affMapAccInterface = dyn_cast<AffineMapAccessInterface>(Val: op);
  if (affMapAccInterface) {
    // If `op` implements AffineMapAccessInterface, we can get the indices by
    // querying the number of map operands from the operand list from a certain
    // offset (`memRefOperandPos` in this case).
    NamedAttribute oldMapAttrPair =
        affMapAccInterface.getAffineMapAttrForMemRef(memref: oldMemRef);
    oldMap = cast<AffineMapAttr>(Val: oldMapAttrPair.getValue()).getValue();
    oldMemRefNumIndices = oldMap.getNumInputs();
  }
  oldMapOperands.assign(in_start: startIdx, in_end: startIdx + oldMemRefNumIndices);

  // Apply 'oldMemRefOperands = oldMap(oldMapOperands)'.
  SmallVector<Value, 4> oldMemRefOperands;
  SmallVector<Value, 4> affineApplyOps;
  oldMemRefOperands.reserve(N: oldMemRefRank);
  if (affMapAccInterface &&
      oldMap != builder.getMultiDimIdentityMap(rank: oldMap.getNumDims())) {
    // Materialize each result of the (non-identity) old map as a
    // single-result affine.apply.
    for (auto resultExpr : oldMap.getResults()) {
      auto singleResMap = AffineMap::get(dimCount: oldMap.getNumDims(),
                                         symbolCount: oldMap.getNumSymbols(), result: resultExpr);
      auto afOp = builder.create<AffineApplyOp>(location: op->getLoc(), args&: singleResMap,
                                                args&: oldMapOperands);
      oldMemRefOperands.push_back(Elt: afOp);
      affineApplyOps.push_back(Elt: afOp);
    }
  } else {
    // Identity map (or no map interface): the operands are the indices.
    oldMemRefOperands.assign(in_start: oldMapOperands.begin(), in_end: oldMapOperands.end());
  }

  // Construct new indices as a remap of the old ones if a remapping has been
  // provided. The indices of a memref come right after it, i.e.,
  // at position memRefOperandPos + 1.
  SmallVector<Value, 4> remapOperands;
  remapOperands.reserve(N: extraOperands.size() + oldMemRefRank +
                        symbolOperands.size());
  remapOperands.append(in_start: extraOperands.begin(), in_end: extraOperands.end());
  remapOperands.append(in_start: oldMemRefOperands.begin(), in_end: oldMemRefOperands.end());
  remapOperands.append(in_start: symbolOperands.begin(), in_end: symbolOperands.end());

  SmallVector<Value, 4> remapOutputs;
  remapOutputs.reserve(N: oldMemRefRank);
  if (indexRemap &&
      indexRemap != builder.getMultiDimIdentityMap(rank: indexRemap.getNumDims())) {
    // Remapped indices.
    for (auto resultExpr : indexRemap.getResults()) {
      auto singleResMap = AffineMap::get(
          dimCount: indexRemap.getNumDims(), symbolCount: indexRemap.getNumSymbols(), result: resultExpr);
      auto afOp = builder.create<AffineApplyOp>(location: op->getLoc(), args&: singleResMap,
                                                args&: remapOperands);
      remapOutputs.push_back(Elt: afOp);
      affineApplyOps.push_back(Elt: afOp);
    }
  } else {
    // No remapping specified.
    remapOutputs.assign(in_start: remapOperands.begin(), in_end: remapOperands.end());
  }
  SmallVector<Value, 4> newMapOperands;
  newMapOperands.reserve(N: newMemRefRank);

  // Prepend 'extraIndices' in 'newMapOperands'.
  for (Value extraIndex : extraIndices) {
    assert((isValidDim(extraIndex) || isValidSymbol(extraIndex)) &&
           "invalid memory op index");
    newMapOperands.push_back(Elt: extraIndex);
  }

  // Append 'remapOutputs' to 'newMapOperands'.
  newMapOperands.append(in_start: remapOutputs.begin(), in_end: remapOutputs.end());

  // Create new fully composed AffineMap for new op to be created.
  assert(newMapOperands.size() == newMemRefRank);
  auto newMap = builder.getMultiDimIdentityMap(rank: newMemRefRank);
  fullyComposeAffineMapAndOperands(map: &newMap, operands: &newMapOperands);
  newMap = simplifyAffineMap(map: newMap);
  canonicalizeMapAndOperands(map: &newMap, operands: &newMapOperands);
  // Remove any affine.apply's that became dead as a result of composition.
  for (Value value : affineApplyOps)
    if (value.use_empty())
      value.getDefiningOp()->erase();

  OperationState state(op->getLoc(), op->getName());
  // Construct the new operation using this memref.
  state.operands.reserve(N: op->getNumOperands() + extraIndices.size());
  // Insert the non-memref operands.
  state.operands.append(in_start: op->operand_begin(),
                        in_end: op->operand_begin() + memRefOperandPos);
  // Insert the new memref value.
  state.operands.push_back(Elt: newMemRef);

  // Insert the new memref map operands.
  if (affMapAccInterface) {
    state.operands.append(in_start: newMapOperands.begin(), in_end: newMapOperands.end());
  } else {
    // In the case of dereferencing ops not implementing
    // AffineMapAccessInterface, we need to apply the values of `newMapOperands`
    // to the `newMap` to get the correct indices.
    for (unsigned i = 0; i < newMemRefRank; i++) {
      state.operands.push_back(Elt: builder.create<AffineApplyOp>(
          location: op->getLoc(),
          args: AffineMap::get(dimCount: newMap.getNumDims(), symbolCount: newMap.getNumSymbols(),
                         result: newMap.getResult(idx: i)),
          args&: newMapOperands));
    }
  }

  // Insert the remaining operands unmodified.
  unsigned oldMapNumInputs = oldMapOperands.size();
  state.operands.append(in_start: op->operand_begin() + memRefOperandPos + 1 +
                            oldMapNumInputs,
                        in_end: op->operand_end());
  // Result types don't change. Both memref's are of the same elemental type.
  state.types.reserve(N: op->getNumResults());
  for (auto result : op->getResults())
    state.types.push_back(Elt: result.getType());

  // Add attribute for 'newMap', other Attributes do not change.
  auto newMapAttr = AffineMapAttr::get(value: newMap);
  for (auto namedAttr : op->getAttrs()) {
    if (affMapAccInterface &&
        namedAttr.getName() ==
            affMapAccInterface.getAffineMapAttrForMemRef(memref: oldMemRef).getName())
      state.attributes.push_back(newAttribute: {namedAttr.getName(), newMapAttr});
    else
      state.attributes.push_back(newAttribute: namedAttr);
  }

  // Create the new operation.
  auto *repOp = builder.create(state);
  op->replaceAllUsesWith(values&: repOp);
  op->erase();

  return success();
}
1303
1304LogicalResult mlir::affine::replaceAllMemRefUsesWith(
1305 Value oldMemRef, Value newMemRef, ArrayRef<Value> extraIndices,
1306 AffineMap indexRemap, ArrayRef<Value> extraOperands,
1307 ArrayRef<Value> symbolOperands,
1308 llvm::function_ref<bool(Operation *)> userFilterFn,
1309 bool allowNonDereferencingOps, bool replaceInDeallocOp) {
1310 unsigned newMemRefRank = cast<MemRefType>(Val: newMemRef.getType()).getRank();
1311 (void)newMemRefRank; // unused in opt mode
1312 unsigned oldMemRefRank = cast<MemRefType>(Val: oldMemRef.getType()).getRank();
1313 (void)oldMemRefRank;
1314 if (indexRemap) {
1315 assert(indexRemap.getNumSymbols() == symbolOperands.size() &&
1316 "symbol operand count mismatch");
1317 assert(indexRemap.getNumInputs() ==
1318 extraOperands.size() + oldMemRefRank + symbolOperands.size());
1319 assert(indexRemap.getNumResults() + extraIndices.size() == newMemRefRank);
1320 } else {
1321 assert(oldMemRefRank + extraIndices.size() == newMemRefRank);
1322 }
1323
1324 // Assert same elemental type.
1325 assert(cast<MemRefType>(oldMemRef.getType()).getElementType() ==
1326 cast<MemRefType>(newMemRef.getType()).getElementType());
1327
1328 std::unique_ptr<DominanceInfo> domInfo;
1329 std::unique_ptr<PostDominanceInfo> postDomInfo;
1330
1331 // Walk all uses of old memref; collect ops to perform replacement. We use a
1332 // DenseSet since an operation could potentially have multiple uses of a
1333 // memref (although rare), and the replacement later is going to erase ops.
1334 DenseSet<Operation *> opsToReplace;
1335 for (auto *user : oldMemRef.getUsers()) {
1336 // Check if this user doesn't pass the filter.
1337 if (userFilterFn && !userFilterFn(user))
1338 continue;
1339
1340 // Skip dealloc's - no replacement is necessary, and a memref replacement
1341 // at other uses doesn't hurt these dealloc's.
1342 if (hasSingleEffect<MemoryEffects::Free>(op: user, value: oldMemRef) &&
1343 !replaceInDeallocOp)
1344 continue;
1345
1346 // Check if the memref was used in a non-dereferencing context. It is fine
1347 // for the memref to be used in a non-dereferencing way outside of the
1348 // region where this replacement is happening.
1349 if (!isa<AffineMapAccessInterface>(Val: *user)) {
1350 if (!allowNonDereferencingOps) {
1351 LLVM_DEBUG(
1352 llvm::dbgs()
1353 << "Memref replacement failed: non-deferencing memref user: \n"
1354 << *user << '\n');
1355 return failure();
1356 }
1357 // Non-dereferencing ops with the MemRefsNormalizable trait are
1358 // supported for replacement.
1359 if (!user->hasTrait<OpTrait::MemRefsNormalizable>()) {
1360 LLVM_DEBUG(llvm::dbgs() << "Memref replacement failed: use without a "
1361 "memrefs normalizable trait: \n"
1362 << *user << '\n');
1363 return failure();
1364 }
1365 }
1366
1367 // We'll first collect and then replace --- since replacement erases the
1368 // user that has the use, and that user could be postDomFilter or domFilter
1369 // itself!
1370 opsToReplace.insert(V: user);
1371 }
1372
1373 for (auto *user : opsToReplace) {
1374 if (failed(Result: replaceAllMemRefUsesWith(
1375 oldMemRef, newMemRef, op: user, extraIndices, indexRemap, extraOperands,
1376 symbolOperands, allowNonDereferencingOps)))
1377 llvm_unreachable("memref replacement guaranteed to succeed here");
1378 }
1379
1380 return success();
1381}
1382
1383/// Given an operation, inserts one or more single result affine
1384/// apply operations, results of which are exclusively used by this operation
1385/// operation. The operands of these newly created affine apply ops are
1386/// guaranteed to be loop iterators or terminal symbols of a function.
1387///
1388/// Before
1389///
1390/// affine.for %i = 0 to #map(%N)
1391/// %idx = affine.apply (d0) -> (d0 mod 2) (%i)
1392/// "send"(%idx, %A, ...)
1393/// "compute"(%idx)
1394///
1395/// After
1396///
1397/// affine.for %i = 0 to #map(%N)
1398/// %idx = affine.apply (d0) -> (d0 mod 2) (%i)
1399/// "send"(%idx, %A, ...)
1400/// %idx_ = affine.apply (d0) -> (d0 mod 2) (%i)
1401/// "compute"(%idx_)
1402///
1403/// This allows applying different transformations on send and compute (for eg.
1404/// different shifts/delays).
1405///
1406/// Returns nullptr either if none of opInst's operands were the result of an
1407/// affine.apply and thus there was no affine computation slice to create, or if
1408/// all the affine.apply op's supplying operands to this opInst did not have any
1409/// uses besides this opInst; otherwise returns the list of affine.apply
1410/// operations created in output argument `sliceOps`.
1411void mlir::affine::createAffineComputationSlice(
1412 Operation *opInst, SmallVectorImpl<AffineApplyOp> *sliceOps) {
1413 // Collect all operands that are results of affine apply ops.
1414 SmallVector<Value, 4> subOperands;
1415 subOperands.reserve(N: opInst->getNumOperands());
1416 for (auto operand : opInst->getOperands())
1417 if (isa_and_nonnull<AffineApplyOp>(Val: operand.getDefiningOp()))
1418 subOperands.push_back(Elt: operand);
1419
1420 // Gather sequence of AffineApplyOps reachable from 'subOperands'.
1421 SmallVector<Operation *, 4> affineApplyOps;
1422 getReachableAffineApplyOps(operands: subOperands, affineApplyOps);
1423 // Skip transforming if there are no affine maps to compose.
1424 if (affineApplyOps.empty())
1425 return;
1426
1427 // Check if all uses of the affine apply op's lie only in this op op, in
1428 // which case there would be nothing to do.
1429 bool localized = true;
1430 for (auto *op : affineApplyOps) {
1431 for (auto result : op->getResults()) {
1432 for (auto *user : result.getUsers()) {
1433 if (user != opInst) {
1434 localized = false;
1435 break;
1436 }
1437 }
1438 }
1439 }
1440 if (localized)
1441 return;
1442
1443 OpBuilder builder(opInst);
1444 SmallVector<Value, 4> composedOpOperands(subOperands);
1445 auto composedMap = builder.getMultiDimIdentityMap(rank: composedOpOperands.size());
1446 fullyComposeAffineMapAndOperands(map: &composedMap, operands: &composedOpOperands);
1447
1448 // Create an affine.apply for each of the map results.
1449 sliceOps->reserve(N: composedMap.getNumResults());
1450 for (auto resultExpr : composedMap.getResults()) {
1451 auto singleResMap = AffineMap::get(dimCount: composedMap.getNumDims(),
1452 symbolCount: composedMap.getNumSymbols(), result: resultExpr);
1453 sliceOps->push_back(Elt: builder.create<AffineApplyOp>(
1454 location: opInst->getLoc(), args&: singleResMap, args&: composedOpOperands));
1455 }
1456
1457 // Construct the new operands that include the results from the composed
1458 // affine apply op above instead of existing ones (subOperands). So, they
1459 // differ from opInst's operands only for those operands in 'subOperands', for
1460 // which they will be replaced by the corresponding one from 'sliceOps'.
1461 SmallVector<Value, 4> newOperands(opInst->getOperands());
1462 for (Value &operand : newOperands) {
1463 // Replace the subOperands from among the new operands.
1464 unsigned j, f;
1465 for (j = 0, f = subOperands.size(); j < f; j++) {
1466 if (operand == subOperands[j])
1467 break;
1468 }
1469 if (j < subOperands.size())
1470 operand = (*sliceOps)[j];
1471 }
1472 for (unsigned idx = 0, e = newOperands.size(); idx < e; idx++)
1473 opInst->setOperand(idx, value: newOperands[idx]);
1474}
1475
/// Enum classifying the pattern of an affine expr in a tiled-layout map.
/// TileFloorDiv: <dim expr> div <tile size>
/// TileMod: <dim expr> mod <tile size>
/// TileNone: None of the above
/// Example:
/// #tiled_2d_128x256 = affine_map<(d0, d1)
/// -> (d0 div 128, d1 div 256, d0 mod 128, d1 mod 256)>
/// "d0 div 128" and "d1 div 256" ==> TileFloorDiv
/// "d0 mod 128" and "d1 mod 256" ==> TileMod
enum TileExprPattern { TileFloorDiv, TileMod, TileNone };
1486
/// Check if `map` is a tiled layout. In the tiled layout, specific k
/// dimensions being floordiv'ed by respective tile sizes appear in a mod with
/// the same tile sizes, and no other expression involves those k dimensions.
/// This function stores, in `tileSizePos`, one tuple per tile: the AffineExpr
/// for the tile size plus the result positions of the corresponding `floordiv`
/// and `mod`. If `map` is not a tiled layout, `tileSizePos` is left empty.
/// NOTE: this function always returns success(); "not a tiled layout" is
/// communicated solely via an empty `tileSizePos`.
static LogicalResult getTileSizePos(
    AffineMap map,
    SmallVectorImpl<std::tuple<AffineExpr, unsigned, unsigned>> &tileSizePos) {
  // Create `floordivExprs` which is a vector of tuples including LHS and RHS of
  // `floordiv` and its position in `map` output.
  // Example: #tiled_2d_128x256 = affine_map<(d0, d1)
  // -> (d0 div 128, d1 div 256, d0 mod 128, d1 mod 256)>
  // In this example, `floordivExprs` includes {d0, 128, 0} and {d1, 256, 1}.
  SmallVector<std::tuple<AffineExpr, AffineExpr, unsigned>, 4> floordivExprs;
  unsigned pos = 0;
  for (AffineExpr expr : map.getResults()) {
    if (expr.getKind() == AffineExprKind::FloorDiv) {
      AffineBinaryOpExpr binaryExpr = cast<AffineBinaryOpExpr>(Val&: expr);
      // Only a floordiv by a constant can describe a tile size.
      if (isa<AffineConstantExpr>(Val: binaryExpr.getRHS()))
        floordivExprs.emplace_back(
            Args: std::make_tuple(args: binaryExpr.getLHS(), args: binaryExpr.getRHS(), args&: pos));
    }
    pos++;
  }
  // Not tiled layout if `floordivExprs` is empty.
  if (floordivExprs.empty()) {
    tileSizePos = SmallVector<std::tuple<AffineExpr, unsigned, unsigned>>{};
    return success();
  }

  // Check if LHS of `floordiv` is used in LHS of `mod`. If not used, `map` is
  // not tiled layout.
  for (std::tuple<AffineExpr, AffineExpr, unsigned> fexpr : floordivExprs) {
    AffineExpr floordivExprLHS = std::get<0>(t&: fexpr);
    AffineExpr floordivExprRHS = std::get<1>(t&: fexpr);
    unsigned floordivPos = std::get<2>(t&: fexpr);

    // Walk affine exprs of `map` output except `fexpr`, and check if LHS and
    // RHS of `fexpr` are used in LHS and RHS of `mod`. If LHS of `fexpr` is
    // used in any other expr, the map is not a tiled layout. Examples of
    // non-tiled layouts:
    // affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2 floordiv 256)>
    // affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2 mod 128)>
    // affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2 mod 256, d2 mod
    // 256)>
    bool found = false;
    pos = 0;
    for (AffineExpr expr : map.getResults()) {
      bool notTiled = false;
      if (pos != floordivPos) {
        expr.walk(callback: [&](AffineExpr e) {
          if (e == floordivExprLHS) {
            if (expr.getKind() == AffineExprKind::Mod) {
              AffineBinaryOpExpr binaryExpr = cast<AffineBinaryOpExpr>(Val&: expr);
              // If LHS and RHS of `mod` are the same with those of floordiv.
              if (floordivExprLHS == binaryExpr.getLHS() &&
                  floordivExprRHS == binaryExpr.getRHS()) {
                // Save tile size (RHS of `mod`), and position of `floordiv` and
                // `mod` if same expr with `mod` is not found yet.
                if (!found) {
                  tileSizePos.emplace_back(
                      Args: std::make_tuple(args: binaryExpr.getRHS(), args&: floordivPos, args&: pos));
                  found = true;
                } else {
                  // Non tiled layout: Have multiple `mod` with the same LHS.
                  // eg. affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2
                  // mod 256, d2 mod 256)>
                  notTiled = true;
                }
              } else {
                // Non tiled layout: RHS of `mod` is different from `floordiv`.
                // eg. affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2
                // mod 128)>
                notTiled = true;
              }
            } else {
              // Non tiled layout: LHS is the same, but not `mod`.
              // eg. affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2
              // floordiv 256)>
              notTiled = true;
            }
          }
        });
      }
      // Discard partial results: the map is not a tiled layout (still
      // success(), signalled by the empty vector).
      if (notTiled) {
        tileSizePos = SmallVector<std::tuple<AffineExpr, unsigned, unsigned>>{};
        return success();
      }
      pos++;
    }
  }
  return success();
}
1580
1581/// Check if `dim` dimension of memrefType with `layoutMap` becomes dynamic
1582/// after normalization. Dimensions that include dynamic dimensions in the map
1583/// output will become dynamic dimensions. Return true if `dim` is dynamic
1584/// dimension.
1585///
1586/// Example:
1587/// #map0 = affine_map<(d0, d1) -> (d0, d1 floordiv 32, d1 mod 32)>
1588///
1589/// If d1 is dynamic dimension, 2nd and 3rd dimension of map output are dynamic.
1590/// memref<4x?xf32, #map0> ==> memref<4x?x?xf32>
1591static bool
1592isNormalizedMemRefDynamicDim(unsigned dim, AffineMap layoutMap,
1593 SmallVectorImpl<unsigned> &inMemrefTypeDynDims) {
1594 AffineExpr expr = layoutMap.getResults()[dim];
1595 // Check if affine expr of the dimension includes dynamic dimension of input
1596 // memrefType.
1597 MLIRContext *context = layoutMap.getContext();
1598 return expr
1599 .walk(callback: [&](AffineExpr e) {
1600 if (isa<AffineDimExpr>(Val: e) &&
1601 llvm::any_of(Range&: inMemrefTypeDynDims, P: [&](unsigned dim) {
1602 return e == getAffineDimExpr(position: dim, context);
1603 }))
1604 return WalkResult::interrupt();
1605 return WalkResult::advance();
1606 })
1607 .wasInterrupted();
1608}
1609
1610/// Create affine expr to calculate dimension size for a tiled-layout map.
1611static AffineExpr createDimSizeExprForTiledLayout(AffineExpr oldMapOutput,
1612 TileExprPattern pat) {
1613 // Create map output for the patterns.
1614 // "floordiv <tile size>" ==> "ceildiv <tile size>"
1615 // "mod <tile size>" ==> "<tile size>"
1616 AffineExpr newMapOutput;
1617 AffineBinaryOpExpr binaryExpr = nullptr;
1618 switch (pat) {
1619 case TileExprPattern::TileMod:
1620 binaryExpr = cast<AffineBinaryOpExpr>(Val&: oldMapOutput);
1621 newMapOutput = binaryExpr.getRHS();
1622 break;
1623 case TileExprPattern::TileFloorDiv:
1624 binaryExpr = cast<AffineBinaryOpExpr>(Val&: oldMapOutput);
1625 newMapOutput = getAffineBinaryOpExpr(
1626 kind: AffineExprKind::CeilDiv, lhs: binaryExpr.getLHS(), rhs: binaryExpr.getRHS());
1627 break;
1628 default:
1629 newMapOutput = oldMapOutput;
1630 }
1631 return newMapOutput;
1632}
1633
1634/// Create new maps to calculate each dimension size of `newMemRefType`, and
1635/// create `newDynamicSizes` from them by using AffineApplyOp.
1636///
1637/// Steps for normalizing dynamic memrefs for a tiled layout map
1638/// Example:
1639/// #map0 = affine_map<(d0, d1) -> (d0, d1 floordiv 32, d1 mod 32)>
1640/// %0 = dim %arg0, %c1 :memref<4x?xf32>
1641/// %1 = alloc(%0) : memref<4x?xf32, #map0>
1642///
1643/// (Before this function)
1644/// 1. Check if `map`(#map0) is a tiled layout using `getTileSizePos()`. Only
1645/// single layout map is supported.
1646///
1647/// 2. Create normalized memrefType using `isNormalizedMemRefDynamicDim()`. It
1648/// is memref<4x?x?xf32> in the above example.
1649///
1650/// (In this function)
1651/// 3. Create new maps to calculate each dimension of the normalized memrefType
1652/// using `createDimSizeExprForTiledLayout()`. In the tiled layout, the
1653/// dimension size can be calculated by replacing "floordiv <tile size>" with
1654/// "ceildiv <tile size>" and "mod <tile size>" with "<tile size>".
1655/// - New map in the above example
1656/// #map0 = affine_map<(d0, d1) -> (d0)>
1657/// #map1 = affine_map<(d0, d1) -> (d1 ceildiv 32)>
1658/// #map2 = affine_map<(d0, d1) -> (32)>
1659///
1660/// 4. Create AffineApplyOp to apply the new maps. The output of AffineApplyOp
1661/// is used in dynamicSizes of new AllocOp.
1662/// %0 = dim %arg0, %c1 : memref<4x?xf32>
1663/// %c4 = arith.constant 4 : index
1664/// %1 = affine.apply #map1(%c4, %0)
1665/// %2 = affine.apply #map2(%c4, %0)
1666template <typename AllocLikeOp>
1667static void createNewDynamicSizes(MemRefType oldMemRefType,
1668 MemRefType newMemRefType, AffineMap map,
1669 AllocLikeOp allocOp, OpBuilder b,
1670 SmallVectorImpl<Value> &newDynamicSizes) {
1671 // Create new input for AffineApplyOp.
1672 SmallVector<Value, 4> inAffineApply;
1673 ArrayRef<int64_t> oldMemRefShape = oldMemRefType.getShape();
1674 unsigned dynIdx = 0;
1675 for (unsigned d = 0; d < oldMemRefType.getRank(); ++d) {
1676 if (oldMemRefShape[d] < 0) {
1677 // Use dynamicSizes of allocOp for dynamic dimension.
1678 inAffineApply.emplace_back(allocOp.getDynamicSizes()[dynIdx]);
1679 dynIdx++;
1680 } else {
1681 // Create ConstantOp for static dimension.
1682 auto constantAttr = b.getIntegerAttr(type: b.getIndexType(), value: oldMemRefShape[d]);
1683 inAffineApply.emplace_back(
1684 b.create<arith::ConstantOp>(allocOp.getLoc(), constantAttr));
1685 }
1686 }
1687
1688 // Create new map to calculate each dimension size of new memref for each
1689 // original map output. Only for dynamic dimesion of `newMemRefType`.
1690 unsigned newDimIdx = 0;
1691 ArrayRef<int64_t> newMemRefShape = newMemRefType.getShape();
1692 SmallVector<std::tuple<AffineExpr, unsigned, unsigned>> tileSizePos;
1693 (void)getTileSizePos(map, tileSizePos);
1694 for (AffineExpr expr : map.getResults()) {
1695 if (newMemRefShape[newDimIdx] < 0) {
1696 // Create new maps to calculate each dimension size of new memref.
1697 enum TileExprPattern pat = TileExprPattern::TileNone;
1698 for (auto pos : tileSizePos) {
1699 if (newDimIdx == std::get<1>(t&: pos))
1700 pat = TileExprPattern::TileFloorDiv;
1701 else if (newDimIdx == std::get<2>(t&: pos))
1702 pat = TileExprPattern::TileMod;
1703 }
1704 AffineExpr newMapOutput = createDimSizeExprForTiledLayout(oldMapOutput: expr, pat);
1705 AffineMap newMap =
1706 AffineMap::get(dimCount: map.getNumInputs(), symbolCount: map.getNumSymbols(), result: newMapOutput);
1707 Value affineApp =
1708 b.create<AffineApplyOp>(allocOp.getLoc(), newMap, inAffineApply);
1709 newDynamicSizes.emplace_back(Args&: affineApp);
1710 }
1711 newDimIdx++;
1712 }
1713}
1714
/// Rewrites `allocOp` so the allocated memref has an identity layout: builds a
/// replacement alloc-like op with the normalized type and redirects every use
/// of the old memref through `replaceAllMemRefUsesWith`. Returns failure —
/// leaving the IR unchanged — when the type cannot be normalized or when some
/// use cannot be rewritten (e.g. the memref escapes).
template <typename AllocLikeOp>
LogicalResult mlir::affine::normalizeMemRef(AllocLikeOp allocOp) {
  MemRefType memrefType = allocOp.getType();
  OpBuilder b(allocOp);

  // Fetch a new memref type after normalizing the old memref to have an
  // identity map layout.
  MemRefType newMemRefType = normalizeMemRefType(memrefType);
  if (newMemRefType == memrefType)
    // Either memrefType already had an identity map or the map couldn't be
    // transformed to an identity map.
    return failure();

  Value oldMemRef = allocOp.getResult();

  SmallVector<Value, 4> symbolOperands(allocOp.getSymbolOperands());
  AffineMap layoutMap = memrefType.getLayout().getAffineMap();
  AllocLikeOp newAlloc;
  // Check if `layoutMap` is a tiled layout. Only single layout map is
  // supported for normalizing dynamic memrefs.
  SmallVector<std::tuple<AffineExpr, unsigned, unsigned>> tileSizePos;
  (void)getTileSizePos(map: layoutMap, tileSizePos);
  if (newMemRefType.getNumDynamicDims() > 0 && !tileSizePos.empty()) {
    // Dynamic result shape with a tiled layout: derive the new dynamic sizes
    // from the old ones through the layout map.
    auto oldMemRefType = cast<MemRefType>(Val: oldMemRef.getType());
    SmallVector<Value, 4> newDynamicSizes;
    createNewDynamicSizes(oldMemRefType, newMemRefType, layoutMap, allocOp, b,
                          newDynamicSizes);
    // Add the new dynamic sizes in new AllocOp.
    newAlloc =
        b.create<AllocLikeOp>(allocOp.getLoc(), newMemRefType, newDynamicSizes,
                              allocOp.getAlignmentAttr());
  } else {
    newAlloc = b.create<AllocLikeOp>(allocOp.getLoc(), newMemRefType,
                                     allocOp.getAlignmentAttr());
  }
  // Replace all uses of the old memref.
  if (failed(replaceAllMemRefUsesWith(oldMemRef, /*newMemRef=*/newAlloc,
                                      /*extraIndices=*/{},
                                      /*indexRemap=*/layoutMap,
                                      /*extraOperands=*/{},
                                      /*symbolOperands=*/symbolOperands,
                                      /*userFilterFn=*/nullptr,
                                      /*allowNonDereferencingOps=*/true))) {
    // If it failed (due to escapes for example), bail out.
    newAlloc.erase();
    return failure();
  }
  // Replace any uses of the original alloc op and erase it. All remaining uses
  // have to be dealloc's; RAMUW above would've failed otherwise.
  assert(llvm::all_of(oldMemRef.getUsers(), [&](Operation *op) {
    return hasSingleEffect<MemoryEffects::Free>(op, oldMemRef);
  }));
  oldMemRef.replaceAllUsesWith(newValue: newAlloc);
  allocOp.erase();
  return success();
}
1771
/// Normalizes the result type of a memref.reinterpret_cast to an identity
/// layout. The new sizes are computed by applying the old layout map to the
/// maximum index (size - 1) along each old dimension and adding 1, since an
/// affine layout maps the max input index to the max output index. Returns
/// failure, leaving the IR unchanged, if normalization or use-replacement is
/// not possible.
LogicalResult
mlir::affine::normalizeMemRef(memref::ReinterpretCastOp reinterpretCastOp) {
  MemRefType memrefType = reinterpretCastOp.getType();
  AffineMap oldLayoutMap = memrefType.getLayout().getAffineMap();
  Value oldMemRef = reinterpretCastOp.getResult();

  // If `oldLayoutMap` is identity, `memrefType` is already normalized.
  if (oldLayoutMap.isIdentity())
    return success();

  // Fetch a new memref type after normalizing the old memref to have an
  // identity map layout.
  MemRefType newMemRefType = normalizeMemRefType(memrefType);
  if (newMemRefType == memrefType)
    // `oldLayoutMap` couldn't be transformed to an identity map.
    return failure();

  uint64_t newRank = newMemRefType.getRank();
  SmallVector<Value> mapOperands(oldLayoutMap.getNumDims() +
                                 oldLayoutMap.getNumSymbols());
  SmallVector<Value> oldStrides = reinterpretCastOp.getStrides();
  Location loc = reinterpretCastOp.getLoc();
  // As `newMemRefType` is normalized, it is unit strided.
  SmallVector<int64_t> newStaticStrides(newRank, 1);
  SmallVector<int64_t> newStaticOffsets(newRank, 0);
  ArrayRef<int64_t> oldShape = memrefType.getShape();
  ValueRange oldSizes = reinterpretCastOp.getSizes();
  unsigned idx = 0;
  OpBuilder b(reinterpretCastOp);
  // Collect the map operands which will be used to compute the new normalized
  // memref shape. Each dim operand is the maximum index (size - 1) along the
  // corresponding old dimension.
  // NOTE(review): `oldSizes[0].getType()` is used as the result type for every
  // dynamic dimension; presumably all sizes are index-typed so this matches
  // `oldSizes[idx].getType()` — confirm.
  for (unsigned i = 0, e = memrefType.getRank(); i < e; i++) {
    if (memrefType.isDynamicDim(idx: i))
      mapOperands[i] =
          b.create<arith::SubIOp>(location: loc, args: oldSizes[0].getType(), args: oldSizes[idx++],
                                  args: b.create<arith::ConstantIndexOp>(location: loc, args: 1));
    else
      mapOperands[i] = b.create<arith::ConstantIndexOp>(location: loc, args: oldShape[i] - 1);
  }
  // The cast's strides feed the layout map's symbol operands.
  for (unsigned i = 0, e = oldStrides.size(); i < e; i++)
    mapOperands[memrefType.getRank() + i] = oldStrides[i];
  SmallVector<Value> newSizes;
  ArrayRef<int64_t> newShape = newMemRefType.getShape();
  // Compute size along all the dimensions of the new normalized memref.
  // Only dynamic output dimensions need a size value; static ones are carried
  // in `newShape`.
  for (unsigned i = 0; i < newRank; i++) {
    if (!newMemRefType.isDynamicDim(idx: i))
      continue;
    newSizes.push_back(Elt: b.create<AffineApplyOp>(
        location: loc,
        args: AffineMap::get(dimCount: oldLayoutMap.getNumDims(), symbolCount: oldLayoutMap.getNumSymbols(),
                       result: oldLayoutMap.getResult(idx: i)),
        args&: mapOperands));
  }
  // The applies above produced maximum indices; add 1 to turn them into sizes.
  for (unsigned i = 0, e = newSizes.size(); i < e; i++) {
    newSizes[i] =
        b.create<arith::AddIOp>(location: loc, args: newSizes[i].getType(), args&: newSizes[i],
                                args: b.create<arith::ConstantIndexOp>(location: loc, args: 1));
  }
  // Create the new reinterpret_cast op.
  auto newReinterpretCast = b.create<memref::ReinterpretCastOp>(
      location: loc, args&: newMemRefType, args: reinterpretCastOp.getSource(),
      /*offsets=*/args: ValueRange(), args&: newSizes,
      /*strides=*/args: ValueRange(),
      /*static_offsets=*/args&: newStaticOffsets,
      /*static_sizes=*/args&: newShape,
      /*static_strides=*/args&: newStaticStrides);

  // Replace all uses of the old memref.
  if (failed(Result: replaceAllMemRefUsesWith(oldMemRef,
                                          /*newMemRef=*/newReinterpretCast,
                                          /*extraIndices=*/{},
                                          /*indexRemap=*/oldLayoutMap,
                                          /*extraOperands=*/{},
                                          /*symbolOperands=*/oldStrides,
                                          /*userFilterFn=*/nullptr,
                                          /*allowNonDereferencingOps=*/true))) {
    // If it failed (due to escapes for example), bail out.
    newReinterpretCast.erase();
    return failure();
  }

  oldMemRef.replaceAllUsesWith(newValue: newReinterpretCast);
  reinterpretCastOp.erase();
  return success();
}
1857
// Explicit instantiations of the alloc-like `normalizeMemRef` template for the
// two ops it supports (both provide getSymbolOperands/getDynamicSizes/
// getAlignmentAttr used above).
template LogicalResult
mlir::affine::normalizeMemRef<memref::AllocaOp>(memref::AllocaOp op);
template LogicalResult
mlir::affine::normalizeMemRef<memref::AllocOp>(memref::AllocOp op);
1862
/// Computes the memref type obtained by rewriting `memrefType`'s layout to an
/// identity map: the layout's results become the dimensions of the new shape.
/// Returns `memrefType` unchanged when normalization is not possible (rank-0
/// memref, already-identity layout, unsupported dynamic shape, semi-affine
/// map, or a dimension whose upper bound is unknown or negative).
MemRefType mlir::affine::normalizeMemRefType(MemRefType memrefType) {
  unsigned rank = memrefType.getRank();
  if (rank == 0)
    return memrefType;

  if (memrefType.getLayout().isIdentity()) {
    // Either no maps is associated with this memref or this memref has
    // a trivial (identity) map.
    return memrefType;
  }
  AffineMap layoutMap = memrefType.getLayout().getAffineMap();
  unsigned numSymbolicOperands = layoutMap.getNumSymbols();

  // We don't do any checks for one-to-one'ness; we assume that it is
  // one-to-one.

  // Normalize only static memrefs and dynamic memrefs with a tiled-layout map
  // for now.
  // TODO: Normalize the other types of dynamic memrefs.
  SmallVector<std::tuple<AffineExpr, unsigned, unsigned>> tileSizePos;
  (void)getTileSizePos(map: layoutMap, tileSizePos);
  if (memrefType.getNumDynamicDims() > 0 && tileSizePos.empty())
    return memrefType;

  // We have a single map that is not an identity map. Create a new memref
  // with the right shape and an identity layout map.
  ArrayRef<int64_t> shape = memrefType.getShape();
  // FlatAffineValueConstraint may later on use symbolicOperands.
  FlatAffineValueConstraints fac(rank, numSymbolicOperands);
  SmallVector<unsigned, 4> memrefTypeDynDims;
  for (unsigned d = 0; d < rank; ++d) {
    // Use constraint system only in static dimensions.
    if (shape[d] > 0) {
      fac.addBound(type: BoundType::LB, pos: d, value: 0);
      fac.addBound(type: BoundType::UB, pos: d, value: shape[d] - 1);
    } else {
      // Dynamic dimensions are handled separately via
      // isNormalizedMemRefDynamicDim below.
      memrefTypeDynDims.emplace_back(Args&: d);
    }
  }
  // We compose this map with the original index (logical) space to derive
  // the upper bounds for the new index space.
  unsigned newRank = layoutMap.getNumResults();
  if (failed(Result: fac.composeMatchingMap(other: layoutMap)))
    return memrefType;
  // TODO: Handle semi-affine maps.
  // Project out the old data dimensions.
  fac.projectOut(pos: newRank, num: fac.getNumVars() - newRank - fac.getNumLocalVars());
  SmallVector<int64_t, 4> newShape(newRank);
  MLIRContext *context = memrefType.getContext();
  for (unsigned d = 0; d < newRank; ++d) {
    // Check if this dimension is dynamic.
    if (isNormalizedMemRefDynamicDim(dim: d, layoutMap, inMemrefTypeDynDims&: memrefTypeDynDims)) {
      newShape[d] = ShapedType::kDynamic;
      continue;
    }
    // The lower bound for the shape is always zero.
    std::optional<int64_t> ubConst = fac.getConstantBound64(type: BoundType::UB, pos: d);
    // For a static memref and an affine map with no symbols, this is
    // always bounded. However, when we have symbols, we may not be able to
    // obtain a constant upper bound. Also, mapping to a negative space is
    // invalid for normalization.
    if (!ubConst.has_value() || *ubConst < 0) {
      LLVM_DEBUG(llvm::dbgs()
                 << "can't normalize map due to unknown/invalid upper bound");
      return memrefType;
    }
    // The bound is an inclusive maximum index; the size is one more.
    newShape[d] = *ubConst + 1;
  }

  // Create the new memref type after trivializing the old layout map.
  auto newMemRefType =
      MemRefType::Builder(memrefType)
          .setShape(newShape)
          .setLayout(AffineMapAttr::get(
              value: AffineMap::getMultiDimIdentityMap(numDims: newRank, context)));
  return newMemRefType;
}
1941
1942DivModValue mlir::affine::getDivMod(OpBuilder &b, Location loc, Value lhs,
1943 Value rhs) {
1944 DivModValue result;
1945 AffineExpr d0, d1;
1946 bindDims(ctx: b.getContext(), exprs&: d0, exprs&: d1);
1947 result.quotient =
1948 affine::makeComposedAffineApply(b, loc, e: d0.floorDiv(other: d1), operands: {lhs, rhs});
1949 result.remainder =
1950 affine::makeComposedAffineApply(b, loc, e: d0 % d1, operands: {lhs, rhs});
1951 return result;
1952}
1953
1954/// Create an affine map that computes `lhs` * `rhs`, composing in any other
1955/// affine maps.
1956static FailureOr<OpFoldResult> composedAffineMultiply(OpBuilder &b,
1957 Location loc,
1958 OpFoldResult lhs,
1959 OpFoldResult rhs) {
1960 AffineExpr s0, s1;
1961 bindSymbols(ctx: b.getContext(), exprs&: s0, exprs&: s1);
1962 return makeComposedFoldedAffineApply(b, loc, expr: s0 * s1, operands: {lhs, rhs});
1963}
1964
1965FailureOr<SmallVector<Value>>
1966mlir::affine::delinearizeIndex(OpBuilder &b, Location loc, Value linearIndex,
1967 ArrayRef<Value> basis, bool hasOuterBound) {
1968 if (hasOuterBound)
1969 basis = basis.drop_front();
1970
1971 // Note: the divisors are backwards due to the scan.
1972 SmallVector<Value> divisors;
1973 OpFoldResult basisProd = b.getIndexAttr(value: 1);
1974 for (OpFoldResult basisElem : llvm::reverse(C&: basis)) {
1975 FailureOr<OpFoldResult> nextProd =
1976 composedAffineMultiply(b, loc, lhs: basisElem, rhs: basisProd);
1977 if (failed(Result: nextProd))
1978 return failure();
1979 basisProd = *nextProd;
1980 divisors.push_back(Elt: getValueOrCreateConstantIndexOp(b, loc, ofr: basisProd));
1981 }
1982
1983 SmallVector<Value> results;
1984 results.reserve(N: divisors.size() + 1);
1985 Value residual = linearIndex;
1986 for (Value divisor : llvm::reverse(C&: divisors)) {
1987 DivModValue divMod = getDivMod(b, loc, lhs: residual, rhs: divisor);
1988 results.push_back(Elt: divMod.quotient);
1989 residual = divMod.remainder;
1990 }
1991 results.push_back(Elt: residual);
1992 return results;
1993}
1994
1995FailureOr<SmallVector<Value>>
1996mlir::affine::delinearizeIndex(OpBuilder &b, Location loc, Value linearIndex,
1997 ArrayRef<OpFoldResult> basis,
1998 bool hasOuterBound) {
1999 if (hasOuterBound)
2000 basis = basis.drop_front();
2001
2002 // Note: the divisors are backwards due to the scan.
2003 SmallVector<Value> divisors;
2004 OpFoldResult basisProd = b.getIndexAttr(value: 1);
2005 for (OpFoldResult basisElem : llvm::reverse(C&: basis)) {
2006 FailureOr<OpFoldResult> nextProd =
2007 composedAffineMultiply(b, loc, lhs: basisElem, rhs: basisProd);
2008 if (failed(Result: nextProd))
2009 return failure();
2010 basisProd = *nextProd;
2011 divisors.push_back(Elt: getValueOrCreateConstantIndexOp(b, loc, ofr: basisProd));
2012 }
2013
2014 SmallVector<Value> results;
2015 results.reserve(N: divisors.size() + 1);
2016 Value residual = linearIndex;
2017 for (Value divisor : llvm::reverse(C&: divisors)) {
2018 DivModValue divMod = getDivMod(b, loc, lhs: residual, rhs: divisor);
2019 results.push_back(Elt: divMod.quotient);
2020 residual = divMod.remainder;
2021 }
2022 results.push_back(Elt: residual);
2023 return results;
2024}
2025
/// Convenience overload: linearizes `multiIndex` against `basis` at the
/// location carried by `builder`.
OpFoldResult mlir::affine::linearizeIndex(ArrayRef<OpFoldResult> multiIndex,
                                          ArrayRef<OpFoldResult> basis,
                                          ImplicitLocOpBuilder &builder) {
  return linearizeIndex(builder, loc: builder.getLoc(), multiIndex, basis);
}
2031
2032OpFoldResult mlir::affine::linearizeIndex(OpBuilder &builder, Location loc,
2033 ArrayRef<OpFoldResult> multiIndex,
2034 ArrayRef<OpFoldResult> basis) {
2035 assert(multiIndex.size() == basis.size() ||
2036 multiIndex.size() == basis.size() + 1);
2037 SmallVector<AffineExpr> basisAffine;
2038
2039 // Add a fake initial size in order to make the later index linearization
2040 // computations line up if an outer bound is not provided.
2041 if (multiIndex.size() == basis.size() + 1)
2042 basisAffine.push_back(Elt: getAffineConstantExpr(constant: 1, context: builder.getContext()));
2043
2044 for (size_t i = 0; i < basis.size(); ++i) {
2045 basisAffine.push_back(Elt: getAffineSymbolExpr(position: i, context: builder.getContext()));
2046 }
2047
2048 SmallVector<AffineExpr> stridesAffine = computeStrides(sizes: basisAffine);
2049 SmallVector<OpFoldResult> strides;
2050 strides.reserve(N: stridesAffine.size());
2051 llvm::transform(Range&: stridesAffine, d_first: std::back_inserter(x&: strides),
2052 F: [&builder, &basis, loc](AffineExpr strideExpr) {
2053 return affine::makeComposedFoldedAffineApply(
2054 b&: builder, loc, expr: strideExpr, operands: basis);
2055 });
2056
2057 auto &&[linearIndexExpr, multiIndexAndStrides] = computeLinearIndex(
2058 sourceOffset: OpFoldResult(builder.getIndexAttr(value: 0)), strides, indices: multiIndex);
2059 return affine::makeComposedFoldedAffineApply(b&: builder, loc, expr: linearIndexExpr,
2060 operands: multiIndexAndStrides);
2061}
2062

source code of mlir/lib/Dialect/Affine/Utils/Utils.cpp