//===- AffineAnalysis.cpp - Affine structures analysis routines -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements miscellaneous analysis routines for affine structures
// (expressions, maps, sets), and other utilities relying on such analysis.
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
#include "mlir/Analysis/Presburger/IntegerRelation.h"
#include "mlir/Analysis/Presburger/PresburgerSpace.h"
#include "mlir/Analysis/SliceAnalysis.h"
#include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
#include "mlir/Dialect/Affine/Analysis/Utils.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Affine/IR/AffineValueMap.h"
#include "mlir/IR/AffineExprVisitor.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/IntegerSet.h"
#include "mlir/Interfaces/SideEffectInterfaces.h"
#include "mlir/Interfaces/ViewLikeInterface.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <optional>

#define DEBUG_TYPE "affine-analysis"

using namespace mlir;
using namespace affine;
using namespace presburger;

/// Get the value that is being reduced by the `pos`-th reduction in the loop
/// if such a reduction can be performed by affine parallel loops. This assumes
/// floating-point operations are commutative. On success, `kind` will be the
/// reduction kind suitable for use in the affine parallel loop builder. If the
/// reduction is not supported, returns null.
static Value getSupportedReduction(AffineForOp forOp, unsigned pos,
                                   arith::AtomicRMWKind &kind) {
  SmallVector<Operation *> combinerOps;
  Value reducedVal =
      matchReduction(forOp.getRegionIterArgs(), pos, combinerOps);
  if (!reducedVal)
    return nullptr;

  // Expected only one combiner operation.
  if (combinerOps.size() > 1)
    return nullptr;

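  // A single combiner operation determines the reduction kind; map it to the
  // corresponding arith::AtomicRMWKind below.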
  Operation *combinerOp = combinerOps.back();
  std::optional<arith::AtomicRMWKind> maybeKind =
      TypeSwitch<Operation *, std::optional<arith::AtomicRMWKind>>(combinerOp)
          .Case([](arith::AddFOp) { return arith::AtomicRMWKind::addf; })
          .Case([](arith::MulFOp) { return arith::AtomicRMWKind::mulf; })
          .Case([](arith::AddIOp) { return arith::AtomicRMWKind::addi; })
          .Case([](arith::AndIOp) { return arith::AtomicRMWKind::andi; })
          .Case([](arith::OrIOp) { return arith::AtomicRMWKind::ori; })
          .Case([](arith::MulIOp) { return arith::AtomicRMWKind::muli; })
          .Case(
              [](arith::MinimumFOp) { return arith::AtomicRMWKind::minimumf; })
          .Case(
              [](arith::MaximumFOp) { return arith::AtomicRMWKind::maximumf; })
          .Case([](arith::MinSIOp) { return arith::AtomicRMWKind::mins; })
          .Case([](arith::MaxSIOp) { return arith::AtomicRMWKind::maxs; })
          .Case([](arith::MinUIOp) { return arith::AtomicRMWKind::minu; })
          .Case([](arith::MaxUIOp) { return arith::AtomicRMWKind::maxu; })
          .Default([](Operation *) -> std::optional<arith::AtomicRMWKind> {
            // TODO: AtomicRMW supports other kinds of reductions that are
            // currently not detected here; add those when the need arises.
            return std::nullopt;
          });
  if (!maybeKind)
    return nullptr;

  kind = *maybeKind;
  return reducedVal;
}

/// Populate `supportedReductions` with descriptors of the supported reductions.
void mlir::affine::getSupportedReductions(
    AffineForOp forOp, SmallVectorImpl<LoopReduction> &supportedReductions) {
  unsigned numIterArgs = forOp.getNumIterOperands();
  if (numIterArgs == 0)
    return;
  supportedReductions.reserve(numIterArgs);
  for (unsigned i = 0; i < numIterArgs; ++i) {
    arith::AtomicRMWKind kind;
    if (Value value = getSupportedReduction(forOp, i, kind))
      supportedReductions.emplace_back(LoopReduction{kind, i, value});
  }
}

/// Returns true if `forOp' is a parallel loop. If `parallelReductions` is
/// provided, populates it with descriptors of the parallelizable reductions
/// and treats them as not preventing parallelization.
bool mlir::affine::isLoopParallel(
    AffineForOp forOp, SmallVectorImpl<LoopReduction> *parallelReductions) {
  unsigned numIterArgs = forOp.getNumIterOperands();

  // Loop is not parallel if it has SSA loop-carried dependences and reduction
  // detection is not requested.
  if (numIterArgs > 0 && !parallelReductions)
    return false;

  // Find supported reductions if requested.
  if (parallelReductions) {
    getSupportedReductions(forOp, *parallelReductions);
    // Return only after all reductions have been inspected so that every
    // parallel reduction is identified even if the loop is not parallel.
    if (parallelReductions->size() != numIterArgs)
      return false;
  }

  // Check memory dependences.
  return isLoopMemoryParallel(forOp);
}
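
// Example use (illustrative sketch; `forOp` stands for some affine.for op):
//   SmallVector<LoopReduction> reductions;
//   if (isLoopParallel(forOp, &reductions)) {
//     // `forOp` has no parallelism-preventing dependences; `reductions`
//     // describes how each of its iter_args is combined.
//   }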

/// Returns true if `v` is allocated locally to `enclosingOp` -- i.e., it is
/// allocated by an operation nested within `enclosingOp`.
static bool isLocallyDefined(Value v, Operation *enclosingOp) {
  Operation *defOp = v.getDefiningOp();
  if (!defOp)
    return false;

  if (hasSingleEffect<MemoryEffects::Allocate>(defOp, v) &&
      enclosingOp->isProperAncestor(defOp))
    return true;

  // Aliasing ops.
  auto viewOp = dyn_cast<ViewLikeOpInterface>(defOp);
  return viewOp && isLocallyDefined(viewOp.getViewSource(), enclosingOp);
}

bool mlir::affine::isLoopMemoryParallel(AffineForOp forOp) {
  // Any memref-typed iteration arguments are treated as serializing.
  if (llvm::any_of(forOp.getResultTypes(), llvm::IsaPred<BaseMemRefType>))
    return false;

  // Collect all load and store ops in loop nest rooted at 'forOp'.
  SmallVector<Operation *, 8> loadAndStoreOps;
  auto walkResult = forOp.walk([&](Operation *op) -> WalkResult {
    if (auto readOp = dyn_cast<AffineReadOpInterface>(op)) {
      // Memrefs that are allocated inside `forOp` need not be considered.
      if (!isLocallyDefined(readOp.getMemRef(), forOp))
        loadAndStoreOps.push_back(op);
    } else if (auto writeOp = dyn_cast<AffineWriteOpInterface>(op)) {
      // Filter out stores the same way as above.
      if (!isLocallyDefined(writeOp.getMemRef(), forOp))
        loadAndStoreOps.push_back(op);
    } else if (!isa<AffineForOp, AffineYieldOp, AffineIfOp>(op) &&
               !hasSingleEffect<MemoryEffects::Allocate>(op) &&
               !isMemoryEffectFree(op)) {
      // Alloc-like ops inside `forOp` are fine (they don't impact parallelism)
      // as long as they don't escape the loop (which has been checked above).
      return WalkResult::interrupt();
    }

    return WalkResult::advance();
  });

  // Stop early if the loop has unknown ops with side effects.
  if (walkResult.wasInterrupted())
    return false;

  // Dep check depth would be number of enclosing loops + 1.
  unsigned depth = getNestingDepth(forOp) + 1;

  // Check dependences between all pairs of ops in 'loadAndStoreOps'.
  for (auto *srcOp : loadAndStoreOps) {
    MemRefAccess srcAccess(srcOp);
    for (auto *dstOp : loadAndStoreOps) {
      MemRefAccess dstAccess(dstOp);
      DependenceResult result =
          checkMemrefAccessDependence(srcAccess, dstAccess, depth);
      if (result.value != DependenceResult::NoDependence)
        return false;
    }
  }
  return true;
}

/// Returns in 'affineApplyOps' the sequence of AffineApplyOp operations that
/// are reachable via a search starting from 'operands' and ending at operands
/// which are not defined by AffineApplyOps.
// TODO: Add a method to AffineApplyOp which forward substitutes the
// AffineApplyOp into any user AffineApplyOps.
void mlir::affine::getReachableAffineApplyOps(
    ArrayRef<Value> operands, SmallVectorImpl<Operation *> &affineApplyOps) {
  struct State {
    // The ssa value for this node in the DFS traversal.
    Value value;
    // The operand index of 'value' to explore next during DFS traversal.
    unsigned operandIndex;
  };
  SmallVector<State, 4> worklist;
  for (auto operand : operands) {
    worklist.push_back({operand, 0});
  }

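  // Explicit-stack DFS: the pre-visit step records an AffineApplyOp, each
  // visit step pushes its next operand, and the post-visit step pops it.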
  while (!worklist.empty()) {
    State &state = worklist.back();
    auto *opInst = state.value.getDefiningOp();
    // Note: getDefiningOp will return nullptr if the operand is not an
    // Operation (i.e. block argument), which is a terminator for the search.
    if (!isa_and_nonnull<AffineApplyOp>(opInst)) {
      worklist.pop_back();
      continue;
    }

    if (state.operandIndex == 0) {
      // Pre-Visit: Add 'opInst' to reachable sequence.
      affineApplyOps.push_back(opInst);
    }
    if (state.operandIndex < opInst->getNumOperands()) {
      // Visit: Add next 'affineApplyOp' operand to worklist.
      // Get next operand to visit at 'operandIndex'.
      auto nextOperand = opInst->getOperand(state.operandIndex);
      // Increment 'operandIndex' in 'state'.
      ++state.operandIndex;
      // Add 'nextOperand' to worklist.
      worklist.push_back({nextOperand, 0});
    } else {
      // Post-visit: done visiting all operands of the AffineApplyOp, pop it
      // off the stack.
      worklist.pop_back();
    }
  }
}

// Builds a system of constraints with dimensional variables corresponding to
// the loop IVs of the forOps appearing in that order. Any symbols found in
// the bound operands are added as symbols in the system. Returns failure for
// the yet unimplemented cases.
// TODO: Handle non-unit steps through local variables or stride information in
// FlatAffineValueConstraints (e.g., by using iv - lb % step = 0 and/or by
// introducing a method in FlatAffineValueConstraints
// setExprStride(ArrayRef<int64_t> expr, int64_t stride)).
LogicalResult mlir::affine::getIndexSet(MutableArrayRef<Operation *> ops,
                                        FlatAffineValueConstraints *domain) {
  SmallVector<Value, 4> indices;
  SmallVector<Operation *, 8> loopOps;
  size_t numDims = 0;
  for (Operation *op : ops) {
    if (!isa<AffineForOp, AffineIfOp, AffineParallelOp>(op)) {
      LLVM_DEBUG(llvm::dbgs() << "getIndexSet only handles affine.for/if/"
                                 "parallel ops");
      return failure();
    }
    if (AffineForOp forOp = dyn_cast<AffineForOp>(op)) {
      loopOps.push_back(forOp);
      // An AffineForOp has exactly one induction variable.
      numDims += 1;
    } else if (AffineParallelOp parallelOp = dyn_cast<AffineParallelOp>(op)) {
      loopOps.push_back(parallelOp);
      numDims += parallelOp.getNumDims();
    }
  }
  extractInductionVars(loopOps, indices);
  // Reset while associating Values in 'indices' to the domain.
  *domain = FlatAffineValueConstraints(numDims, /*numSymbols=*/0,
                                       /*numLocals=*/0, indices);
  for (Operation *op : ops) {
    // Add constraints from forOp's bounds.
    if (AffineForOp forOp = dyn_cast<AffineForOp>(op)) {
      if (failed(domain->addAffineForOpDomain(forOp)))
        return failure();
    } else if (auto ifOp = dyn_cast<AffineIfOp>(op)) {
      domain->addAffineIfOpDomain(ifOp);
    } else if (auto parallelOp = dyn_cast<AffineParallelOp>(op))
      if (failed(domain->addAffineParallelOpDomain(parallelOp)))
        return failure();
  }
  return success();
}

/// Computes the iteration domain for 'op' and populates 'indexSet', which
/// encapsulates the constraints involving loops surrounding 'op' and
/// potentially involving any Function symbols. The dimensional variables in
/// 'indexSet' correspond to the loops surrounding 'op' from outermost to
/// innermost.
static LogicalResult getOpIndexSet(Operation *op,
                                   FlatAffineValueConstraints *indexSet) {
  SmallVector<Operation *, 4> ops;
  getEnclosingAffineOps(*op, &ops);
  return getIndexSet(ops, indexSet);
}

// Returns the number of outer loops common to 'src/dstDomain'.
// Loops common to 'src/dst' domains are added to 'commonLoops' if non-null.
static unsigned
getNumCommonLoops(const FlatAffineValueConstraints &srcDomain,
                  const FlatAffineValueConstraints &dstDomain,
                  SmallVectorImpl<AffineForOp> *commonLoops = nullptr) {
  // Find the number of common loops shared by src and dst accesses.
  unsigned minNumLoops =
      std::min(srcDomain.getNumDimVars(), dstDomain.getNumDimVars());
  unsigned numCommonLoops = 0;
  for (unsigned i = 0; i < minNumLoops; ++i) {
    if ((!isAffineForInductionVar(srcDomain.getValue(i)) &&
         !isAffineParallelInductionVar(srcDomain.getValue(i))) ||
        (!isAffineForInductionVar(dstDomain.getValue(i)) &&
         !isAffineParallelInductionVar(dstDomain.getValue(i))) ||
        srcDomain.getValue(i) != dstDomain.getValue(i))
      break;
    if (commonLoops != nullptr)
      commonLoops->push_back(getForInductionVarOwner(srcDomain.getValue(i)));
    ++numCommonLoops;
  }
  if (commonLoops != nullptr)
    assert(commonLoops->size() == numCommonLoops);
  return numCommonLoops;
}

/// Returns the closest surrounding block common to `opA` and `opB`. `opA` and
/// `opB` should be in the same affine scope. Returns nullptr if such a block
/// does not exist (when the two ops are in different blocks of an op starting
/// an `AffineScope`).
static Block *getCommonBlockInAffineScope(Operation *opA, Operation *opB) {
  // Get the chain of ancestor blocks for the given op. The chain extends up
  // to and including an op that starts an affine scope.
  auto getChainOfAncestorBlocks =
      [&](Operation *op, SmallVectorImpl<Block *> &ancestorBlocks) {
        Block *currBlock = op->getBlock();
        // Loop terminates when currBlock is nullptr or its parent operation
        // holds an affine scope.
        while (currBlock &&
               !currBlock->getParentOp()->hasTrait<OpTrait::AffineScope>()) {
          ancestorBlocks.push_back(currBlock);
          currBlock = currBlock->getParentOp()->getBlock();
        }
        assert(currBlock &&
               "parent op starting an affine scope is always expected");
        ancestorBlocks.push_back(currBlock);
      };

  // Find the closest common block.
  SmallVector<Block *, 4> srcAncestorBlocks, dstAncestorBlocks;
  getChainOfAncestorBlocks(opA, srcAncestorBlocks);
  getChainOfAncestorBlocks(opB, dstAncestorBlocks);

  Block *commonBlock = nullptr;
  for (int i = srcAncestorBlocks.size() - 1, j = dstAncestorBlocks.size() - 1;
       i >= 0 && j >= 0 && srcAncestorBlocks[i] == dstAncestorBlocks[j];
       i--, j--)
    commonBlock = srcAncestorBlocks[i];

  return commonBlock;
}

/// Returns true if the ancestor operation of 'srcAccess' appears before the
/// ancestor operation of 'dstAccess' in their common ancestral block. The
/// operations for `srcAccess` and `dstAccess` are expected to be in the same
/// affine scope and have a common surrounding block within it.
static bool srcAppearsBeforeDstInAncestralBlock(const MemRefAccess &srcAccess,
                                                const MemRefAccess &dstAccess) {
  // Get Block common to 'srcAccess.opInst' and 'dstAccess.opInst'.
  Block *commonBlock =
      getCommonBlockInAffineScope(srcAccess.opInst, dstAccess.opInst);
  assert(commonBlock &&
         "ops expected to have a common surrounding block in affine scope");

  // Check the ordering of the respective ancestors of the src and dst ops in
  // their common block.
  Operation *srcOp = commonBlock->findAncestorOpInBlock(*srcAccess.opInst);
  assert(srcOp && "src access op must lie in common block");
  Operation *dstOp = commonBlock->findAncestorOpInBlock(*dstAccess.opInst);
  assert(dstOp && "dest access op must lie in common block");

  // Determine whether dstOp comes after srcOp.
  return srcOp->isBeforeInBlock(dstOp);
}

// Adds ordering constraints to 'dependenceDomain' based on number of loops
// common to 'src/dstDomain' and requested 'loopDepth'.
// Note that 'loopDepth' cannot exceed the number of common loops plus one.
// EX: Given a loop nest of depth 2 with IVs 'i' and 'j':
// *) If 'loopDepth == 1' then one constraint is added: i' >= i + 1
// *) If 'loopDepth == 2' then two constraints are added: i == i' and j' >= j + 1
// *) If 'loopDepth == 3' then two constraints are added: i == i' and j == j'
static void addOrderingConstraints(const FlatAffineValueConstraints &srcDomain,
                                   const FlatAffineValueConstraints &dstDomain,
                                   unsigned loopDepth,
                                   IntegerRelation *dependenceDomain) {
  unsigned numCols = dependenceDomain->getNumCols();
  SmallVector<int64_t, 4> eq(numCols);
  unsigned numSrcDims = srcDomain.getNumDimVars();
  unsigned numCommonLoops = getNumCommonLoops(srcDomain, dstDomain);
  unsigned numCommonLoopConstraints = std::min(numCommonLoops, loopDepth);
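  // Constraint columns are laid out as
  //   [src-dim-variables, dst-dim-variables, symbols, constant].
  // For each common loop shallower than 'loopDepth', require the src and dst
  // IVs to be equal; at 'loopDepth' itself, require dst IV >= src IV + 1 so
  // that the source iteration strictly precedes the destination iteration.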
  for (unsigned i = 0; i < numCommonLoopConstraints; ++i) {
    std::fill(eq.begin(), eq.end(), 0);
    eq[i] = -1;
    eq[i + numSrcDims] = 1;
    if (i == loopDepth - 1) {
      eq[numCols - 1] = -1;
      dependenceDomain->addInequality(eq);
    } else {
      dependenceDomain->addEquality(eq);
    }
  }
}

// Computes distance and direction vectors in 'dependences', by adding
// variables to 'dependenceDomain' which represent the difference of the IVs,
// eliminating all other variables, and reading off distance vectors from
// equality constraints (if possible), and direction vectors from inequalities.
static void computeDirectionVector(
    const FlatAffineValueConstraints &srcDomain,
    const FlatAffineValueConstraints &dstDomain, unsigned loopDepth,
    IntegerPolyhedron *dependenceDomain,
    SmallVector<DependenceComponent, 2> *dependenceComponents) {
  // Find the number of common loops shared by src and dst accesses.
  SmallVector<AffineForOp, 4> commonLoops;
  unsigned numCommonLoops =
      getNumCommonLoops(srcDomain, dstDomain, &commonLoops);
  if (numCommonLoops == 0)
    return;
  // Compute direction vectors for requested loop depth.
  unsigned numIdsToEliminate = dependenceDomain->getNumVars();
  // Add new variables to 'dependenceDomain' to represent the direction
  // constraints for each shared loop.
  dependenceDomain->insertVar(VarKind::SetDim, /*pos=*/0,
                              /*num=*/numCommonLoops);

  // Add equality constraints for each common loop, setting newly introduced
  // variable at column 'j' to the 'dst' IV minus the 'src' IV.
  SmallVector<int64_t, 4> eq;
  eq.resize(dependenceDomain->getNumCols());
  unsigned numSrcDims = srcDomain.getNumDimVars();
  // Constraint variables format:
  // [num-common-loops][num-src-dim-ids][num-dst-dim-ids][num-symbols][constant]
  for (unsigned j = 0; j < numCommonLoops; ++j) {
    std::fill(eq.begin(), eq.end(), 0);
    eq[j] = 1;
    eq[j + numCommonLoops] = 1;
    eq[j + numCommonLoops + numSrcDims] = -1;
    dependenceDomain->addEquality(eq);
  }

  // Eliminate all variables other than the direction variables just added.
  dependenceDomain->projectOut(numCommonLoops, numIdsToEliminate);
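  // After projection, columns [0, numCommonLoops) hold the per-loop iteration
  // differences (dst IV - src IV); their constant lower/upper bounds, when
  // they exist, give the dependence distance components read off below.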

  // Scan each common loop variable column and set direction vectors based
  // on eliminated constraint system.
  dependenceComponents->resize(numCommonLoops);
  for (unsigned j = 0; j < numCommonLoops; ++j) {
    (*dependenceComponents)[j].op = commonLoops[j].getOperation();
    auto lbConst = dependenceDomain->getConstantBound64(BoundType::LB, j);
    (*dependenceComponents)[j].lb =
        lbConst.value_or(std::numeric_limits<int64_t>::min());
    auto ubConst = dependenceDomain->getConstantBound64(BoundType::UB, j);
    (*dependenceComponents)[j].ub =
        ubConst.value_or(std::numeric_limits<int64_t>::max());
  }
}

LogicalResult MemRefAccess::getAccessRelation(IntegerRelation &rel) const {
  // Create set corresponding to domain of access.
  FlatAffineValueConstraints domain;
  if (failed(getOpIndexSet(opInst, &domain)))
    return failure();

  // Get access relation from access map.
  AffineValueMap accessValueMap;
  getAccessMap(&accessValueMap);
  if (failed(getRelationFromMap(accessValueMap, rel)))
    return failure();

  // Merge and align domain ids of `rel` with ids of `domain`. Since the domain
  // of the access map is a subset of the domain of access, the domain ids of
  // `rel` are guaranteed to be a subset of ids of `domain`.
  unsigned inserts = 0;
  for (unsigned i = 0, e = domain.getNumDimVars(); i < e; ++i) {
    const Identifier domainIdi = Identifier(domain.getValue(i));
    const Identifier *findBegin = rel.getIds(VarKind::SetDim).begin() + i;
    const Identifier *findEnd = rel.getIds(VarKind::SetDim).end();
    const Identifier *itr = std::find(findBegin, findEnd, domainIdi);
    if (itr != findEnd) {
      rel.swapVar(i, i + std::distance(findBegin, itr));
    } else {
      ++inserts;
      rel.insertVar(VarKind::SetDim, i);
      rel.setId(VarKind::SetDim, i, domainIdi);
    }
  }

  // Append domain constraints to `rel`.
  IntegerRelation domainRel = domain;
  if (rel.getSpace().isUsingIds() && !domainRel.getSpace().isUsingIds())
    domainRel.resetIds();
  domainRel.appendVar(VarKind::Range, accessValueMap.getNumResults());
  domainRel.mergeAndAlignSymbols(rel);
  domainRel.mergeLocalVars(rel);
  rel.append(domainRel);

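  // Treat the leading set dimensions (the loop IVs plus any variables inserted
  // above) as the domain of the access relation; the memref dimensions
  // produced by the access map form its range.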
  rel.convertVarKind(VarKind::SetDim, 0, accessValueMap.getNumDims() + inserts,
                     VarKind::Domain);

  return success();
}

// Populates 'accessMap' with composition of AffineApplyOps reachable from
// indices of MemRefAccess.
void MemRefAccess::getAccessMap(AffineValueMap *accessMap) const {
  // Get affine map from AffineLoad/Store.
  AffineMap map;
  if (auto loadOp = dyn_cast<AffineReadOpInterface>(opInst))
    map = loadOp.getAffineMap();
  else
    map = cast<AffineWriteOpInterface>(opInst).getAffineMap();

  SmallVector<Value, 8> operands(indices.begin(), indices.end());
  fullyComposeAffineMapAndOperands(&map, &operands);
  map = simplifyAffineMap(map);
  canonicalizeMapAndOperands(&map, &operands);
  accessMap->reset(map, operands);
}

// Builds a flat affine constraint system to check if there exists a dependence
// between memref accesses 'srcAccess' and 'dstAccess'.
// Returns 'NoDependence' if the accesses can be definitively shown not to
// access the same element.
// Returns 'HasDependence' if the accesses do access the same element.
// Returns 'Failure' if an error or unsupported case was encountered.
// If a dependence exists, returns in 'dependenceComponents' a direction
// vector for the dependence, with a component for each loop IV in loops
// common to both accesses (see Dependence in AffineAnalysis.h for details).
//
// The memref access dependence check is comprised of the following steps:
// *) Build an access relation for each access. An access relation maps
//    elements of an iteration domain to the element(s) of an array domain
//    accessed by that iteration of the associated statement through some
//    array reference.
// *) Compute the dependence relation by composing the access relation of
//    `srcAccess` with the inverse of the access relation of `dstAccess`.
//    Doing this builds a relation from the iteration domain of `srcAccess`
//    to the iteration domain of `dstAccess` for iterations that access the
//    same memory location.
// *) Add ordering constraints for `srcAccess` to be accessed before
//    `dstAccess`.
//
// This method builds a constraint system with the following column format:
//
//   [src-dim-variables, dst-dim-variables, symbols, constant]
//
// For example, given the following MLIR code with "source" and "destination"
// accesses to the same memref %m, and symbols %M, %N, %K:
//
//   affine.for %i0 = 0 to 100 {
//     affine.for %i1 = 0 to 50 {
//       %a0 = affine.apply
//         (d0, d1) -> (d0 * 2 - d1 * 4 + s1, d1 * 3 - s0) (%i0, %i1)[%M, %N]
//       // Source memref access.
//       store %v0, %m[%a0#0, %a0#1] : memref<4x4xf32>
//     }
//   }
//
//   affine.for %i2 = 0 to 100 {
//     affine.for %i3 = 0 to 50 {
//       %a1 = affine.apply
//         (d0, d1) -> (d0 * 7 + d1 * 9 - s1, d1 * 11 + s0) (%i2, %i3)[%K, %M]
//       // Destination memref access.
//       %v1 = load %m[%a1#0, %a1#1] : memref<4x4xf32>
//     }
//   }
//
// The access relation for `srcAccess` would be the following:
//
//   [src_dim0, src_dim1, mem_dim0, mem_dim1,  %N,  %M,  const]
//        2        -4        -1        0        1    0     0     = 0
//        0         3         0       -1        0   -1     0     = 0
//        1         0         0        0        0    0     0    >= 0
//       -1         0         0        0        0    0    100   >= 0
//        0         1         0        0        0    0     0    >= 0
//        0        -1         0        0        0    0    50    >= 0
//
// The access relation for `dstAccess` would be the following:
//
//   [dst_dim0, dst_dim1, mem_dim0, mem_dim1,  %M,  %K,  const]
//        7         9        -1        0       -1    0     0     = 0
//        0        11         0       -1        0   -1     0     = 0
//        1         0         0        0        0    0     0    >= 0
//       -1         0         0        0        0    0    100   >= 0
//        0         1         0        0        0    0     0    >= 0
//        0        -1         0        0        0    0    50    >= 0
//
// The equalities in the above relations correspond to the access maps while
// the inequalities correspond to the iteration domain constraints.
//
// The dependence relation formed:
//
//   [src_dim0, src_dim1, dst_dim0, dst_dim1,  %M,  %N,  %K,  const]
//        2        -4        -7       -9        1    1    0     0     = 0
//        0         3         0      -11       -1    0    1     0     = 0
//        1         0         0        0        0    0    0     0    >= 0
//       -1         0         0        0        0    0    0    100   >= 0
//        0         1         0        0        0    0    0     0    >= 0
//        0        -1         0        0        0    0    0    50    >= 0
//        0         0         1        0        0    0    0     0    >= 0
//        0         0        -1        0        0    0    0    100   >= 0
//        0         0         0        1        0    0    0     0    >= 0
//        0         0         0       -1        0    0    0    50    >= 0
//
//
// TODO: Support AffineExprs mod/floordiv/ceildiv.
DependenceResult mlir::affine::checkMemrefAccessDependence(
    const MemRefAccess &srcAccess, const MemRefAccess &dstAccess,
    unsigned loopDepth, FlatAffineValueConstraints *dependenceConstraints,
    SmallVector<DependenceComponent, 2> *dependenceComponents, bool allowRAR) {
  LLVM_DEBUG(llvm::dbgs() << "Checking for dependence at depth: "
                          << Twine(loopDepth) << " between:\n";);
  LLVM_DEBUG(srcAccess.opInst->dump());
  LLVM_DEBUG(dstAccess.opInst->dump());

  // Return 'NoDependence' if these accesses do not access the same memref.
  if (srcAccess.memref != dstAccess.memref)
    return DependenceResult::NoDependence;

  // Return 'NoDependence' if neither of these accesses is a write (RAR
  // dependences are only considered when `allowRAR` is set).
  if (!allowRAR && !isa<AffineWriteOpInterface>(srcAccess.opInst) &&
      !isa<AffineWriteOpInterface>(dstAccess.opInst))
    return DependenceResult::NoDependence;

  // We can't analyze further if the ops lie in different affine scopes or have
  // no common block in an affine scope.
  if (getAffineScope(srcAccess.opInst) != getAffineScope(dstAccess.opInst))
    return DependenceResult::Failure;
  if (!getCommonBlockInAffineScope(srcAccess.opInst, dstAccess.opInst))
    return DependenceResult::Failure;

  // Create access relation from each MemRefAccess.
  PresburgerSpace space = PresburgerSpace::getRelationSpace();
  IntegerRelation srcRel(space), dstRel(space);
  if (failed(srcAccess.getAccessRelation(srcRel)))
    return DependenceResult::Failure;
  if (failed(dstAccess.getAccessRelation(dstRel)))
    return DependenceResult::Failure;

  FlatAffineValueConstraints srcDomain(srcRel.getDomainSet());
  FlatAffineValueConstraints dstDomain(dstRel.getDomainSet());

  // Return 'NoDependence' if loopDepth > numCommonLoops and if the ancestor
  // operation of 'srcAccess' does not properly dominate the ancestor
  // operation of 'dstAccess' in the same common operation block.
  // Note: this check is skipped if 'allowRAR' is true, because RAR deps
  // can exist irrespective of lexicographic ordering between src and dst.
  unsigned numCommonLoops = getNumCommonLoops(srcDomain, dstDomain);
  assert(loopDepth <= numCommonLoops + 1);
  if (!allowRAR && loopDepth > numCommonLoops &&
      !srcAppearsBeforeDstInAncestralBlock(srcAccess, dstAccess)) {
    return DependenceResult::NoDependence;
  }

  // Compute the dependence relation by composing `srcRel` with the inverse of
  // `dstRel`. Doing this builds a relation from the iteration domain of
  // `srcAccess` to the iteration domain of `dstAccess` for iterations that
  // access the same memory locations.
  dstRel.inverse();
  dstRel.mergeAndCompose(srcRel);
  dstRel.convertVarKind(VarKind::Domain, 0, dstRel.getNumDomainVars(),
                        VarKind::Range, 0);
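  // `dstRel` now relates src loop IVs to dst loop IVs; the conversion above
  // flattens it into a set over [src IVs, dst IVs, symbols] so that the
  // ordering constraints below can be added directly.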
  IntegerPolyhedron dependenceDomain(dstRel);

  // Add 'src' happens before 'dst' ordering constraints.
  addOrderingConstraints(srcDomain, dstDomain, loopDepth, &dependenceDomain);

  // Return 'NoDependence' if the solution space is empty.
  if (dependenceDomain.isEmpty())
    return DependenceResult::NoDependence;

  // Compute the dependence direction vector if requested.
  if (dependenceComponents != nullptr)
    computeDirectionVector(srcDomain, dstDomain, loopDepth, &dependenceDomain,
                           dependenceComponents);

  LLVM_DEBUG(llvm::dbgs() << "Dependence polyhedron:\n");
  LLVM_DEBUG(dependenceDomain.dump());

  FlatAffineValueConstraints result(dependenceDomain);
  if (dependenceConstraints)
    *dependenceConstraints = result;
  return DependenceResult::HasDependence;
}

/// Gathers dependence components for dependences between all ops in loop nest
/// rooted at 'forOp' at loop depths in range [1, maxLoopDepth].
void mlir::affine::getDependenceComponents(
    AffineForOp forOp, unsigned maxLoopDepth,
    std::vector<SmallVector<DependenceComponent, 2>> *depCompsVec) {
  // Collect all load and store ops in loop nest rooted at 'forOp'.
  SmallVector<Operation *, 8> loadAndStoreOps;
  forOp->walk([&](Operation *op) {
    if (isa<AffineReadOpInterface, AffineWriteOpInterface>(op))
      loadAndStoreOps.push_back(op);
  });

  unsigned numOps = loadAndStoreOps.size();
  for (unsigned d = 1; d <= maxLoopDepth; ++d) {
    for (unsigned i = 0; i < numOps; ++i) {
      auto *srcOp = loadAndStoreOps[i];
      MemRefAccess srcAccess(srcOp);
      for (unsigned j = 0; j < numOps; ++j) {
        auto *dstOp = loadAndStoreOps[j];
        MemRefAccess dstAccess(dstOp);

        SmallVector<DependenceComponent, 2> depComps;
        // TODO: Explore whether it would be profitable to pre-compute and
        // store deps instead of repeatedly checking.
        DependenceResult result = checkMemrefAccessDependence(
            srcAccess, dstAccess, d, /*dependenceConstraints=*/nullptr,
            &depComps);
        if (hasDependence(result))
          depCompsVec->push_back(depComps);
      }
    }
  }
}