//===- AffineAnalysis.cpp - Affine structures analysis routines ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements miscellaneous analysis routines for affine structures
// (expressions, maps, sets), and other utilities relying on such analysis.
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
#include "mlir/Analysis/Presburger/IntegerRelation.h"
#include "mlir/Analysis/Presburger/PresburgerSpace.h"
#include "mlir/Analysis/SliceAnalysis.h"
#include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
#include "mlir/Dialect/Affine/Analysis/Utils.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Affine/IR/AffineValueMap.h"
#include "mlir/IR/AffineExprVisitor.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/IntegerSet.h"
#include "mlir/Interfaces/SideEffectInterfaces.h"
#include "mlir/Interfaces/ViewLikeInterface.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <optional>

#define DEBUG_TYPE "affine-analysis"

using namespace mlir;
using namespace affine;
using namespace presburger;

/// Get the value that is being reduced by the `pos`-th reduction in the loop
/// if such a reduction can be performed by affine parallel loops. This assumes
/// floating-point operations are commutative. On success, `kind` will be the
/// reduction kind suitable for use in the affine parallel loop builder. If the
/// reduction is not supported, returns null.
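///
/// For example (an illustrative snippet, not taken from this file; %zero and
/// %mem are assumed to be defined above), a loop such as:
///
///   %sum = affine.for %i = 0 to 128 iter_args(%acc = %zero) -> (f32) {
///     %v = affine.load %mem[%i] : memref<128xf32>
///     %new = arith.addf %acc, %v : f32
///     affine.yield %new : f32
///   }
///
/// would be recognized as a supported floating-point add reduction, with
/// `kind` set to arith::AtomicRMWKind::addf.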
static Value getSupportedReduction(AffineForOp forOp, unsigned pos,
                                   arith::AtomicRMWKind &kind) {
  SmallVector<Operation *> combinerOps;
  Value reducedVal =
      matchReduction(forOp.getRegionIterArgs(), pos, combinerOps);
  if (!reducedVal)
    return nullptr;

  // Expected only one combiner operation.
  if (combinerOps.size() > 1)
    return nullptr;

  Operation *combinerOp = combinerOps.back();
  std::optional<arith::AtomicRMWKind> maybeKind =
      TypeSwitch<Operation *, std::optional<arith::AtomicRMWKind>>(combinerOp)
          .Case([](arith::AddFOp) { return arith::AtomicRMWKind::addf; })
          .Case([](arith::MulFOp) { return arith::AtomicRMWKind::mulf; })
          .Case([](arith::AddIOp) { return arith::AtomicRMWKind::addi; })
          .Case([](arith::AndIOp) { return arith::AtomicRMWKind::andi; })
          .Case([](arith::OrIOp) { return arith::AtomicRMWKind::ori; })
          .Case([](arith::MulIOp) { return arith::AtomicRMWKind::muli; })
          .Case(
              [](arith::MinimumFOp) { return arith::AtomicRMWKind::minimumf; })
          .Case(
              [](arith::MaximumFOp) { return arith::AtomicRMWKind::maximumf; })
          .Case([](arith::MinSIOp) { return arith::AtomicRMWKind::mins; })
          .Case([](arith::MaxSIOp) { return arith::AtomicRMWKind::maxs; })
          .Case([](arith::MinUIOp) { return arith::AtomicRMWKind::minu; })
          .Case([](arith::MaxUIOp) { return arith::AtomicRMWKind::maxu; })
          .Default([](Operation *) -> std::optional<arith::AtomicRMWKind> {
            // TODO: AtomicRMW supports other kinds of reductions that this is
            // currently not detecting; add those when the need arises.
            return std::nullopt;
          });
  if (!maybeKind)
    return nullptr;

  kind = *maybeKind;
  return reducedVal;
}

/// Populate `supportedReductions` with descriptors of the supported
/// reductions.
void mlir::affine::getSupportedReductions(
    AffineForOp forOp, SmallVectorImpl<LoopReduction> &supportedReductions) {
  unsigned numIterArgs = forOp.getNumIterOperands();
  if (numIterArgs == 0)
    return;
  supportedReductions.reserve(numIterArgs);
  for (unsigned i = 0; i < numIterArgs; ++i) {
    arith::AtomicRMWKind kind;
    if (Value value = getSupportedReduction(forOp, i, kind))
      supportedReductions.emplace_back(LoopReduction{kind, i, value});
  }
}

/// Returns true if `forOp` is a parallel loop. If `parallelReductions` is
/// provided, populates it with descriptors of the parallelizable reductions
/// and treats them as not preventing parallelization.
bool mlir::affine::isLoopParallel(
    AffineForOp forOp, SmallVectorImpl<LoopReduction> *parallelReductions) {
  unsigned numIterArgs = forOp.getNumIterOperands();

  // Loop is not parallel if it has SSA loop-carried dependences and reduction
  // detection is not requested.
  if (numIterArgs > 0 && !parallelReductions)
    return false;

  // Find supported reductions if requested.
  if (parallelReductions) {
    getSupportedReductions(forOp, *parallelReductions);
    // Return later to allow for identifying all parallel reductions even if
    // the loop is not parallel.
    if (parallelReductions->size() != numIterArgs)
      return false;
  }

  // Check memory dependences.
  return isLoopMemoryParallel(forOp);
}

/// Returns true if `v` is allocated locally to `enclosingOp` -- i.e., it is
/// allocated by an operation nested within `enclosingOp`.
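///
/// For instance (an illustrative case, not exhaustive), a buffer produced by a
/// memref.alloc nested inside `enclosingOp`, or a memref.subview whose source
/// chains back to such an allocation, would be treated as locally defined.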
static bool isLocallyDefined(Value v, Operation *enclosingOp) {
  Operation *defOp = v.getDefiningOp();
  if (!defOp)
    return false;

  if (hasSingleEffect<MemoryEffects::Allocate>(defOp, v) &&
      enclosingOp->isProperAncestor(defOp))
    return true;

  // Aliasing ops.
  auto viewOp = dyn_cast<ViewLikeOpInterface>(defOp);
  return viewOp && isLocallyDefined(viewOp.getViewSource(), enclosingOp);
}

bool mlir::affine::isLoopMemoryParallel(AffineForOp forOp) {
  // Any memref-typed iteration arguments are treated as serializing.
  if (llvm::any_of(forOp.getResultTypes(), llvm::IsaPred<BaseMemRefType>))
    return false;

  // Collect all load and store ops in the loop nest rooted at 'forOp'.
  SmallVector<Operation *, 8> loadAndStoreOps;
  auto walkResult = forOp.walk([&](Operation *op) -> WalkResult {
    if (auto readOp = dyn_cast<AffineReadOpInterface>(op)) {
      // Memrefs that are allocated inside `forOp` need not be considered.
      if (!isLocallyDefined(readOp.getMemRef(), forOp))
        loadAndStoreOps.push_back(op);
    } else if (auto writeOp = dyn_cast<AffineWriteOpInterface>(op)) {
      // Filter out stores the same way as above.
      if (!isLocallyDefined(writeOp.getMemRef(), forOp))
        loadAndStoreOps.push_back(op);
    } else if (!isa<AffineForOp, AffineYieldOp, AffineIfOp>(op) &&
               !hasSingleEffect<MemoryEffects::Allocate>(op) &&
               !isMemoryEffectFree(op)) {
      // Alloc-like ops inside `forOp` are fine (they don't impact parallelism)
      // as long as they don't escape the loop (which has been checked above).
      return WalkResult::interrupt();
    }

    return WalkResult::advance();
  });

  // Stop early if the loop has unknown ops with side effects.
  if (walkResult.wasInterrupted())
    return false;

  // Dep check depth would be number of enclosing loops + 1.
  unsigned depth = getNestingDepth(forOp) + 1;

  // Check dependences between all pairs of ops in 'loadAndStoreOps'.
  for (auto *srcOp : loadAndStoreOps) {
    MemRefAccess srcAccess(srcOp);
    for (auto *dstOp : loadAndStoreOps) {
      MemRefAccess dstAccess(dstOp);
      DependenceResult result =
          checkMemrefAccessDependence(srcAccess, dstAccess, depth);
      if (result.value != DependenceResult::NoDependence)
        return false;
    }
  }
  return true;
}

/// Returns the sequence of AffineApplyOp operations in 'affineApplyOps', which
/// are reachable via a search starting from 'operands' and ending at operands
/// that are not defined by an AffineApplyOp.
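///
/// For example (a hypothetical snippet), given:
///
///   %a = affine.apply affine_map<(d0) -> (d0 floordiv 8)>(%i)
///   %b = affine.apply affine_map<(d0) -> (d0 mod 8)>(%a)
///
/// a search starting from %b collects both affine.apply ops and terminates at
/// %i if %i is a block argument (e.g. a loop IV).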
// TODO: Add a method to AffineApplyOp which forward substitutes the
// AffineApplyOp into any user AffineApplyOps.
void mlir::affine::getReachableAffineApplyOps(
    ArrayRef<Value> operands, SmallVectorImpl<Operation *> &affineApplyOps) {
  struct State {
    // The ssa value for this node in the DFS traversal.
    Value value;
    // The operand index of 'value' to explore next during DFS traversal.
    unsigned operandIndex;
  };
  SmallVector<State, 4> worklist;
  for (auto operand : operands) {
    worklist.push_back({operand, 0});
  }

  while (!worklist.empty()) {
    State &state = worklist.back();
    auto *opInst = state.value.getDefiningOp();
    // Note: getDefiningOp will return nullptr if the operand is not an
    // Operation (i.e. a block argument), which is a terminator for the search.
    if (!isa_and_nonnull<AffineApplyOp>(opInst)) {
      worklist.pop_back();
      continue;
    }

    if (state.operandIndex == 0) {
      // Pre-visit: Add 'opInst' to the reachable sequence.
      affineApplyOps.push_back(opInst);
    }
    if (state.operandIndex < opInst->getNumOperands()) {
      // Visit: Add the next 'affineApplyOp' operand to the worklist.
      // Get the next operand to visit at 'operandIndex'.
      auto nextOperand = opInst->getOperand(state.operandIndex);
      // Increment 'operandIndex' in 'state'.
      ++state.operandIndex;
      // Add 'nextOperand' to the worklist.
      worklist.push_back({nextOperand, 0});
    } else {
      // Post-visit: done visiting operands of the AffineApplyOp; pop it off
      // the stack.
      worklist.pop_back();
    }
  }
}

// Builds a system of constraints with dimensional variables corresponding to
// the loop IVs of the forOps appearing in that order. Any symbols found in
// the bound operands are added as symbols in the system. Returns failure for
// the yet unimplemented cases.
// TODO: Handle non-unit steps through local variables or stride information in
// FlatAffineValueConstraints. (E.g., by using iv - lb % step = 0 and/or by
// introducing a method in FlatAffineValueConstraints
// setExprStride(ArrayRef<int64_t> expr, int64_t stride)).
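//
// As an illustration of the constructed system (a hypothetical nest, not taken
// from this file, with %N a valid symbol):
//
//   affine.for %i = 0 to 100 {
//     affine.for %j = 0 to %N {
//       ...
//     }
//   }
//
// would yield dimension variables for %i and %j, a symbol for %N, and the
// constraints 0 <= %i <= 99 and 0 <= %j <= %N - 1.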
LogicalResult mlir::affine::getIndexSet(MutableArrayRef<Operation *> ops,
                                        FlatAffineValueConstraints *domain) {
  SmallVector<Value, 4> indices;
  SmallVector<Operation *, 8> loopOps;
  size_t numDims = 0;
  for (Operation *op : ops) {
    if (!isa<AffineForOp, AffineIfOp, AffineParallelOp>(op)) {
      LLVM_DEBUG(llvm::dbgs() << "getIndexSet only handles affine.for/if/"
                                 "parallel ops");
      return failure();
    }
    if (AffineForOp forOp = dyn_cast<AffineForOp>(op)) {
      loopOps.push_back(forOp);
      // An AffineForOp retains only 1 induction variable.
      numDims += 1;
    } else if (AffineParallelOp parallelOp = dyn_cast<AffineParallelOp>(op)) {
      loopOps.push_back(parallelOp);
      numDims += parallelOp.getNumDims();
    }
  }
  extractInductionVars(loopOps, indices);
  // Reset while associating Values in 'indices' to the domain.
  *domain = FlatAffineValueConstraints(numDims, /*numSymbols=*/0,
                                       /*numLocals=*/0, indices);
  for (Operation *op : ops) {
    // Add constraints from forOp's bounds.
    if (AffineForOp forOp = dyn_cast<AffineForOp>(op)) {
      if (failed(domain->addAffineForOpDomain(forOp)))
        return failure();
    } else if (auto ifOp = dyn_cast<AffineIfOp>(op)) {
      domain->addAffineIfOpDomain(ifOp);
    } else if (auto parallelOp = dyn_cast<AffineParallelOp>(op))
      if (failed(domain->addAffineParallelOpDomain(parallelOp)))
        return failure();
  }
  return success();
}

/// Computes the iteration domain for 'op' and populates 'indexSet', which
/// encapsulates the constraints involving loops surrounding 'op' and
/// potentially involving any Function symbols. The dimensional variables in
/// 'indexSet' correspond to the loops surrounding 'op' from outermost to
/// innermost.
static LogicalResult getOpIndexSet(Operation *op,
                                   FlatAffineValueConstraints *indexSet) {
  SmallVector<Operation *, 4> ops;
  getEnclosingAffineOps(*op, &ops);
  return getIndexSet(ops, indexSet);
}

// Returns the number of outer loops common to 'src/dstDomain'.
// Loops common to 'src/dst' domains are added to 'commonLoops' if non-null.
static unsigned
getNumCommonLoops(const FlatAffineValueConstraints &srcDomain,
                  const FlatAffineValueConstraints &dstDomain,
                  SmallVectorImpl<AffineForOp> *commonLoops = nullptr) {
  // Find the number of common loops shared by src and dst accesses.
  unsigned minNumLoops =
      std::min(srcDomain.getNumDimVars(), dstDomain.getNumDimVars());
  unsigned numCommonLoops = 0;
  for (unsigned i = 0; i < minNumLoops; ++i) {
    if ((!isAffineForInductionVar(srcDomain.getValue(i)) &&
         !isAffineParallelInductionVar(srcDomain.getValue(i))) ||
        (!isAffineForInductionVar(dstDomain.getValue(i)) &&
         !isAffineParallelInductionVar(dstDomain.getValue(i))) ||
        srcDomain.getValue(i) != dstDomain.getValue(i))
      break;
    if (commonLoops != nullptr)
      commonLoops->push_back(getForInductionVarOwner(srcDomain.getValue(i)));
    ++numCommonLoops;
  }
  if (commonLoops != nullptr)
    assert(commonLoops->size() == numCommonLoops);
  return numCommonLoops;
}

/// Returns the closest surrounding block common to `opA` and `opB`. `opA` and
/// `opB` should be in the same affine scope. Returns nullptr if such a block
/// does not exist (when the two ops are in different blocks of an op starting
/// an `AffineScope`).
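///
/// For example, for two affine.load ops nested under sibling affine.for loops
/// in the body of the same func.func (whose body starts an affine scope), the
/// common block returned would be the function's entry block.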
static Block *getCommonBlockInAffineScope(Operation *opA, Operation *opB) {
  // Get the chain of ancestor blocks for the given operation. The chain
  // extends up to and including an op that starts an affine scope.
  auto getChainOfAncestorBlocks =
      [&](Operation *op, SmallVectorImpl<Block *> &ancestorBlocks) {
        Block *currBlock = op->getBlock();
        // The loop terminates when currBlock is nullptr or its parent
        // operation holds an affine scope.
        while (currBlock &&
               !currBlock->getParentOp()->hasTrait<OpTrait::AffineScope>()) {
          ancestorBlocks.push_back(currBlock);
          currBlock = currBlock->getParentOp()->getBlock();
        }
        assert(currBlock &&
               "parent op starting an affine scope is always expected");
        ancestorBlocks.push_back(currBlock);
      };

  // Find the closest common block.
  SmallVector<Block *, 4> srcAncestorBlocks, dstAncestorBlocks;
  getChainOfAncestorBlocks(opA, srcAncestorBlocks);
  getChainOfAncestorBlocks(opB, dstAncestorBlocks);

  Block *commonBlock = nullptr;
  for (int i = srcAncestorBlocks.size() - 1, j = dstAncestorBlocks.size() - 1;
       i >= 0 && j >= 0 && srcAncestorBlocks[i] == dstAncestorBlocks[j];
       i--, j--)
    commonBlock = srcAncestorBlocks[i];

  return commonBlock;
}

/// Returns true if the ancestor operation of 'srcAccess' appears before the
/// ancestor operation of 'dstAccess' in their common ancestral block. The
/// operations for `srcAccess` and `dstAccess` are expected to be in the same
/// affine scope and have a common surrounding block within it.
static bool srcAppearsBeforeDstInAncestralBlock(const MemRefAccess &srcAccess,
                                                const MemRefAccess &dstAccess) {
  // Get the Block common to 'srcAccess.opInst' and 'dstAccess.opInst'.
  Block *commonBlock =
      getCommonBlockInAffineScope(srcAccess.opInst, dstAccess.opInst);
  assert(commonBlock &&
         "ops expected to have a common surrounding block in affine scope");

  // Check the dominance relationship between the respective ancestors of the
  // src and dst in the Block of the innermost among the common loops.
  Operation *srcOp = commonBlock->findAncestorOpInBlock(*srcAccess.opInst);
  assert(srcOp && "src access op must lie in common block");
  Operation *dstOp = commonBlock->findAncestorOpInBlock(*dstAccess.opInst);
  assert(dstOp && "dest access op must lie in common block");

  // Determine whether dstOp comes after srcOp.
  return srcOp->isBeforeInBlock(dstOp);
}

// Adds ordering constraints to 'dependenceDomain' based on the number of loops
// common to 'src/dstDomain' and the requested 'loopDepth'.
// Note that 'loopDepth' cannot exceed the number of common loops plus one.
// EX: Given a loop nest of depth 2 with IVs 'i' and 'j':
// *) If 'loopDepth == 1' then one constraint is added: i' >= i + 1
// *) If 'loopDepth == 2' then two constraints are added: i == i' and
//    j' >= j + 1
// *) If 'loopDepth == 3' then two constraints are added: i == i' and j == j'
static void addOrderingConstraints(const FlatAffineValueConstraints &srcDomain,
                                   const FlatAffineValueConstraints &dstDomain,
                                   unsigned loopDepth,
                                   IntegerRelation *dependenceDomain) {
  unsigned numCols = dependenceDomain->getNumCols();
  SmallVector<int64_t, 4> eq(numCols);
  unsigned numSrcDims = srcDomain.getNumDimVars();
  unsigned numCommonLoops = getNumCommonLoops(srcDomain, dstDomain);
  unsigned numCommonLoopConstraints = std::min(numCommonLoops, loopDepth);
  for (unsigned i = 0; i < numCommonLoopConstraints; ++i) {
    std::fill(eq.begin(), eq.end(), 0);
    eq[i] = -1;
    eq[i + numSrcDims] = 1;
    if (i == loopDepth - 1) {
      eq[numCols - 1] = -1;
      dependenceDomain->addInequality(eq);
    } else {
      dependenceDomain->addEquality(eq);
    }
  }
}

// Computes distance and direction vectors in 'dependenceComponents', by adding
// variables to 'dependenceDomain' which represent the difference of the IVs,
// eliminating all other variables, and reading off distance vectors from
// equality constraints (if possible), and direction vectors from inequalities.
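//
// For instance (illustrative), if the only dependence between two accesses in
// a one-deep common loop with IV 'i' runs from iteration 'i' to iteration
// 'i + 2', the single direction variable is constrained to the constant 2 and
// the resulting component has lb = ub = 2. When the difference is not
// constant, lb/ub give the (possibly unbounded) range of the difference.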
static void computeDirectionVector(
    const FlatAffineValueConstraints &srcDomain,
    const FlatAffineValueConstraints &dstDomain, unsigned loopDepth,
    IntegerPolyhedron *dependenceDomain,
    SmallVector<DependenceComponent, 2> *dependenceComponents) {
  // Find the number of common loops shared by src and dst accesses.
  SmallVector<AffineForOp, 4> commonLoops;
  unsigned numCommonLoops =
      getNumCommonLoops(srcDomain, dstDomain, &commonLoops);
  if (numCommonLoops == 0)
    return;
  // Compute direction vectors for the requested loop depth.
  unsigned numIdsToEliminate = dependenceDomain->getNumVars();
  // Add new variables to 'dependenceDomain' to represent the direction
  // constraints for each shared loop.
  dependenceDomain->insertVar(VarKind::SetDim, /*pos=*/0,
                              /*num=*/numCommonLoops);

  // Add equality constraints for each common loop, setting the newly
  // introduced variable at column 'j' to the 'dst' IV minus the 'src' IV.
  SmallVector<int64_t, 4> eq;
  eq.resize(dependenceDomain->getNumCols());
  unsigned numSrcDims = srcDomain.getNumDimVars();
  // Constraint variables format:
  // [num-common-loops][num-src-dim-ids][num-dst-dim-ids][num-symbols][constant]
  for (unsigned j = 0; j < numCommonLoops; ++j) {
    std::fill(eq.begin(), eq.end(), 0);
    eq[j] = 1;
    eq[j + numCommonLoops] = 1;
    eq[j + numCommonLoops + numSrcDims] = -1;
    dependenceDomain->addEquality(eq);
  }

  // Eliminate all variables other than the direction variables just added.
  dependenceDomain->projectOut(numCommonLoops, numIdsToEliminate);

  // Scan each common loop variable column and set direction vectors based
  // on the eliminated constraint system.
  dependenceComponents->resize(numCommonLoops);
  for (unsigned j = 0; j < numCommonLoops; ++j) {
    (*dependenceComponents)[j].op = commonLoops[j].getOperation();
    auto lbConst = dependenceDomain->getConstantBound64(BoundType::LB, j);
    (*dependenceComponents)[j].lb =
        lbConst.value_or(std::numeric_limits<int64_t>::min());
    auto ubConst = dependenceDomain->getConstantBound64(BoundType::UB, j);
    (*dependenceComponents)[j].ub =
        ubConst.value_or(std::numeric_limits<int64_t>::max());
  }
}

LogicalResult MemRefAccess::getAccessRelation(IntegerRelation &rel) const {
  // Create a set corresponding to the domain of the access.
  FlatAffineValueConstraints domain;
  if (failed(getOpIndexSet(opInst, &domain)))
    return failure();

  // Get the access relation from the access map.
  AffineValueMap accessValueMap;
  getAccessMap(&accessValueMap);
  if (failed(getRelationFromMap(accessValueMap, rel)))
    return failure();

  // Merge and align domain ids of `rel` with ids of `domain`. Since the domain
  // of the access map is a subset of the domain of the access, the domain ids
  // of `rel` are guaranteed to be a subset of the ids of `domain`.
  unsigned inserts = 0;
  for (unsigned i = 0, e = domain.getNumDimVars(); i < e; ++i) {
    const Identifier domainIdi = Identifier(domain.getValue(i));
    const Identifier *findBegin = rel.getIds(VarKind::SetDim).begin() + i;
    const Identifier *findEnd = rel.getIds(VarKind::SetDim).end();
    const Identifier *itr = std::find(findBegin, findEnd, domainIdi);
    if (itr != findEnd) {
      rel.swapVar(i, i + std::distance(findBegin, itr));
    } else {
      ++inserts;
      rel.insertVar(VarKind::SetDim, i);
      rel.setId(VarKind::SetDim, i, domainIdi);
    }
  }

  // Append domain constraints to `rel`.
  IntegerRelation domainRel = domain;
  // For 0-d spaces, there will be no IDs. Enable them if that's the case.
  if (!domainRel.getSpace().isUsingIds())
    domainRel.resetIds();
  if (!rel.getSpace().isUsingIds())
    rel.resetIds();
  domainRel.appendVar(VarKind::Range, accessValueMap.getNumResults());
  domainRel.mergeAndAlignSymbols(rel);
  domainRel.mergeLocalVars(rel);
  rel.append(domainRel);

  rel.convertVarKind(VarKind::SetDim, 0, accessValueMap.getNumDims() + inserts,
                     VarKind::Domain);

  return success();
}

// Populates 'accessMap' with the composition of AffineApplyOps reachable from
// the indices of this MemRefAccess.
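//
// For example (hypothetical IR), for:
//
//   %idx = affine.apply affine_map<(d0) -> (d0 + 1)>(%i)
//   %v = affine.load %A[%idx] : memref<100xf32>
//
// the resulting access map would be (d0) -> (d0 + 1) with operand %i, i.e.,
// the affine.apply is folded into the load's map.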
void MemRefAccess::getAccessMap(AffineValueMap *accessMap) const {
  // Get the affine map from the AffineLoad/Store.
  AffineMap map;
  if (auto loadOp = dyn_cast<AffineReadOpInterface>(opInst))
    map = loadOp.getAffineMap();
  else
    map = cast<AffineWriteOpInterface>(opInst).getAffineMap();

  SmallVector<Value, 8> operands(indices.begin(), indices.end());
  fullyComposeAffineMapAndOperands(&map, &operands);
  map = simplifyAffineMap(map);
  canonicalizeMapAndOperands(&map, &operands);
  accessMap->reset(map, operands);
}

// Builds a flat affine constraint system to check if there exists a dependence
// between memref accesses 'srcAccess' and 'dstAccess'.
// Returns 'NoDependence' if the accesses can be definitively shown not to
// access the same element.
// Returns 'HasDependence' if the accesses do access the same element.
// Returns 'Failure' if an error or unsupported case was encountered.
// If a dependence exists, returns in 'dependenceComponents' a direction
// vector for the dependence, with a component for each loop IV in loops
// common to both accesses (see Dependence in AffineAnalysis.h for details).
//
// The memref access dependence check consists of the following steps:
// *) Build an access relation for each access. An access relation maps
//    elements of an iteration domain to the element(s) of an array domain
//    accessed by that iteration of the associated statement through some
//    array reference.
// *) Compute the dependence relation by composing the access relation of
//    `srcAccess` with the inverse of the access relation of `dstAccess`.
//    Doing this builds a relation between the iteration domain of `srcAccess`
//    and the iteration domain of `dstAccess` which access the same memory
//    location.
// *) Add ordering constraints for `srcAccess` to be accessed before
//    `dstAccess`.
//
// This method builds a constraint system with the following column format:
//
//  [src-dim-variables, dst-dim-variables, symbols, constant]
//
// For example, given the following MLIR code with "source" and "destination"
// accesses to the same memref label, and symbols %M, %N, %K:
//
//   affine.for %i0 = 0 to 100 {
//     affine.for %i1 = 0 to 50 {
//       %a0 = affine.apply
//         (d0, d1) -> (d0 * 2 - d1 * 4 + s1, d1 * 3 - s0) (%i0, %i1)[%M, %N]
//       // Source memref access.
//       store %v0, %m[%a0#0, %a0#1] : memref<4x4xf32>
//     }
//   }
//
//   affine.for %i2 = 0 to 100 {
//     affine.for %i3 = 0 to 50 {
//       %a1 = affine.apply
//         (d0, d1) -> (d0 * 7 + d1 * 9 - s1, d1 * 11 + s0) (%i2, %i3)[%K, %M]
//       // Destination memref access.
//       %v1 = load %m[%a1#0, %a1#1] : memref<4x4xf32>
//     }
//   }
//
// The access relation for `srcAccess` would be the following:
//
//   [src_dim0, src_dim1, mem_dim0, mem_dim1,  %N,  %M,  const]
//        2        -4        -1         0       1    0     0     = 0
//        0         3         0        -1       0   -1     0     = 0
//        1         0         0         0       0    0     0    >= 0
//       -1         0         0         0       0    0   100    >= 0
//        0         1         0         0       0    0     0    >= 0
//        0        -1         0         0       0    0    50    >= 0
//
// The access relation for `dstAccess` would be the following:
//
//   [dst_dim0, dst_dim1, mem_dim0, mem_dim1,  %M,  %K,  const]
//        7         9        -1         0      -1    0     0     = 0
//        0        11         0        -1       0   -1     0     = 0
//        1         0         0         0       0    0     0    >= 0
//       -1         0         0         0       0    0   100    >= 0
//        0         1         0         0       0    0     0    >= 0
//        0        -1         0         0       0    0    50    >= 0
//
// The equalities in the above relations correspond to the access maps while
// the inequalities correspond to the iteration domain constraints.
//
// The dependence relation formed:
//
//   [src_dim0, src_dim1, dst_dim0, dst_dim1,  %M,  %N,  %K,  const]
//        2        -4        -7        -9       1    1    0     0    = 0
//        0         3         0       -11      -1    0    1     0    = 0
//        1         0         0         0       0    0    0     0   >= 0
//       -1         0         0         0       0    0    0   100   >= 0
//        0         1         0         0       0    0    0     0   >= 0
//        0        -1         0         0       0    0    0    50   >= 0
//        0         0         1         0       0    0    0     0   >= 0
//        0         0        -1         0       0    0    0   100   >= 0
//        0         0         0         1       0    0    0     0   >= 0
//        0         0         0        -1       0    0    0    50   >= 0
//
//
// TODO: Support AffineExprs mod/floordiv/ceildiv.
DependenceResult mlir::affine::checkMemrefAccessDependence(
    const MemRefAccess &srcAccess, const MemRefAccess &dstAccess,
    unsigned loopDepth, FlatAffineValueConstraints *dependenceConstraints,
    SmallVector<DependenceComponent, 2> *dependenceComponents, bool allowRAR) {
  LLVM_DEBUG(llvm::dbgs() << "Checking for dependence at depth: "
                          << Twine(loopDepth) << " between:\n";);
  LLVM_DEBUG(srcAccess.opInst->dump());
  LLVM_DEBUG(dstAccess.opInst->dump());

  // Return 'NoDependence' if these accesses do not access the same memref.
  if (srcAccess.memref != dstAccess.memref)
    return DependenceResult::NoDependence;

  // Return 'NoDependence' if neither of these accesses is an
  // AffineWriteOpInterface (i.e., both are reads) and RAR dependences were not
  // requested.
  if (!allowRAR && !isa<AffineWriteOpInterface>(srcAccess.opInst) &&
      !isa<AffineWriteOpInterface>(dstAccess.opInst))
    return DependenceResult::NoDependence;

  // We can't analyze further if the ops lie in different affine scopes or have
  // no common block in an affine scope.
  if (getAffineAnalysisScope(srcAccess.opInst) !=
      getAffineAnalysisScope(dstAccess.opInst))
    return DependenceResult::Failure;
  if (!getCommonBlockInAffineScope(srcAccess.opInst, dstAccess.opInst))
    return DependenceResult::Failure;

  // Create an access relation from each MemRefAccess.
  PresburgerSpace space = PresburgerSpace::getRelationSpace();
  IntegerRelation srcRel(space), dstRel(space);
  if (failed(srcAccess.getAccessRelation(srcRel)))
    return DependenceResult::Failure;
  if (failed(dstAccess.getAccessRelation(dstRel)))
    return DependenceResult::Failure;

  FlatAffineValueConstraints srcDomain(srcRel.getDomainSet());
  FlatAffineValueConstraints dstDomain(dstRel.getDomainSet());

  // Return 'NoDependence' if loopDepth > numCommonLoops and if the ancestor
  // operation of 'srcAccess' does not properly dominate the ancestor
  // operation of 'dstAccess' in the same common operation block.
  // Note: this check is skipped if 'allowRAR' is true, because RAR deps
  // can exist irrespective of lexicographic ordering b/w src and dst.
  unsigned numCommonLoops = getNumCommonLoops(srcDomain, dstDomain);
  assert(loopDepth <= numCommonLoops + 1);
  if (!allowRAR && loopDepth > numCommonLoops &&
      !srcAppearsBeforeDstInAncestralBlock(srcAccess, dstAccess)) {
    return DependenceResult::NoDependence;
  }

  // Compute the dependence relation by composing `srcRel` with the inverse of
  // `dstRel`. Doing this builds a relation between the iteration domain of
  // `srcAccess` and the iteration domain of `dstAccess` which access the same
  // memory locations.
  dstRel.inverse();
  // For 0-d spaces, there will be no IDs. Enable them if that's the case.
  if (!dstRel.getSpace().isUsingIds())
    dstRel.resetIds();
  if (!srcRel.getSpace().isUsingIds())
    srcRel.resetIds();
  dstRel.mergeAndCompose(srcRel);
  dstRel.convertVarKind(VarKind::Domain, 0, dstRel.getNumDomainVars(),
                        VarKind::Range, 0);
  IntegerPolyhedron dependenceDomain(dstRel);

  // Add 'src' happens before 'dst' ordering constraints.
  addOrderingConstraints(srcDomain, dstDomain, loopDepth, &dependenceDomain);

  // Return 'NoDependence' if the solution space is empty: no dependence.
  if (dependenceDomain.isEmpty())
    return DependenceResult::NoDependence;

  // Compute the dependence direction vector and return true.
  if (dependenceComponents != nullptr)
    computeDirectionVector(srcDomain, dstDomain, loopDepth, &dependenceDomain,
                           dependenceComponents);

  LLVM_DEBUG(llvm::dbgs() << "Dependence polyhedron:\n");
  LLVM_DEBUG(dependenceDomain.dump());

  FlatAffineValueConstraints result(dependenceDomain);
  if (dependenceConstraints)
    *dependenceConstraints = result;
  return DependenceResult::HasDependence;
}

/// Gathers dependence components for dependences between all ops in the loop
/// nest rooted at 'forOp' at loop depths in range [1, maxLoopDepth].
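///
/// For instance (illustrative), for a 2-d nest containing one store and one
/// load on the same memref, each (depth, source op, destination op)
/// combination found to have a dependence appends one entry to 'depCompsVec',
/// holding one DependenceComponent per common surrounding loop.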
void mlir::affine::getDependenceComponents(
    AffineForOp forOp, unsigned maxLoopDepth,
    std::vector<SmallVector<DependenceComponent, 2>> *depCompsVec) {
  // Collect all load and store ops in loop nest rooted at 'forOp'.
  SmallVector<Operation *, 8> loadAndStoreOps;
  forOp->walk([&](Operation *op) {
    if (isa<AffineReadOpInterface, AffineWriteOpInterface>(op))
      loadAndStoreOps.push_back(op);
  });

  unsigned numOps = loadAndStoreOps.size();
  for (unsigned d = 1; d <= maxLoopDepth; ++d) {
    for (unsigned i = 0; i < numOps; ++i) {
      auto *srcOp = loadAndStoreOps[i];
      MemRefAccess srcAccess(srcOp);
      for (unsigned j = 0; j < numOps; ++j) {
        auto *dstOp = loadAndStoreOps[j];
        MemRefAccess dstAccess(dstOp);

        SmallVector<DependenceComponent, 2> depComps;
        // TODO: Explore whether it would be profitable to pre-compute and
        // store deps instead of repeatedly checking.
        DependenceResult result = checkMemrefAccessDependence(
            srcAccess, dstAccess, d, /*dependenceConstraints=*/nullptr,
            &depComps);
        if (hasDependence(result))
          depCompsVec->push_back(depComps);
      }
    }
  }
}