//===- AffineAnalysis.cpp - Affine structures analysis routines ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements miscellaneous analysis routines for affine structures
// (expressions, maps, sets), and other utilities relying on such analysis.
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
#include "mlir/Analysis/Presburger/IntegerRelation.h"
#include "mlir/Analysis/Presburger/PresburgerSpace.h"
#include "mlir/Analysis/SliceAnalysis.h"
#include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
#include "mlir/Dialect/Affine/Analysis/Utils.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Affine/IR/AffineValueMap.h"
#include "mlir/IR/AffineExprVisitor.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/IntegerSet.h"
#include "mlir/Interfaces/SideEffectInterfaces.h"
#include "mlir/Interfaces/ViewLikeInterface.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <optional>

#define DEBUG_TYPE "affine-analysis"

using namespace mlir;
using namespace affine;
using namespace presburger;

/// Gets the value being reduced by the `pos`-th reduction in the loop if such
/// a reduction can be performed by affine parallel loops. This assumes
/// floating-point operations are commutative. On success, `kind` will be the
/// reduction kind suitable for use in the affine parallel loop builder. If the
/// reduction is not supported, returns null.
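///
/// For example (illustrative IR, not taken from the source tree), the
/// following loop reduces into its single iter_arg via 'arith.addf', so
/// `kind` would be set to 'addf' and the loaded value would be returned:
///
///   %r = affine.for %i = 0 to 128 iter_args(%acc = %cst) -> (f32) {
///     %v = affine.load %A[%i] : memref<128xf32>
///     %s = arith.addf %acc, %v : f32
///     affine.yield %s : f32
///   }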
static Value getSupportedReduction(AffineForOp forOp, unsigned pos,
                                   arith::AtomicRMWKind &kind) {
  SmallVector<Operation *> combinerOps;
  Value reducedVal =
      matchReduction(forOp.getRegionIterArgs(), pos, combinerOps);
  if (!reducedVal)
    return nullptr;

  // Expected only one combiner operation.
  if (combinerOps.size() > 1)
    return nullptr;

  Operation *combinerOp = combinerOps.back();
  std::optional<arith::AtomicRMWKind> maybeKind =
      TypeSwitch<Operation *, std::optional<arith::AtomicRMWKind>>(combinerOp)
          .Case([](arith::AddFOp) { return arith::AtomicRMWKind::addf; })
          .Case([](arith::MulFOp) { return arith::AtomicRMWKind::mulf; })
          .Case([](arith::AddIOp) { return arith::AtomicRMWKind::addi; })
          .Case([](arith::AndIOp) { return arith::AtomicRMWKind::andi; })
          .Case([](arith::OrIOp) { return arith::AtomicRMWKind::ori; })
          .Case([](arith::MulIOp) { return arith::AtomicRMWKind::muli; })
          .Case(
              [](arith::MinimumFOp) { return arith::AtomicRMWKind::minimumf; })
          .Case(
              [](arith::MaximumFOp) { return arith::AtomicRMWKind::maximumf; })
          .Case([](arith::MinSIOp) { return arith::AtomicRMWKind::mins; })
          .Case([](arith::MaxSIOp) { return arith::AtomicRMWKind::maxs; })
          .Case([](arith::MinUIOp) { return arith::AtomicRMWKind::minu; })
          .Case([](arith::MaxUIOp) { return arith::AtomicRMWKind::maxu; })
          .Default([](Operation *) -> std::optional<arith::AtomicRMWKind> {
            // TODO: AtomicRMW supports other kinds of reductions that this is
            // currently not detecting; add those when the need arises.
            return std::nullopt;
          });
  if (!maybeKind)
    return nullptr;

  kind = *maybeKind;
  return reducedVal;
}

/// Populate `supportedReductions` with descriptors of the supported reductions.
void mlir::affine::getSupportedReductions(
    AffineForOp forOp, SmallVectorImpl<LoopReduction> &supportedReductions) {
  unsigned numIterArgs = forOp.getNumIterOperands();
  if (numIterArgs == 0)
    return;
  supportedReductions.reserve(numIterArgs);
  for (unsigned i = 0; i < numIterArgs; ++i) {
    arith::AtomicRMWKind kind;
    if (Value value = getSupportedReduction(forOp, i, kind))
      supportedReductions.emplace_back(LoopReduction{kind, i, value});
  }
}

/// Returns true if `forOp` is a parallel loop. If `parallelReductions` is
/// provided, populates it with descriptors of the parallelizable reductions
/// and treats them as not preventing parallelization.
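///
/// Illustrative usage (a sketch, not taken from the source tree): a caller
/// that wants to treat a loop as parallel only when every iter_arg is a
/// parallelizable reduction could do:
///
///   SmallVector<LoopReduction> reductions;
///   if (isLoopParallel(forOp, &reductions)) {
///     // `reductions` now describes each of `forOp`'s iter_args.
///   }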
bool mlir::affine::isLoopParallel(
    AffineForOp forOp, SmallVectorImpl<LoopReduction> *parallelReductions) {
  unsigned numIterArgs = forOp.getNumIterOperands();

  // Loop is not parallel if it has SSA loop-carried dependences and reduction
  // detection is not requested.
  if (numIterArgs > 0 && !parallelReductions)
    return false;

  // Find supported reductions if requested.
  if (parallelReductions) {
    getSupportedReductions(forOp, *parallelReductions);
    // Return later to allow for identifying all parallel reductions even if
    // the loop is not parallel.
    if (parallelReductions->size() != numIterArgs)
      return false;
  }

  // Check memory dependences.
  return isLoopMemoryParallel(forOp);
}

/// Returns true if `v` is allocated locally to `enclosingOp` -- i.e., it is
/// allocated by an operation nested within `enclosingOp`.
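/// For example, a memref produced by a 'memref.alloc' nested in `enclosingOp`
/// qualifies, as does a 'memref.subview' (or any other view-like op) whose
/// view source chains back to such an allocation.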
static bool isLocallyDefined(Value v, Operation *enclosingOp) {
  Operation *defOp = v.getDefiningOp();
  if (!defOp)
    return false;

  if (hasSingleEffect<MemoryEffects::Allocate>(defOp, v) &&
      enclosingOp->isProperAncestor(defOp))
    return true;

  // Aliasing ops.
  auto viewOp = dyn_cast<ViewLikeOpInterface>(defOp);
  return viewOp && isLocallyDefined(viewOp.getViewSource(), enclosingOp);
}

bool mlir::affine::isLoopMemoryParallel(AffineForOp forOp) {
  // Any memref-typed iteration arguments are treated as serializing.
  if (llvm::any_of(forOp.getResultTypes(), llvm::IsaPred<BaseMemRefType>))
    return false;

  // Collect all load and store ops in loop nest rooted at 'forOp'.
  SmallVector<Operation *, 8> loadAndStoreOps;
  auto walkResult = forOp.walk([&](Operation *op) -> WalkResult {
    if (auto readOp = dyn_cast<AffineReadOpInterface>(op)) {
      // Memrefs that are allocated inside `forOp` need not be considered.
      if (!isLocallyDefined(readOp.getMemRef(), forOp))
        loadAndStoreOps.push_back(op);
    } else if (auto writeOp = dyn_cast<AffineWriteOpInterface>(op)) {
      // Filter out stores the same way as above.
      if (!isLocallyDefined(writeOp.getMemRef(), forOp))
        loadAndStoreOps.push_back(op);
    } else if (!isa<AffineForOp, AffineYieldOp, AffineIfOp>(op) &&
               !hasSingleEffect<MemoryEffects::Allocate>(op) &&
               !isMemoryEffectFree(op)) {
      // Alloc-like ops inside `forOp` are fine (they don't impact parallelism)
      // as long as they don't escape the loop (which has been checked above).
      return WalkResult::interrupt();
    }

    return WalkResult::advance();
  });

  // Stop early if the loop has unknown ops with side effects.
  if (walkResult.wasInterrupted())
    return false;

  // Dep check depth would be number of enclosing loops + 1.
  unsigned depth = getNestingDepth(forOp) + 1;

  // Check dependences between all pairs of ops in 'loadAndStoreOps'.
  for (auto *srcOp : loadAndStoreOps) {
    MemRefAccess srcAccess(srcOp);
    for (auto *dstOp : loadAndStoreOps) {
      MemRefAccess dstAccess(dstOp);
      DependenceResult result =
          checkMemrefAccessDependence(srcAccess, dstAccess, depth);
      if (result.value != DependenceResult::NoDependence)
        return false;
    }
  }
  return true;
}

/// Returns the sequence of AffineApplyOp operations in 'affineApplyOps', which
/// are reachable via a search starting from 'operands' and ending at operands
/// that are not defined by AffineApplyOps.
// TODO: Add a method to AffineApplyOp which forward substitutes the
// AffineApplyOp into any user AffineApplyOps.
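//
// For example (illustrative IR), with operands = {%b} in:
//
//   %a = affine.apply affine_map<(d0) -> (d0 + 1)>(%i)
//   %b = affine.apply affine_map<(d0) -> (d0 * 2)>(%a)
//
// both affine.apply ops are collected, and the search terminates at %i, a
// block argument not defined by an AffineApplyOp.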
void mlir::affine::getReachableAffineApplyOps(
    ArrayRef<Value> operands, SmallVectorImpl<Operation *> &affineApplyOps) {
  struct State {
    // The ssa value for this node in the DFS traversal.
    Value value;
    // The operand index of 'value' to explore next during DFS traversal.
    unsigned operandIndex;
  };
  SmallVector<State, 4> worklist;
  for (auto operand : operands) {
    worklist.push_back({operand, 0});
  }

  while (!worklist.empty()) {
    State &state = worklist.back();
    auto *opInst = state.value.getDefiningOp();
    // Note: getDefiningOp will return nullptr if the operand is not an
    // Operation (i.e. block argument), which is a terminator for the search.
    if (!isa_and_nonnull<AffineApplyOp>(opInst)) {
      worklist.pop_back();
      continue;
    }

    if (state.operandIndex == 0) {
      // Pre-Visit: Add 'opInst' to reachable sequence.
      affineApplyOps.push_back(opInst);
    }
    if (state.operandIndex < opInst->getNumOperands()) {
      // Visit: Add next 'affineApplyOp' operand to worklist.
      // Get next operand to visit at 'operandIndex'.
      auto nextOperand = opInst->getOperand(state.operandIndex);
      // Increment 'operandIndex' in 'state'.
      ++state.operandIndex;
      // Add 'nextOperand' to worklist.
      worklist.push_back({nextOperand, 0});
    } else {
      // Post-visit: done visiting operands of the AffineApplyOp, pop off
      // stack.
      worklist.pop_back();
    }
  }
}

// Builds a system of constraints with dimensional variables corresponding to
// the loop IVs of the forOps appearing in that order. Any symbols found in
// the bound operands are added as symbols in the system. Returns failure for
// the yet unimplemented cases.
// TODO: Handle non-unit steps through local variables or stride information in
// FlatAffineValueConstraints (e.g., by using (iv - lb) % step = 0 and/or by
// introducing a method in FlatAffineValueConstraints
// setExprStride(ArrayRef<int64_t> expr, int64_t stride)).
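//
// For example (illustrative), for the nest:
//
//   affine.for %i = 0 to %N {
//     affine.for %j = 0 to 100 { ... }
//   }
//
// the resulting system has dimensions (%i, %j), the symbol %N, and the
// constraints 0 <= %i <= %N - 1 and 0 <= %j <= 99.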
LogicalResult mlir::affine::getIndexSet(MutableArrayRef<Operation *> ops,
                                        FlatAffineValueConstraints *domain) {
  SmallVector<Value, 4> indices;
  SmallVector<Operation *, 8> loopOps;
  size_t numDims = 0;
  for (Operation *op : ops) {
    if (!isa<AffineForOp, AffineIfOp, AffineParallelOp>(op)) {
      LLVM_DEBUG(llvm::dbgs() << "getIndexSet only handles affine.for/if/"
                                 "parallel ops");
      return failure();
    }
    if (AffineForOp forOp = dyn_cast<AffineForOp>(op)) {
      loopOps.push_back(forOp);
      // An AffineForOp retains only 1 induction variable.
      numDims += 1;
    } else if (AffineParallelOp parallelOp = dyn_cast<AffineParallelOp>(op)) {
      loopOps.push_back(parallelOp);
      numDims += parallelOp.getNumDims();
    }
  }
  extractInductionVars(loopOps, indices);
  // Reset while associating Values in 'indices' to the domain.
  *domain = FlatAffineValueConstraints(numDims, /*numSymbols=*/0,
                                       /*numLocals=*/0, indices);
  for (Operation *op : ops) {
    // Add constraints from forOp's bounds.
    if (AffineForOp forOp = dyn_cast<AffineForOp>(op)) {
      if (failed(domain->addAffineForOpDomain(forOp)))
        return failure();
    } else if (auto ifOp = dyn_cast<AffineIfOp>(op)) {
      domain->addAffineIfOpDomain(ifOp);
    } else if (auto parallelOp = dyn_cast<AffineParallelOp>(op))
      if (failed(domain->addAffineParallelOpDomain(parallelOp)))
        return failure();
  }
  return success();
}

/// Computes the iteration domain for 'op' and populates 'indexSet', which
/// encapsulates the constraints involving loops surrounding 'op' and
/// potentially involving any Function symbols. The dimensional variables in
/// 'indexSet' correspond to the loops surrounding 'op' from outermost to
/// innermost.
static LogicalResult getOpIndexSet(Operation *op,
                                   FlatAffineValueConstraints *indexSet) {
  SmallVector<Operation *, 4> ops;
  getEnclosingAffineOps(*op, &ops);
  return getIndexSet(ops, indexSet);
}

// Returns the number of outer loops common to 'src/dstDomain'.
// Loops common to 'src/dst' domains are added to 'commonLoops' if non-null.
static unsigned
getNumCommonLoops(const FlatAffineValueConstraints &srcDomain,
                  const FlatAffineValueConstraints &dstDomain,
                  SmallVectorImpl<AffineForOp> *commonLoops = nullptr) {
  // Find the number of common loops shared by src and dst accesses.
  unsigned minNumLoops =
      std::min(srcDomain.getNumDimVars(), dstDomain.getNumDimVars());
  unsigned numCommonLoops = 0;
  for (unsigned i = 0; i < minNumLoops; ++i) {
    if ((!isAffineForInductionVar(srcDomain.getValue(i)) &&
         !isAffineParallelInductionVar(srcDomain.getValue(i))) ||
        (!isAffineForInductionVar(dstDomain.getValue(i)) &&
         !isAffineParallelInductionVar(dstDomain.getValue(i))) ||
        srcDomain.getValue(i) != dstDomain.getValue(i))
      break;
    if (commonLoops != nullptr)
      commonLoops->push_back(getForInductionVarOwner(srcDomain.getValue(i)));
    ++numCommonLoops;
  }
  if (commonLoops != nullptr)
    assert(commonLoops->size() == numCommonLoops);
  return numCommonLoops;
}

/// Returns the closest surrounding block common to `opA` and `opB`. `opA` and
/// `opB` should be in the same affine scope. Returns nullptr if such a block
/// does not exist (when the two ops are in different blocks of an op starting
/// an `AffineScope`).
static Block *getCommonBlockInAffineScope(Operation *opA, Operation *opB) {
  // Get the chain of ancestor blocks for the given op. The chain extends up
  // to and including an op that starts an affine scope.
  auto getChainOfAncestorBlocks =
      [&](Operation *op, SmallVectorImpl<Block *> &ancestorBlocks) {
        Block *currBlock = op->getBlock();
        // Loop terminates when the currBlock is nullptr or its parent
        // operation holds an affine scope.
        while (currBlock &&
               !currBlock->getParentOp()->hasTrait<OpTrait::AffineScope>()) {
          ancestorBlocks.push_back(currBlock);
          currBlock = currBlock->getParentOp()->getBlock();
        }
        assert(currBlock &&
               "parent op starting an affine scope is always expected");
        ancestorBlocks.push_back(currBlock);
      };

  // Find the closest common block.
  SmallVector<Block *, 4> srcAncestorBlocks, dstAncestorBlocks;
  getChainOfAncestorBlocks(opA, srcAncestorBlocks);
  getChainOfAncestorBlocks(opB, dstAncestorBlocks);

  Block *commonBlock = nullptr;
  for (int i = srcAncestorBlocks.size() - 1, j = dstAncestorBlocks.size() - 1;
       i >= 0 && j >= 0 && srcAncestorBlocks[i] == dstAncestorBlocks[j];
       i--, j--)
    commonBlock = srcAncestorBlocks[i];

  return commonBlock;
}

/// Returns true if the ancestor operation of 'srcAccess' appears before the
/// ancestor operation of 'dstAccess' in their common ancestral block. The
/// operations for `srcAccess` and `dstAccess` are expected to be in the same
/// affine scope and have a common surrounding block within it.
static bool srcAppearsBeforeDstInAncestralBlock(const MemRefAccess &srcAccess,
                                                const MemRefAccess &dstAccess) {
  // Get Block common to 'srcAccess.opInst' and 'dstAccess.opInst'.
  Block *commonBlock =
      getCommonBlockInAffineScope(srcAccess.opInst, dstAccess.opInst);
  assert(commonBlock &&
         "ops expected to have a common surrounding block in affine scope");

  // Check the dominance relationship between the respective ancestors of the
  // src and dst in the Block of the innermost among the common loops.
  Operation *srcOp = commonBlock->findAncestorOpInBlock(*srcAccess.opInst);
  assert(srcOp && "src access op must lie in common block");
  Operation *dstOp = commonBlock->findAncestorOpInBlock(*dstAccess.opInst);
  assert(dstOp && "dest access op must lie in common block");

  // Determine whether dstOp comes after srcOp.
  return srcOp->isBeforeInBlock(dstOp);
}

// Adds ordering constraints to 'dependenceDomain' based on the number of loops
// common to 'src/dstDomain' and the requested 'loopDepth'.
// Note that 'loopDepth' cannot exceed the number of common loops plus one.
// EX: Given a loop nest of depth 2 with IVs 'i' and 'j':
// *) If 'loopDepth == 1' then one constraint is added: i' >= i + 1
// *) If 'loopDepth == 2' then two constraints are added: i == i' and
//    j' >= j + 1
// *) If 'loopDepth == 3' then two constraints are added: i == i' and j == j'
static void addOrderingConstraints(const FlatAffineValueConstraints &srcDomain,
                                   const FlatAffineValueConstraints &dstDomain,
                                   unsigned loopDepth,
                                   IntegerRelation *dependenceDomain) {
  unsigned numCols = dependenceDomain->getNumCols();
  SmallVector<int64_t, 4> eq(numCols);
  unsigned numSrcDims = srcDomain.getNumDimVars();
  unsigned numCommonLoops = getNumCommonLoops(srcDomain, dstDomain);
  unsigned numCommonLoopConstraints = std::min(numCommonLoops, loopDepth);
  for (unsigned i = 0; i < numCommonLoopConstraints; ++i) {
    std::fill(eq.begin(), eq.end(), 0);
    eq[i] = -1;
    eq[i + numSrcDims] = 1;
    if (i == loopDepth - 1) {
      eq[numCols - 1] = -1;
      dependenceDomain->addInequality(eq);
    } else {
      dependenceDomain->addEquality(eq);
    }
  }
}

// Computes distance and direction vectors in 'dependences', by adding
// variables to 'dependenceDomain' which represent the difference of the IVs,
// eliminating all other variables, and reading off distance vectors from
// equality constraints (if possible), and direction vectors from inequalities.
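//
// For example (illustrative), if every dependence goes from iteration
// (%i, %j) to iteration (%i, %j + 2), the component for the outer loop is
// [0, 0] and the component for the inner loop is [2, 2].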
static void computeDirectionVector(
    const FlatAffineValueConstraints &srcDomain,
    const FlatAffineValueConstraints &dstDomain, unsigned loopDepth,
    IntegerPolyhedron *dependenceDomain,
    SmallVector<DependenceComponent, 2> *dependenceComponents) {
  // Find the number of common loops shared by src and dst accesses.
  SmallVector<AffineForOp, 4> commonLoops;
  unsigned numCommonLoops =
      getNumCommonLoops(srcDomain, dstDomain, &commonLoops);
  if (numCommonLoops == 0)
    return;
  // Compute direction vectors for requested loop depth.
  unsigned numIdsToEliminate = dependenceDomain->getNumVars();
  // Add new variables to 'dependenceDomain' to represent the direction
  // constraints for each shared loop.
  dependenceDomain->insertVar(VarKind::SetDim, /*pos=*/0,
                              /*num=*/numCommonLoops);

  // Add equality constraints for each common loop, setting newly introduced
  // variable at column 'j' to the 'dst' IV minus the 'src' IV.
  SmallVector<int64_t, 4> eq;
  eq.resize(dependenceDomain->getNumCols());
  unsigned numSrcDims = srcDomain.getNumDimVars();
  // Constraint variables format:
  // [num-common-loops][num-src-dim-ids][num-dst-dim-ids][num-symbols][constant]
  for (unsigned j = 0; j < numCommonLoops; ++j) {
    std::fill(eq.begin(), eq.end(), 0);
    eq[j] = 1;
    eq[j + numCommonLoops] = 1;
    eq[j + numCommonLoops + numSrcDims] = -1;
    dependenceDomain->addEquality(eq);
  }

  // Eliminate all variables other than the direction variables just added.
  dependenceDomain->projectOut(numCommonLoops, numIdsToEliminate);

  // Scan each common loop variable column and set direction vectors based
  // on eliminated constraint system.
  dependenceComponents->resize(numCommonLoops);
  for (unsigned j = 0; j < numCommonLoops; ++j) {
    (*dependenceComponents)[j].op = commonLoops[j].getOperation();
    auto lbConst = dependenceDomain->getConstantBound64(BoundType::LB, j);
    (*dependenceComponents)[j].lb =
        lbConst.value_or(std::numeric_limits<int64_t>::min());
    auto ubConst = dependenceDomain->getConstantBound64(BoundType::UB, j);
    (*dependenceComponents)[j].ub =
        ubConst.value_or(std::numeric_limits<int64_t>::max());
  }
}

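// Builds the access relation for this MemRefAccess: a relation from the
// iteration domain of the enclosing loops to the accessed memref elements,
// with the loop bounds appended as domain constraints. For example
// (illustrative), for an 'affine.load %A[%i + 1]' nested inside
// 'affine.for %i = 0 to 10', the relation is
// { [i] -> [m] : m = i + 1, 0 <= i <= 9 }.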
LogicalResult MemRefAccess::getAccessRelation(IntegerRelation &rel) const {
  // Create set corresponding to domain of access.
  FlatAffineValueConstraints domain;
  if (failed(getOpIndexSet(opInst, &domain)))
    return failure();

  // Get access relation from access map.
  AffineValueMap accessValueMap;
  getAccessMap(&accessValueMap);
  if (failed(getRelationFromMap(accessValueMap, rel)))
    return failure();

  // Merge and align domain ids of `rel` with ids of `domain`. Since the domain
  // of the access map is a subset of the domain of access, the domain ids of
  // `rel` are guaranteed to be a subset of ids of `domain`.
  unsigned inserts = 0;
  for (unsigned i = 0, e = domain.getNumDimVars(); i < e; ++i) {
    const Identifier domainIdi = Identifier(domain.getValue(i));
    const Identifier *findBegin = rel.getIds(VarKind::SetDim).begin() + i;
    const Identifier *findEnd = rel.getIds(VarKind::SetDim).end();
    const Identifier *itr = std::find(findBegin, findEnd, domainIdi);
    if (itr != findEnd) {
      rel.swapVar(i, i + std::distance(findBegin, itr));
    } else {
      ++inserts;
      rel.insertVar(VarKind::SetDim, i);
      rel.setId(VarKind::SetDim, i, domainIdi);
    }
  }

  // Append domain constraints to `rel`.
  IntegerRelation domainRel = domain;
  if (rel.getSpace().isUsingIds() && !domainRel.getSpace().isUsingIds())
    domainRel.resetIds();
  domainRel.appendVar(VarKind::Range, accessValueMap.getNumResults());
  domainRel.mergeAndAlignSymbols(rel);
  domainRel.mergeLocalVars(rel);
  rel.append(domainRel);

  rel.convertVarKind(VarKind::SetDim, 0, accessValueMap.getNumDims() + inserts,
                     VarKind::Domain);

  return success();
}

// Populates 'accessMap' with the composition of AffineApplyOps reachable from
// the indices of this MemRefAccess.
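//
// For example (illustrative), for:
//
//   %a = affine.apply affine_map<(d0) -> (d0 floordiv 8)>(%i)
//   %v = affine.load %A[%a, %j] : memref<?x?xf32>
//
// the resulting access map is (d0, d1) -> (d0 floordiv 8, d1) with operands
// (%i, %j).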
void MemRefAccess::getAccessMap(AffineValueMap *accessMap) const {
  // Get affine map from AffineLoad/Store.
  AffineMap map;
  if (auto loadOp = dyn_cast<AffineReadOpInterface>(opInst))
    map = loadOp.getAffineMap();
  else
    map = cast<AffineWriteOpInterface>(opInst).getAffineMap();

  SmallVector<Value, 8> operands(indices.begin(), indices.end());
  fullyComposeAffineMapAndOperands(&map, &operands);
  map = simplifyAffineMap(map);
  canonicalizeMapAndOperands(&map, &operands);
  accessMap->reset(map, operands);
}

// Builds a flat affine constraint system to check if there exists a dependence
// between memref accesses 'srcAccess' and 'dstAccess'.
// Returns 'NoDependence' if the accesses can be definitively shown not to
// access the same element.
// Returns 'HasDependence' if the accesses do access the same element.
// Returns 'Failure' if an error or unsupported case was encountered.
// If a dependence exists, returns in 'dependenceComponents' a direction
// vector for the dependence, with a component for each loop IV in loops
// common to both accesses (see Dependence in AffineAnalysis.h for details).
//
// The memref access dependence check comprises the following steps:
// *) Build access relation for each access. An access relation maps elements
//    of an iteration domain to the element(s) of an array domain accessed by
//    that iteration of the associated statement through some array reference.
// *) Compute the dependence relation by composing access relation of
//    `srcAccess` with the inverse of access relation of `dstAccess`.
//    Doing this builds a relation between iteration domain of `srcAccess`
//    to the iteration domain of `dstAccess` which access the same memory
//    location.
// *) Add ordering constraints for `srcAccess` to be accessed before
//    `dstAccess`.
//
// This method builds a constraint system with the following column format:
//
//  [src-dim-variables, dst-dim-variables, symbols, constant]
//
// For example, given the following MLIR code with "source" and "destination"
// accesses to the same memref '%m', and symbols %M, %N, %K:
//
//   affine.for %i0 = 0 to 100 {
//     affine.for %i1 = 0 to 50 {
//       %a0 = affine.apply
//         (d0, d1) -> (d0 * 2 - d1 * 4 + s1, d1 * 3 - s0) (%i0, %i1)[%M, %N]
//       // Source memref access.
//       store %v0, %m[%a0#0, %a0#1] : memref<4x4xf32>
//     }
//   }
//
//   affine.for %i2 = 0 to 100 {
//     affine.for %i3 = 0 to 50 {
//       %a1 = affine.apply
//         (d0, d1) -> (d0 * 7 + d1 * 9 - s1, d1 * 11 + s0) (%i2, %i3)[%K, %M]
//       // Destination memref access.
//       %v1 = load %m[%a1#0, %a1#1] : memref<4x4xf32>
//     }
//   }
//
// The access relation for `srcAccess` would be the following:
//
//   [src_dim0, src_dim1, mem_dim0, mem_dim1,  %N,   %M,  const]
//       2        -4        -1         0        1     0     0     = 0
//       0         3         0        -1        0    -1     0     = 0
//       1         0         0         0        0     0     0    >= 0
//      -1         0         0         0        0     0    100   >= 0
//       0         1         0         0        0     0     0    >= 0
//       0        -1         0         0        0     0    50    >= 0
//
// The access relation for `dstAccess` would be the following:
//
//   [dst_dim0, dst_dim1, mem_dim0, mem_dim1,  %M,   %K,  const]
//       7         9        -1         0       -1     0     0     = 0
//       0        11         0        -1        0    -1     0     = 0
//       1         0         0         0        0     0     0    >= 0
//      -1         0         0         0        0     0    100   >= 0
//       0         1         0         0        0     0     0    >= 0
//       0        -1         0         0        0     0    50    >= 0
//
// The equalities in the above relations correspond to the access maps while
// the inequalities correspond to the iteration domain constraints.
//
// The dependence relation formed:
//
//   [src_dim0, src_dim1, dst_dim0, dst_dim1,  %M,   %N,   %K,  const]
//       2        -4        -7        -9        1     1     0     0    = 0
//       0         3         0       -11       -1     0     1     0    = 0
//       1         0         0         0        0     0     0     0   >= 0
//      -1         0         0         0        0     0     0    100  >= 0
//       0         1         0         0        0     0     0     0   >= 0
//       0        -1         0         0        0     0     0    50   >= 0
//       0         0         1         0        0     0     0     0   >= 0
//       0         0        -1         0        0     0     0    100  >= 0
//       0         0         0         1        0     0     0     0   >= 0
//       0         0         0        -1        0     0     0    50   >= 0
//
//
// TODO: Support AffineExprs mod/floordiv/ceildiv.
DependenceResult mlir::affine::checkMemrefAccessDependence(
    const MemRefAccess &srcAccess, const MemRefAccess &dstAccess,
    unsigned loopDepth, FlatAffineValueConstraints *dependenceConstraints,
    SmallVector<DependenceComponent, 2> *dependenceComponents, bool allowRAR) {
  LLVM_DEBUG(llvm::dbgs() << "Checking for dependence at depth: "
                          << Twine(loopDepth) << " between:\n";);
  LLVM_DEBUG(srcAccess.opInst->dump());
  LLVM_DEBUG(dstAccess.opInst->dump());

  // Return 'NoDependence' if these accesses do not access the same memref.
  if (srcAccess.memref != dstAccess.memref)
    return DependenceResult::NoDependence;

  // Return 'NoDependence' if one of these accesses is not an
  // AffineWriteOpInterface.
  if (!allowRAR && !isa<AffineWriteOpInterface>(srcAccess.opInst) &&
      !isa<AffineWriteOpInterface>(dstAccess.opInst))
    return DependenceResult::NoDependence;

  // We can't analyze further if the ops lie in different affine scopes or have
  // no common block in an affine scope.
  if (getAffineScope(srcAccess.opInst) != getAffineScope(dstAccess.opInst))
    return DependenceResult::Failure;
  if (!getCommonBlockInAffineScope(srcAccess.opInst, dstAccess.opInst))
    return DependenceResult::Failure;

  // Create access relation from each MemRefAccess.
  PresburgerSpace space = PresburgerSpace::getRelationSpace();
  IntegerRelation srcRel(space), dstRel(space);
  if (failed(srcAccess.getAccessRelation(srcRel)))
    return DependenceResult::Failure;
  if (failed(dstAccess.getAccessRelation(dstRel)))
    return DependenceResult::Failure;

  FlatAffineValueConstraints srcDomain(srcRel.getDomainSet());
  FlatAffineValueConstraints dstDomain(dstRel.getDomainSet());

  // Return 'NoDependence' if loopDepth > numCommonLoops and if the ancestor
  // operation of 'srcAccess' does not properly dominate the ancestor
  // operation of 'dstAccess' in the same common operation block.
  // Note: this check is skipped if 'allowRAR' is true, because RAR deps
  // can exist irrespective of lexicographic ordering b/w src and dst.
  unsigned numCommonLoops = getNumCommonLoops(srcDomain, dstDomain);
  assert(loopDepth <= numCommonLoops + 1);
  if (!allowRAR && loopDepth > numCommonLoops &&
      !srcAppearsBeforeDstInAncestralBlock(srcAccess, dstAccess)) {
    return DependenceResult::NoDependence;
  }

  // Compute the dependence relation by composing `srcRel` with the inverse of
  // `dstRel`. Doing this builds a relation between iteration domain of
  // `srcAccess` to the iteration domain of `dstAccess` which access the same
  // memory locations.
  dstRel.inverse();
  dstRel.mergeAndCompose(srcRel);
  dstRel.convertVarKind(VarKind::Domain, 0, dstRel.getNumDomainVars(),
                        VarKind::Range, 0);
  IntegerPolyhedron dependenceDomain(dstRel);

  // Add 'src' happens before 'dst' ordering constraints.
  addOrderingConstraints(srcDomain, dstDomain, loopDepth, &dependenceDomain);

  // Return 'NoDependence' if the solution space is empty: no dependence.
  if (dependenceDomain.isEmpty())
    return DependenceResult::NoDependence;

  // Compute dependence direction vector and return true.
  if (dependenceComponents != nullptr)
    computeDirectionVector(srcDomain, dstDomain, loopDepth, &dependenceDomain,
                           dependenceComponents);

  LLVM_DEBUG(llvm::dbgs() << "Dependence polyhedron:\n");
  LLVM_DEBUG(dependenceDomain.dump());

  FlatAffineValueConstraints result(dependenceDomain);
  if (dependenceConstraints)
    *dependenceConstraints = result;
  return DependenceResult::HasDependence;
}

/// Gathers dependence components for dependences between all ops in loop nest
/// rooted at 'forOp' at loop depths in range [1, maxLoopDepth].
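///
/// Example usage (an illustrative sketch): print the per-loop dependence
/// distance ranges for a 2-d nest:
///
///   std::vector<SmallVector<DependenceComponent, 2>> depCompsVec;
///   getDependenceComponents(forOp, /*maxLoopDepth=*/2, &depCompsVec);
///   for (const auto &depComps : depCompsVec)
///     for (const DependenceComponent &comp : depComps)
///       llvm::dbgs() << "[" << *comp.lb << ", " << *comp.ub << "]\n";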
void mlir::affine::getDependenceComponents(
    AffineForOp forOp, unsigned maxLoopDepth,
    std::vector<SmallVector<DependenceComponent, 2>> *depCompsVec) {
  // Collect all load and store ops in loop nest rooted at 'forOp'.
  SmallVector<Operation *, 8> loadAndStoreOps;
  forOp->walk([&](Operation *op) {
    if (isa<AffineReadOpInterface, AffineWriteOpInterface>(op))
      loadAndStoreOps.push_back(op);
  });

  unsigned numOps = loadAndStoreOps.size();
  for (unsigned d = 1; d <= maxLoopDepth; ++d) {
    for (unsigned i = 0; i < numOps; ++i) {
      auto *srcOp = loadAndStoreOps[i];
      MemRefAccess srcAccess(srcOp);
      for (unsigned j = 0; j < numOps; ++j) {
        auto *dstOp = loadAndStoreOps[j];
        MemRefAccess dstAccess(dstOp);

        SmallVector<DependenceComponent, 2> depComps;
        // TODO: Explore whether it would be profitable to pre-compute and
        // store deps instead of repeatedly checking.
        DependenceResult result = checkMemrefAccessDependence(
            srcAccess, dstAccess, d, /*dependenceConstraints=*/nullptr,
            &depComps);
        if (hasDependence(result))
          depCompsVec->push_back(depComps);
      }
    }
  }
}