//===- AffineAnalysis.cpp - Affine structures analysis routines ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements miscellaneous analysis routines for affine structures
// (expressions, maps, sets), and other utilities relying on such analysis.
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
#include "mlir/Analysis/Presburger/IntegerRelation.h"
#include "mlir/Analysis/Presburger/PresburgerSpace.h"
#include "mlir/Analysis/SliceAnalysis.h"
#include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
#include "mlir/Dialect/Affine/Analysis/Utils.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Affine/IR/AffineValueMap.h"
#include "mlir/IR/AffineExprVisitor.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/IntegerSet.h"
#include "mlir/Interfaces/SideEffectInterfaces.h"
#include "mlir/Interfaces/ViewLikeInterface.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <optional>

#define DEBUG_TYPE "affine-analysis"

using namespace mlir;
using namespace affine;
using namespace presburger;

/// Get the value that is being reduced by the `pos`-th reduction in the loop
/// if such a reduction can be performed by affine parallel loops. This assumes
/// floating-point operations are commutative. On success, `kind` will be the
/// reduction kind suitable for use in the affine parallel loop builder. If the
/// reduction is not supported, returns null.
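///
/// For example (an illustrative snippet, not taken from this file; %zero and
/// %mem are assumed to be defined above), a loop such as:
///
///   %sum = affine.for %i = 0 to 128 iter_args(%acc = %zero) -> (f32) {
///     %v = affine.load %mem[%i] : memref<128xf32>
///     %new = arith.addf %acc, %v : f32
///     affine.yield %new : f32
///   }
///
/// would be recognized as a supported floating-point add reduction, with
/// `kind` set to arith::AtomicRMWKind::addf.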
static Value getSupportedReduction(AffineForOp forOp, unsigned pos,
                                   arith::AtomicRMWKind &kind) {
  SmallVector<Operation *> combinerOps;
  Value reducedVal =
      matchReduction(forOp.getRegionIterArgs(), pos, combinerOps);
  if (!reducedVal)
    return nullptr;

  // Expected only one combiner operation.
  if (combinerOps.size() > 1)
    return nullptr;

  Operation *combinerOp = combinerOps.back();
  std::optional<arith::AtomicRMWKind> maybeKind =
      TypeSwitch<Operation *, std::optional<arith::AtomicRMWKind>>(combinerOp)
          .Case([](arith::AddFOp) { return arith::AtomicRMWKind::addf; })
          .Case([](arith::MulFOp) { return arith::AtomicRMWKind::mulf; })
          .Case([](arith::AddIOp) { return arith::AtomicRMWKind::addi; })
          .Case([](arith::AndIOp) { return arith::AtomicRMWKind::andi; })
          .Case([](arith::OrIOp) { return arith::AtomicRMWKind::ori; })
          .Case([](arith::MulIOp) { return arith::AtomicRMWKind::muli; })
          .Case(
              [](arith::MinimumFOp) { return arith::AtomicRMWKind::minimumf; })
          .Case(
              [](arith::MaximumFOp) { return arith::AtomicRMWKind::maximumf; })
          .Case([](arith::MinSIOp) { return arith::AtomicRMWKind::mins; })
          .Case([](arith::MaxSIOp) { return arith::AtomicRMWKind::maxs; })
          .Case([](arith::MinUIOp) { return arith::AtomicRMWKind::minu; })
          .Case([](arith::MaxUIOp) { return arith::AtomicRMWKind::maxu; })
          .Default([](Operation *) -> std::optional<arith::AtomicRMWKind> {
            // TODO: AtomicRMW supports other kinds of reductions that this is
            // currently not detecting; add those when the need arises.
            return std::nullopt;
          });
  if (!maybeKind)
    return nullptr;

  kind = *maybeKind;
  return reducedVal;
}

/// Populate `supportedReductions` with descriptors of the supported
/// reductions.
void mlir::affine::getSupportedReductions(
    AffineForOp forOp, SmallVectorImpl<LoopReduction> &supportedReductions) {
  unsigned numIterArgs = forOp.getNumIterOperands();
  if (numIterArgs == 0)
    return;
  supportedReductions.reserve(numIterArgs);
  for (unsigned i = 0; i < numIterArgs; ++i) {
    arith::AtomicRMWKind kind;
    if (Value value = getSupportedReduction(forOp, i, kind))
      supportedReductions.emplace_back(LoopReduction{kind, i, value});
  }
}

/// Returns true if `forOp` is a parallel loop. If `parallelReductions` is
/// provided, populates it with descriptors of the parallelizable reductions
/// and treats them as not preventing parallelization.
bool mlir::affine::isLoopParallel(
    AffineForOp forOp, SmallVectorImpl<LoopReduction> *parallelReductions) {
  unsigned numIterArgs = forOp.getNumIterOperands();

  // Loop is not parallel if it has SSA loop-carried dependences and reduction
  // detection is not requested.
  if (numIterArgs > 0 && !parallelReductions)
    return false;

  // Find supported reductions if requested.
  if (parallelReductions) {
    getSupportedReductions(forOp, *parallelReductions);
    // Return later to allow for identifying all parallel reductions even if
    // the loop is not parallel.
    if (parallelReductions->size() != numIterArgs)
      return false;
  }

  // Check memory dependences.
  return isLoopMemoryParallel(forOp);
}

/// Returns true if `v` is allocated locally to `enclosingOp` -- i.e., it is
/// allocated by an operation nested within `enclosingOp`.
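///
/// For instance (an illustrative case, not exhaustive), a buffer produced by a
/// memref.alloc nested inside `enclosingOp`, or a memref.subview whose source
/// chains back to such an allocation, would be treated as locally defined.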
static bool isLocallyDefined(Value v, Operation *enclosingOp) {
  Operation *defOp = v.getDefiningOp();
  if (!defOp)
    return false;

  if (hasSingleEffect<MemoryEffects::Allocate>(defOp, v) &&
      enclosingOp->isProperAncestor(defOp))
    return true;

  // Aliasing ops.
  auto viewOp = dyn_cast<ViewLikeOpInterface>(defOp);
  return viewOp && isLocallyDefined(viewOp.getViewSource(), enclosingOp);
}

bool mlir::affine::isLoopMemoryParallel(AffineForOp forOp) {
  // Any memref-typed iteration arguments are treated as serializing.
  if (llvm::any_of(forOp.getResultTypes(), llvm::IsaPred<BaseMemRefType>))
    return false;

  // Collect all load and store ops in the loop nest rooted at 'forOp'.
  SmallVector<Operation *, 8> loadAndStoreOps;
  auto walkResult = forOp.walk([&](Operation *op) -> WalkResult {
    if (auto readOp = dyn_cast<AffineReadOpInterface>(op)) {
      // Memrefs that are allocated inside `forOp` need not be considered.
      if (!isLocallyDefined(readOp.getMemRef(), forOp))
        loadAndStoreOps.push_back(op);
    } else if (auto writeOp = dyn_cast<AffineWriteOpInterface>(op)) {
      // Filter out stores the same way as above.
      if (!isLocallyDefined(writeOp.getMemRef(), forOp))
        loadAndStoreOps.push_back(op);
    } else if (!isa<AffineForOp, AffineYieldOp, AffineIfOp>(op) &&
               !hasSingleEffect<MemoryEffects::Allocate>(op) &&
               !isMemoryEffectFree(op)) {
      // Alloc-like ops inside `forOp` are fine (they don't impact parallelism)
      // as long as they don't escape the loop (which has been checked above).
      return WalkResult::interrupt();
    }

    return WalkResult::advance();
  });

  // Stop early if the loop has unknown ops with side effects.
  if (walkResult.wasInterrupted())
    return false;

  // Dep check depth would be number of enclosing loops + 1.
  unsigned depth = getNestingDepth(forOp) + 1;

  // Check dependences between all pairs of ops in 'loadAndStoreOps'.
  for (auto *srcOp : loadAndStoreOps) {
    MemRefAccess srcAccess(srcOp);
    for (auto *dstOp : loadAndStoreOps) {
      MemRefAccess dstAccess(dstOp);
      DependenceResult result =
          checkMemrefAccessDependence(srcAccess, dstAccess, depth);
      if (result.value != DependenceResult::NoDependence)
        return false;
    }
  }
  return true;
}

/// Returns the sequence of AffineApplyOp operations in 'affineApplyOps', which
/// are reachable via a search starting from 'operands' and ending at operands
/// that are not defined by an AffineApplyOp.
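///
/// For example (a hypothetical snippet), given:
///
///   %a = affine.apply affine_map<(d0) -> (d0 floordiv 8)>(%i)
///   %b = affine.apply affine_map<(d0) -> (d0 mod 8)>(%a)
///
/// a search starting from %b collects both affine.apply ops and terminates at
/// %i if %i is a block argument (e.g. a loop IV).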
// TODO: Add a method to AffineApplyOp which forward substitutes the
// AffineApplyOp into any user AffineApplyOps.
void mlir::affine::getReachableAffineApplyOps(
    ArrayRef<Value> operands, SmallVectorImpl<Operation *> &affineApplyOps) {
  struct State {
    // The ssa value for this node in the DFS traversal.
    Value value;
    // The operand index of 'value' to explore next during DFS traversal.
    unsigned operandIndex;
  };
  SmallVector<State, 4> worklist;
  for (auto operand : operands) {
    worklist.push_back({operand, 0});
  }

  while (!worklist.empty()) {
    State &state = worklist.back();
    auto *opInst = state.value.getDefiningOp();
    // Note: getDefiningOp will return nullptr if the operand is not an
    // Operation (i.e. a block argument), which is a terminator for the search.
    if (!isa_and_nonnull<AffineApplyOp>(opInst)) {
      worklist.pop_back();
      continue;
    }

    if (state.operandIndex == 0) {
      // Pre-visit: Add 'opInst' to the reachable sequence.
      affineApplyOps.push_back(opInst);
    }
    if (state.operandIndex < opInst->getNumOperands()) {
      // Visit: Add the next 'affineApplyOp' operand to the worklist.
      // Get the next operand to visit at 'operandIndex'.
      auto nextOperand = opInst->getOperand(state.operandIndex);
      // Increment 'operandIndex' in 'state'.
      ++state.operandIndex;
      // Add 'nextOperand' to the worklist.
      worklist.push_back({nextOperand, 0});
    } else {
      // Post-visit: done visiting operands of the AffineApplyOp; pop it off
      // the stack.
      worklist.pop_back();
    }
  }
}

// Builds a system of constraints with dimensional variables corresponding to
// the loop IVs of the forOps appearing in that order. Any symbols found in
// the bound operands are added as symbols in the system. Returns failure for
// the yet unimplemented cases.
// TODO: Handle non-unit steps through local variables or stride information in
// FlatAffineValueConstraints. (E.g., by using iv - lb % step = 0 and/or by
// introducing a method in FlatAffineValueConstraints
// setExprStride(ArrayRef<int64_t> expr, int64_t stride)).
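//
// As an illustration of the constructed system (a hypothetical nest, not taken
// from this file, with %N a valid symbol):
//
//   affine.for %i = 0 to 100 {
//     affine.for %j = 0 to %N {
//       ...
//     }
//   }
//
// would yield dimension variables for %i and %j, a symbol for %N, and the
// constraints 0 <= %i <= 99 and 0 <= %j <= %N - 1.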
LogicalResult mlir::affine::getIndexSet(MutableArrayRef<Operation *> ops,
                                        FlatAffineValueConstraints *domain) {
  SmallVector<Value, 4> indices;
  SmallVector<Operation *, 8> loopOps;
  size_t numDims = 0;
  for (Operation *op : ops) {
    if (!isa<AffineForOp, AffineIfOp, AffineParallelOp>(op)) {
      LLVM_DEBUG(llvm::dbgs() << "getIndexSet only handles affine.for/if/"
                                 "parallel ops");
      return failure();
    }
    if (AffineForOp forOp = dyn_cast<AffineForOp>(op)) {
      loopOps.push_back(forOp);
      // An AffineForOp retains only 1 induction variable.
      numDims += 1;
    } else if (AffineParallelOp parallelOp = dyn_cast<AffineParallelOp>(op)) {
      loopOps.push_back(parallelOp);
      numDims += parallelOp.getNumDims();
    }
  }
  extractInductionVars(loopOps, indices);
  // Reset while associating Values in 'indices' to the domain.
  *domain = FlatAffineValueConstraints(numDims, /*numSymbols=*/0,
                                       /*numLocals=*/0, indices);
  for (Operation *op : ops) {
    // Add constraints from forOp's bounds.
    if (AffineForOp forOp = dyn_cast<AffineForOp>(op)) {
      if (failed(domain->addAffineForOpDomain(forOp)))
        return failure();
    } else if (auto ifOp = dyn_cast<AffineIfOp>(op)) {
      domain->addAffineIfOpDomain(ifOp);
    } else if (auto parallelOp = dyn_cast<AffineParallelOp>(op))
      if (failed(domain->addAffineParallelOpDomain(parallelOp)))
        return failure();
  }
  return success();
}

/// Computes the iteration domain for 'op' and populates 'indexSet', which
/// encapsulates the constraints involving loops surrounding 'op' and
/// potentially involving any Function symbols. The dimensional variables in
/// 'indexSet' correspond to the loops surrounding 'op' from outermost to
/// innermost.
static LogicalResult getOpIndexSet(Operation *op,
                                   FlatAffineValueConstraints *indexSet) {
  SmallVector<Operation *, 4> ops;
  getEnclosingAffineOps(*op, &ops);
  return getIndexSet(ops, indexSet);
}

// Returns the number of outer loops common to 'src/dstDomain'.
// Loops common to 'src/dst' domains are added to 'commonLoops' if non-null.
static unsigned
getNumCommonLoops(const FlatAffineValueConstraints &srcDomain,
                  const FlatAffineValueConstraints &dstDomain,
                  SmallVectorImpl<AffineForOp> *commonLoops = nullptr) {
  // Find the number of common loops shared by src and dst accesses.
  unsigned minNumLoops =
      std::min(srcDomain.getNumDimVars(), dstDomain.getNumDimVars());
  unsigned numCommonLoops = 0;
  for (unsigned i = 0; i < minNumLoops; ++i) {
    if ((!isAffineForInductionVar(srcDomain.getValue(i)) &&
         !isAffineParallelInductionVar(srcDomain.getValue(i))) ||
        (!isAffineForInductionVar(dstDomain.getValue(i)) &&
         !isAffineParallelInductionVar(dstDomain.getValue(i))) ||
        srcDomain.getValue(i) != dstDomain.getValue(i))
      break;
    if (commonLoops != nullptr)
      commonLoops->push_back(getForInductionVarOwner(srcDomain.getValue(i)));
    ++numCommonLoops;
  }
  if (commonLoops != nullptr)
    assert(commonLoops->size() == numCommonLoops);
  return numCommonLoops;
}

/// Returns the closest surrounding block common to `opA` and `opB`. `opA` and
/// `opB` should be in the same affine scope. Returns nullptr if such a block
/// does not exist (when the two ops are in different blocks of an op starting
/// an `AffineScope`).
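///
/// For example, for two affine.load ops nested under sibling affine.for loops
/// in the body of the same func.func (whose body starts an affine scope), the
/// common block returned would be the function's entry block.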
static Block *getCommonBlockInAffineScope(Operation *opA, Operation *opB) {
  // Get the chain of ancestor blocks for the given operation. The chain
  // extends up to and including an op that starts an affine scope.
  auto getChainOfAncestorBlocks =
      [&](Operation *op, SmallVectorImpl<Block *> &ancestorBlocks) {
        Block *currBlock = op->getBlock();
        // The loop terminates when currBlock is nullptr or its parent
        // operation holds an affine scope.
        while (currBlock &&
               !currBlock->getParentOp()->hasTrait<OpTrait::AffineScope>()) {
          ancestorBlocks.push_back(currBlock);
          currBlock = currBlock->getParentOp()->getBlock();
        }
        assert(currBlock &&
               "parent op starting an affine scope is always expected");
        ancestorBlocks.push_back(currBlock);
      };

  // Find the closest common block.
  SmallVector<Block *, 4> srcAncestorBlocks, dstAncestorBlocks;
  getChainOfAncestorBlocks(opA, srcAncestorBlocks);
  getChainOfAncestorBlocks(opB, dstAncestorBlocks);

  Block *commonBlock = nullptr;
  for (int i = srcAncestorBlocks.size() - 1, j = dstAncestorBlocks.size() - 1;
       i >= 0 && j >= 0 && srcAncestorBlocks[i] == dstAncestorBlocks[j];
       i--, j--)
    commonBlock = srcAncestorBlocks[i];

  return commonBlock;
}

/// Returns true if the ancestor operation of 'srcAccess' appears before the
/// ancestor operation of 'dstAccess' in their common ancestral block. The
/// operations for `srcAccess` and `dstAccess` are expected to be in the same
/// affine scope and have a common surrounding block within it.
static bool srcAppearsBeforeDstInAncestralBlock(const MemRefAccess &srcAccess,
                                                const MemRefAccess &dstAccess) {
  // Get the Block common to 'srcAccess.opInst' and 'dstAccess.opInst'.
  Block *commonBlock =
      getCommonBlockInAffineScope(srcAccess.opInst, dstAccess.opInst);
  assert(commonBlock &&
         "ops expected to have a common surrounding block in affine scope");

  // Check the dominance relationship between the respective ancestors of the
  // src and dst in the Block of the innermost among the common loops.
  Operation *srcOp = commonBlock->findAncestorOpInBlock(*srcAccess.opInst);
  assert(srcOp && "src access op must lie in common block");
  Operation *dstOp = commonBlock->findAncestorOpInBlock(*dstAccess.opInst);
  assert(dstOp && "dest access op must lie in common block");

  // Determine whether dstOp comes after srcOp.
  return srcOp->isBeforeInBlock(dstOp);
}

// Adds ordering constraints to 'dependenceDomain' based on the number of loops
// common to 'src/dstDomain' and the requested 'loopDepth'.
// Note that 'loopDepth' cannot exceed the number of common loops plus one.
// EX: Given a loop nest of depth 2 with IVs 'i' and 'j':
// *) If 'loopDepth == 1' then one constraint is added: i' >= i + 1
// *) If 'loopDepth == 2' then two constraints are added: i == i' and
//    j' >= j + 1
// *) If 'loopDepth == 3' then two constraints are added: i == i' and j == j'
static void addOrderingConstraints(const FlatAffineValueConstraints &srcDomain,
                                   const FlatAffineValueConstraints &dstDomain,
                                   unsigned loopDepth,
                                   IntegerRelation *dependenceDomain) {
  unsigned numCols = dependenceDomain->getNumCols();
  SmallVector<int64_t, 4> eq(numCols);
  unsigned numSrcDims = srcDomain.getNumDimVars();
  unsigned numCommonLoops = getNumCommonLoops(srcDomain, dstDomain);
  unsigned numCommonLoopConstraints = std::min(numCommonLoops, loopDepth);
  for (unsigned i = 0; i < numCommonLoopConstraints; ++i) {
    std::fill(eq.begin(), eq.end(), 0);
    eq[i] = -1;
    eq[i + numSrcDims] = 1;
    if (i == loopDepth - 1) {
      eq[numCols - 1] = -1;
      dependenceDomain->addInequality(eq);
    } else {
      dependenceDomain->addEquality(eq);
    }
  }
}

// Computes distance and direction vectors in 'dependenceComponents', by adding
// variables to 'dependenceDomain' which represent the difference of the IVs,
// eliminating all other variables, and reading off distance vectors from
// equality constraints (if possible), and direction vectors from inequalities.
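//
// For instance (illustrative), if the only dependence between two accesses in
// a one-deep common loop with IV 'i' runs from iteration 'i' to iteration
// 'i + 2', the single direction variable is constrained to the constant 2 and
// the resulting component has lb = ub = 2. When the difference is not
// constant, lb/ub give the (possibly unbounded) range of the difference.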
static void computeDirectionVector(
    const FlatAffineValueConstraints &srcDomain,
    const FlatAffineValueConstraints &dstDomain, unsigned loopDepth,
    IntegerPolyhedron *dependenceDomain,
    SmallVector<DependenceComponent, 2> *dependenceComponents) {
  // Find the number of common loops shared by src and dst accesses.
  SmallVector<AffineForOp, 4> commonLoops;
  unsigned numCommonLoops =
      getNumCommonLoops(srcDomain, dstDomain, &commonLoops);
  if (numCommonLoops == 0)
    return;
  // Compute direction vectors for the requested loop depth.
  unsigned numIdsToEliminate = dependenceDomain->getNumVars();
  // Add new variables to 'dependenceDomain' to represent the direction
  // constraints for each shared loop.
  dependenceDomain->insertVar(VarKind::SetDim, /*pos=*/0,
                              /*num=*/numCommonLoops);

  // Add equality constraints for each common loop, setting the newly
  // introduced variable at column 'j' to the 'dst' IV minus the 'src' IV.
  SmallVector<int64_t, 4> eq;
  eq.resize(dependenceDomain->getNumCols());
  unsigned numSrcDims = srcDomain.getNumDimVars();
  // Constraint variables format:
  // [num-common-loops][num-src-dim-ids][num-dst-dim-ids][num-symbols][constant]
  for (unsigned j = 0; j < numCommonLoops; ++j) {
    std::fill(eq.begin(), eq.end(), 0);
    eq[j] = 1;
    eq[j + numCommonLoops] = 1;
    eq[j + numCommonLoops + numSrcDims] = -1;
    dependenceDomain->addEquality(eq);
  }

  // Eliminate all variables other than the direction variables just added.
  dependenceDomain->projectOut(numCommonLoops, numIdsToEliminate);

  // Scan each common loop variable column and set direction vectors based
  // on the eliminated constraint system.
  dependenceComponents->resize(numCommonLoops);
  for (unsigned j = 0; j < numCommonLoops; ++j) {
    (*dependenceComponents)[j].op = commonLoops[j].getOperation();
    auto lbConst = dependenceDomain->getConstantBound64(BoundType::LB, j);
    (*dependenceComponents)[j].lb =
        lbConst.value_or(std::numeric_limits<int64_t>::min());
    auto ubConst = dependenceDomain->getConstantBound64(BoundType::UB, j);
    (*dependenceComponents)[j].ub =
        ubConst.value_or(std::numeric_limits<int64_t>::max());
  }
}

LogicalResult MemRefAccess::getAccessRelation(IntegerRelation &rel) const {
  // Create a set corresponding to the domain of the access.
  FlatAffineValueConstraints domain;
  if (failed(getOpIndexSet(opInst, &domain)))
    return failure();

  // Get the access relation from the access map.
  AffineValueMap accessValueMap;
  getAccessMap(&accessValueMap);
  if (failed(getRelationFromMap(accessValueMap, rel)))
    return failure();

  // Merge and align domain ids of `rel` with ids of `domain`. Since the domain
  // of the access map is a subset of the domain of the access, the domain ids
  // of `rel` are guaranteed to be a subset of the ids of `domain`.
  unsigned inserts = 0;
  for (unsigned i = 0, e = domain.getNumDimVars(); i < e; ++i) {
    const Identifier domainIdi = Identifier(domain.getValue(i));
    const Identifier *findBegin = rel.getIds(VarKind::SetDim).begin() + i;
    const Identifier *findEnd = rel.getIds(VarKind::SetDim).end();
    const Identifier *itr = std::find(findBegin, findEnd, domainIdi);
    if (itr != findEnd) {
      rel.swapVar(i, i + std::distance(findBegin, itr));
    } else {
      ++inserts;
      rel.insertVar(VarKind::SetDim, i);
      rel.setId(VarKind::SetDim, i, domainIdi);
    }
  }

  // Append domain constraints to `rel`.
  IntegerRelation domainRel = domain;
  // For 0-d spaces, there will be no IDs. Enable them if that's the case.
  if (!domainRel.getSpace().isUsingIds())
    domainRel.resetIds();
  if (!rel.getSpace().isUsingIds())
    rel.resetIds();
  domainRel.appendVar(VarKind::Range, accessValueMap.getNumResults());
  domainRel.mergeAndAlignSymbols(rel);
  domainRel.mergeLocalVars(rel);
  rel.append(domainRel);

  rel.convertVarKind(VarKind::SetDim, 0, accessValueMap.getNumDims() + inserts,
                     VarKind::Domain);

  return success();
}

// Populates 'accessMap' with the composition of AffineApplyOps reachable from
// the indices of this MemRefAccess.
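//
// For example (hypothetical IR), for:
//
//   %idx = affine.apply affine_map<(d0) -> (d0 + 1)>(%i)
//   %v = affine.load %A[%idx] : memref<100xf32>
//
// the resulting access map would be (d0) -> (d0 + 1) with operand %i, i.e.,
// the affine.apply is folded into the load's map.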
void MemRefAccess::getAccessMap(AffineValueMap *accessMap) const {
  // Get the affine map from the AffineLoad/Store.
  AffineMap map;
  if (auto loadOp = dyn_cast<AffineReadOpInterface>(opInst))
    map = loadOp.getAffineMap();
  else
    map = cast<AffineWriteOpInterface>(opInst).getAffineMap();

  SmallVector<Value, 8> operands(indices.begin(), indices.end());
  fullyComposeAffineMapAndOperands(&map, &operands);
  map = simplifyAffineMap(map);
  canonicalizeMapAndOperands(&map, &operands);
  accessMap->reset(map, operands);
}

// Builds a flat affine constraint system to check if there exists a dependence
// between memref accesses 'srcAccess' and 'dstAccess'.
// Returns 'NoDependence' if the accesses can be definitively shown not to
// access the same element.
// Returns 'HasDependence' if the accesses do access the same element.
// Returns 'Failure' if an error or unsupported case was encountered.
// If a dependence exists, returns in 'dependenceComponents' a direction
// vector for the dependence, with a component for each loop IV in loops
// common to both accesses (see Dependence in AffineAnalysis.h for details).
//
// The memref access dependence check consists of the following steps:
// *) Build an access relation for each access. An access relation maps
//    elements of an iteration domain to the element(s) of an array domain
//    accessed by that iteration of the associated statement through some
//    array reference.
// *) Compute the dependence relation by composing the access relation of
//    `srcAccess` with the inverse of the access relation of `dstAccess`.
//    Doing this builds a relation between the iteration domain of `srcAccess`
//    and the iteration domain of `dstAccess` which access the same memory
//    location.
// *) Add ordering constraints for `srcAccess` to be accessed before
//    `dstAccess`.
//
// This method builds a constraint system with the following column format:
//
//  [src-dim-variables, dst-dim-variables, symbols, constant]
//
// For example, given the following MLIR code with "source" and "destination"
// accesses to the same memref label, and symbols %M, %N, %K:
//
//   affine.for %i0 = 0 to 100 {
//     affine.for %i1 = 0 to 50 {
//       %a0 = affine.apply
//         (d0, d1) -> (d0 * 2 - d1 * 4 + s1, d1 * 3 - s0) (%i0, %i1)[%M, %N]
//       // Source memref access.
//       store %v0, %m[%a0#0, %a0#1] : memref<4x4xf32>
//     }
//   }
//
//   affine.for %i2 = 0 to 100 {
//     affine.for %i3 = 0 to 50 {
//       %a1 = affine.apply
//         (d0, d1) -> (d0 * 7 + d1 * 9 - s1, d1 * 11 + s0) (%i2, %i3)[%K, %M]
//       // Destination memref access.
//       %v1 = load %m[%a1#0, %a1#1] : memref<4x4xf32>
//     }
//   }
//
// The access relation for `srcAccess` would be the following:
//
//   [src_dim0, src_dim1, mem_dim0, mem_dim1,  %N,  %M,  const]
//        2        -4        -1         0       1    0     0     = 0
//        0         3         0        -1       0   -1     0     = 0
//        1         0         0         0       0    0     0    >= 0
//       -1         0         0         0       0    0   100    >= 0
//        0         1         0         0       0    0     0    >= 0
//        0        -1         0         0       0    0    50    >= 0
//
// The access relation for `dstAccess` would be the following:
//
//   [dst_dim0, dst_dim1, mem_dim0, mem_dim1,  %M,  %K,  const]
//        7         9        -1         0      -1    0     0     = 0
//        0        11         0        -1       0   -1     0     = 0
//        1         0         0         0       0    0     0    >= 0
//       -1         0         0         0       0    0   100    >= 0
//        0         1         0         0       0    0     0    >= 0
//        0        -1         0         0       0    0    50    >= 0
//
// The equalities in the above relations correspond to the access maps while
// the inequalities correspond to the iteration domain constraints.
//
// The dependence relation formed:
//
//   [src_dim0, src_dim1, dst_dim0, dst_dim1,  %M,  %N,  %K,  const]
//        2        -4        -7        -9       1    1    0     0    = 0
//        0         3         0       -11      -1    0    1     0    = 0
//        1         0         0         0       0    0    0     0   >= 0
//       -1         0         0         0       0    0    0   100   >= 0
//        0         1         0         0       0    0    0     0   >= 0
//        0        -1         0         0       0    0    0    50   >= 0
//        0         0         1         0       0    0    0     0   >= 0
//        0         0        -1         0       0    0    0   100   >= 0
//        0         0         0         1       0    0    0     0   >= 0
//        0         0         0        -1       0    0    0    50   >= 0
//
//
// TODO: Support AffineExprs mod/floordiv/ceildiv.
DependenceResult mlir::affine::checkMemrefAccessDependence(
    const MemRefAccess &srcAccess, const MemRefAccess &dstAccess,
    unsigned loopDepth, FlatAffineValueConstraints *dependenceConstraints,
    SmallVector<DependenceComponent, 2> *dependenceComponents, bool allowRAR) {
  LLVM_DEBUG(llvm::dbgs() << "Checking for dependence at depth: "
                          << Twine(loopDepth) << " between:\n";);
  LLVM_DEBUG(srcAccess.opInst->dump());
  LLVM_DEBUG(dstAccess.opInst->dump());

  // Return 'NoDependence' if these accesses do not access the same memref.
  if (srcAccess.memref != dstAccess.memref)
    return DependenceResult::NoDependence;

  // Return 'NoDependence' if neither of these accesses is an
  // AffineWriteOpInterface (i.e., both are reads) and RAR dependences were not
  // requested.
  if (!allowRAR && !isa<AffineWriteOpInterface>(srcAccess.opInst) &&
      !isa<AffineWriteOpInterface>(dstAccess.opInst))
    return DependenceResult::NoDependence;

  // We can't analyze further if the ops lie in different affine scopes or have
  // no common block in an affine scope.
  if (getAffineAnalysisScope(srcAccess.opInst) !=
      getAffineAnalysisScope(dstAccess.opInst))
    return DependenceResult::Failure;
  if (!getCommonBlockInAffineScope(srcAccess.opInst, dstAccess.opInst))
    return DependenceResult::Failure;

  // Create an access relation from each MemRefAccess.
  PresburgerSpace space = PresburgerSpace::getRelationSpace();
  IntegerRelation srcRel(space), dstRel(space);
  if (failed(srcAccess.getAccessRelation(srcRel)))
    return DependenceResult::Failure;
  if (failed(dstAccess.getAccessRelation(dstRel)))
    return DependenceResult::Failure;

  FlatAffineValueConstraints srcDomain(srcRel.getDomainSet());
  FlatAffineValueConstraints dstDomain(dstRel.getDomainSet());

  // Return 'NoDependence' if loopDepth > numCommonLoops and if the ancestor
  // operation of 'srcAccess' does not properly dominate the ancestor
  // operation of 'dstAccess' in the same common operation block.
  // Note: this check is skipped if 'allowRAR' is true, because RAR deps
  // can exist irrespective of lexicographic ordering b/w src and dst.
  unsigned numCommonLoops = getNumCommonLoops(srcDomain, dstDomain);
  assert(loopDepth <= numCommonLoops + 1);
  if (!allowRAR && loopDepth > numCommonLoops &&
      !srcAppearsBeforeDstInAncestralBlock(srcAccess, dstAccess)) {
    return DependenceResult::NoDependence;
  }

  // Compute the dependence relation by composing `srcRel` with the inverse of
  // `dstRel`. Doing this builds a relation between the iteration domain of
  // `srcAccess` and the iteration domain of `dstAccess` which access the same
  // memory locations.
  dstRel.inverse();
  // For 0-d spaces, there will be no IDs. Enable them if that's the case.
  if (!dstRel.getSpace().isUsingIds())
    dstRel.resetIds();
  if (!srcRel.getSpace().isUsingIds())
    srcRel.resetIds();
  dstRel.mergeAndCompose(srcRel);
  dstRel.convertVarKind(VarKind::Domain, 0, dstRel.getNumDomainVars(),
                        VarKind::Range, 0);
  IntegerPolyhedron dependenceDomain(dstRel);

  // Add 'src' happens before 'dst' ordering constraints.
  addOrderingConstraints(srcDomain, dstDomain, loopDepth, &dependenceDomain);

  // Return 'NoDependence' if the solution space is empty: no dependence.
  if (dependenceDomain.isEmpty())
    return DependenceResult::NoDependence;

  // Compute the dependence direction vector and return true.
  if (dependenceComponents != nullptr)
    computeDirectionVector(srcDomain, dstDomain, loopDepth, &dependenceDomain,
                           dependenceComponents);

  LLVM_DEBUG(llvm::dbgs() << "Dependence polyhedron:\n");
  LLVM_DEBUG(dependenceDomain.dump());

  FlatAffineValueConstraints result(dependenceDomain);
  if (dependenceConstraints)
    *dependenceConstraints = result;
  return DependenceResult::HasDependence;
}

/// Gathers dependence components for dependences between all ops in the loop
/// nest rooted at 'forOp' at loop depths in range [1, maxLoopDepth].
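///
/// For instance (illustrative), for a 2-d nest containing one store and one
/// load on the same memref, each (depth, source op, destination op)
/// combination found to have a dependence appends one entry to 'depCompsVec',
/// holding one DependenceComponent per common surrounding loop.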
void mlir::affine::getDependenceComponents(
    AffineForOp forOp, unsigned maxLoopDepth,
    std::vector<SmallVector<DependenceComponent, 2>> *depCompsVec) {
  // Collect all load and store ops in loop nest rooted at 'forOp'.
  SmallVector<Operation *, 8> loadAndStoreOps;
  forOp->walk([&](Operation *op) {
    if (isa<AffineReadOpInterface, AffineWriteOpInterface>(op))
      loadAndStoreOps.push_back(op);
  });

  unsigned numOps = loadAndStoreOps.size();
  for (unsigned d = 1; d <= maxLoopDepth; ++d) {
    for (unsigned i = 0; i < numOps; ++i) {
      auto *srcOp = loadAndStoreOps[i];
      MemRefAccess srcAccess(srcOp);
      for (unsigned j = 0; j < numOps; ++j) {
        auto *dstOp = loadAndStoreOps[j];
        MemRefAccess dstAccess(dstOp);

        SmallVector<DependenceComponent, 2> depComps;
        // TODO: Explore whether it would be profitable to pre-compute and
        // store deps instead of repeatedly checking.
        DependenceResult result = checkMemrefAccessDependence(
            srcAccess, dstAccess, d, /*dependenceConstraints=*/nullptr,
            &depComps);
        if (hasDependence(result))
          depCompsVec->push_back(depComps);
      }
    }
  }
}