//===- LoopAnalysis.cpp - Misc loop analysis routines ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements miscellaneous loop analysis routines.
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"

#include "mlir/Analysis/SliceAnalysis.h"
#include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
#include "mlir/Dialect/Affine/Analysis/AffineStructures.h"
#include "mlir/Dialect/Affine/Analysis/NestedMatcher.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Affine/IR/AffineValueMap.h"
#include "mlir/Support/MathExtras.h"

#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallString.h"
#include <numeric>
#include <optional>
#include <type_traits>

using namespace mlir;
using namespace mlir::affine;

/// Populates `tripCountMap` and `tripCountOperands` with an affine map (and
/// its operands) whose results express the trip count of `forOp`, provided the
/// trip count is expressible as an affine expression; sets `tripCountMap` to a
/// null map otherwise. The trip count expressions are simplified before being
/// returned. This method only utilizes map composition to construct lower and
/// upper bounds before computing the trip count expressions.
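///
/// For example (illustrative, not an exhaustive specification): a loop
/// `affine.for %i = 0 to 13 step 4` has the constant trip count map
/// () -> (4), i.e. ceildiv(13 - 0, 4); for `affine.for %i = 0 to %n step 4`,
/// the trip count map is, depending on how `%n` is bound, roughly of the form
/// ()[s0] -> (s0 ceildiv 4) with `%n` as its only operand.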
void mlir::affine::getTripCountMapAndOperands(
    AffineForOp forOp, AffineMap *tripCountMap,
    SmallVectorImpl<Value> *tripCountOperands) {
  MLIRContext *context = forOp.getContext();
  int64_t step = forOp.getStepAsInt();
  int64_t loopSpan;
  if (forOp.hasConstantBounds()) {
    int64_t lb = forOp.getConstantLowerBound();
    int64_t ub = forOp.getConstantUpperBound();
    loopSpan = ub - lb;
    if (loopSpan < 0)
      loopSpan = 0;
    *tripCountMap = AffineMap::getConstantMap(ceilDiv(loopSpan, step), context);
    tripCountOperands->clear();
    return;
  }
  auto lbMap = forOp.getLowerBoundMap();
  auto ubMap = forOp.getUpperBoundMap();
  if (lbMap.getNumResults() != 1) {
    *tripCountMap = AffineMap();
    return;
  }

  // Difference of each upper bound expression from the single lower bound
  // expression (divided by the step) provides the expressions for the trip
  // count map.
  AffineValueMap ubValueMap(ubMap, forOp.getUpperBoundOperands());

  SmallVector<AffineExpr, 4> lbSplatExpr(ubValueMap.getNumResults(),
                                         lbMap.getResult(0));
  auto lbMapSplat = AffineMap::get(lbMap.getNumDims(), lbMap.getNumSymbols(),
                                   lbSplatExpr, context);
  AffineValueMap lbSplatValueMap(lbMapSplat, forOp.getLowerBoundOperands());

  AffineValueMap tripCountValueMap;
  AffineValueMap::difference(ubValueMap, lbSplatValueMap, &tripCountValueMap);
  for (unsigned i = 0, e = tripCountValueMap.getNumResults(); i < e; ++i)
    tripCountValueMap.setResult(i,
                                tripCountValueMap.getResult(i).ceilDiv(step));

  *tripCountMap = tripCountValueMap.getAffineMap();
  tripCountOperands->assign(tripCountValueMap.getOperands().begin(),
                            tripCountValueMap.getOperands().end());
}

/// Returns the trip count of the loop if it's a constant, std::nullopt
/// otherwise. This method uses affine expression analysis (in turn using
/// getTripCountMapAndOperands) and is able to determine constant trip counts
/// in non-trivial cases.
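///
/// For example (illustrative): `affine.for %i = 5 to 64 step 3` has a constant
/// trip count of ceildiv(64 - 5, 3) = 20; when the upper bound map has several
/// results, the minimum of the per-result trip counts is taken, provided all
/// of them are constant.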
std::optional<uint64_t> mlir::affine::getConstantTripCount(AffineForOp forOp) {
  SmallVector<Value, 4> operands;
  AffineMap map;
  getTripCountMapAndOperands(forOp, &map, &operands);

  if (!map)
    return std::nullopt;

  // Take the min if all trip counts are constant.
  std::optional<uint64_t> tripCount;
  for (auto resultExpr : map.getResults()) {
    if (auto constExpr = dyn_cast<AffineConstantExpr>(resultExpr)) {
      if (tripCount.has_value())
        tripCount =
            std::min(*tripCount, static_cast<uint64_t>(constExpr.getValue()));
      else
        tripCount = constExpr.getValue();
    } else
      return std::nullopt;
  }
  return tripCount;
}

/// Returns the greatest known integral divisor of the trip count. Affine
/// expression analysis is used (indirectly through
/// getTripCountMapAndOperands), and this method is thus able to determine
/// non-trivial divisors.
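///
/// For example (illustrative): if the trip count map simplifies to
/// ()[s0] -> (s0 * 4), the largest known divisor returned is 4 even though the
/// trip count itself isn't a compile-time constant; a constant zero trip count
/// is treated as having the largest possible divisor (2^64 - 1).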
uint64_t mlir::affine::getLargestDivisorOfTripCount(AffineForOp forOp) {
  SmallVector<Value, 4> operands;
  AffineMap map;
  getTripCountMapAndOperands(forOp, &map, &operands);

  if (!map)
    return 1;

  // The largest divisor of the trip count is the GCD of the individual largest
  // divisors.
  assert(map.getNumResults() >= 1 && "expected one or more results");
  std::optional<uint64_t> gcd;
  for (auto resultExpr : map.getResults()) {
    uint64_t thisGcd;
    if (auto constExpr = dyn_cast<AffineConstantExpr>(resultExpr)) {
      uint64_t tripCount = constExpr.getValue();
      // 0 iteration loops (greatest divisor is 2^64 - 1).
      if (tripCount == 0)
        thisGcd = std::numeric_limits<uint64_t>::max();
      else
        // The greatest divisor is the trip count.
        thisGcd = tripCount;
    } else {
      // Trip count is not a known constant; use its largest known divisor.
      thisGcd = resultExpr.getLargestKnownDivisor();
    }
    if (gcd.has_value())
      gcd = std::gcd(*gcd, thisGcd);
    else
      gcd = thisGcd;
  }
  assert(gcd.has_value() && "value expected per above logic");
  return *gcd;
}

/// Given an affine.for `iv` and an access `index` of type index, returns true
/// if `index` is independent of `iv` and false otherwise.
///
/// Prerequisite: `iv` is an affine.for induction variable and `index` is of
/// 'index' type.
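///
/// For example (illustrative): inside `affine.for %i`, an index produced by
/// `affine.apply affine_map<(d0) -> (d0 + 1)>(%i)` varies with `%i` and is not
/// invariant, whereas an index defined above the loop (or derived only from
/// other IVs and symbols) is.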
static bool isAccessIndexInvariant(Value iv, Value index) {
  assert(isAffineForInductionVar(iv) && "iv must be an affine.for iv");
  assert(isa<IndexType>(index.getType()) && "index must be of 'index' type");
  auto map = AffineMap::getMultiDimIdentityMap(/*numDims=*/1, iv.getContext());
  SmallVector<Value> operands = {index};
  AffineValueMap avm(map, operands);
  avm.composeSimplifyAndCanonicalize();
  return !avm.isFunctionOf(0, iv);
}

// Pre-requisite: Loop bounds should be in canonical form.
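// For example (illustrative): inside `affine.for %i`, an access such as
// `affine.load %A[%j]` with `%j` defined above the loop is invariant with
// respect to the loop, while `affine.load %A[%i]` is not.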
template <typename LoadOrStoreOp>
bool mlir::affine::isInvariantAccess(LoadOrStoreOp memOp, AffineForOp forOp) {
  AffineValueMap avm(memOp.getAffineMap(), memOp.getMapOperands());
  avm.composeSimplifyAndCanonicalize();
  return !llvm::is_contained(avm.getOperands(), forOp.getInductionVar());
}

// Explicitly instantiate the template so that the compiler knows we need them.
template bool mlir::affine::isInvariantAccess(AffineReadOpInterface,
                                              AffineForOp);
template bool mlir::affine::isInvariantAccess(AffineWriteOpInterface,
                                              AffineForOp);
template bool mlir::affine::isInvariantAccess(AffineLoadOp, AffineForOp);
template bool mlir::affine::isInvariantAccess(AffineStoreOp, AffineForOp);

DenseSet<Value> mlir::affine::getInvariantAccesses(Value iv,
                                                   ArrayRef<Value> indices) {
  DenseSet<Value> res;
  for (auto val : indices) {
    if (isAccessIndexInvariant(iv, val)) {
      res.insert(val);
    }
  }
  return res;
}

// TODO: check access stride.
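// For example (illustrative): with `affine.for %j` as the loop, the access
// `affine.load %A[%i, %j]` on an identity-layout memref varies only along the
// fastest-varying memref dimension, so *memRefDim is set to 0; the access
// `affine.load %A[%j, %i]` would instead set *memRefDim to 1.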
template <typename LoadOrStoreOp>
bool mlir::affine::isContiguousAccess(Value iv, LoadOrStoreOp memoryOp,
                                      int *memRefDim) {
  static_assert(llvm::is_one_of<LoadOrStoreOp, AffineReadOpInterface,
                                AffineWriteOpInterface>::value,
                "Must be called on either an affine read or write op");
  assert(memRefDim && "memRefDim == nullptr");
  auto memRefType = memoryOp.getMemRefType();

  if (!memRefType.getLayout().isIdentity())
    return memoryOp.emitError("NYI: non-trivial layout map"), false;

  int uniqueVaryingIndexAlongIv = -1;
  auto accessMap = memoryOp.getAffineMap();
  SmallVector<Value, 4> mapOperands(memoryOp.getMapOperands());
  unsigned numDims = accessMap.getNumDims();
  for (unsigned i = 0, e = memRefType.getRank(); i < e; ++i) {
    // Gather map operands used in result expr 'i' in 'exprOperands'.
    SmallVector<Value, 4> exprOperands;
    auto resultExpr = accessMap.getResult(i);
    resultExpr.walk([&](AffineExpr expr) {
      if (auto dimExpr = dyn_cast<AffineDimExpr>(expr))
        exprOperands.push_back(mapOperands[dimExpr.getPosition()]);
      else if (auto symExpr = dyn_cast<AffineSymbolExpr>(expr))
        exprOperands.push_back(mapOperands[numDims + symExpr.getPosition()]);
    });
    // Check access invariance of each operand in 'exprOperands'.
    for (Value exprOperand : exprOperands) {
      if (!isAccessIndexInvariant(iv, exprOperand)) {
        if (uniqueVaryingIndexAlongIv != -1) {
          // 2+ varying indices -> do not vectorize along iv.
          return false;
        }
        uniqueVaryingIndexAlongIv = i;
      }
    }
  }

  if (uniqueVaryingIndexAlongIv == -1)
    *memRefDim = -1;
  else
    *memRefDim = memRefType.getRank() - (uniqueVaryingIndexAlongIv + 1);
  return true;
}

template bool mlir::affine::isContiguousAccess(Value iv,
                                               AffineReadOpInterface loadOp,
                                               int *memRefDim);
template bool mlir::affine::isContiguousAccess(Value iv,
                                               AffineWriteOpInterface loadOp,
                                               int *memRefDim);

template <typename LoadOrStoreOp>
static bool isVectorElement(LoadOrStoreOp memoryOp) {
  auto memRefType = memoryOp.getMemRefType();
  return isa<VectorType>(memRefType.getElementType());
}

using VectorizableOpFun = std::function<bool(AffineForOp, Operation &)>;

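/// Returns true if the body of `loop` looks vectorizable: there are no
/// affine.if ops in the nest, no ops with operand or result types (or memref
/// element types) that are not valid vector element types, no ops with regions
/// other than affine.if/affine.for, nothing matched by
/// `vectorTransferMatcher`, all affine loads/stores act on scalar (non-vector)
/// element types, and, when provided, `isVectorizableOp` holds for every
/// load/store in the nest.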
static bool
isVectorizableLoopBodyWithOpCond(AffineForOp loop,
                                 const VectorizableOpFun &isVectorizableOp,
                                 NestedPattern &vectorTransferMatcher) {
  auto *forOp = loop.getOperation();

  // No vectorization across conditionals for now.
  auto conditionals = matcher::If();
  SmallVector<NestedMatch, 8> conditionalsMatched;
  conditionals.match(forOp, &conditionalsMatched);
  if (!conditionalsMatched.empty()) {
    return false;
  }

  // No vectorization for ops with operand or result types that are not
  // vectorizable.
  auto types = matcher::Op([](Operation &op) -> bool {
    if (llvm::any_of(op.getOperandTypes(), [](Type type) {
          if (MemRefType t = dyn_cast<MemRefType>(type))
            return !VectorType::isValidElementType(t.getElementType());
          return !VectorType::isValidElementType(type);
        }))
      return true;
    return llvm::any_of(op.getResultTypes(), [](Type type) {
      return !VectorType::isValidElementType(type);
    });
  });
  SmallVector<NestedMatch, 8> opsMatched;
  types.match(forOp, &opsMatched);
  if (!opsMatched.empty()) {
    return false;
  }

  // No vectorization across unknown regions.
  auto regions = matcher::Op([](Operation &op) -> bool {
    return op.getNumRegions() != 0 && !isa<AffineIfOp, AffineForOp>(op);
  });
  SmallVector<NestedMatch, 8> regionsMatched;
  regions.match(forOp, &regionsMatched);
  if (!regionsMatched.empty()) {
    return false;
  }

  SmallVector<NestedMatch, 8> vectorTransfersMatched;
  vectorTransferMatcher.match(forOp, &vectorTransfersMatched);
  if (!vectorTransfersMatched.empty()) {
    return false;
  }

  auto loadAndStores = matcher::Op(matcher::isLoadOrStore);
  SmallVector<NestedMatch, 8> loadAndStoresMatched;
  loadAndStores.match(forOp, &loadAndStoresMatched);
  for (auto ls : loadAndStoresMatched) {
    auto *op = ls.getMatchedOperation();
    auto load = dyn_cast<AffineLoadOp>(op);
    auto store = dyn_cast<AffineStoreOp>(op);
    // Only scalar types are considered vectorizable; all loads/stores must be
    // vectorizable for a loop to qualify as vectorizable.
    // TODO: ponder whether we want to be more general here.
    bool vector = load ? isVectorElement(load) : isVectorElement(store);
    if (vector) {
      return false;
    }
    if (isVectorizableOp && !isVectorizableOp(loop, *op)) {
      return false;
    }
  }
  return true;
}

bool mlir::affine::isVectorizableLoopBody(
    AffineForOp loop, int *memRefDim, NestedPattern &vectorTransferMatcher) {
  *memRefDim = -1;
  VectorizableOpFun fun([memRefDim](AffineForOp loop, Operation &op) {
    auto load = dyn_cast<AffineLoadOp>(op);
    auto store = dyn_cast<AffineStoreOp>(op);
    int thisOpMemRefDim = -1;
    bool isContiguous =
        load ? isContiguousAccess(loop.getInductionVar(),
                                  cast<AffineReadOpInterface>(*load),
                                  &thisOpMemRefDim)
             : isContiguousAccess(loop.getInductionVar(),
                                  cast<AffineWriteOpInterface>(*store),
                                  &thisOpMemRefDim);
    if (thisOpMemRefDim != -1) {
      // If memory accesses vary across different dimensions then the loop is
      // not vectorizable.
      if (*memRefDim != -1 && *memRefDim != thisOpMemRefDim)
        return false;
      *memRefDim = thisOpMemRefDim;
    }
    return isContiguous;
  });
  return isVectorizableLoopBodyWithOpCond(loop, fun, vectorTransferMatcher);
}

bool mlir::affine::isVectorizableLoopBody(
    AffineForOp loop, NestedPattern &vectorTransferMatcher) {
  return isVectorizableLoopBodyWithOpCond(loop, nullptr, vectorTransferMatcher);
}

/// Checks whether SSA dominance would be violated if a for op's body
/// operations are shifted by the specified shifts. This method checks if a
/// 'def' and all its uses have the same shift factor.
// TODO: extend this to check for memory-based dependence violation when we
// have the support.
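//
// For example (illustrative): if the op defining a value %v is assigned
// shift 1 while an op in the same block that uses %v is assigned shift 0, the
// shifts are invalid; assigning both ops the same shift (or shifting only ops
// with no in-block users) keeps dominance intact.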
bool mlir::affine::isOpwiseShiftValid(AffineForOp forOp,
                                      ArrayRef<uint64_t> shifts) {
  auto *forBody = forOp.getBody();
  assert(shifts.size() == forBody->getOperations().size());

  // Work backwards over the body of the block so that the shift of a use's
  // ancestor operation in the block gets recorded before it's looked up.
  DenseMap<Operation *, uint64_t> forBodyShift;
  for (const auto &it :
       llvm::enumerate(llvm::reverse(forBody->getOperations()))) {
    auto &op = it.value();

    // Get the index of the current operation; note that we are iterating in
    // reverse, so we need to fix it up.
    size_t index = shifts.size() - it.index() - 1;

    // Remember the shift of this operation.
    uint64_t shift = shifts[index];
    forBodyShift.try_emplace(&op, shift);

    // Validate the results of this operation if it were to be shifted.
    for (unsigned i = 0, e = op.getNumResults(); i < e; ++i) {
      Value result = op.getResult(i);
      for (auto *user : result.getUsers()) {
        // If an ancestor operation doesn't lie in the block of forOp,
        // there is no shift to check.
        if (auto *ancOp = forBody->findAncestorOpInBlock(*user)) {
          assert(forBodyShift.count(ancOp) > 0 && "ancestor expected in map");
          if (shift != forBodyShift[ancOp])
            return false;
        }
      }
    }
  }
  return true;
}