//===- LoopAnalysis.cpp - Misc loop analysis routines ---------------------===//
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements miscellaneous loop analysis routines. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h" |
14 | |
15 | #include "mlir/Analysis/SliceAnalysis.h" |
16 | #include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h" |
17 | #include "mlir/Dialect/Affine/Analysis/AffineStructures.h" |
18 | #include "mlir/Dialect/Affine/Analysis/NestedMatcher.h" |
19 | #include "mlir/Dialect/Affine/IR/AffineOps.h" |
20 | #include "mlir/Dialect/Affine/IR/AffineValueMap.h" |
21 | #include "mlir/Support/MathExtras.h" |
22 | |
23 | #include "llvm/ADT/DenseSet.h" |
24 | #include "llvm/ADT/SmallPtrSet.h" |
25 | #include "llvm/ADT/SmallString.h" |
26 | #include <numeric> |
27 | #include <optional> |
28 | #include <type_traits> |
29 | |
30 | using namespace mlir; |
31 | using namespace mlir::affine; |
32 | |
/// Populates `tripCountMap` and `tripCountOperands` with the trip count of
/// the loop as an affine map and its operands if the trip count is
/// expressible as an affine expression, and with a null map otherwise. The
/// trip count expressions are simplified before returning. This method only
/// utilizes map composition to construct lower and upper bounds before
/// computing the trip count expressions.
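///
/// For example (illustrative), for a loop such as
///   affine.for %i = %lb to min affine_map<()[s0, s1] -> (s0, s1)>()[%a, %b]
/// the resulting map has one result per upper bound expression (here, roughly
/// %a - %lb and %b - %lb, each ceildiv'd by the step); the actual trip count
/// is the minimum over those results.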
38 | void mlir::affine::getTripCountMapAndOperands( |
39 | AffineForOp forOp, AffineMap *tripCountMap, |
40 | SmallVectorImpl<Value> *tripCountOperands) { |
41 | MLIRContext *context = forOp.getContext(); |
42 | int64_t step = forOp.getStepAsInt(); |
43 | int64_t loopSpan; |
44 | if (forOp.hasConstantBounds()) { |
45 | int64_t lb = forOp.getConstantLowerBound(); |
46 | int64_t ub = forOp.getConstantUpperBound(); |
47 | loopSpan = ub - lb; |
48 | if (loopSpan < 0) |
49 | loopSpan = 0; |
    *tripCountMap = AffineMap::getConstantMap(ceilDiv(loopSpan, step), context);
51 | tripCountOperands->clear(); |
52 | return; |
53 | } |
54 | auto lbMap = forOp.getLowerBoundMap(); |
55 | auto ubMap = forOp.getUpperBoundMap(); |
56 | if (lbMap.getNumResults() != 1) { |
57 | *tripCountMap = AffineMap(); |
58 | return; |
59 | } |
60 | |
61 | // Difference of each upper bound expression from the single lower bound |
62 | // expression (divided by the step) provides the expressions for the trip |
63 | // count map. |
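  // For instance (illustrative), with a lower bound map (d0) -> (d0) and an
  // upper bound map having two results, the lower bound expression is
  // splatted below into (d0) -> (d0, d0) so that the two value maps can be
  // subtracted result by result.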
64 | AffineValueMap ubValueMap(ubMap, forOp.getUpperBoundOperands()); |
65 | |
66 | SmallVector<AffineExpr, 4> lbSplatExpr(ubValueMap.getNumResults(), |
67 | lbMap.getResult(0)); |
68 | auto lbMapSplat = AffineMap::get(lbMap.getNumDims(), lbMap.getNumSymbols(), |
69 | lbSplatExpr, context); |
70 | AffineValueMap lbSplatValueMap(lbMapSplat, forOp.getLowerBoundOperands()); |
71 | |
72 | AffineValueMap tripCountValueMap; |
  AffineValueMap::difference(ubValueMap, lbSplatValueMap, &tripCountValueMap);
74 | for (unsigned i = 0, e = tripCountValueMap.getNumResults(); i < e; ++i) |
    tripCountValueMap.setResult(i,
                                tripCountValueMap.getResult(i).ceilDiv(step));
77 | |
78 | *tripCountMap = tripCountValueMap.getAffineMap(); |
  tripCountOperands->assign(tripCountValueMap.getOperands().begin(),
                            tripCountValueMap.getOperands().end());
81 | } |
82 | |
/// Returns the trip count of the loop if it's a constant, std::nullopt
/// otherwise. This method uses affine expression analysis (in turn using
/// getTripCountMapAndOperands) and is able to determine the constant trip
/// count in non-trivial cases.
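///
/// For instance (hypothetical usage), a pass that fully unrolls small
/// constant trip count loops could do:
///   if (std::optional<uint64_t> tripCount = getConstantTripCount(forOp))
///     if (*tripCount <= kUnrollThreshold)
///       (void)loopUnrollFull(forOp);
/// where `kUnrollThreshold` is a made-up constant.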
87 | std::optional<uint64_t> mlir::affine::getConstantTripCount(AffineForOp forOp) { |
88 | SmallVector<Value, 4> operands; |
89 | AffineMap map; |
90 | getTripCountMapAndOperands(forOp, &map, &operands); |
91 | |
92 | if (!map) |
93 | return std::nullopt; |
94 | |
95 | // Take the min if all trip counts are constant. |
96 | std::optional<uint64_t> tripCount; |
97 | for (auto resultExpr : map.getResults()) { |
    if (auto constExpr = dyn_cast<AffineConstantExpr>(resultExpr)) {
99 | if (tripCount.has_value()) |
        tripCount =
            std::min(*tripCount, static_cast<uint64_t>(constExpr.getValue()));
102 | else |
103 | tripCount = constExpr.getValue(); |
104 | } else |
105 | return std::nullopt; |
106 | } |
107 | return tripCount; |
108 | } |
109 | |
/// Returns the greatest known integral divisor of the trip count. Affine
/// expression analysis is used (indirectly through
/// getTripCountMapAndOperands), and this method is thus able to determine
/// non-trivial divisors.
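///
/// For example (illustrative), for
///   affine.for %i = 0 to affine_map<()[s0] -> (128 * s0)>()[%N]
/// the trip count 128 * %N is not a constant, but its largest known divisor
/// is 128.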
113 | uint64_t mlir::affine::getLargestDivisorOfTripCount(AffineForOp forOp) { |
114 | SmallVector<Value, 4> operands; |
115 | AffineMap map; |
116 | getTripCountMapAndOperands(forOp, &map, &operands); |
117 | |
118 | if (!map) |
119 | return 1; |
120 | |
121 | // The largest divisor of the trip count is the GCD of the individual largest |
122 | // divisors. |
  assert(map.getNumResults() >= 1 && "expected one or more results");
124 | std::optional<uint64_t> gcd; |
125 | for (auto resultExpr : map.getResults()) { |
126 | uint64_t thisGcd; |
    if (auto constExpr = dyn_cast<AffineConstantExpr>(resultExpr)) {
128 | uint64_t tripCount = constExpr.getValue(); |
129 | // 0 iteration loops (greatest divisor is 2^64 - 1). |
130 | if (tripCount == 0) |
131 | thisGcd = std::numeric_limits<uint64_t>::max(); |
132 | else |
133 | // The greatest divisor is the trip count. |
134 | thisGcd = tripCount; |
135 | } else { |
136 | // Trip count is not a known constant; return its largest known divisor. |
137 | thisGcd = resultExpr.getLargestKnownDivisor(); |
138 | } |
139 | if (gcd.has_value()) |
      gcd = std::gcd(*gcd, thisGcd);
141 | else |
142 | gcd = thisGcd; |
143 | } |
  assert(gcd.has_value() && "value expected per above logic");
145 | return *gcd; |
146 | } |
147 | |
/// Given an affine.for `iv` and an access `index` of type index, returns true
/// if `index` is independent of `iv` and false otherwise.
///
/// Prerequisite: `iv` and `index` must be of the proper types.
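///
/// For example (illustrative), with `%i` as `iv`, an index
///   %idx = affine.apply affine_map<(d0) -> (d0 + 1)>(%j)
/// is invariant when `%j` is defined above the loop, whereas an index derived
/// from `%i` itself is not.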
152 | static bool isAccessIndexInvariant(Value iv, Value index) { |
  assert(isAffineForInductionVar(iv) && "iv must be an affine.for iv");
  assert(isa<IndexType>(index.getType()) && "index must be of 'index' type");
  auto map = AffineMap::getMultiDimIdentityMap(/*numDims=*/1, iv.getContext());
156 | SmallVector<Value> operands = {index}; |
157 | AffineValueMap avm(map, operands); |
158 | avm.composeSimplifyAndCanonicalize(); |
  return !avm.isFunctionOf(/*idx=*/0, iv);
160 | } |
161 | |
// Prerequisite: Loop bounds should be in canonical form.
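// For example (illustrative), an access `affine.load %A[%j]` nested inside
// `affine.for %i` is invariant w.r.t. that loop as long as `%j` does not
// compose to a function of `%i`.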
163 | template <typename LoadOrStoreOp> |
164 | bool mlir::affine::isInvariantAccess(LoadOrStoreOp memOp, AffineForOp forOp) { |
165 | AffineValueMap avm(memOp.getAffineMap(), memOp.getMapOperands()); |
166 | avm.composeSimplifyAndCanonicalize(); |
167 | return !llvm::is_contained(avm.getOperands(), forOp.getInductionVar()); |
168 | } |
169 | |
170 | // Explicitly instantiate the template so that the compiler knows we need them. |
171 | template bool mlir::affine::isInvariantAccess(AffineReadOpInterface, |
172 | AffineForOp); |
173 | template bool mlir::affine::isInvariantAccess(AffineWriteOpInterface, |
174 | AffineForOp); |
175 | template bool mlir::affine::isInvariantAccess(AffineLoadOp, AffineForOp); |
176 | template bool mlir::affine::isInvariantAccess(AffineStoreOp, AffineForOp); |
177 | |
178 | DenseSet<Value> mlir::affine::getInvariantAccesses(Value iv, |
179 | ArrayRef<Value> indices) { |
180 | DenseSet<Value> res; |
181 | for (auto val : indices) { |
    if (isAccessIndexInvariant(iv, val)) {
      res.insert(val);
184 | } |
185 | } |
186 | return res; |
187 | } |
188 | |
189 | // TODO: check access stride. |
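// Returns true if the access of `memoryOp` is either invariant along `iv` or
// varies along exactly one memref dimension w.r.t. `iv` (a sketch of the
// contract inferred from the implementation below). On success, `*memRefDim`
// is set to that dimension counted from the fastest-varying (trailing) one,
// or to -1 if the access is invariant along `iv`. For example (illustrative),
// for `affine.load %A[%j, %i]` nested in `affine.for %i`, only the trailing
// dimension varies with `%i`, so `*memRefDim` is set to 0.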
190 | template <typename LoadOrStoreOp> |
191 | bool mlir::affine::isContiguousAccess(Value iv, LoadOrStoreOp memoryOp, |
192 | int *memRefDim) { |
193 | static_assert(llvm::is_one_of<LoadOrStoreOp, AffineReadOpInterface, |
194 | AffineWriteOpInterface>::value, |
195 | "Must be called on either an affine read or write op" ); |
196 | assert(memRefDim && "memRefDim == nullptr" ); |
197 | auto memRefType = memoryOp.getMemRefType(); |
198 | |
199 | if (!memRefType.getLayout().isIdentity()) |
    return memoryOp.emitError("NYI: non-trivial layout map"), false;
201 | |
202 | int uniqueVaryingIndexAlongIv = -1; |
203 | auto accessMap = memoryOp.getAffineMap(); |
204 | SmallVector<Value, 4> mapOperands(memoryOp.getMapOperands()); |
205 | unsigned numDims = accessMap.getNumDims(); |
206 | for (unsigned i = 0, e = memRefType.getRank(); i < e; ++i) { |
207 | // Gather map operands used in result expr 'i' in 'exprOperands'. |
208 | SmallVector<Value, 4> exprOperands; |
209 | auto resultExpr = accessMap.getResult(i); |
210 | resultExpr.walk([&](AffineExpr expr) { |
      if (auto dimExpr = dyn_cast<AffineDimExpr>(expr))
        exprOperands.push_back(mapOperands[dimExpr.getPosition()]);
      else if (auto symExpr = dyn_cast<AffineSymbolExpr>(expr))
        exprOperands.push_back(mapOperands[numDims + symExpr.getPosition()]);
215 | }); |
216 | // Check access invariance of each operand in 'exprOperands'. |
217 | for (Value exprOperand : exprOperands) { |
      if (!isAccessIndexInvariant(iv, exprOperand)) {
219 | if (uniqueVaryingIndexAlongIv != -1) { |
220 | // 2+ varying indices -> do not vectorize along iv. |
221 | return false; |
222 | } |
223 | uniqueVaryingIndexAlongIv = i; |
224 | } |
225 | } |
226 | } |
227 | |
228 | if (uniqueVaryingIndexAlongIv == -1) |
229 | *memRefDim = -1; |
230 | else |
231 | *memRefDim = memRefType.getRank() - (uniqueVaryingIndexAlongIv + 1); |
232 | return true; |
233 | } |
234 | |
235 | template bool mlir::affine::isContiguousAccess(Value iv, |
236 | AffineReadOpInterface loadOp, |
237 | int *memRefDim); |
238 | template bool mlir::affine::isContiguousAccess(Value iv, |
                                               AffineWriteOpInterface storeOp,
240 | int *memRefDim); |
241 | |
242 | template <typename LoadOrStoreOp> |
243 | static bool isVectorElement(LoadOrStoreOp memoryOp) { |
244 | auto memRefType = memoryOp.getMemRefType(); |
245 | return isa<VectorType>(memRefType.getElementType()); |
246 | } |
247 | |
248 | using VectorizableOpFun = std::function<bool(AffineForOp, Operation &)>; |
249 | |
250 | static bool |
251 | isVectorizableLoopBodyWithOpCond(AffineForOp loop, |
252 | const VectorizableOpFun &isVectorizableOp, |
253 | NestedPattern &vectorTransferMatcher) { |
254 | auto *forOp = loop.getOperation(); |
255 | |
256 | // No vectorization across conditionals for now. |
257 | auto conditionals = matcher::If(); |
258 | SmallVector<NestedMatch, 8> conditionalsMatched; |
  conditionals.match(forOp, &conditionalsMatched);
260 | if (!conditionalsMatched.empty()) { |
261 | return false; |
262 | } |
263 | |
264 | // No vectorization for ops with operand or result types that are not |
265 | // vectorizable. |
  auto types = matcher::Op([](Operation &op) -> bool {
    if (llvm::any_of(op.getOperandTypes(), [](Type type) {
268 | if (MemRefType t = dyn_cast<MemRefType>(type)) |
269 | return !VectorType::isValidElementType(t.getElementType()); |
270 | return !VectorType::isValidElementType(type); |
271 | })) |
272 | return true; |
    return llvm::any_of(op.getResultTypes(), [](Type type) {
274 | return !VectorType::isValidElementType(type); |
275 | }); |
276 | }); |
277 | SmallVector<NestedMatch, 8> opsMatched; |
  types.match(forOp, &opsMatched);
279 | if (!opsMatched.empty()) { |
280 | return false; |
281 | } |
282 | |
283 | // No vectorization across unknown regions. |
  auto regions = matcher::Op([](Operation &op) -> bool {
    return op.getNumRegions() != 0 && !isa<AffineIfOp, AffineForOp>(op);
286 | }); |
287 | SmallVector<NestedMatch, 8> regionsMatched; |
  regions.match(forOp, &regionsMatched);
289 | if (!regionsMatched.empty()) { |
290 | return false; |
291 | } |
292 | |
293 | SmallVector<NestedMatch, 8> vectorTransfersMatched; |
  vectorTransferMatcher.match(forOp, &vectorTransfersMatched);
295 | if (!vectorTransfersMatched.empty()) { |
296 | return false; |
297 | } |
298 | |
  auto loadAndStores = matcher::Op(matcher::isLoadOrStore);
300 | SmallVector<NestedMatch, 8> loadAndStoresMatched; |
  loadAndStores.match(forOp, &loadAndStoresMatched);
302 | for (auto ls : loadAndStoresMatched) { |
303 | auto *op = ls.getMatchedOperation(); |
304 | auto load = dyn_cast<AffineLoadOp>(op); |
305 | auto store = dyn_cast<AffineStoreOp>(op); |
    // Only scalar types are considered vectorizable; all loads/stores must
    // be vectorizable for a loop to qualify as vectorizable.
    // TODO: ponder whether we want to be more general here.
309 | bool vector = load ? isVectorElement(load) : isVectorElement(store); |
310 | if (vector) { |
311 | return false; |
312 | } |
313 | if (isVectorizableOp && !isVectorizableOp(loop, *op)) { |
314 | return false; |
315 | } |
316 | } |
317 | return true; |
318 | } |
319 | |
320 | bool mlir::affine::isVectorizableLoopBody( |
321 | AffineForOp loop, int *memRefDim, NestedPattern &vectorTransferMatcher) { |
322 | *memRefDim = -1; |
323 | VectorizableOpFun fun([memRefDim](AffineForOp loop, Operation &op) { |
324 | auto load = dyn_cast<AffineLoadOp>(op); |
325 | auto store = dyn_cast<AffineStoreOp>(op); |
326 | int thisOpMemRefDim = -1; |
327 | bool isContiguous = |
328 | load ? isContiguousAccess(loop.getInductionVar(), |
329 | cast<AffineReadOpInterface>(*load), |
330 | &thisOpMemRefDim) |
331 | : isContiguousAccess(loop.getInductionVar(), |
332 | cast<AffineWriteOpInterface>(*store), |
333 | &thisOpMemRefDim); |
334 | if (thisOpMemRefDim != -1) { |
335 | // If memory accesses vary across different dimensions then the loop is |
336 | // not vectorizable. |
337 | if (*memRefDim != -1 && *memRefDim != thisOpMemRefDim) |
338 | return false; |
339 | *memRefDim = thisOpMemRefDim; |
340 | } |
341 | return isContiguous; |
342 | }); |
343 | return isVectorizableLoopBodyWithOpCond(loop, fun, vectorTransferMatcher); |
344 | } |
345 | |
346 | bool mlir::affine::isVectorizableLoopBody( |
347 | AffineForOp loop, NestedPattern &vectorTransferMatcher) { |
348 | return isVectorizableLoopBodyWithOpCond(loop, nullptr, vectorTransferMatcher); |
349 | } |
350 | |
351 | /// Checks whether SSA dominance would be violated if a for op's body |
352 | /// operations are shifted by the specified shifts. This method checks if a |
353 | /// 'def' and all its uses have the same shift factor. |
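///
/// For example (illustrative), if op B in the loop body uses a result of
/// op A, shifts {A: 1, B: 2} are rejected because A and its user B would no
/// longer be shifted by the same amount, whereas {A: 2, B: 2} are accepted.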
// TODO: extend this to check for memory-based dependence violation when we
// have the support.
356 | bool mlir::affine::isOpwiseShiftValid(AffineForOp forOp, |
357 | ArrayRef<uint64_t> shifts) { |
358 | auto *forBody = forOp.getBody(); |
359 | assert(shifts.size() == forBody->getOperations().size()); |
360 | |
361 | // Work backwards over the body of the block so that the shift of a use's |
362 | // ancestor operation in the block gets recorded before it's looked up. |
363 | DenseMap<Operation *, uint64_t> forBodyShift; |
364 | for (const auto &it : |
365 | llvm::enumerate(llvm::reverse(forBody->getOperations()))) { |
366 | auto &op = it.value(); |
367 | |
368 | // Get the index of the current operation, note that we are iterating in |
369 | // reverse so we need to fix it up. |
370 | size_t index = shifts.size() - it.index() - 1; |
371 | |
372 | // Remember the shift of this operation. |
373 | uint64_t shift = shifts[index]; |
374 | forBodyShift.try_emplace(&op, shift); |
375 | |
376 | // Validate the results of this operation if it were to be shifted. |
377 | for (unsigned i = 0, e = op.getNumResults(); i < e; ++i) { |
378 | Value result = op.getResult(i); |
379 | for (auto *user : result.getUsers()) { |
380 | // If an ancestor operation doesn't lie in the block of forOp, |
381 | // there is no shift to check. |
382 | if (auto *ancOp = forBody->findAncestorOpInBlock(*user)) { |
          assert(forBodyShift.count(ancOp) > 0 && "ancestor expected in map");
384 | if (shift != forBodyShift[ancOp]) |
385 | return false; |
386 | } |
387 | } |
388 | } |
389 | } |
390 | return true; |
391 | } |
392 | |