1//===- IterationGraphSorter.cpp -------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "IterationGraphSorter.h"
10
11#include "mlir/Dialect/Linalg/IR/Linalg.h"
12#include "mlir/Dialect/SparseTensor/IR/SparseTensor.h"
13#include "mlir/Dialect/Utils/StructuredOpsUtils.h"
14#include "mlir/IR/AffineExprVisitor.h"
15#include "mlir/IR/BuiltinTypes.h"
16
17using namespace mlir;
18using namespace mlir::sparse_tensor;
19
20namespace {
21
22/// A helper class that visits an affine expression and tries to find
23/// an AffineDimExpr to which the corresponding iterator from a GenericOp
24/// matches the desired iterator type. If there is no matched iterator
25/// type, the method returns the first DimExpr in the expression.
26class AffineDimFinder : public AffineExprVisitor<AffineDimFinder> {
27public:
28 explicit AffineDimFinder(ArrayRef<utils::IteratorType> itTypes)
29 : iterTypes(itTypes) {}
30
31 /// Overrides the visit method from AffineExprVisitor.
32 void visitDimExpr(AffineDimExpr expr) {
33 if (pickedDim == nullptr || pickIterType == iterTypes[expr.getPosition()])
34 pickedDim = expr;
35 }
36
37 /// Sets the desired iterator type that we want to pick.
38 void setPickedIterType(utils::IteratorType iterType) {
39 pickIterType = iterType;
40 }
41
42 /// Gets the desired AffineDimExpr.
43 AffineDimExpr getDimExpr() const {
44 return llvm::cast<AffineDimExpr>(Val: pickedDim);
45 }
46
47 /// Walks the graph in post order to find dim expr.
48 void walkPostOrder(AffineExpr expr) {
49 pickedDim = nullptr;
50 AffineExprVisitor<AffineDimFinder>::walkPostOrder(expr);
51 }
52
53private:
54 /// The picked AffineDimExpr after visit.
55 AffineExpr pickedDim;
56 /// The iterator type that we want.
57 utils::IteratorType pickIterType;
58 /// The mapping between levels and iterator types.
59 ArrayRef<utils::IteratorType> iterTypes;
60};
61
62/// Flattens an affine expression into a list of AffineDimExprs.
63struct AffineDimCollector : public AffineExprVisitor<AffineDimCollector> {
64 // Overrides method from AffineExprVisitor.
65 void visitDimExpr(AffineDimExpr expr) { dims.push_back(Elt: expr); }
66 SmallVector<AffineDimExpr> dims;
67};
68
69} // namespace
70
71inline static bool includesAny(SortMask mask1, SortMask mask2) {
72 return static_cast<unsigned>(mask1) & static_cast<unsigned>(mask2);
73}
74
75inline static bool includesDenseInput(SortMask mask) {
76 return includesAny(mask1: mask, mask2: SortMask::kIncludeDenseInput);
77}
78
79inline static bool includesDenseOutput(SortMask mask) {
80 return includesAny(mask1: mask, mask2: SortMask::kIncludeDenseOutput);
81}
82
83AffineMap IterationGraphSorter::topoSort() {
84 // The sorted result will put the first Reduction iterator to the
85 // latest possible position.
86 std::vector<unsigned> redIt; // reduce iterator with 0 degree
87 std::vector<unsigned> parIt; // parallel iterator with 0 degree
88 const unsigned numLoops = getNumLoops();
89 for (unsigned i = 0; i < numLoops; i++) {
90 if (inDegree[i] == 0) {
91 if (iterTypes[i] == utils::IteratorType::reduction)
92 redIt.push_back(x: i);
93 else
94 parIt.push_back(x: i);
95 }
96 }
97
98 SmallVector<unsigned> loopOrder;
99 while (!redIt.empty() || !parIt.empty()) {
100 // We always prefer a parallel loop over a reduction loop because putting
101 // a reduction loop early might make the loop sequence inadmissible.
102 auto &it = !parIt.empty() ? parIt : redIt;
103 auto src = it.back();
104 loopOrder.push_back(Elt: src);
105 it.pop_back();
106 // Update in-degree, and push 0-degree node into worklist.
107 for (unsigned dst = 0; dst < numLoops; dst++) {
108 if (itGraph[src][dst] && --inDegree[dst] == 0) {
109 if (iterTypes[dst] == utils::IteratorType::reduction)
110 redIt.push_back(x: dst);
111 else
112 parIt.push_back(x: dst);
113 }
114 }
115 }
116
117 // Return the topological sort on success.
118 if (loopOrder.size() == numLoops)
119 return AffineMap::getPermutationMap(permutation: loopOrder, context: out.getContext());
120
121 // Cycle detected.
122 return AffineMap();
123}
124
125IterationGraphSorter
126IterationGraphSorter::fromGenericOp(linalg::GenericOp genericOp) {
127 // Must be a demapped sparse kernel.
128 assert(!hasAnyNonIdentityOperandsOrResults(genericOp) &&
129 hasAnySparseOperandOrResult(genericOp) &&
130 genericOp.getNumDpsInits() == 1);
131
132 SmallVector<AffineMap> loopMap = genericOp.getIndexingMapsArray();
133 SmallVector<Value> ins = genericOp.getDpsInputs();
134
135 AffineMap outMap = loopMap.back();
136 loopMap.pop_back();
137
138 Value out = genericOp.getDpsInitOperand(i: 0)->get();
139 SmallVector<utils::IteratorType> iterTypes =
140 genericOp.getIteratorTypesArray();
141
142 return IterationGraphSorter(std::move(ins), std::move(loopMap), out, outMap,
143 std::move(iterTypes));
144}
145
146IterationGraphSorter::IterationGraphSorter(
147 SmallVector<Value> &&ins, SmallVector<AffineMap> &&loop2InsLvl, Value out,
148 AffineMap loop2OutLvl, SmallVector<utils::IteratorType> &&iterTypes)
149 : ins(std::move(ins)), loop2InsLvl(std::move(loop2InsLvl)), out(out),
150 loop2OutLvl(loop2OutLvl), iterTypes(std::move(iterTypes)) {
151 // One map per tensor.
152 assert(loop2InsLvl.size() == ins.size());
153 // All the affine maps have the same number of dimensions (loops).
154 assert(llvm::all_equal(llvm::map_range(
155 loop2InsLvl, [](AffineMap m) { return m.getNumDims(); })));
156 // The number of results of the map should match the rank of the tensor.
157 assert(llvm::all_of(llvm::zip(loop2InsLvl, ins), [](auto mvPair) {
158 auto [m, v] = mvPair;
159 return m.getNumResults() == cast<ShapedType>(v.getType()).getRank();
160 }));
161
162 itGraph.resize(new_size: getNumLoops(), x: std::vector<bool>(getNumLoops(), false));
163 inDegree.resize(new_size: getNumLoops());
164}
165
166AffineMap IterationGraphSorter::sort(SortMask mask, Value ignored) {
167 // Reset the adjacency matrix that represents the iteration graph.
168 for (auto &row : itGraph)
169 llvm::fill(Range&: row, Value: false);
170
171 // Reset in-degree.
172 llvm::fill(Range&: inDegree, Value: 0);
173
174 // Add the constraints for the loop to level map.
175 for (auto [in, map] : llvm::zip(t&: ins, u&: loop2InsLvl)) {
176 // Get map and encoding.
177 const auto enc = getSparseTensorEncoding(type: in.getType());
178 // Skip dense inputs when not requested.
179 if ((!enc && !includesDenseInput(mask)) || in == ignored)
180 continue;
181 addConstraints(t: in, loop2LvlMap: map);
182 }
183
184 // Add the constraints for the output map.
185 const auto enc = getSparseTensorEncoding(type: out.getType());
186 if ((enc || includesDenseOutput(mask)) && out != ignored)
187 addConstraints(t: out, loop2LvlMap: loop2OutLvl);
188
189 // Return the topological sort (empty for cyclic).
190 return topoSort();
191}
192
void IterationGraphSorter::addConstraints(Value t, AffineMap loop2LvlMap) {
  // Adds a directed edge f -> t ("loop f runs before loop t") unless the
  // edge already exists or would be a self-loop; bumps t's in-degree.
  auto addIterOrdering = [this](unsigned f, unsigned t) {
    if (!itGraph[f][t] && f != t) {
      itGraph[f][t] = true;
      inDegree[t]++;
    }
  };

  // Set up a finder that prefers a reduction iterator when one representative
  // dim must be picked out of a compound affine expression.
  AffineDimFinder finder(iterTypes);
  finder.setPickedIterType(utils::IteratorType::reduction);

  // To compute the iteration graph for tensor[d0 + d1 + d3, d4 + d5 + d6],
  // we require there exist d_x in {d0, d1, d3} and d_y in {d4, d5, d6},
  // and d_x > d_y && {d0, d1, d3} - d_x > {d4, d5, d6} - d_y.
  // Only adjacent level pairs are constrained; transitivity does the rest.
  const Level lvlRank = loop2LvlMap.getNumResults();
  for (Level lvl = 1; lvl < lvlRank; lvl++) {
    // `fa` maps loops to the outer (from) level, `ta` to the inner (to) one.
    const AffineExpr fa = loop2LvlMap.getResult(idx: lvl - 1);
    const AffineExpr ta = loop2LvlMap.getResult(idx: lvl);

    if (llvm::isa<AffineDimExpr>(Val: fa) || llvm::isa<AffineDimExpr>(Val: ta)) {
      // Special case when at least one loop2LvlExp is a simple AffineDimExpr
      // (say, d0) and we require d0 > {d1, d2, ...} or {d1, d2, ...} > d0:
      // order every dim appearing in `fa` before every dim appearing in `ta`.
      AffineDimCollector fCollector;
      fCollector.walkPostOrder(expr: fa);
      AffineDimCollector tCollector;
      tCollector.walkPostOrder(expr: ta);

      for (auto fd : fCollector.dims) {
        for (auto td : tCollector.dims) {
          const unsigned f = fd.getPosition();
          const unsigned t = td.getPosition();
          addIterOrdering(f, t);
        }
      }
      continue;
    }

    // When both loop2LvlExprs are compound, we pick an arbitrary reduction
    // loop from lhs and rhs and use them as d_x and d_y.
    finder.walkPostOrder(expr: fa);
    const AffineDimExpr fexp = finder.getDimExpr();
    const unsigned fldx = fexp.getPosition();

    finder.walkPostOrder(expr: ta);
    const AffineDimExpr texp = finder.getDimExpr();
    const unsigned tldx = texp.getPosition();

    // d_x > d_y.
    addIterOrdering(fldx, tldx);

    AffineDimCollector fCollector;
    fCollector.walkPostOrder(expr: fa);
    AffineDimCollector tCollector;
    tCollector.walkPostOrder(expr: ta);

    // Make sure d_x and d_y come last within their own expressions: every
    // other dim in `fa` precedes d_x, and every other dim in `ta` precedes d_y.
    for (auto fd : fCollector.dims) {
      const unsigned f = fd.getPosition();
      addIterOrdering(f, fldx);
    }
    for (auto td : tCollector.dims) {
      const unsigned t = td.getPosition();
      addIterOrdering(t, tldx);
    }
    // {d0, d1, d3} - d_x > {d4, d5, d6} - d_y
    // This is to ensure that the affine expressions are reduced in sparse
    // tensor level ordering.
    for (auto fd : fCollector.dims) {
      const unsigned f = fd.getPosition();
      if (f == fldx) // skip d_x
        continue;
      for (auto td : tCollector.dims) {
        const unsigned t = td.getPosition();
        if (t == tldx) // skip d_y
          continue;
        addIterOrdering(f, t);
      }
    }
  }
}
274

source code of mlir/lib/Dialect/SparseTensor/Transforms/Utils/IterationGraphSorter.cpp