//===- IterationGraphSorter.cpp -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "IterationGraphSorter.h"

#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/Dialect/SparseTensor/IR/SparseTensor.h"
#include "mlir/Dialect/SparseTensor/IR/SparseTensorType.h"
#include "mlir/Dialect/Utils/StructuredOpsUtils.h"
#include "mlir/IR/AffineExprVisitor.h"
#include "mlir/IR/BuiltinTypes.h"

using namespace mlir;
using namespace mlir::sparse_tensor;

namespace {
22 | |
23 | /// A helper class that visits an affine expression and tries to find |
24 | /// an AffineDimExpr to which the corresponding iterator from a GenericOp |
25 | /// matches the desired iterator type. If there is no matched iterator |
26 | /// type, the method returns the first DimExpr in the expression. |
27 | class AffineDimFinder : public AffineExprVisitor<AffineDimFinder> { |
28 | public: |
29 | explicit AffineDimFinder(ArrayRef<utils::IteratorType> itTypes) |
30 | : iterTypes(itTypes) {} |
31 | |
32 | /// Overrides the visit method from AffineExprVisitor. |
33 | void visitDimExpr(AffineDimExpr expr) { |
34 | if (pickedDim == nullptr || pickIterType == iterTypes[expr.getPosition()]) |
35 | pickedDim = expr; |
36 | } |
37 | |
38 | /// Sets the desired iterator type that we want to pick. |
39 | void setPickedIterType(utils::IteratorType iterType) { |
40 | pickIterType = iterType; |
41 | } |
42 | |
43 | /// Gets the desired AffineDimExpr. |
44 | AffineDimExpr getDimExpr() const { |
45 | return llvm::cast<AffineDimExpr>(Val: pickedDim); |
46 | } |
47 | |
48 | /// Walks the graph in post order to find dim expr. |
49 | void walkPostOrder(AffineExpr expr) { |
50 | pickedDim = nullptr; |
51 | AffineExprVisitor<AffineDimFinder>::walkPostOrder(expr); |
52 | } |
53 | |
54 | private: |
55 | /// The picked AffineDimExpr after visit. |
56 | AffineExpr pickedDim; |
57 | /// The iterator type that we want. |
58 | utils::IteratorType pickIterType; |
59 | /// The mapping between levels and iterator types. |
60 | ArrayRef<utils::IteratorType> iterTypes; |
61 | }; |
62 | |
63 | /// Flattens an affine expression into a list of AffineDimExprs. |
64 | struct AffineDimCollector : public AffineExprVisitor<AffineDimCollector> { |
65 | // Overrides method from AffineExprVisitor. |
66 | void visitDimExpr(AffineDimExpr expr) { dims.push_back(Elt: expr); } |
67 | SmallVector<AffineDimExpr> dims; |
68 | }; |
69 | |
} // namespace
71 | |
72 | inline static bool includesAny(SortMask mask1, SortMask mask2) { |
73 | return static_cast<unsigned>(mask1) & static_cast<unsigned>(mask2); |
74 | } |
75 | |
76 | inline static bool includesDenseInput(SortMask mask) { |
77 | return includesAny(mask1: mask, mask2: SortMask::kIncludeDenseInput); |
78 | } |
79 | |
80 | inline static bool includesDenseOutput(SortMask mask) { |
81 | return includesAny(mask1: mask, mask2: SortMask::kIncludeDenseOutput); |
82 | } |
83 | |
84 | AffineMap IterationGraphSorter::topoSort() { |
85 | // The sorted result will put the first Reduction iterator to the |
86 | // latest possible position. |
87 | std::vector<unsigned> redIt; // reduce iterator with 0 degree |
88 | std::vector<unsigned> parIt; // parallel iterator with 0 degree |
89 | const unsigned numLoops = getNumLoops(); |
90 | for (unsigned i = 0; i < numLoops; i++) { |
91 | if (inDegree[i] == 0) { |
92 | if (iterTypes[i] == utils::IteratorType::reduction) |
93 | redIt.push_back(x: i); |
94 | else |
95 | parIt.push_back(x: i); |
96 | } |
97 | } |
98 | |
99 | SmallVector<unsigned> loopOrder; |
100 | while (!redIt.empty() || !parIt.empty()) { |
101 | // We always prefer a parallel loop over a reduction loop because putting |
102 | // a reduction loop early might make the loop sequence inadmissible. |
103 | auto &it = !parIt.empty() ? parIt : redIt; |
104 | auto src = it.back(); |
105 | loopOrder.push_back(Elt: src); |
106 | it.pop_back(); |
107 | // Update in-degree, and push 0-degree node into worklist. |
108 | for (unsigned dst = 0; dst < numLoops; dst++) { |
109 | if (itGraph[src][dst] && --inDegree[dst] == 0) { |
110 | if (iterTypes[dst] == utils::IteratorType::reduction) |
111 | redIt.push_back(x: dst); |
112 | else |
113 | parIt.push_back(x: dst); |
114 | } |
115 | } |
116 | } |
117 | |
118 | // Return the topological sort on success. |
119 | if (loopOrder.size() == numLoops) |
120 | return AffineMap::getPermutationMap(permutation: loopOrder, context: out.getContext()); |
121 | |
122 | // Cycle detected. |
123 | return AffineMap(); |
124 | } |
125 | |
126 | IterationGraphSorter |
127 | IterationGraphSorter::fromGenericOp(linalg::GenericOp genericOp) { |
128 | // Must be a demapped sparse kernel. |
129 | assert(!hasAnyNonIdentityOperandsOrResults(genericOp) && |
130 | hasAnySparseOperandOrResult(genericOp) && |
131 | genericOp.getNumDpsInits() == 1); |
132 | |
133 | SmallVector<AffineMap> loopMap = genericOp.getIndexingMapsArray(); |
134 | SmallVector<Value> ins = genericOp.getDpsInputs(); |
135 | |
136 | AffineMap outMap = loopMap.back(); |
137 | loopMap.pop_back(); |
138 | |
139 | Value out = genericOp.getDpsInitOperand(0)->get(); |
140 | SmallVector<utils::IteratorType> iterTypes = |
141 | genericOp.getIteratorTypesArray(); |
142 | |
143 | return IterationGraphSorter(std::move(ins), std::move(loopMap), out, outMap, |
144 | std::move(iterTypes)); |
145 | } |
146 | |
147 | IterationGraphSorter::IterationGraphSorter( |
148 | SmallVector<Value> &&ins, SmallVector<AffineMap> &&loop2InsLvl, Value out, |
149 | AffineMap loop2OutLvl, SmallVector<utils::IteratorType> &&iterTypes) |
150 | : ins(std::move(ins)), loop2InsLvl(std::move(loop2InsLvl)), out(out), |
151 | loop2OutLvl(loop2OutLvl), iterTypes(std::move(iterTypes)) { |
152 | // One map per tensor. |
153 | assert(loop2InsLvl.size() == ins.size()); |
154 | // All the affine maps have the same number of dimensions (loops). |
155 | assert(llvm::all_equal(llvm::map_range( |
156 | loop2InsLvl, [](AffineMap m) { return m.getNumDims(); }))); |
157 | // The number of results of the map should match the rank of the tensor. |
158 | assert(llvm::all_of(llvm::zip(loop2InsLvl, ins), [](auto mvPair) { |
159 | auto [m, v] = mvPair; |
160 | return m.getNumResults() == cast<ShapedType>(v.getType()).getRank(); |
161 | })); |
162 | |
163 | itGraph.resize(new_size: getNumLoops(), x: std::vector<bool>(getNumLoops(), false)); |
164 | inDegree.resize(new_size: getNumLoops()); |
165 | } |
166 | |
167 | AffineMap IterationGraphSorter::sort(SortMask mask, Value ignored) { |
168 | // Reset the adjacency matrix that represents the iteration graph. |
169 | for (auto &row : itGraph) |
170 | std::fill(first: row.begin(), last: row.end(), value: false); |
171 | |
172 | // Reset in-degree. |
173 | std::fill(first: inDegree.begin(), last: inDegree.end(), value: 0); |
174 | |
175 | // Add the constraints for the loop to level map. |
176 | for (auto [in, map] : llvm::zip(t&: ins, u&: loop2InsLvl)) { |
177 | // Get map and encoding. |
178 | const auto enc = getSparseTensorEncoding(in.getType()); |
179 | // Skip dense inputs when not requested. |
180 | if ((!enc && !includesDenseInput(mask)) || in == ignored) |
181 | continue; |
182 | addConstraints(t: in, loop2LvlMap: map); |
183 | } |
184 | |
185 | // Add the constraints for the output map. |
186 | const auto enc = getSparseTensorEncoding(out.getType()); |
187 | if ((enc || includesDenseOutput(mask)) && out != ignored) |
188 | addConstraints(t: out, loop2LvlMap: loop2OutLvl); |
189 | |
190 | // Return the topological sort (empty for cyclic). |
191 | return topoSort(); |
192 | } |
193 | |
194 | void IterationGraphSorter::addConstraints(Value t, AffineMap loop2LvlMap) { |
195 | auto addIterOrdering = [this](unsigned f, unsigned t) { |
196 | if (!itGraph[f][t] && f != t) { |
197 | itGraph[f][t] = true; |
198 | inDegree[t]++; |
199 | } |
200 | }; |
201 | |
202 | // Set up a reduction finder. |
203 | AffineDimFinder finder(iterTypes); |
204 | finder.setPickedIterType(utils::IteratorType::reduction); |
205 | |
206 | // To compute iteration graph for tensor[d0 + d1 + d3, d4 + d5 + d6], |
207 | // we require there exist d_x \in {d0, d1, d3} and d_y \in {d4, d5, d6}, |
208 | // and d_x > d_y && {d0, d1, d3} - d_x > {d4, d5, d6} - d_y |
209 | const Level lvlRank = loop2LvlMap.getNumResults(); |
210 | for (Level lvl = 1; lvl < lvlRank; lvl++) { |
211 | const AffineExpr fa = loop2LvlMap.getResult(idx: lvl - 1); |
212 | const AffineExpr ta = loop2LvlMap.getResult(idx: lvl); |
213 | |
214 | if (llvm::isa<AffineDimExpr>(Val: fa) || llvm::isa<AffineDimExpr>(Val: ta)) { |
215 | // Special case when at least one loop2LvlExp is a simple AffineDimExpr |
216 | // (say, d0) and we require d0 > {d1, d2, ...} or {d1, d2, ...} > d0 |
217 | AffineDimCollector fCollector; |
218 | fCollector.walkPostOrder(expr: fa); |
219 | AffineDimCollector tCollector; |
220 | tCollector.walkPostOrder(expr: ta); |
221 | |
222 | for (auto fd : fCollector.dims) { |
223 | for (auto td : tCollector.dims) { |
224 | const unsigned f = fd.getPosition(); |
225 | const unsigned t = td.getPosition(); |
226 | addIterOrdering(f, t); |
227 | } |
228 | } |
229 | continue; |
230 | } |
231 | |
232 | // When both loop2LvlExpr is compound, we pick an abitrary reduction loop |
233 | // from lhs and rhs and use them as d_x and d_y. |
234 | finder.walkPostOrder(expr: fa); |
235 | const AffineDimExpr fexp = finder.getDimExpr(); |
236 | const unsigned fldx = fexp.getPosition(); |
237 | |
238 | finder.walkPostOrder(expr: ta); |
239 | const AffineDimExpr texp = finder.getDimExpr(); |
240 | const unsigned tldx = texp.getPosition(); |
241 | |
242 | // d_x > d_y |
243 | addIterOrdering(fldx, tldx); |
244 | |
245 | AffineDimCollector fCollector; |
246 | fCollector.walkPostOrder(expr: fa); |
247 | AffineDimCollector tCollector; |
248 | tCollector.walkPostOrder(expr: ta); |
249 | |
250 | // Make sure dx and dy is the last. |
251 | for (auto fd : fCollector.dims) { |
252 | const unsigned f = fd.getPosition(); |
253 | addIterOrdering(f, fldx); |
254 | } |
255 | for (auto td : tCollector.dims) { |
256 | const unsigned t = td.getPosition(); |
257 | addIterOrdering(t, tldx); |
258 | } |
259 | // {d0, d1, d3} - d_x > {d4, d5, d6} - d_y |
260 | // This is to ensure that the affine expressions are reduced in sparse |
261 | // tensor level ordering. |
262 | for (auto fd : fCollector.dims) { |
263 | const unsigned f = fd.getPosition(); |
264 | if (f == fldx) // skip d_x |
265 | continue; |
266 | for (auto td : tCollector.dims) { |
267 | const unsigned t = td.getPosition(); |
268 | if (t == tldx) // skip d_y |
269 | continue; |
270 | addIterOrdering(f, t); |
271 | } |
272 | } |
273 | } |
274 | } |
275 | |