//===- TensorTilingInterface.cpp - Tiling Interface models *- C++ ------*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/Tensor/IR/TensorTilingInterfaceImpl.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Affine/Utils.h"
#include "mlir/Dialect/Arith/Utils/Utils.h"
#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/Dialect/Linalg/Utils/Utils.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/Dialect/Tensor/Utils/Utils.h"
#include "mlir/Dialect/Utils/IndexingUtils.h"
#include "mlir/Interfaces/InferTypeOpInterface.h"
#include "mlir/Interfaces/TilingInterface.h"
#include "mlir/Interfaces/ValueBoundsOpInterface.h"

using namespace mlir;
using namespace mlir::tensor;

namespace {

28struct PadOpTiling : public TilingInterface::ExternalModel<PadOpTiling, PadOp> {
29
30 SmallVector<utils::IteratorType> getLoopIteratorTypes(Operation *op) const {
31 auto padOp = cast<PadOp>(op);
32 SmallVector<utils::IteratorType> iteratorTypes(
33 padOp.getResultType().getRank(), utils::IteratorType::parallel);
34 return iteratorTypes;
35 }
36
37 SmallVector<Range> getIterationDomain(Operation *op, OpBuilder &b) const {
38 ReifiedRankedShapedTypeDims reifiedShapes;
39 (void)reifyResultShapes(b, op, reifiedShapes);
40 OpFoldResult zero = b.getIndexAttr(0);
41 OpFoldResult one = b.getIndexAttr(1);
42 // Initialize all the ranges to {zero, one, one}. All the `ub`s are
43 // overwritten.
44 SmallVector<Range> loopRanges(reifiedShapes[0].size(), {zero, one, one});
45 for (const auto &ub : enumerate(reifiedShapes[0]))
46 loopRanges[ub.index()].size = ub.value();
47 return loopRanges;
48 }
49
50 FailureOr<TilingResult>
51 getTiledImplementation(Operation *op, OpBuilder &b,
52 ArrayRef<OpFoldResult> offsets,
53 ArrayRef<OpFoldResult> sizes) const {
54 FailureOr<TilingResult> result =
55 tensor::bubbleUpPadSlice(b, cast<PadOp>(op), offsets, sizes);
56 if (failed(result))
57 return failure();
58 return result.value();
59 }
60
61 LogicalResult
62 getResultTilePosition(Operation *op, OpBuilder &b, unsigned resultNumber,
63 ArrayRef<OpFoldResult> offsets,
64 ArrayRef<OpFoldResult> sizes,
65 SmallVector<OpFoldResult> &resultOffsets,
66 SmallVector<OpFoldResult> &resultSizes) const {
67 resultOffsets.assign(offsets.begin(), offsets.end());
68 resultSizes.assign(sizes.begin(), sizes.end());
69 return success();
70 }
71
72 LogicalResult getIterationDomainTileFromResultTile(
73 Operation *op, OpBuilder &b, unsigned resultNumber,
74 ArrayRef<OpFoldResult> offsets, ArrayRef<OpFoldResult> sizes,
75 SmallVectorImpl<OpFoldResult> &iterDomainOffsets,
76 SmallVectorImpl<OpFoldResult> &iterDomainSizes) const {
77 iterDomainOffsets.assign(offsets.begin(), offsets.end());
78 iterDomainSizes.assign(sizes.begin(), sizes.end());
79 return success();
80 }
81
82 FailureOr<TilingResult>
83 generateResultTileValue(Operation *op, OpBuilder &b, unsigned resultNumber,
84 ArrayRef<OpFoldResult> offsets,
85 ArrayRef<OpFoldResult> sizes) const {
86 return getTiledImplementation(op, b, offsets, sizes);
87 }
88};

} // namespace

92FailureOr<TilingResult> tensor::bubbleUpPadSlice(OpBuilder &b,
93 tensor::PadOp padOp,
94 ArrayRef<OpFoldResult> offsets,
95 ArrayRef<OpFoldResult> sizes,
96 bool generateZeroSliceGuard) {
97 // Only constant padding value supported.
98 Value padValue = padOp.getConstantPaddingValue();
99 if (!padValue)
100 return failure();
101
102 // Helper variables and functions for various arithmetic operations. These
103 // are used extensively for computing new offset/length and padding values.
104 Location loc = padOp->getLoc();
105 AffineExpr dim0, dim1;
106 bindDims(ctx: b.getContext(), exprs&: dim0, exprs&: dim1);
107 // Subtract two integers.
108 auto subMap = AffineMap::get(dimCount: 2, symbolCount: 0, result: {dim0 - dim1});
109 auto sub = [&](OpFoldResult v1, OpFoldResult v2) {
110 return affine::makeComposedFoldedAffineApply(b, loc, subMap, {v1, v2});
111 };
112 // Take the minimum of two integers.
113 auto idMap = AffineMap::getMultiDimIdentityMap(numDims: 2, context: b.getContext());
114 auto min = [&](OpFoldResult v1, OpFoldResult v2) {
115 return affine::makeComposedFoldedAffineMin(b, loc, idMap, {v1, v2});
116 };
117 // Take the maximum of two integers.
118 auto max = [&](OpFoldResult v1, OpFoldResult v2) {
119 return affine::makeComposedFoldedAffineMax(b, loc, idMap, {v1, v2});
120 };
121 // Zero index-typed integer.
122 OpFoldResult zero = b.getIndexAttr(0);
123
124 // Compute new offsets, lengths, low padding, high padding.
125 SmallVector<OpFoldResult> newOffsets, newLengths;
126 SmallVector<OpFoldResult> newLows, newHighs;
127 // Set to true if the original data source is not read at all.
128 bool hasZeroLen = false;
129 // Same as hasZeroLen, but for dynamic dimension sizes. This condition
130 // is true if the original data source turns out to be unused at runtime.
131 Value dynHasZeroLenCond;
132
133 int64_t rank = padOp.getSourceType().getRank();
134 // Only unit stride supported.
135 SmallVector<OpFoldResult> newStrides(rank, b.getIndexAttr(1));
136 for (unsigned dim = 0; dim < rank; ++dim) {
137 auto low = padOp.getMixedLowPad()[dim];
138 bool hasLowPad = !isZeroInteger(low);
139 auto high = padOp.getMixedHighPad()[dim];
140 bool hasHighPad = !isZeroInteger(high);
141 auto offset = offsets[dim];
142 auto length = sizes[dim];
143 // If the dim has no padding, we dont need to calculate new values for that
144 // dim as the exisiting ones are correct even after the pattern.
145 if (!hasLowPad && !hasHighPad) {
146 newOffsets.push_back(Elt: offset);
147 newLengths.push_back(Elt: length);
148 newLows.push_back(Elt: low);
149 newHighs.push_back(Elt: high);
150 continue;
151 }
152
153 auto srcSize = tensor::getMixedSize(builder&: b, loc, value: padOp.getSource(), dim);
154
155 // The new amount of low padding is `low - offset`. Except for the case
156 // where none of the low padding is read. In that case, the new amount of
157 // low padding is zero.
158 //
159 // Optimization: If low = 0, then newLow = 0.
160 OpFoldResult newLow = hasLowPad ? max(zero, sub(low, offset)) : zero;
161 newLows.push_back(Elt: newLow);
162
163 // Start reading the data from position `offset - low`. Since the original
164 // read may have started in the low padding zone, this value could be
165 // negative. Therefore, start reading from:
166 //
167 // max(offset - low, 0)
168 //
169 // The original read could also have started in the high padding zone.
170 // In that case, set the offset to the end of source tensor. The new
171 // ExtractSliceOp length will be zero in that case. (Effectively reading
172 // no data from the source.)
173 //
174 // Optimization: If low = 0, then the formula can be simplified.
175 OpFoldResult newOffset = hasLowPad
176 ? min(max(sub(offset, low), zero), srcSize)
177 : min(offset, srcSize);
178 newOffsets.push_back(Elt: newOffset);
179
180 // The original ExtractSliceOp was reading until position `offset +
181 // length`. Therefore, the corresponding position within the source tensor
182 // is:
183 //
184 // offset + length - low
185 //
186 // In case the original ExtractSliceOp stopped reading within the low
187 // padding zone, this value can be negative. In that case, the end
188 // position of the read should be zero. (Similar to newOffset.)
189 //
190 // The original read could also have stopped in the high padding zone.
191 // In that case, set the end positition of the read should be the end of
192 // the source tensor. (Similar to newOffset.)
193 // srcSize - newOffset represents how much length we have available
194 // and length - newLow represents how much length we want at most.
195 // Note that there are many ways to order this indexing math to compute
196 // newLength, but we want to make sure that the final affine.min ops in the
197 // sequence are bounding the index to as small a value as possible. If
198 // ValueBoundsOpInterface is used, this calculation will get upper bounds
199 // from the affine.min ops, so we want to use the smallest known value to
200 // set the bound at the end of the computation sequence. In this case, the
201 // index will be upper bounded by length - newLow.
202 OpFoldResult newLength = min(sub(srcSize, newOffset), sub(length, newLow));
203 // Optimization: If low = 0, then newLow = 0. then newLength >= 0 assuming
204 // length >= 0.
205 if (hasLowPad)
206 newLength = max(newLength, zero);
207 newLengths.push_back(Elt: newLength);
208
209 // Check if newLength is zero. In that case, no SubTensorOp should be
210 // executed.
211 if (isZeroInteger(v: newLength)) {
212 hasZeroLen = true;
213 } else if (!hasZeroLen) {
214 Value check = b.create<arith::CmpIOp>(
215 loc, arith::CmpIPredicate::eq,
216 getValueOrCreateConstantIndexOp(b, loc, newLength),
217 getValueOrCreateConstantIndexOp(b, loc, zero));
218 dynHasZeroLenCond =
219 dynHasZeroLenCond
220 ? b.create<arith::OrIOp>(loc, check, dynHasZeroLenCond)
221 : check;
222 }
223
224 // The amount of high padding is simply the number of elements remaining,
225 // so that the result has the same length as the original ExtractSliceOp.
226 // As an optimization, if the original high padding is zero, then the new
227 // high padding must also be zero.
228 OpFoldResult newHigh =
229 hasHighPad ? sub(sub(length, newLength), newLow) : zero;
230 newHighs.push_back(Elt: newHigh);
231 }
232
233 // The shape of the result can be obtained from the sizes passed in.
234 SmallVector<Value> dynDims;
235 SmallVector<int64_t> shape;
236 dispatchIndexOpFoldResults(ofrs: sizes, dynamicVec&: dynDims, staticVec&: shape);
237 RankedTensorType resultType =
238 RankedTensorType::get(shape, padOp.getResultType().getElementType());
239
240 // Insert cast to ensure that types match. (May be folded away.)
241 auto castResult = [&](Value val) -> Value {
242 if (resultType == val.getType())
243 return val;
244 return b.create<tensor::CastOp>(loc, resultType, val);
245 };
246
247 // In cases where the original data source is unused: Emit a GenerateOp and
248 // do not generate a SliceOp. (The result shape of the SliceOp would
249 // have a dimension of size 0, the semantics of which is unclear.)
250 auto createGenerateOp = [&]() {
251 // Create GenerateOp.
252 auto generateOp = b.create<tensor::GenerateOp>(
253 loc, resultType, dynDims,
254 [&](OpBuilder &builder, Location gLoc, ValueRange indices) {
255 builder.create<tensor::YieldOp>(gLoc, padValue);
256 });
257 return generateOp;
258 };
259
260 // Emit a SliceOp and a PadOp. Should not be used in cases where
261 // the result shape of the new SliceOp has a zero dimension.
262 auto createPadOfExtractSlice = [&]() {
263 // Create pad(extract_slice(x)).
264 auto newSliceOp = b.create<tensor::ExtractSliceOp>(
265 loc, padOp.getSource(), newOffsets, newLengths, newStrides);
266 auto newPadOp = b.create<PadOp>(
267 loc, Type(), newSliceOp, newLows, newHighs,
268 /*nofold=*/padOp.getNofold(),
269 getPrunedAttributeList(padOp, PadOp::getAttributeNames()));
270
271 // Copy region to new PadOp.
272 IRMapping bvm;
273 padOp.getRegion().cloneInto(&newPadOp.getRegion(), bvm);
274
275 // Cast result and return.
276 return std::make_tuple(newPadOp, newSliceOp);
277 };
278
279 // Rewrite extract_slice(pad(x)) into a GenerateOp it is statically known that
280 // the original data source x is not used.
281 if (hasZeroLen) {
282 Operation *generateOp = createGenerateOp();
283 return TilingResult{.tiledOps: {generateOp},
284 .tiledValues: {castResult(generateOp->getResult(idx: 0))},
285 /*generatedSlices=*/{}};
286 }
287
288 // If there are dynamic dimensions: Generate an scf.if check to avoid
289 // creating SliceOps with result dimensions of size 0 at runtime.
290 if (generateZeroSliceGuard && dynHasZeroLenCond) {
291 Operation *thenOp;
292 Operation *elseOp;
293 Operation *sliceOp;
294 auto result = b.create<scf::IfOp>(
295 loc, dynHasZeroLenCond,
296 /*thenBuilder=*/
297 [&](OpBuilder &b, Location loc) {
298 thenOp = createGenerateOp();
299 b.create<scf::YieldOp>(loc, castResult(thenOp->getResult(0)));
300 },
301 /*elseBuilder=*/
302 [&](OpBuilder &b, Location loc) {
303 std::tie(elseOp, sliceOp) = createPadOfExtractSlice();
304 b.create<scf::YieldOp>(loc, castResult(elseOp->getResult(0)));
305 });
306 return TilingResult{
307 .tiledOps: {elseOp}, .tiledValues: SmallVector<Value>(result->getResults()), .generatedSlices: {sliceOp}};
308 }
309
310 auto [newPadOp, sliceOp] = createPadOfExtractSlice();
311 return TilingResult{
312 {newPadOp}, {castResult(newPadOp->getResult(0))}, {sliceOp}};
313}
315void mlir::tensor::registerTilingInterfaceExternalModels(
316 DialectRegistry &registry) {
317 registry.addExtension(extensionFn: +[](MLIRContext *ctx, TensorDialect *dialect) {
318 tensor::PadOp::attachInterface<PadOpTiling>(*ctx);
319 });
320}