1 | //===- ConcatOpPatterns.cpp - Patterns related to tensor.concat lowering --===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "mlir/Dialect/Affine/IR/AffineOps.h" |
10 | #include "mlir/Dialect/Arith/IR/Arith.h" |
11 | #include "mlir/Dialect/Arith/Utils/Utils.h" |
12 | #include "mlir/Dialect/Tensor/IR/Tensor.h" |
13 | #include "mlir/Dialect/Tensor/Transforms/Transforms.h" |
14 | #include "mlir/IR/PatternMatch.h" |
15 | |
16 | using namespace mlir; |
17 | using namespace mlir::tensor; |
18 | |
19 | namespace { |
20 | |
21 | /// Decompose `tensor.concat` into `tensor.empty` and a chain of slice inserts. |
22 | /// |
23 | /// %concat = tensor.concat dim(1) %0, %1 : |
24 | /// (tensor<2x3xf32>, tensor<2x4xf32>) -> tensor<2x7xf32> |
25 | /// |
26 | /// Becomes |
27 | /// |
28 | /// %empty = tensor.empty() : tensor<2x7xf32> |
29 | /// %insert0 = tensor.insert_slice %0 into %empty[0, 0][2, 3][1, 1] |
30 | /// %concat = tensor.insert_slice %1 into %insert0[0, 3][2, 4][1, 1] |
31 | struct DecomposeTensorConcatOp : public OpRewritePattern<ConcatOp> { |
32 | using OpRewritePattern<ConcatOp>::OpRewritePattern; |
33 | |
34 | LogicalResult matchAndRewrite(ConcatOp concatOp, |
35 | PatternRewriter &rewriter) const override { |
36 | Location loc = concatOp.getLoc(); |
37 | FailureOr<Value> dest = |
38 | tensor::getOrCreateDestination(b&: rewriter, loc, opResult: concatOp->getResult(0)); |
39 | if (failed(result: dest)) |
40 | return failure(); |
41 | |
42 | auto empty = dest->getDefiningOp<tensor::EmptyOp>(); |
43 | if (!empty) |
44 | return failure(); |
45 | |
46 | int64_t dim = concatOp.getDim(); |
47 | Value dimValue = |
48 | rewriter.create<arith::ConstantOp>(loc, rewriter.getIndexAttr(dim)); |
49 | |
50 | int64_t rank = concatOp.getResultType().getRank(); |
51 | SmallVector<OpFoldResult> strides(rank, rewriter.getIndexAttr(1)); |
52 | SmallVector<OpFoldResult> offsets(rank, rewriter.getIndexAttr(0)); |
53 | |
54 | // Compute the partial sums for the slice offsets. |
55 | AffineExpr sum = rewriter.getAffineDimExpr(position: 0); |
56 | SmallVector<AffineExpr> partialSums = {sum}; |
57 | SmallVector<OpFoldResult> offsetStrides = {rewriter.getIndexAttr(0)}; |
58 | for (auto [idx, input] : |
59 | llvm::enumerate(concatOp.getInputs().drop_back())) { |
60 | sum = sum + rewriter.getAffineDimExpr(idx + 1); |
61 | partialSums.push_back(sum); |
62 | offsetStrides.push_back( |
63 | rewriter.createOrFold<tensor::DimOp>(loc, input, dimValue)); |
64 | } |
65 | auto partialSumMap = AffineMap::get(concatOp.getInputs().size(), 0, |
66 | partialSums, rewriter.getContext()); |
67 | SmallVector<OpFoldResult> dimOffsets = |
68 | affine::makeComposedFoldedMultiResultAffineApply( |
69 | rewriter, loc, partialSumMap, offsetStrides); |
70 | |
71 | // Construct the chain of insert_slice ops into the destination. |
72 | Value result = *dest; |
73 | for (auto [input, offset] : |
74 | llvm::zip_equal(concatOp.getInputs(), dimOffsets)) { |
75 | SmallVector<OpFoldResult> sizes = |
76 | tensor::getMixedSizes(rewriter, loc, input); |
77 | offsets[dim] = offset; |
78 | result = rewriter.createOrFold<tensor::InsertSliceOp>( |
79 | loc, input, result, offsets, sizes, strides); |
80 | } |
81 | |
82 | rewriter.replaceOpWithNewOp<tensor::CastOp>( |
83 | concatOp, concatOp.getResultType(), result); |
84 | return success(); |
85 | } |
86 | }; |
87 | |
88 | } // namespace |
89 | |
90 | void mlir::tensor::populateDecomposeTensorConcatPatterns( |
91 | RewritePatternSet &patterns) { |
92 | patterns.add<DecomposeTensorConcatOp>(arg: patterns.getContext()); |
93 | } |
94 | |