1 | //===- LowerVectorInterleave.cpp - Lower 'vector.interleave' operation ----===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements target-independent rewrites and utilities to lower the |
10 | // 'vector.interleave' operation. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "mlir/Dialect/Vector/IR/VectorOps.h" |
15 | #include "mlir/Dialect/Vector/Transforms/LoweringPatterns.h" |
16 | #include "mlir/Dialect/Vector/Utils/VectorUtils.h" |
17 | #include "mlir/IR/BuiltinTypes.h" |
18 | #include "mlir/IR/PatternMatch.h" |
19 | |
20 | #define DEBUG_TYPE "vector-interleave-lowering" |
21 | |
22 | using namespace mlir; |
23 | using namespace mlir::vector; |
24 | |
25 | namespace { |
26 | |
27 | /// A one-shot unrolling of vector.interleave to the `targetRank`. |
28 | /// |
29 | /// Example: |
30 | /// |
31 | /// ```mlir |
32 | /// vector.interleave %a, %b : vector<1x2x3x4xi64> -> vector<1x2x3x8xi64> |
33 | /// ``` |
34 | /// Would be unrolled to: |
35 | /// ```mlir |
36 | /// %result = arith.constant dense<0> : vector<1x2x3x8xi64> |
37 | /// %0 = vector.extract %a[0, 0, 0] ─┐ |
38 | /// : vector<4xi64> from vector<1x2x3x4xi64> | |
39 | /// %1 = vector.extract %b[0, 0, 0] | |
40 | /// : vector<4xi64> from vector<1x2x3x4xi64> | - Repeated 6x for |
41 | /// %2 = vector.interleave %0, %1 : | all leading positions |
42 | /// : vector<4xi64> -> vector<8xi64> | |
43 | /// %3 = vector.insert %2, %result [0, 0, 0] | |
44 | /// : vector<8xi64> into vector<1x2x3x8xi64> ┘ |
45 | /// ``` |
46 | /// |
47 | /// Note: If any leading dimension before the `targetRank` is scalable the |
48 | /// unrolling will stop before the scalable dimension. |
49 | class UnrollInterleaveOp final : public OpRewritePattern<vector::InterleaveOp> { |
50 | public: |
51 | UnrollInterleaveOp(int64_t targetRank, MLIRContext *context, |
52 | PatternBenefit benefit = 1) |
53 | : OpRewritePattern(context, benefit), targetRank(targetRank){}; |
54 | |
55 | LogicalResult matchAndRewrite(vector::InterleaveOp op, |
56 | PatternRewriter &rewriter) const override { |
57 | VectorType resultType = op.getResultVectorType(); |
58 | auto unrollIterator = vector::createUnrollIterator(vType: resultType, targetRank); |
59 | if (!unrollIterator) |
60 | return failure(); |
61 | |
62 | auto loc = op.getLoc(); |
63 | Value result = rewriter.create<arith::ConstantOp>( |
64 | loc, resultType, rewriter.getZeroAttr(resultType)); |
65 | for (auto position : *unrollIterator) { |
66 | Value extractLhs = rewriter.create<ExtractOp>(loc, op.getLhs(), position); |
67 | Value extractRhs = rewriter.create<ExtractOp>(loc, op.getRhs(), position); |
68 | Value interleave = |
69 | rewriter.create<InterleaveOp>(loc, extractLhs, extractRhs); |
70 | result = rewriter.create<InsertOp>(loc, interleave, result, position); |
71 | } |
72 | |
73 | rewriter.replaceOp(op, result); |
74 | return success(); |
75 | } |
76 | |
77 | private: |
78 | int64_t targetRank = 1; |
79 | }; |
80 | |
81 | /// A one-shot unrolling of vector.deinterleave to the `targetRank`. |
82 | /// |
83 | /// Example: |
84 | /// |
85 | /// ```mlir |
86 | /// %0, %1 = vector.deinterleave %a : vector<1x2x3x8xi64> -> vector<1x2x3x4xi64> |
87 | /// ``` |
88 | /// Would be unrolled to: |
89 | /// ```mlir |
90 | /// %result = arith.constant dense<0> : vector<1x2x3x4xi64> |
91 | /// %0 = vector.extract %a[0, 0, 0] ─┐ |
92 | /// : vector<8xi64> from vector<1x2x3x8xi64> | |
93 | /// %1, %2 = vector.deinterleave %0 | |
94 | /// : vector<8xi64> -> vector<4xi64> | -- Initial deinterleave |
95 | /// %3 = vector.insert %1, %result [0, 0, 0] | operation unrolled. |
96 | /// : vector<4xi64> into vector<1x2x3x4xi64> | |
97 | /// %4 = vector.insert %2, %result [0, 0, 0] | |
98 | /// : vector<4xi64> into vector<1x2x3x4xi64> ┘ |
99 | /// %5 = vector.extract %a[0, 0, 1] ─┐ |
100 | /// : vector<8xi64> from vector<1x2x3x8xi64> | |
101 | /// %6, %7 = vector.deinterleave %5 | |
102 | /// : vector<8xi64> -> vector<4xi64> | -- Recursive pattern for |
103 | /// %8 = vector.insert %6, %3 [0, 0, 1] | subsequent unrolled |
104 | /// : vector<4xi64> into vector<1x2x3x4xi64> | deinterleave |
105 | /// %9 = vector.insert %7, %4 [0, 0, 1] | operations. Repeated |
106 | /// : vector<4xi64> into vector<1x2x3x4xi64> ┘ 5x in this case. |
107 | /// ``` |
108 | /// |
109 | /// Note: If any leading dimension before the `targetRank` is scalable the |
110 | /// unrolling will stop before the scalable dimension. |
111 | class UnrollDeinterleaveOp final |
112 | : public OpRewritePattern<vector::DeinterleaveOp> { |
113 | public: |
114 | UnrollDeinterleaveOp(int64_t targetRank, MLIRContext *context, |
115 | PatternBenefit benefit = 1) |
116 | : OpRewritePattern(context, benefit), targetRank(targetRank) {}; |
117 | |
118 | LogicalResult matchAndRewrite(vector::DeinterleaveOp op, |
119 | PatternRewriter &rewriter) const override { |
120 | VectorType resultType = op.getResultVectorType(); |
121 | auto unrollIterator = vector::createUnrollIterator(vType: resultType, targetRank); |
122 | if (!unrollIterator) |
123 | return failure(); |
124 | |
125 | auto loc = op.getLoc(); |
126 | Value emptyResult = rewriter.create<arith::ConstantOp>( |
127 | loc, resultType, rewriter.getZeroAttr(resultType)); |
128 | Value evenResult = emptyResult; |
129 | Value oddResult = emptyResult; |
130 | |
131 | for (auto position : *unrollIterator) { |
132 | auto extractSrc = |
133 | rewriter.create<vector::ExtractOp>(loc, op.getSource(), position); |
134 | auto deinterleave = |
135 | rewriter.create<vector::DeinterleaveOp>(loc, extractSrc); |
136 | evenResult = rewriter.create<vector::InsertOp>( |
137 | loc, deinterleave.getRes1(), evenResult, position); |
138 | oddResult = rewriter.create<vector::InsertOp>(loc, deinterleave.getRes2(), |
139 | oddResult, position); |
140 | } |
141 | rewriter.replaceOp(op, ValueRange{evenResult, oddResult}); |
142 | return success(); |
143 | } |
144 | |
145 | private: |
146 | int64_t targetRank = 1; |
147 | }; |
148 | /// Rewrite vector.interleave op into an equivalent vector.shuffle op, when |
149 | /// applicable: `sourceType` must be 1D and non-scalable. |
150 | /// |
151 | /// Example: |
152 | /// |
153 | /// ```mlir |
154 | /// vector.interleave %a, %b : vector<7xi16> -> vector<14xi16> |
155 | /// ``` |
156 | /// |
157 | /// Is rewritten into: |
158 | /// |
159 | /// ```mlir |
160 | /// vector.shuffle %arg0, %arg1 [0, 7, 1, 8, 2, 9, 3, 10, 4, 11, 5, 12, 6, 13] |
161 | /// : vector<7xi16>, vector<7xi16> |
162 | /// ``` |
163 | struct InterleaveToShuffle final : OpRewritePattern<vector::InterleaveOp> { |
164 | using OpRewritePattern::OpRewritePattern; |
165 | |
166 | LogicalResult matchAndRewrite(vector::InterleaveOp op, |
167 | PatternRewriter &rewriter) const override { |
168 | VectorType sourceType = op.getSourceVectorType(); |
169 | if (sourceType.getRank() != 1 || sourceType.isScalable()) { |
170 | return failure(); |
171 | } |
172 | int64_t n = sourceType.getNumElements(); |
173 | auto seq = llvm::seq<int64_t>(Size: 2 * n); |
174 | auto zip = llvm::to_vector(llvm::map_range( |
175 | seq, [n](int64_t i) { return (i % 2 ? n : 0) + i / 2; })); |
176 | rewriter.replaceOpWithNewOp<ShuffleOp>(op, op.getLhs(), op.getRhs(), zip); |
177 | return success(); |
178 | } |
179 | }; |
180 | |
181 | } // namespace |
182 | |
183 | void mlir::vector::populateVectorInterleaveLoweringPatterns( |
184 | RewritePatternSet &patterns, int64_t targetRank, PatternBenefit benefit) { |
185 | patterns.add<UnrollInterleaveOp, UnrollDeinterleaveOp>( |
186 | arg&: targetRank, args: patterns.getContext(), args&: benefit); |
187 | } |
188 | |
189 | void mlir::vector::populateVectorInterleaveToShufflePatterns( |
190 | RewritePatternSet &patterns, PatternBenefit benefit) { |
191 | patterns.add<InterleaveToShuffle>(arg: patterns.getContext(), args&: benefit); |
192 | } |
193 | |