1 | //===- LowerVectorScam.cpp - Lower 'vector.scan' operation ----------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements target-independent rewrites and utilities to lower the |
10 | // 'vector.scan' operation. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "mlir/Dialect/Affine/IR/AffineOps.h" |
15 | #include "mlir/Dialect/Arith/IR/Arith.h" |
16 | #include "mlir/Dialect/Arith/Utils/Utils.h" |
17 | #include "mlir/Dialect/Linalg/IR/Linalg.h" |
18 | #include "mlir/Dialect/MemRef/IR/MemRef.h" |
19 | #include "mlir/Dialect/SCF/IR/SCF.h" |
20 | #include "mlir/Dialect/Tensor/IR/Tensor.h" |
21 | #include "mlir/Dialect/Utils/IndexingUtils.h" |
22 | #include "mlir/Dialect/Utils/StructuredOpsUtils.h" |
23 | #include "mlir/Dialect/Vector/IR/VectorOps.h" |
24 | #include "mlir/Dialect/Vector/Transforms/LoweringPatterns.h" |
25 | #include "mlir/Dialect/Vector/Utils/VectorUtils.h" |
26 | #include "mlir/IR/BuiltinAttributeInterfaces.h" |
27 | #include "mlir/IR/BuiltinTypes.h" |
28 | #include "mlir/IR/ImplicitLocOpBuilder.h" |
29 | #include "mlir/IR/Location.h" |
30 | #include "mlir/IR/Matchers.h" |
31 | #include "mlir/IR/PatternMatch.h" |
32 | #include "mlir/IR/TypeUtilities.h" |
33 | #include "mlir/Interfaces/VectorInterfaces.h" |
34 | |
35 | #define DEBUG_TYPE "vector-broadcast-lowering" |
36 | |
37 | using namespace mlir; |
38 | using namespace mlir::vector; |
39 | |
40 | /// This function checks to see if the vector combining kind |
41 | /// is consistent with the integer or float element type. |
42 | static bool isValidKind(bool isInt, vector::CombiningKind kind) { |
43 | using vector::CombiningKind; |
44 | enum class KindType { FLOAT, INT, INVALID }; |
45 | KindType type{KindType::INVALID}; |
46 | switch (kind) { |
47 | case CombiningKind::MINNUMF: |
48 | case CombiningKind::MINIMUMF: |
49 | case CombiningKind::MAXNUMF: |
50 | case CombiningKind::MAXIMUMF: |
51 | type = KindType::FLOAT; |
52 | break; |
53 | case CombiningKind::MINUI: |
54 | case CombiningKind::MINSI: |
55 | case CombiningKind::MAXUI: |
56 | case CombiningKind::MAXSI: |
57 | case CombiningKind::AND: |
58 | case CombiningKind::OR: |
59 | case CombiningKind::XOR: |
60 | type = KindType::INT; |
61 | break; |
62 | case CombiningKind::ADD: |
63 | case CombiningKind::MUL: |
64 | type = isInt ? KindType::INT : KindType::FLOAT; |
65 | break; |
66 | } |
67 | bool isValidIntKind = (type == KindType::INT) && isInt; |
68 | bool isValidFloatKind = (type == KindType::FLOAT) && (!isInt); |
69 | return (isValidIntKind || isValidFloatKind); |
70 | } |
71 | |
72 | namespace { |
73 | /// Convert vector.scan op into arith ops and vector.insert_strided_slice / |
74 | /// vector.extract_strided_slice. |
75 | /// |
76 | /// Example: |
77 | /// |
78 | /// ``` |
79 | /// %0:2 = vector.scan <add>, %arg0, %arg1 |
80 | /// {inclusive = true, reduction_dim = 1} : |
81 | /// (vector<2x3xi32>, vector<2xi32>) to (vector<2x3xi32>, vector<2xi32>) |
82 | /// ``` |
83 | /// |
84 | /// is converted to: |
85 | /// |
86 | /// ``` |
87 | /// %cst = arith.constant dense<0> : vector<2x3xi32> |
88 | /// %0 = vector.extract_strided_slice %arg0 |
89 | /// {offsets = [0, 0], sizes = [2, 1], strides = [1, 1]} |
90 | /// : vector<2x3xi32> to vector<2x1xi32> |
91 | /// %1 = vector.insert_strided_slice %0, %cst |
92 | /// {offsets = [0, 0], strides = [1, 1]} |
93 | /// : vector<2x1xi32> into vector<2x3xi32> |
94 | /// %2 = vector.extract_strided_slice %arg0 |
95 | /// {offsets = [0, 1], sizes = [2, 1], strides = [1, 1]} |
96 | /// : vector<2x3xi32> to vector<2x1xi32> |
97 | /// %3 = arith.muli %0, %2 : vector<2x1xi32> |
98 | /// %4 = vector.insert_strided_slice %3, %1 |
99 | /// {offsets = [0, 1], strides = [1, 1]} |
100 | /// : vector<2x1xi32> into vector<2x3xi32> |
101 | /// %5 = vector.extract_strided_slice %arg0 |
102 | /// {offsets = [0, 2], sizes = [2, 1], strides = [1, 1]} |
103 | /// : vector<2x3xi32> to vector<2x1xi32> |
104 | /// %6 = arith.muli %3, %5 : vector<2x1xi32> |
105 | /// %7 = vector.insert_strided_slice %6, %4 |
106 | /// {offsets = [0, 2], strides = [1, 1]} |
107 | /// : vector<2x1xi32> into vector<2x3xi32> |
108 | /// %8 = vector.shape_cast %6 : vector<2x1xi32> to vector<2xi32> |
109 | /// return %7, %8 : vector<2x3xi32>, vector<2xi32> |
110 | /// ``` |
111 | struct ScanToArithOps : public OpRewritePattern<vector::ScanOp> { |
112 | using OpRewritePattern::OpRewritePattern; |
113 | |
114 | LogicalResult matchAndRewrite(vector::ScanOp scanOp, |
115 | PatternRewriter &rewriter) const override { |
116 | auto loc = scanOp.getLoc(); |
117 | VectorType destType = scanOp.getDestType(); |
118 | ArrayRef<int64_t> destShape = destType.getShape(); |
119 | auto elType = destType.getElementType(); |
120 | bool isInt = elType.isIntOrIndex(); |
121 | if (!isValidKind(isInt, scanOp.getKind())) |
122 | return failure(); |
123 | |
124 | VectorType resType = VectorType::get(destShape, elType); |
125 | Value result = rewriter.create<arith::ConstantOp>( |
126 | loc, resType, rewriter.getZeroAttr(resType)); |
127 | int64_t reductionDim = scanOp.getReductionDim(); |
128 | bool inclusive = scanOp.getInclusive(); |
129 | int64_t destRank = destType.getRank(); |
130 | VectorType initialValueType = scanOp.getInitialValueType(); |
131 | int64_t initialValueRank = initialValueType.getRank(); |
132 | |
133 | SmallVector<int64_t> reductionShape(destShape); |
134 | reductionShape[reductionDim] = 1; |
135 | VectorType reductionType = VectorType::get(reductionShape, elType); |
136 | SmallVector<int64_t> offsets(destRank, 0); |
137 | SmallVector<int64_t> strides(destRank, 1); |
138 | SmallVector<int64_t> sizes(destShape); |
139 | sizes[reductionDim] = 1; |
140 | ArrayAttr scanSizes = rewriter.getI64ArrayAttr(sizes); |
141 | ArrayAttr scanStrides = rewriter.getI64ArrayAttr(strides); |
142 | |
143 | Value lastOutput, lastInput; |
144 | for (int i = 0; i < destShape[reductionDim]; i++) { |
145 | offsets[reductionDim] = i; |
146 | ArrayAttr scanOffsets = rewriter.getI64ArrayAttr(offsets); |
147 | Value input = rewriter.create<vector::ExtractStridedSliceOp>( |
148 | loc, reductionType, scanOp.getSource(), scanOffsets, scanSizes, |
149 | scanStrides); |
150 | Value output; |
151 | if (i == 0) { |
152 | if (inclusive) { |
153 | output = input; |
154 | } else { |
155 | if (initialValueRank == 0) { |
156 | // ShapeCastOp cannot handle 0-D vectors |
157 | output = rewriter.create<vector::BroadcastOp>( |
158 | loc, input.getType(), scanOp.getInitialValue()); |
159 | } else { |
160 | output = rewriter.create<vector::ShapeCastOp>( |
161 | loc, input.getType(), scanOp.getInitialValue()); |
162 | } |
163 | } |
164 | } else { |
165 | Value y = inclusive ? input : lastInput; |
166 | output = vector::makeArithReduction(rewriter, loc, scanOp.getKind(), |
167 | lastOutput, y); |
168 | } |
169 | result = rewriter.create<vector::InsertStridedSliceOp>( |
170 | loc, output, result, offsets, strides); |
171 | lastOutput = output; |
172 | lastInput = input; |
173 | } |
174 | |
175 | Value reduction; |
176 | if (initialValueRank == 0) { |
177 | Value v = rewriter.create<vector::ExtractOp>(loc, lastOutput, 0); |
178 | reduction = |
179 | rewriter.create<vector::BroadcastOp>(loc, initialValueType, v); |
180 | } else { |
181 | reduction = rewriter.create<vector::ShapeCastOp>(loc, initialValueType, |
182 | lastOutput); |
183 | } |
184 | |
185 | rewriter.replaceOp(scanOp, {result, reduction}); |
186 | return success(); |
187 | } |
188 | }; |
189 | } // namespace |
190 | |
191 | void mlir::vector::populateVectorScanLoweringPatterns( |
192 | RewritePatternSet &patterns, PatternBenefit benefit) { |
193 | patterns.add<ScanToArithOps>(arg: patterns.getContext(), args&: benefit); |
194 | } |
195 | |