| 1 | //===- LowerVectorScan.cpp - Lower 'vector.scan' operation ----------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file implements target-independent rewrites and utilities to lower the |
| 10 | // 'vector.scan' operation. |
| 11 | // |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | |
| 14 | #include "mlir/Dialect/Arith/IR/Arith.h" |
| 15 | #include "mlir/Dialect/MemRef/IR/MemRef.h" |
| 16 | #include "mlir/Dialect/Utils/IndexingUtils.h" |
| 17 | #include "mlir/Dialect/Vector/IR/VectorOps.h" |
| 18 | #include "mlir/Dialect/Vector/Transforms/LoweringPatterns.h" |
| 19 | #include "mlir/Dialect/Vector/Utils/VectorUtils.h" |
| 20 | #include "mlir/IR/BuiltinTypes.h" |
| 21 | #include "mlir/IR/Location.h" |
| 22 | #include "mlir/IR/PatternMatch.h" |
| 23 | #include "mlir/IR/TypeUtilities.h" |
| 24 | |
| 25 | #define DEBUG_TYPE "vector-broadcast-lowering" |
| 26 | |
| 27 | using namespace mlir; |
| 28 | using namespace mlir::vector; |
| 29 | |
| 30 | /// This function checks to see if the vector combining kind |
| 31 | /// is consistent with the integer or float element type. |
| 32 | static bool isValidKind(bool isInt, vector::CombiningKind kind) { |
| 33 | using vector::CombiningKind; |
| 34 | enum class KindType { FLOAT, INT, INVALID }; |
| 35 | KindType type{KindType::INVALID}; |
| 36 | switch (kind) { |
| 37 | case CombiningKind::MINNUMF: |
| 38 | case CombiningKind::MINIMUMF: |
| 39 | case CombiningKind::MAXNUMF: |
| 40 | case CombiningKind::MAXIMUMF: |
| 41 | type = KindType::FLOAT; |
| 42 | break; |
| 43 | case CombiningKind::MINUI: |
| 44 | case CombiningKind::MINSI: |
| 45 | case CombiningKind::MAXUI: |
| 46 | case CombiningKind::MAXSI: |
| 47 | case CombiningKind::AND: |
| 48 | case CombiningKind::OR: |
| 49 | case CombiningKind::XOR: |
| 50 | type = KindType::INT; |
| 51 | break; |
| 52 | case CombiningKind::ADD: |
| 53 | case CombiningKind::MUL: |
| 54 | type = isInt ? KindType::INT : KindType::FLOAT; |
| 55 | break; |
| 56 | } |
| 57 | bool isValidIntKind = (type == KindType::INT) && isInt; |
| 58 | bool isValidFloatKind = (type == KindType::FLOAT) && (!isInt); |
| 59 | return (isValidIntKind || isValidFloatKind); |
| 60 | } |
| 61 | |
| 62 | namespace { |
| 63 | /// Convert vector.scan op into arith ops and vector.insert_strided_slice / |
| 64 | /// vector.extract_strided_slice. |
| 65 | /// |
| 66 | /// Example: |
| 67 | /// |
| 68 | /// ``` |
| 69 | /// %0:2 = vector.scan <add>, %arg0, %arg1 |
| 70 | /// {inclusive = true, reduction_dim = 1} : |
| 71 | /// (vector<2x3xi32>, vector<2xi32>) to (vector<2x3xi32>, vector<2xi32>) |
| 72 | /// ``` |
| 73 | /// |
| 74 | /// is converted to: |
| 75 | /// |
| 76 | /// ``` |
| 77 | /// %cst = arith.constant dense<0> : vector<2x3xi32> |
| 78 | /// %0 = vector.extract_strided_slice %arg0 |
| 79 | /// {offsets = [0, 0], sizes = [2, 1], strides = [1, 1]} |
| 80 | /// : vector<2x3xi32> to vector<2x1xi32> |
| 81 | /// %1 = vector.insert_strided_slice %0, %cst |
| 82 | /// {offsets = [0, 0], strides = [1, 1]} |
| 83 | /// : vector<2x1xi32> into vector<2x3xi32> |
| 84 | /// %2 = vector.extract_strided_slice %arg0 |
| 85 | /// {offsets = [0, 1], sizes = [2, 1], strides = [1, 1]} |
| 86 | /// : vector<2x3xi32> to vector<2x1xi32> |
| 87 | /// %3 = arith.muli %0, %2 : vector<2x1xi32> |
| 88 | /// %4 = vector.insert_strided_slice %3, %1 |
| 89 | /// {offsets = [0, 1], strides = [1, 1]} |
| 90 | /// : vector<2x1xi32> into vector<2x3xi32> |
| 91 | /// %5 = vector.extract_strided_slice %arg0 |
| 92 | /// {offsets = [0, 2], sizes = [2, 1], strides = [1, 1]} |
| 93 | /// : vector<2x3xi32> to vector<2x1xi32> |
| 94 | /// %6 = arith.muli %3, %5 : vector<2x1xi32> |
| 95 | /// %7 = vector.insert_strided_slice %6, %4 |
| 96 | /// {offsets = [0, 2], strides = [1, 1]} |
| 97 | /// : vector<2x1xi32> into vector<2x3xi32> |
| 98 | /// %8 = vector.shape_cast %6 : vector<2x1xi32> to vector<2xi32> |
| 99 | /// return %7, %8 : vector<2x3xi32>, vector<2xi32> |
| 100 | /// ``` |
| 101 | struct ScanToArithOps : public OpRewritePattern<vector::ScanOp> { |
| 102 | using OpRewritePattern::OpRewritePattern; |
| 103 | |
| 104 | LogicalResult matchAndRewrite(vector::ScanOp scanOp, |
| 105 | PatternRewriter &rewriter) const override { |
| 106 | auto loc = scanOp.getLoc(); |
| 107 | VectorType destType = scanOp.getDestType(); |
| 108 | ArrayRef<int64_t> destShape = destType.getShape(); |
| 109 | auto elType = destType.getElementType(); |
| 110 | bool isInt = elType.isIntOrIndex(); |
| 111 | if (!isValidKind(isInt, kind: scanOp.getKind())) |
| 112 | return failure(); |
| 113 | |
| 114 | VectorType resType = VectorType::get(shape: destShape, elementType: elType); |
| 115 | Value result = rewriter.create<arith::ConstantOp>( |
| 116 | location: loc, args&: resType, args: rewriter.getZeroAttr(type: resType)); |
| 117 | int64_t reductionDim = scanOp.getReductionDim(); |
| 118 | bool inclusive = scanOp.getInclusive(); |
| 119 | int64_t destRank = destType.getRank(); |
| 120 | VectorType initialValueType = scanOp.getInitialValueType(); |
| 121 | int64_t initialValueRank = initialValueType.getRank(); |
| 122 | |
| 123 | SmallVector<int64_t> reductionShape(destShape); |
| 124 | reductionShape[reductionDim] = 1; |
| 125 | VectorType reductionType = VectorType::get(shape: reductionShape, elementType: elType); |
| 126 | SmallVector<int64_t> offsets(destRank, 0); |
| 127 | SmallVector<int64_t> strides(destRank, 1); |
| 128 | SmallVector<int64_t> sizes(destShape); |
| 129 | sizes[reductionDim] = 1; |
| 130 | ArrayAttr scanSizes = rewriter.getI64ArrayAttr(values: sizes); |
| 131 | ArrayAttr scanStrides = rewriter.getI64ArrayAttr(values: strides); |
| 132 | |
| 133 | Value lastOutput, lastInput; |
| 134 | for (int i = 0; i < destShape[reductionDim]; i++) { |
| 135 | offsets[reductionDim] = i; |
| 136 | ArrayAttr scanOffsets = rewriter.getI64ArrayAttr(values: offsets); |
| 137 | Value input = rewriter.create<vector::ExtractStridedSliceOp>( |
| 138 | location: loc, args&: reductionType, args: scanOp.getSource(), args&: scanOffsets, args&: scanSizes, |
| 139 | args&: scanStrides); |
| 140 | Value output; |
| 141 | if (i == 0) { |
| 142 | if (inclusive) { |
| 143 | output = input; |
| 144 | } else { |
| 145 | if (initialValueRank == 0) { |
| 146 | // ShapeCastOp cannot handle 0-D vectors |
| 147 | output = rewriter.create<vector::BroadcastOp>( |
| 148 | location: loc, args: input.getType(), args: scanOp.getInitialValue()); |
| 149 | } else { |
| 150 | output = rewriter.create<vector::ShapeCastOp>( |
| 151 | location: loc, args: input.getType(), args: scanOp.getInitialValue()); |
| 152 | } |
| 153 | } |
| 154 | } else { |
| 155 | Value y = inclusive ? input : lastInput; |
| 156 | output = vector::makeArithReduction(b&: rewriter, loc, kind: scanOp.getKind(), |
| 157 | v1: lastOutput, acc: y); |
| 158 | } |
| 159 | result = rewriter.create<vector::InsertStridedSliceOp>( |
| 160 | location: loc, args&: output, args&: result, args&: offsets, args&: strides); |
| 161 | lastOutput = output; |
| 162 | lastInput = input; |
| 163 | } |
| 164 | |
| 165 | Value reduction; |
| 166 | if (initialValueRank == 0) { |
| 167 | Value v = rewriter.create<vector::ExtractOp>(location: loc, args&: lastOutput, args: 0); |
| 168 | reduction = |
| 169 | rewriter.create<vector::BroadcastOp>(location: loc, args&: initialValueType, args&: v); |
| 170 | } else { |
| 171 | reduction = rewriter.create<vector::ShapeCastOp>(location: loc, args&: initialValueType, |
| 172 | args&: lastOutput); |
| 173 | } |
| 174 | |
| 175 | rewriter.replaceOp(op: scanOp, newValues: {result, reduction}); |
| 176 | return success(); |
| 177 | } |
| 178 | }; |
| 179 | } // namespace |
| 180 | |
| 181 | void mlir::vector::populateVectorScanLoweringPatterns( |
| 182 | RewritePatternSet &patterns, PatternBenefit benefit) { |
| 183 | patterns.add<ScanToArithOps>(arg: patterns.getContext(), args&: benefit); |
| 184 | } |
| 185 | |