1 | //===-- MathToROCDL.cpp - conversion from Math to rocdl calls -------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "mlir/Conversion/MathToROCDL/MathToROCDL.h" |
10 | #include "mlir/Conversion/LLVMCommon/LoweringOptions.h" |
11 | #include "mlir/Conversion/LLVMCommon/TypeConverter.h" |
12 | #include "mlir/Dialect/Func/IR/FuncOps.h" |
13 | #include "mlir/Dialect/LLVMIR/LLVMDialect.h" |
14 | #include "mlir/Dialect/LLVMIR/ROCDLDialect.h" |
15 | #include "mlir/Dialect/Math/IR/Math.h" |
16 | #include "mlir/Dialect/Utils/IndexingUtils.h" |
17 | #include "mlir/Dialect/Vector/IR/VectorOps.h" |
18 | #include "mlir/IR/BuiltinDialect.h" |
19 | #include "mlir/IR/PatternMatch.h" |
20 | #include "mlir/Pass/Pass.h" |
21 | #include "mlir/Transforms/DialectConversion.h" |
22 | |
23 | #include "../GPUCommon/GPUOpsLowering.h" |
24 | #include "../GPUCommon/IndexIntrinsicsOpLowering.h" |
25 | #include "../GPUCommon/OpToFuncCallLowering.h" |
26 | #include "mlir/Conversion/GPUCommon/GPUCommonPass.h" |
27 | |
28 | namespace mlir { |
29 | #define GEN_PASS_DEF_CONVERTMATHTOROCDL |
30 | #include "mlir/Conversion/Passes.h.inc" |
31 | } // namespace mlir |
32 | |
33 | using namespace mlir; |
34 | |
35 | #define DEBUG_TYPE "math-to-rocdl" |
36 | #define DBGS() (llvm::dbgs() << "[" DEBUG_TYPE "]: ") |
37 | |
38 | template <typename OpTy> |
39 | static void populateOpPatterns(const LLVMTypeConverter &converter, |
40 | RewritePatternSet &patterns, StringRef f32Func, |
41 | StringRef f64Func, StringRef f16Func, |
42 | StringRef f32ApproxFunc = "" ) { |
43 | patterns.add<ScalarizeVectorOpLowering<OpTy>>(converter); |
44 | patterns.add<OpToFuncCallLowering<OpTy>>(converter, f32Func, f64Func, |
45 | f32ApproxFunc, f16Func); |
46 | } |
47 | |
48 | void mlir::populateMathToROCDLConversionPatterns( |
49 | const LLVMTypeConverter &converter, RewritePatternSet &patterns) { |
50 | // Handled by mathToLLVM: math::AbsIOp |
51 | // Handled by mathToLLVM: math::AbsFOp |
52 | // Handled by mathToLLVM: math::CopySignOp |
53 | // Handled by mathToLLVM: math::CountLeadingZerosOp |
54 | // Handled by mathToLLVM: math::CountTrailingZerosOp |
55 | // Handled by mathToLLVM: math::CgPopOp |
56 | // Handled by mathToLLVM: math::ExpOp (32-bit only) |
57 | // Handled by mathToLLVM: math::FmaOp |
58 | // Handled by mathToLLVM: math::LogOp (32-bit only) |
59 | // FIXME: math::IPowIOp |
60 | // Handled by mathToLLVM: math::RoundEvenOp |
61 | // Handled by mathToLLVM: math::RoundOp |
62 | // Handled by mathToLLVM: math::SqrtOp |
63 | // Handled by mathToLLVM: math::TruncOp |
64 | populateOpPatterns<math::AcosOp>(converter, patterns, "__ocml_acos_f32" , |
65 | "__ocml_acos_f64" , "__ocml_acos_f16" ); |
66 | populateOpPatterns<math::AcoshOp>(converter, patterns, "__ocml_acosh_f32" , |
67 | "__ocml_acosh_f64" , "__ocml_acosh_f16" ); |
68 | populateOpPatterns<math::AsinOp>(converter, patterns, "__ocml_asin_f32" , |
69 | "__ocml_asin_f64" , "__ocml_asin_f16" ); |
70 | populateOpPatterns<math::AsinhOp>(converter, patterns, "__ocml_asinh_f32" , |
71 | "__ocml_asinh_f64" , "__ocml_asinh_f16" ); |
72 | populateOpPatterns<math::AtanOp>(converter, patterns, "__ocml_atan_f32" , |
73 | "__ocml_atan_f64" , "__ocml_atan_f16" ); |
74 | populateOpPatterns<math::AtanhOp>(converter, patterns, "__ocml_atanh_f32" , |
75 | "__ocml_atanh_f64" , "__ocml_atanh_f16" ); |
76 | populateOpPatterns<math::Atan2Op>(converter, patterns, "__ocml_atan2_f32" , |
77 | "__ocml_atan2_f64" , "__ocml_atan2_f16" ); |
78 | populateOpPatterns<math::CbrtOp>(converter, patterns, "__ocml_cbrt_f32" , |
79 | "__ocml_cbrt_f64" , "__ocml_cbrt_f16" ); |
80 | populateOpPatterns<math::CeilOp>(converter, patterns, "__ocml_ceil_f32" , |
81 | "__ocml_ceil_f64" , "__ocml_ceil_f16" ); |
82 | populateOpPatterns<math::CosOp>(converter, patterns, "__ocml_cos_f32" , |
83 | "__ocml_cos_f64" , "__ocml_cos_f16" ); |
84 | populateOpPatterns<math::CoshOp>(converter, patterns, "__ocml_cosh_f32" , |
85 | "__ocml_cosh_f64" , "__ocml_cosh_f16" ); |
86 | populateOpPatterns<math::SinhOp>(converter, patterns, "__ocml_sinh_f32" , |
87 | "__ocml_sinh_f64" , "__ocml_sinh_f16" ); |
88 | populateOpPatterns<math::ExpOp>(converter, patterns, "" , "__ocml_exp_f64" , |
89 | "__ocml_exp_f16" ); |
90 | populateOpPatterns<math::Exp2Op>(converter, patterns, "__ocml_exp2_f32" , |
91 | "__ocml_exp2_f64" , "__ocml_exp2_f16" ); |
92 | populateOpPatterns<math::ExpM1Op>(converter, patterns, "__ocml_expm1_f32" , |
93 | "__ocml_expm1_f64" , "__ocml_expm1_f16" ); |
94 | populateOpPatterns<math::FloorOp>(converter, patterns, "__ocml_floor_f32" , |
95 | "__ocml_floor_f64" , "__ocml_floor_f16" ); |
96 | populateOpPatterns<math::LogOp>(converter, patterns, "" , "__ocml_log_f64" , |
97 | "__ocml_log_f16" ); |
98 | populateOpPatterns<math::Log10Op>(converter, patterns, "__ocml_log10_f32" , |
99 | "__ocml_log10_f64" , "__ocml_log10_f16" ); |
100 | populateOpPatterns<math::Log1pOp>(converter, patterns, "__ocml_log1p_f32" , |
101 | "__ocml_log1p_f64" , "__ocml_log1p_f16" ); |
102 | populateOpPatterns<math::Log2Op>(converter, patterns, "__ocml_log2_f32" , |
103 | "__ocml_log2_f64" , "__ocml_log2_f16" ); |
104 | populateOpPatterns<math::PowFOp>(converter, patterns, "__ocml_pow_f32" , |
105 | "__ocml_pow_f64" , "__ocml_pow_f16" ); |
106 | populateOpPatterns<math::RsqrtOp>(converter, patterns, "__ocml_rsqrt_f32" , |
107 | "__ocml_rsqrt_f64" , "__ocml_rsqrt_f16" ); |
108 | populateOpPatterns<math::SinOp>(converter, patterns, "__ocml_sin_f32" , |
109 | "__ocml_sin_f64" , "__ocml_sin_f16" ); |
110 | populateOpPatterns<math::TanhOp>(converter, patterns, "__ocml_tanh_f32" , |
111 | "__ocml_tanh_f64" , "__ocml_tanh_f16" ); |
112 | populateOpPatterns<math::TanOp>(converter, patterns, "__ocml_tan_f32" , |
113 | "__ocml_tan_f64" , "__ocml_tan_f16" ); |
114 | populateOpPatterns<math::ErfOp>(converter, patterns, "__ocml_erf_f32" , |
115 | "__ocml_erf_f64" , "__ocml_erf_f16" ); |
116 | populateOpPatterns<math::ErfcOp>(converter, patterns, "__ocml_erfc_f32" , |
117 | "__ocml_erfc_f64" , "__ocml_erfc_f16" ); |
118 | populateOpPatterns<math::FPowIOp>(converter, patterns, "__ocml_pown_f32" , |
119 | "__ocml_pown_f64" , "__ocml_pown_f16" ); |
120 | // Single arith pattern that needs a ROCDL call, probably not |
121 | // worth creating a separate pass for it. |
122 | populateOpPatterns<arith::RemFOp>(converter, patterns, "__ocml_fmod_f32" , |
123 | "__ocml_fmod_f64" , "__ocml_fmod_f16" ); |
124 | } |
125 | |
126 | namespace { |
127 | struct ConvertMathToROCDLPass |
128 | : public impl::ConvertMathToROCDLBase<ConvertMathToROCDLPass> { |
129 | ConvertMathToROCDLPass() = default; |
130 | void runOnOperation() override; |
131 | }; |
132 | } // namespace |
133 | |
134 | void ConvertMathToROCDLPass::runOnOperation() { |
135 | auto m = getOperation(); |
136 | MLIRContext *ctx = m.getContext(); |
137 | |
138 | RewritePatternSet patterns(&getContext()); |
139 | LowerToLLVMOptions options(ctx, DataLayout(m)); |
140 | LLVMTypeConverter converter(ctx, options); |
141 | populateMathToROCDLConversionPatterns(converter, patterns); |
142 | ConversionTarget target(getContext()); |
143 | target.addLegalDialect<BuiltinDialect, func::FuncDialect, |
144 | vector::VectorDialect, LLVM::LLVMDialect>(); |
145 | target.addIllegalOp<LLVM::CosOp, LLVM::ExpOp, LLVM::Exp2Op, LLVM::FAbsOp, |
146 | LLVM::FCeilOp, LLVM::FFloorOp, LLVM::FRemOp, LLVM::LogOp, |
147 | LLVM::Log10Op, LLVM::Log2Op, LLVM::PowOp, LLVM::SinOp, |
148 | LLVM::SqrtOp>(); |
149 | if (failed(applyPartialConversion(m, target, std::move(patterns)))) |
150 | signalPassFailure(); |
151 | } |
152 | |