1 | //===-- SIModeRegisterDefaults.cpp ------------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "SIModeRegisterDefaults.h" |
10 | #include "GCNSubtarget.h" |
11 | |
12 | using namespace llvm; |
13 | |
14 | SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F, |
15 | const GCNSubtarget &ST) { |
16 | *this = getDefaultForCallingConv(CC: F.getCallingConv()); |
17 | |
18 | if (ST.hasIEEEMode()) { |
19 | StringRef IEEEAttr = F.getFnAttribute(Kind: "amdgpu-ieee" ).getValueAsString(); |
20 | if (!IEEEAttr.empty()) |
21 | IEEE = IEEEAttr == "true" ; |
22 | } |
23 | |
24 | if (ST.hasDX10ClampMode()) { |
25 | StringRef DX10ClampAttr = |
26 | F.getFnAttribute(Kind: "amdgpu-dx10-clamp" ).getValueAsString(); |
27 | if (!DX10ClampAttr.empty()) |
28 | DX10Clamp = DX10ClampAttr == "true" ; |
29 | } |
30 | |
31 | StringRef DenormF32Attr = |
32 | F.getFnAttribute(Kind: "denormal-fp-math-f32" ).getValueAsString(); |
33 | if (!DenormF32Attr.empty()) |
34 | FP32Denormals = parseDenormalFPAttribute(Str: DenormF32Attr); |
35 | |
36 | StringRef DenormAttr = |
37 | F.getFnAttribute(Kind: "denormal-fp-math" ).getValueAsString(); |
38 | if (!DenormAttr.empty()) { |
39 | DenormalMode DenormMode = parseDenormalFPAttribute(Str: DenormAttr); |
40 | if (DenormF32Attr.empty()) |
41 | FP32Denormals = DenormMode; |
42 | FP64FP16Denormals = DenormMode; |
43 | } |
44 | } |
45 | |
46 | using namespace AMDGPU; |
47 | |
48 | /// Combine f32 and f64 rounding modes into a combined rounding mode value. |
49 | static constexpr uint32_t getModeRegisterRoundMode(uint32_t HWFP32Val, |
50 | uint32_t HWFP64Val) { |
51 | return HWFP32Val << F32FltRoundOffset | HWFP64Val << F64FltRoundOffset; |
52 | } |
53 | |
54 | static constexpr uint64_t encodeFltRoundsTable(uint32_t FltRoundsVal, |
55 | uint32_t HWF32Val, |
56 | uint32_t HWF64Val) { |
57 | uint32_t ModeVal = getModeRegisterRoundMode(HWFP32Val: HWF32Val, HWFP64Val: HWF64Val); |
58 | if (FltRoundsVal > TowardNegative) |
59 | FltRoundsVal -= ExtendedFltRoundOffset; |
60 | |
61 | uint32_t BitIndex = ModeVal << 2; |
62 | return static_cast<uint64_t>(FltRoundsVal) << BitIndex; |
63 | } |
64 | |
65 | // Encode FLT_ROUNDS value where the two rounding modes are the same and use a |
66 | // standard value |
67 | static constexpr uint64_t |
68 | encodeFltRoundsTableSame(AMDGPUFltRounds FltRoundsMode, uint32_t HWVal) { |
69 | return encodeFltRoundsTable(FltRoundsVal: FltRoundsMode, HWF32Val: HWVal, HWF64Val: HWVal); |
70 | } |
71 | |
72 | // Convert mode register encoded rounding mode to AMDGPUFltRounds |
73 | static constexpr AMDGPUFltRounds |
74 | decodeIndexFltRoundConversionTable(uint32_t HWMode) { |
75 | uint32_t TableRead = (FltRoundConversionTable >> (HWMode << 2)) & 0xf; |
76 | if (TableRead > TowardNegative) |
77 | TableRead += ExtendedFltRoundOffset; |
78 | return static_cast<AMDGPUFltRounds>(TableRead); |
79 | } |
80 | |
81 | static constexpr uint32_t HWTowardZero = FP_ROUND_ROUND_TO_ZERO; |
82 | static constexpr uint32_t HWNearestTiesToEven = FP_ROUND_ROUND_TO_NEAREST; |
83 | static constexpr uint32_t HWTowardPositive = FP_ROUND_ROUND_TO_INF; |
84 | static constexpr uint32_t HWTowardNegative = FP_ROUND_ROUND_TO_NEGINF; |
85 | |
86 | const uint64_t AMDGPU::FltRoundConversionTable = |
87 | encodeFltRoundsTableSame(FltRoundsMode: TowardZeroF32_TowardZeroF64, HWVal: HWTowardZero) | |
88 | encodeFltRoundsTableSame(FltRoundsMode: NearestTiesToEvenF32_NearestTiesToEvenF64, |
89 | HWVal: HWNearestTiesToEven) | |
90 | encodeFltRoundsTableSame(FltRoundsMode: TowardPositiveF32_TowardPositiveF64, |
91 | HWVal: HWTowardPositive) | |
92 | encodeFltRoundsTableSame(FltRoundsMode: TowardNegativeF32_TowardNegativeF64, |
93 | HWVal: HWTowardNegative) | |
94 | |
95 | encodeFltRoundsTable(FltRoundsVal: TowardZeroF32_NearestTiesToEvenF64, HWF32Val: HWTowardZero, |
96 | HWF64Val: HWNearestTiesToEven) | |
97 | encodeFltRoundsTable(FltRoundsVal: TowardZeroF32_TowardPositiveF64, HWF32Val: HWTowardZero, |
98 | HWF64Val: HWTowardPositive) | |
99 | encodeFltRoundsTable(FltRoundsVal: TowardZeroF32_TowardNegativeF64, HWF32Val: HWTowardZero, |
100 | HWF64Val: HWTowardNegative) | |
101 | |
102 | encodeFltRoundsTable(FltRoundsVal: NearestTiesToEvenF32_TowardZeroF64, |
103 | HWF32Val: HWNearestTiesToEven, HWF64Val: HWTowardZero) | |
104 | encodeFltRoundsTable(FltRoundsVal: NearestTiesToEvenF32_TowardPositiveF64, |
105 | HWF32Val: HWNearestTiesToEven, HWF64Val: HWTowardPositive) | |
106 | encodeFltRoundsTable(FltRoundsVal: NearestTiesToEvenF32_TowardNegativeF64, |
107 | HWF32Val: HWNearestTiesToEven, HWF64Val: HWTowardNegative) | |
108 | |
109 | encodeFltRoundsTable(FltRoundsVal: TowardPositiveF32_TowardZeroF64, HWF32Val: HWTowardPositive, |
110 | HWF64Val: HWTowardZero) | |
111 | encodeFltRoundsTable(FltRoundsVal: TowardPositiveF32_NearestTiesToEvenF64, |
112 | HWF32Val: HWTowardPositive, HWF64Val: HWNearestTiesToEven) | |
113 | encodeFltRoundsTable(FltRoundsVal: TowardPositiveF32_TowardNegativeF64, HWF32Val: HWTowardPositive, |
114 | HWF64Val: HWTowardNegative) | |
115 | |
116 | encodeFltRoundsTable(FltRoundsVal: TowardNegativeF32_TowardZeroF64, HWF32Val: HWTowardNegative, |
117 | HWF64Val: HWTowardZero) | |
118 | encodeFltRoundsTable(FltRoundsVal: TowardNegativeF32_NearestTiesToEvenF64, |
119 | HWF32Val: HWTowardNegative, HWF64Val: HWNearestTiesToEven) | |
120 | encodeFltRoundsTable(FltRoundsVal: TowardNegativeF32_TowardPositiveF64, HWF32Val: HWTowardNegative, |
121 | HWF64Val: HWTowardPositive); |
122 | |
123 | // Verify evaluation of FltRoundConversionTable |
124 | |
125 | // If both modes are the same, should return the standard values. |
126 | static_assert(decodeIndexFltRoundConversionTable(HWMode: getModeRegisterRoundMode( |
127 | HWFP32Val: HWTowardZero, HWFP64Val: HWTowardZero)) == AMDGPUFltRounds::TowardZero); |
128 | static_assert(decodeIndexFltRoundConversionTable(HWMode: getModeRegisterRoundMode( |
129 | HWFP32Val: HWNearestTiesToEven, HWFP64Val: HWNearestTiesToEven)) == |
130 | AMDGPUFltRounds::NearestTiesToEven); |
131 | static_assert(decodeIndexFltRoundConversionTable(HWMode: getModeRegisterRoundMode( |
132 | HWFP32Val: HWTowardPositive, HWFP64Val: HWTowardPositive)) == |
133 | AMDGPUFltRounds::TowardPositive); |
134 | static_assert(decodeIndexFltRoundConversionTable(HWMode: getModeRegisterRoundMode( |
135 | HWFP32Val: HWTowardNegative, HWFP64Val: HWTowardNegative)) == |
136 | AMDGPUFltRounds::TowardNegative); |
137 | |
138 | static_assert(decodeIndexFltRoundConversionTable(HWMode: getModeRegisterRoundMode( |
139 | HWFP32Val: HWTowardZero, HWFP64Val: HWNearestTiesToEven)) == |
140 | TowardZeroF32_NearestTiesToEvenF64); |
141 | static_assert(decodeIndexFltRoundConversionTable( |
142 | HWMode: getModeRegisterRoundMode(HWFP32Val: HWTowardZero, HWFP64Val: HWTowardPositive)) == |
143 | TowardZeroF32_TowardPositiveF64); |
144 | static_assert(decodeIndexFltRoundConversionTable( |
145 | HWMode: getModeRegisterRoundMode(HWFP32Val: HWTowardZero, HWFP64Val: HWTowardNegative)) == |
146 | TowardZeroF32_TowardNegativeF64); |
147 | |
148 | static_assert(decodeIndexFltRoundConversionTable(HWMode: getModeRegisterRoundMode( |
149 | HWFP32Val: HWNearestTiesToEven, HWFP64Val: HWTowardZero)) == |
150 | NearestTiesToEvenF32_TowardZeroF64); |
151 | static_assert(decodeIndexFltRoundConversionTable(HWMode: getModeRegisterRoundMode( |
152 | HWFP32Val: HWNearestTiesToEven, HWFP64Val: HWTowardPositive)) == |
153 | NearestTiesToEvenF32_TowardPositiveF64); |
154 | static_assert(decodeIndexFltRoundConversionTable(HWMode: getModeRegisterRoundMode( |
155 | HWFP32Val: HWNearestTiesToEven, HWFP64Val: HWTowardNegative)) == |
156 | NearestTiesToEvenF32_TowardNegativeF64); |
157 | |
158 | static_assert(decodeIndexFltRoundConversionTable( |
159 | HWMode: getModeRegisterRoundMode(HWFP32Val: HWTowardPositive, HWFP64Val: HWTowardZero)) == |
160 | TowardPositiveF32_TowardZeroF64); |
161 | static_assert(decodeIndexFltRoundConversionTable(HWMode: getModeRegisterRoundMode( |
162 | HWFP32Val: HWTowardPositive, HWFP64Val: HWNearestTiesToEven)) == |
163 | TowardPositiveF32_NearestTiesToEvenF64); |
164 | static_assert(decodeIndexFltRoundConversionTable(HWMode: getModeRegisterRoundMode( |
165 | HWFP32Val: HWTowardPositive, HWFP64Val: HWTowardNegative)) == |
166 | TowardPositiveF32_TowardNegativeF64); |
167 | |
168 | static_assert(decodeIndexFltRoundConversionTable( |
169 | HWMode: getModeRegisterRoundMode(HWFP32Val: HWTowardNegative, HWFP64Val: HWTowardZero)) == |
170 | TowardNegativeF32_TowardZeroF64); |
171 | static_assert(decodeIndexFltRoundConversionTable(HWMode: getModeRegisterRoundMode( |
172 | HWFP32Val: HWTowardNegative, HWFP64Val: HWNearestTiesToEven)) == |
173 | TowardNegativeF32_NearestTiesToEvenF64); |
174 | static_assert(decodeIndexFltRoundConversionTable(HWMode: getModeRegisterRoundMode( |
175 | HWFP32Val: HWTowardNegative, HWFP64Val: HWTowardPositive)) == |
176 | TowardNegativeF32_TowardPositiveF64); |
177 | |