Warning: This file is not a C or C++ file. It does not have highlighting.
1 | //===-- include/flang/Decimal/binary-floating-point.h -----------*- C++ -*-===// |
---|---|
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #ifndef FORTRAN_DECIMAL_BINARY_FLOATING_POINT_H_ |
10 | #define FORTRAN_DECIMAL_BINARY_FLOATING_POINT_H_ |
11 | |
12 | // Access and manipulate the fields of an IEEE-754 binary |
13 | // floating-point value via a generalized template. |
14 | |
15 | #include "flang/Common/api-attrs.h" |
16 | #include "flang/Common/real.h" |
17 | #include "flang/Common/uint128.h" |
18 | #include <cinttypes> |
19 | #include <climits> |
20 | #include <cstring> |
21 | #include <type_traits> |
22 | |
23 | namespace Fortran::decimal { |
24 | |
// Rounding modes accepted by BinaryFloatingPointNumber::RoundToBits().
// The two-letter tags are the mode abbreviations each enumerator stands for.
// Kept as an unscoped enum so that the enumerators remain usable unqualified.
enum FortranRounding {
  // RN and RP
  RoundNearest,
  // RU
  RoundUp,
  // RD
  RoundDown,
  // RZ - no rounding
  RoundToZero,
  // RC: like RN, but ties go away from 0
  RoundCompatible,
};
32 | |
33 | template <int BINARY_PRECISION> class BinaryFloatingPointNumber { |
34 | public: |
35 | static constexpr common::RealCharacteristics realChars{BINARY_PRECISION}; |
36 | static constexpr int binaryPrecision{BINARY_PRECISION}; |
37 | static constexpr int bits{realChars.bits}; |
38 | static constexpr int isImplicitMSB{realChars.isImplicitMSB}; |
39 | static constexpr int significandBits{realChars.significandBits}; |
40 | static constexpr int exponentBits{realChars.exponentBits}; |
41 | static constexpr int exponentBias{realChars.exponentBias}; |
42 | static constexpr int maxExponent{realChars.maxExponent}; |
43 | static constexpr int decimalPrecision{realChars.decimalPrecision}; |
44 | static constexpr int decimalRange{realChars.decimalRange}; |
45 | static constexpr int maxDecimalConversionDigits{ |
46 | realChars.maxDecimalConversionDigits}; |
47 | |
48 | using RawType = common::HostUnsignedIntType<bits>; |
49 | static_assert(CHAR_BIT * sizeof(RawType) >= bits); |
50 | RT_OFFLOAD_VAR_GROUP_BEGIN |
51 | static constexpr RawType significandMask{(RawType{1} << significandBits) - 1}; |
52 | |
53 | constexpr RT_API_ATTRS BinaryFloatingPointNumber() {} // zero |
54 | RT_OFFLOAD_VAR_GROUP_END |
55 | constexpr BinaryFloatingPointNumber( |
56 | const BinaryFloatingPointNumber &that) = default; |
57 | constexpr BinaryFloatingPointNumber( |
58 | BinaryFloatingPointNumber &&that) = default; |
59 | constexpr BinaryFloatingPointNumber &operator=( |
60 | const BinaryFloatingPointNumber &that) = default; |
61 | constexpr BinaryFloatingPointNumber &operator=( |
62 | BinaryFloatingPointNumber &&that) = default; |
63 | constexpr explicit RT_API_ATTRS BinaryFloatingPointNumber(RawType raw) |
64 | : raw_{raw} {} |
65 | |
66 | RT_API_ATTRS RawType raw() const { return raw_; } |
67 | |
68 | template <typename A> |
69 | explicit constexpr RT_API_ATTRS BinaryFloatingPointNumber(A x) { |
70 | static_assert(sizeof raw_ <= sizeof x); |
71 | std::memcpy(reinterpret_cast<void *>(&raw_), |
72 | reinterpret_cast<const void *>(&x), sizeof raw_); |
73 | } |
74 | |
75 | constexpr RT_API_ATTRS int BiasedExponent() const { |
76 | return static_cast<int>( |
77 | (raw_ >> significandBits) & ((1 << exponentBits) - 1)); |
78 | } |
79 | constexpr RT_API_ATTRS int UnbiasedExponent() const { |
80 | int biased{BiasedExponent()}; |
81 | return biased - exponentBias + (biased == 0); |
82 | } |
83 | constexpr RT_API_ATTRS RawType Significand() const { |
84 | return raw_ & significandMask; |
85 | } |
86 | constexpr RT_API_ATTRS RawType Fraction() const { |
87 | RawType sig{Significand()}; |
88 | if (isImplicitMSB && BiasedExponent() > 0) { |
89 | sig |= RawType{1} << significandBits; |
90 | } |
91 | return sig; |
92 | } |
93 | |
94 | constexpr RT_API_ATTRS bool IsZero() const { |
95 | return (raw_ & ((RawType{1} << (bits - 1)) - 1)) == 0; |
96 | } |
97 | constexpr RT_API_ATTRS bool IsNaN() const { |
98 | auto expo{BiasedExponent()}; |
99 | auto sig{Significand()}; |
100 | if constexpr (bits == 80) { // x87 |
101 | if (expo == maxExponent) { |
102 | return sig != (significandMask >> 1) + 1; |
103 | } else { |
104 | return expo != 0 && !(sig & (RawType{1} << (significandBits - 1))); |
105 | ; |
106 | } |
107 | } else { |
108 | return expo == maxExponent && sig != 0; |
109 | } |
110 | } |
111 | constexpr RT_API_ATTRS bool IsInfinite() const { |
112 | if constexpr (bits == 80) { // x87 |
113 | return BiasedExponent() == maxExponent && |
114 | Significand() == ((significandMask >> 1) + 1); |
115 | } else { |
116 | return BiasedExponent() == maxExponent && Significand() == 0; |
117 | } |
118 | } |
119 | constexpr RT_API_ATTRS bool IsMaximalFiniteMagnitude() const { |
120 | return BiasedExponent() == maxExponent - 1 && |
121 | Significand() == significandMask; |
122 | } |
123 | constexpr RT_API_ATTRS bool IsNegative() const { |
124 | return ((raw_ >> (bits - 1)) & 1) != 0; |
125 | } |
126 | |
127 | constexpr RT_API_ATTRS void Negate() { raw_ ^= RawType{1} << (bits - 1); } |
128 | |
129 | // For calculating the nearest neighbors of a floating-point value |
130 | constexpr RT_API_ATTRS void Previous() { |
131 | RemoveExplicitMSB(); |
132 | --raw_; |
133 | InsertExplicitMSB(); |
134 | } |
135 | constexpr RT_API_ATTRS void Next() { |
136 | RemoveExplicitMSB(); |
137 | ++raw_; |
138 | InsertExplicitMSB(); |
139 | } |
140 | |
141 | static constexpr RT_API_ATTRS BinaryFloatingPointNumber Infinity( |
142 | bool isNegative) { |
143 | RawType result{RawType{maxExponent} << significandBits}; |
144 | if (isNegative) { |
145 | result |= RawType{1} << (bits - 1); |
146 | } |
147 | return BinaryFloatingPointNumber{result}; |
148 | } |
149 | |
150 | // Returns true when the result is exact |
151 | constexpr RT_API_ATTRS bool RoundToBits( |
152 | int keepBits, enum FortranRounding mode) { |
153 | if (IsNaN() || IsInfinite() || keepBits >= binaryPrecision) { |
154 | return true; |
155 | } |
156 | int lostBits{keepBits < binaryPrecision ? binaryPrecision - keepBits : 0}; |
157 | RawType lostMask{static_cast<RawType>((RawType{1} << lostBits) - 1)}; |
158 | if (RawType lost{static_cast<RawType>(raw_ & lostMask)}; lost != 0) { |
159 | bool increase{false}; |
160 | switch (mode) { |
161 | case RoundNearest: |
162 | if (lost >> (lostBits - 1) != 0) { // >= tie |
163 | if ((lost & (lostMask >> 1)) != 0) { |
164 | increase = true; // > tie |
165 | } else { |
166 | increase = ((raw_ >> lostBits) & 1) != 0; // tie to even |
167 | } |
168 | } |
169 | break; |
170 | case RoundUp: |
171 | increase = !IsNegative(); |
172 | break; |
173 | case RoundDown: |
174 | increase = IsNegative(); |
175 | break; |
176 | case RoundToZero: |
177 | break; |
178 | case RoundCompatible: |
179 | increase = lost >> (lostBits - 1) != 0; // >= tie |
180 | break; |
181 | } |
182 | if (increase) { |
183 | raw_ |= lostMask; |
184 | Next(); |
185 | } |
186 | return false; // inexact |
187 | } else { |
188 | return true; // exact |
189 | } |
190 | } |
191 | |
192 | private: |
193 | constexpr RT_API_ATTRS void RemoveExplicitMSB() { |
194 | if constexpr (!isImplicitMSB) { |
195 | raw_ = (raw_ & (significandMask >> 1)) | ((raw_ & ~significandMask) >> 1); |
196 | } |
197 | } |
198 | constexpr RT_API_ATTRS void InsertExplicitMSB() { |
199 | if constexpr (!isImplicitMSB) { |
200 | constexpr RawType mask{significandMask >> 1}; |
201 | raw_ = (raw_ & mask) | ((raw_ & ~mask) << 1); |
202 | if (BiasedExponent() > 0) { |
203 | raw_ |= RawType{1} << (significandBits - 1); |
204 | } |
205 | } |
206 | } |
207 | |
208 | RawType raw_{0}; |
209 | }; |
210 | } // namespace Fortran::decimal |
211 | #endif |
212 |
Warning: This file is not a C or C++ file. It does not have highlighting.