1 | /**************************************************************************** |
2 | ** |
3 | ** Copyright (C) 2019 The Qt Company Ltd. |
4 | ** Copyright (C) 2016 by Southwest Research Institute (R) |
5 | ** Contact: http://www.qt-project.org/legal |
6 | ** |
7 | ** This file is part of the QtCore module of the Qt Toolkit. |
8 | ** |
9 | ** $QT_BEGIN_LICENSE:LGPL$ |
10 | ** Commercial License Usage |
11 | ** Licensees holding valid commercial Qt licenses may use this file in |
12 | ** accordance with the commercial license agreement provided with the |
13 | ** Software or, alternatively, in accordance with the terms contained in |
14 | ** a written agreement between you and The Qt Company. For licensing terms |
15 | ** and conditions see https://www.qt.io/terms-conditions. For further |
16 | ** information use the contact form at https://www.qt.io/contact-us. |
17 | ** |
18 | ** GNU Lesser General Public License Usage |
19 | ** Alternatively, this file may be used under the terms of the GNU Lesser |
20 | ** General Public License version 3 as published by the Free Software |
21 | ** Foundation and appearing in the file LICENSE.LGPL3 included in the |
22 | ** packaging of this file. Please review the following information to |
23 | ** ensure the GNU Lesser General Public License version 3 requirements |
24 | ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. |
25 | ** |
26 | ** GNU General Public License Usage |
27 | ** Alternatively, this file may be used under the terms of the GNU |
28 | ** General Public License version 2.0 or (at your option) the GNU General |
29 | ** Public license version 3 or any later version approved by the KDE Free |
30 | ** Qt Foundation. The licenses are as published by the Free Software |
31 | ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 |
32 | ** included in the packaging of this file. Please review the following |
33 | ** information to ensure the GNU General Public License requirements will |
34 | ** be met: https://www.gnu.org/licenses/gpl-2.0.html and |
35 | ** https://www.gnu.org/licenses/gpl-3.0.html. |
36 | ** |
37 | ** $QT_END_LICENSE$ |
38 | ** |
39 | ****************************************************************************/ |
40 | |
41 | #include "qfloat16.h" |
42 | #include "private/qsimd_p.h" |
43 | #include <cmath> // for fpclassify()'s return values |
44 | |
45 | QT_BEGIN_NAMESPACE |
46 | |
47 | /*! |
48 | \class qfloat16 |
49 | \keyword 16-bit Floating Point Support |
50 | \ingroup funclists |
51 | \inmodule QtCore |
52 | \inheaderfile QFloat16 |
53 | \brief Provides 16-bit floating point support. |
54 | |
55 | The \c qfloat16 class provides support for half-precision (16-bit) floating |
56 | point data. It is fully compliant with IEEE 754 as a storage type. This |
57 | implies that any arithmetic operation on a \c qfloat16 instance results in |
58 | the value first being converted to a \c float. This conversion to and from |
59 | \c float is performed by hardware when possible, but on processors that do |
60 | not natively support half-precision, the conversion is performed through a |
61 | sequence of lookup table operations. |
62 | |
63 | \c qfloat16 should be treated as if it were a POD (plain old data) type. |
64 | Consequently, none of the supported operations need any elaboration beyond |
65 | stating that it supports all arithmetic operators incident to floating point |
66 | types. |
67 | |
\note On x86 and x86-64, to get hardware accelerated conversions you must
compile with F16C or AVX2 enabled, or use qFloatToFloat16() and qFloatFromFloat16(),
which will detect F16C at runtime.
71 | |
72 | \since 5.9 |
73 | */ |
74 | |
75 | /*! |
76 | \macro QT_NO_FLOAT16_OPERATORS |
77 | \relates qfloat16 |
78 | \since 5.12.4 |
79 | |
80 | Defining this macro disables the arithmetic operators for qfloat16. |
81 | |
82 | This is only necessary on Visual Studio 2017 (and earlier) when including |
83 | \c {<QFloat16>} and \c{<bitset>} in the same translation unit, which would |
84 | otherwise cause a compilation error due to a toolchain bug (see |
85 | [QTBUG-72073]). |
86 | */ |
87 | |
88 | /*! |
89 | \fn bool qIsInf(qfloat16 f) |
90 | \relates qfloat16 |
91 | |
92 | Returns true if the \c qfloat16 \a {f} is equivalent to infinity. |
93 | |
94 | \sa qIsInf |
95 | */ |
96 | |
97 | /*! |
98 | \fn bool qIsNaN(qfloat16 f) |
99 | \relates qfloat16 |
100 | |
101 | Returns true if the \c qfloat16 \a {f} is not a number (NaN). |
102 | |
103 | \sa qIsNaN |
104 | */ |
105 | |
106 | /*! |
107 | \fn bool qIsFinite(qfloat16 f) |
108 | \relates qfloat16 |
109 | |
110 | Returns true if the \c qfloat16 \a {f} is a finite number. |
111 | |
112 | \sa qIsFinite |
113 | */ |
114 | |
115 | /*! |
116 | \internal |
117 | \since 5.14 |
118 | \fn bool qfloat16::isInf() const noexcept |
119 | |
120 | Tests whether this \c qfloat16 value is an infinity. |
121 | |
122 | \sa qIsInf() |
123 | */ |
124 | |
125 | /*! |
126 | \internal |
127 | \since 5.14 |
128 | \fn bool qfloat16::isNaN() const noexcept |
129 | |
130 | Tests whether this \c qfloat16 value is "not a number". |
131 | |
132 | \sa qIsNaN() |
133 | */ |
134 | |
135 | /*! |
136 | \since 5.14 |
137 | \fn bool qfloat16::isNormal() const noexcept |
138 | |
139 | Returns \c true if this \c qfloat16 value is finite and in normal form. |
140 | |
141 | \sa qFpClassify() |
142 | */ |
143 | |
144 | /*! |
145 | \internal |
146 | \since 5.14 |
147 | \fn bool qfloat16::isFinite() const noexcept |
148 | |
149 | Tests whether this \c qfloat16 value is finite. |
150 | |
151 | \sa qIsFinite() |
152 | */ |
153 | |
154 | /*! |
155 | \since 5.15 |
156 | \fn qfloat16 qfloat16::copySign(qfloat16 sign) const noexcept |
157 | |
158 | Returns a qfloat16 with the sign of \a sign but the rest of its value taken |
159 | from this qfloat16. Serves as qfloat16's equivalent of std::copysign(). |
160 | */ |
161 | |
162 | /*! |
163 | \internal |
164 | \since 5.14 |
165 | Implements qFpClassify() for qfloat16. |
166 | |
167 | \sa qFpClassify() |
168 | */ |
169 | int qfloat16::fpClassify() const noexcept |
170 | { |
171 | return isInf() ? FP_INFINITE : isNaN() ? FP_NAN |
172 | : !(b16 & 0x7fff) ? FP_ZERO : isNormal() ? FP_NORMAL : FP_SUBNORMAL; |
173 | } |
174 | |
175 | /*! \fn int qRound(qfloat16 value) |
176 | \relates qfloat16 |
177 | |
178 | Rounds \a value to the nearest integer. |
179 | |
180 | \sa qRound |
181 | */ |
182 | |
183 | /*! \fn qint64 qRound64(qfloat16 value) |
184 | \relates qfloat16 |
185 | |
186 | Rounds \a value to the nearest 64-bit integer. |
187 | |
188 | \sa qRound64 |
189 | */ |
190 | |
191 | /*! \fn bool qFuzzyCompare(qfloat16 p1, qfloat16 p2) |
192 | \relates qfloat16 |
193 | |
Compares the floating point values \a p1 and \a p2 and
195 | returns \c true if they are considered equal, otherwise \c false. |
196 | |
197 | The two numbers are compared in a relative way, where the |
198 | exactness is stronger the smaller the numbers are. |
199 | */ |
200 | |
201 | #if QT_COMPILER_SUPPORTS(F16C) |
202 | static inline bool hasFastF16() |
203 | { |
204 | // All processors with F16C also support AVX, but YMM registers |
205 | // might not be supported by the OS, or they might be disabled. |
206 | return qCpuHasFeature(F16C) && qCpuHasFeature(AVX); |
207 | } |
208 | |
209 | extern "C" { |
210 | #ifdef QFLOAT16_INCLUDE_FAST |
211 | # define f16cextern static |
212 | #else |
213 | # define f16cextern extern |
214 | #endif |
215 | |
216 | f16cextern void qFloatToFloat16_fast(quint16 *out, const float *in, qsizetype len) noexcept; |
217 | f16cextern void qFloatFromFloat16_fast(float *out, const quint16 *in, qsizetype len) noexcept; |
218 | |
219 | #undef f16cextern |
220 | } |
221 | |
222 | #elif defined(__ARM_FP16_FORMAT_IEEE) && defined(__ARM_NEON__) && (__ARM_FP & 2) |
// Reports whether the bulk converters below may be used.
static inline bool hasFastF16()
{
    // This branch is only compiled when the preprocessor checks for
    // __ARM_FP16_FORMAT_IEEE, __ARM_NEON__ and (__ARM_FP & 2) all pass,
    // so no runtime detection is performed here.
    return true;
}
227 | |
228 | static void qFloatToFloat16_fast(quint16 *out, const float *in, qsizetype len) noexcept |
229 | { |
230 | __fp16 *out_f16 = reinterpret_cast<__fp16 *>(out); |
231 | qsizetype i = 0; |
232 | for (; i < len - 3; i += 4) |
233 | vst1_f16(out_f16 + i, vcvt_f16_f32(vld1q_f32(in + i))); |
234 | SIMD_EPILOGUE(i, len, 3) |
235 | out_f16[i] = __fp16(in[i]); |
236 | } |
237 | |
238 | static void qFloatFromFloat16_fast(float *out, const quint16 *in, qsizetype len) noexcept |
239 | { |
240 | const __fp16 *in_f16 = reinterpret_cast<const __fp16 *>(in); |
241 | qsizetype i = 0; |
242 | for (; i < len - 3; i += 4) |
243 | vst1q_f32(out + i, vcvt_f32_f16(vld1_f16(in_f16 + i))); |
244 | SIMD_EPILOGUE(i, len, 3) |
245 | out[i] = float(in_f16[i]); |
246 | } |
247 | #else |
// Reports whether a bulk conversion routine is available.
static inline bool hasFastF16()
{
    // Fallback branch: neither the F16C nor the ARM NEON fp16 build-time
    // checks matched, so callers must convert element by element.
    return false;
}
252 | |
253 | static void qFloatToFloat16_fast(quint16 *, const float *, qsizetype) noexcept |
254 | { |
255 | Q_UNREACHABLE(); |
256 | } |
257 | |
258 | static void qFloatFromFloat16_fast(float *, const quint16 *, qsizetype) noexcept |
259 | { |
260 | Q_UNREACHABLE(); |
261 | } |
262 | #endif |
263 | /*! |
264 | \since 5.11 |
265 | \relates qfloat16 |
266 | |
267 | Converts \a len floats from \a in to qfloat16 and stores them in \a out. |
268 | Both \a in and \a out must have \a len allocated entries. |
269 | |
270 | This function is faster than converting values one by one, and will do runtime |
271 | F16C detection on x86 and x86-64 hardware. |
272 | */ |
273 | Q_CORE_EXPORT void qFloatToFloat16(qfloat16 *out, const float *in, qsizetype len) noexcept |
274 | { |
275 | if (hasFastF16()) |
276 | return qFloatToFloat16_fast(out: reinterpret_cast<quint16 *>(out), in, len); |
277 | |
278 | for (qsizetype i = 0; i < len; ++i) |
279 | out[i] = qfloat16(in[i]); |
280 | } |
281 | |
282 | /*! |
283 | \since 5.11 |
284 | \relates qfloat16 |
285 | |
286 | Converts \a len qfloat16 from \a in to floats and stores them in \a out. |
287 | Both \a in and \a out must have \a len allocated entries. |
288 | |
289 | This function is faster than converting values one by one, and will do runtime |
290 | F16C detection on x86 and x86-64 hardware. |
291 | */ |
292 | Q_CORE_EXPORT void qFloatFromFloat16(float *out, const qfloat16 *in, qsizetype len) noexcept |
293 | { |
294 | if (hasFastF16()) |
295 | return qFloatFromFloat16_fast(out, in: reinterpret_cast<const quint16 *>(in), len); |
296 | |
297 | for (qsizetype i = 0; i < len; ++i) |
298 | out[i] = float(in[i]); |
299 | } |
300 | |
301 | QT_END_NAMESPACE |
302 | |
303 | #include "qfloat16tables.cpp" |
304 | #ifdef QFLOAT16_INCLUDE_FAST |
305 | # include "qfloat16_f16c.c" |
306 | #endif |
307 | |