1 | // Copyright (C) 2020 The Qt Company Ltd. |
2 | // Copyright (C) 2016 by Southwest Research Institute (R) |
3 | // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only |
4 | |
5 | #include "qfloat16.h" |
6 | #include "private/qsimd_p.h" |
7 | #include <cmath> // for fpclassify()'s return values |
8 | |
9 | #include <QtCore/qdatastream.h> |
10 | #include <QtCore/qmetatype.h> |
11 | #include <QtCore/qtextstream.h> |
12 | |
// Metatype glue for qfloat16: the extern declaration is placed before the Qt
// namespace is opened, the matching implementation right after it.
// NOTE(review): the exact expansion of both macros lives in qmetatype.h — confirm
// there before reordering these lines.
QT_DECL_METATYPE_EXTERN(qfloat16, Q_CORE_EXPORT)
QT_BEGIN_NAMESPACE

QT_IMPL_METATYPE_EXTERN(qfloat16)
17 | |
18 | /*! |
19 | \class qfloat16 |
20 | \keyword 16-bit Floating Point Support |
21 | \ingroup funclists |
22 | \inmodule QtCore |
23 | \inheaderfile QFloat16 |
24 | \brief Provides 16-bit floating point support. |
25 | |
26 | \compares partial |
27 | \compareswith partial float double {long double} qint8 quint8 qint16 quint16 \ |
28 | qint32 quint32 long {unsigned long} qint64 quint64 |
29 | \endcompareswith |
30 | \compareswith partial qint128 quint128 |
31 | Comparison with 128-bit integral types is only supported if Qt provides |
32 | these types. |
33 | \endcompareswith |
34 | |
35 | The \c qfloat16 class provides support for half-precision (16-bit) floating |
36 | point data. It is fully compliant with IEEE 754 as a storage type. This |
37 | implies that any arithmetic operation on a \c qfloat16 instance results in |
38 | the value first being converted to a \c float. This conversion to and from |
39 | \c float is performed by hardware when possible, but on processors that do |
40 | not natively support half-precision, the conversion is performed through a |
41 | sequence of lookup table operations. |
42 | |
43 | \c qfloat16 should be treated as if it were a POD (plain old data) type. |
44 | Consequently, none of the supported operations need any elaboration beyond |
45 | stating that it supports all arithmetic operators incident to floating point |
46 | types. |
47 | |
    \note On x86 and x86-64, to get hardware-accelerated conversions you must
    compile with F16C or AVX2 enabled, or use qFloatToFloat16() and
    qFloatFromFloat16(), which will detect F16C at runtime.
51 | |
52 | \since 5.9 |
53 | */ |
54 | |
55 | /*! |
56 | \fn qfloat16::qfloat16(Qt::Initialization) |
57 | \since 6.1 |
58 | |
59 | Constructs a qfloat16 without initializing the value. |
60 | */ |
61 | |
62 | /*! |
63 | \fn bool qIsInf(qfloat16 f) |
64 | \relates qfloat16 |
65 | \overload qIsInf(float) |
66 | |
67 | Returns true if the \c qfloat16 \a {f} is equivalent to infinity. |
68 | */ |
69 | |
70 | /*! |
71 | \fn bool qIsNaN(qfloat16 f) |
72 | \relates qfloat16 |
73 | \overload qIsNaN(float) |
74 | |
75 | Returns true if the \c qfloat16 \a {f} is not a number (NaN). |
76 | */ |
77 | |
78 | /*! |
79 | \fn bool qIsFinite(qfloat16 f) |
80 | \relates qfloat16 |
81 | \overload qIsFinite(float) |
82 | |
83 | Returns true if the \c qfloat16 \a {f} is a finite number. |
84 | */ |
85 | |
86 | /*! |
87 | \internal |
88 | \since 5.14 |
89 | \fn bool qfloat16::isInf() const noexcept |
90 | |
91 | Tests whether this \c qfloat16 value is an infinity. |
92 | */ |
93 | |
94 | /*! |
95 | \internal |
96 | \since 5.14 |
97 | \fn bool qfloat16::isNaN() const noexcept |
98 | |
99 | Tests whether this \c qfloat16 value is "not a number". |
100 | */ |
101 | |
102 | /*! |
103 | \since 5.14 |
104 | \fn bool qfloat16::isNormal() const noexcept |
105 | |
106 | Returns \c true if this \c qfloat16 value is finite and in normal form. |
107 | |
108 | \sa qFpClassify() |
109 | */ |
110 | |
111 | /*! |
112 | \internal |
113 | \since 5.14 |
114 | \fn bool qfloat16::isFinite() const noexcept |
115 | |
116 | Tests whether this \c qfloat16 value is finite. |
117 | */ |
118 | |
119 | /*! |
120 | \since 5.15 |
121 | \fn qfloat16 qfloat16::copySign(qfloat16 sign) const noexcept |
122 | |
123 | Returns a qfloat16 with the sign of \a sign but the rest of its value taken |
124 | from this qfloat16. Serves as qfloat16's equivalent of std::copysign(). |
125 | */ |
126 | |
127 | /*! |
128 | \fn int qFpClassify(qfloat16 val) |
129 | \relates qfloat16 |
130 | \since 5.14 |
131 | \overload qFpClassify(float) |
132 | |
133 | Returns the floating-point class of \a val. |
134 | */ |
135 | |
136 | /*! |
137 | \internal |
138 | \since 5.14 |
139 | Implements qFpClassify() for qfloat16. |
140 | */ |
141 | int qfloat16::fpClassify() const noexcept |
142 | { |
143 | return isInf() ? FP_INFINITE : isNaN() ? FP_NAN |
144 | : !(b16 & 0x7fff) ? FP_ZERO : isNormal() ? FP_NORMAL : FP_SUBNORMAL; |
145 | } |
146 | |
147 | /*! \fn int qRound(qfloat16 value) |
148 | \relates qfloat16 |
149 | \overload qRound(float) |
150 | |
151 | Rounds \a value to the nearest integer. |
152 | */ |
153 | |
154 | /*! \fn qint64 qRound64(qfloat16 value) |
155 | \relates qfloat16 |
156 | \overload qRound64(float) |
157 | |
158 | Rounds \a value to the nearest 64-bit integer. |
159 | */ |
160 | |
161 | /*! \fn bool qFuzzyCompare(qfloat16 p1, qfloat16 p2) |
162 | \relates qfloat16 |
163 | \overload qFuzzyCompare(float, float) |
164 | |
    Compares the floating point values \a p1 and \a p2 and
166 | returns \c true if they are considered equal, otherwise \c false. |
167 | |
168 | The two numbers are compared in a relative way, where the |
169 | exactness is stronger the smaller the numbers are. |
170 | */ |
171 | |
172 | #if QT_COMPILER_SUPPORTS_HERE(F16C) |
// Reports whether the CPU offers the F16C feature. qFloatToFloat16() and
// qFloatFromFloat16() use the result to choose between the *_fast helpers and
// their element-by-element loops.
static inline bool hasFastF16()
{
    // qsimd.cpp:detectProcessorFeatures() turns off this feature if AVX
    // state-saving is not enabled by the OS
    return qCpuHasFeature(F16C);
}
179 | |
180 | #if QT_COMPILER_SUPPORTS_HERE(AVX512VL) && QT_COMPILER_SUPPORTS_HERE(AVX512BW) |
// Reports whether the CPU offers the ArchSkylakeAvx512 feature set; the *_fast
// helpers use this to decide whether short tails go through the masked
// *_tail_avx256 routines.
static bool hasFastF16Avx256()
{
    // 256-bit AVX512 doesn't have a performance penalty (see qstring.cpp for more info)
    return qCpuHasFeature(ArchSkylakeAvx512);
}
186 | |
187 | static QT_FUNCTION_TARGET(ARCH_SKYLAKE_AVX512) |
188 | void qFloatToFloat16_tail_avx256(quint16 *out, const float *in, qsizetype len) noexcept |
189 | { |
190 | __mmask16 mask = _bzhi_u32(X: -1, Y: len); |
191 | __m256 f32 = _mm256_maskz_loadu_ps(U: mask, P: in ); |
192 | __m128i f16 = _mm256_maskz_cvtps_ph(mask, f32, _MM_FROUND_TO_NEAREST_INT); |
193 | _mm_mask_storeu_epi16(P: out, U: mask, A: f16); |
194 | }; |
195 | |
196 | static QT_FUNCTION_TARGET(ARCH_SKYLAKE_AVX512) |
197 | void qFloatFromFloat16_tail_avx256(float *out, const quint16 *in, qsizetype len) noexcept |
198 | { |
199 | __mmask16 mask = _bzhi_u32(X: -1, Y: len); |
200 | __m128i f16 = _mm_maskz_loadu_epi16(U: mask, P: in); |
201 | __m256 f32 = _mm256_cvtph_ps(a: f16); |
202 | _mm256_mask_storeu_ps(P: out, U: mask, A: f32); |
203 | }; |
204 | #endif |
205 | |
206 | QT_FUNCTION_TARGET(F16C) |
207 | static void qFloatToFloat16_fast(quint16 *out, const float *in, qsizetype len) noexcept |
208 | { |
209 | constexpr qsizetype Step = sizeof(__m256i) / sizeof(float); |
210 | constexpr qsizetype HalfStep = sizeof(__m128i) / sizeof(float); |
211 | qsizetype i = 0; |
212 | |
213 | if (len >= Step) { |
214 | auto convertOneChunk = [=](qsizetype offset) QT_FUNCTION_TARGET(F16C) { |
215 | __m256 f32 = _mm256_loadu_ps(p: in + offset); |
216 | __m128i f16 = _mm256_cvtps_ph(f32, _MM_FROUND_TO_NEAREST_INT); |
217 | _mm_storeu_si128(p: reinterpret_cast<__m128i *>(out + offset), b: f16); |
218 | }; |
219 | |
220 | // main loop: convert Step (8) floats per iteration |
221 | for ( ; i + Step < len; i += Step) |
222 | convertOneChunk(i); |
223 | |
224 | // epilogue: convert the last chunk, possibly overlapping with the last |
225 | // iteration of the loop |
226 | return convertOneChunk(len - Step); |
227 | } |
228 | |
229 | #if QT_COMPILER_SUPPORTS_HERE(AVX512VL) && QT_COMPILER_SUPPORTS_HERE(AVX512BW) |
230 | if (hasFastF16Avx256()) |
231 | return qFloatToFloat16_tail_avx256(out, in, len); |
232 | #endif |
233 | |
234 | if (len >= HalfStep) { |
235 | auto convertOneChunk = [=](qsizetype offset) QT_FUNCTION_TARGET(F16C) { |
236 | __m128 f32 = _mm_loadu_ps(p: in + offset); |
237 | __m128i f16 = _mm_cvtps_ph(f32, _MM_FROUND_TO_NEAREST_INT); |
238 | _mm_storel_epi64(p: reinterpret_cast<__m128i *>(out + offset), a: f16); |
239 | }; |
240 | |
241 | // two conversions, possibly overlapping |
242 | convertOneChunk(0); |
243 | return convertOneChunk(len - HalfStep); |
244 | } |
245 | |
246 | // Inlining "qfloat16::qfloat16(float f)": |
247 | for ( ; i < len; ++i) |
248 | out[i] = _mm_extract_epi16(_mm_cvtps_ph(_mm_set_ss(in[i]), 0), 0); |
249 | } |
250 | |
251 | QT_FUNCTION_TARGET(F16C) |
252 | static void qFloatFromFloat16_fast(float *out, const quint16 *in, qsizetype len) noexcept |
253 | { |
254 | constexpr qsizetype Step = sizeof(__m256i) / sizeof(float); |
255 | constexpr qsizetype HalfStep = sizeof(__m128i) / sizeof(float); |
256 | qsizetype i = 0; |
257 | |
258 | if (len >= Step) { |
259 | auto convertOneChunk = [=](qsizetype offset) QT_FUNCTION_TARGET(F16C) { |
260 | __m128i f16 = _mm_loadu_si128(p: reinterpret_cast<const __m128i *>(in + offset)); |
261 | __m256 f32 = _mm256_cvtph_ps(a: f16); |
262 | _mm256_storeu_ps(p: out + offset, a: f32); |
263 | }; |
264 | |
265 | // main loop: convert Step (8) floats per iteration |
266 | for ( ; i + Step < len; i += Step) |
267 | convertOneChunk(i); |
268 | |
269 | // epilogue: convert the last chunk, possibly overlapping with the last |
270 | // iteration of the loop |
271 | return convertOneChunk(len - Step); |
272 | } |
273 | |
274 | #if QT_COMPILER_SUPPORTS_HERE(AVX512VL) && QT_COMPILER_SUPPORTS_HERE(AVX512BW) |
275 | if (hasFastF16Avx256()) |
276 | return qFloatFromFloat16_tail_avx256(out, in, len); |
277 | #endif |
278 | |
279 | if (len >= HalfStep) { |
280 | auto convertOneChunk = [=](qsizetype offset) QT_FUNCTION_TARGET(F16C) { |
281 | __m128i f16 = _mm_loadl_epi64(p: reinterpret_cast<const __m128i *>(in + offset)); |
282 | __m128 f32 = _mm_cvtph_ps(a: f16); |
283 | _mm_storeu_ps(p: out + offset, a: f32); |
284 | }; |
285 | |
286 | // two conversions, possibly overlapping |
287 | convertOneChunk(0); |
288 | return convertOneChunk(len - HalfStep); |
289 | } |
290 | |
291 | // Inlining "qfloat16::operator float()": |
292 | for ( ; i < len; ++i) |
293 | out[i] = _mm_cvtss_f32(a: _mm_cvtph_ps(a: _mm_cvtsi32_si128(a: in[i]))); |
294 | } |
295 | |
296 | #elif defined(__ARM_FP16_FORMAT_IEEE) && defined(__ARM_NEON__) && (__ARM_FP & 2) |
// Always true: this variant is only compiled when __ARM_FP16_FORMAT_IEEE and
// __ARM_NEON__ are defined and (__ARM_FP & 2) is non-zero, so the *_fast
// helpers below are always usable and no runtime check is made.
static inline bool hasFastF16()
{
    return true;
}
301 | |
302 | static void qFloatToFloat16_fast(quint16 *out, const float *in, qsizetype len) noexcept |
303 | { |
304 | __fp16 *out_f16 = reinterpret_cast<__fp16 *>(out); |
305 | qsizetype i = 0; |
306 | for (; i < len - 3; i += 4) |
307 | vst1_f16(out_f16 + i, vcvt_f16_f32(vld1q_f32(in + i))); |
308 | SIMD_EPILOGUE(i, len, 3) |
309 | out_f16[i] = __fp16(in[i]); |
310 | } |
311 | |
312 | static void qFloatFromFloat16_fast(float *out, const quint16 *in, qsizetype len) noexcept |
313 | { |
314 | const __fp16 *in_f16 = reinterpret_cast<const __fp16 *>(in); |
315 | qsizetype i = 0; |
316 | for (; i < len - 3; i += 4) |
317 | vst1q_f32(out + i, vcvt_f32_f16(vld1_f16(in_f16 + i))); |
318 | SIMD_EPILOGUE(i, len, 3) |
319 | out[i] = float(in_f16[i]); |
320 | } |
321 | #else |
// Fallback used when neither the F16C branch nor the ARM NEON half-precision
// branch above is compiled: there is no fast path, so qFloatToFloat16() and
// qFloatFromFloat16() always use their element-by-element loops.
static inline bool hasFastF16()
{
    return false;
}
326 | |
// Placeholder so that qFloatToFloat16() compiles in this configuration. It is
// never expected to run: the only call is guarded by hasFastF16(), which
// returns false here.
static void qFloatToFloat16_fast(quint16 *, const float *, qsizetype) noexcept
{
    Q_UNREACHABLE();
}
331 | |
// Placeholder so that qFloatFromFloat16() compiles in this configuration. It
// is never expected to run: the only call is guarded by hasFastF16(), which
// returns false here.
static void qFloatFromFloat16_fast(float *, const quint16 *, qsizetype) noexcept
{
    Q_UNREACHABLE();
}
336 | #endif |
337 | /*! |
338 | \since 5.11 |
339 | \relates qfloat16 |
340 | |
341 | Converts \a len floats from \a in to qfloat16 and stores them in \a out. |
342 | Both \a in and \a out must have \a len allocated entries. |
343 | |
344 | This function is faster than converting values one by one, and will do runtime |
345 | F16C detection on x86 and x86-64 hardware. |
346 | */ |
347 | Q_CORE_EXPORT void qFloatToFloat16(qfloat16 *out, const float *in, qsizetype len) noexcept |
348 | { |
349 | if (hasFastF16()) |
350 | return qFloatToFloat16_fast(out: reinterpret_cast<quint16 *>(out), in, len); |
351 | |
352 | for (qsizetype i = 0; i < len; ++i) |
353 | out[i] = qfloat16(in[i]); |
354 | } |
355 | |
356 | /*! |
357 | \since 5.11 |
358 | \relates qfloat16 |
359 | |
    Converts \a len qfloat16 values from \a in to floats and stores them in \a out.
361 | Both \a in and \a out must have \a len allocated entries. |
362 | |
363 | This function is faster than converting values one by one, and will do runtime |
364 | F16C detection on x86 and x86-64 hardware. |
365 | */ |
366 | Q_CORE_EXPORT void qFloatFromFloat16(float *out, const qfloat16 *in, qsizetype len) noexcept |
367 | { |
368 | if (hasFastF16()) |
369 | return qFloatFromFloat16_fast(out, in: reinterpret_cast<const quint16 *>(in), len); |
370 | |
371 | for (qsizetype i = 0; i < len; ++i) |
372 | out[i] = float(in[i]); |
373 | } |
374 | |
375 | /*! |
376 | \fn size_t qfloat16::qHash(qfloat16 key, size_t seed) |
377 | \since 6.5.3 |
378 | |
379 | Returns the hash value for the \a key, using \a seed to seed the |
380 | calculation. |
381 | |
382 | \note In Qt versions before 6.5, this operation was provided by the |
383 | qHash(float) overload. In Qt versions 6.5.0 to 6.5.2, this functionality |
384 | was broken in various ways. In Qt versions 6.5.3 and 6.6 onwards, this |
385 | overload restores the Qt 6.4 behavior. |
386 | */ |
387 | |
388 | #ifndef QT_NO_DATASTREAM |
389 | /*! |
390 | \fn qfloat16::operator<<(QDataStream &ds, qfloat16 f) |
391 | \relates QDataStream |
392 | \since 5.9 |
393 | |
394 | Writes a floating point number, \a f, to the stream \a ds using |
395 | the standard IEEE 754 format. Returns a reference to the stream. |
396 | |
397 | \note In Qt versions prior to 6.3, this was a member function on |
398 | QDataStream. |
399 | */ |
400 | QDataStream &operator<<(QDataStream &ds, qfloat16 f) |
401 | { |
402 | return ds << f.b16; |
403 | } |
404 | |
405 | /*! |
406 | \fn qfloat16::operator>>(QDataStream &ds, qfloat16 &f) |
407 | \relates QDataStream |
408 | \since 5.9 |
409 | |
410 | Reads a floating point number from the stream \a ds into \a f, |
411 | using the standard IEEE 754 format. Returns a reference to the |
412 | stream. |
413 | |
414 | \note In Qt versions prior to 6.3, this was a member function on |
415 | QDataStream. |
416 | */ |
417 | QDataStream &operator>>(QDataStream &ds, qfloat16 &f) |
418 | { |
419 | return ds >> f.b16; |
420 | } |
421 | #endif |
422 | |
423 | QTextStream &operator>>(QTextStream &ts, qfloat16 &f16) |
424 | { |
425 | float f; |
426 | ts >> f; |
427 | f16 = qfloat16(f); |
428 | return ts; |
429 | } |
430 | |
431 | QTextStream &operator<<(QTextStream &ts, qfloat16 f) |
432 | { |
433 | return ts << float(f); |
434 | } |
435 | |
436 | QT_END_NAMESPACE |
437 | |
438 | #include "qfloat16tables.cpp" |
439 | |