1// Copyright (C) 2020 The Qt Company Ltd.
2// Copyright (C) 2016 by Southwest Research Institute (R)
3// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
4
5#include "qfloat16.h"
6#include "private/qsimd_p.h"
7#include <cmath> // for fpclassify()'s return values
8
9#include <QtCore/qdatastream.h>
10#include <QtCore/qmetatype.h>
11#include <QtCore/qtextstream.h>
12
13QT_DECL_METATYPE_EXTERN(qfloat16, Q_CORE_EXPORT)
14QT_BEGIN_NAMESPACE
15
16QT_IMPL_METATYPE_EXTERN(qfloat16)
17
18/*!
19 \class qfloat16
20 \keyword 16-bit Floating Point Support
21 \ingroup funclists
22 \inmodule QtCore
23 \inheaderfile QFloat16
24 \brief Provides 16-bit floating point support.
25
26 \compares partial
27 \compareswith partial float double {long double} qint8 quint8 qint16 quint16 \
28 qint32 quint32 long {unsigned long} qint64 quint64
29 \endcompareswith
30 \compareswith partial qint128 quint128
31 Comparison with 128-bit integral types is only supported if Qt provides
32 these types.
33 \endcompareswith
34
35 The \c qfloat16 class provides support for half-precision (16-bit) floating
36 point data. It is fully compliant with IEEE 754 as a storage type. This
37 implies that any arithmetic operation on a \c qfloat16 instance results in
38 the value first being converted to a \c float. This conversion to and from
39 \c float is performed by hardware when possible, but on processors that do
40 not natively support half-precision, the conversion is performed through a
41 sequence of lookup table operations.
42
43 \c qfloat16 should be treated as if it were a POD (plain old data) type.
44 Consequently, none of the supported operations need any elaboration beyond
45 stating that it supports all arithmetic operators incident to floating point
46 types.
47
 \note On x86 and x86-64, to get hardware-accelerated conversions you must
 compile with F16C or AVX2 enabled, or use qFloatToFloat16() and qFloatFromFloat16(),
 which will detect F16C at runtime.
51
52 \since 5.9
53*/
54
55/*!
56 \fn qfloat16::qfloat16(Qt::Initialization)
57 \since 6.1
58
59 Constructs a qfloat16 without initializing the value.
60*/
61
62/*!
63 \fn bool qIsInf(qfloat16 f)
64 \relates qfloat16
65 \overload qIsInf(float)
66
67 Returns true if the \c qfloat16 \a {f} is equivalent to infinity.
68*/
69
70/*!
71 \fn bool qIsNaN(qfloat16 f)
72 \relates qfloat16
73 \overload qIsNaN(float)
74
75 Returns true if the \c qfloat16 \a {f} is not a number (NaN).
76*/
77
78/*!
79 \fn bool qIsFinite(qfloat16 f)
80 \relates qfloat16
81 \overload qIsFinite(float)
82
83 Returns true if the \c qfloat16 \a {f} is a finite number.
84*/
85
86/*!
87 \internal
88 \since 5.14
89 \fn bool qfloat16::isInf() const noexcept
90
91 Tests whether this \c qfloat16 value is an infinity.
92*/
93
94/*!
95 \internal
96 \since 5.14
97 \fn bool qfloat16::isNaN() const noexcept
98
99 Tests whether this \c qfloat16 value is "not a number".
100*/
101
102/*!
103 \since 5.14
104 \fn bool qfloat16::isNormal() const noexcept
105
106 Returns \c true if this \c qfloat16 value is finite and in normal form.
107
108 \sa qFpClassify()
109*/
110
111/*!
112 \internal
113 \since 5.14
114 \fn bool qfloat16::isFinite() const noexcept
115
116 Tests whether this \c qfloat16 value is finite.
117*/
118
119/*!
120 \since 5.15
121 \fn qfloat16 qfloat16::copySign(qfloat16 sign) const noexcept
122
123 Returns a qfloat16 with the sign of \a sign but the rest of its value taken
124 from this qfloat16. Serves as qfloat16's equivalent of std::copysign().
125*/
126
127/*!
128 \fn int qFpClassify(qfloat16 val)
129 \relates qfloat16
130 \since 5.14
131 \overload qFpClassify(float)
132
133 Returns the floating-point class of \a val.
134*/
135
136/*!
137 \internal
138 \since 5.14
139 Implements qFpClassify() for qfloat16.
140*/
141int qfloat16::fpClassify() const noexcept
142{
143 return isInf() ? FP_INFINITE : isNaN() ? FP_NAN
144 : !(b16 & 0x7fff) ? FP_ZERO : isNormal() ? FP_NORMAL : FP_SUBNORMAL;
145}
146
147/*! \fn int qRound(qfloat16 value)
148 \relates qfloat16
149 \overload qRound(float)
150
151 Rounds \a value to the nearest integer.
152*/
153
154/*! \fn qint64 qRound64(qfloat16 value)
155 \relates qfloat16
156 \overload qRound64(float)
157
158 Rounds \a value to the nearest 64-bit integer.
159*/
160
161/*! \fn bool qFuzzyCompare(qfloat16 p1, qfloat16 p2)
162 \relates qfloat16
163 \overload qFuzzyCompare(float, float)
164
 Compares the floating point values \a p1 and \a p2 and
 returns \c true if they are considered equal, otherwise \c false.
167
168 The two numbers are compared in a relative way, where the
169 exactness is stronger the smaller the numbers are.
170 */
171
172#if QT_COMPILER_SUPPORTS_HERE(F16C)
173static inline bool hasFastF16()
174{
175 // qsimd.cpp:detectProcessorFeatures() turns off this feature if AVX
176 // state-saving is not enabled by the OS
177 return qCpuHasFeature(F16C);
178}
179
180#if QT_COMPILER_SUPPORTS_HERE(AVX512VL) && QT_COMPILER_SUPPORTS_HERE(AVX512BW)
181static bool hasFastF16Avx256()
182{
183 // 256-bit AVX512 don't have a performance penalty (see qstring.cpp for more info)
184 return qCpuHasFeature(ArchSkylakeAvx512);
185}
186
187static QT_FUNCTION_TARGET(ARCH_SKYLAKE_AVX512)
188void qFloatToFloat16_tail_avx256(quint16 *out, const float *in, qsizetype len) noexcept
189{
190 __mmask16 mask = _bzhi_u32(X: -1, Y: len);
191 __m256 f32 = _mm256_maskz_loadu_ps(U: mask, P: in );
192 __m128i f16 = _mm256_maskz_cvtps_ph(mask, f32, _MM_FROUND_TO_NEAREST_INT);
193 _mm_mask_storeu_epi16(P: out, U: mask, A: f16);
194};
195
196static QT_FUNCTION_TARGET(ARCH_SKYLAKE_AVX512)
197void qFloatFromFloat16_tail_avx256(float *out, const quint16 *in, qsizetype len) noexcept
198{
199 __mmask16 mask = _bzhi_u32(X: -1, Y: len);
200 __m128i f16 = _mm_maskz_loadu_epi16(U: mask, P: in);
201 __m256 f32 = _mm256_cvtph_ps(a: f16);
202 _mm256_mask_storeu_ps(P: out, U: mask, A: f32);
203};
204#endif
205
206QT_FUNCTION_TARGET(F16C)
207static void qFloatToFloat16_fast(quint16 *out, const float *in, qsizetype len) noexcept
208{
209 constexpr qsizetype Step = sizeof(__m256i) / sizeof(float);
210 constexpr qsizetype HalfStep = sizeof(__m128i) / sizeof(float);
211 qsizetype i = 0;
212
213 if (len >= Step) {
214 auto convertOneChunk = [=](qsizetype offset) QT_FUNCTION_TARGET(F16C) {
215 __m256 f32 = _mm256_loadu_ps(p: in + offset);
216 __m128i f16 = _mm256_cvtps_ph(f32, _MM_FROUND_TO_NEAREST_INT);
217 _mm_storeu_si128(p: reinterpret_cast<__m128i *>(out + offset), b: f16);
218 };
219
220 // main loop: convert Step (8) floats per iteration
221 for ( ; i + Step < len; i += Step)
222 convertOneChunk(i);
223
224 // epilogue: convert the last chunk, possibly overlapping with the last
225 // iteration of the loop
226 return convertOneChunk(len - Step);
227 }
228
229#if QT_COMPILER_SUPPORTS_HERE(AVX512VL) && QT_COMPILER_SUPPORTS_HERE(AVX512BW)
230 if (hasFastF16Avx256())
231 return qFloatToFloat16_tail_avx256(out, in, len);
232#endif
233
234 if (len >= HalfStep) {
235 auto convertOneChunk = [=](qsizetype offset) QT_FUNCTION_TARGET(F16C) {
236 __m128 f32 = _mm_loadu_ps(p: in + offset);
237 __m128i f16 = _mm_cvtps_ph(f32, _MM_FROUND_TO_NEAREST_INT);
238 _mm_storel_epi64(p: reinterpret_cast<__m128i *>(out + offset), a: f16);
239 };
240
241 // two conversions, possibly overlapping
242 convertOneChunk(0);
243 return convertOneChunk(len - HalfStep);
244 }
245
246 // Inlining "qfloat16::qfloat16(float f)":
247 for ( ; i < len; ++i)
248 out[i] = _mm_extract_epi16(_mm_cvtps_ph(_mm_set_ss(in[i]), 0), 0);
249}
250
251QT_FUNCTION_TARGET(F16C)
252static void qFloatFromFloat16_fast(float *out, const quint16 *in, qsizetype len) noexcept
253{
254 constexpr qsizetype Step = sizeof(__m256i) / sizeof(float);
255 constexpr qsizetype HalfStep = sizeof(__m128i) / sizeof(float);
256 qsizetype i = 0;
257
258 if (len >= Step) {
259 auto convertOneChunk = [=](qsizetype offset) QT_FUNCTION_TARGET(F16C) {
260 __m128i f16 = _mm_loadu_si128(p: reinterpret_cast<const __m128i *>(in + offset));
261 __m256 f32 = _mm256_cvtph_ps(a: f16);
262 _mm256_storeu_ps(p: out + offset, a: f32);
263 };
264
265 // main loop: convert Step (8) floats per iteration
266 for ( ; i + Step < len; i += Step)
267 convertOneChunk(i);
268
269 // epilogue: convert the last chunk, possibly overlapping with the last
270 // iteration of the loop
271 return convertOneChunk(len - Step);
272 }
273
274#if QT_COMPILER_SUPPORTS_HERE(AVX512VL) && QT_COMPILER_SUPPORTS_HERE(AVX512BW)
275 if (hasFastF16Avx256())
276 return qFloatFromFloat16_tail_avx256(out, in, len);
277#endif
278
279 if (len >= HalfStep) {
280 auto convertOneChunk = [=](qsizetype offset) QT_FUNCTION_TARGET(F16C) {
281 __m128i f16 = _mm_loadl_epi64(p: reinterpret_cast<const __m128i *>(in + offset));
282 __m128 f32 = _mm_cvtph_ps(a: f16);
283 _mm_storeu_ps(p: out + offset, a: f32);
284 };
285
286 // two conversions, possibly overlapping
287 convertOneChunk(0);
288 return convertOneChunk(len - HalfStep);
289 }
290
291 // Inlining "qfloat16::operator float()":
292 for ( ; i < len; ++i)
293 out[i] = _mm_cvtss_f32(a: _mm_cvtph_ps(a: _mm_cvtsi32_si128(a: in[i])));
294}
295
296#elif defined(__ARM_FP16_FORMAT_IEEE) && defined(__ARM_NEON__) && (__ARM_FP & 2)
// In this build configuration (selected by the enclosing #elif) the fast
// conversion path is unconditionally available, so no runtime check is made.
static inline bool hasFastF16()
{
    constexpr bool alwaysAvailable = true;
    return alwaysAvailable;
}
301
302static void qFloatToFloat16_fast(quint16 *out, const float *in, qsizetype len) noexcept
303{
304 __fp16 *out_f16 = reinterpret_cast<__fp16 *>(out);
305 qsizetype i = 0;
306 for (; i < len - 3; i += 4)
307 vst1_f16(out_f16 + i, vcvt_f16_f32(vld1q_f32(in + i)));
308 SIMD_EPILOGUE(i, len, 3)
309 out_f16[i] = __fp16(in[i]);
310}
311
312static void qFloatFromFloat16_fast(float *out, const quint16 *in, qsizetype len) noexcept
313{
314 const __fp16 *in_f16 = reinterpret_cast<const __fp16 *>(in);
315 qsizetype i = 0;
316 for (; i < len - 3; i += 4)
317 vst1q_f32(out + i, vcvt_f32_f16(vld1_f16(in_f16 + i)));
318 SIMD_EPILOGUE(i, len, 3)
319 out[i] = float(in_f16[i]);
320}
321#else
// Fallback configuration: neither of the accelerated implementations above
// was compiled in, so the fast path is never available.
static inline bool hasFastF16()
{
    constexpr bool neverAvailable = false;
    return neverAvailable;
}
326
327static void qFloatToFloat16_fast(quint16 *, const float *, qsizetype) noexcept
328{
329 Q_UNREACHABLE();
330}
331
332static void qFloatFromFloat16_fast(float *, const quint16 *, qsizetype) noexcept
333{
334 Q_UNREACHABLE();
335}
336#endif
337/*!
338 \since 5.11
339 \relates qfloat16
340
341 Converts \a len floats from \a in to qfloat16 and stores them in \a out.
342 Both \a in and \a out must have \a len allocated entries.
343
344 This function is faster than converting values one by one, and will do runtime
345 F16C detection on x86 and x86-64 hardware.
346*/
347Q_CORE_EXPORT void qFloatToFloat16(qfloat16 *out, const float *in, qsizetype len) noexcept
348{
349 if (hasFastF16())
350 return qFloatToFloat16_fast(out: reinterpret_cast<quint16 *>(out), in, len);
351
352 for (qsizetype i = 0; i < len; ++i)
353 out[i] = qfloat16(in[i]);
354}
355
356/*!
357 \since 5.11
358 \relates qfloat16
359
360 Converts \a len qfloat16 from \a in to floats and stores them in \a out.
361 Both \a in and \a out must have \a len allocated entries.
362
363 This function is faster than converting values one by one, and will do runtime
364 F16C detection on x86 and x86-64 hardware.
365*/
366Q_CORE_EXPORT void qFloatFromFloat16(float *out, const qfloat16 *in, qsizetype len) noexcept
367{
368 if (hasFastF16())
369 return qFloatFromFloat16_fast(out, in: reinterpret_cast<const quint16 *>(in), len);
370
371 for (qsizetype i = 0; i < len; ++i)
372 out[i] = float(in[i]);
373}
374
375/*!
376 \fn size_t qfloat16::qHash(qfloat16 key, size_t seed)
377 \since 6.5.3
378
379 Returns the hash value for the \a key, using \a seed to seed the
380 calculation.
381
382 \note In Qt versions before 6.5, this operation was provided by the
383 qHash(float) overload. In Qt versions 6.5.0 to 6.5.2, this functionality
384 was broken in various ways. In Qt versions 6.5.3 and 6.6 onwards, this
385 overload restores the Qt 6.4 behavior.
386*/
387
388#ifndef QT_NO_DATASTREAM
389/*!
390 \fn qfloat16::operator<<(QDataStream &ds, qfloat16 f)
391 \relates QDataStream
392 \since 5.9
393
394 Writes a floating point number, \a f, to the stream \a ds using
395 the standard IEEE 754 format. Returns a reference to the stream.
396
397 \note In Qt versions prior to 6.3, this was a member function on
398 QDataStream.
399*/
400QDataStream &operator<<(QDataStream &ds, qfloat16 f)
401{
402 return ds << f.b16;
403}
404
405/*!
406 \fn qfloat16::operator>>(QDataStream &ds, qfloat16 &f)
407 \relates QDataStream
408 \since 5.9
409
410 Reads a floating point number from the stream \a ds into \a f,
411 using the standard IEEE 754 format. Returns a reference to the
412 stream.
413
414 \note In Qt versions prior to 6.3, this was a member function on
415 QDataStream.
416*/
417QDataStream &operator>>(QDataStream &ds, qfloat16 &f)
418{
419 return ds >> f.b16;
420}
421#endif
422
423QTextStream &operator>>(QTextStream &ts, qfloat16 &f16)
424{
425 float f;
426 ts >> f;
427 f16 = qfloat16(f);
428 return ts;
429}
430
431QTextStream &operator<<(QTextStream &ts, qfloat16 f)
432{
433 return ts << float(f);
434}
435
436QT_END_NAMESPACE
437
438#include "qfloat16tables.cpp"
439

Provided by KDAB

Privacy Policy
Learn to use CMake with our Intro Training
Find out more

source code of qtbase/src/corelib/global/qfloat16.cpp