1 | // Copyright (C) 2020 The Qt Company Ltd. |
2 | // Copyright (C) 2016 by Southwest Research Institute (R) |
3 | // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only |
4 | |
5 | #include "qfloat16.h" |
6 | #include "private/qsimd_p.h" |
7 | #include <cmath> // for fpclassify()'s return values |
8 | |
9 | #include <QtCore/qdatastream.h> |
10 | #include <QtCore/qmetatype.h> |
11 | #include <QtCore/qtextstream.h> |
12 | |
13 | QT_DECL_METATYPE_EXTERN(qfloat16, Q_CORE_EXPORT) |
14 | QT_BEGIN_NAMESPACE |
15 | |
16 | QT_IMPL_METATYPE_EXTERN(qfloat16) |
17 | |
18 | /*! |
19 | \class qfloat16 |
20 | \keyword 16-bit Floating Point Support |
21 | \ingroup funclists |
22 | \inmodule QtCore |
23 | \inheaderfile QFloat16 |
24 | \brief Provides 16-bit floating point support. |
25 | |
26 | The \c qfloat16 class provides support for half-precision (16-bit) floating |
27 | point data. It is fully compliant with IEEE 754 as a storage type. This |
28 | implies that any arithmetic operation on a \c qfloat16 instance results in |
29 | the value first being converted to a \c float. This conversion to and from |
30 | \c float is performed by hardware when possible, but on processors that do |
31 | not natively support half-precision, the conversion is performed through a |
32 | sequence of lookup table operations. |
33 | |
34 | \c qfloat16 should be treated as if it were a POD (plain old data) type. |
35 | Consequently, none of the supported operations need any elaboration beyond |
36 | stating that it supports all arithmetic operators incident to floating point |
37 | types. |
38 | |
    \note On x86 and x86-64, to get hardware-accelerated conversions you must
    compile with F16C or AVX2 enabled, or use qFloatToFloat16() and qFloatFromFloat16(),
    which will detect F16C at runtime.
42 | |
43 | \since 5.9 |
44 | */ |
45 | |
46 | /*! |
47 | \fn qfloat16::qfloat16(Qt::Initialization) |
48 | \since 6.1 |
49 | |
50 | Constructs a qfloat16 without initializing the value. |
51 | */ |
52 | |
53 | /*! |
54 | \fn bool qIsInf(qfloat16 f) |
55 | \relates qfloat16 |
56 | \overload qIsInf(float) |
57 | |
58 | Returns true if the \c qfloat16 \a {f} is equivalent to infinity. |
59 | */ |
60 | |
61 | /*! |
62 | \fn bool qIsNaN(qfloat16 f) |
63 | \relates qfloat16 |
64 | \overload qIsNaN(float) |
65 | |
66 | Returns true if the \c qfloat16 \a {f} is not a number (NaN). |
67 | */ |
68 | |
69 | /*! |
70 | \fn bool qIsFinite(qfloat16 f) |
71 | \relates qfloat16 |
72 | \overload qIsFinite(float) |
73 | |
74 | Returns true if the \c qfloat16 \a {f} is a finite number. |
75 | */ |
76 | |
77 | /*! |
78 | \internal |
79 | \since 5.14 |
80 | \fn bool qfloat16::isInf() const noexcept |
81 | |
82 | Tests whether this \c qfloat16 value is an infinity. |
83 | */ |
84 | |
85 | /*! |
86 | \internal |
87 | \since 5.14 |
88 | \fn bool qfloat16::isNaN() const noexcept |
89 | |
90 | Tests whether this \c qfloat16 value is "not a number". |
91 | */ |
92 | |
93 | /*! |
94 | \since 5.14 |
95 | \fn bool qfloat16::isNormal() const noexcept |
96 | |
97 | Returns \c true if this \c qfloat16 value is finite and in normal form. |
98 | |
99 | \sa qFpClassify() |
100 | */ |
101 | |
102 | /*! |
103 | \internal |
104 | \since 5.14 |
105 | \fn bool qfloat16::isFinite() const noexcept |
106 | |
107 | Tests whether this \c qfloat16 value is finite. |
108 | */ |
109 | |
110 | /*! |
111 | \since 5.15 |
112 | \fn qfloat16 qfloat16::copySign(qfloat16 sign) const noexcept |
113 | |
114 | Returns a qfloat16 with the sign of \a sign but the rest of its value taken |
115 | from this qfloat16. Serves as qfloat16's equivalent of std::copysign(). |
116 | */ |
117 | |
118 | /*! |
119 | \fn int qFpClassify(qfloat16 val) |
120 | \relates qfloat16 |
121 | \since 5.14 |
122 | \overload qFpClassify(float) |
123 | |
124 | Returns the floating-point class of \a val. |
125 | */ |
126 | |
127 | /*! |
128 | \internal |
129 | \since 5.14 |
130 | Implements qFpClassify() for qfloat16. |
131 | */ |
132 | int qfloat16::fpClassify() const noexcept |
133 | { |
134 | return isInf() ? FP_INFINITE : isNaN() ? FP_NAN |
135 | : !(b16 & 0x7fff) ? FP_ZERO : isNormal() ? FP_NORMAL : FP_SUBNORMAL; |
136 | } |
137 | |
138 | /*! \fn int qRound(qfloat16 value) |
139 | \relates qfloat16 |
140 | \overload qRound(float) |
141 | |
142 | Rounds \a value to the nearest integer. |
143 | */ |
144 | |
145 | /*! \fn qint64 qRound64(qfloat16 value) |
146 | \relates qfloat16 |
147 | \overload qRound64(float) |
148 | |
149 | Rounds \a value to the nearest 64-bit integer. |
150 | */ |
151 | |
152 | /*! \fn bool qFuzzyCompare(qfloat16 p1, qfloat16 p2) |
153 | \relates qfloat16 |
154 | \overload qFuzzyCompare(float, float) |
155 | |
    Compares the floating point values \a p1 and \a p2 and
    returns \c true if they are considered equal, otherwise \c false.
158 | |
159 | The two numbers are compared in a relative way, where the |
160 | exactness is stronger the smaller the numbers are. |
161 | */ |
162 | |
163 | #if QT_COMPILER_SUPPORTS_HERE(F16C) |
164 | static inline bool hasFastF16() |
165 | { |
166 | // qsimd.cpp:detectProcessorFeatures() turns off this feature if AVX |
167 | // state-saving is not enabled by the OS |
168 | return qCpuHasFeature(F16C); |
169 | } |
170 | |
171 | #if QT_COMPILER_SUPPORTS_HERE(AVX512VL) && QT_COMPILER_SUPPORTS_HERE(AVX512BW) |
172 | static bool hasFastF16Avx256() |
173 | { |
174 | // 256-bit AVX512 don't have a performance penalty (see qstring.cpp for more info) |
175 | return qCpuHasFeature(ArchSkylakeAvx512); |
176 | } |
177 | |
178 | static QT_FUNCTION_TARGET(ARCH_SKYLAKE_AVX512) |
179 | void qFloatToFloat16_tail_avx256(quint16 *out, const float *in, qsizetype len) noexcept |
180 | { |
181 | __mmask16 mask = _bzhi_u32(X: -1, Y: len); |
182 | __m256 f32 = _mm256_maskz_loadu_ps(U: mask, P: in ); |
183 | __m128i f16 = _mm256_maskz_cvtps_ph(mask, f32, _MM_FROUND_TO_NEAREST_INT); |
184 | _mm_mask_storeu_epi16(P: out, U: mask, A: f16); |
185 | }; |
186 | |
187 | static QT_FUNCTION_TARGET(ARCH_SKYLAKE_AVX512) |
188 | void qFloatFromFloat16_tail_avx256(float *out, const quint16 *in, qsizetype len) noexcept |
189 | { |
190 | __mmask16 mask = _bzhi_u32(X: -1, Y: len); |
191 | __m128i f16 = _mm_maskz_loadu_epi16(U: mask, P: in); |
192 | __m256 f32 = _mm256_cvtph_ps(a: f16); |
193 | _mm256_mask_storeu_ps(P: out, U: mask, A: f32); |
194 | }; |
195 | #endif |
196 | |
197 | QT_FUNCTION_TARGET(F16C) |
198 | static void qFloatToFloat16_fast(quint16 *out, const float *in, qsizetype len) noexcept |
199 | { |
200 | constexpr qsizetype Step = sizeof(__m256i) / sizeof(float); |
201 | constexpr qsizetype HalfStep = sizeof(__m128i) / sizeof(float); |
202 | qsizetype i = 0; |
203 | |
204 | if (len >= Step) { |
205 | auto convertOneChunk = [=](qsizetype offset) QT_FUNCTION_TARGET(F16C) { |
206 | __m256 f32 = _mm256_loadu_ps(p: in + offset); |
207 | __m128i f16 = _mm256_cvtps_ph(f32, _MM_FROUND_TO_NEAREST_INT); |
208 | _mm_storeu_si128(p: reinterpret_cast<__m128i *>(out + offset), b: f16); |
209 | }; |
210 | |
211 | // main loop: convert Step (8) floats per iteration |
212 | for ( ; i + Step < len; i += Step) |
213 | convertOneChunk(i); |
214 | |
215 | // epilogue: convert the last chunk, possibly overlapping with the last |
216 | // iteration of the loop |
217 | return convertOneChunk(len - Step); |
218 | } |
219 | |
220 | #if QT_COMPILER_SUPPORTS_HERE(AVX512VL) && QT_COMPILER_SUPPORTS_HERE(AVX512BW) |
221 | if (hasFastF16Avx256()) |
222 | return qFloatToFloat16_tail_avx256(out, in, len); |
223 | #endif |
224 | |
225 | if (len >= HalfStep) { |
226 | auto convertOneChunk = [=](qsizetype offset) QT_FUNCTION_TARGET(F16C) { |
227 | __m128 f32 = _mm_loadu_ps(p: in + offset); |
228 | __m128i f16 = _mm_cvtps_ph(f32, _MM_FROUND_TO_NEAREST_INT); |
229 | _mm_storel_epi64(p: reinterpret_cast<__m128i *>(out + offset), a: f16); |
230 | }; |
231 | |
232 | // two conversions, possibly overlapping |
233 | convertOneChunk(0); |
234 | return convertOneChunk(len - HalfStep); |
235 | } |
236 | |
237 | // Inlining "qfloat16::qfloat16(float f)": |
238 | for ( ; i < len; ++i) |
239 | out[i] = _mm_extract_epi16(_mm_cvtps_ph(_mm_set_ss(in[i]), 0), 0); |
240 | } |
241 | |
242 | QT_FUNCTION_TARGET(F16C) |
243 | static void qFloatFromFloat16_fast(float *out, const quint16 *in, qsizetype len) noexcept |
244 | { |
245 | constexpr qsizetype Step = sizeof(__m256i) / sizeof(float); |
246 | constexpr qsizetype HalfStep = sizeof(__m128i) / sizeof(float); |
247 | qsizetype i = 0; |
248 | |
249 | if (len >= Step) { |
250 | auto convertOneChunk = [=](qsizetype offset) QT_FUNCTION_TARGET(F16C) { |
251 | __m128i f16 = _mm_loadu_si128(p: reinterpret_cast<const __m128i *>(in + offset)); |
252 | __m256 f32 = _mm256_cvtph_ps(a: f16); |
253 | _mm256_storeu_ps(p: out + offset, a: f32); |
254 | }; |
255 | |
256 | // main loop: convert Step (8) floats per iteration |
257 | for ( ; i + Step < len; i += Step) |
258 | convertOneChunk(i); |
259 | |
260 | // epilogue: convert the last chunk, possibly overlapping with the last |
261 | // iteration of the loop |
262 | return convertOneChunk(len - Step); |
263 | } |
264 | |
265 | #if QT_COMPILER_SUPPORTS_HERE(AVX512VL) && QT_COMPILER_SUPPORTS_HERE(AVX512BW) |
266 | if (hasFastF16Avx256()) |
267 | return qFloatFromFloat16_tail_avx256(out, in, len); |
268 | #endif |
269 | |
270 | if (len >= HalfStep) { |
271 | auto convertOneChunk = [=](qsizetype offset) QT_FUNCTION_TARGET(F16C) { |
272 | __m128i f16 = _mm_loadl_epi64(p: reinterpret_cast<const __m128i *>(in + offset)); |
273 | __m128 f32 = _mm_cvtph_ps(a: f16); |
274 | _mm_storeu_ps(p: out + offset, a: f32); |
275 | }; |
276 | |
277 | // two conversions, possibly overlapping |
278 | convertOneChunk(0); |
279 | return convertOneChunk(len - HalfStep); |
280 | } |
281 | |
282 | // Inlining "qfloat16::operator float()": |
283 | for ( ; i < len; ++i) |
284 | out[i] = _mm_cvtss_f32(a: _mm_cvtph_ps(a: _mm_cvtsi32_si128(a: in[i]))); |
285 | } |
286 | |
287 | #elif defined(__ARM_FP16_FORMAT_IEEE) && defined(__ARM_NEON__) && (__ARM_FP & 2) |
// This variant is compiled only when the ARM FP16/NEON preprocessor checks
// guarding it succeed, so the fast path is unconditionally available.
static inline bool hasFastF16()
{
    constexpr bool fastPathAvailable = true;
    return fastPathAvailable;
}
292 | |
293 | static void qFloatToFloat16_fast(quint16 *out, const float *in, qsizetype len) noexcept |
294 | { |
295 | __fp16 *out_f16 = reinterpret_cast<__fp16 *>(out); |
296 | qsizetype i = 0; |
297 | for (; i < len - 3; i += 4) |
298 | vst1_f16(out_f16 + i, vcvt_f16_f32(vld1q_f32(in + i))); |
299 | SIMD_EPILOGUE(i, len, 3) |
300 | out_f16[i] = __fp16(in[i]); |
301 | } |
302 | |
303 | static void qFloatFromFloat16_fast(float *out, const quint16 *in, qsizetype len) noexcept |
304 | { |
305 | const __fp16 *in_f16 = reinterpret_cast<const __fp16 *>(in); |
306 | qsizetype i = 0; |
307 | for (; i < len - 3; i += 4) |
308 | vst1q_f32(out + i, vcvt_f32_f16(vld1_f16(in_f16 + i))); |
309 | SIMD_EPILOGUE(i, len, 3) |
310 | out[i] = float(in_f16[i]); |
311 | } |
312 | #else |
// Fallback variant: neither the x86 F16C nor the ARM NEON branch was compiled
// in, so no fast conversion path exists.
static inline bool hasFastF16()
{
    constexpr bool fastPathAvailable = false;
    return fastPathAvailable;
}
317 | |
// Placeholder so that the callers compile on targets without a fast path.
// hasFastF16() returns false in this configuration, so this must never run.
static void qFloatToFloat16_fast(quint16 *, const float *, qsizetype) noexcept
{
    Q_UNREACHABLE();
}
322 | |
// Placeholder so that the callers compile on targets without a fast path.
// hasFastF16() returns false in this configuration, so this must never run.
static void qFloatFromFloat16_fast(float *, const quint16 *, qsizetype) noexcept
{
    Q_UNREACHABLE();
}
327 | #endif |
328 | /*! |
329 | \since 5.11 |
330 | \relates qfloat16 |
331 | |
332 | Converts \a len floats from \a in to qfloat16 and stores them in \a out. |
333 | Both \a in and \a out must have \a len allocated entries. |
334 | |
335 | This function is faster than converting values one by one, and will do runtime |
336 | F16C detection on x86 and x86-64 hardware. |
337 | */ |
338 | Q_CORE_EXPORT void qFloatToFloat16(qfloat16 *out, const float *in, qsizetype len) noexcept |
339 | { |
340 | if (hasFastF16()) |
341 | return qFloatToFloat16_fast(out: reinterpret_cast<quint16 *>(out), in, len); |
342 | |
343 | for (qsizetype i = 0; i < len; ++i) |
344 | out[i] = qfloat16(in[i]); |
345 | } |
346 | |
347 | /*! |
348 | \since 5.11 |
349 | \relates qfloat16 |
350 | |
351 | Converts \a len qfloat16 from \a in to floats and stores them in \a out. |
352 | Both \a in and \a out must have \a len allocated entries. |
353 | |
354 | This function is faster than converting values one by one, and will do runtime |
355 | F16C detection on x86 and x86-64 hardware. |
356 | */ |
357 | Q_CORE_EXPORT void qFloatFromFloat16(float *out, const qfloat16 *in, qsizetype len) noexcept |
358 | { |
359 | if (hasFastF16()) |
360 | return qFloatFromFloat16_fast(out, in: reinterpret_cast<const quint16 *>(in), len); |
361 | |
362 | for (qsizetype i = 0; i < len; ++i) |
363 | out[i] = float(in[i]); |
364 | } |
365 | |
366 | /*! |
367 | \fn size_t qfloat16::qHash(qfloat16 key, size_t seed) |
368 | \since 6.5.3 |
369 | \relates qfloat16 |
370 | |
371 | Returns the hash value for the \a key, using \a seed to seed the |
372 | calculation. |
373 | |
374 | \note In Qt versions before 6.5, this operation was provided by the |
375 | qHash(float) overload. In Qt versions 6.5.0 to 6.5.2, this functionality |
376 | was broken in various ways. In Qt versions 6.5.3 and 6.6 onwards, this |
377 | overload restores the Qt 6.4 behavior. |
378 | */ |
379 | |
380 | #ifndef QT_NO_DATASTREAM |
381 | /*! |
382 | \fn qfloat16::operator<<(QDataStream &ds, qfloat16 f) |
383 | \relates QDataStream |
384 | \since 5.9 |
385 | |
386 | Writes a floating point number, \a f, to the stream \a ds using |
387 | the standard IEEE 754 format. Returns a reference to the stream. |
388 | |
389 | \note In Qt versions prior to 6.3, this was a member function on |
390 | QDataStream. |
391 | */ |
392 | QDataStream &operator<<(QDataStream &ds, qfloat16 f) |
393 | { |
394 | return ds << f.b16; |
395 | } |
396 | |
397 | /*! |
398 | \fn qfloat16::operator>>(QDataStream &ds, qfloat16 &f) |
399 | \relates QDataStream |
400 | \since 5.9 |
401 | |
402 | Reads a floating point number from the stream \a ds into \a f, |
403 | using the standard IEEE 754 format. Returns a reference to the |
404 | stream. |
405 | |
406 | \note In Qt versions prior to 6.3, this was a member function on |
407 | QDataStream. |
408 | */ |
409 | QDataStream &operator>>(QDataStream &ds, qfloat16 &f) |
410 | { |
411 | return ds >> f.b16; |
412 | } |
413 | #endif |
414 | |
415 | QTextStream &operator>>(QTextStream &ts, qfloat16 &f16) |
416 | { |
417 | float f; |
418 | ts >> f; |
419 | f16 = qfloat16(f); |
420 | return ts; |
421 | } |
422 | |
423 | QTextStream &operator<<(QTextStream &ts, qfloat16 f) |
424 | { |
425 | return ts << float(f); |
426 | } |
427 | |
428 | QT_END_NAMESPACE |
429 | |
430 | #include "qfloat16tables.cpp" |
431 | |