qfloat16.cpp source code [qtbase/src/corelib/global/qfloat16.cpp]

1	// Copyright (C) 2020 The Qt Company Ltd.
2	// Copyright (C) 2016 by Southwest Research Institute (R)
3	// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
4
5	#include "qfloat16.h"
6	#include "private/qsimd_p.h"
7	#include <cmath> // for fpclassify()'s return values
8
9	#include <QtCore/qdatastream.h>
10	#include <QtCore/qmetatype.h>
11	#include <QtCore/qtextstream.h>
12
13	QT_DECL_METATYPE_EXTERN(qfloat16, Q_CORE_EXPORT)
14	QT_BEGIN_NAMESPACE
15
16	QT_IMPL_METATYPE_EXTERN(qfloat16)
17
/*!
    \class qfloat16
    \keyword 16-bit Floating Point Support
    \ingroup funclists
    \inmodule QtCore
    \inheaderfile QFloat16
    \brief Provides 16-bit floating point support.

    \compares partial
    \compareswith partial float double {long double} qint8 quint8 qint16 quint16 \
                  qint32 quint32 long {unsigned long} qint64 quint64
    \endcompareswith
    \compareswith partial qint128 quint128
    Comparison with 128-bit integral types is only supported if Qt provides
    these types.
    \endcompareswith

    The \c qfloat16 class provides support for half-precision (16-bit) floating
    point data. It is fully compliant with IEEE 754 as a storage type. This
    implies that any arithmetic operation on a \c qfloat16 instance results in
    the value first being converted to a \c float. This conversion to and from
    \c float is performed by hardware when possible, but on processors that do
    not natively support half-precision, the conversion is performed through a
    sequence of lookup table operations.

    \c qfloat16 should be treated as if it were a POD (plain old data) type.
    Consequently, none of the supported operations need any elaboration beyond
    stating that it supports all arithmetic operators incident to floating point
    types.

    \note On x86 and x86-64, to get hardware-accelerated conversions you must
    compile with F16C or AVX2 enabled, or use qFloatToFloat16() and
    qFloatFromFloat16(), which will detect F16C at runtime.

    \since 5.9
*/
54
/*!
    \fn qfloat16::qfloat16(Qt::Initialization)
    \since 6.1

    Constructs a qfloat16 without initializing the value.
*/
61
/*!
    \fn bool qIsInf(qfloat16 f)
    \relates qfloat16
    \overload qIsInf(float)

    Returns true if the \c qfloat16 \a {f} is equivalent to infinity.
*/
69
/*!
    \fn bool qIsNaN(qfloat16 f)
    \relates qfloat16
    \overload qIsNaN(float)

    Returns true if the \c qfloat16 \a {f} is not a number (NaN).
*/
77
/*!
    \fn bool qIsFinite(qfloat16 f)
    \relates qfloat16
    \overload qIsFinite(float)

    Returns true if the \c qfloat16 \a {f} is a finite number.
*/
85
/*!
    \internal
    \since 5.14
    \fn bool qfloat16::isInf() const noexcept

    Tests whether this \c qfloat16 value is an infinity.
*/
93
/*!
    \internal
    \since 5.14
    \fn bool qfloat16::isNaN() const noexcept

    Tests whether this \c qfloat16 value is "not a number".
*/
101
/*!
    \since 5.14
    \fn bool qfloat16::isNormal() const noexcept

    Returns \c true if this \c qfloat16 value is finite and in normal form.

    \sa qFpClassify()
*/
110
/*!
    \internal
    \since 5.14
    \fn bool qfloat16::isFinite() const noexcept

    Tests whether this \c qfloat16 value is finite.
*/
118
/*!
    \since 5.15
    \fn qfloat16 qfloat16::copySign(qfloat16 sign) const noexcept

    Returns a qfloat16 with the sign of \a sign but the rest of its value taken
    from this qfloat16. Serves as qfloat16's equivalent of std::copysign().
*/
126
/*!
    \fn int qFpClassify(qfloat16 val)
    \relates qfloat16
    \since 5.14
    \overload qFpClassify(float)

    Returns the floating-point class of \a val.
*/
135
136	/!*
137	\internal
138	\since 5.14
139	Implements qFpClassify() for qfloat16.
140	*/
141	int qfloat16::fpClassify() const noexcept
142	{
143	return isInf() ? FP_INFINITE : isNaN() ? FP_NAN
144	: !(b16 & `0x7fff`) ? FP_ZERO : isNormal() ? FP_NORMAL : FP_SUBNORMAL;
145	}
146
/*! \fn int qRound(qfloat16 value)
    \relates qfloat16
    \overload qRound(float)

    Rounds \a value to the nearest integer.
*/
153
/*! \fn qint64 qRound64(qfloat16 value)
    \relates qfloat16
    \overload qRound64(float)

    Rounds \a value to the nearest 64-bit integer.
*/
160
/*! \fn bool qFuzzyCompare(qfloat16 p1, qfloat16 p2)
    \relates qfloat16
    \overload qFuzzyCompare(float, float)

    Compares the floating point values \a p1 and \a p2 and
    returns \c true if they are considered equal, otherwise \c false.

    The two numbers are compared in a relative way, where the
    exactness is stronger the smaller the numbers are.
*/
171
172	#if QT_COMPILER_SUPPORTS_HERE(F16C)
173	static inline bool hasFastF16()
174	{
175	// qsimd.cpp:detectProcessorFeatures() turns off this feature if AVX
176	// state-saving is not enabled by the OS
177	return qCpuHasFeature(F16C);
178	}
179
180	#if QT_COMPILER_SUPPORTS_HERE(AVX512VL) && QT_COMPILER_SUPPORTS_HERE(AVX512BW)
181	static bool hasFastF16Avx256()
182	{
183	// 256-bit AVX512 don't have a performance penalty (see qstring.cpp for more info)
184	return qCpuHasFeature(ArchSkylakeAvx512);
185	}
186
187	static QT_FUNCTION_TARGET(ARCH_SKYLAKE_AVX512)
188	void qFloatToFloat16_tail_avx256(quint16 out, const* float in, qsizetype len) noexcept*
189	{
190	__mmask16 mask = _bzhi_u32(X: -`1`, Y: len);
191	__m256 f32 = _mm256_maskz_loadu_ps(U: mask, P: in );
192	__m128i f16 = _mm256_maskz_cvtps_ph(mask, f32, _MM_FROUND_TO_NEAREST_INT);
193	_mm_mask_storeu_epi16(P: out, U: mask, A: f16);
194	};
195
196	static QT_FUNCTION_TARGET(ARCH_SKYLAKE_AVX512)
197	void qFloatFromFloat16_tail_avx256(float out, const* quint16 in, qsizetype len) noexcept*
198	{
199	__mmask16 mask = _bzhi_u32(X: -`1`, Y: len);
200	__m128i f16 = _mm_maskz_loadu_epi16(U: mask, P: in);
201	__m256 f32 = _mm256_cvtph_ps(a: f16);
202	_mm256_mask_storeu_ps(P: out, U: mask, A: f32);
203	};
204	#endif
205
206	QT_FUNCTION_TARGET(F16C)
207	static void qFloatToFloat16_fast(quint16 out, const* float in, qsizetype len) noexcept*
208	{
209	constexpr qsizetype Step = sizeof(__m256i) / sizeof(float);
210	constexpr qsizetype HalfStep = sizeof(__m128i) / sizeof(float);
211	qsizetype i = `0`;
212
213	if (len >= Step) {
214	auto convertOneChunk = [=](qsizetype offset) QT_FUNCTION_TARGET(F16C) {
215	__m256 f32 = _mm256_loadu_ps(p: in + offset);
216	__m128i f16 = _mm256_cvtps_ph(f32, _MM_FROUND_TO_NEAREST_INT);
217	_mm_storeu_si128(p: reinterpret_cast<__m128i *>(out + offset), b: f16);
218	};
219
220	// main loop: convert Step (8) floats per iteration
221	for ( ; i + Step < len; i += Step)
222	convertOneChunk (i);
223
224	// epilogue: convert the last chunk, possibly overlapping with the last
225	// iteration of the loop
226	return convertOneChunk (len - Step);
227	}
228
229	#if QT_COMPILER_SUPPORTS_HERE(AVX512VL) && QT_COMPILER_SUPPORTS_HERE(AVX512BW)
230	if (hasFastF16Avx256())
231	return qFloatToFloat16_tail_avx256(out, in, len);
232	#endif
233
234	if (len >= HalfStep) {
235	auto convertOneChunk = [=](qsizetype offset) QT_FUNCTION_TARGET(F16C) {
236	__m128 f32 = _mm_loadu_ps(p: in + offset);
237	__m128i f16 = _mm_cvtps_ph(f32, _MM_FROUND_TO_NEAREST_INT);
238	_mm_storel_epi64(p: reinterpret_cast<__m128i *>(out + offset), a: f16);
239	};
240
241	// two conversions, possibly overlapping
242	convertOneChunk (`0`);
243	return convertOneChunk (len - HalfStep);
244	}
245
246	// Inlining "qfloat16::qfloat16(float f)":
247	for ( ; i < len; ++i)
248	out[i] = _mm_extract_epi16(_mm_cvtps_ph(_mm_set_ss(in[i]), `0`), `0`);
249	}
250
251	QT_FUNCTION_TARGET(F16C)
252	static void qFloatFromFloat16_fast(float out, const* quint16 in, qsizetype len) noexcept*
253	{
254	constexpr qsizetype Step = sizeof(__m256i) / sizeof(float);
255	constexpr qsizetype HalfStep = sizeof(__m128i) / sizeof(float);
256	qsizetype i = `0`;
257
258	if (len >= Step) {
259	auto convertOneChunk = [=](qsizetype offset) QT_FUNCTION_TARGET(F16C) {
260	__m128i f16 = _mm_loadu_si128(p: reinterpret_cast<const __m128i *>(in + offset));
261	__m256 f32 = _mm256_cvtph_ps(a: f16);
262	_mm256_storeu_ps(p: out + offset, a: f32);
263	};
264
265	// main loop: convert Step (8) floats per iteration
266	for ( ; i + Step < len; i += Step)
267	convertOneChunk (i);
268
269	// epilogue: convert the last chunk, possibly overlapping with the last
270	// iteration of the loop
271	return convertOneChunk (len - Step);
272	}
273
274	#if QT_COMPILER_SUPPORTS_HERE(AVX512VL) && QT_COMPILER_SUPPORTS_HERE(AVX512BW)
275	if (hasFastF16Avx256())
276	return qFloatFromFloat16_tail_avx256(out, in, len);
277	#endif
278
279	if (len >= HalfStep) {
280	auto convertOneChunk = [=](qsizetype offset) QT_FUNCTION_TARGET(F16C) {
281	__m128i f16 = _mm_loadl_epi64(p: reinterpret_cast<const __m128i *>(in + offset));
282	__m128 f32 = _mm_cvtph_ps(a: f16);
283	_mm_storeu_ps(p: out + offset, a: f32);
284	};
285
286	// two conversions, possibly overlapping
287	convertOneChunk (`0`);
288	return convertOneChunk (len - HalfStep);
289	}
290
291	// Inlining "qfloat16::operator float()":
292	for ( ; i < len; ++i)
293	out[i] = _mm_cvtss_f32(a: _mm_cvtph_ps(a: _mm_cvtsi32_si128(a: in[i])));
294	}
295
296	#elif defined(__ARM_FP16_FORMAT_IEEE) && defined(__ARM_NEON__) && (__ARM_FP & 2)
// In this preprocessor branch (IEEE __fp16 format plus NEON) the fast
// converters are always usable, so the answer is a compile-time constant.
// constexpr lets the compiler drop the slow path behind `if (hasFastF16())`.
static constexpr bool hasFastF16() noexcept
{
    return true;
}
301
302	static void qFloatToFloat16_fast(quint16 out, const* float in, qsizetype len) noexcept*
303	{
304	__fp16 out_f16 = reinterpret_cast<__fp16* *>(out);
305	qsizetype i = `0`;
306	for (; i < len - `3`; i += `4`)
307	vst1_f16(out_f16 + i, vcvt_f16_f32(vld1q_f32(in + i)));
308	SIMD_EPILOGUE(i, len, `3`)
309	out_f16[i] = __fp16(in[i]);
310	}
311
312	static void qFloatFromFloat16_fast(float out, const* quint16 in, qsizetype len) noexcept*
313	{
314	const __fp16 in_f16 = reinterpret_cast<const* __fp16 *>(in);
315	qsizetype i = `0`;
316	for (; i < len - `3`; i += `4`)
317	vst1q_f32(out + i, vcvt_f32_f16(vld1_f16(in_f16 + i)));
318	SIMD_EPILOGUE(i, len, `3`)
319	out[i] = float(in_f16[i]);
320	}
321	#else
// Fallback branch: no hardware-accelerated converter is compiled in, so the
// answer is a compile-time constant. constexpr lets the compiler discard the
// `if (hasFastF16())` fast-path call sites entirely.
static constexpr bool hasFastF16() noexcept
{
    return false;
}
326
327	static void qFloatToFloat16_fast(quint16 , const* float , qsizetype) noexcept*
328	{
329	Q_UNREACHABLE();
330	}
331
332	static void qFloatFromFloat16_fast(float , const* quint16 , qsizetype) noexcept*
333	{
334	Q_UNREACHABLE();
335	}
336	#endif
337	/!*
338	\since 5.11
339	\relates qfloat16
340
341	Converts \a len floats from \a in to qfloat16 and stores them in \a out.
342	Both \a in and \a out must have \a len allocated entries.
343
344	This function is faster than converting values one by one, and will do runtime
345	F16C detection on x86 and x86-64 hardware.
346	*/
347	Q_CORE_EXPORT void qFloatToFloat16(qfloat16 out, const* float in, qsizetype len) noexcept*
348	{
349	if (hasFastF16())
350	return qFloatToFloat16_fast(out: reinterpret_cast<quint16 *>(out), in, len);
351
352	for (qsizetype i = `0`; i < len; ++i)
353	out[i] = qfloat16 (in[i]);
354	}
355
356	/!*
357	\since 5.11
358	\relates qfloat16
359
360	Converts \a len qfloat16 from \a in to floats and stores them in \a out.
361	Both \a in and \a out must have \a len allocated entries.
362
363	This function is faster than converting values one by one, and will do runtime
364	F16C detection on x86 and x86-64 hardware.
365	*/
366	Q_CORE_EXPORT void qFloatFromFloat16(float out, const* qfloat16 in, qsizetype len) noexcept*
367	{
368	if (hasFastF16())
369	return qFloatFromFloat16_fast(out, in: reinterpret_cast<const quint16 *>(in), len);
370
371	for (qsizetype i = `0`; i < len; ++i)
372	out[i] = float(in[i]);
373	}
374
/*!
    \fn size_t qfloat16::qHash(qfloat16 key, size_t seed)
    \since 6.5.3

    Returns the hash value for the \a key, using \a seed to seed the
    calculation.

    \note In Qt versions before 6.5, this operation was provided by the
    qHash(float) overload. In Qt versions 6.5.0 to 6.5.2, this functionality
    was broken in various ways. In Qt versions 6.5.3 and 6.6 onwards, this
    overload restores the Qt 6.4 behavior.
*/
387
388	#ifndef QT_NO_DATASTREAM
389	/!*
390	\fn qfloat16::operator<<(QDataStream &ds, qfloat16 f)
391	\relates QDataStream
392	\since 5.9
393
394	Writes a floating point number, \a f, to the stream \a ds using
395	the standard IEEE 754 format. Returns a reference to the stream.
396
397	\note In Qt versions prior to 6.3, this was a member function on
398	QDataStream.
399	*/
400	QDataStream &operator<<(QDataStream &ds, qfloat16 f)
401	{
402	return ds << f.b16;
403	}
404
405	/!*
406	\fn qfloat16::operator>>(QDataStream &ds, qfloat16 &f)
407	\relates QDataStream
408	\since 5.9
409
410	Reads a floating point number from the stream \a ds into \a f,
411	using the standard IEEE 754 format. Returns a reference to the
412	stream.
413
414	\note In Qt versions prior to 6.3, this was a member function on
415	QDataStream.
416	*/
417	QDataStream &operator>>(QDataStream &ds, qfloat16 &f)
418	{
419	return ds >> f.b16;
420	}
421	#endif
422
423	QTextStream &operator>>(QTextStream &ts, qfloat16 &f16)
424	{
425	float f;
426	ts >> f;
427	f16 = qfloat16 (f);
428	return ts;
429	}
430
431	QTextStream &operator<<(QTextStream &ts, qfloat16 f)
432	{
433	return ts << float(f);
434	}
435
436	QT_END_NAMESPACE
437
438	#include "qfloat16tables.cpp"
439

Provided by KDAB

Start learning QML with our Intro Training

Find out more

Definitions

source code of qtbase/src/corelib/global/qfloat16.cpp