half.h source code [include/Imath/half.h]

1	//
2	// SPDX-License-Identifier: BSD-3-Clause
3	// Copyright Contributors to the OpenEXR Project.
4	//
5
6	//
7	// Primary original authors:
8	// Florian Kainz <kainz@ilm.com>
9	// Rod Bogart <rgb@ilm.com>
10	//
11
12	#ifndef IMATH_HALF_H_
13	#define IMATH_HALF_H_
14
15	#include "ImathExport.h"
16	#include "ImathNamespace.h"
17	#include "ImathPlatform.h"
18
19	/// @file half.h
20	/// The half type is a 16-bit floating number, compatible with the
21	/// IEEE 754-2008 binary16 type.
22	///
23	/// Representation of a 32-bit float:
24	///
25	/// We assume that a float, f, is an IEEE 754 single-precision
26	/// floating point number, whose bits are arranged as follows:
27	///
///     31 (msb)
///     |
///     | 30     23
///     | |      |
///     | |      | 22                    0 (lsb)
///     | |      | |                     |
///     X XXXXXXXX XXXXXXXXXXXXXXXXXXXXXXX
///
///     s e        m
///
/// s is the sign-bit, e is the exponent and m is the significand.
///
/// If e is between 1 and 254, f is a normalized number:
///
///     f = (-1)^s * 2^(e-127) * 1.m
///
/// If e is 0, and m is not zero, f is a denormalized number:
///
///     f = (-1)^s * 2^(-126) * 0.m
49	///
50	/// If e and m are both zero, f is zero:
51	///
52	/// f = 0.0
53	///
54	/// If e is 255, f is an "infinity" or "not a number" (NAN),
55	/// depending on whether m is zero or not.
56	///
57	/// Examples:
58	///
59	/// 0 00000000 00000000000000000000000 = 0.0
60	/// 0 01111110 00000000000000000000000 = 0.5
61	/// 0 01111111 00000000000000000000000 = 1.0
62	/// 0 10000000 00000000000000000000000 = 2.0
63	/// 0 10000000 10000000000000000000000 = 3.0
64	/// 1 10000101 11110000010000000000000 = -124.0625
65	/// 0 11111111 00000000000000000000000 = +infinity
66	/// 1 11111111 00000000000000000000000 = -infinity
67	/// 0 11111111 10000000000000000000000 = NAN
68	/// 1 11111111 11111111111111111111111 = NAN
69	///
70	/// Representation of a 16-bit half:
71	///
72	/// Here is the bit-layout for a half number, h:
73	///
///     15 (msb)
///     |
///     | 14  10
///     | |   |
///     | |   | 9        0 (lsb)
///     | |   | |        |
///     X XXXXX XXXXXXXXXX
///
///     s e     m
///
/// s is the sign-bit, e is the exponent and m is the significand.
///
/// If e is between 1 and 30, h is a normalized number:
///
///     h = (-1)^s * 2^(e-15) * 1.m
///
/// If e is 0, and m is not zero, h is a denormalized number:
///
///     h = (-1)^s * 2^(-14) * 0.m
95	///
96	/// If e and m are both zero, h is zero:
97	///
98	/// h = 0.0
99	///
100	/// If e is 31, h is an "infinity" or "not a number" (NAN),
101	/// depending on whether m is zero or not.
102	///
103	/// Examples:
104	///
105	/// 0 00000 0000000000 = 0.0
106	/// 0 01110 0000000000 = 0.5
107	/// 0 01111 0000000000 = 1.0
108	/// 0 10000 0000000000 = 2.0
109	/// 0 10000 1000000000 = 3.0
110	/// 1 10101 1111000001 = -124.0625
111	/// 0 11111 0000000000 = +infinity
112	/// 1 11111 0000000000 = -infinity
113	/// 0 11111 1000000000 = NAN
114	/// 1 11111 1111111111 = NAN
115	///
116	/// Conversion via Lookup Table:
117	///
118	/// Converting from half to float is performed by default using a
119	/// lookup table. There are only 65,536 different half numbers; each
120	/// of these numbers has been converted and stored in a table pointed
121	/// to by the ``imath_half_to_float_table`` pointer.
122	///
/// Prior to Imath v3.1, conversion from float to half was
/// accomplished with the help of an exponent lookup table, but this
/// is now replaced with explicit bit shifting.
126	///
127	/// Conversion via Hardware:
128	///
129	/// For Imath v3.1, the conversion routines have been extended to use
130	/// F16C SSE instructions whenever present and enabled by compiler
131	/// flags.
132	///
133	/// Conversion via Bit-Shifting
134	///
135	/// If F16C SSE instructions are not available, conversion can be
136	/// accomplished by a bit-shifting algorithm. For half-to-float
137	/// conversion, this is generally slower than the lookup table, but it
138	/// may be preferable when memory limits preclude storing of the
139	/// 65,536-entry lookup table.
140	///
141	/// The lookup table symbol is included in the compilation even if
142	/// ``IMATH_HALF_USE_LOOKUP_TABLE`` is false, because application code
143	/// using the exported ``half.h`` may choose to enable the use of the table.
144	///
145	/// An implementation can eliminate the table from compilation by
146	/// defining the ``IMATH_HALF_NO_LOOKUP_TABLE`` preprocessor symbol.
147	/// Simply add:
148	///
149	/// #define IMATH_HALF_NO_LOOKUP_TABLE
150	///
151	/// before including ``half.h``, or define the symbol on the compile
152	/// command line.
153	///
154	/// Furthermore, an implementation wishing to receive ``FE_OVERFLOW``
155	/// and ``FE_UNDERFLOW`` floating point exceptions when converting
156	/// float to half by the bit-shift algorithm can define the
157	/// preprocessor symbol ``IMATH_HALF_ENABLE_FP_EXCEPTIONS`` prior to
158	/// including ``half.h``:
159	///
160	/// #define IMATH_HALF_ENABLE_FP_EXCEPTIONS
161	///
162	/// Conversion Performance Comparison:
163	///
164	/// Testing on a Core i9, the timings are approximately:
165	///
166	/// half to float
167	/// - table: 0.71 ns / call
168	/// - no table: 1.06 ns / call
169	/// - f16c: 0.45 ns / call
170	///
171	/// float-to-half:
172	/// - original: 5.2 ns / call
173	/// - no exp table + opt: 1.27 ns / call
174	/// - f16c: 0.45 ns / call
175	///
/// Note: the timings above depend on the distribution of the
/// floats in question.
178	///
179
180	#ifdef __CUDA_ARCH__
181	// do not include intrinsics headers on Cuda
182	#elif defined(_WIN32)
183	# include <intrin.h>
184	#elif defined(__x86_64__)
185	# include <x86intrin.h>
186	#elif defined(__F16C__)
187	# include <immintrin.h>
188	#endif
189
190	#include <stdint.h>
191	#include <stdio.h>
192
193	#ifdef IMATH_HALF_ENABLE_FP_EXCEPTIONS
194	# include <fenv.h>
195	#endif
196
//-------------------------------------------------------------------------
// Limits
//
// Visual C++ will complain if HALF_DENORM_MIN, HALF_NRM_MIN etc. are not float
// constants, but at least one other compiler (gcc 2.96) produces incorrect
// results if they are.
//
// NOTE: in the non-MSVC branch HALF_MIN carries an `f` suffix while its
// siblings do not; it is reproduced unchanged so that the type seen by
// existing callers does not change.
//-------------------------------------------------------------------------

#if (defined _WIN32 || defined _WIN64) && defined _MSC_VER

/// Smallest positive denormalized half
#    define HALF_DENORM_MIN 5.96046448e-08f
/// Smallest positive normalized half
#    define HALF_NRM_MIN 6.10351562e-05f
/// Smallest positive normalized half
#    define HALF_MIN 6.10351562e-05f
/// Largest positive half
#    define HALF_MAX 65504.0f
/// Smallest positive e for which ``half(1.0 + e) != half(1.0)``
#    define HALF_EPSILON 0.00097656f
#else
/// Smallest positive denormalized half
#    define HALF_DENORM_MIN 5.96046448e-08
/// Smallest positive normalized half
#    define HALF_NRM_MIN 6.10351562e-05
/// Smallest positive normalized half
#    define HALF_MIN 6.10351562e-05f
/// Largest positive half
#    define HALF_MAX 65504.0
/// Smallest positive e for which ``half(1.0 + e) != half(1.0)``
#    define HALF_EPSILON 0.00097656
#endif

/// Number of digits in mantissa (significand + hidden leading 1)
#define HALF_MANT_DIG 11
/// Number of base 10 digits that can be represented without change:
///
/// ``floor( (HALF_MANT_DIG - 1) * log10(2) ) => 3.01... -> 3``
#define HALF_DIG 3
/// Number of base-10 digits that are necessary to uniquely represent
/// all distinct values:
///
/// ``ceil(HALF_MANT_DIG * log10(2) + 1) => 4.31... -> 5``
#define HALF_DECIMAL_DIG 5
/// Base of the exponent
#define HALF_RADIX 2
/// Minimum negative integer such that ``HALF_RADIX`` raised to the power
/// of one less than that integer is a normalized half
#define HALF_DENORM_MIN_EXP -13
/// Maximum positive integer such that ``HALF_RADIX`` raised to the power
/// of one less than that integer is a normalized half
#define HALF_MAX_EXP 16
/// Minimum positive integer such that 10 raised to that power is a
/// normalized half
#define HALF_DENORM_MIN_10_EXP -4
/// Maximum positive integer such that 10 raised to that power is a
/// normalized half
#define HALF_MAX_10_EXP 4
255
/// A union giving access to the same 32 bits either as an unsigned
/// integer (``i``) or as a float (``f``); usable from both C-only
/// programs and C++ so they can share the same conversion utilities.
typedef union imath_half_uif
{
    uint32_t i;
    float    f;
} imath_half_uif_t;

/// The raw 16-bit pattern of a half; usable from both C-only programs
/// and C++ so they can share the same conversion utilities.
typedef uint16_t imath_half_bits_t;

#if !defined(__cplusplus) && !defined(__CUDACC__)
/// if we're in a C-only context, alias the half bits type to half
typedef imath_half_bits_t half;
#endif
270
271	#if !defined(IMATH_HALF_NO_LOOKUP_TABLE)
272	# if defined(__cplusplus)
273	extern "C"
274	# else
275	extern
276	# endif
277	IMATH_EXPORT const imath_half_uif_t* imath_half_to_float_table;
278	#endif
279
280	///
281	/// Convert half to float
282	///
283
284	static inline float
285	imath_half_to_float (imath_half_bits_t h)
286	{
287	#if defined(__F16C__)
288	// NB: The intel implementation does seem to treat NaN slightly
289	// different than the original toFloat table does (i.e. where the
290	// 1 bits are, meaning the signalling or not bits). This seems
291	// benign, given that the original library didn't really deal with
292	// signalling vs non-signalling NaNs
293	# ifdef _MSC_VER
294	/ msvc does not seem to have cvtsh_ss :( /
295	return _mm_cvtss_f32 (_mm_cvtph_ps (_mm_set1_epi16 (h)));
296	# else
297	return _cvtsh_ss (h);
298	# endif
299	#elif defined(IMATH_HALF_USE_LOOKUP_TABLE) && !defined(IMATH_HALF_NO_LOOKUP_TABLE)
300	return imath_half_to_float_table[h].f;
301	#else
302	imath_half_uif_t v;
303	// this code would be clearer, although it does appear to be faster
304	// (1.06 vs 1.08 ns/call) to avoid the constants and just do 4
305	// shifts.
306	//
307	uint32_t hexpmant = ( (uint32_t)(h) << `17` ) >> `4`;
308	v.i = ((uint32_t)(h >> `15`)) << `31`;
309
310	// the likely really does help if most of your numbers are "normal" half numbers
311	if (IMATH_LIKELY ((hexpmant >= `0x00800000`)))
312	{
313	v.i \|= hexpmant;
314	// either we are a normal number, in which case add in the bias difference
315	// otherwise make sure all exponent bits are set
316	if (IMATH_LIKELY ((hexpmant < `0x0f800000`)))
317	v.i += `0x38000000`;
318	else
319	v.i \|= `0x7f800000`;
320	}
321	else if (hexpmant != `0`)
322	{
323	// exponent is 0 because we're denormal, don't have to extract
324	// the mantissa, can just use as is
325	//
326	//
327	// other compilers may provide count-leading-zeros primitives,
328	// but we need the community to inform us of the variants
329	uint32_t lc;
330	# if defined(_MSC_VER) && (_M_IX86 \|\| _M_X64)
331	lc = __lzcnt (hexpmant);
332	# elif defined(__GNUC__) \|\| defined(__clang__)
333	lc = (uint32_t) __builtin_clz (hexpmant);
334	# else
335	lc = `0`;
336	while (`0` == ((hexpmant << lc) & `0x80000000`))
337	++lc;
338	# endif
339	lc -= `8`;
340	// so nominally we want to remove that extra bit we shifted
341	// up, but we are going to add that bit back in, then subtract
342	// from it with the 0x38800000 - (lc << 23)....
343	//
344	// by combining, this allows us to skip the & operation (and
345	// remove a constant)
346	//
347	// hexpmant &= ~0x00800000;
348	v.i \|= `0x38800000`;
349	// lc is now x, where the desired exponent is then
350	// -14 - lc
351	// + 127 -> new exponent
352	v.i \|= (hexpmant << lc);
353	v.i -= (lc << `23`);
354	}
355	return v.f;
356	#endif
357	}
358
359	///
360	/// Convert half to float
361	///
362	/// Note: This only supports the "round to even" rounding mode, which
363	/// was the only mode supported by the original OpenEXR library
364	///
365
366	static inline imath_half_bits_t
367	imath_float_to_half (float f)
368	{
369	#if defined(__F16C__)
370	# ifdef _MSC_VER
371	// msvc does not seem to have cvtsh_ss :(
372	return _mm_extract_epi16 (
373	_mm_cvtps_ph (_mm_set_ss (f), (_MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC)),
374	`0`);
375	# else
376	// preserve the fixed rounding mode to nearest
377	return _cvtss_sh (f, (_MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC));
378	# endif
379	#else
380	imath_half_uif_t v;
381	imath_half_bits_t ret;
382	uint32_t e, m, ui, r, shift;
383
384	v.f = f;
385
386	ui = (v.i & ~`0x80000000`);
387	ret = ((v.i >> `16`) & `0x8000`);
388
389	// exponent large enough to result in a normal number, round and return
390	if (ui >= `0x38800000`)
391	{
392	// inf or nan
393	if (IMATH_UNLIKELY (ui >= `0x7f800000`))
394	{
395	ret \|= `0x7c00`;
396	if (ui == `0x7f800000`)
397	return ret;
398	m = (ui & `0x7fffff`) >> `13`;
399	// make sure we have at least one bit after shift to preserve nan-ness
400	return ret \| (uint16_t)m \| (uint16_t)(m == `0`);
401	}
402
403	// too large, round to infinity
404	if (IMATH_UNLIKELY (ui > `0x477fefff`))
405	{
406	# ifdef IMATH_HALF_ENABLE_FP_EXCEPTIONS
407	feraiseexcept (FE_OVERFLOW);
408	# endif
409	return ret \| `0x7c00`;
410	}
411
412	ui -= `0x38000000`;
413	ui = ((ui + `0x00000fff` + ((ui >> `13`) & `1`)) >> `13`);
414	return ret \| (uint16_t)ui;
415	}
416
417	// zero or flush to 0
418	if (ui < `0x33000001`)
419	{
420	# ifdef IMATH_HALF_ENABLE_FP_EXCEPTIONS
421	if (ui == `0`)
422	return ret;
423	feraiseexcept (FE_UNDERFLOW);
424	# endif
425	return ret;
426	}
427
428	// produce a denormalized half
429	e = (ui >> `23`);
430	shift = `0x7e` - e;
431	m = `0x800000` \| (ui & `0x7fffff`);
432	r = m << (`32` - shift);
433	ret \|= (m >> shift);
434	if (r > `0x80000000` \|\| (r == `0x80000000` && (ret & `0x1`) != `0`))
435	++ret;
436	return ret;
437	#endif
438	}
439
440	////////////////////////////////////////
441
442	#ifdef __cplusplus
443
444	# include <iostream>
445
446	IMATH_INTERNAL_NAMESPACE_HEADER_ENTER
447
448	///
449	///
450	/// class half represents a 16-bit floating point number
451	///
452	/// Type half can represent positive and negative numbers whose
453	/// magnitude is between roughly 6.1e-5 and 6.5e+4 with a relative
454	/// error of 9.8e-4; numbers smaller than 6.1e-5 can be represented
455	/// with an absolute error of 6.0e-8. All integers from -2048 to
456	/// +2048 can be represented exactly.
457	///
458	/// Type half behaves (almost) like the built-in C++ floating point
459	/// types. In arithmetic expressions, half, float and double can be
460	/// mixed freely. Here are a few examples:
461	///
462	/// half a (3.5);
463	/// float b (a + sqrt (a));
464	/// a += b;
465	/// b += a;
466	/// b = a + 7;
467	///
468	/// Conversions from half to float are lossless; all half numbers
469	/// are exactly representable as floats.
470	///
471	/// Conversions from float to half may not preserve a float's value
472	/// exactly. If a float is not representable as a half, then the
473	/// float value is rounded to the nearest representable half. If a
474	/// float value is exactly in the middle between the two closest
475	/// representable half values, then the float value is rounded to
476	/// the closest half whose least significant bit is zero.
477	///
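/// Overflows during float-to-half conversions raise a floating point
/// exception only when the bit-shift conversion is compiled with
/// ``IMATH_HALF_ENABLE_FP_EXCEPTIONS`` defined: ``FE_OVERFLOW`` is then
/// raised when a finite float is too large to be represented as a
/// half. Infinities and NANs are converted to a half infinity or NAN
/// without raising an exception.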
482	///
/// The implementation of type half makes the following assumptions
/// about the implementation of the built-in C++ types:
///
///     float is an IEEE 754 single-precision number
///     sizeof (float) == 4
///     sizeof (unsigned int) == sizeof (float)
///     alignof (unsigned int) == alignof (float)
///     sizeof (uint16_t) == 2
491	///
492
493	class IMATH_EXPORT_TYPE half
494	{
495	public:
496	/// A special tag that lets us initialize a half from the raw bits.
497	enum IMATH_EXPORT_ENUM FromBitsTag
498	{
499	FromBits
500	};
501
502	/// @{
503	/// @name Constructors
504
505	/// Default construction provides no initialization (hence it is
506	/// not constexpr).
507	half() IMATH_NOEXCEPT = default;
508
509	/// Construct from float
510	half (float f) IMATH_NOEXCEPT;
511
512	/// Construct from bit-vector
513	constexpr half (FromBitsTag, uint16_t bits) IMATH_NOEXCEPT;
514
515	/// Copy constructor
516	constexpr half (const half&) IMATH_NOEXCEPT = default;
517
518	/// Move constructor
519	constexpr half (half&&) IMATH_NOEXCEPT = default;
520
521	/// Destructor
522	~half() IMATH_NOEXCEPT = default;
523
524	/// @}
525
526	/// Conversion to float
527	operator float() const IMATH_NOEXCEPT;
528
529	/// @{
530	/// @name Basic Algebra
531
532	/// Unary minus
533	constexpr half operator-() const IMATH_NOEXCEPT;
534
535	/// Assignment
536	half& operator= (const half& h) IMATH_NOEXCEPT = default;
537
538	/// Move assignment
539	half& operator= (half&& h) IMATH_NOEXCEPT = default;
540
541	/// Assignment from float
542	half& operator= (float f) IMATH_NOEXCEPT;
543
544	/// Addition assignment
545	half& operator+= (half h) IMATH_NOEXCEPT;
546
547	/// Addition assignment from float
548	half& operator+= (float f) IMATH_NOEXCEPT;
549
550	/// Subtraction assignment
551	half& operator-= (half h) IMATH_NOEXCEPT;
552
553	/// Subtraction assignment from float
554	half& operator-= (float f) IMATH_NOEXCEPT;
555
556	/// Multiplication assignment
557	half& operator*= (half h) IMATH_NOEXCEPT;
558
559	/// Multiplication assignment from float
560	half& operator= (float* f) IMATH_NOEXCEPT;
561
562	/// Division assignment
563	half& operator/= (half h) IMATH_NOEXCEPT;
564
565	/// Division assignment from float
566	half& operator/= (float f) IMATH_NOEXCEPT;
567
568	/// @}
569
570	/// Round to n-bit precision (n should be between 0 and 10).
571	/// After rounding, the significand's 10-n least significant
572	/// bits will be zero.
573	IMATH_CONSTEXPR14 half round (unsigned int n) const IMATH_NOEXCEPT;
574
575	/// @{
576	/// @name Classification
577
578	/// Return true if a normalized number, a denormalized number, or
579	/// zero.
580	constexpr bool isFinite() const IMATH_NOEXCEPT;
581
582	/// Return true if a normalized number.
583	constexpr bool isNormalized() const IMATH_NOEXCEPT;
584
585	/// Return true if a denormalized number.
586	constexpr bool isDenormalized() const IMATH_NOEXCEPT;
587
588	/// Return true if zero.
589	constexpr bool isZero() const IMATH_NOEXCEPT;
590
591	/// Return true if NAN.
592	constexpr bool isNan() const IMATH_NOEXCEPT;
593
594	/// Return true if a positive or a negative infinity
595	constexpr bool isInfinity() const IMATH_NOEXCEPT;
596
597	/// Return true if the sign bit is set (negative)
598	constexpr bool isNegative() const IMATH_NOEXCEPT;
599
600	/// @}
601
602	/// @{
603	/// @name Special values
604
605	/// Return +infinity
606	static constexpr half posInf() IMATH_NOEXCEPT;
607
608	/// Return -infinity
609	static constexpr half negInf() IMATH_NOEXCEPT;
610
611	/// Returns a NAN with the bit pattern 0111111111111111
612	static constexpr half qNan() IMATH_NOEXCEPT;
613
614	/// Return a NAN with the bit pattern 0111110111111111
615	static constexpr half sNan() IMATH_NOEXCEPT;
616
617	/// @}
618
619	/// @{
620	/// @name Access to the internal representation
621
622	/// Return the bit pattern
623	constexpr uint16_t bits () const IMATH_NOEXCEPT;
624
625	/// Set the bit pattern
626	IMATH_CONSTEXPR14 void setBits (uint16_t bits) IMATH_NOEXCEPT;
627
628	/// @}
629
630	public:
631	static_assert (sizeof (float) == sizeof (uint32_t),
632	"Assumption about the size of floats correct");
633	using uif = imath_half_uif;
634
635	private:
636
637	constexpr uint16_t mantissa() const IMATH_NOEXCEPT;
638	constexpr uint16_t exponent() const IMATH_NOEXCEPT;
639
640	uint16_t _h;
641	};
642
643	//----------------------------
644	// Half-from-float constructor
645	//----------------------------
646
647	inline half::half (float f) IMATH_NOEXCEPT
648	: _h (imath_float_to_half (f))
649	{
650	}
651
652	//------------------------------------------
653	// Half from raw bits constructor
654	//------------------------------------------
655
656	inline constexpr half::half (FromBitsTag, uint16_t bits) IMATH_NOEXCEPT : _h (bits)
657	{}
658
659	//-------------------------
660	// Half-to-float conversion
661	//-------------------------
662
663	inline half::operator float() const IMATH_NOEXCEPT
664	{
665	return imath_half_to_float (h: _h);
666	}
667
668	//-------------------------
669	// Round to n-bit precision
670	//-------------------------
671
672	inline IMATH_CONSTEXPR14 half
673	half::round (unsigned int n) const IMATH_NOEXCEPT
674	{
675	//
676	// Parameter check.
677	//
678
679	if (n >= `10`)
680	return *this;
681
682	//
683	// Disassemble h into the sign, s,
684	// and the combined exponent and significand, e.
685	//
686
687	uint16_t s = _h & `0x8000`;
688	uint16_t e = _h & `0x7fff`;
689
690	//
691	// Round the exponent and significand to the nearest value
692	// where ones occur only in the (10-n) most significant bits.
693	// Note that the exponent adjusts automatically if rounding
694	// up causes the significand to overflow.
695	//
696
697	e >>= `9` - n;
698	e += e & `1`;
699	e <<= `9` - n;
700
701	//
702	// Check for exponent overflow.
703	//
704
705	if (e >= `0x7c00`)
706	{
707	//
708	// Overflow occurred -- truncate instead of rounding.
709	//
710
711	e = _h;
712	e >>= `10` - n;
713	e <<= `10` - n;
714	}
715
716	//
717	// Put the original sign bit back.
718	//
719
720	half h (FromBits, s \| e);
721
722	return h;
723	}
724
725	//-----------------------
726	// Other inline functions
727	//-----------------------
728
729	inline constexpr half
730	half::operator-() const IMATH_NOEXCEPT
731	{
732	return half (FromBits, bits() ^ `0x8000`);
733	}
734
735	inline half&
736	half::operator= (float f) IMATH_NOEXCEPT
737	{
738	*this = half (f);
739	return *this;
740	}
741
742	inline half&
743	half::operator+= (half h) IMATH_NOEXCEPT
744	{
745	*this = half (float (*this) + float (h));
746	return *this;
747	}
748
749	inline half&
750	half::operator+= (float f) IMATH_NOEXCEPT
751	{
752	*this = half (float (*this) + f);
753	return *this;
754	}
755
756	inline half&
757	half::operator-= (half h) IMATH_NOEXCEPT
758	{
759	*this = half (float (*this) - float (h));
760	return *this;
761	}
762
763	inline half&
764	half::operator-= (float f) IMATH_NOEXCEPT
765	{
766	*this = half (float (*this) - f);
767	return *this;
768	}
769
770	inline half&
771	half::operator*= (half h) IMATH_NOEXCEPT
772	{
773	*this = half (float (*this) * float (h));
774	return *this;
775	}
776
777	inline half&
778	half::operator= (float* f) IMATH_NOEXCEPT
779	{
780	*this = half (float (*this) * f);
781	return *this;
782	}
783
784	inline half&
785	half::operator/= (half h) IMATH_NOEXCEPT
786	{
787	*this = half (float (*this) / float (h));
788	return *this;
789	}
790
791	inline half&
792	half::operator/= (float f) IMATH_NOEXCEPT
793	{
794	*this = half (float (*this) / f);
795	return *this;
796	}
797
798	inline constexpr uint16_t
799	half::mantissa() const IMATH_NOEXCEPT
800	{
801	return _h & `0x3ff`;
802	}
803
804	inline constexpr uint16_t
805	half::exponent() const IMATH_NOEXCEPT
806	{
807	return (_h >> `10`) & `0x001f`;
808	}
809
810	inline constexpr bool
811	half::isFinite() const IMATH_NOEXCEPT
812	{
813	return exponent() < `31`;
814	}
815
816	inline constexpr bool
817	half::isNormalized() const IMATH_NOEXCEPT
818	{
819	return exponent() > `0` && exponent() < `31`;
820	}
821
822	inline constexpr bool
823	half::isDenormalized() const IMATH_NOEXCEPT
824	{
825	return exponent() == `0` && mantissa() != `0`;
826	}
827
828	inline constexpr bool
829	half::isZero() const IMATH_NOEXCEPT
830	{
831	return (_h & `0x7fff`) == `0`;
832	}
833
834	inline constexpr bool
835	half::isNan() const IMATH_NOEXCEPT
836	{
837	return exponent() == `31` && mantissa() != `0`;
838	}
839
840	inline constexpr bool
841	half::isInfinity() const IMATH_NOEXCEPT
842	{
843	return exponent() == `31` && mantissa() == `0`;
844	}
845
846	inline constexpr bool
847	half::isNegative() const IMATH_NOEXCEPT
848	{
849	return (_h & `0x8000`) != `0`;
850	}
851
852	inline constexpr half
853	half::posInf() IMATH_NOEXCEPT
854	{
855	return half (FromBits, `0x7c00`);
856	}
857
858	inline constexpr half
859	half::negInf() IMATH_NOEXCEPT
860	{
861	return half (FromBits, `0xfc00`);
862	}
863
864	inline constexpr half
865	half::qNan() IMATH_NOEXCEPT
866	{
867	return half (FromBits, `0x7fff`);
868	}
869
870	inline constexpr half
871	half::sNan() IMATH_NOEXCEPT
872	{
873	return half (FromBits, `0x7dff`);
874	}
875
876	inline constexpr uint16_t
877	half::bits() const IMATH_NOEXCEPT
878	{
879	return _h;
880	}
881
882	inline IMATH_CONSTEXPR14 void
883	half::setBits (uint16_t bits) IMATH_NOEXCEPT
884	{
885	_h = bits;
886	}
887
888	IMATH_INTERNAL_NAMESPACE_HEADER_EXIT
889
890	/// Output h to os, formatted as a float
891	IMATH_EXPORT std::ostream& operator<< (std::ostream& os, IMATH_INTERNAL_NAMESPACE::half h);
892
893	/// Input h from is
894	IMATH_EXPORT std::istream& operator>> (std::istream& is, IMATH_INTERNAL_NAMESPACE::half& h);
895
896	#include <limits>
897
898	namespace std
899	{
900
901	template <> class numeric_limits<IMATH_INTERNAL_NAMESPACE::half>
902	{
903	public:
904	static const bool is_specialized = true;
905
906	static constexpr IMATH_INTERNAL_NAMESPACE::half min () IMATH_NOEXCEPT
907	{
908	return IMATH_INTERNAL_NAMESPACE::half (IMATH_INTERNAL_NAMESPACE::half::FromBits, `0x0400`); /HALF_MIN/
909	}
910	static constexpr IMATH_INTERNAL_NAMESPACE::half max () IMATH_NOEXCEPT
911	{
912	return IMATH_INTERNAL_NAMESPACE::half (IMATH_INTERNAL_NAMESPACE::half::FromBits, `0x7bff`); /HALF_MAX/
913	}
914	static constexpr IMATH_INTERNAL_NAMESPACE::half lowest ()
915	{
916	return IMATH_INTERNAL_NAMESPACE::half (IMATH_INTERNAL_NAMESPACE::half::FromBits, `0xfbff`); / -HALF_MAX /
917	}
918
919	static constexpr int digits = HALF_MANT_DIG;
920	static constexpr int digits10 = HALF_DIG;
921	static constexpr int max_digits10 = HALF_DECIMAL_DIG;
922	static constexpr bool is_signed = true;
923	static constexpr bool is_integer = false;
924	static constexpr bool is_exact = false;
925	static constexpr int radix = HALF_RADIX;
926	static constexpr IMATH_INTERNAL_NAMESPACE::half epsilon () IMATH_NOEXCEPT
927	{
928	return IMATH_INTERNAL_NAMESPACE::half (IMATH_INTERNAL_NAMESPACE::half::FromBits, `0x1400`); /HALF_EPSILON/
929	}
930	static constexpr IMATH_INTERNAL_NAMESPACE::half round_error () IMATH_NOEXCEPT
931	{
932	return IMATH_INTERNAL_NAMESPACE::half (IMATH_INTERNAL_NAMESPACE::half::FromBits, `0x3800`); /0.5/
933	}
934
935	static constexpr int min_exponent = HALF_DENORM_MIN_EXP;
936	static constexpr int min_exponent10 = HALF_DENORM_MIN_10_EXP;
937	static constexpr int max_exponent = HALF_MAX_EXP;
938	static constexpr int max_exponent10 = HALF_MAX_10_EXP;
939
940	static constexpr bool has_infinity = true;
941	static constexpr bool has_quiet_NaN = true;
942	static constexpr bool has_signaling_NaN = true;
943	static constexpr float_denorm_style has_denorm = denorm_present;
944	static constexpr bool has_denorm_loss = false;
945	static constexpr IMATH_INTERNAL_NAMESPACE::half infinity () IMATH_NOEXCEPT
946	{
947	return IMATH_INTERNAL_NAMESPACE::half (IMATH_INTERNAL_NAMESPACE::half::FromBits, `0x7c00`); /half::posInf()/
948	}
949	static constexpr IMATH_INTERNAL_NAMESPACE::half quiet_NaN () IMATH_NOEXCEPT
950	{
951	return IMATH_INTERNAL_NAMESPACE::half (IMATH_INTERNAL_NAMESPACE::half::FromBits, `0x7fff`); /half::qNan()/
952	}
953	static constexpr IMATH_INTERNAL_NAMESPACE::half signaling_NaN () IMATH_NOEXCEPT
954	{
955	return IMATH_INTERNAL_NAMESPACE::half (IMATH_INTERNAL_NAMESPACE::half::FromBits, `0x7dff`); /half::sNan()/
956	}
957	static constexpr IMATH_INTERNAL_NAMESPACE::half denorm_min () IMATH_NOEXCEPT
958	{
959	return IMATH_INTERNAL_NAMESPACE::half (IMATH_INTERNAL_NAMESPACE::half::FromBits, `0x0001`); /HALF_DENORM_MIN/
960	}
961
962	static constexpr bool is_iec559 = false;
963	static constexpr bool is_bounded = false;
964	static constexpr bool is_modulo = false;
965
966	static constexpr bool traps = true;
967	static constexpr bool tinyness_before = false;
968	static constexpr float_round_style round_style = round_to_nearest;
969	};
970
971	} // namespace std
972
973	//----------
974	// Debugging
975	//----------
976
977	IMATH_EXPORT void printBits (std::ostream& os, IMATH_INTERNAL_NAMESPACE::half h);
978	IMATH_EXPORT void printBits (std::ostream& os, float f);
979	IMATH_EXPORT void printBits (char c[`19`], IMATH_INTERNAL_NAMESPACE::half h);
980	IMATH_EXPORT void printBits (char c[`35`], float f);
981
982	# if !defined(__CUDACC__) && !defined(__CUDA_FP16_HPP__)
983	using half = IMATH_INTERNAL_NAMESPACE::half;
984	# else
985	# include <cuda_fp16.h>
986	# endif
987
988	#endif // __cplusplus
989
990	#endif // IMATH_HALF_H_
991

source code of include/Imath/half.h