1//
2// SPDX-License-Identifier: BSD-3-Clause
3// Copyright Contributors to the OpenEXR Project.
4//
5
6//
7// Primary original authors:
8// Florian Kainz <kainz@ilm.com>
9// Rod Bogart <rgb@ilm.com>
10//
11
12#ifndef IMATH_HALF_H_
13#define IMATH_HALF_H_
14
15#include "ImathExport.h"
16#include "ImathNamespace.h"
17#include "ImathPlatform.h"
18
19/// @file half.h
20/// The half type is a 16-bit floating number, compatible with the
21/// IEEE 754-2008 binary16 type.
22///
23/// **Representation of a 32-bit float:**
24///
25/// We assume that a float, f, is an IEEE 754 single-precision
26/// floating point number, whose bits are arranged as follows:
27///
///     31 (msb)
///     |
///     | 30     23
///     | |      |
///     | |      | 22                    0 (lsb)
///     | |      | |                     |
///     X XXXXXXXX XXXXXXXXXXXXXXXXXXXXXXX
///
///     s e        m
///
/// s is the sign-bit, e is the exponent and m is the significand.
39///
40/// If e is between 1 and 254, f is a normalized number:
41///
///     f = (-1)^s * 2^(e-127) * 1.m
44///
45/// If e is 0, and m is not zero, f is a denormalized number:
46///
///     f = (-1)^s * 2^(-126) * 0.m
49///
50/// If e and m are both zero, f is zero:
51///
52/// f = 0.0
53///
54/// If e is 255, f is an "infinity" or "not a number" (NAN),
55/// depending on whether m is zero or not.
56///
57/// Examples:
58///
59/// 0 00000000 00000000000000000000000 = 0.0
60/// 0 01111110 00000000000000000000000 = 0.5
61/// 0 01111111 00000000000000000000000 = 1.0
62/// 0 10000000 00000000000000000000000 = 2.0
63/// 0 10000000 10000000000000000000000 = 3.0
64/// 1 10000101 11110000010000000000000 = -124.0625
65/// 0 11111111 00000000000000000000000 = +infinity
66/// 1 11111111 00000000000000000000000 = -infinity
67/// 0 11111111 10000000000000000000000 = NAN
68/// 1 11111111 11111111111111111111111 = NAN
69///
70/// **Representation of a 16-bit half:**
71///
72/// Here is the bit-layout for a half number, h:
73///
///     15 (msb)
///     |
///     | 14  10
///     | |   |
///     | |   | 9        0 (lsb)
///     | |   | |        |
///     X XXXXX XXXXXXXXXX
///
///     s e     m
///
/// s is the sign-bit, e is the exponent and m is the significand.
85///
86/// If e is between 1 and 30, h is a normalized number:
87///
///     h = (-1)^s * 2^(e-15) * 1.m
90///
91/// If e is 0, and m is not zero, h is a denormalized number:
92///
///     h = (-1)^s * 2^(-14) * 0.m
95///
96/// If e and m are both zero, h is zero:
97///
98/// h = 0.0
99///
100/// If e is 31, h is an "infinity" or "not a number" (NAN),
101/// depending on whether m is zero or not.
102///
103/// Examples:
104///
105/// 0 00000 0000000000 = 0.0
106/// 0 01110 0000000000 = 0.5
107/// 0 01111 0000000000 = 1.0
108/// 0 10000 0000000000 = 2.0
109/// 0 10000 1000000000 = 3.0
110/// 1 10101 1111000001 = -124.0625
111/// 0 11111 0000000000 = +infinity
112/// 1 11111 0000000000 = -infinity
113/// 0 11111 1000000000 = NAN
114/// 1 11111 1111111111 = NAN
115///
116/// **Conversion via Lookup Table:**
117///
118/// Converting from half to float is performed by default using a
119/// lookup table. There are only 65,536 different half numbers; each
120/// of these numbers has been converted and stored in a table pointed
121/// to by the ``imath_half_to_float_table`` pointer.
122///
/// Prior to Imath v3.1, conversion from float to half was
/// accomplished with the help of an exponent lookup table, but this
/// is now replaced with explicit bit shifting.
126///
127/// **Conversion via Hardware:**
128///
129/// For Imath v3.1, the conversion routines have been extended to use
130/// F16C SSE instructions whenever present and enabled by compiler
131/// flags.
132///
133/// **Conversion via Bit-Shifting**
134///
135/// If F16C SSE instructions are not available, conversion can be
136/// accomplished by a bit-shifting algorithm. For half-to-float
137/// conversion, this is generally slower than the lookup table, but it
138/// may be preferable when memory limits preclude storing of the
139/// 65,536-entry lookup table.
140///
141/// The lookup table symbol is included in the compilation even if
142/// ``IMATH_HALF_USE_LOOKUP_TABLE`` is false, because application code
143/// using the exported ``half.h`` may choose to enable the use of the table.
144///
145/// An implementation can eliminate the table from compilation by
146/// defining the ``IMATH_HALF_NO_LOOKUP_TABLE`` preprocessor symbol.
147/// Simply add:
148///
149/// #define IMATH_HALF_NO_LOOKUP_TABLE
150///
151/// before including ``half.h``, or define the symbol on the compile
152/// command line.
153///
154/// Furthermore, an implementation wishing to receive ``FE_OVERFLOW``
155/// and ``FE_UNDERFLOW`` floating point exceptions when converting
156/// float to half by the bit-shift algorithm can define the
157/// preprocessor symbol ``IMATH_HALF_ENABLE_FP_EXCEPTIONS`` prior to
158/// including ``half.h``:
159///
160/// #define IMATH_HALF_ENABLE_FP_EXCEPTIONS
161///
162/// **Conversion Performance Comparison:**
163///
164/// Testing on a Core i9, the timings are approximately:
165///
166/// half to float
167/// - table: 0.71 ns / call
168/// - no table: 1.06 ns / call
169/// - f16c: 0.45 ns / call
170///
171/// float-to-half:
172/// - original: 5.2 ns / call
173/// - no exp table + opt: 1.27 ns / call
174/// - f16c: 0.45 ns / call
175///
176/// **Note:** the timing above depends on the distribution of the
177/// floats in question.
178///
179
180#ifdef __CUDA_ARCH__
181// do not include intrinsics headers on Cuda
182#elif defined(_WIN32)
183# include <intrin.h>
184#elif defined(__x86_64__)
185# include <x86intrin.h>
186#elif defined(__F16C__)
187# include <immintrin.h>
188#endif
189
190#include <stdint.h>
191#include <stdio.h>
192
193#ifdef IMATH_HALF_ENABLE_FP_EXCEPTIONS
194# include <fenv.h>
195#endif
196
197//-------------------------------------------------------------------------
198// Limits
199//
200// Visual C++ will complain if HALF_DENORM_MIN, HALF_NRM_MIN etc. are not float
201// constants, but at least one other compiler (gcc 2.96) produces incorrect
202// results if they are.
203//-------------------------------------------------------------------------
204
205#if (defined _WIN32 || defined _WIN64) && defined _MSC_VER
206
207/// Smallest positive denormalized half
208# define HALF_DENORM_MIN 5.96046448e-08f
209/// Smallest positive normalized half
210# define HALF_NRM_MIN 6.10351562e-05f
211/// Smallest positive normalized half
212# define HALF_MIN 6.10351562e-05f
213/// Largest positive half
214# define HALF_MAX 65504.0f
215/// Smallest positive e for which ``half(1.0 + e) != half(1.0)``
216# define HALF_EPSILON 0.00097656f
217#else
218/// Smallest positive denormalized half
219# define HALF_DENORM_MIN 5.96046448e-08
220/// Smallest positive normalized half
221# define HALF_NRM_MIN 6.10351562e-05
222/// Smallest positive normalized half
223# define HALF_MIN 6.10351562e-05f
224/// Largest positive half
225# define HALF_MAX 65504.0
226/// Smallest positive e for which ``half(1.0 + e) != half(1.0)``
227# define HALF_EPSILON 0.00097656
228#endif
229
230/// Number of digits in mantissa (significand + hidden leading 1)
231#define HALF_MANT_DIG 11
232/// Number of base 10 digits that can be represented without change:
233///
234/// ``floor( (HALF_MANT_DIG - 1) * log10(2) ) => 3.01... -> 3``
235#define HALF_DIG 3
236/// Number of base-10 digits that are necessary to uniquely represent
237/// all distinct values:
238///
239/// ``ceil(HALF_MANT_DIG * log10(2) + 1) => 4.31... -> 5``
240#define HALF_DECIMAL_DIG 5
241/// Base of the exponent
242#define HALF_RADIX 2
243/// Minimum negative integer such that ``HALF_RADIX`` raised to the power
244/// of one less than that integer is a normalized half
245#define HALF_DENORM_MIN_EXP -13
246/// Maximum positive integer such that ``HALF_RADIX`` raised to the power
247/// of one less than that integer is a normalized half
248#define HALF_MAX_EXP 16
/// Minimum negative integer such that 10 raised to that power is a
/// normalized half
251#define HALF_DENORM_MIN_10_EXP -4
252/// Maximum positive integer such that 10 raised to that power is a
253/// normalized half
254#define HALF_MAX_10_EXP 4
255
/// Union exposing the same 32 bits as either a ``uint32_t`` or a
/// ``float``; a plain C type so that C-only programs and C++ can use
/// the same utilities
257typedef union imath_half_uif
258{
259 uint32_t i;
260 float f;
261} imath_half_uif_t;
262
/// Raw 16-bit pattern of a half; a plain C type so that C-only
/// programs and C++ can use the same utilities
264typedef uint16_t imath_half_bits_t;
265
266#if !defined(__cplusplus) && !defined(__CUDACC__)
267/// if we're in a C-only context, alias the half bits type to half
268typedef imath_half_bits_t half;
269#endif
270
271#if !defined(IMATH_HALF_NO_LOOKUP_TABLE)
272# if defined(__cplusplus)
273extern "C"
274# else
275extern
276# endif
277 IMATH_EXPORT const imath_half_uif_t* imath_half_to_float_table;
278#endif
279
280///
281/// Convert half to float
282///
283
284static inline float
285imath_half_to_float (imath_half_bits_t h)
286{
287#if defined(__F16C__)
288 // NB: The intel implementation does seem to treat NaN slightly
289 // different than the original toFloat table does (i.e. where the
290 // 1 bits are, meaning the signalling or not bits). This seems
291 // benign, given that the original library didn't really deal with
292 // signalling vs non-signalling NaNs
293# ifdef _MSC_VER
294 /* msvc does not seem to have cvtsh_ss :( */
295 return _mm_cvtss_f32 (_mm_cvtph_ps (_mm_set1_epi16 (h)));
296# else
297 return _cvtsh_ss (h);
298# endif
299#elif defined(IMATH_HALF_USE_LOOKUP_TABLE) && !defined(IMATH_HALF_NO_LOOKUP_TABLE)
300 return imath_half_to_float_table[h].f;
301#else
302 imath_half_uif_t v;
303 // this code would be clearer, although it does appear to be faster
304 // (1.06 vs 1.08 ns/call) to avoid the constants and just do 4
305 // shifts.
306 //
307 uint32_t hexpmant = ( (uint32_t)(h) << 17 ) >> 4;
308 v.i = ((uint32_t)(h >> 15)) << 31;
309
310 // the likely really does help if most of your numbers are "normal" half numbers
311 if (IMATH_LIKELY ((hexpmant >= 0x00800000)))
312 {
313 v.i |= hexpmant;
314 // either we are a normal number, in which case add in the bias difference
315 // otherwise make sure all exponent bits are set
316 if (IMATH_LIKELY ((hexpmant < 0x0f800000)))
317 v.i += 0x38000000;
318 else
319 v.i |= 0x7f800000;
320 }
321 else if (hexpmant != 0)
322 {
323 // exponent is 0 because we're denormal, don't have to extract
324 // the mantissa, can just use as is
325 //
326 //
327 // other compilers may provide count-leading-zeros primitives,
328 // but we need the community to inform us of the variants
329 uint32_t lc;
330# if defined(_MSC_VER) && (_M_IX86 || _M_X64)
331 lc = __lzcnt (hexpmant);
332# elif defined(__GNUC__) || defined(__clang__)
333 lc = (uint32_t) __builtin_clz (hexpmant);
334# else
335 lc = 0;
336 while (0 == ((hexpmant << lc) & 0x80000000))
337 ++lc;
338# endif
339 lc -= 8;
340 // so nominally we want to remove that extra bit we shifted
341 // up, but we are going to add that bit back in, then subtract
342 // from it with the 0x38800000 - (lc << 23)....
343 //
344 // by combining, this allows us to skip the & operation (and
345 // remove a constant)
346 //
347 // hexpmant &= ~0x00800000;
348 v.i |= 0x38800000;
349 // lc is now x, where the desired exponent is then
350 // -14 - lc
351 // + 127 -> new exponent
352 v.i |= (hexpmant << lc);
353 v.i -= (lc << 23);
354 }
355 return v.f;
356#endif
357}
358
359///
360/// Convert half to float
361///
362/// Note: This only supports the "round to even" rounding mode, which
363/// was the only mode supported by the original OpenEXR library
364///
365
366static inline imath_half_bits_t
367imath_float_to_half (float f)
368{
369#if defined(__F16C__)
370# ifdef _MSC_VER
371 // msvc does not seem to have cvtsh_ss :(
372 return _mm_extract_epi16 (
373 _mm_cvtps_ph (_mm_set_ss (f), (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)),
374 0);
375# else
376 // preserve the fixed rounding mode to nearest
377 return _cvtss_sh (f, (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC));
378# endif
379#else
380 imath_half_uif_t v;
381 imath_half_bits_t ret;
382 uint32_t e, m, ui, r, shift;
383
384 v.f = f;
385
386 ui = (v.i & ~0x80000000);
387 ret = ((v.i >> 16) & 0x8000);
388
389 // exponent large enough to result in a normal number, round and return
390 if (ui >= 0x38800000)
391 {
392 // inf or nan
393 if (IMATH_UNLIKELY (ui >= 0x7f800000))
394 {
395 ret |= 0x7c00;
396 if (ui == 0x7f800000)
397 return ret;
398 m = (ui & 0x7fffff) >> 13;
399 // make sure we have at least one bit after shift to preserve nan-ness
400 return ret | (uint16_t)m | (uint16_t)(m == 0);
401 }
402
403 // too large, round to infinity
404 if (IMATH_UNLIKELY (ui > 0x477fefff))
405 {
406# ifdef IMATH_HALF_ENABLE_FP_EXCEPTIONS
407 feraiseexcept (FE_OVERFLOW);
408# endif
409 return ret | 0x7c00;
410 }
411
412 ui -= 0x38000000;
413 ui = ((ui + 0x00000fff + ((ui >> 13) & 1)) >> 13);
414 return ret | (uint16_t)ui;
415 }
416
417 // zero or flush to 0
418 if (ui < 0x33000001)
419 {
420# ifdef IMATH_HALF_ENABLE_FP_EXCEPTIONS
421 if (ui == 0)
422 return ret;
423 feraiseexcept (FE_UNDERFLOW);
424# endif
425 return ret;
426 }
427
428 // produce a denormalized half
429 e = (ui >> 23);
430 shift = 0x7e - e;
431 m = 0x800000 | (ui & 0x7fffff);
432 r = m << (32 - shift);
433 ret |= (m >> shift);
434 if (r > 0x80000000 || (r == 0x80000000 && (ret & 0x1) != 0))
435 ++ret;
436 return ret;
437#endif
438}
439
440////////////////////////////////////////
441
442#ifdef __cplusplus
443
444# include <iostream>
445
446IMATH_INTERNAL_NAMESPACE_HEADER_ENTER
447
448///
449///
450/// class half represents a 16-bit floating point number
451///
452/// Type half can represent positive and negative numbers whose
453/// magnitude is between roughly 6.1e-5 and 6.5e+4 with a relative
454/// error of 9.8e-4; numbers smaller than 6.1e-5 can be represented
455/// with an absolute error of 6.0e-8. All integers from -2048 to
456/// +2048 can be represented exactly.
457///
458/// Type half behaves (almost) like the built-in C++ floating point
459/// types. In arithmetic expressions, half, float and double can be
460/// mixed freely. Here are a few examples:
461///
462/// half a (3.5);
463/// float b (a + sqrt (a));
464/// a += b;
465/// b += a;
466/// b = a + 7;
467///
468/// Conversions from half to float are lossless; all half numbers
469/// are exactly representable as floats.
470///
471/// Conversions from float to half may not preserve a float's value
472/// exactly. If a float is not representable as a half, then the
473/// float value is rounded to the nearest representable half. If a
474/// float value is exactly in the middle between the two closest
475/// representable half values, then the float value is rounded to
476/// the closest half whose least significant bit is zero.
477///
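/// A float whose magnitude is too large to be represented as a half
/// is converted to an infinity of the same sign; float infinities
/// and NANs are converted to half infinities and NANs. The
/// bit-shifting conversion raises the ``FE_OVERFLOW`` floating point
/// exception for such overflows (and ``FE_UNDERFLOW`` for non-zero
/// values that are flushed to zero) only when
/// ``IMATH_HALF_ENABLE_FP_EXCEPTIONS`` is defined before this header
/// is included.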
482///
483/// The implementation of type half makes the following assumptions
484/// about the implementation of the built-in C++ types:
485///
486/// * float is an IEEE 754 single-precision number
487/// * sizeof (float) == 4
488/// * sizeof (unsigned int) == sizeof (float)
489/// * alignof (unsigned int) == alignof (float)
490/// * sizeof (uint16_t) == 2
491///
492
493class IMATH_EXPORT_TYPE half
494{
495 public:
496 /// A special tag that lets us initialize a half from the raw bits.
497 enum IMATH_EXPORT_ENUM FromBitsTag
498 {
499 FromBits
500 };
501
502 /// @{
503 /// @name Constructors
504
505 /// Default construction provides no initialization (hence it is
506 /// not constexpr).
507 half() IMATH_NOEXCEPT = default;
508
509 /// Construct from float
510 half (float f) IMATH_NOEXCEPT;
511
512 /// Construct from bit-vector
513 constexpr half (FromBitsTag, uint16_t bits) IMATH_NOEXCEPT;
514
515 /// Copy constructor
516 constexpr half (const half&) IMATH_NOEXCEPT = default;
517
518 /// Move constructor
519 constexpr half (half&&) IMATH_NOEXCEPT = default;
520
521 /// Destructor
522 ~half() IMATH_NOEXCEPT = default;
523
524 /// @}
525
526 /// Conversion to float
527 operator float() const IMATH_NOEXCEPT;
528
529 /// @{
530 /// @name Basic Algebra
531
532 /// Unary minus
533 constexpr half operator-() const IMATH_NOEXCEPT;
534
535 /// Assignment
536 half& operator= (const half& h) IMATH_NOEXCEPT = default;
537
538 /// Move assignment
539 half& operator= (half&& h) IMATH_NOEXCEPT = default;
540
541 /// Assignment from float
542 half& operator= (float f) IMATH_NOEXCEPT;
543
544 /// Addition assignment
545 half& operator+= (half h) IMATH_NOEXCEPT;
546
547 /// Addition assignment from float
548 half& operator+= (float f) IMATH_NOEXCEPT;
549
550 /// Subtraction assignment
551 half& operator-= (half h) IMATH_NOEXCEPT;
552
553 /// Subtraction assignment from float
554 half& operator-= (float f) IMATH_NOEXCEPT;
555
556 /// Multiplication assignment
557 half& operator*= (half h) IMATH_NOEXCEPT;
558
559 /// Multiplication assignment from float
560 half& operator*= (float f) IMATH_NOEXCEPT;
561
562 /// Division assignment
563 half& operator/= (half h) IMATH_NOEXCEPT;
564
565 /// Division assignment from float
566 half& operator/= (float f) IMATH_NOEXCEPT;
567
568 /// @}
569
570 /// Round to n-bit precision (n should be between 0 and 10).
571 /// After rounding, the significand's 10-n least significant
572 /// bits will be zero.
573 IMATH_CONSTEXPR14 half round (unsigned int n) const IMATH_NOEXCEPT;
574
575 /// @{
576 /// @name Classification
577
578 /// Return true if a normalized number, a denormalized number, or
579 /// zero.
580 constexpr bool isFinite() const IMATH_NOEXCEPT;
581
582 /// Return true if a normalized number.
583 constexpr bool isNormalized() const IMATH_NOEXCEPT;
584
585 /// Return true if a denormalized number.
586 constexpr bool isDenormalized() const IMATH_NOEXCEPT;
587
588 /// Return true if zero.
589 constexpr bool isZero() const IMATH_NOEXCEPT;
590
591 /// Return true if NAN.
592 constexpr bool isNan() const IMATH_NOEXCEPT;
593
594 /// Return true if a positive or a negative infinity
595 constexpr bool isInfinity() const IMATH_NOEXCEPT;
596
597 /// Return true if the sign bit is set (negative)
598 constexpr bool isNegative() const IMATH_NOEXCEPT;
599
600 /// @}
601
602 /// @{
603 /// @name Special values
604
605 /// Return +infinity
606 static constexpr half posInf() IMATH_NOEXCEPT;
607
608 /// Return -infinity
609 static constexpr half negInf() IMATH_NOEXCEPT;
610
611 /// Returns a NAN with the bit pattern 0111111111111111
612 static constexpr half qNan() IMATH_NOEXCEPT;
613
614 /// Return a NAN with the bit pattern 0111110111111111
615 static constexpr half sNan() IMATH_NOEXCEPT;
616
617 /// @}
618
619 /// @{
620 /// @name Access to the internal representation
621
622 /// Return the bit pattern
623 constexpr uint16_t bits () const IMATH_NOEXCEPT;
624
625 /// Set the bit pattern
626 IMATH_CONSTEXPR14 void setBits (uint16_t bits) IMATH_NOEXCEPT;
627
628 /// @}
629
630 public:
631 static_assert (sizeof (float) == sizeof (uint32_t),
632 "Assumption about the size of floats correct");
633 using uif = imath_half_uif;
634
635 private:
636
637 constexpr uint16_t mantissa() const IMATH_NOEXCEPT;
638 constexpr uint16_t exponent() const IMATH_NOEXCEPT;
639
640 uint16_t _h;
641};
642
643//----------------------------
644// Half-from-float constructor
645//----------------------------
646
647inline half::half (float f) IMATH_NOEXCEPT
648 : _h (imath_float_to_half (f))
649{
650}
651
652//------------------------------------------
653// Half from raw bits constructor
654//------------------------------------------
655
656inline constexpr half::half (FromBitsTag, uint16_t bits) IMATH_NOEXCEPT : _h (bits)
657{}
658
659//-------------------------
660// Half-to-float conversion
661//-------------------------
662
663inline half::operator float() const IMATH_NOEXCEPT
664{
665 return imath_half_to_float (h: _h);
666}
667
668//-------------------------
669// Round to n-bit precision
670//-------------------------
671
672inline IMATH_CONSTEXPR14 half
673half::round (unsigned int n) const IMATH_NOEXCEPT
674{
675 //
676 // Parameter check.
677 //
678
679 if (n >= 10)
680 return *this;
681
682 //
683 // Disassemble h into the sign, s,
684 // and the combined exponent and significand, e.
685 //
686
687 uint16_t s = _h & 0x8000;
688 uint16_t e = _h & 0x7fff;
689
690 //
691 // Round the exponent and significand to the nearest value
692 // where ones occur only in the (10-n) most significant bits.
693 // Note that the exponent adjusts automatically if rounding
694 // up causes the significand to overflow.
695 //
696
697 e >>= 9 - n;
698 e += e & 1;
699 e <<= 9 - n;
700
701 //
702 // Check for exponent overflow.
703 //
704
705 if (e >= 0x7c00)
706 {
707 //
708 // Overflow occurred -- truncate instead of rounding.
709 //
710
711 e = _h;
712 e >>= 10 - n;
713 e <<= 10 - n;
714 }
715
716 //
717 // Put the original sign bit back.
718 //
719
720 half h (FromBits, s | e);
721
722 return h;
723}
724
725//-----------------------
726// Other inline functions
727//-----------------------
728
729inline constexpr half
730half::operator-() const IMATH_NOEXCEPT
731{
732 return half (FromBits, bits() ^ 0x8000);
733}
734
735inline half&
736half::operator= (float f) IMATH_NOEXCEPT
737{
738 *this = half (f);
739 return *this;
740}
741
742inline half&
743half::operator+= (half h) IMATH_NOEXCEPT
744{
745 *this = half (float (*this) + float (h));
746 return *this;
747}
748
749inline half&
750half::operator+= (float f) IMATH_NOEXCEPT
751{
752 *this = half (float (*this) + f);
753 return *this;
754}
755
756inline half&
757half::operator-= (half h) IMATH_NOEXCEPT
758{
759 *this = half (float (*this) - float (h));
760 return *this;
761}
762
763inline half&
764half::operator-= (float f) IMATH_NOEXCEPT
765{
766 *this = half (float (*this) - f);
767 return *this;
768}
769
770inline half&
771half::operator*= (half h) IMATH_NOEXCEPT
772{
773 *this = half (float (*this) * float (h));
774 return *this;
775}
776
777inline half&
778half::operator*= (float f) IMATH_NOEXCEPT
779{
780 *this = half (float (*this) * f);
781 return *this;
782}
783
784inline half&
785half::operator/= (half h) IMATH_NOEXCEPT
786{
787 *this = half (float (*this) / float (h));
788 return *this;
789}
790
791inline half&
792half::operator/= (float f) IMATH_NOEXCEPT
793{
794 *this = half (float (*this) / f);
795 return *this;
796}
797
798inline constexpr uint16_t
799half::mantissa() const IMATH_NOEXCEPT
800{
801 return _h & 0x3ff;
802}
803
804inline constexpr uint16_t
805half::exponent() const IMATH_NOEXCEPT
806{
807 return (_h >> 10) & 0x001f;
808}
809
810inline constexpr bool
811half::isFinite() const IMATH_NOEXCEPT
812{
813 return exponent() < 31;
814}
815
816inline constexpr bool
817half::isNormalized() const IMATH_NOEXCEPT
818{
819 return exponent() > 0 && exponent() < 31;
820}
821
822inline constexpr bool
823half::isDenormalized() const IMATH_NOEXCEPT
824{
825 return exponent() == 0 && mantissa() != 0;
826}
827
828inline constexpr bool
829half::isZero() const IMATH_NOEXCEPT
830{
831 return (_h & 0x7fff) == 0;
832}
833
834inline constexpr bool
835half::isNan() const IMATH_NOEXCEPT
836{
837 return exponent() == 31 && mantissa() != 0;
838}
839
840inline constexpr bool
841half::isInfinity() const IMATH_NOEXCEPT
842{
843 return exponent() == 31 && mantissa() == 0;
844}
845
846inline constexpr bool
847half::isNegative() const IMATH_NOEXCEPT
848{
849 return (_h & 0x8000) != 0;
850}
851
852inline constexpr half
853half::posInf() IMATH_NOEXCEPT
854{
855 return half (FromBits, 0x7c00);
856}
857
858inline constexpr half
859half::negInf() IMATH_NOEXCEPT
860{
861 return half (FromBits, 0xfc00);
862}
863
864inline constexpr half
865half::qNan() IMATH_NOEXCEPT
866{
867 return half (FromBits, 0x7fff);
868}
869
870inline constexpr half
871half::sNan() IMATH_NOEXCEPT
872{
873 return half (FromBits, 0x7dff);
874}
875
876inline constexpr uint16_t
877half::bits() const IMATH_NOEXCEPT
878{
879 return _h;
880}
881
882inline IMATH_CONSTEXPR14 void
883half::setBits (uint16_t bits) IMATH_NOEXCEPT
884{
885 _h = bits;
886}
887
888IMATH_INTERNAL_NAMESPACE_HEADER_EXIT
889
890/// Output h to os, formatted as a float
891IMATH_EXPORT std::ostream& operator<< (std::ostream& os, IMATH_INTERNAL_NAMESPACE::half h);
892
893/// Input h from is
894IMATH_EXPORT std::istream& operator>> (std::istream& is, IMATH_INTERNAL_NAMESPACE::half& h);
895
896#include <limits>
897
898namespace std
899{
900
901template <> class numeric_limits<IMATH_INTERNAL_NAMESPACE::half>
902{
903public:
904 static const bool is_specialized = true;
905
906 static constexpr IMATH_INTERNAL_NAMESPACE::half min () IMATH_NOEXCEPT
907 {
908 return IMATH_INTERNAL_NAMESPACE::half (IMATH_INTERNAL_NAMESPACE::half::FromBits, 0x0400); /*HALF_MIN*/
909 }
910 static constexpr IMATH_INTERNAL_NAMESPACE::half max () IMATH_NOEXCEPT
911 {
912 return IMATH_INTERNAL_NAMESPACE::half (IMATH_INTERNAL_NAMESPACE::half::FromBits, 0x7bff); /*HALF_MAX*/
913 }
914 static constexpr IMATH_INTERNAL_NAMESPACE::half lowest ()
915 {
916 return IMATH_INTERNAL_NAMESPACE::half (IMATH_INTERNAL_NAMESPACE::half::FromBits, 0xfbff); /* -HALF_MAX */
917 }
918
919 static constexpr int digits = HALF_MANT_DIG;
920 static constexpr int digits10 = HALF_DIG;
921 static constexpr int max_digits10 = HALF_DECIMAL_DIG;
922 static constexpr bool is_signed = true;
923 static constexpr bool is_integer = false;
924 static constexpr bool is_exact = false;
925 static constexpr int radix = HALF_RADIX;
926 static constexpr IMATH_INTERNAL_NAMESPACE::half epsilon () IMATH_NOEXCEPT
927 {
928 return IMATH_INTERNAL_NAMESPACE::half (IMATH_INTERNAL_NAMESPACE::half::FromBits, 0x1400); /*HALF_EPSILON*/
929 }
930 static constexpr IMATH_INTERNAL_NAMESPACE::half round_error () IMATH_NOEXCEPT
931 {
932 return IMATH_INTERNAL_NAMESPACE::half (IMATH_INTERNAL_NAMESPACE::half::FromBits, 0x3800); /*0.5*/
933 }
934
935 static constexpr int min_exponent = HALF_DENORM_MIN_EXP;
936 static constexpr int min_exponent10 = HALF_DENORM_MIN_10_EXP;
937 static constexpr int max_exponent = HALF_MAX_EXP;
938 static constexpr int max_exponent10 = HALF_MAX_10_EXP;
939
940 static constexpr bool has_infinity = true;
941 static constexpr bool has_quiet_NaN = true;
942 static constexpr bool has_signaling_NaN = true;
943 static constexpr float_denorm_style has_denorm = denorm_present;
944 static constexpr bool has_denorm_loss = false;
945 static constexpr IMATH_INTERNAL_NAMESPACE::half infinity () IMATH_NOEXCEPT
946 {
947 return IMATH_INTERNAL_NAMESPACE::half (IMATH_INTERNAL_NAMESPACE::half::FromBits, 0x7c00); /*half::posInf()*/
948 }
949 static constexpr IMATH_INTERNAL_NAMESPACE::half quiet_NaN () IMATH_NOEXCEPT
950 {
951 return IMATH_INTERNAL_NAMESPACE::half (IMATH_INTERNAL_NAMESPACE::half::FromBits, 0x7fff); /*half::qNan()*/
952 }
953 static constexpr IMATH_INTERNAL_NAMESPACE::half signaling_NaN () IMATH_NOEXCEPT
954 {
955 return IMATH_INTERNAL_NAMESPACE::half (IMATH_INTERNAL_NAMESPACE::half::FromBits, 0x7dff); /*half::sNan()*/
956 }
957 static constexpr IMATH_INTERNAL_NAMESPACE::half denorm_min () IMATH_NOEXCEPT
958 {
959 return IMATH_INTERNAL_NAMESPACE::half (IMATH_INTERNAL_NAMESPACE::half::FromBits, 0x0001); /*HALF_DENORM_MIN*/
960 }
961
962 static constexpr bool is_iec559 = false;
963 static constexpr bool is_bounded = false;
964 static constexpr bool is_modulo = false;
965
966 static constexpr bool traps = true;
967 static constexpr bool tinyness_before = false;
968 static constexpr float_round_style round_style = round_to_nearest;
969};
970
971} // namespace std
972
973//----------
974// Debugging
975//----------
976
977IMATH_EXPORT void printBits (std::ostream& os, IMATH_INTERNAL_NAMESPACE::half h);
978IMATH_EXPORT void printBits (std::ostream& os, float f);
979IMATH_EXPORT void printBits (char c[19], IMATH_INTERNAL_NAMESPACE::half h);
980IMATH_EXPORT void printBits (char c[35], float f);
981
982# if !defined(__CUDACC__) && !defined(__CUDA_FP16_HPP__)
983using half = IMATH_INTERNAL_NAMESPACE::half;
984# else
985# include <cuda_fp16.h>
986# endif
987
988#endif // __cplusplus
989
990#endif // IMATH_HALF_H_
991

// source code of include/Imath/half.h