1///////////////////////////////////////////////////////////////////////////
2//
3// Copyright (c) 2002, Industrial Light & Magic, a division of Lucas
4// Digital Ltd. LLC
5//
6// All rights reserved.
7//
8// Redistribution and use in source and binary forms, with or without
9// modification, are permitted provided that the following conditions are
10// met:
11// * Redistributions of source code must retain the above copyright
12// notice, this list of conditions and the following disclaimer.
13// * Redistributions in binary form must reproduce the above
14// copyright notice, this list of conditions and the following disclaimer
15// in the documentation and/or other materials provided with the
16// distribution.
17// * Neither the name of Industrial Light & Magic nor the names of
18// its contributors may be used to endorse or promote products derived
19// from this software without specific prior written permission.
20//
21// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32//
33///////////////////////////////////////////////////////////////////////////
34
35// Primary authors:
36// Florian Kainz <kainz@ilm.com>
37// Rod Bogart <rgb@ilm.com>
38
39//---------------------------------------------------------------------------
40//
41// half -- a 16-bit floating point number class:
42//
43// Type half can represent positive and negative numbers whose
44// magnitude is between roughly 6.1e-5 and 6.5e+4 with a relative
45// error of 9.8e-4; numbers smaller than 6.1e-5 can be represented
46// with an absolute error of 6.0e-8. All integers from -2048 to
47// +2048 can be represented exactly.
48//
49// Type half behaves (almost) like the built-in C++ floating point
50// types. In arithmetic expressions, half, float and double can be
51// mixed freely. Here are a few examples:
52//
53// half a (3.5);
54// float b (a + sqrt (a));
55// a += b;
56// b += a;
57// b = a + 7;
58//
59// Conversions from half to float are lossless; all half numbers
60// are exactly representable as floats.
61//
62// Conversions from float to half may not preserve a float's value
63// exactly. If a float is not representable as a half, then the
64// float value is rounded to the nearest representable half. If a
65// float value is exactly in the middle between the two closest
66// representable half values, then the float value is rounded to
67// the closest half whose least significant bit is zero.
68//
69// Overflows during float-to-half conversions cause arithmetic
70// exceptions. An overflow occurs when the float value to be
71// converted is too large to be represented as a half, or if the
72// float value is an infinity or a NAN.
73//
74// The implementation of type half makes the following assumptions
75// about the implementation of the built-in C++ types:
76//
77// float is an IEEE 754 single-precision number
78// sizeof (float) == 4
79// sizeof (unsigned int) == sizeof (float)
80// alignof (unsigned int) == alignof (float)
81// sizeof (unsigned short) == 2
82//
83//---------------------------------------------------------------------------
84
85#ifndef _HALF_H_
86#define _HALF_H_
87
88#include "halfExport.h" // for definition of HALF_EXPORT
89#include <iostream>
90
91class half
92{
93 public:
94
95 //-------------
96 // Constructors
97 //-------------
98
99 half () = default; // no initialization
100 half (float f);
101 // rule of 5
102 ~half () = default;
103 half (const half &) = default;
104 half (half &&) noexcept = default;
105
106 //--------------------
107 // Conversion to float
108 //--------------------
109
110 operator float () const;
111
112
113 //------------
114 // Unary minus
115 //------------
116
117 half operator - () const;
118
119
120 //-----------
121 // Assignment
122 //-----------
123
124 half & operator = (const half &h) = default;
125 half & operator = (half &&h) noexcept = default;
126 half & operator = (float f);
127
128 half & operator += (half h);
129 half & operator += (float f);
130
131 half & operator -= (half h);
132 half & operator -= (float f);
133
134 half & operator *= (half h);
135 half & operator *= (float f);
136
137 half & operator /= (half h);
138 half & operator /= (float f);
139
140
141 //---------------------------------------------------------
142 // Round to n-bit precision (n should be between 0 and 10).
143 // After rounding, the significand's 10-n least significant
144 // bits will be zero.
145 //---------------------------------------------------------
146
147 half round (unsigned int n) const;
148
149
150 //--------------------------------------------------------------------
151 // Classification:
152 //
153 // h.isFinite() returns true if h is a normalized number,
154 // a denormalized number or zero
155 //
156 // h.isNormalized() returns true if h is a normalized number
157 //
158 // h.isDenormalized() returns true if h is a denormalized number
159 //
160 // h.isZero() returns true if h is zero
161 //
162 // h.isNan() returns true if h is a NAN
163 //
164 // h.isInfinity() returns true if h is a positive
165 // or a negative infinity
166 //
167 // h.isNegative() returns true if the sign bit of h
168 // is set (negative)
169 //--------------------------------------------------------------------
170
171 bool isFinite () const;
172 bool isNormalized () const;
173 bool isDenormalized () const;
174 bool isZero () const;
175 bool isNan () const;
176 bool isInfinity () const;
177 bool isNegative () const;
178
179
180 //--------------------------------------------
181 // Special values
182 //
183 // posInf() returns +infinity
184 //
185 // negInf() returns -infinity
186 //
187 // qNan() returns a NAN with the bit
188 // pattern 0111111111111111
189 //
190 // sNan() returns a NAN with the bit
191 // pattern 0111110111111111
192 //--------------------------------------------
193
194 static half posInf ();
195 static half negInf ();
196 static half qNan ();
197 static half sNan ();
198
199
200 //--------------------------------------
201 // Access to the internal representation
202 //--------------------------------------
203
204 HALF_EXPORT unsigned short bits () const;
205 HALF_EXPORT void setBits (unsigned short bits);
206
207
208 public:
209
210 union uif
211 {
212 unsigned int i;
213 float f;
214 };
215
216 private:
217
218 HALF_EXPORT static short convert (int i);
219 HALF_EXPORT static float overflow ();
220
221 unsigned short _h;
222
223 HALF_EXPORT static const uif _toFloat[1 << 16];
224 HALF_EXPORT static const unsigned short _eLut[1 << 9];
225};
226
227
228
229//-----------
230// Stream I/O
231//-----------
232
233HALF_EXPORT std::ostream & operator << (std::ostream &os, half h);
234HALF_EXPORT std::istream & operator >> (std::istream &is, half &h);
235
236
237//----------
238// Debugging
239//----------
240
241HALF_EXPORT void printBits (std::ostream &os, half h);
242HALF_EXPORT void printBits (std::ostream &os, float f);
243HALF_EXPORT void printBits (char c[19], half h);
244HALF_EXPORT void printBits (char c[35], float f);
245
246
//-------------------------------------------------------------------------
// Limits
//
// Visual C++ will complain if HALF_MIN, HALF_NRM_MIN etc. are not float
// constants, but at least one other compiler (gcc 2.96) produces incorrect
// results if they are.  The constants are therefore float literals when
// building with Visual C++ and double literals everywhere else.
//-------------------------------------------------------------------------

#if (defined _WIN32 || defined _WIN64) && defined _MSC_VER

  #define HALF_MIN	5.96046448e-08f	// Smallest positive half

  #define HALF_NRM_MIN	6.10351562e-05f	// Smallest positive normalized half

  #define HALF_MAX	65504.0f	// Largest positive half

  #define HALF_EPSILON	0.00097656f	// Smallest positive e for which
					// half (1.0 + e) != half (1.0)
#else

  #define HALF_MIN	5.96046448e-08	// Smallest positive half

  #define HALF_NRM_MIN	6.10351562e-05	// Smallest positive normalized half

  #define HALF_MAX	65504.0		// Largest positive half

  #define HALF_EPSILON	0.00097656	// Smallest positive e for which
					// half (1.0 + e) != half (1.0)
#endif
276
277
#define HALF_MANT_DIG	11		// Number of digits in mantissa
					// (significand + hidden leading 1)

// floor( (HALF_MANT_DIG - 1) * log10(2) ) => 3.01... -> 3
#define HALF_DIG	3		// Number of base 10 digits that
					// can be represented without change

// ceil(HALF_MANT_DIG * log10(2) + 1) => 4.31... -> 5
#define HALF_DECIMAL_DIG	5	// Number of base-10 digits that are
					// necessary to uniquely represent all
					// distinct values

#define HALF_RADIX	2		// Base of the exponent

#define HALF_MIN_EXP	-13		// Minimum negative integer such that
					// HALF_RADIX raised to the power of
					// one less than that integer is a
					// normalized half

#define HALF_MAX_EXP	16		// Maximum positive integer such that
					// HALF_RADIX raised to the power of
					// one less than that integer is a
					// normalized half

#define HALF_MIN_10_EXP	-4		// Minimum negative integer such
					// that 10 raised to that power is
					// a normalized half

#define HALF_MAX_10_EXP	4		// Maximum positive integer such
					// that 10 raised to that power is
					// a normalized half
310
311
312//---------------------------------------------------------------------------
313//
314// Implementation --
315//
// Representation of a float:
//
//     We assume that a float, f, is an IEEE 754 single-precision
//     floating point number, whose bits are arranged as follows:
//
//         31 (msb)
//         |
//         | 30     23
//         | |      |
//         | |      | 22                    0 (lsb)
//         | |      | |                     |
//         X XXXXXXXX XXXXXXXXXXXXXXXXXXXXXXX
//
//         s e        m
//
//     s is the sign-bit, e is the exponent and m is the significand.
//
//     If e is between 1 and 254, f is a normalized number:
//
//         f = (-1)^s * 2^(e-127) * 1.m
//
//     If e is 0, and m is not zero, f is a denormalized number:
//
//         f = (-1)^s * 2^(-126) * 0.m
//
//     If e and m are both zero, f is zero:
//
//         f = 0.0
//
//     If e is 255, f is an "infinity" or "not a number" (NAN),
//     depending on whether m is zero or not.
//
//     Examples:
//
//         0 00000000 00000000000000000000000 = 0.0
//         0 01111110 00000000000000000000000 = 0.5
//         0 01111111 00000000000000000000000 = 1.0
//         0 10000000 00000000000000000000000 = 2.0
//         0 10000000 10000000000000000000000 = 3.0
//         1 10000101 11110000010000000000000 = -124.0625
//         0 11111111 00000000000000000000000 = +infinity
//         1 11111111 00000000000000000000000 = -infinity
//         0 11111111 10000000000000000000000 = NAN
//         1 11111111 11111111111111111111111 = NAN
//
362//
// Representation of a half:
//
//     Here is the bit-layout for a half number, h:
//
//         15 (msb)
//         |
//         | 14  10
//         | |   |
//         | |   | 9        0 (lsb)
//         | |   | |        |
//         X XXXXX XXXXXXXXXX
//
//         s e     m
//
//     s is the sign-bit, e is the exponent and m is the significand.
//
//     If e is between 1 and 30, h is a normalized number:
//
//         h = (-1)^s * 2^(e-15) * 1.m
//
//     If e is 0, and m is not zero, h is a denormalized number:
//
//         h = (-1)^s * 2^(-14) * 0.m
//
//     If e and m are both zero, h is zero:
//
//         h = 0.0
//
//     If e is 31, h is an "infinity" or "not a number" (NAN),
//     depending on whether m is zero or not.
//
//     Examples:
//
//         0 00000 0000000000 = 0.0
//         0 01110 0000000000 = 0.5
//         0 01111 0000000000 = 1.0
//         0 10000 0000000000 = 2.0
//         0 10000 1000000000 = 3.0
//         1 10101 1111000001 = -124.0625
//         0 11111 0000000000 = +infinity
//         1 11111 0000000000 = -infinity
//         0 11111 1000000000 = NAN
//         1 11111 1111111111 = NAN
408//
409// Conversion:
410//
411// Converting from a float to a half requires some non-trivial bit
412// manipulations. In some cases, this makes conversion relatively
413// slow, but the most common case is accelerated via table lookups.
414//
415// Converting back from a half to a float is easier because we don't
416// have to do any rounding. In addition, there are only 65536
417// different half numbers; we can convert each of those numbers once
418// and store the results in a table. Later, all conversions can be
419// done using only simple table lookups.
420//
421//---------------------------------------------------------------------------
422
423
424//----------------------------
425// Half-from-float constructor
426//----------------------------
427
428inline
429half::half (float f)
430{
431 uif x;
432
433 x.f = f;
434
435 if (f == 0)
436 {
437 //
438 // Common special case - zero.
439 // Preserve the zero's sign bit.
440 //
441
442 _h = (x.i >> 16);
443 }
444 else
445 {
446 //
447 // We extract the combined sign and exponent, e, from our
448 // floating-point number, f. Then we convert e to the sign
449 // and exponent of the half number via a table lookup.
450 //
451 // For the most common case, where a normalized half is produced,
452 // the table lookup returns a non-zero value; in this case, all
453 // we have to do is round f's significand to 10 bits and combine
454 // the result with e.
455 //
456 // For all other cases (overflow, zeroes, denormalized numbers
457 // resulting from underflow, infinities and NANs), the table
458 // lookup returns zero, and we call a longer, non-inline function
459 // to do the float-to-half conversion.
460 //
461
462 int e = (x.i >> 23) & 0x000001ff;
463
464 e = _eLut[e];
465
466 if (e)
467 {
468 //
469 // Simple case - round the significand, m, to 10
470 // bits and combine it with the sign and exponent.
471 //
472
473 int m = x.i & 0x007fffff;
474 _h = e + ((m + 0x00000fff + ((m >> 13) & 1)) >> 13);
475 }
476 else
477 {
478 //
479 // Difficult case - call a function.
480 //
481
482 _h = convert (i: x.i);
483 }
484 }
485}
486
487
488//------------------------------------------
489// Half-to-float conversion via table lookup
490//------------------------------------------
491
492inline
493half::operator float () const
494{
495 return _toFloat[_h].f;
496}
497
498
499//-------------------------
500// Round to n-bit precision
501//-------------------------
502
503inline half
504half::round (unsigned int n) const
505{
506 //
507 // Parameter check.
508 //
509
510 if (n >= 10)
511 return *this;
512
513 //
514 // Disassemble h into the sign, s,
515 // and the combined exponent and significand, e.
516 //
517
518 unsigned short s = _h & 0x8000;
519 unsigned short e = _h & 0x7fff;
520
521 //
522 // Round the exponent and significand to the nearest value
523 // where ones occur only in the (10-n) most significant bits.
524 // Note that the exponent adjusts automatically if rounding
525 // up causes the significand to overflow.
526 //
527
528 e >>= 9 - n;
529 e += e & 1;
530 e <<= 9 - n;
531
532 //
533 // Check for exponent overflow.
534 //
535
536 if (e >= 0x7c00)
537 {
538 //
539 // Overflow occurred -- truncate instead of rounding.
540 //
541
542 e = _h;
543 e >>= 10 - n;
544 e <<= 10 - n;
545 }
546
547 //
548 // Put the original sign bit back.
549 //
550
551 half h;
552 h._h = s | e;
553
554 return h;
555}
556
557
558//-----------------------
559// Other inline functions
560//-----------------------
561
562inline half
563half::operator - () const
564{
565 half h;
566 h._h = _h ^ 0x8000;
567 return h;
568}
569
570
571inline half &
572half::operator = (float f)
573{
574 *this = half (f);
575 return *this;
576}
577
578
579inline half &
580half::operator += (half h)
581{
582 *this = half (float (*this) + float (h));
583 return *this;
584}
585
586
587inline half &
588half::operator += (float f)
589{
590 *this = half (float (*this) + f);
591 return *this;
592}
593
594
595inline half &
596half::operator -= (half h)
597{
598 *this = half (float (*this) - float (h));
599 return *this;
600}
601
602
603inline half &
604half::operator -= (float f)
605{
606 *this = half (float (*this) - f);
607 return *this;
608}
609
610
611inline half &
612half::operator *= (half h)
613{
614 *this = half (float (*this) * float (h));
615 return *this;
616}
617
618
619inline half &
620half::operator *= (float f)
621{
622 *this = half (float (*this) * f);
623 return *this;
624}
625
626
627inline half &
628half::operator /= (half h)
629{
630 *this = half (float (*this) / float (h));
631 return *this;
632}
633
634
635inline half &
636half::operator /= (float f)
637{
638 *this = half (float (*this) / f);
639 return *this;
640}
641
642
643inline bool
644half::isFinite () const
645{
646 unsigned short e = (_h >> 10) & 0x001f;
647 return e < 31;
648}
649
650
651inline bool
652half::isNormalized () const
653{
654 unsigned short e = (_h >> 10) & 0x001f;
655 return e > 0 && e < 31;
656}
657
658
659inline bool
660half::isDenormalized () const
661{
662 unsigned short e = (_h >> 10) & 0x001f;
663 unsigned short m = _h & 0x3ff;
664 return e == 0 && m != 0;
665}
666
667
668inline bool
669half::isZero () const
670{
671 return (_h & 0x7fff) == 0;
672}
673
674
675inline bool
676half::isNan () const
677{
678 unsigned short e = (_h >> 10) & 0x001f;
679 unsigned short m = _h & 0x3ff;
680 return e == 31 && m != 0;
681}
682
683
684inline bool
685half::isInfinity () const
686{
687 unsigned short e = (_h >> 10) & 0x001f;
688 unsigned short m = _h & 0x3ff;
689 return e == 31 && m == 0;
690}
691
692
693inline bool
694half::isNegative () const
695{
696 return (_h & 0x8000) != 0;
697}
698
699
700inline half
701half::posInf ()
702{
703 half h;
704 h._h = 0x7c00;
705 return h;
706}
707
708
709inline half
710half::negInf ()
711{
712 half h;
713 h._h = 0xfc00;
714 return h;
715}
716
717
718inline half
719half::qNan ()
720{
721 half h;
722 h._h = 0x7fff;
723 return h;
724}
725
726
727inline half
728half::sNan ()
729{
730 half h;
731 h._h = 0x7dff;
732 return h;
733}
734
735
736inline unsigned short
737half::bits () const
738{
739 return _h;
740}
741
742
743inline void
744half::setBits (unsigned short bits)
745{
746 _h = bits;
747}
748
749#endif
750

// source code of include/OpenEXR/half.h