half.h source code [include/OpenEXR/half.h]

1	///////////////////////////////////////////////////////////////////////////
2	//
3	// Copyright (c) 2002, Industrial Light & Magic, a division of Lucas
4	// Digital Ltd. LLC
5	//
6	// All rights reserved.
7	//
8	// Redistribution and use in source and binary forms, with or without
9	// modification, are permitted provided that the following conditions are
10	// met:
11	// Redistributions of source code must retain the above copyright*
12	// notice, this list of conditions and the following disclaimer.
13	// Redistributions in binary form must reproduce the above*
14	// copyright notice, this list of conditions and the following disclaimer
15	// in the documentation and/or other materials provided with the
16	// distribution.
17	// Neither the name of Industrial Light & Magic nor the names of*
18	// its contributors may be used to endorse or promote products derived
19	// from this software without specific prior written permission.
20	//
21	// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22	// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23	// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24	// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25	// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26	// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27	// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28	// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29	// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30	// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31	// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32	//
33	///////////////////////////////////////////////////////////////////////////
34
35	// Primary authors:
36	// Florian Kainz <kainz@ilm.com>
37	// Rod Bogart <rgb@ilm.com>
38
39	//---------------------------------------------------------------------------
40	//
41	// half -- a 16-bit floating point number class:
42	//
43	// Type half can represent positive and negative numbers whose
44	// magnitude is between roughly 6.1e-5 and 6.5e+4 with a relative
45	// error of 9.8e-4; numbers smaller than 6.1e-5 can be represented
46	// with an absolute error of 6.0e-8. All integers from -2048 to
47	// +2048 can be represented exactly.
48	//
49	// Type half behaves (almost) like the built-in C++ floating point
50	// types. In arithmetic expressions, half, float and double can be
51	// mixed freely. Here are a few examples:
52	//
53	// half a (3.5);
54	// float b (a + sqrt (a));
55	// a += b;
56	// b += a;
57	// b = a + 7;
58	//
59	// Conversions from half to float are lossless; all half numbers
60	// are exactly representable as floats.
61	//
62	// Conversions from float to half may not preserve a float's value
63	// exactly. If a float is not representable as a half, then the
64	// float value is rounded to the nearest representable half. If a
65	// float value is exactly in the middle between the two closest
66	// representable half values, then the float value is rounded to
67	// the closest half whose least significant bit is zero.
68	//
69	// Overflows during float-to-half conversions cause arithmetic
70	// exceptions. An overflow occurs when the float value to be
71	// converted is too large to be represented as a half, or if the
72	// float value is an infinity or a NAN.
73	//
74	// The implementation of type half makes the following assumptions
75	// about the implementation of the built-in C++ types:
76	//
77	// float is an IEEE 754 single-precision number
78	// sizeof (float) == 4
79	// sizeof (unsigned int) == sizeof (float)
80	// alignof (unsigned int) == alignof (float)
81	// sizeof (unsigned short) == 2
82	//
83	//---------------------------------------------------------------------------
84
85	#ifndef _HALF_H_
86	#define _HALF_H_
87
88	#include "halfExport.h" // for definition of HALF_EXPORT
89	#include <iostream>
90
91	class half
92	{
93	public:
94
95	//-------------
96	// Constructors
97	//-------------
98
99	half () = default; // no initialization
100	half (float f);
101	// rule of 5
102	~half () = default;
103	half (const half &) = default;
104	half (half &&) noexcept = default;
105
106	//--------------------
107	// Conversion to float
108	//--------------------
109
110	operator float () const;
111
112
113	//------------
114	// Unary minus
115	//------------
116
117	half operator - () const;
118
119
120	//-----------
121	// Assignment
122	//-----------
123
124	half & operator = (const half &h) = default;
125	half & operator = (half &&h) noexcept = default;
126	half & operator = (float f);
127
128	half & operator += (half h);
129	half & operator += (float f);
130
131	half & operator -= (half h);
132	half & operator -= (float f);
133
134	half & operator *= (half h);
135	half & operator = (float* f);
136
137	half & operator /= (half h);
138	half & operator /= (float f);
139
140
141	//---------------------------------------------------------
142	// Round to n-bit precision (n should be between 0 and 10).
143	// After rounding, the significand's 10-n least significant
144	// bits will be zero.
145	//---------------------------------------------------------
146
147	half round (unsigned int n) const;
148
149
150	//--------------------------------------------------------------------
151	// Classification:
152	//
153	// h.isFinite() returns true if h is a normalized number,
154	// a denormalized number or zero
155	//
156	// h.isNormalized() returns true if h is a normalized number
157	//
158	// h.isDenormalized() returns true if h is a denormalized number
159	//
160	// h.isZero() returns true if h is zero
161	//
162	// h.isNan() returns true if h is a NAN
163	//
164	// h.isInfinity() returns true if h is a positive
165	// or a negative infinity
166	//
167	// h.isNegative() returns true if the sign bit of h
168	// is set (negative)
169	//--------------------------------------------------------------------
170
171	bool isFinite () const;
172	bool isNormalized () const;
173	bool isDenormalized () const;
174	bool isZero () const;
175	bool isNan () const;
176	bool isInfinity () const;
177	bool isNegative () const;
178
179
180	//--------------------------------------------
181	// Special values
182	//
183	// posInf() returns +infinity
184	//
185	// negInf() returns -infinity
186	//
187	// qNan() returns a NAN with the bit
188	// pattern 0111111111111111
189	//
190	// sNan() returns a NAN with the bit
191	// pattern 0111110111111111
192	//--------------------------------------------
193
194	static half posInf ();
195	static half negInf ();
196	static half qNan ();
197	static half sNan ();
198
199
200	//--------------------------------------
201	// Access to the internal representation
202	//--------------------------------------
203
204	HALF_EXPORT unsigned short bits () const;
205	HALF_EXPORT void setBits (unsigned short bits);
206
207
208	public:
209
210	union uif
211	{
212	unsigned int i;
213	float f;
214	};
215
216	private:
217
218	HALF_EXPORT static short convert (int i);
219	HALF_EXPORT static float overflow ();
220
221	unsigned short _h;
222
223	HALF_EXPORT static const uif _toFloat[`1` << `16`];
224	HALF_EXPORT static const unsigned short _eLut[`1` << `9`];
225	};
226
227
228
229	//-----------
230	// Stream I/O
231	//-----------
232
233	HALF_EXPORT std::ostream & operator << (std::ostream &os, half h);
234	HALF_EXPORT std::istream & operator >> (std::istream &is, half &h);
235
236
237	//----------
238	// Debugging
239	//----------
240
241	HALF_EXPORT void printBits (std::ostream &os, half h);
242	HALF_EXPORT void printBits (std::ostream &os, float f);
243	HALF_EXPORT void printBits (char c[`19`], half h);
244	HALF_EXPORT void printBits (char c[`35`], float f);
245
246
247	//-------------------------------------------------------------------------
248	// Limits
249	//
250	// Visual C++ will complain if HALF_MIN, HALF_NRM_MIN etc. are not float
251	// constants, but at least one other compiler (gcc 2.96) produces incorrect
252	// results if they are.
253	//-------------------------------------------------------------------------
254
255	#if (defined _WIN32 \|\| defined _WIN64) && defined _MSC_VER
256
257	#define HALF_MIN 5.96046448e-08f // Smallest positive half
258
259	#define HALF_NRM_MIN 6.10351562e-05f // Smallest positive normalized half
260
261	#define HALF_MAX 65504.0f // Largest positive half
262
263	#define HALF_EPSILON 0.00097656f // Smallest positive e for which
264	// half (1.0 + e) != half (1.0)
265	#else
266
267	#define HALF_MIN 5.96046448e-08 // Smallest positive half
268
269	#define HALF_NRM_MIN 6.10351562e-05 // Smallest positive normalized half
270
271	#define HALF_MAX 65504.0 // Largest positive half
272
273	#define HALF_EPSILON 0.00097656 // Smallest positive e for which
274	// half (1.0 + e) != half (1.0)
275	#endif
276
277
278	#define HALF_MANT_DIG 11 // Number of digits in mantissa
279	// (significand + hidden leading 1)
280
281	//
282	// floor( (HALF_MANT_DIG - 1) log10(2) ) => 3.01... -> 3*
283	#define HALF_DIG 3 // Number of base 10 digits that
284	// can be represented without change
285
286	// ceil(HALF_MANT_DIG log10(2) + 1) => 4.31... -> 5*
287	#define HALF_DECIMAL_DIG 5 // Number of base-10 digits that are
288	// necessary to uniquely represent all
289	// distinct values
290
291	#define HALF_RADIX 2 // Base of the exponent
292
293	#define HALF_MIN_EXP -13 // Minimum negative integer such that
294	// HALF_RADIX raised to the power of
295	// one less than that integer is a
296	// normalized half
297
298	#define HALF_MAX_EXP 16 // Maximum positive integer such that
299	// HALF_RADIX raised to the power of
300	// one less than that integer is a
301	// normalized half
302
303	#define HALF_MIN_10_EXP -4 // Minimum positive integer such
304	// that 10 raised to that power is
305	// a normalized half
306
307	#define HALF_MAX_10_EXP 4 // Maximum positive integer such
308	// that 10 raised to that power is
309	// a normalized half
310
311
312	//---------------------------------------------------------------------------
313	//
314	// Implementation --
315	//
316	// Representation of a float:
317	//
318	// We assume that a float, f, is an IEEE 754 single-precision
319	// floating point number, whose bits are arranged as follows:
320	//
321	// 31 (msb)
322	// \|
323	// \| 30 23
324	// \| \| \|
325	// \| \| \| 22 0 (lsb)
326	// \| \| \| \| \|
327	// X XXXXXXXX XXXXXXXXXXXXXXXXXXXXXXX
328	//
329	// s e m
330	//
331	// S is the sign-bit, e is the exponent and m is the significand.
332	//
333	// If e is between 1 and 254, f is a normalized number:
334	//
335	// s e-127
336	// f = (-1) 2 * 1.m*
337	//
338	// If e is 0, and m is not zero, f is a denormalized number:
339	//
340	// s -126
341	// f = (-1) 2 * 0.m*
342	//
343	// If e and m are both zero, f is zero:
344	//
345	// f = 0.0
346	//
347	// If e is 255, f is an "infinity" or "not a number" (NAN),
348	// depending on whether m is zero or not.
349	//
350	// Examples:
351	//
352	// 0 00000000 00000000000000000000000 = 0.0
353	// 0 01111110 00000000000000000000000 = 0.5
354	// 0 01111111 00000000000000000000000 = 1.0
355	// 0 10000000 00000000000000000000000 = 2.0
356	// 0 10000000 10000000000000000000000 = 3.0
357	// 1 10000101 11110000010000000000000 = -124.0625
358	// 0 11111111 00000000000000000000000 = +infinity
359	// 1 11111111 00000000000000000000000 = -infinity
360	// 0 11111111 10000000000000000000000 = NAN
361	// 1 11111111 11111111111111111111111 = NAN
362	//
363	// Representation of a half:
364	//
365	// Here is the bit-layout for a half number, h:
366	//
367	// 15 (msb)
368	// \|
369	// \| 14 10
370	// \| \| \|
371	// \| \| \| 9 0 (lsb)
372	// \| \| \| \| \|
373	// X XXXXX XXXXXXXXXX
374	//
375	// s e m
376	//
377	// S is the sign-bit, e is the exponent and m is the significand.
378	//
379	// If e is between 1 and 30, h is a normalized number:
380	//
381	// s e-15
382	// h = (-1) 2 * 1.m*
383	//
384	// If e is 0, and m is not zero, h is a denormalized number:
385	//
386	// S -14
387	// h = (-1) 2 * 0.m*
388	//
389	// If e and m are both zero, h is zero:
390	//
391	// h = 0.0
392	//
393	// If e is 31, h is an "infinity" or "not a number" (NAN),
394	// depending on whether m is zero or not.
395	//
396	// Examples:
397	//
398	// 0 00000 0000000000 = 0.0
399	// 0 01110 0000000000 = 0.5
400	// 0 01111 0000000000 = 1.0
401	// 0 10000 0000000000 = 2.0
402	// 0 10000 1000000000 = 3.0
403	// 1 10101 1111000001 = -124.0625
404	// 0 11111 0000000000 = +infinity
405	// 1 11111 0000000000 = -infinity
406	// 0 11111 1000000000 = NAN
407	// 1 11111 1111111111 = NAN
408	//
409	// Conversion:
410	//
411	// Converting from a float to a half requires some non-trivial bit
412	// manipulations. In some cases, this makes conversion relatively
413	// slow, but the most common case is accelerated via table lookups.
414	//
415	// Converting back from a half to a float is easier because we don't
416	// have to do any rounding. In addition, there are only 65536
417	// different half numbers; we can convert each of those numbers once
418	// and store the results in a table. Later, all conversions can be
419	// done using only simple table lookups.
420	//
421	//---------------------------------------------------------------------------
422
423
424	//----------------------------
425	// Half-from-float constructor
426	//----------------------------
427
428	inline
429	half::half (float f)
430	{
431	uif x;
432
433	x.f = f;
434
435	if (f == `0`)
436	{
437	//
438	// Common special case - zero.
439	// Preserve the zero's sign bit.
440	//
441
442	_h = (x.i >> `16`);
443	}
444	else
445	{
446	//
447	// We extract the combined sign and exponent, e, from our
448	// floating-point number, f. Then we convert e to the sign
449	// and exponent of the half number via a table lookup.
450	//
451	// For the most common case, where a normalized half is produced,
452	// the table lookup returns a non-zero value; in this case, all
453	// we have to do is round f's significand to 10 bits and combine
454	// the result with e.
455	//
456	// For all other cases (overflow, zeroes, denormalized numbers
457	// resulting from underflow, infinities and NANs), the table
458	// lookup returns zero, and we call a longer, non-inline function
459	// to do the float-to-half conversion.
460	//
461
462	int e = (x.i >> `23`) & `0x000001ff`;
463
464	e = _eLut[e];
465
466	if (e)
467	{
468	//
469	// Simple case - round the significand, m, to 10
470	// bits and combine it with the sign and exponent.
471	//
472
473	int m = x.i & `0x007fffff`;
474	_h = e + ((m + `0x00000fff` + ((m >> `13`) & `1`)) >> `13`);
475	}
476	else
477	{
478	//
479	// Difficult case - call a function.
480	//
481
482	_h = convert (i: x.i);
483	}
484	}
485	}
486
487
488	//------------------------------------------
489	// Half-to-float conversion via table lookup
490	//------------------------------------------
491
492	inline
493	half::operator float () const
494	{
495	return _toFloat[_h].f;
496	}
497
498
499	//-------------------------
500	// Round to n-bit precision
501	//-------------------------
502
503	inline half
504	half::round (unsigned int n) const
505	{
506	//
507	// Parameter check.
508	//
509
510	if (n >= `10`)
511	return *this;
512
513	//
514	// Disassemble h into the sign, s,
515	// and the combined exponent and significand, e.
516	//
517
518	unsigned short s = _h & `0x8000`;
519	unsigned short e = _h & `0x7fff`;
520
521	//
522	// Round the exponent and significand to the nearest value
523	// where ones occur only in the (10-n) most significant bits.
524	// Note that the exponent adjusts automatically if rounding
525	// up causes the significand to overflow.
526	//
527
528	e >>= `9` - n;
529	e += e & `1`;
530	e <<= `9` - n;
531
532	//
533	// Check for exponent overflow.
534	//
535
536	if (e >= `0x7c00`)
537	{
538	//
539	// Overflow occurred -- truncate instead of rounding.
540	//
541
542	e = _h;
543	e >>= `10` - n;
544	e <<= `10` - n;
545	}
546
547	//
548	// Put the original sign bit back.
549	//
550
551	half h;
552	h._h = s \| e;
553
554	return h;
555	}
556
557
558	//-----------------------
559	// Other inline functions
560	//-----------------------
561
562	inline half
563	half::operator - () const
564	{
565	half h;
566	h._h = _h ^ `0x8000`;
567	return h;
568	}
569
570
571	inline half &
572	half::operator = (float f)
573	{
574	*this = half (f);
575	return *this;
576	}
577
578
579	inline half &
580	half::operator += (half h)
581	{
582	*this = half (float (*this) + float (h));
583	return *this;
584	}
585
586
587	inline half &
588	half::operator += (float f)
589	{
590	*this = half (float (*this) + f);
591	return *this;
592	}
593
594
595	inline half &
596	half::operator -= (half h)
597	{
598	*this = half (float (*this) - float (h));
599	return *this;
600	}
601
602
603	inline half &
604	half::operator -= (float f)
605	{
606	*this = half (float (*this) - f);
607	return *this;
608	}
609
610
611	inline half &
612	half::operator *= (half h)
613	{
614	*this = half (float (*this) * float (h));
615	return *this;
616	}
617
618
619	inline half &
620	half::operator = (float* f)
621	{
622	*this = half (float (*this) * f);
623	return *this;
624	}
625
626
627	inline half &
628	half::operator /= (half h)
629	{
630	*this = half (float (*this) / float (h));
631	return *this;
632	}
633
634
635	inline half &
636	half::operator /= (float f)
637	{
638	*this = half (float (*this) / f);
639	return *this;
640	}
641
642
643	inline bool
644	half::isFinite () const
645	{
646	unsigned short e = (_h >> `10`) & `0x001f`;
647	return e < `31`;
648	}
649
650
651	inline bool
652	half::isNormalized () const
653	{
654	unsigned short e = (_h >> `10`) & `0x001f`;
655	return e > `0` && e < `31`;
656	}
657
658
659	inline bool
660	half::isDenormalized () const
661	{
662	unsigned short e = (_h >> `10`) & `0x001f`;
663	unsigned short m = _h & `0x3ff`;
664	return e == `0` && m != `0`;
665	}
666
667
668	inline bool
669	half::isZero () const
670	{
671	return (_h & `0x7fff`) == `0`;
672	}
673
674
675	inline bool
676	half::isNan () const
677	{
678	unsigned short e = (_h >> `10`) & `0x001f`;
679	unsigned short m = _h & `0x3ff`;
680	return e == `31` && m != `0`;
681	}
682
683
684	inline bool
685	half::isInfinity () const
686	{
687	unsigned short e = (_h >> `10`) & `0x001f`;
688	unsigned short m = _h & `0x3ff`;
689	return e == `31` && m == `0`;
690	}
691
692
693	inline bool
694	half::isNegative () const
695	{
696	return (_h & `0x8000`) != `0`;
697	}
698
699
700	inline half
701	half::posInf ()
702	{
703	half h;
704	h._h = `0x7c00`;
705	return h;
706	}
707
708
709	inline half
710	half::negInf ()
711	{
712	half h;
713	h._h = `0xfc00`;
714	return h;
715	}
716
717
718	inline half
719	half::qNan ()
720	{
721	half h;
722	h._h = `0x7fff`;
723	return h;
724	}
725
726
727	inline half
728	half::sNan ()
729	{
730	half h;
731	h._h = `0x7dff`;
732	return h;
733	}
734
735
736	inline unsigned short
737	half::bits () const
738	{
739	return _h;
740	}
741
742
743	inline void
744	half::setBits (unsigned short bits)
745	{
746	_h = bits;
747	}
748
749	#endif
750

source code of include/OpenEXR/half.h