| 1 | // |
| 2 | // SPDX-License-Identifier: BSD-3-Clause |
| 3 | // Copyright Contributors to the OpenEXR Project. |
| 4 | // |
| 5 | |
| 6 | // |
| 7 | // Primary original authors: |
| 8 | // Florian Kainz <kainz@ilm.com> |
| 9 | // Rod Bogart <rgb@ilm.com> |
| 10 | // |
| 11 | |
| 12 | #ifndef IMATH_HALF_H_ |
| 13 | #define IMATH_HALF_H_ |
| 14 | |
| 15 | #include "ImathExport.h" |
| 16 | #include "ImathNamespace.h" |
| 17 | #include "ImathPlatform.h" |
| 18 | |
| 19 | /// @file half.h |
/// The half type is a 16-bit floating-point number, compatible with
/// the IEEE 754-2008 binary16 type.
| 22 | /// |
| 23 | /// **Representation of a 32-bit float:** |
| 24 | /// |
| 25 | /// We assume that a float, f, is an IEEE 754 single-precision |
| 26 | /// floating point number, whose bits are arranged as follows: |
| 27 | /// |
///     31 (msb)
///     |
///     | 30     23
///     | |      |
///     | |      | 22                    0 (lsb)
///     | |      | |                     |
///     X XXXXXXXX XXXXXXXXXXXXXXXXXXXXXXX
///
///     s e        m
///
/// s is the sign-bit, e is the exponent and m is the significand.
| 39 | /// |
| 40 | /// If e is between 1 and 254, f is a normalized number: |
| 41 | /// |
///             s    e-127
///     f = (-1)  * 2      * 1.m
| 44 | /// |
| 45 | /// If e is 0, and m is not zero, f is a denormalized number: |
| 46 | /// |
///             s    -126
///     f = (-1)  * 2      * 0.m
| 49 | /// |
| 50 | /// If e and m are both zero, f is zero: |
| 51 | /// |
| 52 | /// f = 0.0 |
| 53 | /// |
| 54 | /// If e is 255, f is an "infinity" or "not a number" (NAN), |
| 55 | /// depending on whether m is zero or not. |
| 56 | /// |
| 57 | /// Examples: |
| 58 | /// |
| 59 | /// 0 00000000 00000000000000000000000 = 0.0 |
| 60 | /// 0 01111110 00000000000000000000000 = 0.5 |
| 61 | /// 0 01111111 00000000000000000000000 = 1.0 |
| 62 | /// 0 10000000 00000000000000000000000 = 2.0 |
| 63 | /// 0 10000000 10000000000000000000000 = 3.0 |
| 64 | /// 1 10000101 11110000010000000000000 = -124.0625 |
| 65 | /// 0 11111111 00000000000000000000000 = +infinity |
| 66 | /// 1 11111111 00000000000000000000000 = -infinity |
| 67 | /// 0 11111111 10000000000000000000000 = NAN |
| 68 | /// 1 11111111 11111111111111111111111 = NAN |
| 69 | /// |
| 70 | /// **Representation of a 16-bit half:** |
| 71 | /// |
| 72 | /// Here is the bit-layout for a half number, h: |
| 73 | /// |
///     15 (msb)
///     |
///     | 14  10
///     | |   |
///     | |   | 9        0 (lsb)
///     | |   | |        |
///     X XXXXX XXXXXXXXXX
///
///     s e     m
///
/// s is the sign-bit, e is the exponent and m is the significand.
| 85 | /// |
| 86 | /// If e is between 1 and 30, h is a normalized number: |
| 87 | /// |
///             s    e-15
///     h = (-1)  * 2     * 1.m
| 90 | /// |
| 91 | /// If e is 0, and m is not zero, h is a denormalized number: |
| 92 | /// |
///             s    -14
///     h = (-1)  * 2     * 0.m
| 95 | /// |
| 96 | /// If e and m are both zero, h is zero: |
| 97 | /// |
| 98 | /// h = 0.0 |
| 99 | /// |
| 100 | /// If e is 31, h is an "infinity" or "not a number" (NAN), |
| 101 | /// depending on whether m is zero or not. |
| 102 | /// |
| 103 | /// Examples: |
| 104 | /// |
| 105 | /// 0 00000 0000000000 = 0.0 |
| 106 | /// 0 01110 0000000000 = 0.5 |
| 107 | /// 0 01111 0000000000 = 1.0 |
| 108 | /// 0 10000 0000000000 = 2.0 |
| 109 | /// 0 10000 1000000000 = 3.0 |
| 110 | /// 1 10101 1111000001 = -124.0625 |
| 111 | /// 0 11111 0000000000 = +infinity |
| 112 | /// 1 11111 0000000000 = -infinity |
| 113 | /// 0 11111 1000000000 = NAN |
| 114 | /// 1 11111 1111111111 = NAN |
| 115 | /// |
| 116 | /// **Conversion via Lookup Table:** |
| 117 | /// |
| 118 | /// Converting from half to float is performed by default using a |
| 119 | /// lookup table. There are only 65,536 different half numbers; each |
| 120 | /// of these numbers has been converted and stored in a table pointed |
| 121 | /// to by the ``imath_half_to_float_table`` pointer. |
| 122 | /// |
/// Prior to Imath v3.1, conversion from float to half was
/// accomplished with the help of an exponent lookup table, but this
/// is now replaced with explicit bit shifting.
| 126 | /// |
| 127 | /// **Conversion via Hardware:** |
| 128 | /// |
| 129 | /// For Imath v3.1, the conversion routines have been extended to use |
| 130 | /// F16C SSE instructions whenever present and enabled by compiler |
| 131 | /// flags. |
| 132 | /// |
| 133 | /// **Conversion via Bit-Shifting** |
| 134 | /// |
| 135 | /// If F16C SSE instructions are not available, conversion can be |
| 136 | /// accomplished by a bit-shifting algorithm. For half-to-float |
| 137 | /// conversion, this is generally slower than the lookup table, but it |
| 138 | /// may be preferable when memory limits preclude storing of the |
| 139 | /// 65,536-entry lookup table. |
| 140 | /// |
| 141 | /// The lookup table symbol is included in the compilation even if |
| 142 | /// ``IMATH_HALF_USE_LOOKUP_TABLE`` is false, because application code |
| 143 | /// using the exported ``half.h`` may choose to enable the use of the table. |
| 144 | /// |
| 145 | /// An implementation can eliminate the table from compilation by |
| 146 | /// defining the ``IMATH_HALF_NO_LOOKUP_TABLE`` preprocessor symbol. |
| 147 | /// Simply add: |
| 148 | /// |
| 149 | /// #define IMATH_HALF_NO_LOOKUP_TABLE |
| 150 | /// |
| 151 | /// before including ``half.h``, or define the symbol on the compile |
| 152 | /// command line. |
| 153 | /// |
| 154 | /// Furthermore, an implementation wishing to receive ``FE_OVERFLOW`` |
| 155 | /// and ``FE_UNDERFLOW`` floating point exceptions when converting |
| 156 | /// float to half by the bit-shift algorithm can define the |
| 157 | /// preprocessor symbol ``IMATH_HALF_ENABLE_FP_EXCEPTIONS`` prior to |
| 158 | /// including ``half.h``: |
| 159 | /// |
| 160 | /// #define IMATH_HALF_ENABLE_FP_EXCEPTIONS |
| 161 | /// |
| 162 | /// **Conversion Performance Comparison:** |
| 163 | /// |
| 164 | /// Testing on a Core i9, the timings are approximately: |
| 165 | /// |
| 166 | /// half to float |
| 167 | /// - table: 0.71 ns / call |
| 168 | /// - no table: 1.06 ns / call |
| 169 | /// - f16c: 0.45 ns / call |
| 170 | /// |
| 171 | /// float-to-half: |
| 172 | /// - original: 5.2 ns / call |
| 173 | /// - no exp table + opt: 1.27 ns / call |
| 174 | /// - f16c: 0.45 ns / call |
| 175 | /// |
| 176 | /// **Note:** the timing above depends on the distribution of the |
| 177 | /// floats in question. |
| 178 | /// |
| 179 | |
| 180 | #ifdef __CUDA_ARCH__ |
| 181 | // do not include intrinsics headers on Cuda |
| 182 | #elif defined(_WIN32) |
| 183 | # include <intrin.h> |
| 184 | #elif defined(__x86_64__) |
| 185 | # include <x86intrin.h> |
| 186 | #elif defined(__F16C__) |
| 187 | # include <immintrin.h> |
| 188 | #endif |
| 189 | |
| 190 | #include <stdint.h> |
| 191 | #include <stdio.h> |
| 192 | |
| 193 | #ifdef IMATH_HALF_ENABLE_FP_EXCEPTIONS |
| 194 | # include <fenv.h> |
| 195 | #endif |
| 196 | |
//-------------------------------------------------------------------------
// Limits
//
// Visual C++ will complain if HALF_DENORM_MIN, HALF_NRM_MIN etc. are not float
// constants, but at least one other compiler (gcc 2.96) produces incorrect
// results if they are.
//
// NOTE(review): in the non-MSVC branch below HALF_MIN still carries an `f`
// suffix, unlike its neighbours in that branch -- confirm whether that is
// intentional before relying on the type of these constants.
//-------------------------------------------------------------------------

#if (defined _WIN32 || defined _WIN64) && defined _MSC_VER

/// Smallest positive denormalized half
# define HALF_DENORM_MIN 5.96046448e-08f
/// Smallest positive normalized half
# define HALF_NRM_MIN 6.10351562e-05f
/// Smallest positive normalized half (same value as HALF_NRM_MIN)
# define HALF_MIN 6.10351562e-05f
/// Largest positive half
# define HALF_MAX 65504.0f
/// Smallest positive e for which ``half(1.0 + e) != half(1.0)``
# define HALF_EPSILON 0.00097656f
#else
/// Smallest positive denormalized half
# define HALF_DENORM_MIN 5.96046448e-08
/// Smallest positive normalized half
# define HALF_NRM_MIN 6.10351562e-05
/// Smallest positive normalized half (same value as HALF_NRM_MIN, but
/// written as a float literal in this branch)
# define HALF_MIN 6.10351562e-05f
/// Largest positive half
# define HALF_MAX 65504.0
/// Smallest positive e for which ``half(1.0 + e) != half(1.0)``
# define HALF_EPSILON 0.00097656
#endif

/// Number of digits in mantissa (significand + hidden leading 1)
#define HALF_MANT_DIG 11
/// Number of base 10 digits that can be represented without change:
///
/// ``floor( (HALF_MANT_DIG - 1) * log10(2) ) => 3.01... -> 3``
#define HALF_DIG 3
/// Number of base-10 digits that are necessary to uniquely represent
/// all distinct values:
///
/// ``ceil(HALF_MANT_DIG * log10(2) + 1) => 4.31... -> 5``
#define HALF_DECIMAL_DIG 5
/// Base of the exponent
#define HALF_RADIX 2
/// Minimum negative integer such that ``HALF_RADIX`` raised to the power
/// of one less than that integer is a normalized half
#define HALF_DENORM_MIN_EXP -13
/// Maximum positive integer such that ``HALF_RADIX`` raised to the power
/// of one less than that integer is a normalized half
#define HALF_MAX_EXP 16
/// Minimum negative integer such that 10 raised to that power is a
/// normalized half
#define HALF_DENORM_MIN_10_EXP -4
/// Maximum positive integer such that 10 raised to that power is a
/// normalized half
#define HALF_MAX_10_EXP 4
| 255 | |
/// Union overlaying a 32-bit unsigned integer and a float, used by the
/// conversion routines to read and write the bit pattern of a float.
/// Declared as a plain C type so that both C-only programs and C++ can
/// use the same utilities.
typedef union imath_half_uif
{
    uint32_t i; // the 32 bits viewed as an unsigned integer
    float f;    // the same 32 bits viewed as a float
} imath_half_uif_t;

/// Raw 16-bit storage for a half value; a type for both C-only programs
/// and C++ to use the same utilities.
typedef uint16_t imath_half_bits_t;

#if !defined(__cplusplus) && !defined(__CUDACC__)
/// if we're in a C-only context, alias the half bits type to half
typedef imath_half_bits_t half;
#endif

// Declaration of the half-to-float lookup table pointer, indexed by the
// 16-bit half pattern. It is declared with C linkage when compiled as C++,
// and omitted entirely when IMATH_HALF_NO_LOOKUP_TABLE is defined.
#if !defined(IMATH_HALF_NO_LOOKUP_TABLE)
# if defined(__cplusplus)
extern "C"
# else
extern
# endif
IMATH_EXPORT const imath_half_uif_t* imath_half_to_float_table;
#endif
| 279 | |
| 280 | /// |
| 281 | /// Convert half to float |
| 282 | /// |
| 283 | |
| 284 | static inline float |
| 285 | imath_half_to_float (imath_half_bits_t h) |
| 286 | { |
| 287 | #if defined(__F16C__) |
| 288 | // NB: The intel implementation does seem to treat NaN slightly |
| 289 | // different than the original toFloat table does (i.e. where the |
| 290 | // 1 bits are, meaning the signalling or not bits). This seems |
| 291 | // benign, given that the original library didn't really deal with |
| 292 | // signalling vs non-signalling NaNs |
| 293 | # ifdef _MSC_VER |
| 294 | /* msvc does not seem to have cvtsh_ss :( */ |
| 295 | return _mm_cvtss_f32 (_mm_cvtph_ps (_mm_set1_epi16 (h))); |
| 296 | # else |
| 297 | return _cvtsh_ss (h); |
| 298 | # endif |
| 299 | #elif defined(IMATH_HALF_USE_LOOKUP_TABLE) && !defined(IMATH_HALF_NO_LOOKUP_TABLE) |
| 300 | return imath_half_to_float_table[h].f; |
| 301 | #else |
| 302 | imath_half_uif_t v; |
| 303 | // this code would be clearer, although it does appear to be faster |
| 304 | // (1.06 vs 1.08 ns/call) to avoid the constants and just do 4 |
| 305 | // shifts. |
| 306 | // |
| 307 | uint32_t hexpmant = ( (uint32_t)(h) << 17 ) >> 4; |
| 308 | v.i = ((uint32_t)(h >> 15)) << 31; |
| 309 | |
| 310 | // the likely really does help if most of your numbers are "normal" half numbers |
| 311 | if (IMATH_LIKELY ((hexpmant >= 0x00800000))) |
| 312 | { |
| 313 | v.i |= hexpmant; |
| 314 | // either we are a normal number, in which case add in the bias difference |
| 315 | // otherwise make sure all exponent bits are set |
| 316 | if (IMATH_LIKELY ((hexpmant < 0x0f800000))) |
| 317 | v.i += 0x38000000; |
| 318 | else |
| 319 | v.i |= 0x7f800000; |
| 320 | } |
| 321 | else if (hexpmant != 0) |
| 322 | { |
| 323 | // exponent is 0 because we're denormal, don't have to extract |
| 324 | // the mantissa, can just use as is |
| 325 | // |
| 326 | // |
| 327 | // other compilers may provide count-leading-zeros primitives, |
| 328 | // but we need the community to inform us of the variants |
| 329 | uint32_t lc; |
| 330 | # if defined(_MSC_VER) && (_M_IX86 || _M_X64) |
| 331 | lc = __lzcnt (hexpmant); |
| 332 | # elif defined(__GNUC__) || defined(__clang__) |
| 333 | lc = (uint32_t) __builtin_clz (hexpmant); |
| 334 | # else |
| 335 | lc = 0; |
| 336 | while (0 == ((hexpmant << lc) & 0x80000000)) |
| 337 | ++lc; |
| 338 | # endif |
| 339 | lc -= 8; |
| 340 | // so nominally we want to remove that extra bit we shifted |
| 341 | // up, but we are going to add that bit back in, then subtract |
| 342 | // from it with the 0x38800000 - (lc << 23).... |
| 343 | // |
| 344 | // by combining, this allows us to skip the & operation (and |
| 345 | // remove a constant) |
| 346 | // |
| 347 | // hexpmant &= ~0x00800000; |
| 348 | v.i |= 0x38800000; |
| 349 | // lc is now x, where the desired exponent is then |
| 350 | // -14 - lc |
| 351 | // + 127 -> new exponent |
| 352 | v.i |= (hexpmant << lc); |
| 353 | v.i -= (lc << 23); |
| 354 | } |
| 355 | return v.f; |
| 356 | #endif |
| 357 | } |
| 358 | |
| 359 | /// |
| 360 | /// Convert half to float |
| 361 | /// |
| 362 | /// Note: This only supports the "round to even" rounding mode, which |
| 363 | /// was the only mode supported by the original OpenEXR library |
| 364 | /// |
| 365 | |
| 366 | static inline imath_half_bits_t |
| 367 | imath_float_to_half (float f) |
| 368 | { |
| 369 | #if defined(__F16C__) |
| 370 | # ifdef _MSC_VER |
| 371 | // msvc does not seem to have cvtsh_ss :( |
| 372 | return _mm_extract_epi16 ( |
| 373 | _mm_cvtps_ph (_mm_set_ss (f), (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)), |
| 374 | 0); |
| 375 | # else |
| 376 | // preserve the fixed rounding mode to nearest |
| 377 | return _cvtss_sh (f, (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)); |
| 378 | # endif |
| 379 | #else |
| 380 | imath_half_uif_t v; |
| 381 | imath_half_bits_t ret; |
| 382 | uint32_t e, m, ui, r, shift; |
| 383 | |
| 384 | v.f = f; |
| 385 | |
| 386 | ui = (v.i & ~0x80000000); |
| 387 | ret = ((v.i >> 16) & 0x8000); |
| 388 | |
| 389 | // exponent large enough to result in a normal number, round and return |
| 390 | if (ui >= 0x38800000) |
| 391 | { |
| 392 | // inf or nan |
| 393 | if (IMATH_UNLIKELY (ui >= 0x7f800000)) |
| 394 | { |
| 395 | ret |= 0x7c00; |
| 396 | if (ui == 0x7f800000) |
| 397 | return ret; |
| 398 | m = (ui & 0x7fffff) >> 13; |
| 399 | // make sure we have at least one bit after shift to preserve nan-ness |
| 400 | return ret | (uint16_t)m | (uint16_t)(m == 0); |
| 401 | } |
| 402 | |
| 403 | // too large, round to infinity |
| 404 | if (IMATH_UNLIKELY (ui > 0x477fefff)) |
| 405 | { |
| 406 | # ifdef IMATH_HALF_ENABLE_FP_EXCEPTIONS |
| 407 | feraiseexcept (FE_OVERFLOW); |
| 408 | # endif |
| 409 | return ret | 0x7c00; |
| 410 | } |
| 411 | |
| 412 | ui -= 0x38000000; |
| 413 | ui = ((ui + 0x00000fff + ((ui >> 13) & 1)) >> 13); |
| 414 | return ret | (uint16_t)ui; |
| 415 | } |
| 416 | |
| 417 | // zero or flush to 0 |
| 418 | if (ui < 0x33000001) |
| 419 | { |
| 420 | # ifdef IMATH_HALF_ENABLE_FP_EXCEPTIONS |
| 421 | if (ui == 0) |
| 422 | return ret; |
| 423 | feraiseexcept (FE_UNDERFLOW); |
| 424 | # endif |
| 425 | return ret; |
| 426 | } |
| 427 | |
| 428 | // produce a denormalized half |
| 429 | e = (ui >> 23); |
| 430 | shift = 0x7e - e; |
| 431 | m = 0x800000 | (ui & 0x7fffff); |
| 432 | r = m << (32 - shift); |
| 433 | ret |= (m >> shift); |
| 434 | if (r > 0x80000000 || (r == 0x80000000 && (ret & 0x1) != 0)) |
| 435 | ++ret; |
| 436 | return ret; |
| 437 | #endif |
| 438 | } |
| 439 | |
| 440 | //////////////////////////////////////// |
| 441 | |
| 442 | #ifdef __cplusplus |
| 443 | |
| 444 | # include <iostream> |
| 445 | |
| 446 | IMATH_INTERNAL_NAMESPACE_HEADER_ENTER |
| 447 | |
| 448 | /// |
| 449 | /// |
| 450 | /// class half represents a 16-bit floating point number |
| 451 | /// |
| 452 | /// Type half can represent positive and negative numbers whose |
| 453 | /// magnitude is between roughly 6.1e-5 and 6.5e+4 with a relative |
| 454 | /// error of 9.8e-4; numbers smaller than 6.1e-5 can be represented |
| 455 | /// with an absolute error of 6.0e-8. All integers from -2048 to |
| 456 | /// +2048 can be represented exactly. |
| 457 | /// |
| 458 | /// Type half behaves (almost) like the built-in C++ floating point |
| 459 | /// types. In arithmetic expressions, half, float and double can be |
| 460 | /// mixed freely. Here are a few examples: |
| 461 | /// |
| 462 | /// half a (3.5); |
| 463 | /// float b (a + sqrt (a)); |
| 464 | /// a += b; |
| 465 | /// b += a; |
| 466 | /// b = a + 7; |
| 467 | /// |
| 468 | /// Conversions from half to float are lossless; all half numbers |
| 469 | /// are exactly representable as floats. |
| 470 | /// |
| 471 | /// Conversions from float to half may not preserve a float's value |
| 472 | /// exactly. If a float is not representable as a half, then the |
| 473 | /// float value is rounded to the nearest representable half. If a |
| 474 | /// float value is exactly in the middle between the two closest |
| 475 | /// representable half values, then the float value is rounded to |
| 476 | /// the closest half whose least significant bit is zero. |
| 477 | /// |
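/// Overflows during float-to-half conversions raise the FE_OVERFLOW
/// floating point exception only when the bit-shift conversion is
/// used and ``IMATH_HALF_ENABLE_FP_EXCEPTIONS`` is defined. An
/// overflow occurs when the float value to be converted is too large
/// to be represented as a half; such a value becomes an infinity.
/// A float value that is an infinity or a NAN is converted to the
/// corresponding half infinity or NAN without raising an exception.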
| 482 | /// |
| 483 | /// The implementation of type half makes the following assumptions |
| 484 | /// about the implementation of the built-in C++ types: |
| 485 | /// |
| 486 | /// * float is an IEEE 754 single-precision number |
| 487 | /// * sizeof (float) == 4 |
| 488 | /// * sizeof (unsigned int) == sizeof (float) |
| 489 | /// * alignof (unsigned int) == alignof (float) |
| 490 | /// * sizeof (uint16_t) == 2 |
| 491 | /// |
| 492 | |
| 493 | class IMATH_EXPORT_TYPE half |
| 494 | { |
| 495 | public: |
| 496 | /// A special tag that lets us initialize a half from the raw bits. |
| 497 | enum IMATH_EXPORT_ENUM FromBitsTag |
| 498 | { |
| 499 | FromBits |
| 500 | }; |
| 501 | |
| 502 | /// @{ |
| 503 | /// @name Constructors |
| 504 | |
| 505 | /// Default construction provides no initialization (hence it is |
| 506 | /// not constexpr). |
| 507 | half() IMATH_NOEXCEPT = default; |
| 508 | |
| 509 | /// Construct from float |
| 510 | half (float f) IMATH_NOEXCEPT; |
| 511 | |
| 512 | /// Construct from bit-vector |
| 513 | constexpr half (FromBitsTag, uint16_t bits) IMATH_NOEXCEPT; |
| 514 | |
| 515 | /// Copy constructor |
| 516 | constexpr half (const half&) IMATH_NOEXCEPT = default; |
| 517 | |
| 518 | /// Move constructor |
| 519 | constexpr half (half&&) IMATH_NOEXCEPT = default; |
| 520 | |
| 521 | /// Destructor |
| 522 | ~half() IMATH_NOEXCEPT = default; |
| 523 | |
| 524 | /// @} |
| 525 | |
| 526 | /// Conversion to float |
| 527 | operator float() const IMATH_NOEXCEPT; |
| 528 | |
| 529 | /// @{ |
| 530 | /// @name Basic Algebra |
| 531 | |
| 532 | /// Unary minus |
| 533 | constexpr half operator-() const IMATH_NOEXCEPT; |
| 534 | |
| 535 | /// Assignment |
| 536 | half& operator= (const half& h) IMATH_NOEXCEPT = default; |
| 537 | |
| 538 | /// Move assignment |
| 539 | half& operator= (half&& h) IMATH_NOEXCEPT = default; |
| 540 | |
| 541 | /// Assignment from float |
| 542 | half& operator= (float f) IMATH_NOEXCEPT; |
| 543 | |
| 544 | /// Addition assignment |
| 545 | half& operator+= (half h) IMATH_NOEXCEPT; |
| 546 | |
| 547 | /// Addition assignment from float |
| 548 | half& operator+= (float f) IMATH_NOEXCEPT; |
| 549 | |
| 550 | /// Subtraction assignment |
| 551 | half& operator-= (half h) IMATH_NOEXCEPT; |
| 552 | |
| 553 | /// Subtraction assignment from float |
| 554 | half& operator-= (float f) IMATH_NOEXCEPT; |
| 555 | |
| 556 | /// Multiplication assignment |
| 557 | half& operator*= (half h) IMATH_NOEXCEPT; |
| 558 | |
| 559 | /// Multiplication assignment from float |
| 560 | half& operator*= (float f) IMATH_NOEXCEPT; |
| 561 | |
| 562 | /// Division assignment |
| 563 | half& operator/= (half h) IMATH_NOEXCEPT; |
| 564 | |
| 565 | /// Division assignment from float |
| 566 | half& operator/= (float f) IMATH_NOEXCEPT; |
| 567 | |
| 568 | /// @} |
| 569 | |
| 570 | /// Round to n-bit precision (n should be between 0 and 10). |
| 571 | /// After rounding, the significand's 10-n least significant |
| 572 | /// bits will be zero. |
| 573 | IMATH_CONSTEXPR14 half round (unsigned int n) const IMATH_NOEXCEPT; |
| 574 | |
| 575 | /// @{ |
| 576 | /// @name Classification |
| 577 | |
| 578 | /// Return true if a normalized number, a denormalized number, or |
| 579 | /// zero. |
| 580 | constexpr bool isFinite() const IMATH_NOEXCEPT; |
| 581 | |
| 582 | /// Return true if a normalized number. |
| 583 | constexpr bool isNormalized() const IMATH_NOEXCEPT; |
| 584 | |
| 585 | /// Return true if a denormalized number. |
| 586 | constexpr bool isDenormalized() const IMATH_NOEXCEPT; |
| 587 | |
| 588 | /// Return true if zero. |
| 589 | constexpr bool isZero() const IMATH_NOEXCEPT; |
| 590 | |
| 591 | /// Return true if NAN. |
| 592 | constexpr bool isNan() const IMATH_NOEXCEPT; |
| 593 | |
| 594 | /// Return true if a positive or a negative infinity |
| 595 | constexpr bool isInfinity() const IMATH_NOEXCEPT; |
| 596 | |
| 597 | /// Return true if the sign bit is set (negative) |
| 598 | constexpr bool isNegative() const IMATH_NOEXCEPT; |
| 599 | |
| 600 | /// @} |
| 601 | |
| 602 | /// @{ |
| 603 | /// @name Special values |
| 604 | |
| 605 | /// Return +infinity |
| 606 | static constexpr half posInf() IMATH_NOEXCEPT; |
| 607 | |
| 608 | /// Return -infinity |
| 609 | static constexpr half negInf() IMATH_NOEXCEPT; |
| 610 | |
| 611 | /// Returns a NAN with the bit pattern 0111111111111111 |
| 612 | static constexpr half qNan() IMATH_NOEXCEPT; |
| 613 | |
| 614 | /// Return a NAN with the bit pattern 0111110111111111 |
| 615 | static constexpr half sNan() IMATH_NOEXCEPT; |
| 616 | |
| 617 | /// @} |
| 618 | |
| 619 | /// @{ |
| 620 | /// @name Access to the internal representation |
| 621 | |
| 622 | /// Return the bit pattern |
| 623 | constexpr uint16_t bits () const IMATH_NOEXCEPT; |
| 624 | |
| 625 | /// Set the bit pattern |
| 626 | IMATH_CONSTEXPR14 void setBits (uint16_t bits) IMATH_NOEXCEPT; |
| 627 | |
| 628 | /// @} |
| 629 | |
| 630 | public: |
| 631 | static_assert (sizeof (float) == sizeof (uint32_t), |
| 632 | "Assumption about the size of floats correct" ); |
| 633 | using uif = imath_half_uif; |
| 634 | |
| 635 | private: |
| 636 | |
| 637 | constexpr uint16_t mantissa() const IMATH_NOEXCEPT; |
| 638 | constexpr uint16_t exponent() const IMATH_NOEXCEPT; |
| 639 | |
| 640 | uint16_t _h; |
| 641 | }; |
| 642 | |
| 643 | //---------------------------- |
| 644 | // Half-from-float constructor |
| 645 | //---------------------------- |
| 646 | |
| 647 | inline half::half (float f) IMATH_NOEXCEPT |
| 648 | : _h (imath_float_to_half (f)) |
| 649 | { |
| 650 | } |
| 651 | |
| 652 | //------------------------------------------ |
| 653 | // Half from raw bits constructor |
| 654 | //------------------------------------------ |
| 655 | |
| 656 | inline constexpr half::half (FromBitsTag, uint16_t bits) IMATH_NOEXCEPT : _h (bits) |
| 657 | {} |
| 658 | |
| 659 | //------------------------- |
| 660 | // Half-to-float conversion |
| 661 | //------------------------- |
| 662 | |
| 663 | inline half::operator float() const IMATH_NOEXCEPT |
| 664 | { |
| 665 | return imath_half_to_float (h: _h); |
| 666 | } |
| 667 | |
| 668 | //------------------------- |
| 669 | // Round to n-bit precision |
| 670 | //------------------------- |
| 671 | |
| 672 | inline IMATH_CONSTEXPR14 half |
| 673 | half::round (unsigned int n) const IMATH_NOEXCEPT |
| 674 | { |
| 675 | // |
| 676 | // Parameter check. |
| 677 | // |
| 678 | |
| 679 | if (n >= 10) |
| 680 | return *this; |
| 681 | |
| 682 | // |
| 683 | // Disassemble h into the sign, s, |
| 684 | // and the combined exponent and significand, e. |
| 685 | // |
| 686 | |
| 687 | uint16_t s = _h & 0x8000; |
| 688 | uint16_t e = _h & 0x7fff; |
| 689 | |
| 690 | // |
| 691 | // Round the exponent and significand to the nearest value |
| 692 | // where ones occur only in the (10-n) most significant bits. |
| 693 | // Note that the exponent adjusts automatically if rounding |
| 694 | // up causes the significand to overflow. |
| 695 | // |
| 696 | |
| 697 | e >>= 9 - n; |
| 698 | e += e & 1; |
| 699 | e <<= 9 - n; |
| 700 | |
| 701 | // |
| 702 | // Check for exponent overflow. |
| 703 | // |
| 704 | |
| 705 | if (e >= 0x7c00) |
| 706 | { |
| 707 | // |
| 708 | // Overflow occurred -- truncate instead of rounding. |
| 709 | // |
| 710 | |
| 711 | e = _h; |
| 712 | e >>= 10 - n; |
| 713 | e <<= 10 - n; |
| 714 | } |
| 715 | |
| 716 | // |
| 717 | // Put the original sign bit back. |
| 718 | // |
| 719 | |
| 720 | half h (FromBits, s | e); |
| 721 | |
| 722 | return h; |
| 723 | } |
| 724 | |
| 725 | //----------------------- |
| 726 | // Other inline functions |
| 727 | //----------------------- |
| 728 | |
| 729 | inline constexpr half |
| 730 | half::operator-() const IMATH_NOEXCEPT |
| 731 | { |
| 732 | return half (FromBits, bits() ^ 0x8000); |
| 733 | } |
| 734 | |
| 735 | inline half& |
| 736 | half::operator= (float f) IMATH_NOEXCEPT |
| 737 | { |
| 738 | *this = half (f); |
| 739 | return *this; |
| 740 | } |
| 741 | |
| 742 | inline half& |
| 743 | half::operator+= (half h) IMATH_NOEXCEPT |
| 744 | { |
| 745 | *this = half (float (*this) + float (h)); |
| 746 | return *this; |
| 747 | } |
| 748 | |
| 749 | inline half& |
| 750 | half::operator+= (float f) IMATH_NOEXCEPT |
| 751 | { |
| 752 | *this = half (float (*this) + f); |
| 753 | return *this; |
| 754 | } |
| 755 | |
| 756 | inline half& |
| 757 | half::operator-= (half h) IMATH_NOEXCEPT |
| 758 | { |
| 759 | *this = half (float (*this) - float (h)); |
| 760 | return *this; |
| 761 | } |
| 762 | |
| 763 | inline half& |
| 764 | half::operator-= (float f) IMATH_NOEXCEPT |
| 765 | { |
| 766 | *this = half (float (*this) - f); |
| 767 | return *this; |
| 768 | } |
| 769 | |
| 770 | inline half& |
| 771 | half::operator*= (half h) IMATH_NOEXCEPT |
| 772 | { |
| 773 | *this = half (float (*this) * float (h)); |
| 774 | return *this; |
| 775 | } |
| 776 | |
| 777 | inline half& |
| 778 | half::operator*= (float f) IMATH_NOEXCEPT |
| 779 | { |
| 780 | *this = half (float (*this) * f); |
| 781 | return *this; |
| 782 | } |
| 783 | |
| 784 | inline half& |
| 785 | half::operator/= (half h) IMATH_NOEXCEPT |
| 786 | { |
| 787 | *this = half (float (*this) / float (h)); |
| 788 | return *this; |
| 789 | } |
| 790 | |
| 791 | inline half& |
| 792 | half::operator/= (float f) IMATH_NOEXCEPT |
| 793 | { |
| 794 | *this = half (float (*this) / f); |
| 795 | return *this; |
| 796 | } |
| 797 | |
| 798 | inline constexpr uint16_t |
| 799 | half::mantissa() const IMATH_NOEXCEPT |
| 800 | { |
| 801 | return _h & 0x3ff; |
| 802 | } |
| 803 | |
| 804 | inline constexpr uint16_t |
| 805 | half::exponent() const IMATH_NOEXCEPT |
| 806 | { |
| 807 | return (_h >> 10) & 0x001f; |
| 808 | } |
| 809 | |
| 810 | inline constexpr bool |
| 811 | half::isFinite() const IMATH_NOEXCEPT |
| 812 | { |
| 813 | return exponent() < 31; |
| 814 | } |
| 815 | |
| 816 | inline constexpr bool |
| 817 | half::isNormalized() const IMATH_NOEXCEPT |
| 818 | { |
| 819 | return exponent() > 0 && exponent() < 31; |
| 820 | } |
| 821 | |
| 822 | inline constexpr bool |
| 823 | half::isDenormalized() const IMATH_NOEXCEPT |
| 824 | { |
| 825 | return exponent() == 0 && mantissa() != 0; |
| 826 | } |
| 827 | |
| 828 | inline constexpr bool |
| 829 | half::isZero() const IMATH_NOEXCEPT |
| 830 | { |
| 831 | return (_h & 0x7fff) == 0; |
| 832 | } |
| 833 | |
| 834 | inline constexpr bool |
| 835 | half::isNan() const IMATH_NOEXCEPT |
| 836 | { |
| 837 | return exponent() == 31 && mantissa() != 0; |
| 838 | } |
| 839 | |
| 840 | inline constexpr bool |
| 841 | half::isInfinity() const IMATH_NOEXCEPT |
| 842 | { |
| 843 | return exponent() == 31 && mantissa() == 0; |
| 844 | } |
| 845 | |
| 846 | inline constexpr bool |
| 847 | half::isNegative() const IMATH_NOEXCEPT |
| 848 | { |
| 849 | return (_h & 0x8000) != 0; |
| 850 | } |
| 851 | |
| 852 | inline constexpr half |
| 853 | half::posInf() IMATH_NOEXCEPT |
| 854 | { |
| 855 | return half (FromBits, 0x7c00); |
| 856 | } |
| 857 | |
| 858 | inline constexpr half |
| 859 | half::negInf() IMATH_NOEXCEPT |
| 860 | { |
| 861 | return half (FromBits, 0xfc00); |
| 862 | } |
| 863 | |
| 864 | inline constexpr half |
| 865 | half::qNan() IMATH_NOEXCEPT |
| 866 | { |
| 867 | return half (FromBits, 0x7fff); |
| 868 | } |
| 869 | |
| 870 | inline constexpr half |
| 871 | half::sNan() IMATH_NOEXCEPT |
| 872 | { |
| 873 | return half (FromBits, 0x7dff); |
| 874 | } |
| 875 | |
| 876 | inline constexpr uint16_t |
| 877 | half::bits() const IMATH_NOEXCEPT |
| 878 | { |
| 879 | return _h; |
| 880 | } |
| 881 | |
| 882 | inline IMATH_CONSTEXPR14 void |
| 883 | half::setBits (uint16_t bits) IMATH_NOEXCEPT |
| 884 | { |
| 885 | _h = bits; |
| 886 | } |
| 887 | |
| 888 | IMATH_INTERNAL_NAMESPACE_HEADER_EXIT |
| 889 | |
/// Output h to os, formatted as a float.
/// Only declared in this header; the definition lives elsewhere.
IMATH_EXPORT std::ostream& operator<< (std::ostream& os, IMATH_INTERNAL_NAMESPACE::half h);

/// Input h from is.
/// Only declared in this header; the definition lives elsewhere.
IMATH_EXPORT std::istream& operator>> (std::istream& is, IMATH_INTERNAL_NAMESPACE::half& h);
| 895 | |
| 896 | #include <limits> |
| 897 | |
| 898 | namespace std |
| 899 | { |
| 900 | |
| 901 | template <> class numeric_limits<IMATH_INTERNAL_NAMESPACE::half> |
| 902 | { |
| 903 | public: |
| 904 | static const bool is_specialized = true; |
| 905 | |
| 906 | static constexpr IMATH_INTERNAL_NAMESPACE::half min () IMATH_NOEXCEPT |
| 907 | { |
| 908 | return IMATH_INTERNAL_NAMESPACE::half (IMATH_INTERNAL_NAMESPACE::half::FromBits, 0x0400); /*HALF_MIN*/ |
| 909 | } |
| 910 | static constexpr IMATH_INTERNAL_NAMESPACE::half max () IMATH_NOEXCEPT |
| 911 | { |
| 912 | return IMATH_INTERNAL_NAMESPACE::half (IMATH_INTERNAL_NAMESPACE::half::FromBits, 0x7bff); /*HALF_MAX*/ |
| 913 | } |
| 914 | static constexpr IMATH_INTERNAL_NAMESPACE::half lowest () |
| 915 | { |
| 916 | return IMATH_INTERNAL_NAMESPACE::half (IMATH_INTERNAL_NAMESPACE::half::FromBits, 0xfbff); /* -HALF_MAX */ |
| 917 | } |
| 918 | |
| 919 | static constexpr int digits = HALF_MANT_DIG; |
| 920 | static constexpr int digits10 = HALF_DIG; |
| 921 | static constexpr int max_digits10 = HALF_DECIMAL_DIG; |
| 922 | static constexpr bool is_signed = true; |
| 923 | static constexpr bool is_integer = false; |
| 924 | static constexpr bool is_exact = false; |
| 925 | static constexpr int radix = HALF_RADIX; |
| 926 | static constexpr IMATH_INTERNAL_NAMESPACE::half epsilon () IMATH_NOEXCEPT |
| 927 | { |
| 928 | return IMATH_INTERNAL_NAMESPACE::half (IMATH_INTERNAL_NAMESPACE::half::FromBits, 0x1400); /*HALF_EPSILON*/ |
| 929 | } |
| 930 | static constexpr IMATH_INTERNAL_NAMESPACE::half round_error () IMATH_NOEXCEPT |
| 931 | { |
| 932 | return IMATH_INTERNAL_NAMESPACE::half (IMATH_INTERNAL_NAMESPACE::half::FromBits, 0x3800); /*0.5*/ |
| 933 | } |
| 934 | |
| 935 | static constexpr int min_exponent = HALF_DENORM_MIN_EXP; |
| 936 | static constexpr int min_exponent10 = HALF_DENORM_MIN_10_EXP; |
| 937 | static constexpr int max_exponent = HALF_MAX_EXP; |
| 938 | static constexpr int max_exponent10 = HALF_MAX_10_EXP; |
| 939 | |
| 940 | static constexpr bool has_infinity = true; |
| 941 | static constexpr bool has_quiet_NaN = true; |
| 942 | static constexpr bool has_signaling_NaN = true; |
| 943 | static constexpr float_denorm_style has_denorm = denorm_present; |
| 944 | static constexpr bool has_denorm_loss = false; |
| 945 | static constexpr IMATH_INTERNAL_NAMESPACE::half infinity () IMATH_NOEXCEPT |
| 946 | { |
| 947 | return IMATH_INTERNAL_NAMESPACE::half (IMATH_INTERNAL_NAMESPACE::half::FromBits, 0x7c00); /*half::posInf()*/ |
| 948 | } |
| 949 | static constexpr IMATH_INTERNAL_NAMESPACE::half quiet_NaN () IMATH_NOEXCEPT |
| 950 | { |
| 951 | return IMATH_INTERNAL_NAMESPACE::half (IMATH_INTERNAL_NAMESPACE::half::FromBits, 0x7fff); /*half::qNan()*/ |
| 952 | } |
| 953 | static constexpr IMATH_INTERNAL_NAMESPACE::half signaling_NaN () IMATH_NOEXCEPT |
| 954 | { |
| 955 | return IMATH_INTERNAL_NAMESPACE::half (IMATH_INTERNAL_NAMESPACE::half::FromBits, 0x7dff); /*half::sNan()*/ |
| 956 | } |
| 957 | static constexpr IMATH_INTERNAL_NAMESPACE::half denorm_min () IMATH_NOEXCEPT |
| 958 | { |
| 959 | return IMATH_INTERNAL_NAMESPACE::half (IMATH_INTERNAL_NAMESPACE::half::FromBits, 0x0001); /*HALF_DENORM_MIN*/ |
| 960 | } |
| 961 | |
| 962 | static constexpr bool is_iec559 = false; |
| 963 | static constexpr bool is_bounded = false; |
| 964 | static constexpr bool is_modulo = false; |
| 965 | |
| 966 | static constexpr bool traps = true; |
| 967 | static constexpr bool tinyness_before = false; |
| 968 | static constexpr float_round_style round_style = round_to_nearest; |
| 969 | }; |
| 970 | |
| 971 | } // namespace std |
| 972 | |
//----------
// Debugging
//----------

// Declarations only; the definitions are not part of this header.
// presumably these emit the bit pattern of the value as text, either to
// a stream or into a caller-provided buffer of exactly the stated size
// (19 chars for a half, 35 for a float) -- TODO confirm against the
// implementation.
IMATH_EXPORT void printBits (std::ostream& os, IMATH_INTERNAL_NAMESPACE::half h);
IMATH_EXPORT void printBits (std::ostream& os, float f);
IMATH_EXPORT void printBits (char c[19], IMATH_INTERNAL_NAMESPACE::half h);
IMATH_EXPORT void printBits (char c[35], float f);

// Outside of CUDA compilation, make the class available as plain
// ``half`` in the global namespace; otherwise include cuda_fp16.h
// instead of introducing the alias.
# if !defined(__CUDACC__) && !defined(__CUDA_FP16_HPP__)
using half = IMATH_INTERNAL_NAMESPACE::half;
# else
# include <cuda_fp16.h>
# endif
| 987 | |
| 988 | #endif // __cplusplus |
| 989 | |
| 990 | #endif // IMATH_HALF_H_ |
| 991 | |