1#[cfg(all(feature = "serde", feature = "alloc"))]
2#[allow(unused_imports)]
3use alloc::string::ToString;
4#[cfg(feature = "bytemuck")]
5use bytemuck::{Pod, Zeroable};
6use core::{
7 cmp::Ordering,
8 iter::{Product, Sum},
9 num::FpCategory,
10 ops::{Add, AddAssign, Div, DivAssign, Mul, MulAssign, Neg, Rem, RemAssign, Sub, SubAssign},
11};
12#[cfg(not(target_arch = "spirv"))]
13use core::{
14 fmt::{
15 Binary, Debug, Display, Error, Formatter, LowerExp, LowerHex, Octal, UpperExp, UpperHex,
16 },
17 num::ParseFloatError,
18 str::FromStr,
19};
20#[cfg(feature = "serde")]
21use serde::{Deserialize, Serialize};
22#[cfg(feature = "zerocopy")]
23use zerocopy::{AsBytes, FromBytes};
24
25pub(crate) mod arch;
26
/// A 16-bit floating point type implementing the IEEE 754-2008 standard [`binary16`], a.k.a.
/// "half" format.
///
/// This 16-bit floating point type is intended for efficient storage where the full range and
/// precision of a larger floating point value is not required.
///
/// The value is held as its raw bit pattern in a single `u16` field, and the type is
/// `#[repr(transparent)]` over that field.
///
/// [`binary16`]: https://en.wikipedia.org/wiki/Half-precision_floating-point_format
// The type name is intentionally lowercase, hence the lint allowance.
#[allow(non_camel_case_types)]
#[derive(Clone, Copy, Default)]
#[repr(transparent)]
#[cfg_attr(feature = "serde", derive(Serialize))]
#[cfg_attr(
    feature = "rkyv",
    derive(rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)
)]
#[cfg_attr(feature = "rkyv", archive(resolver = "F16Resolver"))]
#[cfg_attr(feature = "bytemuck", derive(Zeroable, Pod))]
#[cfg_attr(feature = "zerocopy", derive(AsBytes, FromBytes))]
#[cfg_attr(kani, derive(kani::Arbitrary))]
pub struct f16(u16);
47
48impl f16 {
49 /// Constructs a 16-bit floating point value from the raw bits.
50 #[inline]
51 #[must_use]
52 pub const fn from_bits(bits: u16) -> f16 {
53 f16(bits)
54 }
55
56 /// Constructs a 16-bit floating point value from a 32-bit floating point value.
57 ///
58 /// This operation is lossy. If the 32-bit value is to large to fit in 16-bits, ±∞ will result.
59 /// NaN values are preserved. 32-bit subnormal values are too tiny to be represented in 16-bits
60 /// and result in ±0. Exponents that underflow the minimum 16-bit exponent will result in 16-bit
61 /// subnormals or ±0. All other values are truncated and rounded to the nearest representable
62 /// 16-bit value.
63 #[inline]
64 #[must_use]
65 pub fn from_f32(value: f32) -> f16 {
66 f16(arch::f32_to_f16(value))
67 }
68
69 /// Constructs a 16-bit floating point value from a 32-bit floating point value.
70 ///
71 /// This function is identical to [`from_f32`][Self::from_f32] except it never uses hardware
72 /// intrinsics, which allows it to be `const`. [`from_f32`][Self::from_f32] should be preferred
73 /// in any non-`const` context.
74 ///
75 /// This operation is lossy. If the 32-bit value is to large to fit in 16-bits, ±∞ will result.
76 /// NaN values are preserved. 32-bit subnormal values are too tiny to be represented in 16-bits
77 /// and result in ±0. Exponents that underflow the minimum 16-bit exponent will result in 16-bit
78 /// subnormals or ±0. All other values are truncated and rounded to the nearest representable
79 /// 16-bit value.
80 #[inline]
81 #[must_use]
82 pub const fn from_f32_const(value: f32) -> f16 {
83 f16(arch::f32_to_f16_fallback(value))
84 }
85
86 /// Constructs a 16-bit floating point value from a 64-bit floating point value.
87 ///
88 /// This operation is lossy. If the 64-bit value is to large to fit in 16-bits, ±∞ will result.
89 /// NaN values are preserved. 64-bit subnormal values are too tiny to be represented in 16-bits
90 /// and result in ±0. Exponents that underflow the minimum 16-bit exponent will result in 16-bit
91 /// subnormals or ±0. All other values are truncated and rounded to the nearest representable
92 /// 16-bit value.
93 #[inline]
94 #[must_use]
95 pub fn from_f64(value: f64) -> f16 {
96 f16(arch::f64_to_f16(value))
97 }
98
99 /// Constructs a 16-bit floating point value from a 64-bit floating point value.
100 ///
101 /// This function is identical to [`from_f64`][Self::from_f64] except it never uses hardware
102 /// intrinsics, which allows it to be `const`. [`from_f64`][Self::from_f64] should be preferred
103 /// in any non-`const` context.
104 ///
105 /// This operation is lossy. If the 64-bit value is to large to fit in 16-bits, ±∞ will result.
106 /// NaN values are preserved. 64-bit subnormal values are too tiny to be represented in 16-bits
107 /// and result in ±0. Exponents that underflow the minimum 16-bit exponent will result in 16-bit
108 /// subnormals or ±0. All other values are truncated and rounded to the nearest representable
109 /// 16-bit value.
110 #[inline]
111 #[must_use]
112 pub const fn from_f64_const(value: f64) -> f16 {
113 f16(arch::f64_to_f16_fallback(value))
114 }
115
116 /// Converts a [`f16`] into the underlying bit representation.
117 #[inline]
118 #[must_use]
119 pub const fn to_bits(self) -> u16 {
120 self.0
121 }
122
123 /// Returns the memory representation of the underlying bit representation as a byte array in
124 /// little-endian byte order.
125 ///
126 /// # Examples
127 ///
128 /// ```rust
129 /// # use half::prelude::*;
130 /// let bytes = f16::from_f32(12.5).to_le_bytes();
131 /// assert_eq!(bytes, [0x40, 0x4A]);
132 /// ```
133 #[inline]
134 #[must_use]
135 pub const fn to_le_bytes(self) -> [u8; 2] {
136 self.0.to_le_bytes()
137 }
138
139 /// Returns the memory representation of the underlying bit representation as a byte array in
140 /// big-endian (network) byte order.
141 ///
142 /// # Examples
143 ///
144 /// ```rust
145 /// # use half::prelude::*;
146 /// let bytes = f16::from_f32(12.5).to_be_bytes();
147 /// assert_eq!(bytes, [0x4A, 0x40]);
148 /// ```
149 #[inline]
150 #[must_use]
151 pub const fn to_be_bytes(self) -> [u8; 2] {
152 self.0.to_be_bytes()
153 }
154
155 /// Returns the memory representation of the underlying bit representation as a byte array in
156 /// native byte order.
157 ///
158 /// As the target platform's native endianness is used, portable code should use
159 /// [`to_be_bytes`][Self::to_be_bytes] or [`to_le_bytes`][Self::to_le_bytes], as appropriate,
160 /// instead.
161 ///
162 /// # Examples
163 ///
164 /// ```rust
165 /// # use half::prelude::*;
166 /// let bytes = f16::from_f32(12.5).to_ne_bytes();
167 /// assert_eq!(bytes, if cfg!(target_endian = "big") {
168 /// [0x4A, 0x40]
169 /// } else {
170 /// [0x40, 0x4A]
171 /// });
172 /// ```
173 #[inline]
174 #[must_use]
175 pub const fn to_ne_bytes(self) -> [u8; 2] {
176 self.0.to_ne_bytes()
177 }
178
179 /// Creates a floating point value from its representation as a byte array in little endian.
180 ///
181 /// # Examples
182 ///
183 /// ```rust
184 /// # use half::prelude::*;
185 /// let value = f16::from_le_bytes([0x40, 0x4A]);
186 /// assert_eq!(value, f16::from_f32(12.5));
187 /// ```
188 #[inline]
189 #[must_use]
190 pub const fn from_le_bytes(bytes: [u8; 2]) -> f16 {
191 f16::from_bits(u16::from_le_bytes(bytes))
192 }
193
194 /// Creates a floating point value from its representation as a byte array in big endian.
195 ///
196 /// # Examples
197 ///
198 /// ```rust
199 /// # use half::prelude::*;
200 /// let value = f16::from_be_bytes([0x4A, 0x40]);
201 /// assert_eq!(value, f16::from_f32(12.5));
202 /// ```
203 #[inline]
204 #[must_use]
205 pub const fn from_be_bytes(bytes: [u8; 2]) -> f16 {
206 f16::from_bits(u16::from_be_bytes(bytes))
207 }
208
209 /// Creates a floating point value from its representation as a byte array in native endian.
210 ///
211 /// As the target platform's native endianness is used, portable code likely wants to use
212 /// [`from_be_bytes`][Self::from_be_bytes] or [`from_le_bytes`][Self::from_le_bytes], as
213 /// appropriate instead.
214 ///
215 /// # Examples
216 ///
217 /// ```rust
218 /// # use half::prelude::*;
219 /// let value = f16::from_ne_bytes(if cfg!(target_endian = "big") {
220 /// [0x4A, 0x40]
221 /// } else {
222 /// [0x40, 0x4A]
223 /// });
224 /// assert_eq!(value, f16::from_f32(12.5));
225 /// ```
226 #[inline]
227 #[must_use]
228 pub const fn from_ne_bytes(bytes: [u8; 2]) -> f16 {
229 f16::from_bits(u16::from_ne_bytes(bytes))
230 }
231
232 /// Converts a [`f16`] value into a `f32` value.
233 ///
234 /// This conversion is lossless as all 16-bit floating point values can be represented exactly
235 /// in 32-bit floating point.
236 #[inline]
237 #[must_use]
238 pub fn to_f32(self) -> f32 {
239 arch::f16_to_f32(self.0)
240 }
241
242 /// Converts a [`f16`] value into a `f32` value.
243 ///
244 /// This function is identical to [`to_f32`][Self::to_f32] except it never uses hardware
245 /// intrinsics, which allows it to be `const`. [`to_f32`][Self::to_f32] should be preferred
246 /// in any non-`const` context.
247 ///
248 /// This conversion is lossless as all 16-bit floating point values can be represented exactly
249 /// in 32-bit floating point.
250 #[inline]
251 #[must_use]
252 pub const fn to_f32_const(self) -> f32 {
253 arch::f16_to_f32_fallback(self.0)
254 }
255
256 /// Converts a [`f16`] value into a `f64` value.
257 ///
258 /// This conversion is lossless as all 16-bit floating point values can be represented exactly
259 /// in 64-bit floating point.
260 #[inline]
261 #[must_use]
262 pub fn to_f64(self) -> f64 {
263 arch::f16_to_f64(self.0)
264 }
265
266 /// Converts a [`f16`] value into a `f64` value.
267 ///
268 /// This function is identical to [`to_f64`][Self::to_f64] except it never uses hardware
269 /// intrinsics, which allows it to be `const`. [`to_f64`][Self::to_f64] should be preferred
270 /// in any non-`const` context.
271 ///
272 /// This conversion is lossless as all 16-bit floating point values can be represented exactly
273 /// in 64-bit floating point.
274 #[inline]
275 #[must_use]
276 pub const fn to_f64_const(self) -> f64 {
277 arch::f16_to_f64_fallback(self.0)
278 }
279
280 /// Returns `true` if this value is `NaN` and `false` otherwise.
281 ///
282 /// # Examples
283 ///
284 /// ```rust
285 /// # use half::prelude::*;
286 ///
287 /// let nan = f16::NAN;
288 /// let f = f16::from_f32(7.0_f32);
289 ///
290 /// assert!(nan.is_nan());
291 /// assert!(!f.is_nan());
292 /// ```
293 #[inline]
294 #[must_use]
295 pub const fn is_nan(self) -> bool {
296 self.0 & 0x7FFFu16 > 0x7C00u16
297 }
298
299 /// Returns `true` if this value is ±∞ and `false`.
300 /// otherwise.
301 ///
302 /// # Examples
303 ///
304 /// ```rust
305 /// # use half::prelude::*;
306 ///
307 /// let f = f16::from_f32(7.0f32);
308 /// let inf = f16::INFINITY;
309 /// let neg_inf = f16::NEG_INFINITY;
310 /// let nan = f16::NAN;
311 ///
312 /// assert!(!f.is_infinite());
313 /// assert!(!nan.is_infinite());
314 ///
315 /// assert!(inf.is_infinite());
316 /// assert!(neg_inf.is_infinite());
317 /// ```
318 #[inline]
319 #[must_use]
320 pub const fn is_infinite(self) -> bool {
321 self.0 & 0x7FFFu16 == 0x7C00u16
322 }
323
324 /// Returns `true` if this number is neither infinite nor `NaN`.
325 ///
326 /// # Examples
327 ///
328 /// ```rust
329 /// # use half::prelude::*;
330 ///
331 /// let f = f16::from_f32(7.0f32);
332 /// let inf = f16::INFINITY;
333 /// let neg_inf = f16::NEG_INFINITY;
334 /// let nan = f16::NAN;
335 ///
336 /// assert!(f.is_finite());
337 ///
338 /// assert!(!nan.is_finite());
339 /// assert!(!inf.is_finite());
340 /// assert!(!neg_inf.is_finite());
341 /// ```
342 #[inline]
343 #[must_use]
344 pub const fn is_finite(self) -> bool {
345 self.0 & 0x7C00u16 != 0x7C00u16
346 }
347
348 /// Returns `true` if the number is neither zero, infinite, subnormal, or `NaN`.
349 ///
350 /// # Examples
351 ///
352 /// ```rust
353 /// # use half::prelude::*;
354 ///
355 /// let min = f16::MIN_POSITIVE;
356 /// let max = f16::MAX;
357 /// let lower_than_min = f16::from_f32(1.0e-10_f32);
358 /// let zero = f16::from_f32(0.0_f32);
359 ///
360 /// assert!(min.is_normal());
361 /// assert!(max.is_normal());
362 ///
363 /// assert!(!zero.is_normal());
364 /// assert!(!f16::NAN.is_normal());
365 /// assert!(!f16::INFINITY.is_normal());
366 /// // Values between `0` and `min` are Subnormal.
367 /// assert!(!lower_than_min.is_normal());
368 /// ```
369 #[inline]
370 #[must_use]
371 pub const fn is_normal(self) -> bool {
372 let exp = self.0 & 0x7C00u16;
373 exp != 0x7C00u16 && exp != 0
374 }
375
376 /// Returns the floating point category of the number.
377 ///
378 /// If only one property is going to be tested, it is generally faster to use the specific
379 /// predicate instead.
380 ///
381 /// # Examples
382 ///
383 /// ```rust
384 /// use std::num::FpCategory;
385 /// # use half::prelude::*;
386 ///
387 /// let num = f16::from_f32(12.4_f32);
388 /// let inf = f16::INFINITY;
389 ///
390 /// assert_eq!(num.classify(), FpCategory::Normal);
391 /// assert_eq!(inf.classify(), FpCategory::Infinite);
392 /// ```
393 #[must_use]
394 pub const fn classify(self) -> FpCategory {
395 let exp = self.0 & 0x7C00u16;
396 let man = self.0 & 0x03FFu16;
397 match (exp, man) {
398 (0, 0) => FpCategory::Zero,
399 (0, _) => FpCategory::Subnormal,
400 (0x7C00u16, 0) => FpCategory::Infinite,
401 (0x7C00u16, _) => FpCategory::Nan,
402 _ => FpCategory::Normal,
403 }
404 }
405
406 /// Returns a number that represents the sign of `self`.
407 ///
408 /// * `1.0` if the number is positive, `+0.0` or [`INFINITY`][f16::INFINITY]
409 /// * `-1.0` if the number is negative, `-0.0` or [`NEG_INFINITY`][f16::NEG_INFINITY]
410 /// * [`NAN`][f16::NAN] if the number is `NaN`
411 ///
412 /// # Examples
413 ///
414 /// ```rust
415 /// # use half::prelude::*;
416 ///
417 /// let f = f16::from_f32(3.5_f32);
418 ///
419 /// assert_eq!(f.signum(), f16::from_f32(1.0));
420 /// assert_eq!(f16::NEG_INFINITY.signum(), f16::from_f32(-1.0));
421 ///
422 /// assert!(f16::NAN.signum().is_nan());
423 /// ```
424 #[must_use]
425 pub const fn signum(self) -> f16 {
426 if self.is_nan() {
427 self
428 } else if self.0 & 0x8000u16 != 0 {
429 Self::NEG_ONE
430 } else {
431 Self::ONE
432 }
433 }
434
435 /// Returns `true` if and only if `self` has a positive sign, including `+0.0`, `NaNs` with a
436 /// positive sign bit and +∞.
437 ///
438 /// # Examples
439 ///
440 /// ```rust
441 /// # use half::prelude::*;
442 ///
443 /// let nan = f16::NAN;
444 /// let f = f16::from_f32(7.0_f32);
445 /// let g = f16::from_f32(-7.0_f32);
446 ///
447 /// assert!(f.is_sign_positive());
448 /// assert!(!g.is_sign_positive());
449 /// // `NaN` can be either positive or negative
450 /// assert!(nan.is_sign_positive() != nan.is_sign_negative());
451 /// ```
452 #[inline]
453 #[must_use]
454 pub const fn is_sign_positive(self) -> bool {
455 self.0 & 0x8000u16 == 0
456 }
457
458 /// Returns `true` if and only if `self` has a negative sign, including `-0.0`, `NaNs` with a
459 /// negative sign bit and −∞.
460 ///
461 /// # Examples
462 ///
463 /// ```rust
464 /// # use half::prelude::*;
465 ///
466 /// let nan = f16::NAN;
467 /// let f = f16::from_f32(7.0f32);
468 /// let g = f16::from_f32(-7.0f32);
469 ///
470 /// assert!(!f.is_sign_negative());
471 /// assert!(g.is_sign_negative());
472 /// // `NaN` can be either positive or negative
473 /// assert!(nan.is_sign_positive() != nan.is_sign_negative());
474 /// ```
475 #[inline]
476 #[must_use]
477 pub const fn is_sign_negative(self) -> bool {
478 self.0 & 0x8000u16 != 0
479 }
480
481 /// Returns a number composed of the magnitude of `self` and the sign of `sign`.
482 ///
483 /// Equal to `self` if the sign of `self` and `sign` are the same, otherwise equal to `-self`.
484 /// If `self` is NaN, then NaN with the sign of `sign` is returned.
485 ///
486 /// # Examples
487 ///
488 /// ```
489 /// # use half::prelude::*;
490 /// let f = f16::from_f32(3.5);
491 ///
492 /// assert_eq!(f.copysign(f16::from_f32(0.42)), f16::from_f32(3.5));
493 /// assert_eq!(f.copysign(f16::from_f32(-0.42)), f16::from_f32(-3.5));
494 /// assert_eq!((-f).copysign(f16::from_f32(0.42)), f16::from_f32(3.5));
495 /// assert_eq!((-f).copysign(f16::from_f32(-0.42)), f16::from_f32(-3.5));
496 ///
497 /// assert!(f16::NAN.copysign(f16::from_f32(1.0)).is_nan());
498 /// ```
499 #[inline]
500 #[must_use]
501 pub const fn copysign(self, sign: f16) -> f16 {
502 f16((sign.0 & 0x8000u16) | (self.0 & 0x7FFFu16))
503 }
504
505 /// Returns the maximum of the two numbers.
506 ///
507 /// If one of the arguments is NaN, then the other argument is returned.
508 ///
509 /// # Examples
510 ///
511 /// ```
512 /// # use half::prelude::*;
513 /// let x = f16::from_f32(1.0);
514 /// let y = f16::from_f32(2.0);
515 ///
516 /// assert_eq!(x.max(y), y);
517 /// ```
518 #[inline]
519 #[must_use]
520 pub fn max(self, other: f16) -> f16 {
521 if other > self && !other.is_nan() {
522 other
523 } else {
524 self
525 }
526 }
527
528 /// Returns the minimum of the two numbers.
529 ///
530 /// If one of the arguments is NaN, then the other argument is returned.
531 ///
532 /// # Examples
533 ///
534 /// ```
535 /// # use half::prelude::*;
536 /// let x = f16::from_f32(1.0);
537 /// let y = f16::from_f32(2.0);
538 ///
539 /// assert_eq!(x.min(y), x);
540 /// ```
541 #[inline]
542 #[must_use]
543 pub fn min(self, other: f16) -> f16 {
544 if other < self && !other.is_nan() {
545 other
546 } else {
547 self
548 }
549 }
550
551 /// Restrict a value to a certain interval unless it is NaN.
552 ///
553 /// Returns `max` if `self` is greater than `max`, and `min` if `self` is less than `min`.
554 /// Otherwise this returns `self`.
555 ///
556 /// Note that this function returns NaN if the initial value was NaN as well.
557 ///
558 /// # Panics
559 /// Panics if `min > max`, `min` is NaN, or `max` is NaN.
560 ///
561 /// # Examples
562 ///
563 /// ```
564 /// # use half::prelude::*;
565 /// assert!(f16::from_f32(-3.0).clamp(f16::from_f32(-2.0), f16::from_f32(1.0)) == f16::from_f32(-2.0));
566 /// assert!(f16::from_f32(0.0).clamp(f16::from_f32(-2.0), f16::from_f32(1.0)) == f16::from_f32(0.0));
567 /// assert!(f16::from_f32(2.0).clamp(f16::from_f32(-2.0), f16::from_f32(1.0)) == f16::from_f32(1.0));
568 /// assert!(f16::NAN.clamp(f16::from_f32(-2.0), f16::from_f32(1.0)).is_nan());
569 /// ```
570 #[inline]
571 #[must_use]
572 pub fn clamp(self, min: f16, max: f16) -> f16 {
573 assert!(min <= max);
574 let mut x = self;
575 if x < min {
576 x = min;
577 }
578 if x > max {
579 x = max;
580 }
581 x
582 }
583
584 /// Returns the ordering between `self` and `other`.
585 ///
586 /// Unlike the standard partial comparison between floating point numbers,
587 /// this comparison always produces an ordering in accordance to
588 /// the `totalOrder` predicate as defined in the IEEE 754 (2008 revision)
589 /// floating point standard. The values are ordered in the following sequence:
590 ///
591 /// - negative quiet NaN
592 /// - negative signaling NaN
593 /// - negative infinity
594 /// - negative numbers
595 /// - negative subnormal numbers
596 /// - negative zero
597 /// - positive zero
598 /// - positive subnormal numbers
599 /// - positive numbers
600 /// - positive infinity
601 /// - positive signaling NaN
602 /// - positive quiet NaN.
603 ///
604 /// The ordering established by this function does not always agree with the
605 /// [`PartialOrd`] and [`PartialEq`] implementations of `f16`. For example,
606 /// they consider negative and positive zero equal, while `total_cmp`
607 /// doesn't.
608 ///
609 /// The interpretation of the signaling NaN bit follows the definition in
610 /// the IEEE 754 standard, which may not match the interpretation by some of
611 /// the older, non-conformant (e.g. MIPS) hardware implementations.
612 ///
613 /// # Examples
614 /// ```
615 /// # use half::f16;
616 /// let mut v: Vec<f16> = vec![];
617 /// v.push(f16::ONE);
618 /// v.push(f16::INFINITY);
619 /// v.push(f16::NEG_INFINITY);
620 /// v.push(f16::NAN);
621 /// v.push(f16::MAX_SUBNORMAL);
622 /// v.push(-f16::MAX_SUBNORMAL);
623 /// v.push(f16::ZERO);
624 /// v.push(f16::NEG_ZERO);
625 /// v.push(f16::NEG_ONE);
626 /// v.push(f16::MIN_POSITIVE);
627 ///
628 /// v.sort_by(|a, b| a.total_cmp(&b));
629 ///
630 /// assert!(v
631 /// .into_iter()
632 /// .zip(
633 /// [
634 /// f16::NEG_INFINITY,
635 /// f16::NEG_ONE,
636 /// -f16::MAX_SUBNORMAL,
637 /// f16::NEG_ZERO,
638 /// f16::ZERO,
639 /// f16::MAX_SUBNORMAL,
640 /// f16::MIN_POSITIVE,
641 /// f16::ONE,
642 /// f16::INFINITY,
643 /// f16::NAN
644 /// ]
645 /// .iter()
646 /// )
647 /// .all(|(a, b)| a.to_bits() == b.to_bits()));
648 /// ```
649 // Implementation based on: https://doc.rust-lang.org/std/primitive.f32.html#method.total_cmp
650 #[inline]
651 #[must_use]
652 pub fn total_cmp(&self, other: &Self) -> Ordering {
653 let mut left = self.to_bits() as i16;
654 let mut right = other.to_bits() as i16;
655 left ^= (((left >> 15) as u16) >> 1) as i16;
656 right ^= (((right >> 15) as u16) >> 1) as i16;
657 left.cmp(&right)
658 }
659
660 /// Alternate serialize adapter for serializing as a float.
661 ///
662 /// By default, [`f16`] serializes as a newtype of [`u16`]. This is an alternate serialize
663 /// implementation that serializes as an [`f32`] value. It is designed for use with
664 /// `serialize_with` serde attributes. Deserialization from `f32` values is already supported by
665 /// the default deserialize implementation.
666 ///
667 /// # Examples
668 ///
669 /// A demonstration on how to use this adapater:
670 ///
671 /// ```
672 /// use serde::{Serialize, Deserialize};
673 /// use half::f16;
674 ///
675 /// #[derive(Serialize, Deserialize)]
676 /// struct MyStruct {
677 /// #[serde(serialize_with = "f16::serialize_as_f32")]
678 /// value: f16 // Will be serialized as f32 instead of u16
679 /// }
680 /// ```
681 #[cfg(feature = "serde")]
682 pub fn serialize_as_f32<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
683 serializer.serialize_f32(self.to_f32())
684 }
685
686 /// Alternate serialize adapter for serializing as a string.
687 ///
688 /// By default, [`f16`] serializes as a newtype of [`u16`]. This is an alternate serialize
689 /// implementation that serializes as a string value. It is designed for use with
690 /// `serialize_with` serde attributes. Deserialization from string values is already supported
691 /// by the default deserialize implementation.
692 ///
693 /// # Examples
694 ///
695 /// A demonstration on how to use this adapater:
696 ///
697 /// ```
698 /// use serde::{Serialize, Deserialize};
699 /// use half::f16;
700 ///
701 /// #[derive(Serialize, Deserialize)]
702 /// struct MyStruct {
703 /// #[serde(serialize_with = "f16::serialize_as_string")]
704 /// value: f16 // Will be serialized as a string instead of u16
705 /// }
706 /// ```
707 #[cfg(all(feature = "serde", feature = "alloc"))]
708 pub fn serialize_as_string<S: serde::Serializer>(
709 &self,
710 serializer: S,
711 ) -> Result<S::Ok, S::Error> {
712 serializer.serialize_str(&self.to_string())
713 }
714
715 /// Approximate number of [`f16`] significant digits in base 10
716 pub const DIGITS: u32 = 3;
717 /// [`f16`]
718 /// [machine epsilon](https://en.wikipedia.org/wiki/Machine_epsilon) value
719 ///
720 /// This is the difference between 1.0 and the next largest representable number.
721 pub const EPSILON: f16 = f16(0x1400u16);
722 /// [`f16`] positive Infinity (+∞)
723 pub const INFINITY: f16 = f16(0x7C00u16);
724 /// Number of [`f16`] significant digits in base 2
725 pub const MANTISSA_DIGITS: u32 = 11;
726 /// Largest finite [`f16`] value
727 pub const MAX: f16 = f16(0x7BFF);
728 /// Maximum possible [`f16`] power of 10 exponent
729 pub const MAX_10_EXP: i32 = 4;
730 /// Maximum possible [`f16`] power of 2 exponent
731 pub const MAX_EXP: i32 = 16;
732 /// Smallest finite [`f16`] value
733 pub const MIN: f16 = f16(0xFBFF);
734 /// Minimum possible normal [`f16`] power of 10 exponent
735 pub const MIN_10_EXP: i32 = -4;
736 /// One greater than the minimum possible normal [`f16`] power of 2 exponent
737 pub const MIN_EXP: i32 = -13;
738 /// Smallest positive normal [`f16`] value
739 pub const MIN_POSITIVE: f16 = f16(0x0400u16);
740 /// [`f16`] Not a Number (NaN)
741 pub const NAN: f16 = f16(0x7E00u16);
742 /// [`f16`] negative infinity (-∞)
743 pub const NEG_INFINITY: f16 = f16(0xFC00u16);
744 /// The radix or base of the internal representation of [`f16`]
745 pub const RADIX: u32 = 2;
746
747 /// Minimum positive subnormal [`f16`] value
748 pub const MIN_POSITIVE_SUBNORMAL: f16 = f16(0x0001u16);
749 /// Maximum subnormal [`f16`] value
750 pub const MAX_SUBNORMAL: f16 = f16(0x03FFu16);
751
752 /// [`f16`] 1
753 pub const ONE: f16 = f16(0x3C00u16);
754 /// [`f16`] 0
755 pub const ZERO: f16 = f16(0x0000u16);
756 /// [`f16`] -0
757 pub const NEG_ZERO: f16 = f16(0x8000u16);
758 /// [`f16`] -1
759 pub const NEG_ONE: f16 = f16(0xBC00u16);
760
761 /// [`f16`] Euler's number (ℯ)
762 pub const E: f16 = f16(0x4170u16);
763 /// [`f16`] Archimedes' constant (π)
764 pub const PI: f16 = f16(0x4248u16);
765 /// [`f16`] 1/π
766 pub const FRAC_1_PI: f16 = f16(0x3518u16);
767 /// [`f16`] 1/√2
768 pub const FRAC_1_SQRT_2: f16 = f16(0x39A8u16);
769 /// [`f16`] 2/π
770 pub const FRAC_2_PI: f16 = f16(0x3918u16);
771 /// [`f16`] 2/√π
772 pub const FRAC_2_SQRT_PI: f16 = f16(0x3C83u16);
773 /// [`f16`] π/2
774 pub const FRAC_PI_2: f16 = f16(0x3E48u16);
775 /// [`f16`] π/3
776 pub const FRAC_PI_3: f16 = f16(0x3C30u16);
777 /// [`f16`] π/4
778 pub const FRAC_PI_4: f16 = f16(0x3A48u16);
779 /// [`f16`] π/6
780 pub const FRAC_PI_6: f16 = f16(0x3830u16);
781 /// [`f16`] π/8
782 pub const FRAC_PI_8: f16 = f16(0x3648u16);
783 /// [`f16`] 𝗅𝗇 10
784 pub const LN_10: f16 = f16(0x409Bu16);
785 /// [`f16`] 𝗅𝗇 2
786 pub const LN_2: f16 = f16(0x398Cu16);
787 /// [`f16`] 𝗅𝗈𝗀₁₀ℯ
788 pub const LOG10_E: f16 = f16(0x36F3u16);
789 /// [`f16`] 𝗅𝗈𝗀₁₀2
790 pub const LOG10_2: f16 = f16(0x34D1u16);
791 /// [`f16`] 𝗅𝗈𝗀₂ℯ
792 pub const LOG2_E: f16 = f16(0x3DC5u16);
793 /// [`f16`] 𝗅𝗈𝗀₂10
794 pub const LOG2_10: f16 = f16(0x42A5u16);
795 /// [`f16`] √2
796 pub const SQRT_2: f16 = f16(0x3DA8u16);
797}
798
799impl From<f16> for f32 {
800 #[inline]
801 fn from(x: f16) -> f32 {
802 x.to_f32()
803 }
804}
805
806impl From<f16> for f64 {
807 #[inline]
808 fn from(x: f16) -> f64 {
809 x.to_f64()
810 }
811}
812
813impl From<i8> for f16 {
814 #[inline]
815 fn from(x: i8) -> f16 {
816 // Convert to f32, then to f16
817 f16::from_f32(f32::from(x))
818 }
819}
820
821impl From<u8> for f16 {
822 #[inline]
823 fn from(x: u8) -> f16 {
824 // Convert to f32, then to f16
825 f16::from_f32(f32::from(x))
826 }
827}
828
829impl PartialEq for f16 {
830 fn eq(&self, other: &f16) -> bool {
831 if self.is_nan() || other.is_nan() {
832 false
833 } else {
834 (self.0 == other.0) || ((self.0 | other.0) & 0x7FFFu16 == 0)
835 }
836 }
837}
838
839impl PartialOrd for f16 {
840 fn partial_cmp(&self, other: &f16) -> Option<Ordering> {
841 if self.is_nan() || other.is_nan() {
842 None
843 } else {
844 let neg = self.0 & 0x8000u16 != 0;
845 let other_neg = other.0 & 0x8000u16 != 0;
846 match (neg, other_neg) {
847 (false, false) => Some(self.0.cmp(&other.0)),
848 (false, true) => {
849 if (self.0 | other.0) & 0x7FFFu16 == 0 {
850 Some(Ordering::Equal)
851 } else {
852 Some(Ordering::Greater)
853 }
854 }
855 (true, false) => {
856 if (self.0 | other.0) & 0x7FFFu16 == 0 {
857 Some(Ordering::Equal)
858 } else {
859 Some(Ordering::Less)
860 }
861 }
862 (true, true) => Some(other.0.cmp(&self.0)),
863 }
864 }
865 }
866
867 fn lt(&self, other: &f16) -> bool {
868 if self.is_nan() || other.is_nan() {
869 false
870 } else {
871 let neg = self.0 & 0x8000u16 != 0;
872 let other_neg = other.0 & 0x8000u16 != 0;
873 match (neg, other_neg) {
874 (false, false) => self.0 < other.0,
875 (false, true) => false,
876 (true, false) => (self.0 | other.0) & 0x7FFFu16 != 0,
877 (true, true) => self.0 > other.0,
878 }
879 }
880 }
881
882 fn le(&self, other: &f16) -> bool {
883 if self.is_nan() || other.is_nan() {
884 false
885 } else {
886 let neg = self.0 & 0x8000u16 != 0;
887 let other_neg = other.0 & 0x8000u16 != 0;
888 match (neg, other_neg) {
889 (false, false) => self.0 <= other.0,
890 (false, true) => (self.0 | other.0) & 0x7FFFu16 == 0,
891 (true, false) => true,
892 (true, true) => self.0 >= other.0,
893 }
894 }
895 }
896
897 fn gt(&self, other: &f16) -> bool {
898 if self.is_nan() || other.is_nan() {
899 false
900 } else {
901 let neg = self.0 & 0x8000u16 != 0;
902 let other_neg = other.0 & 0x8000u16 != 0;
903 match (neg, other_neg) {
904 (false, false) => self.0 > other.0,
905 (false, true) => (self.0 | other.0) & 0x7FFFu16 != 0,
906 (true, false) => false,
907 (true, true) => self.0 < other.0,
908 }
909 }
910 }
911
912 fn ge(&self, other: &f16) -> bool {
913 if self.is_nan() || other.is_nan() {
914 false
915 } else {
916 let neg = self.0 & 0x8000u16 != 0;
917 let other_neg = other.0 & 0x8000u16 != 0;
918 match (neg, other_neg) {
919 (false, false) => self.0 >= other.0,
920 (false, true) => true,
921 (true, false) => (self.0 | other.0) & 0x7FFFu16 == 0,
922 (true, true) => self.0 <= other.0,
923 }
924 }
925 }
926}
927
928#[cfg(not(target_arch = "spirv"))]
929impl FromStr for f16 {
930 type Err = ParseFloatError;
931 fn from_str(src: &str) -> Result<f16, ParseFloatError> {
932 f32::from_str(src).map(op:f16::from_f32)
933 }
934}
935
936#[cfg(not(target_arch = "spirv"))]
937impl Debug for f16 {
938 fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
939 Debug::fmt(&self.to_f32(), f)
940 }
941}
942
943#[cfg(not(target_arch = "spirv"))]
944impl Display for f16 {
945 fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
946 Display::fmt(&self.to_f32(), f)
947 }
948}
949
950#[cfg(not(target_arch = "spirv"))]
951impl LowerExp for f16 {
952 fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
953 write!(f, "{:e}", self.to_f32())
954 }
955}
956
957#[cfg(not(target_arch = "spirv"))]
958impl UpperExp for f16 {
959 fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
960 write!(f, "{:E}", self.to_f32())
961 }
962}
963
964#[cfg(not(target_arch = "spirv"))]
965impl Binary for f16 {
966 fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
967 write!(f, "{:b}", self.0)
968 }
969}
970
971#[cfg(not(target_arch = "spirv"))]
972impl Octal for f16 {
973 fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
974 write!(f, "{:o}", self.0)
975 }
976}
977
978#[cfg(not(target_arch = "spirv"))]
979impl LowerHex for f16 {
980 fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
981 write!(f, "{:x}", self.0)
982 }
983}
984
985#[cfg(not(target_arch = "spirv"))]
986impl UpperHex for f16 {
987 fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
988 write!(f, "{:X}", self.0)
989 }
990}
991
992impl Neg for f16 {
993 type Output = Self;
994
995 #[inline]
996 fn neg(self) -> Self::Output {
997 Self(self.0 ^ 0x8000)
998 }
999}
1000
1001impl Neg for &f16 {
1002 type Output = <f16 as Neg>::Output;
1003
1004 #[inline]
1005 fn neg(self) -> Self::Output {
1006 Neg::neg(*self)
1007 }
1008}
1009
1010impl Add for f16 {
1011 type Output = Self;
1012
1013 #[inline]
1014 fn add(self, rhs: Self) -> Self::Output {
1015 f16(arch::add_f16(self.0, b:rhs.0))
1016 }
1017}
1018
1019impl Add<&f16> for f16 {
1020 type Output = <f16 as Add<f16>>::Output;
1021
1022 #[inline]
1023 fn add(self, rhs: &f16) -> Self::Output {
1024 self.add(*rhs)
1025 }
1026}
1027
1028impl Add<&f16> for &f16 {
1029 type Output = <f16 as Add<f16>>::Output;
1030
1031 #[inline]
1032 fn add(self, rhs: &f16) -> Self::Output {
1033 (*self).add(*rhs)
1034 }
1035}
1036
1037impl Add<f16> for &f16 {
1038 type Output = <f16 as Add<f16>>::Output;
1039
1040 #[inline]
1041 fn add(self, rhs: f16) -> Self::Output {
1042 (*self).add(rhs)
1043 }
1044}
1045
1046impl AddAssign for f16 {
1047 #[inline]
1048 fn add_assign(&mut self, rhs: Self) {
1049 *self = (*self).add(rhs);
1050 }
1051}
1052
1053impl AddAssign<&f16> for f16 {
1054 #[inline]
1055 fn add_assign(&mut self, rhs: &f16) {
1056 *self = (*self).add(rhs);
1057 }
1058}
1059
1060impl Sub for f16 {
1061 type Output = Self;
1062
1063 #[inline]
1064 fn sub(self, rhs: Self) -> Self::Output {
1065 f16(arch::subtract_f16(self.0, b:rhs.0))
1066 }
1067}
1068
1069impl Sub<&f16> for f16 {
1070 type Output = <f16 as Sub<f16>>::Output;
1071
1072 #[inline]
1073 fn sub(self, rhs: &f16) -> Self::Output {
1074 self.sub(*rhs)
1075 }
1076}
1077
1078impl Sub<&f16> for &f16 {
1079 type Output = <f16 as Sub<f16>>::Output;
1080
1081 #[inline]
1082 fn sub(self, rhs: &f16) -> Self::Output {
1083 (*self).sub(*rhs)
1084 }
1085}
1086
1087impl Sub<f16> for &f16 {
1088 type Output = <f16 as Sub<f16>>::Output;
1089
1090 #[inline]
1091 fn sub(self, rhs: f16) -> Self::Output {
1092 (*self).sub(rhs)
1093 }
1094}
1095
1096impl SubAssign for f16 {
1097 #[inline]
1098 fn sub_assign(&mut self, rhs: Self) {
1099 *self = (*self).sub(rhs);
1100 }
1101}
1102
1103impl SubAssign<&f16> for f16 {
1104 #[inline]
1105 fn sub_assign(&mut self, rhs: &f16) {
1106 *self = (*self).sub(rhs);
1107 }
1108}
1109
1110impl Mul for f16 {
1111 type Output = Self;
1112
1113 #[inline]
1114 fn mul(self, rhs: Self) -> Self::Output {
1115 f16(arch::multiply_f16(self.0, b:rhs.0))
1116 }
1117}
1118
1119impl Mul<&f16> for f16 {
1120 type Output = <f16 as Mul<f16>>::Output;
1121
1122 #[inline]
1123 fn mul(self, rhs: &f16) -> Self::Output {
1124 self.mul(*rhs)
1125 }
1126}
1127
1128impl Mul<&f16> for &f16 {
1129 type Output = <f16 as Mul<f16>>::Output;
1130
1131 #[inline]
1132 fn mul(self, rhs: &f16) -> Self::Output {
1133 (*self).mul(*rhs)
1134 }
1135}
1136
1137impl Mul<f16> for &f16 {
1138 type Output = <f16 as Mul<f16>>::Output;
1139
1140 #[inline]
1141 fn mul(self, rhs: f16) -> Self::Output {
1142 (*self).mul(rhs)
1143 }
1144}
1145
1146impl MulAssign for f16 {
1147 #[inline]
1148 fn mul_assign(&mut self, rhs: Self) {
1149 *self = (*self).mul(rhs);
1150 }
1151}
1152
1153impl MulAssign<&f16> for f16 {
1154 #[inline]
1155 fn mul_assign(&mut self, rhs: &f16) {
1156 *self = (*self).mul(rhs);
1157 }
1158}
1159
1160impl Div for f16 {
1161 type Output = Self;
1162
1163 #[inline]
1164 fn div(self, rhs: Self) -> Self::Output {
1165 f16(arch::divide_f16(self.0, b:rhs.0))
1166 }
1167}
1168
1169impl Div<&f16> for f16 {
1170 type Output = <f16 as Div<f16>>::Output;
1171
1172 #[inline]
1173 fn div(self, rhs: &f16) -> Self::Output {
1174 self.div(*rhs)
1175 }
1176}
1177
1178impl Div<&f16> for &f16 {
1179 type Output = <f16 as Div<f16>>::Output;
1180
1181 #[inline]
1182 fn div(self, rhs: &f16) -> Self::Output {
1183 (*self).div(*rhs)
1184 }
1185}
1186
1187impl Div<f16> for &f16 {
1188 type Output = <f16 as Div<f16>>::Output;
1189
1190 #[inline]
1191 fn div(self, rhs: f16) -> Self::Output {
1192 (*self).div(rhs)
1193 }
1194}
1195
1196impl DivAssign for f16 {
1197 #[inline]
1198 fn div_assign(&mut self, rhs: Self) {
1199 *self = (*self).div(rhs);
1200 }
1201}
1202
1203impl DivAssign<&f16> for f16 {
1204 #[inline]
1205 fn div_assign(&mut self, rhs: &f16) {
1206 *self = (*self).div(rhs);
1207 }
1208}
1209
1210impl Rem for f16 {
1211 type Output = Self;
1212
1213 #[inline]
1214 fn rem(self, rhs: Self) -> Self::Output {
1215 f16(arch::remainder_f16(self.0, b:rhs.0))
1216 }
1217}
1218
1219impl Rem<&f16> for f16 {
1220 type Output = <f16 as Rem<f16>>::Output;
1221
1222 #[inline]
1223 fn rem(self, rhs: &f16) -> Self::Output {
1224 self.rem(*rhs)
1225 }
1226}
1227
1228impl Rem<&f16> for &f16 {
1229 type Output = <f16 as Rem<f16>>::Output;
1230
1231 #[inline]
1232 fn rem(self, rhs: &f16) -> Self::Output {
1233 (*self).rem(*rhs)
1234 }
1235}
1236
1237impl Rem<f16> for &f16 {
1238 type Output = <f16 as Rem<f16>>::Output;
1239
1240 #[inline]
1241 fn rem(self, rhs: f16) -> Self::Output {
1242 (*self).rem(rhs)
1243 }
1244}
1245
1246impl RemAssign for f16 {
1247 #[inline]
1248 fn rem_assign(&mut self, rhs: Self) {
1249 *self = (*self).rem(rhs);
1250 }
1251}
1252
1253impl RemAssign<&f16> for f16 {
1254 #[inline]
1255 fn rem_assign(&mut self, rhs: &f16) {
1256 *self = (*self).rem(rhs);
1257 }
1258}
1259
1260impl Product for f16 {
1261 #[inline]
1262 fn product<I: Iterator<Item = Self>>(iter: I) -> Self {
1263 f16(arch::product_f16(iter:iter.map(|f: f16| f.to_bits())))
1264 }
1265}
1266
1267impl<'a> Product<&'a f16> for f16 {
1268 #[inline]
1269 fn product<I: Iterator<Item = &'a f16>>(iter: I) -> Self {
1270 f16(arch::product_f16(iter:iter.map(|f: &f16| f.to_bits())))
1271 }
1272}
1273
1274impl Sum for f16 {
1275 #[inline]
1276 fn sum<I: Iterator<Item = Self>>(iter: I) -> Self {
1277 f16(arch::sum_f16(iter:iter.map(|f: f16| f.to_bits())))
1278 }
1279}
1280
1281impl<'a> Sum<&'a f16> for f16 {
1282 #[inline]
1283 fn sum<I: Iterator<Item = &'a f16>>(iter: I) -> Self {
1284 f16(arch::sum_f16(iter:iter.map(|f: &f16| f.to_bits())))
1285 }
1286}
1287
/// Serde visitor used by the `Deserialize` implementation of [`f16`].
#[cfg(feature = "serde")]
struct Visitor;

/// Deserializes an [`f16`] by asking the deserializer for a newtype struct named `"f16"` and
/// routing whatever it provides through [`Visitor`].
#[cfg(feature = "serde")]
impl<'de> Deserialize<'de> for f16 {
    fn deserialize<D: serde::de::Deserializer<'de>>(deserializer: D) -> Result<f16, D::Error> {
        deserializer.deserialize_newtype_struct("f16", Visitor)
    }
}
1300
/// Accepts an `f16` encoded as a newtype struct wrapping `u16`, as a string, or as a float.
#[cfg(feature = "serde")]
impl<'de> serde::de::Visitor<'de> for Visitor {
    type Value = f16;

    /// Describes the expected input for serde's error messages.
    fn expecting(&self, formatter: &mut core::fmt::Formatter) -> core::fmt::Result {
        formatter.write_str("tuple struct f16")
    }

    /// Reads the wrapped `u16` and uses it as the raw bit pattern.
    fn visit_newtype_struct<D: serde::Deserializer<'de>>(
        self,
        deserializer: D,
    ) -> Result<Self::Value, D::Error> {
        <u16 as Deserialize>::deserialize(deserializer).map(f16)
    }

    /// Parses the string as an `f16`; any parse failure is reported as an invalid value.
    fn visit_str<E: serde::de::Error>(self, v: &str) -> Result<Self::Value, E> {
        match v.parse::<f16>() {
            Ok(parsed) => Ok(parsed),
            Err(_) => Err(E::invalid_value(
                serde::de::Unexpected::Str(v),
                &"a float string",
            )),
        }
    }

    /// Converts the `f32` with `f16::from_f32`.
    fn visit_f32<E: serde::de::Error>(self, v: f32) -> Result<Self::Value, E> {
        let converted = f16::from_f32(v);
        Ok(converted)
    }

    /// Converts the `f64` with `f16::from_f64`.
    fn visit_f64<E: serde::de::Error>(self, v: f64) -> Result<Self::Value, E> {
        let converted = f16::from_f64(v);
        Ok(converted)
    }
}
1339
1340#[allow(
1341 clippy::cognitive_complexity,
1342 clippy::float_cmp,
1343 clippy::neg_cmp_op_on_partial_ord
1344)]
1345#[cfg(test)]
1346mod test {
1347 use super::*;
1348 #[allow(unused_imports)]
1349 use core::cmp::Ordering;
1350 #[cfg(feature = "num-traits")]
1351 use num_traits::{AsPrimitive, FromPrimitive, ToPrimitive};
1352 use quickcheck_macros::quickcheck;
1353
1354 #[cfg(feature = "num-traits")]
1355 #[test]
1356 fn as_primitive() {
1357 let two = f16::from_f32(2.0);
1358 assert_eq!(<i32 as AsPrimitive<f16>>::as_(2), two);
1359 assert_eq!(<f16 as AsPrimitive<i32>>::as_(two), 2);
1360
1361 assert_eq!(<f32 as AsPrimitive<f16>>::as_(2.0), two);
1362 assert_eq!(<f16 as AsPrimitive<f32>>::as_(two), 2.0);
1363
1364 assert_eq!(<f64 as AsPrimitive<f16>>::as_(2.0), two);
1365 assert_eq!(<f16 as AsPrimitive<f64>>::as_(two), 2.0);
1366 }
1367
1368 #[cfg(feature = "num-traits")]
1369 #[test]
1370 fn to_primitive() {
1371 let two = f16::from_f32(2.0);
1372 assert_eq!(ToPrimitive::to_i32(&two).unwrap(), 2i32);
1373 assert_eq!(ToPrimitive::to_f32(&two).unwrap(), 2.0f32);
1374 assert_eq!(ToPrimitive::to_f64(&two).unwrap(), 2.0f64);
1375 }
1376
1377 #[cfg(feature = "num-traits")]
1378 #[test]
1379 fn from_primitive() {
1380 let two = f16::from_f32(2.0);
1381 assert_eq!(<f16 as FromPrimitive>::from_i32(2).unwrap(), two);
1382 assert_eq!(<f16 as FromPrimitive>::from_f32(2.0).unwrap(), two);
1383 assert_eq!(<f16 as FromPrimitive>::from_f64(2.0).unwrap(), two);
1384 }
1385
1386 #[test]
1387 fn test_f16_consts() {
1388 // DIGITS
1389 let digits = ((f16::MANTISSA_DIGITS as f32 - 1.0) * 2f32.log10()).floor() as u32;
1390 assert_eq!(f16::DIGITS, digits);
1391 // sanity check to show test is good
1392 let digits32 = ((core::f32::MANTISSA_DIGITS as f32 - 1.0) * 2f32.log10()).floor() as u32;
1393 assert_eq!(core::f32::DIGITS, digits32);
1394
1395 // EPSILON
1396 let one = f16::from_f32(1.0);
1397 let one_plus_epsilon = f16::from_bits(one.to_bits() + 1);
1398 let epsilon = f16::from_f32(one_plus_epsilon.to_f32() - 1.0);
1399 assert_eq!(f16::EPSILON, epsilon);
1400 // sanity check to show test is good
1401 let one_plus_epsilon32 = f32::from_bits(1.0f32.to_bits() + 1);
1402 let epsilon32 = one_plus_epsilon32 - 1f32;
1403 assert_eq!(core::f32::EPSILON, epsilon32);
1404
1405 // MAX, MIN and MIN_POSITIVE
1406 let max = f16::from_bits(f16::INFINITY.to_bits() - 1);
1407 let min = f16::from_bits(f16::NEG_INFINITY.to_bits() - 1);
1408 let min_pos = f16::from_f32(2f32.powi(f16::MIN_EXP - 1));
1409 assert_eq!(f16::MAX, max);
1410 assert_eq!(f16::MIN, min);
1411 assert_eq!(f16::MIN_POSITIVE, min_pos);
1412 // sanity check to show test is good
1413 let max32 = f32::from_bits(core::f32::INFINITY.to_bits() - 1);
1414 let min32 = f32::from_bits(core::f32::NEG_INFINITY.to_bits() - 1);
1415 let min_pos32 = 2f32.powi(core::f32::MIN_EXP - 1);
1416 assert_eq!(core::f32::MAX, max32);
1417 assert_eq!(core::f32::MIN, min32);
1418 assert_eq!(core::f32::MIN_POSITIVE, min_pos32);
1419
1420 // MIN_10_EXP and MAX_10_EXP
1421 let ten_to_min = 10f32.powi(f16::MIN_10_EXP);
1422 assert!(ten_to_min / 10.0 < f16::MIN_POSITIVE.to_f32());
1423 assert!(ten_to_min > f16::MIN_POSITIVE.to_f32());
1424 let ten_to_max = 10f32.powi(f16::MAX_10_EXP);
1425 assert!(ten_to_max < f16::MAX.to_f32());
1426 assert!(ten_to_max * 10.0 > f16::MAX.to_f32());
1427 // sanity check to show test is good
1428 let ten_to_min32 = 10f64.powi(core::f32::MIN_10_EXP);
1429 assert!(ten_to_min32 / 10.0 < f64::from(core::f32::MIN_POSITIVE));
1430 assert!(ten_to_min32 > f64::from(core::f32::MIN_POSITIVE));
1431 let ten_to_max32 = 10f64.powi(core::f32::MAX_10_EXP);
1432 assert!(ten_to_max32 < f64::from(core::f32::MAX));
1433 assert!(ten_to_max32 * 10.0 > f64::from(core::f32::MAX));
1434 }
1435
1436 #[test]
1437 fn test_f16_consts_from_f32() {
1438 let one = f16::from_f32(1.0);
1439 let zero = f16::from_f32(0.0);
1440 let neg_zero = f16::from_f32(-0.0);
1441 let neg_one = f16::from_f32(-1.0);
1442 let inf = f16::from_f32(core::f32::INFINITY);
1443 let neg_inf = f16::from_f32(core::f32::NEG_INFINITY);
1444 let nan = f16::from_f32(core::f32::NAN);
1445
1446 assert_eq!(f16::ONE, one);
1447 assert_eq!(f16::ZERO, zero);
1448 assert!(zero.is_sign_positive());
1449 assert_eq!(f16::NEG_ZERO, neg_zero);
1450 assert!(neg_zero.is_sign_negative());
1451 assert_eq!(f16::NEG_ONE, neg_one);
1452 assert!(neg_one.is_sign_negative());
1453 assert_eq!(f16::INFINITY, inf);
1454 assert_eq!(f16::NEG_INFINITY, neg_inf);
1455 assert!(nan.is_nan());
1456 assert!(f16::NAN.is_nan());
1457
1458 let e = f16::from_f32(core::f32::consts::E);
1459 let pi = f16::from_f32(core::f32::consts::PI);
1460 let frac_1_pi = f16::from_f32(core::f32::consts::FRAC_1_PI);
1461 let frac_1_sqrt_2 = f16::from_f32(core::f32::consts::FRAC_1_SQRT_2);
1462 let frac_2_pi = f16::from_f32(core::f32::consts::FRAC_2_PI);
1463 let frac_2_sqrt_pi = f16::from_f32(core::f32::consts::FRAC_2_SQRT_PI);
1464 let frac_pi_2 = f16::from_f32(core::f32::consts::FRAC_PI_2);
1465 let frac_pi_3 = f16::from_f32(core::f32::consts::FRAC_PI_3);
1466 let frac_pi_4 = f16::from_f32(core::f32::consts::FRAC_PI_4);
1467 let frac_pi_6 = f16::from_f32(core::f32::consts::FRAC_PI_6);
1468 let frac_pi_8 = f16::from_f32(core::f32::consts::FRAC_PI_8);
1469 let ln_10 = f16::from_f32(core::f32::consts::LN_10);
1470 let ln_2 = f16::from_f32(core::f32::consts::LN_2);
1471 let log10_e = f16::from_f32(core::f32::consts::LOG10_E);
1472 // core::f32::consts::LOG10_2 requires rustc 1.43.0
1473 let log10_2 = f16::from_f32(2f32.log10());
1474 let log2_e = f16::from_f32(core::f32::consts::LOG2_E);
1475 // core::f32::consts::LOG2_10 requires rustc 1.43.0
1476 let log2_10 = f16::from_f32(10f32.log2());
1477 let sqrt_2 = f16::from_f32(core::f32::consts::SQRT_2);
1478
1479 assert_eq!(f16::E, e);
1480 assert_eq!(f16::PI, pi);
1481 assert_eq!(f16::FRAC_1_PI, frac_1_pi);
1482 assert_eq!(f16::FRAC_1_SQRT_2, frac_1_sqrt_2);
1483 assert_eq!(f16::FRAC_2_PI, frac_2_pi);
1484 assert_eq!(f16::FRAC_2_SQRT_PI, frac_2_sqrt_pi);
1485 assert_eq!(f16::FRAC_PI_2, frac_pi_2);
1486 assert_eq!(f16::FRAC_PI_3, frac_pi_3);
1487 assert_eq!(f16::FRAC_PI_4, frac_pi_4);
1488 assert_eq!(f16::FRAC_PI_6, frac_pi_6);
1489 assert_eq!(f16::FRAC_PI_8, frac_pi_8);
1490 assert_eq!(f16::LN_10, ln_10);
1491 assert_eq!(f16::LN_2, ln_2);
1492 assert_eq!(f16::LOG10_E, log10_e);
1493 assert_eq!(f16::LOG10_2, log10_2);
1494 assert_eq!(f16::LOG2_E, log2_e);
1495 assert_eq!(f16::LOG2_10, log2_10);
1496 assert_eq!(f16::SQRT_2, sqrt_2);
1497 }
1498
1499 #[test]
1500 fn test_f16_consts_from_f64() {
1501 let one = f16::from_f64(1.0);
1502 let zero = f16::from_f64(0.0);
1503 let neg_zero = f16::from_f64(-0.0);
1504 let inf = f16::from_f64(core::f64::INFINITY);
1505 let neg_inf = f16::from_f64(core::f64::NEG_INFINITY);
1506 let nan = f16::from_f64(core::f64::NAN);
1507
1508 assert_eq!(f16::ONE, one);
1509 assert_eq!(f16::ZERO, zero);
1510 assert!(zero.is_sign_positive());
1511 assert_eq!(f16::NEG_ZERO, neg_zero);
1512 assert!(neg_zero.is_sign_negative());
1513 assert_eq!(f16::INFINITY, inf);
1514 assert_eq!(f16::NEG_INFINITY, neg_inf);
1515 assert!(nan.is_nan());
1516 assert!(f16::NAN.is_nan());
1517
1518 let e = f16::from_f64(core::f64::consts::E);
1519 let pi = f16::from_f64(core::f64::consts::PI);
1520 let frac_1_pi = f16::from_f64(core::f64::consts::FRAC_1_PI);
1521 let frac_1_sqrt_2 = f16::from_f64(core::f64::consts::FRAC_1_SQRT_2);
1522 let frac_2_pi = f16::from_f64(core::f64::consts::FRAC_2_PI);
1523 let frac_2_sqrt_pi = f16::from_f64(core::f64::consts::FRAC_2_SQRT_PI);
1524 let frac_pi_2 = f16::from_f64(core::f64::consts::FRAC_PI_2);
1525 let frac_pi_3 = f16::from_f64(core::f64::consts::FRAC_PI_3);
1526 let frac_pi_4 = f16::from_f64(core::f64::consts::FRAC_PI_4);
1527 let frac_pi_6 = f16::from_f64(core::f64::consts::FRAC_PI_6);
1528 let frac_pi_8 = f16::from_f64(core::f64::consts::FRAC_PI_8);
1529 let ln_10 = f16::from_f64(core::f64::consts::LN_10);
1530 let ln_2 = f16::from_f64(core::f64::consts::LN_2);
1531 let log10_e = f16::from_f64(core::f64::consts::LOG10_E);
1532 // core::f64::consts::LOG10_2 requires rustc 1.43.0
1533 let log10_2 = f16::from_f64(2f64.log10());
1534 let log2_e = f16::from_f64(core::f64::consts::LOG2_E);
1535 // core::f64::consts::LOG2_10 requires rustc 1.43.0
1536 let log2_10 = f16::from_f64(10f64.log2());
1537 let sqrt_2 = f16::from_f64(core::f64::consts::SQRT_2);
1538
1539 assert_eq!(f16::E, e);
1540 assert_eq!(f16::PI, pi);
1541 assert_eq!(f16::FRAC_1_PI, frac_1_pi);
1542 assert_eq!(f16::FRAC_1_SQRT_2, frac_1_sqrt_2);
1543 assert_eq!(f16::FRAC_2_PI, frac_2_pi);
1544 assert_eq!(f16::FRAC_2_SQRT_PI, frac_2_sqrt_pi);
1545 assert_eq!(f16::FRAC_PI_2, frac_pi_2);
1546 assert_eq!(f16::FRAC_PI_3, frac_pi_3);
1547 assert_eq!(f16::FRAC_PI_4, frac_pi_4);
1548 assert_eq!(f16::FRAC_PI_6, frac_pi_6);
1549 assert_eq!(f16::FRAC_PI_8, frac_pi_8);
1550 assert_eq!(f16::LN_10, ln_10);
1551 assert_eq!(f16::LN_2, ln_2);
1552 assert_eq!(f16::LOG10_E, log10_e);
1553 assert_eq!(f16::LOG10_2, log10_2);
1554 assert_eq!(f16::LOG2_E, log2_e);
1555 assert_eq!(f16::LOG2_10, log2_10);
1556 assert_eq!(f16::SQRT_2, sqrt_2);
1557 }
1558
1559 #[test]
1560 fn test_nan_conversion_to_smaller() {
1561 let nan64 = f64::from_bits(0x7FF0_0000_0000_0001u64);
1562 let neg_nan64 = f64::from_bits(0xFFF0_0000_0000_0001u64);
1563 let nan32 = f32::from_bits(0x7F80_0001u32);
1564 let neg_nan32 = f32::from_bits(0xFF80_0001u32);
1565 let nan32_from_64 = nan64 as f32;
1566 let neg_nan32_from_64 = neg_nan64 as f32;
1567 let nan16_from_64 = f16::from_f64(nan64);
1568 let neg_nan16_from_64 = f16::from_f64(neg_nan64);
1569 let nan16_from_32 = f16::from_f32(nan32);
1570 let neg_nan16_from_32 = f16::from_f32(neg_nan32);
1571
1572 assert!(nan64.is_nan() && nan64.is_sign_positive());
1573 assert!(neg_nan64.is_nan() && neg_nan64.is_sign_negative());
1574 assert!(nan32.is_nan() && nan32.is_sign_positive());
1575 assert!(neg_nan32.is_nan() && neg_nan32.is_sign_negative());
1576
1577 // f32/f64 NaN conversion sign is non-deterministic: https://github.com/starkat99/half-rs/issues/103
1578 assert!(nan32_from_64.is_nan());
1579 assert!(neg_nan32_from_64.is_nan());
1580 assert!(nan16_from_64.is_nan());
1581 assert!(neg_nan16_from_64.is_nan());
1582 assert!(nan16_from_32.is_nan());
1583 assert!(neg_nan16_from_32.is_nan());
1584 }
1585
1586 #[test]
1587 fn test_nan_conversion_to_larger() {
1588 let nan16 = f16::from_bits(0x7C01u16);
1589 let neg_nan16 = f16::from_bits(0xFC01u16);
1590 let nan32 = f32::from_bits(0x7F80_0001u32);
1591 let neg_nan32 = f32::from_bits(0xFF80_0001u32);
1592 let nan32_from_16 = f32::from(nan16);
1593 let neg_nan32_from_16 = f32::from(neg_nan16);
1594 let nan64_from_16 = f64::from(nan16);
1595 let neg_nan64_from_16 = f64::from(neg_nan16);
1596 let nan64_from_32 = f64::from(nan32);
1597 let neg_nan64_from_32 = f64::from(neg_nan32);
1598
1599 assert!(nan16.is_nan() && nan16.is_sign_positive());
1600 assert!(neg_nan16.is_nan() && neg_nan16.is_sign_negative());
1601 assert!(nan32.is_nan() && nan32.is_sign_positive());
1602 assert!(neg_nan32.is_nan() && neg_nan32.is_sign_negative());
1603
1604 // f32/f64 NaN conversion sign is non-deterministic: https://github.com/starkat99/half-rs/issues/103
1605 assert!(nan32_from_16.is_nan());
1606 assert!(neg_nan32_from_16.is_nan());
1607 assert!(nan64_from_16.is_nan());
1608 assert!(neg_nan64_from_16.is_nan());
1609 assert!(nan64_from_32.is_nan());
1610 assert!(neg_nan64_from_32.is_nan());
1611 }
1612
1613 #[test]
1614 fn test_f16_to_f32() {
1615 let f = f16::from_f32(7.0);
1616 assert_eq!(f.to_f32(), 7.0f32);
1617
1618 // 7.1 is NOT exactly representable in 16-bit, it's rounded
1619 let f = f16::from_f32(7.1);
1620 let diff = (f.to_f32() - 7.1f32).abs();
1621 // diff must be <= 4 * EPSILON, as 7 has two more significant bits than 1
1622 assert!(diff <= 4.0 * f16::EPSILON.to_f32());
1623
1624 assert_eq!(f16::from_bits(0x0000_0001).to_f32(), 2.0f32.powi(-24));
1625 assert_eq!(f16::from_bits(0x0000_0005).to_f32(), 5.0 * 2.0f32.powi(-24));
1626
1627 assert_eq!(f16::from_bits(0x0000_0001), f16::from_f32(2.0f32.powi(-24)));
1628 assert_eq!(
1629 f16::from_bits(0x0000_0005),
1630 f16::from_f32(5.0 * 2.0f32.powi(-24))
1631 );
1632 }
1633
1634 #[test]
1635 fn test_f16_to_f64() {
1636 let f = f16::from_f64(7.0);
1637 assert_eq!(f.to_f64(), 7.0f64);
1638
1639 // 7.1 is NOT exactly representable in 16-bit, it's rounded
1640 let f = f16::from_f64(7.1);
1641 let diff = (f.to_f64() - 7.1f64).abs();
1642 // diff must be <= 4 * EPSILON, as 7 has two more significant bits than 1
1643 assert!(diff <= 4.0 * f16::EPSILON.to_f64());
1644
1645 assert_eq!(f16::from_bits(0x0000_0001).to_f64(), 2.0f64.powi(-24));
1646 assert_eq!(f16::from_bits(0x0000_0005).to_f64(), 5.0 * 2.0f64.powi(-24));
1647
1648 assert_eq!(f16::from_bits(0x0000_0001), f16::from_f64(2.0f64.powi(-24)));
1649 assert_eq!(
1650 f16::from_bits(0x0000_0005),
1651 f16::from_f64(5.0 * 2.0f64.powi(-24))
1652 );
1653 }
1654
1655 #[test]
1656 fn test_comparisons() {
1657 let zero = f16::from_f64(0.0);
1658 let one = f16::from_f64(1.0);
1659 let neg_zero = f16::from_f64(-0.0);
1660 let neg_one = f16::from_f64(-1.0);
1661
1662 assert_eq!(zero.partial_cmp(&neg_zero), Some(Ordering::Equal));
1663 assert_eq!(neg_zero.partial_cmp(&zero), Some(Ordering::Equal));
1664 assert!(zero == neg_zero);
1665 assert!(neg_zero == zero);
1666 assert!(!(zero != neg_zero));
1667 assert!(!(neg_zero != zero));
1668 assert!(!(zero < neg_zero));
1669 assert!(!(neg_zero < zero));
1670 assert!(zero <= neg_zero);
1671 assert!(neg_zero <= zero);
1672 assert!(!(zero > neg_zero));
1673 assert!(!(neg_zero > zero));
1674 assert!(zero >= neg_zero);
1675 assert!(neg_zero >= zero);
1676
1677 assert_eq!(one.partial_cmp(&neg_zero), Some(Ordering::Greater));
1678 assert_eq!(neg_zero.partial_cmp(&one), Some(Ordering::Less));
1679 assert!(!(one == neg_zero));
1680 assert!(!(neg_zero == one));
1681 assert!(one != neg_zero);
1682 assert!(neg_zero != one);
1683 assert!(!(one < neg_zero));
1684 assert!(neg_zero < one);
1685 assert!(!(one <= neg_zero));
1686 assert!(neg_zero <= one);
1687 assert!(one > neg_zero);
1688 assert!(!(neg_zero > one));
1689 assert!(one >= neg_zero);
1690 assert!(!(neg_zero >= one));
1691
1692 assert_eq!(one.partial_cmp(&neg_one), Some(Ordering::Greater));
1693 assert_eq!(neg_one.partial_cmp(&one), Some(Ordering::Less));
1694 assert!(!(one == neg_one));
1695 assert!(!(neg_one == one));
1696 assert!(one != neg_one);
1697 assert!(neg_one != one);
1698 assert!(!(one < neg_one));
1699 assert!(neg_one < one);
1700 assert!(!(one <= neg_one));
1701 assert!(neg_one <= one);
1702 assert!(one > neg_one);
1703 assert!(!(neg_one > one));
1704 assert!(one >= neg_one);
1705 assert!(!(neg_one >= one));
1706 }
1707
1708 #[test]
1709 #[allow(clippy::erasing_op, clippy::identity_op)]
1710 fn round_to_even_f32() {
1711 // smallest positive subnormal = 0b0.0000_0000_01 * 2^-14 = 2^-24
1712 let min_sub = f16::from_bits(1);
1713 let min_sub_f = (-24f32).exp2();
1714 assert_eq!(f16::from_f32(min_sub_f).to_bits(), min_sub.to_bits());
1715 assert_eq!(f32::from(min_sub).to_bits(), min_sub_f.to_bits());
1716
1717 // 0.0000000000_011111 rounded to 0.0000000000 (< tie, no rounding)
1718 // 0.0000000000_100000 rounded to 0.0000000000 (tie and even, remains at even)
1719 // 0.0000000000_100001 rounded to 0.0000000001 (> tie, rounds up)
1720 assert_eq!(
1721 f16::from_f32(min_sub_f * 0.49).to_bits(),
1722 min_sub.to_bits() * 0
1723 );
1724 assert_eq!(
1725 f16::from_f32(min_sub_f * 0.50).to_bits(),
1726 min_sub.to_bits() * 0
1727 );
1728 assert_eq!(
1729 f16::from_f32(min_sub_f * 0.51).to_bits(),
1730 min_sub.to_bits() * 1
1731 );
1732
1733 // 0.0000000001_011111 rounded to 0.0000000001 (< tie, no rounding)
1734 // 0.0000000001_100000 rounded to 0.0000000010 (tie and odd, rounds up to even)
1735 // 0.0000000001_100001 rounded to 0.0000000010 (> tie, rounds up)
1736 assert_eq!(
1737 f16::from_f32(min_sub_f * 1.49).to_bits(),
1738 min_sub.to_bits() * 1
1739 );
1740 assert_eq!(
1741 f16::from_f32(min_sub_f * 1.50).to_bits(),
1742 min_sub.to_bits() * 2
1743 );
1744 assert_eq!(
1745 f16::from_f32(min_sub_f * 1.51).to_bits(),
1746 min_sub.to_bits() * 2
1747 );
1748
1749 // 0.0000000010_011111 rounded to 0.0000000010 (< tie, no rounding)
1750 // 0.0000000010_100000 rounded to 0.0000000010 (tie and even, remains at even)
1751 // 0.0000000010_100001 rounded to 0.0000000011 (> tie, rounds up)
1752 assert_eq!(
1753 f16::from_f32(min_sub_f * 2.49).to_bits(),
1754 min_sub.to_bits() * 2
1755 );
1756 assert_eq!(
1757 f16::from_f32(min_sub_f * 2.50).to_bits(),
1758 min_sub.to_bits() * 2
1759 );
1760 assert_eq!(
1761 f16::from_f32(min_sub_f * 2.51).to_bits(),
1762 min_sub.to_bits() * 3
1763 );
1764
1765 assert_eq!(
1766 f16::from_f32(2000.49f32).to_bits(),
1767 f16::from_f32(2000.0).to_bits()
1768 );
1769 assert_eq!(
1770 f16::from_f32(2000.50f32).to_bits(),
1771 f16::from_f32(2000.0).to_bits()
1772 );
1773 assert_eq!(
1774 f16::from_f32(2000.51f32).to_bits(),
1775 f16::from_f32(2001.0).to_bits()
1776 );
1777 assert_eq!(
1778 f16::from_f32(2001.49f32).to_bits(),
1779 f16::from_f32(2001.0).to_bits()
1780 );
1781 assert_eq!(
1782 f16::from_f32(2001.50f32).to_bits(),
1783 f16::from_f32(2002.0).to_bits()
1784 );
1785 assert_eq!(
1786 f16::from_f32(2001.51f32).to_bits(),
1787 f16::from_f32(2002.0).to_bits()
1788 );
1789 assert_eq!(
1790 f16::from_f32(2002.49f32).to_bits(),
1791 f16::from_f32(2002.0).to_bits()
1792 );
1793 assert_eq!(
1794 f16::from_f32(2002.50f32).to_bits(),
1795 f16::from_f32(2002.0).to_bits()
1796 );
1797 assert_eq!(
1798 f16::from_f32(2002.51f32).to_bits(),
1799 f16::from_f32(2003.0).to_bits()
1800 );
1801 }
1802
1803 #[test]
1804 #[allow(clippy::erasing_op, clippy::identity_op)]
1805 fn round_to_even_f64() {
1806 // smallest positive subnormal = 0b0.0000_0000_01 * 2^-14 = 2^-24
1807 let min_sub = f16::from_bits(1);
1808 let min_sub_f = (-24f64).exp2();
1809 assert_eq!(f16::from_f64(min_sub_f).to_bits(), min_sub.to_bits());
1810 assert_eq!(f64::from(min_sub).to_bits(), min_sub_f.to_bits());
1811
1812 // 0.0000000000_011111 rounded to 0.0000000000 (< tie, no rounding)
1813 // 0.0000000000_100000 rounded to 0.0000000000 (tie and even, remains at even)
1814 // 0.0000000000_100001 rounded to 0.0000000001 (> tie, rounds up)
1815 assert_eq!(
1816 f16::from_f64(min_sub_f * 0.49).to_bits(),
1817 min_sub.to_bits() * 0
1818 );
1819 assert_eq!(
1820 f16::from_f64(min_sub_f * 0.50).to_bits(),
1821 min_sub.to_bits() * 0
1822 );
1823 assert_eq!(
1824 f16::from_f64(min_sub_f * 0.51).to_bits(),
1825 min_sub.to_bits() * 1
1826 );
1827
1828 // 0.0000000001_011111 rounded to 0.0000000001 (< tie, no rounding)
1829 // 0.0000000001_100000 rounded to 0.0000000010 (tie and odd, rounds up to even)
1830 // 0.0000000001_100001 rounded to 0.0000000010 (> tie, rounds up)
1831 assert_eq!(
1832 f16::from_f64(min_sub_f * 1.49).to_bits(),
1833 min_sub.to_bits() * 1
1834 );
1835 assert_eq!(
1836 f16::from_f64(min_sub_f * 1.50).to_bits(),
1837 min_sub.to_bits() * 2
1838 );
1839 assert_eq!(
1840 f16::from_f64(min_sub_f * 1.51).to_bits(),
1841 min_sub.to_bits() * 2
1842 );
1843
1844 // 0.0000000010_011111 rounded to 0.0000000010 (< tie, no rounding)
1845 // 0.0000000010_100000 rounded to 0.0000000010 (tie and even, remains at even)
1846 // 0.0000000010_100001 rounded to 0.0000000011 (> tie, rounds up)
1847 assert_eq!(
1848 f16::from_f64(min_sub_f * 2.49).to_bits(),
1849 min_sub.to_bits() * 2
1850 );
1851 assert_eq!(
1852 f16::from_f64(min_sub_f * 2.50).to_bits(),
1853 min_sub.to_bits() * 2
1854 );
1855 assert_eq!(
1856 f16::from_f64(min_sub_f * 2.51).to_bits(),
1857 min_sub.to_bits() * 3
1858 );
1859
1860 assert_eq!(
1861 f16::from_f64(2000.49f64).to_bits(),
1862 f16::from_f64(2000.0).to_bits()
1863 );
1864 assert_eq!(
1865 f16::from_f64(2000.50f64).to_bits(),
1866 f16::from_f64(2000.0).to_bits()
1867 );
1868 assert_eq!(
1869 f16::from_f64(2000.51f64).to_bits(),
1870 f16::from_f64(2001.0).to_bits()
1871 );
1872 assert_eq!(
1873 f16::from_f64(2001.49f64).to_bits(),
1874 f16::from_f64(2001.0).to_bits()
1875 );
1876 assert_eq!(
1877 f16::from_f64(2001.50f64).to_bits(),
1878 f16::from_f64(2002.0).to_bits()
1879 );
1880 assert_eq!(
1881 f16::from_f64(2001.51f64).to_bits(),
1882 f16::from_f64(2002.0).to_bits()
1883 );
1884 assert_eq!(
1885 f16::from_f64(2002.49f64).to_bits(),
1886 f16::from_f64(2002.0).to_bits()
1887 );
1888 assert_eq!(
1889 f16::from_f64(2002.50f64).to_bits(),
1890 f16::from_f64(2002.0).to_bits()
1891 );
1892 assert_eq!(
1893 f16::from_f64(2002.51f64).to_bits(),
1894 f16::from_f64(2003.0).to_bits()
1895 );
1896 }
1897
1898 #[test]
1899 fn arithmetic() {
1900 assert_eq!(f16::ONE + f16::ONE, f16::from_f32(2.));
1901 assert_eq!(f16::ONE - f16::ONE, f16::ZERO);
1902 assert_eq!(f16::ONE * f16::ONE, f16::ONE);
1903 assert_eq!(f16::from_f32(2.) * f16::from_f32(2.), f16::from_f32(4.));
1904 assert_eq!(f16::ONE / f16::ONE, f16::ONE);
1905 assert_eq!(f16::from_f32(4.) / f16::from_f32(2.), f16::from_f32(2.));
1906 assert_eq!(f16::from_f32(4.) % f16::from_f32(3.), f16::from_f32(1.));
1907 }
1908
1909 #[cfg(feature = "std")]
1910 #[test]
1911 fn formatting() {
1912 let f = f16::from_f32(0.1152344);
1913
1914 assert_eq!(format!("{:.3}", f), "0.115");
1915 assert_eq!(format!("{:.4}", f), "0.1152");
1916 assert_eq!(format!("{:+.4}", f), "+0.1152");
1917 assert_eq!(format!("{:>+10.4}", f), " +0.1152");
1918
1919 assert_eq!(format!("{:.3?}", f), "0.115");
1920 assert_eq!(format!("{:.4?}", f), "0.1152");
1921 assert_eq!(format!("{:+.4?}", f), "+0.1152");
1922 assert_eq!(format!("{:>+10.4?}", f), " +0.1152");
1923 }
1924
1925 impl quickcheck::Arbitrary for f16 {
1926 fn arbitrary(g: &mut quickcheck::Gen) -> Self {
1927 f16(u16::arbitrary(g))
1928 }
1929 }
1930
1931 #[quickcheck]
1932 fn qc_roundtrip_f16_f32_is_identity(f: f16) -> bool {
1933 let roundtrip = f16::from_f32(f.to_f32());
1934 if f.is_nan() {
1935 roundtrip.is_nan() && f.is_sign_negative() == roundtrip.is_sign_negative()
1936 } else {
1937 f.0 == roundtrip.0
1938 }
1939 }
1940
1941 #[quickcheck]
1942 fn qc_roundtrip_f16_f64_is_identity(f: f16) -> bool {
1943 let roundtrip = f16::from_f64(f.to_f64());
1944 if f.is_nan() {
1945 roundtrip.is_nan() && f.is_sign_negative() == roundtrip.is_sign_negative()
1946 } else {
1947 f.0 == roundtrip.0
1948 }
1949 }
1950}
1951