1#[cfg(all(feature = "serde", feature = "alloc"))]
2#[allow(unused_imports)]
3use alloc::string::ToString;
4#[cfg(feature = "bytemuck")]
5use bytemuck::{Pod, Zeroable};
6use core::{
7 cmp::Ordering,
8 iter::{Product, Sum},
9 num::FpCategory,
10 ops::{Add, AddAssign, Div, DivAssign, Mul, MulAssign, Neg, Rem, RemAssign, Sub, SubAssign},
11};
12#[cfg(not(target_arch = "spirv"))]
13use core::{
14 fmt::{
15 Binary, Debug, Display, Error, Formatter, LowerExp, LowerHex, Octal, UpperExp, UpperHex,
16 },
17 num::ParseFloatError,
18 str::FromStr,
19};
20#[cfg(feature = "serde")]
21use serde::{Deserialize, Serialize};
22#[cfg(feature = "zerocopy")]
23use zerocopy::{FromBytes, Immutable, IntoBytes, KnownLayout};
24
25pub(crate) mod convert;
26
/// A 16-bit floating point type implementing the [`bfloat16`] format.
///
/// The [`bfloat16`] floating point format is a truncated 16-bit version of the IEEE 754 standard
/// `binary32`, a.k.a. [`f32`]. [`struct@bf16`] has approximately the same dynamic range as [`f32`] by
/// having a lower precision than [`struct@f16`][crate::f16]. While [`struct@f16`][crate::f16] has a precision of
/// 11 bits, [`struct@bf16`] has a precision of only 8 bits.
///
/// The value is stored as its raw bit pattern in the single `u16` field, and the type is
/// `#[repr(transparent)]` over that field.
///
/// [`bfloat16`]: https://en.wikipedia.org/wiki/Bfloat16_floating-point_format
#[allow(non_camel_case_types)]
#[derive(Clone, Copy, Default)]
#[repr(transparent)]
#[cfg_attr(feature = "serde", derive(Serialize))]
#[cfg_attr(
    feature = "rkyv",
    derive(rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)
)]
#[cfg_attr(feature = "rkyv", rkyv(resolver = Bf16Resolver))]
#[cfg_attr(feature = "bytemuck", derive(Zeroable, Pod))]
#[cfg_attr(
    feature = "zerocopy",
    derive(FromBytes, Immutable, IntoBytes, KnownLayout)
)]
#[cfg_attr(kani, derive(kani::Arbitrary))]
pub struct bf16(u16);
51
52impl bf16 {
53 /// Constructs a [`struct@bf16`] value from the raw bits.
54 #[inline]
55 #[must_use]
56 pub const fn from_bits(bits: u16) -> bf16 {
57 bf16(bits)
58 }
59
60 /// Constructs a [`struct@bf16`] value from a 32-bit floating point value.
61 ///
62 /// This operation is lossy. If the 32-bit value is too large to fit, ±∞ will result. NaN values
63 /// are preserved. Subnormal values that are too tiny to be represented will result in ±0. All
64 /// other values are truncated and rounded to the nearest representable value.
65 #[inline]
66 #[must_use]
67 pub fn from_f32(value: f32) -> bf16 {
68 Self::from_f32_const(value)
69 }
70
71 /// Constructs a [`struct@bf16`] value from a 32-bit floating point value.
72 ///
73 /// This function is identical to [`from_f32`][Self::from_f32] except it never uses hardware
74 /// intrinsics, which allows it to be `const`. [`from_f32`][Self::from_f32] should be preferred
75 /// in any non-`const` context.
76 ///
77 /// This operation is lossy. If the 32-bit value is too large to fit, ±∞ will result. NaN values
78 /// are preserved. Subnormal values that are too tiny to be represented will result in ±0. All
79 /// other values are truncated and rounded to the nearest representable value.
80 #[inline]
81 #[must_use]
82 pub const fn from_f32_const(value: f32) -> bf16 {
83 bf16(convert::f32_to_bf16(value))
84 }
85
86 /// Constructs a [`struct@bf16`] value from a 64-bit floating point value.
87 ///
88 /// This operation is lossy. If the 64-bit value is to large to fit, ±∞ will result. NaN values
89 /// are preserved. 64-bit subnormal values are too tiny to be represented and result in ±0.
90 /// Exponents that underflow the minimum exponent will result in subnormals or ±0. All other
91 /// values are truncated and rounded to the nearest representable value.
92 #[inline]
93 #[must_use]
94 pub fn from_f64(value: f64) -> bf16 {
95 Self::from_f64_const(value)
96 }
97
98 /// Constructs a [`struct@bf16`] value from a 64-bit floating point value.
99 ///
100 /// This function is identical to [`from_f64`][Self::from_f64] except it never uses hardware
101 /// intrinsics, which allows it to be `const`. [`from_f64`][Self::from_f64] should be preferred
102 /// in any non-`const` context.
103 ///
104 /// This operation is lossy. If the 64-bit value is to large to fit, ±∞ will result. NaN values
105 /// are preserved. 64-bit subnormal values are too tiny to be represented and result in ±0.
106 /// Exponents that underflow the minimum exponent will result in subnormals or ±0. All other
107 /// values are truncated and rounded to the nearest representable value.
108 #[inline]
109 #[must_use]
110 pub const fn from_f64_const(value: f64) -> bf16 {
111 bf16(convert::f64_to_bf16(value))
112 }
113
114 /// Converts a [`struct@bf16`] into the underlying bit representation.
115 #[inline]
116 #[must_use]
117 pub const fn to_bits(self) -> u16 {
118 self.0
119 }
120
121 /// Returns the memory representation of the underlying bit representation as a byte array in
122 /// little-endian byte order.
123 ///
124 /// # Examples
125 ///
126 /// ```rust
127 /// # use half::prelude::*;
128 /// let bytes = bf16::from_f32(12.5).to_le_bytes();
129 /// assert_eq!(bytes, [0x48, 0x41]);
130 /// ```
131 #[inline]
132 #[must_use]
133 pub const fn to_le_bytes(self) -> [u8; 2] {
134 self.0.to_le_bytes()
135 }
136
137 /// Returns the memory representation of the underlying bit representation as a byte array in
138 /// big-endian (network) byte order.
139 ///
140 /// # Examples
141 ///
142 /// ```rust
143 /// # use half::prelude::*;
144 /// let bytes = bf16::from_f32(12.5).to_be_bytes();
145 /// assert_eq!(bytes, [0x41, 0x48]);
146 /// ```
147 #[inline]
148 #[must_use]
149 pub const fn to_be_bytes(self) -> [u8; 2] {
150 self.0.to_be_bytes()
151 }
152
153 /// Returns the memory representation of the underlying bit representation as a byte array in
154 /// native byte order.
155 ///
156 /// As the target platform's native endianness is used, portable code should use
157 /// [`to_be_bytes`][bf16::to_be_bytes] or [`to_le_bytes`][bf16::to_le_bytes], as appropriate,
158 /// instead.
159 ///
160 /// # Examples
161 ///
162 /// ```rust
163 /// # use half::prelude::*;
164 /// let bytes = bf16::from_f32(12.5).to_ne_bytes();
165 /// assert_eq!(bytes, if cfg!(target_endian = "big") {
166 /// [0x41, 0x48]
167 /// } else {
168 /// [0x48, 0x41]
169 /// });
170 /// ```
171 #[inline]
172 #[must_use]
173 pub const fn to_ne_bytes(self) -> [u8; 2] {
174 self.0.to_ne_bytes()
175 }
176
177 /// Creates a floating point value from its representation as a byte array in little endian.
178 ///
179 /// # Examples
180 ///
181 /// ```rust
182 /// # use half::prelude::*;
183 /// let value = bf16::from_le_bytes([0x48, 0x41]);
184 /// assert_eq!(value, bf16::from_f32(12.5));
185 /// ```
186 #[inline]
187 #[must_use]
188 pub const fn from_le_bytes(bytes: [u8; 2]) -> bf16 {
189 bf16::from_bits(u16::from_le_bytes(bytes))
190 }
191
192 /// Creates a floating point value from its representation as a byte array in big endian.
193 ///
194 /// # Examples
195 ///
196 /// ```rust
197 /// # use half::prelude::*;
198 /// let value = bf16::from_be_bytes([0x41, 0x48]);
199 /// assert_eq!(value, bf16::from_f32(12.5));
200 /// ```
201 #[inline]
202 #[must_use]
203 pub const fn from_be_bytes(bytes: [u8; 2]) -> bf16 {
204 bf16::from_bits(u16::from_be_bytes(bytes))
205 }
206
207 /// Creates a floating point value from its representation as a byte array in native endian.
208 ///
209 /// As the target platform's native endianness is used, portable code likely wants to use
210 /// [`from_be_bytes`][bf16::from_be_bytes] or [`from_le_bytes`][bf16::from_le_bytes], as
211 /// appropriate instead.
212 ///
213 /// # Examples
214 ///
215 /// ```rust
216 /// # use half::prelude::*;
217 /// let value = bf16::from_ne_bytes(if cfg!(target_endian = "big") {
218 /// [0x41, 0x48]
219 /// } else {
220 /// [0x48, 0x41]
221 /// });
222 /// assert_eq!(value, bf16::from_f32(12.5));
223 /// ```
224 #[inline]
225 #[must_use]
226 pub const fn from_ne_bytes(bytes: [u8; 2]) -> bf16 {
227 bf16::from_bits(u16::from_ne_bytes(bytes))
228 }
229
230 /// Converts a [`struct@bf16`] value into an [`f32`] value.
231 ///
232 /// This conversion is lossless as all values can be represented exactly in [`f32`].
233 #[inline]
234 #[must_use]
235 pub fn to_f32(self) -> f32 {
236 self.to_f32_const()
237 }
238
239 /// Converts a [`struct@bf16`] value into an [`f32`] value.
240 ///
241 /// This function is identical to [`to_f32`][Self::to_f32] except it never uses hardware
242 /// intrinsics, which allows it to be `const`. [`to_f32`][Self::to_f32] should be preferred
243 /// in any non-`const` context.
244 ///
245 /// This conversion is lossless as all values can be represented exactly in [`f32`].
246 #[inline]
247 #[must_use]
248 pub const fn to_f32_const(self) -> f32 {
249 convert::bf16_to_f32(self.0)
250 }
251
252 /// Converts a [`struct@bf16`] value into an [`f64`] value.
253 ///
254 /// This conversion is lossless as all values can be represented exactly in [`f64`].
255 #[inline]
256 #[must_use]
257 pub fn to_f64(self) -> f64 {
258 self.to_f64_const()
259 }
260
261 /// Converts a [`struct@bf16`] value into an [`f64`] value.
262 ///
263 /// This function is identical to [`to_f64`][Self::to_f64] except it never uses hardware
264 /// intrinsics, which allows it to be `const`. [`to_f64`][Self::to_f64] should be preferred
265 /// in any non-`const` context.
266 ///
267 /// This conversion is lossless as all values can be represented exactly in [`f64`].
268 #[inline]
269 #[must_use]
270 pub const fn to_f64_const(self) -> f64 {
271 convert::bf16_to_f64(self.0)
272 }
273
274 /// Returns `true` if this value is NaN and `false` otherwise.
275 ///
276 /// # Examples
277 ///
278 /// ```rust
279 /// # use half::prelude::*;
280 ///
281 /// let nan = bf16::NAN;
282 /// let f = bf16::from_f32(7.0_f32);
283 ///
284 /// assert!(nan.is_nan());
285 /// assert!(!f.is_nan());
286 /// ```
287 #[inline]
288 #[must_use]
289 pub const fn is_nan(self) -> bool {
290 self.0 & 0x7FFFu16 > 0x7F80u16
291 }
292
293 /// Returns `true` if this value is ±∞ and `false` otherwise.
294 ///
295 /// # Examples
296 ///
297 /// ```rust
298 /// # use half::prelude::*;
299 ///
300 /// let f = bf16::from_f32(7.0f32);
301 /// let inf = bf16::INFINITY;
302 /// let neg_inf = bf16::NEG_INFINITY;
303 /// let nan = bf16::NAN;
304 ///
305 /// assert!(!f.is_infinite());
306 /// assert!(!nan.is_infinite());
307 ///
308 /// assert!(inf.is_infinite());
309 /// assert!(neg_inf.is_infinite());
310 /// ```
311 #[inline]
312 #[must_use]
313 pub const fn is_infinite(self) -> bool {
314 self.0 & 0x7FFFu16 == 0x7F80u16
315 }
316
317 /// Returns `true` if this number is neither infinite nor NaN.
318 ///
319 /// # Examples
320 ///
321 /// ```rust
322 /// # use half::prelude::*;
323 ///
324 /// let f = bf16::from_f32(7.0f32);
325 /// let inf = bf16::INFINITY;
326 /// let neg_inf = bf16::NEG_INFINITY;
327 /// let nan = bf16::NAN;
328 ///
329 /// assert!(f.is_finite());
330 ///
331 /// assert!(!nan.is_finite());
332 /// assert!(!inf.is_finite());
333 /// assert!(!neg_inf.is_finite());
334 /// ```
335 #[inline]
336 #[must_use]
337 pub const fn is_finite(self) -> bool {
338 self.0 & 0x7F80u16 != 0x7F80u16
339 }
340
341 /// Returns `true` if the number is neither zero, infinite, subnormal, or NaN.
342 ///
343 /// # Examples
344 ///
345 /// ```rust
346 /// # use half::prelude::*;
347 ///
348 /// let min = bf16::MIN_POSITIVE;
349 /// let max = bf16::MAX;
350 /// let lower_than_min = bf16::from_f32(1.0e-39_f32);
351 /// let zero = bf16::from_f32(0.0_f32);
352 ///
353 /// assert!(min.is_normal());
354 /// assert!(max.is_normal());
355 ///
356 /// assert!(!zero.is_normal());
357 /// assert!(!bf16::NAN.is_normal());
358 /// assert!(!bf16::INFINITY.is_normal());
359 /// // Values between 0 and `min` are subnormal.
360 /// assert!(!lower_than_min.is_normal());
361 /// ```
362 #[inline]
363 #[must_use]
364 pub const fn is_normal(self) -> bool {
365 let exp = self.0 & 0x7F80u16;
366 exp != 0x7F80u16 && exp != 0
367 }
368
369 /// Returns the floating point category of the number.
370 ///
371 /// If only one property is going to be tested, it is generally faster to use the specific
372 /// predicate instead.
373 ///
374 /// # Examples
375 ///
376 /// ```rust
377 /// use std::num::FpCategory;
378 /// # use half::prelude::*;
379 ///
380 /// let num = bf16::from_f32(12.4_f32);
381 /// let inf = bf16::INFINITY;
382 ///
383 /// assert_eq!(num.classify(), FpCategory::Normal);
384 /// assert_eq!(inf.classify(), FpCategory::Infinite);
385 /// ```
386 #[must_use]
387 pub const fn classify(self) -> FpCategory {
388 let exp = self.0 & 0x7F80u16;
389 let man = self.0 & 0x007Fu16;
390 match (exp, man) {
391 (0, 0) => FpCategory::Zero,
392 (0, _) => FpCategory::Subnormal,
393 (0x7F80u16, 0) => FpCategory::Infinite,
394 (0x7F80u16, _) => FpCategory::Nan,
395 _ => FpCategory::Normal,
396 }
397 }
398
399 /// Returns a number that represents the sign of `self`.
400 ///
401 /// * 1.0 if the number is positive, +0.0 or [`INFINITY`][bf16::INFINITY]
402 /// * −1.0 if the number is negative, −0.0` or [`NEG_INFINITY`][bf16::NEG_INFINITY]
403 /// * [`NAN`][bf16::NAN] if the number is NaN
404 ///
405 /// # Examples
406 ///
407 /// ```rust
408 /// # use half::prelude::*;
409 ///
410 /// let f = bf16::from_f32(3.5_f32);
411 ///
412 /// assert_eq!(f.signum(), bf16::from_f32(1.0));
413 /// assert_eq!(bf16::NEG_INFINITY.signum(), bf16::from_f32(-1.0));
414 ///
415 /// assert!(bf16::NAN.signum().is_nan());
416 /// ```
417 #[must_use]
418 pub const fn signum(self) -> bf16 {
419 if self.is_nan() {
420 self
421 } else if self.0 & 0x8000u16 != 0 {
422 Self::NEG_ONE
423 } else {
424 Self::ONE
425 }
426 }
427
428 /// Returns `true` if and only if `self` has a positive sign, including +0.0, NaNs with a
429 /// positive sign bit and +∞.
430 ///
431 /// # Examples
432 ///
433 /// ```rust
434 /// # use half::prelude::*;
435 ///
436 /// let nan = bf16::NAN;
437 /// let f = bf16::from_f32(7.0_f32);
438 /// let g = bf16::from_f32(-7.0_f32);
439 ///
440 /// assert!(f.is_sign_positive());
441 /// assert!(!g.is_sign_positive());
442 /// // NaN can be either positive or negative
443 /// assert!(nan.is_sign_positive() != nan.is_sign_negative());
444 /// ```
445 #[inline]
446 #[must_use]
447 pub const fn is_sign_positive(self) -> bool {
448 self.0 & 0x8000u16 == 0
449 }
450
451 /// Returns `true` if and only if `self` has a negative sign, including −0.0, NaNs with a
452 /// negative sign bit and −∞.
453 ///
454 /// # Examples
455 ///
456 /// ```rust
457 /// # use half::prelude::*;
458 ///
459 /// let nan = bf16::NAN;
460 /// let f = bf16::from_f32(7.0f32);
461 /// let g = bf16::from_f32(-7.0f32);
462 ///
463 /// assert!(!f.is_sign_negative());
464 /// assert!(g.is_sign_negative());
465 /// // NaN can be either positive or negative
466 /// assert!(nan.is_sign_positive() != nan.is_sign_negative());
467 /// ```
468 #[inline]
469 #[must_use]
470 pub const fn is_sign_negative(self) -> bool {
471 self.0 & 0x8000u16 != 0
472 }
473
474 /// Returns a number composed of the magnitude of `self` and the sign of `sign`.
475 ///
476 /// Equal to `self` if the sign of `self` and `sign` are the same, otherwise equal to `-self`.
477 /// If `self` is NaN, then NaN with the sign of `sign` is returned.
478 ///
479 /// # Examples
480 ///
481 /// ```
482 /// # use half::prelude::*;
483 /// let f = bf16::from_f32(3.5);
484 ///
485 /// assert_eq!(f.copysign(bf16::from_f32(0.42)), bf16::from_f32(3.5));
486 /// assert_eq!(f.copysign(bf16::from_f32(-0.42)), bf16::from_f32(-3.5));
487 /// assert_eq!((-f).copysign(bf16::from_f32(0.42)), bf16::from_f32(3.5));
488 /// assert_eq!((-f).copysign(bf16::from_f32(-0.42)), bf16::from_f32(-3.5));
489 ///
490 /// assert!(bf16::NAN.copysign(bf16::from_f32(1.0)).is_nan());
491 /// ```
492 #[inline]
493 #[must_use]
494 pub const fn copysign(self, sign: bf16) -> bf16 {
495 bf16((sign.0 & 0x8000u16) | (self.0 & 0x7FFFu16))
496 }
497
498 /// Returns the maximum of the two numbers.
499 ///
500 /// If one of the arguments is NaN, then the other argument is returned.
501 ///
502 /// # Examples
503 ///
504 /// ```
505 /// # use half::prelude::*;
506 /// let x = bf16::from_f32(1.0);
507 /// let y = bf16::from_f32(2.0);
508 ///
509 /// assert_eq!(x.max(y), y);
510 /// ```
511 #[inline]
512 #[must_use]
513 pub fn max(self, other: bf16) -> bf16 {
514 if other > self && !other.is_nan() {
515 other
516 } else {
517 self
518 }
519 }
520
521 /// Returns the minimum of the two numbers.
522 ///
523 /// If one of the arguments is NaN, then the other argument is returned.
524 ///
525 /// # Examples
526 ///
527 /// ```
528 /// # use half::prelude::*;
529 /// let x = bf16::from_f32(1.0);
530 /// let y = bf16::from_f32(2.0);
531 ///
532 /// assert_eq!(x.min(y), x);
533 /// ```
534 #[inline]
535 #[must_use]
536 pub fn min(self, other: bf16) -> bf16 {
537 if other < self && !other.is_nan() {
538 other
539 } else {
540 self
541 }
542 }
543
544 /// Restrict a value to a certain interval unless it is NaN.
545 ///
546 /// Returns `max` if `self` is greater than `max`, and `min` if `self` is less than `min`.
547 /// Otherwise this returns `self`.
548 ///
549 /// Note that this function returns NaN if the initial value was NaN as well.
550 ///
551 /// # Panics
552 /// Panics if `min > max`, `min` is NaN, or `max` is NaN.
553 ///
554 /// # Examples
555 ///
556 /// ```
557 /// # use half::prelude::*;
558 /// assert!(bf16::from_f32(-3.0).clamp(bf16::from_f32(-2.0), bf16::from_f32(1.0)) == bf16::from_f32(-2.0));
559 /// assert!(bf16::from_f32(0.0).clamp(bf16::from_f32(-2.0), bf16::from_f32(1.0)) == bf16::from_f32(0.0));
560 /// assert!(bf16::from_f32(2.0).clamp(bf16::from_f32(-2.0), bf16::from_f32(1.0)) == bf16::from_f32(1.0));
561 /// assert!(bf16::NAN.clamp(bf16::from_f32(-2.0), bf16::from_f32(1.0)).is_nan());
562 /// ```
563 #[inline]
564 #[must_use]
565 pub fn clamp(self, min: bf16, max: bf16) -> bf16 {
566 assert!(min <= max);
567 let mut x = self;
568 if x < min {
569 x = min;
570 }
571 if x > max {
572 x = max;
573 }
574 x
575 }
576
577 /// Returns the ordering between `self` and `other`.
578 ///
579 /// Unlike the standard partial comparison between floating point numbers,
580 /// this comparison always produces an ordering in accordance to
581 /// the `totalOrder` predicate as defined in the IEEE 754 (2008 revision)
582 /// floating point standard. The values are ordered in the following sequence:
583 ///
584 /// - negative quiet NaN
585 /// - negative signaling NaN
586 /// - negative infinity
587 /// - negative numbers
588 /// - negative subnormal numbers
589 /// - negative zero
590 /// - positive zero
591 /// - positive subnormal numbers
592 /// - positive numbers
593 /// - positive infinity
594 /// - positive signaling NaN
595 /// - positive quiet NaN.
596 ///
597 /// The ordering established by this function does not always agree with the
598 /// [`PartialOrd`] and [`PartialEq`] implementations of `bf16`. For example,
599 /// they consider negative and positive zero equal, while `total_cmp`
600 /// doesn't.
601 ///
602 /// The interpretation of the signaling NaN bit follows the definition in
603 /// the IEEE 754 standard, which may not match the interpretation by some of
604 /// the older, non-conformant (e.g. MIPS) hardware implementations.
605 ///
606 /// # Examples
607 /// ```
608 /// # use half::bf16;
609 /// let mut v: Vec<bf16> = vec![];
610 /// v.push(bf16::ONE);
611 /// v.push(bf16::INFINITY);
612 /// v.push(bf16::NEG_INFINITY);
613 /// v.push(bf16::NAN);
614 /// v.push(bf16::MAX_SUBNORMAL);
615 /// v.push(-bf16::MAX_SUBNORMAL);
616 /// v.push(bf16::ZERO);
617 /// v.push(bf16::NEG_ZERO);
618 /// v.push(bf16::NEG_ONE);
619 /// v.push(bf16::MIN_POSITIVE);
620 ///
621 /// v.sort_by(|a, b| a.total_cmp(&b));
622 ///
623 /// assert!(v
624 /// .into_iter()
625 /// .zip(
626 /// [
627 /// bf16::NEG_INFINITY,
628 /// bf16::NEG_ONE,
629 /// -bf16::MAX_SUBNORMAL,
630 /// bf16::NEG_ZERO,
631 /// bf16::ZERO,
632 /// bf16::MAX_SUBNORMAL,
633 /// bf16::MIN_POSITIVE,
634 /// bf16::ONE,
635 /// bf16::INFINITY,
636 /// bf16::NAN
637 /// ]
638 /// .iter()
639 /// )
640 /// .all(|(a, b)| a.to_bits() == b.to_bits()));
641 /// ```
642 // Implementation based on: https://doc.rust-lang.org/std/primitive.f32.html#method.total_cmp
643 #[inline]
644 #[must_use]
645 pub fn total_cmp(&self, other: &Self) -> Ordering {
646 let mut left = self.to_bits() as i16;
647 let mut right = other.to_bits() as i16;
648 left ^= (((left >> 15) as u16) >> 1) as i16;
649 right ^= (((right >> 15) as u16) >> 1) as i16;
650 left.cmp(&right)
651 }
652
653 /// Alternate serialize adapter for serializing as a float.
654 ///
655 /// By default, [`struct@bf16`] serializes as a newtype of [`u16`]. This is an alternate serialize
656 /// implementation that serializes as an [`f32`] value. It is designed for use with
657 /// `serialize_with` serde attributes. Deserialization from `f32` values is already supported by
658 /// the default deserialize implementation.
659 ///
660 /// # Examples
661 ///
662 /// A demonstration on how to use this adapater:
663 ///
664 /// ```
665 /// use serde::{Serialize, Deserialize};
666 /// use half::bf16;
667 ///
668 /// #[derive(Serialize, Deserialize)]
669 /// struct MyStruct {
670 /// #[serde(serialize_with = "bf16::serialize_as_f32")]
671 /// value: bf16 // Will be serialized as f32 instead of u16
672 /// }
673 /// ```
674 #[cfg(feature = "serde")]
675 pub fn serialize_as_f32<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
676 serializer.serialize_f32(self.to_f32())
677 }
678
679 /// Alternate serialize adapter for serializing as a string.
680 ///
681 /// By default, [`struct@bf16`] serializes as a newtype of [`u16`]. This is an alternate serialize
682 /// implementation that serializes as a string value. It is designed for use with
683 /// `serialize_with` serde attributes. Deserialization from string values is already supported
684 /// by the default deserialize implementation.
685 ///
686 /// # Examples
687 ///
688 /// A demonstration on how to use this adapater:
689 ///
690 /// ```
691 /// use serde::{Serialize, Deserialize};
692 /// use half::bf16;
693 ///
694 /// #[derive(Serialize, Deserialize)]
695 /// struct MyStruct {
696 /// #[serde(serialize_with = "bf16::serialize_as_string")]
697 /// value: bf16 // Will be serialized as a string instead of u16
698 /// }
699 /// ```
700 #[cfg(all(feature = "serde", feature = "alloc"))]
701 pub fn serialize_as_string<S: serde::Serializer>(
702 &self,
703 serializer: S,
704 ) -> Result<S::Ok, S::Error> {
705 serializer.serialize_str(&self.to_string())
706 }
707
    // Associated constants. Each `bf16` constant is given as its raw 16-bit pattern.

    /// Approximate number of [`struct@bf16`] significant digits in base 10
    pub const DIGITS: u32 = 2;
    /// [`struct@bf16`]
    /// [machine epsilon](https://en.wikipedia.org/wiki/Machine_epsilon) value
    ///
    /// This is the difference between 1.0 and the next largest representable number.
    pub const EPSILON: bf16 = bf16(0x3C00u16);
    /// [`struct@bf16`] positive Infinity (+∞)
    pub const INFINITY: bf16 = bf16(0x7F80u16);
    /// Number of [`struct@bf16`] significant digits in base 2
    pub const MANTISSA_DIGITS: u32 = 8;
    /// Largest finite [`struct@bf16`] value
    pub const MAX: bf16 = bf16(0x7F7F);
    /// Maximum possible [`struct@bf16`] power of 10 exponent
    pub const MAX_10_EXP: i32 = 38;
    /// Maximum possible [`struct@bf16`] power of 2 exponent
    pub const MAX_EXP: i32 = 128;
    /// Smallest finite [`struct@bf16`] value (the bit pattern of [`MAX`][bf16::MAX] with the
    /// sign bit set)
    pub const MIN: bf16 = bf16(0xFF7F);
    /// Minimum possible normal [`struct@bf16`] power of 10 exponent
    pub const MIN_10_EXP: i32 = -37;
    /// One greater than the minimum possible normal [`struct@bf16`] power of 2 exponent
    pub const MIN_EXP: i32 = -125;
    /// Smallest positive normal [`struct@bf16`] value
    pub const MIN_POSITIVE: bf16 = bf16(0x0080u16);
    /// [`struct@bf16`] Not a Number (NaN)
    pub const NAN: bf16 = bf16(0x7FC0u16);
    /// [`struct@bf16`] negative infinity (-∞).
    pub const NEG_INFINITY: bf16 = bf16(0xFF80u16);
    /// The radix or base of the internal representation of [`struct@bf16`]
    pub const RADIX: u32 = 2;

    /// Minimum positive subnormal [`struct@bf16`] value
    pub const MIN_POSITIVE_SUBNORMAL: bf16 = bf16(0x0001u16);
    /// Maximum subnormal [`struct@bf16`] value
    pub const MAX_SUBNORMAL: bf16 = bf16(0x007Fu16);

    /// [`struct@bf16`] 1
    pub const ONE: bf16 = bf16(0x3F80u16);
    /// [`struct@bf16`] 0
    pub const ZERO: bf16 = bf16(0x0000u16);
    /// [`struct@bf16`] -0
    pub const NEG_ZERO: bf16 = bf16(0x8000u16);
    /// [`struct@bf16`] -1
    pub const NEG_ONE: bf16 = bf16(0xBF80u16);

    /// [`struct@bf16`] Euler's number (ℯ)
    pub const E: bf16 = bf16(0x402Eu16);
    /// [`struct@bf16`] Archimedes' constant (π)
    pub const PI: bf16 = bf16(0x4049u16);
    /// [`struct@bf16`] 1/π
    pub const FRAC_1_PI: bf16 = bf16(0x3EA3u16);
    /// [`struct@bf16`] 1/√2
    pub const FRAC_1_SQRT_2: bf16 = bf16(0x3F35u16);
    /// [`struct@bf16`] 2/π
    pub const FRAC_2_PI: bf16 = bf16(0x3F23u16);
    /// [`struct@bf16`] 2/√π
    pub const FRAC_2_SQRT_PI: bf16 = bf16(0x3F90u16);
    /// [`struct@bf16`] π/2
    pub const FRAC_PI_2: bf16 = bf16(0x3FC9u16);
    /// [`struct@bf16`] π/3
    pub const FRAC_PI_3: bf16 = bf16(0x3F86u16);
    /// [`struct@bf16`] π/4
    pub const FRAC_PI_4: bf16 = bf16(0x3F49u16);
    /// [`struct@bf16`] π/6
    pub const FRAC_PI_6: bf16 = bf16(0x3F06u16);
    /// [`struct@bf16`] π/8
    pub const FRAC_PI_8: bf16 = bf16(0x3EC9u16);
    /// [`struct@bf16`] 𝗅𝗇 10
    pub const LN_10: bf16 = bf16(0x4013u16);
    /// [`struct@bf16`] 𝗅𝗇 2
    pub const LN_2: bf16 = bf16(0x3F31u16);
    /// [`struct@bf16`] 𝗅𝗈𝗀₁₀ℯ
    pub const LOG10_E: bf16 = bf16(0x3EDEu16);
    /// [`struct@bf16`] 𝗅𝗈𝗀₁₀2
    pub const LOG10_2: bf16 = bf16(0x3E9Au16);
    /// [`struct@bf16`] 𝗅𝗈𝗀₂ℯ
    pub const LOG2_E: bf16 = bf16(0x3FB9u16);
    /// [`struct@bf16`] 𝗅𝗈𝗀₂10
    pub const LOG2_10: bf16 = bf16(0x4055u16);
    /// [`struct@bf16`] √2
    pub const SQRT_2: bf16 = bf16(0x3FB5u16);
790}
791
792impl From<bf16> for f32 {
793 #[inline]
794 fn from(x: bf16) -> f32 {
795 x.to_f32()
796 }
797}
798
799impl From<bf16> for f64 {
800 #[inline]
801 fn from(x: bf16) -> f64 {
802 x.to_f64()
803 }
804}
805
806impl From<i8> for bf16 {
807 #[inline]
808 fn from(x: i8) -> bf16 {
809 // Convert to f32, then to bf16
810 bf16::from_f32(f32::from(x))
811 }
812}
813
814impl From<u8> for bf16 {
815 #[inline]
816 fn from(x: u8) -> bf16 {
817 // Convert to f32, then to f16
818 bf16::from_f32(f32::from(x))
819 }
820}
821
822impl PartialEq for bf16 {
823 fn eq(&self, other: &bf16) -> bool {
824 if self.is_nan() || other.is_nan() {
825 false
826 } else {
827 (self.0 == other.0) || ((self.0 | other.0) & 0x7FFFu16 == 0)
828 }
829 }
830}
831
832impl PartialOrd for bf16 {
833 fn partial_cmp(&self, other: &bf16) -> Option<Ordering> {
834 if self.is_nan() || other.is_nan() {
835 None
836 } else {
837 let neg = self.0 & 0x8000u16 != 0;
838 let other_neg = other.0 & 0x8000u16 != 0;
839 match (neg, other_neg) {
840 (false, false) => Some(self.0.cmp(&other.0)),
841 (false, true) => {
842 if (self.0 | other.0) & 0x7FFFu16 == 0 {
843 Some(Ordering::Equal)
844 } else {
845 Some(Ordering::Greater)
846 }
847 }
848 (true, false) => {
849 if (self.0 | other.0) & 0x7FFFu16 == 0 {
850 Some(Ordering::Equal)
851 } else {
852 Some(Ordering::Less)
853 }
854 }
855 (true, true) => Some(other.0.cmp(&self.0)),
856 }
857 }
858 }
859
860 fn lt(&self, other: &bf16) -> bool {
861 if self.is_nan() || other.is_nan() {
862 false
863 } else {
864 let neg = self.0 & 0x8000u16 != 0;
865 let other_neg = other.0 & 0x8000u16 != 0;
866 match (neg, other_neg) {
867 (false, false) => self.0 < other.0,
868 (false, true) => false,
869 (true, false) => (self.0 | other.0) & 0x7FFFu16 != 0,
870 (true, true) => self.0 > other.0,
871 }
872 }
873 }
874
875 fn le(&self, other: &bf16) -> bool {
876 if self.is_nan() || other.is_nan() {
877 false
878 } else {
879 let neg = self.0 & 0x8000u16 != 0;
880 let other_neg = other.0 & 0x8000u16 != 0;
881 match (neg, other_neg) {
882 (false, false) => self.0 <= other.0,
883 (false, true) => (self.0 | other.0) & 0x7FFFu16 == 0,
884 (true, false) => true,
885 (true, true) => self.0 >= other.0,
886 }
887 }
888 }
889
890 fn gt(&self, other: &bf16) -> bool {
891 if self.is_nan() || other.is_nan() {
892 false
893 } else {
894 let neg = self.0 & 0x8000u16 != 0;
895 let other_neg = other.0 & 0x8000u16 != 0;
896 match (neg, other_neg) {
897 (false, false) => self.0 > other.0,
898 (false, true) => (self.0 | other.0) & 0x7FFFu16 != 0,
899 (true, false) => false,
900 (true, true) => self.0 < other.0,
901 }
902 }
903 }
904
905 fn ge(&self, other: &bf16) -> bool {
906 if self.is_nan() || other.is_nan() {
907 false
908 } else {
909 let neg = self.0 & 0x8000u16 != 0;
910 let other_neg = other.0 & 0x8000u16 != 0;
911 match (neg, other_neg) {
912 (false, false) => self.0 >= other.0,
913 (false, true) => true,
914 (true, false) => (self.0 | other.0) & 0x7FFFu16 == 0,
915 (true, true) => self.0 <= other.0,
916 }
917 }
918 }
919}
920
921#[cfg(not(target_arch = "spirv"))]
922impl FromStr for bf16 {
923 type Err = ParseFloatError;
924 fn from_str(src: &str) -> Result<bf16, ParseFloatError> {
925 f32::from_str(src).map(op:bf16::from_f32)
926 }
927}
928
929#[cfg(not(target_arch = "spirv"))]
930impl Debug for bf16 {
931 fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
932 Debug::fmt(&self.to_f32(), f)
933 }
934}
935
936#[cfg(not(target_arch = "spirv"))]
937impl Display for bf16 {
938 fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
939 Display::fmt(&self.to_f32(), f)
940 }
941}
942
943#[cfg(not(target_arch = "spirv"))]
944impl LowerExp for bf16 {
945 fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
946 write!(f, "{:e}", self.to_f32())
947 }
948}
949
950#[cfg(not(target_arch = "spirv"))]
951impl UpperExp for bf16 {
952 fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
953 write!(f, "{:E}", self.to_f32())
954 }
955}
956
957#[cfg(not(target_arch = "spirv"))]
958impl Binary for bf16 {
959 fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
960 write!(f, "{:b}", self.0)
961 }
962}
963
964#[cfg(not(target_arch = "spirv"))]
965impl Octal for bf16 {
966 fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
967 write!(f, "{:o}", self.0)
968 }
969}
970
971#[cfg(not(target_arch = "spirv"))]
972impl LowerHex for bf16 {
973 fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
974 write!(f, "{:x}", self.0)
975 }
976}
977
978#[cfg(not(target_arch = "spirv"))]
979impl UpperHex for bf16 {
980 fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
981 write!(f, "{:X}", self.0)
982 }
983}
984
985impl Neg for bf16 {
986 type Output = Self;
987
988 fn neg(self) -> Self::Output {
989 Self(self.0 ^ 0x8000)
990 }
991}
992
993impl Neg for &bf16 {
994 type Output = <bf16 as Neg>::Output;
995
996 #[inline]
997 fn neg(self) -> Self::Output {
998 Neg::neg(*self)
999 }
1000}
1001
1002impl Add for bf16 {
1003 type Output = Self;
1004
1005 fn add(self, rhs: Self) -> Self::Output {
1006 Self::from_f32(Self::to_f32(self) + Self::to_f32(self:rhs))
1007 }
1008}
1009
1010impl Add<&bf16> for bf16 {
1011 type Output = <bf16 as Add<bf16>>::Output;
1012
1013 #[inline]
1014 fn add(self, rhs: &bf16) -> Self::Output {
1015 self.add(*rhs)
1016 }
1017}
1018
1019impl Add<&bf16> for &bf16 {
1020 type Output = <bf16 as Add<bf16>>::Output;
1021
1022 #[inline]
1023 fn add(self, rhs: &bf16) -> Self::Output {
1024 (*self).add(*rhs)
1025 }
1026}
1027
1028impl Add<bf16> for &bf16 {
1029 type Output = <bf16 as Add<bf16>>::Output;
1030
1031 #[inline]
1032 fn add(self, rhs: bf16) -> Self::Output {
1033 (*self).add(rhs)
1034 }
1035}
1036
1037impl AddAssign for bf16 {
1038 #[inline]
1039 fn add_assign(&mut self, rhs: Self) {
1040 *self = (*self).add(rhs);
1041 }
1042}
1043
1044impl AddAssign<&bf16> for bf16 {
1045 #[inline]
1046 fn add_assign(&mut self, rhs: &bf16) {
1047 *self = (*self).add(rhs);
1048 }
1049}
1050
1051impl Sub for bf16 {
1052 type Output = Self;
1053
1054 fn sub(self, rhs: Self) -> Self::Output {
1055 Self::from_f32(Self::to_f32(self) - Self::to_f32(self:rhs))
1056 }
1057}
1058
1059impl Sub<&bf16> for bf16 {
1060 type Output = <bf16 as Sub<bf16>>::Output;
1061
1062 #[inline]
1063 fn sub(self, rhs: &bf16) -> Self::Output {
1064 self.sub(*rhs)
1065 }
1066}
1067
1068impl Sub<&bf16> for &bf16 {
1069 type Output = <bf16 as Sub<bf16>>::Output;
1070
1071 #[inline]
1072 fn sub(self, rhs: &bf16) -> Self::Output {
1073 (*self).sub(*rhs)
1074 }
1075}
1076
1077impl Sub<bf16> for &bf16 {
1078 type Output = <bf16 as Sub<bf16>>::Output;
1079
1080 #[inline]
1081 fn sub(self, rhs: bf16) -> Self::Output {
1082 (*self).sub(rhs)
1083 }
1084}
1085
1086impl SubAssign for bf16 {
1087 #[inline]
1088 fn sub_assign(&mut self, rhs: Self) {
1089 *self = (*self).sub(rhs);
1090 }
1091}
1092
1093impl SubAssign<&bf16> for bf16 {
1094 #[inline]
1095 fn sub_assign(&mut self, rhs: &bf16) {
1096 *self = (*self).sub(rhs);
1097 }
1098}
1099
1100impl Mul for bf16 {
1101 type Output = Self;
1102
1103 fn mul(self, rhs: Self) -> Self::Output {
1104 Self::from_f32(Self::to_f32(self) * Self::to_f32(self:rhs))
1105 }
1106}
1107
1108impl Mul<&bf16> for bf16 {
1109 type Output = <bf16 as Mul<bf16>>::Output;
1110
1111 #[inline]
1112 fn mul(self, rhs: &bf16) -> Self::Output {
1113 self.mul(*rhs)
1114 }
1115}
1116
1117impl Mul<&bf16> for &bf16 {
1118 type Output = <bf16 as Mul<bf16>>::Output;
1119
1120 #[inline]
1121 fn mul(self, rhs: &bf16) -> Self::Output {
1122 (*self).mul(*rhs)
1123 }
1124}
1125
1126impl Mul<bf16> for &bf16 {
1127 type Output = <bf16 as Mul<bf16>>::Output;
1128
1129 #[inline]
1130 fn mul(self, rhs: bf16) -> Self::Output {
1131 (*self).mul(rhs)
1132 }
1133}
1134
1135impl MulAssign for bf16 {
1136 #[inline]
1137 fn mul_assign(&mut self, rhs: Self) {
1138 *self = (*self).mul(rhs);
1139 }
1140}
1141
1142impl MulAssign<&bf16> for bf16 {
1143 #[inline]
1144 fn mul_assign(&mut self, rhs: &bf16) {
1145 *self = (*self).mul(rhs);
1146 }
1147}
1148
1149impl Div for bf16 {
1150 type Output = Self;
1151
1152 fn div(self, rhs: Self) -> Self::Output {
1153 Self::from_f32(Self::to_f32(self) / Self::to_f32(self:rhs))
1154 }
1155}
1156
1157impl Div<&bf16> for bf16 {
1158 type Output = <bf16 as Div<bf16>>::Output;
1159
1160 #[inline]
1161 fn div(self, rhs: &bf16) -> Self::Output {
1162 self.div(*rhs)
1163 }
1164}
1165
1166impl Div<&bf16> for &bf16 {
1167 type Output = <bf16 as Div<bf16>>::Output;
1168
1169 #[inline]
1170 fn div(self, rhs: &bf16) -> Self::Output {
1171 (*self).div(*rhs)
1172 }
1173}
1174
1175impl Div<bf16> for &bf16 {
1176 type Output = <bf16 as Div<bf16>>::Output;
1177
1178 #[inline]
1179 fn div(self, rhs: bf16) -> Self::Output {
1180 (*self).div(rhs)
1181 }
1182}
1183
1184impl DivAssign for bf16 {
1185 #[inline]
1186 fn div_assign(&mut self, rhs: Self) {
1187 *self = (*self).div(rhs);
1188 }
1189}
1190
1191impl DivAssign<&bf16> for bf16 {
1192 #[inline]
1193 fn div_assign(&mut self, rhs: &bf16) {
1194 *self = (*self).div(rhs);
1195 }
1196}
1197
1198impl Rem for bf16 {
1199 type Output = Self;
1200
1201 fn rem(self, rhs: Self) -> Self::Output {
1202 Self::from_f32(Self::to_f32(self) % Self::to_f32(self:rhs))
1203 }
1204}
1205
1206impl Rem<&bf16> for bf16 {
1207 type Output = <bf16 as Rem<bf16>>::Output;
1208
1209 #[inline]
1210 fn rem(self, rhs: &bf16) -> Self::Output {
1211 self.rem(*rhs)
1212 }
1213}
1214
1215impl Rem<&bf16> for &bf16 {
1216 type Output = <bf16 as Rem<bf16>>::Output;
1217
1218 #[inline]
1219 fn rem(self, rhs: &bf16) -> Self::Output {
1220 (*self).rem(*rhs)
1221 }
1222}
1223
1224impl Rem<bf16> for &bf16 {
1225 type Output = <bf16 as Rem<bf16>>::Output;
1226
1227 #[inline]
1228 fn rem(self, rhs: bf16) -> Self::Output {
1229 (*self).rem(rhs)
1230 }
1231}
1232
1233impl RemAssign for bf16 {
1234 #[inline]
1235 fn rem_assign(&mut self, rhs: Self) {
1236 *self = (*self).rem(rhs);
1237 }
1238}
1239
1240impl RemAssign<&bf16> for bf16 {
1241 #[inline]
1242 fn rem_assign(&mut self, rhs: &bf16) {
1243 *self = (*self).rem(rhs);
1244 }
1245}
1246
1247impl Product for bf16 {
1248 #[inline]
1249 fn product<I: Iterator<Item = Self>>(iter: I) -> Self {
1250 bf16::from_f32(iter.map(|f: bf16| f.to_f32()).product())
1251 }
1252}
1253
1254impl<'a> Product<&'a bf16> for bf16 {
1255 #[inline]
1256 fn product<I: Iterator<Item = &'a bf16>>(iter: I) -> Self {
1257 bf16::from_f32(iter.map(|f: &'a bf16| f.to_f32()).product())
1258 }
1259}
1260
1261impl Sum for bf16 {
1262 #[inline]
1263 fn sum<I: Iterator<Item = Self>>(iter: I) -> Self {
1264 bf16::from_f32(iter.map(|f: bf16| f.to_f32()).sum())
1265 }
1266}
1267
1268impl<'a> Sum<&'a bf16> for bf16 {
1269 #[inline]
1270 fn sum<I: Iterator<Item = &'a bf16>>(iter: I) -> Self {
1271 bf16::from_f32(iter.map(|f: &'a bf16| f.to_f32()).sum())
1272 }
1273}
1274
/// Stateless serde visitor used by the `Deserialize` implementation of
/// [`struct@bf16`]. It accepts a newtype struct wrapping a `u16`, a string,
/// or an `f32`/`f64` number.
#[cfg(feature = "serde")]
struct Visitor;
1277
#[cfg(feature = "serde")]
impl<'de> Deserialize<'de> for bf16 {
    /// Asks the deserializer for a newtype struct named `"bf16"` and lets
    /// `Visitor` build the value from whatever representation it is handed.
    fn deserialize<D: serde::de::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
        deserializer.deserialize_newtype_struct("bf16", Visitor)
    }
}
1287
#[cfg(feature = "serde")]
impl<'de> serde::de::Visitor<'de> for Visitor {
    type Value = bf16;

    /// Describes the expected input for serde error messages.
    fn expecting(&self, formatter: &mut core::fmt::Formatter) -> core::fmt::Result {
        formatter.write_str("tuple struct bf16")
    }

    /// Reads the wrapped `u16` and uses it directly as the raw bit pattern.
    fn visit_newtype_struct<D>(self, deserializer: D) -> Result<Self::Value, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        <u16 as Deserialize>::deserialize(deserializer).map(bf16)
    }

    /// Parses the string into a value; any parse failure is reported as an
    /// invalid-value error that echoes the offending string.
    fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
    where
        E: serde::de::Error,
    {
        match v.parse() {
            Ok(parsed) => Ok(parsed),
            Err(_) => Err(E::invalid_value(
                serde::de::Unexpected::Str(v),
                &"a float string",
            )),
        }
    }

    /// Converts an `f32` number with `bf16::from_f32`.
    fn visit_f32<E>(self, v: f32) -> Result<Self::Value, E>
    where
        E: serde::de::Error,
    {
        let converted = bf16::from_f32(v);
        Ok(converted)
    }

    /// Converts an `f64` number with `bf16::from_f64`.
    fn visit_f64<E>(self, v: f64) -> Result<Self::Value, E>
    where
        E: serde::de::Error,
    {
        let converted = bf16::from_f64(v);
        Ok(converted)
    }
}
1326
1327#[allow(
1328 clippy::cognitive_complexity,
1329 clippy::float_cmp,
1330 clippy::neg_cmp_op_on_partial_ord
1331)]
1332#[cfg(test)]
1333mod test {
1334 use super::*;
1335 #[allow(unused_imports)]
1336 use core::cmp::Ordering;
1337 #[cfg(feature = "num-traits")]
1338 use num_traits::{AsPrimitive, FromBytes, FromPrimitive, ToBytes, ToPrimitive};
1339 use quickcheck_macros::quickcheck;
1340
1341 #[cfg(feature = "num-traits")]
1342 #[test]
1343 fn as_primitive() {
1344 let two = bf16::from_f32(2.0);
1345 assert_eq!(<i32 as AsPrimitive<bf16>>::as_(2), two);
1346 assert_eq!(<bf16 as AsPrimitive<i32>>::as_(two), 2);
1347
1348 assert_eq!(<f32 as AsPrimitive<bf16>>::as_(2.0), two);
1349 assert_eq!(<bf16 as AsPrimitive<f32>>::as_(two), 2.0);
1350
1351 assert_eq!(<f64 as AsPrimitive<bf16>>::as_(2.0), two);
1352 assert_eq!(<bf16 as AsPrimitive<f64>>::as_(two), 2.0);
1353 }
1354
1355 #[cfg(feature = "num-traits")]
1356 #[test]
1357 fn to_primitive() {
1358 let two = bf16::from_f32(2.0);
1359 assert_eq!(ToPrimitive::to_i32(&two).unwrap(), 2i32);
1360 assert_eq!(ToPrimitive::to_f32(&two).unwrap(), 2.0f32);
1361 assert_eq!(ToPrimitive::to_f64(&two).unwrap(), 2.0f64);
1362 }
1363
1364 #[cfg(feature = "num-traits")]
1365 #[test]
1366 fn from_primitive() {
1367 let two = bf16::from_f32(2.0);
1368 assert_eq!(<bf16 as FromPrimitive>::from_i32(2).unwrap(), two);
1369 assert_eq!(<bf16 as FromPrimitive>::from_f32(2.0).unwrap(), two);
1370 assert_eq!(<bf16 as FromPrimitive>::from_f64(2.0).unwrap(), two);
1371 }
1372
1373 #[cfg(feature = "num-traits")]
1374 #[test]
1375 fn to_and_from_bytes() {
1376 let two = bf16::from_f32(2.0);
1377 assert_eq!(<bf16 as ToBytes>::to_le_bytes(&two), [0, 64]);
1378 assert_eq!(<bf16 as FromBytes>::from_le_bytes(&[0, 64]), two);
1379 assert_eq!(<bf16 as ToBytes>::to_be_bytes(&two), [64, 0]);
1380 assert_eq!(<bf16 as FromBytes>::from_be_bytes(&[64, 0]), two);
1381 }
1382
1383 #[test]
1384 fn test_bf16_consts_from_f32() {
1385 let one = bf16::from_f32(1.0);
1386 let zero = bf16::from_f32(0.0);
1387 let neg_zero = bf16::from_f32(-0.0);
1388 let neg_one = bf16::from_f32(-1.0);
1389 let inf = bf16::from_f32(core::f32::INFINITY);
1390 let neg_inf = bf16::from_f32(core::f32::NEG_INFINITY);
1391 let nan = bf16::from_f32(core::f32::NAN);
1392
1393 assert_eq!(bf16::ONE, one);
1394 assert_eq!(bf16::ZERO, zero);
1395 assert!(zero.is_sign_positive());
1396 assert_eq!(bf16::NEG_ZERO, neg_zero);
1397 assert!(neg_zero.is_sign_negative());
1398 assert_eq!(bf16::NEG_ONE, neg_one);
1399 assert!(neg_one.is_sign_negative());
1400 assert_eq!(bf16::INFINITY, inf);
1401 assert_eq!(bf16::NEG_INFINITY, neg_inf);
1402 assert!(nan.is_nan());
1403 assert!(bf16::NAN.is_nan());
1404
1405 let e = bf16::from_f32(core::f32::consts::E);
1406 let pi = bf16::from_f32(core::f32::consts::PI);
1407 let frac_1_pi = bf16::from_f32(core::f32::consts::FRAC_1_PI);
1408 let frac_1_sqrt_2 = bf16::from_f32(core::f32::consts::FRAC_1_SQRT_2);
1409 let frac_2_pi = bf16::from_f32(core::f32::consts::FRAC_2_PI);
1410 let frac_2_sqrt_pi = bf16::from_f32(core::f32::consts::FRAC_2_SQRT_PI);
1411 let frac_pi_2 = bf16::from_f32(core::f32::consts::FRAC_PI_2);
1412 let frac_pi_3 = bf16::from_f32(core::f32::consts::FRAC_PI_3);
1413 let frac_pi_4 = bf16::from_f32(core::f32::consts::FRAC_PI_4);
1414 let frac_pi_6 = bf16::from_f32(core::f32::consts::FRAC_PI_6);
1415 let frac_pi_8 = bf16::from_f32(core::f32::consts::FRAC_PI_8);
1416 let ln_10 = bf16::from_f32(core::f32::consts::LN_10);
1417 let ln_2 = bf16::from_f32(core::f32::consts::LN_2);
1418 let log10_e = bf16::from_f32(core::f32::consts::LOG10_E);
1419 // core::f32::consts::LOG10_2 requires rustc 1.43.0
1420 let log10_2 = bf16::from_f32(2f32.log10());
1421 let log2_e = bf16::from_f32(core::f32::consts::LOG2_E);
1422 // core::f32::consts::LOG2_10 requires rustc 1.43.0
1423 let log2_10 = bf16::from_f32(10f32.log2());
1424 let sqrt_2 = bf16::from_f32(core::f32::consts::SQRT_2);
1425
1426 assert_eq!(bf16::E, e);
1427 assert_eq!(bf16::PI, pi);
1428 assert_eq!(bf16::FRAC_1_PI, frac_1_pi);
1429 assert_eq!(bf16::FRAC_1_SQRT_2, frac_1_sqrt_2);
1430 assert_eq!(bf16::FRAC_2_PI, frac_2_pi);
1431 assert_eq!(bf16::FRAC_2_SQRT_PI, frac_2_sqrt_pi);
1432 assert_eq!(bf16::FRAC_PI_2, frac_pi_2);
1433 assert_eq!(bf16::FRAC_PI_3, frac_pi_3);
1434 assert_eq!(bf16::FRAC_PI_4, frac_pi_4);
1435 assert_eq!(bf16::FRAC_PI_6, frac_pi_6);
1436 assert_eq!(bf16::FRAC_PI_8, frac_pi_8);
1437 assert_eq!(bf16::LN_10, ln_10);
1438 assert_eq!(bf16::LN_2, ln_2);
1439 assert_eq!(bf16::LOG10_E, log10_e);
1440 assert_eq!(bf16::LOG10_2, log10_2);
1441 assert_eq!(bf16::LOG2_E, log2_e);
1442 assert_eq!(bf16::LOG2_10, log2_10);
1443 assert_eq!(bf16::SQRT_2, sqrt_2);
1444 }
1445
1446 #[test]
1447 fn test_bf16_consts_from_f64() {
1448 let one = bf16::from_f64(1.0);
1449 let zero = bf16::from_f64(0.0);
1450 let neg_zero = bf16::from_f64(-0.0);
1451 let inf = bf16::from_f64(core::f64::INFINITY);
1452 let neg_inf = bf16::from_f64(core::f64::NEG_INFINITY);
1453 let nan = bf16::from_f64(core::f64::NAN);
1454
1455 assert_eq!(bf16::ONE, one);
1456 assert_eq!(bf16::ZERO, zero);
1457 assert_eq!(bf16::NEG_ZERO, neg_zero);
1458 assert_eq!(bf16::INFINITY, inf);
1459 assert_eq!(bf16::NEG_INFINITY, neg_inf);
1460 assert!(nan.is_nan());
1461 assert!(bf16::NAN.is_nan());
1462
1463 let e = bf16::from_f64(core::f64::consts::E);
1464 let pi = bf16::from_f64(core::f64::consts::PI);
1465 let frac_1_pi = bf16::from_f64(core::f64::consts::FRAC_1_PI);
1466 let frac_1_sqrt_2 = bf16::from_f64(core::f64::consts::FRAC_1_SQRT_2);
1467 let frac_2_pi = bf16::from_f64(core::f64::consts::FRAC_2_PI);
1468 let frac_2_sqrt_pi = bf16::from_f64(core::f64::consts::FRAC_2_SQRT_PI);
1469 let frac_pi_2 = bf16::from_f64(core::f64::consts::FRAC_PI_2);
1470 let frac_pi_3 = bf16::from_f64(core::f64::consts::FRAC_PI_3);
1471 let frac_pi_4 = bf16::from_f64(core::f64::consts::FRAC_PI_4);
1472 let frac_pi_6 = bf16::from_f64(core::f64::consts::FRAC_PI_6);
1473 let frac_pi_8 = bf16::from_f64(core::f64::consts::FRAC_PI_8);
1474 let ln_10 = bf16::from_f64(core::f64::consts::LN_10);
1475 let ln_2 = bf16::from_f64(core::f64::consts::LN_2);
1476 let log10_e = bf16::from_f64(core::f64::consts::LOG10_E);
1477 // core::f64::consts::LOG10_2 requires rustc 1.43.0
1478 let log10_2 = bf16::from_f64(2f64.log10());
1479 let log2_e = bf16::from_f64(core::f64::consts::LOG2_E);
1480 // core::f64::consts::LOG2_10 requires rustc 1.43.0
1481 let log2_10 = bf16::from_f64(10f64.log2());
1482 let sqrt_2 = bf16::from_f64(core::f64::consts::SQRT_2);
1483
1484 assert_eq!(bf16::E, e);
1485 assert_eq!(bf16::PI, pi);
1486 assert_eq!(bf16::FRAC_1_PI, frac_1_pi);
1487 assert_eq!(bf16::FRAC_1_SQRT_2, frac_1_sqrt_2);
1488 assert_eq!(bf16::FRAC_2_PI, frac_2_pi);
1489 assert_eq!(bf16::FRAC_2_SQRT_PI, frac_2_sqrt_pi);
1490 assert_eq!(bf16::FRAC_PI_2, frac_pi_2);
1491 assert_eq!(bf16::FRAC_PI_3, frac_pi_3);
1492 assert_eq!(bf16::FRAC_PI_4, frac_pi_4);
1493 assert_eq!(bf16::FRAC_PI_6, frac_pi_6);
1494 assert_eq!(bf16::FRAC_PI_8, frac_pi_8);
1495 assert_eq!(bf16::LN_10, ln_10);
1496 assert_eq!(bf16::LN_2, ln_2);
1497 assert_eq!(bf16::LOG10_E, log10_e);
1498 assert_eq!(bf16::LOG10_2, log10_2);
1499 assert_eq!(bf16::LOG2_E, log2_e);
1500 assert_eq!(bf16::LOG2_10, log2_10);
1501 assert_eq!(bf16::SQRT_2, sqrt_2);
1502 }
1503
1504 #[test]
1505 fn test_nan_conversion_to_smaller() {
1506 let nan64 = f64::from_bits(0x7FF0_0000_0000_0001u64);
1507 let neg_nan64 = f64::from_bits(0xFFF0_0000_0000_0001u64);
1508 let nan32 = f32::from_bits(0x7F80_0001u32);
1509 let neg_nan32 = f32::from_bits(0xFF80_0001u32);
1510 let nan32_from_64 = nan64 as f32;
1511 let neg_nan32_from_64 = neg_nan64 as f32;
1512 let nan16_from_64 = bf16::from_f64(nan64);
1513 let neg_nan16_from_64 = bf16::from_f64(neg_nan64);
1514 let nan16_from_32 = bf16::from_f32(nan32);
1515 let neg_nan16_from_32 = bf16::from_f32(neg_nan32);
1516
1517 assert!(nan64.is_nan() && nan64.is_sign_positive());
1518 assert!(neg_nan64.is_nan() && neg_nan64.is_sign_negative());
1519 assert!(nan32.is_nan() && nan32.is_sign_positive());
1520 assert!(neg_nan32.is_nan() && neg_nan32.is_sign_negative());
1521
1522 // f32/f64 NaN conversion sign is non-deterministic: https://github.com/starkat99/half-rs/issues/103
1523 assert!(neg_nan32_from_64.is_nan());
1524 assert!(nan32_from_64.is_nan());
1525 assert!(nan16_from_64.is_nan());
1526 assert!(neg_nan16_from_64.is_nan());
1527 assert!(nan16_from_32.is_nan());
1528 assert!(neg_nan16_from_32.is_nan());
1529 }
1530
1531 #[test]
1532 fn test_nan_conversion_to_larger() {
1533 let nan16 = bf16::from_bits(0x7F81u16);
1534 let neg_nan16 = bf16::from_bits(0xFF81u16);
1535 let nan32 = f32::from_bits(0x7F80_0001u32);
1536 let neg_nan32 = f32::from_bits(0xFF80_0001u32);
1537 let nan32_from_16 = f32::from(nan16);
1538 let neg_nan32_from_16 = f32::from(neg_nan16);
1539 let nan64_from_16 = f64::from(nan16);
1540 let neg_nan64_from_16 = f64::from(neg_nan16);
1541 let nan64_from_32 = f64::from(nan32);
1542 let neg_nan64_from_32 = f64::from(neg_nan32);
1543
1544 assert!(nan16.is_nan() && nan16.is_sign_positive());
1545 assert!(neg_nan16.is_nan() && neg_nan16.is_sign_negative());
1546 assert!(nan32.is_nan() && nan32.is_sign_positive());
1547 assert!(neg_nan32.is_nan() && neg_nan32.is_sign_negative());
1548
1549 // // f32/f64 NaN conversion sign is non-deterministic: https://github.com/starkat99/half-rs/issues/103
1550 assert!(nan32_from_16.is_nan());
1551 assert!(neg_nan32_from_16.is_nan());
1552 assert!(nan64_from_16.is_nan());
1553 assert!(neg_nan64_from_16.is_nan());
1554 assert!(nan64_from_32.is_nan());
1555 assert!(neg_nan64_from_32.is_nan());
1556 }
1557
1558 #[test]
1559 fn test_bf16_to_f32() {
1560 let f = bf16::from_f32(7.0);
1561 assert_eq!(f.to_f32(), 7.0f32);
1562
1563 // 7.1 is NOT exactly representable in 16-bit, it's rounded
1564 let f = bf16::from_f32(7.1);
1565 let diff = (f.to_f32() - 7.1f32).abs();
1566 // diff must be <= 4 * EPSILON, as 7 has two more significant bits than 1
1567 assert!(diff <= 4.0 * bf16::EPSILON.to_f32());
1568
1569 let tiny32 = f32::from_bits(0x0001_0000u32);
1570 assert_eq!(bf16::from_bits(0x0001).to_f32(), tiny32);
1571 assert_eq!(bf16::from_bits(0x0005).to_f32(), 5.0 * tiny32);
1572
1573 assert_eq!(bf16::from_bits(0x0001), bf16::from_f32(tiny32));
1574 assert_eq!(bf16::from_bits(0x0005), bf16::from_f32(5.0 * tiny32));
1575 }
1576
1577 #[test]
1578 fn test_bf16_to_f64() {
1579 let f = bf16::from_f64(7.0);
1580 assert_eq!(f.to_f64(), 7.0f64);
1581
1582 // 7.1 is NOT exactly representable in 16-bit, it's rounded
1583 let f = bf16::from_f64(7.1);
1584 let diff = (f.to_f64() - 7.1f64).abs();
1585 // diff must be <= 4 * EPSILON, as 7 has two more significant bits than 1
1586 assert!(diff <= 4.0 * bf16::EPSILON.to_f64());
1587
1588 let tiny64 = 2.0f64.powi(-133);
1589 assert_eq!(bf16::from_bits(0x0001).to_f64(), tiny64);
1590 assert_eq!(bf16::from_bits(0x0005).to_f64(), 5.0 * tiny64);
1591
1592 assert_eq!(bf16::from_bits(0x0001), bf16::from_f64(tiny64));
1593 assert_eq!(bf16::from_bits(0x0005), bf16::from_f64(5.0 * tiny64));
1594 }
1595
    /// Exercises `partial_cmp` and every comparison operator (`==`, `!=`, `<`,
    /// `<=`, `>`, `>=`) in both argument orders for three pairs of values:
    /// `0.0` vs `-0.0`, `1.0` vs `-0.0`, and `1.0` vs `-1.0`.
    ///
    /// Each operator is spelled out individually on purpose, so the negated
    /// forms below must not be simplified into their complementary operator.
    #[test]
    fn test_comparisons() {
        let zero = bf16::from_f64(0.0);
        let one = bf16::from_f64(1.0);
        let neg_zero = bf16::from_f64(-0.0);
        let neg_one = bf16::from_f64(-1.0);

        // Positive and negative zero must compare as equal in every respect.
        assert_eq!(zero.partial_cmp(&neg_zero), Some(Ordering::Equal));
        assert_eq!(neg_zero.partial_cmp(&zero), Some(Ordering::Equal));
        assert!(zero == neg_zero);
        assert!(neg_zero == zero);
        assert!(!(zero != neg_zero));
        assert!(!(neg_zero != zero));
        assert!(!(zero < neg_zero));
        assert!(!(neg_zero < zero));
        assert!(zero <= neg_zero);
        assert!(neg_zero <= zero);
        assert!(!(zero > neg_zero));
        assert!(!(neg_zero > zero));
        assert!(zero >= neg_zero);
        assert!(neg_zero >= zero);

        // One is strictly greater than negative zero.
        assert_eq!(one.partial_cmp(&neg_zero), Some(Ordering::Greater));
        assert_eq!(neg_zero.partial_cmp(&one), Some(Ordering::Less));
        assert!(!(one == neg_zero));
        assert!(!(neg_zero == one));
        assert!(one != neg_zero);
        assert!(neg_zero != one);
        assert!(!(one < neg_zero));
        assert!(neg_zero < one);
        assert!(!(one <= neg_zero));
        assert!(neg_zero <= one);
        assert!(one > neg_zero);
        assert!(!(neg_zero > one));
        assert!(one >= neg_zero);
        assert!(!(neg_zero >= one));

        // One is strictly greater than negative one.
        assert_eq!(one.partial_cmp(&neg_one), Some(Ordering::Greater));
        assert_eq!(neg_one.partial_cmp(&one), Some(Ordering::Less));
        assert!(!(one == neg_one));
        assert!(!(neg_one == one));
        assert!(one != neg_one);
        assert!(neg_one != one);
        assert!(!(one < neg_one));
        assert!(neg_one < one);
        assert!(!(one <= neg_one));
        assert!(neg_one <= one);
        assert!(one > neg_one);
        assert!(!(neg_one > one));
        assert!(one >= neg_one);
        assert!(!(neg_one >= one));
    }
1648
1649 #[test]
1650 #[allow(clippy::erasing_op, clippy::identity_op)]
1651 fn round_to_even_f32() {
1652 // smallest positive subnormal = 0b0.0000_001 * 2^-126 = 2^-133
1653 let min_sub = bf16::from_bits(1);
1654 let min_sub_f = (-133f32).exp2();
1655 assert_eq!(bf16::from_f32(min_sub_f).to_bits(), min_sub.to_bits());
1656 assert_eq!(f32::from(min_sub).to_bits(), min_sub_f.to_bits());
1657
1658 // 0.0000000_011111 rounded to 0.0000000 (< tie, no rounding)
1659 // 0.0000000_100000 rounded to 0.0000000 (tie and even, remains at even)
1660 // 0.0000000_100001 rounded to 0.0000001 (> tie, rounds up)
1661 assert_eq!(
1662 bf16::from_f32(min_sub_f * 0.49).to_bits(),
1663 min_sub.to_bits() * 0
1664 );
1665 assert_eq!(
1666 bf16::from_f32(min_sub_f * 0.50).to_bits(),
1667 min_sub.to_bits() * 0
1668 );
1669 assert_eq!(
1670 bf16::from_f32(min_sub_f * 0.51).to_bits(),
1671 min_sub.to_bits() * 1
1672 );
1673
1674 // 0.0000001_011111 rounded to 0.0000001 (< tie, no rounding)
1675 // 0.0000001_100000 rounded to 0.0000010 (tie and odd, rounds up to even)
1676 // 0.0000001_100001 rounded to 0.0000010 (> tie, rounds up)
1677 assert_eq!(
1678 bf16::from_f32(min_sub_f * 1.49).to_bits(),
1679 min_sub.to_bits() * 1
1680 );
1681 assert_eq!(
1682 bf16::from_f32(min_sub_f * 1.50).to_bits(),
1683 min_sub.to_bits() * 2
1684 );
1685 assert_eq!(
1686 bf16::from_f32(min_sub_f * 1.51).to_bits(),
1687 min_sub.to_bits() * 2
1688 );
1689
1690 // 0.0000010_011111 rounded to 0.0000010 (< tie, no rounding)
1691 // 0.0000010_100000 rounded to 0.0000010 (tie and even, remains at even)
1692 // 0.0000010_100001 rounded to 0.0000011 (> tie, rounds up)
1693 assert_eq!(
1694 bf16::from_f32(min_sub_f * 2.49).to_bits(),
1695 min_sub.to_bits() * 2
1696 );
1697 assert_eq!(
1698 bf16::from_f32(min_sub_f * 2.50).to_bits(),
1699 min_sub.to_bits() * 2
1700 );
1701 assert_eq!(
1702 bf16::from_f32(min_sub_f * 2.51).to_bits(),
1703 min_sub.to_bits() * 3
1704 );
1705
1706 assert_eq!(
1707 bf16::from_f32(250.49f32).to_bits(),
1708 bf16::from_f32(250.0).to_bits()
1709 );
1710 assert_eq!(
1711 bf16::from_f32(250.50f32).to_bits(),
1712 bf16::from_f32(250.0).to_bits()
1713 );
1714 assert_eq!(
1715 bf16::from_f32(250.51f32).to_bits(),
1716 bf16::from_f32(251.0).to_bits()
1717 );
1718 assert_eq!(
1719 bf16::from_f32(251.49f32).to_bits(),
1720 bf16::from_f32(251.0).to_bits()
1721 );
1722 assert_eq!(
1723 bf16::from_f32(251.50f32).to_bits(),
1724 bf16::from_f32(252.0).to_bits()
1725 );
1726 assert_eq!(
1727 bf16::from_f32(251.51f32).to_bits(),
1728 bf16::from_f32(252.0).to_bits()
1729 );
1730 assert_eq!(
1731 bf16::from_f32(252.49f32).to_bits(),
1732 bf16::from_f32(252.0).to_bits()
1733 );
1734 assert_eq!(
1735 bf16::from_f32(252.50f32).to_bits(),
1736 bf16::from_f32(252.0).to_bits()
1737 );
1738 assert_eq!(
1739 bf16::from_f32(252.51f32).to_bits(),
1740 bf16::from_f32(253.0).to_bits()
1741 );
1742 }
1743
1744 #[test]
1745 #[allow(clippy::erasing_op, clippy::identity_op)]
1746 fn round_to_even_f64() {
1747 // smallest positive subnormal = 0b0.0000_001 * 2^-126 = 2^-133
1748 let min_sub = bf16::from_bits(1);
1749 let min_sub_f = (-133f64).exp2();
1750 assert_eq!(bf16::from_f64(min_sub_f).to_bits(), min_sub.to_bits());
1751 assert_eq!(f64::from(min_sub).to_bits(), min_sub_f.to_bits());
1752
1753 // 0.0000000_011111 rounded to 0.0000000 (< tie, no rounding)
1754 // 0.0000000_100000 rounded to 0.0000000 (tie and even, remains at even)
1755 // 0.0000000_100001 rounded to 0.0000001 (> tie, rounds up)
1756 assert_eq!(
1757 bf16::from_f64(min_sub_f * 0.49).to_bits(),
1758 min_sub.to_bits() * 0
1759 );
1760 assert_eq!(
1761 bf16::from_f64(min_sub_f * 0.50).to_bits(),
1762 min_sub.to_bits() * 0
1763 );
1764 assert_eq!(
1765 bf16::from_f64(min_sub_f * 0.51).to_bits(),
1766 min_sub.to_bits() * 1
1767 );
1768
1769 // 0.0000001_011111 rounded to 0.0000001 (< tie, no rounding)
1770 // 0.0000001_100000 rounded to 0.0000010 (tie and odd, rounds up to even)
1771 // 0.0000001_100001 rounded to 0.0000010 (> tie, rounds up)
1772 assert_eq!(
1773 bf16::from_f64(min_sub_f * 1.49).to_bits(),
1774 min_sub.to_bits() * 1
1775 );
1776 assert_eq!(
1777 bf16::from_f64(min_sub_f * 1.50).to_bits(),
1778 min_sub.to_bits() * 2
1779 );
1780 assert_eq!(
1781 bf16::from_f64(min_sub_f * 1.51).to_bits(),
1782 min_sub.to_bits() * 2
1783 );
1784
1785 // 0.0000010_011111 rounded to 0.0000010 (< tie, no rounding)
1786 // 0.0000010_100000 rounded to 0.0000010 (tie and even, remains at even)
1787 // 0.0000010_100001 rounded to 0.0000011 (> tie, rounds up)
1788 assert_eq!(
1789 bf16::from_f64(min_sub_f * 2.49).to_bits(),
1790 min_sub.to_bits() * 2
1791 );
1792 assert_eq!(
1793 bf16::from_f64(min_sub_f * 2.50).to_bits(),
1794 min_sub.to_bits() * 2
1795 );
1796 assert_eq!(
1797 bf16::from_f64(min_sub_f * 2.51).to_bits(),
1798 min_sub.to_bits() * 3
1799 );
1800
1801 assert_eq!(
1802 bf16::from_f64(250.49f64).to_bits(),
1803 bf16::from_f64(250.0).to_bits()
1804 );
1805 assert_eq!(
1806 bf16::from_f64(250.50f64).to_bits(),
1807 bf16::from_f64(250.0).to_bits()
1808 );
1809 assert_eq!(
1810 bf16::from_f64(250.51f64).to_bits(),
1811 bf16::from_f64(251.0).to_bits()
1812 );
1813 assert_eq!(
1814 bf16::from_f64(251.49f64).to_bits(),
1815 bf16::from_f64(251.0).to_bits()
1816 );
1817 assert_eq!(
1818 bf16::from_f64(251.50f64).to_bits(),
1819 bf16::from_f64(252.0).to_bits()
1820 );
1821 assert_eq!(
1822 bf16::from_f64(251.51f64).to_bits(),
1823 bf16::from_f64(252.0).to_bits()
1824 );
1825 assert_eq!(
1826 bf16::from_f64(252.49f64).to_bits(),
1827 bf16::from_f64(252.0).to_bits()
1828 );
1829 assert_eq!(
1830 bf16::from_f64(252.50f64).to_bits(),
1831 bf16::from_f64(252.0).to_bits()
1832 );
1833 assert_eq!(
1834 bf16::from_f64(252.51f64).to_bits(),
1835 bf16::from_f64(253.0).to_bits()
1836 );
1837 }
1838
1839 #[cfg(feature = "std")]
1840 #[test]
1841 fn formatting() {
1842 let f = bf16::from_f32(0.1152344);
1843
1844 assert_eq!(format!("{:.3}", f), "0.115");
1845 assert_eq!(format!("{:.4}", f), "0.1152");
1846 assert_eq!(format!("{:+.4}", f), "+0.1152");
1847 assert_eq!(format!("{:>+10.4}", f), " +0.1152");
1848
1849 assert_eq!(format!("{:.3?}", f), "0.115");
1850 assert_eq!(format!("{:.4?}", f), "0.1152");
1851 assert_eq!(format!("{:+.4?}", f), "+0.1152");
1852 assert_eq!(format!("{:>+10.4?}", f), " +0.1152");
1853 }
1854
1855 impl quickcheck::Arbitrary for bf16 {
1856 fn arbitrary(g: &mut quickcheck::Gen) -> Self {
1857 bf16(u16::arbitrary(g))
1858 }
1859 }
1860
1861 #[quickcheck]
1862 fn qc_roundtrip_bf16_f32_is_identity(f: bf16) -> bool {
1863 let roundtrip = bf16::from_f32(f.to_f32());
1864 if f.is_nan() {
1865 roundtrip.is_nan() && f.is_sign_negative() == roundtrip.is_sign_negative()
1866 } else {
1867 f.0 == roundtrip.0
1868 }
1869 }
1870
1871 #[quickcheck]
1872 fn qc_roundtrip_bf16_f64_is_identity(f: bf16) -> bool {
1873 let roundtrip = bf16::from_f64(f.to_f64());
1874 if f.is_nan() {
1875 roundtrip.is_nan() && f.is_sign_negative() == roundtrip.is_sign_negative()
1876 } else {
1877 f.0 == roundtrip.0
1878 }
1879 }
1880}
1881