bfloat.rs source code [crates/half/src/bfloat.rs]

1	#[cfg(all(feature = "serde", feature = "alloc"))]
2	#[allow(unused_imports)]
3	use alloc::string::ToString;
4	#[cfg(feature = "bytemuck")]
5	use bytemuck::{Pod, Zeroable};
6	use core::{
7	cmp::Ordering,
8	iter::{Product, Sum},
9	num::FpCategory,
10	ops::{Add, AddAssign, Div, DivAssign, Mul, MulAssign, Neg, Rem, RemAssign, Sub, SubAssign},
11	};
12	#[cfg(not(target_arch = "spirv"))]
13	use core::{
14	fmt::{
15	Binary, Debug, Display, Error, Formatter, LowerExp, LowerHex, Octal, UpperExp, UpperHex,
16	},
17	num::ParseFloatError,
18	str::FromStr,
19	};
20	#[cfg(feature = "serde")]
21	use serde::{Deserialize, Serialize};
22	#[cfg(feature = "zerocopy")]
23	use zerocopy::{FromBytes, Immutable, IntoBytes, KnownLayout};
24
25	pub(crate) mod convert;
26
/// A 16-bit floating point type implementing the [`bfloat16`] format.
///
/// The [`bfloat16`] floating point format is a truncated 16-bit version of the IEEE 754 standard
/// `binary32`, a.k.a. [`f32`]. [`struct@bf16`] has approximately the same dynamic range as [`f32`] by
/// having a lower precision than [`struct@f16`][crate::f16]. While [`struct@f16`][crate::f16] has a precision of
/// 11 bits, [`struct@bf16`] has a precision of only 8 bits.
///
/// The single field holds the raw 16-bit pattern: 1 sign bit, 8 exponent bits and 7 mantissa
/// bits, as used by the bit masks throughout this module.
///
/// [`bfloat16`]: https://en.wikipedia.org/wiki/Bfloat16_floating-point_format
#[allow(non_camel_case_types)]
#[derive(Clone, Copy, Default)]
#[repr(transparent)]
#[cfg_attr(feature = "serde", derive(Serialize))]
#[cfg_attr(
    feature = "rkyv",
    derive(rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)
)]
#[cfg_attr(feature = "rkyv", rkyv(resolver = Bf16Resolver))]
#[cfg_attr(feature = "bytemuck", derive(Zeroable, Pod))]
#[cfg_attr(
    feature = "zerocopy",
    derive(FromBytes, Immutable, IntoBytes, KnownLayout)
)]
#[cfg_attr(kani, derive(kani::Arbitrary))]
pub struct bf16(u16);
51
52	impl bf16 {
53	/// Constructs a [`struct@bf16`] value from the raw bits.
54	#[inline]
55	#[must_use]
56	pub const fn from_bits(bits: u16) -> bf16 {
57	bf16(bits)
58	}
59
60	/// Constructs a [`struct@bf16`] value from a 32-bit floating point value.
61	///
62	/// This operation is lossy. If the 32-bit value is too large to fit, ±∞ will result. NaN values
63	/// are preserved. Subnormal values that are too tiny to be represented will result in ±0. All
64	/// other values are truncated and rounded to the nearest representable value.
65	#[inline]
66	#[must_use]
67	pub fn from_f32(value: f32) -> bf16 {
68	Self::from_f32_const(value)
69	}
70
71	/// Constructs a [`struct@bf16`] value from a 32-bit floating point value.
72	///
73	/// This function is identical to [`from_f32`][Self::from_f32] except it never uses hardware
74	/// intrinsics, which allows it to be `const`. [`from_f32`][Self::from_f32] should be preferred
75	/// in any non-`const` context.
76	///
77	/// This operation is lossy. If the 32-bit value is too large to fit, ±∞ will result. NaN values
78	/// are preserved. Subnormal values that are too tiny to be represented will result in ±0. All
79	/// other values are truncated and rounded to the nearest representable value.
80	#[inline]
81	#[must_use]
82	pub const fn from_f32_const(value: f32) -> bf16 {
83	bf16(convert::f32_to_bf16(value))
84	}
85
86	/// Constructs a [`struct@bf16`] value from a 64-bit floating point value.
87	///
88	/// This operation is lossy. If the 64-bit value is to large to fit, ±∞ will result. NaN values
89	/// are preserved. 64-bit subnormal values are too tiny to be represented and result in ±0.
90	/// Exponents that underflow the minimum exponent will result in subnormals or ±0. All other
91	/// values are truncated and rounded to the nearest representable value.
92	#[inline]
93	#[must_use]
94	pub fn from_f64(value: f64) -> bf16 {
95	Self::from_f64_const(value)
96	}
97
98	/// Constructs a [`struct@bf16`] value from a 64-bit floating point value.
99	///
100	/// This function is identical to [`from_f64`][Self::from_f64] except it never uses hardware
101	/// intrinsics, which allows it to be `const`. [`from_f64`][Self::from_f64] should be preferred
102	/// in any non-`const` context.
103	///
104	/// This operation is lossy. If the 64-bit value is to large to fit, ±∞ will result. NaN values
105	/// are preserved. 64-bit subnormal values are too tiny to be represented and result in ±0.
106	/// Exponents that underflow the minimum exponent will result in subnormals or ±0. All other
107	/// values are truncated and rounded to the nearest representable value.
108	#[inline]
109	#[must_use]
110	pub const fn from_f64_const(value: f64) -> bf16 {
111	bf16(convert::f64_to_bf16(value))
112	}
113
114	/// Converts a [`struct@bf16`] into the underlying bit representation.
115	#[inline]
116	#[must_use]
117	pub const fn to_bits(self) -> u16 {
118	self.0
119	}
120
121	/// Returns the memory representation of the underlying bit representation as a byte array in
122	/// little-endian byte order.
123	///
124	/// # Examples
125	///
126	/// ```rust
127	/// # use half::prelude::*;
128	/// let bytes = bf16::from_f32(`12.5`).to_le_bytes();
129	/// assert_eq!(bytes, [`0x48`, `0x41`]);
130	/// ```
131	#[inline]
132	#[must_use]
133	pub const fn to_le_bytes(self) -> [u8; `2`] {
134	self.0.to_le_bytes()
135	}
136
137	/// Returns the memory representation of the underlying bit representation as a byte array in
138	/// big-endian (network) byte order.
139	///
140	/// # Examples
141	///
142	/// ```rust
143	/// # use half::prelude::*;
144	/// let bytes = bf16::from_f32(`12.5`).to_be_bytes();
145	/// assert_eq!(bytes, [`0x41`, `0x48`]);
146	/// ```
147	#[inline]
148	#[must_use]
149	pub const fn to_be_bytes(self) -> [u8; `2`] {
150	self.0.to_be_bytes()
151	}
152
153	/// Returns the memory representation of the underlying bit representation as a byte array in
154	/// native byte order.
155	///
156	/// As the target platform's native endianness is used, portable code should use
157	/// [`to_be_bytes`][bf16::to_be_bytes] or [`to_le_bytes`][bf16::to_le_bytes], as appropriate,
158	/// instead.
159	///
160	/// # Examples
161	///
162	/// ```rust
163	/// # use half::prelude::*;
164	/// let bytes = bf16::from_f32(`12.5`).to_ne_bytes();
165	/// assert_eq!(bytes, if cfg!(target_endian = "big") {
166	/// [`0x41`, `0x48`]
167	/// } else {
168	/// [`0x48`, `0x41`]
169	/// });
170	/// ```
171	#[inline]
172	#[must_use]
173	pub const fn to_ne_bytes(self) -> [u8; `2`] {
174	self.0.to_ne_bytes()
175	}
176
177	/// Creates a floating point value from its representation as a byte array in little endian.
178	///
179	/// # Examples
180	///
181	/// ```rust
182	/// # use half::prelude::*;
183	/// let value = bf16::from_le_bytes([`0x48`, `0x41`]);
184	/// assert_eq!(value, bf16::from_f32(`12.5`));
185	/// ```
186	#[inline]
187	#[must_use]
188	pub const fn from_le_bytes(bytes: [u8; `2`]) -> bf16 {
189	bf16::from_bits(u16::from_le_bytes(bytes))
190	}
191
192	/// Creates a floating point value from its representation as a byte array in big endian.
193	///
194	/// # Examples
195	///
196	/// ```rust
197	/// # use half::prelude::*;
198	/// let value = bf16::from_be_bytes([`0x41`, `0x48`]);
199	/// assert_eq!(value, bf16::from_f32(`12.5`));
200	/// ```
201	#[inline]
202	#[must_use]
203	pub const fn from_be_bytes(bytes: [u8; `2`]) -> bf16 {
204	bf16::from_bits(u16::from_be_bytes(bytes))
205	}
206
207	/// Creates a floating point value from its representation as a byte array in native endian.
208	///
209	/// As the target platform's native endianness is used, portable code likely wants to use
210	/// [`from_be_bytes`][bf16::from_be_bytes] or [`from_le_bytes`][bf16::from_le_bytes], as
211	/// appropriate instead.
212	///
213	/// # Examples
214	///
215	/// ```rust
216	/// # use half::prelude::*;
217	/// let value = bf16::from_ne_bytes(if cfg!(target_endian = "big") {
218	/// [`0x41`, `0x48`]
219	/// } else {
220	/// [`0x48`, `0x41`]
221	/// });
222	/// assert_eq!(value, bf16::from_f32(`12.5`));
223	/// ```
224	#[inline]
225	#[must_use]
226	pub const fn from_ne_bytes(bytes: [u8; `2`]) -> bf16 {
227	bf16::from_bits(u16::from_ne_bytes(bytes))
228	}
229
230	/// Converts a [`struct@bf16`] value into an [`f32`] value.
231	///
232	/// This conversion is lossless as all values can be represented exactly in [`f32`].
233	#[inline]
234	#[must_use]
235	pub fn to_f32(self) -> f32 {
236	self.to_f32_const()
237	}
238
239	/// Converts a [`struct@bf16`] value into an [`f32`] value.
240	///
241	/// This function is identical to [`to_f32`][Self::to_f32] except it never uses hardware
242	/// intrinsics, which allows it to be `const`. [`to_f32`][Self::to_f32] should be preferred
243	/// in any non-`const` context.
244	///
245	/// This conversion is lossless as all values can be represented exactly in [`f32`].
246	#[inline]
247	#[must_use]
248	pub const fn to_f32_const(self) -> f32 {
249	convert::bf16_to_f32(self.0)
250	}
251
252	/// Converts a [`struct@bf16`] value into an [`f64`] value.
253	///
254	/// This conversion is lossless as all values can be represented exactly in [`f64`].
255	#[inline]
256	#[must_use]
257	pub fn to_f64(self) -> f64 {
258	self.to_f64_const()
259	}
260
261	/// Converts a [`struct@bf16`] value into an [`f64`] value.
262	///
263	/// This function is identical to [`to_f64`][Self::to_f64] except it never uses hardware
264	/// intrinsics, which allows it to be `const`. [`to_f64`][Self::to_f64] should be preferred
265	/// in any non-`const` context.
266	///
267	/// This conversion is lossless as all values can be represented exactly in [`f64`].
268	#[inline]
269	#[must_use]
270	pub const fn to_f64_const(self) -> f64 {
271	convert::bf16_to_f64(self.0)
272	}
273
274	/// Returns `true` if this value is NaN and `false` otherwise.
275	///
276	/// # Examples
277	///
278	/// ```rust
279	/// # use half::prelude::*;
280	///
281	/// let nan = bf16::NAN;
282	/// let f = bf16::from_f32(`7.0_f32`);
283	///
284	/// assert!(nan.is_nan());
285	/// assert!(!f.is_nan());
286	/// ```
287	#[inline]
288	#[must_use]
289	pub const fn is_nan(self) -> bool {
290	self.0 & `0x7FFFu16` > `0x7F80u16`
291	}
292
293	/// Returns `true` if this value is ±∞ and `false` otherwise.
294	///
295	/// # Examples
296	///
297	/// ```rust
298	/// # use half::prelude::*;
299	///
300	/// let f = bf16::from_f32(`7.0f32`);
301	/// let inf = bf16::INFINITY;
302	/// let neg_inf = bf16::NEG_INFINITY;
303	/// let nan = bf16::NAN;
304	///
305	/// assert!(!f.is_infinite());
306	/// assert!(!nan.is_infinite());
307	///
308	/// assert!(inf.is_infinite());
309	/// assert!(neg_inf.is_infinite());
310	/// ```
311	#[inline]
312	#[must_use]
313	pub const fn is_infinite(self) -> bool {
314	self.0 & `0x7FFFu16` == `0x7F80u16`
315	}
316
317	/// Returns `true` if this number is neither infinite nor NaN.
318	///
319	/// # Examples
320	///
321	/// ```rust
322	/// # use half::prelude::*;
323	///
324	/// let f = bf16::from_f32(`7.0f32`);
325	/// let inf = bf16::INFINITY;
326	/// let neg_inf = bf16::NEG_INFINITY;
327	/// let nan = bf16::NAN;
328	///
329	/// assert!(f.is_finite());
330	///
331	/// assert!(!nan.is_finite());
332	/// assert!(!inf.is_finite());
333	/// assert!(!neg_inf.is_finite());
334	/// ```
335	#[inline]
336	#[must_use]
337	pub const fn is_finite(self) -> bool {
338	self.0 & `0x7F80u16` != `0x7F80u16`
339	}
340
341	/// Returns `true` if the number is neither zero, infinite, subnormal, or NaN.
342	///
343	/// # Examples
344	///
345	/// ```rust
346	/// # use half::prelude::*;
347	///
348	/// let min = bf16::MIN_POSITIVE;
349	/// let max = bf16::MAX;
350	/// let lower_than_min = bf16::from_f32(`1.0e-39_f32`);
351	/// let zero = bf16::from_f32(`0.0_f32`);
352	///
353	/// assert!(min.is_normal());
354	/// assert!(max.is_normal());
355	///
356	/// assert!(!zero.is_normal());
357	/// assert!(!bf16::NAN.is_normal());
358	/// assert!(!bf16::INFINITY.is_normal());
359	/// // Values between 0 and `min` are subnormal.
360	/// assert!(!lower_than_min.is_normal());
361	/// ```
362	#[inline]
363	#[must_use]
364	pub const fn is_normal(self) -> bool {
365	let exp = self.0 & `0x7F80u16`;
366	exp != `0x7F80u16` && exp != `0`
367	}
368
369	/// Returns the floating point category of the number.
370	///
371	/// If only one property is going to be tested, it is generally faster to use the specific
372	/// predicate instead.
373	///
374	/// # Examples
375	///
376	/// ```rust
377	/// use std::num::FpCategory;
378	/// # use half::prelude::*;
379	///
380	/// let num = bf16::from_f32(`12.4_f32`);
381	/// let inf = bf16::INFINITY;
382	///
383	/// assert_eq!(num.classify(), FpCategory::Normal);
384	/// assert_eq!(inf.classify(), FpCategory::Infinite);
385	/// ```
386	#[must_use]
387	pub const fn classify(self) -> FpCategory {
388	let exp = self.0 & `0x7F80u16`;
389	let man = self.0 & `0x007Fu16`;
390	match (exp, man) {
391	(`0`, `0`) => FpCategory::Zero,
392	(`0`, _) => FpCategory::Subnormal,
393	(`0x7F80u16`, `0`) => FpCategory::Infinite,
394	(`0x7F80u16`, _) => FpCategory::Nan,
395	_ => FpCategory::Normal,
396	}
397	}
398
399	/// Returns a number that represents the sign of `self`.
400	///
401	/// 1.0 if the number is positive, +0.0 or* [`INFINITY`][bf16::INFINITY]
402	/// −1.0 if the number is negative, −0.0` or [`NEG_INFINITY`]*[bf16::NEG_INFINITY]
403	/// * [`NAN`][bf16::NAN] if the number is NaN
404	///
405	/// # Examples
406	///
407	/// ```rust
408	/// # use half::prelude::*;
409	///
410	/// let f = bf16::from_f32(`3.5_f32`);
411	///
412	/// assert_eq!(f.signum(), bf16::from_f32(`1.0`));
413	/// assert_eq!(bf16::NEG_INFINITY.signum(), bf16::from_f32(-`1.0`));
414	///
415	/// assert!(bf16::NAN.signum().is_nan());
416	/// ```
417	#[must_use]
418	pub const fn signum(self) -> bf16 {
419	if self.is_nan() {
420	self
421	} else if self.0 & `0x8000u16` != `0` {
422	Self::NEG_ONE
423	} else {
424	Self::ONE
425	}
426	}
427
428	/// Returns `true` if and only if `self` has a positive sign, including +0.0, NaNs with a
429	/// positive sign bit and +∞.
430	///
431	/// # Examples
432	///
433	/// ```rust
434	/// # use half::prelude::*;
435	///
436	/// let nan = bf16::NAN;
437	/// let f = bf16::from_f32(`7.0_f32`);
438	/// let g = bf16::from_f32(`-7.0_f32`);
439	///
440	/// assert!(f.is_sign_positive());
441	/// assert!(!g.is_sign_positive());
442	/// // NaN can be either positive or negative
443	/// assert!(nan.is_sign_positive() != nan.is_sign_negative());
444	/// ```
445	#[inline]
446	#[must_use]
447	pub const fn is_sign_positive(self) -> bool {
448	self.0 & `0x8000u16` == `0`
449	}
450
451	/// Returns `true` if and only if `self` has a negative sign, including −0.0, NaNs with a
452	/// negative sign bit and −∞.
453	///
454	/// # Examples
455	///
456	/// ```rust
457	/// # use half::prelude::*;
458	///
459	/// let nan = bf16::NAN;
460	/// let f = bf16::from_f32(`7.0f32`);
461	/// let g = bf16::from_f32(`-7.0f32`);
462	///
463	/// assert!(!f.is_sign_negative());
464	/// assert!(g.is_sign_negative());
465	/// // NaN can be either positive or negative
466	/// assert!(nan.is_sign_positive() != nan.is_sign_negative());
467	/// ```
468	#[inline]
469	#[must_use]
470	pub const fn is_sign_negative(self) -> bool {
471	self.0 & `0x8000u16` != `0`
472	}
473
474	/// Returns a number composed of the magnitude of `self` and the sign of `sign`.
475	///
476	/// Equal to `self` if the sign of `self` and `sign` are the same, otherwise equal to `-self`.
477	/// If `self` is NaN, then NaN with the sign of `sign` is returned.
478	///
479	/// # Examples
480	///
481	/// ```
482	/// # use half::prelude::*;
483	/// let f = bf16::from_f32(`3.5`);
484	///
485	/// assert_eq!(f.copysign(bf16::from_f32(`0.42`)), bf16::from_f32(`3.5`));
486	/// assert_eq!(f.copysign(bf16::from_f32(-`0.42`)), bf16::from_f32(-`3.5`));
487	/// assert_eq!((-f).copysign(bf16::from_f32(`0.42`)), bf16::from_f32(`3.5`));
488	/// assert_eq!((-f).copysign(bf16::from_f32(-`0.42`)), bf16::from_f32(-`3.5`));
489	///
490	/// assert!(bf16::NAN.copysign(bf16::from_f32(`1.0`)).is_nan());
491	/// ```
492	#[inline]
493	#[must_use]
494	pub const fn copysign(self, sign: bf16) -> bf16 {
495	bf16((sign.0 & `0x8000u16`) \| (self.0 & `0x7FFFu16`))
496	}
497
498	/// Returns the maximum of the two numbers.
499	///
500	/// If one of the arguments is NaN, then the other argument is returned.
501	///
502	/// # Examples
503	///
504	/// ```
505	/// # use half::prelude::*;
506	/// let x = bf16::from_f32(`1.0`);
507	/// let y = bf16::from_f32(`2.0`);
508	///
509	/// assert_eq!(x.max(y), y);
510	/// ```
511	#[inline]
512	#[must_use]
513	pub fn max(self, other: bf16) -> bf16 {
514	if other > self && !other.is_nan() {
515	other
516	} else {
517	self
518	}
519	}
520
521	/// Returns the minimum of the two numbers.
522	///
523	/// If one of the arguments is NaN, then the other argument is returned.
524	///
525	/// # Examples
526	///
527	/// ```
528	/// # use half::prelude::*;
529	/// let x = bf16::from_f32(`1.0`);
530	/// let y = bf16::from_f32(`2.0`);
531	///
532	/// assert_eq!(x.min(y), x);
533	/// ```
534	#[inline]
535	#[must_use]
536	pub fn min(self, other: bf16) -> bf16 {
537	if other < self && !other.is_nan() {
538	other
539	} else {
540	self
541	}
542	}
543
544	/// Restrict a value to a certain interval unless it is NaN.
545	///
546	/// Returns `max` if `self` is greater than `max`, and `min` if `self` is less than `min`.
547	/// Otherwise this returns `self`.
548	///
549	/// Note that this function returns NaN if the initial value was NaN as well.
550	///
551	/// # Panics
552	/// Panics if `min > max`, `min` is NaN, or `max` is NaN.
553	///
554	/// # Examples
555	///
556	/// ```
557	/// # use half::prelude::*;
558	/// assert!(bf16::from_f32(-`3.0`).clamp(bf16::from_f32(-`2.0`), bf16::from_f32(`1.0`)) == bf16::from_f32(-`2.0`));
559	/// assert!(bf16::from_f32(`0.0`).clamp(bf16::from_f32(-`2.0`), bf16::from_f32(`1.0`)) == bf16::from_f32(`0.0`));
560	/// assert!(bf16::from_f32(`2.0`).clamp(bf16::from_f32(-`2.0`), bf16::from_f32(`1.0`)) == bf16::from_f32(`1.0`));
561	/// assert!(bf16::NAN.clamp(bf16::from_f32(-`2.0`), bf16::from_f32(`1.0`)).is_nan());
562	/// ```
563	#[inline]
564	#[must_use]
565	pub fn clamp(self, min: bf16, max: bf16) -> bf16 {
566	assert!(min <= max);
567	let mut x = self;
568	if x < min {
569	x = min;
570	}
571	if x > max {
572	x = max;
573	}
574	x
575	}
576
577	/// Returns the ordering between `self` and `other`.
578	///
579	/// Unlike the standard partial comparison between floating point numbers,
580	/// this comparison always produces an ordering in accordance to
581	/// the `totalOrder` predicate as defined in the IEEE 754 (2008 revision)
582	/// floating point standard. The values are ordered in the following sequence:
583	///
584	/// - negative quiet NaN
585	/// - negative signaling NaN
586	/// - negative infinity
587	/// - negative numbers
588	/// - negative subnormal numbers
589	/// - negative zero
590	/// - positive zero
591	/// - positive subnormal numbers
592	/// - positive numbers
593	/// - positive infinity
594	/// - positive signaling NaN
595	/// - positive quiet NaN.
596	///
597	/// The ordering established by this function does not always agree with the
598	/// [`PartialOrd`] and [`PartialEq`] implementations of `bf16`. For example,
599	/// they consider negative and positive zero equal, while `total_cmp`
600	/// doesn't.
601	///
602	/// The interpretation of the signaling NaN bit follows the definition in
603	/// the IEEE 754 standard, which may not match the interpretation by some of
604	/// the older, non-conformant (e.g. MIPS) hardware implementations.
605	///
606	/// # Examples
607	/// ```
608	/// # use half::bf16;
609	/// let mut v: Vec<bf16> = vec![];
610	/// v.push(bf16::ONE);
611	/// v.push(bf16::INFINITY);
612	/// v.push(bf16::NEG_INFINITY);
613	/// v.push(bf16::NAN);
614	/// v.push(bf16::MAX_SUBNORMAL);
615	/// v.push(-bf16::MAX_SUBNORMAL);
616	/// v.push(bf16::ZERO);
617	/// v.push(bf16::NEG_ZERO);
618	/// v.push(bf16::NEG_ONE);
619	/// v.push(bf16::MIN_POSITIVE);
620	///
621	/// v.sort_by(\|a, b\| a.total_cmp(&b));
622	///
623	/// assert!(v
624	/// .into_iter()
625	/// .zip(
626	/// [
627	/// bf16::NEG_INFINITY,
628	/// bf16::NEG_ONE,
629	/// -bf16::MAX_SUBNORMAL,
630	/// bf16::NEG_ZERO,
631	/// bf16::ZERO,
632	/// bf16::MAX_SUBNORMAL,
633	/// bf16::MIN_POSITIVE,
634	/// bf16::ONE,
635	/// bf16::INFINITY,
636	/// bf16::NAN
637	/// ]
638	/// .iter()
639	/// )
640	/// .all(\|(a, b)\| a.to_bits() == b.to_bits()));
641	/// ```
642	// Implementation based on: https://doc.rust-lang.org/std/primitive.f32.html#method.total_cmp
643	#[inline]
644	#[must_use]
645	pub fn total_cmp(&self, other: &Self) -> Ordering {
646	let mut left = self.to_bits() as i16;
647	let mut right = other.to_bits() as i16;
648	left ^= (((left >> `15`) as u16) >> `1`) as i16;
649	right ^= (((right >> `15`) as u16) >> `1`) as i16;
650	left.cmp(&right)
651	}
652
653	/// Alternate serialize adapter for serializing as a float.
654	///
655	/// By default, [`struct@bf16`] serializes as a newtype of [`u16`]. This is an alternate serialize
656	/// implementation that serializes as an [`f32`] value. It is designed for use with
657	/// `serialize_with` serde attributes. Deserialization from `f32` values is already supported by
658	/// the default deserialize implementation.
659	///
660	/// # Examples
661	///
662	/// A demonstration on how to use this adapater:
663	///
664	/// ```
665	/// use serde::{Serialize, Deserialize};
666	/// use half::bf16;
667	///
668	/// #[derive(Serialize, Deserialize)]
669	/// struct MyStruct {
670	/// #[serde(serialize_with = "bf16::serialize_as_f32")]
671	/// value: bf16 // Will be serialized as f32 instead of u16
672	/// }
673	/// ```
674	#[cfg(feature = "serde")]
675	pub fn serialize_as_f32<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
676	serializer.serialize_f32(self.to_f32())
677	}
678
679	/// Alternate serialize adapter for serializing as a string.
680	///
681	/// By default, [`struct@bf16`] serializes as a newtype of [`u16`]. This is an alternate serialize
682	/// implementation that serializes as a string value. It is designed for use with
683	/// `serialize_with` serde attributes. Deserialization from string values is already supported
684	/// by the default deserialize implementation.
685	///
686	/// # Examples
687	///
688	/// A demonstration on how to use this adapater:
689	///
690	/// ```
691	/// use serde::{Serialize, Deserialize};
692	/// use half::bf16;
693	///
694	/// #[derive(Serialize, Deserialize)]
695	/// struct MyStruct {
696	/// #[serde(serialize_with = "bf16::serialize_as_string")]
697	/// value: bf16 // Will be serialized as a string instead of u16
698	/// }
699	/// ```
700	#[cfg(all(feature = "serde", feature = "alloc"))]
701	pub fn serialize_as_string<S: serde::Serializer>(
702	&self,
703	serializer: S,
704	) -> Result<S::Ok, S::Error> {
705	serializer.serialize_str(&self.to_string())
706	}
707
708	/// Approximate number of [`struct@bf16`] significant digits in base 10
709	pub const DIGITS: u32 = `2`;
710	/// [`struct@bf16`]
711	/// [machine epsilon](https://en.wikipedia.org/wiki/Machine_epsilon) value
712	///
713	/// This is the difference between 1.0 and the next largest representable number.
714	pub const EPSILON: bf16 = bf16(`0x3C00u16`);
715	/// [`struct@bf16`] positive Infinity (+∞)
716	pub const INFINITY: bf16 = bf16(`0x7F80u16`);
717	/// Number of [`struct@bf16`] significant digits in base 2
718	pub const MANTISSA_DIGITS: u32 = `8`;
719	/// Largest finite [`struct@bf16`] value
720	pub const MAX: bf16 = bf16(`0x7F7F`);
721	/// Maximum possible [`struct@bf16`] power of 10 exponent
722	pub const MAX_10_EXP: i32 = `38`;
723	/// Maximum possible [`struct@bf16`] power of 2 exponent
724	pub const MAX_EXP: i32 = `128`;
725	/// Smallest finite [`struct@bf16`] value
726	pub const MIN: bf16 = bf16(`0xFF7F`);
727	/// Minimum possible normal [`struct@bf16`] power of 10 exponent
728	pub const MIN_10_EXP: i32 = `-37`;
729	/// One greater than the minimum possible normal [`struct@bf16`] power of 2 exponent
730	pub const MIN_EXP: i32 = `-125`;
731	/// Smallest positive normal [`struct@bf16`] value
732	pub const MIN_POSITIVE: bf16 = bf16(`0x0080u16`);
733	/// [`struct@bf16`] Not a Number (NaN)
734	pub const NAN: bf16 = bf16(`0x7FC0u16`);
735	/// [`struct@bf16`] negative infinity (-∞).
736	pub const NEG_INFINITY: bf16 = bf16(`0xFF80u16`);
737	/// The radix or base of the internal representation of [`struct@bf16`]
738	pub const RADIX: u32 = `2`;
739
740	/// Minimum positive subnormal [`struct@bf16`] value
741	pub const MIN_POSITIVE_SUBNORMAL: bf16 = bf16(`0x0001u16`);
742	/// Maximum subnormal [`struct@bf16`] value
743	pub const MAX_SUBNORMAL: bf16 = bf16(`0x007Fu16`);
744
745	/// [`struct@bf16`] 1
746	pub const ONE: bf16 = bf16(`0x3F80u16`);
747	/// [`struct@bf16`] 0
748	pub const ZERO: bf16 = bf16(`0x0000u16`);
749	/// [`struct@bf16`] -0
750	pub const NEG_ZERO: bf16 = bf16(`0x8000u16`);
751	/// [`struct@bf16`] -1
752	pub const NEG_ONE: bf16 = bf16(`0xBF80u16`);
753
754	/// [`struct@bf16`] Euler's number (ℯ)
755	pub const E: bf16 = bf16(`0x402Eu16`);
756	/// [`struct@bf16`] Archimedes' constant (π)
757	pub const PI: bf16 = bf16(`0x4049u16`);
758	/// [`struct@bf16`] 1/π
759	pub const FRAC_1_PI: bf16 = bf16(`0x3EA3u16`);
760	/// [`struct@bf16`] 1/√2
761	pub const FRAC_1_SQRT_2: bf16 = bf16(`0x3F35u16`);
762	/// [`struct@bf16`] 2/π
763	pub const FRAC_2_PI: bf16 = bf16(`0x3F23u16`);
764	/// [`struct@bf16`] 2/√π
765	pub const FRAC_2_SQRT_PI: bf16 = bf16(`0x3F90u16`);
766	/// [`struct@bf16`] π/2
767	pub const FRAC_PI_2: bf16 = bf16(`0x3FC9u16`);
768	/// [`struct@bf16`] π/3
769	pub const FRAC_PI_3: bf16 = bf16(`0x3F86u16`);
770	/// [`struct@bf16`] π/4
771	pub const FRAC_PI_4: bf16 = bf16(`0x3F49u16`);
772	/// [`struct@bf16`] π/6
773	pub const FRAC_PI_6: bf16 = bf16(`0x3F06u16`);
774	/// [`struct@bf16`] π/8
775	pub const FRAC_PI_8: bf16 = bf16(`0x3EC9u16`);
776	/// [`struct@bf16`] 𝗅𝗇 10
777	pub const LN_10: bf16 = bf16(`0x4013u16`);
778	/// [`struct@bf16`] 𝗅𝗇 2
779	pub const LN_2: bf16 = bf16(`0x3F31u16`);
780	/// [`struct@bf16`] 𝗅𝗈𝗀₁₀ℯ
781	pub const LOG10_E: bf16 = bf16(`0x3EDEu16`);
782	/// [`struct@bf16`] 𝗅𝗈𝗀₁₀2
783	pub const LOG10_2: bf16 = bf16(`0x3E9Au16`);
784	/// [`struct@bf16`] 𝗅𝗈𝗀₂ℯ
785	pub const LOG2_E: bf16 = bf16(`0x3FB9u16`);
786	/// [`struct@bf16`] 𝗅𝗈𝗀₂10
787	pub const LOG2_10: bf16 = bf16(`0x4055u16`);
788	/// [`struct@bf16`] √2
789	pub const SQRT_2: bf16 = bf16(`0x3FB5u16`);
790	}
791
792	impl From<bf16> for f32 {
793	#[inline]
794	fn from(x: bf16) -> f32 {
795	x.to_f32()
796	}
797	}
798
799	impl From<bf16> for f64 {
800	#[inline]
801	fn from(x: bf16) -> f64 {
802	x.to_f64()
803	}
804	}
805
806	impl From<i8> for bf16 {
807	#[inline]
808	fn from(x: i8) -> bf16 {
809	// Convert to f32, then to bf16
810	bf16::from_f32(f32::from(x))
811	}
812	}
813
814	impl From<u8> for bf16 {
815	#[inline]
816	fn from(x: u8) -> bf16 {
817	// Convert to f32, then to f16
818	bf16::from_f32(f32::from(x))
819	}
820	}
821
822	impl PartialEq for bf16 {
823	fn eq(&self, other: &bf16) -> bool {
824	if self.is_nan() \|\| other.is_nan() {
825	`false`
826	} else {
827	(self.0 == other.0) \|\| ((self.0 \| other.0) & `0x7FFFu16` == `0`)
828	}
829	}
830	}
831
832	impl PartialOrd for bf16 {
833	fn partial_cmp(&self, other: &bf16) -> Option<Ordering> {
834	if self.is_nan() \|\| other.is_nan() {
835	None
836	} else {
837	let neg = self.0 & `0x8000u16` != `0`;
838	let other_neg = other.0 & `0x8000u16` != `0`;
839	match (neg, other_neg) {
840	(`false`, `false`) => Some(self.0.cmp(&other.0)),
841	(`false`, `true`) => {
842	if (self.0 \| other.0) & `0x7FFFu16` == `0` {
843	Some(Ordering::Equal)
844	} else {
845	Some(Ordering::Greater)
846	}
847	}
848	(`true`, `false`) => {
849	if (self.0 \| other.0) & `0x7FFFu16` == `0` {
850	Some(Ordering::Equal)
851	} else {
852	Some(Ordering::Less)
853	}
854	}
855	(`true`, `true`) => Some(other.0.cmp(&self.0)),
856	}
857	}
858	}
859
860	fn lt(&self, other: &bf16) -> bool {
861	if self.is_nan() \|\| other.is_nan() {
862	`false`
863	} else {
864	let neg = self.0 & `0x8000u16` != `0`;
865	let other_neg = other.0 & `0x8000u16` != `0`;
866	match (neg, other_neg) {
867	(`false`, `false`) => self.0 < other.0,
868	(`false`, `true`) => `false`,
869	(`true`, `false`) => (self.0 \| other.0) & `0x7FFFu16` != `0`,
870	(`true`, `true`) => self.0 > other.0,
871	}
872	}
873	}
874
875	fn le(&self, other: &bf16) -> bool {
876	if self.is_nan() \|\| other.is_nan() {
877	`false`
878	} else {
879	let neg = self.0 & `0x8000u16` != `0`;
880	let other_neg = other.0 & `0x8000u16` != `0`;
881	match (neg, other_neg) {
882	(`false`, `false`) => self.0 <= other.0,
883	(`false`, `true`) => (self.0 \| other.0) & `0x7FFFu16` == `0`,
884	(`true`, `false`) => `true`,
885	(`true`, `true`) => self.0 >= other.0,
886	}
887	}
888	}
889
890	fn gt(&self, other: &bf16) -> bool {
891	if self.is_nan() \|\| other.is_nan() {
892	`false`
893	} else {
894	let neg = self.0 & `0x8000u16` != `0`;
895	let other_neg = other.0 & `0x8000u16` != `0`;
896	match (neg, other_neg) {
897	(`false`, `false`) => self.0 > other.0,
898	(`false`, `true`) => (self.0 \| other.0) & `0x7FFFu16` != `0`,
899	(`true`, `false`) => `false`,
900	(`true`, `true`) => self.0 < other.0,
901	}
902	}
903	}
904
905	fn ge(&self, other: &bf16) -> bool {
906	if self.is_nan() \|\| other.is_nan() {
907	`false`
908	} else {
909	let neg = self.0 & `0x8000u16` != `0`;
910	let other_neg = other.0 & `0x8000u16` != `0`;
911	match (neg, other_neg) {
912	(`false`, `false`) => self.0 >= other.0,
913	(`false`, `true`) => `true`,
914	(`true`, `false`) => (self.0 \| other.0) & `0x7FFFu16` == `0`,
915	(`true`, `true`) => self.0 <= other.0,
916	}
917	}
918	}
919	}
920
921	#[cfg(not(target_arch = "spirv"))]
922	impl FromStr for bf16 {
923	type Err = ParseFloatError;
924	fn from_str(src: &str) -> Result<bf16, ParseFloatError> {
925	f32::from_str(src).map(op:bf16::from_f32)
926	}
927	}
928
929	#[cfg(not(target_arch = "spirv"))]
930	impl Debug for bf16 {
931	fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
932	Debug::fmt(&self.to_f32(), f)
933	}
934	}
935
936	#[cfg(not(target_arch = "spirv"))]
937	impl Display for bf16 {
938	fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
939	Display::fmt(&self.to_f32(), f)
940	}
941	}
942
943	#[cfg(not(target_arch = "spirv"))]
944	impl LowerExp for bf16 {
945	fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
946	write!(f, "{:e}", self.to_f32())
947	}
948	}
949
950	#[cfg(not(target_arch = "spirv"))]
951	impl UpperExp for bf16 {
952	fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
953	write!(f, "{:E}", self.to_f32())
954	}
955	}
956
957	#[cfg(not(target_arch = "spirv"))]
958	impl Binary for bf16 {
959	fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
960	write!(f, "{:b}", self.0)
961	}
962	}
963
964	#[cfg(not(target_arch = "spirv"))]
965	impl Octal for bf16 {
966	fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
967	write!(f, "{:o}", self.0)
968	}
969	}
970
971	#[cfg(not(target_arch = "spirv"))]
972	impl LowerHex for bf16 {
973	fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
974	write!(f, "{:x}", self.0)
975	}
976	}
977
978	#[cfg(not(target_arch = "spirv"))]
979	impl UpperHex for bf16 {
980	fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
981	write!(f, "{:X}", self.0)
982	}
983	}
984
985	impl Neg for bf16 {
986	type Output = Self;
987
988	fn neg(self) -> Self::Output {
989	Self(self.0 ^ `0x8000`)
990	}
991	}
992
993	impl Neg for &bf16 {
994	type Output = <bf16 as Neg>::Output;
995
996	#[inline]
997	fn neg(self) -> Self::Output {
998	Neg::neg(*self)
999	}
1000	}
1001
1002	impl Add for bf16 {
1003	type Output = Self;
1004
1005	fn add(self, rhs: Self) -> Self::Output {
1006	Self::from_f32(Self::to_f32(self) + Self::to_f32(self:rhs))
1007	}
1008	}
1009
1010	impl Add<&bf16> for bf16 {
1011	type Output = <bf16 as Add<bf16>>::Output;
1012
1013	#[inline]
1014	fn add(self, rhs: &bf16) -> Self::Output {
1015	self.add(*rhs)
1016	}
1017	}
1018
1019	impl Add<&bf16> for &bf16 {
1020	type Output = <bf16 as Add<bf16>>::Output;
1021
1022	#[inline]
1023	fn add(self, rhs: &bf16) -> Self::Output {
1024	(self).add(rhs)
1025	}
1026	}
1027
1028	impl Add<bf16> for &bf16 {
1029	type Output = <bf16 as Add<bf16>>::Output;
1030
1031	#[inline]
1032	fn add(self, rhs: bf16) -> Self::Output {
1033	(*self).add(rhs)
1034	}
1035	}
1036
1037	impl AddAssign for bf16 {
1038	#[inline]
1039	fn add_assign(&mut self, rhs: Self) {
1040	self = (self).add(rhs);
1041	}
1042	}
1043
1044	impl AddAssign<&bf16> for bf16 {
1045	#[inline]
1046	fn add_assign(&mut self, rhs: &bf16) {
1047	self = (self).add(rhs);
1048	}
1049	}
1050
1051	impl Sub for bf16 {
1052	type Output = Self;
1053
1054	fn sub(self, rhs: Self) -> Self::Output {
1055	Self::from_f32(Self::to_f32(self) - Self::to_f32(self:rhs))
1056	}
1057	}
1058
1059	impl Sub<&bf16> for bf16 {
1060	type Output = <bf16 as Sub<bf16>>::Output;
1061
1062	#[inline]
1063	fn sub(self, rhs: &bf16) -> Self::Output {
1064	self.sub(*rhs)
1065	}
1066	}
1067
1068	impl Sub<&bf16> for &bf16 {
1069	type Output = <bf16 as Sub<bf16>>::Output;
1070
1071	#[inline]
1072	fn sub(self, rhs: &bf16) -> Self::Output {
1073	(self).sub(rhs)
1074	}
1075	}
1076
1077	impl Sub<bf16> for &bf16 {
1078	type Output = <bf16 as Sub<bf16>>::Output;
1079
1080	#[inline]
1081	fn sub(self, rhs: bf16) -> Self::Output {
1082	(*self).sub(rhs)
1083	}
1084	}
1085
1086	impl SubAssign for bf16 {
1087	#[inline]
1088	fn sub_assign(&mut self, rhs: Self) {
1089	self = (self).sub(rhs);
1090	}
1091	}
1092
1093	impl SubAssign<&bf16> for bf16 {
1094	#[inline]
1095	fn sub_assign(&mut self, rhs: &bf16) {
1096	self = (self).sub(rhs);
1097	}
1098	}
1099
1100	impl Mul for bf16 {
1101	type Output = Self;
1102
1103	fn mul(self, rhs: Self) -> Self::Output {
1104	Self::from_f32(Self::to_f32(self) * Self::to_f32(self:rhs))
1105	}
1106	}
1107
1108	impl Mul<&bf16> for bf16 {
1109	type Output = <bf16 as Mul<bf16>>::Output;
1110
1111	#[inline]
1112	fn mul(self, rhs: &bf16) -> Self::Output {
1113	self.mul(*rhs)
1114	}
1115	}
1116
1117	impl Mul<&bf16> for &bf16 {
1118	type Output = <bf16 as Mul<bf16>>::Output;
1119
1120	#[inline]
1121	fn mul(self, rhs: &bf16) -> Self::Output {
1122	(self).mul(rhs)
1123	}
1124	}
1125
1126	impl Mul<bf16> for &bf16 {
1127	type Output = <bf16 as Mul<bf16>>::Output;
1128
1129	#[inline]
1130	fn mul(self, rhs: bf16) -> Self::Output {
1131	(*self).mul(rhs)
1132	}
1133	}
1134
1135	impl MulAssign for bf16 {
1136	#[inline]
1137	fn mul_assign(&mut self, rhs: Self) {
1138	self = (self).mul(rhs);
1139	}
1140	}
1141
1142	impl MulAssign<&bf16> for bf16 {
1143	#[inline]
1144	fn mul_assign(&mut self, rhs: &bf16) {
1145	self = (self).mul(rhs);
1146	}
1147	}
1148
1149	impl Div for bf16 {
1150	type Output = Self;
1151
1152	fn div(self, rhs: Self) -> Self::Output {
1153	Self::from_f32(Self::to_f32(self) / Self::to_f32(self:rhs))
1154	}
1155	}
1156
1157	impl Div<&bf16> for bf16 {
1158	type Output = <bf16 as Div<bf16>>::Output;
1159
1160	#[inline]
1161	fn div(self, rhs: &bf16) -> Self::Output {
1162	self.div(*rhs)
1163	}
1164	}
1165
1166	impl Div<&bf16> for &bf16 {
1167	type Output = <bf16 as Div<bf16>>::Output;
1168
1169	#[inline]
1170	fn div(self, rhs: &bf16) -> Self::Output {
1171	(self).div(rhs)
1172	}
1173	}
1174
1175	impl Div<bf16> for &bf16 {
1176	type Output = <bf16 as Div<bf16>>::Output;
1177
1178	#[inline]
1179	fn div(self, rhs: bf16) -> Self::Output {
1180	(*self).div(rhs)
1181	}
1182	}
1183
1184	impl DivAssign for bf16 {
1185	#[inline]
1186	fn div_assign(&mut self, rhs: Self) {
1187	self = (self).div(rhs);
1188	}
1189	}
1190
1191	impl DivAssign<&bf16> for bf16 {
1192	#[inline]
1193	fn div_assign(&mut self, rhs: &bf16) {
1194	self = (self).div(rhs);
1195	}
1196	}
1197
1198	impl Rem for bf16 {
1199	type Output = Self;
1200
1201	fn rem(self, rhs: Self) -> Self::Output {
1202	Self::from_f32(Self::to_f32(self) % Self::to_f32(self:rhs))
1203	}
1204	}
1205
1206	impl Rem<&bf16> for bf16 {
1207	type Output = <bf16 as Rem<bf16>>::Output;
1208
1209	#[inline]
1210	fn rem(self, rhs: &bf16) -> Self::Output {
1211	self.rem(*rhs)
1212	}
1213	}
1214
1215	impl Rem<&bf16> for &bf16 {
1216	type Output = <bf16 as Rem<bf16>>::Output;
1217
1218	#[inline]
1219	fn rem(self, rhs: &bf16) -> Self::Output {
1220	(self).rem(rhs)
1221	}
1222	}
1223
1224	impl Rem<bf16> for &bf16 {
1225	type Output = <bf16 as Rem<bf16>>::Output;
1226
1227	#[inline]
1228	fn rem(self, rhs: bf16) -> Self::Output {
1229	(*self).rem(rhs)
1230	}
1231	}
1232
1233	impl RemAssign for bf16 {
1234	#[inline]
1235	fn rem_assign(&mut self, rhs: Self) {
1236	self = (self).rem(rhs);
1237	}
1238	}
1239
1240	impl RemAssign<&bf16> for bf16 {
1241	#[inline]
1242	fn rem_assign(&mut self, rhs: &bf16) {
1243	self = (self).rem(rhs);
1244	}
1245	}
1246
1247	impl Product for bf16 {
1248	#[inline]
1249	fn product<I: Iterator<Item = Self>>(iter: I) -> Self {
1250	bf16::from_f32(iter.map(\|f: bf16\| f.to_f32()).product())
1251	}
1252	}
1253
1254	impl<'a> Product<&'a bf16> for bf16 {
1255	#[inline]
1256	fn product<I: Iterator<Item = &'a bf16>>(iter: I) -> Self {
1257	bf16::from_f32(iter.map(\|f: &'a bf16\| f.to_f32()).product())
1258	}
1259	}
1260
1261	impl Sum for bf16 {
1262	#[inline]
1263	fn sum<I: Iterator<Item = Self>>(iter: I) -> Self {
1264	bf16::from_f32(iter.map(\|f: bf16\| f.to_f32()).sum())
1265	}
1266	}
1267
1268	impl<'a> Sum<&'a bf16> for bf16 {
1269	#[inline]
1270	fn sum<I: Iterator<Item = &'a bf16>>(iter: I) -> Self {
1271	bf16::from_f32(iter.map(\|f: &'a bf16\| f.to_f32()).sum())
1272	}
1273	}
1274
/// Stateless serde visitor used by the `Deserialize` impl for `bf16`.
///
/// It carries no data; all behaviour lives in its `serde::de::Visitor` impl.
#[cfg(feature = "serde")]
struct Visitor;
1277
/// Deserialization entry point for `bf16` (only with the `serde` feature).
#[cfg(feature = "serde")]
impl<'de> Deserialize<'de> for bf16 {
    /// Asks the deserializer for a newtype struct named `"bf16"` and hands the
    /// decoding over to [`Visitor`].
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        let visitor = Visitor;
        deserializer.deserialize_newtype_struct("bf16", visitor)
    }
}
1287
/// Decoding rules for `bf16`: accepts a newtype struct wrapping the raw `u16` bits,
/// a string that parses as a `bf16`, or an `f32`/`f64` number.
#[cfg(feature = "serde")]
impl<'de> serde::de::Visitor<'de> for Visitor {
    type Value = bf16;

    /// Describes the expected input for serde error messages.
    fn expecting(&self, formatter: &mut core::fmt::Formatter) -> core::fmt::Result {
        write!(formatter, "tuple struct bf16")
    }

    /// Reads the inner value as a `u16` and uses it directly as the raw bit pattern.
    fn visit_newtype_struct<D>(self, deserializer: D) -> Result<Self::Value, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        Ok(bf16(<u16 as Deserialize>::deserialize(deserializer)?))
    }

    /// Parses the string via `str::parse`; any parse failure is reported as an
    /// `invalid_value` error that quotes the offending string.
    fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
    where
        E: serde::de::Error,
    {
        v.parse().map_err(|_| {
            serde::de::Error::invalid_value(serde::de::Unexpected::Str(v), &"a float string")
        })
    }

    /// Converts an `f32` with `bf16::from_f32`.
    fn visit_f32<E>(self, v: f32) -> Result<Self::Value, E>
    where
        E: serde::de::Error,
    {
        Ok(bf16::from_f32(v))
    }

    /// Converts an `f64` with `bf16::from_f64`.
    fn visit_f64<E>(self, v: f64) -> Result<Self::Value, E>
    where
        E: serde::de::Error,
    {
        Ok(bf16::from_f64(v))
    }
}
1326
// Unit and property tests for `bf16`: constants, conversions to/from `f32`/`f64`,
// NaN handling, comparisons, round-to-nearest-even behaviour, and formatting.
#[allow(
    clippy::cognitive_complexity,
    clippy::float_cmp,
    clippy::neg_cmp_op_on_partial_ord
)]
#[cfg(test)]
mod test {
    use super::*;
    #[allow(unused_imports)]
    use core::cmp::Ordering;
    #[cfg(feature = "num-traits")]
    use num_traits::{AsPrimitive, FromBytes, FromPrimitive, ToBytes, ToPrimitive};
    use quickcheck_macros::quickcheck;

    #[cfg(feature = "num-traits")]
    #[test]
    fn as_primitive() {
        let two = bf16::from_f32(2.0);
        assert_eq!(<i32 as AsPrimitive<bf16>>::as_(2), two);
        assert_eq!(<bf16 as AsPrimitive<i32>>::as_(two), 2);

        assert_eq!(<f32 as AsPrimitive<bf16>>::as_(2.0), two);
        assert_eq!(<bf16 as AsPrimitive<f32>>::as_(two), 2.0);

        assert_eq!(<f64 as AsPrimitive<bf16>>::as_(2.0), two);
        assert_eq!(<bf16 as AsPrimitive<f64>>::as_(two), 2.0);
    }

    #[cfg(feature = "num-traits")]
    #[test]
    fn to_primitive() {
        let two = bf16::from_f32(2.0);
        assert_eq!(ToPrimitive::to_i32(&two).unwrap(), 2i32);
        assert_eq!(ToPrimitive::to_f32(&two).unwrap(), 2.0f32);
        assert_eq!(ToPrimitive::to_f64(&two).unwrap(), 2.0f64);
    }

    #[cfg(feature = "num-traits")]
    #[test]
    fn from_primitive() {
        let two = bf16::from_f32(2.0);
        assert_eq!(<bf16 as FromPrimitive>::from_i32(2).unwrap(), two);
        assert_eq!(<bf16 as FromPrimitive>::from_f32(2.0).unwrap(), two);
        assert_eq!(<bf16 as FromPrimitive>::from_f64(2.0).unwrap(), two);
    }

    #[cfg(feature = "num-traits")]
    #[test]
    fn to_and_from_bytes() {
        let two = bf16::from_f32(2.0);
        assert_eq!(<bf16 as ToBytes>::to_le_bytes(&two), [0, 64]);
        assert_eq!(<bf16 as FromBytes>::from_le_bytes(&[0, 64]), two);
        assert_eq!(<bf16 as ToBytes>::to_be_bytes(&two), [64, 0]);
        assert_eq!(<bf16 as FromBytes>::from_be_bytes(&[64, 0]), two);
    }

    #[test]
    fn test_bf16_consts_from_f32() {
        let one = bf16::from_f32(1.0);
        let zero = bf16::from_f32(0.0);
        let neg_zero = bf16::from_f32(-0.0);
        let neg_one = bf16::from_f32(-1.0);
        let inf = bf16::from_f32(core::f32::INFINITY);
        let neg_inf = bf16::from_f32(core::f32::NEG_INFINITY);
        let nan = bf16::from_f32(core::f32::NAN);

        assert_eq!(bf16::ONE, one);
        assert_eq!(bf16::ZERO, zero);
        assert!(zero.is_sign_positive());
        assert_eq!(bf16::NEG_ZERO, neg_zero);
        assert!(neg_zero.is_sign_negative());
        assert_eq!(bf16::NEG_ONE, neg_one);
        assert!(neg_one.is_sign_negative());
        assert_eq!(bf16::INFINITY, inf);
        assert_eq!(bf16::NEG_INFINITY, neg_inf);
        assert!(nan.is_nan());
        assert!(bf16::NAN.is_nan());

        let e = bf16::from_f32(core::f32::consts::E);
        let pi = bf16::from_f32(core::f32::consts::PI);
        let frac_1_pi = bf16::from_f32(core::f32::consts::FRAC_1_PI);
        let frac_1_sqrt_2 = bf16::from_f32(core::f32::consts::FRAC_1_SQRT_2);
        let frac_2_pi = bf16::from_f32(core::f32::consts::FRAC_2_PI);
        let frac_2_sqrt_pi = bf16::from_f32(core::f32::consts::FRAC_2_SQRT_PI);
        let frac_pi_2 = bf16::from_f32(core::f32::consts::FRAC_PI_2);
        let frac_pi_3 = bf16::from_f32(core::f32::consts::FRAC_PI_3);
        let frac_pi_4 = bf16::from_f32(core::f32::consts::FRAC_PI_4);
        let frac_pi_6 = bf16::from_f32(core::f32::consts::FRAC_PI_6);
        let frac_pi_8 = bf16::from_f32(core::f32::consts::FRAC_PI_8);
        let ln_10 = bf16::from_f32(core::f32::consts::LN_10);
        let ln_2 = bf16::from_f32(core::f32::consts::LN_2);
        let log10_e = bf16::from_f32(core::f32::consts::LOG10_E);
        // core::f32::consts::LOG10_2 requires rustc 1.43.0
        let log10_2 = bf16::from_f32(2f32.log10());
        let log2_e = bf16::from_f32(core::f32::consts::LOG2_E);
        // core::f32::consts::LOG2_10 requires rustc 1.43.0
        let log2_10 = bf16::from_f32(10f32.log2());
        let sqrt_2 = bf16::from_f32(core::f32::consts::SQRT_2);

        assert_eq!(bf16::E, e);
        assert_eq!(bf16::PI, pi);
        assert_eq!(bf16::FRAC_1_PI, frac_1_pi);
        assert_eq!(bf16::FRAC_1_SQRT_2, frac_1_sqrt_2);
        assert_eq!(bf16::FRAC_2_PI, frac_2_pi);
        assert_eq!(bf16::FRAC_2_SQRT_PI, frac_2_sqrt_pi);
        assert_eq!(bf16::FRAC_PI_2, frac_pi_2);
        assert_eq!(bf16::FRAC_PI_3, frac_pi_3);
        assert_eq!(bf16::FRAC_PI_4, frac_pi_4);
        assert_eq!(bf16::FRAC_PI_6, frac_pi_6);
        assert_eq!(bf16::FRAC_PI_8, frac_pi_8);
        assert_eq!(bf16::LN_10, ln_10);
        assert_eq!(bf16::LN_2, ln_2);
        assert_eq!(bf16::LOG10_E, log10_e);
        assert_eq!(bf16::LOG10_2, log10_2);
        assert_eq!(bf16::LOG2_E, log2_e);
        assert_eq!(bf16::LOG2_10, log2_10);
        assert_eq!(bf16::SQRT_2, sqrt_2);
    }

    #[test]
    fn test_bf16_consts_from_f64() {
        let one = bf16::from_f64(1.0);
        let zero = bf16::from_f64(0.0);
        let neg_zero = bf16::from_f64(-0.0);
        let inf = bf16::from_f64(core::f64::INFINITY);
        let neg_inf = bf16::from_f64(core::f64::NEG_INFINITY);
        let nan = bf16::from_f64(core::f64::NAN);

        assert_eq!(bf16::ONE, one);
        assert_eq!(bf16::ZERO, zero);
        assert_eq!(bf16::NEG_ZERO, neg_zero);
        assert_eq!(bf16::INFINITY, inf);
        assert_eq!(bf16::NEG_INFINITY, neg_inf);
        assert!(nan.is_nan());
        assert!(bf16::NAN.is_nan());

        let e = bf16::from_f64(core::f64::consts::E);
        let pi = bf16::from_f64(core::f64::consts::PI);
        let frac_1_pi = bf16::from_f64(core::f64::consts::FRAC_1_PI);
        let frac_1_sqrt_2 = bf16::from_f64(core::f64::consts::FRAC_1_SQRT_2);
        let frac_2_pi = bf16::from_f64(core::f64::consts::FRAC_2_PI);
        let frac_2_sqrt_pi = bf16::from_f64(core::f64::consts::FRAC_2_SQRT_PI);
        let frac_pi_2 = bf16::from_f64(core::f64::consts::FRAC_PI_2);
        let frac_pi_3 = bf16::from_f64(core::f64::consts::FRAC_PI_3);
        let frac_pi_4 = bf16::from_f64(core::f64::consts::FRAC_PI_4);
        let frac_pi_6 = bf16::from_f64(core::f64::consts::FRAC_PI_6);
        let frac_pi_8 = bf16::from_f64(core::f64::consts::FRAC_PI_8);
        let ln_10 = bf16::from_f64(core::f64::consts::LN_10);
        let ln_2 = bf16::from_f64(core::f64::consts::LN_2);
        let log10_e = bf16::from_f64(core::f64::consts::LOG10_E);
        // core::f64::consts::LOG10_2 requires rustc 1.43.0
        let log10_2 = bf16::from_f64(2f64.log10());
        let log2_e = bf16::from_f64(core::f64::consts::LOG2_E);
        // core::f64::consts::LOG2_10 requires rustc 1.43.0
        let log2_10 = bf16::from_f64(10f64.log2());
        let sqrt_2 = bf16::from_f64(core::f64::consts::SQRT_2);

        assert_eq!(bf16::E, e);
        assert_eq!(bf16::PI, pi);
        assert_eq!(bf16::FRAC_1_PI, frac_1_pi);
        assert_eq!(bf16::FRAC_1_SQRT_2, frac_1_sqrt_2);
        assert_eq!(bf16::FRAC_2_PI, frac_2_pi);
        assert_eq!(bf16::FRAC_2_SQRT_PI, frac_2_sqrt_pi);
        assert_eq!(bf16::FRAC_PI_2, frac_pi_2);
        assert_eq!(bf16::FRAC_PI_3, frac_pi_3);
        assert_eq!(bf16::FRAC_PI_4, frac_pi_4);
        assert_eq!(bf16::FRAC_PI_6, frac_pi_6);
        assert_eq!(bf16::FRAC_PI_8, frac_pi_8);
        assert_eq!(bf16::LN_10, ln_10);
        assert_eq!(bf16::LN_2, ln_2);
        assert_eq!(bf16::LOG10_E, log10_e);
        assert_eq!(bf16::LOG10_2, log10_2);
        assert_eq!(bf16::LOG2_E, log2_e);
        assert_eq!(bf16::LOG2_10, log2_10);
        assert_eq!(bf16::SQRT_2, sqrt_2);
    }

    #[test]
    fn test_nan_conversion_to_smaller() {
        let nan64 = f64::from_bits(0x7FF0_0000_0000_0001u64);
        let neg_nan64 = f64::from_bits(0xFFF0_0000_0000_0001u64);
        let nan32 = f32::from_bits(0x7F80_0001u32);
        let neg_nan32 = f32::from_bits(0xFF80_0001u32);
        let nan32_from_64 = nan64 as f32;
        let neg_nan32_from_64 = neg_nan64 as f32;
        let nan16_from_64 = bf16::from_f64(nan64);
        let neg_nan16_from_64 = bf16::from_f64(neg_nan64);
        let nan16_from_32 = bf16::from_f32(nan32);
        let neg_nan16_from_32 = bf16::from_f32(neg_nan32);

        assert!(nan64.is_nan() && nan64.is_sign_positive());
        assert!(neg_nan64.is_nan() && neg_nan64.is_sign_negative());
        assert!(nan32.is_nan() && nan32.is_sign_positive());
        assert!(neg_nan32.is_nan() && neg_nan32.is_sign_negative());

        // f32/f64 NaN conversion sign is non-deterministic: https://github.com/starkat99/half-rs/issues/103
        assert!(neg_nan32_from_64.is_nan());
        assert!(nan32_from_64.is_nan());
        assert!(nan16_from_64.is_nan());
        assert!(neg_nan16_from_64.is_nan());
        assert!(nan16_from_32.is_nan());
        assert!(neg_nan16_from_32.is_nan());
    }

    #[test]
    fn test_nan_conversion_to_larger() {
        let nan16 = bf16::from_bits(0x7F81u16);
        let neg_nan16 = bf16::from_bits(0xFF81u16);
        let nan32 = f32::from_bits(0x7F80_0001u32);
        let neg_nan32 = f32::from_bits(0xFF80_0001u32);
        let nan32_from_16 = f32::from(nan16);
        let neg_nan32_from_16 = f32::from(neg_nan16);
        let nan64_from_16 = f64::from(nan16);
        let neg_nan64_from_16 = f64::from(neg_nan16);
        let nan64_from_32 = f64::from(nan32);
        let neg_nan64_from_32 = f64::from(neg_nan32);

        assert!(nan16.is_nan() && nan16.is_sign_positive());
        assert!(neg_nan16.is_nan() && neg_nan16.is_sign_negative());
        assert!(nan32.is_nan() && nan32.is_sign_positive());
        assert!(neg_nan32.is_nan() && neg_nan32.is_sign_negative());

        // f32/f64 NaN conversion sign is non-deterministic: https://github.com/starkat99/half-rs/issues/103
        assert!(nan32_from_16.is_nan());
        assert!(neg_nan32_from_16.is_nan());
        assert!(nan64_from_16.is_nan());
        assert!(neg_nan64_from_16.is_nan());
        assert!(nan64_from_32.is_nan());
        assert!(neg_nan64_from_32.is_nan());
    }

    #[test]
    fn test_bf16_to_f32() {
        let f = bf16::from_f32(7.0);
        assert_eq!(f.to_f32(), 7.0f32);

        // 7.1 is NOT exactly representable in 16-bit, it's rounded
        let f = bf16::from_f32(7.1);
        let diff = (f.to_f32() - 7.1f32).abs();
        // diff must be <= 4 * EPSILON, as 7 has two more significant bits than 1
        assert!(diff <= 4.0 * bf16::EPSILON.to_f32());

        let tiny32 = f32::from_bits(0x0001_0000u32);
        assert_eq!(bf16::from_bits(0x0001).to_f32(), tiny32);
        assert_eq!(bf16::from_bits(0x0005).to_f32(), 5.0 * tiny32);

        assert_eq!(bf16::from_bits(0x0001), bf16::from_f32(tiny32));
        assert_eq!(bf16::from_bits(0x0005), bf16::from_f32(5.0 * tiny32));
    }

    #[test]
    fn test_bf16_to_f64() {
        let f = bf16::from_f64(7.0);
        assert_eq!(f.to_f64(), 7.0f64);

        // 7.1 is NOT exactly representable in 16-bit, it's rounded
        let f = bf16::from_f64(7.1);
        let diff = (f.to_f64() - 7.1f64).abs();
        // diff must be <= 4 * EPSILON, as 7 has two more significant bits than 1
        assert!(diff <= 4.0 * bf16::EPSILON.to_f64());

        let tiny64 = 2.0f64.powi(-133);
        assert_eq!(bf16::from_bits(0x0001).to_f64(), tiny64);
        assert_eq!(bf16::from_bits(0x0005).to_f64(), 5.0 * tiny64);

        assert_eq!(bf16::from_bits(0x0001), bf16::from_f64(tiny64));
        assert_eq!(bf16::from_bits(0x0005), bf16::from_f64(5.0 * tiny64));
    }

    #[test]
    fn test_comparisons() {
        let zero = bf16::from_f64(0.0);
        let one = bf16::from_f64(1.0);
        let neg_zero = bf16::from_f64(-0.0);
        let neg_one = bf16::from_f64(-1.0);

        assert_eq!(zero.partial_cmp(&neg_zero), Some(Ordering::Equal));
        assert_eq!(neg_zero.partial_cmp(&zero), Some(Ordering::Equal));
        assert!(zero == neg_zero);
        assert!(neg_zero == zero);
        assert!(!(zero != neg_zero));
        assert!(!(neg_zero != zero));
        assert!(!(zero < neg_zero));
        assert!(!(neg_zero < zero));
        assert!(zero <= neg_zero);
        assert!(neg_zero <= zero);
        assert!(!(zero > neg_zero));
        assert!(!(neg_zero > zero));
        assert!(zero >= neg_zero);
        assert!(neg_zero >= zero);

        assert_eq!(one.partial_cmp(&neg_zero), Some(Ordering::Greater));
        assert_eq!(neg_zero.partial_cmp(&one), Some(Ordering::Less));
        assert!(!(one == neg_zero));
        assert!(!(neg_zero == one));
        assert!(one != neg_zero);
        assert!(neg_zero != one);
        assert!(!(one < neg_zero));
        assert!(neg_zero < one);
        assert!(!(one <= neg_zero));
        assert!(neg_zero <= one);
        assert!(one > neg_zero);
        assert!(!(neg_zero > one));
        assert!(one >= neg_zero);
        assert!(!(neg_zero >= one));

        assert_eq!(one.partial_cmp(&neg_one), Some(Ordering::Greater));
        assert_eq!(neg_one.partial_cmp(&one), Some(Ordering::Less));
        assert!(!(one == neg_one));
        assert!(!(neg_one == one));
        assert!(one != neg_one);
        assert!(neg_one != one);
        assert!(!(one < neg_one));
        assert!(neg_one < one);
        assert!(!(one <= neg_one));
        assert!(neg_one <= one);
        assert!(one > neg_one);
        assert!(!(neg_one > one));
        assert!(one >= neg_one);
        assert!(!(neg_one >= one));
    }

    #[test]
    #[allow(clippy::erasing_op, clippy::identity_op)]
    fn round_to_even_f32() {
        // smallest positive subnormal = 0b0.0000_001 * 2^-126 = 2^-133
        let min_sub = bf16::from_bits(1);
        let min_sub_f = (-133f32).exp2();
        assert_eq!(bf16::from_f32(min_sub_f).to_bits(), min_sub.to_bits());
        assert_eq!(f32::from(min_sub).to_bits(), min_sub_f.to_bits());

        // 0.0000000_011111 rounded to 0.0000000 (< tie, no rounding)
        // 0.0000000_100000 rounded to 0.0000000 (tie and even, remains at even)
        // 0.0000000_100001 rounded to 0.0000001 (> tie, rounds up)
        assert_eq!(
            bf16::from_f32(min_sub_f * 0.49).to_bits(),
            min_sub.to_bits() * 0
        );
        assert_eq!(
            bf16::from_f32(min_sub_f * 0.50).to_bits(),
            min_sub.to_bits() * 0
        );
        assert_eq!(
            bf16::from_f32(min_sub_f * 0.51).to_bits(),
            min_sub.to_bits() * 1
        );

        // 0.0000001_011111 rounded to 0.0000001 (< tie, no rounding)
        // 0.0000001_100000 rounded to 0.0000010 (tie and odd, rounds up to even)
        // 0.0000001_100001 rounded to 0.0000010 (> tie, rounds up)
        assert_eq!(
            bf16::from_f32(min_sub_f * 1.49).to_bits(),
            min_sub.to_bits() * 1
        );
        assert_eq!(
            bf16::from_f32(min_sub_f * 1.50).to_bits(),
            min_sub.to_bits() * 2
        );
        assert_eq!(
            bf16::from_f32(min_sub_f * 1.51).to_bits(),
            min_sub.to_bits() * 2
        );

        // 0.0000010_011111 rounded to 0.0000010 (< tie, no rounding)
        // 0.0000010_100000 rounded to 0.0000010 (tie and even, remains at even)
        // 0.0000010_100001 rounded to 0.0000011 (> tie, rounds up)
        assert_eq!(
            bf16::from_f32(min_sub_f * 2.49).to_bits(),
            min_sub.to_bits() * 2
        );
        assert_eq!(
            bf16::from_f32(min_sub_f * 2.50).to_bits(),
            min_sub.to_bits() * 2
        );
        assert_eq!(
            bf16::from_f32(min_sub_f * 2.51).to_bits(),
            min_sub.to_bits() * 3
        );

        assert_eq!(
            bf16::from_f32(250.49f32).to_bits(),
            bf16::from_f32(250.0).to_bits()
        );
        assert_eq!(
            bf16::from_f32(250.50f32).to_bits(),
            bf16::from_f32(250.0).to_bits()
        );
        assert_eq!(
            bf16::from_f32(250.51f32).to_bits(),
            bf16::from_f32(251.0).to_bits()
        );
        assert_eq!(
            bf16::from_f32(251.49f32).to_bits(),
            bf16::from_f32(251.0).to_bits()
        );
        assert_eq!(
            bf16::from_f32(251.50f32).to_bits(),
            bf16::from_f32(252.0).to_bits()
        );
        assert_eq!(
            bf16::from_f32(251.51f32).to_bits(),
            bf16::from_f32(252.0).to_bits()
        );
        assert_eq!(
            bf16::from_f32(252.49f32).to_bits(),
            bf16::from_f32(252.0).to_bits()
        );
        assert_eq!(
            bf16::from_f32(252.50f32).to_bits(),
            bf16::from_f32(252.0).to_bits()
        );
        assert_eq!(
            bf16::from_f32(252.51f32).to_bits(),
            bf16::from_f32(253.0).to_bits()
        );
    }

    #[test]
    #[allow(clippy::erasing_op, clippy::identity_op)]
    fn round_to_even_f64() {
        // smallest positive subnormal = 0b0.0000_001 * 2^-126 = 2^-133
        let min_sub = bf16::from_bits(1);
        let min_sub_f = (-133f64).exp2();
        assert_eq!(bf16::from_f64(min_sub_f).to_bits(), min_sub.to_bits());
        assert_eq!(f64::from(min_sub).to_bits(), min_sub_f.to_bits());

        // 0.0000000_011111 rounded to 0.0000000 (< tie, no rounding)
        // 0.0000000_100000 rounded to 0.0000000 (tie and even, remains at even)
        // 0.0000000_100001 rounded to 0.0000001 (> tie, rounds up)
        assert_eq!(
            bf16::from_f64(min_sub_f * 0.49).to_bits(),
            min_sub.to_bits() * 0
        );
        assert_eq!(
            bf16::from_f64(min_sub_f * 0.50).to_bits(),
            min_sub.to_bits() * 0
        );
        assert_eq!(
            bf16::from_f64(min_sub_f * 0.51).to_bits(),
            min_sub.to_bits() * 1
        );

        // 0.0000001_011111 rounded to 0.0000001 (< tie, no rounding)
        // 0.0000001_100000 rounded to 0.0000010 (tie and odd, rounds up to even)
        // 0.0000001_100001 rounded to 0.0000010 (> tie, rounds up)
        assert_eq!(
            bf16::from_f64(min_sub_f * 1.49).to_bits(),
            min_sub.to_bits() * 1
        );
        assert_eq!(
            bf16::from_f64(min_sub_f * 1.50).to_bits(),
            min_sub.to_bits() * 2
        );
        assert_eq!(
            bf16::from_f64(min_sub_f * 1.51).to_bits(),
            min_sub.to_bits() * 2
        );

        // 0.0000010_011111 rounded to 0.0000010 (< tie, no rounding)
        // 0.0000010_100000 rounded to 0.0000010 (tie and even, remains at even)
        // 0.0000010_100001 rounded to 0.0000011 (> tie, rounds up)
        assert_eq!(
            bf16::from_f64(min_sub_f * 2.49).to_bits(),
            min_sub.to_bits() * 2
        );
        assert_eq!(
            bf16::from_f64(min_sub_f * 2.50).to_bits(),
            min_sub.to_bits() * 2
        );
        assert_eq!(
            bf16::from_f64(min_sub_f * 2.51).to_bits(),
            min_sub.to_bits() * 3
        );

        assert_eq!(
            bf16::from_f64(250.49f64).to_bits(),
            bf16::from_f64(250.0).to_bits()
        );
        assert_eq!(
            bf16::from_f64(250.50f64).to_bits(),
            bf16::from_f64(250.0).to_bits()
        );
        assert_eq!(
            bf16::from_f64(250.51f64).to_bits(),
            bf16::from_f64(251.0).to_bits()
        );
        assert_eq!(
            bf16::from_f64(251.49f64).to_bits(),
            bf16::from_f64(251.0).to_bits()
        );
        assert_eq!(
            bf16::from_f64(251.50f64).to_bits(),
            bf16::from_f64(252.0).to_bits()
        );
        assert_eq!(
            bf16::from_f64(251.51f64).to_bits(),
            bf16::from_f64(252.0).to_bits()
        );
        assert_eq!(
            bf16::from_f64(252.49f64).to_bits(),
            bf16::from_f64(252.0).to_bits()
        );
        assert_eq!(
            bf16::from_f64(252.50f64).to_bits(),
            bf16::from_f64(252.0).to_bits()
        );
        assert_eq!(
            bf16::from_f64(252.51f64).to_bits(),
            bf16::from_f64(253.0).to_bits()
        );
    }

    #[cfg(feature = "std")]
    #[test]
    fn formatting() {
        let f = bf16::from_f32(0.1152344);

        assert_eq!(format!("{:.3}", f), "0.115");
        assert_eq!(format!("{:.4}", f), "0.1152");
        assert_eq!(format!("{:+.4}", f), "+0.1152");
        // Width 10, right-aligned: the 7-character "+0.1152" gets three leading spaces.
        assert_eq!(format!("{:>+10.4}", f), "   +0.1152");

        assert_eq!(format!("{:.3?}", f), "0.115");
        assert_eq!(format!("{:.4?}", f), "0.1152");
        assert_eq!(format!("{:+.4?}", f), "+0.1152");
        assert_eq!(format!("{:>+10.4?}", f), "   +0.1152");
    }

    // Every 16-bit pattern is a valid `bf16`, so arbitrary values are built from raw bits.
    impl quickcheck::Arbitrary for bf16 {
        fn arbitrary(g: &mut quickcheck::Gen) -> Self {
            bf16(u16::arbitrary(g))
        }
    }

    #[quickcheck]
    fn qc_roundtrip_bf16_f32_is_identity(f: bf16) -> bool {
        let roundtrip = bf16::from_f32(f.to_f32());
        if f.is_nan() {
            roundtrip.is_nan() && f.is_sign_negative() == roundtrip.is_sign_negative()
        } else {
            f.0 == roundtrip.0
        }
    }

    #[quickcheck]
    fn qc_roundtrip_bf16_f64_is_identity(f: bf16) -> bool {
        let roundtrip = bf16::from_f64(f.to_f64());
        if f.is_nan() {
            roundtrip.is_nan() && f.is_sign_negative() == roundtrip.is_sign_negative()
        } else {
            f.0 == roundtrip.0
        }
    }
}
1881