binary16.rs source code [crates/half/src/binary16.rs]

1	#[cfg(all(feature = "serde", feature = "alloc"))]
2	#[allow(unused_imports)]
3	use alloc::string::ToString;
4	#[cfg(feature = "bytemuck")]
5	use bytemuck::{Pod, Zeroable};
6	use core::{
7	cmp::Ordering,
8	iter::{Product, Sum},
9	num::FpCategory,
10	ops::{Add, AddAssign, Div, DivAssign, Mul, MulAssign, Neg, Rem, RemAssign, Sub, SubAssign},
11	};
12	#[cfg(not(target_arch = "spirv"))]
13	use core::{
14	fmt::{
15	Binary, Debug, Display, Error, Formatter, LowerExp, LowerHex, Octal, UpperExp, UpperHex,
16	},
17	num::ParseFloatError,
18	str::FromStr,
19	};
20	#[cfg(feature = "serde")]
21	use serde::{Deserialize, Serialize};
22	#[cfg(feature = "zerocopy")]
23	use zerocopy::{FromBytes, Immutable, IntoBytes, KnownLayout};
24
25	pub(crate) mod arch;
26
/// A 16-bit floating point type implementing the IEEE 754-2008 standard [`binary16`] a.k.a "half"
/// format.
///
/// This 16-bit floating point type is intended for efficient storage where the full range and
/// precision of a larger floating point value is not required.
///
/// The value is stored as its raw `u16` bit pattern, and `#[repr(transparent)]` gives the type
/// the same layout as that `u16`.
///
/// [`binary16`]: https://en.wikipedia.org/wiki/Half-precision_floating-point_format
#[allow(non_camel_case_types)]
#[derive(Clone, Copy, Default)]
#[repr(transparent)]
#[cfg_attr(feature = "serde", derive(Serialize))]
#[cfg_attr(
    feature = "rkyv",
    derive(rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)
)]
#[cfg_attr(feature = "rkyv", rkyv(resolver = F16Resolver))]
#[cfg_attr(feature = "bytemuck", derive(Zeroable, Pod))]
#[cfg_attr(
    feature = "zerocopy",
    derive(FromBytes, Immutable, IntoBytes, KnownLayout)
)]
#[cfg_attr(kani, derive(kani::Arbitrary))]
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub struct f16(u16);
51
52	impl f16 {
53	/// Constructs a 16-bit floating point value from the raw bits.
54	#[inline]
55	#[must_use]
56	pub const fn from_bits(bits: u16) -> f16 {
57	f16(bits)
58	}
59
60	/// Constructs a 16-bit floating point value from a 32-bit floating point value.
61	///
62	/// This operation is lossy. If the 32-bit value is to large to fit in 16-bits, ±∞ will result.
63	/// NaN values are preserved. 32-bit subnormal values are too tiny to be represented in 16-bits
64	/// and result in ±0. Exponents that underflow the minimum 16-bit exponent will result in 16-bit
65	/// subnormals or ±0. All other values are truncated and rounded to the nearest representable
66	/// 16-bit value.
67	#[inline]
68	#[must_use]
69	pub fn from_f32(value: f32) -> f16 {
70	f16(arch::f32_to_f16(value))
71	}
72
73	/// Constructs a 16-bit floating point value from a 32-bit floating point value.
74	///
75	/// This function is identical to [`from_f32`][Self::from_f32] except it never uses hardware
76	/// intrinsics, which allows it to be `const`. [`from_f32`][Self::from_f32] should be preferred
77	/// in any non-`const` context.
78	///
79	/// This operation is lossy. If the 32-bit value is to large to fit in 16-bits, ±∞ will result.
80	/// NaN values are preserved. 32-bit subnormal values are too tiny to be represented in 16-bits
81	/// and result in ±0. Exponents that underflow the minimum 16-bit exponent will result in 16-bit
82	/// subnormals or ±0. All other values are truncated and rounded to the nearest representable
83	/// 16-bit value.
84	#[inline]
85	#[must_use]
86	pub const fn from_f32_const(value: f32) -> f16 {
87	f16(arch::f32_to_f16_fallback(value))
88	}
89
90	/// Constructs a 16-bit floating point value from a 64-bit floating point value.
91	///
92	/// This operation is lossy. If the 64-bit value is to large to fit in 16-bits, ±∞ will result.
93	/// NaN values are preserved. 64-bit subnormal values are too tiny to be represented in 16-bits
94	/// and result in ±0. Exponents that underflow the minimum 16-bit exponent will result in 16-bit
95	/// subnormals or ±0. All other values are truncated and rounded to the nearest representable
96	/// 16-bit value.
97	#[inline]
98	#[must_use]
99	pub fn from_f64(value: f64) -> f16 {
100	f16(arch::f64_to_f16(value))
101	}
102
103	/// Constructs a 16-bit floating point value from a 64-bit floating point value.
104	///
105	/// This function is identical to [`from_f64`][Self::from_f64] except it never uses hardware
106	/// intrinsics, which allows it to be `const`. [`from_f64`][Self::from_f64] should be preferred
107	/// in any non-`const` context.
108	///
109	/// This operation is lossy. If the 64-bit value is to large to fit in 16-bits, ±∞ will result.
110	/// NaN values are preserved. 64-bit subnormal values are too tiny to be represented in 16-bits
111	/// and result in ±0. Exponents that underflow the minimum 16-bit exponent will result in 16-bit
112	/// subnormals or ±0. All other values are truncated and rounded to the nearest representable
113	/// 16-bit value.
114	#[inline]
115	#[must_use]
116	pub const fn from_f64_const(value: f64) -> f16 {
117	f16(arch::f64_to_f16_fallback(value))
118	}
119
120	/// Converts a [`struct@f16`] into the underlying bit representation.
121	#[inline]
122	#[must_use]
123	pub const fn to_bits(self) -> u16 {
124	self.0
125	}
126
127	/// Returns the memory representation of the underlying bit representation as a byte array in
128	/// little-endian byte order.
129	///
130	/// # Examples
131	///
132	/// ```rust
133	/// # use half::prelude::*;
134	/// let bytes = f16::from_f32(`12.5`).to_le_bytes();
135	/// assert_eq!(bytes, [`0x40`, `0x4A`]);
136	/// ```
137	#[inline]
138	#[must_use]
139	pub const fn to_le_bytes(self) -> [u8; `2`] {
140	self.0.to_le_bytes()
141	}
142
143	/// Returns the memory representation of the underlying bit representation as a byte array in
144	/// big-endian (network) byte order.
145	///
146	/// # Examples
147	///
148	/// ```rust
149	/// # use half::prelude::*;
150	/// let bytes = f16::from_f32(`12.5`).to_be_bytes();
151	/// assert_eq!(bytes, [`0x4A`, `0x40`]);
152	/// ```
153	#[inline]
154	#[must_use]
155	pub const fn to_be_bytes(self) -> [u8; `2`] {
156	self.0.to_be_bytes()
157	}
158
159	/// Returns the memory representation of the underlying bit representation as a byte array in
160	/// native byte order.
161	///
162	/// As the target platform's native endianness is used, portable code should use
163	/// [`to_be_bytes`][Self::to_be_bytes] or [`to_le_bytes`][Self::to_le_bytes], as appropriate,
164	/// instead.
165	///
166	/// # Examples
167	///
168	/// ```rust
169	/// # use half::prelude::*;
170	/// let bytes = f16::from_f32(`12.5`).to_ne_bytes();
171	/// assert_eq!(bytes, if cfg!(target_endian = "big") {
172	/// [`0x4A`, `0x40`]
173	/// } else {
174	/// [`0x40`, `0x4A`]
175	/// });
176	/// ```
177	#[inline]
178	#[must_use]
179	pub const fn to_ne_bytes(self) -> [u8; `2`] {
180	self.0.to_ne_bytes()
181	}
182
183	/// Creates a floating point value from its representation as a byte array in little endian.
184	///
185	/// # Examples
186	///
187	/// ```rust
188	/// # use half::prelude::*;
189	/// let value = f16::from_le_bytes([`0x40`, `0x4A`]);
190	/// assert_eq!(value, f16::from_f32(`12.5`));
191	/// ```
192	#[inline]
193	#[must_use]
194	pub const fn from_le_bytes(bytes: [u8; `2`]) -> f16 {
195	f16::from_bits(u16::from_le_bytes(bytes))
196	}
197
198	/// Creates a floating point value from its representation as a byte array in big endian.
199	///
200	/// # Examples
201	///
202	/// ```rust
203	/// # use half::prelude::*;
204	/// let value = f16::from_be_bytes([`0x4A`, `0x40`]);
205	/// assert_eq!(value, f16::from_f32(`12.5`));
206	/// ```
207	#[inline]
208	#[must_use]
209	pub const fn from_be_bytes(bytes: [u8; `2`]) -> f16 {
210	f16::from_bits(u16::from_be_bytes(bytes))
211	}
212
213	/// Creates a floating point value from its representation as a byte array in native endian.
214	///
215	/// As the target platform's native endianness is used, portable code likely wants to use
216	/// [`from_be_bytes`][Self::from_be_bytes] or [`from_le_bytes`][Self::from_le_bytes], as
217	/// appropriate instead.
218	///
219	/// # Examples
220	///
221	/// ```rust
222	/// # use half::prelude::*;
223	/// let value = f16::from_ne_bytes(if cfg!(target_endian = "big") {
224	/// [`0x4A`, `0x40`]
225	/// } else {
226	/// [`0x40`, `0x4A`]
227	/// });
228	/// assert_eq!(value, f16::from_f32(`12.5`));
229	/// ```
230	#[inline]
231	#[must_use]
232	pub const fn from_ne_bytes(bytes: [u8; `2`]) -> f16 {
233	f16::from_bits(u16::from_ne_bytes(bytes))
234	}
235
236	/// Converts a [`struct@f16`] value into a `f32` value.
237	///
238	/// This conversion is lossless as all 16-bit floating point values can be represented exactly
239	/// in 32-bit floating point.
240	#[inline]
241	#[must_use]
242	pub fn to_f32(self) -> f32 {
243	arch::f16_to_f32(self.0)
244	}
245
246	/// Converts a [`struct@f16`] value into a `f32` value.
247	///
248	/// This function is identical to [`to_f32`][Self::to_f32] except it never uses hardware
249	/// intrinsics, which allows it to be `const`. [`to_f32`][Self::to_f32] should be preferred
250	/// in any non-`const` context.
251	///
252	/// This conversion is lossless as all 16-bit floating point values can be represented exactly
253	/// in 32-bit floating point.
254	#[inline]
255	#[must_use]
256	pub const fn to_f32_const(self) -> f32 {
257	arch::f16_to_f32_fallback(self.0)
258	}
259
260	/// Converts a [`struct@f16`] value into a `f64` value.
261	///
262	/// This conversion is lossless as all 16-bit floating point values can be represented exactly
263	/// in 64-bit floating point.
264	#[inline]
265	#[must_use]
266	pub fn to_f64(self) -> f64 {
267	arch::f16_to_f64(self.0)
268	}
269
270	/// Converts a [`struct@f16`] value into a `f64` value.
271	///
272	/// This function is identical to [`to_f64`][Self::to_f64] except it never uses hardware
273	/// intrinsics, which allows it to be `const`. [`to_f64`][Self::to_f64] should be preferred
274	/// in any non-`const` context.
275	///
276	/// This conversion is lossless as all 16-bit floating point values can be represented exactly
277	/// in 64-bit floating point.
278	#[inline]
279	#[must_use]
280	pub const fn to_f64_const(self) -> f64 {
281	arch::f16_to_f64_fallback(self.0)
282	}
283
284	/// Returns `true` if this value is `NaN` and `false` otherwise.
285	///
286	/// # Examples
287	///
288	/// ```rust
289	/// # use half::prelude::*;
290	///
291	/// let nan = f16::NAN;
292	/// let f = f16::from_f32(`7.0_f32`);
293	///
294	/// assert!(nan.is_nan());
295	/// assert!(!f.is_nan());
296	/// ```
297	#[inline]
298	#[must_use]
299	pub const fn is_nan(self) -> bool {
300	self.0 & `0x7FFFu16` > `0x7C00u16`
301	}
302
303	/// Returns `true` if this value is ±∞ and `false`.
304	/// otherwise.
305	///
306	/// # Examples
307	///
308	/// ```rust
309	/// # use half::prelude::*;
310	///
311	/// let f = f16::from_f32(`7.0f32`);
312	/// let inf = f16::INFINITY;
313	/// let neg_inf = f16::NEG_INFINITY;
314	/// let nan = f16::NAN;
315	///
316	/// assert!(!f.is_infinite());
317	/// assert!(!nan.is_infinite());
318	///
319	/// assert!(inf.is_infinite());
320	/// assert!(neg_inf.is_infinite());
321	/// ```
322	#[inline]
323	#[must_use]
324	pub const fn is_infinite(self) -> bool {
325	self.0 & `0x7FFFu16` == `0x7C00u16`
326	}
327
328	/// Returns `true` if this number is neither infinite nor `NaN`.
329	///
330	/// # Examples
331	///
332	/// ```rust
333	/// # use half::prelude::*;
334	///
335	/// let f = f16::from_f32(`7.0f32`);
336	/// let inf = f16::INFINITY;
337	/// let neg_inf = f16::NEG_INFINITY;
338	/// let nan = f16::NAN;
339	///
340	/// assert!(f.is_finite());
341	///
342	/// assert!(!nan.is_finite());
343	/// assert!(!inf.is_finite());
344	/// assert!(!neg_inf.is_finite());
345	/// ```
346	#[inline]
347	#[must_use]
348	pub const fn is_finite(self) -> bool {
349	self.0 & `0x7C00u16` != `0x7C00u16`
350	}
351
352	/// Returns `true` if the number is neither zero, infinite, subnormal, or `NaN`.
353	///
354	/// # Examples
355	///
356	/// ```rust
357	/// # use half::prelude::*;
358	///
359	/// let min = f16::MIN_POSITIVE;
360	/// let max = f16::MAX;
361	/// let lower_than_min = f16::from_f32(`1.0e-10_f32`);
362	/// let zero = f16::from_f32(`0.0_f32`);
363	///
364	/// assert!(min.is_normal());
365	/// assert!(max.is_normal());
366	///
367	/// assert!(!zero.is_normal());
368	/// assert!(!f16::NAN.is_normal());
369	/// assert!(!f16::INFINITY.is_normal());
370	/// // Values between `0` and `min` are Subnormal.
371	/// assert!(!lower_than_min.is_normal());
372	/// ```
373	#[inline]
374	#[must_use]
375	pub const fn is_normal(self) -> bool {
376	let exp = self.0 & `0x7C00u16`;
377	exp != `0x7C00u16` && exp != `0`
378	}
379
380	/// Returns the floating point category of the number.
381	///
382	/// If only one property is going to be tested, it is generally faster to use the specific
383	/// predicate instead.
384	///
385	/// # Examples
386	///
387	/// ```rust
388	/// use std::num::FpCategory;
389	/// # use half::prelude::*;
390	///
391	/// let num = f16::from_f32(`12.4_f32`);
392	/// let inf = f16::INFINITY;
393	///
394	/// assert_eq!(num.classify(), FpCategory::Normal);
395	/// assert_eq!(inf.classify(), FpCategory::Infinite);
396	/// ```
397	#[must_use]
398	pub const fn classify(self) -> FpCategory {
399	let exp = self.0 & `0x7C00u16`;
400	let man = self.0 & `0x03FFu16`;
401	match (exp, man) {
402	(`0`, `0`) => FpCategory::Zero,
403	(`0`, _) => FpCategory::Subnormal,
404	(`0x7C00u16`, `0`) => FpCategory::Infinite,
405	(`0x7C00u16`, _) => FpCategory::Nan,
406	_ => FpCategory::Normal,
407	}
408	}
409
410	/// Returns a number that represents the sign of `self`.
411	///
412	/// `1.0` if the number is positive, `+0.0` or* [`INFINITY`][f16::INFINITY]
413	/// `-1.0` if the number is negative, `-0.0` or* [`NEG_INFINITY`][f16::NEG_INFINITY]
414	/// * [`NAN`][f16::NAN] if the number is `NaN`
415	///
416	/// # Examples
417	///
418	/// ```rust
419	/// # use half::prelude::*;
420	///
421	/// let f = f16::from_f32(`3.5_f32`);
422	///
423	/// assert_eq!(f.signum(), f16::from_f32(`1.0`));
424	/// assert_eq!(f16::NEG_INFINITY.signum(), f16::from_f32(-`1.0`));
425	///
426	/// assert!(f16::NAN.signum().is_nan());
427	/// ```
428	#[must_use]
429	pub const fn signum(self) -> f16 {
430	if self.is_nan() {
431	self
432	} else if self.0 & `0x8000u16` != `0` {
433	Self::NEG_ONE
434	} else {
435	Self::ONE
436	}
437	}
438
439	/// Returns `true` if and only if `self` has a positive sign, including `+0.0`, `NaNs` with a
440	/// positive sign bit and +∞.
441	///
442	/// # Examples
443	///
444	/// ```rust
445	/// # use half::prelude::*;
446	///
447	/// let nan = f16::NAN;
448	/// let f = f16::from_f32(`7.0_f32`);
449	/// let g = f16::from_f32(`-7.0_f32`);
450	///
451	/// assert!(f.is_sign_positive());
452	/// assert!(!g.is_sign_positive());
453	/// // `NaN` can be either positive or negative
454	/// assert!(nan.is_sign_positive() != nan.is_sign_negative());
455	/// ```
456	#[inline]
457	#[must_use]
458	pub const fn is_sign_positive(self) -> bool {
459	self.0 & `0x8000u16` == `0`
460	}
461
462	/// Returns `true` if and only if `self` has a negative sign, including `-0.0`, `NaNs` with a
463	/// negative sign bit and −∞.
464	///
465	/// # Examples
466	///
467	/// ```rust
468	/// # use half::prelude::*;
469	///
470	/// let nan = f16::NAN;
471	/// let f = f16::from_f32(`7.0f32`);
472	/// let g = f16::from_f32(`-7.0f32`);
473	///
474	/// assert!(!f.is_sign_negative());
475	/// assert!(g.is_sign_negative());
476	/// // `NaN` can be either positive or negative
477	/// assert!(nan.is_sign_positive() != nan.is_sign_negative());
478	/// ```
479	#[inline]
480	#[must_use]
481	pub const fn is_sign_negative(self) -> bool {
482	self.0 & `0x8000u16` != `0`
483	}
484
485	/// Returns a number composed of the magnitude of `self` and the sign of `sign`.
486	///
487	/// Equal to `self` if the sign of `self` and `sign` are the same, otherwise equal to `-self`.
488	/// If `self` is NaN, then NaN with the sign of `sign` is returned.
489	///
490	/// # Examples
491	///
492	/// ```
493	/// # use half::prelude::*;
494	/// let f = f16::from_f32(`3.5`);
495	///
496	/// assert_eq!(f.copysign(f16::from_f32(`0.42`)), f16::from_f32(`3.5`));
497	/// assert_eq!(f.copysign(f16::from_f32(-`0.42`)), f16::from_f32(-`3.5`));
498	/// assert_eq!((-f).copysign(f16::from_f32(`0.42`)), f16::from_f32(`3.5`));
499	/// assert_eq!((-f).copysign(f16::from_f32(-`0.42`)), f16::from_f32(-`3.5`));
500	///
501	/// assert!(f16::NAN.copysign(f16::from_f32(`1.0`)).is_nan());
502	/// ```
503	#[inline]
504	#[must_use]
505	pub const fn copysign(self, sign: f16) -> f16 {
506	f16((sign.0 & `0x8000u16`) \| (self.0 & `0x7FFFu16`))
507	}
508
509	/// Returns the maximum of the two numbers.
510	///
511	/// If one of the arguments is NaN, then the other argument is returned.
512	///
513	/// # Examples
514	///
515	/// ```
516	/// # use half::prelude::*;
517	/// let x = f16::from_f32(`1.0`);
518	/// let y = f16::from_f32(`2.0`);
519	///
520	/// assert_eq!(x.max(y), y);
521	/// ```
522	#[inline]
523	#[must_use]
524	pub fn max(self, other: f16) -> f16 {
525	if other > self && !other.is_nan() {
526	other
527	} else {
528	self
529	}
530	}
531
532	/// Returns the minimum of the two numbers.
533	///
534	/// If one of the arguments is NaN, then the other argument is returned.
535	///
536	/// # Examples
537	///
538	/// ```
539	/// # use half::prelude::*;
540	/// let x = f16::from_f32(`1.0`);
541	/// let y = f16::from_f32(`2.0`);
542	///
543	/// assert_eq!(x.min(y), x);
544	/// ```
545	#[inline]
546	#[must_use]
547	pub fn min(self, other: f16) -> f16 {
548	if other < self && !other.is_nan() {
549	other
550	} else {
551	self
552	}
553	}
554
555	/// Restrict a value to a certain interval unless it is NaN.
556	///
557	/// Returns `max` if `self` is greater than `max`, and `min` if `self` is less than `min`.
558	/// Otherwise this returns `self`.
559	///
560	/// Note that this function returns NaN if the initial value was NaN as well.
561	///
562	/// # Panics
563	/// Panics if `min > max`, `min` is NaN, or `max` is NaN.
564	///
565	/// # Examples
566	///
567	/// ```
568	/// # use half::prelude::*;
569	/// assert!(f16::from_f32(-`3.0`).clamp(f16::from_f32(-`2.0`), f16::from_f32(`1.0`)) == f16::from_f32(-`2.0`));
570	/// assert!(f16::from_f32(`0.0`).clamp(f16::from_f32(-`2.0`), f16::from_f32(`1.0`)) == f16::from_f32(`0.0`));
571	/// assert!(f16::from_f32(`2.0`).clamp(f16::from_f32(-`2.0`), f16::from_f32(`1.0`)) == f16::from_f32(`1.0`));
572	/// assert!(f16::NAN.clamp(f16::from_f32(-`2.0`), f16::from_f32(`1.0`)).is_nan());
573	/// ```
574	#[inline]
575	#[must_use]
576	pub fn clamp(self, min: f16, max: f16) -> f16 {
577	assert!(min <= max);
578	let mut x = self;
579	if x < min {
580	x = min;
581	}
582	if x > max {
583	x = max;
584	}
585	x
586	}
587
588	/// Returns the ordering between `self` and `other`.
589	///
590	/// Unlike the standard partial comparison between floating point numbers,
591	/// this comparison always produces an ordering in accordance to
592	/// the `totalOrder` predicate as defined in the IEEE 754 (2008 revision)
593	/// floating point standard. The values are ordered in the following sequence:
594	///
595	/// - negative quiet NaN
596	/// - negative signaling NaN
597	/// - negative infinity
598	/// - negative numbers
599	/// - negative subnormal numbers
600	/// - negative zero
601	/// - positive zero
602	/// - positive subnormal numbers
603	/// - positive numbers
604	/// - positive infinity
605	/// - positive signaling NaN
606	/// - positive quiet NaN.
607	///
608	/// The ordering established by this function does not always agree with the
609	/// [`PartialOrd`] and [`PartialEq`] implementations of `f16`. For example,
610	/// they consider negative and positive zero equal, while `total_cmp`
611	/// doesn't.
612	///
613	/// The interpretation of the signaling NaN bit follows the definition in
614	/// the IEEE 754 standard, which may not match the interpretation by some of
615	/// the older, non-conformant (e.g. MIPS) hardware implementations.
616	///
617	/// # Examples
618	/// ```
619	/// # use half::f16;
620	/// let mut v: Vec<f16> = vec![];
621	/// v.push(f16::ONE);
622	/// v.push(f16::INFINITY);
623	/// v.push(f16::NEG_INFINITY);
624	/// v.push(f16::NAN);
625	/// v.push(f16::MAX_SUBNORMAL);
626	/// v.push(-f16::MAX_SUBNORMAL);
627	/// v.push(f16::ZERO);
628	/// v.push(f16::NEG_ZERO);
629	/// v.push(f16::NEG_ONE);
630	/// v.push(f16::MIN_POSITIVE);
631	///
632	/// v.sort_by(\|a, b\| a.total_cmp(&b));
633	///
634	/// assert!(v
635	/// .into_iter()
636	/// .zip(
637	/// [
638	/// f16::NEG_INFINITY,
639	/// f16::NEG_ONE,
640	/// -f16::MAX_SUBNORMAL,
641	/// f16::NEG_ZERO,
642	/// f16::ZERO,
643	/// f16::MAX_SUBNORMAL,
644	/// f16::MIN_POSITIVE,
645	/// f16::ONE,
646	/// f16::INFINITY,
647	/// f16::NAN
648	/// ]
649	/// .iter()
650	/// )
651	/// .all(\|(a, b)\| a.to_bits() == b.to_bits()));
652	/// ```
653	// Implementation based on: https://doc.rust-lang.org/std/primitive.f32.html#method.total_cmp
654	#[inline]
655	#[must_use]
656	pub fn total_cmp(&self, other: &Self) -> Ordering {
657	let mut left = self.to_bits() as i16;
658	let mut right = other.to_bits() as i16;
659	left ^= (((left >> `15`) as u16) >> `1`) as i16;
660	right ^= (((right >> `15`) as u16) >> `1`) as i16;
661	left.cmp(&right)
662	}
663
664	/// Alternate serialize adapter for serializing as a float.
665	///
666	/// By default, [`struct@f16`] serializes as a newtype of [`u16`]. This is an alternate serialize
667	/// implementation that serializes as an [`f32`] value. It is designed for use with
668	/// `serialize_with` serde attributes. Deserialization from `f32` values is already supported by
669	/// the default deserialize implementation.
670	///
671	/// # Examples
672	///
673	/// A demonstration on how to use this adapater:
674	///
675	/// ```
676	/// use serde::{Serialize, Deserialize};
677	/// use half::f16;
678	///
679	/// #[derive(Serialize, Deserialize)]
680	/// struct MyStruct {
681	/// #[serde(serialize_with = "f16::serialize_as_f32")]
682	/// value: f16 // Will be serialized as f32 instead of u16
683	/// }
684	/// ```
685	#[cfg(feature = "serde")]
686	pub fn serialize_as_f32<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
687	serializer.serialize_f32(self.to_f32())
688	}
689
690	/// Alternate serialize adapter for serializing as a string.
691	///
692	/// By default, [`struct@f16`] serializes as a newtype of [`u16`]. This is an alternate serialize
693	/// implementation that serializes as a string value. It is designed for use with
694	/// `serialize_with` serde attributes. Deserialization from string values is already supported
695	/// by the default deserialize implementation.
696	///
697	/// # Examples
698	///
699	/// A demonstration on how to use this adapater:
700	///
701	/// ```
702	/// use serde::{Serialize, Deserialize};
703	/// use half::f16;
704	///
705	/// #[derive(Serialize, Deserialize)]
706	/// struct MyStruct {
707	/// #[serde(serialize_with = "f16::serialize_as_string")]
708	/// value: f16 // Will be serialized as a string instead of u16
709	/// }
710	/// ```
711	#[cfg(all(feature = "serde", feature = "alloc"))]
712	pub fn serialize_as_string<S: serde::Serializer>(
713	&self,
714	serializer: S,
715	) -> Result<S::Ok, S::Error> {
716	serializer.serialize_str(&self.to_string())
717	}
718
719	/// Approximate number of [`struct@f16`] significant digits in base 10
720	pub const DIGITS: u32 = `3`;
721	/// [`struct@f16`]
722	/// [machine epsilon](https://en.wikipedia.org/wiki/Machine_epsilon) value
723	///
724	/// This is the difference between 1.0 and the next largest representable number.
725	pub const EPSILON: f16 = f16(`0x1400u16`);
726	/// [`struct@f16`] positive Infinity (+∞)
727	pub const INFINITY: f16 = f16(`0x7C00u16`);
728	/// Number of [`struct@f16`] significant digits in base 2
729	pub const MANTISSA_DIGITS: u32 = `11`;
730	/// Largest finite [`struct@f16`] value
731	pub const MAX: f16 = f16(`0x7BFF`);
732	/// Maximum possible [`struct@f16`] power of 10 exponent
733	pub const MAX_10_EXP: i32 = `4`;
734	/// Maximum possible [`struct@f16`] power of 2 exponent
735	pub const MAX_EXP: i32 = `16`;
736	/// Smallest finite [`struct@f16`] value
737	pub const MIN: f16 = f16(`0xFBFF`);
738	/// Minimum possible normal [`struct@f16`] power of 10 exponent
739	pub const MIN_10_EXP: i32 = `-4`;
740	/// One greater than the minimum possible normal [`struct@f16`] power of 2 exponent
741	pub const MIN_EXP: i32 = `-13`;
742	/// Smallest positive normal [`struct@f16`] value
743	pub const MIN_POSITIVE: f16 = f16(`0x0400u16`);
744	/// [`struct@f16`] Not a Number (NaN)
745	pub const NAN: f16 = f16(`0x7E00u16`);
746	/// [`struct@f16`] negative infinity (-∞)
747	pub const NEG_INFINITY: f16 = f16(`0xFC00u16`);
748	/// The radix or base of the internal representation of [`struct@f16`]
749	pub const RADIX: u32 = `2`;
750
751	/// Minimum positive subnormal [`struct@f16`] value
752	pub const MIN_POSITIVE_SUBNORMAL: f16 = f16(`0x0001u16`);
753	/// Maximum subnormal [`struct@f16`] value
754	pub const MAX_SUBNORMAL: f16 = f16(`0x03FFu16`);
755
756	/// [`struct@f16`] 1
757	pub const ONE: f16 = f16(`0x3C00u16`);
758	/// [`struct@f16`] 0
759	pub const ZERO: f16 = f16(`0x0000u16`);
760	/// [`struct@f16`] -0
761	pub const NEG_ZERO: f16 = f16(`0x8000u16`);
762	/// [`struct@f16`] -1
763	pub const NEG_ONE: f16 = f16(`0xBC00u16`);
764
765	/// [`struct@f16`] Euler's number (ℯ)
766	pub const E: f16 = f16(`0x4170u16`);
767	/// [`struct@f16`] Archimedes' constant (π)
768	pub const PI: f16 = f16(`0x4248u16`);
769	/// [`struct@f16`] 1/π
770	pub const FRAC_1_PI: f16 = f16(`0x3518u16`);
771	/// [`struct@f16`] 1/√2
772	pub const FRAC_1_SQRT_2: f16 = f16(`0x39A8u16`);
773	/// [`struct@f16`] 2/π
774	pub const FRAC_2_PI: f16 = f16(`0x3918u16`);
775	/// [`struct@f16`] 2/√π
776	pub const FRAC_2_SQRT_PI: f16 = f16(`0x3C83u16`);
777	/// [`struct@f16`] π/2
778	pub const FRAC_PI_2: f16 = f16(`0x3E48u16`);
779	/// [`struct@f16`] π/3
780	pub const FRAC_PI_3: f16 = f16(`0x3C30u16`);
781	/// [`struct@f16`] π/4
782	pub const FRAC_PI_4: f16 = f16(`0x3A48u16`);
783	/// [`struct@f16`] π/6
784	pub const FRAC_PI_6: f16 = f16(`0x3830u16`);
785	/// [`struct@f16`] π/8
786	pub const FRAC_PI_8: f16 = f16(`0x3648u16`);
787	/// [`struct@f16`] 𝗅𝗇 10
788	pub const LN_10: f16 = f16(`0x409Bu16`);
789	/// [`struct@f16`] 𝗅𝗇 2
790	pub const LN_2: f16 = f16(`0x398Cu16`);
791	/// [`struct@f16`] 𝗅𝗈𝗀₁₀ℯ
792	pub const LOG10_E: f16 = f16(`0x36F3u16`);
793	/// [`struct@f16`] 𝗅𝗈𝗀₁₀2
794	pub const LOG10_2: f16 = f16(`0x34D1u16`);
795	/// [`struct@f16`] 𝗅𝗈𝗀₂ℯ
796	pub const LOG2_E: f16 = f16(`0x3DC5u16`);
797	/// [`struct@f16`] 𝗅𝗈𝗀₂10
798	pub const LOG2_10: f16 = f16(`0x42A5u16`);
799	/// [`struct@f16`] √2
800	pub const SQRT_2: f16 = f16(`0x3DA8u16`);
801	}
802
803	impl From<f16> for f32 {
804	#[inline]
805	fn from(x: f16) -> f32 {
806	x.to_f32()
807	}
808	}
809
810	impl From<f16> for f64 {
811	#[inline]
812	fn from(x: f16) -> f64 {
813	x.to_f64()
814	}
815	}
816
817	impl From<i8> for f16 {
818	#[inline]
819	fn from(x: i8) -> f16 {
820	// Convert to f32, then to f16
821	f16::from_f32(f32::from(x))
822	}
823	}
824
825	impl From<u8> for f16 {
826	#[inline]
827	fn from(x: u8) -> f16 {
828	// Convert to f32, then to f16
829	f16::from_f32(f32::from(x))
830	}
831	}
832
833	impl PartialEq for f16 {
834	fn eq(&self, other: &f16) -> bool {
835	if self.is_nan() \|\| other.is_nan() {
836	`false`
837	} else {
838	(self.0 == other.0) \|\| ((self.0 \| other.0) & `0x7FFFu16` == `0`)
839	}
840	}
841	}
842
843	impl PartialOrd for f16 {
844	fn partial_cmp(&self, other: &f16) -> Option<Ordering> {
845	if self.is_nan() \|\| other.is_nan() {
846	None
847	} else {
848	let neg = self.0 & `0x8000u16` != `0`;
849	let other_neg = other.0 & `0x8000u16` != `0`;
850	match (neg, other_neg) {
851	(`false`, `false`) => Some(self.0.cmp(&other.0)),
852	(`false`, `true`) => {
853	if (self.0 \| other.0) & `0x7FFFu16` == `0` {
854	Some(Ordering::Equal)
855	} else {
856	Some(Ordering::Greater)
857	}
858	}
859	(`true`, `false`) => {
860	if (self.0 \| other.0) & `0x7FFFu16` == `0` {
861	Some(Ordering::Equal)
862	} else {
863	Some(Ordering::Less)
864	}
865	}
866	(`true`, `true`) => Some(other.0.cmp(&self.0)),
867	}
868	}
869	}
870
871	fn lt(&self, other: &f16) -> bool {
872	if self.is_nan() \|\| other.is_nan() {
873	`false`
874	} else {
875	let neg = self.0 & `0x8000u16` != `0`;
876	let other_neg = other.0 & `0x8000u16` != `0`;
877	match (neg, other_neg) {
878	(`false`, `false`) => self.0 < other.0,
879	(`false`, `true`) => `false`,
880	(`true`, `false`) => (self.0 \| other.0) & `0x7FFFu16` != `0`,
881	(`true`, `true`) => self.0 > other.0,
882	}
883	}
884	}
885
886	fn le(&self, other: &f16) -> bool {
887	if self.is_nan() \|\| other.is_nan() {
888	`false`
889	} else {
890	let neg = self.0 & `0x8000u16` != `0`;
891	let other_neg = other.0 & `0x8000u16` != `0`;
892	match (neg, other_neg) {
893	(`false`, `false`) => self.0 <= other.0,
894	(`false`, `true`) => (self.0 \| other.0) & `0x7FFFu16` == `0`,
895	(`true`, `false`) => `true`,
896	(`true`, `true`) => self.0 >= other.0,
897	}
898	}
899	}
900
901	fn gt(&self, other: &f16) -> bool {
902	if self.is_nan() \|\| other.is_nan() {
903	`false`
904	} else {
905	let neg = self.0 & `0x8000u16` != `0`;
906	let other_neg = other.0 & `0x8000u16` != `0`;
907	match (neg, other_neg) {
908	(`false`, `false`) => self.0 > other.0,
909	(`false`, `true`) => (self.0 \| other.0) & `0x7FFFu16` != `0`,
910	(`true`, `false`) => `false`,
911	(`true`, `true`) => self.0 < other.0,
912	}
913	}
914	}
915
916	fn ge(&self, other: &f16) -> bool {
917	if self.is_nan() \|\| other.is_nan() {
918	`false`
919	} else {
920	let neg = self.0 & `0x8000u16` != `0`;
921	let other_neg = other.0 & `0x8000u16` != `0`;
922	match (neg, other_neg) {
923	(`false`, `false`) => self.0 >= other.0,
924	(`false`, `true`) => `true`,
925	(`true`, `false`) => (self.0 \| other.0) & `0x7FFFu16` == `0`,
926	(`true`, `true`) => self.0 <= other.0,
927	}
928	}
929	}
930	}
931
932	#[cfg(not(target_arch = "spirv"))]
933	impl FromStr for f16 {
934	type Err = ParseFloatError;
935	fn from_str(src: &str) -> Result<f16, ParseFloatError> {
936	f32::from_str(src).map(op:f16::from_f32)
937	}
938	}
939
940	#[cfg(not(target_arch = "spirv"))]
941	impl Debug for f16 {
942	fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
943	Debug::fmt(&self.to_f32(), f)
944	}
945	}
946
947	#[cfg(not(target_arch = "spirv"))]
948	impl Display for f16 {
949	fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
950	Display::fmt(&self.to_f32(), f)
951	}
952	}
953
954	#[cfg(not(target_arch = "spirv"))]
955	impl LowerExp for f16 {
956	fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
957	write!(f, "{:e}", self.to_f32())
958	}
959	}
960
961	#[cfg(not(target_arch = "spirv"))]
962	impl UpperExp for f16 {
963	fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
964	write!(f, "{:E}", self.to_f32())
965	}
966	}
967
968	#[cfg(not(target_arch = "spirv"))]
969	impl Binary for f16 {
970	fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
971	write!(f, "{:b}", self.0)
972	}
973	}
974
975	#[cfg(not(target_arch = "spirv"))]
976	impl Octal for f16 {
977	fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
978	write!(f, "{:o}", self.0)
979	}
980	}
981
982	#[cfg(not(target_arch = "spirv"))]
983	impl LowerHex for f16 {
984	fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
985	write!(f, "{:x}", self.0)
986	}
987	}
988
989	#[cfg(not(target_arch = "spirv"))]
990	impl UpperHex for f16 {
991	fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
992	write!(f, "{:X}", self.0)
993	}
994	}
995
996	impl Neg for f16 {
997	type Output = Self;
998
999	#[inline]
1000	fn neg(self) -> Self::Output {
1001	Self(self.0 ^ `0x8000`)
1002	}
1003	}
1004
1005	impl Neg for &f16 {
1006	type Output = <f16 as Neg>::Output;
1007
1008	#[inline]
1009	fn neg(self) -> Self::Output {
1010	Neg::neg(*self)
1011	}
1012	}
1013
1014	impl Add for f16 {
1015	type Output = Self;
1016
1017	#[inline]
1018	fn add(self, rhs: Self) -> Self::Output {
1019	f16(arch::add_f16(self.0, b:rhs.0))
1020	}
1021	}
1022
1023	impl Add<&f16> for f16 {
1024	type Output = <f16 as Add<f16>>::Output;
1025
1026	#[inline]
1027	fn add(self, rhs: &f16) -> Self::Output {
1028	self.add(*rhs)
1029	}
1030	}
1031
1032	impl Add<&f16> for &f16 {
1033	type Output = <f16 as Add<f16>>::Output;
1034
1035	#[inline]
1036	fn add(self, rhs: &f16) -> Self::Output {
1037	(self).add(rhs)
1038	}
1039	}
1040
1041	impl Add<f16> for &f16 {
1042	type Output = <f16 as Add<f16>>::Output;
1043
1044	#[inline]
1045	fn add(self, rhs: f16) -> Self::Output {
1046	(*self).add(rhs)
1047	}
1048	}
1049
1050	impl AddAssign for f16 {
1051	#[inline]
1052	fn add_assign(&mut self, rhs: Self) {
1053	self = (self).add(rhs);
1054	}
1055	}
1056
1057	impl AddAssign<&f16> for f16 {
1058	#[inline]
1059	fn add_assign(&mut self, rhs: &f16) {
1060	self = (self).add(rhs);
1061	}
1062	}
1063
1064	impl Sub for f16 {
1065	type Output = Self;
1066
1067	#[inline]
1068	fn sub(self, rhs: Self) -> Self::Output {
1069	f16(arch::subtract_f16(self.0, b:rhs.0))
1070	}
1071	}
1072
1073	impl Sub<&f16> for f16 {
1074	type Output = <f16 as Sub<f16>>::Output;
1075
1076	#[inline]
1077	fn sub(self, rhs: &f16) -> Self::Output {
1078	self.sub(*rhs)
1079	}
1080	}
1081
1082	impl Sub<&f16> for &f16 {
1083	type Output = <f16 as Sub<f16>>::Output;
1084
1085	#[inline]
1086	fn sub(self, rhs: &f16) -> Self::Output {
1087	(self).sub(rhs)
1088	}
1089	}
1090
1091	impl Sub<f16> for &f16 {
1092	type Output = <f16 as Sub<f16>>::Output;
1093
1094	#[inline]
1095	fn sub(self, rhs: f16) -> Self::Output {
1096	(*self).sub(rhs)
1097	}
1098	}
1099
1100	impl SubAssign for f16 {
1101	#[inline]
1102	fn sub_assign(&mut self, rhs: Self) {
1103	self = (self).sub(rhs);
1104	}
1105	}
1106
1107	impl SubAssign<&f16> for f16 {
1108	#[inline]
1109	fn sub_assign(&mut self, rhs: &f16) {
1110	self = (self).sub(rhs);
1111	}
1112	}
1113
1114	impl Mul for f16 {
1115	type Output = Self;
1116
1117	#[inline]
1118	fn mul(self, rhs: Self) -> Self::Output {
1119	f16(arch::multiply_f16(self.0, b:rhs.0))
1120	}
1121	}
1122
1123	impl Mul<&f16> for f16 {
1124	type Output = <f16 as Mul<f16>>::Output;
1125
1126	#[inline]
1127	fn mul(self, rhs: &f16) -> Self::Output {
1128	self.mul(*rhs)
1129	}
1130	}
1131
1132	impl Mul<&f16> for &f16 {
1133	type Output = <f16 as Mul<f16>>::Output;
1134
1135	#[inline]
1136	fn mul(self, rhs: &f16) -> Self::Output {
1137	(self).mul(rhs)
1138	}
1139	}
1140
1141	impl Mul<f16> for &f16 {
1142	type Output = <f16 as Mul<f16>>::Output;
1143
1144	#[inline]
1145	fn mul(self, rhs: f16) -> Self::Output {
1146	(*self).mul(rhs)
1147	}
1148	}
1149
1150	impl MulAssign for f16 {
1151	#[inline]
1152	fn mul_assign(&mut self, rhs: Self) {
1153	self = (self).mul(rhs);
1154	}
1155	}
1156
1157	impl MulAssign<&f16> for f16 {
1158	#[inline]
1159	fn mul_assign(&mut self, rhs: &f16) {
1160	self = (self).mul(rhs);
1161	}
1162	}
1163
1164	impl Div for f16 {
1165	type Output = Self;
1166
1167	#[inline]
1168	fn div(self, rhs: Self) -> Self::Output {
1169	f16(arch::divide_f16(self.0, b:rhs.0))
1170	}
1171	}
1172
1173	impl Div<&f16> for f16 {
1174	type Output = <f16 as Div<f16>>::Output;
1175
1176	#[inline]
1177	fn div(self, rhs: &f16) -> Self::Output {
1178	self.div(*rhs)
1179	}
1180	}
1181
1182	impl Div<&f16> for &f16 {
1183	type Output = <f16 as Div<f16>>::Output;
1184
1185	#[inline]
1186	fn div(self, rhs: &f16) -> Self::Output {
1187	(self).div(rhs)
1188	}
1189	}
1190
1191	impl Div<f16> for &f16 {
1192	type Output = <f16 as Div<f16>>::Output;
1193
1194	#[inline]
1195	fn div(self, rhs: f16) -> Self::Output {
1196	(*self).div(rhs)
1197	}
1198	}
1199
1200	impl DivAssign for f16 {
1201	#[inline]
1202	fn div_assign(&mut self, rhs: Self) {
1203	self = (self).div(rhs);
1204	}
1205	}
1206
1207	impl DivAssign<&f16> for f16 {
1208	#[inline]
1209	fn div_assign(&mut self, rhs: &f16) {
1210	self = (self).div(rhs);
1211	}
1212	}
1213
1214	impl Rem for f16 {
1215	type Output = Self;
1216
1217	#[inline]
1218	fn rem(self, rhs: Self) -> Self::Output {
1219	f16(arch::remainder_f16(self.0, b:rhs.0))
1220	}
1221	}
1222
1223	impl Rem<&f16> for f16 {
1224	type Output = <f16 as Rem<f16>>::Output;
1225
1226	#[inline]
1227	fn rem(self, rhs: &f16) -> Self::Output {
1228	self.rem(*rhs)
1229	}
1230	}
1231
1232	impl Rem<&f16> for &f16 {
1233	type Output = <f16 as Rem<f16>>::Output;
1234
1235	#[inline]
1236	fn rem(self, rhs: &f16) -> Self::Output {
1237	(self).rem(rhs)
1238	}
1239	}
1240
1241	impl Rem<f16> for &f16 {
1242	type Output = <f16 as Rem<f16>>::Output;
1243
1244	#[inline]
1245	fn rem(self, rhs: f16) -> Self::Output {
1246	(*self).rem(rhs)
1247	}
1248	}
1249
1250	impl RemAssign for f16 {
1251	#[inline]
1252	fn rem_assign(&mut self, rhs: Self) {
1253	self = (self).rem(rhs);
1254	}
1255	}
1256
1257	impl RemAssign<&f16> for f16 {
1258	#[inline]
1259	fn rem_assign(&mut self, rhs: &f16) {
1260	self = (self).rem(rhs);
1261	}
1262	}
1263
1264	impl Product for f16 {
1265	#[inline]
1266	fn product<I: Iterator<Item = Self>>(iter: I) -> Self {
1267	f16(arch::product_f16(iter.map(\|f: f16\| f.to_bits())))
1268	}
1269	}
1270
1271	impl<'a> Product<&'a f16> for f16 {
1272	#[inline]
1273	fn product<I: Iterator<Item = &'a f16>>(iter: I) -> Self {
1274	f16(arch::product_f16(iter.map(\|f: &'a f16\| f.to_bits())))
1275	}
1276	}
1277
1278	impl Sum for f16 {
1279	#[inline]
1280	fn sum<I: Iterator<Item = Self>>(iter: I) -> Self {
1281	f16(arch::sum_f16(iter.map(\|f: f16\| f.to_bits())))
1282	}
1283	}
1284
1285	impl<'a> Sum<&'a f16> for f16 {
1286	#[inline]
1287	fn sum<I: Iterator<Item = &'a f16>>(iter: I) -> Self {
1288	f16(arch::sum_f16(iter.map(\|f: &'a f16\| f.to_bits())))
1289	}
1290	}
1291
/// Stateless serde visitor handed to the deserializer by the `Deserialize`
/// impl for `f16`; its `serde::de::Visitor` impl produces `f16` values.
#[cfg(feature = "serde")]
struct Visitor;
1294
/// Deserializes an `f16` by asking the deserializer for a newtype struct
/// named `"f16"` and letting `Visitor` build the value.
#[cfg(feature = "serde")]
impl<'de> Deserialize<'de> for f16 {
    fn deserialize<D: serde::de::Deserializer<'de>>(deserializer: D) -> Result<f16, D::Error> {
        let visitor = Visitor;
        deserializer.deserialize_newtype_struct("f16", visitor)
    }
}
1304
/// Builds an `f16` from whichever representation the deserializer offers:
/// a newtype struct wrapping the raw `u16` bits, a string holding a float, or
/// a native `f32`/`f64`.
#[cfg(feature = "serde")]
impl<'de> serde::de::Visitor<'de> for Visitor {
    type Value = f16;

    fn expecting(&self, formatter: &mut core::fmt::Formatter) -> core::fmt::Result {
        write!(formatter, "tuple struct f16")
    }

    /// Reads the wrapped `u16` and uses it directly as the raw bits.
    fn visit_newtype_struct<D>(self, deserializer: D) -> Result<Self::Value, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        Ok(f16(<u16 as Deserialize>::deserialize(deserializer)?))
    }

    /// Parses the string with `str::parse`; a parse failure is reported as an
    /// `invalid_value` error that carries the offending string.
    fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
    where
        E: serde::de::Error,
    {
        v.parse().map_err(|_| {
            serde::de::Error::invalid_value(serde::de::Unexpected::Str(v), &"a float string")
        })
    }

    /// Converts with `f16::from_f32`.
    fn visit_f32<E>(self, v: f32) -> Result<Self::Value, E>
    where
        E: serde::de::Error,
    {
        Ok(f16::from_f32(v))
    }

    /// Converts with `f16::from_f64`.
    fn visit_f64<E>(self, v: f64) -> Result<Self::Value, E>
    where
        E: serde::de::Error,
    {
        Ok(f16::from_f64(v))
    }
}
1343
1344	#[allow(
1345	clippy::cognitive_complexity,
1346	clippy::float_cmp,
1347	clippy::neg_cmp_op_on_partial_ord
1348	)]
1349	#[cfg(test)]
1350	mod test {
1351	use super::*;
1352	#[allow(unused_imports)]
1353	use core::cmp::Ordering;
1354	#[cfg(feature = "num-traits")]
1355	use num_traits::{AsPrimitive, FromBytes, FromPrimitive, ToBytes, ToPrimitive};
1356	use quickcheck_macros::quickcheck;
1357
/// `AsPrimitive` converts between `f16` and `i32`/`f32`/`f64` in both
/// directions for the value two.
#[cfg(feature = "num-traits")]
#[test]
fn as_primitive() {
    let two = f16::from_f32(2.0);
    assert_eq!(<i32 as AsPrimitive<f16>>::as_(2), two);
    assert_eq!(<f16 as AsPrimitive<i32>>::as_(two), 2);

    assert_eq!(<f32 as AsPrimitive<f16>>::as_(2.0), two);
    assert_eq!(<f16 as AsPrimitive<f32>>::as_(two), 2.0);

    assert_eq!(<f64 as AsPrimitive<f16>>::as_(2.0), two);
    assert_eq!(<f16 as AsPrimitive<f64>>::as_(two), 2.0);
}
1371
/// `ToPrimitive` converts an `f16` holding two into `i32`, `f32` and `f64`.
#[cfg(feature = "num-traits")]
#[test]
fn to_primitive() {
    let two = f16::from_f32(2.0);
    assert_eq!(ToPrimitive::to_i32(&two).unwrap(), 2i32);
    assert_eq!(ToPrimitive::to_f32(&two).unwrap(), 2.0f32);
    assert_eq!(ToPrimitive::to_f64(&two).unwrap(), 2.0f64);
}
1380
/// `FromPrimitive` builds the `f16` value two from `i32`, `f32` and `f64`.
#[cfg(feature = "num-traits")]
#[test]
fn from_primitive() {
    let two = f16::from_f32(2.0);
    assert_eq!(<f16 as FromPrimitive>::from_i32(2).unwrap(), two);
    assert_eq!(<f16 as FromPrimitive>::from_f32(2.0).unwrap(), two);
    assert_eq!(<f16 as FromPrimitive>::from_f64(2.0).unwrap(), two);
}
1389
/// `ToBytes`/`FromBytes` round-trip the value two through its little- and
/// big-endian byte pairs (`[0, 64]` and `[64, 0]`).
#[cfg(feature = "num-traits")]
#[test]
fn to_and_from_bytes() {
    let two = f16::from_f32(2.0);
    assert_eq!(<f16 as ToBytes>::to_le_bytes(&two), [0, 64]);
    assert_eq!(<f16 as FromBytes>::from_le_bytes(&[0, 64]), two);
    assert_eq!(<f16 as ToBytes>::to_be_bytes(&two), [64, 0]);
    assert_eq!(<f16 as FromBytes>::from_be_bytes(&[64, 0]), two);
}
1399
/// Derives the numeric-limit constants of `f16` from first principles and
/// compares them with the declared associated constants. Each section repeats
/// the same derivation for `f32` as a sanity check of the method itself.
#[test]
fn test_f16_consts() {
    // DIGITS
    let digits = ((f16::MANTISSA_DIGITS as f32 - 1.0) * 2f32.log10()).floor() as u32;
    assert_eq!(f16::DIGITS, digits);
    // sanity check to show test is good
    let digits32 = ((core::f32::MANTISSA_DIGITS as f32 - 1.0) * 2f32.log10()).floor() as u32;
    assert_eq!(core::f32::DIGITS, digits32);

    // EPSILON
    let one = f16::from_f32(1.0);
    let one_plus_epsilon = f16::from_bits(one.to_bits() + 1);
    let epsilon = f16::from_f32(one_plus_epsilon.to_f32() - 1.0);
    assert_eq!(f16::EPSILON, epsilon);
    // sanity check to show test is good
    let one_plus_epsilon32 = f32::from_bits(1.0f32.to_bits() + 1);
    let epsilon32 = one_plus_epsilon32 - 1f32;
    assert_eq!(core::f32::EPSILON, epsilon32);

    // MAX, MIN and MIN_POSITIVE
    let max = f16::from_bits(f16::INFINITY.to_bits() - 1);
    let min = f16::from_bits(f16::NEG_INFINITY.to_bits() - 1);
    let min_pos = f16::from_f32(2f32.powi(f16::MIN_EXP - 1));
    assert_eq!(f16::MAX, max);
    assert_eq!(f16::MIN, min);
    assert_eq!(f16::MIN_POSITIVE, min_pos);
    // sanity check to show test is good
    let max32 = f32::from_bits(core::f32::INFINITY.to_bits() - 1);
    let min32 = f32::from_bits(core::f32::NEG_INFINITY.to_bits() - 1);
    let min_pos32 = 2f32.powi(core::f32::MIN_EXP - 1);
    assert_eq!(core::f32::MAX, max32);
    assert_eq!(core::f32::MIN, min32);
    assert_eq!(core::f32::MIN_POSITIVE, min_pos32);

    // MIN_10_EXP and MAX_10_EXP
    let ten_to_min = 10f32.powi(f16::MIN_10_EXP);
    assert!(ten_to_min / 10.0 < f16::MIN_POSITIVE.to_f32());
    assert!(ten_to_min > f16::MIN_POSITIVE.to_f32());
    let ten_to_max = 10f32.powi(f16::MAX_10_EXP);
    assert!(ten_to_max < f16::MAX.to_f32());
    assert!(ten_to_max * 10.0 > f16::MAX.to_f32());
    // sanity check to show test is good
    let ten_to_min32 = 10f64.powi(core::f32::MIN_10_EXP);
    assert!(ten_to_min32 / 10.0 < f64::from(core::f32::MIN_POSITIVE));
    assert!(ten_to_min32 > f64::from(core::f32::MIN_POSITIVE));
    let ten_to_max32 = 10f64.powi(core::f32::MAX_10_EXP);
    assert!(ten_to_max32 < f64::from(core::f32::MAX));
    assert!(ten_to_max32 * 10.0 > f64::from(core::f32::MAX));
}
1449
/// Every named `f16` constant equals the `f32` value of the same name
/// converted with `f16::from_f32`.
#[test]
fn test_f16_consts_from_f32() {
    let one = f16::from_f32(1.0);
    let zero = f16::from_f32(0.0);
    let neg_zero = f16::from_f32(-0.0);
    let neg_one = f16::from_f32(-1.0);
    let inf = f16::from_f32(core::f32::INFINITY);
    let neg_inf = f16::from_f32(core::f32::NEG_INFINITY);
    let nan = f16::from_f32(core::f32::NAN);

    assert_eq!(f16::ONE, one);
    assert_eq!(f16::ZERO, zero);
    assert!(zero.is_sign_positive());
    assert_eq!(f16::NEG_ZERO, neg_zero);
    assert!(neg_zero.is_sign_negative());
    assert_eq!(f16::NEG_ONE, neg_one);
    assert!(neg_one.is_sign_negative());
    assert_eq!(f16::INFINITY, inf);
    assert_eq!(f16::NEG_INFINITY, neg_inf);
    assert!(nan.is_nan());
    assert!(f16::NAN.is_nan());

    let e = f16::from_f32(core::f32::consts::E);
    let pi = f16::from_f32(core::f32::consts::PI);
    let frac_1_pi = f16::from_f32(core::f32::consts::FRAC_1_PI);
    let frac_1_sqrt_2 = f16::from_f32(core::f32::consts::FRAC_1_SQRT_2);
    let frac_2_pi = f16::from_f32(core::f32::consts::FRAC_2_PI);
    let frac_2_sqrt_pi = f16::from_f32(core::f32::consts::FRAC_2_SQRT_PI);
    let frac_pi_2 = f16::from_f32(core::f32::consts::FRAC_PI_2);
    let frac_pi_3 = f16::from_f32(core::f32::consts::FRAC_PI_3);
    let frac_pi_4 = f16::from_f32(core::f32::consts::FRAC_PI_4);
    let frac_pi_6 = f16::from_f32(core::f32::consts::FRAC_PI_6);
    let frac_pi_8 = f16::from_f32(core::f32::consts::FRAC_PI_8);
    let ln_10 = f16::from_f32(core::f32::consts::LN_10);
    let ln_2 = f16::from_f32(core::f32::consts::LN_2);
    let log10_e = f16::from_f32(core::f32::consts::LOG10_E);
    // core::f32::consts::LOG10_2 requires rustc 1.43.0
    let log10_2 = f16::from_f32(2f32.log10());
    let log2_e = f16::from_f32(core::f32::consts::LOG2_E);
    // core::f32::consts::LOG2_10 requires rustc 1.43.0
    let log2_10 = f16::from_f32(10f32.log2());
    let sqrt_2 = f16::from_f32(core::f32::consts::SQRT_2);

    assert_eq!(f16::E, e);
    assert_eq!(f16::PI, pi);
    assert_eq!(f16::FRAC_1_PI, frac_1_pi);
    assert_eq!(f16::FRAC_1_SQRT_2, frac_1_sqrt_2);
    assert_eq!(f16::FRAC_2_PI, frac_2_pi);
    assert_eq!(f16::FRAC_2_SQRT_PI, frac_2_sqrt_pi);
    assert_eq!(f16::FRAC_PI_2, frac_pi_2);
    assert_eq!(f16::FRAC_PI_3, frac_pi_3);
    assert_eq!(f16::FRAC_PI_4, frac_pi_4);
    assert_eq!(f16::FRAC_PI_6, frac_pi_6);
    assert_eq!(f16::FRAC_PI_8, frac_pi_8);
    assert_eq!(f16::LN_10, ln_10);
    assert_eq!(f16::LN_2, ln_2);
    assert_eq!(f16::LOG10_E, log10_e);
    assert_eq!(f16::LOG10_2, log10_2);
    assert_eq!(f16::LOG2_E, log2_e);
    assert_eq!(f16::LOG2_10, log2_10);
    assert_eq!(f16::SQRT_2, sqrt_2);
}
1512
/// Every named `f16` constant equals the `f64` value of the same name
/// converted with `f16::from_f64`.
#[test]
fn test_f16_consts_from_f64() {
    let one = f16::from_f64(1.0);
    let zero = f16::from_f64(0.0);
    let neg_zero = f16::from_f64(-0.0);
    // Checked here as well so this test covers the same constants as the
    // `from_f32` variant.
    let neg_one = f16::from_f64(-1.0);
    let inf = f16::from_f64(core::f64::INFINITY);
    let neg_inf = f16::from_f64(core::f64::NEG_INFINITY);
    let nan = f16::from_f64(core::f64::NAN);

    assert_eq!(f16::ONE, one);
    assert_eq!(f16::ZERO, zero);
    assert!(zero.is_sign_positive());
    assert_eq!(f16::NEG_ZERO, neg_zero);
    assert!(neg_zero.is_sign_negative());
    assert_eq!(f16::NEG_ONE, neg_one);
    assert!(neg_one.is_sign_negative());
    assert_eq!(f16::INFINITY, inf);
    assert_eq!(f16::NEG_INFINITY, neg_inf);
    assert!(nan.is_nan());
    assert!(f16::NAN.is_nan());

    let e = f16::from_f64(core::f64::consts::E);
    let pi = f16::from_f64(core::f64::consts::PI);
    let frac_1_pi = f16::from_f64(core::f64::consts::FRAC_1_PI);
    let frac_1_sqrt_2 = f16::from_f64(core::f64::consts::FRAC_1_SQRT_2);
    let frac_2_pi = f16::from_f64(core::f64::consts::FRAC_2_PI);
    let frac_2_sqrt_pi = f16::from_f64(core::f64::consts::FRAC_2_SQRT_PI);
    let frac_pi_2 = f16::from_f64(core::f64::consts::FRAC_PI_2);
    let frac_pi_3 = f16::from_f64(core::f64::consts::FRAC_PI_3);
    let frac_pi_4 = f16::from_f64(core::f64::consts::FRAC_PI_4);
    let frac_pi_6 = f16::from_f64(core::f64::consts::FRAC_PI_6);
    let frac_pi_8 = f16::from_f64(core::f64::consts::FRAC_PI_8);
    let ln_10 = f16::from_f64(core::f64::consts::LN_10);
    let ln_2 = f16::from_f64(core::f64::consts::LN_2);
    let log10_e = f16::from_f64(core::f64::consts::LOG10_E);
    // core::f64::consts::LOG10_2 requires rustc 1.43.0
    let log10_2 = f16::from_f64(2f64.log10());
    let log2_e = f16::from_f64(core::f64::consts::LOG2_E);
    // core::f64::consts::LOG2_10 requires rustc 1.43.0
    let log2_10 = f16::from_f64(10f64.log2());
    let sqrt_2 = f16::from_f64(core::f64::consts::SQRT_2);

    assert_eq!(f16::E, e);
    assert_eq!(f16::PI, pi);
    assert_eq!(f16::FRAC_1_PI, frac_1_pi);
    assert_eq!(f16::FRAC_1_SQRT_2, frac_1_sqrt_2);
    assert_eq!(f16::FRAC_2_PI, frac_2_pi);
    assert_eq!(f16::FRAC_2_SQRT_PI, frac_2_sqrt_pi);
    assert_eq!(f16::FRAC_PI_2, frac_pi_2);
    assert_eq!(f16::FRAC_PI_3, frac_pi_3);
    assert_eq!(f16::FRAC_PI_4, frac_pi_4);
    assert_eq!(f16::FRAC_PI_6, frac_pi_6);
    assert_eq!(f16::FRAC_PI_8, frac_pi_8);
    assert_eq!(f16::LN_10, ln_10);
    assert_eq!(f16::LN_2, ln_2);
    assert_eq!(f16::LOG10_E, log10_e);
    assert_eq!(f16::LOG10_2, log10_2);
    assert_eq!(f16::LOG2_E, log2_e);
    assert_eq!(f16::LOG2_10, log2_10);
    assert_eq!(f16::SQRT_2, sqrt_2);
}
1572
/// NaNs stay NaN when narrowed (`f64` -> `f32`, `f64` -> `f16`,
/// `f32` -> `f16`). Only NaN-ness is asserted on the converted values.
#[test]
fn test_nan_conversion_to_smaller() {
    let nan64 = f64::from_bits(0x7FF0_0000_0000_0001u64);
    let neg_nan64 = f64::from_bits(0xFFF0_0000_0000_0001u64);
    let nan32 = f32::from_bits(0x7F80_0001u32);
    let neg_nan32 = f32::from_bits(0xFF80_0001u32);
    let nan32_from_64 = nan64 as f32;
    let neg_nan32_from_64 = neg_nan64 as f32;
    let nan16_from_64 = f16::from_f64(nan64);
    let neg_nan16_from_64 = f16::from_f64(neg_nan64);
    let nan16_from_32 = f16::from_f32(nan32);
    let neg_nan16_from_32 = f16::from_f32(neg_nan32);

    assert!(nan64.is_nan() && nan64.is_sign_positive());
    assert!(neg_nan64.is_nan() && neg_nan64.is_sign_negative());
    assert!(nan32.is_nan() && nan32.is_sign_positive());
    assert!(neg_nan32.is_nan() && neg_nan32.is_sign_negative());

    // f32/f64 NaN conversion sign is non-deterministic: https://github.com/starkat99/half-rs/issues/103
    assert!(nan32_from_64.is_nan());
    assert!(neg_nan32_from_64.is_nan());
    assert!(nan16_from_64.is_nan());
    assert!(neg_nan16_from_64.is_nan());
    assert!(nan16_from_32.is_nan());
    assert!(neg_nan16_from_32.is_nan());
}
1599
/// NaNs stay NaN when widened (`f16` -> `f32`, `f16` -> `f64`,
/// `f32` -> `f64`). Only NaN-ness is asserted on the converted values.
#[test]
fn test_nan_conversion_to_larger() {
    let nan16 = f16::from_bits(0x7C01u16);
    let neg_nan16 = f16::from_bits(0xFC01u16);
    let nan32 = f32::from_bits(0x7F80_0001u32);
    let neg_nan32 = f32::from_bits(0xFF80_0001u32);
    let nan32_from_16 = f32::from(nan16);
    let neg_nan32_from_16 = f32::from(neg_nan16);
    let nan64_from_16 = f64::from(nan16);
    let neg_nan64_from_16 = f64::from(neg_nan16);
    let nan64_from_32 = f64::from(nan32);
    let neg_nan64_from_32 = f64::from(neg_nan32);

    assert!(nan16.is_nan() && nan16.is_sign_positive());
    assert!(neg_nan16.is_nan() && neg_nan16.is_sign_negative());
    assert!(nan32.is_nan() && nan32.is_sign_positive());
    assert!(neg_nan32.is_nan() && neg_nan32.is_sign_negative());

    // f32/f64 NaN conversion sign is non-deterministic: https://github.com/starkat99/half-rs/issues/103
    assert!(nan32_from_16.is_nan());
    assert!(neg_nan32_from_16.is_nan());
    assert!(nan64_from_16.is_nan());
    assert!(neg_nan64_from_16.is_nan());
    assert!(nan64_from_32.is_nan());
    assert!(neg_nan64_from_32.is_nan());
}
1626
/// `f16` <-> `f32` conversion: exact for 7.0, within tolerance for 7.1, and
/// exact for the bit patterns 1 and 5 (1x and 5x 2^-24).
#[test]
fn test_f16_to_f32() {
    let f = f16::from_f32(7.0);
    assert_eq!(f.to_f32(), 7.0f32);

    // 7.1 is NOT exactly representable in 16-bit, it's rounded
    let f = f16::from_f32(7.1);
    let diff = (f.to_f32() - 7.1f32).abs();
    // diff must be <= 4 * EPSILON, as 7 has two more significant bits than 1
    assert!(diff <= 4.0 * f16::EPSILON.to_f32());

    assert_eq!(f16::from_bits(0x0000_0001).to_f32(), 2.0f32.powi(-24));
    assert_eq!(f16::from_bits(0x0000_0005).to_f32(), 5.0 * 2.0f32.powi(-24));

    assert_eq!(f16::from_bits(0x0000_0001), f16::from_f32(2.0f32.powi(-24)));
    assert_eq!(
        f16::from_bits(0x0000_0005),
        f16::from_f32(5.0 * 2.0f32.powi(-24))
    );
}
1647
/// `f16` <-> `f64` conversion: exact for 7.0, within tolerance for 7.1, and
/// exact for the bit patterns 1 and 5 (1x and 5x 2^-24).
#[test]
fn test_f16_to_f64() {
    let f = f16::from_f64(7.0);
    assert_eq!(f.to_f64(), 7.0f64);

    // 7.1 is NOT exactly representable in 16-bit, it's rounded
    let f = f16::from_f64(7.1);
    let diff = (f.to_f64() - 7.1f64).abs();
    // diff must be <= 4 * EPSILON, as 7 has two more significant bits than 1
    assert!(diff <= 4.0 * f16::EPSILON.to_f64());

    assert_eq!(f16::from_bits(0x0000_0001).to_f64(), 2.0f64.powi(-24));
    assert_eq!(f16::from_bits(0x0000_0005).to_f64(), 5.0 * 2.0f64.powi(-24));

    assert_eq!(f16::from_bits(0x0000_0001), f16::from_f64(2.0f64.powi(-24)));
    assert_eq!(
        f16::from_bits(0x0000_0005),
        f16::from_f64(5.0 * 2.0f64.powi(-24))
    );
}
1668
/// Exercises `partial_cmp` and all six comparison operators on three pairs:
/// `0` vs `-0` (must compare equal), `1` vs `-0`, and `1` vs `-1`.
#[test]
fn test_comparisons() {
    let zero = f16::from_f64(0.0);
    let one = f16::from_f64(1.0);
    let neg_zero = f16::from_f64(-0.0);
    let neg_one = f16::from_f64(-1.0);

    assert_eq!(zero.partial_cmp(&neg_zero), Some(Ordering::Equal));
    assert_eq!(neg_zero.partial_cmp(&zero), Some(Ordering::Equal));
    assert!(zero == neg_zero);
    assert!(neg_zero == zero);
    assert!(!(zero != neg_zero));
    assert!(!(neg_zero != zero));
    assert!(!(zero < neg_zero));
    assert!(!(neg_zero < zero));
    assert!(zero <= neg_zero);
    assert!(neg_zero <= zero);
    assert!(!(zero > neg_zero));
    assert!(!(neg_zero > zero));
    assert!(zero >= neg_zero);
    assert!(neg_zero >= zero);

    assert_eq!(one.partial_cmp(&neg_zero), Some(Ordering::Greater));
    assert_eq!(neg_zero.partial_cmp(&one), Some(Ordering::Less));
    assert!(!(one == neg_zero));
    assert!(!(neg_zero == one));
    assert!(one != neg_zero);
    assert!(neg_zero != one);
    assert!(!(one < neg_zero));
    assert!(neg_zero < one);
    assert!(!(one <= neg_zero));
    assert!(neg_zero <= one);
    assert!(one > neg_zero);
    assert!(!(neg_zero > one));
    assert!(one >= neg_zero);
    assert!(!(neg_zero >= one));

    assert_eq!(one.partial_cmp(&neg_one), Some(Ordering::Greater));
    assert_eq!(neg_one.partial_cmp(&one), Some(Ordering::Less));
    assert!(!(one == neg_one));
    assert!(!(neg_one == one));
    assert!(one != neg_one);
    assert!(neg_one != one);
    assert!(!(one < neg_one));
    assert!(neg_one < one);
    assert!(!(one <= neg_one));
    assert!(neg_one <= one);
    assert!(one > neg_one);
    assert!(!(neg_one > one));
    assert!(one >= neg_one);
    assert!(!(neg_one >= one));
}
1721
/// `f16::from_f32` rounds to nearest with ties going to the even neighbour,
/// checked on multiples of the smallest subnormal and on values around
/// 2000-2003.
#[test]
#[allow(clippy::erasing_op, clippy::identity_op)]
fn round_to_even_f32() {
    // smallest positive subnormal = 0b0.0000_0000_01 * 2^-14 = 2^-24
    let min_sub = f16::from_bits(1);
    let min_sub_f = (-24f32).exp2();
    assert_eq!(f16::from_f32(min_sub_f).to_bits(), min_sub.to_bits());
    assert_eq!(f32::from(min_sub).to_bits(), min_sub_f.to_bits());

    // (multiple of the smallest subnormal, expected result in units of it)
    let subnormal_cases: [(f32, u16); 9] = [
        // 0.0000000000_011111 rounded to 0.0000000000 (< tie, no rounding)
        (0.49, 0),
        // 0.0000000000_100000 rounded to 0.0000000000 (tie and even, remains at even)
        (0.50, 0),
        // 0.0000000000_100001 rounded to 0.0000000001 (> tie, rounds up)
        (0.51, 1),
        // 0.0000000001_011111 rounded to 0.0000000001 (< tie, no rounding)
        (1.49, 1),
        // 0.0000000001_100000 rounded to 0.0000000010 (tie and odd, rounds up to even)
        (1.50, 2),
        // 0.0000000001_100001 rounded to 0.0000000010 (> tie, rounds up)
        (1.51, 2),
        // 0.0000000010_011111 rounded to 0.0000000010 (< tie, no rounding)
        (2.49, 2),
        // 0.0000000010_100000 rounded to 0.0000000010 (tie and even, remains at even)
        (2.50, 2),
        // 0.0000000010_100001 rounded to 0.0000000011 (> tie, rounds up)
        (2.51, 3),
    ];
    for &(scale, expected_units) in subnormal_cases.iter() {
        assert_eq!(
            f16::from_f32(min_sub_f * scale).to_bits(),
            min_sub.to_bits() * expected_units,
            "rounding {} * 2^-24",
            scale
        );
    }

    // (input, value whose f16 conversion the input must round to)
    let integer_cases: [(f32, f32); 9] = [
        (2000.49, 2000.0),
        (2000.50, 2000.0),
        (2000.51, 2001.0),
        (2001.49, 2001.0),
        (2001.50, 2002.0),
        (2001.51, 2002.0),
        (2002.49, 2002.0),
        (2002.50, 2002.0),
        (2002.51, 2003.0),
    ];
    for &(input, expected) in integer_cases.iter() {
        assert_eq!(
            f16::from_f32(input).to_bits(),
            f16::from_f32(expected).to_bits(),
            "rounding {}",
            input
        );
    }
}
1816
/// `f16::from_f64` rounds to nearest with ties going to the even neighbour,
/// checked on multiples of the smallest subnormal and on values around
/// 2000-2003.
#[test]
#[allow(clippy::erasing_op, clippy::identity_op)]
fn round_to_even_f64() {
    // smallest positive subnormal = 0b0.0000_0000_01 * 2^-14 = 2^-24
    let min_sub = f16::from_bits(1);
    let min_sub_f = (-24f64).exp2();
    assert_eq!(f16::from_f64(min_sub_f).to_bits(), min_sub.to_bits());
    assert_eq!(f64::from(min_sub).to_bits(), min_sub_f.to_bits());

    // (multiple of the smallest subnormal, expected result in units of it)
    let subnormal_cases: [(f64, u16); 9] = [
        // 0.0000000000_011111 rounded to 0.0000000000 (< tie, no rounding)
        (0.49, 0),
        // 0.0000000000_100000 rounded to 0.0000000000 (tie and even, remains at even)
        (0.50, 0),
        // 0.0000000000_100001 rounded to 0.0000000001 (> tie, rounds up)
        (0.51, 1),
        // 0.0000000001_011111 rounded to 0.0000000001 (< tie, no rounding)
        (1.49, 1),
        // 0.0000000001_100000 rounded to 0.0000000010 (tie and odd, rounds up to even)
        (1.50, 2),
        // 0.0000000001_100001 rounded to 0.0000000010 (> tie, rounds up)
        (1.51, 2),
        // 0.0000000010_011111 rounded to 0.0000000010 (< tie, no rounding)
        (2.49, 2),
        // 0.0000000010_100000 rounded to 0.0000000010 (tie and even, remains at even)
        (2.50, 2),
        // 0.0000000010_100001 rounded to 0.0000000011 (> tie, rounds up)
        (2.51, 3),
    ];
    for &(scale, expected_units) in subnormal_cases.iter() {
        assert_eq!(
            f16::from_f64(min_sub_f * scale).to_bits(),
            min_sub.to_bits() * expected_units,
            "rounding {} * 2^-24",
            scale
        );
    }

    // (input, value whose f16 conversion the input must round to)
    let integer_cases: [(f64, f64); 9] = [
        (2000.49, 2000.0),
        (2000.50, 2000.0),
        (2000.51, 2001.0),
        (2001.49, 2001.0),
        (2001.50, 2002.0),
        (2001.51, 2002.0),
        (2002.49, 2002.0),
        (2002.50, 2002.0),
        (2002.51, 2003.0),
    ];
    for &(input, expected) in integer_cases.iter() {
        assert_eq!(
            f16::from_f64(input).to_bits(),
            f16::from_f64(expected).to_bits(),
            "rounding {}",
            input
        );
    }
}
1911
/// Smoke test of the `+`, `-`, `*`, `/` and `%` operators on small exactly
/// representable values.
#[test]
fn arithmetic() {
    assert_eq!(f16::ONE + f16::ONE, f16::from_f32(2.));
    assert_eq!(f16::ONE - f16::ONE, f16::ZERO);
    assert_eq!(f16::ONE * f16::ONE, f16::ONE);
    assert_eq!(f16::from_f32(2.) * f16::from_f32(2.), f16::from_f32(4.));
    assert_eq!(f16::ONE / f16::ONE, f16::ONE);
    assert_eq!(f16::from_f32(4.) / f16::from_f32(2.), f16::from_f32(2.));
    assert_eq!(f16::from_f32(4.) % f16::from_f32(3.), f16::from_f32(1.));
}
1922
/// `Display` and `Debug` honour precision, sign and width/alignment flags.
#[cfg(feature = "std")]
#[test]
fn formatting() {
    let f = f16::from_f32(0.1152344);

    assert_eq!(format!("{:.3}", f), "0.115");
    assert_eq!(format!("{:.4}", f), "0.1152");
    assert_eq!(format!("{:+.4}", f), "+0.1152");
    // Width 10, right-aligned: the 7-character number gets 3 spaces of padding.
    assert_eq!(format!("{:>+10.4}", f), "   +0.1152");

    assert_eq!(format!("{:.3?}", f), "0.115");
    assert_eq!(format!("{:.4?}", f), "0.1152");
    assert_eq!(format!("{:+.4?}", f), "+0.1152");
    // Width 10, right-aligned: the 7-character number gets 3 spaces of padding.
    assert_eq!(format!("{:>+10.4?}", f), "   +0.1152");
}
1938
1939	impl quickcheck::Arbitrary for f16 {
1940	fn arbitrary(g: &mut quickcheck::Gen) -> Self {
1941	f16(u16::arbitrary(g))
1942	}
1943	}
1944
1945	#[quickcheck]
1946	fn qc_roundtrip_f16_f32_is_identity(f: f16) -> bool {
1947	let roundtrip = f16::from_f32(f.to_f32());
1948	if f.is_nan() {
1949	roundtrip.is_nan() && f.is_sign_negative() == roundtrip.is_sign_negative()
1950	} else {
1951	f.0 == roundtrip.0
1952	}
1953	}
1954
1955	#[quickcheck]
1956	fn qc_roundtrip_f16_f64_is_identity(f: f16) -> bool {
1957	let roundtrip = f16::from_f64(f.to_f64());
1958	if f.is_nan() {
1959	roundtrip.is_nan() && f.is_sign_negative() == roundtrip.is_sign_negative()
1960	} else {
1961	f.0 == roundtrip.0
1962	}
1963	}
1964	}
1965