binary16.rs source code [crates/half-2.4.0/src/binary16.rs]

1	#[cfg(all(feature = "serde", feature = "alloc"))]
2	#[allow(unused_imports)]
3	use alloc::string::ToString;
4	#[cfg(feature = "bytemuck")]
5	use bytemuck::{Pod, Zeroable};
6	use core::{
7	cmp::Ordering,
8	iter::{Product, Sum},
9	num::FpCategory,
10	ops::{Add, AddAssign, Div, DivAssign, Mul, MulAssign, Neg, Rem, RemAssign, Sub, SubAssign},
11	};
12	#[cfg(not(target_arch = "spirv"))]
13	use core::{
14	fmt::{
15	Binary, Debug, Display, Error, Formatter, LowerExp, LowerHex, Octal, UpperExp, UpperHex,
16	},
17	num::ParseFloatError,
18	str::FromStr,
19	};
20	#[cfg(feature = "serde")]
21	use serde::{Deserialize, Serialize};
22	#[cfg(feature = "zerocopy")]
23	use zerocopy::{AsBytes, FromBytes};
24
25	pub(crate) mod arch;
26
/// Half-precision floating point number in the IEEE 754-2008 [`binary16`] ("half") format.
///
/// The value is kept as its raw 16-bit pattern. The type is meant for compact storage of data
/// that does not need the full range or precision of a wider floating point type.
///
/// [`binary16`]: https://en.wikipedia.org/wiki/Half-precision_floating-point_format
#[allow(non_camel_case_types)]
#[derive(Clone, Copy, Default)]
#[repr(transparent)]
#[cfg_attr(feature = "serde", derive(Serialize))]
#[cfg_attr(
    feature = "rkyv",
    derive(rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)
)]
#[cfg_attr(feature = "rkyv", archive(resolver = "F16Resolver"))]
#[cfg_attr(feature = "bytemuck", derive(Zeroable, Pod))]
#[cfg_attr(feature = "zerocopy", derive(AsBytes, FromBytes))]
#[cfg_attr(kani, derive(kani::Arbitrary))]
pub struct f16(u16);
47
48	impl f16 {
49	/// Constructs a 16-bit floating point value from the raw bits.
50	#[inline]
51	#[must_use]
52	pub const fn from_bits(bits: u16) -> f16 {
53	f16(bits)
54	}
55
56	/// Constructs a 16-bit floating point value from a 32-bit floating point value.
57	///
58	/// This operation is lossy. If the 32-bit value is to large to fit in 16-bits, ±∞ will result.
59	/// NaN values are preserved. 32-bit subnormal values are too tiny to be represented in 16-bits
60	/// and result in ±0. Exponents that underflow the minimum 16-bit exponent will result in 16-bit
61	/// subnormals or ±0. All other values are truncated and rounded to the nearest representable
62	/// 16-bit value.
63	#[inline]
64	#[must_use]
65	pub fn from_f32(value: f32) -> f16 {
66	f16(arch::f32_to_f16(value))
67	}
68
69	/// Constructs a 16-bit floating point value from a 32-bit floating point value.
70	///
71	/// This function is identical to [`from_f32`][Self::from_f32] except it never uses hardware
72	/// intrinsics, which allows it to be `const`. [`from_f32`][Self::from_f32] should be preferred
73	/// in any non-`const` context.
74	///
75	/// This operation is lossy. If the 32-bit value is to large to fit in 16-bits, ±∞ will result.
76	/// NaN values are preserved. 32-bit subnormal values are too tiny to be represented in 16-bits
77	/// and result in ±0. Exponents that underflow the minimum 16-bit exponent will result in 16-bit
78	/// subnormals or ±0. All other values are truncated and rounded to the nearest representable
79	/// 16-bit value.
80	#[inline]
81	#[must_use]
82	pub const fn from_f32_const(value: f32) -> f16 {
83	f16(arch::f32_to_f16_fallback(value))
84	}
85
86	/// Constructs a 16-bit floating point value from a 64-bit floating point value.
87	///
88	/// This operation is lossy. If the 64-bit value is to large to fit in 16-bits, ±∞ will result.
89	/// NaN values are preserved. 64-bit subnormal values are too tiny to be represented in 16-bits
90	/// and result in ±0. Exponents that underflow the minimum 16-bit exponent will result in 16-bit
91	/// subnormals or ±0. All other values are truncated and rounded to the nearest representable
92	/// 16-bit value.
93	#[inline]
94	#[must_use]
95	pub fn from_f64(value: f64) -> f16 {
96	f16(arch::f64_to_f16(value))
97	}
98
99	/// Constructs a 16-bit floating point value from a 64-bit floating point value.
100	///
101	/// This function is identical to [`from_f64`][Self::from_f64] except it never uses hardware
102	/// intrinsics, which allows it to be `const`. [`from_f64`][Self::from_f64] should be preferred
103	/// in any non-`const` context.
104	///
105	/// This operation is lossy. If the 64-bit value is to large to fit in 16-bits, ±∞ will result.
106	/// NaN values are preserved. 64-bit subnormal values are too tiny to be represented in 16-bits
107	/// and result in ±0. Exponents that underflow the minimum 16-bit exponent will result in 16-bit
108	/// subnormals or ±0. All other values are truncated and rounded to the nearest representable
109	/// 16-bit value.
110	#[inline]
111	#[must_use]
112	pub const fn from_f64_const(value: f64) -> f16 {
113	f16(arch::f64_to_f16_fallback(value))
114	}
115
116	/// Converts a [`f16`] into the underlying bit representation.
117	#[inline]
118	#[must_use]
119	pub const fn to_bits(self) -> u16 {
120	self.0
121	}
122
123	/// Returns the memory representation of the underlying bit representation as a byte array in
124	/// little-endian byte order.
125	///
126	/// # Examples
127	///
128	/// ```rust
129	/// # use half::prelude::*;
130	/// let bytes = f16::from_f32(`12.5`).to_le_bytes();
131	/// assert_eq!(bytes, [`0x40`, `0x4A`]);
132	/// ```
133	#[inline]
134	#[must_use]
135	pub const fn to_le_bytes(self) -> [u8; `2`] {
136	self.0.to_le_bytes()
137	}
138
139	/// Returns the memory representation of the underlying bit representation as a byte array in
140	/// big-endian (network) byte order.
141	///
142	/// # Examples
143	///
144	/// ```rust
145	/// # use half::prelude::*;
146	/// let bytes = f16::from_f32(`12.5`).to_be_bytes();
147	/// assert_eq!(bytes, [`0x4A`, `0x40`]);
148	/// ```
149	#[inline]
150	#[must_use]
151	pub const fn to_be_bytes(self) -> [u8; `2`] {
152	self.0.to_be_bytes()
153	}
154
155	/// Returns the memory representation of the underlying bit representation as a byte array in
156	/// native byte order.
157	///
158	/// As the target platform's native endianness is used, portable code should use
159	/// [`to_be_bytes`][Self::to_be_bytes] or [`to_le_bytes`][Self::to_le_bytes], as appropriate,
160	/// instead.
161	///
162	/// # Examples
163	///
164	/// ```rust
165	/// # use half::prelude::*;
166	/// let bytes = f16::from_f32(`12.5`).to_ne_bytes();
167	/// assert_eq!(bytes, if cfg!(target_endian = "big") {
168	/// [`0x4A`, `0x40`]
169	/// } else {
170	/// [`0x40`, `0x4A`]
171	/// });
172	/// ```
173	#[inline]
174	#[must_use]
175	pub const fn to_ne_bytes(self) -> [u8; `2`] {
176	self.0.to_ne_bytes()
177	}
178
179	/// Creates a floating point value from its representation as a byte array in little endian.
180	///
181	/// # Examples
182	///
183	/// ```rust
184	/// # use half::prelude::*;
185	/// let value = f16::from_le_bytes([`0x40`, `0x4A`]);
186	/// assert_eq!(value, f16::from_f32(`12.5`));
187	/// ```
188	#[inline]
189	#[must_use]
190	pub const fn from_le_bytes(bytes: [u8; `2`]) -> f16 {
191	f16::from_bits(u16::from_le_bytes(bytes))
192	}
193
194	/// Creates a floating point value from its representation as a byte array in big endian.
195	///
196	/// # Examples
197	///
198	/// ```rust
199	/// # use half::prelude::*;
200	/// let value = f16::from_be_bytes([`0x4A`, `0x40`]);
201	/// assert_eq!(value, f16::from_f32(`12.5`));
202	/// ```
203	#[inline]
204	#[must_use]
205	pub const fn from_be_bytes(bytes: [u8; `2`]) -> f16 {
206	f16::from_bits(u16::from_be_bytes(bytes))
207	}
208
209	/// Creates a floating point value from its representation as a byte array in native endian.
210	///
211	/// As the target platform's native endianness is used, portable code likely wants to use
212	/// [`from_be_bytes`][Self::from_be_bytes] or [`from_le_bytes`][Self::from_le_bytes], as
213	/// appropriate instead.
214	///
215	/// # Examples
216	///
217	/// ```rust
218	/// # use half::prelude::*;
219	/// let value = f16::from_ne_bytes(if cfg!(target_endian = "big") {
220	/// [`0x4A`, `0x40`]
221	/// } else {
222	/// [`0x40`, `0x4A`]
223	/// });
224	/// assert_eq!(value, f16::from_f32(`12.5`));
225	/// ```
226	#[inline]
227	#[must_use]
228	pub const fn from_ne_bytes(bytes: [u8; `2`]) -> f16 {
229	f16::from_bits(u16::from_ne_bytes(bytes))
230	}
231
232	/// Converts a [`f16`] value into a `f32` value.
233	///
234	/// This conversion is lossless as all 16-bit floating point values can be represented exactly
235	/// in 32-bit floating point.
236	#[inline]
237	#[must_use]
238	pub fn to_f32(self) -> f32 {
239	arch::f16_to_f32(self.0)
240	}
241
242	/// Converts a [`f16`] value into a `f32` value.
243	///
244	/// This function is identical to [`to_f32`][Self::to_f32] except it never uses hardware
245	/// intrinsics, which allows it to be `const`. [`to_f32`][Self::to_f32] should be preferred
246	/// in any non-`const` context.
247	///
248	/// This conversion is lossless as all 16-bit floating point values can be represented exactly
249	/// in 32-bit floating point.
250	#[inline]
251	#[must_use]
252	pub const fn to_f32_const(self) -> f32 {
253	arch::f16_to_f32_fallback(self.0)
254	}
255
256	/// Converts a [`f16`] value into a `f64` value.
257	///
258	/// This conversion is lossless as all 16-bit floating point values can be represented exactly
259	/// in 64-bit floating point.
260	#[inline]
261	#[must_use]
262	pub fn to_f64(self) -> f64 {
263	arch::f16_to_f64(self.0)
264	}
265
266	/// Converts a [`f16`] value into a `f64` value.
267	///
268	/// This function is identical to [`to_f64`][Self::to_f64] except it never uses hardware
269	/// intrinsics, which allows it to be `const`. [`to_f64`][Self::to_f64] should be preferred
270	/// in any non-`const` context.
271	///
272	/// This conversion is lossless as all 16-bit floating point values can be represented exactly
273	/// in 64-bit floating point.
274	#[inline]
275	#[must_use]
276	pub const fn to_f64_const(self) -> f64 {
277	arch::f16_to_f64_fallback(self.0)
278	}
279
280	/// Returns `true` if this value is `NaN` and `false` otherwise.
281	///
282	/// # Examples
283	///
284	/// ```rust
285	/// # use half::prelude::*;
286	///
287	/// let nan = f16::NAN;
288	/// let f = f16::from_f32(`7.0_f32`);
289	///
290	/// assert!(nan.is_nan());
291	/// assert!(!f.is_nan());
292	/// ```
293	#[inline]
294	#[must_use]
295	pub const fn is_nan(self) -> bool {
296	self.0 & `0x7FFFu16` > `0x7C00u16`
297	}
298
299	/// Returns `true` if this value is ±∞ and `false`.
300	/// otherwise.
301	///
302	/// # Examples
303	///
304	/// ```rust
305	/// # use half::prelude::*;
306	///
307	/// let f = f16::from_f32(`7.0f32`);
308	/// let inf = f16::INFINITY;
309	/// let neg_inf = f16::NEG_INFINITY;
310	/// let nan = f16::NAN;
311	///
312	/// assert!(!f.is_infinite());
313	/// assert!(!nan.is_infinite());
314	///
315	/// assert!(inf.is_infinite());
316	/// assert!(neg_inf.is_infinite());
317	/// ```
318	#[inline]
319	#[must_use]
320	pub const fn is_infinite(self) -> bool {
321	self.0 & `0x7FFFu16` == `0x7C00u16`
322	}
323
324	/// Returns `true` if this number is neither infinite nor `NaN`.
325	///
326	/// # Examples
327	///
328	/// ```rust
329	/// # use half::prelude::*;
330	///
331	/// let f = f16::from_f32(`7.0f32`);
332	/// let inf = f16::INFINITY;
333	/// let neg_inf = f16::NEG_INFINITY;
334	/// let nan = f16::NAN;
335	///
336	/// assert!(f.is_finite());
337	///
338	/// assert!(!nan.is_finite());
339	/// assert!(!inf.is_finite());
340	/// assert!(!neg_inf.is_finite());
341	/// ```
342	#[inline]
343	#[must_use]
344	pub const fn is_finite(self) -> bool {
345	self.0 & `0x7C00u16` != `0x7C00u16`
346	}
347
348	/// Returns `true` if the number is neither zero, infinite, subnormal, or `NaN`.
349	///
350	/// # Examples
351	///
352	/// ```rust
353	/// # use half::prelude::*;
354	///
355	/// let min = f16::MIN_POSITIVE;
356	/// let max = f16::MAX;
357	/// let lower_than_min = f16::from_f32(`1.0e-10_f32`);
358	/// let zero = f16::from_f32(`0.0_f32`);
359	///
360	/// assert!(min.is_normal());
361	/// assert!(max.is_normal());
362	///
363	/// assert!(!zero.is_normal());
364	/// assert!(!f16::NAN.is_normal());
365	/// assert!(!f16::INFINITY.is_normal());
366	/// // Values between `0` and `min` are Subnormal.
367	/// assert!(!lower_than_min.is_normal());
368	/// ```
369	#[inline]
370	#[must_use]
371	pub const fn is_normal(self) -> bool {
372	let exp = self.0 & `0x7C00u16`;
373	exp != `0x7C00u16` && exp != `0`
374	}
375
376	/// Returns the floating point category of the number.
377	///
378	/// If only one property is going to be tested, it is generally faster to use the specific
379	/// predicate instead.
380	///
381	/// # Examples
382	///
383	/// ```rust
384	/// use std::num::FpCategory;
385	/// # use half::prelude::*;
386	///
387	/// let num = f16::from_f32(`12.4_f32`);
388	/// let inf = f16::INFINITY;
389	///
390	/// assert_eq!(num.classify(), FpCategory::Normal);
391	/// assert_eq!(inf.classify(), FpCategory::Infinite);
392	/// ```
393	#[must_use]
394	pub const fn classify(self) -> FpCategory {
395	let exp = self.0 & `0x7C00u16`;
396	let man = self.0 & `0x03FFu16`;
397	match (exp, man) {
398	(`0`, `0`) => FpCategory::Zero,
399	(`0`, _) => FpCategory::Subnormal,
400	(`0x7C00u16`, `0`) => FpCategory::Infinite,
401	(`0x7C00u16`, _) => FpCategory::Nan,
402	_ => FpCategory::Normal,
403	}
404	}
405
406	/// Returns a number that represents the sign of `self`.
407	///
408	/// `1.0` if the number is positive, `+0.0` or* [`INFINITY`][f16::INFINITY]
409	/// `-1.0` if the number is negative, `-0.0` or* [`NEG_INFINITY`][f16::NEG_INFINITY]
410	/// * [`NAN`][f16::NAN] if the number is `NaN`
411	///
412	/// # Examples
413	///
414	/// ```rust
415	/// # use half::prelude::*;
416	///
417	/// let f = f16::from_f32(`3.5_f32`);
418	///
419	/// assert_eq!(f.signum(), f16::from_f32(`1.0`));
420	/// assert_eq!(f16::NEG_INFINITY.signum(), f16::from_f32(-`1.0`));
421	///
422	/// assert!(f16::NAN.signum().is_nan());
423	/// ```
424	#[must_use]
425	pub const fn signum(self) -> f16 {
426	if self.is_nan() {
427	self
428	} else if self.0 & `0x8000u16` != `0` {
429	Self::NEG_ONE
430	} else {
431	Self::ONE
432	}
433	}
434
435	/// Returns `true` if and only if `self` has a positive sign, including `+0.0`, `NaNs` with a
436	/// positive sign bit and +∞.
437	///
438	/// # Examples
439	///
440	/// ```rust
441	/// # use half::prelude::*;
442	///
443	/// let nan = f16::NAN;
444	/// let f = f16::from_f32(`7.0_f32`);
445	/// let g = f16::from_f32(`-7.0_f32`);
446	///
447	/// assert!(f.is_sign_positive());
448	/// assert!(!g.is_sign_positive());
449	/// // `NaN` can be either positive or negative
450	/// assert!(nan.is_sign_positive() != nan.is_sign_negative());
451	/// ```
452	#[inline]
453	#[must_use]
454	pub const fn is_sign_positive(self) -> bool {
455	self.0 & `0x8000u16` == `0`
456	}
457
458	/// Returns `true` if and only if `self` has a negative sign, including `-0.0`, `NaNs` with a
459	/// negative sign bit and −∞.
460	///
461	/// # Examples
462	///
463	/// ```rust
464	/// # use half::prelude::*;
465	///
466	/// let nan = f16::NAN;
467	/// let f = f16::from_f32(`7.0f32`);
468	/// let g = f16::from_f32(`-7.0f32`);
469	///
470	/// assert!(!f.is_sign_negative());
471	/// assert!(g.is_sign_negative());
472	/// // `NaN` can be either positive or negative
473	/// assert!(nan.is_sign_positive() != nan.is_sign_negative());
474	/// ```
475	#[inline]
476	#[must_use]
477	pub const fn is_sign_negative(self) -> bool {
478	self.0 & `0x8000u16` != `0`
479	}
480
481	/// Returns a number composed of the magnitude of `self` and the sign of `sign`.
482	///
483	/// Equal to `self` if the sign of `self` and `sign` are the same, otherwise equal to `-self`.
484	/// If `self` is NaN, then NaN with the sign of `sign` is returned.
485	///
486	/// # Examples
487	///
488	/// ```
489	/// # use half::prelude::*;
490	/// let f = f16::from_f32(`3.5`);
491	///
492	/// assert_eq!(f.copysign(f16::from_f32(`0.42`)), f16::from_f32(`3.5`));
493	/// assert_eq!(f.copysign(f16::from_f32(-`0.42`)), f16::from_f32(-`3.5`));
494	/// assert_eq!((-f).copysign(f16::from_f32(`0.42`)), f16::from_f32(`3.5`));
495	/// assert_eq!((-f).copysign(f16::from_f32(-`0.42`)), f16::from_f32(-`3.5`));
496	///
497	/// assert!(f16::NAN.copysign(f16::from_f32(`1.0`)).is_nan());
498	/// ```
499	#[inline]
500	#[must_use]
501	pub const fn copysign(self, sign: f16) -> f16 {
502	f16((sign.0 & `0x8000u16`) \| (self.0 & `0x7FFFu16`))
503	}
504
505	/// Returns the maximum of the two numbers.
506	///
507	/// If one of the arguments is NaN, then the other argument is returned.
508	///
509	/// # Examples
510	///
511	/// ```
512	/// # use half::prelude::*;
513	/// let x = f16::from_f32(`1.0`);
514	/// let y = f16::from_f32(`2.0`);
515	///
516	/// assert_eq!(x.max(y), y);
517	/// ```
518	#[inline]
519	#[must_use]
520	pub fn max(self, other: f16) -> f16 {
521	if other > self && !other.is_nan() {
522	other
523	} else {
524	self
525	}
526	}
527
528	/// Returns the minimum of the two numbers.
529	///
530	/// If one of the arguments is NaN, then the other argument is returned.
531	///
532	/// # Examples
533	///
534	/// ```
535	/// # use half::prelude::*;
536	/// let x = f16::from_f32(`1.0`);
537	/// let y = f16::from_f32(`2.0`);
538	///
539	/// assert_eq!(x.min(y), x);
540	/// ```
541	#[inline]
542	#[must_use]
543	pub fn min(self, other: f16) -> f16 {
544	if other < self && !other.is_nan() {
545	other
546	} else {
547	self
548	}
549	}
550
551	/// Restrict a value to a certain interval unless it is NaN.
552	///
553	/// Returns `max` if `self` is greater than `max`, and `min` if `self` is less than `min`.
554	/// Otherwise this returns `self`.
555	///
556	/// Note that this function returns NaN if the initial value was NaN as well.
557	///
558	/// # Panics
559	/// Panics if `min > max`, `min` is NaN, or `max` is NaN.
560	///
561	/// # Examples
562	///
563	/// ```
564	/// # use half::prelude::*;
565	/// assert!(f16::from_f32(-`3.0`).clamp(f16::from_f32(-`2.0`), f16::from_f32(`1.0`)) == f16::from_f32(-`2.0`));
566	/// assert!(f16::from_f32(`0.0`).clamp(f16::from_f32(-`2.0`), f16::from_f32(`1.0`)) == f16::from_f32(`0.0`));
567	/// assert!(f16::from_f32(`2.0`).clamp(f16::from_f32(-`2.0`), f16::from_f32(`1.0`)) == f16::from_f32(`1.0`));
568	/// assert!(f16::NAN.clamp(f16::from_f32(-`2.0`), f16::from_f32(`1.0`)).is_nan());
569	/// ```
570	#[inline]
571	#[must_use]
572	pub fn clamp(self, min: f16, max: f16) -> f16 {
573	assert!(min <= max);
574	let mut x = self;
575	if x < min {
576	x = min;
577	}
578	if x > max {
579	x = max;
580	}
581	x
582	}
583
584	/// Returns the ordering between `self` and `other`.
585	///
586	/// Unlike the standard partial comparison between floating point numbers,
587	/// this comparison always produces an ordering in accordance to
588	/// the `totalOrder` predicate as defined in the IEEE 754 (2008 revision)
589	/// floating point standard. The values are ordered in the following sequence:
590	///
591	/// - negative quiet NaN
592	/// - negative signaling NaN
593	/// - negative infinity
594	/// - negative numbers
595	/// - negative subnormal numbers
596	/// - negative zero
597	/// - positive zero
598	/// - positive subnormal numbers
599	/// - positive numbers
600	/// - positive infinity
601	/// - positive signaling NaN
602	/// - positive quiet NaN.
603	///
604	/// The ordering established by this function does not always agree with the
605	/// [`PartialOrd`] and [`PartialEq`] implementations of `f16`. For example,
606	/// they consider negative and positive zero equal, while `total_cmp`
607	/// doesn't.
608	///
609	/// The interpretation of the signaling NaN bit follows the definition in
610	/// the IEEE 754 standard, which may not match the interpretation by some of
611	/// the older, non-conformant (e.g. MIPS) hardware implementations.
612	///
613	/// # Examples
614	/// ```
615	/// # use half::f16;
616	/// let mut v: Vec<f16> = vec![];
617	/// v.push(f16::ONE);
618	/// v.push(f16::INFINITY);
619	/// v.push(f16::NEG_INFINITY);
620	/// v.push(f16::NAN);
621	/// v.push(f16::MAX_SUBNORMAL);
622	/// v.push(-f16::MAX_SUBNORMAL);
623	/// v.push(f16::ZERO);
624	/// v.push(f16::NEG_ZERO);
625	/// v.push(f16::NEG_ONE);
626	/// v.push(f16::MIN_POSITIVE);
627	///
628	/// v.sort_by(\|a, b\| a.total_cmp(&b));
629	///
630	/// assert!(v
631	/// .into_iter()
632	/// .zip(
633	/// [
634	/// f16::NEG_INFINITY,
635	/// f16::NEG_ONE,
636	/// -f16::MAX_SUBNORMAL,
637	/// f16::NEG_ZERO,
638	/// f16::ZERO,
639	/// f16::MAX_SUBNORMAL,
640	/// f16::MIN_POSITIVE,
641	/// f16::ONE,
642	/// f16::INFINITY,
643	/// f16::NAN
644	/// ]
645	/// .iter()
646	/// )
647	/// .all(\|(a, b)\| a.to_bits() == b.to_bits()));
648	/// ```
649	// Implementation based on: https://doc.rust-lang.org/std/primitive.f32.html#method.total_cmp
650	#[inline]
651	#[must_use]
652	pub fn total_cmp(&self, other: &Self) -> Ordering {
653	let mut left = self.to_bits() as i16;
654	let mut right = other.to_bits() as i16;
655	left ^= (((left >> `15`) as u16) >> `1`) as i16;
656	right ^= (((right >> `15`) as u16) >> `1`) as i16;
657	left.cmp(&right)
658	}
659
/// Alternate serialize adapter for serializing as a float.
///
/// By default, [`f16`] serializes as a newtype of [`u16`]. This is an alternate serialize
/// implementation that serializes as an [`f32`] value. It is designed for use with
/// `serialize_with` serde attributes. Deserialization from `f32` values is already supported by
/// the default deserialize implementation.
///
/// # Examples
///
/// A demonstration on how to use this adapter:
///
/// ```
/// use serde::{Serialize, Deserialize};
/// use half::f16;
///
/// #[derive(Serialize, Deserialize)]
/// struct MyStruct {
///     #[serde(serialize_with = "f16::serialize_as_f32")]
///     value: f16 // Will be serialized as f32 instead of u16
/// }
/// ```
#[cfg(feature = "serde")]
pub fn serialize_as_f32<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
    serializer.serialize_f32(self.to_f32())
}
685
/// Alternate serialize adapter for serializing as a string.
///
/// By default, [`f16`] serializes as a newtype of [`u16`]. This is an alternate serialize
/// implementation that serializes as a string value. It is designed for use with
/// `serialize_with` serde attributes. Deserialization from string values is already supported
/// by the default deserialize implementation.
///
/// # Examples
///
/// A demonstration on how to use this adapter:
///
/// ```
/// use serde::{Serialize, Deserialize};
/// use half::f16;
///
/// #[derive(Serialize, Deserialize)]
/// struct MyStruct {
///     #[serde(serialize_with = "f16::serialize_as_string")]
///     value: f16 // Will be serialized as a string instead of u16
/// }
/// ```
#[cfg(all(feature = "serde", feature = "alloc"))]
pub fn serialize_as_string<S: serde::Serializer>(
    &self,
    serializer: S,
) -> Result<S::Ok, S::Error> {
    serializer.serialize_str(&self.to_string())
}
714
715	/// Approximate number of [`f16`] significant digits in base 10
716	pub const DIGITS: u32 = `3`;
717	/// [`f16`]
718	/// [machine epsilon](https://en.wikipedia.org/wiki/Machine_epsilon) value
719	///
720	/// This is the difference between 1.0 and the next largest representable number.
721	pub const EPSILON: f16 = f16(`0x1400u16`);
722	/// [`f16`] positive Infinity (+∞)
723	pub const INFINITY: f16 = f16(`0x7C00u16`);
724	/// Number of [`f16`] significant digits in base 2
725	pub const MANTISSA_DIGITS: u32 = `11`;
726	/// Largest finite [`f16`] value
727	pub const MAX: f16 = f16(`0x7BFF`);
728	/// Maximum possible [`f16`] power of 10 exponent
729	pub const MAX_10_EXP: i32 = `4`;
730	/// Maximum possible [`f16`] power of 2 exponent
731	pub const MAX_EXP: i32 = `16`;
732	/// Smallest finite [`f16`] value
733	pub const MIN: f16 = f16(`0xFBFF`);
734	/// Minimum possible normal [`f16`] power of 10 exponent
735	pub const MIN_10_EXP: i32 = `-4`;
736	/// One greater than the minimum possible normal [`f16`] power of 2 exponent
737	pub const MIN_EXP: i32 = `-13`;
738	/// Smallest positive normal [`f16`] value
739	pub const MIN_POSITIVE: f16 = f16(`0x0400u16`);
740	/// [`f16`] Not a Number (NaN)
741	pub const NAN: f16 = f16(`0x7E00u16`);
742	/// [`f16`] negative infinity (-∞)
743	pub const NEG_INFINITY: f16 = f16(`0xFC00u16`);
744	/// The radix or base of the internal representation of [`f16`]
745	pub const RADIX: u32 = `2`;
746
747	/// Minimum positive subnormal [`f16`] value
748	pub const MIN_POSITIVE_SUBNORMAL: f16 = f16(`0x0001u16`);
749	/// Maximum subnormal [`f16`] value
750	pub const MAX_SUBNORMAL: f16 = f16(`0x03FFu16`);
751
752	/// [`f16`] 1
753	pub const ONE: f16 = f16(`0x3C00u16`);
754	/// [`f16`] 0
755	pub const ZERO: f16 = f16(`0x0000u16`);
756	/// [`f16`] -0
757	pub const NEG_ZERO: f16 = f16(`0x8000u16`);
758	/// [`f16`] -1
759	pub const NEG_ONE: f16 = f16(`0xBC00u16`);
760
761	/// [`f16`] Euler's number (ℯ)
762	pub const E: f16 = f16(`0x4170u16`);
763	/// [`f16`] Archimedes' constant (π)
764	pub const PI: f16 = f16(`0x4248u16`);
765	/// [`f16`] 1/π
766	pub const FRAC_1_PI: f16 = f16(`0x3518u16`);
767	/// [`f16`] 1/√2
768	pub const FRAC_1_SQRT_2: f16 = f16(`0x39A8u16`);
769	/// [`f16`] 2/π
770	pub const FRAC_2_PI: f16 = f16(`0x3918u16`);
771	/// [`f16`] 2/√π
772	pub const FRAC_2_SQRT_PI: f16 = f16(`0x3C83u16`);
773	/// [`f16`] π/2
774	pub const FRAC_PI_2: f16 = f16(`0x3E48u16`);
775	/// [`f16`] π/3
776	pub const FRAC_PI_3: f16 = f16(`0x3C30u16`);
777	/// [`f16`] π/4
778	pub const FRAC_PI_4: f16 = f16(`0x3A48u16`);
779	/// [`f16`] π/6
780	pub const FRAC_PI_6: f16 = f16(`0x3830u16`);
781	/// [`f16`] π/8
782	pub const FRAC_PI_8: f16 = f16(`0x3648u16`);
783	/// [`f16`] 𝗅𝗇 10
784	pub const LN_10: f16 = f16(`0x409Bu16`);
785	/// [`f16`] 𝗅𝗇 2
786	pub const LN_2: f16 = f16(`0x398Cu16`);
787	/// [`f16`] 𝗅𝗈𝗀₁₀ℯ
788	pub const LOG10_E: f16 = f16(`0x36F3u16`);
789	/// [`f16`] 𝗅𝗈𝗀₁₀2
790	pub const LOG10_2: f16 = f16(`0x34D1u16`);
791	/// [`f16`] 𝗅𝗈𝗀₂ℯ
792	pub const LOG2_E: f16 = f16(`0x3DC5u16`);
793	/// [`f16`] 𝗅𝗈𝗀₂10
794	pub const LOG2_10: f16 = f16(`0x42A5u16`);
795	/// [`f16`] √2
796	pub const SQRT_2: f16 = f16(`0x3DA8u16`);
797	}
798
799	impl From<f16> for f32 {
800	#[inline]
801	fn from(x: f16) -> f32 {
802	x.to_f32()
803	}
804	}
805
806	impl From<f16> for f64 {
807	#[inline]
808	fn from(x: f16) -> f64 {
809	x.to_f64()
810	}
811	}
812
813	impl From<i8> for f16 {
814	#[inline]
815	fn from(x: i8) -> f16 {
816	// Convert to f32, then to f16
817	f16::from_f32(f32::from(x))
818	}
819	}
820
821	impl From<u8> for f16 {
822	#[inline]
823	fn from(x: u8) -> f16 {
824	// Convert to f32, then to f16
825	f16::from_f32(f32::from(x))
826	}
827	}
828
829	impl PartialEq for f16 {
830	fn eq(&self, other: &f16) -> bool {
831	if self.is_nan() \|\| other.is_nan() {
832	`false`
833	} else {
834	(self.0 == other.0) \|\| ((self.0 \| other.0) & `0x7FFFu16` == `0`)
835	}
836	}
837	}
838
839	impl PartialOrd for f16 {
840	fn partial_cmp(&self, other: &f16) -> Option<Ordering> {
841	if self.is_nan() \|\| other.is_nan() {
842	None
843	} else {
844	let neg = self.0 & `0x8000u16` != `0`;
845	let other_neg = other.0 & `0x8000u16` != `0`;
846	match (neg, other_neg) {
847	(`false`, `false`) => Some(self.0.cmp(&other.0)),
848	(`false`, `true`) => {
849	if (self.0 \| other.0) & `0x7FFFu16` == `0` {
850	Some(Ordering::Equal)
851	} else {
852	Some(Ordering::Greater)
853	}
854	}
855	(`true`, `false`) => {
856	if (self.0 \| other.0) & `0x7FFFu16` == `0` {
857	Some(Ordering::Equal)
858	} else {
859	Some(Ordering::Less)
860	}
861	}
862	(`true`, `true`) => Some(other.0.cmp(&self.0)),
863	}
864	}
865	}
866
867	fn lt(&self, other: &f16) -> bool {
868	if self.is_nan() \|\| other.is_nan() {
869	`false`
870	} else {
871	let neg = self.0 & `0x8000u16` != `0`;
872	let other_neg = other.0 & `0x8000u16` != `0`;
873	match (neg, other_neg) {
874	(`false`, `false`) => self.0 < other.0,
875	(`false`, `true`) => `false`,
876	(`true`, `false`) => (self.0 \| other.0) & `0x7FFFu16` != `0`,
877	(`true`, `true`) => self.0 > other.0,
878	}
879	}
880	}
881
882	fn le(&self, other: &f16) -> bool {
883	if self.is_nan() \|\| other.is_nan() {
884	`false`
885	} else {
886	let neg = self.0 & `0x8000u16` != `0`;
887	let other_neg = other.0 & `0x8000u16` != `0`;
888	match (neg, other_neg) {
889	(`false`, `false`) => self.0 <= other.0,
890	(`false`, `true`) => (self.0 \| other.0) & `0x7FFFu16` == `0`,
891	(`true`, `false`) => `true`,
892	(`true`, `true`) => self.0 >= other.0,
893	}
894	}
895	}
896
897	fn gt(&self, other: &f16) -> bool {
898	if self.is_nan() \|\| other.is_nan() {
899	`false`
900	} else {
901	let neg = self.0 & `0x8000u16` != `0`;
902	let other_neg = other.0 & `0x8000u16` != `0`;
903	match (neg, other_neg) {
904	(`false`, `false`) => self.0 > other.0,
905	(`false`, `true`) => (self.0 \| other.0) & `0x7FFFu16` != `0`,
906	(`true`, `false`) => `false`,
907	(`true`, `true`) => self.0 < other.0,
908	}
909	}
910	}
911
912	fn ge(&self, other: &f16) -> bool {
913	if self.is_nan() \|\| other.is_nan() {
914	`false`
915	} else {
916	let neg = self.0 & `0x8000u16` != `0`;
917	let other_neg = other.0 & `0x8000u16` != `0`;
918	match (neg, other_neg) {
919	(`false`, `false`) => self.0 >= other.0,
920	(`false`, `true`) => `true`,
921	(`true`, `false`) => (self.0 \| other.0) & `0x7FFFu16` == `0`,
922	(`true`, `true`) => self.0 <= other.0,
923	}
924	}
925	}
926	}
927
928	#[cfg(not(target_arch = "spirv"))]
929	impl FromStr for f16 {
930	type Err = ParseFloatError;
931	fn from_str(src: &str) -> Result<f16, ParseFloatError> {
932	f32::from_str(src).map(op:f16::from_f32)
933	}
934	}
935
936	#[cfg(not(target_arch = "spirv"))]
937	impl Debug for f16 {
938	fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
939	Debug::fmt(&self.to_f32(), f)
940	}
941	}
942
943	#[cfg(not(target_arch = "spirv"))]
944	impl Display for f16 {
945	fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
946	Display::fmt(&self.to_f32(), f)
947	}
948	}
949
950	#[cfg(not(target_arch = "spirv"))]
951	impl LowerExp for f16 {
952	fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
953	write!(f, "{:e}", self.to_f32())
954	}
955	}
956
957	#[cfg(not(target_arch = "spirv"))]
958	impl UpperExp for f16 {
959	fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
960	write!(f, "{:E}", self.to_f32())
961	}
962	}
963
964	#[cfg(not(target_arch = "spirv"))]
965	impl Binary for f16 {
966	fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
967	write!(f, "{:b}", self.0)
968	}
969	}
970
971	#[cfg(not(target_arch = "spirv"))]
972	impl Octal for f16 {
973	fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
974	write!(f, "{:o}", self.0)
975	}
976	}
977
978	#[cfg(not(target_arch = "spirv"))]
979	impl LowerHex for f16 {
980	fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
981	write!(f, "{:x}", self.0)
982	}
983	}
984
985	#[cfg(not(target_arch = "spirv"))]
986	impl UpperHex for f16 {
987	fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
988	write!(f, "{:X}", self.0)
989	}
990	}
991
992	impl Neg for f16 {
993	type Output = Self;
994
995	#[inline]
996	fn neg(self) -> Self::Output {
997	Self(self.0 ^ `0x8000`)
998	}
999	}
1000
1001	impl Neg for &f16 {
1002	type Output = <f16 as Neg>::Output;
1003
1004	#[inline]
1005	fn neg(self) -> Self::Output {
1006	Neg::neg(*self)
1007	}
1008	}
1009
1010	impl Add for f16 {
1011	type Output = Self;
1012
1013	#[inline]
1014	fn add(self, rhs: Self) -> Self::Output {
1015	f16(arch::add_f16(self.0, b:rhs.0))
1016	}
1017	}
1018
1019	impl Add<&f16> for f16 {
1020	type Output = <f16 as Add<f16>>::Output;
1021
1022	#[inline]
1023	fn add(self, rhs: &f16) -> Self::Output {
1024	self.add(*rhs)
1025	}
1026	}
1027
1028	impl Add<&f16> for &f16 {
1029	type Output = <f16 as Add<f16>>::Output;
1030
1031	#[inline]
1032	fn add(self, rhs: &f16) -> Self::Output {
1033	(self).add(rhs)
1034	}
1035	}
1036
1037	impl Add<f16> for &f16 {
1038	type Output = <f16 as Add<f16>>::Output;
1039
1040	#[inline]
1041	fn add(self, rhs: f16) -> Self::Output {
1042	(*self).add(rhs)
1043	}
1044	}
1045
1046	impl AddAssign for f16 {
1047	#[inline]
1048	fn add_assign(&mut self, rhs: Self) {
1049	self = (self).add(rhs);
1050	}
1051	}
1052
1053	impl AddAssign<&f16> for f16 {
1054	#[inline]
1055	fn add_assign(&mut self, rhs: &f16) {
1056	self = (self).add(rhs);
1057	}
1058	}
1059
1060	impl Sub for f16 {
1061	type Output = Self;
1062
1063	#[inline]
1064	fn sub(self, rhs: Self) -> Self::Output {
1065	f16(arch::subtract_f16(self.0, b:rhs.0))
1066	}
1067	}
1068
1069	impl Sub<&f16> for f16 {
1070	type Output = <f16 as Sub<f16>>::Output;
1071
1072	#[inline]
1073	fn sub(self, rhs: &f16) -> Self::Output {
1074	self.sub(*rhs)
1075	}
1076	}
1077
1078	impl Sub<&f16> for &f16 {
1079	type Output = <f16 as Sub<f16>>::Output;
1080
1081	#[inline]
1082	fn sub(self, rhs: &f16) -> Self::Output {
1083	(self).sub(rhs)
1084	}
1085	}
1086
1087	impl Sub<f16> for &f16 {
1088	type Output = <f16 as Sub<f16>>::Output;
1089
1090	#[inline]
1091	fn sub(self, rhs: f16) -> Self::Output {
1092	(*self).sub(rhs)
1093	}
1094	}
1095
1096	impl SubAssign for f16 {
1097	#[inline]
1098	fn sub_assign(&mut self, rhs: Self) {
1099	self = (self).sub(rhs);
1100	}
1101	}
1102
1103	impl SubAssign<&f16> for f16 {
1104	#[inline]
1105	fn sub_assign(&mut self, rhs: &f16) {
1106	self = (self).sub(rhs);
1107	}
1108	}
1109
1110	impl Mul for f16 {
1111	type Output = Self;
1112
1113	#[inline]
1114	fn mul(self, rhs: Self) -> Self::Output {
1115	f16(arch::multiply_f16(self.0, b:rhs.0))
1116	}
1117	}
1118
1119	impl Mul<&f16> for f16 {
1120	type Output = <f16 as Mul<f16>>::Output;
1121
1122	#[inline]
1123	fn mul(self, rhs: &f16) -> Self::Output {
1124	self.mul(*rhs)
1125	}
1126	}
1127
1128	impl Mul<&f16> for &f16 {
1129	type Output = <f16 as Mul<f16>>::Output;
1130
1131	#[inline]
1132	fn mul(self, rhs: &f16) -> Self::Output {
1133	(self).mul(rhs)
1134	}
1135	}
1136
1137	impl Mul<f16> for &f16 {
1138	type Output = <f16 as Mul<f16>>::Output;
1139
1140	#[inline]
1141	fn mul(self, rhs: f16) -> Self::Output {
1142	(*self).mul(rhs)
1143	}
1144	}
1145
1146	impl MulAssign for f16 {
1147	#[inline]
1148	fn mul_assign(&mut self, rhs: Self) {
1149	self = (self).mul(rhs);
1150	}
1151	}
1152
1153	impl MulAssign<&f16> for f16 {
1154	#[inline]
1155	fn mul_assign(&mut self, rhs: &f16) {
1156	self = (self).mul(rhs);
1157	}
1158	}
1159
1160	impl Div for f16 {
1161	type Output = Self;
1162
1163	#[inline]
1164	fn div(self, rhs: Self) -> Self::Output {
1165	f16(arch::divide_f16(self.0, b:rhs.0))
1166	}
1167	}
1168
1169	impl Div<&f16> for f16 {
1170	type Output = <f16 as Div<f16>>::Output;
1171
1172	#[inline]
1173	fn div(self, rhs: &f16) -> Self::Output {
1174	self.div(*rhs)
1175	}
1176	}
1177
1178	impl Div<&f16> for &f16 {
1179	type Output = <f16 as Div<f16>>::Output;
1180
1181	#[inline]
1182	fn div(self, rhs: &f16) -> Self::Output {
1183	(self).div(rhs)
1184	}
1185	}
1186
1187	impl Div<f16> for &f16 {
1188	type Output = <f16 as Div<f16>>::Output;
1189
1190	#[inline]
1191	fn div(self, rhs: f16) -> Self::Output {
1192	(*self).div(rhs)
1193	}
1194	}
1195
1196	impl DivAssign for f16 {
1197	#[inline]
1198	fn div_assign(&mut self, rhs: Self) {
1199	self = (self).div(rhs);
1200	}
1201	}
1202
1203	impl DivAssign<&f16> for f16 {
1204	#[inline]
1205	fn div_assign(&mut self, rhs: &f16) {
1206	self = (self).div(rhs);
1207	}
1208	}
1209
1210	impl Rem for f16 {
1211	type Output = Self;
1212
1213	#[inline]
1214	fn rem(self, rhs: Self) -> Self::Output {
1215	f16(arch::remainder_f16(self.0, b:rhs.0))
1216	}
1217	}
1218
1219	impl Rem<&f16> for f16 {
1220	type Output = <f16 as Rem<f16>>::Output;
1221
1222	#[inline]
1223	fn rem(self, rhs: &f16) -> Self::Output {
1224	self.rem(*rhs)
1225	}
1226	}
1227
1228	impl Rem<&f16> for &f16 {
1229	type Output = <f16 as Rem<f16>>::Output;
1230
1231	#[inline]
1232	fn rem(self, rhs: &f16) -> Self::Output {
1233	(self).rem(rhs)
1234	}
1235	}
1236
1237	impl Rem<f16> for &f16 {
1238	type Output = <f16 as Rem<f16>>::Output;
1239
1240	#[inline]
1241	fn rem(self, rhs: f16) -> Self::Output {
1242	(*self).rem(rhs)
1243	}
1244	}
1245
1246	impl RemAssign for f16 {
1247	#[inline]
1248	fn rem_assign(&mut self, rhs: Self) {
1249	self = (self).rem(rhs);
1250	}
1251	}
1252
1253	impl RemAssign<&f16> for f16 {
1254	#[inline]
1255	fn rem_assign(&mut self, rhs: &f16) {
1256	self = (self).rem(rhs);
1257	}
1258	}
1259
1260	impl Product for f16 {
1261	#[inline]
1262	fn product<I: Iterator<Item = Self>>(iter: I) -> Self {
1263	f16(arch::product_f16(iter:iter.map(\|f: f16\| f.to_bits())))
1264	}
1265	}
1266
1267	impl<'a> Product<&'a f16> for f16 {
1268	#[inline]
1269	fn product<I: Iterator<Item = &'a f16>>(iter: I) -> Self {
1270	f16(arch::product_f16(iter:iter.map(\|f: &f16\| f.to_bits())))
1271	}
1272	}
1273
1274	impl Sum for f16 {
1275	#[inline]
1276	fn sum<I: Iterator<Item = Self>>(iter: I) -> Self {
1277	f16(arch::sum_f16(iter:iter.map(\|f: f16\| f.to_bits())))
1278	}
1279	}
1280
1281	impl<'a> Sum<&'a f16> for f16 {
1282	#[inline]
1283	fn sum<I: Iterator<Item = &'a f16>>(iter: I) -> Self {
1284	f16(arch::sum_f16(iter:iter.map(\|f: &f16\| f.to_bits())))
1285	}
1286	}
1287
/// Zero-sized serde visitor that produces [`f16`] values; it is the visitor passed
/// to the deserializer by `f16`'s `Deserialize` implementation.
#[cfg(feature = "serde")]
struct Visitor;
1290
#[cfg(feature = "serde")]
impl<'de> Deserialize<'de> for f16 {
    /// Deserializes an `f16` by asking the deserializer for a newtype struct named
    /// `"f16"` and letting [`Visitor`] interpret whatever the format supplies.
    fn deserialize<D>(deserializer: D) -> Result<f16, D::Error>
    where
        D: serde::de::Deserializer<'de>,
    {
        const TYPE_NAME: &str = "f16";
        deserializer.deserialize_newtype_struct(TYPE_NAME, Visitor)
    }
}
1300
#[cfg(feature = "serde")]
impl<'de> serde::de::Visitor<'de> for Visitor {
    type Value = f16;

    /// Describes the expected input for serde error messages.
    fn expecting(&self, formatter: &mut core::fmt::Formatter) -> core::fmt::Result {
        write!(formatter, "tuple struct f16")
    }

    /// Newtype-struct form: the payload is the raw `u16` bit pattern.
    fn visit_newtype_struct<D>(self, deserializer: D) -> Result<Self::Value, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        Ok(f16(<u16 as Deserialize>::deserialize(deserializer)?))
    }

    /// String form: the text is parsed as a float; a parse failure is reported as
    /// an `invalid_value` error that carries the offending string.
    fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
    where
        E: serde::de::Error,
    {
        v.parse().map_err(|_| {
            serde::de::Error::invalid_value(serde::de::Unexpected::Str(v), &"a float string")
        })
    }

    /// `f32` form: converted with `f16::from_f32`.
    fn visit_f32<E>(self, v: f32) -> Result<Self::Value, E>
    where
        E: serde::de::Error,
    {
        Ok(f16::from_f32(v))
    }

    /// `f64` form: converted with `f16::from_f64`.
    fn visit_f64<E>(self, v: f64) -> Result<Self::Value, E>
    where
        E: serde::de::Error,
    {
        Ok(f16::from_f64(v))
    }
}
1339
#[allow(
    clippy::cognitive_complexity,
    clippy::float_cmp,
    clippy::neg_cmp_op_on_partial_ord
)]
#[cfg(test)]
mod test {
    use super::*;
    #[allow(unused_imports)]
    use core::cmp::Ordering;
    #[cfg(feature = "num-traits")]
    use num_traits::{AsPrimitive, FromPrimitive, ToPrimitive};
    use quickcheck_macros::quickcheck;

    // `AsPrimitive` conversions between f16 and i32/f32/f64 in both directions.
    #[cfg(feature = "num-traits")]
    #[test]
    fn as_primitive() {
        let two = f16::from_f32(2.0);
        assert_eq!(<i32 as AsPrimitive<f16>>::as_(2), two);
        assert_eq!(<f16 as AsPrimitive<i32>>::as_(two), 2);

        assert_eq!(<f32 as AsPrimitive<f16>>::as_(2.0), two);
        assert_eq!(<f16 as AsPrimitive<f32>>::as_(two), 2.0);

        assert_eq!(<f64 as AsPrimitive<f16>>::as_(2.0), two);
        assert_eq!(<f16 as AsPrimitive<f64>>::as_(two), 2.0);
    }

    // `ToPrimitive` conversions out of f16.
    #[cfg(feature = "num-traits")]
    #[test]
    fn to_primitive() {
        let two = f16::from_f32(2.0);
        assert_eq!(ToPrimitive::to_i32(&two).unwrap(), 2i32);
        assert_eq!(ToPrimitive::to_f32(&two).unwrap(), 2.0f32);
        assert_eq!(ToPrimitive::to_f64(&two).unwrap(), 2.0f64);
    }

    // `FromPrimitive` conversions into f16.
    #[cfg(feature = "num-traits")]
    #[test]
    fn from_primitive() {
        let two = f16::from_f32(2.0);
        assert_eq!(<f16 as FromPrimitive>::from_i32(2).unwrap(), two);
        assert_eq!(<f16 as FromPrimitive>::from_f32(2.0).unwrap(), two);
        assert_eq!(<f16 as FromPrimitive>::from_f64(2.0).unwrap(), two);
    }

    // Re-derives the numeric limit constants and compares them with the declared
    // ones; each derivation is also applied to f32 to show the method is sound.
    #[test]
    fn test_f16_consts() {
        // DIGITS
        let digits = ((f16::MANTISSA_DIGITS as f32 - 1.0) * 2f32.log10()).floor() as u32;
        assert_eq!(f16::DIGITS, digits);
        // sanity check to show test is good
        let digits32 = ((core::f32::MANTISSA_DIGITS as f32 - 1.0) * 2f32.log10()).floor() as u32;
        assert_eq!(core::f32::DIGITS, digits32);

        // EPSILON
        let one = f16::from_f32(1.0);
        let one_plus_epsilon = f16::from_bits(one.to_bits() + 1);
        let epsilon = f16::from_f32(one_plus_epsilon.to_f32() - 1.0);
        assert_eq!(f16::EPSILON, epsilon);
        // sanity check to show test is good
        let one_plus_epsilon32 = f32::from_bits(1.0f32.to_bits() + 1);
        let epsilon32 = one_plus_epsilon32 - 1f32;
        assert_eq!(core::f32::EPSILON, epsilon32);

        // MAX, MIN and MIN_POSITIVE
        let max = f16::from_bits(f16::INFINITY.to_bits() - 1);
        let min = f16::from_bits(f16::NEG_INFINITY.to_bits() - 1);
        let min_pos = f16::from_f32(2f32.powi(f16::MIN_EXP - 1));
        assert_eq!(f16::MAX, max);
        assert_eq!(f16::MIN, min);
        assert_eq!(f16::MIN_POSITIVE, min_pos);
        // sanity check to show test is good
        let max32 = f32::from_bits(core::f32::INFINITY.to_bits() - 1);
        let min32 = f32::from_bits(core::f32::NEG_INFINITY.to_bits() - 1);
        let min_pos32 = 2f32.powi(core::f32::MIN_EXP - 1);
        assert_eq!(core::f32::MAX, max32);
        assert_eq!(core::f32::MIN, min32);
        assert_eq!(core::f32::MIN_POSITIVE, min_pos32);

        // MIN_10_EXP and MAX_10_EXP
        let ten_to_min = 10f32.powi(f16::MIN_10_EXP);
        assert!(ten_to_min / 10.0 < f16::MIN_POSITIVE.to_f32());
        assert!(ten_to_min > f16::MIN_POSITIVE.to_f32());
        let ten_to_max = 10f32.powi(f16::MAX_10_EXP);
        assert!(ten_to_max < f16::MAX.to_f32());
        assert!(ten_to_max * 10.0 > f16::MAX.to_f32());
        // sanity check to show test is good
        let ten_to_min32 = 10f64.powi(core::f32::MIN_10_EXP);
        assert!(ten_to_min32 / 10.0 < f64::from(core::f32::MIN_POSITIVE));
        assert!(ten_to_min32 > f64::from(core::f32::MIN_POSITIVE));
        let ten_to_max32 = 10f64.powi(core::f32::MAX_10_EXP);
        assert!(ten_to_max32 < f64::from(core::f32::MAX));
        assert!(ten_to_max32 * 10.0 > f64::from(core::f32::MAX));
    }

    // Every named constant must equal the f32 constant converted with `from_f32`.
    #[test]
    fn test_f16_consts_from_f32() {
        let one = f16::from_f32(1.0);
        let zero = f16::from_f32(0.0);
        let neg_zero = f16::from_f32(-0.0);
        let neg_one = f16::from_f32(-1.0);
        let inf = f16::from_f32(core::f32::INFINITY);
        let neg_inf = f16::from_f32(core::f32::NEG_INFINITY);
        let nan = f16::from_f32(core::f32::NAN);

        assert_eq!(f16::ONE, one);
        assert_eq!(f16::ZERO, zero);
        assert!(zero.is_sign_positive());
        assert_eq!(f16::NEG_ZERO, neg_zero);
        assert!(neg_zero.is_sign_negative());
        assert_eq!(f16::NEG_ONE, neg_one);
        assert!(neg_one.is_sign_negative());
        assert_eq!(f16::INFINITY, inf);
        assert_eq!(f16::NEG_INFINITY, neg_inf);
        assert!(nan.is_nan());
        assert!(f16::NAN.is_nan());

        let e = f16::from_f32(core::f32::consts::E);
        let pi = f16::from_f32(core::f32::consts::PI);
        let frac_1_pi = f16::from_f32(core::f32::consts::FRAC_1_PI);
        let frac_1_sqrt_2 = f16::from_f32(core::f32::consts::FRAC_1_SQRT_2);
        let frac_2_pi = f16::from_f32(core::f32::consts::FRAC_2_PI);
        let frac_2_sqrt_pi = f16::from_f32(core::f32::consts::FRAC_2_SQRT_PI);
        let frac_pi_2 = f16::from_f32(core::f32::consts::FRAC_PI_2);
        let frac_pi_3 = f16::from_f32(core::f32::consts::FRAC_PI_3);
        let frac_pi_4 = f16::from_f32(core::f32::consts::FRAC_PI_4);
        let frac_pi_6 = f16::from_f32(core::f32::consts::FRAC_PI_6);
        let frac_pi_8 = f16::from_f32(core::f32::consts::FRAC_PI_8);
        let ln_10 = f16::from_f32(core::f32::consts::LN_10);
        let ln_2 = f16::from_f32(core::f32::consts::LN_2);
        let log10_e = f16::from_f32(core::f32::consts::LOG10_E);
        // core::f32::consts::LOG10_2 requires rustc 1.43.0
        let log10_2 = f16::from_f32(2f32.log10());
        let log2_e = f16::from_f32(core::f32::consts::LOG2_E);
        // core::f32::consts::LOG2_10 requires rustc 1.43.0
        let log2_10 = f16::from_f32(10f32.log2());
        let sqrt_2 = f16::from_f32(core::f32::consts::SQRT_2);

        assert_eq!(f16::E, e);
        assert_eq!(f16::PI, pi);
        assert_eq!(f16::FRAC_1_PI, frac_1_pi);
        assert_eq!(f16::FRAC_1_SQRT_2, frac_1_sqrt_2);
        assert_eq!(f16::FRAC_2_PI, frac_2_pi);
        assert_eq!(f16::FRAC_2_SQRT_PI, frac_2_sqrt_pi);
        assert_eq!(f16::FRAC_PI_2, frac_pi_2);
        assert_eq!(f16::FRAC_PI_3, frac_pi_3);
        assert_eq!(f16::FRAC_PI_4, frac_pi_4);
        assert_eq!(f16::FRAC_PI_6, frac_pi_6);
        assert_eq!(f16::FRAC_PI_8, frac_pi_8);
        assert_eq!(f16::LN_10, ln_10);
        assert_eq!(f16::LN_2, ln_2);
        assert_eq!(f16::LOG10_E, log10_e);
        assert_eq!(f16::LOG10_2, log10_2);
        assert_eq!(f16::LOG2_E, log2_e);
        assert_eq!(f16::LOG2_10, log2_10);
        assert_eq!(f16::SQRT_2, sqrt_2);
    }

    // Every named constant must equal the f64 constant converted with `from_f64`.
    #[test]
    fn test_f16_consts_from_f64() {
        let one = f16::from_f64(1.0);
        let zero = f16::from_f64(0.0);
        let neg_zero = f16::from_f64(-0.0);
        let inf = f16::from_f64(core::f64::INFINITY);
        let neg_inf = f16::from_f64(core::f64::NEG_INFINITY);
        let nan = f16::from_f64(core::f64::NAN);

        assert_eq!(f16::ONE, one);
        assert_eq!(f16::ZERO, zero);
        assert!(zero.is_sign_positive());
        assert_eq!(f16::NEG_ZERO, neg_zero);
        assert!(neg_zero.is_sign_negative());
        assert_eq!(f16::INFINITY, inf);
        assert_eq!(f16::NEG_INFINITY, neg_inf);
        assert!(nan.is_nan());
        assert!(f16::NAN.is_nan());

        let e = f16::from_f64(core::f64::consts::E);
        let pi = f16::from_f64(core::f64::consts::PI);
        let frac_1_pi = f16::from_f64(core::f64::consts::FRAC_1_PI);
        let frac_1_sqrt_2 = f16::from_f64(core::f64::consts::FRAC_1_SQRT_2);
        let frac_2_pi = f16::from_f64(core::f64::consts::FRAC_2_PI);
        let frac_2_sqrt_pi = f16::from_f64(core::f64::consts::FRAC_2_SQRT_PI);
        let frac_pi_2 = f16::from_f64(core::f64::consts::FRAC_PI_2);
        let frac_pi_3 = f16::from_f64(core::f64::consts::FRAC_PI_3);
        let frac_pi_4 = f16::from_f64(core::f64::consts::FRAC_PI_4);
        let frac_pi_6 = f16::from_f64(core::f64::consts::FRAC_PI_6);
        let frac_pi_8 = f16::from_f64(core::f64::consts::FRAC_PI_8);
        let ln_10 = f16::from_f64(core::f64::consts::LN_10);
        let ln_2 = f16::from_f64(core::f64::consts::LN_2);
        let log10_e = f16::from_f64(core::f64::consts::LOG10_E);
        // core::f64::consts::LOG10_2 requires rustc 1.43.0
        let log10_2 = f16::from_f64(2f64.log10());
        let log2_e = f16::from_f64(core::f64::consts::LOG2_E);
        // core::f64::consts::LOG2_10 requires rustc 1.43.0
        let log2_10 = f16::from_f64(10f64.log2());
        let sqrt_2 = f16::from_f64(core::f64::consts::SQRT_2);

        assert_eq!(f16::E, e);
        assert_eq!(f16::PI, pi);
        assert_eq!(f16::FRAC_1_PI, frac_1_pi);
        assert_eq!(f16::FRAC_1_SQRT_2, frac_1_sqrt_2);
        assert_eq!(f16::FRAC_2_PI, frac_2_pi);
        assert_eq!(f16::FRAC_2_SQRT_PI, frac_2_sqrt_pi);
        assert_eq!(f16::FRAC_PI_2, frac_pi_2);
        assert_eq!(f16::FRAC_PI_3, frac_pi_3);
        assert_eq!(f16::FRAC_PI_4, frac_pi_4);
        assert_eq!(f16::FRAC_PI_6, frac_pi_6);
        assert_eq!(f16::FRAC_PI_8, frac_pi_8);
        assert_eq!(f16::LN_10, ln_10);
        assert_eq!(f16::LN_2, ln_2);
        assert_eq!(f16::LOG10_E, log10_e);
        assert_eq!(f16::LOG10_2, log10_2);
        assert_eq!(f16::LOG2_E, log2_e);
        assert_eq!(f16::LOG2_10, log2_10);
        assert_eq!(f16::SQRT_2, sqrt_2);
    }

    // NaNs must stay NaN when narrowed (f64 -> f32 -> f16).
    #[test]
    fn test_nan_conversion_to_smaller() {
        let nan64 = f64::from_bits(0x7FF0_0000_0000_0001u64);
        let neg_nan64 = f64::from_bits(0xFFF0_0000_0000_0001u64);
        let nan32 = f32::from_bits(0x7F80_0001u32);
        let neg_nan32 = f32::from_bits(0xFF80_0001u32);
        let nan32_from_64 = nan64 as f32;
        let neg_nan32_from_64 = neg_nan64 as f32;
        let nan16_from_64 = f16::from_f64(nan64);
        let neg_nan16_from_64 = f16::from_f64(neg_nan64);
        let nan16_from_32 = f16::from_f32(nan32);
        let neg_nan16_from_32 = f16::from_f32(neg_nan32);

        assert!(nan64.is_nan() && nan64.is_sign_positive());
        assert!(neg_nan64.is_nan() && neg_nan64.is_sign_negative());
        assert!(nan32.is_nan() && nan32.is_sign_positive());
        assert!(neg_nan32.is_nan() && neg_nan32.is_sign_negative());

        // f32/f64 NaN conversion sign is non-deterministic: https://github.com/starkat99/half-rs/issues/103
        assert!(nan32_from_64.is_nan());
        assert!(neg_nan32_from_64.is_nan());
        assert!(nan16_from_64.is_nan());
        assert!(neg_nan16_from_64.is_nan());
        assert!(nan16_from_32.is_nan());
        assert!(neg_nan16_from_32.is_nan());
    }

    // NaNs must stay NaN when widened (f16 -> f32 -> f64).
    #[test]
    fn test_nan_conversion_to_larger() {
        let nan16 = f16::from_bits(0x7C01u16);
        let neg_nan16 = f16::from_bits(0xFC01u16);
        let nan32 = f32::from_bits(0x7F80_0001u32);
        let neg_nan32 = f32::from_bits(0xFF80_0001u32);
        let nan32_from_16 = f32::from(nan16);
        let neg_nan32_from_16 = f32::from(neg_nan16);
        let nan64_from_16 = f64::from(nan16);
        let neg_nan64_from_16 = f64::from(neg_nan16);
        let nan64_from_32 = f64::from(nan32);
        let neg_nan64_from_32 = f64::from(neg_nan32);

        assert!(nan16.is_nan() && nan16.is_sign_positive());
        assert!(neg_nan16.is_nan() && neg_nan16.is_sign_negative());
        assert!(nan32.is_nan() && nan32.is_sign_positive());
        assert!(neg_nan32.is_nan() && neg_nan32.is_sign_negative());

        // f32/f64 NaN conversion sign is non-deterministic: https://github.com/starkat99/half-rs/issues/103
        assert!(nan32_from_16.is_nan());
        assert!(neg_nan32_from_16.is_nan());
        assert!(nan64_from_16.is_nan());
        assert!(neg_nan64_from_16.is_nan());
        assert!(nan64_from_32.is_nan());
        assert!(neg_nan64_from_32.is_nan());
    }

    // f16 <-> f32 conversion of exact, rounded and subnormal values.
    #[test]
    fn test_f16_to_f32() {
        let f = f16::from_f32(7.0);
        assert_eq!(f.to_f32(), 7.0f32);

        // 7.1 is NOT exactly representable in 16-bit, it's rounded
        let f = f16::from_f32(7.1);
        let diff = (f.to_f32() - 7.1f32).abs();
        // diff must be <= 4 * EPSILON, as 7 has two more significant bits than 1
        assert!(diff <= 4.0 * f16::EPSILON.to_f32());

        assert_eq!(f16::from_bits(0x0000_0001).to_f32(), 2.0f32.powi(-24));
        assert_eq!(f16::from_bits(0x0000_0005).to_f32(), 5.0 * 2.0f32.powi(-24));

        assert_eq!(f16::from_bits(0x0000_0001), f16::from_f32(2.0f32.powi(-24)));
        assert_eq!(
            f16::from_bits(0x0000_0005),
            f16::from_f32(5.0 * 2.0f32.powi(-24))
        );
    }

    // f16 <-> f64 conversion of exact, rounded and subnormal values.
    #[test]
    fn test_f16_to_f64() {
        let f = f16::from_f64(7.0);
        assert_eq!(f.to_f64(), 7.0f64);

        // 7.1 is NOT exactly representable in 16-bit, it's rounded
        let f = f16::from_f64(7.1);
        let diff = (f.to_f64() - 7.1f64).abs();
        // diff must be <= 4 * EPSILON, as 7 has two more significant bits than 1
        assert!(diff <= 4.0 * f16::EPSILON.to_f64());

        assert_eq!(f16::from_bits(0x0000_0001).to_f64(), 2.0f64.powi(-24));
        assert_eq!(f16::from_bits(0x0000_0005).to_f64(), 5.0 * 2.0f64.powi(-24));

        assert_eq!(f16::from_bits(0x0000_0001), f16::from_f64(2.0f64.powi(-24)));
        assert_eq!(
            f16::from_bits(0x0000_0005),
            f16::from_f64(5.0 * 2.0f64.powi(-24))
        );
    }

    // Ordering and equality, including the +0.0 / -0.0 pair which must compare equal.
    #[test]
    fn test_comparisons() {
        let zero = f16::from_f64(0.0);
        let one = f16::from_f64(1.0);
        let neg_zero = f16::from_f64(-0.0);
        let neg_one = f16::from_f64(-1.0);

        assert_eq!(zero.partial_cmp(&neg_zero), Some(Ordering::Equal));
        assert_eq!(neg_zero.partial_cmp(&zero), Some(Ordering::Equal));
        assert!(zero == neg_zero);
        assert!(neg_zero == zero);
        assert!(!(zero != neg_zero));
        assert!(!(neg_zero != zero));
        assert!(!(zero < neg_zero));
        assert!(!(neg_zero < zero));
        assert!(zero <= neg_zero);
        assert!(neg_zero <= zero);
        assert!(!(zero > neg_zero));
        assert!(!(neg_zero > zero));
        assert!(zero >= neg_zero);
        assert!(neg_zero >= zero);

        assert_eq!(one.partial_cmp(&neg_zero), Some(Ordering::Greater));
        assert_eq!(neg_zero.partial_cmp(&one), Some(Ordering::Less));
        assert!(!(one == neg_zero));
        assert!(!(neg_zero == one));
        assert!(one != neg_zero);
        assert!(neg_zero != one);
        assert!(!(one < neg_zero));
        assert!(neg_zero < one);
        assert!(!(one <= neg_zero));
        assert!(neg_zero <= one);
        assert!(one > neg_zero);
        assert!(!(neg_zero > one));
        assert!(one >= neg_zero);
        assert!(!(neg_zero >= one));

        assert_eq!(one.partial_cmp(&neg_one), Some(Ordering::Greater));
        assert_eq!(neg_one.partial_cmp(&one), Some(Ordering::Less));
        assert!(!(one == neg_one));
        assert!(!(neg_one == one));
        assert!(one != neg_one);
        assert!(neg_one != one);
        assert!(!(one < neg_one));
        assert!(neg_one < one);
        assert!(!(one <= neg_one));
        assert!(neg_one <= one);
        assert!(one > neg_one);
        assert!(!(neg_one > one));
        assert!(one >= neg_one);
        assert!(!(neg_one >= one));
    }

    // Round-half-to-even behavior of `from_f32`, for subnormals and for values
    // near 2000 where the f16 spacing is exactly 1.0.
    #[test]
    #[allow(clippy::erasing_op, clippy::identity_op)]
    fn round_to_even_f32() {
        // smallest positive subnormal = 0b0.0000_0000_01 * 2^-14 = 2^-24
        let min_sub = f16::from_bits(1);
        let min_sub_f = (-24f32).exp2();
        assert_eq!(f16::from_f32(min_sub_f).to_bits(), min_sub.to_bits());
        assert_eq!(f32::from(min_sub).to_bits(), min_sub_f.to_bits());

        // 0.0000000000_011111 rounded to 0.0000000000 (< tie, no rounding)
        // 0.0000000000_100000 rounded to 0.0000000000 (tie and even, remains at even)
        // 0.0000000000_100001 rounded to 0.0000000001 (> tie, rounds up)
        assert_eq!(
            f16::from_f32(min_sub_f * 0.49).to_bits(),
            min_sub.to_bits() * 0
        );
        assert_eq!(
            f16::from_f32(min_sub_f * 0.50).to_bits(),
            min_sub.to_bits() * 0
        );
        assert_eq!(
            f16::from_f32(min_sub_f * 0.51).to_bits(),
            min_sub.to_bits() * 1
        );

        // 0.0000000001_011111 rounded to 0.0000000001 (< tie, no rounding)
        // 0.0000000001_100000 rounded to 0.0000000010 (tie and odd, rounds up to even)
        // 0.0000000001_100001 rounded to 0.0000000010 (> tie, rounds up)
        assert_eq!(
            f16::from_f32(min_sub_f * 1.49).to_bits(),
            min_sub.to_bits() * 1
        );
        assert_eq!(
            f16::from_f32(min_sub_f * 1.50).to_bits(),
            min_sub.to_bits() * 2
        );
        assert_eq!(
            f16::from_f32(min_sub_f * 1.51).to_bits(),
            min_sub.to_bits() * 2
        );

        // 0.0000000010_011111 rounded to 0.0000000010 (< tie, no rounding)
        // 0.0000000010_100000 rounded to 0.0000000010 (tie and even, remains at even)
        // 0.0000000010_100001 rounded to 0.0000000011 (> tie, rounds up)
        assert_eq!(
            f16::from_f32(min_sub_f * 2.49).to_bits(),
            min_sub.to_bits() * 2
        );
        assert_eq!(
            f16::from_f32(min_sub_f * 2.50).to_bits(),
            min_sub.to_bits() * 2
        );
        assert_eq!(
            f16::from_f32(min_sub_f * 2.51).to_bits(),
            min_sub.to_bits() * 3
        );

        assert_eq!(
            f16::from_f32(2000.49f32).to_bits(),
            f16::from_f32(2000.0).to_bits()
        );
        assert_eq!(
            f16::from_f32(2000.50f32).to_bits(),
            f16::from_f32(2000.0).to_bits()
        );
        assert_eq!(
            f16::from_f32(2000.51f32).to_bits(),
            f16::from_f32(2001.0).to_bits()
        );
        assert_eq!(
            f16::from_f32(2001.49f32).to_bits(),
            f16::from_f32(2001.0).to_bits()
        );
        assert_eq!(
            f16::from_f32(2001.50f32).to_bits(),
            f16::from_f32(2002.0).to_bits()
        );
        assert_eq!(
            f16::from_f32(2001.51f32).to_bits(),
            f16::from_f32(2002.0).to_bits()
        );
        assert_eq!(
            f16::from_f32(2002.49f32).to_bits(),
            f16::from_f32(2002.0).to_bits()
        );
        assert_eq!(
            f16::from_f32(2002.50f32).to_bits(),
            f16::from_f32(2002.0).to_bits()
        );
        assert_eq!(
            f16::from_f32(2002.51f32).to_bits(),
            f16::from_f32(2003.0).to_bits()
        );
    }

    // Round-half-to-even behavior of `from_f64`, for subnormals and for values
    // near 2000 where the f16 spacing is exactly 1.0.
    #[test]
    #[allow(clippy::erasing_op, clippy::identity_op)]
    fn round_to_even_f64() {
        // smallest positive subnormal = 0b0.0000_0000_01 * 2^-14 = 2^-24
        let min_sub = f16::from_bits(1);
        let min_sub_f = (-24f64).exp2();
        assert_eq!(f16::from_f64(min_sub_f).to_bits(), min_sub.to_bits());
        assert_eq!(f64::from(min_sub).to_bits(), min_sub_f.to_bits());

        // 0.0000000000_011111 rounded to 0.0000000000 (< tie, no rounding)
        // 0.0000000000_100000 rounded to 0.0000000000 (tie and even, remains at even)
        // 0.0000000000_100001 rounded to 0.0000000001 (> tie, rounds up)
        assert_eq!(
            f16::from_f64(min_sub_f * 0.49).to_bits(),
            min_sub.to_bits() * 0
        );
        assert_eq!(
            f16::from_f64(min_sub_f * 0.50).to_bits(),
            min_sub.to_bits() * 0
        );
        assert_eq!(
            f16::from_f64(min_sub_f * 0.51).to_bits(),
            min_sub.to_bits() * 1
        );

        // 0.0000000001_011111 rounded to 0.0000000001 (< tie, no rounding)
        // 0.0000000001_100000 rounded to 0.0000000010 (tie and odd, rounds up to even)
        // 0.0000000001_100001 rounded to 0.0000000010 (> tie, rounds up)
        assert_eq!(
            f16::from_f64(min_sub_f * 1.49).to_bits(),
            min_sub.to_bits() * 1
        );
        assert_eq!(
            f16::from_f64(min_sub_f * 1.50).to_bits(),
            min_sub.to_bits() * 2
        );
        assert_eq!(
            f16::from_f64(min_sub_f * 1.51).to_bits(),
            min_sub.to_bits() * 2
        );

        // 0.0000000010_011111 rounded to 0.0000000010 (< tie, no rounding)
        // 0.0000000010_100000 rounded to 0.0000000010 (tie and even, remains at even)
        // 0.0000000010_100001 rounded to 0.0000000011 (> tie, rounds up)
        assert_eq!(
            f16::from_f64(min_sub_f * 2.49).to_bits(),
            min_sub.to_bits() * 2
        );
        assert_eq!(
            f16::from_f64(min_sub_f * 2.50).to_bits(),
            min_sub.to_bits() * 2
        );
        assert_eq!(
            f16::from_f64(min_sub_f * 2.51).to_bits(),
            min_sub.to_bits() * 3
        );

        assert_eq!(
            f16::from_f64(2000.49f64).to_bits(),
            f16::from_f64(2000.0).to_bits()
        );
        assert_eq!(
            f16::from_f64(2000.50f64).to_bits(),
            f16::from_f64(2000.0).to_bits()
        );
        assert_eq!(
            f16::from_f64(2000.51f64).to_bits(),
            f16::from_f64(2001.0).to_bits()
        );
        assert_eq!(
            f16::from_f64(2001.49f64).to_bits(),
            f16::from_f64(2001.0).to_bits()
        );
        assert_eq!(
            f16::from_f64(2001.50f64).to_bits(),
            f16::from_f64(2002.0).to_bits()
        );
        assert_eq!(
            f16::from_f64(2001.51f64).to_bits(),
            f16::from_f64(2002.0).to_bits()
        );
        assert_eq!(
            f16::from_f64(2002.49f64).to_bits(),
            f16::from_f64(2002.0).to_bits()
        );
        assert_eq!(
            f16::from_f64(2002.50f64).to_bits(),
            f16::from_f64(2002.0).to_bits()
        );
        assert_eq!(
            f16::from_f64(2002.51f64).to_bits(),
            f16::from_f64(2003.0).to_bits()
        );
    }

    // Smoke test of the five arithmetic operators on exactly representable values.
    #[test]
    fn arithmetic() {
        assert_eq!(f16::ONE + f16::ONE, f16::from_f32(2.));
        assert_eq!(f16::ONE - f16::ONE, f16::ZERO);
        assert_eq!(f16::ONE * f16::ONE, f16::ONE);
        assert_eq!(f16::from_f32(2.) * f16::from_f32(2.), f16::from_f32(4.));
        assert_eq!(f16::ONE / f16::ONE, f16::ONE);
        assert_eq!(f16::from_f32(4.) / f16::from_f32(2.), f16::from_f32(2.));
        assert_eq!(f16::from_f32(4.) % f16::from_f32(3.), f16::from_f32(1.));
    }

    // Display and Debug must honor precision, sign and width/alignment flags.
    #[cfg(feature = "std")]
    #[test]
    fn formatting() {
        let f = f16::from_f32(0.1152344);

        assert_eq!(format!("{:.3}", f), "0.115");
        assert_eq!(format!("{:.4}", f), "0.1152");
        assert_eq!(format!("{:+.4}", f), "+0.1152");
        // Width 10, right-aligned: the 7-character "+0.1152" gets three pad spaces.
        assert_eq!(format!("{:>+10.4}", f), "   +0.1152");

        assert_eq!(format!("{:.3?}", f), "0.115");
        assert_eq!(format!("{:.4?}", f), "0.1152");
        assert_eq!(format!("{:+.4?}", f), "+0.1152");
        assert_eq!(format!("{:>+10.4?}", f), "   +0.1152");
    }

    // Lets quickcheck generate arbitrary f16 bit patterns (NaNs and infinities included).
    impl quickcheck::Arbitrary for f16 {
        fn arbitrary(g: &mut quickcheck::Gen) -> Self {
            f16(u16::arbitrary(g))
        }
    }

    // f16 -> f32 -> f16 must be lossless; for NaN only NaN-ness and sign are compared.
    #[quickcheck]
    fn qc_roundtrip_f16_f32_is_identity(f: f16) -> bool {
        let roundtrip = f16::from_f32(f.to_f32());
        if f.is_nan() {
            roundtrip.is_nan() && f.is_sign_negative() == roundtrip.is_sign_negative()
        } else {
            f.0 == roundtrip.0
        }
    }

    // f16 -> f64 -> f16 must be lossless; for NaN only NaN-ness and sign are compared.
    #[quickcheck]
    fn qc_roundtrip_f16_f64_is_identity(f: f16) -> bool {
        let roundtrip = f16::from_f64(f.to_f64());
        if f.is_nan() {
            roundtrip.is_nan() && f.is_sign_negative() == roundtrip.is_sign_negative()
        } else {
            f.0 == roundtrip.0
        }
    }
}
1951