| 1 | use crate::leading_zeros::leading_zeros_u16;
|
| 2 | use core::mem;
|
| 3 |
|
/// Converts a 32-bit float to `bf16` bits, rounding to nearest, ties to even.
///
/// NaNs keep the high bits of their payload and have the quiet (most
/// significant mantissa) bit forced on; infinities and zeros pass through
/// exactly, and overflowing finite values round up to infinity.
#[inline]
pub(crate) const fn f32_to_bf16(value: f32) -> u16 {
    // TODO: Replace mem::transmute with to_bits() once to_bits is const-stabilized
    // Convert to raw bytes.
    // SAFETY: f32 and u32 have the same size and every bit pattern is a valid u32.
    let x: u32 = unsafe { mem::transmute::<f32, u32>(value) };

    // check for NaN (exponent all ones, mantissa non-zero)
    if x & 0x7FFF_FFFFu32 > 0x7F80_0000u32 {
        // Keep high part of current mantissa but also set most significant
        // mantissa bit so the result is a quiet NaN.
        return ((x >> 16) | 0x0040u32) as u16;
    }

    // Truncate to the upper 16 bits, rounding to nearest even: round up when
    // the round bit (bit 15) is set AND either a lower "sticky" bit or the
    // lowest kept bit (bit 16) is set. `3 * round_bit - 1` masks exactly
    // bits 0-14 plus bit 16 (it excludes the round bit itself), which is what
    // makes exact halfway cases round toward the even result.
    let round_bit: u32 = 0x0000_8000u32;
    if (x & round_bit) != 0 && (x & (3 * round_bit - 1)) != 0 {
        // `+ 1` cannot overflow the u16: x >> 16 == 0xFFFF only for NaNs,
        // which were returned above; a max-finite input correctly rounds up
        // to the infinity pattern 0x7F80/0xFF80.
        (x >> 16) as u16 + 1
    } else {
        (x >> 16) as u16
    }
}
|
| 24 |
|
/// Converts a 64-bit float to `bf16` bits, rounding to nearest, ties to even.
///
/// Overflow saturates to signed infinity; values too small for even a bf16
/// subnormal flush to signed zero. NaNs keep the top bits of their payload
/// and have the quiet (most significant mantissa) bit forced on.
#[inline]
pub(crate) const fn f64_to_bf16(value: f64) -> u16 {
    // TODO: Replace mem::transmute with to_bits() once to_bits is const-stabilized
    // Convert to raw bytes, truncating the last 32-bits of mantissa; that precision will always
    // be lost on half-precision.
    // SAFETY: f64 and u64 have the same size and every bit pattern is a valid u64.
    let val: u64 = unsafe { mem::transmute::<f64, u64>(value) };
    // Work on the upper 32 bits only. The discarded low 32 mantissa bits sit
    // far below bf16's round position, so they only matter for the NaN check
    // (handled explicitly below via `val as u32`).
    let x = (val >> 32) as u32;

    // Extract IEEE754 components of the upper word:
    // 1 sign bit, 11 exponent bits, top 20 of the 52 mantissa bits.
    let sign = x & 0x8000_0000u32;
    let exp = x & 0x7FF0_0000u32;
    let man = x & 0x000F_FFFFu32;

    // Check for all exponent bits being set, which is Infinity or NaN
    if exp == 0x7FF0_0000u32 {
        // Set mantissa MSB for NaN (and also keep shifted mantissa bits).
        // We also have to check the last 32 bits (`val as u32` is the low
        // half of the 52-bit mantissa): the value is infinity only if ALL
        // mantissa bits are zero.
        let nan_bit = if man == 0 && (val as u32 == 0) {
            0
        } else {
            0x0040u32
        };
        // man >> 13 keeps the top 7 mantissa bits, the bf16 mantissa width.
        return ((sign >> 16) | 0x7F80u32 | nan_bit | (man >> 13)) as u16;
    }

    // The number is normalized, start assembling half precision version
    let half_sign = sign >> 16;
    // Unbias the exponent (f64 bias 1023), then bias for bfloat16 precision
    // (bias 127). i64 arithmetic so intermediate values may go negative.
    let unbiased_exp = ((exp >> 20) as i64) - 1023;
    let half_exp = unbiased_exp + 127;

    // Check for exponent overflow, return +infinity
    // (0xFF is the all-ones bf16 exponent, reserved for inf/NaN)
    if half_exp >= 0xFF {
        return (half_sign | 0x7F80u32) as u16;
    }

    // Check for underflow: the result can at best be a bf16 subnormal
    if half_exp <= 0 {
        // Check mantissa for what we can do. Beyond this point the shift
        // below (14 - half_exp) would push the entire 21-bit significand
        // (hidden bit included) past any bit that could round back up, and
        // would also exceed safe u32 shift widths -- full underflow.
        if 7 - half_exp > 21 {
            // No rounding possibility, so this is a full underflow, return signed zero
            return half_sign as u16;
        }
        // Don't forget about hidden leading mantissa bit when assembling mantissa
        // (bit 20 of the upper word for a normal f64)
        let man = man | 0x0010_0000u32;
        // At half_exp == 0 this shifts the hidden bit to bit 6, the MSB of
        // the 7-bit bf16 subnormal mantissa; smaller half_exp shifts further.
        let mut half_man = man >> (14 - half_exp);
        // Check for rounding: same ties-to-even scheme as the normal path,
        // with the round bit one position below the last kept bit.
        let round_bit = 1 << (13 - half_exp);
        if (man & round_bit) != 0 && (man & (3 * round_bit - 1)) != 0 {
            half_man += 1;
        }
        // No exponent for subnormals
        return (half_sign | half_man) as u16;
    }

    // Rebias the exponent into the 8-bit field above the 7-bit mantissa
    let half_exp = (half_exp as u32) << 7;
    let half_man = man >> 13;
    // Check for rounding: round bit is bit 12, one below the 7 kept mantissa
    // bits. `3 * round_bit - 1` masks the sticky bits plus the lowest kept
    // bit (but not the round bit itself), giving round-to-nearest-even.
    let round_bit = 0x0000_1000u32;
    if (man & round_bit) != 0 && (man & (3 * round_bit - 1)) != 0 {
        // Round it. A mantissa carry propagates into the exponent field,
        // which is exactly the correct IEEE behavior (may yield infinity).
        ((half_sign | half_exp | half_man) + 1) as u16
    } else {
        (half_sign | half_exp | half_man) as u16
    }
}
|
| 92 |
|
/// Converts `bf16` bits to a 32-bit float.
///
/// A bf16 is the upper half of an f32 bit pattern, so the conversion is a
/// widening shift; NaNs additionally get the quiet (most significant
/// mantissa) bit forced on.
#[inline]
pub(crate) const fn bf16_to_f32(i: u16) -> f32 {
    // TODO: Replace mem::transmute with from_bits() once from_bits is const-stabilized
    // Place the 16 bf16 bits in the high half of the f32 pattern.
    let mut bits = (i as u32) << 16;
    // If NaN (exponent all ones, mantissa non-zero), keep current mantissa
    // but also set most significant mantissa bit (0x0040 in bf16, which is
    // bit 22 of the f32 after the shift).
    if i & 0x7FFFu16 > 0x7F80u16 {
        bits |= 0x0040u32 << 16;
    }
    // SAFETY: u32 and f32 have the same size and every bit pattern is a valid f32.
    unsafe { mem::transmute::<u32, f32>(bits) }
}
|
| 103 |
|
/// Converts `bf16` bits to a 64-bit float.
///
/// Every bf16 value (including subnormals) is exactly representable in f64,
/// so this conversion never rounds. NaNs keep their payload and have the
/// quiet (most significant mantissa) bit forced on.
#[inline]
pub(crate) const fn bf16_to_f64(i: u16) -> f64 {
    // TODO: Replace mem::transmute with from_bits() once from_bits is const-stabilized
    // Check for signed zero: the shift moves the bf16 sign bit (bit 15) to
    // the f64 sign bit (bit 63), leaving everything else zero.
    if i & 0x7FFFu16 == 0 {
        // SAFETY: u64 and f64 have the same size and every bit pattern is a valid f64.
        return unsafe { mem::transmute::<u64, f64>((i as u64) << 48) };
    }

    // Extract bf16 components: 1 sign, 8 exponent, 7 mantissa bits.
    let half_sign = (i & 0x8000u16) as u64;
    let half_exp = (i & 0x7F80u16) as u64;
    let half_man = (i & 0x007Fu16) as u64;

    // Check for an infinity or NaN when all exponent bits set
    if half_exp == 0x7F80u64 {
        // Check for signed infinity if mantissa is zero
        if half_man == 0 {
            return unsafe {
                mem::transmute::<u64, f64>((half_sign << 48) | 0x7FF0_0000_0000_0000u64)
            };
        } else {
            // NaN, keep current mantissa but also set most significant
            // mantissa bit (the 0x8 in 0x7FF8... is f64 mantissa bit 51;
            // << 45 aligns bf16 mantissa bit 6 with f64 bit 51).
            return unsafe {
                mem::transmute::<u64, f64>(
                    (half_sign << 48) | 0x7FF8_0000_0000_0000u64 | (half_man << 45),
                )
            };
        }
    }

    // Calculate double-precision components with adjusted exponent
    let sign = half_sign << 48;
    // Unbias exponent (shift the 8-bit field down, then remove the bf16
    // bias of 127)
    let unbiased_exp = ((half_exp as i64) >> 7) - 127;

    // Check for subnormals, which will be normalized by adjusting exponent
    if half_exp == 0 {
        // Calculate how much to adjust the exponent by. The mantissa is only
        // 7 bits, so as a u16 it has at least 9 leading zeros; `e` is how far
        // its MSB sits below bit 6.
        let e = leading_zeros_u16(half_man as u16) - 9;

        // Rebias and adjust exponent: a bf16 subnormal with MSB at bit 6-e
        // has value 2^(-127-e) * 1.xxx, i.e. biased f64 exponent 896 - e.
        let exp = ((1023 - 127 - e) as u64) << 52;
        // Shift the mantissa MSB up to bit 52, then mask it off -- it becomes
        // the implicit leading 1 of the now-normalized f64.
        let man = (half_man << (46 + e)) & 0xF_FFFF_FFFF_FFFFu64;
        return unsafe { mem::transmute::<u64, f64>(sign | exp | man) };
    }
    // Rebias exponent for a normalized normal (f64 bias is 1023)
    let exp = ((unbiased_exp + 1023) as u64) << 52;
    // Align the 7 bf16 mantissa bits with the top of the 52-bit f64 mantissa.
    let man = (half_man & 0x007Fu64) << 45;
    unsafe { mem::transmute::<u64, f64>(sign | exp | man) }
}
|
| 153 | |