1//! Common math utils
2use super::errors::NumberOutOfRange;
3
/// Generates a pair of public functions converting between a floating-point
/// type and a Q1.x fixed-point value stored in an unsigned integer.
///
/// Arguments, in order:
/// 1. name of the generated float -> fixed function,
/// 2. name of the generated fixed -> float function,
/// 3. unsigned integer type carrying the raw fixed-point bits,
/// 4. signed integer type of the same width (used to reinterpret those bits),
/// 5. floating-point type,
/// 6. number of fractional bits (the scale factor is `1 << offset`),
/// 7. an unsigned integer literal, the same size as the float type, whose bits
///    are the float encoding of the smallest fixed-point step (the callers in
///    this file pass the encoding of `2^-offset`).
///
/// A trailing comma after the last argument is accepted.
macro_rules! floating_fixed_convert {
    (
        $f_to_q:ident,
        $q_to_f:ident,
        $unsigned_bin_typ:ty,
        $signed_bin_typ:ty,
        $float_ty:ty,
        $offset:literal,
        $min_positive:literal $(,)?
    ) => {
        /// Convert a floating-point value in `[-1.0, 1.0]` to fixed-point format.
        ///
        /// Inputs above the largest representable fixed-point value (including
        /// exactly `1.0`) are clamped to that largest value. The scaled value is
        /// truncated toward zero by the float-to-integer cast.
        ///
        /// # Errors
        ///
        /// * `NumberOutOfRange::BelowLowerBound` if `value < -1.0`, or if `value`
        ///   is NaN (NaN has no fixed-point encoding; it is rejected rather than
        ///   being silently encoded as `0`).
        /// * `NumberOutOfRange::AboveUpperBound` if `value > 1.0`.
        pub fn $f_to_q(value: $float_ty) -> Result<$unsigned_bin_typ, NumberOutOfRange> {
            // SAFETY: the literal is an unsigned integer; `transmute` only compiles
            // when it has the same size as the float type, and every bit pattern
            // is a valid floating-point value.
            const MIN_POSITIVE: $float_ty = unsafe { core::mem::transmute($min_positive) };
            // Scale factor between the float and the fixed-point representation.
            const SCALE: $float_ty = ((1 as $unsigned_bin_typ) << $offset) as $float_ty;

            // NaN compares false against everything, so it must be tested
            // explicitly or it would slip past both range checks below.
            if value.is_nan() || value < -1.0 {
                return Err(NumberOutOfRange::BelowLowerBound);
            }

            if value > 1.0 {
                return Err(NumberOutOfRange::AboveUpperBound);
            }

            // Values in (1.0 - MIN_POSITIVE, 1.0] have no exact encoding: clamp
            // them to the largest representable value, 1.0 - MIN_POSITIVE.
            let largest = (1.0 as $float_ty) - MIN_POSITIVE;
            let value = if value > largest { largest } else { value };

            // Cast to the signed integer first, then to the unsigned one: the
            // second `as` cast keeps the two's-complement bit pattern and merely
            // relabels it as unsigned for the caller.
            // See https://doc.rust-lang.org/reference/expressions/operator-expr.html#numeric-cast
            Ok((value * SCALE) as $signed_bin_typ as $unsigned_bin_typ)
        }

        /// Convert a fixed-point value to floating-point format.
        ///
        /// The raw bits are reinterpreted as a two's-complement signed integer
        /// and divided by the scale factor `1 << offset`.
        #[inline(always)]
        pub fn $q_to_f(value: $unsigned_bin_typ) -> $float_ty {
            // The unsigned -> signed `as` cast between same-width integers keeps
            // the bit pattern, restoring the sign before the float conversion.
            // See https://doc.rust-lang.org/reference/expressions/operator-expr.html#numeric-cast
            (value as $signed_bin_typ as $float_ty)
                / (((1 as $unsigned_bin_typ) << $offset) as $float_ty)
        }
    };
}
43
44floating_fixed_convert!(
45 f64_to_q1_31,
46 q1_31_to_f64,
47 u32,
48 i32,
49 f64,
50 31,
51 0x3E00_0000_0000_0000u64 // binary form of 1f64^(-31)
52);
53
54floating_fixed_convert!(
55 f32_to_q1_15,
56 q1_15_to_f32,
57 u16,
58 i16,
59 f32,
60 15,
61 0x3800_0000u32 // binary form of 1f32^(-15)
62);
63