//! Platform-specific assembly instructions to avoid
//! intermediate rounding on architectures with FPUs.
3 | |
4 | pub(super) use fpu_precision::set_precision; |
5 | |
6 | // On x86, the x87 FPU is used for float operations if the SSE/SSE2 extensions are not available. |
7 | // The x87 FPU operates with 80 bits of precision by default, which means that operations will |
8 | // round to 80 bits causing double rounding to happen when values are eventually represented as |
9 | // 32/64 bit float values. To overcome this, the FPU control word can be set so that the |
10 | // computations are performed in the desired precision. |
11 | // |
12 | // Note that normally, it is Undefined Behavior to alter the FPU control word while Rust code runs. |
13 | // The compiler assumes that the control word is always in its default state. However, in this |
14 | // particular case the semantics with the altered control word are actually *more faithful* |
15 | // to Rust semantics than the default -- arguably it is all the code that runs *outside* of the scope |
16 | // of a `set_precision` guard that is wrong. |
17 | // In other words, we are only using this to work around <https://github.com/rust-lang/rust/issues/114479>. |
18 | // Sometimes killing UB with UB actually works... |
19 | // (If this is used to set 32bit precision, there is still a risk that the compiler moves some 64bit |
20 | // operation into the scope of the `set_precision` guard. So it's not like this is totally sound. |
21 | // But it's not really any less sound than the default state of 80bit precision...) |
#[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
mod fpu_precision {
    use core::arch::asm;

    /// Mask selecting the Precision Control (PC) field, i.e. bits 8 and 9 of the control word.
    const PC_MASK: u16 = 0x0300;
    /// PC field value `0b00`: single precision, used for 32-bit floats.
    const PC_SINGLE: u16 = 0x0000;
    /// PC field value `0b10`: double precision, used for 64-bit floats.
    const PC_DOUBLE: u16 = 0x0200;
    /// PC field value `0b11`: double extended precision (80 bits), the default state.
    const PC_EXTENDED: u16 = 0x0300;

    /// A guard that remembers the original value of the FPU control word, so that it can be
    /// restored when the guard is dropped.
    ///
    /// The x87 FPU control word is a 16-bit register whose fields are as follows:
    ///
    /// | 12-15 | 10-11 | 8-9 | 6-7 |  5 |  4 |  3 |  2 |  1 |  0 |
    /// |------:|------:|----:|----:|---:|---:|---:|---:|---:|---:|
    /// |       |    RC |  PC |     | PM | UM | OM | ZM | DM | IM |
    ///
    /// The documentation for all of the fields is available in the IA-32 Architectures Software
    /// Developer's Manual (Volume 1).
    ///
    /// The only field which is relevant for the following code is PC, Precision Control. This
    /// field determines the precision of the operations performed by the FPU. It can be set to:
    ///  - 0b00, single precision i.e., 32-bits
    ///  - 0b10, double precision i.e., 64-bits
    ///  - 0b11, double extended precision i.e., 80-bits (default state)
    ///
    /// The 0b01 value is reserved and should not be used.
    ///
    /// The guard must be kept alive (e.g. `let _cw = set_precision::<T>();`) for as long as the
    /// altered precision is needed: dropping it writes the saved control word back.
    #[must_use = "the original control word is restored as soon as this guard is dropped"]
    pub(crate) struct FPUControlWord(u16);

    /// Reads the current value of the x87 FPU control word.
    fn get_cw() -> u16 {
        let mut cw = 0_u16;

        // SAFETY: the `fnstcw` instruction has been audited to be able to work correctly with
        // any `u16`; the operand is the address of a live local `u16` for it to store into.
        unsafe {
            asm!(
                "fnstcw word ptr [{}]",
                in(reg) &mut cw,
                options(nostack),
            )
        }

        cw
    }

    /// Loads `cw` into the x87 FPU control word.
    fn set_cw(cw: u16) {
        // SAFETY: the `fldcw` instruction has been audited to be able to work correctly with
        // any `u16`; the operand is the address of a live local `u16` for it to load from.
        unsafe {
            asm!(
                "fldcw word ptr [{}]",
                in(reg) &cw,
                options(nostack),
            )
        }
    }

    /// Sets the precision field of the FPU to the one appropriate for `T` and returns a
    /// `FPUControlWord` guard holding the previous control word.
    ///
    /// A 4-byte `T` selects single precision, an 8-byte `T` selects double precision, and any
    /// other size selects the default double extended precision. All other fields of the
    /// control word are left untouched. The previous control word is written back when the
    /// returned guard is dropped.
    pub(crate) fn set_precision<T>() -> FPUControlWord {
        // Compute the value for the Precision Control field that is appropriate for `T`.
        let cw_precision = match size_of::<T>() {
            4 => PC_SINGLE,
            8 => PC_DOUBLE,
            _ => PC_EXTENDED,
        };

        // Remember the original control word so that it can be restored later, when the
        // returned guard is dropped.
        let original = get_cw();

        // Mask away the old precision (bits 8 and 9) and replace it with the one computed above.
        set_cw((original & !PC_MASK) | cw_precision);

        FPUControlWord(original)
    }

    impl Drop for FPUControlWord {
        /// Restores the control word that was in effect before `set_precision` was called.
        fn drop(&mut self) {
            set_cw(self.0)
        }
    }
}
94 | |
95 | // In most architectures, floating point operations have an explicit bit size, therefore the |
96 | // precision of the computation is determined on a per-operation basis. |
#[cfg(any(not(target_arch = "x86"), target_feature = "sse2"))]
mod fpu_precision {
    /// No-op stand-in for targets that do not need the x87 precision switch.
    ///
    /// Does nothing and returns `()`. The type parameter `T` is unused here; it is accepted so
    /// that callers can write `set_precision::<T>()` regardless of the target.
    pub(crate) fn set_precision<T>() {
        // Intentionally empty: there is no global precision state to change on these targets.
    }
}
101 | |