//! Platform-specific assembly instructions to avoid
//! intermediate rounding on architectures with FPUs.
3 | |
4 | pub use fpu_precision::set_precision; |
5 | |
6 | // On x86, the x87 FPU is used for float operations if the SSE/SSE2 extensions are not available. |
7 | // The x87 FPU operates with 80 bits of precision by default, which means that operations will |
8 | // round to 80 bits causing double rounding to happen when values are eventually represented as |
9 | // 32/64 bit float values. To overcome this, the FPU control word can be set so that the |
10 | // computations are performed in the desired precision. |
11 | // |
12 | // Note that normally, it is Undefined Behavior to alter the FPU control word while Rust code runs. |
13 | // The compiler assumes that the control word is always in its default state. However, in this |
14 | // particular case the semantics with the altered control word are actually *more faithful* |
15 | // to Rust semantics than the default -- arguably it is all the code that runs *outside* of the scope |
16 | // of a `set_precision` guard that is wrong. |
17 | // In other words, we are only using this to work around <https://github.com/rust-lang/rust/issues/114479>. |
18 | // Sometimes killing UB with UB actually works... |
19 | // (If this is used to set 32bit precision, there is still a risk that the compiler moves some 64bit |
20 | // operation into the scope of the `set_precision` guard. So it's not like this is totally sound. |
21 | // But it's not really any less sound than the default state of 80bit precision...) |
#[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
mod fpu_precision {
    use core::arch::asm;
    use core::mem::size_of;

    /// Mask selecting the Precision Control (PC) field: bits 8 and 9 of the control word.
    const PC_MASK: u16 = 0x0300;
    /// PC = 0b00: single precision, i.e. 32 bits.
    const PC_SINGLE: u16 = 0x0000;
    /// PC = 0b10: double precision, i.e. 64 bits.
    const PC_DOUBLE: u16 = 0x0200;
    /// PC = 0b11: double extended precision, i.e. 80 bits (default state).
    const PC_EXTENDED: u16 = 0x0300;

    /// A structure used to preserve the original value of the FPU control word, so that it can be
    /// restored when the structure is dropped.
    ///
    /// The x87 FPU control word is a 16-bit register whose fields are as follows:
    ///
    /// | 12-15 | 10-11 | 8-9 | 6-7 |  5 |  4 |  3 |  2 |  1 |  0 |
    /// |------:|------:|----:|----:|---:|---:|---:|---:|---:|---:|
    /// |       |    RC |  PC |     | PM | UM | OM | ZM | DM | IM |
    ///
    /// The documentation for all of the fields is available in the IA-32 Architectures Software
    /// Developer's Manual (Volume 1).
    ///
    /// The only field which is relevant for the following code is PC, Precision Control. This
    /// field determines the precision of the operations performed by the FPU. It can be set to:
    ///  - 0b00, single precision i.e., 32-bits
    ///  - 0b10, double precision i.e., 64-bits
    ///  - 0b11, double extended precision i.e., 80-bits (default state)
    ///
    /// The 0b01 value is reserved and should not be used.
    ///
    /// The wrapped `u16` is the control word as it was read before `set_precision` changed it.
    #[must_use = "the previous control word is restored as soon as this guard is dropped"]
    pub struct FPUControlWord(u16);

    /// Reads the current x87 control word.
    fn get_cw() -> u16 {
        let mut cw = 0_u16;

        // SAFETY: the `fnstcw` instruction has been audited to be able to work correctly with
        // any `u16`; the operand points at `cw`, a local that is live for the whole block.
        unsafe {
            asm!(
                "fnstcw word ptr [{}]",
                in(reg) &mut cw,
                options(nostack),
            )
        }

        cw
    }

    /// Loads `cw` into the x87 control word.
    fn set_cw(cw: u16) {
        // SAFETY: the `fldcw` instruction has been audited to be able to work correctly with
        // any `u16`; the operand points at `cw`, a local that is live for the whole block.
        unsafe {
            asm!(
                "fldcw word ptr [{}]",
                in(reg) &cw,
                options(nostack),
            )
        }
    }

    /// Sets the precision field of the FPU to match `T` and returns a `FPUControlWord`.
    ///
    /// The precision is chosen from `size_of::<T>()`: 4 bytes selects single precision, 8 bytes
    /// selects double precision, and any other size selects double extended precision.
    ///
    /// The returned guard holds the control word that was in effect before the call and writes
    /// it back when dropped, so it must be kept alive for as long as the precision is needed.
    pub fn set_precision<T>() -> FPUControlWord {
        // Compute the value for the Precision Control field that is appropriate for `T`.
        let cw_precision = match size_of::<T>() {
            4 => PC_SINGLE,
            8 => PC_DOUBLE,
            _ => PC_EXTENDED,
        };

        // Get the original value of the control word to restore it later, when the
        // `FPUControlWord` structure is dropped.
        let original = get_cw();

        // Set the control word to the desired precision. This is achieved by masking away the old
        // precision (bits 8 and 9) and replacing it with the precision flag computed above; all
        // other fields keep their original value.
        set_cw((original & !PC_MASK) | cw_precision);

        FPUControlWord(original)
    }

    impl Drop for FPUControlWord {
        /// Writes the saved control word back to the FPU.
        fn drop(&mut self) {
            set_cw(self.0)
        }
    }
}
95 | |
96 | // In most architectures, floating point operations have an explicit bit size, therefore the |
97 | // precision of the computation is determined on a per-operation basis. |
#[cfg(any(not(target_arch = "x86"), target_feature = "sse2"))]
mod fpu_precision {
    /// No-op variant of `set_precision` for this configuration.
    ///
    /// Does nothing and returns `()`. The type parameter `T` is unused here; it mirrors the
    /// signature of the x87 variant of this function.
    pub fn set_precision<T>() {
        // Intentionally empty.
    }
}
102 | |