//! Platform-specific assembly instructions to avoid
//! intermediate rounding on architectures with FPUs.
| 3 | |
| 4 | pub(super) use fpu_precision::set_precision; |
| 5 | |
| 6 | // On x86, the x87 FPU is used for float operations if the SSE/SSE2 extensions are not available. |
| 7 | // The x87 FPU operates with 80 bits of precision by default, which means that operations will |
| 8 | // round to 80 bits causing double rounding to happen when values are eventually represented as |
| 9 | // 32/64 bit float values. To overcome this, the FPU control word can be set so that the |
| 10 | // computations are performed in the desired precision. |
| 11 | // |
| 12 | // Note that normally, it is Undefined Behavior to alter the FPU control word while Rust code runs. |
| 13 | // The compiler assumes that the control word is always in its default state. However, in this |
| 14 | // particular case the semantics with the altered control word are actually *more faithful* |
| 15 | // to Rust semantics than the default -- arguably it is all the code that runs *outside* of the scope |
| 16 | // of a `set_precision` guard that is wrong. |
| 17 | // In other words, we are only using this to work around <https://github.com/rust-lang/rust/issues/114479>. |
| 18 | // Sometimes killing UB with UB actually works... |
| 19 | // (If this is used to set 32bit precision, there is still a risk that the compiler moves some 64bit |
| 20 | // operation into the scope of the `set_precision` guard. So it's not like this is totally sound. |
| 21 | // But it's not really any less sound than the default state of 80bit precision...) |
#[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
mod fpu_precision {
    use core::arch::asm;

    /// Mask selecting the Precision Control (PC) field, bits 8 and 9 of the control word.
    const PRECISION_CONTROL_MASK: u16 = 0x0300;
    /// PC value `0b00`: single precision, 32 bits.
    const PRECISION_SINGLE: u16 = 0x0000;
    /// PC value `0b10`: double precision, 64 bits.
    const PRECISION_DOUBLE: u16 = 0x0200;
    /// PC value `0b11`: double extended precision, 80 bits (the default state).
    const PRECISION_EXTENDED: u16 = 0x0300;

    /// A guard that preserves the original value of the FPU control word, so that it can be
    /// restored when the guard is dropped.
    ///
    /// The x87 FPU control word is a 16-bit register whose fields are as follows:
    ///
    /// | 12-15 | 10-11 | 8-9 | 6-7 |  5 |  4 |  3 |  2 |  1 |  0 |
    /// |------:|------:|----:|----:|---:|---:|---:|---:|---:|---:|
    /// |       | RC    | PC  |     | PM | UM | OM | ZM | DM | IM |
    ///
    /// The documentation for all of the fields is available in the IA-32 Architectures Software
    /// Developer's Manual (Volume 1).
    ///
    /// The only field which is relevant for the following code is PC, Precision Control. This
    /// field determines the precision of the operations performed by the FPU. It can be set to:
    ///  - 0b00, single precision i.e., 32-bits
    ///  - 0b10, double precision i.e., 64-bits
    ///  - 0b11, double extended precision i.e., 80-bits (default state)
    ///
    /// The 0b01 value is reserved and should not be used.
    ///
    /// The wrapped `u16` is the control word as it was read by `set_precision` before the
    /// precision was changed; `Drop` loads it back verbatim.
    #[must_use = "dropping the guard immediately restores the previous FPU control word"]
    pub(crate) struct FPUControlWord(u16);

    /// Loads `cw` into the x87 FPU control word with `fldcw`.
    fn set_cw(cw: u16) {
        // SAFETY: the `fldcw` instruction has been audited to be able to work correctly with
        // any `u16`. `&cw` points at a live local for the whole duration of the asm block and
        // the instruction only reads two bytes from it.
        unsafe {
            asm!(
                "fldcw word ptr [{}]",
                in(reg) &cw,
                options(nostack),
            )
        }
    }

    /// Sets the precision field of the FPU to `T` and returns a `FPUControlWord`.
    ///
    /// The precision is selected from `size_of::<T>()`: 4 bytes selects single precision,
    /// 8 bytes selects double precision, and any other size selects the 80-bit default.
    /// All other fields of the control word are left as they were.
    ///
    /// The returned guard must be kept alive for as long as the altered precision is needed:
    /// dropping it writes the previously read control word back.
    pub(crate) fn set_precision<T>() -> FPUControlWord {
        // Compute the value for the Precision Control field that is appropriate for `T`.
        let cw_precision = match size_of::<T>() {
            4 => PRECISION_SINGLE,
            8 => PRECISION_DOUBLE,
            _ => PRECISION_EXTENDED,
        };

        let mut cw = 0_u16;

        // Get the original value of the control word to restore it later, when the
        // `FPUControlWord` structure is dropped.
        // SAFETY: the `fnstcw` instruction has been audited to be able to work correctly with
        // any `u16`. `&mut cw` points at a live local for the whole duration of the asm block
        // and the instruction only writes two bytes to it.
        unsafe {
            asm!(
                "fnstcw word ptr [{}]",
                in(reg) &mut cw,
                options(nostack),
            )
        }

        // Set the control word to the desired precision. This is achieved by masking away the
        // old precision (bits 8 and 9) and replacing it with the precision flag computed above.
        set_cw((cw & !PRECISION_CONTROL_MASK) | cw_precision);

        FPUControlWord(cw)
    }

    impl Drop for FPUControlWord {
        /// Restores the control word that was in effect before `set_precision` was called.
        fn drop(&mut self) {
            set_cw(self.0)
        }
    }
}
| 94 | |
| 95 | // In most architectures, floating point operations have an explicit bit size, therefore the |
| 96 | // precision of the computation is determined on a per-operation basis. |
// This predicate is the exact negation of the one guarding the x87 implementation, so exactly
// one `fpu_precision` module is compiled for any target.
#[cfg(not(all(target_arch = "x86", not(target_feature = "sse2"))))]
mod fpu_precision {
    /// No-op counterpart of the x87 `set_precision`.
    ///
    /// Nothing is changed and no guard is produced: the function returns `()`
    /// regardless of `T`.
    pub(crate) fn set_precision<T>() {}
}
| 101 | |