| 1 | //! `x86_64` Streaming SIMD Extensions (SSE) |
| 2 | |
| 3 | use crate::core_arch::x86::*; |
| 4 | |
| 5 | #[cfg (test)] |
| 6 | use stdarch_test::assert_instr; |
| 7 | |
| 8 | #[allow (improper_ctypes)] |
| 9 | unsafe extern "C" { |
| 10 | #[link_name = "llvm.x86.sse.cvtss2si64" ] |
| 11 | unsafefn cvtss2si64(a: __m128) -> i64; |
| 12 | #[link_name = "llvm.x86.sse.cvttss2si64" ] |
| 13 | unsafefn cvttss2si64(a: __m128) -> i64; |
| 14 | #[link_name = "llvm.x86.sse.cvtsi642ss" ] |
| 15 | unsafefn cvtsi642ss(a: __m128, b: i64) -> __m128; |
| 16 | } |
| 17 | |
| 18 | /// Converts the lowest 32 bit float in the input vector to a 64 bit integer. |
| 19 | /// |
| 20 | /// The result is rounded according to the current rounding mode. If the result |
| 21 | /// cannot be represented as a 64 bit integer the result will be |
| 22 | /// `0x8000_0000_0000_0000` (`i64::MIN`) or trigger an invalid operation |
| 23 | /// floating point exception if unmasked (see |
| 24 | /// [`_mm_setcsr`](fn._mm_setcsr.html)). |
| 25 | /// |
| 26 | /// This corresponds to the `CVTSS2SI` instruction (with 64 bit output). |
| 27 | /// |
| 28 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtss_si64) |
| 29 | #[inline ] |
| 30 | #[target_feature (enable = "sse" )] |
| 31 | #[cfg_attr (test, assert_instr(cvtss2si))] |
| 32 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
| 33 | pub fn _mm_cvtss_si64(a: __m128) -> i64 { |
| 34 | unsafe { cvtss2si64(a) } |
| 35 | } |
| 36 | |
| 37 | /// Converts the lowest 32 bit float in the input vector to a 64 bit integer |
| 38 | /// with truncation. |
| 39 | /// |
| 40 | /// The result is rounded always using truncation (round towards zero). If the |
| 41 | /// result cannot be represented as a 64 bit integer the result will be |
| 42 | /// `0x8000_0000_0000_0000` (`i64::MIN`) or an invalid operation floating |
| 43 | /// point exception if unmasked (see [`_mm_setcsr`](fn._mm_setcsr.html)). |
| 44 | /// |
| 45 | /// This corresponds to the `CVTTSS2SI` instruction (with 64 bit output). |
| 46 | /// |
| 47 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttss_si64) |
| 48 | #[inline ] |
| 49 | #[target_feature (enable = "sse" )] |
| 50 | #[cfg_attr (test, assert_instr(cvttss2si))] |
| 51 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
| 52 | pub fn _mm_cvttss_si64(a: __m128) -> i64 { |
| 53 | unsafe { cvttss2si64(a) } |
| 54 | } |
| 55 | |
| 56 | /// Converts a 64 bit integer to a 32 bit float. The result vector is the input |
| 57 | /// vector `a` with the lowest 32 bit float replaced by the converted integer. |
| 58 | /// |
| 59 | /// This intrinsic corresponds to the `CVTSI2SS` instruction (with 64 bit |
| 60 | /// input). |
| 61 | /// |
| 62 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi64_ss) |
| 63 | #[inline ] |
| 64 | #[target_feature (enable = "sse" )] |
| 65 | #[cfg_attr (test, assert_instr(cvtsi2ss))] |
| 66 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
| 67 | pub fn _mm_cvtsi64_ss(a: __m128, b: i64) -> __m128 { |
| 68 | unsafe { cvtsi642ss(a, b) } |
| 69 | } |
| 70 | |
#[cfg(test)]
mod tests {
    use crate::core_arch::arch::x86_64::*;
    use stdarch_test::simd_test;

    /// Table-driven check of `_mm_cvtss_si64`: each `(input, expected)` pair
    /// is placed in the lowest lane and converted.
    #[simd_test(enable = "sse")]
    unsafe fn test_mm_cvtss_si64() {
        let cases: &[(f32, i64)] = &[
            (42.0, 42),
            (-31.4, -31),
            // The two ties below both land on -34, i.e. the even neighbour.
            // NOTE(review): assumes the rounding mode is left at its default.
            (-33.5, -34),
            (-34.5, -34),
            (4.0e10, 40_000_000_000),
            (4.0e-10, 0),
            // Not representable as an integer: the `i64::MIN` marker comes back.
            (f32::NAN, i64::MIN),
            // Expected values are the exact integers of the nearest `f32`.
            (2147483500.1, 2147483520),
            (9.223371e18, 9223370937343148032),
        ];
        for (index, &(input, expected)) in cases.iter().enumerate() {
            // Only the lowest lane matters; the other lanes are filler.
            let vector = _mm_setr_ps(input, 1.0, 3.0, 4.0);
            let actual = _mm_cvtss_si64(vector);
            assert_eq!(
                expected, actual,
                "TestCase #{} _mm_cvtss_si64({:?}) = {}, expected: {}",
                index, vector, actual, expected
            );
        }
    }

    /// Table-driven check of `_mm_cvttss_si64`: same shape as the rounding
    /// test, but expectations reflect truncation towards zero.
    #[simd_test(enable = "sse")]
    unsafe fn test_mm_cvttss_si64() {
        let cases: &[(f32, i64)] = &[
            (42.0, 42),
            (-31.4, -31),
            // Fractions are simply dropped, for negative inputs as well.
            (-33.5, -33),
            (-34.5, -34),
            (10.999, 10),
            (-5.99, -5),
            (4.0e10, 40_000_000_000),
            (4.0e-10, 0),
            // Not representable as an integer: the `i64::MIN` marker comes back.
            (f32::NAN, i64::MIN),
            (2147483500.1, 2147483520),
            (9.223371e18, 9223370937343148032),
            // As an `f32` this literal is 2^63, one past `i64::MAX`.
            (9.223372e18, i64::MIN),
        ];
        for (index, &(input, expected)) in cases.iter().enumerate() {
            // Only the lowest lane matters; the other lanes are filler.
            let vector = _mm_setr_ps(input, 1.0, 3.0, 4.0);
            let actual = _mm_cvttss_si64(vector);
            assert_eq!(
                expected, actual,
                "TestCase #{} _mm_cvttss_si64({:?}) = {}, expected: {}",
                index, vector, actual, expected
            );
        }
    }

    /// Table-driven check of `_mm_cvtsi64_ss`: the converted integer must
    /// appear in lane 0 while lanes 1..=3 of the input vector are preserved.
    #[simd_test(enable = "sse")]
    unsafe fn test_mm_cvtsi64_ss() {
        let cases: &[(i64, f32)] = &[
            (4555, 4555.0),
            (322223333, 322223330.0),
            (-432, -432.0),
            (-322223333, -322223330.0),
            // Extremes of the `i64` range.
            (9223372036854775807, 9.223372e18),
            (-9223372036854775808, -9.223372e18),
        ];

        for &(integer, float) in cases.iter() {
            let base = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
            let actual = _mm_cvtsi64_ss(base, integer);
            // Same upper lanes as `base`, lowest lane swapped for the float.
            let expected = _mm_setr_ps(float, 6.0, 7.0, 8.0);
            assert_eq_m128(expected, actual);
        }
    }
}
| 146 | |