| 1 | //! `x86_64` Streaming SIMD Extensions (SSE) | 
| 2 |  | 
|---|
| 3 | use crate::core_arch::x86::*; | 
|---|
| 4 |  | 
|---|
| 5 | #[ cfg(test)] | 
|---|
| 6 | use stdarch_test::assert_instr; | 
|---|
| 7 |  | 
|---|
| 8 | #[ allow(improper_ctypes)] | 
|---|
| 9 | unsafe extern "C"{ | 
|---|
| 10 | #[ link_name= "llvm.x86.sse.cvtss2si64"] | 
|---|
| 11 | unsafefn cvtss2si64(a: __m128) -> i64; | 
|---|
| 12 | #[ link_name= "llvm.x86.sse.cvttss2si64"] | 
|---|
| 13 | unsafefn cvttss2si64(a: __m128) -> i64; | 
|---|
| 14 | #[ link_name= "llvm.x86.sse.cvtsi642ss"] | 
|---|
| 15 | unsafefn cvtsi642ss(a: __m128, b: i64) -> __m128; | 
|---|
| 16 | } | 
|---|
| 17 |  | 
|---|
| 18 | /// Converts the lowest 32 bit float in the input vector to a 64 bit integer. | 
|---|
| 19 | /// | 
|---|
| 20 | /// The result is rounded according to the current rounding mode. If the result | 
|---|
| 21 | /// cannot be represented as a 64 bit integer the result will be | 
|---|
| 22 | /// `0x8000_0000_0000_0000` (`i64::MIN`) or trigger an invalid operation | 
|---|
| 23 | /// floating point exception if unmasked (see | 
|---|
| 24 | /// [`_mm_setcsr`](fn._mm_setcsr.html)). | 
|---|
| 25 | /// | 
|---|
| 26 | /// This corresponds to the `CVTSS2SI` instruction (with 64 bit output). | 
|---|
| 27 | /// | 
|---|
| 28 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtss_si64) | 
|---|
| 29 | #[ inline] | 
|---|
| 30 | #[ target_feature(enable = "sse")] | 
|---|
| 31 | #[ cfg_attr(test, assert_instr(cvtss2si))] | 
|---|
| 32 | #[ stable(feature = "simd_x86", since = "1.27.0")] | 
|---|
| 33 | pub fn _mm_cvtss_si64(a: __m128) -> i64 { | 
|---|
| 34 | unsafe { cvtss2si64(a) } | 
|---|
| 35 | } | 
|---|
| 36 |  | 
|---|
| 37 | /// Converts the lowest 32 bit float in the input vector to a 64 bit integer | 
|---|
| 38 | /// with truncation. | 
|---|
| 39 | /// | 
|---|
| 40 | /// The result is rounded always using truncation (round towards zero). If the | 
|---|
| 41 | /// result cannot be represented as a 64 bit integer the result will be | 
|---|
| 42 | /// `0x8000_0000_0000_0000` (`i64::MIN`) or an invalid operation floating | 
|---|
| 43 | /// point exception if unmasked (see [`_mm_setcsr`](fn._mm_setcsr.html)). | 
|---|
| 44 | /// | 
|---|
| 45 | /// This corresponds to the `CVTTSS2SI` instruction (with 64 bit output). | 
|---|
| 46 | /// | 
|---|
| 47 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttss_si64) | 
|---|
| 48 | #[ inline] | 
|---|
| 49 | #[ target_feature(enable = "sse")] | 
|---|
| 50 | #[ cfg_attr(test, assert_instr(cvttss2si))] | 
|---|
| 51 | #[ stable(feature = "simd_x86", since = "1.27.0")] | 
|---|
| 52 | pub fn _mm_cvttss_si64(a: __m128) -> i64 { | 
|---|
| 53 | unsafe { cvttss2si64(a) } | 
|---|
| 54 | } | 
|---|
| 55 |  | 
|---|
| 56 | /// Converts a 64 bit integer to a 32 bit float. The result vector is the input | 
|---|
| 57 | /// vector `a` with the lowest 32 bit float replaced by the converted integer. | 
|---|
| 58 | /// | 
|---|
| 59 | /// This intrinsic corresponds to the `CVTSI2SS` instruction (with 64 bit | 
|---|
| 60 | /// input). | 
|---|
| 61 | /// | 
|---|
| 62 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi64_ss) | 
|---|
| 63 | #[ inline] | 
|---|
| 64 | #[ target_feature(enable = "sse")] | 
|---|
| 65 | #[ cfg_attr(test, assert_instr(cvtsi2ss))] | 
|---|
| 66 | #[ stable(feature = "simd_x86", since = "1.27.0")] | 
|---|
| 67 | pub fn _mm_cvtsi64_ss(a: __m128, b: i64) -> __m128 { | 
|---|
| 68 | unsafe { cvtsi642ss(a, b) } | 
|---|
| 69 | } | 
|---|
| 70 |  | 
|---|
#[cfg(test)]
mod tests {
    use crate::core_arch::arch::x86_64::*;
    use stdarch_test::simd_test;

    // Table-driven check of `_mm_cvtss_si64`: each entry is
    // `(lowest input lane, expected integer)`.
    #[simd_test(enable = "sse")]
    unsafe fn test_mm_cvtss_si64() {
        let cases: &[(f32, i64)] = &[
            (42.0, 42),
            (-31.4, -31),
            // Halfway inputs: the table expects the even neighbour
            // (assumes the default rounding mode is active).
            (-33.5, -34),
            (-34.5, -34),
            (4.0e10, 40_000_000_000),
            (4.0e-10, 0),
            // NaN is expected to produce the `i64::MIN` marker value.
            (f32::NAN, i64::MIN),
            (2147483500.1, 2147483520),
            (9.223371e18, 9223370937343148032),
        ];
        for (idx, &(input, expected)) in cases.iter().enumerate() {
            // Only the lowest lane is converted; the other lanes are filler.
            let vector = _mm_setr_ps(input, 1.0, 3.0, 4.0);
            let actual = _mm_cvtss_si64(vector);
            assert_eq!(
                expected, actual,
                "TestCase #{} _mm_cvtss_si64({:?}) = {}, expected: {}",
                idx, vector, actual, expected
            );
        }
    }

    // Table-driven check of `_mm_cvttss_si64`: each entry is
    // `(lowest input lane, expected truncated integer)`.
    #[simd_test(enable = "sse")]
    unsafe fn test_mm_cvttss_si64() {
        let cases: &[(f32, i64)] = &[
            (42.0, 42),
            (-31.4, -31),
            // Fractions are dropped, so results move towards zero.
            (-33.5, -33),
            (-34.5, -34),
            (10.999, 10),
            (-5.99, -5),
            (4.0e10, 40_000_000_000),
            (4.0e-10, 0),
            // NaN is expected to produce the `i64::MIN` marker value.
            (f32::NAN, i64::MIN),
            (2147483500.1, 2147483520),
            (9.223371e18, 9223370937343148032),
            // Too large for an `i64`: expected to produce `i64::MIN`.
            (9.223372e18, i64::MIN),
        ];
        for (idx, &(input, expected)) in cases.iter().enumerate() {
            // Only the lowest lane is converted; the other lanes are filler.
            let vector = _mm_setr_ps(input, 1.0, 3.0, 4.0);
            let actual = _mm_cvttss_si64(vector);
            assert_eq!(
                expected, actual,
                "TestCase #{} _mm_cvttss_si64({:?}) = {}, expected: {}",
                idx, vector, actual, expected
            );
        }
    }

    // Table-driven check of `_mm_cvtsi64_ss`: each entry is
    // `(integer input, expected float in the lowest lane)`.
    #[simd_test(enable = "sse")]
    unsafe fn test_mm_cvtsi64_ss() {
        let cases: &[(i64, f32)] = &[
            (4555, 4555.0),
            (322223333, 322223330.0),
            (-432, -432.0),
            (-322223333, -322223330.0),
            (9223372036854775807, 9.223372e18),
            (-9223372036854775808, -9.223372e18),
        ];

        // The same starting vector is used for every case; only its lowest
        // lane is expected to change.
        let base = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
        for &(value, expected_low) in cases.iter() {
            let actual = _mm_cvtsi64_ss(base, value);
            let expected = _mm_setr_ps(expected_low, 6.0, 7.0, 8.0);
            assert_eq_m128(expected, actual);
        }
    }
}
|---|
| 146 |  | 
|---|