//! `x86_64`'s Streaming SIMD Extensions 2 (SSE2)
2 | |
3 | use crate::{ |
4 | core_arch::x86::*, |
5 | intrinsics::{self, simd::*}, |
6 | }; |
7 | |
8 | #[cfg (test)] |
9 | use stdarch_test::assert_instr; |
10 | |
11 | #[allow (improper_ctypes)] |
12 | extern "C" { |
13 | #[link_name = "llvm.x86.sse2.cvtsd2si64" ] |
14 | fn cvtsd2si64(a: __m128d) -> i64; |
15 | #[link_name = "llvm.x86.sse2.cvttsd2si64" ] |
16 | fn cvttsd2si64(a: __m128d) -> i64; |
17 | } |
18 | |
19 | /// Converts the lower double-precision (64-bit) floating-point element in a to |
20 | /// a 64-bit integer. |
21 | /// |
22 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsd_si64) |
23 | #[inline ] |
24 | #[target_feature (enable = "sse2" )] |
25 | #[cfg_attr (test, assert_instr(cvtsd2si))] |
26 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
27 | pub unsafe fn _mm_cvtsd_si64(a: __m128d) -> i64 { |
28 | cvtsd2si64(a) |
29 | } |
30 | |
31 | /// Alias for `_mm_cvtsd_si64` |
32 | /// |
33 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsd_si64x) |
34 | #[inline ] |
35 | #[target_feature (enable = "sse2" )] |
36 | #[cfg_attr (test, assert_instr(cvtsd2si))] |
37 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
38 | pub unsafe fn _mm_cvtsd_si64x(a: __m128d) -> i64 { |
39 | _mm_cvtsd_si64(a) |
40 | } |
41 | |
42 | /// Converts the lower double-precision (64-bit) floating-point element in `a` |
43 | /// to a 64-bit integer with truncation. |
44 | /// |
45 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttsd_si64) |
46 | #[inline ] |
47 | #[target_feature (enable = "sse2" )] |
48 | #[cfg_attr (test, assert_instr(cvttsd2si))] |
49 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
50 | pub unsafe fn _mm_cvttsd_si64(a: __m128d) -> i64 { |
51 | cvttsd2si64(a) |
52 | } |
53 | |
54 | /// Alias for `_mm_cvttsd_si64` |
55 | /// |
56 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttsd_si64x) |
57 | #[inline ] |
58 | #[target_feature (enable = "sse2" )] |
59 | #[cfg_attr (test, assert_instr(cvttsd2si))] |
60 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
61 | pub unsafe fn _mm_cvttsd_si64x(a: __m128d) -> i64 { |
62 | _mm_cvttsd_si64(a) |
63 | } |
64 | |
65 | /// Stores a 64-bit integer value in the specified memory location. |
66 | /// To minimize caching, the data is flagged as non-temporal (unlikely to be |
67 | /// used again soon). |
68 | /// |
69 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_stream_si64) |
70 | /// |
71 | /// # Safety of non-temporal stores |
72 | /// |
73 | /// After using this intrinsic, but before any other access to the memory that this intrinsic |
74 | /// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In |
75 | /// particular, functions that call this intrinsic should generally call `_mm_sfence` before they |
76 | /// return. |
77 | /// |
78 | /// See [`_mm_sfence`] for details. |
79 | #[inline ] |
80 | #[target_feature (enable = "sse2" )] |
81 | #[cfg_attr (test, assert_instr(movnti))] |
82 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
83 | pub unsafe fn _mm_stream_si64(mem_addr: *mut i64, a: i64) { |
84 | intrinsics::nontemporal_store(ptr:mem_addr, val:a); |
85 | } |
86 | |
87 | /// Returns a vector whose lowest element is `a` and all higher elements are |
88 | /// `0`. |
89 | /// |
90 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi64_si128) |
91 | #[inline ] |
92 | #[target_feature (enable = "sse2" )] |
93 | #[cfg_attr (all(test, not(windows)), assert_instr(movq))] |
94 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
95 | pub unsafe fn _mm_cvtsi64_si128(a: i64) -> __m128i { |
96 | _mm_set_epi64x(e1:0, e0:a) |
97 | } |
98 | |
99 | /// Returns a vector whose lowest element is `a` and all higher elements are |
100 | /// `0`. |
101 | /// |
102 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi64x_si128) |
103 | #[inline ] |
104 | #[target_feature (enable = "sse2" )] |
105 | #[cfg_attr (all(test, not(windows)), assert_instr(movq))] |
106 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
107 | pub unsafe fn _mm_cvtsi64x_si128(a: i64) -> __m128i { |
108 | _mm_cvtsi64_si128(a) |
109 | } |
110 | |
111 | /// Returns the lowest element of `a`. |
112 | /// |
113 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi128_si64) |
114 | #[inline ] |
115 | #[target_feature (enable = "sse2" )] |
116 | #[cfg_attr (all(test, not(windows)), assert_instr(movq))] |
117 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
118 | pub unsafe fn _mm_cvtsi128_si64(a: __m128i) -> i64 { |
119 | simd_extract!(a.as_i64x2(), 0) |
120 | } |
121 | |
122 | /// Returns the lowest element of `a`. |
123 | /// |
124 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi128_si64x) |
125 | #[inline ] |
126 | #[target_feature (enable = "sse2" )] |
127 | #[cfg_attr (all(test, not(windows)), assert_instr(movq))] |
128 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
129 | pub unsafe fn _mm_cvtsi128_si64x(a: __m128i) -> i64 { |
130 | _mm_cvtsi128_si64(a) |
131 | } |
132 | |
133 | /// Returns `a` with its lower element replaced by `b` after converting it to |
134 | /// an `f64`. |
135 | /// |
136 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi64_sd) |
137 | #[inline ] |
138 | #[target_feature (enable = "sse2" )] |
139 | #[cfg_attr (test, assert_instr(cvtsi2sd))] |
140 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
141 | pub unsafe fn _mm_cvtsi64_sd(a: __m128d, b: i64) -> __m128d { |
142 | simd_insert!(a, 0, b as f64) |
143 | } |
144 | |
145 | /// Returns `a` with its lower element replaced by `b` after converting it to |
146 | /// an `f64`. |
147 | /// |
148 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi64x_sd) |
149 | #[inline ] |
150 | #[target_feature (enable = "sse2" )] |
151 | #[cfg_attr (test, assert_instr(cvtsi2sd))] |
152 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
153 | pub unsafe fn _mm_cvtsi64x_sd(a: __m128d, b: i64) -> __m128d { |
154 | _mm_cvtsi64_sd(a, b) |
155 | } |
156 | |
/// Unit tests for the `x86_64`-only SSE2 intrinsics defined in this file.
#[cfg(test)]
mod tests {
    use crate::core_arch::arch::x86_64::*;
    use std::boxed;
    use std::ptr;
    use stdarch_test::simd_test;

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtsd_si64() {
        // Only the lower lane is converted.
        let r = _mm_cvtsd_si64(_mm_setr_pd(-2.0, 5.0));
        assert_eq!(r, -2_i64);

        // A value that does not fit in an `i64` yields `i64::MIN`.
        let r = _mm_cvtsd_si64(_mm_setr_pd(f64::MAX, f64::MIN));
        assert_eq!(r, i64::MIN);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtsd_si64x() {
        // NaN yields `i64::MIN`.
        let r = _mm_cvtsd_si64x(_mm_setr_pd(f64::NAN, f64::NAN));
        assert_eq!(r, i64::MIN);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvttsd_si64() {
        // Truncation: -1.1 becomes -1.
        let a = _mm_setr_pd(-1.1, 2.2);
        let r = _mm_cvttsd_si64(a);
        assert_eq!(r, -1_i64);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvttsd_si64x() {
        // Negative infinity in the lower lane yields `i64::MIN`.
        let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
        let r = _mm_cvttsd_si64x(a);
        assert_eq!(r, i64::MIN);
    }

    #[simd_test(enable = "sse2")]
    // Miri cannot support this until it is clear how it fits in the Rust memory model
    // (non-temporal store)
    #[cfg_attr(miri, ignore)]
    unsafe fn test_mm_stream_si64() {
        let a: i64 = 7;
        let mut mem = boxed::Box::<i64>::new(-1);
        _mm_stream_si64(ptr::addr_of_mut!(*mem), a);
        // The documented contract of `_mm_stream_si64` requires an `_mm_sfence`
        // before any other access to the memory written by the non-temporal store.
        _mm_sfence();
        assert_eq!(a, *mem);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtsi64_si128() {
        // `a` lands in the low lane; the high lane is zero.
        let r = _mm_cvtsi64_si128(5);
        assert_eq_m128i(r, _mm_setr_epi64x(5, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtsi128_si64() {
        let r = _mm_cvtsi128_si64(_mm_setr_epi64x(5, 0));
        assert_eq!(r, 5);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtsi64_sd() {
        // Low lane is replaced by the converted integer; high lane is kept.
        let a = _mm_set1_pd(3.5);
        let r = _mm_cvtsi64_sd(a, 5);
        assert_eq_m128d(r, _mm_setr_pd(5.0, 3.5));
    }
}
223 | |