1 | //! `x86_64`'s Streaming SIMD Extensions 2 (SSE2) |
2 | |
3 | use crate::{ |
4 | core_arch::{simd_llvm::*, x86::*}, |
5 | intrinsics, |
6 | }; |
7 | |
8 | #[cfg (test)] |
9 | use stdarch_test::assert_instr; |
10 | |
// Raw LLVM intrinsics backing the `f64` -> `i64` conversions in this file.
// NOTE(review): `improper_ctypes` is presumably silenced because the lint
// does not treat `__m128d` as an FFI-safe type — confirm.
#[allow(improper_ctypes)]
extern "C" {
    // Bound to `llvm.x86.sse2.cvtsd2si64`; called by `_mm_cvtsd_si64`.
    #[link_name = "llvm.x86.sse2.cvtsd2si64"]
    fn cvtsd2si64(a: __m128d) -> i64;
    // Bound to `llvm.x86.sse2.cvttsd2si64`; called by `_mm_cvttsd_si64`.
    #[link_name = "llvm.x86.sse2.cvttsd2si64"]
    fn cvttsd2si64(a: __m128d) -> i64;
}
18 | |
/// Converts the lower double-precision (64-bit) floating-point element in `a`
/// to a 64-bit integer.
///
/// The conversion is delegated to the `llvm.x86.sse2.cvtsd2si64` intrinsic.
/// The tests in this file expect `i64::MIN` when the lower element is
/// `f64::MAX` or NaN.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsd_si64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtsd2si))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtsd_si64(a: __m128d) -> i64 {
    cvtsd2si64(a)
}
30 | |
31 | /// Alias for `_mm_cvtsd_si64` |
32 | /// |
33 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsd_si64x) |
34 | #[inline ] |
35 | #[target_feature (enable = "sse2" )] |
36 | #[cfg_attr (test, assert_instr(cvtsd2si))] |
37 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
38 | pub unsafe fn _mm_cvtsd_si64x(a: __m128d) -> i64 { |
39 | _mm_cvtsd_si64(a) |
40 | } |
41 | |
/// Converts the lower double-precision (64-bit) floating-point element in `a`
/// to a 64-bit integer with truncation.
///
/// The conversion is delegated to the `llvm.x86.sse2.cvttsd2si64` intrinsic.
/// The tests in this file expect `-1.1` to convert to `-1`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttsd_si64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvttsd2si))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvttsd_si64(a: __m128d) -> i64 {
    cvttsd2si64(a)
}
53 | |
54 | /// Alias for `_mm_cvttsd_si64` |
55 | /// |
56 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttsd_si64x) |
57 | #[inline ] |
58 | #[target_feature (enable = "sse2" )] |
59 | #[cfg_attr (test, assert_instr(cvttsd2si))] |
60 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
61 | pub unsafe fn _mm_cvttsd_si64x(a: __m128d) -> i64 { |
62 | _mm_cvttsd_si64(a) |
63 | } |
64 | |
65 | /// Stores a 64-bit integer value in the specified memory location. |
66 | /// To minimize caching, the data is flagged as non-temporal (unlikely to be |
67 | /// used again soon). |
68 | /// |
69 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_stream_si64) |
70 | #[inline ] |
71 | #[target_feature (enable = "sse2" )] |
72 | #[cfg_attr (test, assert_instr(movnti))] |
73 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
74 | pub unsafe fn _mm_stream_si64(mem_addr: *mut i64, a: i64) { |
75 | intrinsics::nontemporal_store(ptr:mem_addr, val:a); |
76 | } |
77 | |
78 | /// Returns a vector whose lowest element is `a` and all higher elements are |
79 | /// `0`. |
80 | /// |
81 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi64_si128) |
82 | #[inline ] |
83 | #[target_feature (enable = "sse2" )] |
84 | #[cfg_attr (all(test, not(windows)), assert_instr(movq))] |
85 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
86 | pub unsafe fn _mm_cvtsi64_si128(a: i64) -> __m128i { |
87 | _mm_set_epi64x(e1:0, e0:a) |
88 | } |
89 | |
90 | /// Returns a vector whose lowest element is `a` and all higher elements are |
91 | /// `0`. |
92 | /// |
93 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi64x_si128) |
94 | #[inline ] |
95 | #[target_feature (enable = "sse2" )] |
96 | #[cfg_attr (all(test, not(windows)), assert_instr(movq))] |
97 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
98 | pub unsafe fn _mm_cvtsi64x_si128(a: i64) -> __m128i { |
99 | _mm_cvtsi64_si128(a) |
100 | } |
101 | |
102 | /// Returns the lowest element of `a`. |
103 | /// |
104 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi128_si64) |
105 | #[inline ] |
106 | #[target_feature (enable = "sse2" )] |
107 | #[cfg_attr (all(test, not(windows)), assert_instr(movq))] |
108 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
109 | pub unsafe fn _mm_cvtsi128_si64(a: __m128i) -> i64 { |
110 | simd_extract(x:a.as_i64x2(), idx:0) |
111 | } |
112 | |
113 | /// Returns the lowest element of `a`. |
114 | /// |
115 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi128_si64x) |
116 | #[inline ] |
117 | #[target_feature (enable = "sse2" )] |
118 | #[cfg_attr (all(test, not(windows)), assert_instr(movq))] |
119 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
120 | pub unsafe fn _mm_cvtsi128_si64x(a: __m128i) -> i64 { |
121 | _mm_cvtsi128_si64(a) |
122 | } |
123 | |
124 | /// Returns `a` with its lower element replaced by `b` after converting it to |
125 | /// an `f64`. |
126 | /// |
127 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi64_sd) |
128 | #[inline ] |
129 | #[target_feature (enable = "sse2" )] |
130 | #[cfg_attr (test, assert_instr(cvtsi2sd))] |
131 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
132 | pub unsafe fn _mm_cvtsi64_sd(a: __m128d, b: i64) -> __m128d { |
133 | simd_insert(x:a, idx:0, val:b as f64) |
134 | } |
135 | |
136 | /// Returns `a` with its lower element replaced by `b` after converting it to |
137 | /// an `f64`. |
138 | /// |
139 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi64x_sd) |
140 | #[inline ] |
141 | #[target_feature (enable = "sse2" )] |
142 | #[cfg_attr (test, assert_instr(cvtsi2sd))] |
143 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
144 | pub unsafe fn _mm_cvtsi64x_sd(a: __m128d, b: i64) -> __m128d { |
145 | _mm_cvtsi64_sd(a, b) |
146 | } |
147 | |
#[cfg(test)]
mod tests {
    use crate::core_arch::arch::x86_64::*;
    use std::boxed;
    use stdarch_test::simd_test;

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtsd_si64() {
        // Only the lower lane (-2.0) takes part in the conversion.
        let in_range = _mm_setr_pd(-2.0, 5.0);
        assert_eq!(_mm_cvtsd_si64(in_range), -2_i64);

        // `f64::MAX` does not fit in an `i64`; `i64::MIN` is expected.
        let too_large = _mm_setr_pd(f64::MAX, f64::MIN);
        assert_eq!(_mm_cvtsd_si64(too_large), i64::MIN);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtsd_si64x() {
        // NaN input is expected to produce `i64::MIN`.
        let nans = _mm_setr_pd(f64::NAN, f64::NAN);
        assert_eq!(_mm_cvtsd_si64x(nans), i64::MIN);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvttsd_si64() {
        // -1.1 truncates to -1.
        let input = _mm_setr_pd(-1.1, 2.2);
        let truncated = _mm_cvttsd_si64(input);
        assert_eq!(truncated, -1_i64);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvttsd_si64x() {
        // Negative infinity in the lower lane is expected to give `i64::MIN`.
        let input = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
        let truncated = _mm_cvttsd_si64x(input);
        assert_eq!(truncated, i64::MIN);
    }

    #[simd_test(enable = "sse2")]
    // Miri cannot support this until it is clear how it fits in the Rust memory model
    // (non-temporal store)
    #[cfg_attr(miri, ignore)]
    unsafe fn test_mm_stream_si64() {
        let value: i64 = 7;
        // Heap slot seeded with a different value so the store is observable.
        let mut slot = boxed::Box::<i64>::new(-1);
        let dst: *mut i64 = &mut *slot;
        _mm_stream_si64(dst, value);
        assert_eq!(value, *slot);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtsi64_si128() {
        // Low lane holds the input, high lane is zero.
        let expected = _mm_setr_epi64x(5, 0);
        let actual = _mm_cvtsi64_si128(5);
        assert_eq_m128i(actual, expected);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtsi128_si64() {
        let input = _mm_setr_epi64x(5, 0);
        assert_eq!(_mm_cvtsi128_si64(input), 5);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtsi64_sd() {
        // Low lane becomes 5.0; high lane keeps the original 3.5.
        let base = _mm_set1_pd(3.5);
        let expected = _mm_setr_pd(5.0, 3.5);
        let actual = _mm_cvtsi64_sd(base, 5);
        assert_eq_m128d(actual, expected);
    }
}
213 | |