//! Streaming SIMD Extensions 3 (SSE3)

use crate::core_arch::{simd::*, x86::*};
use crate::intrinsics::simd::*;

#[cfg(test)]
use stdarch_test::assert_instr;
8
9/// Alternatively add and subtract packed single-precision (32-bit)
10/// floating-point elements in `a` to/from packed elements in `b`.
11///
12/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_addsub_ps)
13#[inline]
14#[target_feature(enable = "sse3")]
15#[cfg_attr(test, assert_instr(addsubps))]
16#[stable(feature = "simd_x86", since = "1.27.0")]
17#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
18pub const fn _mm_addsub_ps(a: __m128, b: __m128) -> __m128 {
19 unsafe {
20 let a: Simd = a.as_f32x4();
21 let b: Simd = b.as_f32x4();
22 let add: Simd = simd_add(x:a, y:b);
23 let sub: Simd = simd_sub(lhs:a, rhs:b);
24 simd_shuffle!(add, sub, [4, 1, 6, 3])
25 }
26}
27
28/// Alternatively add and subtract packed double-precision (64-bit)
29/// floating-point elements in `a` to/from packed elements in `b`.
30///
31/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_addsub_pd)
32#[inline]
33#[target_feature(enable = "sse3")]
34#[cfg_attr(test, assert_instr(addsubpd))]
35#[stable(feature = "simd_x86", since = "1.27.0")]
36#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37pub const fn _mm_addsub_pd(a: __m128d, b: __m128d) -> __m128d {
38 unsafe {
39 let a: Simd = a.as_f64x2();
40 let b: Simd = b.as_f64x2();
41 let add: Simd = simd_add(x:a, y:b);
42 let sub: Simd = simd_sub(lhs:a, rhs:b);
43 simd_shuffle!(add, sub, [2, 1])
44 }
45}
46
47/// Horizontally adds adjacent pairs of double-precision (64-bit)
48/// floating-point elements in `a` and `b`, and pack the results.
49///
50/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hadd_pd)
51#[inline]
52#[target_feature(enable = "sse3")]
53#[cfg_attr(test, assert_instr(haddpd))]
54#[stable(feature = "simd_x86", since = "1.27.0")]
55#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
56pub const fn _mm_hadd_pd(a: __m128d, b: __m128d) -> __m128d {
57 unsafe {
58 let even: __m128d = simd_shuffle!(a, b, [0, 2]);
59 let odd: __m128d = simd_shuffle!(a, b, [1, 3]);
60 simd_add(x:even, y:odd)
61 }
62}
63
64/// Horizontally adds adjacent pairs of single-precision (32-bit)
65/// floating-point elements in `a` and `b`, and pack the results.
66///
67/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hadd_ps)
68#[inline]
69#[target_feature(enable = "sse3")]
70#[cfg_attr(test, assert_instr(haddps))]
71#[stable(feature = "simd_x86", since = "1.27.0")]
72#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
73pub const fn _mm_hadd_ps(a: __m128, b: __m128) -> __m128 {
74 unsafe {
75 let even: __m128 = simd_shuffle!(a, b, [0, 2, 4, 6]);
76 let odd: __m128 = simd_shuffle!(a, b, [1, 3, 5, 7]);
77 simd_add(x:even, y:odd)
78 }
79}
80
81/// Horizontally subtract adjacent pairs of double-precision (64-bit)
82/// floating-point elements in `a` and `b`, and pack the results.
83///
84/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hsub_pd)
85#[inline]
86#[target_feature(enable = "sse3")]
87#[cfg_attr(test, assert_instr(hsubpd))]
88#[stable(feature = "simd_x86", since = "1.27.0")]
89#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
90pub const fn _mm_hsub_pd(a: __m128d, b: __m128d) -> __m128d {
91 unsafe {
92 let even: __m128d = simd_shuffle!(a, b, [0, 2]);
93 let odd: __m128d = simd_shuffle!(a, b, [1, 3]);
94 simd_sub(lhs:even, rhs:odd)
95 }
96}
97
98/// Horizontally adds adjacent pairs of single-precision (32-bit)
99/// floating-point elements in `a` and `b`, and pack the results.
100///
101/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hsub_ps)
102#[inline]
103#[target_feature(enable = "sse3")]
104#[cfg_attr(test, assert_instr(hsubps))]
105#[stable(feature = "simd_x86", since = "1.27.0")]
106#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
107pub const fn _mm_hsub_ps(a: __m128, b: __m128) -> __m128 {
108 unsafe {
109 let even: __m128 = simd_shuffle!(a, b, [0, 2, 4, 6]);
110 let odd: __m128 = simd_shuffle!(a, b, [1, 3, 5, 7]);
111 simd_sub(lhs:even, rhs:odd)
112 }
113}
114
115/// Loads 128-bits of integer data from unaligned memory.
116/// This intrinsic may perform better than `_mm_loadu_si128`
117/// when the data crosses a cache line boundary.
118///
119/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_lddqu_si128)
120#[inline]
121#[target_feature(enable = "sse3")]
122#[cfg_attr(test, assert_instr(lddqu))]
123#[stable(feature = "simd_x86", since = "1.27.0")]
124pub unsafe fn _mm_lddqu_si128(mem_addr: *const __m128i) -> __m128i {
125 transmute(src:lddqu(mem_addr as *const _))
126}
127
128/// Duplicate the low double-precision (64-bit) floating-point element
129/// from `a`.
130///
131/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movedup_pd)
132#[inline]
133#[target_feature(enable = "sse3")]
134#[cfg_attr(test, assert_instr(movddup))]
135#[stable(feature = "simd_x86", since = "1.27.0")]
136#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
137pub const fn _mm_movedup_pd(a: __m128d) -> __m128d {
138 unsafe { simd_shuffle!(a, a, [0, 0]) }
139}
140
141/// Loads a double-precision (64-bit) floating-point element from memory
142/// into both elements of return vector.
143///
144/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loaddup_pd)
145#[inline]
146#[target_feature(enable = "sse3")]
147#[cfg_attr(test, assert_instr(movddup))]
148#[stable(feature = "simd_x86", since = "1.27.0")]
149#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
150pub const unsafe fn _mm_loaddup_pd(mem_addr: *const f64) -> __m128d {
151 _mm_load1_pd(mem_addr)
152}
153
154/// Duplicate odd-indexed single-precision (32-bit) floating-point elements
155/// from `a`.
156///
157/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movehdup_ps)
158#[inline]
159#[target_feature(enable = "sse3")]
160#[cfg_attr(test, assert_instr(movshdup))]
161#[stable(feature = "simd_x86", since = "1.27.0")]
162#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
163pub const fn _mm_movehdup_ps(a: __m128) -> __m128 {
164 unsafe { simd_shuffle!(a, a, [1, 1, 3, 3]) }
165}
166
167/// Duplicate even-indexed single-precision (32-bit) floating-point elements
168/// from `a`.
169///
170/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_moveldup_ps)
171#[inline]
172#[target_feature(enable = "sse3")]
173#[cfg_attr(test, assert_instr(movsldup))]
174#[stable(feature = "simd_x86", since = "1.27.0")]
175#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
176pub const fn _mm_moveldup_ps(a: __m128) -> __m128 {
177 unsafe { simd_shuffle!(a, a, [0, 0, 2, 2]) }
178}
179
180#[allow(improper_ctypes)]
181unsafe extern "C" {
182 #[link_name = "llvm.x86.sse3.ldu.dq"]
183 unsafefn lddqu(mem_addr: *const i8) -> i8x16;
184}
185
#[cfg(test)]
mod tests {
    use crate::core_arch::assert_eq_const as assert_eq;
    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;

    #[simd_test(enable = "sse3")]
    const fn test_mm_addsub_ps() {
        let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
        let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
        let r = _mm_addsub_ps(a, b);
        // (a0-b0, a1+b1, a2-b2, a3+b3)
        assert_eq_m128(r, _mm_setr_ps(99.0, 25.0, 0.0, -15.0));
    }

    #[simd_test(enable = "sse3")]
    const fn test_mm_addsub_pd() {
        let a = _mm_setr_pd(-1.0, 5.0);
        let b = _mm_setr_pd(-100.0, 20.0);
        let r = _mm_addsub_pd(a, b);
        // (a0-b0, a1+b1)
        assert_eq_m128d(r, _mm_setr_pd(99.0, 25.0));
    }

    #[simd_test(enable = "sse3")]
    const fn test_mm_hadd_pd() {
        let a = _mm_setr_pd(-1.0, 5.0);
        let b = _mm_setr_pd(-100.0, 20.0);
        let r = _mm_hadd_pd(a, b);
        // (a0+a1, b0+b1)
        assert_eq_m128d(r, _mm_setr_pd(4.0, -80.0));
    }

    #[simd_test(enable = "sse3")]
    const fn test_mm_hadd_ps() {
        let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
        let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
        let r = _mm_hadd_ps(a, b);
        // (a0+a1, a2+a3, b0+b1, b2+b3)
        assert_eq_m128(r, _mm_setr_ps(4.0, -10.0, -80.0, -5.0));
    }

    #[simd_test(enable = "sse3")]
    const fn test_mm_hsub_pd() {
        let a = _mm_setr_pd(-1.0, 5.0);
        let b = _mm_setr_pd(-100.0, 20.0);
        let r = _mm_hsub_pd(a, b);
        // (a0-a1, b0-b1)
        assert_eq_m128d(r, _mm_setr_pd(-6.0, -120.0));
    }

    #[simd_test(enable = "sse3")]
    const fn test_mm_hsub_ps() {
        let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
        let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
        let r = _mm_hsub_ps(a, b);
        // (a0-a1, a2-a3, b0-b1, b2-b3)
        assert_eq_m128(r, _mm_setr_ps(-6.0, 10.0, -120.0, 5.0));
    }

    #[simd_test(enable = "sse3")]
    fn test_mm_lddqu_si128() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4,
            5, 6, 7, 8,
            9, 10, 11, 12,
            13, 14, 15, 16,
        );
        // An unaligned load of `a` must round-trip the exact bytes.
        let r = unsafe { _mm_lddqu_si128(&a) };
        assert_eq_m128i(a, r);
    }

    #[simd_test(enable = "sse3")]
    const fn test_mm_movedup_pd() {
        let a = _mm_setr_pd(-1.0, 5.0);
        let r = _mm_movedup_pd(a);
        assert_eq_m128d(r, _mm_setr_pd(-1.0, -1.0));
    }

    #[simd_test(enable = "sse3")]
    const fn test_mm_movehdup_ps() {
        let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
        let r = _mm_movehdup_ps(a);
        assert_eq_m128(r, _mm_setr_ps(5.0, 5.0, -10.0, -10.0));
    }

    #[simd_test(enable = "sse3")]
    const fn test_mm_moveldup_ps() {
        let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
        let r = _mm_moveldup_ps(a);
        assert_eq_m128(r, _mm_setr_ps(-1.0, -1.0, 0.0, 0.0));
    }

    #[simd_test(enable = "sse3")]
    const fn test_mm_loaddup_pd() {
        let d = -5.0;
        let r = unsafe { _mm_loaddup_pd(&d) };
        assert_eq_m128d(r, _mm_setr_pd(d, d));
    }
}
282