1//! Streaming SIMD Extensions 3 (SSE3)
2
3use crate::core_arch::{simd::*, x86::*};
4use crate::intrinsics::simd::*;
5
6#[cfg(test)]
7use stdarch_test::assert_instr;
8
9/// Alternatively add and subtract packed single-precision (32-bit)
10/// floating-point elements in `a` to/from packed elements in `b`.
11///
12/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_addsub_ps)
13#[inline]
14#[target_feature(enable = "sse3")]
15#[cfg_attr(test, assert_instr(addsubps))]
16#[stable(feature = "simd_x86", since = "1.27.0")]
17pub unsafe fn _mm_addsub_ps(a: __m128, b: __m128) -> __m128 {
18 let a: f32x4 = a.as_f32x4();
19 let b: f32x4 = b.as_f32x4();
20 let add: f32x4 = simd_add(x:a, y:b);
21 let sub: f32x4 = simd_sub(lhs:a, rhs:b);
22 simd_shuffle!(add, sub, [4, 1, 6, 3])
23}
24
25/// Alternatively add and subtract packed double-precision (64-bit)
26/// floating-point elements in `a` to/from packed elements in `b`.
27///
28/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_addsub_pd)
29#[inline]
30#[target_feature(enable = "sse3")]
31#[cfg_attr(test, assert_instr(addsubpd))]
32#[stable(feature = "simd_x86", since = "1.27.0")]
33pub unsafe fn _mm_addsub_pd(a: __m128d, b: __m128d) -> __m128d {
34 let a: f64x2 = a.as_f64x2();
35 let b: f64x2 = b.as_f64x2();
36 let add: f64x2 = simd_add(x:a, y:b);
37 let sub: f64x2 = simd_sub(lhs:a, rhs:b);
38 simd_shuffle!(add, sub, [2, 1])
39}
40
/// Horizontally adds adjacent pairs of double-precision (64-bit)
/// floating-point elements in `a` and `b`, and packs the results:
/// the low lane is `a[0] + a[1]` and the high lane is `b[0] + b[1]`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hadd_pd)
#[inline]
#[target_feature(enable = "sse3")]
#[cfg_attr(test, assert_instr(haddpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_hadd_pd(a: __m128d, b: __m128d) -> __m128d {
    // Delegates to the LLVM intrinsic so codegen emits `haddpd` directly.
    haddpd(a, b)
}
52
/// Horizontally adds adjacent pairs of single-precision (32-bit)
/// floating-point elements in `a` and `b`, and packs the results:
/// `[a[0]+a[1], a[2]+a[3], b[0]+b[1], b[2]+b[3]]`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hadd_ps)
#[inline]
#[target_feature(enable = "sse3")]
#[cfg_attr(test, assert_instr(haddps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_hadd_ps(a: __m128, b: __m128) -> __m128 {
    // Delegates to the LLVM intrinsic so codegen emits `haddps` directly.
    haddps(a, b)
}
64
/// Horizontally subtracts adjacent pairs of double-precision (64-bit)
/// floating-point elements in `a` and `b`, and packs the results:
/// the low lane is `a[0] - a[1]` and the high lane is `b[0] - b[1]`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hsub_pd)
#[inline]
#[target_feature(enable = "sse3")]
#[cfg_attr(test, assert_instr(hsubpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_hsub_pd(a: __m128d, b: __m128d) -> __m128d {
    // Delegates to the LLVM intrinsic so codegen emits `hsubpd` directly.
    hsubpd(a, b)
}
76
/// Horizontally subtracts adjacent pairs of single-precision (32-bit)
/// floating-point elements in `a` and `b`, and packs the results:
/// `[a[0]-a[1], a[2]-a[3], b[0]-b[1], b[2]-b[3]]`.
///
/// (The previous summary said "adds"; this intrinsic subtracts, as its
/// `hsubps` instruction and the accompanying test demonstrate.)
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hsub_ps)
#[inline]
#[target_feature(enable = "sse3")]
#[cfg_attr(test, assert_instr(hsubps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_hsub_ps(a: __m128, b: __m128) -> __m128 {
    // Delegates to the LLVM intrinsic so codegen emits `hsubps` directly.
    hsubps(a, b)
}
88
89/// Loads 128-bits of integer data from unaligned memory.
90/// This intrinsic may perform better than `_mm_loadu_si128`
91/// when the data crosses a cache line boundary.
92///
93/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_lddqu_si128)
94#[inline]
95#[target_feature(enable = "sse3")]
96#[cfg_attr(test, assert_instr(lddqu))]
97#[stable(feature = "simd_x86", since = "1.27.0")]
98pub unsafe fn _mm_lddqu_si128(mem_addr: *const __m128i) -> __m128i {
99 transmute(src:lddqu(mem_addr as *const _))
100}
101
/// Duplicate the low double-precision (64-bit) floating-point element
/// from `a` into both lanes of the result.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movedup_pd)
#[inline]
#[target_feature(enable = "sse3")]
#[cfg_attr(test, assert_instr(movddup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_movedup_pd(a: __m128d) -> __m128d {
    // Both output lanes select index 0 — LLVM recognizes this shuffle
    // and emits `movddup`.
    simd_shuffle!(a, a, [0, 0])
}
113
/// Loads a double-precision (64-bit) floating-point element from memory
/// into both elements of return vector.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loaddup_pd)
#[inline]
#[target_feature(enable = "sse3")]
#[cfg_attr(test, assert_instr(movddup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loaddup_pd(mem_addr: *const f64) -> __m128d {
    // Same behavior as the SSE2 load-and-broadcast; with SSE3 enabled the
    // compiler emits it as a single `movddup`.
    _mm_load1_pd(mem_addr)
}
125
/// Duplicate odd-indexed single-precision (32-bit) floating-point elements
/// from `a`: the result is `[a[1], a[1], a[3], a[3]]`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movehdup_ps)
#[inline]
#[target_feature(enable = "sse3")]
#[cfg_attr(test, assert_instr(movshdup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_movehdup_ps(a: __m128) -> __m128 {
    // This shuffle pattern is recognized by LLVM and lowered to `movshdup`.
    simd_shuffle!(a, a, [1, 1, 3, 3])
}
137
/// Duplicate even-indexed single-precision (32-bit) floating-point elements
/// from `a`: the result is `[a[0], a[0], a[2], a[2]]`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_moveldup_ps)
#[inline]
#[target_feature(enable = "sse3")]
#[cfg_attr(test, assert_instr(movsldup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_moveldup_ps(a: __m128) -> __m128 {
    // This shuffle pattern is recognized by LLVM and lowered to `movsldup`.
    simd_shuffle!(a, a, [0, 0, 2, 2])
}
149
// Bindings to the LLVM intrinsics backing the SSE3 operations that have no
// portable `simd_*` representation (horizontal add/sub and `lddqu`). The
// `link_name` strings must match LLVM's intrinsic names exactly.
#[allow(improper_ctypes)]
extern "C" {
    #[link_name = "llvm.x86.sse3.hadd.pd"]
    fn haddpd(a: __m128d, b: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse3.hadd.ps"]
    fn haddps(a: __m128, b: __m128) -> __m128;
    #[link_name = "llvm.x86.sse3.hsub.pd"]
    fn hsubpd(a: __m128d, b: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse3.hsub.ps"]
    fn hsubps(a: __m128, b: __m128) -> __m128;
    // Returns raw bytes; `_mm_lddqu_si128` transmutes them to `__m128i`.
    #[link_name = "llvm.x86.sse3.ldu.dq"]
    fn lddqu(mem_addr: *const i8) -> i8x16;
}
163
#[cfg(test)]
mod tests {
    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;

    #[simd_test(enable = "sse3")]
    unsafe fn test_mm_addsub_ps() {
        let x = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
        let y = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
        // Even lanes subtract, odd lanes add.
        let got = _mm_addsub_ps(x, y);
        assert_eq_m128(got, _mm_setr_ps(99.0, 25.0, 0.0, -15.0));
    }

    #[simd_test(enable = "sse3")]
    unsafe fn test_mm_addsub_pd() {
        let x = _mm_setr_pd(-1.0, 5.0);
        let y = _mm_setr_pd(-100.0, 20.0);
        // Low lane subtracts, high lane adds.
        let got = _mm_addsub_pd(x, y);
        assert_eq_m128d(got, _mm_setr_pd(99.0, 25.0));
    }

    #[simd_test(enable = "sse3")]
    unsafe fn test_mm_hadd_pd() {
        let x = _mm_setr_pd(-1.0, 5.0);
        let y = _mm_setr_pd(-100.0, 20.0);
        let got = _mm_hadd_pd(x, y);
        assert_eq_m128d(got, _mm_setr_pd(4.0, -80.0));
    }

    #[simd_test(enable = "sse3")]
    unsafe fn test_mm_hadd_ps() {
        let x = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
        let y = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
        let got = _mm_hadd_ps(x, y);
        assert_eq_m128(got, _mm_setr_ps(4.0, -10.0, -80.0, -5.0));
    }

    #[simd_test(enable = "sse3")]
    unsafe fn test_mm_hsub_pd() {
        let x = _mm_setr_pd(-1.0, 5.0);
        let y = _mm_setr_pd(-100.0, 20.0);
        let got = _mm_hsub_pd(x, y);
        assert_eq_m128d(got, _mm_setr_pd(-6.0, -120.0));
    }

    #[simd_test(enable = "sse3")]
    unsafe fn test_mm_hsub_ps() {
        let x = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
        let y = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
        let got = _mm_hsub_ps(x, y);
        assert_eq_m128(got, _mm_setr_ps(-6.0, 10.0, -120.0, 5.0));
    }

    #[simd_test(enable = "sse3")]
    unsafe fn test_mm_lddqu_si128() {
        #[rustfmt::skip]
        let src = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        // The unaligned load must round-trip the bytes unchanged.
        assert_eq_m128i(src, _mm_lddqu_si128(&src));
    }

    #[simd_test(enable = "sse3")]
    unsafe fn test_mm_movedup_pd() {
        let x = _mm_setr_pd(-1.0, 5.0);
        // Low lane is broadcast to both lanes.
        assert_eq_m128d(_mm_movedup_pd(x), _mm_setr_pd(-1.0, -1.0));
    }

    #[simd_test(enable = "sse3")]
    unsafe fn test_mm_movehdup_ps() {
        let x = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
        // Odd lanes are duplicated downward.
        assert_eq_m128(_mm_movehdup_ps(x), _mm_setr_ps(5.0, 5.0, -10.0, -10.0));
    }

    #[simd_test(enable = "sse3")]
    unsafe fn test_mm_moveldup_ps() {
        let x = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
        // Even lanes are duplicated upward.
        assert_eq_m128(_mm_moveldup_ps(x), _mm_setr_ps(-1.0, -1.0, 0.0, 0.0));
    }

    #[simd_test(enable = "sse3")]
    unsafe fn test_mm_loaddup_pd() {
        let value = -5.0;
        let got = _mm_loaddup_pd(&value);
        assert_eq_m128d(got, _mm_setr_pd(value, value));
    }
}
259