//! Streaming SIMD Extensions 3 (SSE3)

use crate::{
    core_arch::{simd::*, simd_llvm::*, x86::*},
    mem::transmute,
};

#[cfg(test)]
use stdarch_test::assert_instr;

/// Alternately adds and subtracts packed single-precision (32-bit)
/// floating-point elements in `a` to/from packed elements in `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_addsub_ps)
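///
/// # Examples
///
/// An illustrative sketch, assuming an `unsafe`, SSE3-enabled context
/// (the values mirror this module's tests); even lanes are subtracted,
/// odd lanes are added:
///
/// ```ignore
/// let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
/// let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
/// let r = _mm_addsub_ps(a, b);
/// // r == [-1.0 - -100.0, 5.0 + 20.0, 0.0 - 0.0, -10.0 + -5.0]
/// //   == [99.0, 25.0, 0.0, -15.0]
/// ```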
#[inline]
#[target_feature(enable = "sse3")]
#[cfg_attr(test, assert_instr(addsubps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_addsub_ps(a: __m128, b: __m128) -> __m128 {
    let a: f32x4 = a.as_f32x4();
    let b: f32x4 = b.as_f32x4();
    let add: f32x4 = simd_add(a, b);
    let sub: f32x4 = simd_sub(a, b);
    // Shuffle indices 0..4 select from `add` and 4..8 from `sub`, so even
    // result lanes hold differences and odd result lanes hold sums.
    simd_shuffle!(add, sub, [4, 1, 6, 3])
}

/// Alternately adds and subtracts packed double-precision (64-bit)
/// floating-point elements in `a` to/from packed elements in `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_addsub_pd)
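///
/// # Examples
///
/// An illustrative sketch, assuming an `unsafe`, SSE3-enabled context
/// (the values mirror this module's tests); the low lane is subtracted,
/// the high lane is added:
///
/// ```ignore
/// let a = _mm_setr_pd(-1.0, 5.0);
/// let b = _mm_setr_pd(-100.0, 20.0);
/// let r = _mm_addsub_pd(a, b);
/// // r == [-1.0 - -100.0, 5.0 + 20.0] == [99.0, 25.0]
/// ```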
#[inline]
#[target_feature(enable = "sse3")]
#[cfg_attr(test, assert_instr(addsubpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_addsub_pd(a: __m128d, b: __m128d) -> __m128d {
    let a: f64x2 = a.as_f64x2();
    let b: f64x2 = b.as_f64x2();
    let add: f64x2 = simd_add(a, b);
    let sub: f64x2 = simd_sub(a, b);
    // Index 2 selects `sub[0]`, index 1 selects `add[1]`.
    simd_shuffle!(add, sub, [2, 1])
}

/// Horizontally adds adjacent pairs of double-precision (64-bit)
/// floating-point elements in `a` and `b`, and packs the results.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hadd_pd)
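///
/// # Examples
///
/// An illustrative sketch, assuming an `unsafe`, SSE3-enabled context
/// (the values mirror this module's tests); the result is
/// `[a[0] + a[1], b[0] + b[1]]`:
///
/// ```ignore
/// let a = _mm_setr_pd(-1.0, 5.0);
/// let b = _mm_setr_pd(-100.0, 20.0);
/// let r = _mm_hadd_pd(a, b);
/// // r == [-1.0 + 5.0, -100.0 + 20.0] == [4.0, -80.0]
/// ```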
#[inline]
#[target_feature(enable = "sse3")]
#[cfg_attr(test, assert_instr(haddpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_hadd_pd(a: __m128d, b: __m128d) -> __m128d {
    haddpd(a, b)
}

/// Horizontally adds adjacent pairs of single-precision (32-bit)
/// floating-point elements in `a` and `b`, and packs the results.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hadd_ps)
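///
/// # Examples
///
/// An illustrative sketch, assuming an `unsafe`, SSE3-enabled context
/// (the values mirror this module's tests); the result is
/// `[a[0] + a[1], a[2] + a[3], b[0] + b[1], b[2] + b[3]]`:
///
/// ```ignore
/// let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
/// let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
/// let r = _mm_hadd_ps(a, b);
/// // r == [4.0, -10.0, -80.0, -5.0]
/// ```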
#[inline]
#[target_feature(enable = "sse3")]
#[cfg_attr(test, assert_instr(haddps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_hadd_ps(a: __m128, b: __m128) -> __m128 {
    haddps(a, b)
}

/// Horizontally subtracts adjacent pairs of double-precision (64-bit)
/// floating-point elements in `a` and `b`, and packs the results.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hsub_pd)
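///
/// # Examples
///
/// An illustrative sketch, assuming an `unsafe`, SSE3-enabled context
/// (the values mirror this module's tests); the result is
/// `[a[0] - a[1], b[0] - b[1]]`:
///
/// ```ignore
/// let a = _mm_setr_pd(-1.0, 5.0);
/// let b = _mm_setr_pd(-100.0, 20.0);
/// let r = _mm_hsub_pd(a, b);
/// // r == [-1.0 - 5.0, -100.0 - 20.0] == [-6.0, -120.0]
/// ```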
#[inline]
#[target_feature(enable = "sse3")]
#[cfg_attr(test, assert_instr(hsubpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_hsub_pd(a: __m128d, b: __m128d) -> __m128d {
    hsubpd(a, b)
}

/// Horizontally subtracts adjacent pairs of single-precision (32-bit)
/// floating-point elements in `a` and `b`, and packs the results.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hsub_ps)
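///
/// # Examples
///
/// An illustrative sketch, assuming an `unsafe`, SSE3-enabled context
/// (the values mirror this module's tests); the result is
/// `[a[0] - a[1], a[2] - a[3], b[0] - b[1], b[2] - b[3]]`:
///
/// ```ignore
/// let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
/// let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
/// let r = _mm_hsub_ps(a, b);
/// // r == [-6.0, 10.0, -120.0, 5.0]
/// ```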
#[inline]
#[target_feature(enable = "sse3")]
#[cfg_attr(test, assert_instr(hsubps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_hsub_ps(a: __m128, b: __m128) -> __m128 {
    hsubps(a, b)
}

/// Loads 128 bits of integer data from unaligned memory.
/// This intrinsic may perform better than `_mm_loadu_si128`
/// when the data crosses a cache line boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_lddqu_si128)
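///
/// # Examples
///
/// An illustrative sketch, assuming an `unsafe`, SSE3-enabled context
/// (mirroring this module's test); the loaded value equals the 16 bytes
/// behind `mem_addr`:
///
/// ```ignore
/// let a = _mm_setr_epi8(
///     1, 2, 3, 4, 5, 6, 7, 8,
///     9, 10, 11, 12, 13, 14, 15, 16,
/// );
/// let r = _mm_lddqu_si128(&a);
/// // `r` holds the same 16 bytes as `a`.
/// ```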
#[inline]
#[target_feature(enable = "sse3")]
#[cfg_attr(test, assert_instr(lddqu))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_lddqu_si128(mem_addr: *const __m128i) -> __m128i {
    transmute(lddqu(mem_addr as *const _))
}

/// Duplicates the low double-precision (64-bit) floating-point element
/// from `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movedup_pd)
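///
/// # Examples
///
/// An illustrative sketch, assuming an `unsafe`, SSE3-enabled context
/// (the values mirror this module's tests):
///
/// ```ignore
/// let a = _mm_setr_pd(-1.0, 5.0);
/// let r = _mm_movedup_pd(a);
/// // r == [-1.0, -1.0]
/// ```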
#[inline]
#[target_feature(enable = "sse3")]
#[cfg_attr(test, assert_instr(movddup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_movedup_pd(a: __m128d) -> __m128d {
    simd_shuffle!(a, a, [0, 0])
}

/// Loads a double-precision (64-bit) floating-point element from memory
/// into both elements of the return vector.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loaddup_pd)
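///
/// # Examples
///
/// An illustrative sketch, assuming an `unsafe`, SSE3-enabled context
/// (the values mirror this module's tests):
///
/// ```ignore
/// let d = -5.0;
/// let r = _mm_loaddup_pd(&d);
/// // r == [-5.0, -5.0]
/// ```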
#[inline]
#[target_feature(enable = "sse3")]
#[cfg_attr(test, assert_instr(movddup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loaddup_pd(mem_addr: *const f64) -> __m128d {
    _mm_load1_pd(mem_addr)
}

/// Duplicates odd-indexed single-precision (32-bit) floating-point elements
/// from `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movehdup_ps)
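///
/// # Examples
///
/// An illustrative sketch, assuming an `unsafe`, SSE3-enabled context
/// (the values mirror this module's tests); lanes 1 and 3 are duplicated
/// into the adjacent even lanes:
///
/// ```ignore
/// let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
/// let r = _mm_movehdup_ps(a);
/// // r == [5.0, 5.0, -10.0, -10.0]
/// ```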
#[inline]
#[target_feature(enable = "sse3")]
#[cfg_attr(test, assert_instr(movshdup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_movehdup_ps(a: __m128) -> __m128 {
    simd_shuffle!(a, a, [1, 1, 3, 3])
}

/// Duplicates even-indexed single-precision (32-bit) floating-point elements
/// from `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_moveldup_ps)
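///
/// # Examples
///
/// An illustrative sketch, assuming an `unsafe`, SSE3-enabled context
/// (the values mirror this module's tests); lanes 0 and 2 are duplicated
/// into the adjacent odd lanes:
///
/// ```ignore
/// let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
/// let r = _mm_moveldup_ps(a);
/// // r == [-1.0, -1.0, 0.0, 0.0]
/// ```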
#[inline]
#[target_feature(enable = "sse3")]
#[cfg_attr(test, assert_instr(movsldup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_moveldup_ps(a: __m128) -> __m128 {
    simd_shuffle!(a, a, [0, 0, 2, 2])
}

#[allow(improper_ctypes)]
extern "C" {
    #[link_name = "llvm.x86.sse3.hadd.pd"]
    fn haddpd(a: __m128d, b: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse3.hadd.ps"]
    fn haddps(a: __m128, b: __m128) -> __m128;
    #[link_name = "llvm.x86.sse3.hsub.pd"]
    fn hsubpd(a: __m128d, b: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse3.hsub.ps"]
    fn hsubps(a: __m128, b: __m128) -> __m128;
    #[link_name = "llvm.x86.sse3.ldu.dq"]
    fn lddqu(mem_addr: *const i8) -> i8x16;
}

#[cfg(test)]
mod tests {
    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;

    #[simd_test(enable = "sse3")]
    unsafe fn test_mm_addsub_ps() {
        let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
        let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
        let r = _mm_addsub_ps(a, b);
        assert_eq_m128(r, _mm_setr_ps(99.0, 25.0, 0.0, -15.0));
    }

    #[simd_test(enable = "sse3")]
    unsafe fn test_mm_addsub_pd() {
        let a = _mm_setr_pd(-1.0, 5.0);
        let b = _mm_setr_pd(-100.0, 20.0);
        let r = _mm_addsub_pd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(99.0, 25.0));
    }

    #[simd_test(enable = "sse3")]
    unsafe fn test_mm_hadd_pd() {
        let a = _mm_setr_pd(-1.0, 5.0);
        let b = _mm_setr_pd(-100.0, 20.0);
        let r = _mm_hadd_pd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(4.0, -80.0));
    }

    #[simd_test(enable = "sse3")]
    unsafe fn test_mm_hadd_ps() {
        let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
        let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
        let r = _mm_hadd_ps(a, b);
        assert_eq_m128(r, _mm_setr_ps(4.0, -10.0, -80.0, -5.0));
    }

    #[simd_test(enable = "sse3")]
    unsafe fn test_mm_hsub_pd() {
        let a = _mm_setr_pd(-1.0, 5.0);
        let b = _mm_setr_pd(-100.0, 20.0);
        let r = _mm_hsub_pd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(-6.0, -120.0));
    }

    #[simd_test(enable = "sse3")]
    unsafe fn test_mm_hsub_ps() {
        let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
        let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
        let r = _mm_hsub_ps(a, b);
        assert_eq_m128(r, _mm_setr_ps(-6.0, 10.0, -120.0, 5.0));
    }

    #[simd_test(enable = "sse3")]
    unsafe fn test_mm_lddqu_si128() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4,
            5, 6, 7, 8,
            9, 10, 11, 12,
            13, 14, 15, 16,
        );
        let r = _mm_lddqu_si128(&a);
        assert_eq_m128i(a, r);
    }

    #[simd_test(enable = "sse3")]
    unsafe fn test_mm_movedup_pd() {
        let a = _mm_setr_pd(-1.0, 5.0);
        let r = _mm_movedup_pd(a);
        assert_eq_m128d(r, _mm_setr_pd(-1.0, -1.0));
    }

    #[simd_test(enable = "sse3")]
    unsafe fn test_mm_movehdup_ps() {
        let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
        let r = _mm_movehdup_ps(a);
        assert_eq_m128(r, _mm_setr_ps(5.0, 5.0, -10.0, -10.0));
    }

    #[simd_test(enable = "sse3")]
    unsafe fn test_mm_moveldup_ps() {
        let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
        let r = _mm_moveldup_ps(a);
        assert_eq_m128(r, _mm_setr_ps(-1.0, -1.0, 0.0, 0.0));
    }

    #[simd_test(enable = "sse3")]
    unsafe fn test_mm_loaddup_pd() {
        let d = -5.0;
        let r = _mm_loaddup_pd(&d);
        assert_eq_m128d(r, _mm_setr_pd(d, d));
    }
}