//! Streaming SIMD Extensions 3 (SSE3)

use crate::{
    core_arch::{simd::*, simd_llvm::*, x86::*},
    mem::transmute,
};

#[cfg(test)]
use stdarch_test::assert_instr;

/// Alternately adds and subtracts packed single-precision (32-bit)
/// floating-point elements in `a` to/from packed elements in `b`
/// (subtraction in even-indexed lanes, addition in odd-indexed lanes).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_addsub_ps)
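///
/// A minimal usage sketch (illustrative only; assumes an `x86_64` target
/// and that SSE3 is detected at runtime):
///
/// ```
/// # #[cfg(target_arch = "x86_64")] {
/// # use std::arch::x86_64::*;
/// # if is_x86_feature_detected!("sse3") { unsafe {
/// let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
/// let b = _mm_setr_ps(10.0, 20.0, 30.0, 40.0);
/// // Lanes: [a0 - b0, a1 + b1, a2 - b2, a3 + b3]
/// let r = _mm_addsub_ps(a, b);
/// let mut out = [0.0f32; 4];
/// _mm_storeu_ps(out.as_mut_ptr(), r);
/// assert_eq!(out, [-9.0, 22.0, -27.0, 44.0]);
/// # }} }
/// ```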
#[inline]
#[target_feature(enable = "sse3")]
#[cfg_attr(test, assert_instr(addsubps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_addsub_ps(a: __m128, b: __m128) -> __m128 {
    let a = a.as_f32x4();
    let b = b.as_f32x4();
    let add = simd_add(a, b);
    let sub = simd_sub(a, b);
    simd_shuffle!(add, sub, [4, 1, 6, 3])
}

/// Alternately adds and subtracts packed double-precision (64-bit)
/// floating-point elements in `a` to/from packed elements in `b`
/// (subtraction in the low lane, addition in the high lane).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_addsub_pd)
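///
/// A minimal usage sketch (illustrative only; assumes an `x86_64` target
/// with SSE3 detected at runtime):
///
/// ```
/// # #[cfg(target_arch = "x86_64")] {
/// # use std::arch::x86_64::*;
/// # if is_x86_feature_detected!("sse3") { unsafe {
/// let a = _mm_setr_pd(1.0, 2.0);
/// let b = _mm_setr_pd(10.0, 20.0);
/// // Lanes: [a0 - b0, a1 + b1]
/// let r = _mm_addsub_pd(a, b);
/// let mut out = [0.0f64; 2];
/// _mm_storeu_pd(out.as_mut_ptr(), r);
/// assert_eq!(out, [-9.0, 22.0]);
/// # }} }
/// ```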
#[inline]
#[target_feature(enable = "sse3")]
#[cfg_attr(test, assert_instr(addsubpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_addsub_pd(a: __m128d, b: __m128d) -> __m128d {
    let a = a.as_f64x2();
    let b = b.as_f64x2();
    let add = simd_add(a, b);
    let sub = simd_sub(a, b);
    simd_shuffle!(add, sub, [2, 1])
}

/// Horizontally adds adjacent pairs of double-precision (64-bit)
/// floating-point elements in `a` and `b`, and packs the results.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hadd_pd)
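///
/// A short illustrative sketch (assumes runtime SSE3 detection on `x86_64`):
///
/// ```
/// # #[cfg(target_arch = "x86_64")] {
/// # use std::arch::x86_64::*;
/// # if is_x86_feature_detected!("sse3") { unsafe {
/// let a = _mm_setr_pd(1.0, 2.0);
/// let b = _mm_setr_pd(10.0, 20.0);
/// // Lanes: [a0 + a1, b0 + b1]
/// let r = _mm_hadd_pd(a, b);
/// let mut out = [0.0f64; 2];
/// _mm_storeu_pd(out.as_mut_ptr(), r);
/// assert_eq!(out, [3.0, 30.0]);
/// # }} }
/// ```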
#[inline]
#[target_feature(enable = "sse3")]
#[cfg_attr(test, assert_instr(haddpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_hadd_pd(a: __m128d, b: __m128d) -> __m128d {
    haddpd(a, b)
}

/// Horizontally adds adjacent pairs of single-precision (32-bit)
/// floating-point elements in `a` and `b`, and packs the results.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hadd_ps)
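///
/// A short illustrative sketch (assumes runtime SSE3 detection on `x86_64`):
///
/// ```
/// # #[cfg(target_arch = "x86_64")] {
/// # use std::arch::x86_64::*;
/// # if is_x86_feature_detected!("sse3") { unsafe {
/// let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
/// let b = _mm_setr_ps(10.0, 20.0, 30.0, 40.0);
/// // Lanes: [a0 + a1, a2 + a3, b0 + b1, b2 + b3]
/// let r = _mm_hadd_ps(a, b);
/// let mut out = [0.0f32; 4];
/// _mm_storeu_ps(out.as_mut_ptr(), r);
/// assert_eq!(out, [3.0, 7.0, 30.0, 70.0]);
/// # }} }
/// ```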
#[inline]
#[target_feature(enable = "sse3")]
#[cfg_attr(test, assert_instr(haddps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_hadd_ps(a: __m128, b: __m128) -> __m128 {
    haddps(a, b)
}

/// Horizontally subtracts adjacent pairs of double-precision (64-bit)
/// floating-point elements in `a` and `b`, and packs the results.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hsub_pd)
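///
/// A short illustrative sketch (assumes runtime SSE3 detection on `x86_64`):
///
/// ```
/// # #[cfg(target_arch = "x86_64")] {
/// # use std::arch::x86_64::*;
/// # if is_x86_feature_detected!("sse3") { unsafe {
/// let a = _mm_setr_pd(1.0, 2.0);
/// let b = _mm_setr_pd(10.0, 20.0);
/// // Lanes: [a0 - a1, b0 - b1]
/// let r = _mm_hsub_pd(a, b);
/// let mut out = [0.0f64; 2];
/// _mm_storeu_pd(out.as_mut_ptr(), r);
/// assert_eq!(out, [-1.0, -10.0]);
/// # }} }
/// ```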
#[inline]
#[target_feature(enable = "sse3")]
#[cfg_attr(test, assert_instr(hsubpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_hsub_pd(a: __m128d, b: __m128d) -> __m128d {
    hsubpd(a, b)
}

/// Horizontally subtracts adjacent pairs of single-precision (32-bit)
/// floating-point elements in `a` and `b`, and packs the results.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hsub_ps)
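///
/// A short illustrative sketch (assumes runtime SSE3 detection on `x86_64`):
///
/// ```
/// # #[cfg(target_arch = "x86_64")] {
/// # use std::arch::x86_64::*;
/// # if is_x86_feature_detected!("sse3") { unsafe {
/// let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
/// let b = _mm_setr_ps(10.0, 20.0, 30.0, 40.0);
/// // Lanes: [a0 - a1, a2 - a3, b0 - b1, b2 - b3]
/// let r = _mm_hsub_ps(a, b);
/// let mut out = [0.0f32; 4];
/// _mm_storeu_ps(out.as_mut_ptr(), r);
/// assert_eq!(out, [-1.0, -1.0, -10.0, -10.0]);
/// # }} }
/// ```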
#[inline]
#[target_feature(enable = "sse3")]
#[cfg_attr(test, assert_instr(hsubps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_hsub_ps(a: __m128, b: __m128) -> __m128 {
    hsubps(a, b)
}

/// Loads 128 bits of integer data from unaligned memory.
/// This intrinsic may perform better than `_mm_loadu_si128`
/// when the data crosses a cache-line boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_lddqu_si128)
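///
/// A minimal usage sketch (illustrative only; note that `mem_addr` does not
/// need to be 16-byte aligned):
///
/// ```
/// # #[cfg(target_arch = "x86_64")] {
/// # use std::arch::x86_64::*;
/// # if is_x86_feature_detected!("sse3") { unsafe {
/// let bytes: [i8; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
/// let r = _mm_lddqu_si128(bytes.as_ptr() as *const __m128i);
/// let mut out = [0i8; 16];
/// _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, r);
/// assert_eq!(out, bytes);
/// # }} }
/// ```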
#[inline]
#[target_feature(enable = "sse3")]
#[cfg_attr(test, assert_instr(lddqu))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_lddqu_si128(mem_addr: *const __m128i) -> __m128i {
    transmute(lddqu(mem_addr as *const _))
}

/// Duplicates the low double-precision (64-bit) floating-point element
/// from `a` into both elements of the return vector.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movedup_pd)
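///
/// A short illustrative sketch (assumes runtime SSE3 detection on `x86_64`):
///
/// ```
/// # #[cfg(target_arch = "x86_64")] {
/// # use std::arch::x86_64::*;
/// # if is_x86_feature_detected!("sse3") { unsafe {
/// let a = _mm_setr_pd(1.0, 2.0);
/// // The low lane is broadcast to both lanes: [a0, a0]
/// let r = _mm_movedup_pd(a);
/// let mut out = [0.0f64; 2];
/// _mm_storeu_pd(out.as_mut_ptr(), r);
/// assert_eq!(out, [1.0, 1.0]);
/// # }} }
/// ```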
#[inline]
#[target_feature(enable = "sse3")]
#[cfg_attr(test, assert_instr(movddup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_movedup_pd(a: __m128d) -> __m128d {
    simd_shuffle!(a, a, [0, 0])
}

/// Loads a double-precision (64-bit) floating-point element from memory
/// into both elements of the return vector.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loaddup_pd)
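///
/// A short illustrative sketch (assumes runtime SSE3 detection on `x86_64`):
///
/// ```
/// # #[cfg(target_arch = "x86_64")] {
/// # use std::arch::x86_64::*;
/// # if is_x86_feature_detected!("sse3") { unsafe {
/// let d = 4.25_f64;
/// let r = _mm_loaddup_pd(&d);
/// let mut out = [0.0f64; 2];
/// _mm_storeu_pd(out.as_mut_ptr(), r);
/// assert_eq!(out, [4.25, 4.25]);
/// # }} }
/// ```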
#[inline]
#[target_feature(enable = "sse3")]
#[cfg_attr(test, assert_instr(movddup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loaddup_pd(mem_addr: *const f64) -> __m128d {
    _mm_load1_pd(mem_addr)
}

/// Duplicates odd-indexed single-precision (32-bit) floating-point elements
/// from `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movehdup_ps)
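///
/// A short illustrative sketch (assumes runtime SSE3 detection on `x86_64`):
///
/// ```
/// # #[cfg(target_arch = "x86_64")] {
/// # use std::arch::x86_64::*;
/// # if is_x86_feature_detected!("sse3") { unsafe {
/// let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
/// // Odd lanes are duplicated into the adjacent even lanes: [a1, a1, a3, a3]
/// let r = _mm_movehdup_ps(a);
/// let mut out = [0.0f32; 4];
/// _mm_storeu_ps(out.as_mut_ptr(), r);
/// assert_eq!(out, [2.0, 2.0, 4.0, 4.0]);
/// # }} }
/// ```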
#[inline]
#[target_feature(enable = "sse3")]
#[cfg_attr(test, assert_instr(movshdup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_movehdup_ps(a: __m128) -> __m128 {
    simd_shuffle!(a, a, [1, 1, 3, 3])
}

/// Duplicates even-indexed single-precision (32-bit) floating-point elements
/// from `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_moveldup_ps)
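///
/// A short illustrative sketch (assumes runtime SSE3 detection on `x86_64`):
///
/// ```
/// # #[cfg(target_arch = "x86_64")] {
/// # use std::arch::x86_64::*;
/// # if is_x86_feature_detected!("sse3") { unsafe {
/// let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
/// // Even lanes are duplicated into the adjacent odd lanes: [a0, a0, a2, a2]
/// let r = _mm_moveldup_ps(a);
/// let mut out = [0.0f32; 4];
/// _mm_storeu_ps(out.as_mut_ptr(), r);
/// assert_eq!(out, [1.0, 1.0, 3.0, 3.0]);
/// # }} }
/// ```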
#[inline]
#[target_feature(enable = "sse3")]
#[cfg_attr(test, assert_instr(movsldup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_moveldup_ps(a: __m128) -> __m128 {
    simd_shuffle!(a, a, [0, 0, 2, 2])
}

#[allow(improper_ctypes)]
extern "C" {
    #[link_name = "llvm.x86.sse3.hadd.pd"]
    fn haddpd(a: __m128d, b: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse3.hadd.ps"]
    fn haddps(a: __m128, b: __m128) -> __m128;
    #[link_name = "llvm.x86.sse3.hsub.pd"]
    fn hsubpd(a: __m128d, b: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse3.hsub.ps"]
    fn hsubps(a: __m128, b: __m128) -> __m128;
    #[link_name = "llvm.x86.sse3.ldu.dq"]
    fn lddqu(mem_addr: *const i8) -> i8x16;
}

#[cfg(test)]
mod tests {
    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;

    #[simd_test(enable = "sse3")]
    unsafe fn test_mm_addsub_ps() {
        let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
        let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
        let r = _mm_addsub_ps(a, b);
        assert_eq_m128(r, _mm_setr_ps(99.0, 25.0, 0.0, -15.0));
    }

    #[simd_test(enable = "sse3")]
    unsafe fn test_mm_addsub_pd() {
        let a = _mm_setr_pd(-1.0, 5.0);
        let b = _mm_setr_pd(-100.0, 20.0);
        let r = _mm_addsub_pd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(99.0, 25.0));
    }

    #[simd_test(enable = "sse3")]
    unsafe fn test_mm_hadd_pd() {
        let a = _mm_setr_pd(-1.0, 5.0);
        let b = _mm_setr_pd(-100.0, 20.0);
        let r = _mm_hadd_pd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(4.0, -80.0));
    }

    #[simd_test(enable = "sse3")]
    unsafe fn test_mm_hadd_ps() {
        let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
        let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
        let r = _mm_hadd_ps(a, b);
        assert_eq_m128(r, _mm_setr_ps(4.0, -10.0, -80.0, -5.0));
    }

    #[simd_test(enable = "sse3")]
    unsafe fn test_mm_hsub_pd() {
        let a = _mm_setr_pd(-1.0, 5.0);
        let b = _mm_setr_pd(-100.0, 20.0);
        let r = _mm_hsub_pd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(-6.0, -120.0));
    }

    #[simd_test(enable = "sse3")]
    unsafe fn test_mm_hsub_ps() {
        let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
        let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
        let r = _mm_hsub_ps(a, b);
        assert_eq_m128(r, _mm_setr_ps(-6.0, 10.0, -120.0, 5.0));
    }

    #[simd_test(enable = "sse3")]
    unsafe fn test_mm_lddqu_si128() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4,
            5, 6, 7, 8,
            9, 10, 11, 12,
            13, 14, 15, 16,
        );
        let r = _mm_lddqu_si128(&a);
        assert_eq_m128i(a, r);
    }

    #[simd_test(enable = "sse3")]
    unsafe fn test_mm_movedup_pd() {
        let a = _mm_setr_pd(-1.0, 5.0);
        let r = _mm_movedup_pd(a);
        assert_eq_m128d(r, _mm_setr_pd(-1.0, -1.0));
    }

    #[simd_test(enable = "sse3")]
    unsafe fn test_mm_movehdup_ps() {
        let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
        let r = _mm_movehdup_ps(a);
        assert_eq_m128(r, _mm_setr_ps(5.0, 5.0, -10.0, -10.0));
    }

    #[simd_test(enable = "sse3")]
    unsafe fn test_mm_moveldup_ps() {
        let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
        let r = _mm_moveldup_ps(a);
        assert_eq_m128(r, _mm_setr_ps(-1.0, -1.0, 0.0, 0.0));
    }

    #[simd_test(enable = "sse3")]
    unsafe fn test_mm_loaddup_pd() {
        let d = -5.0;
        let r = _mm_loaddup_pd(&d);
        assert_eq_m128d(r, _mm_setr_pd(d, d));
    }
}