ssse3.rs source code [crates/core_arch/src/x86/ssse3.rs]

1	//! Supplemental Streaming SIMD Extensions 3 (SSSE3)
2
use crate::{
    core_arch::{simd::*, x86::*},
    intrinsics::simd::*,
};

#[cfg(test)]
use stdarch_test::assert_instr;
10
11	/// Computes the absolute value of packed 8-bit signed integers in `a` and
12	/// return the unsigned results.
13	///
14	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_abs_epi8)
15	#[inline]
16	#[target_feature(enable = "ssse3")]
17	#[cfg_attr(test, assert_instr(pabsb))]
18	#[stable(feature = "simd_x86", since = "1.27.0")]
19	pub fn _mm_abs_epi8(a: __m128i) -> __m128i {
20	unsafe {
21	let a: i8x16 = a.as_i8x16();
22	let zero: i8x16 = i8x16::ZERO;
23	let r: i8x16 = simd_select::<m8x16, _>(mask:simd_lt(a, zero), if_true:simd_neg(a), if_false:a);
24	transmute(src:r)
25	}
26	}
27
28	/// Computes the absolute value of each of the packed 16-bit signed integers in
29	/// `a` and
30	/// return the 16-bit unsigned integer
31	///
32	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_abs_epi16)
33	#[inline]
34	#[target_feature(enable = "ssse3")]
35	#[cfg_attr(test, assert_instr(pabsw))]
36	#[stable(feature = "simd_x86", since = "1.27.0")]
37	pub fn _mm_abs_epi16(a: __m128i) -> __m128i {
38	unsafe {
39	let a: i16x8 = a.as_i16x8();
40	let zero: i16x8 = i16x8::ZERO;
41	let r: i16x8 = simd_select::<m16x8, _>(mask:simd_lt(a, zero), if_true:simd_neg(a), if_false:a);
42	transmute(src:r)
43	}
44	}
45
46	/// Computes the absolute value of each of the packed 32-bit signed integers in
47	/// `a` and
48	/// return the 32-bit unsigned integer
49	///
50	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_abs_epi32)
51	#[inline]
52	#[target_feature(enable = "ssse3")]
53	#[cfg_attr(test, assert_instr(pabsd))]
54	#[stable(feature = "simd_x86", since = "1.27.0")]
55	pub fn _mm_abs_epi32(a: __m128i) -> __m128i {
56	unsafe {
57	let a: i32x4 = a.as_i32x4();
58	let zero: i32x4 = i32x4::ZERO;
59	let r: i32x4 = simd_select::<m32x4, _>(mask:simd_lt(a, zero), if_true:simd_neg(a), if_false:a);
60	transmute(src:r)
61	}
62	}
63
64	/// Shuffles bytes from `a` according to the content of `b`.
65	///
66	/// The last 4 bits of each byte of `b` are used as addresses
67	/// into the 16 bytes of `a`.
68	///
69	/// In addition, if the highest significant bit of a byte of `b`
70	/// is set, the respective destination byte is set to 0.
71	///
72	/// Picturing `a` and `b` as `[u8; 16]`, `_mm_shuffle_epi8` is
73	/// logically equivalent to:
74	///
75	/// ```
76	/// fn mm_shuffle_epi8(a: [u8; `16`], b: [u8; `16`]) -> [u8; `16`] {
77	/// let mut r = [`0u8`; `16`];
78	/// for i in `0`..`16` {
79	/// // if the most significant bit of b is set,
80	/// // then the destination byte is set to 0.
81	/// if b[i] & `0x80` == `0u8` {
82	/// r[i] = a[(b[i] % `16`) as usize];
83	/// }
84	/// }
85	/// r
86	/// }
87	/// ```
88	///
89	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shuffle_epi8)
90	#[inline]
91	#[target_feature(enable = "ssse3")]
92	#[cfg_attr(test, assert_instr(pshufb))]
93	#[stable(feature = "simd_x86", since = "1.27.0")]
94	pub fn _mm_shuffle_epi8(a: __m128i, b: __m128i) -> __m128i {
95	unsafe { transmute(src:pshufb128(a.as_u8x16(), b.as_u8x16())) }
96	}
97
98	/// Concatenate 16-byte blocks in `a` and `b` into a 32-byte temporary result,
99	/// shift the result right by `n` bytes, and returns the low 16 bytes.
100	///
101	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_alignr_epi8)
102	#[inline]
103	#[target_feature(enable = "ssse3")]
104	#[cfg_attr(test, assert_instr(palignr, IMM8 = `15`))]
105	#[rustc_legacy_const_generics(`2`)]
106	#[stable(feature = "simd_x86", since = "1.27.0")]
107	pub fn _mm_alignr_epi8<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
108	static_assert_uimm_bits!(IMM8, `8`);
109	// If palignr is shifting the pair of vectors more than the size of two
110	// lanes, emit zero.
111	if IMM8 > `32` {
112	return _mm_setzero_si128();
113	}
114	// If palignr is shifting the pair of input vectors more than one lane,
115	// but less than two lanes, convert to shifting in zeroes.
116	let (a, b) = if IMM8 > `16` {
117	(_mm_setzero_si128(), a)
118	} else {
119	(a, b)
120	};
121	const fn mask(shift: u32, i: u32) -> u32 {
122	if shift > `32` {
123	// Unused, but needs to be a valid index.
124	i
125	} else if shift > `16` {
126	shift - `16` + i
127	} else {
128	shift + i
129	}
130	}
131	unsafe {
132	let r: i8x16 = simd_shuffle!(
133	b.as_i8x16(),
134	a.as_i8x16(),
135	[
136	mask(IMM8 as u32, `0`),
137	mask(IMM8 as u32, `1`),
138	mask(IMM8 as u32, `2`),
139	mask(IMM8 as u32, `3`),
140	mask(IMM8 as u32, `4`),
141	mask(IMM8 as u32, `5`),
142	mask(IMM8 as u32, `6`),
143	mask(IMM8 as u32, `7`),
144	mask(IMM8 as u32, `8`),
145	mask(IMM8 as u32, `9`),
146	mask(IMM8 as u32, `10`),
147	mask(IMM8 as u32, `11`),
148	mask(IMM8 as u32, `12`),
149	mask(IMM8 as u32, `13`),
150	mask(IMM8 as u32, `14`),
151	mask(IMM8 as u32, `15`),
152	],
153	);
154	transmute(r)
155	}
156	}
157
158	/// Horizontally adds the adjacent pairs of values contained in 2 packed
159	/// 128-bit vectors of `[8 x i16]`.
160	///
161	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hadd_epi16)
162	#[inline]
163	#[target_feature(enable = "ssse3")]
164	#[cfg_attr(test, assert_instr(phaddw))]
165	#[stable(feature = "simd_x86", since = "1.27.0")]
166	pub fn _mm_hadd_epi16(a: __m128i, b: __m128i) -> __m128i {
167	unsafe { transmute(src:phaddw128(a.as_i16x8(), b.as_i16x8())) }
168	}
169
170	/// Horizontally adds the adjacent pairs of values contained in 2 packed
171	/// 128-bit vectors of `[8 x i16]`. Positive sums greater than 7FFFh are
172	/// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h.
173	///
174	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hadds_epi16)
175	#[inline]
176	#[target_feature(enable = "ssse3")]
177	#[cfg_attr(test, assert_instr(phaddsw))]
178	#[stable(feature = "simd_x86", since = "1.27.0")]
179	pub fn _mm_hadds_epi16(a: __m128i, b: __m128i) -> __m128i {
180	unsafe { transmute(src:phaddsw128(a.as_i16x8(), b.as_i16x8())) }
181	}
182
183	/// Horizontally adds the adjacent pairs of values contained in 2 packed
184	/// 128-bit vectors of `[4 x i32]`.
185	///
186	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hadd_epi32)
187	#[inline]
188	#[target_feature(enable = "ssse3")]
189	#[cfg_attr(test, assert_instr(phaddd))]
190	#[stable(feature = "simd_x86", since = "1.27.0")]
191	pub fn _mm_hadd_epi32(a: __m128i, b: __m128i) -> __m128i {
192	unsafe { transmute(src:phaddd128(a.as_i32x4(), b.as_i32x4())) }
193	}
194
195	/// Horizontally subtract the adjacent pairs of values contained in 2
196	/// packed 128-bit vectors of `[8 x i16]`.
197	///
198	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hsub_epi16)
199	#[inline]
200	#[target_feature(enable = "ssse3")]
201	#[cfg_attr(test, assert_instr(phsubw))]
202	#[stable(feature = "simd_x86", since = "1.27.0")]
203	pub fn _mm_hsub_epi16(a: __m128i, b: __m128i) -> __m128i {
204	unsafe { transmute(src:phsubw128(a.as_i16x8(), b.as_i16x8())) }
205	}
206
207	/// Horizontally subtract the adjacent pairs of values contained in 2
208	/// packed 128-bit vectors of `[8 x i16]`. Positive differences greater than
209	/// 7FFFh are saturated to 7FFFh. Negative differences less than 8000h are
210	/// saturated to 8000h.
211	///
212	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hsubs_epi16)
213	#[inline]
214	#[target_feature(enable = "ssse3")]
215	#[cfg_attr(test, assert_instr(phsubsw))]
216	#[stable(feature = "simd_x86", since = "1.27.0")]
217	pub fn _mm_hsubs_epi16(a: __m128i, b: __m128i) -> __m128i {
218	unsafe { transmute(src:phsubsw128(a.as_i16x8(), b.as_i16x8())) }
219	}
220
221	/// Horizontally subtract the adjacent pairs of values contained in 2
222	/// packed 128-bit vectors of `[4 x i32]`.
223	///
224	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hsub_epi32)
225	#[inline]
226	#[target_feature(enable = "ssse3")]
227	#[cfg_attr(test, assert_instr(phsubd))]
228	#[stable(feature = "simd_x86", since = "1.27.0")]
229	pub fn _mm_hsub_epi32(a: __m128i, b: __m128i) -> __m128i {
230	unsafe { transmute(src:phsubd128(a.as_i32x4(), b.as_i32x4())) }
231	}
232
233	/// Multiplies corresponding pairs of packed 8-bit unsigned integer
234	/// values contained in the first source operand and packed 8-bit signed
235	/// integer values contained in the second source operand, add pairs of
236	/// contiguous products with signed saturation, and writes the 16-bit sums to
237	/// the corresponding bits in the destination.
238	///
239	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maddubs_epi16)
240	#[inline]
241	#[target_feature(enable = "ssse3")]
242	#[cfg_attr(test, assert_instr(pmaddubsw))]
243	#[stable(feature = "simd_x86", since = "1.27.0")]
244	pub fn _mm_maddubs_epi16(a: __m128i, b: __m128i) -> __m128i {
245	unsafe { transmute(src:pmaddubsw128(a.as_u8x16(), b.as_i8x16())) }
246	}
247
248	/// Multiplies packed 16-bit signed integer values, truncate the 32-bit
249	/// product to the 18 most significant bits by right-shifting, round the
250	/// truncated value by adding 1, and write bits `[16:1]` to the destination.
251	///
252	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mulhrs_epi16)
253	#[inline]
254	#[target_feature(enable = "ssse3")]
255	#[cfg_attr(test, assert_instr(pmulhrsw))]
256	#[stable(feature = "simd_x86", since = "1.27.0")]
257	pub fn _mm_mulhrs_epi16(a: __m128i, b: __m128i) -> __m128i {
258	unsafe { transmute(src:pmulhrsw128(a.as_i16x8(), b.as_i16x8())) }
259	}
260
261	/// Negates packed 8-bit integers in `a` when the corresponding signed 8-bit
262	/// integer in `b` is negative, and returns the result.
263	/// Elements in result are zeroed out when the corresponding element in `b`
264	/// is zero.
265	///
266	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sign_epi8)
267	#[inline]
268	#[target_feature(enable = "ssse3")]
269	#[cfg_attr(test, assert_instr(psignb))]
270	#[stable(feature = "simd_x86", since = "1.27.0")]
271	pub fn _mm_sign_epi8(a: __m128i, b: __m128i) -> __m128i {
272	unsafe { transmute(src:psignb128(a.as_i8x16(), b.as_i8x16())) }
273	}
274
275	/// Negates packed 16-bit integers in `a` when the corresponding signed 16-bit
276	/// integer in `b` is negative, and returns the results.
277	/// Elements in result are zeroed out when the corresponding element in `b`
278	/// is zero.
279	///
280	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sign_epi16)
281	#[inline]
282	#[target_feature(enable = "ssse3")]
283	#[cfg_attr(test, assert_instr(psignw))]
284	#[stable(feature = "simd_x86", since = "1.27.0")]
285	pub fn _mm_sign_epi16(a: __m128i, b: __m128i) -> __m128i {
286	unsafe { transmute(src:psignw128(a.as_i16x8(), b.as_i16x8())) }
287	}
288
289	/// Negates packed 32-bit integers in `a` when the corresponding signed 32-bit
290	/// integer in `b` is negative, and returns the results.
291	/// Element in result are zeroed out when the corresponding element in `b`
292	/// is zero.
293	///
294	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sign_epi32)
295	#[inline]
296	#[target_feature(enable = "ssse3")]
297	#[cfg_attr(test, assert_instr(psignd))]
298	#[stable(feature = "simd_x86", since = "1.27.0")]
299	pub fn _mm_sign_epi32(a: __m128i, b: __m128i) -> __m128i {
300	unsafe { transmute(src:psignd128(a.as_i32x4(), b.as_i32x4())) }
301	}
302
303	#[allow(improper_ctypes)]
304	unsafe extern "C" {
305	#[link_name = "llvm.x86.ssse3.pshuf.b.128"]
306	unsafefn pshufb128(a: u8x16, b: u8x16) -> u8x16;
307
308	#[link_name = "llvm.x86.ssse3.phadd.w.128"]
309	unsafefn phaddw128(a: i16x8, b: i16x8) -> i16x8;
310
311	#[link_name = "llvm.x86.ssse3.phadd.sw.128"]
312	unsafefn phaddsw128(a: i16x8, b: i16x8) -> i16x8;
313
314	#[link_name = "llvm.x86.ssse3.phadd.d.128"]
315	unsafefn phaddd128(a: i32x4, b: i32x4) -> i32x4;
316
317	#[link_name = "llvm.x86.ssse3.phsub.w.128"]
318	unsafefn phsubw128(a: i16x8, b: i16x8) -> i16x8;
319
320	#[link_name = "llvm.x86.ssse3.phsub.sw.128"]
321	unsafefn phsubsw128(a: i16x8, b: i16x8) -> i16x8;
322
323	#[link_name = "llvm.x86.ssse3.phsub.d.128"]
324	unsafefn phsubd128(a: i32x4, b: i32x4) -> i32x4;
325
326	#[link_name = "llvm.x86.ssse3.pmadd.ub.sw.128"]
327	unsafefn pmaddubsw128(a: u8x16, b: i8x16) -> i16x8;
328
329	#[link_name = "llvm.x86.ssse3.pmul.hr.sw.128"]
330	unsafefn pmulhrsw128(a: i16x8, b: i16x8) -> i16x8;
331
332	#[link_name = "llvm.x86.ssse3.psign.b.128"]
333	unsafefn psignb128(a: i8x16, b: i8x16) -> i8x16;
334
335	#[link_name = "llvm.x86.ssse3.psign.w.128"]
336	unsafefn psignw128(a: i16x8, b: i16x8) -> i16x8;
337
338	#[link_name = "llvm.x86.ssse3.psign.d.128"]
339	unsafefn psignd128(a: i32x4, b: i32x4) -> i32x4;
340	}
341
// Unit tests for the SSSE3 intrinsics: each test feeds fixed input vectors to
// one intrinsic and compares the result against a hand-written expectation.
#[cfg(test)]
mod tests {
    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_abs_epi8() {
        let r = _mm_abs_epi8(_mm_set1_epi8(-5));
        assert_eq_m128i(r, _mm_set1_epi8(5));
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_abs_epi16() {
        let r = _mm_abs_epi16(_mm_set1_epi16(-5));
        assert_eq_m128i(r, _mm_set1_epi16(5));
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_abs_epi32() {
        let r = _mm_abs_epi32(_mm_set1_epi32(-5));
        assert_eq_m128i(r, _mm_set1_epi32(5));
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_shuffle_epi8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            4, 128_u8 as i8, 4, 3,
            24, 12, 6, 19,
            12, 5, 5, 10,
            4, 1, 8, 0,
        );
        let expected = _mm_setr_epi8(5, 0, 5, 4, 9, 13, 7, 4, 13, 6, 6, 11, 5, 2, 9, 1);
        let r = _mm_shuffle_epi8(a, b);
        assert_eq_m128i(r, expected);

        // Test indices greater than 15 wrapping around
        let b = _mm_add_epi8(b, _mm_set1_epi8(32));
        let r = _mm_shuffle_epi8(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_alignr_epi8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            4, 63, 4, 3,
            24, 12, 6, 19,
            12, 5, 5, 10,
            4, 1, 8, 0,
        );
        // Shifting past both lanes yields all zeroes.
        let r = _mm_alignr_epi8::<33>(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(0));

        // Between one and two lanes: `a` shifted right with zeroes shifted in.
        let r = _mm_alignr_epi8::<17>(a, b);
        #[rustfmt::skip]
        let expected = _mm_setr_epi8(
            2, 3, 4, 5, 6, 7, 8, 9,
            10, 11, 12, 13, 14, 15, 16, 0,
        );
        assert_eq_m128i(r, expected);

        // Exactly one lane: the result is `a`.
        let r = _mm_alignr_epi8::<16>(a, b);
        assert_eq_m128i(r, a);

        // Less than one lane: last byte of `b` followed by the start of `a`.
        let r = _mm_alignr_epi8::<15>(a, b);
        #[rustfmt::skip]
        let expected = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        assert_eq_m128i(r, expected);

        // No shift: the result is `b`.
        let r = _mm_alignr_epi8::<0>(a, b);
        assert_eq_m128i(r, b);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hadd_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 4, 3, 24, 12, 6, 19);
        let expected = _mm_setr_epi16(3, 7, 11, 15, 132, 7, 36, 25);
        let r = _mm_hadd_epi16(a, b);
        assert_eq_m128i(r, expected);

        // Test wrapping on overflow
        let a = _mm_setr_epi16(i16::MAX, 1, i16::MAX, 2, i16::MAX, 3, i16::MAX, 4);
        let b = _mm_setr_epi16(i16::MIN, -1, i16::MIN, -2, i16::MIN, -3, i16::MIN, -4);
        let expected = _mm_setr_epi16(
            i16::MIN,
            i16::MIN + 1,
            i16::MIN + 2,
            i16::MIN + 3,
            i16::MAX,
            i16::MAX - 1,
            i16::MAX - 2,
            i16::MAX - 3,
        );
        let r = _mm_hadd_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hadds_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 4, 3, 32767, 1, -32768, -1);
        let expected = _mm_setr_epi16(3, 7, 11, 15, 132, 7, 32767, -32768);
        let r = _mm_hadds_epi16(a, b);
        assert_eq_m128i(r, expected);

        // Test saturating on overflow
        let a = _mm_setr_epi16(i16::MAX, 1, i16::MAX, 2, i16::MAX, 3, i16::MAX, 4);
        let b = _mm_setr_epi16(i16::MIN, -1, i16::MIN, -2, i16::MIN, -3, i16::MIN, -4);
        let expected = _mm_setr_epi16(
            i16::MAX,
            i16::MAX,
            i16::MAX,
            i16::MAX,
            i16::MIN,
            i16::MIN,
            i16::MIN,
            i16::MIN,
        );
        let r = _mm_hadds_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hadd_epi32() {
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let b = _mm_setr_epi32(4, 128, 4, 3);
        let expected = _mm_setr_epi32(3, 7, 132, 7);
        let r = _mm_hadd_epi32(a, b);
        assert_eq_m128i(r, expected);

        // Test wrapping on overflow
        let a = _mm_setr_epi32(i32::MAX, 1, i32::MAX, 2);
        let b = _mm_setr_epi32(i32::MIN, -1, i32::MIN, -2);
        let expected = _mm_setr_epi32(i32::MIN, i32::MIN + 1, i32::MAX, i32::MAX - 1);
        let r = _mm_hadd_epi32(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hsub_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 4, 3, 24, 12, 6, 19);
        let expected = _mm_setr_epi16(-1, -1, -1, -1, -124, 1, 12, -13);
        let r = _mm_hsub_epi16(a, b);
        assert_eq_m128i(r, expected);

        // Test wrapping on overflow
        let a = _mm_setr_epi16(i16::MAX, -1, i16::MAX, -2, i16::MAX, -3, i16::MAX, -4);
        let b = _mm_setr_epi16(i16::MIN, 1, i16::MIN, 2, i16::MIN, 3, i16::MIN, 4);
        let expected = _mm_setr_epi16(
            i16::MIN,
            i16::MIN + 1,
            i16::MIN + 2,
            i16::MIN + 3,
            i16::MAX,
            i16::MAX - 1,
            i16::MAX - 2,
            i16::MAX - 3,
        );
        let r = _mm_hsub_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hsubs_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 4, 3, 32767, -1, -32768, 1);
        let expected = _mm_setr_epi16(-1, -1, -1, -1, -124, 1, 32767, -32768);
        let r = _mm_hsubs_epi16(a, b);
        assert_eq_m128i(r, expected);

        // Test saturating on overflow
        let a = _mm_setr_epi16(i16::MAX, -1, i16::MAX, -2, i16::MAX, -3, i16::MAX, -4);
        let b = _mm_setr_epi16(i16::MIN, 1, i16::MIN, 2, i16::MIN, 3, i16::MIN, 4);
        let expected = _mm_setr_epi16(
            i16::MAX,
            i16::MAX,
            i16::MAX,
            i16::MAX,
            i16::MIN,
            i16::MIN,
            i16::MIN,
            i16::MIN,
        );
        let r = _mm_hsubs_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hsub_epi32() {
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let b = _mm_setr_epi32(4, 128, 4, 3);
        let expected = _mm_setr_epi32(-1, -1, -124, 1);
        let r = _mm_hsub_epi32(a, b);
        assert_eq_m128i(r, expected);

        // Test wrapping on overflow
        let a = _mm_setr_epi32(i32::MAX, -1, i32::MAX, -2);
        let b = _mm_setr_epi32(i32::MIN, 1, i32::MIN, 2);
        let expected = _mm_setr_epi32(i32::MIN, i32::MIN + 1, i32::MAX, i32::MAX - 1);
        let r = _mm_hsub_epi32(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_maddubs_epi16() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            4, 63, 4, 3,
            24, 12, 6, 19,
            12, 5, 5, 10,
            4, 1, 8, 0,
        );
        let expected = _mm_setr_epi16(130, 24, 192, 194, 158, 175, 66, 120);
        let r = _mm_maddubs_epi16(a, b);
        assert_eq_m128i(r, expected);

        // Test widening and saturation
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            u8::MAX as i8, u8::MAX as i8,
            u8::MAX as i8, u8::MAX as i8,
            u8::MAX as i8, u8::MAX as i8,
            100, 100, 0, 0,
            0, 0, 0, 0, 0, 0,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            i8::MAX, i8::MAX,
            i8::MAX, i8::MIN,
            i8::MIN, i8::MIN,
            50, 15, 0, 0, 0,
            0, 0, 0, 0, 0,
        );
        let expected = _mm_setr_epi16(i16::MAX, -255, i16::MIN, 6500, 0, 0, 0, 0);
        let r = _mm_maddubs_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_mulhrs_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 4, 3, 32767, -1, -32768, 1);
        let expected = _mm_setr_epi16(0, 0, 0, 0, 5, 0, -7, 0);
        let r = _mm_mulhrs_epi16(a, b);
        assert_eq_m128i(r, expected);

        // Test extreme values
        let a = _mm_setr_epi16(i16::MAX, i16::MIN, i16::MIN, 0, 0, 0, 0, 0);
        let b = _mm_setr_epi16(i16::MAX, i16::MIN, i16::MAX, 0, 0, 0, 0, 0);
        let expected = _mm_setr_epi16(i16::MAX - 1, i16::MIN, -i16::MAX, 0, 0, 0, 0, 0);
        let r = _mm_mulhrs_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_sign_epi8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, -14, -15, 16,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            4, 63, -4, 3, 24, 12, -6, -19,
            12, 5, -5, 10, 4, 1, -8, 0,
        );
        #[rustfmt::skip]
        let expected = _mm_setr_epi8(
            1, 2, -3, 4, 5, 6, -7, -8,
            9, 10, -11, 12, 13, -14, 15, 0,
        );
        let r = _mm_sign_epi8(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_sign_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, -5, -6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 0, 3, 1, -1, -2, 1);
        let expected = _mm_setr_epi16(1, 2, 0, 4, -5, 6, -7, 8);
        let r = _mm_sign_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_sign_epi32() {
        let a = _mm_setr_epi32(-1, 2, 3, 4);
        let b = _mm_setr_epi32(1, -1, 1, 0);
        let expected = _mm_setr_epi32(-1, -2, 3, 0);
        let r = _mm_sign_epi32(a, b);
        assert_eq_m128i(r, expected);
    }
}
657

Provided by KDAB

Definitions