ssse3.rs source code [crates/core_arch/src/x86/ssse3.rs]

//! Supplemental Streaming SIMD Extensions 3 (SSSE3)
2
use crate::{
    core_arch::{simd::*, simd_llvm::*, x86::*},
    mem::transmute,
};

#[cfg(test)]
use stdarch_test::assert_instr;
10
11	/// Computes the absolute value of packed 8-bit signed integers in `a` and
12	/// return the unsigned results.
13	///
14	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_abs_epi8)
15	#[inline]
16	#[target_feature(enable = "ssse3")]
17	#[cfg_attr(test, assert_instr(pabsb))]
18	#[stable(feature = "simd_x86", since = "1.27.0")]
19	pub unsafe fn _mm_abs_epi8(a: __m128i) -> __m128i {
20	transmute(src:pabsb128(a.as_i8x16()))
21	}
22
23	/// Computes the absolute value of each of the packed 16-bit signed integers in
24	/// `a` and
25	/// return the 16-bit unsigned integer
26	///
27	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_abs_epi16)
28	#[inline]
29	#[target_feature(enable = "ssse3")]
30	#[cfg_attr(test, assert_instr(pabsw))]
31	#[stable(feature = "simd_x86", since = "1.27.0")]
32	pub unsafe fn _mm_abs_epi16(a: __m128i) -> __m128i {
33	transmute(src:pabsw128(a.as_i16x8()))
34	}
35
36	/// Computes the absolute value of each of the packed 32-bit signed integers in
37	/// `a` and
38	/// return the 32-bit unsigned integer
39	///
40	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_abs_epi32)
41	#[inline]
42	#[target_feature(enable = "ssse3")]
43	#[cfg_attr(test, assert_instr(pabsd))]
44	#[stable(feature = "simd_x86", since = "1.27.0")]
45	pub unsafe fn _mm_abs_epi32(a: __m128i) -> __m128i {
46	transmute(src:pabsd128(a.as_i32x4()))
47	}
48
49	/// Shuffles bytes from `a` according to the content of `b`.
50	///
51	/// The last 4 bits of each byte of `b` are used as addresses
52	/// into the 16 bytes of `a`.
53	///
54	/// In addition, if the highest significant bit of a byte of `b`
55	/// is set, the respective destination byte is set to 0.
56	///
57	/// Picturing `a` and `b` as `[u8; 16]`, `_mm_shuffle_epi8` is
58	/// logically equivalent to:
59	///
60	/// ```
61	/// fn mm_shuffle_epi8(a: [u8; `16`], b: [u8; `16`]) -> [u8; `16`] {
62	/// let mut r = [`0u8`; `16`];
63	/// for i in `0`..`16` {
64	/// // if the most significant bit of b is set,
65	/// // then the destination byte is set to 0.
66	/// if b[i] & `0x80` == `0u8` {
67	/// r[i] = a[(b[i] % `16`) as usize];
68	/// }
69	/// }
70	/// r
71	/// }
72	/// ```
73	///
74	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shuffle_epi8)
75	#[inline]
76	#[target_feature(enable = "ssse3")]
77	#[cfg_attr(test, assert_instr(pshufb))]
78	#[stable(feature = "simd_x86", since = "1.27.0")]
79	pub unsafe fn _mm_shuffle_epi8(a: __m128i, b: __m128i) -> __m128i {
80	transmute(src:pshufb128(a:a.as_u8x16(), b:b.as_u8x16()))
81	}
82
83	/// Concatenate 16-byte blocks in `a` and `b` into a 32-byte temporary result,
84	/// shift the result right by `n` bytes, and returns the low 16 bytes.
85	///
86	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_alignr_epi8)
87	#[inline]
88	#[target_feature(enable = "ssse3")]
89	#[cfg_attr(test, assert_instr(palignr, IMM8 = `15`))]
90	#[rustc_legacy_const_generics(`2`)]
91	#[stable(feature = "simd_x86", since = "1.27.0")]
92	pub unsafe fn _mm_alignr_epi8<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
93	static_assert_uimm_bits!(IMM8, `8`);
94	// If palignr is shifting the pair of vectors more than the size of two
95	// lanes, emit zero.
96	if IMM8 > `32` {
97	return _mm_set1_epi8(`0`);
98	}
99	// If palignr is shifting the pair of input vectors more than one lane,
100	// but less than two lanes, convert to shifting in zeroes.
101	let (a, b) = if IMM8 > `16` {
102	(_mm_set1_epi8(`0`), a)
103	} else {
104	(a, b)
105	};
106	const fn mask(shift: u32, i: u32) -> u32 {
107	if shift > `32` {
108	// Unused, but needs to be a valid index.
109	i
110	} else if shift > `16` {
111	shift - `16` + i
112	} else {
113	shift + i
114	}
115	}
116	let r: i8x16 = simd_shuffle!(
117	b.as_i8x16(),
118	a.as_i8x16(),
119	[
120	mask(IMM8 as u32, `0`),
121	mask(IMM8 as u32, `1`),
122	mask(IMM8 as u32, `2`),
123	mask(IMM8 as u32, `3`),
124	mask(IMM8 as u32, `4`),
125	mask(IMM8 as u32, `5`),
126	mask(IMM8 as u32, `6`),
127	mask(IMM8 as u32, `7`),
128	mask(IMM8 as u32, `8`),
129	mask(IMM8 as u32, `9`),
130	mask(IMM8 as u32, `10`),
131	mask(IMM8 as u32, `11`),
132	mask(IMM8 as u32, `12`),
133	mask(IMM8 as u32, `13`),
134	mask(IMM8 as u32, `14`),
135	mask(IMM8 as u32, `15`),
136	],
137	);
138	transmute(r)
139	}
140
141	/// Horizontally adds the adjacent pairs of values contained in 2 packed
142	/// 128-bit vectors of `[8 x i16]`.
143	///
144	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hadd_epi16)
145	#[inline]
146	#[target_feature(enable = "ssse3")]
147	#[cfg_attr(test, assert_instr(phaddw))]
148	#[stable(feature = "simd_x86", since = "1.27.0")]
149	pub unsafe fn _mm_hadd_epi16(a: __m128i, b: __m128i) -> __m128i {
150	transmute(src:phaddw128(a:a.as_i16x8(), b:b.as_i16x8()))
151	}
152
153	/// Horizontally adds the adjacent pairs of values contained in 2 packed
154	/// 128-bit vectors of `[8 x i16]`. Positive sums greater than 7FFFh are
155	/// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h.
156	///
157	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hadds_epi16)
158	#[inline]
159	#[target_feature(enable = "ssse3")]
160	#[cfg_attr(test, assert_instr(phaddsw))]
161	#[stable(feature = "simd_x86", since = "1.27.0")]
162	pub unsafe fn _mm_hadds_epi16(a: __m128i, b: __m128i) -> __m128i {
163	transmute(src:phaddsw128(a:a.as_i16x8(), b:b.as_i16x8()))
164	}
165
166	/// Horizontally adds the adjacent pairs of values contained in 2 packed
167	/// 128-bit vectors of `[4 x i32]`.
168	///
169	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hadd_epi32)
170	#[inline]
171	#[target_feature(enable = "ssse3")]
172	#[cfg_attr(test, assert_instr(phaddd))]
173	#[stable(feature = "simd_x86", since = "1.27.0")]
174	pub unsafe fn _mm_hadd_epi32(a: __m128i, b: __m128i) -> __m128i {
175	transmute(src:phaddd128(a:a.as_i32x4(), b:b.as_i32x4()))
176	}
177
178	/// Horizontally subtract the adjacent pairs of values contained in 2
179	/// packed 128-bit vectors of `[8 x i16]`.
180	///
181	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hsub_epi16)
182	#[inline]
183	#[target_feature(enable = "ssse3")]
184	#[cfg_attr(test, assert_instr(phsubw))]
185	#[stable(feature = "simd_x86", since = "1.27.0")]
186	pub unsafe fn _mm_hsub_epi16(a: __m128i, b: __m128i) -> __m128i {
187	transmute(src:phsubw128(a:a.as_i16x8(), b:b.as_i16x8()))
188	}
189
190	/// Horizontally subtract the adjacent pairs of values contained in 2
191	/// packed 128-bit vectors of `[8 x i16]`. Positive differences greater than
192	/// 7FFFh are saturated to 7FFFh. Negative differences less than 8000h are
193	/// saturated to 8000h.
194	///
195	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hsubs_epi16)
196	#[inline]
197	#[target_feature(enable = "ssse3")]
198	#[cfg_attr(test, assert_instr(phsubsw))]
199	#[stable(feature = "simd_x86", since = "1.27.0")]
200	pub unsafe fn _mm_hsubs_epi16(a: __m128i, b: __m128i) -> __m128i {
201	transmute(src:phsubsw128(a:a.as_i16x8(), b:b.as_i16x8()))
202	}
203
204	/// Horizontally subtract the adjacent pairs of values contained in 2
205	/// packed 128-bit vectors of `[4 x i32]`.
206	///
207	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hsub_epi32)
208	#[inline]
209	#[target_feature(enable = "ssse3")]
210	#[cfg_attr(test, assert_instr(phsubd))]
211	#[stable(feature = "simd_x86", since = "1.27.0")]
212	pub unsafe fn _mm_hsub_epi32(a: __m128i, b: __m128i) -> __m128i {
213	transmute(src:phsubd128(a:a.as_i32x4(), b:b.as_i32x4()))
214	}
215
216	/// Multiplies corresponding pairs of packed 8-bit unsigned integer
217	/// values contained in the first source operand and packed 8-bit signed
218	/// integer values contained in the second source operand, add pairs of
219	/// contiguous products with signed saturation, and writes the 16-bit sums to
220	/// the corresponding bits in the destination.
221	///
222	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maddubs_epi16)
223	#[inline]
224	#[target_feature(enable = "ssse3")]
225	#[cfg_attr(test, assert_instr(pmaddubsw))]
226	#[stable(feature = "simd_x86", since = "1.27.0")]
227	pub unsafe fn _mm_maddubs_epi16(a: __m128i, b: __m128i) -> __m128i {
228	transmute(src:pmaddubsw128(a:a.as_u8x16(), b:b.as_i8x16()))
229	}
230
231	/// Multiplies packed 16-bit signed integer values, truncate the 32-bit
232	/// product to the 18 most significant bits by right-shifting, round the
233	/// truncated value by adding 1, and write bits `[16:1]` to the destination.
234	///
235	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mulhrs_epi16)
236	#[inline]
237	#[target_feature(enable = "ssse3")]
238	#[cfg_attr(test, assert_instr(pmulhrsw))]
239	#[stable(feature = "simd_x86", since = "1.27.0")]
240	pub unsafe fn _mm_mulhrs_epi16(a: __m128i, b: __m128i) -> __m128i {
241	transmute(src:pmulhrsw128(a:a.as_i16x8(), b:b.as_i16x8()))
242	}
243
244	/// Negates packed 8-bit integers in `a` when the corresponding signed 8-bit
245	/// integer in `b` is negative, and returns the result.
246	/// Elements in result are zeroed out when the corresponding element in `b`
247	/// is zero.
248	///
249	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sign_epi8)
250	#[inline]
251	#[target_feature(enable = "ssse3")]
252	#[cfg_attr(test, assert_instr(psignb))]
253	#[stable(feature = "simd_x86", since = "1.27.0")]
254	pub unsafe fn _mm_sign_epi8(a: __m128i, b: __m128i) -> __m128i {
255	transmute(src:psignb128(a:a.as_i8x16(), b:b.as_i8x16()))
256	}
257
258	/// Negates packed 16-bit integers in `a` when the corresponding signed 16-bit
259	/// integer in `b` is negative, and returns the results.
260	/// Elements in result are zeroed out when the corresponding element in `b`
261	/// is zero.
262	///
263	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sign_epi16)
264	#[inline]
265	#[target_feature(enable = "ssse3")]
266	#[cfg_attr(test, assert_instr(psignw))]
267	#[stable(feature = "simd_x86", since = "1.27.0")]
268	pub unsafe fn _mm_sign_epi16(a: __m128i, b: __m128i) -> __m128i {
269	transmute(src:psignw128(a:a.as_i16x8(), b:b.as_i16x8()))
270	}
271
272	/// Negates packed 32-bit integers in `a` when the corresponding signed 32-bit
273	/// integer in `b` is negative, and returns the results.
274	/// Element in result are zeroed out when the corresponding element in `b`
275	/// is zero.
276	///
277	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sign_epi32)
278	#[inline]
279	#[target_feature(enable = "ssse3")]
280	#[cfg_attr(test, assert_instr(psignd))]
281	#[stable(feature = "simd_x86", since = "1.27.0")]
282	pub unsafe fn _mm_sign_epi32(a: __m128i, b: __m128i) -> __m128i {
283	transmute(src:psignd128(a:a.as_i32x4(), b:b.as_i32x4()))
284	}
285
286	#[allow(improper_ctypes)]
287	extern "C" {
288	#[link_name = "llvm.x86.ssse3.pabs.b.128"]
289	fn pabsb128(a: i8x16) -> u8x16;
290
291	#[link_name = "llvm.x86.ssse3.pabs.w.128"]
292	fn pabsw128(a: i16x8) -> u16x8;
293
294	#[link_name = "llvm.x86.ssse3.pabs.d.128"]
295	fn pabsd128(a: i32x4) -> u32x4;
296
297	#[link_name = "llvm.x86.ssse3.pshuf.b.128"]
298	fn pshufb128(a: u8x16, b: u8x16) -> u8x16;
299
300	#[link_name = "llvm.x86.ssse3.phadd.w.128"]
301	fn phaddw128(a: i16x8, b: i16x8) -> i16x8;
302
303	#[link_name = "llvm.x86.ssse3.phadd.sw.128"]
304	fn phaddsw128(a: i16x8, b: i16x8) -> i16x8;
305
306	#[link_name = "llvm.x86.ssse3.phadd.d.128"]
307	fn phaddd128(a: i32x4, b: i32x4) -> i32x4;
308
309	#[link_name = "llvm.x86.ssse3.phsub.w.128"]
310	fn phsubw128(a: i16x8, b: i16x8) -> i16x8;
311
312	#[link_name = "llvm.x86.ssse3.phsub.sw.128"]
313	fn phsubsw128(a: i16x8, b: i16x8) -> i16x8;
314
315	#[link_name = "llvm.x86.ssse3.phsub.d.128"]
316	fn phsubd128(a: i32x4, b: i32x4) -> i32x4;
317
318	#[link_name = "llvm.x86.ssse3.pmadd.ub.sw.128"]
319	fn pmaddubsw128(a: u8x16, b: i8x16) -> i16x8;
320
321	#[link_name = "llvm.x86.ssse3.pmul.hr.sw.128"]
322	fn pmulhrsw128(a: i16x8, b: i16x8) -> i16x8;
323
324	#[link_name = "llvm.x86.ssse3.psign.b.128"]
325	fn psignb128(a: i8x16, b: i8x16) -> i8x16;
326
327	#[link_name = "llvm.x86.ssse3.psign.w.128"]
328	fn psignw128(a: i16x8, b: i16x8) -> i16x8;
329
330	#[link_name = "llvm.x86.ssse3.psign.d.128"]
331	fn psignd128(a: i32x4, b: i32x4) -> i32x4;
332	}
333
// Unit tests for the SSSE3 functions; each test is enabled only with the
// `ssse3` feature via `simd_test`.
#[cfg(test)]
mod tests {
    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_abs_epi8() {
        let r = _mm_abs_epi8(_mm_set1_epi8(-5));
        assert_eq_m128i(r, _mm_set1_epi8(5));
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_abs_epi16() {
        let r = _mm_abs_epi16(_mm_set1_epi16(-5));
        assert_eq_m128i(r, _mm_set1_epi16(5));
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_abs_epi32() {
        let r = _mm_abs_epi32(_mm_set1_epi32(-5));
        assert_eq_m128i(r, _mm_set1_epi32(5));
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_shuffle_epi8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        // The control bytes include one with the high bit set (128), which
        // must zero its lane, and two above 15 (24 and 19), of which only
        // the low 4 bits are used as the index.
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            4, 128_u8 as i8, 4, 3,
            24, 12, 6, 19,
            12, 5, 5, 10,
            4, 1, 8, 0,
        );
        #[rustfmt::skip]
        let expected = _mm_setr_epi8(
            5, 0, 5, 4,
            9, 13, 7, 4,
            13, 6, 6, 11,
            5, 2, 9, 1,
        );
        let r = _mm_shuffle_epi8(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_alignr_epi8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            4, 63, 4, 3,
            24, 12, 6, 19,
            12, 5, 5, 10,
            4, 1, 8, 0,
        );
        // Shifting by more than 32 bytes leaves nothing but zeroes.
        let r = _mm_alignr_epi8::<33>(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(0));

        // Between 17 and 32 bytes: `a` shifted right with zeroes shifted in.
        let r = _mm_alignr_epi8::<17>(a, b);
        #[rustfmt::skip]
        let expected = _mm_setr_epi8(
            2, 3, 4, 5, 6, 7, 8, 9,
            10, 11, 12, 13, 14, 15, 16, 0,
        );
        assert_eq_m128i(r, expected);

        // Exactly 16 bytes: the result is `a`.
        let r = _mm_alignr_epi8::<16>(a, b);
        assert_eq_m128i(r, a);

        // 15 bytes: the top byte of `b` followed by the low 15 bytes of `a`.
        let r = _mm_alignr_epi8::<15>(a, b);
        #[rustfmt::skip]
        let expected = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        assert_eq_m128i(r, expected);

        // No shift: the result is `b`.
        let r = _mm_alignr_epi8::<0>(a, b);
        assert_eq_m128i(r, b);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hadd_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 4, 3, 24, 12, 6, 19);
        let expected = _mm_setr_epi16(3, 7, 11, 15, 132, 7, 36, 25);
        let r = _mm_hadd_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hadds_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        // The last two pairs of `b` overflow in each direction to exercise
        // saturation.
        let b = _mm_setr_epi16(4, 128, 4, 3, 32767, 1, -32768, -1);
        let expected = _mm_setr_epi16(3, 7, 11, 15, 132, 7, 32767, -32768);
        let r = _mm_hadds_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hadd_epi32() {
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let b = _mm_setr_epi32(4, 128, 4, 3);
        let expected = _mm_setr_epi32(3, 7, 132, 7);
        let r = _mm_hadd_epi32(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hsub_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 4, 3, 24, 12, 6, 19);
        let expected = _mm_setr_epi16(-1, -1, -1, -1, -124, 1, 12, -13);
        let r = _mm_hsub_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hsubs_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        // The last two pairs of `b` overflow in each direction to exercise
        // saturation.
        let b = _mm_setr_epi16(4, 128, 4, 3, 32767, -1, -32768, 1);
        let expected = _mm_setr_epi16(-1, -1, -1, -1, -124, 1, 32767, -32768);
        let r = _mm_hsubs_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hsub_epi32() {
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let b = _mm_setr_epi32(4, 128, 4, 3);
        let expected = _mm_setr_epi32(-1, -1, -124, 1);
        let r = _mm_hsub_epi32(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_maddubs_epi16() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            4, 63, 4, 3,
            24, 12, 6, 19,
            12, 5, 5, 10,
            4, 1, 8, 0,
        );
        let expected = _mm_setr_epi16(130, 24, 192, 194, 158, 175, 66, 120);
        let r = _mm_maddubs_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_mulhrs_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 4, 3, 32767, -1, -32768, 1);
        let expected = _mm_setr_epi16(0, 0, 0, 0, 5, 0, -7, 0);
        let r = _mm_mulhrs_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_sign_epi8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, -14, -15, 16,
        );
        // `b` mixes positive, negative and zero lanes to cover all three
        // outcomes: keep, negate, and zero.
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            4, 63, -4, 3, 24, 12, -6, -19,
            12, 5, -5, 10, 4, 1, -8, 0,
        );
        #[rustfmt::skip]
        let expected = _mm_setr_epi8(
            1, 2, -3, 4, 5, 6, -7, -8,
            9, 10, -11, 12, 13, -14, 15, 0,
        );
        let r = _mm_sign_epi8(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_sign_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, -5, -6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 0, 3, 1, -1, -2, 1);
        let expected = _mm_setr_epi16(1, 2, 0, 4, -5, 6, -7, 8);
        let r = _mm_sign_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_sign_epi32() {
        let a = _mm_setr_epi32(-1, 2, 3, 4);
        let b = _mm_setr_epi32(1, -1, 1, 0);
        let expected = _mm_setr_epi32(-1, -2, 3, 0);
        let r = _mm_sign_epi32(a, b);
        assert_eq_m128i(r, expected);
    }
}
538