1//! Supplemental Streaming SIMD Extensions 3 (SSSE3)
2
3use crate::{
4 core_arch::{simd::*, x86::*},
5 intrinsics::simd::*,
6};
7
8#[cfg(test)]
9use stdarch_test::assert_instr;
10
11/// Computes the absolute value of packed 8-bit signed integers in `a` and
12/// return the unsigned results.
13///
14/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_abs_epi8)
15#[inline]
16#[target_feature(enable = "ssse3")]
17#[cfg_attr(test, assert_instr(pabsb))]
18#[stable(feature = "simd_x86", since = "1.27.0")]
19pub fn _mm_abs_epi8(a: __m128i) -> __m128i {
20 unsafe {
21 let a: i8x16 = a.as_i8x16();
22 let zero: i8x16 = i8x16::ZERO;
23 let r: i8x16 = simd_select::<m8x16, _>(mask:simd_lt(a, zero), if_true:simd_neg(a), if_false:a);
24 transmute(src:r)
25 }
26}
27
28/// Computes the absolute value of each of the packed 16-bit signed integers in
29/// `a` and
30/// return the 16-bit unsigned integer
31///
32/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_abs_epi16)
33#[inline]
34#[target_feature(enable = "ssse3")]
35#[cfg_attr(test, assert_instr(pabsw))]
36#[stable(feature = "simd_x86", since = "1.27.0")]
37pub fn _mm_abs_epi16(a: __m128i) -> __m128i {
38 unsafe {
39 let a: i16x8 = a.as_i16x8();
40 let zero: i16x8 = i16x8::ZERO;
41 let r: i16x8 = simd_select::<m16x8, _>(mask:simd_lt(a, zero), if_true:simd_neg(a), if_false:a);
42 transmute(src:r)
43 }
44}
45
46/// Computes the absolute value of each of the packed 32-bit signed integers in
47/// `a` and
48/// return the 32-bit unsigned integer
49///
50/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_abs_epi32)
51#[inline]
52#[target_feature(enable = "ssse3")]
53#[cfg_attr(test, assert_instr(pabsd))]
54#[stable(feature = "simd_x86", since = "1.27.0")]
55pub fn _mm_abs_epi32(a: __m128i) -> __m128i {
56 unsafe {
57 let a: i32x4 = a.as_i32x4();
58 let zero: i32x4 = i32x4::ZERO;
59 let r: i32x4 = simd_select::<m32x4, _>(mask:simd_lt(a, zero), if_true:simd_neg(a), if_false:a);
60 transmute(src:r)
61 }
62}
63
64/// Shuffles bytes from `a` according to the content of `b`.
65///
66/// The last 4 bits of each byte of `b` are used as addresses
67/// into the 16 bytes of `a`.
68///
69/// In addition, if the highest significant bit of a byte of `b`
70/// is set, the respective destination byte is set to 0.
71///
72/// Picturing `a` and `b` as `[u8; 16]`, `_mm_shuffle_epi8` is
73/// logically equivalent to:
74///
75/// ```
76/// fn mm_shuffle_epi8(a: [u8; 16], b: [u8; 16]) -> [u8; 16] {
77/// let mut r = [0u8; 16];
78/// for i in 0..16 {
79/// // if the most significant bit of b is set,
80/// // then the destination byte is set to 0.
81/// if b[i] & 0x80 == 0u8 {
82/// r[i] = a[(b[i] % 16) as usize];
83/// }
84/// }
85/// r
86/// }
87/// ```
88///
89/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shuffle_epi8)
90#[inline]
91#[target_feature(enable = "ssse3")]
92#[cfg_attr(test, assert_instr(pshufb))]
93#[stable(feature = "simd_x86", since = "1.27.0")]
94pub fn _mm_shuffle_epi8(a: __m128i, b: __m128i) -> __m128i {
95 unsafe { transmute(src:pshufb128(a.as_u8x16(), b.as_u8x16())) }
96}
97
/// Concatenate 16-byte blocks in `a` and `b` into a 32-byte temporary result,
/// shift the result right by `n` bytes, and returns the low 16 bytes.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_alignr_epi8)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(palignr, IMM8 = 15))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_alignr_epi8<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    // If palignr is shifting the pair of vectors more than the size of two
    // lanes, emit zero.
    if IMM8 > 32 {
        return _mm_setzero_si128();
    }
    // If palignr is shifting the pair of input vectors more than one lane,
    // but less than two lanes, convert to shifting in zeroes.
    let (a, b) = if IMM8 > 16 {
        (_mm_setzero_si128(), a)
    } else {
        (a, b)
    };
    // Maps destination byte `i` to its index in the 32-byte concatenation
    // `[b, a]` (simd_shuffle! indexes the first operand as 0..16 and the
    // second as 16..32). Evaluated at compile time for each element below.
    const fn mask(shift: u32, i: u32) -> u32 {
        if shift > 32 {
            // Unused, but needs to be a valid index.
            i
        } else if shift > 16 {
            shift - 16 + i
        } else {
            shift + i
        }
    }
    unsafe {
        let r: i8x16 = simd_shuffle!(
            b.as_i8x16(),
            a.as_i8x16(),
            [
                mask(IMM8 as u32, 0),
                mask(IMM8 as u32, 1),
                mask(IMM8 as u32, 2),
                mask(IMM8 as u32, 3),
                mask(IMM8 as u32, 4),
                mask(IMM8 as u32, 5),
                mask(IMM8 as u32, 6),
                mask(IMM8 as u32, 7),
                mask(IMM8 as u32, 8),
                mask(IMM8 as u32, 9),
                mask(IMM8 as u32, 10),
                mask(IMM8 as u32, 11),
                mask(IMM8 as u32, 12),
                mask(IMM8 as u32, 13),
                mask(IMM8 as u32, 14),
                mask(IMM8 as u32, 15),
            ],
        );
        transmute(r)
    }
}
157
158/// Horizontally adds the adjacent pairs of values contained in 2 packed
159/// 128-bit vectors of `[8 x i16]`.
160///
161/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hadd_epi16)
162#[inline]
163#[target_feature(enable = "ssse3")]
164#[cfg_attr(test, assert_instr(phaddw))]
165#[stable(feature = "simd_x86", since = "1.27.0")]
166pub fn _mm_hadd_epi16(a: __m128i, b: __m128i) -> __m128i {
167 let a: i16x8 = a.as_i16x8();
168 let b: i16x8 = b.as_i16x8();
169 unsafe {
170 let even: i16x8 = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]);
171 let odd: i16x8 = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]);
172 simd_add(x:even, y:odd).as_m128i()
173 }
174}
175
176/// Horizontally adds the adjacent pairs of values contained in 2 packed
177/// 128-bit vectors of `[8 x i16]`. Positive sums greater than 7FFFh are
178/// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h.
179///
180/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hadds_epi16)
181#[inline]
182#[target_feature(enable = "ssse3")]
183#[cfg_attr(test, assert_instr(phaddsw))]
184#[stable(feature = "simd_x86", since = "1.27.0")]
185pub fn _mm_hadds_epi16(a: __m128i, b: __m128i) -> __m128i {
186 unsafe { transmute(src:phaddsw128(a.as_i16x8(), b.as_i16x8())) }
187}
188
189/// Horizontally adds the adjacent pairs of values contained in 2 packed
190/// 128-bit vectors of `[4 x i32]`.
191///
192/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hadd_epi32)
193#[inline]
194#[target_feature(enable = "ssse3")]
195#[cfg_attr(test, assert_instr(phaddd))]
196#[stable(feature = "simd_x86", since = "1.27.0")]
197pub fn _mm_hadd_epi32(a: __m128i, b: __m128i) -> __m128i {
198 let a: i32x4 = a.as_i32x4();
199 let b: i32x4 = b.as_i32x4();
200 unsafe {
201 let even: i32x4 = simd_shuffle!(a, b, [0, 2, 4, 6]);
202 let odd: i32x4 = simd_shuffle!(a, b, [1, 3, 5, 7]);
203 simd_add(x:even, y:odd).as_m128i()
204 }
205}
206
207/// Horizontally subtract the adjacent pairs of values contained in 2
208/// packed 128-bit vectors of `[8 x i16]`.
209///
210/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hsub_epi16)
211#[inline]
212#[target_feature(enable = "ssse3")]
213#[cfg_attr(test, assert_instr(phsubw))]
214#[stable(feature = "simd_x86", since = "1.27.0")]
215pub fn _mm_hsub_epi16(a: __m128i, b: __m128i) -> __m128i {
216 let a: i16x8 = a.as_i16x8();
217 let b: i16x8 = b.as_i16x8();
218 unsafe {
219 let even: i16x8 = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]);
220 let odd: i16x8 = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]);
221 simd_sub(lhs:even, rhs:odd).as_m128i()
222 }
223}
224
225/// Horizontally subtract the adjacent pairs of values contained in 2
226/// packed 128-bit vectors of `[8 x i16]`. Positive differences greater than
227/// 7FFFh are saturated to 7FFFh. Negative differences less than 8000h are
228/// saturated to 8000h.
229///
230/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hsubs_epi16)
231#[inline]
232#[target_feature(enable = "ssse3")]
233#[cfg_attr(test, assert_instr(phsubsw))]
234#[stable(feature = "simd_x86", since = "1.27.0")]
235pub fn _mm_hsubs_epi16(a: __m128i, b: __m128i) -> __m128i {
236 unsafe { transmute(src:phsubsw128(a.as_i16x8(), b.as_i16x8())) }
237}
238
239/// Horizontally subtract the adjacent pairs of values contained in 2
240/// packed 128-bit vectors of `[4 x i32]`.
241///
242/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hsub_epi32)
243#[inline]
244#[target_feature(enable = "ssse3")]
245#[cfg_attr(test, assert_instr(phsubd))]
246#[stable(feature = "simd_x86", since = "1.27.0")]
247pub fn _mm_hsub_epi32(a: __m128i, b: __m128i) -> __m128i {
248 let a: i32x4 = a.as_i32x4();
249 let b: i32x4 = b.as_i32x4();
250 unsafe {
251 let even: i32x4 = simd_shuffle!(a, b, [0, 2, 4, 6]);
252 let odd: i32x4 = simd_shuffle!(a, b, [1, 3, 5, 7]);
253 simd_sub(lhs:even, rhs:odd).as_m128i()
254 }
255}
256
257/// Multiplies corresponding pairs of packed 8-bit unsigned integer
258/// values contained in the first source operand and packed 8-bit signed
259/// integer values contained in the second source operand, add pairs of
260/// contiguous products with signed saturation, and writes the 16-bit sums to
261/// the corresponding bits in the destination.
262///
263/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maddubs_epi16)
264#[inline]
265#[target_feature(enable = "ssse3")]
266#[cfg_attr(test, assert_instr(pmaddubsw))]
267#[stable(feature = "simd_x86", since = "1.27.0")]
268pub fn _mm_maddubs_epi16(a: __m128i, b: __m128i) -> __m128i {
269 unsafe { transmute(src:pmaddubsw128(a.as_u8x16(), b.as_i8x16())) }
270}
271
272/// Multiplies packed 16-bit signed integer values, truncate the 32-bit
273/// product to the 18 most significant bits by right-shifting, round the
274/// truncated value by adding 1, and write bits `[16:1]` to the destination.
275///
276/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mulhrs_epi16)
277#[inline]
278#[target_feature(enable = "ssse3")]
279#[cfg_attr(test, assert_instr(pmulhrsw))]
280#[stable(feature = "simd_x86", since = "1.27.0")]
281pub fn _mm_mulhrs_epi16(a: __m128i, b: __m128i) -> __m128i {
282 unsafe { transmute(src:pmulhrsw128(a.as_i16x8(), b.as_i16x8())) }
283}
284
285/// Negates packed 8-bit integers in `a` when the corresponding signed 8-bit
286/// integer in `b` is negative, and returns the result.
287/// Elements in result are zeroed out when the corresponding element in `b`
288/// is zero.
289///
290/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sign_epi8)
291#[inline]
292#[target_feature(enable = "ssse3")]
293#[cfg_attr(test, assert_instr(psignb))]
294#[stable(feature = "simd_x86", since = "1.27.0")]
295pub fn _mm_sign_epi8(a: __m128i, b: __m128i) -> __m128i {
296 unsafe { transmute(src:psignb128(a.as_i8x16(), b.as_i8x16())) }
297}
298
299/// Negates packed 16-bit integers in `a` when the corresponding signed 16-bit
300/// integer in `b` is negative, and returns the results.
301/// Elements in result are zeroed out when the corresponding element in `b`
302/// is zero.
303///
304/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sign_epi16)
305#[inline]
306#[target_feature(enable = "ssse3")]
307#[cfg_attr(test, assert_instr(psignw))]
308#[stable(feature = "simd_x86", since = "1.27.0")]
309pub fn _mm_sign_epi16(a: __m128i, b: __m128i) -> __m128i {
310 unsafe { transmute(src:psignw128(a.as_i16x8(), b.as_i16x8())) }
311}
312
313/// Negates packed 32-bit integers in `a` when the corresponding signed 32-bit
314/// integer in `b` is negative, and returns the results.
315/// Element in result are zeroed out when the corresponding element in `b`
316/// is zero.
317///
318/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sign_epi32)
319#[inline]
320#[target_feature(enable = "ssse3")]
321#[cfg_attr(test, assert_instr(psignd))]
322#[stable(feature = "simd_x86", since = "1.27.0")]
323pub fn _mm_sign_epi32(a: __m128i, b: __m128i) -> __m128i {
324 unsafe { transmute(src:psignd128(a.as_i32x4(), b.as_i32x4())) }
325}
326
327#[allow(improper_ctypes)]
328unsafe extern "C" {
329 #[link_name = "llvm.x86.ssse3.pshuf.b.128"]
330 unsafefn pshufb128(a: u8x16, b: u8x16) -> u8x16;
331
332 #[link_name = "llvm.x86.ssse3.phadd.sw.128"]
333 unsafefn phaddsw128(a: i16x8, b: i16x8) -> i16x8;
334
335 #[link_name = "llvm.x86.ssse3.phsub.sw.128"]
336 unsafefn phsubsw128(a: i16x8, b: i16x8) -> i16x8;
337
338 #[link_name = "llvm.x86.ssse3.pmadd.ub.sw.128"]
339 unsafefn pmaddubsw128(a: u8x16, b: i8x16) -> i16x8;
340
341 #[link_name = "llvm.x86.ssse3.pmul.hr.sw.128"]
342 unsafefn pmulhrsw128(a: i16x8, b: i16x8) -> i16x8;
343
344 #[link_name = "llvm.x86.ssse3.psign.b.128"]
345 unsafefn psignb128(a: i8x16, b: i8x16) -> i8x16;
346
347 #[link_name = "llvm.x86.ssse3.psign.w.128"]
348 unsafefn psignw128(a: i16x8, b: i16x8) -> i16x8;
349
350 #[link_name = "llvm.x86.ssse3.psign.d.128"]
351 unsafefn psignd128(a: i32x4, b: i32x4) -> i32x4;
352}
353
// Behavioural tests for the SSSE3 intrinsics above. Each test runs only when
// the host CPU supports the `ssse3` feature (via `#[simd_test]`).
#[cfg(test)]
mod tests {
    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_abs_epi8() {
        let r = _mm_abs_epi8(_mm_set1_epi8(-5));
        assert_eq_m128i(r, _mm_set1_epi8(5));
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_abs_epi16() {
        let r = _mm_abs_epi16(_mm_set1_epi16(-5));
        assert_eq_m128i(r, _mm_set1_epi16(5));
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_abs_epi32() {
        let r = _mm_abs_epi32(_mm_set1_epi32(-5));
        assert_eq_m128i(r, _mm_set1_epi32(5));
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_shuffle_epi8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        // `128_u8 as i8` sets the high bit, so that destination byte is zeroed.
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            4, 128_u8 as i8, 4, 3,
            24, 12, 6, 19,
            12, 5, 5, 10,
            4, 1, 8, 0,
        );
        let expected = _mm_setr_epi8(5, 0, 5, 4, 9, 13, 7, 4, 13, 6, 6, 11, 5, 2, 9, 1);
        let r = _mm_shuffle_epi8(a, b);
        assert_eq_m128i(r, expected);

        // Test indices greater than 15 wrapping around
        let b = _mm_add_epi8(b, _mm_set1_epi8(32));
        let r = _mm_shuffle_epi8(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_alignr_epi8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            4, 63, 4, 3,
            24, 12, 6, 19,
            12, 5, 5, 10,
            4, 1, 8, 0,
        );
        // Shift larger than both vectors: all zeroes.
        let r = _mm_alignr_epi8::<33>(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(0));

        // Shift between one and two lanes: zeroes shifted in from the top.
        let r = _mm_alignr_epi8::<17>(a, b);
        #[rustfmt::skip]
        let expected = _mm_setr_epi8(
            2, 3, 4, 5, 6, 7, 8, 9,
            10, 11, 12, 13, 14, 15, 16, 0,
        );
        assert_eq_m128i(r, expected);

        // Shift by exactly one lane: returns `a` unchanged.
        let r = _mm_alignr_epi8::<16>(a, b);
        assert_eq_m128i(r, a);

        let r = _mm_alignr_epi8::<15>(a, b);
        #[rustfmt::skip]
        let expected = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        assert_eq_m128i(r, expected);

        // Zero shift: returns `b` unchanged.
        let r = _mm_alignr_epi8::<0>(a, b);
        assert_eq_m128i(r, b);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hadd_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 4, 3, 24, 12, 6, 19);
        let expected = _mm_setr_epi16(3, 7, 11, 15, 132, 7, 36, 25);
        let r = _mm_hadd_epi16(a, b);
        assert_eq_m128i(r, expected);

        // Test wrapping on overflow
        let a = _mm_setr_epi16(i16::MAX, 1, i16::MAX, 2, i16::MAX, 3, i16::MAX, 4);
        let b = _mm_setr_epi16(i16::MIN, -1, i16::MIN, -2, i16::MIN, -3, i16::MIN, -4);
        let expected = _mm_setr_epi16(
            i16::MIN,
            i16::MIN + 1,
            i16::MIN + 2,
            i16::MIN + 3,
            i16::MAX,
            i16::MAX - 1,
            i16::MAX - 2,
            i16::MAX - 3,
        );
        let r = _mm_hadd_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hadds_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 4, 3, 32767, 1, -32768, -1);
        let expected = _mm_setr_epi16(3, 7, 11, 15, 132, 7, 32767, -32768);
        let r = _mm_hadds_epi16(a, b);
        assert_eq_m128i(r, expected);

        // Test saturating on overflow
        let a = _mm_setr_epi16(i16::MAX, 1, i16::MAX, 2, i16::MAX, 3, i16::MAX, 4);
        let b = _mm_setr_epi16(i16::MIN, -1, i16::MIN, -2, i16::MIN, -3, i16::MIN, -4);
        let expected = _mm_setr_epi16(
            i16::MAX,
            i16::MAX,
            i16::MAX,
            i16::MAX,
            i16::MIN,
            i16::MIN,
            i16::MIN,
            i16::MIN,
        );
        let r = _mm_hadds_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hadd_epi32() {
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let b = _mm_setr_epi32(4, 128, 4, 3);
        let expected = _mm_setr_epi32(3, 7, 132, 7);
        let r = _mm_hadd_epi32(a, b);
        assert_eq_m128i(r, expected);

        // Test wrapping on overflow
        let a = _mm_setr_epi32(i32::MAX, 1, i32::MAX, 2);
        let b = _mm_setr_epi32(i32::MIN, -1, i32::MIN, -2);
        let expected = _mm_setr_epi32(i32::MIN, i32::MIN + 1, i32::MAX, i32::MAX - 1);
        let r = _mm_hadd_epi32(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hsub_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 4, 3, 24, 12, 6, 19);
        let expected = _mm_setr_epi16(-1, -1, -1, -1, -124, 1, 12, -13);
        let r = _mm_hsub_epi16(a, b);
        assert_eq_m128i(r, expected);

        // Test wrapping on overflow
        let a = _mm_setr_epi16(i16::MAX, -1, i16::MAX, -2, i16::MAX, -3, i16::MAX, -4);
        let b = _mm_setr_epi16(i16::MIN, 1, i16::MIN, 2, i16::MIN, 3, i16::MIN, 4);
        let expected = _mm_setr_epi16(
            i16::MIN,
            i16::MIN + 1,
            i16::MIN + 2,
            i16::MIN + 3,
            i16::MAX,
            i16::MAX - 1,
            i16::MAX - 2,
            i16::MAX - 3,
        );
        let r = _mm_hsub_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hsubs_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 4, 3, 32767, -1, -32768, 1);
        let expected = _mm_setr_epi16(-1, -1, -1, -1, -124, 1, 32767, -32768);
        let r = _mm_hsubs_epi16(a, b);
        assert_eq_m128i(r, expected);

        // Test saturating on overflow
        let a = _mm_setr_epi16(i16::MAX, -1, i16::MAX, -2, i16::MAX, -3, i16::MAX, -4);
        let b = _mm_setr_epi16(i16::MIN, 1, i16::MIN, 2, i16::MIN, 3, i16::MIN, 4);
        let expected = _mm_setr_epi16(
            i16::MAX,
            i16::MAX,
            i16::MAX,
            i16::MAX,
            i16::MIN,
            i16::MIN,
            i16::MIN,
            i16::MIN,
        );
        let r = _mm_hsubs_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hsub_epi32() {
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let b = _mm_setr_epi32(4, 128, 4, 3);
        let expected = _mm_setr_epi32(-1, -1, -124, 1);
        let r = _mm_hsub_epi32(a, b);
        assert_eq_m128i(r, expected);

        // Test wrapping on overflow
        let a = _mm_setr_epi32(i32::MAX, -1, i32::MAX, -2);
        let b = _mm_setr_epi32(i32::MIN, 1, i32::MIN, 2);
        let expected = _mm_setr_epi32(i32::MIN, i32::MIN + 1, i32::MAX, i32::MAX - 1);
        let r = _mm_hsub_epi32(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_maddubs_epi16() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            4, 63, 4, 3,
            24, 12, 6, 19,
            12, 5, 5, 10,
            4, 1, 8, 0,
        );
        let expected = _mm_setr_epi16(130, 24, 192, 194, 158, 175, 66, 120);
        let r = _mm_maddubs_epi16(a, b);
        assert_eq_m128i(r, expected);

        // Test widening and saturation
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            u8::MAX as i8, u8::MAX as i8,
            u8::MAX as i8, u8::MAX as i8,
            u8::MAX as i8, u8::MAX as i8,
            100, 100, 0, 0,
            0, 0, 0, 0, 0, 0,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            i8::MAX, i8::MAX,
            i8::MAX, i8::MIN,
            i8::MIN, i8::MIN,
            50, 15, 0, 0, 0,
            0, 0, 0, 0, 0,
        );
        let expected = _mm_setr_epi16(i16::MAX, -255, i16::MIN, 6500, 0, 0, 0, 0);
        let r = _mm_maddubs_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_mulhrs_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 4, 3, 32767, -1, -32768, 1);
        let expected = _mm_setr_epi16(0, 0, 0, 0, 5, 0, -7, 0);
        let r = _mm_mulhrs_epi16(a, b);
        assert_eq_m128i(r, expected);

        // Test extreme values
        let a = _mm_setr_epi16(i16::MAX, i16::MIN, i16::MIN, 0, 0, 0, 0, 0);
        let b = _mm_setr_epi16(i16::MAX, i16::MIN, i16::MAX, 0, 0, 0, 0, 0);
        let expected = _mm_setr_epi16(i16::MAX - 1, i16::MIN, -i16::MAX, 0, 0, 0, 0, 0);
        let r = _mm_mulhrs_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_sign_epi8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, -14, -15, 16,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            4, 63, -4, 3, 24, 12, -6, -19,
            12, 5, -5, 10, 4, 1, -8, 0,
        );
        #[rustfmt::skip]
        let expected = _mm_setr_epi8(
            1, 2, -3, 4, 5, 6, -7, -8,
            9, 10, -11, 12, 13, -14, 15, 0,
        );
        let r = _mm_sign_epi8(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_sign_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, -5, -6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 0, 3, 1, -1, -2, 1);
        let expected = _mm_setr_epi16(1, 2, 0, 4, -5, 6, -7, 8);
        let r = _mm_sign_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_sign_epi32() {
        let a = _mm_setr_epi32(-1, 2, 3, 4);
        let b = _mm_setr_epi32(1, -1, 1, 0);
        let expected = _mm_setr_epi32(-1, -2, 3, 0);
        let r = _mm_sign_epi32(a, b);
        assert_eq_m128i(r, expected);
    }
}
669