//! Supplemental Streaming SIMD Extensions 3 (SSSE3)

use crate::{
    core_arch::{simd::*, x86::*},
    intrinsics::simd::*,
};

#[cfg(test)]
use stdarch_test::assert_instr;

/// Computes the absolute value of packed 8-bit signed integers in `a` and
/// returns the unsigned results.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_abs_epi8)
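///
/// Picturing `a` as `[i8; 16]`, the operation is logically equivalent to the
/// following scalar sketch (note that `i8::MIN` has no positive counterpart;
/// its absolute value is the unsigned byte `128`):
///
/// ```
/// fn abs_epi8(a: [i8; 16]) -> [u8; 16] {
///     let mut r = [0u8; 16];
///     for i in 0..16 {
///         // `unsigned_abs` maps i8::MIN to 128u8, matching `pabsb`.
///         r[i] = a[i].unsigned_abs();
///     }
///     r
/// }
/// ```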
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pabsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_abs_epi8(a: __m128i) -> __m128i {
    let a = a.as_i8x16();
    let zero = i8x16::splat(0);
    let r = simd_select::<m8x16, _>(simd_lt(a, zero), simd_neg(a), a);
    transmute(r)
}

/// Computes the absolute value of each of the packed 16-bit signed integers
/// in `a` and returns the 16-bit unsigned results.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_abs_epi16)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pabsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_abs_epi16(a: __m128i) -> __m128i {
    let a = a.as_i16x8();
    let zero = i16x8::splat(0);
    let r = simd_select::<m16x8, _>(simd_lt(a, zero), simd_neg(a), a);
    transmute(r)
}

/// Computes the absolute value of each of the packed 32-bit signed integers
/// in `a` and returns the 32-bit unsigned results.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_abs_epi32)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pabsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_abs_epi32(a: __m128i) -> __m128i {
    let a = a.as_i32x4();
    let zero = i32x4::splat(0);
    let r = simd_select::<m32x4, _>(simd_lt(a, zero), simd_neg(a), a);
    transmute(r)
}

/// Shuffles bytes from `a` according to the content of `b`.
///
/// The low 4 bits of each byte of `b` are used as an index
/// into the 16 bytes of `a`.
///
/// In addition, if the most significant bit of a byte of `b`
/// is set, the corresponding destination byte is set to 0.
///
/// Picturing `a` and `b` as `[u8; 16]`, `_mm_shuffle_epi8` is
/// logically equivalent to:
///
/// ```
/// fn mm_shuffle_epi8(a: [u8; 16], b: [u8; 16]) -> [u8; 16] {
///     let mut r = [0u8; 16];
///     for i in 0..16 {
///         // if the most significant bit of b is set,
///         // then the destination byte is set to 0.
///         if b[i] & 0x80 == 0u8 {
///             r[i] = a[(b[i] % 16) as usize];
///         }
///     }
///     r
/// }
/// ```
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shuffle_epi8)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pshufb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_shuffle_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute(pshufb128(a.as_u8x16(), b.as_u8x16()))
}

/// Concatenates 16-byte blocks in `a` and `b` into a 32-byte temporary
/// result, shifts the result right by `IMM8` bytes, and returns the low 16
/// bytes.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_alignr_epi8)
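///
/// Picturing `a` and `b` as `[u8; 16]`, this is logically equivalent to the
/// following scalar sketch (shifts of more than 32 bytes yield all zeros):
///
/// ```
/// fn alignr_epi8(a: [u8; 16], b: [u8; 16], n: usize) -> [u8; 16] {
///     // Concatenate `b` (low half) and `a` (high half) ...
///     let mut t = [0u8; 32];
///     t[..16].copy_from_slice(&b);
///     t[16..].copy_from_slice(&a);
///     // ... then take 16 bytes starting at offset `n`, shifting in zeros.
///     let mut r = [0u8; 16];
///     for i in 0..16 {
///         if n + i < 32 {
///             r[i] = t[n + i];
///         }
///     }
///     r
/// }
/// ```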
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(palignr, IMM8 = 15))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_alignr_epi8<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    // If palignr is shifting the pair of vectors more than the size of two
    // lanes, emit zero.
    if IMM8 > 32 {
        return _mm_set1_epi8(0);
    }
    // If palignr is shifting the pair of input vectors more than one lane,
    // but less than two lanes, convert to shifting in zeroes.
    let (a, b) = if IMM8 > 16 {
        (_mm_set1_epi8(0), a)
    } else {
        (a, b)
    };
    const fn mask(shift: u32, i: u32) -> u32 {
        if shift > 32 {
            // Unused, but needs to be a valid index.
            i
        } else if shift > 16 {
            shift - 16 + i
        } else {
            shift + i
        }
    }
    let r: i8x16 = simd_shuffle!(
        b.as_i8x16(),
        a.as_i8x16(),
        [
            mask(IMM8 as u32, 0),
            mask(IMM8 as u32, 1),
            mask(IMM8 as u32, 2),
            mask(IMM8 as u32, 3),
            mask(IMM8 as u32, 4),
            mask(IMM8 as u32, 5),
            mask(IMM8 as u32, 6),
            mask(IMM8 as u32, 7),
            mask(IMM8 as u32, 8),
            mask(IMM8 as u32, 9),
            mask(IMM8 as u32, 10),
            mask(IMM8 as u32, 11),
            mask(IMM8 as u32, 12),
            mask(IMM8 as u32, 13),
            mask(IMM8 as u32, 14),
            mask(IMM8 as u32, 15),
        ],
    );
    transmute(r)
}

/// Horizontally adds the adjacent pairs of values contained in 2 packed
/// 128-bit vectors of `[8 x i16]`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hadd_epi16)
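///
/// Picturing `a` and `b` as `[i16; 8]`, this is logically equivalent to the
/// following scalar sketch (sums wrap around on overflow):
///
/// ```
/// fn hadd_epi16(a: [i16; 8], b: [i16; 8]) -> [i16; 8] {
///     let mut r = [0i16; 8];
///     for i in 0..4 {
///         // The low half of the result sums adjacent pairs of `a` ...
///         r[i] = a[2 * i].wrapping_add(a[2 * i + 1]);
///         // ... and the high half sums adjacent pairs of `b`.
///         r[i + 4] = b[2 * i].wrapping_add(b[2 * i + 1]);
///     }
///     r
/// }
/// ```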
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phaddw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_hadd_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(phaddw128(a.as_i16x8(), b.as_i16x8()))
}

/// Horizontally adds the adjacent pairs of values contained in 2 packed
/// 128-bit vectors of `[8 x i16]`. Positive sums greater than 7FFFh are
/// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hadds_epi16)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phaddsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_hadds_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(phaddsw128(a.as_i16x8(), b.as_i16x8()))
}

/// Horizontally adds the adjacent pairs of values contained in 2 packed
/// 128-bit vectors of `[4 x i32]`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hadd_epi32)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phaddd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_hadd_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute(phaddd128(a.as_i32x4(), b.as_i32x4()))
}

/// Horizontally subtracts the adjacent pairs of values contained in 2
/// packed 128-bit vectors of `[8 x i16]`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hsub_epi16)
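///
/// Picturing `a` and `b` as `[i16; 8]`, this is logically equivalent to the
/// following scalar sketch (the even-indexed element of each pair is the
/// minuend; differences wrap around on overflow):
///
/// ```
/// fn hsub_epi16(a: [i16; 8], b: [i16; 8]) -> [i16; 8] {
///     let mut r = [0i16; 8];
///     for i in 0..4 {
///         r[i] = a[2 * i].wrapping_sub(a[2 * i + 1]);
///         r[i + 4] = b[2 * i].wrapping_sub(b[2 * i + 1]);
///     }
///     r
/// }
/// ```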
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phsubw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_hsub_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(phsubw128(a.as_i16x8(), b.as_i16x8()))
}

/// Horizontally subtracts the adjacent pairs of values contained in 2
/// packed 128-bit vectors of `[8 x i16]`. Positive differences greater than
/// 7FFFh are saturated to 7FFFh. Negative differences less than 8000h are
/// saturated to 8000h.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hsubs_epi16)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phsubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_hsubs_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(phsubsw128(a.as_i16x8(), b.as_i16x8()))
}

/// Horizontally subtracts the adjacent pairs of values contained in 2
/// packed 128-bit vectors of `[4 x i32]`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hsub_epi32)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phsubd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_hsub_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute(phsubd128(a.as_i32x4(), b.as_i32x4()))
}

/// Multiplies corresponding pairs of packed 8-bit unsigned integer
/// values contained in the first source operand and packed 8-bit signed
/// integer values contained in the second source operand, adds pairs of
/// contiguous products with signed saturation, and writes the 16-bit sums to
/// the corresponding bits in the destination.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maddubs_epi16)
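///
/// Treating `a` as `[u8; 16]` and `b` as `[i8; 16]`, the operation is
/// logically equivalent to the following scalar sketch:
///
/// ```
/// fn maddubs_epi16(a: [u8; 16], b: [i8; 16]) -> [i16; 8] {
///     let mut r = [0i16; 8];
///     for i in 0..8 {
///         // Each unsigned-by-signed byte product fits in an i16; only
///         // the final sum of the two adjacent products saturates.
///         let lo = a[2 * i] as i16 * b[2 * i] as i16;
///         let hi = a[2 * i + 1] as i16 * b[2 * i + 1] as i16;
///         r[i] = lo.saturating_add(hi);
///     }
///     r
/// }
/// ```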
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pmaddubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maddubs_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(pmaddubsw128(a.as_u8x16(), b.as_i8x16()))
}

/// Multiplies packed 16-bit signed integer values, truncates the 32-bit
/// products to the 18 most significant bits by right-shifting, rounds the
/// truncated values by adding 1, and writes bits `[16:1]` to the destination.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mulhrs_epi16)
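///
/// Picturing `a` and `b` as `[i16; 8]`, each lane is computed as in the
/// following scalar sketch:
///
/// ```
/// fn mulhrs_epi16(a: [i16; 8], b: [i16; 8]) -> [i16; 8] {
///     let mut r = [0i16; 8];
///     for i in 0..8 {
///         // Form the full 32-bit product, keep the top 18 bits, add 1 to
///         // round, then take bits [16:1] of the rounded value.
///         let t = ((a[i] as i32 * b[i] as i32) >> 14) + 1;
///         r[i] = (t >> 1) as i16;
///     }
///     r
/// }
/// ```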
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pmulhrsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mulhrs_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(pmulhrsw128(a.as_i16x8(), b.as_i16x8()))
}

/// Negates packed 8-bit integers in `a` when the corresponding signed 8-bit
/// integer in `b` is negative, and returns the results.
/// Elements in the result are zeroed out when the corresponding element in
/// `b` is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sign_epi8)
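///
/// Picturing `a` and `b` as `[i8; 16]`, this is logically equivalent to the
/// following scalar sketch:
///
/// ```
/// fn sign_epi8(a: [i8; 16], b: [i8; 16]) -> [i8; 16] {
///     let mut r = [0i8; 16];
///     for i in 0..16 {
///         r[i] = match b[i] {
///             // Negation wraps: the negation of i8::MIN is i8::MIN.
///             n if n < 0 => a[i].wrapping_neg(),
///             0 => 0,
///             _ => a[i],
///         };
///     }
///     r
/// }
/// ```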
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(psignb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sign_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute(psignb128(a.as_i8x16(), b.as_i8x16()))
}

/// Negates packed 16-bit integers in `a` when the corresponding signed 16-bit
/// integer in `b` is negative, and returns the results.
/// Elements in the result are zeroed out when the corresponding element in
/// `b` is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sign_epi16)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(psignw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sign_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(psignw128(a.as_i16x8(), b.as_i16x8()))
}

/// Negates packed 32-bit integers in `a` when the corresponding signed 32-bit
/// integer in `b` is negative, and returns the results.
/// Elements in the result are zeroed out when the corresponding element in
/// `b` is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sign_epi32)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(psignd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sign_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute(psignd128(a.as_i32x4(), b.as_i32x4()))
}

#[allow(improper_ctypes)]
extern "C" {
    #[link_name = "llvm.x86.ssse3.pshuf.b.128"]
    fn pshufb128(a: u8x16, b: u8x16) -> u8x16;

    #[link_name = "llvm.x86.ssse3.phadd.w.128"]
    fn phaddw128(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.ssse3.phadd.sw.128"]
    fn phaddsw128(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.ssse3.phadd.d.128"]
    fn phaddd128(a: i32x4, b: i32x4) -> i32x4;

    #[link_name = "llvm.x86.ssse3.phsub.w.128"]
    fn phsubw128(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.ssse3.phsub.sw.128"]
    fn phsubsw128(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.ssse3.phsub.d.128"]
    fn phsubd128(a: i32x4, b: i32x4) -> i32x4;

    #[link_name = "llvm.x86.ssse3.pmadd.ub.sw.128"]
    fn pmaddubsw128(a: u8x16, b: i8x16) -> i16x8;

    #[link_name = "llvm.x86.ssse3.pmul.hr.sw.128"]
    fn pmulhrsw128(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.ssse3.psign.b.128"]
    fn psignb128(a: i8x16, b: i8x16) -> i8x16;

    #[link_name = "llvm.x86.ssse3.psign.w.128"]
    fn psignw128(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.ssse3.psign.d.128"]
    fn psignd128(a: i32x4, b: i32x4) -> i32x4;
}

#[cfg(test)]
mod tests {
    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_abs_epi8() {
        let r = _mm_abs_epi8(_mm_set1_epi8(-5));
        assert_eq_m128i(r, _mm_set1_epi8(5));
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_abs_epi16() {
        let r = _mm_abs_epi16(_mm_set1_epi16(-5));
        assert_eq_m128i(r, _mm_set1_epi16(5));
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_abs_epi32() {
        let r = _mm_abs_epi32(_mm_set1_epi32(-5));
        assert_eq_m128i(r, _mm_set1_epi32(5));
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_shuffle_epi8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            4, 128_u8 as i8, 4, 3,
            24, 12, 6, 19,
            12, 5, 5, 10,
            4, 1, 8, 0,
        );
        let expected = _mm_setr_epi8(5, 0, 5, 4, 9, 13, 7, 4, 13, 6, 6, 11, 5, 2, 9, 1);
        let r = _mm_shuffle_epi8(a, b);
        assert_eq_m128i(r, expected);

        // Test indices greater than 15 wrapping around
        let b = _mm_add_epi8(b, _mm_set1_epi8(32));
        let r = _mm_shuffle_epi8(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_alignr_epi8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            4, 63, 4, 3,
            24, 12, 6, 19,
            12, 5, 5, 10,
            4, 1, 8, 0,
        );
        let r = _mm_alignr_epi8::<33>(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(0));

        let r = _mm_alignr_epi8::<17>(a, b);
        #[rustfmt::skip]
        let expected = _mm_setr_epi8(
            2, 3, 4, 5, 6, 7, 8, 9,
            10, 11, 12, 13, 14, 15, 16, 0,
        );
        assert_eq_m128i(r, expected);

        let r = _mm_alignr_epi8::<16>(a, b);
        assert_eq_m128i(r, a);

        let r = _mm_alignr_epi8::<15>(a, b);
        #[rustfmt::skip]
        let expected = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        assert_eq_m128i(r, expected);

        let r = _mm_alignr_epi8::<0>(a, b);
        assert_eq_m128i(r, b);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hadd_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 4, 3, 24, 12, 6, 19);
        let expected = _mm_setr_epi16(3, 7, 11, 15, 132, 7, 36, 25);
        let r = _mm_hadd_epi16(a, b);
        assert_eq_m128i(r, expected);

        // Test wrapping on overflow
        let a = _mm_setr_epi16(i16::MAX, 1, i16::MAX, 2, i16::MAX, 3, i16::MAX, 4);
        let b = _mm_setr_epi16(i16::MIN, -1, i16::MIN, -2, i16::MIN, -3, i16::MIN, -4);
        let expected = _mm_setr_epi16(
            i16::MIN,
            i16::MIN + 1,
            i16::MIN + 2,
            i16::MIN + 3,
            i16::MAX,
            i16::MAX - 1,
            i16::MAX - 2,
            i16::MAX - 3,
        );
        let r = _mm_hadd_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hadds_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 4, 3, 32767, 1, -32768, -1);
        let expected = _mm_setr_epi16(3, 7, 11, 15, 132, 7, 32767, -32768);
        let r = _mm_hadds_epi16(a, b);
        assert_eq_m128i(r, expected);

        // Test saturating on overflow
        let a = _mm_setr_epi16(i16::MAX, 1, i16::MAX, 2, i16::MAX, 3, i16::MAX, 4);
        let b = _mm_setr_epi16(i16::MIN, -1, i16::MIN, -2, i16::MIN, -3, i16::MIN, -4);
        let expected = _mm_setr_epi16(
            i16::MAX,
            i16::MAX,
            i16::MAX,
            i16::MAX,
            i16::MIN,
            i16::MIN,
            i16::MIN,
            i16::MIN,
        );
        let r = _mm_hadds_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hadd_epi32() {
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let b = _mm_setr_epi32(4, 128, 4, 3);
        let expected = _mm_setr_epi32(3, 7, 132, 7);
        let r = _mm_hadd_epi32(a, b);
        assert_eq_m128i(r, expected);

        // Test wrapping on overflow
        let a = _mm_setr_epi32(i32::MAX, 1, i32::MAX, 2);
        let b = _mm_setr_epi32(i32::MIN, -1, i32::MIN, -2);
        let expected = _mm_setr_epi32(i32::MIN, i32::MIN + 1, i32::MAX, i32::MAX - 1);
        let r = _mm_hadd_epi32(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hsub_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 4, 3, 24, 12, 6, 19);
        let expected = _mm_setr_epi16(-1, -1, -1, -1, -124, 1, 12, -13);
        let r = _mm_hsub_epi16(a, b);
        assert_eq_m128i(r, expected);

        // Test wrapping on overflow
        let a = _mm_setr_epi16(i16::MAX, -1, i16::MAX, -2, i16::MAX, -3, i16::MAX, -4);
        let b = _mm_setr_epi16(i16::MIN, 1, i16::MIN, 2, i16::MIN, 3, i16::MIN, 4);
        let expected = _mm_setr_epi16(
            i16::MIN,
            i16::MIN + 1,
            i16::MIN + 2,
            i16::MIN + 3,
            i16::MAX,
            i16::MAX - 1,
            i16::MAX - 2,
            i16::MAX - 3,
        );
        let r = _mm_hsub_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hsubs_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 4, 3, 32767, -1, -32768, 1);
        let expected = _mm_setr_epi16(-1, -1, -1, -1, -124, 1, 32767, -32768);
        let r = _mm_hsubs_epi16(a, b);
        assert_eq_m128i(r, expected);

        // Test saturating on overflow
        let a = _mm_setr_epi16(i16::MAX, -1, i16::MAX, -2, i16::MAX, -3, i16::MAX, -4);
        let b = _mm_setr_epi16(i16::MIN, 1, i16::MIN, 2, i16::MIN, 3, i16::MIN, 4);
        let expected = _mm_setr_epi16(
            i16::MAX,
            i16::MAX,
            i16::MAX,
            i16::MAX,
            i16::MIN,
            i16::MIN,
            i16::MIN,
            i16::MIN,
        );
        let r = _mm_hsubs_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hsub_epi32() {
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let b = _mm_setr_epi32(4, 128, 4, 3);
        let expected = _mm_setr_epi32(-1, -1, -124, 1);
        let r = _mm_hsub_epi32(a, b);
        assert_eq_m128i(r, expected);

        // Test wrapping on overflow
        let a = _mm_setr_epi32(i32::MAX, -1, i32::MAX, -2);
        let b = _mm_setr_epi32(i32::MIN, 1, i32::MIN, 2);
        let expected = _mm_setr_epi32(i32::MIN, i32::MIN + 1, i32::MAX, i32::MAX - 1);
        let r = _mm_hsub_epi32(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_maddubs_epi16() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            4, 63, 4, 3,
            24, 12, 6, 19,
            12, 5, 5, 10,
            4, 1, 8, 0,
        );
        let expected = _mm_setr_epi16(130, 24, 192, 194, 158, 175, 66, 120);
        let r = _mm_maddubs_epi16(a, b);
        assert_eq_m128i(r, expected);

        // Test widening and saturation
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            u8::MAX as i8, u8::MAX as i8,
            u8::MAX as i8, u8::MAX as i8,
            u8::MAX as i8, u8::MAX as i8,
            100, 100, 0, 0,
            0, 0, 0, 0, 0, 0,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            i8::MAX, i8::MAX,
            i8::MAX, i8::MIN,
            i8::MIN, i8::MIN,
            50, 15, 0, 0, 0,
            0, 0, 0, 0, 0,
        );
        let expected = _mm_setr_epi16(i16::MAX, -255, i16::MIN, 6500, 0, 0, 0, 0);
        let r = _mm_maddubs_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_mulhrs_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 4, 3, 32767, -1, -32768, 1);
        let expected = _mm_setr_epi16(0, 0, 0, 0, 5, 0, -7, 0);
        let r = _mm_mulhrs_epi16(a, b);
        assert_eq_m128i(r, expected);

        // Test extreme values
        let a = _mm_setr_epi16(i16::MAX, i16::MIN, i16::MIN, 0, 0, 0, 0, 0);
        let b = _mm_setr_epi16(i16::MAX, i16::MIN, i16::MAX, 0, 0, 0, 0, 0);
        let expected = _mm_setr_epi16(i16::MAX - 1, i16::MIN, -i16::MAX, 0, 0, 0, 0, 0);
        let r = _mm_mulhrs_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_sign_epi8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, -14, -15, 16,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            4, 63, -4, 3, 24, 12, -6, -19,
            12, 5, -5, 10, 4, 1, -8, 0,
        );
        #[rustfmt::skip]
        let expected = _mm_setr_epi8(
            1, 2, -3, 4, 5, 6, -7, -8,
            9, 10, -11, 12, 13, -14, 15, 0,
        );
        let r = _mm_sign_epi8(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_sign_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, -5, -6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 0, 3, 1, -1, -2, 1);
        let expected = _mm_setr_epi16(1, 2, 0, 4, -5, 6, -7, 8);
        let r = _mm_sign_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_sign_epi32() {
        let a = _mm_setr_epi32(-1, 2, 3, 4);
        let b = _mm_setr_epi32(1, -1, 1, 0);
        let expected = _mm_setr_epi32(-1, -2, 3, 0);
        let r = _mm_sign_epi32(a, b);
        assert_eq_m128i(r, expected);
    }
}