tmmintrin.h source code [flutter_engine/buildtools/linux-x64/clang/lib/clang/17/include/tmmintrin.h]

1	/*===---- tmmintrin.h - SSSE3 intrinsics -----------------------------------===
2	 *
3	 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	 * See https://llvm.org/LICENSE.txt for license information.
5	 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	 *
7	 *===-----------------------------------------------------------------------===
8	 */
9
10	#ifndef __TMMINTRIN_H
11	#define __TMMINTRIN_H
12
13	#if !defined(__i386__) && !defined(__x86_64__)
14	#error "This header is only meant to be used on x86 and x64 architecture"
15	#endif
16
17	#include <pmmintrin.h>
18
/* Default attributes applied to every intrinsic defined in this file.
 *
 * __DEFAULT_FN_ATTRS     - used by the intrinsics operating on __m128i;
 *                          targets "ssse3".
 * __DEFAULT_FN_ATTRS_MMX - used by the intrinsics operating on __m64;
 *                          targets "mmx,ssse3".
 *
 * Both force inlining, suppress debug info and request a minimum vector
 * width of 64 bits.
 */
#define __DEFAULT_FN_ATTRS                                                     \
  __attribute__((__always_inline__, __nodebug__, __target__("ssse3"),          \
                 __min_vector_width__(64)))
#define __DEFAULT_FN_ATTRS_MMX                                                 \
  __attribute__((__always_inline__, __nodebug__, __target__("mmx,ssse3"),      \
                 __min_vector_width__(64)))
22
23	/// Computes the absolute value of each of the packed 8-bit signed
24	/// integers in the source operand and stores the 8-bit unsigned integer
25	/// results in the destination.
26	///
27	/// \headerfile <x86intrin.h>
28	///
29	/// This intrinsic corresponds to the \c PABSB instruction.
30	///
31	/// \param __a
32	/// A 64-bit vector of [8 x i8].
33	/// \returns A 64-bit integer vector containing the absolute values of the
34	/// elements in the operand.
35	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
36	_mm_abs_pi8(__m64 __a)
37	{
38	return (__m64)__builtin_ia32_pabsb((__v8qi)__a);
39	}
40
41	/// Computes the absolute value of each of the packed 8-bit signed
42	/// integers in the source operand and stores the 8-bit unsigned integer
43	/// results in the destination.
44	///
45	/// \headerfile <x86intrin.h>
46	///
47	/// This intrinsic corresponds to the \c VPABSB instruction.
48	///
49	/// \param __a
50	/// A 128-bit vector of [16 x i8].
51	/// \returns A 128-bit integer vector containing the absolute values of the
52	/// elements in the operand.
53	static __inline__ __m128i __DEFAULT_FN_ATTRS
54	_mm_abs_epi8(__m128i __a)
55	{
56	return (__m128i)__builtin_elementwise_abs((__v16qs)__a);
57	}
58
59	/// Computes the absolute value of each of the packed 16-bit signed
60	/// integers in the source operand and stores the 16-bit unsigned integer
61	/// results in the destination.
62	///
63	/// \headerfile <x86intrin.h>
64	///
65	/// This intrinsic corresponds to the \c PABSW instruction.
66	///
67	/// \param __a
68	/// A 64-bit vector of [4 x i16].
69	/// \returns A 64-bit integer vector containing the absolute values of the
70	/// elements in the operand.
71	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
72	_mm_abs_pi16(__m64 __a)
73	{
74	return (__m64)__builtin_ia32_pabsw((__v4hi)__a);
75	}
76
77	/// Computes the absolute value of each of the packed 16-bit signed
78	/// integers in the source operand and stores the 16-bit unsigned integer
79	/// results in the destination.
80	///
81	/// \headerfile <x86intrin.h>
82	///
83	/// This intrinsic corresponds to the \c VPABSW instruction.
84	///
85	/// \param __a
86	/// A 128-bit vector of [8 x i16].
87	/// \returns A 128-bit integer vector containing the absolute values of the
88	/// elements in the operand.
89	static __inline__ __m128i __DEFAULT_FN_ATTRS
90	_mm_abs_epi16(__m128i __a)
91	{
92	return (__m128i)__builtin_elementwise_abs((__v8hi)__a);
93	}
94
95	/// Computes the absolute value of each of the packed 32-bit signed
96	/// integers in the source operand and stores the 32-bit unsigned integer
97	/// results in the destination.
98	///
99	/// \headerfile <x86intrin.h>
100	///
101	/// This intrinsic corresponds to the \c PABSD instruction.
102	///
103	/// \param __a
104	/// A 64-bit vector of [2 x i32].
105	/// \returns A 64-bit integer vector containing the absolute values of the
106	/// elements in the operand.
107	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
108	_mm_abs_pi32(__m64 __a)
109	{
110	return (__m64)__builtin_ia32_pabsd((__v2si)__a);
111	}
112
113	/// Computes the absolute value of each of the packed 32-bit signed
114	/// integers in the source operand and stores the 32-bit unsigned integer
115	/// results in the destination.
116	///
117	/// \headerfile <x86intrin.h>
118	///
119	/// This intrinsic corresponds to the \c VPABSD instruction.
120	///
121	/// \param __a
122	/// A 128-bit vector of [4 x i32].
123	/// \returns A 128-bit integer vector containing the absolute values of the
124	/// elements in the operand.
125	static __inline__ __m128i __DEFAULT_FN_ATTRS
126	_mm_abs_epi32(__m128i __a)
127	{
128	return (__m128i)__builtin_elementwise_abs((__v4si)__a);
129	}
130
131	/// Concatenates the two 128-bit integer vector operands, and
132	/// right-shifts the result by the number of bytes specified in the immediate
133	/// operand.
134	///
135	/// \headerfile <x86intrin.h>
136	///
137	/// \code
138	/// __m128i _mm_alignr_epi8(__m128i a, __m128i b, const int n);
139	/// \endcode
140	///
141	/// This intrinsic corresponds to the \c PALIGNR instruction.
142	///
143	/// \param a
144	/// A 128-bit vector of [16 x i8] containing one of the source operands.
145	/// \param b
146	/// A 128-bit vector of [16 x i8] containing one of the source operands.
147	/// \param n
148	/// An immediate operand specifying how many bytes to right-shift the result.
149	/// \returns A 128-bit integer vector containing the concatenated right-shifted
150	/// value.
/* n is documented as an immediate operand, so it is forwarded textually via a
 * macro rather than through a function parameter. */
#define _mm_alignr_epi8(a, b, n)                                               \
  ((__m128i)__builtin_ia32_palignr128(                                         \
      (__v16qi)(__m128i)(a),                                                   \
      (__v16qi)(__m128i)(b),                                                   \
      (n)))
154
155	/// Concatenates the two 64-bit integer vector operands, and right-shifts
156	/// the result by the number of bytes specified in the immediate operand.
157	///
158	/// \headerfile <x86intrin.h>
159	///
160	/// \code
161	/// __m64 _mm_alignr_pi8(__m64 a, __m64 b, const int n);
162	/// \endcode
163	///
164	/// This intrinsic corresponds to the \c PALIGNR instruction.
165	///
166	/// \param a
167	/// A 64-bit vector of [8 x i8] containing one of the source operands.
168	/// \param b
169	/// A 64-bit vector of [8 x i8] containing one of the source operands.
170	/// \param n
171	/// An immediate operand specifying how many bytes to right-shift the result.
172	/// \returns A 64-bit integer vector containing the concatenated right-shifted
173	/// value.
/* n is documented as an immediate operand, so it is forwarded textually via a
 * macro rather than through a function parameter. */
#define _mm_alignr_pi8(a, b, n)                                                \
  ((__m64)__builtin_ia32_palignr(                                              \
      (__v8qi)(__m64)(a),                                                      \
      (__v8qi)(__m64)(b),                                                      \
      (n)))
176
177	/// Horizontally adds the adjacent pairs of values contained in 2 packed
178	/// 128-bit vectors of [8 x i16].
179	///
180	/// \headerfile <x86intrin.h>
181	///
182	/// This intrinsic corresponds to the \c VPHADDW instruction.
183	///
184	/// \param __a
185	/// A 128-bit vector of [8 x i16] containing one of the source operands. The
186	/// horizontal sums of the values are stored in the lower bits of the
187	/// destination.
188	/// \param __b
189	/// A 128-bit vector of [8 x i16] containing one of the source operands. The
190	/// horizontal sums of the values are stored in the upper bits of the
191	/// destination.
192	/// \returns A 128-bit vector of [8 x i16] containing the horizontal sums of
193	/// both operands.
194	static __inline__ __m128i __DEFAULT_FN_ATTRS
195	_mm_hadd_epi16(__m128i __a, __m128i __b)
196	{
197	return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b);
198	}
199
200	/// Horizontally adds the adjacent pairs of values contained in 2 packed
201	/// 128-bit vectors of [4 x i32].
202	///
203	/// \headerfile <x86intrin.h>
204	///
205	/// This intrinsic corresponds to the \c VPHADDD instruction.
206	///
207	/// \param __a
208	/// A 128-bit vector of [4 x i32] containing one of the source operands. The
209	/// horizontal sums of the values are stored in the lower bits of the
210	/// destination.
211	/// \param __b
212	/// A 128-bit vector of [4 x i32] containing one of the source operands. The
213	/// horizontal sums of the values are stored in the upper bits of the
214	/// destination.
215	/// \returns A 128-bit vector of [4 x i32] containing the horizontal sums of
216	/// both operands.
217	static __inline__ __m128i __DEFAULT_FN_ATTRS
218	_mm_hadd_epi32(__m128i __a, __m128i __b)
219	{
220	return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b);
221	}
222
223	/// Horizontally adds the adjacent pairs of values contained in 2 packed
224	/// 64-bit vectors of [4 x i16].
225	///
226	/// \headerfile <x86intrin.h>
227	///
228	/// This intrinsic corresponds to the \c PHADDW instruction.
229	///
230	/// \param __a
231	/// A 64-bit vector of [4 x i16] containing one of the source operands. The
232	/// horizontal sums of the values are stored in the lower bits of the
233	/// destination.
234	/// \param __b
235	/// A 64-bit vector of [4 x i16] containing one of the source operands. The
236	/// horizontal sums of the values are stored in the upper bits of the
237	/// destination.
238	/// \returns A 64-bit vector of [4 x i16] containing the horizontal sums of both
239	/// operands.
240	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
241	_mm_hadd_pi16(__m64 __a, __m64 __b)
242	{
243	return (__m64)__builtin_ia32_phaddw((__v4hi)__a, (__v4hi)__b);
244	}
245
246	/// Horizontally adds the adjacent pairs of values contained in 2 packed
247	/// 64-bit vectors of [2 x i32].
248	///
249	/// \headerfile <x86intrin.h>
250	///
251	/// This intrinsic corresponds to the \c PHADDD instruction.
252	///
253	/// \param __a
254	/// A 64-bit vector of [2 x i32] containing one of the source operands. The
255	/// horizontal sums of the values are stored in the lower bits of the
256	/// destination.
257	/// \param __b
258	/// A 64-bit vector of [2 x i32] containing one of the source operands. The
259	/// horizontal sums of the values are stored in the upper bits of the
260	/// destination.
261	/// \returns A 64-bit vector of [2 x i32] containing the horizontal sums of both
262	/// operands.
263	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
264	_mm_hadd_pi32(__m64 __a, __m64 __b)
265	{
266	return (__m64)__builtin_ia32_phaddd((__v2si)__a, (__v2si)__b);
267	}
268
269	/// Horizontally adds the adjacent pairs of values contained in 2 packed
270	/// 128-bit vectors of [8 x i16]. Positive sums greater than 0x7FFF are
271	/// saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to
272	/// 0x8000.
273	///
274	/// \headerfile <x86intrin.h>
275	///
276	/// This intrinsic corresponds to the \c VPHADDSW instruction.
277	///
278	/// \param __a
279	/// A 128-bit vector of [8 x i16] containing one of the source operands. The
280	/// horizontal sums of the values are stored in the lower bits of the
281	/// destination.
282	/// \param __b
283	/// A 128-bit vector of [8 x i16] containing one of the source operands. The
284	/// horizontal sums of the values are stored in the upper bits of the
285	/// destination.
286	/// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
287	/// sums of both operands.
288	static __inline__ __m128i __DEFAULT_FN_ATTRS
289	_mm_hadds_epi16(__m128i __a, __m128i __b)
290	{
291	return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b);
292	}
293
294	/// Horizontally adds the adjacent pairs of values contained in 2 packed
295	/// 64-bit vectors of [4 x i16]. Positive sums greater than 0x7FFF are
296	/// saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to
297	/// 0x8000.
298	///
299	/// \headerfile <x86intrin.h>
300	///
301	/// This intrinsic corresponds to the \c PHADDSW instruction.
302	///
303	/// \param __a
304	/// A 64-bit vector of [4 x i16] containing one of the source operands. The
305	/// horizontal sums of the values are stored in the lower bits of the
306	/// destination.
307	/// \param __b
308	/// A 64-bit vector of [4 x i16] containing one of the source operands. The
309	/// horizontal sums of the values are stored in the upper bits of the
310	/// destination.
311	/// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
312	/// sums of both operands.
313	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
314	_mm_hadds_pi16(__m64 __a, __m64 __b)
315	{
316	return (__m64)__builtin_ia32_phaddsw((__v4hi)__a, (__v4hi)__b);
317	}
318
319	/// Horizontally subtracts the adjacent pairs of values contained in 2
320	/// packed 128-bit vectors of [8 x i16].
321	///
322	/// \headerfile <x86intrin.h>
323	///
324	/// This intrinsic corresponds to the \c VPHSUBW instruction.
325	///
326	/// \param __a
327	/// A 128-bit vector of [8 x i16] containing one of the source operands. The
328	/// horizontal differences between the values are stored in the lower bits of
329	/// the destination.
330	/// \param __b
331	/// A 128-bit vector of [8 x i16] containing one of the source operands. The
332	/// horizontal differences between the values are stored in the upper bits of
333	/// the destination.
334	/// \returns A 128-bit vector of [8 x i16] containing the horizontal differences
335	/// of both operands.
336	static __inline__ __m128i __DEFAULT_FN_ATTRS
337	_mm_hsub_epi16(__m128i __a, __m128i __b)
338	{
339	return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b);
340	}
341
342	/// Horizontally subtracts the adjacent pairs of values contained in 2
343	/// packed 128-bit vectors of [4 x i32].
344	///
345	/// \headerfile <x86intrin.h>
346	///
347	/// This intrinsic corresponds to the \c VPHSUBD instruction.
348	///
349	/// \param __a
350	/// A 128-bit vector of [4 x i32] containing one of the source operands. The
351	/// horizontal differences between the values are stored in the lower bits of
352	/// the destination.
353	/// \param __b
354	/// A 128-bit vector of [4 x i32] containing one of the source operands. The
355	/// horizontal differences between the values are stored in the upper bits of
356	/// the destination.
357	/// \returns A 128-bit vector of [4 x i32] containing the horizontal differences
358	/// of both operands.
359	static __inline__ __m128i __DEFAULT_FN_ATTRS
360	_mm_hsub_epi32(__m128i __a, __m128i __b)
361	{
362	return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b);
363	}
364
365	/// Horizontally subtracts the adjacent pairs of values contained in 2
366	/// packed 64-bit vectors of [4 x i16].
367	///
368	/// \headerfile <x86intrin.h>
369	///
370	/// This intrinsic corresponds to the \c PHSUBW instruction.
371	///
372	/// \param __a
373	/// A 64-bit vector of [4 x i16] containing one of the source operands. The
374	/// horizontal differences between the values are stored in the lower bits of
375	/// the destination.
376	/// \param __b
377	/// A 64-bit vector of [4 x i16] containing one of the source operands. The
378	/// horizontal differences between the values are stored in the upper bits of
379	/// the destination.
380	/// \returns A 64-bit vector of [4 x i16] containing the horizontal differences
381	/// of both operands.
382	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
383	_mm_hsub_pi16(__m64 __a, __m64 __b)
384	{
385	return (__m64)__builtin_ia32_phsubw((__v4hi)__a, (__v4hi)__b);
386	}
387
388	/// Horizontally subtracts the adjacent pairs of values contained in 2
389	/// packed 64-bit vectors of [2 x i32].
390	///
391	/// \headerfile <x86intrin.h>
392	///
393	/// This intrinsic corresponds to the \c PHSUBD instruction.
394	///
395	/// \param __a
396	/// A 64-bit vector of [2 x i32] containing one of the source operands. The
397	/// horizontal differences between the values are stored in the lower bits of
398	/// the destination.
399	/// \param __b
400	/// A 64-bit vector of [2 x i32] containing one of the source operands. The
401	/// horizontal differences between the values are stored in the upper bits of
402	/// the destination.
403	/// \returns A 64-bit vector of [2 x i32] containing the horizontal differences
404	/// of both operands.
405	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
406	_mm_hsub_pi32(__m64 __a, __m64 __b)
407	{
408	return (__m64)__builtin_ia32_phsubd((__v2si)__a, (__v2si)__b);
409	}
410
411	/// Horizontally subtracts the adjacent pairs of values contained in 2
412	/// packed 128-bit vectors of [8 x i16]. Positive differences greater than
413	/// 0x7FFF are saturated to 0x7FFF. Negative differences less than 0x8000 are
414	/// saturated to 0x8000.
415	///
416	/// \headerfile <x86intrin.h>
417	///
418	/// This intrinsic corresponds to the \c VPHSUBSW instruction.
419	///
420	/// \param __a
421	/// A 128-bit vector of [8 x i16] containing one of the source operands. The
422	/// horizontal differences between the values are stored in the lower bits of
423	/// the destination.
424	/// \param __b
425	/// A 128-bit vector of [8 x i16] containing one of the source operands. The
426	/// horizontal differences between the values are stored in the upper bits of
427	/// the destination.
428	/// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
429	/// differences of both operands.
430	static __inline__ __m128i __DEFAULT_FN_ATTRS
431	_mm_hsubs_epi16(__m128i __a, __m128i __b)
432	{
433	return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b);
434	}
435
436	/// Horizontally subtracts the adjacent pairs of values contained in 2
437	/// packed 64-bit vectors of [4 x i16]. Positive differences greater than
438	/// 0x7FFF are saturated to 0x7FFF. Negative differences less than 0x8000 are
439	/// saturated to 0x8000.
440	///
441	/// \headerfile <x86intrin.h>
442	///
443	/// This intrinsic corresponds to the \c PHSUBSW instruction.
444	///
445	/// \param __a
446	/// A 64-bit vector of [4 x i16] containing one of the source operands. The
447	/// horizontal differences between the values are stored in the lower bits of
448	/// the destination.
449	/// \param __b
450	/// A 64-bit vector of [4 x i16] containing one of the source operands. The
451	/// horizontal differences between the values are stored in the upper bits of
452	/// the destination.
453	/// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
454	/// differences of both operands.
455	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
456	_mm_hsubs_pi16(__m64 __a, __m64 __b)
457	{
458	return (__m64)__builtin_ia32_phsubsw((__v4hi)__a, (__v4hi)__b);
459	}
460
461	/// Multiplies corresponding pairs of packed 8-bit unsigned integer
462	/// values contained in the first source operand and packed 8-bit signed
463	/// integer values contained in the second source operand, adds pairs of
464	/// contiguous products with signed saturation, and writes the 16-bit sums to
465	/// the corresponding bits in the destination.
466	///
467	/// For example, bits [7:0] of both operands are multiplied, bits [15:8] of
468	/// both operands are multiplied, and the sum of both results is written to
469	/// bits [15:0] of the destination.
470	///
471	/// \headerfile <x86intrin.h>
472	///
473	/// This intrinsic corresponds to the \c VPMADDUBSW instruction.
474	///
475	/// \param __a
476	/// A 128-bit integer vector containing the first source operand.
477	/// \param __b
478	/// A 128-bit integer vector containing the second source operand.
479	/// \returns A 128-bit integer vector containing the sums of products of both
480	/// operands: \n
481	/// \a R0 := (\a __a0 * \a __b0) + (\a __a1 * \a __b1) \n
482	/// \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n
483	/// \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n
484	/// \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7) \n
485	/// \a R4 := (\a __a8 * \a __b8) + (\a __a9 * \a __b9) \n
486	/// \a R5 := (\a __a10 * \a __b10) + (\a __a11 * \a __b11) \n
487	/// \a R6 := (\a __a12 * \a __b12) + (\a __a13 * \a __b13) \n
488	/// \a R7 := (\a __a14 * \a __b14) + (\a __a15 * \a __b15)
489	static __inline__ __m128i __DEFAULT_FN_ATTRS
490	_mm_maddubs_epi16(__m128i __a, __m128i __b)
491	{
492	return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b);
493	}
494
495	/// Multiplies corresponding pairs of packed 8-bit unsigned integer
496	/// values contained in the first source operand and packed 8-bit signed
497	/// integer values contained in the second source operand, adds pairs of
498	/// contiguous products with signed saturation, and writes the 16-bit sums to
499	/// the corresponding bits in the destination.
500	///
501	/// For example, bits [7:0] of both operands are multiplied, bits [15:8] of
502	/// both operands are multiplied, and the sum of both results is written to
503	/// bits [15:0] of the destination.
504	///
505	/// \headerfile <x86intrin.h>
506	///
507	/// This intrinsic corresponds to the \c PMADDUBSW instruction.
508	///
509	/// \param __a
510	/// A 64-bit integer vector containing the first source operand.
511	/// \param __b
512	/// A 64-bit integer vector containing the second source operand.
513	/// \returns A 64-bit integer vector containing the sums of products of both
514	/// operands: \n
515	/// \a R0 := (\a __a0 * \a __b0) + (\a __a1 * \a __b1) \n
516	/// \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n
517	/// \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n
518	/// \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7)
519	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
520	_mm_maddubs_pi16(__m64 __a, __m64 __b)
521	{
522	return (__m64)__builtin_ia32_pmaddubsw((__v8qi)__a, (__v8qi)__b);
523	}
524
525	/// Multiplies packed 16-bit signed integer values, truncates the 32-bit
526	/// products to the 18 most significant bits by right-shifting, rounds the
527	/// truncated value by adding 1, and writes bits [16:1] to the destination.
528	///
529	/// \headerfile <x86intrin.h>
530	///
531	/// This intrinsic corresponds to the \c VPMULHRSW instruction.
532	///
533	/// \param __a
534	/// A 128-bit vector of [8 x i16] containing one of the source operands.
535	/// \param __b
536	/// A 128-bit vector of [8 x i16] containing one of the source operands.
537	/// \returns A 128-bit vector of [8 x i16] containing the rounded and scaled
538	/// products of both operands.
539	static __inline__ __m128i __DEFAULT_FN_ATTRS
540	_mm_mulhrs_epi16(__m128i __a, __m128i __b)
541	{
542	return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b);
543	}
544
545	/// Multiplies packed 16-bit signed integer values, truncates the 32-bit
546	/// products to the 18 most significant bits by right-shifting, rounds the
547	/// truncated value by adding 1, and writes bits [16:1] to the destination.
548	///
549	/// \headerfile <x86intrin.h>
550	///
551	/// This intrinsic corresponds to the \c PMULHRSW instruction.
552	///
553	/// \param __a
554	/// A 64-bit vector of [4 x i16] containing one of the source operands.
555	/// \param __b
556	/// A 64-bit vector of [4 x i16] containing one of the source operands.
557	/// \returns A 64-bit vector of [4 x i16] containing the rounded and scaled
558	/// products of both operands.
559	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
560	_mm_mulhrs_pi16(__m64 __a, __m64 __b)
561	{
562	return (__m64)__builtin_ia32_pmulhrsw((__v4hi)__a, (__v4hi)__b);
563	}
564
565	/// Copies the 8-bit integers from a 128-bit integer vector to the
566	/// destination or clears 8-bit values in the destination, as specified by
567	/// the second source operand.
568	///
569	/// \headerfile <x86intrin.h>
570	///
571	/// This intrinsic corresponds to the \c VPSHUFB instruction.
572	///
573	/// \param __a
574	/// A 128-bit integer vector containing the values to be copied.
575	/// \param __b
576	/// A 128-bit integer vector containing control bytes corresponding to
577	/// positions in the destination:
578	/// Bit 7: \n
579	/// 1: Clear the corresponding byte in the destination. \n
580	/// 0: Copy the selected source byte to the corresponding byte in the
581	/// destination. \n
582	/// Bits [6:4] Reserved. \n
583	/// Bits [3:0] select the source byte to be copied.
584	/// \returns A 128-bit integer vector containing the copied or cleared values.
585	static __inline__ __m128i __DEFAULT_FN_ATTRS
586	_mm_shuffle_epi8(__m128i __a, __m128i __b)
587	{
588	return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b);
589	}
590
591	/// Copies the 8-bit integers from a 64-bit integer vector to the
592	/// destination or clears 8-bit values in the destination, as specified by
593	/// the second source operand.
594	///
595	/// \headerfile <x86intrin.h>
596	///
597	/// This intrinsic corresponds to the \c PSHUFB instruction.
598	///
599	/// \param __a
600	/// A 64-bit integer vector containing the values to be copied.
601	/// \param __b
602	/// A 64-bit integer vector containing control bytes corresponding to
603	/// positions in the destination:
604	/// Bit 7: \n
605	/// 1: Clear the corresponding byte in the destination. \n
606	/// 0: Copy the selected source byte to the corresponding byte in the
607	/// destination. \n
608	/// Bits [2:0] select the source byte to be copied.
609	/// \returns A 64-bit integer vector containing the copied or cleared values.
610	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
611	_mm_shuffle_pi8(__m64 __a, __m64 __b)
612	{
613	return (__m64)__builtin_ia32_pshufb((__v8qi)__a, (__v8qi)__b);
614	}
615
616	/// For each 8-bit integer in the first source operand, perform one of
617	/// the following actions as specified by the second source operand.
618	///
619	/// If the byte in the second source is negative, calculate the two's
620	/// complement of the corresponding byte in the first source, and write that
621	/// value to the destination. If the byte in the second source is positive,
622	/// copy the corresponding byte from the first source to the destination. If
623	/// the byte in the second source is zero, clear the corresponding byte in
624	/// the destination.
625	///
626	/// \headerfile <x86intrin.h>
627	///
628	/// This intrinsic corresponds to the \c VPSIGNB instruction.
629	///
630	/// \param __a
631	/// A 128-bit integer vector containing the values to be copied.
632	/// \param __b
633	/// A 128-bit integer vector containing control bytes corresponding to
634	/// positions in the destination.
635	/// \returns A 128-bit integer vector containing the resultant values.
636	static __inline__ __m128i __DEFAULT_FN_ATTRS
637	_mm_sign_epi8(__m128i __a, __m128i __b)
638	{
639	return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b);
640	}
641
642	/// For each 16-bit integer in the first source operand, perform one of
643	/// the following actions as specified by the second source operand.
644	///
645	/// If the word in the second source is negative, calculate the two's
646	/// complement of the corresponding word in the first source, and write that
647	/// value to the destination. If the word in the second source is positive,
648	/// copy the corresponding word from the first source to the destination. If
649	/// the word in the second source is zero, clear the corresponding word in
650	/// the destination.
651	///
652	/// \headerfile <x86intrin.h>
653	///
654	/// This intrinsic corresponds to the \c VPSIGNW instruction.
655	///
656	/// \param __a
657	/// A 128-bit integer vector containing the values to be copied.
658	/// \param __b
659	/// A 128-bit integer vector containing control words corresponding to
660	/// positions in the destination.
661	/// \returns A 128-bit integer vector containing the resultant values.
662	static __inline__ __m128i __DEFAULT_FN_ATTRS
663	_mm_sign_epi16(__m128i __a, __m128i __b)
664	{
665	return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b);
666	}
667
668	/// For each 32-bit integer in the first source operand, perform one of
669	/// the following actions as specified by the second source operand.
670	///
671	/// If the doubleword in the second source is negative, calculate the two's
672	/// complement of the corresponding doubleword in the first source, and
673	/// write that value to the destination. If the doubleword in the second
674	/// source is positive, copy the corresponding doubleword from the first
675	/// source to the destination. If the doubleword in the second source is
676	/// zero, clear the corresponding doubleword in the destination.
677	///
678	/// \headerfile <x86intrin.h>
679	///
680	/// This intrinsic corresponds to the \c VPSIGND instruction.
681	///
682	/// \param __a
683	/// A 128-bit integer vector containing the values to be copied.
684	/// \param __b
685	/// A 128-bit integer vector containing control doublewords corresponding to
686	/// positions in the destination.
687	/// \returns A 128-bit integer vector containing the resultant values.
688	static __inline__ __m128i __DEFAULT_FN_ATTRS
689	_mm_sign_epi32(__m128i __a, __m128i __b)
690	{
691	return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b);
692	}
693
694	/// For each 8-bit integer in the first source operand, perform one of
695	/// the following actions as specified by the second source operand.
696	///
697	/// If the byte in the second source is negative, calculate the two's
698	/// complement of the corresponding byte in the first source, and write that
699	/// value to the destination. If the byte in the second source is positive,
700	/// copy the corresponding byte from the first source to the destination. If
701	/// the byte in the second source is zero, clear the corresponding byte in
702	/// the destination.
703	///
704	/// \headerfile <x86intrin.h>
705	///
706	/// This intrinsic corresponds to the \c PSIGNB instruction.
707	///
708	/// \param __a
709	/// A 64-bit integer vector containing the values to be copied.
710	/// \param __b
711	/// A 64-bit integer vector containing control bytes corresponding to
712	/// positions in the destination.
713	/// \returns A 64-bit integer vector containing the resultant values.
714	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
715	_mm_sign_pi8(__m64 __a, __m64 __b)
716	{
717	return (__m64)__builtin_ia32_psignb((__v8qi)__a, (__v8qi)__b);
718	}
719
720	/// For each 16-bit integer in the first source operand, perform one of
721	/// the following actions as specified by the second source operand.
722	///
723	/// If the word in the second source is negative, calculate the two's
724	/// complement of the corresponding word in the first source, and write that
725	/// value to the destination. If the word in the second source is positive,
726	/// copy the corresponding word from the first source to the destination. If
727	/// the word in the second source is zero, clear the corresponding word in
728	/// the destination.
729	///
730	/// \headerfile <x86intrin.h>
731	///
732	/// This intrinsic corresponds to the \c PSIGNW instruction.
733	///
734	/// \param __a
735	/// A 64-bit integer vector containing the values to be copied.
736	/// \param __b
737	/// A 64-bit integer vector containing control words corresponding to
738	/// positions in the destination.
739	/// \returns A 64-bit integer vector containing the resultant values.
740	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
741	_mm_sign_pi16(__m64 __a, __m64 __b)
742	{
743	return (__m64)__builtin_ia32_psignw((__v4hi)__a, (__v4hi)__b);
744	}
745
746	/// For each 32-bit integer in the first source operand, perform one of
747	/// the following actions as specified by the second source operand.
748	///
749	/// If the doubleword in the second source is negative, calculate the two's
750	/// complement of the corresponding doubleword in the first source, and
751	/// write that value to the destination. If the doubleword in the second
752	/// source is positive, copy the corresponding doubleword from the first
753	/// source to the destination. If the doubleword in the second source is
754	/// zero, clear the corresponding doubleword in the destination.
755	///
756	/// \headerfile <x86intrin.h>
757	///
758	/// This intrinsic corresponds to the \c PSIGND instruction.
759	///
760	/// \param __a
761	/// A 64-bit integer vector containing the values to be copied.
762	/// \param __b
763	/// A 64-bit integer vector containing two control doublewords corresponding
764	/// to positions in the destination.
765	/// \returns A 64-bit integer vector containing the resultant values.
766	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
767	_mm_sign_pi32(__m64 __a, __m64 __b)
768	{
769	return (__m64)__builtin_ia32_psignd((__v2si)__a, (__v2si)__b);
770	}
771
772	#undef __DEFAULT_FN_ATTRS
773	#undef __DEFAULT_FN_ATTRS_MMX
774
775	#endif /* __TMMINTRIN_H */
776

source code of flutter_engine/buildtools/linux-x64/clang/lib/clang/17/include/tmmintrin.h