1 | /*===---- mm3dnow.h - 3DNow! intrinsics ------------------------------------=== |
2 | * |
3 | * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | * See https://llvm.org/LICENSE.txt for license information. |
5 | * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | * |
7 | *===-----------------------------------------------------------------------=== |
8 | */ |
9 | |
10 | #ifndef _MM3DNOW_H_INCLUDED |
11 | #define _MM3DNOW_H_INCLUDED |
12 | |
13 | #include <mmintrin.h> |
14 | #include <prfchwintrin.h> |
15 | |
/* 8-byte vector of float. The floating-point wrappers in this file cast
 * their __m64 operands to this type before calling the builtins. */
typedef float __v2sf __attribute__((__vector_size__(8)));
17 | |
/* Define the default attributes for the functions in this file: always
 * inlined, marked nodebug, compiled for the "3dnow" target feature, and
 * declared with a minimum vector width of 64. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("3dnow"), __min_vector_width__(64)))
20 | |
/* Invokes __builtin_ia32_femms (3DNow! FEMMS; see the AMD 3DNow! manual for
 * the instruction's effect). Takes no arguments and returns nothing.
 * The attribute list is spelled out here rather than taken from
 * __DEFAULT_FN_ATTRS, so this function carries no __min_vector_width__. */
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("3dnow")))
_m_femms(void) {
  __builtin_ia32_femms();
}
25 | |
26 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
27 | _m_pavgusb(__m64 __m1, __m64 __m2) { |
28 | return (__m64)__builtin_ia32_pavgusb((__v8qi)__m1, (__v8qi)__m2); |
29 | } |
30 | |
31 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
32 | _m_pf2id(__m64 __m) { |
33 | return (__m64)__builtin_ia32_pf2id((__v2sf)__m); |
34 | } |
35 | |
36 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
37 | _m_pfacc(__m64 __m1, __m64 __m2) { |
38 | return (__m64)__builtin_ia32_pfacc((__v2sf)__m1, (__v2sf)__m2); |
39 | } |
40 | |
41 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
42 | _m_pfadd(__m64 __m1, __m64 __m2) { |
43 | return (__m64)__builtin_ia32_pfadd((__v2sf)__m1, (__v2sf)__m2); |
44 | } |
45 | |
46 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
47 | _m_pfcmpeq(__m64 __m1, __m64 __m2) { |
48 | return (__m64)__builtin_ia32_pfcmpeq((__v2sf)__m1, (__v2sf)__m2); |
49 | } |
50 | |
51 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
52 | _m_pfcmpge(__m64 __m1, __m64 __m2) { |
53 | return (__m64)__builtin_ia32_pfcmpge((__v2sf)__m1, (__v2sf)__m2); |
54 | } |
55 | |
56 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
57 | _m_pfcmpgt(__m64 __m1, __m64 __m2) { |
58 | return (__m64)__builtin_ia32_pfcmpgt((__v2sf)__m1, (__v2sf)__m2); |
59 | } |
60 | |
61 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
62 | _m_pfmax(__m64 __m1, __m64 __m2) { |
63 | return (__m64)__builtin_ia32_pfmax((__v2sf)__m1, (__v2sf)__m2); |
64 | } |
65 | |
66 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
67 | _m_pfmin(__m64 __m1, __m64 __m2) { |
68 | return (__m64)__builtin_ia32_pfmin((__v2sf)__m1, (__v2sf)__m2); |
69 | } |
70 | |
71 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
72 | _m_pfmul(__m64 __m1, __m64 __m2) { |
73 | return (__m64)__builtin_ia32_pfmul((__v2sf)__m1, (__v2sf)__m2); |
74 | } |
75 | |
76 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
77 | _m_pfrcp(__m64 __m) { |
78 | return (__m64)__builtin_ia32_pfrcp((__v2sf)__m); |
79 | } |
80 | |
81 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
82 | _m_pfrcpit1(__m64 __m1, __m64 __m2) { |
83 | return (__m64)__builtin_ia32_pfrcpit1((__v2sf)__m1, (__v2sf)__m2); |
84 | } |
85 | |
86 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
87 | _m_pfrcpit2(__m64 __m1, __m64 __m2) { |
88 | return (__m64)__builtin_ia32_pfrcpit2((__v2sf)__m1, (__v2sf)__m2); |
89 | } |
90 | |
91 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
92 | _m_pfrsqrt(__m64 __m) { |
93 | return (__m64)__builtin_ia32_pfrsqrt((__v2sf)__m); |
94 | } |
95 | |
96 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
97 | _m_pfrsqrtit1(__m64 __m1, __m64 __m2) { |
98 | return (__m64)__builtin_ia32_pfrsqit1((__v2sf)__m1, (__v2sf)__m2); |
99 | } |
100 | |
101 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
102 | _m_pfsub(__m64 __m1, __m64 __m2) { |
103 | return (__m64)__builtin_ia32_pfsub((__v2sf)__m1, (__v2sf)__m2); |
104 | } |
105 | |
106 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
107 | _m_pfsubr(__m64 __m1, __m64 __m2) { |
108 | return (__m64)__builtin_ia32_pfsubr((__v2sf)__m1, (__v2sf)__m2); |
109 | } |
110 | |
111 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
112 | _m_pi2fd(__m64 __m) { |
113 | return (__m64)__builtin_ia32_pi2fd((__v2si)__m); |
114 | } |
115 | |
116 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
117 | _m_pmulhrw(__m64 __m1, __m64 __m2) { |
118 | return (__m64)__builtin_ia32_pmulhrw((__v4hi)__m1, (__v4hi)__m2); |
119 | } |
120 | |
/* Handle the 3dnowa instructions here: redefine the default attributes so
 * that the functions which follow are compiled for the "3dnowa" target
 * feature instead of "3dnow". The other attributes are unchanged. */
#undef __DEFAULT_FN_ATTRS
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("3dnowa"), __min_vector_width__(64)))
124 | |
125 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
126 | _m_pf2iw(__m64 __m) { |
127 | return (__m64)__builtin_ia32_pf2iw((__v2sf)__m); |
128 | } |
129 | |
130 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
131 | _m_pfnacc(__m64 __m1, __m64 __m2) { |
132 | return (__m64)__builtin_ia32_pfnacc((__v2sf)__m1, (__v2sf)__m2); |
133 | } |
134 | |
135 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
136 | _m_pfpnacc(__m64 __m1, __m64 __m2) { |
137 | return (__m64)__builtin_ia32_pfpnacc((__v2sf)__m1, (__v2sf)__m2); |
138 | } |
139 | |
140 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
141 | _m_pi2fw(__m64 __m) { |
142 | return (__m64)__builtin_ia32_pi2fw((__v2si)__m); |
143 | } |
144 | |
145 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
146 | _m_pswapdsf(__m64 __m) { |
147 | return (__m64)__builtin_ia32_pswapdsf((__v2sf)__m); |
148 | } |
149 | |
150 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
151 | _m_pswapdsi(__m64 __m) { |
152 | return (__m64)__builtin_ia32_pswapdsi((__v2si)__m); |
153 | } |
154 | |
/* Remove the helper macro so it is not visible to code that includes this
 * header. */
#undef __DEFAULT_FN_ATTRS
156 | |
157 | #endif |
158 | |