1//===---------- ARM.cpp - Emit LLVM Code for builtins ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This contains code to emit Builtin calls as LLVM code.
10//
11//===----------------------------------------------------------------------===//
12
13#include "ABIInfo.h"
14#include "CGBuiltin.h"
15#include "CGDebugInfo.h"
16#include "TargetInfo.h"
17#include "clang/Basic/TargetBuiltins.h"
18#include "llvm/IR/InlineAsm.h"
19#include "llvm/IR/IntrinsicsAArch64.h"
20#include "llvm/IR/IntrinsicsARM.h"
21#include "llvm/IR/IntrinsicsBPF.h"
22#include "llvm/TargetParser/AArch64TargetParser.h"
23
24#include <numeric>
25
26using namespace clang;
27using namespace CodeGen;
28using namespace llvm;
29
30static std::optional<CodeGenFunction::MSVCIntrin>
31translateAarch64ToMsvcIntrin(unsigned BuiltinID) {
32 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
33 switch (BuiltinID) {
34 default:
35 return std::nullopt;
36 case clang::AArch64::BI_BitScanForward:
37 case clang::AArch64::BI_BitScanForward64:
38 return MSVCIntrin::_BitScanForward;
39 case clang::AArch64::BI_BitScanReverse:
40 case clang::AArch64::BI_BitScanReverse64:
41 return MSVCIntrin::_BitScanReverse;
42 case clang::AArch64::BI_InterlockedAnd64:
43 return MSVCIntrin::_InterlockedAnd;
44 case clang::AArch64::BI_InterlockedExchange64:
45 return MSVCIntrin::_InterlockedExchange;
46 case clang::AArch64::BI_InterlockedExchangeAdd64:
47 return MSVCIntrin::_InterlockedExchangeAdd;
48 case clang::AArch64::BI_InterlockedExchangeSub64:
49 return MSVCIntrin::_InterlockedExchangeSub;
50 case clang::AArch64::BI_InterlockedOr64:
51 return MSVCIntrin::_InterlockedOr;
52 case clang::AArch64::BI_InterlockedXor64:
53 return MSVCIntrin::_InterlockedXor;
54 case clang::AArch64::BI_InterlockedDecrement64:
55 return MSVCIntrin::_InterlockedDecrement;
56 case clang::AArch64::BI_InterlockedIncrement64:
57 return MSVCIntrin::_InterlockedIncrement;
58 case clang::AArch64::BI_InterlockedExchangeAdd8_acq:
59 case clang::AArch64::BI_InterlockedExchangeAdd16_acq:
60 case clang::AArch64::BI_InterlockedExchangeAdd_acq:
61 case clang::AArch64::BI_InterlockedExchangeAdd64_acq:
62 return MSVCIntrin::_InterlockedExchangeAdd_acq;
63 case clang::AArch64::BI_InterlockedExchangeAdd8_rel:
64 case clang::AArch64::BI_InterlockedExchangeAdd16_rel:
65 case clang::AArch64::BI_InterlockedExchangeAdd_rel:
66 case clang::AArch64::BI_InterlockedExchangeAdd64_rel:
67 return MSVCIntrin::_InterlockedExchangeAdd_rel;
68 case clang::AArch64::BI_InterlockedExchangeAdd8_nf:
69 case clang::AArch64::BI_InterlockedExchangeAdd16_nf:
70 case clang::AArch64::BI_InterlockedExchangeAdd_nf:
71 case clang::AArch64::BI_InterlockedExchangeAdd64_nf:
72 return MSVCIntrin::_InterlockedExchangeAdd_nf;
73 case clang::AArch64::BI_InterlockedExchange8_acq:
74 case clang::AArch64::BI_InterlockedExchange16_acq:
75 case clang::AArch64::BI_InterlockedExchange_acq:
76 case clang::AArch64::BI_InterlockedExchange64_acq:
77 case clang::AArch64::BI_InterlockedExchangePointer_acq:
78 return MSVCIntrin::_InterlockedExchange_acq;
79 case clang::AArch64::BI_InterlockedExchange8_rel:
80 case clang::AArch64::BI_InterlockedExchange16_rel:
81 case clang::AArch64::BI_InterlockedExchange_rel:
82 case clang::AArch64::BI_InterlockedExchange64_rel:
83 case clang::AArch64::BI_InterlockedExchangePointer_rel:
84 return MSVCIntrin::_InterlockedExchange_rel;
85 case clang::AArch64::BI_InterlockedExchange8_nf:
86 case clang::AArch64::BI_InterlockedExchange16_nf:
87 case clang::AArch64::BI_InterlockedExchange_nf:
88 case clang::AArch64::BI_InterlockedExchange64_nf:
89 case clang::AArch64::BI_InterlockedExchangePointer_nf:
90 return MSVCIntrin::_InterlockedExchange_nf;
91 case clang::AArch64::BI_InterlockedCompareExchange8_acq:
92 case clang::AArch64::BI_InterlockedCompareExchange16_acq:
93 case clang::AArch64::BI_InterlockedCompareExchange_acq:
94 case clang::AArch64::BI_InterlockedCompareExchange64_acq:
95 case clang::AArch64::BI_InterlockedCompareExchangePointer_acq:
96 return MSVCIntrin::_InterlockedCompareExchange_acq;
97 case clang::AArch64::BI_InterlockedCompareExchange8_rel:
98 case clang::AArch64::BI_InterlockedCompareExchange16_rel:
99 case clang::AArch64::BI_InterlockedCompareExchange_rel:
100 case clang::AArch64::BI_InterlockedCompareExchange64_rel:
101 case clang::AArch64::BI_InterlockedCompareExchangePointer_rel:
102 return MSVCIntrin::_InterlockedCompareExchange_rel;
103 case clang::AArch64::BI_InterlockedCompareExchange8_nf:
104 case clang::AArch64::BI_InterlockedCompareExchange16_nf:
105 case clang::AArch64::BI_InterlockedCompareExchange_nf:
106 case clang::AArch64::BI_InterlockedCompareExchange64_nf:
107 return MSVCIntrin::_InterlockedCompareExchange_nf;
108 case clang::AArch64::BI_InterlockedCompareExchange128:
109 return MSVCIntrin::_InterlockedCompareExchange128;
110 case clang::AArch64::BI_InterlockedCompareExchange128_acq:
111 return MSVCIntrin::_InterlockedCompareExchange128_acq;
112 case clang::AArch64::BI_InterlockedCompareExchange128_nf:
113 return MSVCIntrin::_InterlockedCompareExchange128_nf;
114 case clang::AArch64::BI_InterlockedCompareExchange128_rel:
115 return MSVCIntrin::_InterlockedCompareExchange128_rel;
116 case clang::AArch64::BI_InterlockedOr8_acq:
117 case clang::AArch64::BI_InterlockedOr16_acq:
118 case clang::AArch64::BI_InterlockedOr_acq:
119 case clang::AArch64::BI_InterlockedOr64_acq:
120 return MSVCIntrin::_InterlockedOr_acq;
121 case clang::AArch64::BI_InterlockedOr8_rel:
122 case clang::AArch64::BI_InterlockedOr16_rel:
123 case clang::AArch64::BI_InterlockedOr_rel:
124 case clang::AArch64::BI_InterlockedOr64_rel:
125 return MSVCIntrin::_InterlockedOr_rel;
126 case clang::AArch64::BI_InterlockedOr8_nf:
127 case clang::AArch64::BI_InterlockedOr16_nf:
128 case clang::AArch64::BI_InterlockedOr_nf:
129 case clang::AArch64::BI_InterlockedOr64_nf:
130 return MSVCIntrin::_InterlockedOr_nf;
131 case clang::AArch64::BI_InterlockedXor8_acq:
132 case clang::AArch64::BI_InterlockedXor16_acq:
133 case clang::AArch64::BI_InterlockedXor_acq:
134 case clang::AArch64::BI_InterlockedXor64_acq:
135 return MSVCIntrin::_InterlockedXor_acq;
136 case clang::AArch64::BI_InterlockedXor8_rel:
137 case clang::AArch64::BI_InterlockedXor16_rel:
138 case clang::AArch64::BI_InterlockedXor_rel:
139 case clang::AArch64::BI_InterlockedXor64_rel:
140 return MSVCIntrin::_InterlockedXor_rel;
141 case clang::AArch64::BI_InterlockedXor8_nf:
142 case clang::AArch64::BI_InterlockedXor16_nf:
143 case clang::AArch64::BI_InterlockedXor_nf:
144 case clang::AArch64::BI_InterlockedXor64_nf:
145 return MSVCIntrin::_InterlockedXor_nf;
146 case clang::AArch64::BI_InterlockedAnd8_acq:
147 case clang::AArch64::BI_InterlockedAnd16_acq:
148 case clang::AArch64::BI_InterlockedAnd_acq:
149 case clang::AArch64::BI_InterlockedAnd64_acq:
150 return MSVCIntrin::_InterlockedAnd_acq;
151 case clang::AArch64::BI_InterlockedAnd8_rel:
152 case clang::AArch64::BI_InterlockedAnd16_rel:
153 case clang::AArch64::BI_InterlockedAnd_rel:
154 case clang::AArch64::BI_InterlockedAnd64_rel:
155 return MSVCIntrin::_InterlockedAnd_rel;
156 case clang::AArch64::BI_InterlockedAnd8_nf:
157 case clang::AArch64::BI_InterlockedAnd16_nf:
158 case clang::AArch64::BI_InterlockedAnd_nf:
159 case clang::AArch64::BI_InterlockedAnd64_nf:
160 return MSVCIntrin::_InterlockedAnd_nf;
161 case clang::AArch64::BI_InterlockedIncrement16_acq:
162 case clang::AArch64::BI_InterlockedIncrement_acq:
163 case clang::AArch64::BI_InterlockedIncrement64_acq:
164 return MSVCIntrin::_InterlockedIncrement_acq;
165 case clang::AArch64::BI_InterlockedIncrement16_rel:
166 case clang::AArch64::BI_InterlockedIncrement_rel:
167 case clang::AArch64::BI_InterlockedIncrement64_rel:
168 return MSVCIntrin::_InterlockedIncrement_rel;
169 case clang::AArch64::BI_InterlockedIncrement16_nf:
170 case clang::AArch64::BI_InterlockedIncrement_nf:
171 case clang::AArch64::BI_InterlockedIncrement64_nf:
172 return MSVCIntrin::_InterlockedIncrement_nf;
173 case clang::AArch64::BI_InterlockedDecrement16_acq:
174 case clang::AArch64::BI_InterlockedDecrement_acq:
175 case clang::AArch64::BI_InterlockedDecrement64_acq:
176 return MSVCIntrin::_InterlockedDecrement_acq;
177 case clang::AArch64::BI_InterlockedDecrement16_rel:
178 case clang::AArch64::BI_InterlockedDecrement_rel:
179 case clang::AArch64::BI_InterlockedDecrement64_rel:
180 return MSVCIntrin::_InterlockedDecrement_rel;
181 case clang::AArch64::BI_InterlockedDecrement16_nf:
182 case clang::AArch64::BI_InterlockedDecrement_nf:
183 case clang::AArch64::BI_InterlockedDecrement64_nf:
184 return MSVCIntrin::_InterlockedDecrement_nf;
185 }
186 llvm_unreachable("must return from switch");
187}
188
189static std::optional<CodeGenFunction::MSVCIntrin>
190translateArmToMsvcIntrin(unsigned BuiltinID) {
191 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
192 switch (BuiltinID) {
193 default:
194 return std::nullopt;
195 case clang::ARM::BI_BitScanForward:
196 case clang::ARM::BI_BitScanForward64:
197 return MSVCIntrin::_BitScanForward;
198 case clang::ARM::BI_BitScanReverse:
199 case clang::ARM::BI_BitScanReverse64:
200 return MSVCIntrin::_BitScanReverse;
201 case clang::ARM::BI_InterlockedAnd64:
202 return MSVCIntrin::_InterlockedAnd;
203 case clang::ARM::BI_InterlockedExchange64:
204 return MSVCIntrin::_InterlockedExchange;
205 case clang::ARM::BI_InterlockedExchangeAdd64:
206 return MSVCIntrin::_InterlockedExchangeAdd;
207 case clang::ARM::BI_InterlockedExchangeSub64:
208 return MSVCIntrin::_InterlockedExchangeSub;
209 case clang::ARM::BI_InterlockedOr64:
210 return MSVCIntrin::_InterlockedOr;
211 case clang::ARM::BI_InterlockedXor64:
212 return MSVCIntrin::_InterlockedXor;
213 case clang::ARM::BI_InterlockedDecrement64:
214 return MSVCIntrin::_InterlockedDecrement;
215 case clang::ARM::BI_InterlockedIncrement64:
216 return MSVCIntrin::_InterlockedIncrement;
217 case clang::ARM::BI_InterlockedExchangeAdd8_acq:
218 case clang::ARM::BI_InterlockedExchangeAdd16_acq:
219 case clang::ARM::BI_InterlockedExchangeAdd_acq:
220 case clang::ARM::BI_InterlockedExchangeAdd64_acq:
221 return MSVCIntrin::_InterlockedExchangeAdd_acq;
222 case clang::ARM::BI_InterlockedExchangeAdd8_rel:
223 case clang::ARM::BI_InterlockedExchangeAdd16_rel:
224 case clang::ARM::BI_InterlockedExchangeAdd_rel:
225 case clang::ARM::BI_InterlockedExchangeAdd64_rel:
226 return MSVCIntrin::_InterlockedExchangeAdd_rel;
227 case clang::ARM::BI_InterlockedExchangeAdd8_nf:
228 case clang::ARM::BI_InterlockedExchangeAdd16_nf:
229 case clang::ARM::BI_InterlockedExchangeAdd_nf:
230 case clang::ARM::BI_InterlockedExchangeAdd64_nf:
231 return MSVCIntrin::_InterlockedExchangeAdd_nf;
232 case clang::ARM::BI_InterlockedExchange8_acq:
233 case clang::ARM::BI_InterlockedExchange16_acq:
234 case clang::ARM::BI_InterlockedExchange_acq:
235 case clang::ARM::BI_InterlockedExchange64_acq:
236 case clang::ARM::BI_InterlockedExchangePointer_acq:
237 return MSVCIntrin::_InterlockedExchange_acq;
238 case clang::ARM::BI_InterlockedExchange8_rel:
239 case clang::ARM::BI_InterlockedExchange16_rel:
240 case clang::ARM::BI_InterlockedExchange_rel:
241 case clang::ARM::BI_InterlockedExchange64_rel:
242 case clang::ARM::BI_InterlockedExchangePointer_rel:
243 return MSVCIntrin::_InterlockedExchange_rel;
244 case clang::ARM::BI_InterlockedExchange8_nf:
245 case clang::ARM::BI_InterlockedExchange16_nf:
246 case clang::ARM::BI_InterlockedExchange_nf:
247 case clang::ARM::BI_InterlockedExchange64_nf:
248 case clang::ARM::BI_InterlockedExchangePointer_nf:
249 return MSVCIntrin::_InterlockedExchange_nf;
250 case clang::ARM::BI_InterlockedCompareExchange8_acq:
251 case clang::ARM::BI_InterlockedCompareExchange16_acq:
252 case clang::ARM::BI_InterlockedCompareExchange_acq:
253 case clang::ARM::BI_InterlockedCompareExchange64_acq:
254 case clang::ARM::BI_InterlockedCompareExchangePointer_acq:
255 return MSVCIntrin::_InterlockedCompareExchange_acq;
256 case clang::ARM::BI_InterlockedCompareExchange8_rel:
257 case clang::ARM::BI_InterlockedCompareExchange16_rel:
258 case clang::ARM::BI_InterlockedCompareExchange_rel:
259 case clang::ARM::BI_InterlockedCompareExchange64_rel:
260 case clang::ARM::BI_InterlockedCompareExchangePointer_rel:
261 return MSVCIntrin::_InterlockedCompareExchange_rel;
262 case clang::ARM::BI_InterlockedCompareExchange8_nf:
263 case clang::ARM::BI_InterlockedCompareExchange16_nf:
264 case clang::ARM::BI_InterlockedCompareExchange_nf:
265 case clang::ARM::BI_InterlockedCompareExchange64_nf:
266 return MSVCIntrin::_InterlockedCompareExchange_nf;
267 case clang::ARM::BI_InterlockedOr8_acq:
268 case clang::ARM::BI_InterlockedOr16_acq:
269 case clang::ARM::BI_InterlockedOr_acq:
270 case clang::ARM::BI_InterlockedOr64_acq:
271 return MSVCIntrin::_InterlockedOr_acq;
272 case clang::ARM::BI_InterlockedOr8_rel:
273 case clang::ARM::BI_InterlockedOr16_rel:
274 case clang::ARM::BI_InterlockedOr_rel:
275 case clang::ARM::BI_InterlockedOr64_rel:
276 return MSVCIntrin::_InterlockedOr_rel;
277 case clang::ARM::BI_InterlockedOr8_nf:
278 case clang::ARM::BI_InterlockedOr16_nf:
279 case clang::ARM::BI_InterlockedOr_nf:
280 case clang::ARM::BI_InterlockedOr64_nf:
281 return MSVCIntrin::_InterlockedOr_nf;
282 case clang::ARM::BI_InterlockedXor8_acq:
283 case clang::ARM::BI_InterlockedXor16_acq:
284 case clang::ARM::BI_InterlockedXor_acq:
285 case clang::ARM::BI_InterlockedXor64_acq:
286 return MSVCIntrin::_InterlockedXor_acq;
287 case clang::ARM::BI_InterlockedXor8_rel:
288 case clang::ARM::BI_InterlockedXor16_rel:
289 case clang::ARM::BI_InterlockedXor_rel:
290 case clang::ARM::BI_InterlockedXor64_rel:
291 return MSVCIntrin::_InterlockedXor_rel;
292 case clang::ARM::BI_InterlockedXor8_nf:
293 case clang::ARM::BI_InterlockedXor16_nf:
294 case clang::ARM::BI_InterlockedXor_nf:
295 case clang::ARM::BI_InterlockedXor64_nf:
296 return MSVCIntrin::_InterlockedXor_nf;
297 case clang::ARM::BI_InterlockedAnd8_acq:
298 case clang::ARM::BI_InterlockedAnd16_acq:
299 case clang::ARM::BI_InterlockedAnd_acq:
300 case clang::ARM::BI_InterlockedAnd64_acq:
301 return MSVCIntrin::_InterlockedAnd_acq;
302 case clang::ARM::BI_InterlockedAnd8_rel:
303 case clang::ARM::BI_InterlockedAnd16_rel:
304 case clang::ARM::BI_InterlockedAnd_rel:
305 case clang::ARM::BI_InterlockedAnd64_rel:
306 return MSVCIntrin::_InterlockedAnd_rel;
307 case clang::ARM::BI_InterlockedAnd8_nf:
308 case clang::ARM::BI_InterlockedAnd16_nf:
309 case clang::ARM::BI_InterlockedAnd_nf:
310 case clang::ARM::BI_InterlockedAnd64_nf:
311 return MSVCIntrin::_InterlockedAnd_nf;
312 case clang::ARM::BI_InterlockedIncrement16_acq:
313 case clang::ARM::BI_InterlockedIncrement_acq:
314 case clang::ARM::BI_InterlockedIncrement64_acq:
315 return MSVCIntrin::_InterlockedIncrement_acq;
316 case clang::ARM::BI_InterlockedIncrement16_rel:
317 case clang::ARM::BI_InterlockedIncrement_rel:
318 case clang::ARM::BI_InterlockedIncrement64_rel:
319 return MSVCIntrin::_InterlockedIncrement_rel;
320 case clang::ARM::BI_InterlockedIncrement16_nf:
321 case clang::ARM::BI_InterlockedIncrement_nf:
322 case clang::ARM::BI_InterlockedIncrement64_nf:
323 return MSVCIntrin::_InterlockedIncrement_nf;
324 case clang::ARM::BI_InterlockedDecrement16_acq:
325 case clang::ARM::BI_InterlockedDecrement_acq:
326 case clang::ARM::BI_InterlockedDecrement64_acq:
327 return MSVCIntrin::_InterlockedDecrement_acq;
328 case clang::ARM::BI_InterlockedDecrement16_rel:
329 case clang::ARM::BI_InterlockedDecrement_rel:
330 case clang::ARM::BI_InterlockedDecrement64_rel:
331 return MSVCIntrin::_InterlockedDecrement_rel;
332 case clang::ARM::BI_InterlockedDecrement16_nf:
333 case clang::ARM::BI_InterlockedDecrement_nf:
334 case clang::ARM::BI_InterlockedDecrement64_nf:
335 return MSVCIntrin::_InterlockedDecrement_nf;
336 }
337 llvm_unreachable("must return from switch");
338}
339
340// Emit an intrinsic where all operands are of the same type as the result.
341// Depending on mode, this may be a constrained floating-point intrinsic.
342static Value *emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
343 unsigned IntrinsicID,
344 unsigned ConstrainedIntrinsicID,
345 llvm::Type *Ty,
346 ArrayRef<Value *> Args) {
347 Function *F;
348 if (CGF.Builder.getIsFPConstrained())
349 F = CGF.CGM.getIntrinsic(IID: ConstrainedIntrinsicID, Tys: Ty);
350 else
351 F = CGF.CGM.getIntrinsic(IID: IntrinsicID, Tys: Ty);
352
353 if (CGF.Builder.getIsFPConstrained())
354 return CGF.Builder.CreateConstrainedFPCall(Callee: F, Args);
355 else
356 return CGF.Builder.CreateCall(Callee: F, Args);
357}
358
359static llvm::FixedVectorType *GetNeonType(CodeGenFunction *CGF,
360 NeonTypeFlags TypeFlags,
361 bool HasLegalHalfType = true,
362 bool V1Ty = false,
363 bool AllowBFloatArgsAndRet = true) {
364 int IsQuad = TypeFlags.isQuad();
365 switch (TypeFlags.getEltType()) {
366 case NeonTypeFlags::Int8:
367 case NeonTypeFlags::Poly8:
368 case NeonTypeFlags::MFloat8:
369 return llvm::FixedVectorType::get(ElementType: CGF->Int8Ty, NumElts: V1Ty ? 1 : (8 << IsQuad));
370 case NeonTypeFlags::Int16:
371 case NeonTypeFlags::Poly16:
372 return llvm::FixedVectorType::get(ElementType: CGF->Int16Ty, NumElts: V1Ty ? 1 : (4 << IsQuad));
373 case NeonTypeFlags::BFloat16:
374 if (AllowBFloatArgsAndRet)
375 return llvm::FixedVectorType::get(ElementType: CGF->BFloatTy, NumElts: V1Ty ? 1 : (4 << IsQuad));
376 else
377 return llvm::FixedVectorType::get(ElementType: CGF->Int16Ty, NumElts: V1Ty ? 1 : (4 << IsQuad));
378 case NeonTypeFlags::Float16:
379 if (HasLegalHalfType)
380 return llvm::FixedVectorType::get(ElementType: CGF->HalfTy, NumElts: V1Ty ? 1 : (4 << IsQuad));
381 else
382 return llvm::FixedVectorType::get(ElementType: CGF->Int16Ty, NumElts: V1Ty ? 1 : (4 << IsQuad));
383 case NeonTypeFlags::Int32:
384 return llvm::FixedVectorType::get(ElementType: CGF->Int32Ty, NumElts: V1Ty ? 1 : (2 << IsQuad));
385 case NeonTypeFlags::Int64:
386 case NeonTypeFlags::Poly64:
387 return llvm::FixedVectorType::get(ElementType: CGF->Int64Ty, NumElts: V1Ty ? 1 : (1 << IsQuad));
388 case NeonTypeFlags::Poly128:
389 // FIXME: i128 and f128 doesn't get fully support in Clang and llvm.
390 // There is a lot of i128 and f128 API missing.
391 // so we use v16i8 to represent poly128 and get pattern matched.
392 return llvm::FixedVectorType::get(ElementType: CGF->Int8Ty, NumElts: 16);
393 case NeonTypeFlags::Float32:
394 return llvm::FixedVectorType::get(ElementType: CGF->FloatTy, NumElts: V1Ty ? 1 : (2 << IsQuad));
395 case NeonTypeFlags::Float64:
396 return llvm::FixedVectorType::get(ElementType: CGF->DoubleTy, NumElts: V1Ty ? 1 : (1 << IsQuad));
397 }
398 llvm_unreachable("Unknown vector element type!");
399}
400
401static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
402 NeonTypeFlags IntTypeFlags) {
403 int IsQuad = IntTypeFlags.isQuad();
404 switch (IntTypeFlags.getEltType()) {
405 case NeonTypeFlags::Int16:
406 return llvm::FixedVectorType::get(ElementType: CGF->HalfTy, NumElts: (4 << IsQuad));
407 case NeonTypeFlags::Int32:
408 return llvm::FixedVectorType::get(ElementType: CGF->FloatTy, NumElts: (2 << IsQuad));
409 case NeonTypeFlags::Int64:
410 return llvm::FixedVectorType::get(ElementType: CGF->DoubleTy, NumElts: (1 << IsQuad));
411 default:
412 llvm_unreachable("Type can't be converted to floating-point!");
413 }
414}
415
416Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C,
417 const ElementCount &Count) {
418 Value *SV = llvm::ConstantVector::getSplat(EC: Count, Elt: C);
419 return Builder.CreateShuffleVector(V1: V, V2: V, Mask: SV, Name: "lane");
420}
421
422Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
423 ElementCount EC = cast<llvm::VectorType>(Val: V->getType())->getElementCount();
424 return EmitNeonSplat(V, C, Count: EC);
425}
426
427Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
428 const char *name,
429 unsigned shift, bool rightshift) {
430 unsigned j = 0;
431 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
432 ai != ae; ++ai, ++j) {
433 if (F->isConstrainedFPIntrinsic())
434 if (ai->getType()->isMetadataTy())
435 continue;
436 if (shift > 0 && shift == j)
437 Ops[j] = EmitNeonShiftVector(V: Ops[j], Ty: ai->getType(), negateForRightShift: rightshift);
438 else
439 Ops[j] = Builder.CreateBitCast(V: Ops[j], DestTy: ai->getType(), Name: name);
440 }
441
442 if (F->isConstrainedFPIntrinsic())
443 return Builder.CreateConstrainedFPCall(Callee: F, Args: Ops, Name: name);
444 else
445 return Builder.CreateCall(Callee: F, Args: Ops, Name: name);
446}
447
448Value *CodeGenFunction::EmitFP8NeonCall(unsigned IID,
449 ArrayRef<llvm::Type *> Tys,
450 SmallVectorImpl<Value *> &Ops,
451 const CallExpr *E, const char *name) {
452 llvm::Value *FPM =
453 EmitScalarOrConstFoldImmArg(/* ICEArguments */ 0, Idx: E->getNumArgs() - 1, E);
454 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr), FPM);
455 return EmitNeonCall(F: CGM.getIntrinsic(IID, Tys), Ops, name);
456}
457
458llvm::Value *CodeGenFunction::EmitFP8NeonFDOTCall(
459 unsigned IID, bool ExtendLaneArg, llvm::Type *RetTy,
460 SmallVectorImpl<llvm::Value *> &Ops, const CallExpr *E, const char *name) {
461
462 const unsigned ElemCount = Ops[0]->getType()->getPrimitiveSizeInBits() /
463 RetTy->getPrimitiveSizeInBits();
464 llvm::Type *Tys[] = {llvm::FixedVectorType::get(ElementType: RetTy, NumElts: ElemCount),
465 Ops[1]->getType()};
466 if (ExtendLaneArg) {
467 auto *VT = llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 16);
468 Ops[2] = Builder.CreateInsertVector(DstType: VT, SrcVec: PoisonValue::get(T: VT), SubVec: Ops[2],
469 Idx: uint64_t(0));
470 }
471 return EmitFP8NeonCall(IID, Tys, Ops, E, name);
472}
473
474llvm::Value *CodeGenFunction::EmitFP8NeonFMLACall(
475 unsigned IID, bool ExtendLaneArg, llvm::Type *RetTy,
476 SmallVectorImpl<llvm::Value *> &Ops, const CallExpr *E, const char *name) {
477
478 if (ExtendLaneArg) {
479 auto *VT = llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 16);
480 Ops[2] = Builder.CreateInsertVector(DstType: VT, SrcVec: PoisonValue::get(T: VT), SubVec: Ops[2],
481 Idx: uint64_t(0));
482 }
483 const unsigned ElemCount = Ops[0]->getType()->getPrimitiveSizeInBits() /
484 RetTy->getPrimitiveSizeInBits();
485 return EmitFP8NeonCall(IID, Tys: {llvm::FixedVectorType::get(ElementType: RetTy, NumElts: ElemCount)},
486 Ops, E, name);
487}
488
489Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
490 bool neg) {
491 int SV = cast<ConstantInt>(Val: V)->getSExtValue();
492 return ConstantInt::get(Ty, V: neg ? -SV : SV);
493}
494
495Value *CodeGenFunction::EmitFP8NeonCvtCall(unsigned IID, llvm::Type *Ty0,
496 llvm::Type *Ty1, bool Extract,
497 SmallVectorImpl<llvm::Value *> &Ops,
498 const CallExpr *E,
499 const char *name) {
500 llvm::Type *Tys[] = {Ty0, Ty1};
501 if (Extract) {
502 // Op[0] is mfloat8x16_t, but the intrinsic converts only the lower part of
503 // the vector.
504 Tys[1] = llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 8);
505 Ops[0] = Builder.CreateExtractVector(DstType: Tys[1], SrcVec: Ops[0], Idx: uint64_t(0));
506 }
507 return EmitFP8NeonCall(IID, Tys, Ops, E, name);
508}
509
510// Right-shift a vector by a constant.
511Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
512 llvm::Type *Ty, bool usgn,
513 const char *name) {
514 llvm::VectorType *VTy = cast<llvm::VectorType>(Val: Ty);
515
516 int ShiftAmt = cast<ConstantInt>(Val: Shift)->getSExtValue();
517 int EltSize = VTy->getScalarSizeInBits();
518
519 Vec = Builder.CreateBitCast(V: Vec, DestTy: Ty);
520
521 // lshr/ashr are undefined when the shift amount is equal to the vector
522 // element size.
523 if (ShiftAmt == EltSize) {
524 if (usgn) {
525 // Right-shifting an unsigned value by its size yields 0.
526 return llvm::ConstantAggregateZero::get(Ty: VTy);
527 } else {
528 // Right-shifting a signed value by its size is equivalent
529 // to a shift of size-1.
530 --ShiftAmt;
531 Shift = ConstantInt::get(Ty: VTy->getElementType(), V: ShiftAmt);
532 }
533 }
534
535 Shift = EmitNeonShiftVector(V: Shift, Ty, neg: false);
536 if (usgn)
537 return Builder.CreateLShr(LHS: Vec, RHS: Shift, Name: name);
538 else
539 return Builder.CreateAShr(LHS: Vec, RHS: Shift, Name: name);
540}
541
542enum {
543 AddRetType = (1 << 0),
544 Add1ArgType = (1 << 1),
545 Add2ArgTypes = (1 << 2),
546
547 VectorizeRetType = (1 << 3),
548 VectorizeArgTypes = (1 << 4),
549
550 InventFloatType = (1 << 5),
551 UnsignedAlts = (1 << 6),
552
553 Use64BitVectors = (1 << 7),
554 Use128BitVectors = (1 << 8),
555
556 Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
557 VectorRet = AddRetType | VectorizeRetType,
558 VectorRetGetArgs01 =
559 AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
560 FpCmpzModifiers =
561 AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
562};
563
564namespace {
565struct ARMVectorIntrinsicInfo {
566 const char *NameHint;
567 unsigned BuiltinID;
568 unsigned LLVMIntrinsic;
569 unsigned AltLLVMIntrinsic;
570 uint64_t TypeModifier;
571
572 bool operator<(unsigned RHSBuiltinID) const {
573 return BuiltinID < RHSBuiltinID;
574 }
575 bool operator<(const ARMVectorIntrinsicInfo &TE) const {
576 return BuiltinID < TE.BuiltinID;
577 }
578};
579} // end anonymous namespace
580
581#define NEONMAP0(NameBase) \
582 { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }
583
584#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
585 { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
586 Intrinsic::LLVMIntrinsic, 0, TypeModifier }
587
588#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
589 { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
590 Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
591 TypeModifier }
592
593static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = {
594 NEONMAP1(__a32_vcvt_bf16_f32, arm_neon_vcvtfp2bf, 0),
595 NEONMAP0(splat_lane_v),
596 NEONMAP0(splat_laneq_v),
597 NEONMAP0(splatq_lane_v),
598 NEONMAP0(splatq_laneq_v),
599 NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
600 NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
601 NEONMAP1(vabs_v, arm_neon_vabs, 0),
602 NEONMAP1(vabsq_v, arm_neon_vabs, 0),
603 NEONMAP0(vadd_v),
604 NEONMAP0(vaddhn_v),
605 NEONMAP0(vaddq_v),
606 NEONMAP1(vaesdq_u8, arm_neon_aesd, 0),
607 NEONMAP1(vaeseq_u8, arm_neon_aese, 0),
608 NEONMAP1(vaesimcq_u8, arm_neon_aesimc, 0),
609 NEONMAP1(vaesmcq_u8, arm_neon_aesmc, 0),
610 NEONMAP1(vbfdot_f32, arm_neon_bfdot, 0),
611 NEONMAP1(vbfdotq_f32, arm_neon_bfdot, 0),
612 NEONMAP1(vbfmlalbq_f32, arm_neon_bfmlalb, 0),
613 NEONMAP1(vbfmlaltq_f32, arm_neon_bfmlalt, 0),
614 NEONMAP1(vbfmmlaq_f32, arm_neon_bfmmla, 0),
615 NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
616 NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
617 NEONMAP1(vcadd_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
618 NEONMAP1(vcadd_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
619 NEONMAP1(vcadd_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
620 NEONMAP1(vcadd_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
621 NEONMAP1(vcaddq_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
622 NEONMAP1(vcaddq_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
623 NEONMAP1(vcaddq_rot270_f64, arm_neon_vcadd_rot270, Add1ArgType),
624 NEONMAP1(vcaddq_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
625 NEONMAP1(vcaddq_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
626 NEONMAP1(vcaddq_rot90_f64, arm_neon_vcadd_rot90, Add1ArgType),
627 NEONMAP1(vcage_v, arm_neon_vacge, 0),
628 NEONMAP1(vcageq_v, arm_neon_vacge, 0),
629 NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
630 NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
631 NEONMAP1(vcale_v, arm_neon_vacge, 0),
632 NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
633 NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
634 NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
635 NEONMAP0(vceqz_v),
636 NEONMAP0(vceqzq_v),
637 NEONMAP0(vcgez_v),
638 NEONMAP0(vcgezq_v),
639 NEONMAP0(vcgtz_v),
640 NEONMAP0(vcgtzq_v),
641 NEONMAP0(vclez_v),
642 NEONMAP0(vclezq_v),
643 NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
644 NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
645 NEONMAP0(vcltz_v),
646 NEONMAP0(vcltzq_v),
647 NEONMAP1(vclz_v, ctlz, Add1ArgType),
648 NEONMAP1(vclzq_v, ctlz, Add1ArgType),
649 NEONMAP1(vcnt_v, ctpop, Add1ArgType),
650 NEONMAP1(vcntq_v, ctpop, Add1ArgType),
651 NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
652 NEONMAP0(vcvt_f16_s16),
653 NEONMAP0(vcvt_f16_u16),
654 NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
655 NEONMAP0(vcvt_f32_v),
656 NEONMAP1(vcvt_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
657 NEONMAP1(vcvt_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
658 NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
659 NEONMAP1(vcvt_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
660 NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
661 NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
662 NEONMAP1(vcvt_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
663 NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
664 NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
665 NEONMAP0(vcvt_s16_f16),
666 NEONMAP0(vcvt_s32_v),
667 NEONMAP0(vcvt_s64_v),
668 NEONMAP0(vcvt_u16_f16),
669 NEONMAP0(vcvt_u32_v),
670 NEONMAP0(vcvt_u64_v),
671 NEONMAP1(vcvta_s16_f16, arm_neon_vcvtas, 0),
672 NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
673 NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
674 NEONMAP1(vcvta_u16_f16, arm_neon_vcvtau, 0),
675 NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
676 NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
677 NEONMAP1(vcvtaq_s16_f16, arm_neon_vcvtas, 0),
678 NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
679 NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
680 NEONMAP1(vcvtaq_u16_f16, arm_neon_vcvtau, 0),
681 NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
682 NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
683 NEONMAP1(vcvth_bf16_f32, arm_neon_vcvtbfp2bf, 0),
684 NEONMAP1(vcvtm_s16_f16, arm_neon_vcvtms, 0),
685 NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
686 NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
687 NEONMAP1(vcvtm_u16_f16, arm_neon_vcvtmu, 0),
688 NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
689 NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
690 NEONMAP1(vcvtmq_s16_f16, arm_neon_vcvtms, 0),
691 NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
692 NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
693 NEONMAP1(vcvtmq_u16_f16, arm_neon_vcvtmu, 0),
694 NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
695 NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
696 NEONMAP1(vcvtn_s16_f16, arm_neon_vcvtns, 0),
697 NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
698 NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
699 NEONMAP1(vcvtn_u16_f16, arm_neon_vcvtnu, 0),
700 NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
701 NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
702 NEONMAP1(vcvtnq_s16_f16, arm_neon_vcvtns, 0),
703 NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
704 NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
705 NEONMAP1(vcvtnq_u16_f16, arm_neon_vcvtnu, 0),
706 NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
707 NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
708 NEONMAP1(vcvtp_s16_f16, arm_neon_vcvtps, 0),
709 NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
710 NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
711 NEONMAP1(vcvtp_u16_f16, arm_neon_vcvtpu, 0),
712 NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
713 NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
714 NEONMAP1(vcvtpq_s16_f16, arm_neon_vcvtps, 0),
715 NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
716 NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
717 NEONMAP1(vcvtpq_u16_f16, arm_neon_vcvtpu, 0),
718 NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
719 NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
720 NEONMAP0(vcvtq_f16_s16),
721 NEONMAP0(vcvtq_f16_u16),
722 NEONMAP0(vcvtq_f32_v),
723 NEONMAP1(vcvtq_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
724 NEONMAP1(vcvtq_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
725 NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
726 NEONMAP1(vcvtq_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
727 NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
728 NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
729 NEONMAP1(vcvtq_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
730 NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
731 NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
732 NEONMAP0(vcvtq_s16_f16),
733 NEONMAP0(vcvtq_s32_v),
734 NEONMAP0(vcvtq_s64_v),
735 NEONMAP0(vcvtq_u16_f16),
736 NEONMAP0(vcvtq_u32_v),
737 NEONMAP0(vcvtq_u64_v),
738 NEONMAP1(vdot_s32, arm_neon_sdot, 0),
739 NEONMAP1(vdot_u32, arm_neon_udot, 0),
740 NEONMAP1(vdotq_s32, arm_neon_sdot, 0),
741 NEONMAP1(vdotq_u32, arm_neon_udot, 0),
742 NEONMAP0(vext_v),
743 NEONMAP0(vextq_v),
744 NEONMAP0(vfma_v),
745 NEONMAP0(vfmaq_v),
746 NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
747 NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
748 NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
749 NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
750 NEONMAP0(vld1_dup_v),
751 NEONMAP1(vld1_v, arm_neon_vld1, 0),
752 NEONMAP1(vld1_x2_v, arm_neon_vld1x2, 0),
753 NEONMAP1(vld1_x3_v, arm_neon_vld1x3, 0),
754 NEONMAP1(vld1_x4_v, arm_neon_vld1x4, 0),
755 NEONMAP0(vld1q_dup_v),
756 NEONMAP1(vld1q_v, arm_neon_vld1, 0),
757 NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0),
758 NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0),
759 NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0),
760 NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0),
761 NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
762 NEONMAP1(vld2_v, arm_neon_vld2, 0),
763 NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0),
764 NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
765 NEONMAP1(vld2q_v, arm_neon_vld2, 0),
766 NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 0),
767 NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
768 NEONMAP1(vld3_v, arm_neon_vld3, 0),
769 NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0),
770 NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
771 NEONMAP1(vld3q_v, arm_neon_vld3, 0),
772 NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0),
773 NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
774 NEONMAP1(vld4_v, arm_neon_vld4, 0),
775 NEONMAP1(vld4q_dup_v, arm_neon_vld4dup, 0),
776 NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
777 NEONMAP1(vld4q_v, arm_neon_vld4, 0),
778 NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
779 NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
780 NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
781 NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
782 NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
783 NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
784 NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
785 NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
786 NEONMAP1(vmmlaq_s32, arm_neon_smmla, 0),
787 NEONMAP1(vmmlaq_u32, arm_neon_ummla, 0),
788 NEONMAP0(vmovl_v),
789 NEONMAP0(vmovn_v),
790 NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
791 NEONMAP0(vmull_v),
792 NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
793 NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
794 NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
795 NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
796 NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
797 NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
798 NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
799 NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
800 NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
801 NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
802 NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
803 NEONMAP2(vqadd_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
804 NEONMAP2(vqaddq_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
805 NEONMAP2(vqdmlal_v, arm_neon_vqdmull, sadd_sat, 0),
806 NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, ssub_sat, 0),
807 NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
808 NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
809 NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
810 NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
811 NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
812 NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
813 NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
814 NEONMAP1(vqrdmlah_s16, arm_neon_vqrdmlah, Add1ArgType),
815 NEONMAP1(vqrdmlah_s32, arm_neon_vqrdmlah, Add1ArgType),
816 NEONMAP1(vqrdmlahq_s16, arm_neon_vqrdmlah, Add1ArgType),
817 NEONMAP1(vqrdmlahq_s32, arm_neon_vqrdmlah, Add1ArgType),
818 NEONMAP1(vqrdmlsh_s16, arm_neon_vqrdmlsh, Add1ArgType),
819 NEONMAP1(vqrdmlsh_s32, arm_neon_vqrdmlsh, Add1ArgType),
820 NEONMAP1(vqrdmlshq_s16, arm_neon_vqrdmlsh, Add1ArgType),
821 NEONMAP1(vqrdmlshq_s32, arm_neon_vqrdmlsh, Add1ArgType),
822 NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
823 NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
824 NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
825 NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
826 NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
827 NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
828 NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
829 NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
830 NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
831 NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
832 NEONMAP2(vqsub_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
833 NEONMAP2(vqsubq_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
834 NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
835 NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
836 NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
837 NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
838 NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
839 NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
840 NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
841 NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
842 NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
843 NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
844 NEONMAP0(vrndi_v),
845 NEONMAP0(vrndiq_v),
846 NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
847 NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
848 NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
849 NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
850 NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
851 NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
852 NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
853 NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
854 NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
855 NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
856 NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
857 NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
858 NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
859 NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
860 NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
861 NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
862 NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
863 NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
864 NEONMAP1(vsha1su0q_u32, arm_neon_sha1su0, 0),
865 NEONMAP1(vsha1su1q_u32, arm_neon_sha1su1, 0),
866 NEONMAP1(vsha256h2q_u32, arm_neon_sha256h2, 0),
867 NEONMAP1(vsha256hq_u32, arm_neon_sha256h, 0),
868 NEONMAP1(vsha256su0q_u32, arm_neon_sha256su0, 0),
869 NEONMAP1(vsha256su1q_u32, arm_neon_sha256su1, 0),
870 NEONMAP0(vshl_n_v),
871 NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
872 NEONMAP0(vshll_n_v),
873 NEONMAP0(vshlq_n_v),
874 NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
875 NEONMAP0(vshr_n_v),
876 NEONMAP0(vshrn_n_v),
877 NEONMAP0(vshrq_n_v),
878 NEONMAP1(vst1_v, arm_neon_vst1, 0),
879 NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0),
880 NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0),
881 NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0),
882 NEONMAP1(vst1q_v, arm_neon_vst1, 0),
883 NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0),
884 NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0),
885 NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0),
886 NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
887 NEONMAP1(vst2_v, arm_neon_vst2, 0),
888 NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
889 NEONMAP1(vst2q_v, arm_neon_vst2, 0),
890 NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
891 NEONMAP1(vst3_v, arm_neon_vst3, 0),
892 NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
893 NEONMAP1(vst3q_v, arm_neon_vst3, 0),
894 NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
895 NEONMAP1(vst4_v, arm_neon_vst4, 0),
896 NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
897 NEONMAP1(vst4q_v, arm_neon_vst4, 0),
898 NEONMAP0(vsubhn_v),
899 NEONMAP0(vtrn_v),
900 NEONMAP0(vtrnq_v),
901 NEONMAP0(vtst_v),
902 NEONMAP0(vtstq_v),
903 NEONMAP1(vusdot_s32, arm_neon_usdot, 0),
904 NEONMAP1(vusdotq_s32, arm_neon_usdot, 0),
905 NEONMAP1(vusmmlaq_s32, arm_neon_usmmla, 0),
906 NEONMAP0(vuzp_v),
907 NEONMAP0(vuzpq_v),
908 NEONMAP0(vzip_v),
909 NEONMAP0(vzipq_v)
910};
911
912static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
913 NEONMAP0(splat_lane_v),
914 NEONMAP0(splat_laneq_v),
915 NEONMAP0(splatq_lane_v),
916 NEONMAP0(splatq_laneq_v),
917 NEONMAP1(vabs_v, aarch64_neon_abs, 0),
918 NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
919 NEONMAP0(vadd_v),
920 NEONMAP0(vaddhn_v),
921 NEONMAP0(vaddq_p128),
922 NEONMAP0(vaddq_v),
923 NEONMAP1(vaesdq_u8, aarch64_crypto_aesd, 0),
924 NEONMAP1(vaeseq_u8, aarch64_crypto_aese, 0),
925 NEONMAP1(vaesimcq_u8, aarch64_crypto_aesimc, 0),
926 NEONMAP1(vaesmcq_u8, aarch64_crypto_aesmc, 0),
927 NEONMAP2(vbcaxq_s16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
928 NEONMAP2(vbcaxq_s32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
929 NEONMAP2(vbcaxq_s64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
930 NEONMAP2(vbcaxq_s8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
931 NEONMAP2(vbcaxq_u16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
932 NEONMAP2(vbcaxq_u32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
933 NEONMAP2(vbcaxq_u64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
934 NEONMAP2(vbcaxq_u8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
935 NEONMAP1(vbfdot_f32, aarch64_neon_bfdot, 0),
936 NEONMAP1(vbfdotq_f32, aarch64_neon_bfdot, 0),
937 NEONMAP1(vbfmlalbq_f32, aarch64_neon_bfmlalb, 0),
938 NEONMAP1(vbfmlaltq_f32, aarch64_neon_bfmlalt, 0),
939 NEONMAP1(vbfmmlaq_f32, aarch64_neon_bfmmla, 0),
940 NEONMAP1(vcadd_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
941 NEONMAP1(vcadd_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
942 NEONMAP1(vcadd_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
943 NEONMAP1(vcadd_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
944 NEONMAP1(vcaddq_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
945 NEONMAP1(vcaddq_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
946 NEONMAP1(vcaddq_rot270_f64, aarch64_neon_vcadd_rot270, Add1ArgType),
947 NEONMAP1(vcaddq_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
948 NEONMAP1(vcaddq_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
949 NEONMAP1(vcaddq_rot90_f64, aarch64_neon_vcadd_rot90, Add1ArgType),
950 NEONMAP1(vcage_v, aarch64_neon_facge, 0),
951 NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
952 NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
953 NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
954 NEONMAP1(vcale_v, aarch64_neon_facge, 0),
955 NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
956 NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
957 NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
958 NEONMAP0(vceqz_v),
959 NEONMAP0(vceqzq_v),
960 NEONMAP0(vcgez_v),
961 NEONMAP0(vcgezq_v),
962 NEONMAP0(vcgtz_v),
963 NEONMAP0(vcgtzq_v),
964 NEONMAP0(vclez_v),
965 NEONMAP0(vclezq_v),
966 NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
967 NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
968 NEONMAP0(vcltz_v),
969 NEONMAP0(vcltzq_v),
970 NEONMAP1(vclz_v, ctlz, Add1ArgType),
971 NEONMAP1(vclzq_v, ctlz, Add1ArgType),
972 NEONMAP1(vcmla_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
973 NEONMAP1(vcmla_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
974 NEONMAP1(vcmla_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
975 NEONMAP1(vcmla_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
976 NEONMAP1(vcmla_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
977 NEONMAP1(vcmla_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
978 NEONMAP1(vcmla_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
979 NEONMAP1(vcmla_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
980 NEONMAP1(vcmlaq_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
981 NEONMAP1(vcmlaq_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
982 NEONMAP1(vcmlaq_f64, aarch64_neon_vcmla_rot0, Add1ArgType),
983 NEONMAP1(vcmlaq_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
984 NEONMAP1(vcmlaq_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
985 NEONMAP1(vcmlaq_rot180_f64, aarch64_neon_vcmla_rot180, Add1ArgType),
986 NEONMAP1(vcmlaq_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
987 NEONMAP1(vcmlaq_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
988 NEONMAP1(vcmlaq_rot270_f64, aarch64_neon_vcmla_rot270, Add1ArgType),
989 NEONMAP1(vcmlaq_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
990 NEONMAP1(vcmlaq_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
991 NEONMAP1(vcmlaq_rot90_f64, aarch64_neon_vcmla_rot90, Add1ArgType),
992 NEONMAP1(vcnt_v, ctpop, Add1ArgType),
993 NEONMAP1(vcntq_v, ctpop, Add1ArgType),
994 NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
995 NEONMAP0(vcvt_f16_s16),
996 NEONMAP0(vcvt_f16_u16),
997 NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
998 NEONMAP0(vcvt_f32_v),
999 NEONMAP1(vcvt_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
1000 NEONMAP1(vcvt_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
1001 NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
1002 NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
1003 NEONMAP1(vcvt_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
1004 NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
1005 NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
1006 NEONMAP1(vcvt_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
1007 NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
1008 NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
1009 NEONMAP0(vcvtq_f16_s16),
1010 NEONMAP0(vcvtq_f16_u16),
1011 NEONMAP0(vcvtq_f32_v),
1012 NEONMAP0(vcvtq_high_bf16_f32),
1013 NEONMAP0(vcvtq_low_bf16_f32),
1014 NEONMAP1(vcvtq_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
1015 NEONMAP1(vcvtq_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
1016 NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
1017 NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
1018 NEONMAP1(vcvtq_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
1019 NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
1020 NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
1021 NEONMAP1(vcvtq_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
1022 NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
1023 NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
1024 NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
1025 NEONMAP1(vdot_s32, aarch64_neon_sdot, 0),
1026 NEONMAP1(vdot_u32, aarch64_neon_udot, 0),
1027 NEONMAP1(vdotq_s32, aarch64_neon_sdot, 0),
1028 NEONMAP1(vdotq_u32, aarch64_neon_udot, 0),
1029 NEONMAP2(veor3q_s16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
1030 NEONMAP2(veor3q_s32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
1031 NEONMAP2(veor3q_s64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
1032 NEONMAP2(veor3q_s8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
1033 NEONMAP2(veor3q_u16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
1034 NEONMAP2(veor3q_u32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
1035 NEONMAP2(veor3q_u64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
1036 NEONMAP2(veor3q_u8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
1037 NEONMAP0(vext_v),
1038 NEONMAP0(vextq_v),
1039 NEONMAP0(vfma_v),
1040 NEONMAP0(vfmaq_v),
1041 NEONMAP1(vfmlal_high_f16, aarch64_neon_fmlal2, 0),
1042 NEONMAP1(vfmlal_low_f16, aarch64_neon_fmlal, 0),
1043 NEONMAP1(vfmlalq_high_f16, aarch64_neon_fmlal2, 0),
1044 NEONMAP1(vfmlalq_low_f16, aarch64_neon_fmlal, 0),
1045 NEONMAP1(vfmlsl_high_f16, aarch64_neon_fmlsl2, 0),
1046 NEONMAP1(vfmlsl_low_f16, aarch64_neon_fmlsl, 0),
1047 NEONMAP1(vfmlslq_high_f16, aarch64_neon_fmlsl2, 0),
1048 NEONMAP1(vfmlslq_low_f16, aarch64_neon_fmlsl, 0),
1049 NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
1050 NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
1051 NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
1052 NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
1053 NEONMAP1(vld1_x2_v, aarch64_neon_ld1x2, 0),
1054 NEONMAP1(vld1_x3_v, aarch64_neon_ld1x3, 0),
1055 NEONMAP1(vld1_x4_v, aarch64_neon_ld1x4, 0),
1056 NEONMAP1(vld1q_x2_v, aarch64_neon_ld1x2, 0),
1057 NEONMAP1(vld1q_x3_v, aarch64_neon_ld1x3, 0),
1058 NEONMAP1(vld1q_x4_v, aarch64_neon_ld1x4, 0),
1059 NEONMAP1(vmmlaq_s32, aarch64_neon_smmla, 0),
1060 NEONMAP1(vmmlaq_u32, aarch64_neon_ummla, 0),
1061 NEONMAP0(vmovl_v),
1062 NEONMAP0(vmovn_v),
1063 NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
1064 NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
1065 NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
1066 NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
1067 NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
1068 NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
1069 NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
1070 NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
1071 NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
1072 NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
1073 NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
1074 NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
1075 NEONMAP1(vqdmulh_lane_v, aarch64_neon_sqdmulh_lane, 0),
1076 NEONMAP1(vqdmulh_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
1077 NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
1078 NEONMAP1(vqdmulhq_lane_v, aarch64_neon_sqdmulh_lane, 0),
1079 NEONMAP1(vqdmulhq_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
1080 NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
1081 NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
1082 NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
1083 NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
1084 NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
1085 NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
1086 NEONMAP1(vqrdmlah_s16, aarch64_neon_sqrdmlah, Add1ArgType),
1087 NEONMAP1(vqrdmlah_s32, aarch64_neon_sqrdmlah, Add1ArgType),
1088 NEONMAP1(vqrdmlahq_s16, aarch64_neon_sqrdmlah, Add1ArgType),
1089 NEONMAP1(vqrdmlahq_s32, aarch64_neon_sqrdmlah, Add1ArgType),
1090 NEONMAP1(vqrdmlsh_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
1091 NEONMAP1(vqrdmlsh_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
1092 NEONMAP1(vqrdmlshq_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
1093 NEONMAP1(vqrdmlshq_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
1094 NEONMAP1(vqrdmulh_lane_v, aarch64_neon_sqrdmulh_lane, 0),
1095 NEONMAP1(vqrdmulh_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
1096 NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
1097 NEONMAP1(vqrdmulhq_lane_v, aarch64_neon_sqrdmulh_lane, 0),
1098 NEONMAP1(vqrdmulhq_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
1099 NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
1100 NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
1101 NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
1102 NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
1103 NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
1104 NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl,UnsignedAlts),
1105 NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
1106 NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
1107 NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
1108 NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
1109 NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
1110 NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
1111 NEONMAP1(vrax1q_u64, aarch64_crypto_rax1, 0),
1112 NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
1113 NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
1114 NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
1115 NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
1116 NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
1117 NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
1118 NEONMAP1(vrnd32x_f32, aarch64_neon_frint32x, Add1ArgType),
1119 NEONMAP1(vrnd32x_f64, aarch64_neon_frint32x, Add1ArgType),
1120 NEONMAP1(vrnd32xq_f32, aarch64_neon_frint32x, Add1ArgType),
1121 NEONMAP1(vrnd32xq_f64, aarch64_neon_frint32x, Add1ArgType),
1122 NEONMAP1(vrnd32z_f32, aarch64_neon_frint32z, Add1ArgType),
1123 NEONMAP1(vrnd32z_f64, aarch64_neon_frint32z, Add1ArgType),
1124 NEONMAP1(vrnd32zq_f32, aarch64_neon_frint32z, Add1ArgType),
1125 NEONMAP1(vrnd32zq_f64, aarch64_neon_frint32z, Add1ArgType),
1126 NEONMAP1(vrnd64x_f32, aarch64_neon_frint64x, Add1ArgType),
1127 NEONMAP1(vrnd64x_f64, aarch64_neon_frint64x, Add1ArgType),
1128 NEONMAP1(vrnd64xq_f32, aarch64_neon_frint64x, Add1ArgType),
1129 NEONMAP1(vrnd64xq_f64, aarch64_neon_frint64x, Add1ArgType),
1130 NEONMAP1(vrnd64z_f32, aarch64_neon_frint64z, Add1ArgType),
1131 NEONMAP1(vrnd64z_f64, aarch64_neon_frint64z, Add1ArgType),
1132 NEONMAP1(vrnd64zq_f32, aarch64_neon_frint64z, Add1ArgType),
1133 NEONMAP1(vrnd64zq_f64, aarch64_neon_frint64z, Add1ArgType),
1134 NEONMAP0(vrndi_v),
1135 NEONMAP0(vrndiq_v),
1136 NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
1137 NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
1138 NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
1139 NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
1140 NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
1141 NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
1142 NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
1143 NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
1144 NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
1145 NEONMAP1(vsha1su0q_u32, aarch64_crypto_sha1su0, 0),
1146 NEONMAP1(vsha1su1q_u32, aarch64_crypto_sha1su1, 0),
1147 NEONMAP1(vsha256h2q_u32, aarch64_crypto_sha256h2, 0),
1148 NEONMAP1(vsha256hq_u32, aarch64_crypto_sha256h, 0),
1149 NEONMAP1(vsha256su0q_u32, aarch64_crypto_sha256su0, 0),
1150 NEONMAP1(vsha256su1q_u32, aarch64_crypto_sha256su1, 0),
1151 NEONMAP1(vsha512h2q_u64, aarch64_crypto_sha512h2, 0),
1152 NEONMAP1(vsha512hq_u64, aarch64_crypto_sha512h, 0),
1153 NEONMAP1(vsha512su0q_u64, aarch64_crypto_sha512su0, 0),
1154 NEONMAP1(vsha512su1q_u64, aarch64_crypto_sha512su1, 0),
1155 NEONMAP0(vshl_n_v),
1156 NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
1157 NEONMAP0(vshll_n_v),
1158 NEONMAP0(vshlq_n_v),
1159 NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
1160 NEONMAP0(vshr_n_v),
1161 NEONMAP0(vshrn_n_v),
1162 NEONMAP0(vshrq_n_v),
1163 NEONMAP1(vsm3partw1q_u32, aarch64_crypto_sm3partw1, 0),
1164 NEONMAP1(vsm3partw2q_u32, aarch64_crypto_sm3partw2, 0),
1165 NEONMAP1(vsm3ss1q_u32, aarch64_crypto_sm3ss1, 0),
1166 NEONMAP1(vsm3tt1aq_u32, aarch64_crypto_sm3tt1a, 0),
1167 NEONMAP1(vsm3tt1bq_u32, aarch64_crypto_sm3tt1b, 0),
1168 NEONMAP1(vsm3tt2aq_u32, aarch64_crypto_sm3tt2a, 0),
1169 NEONMAP1(vsm3tt2bq_u32, aarch64_crypto_sm3tt2b, 0),
1170 NEONMAP1(vsm4ekeyq_u32, aarch64_crypto_sm4ekey, 0),
1171 NEONMAP1(vsm4eq_u32, aarch64_crypto_sm4e, 0),
1172 NEONMAP1(vst1_x2_v, aarch64_neon_st1x2, 0),
1173 NEONMAP1(vst1_x3_v, aarch64_neon_st1x3, 0),
1174 NEONMAP1(vst1_x4_v, aarch64_neon_st1x4, 0),
1175 NEONMAP1(vst1q_x2_v, aarch64_neon_st1x2, 0),
1176 NEONMAP1(vst1q_x3_v, aarch64_neon_st1x3, 0),
1177 NEONMAP1(vst1q_x4_v, aarch64_neon_st1x4, 0),
1178 NEONMAP0(vsubhn_v),
1179 NEONMAP0(vtst_v),
1180 NEONMAP0(vtstq_v),
1181 NEONMAP1(vusdot_s32, aarch64_neon_usdot, 0),
1182 NEONMAP1(vusdotq_s32, aarch64_neon_usdot, 0),
1183 NEONMAP1(vusmmlaq_s32, aarch64_neon_usmmla, 0),
1184 NEONMAP1(vxarq_u64, aarch64_crypto_xar, 0),
1185};
1186
1187static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
1188 NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
1189 NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
1190 NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
1191 NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
1192 NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
1193 NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
1194 NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
1195 NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
1196 NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
1197 NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
1198 NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
1199 NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
1200 NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
1201 NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
1202 NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
1203 NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
1204 NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
1205 NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
1206 NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
1207 NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
1208 NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
1209 NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
1210 NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
1211 NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
1212 NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
1213 NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
1214 NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
1215 NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
1216 NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
1217 NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
1218 NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
1219 NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
1220 NEONMAP1(vcvtd_s64_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
1221 NEONMAP1(vcvtd_u64_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
1222 NEONMAP0(vcvth_bf16_f32),
1223 NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
1224 NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
1225 NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
1226 NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
1227 NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
1228 NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
1229 NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
1230 NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
1231 NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
1232 NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
1233 NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
1234 NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
1235 NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
1236 NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
1237 NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
1238 NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
1239 NEONMAP1(vcvts_s32_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
1240 NEONMAP1(vcvts_u32_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
1241 NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
1242 NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
1243 NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
1244 NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
1245 NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
1246 NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
1247 NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
1248 NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
1249 NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
1250 NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
1251 NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
1252 NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
1253 NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
1254 NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
1255 NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
1256 NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
1257 NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
1258 NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
1259 NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
1260 NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
1261 NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
1262 NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
1263 NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
1264 NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
1265 NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
1266 NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
1267 NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
1268 NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
1269 NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
1270 NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
1271 NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
1272 NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
1273 NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
1274 NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
1275 NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
1276 NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
1277 NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
1278 NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
1279 NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
1280 NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
1281 NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
1282 NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
1283 NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
1284 NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
1285 NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
1286 NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
1287 NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
1288 NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
1289 NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
1290 NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
1291 NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
1292 NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
1293 NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
1294 NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
1295 NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
1296 NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
1297 NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
1298 NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
1299 NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
1300 NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
1301 NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
1302 NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
1303 NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
1304 NEONMAP1(vqrdmlahh_s16, aarch64_neon_sqrdmlah, Vectorize1ArgType | Use64BitVectors),
1305 NEONMAP1(vqrdmlahs_s32, aarch64_neon_sqrdmlah, Add1ArgType),
1306 NEONMAP1(vqrdmlshh_s16, aarch64_neon_sqrdmlsh, Vectorize1ArgType | Use64BitVectors),
1307 NEONMAP1(vqrdmlshs_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
1308 NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
1309 NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
1310 NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
1311 NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
1312 NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
1313 NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
1314 NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
1315 NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
1316 NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
1317 NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
1318 NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
1319 NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
1320 NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
1321 NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
1322 NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
1323 NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
1324 NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
1325 NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
1326 NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
1327 NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
1328 NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
1329 NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
1330 NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
1331 NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
1332 NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
1333 NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
1334 NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
1335 NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
1336 NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
1337 NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
1338 NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
1339 NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
1340 NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
1341 NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
1342 NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
1343 NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
1344 NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
1345 NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
1346 NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
1347 NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
1348 NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
1349 NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
1350 NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
1351 NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
1352 NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
1353 NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
1354 NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
1355 NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
1356 NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
1357 NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
1358 NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
1359 NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
1360 NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
1361 NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
1362 NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
1363 NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
1364 NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
1365 NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
1366 NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
1367 NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
1368 NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
1369 NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
1370 NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
1371 NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
1372 NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
1373 NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
1374 NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
1375 NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
1376 NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
1377 NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
1378 NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
1379 NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
1380 NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
1381 NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
1382 NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
1383 NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
1384 NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
1385 NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
1386 NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
1387 NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
1388 NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
  // FP16 scalar intrinsics go here.
1390 NEONMAP1(vabdh_f16, aarch64_sisd_fabd, Add1ArgType),
1391 NEONMAP1(vcvtah_s32_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
1392 NEONMAP1(vcvtah_s64_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
1393 NEONMAP1(vcvtah_u32_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
1394 NEONMAP1(vcvtah_u64_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
1395 NEONMAP1(vcvth_n_f16_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
1396 NEONMAP1(vcvth_n_f16_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
1397 NEONMAP1(vcvth_n_f16_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
1398 NEONMAP1(vcvth_n_f16_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
1399 NEONMAP1(vcvth_n_s32_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
1400 NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
1401 NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
1402 NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
1403 NEONMAP1(vcvth_s32_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
1404 NEONMAP1(vcvth_s64_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
1405 NEONMAP1(vcvth_u32_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
1406 NEONMAP1(vcvth_u64_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
1407 NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
1408 NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
1409 NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
1410 NEONMAP1(vcvtmh_u64_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
1411 NEONMAP1(vcvtnh_s32_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
1412 NEONMAP1(vcvtnh_s64_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
1413 NEONMAP1(vcvtnh_u32_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
1414 NEONMAP1(vcvtnh_u64_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
1415 NEONMAP1(vcvtph_s32_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
1416 NEONMAP1(vcvtph_s64_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
1417 NEONMAP1(vcvtph_u32_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
1418 NEONMAP1(vcvtph_u64_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
1419 NEONMAP1(vmulxh_f16, aarch64_neon_fmulx, Add1ArgType),
1420 NEONMAP1(vrecpeh_f16, aarch64_neon_frecpe, Add1ArgType),
1421 NEONMAP1(vrecpxh_f16, aarch64_neon_frecpx, Add1ArgType),
1422 NEONMAP1(vrsqrteh_f16, aarch64_neon_frsqrte, Add1ArgType),
1423 NEONMAP1(vrsqrtsh_f16, aarch64_neon_frsqrts, Add1ArgType),
1424};
1425
1426// Some intrinsics are equivalent for codegen.
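// Each entry pairs a type-specific builtin ID with the generic builtin ID whose
// codegen it shares, e.g. vabs_f16 is emitted exactly like vabs_v.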
1427static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap[] = {
1428 { NEON::BI__builtin_neon_splat_lane_bf16, NEON::BI__builtin_neon_splat_lane_v, },
1429 { NEON::BI__builtin_neon_splat_laneq_bf16, NEON::BI__builtin_neon_splat_laneq_v, },
1430 { NEON::BI__builtin_neon_splatq_lane_bf16, NEON::BI__builtin_neon_splatq_lane_v, },
1431 { NEON::BI__builtin_neon_splatq_laneq_bf16, NEON::BI__builtin_neon_splatq_laneq_v, },
1432 { NEON::BI__builtin_neon_vabd_f16, NEON::BI__builtin_neon_vabd_v, },
1433 { NEON::BI__builtin_neon_vabdq_f16, NEON::BI__builtin_neon_vabdq_v, },
1434 { NEON::BI__builtin_neon_vabs_f16, NEON::BI__builtin_neon_vabs_v, },
1435 { NEON::BI__builtin_neon_vabsq_f16, NEON::BI__builtin_neon_vabsq_v, },
1436 { NEON::BI__builtin_neon_vcage_f16, NEON::BI__builtin_neon_vcage_v, },
1437 { NEON::BI__builtin_neon_vcageq_f16, NEON::BI__builtin_neon_vcageq_v, },
1438 { NEON::BI__builtin_neon_vcagt_f16, NEON::BI__builtin_neon_vcagt_v, },
1439 { NEON::BI__builtin_neon_vcagtq_f16, NEON::BI__builtin_neon_vcagtq_v, },
1440 { NEON::BI__builtin_neon_vcale_f16, NEON::BI__builtin_neon_vcale_v, },
1441 { NEON::BI__builtin_neon_vcaleq_f16, NEON::BI__builtin_neon_vcaleq_v, },
1442 { NEON::BI__builtin_neon_vcalt_f16, NEON::BI__builtin_neon_vcalt_v, },
1443 { NEON::BI__builtin_neon_vcaltq_f16, NEON::BI__builtin_neon_vcaltq_v, },
1444 { NEON::BI__builtin_neon_vceqz_f16, NEON::BI__builtin_neon_vceqz_v, },
1445 { NEON::BI__builtin_neon_vceqzq_f16, NEON::BI__builtin_neon_vceqzq_v, },
1446 { NEON::BI__builtin_neon_vcgez_f16, NEON::BI__builtin_neon_vcgez_v, },
1447 { NEON::BI__builtin_neon_vcgezq_f16, NEON::BI__builtin_neon_vcgezq_v, },
1448 { NEON::BI__builtin_neon_vcgtz_f16, NEON::BI__builtin_neon_vcgtz_v, },
1449 { NEON::BI__builtin_neon_vcgtzq_f16, NEON::BI__builtin_neon_vcgtzq_v, },
1450 { NEON::BI__builtin_neon_vclez_f16, NEON::BI__builtin_neon_vclez_v, },
1451 { NEON::BI__builtin_neon_vclezq_f16, NEON::BI__builtin_neon_vclezq_v, },
1452 { NEON::BI__builtin_neon_vcltz_f16, NEON::BI__builtin_neon_vcltz_v, },
1453 { NEON::BI__builtin_neon_vcltzq_f16, NEON::BI__builtin_neon_vcltzq_v, },
1454 { NEON::BI__builtin_neon_vfma_f16, NEON::BI__builtin_neon_vfma_v, },
1455 { NEON::BI__builtin_neon_vfma_lane_f16, NEON::BI__builtin_neon_vfma_lane_v, },
1456 { NEON::BI__builtin_neon_vfma_laneq_f16, NEON::BI__builtin_neon_vfma_laneq_v, },
1457 { NEON::BI__builtin_neon_vfmaq_f16, NEON::BI__builtin_neon_vfmaq_v, },
1458 { NEON::BI__builtin_neon_vfmaq_lane_f16, NEON::BI__builtin_neon_vfmaq_lane_v, },
1459 { NEON::BI__builtin_neon_vfmaq_laneq_f16, NEON::BI__builtin_neon_vfmaq_laneq_v, },
1460 { NEON::BI__builtin_neon_vld1_bf16_x2, NEON::BI__builtin_neon_vld1_x2_v },
1461 { NEON::BI__builtin_neon_vld1_bf16_x3, NEON::BI__builtin_neon_vld1_x3_v },
1462 { NEON::BI__builtin_neon_vld1_bf16_x4, NEON::BI__builtin_neon_vld1_x4_v },
1463 { NEON::BI__builtin_neon_vld1_bf16, NEON::BI__builtin_neon_vld1_v },
1464 { NEON::BI__builtin_neon_vld1_dup_bf16, NEON::BI__builtin_neon_vld1_dup_v },
1465 { NEON::BI__builtin_neon_vld1_lane_bf16, NEON::BI__builtin_neon_vld1_lane_v },
1466 { NEON::BI__builtin_neon_vld1q_bf16_x2, NEON::BI__builtin_neon_vld1q_x2_v },
1467 { NEON::BI__builtin_neon_vld1q_bf16_x3, NEON::BI__builtin_neon_vld1q_x3_v },
1468 { NEON::BI__builtin_neon_vld1q_bf16_x4, NEON::BI__builtin_neon_vld1q_x4_v },
1469 { NEON::BI__builtin_neon_vld1q_bf16, NEON::BI__builtin_neon_vld1q_v },
1470 { NEON::BI__builtin_neon_vld1q_dup_bf16, NEON::BI__builtin_neon_vld1q_dup_v },
1471 { NEON::BI__builtin_neon_vld1q_lane_bf16, NEON::BI__builtin_neon_vld1q_lane_v },
1472 { NEON::BI__builtin_neon_vld2_bf16, NEON::BI__builtin_neon_vld2_v },
1473 { NEON::BI__builtin_neon_vld2_dup_bf16, NEON::BI__builtin_neon_vld2_dup_v },
1474 { NEON::BI__builtin_neon_vld2_lane_bf16, NEON::BI__builtin_neon_vld2_lane_v },
1475 { NEON::BI__builtin_neon_vld2q_bf16, NEON::BI__builtin_neon_vld2q_v },
1476 { NEON::BI__builtin_neon_vld2q_dup_bf16, NEON::BI__builtin_neon_vld2q_dup_v },
1477 { NEON::BI__builtin_neon_vld2q_lane_bf16, NEON::BI__builtin_neon_vld2q_lane_v },
1478 { NEON::BI__builtin_neon_vld3_bf16, NEON::BI__builtin_neon_vld3_v },
1479 { NEON::BI__builtin_neon_vld3_dup_bf16, NEON::BI__builtin_neon_vld3_dup_v },
1480 { NEON::BI__builtin_neon_vld3_lane_bf16, NEON::BI__builtin_neon_vld3_lane_v },
1481 { NEON::BI__builtin_neon_vld3q_bf16, NEON::BI__builtin_neon_vld3q_v },
1482 { NEON::BI__builtin_neon_vld3q_dup_bf16, NEON::BI__builtin_neon_vld3q_dup_v },
1483 { NEON::BI__builtin_neon_vld3q_lane_bf16, NEON::BI__builtin_neon_vld3q_lane_v },
1484 { NEON::BI__builtin_neon_vld4_bf16, NEON::BI__builtin_neon_vld4_v },
1485 { NEON::BI__builtin_neon_vld4_dup_bf16, NEON::BI__builtin_neon_vld4_dup_v },
1486 { NEON::BI__builtin_neon_vld4_lane_bf16, NEON::BI__builtin_neon_vld4_lane_v },
1487 { NEON::BI__builtin_neon_vld4q_bf16, NEON::BI__builtin_neon_vld4q_v },
1488 { NEON::BI__builtin_neon_vld4q_dup_bf16, NEON::BI__builtin_neon_vld4q_dup_v },
1489 { NEON::BI__builtin_neon_vld4q_lane_bf16, NEON::BI__builtin_neon_vld4q_lane_v },
1490 { NEON::BI__builtin_neon_vmax_f16, NEON::BI__builtin_neon_vmax_v, },
1491 { NEON::BI__builtin_neon_vmaxnm_f16, NEON::BI__builtin_neon_vmaxnm_v, },
1492 { NEON::BI__builtin_neon_vmaxnmq_f16, NEON::BI__builtin_neon_vmaxnmq_v, },
1493 { NEON::BI__builtin_neon_vmaxq_f16, NEON::BI__builtin_neon_vmaxq_v, },
1494 { NEON::BI__builtin_neon_vmin_f16, NEON::BI__builtin_neon_vmin_v, },
1495 { NEON::BI__builtin_neon_vminnm_f16, NEON::BI__builtin_neon_vminnm_v, },
1496 { NEON::BI__builtin_neon_vminnmq_f16, NEON::BI__builtin_neon_vminnmq_v, },
1497 { NEON::BI__builtin_neon_vminq_f16, NEON::BI__builtin_neon_vminq_v, },
1498 { NEON::BI__builtin_neon_vmulx_f16, NEON::BI__builtin_neon_vmulx_v, },
1499 { NEON::BI__builtin_neon_vmulxq_f16, NEON::BI__builtin_neon_vmulxq_v, },
1500 { NEON::BI__builtin_neon_vpadd_f16, NEON::BI__builtin_neon_vpadd_v, },
1501 { NEON::BI__builtin_neon_vpaddq_f16, NEON::BI__builtin_neon_vpaddq_v, },
1502 { NEON::BI__builtin_neon_vpmax_f16, NEON::BI__builtin_neon_vpmax_v, },
1503 { NEON::BI__builtin_neon_vpmaxnm_f16, NEON::BI__builtin_neon_vpmaxnm_v, },
1504 { NEON::BI__builtin_neon_vpmaxnmq_f16, NEON::BI__builtin_neon_vpmaxnmq_v, },
1505 { NEON::BI__builtin_neon_vpmaxq_f16, NEON::BI__builtin_neon_vpmaxq_v, },
1506 { NEON::BI__builtin_neon_vpmin_f16, NEON::BI__builtin_neon_vpmin_v, },
1507 { NEON::BI__builtin_neon_vpminnm_f16, NEON::BI__builtin_neon_vpminnm_v, },
1508 { NEON::BI__builtin_neon_vpminnmq_f16, NEON::BI__builtin_neon_vpminnmq_v, },
1509 { NEON::BI__builtin_neon_vpminq_f16, NEON::BI__builtin_neon_vpminq_v, },
1510 { NEON::BI__builtin_neon_vrecpe_f16, NEON::BI__builtin_neon_vrecpe_v, },
1511 { NEON::BI__builtin_neon_vrecpeq_f16, NEON::BI__builtin_neon_vrecpeq_v, },
1512 { NEON::BI__builtin_neon_vrecps_f16, NEON::BI__builtin_neon_vrecps_v, },
1513 { NEON::BI__builtin_neon_vrecpsq_f16, NEON::BI__builtin_neon_vrecpsq_v, },
1514 { NEON::BI__builtin_neon_vrnd_f16, NEON::BI__builtin_neon_vrnd_v, },
1515 { NEON::BI__builtin_neon_vrnda_f16, NEON::BI__builtin_neon_vrnda_v, },
1516 { NEON::BI__builtin_neon_vrndaq_f16, NEON::BI__builtin_neon_vrndaq_v, },
1517 { NEON::BI__builtin_neon_vrndi_f16, NEON::BI__builtin_neon_vrndi_v, },
1518 { NEON::BI__builtin_neon_vrndiq_f16, NEON::BI__builtin_neon_vrndiq_v, },
1519 { NEON::BI__builtin_neon_vrndm_f16, NEON::BI__builtin_neon_vrndm_v, },
1520 { NEON::BI__builtin_neon_vrndmq_f16, NEON::BI__builtin_neon_vrndmq_v, },
1521 { NEON::BI__builtin_neon_vrndn_f16, NEON::BI__builtin_neon_vrndn_v, },
1522 { NEON::BI__builtin_neon_vrndnq_f16, NEON::BI__builtin_neon_vrndnq_v, },
1523 { NEON::BI__builtin_neon_vrndp_f16, NEON::BI__builtin_neon_vrndp_v, },
1524 { NEON::BI__builtin_neon_vrndpq_f16, NEON::BI__builtin_neon_vrndpq_v, },
1525 { NEON::BI__builtin_neon_vrndq_f16, NEON::BI__builtin_neon_vrndq_v, },
1526 { NEON::BI__builtin_neon_vrndx_f16, NEON::BI__builtin_neon_vrndx_v, },
1527 { NEON::BI__builtin_neon_vrndxq_f16, NEON::BI__builtin_neon_vrndxq_v, },
1528 { NEON::BI__builtin_neon_vrsqrte_f16, NEON::BI__builtin_neon_vrsqrte_v, },
1529 { NEON::BI__builtin_neon_vrsqrteq_f16, NEON::BI__builtin_neon_vrsqrteq_v, },
1530 { NEON::BI__builtin_neon_vrsqrts_f16, NEON::BI__builtin_neon_vrsqrts_v, },
1531 { NEON::BI__builtin_neon_vrsqrtsq_f16, NEON::BI__builtin_neon_vrsqrtsq_v, },
1532 { NEON::BI__builtin_neon_vsqrt_f16, NEON::BI__builtin_neon_vsqrt_v, },
1533 { NEON::BI__builtin_neon_vsqrtq_f16, NEON::BI__builtin_neon_vsqrtq_v, },
1534 { NEON::BI__builtin_neon_vst1_bf16_x2, NEON::BI__builtin_neon_vst1_x2_v },
1535 { NEON::BI__builtin_neon_vst1_bf16_x3, NEON::BI__builtin_neon_vst1_x3_v },
1536 { NEON::BI__builtin_neon_vst1_bf16_x4, NEON::BI__builtin_neon_vst1_x4_v },
1537 { NEON::BI__builtin_neon_vst1_bf16, NEON::BI__builtin_neon_vst1_v },
1538 { NEON::BI__builtin_neon_vst1_lane_bf16, NEON::BI__builtin_neon_vst1_lane_v },
1539 { NEON::BI__builtin_neon_vst1q_bf16_x2, NEON::BI__builtin_neon_vst1q_x2_v },
1540 { NEON::BI__builtin_neon_vst1q_bf16_x3, NEON::BI__builtin_neon_vst1q_x3_v },
1541 { NEON::BI__builtin_neon_vst1q_bf16_x4, NEON::BI__builtin_neon_vst1q_x4_v },
1542 { NEON::BI__builtin_neon_vst1q_bf16, NEON::BI__builtin_neon_vst1q_v },
1543 { NEON::BI__builtin_neon_vst1q_lane_bf16, NEON::BI__builtin_neon_vst1q_lane_v },
1544 { NEON::BI__builtin_neon_vst2_bf16, NEON::BI__builtin_neon_vst2_v },
1545 { NEON::BI__builtin_neon_vst2_lane_bf16, NEON::BI__builtin_neon_vst2_lane_v },
1546 { NEON::BI__builtin_neon_vst2q_bf16, NEON::BI__builtin_neon_vst2q_v },
1547 { NEON::BI__builtin_neon_vst2q_lane_bf16, NEON::BI__builtin_neon_vst2q_lane_v },
1548 { NEON::BI__builtin_neon_vst3_bf16, NEON::BI__builtin_neon_vst3_v },
1549 { NEON::BI__builtin_neon_vst3_lane_bf16, NEON::BI__builtin_neon_vst3_lane_v },
1550 { NEON::BI__builtin_neon_vst3q_bf16, NEON::BI__builtin_neon_vst3q_v },
1551 { NEON::BI__builtin_neon_vst3q_lane_bf16, NEON::BI__builtin_neon_vst3q_lane_v },
1552 { NEON::BI__builtin_neon_vst4_bf16, NEON::BI__builtin_neon_vst4_v },
1553 { NEON::BI__builtin_neon_vst4_lane_bf16, NEON::BI__builtin_neon_vst4_lane_v },
1554 { NEON::BI__builtin_neon_vst4q_bf16, NEON::BI__builtin_neon_vst4q_v },
1555 { NEON::BI__builtin_neon_vst4q_lane_bf16, NEON::BI__builtin_neon_vst4q_lane_v },
1556 // The mangling rules cause us to have one ID for each type for vldap1(q)_lane
1557 // and vstl1(q)_lane, but codegen is equivalent for all of them. Choose an
  // arbitrary one to be handled as the canonical variation.
1559 { NEON::BI__builtin_neon_vldap1_lane_u64, NEON::BI__builtin_neon_vldap1_lane_s64 },
1560 { NEON::BI__builtin_neon_vldap1_lane_f64, NEON::BI__builtin_neon_vldap1_lane_s64 },
1561 { NEON::BI__builtin_neon_vldap1_lane_p64, NEON::BI__builtin_neon_vldap1_lane_s64 },
1562 { NEON::BI__builtin_neon_vldap1q_lane_u64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
1563 { NEON::BI__builtin_neon_vldap1q_lane_f64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
1564 { NEON::BI__builtin_neon_vldap1q_lane_p64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
1565 { NEON::BI__builtin_neon_vstl1_lane_u64, NEON::BI__builtin_neon_vstl1_lane_s64 },
1566 { NEON::BI__builtin_neon_vstl1_lane_f64, NEON::BI__builtin_neon_vstl1_lane_s64 },
1567 { NEON::BI__builtin_neon_vstl1_lane_p64, NEON::BI__builtin_neon_vstl1_lane_s64 },
1568 { NEON::BI__builtin_neon_vstl1q_lane_u64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
1569 { NEON::BI__builtin_neon_vstl1q_lane_f64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
1570 { NEON::BI__builtin_neon_vstl1q_lane_p64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
1571};
1572
1573#undef NEONMAP0
1574#undef NEONMAP1
1575#undef NEONMAP2
1576
1577#define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
1578 { \
1579 #NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
1580 TypeModifier \
1581 }
1582
1583#define SVEMAP2(NameBase, TypeModifier) \
1584 { #NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier }
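// SVEMAP1 ties a builtin to a specific LLVM intrinsic; SVEMAP2 records no
// intrinsic (the 0 field) for builtins that need more than a direct mapping.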
1585static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = {
1586#define GET_SVE_LLVM_INTRINSIC_MAP
1587#include "clang/Basic/arm_sve_builtin_cg.inc"
1588#include "clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def"
1589#undef GET_SVE_LLVM_INTRINSIC_MAP
1590};
1591
1592#undef SVEMAP1
1593#undef SVEMAP2
1594
1595#define SMEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
1596 { \
1597 #NameBase, SME::BI__builtin_sme_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
1598 TypeModifier \
1599 }
1600
1601#define SMEMAP2(NameBase, TypeModifier) \
1602 { #NameBase, SME::BI__builtin_sme_##NameBase, 0, 0, TypeModifier }
1603static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[] = {
1604#define GET_SME_LLVM_INTRINSIC_MAP
1605#include "clang/Basic/arm_sme_builtin_cg.inc"
1606#undef GET_SME_LLVM_INTRINSIC_MAP
1607};
1608
1609#undef SMEMAP1
1610#undef SMEMAP2
1611
1612static bool NEONSIMDIntrinsicsProvenSorted = false;
1613
1614static bool AArch64SIMDIntrinsicsProvenSorted = false;
1615static bool AArch64SISDIntrinsicsProvenSorted = false;
1616static bool AArch64SVEIntrinsicsProvenSorted = false;
1617static bool AArch64SMEIntrinsicsProvenSorted = false;
1618
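// The intrinsic tables are kept sorted by BuiltinID so they can be
// binary-searched; in asserts builds, the flags above let us verify the
// sortedness of each table once, before its first lookup.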
static const ARMVectorIntrinsicInfo *
findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> IntrinsicMap,
                            unsigned BuiltinID, bool &MapProvenSorted) {

#ifndef NDEBUG
  if (!MapProvenSorted) {
    assert(llvm::is_sorted(IntrinsicMap));
    MapProvenSorted = true;
  }
#endif

  const ARMVectorIntrinsicInfo *Builtin =
      llvm::lower_bound(IntrinsicMap, BuiltinID);

  if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
    return Builtin;

  return nullptr;
}
1638
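// Builds the overloaded type list for a NEON intrinsic: the TypeModifier flags
// control whether the return type and/or the argument type join the overload,
// and whether scalar types are first widened to 64- or 128-bit vectors.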
Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
                                                   unsigned Modifier,
                                                   llvm::Type *ArgType,
                                                   const CallExpr *E) {
  int VectorSize = 0;
  if (Modifier & Use64BitVectors)
    VectorSize = 64;
  else if (Modifier & Use128BitVectors)
    VectorSize = 128;

  // Return type.
  SmallVector<llvm::Type *, 3> Tys;
  if (Modifier & AddRetType) {
    llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
    if (Modifier & VectorizeRetType)
      Ty = llvm::FixedVectorType::get(
          Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);

    Tys.push_back(Ty);
  }

  // Arguments.
  if (Modifier & VectorizeArgTypes) {
    int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
    ArgType = llvm::FixedVectorType::get(ArgType, Elts);
  }

  if (Modifier & (Add1ArgType | Add2ArgTypes))
    Tys.push_back(ArgType);

  if (Modifier & Add2ArgTypes)
    Tys.push_back(ArgType);

  if (Modifier & InventFloatType)
    Tys.push_back(FloatTy);

  return CGM.getIntrinsic(IntrinsicID, Tys);
}
1677
1678static Value *EmitCommonNeonSISDBuiltinExpr(
1679 CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo,
1680 SmallVectorImpl<Value *> &Ops, const CallExpr *E) {
1681 unsigned BuiltinID = SISDInfo.BuiltinID;
1682 unsigned int Int = SISDInfo.LLVMIntrinsic;
1683 unsigned Modifier = SISDInfo.TypeModifier;
1684 const char *s = SISDInfo.NameHint;
1685
1686 switch (BuiltinID) {
1687 case NEON::BI__builtin_neon_vcled_s64:
1688 case NEON::BI__builtin_neon_vcled_u64:
1689 case NEON::BI__builtin_neon_vcles_f32:
1690 case NEON::BI__builtin_neon_vcled_f64:
1691 case NEON::BI__builtin_neon_vcltd_s64:
1692 case NEON::BI__builtin_neon_vcltd_u64:
1693 case NEON::BI__builtin_neon_vclts_f32:
1694 case NEON::BI__builtin_neon_vcltd_f64:
1695 case NEON::BI__builtin_neon_vcales_f32:
1696 case NEON::BI__builtin_neon_vcaled_f64:
1697 case NEON::BI__builtin_neon_vcalts_f32:
1698 case NEON::BI__builtin_neon_vcaltd_f64:
    // Only one direction of comparisons actually exists; cmle is just a cmge
    // with swapped operands. The table gives us the right intrinsic, but we
    // still need to do the swap.
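    // E.g. vcled_s64(a, b) computes a <= b, which equals vcged_s64(b, a).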
1702 std::swap(a&: Ops[0], b&: Ops[1]);
1703 break;
1704 }
1705
1706 assert(Int && "Generic code assumes a valid intrinsic");
1707
1708 // Determine the type(s) of this overloaded AArch64 intrinsic.
1709 const Expr *Arg = E->getArg(Arg: 0);
1710 llvm::Type *ArgTy = CGF.ConvertType(T: Arg->getType());
1711 Function *F = CGF.LookupNeonLLVMIntrinsic(IntrinsicID: Int, Modifier, ArgType: ArgTy, E);
1712
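  // The scalar builtins are emitted through vector intrinsics, so any operand
  // narrower than the intrinsic's parameter is spliced into lane 0 of a poison
  // vector of the expected type below.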
1713 int j = 0;
1714 ConstantInt *C0 = ConstantInt::get(Ty: CGF.SizeTy, V: 0);
1715 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
1716 ai != ae; ++ai, ++j) {
1717 llvm::Type *ArgTy = ai->getType();
1718 if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
1719 ArgTy->getPrimitiveSizeInBits())
1720 continue;
1721
1722 assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
1723 // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
1724 // it before inserting.
1725 Ops[j] = CGF.Builder.CreateTruncOrBitCast(
1726 V: Ops[j], DestTy: cast<llvm::VectorType>(Val: ArgTy)->getElementType());
1727 Ops[j] =
1728 CGF.Builder.CreateInsertElement(Vec: PoisonValue::get(T: ArgTy), NewElt: Ops[j], Idx: C0);
1729 }
1730
1731 Value *Result = CGF.EmitNeonCall(F, Ops, name: s);
1732 llvm::Type *ResultType = CGF.ConvertType(E->getType());
1733 if (ResultType->getPrimitiveSizeInBits().getFixedValue() <
1734 Result->getType()->getPrimitiveSizeInBits().getFixedValue())
1735 return CGF.Builder.CreateExtractElement(Vec: Result, Idx: C0);
1736
1737 return CGF.Builder.CreateBitCast(V: Result, DestTy: ResultType, Name: s);
1738}
1739
1740Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
1741 unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
1742 const char *NameHint, unsigned Modifier, const CallExpr *E,
1743 SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1,
1744 llvm::Triple::ArchType Arch) {
1745 // Get the last argument, which specifies the vector type.
1746 const Expr *Arg = E->getArg(Arg: E->getNumArgs() - 1);
1747 std::optional<llvm::APSInt> NeonTypeConst =
1748 Arg->getIntegerConstantExpr(Ctx: getContext());
1749 if (!NeonTypeConst)
1750 return nullptr;
1751
1752 // Determine the type of this overloaded NEON intrinsic.
1753 NeonTypeFlags Type(NeonTypeConst->getZExtValue());
1754 const bool Usgn = Type.isUnsigned();
1755 const bool Quad = Type.isQuad();
1756 const bool Floating = Type.isFloatingPoint();
1757 const bool HasLegalHalfType = getTarget().hasLegalHalfType();
1758 const bool AllowBFloatArgsAndRet =
1759 getTargetHooks().getABIInfo().allowBFloatArgsAndRet();
1760
1761 llvm::FixedVectorType *VTy =
1762 GetNeonType(CGF: this, TypeFlags: Type, HasLegalHalfType, V1Ty: false, AllowBFloatArgsAndRet);
1763 llvm::Type *Ty = VTy;
1764 if (!Ty)
1765 return nullptr;
1766
1767 auto getAlignmentValue32 = [&](Address addr) -> Value* {
1768 return Builder.getInt32(C: addr.getAlignment().getQuantity());
1769 };
1770
1771 unsigned Int = LLVMIntrinsic;
1772 if ((Modifier & UnsignedAlts) && !Usgn)
1773 Int = AltLLVMIntrinsic;
1774
1775 switch (BuiltinID) {
1776 default: break;
1777 case NEON::BI__builtin_neon_splat_lane_v:
1778 case NEON::BI__builtin_neon_splat_laneq_v:
1779 case NEON::BI__builtin_neon_splatq_lane_v:
1780 case NEON::BI__builtin_neon_splatq_laneq_v: {
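    // The type flags describe the source vector here: splatq_lane produces a
    // 128-bit result from a 64-bit source (double the element count), while
    // splat_laneq produces a 64-bit result from a 128-bit source (halve it).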
1781 auto NumElements = VTy->getElementCount();
1782 if (BuiltinID == NEON::BI__builtin_neon_splatq_lane_v)
1783 NumElements = NumElements * 2;
1784 if (BuiltinID == NEON::BI__builtin_neon_splat_laneq_v)
1785 NumElements = NumElements.divideCoefficientBy(RHS: 2);
1786
1787 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: VTy);
1788 return EmitNeonSplat(V: Ops[0], C: cast<ConstantInt>(Val: Ops[1]), Count: NumElements);
1789 }
1790 case NEON::BI__builtin_neon_vpadd_v:
1791 case NEON::BI__builtin_neon_vpaddq_v:
1792 // We don't allow fp/int overloading of intrinsics.
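    // (If the AArch64 map handed us aarch64_neon_addp, switch to the separate
    // aarch64_neon_faddp intrinsic for float element types.)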
1793 if (VTy->getElementType()->isFloatingPointTy() &&
1794 Int == Intrinsic::aarch64_neon_addp)
1795 Int = Intrinsic::aarch64_neon_faddp;
1796 break;
1797 case NEON::BI__builtin_neon_vabs_v:
1798 case NEON::BI__builtin_neon_vabsq_v:
1799 if (VTy->getElementType()->isFloatingPointTy())
1800 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
1801 return EmitNeonCall(F: CGM.getIntrinsic(IID: LLVMIntrinsic, Tys: Ty), Ops, name: "vabs");
1802 case NEON::BI__builtin_neon_vadd_v:
1803 case NEON::BI__builtin_neon_vaddq_v: {
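    // vadd on polynomial types is carry-less addition, i.e. a bitwise XOR;
    // do it on i8 vectors and bitcast back to the result type.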
1804 llvm::Type *VTy = llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: Quad ? 16 : 8);
1805 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: VTy);
1806 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: VTy);
1807 Ops[0] = Builder.CreateXor(LHS: Ops[0], RHS: Ops[1]);
1808 return Builder.CreateBitCast(V: Ops[0], DestTy: Ty);
1809 }
1810 case NEON::BI__builtin_neon_vaddhn_v: {
1811 llvm::FixedVectorType *SrcTy =
1812 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
1813
1814 // %sum = add <4 x i32> %lhs, %rhs
1815 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: SrcTy);
1816 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: SrcTy);
1817 Ops[0] = Builder.CreateAdd(LHS: Ops[0], RHS: Ops[1], Name: "vaddhn");
1818
1819 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
1820 Constant *ShiftAmt =
1821 ConstantInt::get(Ty: SrcTy, V: SrcTy->getScalarSizeInBits() / 2);
1822 Ops[0] = Builder.CreateLShr(LHS: Ops[0], RHS: ShiftAmt, Name: "vaddhn");
1823
1824 // %res = trunc <4 x i32> %high to <4 x i16>
1825 return Builder.CreateTrunc(V: Ops[0], DestTy: VTy, Name: "vaddhn");
1826 }
1827 case NEON::BI__builtin_neon_vcale_v:
1828 case NEON::BI__builtin_neon_vcaleq_v:
1829 case NEON::BI__builtin_neon_vcalt_v:
1830 case NEON::BI__builtin_neon_vcaltq_v:
1831 std::swap(a&: Ops[0], b&: Ops[1]);
1832 [[fallthrough]];
1833 case NEON::BI__builtin_neon_vcage_v:
1834 case NEON::BI__builtin_neon_vcageq_v:
1835 case NEON::BI__builtin_neon_vcagt_v:
1836 case NEON::BI__builtin_neon_vcagtq_v: {
1837 llvm::Type *Ty;
1838 switch (VTy->getScalarSizeInBits()) {
1839 default: llvm_unreachable("unexpected type");
1840 case 32:
1841 Ty = FloatTy;
1842 break;
1843 case 64:
1844 Ty = DoubleTy;
1845 break;
1846 case 16:
1847 Ty = HalfTy;
1848 break;
1849 }
1850 auto *VecFlt = llvm::FixedVectorType::get(ElementType: Ty, NumElts: VTy->getNumElements());
1851 llvm::Type *Tys[] = { VTy, VecFlt };
1852 Function *F = CGM.getIntrinsic(IID: LLVMIntrinsic, Tys);
1853 return EmitNeonCall(F, Ops, name: NameHint);
1854 }
1855 case NEON::BI__builtin_neon_vceqz_v:
1856 case NEON::BI__builtin_neon_vceqzq_v:
1857 return EmitAArch64CompareBuiltinExpr(
1858 Op: Ops[0], Ty, Pred: Floating ? ICmpInst::FCMP_OEQ : ICmpInst::ICMP_EQ, Name: "vceqz");
1859 case NEON::BI__builtin_neon_vcgez_v:
1860 case NEON::BI__builtin_neon_vcgezq_v:
1861 return EmitAArch64CompareBuiltinExpr(
1862 Op: Ops[0], Ty, Pred: Floating ? ICmpInst::FCMP_OGE : ICmpInst::ICMP_SGE,
1863 Name: "vcgez");
1864 case NEON::BI__builtin_neon_vclez_v:
1865 case NEON::BI__builtin_neon_vclezq_v:
1866 return EmitAArch64CompareBuiltinExpr(
1867 Op: Ops[0], Ty, Pred: Floating ? ICmpInst::FCMP_OLE : ICmpInst::ICMP_SLE,
1868 Name: "vclez");
1869 case NEON::BI__builtin_neon_vcgtz_v:
1870 case NEON::BI__builtin_neon_vcgtzq_v:
1871 return EmitAArch64CompareBuiltinExpr(
1872 Op: Ops[0], Ty, Pred: Floating ? ICmpInst::FCMP_OGT : ICmpInst::ICMP_SGT,
1873 Name: "vcgtz");
1874 case NEON::BI__builtin_neon_vcltz_v:
1875 case NEON::BI__builtin_neon_vcltzq_v:
1876 return EmitAArch64CompareBuiltinExpr(
1877 Op: Ops[0], Ty, Pred: Floating ? ICmpInst::FCMP_OLT : ICmpInst::ICMP_SLT,
1878 Name: "vcltz");
1879 case NEON::BI__builtin_neon_vclz_v:
1880 case NEON::BI__builtin_neon_vclzq_v:
    // We generate a target-independent intrinsic, which needs a second argument
    // indicating whether clz of zero is undefined; on ARM it isn't.
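    // (The AArch32/AArch64 CLZ instruction returns the operand width for a
    // zero input, so isCLZForZeroUndef() is false here and llvm.ctlz gets a
    // false second operand.)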
1883 Ops.push_back(Elt: Builder.getInt1(V: getTarget().isCLZForZeroUndef()));
1884 break;
1885 case NEON::BI__builtin_neon_vcvt_f32_v:
1886 case NEON::BI__builtin_neon_vcvtq_f32_v:
1887 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Ty);
1888 Ty = GetNeonType(CGF: this, TypeFlags: NeonTypeFlags(NeonTypeFlags::Float32, false, Quad),
1889 HasLegalHalfType);
1890 return Usgn ? Builder.CreateUIToFP(V: Ops[0], DestTy: Ty, Name: "vcvt")
1891 : Builder.CreateSIToFP(V: Ops[0], DestTy: Ty, Name: "vcvt");
1892 case NEON::BI__builtin_neon_vcvt_f16_s16:
1893 case NEON::BI__builtin_neon_vcvt_f16_u16:
1894 case NEON::BI__builtin_neon_vcvtq_f16_s16:
1895 case NEON::BI__builtin_neon_vcvtq_f16_u16:
1896 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Ty);
1897 Ty = GetNeonType(CGF: this, TypeFlags: NeonTypeFlags(NeonTypeFlags::Float16, false, Quad),
1898 HasLegalHalfType);
1899 return Usgn ? Builder.CreateUIToFP(V: Ops[0], DestTy: Ty, Name: "vcvt")
1900 : Builder.CreateSIToFP(V: Ops[0], DestTy: Ty, Name: "vcvt");
1901 case NEON::BI__builtin_neon_vcvt_n_f16_s16:
1902 case NEON::BI__builtin_neon_vcvt_n_f16_u16:
1903 case NEON::BI__builtin_neon_vcvtq_n_f16_s16:
1904 case NEON::BI__builtin_neon_vcvtq_n_f16_u16: {
1905 llvm::Type *Tys[2] = { GetFloatNeonType(CGF: this, IntTypeFlags: Type), Ty };
1906 Function *F = CGM.getIntrinsic(IID: Int, Tys);
1907 return EmitNeonCall(F, Ops, name: "vcvt_n");
1908 }
1909 case NEON::BI__builtin_neon_vcvt_n_f32_v:
1910 case NEON::BI__builtin_neon_vcvt_n_f64_v:
1911 case NEON::BI__builtin_neon_vcvtq_n_f32_v:
1912 case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
1913 llvm::Type *Tys[2] = { GetFloatNeonType(CGF: this, IntTypeFlags: Type), Ty };
1914 Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
1915 Function *F = CGM.getIntrinsic(IID: Int, Tys);
1916 return EmitNeonCall(F, Ops, name: "vcvt_n");
1917 }
1918 case NEON::BI__builtin_neon_vcvt_n_s16_f16:
1919 case NEON::BI__builtin_neon_vcvt_n_s32_v:
1920 case NEON::BI__builtin_neon_vcvt_n_u16_f16:
1921 case NEON::BI__builtin_neon_vcvt_n_u32_v:
1922 case NEON::BI__builtin_neon_vcvt_n_s64_v:
1923 case NEON::BI__builtin_neon_vcvt_n_u64_v:
1924 case NEON::BI__builtin_neon_vcvtq_n_s16_f16:
1925 case NEON::BI__builtin_neon_vcvtq_n_s32_v:
1926 case NEON::BI__builtin_neon_vcvtq_n_u16_f16:
1927 case NEON::BI__builtin_neon_vcvtq_n_u32_v:
1928 case NEON::BI__builtin_neon_vcvtq_n_s64_v:
1929 case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
1930 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(CGF: this, IntTypeFlags: Type) };
1931 Function *F = CGM.getIntrinsic(IID: LLVMIntrinsic, Tys);
1932 return EmitNeonCall(F, Ops, name: "vcvt_n");
1933 }
1934 case NEON::BI__builtin_neon_vcvt_s32_v:
1935 case NEON::BI__builtin_neon_vcvt_u32_v:
1936 case NEON::BI__builtin_neon_vcvt_s64_v:
1937 case NEON::BI__builtin_neon_vcvt_u64_v:
1938 case NEON::BI__builtin_neon_vcvt_s16_f16:
1939 case NEON::BI__builtin_neon_vcvt_u16_f16:
1940 case NEON::BI__builtin_neon_vcvtq_s32_v:
1941 case NEON::BI__builtin_neon_vcvtq_u32_v:
1942 case NEON::BI__builtin_neon_vcvtq_s64_v:
1943 case NEON::BI__builtin_neon_vcvtq_u64_v:
1944 case NEON::BI__builtin_neon_vcvtq_s16_f16:
1945 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
1946 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: GetFloatNeonType(CGF: this, IntTypeFlags: Type));
1947 return Usgn ? Builder.CreateFPToUI(V: Ops[0], DestTy: Ty, Name: "vcvt")
1948 : Builder.CreateFPToSI(V: Ops[0], DestTy: Ty, Name: "vcvt");
1949 }
1950 case NEON::BI__builtin_neon_vcvta_s16_f16:
1951 case NEON::BI__builtin_neon_vcvta_s32_v:
1952 case NEON::BI__builtin_neon_vcvta_s64_v:
1953 case NEON::BI__builtin_neon_vcvta_u16_f16:
1954 case NEON::BI__builtin_neon_vcvta_u32_v:
1955 case NEON::BI__builtin_neon_vcvta_u64_v:
1956 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
1957 case NEON::BI__builtin_neon_vcvtaq_s32_v:
1958 case NEON::BI__builtin_neon_vcvtaq_s64_v:
1959 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
1960 case NEON::BI__builtin_neon_vcvtaq_u32_v:
1961 case NEON::BI__builtin_neon_vcvtaq_u64_v:
1962 case NEON::BI__builtin_neon_vcvtn_s16_f16:
1963 case NEON::BI__builtin_neon_vcvtn_s32_v:
1964 case NEON::BI__builtin_neon_vcvtn_s64_v:
1965 case NEON::BI__builtin_neon_vcvtn_u16_f16:
1966 case NEON::BI__builtin_neon_vcvtn_u32_v:
1967 case NEON::BI__builtin_neon_vcvtn_u64_v:
1968 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
1969 case NEON::BI__builtin_neon_vcvtnq_s32_v:
1970 case NEON::BI__builtin_neon_vcvtnq_s64_v:
1971 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
1972 case NEON::BI__builtin_neon_vcvtnq_u32_v:
1973 case NEON::BI__builtin_neon_vcvtnq_u64_v:
1974 case NEON::BI__builtin_neon_vcvtp_s16_f16:
1975 case NEON::BI__builtin_neon_vcvtp_s32_v:
1976 case NEON::BI__builtin_neon_vcvtp_s64_v:
1977 case NEON::BI__builtin_neon_vcvtp_u16_f16:
1978 case NEON::BI__builtin_neon_vcvtp_u32_v:
1979 case NEON::BI__builtin_neon_vcvtp_u64_v:
1980 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
1981 case NEON::BI__builtin_neon_vcvtpq_s32_v:
1982 case NEON::BI__builtin_neon_vcvtpq_s64_v:
1983 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
1984 case NEON::BI__builtin_neon_vcvtpq_u32_v:
1985 case NEON::BI__builtin_neon_vcvtpq_u64_v:
1986 case NEON::BI__builtin_neon_vcvtm_s16_f16:
1987 case NEON::BI__builtin_neon_vcvtm_s32_v:
1988 case NEON::BI__builtin_neon_vcvtm_s64_v:
1989 case NEON::BI__builtin_neon_vcvtm_u16_f16:
1990 case NEON::BI__builtin_neon_vcvtm_u32_v:
1991 case NEON::BI__builtin_neon_vcvtm_u64_v:
1992 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
1993 case NEON::BI__builtin_neon_vcvtmq_s32_v:
1994 case NEON::BI__builtin_neon_vcvtmq_s64_v:
1995 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
1996 case NEON::BI__builtin_neon_vcvtmq_u32_v:
1997 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
1998 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(CGF: this, IntTypeFlags: Type) };
1999 return EmitNeonCall(F: CGM.getIntrinsic(IID: LLVMIntrinsic, Tys), Ops, name: NameHint);
2000 }
2001 case NEON::BI__builtin_neon_vcvtx_f32_v: {
2002 llvm::Type *Tys[2] = { VTy->getTruncatedElementVectorType(VTy), Ty};
2003 return EmitNeonCall(F: CGM.getIntrinsic(IID: LLVMIntrinsic, Tys), Ops, name: NameHint);
2004
2005 }
2006 case NEON::BI__builtin_neon_vext_v:
2007 case NEON::BI__builtin_neon_vextq_v: {
2008 int CV = cast<ConstantInt>(Val: Ops[2])->getSExtValue();
2009 SmallVector<int, 16> Indices;
2010 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
2011 Indices.push_back(Elt: i+CV);
2012
2013 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Ty);
2014 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
2015 return Builder.CreateShuffleVector(V1: Ops[0], V2: Ops[1], Mask: Indices, Name: "vext");
2016 }
2017 case NEON::BI__builtin_neon_vfma_v:
2018 case NEON::BI__builtin_neon_vfmaq_v: {
2019 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Ty);
2020 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
2021 Ops[2] = Builder.CreateBitCast(V: Ops[2], DestTy: Ty);
2022
    // The NEON intrinsic puts the accumulator first, unlike the LLVM fma.
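    // vfma(a, b, c) computes a + b * c, while llvm.fma(x, y, z) computes
    // x * y + z, hence the {Ops[1], Ops[2], Ops[0]} ordering.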
2024 return emitCallMaybeConstrainedFPBuiltin(
2025 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
2026 {Ops[1], Ops[2], Ops[0]});
2027 }
2028 case NEON::BI__builtin_neon_vld1_v:
2029 case NEON::BI__builtin_neon_vld1q_v: {
2030 llvm::Type *Tys[] = {Ty, Int8PtrTy};
2031 Ops.push_back(Elt: getAlignmentValue32(PtrOp0));
2032 return EmitNeonCall(F: CGM.getIntrinsic(IID: LLVMIntrinsic, Tys), Ops, name: "vld1");
2033 }
2034 case NEON::BI__builtin_neon_vld1_x2_v:
2035 case NEON::BI__builtin_neon_vld1q_x2_v:
2036 case NEON::BI__builtin_neon_vld1_x3_v:
2037 case NEON::BI__builtin_neon_vld1q_x3_v:
2038 case NEON::BI__builtin_neon_vld1_x4_v:
2039 case NEON::BI__builtin_neon_vld1q_x4_v: {
2040 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
2041 Function *F = CGM.getIntrinsic(IID: LLVMIntrinsic, Tys);
2042 Ops[1] = Builder.CreateCall(Callee: F, Args: Ops[1], Name: "vld1xN");
2043 return Builder.CreateDefaultAlignedStore(Val: Ops[1], Addr: Ops[0]);
2044 }
2045 case NEON::BI__builtin_neon_vld2_v:
2046 case NEON::BI__builtin_neon_vld2q_v:
2047 case NEON::BI__builtin_neon_vld3_v:
2048 case NEON::BI__builtin_neon_vld3q_v:
2049 case NEON::BI__builtin_neon_vld4_v:
2050 case NEON::BI__builtin_neon_vld4q_v:
2051 case NEON::BI__builtin_neon_vld2_dup_v:
2052 case NEON::BI__builtin_neon_vld2q_dup_v:
2053 case NEON::BI__builtin_neon_vld3_dup_v:
2054 case NEON::BI__builtin_neon_vld3q_dup_v:
2055 case NEON::BI__builtin_neon_vld4_dup_v:
2056 case NEON::BI__builtin_neon_vld4q_dup_v: {
2057 llvm::Type *Tys[] = {Ty, Int8PtrTy};
2058 Function *F = CGM.getIntrinsic(IID: LLVMIntrinsic, Tys);
2059 Value *Align = getAlignmentValue32(PtrOp1);
2060 Ops[1] = Builder.CreateCall(Callee: F, Args: {Ops[1], Align}, Name: NameHint);
2061 return Builder.CreateDefaultAlignedStore(Val: Ops[1], Addr: Ops[0]);
2062 }
2063 case NEON::BI__builtin_neon_vld1_dup_v:
2064 case NEON::BI__builtin_neon_vld1q_dup_v: {
2065 Value *V = PoisonValue::get(T: Ty);
2066 PtrOp0 = PtrOp0.withElementType(ElemTy: VTy->getElementType());
2067 LoadInst *Ld = Builder.CreateLoad(Addr: PtrOp0);
2068 llvm::Constant *CI = ConstantInt::get(Ty: SizeTy, V: 0);
2069 Ops[0] = Builder.CreateInsertElement(Vec: V, NewElt: Ld, Idx: CI);
2070 return EmitNeonSplat(V: Ops[0], C: CI);
2071 }
2072 case NEON::BI__builtin_neon_vld2_lane_v:
2073 case NEON::BI__builtin_neon_vld2q_lane_v:
2074 case NEON::BI__builtin_neon_vld3_lane_v:
2075 case NEON::BI__builtin_neon_vld3q_lane_v:
2076 case NEON::BI__builtin_neon_vld4_lane_v:
2077 case NEON::BI__builtin_neon_vld4q_lane_v: {
2078 llvm::Type *Tys[] = {Ty, Int8PtrTy};
2079 Function *F = CGM.getIntrinsic(IID: LLVMIntrinsic, Tys);
2080 for (unsigned I = 2; I < Ops.size() - 1; ++I)
2081 Ops[I] = Builder.CreateBitCast(V: Ops[I], DestTy: Ty);
2082 Ops.push_back(Elt: getAlignmentValue32(PtrOp1));
2083 Ops[1] = Builder.CreateCall(Callee: F, Args: ArrayRef(Ops).slice(N: 1), Name: NameHint);
2084 return Builder.CreateDefaultAlignedStore(Val: Ops[1], Addr: Ops[0]);
2085 }
2086 case NEON::BI__builtin_neon_vmovl_v: {
2087 llvm::FixedVectorType *DTy =
2088 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
2089 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: DTy);
2090 if (Usgn)
2091 return Builder.CreateZExt(V: Ops[0], DestTy: Ty, Name: "vmovl");
2092 return Builder.CreateSExt(V: Ops[0], DestTy: Ty, Name: "vmovl");
2093 }
2094 case NEON::BI__builtin_neon_vmovn_v: {
2095 llvm::FixedVectorType *QTy =
2096 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
2097 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: QTy);
2098 return Builder.CreateTrunc(V: Ops[0], DestTy: Ty, Name: "vmovn");
2099 }
2100 case NEON::BI__builtin_neon_vmull_v:
2101 // FIXME: the integer vmull operations could be emitted in terms of pure
2102 // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
2103 // hoisting the exts outside loops. Until global ISel comes along that can
    // see through such movement, this leads to bad CodeGen. So we need an
2105 // intrinsic for now.
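    // (For e.g. vmull_s16 the pure-IR form would be: sext both <4 x i16>
    // operands to <4 x i32>, then mul them.)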
2106 Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
2107 Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
2108 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vmull");
2109 case NEON::BI__builtin_neon_vpadal_v:
2110 case NEON::BI__builtin_neon_vpadalq_v: {
2111 // The source operand type has twice as many elements of half the size.
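    // E.g. a <4 x i32> accumulator here takes an <8 x i16> source vector.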
2112 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
2113 llvm::Type *EltTy =
2114 llvm::IntegerType::get(C&: getLLVMContext(), NumBits: EltBits / 2);
2115 auto *NarrowTy =
2116 llvm::FixedVectorType::get(ElementType: EltTy, NumElts: VTy->getNumElements() * 2);
2117 llvm::Type *Tys[2] = { Ty, NarrowTy };
2118 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: NameHint);
2119 }
2120 case NEON::BI__builtin_neon_vpaddl_v:
2121 case NEON::BI__builtin_neon_vpaddlq_v: {
2122 // The source operand type has twice as many elements of half the size.
2123 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
2124 llvm::Type *EltTy = llvm::IntegerType::get(C&: getLLVMContext(), NumBits: EltBits / 2);
2125 auto *NarrowTy =
2126 llvm::FixedVectorType::get(ElementType: EltTy, NumElts: VTy->getNumElements() * 2);
2127 llvm::Type *Tys[2] = { Ty, NarrowTy };
2128 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vpaddl");
2129 }
2130 case NEON::BI__builtin_neon_vqdmlal_v:
2131 case NEON::BI__builtin_neon_vqdmlsl_v: {
2132 SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
2133 Ops[1] =
2134 EmitNeonCall(F: CGM.getIntrinsic(IID: LLVMIntrinsic, Tys: Ty), Ops&: MulOps, name: "vqdmlal");
2135 Ops.resize(N: 2);
2136 return EmitNeonCall(F: CGM.getIntrinsic(IID: AltLLVMIntrinsic, Tys: Ty), Ops, name: NameHint);
2137 }
2138 case NEON::BI__builtin_neon_vqdmulhq_lane_v:
2139 case NEON::BI__builtin_neon_vqdmulh_lane_v:
2140 case NEON::BI__builtin_neon_vqrdmulhq_lane_v:
2141 case NEON::BI__builtin_neon_vqrdmulh_lane_v: {
2142 auto *RTy = cast<llvm::FixedVectorType>(Val: Ty);
2143 if (BuiltinID == NEON::BI__builtin_neon_vqdmulhq_lane_v ||
2144 BuiltinID == NEON::BI__builtin_neon_vqrdmulhq_lane_v)
2145 RTy = llvm::FixedVectorType::get(ElementType: RTy->getElementType(),
2146 NumElts: RTy->getNumElements() * 2);
2147 llvm::Type *Tys[2] = {
2148 RTy, GetNeonType(CGF: this, TypeFlags: NeonTypeFlags(Type.getEltType(), false,
2149 /*isQuad*/ false))};
2150 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: NameHint);
2151 }
2152 case NEON::BI__builtin_neon_vqdmulhq_laneq_v:
2153 case NEON::BI__builtin_neon_vqdmulh_laneq_v:
2154 case NEON::BI__builtin_neon_vqrdmulhq_laneq_v:
2155 case NEON::BI__builtin_neon_vqrdmulh_laneq_v: {
2156 llvm::Type *Tys[2] = {
2157 Ty, GetNeonType(CGF: this, TypeFlags: NeonTypeFlags(Type.getEltType(), false,
2158 /*isQuad*/ true))};
2159 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: NameHint);
2160 }
2161 case NEON::BI__builtin_neon_vqshl_n_v:
2162 case NEON::BI__builtin_neon_vqshlq_n_v:
2163 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vqshl_n",
2164 shift: 1, rightshift: false);
2165 case NEON::BI__builtin_neon_vqshlu_n_v:
2166 case NEON::BI__builtin_neon_vqshluq_n_v:
2167 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vqshlu_n",
2168 shift: 1, rightshift: false);
2169 case NEON::BI__builtin_neon_vrecpe_v:
2170 case NEON::BI__builtin_neon_vrecpeq_v:
2171 case NEON::BI__builtin_neon_vrsqrte_v:
2172 case NEON::BI__builtin_neon_vrsqrteq_v:
2173 Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
2174 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: NameHint);
2175 case NEON::BI__builtin_neon_vrndi_v:
2176 case NEON::BI__builtin_neon_vrndiq_v:
2177 Int = Builder.getIsFPConstrained()
2178 ? Intrinsic::experimental_constrained_nearbyint
2179 : Intrinsic::nearbyint;
2180 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: NameHint);
2181 case NEON::BI__builtin_neon_vrshr_n_v:
2182 case NEON::BI__builtin_neon_vrshrq_n_v:
2183 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vrshr_n",
2184 shift: 1, rightshift: true);
2185 case NEON::BI__builtin_neon_vsha512hq_u64:
2186 case NEON::BI__builtin_neon_vsha512h2q_u64:
2187 case NEON::BI__builtin_neon_vsha512su0q_u64:
2188 case NEON::BI__builtin_neon_vsha512su1q_u64: {
2189 Function *F = CGM.getIntrinsic(IID: Int);
2190 return EmitNeonCall(F, Ops, name: "");
2191 }
2192 case NEON::BI__builtin_neon_vshl_n_v:
2193 case NEON::BI__builtin_neon_vshlq_n_v:
2194 Ops[1] = EmitNeonShiftVector(V: Ops[1], Ty, neg: false);
2195 return Builder.CreateShl(LHS: Builder.CreateBitCast(V: Ops[0],DestTy: Ty), RHS: Ops[1],
2196 Name: "vshl_n");
2197 case NEON::BI__builtin_neon_vshll_n_v: {
2198 llvm::FixedVectorType *SrcTy =
2199 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
2200 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: SrcTy);
2201 if (Usgn)
2202 Ops[0] = Builder.CreateZExt(V: Ops[0], DestTy: VTy);
2203 else
2204 Ops[0] = Builder.CreateSExt(V: Ops[0], DestTy: VTy);
2205 Ops[1] = EmitNeonShiftVector(V: Ops[1], Ty: VTy, neg: false);
2206 return Builder.CreateShl(LHS: Ops[0], RHS: Ops[1], Name: "vshll_n");
2207 }
2208 case NEON::BI__builtin_neon_vshrn_n_v: {
2209 llvm::FixedVectorType *SrcTy =
2210 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
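// Roughly, for a v4i16 result this emits:
// %wide = bitcast the operand to <4 x i32>
// %shifted = lshr/ashr <4 x i32> %wide, <splat of the shift immediate>
// %res = trunc <4 x i32> %shifted to <4 x i16>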
2211 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: SrcTy);
2212 Ops[1] = EmitNeonShiftVector(V: Ops[1], Ty: SrcTy, neg: false);
2213 if (Usgn)
2214 Ops[0] = Builder.CreateLShr(LHS: Ops[0], RHS: Ops[1]);
2215 else
2216 Ops[0] = Builder.CreateAShr(LHS: Ops[0], RHS: Ops[1]);
2217 return Builder.CreateTrunc(V: Ops[0], DestTy: Ty, Name: "vshrn_n");
2218 }
2219 case NEON::BI__builtin_neon_vshr_n_v:
2220 case NEON::BI__builtin_neon_vshrq_n_v:
2221 return EmitNeonRShiftImm(Vec: Ops[0], Shift: Ops[1], Ty, usgn: Usgn, name: "vshr_n");
2222 case NEON::BI__builtin_neon_vst1_v:
2223 case NEON::BI__builtin_neon_vst1q_v:
2224 case NEON::BI__builtin_neon_vst2_v:
2225 case NEON::BI__builtin_neon_vst2q_v:
2226 case NEON::BI__builtin_neon_vst3_v:
2227 case NEON::BI__builtin_neon_vst3q_v:
2228 case NEON::BI__builtin_neon_vst4_v:
2229 case NEON::BI__builtin_neon_vst4q_v:
2230 case NEON::BI__builtin_neon_vst2_lane_v:
2231 case NEON::BI__builtin_neon_vst2q_lane_v:
2232 case NEON::BI__builtin_neon_vst3_lane_v:
2233 case NEON::BI__builtin_neon_vst3q_lane_v:
2234 case NEON::BI__builtin_neon_vst4_lane_v:
2235 case NEON::BI__builtin_neon_vst4q_lane_v: {
2236 llvm::Type *Tys[] = {Int8PtrTy, Ty};
2237 Ops.push_back(Elt: getAlignmentValue32(PtrOp0));
2238 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "");
2239 }
2240 case NEON::BI__builtin_neon_vsm3partw1q_u32:
2241 case NEON::BI__builtin_neon_vsm3partw2q_u32:
2242 case NEON::BI__builtin_neon_vsm3ss1q_u32:
2243 case NEON::BI__builtin_neon_vsm4ekeyq_u32:
2244 case NEON::BI__builtin_neon_vsm4eq_u32: {
2245 Function *F = CGM.getIntrinsic(IID: Int);
2246 return EmitNeonCall(F, Ops, name: "");
2247 }
2248 case NEON::BI__builtin_neon_vsm3tt1aq_u32:
2249 case NEON::BI__builtin_neon_vsm3tt1bq_u32:
2250 case NEON::BI__builtin_neon_vsm3tt2aq_u32:
2251 case NEON::BI__builtin_neon_vsm3tt2bq_u32: {
2252 Function *F = CGM.getIntrinsic(IID: Int);
2253 Ops[3] = Builder.CreateZExt(V: Ops[3], DestTy: Int64Ty);
2254 return EmitNeonCall(F, Ops, name: "");
2255 }
2256 case NEON::BI__builtin_neon_vst1_x2_v:
2257 case NEON::BI__builtin_neon_vst1q_x2_v:
2258 case NEON::BI__builtin_neon_vst1_x3_v:
2259 case NEON::BI__builtin_neon_vst1q_x3_v:
2260 case NEON::BI__builtin_neon_vst1_x4_v:
2261 case NEON::BI__builtin_neon_vst1q_x4_v: {
2262 // TODO: Currently in AArch32 mode the pointer operand comes first, whereas
2263 // in AArch64 it comes last. We may want to standardize on one or the other.
2264 if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be ||
2265 Arch == llvm::Triple::aarch64_32) {
2266 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
2267 std::rotate(first: Ops.begin(), middle: Ops.begin() + 1, last: Ops.end());
2268 return EmitNeonCall(F: CGM.getIntrinsic(IID: LLVMIntrinsic, Tys), Ops, name: "");
2269 }
2270 llvm::Type *Tys[2] = {UnqualPtrTy, VTy};
2271 return EmitNeonCall(F: CGM.getIntrinsic(IID: LLVMIntrinsic, Tys), Ops, name: "");
2272 }
2273 case NEON::BI__builtin_neon_vsubhn_v: {
2274 llvm::FixedVectorType *SrcTy =
2275 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
2276
2277 // %diff = sub <4 x i32> %lhs, %rhs
2278 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: SrcTy);
2279 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: SrcTy);
2280 Ops[0] = Builder.CreateSub(LHS: Ops[0], RHS: Ops[1], Name: "vsubhn");
2281
2282 // %high = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
2283 Constant *ShiftAmt =
2284 ConstantInt::get(Ty: SrcTy, V: SrcTy->getScalarSizeInBits() / 2);
2285 Ops[0] = Builder.CreateLShr(LHS: Ops[0], RHS: ShiftAmt, Name: "vsubhn");
2286
2287 // %res = trunc <4 x i32> %high to <4 x i16>
2288 return Builder.CreateTrunc(V: Ops[0], DestTy: VTy, Name: "vsubhn");
2289 }
2290 case NEON::BI__builtin_neon_vtrn_v:
2291 case NEON::BI__builtin_neon_vtrnq_v: {
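// vtrn interleaves corresponding even (then odd) lanes of the two inputs
// and stores both results through the sret pointer in Ops[0]. For two
// v4i32 inputs the shuffle masks are <0, 4, 2, 6> and <1, 5, 3, 7>.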
2292 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
2293 Ops[2] = Builder.CreateBitCast(V: Ops[2], DestTy: Ty);
2294 Value *SV = nullptr;
2295
2296 for (unsigned vi = 0; vi != 2; ++vi) {
2297 SmallVector<int, 16> Indices;
2298 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
2299 Indices.push_back(Elt: i+vi);
2300 Indices.push_back(Elt: i+e+vi);
2301 }
2302 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ptr: Ops[0], Idx0: vi);
2303 SV = Builder.CreateShuffleVector(V1: Ops[1], V2: Ops[2], Mask: Indices, Name: "vtrn");
2304 SV = Builder.CreateDefaultAlignedStore(Val: SV, Addr);
2305 }
2306 return SV;
2307 }
2308 case NEON::BI__builtin_neon_vtst_v:
2309 case NEON::BI__builtin_neon_vtstq_v: {
2310 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Ty);
2311 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
2312 Ops[0] = Builder.CreateAnd(LHS: Ops[0], RHS: Ops[1]);
2313 Ops[0] = Builder.CreateICmp(P: ICmpInst::ICMP_NE, LHS: Ops[0],
2314 RHS: ConstantAggregateZero::get(Ty));
2315 return Builder.CreateSExt(V: Ops[0], DestTy: Ty, Name: "vtst");
2316 }
2317 case NEON::BI__builtin_neon_vuzp_v:
2318 case NEON::BI__builtin_neon_vuzpq_v: {
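// vuzp de-interleaves the concatenation of the two inputs. For two v4i32
// inputs the shuffle masks are <0, 2, 4, 6> (even lanes) and <1, 3, 5, 7>
// (odd lanes); both results are stored through the pointer in Ops[0].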
2319 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
2320 Ops[2] = Builder.CreateBitCast(V: Ops[2], DestTy: Ty);
2321 Value *SV = nullptr;
2322
2323 for (unsigned vi = 0; vi != 2; ++vi) {
2324 SmallVector<int, 16> Indices;
2325 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
2326 Indices.push_back(Elt: 2*i+vi);
2327
2328 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ptr: Ops[0], Idx0: vi);
2329 SV = Builder.CreateShuffleVector(V1: Ops[1], V2: Ops[2], Mask: Indices, Name: "vuzp");
2330 SV = Builder.CreateDefaultAlignedStore(Val: SV, Addr);
2331 }
2332 return SV;
2333 }
2334 case NEON::BI__builtin_neon_vxarq_u64: {
2335 Function *F = CGM.getIntrinsic(IID: Int);
2336 Ops[2] = Builder.CreateZExt(V: Ops[2], DestTy: Int64Ty);
2337 return EmitNeonCall(F, Ops, name: "");
2338 }
2339 case NEON::BI__builtin_neon_vzip_v:
2340 case NEON::BI__builtin_neon_vzipq_v: {
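// vzip interleaves the low halves and then the high halves of the two
// inputs. For two v4i32 inputs the shuffle masks are <0, 4, 1, 5> and
// <2, 6, 3, 7>; both results are stored through the pointer in Ops[0].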
2341 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
2342 Ops[2] = Builder.CreateBitCast(V: Ops[2], DestTy: Ty);
2343 Value *SV = nullptr;
2344
2345 for (unsigned vi = 0; vi != 2; ++vi) {
2346 SmallVector<int, 16> Indices;
2347 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
2348 Indices.push_back(Elt: (i + vi*e) >> 1);
2349 Indices.push_back(Elt: ((i + vi*e) >> 1)+e);
2350 }
2351 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ptr: Ops[0], Idx0: vi);
2352 SV = Builder.CreateShuffleVector(V1: Ops[1], V2: Ops[2], Mask: Indices, Name: "vzip");
2353 SV = Builder.CreateDefaultAlignedStore(Val: SV, Addr);
2354 }
2355 return SV;
2356 }
2357 case NEON::BI__builtin_neon_vdot_s32:
2358 case NEON::BI__builtin_neon_vdot_u32:
2359 case NEON::BI__builtin_neon_vdotq_s32:
2360 case NEON::BI__builtin_neon_vdotq_u32: {
2361 auto *InputTy =
2362 llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: Ty->getPrimitiveSizeInBits() / 8);
2363 llvm::Type *Tys[2] = { Ty, InputTy };
2364 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vdot");
2365 }
2366 case NEON::BI__builtin_neon_vfmlal_low_f16:
2367 case NEON::BI__builtin_neon_vfmlalq_low_f16: {
2368 auto *InputTy =
2369 llvm::FixedVectorType::get(ElementType: HalfTy, NumElts: Ty->getPrimitiveSizeInBits() / 16);
2370 llvm::Type *Tys[2] = { Ty, InputTy };
2371 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vfmlal_low");
2372 }
2373 case NEON::BI__builtin_neon_vfmlsl_low_f16:
2374 case NEON::BI__builtin_neon_vfmlslq_low_f16: {
2375 auto *InputTy =
2376 llvm::FixedVectorType::get(ElementType: HalfTy, NumElts: Ty->getPrimitiveSizeInBits() / 16);
2377 llvm::Type *Tys[2] = { Ty, InputTy };
2378 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vfmlsl_low");
2379 }
2380 case NEON::BI__builtin_neon_vfmlal_high_f16:
2381 case NEON::BI__builtin_neon_vfmlalq_high_f16: {
2382 auto *InputTy =
2383 llvm::FixedVectorType::get(ElementType: HalfTy, NumElts: Ty->getPrimitiveSizeInBits() / 16);
2384 llvm::Type *Tys[2] = { Ty, InputTy };
2385 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vfmlal_high");
2386 }
2387 case NEON::BI__builtin_neon_vfmlsl_high_f16:
2388 case NEON::BI__builtin_neon_vfmlslq_high_f16: {
2389 auto *InputTy =
2390 llvm::FixedVectorType::get(ElementType: HalfTy, NumElts: Ty->getPrimitiveSizeInBits() / 16);
2391 llvm::Type *Tys[2] = { Ty, InputTy };
2392 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vfmlsl_high");
2393 }
2394 case NEON::BI__builtin_neon_vmmlaq_s32:
2395 case NEON::BI__builtin_neon_vmmlaq_u32: {
2396 auto *InputTy =
2397 llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: Ty->getPrimitiveSizeInBits() / 8);
2398 llvm::Type *Tys[2] = { Ty, InputTy };
2399 return EmitNeonCall(F: CGM.getIntrinsic(IID: LLVMIntrinsic, Tys), Ops, name: "vmmla");
2400 }
2401 case NEON::BI__builtin_neon_vusmmlaq_s32: {
2402 auto *InputTy =
2403 llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: Ty->getPrimitiveSizeInBits() / 8);
2404 llvm::Type *Tys[2] = { Ty, InputTy };
2405 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vusmmla");
2406 }
2407 case NEON::BI__builtin_neon_vusdot_s32:
2408 case NEON::BI__builtin_neon_vusdotq_s32: {
2409 auto *InputTy =
2410 llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: Ty->getPrimitiveSizeInBits() / 8);
2411 llvm::Type *Tys[2] = { Ty, InputTy };
2412 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vusdot");
2413 }
2414 case NEON::BI__builtin_neon_vbfdot_f32:
2415 case NEON::BI__builtin_neon_vbfdotq_f32: {
2416 llvm::Type *InputTy =
2417 llvm::FixedVectorType::get(ElementType: BFloatTy, NumElts: Ty->getPrimitiveSizeInBits() / 16);
2418 llvm::Type *Tys[2] = { Ty, InputTy };
2419 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vbfdot");
2420 }
2421 case NEON::BI__builtin_neon___a32_vcvt_bf16_f32: {
2422 llvm::Type *Tys[1] = { Ty };
2423 Function *F = CGM.getIntrinsic(IID: Int, Tys);
2424 return EmitNeonCall(F, Ops, name: "vcvtfp2bf");
2425 }
2426
2427 }
2428
2429 assert(Int && "Expected valid intrinsic number");
2430
2431 // Determine the type(s) of this overloaded AArch64 intrinsic.
2432 Function *F = LookupNeonLLVMIntrinsic(IntrinsicID: Int, Modifier, ArgType: Ty, E);
2433
2434 Value *Result = EmitNeonCall(F, Ops, name: NameHint);
2435 llvm::Type *ResultType = ConvertType(E->getType());
2436 // Cast the one-element vector result of the AArch64 intrinsic back to the
2437 // scalar type expected by the builtin.
2438 return Builder.CreateBitCast(V: Result, DestTy: ResultType, Name: NameHint);
2439}
2440
2441Value *
2442CodeGenFunction::EmitAArch64CompareBuiltinExpr(Value *Op, llvm::Type *Ty,
2443 const CmpInst::Predicate Pred,
2444 const Twine &Name) {
2445
2446 if (isa<FixedVectorType>(Val: Ty)) {
2447 // Vector types are cast to i8 vectors. Recover original type.
2448 Op = Builder.CreateBitCast(V: Op, DestTy: Ty);
2449 }
2450
2451 if (CmpInst::isFPPredicate(P: Pred)) {
2452 if (Pred == CmpInst::FCMP_OEQ)
2453 Op = Builder.CreateFCmp(P: Pred, LHS: Op, RHS: Constant::getNullValue(Ty: Op->getType()));
2454 else
2455 Op = Builder.CreateFCmpS(P: Pred, LHS: Op, RHS: Constant::getNullValue(Ty: Op->getType()));
2456 } else {
2457 Op = Builder.CreateICmp(P: Pred, LHS: Op, RHS: Constant::getNullValue(Ty: Op->getType()));
2458 }
2459
2460 llvm::Type *ResTy = Ty;
2461 if (auto *VTy = dyn_cast<FixedVectorType>(Val: Ty))
2462 ResTy = FixedVectorType::get(
2463 ElementType: IntegerType::get(C&: getLLVMContext(), NumBits: VTy->getScalarSizeInBits()),
2464 NumElts: VTy->getNumElements());
2465
2466 return Builder.CreateSExt(V: Op, DestTy: ResTy, Name);
2467}
2468
2469static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
2470 Value *ExtOp, Value *IndexOp,
2471 llvm::Type *ResTy, unsigned IntID,
2472 const char *Name) {
2473 SmallVector<Value *, 2> TblOps;
2474 if (ExtOp)
2475 TblOps.push_back(Elt: ExtOp);
2476
2477 // Build a vector containing sequential numbers like (0, 1, 2, ..., 15).
2478 SmallVector<int, 16> Indices;
2479 auto *TblTy = cast<llvm::FixedVectorType>(Val: Ops[0]->getType());
2480 for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
2481 Indices.push_back(Elt: 2*i);
2482 Indices.push_back(Elt: 2*i+1);
2483 }
2484
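// e.g. shuffling two <8 x i8> table halves with the mask <0, 1, ..., 15>
// concatenates them into a single <16 x i8> lookup table.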
2485 int PairPos = 0, End = Ops.size() - 1;
2486 while (PairPos < End) {
2487 TblOps.push_back(Elt: CGF.Builder.CreateShuffleVector(V1: Ops[PairPos],
2488 V2: Ops[PairPos+1], Mask: Indices,
2489 Name));
2490 PairPos += 2;
2491 }
2492
2493 // If there's an odd number of 64-bit lookup tables, fill the high 64 bits
2494 // of the last 128-bit lookup table with zero.
2495 if (PairPos == End) {
2496 Value *ZeroTbl = ConstantAggregateZero::get(Ty: TblTy);
2497 TblOps.push_back(Elt: CGF.Builder.CreateShuffleVector(V1: Ops[PairPos],
2498 V2: ZeroTbl, Mask: Indices, Name));
2499 }
2500
2501 Function *TblF;
2502 TblOps.push_back(Elt: IndexOp);
2503 TblF = CGF.CGM.getIntrinsic(IID: IntID, Tys: ResTy);
2504
2505 return CGF.EmitNeonCall(F: TblF, Ops&: TblOps, name: Name);
2506}
2507
2508Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
2509 unsigned Value;
2510 switch (BuiltinID) {
2511 default:
2512 return nullptr;
2513 case clang::ARM::BI__builtin_arm_nop:
2514 Value = 0;
2515 break;
2516 case clang::ARM::BI__builtin_arm_yield:
2517 case clang::ARM::BI__yield:
2518 Value = 1;
2519 break;
2520 case clang::ARM::BI__builtin_arm_wfe:
2521 case clang::ARM::BI__wfe:
2522 Value = 2;
2523 break;
2524 case clang::ARM::BI__builtin_arm_wfi:
2525 case clang::ARM::BI__wfi:
2526 Value = 3;
2527 break;
2528 case clang::ARM::BI__builtin_arm_sev:
2529 case clang::ARM::BI__sev:
2530 Value = 4;
2531 break;
2532 case clang::ARM::BI__builtin_arm_sevl:
2533 case clang::ARM::BI__sevl:
2534 Value = 5;
2535 break;
2536 }
2537
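// e.g. __builtin_arm_wfi lowers (roughly) to: call void @llvm.arm.hint(i32 3)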
2538 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
2539 llvm::ConstantInt::get(Int32Ty, Value));
2540}
2541
2542enum SpecialRegisterAccessKind {
2543 NormalRead,
2544 VolatileRead,
2545 Write,
2546};
2547
2548 // Generates the IR for a read/write special register builtin.
2549 // ValueType is the type of the value that is to be written or read;
2550 // RegisterType is the type of the register being written to or read from.
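// For example, a 32-bit volatile read is lowered roughly to
//   %val = call i32 @llvm.read_volatile_register.i32(metadata !{!"<regname>"})
// and a write to a call of @llvm.write_register with the same metadata node.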
2551static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
2552 const CallExpr *E,
2553 llvm::Type *RegisterType,
2554 llvm::Type *ValueType,
2555 SpecialRegisterAccessKind AccessKind,
2556 StringRef SysReg = "") {
2557 // The read/write register intrinsics only support 32-, 64- and 128-bit operations.
2558 assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64) ||
2559 RegisterType->isIntegerTy(128)) &&
2560 "Unsupported size for register.");
2561
2562 CodeGen::CGBuilderTy &Builder = CGF.Builder;
2563 CodeGen::CodeGenModule &CGM = CGF.CGM;
2564 LLVMContext &Context = CGM.getLLVMContext();
2565
2566 if (SysReg.empty()) {
2567 const Expr *SysRegStrExpr = E->getArg(Arg: 0)->IgnoreParenCasts();
2568 SysReg = cast<clang::StringLiteral>(Val: SysRegStrExpr)->getString();
2569 }
2570
2571 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, Str: SysReg) };
2572 llvm::MDNode *RegName = llvm::MDNode::get(Context, MDs: Ops);
2573 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, MD: RegName);
2574
2575 llvm::Type *Types[] = { RegisterType };
2576
2577 bool MixedTypes = RegisterType->isIntegerTy(Bitwidth: 64) && ValueType->isIntegerTy(Bitwidth: 32);
2578 assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
2579 && "Can't fit 64-bit value in 32-bit register");
2580
2581 if (AccessKind != Write) {
2582 assert(AccessKind == NormalRead || AccessKind == VolatileRead);
2583 llvm::Function *F = CGM.getIntrinsic(
2584 AccessKind == VolatileRead ? Intrinsic::read_volatile_register
2585 : Intrinsic::read_register,
2586 Types);
2587 llvm::Value *Call = Builder.CreateCall(Callee: F, Args: Metadata);
2588
2589 if (MixedTypes)
2590 // Read into a 64-bit register and then truncate the result to 32 bits.
2591 return Builder.CreateTrunc(V: Call, DestTy: ValueType);
2592
2593 if (ValueType->isPointerTy())
2594 // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
2595 return Builder.CreateIntToPtr(V: Call, DestTy: ValueType);
2596
2597 return Call;
2598 }
2599
2600 llvm::Function *F = CGM.getIntrinsic(Intrinsic::write_register, Types);
2601 llvm::Value *ArgValue = CGF.EmitScalarExpr(E: E->getArg(Arg: 1));
2602 if (MixedTypes) {
2603 // Extend the 32-bit write value to 64 bits to pass to the write intrinsic.
2604 ArgValue = Builder.CreateZExt(V: ArgValue, DestTy: RegisterType);
2605 return Builder.CreateCall(Callee: F, Args: { Metadata, ArgValue });
2606 }
2607
2608 if (ValueType->isPointerTy()) {
2609 // Have VoidPtrTy ArgValue but want to return an i32/i64.
2610 ArgValue = Builder.CreatePtrToInt(V: ArgValue, DestTy: RegisterType);
2611 return Builder.CreateCall(Callee: F, Args: { Metadata, ArgValue });
2612 }
2613
2614 return Builder.CreateCall(Callee: F, Args: { Metadata, ArgValue });
2615}
2616
2617/// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
2618/// argument that specifies the vector type.
2619static bool HasExtraNeonArgument(unsigned BuiltinID) {
2620 switch (BuiltinID) {
2621 default: break;
2622 case NEON::BI__builtin_neon_vget_lane_i8:
2623 case NEON::BI__builtin_neon_vget_lane_i16:
2624 case NEON::BI__builtin_neon_vget_lane_bf16:
2625 case NEON::BI__builtin_neon_vget_lane_i32:
2626 case NEON::BI__builtin_neon_vget_lane_i64:
2627 case NEON::BI__builtin_neon_vget_lane_mf8:
2628 case NEON::BI__builtin_neon_vget_lane_f32:
2629 case NEON::BI__builtin_neon_vgetq_lane_i8:
2630 case NEON::BI__builtin_neon_vgetq_lane_i16:
2631 case NEON::BI__builtin_neon_vgetq_lane_bf16:
2632 case NEON::BI__builtin_neon_vgetq_lane_i32:
2633 case NEON::BI__builtin_neon_vgetq_lane_i64:
2634 case NEON::BI__builtin_neon_vgetq_lane_mf8:
2635 case NEON::BI__builtin_neon_vgetq_lane_f32:
2636 case NEON::BI__builtin_neon_vduph_lane_bf16:
2637 case NEON::BI__builtin_neon_vduph_laneq_bf16:
2638 case NEON::BI__builtin_neon_vset_lane_i8:
2639 case NEON::BI__builtin_neon_vset_lane_mf8:
2640 case NEON::BI__builtin_neon_vset_lane_i16:
2641 case NEON::BI__builtin_neon_vset_lane_bf16:
2642 case NEON::BI__builtin_neon_vset_lane_i32:
2643 case NEON::BI__builtin_neon_vset_lane_i64:
2644 case NEON::BI__builtin_neon_vset_lane_f32:
2645 case NEON::BI__builtin_neon_vsetq_lane_i8:
2646 case NEON::BI__builtin_neon_vsetq_lane_mf8:
2647 case NEON::BI__builtin_neon_vsetq_lane_i16:
2648 case NEON::BI__builtin_neon_vsetq_lane_bf16:
2649 case NEON::BI__builtin_neon_vsetq_lane_i32:
2650 case NEON::BI__builtin_neon_vsetq_lane_i64:
2651 case NEON::BI__builtin_neon_vsetq_lane_f32:
2652 case NEON::BI__builtin_neon_vsha1h_u32:
2653 case NEON::BI__builtin_neon_vsha1cq_u32:
2654 case NEON::BI__builtin_neon_vsha1pq_u32:
2655 case NEON::BI__builtin_neon_vsha1mq_u32:
2656 case NEON::BI__builtin_neon_vcvth_bf16_f32:
2657 case clang::ARM::BI_MoveToCoprocessor:
2658 case clang::ARM::BI_MoveToCoprocessor2:
2659 return false;
2660 }
2661 return true;
2662}
2663
2664Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
2665 const CallExpr *E,
2666 ReturnValueSlot ReturnValue,
2667 llvm::Triple::ArchType Arch) {
2668 if (auto Hint = GetValueForARMHint(BuiltinID))
2669 return Hint;
2670
2671 if (BuiltinID == clang::ARM::BI__emit) {
2672 bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
2673 llvm::FunctionType *FTy =
2674 llvm::FunctionType::get(Result: VoidTy, /*Variadic=*/isVarArg: false);
2675
2676 Expr::EvalResult Result;
2677 if (!E->getArg(Arg: 0)->EvaluateAsInt(Result, Ctx: CGM.getContext()))
2678 llvm_unreachable("Sema will ensure that the parameter is constant");
2679
2680 llvm::APSInt Value = Result.Val.getInt();
2681 uint64_t ZExtValue = Value.zextOrTrunc(width: IsThumb ? 16 : 32).getZExtValue();
2682
2683 llvm::InlineAsm *Emit =
2684 IsThumb ? InlineAsm::get(Ty: FTy, AsmString: ".inst.n 0x" + utohexstr(X: ZExtValue), Constraints: "",
2685 /*hasSideEffects=*/true)
2686 : InlineAsm::get(Ty: FTy, AsmString: ".inst 0x" + utohexstr(X: ZExtValue), Constraints: "",
2687 /*hasSideEffects=*/true);
2688
2689 return Builder.CreateCall(Callee: Emit);
2690 }
2691
2692 if (BuiltinID == clang::ARM::BI__builtin_arm_dbg) {
2693 Value *Option = EmitScalarExpr(E: E->getArg(Arg: 0));
2694 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
2695 }
2696
2697 if (BuiltinID == clang::ARM::BI__builtin_arm_prefetch) {
2698 Value *Address = EmitScalarExpr(E: E->getArg(Arg: 0));
2699 Value *RW = EmitScalarExpr(E: E->getArg(Arg: 1));
2700 Value *IsData = EmitScalarExpr(E: E->getArg(Arg: 2));
2701
2702 // Locality is not supported on the ARM target.
2703 Value *Locality = llvm::ConstantInt::get(Ty: Int32Ty, V: 3);
2704
2705 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
2706 return Builder.CreateCall(Callee: F, Args: {Address, RW, Locality, IsData});
2707 }
2708
2709 if (BuiltinID == clang::ARM::BI__builtin_arm_rbit) {
2710 llvm::Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
2711 return Builder.CreateCall(
2712 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
2713 }
2714
2715 if (BuiltinID == clang::ARM::BI__builtin_arm_clz ||
2716 BuiltinID == clang::ARM::BI__builtin_arm_clz64) {
2717 llvm::Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
2718 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
2719 Value *Res = Builder.CreateCall(Callee: F, Args: {Arg, Builder.getInt1(V: false)});
2720 if (BuiltinID == clang::ARM::BI__builtin_arm_clz64)
2721 Res = Builder.CreateTrunc(V: Res, DestTy: Builder.getInt32Ty());
2722 return Res;
2723 }
2724
2725
2726 if (BuiltinID == clang::ARM::BI__builtin_arm_cls) {
2727 llvm::Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
2728 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls), Arg, "cls");
2729 }
2730 if (BuiltinID == clang::ARM::BI__builtin_arm_cls64) {
2731 llvm::Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
2732 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls64), Arg,
2733 "cls");
2734 }
2735
2736 if (BuiltinID == clang::ARM::BI__clear_cache) {
2737 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
2738 const FunctionDecl *FD = E->getDirectCallee();
2739 Value *Ops[2];
2740 for (unsigned i = 0; i < 2; i++)
2741 Ops[i] = EmitScalarExpr(E: E->getArg(Arg: i));
2742 llvm::Type *Ty = CGM.getTypes().ConvertType(T: FD->getType());
2743 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Val: Ty);
2744 StringRef Name = FD->getName();
2745 return EmitNounwindRuntimeCall(callee: CGM.CreateRuntimeFunction(Ty: FTy, Name), args: Ops);
2746 }
2747
2748 if (BuiltinID == clang::ARM::BI__builtin_arm_mcrr ||
2749 BuiltinID == clang::ARM::BI__builtin_arm_mcrr2) {
2750 Function *F;
2751
2752 switch (BuiltinID) {
2753 default: llvm_unreachable("unexpected builtin");
2754 case clang::ARM::BI__builtin_arm_mcrr:
2755 F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
2756 break;
2757 case clang::ARM::BI__builtin_arm_mcrr2:
2758 F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
2759 break;
2760 }
2761
2762 // The MCRR{2} instruction has 5 operands, but
2763 // the intrinsic has only 4 because Rt and Rt2
2764 // are represented as a single unsigned 64-bit
2765 // integer in the intrinsic definition, while
2766 // the instruction itself takes them as two
2767 // separate 32-bit registers.
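// e.g. a 64-bit value 0xAAAABBBBCCCCDDDD is passed as Rt = 0xCCCCDDDD
// (low half) and Rt2 = 0xAAAABBBB (high half).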
2768
2769 Value *Coproc = EmitScalarExpr(E: E->getArg(Arg: 0));
2770 Value *Opc1 = EmitScalarExpr(E: E->getArg(Arg: 1));
2771 Value *RtAndRt2 = EmitScalarExpr(E: E->getArg(Arg: 2));
2772 Value *CRm = EmitScalarExpr(E: E->getArg(Arg: 3));
2773
2774 Value *C1 = llvm::ConstantInt::get(Ty: Int64Ty, V: 32);
2775 Value *Rt = Builder.CreateTruncOrBitCast(V: RtAndRt2, DestTy: Int32Ty);
2776 Value *Rt2 = Builder.CreateLShr(LHS: RtAndRt2, RHS: C1);
2777 Rt2 = Builder.CreateTruncOrBitCast(V: Rt2, DestTy: Int32Ty);
2778
2779 return Builder.CreateCall(Callee: F, Args: {Coproc, Opc1, Rt, Rt2, CRm});
2780 }
2781
2782 if (BuiltinID == clang::ARM::BI__builtin_arm_mrrc ||
2783 BuiltinID == clang::ARM::BI__builtin_arm_mrrc2) {
2784 Function *F;
2785
2786 switch (BuiltinID) {
2787 default: llvm_unreachable("unexpected builtin");
2788 case clang::ARM::BI__builtin_arm_mrrc:
2789 F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
2790 break;
2791 case clang::ARM::BI__builtin_arm_mrrc2:
2792 F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
2793 break;
2794 }
2795
2796 Value *Coproc = EmitScalarExpr(E: E->getArg(Arg: 0));
2797 Value *Opc1 = EmitScalarExpr(E: E->getArg(Arg: 1));
2798 Value *CRm = EmitScalarExpr(E: E->getArg(Arg: 2));
2799 Value *RtAndRt2 = Builder.CreateCall(Callee: F, Args: {Coproc, Opc1, CRm});
2800
2801 // The intrinsic returns an unsigned 64-bit integer,
2802 // represented as two 32-bit integers.
2803
2804 Value *Rt = Builder.CreateExtractValue(Agg: RtAndRt2, Idxs: 1);
2805 Value *Rt1 = Builder.CreateExtractValue(Agg: RtAndRt2, Idxs: 0);
2806 Rt = Builder.CreateZExt(V: Rt, DestTy: Int64Ty);
2807 Rt1 = Builder.CreateZExt(V: Rt1, DestTy: Int64Ty);
2808
2809 Value *ShiftCast = llvm::ConstantInt::get(Ty: Int64Ty, V: 32);
2810 RtAndRt2 = Builder.CreateShl(LHS: Rt, RHS: ShiftCast, Name: "shl", HasNUW: true);
2811 RtAndRt2 = Builder.CreateOr(LHS: RtAndRt2, RHS: Rt1);
2812
2813 return Builder.CreateBitCast(V: RtAndRt2, DestTy: ConvertType(E->getType()));
2814 }
2815
2816 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrexd ||
2817 ((BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
2818 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) &&
2819 getContext().getTypeSize(E->getType()) == 64) ||
2820 BuiltinID == clang::ARM::BI__ldrexd) {
2821 Function *F;
2822
2823 switch (BuiltinID) {
2824 default: llvm_unreachable("unexpected builtin");
2825 case clang::ARM::BI__builtin_arm_ldaex:
2826 F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
2827 break;
2828 case clang::ARM::BI__builtin_arm_ldrexd:
2829 case clang::ARM::BI__builtin_arm_ldrex:
2830 case clang::ARM::BI__ldrexd:
2831 F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
2832 break;
2833 }
2834
2835 Value *LdPtr = EmitScalarExpr(E: E->getArg(Arg: 0));
2836 Value *Val = Builder.CreateCall(Callee: F, Args: LdPtr, Name: "ldrexd");
2837
2838 Value *Val0 = Builder.CreateExtractValue(Agg: Val, Idxs: 1);
2839 Value *Val1 = Builder.CreateExtractValue(Agg: Val, Idxs: 0);
2840 Val0 = Builder.CreateZExt(V: Val0, DestTy: Int64Ty);
2841 Val1 = Builder.CreateZExt(V: Val1, DestTy: Int64Ty);
2842
2843 Value *ShiftCst = llvm::ConstantInt::get(Ty: Int64Ty, V: 32);
2844 Val = Builder.CreateShl(LHS: Val0, RHS: ShiftCst, Name: "shl", HasNUW: true /* nuw */);
2845 Val = Builder.CreateOr(LHS: Val, RHS: Val1);
2846 return Builder.CreateBitCast(V: Val, DestTy: ConvertType(E->getType()));
2847 }
2848
2849 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
2850 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) {
2851 Value *LoadAddr = EmitScalarExpr(E: E->getArg(Arg: 0));
2852
2853 QualType Ty = E->getType();
2854 llvm::Type *RealResTy = ConvertType(T: Ty);
2855 llvm::Type *IntTy =
2856 llvm::IntegerType::get(C&: getLLVMContext(), NumBits: getContext().getTypeSize(T: Ty));
2857
2858 Function *F = CGM.getIntrinsic(
2859 BuiltinID == clang::ARM::BI__builtin_arm_ldaex ? Intrinsic::arm_ldaex
2860 : Intrinsic::arm_ldrex,
2861 UnqualPtrTy);
2862 CallInst *Val = Builder.CreateCall(Callee: F, Args: LoadAddr, Name: "ldrex");
2863 Val->addParamAttr(
2864 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
2865
2866 if (RealResTy->isPointerTy())
2867 return Builder.CreateIntToPtr(V: Val, DestTy: RealResTy);
2868 else {
2869 llvm::Type *IntResTy = llvm::IntegerType::get(
2870 C&: getLLVMContext(), NumBits: CGM.getDataLayout().getTypeSizeInBits(Ty: RealResTy));
2871 return Builder.CreateBitCast(V: Builder.CreateTruncOrBitCast(V: Val, DestTy: IntResTy),
2872 DestTy: RealResTy);
2873 }
2874 }
2875
2876 if (BuiltinID == clang::ARM::BI__builtin_arm_strexd ||
2877 ((BuiltinID == clang::ARM::BI__builtin_arm_stlex ||
2878 BuiltinID == clang::ARM::BI__builtin_arm_strex) &&
2879 getContext().getTypeSize(T: E->getArg(Arg: 0)->getType()) == 64)) {
2880 Function *F = CGM.getIntrinsic(
2881 BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlexd
2882 : Intrinsic::arm_strexd);
2883 llvm::Type *STy = llvm::StructType::get(elt1: Int32Ty, elts: Int32Ty);
2884
2885 Address Tmp = CreateMemTemp(T: E->getArg(Arg: 0)->getType());
2886 Value *Val = EmitScalarExpr(E: E->getArg(Arg: 0));
2887 Builder.CreateStore(Val, Addr: Tmp);
2888
2889 Address LdPtr = Tmp.withElementType(ElemTy: STy);
2890 Val = Builder.CreateLoad(Addr: LdPtr);
2891
2892 Value *Arg0 = Builder.CreateExtractValue(Agg: Val, Idxs: 0);
2893 Value *Arg1 = Builder.CreateExtractValue(Agg: Val, Idxs: 1);
2894 Value *StPtr = EmitScalarExpr(E: E->getArg(Arg: 1));
2895 return Builder.CreateCall(Callee: F, Args: {Arg0, Arg1, StPtr}, Name: "strexd");
2896 }
2897
2898 if (BuiltinID == clang::ARM::BI__builtin_arm_strex ||
2899 BuiltinID == clang::ARM::BI__builtin_arm_stlex) {
2900 Value *StoreVal = EmitScalarExpr(E: E->getArg(Arg: 0));
2901 Value *StoreAddr = EmitScalarExpr(E: E->getArg(Arg: 1));
2902
2903 QualType Ty = E->getArg(Arg: 0)->getType();
2904 llvm::Type *StoreTy =
2905 llvm::IntegerType::get(C&: getLLVMContext(), NumBits: getContext().getTypeSize(T: Ty));
2906
2907 if (StoreVal->getType()->isPointerTy())
2908 StoreVal = Builder.CreatePtrToInt(V: StoreVal, DestTy: Int32Ty);
2909 else {
2910 llvm::Type *IntTy = llvm::IntegerType::get(
2911 C&: getLLVMContext(),
2912 NumBits: CGM.getDataLayout().getTypeSizeInBits(Ty: StoreVal->getType()));
2913 StoreVal = Builder.CreateBitCast(V: StoreVal, DestTy: IntTy);
2914 StoreVal = Builder.CreateZExtOrBitCast(V: StoreVal, DestTy: Int32Ty);
2915 }
2916
2917 Function *F = CGM.getIntrinsic(
2918 BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlex
2919 : Intrinsic::arm_strex,
2920 StoreAddr->getType());
2921
2922 CallInst *CI = Builder.CreateCall(Callee: F, Args: {StoreVal, StoreAddr}, Name: "strex");
2923 CI->addParamAttr(
2924 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
2925 return CI;
2926 }
2927
2928 if (BuiltinID == clang::ARM::BI__builtin_arm_clrex) {
2929 Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
2930 return Builder.CreateCall(Callee: F);
2931 }
2932
2933 // CRC32
2934 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
2935 switch (BuiltinID) {
2936 case clang::ARM::BI__builtin_arm_crc32b:
2937 CRCIntrinsicID = Intrinsic::arm_crc32b; break;
2938 case clang::ARM::BI__builtin_arm_crc32cb:
2939 CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
2940 case clang::ARM::BI__builtin_arm_crc32h:
2941 CRCIntrinsicID = Intrinsic::arm_crc32h; break;
2942 case clang::ARM::BI__builtin_arm_crc32ch:
2943 CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
2944 case clang::ARM::BI__builtin_arm_crc32w:
2945 case clang::ARM::BI__builtin_arm_crc32d:
2946 CRCIntrinsicID = Intrinsic::arm_crc32w; break;
2947 case clang::ARM::BI__builtin_arm_crc32cw:
2948 case clang::ARM::BI__builtin_arm_crc32cd:
2949 CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
2950 }
2951
2952 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
2953 Value *Arg0 = EmitScalarExpr(E: E->getArg(Arg: 0));
2954 Value *Arg1 = EmitScalarExpr(E: E->getArg(Arg: 1));
2955
2956 // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
2957 // intrinsics, hence we need different codegen for these cases.
2958 if (BuiltinID == clang::ARM::BI__builtin_arm_crc32d ||
2959 BuiltinID == clang::ARM::BI__builtin_arm_crc32cd) {
2960 Value *C1 = llvm::ConstantInt::get(Ty: Int64Ty, V: 32);
2961 Value *Arg1a = Builder.CreateTruncOrBitCast(V: Arg1, DestTy: Int32Ty);
2962 Value *Arg1b = Builder.CreateLShr(LHS: Arg1, RHS: C1);
2963 Arg1b = Builder.CreateTruncOrBitCast(V: Arg1b, DestTy: Int32Ty);
2964
2965 Function *F = CGM.getIntrinsic(IID: CRCIntrinsicID);
2966 Value *Res = Builder.CreateCall(Callee: F, Args: {Arg0, Arg1a});
2967 return Builder.CreateCall(Callee: F, Args: {Res, Arg1b});
2968 } else {
2969 Arg1 = Builder.CreateZExtOrBitCast(V: Arg1, DestTy: Int32Ty);
2970
2971 Function *F = CGM.getIntrinsic(IID: CRCIntrinsicID);
2972 return Builder.CreateCall(Callee: F, Args: {Arg0, Arg1});
2973 }
2974 }
2975
2976 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
2977 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
2978 BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
2979 BuiltinID == clang::ARM::BI__builtin_arm_wsr ||
2980 BuiltinID == clang::ARM::BI__builtin_arm_wsr64 ||
2981 BuiltinID == clang::ARM::BI__builtin_arm_wsrp) {
2982
2983 SpecialRegisterAccessKind AccessKind = Write;
2984 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
2985 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
2986 BuiltinID == clang::ARM::BI__builtin_arm_rsrp)
2987 AccessKind = VolatileRead;
2988
2989 bool IsPointerBuiltin = BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
2990 BuiltinID == clang::ARM::BI__builtin_arm_wsrp;
2991
2992 bool Is64Bit = BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
2993 BuiltinID == clang::ARM::BI__builtin_arm_wsr64;
2994
2995 llvm::Type *ValueType;
2996 llvm::Type *RegisterType;
2997 if (IsPointerBuiltin) {
2998 ValueType = VoidPtrTy;
2999 RegisterType = Int32Ty;
3000 } else if (Is64Bit) {
3001 ValueType = RegisterType = Int64Ty;
3002 } else {
3003 ValueType = RegisterType = Int32Ty;
3004 }
3005
3006 return EmitSpecialRegisterBuiltin(CGF&: *this, E, RegisterType, ValueType,
3007 AccessKind);
3008 }
3009
3010 if (BuiltinID == ARM::BI__builtin_sponentry) {
3011 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
3012 return Builder.CreateCall(Callee: F);
3013 }
3014
3015 // Handle MSVC intrinsics before argument evaluation to prevent double
3016 // evaluation.
3017 if (std::optional<MSVCIntrin> MsvcIntId = translateArmToMsvcIntrin(BuiltinID))
3018 return EmitMSVCBuiltinExpr(BuiltinID: *MsvcIntId, E);
3019
3020 // Deal with MVE builtins
3021 if (Value *Result = EmitARMMVEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
3022 return Result;
3023 // Handle CDE builtins
3024 if (Value *Result = EmitARMCDEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
3025 return Result;
3026
3027 // Some intrinsics are equivalent; if they are, use the base intrinsic ID.
3028 auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
3029 return P.first == BuiltinID;
3030 });
3031 if (It != end(NEONEquivalentIntrinsicMap))
3032 BuiltinID = It->second;
3033
3034 // Find out if any arguments are required to be integer constant
3035 // expressions.
3036 unsigned ICEArguments = 0;
3037 ASTContext::GetBuiltinTypeError Error;
3038 getContext().GetBuiltinType(ID: BuiltinID, Error, IntegerConstantArgs: &ICEArguments);
3039 assert(Error == ASTContext::GE_None && "Should not codegen an error");
3040
3041 auto getAlignmentValue32 = [&](Address addr) -> Value* {
3042 return Builder.getInt32(C: addr.getAlignment().getQuantity());
3043 };
3044
3045 Address PtrOp0 = Address::invalid();
3046 Address PtrOp1 = Address::invalid();
3047 SmallVector<Value*, 4> Ops;
3048 bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
3049 unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
3050 for (unsigned i = 0, e = NumArgs; i != e; i++) {
3051 if (i == 0) {
3052 switch (BuiltinID) {
3053 case NEON::BI__builtin_neon_vld1_v:
3054 case NEON::BI__builtin_neon_vld1q_v:
3055 case NEON::BI__builtin_neon_vld1q_lane_v:
3056 case NEON::BI__builtin_neon_vld1_lane_v:
3057 case NEON::BI__builtin_neon_vld1_dup_v:
3058 case NEON::BI__builtin_neon_vld1q_dup_v:
3059 case NEON::BI__builtin_neon_vst1_v:
3060 case NEON::BI__builtin_neon_vst1q_v:
3061 case NEON::BI__builtin_neon_vst1q_lane_v:
3062 case NEON::BI__builtin_neon_vst1_lane_v:
3063 case NEON::BI__builtin_neon_vst2_v:
3064 case NEON::BI__builtin_neon_vst2q_v:
3065 case NEON::BI__builtin_neon_vst2_lane_v:
3066 case NEON::BI__builtin_neon_vst2q_lane_v:
3067 case NEON::BI__builtin_neon_vst3_v:
3068 case NEON::BI__builtin_neon_vst3q_v:
3069 case NEON::BI__builtin_neon_vst3_lane_v:
3070 case NEON::BI__builtin_neon_vst3q_lane_v:
3071 case NEON::BI__builtin_neon_vst4_v:
3072 case NEON::BI__builtin_neon_vst4q_v:
3073 case NEON::BI__builtin_neon_vst4_lane_v:
3074 case NEON::BI__builtin_neon_vst4q_lane_v:
3075 // Get the alignment for the argument in addition to the value;
3076 // we'll use it later.
3077 PtrOp0 = EmitPointerWithAlignment(Addr: E->getArg(Arg: 0));
3078 Ops.push_back(Elt: PtrOp0.emitRawPointer(CGF&: *this));
3079 continue;
3080 }
3081 }
3082 if (i == 1) {
3083 switch (BuiltinID) {
3084 case NEON::BI__builtin_neon_vld2_v:
3085 case NEON::BI__builtin_neon_vld2q_v:
3086 case NEON::BI__builtin_neon_vld3_v:
3087 case NEON::BI__builtin_neon_vld3q_v:
3088 case NEON::BI__builtin_neon_vld4_v:
3089 case NEON::BI__builtin_neon_vld4q_v:
3090 case NEON::BI__builtin_neon_vld2_lane_v:
3091 case NEON::BI__builtin_neon_vld2q_lane_v:
3092 case NEON::BI__builtin_neon_vld3_lane_v:
3093 case NEON::BI__builtin_neon_vld3q_lane_v:
3094 case NEON::BI__builtin_neon_vld4_lane_v:
3095 case NEON::BI__builtin_neon_vld4q_lane_v:
3096 case NEON::BI__builtin_neon_vld2_dup_v:
3097 case NEON::BI__builtin_neon_vld2q_dup_v:
3098 case NEON::BI__builtin_neon_vld3_dup_v:
3099 case NEON::BI__builtin_neon_vld3q_dup_v:
3100 case NEON::BI__builtin_neon_vld4_dup_v:
3101 case NEON::BI__builtin_neon_vld4q_dup_v:
3102 // Get the alignment for the argument in addition to the value;
3103 // we'll use it later.
3104 PtrOp1 = EmitPointerWithAlignment(Addr: E->getArg(Arg: 1));
3105 Ops.push_back(Elt: PtrOp1.emitRawPointer(CGF&: *this));
3106 continue;
3107 }
3108 }
3109
3110 Ops.push_back(Elt: EmitScalarOrConstFoldImmArg(ICEArguments, Idx: i, E));
3111 }
3112
3113 switch (BuiltinID) {
3114 default: break;
3115
3116 case NEON::BI__builtin_neon_vget_lane_i8:
3117 case NEON::BI__builtin_neon_vget_lane_i16:
3118 case NEON::BI__builtin_neon_vget_lane_i32:
3119 case NEON::BI__builtin_neon_vget_lane_i64:
3120 case NEON::BI__builtin_neon_vget_lane_bf16:
3121 case NEON::BI__builtin_neon_vget_lane_f32:
3122 case NEON::BI__builtin_neon_vgetq_lane_i8:
3123 case NEON::BI__builtin_neon_vgetq_lane_i16:
3124 case NEON::BI__builtin_neon_vgetq_lane_i32:
3125 case NEON::BI__builtin_neon_vgetq_lane_i64:
3126 case NEON::BI__builtin_neon_vgetq_lane_bf16:
3127 case NEON::BI__builtin_neon_vgetq_lane_f32:
3128 case NEON::BI__builtin_neon_vduph_lane_bf16:
3129 case NEON::BI__builtin_neon_vduph_laneq_bf16:
3130 return Builder.CreateExtractElement(Vec: Ops[0], Idx: Ops[1], Name: "vget_lane");
3131
3132 case NEON::BI__builtin_neon_vrndns_f32: {
3133 Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
3134 llvm::Type *Tys[] = {Arg->getType()};
3135 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vrintn, Tys);
3136 return Builder.CreateCall(Callee: F, Args: {Arg}, Name: "vrndn"); }
3137
3138 case NEON::BI__builtin_neon_vset_lane_i8:
3139 case NEON::BI__builtin_neon_vset_lane_i16:
3140 case NEON::BI__builtin_neon_vset_lane_i32:
3141 case NEON::BI__builtin_neon_vset_lane_i64:
3142 case NEON::BI__builtin_neon_vset_lane_bf16:
3143 case NEON::BI__builtin_neon_vset_lane_f32:
3144 case NEON::BI__builtin_neon_vsetq_lane_i8:
3145 case NEON::BI__builtin_neon_vsetq_lane_i16:
3146 case NEON::BI__builtin_neon_vsetq_lane_i32:
3147 case NEON::BI__builtin_neon_vsetq_lane_i64:
3148 case NEON::BI__builtin_neon_vsetq_lane_bf16:
3149 case NEON::BI__builtin_neon_vsetq_lane_f32:
3150 return Builder.CreateInsertElement(Vec: Ops[1], NewElt: Ops[0], Idx: Ops[2], Name: "vset_lane");
3151
3152 case NEON::BI__builtin_neon_vsha1h_u32:
3153 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
3154 "vsha1h");
3155 case NEON::BI__builtin_neon_vsha1cq_u32:
3156 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
3157 "vsha1h");
3158 case NEON::BI__builtin_neon_vsha1pq_u32:
3159 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
3160 "vsha1h");
3161 case NEON::BI__builtin_neon_vsha1mq_u32:
3162 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
3163 "vsha1h");
3164
3165 case NEON::BI__builtin_neon_vcvth_bf16_f32: {
3166 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vcvtbfp2bf), Ops,
3167 "vcvtbfp2bf");
3168 }
3169
3170 // The ARM _MoveToCoprocessor builtins put the input register value as
3171 // the first argument, but the LLVM intrinsic expects it as the third one.
3172 case clang::ARM::BI_MoveToCoprocessor:
3173 case clang::ARM::BI_MoveToCoprocessor2: {
3174 Function *F = CGM.getIntrinsic(BuiltinID == clang::ARM::BI_MoveToCoprocessor
3175 ? Intrinsic::arm_mcr
3176 : Intrinsic::arm_mcr2);
3177 return Builder.CreateCall(Callee: F, Args: {Ops[1], Ops[2], Ops[0],
3178 Ops[3], Ops[4], Ops[5]});
3179 }
3180 }
3181
3182 // Get the last argument, which specifies the vector type.
3183 assert(HasExtraArg);
3184 const Expr *Arg = E->getArg(Arg: E->getNumArgs()-1);
3185 std::optional<llvm::APSInt> Result =
3186 Arg->getIntegerConstantExpr(Ctx: getContext());
3187 if (!Result)
3188 return nullptr;
3189
3190 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f ||
3191 BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_d) {
3192 // Determine the overloaded type of this builtin.
3193 llvm::Type *Ty;
3194 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f)
3195 Ty = FloatTy;
3196 else
3197 Ty = DoubleTy;
3198
3199 // Determine whether this is an unsigned conversion or not.
3200 bool usgn = Result->getZExtValue() == 1;
3201 unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
3202
3203 // Call the appropriate intrinsic.
3204 Function *F = CGM.getIntrinsic(IID: Int, Tys: Ty);
3205 return Builder.CreateCall(Callee: F, Args: Ops, Name: "vcvtr");
3206 }
3207
3208 // Determine the type of this overloaded NEON intrinsic.
3209 NeonTypeFlags Type = Result->getZExtValue();
3210 bool usgn = Type.isUnsigned();
3211 bool rightShift = false;
3212
3213 llvm::FixedVectorType *VTy =
3214 GetNeonType(CGF: this, TypeFlags: Type, HasLegalHalfType: getTarget().hasLegalHalfType(), V1Ty: false,
3215 AllowBFloatArgsAndRet: getTarget().hasBFloat16Type());
3216 llvm::Type *Ty = VTy;
3217 if (!Ty)
3218 return nullptr;
3219
3220 // Many NEON builtins have identical semantics and uses in ARM and
3221 // AArch64. Emit these in a single function.
3222 auto IntrinsicMap = ArrayRef(ARMSIMDIntrinsicMap);
3223 const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
3224 IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
3225 if (Builtin)
3226 return EmitCommonNeonBuiltinExpr(
3227 BuiltinID: Builtin->BuiltinID, LLVMIntrinsic: Builtin->LLVMIntrinsic, AltLLVMIntrinsic: Builtin->AltLLVMIntrinsic,
3228 NameHint: Builtin->NameHint, Modifier: Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1, Arch);
3229
3230 unsigned Int;
3231 switch (BuiltinID) {
3232 default: return nullptr;
3233 case NEON::BI__builtin_neon_vld1q_lane_v:
3234 // Handle 64-bit integer elements as a special case. Use shuffles of
3235 // one-element vectors to avoid poor code for i64 in the backend.
3236 if (VTy->getElementType()->isIntegerTy(Bitwidth: 64)) {
3237 // Extract the other lane.
3238 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
3239 int Lane = cast<ConstantInt>(Val: Ops[2])->getZExtValue();
3240 Value *SV = llvm::ConstantVector::get(V: ConstantInt::get(Ty: Int32Ty, V: 1-Lane));
3241 Ops[1] = Builder.CreateShuffleVector(V1: Ops[1], V2: Ops[1], Mask: SV);
3242 // Load the value as a one-element vector.
3243 Ty = llvm::FixedVectorType::get(ElementType: VTy->getElementType(), NumElts: 1);
3244 llvm::Type *Tys[] = {Ty, Int8PtrTy};
3245 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
3246 Value *Align = getAlignmentValue32(PtrOp0);
3247 Value *Ld = Builder.CreateCall(Callee: F, Args: {Ops[0], Align});
3248 // Combine them.
3249 int Indices[] = {1 - Lane, Lane};
3250 return Builder.CreateShuffleVector(V1: Ops[1], V2: Ld, Mask: Indices, Name: "vld1q_lane");
3251 }
3252 [[fallthrough]];
3253 case NEON::BI__builtin_neon_vld1_lane_v: {
3254 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
3255 PtrOp0 = PtrOp0.withElementType(ElemTy: VTy->getElementType());
3256 Value *Ld = Builder.CreateLoad(Addr: PtrOp0);
3257 return Builder.CreateInsertElement(Vec: Ops[1], NewElt: Ld, Idx: Ops[2], Name: "vld1_lane");
3258 }
3259 case NEON::BI__builtin_neon_vqrshrn_n_v:
3260 Int =
3261 usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
3262 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vqrshrn_n",
3263 shift: 1, rightshift: true);
3264 case NEON::BI__builtin_neon_vqrshrun_n_v:
3265 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
3266 Ops, "vqrshrun_n", 1, true);
3267 case NEON::BI__builtin_neon_vqshrn_n_v:
3268 Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
3269 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vqshrn_n",
3270 shift: 1, rightshift: true);
3271 case NEON::BI__builtin_neon_vqshrun_n_v:
3272 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
3273 Ops, "vqshrun_n", 1, true);
3274 case NEON::BI__builtin_neon_vrecpe_v:
3275 case NEON::BI__builtin_neon_vrecpeq_v:
3276 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
3277 Ops, "vrecpe");
3278 case NEON::BI__builtin_neon_vrshrn_n_v:
3279 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
3280 Ops, "vrshrn_n", 1, true);
3281 case NEON::BI__builtin_neon_vrsra_n_v:
3282 case NEON::BI__builtin_neon_vrsraq_n_v:
3283 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Ty);
3284 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
3285 Ops[2] = EmitNeonShiftVector(V: Ops[2], Ty, neg: true);
3286 Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
3287 Ops[1] = Builder.CreateCall(Callee: CGM.getIntrinsic(IID: Int, Tys: Ty), Args: {Ops[1], Ops[2]});
3288 return Builder.CreateAdd(LHS: Ops[0], RHS: Ops[1], Name: "vrsra_n");
3289 case NEON::BI__builtin_neon_vsri_n_v:
3290 case NEON::BI__builtin_neon_vsriq_n_v:
3291 rightShift = true;
3292 [[fallthrough]];
3293 case NEON::BI__builtin_neon_vsli_n_v:
3294 case NEON::BI__builtin_neon_vsliq_n_v:
3295 Ops[2] = EmitNeonShiftVector(V: Ops[2], Ty, neg: rightShift);
3296 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
3297 Ops, "vsli_n");
3298 case NEON::BI__builtin_neon_vsra_n_v:
3299 case NEON::BI__builtin_neon_vsraq_n_v:
3300 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Ty);
3301 Ops[1] = EmitNeonRShiftImm(Vec: Ops[1], Shift: Ops[2], Ty, usgn, name: "vsra_n");
3302 return Builder.CreateAdd(LHS: Ops[0], RHS: Ops[1]);
3303 case NEON::BI__builtin_neon_vst1q_lane_v:
3304 // Handle 64-bit integer elements as a special case. Use a shuffle to get
3305 // a one-element vector and avoid poor code for i64 in the backend.
3306 if (VTy->getElementType()->isIntegerTy(Bitwidth: 64)) {
3307 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
3308 Value *SV = llvm::ConstantVector::get(V: cast<llvm::Constant>(Val: Ops[2]));
3309 Ops[1] = Builder.CreateShuffleVector(V1: Ops[1], V2: Ops[1], Mask: SV);
3310 Ops[2] = getAlignmentValue32(PtrOp0);
3311 llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
3312 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
3313 Tys), Ops);
3314 }
3315 [[fallthrough]];
3316 case NEON::BI__builtin_neon_vst1_lane_v: {
3317 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
3318 Ops[1] = Builder.CreateExtractElement(Vec: Ops[1], Idx: Ops[2]);
3319 return Builder.CreateStore(Val: Ops[1],
3320 Addr: PtrOp0.withElementType(ElemTy: Ops[1]->getType()));
3321 }
3322 case NEON::BI__builtin_neon_vtbl1_v:
3323 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
3324 Ops, "vtbl1");
3325 case NEON::BI__builtin_neon_vtbl2_v:
3326 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
3327 Ops, "vtbl2");
3328 case NEON::BI__builtin_neon_vtbl3_v:
3329 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
3330 Ops, "vtbl3");
3331 case NEON::BI__builtin_neon_vtbl4_v:
3332 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
3333 Ops, "vtbl4");
3334 case NEON::BI__builtin_neon_vtbx1_v:
3335 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
3336 Ops, "vtbx1");
3337 case NEON::BI__builtin_neon_vtbx2_v:
3338 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
3339 Ops, "vtbx2");
3340 case NEON::BI__builtin_neon_vtbx3_v:
3341 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
3342 Ops, "vtbx3");
3343 case NEON::BI__builtin_neon_vtbx4_v:
3344 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
3345 Ops, "vtbx4");
3346 }
3347}
3348
3349template<typename Integer>
3350static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context) {
3351 return E->getIntegerConstantExpr(Context)->getExtValue();
3352}
3353
3354static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V,
3355 llvm::Type *T, bool Unsigned) {
3356 // Helper function called by Tablegen-constructed ARM MVE builtin codegen,
3357 // which finds it convenient to specify signed/unsigned as a boolean flag.
3358 return Unsigned ? Builder.CreateZExt(V, T) : Builder.CreateSExt(V, T);
3359}
3360
3361static llvm::Value *MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V,
3362 uint32_t Shift, bool Unsigned) {
3363 // MVE helper function for integer shift right. This must handle signed vs
3364 // unsigned, and also deal specially with the case where the shift count is
3365 // equal to the lane size. In LLVM IR, an LShr with that parameter would be
3366 // undefined behavior, but in MVE it's legal, so we must convert it to code
3367 // that is not undefined in IR.
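// e.g. for 16-bit lanes a requested shift of 16 becomes: unsigned -> an
// all-zero vector; signed -> an arithmetic shift by 15, which fills every
// lane with its sign bit, matching the MVE semantics described above.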
3368 unsigned LaneBits = cast<llvm::VectorType>(V->getType())
3369 ->getElementType()
3370 ->getPrimitiveSizeInBits();
3371 if (Shift == LaneBits) {
3372 // An unsigned shift of the full lane size always generates zero, so we can
3373 // simply emit a zero vector. A signed shift of the full lane size does the
3374 // same thing as shifting by one bit fewer.
3375 if (Unsigned)
3376 return llvm::Constant::getNullValue(V->getType());
3377 else
3378 --Shift;
3379 }
3380 return Unsigned ? Builder.CreateLShr(V, Shift) : Builder.CreateAShr(V, Shift);
3381}
3382
3383static llvm::Value *ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V) {
3384 // MVE-specific helper function for a vector splat, which infers the element
3385 // count of the output vector by knowing that MVE vectors are all 128 bits
3386 // wide.
3387 unsigned Elements = 128 / V->getType()->getPrimitiveSizeInBits();
3388 return Builder.CreateVectorSplat(Elements, V);
3389}
3390
3391static llvm::Value *ARMMVEVectorReinterpret(CGBuilderTy &Builder,
3392 CodeGenFunction *CGF,
3393 llvm::Value *V,
3394 llvm::Type *DestType) {
3395 // Convert one MVE vector type into another by reinterpreting its in-register
3396 // format.
3397 //
3398 // On little-endian targets this is identical to a bitcast (which reinterprets
3399 // the memory format). On big-endian targets they're not necessarily the same,
3400 // because the register and memory formats map to each other differently
3401 // depending on the lane size.
3402 //
3403 // We generate a bitcast whenever we can (if we're little-endian, or if the
3404 // lane sizes are the same anyway). Otherwise we fall back to an IR intrinsic
3405 // that performs the different kind of reinterpretation.
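// e.g. reinterpreting a <8 x i16> vector as <16 x i8> on a big-endian
// target goes through the llvm.arm.mve.vreinterpretq intrinsic; on a
// little-endian target it is emitted as a plain bitcast.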
3406 if (CGF->getTarget().isBigEndian() &&
3407 V->getType()->getScalarSizeInBits() != DestType->getScalarSizeInBits()) {
3408 return Builder.CreateCall(
3409 CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vreinterpretq,
3410 {DestType, V->getType()}),
3411 V);
3412 } else {
3413 return Builder.CreateBitCast(V, DestType);
3414 }
3415}
3416
3417static llvm::Value *VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd) {
3418 // Make a shufflevector that extracts every other element of a vector (evens
3419 // or odds, as desired).
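// e.g. for an <8 x i16> input, Odd=false selects lanes <0, 2, 4, 6> and
// Odd=true selects lanes <1, 3, 5, 7>.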
3420 SmallVector<int, 16> Indices;
3421 unsigned InputElements =
3422 cast<llvm::FixedVectorType>(V->getType())->getNumElements();
3423 for (unsigned i = 0; i < InputElements; i += 2)
3424 Indices.push_back(i + Odd);
3425 return Builder.CreateShuffleVector(V, Indices);
3426}
3427
3428static llvm::Value *VectorZip(CGBuilderTy &Builder, llvm::Value *V0,
3429 llvm::Value *V1) {
3430 // Make a shufflevector that interleaves two vectors element by element.
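// e.g. zipping two <4 x i32> vectors uses the shuffle mask
// <0, 4, 1, 5, 2, 6, 3, 7>.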
3431 assert(V0->getType() == V1->getType() && "Can't zip different vector types");
3432 SmallVector<int, 16> Indices;
3433 unsigned InputElements =
3434 cast<llvm::FixedVectorType>(V0->getType())->getNumElements();
3435 for (unsigned i = 0; i < InputElements; i++) {
3436 Indices.push_back(i);
3437 Indices.push_back(i + InputElements);
3438 }
3439 return Builder.CreateShuffleVector(V0, V1, Indices);
3440}
3441
3442template<unsigned HighBit, unsigned OtherBits>
3443static llvm::Value *ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT) {
3444 // MVE-specific helper function to make a vector splat of a constant such as
3445 // UINT_MAX or INT_MIN, in which all bits below the highest one are equal.
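// e.g. with 16-bit lanes, HighBit=1/OtherBits=0 yields a splat of 0x8000
// (INT16_MIN) and HighBit=1/OtherBits=1 yields 0xffff (UINT16_MAX).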
3446 llvm::Type *T = cast<llvm::VectorType>(VT)->getElementType();
3447 unsigned LaneBits = T->getPrimitiveSizeInBits();
3448 uint32_t Value = HighBit << (LaneBits - 1);
3449 if (OtherBits)
3450 Value |= (1UL << (LaneBits - 1)) - 1;
3451 llvm::Value *Lane = llvm::ConstantInt::get(T, Value);
3452 return ARMMVEVectorSplat(Builder, Lane);
3453}
3454
3455static llvm::Value *ARMMVEVectorElementReverse(CGBuilderTy &Builder,
3456 llvm::Value *V,
3457 unsigned ReverseWidth) {
3458 // MVE-specific helper function which reverses the elements of a
3459 // vector within every (ReverseWidth)-bit collection of lanes.
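// e.g. reversing within 32-bit chunks of an <8 x i16> vector uses the
// shuffle mask <1, 0, 3, 2, 5, 4, 7, 6>.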
3460 SmallVector<int, 16> Indices;
3461 unsigned LaneSize = V->getType()->getScalarSizeInBits();
3462 unsigned Elements = 128 / LaneSize;
3463 unsigned Mask = ReverseWidth / LaneSize - 1;
3464 for (unsigned i = 0; i < Elements; i++)
3465 Indices.push_back(i ^ Mask);
3466 return Builder.CreateShuffleVector(V, Indices);
3467}
3468
3469Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID,
3470 const CallExpr *E,
3471 ReturnValueSlot ReturnValue,
3472 llvm::Triple::ArchType Arch) {
3473 enum class CustomCodeGen { VLD24, VST24 } CustomCodeGenType;
3474 Intrinsic::ID IRIntr;
3475 unsigned NumVectors;
3476
3477 // Code autogenerated by Tablegen will handle all the simple builtins.
3478 switch (BuiltinID) {
3479 #include "clang/Basic/arm_mve_builtin_cg.inc"
3480
3481 // If we didn't match an MVE builtin id at all, go back to the
3482 // main EmitARMBuiltinExpr.
3483 default:
3484 return nullptr;
3485 }
3486
3487 // Anything that breaks from that switch is an MVE builtin that
3488 // needs handwritten code to generate.
3489
3490 switch (CustomCodeGenType) {
3491
3492 case CustomCodeGen::VLD24: {
3493 llvm::SmallVector<Value *, 4> Ops;
3494 llvm::SmallVector<llvm::Type *, 4> Tys;
3495
3496 auto MvecCType = E->getType();
3497 auto MvecLType = ConvertType(MvecCType);
3498 assert(MvecLType->isStructTy() &&
3499 "Return type for vld[24]q should be a struct");
3500 assert(MvecLType->getStructNumElements() == 1 &&
3501 "Return-type struct for vld[24]q should have one element");
3502 auto MvecLTypeInner = MvecLType->getStructElementType(0);
3503 assert(MvecLTypeInner->isArrayTy() &&
3504 "Return-type struct for vld[24]q should contain an array");
3505 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
3506 "Array member of return-type struct vld[24]q has wrong length");
3507 auto VecLType = MvecLTypeInner->getArrayElementType();
3508
3509 Tys.push_back(VecLType);
3510
3511 auto Addr = E->getArg(Arg: 0);
3512 Ops.push_back(Elt: EmitScalarExpr(E: Addr));
3513 Tys.push_back(ConvertType(T: Addr->getType()));
3514
3515 Function *F = CGM.getIntrinsic(IID: IRIntr, Tys: ArrayRef(Tys));
3516 Value *LoadResult = Builder.CreateCall(F, Ops);
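    // The IR intrinsic returns NumVectors separate vectors in a flat struct;
    // repack them into the single-element struct-of-array type used for the
    // ACLE return value.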
3517 Value *MvecOut = PoisonValue::get(T: MvecLType);
3518 for (unsigned i = 0; i < NumVectors; ++i) {
3519 Value *Vec = Builder.CreateExtractValue(Agg: LoadResult, Idxs: i);
3520 MvecOut = Builder.CreateInsertValue(Agg: MvecOut, Val: Vec, Idxs: {0, i});
3521 }
3522
3523 if (ReturnValue.isNull())
3524 return MvecOut;
3525 else
3526 return Builder.CreateStore(Val: MvecOut, Addr: ReturnValue.getAddress());
3527 }
3528
3529 case CustomCodeGen::VST24: {
3530 llvm::SmallVector<Value *, 4> Ops;
3531 llvm::SmallVector<llvm::Type *, 4> Tys;
3532
3533 auto Addr = E->getArg(Arg: 0);
3534 Ops.push_back(Elt: EmitScalarExpr(E: Addr));
3535 Tys.push_back(ConvertType(T: Addr->getType()));
3536
3537 auto MvecCType = E->getArg(Arg: 1)->getType();
3538 auto MvecLType = ConvertType(T: MvecCType);
3539 assert(MvecLType->isStructTy() && "Data type for vst2q should be a struct");
3540 assert(MvecLType->getStructNumElements() == 1 &&
3541 "Data-type struct for vst2q should have one element");
3542 auto MvecLTypeInner = MvecLType->getStructElementType(N: 0);
3543 assert(MvecLTypeInner->isArrayTy() &&
3544 "Data-type struct for vst2q should contain an array");
3545 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
           "Array member of data-type struct for vst2q has wrong length");
3547 auto VecLType = MvecLTypeInner->getArrayElementType();
3548
3549 Tys.push_back(VecLType);
3550
3551 AggValueSlot MvecSlot = CreateAggTemp(T: MvecCType);
3552 EmitAggExpr(E: E->getArg(Arg: 1), AS: MvecSlot);
3553 auto Mvec = Builder.CreateLoad(Addr: MvecSlot.getAddress());
3554 for (unsigned i = 0; i < NumVectors; i++)
3555 Ops.push_back(Elt: Builder.CreateExtractValue(Agg: Mvec, Idxs: {0, i}));
3556
3557 Function *F = CGM.getIntrinsic(IID: IRIntr, Tys: ArrayRef(Tys));
3558 Value *ToReturn = nullptr;
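    // The MVE vst2q/vst4q IR intrinsics store one interleaved "stage" per
    // call, so emit NumVectors calls, appending the stage index as the final
    // operand each time.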
3559 for (unsigned i = 0; i < NumVectors; i++) {
3560 Ops.push_back(Elt: llvm::ConstantInt::get(Ty: Int32Ty, V: i));
3561 ToReturn = Builder.CreateCall(F, Ops);
3562 Ops.pop_back();
3563 }
3564 return ToReturn;
3565 }
3566 }
3567 llvm_unreachable("unknown custom codegen type.");
3568}
3569
3570Value *CodeGenFunction::EmitARMCDEBuiltinExpr(unsigned BuiltinID,
3571 const CallExpr *E,
3572 ReturnValueSlot ReturnValue,
3573 llvm::Triple::ArchType Arch) {
3574 switch (BuiltinID) {
3575 default:
3576 return nullptr;
3577#include "clang/Basic/arm_cde_builtin_cg.inc"
3578 }
3579}
3580
3581static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
3582 const CallExpr *E,
3583 SmallVectorImpl<Value *> &Ops,
3584 llvm::Triple::ArchType Arch) {
3585 unsigned int Int = 0;
3586 const char *s = nullptr;
3587
3588 switch (BuiltinID) {
3589 default:
3590 return nullptr;
3591 case NEON::BI__builtin_neon_vtbl1_v:
3592 case NEON::BI__builtin_neon_vqtbl1_v:
3593 case NEON::BI__builtin_neon_vqtbl1q_v:
3594 case NEON::BI__builtin_neon_vtbl2_v:
3595 case NEON::BI__builtin_neon_vqtbl2_v:
3596 case NEON::BI__builtin_neon_vqtbl2q_v:
3597 case NEON::BI__builtin_neon_vtbl3_v:
3598 case NEON::BI__builtin_neon_vqtbl3_v:
3599 case NEON::BI__builtin_neon_vqtbl3q_v:
3600 case NEON::BI__builtin_neon_vtbl4_v:
3601 case NEON::BI__builtin_neon_vqtbl4_v:
3602 case NEON::BI__builtin_neon_vqtbl4q_v:
3603 break;
3604 case NEON::BI__builtin_neon_vtbx1_v:
3605 case NEON::BI__builtin_neon_vqtbx1_v:
3606 case NEON::BI__builtin_neon_vqtbx1q_v:
3607 case NEON::BI__builtin_neon_vtbx2_v:
3608 case NEON::BI__builtin_neon_vqtbx2_v:
3609 case NEON::BI__builtin_neon_vqtbx2q_v:
3610 case NEON::BI__builtin_neon_vtbx3_v:
3611 case NEON::BI__builtin_neon_vqtbx3_v:
3612 case NEON::BI__builtin_neon_vqtbx3q_v:
3613 case NEON::BI__builtin_neon_vtbx4_v:
3614 case NEON::BI__builtin_neon_vqtbx4_v:
3615 case NEON::BI__builtin_neon_vqtbx4q_v:
3616 break;
3617 }
3618
3619 assert(E->getNumArgs() >= 3);
3620
3621 // Get the last argument, which specifies the vector type.
3622 const Expr *Arg = E->getArg(Arg: E->getNumArgs() - 1);
3623 std::optional<llvm::APSInt> Result =
3624 Arg->getIntegerConstantExpr(Ctx: CGF.getContext());
3625 if (!Result)
3626 return nullptr;
3627
3628 // Determine the type of this overloaded NEON intrinsic.
3629 NeonTypeFlags Type = Result->getZExtValue();
3630 llvm::FixedVectorType *Ty = GetNeonType(CGF: &CGF, TypeFlags: Type);
3631 if (!Ty)
3632 return nullptr;
3633
3634 CodeGen::CGBuilderTy &Builder = CGF.Builder;
3635
  // AArch64 scalar builtins are not overloaded: they do not have an extra
  // argument that specifies the vector type, so each case must be handled
  // individually.
3638 switch (BuiltinID) {
3639 case NEON::BI__builtin_neon_vtbl1_v: {
3640 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 1), nullptr, Ops[1],
3641 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
3642 }
3643 case NEON::BI__builtin_neon_vtbl2_v: {
3644 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 2), nullptr, Ops[2],
3645 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
3646 }
3647 case NEON::BI__builtin_neon_vtbl3_v: {
3648 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 3), nullptr, Ops[3],
3649 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
3650 }
3651 case NEON::BI__builtin_neon_vtbl4_v: {
3652 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 4), nullptr, Ops[4],
3653 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
3654 }
3655 case NEON::BI__builtin_neon_vtbx1_v: {
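    // AArch64 TBL returns zero for out-of-range indices, whereas vtbx1 must
    // leave the corresponding destination element unchanged. Emulate that by
    // doing the lookup and then selecting between the lookup result and the
    // original destination based on whether each index is in range (< 8).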
3656 Value *TblRes =
3657 packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 1), nullptr, Ops[2], Ty,
3658 Intrinsic::aarch64_neon_tbl1, "vtbl1");
3659
3660 llvm::Constant *EightV = ConstantInt::get(Ty, V: 8);
3661 Value *CmpRes = Builder.CreateICmp(P: ICmpInst::ICMP_UGE, LHS: Ops[2], RHS: EightV);
3662 CmpRes = Builder.CreateSExt(V: CmpRes, DestTy: Ty);
3663
3664 Value *EltsFromInput = Builder.CreateAnd(LHS: CmpRes, RHS: Ops[0]);
3665 Value *EltsFromTbl = Builder.CreateAnd(LHS: Builder.CreateNot(V: CmpRes), RHS: TblRes);
3666 return Builder.CreateOr(LHS: EltsFromInput, RHS: EltsFromTbl, Name: "vtbx");
3667 }
3668 case NEON::BI__builtin_neon_vtbx2_v: {
3669 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 2), Ops[0], Ops[3],
3670 Ty, Intrinsic::aarch64_neon_tbx1, "vtbx1");
3671 }
3672 case NEON::BI__builtin_neon_vtbx3_v: {
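    // Same emulation as vtbx1 above: only 24 table bytes are provided, so
    // indices >= 24 select the element from the destination operand instead.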
3673 Value *TblRes =
3674 packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 3), nullptr, Ops[4], Ty,
3675 Intrinsic::aarch64_neon_tbl2, "vtbl2");
3676
3677 llvm::Constant *TwentyFourV = ConstantInt::get(Ty, V: 24);
3678 Value *CmpRes = Builder.CreateICmp(P: ICmpInst::ICMP_UGE, LHS: Ops[4],
3679 RHS: TwentyFourV);
3680 CmpRes = Builder.CreateSExt(V: CmpRes, DestTy: Ty);
3681
3682 Value *EltsFromInput = Builder.CreateAnd(LHS: CmpRes, RHS: Ops[0]);
3683 Value *EltsFromTbl = Builder.CreateAnd(LHS: Builder.CreateNot(V: CmpRes), RHS: TblRes);
3684 return Builder.CreateOr(LHS: EltsFromInput, RHS: EltsFromTbl, Name: "vtbx");
3685 }
3686 case NEON::BI__builtin_neon_vtbx4_v: {
3687 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 4), Ops[0], Ops[5],
3688 Ty, Intrinsic::aarch64_neon_tbx2, "vtbx2");
3689 }
3690 case NEON::BI__builtin_neon_vqtbl1_v:
3691 case NEON::BI__builtin_neon_vqtbl1q_v:
3692 Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
3693 case NEON::BI__builtin_neon_vqtbl2_v:
  case NEON::BI__builtin_neon_vqtbl2q_v:
3695 Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
3696 case NEON::BI__builtin_neon_vqtbl3_v:
3697 case NEON::BI__builtin_neon_vqtbl3q_v:
3698 Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
3699 case NEON::BI__builtin_neon_vqtbl4_v:
3700 case NEON::BI__builtin_neon_vqtbl4q_v:
3701 Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
3702 case NEON::BI__builtin_neon_vqtbx1_v:
3703 case NEON::BI__builtin_neon_vqtbx1q_v:
3704 Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
3705 case NEON::BI__builtin_neon_vqtbx2_v:
3706 case NEON::BI__builtin_neon_vqtbx2q_v:
3707 Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
3708 case NEON::BI__builtin_neon_vqtbx3_v:
3709 case NEON::BI__builtin_neon_vqtbx3q_v:
3710 Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
3711 case NEON::BI__builtin_neon_vqtbx4_v:
3712 case NEON::BI__builtin_neon_vqtbx4q_v:
3713 Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
  }
3716
3717 if (!Int)
3718 return nullptr;
3719
3720 Function *F = CGF.CGM.getIntrinsic(IID: Int, Tys: Ty);
3721 return CGF.EmitNeonCall(F, Ops, name: s);
3722}
3723
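// Wrap a scalar i16 value in lane 0 of a <4 x i16> vector. This is used by
// callers that implement scalar NEON builtins in terms of vector intrinsics.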
3724Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
3725 auto *VTy = llvm::FixedVectorType::get(ElementType: Int16Ty, NumElts: 4);
3726 Op = Builder.CreateBitCast(V: Op, DestTy: Int16Ty);
3727 Value *V = PoisonValue::get(T: VTy);
3728 llvm::Constant *CI = ConstantInt::get(Ty: SizeTy, V: 0);
3729 Op = Builder.CreateInsertElement(Vec: V, NewElt: Op, Idx: CI);
3730 return Op;
3731}
3732
3733/// SVEBuiltinMemEltTy - Returns the memory element type for this memory
3734/// access builtin. Only required if it can't be inferred from the base pointer
3735/// operand.
3736llvm::Type *CodeGenFunction::SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags) {
3737 switch (TypeFlags.getMemEltType()) {
3738 case SVETypeFlags::MemEltTyDefault:
3739 return getEltType(TypeFlags);
3740 case SVETypeFlags::MemEltTyInt8:
3741 return Builder.getInt8Ty();
3742 case SVETypeFlags::MemEltTyInt16:
3743 return Builder.getInt16Ty();
3744 case SVETypeFlags::MemEltTyInt32:
3745 return Builder.getInt32Ty();
3746 case SVETypeFlags::MemEltTyInt64:
3747 return Builder.getInt64Ty();
3748 }
3749 llvm_unreachable("Unknown MemEltType");
3750}
3751
3752llvm::Type *CodeGenFunction::getEltType(const SVETypeFlags &TypeFlags) {
3753 switch (TypeFlags.getEltType()) {
3754 default:
3755 llvm_unreachable("Invalid SVETypeFlag!");
3756
3757 case SVETypeFlags::EltTyMFloat8:
3758 case SVETypeFlags::EltTyInt8:
3759 return Builder.getInt8Ty();
3760 case SVETypeFlags::EltTyInt16:
3761 return Builder.getInt16Ty();
3762 case SVETypeFlags::EltTyInt32:
3763 return Builder.getInt32Ty();
3764 case SVETypeFlags::EltTyInt64:
3765 return Builder.getInt64Ty();
3766 case SVETypeFlags::EltTyInt128:
3767 return Builder.getInt128Ty();
3768
3769 case SVETypeFlags::EltTyFloat16:
3770 return Builder.getHalfTy();
3771 case SVETypeFlags::EltTyFloat32:
3772 return Builder.getFloatTy();
3773 case SVETypeFlags::EltTyFloat64:
3774 return Builder.getDoubleTy();
3775
3776 case SVETypeFlags::EltTyBFloat16:
3777 return Builder.getBFloatTy();
3778
3779 case SVETypeFlags::EltTyBool8:
3780 case SVETypeFlags::EltTyBool16:
3781 case SVETypeFlags::EltTyBool32:
3782 case SVETypeFlags::EltTyBool64:
3783 return Builder.getInt1Ty();
3784 }
3785}
3786
// Return the llvm predicate vector type corresponding to the element type
// specified by TypeFlags.
3789llvm::ScalableVectorType *
3790CodeGenFunction::getSVEPredType(const SVETypeFlags &TypeFlags) {
3791 switch (TypeFlags.getEltType()) {
3792 default: llvm_unreachable("Unhandled SVETypeFlag!");
3793
3794 case SVETypeFlags::EltTyInt8:
3795 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 16);
3796 case SVETypeFlags::EltTyInt16:
3797 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 8);
3798 case SVETypeFlags::EltTyInt32:
3799 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 4);
3800 case SVETypeFlags::EltTyInt64:
3801 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 2);
3802
3803 case SVETypeFlags::EltTyBFloat16:
3804 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 8);
3805 case SVETypeFlags::EltTyFloat16:
3806 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 8);
3807 case SVETypeFlags::EltTyFloat32:
3808 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 4);
3809 case SVETypeFlags::EltTyFloat64:
3810 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 2);
3811
3812 case SVETypeFlags::EltTyBool8:
3813 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 16);
3814 case SVETypeFlags::EltTyBool16:
3815 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 8);
3816 case SVETypeFlags::EltTyBool32:
3817 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 4);
3818 case SVETypeFlags::EltTyBool64:
3819 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 2);
3820 }
3821}
3822
// Return the llvm vector type corresponding to the element type specified by
// TypeFlags.
3824llvm::ScalableVectorType *
3825CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) {
3826 switch (TypeFlags.getEltType()) {
3827 default:
3828 llvm_unreachable("Invalid SVETypeFlag!");
3829
3830 case SVETypeFlags::EltTyInt8:
3831 return llvm::ScalableVectorType::get(ElementType: Builder.getInt8Ty(), MinNumElts: 16);
3832 case SVETypeFlags::EltTyInt16:
3833 return llvm::ScalableVectorType::get(ElementType: Builder.getInt16Ty(), MinNumElts: 8);
3834 case SVETypeFlags::EltTyInt32:
3835 return llvm::ScalableVectorType::get(ElementType: Builder.getInt32Ty(), MinNumElts: 4);
3836 case SVETypeFlags::EltTyInt64:
3837 return llvm::ScalableVectorType::get(ElementType: Builder.getInt64Ty(), MinNumElts: 2);
3838
3839 case SVETypeFlags::EltTyMFloat8:
3840 return llvm::ScalableVectorType::get(ElementType: Builder.getInt8Ty(), MinNumElts: 16);
3841 case SVETypeFlags::EltTyFloat16:
3842 return llvm::ScalableVectorType::get(ElementType: Builder.getHalfTy(), MinNumElts: 8);
3843 case SVETypeFlags::EltTyBFloat16:
3844 return llvm::ScalableVectorType::get(ElementType: Builder.getBFloatTy(), MinNumElts: 8);
3845 case SVETypeFlags::EltTyFloat32:
3846 return llvm::ScalableVectorType::get(ElementType: Builder.getFloatTy(), MinNumElts: 4);
3847 case SVETypeFlags::EltTyFloat64:
3848 return llvm::ScalableVectorType::get(ElementType: Builder.getDoubleTy(), MinNumElts: 2);
3849
3850 case SVETypeFlags::EltTyBool8:
3851 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 16);
3852 case SVETypeFlags::EltTyBool16:
3853 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 8);
3854 case SVETypeFlags::EltTyBool32:
3855 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 4);
3856 case SVETypeFlags::EltTyBool64:
3857 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 2);
3858 }
3859}
3860
3861llvm::Value *
3862CodeGenFunction::EmitSVEAllTruePred(const SVETypeFlags &TypeFlags) {
3863 Function *Ptrue =
3864 CGM.getIntrinsic(Intrinsic::aarch64_sve_ptrue, getSVEPredType(TypeFlags));
3865 return Builder.CreateCall(Callee: Ptrue, Args: {Builder.getInt32(/*SV_ALL*/ C: 31)});
3866}
3867
3868constexpr unsigned SVEBitsPerBlock = 128;
3869
3870static llvm::ScalableVectorType *getSVEVectorForElementType(llvm::Type *EltTy) {
3871 unsigned NumElts = SVEBitsPerBlock / EltTy->getScalarSizeInBits();
3872 return llvm::ScalableVectorType::get(ElementType: EltTy, MinNumElts: NumElts);
3873}
3874
3875// Reinterpret the input predicate so that it can be used to correctly isolate
3876// the elements of the specified datatype.
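// For example (illustrative): an svbool_t predicate arrives as
// <vscale x 16 x i1>; when it guards 64-bit elements it is narrowed to
// <vscale x 2 x i1> via the convert.from.svbool intrinsic.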
3877Value *CodeGenFunction::EmitSVEPredicateCast(Value *Pred,
3878 llvm::ScalableVectorType *VTy) {
3879
3880 if (isa<TargetExtType>(Val: Pred->getType()) &&
3881 cast<TargetExtType>(Val: Pred->getType())->getName() == "aarch64.svcount")
3882 return Pred;
3883
3884 auto *RTy = llvm::VectorType::get(ElementType: IntegerType::get(C&: getLLVMContext(), NumBits: 1), Other: VTy);
3885 if (Pred->getType() == RTy)
3886 return Pred;
3887
3888 unsigned IntID;
3889 llvm::Type *IntrinsicTy;
3890 switch (VTy->getMinNumElements()) {
3891 default:
3892 llvm_unreachable("unsupported element count!");
3893 case 1:
3894 case 2:
3895 case 4:
3896 case 8:
3897 IntID = Intrinsic::aarch64_sve_convert_from_svbool;
3898 IntrinsicTy = RTy;
3899 break;
3900 case 16:
3901 IntID = Intrinsic::aarch64_sve_convert_to_svbool;
3902 IntrinsicTy = Pred->getType();
3903 break;
3904 }
3905
3906 Function *F = CGM.getIntrinsic(IID: IntID, Tys: IntrinsicTy);
3907 Value *C = Builder.CreateCall(Callee: F, Args: Pred);
3908 assert(C->getType() == RTy && "Unexpected return type!");
3909 return C;
3910}
3911
3912Value *CodeGenFunction::EmitSVEPredicateTupleCast(Value *PredTuple,
3913 llvm::StructType *Ty) {
3914 if (PredTuple->getType() == Ty)
3915 return PredTuple;
3916
3917 Value *Ret = llvm::PoisonValue::get(T: Ty);
3918 for (unsigned I = 0; I < Ty->getNumElements(); ++I) {
3919 Value *Pred = Builder.CreateExtractValue(Agg: PredTuple, Idxs: I);
3920 Pred = EmitSVEPredicateCast(
3921 Pred, VTy: cast<llvm::ScalableVectorType>(Val: Ty->getTypeAtIndex(N: I)));
3922 Ret = Builder.CreateInsertValue(Agg: Ret, Val: Pred, Idxs: I);
3923 }
3924
3925 return Ret;
3926}
3927
3928Value *CodeGenFunction::EmitSVEGatherLoad(const SVETypeFlags &TypeFlags,
3929 SmallVectorImpl<Value *> &Ops,
3930 unsigned IntID) {
3931 auto *ResultTy = getSVEType(TypeFlags);
3932 auto *OverloadedTy =
3933 llvm::ScalableVectorType::get(ElementType: SVEBuiltinMemEltTy(TypeFlags), SVTy: ResultTy);
3934
3935 Function *F = nullptr;
3936 if (Ops[1]->getType()->isVectorTy())
3937 // This is the "vector base, scalar offset" case. In order to uniquely
3938 // map this built-in to an LLVM IR intrinsic, we need both the return type
3939 // and the type of the vector base.
3940 F = CGM.getIntrinsic(IID: IntID, Tys: {OverloadedTy, Ops[1]->getType()});
3941 else
    // This is the "scalar base, vector offset" case. The type of the offset
3943 // is encoded in the name of the intrinsic. We only need to specify the
3944 // return type in order to uniquely map this built-in to an LLVM IR
3945 // intrinsic.
3946 F = CGM.getIntrinsic(IID: IntID, Tys: OverloadedTy);
3947
  // At the ACLE level there's only one predicate type, svbool_t, which is
  // mapped to <n x 16 x i1>. However, this might be incompatible with the
  // actual type being loaded. For example, when loading 64-bit elements
  // (e.g. doubles) the predicate should be <n x 2 x i1> instead. At the IR
  // level the type of the predicate and the data being loaded must match.
  // Cast to the type expected by the intrinsic. The intrinsic itself should
  // be defined in a way that enforces relations between parameter types.
3955 Ops[0] = EmitSVEPredicateCast(
3956 Pred: Ops[0], VTy: cast<llvm::ScalableVectorType>(Val: F->getArg(i: 0)->getType()));
3957
3958 // Pass 0 when the offset is missing. This can only be applied when using
3959 // the "vector base" addressing mode for which ACLE allows no offset. The
3960 // corresponding LLVM IR always requires an offset.
3961 if (Ops.size() == 2) {
3962 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
3963 Ops.push_back(Elt: ConstantInt::get(Ty: Int64Ty, V: 0));
3964 }
3965
3966 // For "vector base, scalar index" scale the index so that it becomes a
3967 // scalar offset.
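  // For example (illustrative), with 64-bit elements BytesPerElt is 8, so the
  // index is shifted left by 3 to turn it into a byte offset.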
3968 if (!TypeFlags.isByteIndexed() && Ops[1]->getType()->isVectorTy()) {
3969 unsigned BytesPerElt =
3970 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
3971 Ops[2] = Builder.CreateShl(LHS: Ops[2], RHS: Log2_32(Value: BytesPerElt));
3972 }
3973
3974 Value *Call = Builder.CreateCall(Callee: F, Args: Ops);
3975
3976 // The following sext/zext is only needed when ResultTy != OverloadedTy. In
3977 // other cases it's folded into a nop.
3978 return TypeFlags.isZExtReturn() ? Builder.CreateZExt(V: Call, DestTy: ResultTy)
3979 : Builder.CreateSExt(V: Call, DestTy: ResultTy);
3980}
3981
3982Value *CodeGenFunction::EmitSVEScatterStore(const SVETypeFlags &TypeFlags,
3983 SmallVectorImpl<Value *> &Ops,
3984 unsigned IntID) {
3985 auto *SrcDataTy = getSVEType(TypeFlags);
3986 auto *OverloadedTy =
3987 llvm::ScalableVectorType::get(ElementType: SVEBuiltinMemEltTy(TypeFlags), SVTy: SrcDataTy);
3988
3989 // In ACLE the source data is passed in the last argument, whereas in LLVM IR
3990 // it's the first argument. Move it accordingly.
3991 Ops.insert(I: Ops.begin(), Elt: Ops.pop_back_val());
3992
3993 Function *F = nullptr;
3994 if (Ops[2]->getType()->isVectorTy())
3995 // This is the "vector base, scalar offset" case. In order to uniquely
3996 // map this built-in to an LLVM IR intrinsic, we need both the return type
3997 // and the type of the vector base.
3998 F = CGM.getIntrinsic(IID: IntID, Tys: {OverloadedTy, Ops[2]->getType()});
3999 else
    // This is the "scalar base, vector offset" case. The type of the offset
4001 // is encoded in the name of the intrinsic. We only need to specify the
4002 // return type in order to uniquely map this built-in to an LLVM IR
4003 // intrinsic.
4004 F = CGM.getIntrinsic(IID: IntID, Tys: OverloadedTy);
4005
4006 // Pass 0 when the offset is missing. This can only be applied when using
4007 // the "vector base" addressing mode for which ACLE allows no offset. The
4008 // corresponding LLVM IR always requires an offset.
4009 if (Ops.size() == 3) {
4010 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
4011 Ops.push_back(Elt: ConstantInt::get(Ty: Int64Ty, V: 0));
4012 }
4013
4014 // Truncation is needed when SrcDataTy != OverloadedTy. In other cases it's
4015 // folded into a nop.
4016 Ops[0] = Builder.CreateTrunc(V: Ops[0], DestTy: OverloadedTy);
4017
  // At the ACLE level there's only one predicate type, svbool_t, which is
  // mapped to <n x 16 x i1>. However, this might be incompatible with the
  // actual type being stored. For example, when storing 64-bit elements
  // (e.g. doubles) the predicate should be <n x 2 x i1> instead. At the IR
  // level the type of the predicate and the data being stored must match.
  // Cast to the type expected by the intrinsic. The intrinsic itself should
  // be defined in a way that enforces relations between parameter types.
4025 Ops[1] = EmitSVEPredicateCast(
4026 Pred: Ops[1], VTy: cast<llvm::ScalableVectorType>(Val: F->getArg(i: 1)->getType()));
4027
4028 // For "vector base, scalar index" scale the index so that it becomes a
4029 // scalar offset.
4030 if (!TypeFlags.isByteIndexed() && Ops[2]->getType()->isVectorTy()) {
4031 unsigned BytesPerElt =
4032 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
4033 Ops[3] = Builder.CreateShl(LHS: Ops[3], RHS: Log2_32(Value: BytesPerElt));
4034 }
4035
4036 return Builder.CreateCall(Callee: F, Args: Ops);
4037}
4038
4039Value *CodeGenFunction::EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags,
4040 SmallVectorImpl<Value *> &Ops,
4041 unsigned IntID) {
4042 // The gather prefetches are overloaded on the vector input - this can either
4043 // be the vector of base addresses or vector of offsets.
4044 auto *OverloadedTy = dyn_cast<llvm::ScalableVectorType>(Val: Ops[1]->getType());
4045 if (!OverloadedTy)
4046 OverloadedTy = cast<llvm::ScalableVectorType>(Val: Ops[2]->getType());
4047
4048 // Cast the predicate from svbool_t to the right number of elements.
4049 Ops[0] = EmitSVEPredicateCast(Pred: Ops[0], VTy: OverloadedTy);
4050
4051 // vector + imm addressing modes
4052 if (Ops[1]->getType()->isVectorTy()) {
4053 if (Ops.size() == 3) {
4054 // Pass 0 for 'vector+imm' when the index is omitted.
4055 Ops.push_back(Elt: ConstantInt::get(Ty: Int64Ty, V: 0));
4056
4057 // The sv_prfop is the last operand in the builtin and IR intrinsic.
4058 std::swap(a&: Ops[2], b&: Ops[3]);
4059 } else {
4060 // Index needs to be passed as scaled offset.
4061 llvm::Type *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
4062 unsigned BytesPerElt = MemEltTy->getPrimitiveSizeInBits() / 8;
4063 if (BytesPerElt > 1)
4064 Ops[2] = Builder.CreateShl(LHS: Ops[2], RHS: Log2_32(Value: BytesPerElt));
4065 }
4066 }
4067
4068 Function *F = CGM.getIntrinsic(IID: IntID, Tys: OverloadedTy);
4069 return Builder.CreateCall(Callee: F, Args: Ops);
4070}
4071
4072Value *CodeGenFunction::EmitSVEStructLoad(const SVETypeFlags &TypeFlags,
4073 SmallVectorImpl<Value*> &Ops,
4074 unsigned IntID) {
4075 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
4076 Value *Predicate = EmitSVEPredicateCast(Pred: Ops[0], VTy);
4077 Value *BasePtr = Ops[1];
4078
4079 // Does the load have an offset?
4080 if (Ops.size() > 2)
4081 BasePtr = Builder.CreateGEP(Ty: VTy, Ptr: BasePtr, IdxList: Ops[2]);
4082
4083 Function *F = CGM.getIntrinsic(IID: IntID, Tys: {VTy});
4084 return Builder.CreateCall(Callee: F, Args: {Predicate, BasePtr});
4085}
4086
4087Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags,
4088 SmallVectorImpl<Value*> &Ops,
4089 unsigned IntID) {
4090 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
4091
4092 unsigned N;
4093 switch (IntID) {
4094 case Intrinsic::aarch64_sve_st2:
4095 case Intrinsic::aarch64_sve_st1_pn_x2:
4096 case Intrinsic::aarch64_sve_stnt1_pn_x2:
4097 case Intrinsic::aarch64_sve_st2q:
4098 N = 2;
4099 break;
4100 case Intrinsic::aarch64_sve_st3:
4101 case Intrinsic::aarch64_sve_st3q:
4102 N = 3;
4103 break;
4104 case Intrinsic::aarch64_sve_st4:
4105 case Intrinsic::aarch64_sve_st1_pn_x4:
4106 case Intrinsic::aarch64_sve_stnt1_pn_x4:
4107 case Intrinsic::aarch64_sve_st4q:
4108 N = 4;
4109 break;
4110 default:
4111 llvm_unreachable("unknown intrinsic!");
4112 }
4113
4114 Value *Predicate = EmitSVEPredicateCast(Pred: Ops[0], VTy);
4115 Value *BasePtr = Ops[1];
4116
4117 // Does the store have an offset?
4118 if (Ops.size() > (2 + N))
4119 BasePtr = Builder.CreateGEP(Ty: VTy, Ptr: BasePtr, IdxList: Ops[2]);
4120
4121 // The llvm.aarch64.sve.st2/3/4 intrinsics take legal part vectors, so we
4122 // need to break up the tuple vector.
4123 SmallVector<llvm::Value*, 5> Operands;
4124 for (unsigned I = Ops.size() - N; I < Ops.size(); ++I)
4125 Operands.push_back(Elt: Ops[I]);
4126 Operands.append(IL: {Predicate, BasePtr});
4127 Function *F = CGM.getIntrinsic(IID: IntID, Tys: { VTy });
4128
4129 return Builder.CreateCall(Callee: F, Args: Operands);
4130}
4131
4132// SVE2's svpmullb and svpmullt builtins are similar to the svpmullb_pair and
4133// svpmullt_pair intrinsics, with the exception that their results are bitcast
4134// to a wider type.
4135Value *CodeGenFunction::EmitSVEPMull(const SVETypeFlags &TypeFlags,
4136 SmallVectorImpl<Value *> &Ops,
4137 unsigned BuiltinID) {
4138 // Splat scalar operand to vector (intrinsics with _n infix)
4139 if (TypeFlags.hasSplatOperand()) {
4140 unsigned OpNo = TypeFlags.getSplatOperand();
4141 Ops[OpNo] = EmitSVEDupX(Scalar: Ops[OpNo]);
4142 }
4143
4144 // The pair-wise function has a narrower overloaded type.
4145 Function *F = CGM.getIntrinsic(IID: BuiltinID, Tys: Ops[0]->getType());
4146 Value *Call = Builder.CreateCall(Callee: F, Args: {Ops[0], Ops[1]});
4147
4148 // Now bitcast to the wider result type.
4149 llvm::ScalableVectorType *Ty = getSVEType(TypeFlags);
4150 return EmitSVEReinterpret(Val: Call, Ty);
4151}
4152
4153Value *CodeGenFunction::EmitSVEMovl(const SVETypeFlags &TypeFlags,
4154 ArrayRef<Value *> Ops, unsigned BuiltinID) {
4155 llvm::Type *OverloadedTy = getSVEType(TypeFlags);
4156 Function *F = CGM.getIntrinsic(IID: BuiltinID, Tys: OverloadedTy);
4157 return Builder.CreateCall(Callee: F, Args: {Ops[0], Builder.getInt32(C: 0)});
4158}
4159
4160Value *CodeGenFunction::EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags,
4161 SmallVectorImpl<Value *> &Ops,
4162 unsigned BuiltinID) {
4163 auto *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
4164 auto *VectorTy = getSVEVectorForElementType(EltTy: MemEltTy);
4165 auto *MemoryTy = llvm::ScalableVectorType::get(ElementType: MemEltTy, SVTy: VectorTy);
4166
4167 Value *Predicate = EmitSVEPredicateCast(Pred: Ops[0], VTy: MemoryTy);
4168 Value *BasePtr = Ops[1];
4169
  // Fold the index operand into the base pointer if it was not omitted.
4171 if (Ops.size() > 3)
4172 BasePtr = Builder.CreateGEP(Ty: MemoryTy, Ptr: BasePtr, IdxList: Ops[2]);
4173
4174 Value *PrfOp = Ops.back();
4175
4176 Function *F = CGM.getIntrinsic(IID: BuiltinID, Tys: Predicate->getType());
4177 return Builder.CreateCall(Callee: F, Args: {Predicate, BasePtr, PrfOp});
4178}
4179
4180Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E,
4181 llvm::Type *ReturnTy,
4182 SmallVectorImpl<Value *> &Ops,
4183 unsigned IntrinsicID,
4184 bool IsZExtReturn) {
4185 QualType LangPTy = E->getArg(Arg: 1)->getType();
4186 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
4187 T: LangPTy->castAs<PointerType>()->getPointeeType());
4188
  // Mfloat8 types are represented as a <1 x i8> vector, so extra work is
  // needed to extract the scalar element type.
4191 if (MemEltTy->isVectorTy()) {
4192 assert(MemEltTy == FixedVectorType::get(Int8Ty, 1) &&
4193 "Only <1 x i8> expected");
4194 MemEltTy = cast<llvm::VectorType>(Val: MemEltTy)->getElementType();
4195 }
4196
4197 // The vector type that is returned may be different from the
4198 // eventual type loaded from memory.
4199 auto VectorTy = cast<llvm::ScalableVectorType>(Val: ReturnTy);
4200 llvm::ScalableVectorType *MemoryTy = nullptr;
4201 llvm::ScalableVectorType *PredTy = nullptr;
4202 bool IsQuadLoad = false;
4203 switch (IntrinsicID) {
4204 case Intrinsic::aarch64_sve_ld1uwq:
4205 case Intrinsic::aarch64_sve_ld1udq:
4206 MemoryTy = llvm::ScalableVectorType::get(ElementType: MemEltTy, MinNumElts: 1);
4207 PredTy = llvm::ScalableVectorType::get(
4208 ElementType: llvm::Type::getInt1Ty(C&: getLLVMContext()), MinNumElts: 1);
4209 IsQuadLoad = true;
4210 break;
4211 default:
4212 MemoryTy = llvm::ScalableVectorType::get(ElementType: MemEltTy, SVTy: VectorTy);
4213 PredTy = MemoryTy;
4214 break;
4215 }
4216
4217 Value *Predicate = EmitSVEPredicateCast(Pred: Ops[0], VTy: PredTy);
4218 Value *BasePtr = Ops[1];
4219
4220 // Does the load have an offset?
4221 if (Ops.size() > 2)
4222 BasePtr = Builder.CreateGEP(Ty: MemoryTy, Ptr: BasePtr, IdxList: Ops[2]);
4223
4224 Function *F = CGM.getIntrinsic(IID: IntrinsicID, Tys: IsQuadLoad ? VectorTy : MemoryTy);
4225 auto *Load =
4226 cast<llvm::Instruction>(Val: Builder.CreateCall(Callee: F, Args: {Predicate, BasePtr}));
4227 auto TBAAInfo = CGM.getTBAAAccessInfo(AccessType: LangPTy->getPointeeType());
4228 CGM.DecorateInstructionWithTBAA(Inst: Load, TBAAInfo);
4229
4230 if (IsQuadLoad)
4231 return Load;
4232
4233 return IsZExtReturn ? Builder.CreateZExt(V: Load, DestTy: VectorTy)
4234 : Builder.CreateSExt(V: Load, DestTy: VectorTy);
4235}
4236
4237Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E,
4238 SmallVectorImpl<Value *> &Ops,
4239 unsigned IntrinsicID) {
4240 QualType LangPTy = E->getArg(Arg: 1)->getType();
4241 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
4242 T: LangPTy->castAs<PointerType>()->getPointeeType());
4243
  // Mfloat8 types are represented as a <1 x i8> vector, so extra work is
  // needed to extract the scalar element type.
4246 if (MemEltTy->isVectorTy()) {
4247 assert(MemEltTy == FixedVectorType::get(Int8Ty, 1) &&
4248 "Only <1 x i8> expected");
4249 MemEltTy = cast<llvm::VectorType>(Val: MemEltTy)->getElementType();
4250 }
4251
4252 // The vector type that is stored may be different from the
4253 // eventual type stored to memory.
4254 auto VectorTy = cast<llvm::ScalableVectorType>(Val: Ops.back()->getType());
4255 auto MemoryTy = llvm::ScalableVectorType::get(ElementType: MemEltTy, SVTy: VectorTy);
4256
4257 auto PredTy = MemoryTy;
4258 auto AddrMemoryTy = MemoryTy;
4259 bool IsQuadStore = false;
4260
4261 switch (IntrinsicID) {
4262 case Intrinsic::aarch64_sve_st1wq:
4263 case Intrinsic::aarch64_sve_st1dq:
4264 AddrMemoryTy = llvm::ScalableVectorType::get(ElementType: MemEltTy, MinNumElts: 1);
4265 PredTy =
4266 llvm::ScalableVectorType::get(ElementType: IntegerType::get(C&: getLLVMContext(), NumBits: 1), MinNumElts: 1);
4267 IsQuadStore = true;
4268 break;
4269 default:
4270 break;
4271 }
4272 Value *Predicate = EmitSVEPredicateCast(Pred: Ops[0], VTy: PredTy);
4273 Value *BasePtr = Ops[1];
4274
4275 // Does the store have an offset?
4276 if (Ops.size() == 4)
4277 BasePtr = Builder.CreateGEP(Ty: AddrMemoryTy, Ptr: BasePtr, IdxList: Ops[2]);
4278
4279 // Last value is always the data
4280 Value *Val =
4281 IsQuadStore ? Ops.back() : Builder.CreateTrunc(V: Ops.back(), DestTy: MemoryTy);
4282
4283 Function *F =
4284 CGM.getIntrinsic(IID: IntrinsicID, Tys: IsQuadStore ? VectorTy : MemoryTy);
4285 auto *Store =
4286 cast<llvm::Instruction>(Val: Builder.CreateCall(Callee: F, Args: {Val, Predicate, BasePtr}));
4287 auto TBAAInfo = CGM.getTBAAAccessInfo(AccessType: LangPTy->getPointeeType());
4288 CGM.DecorateInstructionWithTBAA(Inst: Store, TBAAInfo);
4289 return Store;
4290}
4291
4292Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags,
4293 SmallVectorImpl<Value *> &Ops,
4294 unsigned IntID) {
4295 Ops[2] = EmitSVEPredicateCast(
4296 Pred: Ops[2], VTy: getSVEVectorForElementType(EltTy: SVEBuiltinMemEltTy(TypeFlags)));
4297
4298 SmallVector<Value *> NewOps;
4299 NewOps.push_back(Elt: Ops[2]);
4300
4301 llvm::Value *BasePtr = Ops[3];
4302 llvm::Value *RealSlice = Ops[1];
  // If the intrinsic contains the vnum parameter, multiply it by the vector
  // size in bytes.
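  // For example (illustrative; builtin name and signature assumed): for
  // svld1_hor_vnum_za8(tile, slice, pg, ptr, vnum), the base pointer is
  // advanced by vnum * svcntsb() bytes and vnum is also added to the slice.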
4305 if (Ops.size() == 5) {
4306 Function *StreamingVectorLength =
4307 CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb);
4308 llvm::Value *StreamingVectorLengthCall =
4309 Builder.CreateCall(Callee: StreamingVectorLength);
4310 llvm::Value *Mulvl =
4311 Builder.CreateMul(LHS: StreamingVectorLengthCall, RHS: Ops[4], Name: "mulvl");
4312 // The type of the ptr parameter is void *, so use Int8Ty here.
4313 BasePtr = Builder.CreateGEP(Ty: Int8Ty, Ptr: Ops[3], IdxList: Mulvl);
4314 RealSlice = Builder.CreateZExt(V: RealSlice, DestTy: Int64Ty);
4315 RealSlice = Builder.CreateAdd(LHS: RealSlice, RHS: Ops[4]);
4316 RealSlice = Builder.CreateTrunc(V: RealSlice, DestTy: Int32Ty);
4317 }
4318 NewOps.push_back(Elt: BasePtr);
4319 NewOps.push_back(Elt: Ops[0]);
4320 NewOps.push_back(Elt: RealSlice);
4321 Function *F = CGM.getIntrinsic(IID: IntID);
4322 return Builder.CreateCall(Callee: F, Args: NewOps);
4323}
4324
4325Value *CodeGenFunction::EmitSMEReadWrite(const SVETypeFlags &TypeFlags,
4326 SmallVectorImpl<Value *> &Ops,
4327 unsigned IntID) {
4328 auto *VecTy = getSVEType(TypeFlags);
4329 Function *F = CGM.getIntrinsic(IID: IntID, Tys: VecTy);
4330 if (TypeFlags.isReadZA())
4331 Ops[1] = EmitSVEPredicateCast(Pred: Ops[1], VTy: VecTy);
4332 else if (TypeFlags.isWriteZA())
4333 Ops[2] = EmitSVEPredicateCast(Pred: Ops[2], VTy: VecTy);
4334 return Builder.CreateCall(Callee: F, Args: Ops);
4335}
4336
4337Value *CodeGenFunction::EmitSMEZero(const SVETypeFlags &TypeFlags,
4338 SmallVectorImpl<Value *> &Ops,
4339 unsigned IntID) {
  // svzero_za() zeros the entire ZA array and takes no parameters.
4341 if (Ops.size() == 0)
4342 Ops.push_back(Elt: llvm::ConstantInt::get(Ty: Int32Ty, V: 255));
4343 Function *F = CGM.getIntrinsic(IID: IntID, Tys: {});
4344 return Builder.CreateCall(Callee: F, Args: Ops);
4345}
4346
4347Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags &TypeFlags,
4348 SmallVectorImpl<Value *> &Ops,
4349 unsigned IntID) {
4350 if (Ops.size() == 2)
4351 Ops.push_back(Elt: Builder.getInt32(C: 0));
4352 else
4353 Ops[2] = Builder.CreateIntCast(V: Ops[2], DestTy: Int32Ty, isSigned: true);
4354 Function *F = CGM.getIntrinsic(IID: IntID, Tys: {});
4355 return Builder.CreateCall(Callee: F, Args: Ops);
4356}
4357
// Splat a scalar operand across all lanes of the scalable vector type Ty.
4360Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) {
4361 return Builder.CreateVectorSplat(
4362 EC: cast<llvm::VectorType>(Val: Ty)->getElementCount(), V: Scalar);
4363}
4364
4365Value *CodeGenFunction::EmitSVEDupX(Value *Scalar) {
4366 if (auto *Ty = Scalar->getType(); Ty->isVectorTy()) {
4367#ifndef NDEBUG
4368 auto *VecTy = cast<llvm::VectorType>(Val: Ty);
4369 ElementCount EC = VecTy->getElementCount();
4370 assert(EC.isScalar() && VecTy->getElementType() == Int8Ty &&
4371 "Only <1 x i8> expected");
4372#endif
4373 Scalar = Builder.CreateExtractElement(Vec: Scalar, Idx: uint64_t(0));
4374 }
4375 return EmitSVEDupX(Scalar, Ty: getSVEVectorForElementType(EltTy: Scalar->getType()));
4376}
4377
4378Value *CodeGenFunction::EmitSVEReinterpret(Value *Val, llvm::Type *Ty) {
4379 // FIXME: For big endian this needs an additional REV, or needs a separate
4380 // intrinsic that is code-generated as a no-op, because the LLVM bitcast
4381 // instruction is defined as 'bitwise' equivalent from memory point of
4382 // view (when storing/reloading), whereas the svreinterpret builtin
4383 // implements bitwise equivalent cast from register point of view.
4384 // LLVM CodeGen for a bitcast must add an explicit REV for big-endian.
4385
4386 if (auto *StructTy = dyn_cast<StructType>(Val: Ty)) {
4387 Value *Tuple = llvm::PoisonValue::get(T: Ty);
4388
4389 for (unsigned I = 0; I < StructTy->getNumElements(); ++I) {
4390 Value *In = Builder.CreateExtractValue(Agg: Val, Idxs: I);
4391 Value *Out = Builder.CreateBitCast(V: In, DestTy: StructTy->getTypeAtIndex(N: I));
4392 Tuple = Builder.CreateInsertValue(Agg: Tuple, Val: Out, Idxs: I);
4393 }
4394
4395 return Tuple;
4396 }
4397
4398 return Builder.CreateBitCast(V: Val, DestTy: Ty);
4399}
4400
4401static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty,
4402 SmallVectorImpl<Value *> &Ops) {
4403 auto *SplatZero = Constant::getNullValue(Ty);
4404 Ops.insert(I: Ops.begin(), Elt: SplatZero);
4405}
4406
4407static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty,
4408 SmallVectorImpl<Value *> &Ops) {
4409 auto *SplatUndef = UndefValue::get(T: Ty);
4410 Ops.insert(I: Ops.begin(), Elt: SplatUndef);
4411}
4412
4413SmallVector<llvm::Type *, 2>
4414CodeGenFunction::getSVEOverloadTypes(const SVETypeFlags &TypeFlags,
4415 llvm::Type *ResultType,
4416 ArrayRef<Value *> Ops) {
4417 if (TypeFlags.isOverloadNone())
4418 return {};
4419
4420 llvm::Type *DefaultType = getSVEType(TypeFlags);
4421
4422 if (TypeFlags.isOverloadWhileOrMultiVecCvt())
4423 return {DefaultType, Ops[1]->getType()};
4424
4425 if (TypeFlags.isOverloadWhileRW())
4426 return {getSVEPredType(TypeFlags), Ops[0]->getType()};
4427
4428 if (TypeFlags.isOverloadCvt())
4429 return {Ops[0]->getType(), Ops.back()->getType()};
4430
4431 if (TypeFlags.isReductionQV() && !ResultType->isScalableTy() &&
4432 ResultType->isVectorTy())
4433 return {ResultType, Ops[1]->getType()};
4434
4435 assert(TypeFlags.isOverloadDefault() && "Unexpected value for overloads");
4436 return {DefaultType};
4437}
4438
4439Value *CodeGenFunction::EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags,
4440 ArrayRef<Value *> Ops) {
4441 assert((TypeFlags.isTupleSet() || TypeFlags.isTupleGet()) &&
4442 "Expects TypleFlags.isTupleSet() or TypeFlags.isTupleGet()");
4443 unsigned Idx = cast<ConstantInt>(Val: Ops[1])->getZExtValue();
4444
4445 if (TypeFlags.isTupleSet())
4446 return Builder.CreateInsertValue(Agg: Ops[0], Val: Ops[2], Idxs: Idx);
4447 return Builder.CreateExtractValue(Agg: Ops[0], Idxs: Idx);
4448}
4449
4450Value *CodeGenFunction::EmitSVETupleCreate(const SVETypeFlags &TypeFlags,
4451 llvm::Type *Ty,
4452 ArrayRef<Value *> Ops) {
4453 assert(TypeFlags.isTupleCreate() && "Expects TypleFlag isTupleCreate");
4454
4455 Value *Tuple = llvm::PoisonValue::get(T: Ty);
4456 for (unsigned Idx = 0; Idx < Ops.size(); Idx++)
4457 Tuple = Builder.CreateInsertValue(Agg: Tuple, Val: Ops[Idx], Idxs: Idx);
4458
4459 return Tuple;
4460}
4461
4462void CodeGenFunction::GetAArch64SVEProcessedOperands(
4463 unsigned BuiltinID, const CallExpr *E, SmallVectorImpl<Value *> &Ops,
4464 SVETypeFlags TypeFlags) {
4465 // Find out if any arguments are required to be integer constant expressions.
4466 unsigned ICEArguments = 0;
4467 ASTContext::GetBuiltinTypeError Error;
4468 getContext().GetBuiltinType(ID: BuiltinID, Error, IntegerConstantArgs: &ICEArguments);
4469 assert(Error == ASTContext::GE_None && "Should not codegen an error");
4470
4471 // Tuple set/get only requires one insert/extract vector, which is
4472 // created by EmitSVETupleSetOrGet.
4473 bool IsTupleGetOrSet = TypeFlags.isTupleSet() || TypeFlags.isTupleGet();
4474
4475 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
4476 bool IsICE = ICEArguments & (1 << i);
4477 Value *Arg = EmitScalarExpr(E: E->getArg(Arg: i));
4478
4479 if (IsICE) {
4480 // If this is required to be a constant, constant fold it so that we know
4481 // that the generated intrinsic gets a ConstantInt.
4482 std::optional<llvm::APSInt> Result =
4483 E->getArg(Arg: i)->getIntegerConstantExpr(Ctx: getContext());
4484 assert(Result && "Expected argument to be a constant");
4485
      // Immediates for SVE llvm intrinsics are always 32-bit. We can safely
4487 // truncate because the immediate has been range checked and no valid
4488 // immediate requires more than a handful of bits.
4489 *Result = Result->extOrTrunc(width: 32);
4490 Ops.push_back(Elt: llvm::ConstantInt::get(Context&: getLLVMContext(), V: *Result));
4491 continue;
4492 }
4493
4494 if (isa<StructType>(Val: Arg->getType()) && !IsTupleGetOrSet) {
4495 for (unsigned I = 0; I < Arg->getType()->getStructNumElements(); ++I)
4496 Ops.push_back(Elt: Builder.CreateExtractValue(Agg: Arg, Idxs: I));
4497
4498 continue;
4499 }
4500
4501 Ops.push_back(Elt: Arg);
4502 }
4503}
4504
4505Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
4506 const CallExpr *E) {
4507 llvm::Type *Ty = ConvertType(E->getType());
4508 if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 &&
4509 BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64_x4) {
4510 Value *Val = EmitScalarExpr(E: E->getArg(Arg: 0));
4511 return EmitSVEReinterpret(Val, Ty);
4512 }
4513
4514 auto *Builtin = findARMVectorIntrinsicInMap(AArch64SVEIntrinsicMap, BuiltinID,
4515 AArch64SVEIntrinsicsProvenSorted);
4516
4517 llvm::SmallVector<Value *, 4> Ops;
4518 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4519 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
4520
4521 if (TypeFlags.isLoad())
4522 return EmitSVEMaskedLoad(E, ReturnTy: Ty, Ops, IntrinsicID: Builtin->LLVMIntrinsic,
4523 IsZExtReturn: TypeFlags.isZExtReturn());
4524 else if (TypeFlags.isStore())
4525 return EmitSVEMaskedStore(E, Ops, IntrinsicID: Builtin->LLVMIntrinsic);
4526 else if (TypeFlags.isGatherLoad())
4527 return EmitSVEGatherLoad(TypeFlags, Ops, IntID: Builtin->LLVMIntrinsic);
4528 else if (TypeFlags.isScatterStore())
4529 return EmitSVEScatterStore(TypeFlags, Ops, IntID: Builtin->LLVMIntrinsic);
4530 else if (TypeFlags.isPrefetch())
4531 return EmitSVEPrefetchLoad(TypeFlags, Ops, BuiltinID: Builtin->LLVMIntrinsic);
4532 else if (TypeFlags.isGatherPrefetch())
4533 return EmitSVEGatherPrefetch(TypeFlags, Ops, IntID: Builtin->LLVMIntrinsic);
4534 else if (TypeFlags.isStructLoad())
4535 return EmitSVEStructLoad(TypeFlags, Ops, IntID: Builtin->LLVMIntrinsic);
4536 else if (TypeFlags.isStructStore())
4537 return EmitSVEStructStore(TypeFlags, Ops, IntID: Builtin->LLVMIntrinsic);
4538 else if (TypeFlags.isTupleSet() || TypeFlags.isTupleGet())
4539 return EmitSVETupleSetOrGet(TypeFlags, Ops);
4540 else if (TypeFlags.isTupleCreate())
4541 return EmitSVETupleCreate(TypeFlags, Ty, Ops);
4542 else if (TypeFlags.isUndef())
4543 return UndefValue::get(T: Ty);
4544 else if (Builtin->LLVMIntrinsic != 0) {
4545 // Emit set FPMR for intrinsics that require it
4546 if (TypeFlags.setsFPMR())
4547 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr),
4548 Ops.pop_back_val());
4549 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZeroExp)
4550 InsertExplicitZeroOperand(Builder, Ty, Ops);
4551
4552 if (TypeFlags.getMergeType() == SVETypeFlags::MergeAnyExp)
4553 InsertExplicitUndefOperand(Builder, Ty, Ops);
4554
4555 // Some ACLE builtins leave out the argument to specify the predicate
4556 // pattern, which is expected to be expanded to an SV_ALL pattern.
4557 if (TypeFlags.isAppendSVALL())
4558 Ops.push_back(Elt: Builder.getInt32(/*SV_ALL*/ C: 31));
4559 if (TypeFlags.isInsertOp1SVALL())
4560 Ops.insert(I: &Ops[1], Elt: Builder.getInt32(/*SV_ALL*/ C: 31));
4561
4562 // Predicates must match the main datatype.
4563 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
4564 if (auto PredTy = dyn_cast<llvm::VectorType>(Val: Ops[i]->getType()))
4565 if (PredTy->getElementType()->isIntegerTy(Bitwidth: 1))
4566 Ops[i] = EmitSVEPredicateCast(Pred: Ops[i], VTy: getSVEType(TypeFlags));
4567
4568 // Splat scalar operand to vector (intrinsics with _n infix)
4569 if (TypeFlags.hasSplatOperand()) {
4570 unsigned OpNo = TypeFlags.getSplatOperand();
4571 Ops[OpNo] = EmitSVEDupX(Scalar: Ops[OpNo]);
4572 }
4573
4574 if (TypeFlags.isReverseCompare())
4575 std::swap(a&: Ops[1], b&: Ops[2]);
4576 else if (TypeFlags.isReverseUSDOT())
4577 std::swap(a&: Ops[1], b&: Ops[2]);
4578 else if (TypeFlags.isReverseMergeAnyBinOp() &&
4579 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
4580 std::swap(a&: Ops[1], b&: Ops[2]);
4581 else if (TypeFlags.isReverseMergeAnyAccOp() &&
4582 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
4583 std::swap(a&: Ops[1], b&: Ops[3]);
4584
4585 // Predicated intrinsics with _z suffix need a select w/ zeroinitializer.
4586 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZero) {
4587 llvm::Type *OpndTy = Ops[1]->getType();
4588 auto *SplatZero = Constant::getNullValue(Ty: OpndTy);
4589 Ops[1] = Builder.CreateSelect(C: Ops[0], True: Ops[1], False: SplatZero);
4590 }
4591
4592 Function *F = CGM.getIntrinsic(IID: Builtin->LLVMIntrinsic,
4593 Tys: getSVEOverloadTypes(TypeFlags, ResultType: Ty, Ops));
4594 Value *Call = Builder.CreateCall(Callee: F, Args: Ops);
4595
4596 if (Call->getType() == Ty)
4597 return Call;
4598
4599 // Predicate results must be converted to svbool_t.
4600 if (auto PredTy = dyn_cast<llvm::ScalableVectorType>(Ty))
4601 return EmitSVEPredicateCast(Pred: Call, VTy: PredTy);
4602 if (auto PredTupleTy = dyn_cast<llvm::StructType>(Ty))
4603 return EmitSVEPredicateTupleCast(PredTuple: Call, Ty: PredTupleTy);
4604
4605 llvm_unreachable("unsupported element count!");
4606 }
4607
4608 switch (BuiltinID) {
4609 default:
4610 return nullptr;
4611
4612 case SVE::BI__builtin_sve_svreinterpret_b: {
4613 auto SVCountTy =
4614 llvm::TargetExtType::get(Context&: getLLVMContext(), Name: "aarch64.svcount");
4615 Function *CastFromSVCountF =
4616 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
4617 return Builder.CreateCall(Callee: CastFromSVCountF, Args: Ops[0]);
4618 }
4619 case SVE::BI__builtin_sve_svreinterpret_c: {
4620 auto SVCountTy =
4621 llvm::TargetExtType::get(Context&: getLLVMContext(), Name: "aarch64.svcount");
4622 Function *CastToSVCountF =
4623 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
4624 return Builder.CreateCall(Callee: CastToSVCountF, Args: Ops[0]);
4625 }
4626
4627 case SVE::BI__builtin_sve_svpsel_lane_b8:
4628 case SVE::BI__builtin_sve_svpsel_lane_b16:
4629 case SVE::BI__builtin_sve_svpsel_lane_b32:
4630 case SVE::BI__builtin_sve_svpsel_lane_b64:
4631 case SVE::BI__builtin_sve_svpsel_lane_c8:
4632 case SVE::BI__builtin_sve_svpsel_lane_c16:
4633 case SVE::BI__builtin_sve_svpsel_lane_c32:
4634 case SVE::BI__builtin_sve_svpsel_lane_c64: {
4635 bool IsSVCount = isa<TargetExtType>(Val: Ops[0]->getType());
4636 assert(((!IsSVCount || cast<TargetExtType>(Ops[0]->getType())->getName() ==
4637 "aarch64.svcount")) &&
4638 "Unexpected TargetExtType");
4639 auto SVCountTy =
4640 llvm::TargetExtType::get(Context&: getLLVMContext(), Name: "aarch64.svcount");
4641 Function *CastFromSVCountF =
4642 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
4643 Function *CastToSVCountF =
4644 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
4645
4646 auto OverloadedTy = getSVEType(TypeFlags: SVETypeFlags(Builtin->TypeModifier));
4647 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_psel, OverloadedTy);
4648 llvm::Value *Ops0 =
4649 IsSVCount ? Builder.CreateCall(Callee: CastFromSVCountF, Args: Ops[0]) : Ops[0];
4650 llvm::Value *Ops1 = EmitSVEPredicateCast(Pred: Ops[1], VTy: OverloadedTy);
4651 llvm::Value *PSel = Builder.CreateCall(Callee: F, Args: {Ops0, Ops1, Ops[2]});
4652 return IsSVCount ? Builder.CreateCall(Callee: CastToSVCountF, Args: PSel) : PSel;
4653 }
4654 case SVE::BI__builtin_sve_svmov_b_z: {
4655 // svmov_b_z(pg, op) <=> svand_b_z(pg, op, op)
4656 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4657 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
4658 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_and_z, OverloadedTy);
4659 return Builder.CreateCall(Callee: F, Args: {Ops[0], Ops[1], Ops[1]});
4660 }
4661
4662 case SVE::BI__builtin_sve_svnot_b_z: {
4663 // svnot_b_z(pg, op) <=> sveor_b_z(pg, op, pg)
4664 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4665 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
4666 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_eor_z, OverloadedTy);
4667 return Builder.CreateCall(Callee: F, Args: {Ops[0], Ops[1], Ops[0]});
4668 }
4669
4670 case SVE::BI__builtin_sve_svmovlb_u16:
4671 case SVE::BI__builtin_sve_svmovlb_u32:
4672 case SVE::BI__builtin_sve_svmovlb_u64:
4673 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllb);
4674
4675 case SVE::BI__builtin_sve_svmovlb_s16:
4676 case SVE::BI__builtin_sve_svmovlb_s32:
4677 case SVE::BI__builtin_sve_svmovlb_s64:
4678 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllb);
4679
4680 case SVE::BI__builtin_sve_svmovlt_u16:
4681 case SVE::BI__builtin_sve_svmovlt_u32:
4682 case SVE::BI__builtin_sve_svmovlt_u64:
4683 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllt);
4684
4685 case SVE::BI__builtin_sve_svmovlt_s16:
4686 case SVE::BI__builtin_sve_svmovlt_s32:
4687 case SVE::BI__builtin_sve_svmovlt_s64:
4688 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllt);
4689
4690 case SVE::BI__builtin_sve_svpmullt_u16:
4691 case SVE::BI__builtin_sve_svpmullt_u64:
4692 case SVE::BI__builtin_sve_svpmullt_n_u16:
4693 case SVE::BI__builtin_sve_svpmullt_n_u64:
4694 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullt_pair);
4695
4696 case SVE::BI__builtin_sve_svpmullb_u16:
4697 case SVE::BI__builtin_sve_svpmullb_u64:
4698 case SVE::BI__builtin_sve_svpmullb_n_u16:
4699 case SVE::BI__builtin_sve_svpmullb_n_u64:
4700 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullb_pair);
4701
4702 case SVE::BI__builtin_sve_svdup_n_b8:
4703 case SVE::BI__builtin_sve_svdup_n_b16:
4704 case SVE::BI__builtin_sve_svdup_n_b32:
4705 case SVE::BI__builtin_sve_svdup_n_b64: {
4706 Value *CmpNE =
4707 Builder.CreateICmpNE(LHS: Ops[0], RHS: Constant::getNullValue(Ty: Ops[0]->getType()));
4708 llvm::ScalableVectorType *OverloadedTy = getSVEType(TypeFlags);
4709 Value *Dup = EmitSVEDupX(Scalar: CmpNE, Ty: OverloadedTy);
4710 return EmitSVEPredicateCast(Pred: Dup, VTy: cast<llvm::ScalableVectorType>(Val: Ty));
4711 }
4712
4713 case SVE::BI__builtin_sve_svdupq_n_b8:
4714 case SVE::BI__builtin_sve_svdupq_n_b16:
4715 case SVE::BI__builtin_sve_svdupq_n_b32:
4716 case SVE::BI__builtin_sve_svdupq_n_b64:
4717 case SVE::BI__builtin_sve_svdupq_n_u8:
4718 case SVE::BI__builtin_sve_svdupq_n_s8:
4719 case SVE::BI__builtin_sve_svdupq_n_u64:
4720 case SVE::BI__builtin_sve_svdupq_n_f64:
4721 case SVE::BI__builtin_sve_svdupq_n_s64:
4722 case SVE::BI__builtin_sve_svdupq_n_u16:
4723 case SVE::BI__builtin_sve_svdupq_n_f16:
4724 case SVE::BI__builtin_sve_svdupq_n_bf16:
4725 case SVE::BI__builtin_sve_svdupq_n_s16:
4726 case SVE::BI__builtin_sve_svdupq_n_u32:
4727 case SVE::BI__builtin_sve_svdupq_n_f32:
4728 case SVE::BI__builtin_sve_svdupq_n_s32: {
    // These builtins are implemented by building a fixed-length vector from
    // the scalar arguments, inserting it into a scalable vector, and
    // broadcasting that 128-bit block with the dupq.lane intrinsic.
4731 unsigned NumOpnds = Ops.size();
4732
4733 bool IsBoolTy =
4734 cast<llvm::VectorType>(Val: Ty)->getElementType()->isIntegerTy(Bitwidth: 1);
4735
    // For svdupq_n_b* the element type is an integer of width 128/NumOpnds,
    // so that the compare can use the width that is natural for the expected
    // number of predicate lanes.
4739 llvm::Type *EltTy = Ops[0]->getType();
4740 if (IsBoolTy)
4741 EltTy = IntegerType::get(C&: getLLVMContext(), NumBits: SVEBitsPerBlock / NumOpnds);
4742
4743 SmallVector<llvm::Value *, 16> VecOps;
4744 for (unsigned I = 0; I < NumOpnds; ++I)
4745 VecOps.push_back(Elt: Builder.CreateZExt(V: Ops[I], DestTy: EltTy));
4746 Value *Vec = BuildVector(Ops: VecOps);
4747
4748 llvm::Type *OverloadedTy = getSVEVectorForElementType(EltTy);
4749 Value *InsertSubVec = Builder.CreateInsertVector(
4750 DstType: OverloadedTy, SrcVec: PoisonValue::get(T: OverloadedTy), SubVec: Vec, Idx: uint64_t(0));
4751
4752 Function *F =
4753 CGM.getIntrinsic(Intrinsic::aarch64_sve_dupq_lane, OverloadedTy);
4754 Value *DupQLane =
4755 Builder.CreateCall(Callee: F, Args: {InsertSubVec, Builder.getInt64(C: 0)});
4756
4757 if (!IsBoolTy)
4758 return DupQLane;
4759
4760 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4761 Value *Pred = EmitSVEAllTruePred(TypeFlags);
4762
4763 // For svdupq_n_b* we need to add an additional 'cmpne' with '0'.
4764 F = CGM.getIntrinsic(NumOpnds == 2 ? Intrinsic::aarch64_sve_cmpne
4765 : Intrinsic::aarch64_sve_cmpne_wide,
4766 OverloadedTy);
4767 Value *Call = Builder.CreateCall(
4768 Callee: F, Args: {Pred, DupQLane, EmitSVEDupX(Scalar: Builder.getInt64(C: 0))});
4769 return EmitSVEPredicateCast(Pred: Call, VTy: cast<llvm::ScalableVectorType>(Val: Ty));
4770 }
4771
4772 case SVE::BI__builtin_sve_svpfalse_b:
4773 return ConstantInt::getFalse(Ty);
4774
4775 case SVE::BI__builtin_sve_svpfalse_c: {
4776 auto SVBoolTy = ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 16);
4777 Function *CastToSVCountF =
4778 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, Ty);
4779 return Builder.CreateCall(Callee: CastToSVCountF, Args: ConstantInt::getFalse(Ty: SVBoolTy));
4780 }
4781
4782 case SVE::BI__builtin_sve_svlen_bf16:
4783 case SVE::BI__builtin_sve_svlen_f16:
4784 case SVE::BI__builtin_sve_svlen_f32:
4785 case SVE::BI__builtin_sve_svlen_f64:
4786 case SVE::BI__builtin_sve_svlen_s8:
4787 case SVE::BI__builtin_sve_svlen_s16:
4788 case SVE::BI__builtin_sve_svlen_s32:
4789 case SVE::BI__builtin_sve_svlen_s64:
4790 case SVE::BI__builtin_sve_svlen_u8:
4791 case SVE::BI__builtin_sve_svlen_u16:
4792 case SVE::BI__builtin_sve_svlen_u32:
4793 case SVE::BI__builtin_sve_svlen_u64: {
4794 SVETypeFlags TF(Builtin->TypeModifier);
4795 return Builder.CreateElementCount(Ty, EC: getSVEType(TypeFlags: TF)->getElementCount());
4796 }
4797
4798 case SVE::BI__builtin_sve_svtbl2_u8:
4799 case SVE::BI__builtin_sve_svtbl2_s8:
4800 case SVE::BI__builtin_sve_svtbl2_u16:
4801 case SVE::BI__builtin_sve_svtbl2_s16:
4802 case SVE::BI__builtin_sve_svtbl2_u32:
4803 case SVE::BI__builtin_sve_svtbl2_s32:
4804 case SVE::BI__builtin_sve_svtbl2_u64:
4805 case SVE::BI__builtin_sve_svtbl2_s64:
4806 case SVE::BI__builtin_sve_svtbl2_f16:
4807 case SVE::BI__builtin_sve_svtbl2_bf16:
4808 case SVE::BI__builtin_sve_svtbl2_f32:
4809 case SVE::BI__builtin_sve_svtbl2_f64: {
4810 SVETypeFlags TF(Builtin->TypeModifier);
4811 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_tbl2, getSVEType(TF));
4812 return Builder.CreateCall(Callee: F, Args: Ops);
4813 }
4814
4815 case SVE::BI__builtin_sve_svset_neonq_s8:
4816 case SVE::BI__builtin_sve_svset_neonq_s16:
4817 case SVE::BI__builtin_sve_svset_neonq_s32:
4818 case SVE::BI__builtin_sve_svset_neonq_s64:
4819 case SVE::BI__builtin_sve_svset_neonq_u8:
4820 case SVE::BI__builtin_sve_svset_neonq_u16:
4821 case SVE::BI__builtin_sve_svset_neonq_u32:
4822 case SVE::BI__builtin_sve_svset_neonq_u64:
4823 case SVE::BI__builtin_sve_svset_neonq_f16:
4824 case SVE::BI__builtin_sve_svset_neonq_f32:
4825 case SVE::BI__builtin_sve_svset_neonq_f64:
4826 case SVE::BI__builtin_sve_svset_neonq_bf16: {
4827 return Builder.CreateInsertVector(DstType: Ty, SrcVec: Ops[0], SubVec: Ops[1], Idx: uint64_t(0));
4828 }
4829
4830 case SVE::BI__builtin_sve_svget_neonq_s8:
4831 case SVE::BI__builtin_sve_svget_neonq_s16:
4832 case SVE::BI__builtin_sve_svget_neonq_s32:
4833 case SVE::BI__builtin_sve_svget_neonq_s64:
4834 case SVE::BI__builtin_sve_svget_neonq_u8:
4835 case SVE::BI__builtin_sve_svget_neonq_u16:
4836 case SVE::BI__builtin_sve_svget_neonq_u32:
4837 case SVE::BI__builtin_sve_svget_neonq_u64:
4838 case SVE::BI__builtin_sve_svget_neonq_f16:
4839 case SVE::BI__builtin_sve_svget_neonq_f32:
4840 case SVE::BI__builtin_sve_svget_neonq_f64:
4841 case SVE::BI__builtin_sve_svget_neonq_bf16: {
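    // The converse of svset_neonq: read the low 128 bits of the SVE vector
    // out as a NEON vector via llvm.vector.extract at index 0.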
4842 return Builder.CreateExtractVector(DstType: Ty, SrcVec: Ops[0], Idx: uint64_t(0));
4843 }
4844
4845 case SVE::BI__builtin_sve_svdup_neonq_s8:
4846 case SVE::BI__builtin_sve_svdup_neonq_s16:
4847 case SVE::BI__builtin_sve_svdup_neonq_s32:
4848 case SVE::BI__builtin_sve_svdup_neonq_s64:
4849 case SVE::BI__builtin_sve_svdup_neonq_u8:
4850 case SVE::BI__builtin_sve_svdup_neonq_u16:
4851 case SVE::BI__builtin_sve_svdup_neonq_u32:
4852 case SVE::BI__builtin_sve_svdup_neonq_u64:
4853 case SVE::BI__builtin_sve_svdup_neonq_f16:
4854 case SVE::BI__builtin_sve_svdup_neonq_f32:
4855 case SVE::BI__builtin_sve_svdup_neonq_f64:
4856 case SVE::BI__builtin_sve_svdup_neonq_bf16: {
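    // Insert the 128-bit source into element 0 of an otherwise-poison
    // scalable vector; dupq_lane with index 0 then replicates that quadword
    // across every 128-bit chunk of the result.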
4857 Value *Insert = Builder.CreateInsertVector(DstType: Ty, SrcVec: PoisonValue::get(T: Ty), SubVec: Ops[0],
4858 Idx: uint64_t(0));
4859 return Builder.CreateIntrinsic(Intrinsic::aarch64_sve_dupq_lane, {Ty},
4860 {Insert, Builder.getInt64(0)});
4861 }
4862 }
4863
4864 // Should not happen: every SVE builtin should have been handled above.
4865 return nullptr;
4866}
4867
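/// For the svsudot/svsumla builtins, swap the two groups of multi-vector
/// operands (Ops[1..MultiVec] with Ops[MultiVec+1..2*MultiVec]) so that the
/// commuted unsigned-by-signed intrinsics can be reused with their operands
/// in the order they expect.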
4868static void swapCommutativeSMEOperands(unsigned BuiltinID,
4869 SmallVectorImpl<Value *> &Ops) {
4870 unsigned MultiVec;
4871 switch (BuiltinID) {
4872 default:
4873 return;
4874 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x1:
4875 MultiVec = 1;
4876 break;
4877 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x2:
4878 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x2:
4879 MultiVec = 2;
4880 break;
4881 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x4:
4882 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x4:
4883 MultiVec = 4;
4884 break;
4885 }
4886
4887 if (MultiVec > 0)
4888 for (unsigned I = 0; I < MultiVec; ++I)
4889 std::swap(a&: Ops[I + 1], b&: Ops[I + 1 + MultiVec]);
4890}
4891
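/// Lower a call to an SME builtin. Loads/stores, ZA read/write, ZA zeroing
/// and the ZA LDR/STR builtins have dedicated emitters; everything else is
/// emitted as a direct call to the mapped LLVM intrinsic, after any FPMR
/// setup and operand swapping the builtin requires.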
4892Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID,
4893 const CallExpr *E) {
4894 auto *Builtin = findARMVectorIntrinsicInMap(AArch64SMEIntrinsicMap, BuiltinID,
4895 AArch64SMEIntrinsicsProvenSorted);
4896
4897 llvm::SmallVector<Value *, 4> Ops;
4898 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4899 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
4900
4901 if (TypeFlags.isLoad() || TypeFlags.isStore())
4902 return EmitSMELd1St1(TypeFlags, Ops, IntID: Builtin->LLVMIntrinsic);
4903 else if (TypeFlags.isReadZA() || TypeFlags.isWriteZA())
4904 return EmitSMEReadWrite(TypeFlags, Ops, IntID: Builtin->LLVMIntrinsic);
4905 else if (BuiltinID == SME::BI__builtin_sme_svzero_mask_za ||
4906 BuiltinID == SME::BI__builtin_sme_svzero_za)
4907 return EmitSMEZero(TypeFlags, Ops, IntID: Builtin->LLVMIntrinsic);
4908 else if (BuiltinID == SME::BI__builtin_sme_svldr_vnum_za ||
4909 BuiltinID == SME::BI__builtin_sme_svstr_vnum_za ||
4910 BuiltinID == SME::BI__builtin_sme_svldr_za ||
4911 BuiltinID == SME::BI__builtin_sme_svstr_za)
4912 return EmitSMELdrStr(TypeFlags, Ops, IntID: Builtin->LLVMIntrinsic);
4913
4914 // For intrinsics that require it, set FPMR from the trailing operand.
4915 if (TypeFlags.setsFPMR())
4916 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr),
4917 Ops.pop_back_val());
4918 // Handle builtins which require their multi-vector operands to be swapped
4919 swapCommutativeSMEOperands(BuiltinID, Ops);
4920
4921 // Should not happen!
4922 if (Builtin->LLVMIntrinsic == 0)
4923 return nullptr;
4924
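  // For example (a sketch): in a function declared __arm_streaming, a call to
  // __arm_in_streaming_mode() is folded to 'true' below instead of emitting
  // the intrinsic.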
4925 if (BuiltinID == SME::BI__builtin_sme___arm_in_streaming_mode) {
4926 // If we already know the streaming mode, don't bother with the intrinsic
4927 // and emit a constant instead
4928 const auto *FD = cast<FunctionDecl>(Val: CurFuncDecl);
4929 if (const auto *FPT = FD->getType()->getAs<FunctionProtoType>()) {
4930 unsigned SMEAttrs = FPT->getAArch64SMEAttributes();
4931 if (!(SMEAttrs & FunctionType::SME_PStateSMCompatibleMask)) {
4932 bool IsStreaming = SMEAttrs & FunctionType::SME_PStateSMEnabledMask;
4933 return ConstantInt::getBool(Context&: Builder.getContext(), V: IsStreaming);
4934 }
4935 }
4936 }
4937
4938 // Predicates must match the main datatype.
4939 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
4940 if (auto PredTy = dyn_cast<llvm::VectorType>(Val: Ops[i]->getType()))
4941 if (PredTy->getElementType()->isIntegerTy(Bitwidth: 1))
4942 Ops[i] = EmitSVEPredicateCast(Pred: Ops[i], VTy: getSVEType(TypeFlags));
4943
4944 Function *F =
4945 TypeFlags.isOverloadNone()
4946 ? CGM.getIntrinsic(IID: Builtin->LLVMIntrinsic)
4947 : CGM.getIntrinsic(IID: Builtin->LLVMIntrinsic, Tys: {getSVEType(TypeFlags)});
4948
4949 return Builder.CreateCall(Callee: F, Args: Ops);
4950}
4951
4952/// Helper for the read/write/add/inc X18 builtins: read the X18 register and
4953/// return it as an i8 pointer.
4954Value *readX18AsPtr(CodeGenFunction &CGF) {
4955 LLVMContext &Context = CGF.CGM.getLLVMContext();
4956 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Str: "x18")};
4957 llvm::MDNode *RegName = llvm::MDNode::get(Context, MDs: Ops);
4958 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, MD: RegName);
4959 llvm::Function *F =
4960 CGF.CGM.getIntrinsic(Intrinsic::read_register, {CGF.Int64Ty});
4961 llvm::Value *X18 = CGF.Builder.CreateCall(Callee: F, Args: Metadata);
4962 return CGF.Builder.CreateIntToPtr(V: X18, DestTy: CGF.Int8PtrTy);
4963}
4964
4965Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
4966 const CallExpr *E,
4967 llvm::Triple::ArchType Arch) {
4968 if (BuiltinID >= clang::AArch64::FirstSVEBuiltin &&
4969 BuiltinID <= clang::AArch64::LastSVEBuiltin)
4970 return EmitAArch64SVEBuiltinExpr(BuiltinID, E);
4971
4972 if (BuiltinID >= clang::AArch64::FirstSMEBuiltin &&
4973 BuiltinID <= clang::AArch64::LastSMEBuiltin)
4974 return EmitAArch64SMEBuiltinExpr(BuiltinID, E);
4975
4976 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
4977 return EmitAArch64CpuSupports(E);
4978
4979 unsigned HintID = static_cast<unsigned>(-1);
4980 switch (BuiltinID) {
4981 default: break;
4982 case clang::AArch64::BI__builtin_arm_nop:
4983 HintID = 0;
4984 break;
4985 case clang::AArch64::BI__builtin_arm_yield:
4986 case clang::AArch64::BI__yield:
4987 HintID = 1;
4988 break;
4989 case clang::AArch64::BI__builtin_arm_wfe:
4990 case clang::AArch64::BI__wfe:
4991 HintID = 2;
4992 break;
4993 case clang::AArch64::BI__builtin_arm_wfi:
4994 case clang::AArch64::BI__wfi:
4995 HintID = 3;
4996 break;
4997 case clang::AArch64::BI__builtin_arm_sev:
4998 case clang::AArch64::BI__sev:
4999 HintID = 4;
5000 break;
5001 case clang::AArch64::BI__builtin_arm_sevl:
5002 case clang::AArch64::BI__sevl:
5003 HintID = 5;
5004 break;
5005 }
5006
5007 if (HintID != static_cast<unsigned>(-1)) {
5008 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
5009 return Builder.CreateCall(Callee: F, Args: llvm::ConstantInt::get(Ty: Int32Ty, V: HintID));
5010 }
5011
5012 if (BuiltinID == clang::AArch64::BI__builtin_arm_trap) {
5013 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_break);
5014 llvm::Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
5015 return Builder.CreateCall(Callee: F, Args: Builder.CreateZExt(V: Arg, DestTy: CGM.Int32Ty));
5016 }
5017
5018 if (BuiltinID == clang::AArch64::BI__builtin_arm_get_sme_state) {
5019 // Create call to __arm_sme_state and store the results to the two pointers.
5020 CallInst *CI = EmitRuntimeCall(callee: CGM.CreateRuntimeFunction(
5021 Ty: llvm::FunctionType::get(Result: StructType::get(elt1: CGM.Int64Ty, elts: CGM.Int64Ty), Params: {},
5022 isVarArg: false),
5023 Name: "__arm_sme_state"));
5024 auto Attrs = AttributeList().addFnAttribute(C&: getLLVMContext(),
5025 Kind: "aarch64_pstate_sm_compatible");
5026 CI->setAttributes(Attrs);
5027 CI->setCallingConv(
5028 llvm::CallingConv::
5029 AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2);
5030 Builder.CreateStore(Val: Builder.CreateExtractValue(Agg: CI, Idxs: 0),
5031 Addr: EmitPointerWithAlignment(Addr: E->getArg(Arg: 0)));
5032 return Builder.CreateStore(Val: Builder.CreateExtractValue(Agg: CI, Idxs: 1),
5033 Addr: EmitPointerWithAlignment(Addr: E->getArg(Arg: 1)));
5034 }
5035
5036 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit) {
5037 assert((getContext().getTypeSize(E->getType()) == 32) &&
5038 "rbit of unusual size!");
5039 llvm::Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
5040 return Builder.CreateCall(
5041 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
5042 }
5043 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit64) {
5044 assert((getContext().getTypeSize(E->getType()) == 64) &&
5045 "rbit of unusual size!");
5046 llvm::Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
5047 return Builder.CreateCall(
5048 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
5049 }
5050
5051 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz ||
5052 BuiltinID == clang::AArch64::BI__builtin_arm_clz64) {
5053 llvm::Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
5054 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
5055 Value *Res = Builder.CreateCall(Callee: F, Args: {Arg, Builder.getInt1(V: false)});
5056 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz64)
5057 Res = Builder.CreateTrunc(V: Res, DestTy: Builder.getInt32Ty());
5058 return Res;
5059 }
5060
5061 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls) {
5062 llvm::Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
5063 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls), Arg,
5064 "cls");
5065 }
5066 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls64) {
5067 llvm::Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
5068 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls64), Arg,
5069 "cls");
5070 }
5071
5072 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32zf ||
5073 BuiltinID == clang::AArch64::BI__builtin_arm_rint32z) {
5074 llvm::Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
5075 llvm::Type *Ty = Arg->getType();
5076 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32z, Ty),
5077 Arg, "frint32z");
5078 }
5079
5080 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64zf ||
5081 BuiltinID == clang::AArch64::BI__builtin_arm_rint64z) {
5082 llvm::Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
5083 llvm::Type *Ty = Arg->getType();
5084 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64z, Ty),
5085 Arg, "frint64z");
5086 }
5087
5088 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32xf ||
5089 BuiltinID == clang::AArch64::BI__builtin_arm_rint32x) {
5090 llvm::Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
5091 llvm::Type *Ty = Arg->getType();
5092 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32x, Ty),
5093 Arg, "frint32x");
5094 }
5095
5096 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64xf ||
5097 BuiltinID == clang::AArch64::BI__builtin_arm_rint64x) {
5098 llvm::Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
5099 llvm::Type *Ty = Arg->getType();
5100 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64x, Ty),
5101 Arg, "frint64x");
5102 }
5103
5104 if (BuiltinID == clang::AArch64::BI__builtin_arm_jcvt) {
5105 assert((getContext().getTypeSize(E->getType()) == 32) &&
5106 "__jcvt of unusual size!");
5107 llvm::Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
5108 return Builder.CreateCall(
5109 CGM.getIntrinsic(Intrinsic::aarch64_fjcvtzs), Arg);
5110 }
5111
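  // Armv8.7 LS64: these builtins move a block of eight i64 values between
  // memory and the object at ValPtr. A sketch of the ACLE-level usage this
  // supports (assuming arm_acle.h's data512_t wrappers):
  //   data512_t v = __arm_ld64b(src);   // -> @llvm.aarch64.ld64b
  //   __arm_st64b(dst, v);              // -> @llvm.aarch64.st64b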
5112 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b ||
5113 BuiltinID == clang::AArch64::BI__builtin_arm_st64b ||
5114 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv ||
5115 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv0) {
5116 llvm::Value *MemAddr = EmitScalarExpr(E: E->getArg(Arg: 0));
5117 llvm::Value *ValPtr = EmitScalarExpr(E: E->getArg(Arg: 1));
5118
5119 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b) {
5120 // Load from the address via an LLVM intrinsic, receiving a
5121 // tuple of 8 i64 words, and store each one to ValPtr.
5122 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_ld64b);
5123 llvm::Value *Val = Builder.CreateCall(Callee: F, Args: MemAddr);
5124 llvm::Value *ToRet;
5125 for (size_t i = 0; i < 8; i++) {
5126 llvm::Value *ValOffsetPtr =
5127 Builder.CreateGEP(Ty: Int64Ty, Ptr: ValPtr, IdxList: Builder.getInt32(C: i));
5128 Address Addr =
5129 Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(Quantity: 8));
5130 ToRet = Builder.CreateStore(Val: Builder.CreateExtractValue(Agg: Val, Idxs: i), Addr);
5131 }
5132 return ToRet;
5133 } else {
5134 // Load 8 i64 words from ValPtr, and store them to the address
5135 // via an LLVM intrinsic.
5136 SmallVector<llvm::Value *, 9> Args;
5137 Args.push_back(Elt: MemAddr);
5138 for (size_t i = 0; i < 8; i++) {
5139 llvm::Value *ValOffsetPtr =
5140 Builder.CreateGEP(Ty: Int64Ty, Ptr: ValPtr, IdxList: Builder.getInt32(C: i));
5141 Address Addr =
5142 Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(Quantity: 8));
5143 Args.push_back(Elt: Builder.CreateLoad(Addr));
5144 }
5145
5146 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_st64b
5147 ? Intrinsic::aarch64_st64b
5148 : BuiltinID == clang::AArch64::BI__builtin_arm_st64bv
5149 ? Intrinsic::aarch64_st64bv
5150 : Intrinsic::aarch64_st64bv0);
5151 Function *F = CGM.getIntrinsic(IID: Intr);
5152 return Builder.CreateCall(Callee: F, Args);
5153 }
5154 }
5155
5156 if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr ||
5157 BuiltinID == clang::AArch64::BI__builtin_arm_rndrrs) {
5158
5159 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_rndr
5160 ? Intrinsic::aarch64_rndr
5161 : Intrinsic::aarch64_rndrrs);
5162 Function *F = CGM.getIntrinsic(IID: Intr);
5163 llvm::Value *Val = Builder.CreateCall(Callee: F);
5164 Value *RandomValue = Builder.CreateExtractValue(Agg: Val, Idxs: 0);
5165 Value *Status = Builder.CreateExtractValue(Agg: Val, Idxs: 1);
5166
5167 Address MemAddress = EmitPointerWithAlignment(Addr: E->getArg(Arg: 0));
5168 Builder.CreateStore(Val: RandomValue, Addr: MemAddress);
5169 Status = Builder.CreateZExt(V: Status, DestTy: Int32Ty);
5170 return Status;
5171 }
5172
5173 if (BuiltinID == clang::AArch64::BI__clear_cache) {
5174 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
5175 const FunctionDecl *FD = E->getDirectCallee();
5176 Value *Ops[2];
5177 for (unsigned i = 0; i < 2; i++)
5178 Ops[i] = EmitScalarExpr(E: E->getArg(Arg: i));
5179 llvm::Type *Ty = CGM.getTypes().ConvertType(T: FD->getType());
5180 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Val: Ty);
5181 StringRef Name = FD->getName();
5182 return EmitNounwindRuntimeCall(callee: CGM.CreateRuntimeFunction(Ty: FTy, Name), args: Ops);
5183 }
5184
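  // 128-bit __builtin_arm_ldrex/ldaex: load an exclusive pair of i64 values
  // with ldxp/ldaxp and reassemble them into a single i128 result.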
5185 if ((BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
5186 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) &&
5187 getContext().getTypeSize(E->getType()) == 128) {
5188 Function *F =
5189 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
5190 ? Intrinsic::aarch64_ldaxp
5191 : Intrinsic::aarch64_ldxp);
5192
5193 Value *LdPtr = EmitScalarExpr(E: E->getArg(Arg: 0));
5194 Value *Val = Builder.CreateCall(Callee: F, Args: LdPtr, Name: "ldxp");
5195
5196 Value *Val0 = Builder.CreateExtractValue(Agg: Val, Idxs: 1);
5197 Value *Val1 = Builder.CreateExtractValue(Agg: Val, Idxs: 0);
5198 llvm::Type *Int128Ty = llvm::IntegerType::get(C&: getLLVMContext(), NumBits: 128);
5199 Val0 = Builder.CreateZExt(V: Val0, DestTy: Int128Ty);
5200 Val1 = Builder.CreateZExt(V: Val1, DestTy: Int128Ty);
5201
5202 Value *ShiftCst = llvm::ConstantInt::get(Ty: Int128Ty, V: 64);
5203 Val = Builder.CreateShl(LHS: Val0, RHS: ShiftCst, Name: "shl", HasNUW: true /* nuw */);
5204 Val = Builder.CreateOr(LHS: Val, RHS: Val1);
5205 return Builder.CreateBitCast(V: Val, DestTy: ConvertType(E->getType()));
5206 } else if (BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
5207 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) {
5208 Value *LoadAddr = EmitScalarExpr(E: E->getArg(Arg: 0));
5209
5210 QualType Ty = E->getType();
5211 llvm::Type *RealResTy = ConvertType(T: Ty);
5212 llvm::Type *IntTy =
5213 llvm::IntegerType::get(C&: getLLVMContext(), NumBits: getContext().getTypeSize(T: Ty));
5214
5215 Function *F =
5216 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
5217 ? Intrinsic::aarch64_ldaxr
5218 : Intrinsic::aarch64_ldxr,
5219 UnqualPtrTy);
5220 CallInst *Val = Builder.CreateCall(Callee: F, Args: LoadAddr, Name: "ldxr");
5221 Val->addParamAttr(
5222 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
5223
5224 if (RealResTy->isPointerTy())
5225 return Builder.CreateIntToPtr(V: Val, DestTy: RealResTy);
5226
5227 llvm::Type *IntResTy = llvm::IntegerType::get(
5228 C&: getLLVMContext(), NumBits: CGM.getDataLayout().getTypeSizeInBits(Ty: RealResTy));
5229 return Builder.CreateBitCast(V: Builder.CreateTruncOrBitCast(V: Val, DestTy: IntResTy),
5230 DestTy: RealResTy);
5231 }
5232
5233 if ((BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
5234 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) &&
5235 getContext().getTypeSize(T: E->getArg(Arg: 0)->getType()) == 128) {
5236 Function *F =
5237 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
5238 ? Intrinsic::aarch64_stlxp
5239 : Intrinsic::aarch64_stxp);
5240 llvm::Type *STy = llvm::StructType::get(elt1: Int64Ty, elts: Int64Ty);
5241
5242 Address Tmp = CreateMemTemp(T: E->getArg(Arg: 0)->getType());
5243 EmitAnyExprToMem(E: E->getArg(Arg: 0), Location: Tmp, Quals: Qualifiers(), /*init*/ IsInitializer: true);
5244
5245 Tmp = Tmp.withElementType(ElemTy: STy);
5246 llvm::Value *Val = Builder.CreateLoad(Addr: Tmp);
5247
5248 Value *Arg0 = Builder.CreateExtractValue(Agg: Val, Idxs: 0);
5249 Value *Arg1 = Builder.CreateExtractValue(Agg: Val, Idxs: 1);
5250 Value *StPtr = EmitScalarExpr(E: E->getArg(Arg: 1));
5251 return Builder.CreateCall(Callee: F, Args: {Arg0, Arg1, StPtr}, Name: "stxp");
5252 }
5253
5254 if (BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
5255 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) {
5256 Value *StoreVal = EmitScalarExpr(E: E->getArg(Arg: 0));
5257 Value *StoreAddr = EmitScalarExpr(E: E->getArg(Arg: 1));
5258
5259 QualType Ty = E->getArg(Arg: 0)->getType();
5260 llvm::Type *StoreTy =
5261 llvm::IntegerType::get(C&: getLLVMContext(), NumBits: getContext().getTypeSize(T: Ty));
5262
5263 if (StoreVal->getType()->isPointerTy())
5264 StoreVal = Builder.CreatePtrToInt(V: StoreVal, DestTy: Int64Ty);
5265 else {
5266 llvm::Type *IntTy = llvm::IntegerType::get(
5267 C&: getLLVMContext(),
5268 NumBits: CGM.getDataLayout().getTypeSizeInBits(Ty: StoreVal->getType()));
5269 StoreVal = Builder.CreateBitCast(V: StoreVal, DestTy: IntTy);
5270 StoreVal = Builder.CreateZExtOrBitCast(V: StoreVal, DestTy: Int64Ty);
5271 }
5272
5273 Function *F =
5274 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
5275 ? Intrinsic::aarch64_stlxr
5276 : Intrinsic::aarch64_stxr,
5277 StoreAddr->getType());
5278 CallInst *CI = Builder.CreateCall(Callee: F, Args: {StoreVal, StoreAddr}, Name: "stxr");
5279 CI->addParamAttr(
5280 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
5281 return CI;
5282 }
5283
5284 if (BuiltinID == clang::AArch64::BI__getReg) {
5285 Expr::EvalResult Result;
5286 if (!E->getArg(Arg: 0)->EvaluateAsInt(Result, Ctx: CGM.getContext()))
5287 llvm_unreachable("Sema will ensure that the parameter is constant");
5288
5289 llvm::APSInt Value = Result.Val.getInt();
5290 LLVMContext &Context = CGM.getLLVMContext();
5291 std::string Reg = Value == 31 ? "sp" : "x" + toString(I: Value, Radix: 10);
5292
5293 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Str: Reg)};
5294 llvm::MDNode *RegName = llvm::MDNode::get(Context, MDs: Ops);
5295 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, MD: RegName);
5296
5297 llvm::Function *F =
5298 CGM.getIntrinsic(Intrinsic::read_register, {Int64Ty});
5299 return Builder.CreateCall(Callee: F, Args: Metadata);
5300 }
5301
5302 if (BuiltinID == clang::AArch64::BI__break) {
5303 Expr::EvalResult Result;
5304 if (!E->getArg(Arg: 0)->EvaluateAsInt(Result, Ctx: CGM.getContext()))
5305 llvm_unreachable("Sema will ensure that the parameter is constant");
5306
5307 llvm::Function *F = CGM.getIntrinsic(Intrinsic::aarch64_break);
5308 return Builder.CreateCall(Callee: F, Args: {EmitScalarExpr(E: E->getArg(Arg: 0))});
5309 }
5310
5311 if (BuiltinID == clang::AArch64::BI__builtin_arm_clrex) {
5312 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
5313 return Builder.CreateCall(Callee: F);
5314 }
5315
5316 if (BuiltinID == clang::AArch64::BI_ReadWriteBarrier)
5317 return Builder.CreateFence(Ordering: llvm::AtomicOrdering::SequentiallyConsistent,
5318 SSID: llvm::SyncScope::SingleThread);
5319
5320 // CRC32
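  // Each CRC32 builtin maps onto one of the aarch64.crc32* intrinsics, with
  // the data operand zero-extended to the width the intrinsic expects.
  // Illustrative use (a sketch, assuming the ACLE __crc32b wrapper from
  // <arm_acle.h>):
  //   acc = __crc32b(acc, byte);   // -> @llvm.aarch64.crc32b(acc, zext(byte))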
5321 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
5322 switch (BuiltinID) {
5323 case clang::AArch64::BI__builtin_arm_crc32b:
5324 CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
5325 case clang::AArch64::BI__builtin_arm_crc32cb:
5326 CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
5327 case clang::AArch64::BI__builtin_arm_crc32h:
5328 CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
5329 case clang::AArch64::BI__builtin_arm_crc32ch:
5330 CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
5331 case clang::AArch64::BI__builtin_arm_crc32w:
5332 CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
5333 case clang::AArch64::BI__builtin_arm_crc32cw:
5334 CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
5335 case clang::AArch64::BI__builtin_arm_crc32d:
5336 CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
5337 case clang::AArch64::BI__builtin_arm_crc32cd:
5338 CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
5339 }
5340
5341 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
5342 Value *Arg0 = EmitScalarExpr(E: E->getArg(Arg: 0));
5343 Value *Arg1 = EmitScalarExpr(E: E->getArg(Arg: 1));
5344 Function *F = CGM.getIntrinsic(IID: CRCIntrinsicID);
5345
5346 llvm::Type *DataTy = F->getFunctionType()->getParamType(i: 1);
5347 Arg1 = Builder.CreateZExtOrBitCast(V: Arg1, DestTy: DataTy);
5348
5349 return Builder.CreateCall(Callee: F, Args: {Arg0, Arg1});
5350 }
5351
5352 // Memory Operations (MOPS)
5353 if (BuiltinID == AArch64::BI__builtin_arm_mops_memset_tag) {
5354 Value *Dst = EmitScalarExpr(E: E->getArg(Arg: 0));
5355 Value *Val = EmitScalarExpr(E: E->getArg(Arg: 1));
5356 Value *Size = EmitScalarExpr(E: E->getArg(Arg: 2));
5357 Val = Builder.CreateTrunc(V: Val, DestTy: Int8Ty);
5358 Size = Builder.CreateIntCast(V: Size, DestTy: Int64Ty, isSigned: false);
5359 return Builder.CreateCall(
5360 CGM.getIntrinsic(Intrinsic::aarch64_mops_memset_tag), {Dst, Val, Size});
5361 }
5362
5363 // Memory Tagging Extensions (MTE) Intrinsics
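  // The MTE builtins map onto the aarch64.irg/addg/gmi/ldg/stg/subp
  // intrinsics; small integer operands (masks, tag offsets) are widened to
  // i64 first. A sketch of a typical use, assuming the ACLE
  // __arm_mte_create_random_tag wrapper:
  //   void *tagged = __arm_mte_create_random_tag(p, 0);  // -> @llvm.aarch64.irg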
5364 Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic;
5365 switch (BuiltinID) {
5366 case clang::AArch64::BI__builtin_arm_irg:
5367 MTEIntrinsicID = Intrinsic::aarch64_irg; break;
5368 case clang::AArch64::BI__builtin_arm_addg:
5369 MTEIntrinsicID = Intrinsic::aarch64_addg; break;
5370 case clang::AArch64::BI__builtin_arm_gmi:
5371 MTEIntrinsicID = Intrinsic::aarch64_gmi; break;
5372 case clang::AArch64::BI__builtin_arm_ldg:
5373 MTEIntrinsicID = Intrinsic::aarch64_ldg; break;
5374 case clang::AArch64::BI__builtin_arm_stg:
5375 MTEIntrinsicID = Intrinsic::aarch64_stg; break;
5376 case clang::AArch64::BI__builtin_arm_subp:
5377 MTEIntrinsicID = Intrinsic::aarch64_subp; break;
5378 }
5379
5380 if (MTEIntrinsicID != Intrinsic::not_intrinsic) {
5381 if (MTEIntrinsicID == Intrinsic::aarch64_irg) {
5382 Value *Pointer = EmitScalarExpr(E: E->getArg(Arg: 0));
5383 Value *Mask = EmitScalarExpr(E: E->getArg(Arg: 1));
5384
5385 Mask = Builder.CreateZExt(V: Mask, DestTy: Int64Ty);
5386 return Builder.CreateCall(Callee: CGM.getIntrinsic(IID: MTEIntrinsicID),
5387 Args: {Pointer, Mask});
5388 }
5389 if (MTEIntrinsicID == Intrinsic::aarch64_addg) {
5390 Value *Pointer = EmitScalarExpr(E: E->getArg(Arg: 0));
5391 Value *TagOffset = EmitScalarExpr(E: E->getArg(Arg: 1));
5392
5393 TagOffset = Builder.CreateZExt(V: TagOffset, DestTy: Int64Ty);
5394 return Builder.CreateCall(Callee: CGM.getIntrinsic(IID: MTEIntrinsicID),
5395 Args: {Pointer, TagOffset});
5396 }
5397 if (MTEIntrinsicID == Intrinsic::aarch64_gmi) {
5398 Value *Pointer = EmitScalarExpr(E: E->getArg(Arg: 0));
5399 Value *ExcludedMask = EmitScalarExpr(E: E->getArg(Arg: 1));
5400
5401 ExcludedMask = Builder.CreateZExt(V: ExcludedMask, DestTy: Int64Ty);
5402 return Builder.CreateCall(
5403 Callee: CGM.getIntrinsic(IID: MTEIntrinsicID), Args: {Pointer, ExcludedMask});
5404 }
5405 // Although it is possible to supply a different return
5406 // address (the first argument) to this intrinsic, for now we
5407 // use the input address as the return address as well.
5408 if (MTEIntrinsicID == Intrinsic::aarch64_ldg) {
5409 Value *TagAddress = EmitScalarExpr(E: E->getArg(Arg: 0));
5410 return Builder.CreateCall(Callee: CGM.getIntrinsic(IID: MTEIntrinsicID),
5411 Args: {TagAddress, TagAddress});
5412 }
5413 // Although it is possible to supply a different tag to set
5414 // (as the first argument) to this intrinsic, for now we reuse
5415 // the tag already present in the input address argument (the common case).
5416 if (MTEIntrinsicID == Intrinsic::aarch64_stg) {
5417 Value *TagAddress = EmitScalarExpr(E: E->getArg(Arg: 0));
5418 return Builder.CreateCall(Callee: CGM.getIntrinsic(IID: MTEIntrinsicID),
5419 Args: {TagAddress, TagAddress});
5420 }
5421 if (MTEIntrinsicID == Intrinsic::aarch64_subp) {
5422 Value *PointerA = EmitScalarExpr(E: E->getArg(Arg: 0));
5423 Value *PointerB = EmitScalarExpr(E: E->getArg(Arg: 1));
5424 return Builder.CreateCall(
5425 Callee: CGM.getIntrinsic(IID: MTEIntrinsicID), Args: {PointerA, PointerB});
5426 }
5427 }
5428
5429 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
5430 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
5431 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
5432 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
5433 BuiltinID == clang::AArch64::BI__builtin_arm_wsr ||
5434 BuiltinID == clang::AArch64::BI__builtin_arm_wsr64 ||
5435 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128 ||
5436 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp) {
5437
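    // These builtins read or write a system register through
    // llvm.read_register / llvm.write_register; the register and value types
    // chosen below depend on which variant (32-bit, 64-bit, 128-bit or
    // pointer) was called.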
5438 SpecialRegisterAccessKind AccessKind = Write;
5439 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
5440 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
5441 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
5442 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp)
5443 AccessKind = VolatileRead;
5444
5445 bool IsPointerBuiltin = BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
5446 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp;
5447
5448 bool Is32Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
5449 BuiltinID == clang::AArch64::BI__builtin_arm_wsr;
5450
5451 bool Is128Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
5452 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128;
5453
5454 llvm::Type *ValueType;
5455 llvm::Type *RegisterType = Int64Ty;
5456 if (Is32Bit) {
5457 ValueType = Int32Ty;
5458 } else if (Is128Bit) {
5459 llvm::Type *Int128Ty =
5460 llvm::IntegerType::getInt128Ty(C&: CGM.getLLVMContext());
5461 ValueType = Int128Ty;
5462 RegisterType = Int128Ty;
5463 } else if (IsPointerBuiltin) {
5464 ValueType = VoidPtrTy;
5465 } else {
5466 ValueType = Int64Ty;
5467 }
5468
5469 return EmitSpecialRegisterBuiltin(CGF&: *this, E, RegisterType, ValueType,
5470 AccessKind);
5471 }
5472
5473 if (BuiltinID == clang::AArch64::BI_ReadStatusReg ||
5474 BuiltinID == clang::AArch64::BI_WriteStatusReg) {
5475 LLVMContext &Context = CGM.getLLVMContext();
5476
5477 unsigned SysReg =
5478 E->getArg(Arg: 0)->EvaluateKnownConstInt(Ctx: getContext()).getZExtValue();
5479
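    // The MSVC register argument packs op0 (low bit only; op0 is always 2 or
    // 3) in bit 14, op1 in bits 13:11, CRn in bits 10:7, CRm in bits 6:3 and
    // op2 in bits 2:0. Decode it into the "op0:op1:CRn:CRm:op2" string form
    // expected by the read/write_register intrinsics.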
5480 std::string SysRegStr;
5481 llvm::raw_string_ostream(SysRegStr) <<
5482 ((1 << 1) | ((SysReg >> 14) & 1)) << ":" <<
5483 ((SysReg >> 11) & 7) << ":" <<
5484 ((SysReg >> 7) & 15) << ":" <<
5485 ((SysReg >> 3) & 15) << ":" <<
5486 ( SysReg & 7);
5487
5488 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, Str: SysRegStr) };
5489 llvm::MDNode *RegName = llvm::MDNode::get(Context, MDs: Ops);
5490 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, MD: RegName);
5491
5492 llvm::Type *RegisterType = Int64Ty;
5493 llvm::Type *Types[] = { RegisterType };
5494
5495 if (BuiltinID == clang::AArch64::BI_ReadStatusReg) {
5496 llvm::Function *F = CGM.getIntrinsic(Intrinsic::read_register, Types);
5497
5498 return Builder.CreateCall(Callee: F, Args: Metadata);
5499 }
5500
5501 llvm::Function *F = CGM.getIntrinsic(Intrinsic::write_register, Types);
5502 llvm::Value *ArgValue = EmitScalarExpr(E: E->getArg(Arg: 1));
5503
5504 return Builder.CreateCall(Callee: F, Args: { Metadata, ArgValue });
5505 }
5506
5507 if (BuiltinID == clang::AArch64::BI_AddressOfReturnAddress) {
5508 llvm::Function *F =
5509 CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
5510 return Builder.CreateCall(Callee: F);
5511 }
5512
5513 if (BuiltinID == clang::AArch64::BI__builtin_sponentry) {
5514 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
5515 return Builder.CreateCall(Callee: F);
5516 }
5517
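  // __mulh/__umulh return the high 64 bits of a 128-bit product: sign- or
  // zero-extend both operands to i128, multiply, and take bits [127:64].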
5518 if (BuiltinID == clang::AArch64::BI__mulh ||
5519 BuiltinID == clang::AArch64::BI__umulh) {
5520 llvm::Type *ResType = ConvertType(E->getType());
5521 llvm::Type *Int128Ty = llvm::IntegerType::get(C&: getLLVMContext(), NumBits: 128);
5522
5523 bool IsSigned = BuiltinID == clang::AArch64::BI__mulh;
5524 Value *LHS =
5525 Builder.CreateIntCast(V: EmitScalarExpr(E: E->getArg(Arg: 0)), DestTy: Int128Ty, isSigned: IsSigned);
5526 Value *RHS =
5527 Builder.CreateIntCast(V: EmitScalarExpr(E: E->getArg(Arg: 1)), DestTy: Int128Ty, isSigned: IsSigned);
5528
5529 Value *MulResult, *HigherBits;
5530 if (IsSigned) {
5531 MulResult = Builder.CreateNSWMul(LHS, RHS);
5532 HigherBits = Builder.CreateAShr(LHS: MulResult, RHS: 64);
5533 } else {
5534 MulResult = Builder.CreateNUWMul(LHS, RHS);
5535 HigherBits = Builder.CreateLShr(LHS: MulResult, RHS: 64);
5536 }
5537 HigherBits = Builder.CreateIntCast(V: HigherBits, DestTy: ResType, isSigned: IsSigned);
5538
5539 return HigherBits;
5540 }
5541
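  // The x18 intrinsics (__readx18*, __writex18*, __addx18*, __incx18*) access
  // memory at an offset from the platform register x18: read x18 with
  // llvm.read_register, then perform a byte-aligned load/store at x18+offset.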
5542 if (BuiltinID == AArch64::BI__writex18byte ||
5543 BuiltinID == AArch64::BI__writex18word ||
5544 BuiltinID == AArch64::BI__writex18dword ||
5545 BuiltinID == AArch64::BI__writex18qword) {
5546 // Process the args first
5547 Value *OffsetArg = EmitScalarExpr(E: E->getArg(Arg: 0));
5548 Value *DataArg = EmitScalarExpr(E: E->getArg(Arg: 1));
5549
5550 // Read x18 as i8*
5551 llvm::Value *X18 = readX18AsPtr(CGF&: *this);
5552
5553 // Store val at x18 + offset
5554 Value *Offset = Builder.CreateZExt(V: OffsetArg, DestTy: Int64Ty);
5555 Value *Ptr = Builder.CreateGEP(Ty: Int8Ty, Ptr: X18, IdxList: Offset);
5556 StoreInst *Store =
5557 Builder.CreateAlignedStore(Val: DataArg, Addr: Ptr, Align: CharUnits::One());
5558 return Store;
5559 }
5560
5561 if (BuiltinID == AArch64::BI__readx18byte ||
5562 BuiltinID == AArch64::BI__readx18word ||
5563 BuiltinID == AArch64::BI__readx18dword ||
5564 BuiltinID == AArch64::BI__readx18qword) {
5565 // Process the args first
5566 Value *OffsetArg = EmitScalarExpr(E: E->getArg(Arg: 0));
5567
5568 // Read x18 as i8*
5569 llvm::Value *X18 = readX18AsPtr(CGF&: *this);
5570
5571 // Load x18 + offset
5572 Value *Offset = Builder.CreateZExt(V: OffsetArg, DestTy: Int64Ty);
5573 Value *Ptr = Builder.CreateGEP(Ty: Int8Ty, Ptr: X18, IdxList: Offset);
5574 llvm::Type *IntTy = ConvertType(E->getType());
5575 LoadInst *Load = Builder.CreateAlignedLoad(Ty: IntTy, Addr: Ptr, Align: CharUnits::One());
5576 return Load;
5577 }
5578
5579 if (BuiltinID == AArch64::BI__addx18byte ||
5580 BuiltinID == AArch64::BI__addx18word ||
5581 BuiltinID == AArch64::BI__addx18dword ||
5582 BuiltinID == AArch64::BI__addx18qword ||
5583 BuiltinID == AArch64::BI__incx18byte ||
5584 BuiltinID == AArch64::BI__incx18word ||
5585 BuiltinID == AArch64::BI__incx18dword ||
5586 BuiltinID == AArch64::BI__incx18qword) {
5587 llvm::Type *IntTy;
5588 bool isIncrement;
5589 switch (BuiltinID) {
5590 case AArch64::BI__incx18byte:
5591 IntTy = Int8Ty;
5592 isIncrement = true;
5593 break;
5594 case AArch64::BI__incx18word:
5595 IntTy = Int16Ty;
5596 isIncrement = true;
5597 break;
5598 case AArch64::BI__incx18dword:
5599 IntTy = Int32Ty;
5600 isIncrement = true;
5601 break;
5602 case AArch64::BI__incx18qword:
5603 IntTy = Int64Ty;
5604 isIncrement = true;
5605 break;
5606 default:
5607 IntTy = ConvertType(T: E->getArg(Arg: 1)->getType());
5608 isIncrement = false;
5609 break;
5610 }
5611 // Process the args first
5612 Value *OffsetArg = EmitScalarExpr(E: E->getArg(Arg: 0));
5613 Value *ValToAdd =
5614 isIncrement ? ConstantInt::get(Ty: IntTy, V: 1) : EmitScalarExpr(E: E->getArg(Arg: 1));
5615
5616 // Read x18 as i8*
5617 llvm::Value *X18 = readX18AsPtr(CGF&: *this);
5618
5619 // Load x18 + offset
5620 Value *Offset = Builder.CreateZExt(V: OffsetArg, DestTy: Int64Ty);
5621 Value *Ptr = Builder.CreateGEP(Ty: Int8Ty, Ptr: X18, IdxList: Offset);
5622 LoadInst *Load = Builder.CreateAlignedLoad(Ty: IntTy, Addr: Ptr, Align: CharUnits::One());
5623
5624 // Add values
5625 Value *AddResult = Builder.CreateAdd(LHS: Load, RHS: ValToAdd);
5626
5627 // Store val at x18 + offset
5628 StoreInst *Store =
5629 Builder.CreateAlignedStore(Val: AddResult, Addr: Ptr, Align: CharUnits::One());
5630 return Store;
5631 }
5632
5633 if (BuiltinID == AArch64::BI_CopyDoubleFromInt64 ||
5634 BuiltinID == AArch64::BI_CopyFloatFromInt32 ||
5635 BuiltinID == AArch64::BI_CopyInt32FromFloat ||
5636 BuiltinID == AArch64::BI_CopyInt64FromDouble) {
5637 Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
5638 llvm::Type *RetTy = ConvertType(E->getType());
5639 return Builder.CreateBitCast(V: Arg, DestTy: RetTy);
5640 }
5641
5642 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
5643 BuiltinID == AArch64::BI_CountLeadingOnes64 ||
5644 BuiltinID == AArch64::BI_CountLeadingZeros ||
5645 BuiltinID == AArch64::BI_CountLeadingZeros64) {
5646 Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
5647 llvm::Type *ArgType = Arg->getType();
5648
5649 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
5650 BuiltinID == AArch64::BI_CountLeadingOnes64)
5651 Arg = Builder.CreateXor(LHS: Arg, RHS: Constant::getAllOnesValue(Ty: ArgType));
5652
5653 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
5654 Value *Result = Builder.CreateCall(Callee: F, Args: {Arg, Builder.getInt1(V: false)});
5655
5656 if (BuiltinID == AArch64::BI_CountLeadingOnes64 ||
5657 BuiltinID == AArch64::BI_CountLeadingZeros64)
5658 Result = Builder.CreateTrunc(V: Result, DestTy: Builder.getInt32Ty());
5659 return Result;
5660 }
5661
5662 if (BuiltinID == AArch64::BI_CountLeadingSigns ||
5663 BuiltinID == AArch64::BI_CountLeadingSigns64) {
5664 Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
5665
5666 Function *F = (BuiltinID == AArch64::BI_CountLeadingSigns)
5667 ? CGM.getIntrinsic(Intrinsic::aarch64_cls)
5668 : CGM.getIntrinsic(Intrinsic::aarch64_cls64);
5669
5670 Value *Result = Builder.CreateCall(Callee: F, Args: Arg, Name: "cls");
5671 if (BuiltinID == AArch64::BI_CountLeadingSigns64)
5672 Result = Builder.CreateTrunc(V: Result, DestTy: Builder.getInt32Ty());
5673 return Result;
5674 }
5675
5676 if (BuiltinID == AArch64::BI_CountOneBits ||
5677 BuiltinID == AArch64::BI_CountOneBits64) {
5678 Value *ArgValue = EmitScalarExpr(E: E->getArg(Arg: 0));
5679 llvm::Type *ArgType = ArgValue->getType();
5680 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
5681
5682 Value *Result = Builder.CreateCall(Callee: F, Args: ArgValue);
5683 if (BuiltinID == AArch64::BI_CountOneBits64)
5684 Result = Builder.CreateTrunc(V: Result, DestTy: Builder.getInt32Ty());
5685 return Result;
5686 }
5687
5688 if (BuiltinID == AArch64::BI__prefetch) {
5689 Value *Address = EmitScalarExpr(E: E->getArg(Arg: 0));
5690 Value *RW = llvm::ConstantInt::get(Ty: Int32Ty, V: 0);
5691 Value *Locality = ConstantInt::get(Ty: Int32Ty, V: 3);
5692 Value *Data = llvm::ConstantInt::get(Ty: Int32Ty, V: 1);
5693 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
5694 return Builder.CreateCall(Callee: F, Args: {Address, RW, Locality, Data});
5695 }
5696
5697 if (BuiltinID == AArch64::BI__hlt) {
5698 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hlt);
5699 Builder.CreateCall(Callee: F, Args: {EmitScalarExpr(E: E->getArg(Arg: 0))});
5700
5701 // Return 0 for convenience, even though MSVC returns some other undefined
5702 // value.
5703 return ConstantInt::get(Ty: Builder.getInt32Ty(), V: 0);
5704 }
5705
5706 if (BuiltinID == NEON::BI__builtin_neon_vcvth_bf16_f32)
5707 return Builder.CreateFPTrunc(
5708 V: Builder.CreateBitCast(V: EmitScalarExpr(E: E->getArg(Arg: 0)),
5709 DestTy: Builder.getFloatTy()),
5710 DestTy: Builder.getBFloatTy());
5711
5712 // Handle MSVC intrinsics before argument evaluation to prevent double
5713 // evaluation.
5714 if (std::optional<MSVCIntrin> MsvcIntId =
5715 translateAarch64ToMsvcIntrin(BuiltinID))
5716 return EmitMSVCBuiltinExpr(BuiltinID: *MsvcIntId, E);
5717
5718 // Some intrinsics are equivalent; if so, use the base intrinsic ID.
5719 auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
5720 return P.first == BuiltinID;
5721 });
5722 if (It != end(NEONEquivalentIntrinsicMap))
5723 BuiltinID = It->second;
5724
5725 // Find out if any arguments are required to be integer constant
5726 // expressions.
5727 unsigned ICEArguments = 0;
5728 ASTContext::GetBuiltinTypeError Error;
5729 getContext().GetBuiltinType(ID: BuiltinID, Error, IntegerConstantArgs: &ICEArguments);
5730 assert(Error == ASTContext::GE_None && "Should not codegen an error");
5731
5732 llvm::SmallVector<Value*, 4> Ops;
5733 Address PtrOp0 = Address::invalid();
5734 for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
5735 if (i == 0) {
5736 switch (BuiltinID) {
5737 case NEON::BI__builtin_neon_vld1_v:
5738 case NEON::BI__builtin_neon_vld1q_v:
5739 case NEON::BI__builtin_neon_vld1_dup_v:
5740 case NEON::BI__builtin_neon_vld1q_dup_v:
5741 case NEON::BI__builtin_neon_vld1_lane_v:
5742 case NEON::BI__builtin_neon_vld1q_lane_v:
5743 case NEON::BI__builtin_neon_vst1_v:
5744 case NEON::BI__builtin_neon_vst1q_v:
5745 case NEON::BI__builtin_neon_vst1_lane_v:
5746 case NEON::BI__builtin_neon_vst1q_lane_v:
5747 case NEON::BI__builtin_neon_vldap1_lane_s64:
5748 case NEON::BI__builtin_neon_vldap1q_lane_s64:
5749 case NEON::BI__builtin_neon_vstl1_lane_s64:
5750 case NEON::BI__builtin_neon_vstl1q_lane_s64:
5751 // Get the alignment for the argument in addition to the value;
5752 // we'll use it later.
5753 PtrOp0 = EmitPointerWithAlignment(Addr: E->getArg(Arg: 0));
5754 Ops.push_back(Elt: PtrOp0.emitRawPointer(CGF&: *this));
5755 continue;
5756 }
5757 }
5758 Ops.push_back(Elt: EmitScalarOrConstFoldImmArg(ICEArguments, Idx: i, E));
5759 }
5760
5761 auto SISDMap = ArrayRef(AArch64SISDIntrinsicMap);
5762 const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
5763 SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
5764
5765 if (Builtin) {
5766 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: E->getNumArgs() - 1)));
5767 Value *Result = EmitCommonNeonSISDBuiltinExpr(CGF&: *this, SISDInfo: *Builtin, Ops, E);
5768 assert(Result && "SISD intrinsic should have been handled");
5769 return Result;
5770 }
5771
5772 const Expr *Arg = E->getArg(Arg: E->getNumArgs()-1);
5773 NeonTypeFlags Type(0);
5774 if (std::optional<llvm::APSInt> Result =
5775 Arg->getIntegerConstantExpr(Ctx: getContext()))
5776 // Determine the type of this overloaded NEON intrinsic.
5777 Type = NeonTypeFlags(Result->getZExtValue());
5778
5779 bool usgn = Type.isUnsigned();
5780 bool quad = Type.isQuad();
5781
5782 // Handle non-overloaded intrinsics first.
5783 switch (BuiltinID) {
5784 default: break;
5785 case NEON::BI__builtin_neon_vabsh_f16:
5786 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
5787 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, "vabs");
5788 case NEON::BI__builtin_neon_vaddq_p128: {
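    // Addition of 128-bit polynomials over GF(2) is simply XOR of the bits.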
5789 llvm::Type *Ty = GetNeonType(CGF: this, TypeFlags: NeonTypeFlags::Poly128);
5790 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
5791 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Ty);
5792 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
5793 Ops[0] = Builder.CreateXor(LHS: Ops[0], RHS: Ops[1]);
5794 llvm::Type *Int128Ty = llvm::Type::getIntNTy(C&: getLLVMContext(), N: 128);
5795 return Builder.CreateBitCast(V: Ops[0], DestTy: Int128Ty);
5796 }
5797 case NEON::BI__builtin_neon_vldrq_p128: {
5798 llvm::Type *Int128Ty = llvm::Type::getIntNTy(C&: getLLVMContext(), N: 128);
5799 Value *Ptr = EmitScalarExpr(E: E->getArg(Arg: 0));
5800 return Builder.CreateAlignedLoad(Ty: Int128Ty, Addr: Ptr,
5801 Align: CharUnits::fromQuantity(Quantity: 16));
5802 }
5803 case NEON::BI__builtin_neon_vstrq_p128: {
5804 Value *Ptr = Ops[0];
5805 return Builder.CreateDefaultAlignedStore(Val: EmitScalarExpr(E: E->getArg(Arg: 1)), Addr: Ptr);
5806 }
5807 case NEON::BI__builtin_neon_vcvts_f32_u32:
5808 case NEON::BI__builtin_neon_vcvtd_f64_u64:
5809 usgn = true;
5810 [[fallthrough]];
5811 case NEON::BI__builtin_neon_vcvts_f32_s32:
5812 case NEON::BI__builtin_neon_vcvtd_f64_s64: {
5813 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
5814 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
5815 llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
5816 llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
5817 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: InTy);
5818 if (usgn)
5819 return Builder.CreateUIToFP(V: Ops[0], DestTy: FTy);
5820 return Builder.CreateSIToFP(V: Ops[0], DestTy: FTy);
5821 }
5822 case NEON::BI__builtin_neon_vcvth_f16_u16:
5823 case NEON::BI__builtin_neon_vcvth_f16_u32:
5824 case NEON::BI__builtin_neon_vcvth_f16_u64:
5825 usgn = true;
5826 [[fallthrough]];
5827 case NEON::BI__builtin_neon_vcvth_f16_s16:
5828 case NEON::BI__builtin_neon_vcvth_f16_s32:
5829 case NEON::BI__builtin_neon_vcvth_f16_s64: {
5830 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
5831 llvm::Type *FTy = HalfTy;
5832 llvm::Type *InTy;
5833 if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64)
5834 InTy = Int64Ty;
5835 else if (Ops[0]->getType()->getPrimitiveSizeInBits() == 32)
5836 InTy = Int32Ty;
5837 else
5838 InTy = Int16Ty;
5839 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: InTy);
5840 if (usgn)
5841 return Builder.CreateUIToFP(V: Ops[0], DestTy: FTy);
5842 return Builder.CreateSIToFP(V: Ops[0], DestTy: FTy);
5843 }
5844 case NEON::BI__builtin_neon_vcvtah_u16_f16:
5845 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
5846 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
5847 case NEON::BI__builtin_neon_vcvtph_u16_f16:
5848 case NEON::BI__builtin_neon_vcvth_u16_f16:
5849 case NEON::BI__builtin_neon_vcvtah_s16_f16:
5850 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
5851 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
5852 case NEON::BI__builtin_neon_vcvtph_s16_f16:
5853 case NEON::BI__builtin_neon_vcvth_s16_f16: {
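    // These conversions are emitted through the i32-result form of the fcvt*
    // intrinsics; the value is then truncated to i16.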
5854 unsigned Int;
5855 llvm::Type* InTy = Int32Ty;
5856 llvm::Type* FTy = HalfTy;
5857 llvm::Type *Tys[2] = {InTy, FTy};
5858 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
5859 switch (BuiltinID) {
5860 default: llvm_unreachable("missing builtin ID in switch!");
5861 case NEON::BI__builtin_neon_vcvtah_u16_f16:
5862 Int = Intrinsic::aarch64_neon_fcvtau; break;
5863 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
5864 Int = Intrinsic::aarch64_neon_fcvtmu; break;
5865 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
5866 Int = Intrinsic::aarch64_neon_fcvtnu; break;
5867 case NEON::BI__builtin_neon_vcvtph_u16_f16:
5868 Int = Intrinsic::aarch64_neon_fcvtpu; break;
5869 case NEON::BI__builtin_neon_vcvth_u16_f16:
5870 Int = Intrinsic::aarch64_neon_fcvtzu; break;
5871 case NEON::BI__builtin_neon_vcvtah_s16_f16:
5872 Int = Intrinsic::aarch64_neon_fcvtas; break;
5873 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
5874 Int = Intrinsic::aarch64_neon_fcvtms; break;
5875 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
5876 Int = Intrinsic::aarch64_neon_fcvtns; break;
5877 case NEON::BI__builtin_neon_vcvtph_s16_f16:
5878 Int = Intrinsic::aarch64_neon_fcvtps; break;
5879 case NEON::BI__builtin_neon_vcvth_s16_f16:
5880 Int = Intrinsic::aarch64_neon_fcvtzs; break;
5881 }
5882 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "fcvt");
5883 return Builder.CreateTrunc(V: Ops[0], DestTy: Int16Ty);
5884 }
5885 case NEON::BI__builtin_neon_vcaleh_f16:
5886 case NEON::BI__builtin_neon_vcalth_f16:
5887 case NEON::BI__builtin_neon_vcageh_f16:
5888 case NEON::BI__builtin_neon_vcagth_f16: {
5889 unsigned Int;
5890 llvm::Type* InTy = Int32Ty;
5891 llvm::Type* FTy = HalfTy;
5892 llvm::Type *Tys[2] = {InTy, FTy};
5893 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
5894 switch (BuiltinID) {
5895 default: llvm_unreachable("missing builtin ID in switch!");
5896 case NEON::BI__builtin_neon_vcageh_f16:
5897 Int = Intrinsic::aarch64_neon_facge; break;
5898 case NEON::BI__builtin_neon_vcagth_f16:
5899 Int = Intrinsic::aarch64_neon_facgt; break;
5900 case NEON::BI__builtin_neon_vcaleh_f16:
5901 Int = Intrinsic::aarch64_neon_facge; std::swap(a&: Ops[0], b&: Ops[1]); break;
5902 case NEON::BI__builtin_neon_vcalth_f16:
5903 Int = Intrinsic::aarch64_neon_facgt; std::swap(a&: Ops[0], b&: Ops[1]); break;
5904 }
5905 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "facg");
5906 return Builder.CreateTrunc(V: Ops[0], DestTy: Int16Ty);
5907 }
5908 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
5909 case NEON::BI__builtin_neon_vcvth_n_u16_f16: {
5910 unsigned Int;
5911 llvm::Type* InTy = Int32Ty;
5912 llvm::Type* FTy = HalfTy;
5913 llvm::Type *Tys[2] = {InTy, FTy};
5914 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
5915 switch (BuiltinID) {
5916 default: llvm_unreachable("missing builtin ID in switch!");
5917 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
5918 Int = Intrinsic::aarch64_neon_vcvtfp2fxs; break;
5919 case NEON::BI__builtin_neon_vcvth_n_u16_f16:
5920 Int = Intrinsic::aarch64_neon_vcvtfp2fxu; break;
5921 }
5922 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "fcvth_n");
5923 return Builder.CreateTrunc(V: Ops[0], DestTy: Int16Ty);
5924 }
5925 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
5926 case NEON::BI__builtin_neon_vcvth_n_f16_u16: {
5927 unsigned Int;
5928 llvm::Type* FTy = HalfTy;
5929 llvm::Type* InTy = Int32Ty;
5930 llvm::Type *Tys[2] = {FTy, InTy};
5931 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
5932 switch (BuiltinID) {
5933 default: llvm_unreachable("missing builtin ID in switch!");
5934 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
5935 Int = Intrinsic::aarch64_neon_vcvtfxs2fp;
5936 Ops[0] = Builder.CreateSExt(V: Ops[0], DestTy: InTy, Name: "sext");
5937 break;
5938 case NEON::BI__builtin_neon_vcvth_n_f16_u16:
5939 Int = Intrinsic::aarch64_neon_vcvtfxu2fp;
5940 Ops[0] = Builder.CreateZExt(V: Ops[0], DestTy: InTy);
5941 break;
5942 }
5943 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "fcvth_n");
5944 }
5945 case NEON::BI__builtin_neon_vpaddd_s64: {
5946 auto *Ty = llvm::FixedVectorType::get(ElementType: Int64Ty, NumElts: 2);
5947 Value *Vec = EmitScalarExpr(E: E->getArg(Arg: 0));
5948 // The vector is v2i64, so make sure it's bitcast to that.
5949 Vec = Builder.CreateBitCast(V: Vec, DestTy: Ty, Name: "v2i64");
5950 llvm::Value *Idx0 = llvm::ConstantInt::get(Ty: SizeTy, V: 0);
5951 llvm::Value *Idx1 = llvm::ConstantInt::get(Ty: SizeTy, V: 1);
5952 Value *Op0 = Builder.CreateExtractElement(Vec, Idx: Idx0, Name: "lane0");
5953 Value *Op1 = Builder.CreateExtractElement(Vec, Idx: Idx1, Name: "lane1");
5954 // Pairwise addition of a v2i64 into a scalar i64.
5955 return Builder.CreateAdd(LHS: Op0, RHS: Op1, Name: "vpaddd");
5956 }
5957 case NEON::BI__builtin_neon_vpaddd_f64: {
5958 auto *Ty = llvm::FixedVectorType::get(ElementType: DoubleTy, NumElts: 2);
5959 Value *Vec = EmitScalarExpr(E: E->getArg(Arg: 0));
5960 // The vector is v2f64, so make sure it's bitcast to that.
5961 Vec = Builder.CreateBitCast(V: Vec, DestTy: Ty, Name: "v2f64");
5962 llvm::Value *Idx0 = llvm::ConstantInt::get(Ty: SizeTy, V: 0);
5963 llvm::Value *Idx1 = llvm::ConstantInt::get(Ty: SizeTy, V: 1);
5964 Value *Op0 = Builder.CreateExtractElement(Vec, Idx: Idx0, Name: "lane0");
5965 Value *Op1 = Builder.CreateExtractElement(Vec, Idx: Idx1, Name: "lane1");
5966 // Pairwise addition of a v2f64 into a scalar f64.
5967 return Builder.CreateFAdd(L: Op0, R: Op1, Name: "vpaddd");
5968 }
5969 case NEON::BI__builtin_neon_vpadds_f32: {
5970 auto *Ty = llvm::FixedVectorType::get(ElementType: FloatTy, NumElts: 2);
5971 Value *Vec = EmitScalarExpr(E: E->getArg(Arg: 0));
5972 // The vector is v2f32, so make sure it's bitcast to that.
5973 Vec = Builder.CreateBitCast(V: Vec, DestTy: Ty, Name: "v2f32");
5974 llvm::Value *Idx0 = llvm::ConstantInt::get(Ty: SizeTy, V: 0);
5975 llvm::Value *Idx1 = llvm::ConstantInt::get(Ty: SizeTy, V: 1);
5976 Value *Op0 = Builder.CreateExtractElement(Vec, Idx: Idx0, Name: "lane0");
5977 Value *Op1 = Builder.CreateExtractElement(Vec, Idx: Idx1, Name: "lane1");
5978 // Pairwise addition of a v2f32 into a scalar f32.
5979 return Builder.CreateFAdd(L: Op0, R: Op1, Name: "vpaddd");
5980 }
5981 case NEON::BI__builtin_neon_vceqzd_s64:
5982 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
5983 return EmitAArch64CompareBuiltinExpr(
5984 Op: Ops[0], Ty: ConvertType(T: E->getCallReturnType(Ctx: getContext())),
5985 Pred: ICmpInst::ICMP_EQ, Name: "vceqz");
5986 case NEON::BI__builtin_neon_vceqzd_f64:
5987 case NEON::BI__builtin_neon_vceqzs_f32:
5988 case NEON::BI__builtin_neon_vceqzh_f16:
5989 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
5990 return EmitAArch64CompareBuiltinExpr(
5991 Op: Ops[0], Ty: ConvertType(T: E->getCallReturnType(Ctx: getContext())),
5992 Pred: ICmpInst::FCMP_OEQ, Name: "vceqz");
5993 case NEON::BI__builtin_neon_vcgezd_s64:
5994 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
5995 return EmitAArch64CompareBuiltinExpr(
5996 Op: Ops[0], Ty: ConvertType(T: E->getCallReturnType(Ctx: getContext())),
5997 Pred: ICmpInst::ICMP_SGE, Name: "vcgez");
5998 case NEON::BI__builtin_neon_vcgezd_f64:
5999 case NEON::BI__builtin_neon_vcgezs_f32:
6000 case NEON::BI__builtin_neon_vcgezh_f16:
6001 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
6002 return EmitAArch64CompareBuiltinExpr(
6003 Op: Ops[0], Ty: ConvertType(T: E->getCallReturnType(Ctx: getContext())),
6004 Pred: ICmpInst::FCMP_OGE, Name: "vcgez");
6005 case NEON::BI__builtin_neon_vclezd_s64:
6006 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
6007 return EmitAArch64CompareBuiltinExpr(
6008 Op: Ops[0], Ty: ConvertType(T: E->getCallReturnType(Ctx: getContext())),
6009 Pred: ICmpInst::ICMP_SLE, Name: "vclez");
6010 case NEON::BI__builtin_neon_vclezd_f64:
6011 case NEON::BI__builtin_neon_vclezs_f32:
6012 case NEON::BI__builtin_neon_vclezh_f16:
6013 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
6014 return EmitAArch64CompareBuiltinExpr(
6015 Op: Ops[0], Ty: ConvertType(T: E->getCallReturnType(Ctx: getContext())),
6016 Pred: ICmpInst::FCMP_OLE, Name: "vclez");
6017 case NEON::BI__builtin_neon_vcgtzd_s64:
6018 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
6019 return EmitAArch64CompareBuiltinExpr(
6020 Op: Ops[0], Ty: ConvertType(T: E->getCallReturnType(Ctx: getContext())),
6021 Pred: ICmpInst::ICMP_SGT, Name: "vcgtz");
6022 case NEON::BI__builtin_neon_vcgtzd_f64:
6023 case NEON::BI__builtin_neon_vcgtzs_f32:
6024 case NEON::BI__builtin_neon_vcgtzh_f16:
6025 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
6026 return EmitAArch64CompareBuiltinExpr(
6027 Op: Ops[0], Ty: ConvertType(T: E->getCallReturnType(Ctx: getContext())),
6028 Pred: ICmpInst::FCMP_OGT, Name: "vcgtz");
6029 case NEON::BI__builtin_neon_vcltzd_s64:
6030 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
6031 return EmitAArch64CompareBuiltinExpr(
6032 Op: Ops[0], Ty: ConvertType(T: E->getCallReturnType(Ctx: getContext())),
6033 Pred: ICmpInst::ICMP_SLT, Name: "vcltz");
6034
6035 case NEON::BI__builtin_neon_vcltzd_f64:
6036 case NEON::BI__builtin_neon_vcltzs_f32:
6037 case NEON::BI__builtin_neon_vcltzh_f16:
6038 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
6039 return EmitAArch64CompareBuiltinExpr(
6040 Op: Ops[0], Ty: ConvertType(T: E->getCallReturnType(Ctx: getContext())),
6041 Pred: ICmpInst::FCMP_OLT, Name: "vcltz");
6042
6043 case NEON::BI__builtin_neon_vceqzd_u64: {
6044 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
6045 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Int64Ty);
6046 Ops[0] =
6047 Builder.CreateICmpEQ(LHS: Ops[0], RHS: llvm::Constant::getNullValue(Ty: Int64Ty));
6048 return Builder.CreateSExt(V: Ops[0], DestTy: Int64Ty, Name: "vceqzd");
6049 }
6050 case NEON::BI__builtin_neon_vceqd_f64:
6051 case NEON::BI__builtin_neon_vcled_f64:
6052 case NEON::BI__builtin_neon_vcltd_f64:
6053 case NEON::BI__builtin_neon_vcged_f64:
6054 case NEON::BI__builtin_neon_vcgtd_f64: {
6055 llvm::CmpInst::Predicate P;
6056 switch (BuiltinID) {
6057 default: llvm_unreachable("missing builtin ID in switch!");
6058 case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
6059 case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
6060 case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
6061 case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
6062 case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
6063 }
6064 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
6065 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: DoubleTy);
6066 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: DoubleTy);
6067 if (P == llvm::FCmpInst::FCMP_OEQ)
6068 Ops[0] = Builder.CreateFCmp(P, LHS: Ops[0], RHS: Ops[1]);
6069 else
6070 Ops[0] = Builder.CreateFCmpS(P, LHS: Ops[0], RHS: Ops[1]);
6071 return Builder.CreateSExt(V: Ops[0], DestTy: Int64Ty, Name: "vcmpd");
6072 }
6073 case NEON::BI__builtin_neon_vceqs_f32:
6074 case NEON::BI__builtin_neon_vcles_f32:
6075 case NEON::BI__builtin_neon_vclts_f32:
6076 case NEON::BI__builtin_neon_vcges_f32:
6077 case NEON::BI__builtin_neon_vcgts_f32: {
6078 llvm::CmpInst::Predicate P;
6079 switch (BuiltinID) {
6080 default: llvm_unreachable("missing builtin ID in switch!");
6081 case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
6082 case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
6083 case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
6084 case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
6085 case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
6086 }
6087 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
6088 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: FloatTy);
6089 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: FloatTy);
6090 if (P == llvm::FCmpInst::FCMP_OEQ)
6091 Ops[0] = Builder.CreateFCmp(P, LHS: Ops[0], RHS: Ops[1]);
6092 else
6093 Ops[0] = Builder.CreateFCmpS(P, LHS: Ops[0], RHS: Ops[1]);
6094 return Builder.CreateSExt(V: Ops[0], DestTy: Int32Ty, Name: "vcmpd");
6095 }
6096 case NEON::BI__builtin_neon_vceqh_f16:
6097 case NEON::BI__builtin_neon_vcleh_f16:
6098 case NEON::BI__builtin_neon_vclth_f16:
6099 case NEON::BI__builtin_neon_vcgeh_f16:
6100 case NEON::BI__builtin_neon_vcgth_f16: {
6101 llvm::CmpInst::Predicate P;
6102 switch (BuiltinID) {
6103 default: llvm_unreachable("missing builtin ID in switch!");
6104 case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ; break;
6105 case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE; break;
6106 case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT; break;
6107 case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE; break;
6108 case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT; break;
6109 }
6110 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
6111 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: HalfTy);
6112 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: HalfTy);
6113 if (P == llvm::FCmpInst::FCMP_OEQ)
6114 Ops[0] = Builder.CreateFCmp(P, LHS: Ops[0], RHS: Ops[1]);
6115 else
6116 Ops[0] = Builder.CreateFCmpS(P, LHS: Ops[0], RHS: Ops[1]);
6117 return Builder.CreateSExt(V: Ops[0], DestTy: Int16Ty, Name: "vcmpd");
6118 }
6119 case NEON::BI__builtin_neon_vceqd_s64:
6120 case NEON::BI__builtin_neon_vceqd_u64:
6121 case NEON::BI__builtin_neon_vcgtd_s64:
6122 case NEON::BI__builtin_neon_vcgtd_u64:
6123 case NEON::BI__builtin_neon_vcltd_s64:
6124 case NEON::BI__builtin_neon_vcltd_u64:
6125 case NEON::BI__builtin_neon_vcged_u64:
6126 case NEON::BI__builtin_neon_vcged_s64:
6127 case NEON::BI__builtin_neon_vcled_u64:
6128 case NEON::BI__builtin_neon_vcled_s64: {
6129 llvm::CmpInst::Predicate P;
6130 switch (BuiltinID) {
6131 default: llvm_unreachable("missing builtin ID in switch!");
6132 case NEON::BI__builtin_neon_vceqd_s64:
6133 case NEON::BI__builtin_neon_vceqd_u64: P = llvm::ICmpInst::ICMP_EQ; break;
6134 case NEON::BI__builtin_neon_vcgtd_s64: P = llvm::ICmpInst::ICMP_SGT; break;
6135 case NEON::BI__builtin_neon_vcgtd_u64: P = llvm::ICmpInst::ICMP_UGT; break;
6136 case NEON::BI__builtin_neon_vcltd_s64: P = llvm::ICmpInst::ICMP_SLT; break;
6137 case NEON::BI__builtin_neon_vcltd_u64: P = llvm::ICmpInst::ICMP_ULT; break;
6138 case NEON::BI__builtin_neon_vcged_u64: P = llvm::ICmpInst::ICMP_UGE; break;
6139 case NEON::BI__builtin_neon_vcged_s64: P = llvm::ICmpInst::ICMP_SGE; break;
6140 case NEON::BI__builtin_neon_vcled_u64: P = llvm::ICmpInst::ICMP_ULE; break;
6141 case NEON::BI__builtin_neon_vcled_s64: P = llvm::ICmpInst::ICMP_SLE; break;
6142 }
6143 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
6144 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Int64Ty);
6145 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Int64Ty);
6146 Ops[0] = Builder.CreateICmp(P, LHS: Ops[0], RHS: Ops[1]);
6147 return Builder.CreateSExt(V: Ops[0], DestTy: Int64Ty, Name: "vceqd");
6148 }
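  // vtstd lowers to a bitwise AND, a compare against zero, and a
  // sign-extension of the i1 result, i.e. (a & b) != 0 widened to an i64 mask.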
6149 case NEON::BI__builtin_neon_vtstd_s64:
6150 case NEON::BI__builtin_neon_vtstd_u64: {
6151 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
6152 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Int64Ty);
6153 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Int64Ty);
6154 Ops[0] = Builder.CreateAnd(LHS: Ops[0], RHS: Ops[1]);
6155 Ops[0] = Builder.CreateICmp(P: ICmpInst::ICMP_NE, LHS: Ops[0],
6156 RHS: llvm::Constant::getNullValue(Ty: Int64Ty));
6157 return Builder.CreateSExt(V: Ops[0], DestTy: Int64Ty, Name: "vtstd");
6158 }
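  // The vset_lane/vsetq_lane family maps directly onto insertelement; the
  // lane index comes from the third builtin argument. Only the f64 and mf8
  // variants below need an extra bitcast first.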
6159 case NEON::BI__builtin_neon_vset_lane_i8:
6160 case NEON::BI__builtin_neon_vset_lane_i16:
6161 case NEON::BI__builtin_neon_vset_lane_i32:
6162 case NEON::BI__builtin_neon_vset_lane_i64:
6163 case NEON::BI__builtin_neon_vset_lane_bf16:
6164 case NEON::BI__builtin_neon_vset_lane_f32:
6165 case NEON::BI__builtin_neon_vsetq_lane_i8:
6166 case NEON::BI__builtin_neon_vsetq_lane_i16:
6167 case NEON::BI__builtin_neon_vsetq_lane_i32:
6168 case NEON::BI__builtin_neon_vsetq_lane_i64:
6169 case NEON::BI__builtin_neon_vsetq_lane_bf16:
6170 case NEON::BI__builtin_neon_vsetq_lane_f32:
6171 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 2)));
6172 return Builder.CreateInsertElement(Vec: Ops[1], NewElt: Ops[0], Idx: Ops[2], Name: "vset_lane");
6173 case NEON::BI__builtin_neon_vset_lane_f64:
6174 // The vector type needs a cast for the v1f64 variant.
6175 Ops[1] =
6176 Builder.CreateBitCast(V: Ops[1], DestTy: llvm::FixedVectorType::get(ElementType: DoubleTy, NumElts: 1));
6177 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 2)));
6178 return Builder.CreateInsertElement(Vec: Ops[1], NewElt: Ops[0], Idx: Ops[2], Name: "vset_lane");
6179 case NEON::BI__builtin_neon_vset_lane_mf8:
6180 case NEON::BI__builtin_neon_vsetq_lane_mf8:
6181 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 2)));
6182 // The input value has a vector type; bitcast it to the i8 scalar type before inserting.
6183 Ops[0] =
6184 Builder.CreateBitCast(V: Ops[0], DestTy: llvm::Type::getInt8Ty(C&: getLLVMContext()));
6185 return Builder.CreateInsertElement(Vec: Ops[1], NewElt: Ops[0], Idx: Ops[2], Name: "vset_lane");
6186 case NEON::BI__builtin_neon_vsetq_lane_f64:
6187 // The vector type needs a cast for the v2f64 variant.
6188 Ops[1] =
6189 Builder.CreateBitCast(V: Ops[1], DestTy: llvm::FixedVectorType::get(ElementType: DoubleTy, NumElts: 2));
6190 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 2)));
6191 return Builder.CreateInsertElement(Vec: Ops[1], NewElt: Ops[0], Idx: Ops[2], Name: "vset_lane");
6192
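  // The vget_lane/vdup<b,h,s,d>_lane(q) scalar forms below are extractelement
  // after bitcasting the input to the fixed vector type the builtin expects;
  // the mf8 forms can extract directly without a cast.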
6193 case NEON::BI__builtin_neon_vget_lane_i8:
6194 case NEON::BI__builtin_neon_vdupb_lane_i8:
6195 Ops[0] =
6196 Builder.CreateBitCast(V: Ops[0], DestTy: llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 8));
6197 return Builder.CreateExtractElement(Vec: Ops[0], Idx: EmitScalarExpr(E: E->getArg(Arg: 1)),
6198 Name: "vget_lane");
6199 case NEON::BI__builtin_neon_vgetq_lane_i8:
6200 case NEON::BI__builtin_neon_vdupb_laneq_i8:
6201 Ops[0] =
6202 Builder.CreateBitCast(V: Ops[0], DestTy: llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 16));
6203 return Builder.CreateExtractElement(Vec: Ops[0], Idx: EmitScalarExpr(E: E->getArg(Arg: 1)),
6204 Name: "vgetq_lane");
6205 case NEON::BI__builtin_neon_vget_lane_mf8:
6206 case NEON::BI__builtin_neon_vdupb_lane_mf8:
6207 case NEON::BI__builtin_neon_vgetq_lane_mf8:
6208 case NEON::BI__builtin_neon_vdupb_laneq_mf8:
6209 return Builder.CreateExtractElement(Vec: Ops[0], Idx: EmitScalarExpr(E: E->getArg(Arg: 1)),
6210 Name: "vget_lane");
6211 case NEON::BI__builtin_neon_vget_lane_i16:
6212 case NEON::BI__builtin_neon_vduph_lane_i16:
6213 Ops[0] =
6214 Builder.CreateBitCast(V: Ops[0], DestTy: llvm::FixedVectorType::get(ElementType: Int16Ty, NumElts: 4));
6215 return Builder.CreateExtractElement(Vec: Ops[0], Idx: EmitScalarExpr(E: E->getArg(Arg: 1)),
6216 Name: "vget_lane");
6217 case NEON::BI__builtin_neon_vgetq_lane_i16:
6218 case NEON::BI__builtin_neon_vduph_laneq_i16:
6219 Ops[0] =
6220 Builder.CreateBitCast(V: Ops[0], DestTy: llvm::FixedVectorType::get(ElementType: Int16Ty, NumElts: 8));
6221 return Builder.CreateExtractElement(Vec: Ops[0], Idx: EmitScalarExpr(E: E->getArg(Arg: 1)),
6222 Name: "vgetq_lane");
6223 case NEON::BI__builtin_neon_vget_lane_i32:
6224 case NEON::BI__builtin_neon_vdups_lane_i32:
6225 Ops[0] =
6226 Builder.CreateBitCast(V: Ops[0], DestTy: llvm::FixedVectorType::get(ElementType: Int32Ty, NumElts: 2));
6227 return Builder.CreateExtractElement(Vec: Ops[0], Idx: EmitScalarExpr(E: E->getArg(Arg: 1)),
6228 Name: "vget_lane");
6229 case NEON::BI__builtin_neon_vdups_lane_f32:
6230 Ops[0] =
6231 Builder.CreateBitCast(V: Ops[0], DestTy: llvm::FixedVectorType::get(ElementType: FloatTy, NumElts: 2));
6232 return Builder.CreateExtractElement(Vec: Ops[0], Idx: EmitScalarExpr(E: E->getArg(Arg: 1)),
6233 Name: "vdups_lane");
6234 case NEON::BI__builtin_neon_vgetq_lane_i32:
6235 case NEON::BI__builtin_neon_vdups_laneq_i32:
6236 Ops[0] =
6237 Builder.CreateBitCast(V: Ops[0], DestTy: llvm::FixedVectorType::get(ElementType: Int32Ty, NumElts: 4));
6238 return Builder.CreateExtractElement(Vec: Ops[0], Idx: EmitScalarExpr(E: E->getArg(Arg: 1)),
6239 Name: "vgetq_lane");
6240 case NEON::BI__builtin_neon_vget_lane_i64:
6241 case NEON::BI__builtin_neon_vdupd_lane_i64:
6242 Ops[0] =
6243 Builder.CreateBitCast(V: Ops[0], DestTy: llvm::FixedVectorType::get(ElementType: Int64Ty, NumElts: 1));
6244 return Builder.CreateExtractElement(Vec: Ops[0], Idx: EmitScalarExpr(E: E->getArg(Arg: 1)),
6245 Name: "vget_lane");
6246 case NEON::BI__builtin_neon_vdupd_lane_f64:
6247 Ops[0] =
6248 Builder.CreateBitCast(V: Ops[0], DestTy: llvm::FixedVectorType::get(ElementType: DoubleTy, NumElts: 1));
6249 return Builder.CreateExtractElement(Vec: Ops[0], Idx: EmitScalarExpr(E: E->getArg(Arg: 1)),
6250 Name: "vdupd_lane");
6251 case NEON::BI__builtin_neon_vgetq_lane_i64:
6252 case NEON::BI__builtin_neon_vdupd_laneq_i64:
6253 Ops[0] =
6254 Builder.CreateBitCast(V: Ops[0], DestTy: llvm::FixedVectorType::get(ElementType: Int64Ty, NumElts: 2));
6255 return Builder.CreateExtractElement(Vec: Ops[0], Idx: EmitScalarExpr(E: E->getArg(Arg: 1)),
6256 Name: "vgetq_lane");
6257 case NEON::BI__builtin_neon_vget_lane_f32:
6258 Ops[0] =
6259 Builder.CreateBitCast(V: Ops[0], DestTy: llvm::FixedVectorType::get(ElementType: FloatTy, NumElts: 2));
6260 return Builder.CreateExtractElement(Vec: Ops[0], Idx: EmitScalarExpr(E: E->getArg(Arg: 1)),
6261 Name: "vget_lane");
6262 case NEON::BI__builtin_neon_vget_lane_f64:
6263 Ops[0] =
6264 Builder.CreateBitCast(V: Ops[0], DestTy: llvm::FixedVectorType::get(ElementType: DoubleTy, NumElts: 1));
6265 return Builder.CreateExtractElement(Vec: Ops[0], Idx: EmitScalarExpr(E: E->getArg(Arg: 1)),
6266 Name: "vget_lane");
6267 case NEON::BI__builtin_neon_vgetq_lane_f32:
6268 case NEON::BI__builtin_neon_vdups_laneq_f32:
6269 Ops[0] =
6270 Builder.CreateBitCast(V: Ops[0], DestTy: llvm::FixedVectorType::get(ElementType: FloatTy, NumElts: 4));
6271 return Builder.CreateExtractElement(Vec: Ops[0], Idx: EmitScalarExpr(E: E->getArg(Arg: 1)),
6272 Name: "vgetq_lane");
6273 case NEON::BI__builtin_neon_vgetq_lane_f64:
6274 case NEON::BI__builtin_neon_vdupd_laneq_f64:
6275 Ops[0] =
6276 Builder.CreateBitCast(V: Ops[0], DestTy: llvm::FixedVectorType::get(ElementType: DoubleTy, NumElts: 2));
6277 return Builder.CreateExtractElement(Vec: Ops[0], Idx: EmitScalarExpr(E: E->getArg(Arg: 1)),
6278 Name: "vgetq_lane");
6279 case NEON::BI__builtin_neon_vaddh_f16:
6280 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
6281 return Builder.CreateFAdd(L: Ops[0], R: Ops[1], Name: "vaddh");
6282 case NEON::BI__builtin_neon_vsubh_f16:
6283 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
6284 return Builder.CreateFSub(L: Ops[0], R: Ops[1], Name: "vsubh");
6285 case NEON::BI__builtin_neon_vmulh_f16:
6286 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
6287 return Builder.CreateFMul(L: Ops[0], R: Ops[1], Name: "vmulh");
6288 case NEON::BI__builtin_neon_vdivh_f16:
6289 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
6290 return Builder.CreateFDiv(L: Ops[0], R: Ops[1], Name: "vdivh");
6291 case NEON::BI__builtin_neon_vfmah_f16:
6292 // The NEON intrinsic puts the accumulator first, unlike the LLVM fma intrinsic.
6293 return emitCallMaybeConstrainedFPBuiltin(
6294 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
6295 {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]});
6296 case NEON::BI__builtin_neon_vfmsh_f16: {
6297 Value *Neg = Builder.CreateFNeg(V: EmitScalarExpr(E: E->getArg(Arg: 1)), Name: "vsubh");
6298
6299 // The NEON intrinsic puts the accumulator first, unlike the LLVM fma intrinsic.
6300 return emitCallMaybeConstrainedFPBuiltin(
6301 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
6302 {Neg, EmitScalarExpr(E->getArg(2)), Ops[0]});
6303 }
6304 case NEON::BI__builtin_neon_vaddd_s64:
6305 case NEON::BI__builtin_neon_vaddd_u64:
6306 return Builder.CreateAdd(LHS: Ops[0], RHS: EmitScalarExpr(E: E->getArg(Arg: 1)), Name: "vaddd");
6307 case NEON::BI__builtin_neon_vsubd_s64:
6308 case NEON::BI__builtin_neon_vsubd_u64:
6309 return Builder.CreateSub(LHS: Ops[0], RHS: EmitScalarExpr(E: E->getArg(Arg: 1)), Name: "vsubd");
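  // Scalar saturating multiply-accumulate (vqdmlalh/vqdmlslh) is emulated on
  // vectors: the i16 operands are widened into <4 x i16> vectors, multiplied
  // with aarch64.neon.sqdmull, lane 0 of the <4 x i32> result is extracted,
  // and the accumulation is done with sqadd/sqsub on i32.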
6310 case NEON::BI__builtin_neon_vqdmlalh_s16:
6311 case NEON::BI__builtin_neon_vqdmlslh_s16: {
6312 SmallVector<Value *, 2> ProductOps;
6313 ProductOps.push_back(Elt: vectorWrapScalar16(Op: Ops[1]));
6314 ProductOps.push_back(Elt: vectorWrapScalar16(Op: EmitScalarExpr(E: E->getArg(Arg: 2))));
6315 auto *VTy = llvm::FixedVectorType::get(ElementType: Int32Ty, NumElts: 4);
6316 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
6317 ProductOps, "vqdmlXl");
6318 Constant *CI = ConstantInt::get(Ty: SizeTy, V: 0);
6319 Ops[1] = Builder.CreateExtractElement(Vec: Ops[1], Idx: CI, Name: "lane0");
6320
6321 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
6322 ? Intrinsic::aarch64_neon_sqadd
6323 : Intrinsic::aarch64_neon_sqsub;
6324 return EmitNeonCall(F: CGM.getIntrinsic(IID: AccumInt, Tys: Int32Ty), Ops, name: "vqdmlXl");
6325 }
6326 case NEON::BI__builtin_neon_vqshlud_n_s64: {
6327 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
6328 Ops[1] = Builder.CreateZExt(V: Ops[1], DestTy: Int64Ty);
6329 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
6330 Ops, "vqshlu_n");
6331 }
6332 case NEON::BI__builtin_neon_vqshld_n_u64:
6333 case NEON::BI__builtin_neon_vqshld_n_s64: {
6334 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
6335 ? Intrinsic::aarch64_neon_uqshl
6336 : Intrinsic::aarch64_neon_sqshl;
6337 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
6338 Ops[1] = Builder.CreateZExt(V: Ops[1], DestTy: Int64Ty);
6339 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Int64Ty), Ops, name: "vqshl_n");
6340 }
6341 case NEON::BI__builtin_neon_vrshrd_n_u64:
6342 case NEON::BI__builtin_neon_vrshrd_n_s64: {
6343 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
6344 ? Intrinsic::aarch64_neon_urshl
6345 : Intrinsic::aarch64_neon_srshl;
6346 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
6347 int SV = cast<ConstantInt>(Val: Ops[1])->getSExtValue();
6348 Ops[1] = ConstantInt::get(Ty: Int64Ty, V: -SV);
6349 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Int64Ty), Ops, name: "vrshr_n");
6350 }
6351 case NEON::BI__builtin_neon_vrsrad_n_u64:
6352 case NEON::BI__builtin_neon_vrsrad_n_s64: {
6353 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
6354 ? Intrinsic::aarch64_neon_urshl
6355 : Intrinsic::aarch64_neon_srshl;
6356 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Int64Ty);
6357 Ops.push_back(Elt: Builder.CreateNeg(V: EmitScalarExpr(E: E->getArg(Arg: 2))));
6358 Ops[1] = Builder.CreateCall(Callee: CGM.getIntrinsic(IID: Int, Tys: Int64Ty),
6359 Args: {Ops[1], Builder.CreateSExt(V: Ops[2], DestTy: Int64Ty)});
6360 return Builder.CreateAdd(LHS: Ops[0], RHS: Builder.CreateBitCast(V: Ops[1], DestTy: Int64Ty));
6361 }
6362 case NEON::BI__builtin_neon_vshld_n_s64:
6363 case NEON::BI__builtin_neon_vshld_n_u64: {
6364 llvm::ConstantInt *Amt = cast<ConstantInt>(Val: EmitScalarExpr(E: E->getArg(Arg: 1)));
6365 return Builder.CreateShl(
6366 LHS: Ops[0], RHS: ConstantInt::get(Ty: Int64Ty, V: Amt->getZExtValue()), Name: "shld_n");
6367 }
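  // For the scalar right shifts the immediate may be 64, which plain IR
  // ashr/lshr cannot express: signed shifts are clamped to 63 (which gives
  // the same result), and an unsigned shift by 64 is folded to 0 below.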
6368 case NEON::BI__builtin_neon_vshrd_n_s64: {
6369 llvm::ConstantInt *Amt = cast<ConstantInt>(Val: EmitScalarExpr(E: E->getArg(Arg: 1)));
6370 return Builder.CreateAShr(
6371 LHS: Ops[0], RHS: ConstantInt::get(Ty: Int64Ty, V: std::min(a: static_cast<uint64_t>(63),
6372 b: Amt->getZExtValue())),
6373 Name: "shrd_n");
6374 }
6375 case NEON::BI__builtin_neon_vshrd_n_u64: {
6376 llvm::ConstantInt *Amt = cast<ConstantInt>(Val: EmitScalarExpr(E: E->getArg(Arg: 1)));
6377 uint64_t ShiftAmt = Amt->getZExtValue();
6378 // Right-shifting an unsigned value by its size yields 0.
6379 if (ShiftAmt == 64)
6380 return ConstantInt::get(Ty: Int64Ty, V: 0);
6381 return Builder.CreateLShr(LHS: Ops[0], RHS: ConstantInt::get(Ty: Int64Ty, V: ShiftAmt),
6382 Name: "shrd_n");
6383 }
6384 case NEON::BI__builtin_neon_vsrad_n_s64: {
6385 llvm::ConstantInt *Amt = cast<ConstantInt>(Val: EmitScalarExpr(E: E->getArg(Arg: 2)));
6386 Ops[1] = Builder.CreateAShr(
6387 LHS: Ops[1], RHS: ConstantInt::get(Ty: Int64Ty, V: std::min(a: static_cast<uint64_t>(63),
6388 b: Amt->getZExtValue())),
6389 Name: "shrd_n");
6390 return Builder.CreateAdd(LHS: Ops[0], RHS: Ops[1]);
6391 }
6392 case NEON::BI__builtin_neon_vsrad_n_u64: {
6393 llvm::ConstantInt *Amt = cast<ConstantInt>(Val: EmitScalarExpr(E: E->getArg(Arg: 2)));
6394 uint64_t ShiftAmt = Amt->getZExtValue();
6395 // Right-shifting an unsigned value by its size yields 0.
6396 // As Op + 0 = Op, return Ops[0] directly.
6397 if (ShiftAmt == 64)
6398 return Ops[0];
6399 Ops[1] = Builder.CreateLShr(LHS: Ops[1], RHS: ConstantInt::get(Ty: Int64Ty, V: ShiftAmt),
6400 Name: "shrd_n");
6401 return Builder.CreateAdd(LHS: Ops[0], RHS: Ops[1]);
6402 }
6403 case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
6404 case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
6405 case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
6406 case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
6407 Ops[2] = Builder.CreateExtractElement(Vec: Ops[2], Idx: EmitScalarExpr(E: E->getArg(Arg: 3)),
6408 Name: "lane");
6409 SmallVector<Value *, 2> ProductOps;
6410 ProductOps.push_back(Elt: vectorWrapScalar16(Op: Ops[1]));
6411 ProductOps.push_back(Elt: vectorWrapScalar16(Op: Ops[2]));
6412 auto *VTy = llvm::FixedVectorType::get(ElementType: Int32Ty, NumElts: 4);
6413 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
6414 ProductOps, "vqdmlXl");
6415 Constant *CI = ConstantInt::get(Ty: SizeTy, V: 0);
6416 Ops[1] = Builder.CreateExtractElement(Vec: Ops[1], Idx: CI, Name: "lane0");
6417 Ops.pop_back();
6418
6419 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
6420 BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
6421 ? Intrinsic::aarch64_neon_sqadd
6422 : Intrinsic::aarch64_neon_sqsub;
6423 return EmitNeonCall(F: CGM.getIntrinsic(IID: AccInt, Tys: Int32Ty), Ops, name: "vqdmlXl");
6424 }
6425 case NEON::BI__builtin_neon_vqdmlals_s32:
6426 case NEON::BI__builtin_neon_vqdmlsls_s32: {
6427 SmallVector<Value *, 2> ProductOps;
6428 ProductOps.push_back(Elt: Ops[1]);
6429 ProductOps.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 2)));
6430 Ops[1] =
6431 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
6432 ProductOps, "vqdmlXl");
6433
6434 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
6435 ? Intrinsic::aarch64_neon_sqadd
6436 : Intrinsic::aarch64_neon_sqsub;
6437 return EmitNeonCall(F: CGM.getIntrinsic(IID: AccumInt, Tys: Int64Ty), Ops, name: "vqdmlXl");
6438 }
6439 case NEON::BI__builtin_neon_vqdmlals_lane_s32:
6440 case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
6441 case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
6442 case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
6443 Ops[2] = Builder.CreateExtractElement(Vec: Ops[2], Idx: EmitScalarExpr(E: E->getArg(Arg: 3)),
6444 Name: "lane");
6445 SmallVector<Value *, 2> ProductOps;
6446 ProductOps.push_back(Elt: Ops[1]);
6447 ProductOps.push_back(Elt: Ops[2]);
6448 Ops[1] =
6449 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
6450 ProductOps, "vqdmlXl");
6451 Ops.pop_back();
6452
6453 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
6454 BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
6455 ? Intrinsic::aarch64_neon_sqadd
6456 : Intrinsic::aarch64_neon_sqsub;
6457 return EmitNeonCall(F: CGM.getIntrinsic(IID: AccInt, Tys: Int64Ty), Ops, name: "vqdmlXl");
6458 }
6459 case NEON::BI__builtin_neon_vget_lane_bf16:
6460 case NEON::BI__builtin_neon_vduph_lane_bf16:
6461 case NEON::BI__builtin_neon_vduph_lane_f16: {
6462 return Builder.CreateExtractElement(Vec: Ops[0], Idx: EmitScalarExpr(E: E->getArg(Arg: 1)),
6463 Name: "vget_lane");
6464 }
6465 case NEON::BI__builtin_neon_vgetq_lane_bf16:
6466 case NEON::BI__builtin_neon_vduph_laneq_bf16:
6467 case NEON::BI__builtin_neon_vduph_laneq_f16: {
6468 return Builder.CreateExtractElement(Vec: Ops[0], Idx: EmitScalarExpr(E: E->getArg(Arg: 1)),
6469 Name: "vgetq_lane");
6470 }
6471 case NEON::BI__builtin_neon_vcvt_bf16_f32: {
6472 llvm::Type *V4F32 = FixedVectorType::get(ElementType: Builder.getFloatTy(), NumElts: 4);
6473 llvm::Type *V4BF16 = FixedVectorType::get(ElementType: Builder.getBFloatTy(), NumElts: 4);
6474 return Builder.CreateFPTrunc(V: Builder.CreateBitCast(V: Ops[0], DestTy: V4F32), DestTy: V4BF16);
6475 }
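  // vcvtq_low_bf16_f32 truncates the four f32 lanes to bf16 and zero-fills
  // the upper half of the result; vcvtq_high_bf16_f32 keeps the existing low
  // half of the bf16 accumulator and places the truncated lanes in the upper
  // half via a shuffle.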
6476 case NEON::BI__builtin_neon_vcvtq_low_bf16_f32: {
6477 SmallVector<int, 16> ConcatMask(8);
6478 std::iota(first: ConcatMask.begin(), last: ConcatMask.end(), value: 0);
6479 llvm::Type *V4F32 = FixedVectorType::get(ElementType: Builder.getFloatTy(), NumElts: 4);
6480 llvm::Type *V4BF16 = FixedVectorType::get(ElementType: Builder.getBFloatTy(), NumElts: 4);
6481 llvm::Value *Trunc =
6482 Builder.CreateFPTrunc(V: Builder.CreateBitCast(V: Ops[0], DestTy: V4F32), DestTy: V4BF16);
6483 return Builder.CreateShuffleVector(
6484 V1: Trunc, V2: ConstantAggregateZero::get(Ty: V4BF16), Mask: ConcatMask);
6485 }
6486 case NEON::BI__builtin_neon_vcvtq_high_bf16_f32: {
6487 SmallVector<int, 16> ConcatMask(8);
6488 std::iota(first: ConcatMask.begin(), last: ConcatMask.end(), value: 0);
6489 SmallVector<int, 16> LoMask(4);
6490 std::iota(first: LoMask.begin(), last: LoMask.end(), value: 0);
6491 llvm::Type *V4F32 = FixedVectorType::get(ElementType: Builder.getFloatTy(), NumElts: 4);
6492 llvm::Type *V4BF16 = FixedVectorType::get(ElementType: Builder.getBFloatTy(), NumElts: 4);
6493 llvm::Type *V8BF16 = FixedVectorType::get(ElementType: Builder.getBFloatTy(), NumElts: 8);
6494 llvm::Value *Inactive = Builder.CreateShuffleVector(
6495 V: Builder.CreateBitCast(V: Ops[0], DestTy: V8BF16), Mask: LoMask);
6496 llvm::Value *Trunc =
6497 Builder.CreateFPTrunc(V: Builder.CreateBitCast(V: Ops[1], DestTy: V4F32), DestTy: V4BF16);
6498 return Builder.CreateShuffleVector(V1: Inactive, V2: Trunc, Mask: ConcatMask);
6499 }
6500
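  // _InterlockedAdd returns the value *after* the addition, but atomicrmw add
  // yields the value that was previously in memory, so the operand is added
  // back to the result.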
6501 case clang::AArch64::BI_InterlockedAdd:
6502 case clang::AArch64::BI_InterlockedAdd64: {
6503 Address DestAddr = CheckAtomicAlignment(CGF&: *this, E);
6504 Value *Val = EmitScalarExpr(E: E->getArg(Arg: 1));
6505 AtomicRMWInst *RMWI =
6506 Builder.CreateAtomicRMW(Op: AtomicRMWInst::Add, Addr: DestAddr, Val,
6507 Ordering: llvm::AtomicOrdering::SequentiallyConsistent);
6508 return Builder.CreateAdd(LHS: RMWI, RHS: Val);
6509 }
6510 }
6511
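// Builtins not handled above go through the generic NEON path: build the
// vector type, try the shared AArch64 SIMD intrinsic table, then the tbl/tbx
// helpers, and finally the AArch64-specific switch below.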
6512 llvm::FixedVectorType *VTy = GetNeonType(CGF: this, TypeFlags: Type);
6513 llvm::Type *Ty = VTy;
6514 if (!Ty)
6515 return nullptr;
6516
6517 // Not all intrinsics handled by the common case work for AArch64 yet, so only
6518 // defer to common code if the builtin has been added to our special map.
6519 Builtin = findARMVectorIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
6520 AArch64SIMDIntrinsicsProvenSorted);
6521
6522 if (Builtin)
6523 return EmitCommonNeonBuiltinExpr(
6524 BuiltinID: Builtin->BuiltinID, LLVMIntrinsic: Builtin->LLVMIntrinsic, AltLLVMIntrinsic: Builtin->AltLLVMIntrinsic,
6525 NameHint: Builtin->NameHint, Modifier: Builtin->TypeModifier, E, Ops,
6526 /*never use addresses*/ PtrOp0: Address::invalid(), PtrOp1: Address::invalid(), Arch);
6527
6528 if (Value *V = EmitAArch64TblBuiltinExpr(CGF&: *this, BuiltinID, E, Ops, Arch))
6529 return V;
6530
6531 unsigned Int;
6532 bool ExtractLow = false;
6533 bool ExtendLaneArg = false;
6534 switch (BuiltinID) {
6535 default: return nullptr;
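  // vbsl is open-coded as a bitwise select on the integer form of the vector:
  //   result = (mask & true_val) | (~mask & false_val)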
6536 case NEON::BI__builtin_neon_vbsl_v:
6537 case NEON::BI__builtin_neon_vbslq_v: {
6538 llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
6539 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: BitTy, Name: "vbsl");
6540 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: BitTy, Name: "vbsl");
6541 Ops[2] = Builder.CreateBitCast(V: Ops[2], DestTy: BitTy, Name: "vbsl");
6542
6543 Ops[1] = Builder.CreateAnd(LHS: Ops[0], RHS: Ops[1], Name: "vbsl");
6544 Ops[2] = Builder.CreateAnd(LHS: Builder.CreateNot(V: Ops[0]), RHS: Ops[2], Name: "vbsl");
6545 Ops[0] = Builder.CreateOr(LHS: Ops[1], RHS: Ops[2], Name: "vbsl");
6546 return Builder.CreateBitCast(V: Ops[0], DestTy: Ty);
6547 }
6548 case NEON::BI__builtin_neon_vfma_lane_v:
6549 case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
6550 // The ARM builtins (and instructions) have the addend as the first
6551 // operand, but the 'fma' intrinsics have it last. Swap it around here.
6552 Value *Addend = Ops[0];
6553 Value *Multiplicand = Ops[1];
6554 Value *LaneSource = Ops[2];
6555 Ops[0] = Multiplicand;
6556 Ops[1] = LaneSource;
6557 Ops[2] = Addend;
6558
6559 // Now adjust things to handle the lane access.
6560 auto *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v
6561 ? llvm::FixedVectorType::get(VTy->getElementType(),
6562 VTy->getNumElements() / 2)
6563 : VTy;
6564 llvm::Constant *cst = cast<Constant>(Val: Ops[3]);
6565 Value *SV = llvm::ConstantVector::getSplat(EC: VTy->getElementCount(), Elt: cst);
6566 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: SourceTy);
6567 Ops[1] = Builder.CreateShuffleVector(V1: Ops[1], V2: Ops[1], Mask: SV, Name: "lane");
6568
6569 Ops.pop_back();
6570 Int = Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_fma
6571 : Intrinsic::fma;
6572 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "fmla");
6573 }
6574 case NEON::BI__builtin_neon_vfma_laneq_v: {
6575 auto *VTy = cast<llvm::FixedVectorType>(Val: Ty);
6576 // A v1f64 fma should be mapped to the NEON scalar f64 fma.
6577 if (VTy && VTy->getElementType() == DoubleTy) {
6578 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: DoubleTy);
6579 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: DoubleTy);
6580 llvm::FixedVectorType *VTy =
6581 GetNeonType(CGF: this, TypeFlags: NeonTypeFlags(NeonTypeFlags::Float64, false, true));
6582 Ops[2] = Builder.CreateBitCast(V: Ops[2], DestTy: VTy);
6583 Ops[2] = Builder.CreateExtractElement(Vec: Ops[2], Idx: Ops[3], Name: "extract");
6584 Value *Result;
6585 Result = emitCallMaybeConstrainedFPBuiltin(
6586 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma,
6587 DoubleTy, {Ops[1], Ops[2], Ops[0]});
6588 return Builder.CreateBitCast(V: Result, DestTy: Ty);
6589 }
6590 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Ty);
6591 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
6592
6593 auto *STy = llvm::FixedVectorType::get(ElementType: VTy->getElementType(),
6594 NumElts: VTy->getNumElements() * 2);
6595 Ops[2] = Builder.CreateBitCast(V: Ops[2], DestTy: STy);
6596 Value *SV = llvm::ConstantVector::getSplat(EC: VTy->getElementCount(),
6597 Elt: cast<ConstantInt>(Val: Ops[3]));
6598 Ops[2] = Builder.CreateShuffleVector(V1: Ops[2], V2: Ops[2], Mask: SV, Name: "lane");
6599
6600 return emitCallMaybeConstrainedFPBuiltin(
6601 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
6602 {Ops[2], Ops[1], Ops[0]});
6603 }
6604 case NEON::BI__builtin_neon_vfmaq_laneq_v: {
6605 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Ty);
6606 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
6607
6608 Ops[2] = Builder.CreateBitCast(V: Ops[2], DestTy: Ty);
6609 Ops[2] = EmitNeonSplat(V: Ops[2], C: cast<ConstantInt>(Val: Ops[3]));
6610 return emitCallMaybeConstrainedFPBuiltin(
6611 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
6612 {Ops[2], Ops[1], Ops[0]});
6613 }
6614 case NEON::BI__builtin_neon_vfmah_lane_f16:
6615 case NEON::BI__builtin_neon_vfmas_lane_f32:
6616 case NEON::BI__builtin_neon_vfmah_laneq_f16:
6617 case NEON::BI__builtin_neon_vfmas_laneq_f32:
6618 case NEON::BI__builtin_neon_vfmad_lane_f64:
6619 case NEON::BI__builtin_neon_vfmad_laneq_f64: {
6620 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 3)));
6621 llvm::Type *Ty = ConvertType(T: E->getCallReturnType(Ctx: getContext()));
6622 Ops[2] = Builder.CreateExtractElement(Vec: Ops[2], Idx: Ops[3], Name: "extract");
6623 return emitCallMaybeConstrainedFPBuiltin(
6624 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
6625 {Ops[1], Ops[2], Ops[0]});
6626 }
6627 case NEON::BI__builtin_neon_vmull_v:
6628 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6629 Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
6630 if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
6631 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vmull");
6632 case NEON::BI__builtin_neon_vmax_v:
6633 case NEON::BI__builtin_neon_vmaxq_v:
6634 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6635 Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
6636 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
6637 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vmax");
6638 case NEON::BI__builtin_neon_vmaxh_f16: {
6639 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
6640 Int = Intrinsic::aarch64_neon_fmax;
6641 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: HalfTy), Ops, name: "vmax");
6642 }
6643 case NEON::BI__builtin_neon_vmin_v:
6644 case NEON::BI__builtin_neon_vminq_v:
6645 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6646 Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
6647 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
6648 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vmin");
6649 case NEON::BI__builtin_neon_vminh_f16: {
6650 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
6651 Int = Intrinsic::aarch64_neon_fmin;
6652 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: HalfTy), Ops, name: "vmin");
6653 }
6654 case NEON::BI__builtin_neon_vabd_v:
6655 case NEON::BI__builtin_neon_vabdq_v:
6656 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6657 Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
6658 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
6659 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vabd");
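  // vpadal is emitted as a pairwise add-long (uaddlp/saddlp) of the second
  // operand followed by an ordinary vector add of the accumulator operand.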
6660 case NEON::BI__builtin_neon_vpadal_v:
6661 case NEON::BI__builtin_neon_vpadalq_v: {
6662 unsigned ArgElts = VTy->getNumElements();
6663 llvm::IntegerType *EltTy = cast<IntegerType>(Val: VTy->getElementType());
6664 unsigned BitWidth = EltTy->getBitWidth();
6665 auto *ArgTy = llvm::FixedVectorType::get(
6666 ElementType: llvm::IntegerType::get(C&: getLLVMContext(), NumBits: BitWidth / 2), NumElts: 2 * ArgElts);
6667 llvm::Type* Tys[2] = { VTy, ArgTy };
6668 Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
6669 SmallVector<llvm::Value*, 1> TmpOps;
6670 TmpOps.push_back(Elt: Ops[1]);
6671 Function *F = CGM.getIntrinsic(IID: Int, Tys);
6672 llvm::Value *tmp = EmitNeonCall(F, Ops&: TmpOps, name: "vpadal");
6673 llvm::Value *addend = Builder.CreateBitCast(V: Ops[0], DestTy: tmp->getType());
6674 return Builder.CreateAdd(LHS: tmp, RHS: addend);
6675 }
6676 case NEON::BI__builtin_neon_vpmin_v:
6677 case NEON::BI__builtin_neon_vpminq_v:
6678 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6679 Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
6680 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
6681 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vpmin");
6682 case NEON::BI__builtin_neon_vpmax_v:
6683 case NEON::BI__builtin_neon_vpmaxq_v:
6684 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6685 Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
6686 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
6687 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vpmax");
6688 case NEON::BI__builtin_neon_vminnm_v:
6689 case NEON::BI__builtin_neon_vminnmq_v:
6690 Int = Intrinsic::aarch64_neon_fminnm;
6691 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vminnm");
6692 case NEON::BI__builtin_neon_vminnmh_f16:
6693 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
6694 Int = Intrinsic::aarch64_neon_fminnm;
6695 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: HalfTy), Ops, name: "vminnm");
6696 case NEON::BI__builtin_neon_vmaxnm_v:
6697 case NEON::BI__builtin_neon_vmaxnmq_v:
6698 Int = Intrinsic::aarch64_neon_fmaxnm;
6699 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vmaxnm");
6700 case NEON::BI__builtin_neon_vmaxnmh_f16:
6701 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
6702 Int = Intrinsic::aarch64_neon_fmaxnm;
6703 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: HalfTy), Ops, name: "vmaxnm");
6704 case NEON::BI__builtin_neon_vrecpss_f32: {
6705 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
6706 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
6707 Ops, "vrecps");
6708 }
6709 case NEON::BI__builtin_neon_vrecpsd_f64:
6710 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
6711 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
6712 Ops, "vrecps");
6713 case NEON::BI__builtin_neon_vrecpsh_f16:
6714 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
6715 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, HalfTy),
6716 Ops, "vrecps");
6717 case NEON::BI__builtin_neon_vqshrun_n_v:
6718 Int = Intrinsic::aarch64_neon_sqshrun;
6719 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vqshrun_n");
6720 case NEON::BI__builtin_neon_vqrshrun_n_v:
6721 Int = Intrinsic::aarch64_neon_sqrshrun;
6722 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vqrshrun_n");
6723 case NEON::BI__builtin_neon_vqshrn_n_v:
6724 Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
6725 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vqshrn_n");
6726 case NEON::BI__builtin_neon_vrshrn_n_v:
6727 Int = Intrinsic::aarch64_neon_rshrn;
6728 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vrshrn_n");
6729 case NEON::BI__builtin_neon_vqrshrn_n_v:
6730 Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
6731 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vqrshrn_n");
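  // The vrndX family maps each AArch64 rounding mode onto the matching LLVM
  // rounding intrinsic (round, nearbyint, floor, roundeven, ceil, rint,
  // trunc), switching to the experimental constrained variants when
  // constrained FP is enabled.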
6732 case NEON::BI__builtin_neon_vrndah_f16: {
6733 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
6734 Int = Builder.getIsFPConstrained()
6735 ? Intrinsic::experimental_constrained_round
6736 : Intrinsic::round;
6737 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: HalfTy), Ops, name: "vrnda");
6738 }
6739 case NEON::BI__builtin_neon_vrnda_v:
6740 case NEON::BI__builtin_neon_vrndaq_v: {
6741 Int = Builder.getIsFPConstrained()
6742 ? Intrinsic::experimental_constrained_round
6743 : Intrinsic::round;
6744 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vrnda");
6745 }
6746 case NEON::BI__builtin_neon_vrndih_f16: {
6747 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
6748 Int = Builder.getIsFPConstrained()
6749 ? Intrinsic::experimental_constrained_nearbyint
6750 : Intrinsic::nearbyint;
6751 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: HalfTy), Ops, name: "vrndi");
6752 }
6753 case NEON::BI__builtin_neon_vrndmh_f16: {
6754 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
6755 Int = Builder.getIsFPConstrained()
6756 ? Intrinsic::experimental_constrained_floor
6757 : Intrinsic::floor;
6758 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: HalfTy), Ops, name: "vrndm");
6759 }
6760 case NEON::BI__builtin_neon_vrndm_v:
6761 case NEON::BI__builtin_neon_vrndmq_v: {
6762 Int = Builder.getIsFPConstrained()
6763 ? Intrinsic::experimental_constrained_floor
6764 : Intrinsic::floor;
6765 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vrndm");
6766 }
6767 case NEON::BI__builtin_neon_vrndnh_f16: {
6768 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
6769 Int = Builder.getIsFPConstrained()
6770 ? Intrinsic::experimental_constrained_roundeven
6771 : Intrinsic::roundeven;
6772 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: HalfTy), Ops, name: "vrndn");
6773 }
6774 case NEON::BI__builtin_neon_vrndn_v:
6775 case NEON::BI__builtin_neon_vrndnq_v: {
6776 Int = Builder.getIsFPConstrained()
6777 ? Intrinsic::experimental_constrained_roundeven
6778 : Intrinsic::roundeven;
6779 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vrndn");
6780 }
6781 case NEON::BI__builtin_neon_vrndns_f32: {
6782 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
6783 Int = Builder.getIsFPConstrained()
6784 ? Intrinsic::experimental_constrained_roundeven
6785 : Intrinsic::roundeven;
6786 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: FloatTy), Ops, name: "vrndn");
6787 }
6788 case NEON::BI__builtin_neon_vrndph_f16: {
6789 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
6790 Int = Builder.getIsFPConstrained()
6791 ? Intrinsic::experimental_constrained_ceil
6792 : Intrinsic::ceil;
6793 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: HalfTy), Ops, name: "vrndp");
6794 }
6795 case NEON::BI__builtin_neon_vrndp_v:
6796 case NEON::BI__builtin_neon_vrndpq_v: {
6797 Int = Builder.getIsFPConstrained()
6798 ? Intrinsic::experimental_constrained_ceil
6799 : Intrinsic::ceil;
6800 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vrndp");
6801 }
6802 case NEON::BI__builtin_neon_vrndxh_f16: {
6803 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
6804 Int = Builder.getIsFPConstrained()
6805 ? Intrinsic::experimental_constrained_rint
6806 : Intrinsic::rint;
6807 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: HalfTy), Ops, name: "vrndx");
6808 }
6809 case NEON::BI__builtin_neon_vrndx_v:
6810 case NEON::BI__builtin_neon_vrndxq_v: {
6811 Int = Builder.getIsFPConstrained()
6812 ? Intrinsic::experimental_constrained_rint
6813 : Intrinsic::rint;
6814 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vrndx");
6815 }
6816 case NEON::BI__builtin_neon_vrndh_f16: {
6817 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
6818 Int = Builder.getIsFPConstrained()
6819 ? Intrinsic::experimental_constrained_trunc
6820 : Intrinsic::trunc;
6821 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: HalfTy), Ops, name: "vrndz");
6822 }
6823 case NEON::BI__builtin_neon_vrnd32x_f32:
6824 case NEON::BI__builtin_neon_vrnd32xq_f32:
6825 case NEON::BI__builtin_neon_vrnd32x_f64:
6826 case NEON::BI__builtin_neon_vrnd32xq_f64: {
6827 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
6828 Int = Intrinsic::aarch64_neon_frint32x;
6829 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vrnd32x");
6830 }
6831 case NEON::BI__builtin_neon_vrnd32z_f32:
6832 case NEON::BI__builtin_neon_vrnd32zq_f32:
6833 case NEON::BI__builtin_neon_vrnd32z_f64:
6834 case NEON::BI__builtin_neon_vrnd32zq_f64: {
6835 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
6836 Int = Intrinsic::aarch64_neon_frint32z;
6837 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vrnd32z");
6838 }
6839 case NEON::BI__builtin_neon_vrnd64x_f32:
6840 case NEON::BI__builtin_neon_vrnd64xq_f32:
6841 case NEON::BI__builtin_neon_vrnd64x_f64:
6842 case NEON::BI__builtin_neon_vrnd64xq_f64: {
6843 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
6844 Int = Intrinsic::aarch64_neon_frint64x;
6845 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vrnd64x");
6846 }
6847 case NEON::BI__builtin_neon_vrnd64z_f32:
6848 case NEON::BI__builtin_neon_vrnd64zq_f32:
6849 case NEON::BI__builtin_neon_vrnd64z_f64:
6850 case NEON::BI__builtin_neon_vrnd64zq_f64: {
6851 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
6852 Int = Intrinsic::aarch64_neon_frint64z;
6853 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vrnd64z");
6854 }
6855 case NEON::BI__builtin_neon_vrnd_v:
6856 case NEON::BI__builtin_neon_vrndq_v: {
6857 Int = Builder.getIsFPConstrained()
6858 ? Intrinsic::experimental_constrained_trunc
6859 : Intrinsic::trunc;
6860 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vrndz");
6861 }
6862 case NEON::BI__builtin_neon_vcvt_f64_v:
6863 case NEON::BI__builtin_neon_vcvtq_f64_v:
6864 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Ty);
6865 Ty = GetNeonType(CGF: this, TypeFlags: NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
6866 return usgn ? Builder.CreateUIToFP(V: Ops[0], DestTy: Ty, Name: "vcvt")
6867 : Builder.CreateSIToFP(V: Ops[0], DestTy: Ty, Name: "vcvt");
6868 case NEON::BI__builtin_neon_vcvt_f64_f32: {
6869 assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
6870 "unexpected vcvt_f64_f32 builtin");
6871 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
6872 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: GetNeonType(CGF: this, TypeFlags: SrcFlag));
6873
6874 return Builder.CreateFPExt(V: Ops[0], DestTy: Ty, Name: "vcvt");
6875 }
6876 case NEON::BI__builtin_neon_vcvt_f32_f64: {
6877 assert(Type.getEltType() == NeonTypeFlags::Float32 &&
6878 "unexpected vcvt_f32_f64 builtin");
6879 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
6880 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: GetNeonType(CGF: this, TypeFlags: SrcFlag));
6881
6882 return Builder.CreateFPTrunc(V: Ops[0], DestTy: Ty, Name: "vcvt");
6883 }
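  // FP-to-integer conversions: the unsuffixed vcvt forms truncate toward zero
  // (fcvtzu/fcvtzs); vcvta rounds to nearest with ties away (fcvtau/fcvtas),
  // vcvtm toward minus infinity (fcvtmu/fcvtms), vcvtn to nearest even
  // (fcvtnu/fcvtns), and vcvtp toward plus infinity (fcvtpu/fcvtps).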
6884 case NEON::BI__builtin_neon_vcvt_s32_v:
6885 case NEON::BI__builtin_neon_vcvt_u32_v:
6886 case NEON::BI__builtin_neon_vcvt_s64_v:
6887 case NEON::BI__builtin_neon_vcvt_u64_v:
6888 case NEON::BI__builtin_neon_vcvt_s16_f16:
6889 case NEON::BI__builtin_neon_vcvt_u16_f16:
6890 case NEON::BI__builtin_neon_vcvtq_s32_v:
6891 case NEON::BI__builtin_neon_vcvtq_u32_v:
6892 case NEON::BI__builtin_neon_vcvtq_s64_v:
6893 case NEON::BI__builtin_neon_vcvtq_u64_v:
6894 case NEON::BI__builtin_neon_vcvtq_s16_f16:
6895 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
6896 Int =
6897 usgn ? Intrinsic::aarch64_neon_fcvtzu : Intrinsic::aarch64_neon_fcvtzs;
6898 llvm::Type *Tys[2] = {Ty, GetFloatNeonType(CGF: this, IntTypeFlags: Type)};
6899 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vcvtz");
6900 }
6901 case NEON::BI__builtin_neon_vcvta_s16_f16:
6902 case NEON::BI__builtin_neon_vcvta_u16_f16:
6903 case NEON::BI__builtin_neon_vcvta_s32_v:
6904 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
6905 case NEON::BI__builtin_neon_vcvtaq_s32_v:
6906 case NEON::BI__builtin_neon_vcvta_u32_v:
6907 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
6908 case NEON::BI__builtin_neon_vcvtaq_u32_v:
6909 case NEON::BI__builtin_neon_vcvta_s64_v:
6910 case NEON::BI__builtin_neon_vcvtaq_s64_v:
6911 case NEON::BI__builtin_neon_vcvta_u64_v:
6912 case NEON::BI__builtin_neon_vcvtaq_u64_v: {
6913 Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
6914 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(CGF: this, IntTypeFlags: Type) };
6915 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vcvta");
6916 }
6917 case NEON::BI__builtin_neon_vcvtm_s16_f16:
6918 case NEON::BI__builtin_neon_vcvtm_s32_v:
6919 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
6920 case NEON::BI__builtin_neon_vcvtmq_s32_v:
6921 case NEON::BI__builtin_neon_vcvtm_u16_f16:
6922 case NEON::BI__builtin_neon_vcvtm_u32_v:
6923 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
6924 case NEON::BI__builtin_neon_vcvtmq_u32_v:
6925 case NEON::BI__builtin_neon_vcvtm_s64_v:
6926 case NEON::BI__builtin_neon_vcvtmq_s64_v:
6927 case NEON::BI__builtin_neon_vcvtm_u64_v:
6928 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
6929 Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
6930 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(CGF: this, IntTypeFlags: Type) };
6931 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vcvtm");
6932 }
6933 case NEON::BI__builtin_neon_vcvtn_s16_f16:
6934 case NEON::BI__builtin_neon_vcvtn_s32_v:
6935 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
6936 case NEON::BI__builtin_neon_vcvtnq_s32_v:
6937 case NEON::BI__builtin_neon_vcvtn_u16_f16:
6938 case NEON::BI__builtin_neon_vcvtn_u32_v:
6939 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
6940 case NEON::BI__builtin_neon_vcvtnq_u32_v:
6941 case NEON::BI__builtin_neon_vcvtn_s64_v:
6942 case NEON::BI__builtin_neon_vcvtnq_s64_v:
6943 case NEON::BI__builtin_neon_vcvtn_u64_v:
6944 case NEON::BI__builtin_neon_vcvtnq_u64_v: {
6945 Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
6946 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(CGF: this, IntTypeFlags: Type) };
6947 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vcvtn");
6948 }
6949 case NEON::BI__builtin_neon_vcvtp_s16_f16:
6950 case NEON::BI__builtin_neon_vcvtp_s32_v:
6951 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
6952 case NEON::BI__builtin_neon_vcvtpq_s32_v:
6953 case NEON::BI__builtin_neon_vcvtp_u16_f16:
6954 case NEON::BI__builtin_neon_vcvtp_u32_v:
6955 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
6956 case NEON::BI__builtin_neon_vcvtpq_u32_v:
6957 case NEON::BI__builtin_neon_vcvtp_s64_v:
6958 case NEON::BI__builtin_neon_vcvtpq_s64_v:
6959 case NEON::BI__builtin_neon_vcvtp_u64_v:
6960 case NEON::BI__builtin_neon_vcvtpq_u64_v: {
6961 Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
6962 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(CGF: this, IntTypeFlags: Type) };
6963 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vcvtp");
6964 }
6965 case NEON::BI__builtin_neon_vmulx_v:
6966 case NEON::BI__builtin_neon_vmulxq_v: {
6967 Int = Intrinsic::aarch64_neon_fmulx;
6968 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vmulx");
6969 }
6970 case NEON::BI__builtin_neon_vmulxh_lane_f16:
6971 case NEON::BI__builtin_neon_vmulxh_laneq_f16: {
6972 // vmulx_lane should be mapped to the NEON scalar fmulx after
6973 // extracting the lane element.
6974 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 2)));
6975 Ops[1] = Builder.CreateExtractElement(Vec: Ops[1], Idx: Ops[2], Name: "extract");
6976 Ops.pop_back();
6977 Int = Intrinsic::aarch64_neon_fmulx;
6978 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: HalfTy), Ops, name: "vmulx");
6979 }
6980 case NEON::BI__builtin_neon_vmul_lane_v:
6981 case NEON::BI__builtin_neon_vmul_laneq_v: {
6982 // v1f64 vmul_lane should be mapped to a NEON scalar multiply by the extracted lane element.
6983 bool Quad = false;
6984 if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
6985 Quad = true;
6986 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: DoubleTy);
6987 llvm::FixedVectorType *VTy =
6988 GetNeonType(CGF: this, TypeFlags: NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
6989 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: VTy);
6990 Ops[1] = Builder.CreateExtractElement(Vec: Ops[1], Idx: Ops[2], Name: "extract");
6991 Value *Result = Builder.CreateFMul(L: Ops[0], R: Ops[1]);
6992 return Builder.CreateBitCast(V: Result, DestTy: Ty);
6993 }
6994 case NEON::BI__builtin_neon_vnegd_s64:
6995 return Builder.CreateNeg(V: EmitScalarExpr(E: E->getArg(Arg: 0)), Name: "vnegd");
6996 case NEON::BI__builtin_neon_vnegh_f16:
6997 return Builder.CreateFNeg(V: EmitScalarExpr(E: E->getArg(Arg: 0)), Name: "vnegh");
6998 case NEON::BI__builtin_neon_vpmaxnm_v:
6999 case NEON::BI__builtin_neon_vpmaxnmq_v: {
7000 Int = Intrinsic::aarch64_neon_fmaxnmp;
7001 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vpmaxnm");
7002 }
7003 case NEON::BI__builtin_neon_vpminnm_v:
7004 case NEON::BI__builtin_neon_vpminnmq_v: {
7005 Int = Intrinsic::aarch64_neon_fminnmp;
7006 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vpminnm");
7007 }
7008 case NEON::BI__builtin_neon_vsqrth_f16: {
7009 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7010 Int = Builder.getIsFPConstrained()
7011 ? Intrinsic::experimental_constrained_sqrt
7012 : Intrinsic::sqrt;
7013 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: HalfTy), Ops, name: "vsqrt");
7014 }
7015 case NEON::BI__builtin_neon_vsqrt_v:
7016 case NEON::BI__builtin_neon_vsqrtq_v: {
7017 Int = Builder.getIsFPConstrained()
7018 ? Intrinsic::experimental_constrained_sqrt
7019 : Intrinsic::sqrt;
7020 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Ty);
7021 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vsqrt");
7022 }
7023 case NEON::BI__builtin_neon_vrbit_v:
7024 case NEON::BI__builtin_neon_vrbitq_v: {
7025 Int = Intrinsic::bitreverse;
7026 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vrbit");
7027 }
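  // The across-vector reductions below (vaddv, vmaxv, vminv, vmaxnmv,
  // vminnmv, vaddlv) all follow the same shape: call the corresponding
  // aarch64.neon.*v intrinsic and truncate the (possibly widened) result
  // back to the expected return width.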
7028 case NEON::BI__builtin_neon_vaddv_u8:
7029 // FIXME: These are handled by the AArch64 scalar code.
7030 usgn = true;
7031 [[fallthrough]];
7032 case NEON::BI__builtin_neon_vaddv_s8: {
7033 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
7034 Ty = Int32Ty;
7035 VTy = llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 8);
7036 llvm::Type *Tys[2] = { Ty, VTy };
7037 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7038 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vaddv");
7039 return Builder.CreateTrunc(V: Ops[0], DestTy: Int8Ty);
7040 }
7041 case NEON::BI__builtin_neon_vaddv_u16:
7042 usgn = true;
7043 [[fallthrough]];
7044 case NEON::BI__builtin_neon_vaddv_s16: {
7045 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
7046 Ty = Int32Ty;
7047 VTy = llvm::FixedVectorType::get(ElementType: Int16Ty, NumElts: 4);
7048 llvm::Type *Tys[2] = { Ty, VTy };
7049 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7050 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vaddv");
7051 return Builder.CreateTrunc(V: Ops[0], DestTy: Int16Ty);
7052 }
7053 case NEON::BI__builtin_neon_vaddvq_u8:
7054 usgn = true;
7055 [[fallthrough]];
7056 case NEON::BI__builtin_neon_vaddvq_s8: {
7057 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
7058 Ty = Int32Ty;
7059 VTy = llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 16);
7060 llvm::Type *Tys[2] = { Ty, VTy };
7061 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7062 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vaddv");
7063 return Builder.CreateTrunc(V: Ops[0], DestTy: Int8Ty);
7064 }
7065 case NEON::BI__builtin_neon_vaddvq_u16:
7066 usgn = true;
7067 [[fallthrough]];
7068 case NEON::BI__builtin_neon_vaddvq_s16: {
7069 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
7070 Ty = Int32Ty;
7071 VTy = llvm::FixedVectorType::get(ElementType: Int16Ty, NumElts: 8);
7072 llvm::Type *Tys[2] = { Ty, VTy };
7073 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7074 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vaddv");
7075 return Builder.CreateTrunc(V: Ops[0], DestTy: Int16Ty);
7076 }
7077 case NEON::BI__builtin_neon_vmaxv_u8: {
7078 Int = Intrinsic::aarch64_neon_umaxv;
7079 Ty = Int32Ty;
7080 VTy = llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 8);
7081 llvm::Type *Tys[2] = { Ty, VTy };
7082 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7083 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vmaxv");
7084 return Builder.CreateTrunc(V: Ops[0], DestTy: Int8Ty);
7085 }
7086 case NEON::BI__builtin_neon_vmaxv_u16: {
7087 Int = Intrinsic::aarch64_neon_umaxv;
7088 Ty = Int32Ty;
7089 VTy = llvm::FixedVectorType::get(ElementType: Int16Ty, NumElts: 4);
7090 llvm::Type *Tys[2] = { Ty, VTy };
7091 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7092 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vmaxv");
7093 return Builder.CreateTrunc(V: Ops[0], DestTy: Int16Ty);
7094 }
7095 case NEON::BI__builtin_neon_vmaxvq_u8: {
7096 Int = Intrinsic::aarch64_neon_umaxv;
7097 Ty = Int32Ty;
7098 VTy = llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 16);
7099 llvm::Type *Tys[2] = { Ty, VTy };
7100 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7101 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vmaxv");
7102 return Builder.CreateTrunc(V: Ops[0], DestTy: Int8Ty);
7103 }
7104 case NEON::BI__builtin_neon_vmaxvq_u16: {
7105 Int = Intrinsic::aarch64_neon_umaxv;
7106 Ty = Int32Ty;
7107 VTy = llvm::FixedVectorType::get(ElementType: Int16Ty, NumElts: 8);
7108 llvm::Type *Tys[2] = { Ty, VTy };
7109 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7110 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vmaxv");
7111 return Builder.CreateTrunc(V: Ops[0], DestTy: Int16Ty);
7112 }
7113 case NEON::BI__builtin_neon_vmaxv_s8: {
7114 Int = Intrinsic::aarch64_neon_smaxv;
7115 Ty = Int32Ty;
7116 VTy = llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 8);
7117 llvm::Type *Tys[2] = { Ty, VTy };
7118 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7119 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vmaxv");
7120 return Builder.CreateTrunc(V: Ops[0], DestTy: Int8Ty);
7121 }
7122 case NEON::BI__builtin_neon_vmaxv_s16: {
7123 Int = Intrinsic::aarch64_neon_smaxv;
7124 Ty = Int32Ty;
7125 VTy = llvm::FixedVectorType::get(ElementType: Int16Ty, NumElts: 4);
7126 llvm::Type *Tys[2] = { Ty, VTy };
7127 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7128 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vmaxv");
7129 return Builder.CreateTrunc(V: Ops[0], DestTy: Int16Ty);
7130 }
7131 case NEON::BI__builtin_neon_vmaxvq_s8: {
7132 Int = Intrinsic::aarch64_neon_smaxv;
7133 Ty = Int32Ty;
7134 VTy = llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 16);
7135 llvm::Type *Tys[2] = { Ty, VTy };
7136 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7137 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vmaxv");
7138 return Builder.CreateTrunc(V: Ops[0], DestTy: Int8Ty);
7139 }
7140 case NEON::BI__builtin_neon_vmaxvq_s16: {
7141 Int = Intrinsic::aarch64_neon_smaxv;
7142 Ty = Int32Ty;
7143 VTy = llvm::FixedVectorType::get(ElementType: Int16Ty, NumElts: 8);
7144 llvm::Type *Tys[2] = { Ty, VTy };
7145 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7146 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vmaxv");
7147 return Builder.CreateTrunc(V: Ops[0], DestTy: Int16Ty);
7148 }
7149 case NEON::BI__builtin_neon_vmaxv_f16: {
7150 Int = Intrinsic::aarch64_neon_fmaxv;
7151 Ty = HalfTy;
7152 VTy = llvm::FixedVectorType::get(ElementType: HalfTy, NumElts: 4);
7153 llvm::Type *Tys[2] = { Ty, VTy };
7154 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7155 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vmaxv");
7156 return Builder.CreateTrunc(V: Ops[0], DestTy: HalfTy);
7157 }
7158 case NEON::BI__builtin_neon_vmaxvq_f16: {
7159 Int = Intrinsic::aarch64_neon_fmaxv;
7160 Ty = HalfTy;
7161 VTy = llvm::FixedVectorType::get(ElementType: HalfTy, NumElts: 8);
7162 llvm::Type *Tys[2] = { Ty, VTy };
7163 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7164 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vmaxv");
7165 return Builder.CreateTrunc(V: Ops[0], DestTy: HalfTy);
7166 }
7167 case NEON::BI__builtin_neon_vminv_u8: {
7168 Int = Intrinsic::aarch64_neon_uminv;
7169 Ty = Int32Ty;
7170 VTy = llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 8);
7171 llvm::Type *Tys[2] = { Ty, VTy };
7172 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7173 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vminv");
7174 return Builder.CreateTrunc(V: Ops[0], DestTy: Int8Ty);
7175 }
7176 case NEON::BI__builtin_neon_vminv_u16: {
7177 Int = Intrinsic::aarch64_neon_uminv;
7178 Ty = Int32Ty;
7179 VTy = llvm::FixedVectorType::get(ElementType: Int16Ty, NumElts: 4);
7180 llvm::Type *Tys[2] = { Ty, VTy };
7181 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7182 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vminv");
7183 return Builder.CreateTrunc(V: Ops[0], DestTy: Int16Ty);
7184 }
7185 case NEON::BI__builtin_neon_vminvq_u8: {
7186 Int = Intrinsic::aarch64_neon_uminv;
7187 Ty = Int32Ty;
7188 VTy = llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 16);
7189 llvm::Type *Tys[2] = { Ty, VTy };
7190 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7191 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vminv");
7192 return Builder.CreateTrunc(V: Ops[0], DestTy: Int8Ty);
7193 }
7194 case NEON::BI__builtin_neon_vminvq_u16: {
7195 Int = Intrinsic::aarch64_neon_uminv;
7196 Ty = Int32Ty;
7197 VTy = llvm::FixedVectorType::get(ElementType: Int16Ty, NumElts: 8);
7198 llvm::Type *Tys[2] = { Ty, VTy };
7199 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7200 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vminv");
7201 return Builder.CreateTrunc(V: Ops[0], DestTy: Int16Ty);
7202 }
7203 case NEON::BI__builtin_neon_vminv_s8: {
7204 Int = Intrinsic::aarch64_neon_sminv;
7205 Ty = Int32Ty;
7206 VTy = llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 8);
7207 llvm::Type *Tys[2] = { Ty, VTy };
7208 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7209 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vminv");
7210 return Builder.CreateTrunc(V: Ops[0], DestTy: Int8Ty);
7211 }
7212 case NEON::BI__builtin_neon_vminv_s16: {
7213 Int = Intrinsic::aarch64_neon_sminv;
7214 Ty = Int32Ty;
7215 VTy = llvm::FixedVectorType::get(ElementType: Int16Ty, NumElts: 4);
7216 llvm::Type *Tys[2] = { Ty, VTy };
7217 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7218 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vminv");
7219 return Builder.CreateTrunc(V: Ops[0], DestTy: Int16Ty);
7220 }
7221 case NEON::BI__builtin_neon_vminvq_s8: {
7222 Int = Intrinsic::aarch64_neon_sminv;
7223 Ty = Int32Ty;
7224 VTy = llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 16);
7225 llvm::Type *Tys[2] = { Ty, VTy };
7226 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7227 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vminv");
7228 return Builder.CreateTrunc(V: Ops[0], DestTy: Int8Ty);
7229 }
7230 case NEON::BI__builtin_neon_vminvq_s16: {
7231 Int = Intrinsic::aarch64_neon_sminv;
7232 Ty = Int32Ty;
7233 VTy = llvm::FixedVectorType::get(ElementType: Int16Ty, NumElts: 8);
7234 llvm::Type *Tys[2] = { Ty, VTy };
7235 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7236 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vminv");
7237 return Builder.CreateTrunc(V: Ops[0], DestTy: Int16Ty);
7238 }
7239 case NEON::BI__builtin_neon_vminv_f16: {
7240 Int = Intrinsic::aarch64_neon_fminv;
7241 Ty = HalfTy;
7242 VTy = llvm::FixedVectorType::get(ElementType: HalfTy, NumElts: 4);
7243 llvm::Type *Tys[2] = { Ty, VTy };
7244 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7245 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vminv");
7246 return Builder.CreateTrunc(V: Ops[0], DestTy: HalfTy);
7247 }
7248 case NEON::BI__builtin_neon_vminvq_f16: {
7249 Int = Intrinsic::aarch64_neon_fminv;
7250 Ty = HalfTy;
7251 VTy = llvm::FixedVectorType::get(ElementType: HalfTy, NumElts: 8);
7252 llvm::Type *Tys[2] = { Ty, VTy };
7253 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7254 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vminv");
7255 return Builder.CreateTrunc(V: Ops[0], DestTy: HalfTy);
7256 }
7257 case NEON::BI__builtin_neon_vmaxnmv_f16: {
7258 Int = Intrinsic::aarch64_neon_fmaxnmv;
7259 Ty = HalfTy;
7260 VTy = llvm::FixedVectorType::get(ElementType: HalfTy, NumElts: 4);
7261 llvm::Type *Tys[2] = { Ty, VTy };
7262 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7263 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vmaxnmv");
7264 return Builder.CreateTrunc(V: Ops[0], DestTy: HalfTy);
7265 }
7266 case NEON::BI__builtin_neon_vmaxnmvq_f16: {
7267 Int = Intrinsic::aarch64_neon_fmaxnmv;
7268 Ty = HalfTy;
7269 VTy = llvm::FixedVectorType::get(ElementType: HalfTy, NumElts: 8);
7270 llvm::Type *Tys[2] = { Ty, VTy };
7271 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7272 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vmaxnmv");
7273 return Builder.CreateTrunc(V: Ops[0], DestTy: HalfTy);
7274 }
7275 case NEON::BI__builtin_neon_vminnmv_f16: {
7276 Int = Intrinsic::aarch64_neon_fminnmv;
7277 Ty = HalfTy;
7278 VTy = llvm::FixedVectorType::get(ElementType: HalfTy, NumElts: 4);
7279 llvm::Type *Tys[2] = { Ty, VTy };
7280 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7281 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vminnmv");
7282 return Builder.CreateTrunc(V: Ops[0], DestTy: HalfTy);
7283 }
7284 case NEON::BI__builtin_neon_vminnmvq_f16: {
7285 Int = Intrinsic::aarch64_neon_fminnmv;
7286 Ty = HalfTy;
7287 VTy = llvm::FixedVectorType::get(ElementType: HalfTy, NumElts: 8);
7288 llvm::Type *Tys[2] = { Ty, VTy };
7289 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7290 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vminnmv");
7291 return Builder.CreateTrunc(V: Ops[0], DestTy: HalfTy);
7292 }
7293 case NEON::BI__builtin_neon_vmul_n_f64: {
7294 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: DoubleTy);
7295 Value *RHS = Builder.CreateBitCast(V: EmitScalarExpr(E: E->getArg(Arg: 1)), DestTy: DoubleTy);
7296 return Builder.CreateFMul(L: Ops[0], R: RHS);
7297 }
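  // Widening add-across-vector reductions: aarch64.neon.{u,s}addlv returns an
  // i32 for all of these element types. The 8-bit variants have a 16-bit ACLE
  // result (e.g. uint16_t vaddlv_u8(uint8x8_t)), so the call is truncated,
  // while the 16-bit variants return the 32-bit result directly.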
7298 case NEON::BI__builtin_neon_vaddlv_u8: {
7299 Int = Intrinsic::aarch64_neon_uaddlv;
7300 Ty = Int32Ty;
7301 VTy = llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 8);
7302 llvm::Type *Tys[2] = { Ty, VTy };
7303 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7304 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vaddlv");
7305 return Builder.CreateTrunc(V: Ops[0], DestTy: Int16Ty);
7306 }
7307 case NEON::BI__builtin_neon_vaddlv_u16: {
7308 Int = Intrinsic::aarch64_neon_uaddlv;
7309 Ty = Int32Ty;
7310 VTy = llvm::FixedVectorType::get(ElementType: Int16Ty, NumElts: 4);
7311 llvm::Type *Tys[2] = { Ty, VTy };
7312 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7313 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vaddlv");
7314 }
7315 case NEON::BI__builtin_neon_vaddlvq_u8: {
7316 Int = Intrinsic::aarch64_neon_uaddlv;
7317 Ty = Int32Ty;
7318 VTy = llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 16);
7319 llvm::Type *Tys[2] = { Ty, VTy };
7320 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7321 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vaddlv");
7322 return Builder.CreateTrunc(V: Ops[0], DestTy: Int16Ty);
7323 }
7324 case NEON::BI__builtin_neon_vaddlvq_u16: {
7325 Int = Intrinsic::aarch64_neon_uaddlv;
7326 Ty = Int32Ty;
7327 VTy = llvm::FixedVectorType::get(ElementType: Int16Ty, NumElts: 8);
7328 llvm::Type *Tys[2] = { Ty, VTy };
7329 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7330 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vaddlv");
7331 }
7332 case NEON::BI__builtin_neon_vaddlv_s8: {
7333 Int = Intrinsic::aarch64_neon_saddlv;
7334 Ty = Int32Ty;
7335 VTy = llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 8);
7336 llvm::Type *Tys[2] = { Ty, VTy };
7337 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7338 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vaddlv");
7339 return Builder.CreateTrunc(V: Ops[0], DestTy: Int16Ty);
7340 }
7341 case NEON::BI__builtin_neon_vaddlv_s16: {
7342 Int = Intrinsic::aarch64_neon_saddlv;
7343 Ty = Int32Ty;
7344 VTy = llvm::FixedVectorType::get(ElementType: Int16Ty, NumElts: 4);
7345 llvm::Type *Tys[2] = { Ty, VTy };
7346 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7347 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vaddlv");
7348 }
7349 case NEON::BI__builtin_neon_vaddlvq_s8: {
7350 Int = Intrinsic::aarch64_neon_saddlv;
7351 Ty = Int32Ty;
7352 VTy = llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 16);
7353 llvm::Type *Tys[2] = { Ty, VTy };
7354 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7355 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vaddlv");
7356 return Builder.CreateTrunc(V: Ops[0], DestTy: Int16Ty);
7357 }
7358 case NEON::BI__builtin_neon_vaddlvq_s16: {
7359 Int = Intrinsic::aarch64_neon_saddlv;
7360 Ty = Int32Ty;
7361 VTy = llvm::FixedVectorType::get(ElementType: Int16Ty, NumElts: 8);
7362 llvm::Type *Tys[2] = { Ty, VTy };
7363 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7364 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vaddlv");
7365 }
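  // Shift-and-insert / shift-and-accumulate forms. vsri/vsli map directly onto
  // the aarch64.neon.vsri/vsli intrinsics. vsra is emitted as an ordinary
  // immediate right shift (EmitNeonRShiftImm) followed by an add, while vrsra
  // uses the rounding-shift intrinsic {u,s}rshl, with EmitNeonCall's
  // shift/rightshift arguments adjusting the immediate for a right shift.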
7366 case NEON::BI__builtin_neon_vsri_n_v:
7367 case NEON::BI__builtin_neon_vsriq_n_v: {
7368 Int = Intrinsic::aarch64_neon_vsri;
7369 llvm::Function *Intrin = CGM.getIntrinsic(IID: Int, Tys: Ty);
7370 return EmitNeonCall(F: Intrin, Ops, name: "vsri_n");
7371 }
7372 case NEON::BI__builtin_neon_vsli_n_v:
7373 case NEON::BI__builtin_neon_vsliq_n_v: {
7374 Int = Intrinsic::aarch64_neon_vsli;
7375 llvm::Function *Intrin = CGM.getIntrinsic(IID: Int, Tys: Ty);
7376 return EmitNeonCall(F: Intrin, Ops, name: "vsli_n");
7377 }
7378 case NEON::BI__builtin_neon_vsra_n_v:
7379 case NEON::BI__builtin_neon_vsraq_n_v:
7380 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Ty);
7381 Ops[1] = EmitNeonRShiftImm(Vec: Ops[1], Shift: Ops[2], Ty, usgn, name: "vsra_n");
7382 return Builder.CreateAdd(LHS: Ops[0], RHS: Ops[1]);
7383 case NEON::BI__builtin_neon_vrsra_n_v:
7384 case NEON::BI__builtin_neon_vrsraq_n_v: {
7385 Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
7386 SmallVector<llvm::Value*,2> TmpOps;
7387 TmpOps.push_back(Elt: Ops[1]);
7388 TmpOps.push_back(Elt: Ops[2]);
7389 Function* F = CGM.getIntrinsic(IID: Int, Tys: Ty);
7390 llvm::Value *tmp = EmitNeonCall(F, Ops&: TmpOps, name: "vrshr_n", shift: 1, rightshift: true);
7391 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: VTy);
7392 return Builder.CreateAdd(LHS: Ops[0], RHS: tmp);
7393 }
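  // vld1/vst1 lower to plain aligned loads and stores of the whole vector. The
  // _lane forms load or store a single element and insert/extract it at the
  // requested lane; the _dup form splats one loaded element across the vector.
  // vldap1_lane/vstl1_lane match the _lane forms but additionally mark the
  // scalar memory access Acquire/Release (load-acquire/store-release).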
7394 case NEON::BI__builtin_neon_vld1_v:
7395 case NEON::BI__builtin_neon_vld1q_v: {
7396 return Builder.CreateAlignedLoad(Ty: VTy, Addr: Ops[0], Align: PtrOp0.getAlignment());
7397 }
7398 case NEON::BI__builtin_neon_vst1_v:
7399 case NEON::BI__builtin_neon_vst1q_v:
7400 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: VTy);
7401 return Builder.CreateAlignedStore(Val: Ops[1], Addr: Ops[0], Align: PtrOp0.getAlignment());
7402 case NEON::BI__builtin_neon_vld1_lane_v:
7403 case NEON::BI__builtin_neon_vld1q_lane_v: {
7404 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
7405 Ops[0] = Builder.CreateAlignedLoad(Ty: VTy->getElementType(), Addr: Ops[0],
7406 Align: PtrOp0.getAlignment());
7407 return Builder.CreateInsertElement(Vec: Ops[1], NewElt: Ops[0], Idx: Ops[2], Name: "vld1_lane");
7408 }
7409 case NEON::BI__builtin_neon_vldap1_lane_s64:
7410 case NEON::BI__builtin_neon_vldap1q_lane_s64: {
7411 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
7412 llvm::LoadInst *LI = Builder.CreateAlignedLoad(
7413 Ty: VTy->getElementType(), Addr: Ops[0], Align: PtrOp0.getAlignment());
7414 LI->setAtomic(Ordering: llvm::AtomicOrdering::Acquire);
7415 Ops[0] = LI;
7416 return Builder.CreateInsertElement(Vec: Ops[1], NewElt: Ops[0], Idx: Ops[2], Name: "vldap1_lane");
7417 }
7418 case NEON::BI__builtin_neon_vld1_dup_v:
7419 case NEON::BI__builtin_neon_vld1q_dup_v: {
7420 Value *V = PoisonValue::get(T: Ty);
7421 Ops[0] = Builder.CreateAlignedLoad(Ty: VTy->getElementType(), Addr: Ops[0],
7422 Align: PtrOp0.getAlignment());
7423 llvm::Constant *CI = ConstantInt::get(Ty: Int32Ty, V: 0);
7424 Ops[0] = Builder.CreateInsertElement(Vec: V, NewElt: Ops[0], Idx: CI);
7425 return EmitNeonSplat(V: Ops[0], C: CI);
7426 }
7427 case NEON::BI__builtin_neon_vst1_lane_v:
7428 case NEON::BI__builtin_neon_vst1q_lane_v:
7429 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
7430 Ops[1] = Builder.CreateExtractElement(Vec: Ops[1], Idx: Ops[2]);
7431 return Builder.CreateAlignedStore(Val: Ops[1], Addr: Ops[0], Align: PtrOp0.getAlignment());
7432 case NEON::BI__builtin_neon_vstl1_lane_s64:
7433 case NEON::BI__builtin_neon_vstl1q_lane_s64: {
7434 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
7435 Ops[1] = Builder.CreateExtractElement(Vec: Ops[1], Idx: Ops[2]);
7436 llvm::StoreInst *SI =
7437 Builder.CreateAlignedStore(Val: Ops[1], Addr: Ops[0], Align: PtrOp0.getAlignment());
7438 SI->setAtomic(Ordering: llvm::AtomicOrdering::Release);
7439 return SI;
7440 }
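  // Structured loads: the aarch64.neon.ld2/ld3/ld4 (and the ldNr "dup" and
  // ldNlane) intrinsics return a literal struct of N vectors. Ops[0] is the
  // address of the temporary that receives the builtin's aggregate result, so
  // the returned struct is simply stored through it. The _lane forms pass the
  // existing vectors through and zero-extend the lane index to i64.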
7441 case NEON::BI__builtin_neon_vld2_v:
7442 case NEON::BI__builtin_neon_vld2q_v: {
7443 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
7444 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
7445 Ops[1] = Builder.CreateCall(Callee: F, Args: Ops[1], Name: "vld2");
7446 return Builder.CreateDefaultAlignedStore(Val: Ops[1], Addr: Ops[0]);
7447 }
7448 case NEON::BI__builtin_neon_vld3_v:
7449 case NEON::BI__builtin_neon_vld3q_v: {
7450 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
7451 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
7452 Ops[1] = Builder.CreateCall(Callee: F, Args: Ops[1], Name: "vld3");
7453 return Builder.CreateDefaultAlignedStore(Val: Ops[1], Addr: Ops[0]);
7454 }
7455 case NEON::BI__builtin_neon_vld4_v:
7456 case NEON::BI__builtin_neon_vld4q_v: {
7457 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
7458 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
7459 Ops[1] = Builder.CreateCall(Callee: F, Args: Ops[1], Name: "vld4");
7460 return Builder.CreateDefaultAlignedStore(Val: Ops[1], Addr: Ops[0]);
7461 }
7462 case NEON::BI__builtin_neon_vld2_dup_v:
7463 case NEON::BI__builtin_neon_vld2q_dup_v: {
7464 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
7465 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
7466 Ops[1] = Builder.CreateCall(Callee: F, Args: Ops[1], Name: "vld2");
7467 return Builder.CreateDefaultAlignedStore(Val: Ops[1], Addr: Ops[0]);
7468 }
7469 case NEON::BI__builtin_neon_vld3_dup_v:
7470 case NEON::BI__builtin_neon_vld3q_dup_v: {
7471 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
7472 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
7473 Ops[1] = Builder.CreateCall(Callee: F, Args: Ops[1], Name: "vld3");
7474 return Builder.CreateDefaultAlignedStore(Val: Ops[1], Addr: Ops[0]);
7475 }
7476 case NEON::BI__builtin_neon_vld4_dup_v:
7477 case NEON::BI__builtin_neon_vld4q_dup_v: {
7478 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
7479 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
7480 Ops[1] = Builder.CreateCall(Callee: F, Args: Ops[1], Name: "vld4");
7481 return Builder.CreateDefaultAlignedStore(Val: Ops[1], Addr: Ops[0]);
7482 }
7483 case NEON::BI__builtin_neon_vld2_lane_v:
7484 case NEON::BI__builtin_neon_vld2q_lane_v: {
7485 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
7486 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
7487 std::rotate(first: Ops.begin() + 1, middle: Ops.begin() + 2, last: Ops.end());
7488 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
7489 Ops[2] = Builder.CreateBitCast(V: Ops[2], DestTy: Ty);
7490 Ops[3] = Builder.CreateZExt(V: Ops[3], DestTy: Int64Ty);
7491 Ops[1] = Builder.CreateCall(Callee: F, Args: ArrayRef(Ops).slice(N: 1), Name: "vld2_lane");
7492 return Builder.CreateDefaultAlignedStore(Val: Ops[1], Addr: Ops[0]);
7493 }
7494 case NEON::BI__builtin_neon_vld3_lane_v:
7495 case NEON::BI__builtin_neon_vld3q_lane_v: {
7496 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
7497 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
7498 std::rotate(first: Ops.begin() + 1, middle: Ops.begin() + 2, last: Ops.end());
7499 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
7500 Ops[2] = Builder.CreateBitCast(V: Ops[2], DestTy: Ty);
7501 Ops[3] = Builder.CreateBitCast(V: Ops[3], DestTy: Ty);
7502 Ops[4] = Builder.CreateZExt(V: Ops[4], DestTy: Int64Ty);
7503 Ops[1] = Builder.CreateCall(Callee: F, Args: ArrayRef(Ops).slice(N: 1), Name: "vld3_lane");
7504 return Builder.CreateDefaultAlignedStore(Val: Ops[1], Addr: Ops[0]);
7505 }
7506 case NEON::BI__builtin_neon_vld4_lane_v:
7507 case NEON::BI__builtin_neon_vld4q_lane_v: {
7508 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
7509 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
7510 std::rotate(first: Ops.begin() + 1, middle: Ops.begin() + 2, last: Ops.end());
7511 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
7512 Ops[2] = Builder.CreateBitCast(V: Ops[2], DestTy: Ty);
7513 Ops[3] = Builder.CreateBitCast(V: Ops[3], DestTy: Ty);
7514 Ops[4] = Builder.CreateBitCast(V: Ops[4], DestTy: Ty);
7515 Ops[5] = Builder.CreateZExt(V: Ops[5], DestTy: Int64Ty);
7516 Ops[1] = Builder.CreateCall(Callee: F, Args: ArrayRef(Ops).slice(N: 1), Name: "vld4_lane");
7517 return Builder.CreateDefaultAlignedStore(Val: Ops[1], Addr: Ops[0]);
7518 }
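  // Structured stores: std::rotate moves the destination pointer from the
  // front of Ops to the back, because the aarch64.neon.stN/stNlane intrinsics
  // take the data vectors first and the pointer last. Lane indices are
  // zero-extended to i64 to match the intrinsic signatures.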
7519 case NEON::BI__builtin_neon_vst2_v:
7520 case NEON::BI__builtin_neon_vst2q_v: {
7521 std::rotate(first: Ops.begin(), middle: Ops.begin() + 1, last: Ops.end());
7522 llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
7523 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
7524 Ops, "");
7525 }
7526 case NEON::BI__builtin_neon_vst2_lane_v:
7527 case NEON::BI__builtin_neon_vst2q_lane_v: {
7528 std::rotate(first: Ops.begin(), middle: Ops.begin() + 1, last: Ops.end());
7529 Ops[2] = Builder.CreateZExt(V: Ops[2], DestTy: Int64Ty);
7530 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
7531 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
7532 Ops, "");
7533 }
7534 case NEON::BI__builtin_neon_vst3_v:
7535 case NEON::BI__builtin_neon_vst3q_v: {
7536 std::rotate(first: Ops.begin(), middle: Ops.begin() + 1, last: Ops.end());
7537 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
7538 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
7539 Ops, "");
7540 }
7541 case NEON::BI__builtin_neon_vst3_lane_v:
7542 case NEON::BI__builtin_neon_vst3q_lane_v: {
7543 std::rotate(first: Ops.begin(), middle: Ops.begin() + 1, last: Ops.end());
7544 Ops[3] = Builder.CreateZExt(V: Ops[3], DestTy: Int64Ty);
7545 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
7546 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
7547 Ops, "");
7548 }
7549 case NEON::BI__builtin_neon_vst4_v:
7550 case NEON::BI__builtin_neon_vst4q_v: {
7551 std::rotate(first: Ops.begin(), middle: Ops.begin() + 1, last: Ops.end());
7552 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
7553 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
7554 Ops, "");
7555 }
7556 case NEON::BI__builtin_neon_vst4_lane_v:
7557 case NEON::BI__builtin_neon_vst4q_lane_v: {
7558 std::rotate(first: Ops.begin(), middle: Ops.begin() + 1, last: Ops.end());
7559 Ops[4] = Builder.CreateZExt(V: Ops[4], DestTy: Int64Ty);
7560 llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
7561 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
7562 Ops, "");
7563 }
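  // vtrn/vuzp/vzip each produce two result vectors. Both halves are emitted as
  // shufflevectors over the two inputs (transpose, de-interleave and interleave
  // masks respectively) and stored to consecutive vector slots of the result
  // aggregate through the GEP on Ops[0].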
7564 case NEON::BI__builtin_neon_vtrn_v:
7565 case NEON::BI__builtin_neon_vtrnq_v: {
7566 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
7567 Ops[2] = Builder.CreateBitCast(V: Ops[2], DestTy: Ty);
7568 Value *SV = nullptr;
7569
7570 for (unsigned vi = 0; vi != 2; ++vi) {
7571 SmallVector<int, 16> Indices;
7572 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
7573 Indices.push_back(Elt: i+vi);
7574 Indices.push_back(Elt: i+e+vi);
7575 }
7576 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ptr: Ops[0], Idx0: vi);
7577 SV = Builder.CreateShuffleVector(V1: Ops[1], V2: Ops[2], Mask: Indices, Name: "vtrn");
7578 SV = Builder.CreateDefaultAlignedStore(Val: SV, Addr);
7579 }
7580 return SV;
7581 }
7582 case NEON::BI__builtin_neon_vuzp_v:
7583 case NEON::BI__builtin_neon_vuzpq_v: {
7584 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
7585 Ops[2] = Builder.CreateBitCast(V: Ops[2], DestTy: Ty);
7586 Value *SV = nullptr;
7587
7588 for (unsigned vi = 0; vi != 2; ++vi) {
7589 SmallVector<int, 16> Indices;
7590 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
7591 Indices.push_back(Elt: 2*i+vi);
7592
7593 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ptr: Ops[0], Idx0: vi);
7594 SV = Builder.CreateShuffleVector(V1: Ops[1], V2: Ops[2], Mask: Indices, Name: "vuzp");
7595 SV = Builder.CreateDefaultAlignedStore(Val: SV, Addr);
7596 }
7597 return SV;
7598 }
7599 case NEON::BI__builtin_neon_vzip_v:
7600 case NEON::BI__builtin_neon_vzipq_v: {
7601 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
7602 Ops[2] = Builder.CreateBitCast(V: Ops[2], DestTy: Ty);
7603 Value *SV = nullptr;
7604
7605 for (unsigned vi = 0; vi != 2; ++vi) {
7606 SmallVector<int, 16> Indices;
7607 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
7608 Indices.push_back(Elt: (i + vi*e) >> 1);
7609 Indices.push_back(Elt: ((i + vi*e) >> 1)+e);
7610 }
7611 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ptr: Ops[0], Idx0: vi);
7612 SV = Builder.CreateShuffleVector(V1: Ops[1], V2: Ops[2], Mask: Indices, Name: "vzip");
7613 SV = Builder.CreateDefaultAlignedStore(Val: SV, Addr);
7614 }
7615 return SV;
7616 }
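  // Full (128-bit table) lookups: vqtbl1-4 and vqtbx1-4 map one-to-one onto the
  // aarch64.neon.tbl1-4 / tbx1-4 intrinsics; the tbx forms take an extra
  // fallback vector that supplies the result for out-of-range indices.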
7617 case NEON::BI__builtin_neon_vqtbl1q_v: {
7618 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
7619 Ops, "vtbl1");
7620 }
7621 case NEON::BI__builtin_neon_vqtbl2q_v: {
7622 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
7623 Ops, "vtbl2");
7624 }
7625 case NEON::BI__builtin_neon_vqtbl3q_v: {
7626 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
7627 Ops, "vtbl3");
7628 }
7629 case NEON::BI__builtin_neon_vqtbl4q_v: {
7630 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
7631 Ops, "vtbl4");
7632 }
7633 case NEON::BI__builtin_neon_vqtbx1q_v: {
7634 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
7635 Ops, "vtbx1");
7636 }
7637 case NEON::BI__builtin_neon_vqtbx2q_v: {
7638 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
7639 Ops, "vtbx2");
7640 }
7641 case NEON::BI__builtin_neon_vqtbx3q_v: {
7642 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
7643 Ops, "vtbx3");
7644 }
7645 case NEON::BI__builtin_neon_vqtbx4q_v: {
7646 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
7647 Ops, "vtbx4");
7648 }
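  // Note the deliberate cross-mapping: per ACLE, vsqadd (unsigned operand plus
  // signed addend) is the USQADD instruction and vuqadd (signed operand plus
  // unsigned addend) is SUQADD, so the intrinsic names are the "opposite" of
  // the builtin names.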
7649 case NEON::BI__builtin_neon_vsqadd_v:
7650 case NEON::BI__builtin_neon_vsqaddq_v: {
7651 Int = Intrinsic::aarch64_neon_usqadd;
7652 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vsqadd");
7653 }
7654 case NEON::BI__builtin_neon_vuqadd_v:
7655 case NEON::BI__builtin_neon_vuqaddq_v: {
7656 Int = Intrinsic::aarch64_neon_suqadd;
7657 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vuqadd");
7658 }
7659
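  // Lookup-table (LUTI2/LUTI4) builtins. The intrinsics are overloaded on the
  // result type and on the source-vector type, which is rebuilt here from the
  // builtin's element type at the appropriate 64-/128-bit width; the
  // _lane/_laneq spelling selects the corresponding intrinsic.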
7660 case NEON::BI__builtin_neon_vluti2_laneq_mf8:
7661 case NEON::BI__builtin_neon_vluti2_laneq_bf16:
7662 case NEON::BI__builtin_neon_vluti2_laneq_f16:
7663 case NEON::BI__builtin_neon_vluti2_laneq_p16:
7664 case NEON::BI__builtin_neon_vluti2_laneq_p8:
7665 case NEON::BI__builtin_neon_vluti2_laneq_s16:
7666 case NEON::BI__builtin_neon_vluti2_laneq_s8:
7667 case NEON::BI__builtin_neon_vluti2_laneq_u16:
7668 case NEON::BI__builtin_neon_vluti2_laneq_u8: {
7669 Int = Intrinsic::aarch64_neon_vluti2_laneq;
7670 llvm::Type *Tys[2];
7671 Tys[0] = Ty;
7672 Tys[1] = GetNeonType(CGF: this, TypeFlags: NeonTypeFlags(Type.getEltType(), false,
7673 /*isQuad*/ false));
7674 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vluti2_laneq");
7675 }
7676 case NEON::BI__builtin_neon_vluti2q_laneq_mf8:
7677 case NEON::BI__builtin_neon_vluti2q_laneq_bf16:
7678 case NEON::BI__builtin_neon_vluti2q_laneq_f16:
7679 case NEON::BI__builtin_neon_vluti2q_laneq_p16:
7680 case NEON::BI__builtin_neon_vluti2q_laneq_p8:
7681 case NEON::BI__builtin_neon_vluti2q_laneq_s16:
7682 case NEON::BI__builtin_neon_vluti2q_laneq_s8:
7683 case NEON::BI__builtin_neon_vluti2q_laneq_u16:
7684 case NEON::BI__builtin_neon_vluti2q_laneq_u8: {
7685 Int = Intrinsic::aarch64_neon_vluti2_laneq;
7686 llvm::Type *Tys[2];
7687 Tys[0] = Ty;
7688 Tys[1] = GetNeonType(CGF: this, TypeFlags: NeonTypeFlags(Type.getEltType(), false,
7689 /*isQuad*/ true));
7690 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vluti2_laneq");
7691 }
7692 case NEON::BI__builtin_neon_vluti2_lane_mf8:
7693 case NEON::BI__builtin_neon_vluti2_lane_bf16:
7694 case NEON::BI__builtin_neon_vluti2_lane_f16:
7695 case NEON::BI__builtin_neon_vluti2_lane_p16:
7696 case NEON::BI__builtin_neon_vluti2_lane_p8:
7697 case NEON::BI__builtin_neon_vluti2_lane_s16:
7698 case NEON::BI__builtin_neon_vluti2_lane_s8:
7699 case NEON::BI__builtin_neon_vluti2_lane_u16:
7700 case NEON::BI__builtin_neon_vluti2_lane_u8: {
7701 Int = Intrinsic::aarch64_neon_vluti2_lane;
7702 llvm::Type *Tys[2];
7703 Tys[0] = Ty;
7704 Tys[1] = GetNeonType(CGF: this, TypeFlags: NeonTypeFlags(Type.getEltType(), false,
7705 /*isQuad*/ false));
7706 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vluti2_lane");
7707 }
7708 case NEON::BI__builtin_neon_vluti2q_lane_mf8:
7709 case NEON::BI__builtin_neon_vluti2q_lane_bf16:
7710 case NEON::BI__builtin_neon_vluti2q_lane_f16:
7711 case NEON::BI__builtin_neon_vluti2q_lane_p16:
7712 case NEON::BI__builtin_neon_vluti2q_lane_p8:
7713 case NEON::BI__builtin_neon_vluti2q_lane_s16:
7714 case NEON::BI__builtin_neon_vluti2q_lane_s8:
7715 case NEON::BI__builtin_neon_vluti2q_lane_u16:
7716 case NEON::BI__builtin_neon_vluti2q_lane_u8: {
7717 Int = Intrinsic::aarch64_neon_vluti2_lane;
7718 llvm::Type *Tys[2];
7719 Tys[0] = Ty;
7720 Tys[1] = GetNeonType(CGF: this, TypeFlags: NeonTypeFlags(Type.getEltType(), false,
7721 /*isQuad*/ true));
7722 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vluti2_lane");
7723 }
7724 case NEON::BI__builtin_neon_vluti4q_lane_mf8:
7725 case NEON::BI__builtin_neon_vluti4q_lane_p8:
7726 case NEON::BI__builtin_neon_vluti4q_lane_s8:
7727 case NEON::BI__builtin_neon_vluti4q_lane_u8: {
7728 Int = Intrinsic::aarch64_neon_vluti4q_lane;
7729 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vluti4q_lane");
7730 }
7731 case NEON::BI__builtin_neon_vluti4q_laneq_mf8:
7732 case NEON::BI__builtin_neon_vluti4q_laneq_p8:
7733 case NEON::BI__builtin_neon_vluti4q_laneq_s8:
7734 case NEON::BI__builtin_neon_vluti4q_laneq_u8: {
7735 Int = Intrinsic::aarch64_neon_vluti4q_laneq;
7736 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vluti4q_laneq");
7737 }
7738 case NEON::BI__builtin_neon_vluti4q_lane_bf16_x2:
7739 case NEON::BI__builtin_neon_vluti4q_lane_f16_x2:
7740 case NEON::BI__builtin_neon_vluti4q_lane_p16_x2:
7741 case NEON::BI__builtin_neon_vluti4q_lane_s16_x2:
7742 case NEON::BI__builtin_neon_vluti4q_lane_u16_x2: {
7743 Int = Intrinsic::aarch64_neon_vluti4q_lane_x2;
7744 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vluti4q_lane_x2");
7745 }
7746 case NEON::BI__builtin_neon_vluti4q_laneq_bf16_x2:
7747 case NEON::BI__builtin_neon_vluti4q_laneq_f16_x2:
7748 case NEON::BI__builtin_neon_vluti4q_laneq_p16_x2:
7749 case NEON::BI__builtin_neon_vluti4q_laneq_s16_x2:
7750 case NEON::BI__builtin_neon_vluti4q_laneq_u16_x2: {
7751 Int = Intrinsic::aarch64_neon_vluti4q_laneq_x2;
7752 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vluti4q_laneq_x2");
7753 }
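  // FP8 (mf8) conversions. vcvt1/vcvt2 widen FP8 to bf16 or f16 via the
  // fp8.cvtl1/cvtl2 intrinsics; the *_low_* spellings set ExtractLow so only
  // the bottom half of the source is converted. The vcvt*_mf8_* forms narrow
  // f32/f16 to FP8 via fcvtn, and the _high form first inserts the existing
  // low half into a 16-byte vector before using fcvtn2. The trailing fpm
  // operand of these builtins is handled inside the EmitFP8Neon* helpers
  // (presumably by setting FPMR).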
7754 case NEON::BI__builtin_neon_vcvt1_low_bf16_mf8_fpm:
7755 ExtractLow = true;
7756 LLVM_FALLTHROUGH;
7757 case NEON::BI__builtin_neon_vcvt1_bf16_mf8_fpm:
7758 case NEON::BI__builtin_neon_vcvt1_high_bf16_mf8_fpm:
7759 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl1,
7760 llvm::FixedVectorType::get(BFloatTy, 8),
7761 Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt1");
7762 case NEON::BI__builtin_neon_vcvt2_low_bf16_mf8_fpm:
7763 ExtractLow = true;
7764 LLVM_FALLTHROUGH;
7765 case NEON::BI__builtin_neon_vcvt2_bf16_mf8_fpm:
7766 case NEON::BI__builtin_neon_vcvt2_high_bf16_mf8_fpm:
7767 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl2,
7768 llvm::FixedVectorType::get(BFloatTy, 8),
7769 Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt2");
7770 case NEON::BI__builtin_neon_vcvt1_low_f16_mf8_fpm:
7771 ExtractLow = true;
7772 LLVM_FALLTHROUGH;
7773 case NEON::BI__builtin_neon_vcvt1_f16_mf8_fpm:
7774 case NEON::BI__builtin_neon_vcvt1_high_f16_mf8_fpm:
7775 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl1,
7776 llvm::FixedVectorType::get(HalfTy, 8),
7777 Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt1");
7778 case NEON::BI__builtin_neon_vcvt2_low_f16_mf8_fpm:
7779 ExtractLow = true;
7780 LLVM_FALLTHROUGH;
7781 case NEON::BI__builtin_neon_vcvt2_f16_mf8_fpm:
7782 case NEON::BI__builtin_neon_vcvt2_high_f16_mf8_fpm:
7783 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl2,
7784 llvm::FixedVectorType::get(HalfTy, 8),
7785 Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt2");
7786 case NEON::BI__builtin_neon_vcvt_mf8_f32_fpm:
7787 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
7788 llvm::FixedVectorType::get(Int8Ty, 8),
7789 Ops[0]->getType(), false, Ops, E, "vfcvtn");
7790 case NEON::BI__builtin_neon_vcvt_mf8_f16_fpm:
7791 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
7792 llvm::FixedVectorType::get(Int8Ty, 8),
7793 llvm::FixedVectorType::get(HalfTy, 4), false, Ops,
7794 E, "vfcvtn");
7795 case NEON::BI__builtin_neon_vcvtq_mf8_f16_fpm:
7796 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
7797 llvm::FixedVectorType::get(Int8Ty, 16),
7798 llvm::FixedVectorType::get(HalfTy, 8), false, Ops,
7799 E, "vfcvtn");
7800 case NEON::BI__builtin_neon_vcvt_high_mf8_f32_fpm: {
7801 llvm::Type *Ty = llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 16);
7802 Ops[0] = Builder.CreateInsertVector(DstType: Ty, SrcVec: PoisonValue::get(T: Ty), SubVec: Ops[0],
7803 Idx: uint64_t(0));
7804 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn2, Ty,
7805 Ops[1]->getType(), false, Ops, E, "vfcvtn2");
7806 }
7807
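  // FP8 dot products: fdot2 accumulates into f16 and fdot4 into f32 (hence
  // HalfTy vs FloatTy). ExtendLaneArg flags the _lane (64-bit index vector)
  // variants for EmitFP8NeonFDOTCall; the _laneq forms share the same lane
  // intrinsic.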
7808 case NEON::BI__builtin_neon_vdot_f16_mf8_fpm:
7809 case NEON::BI__builtin_neon_vdotq_f16_mf8_fpm:
7810 return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot2, false, HalfTy,
7811 Ops, E, "fdot2");
7812 case NEON::BI__builtin_neon_vdot_lane_f16_mf8_fpm:
7813 case NEON::BI__builtin_neon_vdotq_lane_f16_mf8_fpm:
7814 ExtendLaneArg = true;
7815 LLVM_FALLTHROUGH;
7816 case NEON::BI__builtin_neon_vdot_laneq_f16_mf8_fpm:
7817 case NEON::BI__builtin_neon_vdotq_laneq_f16_mf8_fpm:
7818 return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot2_lane,
7819 ExtendLaneArg, HalfTy, Ops, E, "fdot2_lane");
7820 case NEON::BI__builtin_neon_vdot_f32_mf8_fpm:
7821 case NEON::BI__builtin_neon_vdotq_f32_mf8_fpm:
7822 return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot4, false,
7823 FloatTy, Ops, E, "fdot4");
7824 case NEON::BI__builtin_neon_vdot_lane_f32_mf8_fpm:
7825 case NEON::BI__builtin_neon_vdotq_lane_f32_mf8_fpm:
7826 ExtendLaneArg = true;
7827 LLVM_FALLTHROUGH;
7828 case NEON::BI__builtin_neon_vdot_laneq_f32_mf8_fpm:
7829 case NEON::BI__builtin_neon_vdotq_laneq_f32_mf8_fpm:
7830 return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot4_lane,
7831 ExtendLaneArg, FloatTy, Ops, E, "fdot4_lane");
7832
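  // FP8 widening multiply-accumulate: fmlalb/fmlalt accumulate into f16 and the
  // fmlall{bb,bt,tb,tt} forms into f32. The _lane/_laneq variants go through
  // EmitFP8NeonFMLACall, again with ExtendLaneArg marking the _lane (64-bit
  // index vector) spellings.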
7833 case NEON::BI__builtin_neon_vmlalbq_f16_mf8_fpm:
7834 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalb,
7835 {llvm::FixedVectorType::get(HalfTy, 8)}, Ops, E,
7836 "vmlal");
7837 case NEON::BI__builtin_neon_vmlaltq_f16_mf8_fpm:
7838 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalt,
7839 {llvm::FixedVectorType::get(HalfTy, 8)}, Ops, E,
7840 "vmlal");
7841 case NEON::BI__builtin_neon_vmlallbbq_f32_mf8_fpm:
7842 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlallbb,
7843 {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
7844 "vmlall");
7845 case NEON::BI__builtin_neon_vmlallbtq_f32_mf8_fpm:
7846 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlallbt,
7847 {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
7848 "vmlall");
7849 case NEON::BI__builtin_neon_vmlalltbq_f32_mf8_fpm:
7850 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalltb,
7851 {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
7852 "vmlall");
7853 case NEON::BI__builtin_neon_vmlallttq_f32_mf8_fpm:
7854 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalltt,
7855 {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
7856 "vmlall");
7857 case NEON::BI__builtin_neon_vmlalbq_lane_f16_mf8_fpm:
7858 ExtendLaneArg = true;
7859 LLVM_FALLTHROUGH;
7860 case NEON::BI__builtin_neon_vmlalbq_laneq_f16_mf8_fpm:
7861 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalb_lane,
7862 ExtendLaneArg, HalfTy, Ops, E, "vmlal_lane");
7863 case NEON::BI__builtin_neon_vmlaltq_lane_f16_mf8_fpm:
7864 ExtendLaneArg = true;
7865 LLVM_FALLTHROUGH;
7866 case NEON::BI__builtin_neon_vmlaltq_laneq_f16_mf8_fpm:
7867 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalt_lane,
7868 ExtendLaneArg, HalfTy, Ops, E, "vmlal_lane");
7869 case NEON::BI__builtin_neon_vmlallbbq_lane_f32_mf8_fpm:
7870 ExtendLaneArg = true;
7871 LLVM_FALLTHROUGH;
7872 case NEON::BI__builtin_neon_vmlallbbq_laneq_f32_mf8_fpm:
7873 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlallbb_lane,
7874 ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
7875 case NEON::BI__builtin_neon_vmlallbtq_lane_f32_mf8_fpm:
7876 ExtendLaneArg = true;
7877 LLVM_FALLTHROUGH;
7878 case NEON::BI__builtin_neon_vmlallbtq_laneq_f32_mf8_fpm:
7879 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlallbt_lane,
7880 ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
7881 case NEON::BI__builtin_neon_vmlalltbq_lane_f32_mf8_fpm:
7882 ExtendLaneArg = true;
7883 LLVM_FALLTHROUGH;
7884 case NEON::BI__builtin_neon_vmlalltbq_laneq_f32_mf8_fpm:
7885 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalltb_lane,
7886 ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
7887 case NEON::BI__builtin_neon_vmlallttq_lane_f32_mf8_fpm:
7888 ExtendLaneArg = true;
7889 LLVM_FALLTHROUGH;
7890 case NEON::BI__builtin_neon_vmlallttq_laneq_f32_mf8_fpm:
7891 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalltt_lane,
7892 ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
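  // FEAT_FAMINMAX absolute min/max and the FP8 fscale builtins map directly to
  // their intrinsics: famin/famax take the min/max of absolute values, and
  // fscale multiplies each element by 2^n for a per-lane integer exponent.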
7893 case NEON::BI__builtin_neon_vamin_f16:
7894 case NEON::BI__builtin_neon_vaminq_f16:
7895 case NEON::BI__builtin_neon_vamin_f32:
7896 case NEON::BI__builtin_neon_vaminq_f32:
7897 case NEON::BI__builtin_neon_vaminq_f64: {
7898 Int = Intrinsic::aarch64_neon_famin;
7899 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "famin");
7900 }
7901 case NEON::BI__builtin_neon_vamax_f16:
7902 case NEON::BI__builtin_neon_vamaxq_f16:
7903 case NEON::BI__builtin_neon_vamax_f32:
7904 case NEON::BI__builtin_neon_vamaxq_f32:
7905 case NEON::BI__builtin_neon_vamaxq_f64: {
7906 Int = Intrinsic::aarch64_neon_famax;
7907 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "famax");
7908 }
7909 case NEON::BI__builtin_neon_vscale_f16:
7910 case NEON::BI__builtin_neon_vscaleq_f16:
7911 case NEON::BI__builtin_neon_vscale_f32:
7912 case NEON::BI__builtin_neon_vscaleq_f32:
7913 case NEON::BI__builtin_neon_vscaleq_f64: {
7914 Int = Intrinsic::aarch64_neon_fp8_fscale;
7915 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "fscale");
7916 }
7917 }
7918}
7919
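// The BPF builtins below implement CO-RE ("compile once, run everywhere")
// relocations: each lowers to an llvm.bpf.* intrinsic that the BPF backend
// turns into a relocation record. __builtin_preserve_field_info(expr, kind)
// returns the requested field information (byte offset, size, existence, ...)
// and enables preserve_*_access_index generation for its argument; the
// type-based builtins attach the standalone debug type to the call as
// !preserve.access.index metadata.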
7920Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID,
7921 const CallExpr *E) {
7922 assert((BuiltinID == BPF::BI__builtin_preserve_field_info ||
7923 BuiltinID == BPF::BI__builtin_btf_type_id ||
7924 BuiltinID == BPF::BI__builtin_preserve_type_info ||
7925 BuiltinID == BPF::BI__builtin_preserve_enum_value) &&
7926 "unexpected BPF builtin");
7927
7928  // A sequence number, injected into the IR builtin functions, to prevent
7929  // CSE when the only difference between otherwise identical calls is
7930  // their debuginfo metadata.
7931 static uint32_t BuiltinSeqNum;
7932
7933 switch (BuiltinID) {
7934 default:
7935 llvm_unreachable("Unexpected BPF builtin");
7936 case BPF::BI__builtin_preserve_field_info: {
7937 const Expr *Arg = E->getArg(Arg: 0);
7938 bool IsBitField = Arg->IgnoreParens()->getObjectKind() == OK_BitField;
7939
7940 if (!getDebugInfo()) {
7941 CGM.Error(loc: E->getExprLoc(),
7942 error: "using __builtin_preserve_field_info() without -g");
7943 return IsBitField ? EmitLValue(E: Arg).getRawBitFieldPointer(CGF&: *this)
7944 : EmitLValue(E: Arg).emitRawPointer(CGF&: *this);
7945 }
7946
7947 // Enable underlying preserve_*_access_index() generation.
7948 bool OldIsInPreservedAIRegion = IsInPreservedAIRegion;
7949 IsInPreservedAIRegion = true;
7950 Value *FieldAddr = IsBitField ? EmitLValue(E: Arg).getRawBitFieldPointer(CGF&: *this)
7951 : EmitLValue(E: Arg).emitRawPointer(CGF&: *this);
7952 IsInPreservedAIRegion = OldIsInPreservedAIRegion;
7953
7954 ConstantInt *C = cast<ConstantInt>(Val: EmitScalarExpr(E: E->getArg(Arg: 1)));
7955 Value *InfoKind = ConstantInt::get(Ty: Int64Ty, V: C->getSExtValue());
7956
7957    // Build the IR for the preserve_field_info intrinsic.
7958 llvm::Function *FnGetFieldInfo = Intrinsic::getOrInsertDeclaration(
7959 &CGM.getModule(), Intrinsic::bpf_preserve_field_info,
7960 {FieldAddr->getType()});
7961 return Builder.CreateCall(Callee: FnGetFieldInfo, Args: {FieldAddr, InfoKind});
7962 }
7963 case BPF::BI__builtin_btf_type_id:
7964 case BPF::BI__builtin_preserve_type_info: {
7965 if (!getDebugInfo()) {
7966 CGM.Error(loc: E->getExprLoc(), error: "using builtin function without -g");
7967 return nullptr;
7968 }
7969
7970 const Expr *Arg0 = E->getArg(Arg: 0);
7971 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
7972 Ty: Arg0->getType(), Loc: Arg0->getExprLoc());
7973
7974 ConstantInt *Flag = cast<ConstantInt>(Val: EmitScalarExpr(E: E->getArg(Arg: 1)));
7975 Value *FlagValue = ConstantInt::get(Ty: Int64Ty, V: Flag->getSExtValue());
7976 Value *SeqNumVal = ConstantInt::get(Ty: Int32Ty, V: BuiltinSeqNum++);
7977
7978 llvm::Function *FnDecl;
7979 if (BuiltinID == BPF::BI__builtin_btf_type_id)
7980 FnDecl = Intrinsic::getOrInsertDeclaration(
7981 &CGM.getModule(), Intrinsic::bpf_btf_type_id, {});
7982 else
7983 FnDecl = Intrinsic::getOrInsertDeclaration(
7984 &CGM.getModule(), Intrinsic::bpf_preserve_type_info, {});
7985 CallInst *Fn = Builder.CreateCall(Callee: FnDecl, Args: {SeqNumVal, FlagValue});
7986 Fn->setMetadata(KindID: LLVMContext::MD_preserve_access_index, Node: DbgInfo);
7987 return Fn;
7988 }
7989 case BPF::BI__builtin_preserve_enum_value: {
7990 if (!getDebugInfo()) {
7991 CGM.Error(loc: E->getExprLoc(), error: "using builtin function without -g");
7992 return nullptr;
7993 }
7994
7995 const Expr *Arg0 = E->getArg(Arg: 0);
7996 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
7997 Ty: Arg0->getType(), Loc: Arg0->getExprLoc());
7998
7999 // Find enumerator
8000 const auto *UO = cast<UnaryOperator>(Val: Arg0->IgnoreParens());
8001 const auto *CE = cast<CStyleCastExpr>(Val: UO->getSubExpr());
8002 const auto *DR = cast<DeclRefExpr>(CE->getSubExpr());
8003 const auto *Enumerator = cast<EnumConstantDecl>(DR->getDecl());
8004
8005 auto InitVal = Enumerator->getInitVal();
8006 std::string InitValStr;
8007 if (InitVal.isNegative() || InitVal > uint64_t(INT64_MAX))
8008 InitValStr = std::to_string(InitVal.getSExtValue());
8009 else
8010 InitValStr = std::to_string(InitVal.getZExtValue());
8011 std::string EnumStr = Enumerator->getNameAsString() + ":" + InitValStr;
8012 Value *EnumStrVal = Builder.CreateGlobalString(Str: EnumStr);
8013
8014 ConstantInt *Flag = cast<ConstantInt>(Val: EmitScalarExpr(E: E->getArg(Arg: 1)));
8015 Value *FlagValue = ConstantInt::get(Ty: Int64Ty, V: Flag->getSExtValue());
8016 Value *SeqNumVal = ConstantInt::get(Ty: Int32Ty, V: BuiltinSeqNum++);
8017
8018 llvm::Function *IntrinsicFn = Intrinsic::getOrInsertDeclaration(
8019 &CGM.getModule(), Intrinsic::bpf_preserve_enum_value, {});
8020 CallInst *Fn =
8021 Builder.CreateCall(Callee: IntrinsicFn, Args: {SeqNumVal, EnumStrVal, FlagValue});
8022 Fn->setMetadata(KindID: LLVMContext::MD_preserve_access_index, Node: DbgInfo);
8023 return Fn;
8024 }
8025 }
8026}
8027
8028llvm::Value *CodeGenFunction::
8029BuildVector(ArrayRef<llvm::Value*> Ops) {
8030 assert((Ops.size() & (Ops.size() - 1)) == 0 &&
8031 "Not a power-of-two sized vector!");
8032 bool AllConstants = true;
8033 for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
8034 AllConstants &= isa<Constant>(Val: Ops[i]);
8035
8036 // If this is a constant vector, create a ConstantVector.
8037 if (AllConstants) {
8038 SmallVector<llvm::Constant*, 16> CstOps;
8039 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
8040 CstOps.push_back(Elt: cast<Constant>(Val: Ops[i]));
8041 return llvm::ConstantVector::get(V: CstOps);
8042 }
8043
8044 // Otherwise, insertelement the values to build the vector.
8045 Value *Result = llvm::PoisonValue::get(
8046 T: llvm::FixedVectorType::get(ElementType: Ops[0]->getType(), NumElts: Ops.size()));
8047
8048 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
8049 Result = Builder.CreateInsertElement(Vec: Result, NewElt: Ops[i], Idx: Builder.getInt64(C: i));
8050
8051 return Result;
8052}
8053
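// Emit a call to the runtime (compiler-rt) helper __init_cpu_features_resolver,
// which fills in __aarch64_cpu_features before any feature bits are tested.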
8054Value *CodeGenFunction::EmitAArch64CpuInit() {
8055 llvm::FunctionType *FTy = llvm::FunctionType::get(Result: VoidTy, isVarArg: false);
8056 llvm::FunctionCallee Func =
8057 CGM.CreateRuntimeFunction(Ty: FTy, Name: "__init_cpu_features_resolver");
8058 cast<llvm::GlobalValue>(Val: Func.getCallee())->setDSOLocal(true);
8059 cast<llvm::GlobalValue>(Val: Func.getCallee())
8060 ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
8061 return Builder.CreateCall(Callee: Func);
8062}
8063
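// __builtin_cpu_supports("a+b+...") on AArch64: split the '+'-separated FMV
// feature names, fold the whole expression to constant false if any name is
// not a recognised FMV extension, and otherwise test the collected features
// against the runtime feature mask.
// e.g. __builtin_cpu_supports("sve2+i8mm")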
8064Value *CodeGenFunction::EmitAArch64CpuSupports(const CallExpr *E) {
8065 const Expr *ArgExpr = E->getArg(Arg: 0)->IgnoreParenCasts();
8066 StringRef ArgStr = cast<StringLiteral>(Val: ArgExpr)->getString();
8067 llvm::SmallVector<StringRef, 8> Features;
8068 ArgStr.split(A&: Features, Separator: "+");
8069 for (auto &Feature : Features) {
8070 Feature = Feature.trim();
8071 if (!llvm::AArch64::parseFMVExtension(Feature))
8072 return Builder.getFalse();
8073 if (Feature != "default")
8074 Features.push_back(Elt: Feature);
8075 }
8076 return EmitAArch64CpuSupports(FeatureStrs: Features);
8077}
8078
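// Tests the requested FMV features against the bit mask published by the
// runtime in __aarch64_cpu_features. The emitted check is roughly:
//   %f  = load i64, ptr @__aarch64_cpu_features
//   %m  = and i64 %f, <FeaturesMask>
//   %ok = icmp eq i64 %m, <FeaturesMask>
// i.e. true only if every requested feature bit is set.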
8079llvm::Value *
8080CodeGenFunction::EmitAArch64CpuSupports(ArrayRef<StringRef> FeaturesStrs) {
8081 uint64_t FeaturesMask = llvm::AArch64::getCpuSupportsMask(Features: FeaturesStrs);
8082 Value *Result = Builder.getTrue();
8083 if (FeaturesMask != 0) {
8084 // Get features from structure in runtime library
8085 // struct {
8086 // unsigned long long features;
8087 // } __aarch64_cpu_features;
8088 llvm::Type *STy = llvm::StructType::get(elt1: Int64Ty);
8089 llvm::Constant *AArch64CPUFeatures =
8090 CGM.CreateRuntimeVariable(Ty: STy, Name: "__aarch64_cpu_features");
8091 cast<llvm::GlobalValue>(Val: AArch64CPUFeatures)->setDSOLocal(true);
8092 llvm::Value *CpuFeatures = Builder.CreateGEP(
8093 Ty: STy, Ptr: AArch64CPUFeatures,
8094 IdxList: {ConstantInt::get(Ty: Int32Ty, V: 0), ConstantInt::get(Ty: Int32Ty, V: 0)});
8095 Value *Features = Builder.CreateAlignedLoad(Ty: Int64Ty, Addr: CpuFeatures,
8096 Align: CharUnits::fromQuantity(Quantity: 8));
8097 Value *Mask = Builder.getInt64(C: FeaturesMask);
8098 Value *Bitset = Builder.CreateAnd(LHS: Features, RHS: Mask);
8099 Value *Cmp = Builder.CreateICmpEQ(LHS: Bitset, RHS: Mask);
8100 Result = Builder.CreateAnd(LHS: Result, RHS: Cmp);
8101 }
8102 return Result;
8103}
8104
