1//===------- CGHLSLBuiltins.cpp - Emit LLVM Code for HLSL builtins --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This contains code to emit HLSL Builtin calls as LLVM code.
10//
11//===----------------------------------------------------------------------===//
12
13#include "CGBuiltin.h"
14#include "CGHLSLRuntime.h"
15
16using namespace clang;
17using namespace CodeGen;
18using namespace llvm;
19
20static Value *handleAsDoubleBuiltin(CodeGenFunction &CGF, const CallExpr *E) {
21 assert((E->getArg(0)->getType()->hasUnsignedIntegerRepresentation() &&
22 E->getArg(1)->getType()->hasUnsignedIntegerRepresentation()) &&
23 "asdouble operands types mismatch");
24 Value *OpLowBits = CGF.EmitScalarExpr(E: E->getArg(Arg: 0));
25 Value *OpHighBits = CGF.EmitScalarExpr(E: E->getArg(Arg: 1));
26
27 llvm::Type *ResultType = CGF.DoubleTy;
28 int N = 1;
29 if (auto *VTy = E->getArg(Arg: 0)->getType()->getAs<clang::VectorType>()) {
30 N = VTy->getNumElements();
31 ResultType = llvm::FixedVectorType::get(ElementType: CGF.DoubleTy, NumElts: N);
32 }
33
34 if (CGF.CGM.getTarget().getTriple().isDXIL())
35 return CGF.Builder.CreateIntrinsic(
36 /*ReturnType=*/ResultType, Intrinsic::dx_asdouble,
37 {OpLowBits, OpHighBits}, nullptr, "hlsl.asdouble");
38
39 if (!E->getArg(Arg: 0)->getType()->isVectorType()) {
40 OpLowBits = CGF.Builder.CreateVectorSplat(NumElts: 1, V: OpLowBits);
41 OpHighBits = CGF.Builder.CreateVectorSplat(NumElts: 1, V: OpHighBits);
42 }
43
44 llvm::SmallVector<int> Mask;
45 for (int i = 0; i < N; i++) {
46 Mask.push_back(Elt: i);
47 Mask.push_back(Elt: i + N);
48 }
49
50 Value *BitVec = CGF.Builder.CreateShuffleVector(V1: OpLowBits, V2: OpHighBits, Mask);
51
52 return CGF.Builder.CreateBitCast(V: BitVec, DestTy: ResultType);
53}
54
55static Value *handleHlslClip(const CallExpr *E, CodeGenFunction *CGF) {
56 Value *Op0 = CGF->EmitScalarExpr(E: E->getArg(Arg: 0));
57
58 Constant *FZeroConst = ConstantFP::getZero(Ty: CGF->FloatTy);
59 Value *CMP;
60 Value *LastInstr;
61
62 if (const auto *VecTy = E->getArg(Arg: 0)->getType()->getAs<clang::VectorType>()) {
63 FZeroConst = ConstantVector::getSplat(
64 EC: ElementCount::getFixed(MinVal: VecTy->getNumElements()), Elt: FZeroConst);
65 auto *FCompInst = CGF->Builder.CreateFCmpOLT(LHS: Op0, RHS: FZeroConst);
66 CMP = CGF->Builder.CreateIntrinsic(
67 RetTy: CGF->Builder.getInt1Ty(), ID: CGF->CGM.getHLSLRuntime().getAnyIntrinsic(),
68 Args: {FCompInst});
69 } else {
70 CMP = CGF->Builder.CreateFCmpOLT(LHS: Op0, RHS: FZeroConst);
71 }
72
73 if (CGF->CGM.getTarget().getTriple().isDXIL()) {
74 LastInstr = CGF->Builder.CreateIntrinsic(Intrinsic::dx_discard, {CMP});
75 } else if (CGF->CGM.getTarget().getTriple().isSPIRV()) {
76 BasicBlock *LT0 = CGF->createBasicBlock(name: "lt0", parent: CGF->CurFn);
77 BasicBlock *End = CGF->createBasicBlock(name: "end", parent: CGF->CurFn);
78
79 CGF->Builder.CreateCondBr(Cond: CMP, True: LT0, False: End);
80
81 CGF->Builder.SetInsertPoint(LT0);
82
83 CGF->Builder.CreateIntrinsic(Intrinsic::spv_discard, {});
84
85 LastInstr = CGF->Builder.CreateBr(Dest: End);
86 CGF->Builder.SetInsertPoint(End);
87 } else {
88 llvm_unreachable("Backend Codegen not supported.");
89 }
90
91 return LastInstr;
92}
93
94static Value *handleHlslSplitdouble(const CallExpr *E, CodeGenFunction *CGF) {
95 Value *Op0 = CGF->EmitScalarExpr(E: E->getArg(Arg: 0));
96 const auto *OutArg1 = dyn_cast<HLSLOutArgExpr>(Val: E->getArg(Arg: 1));
97 const auto *OutArg2 = dyn_cast<HLSLOutArgExpr>(Val: E->getArg(Arg: 2));
98
99 CallArgList Args;
100 LValue Op1TmpLValue =
101 CGF->EmitHLSLOutArgExpr(E: OutArg1, Args, Ty: OutArg1->getType());
102 LValue Op2TmpLValue =
103 CGF->EmitHLSLOutArgExpr(E: OutArg2, Args, Ty: OutArg2->getType());
104
105 if (CGF->getTarget().getCXXABI().areArgsDestroyedLeftToRightInCallee())
106 Args.reverseWritebacks();
107
108 Value *LowBits = nullptr;
109 Value *HighBits = nullptr;
110
111 if (CGF->CGM.getTarget().getTriple().isDXIL()) {
112 llvm::Type *RetElementTy = CGF->Int32Ty;
113 if (auto *Op0VecTy = E->getArg(Arg: 0)->getType()->getAs<clang::VectorType>())
114 RetElementTy = llvm::VectorType::get(
115 ElementType: CGF->Int32Ty, EC: ElementCount::getFixed(MinVal: Op0VecTy->getNumElements()));
116 auto *RetTy = llvm::StructType::get(elt1: RetElementTy, elts: RetElementTy);
117
118 CallInst *CI = CGF->Builder.CreateIntrinsic(
119 RetTy, Intrinsic::dx_splitdouble, {Op0}, nullptr, "hlsl.splitdouble");
120
121 LowBits = CGF->Builder.CreateExtractValue(Agg: CI, Idxs: 0);
122 HighBits = CGF->Builder.CreateExtractValue(Agg: CI, Idxs: 1);
123 } else {
124 // For Non DXIL targets we generate the instructions.
125
126 if (!Op0->getType()->isVectorTy()) {
127 FixedVectorType *DestTy = FixedVectorType::get(ElementType: CGF->Int32Ty, NumElts: 2);
128 Value *Bitcast = CGF->Builder.CreateBitCast(V: Op0, DestTy);
129
130 LowBits = CGF->Builder.CreateExtractElement(Vec: Bitcast, Idx: (uint64_t)0);
131 HighBits = CGF->Builder.CreateExtractElement(Vec: Bitcast, Idx: 1);
132 } else {
133 int NumElements = 1;
134 if (const auto *VecTy =
135 E->getArg(Arg: 0)->getType()->getAs<clang::VectorType>())
136 NumElements = VecTy->getNumElements();
137
138 FixedVectorType *Uint32VecTy =
139 FixedVectorType::get(ElementType: CGF->Int32Ty, NumElts: NumElements * 2);
140 Value *Uint32Vec = CGF->Builder.CreateBitCast(V: Op0, DestTy: Uint32VecTy);
141 if (NumElements == 1) {
142 LowBits = CGF->Builder.CreateExtractElement(Vec: Uint32Vec, Idx: (uint64_t)0);
143 HighBits = CGF->Builder.CreateExtractElement(Vec: Uint32Vec, Idx: 1);
144 } else {
145 SmallVector<int> EvenMask, OddMask;
146 for (int I = 0, E = NumElements; I != E; ++I) {
147 EvenMask.push_back(Elt: I * 2);
148 OddMask.push_back(Elt: I * 2 + 1);
149 }
150 LowBits = CGF->Builder.CreateShuffleVector(V: Uint32Vec, Mask: EvenMask);
151 HighBits = CGF->Builder.CreateShuffleVector(V: Uint32Vec, Mask: OddMask);
152 }
153 }
154 }
155 CGF->Builder.CreateStore(Val: LowBits, Addr: Op1TmpLValue.getAddress());
156 auto *LastInst =
157 CGF->Builder.CreateStore(Val: HighBits, Addr: Op2TmpLValue.getAddress());
158 CGF->EmitWritebacks(Args);
159 return LastInst;
160}
161
162// Return dot product intrinsic that corresponds to the QT scalar type
163static Intrinsic::ID getDotProductIntrinsic(CGHLSLRuntime &RT, QualType QT) {
164 if (QT->isFloatingType())
165 return RT.getFDotIntrinsic();
166 if (QT->isSignedIntegerType())
167 return RT.getSDotIntrinsic();
168 assert(QT->isUnsignedIntegerType());
169 return RT.getUDotIntrinsic();
170}
171
172static Intrinsic::ID getFirstBitHighIntrinsic(CGHLSLRuntime &RT, QualType QT) {
173 if (QT->hasSignedIntegerRepresentation()) {
174 return RT.getFirstBitSHighIntrinsic();
175 }
176
177 assert(QT->hasUnsignedIntegerRepresentation());
178 return RT.getFirstBitUHighIntrinsic();
179}
180
181// Return wave active sum that corresponds to the QT scalar type
182static Intrinsic::ID getWaveActiveSumIntrinsic(llvm::Triple::ArchType Arch,
183 CGHLSLRuntime &RT, QualType QT) {
184 switch (Arch) {
185 case llvm::Triple::spirv:
186 return Intrinsic::spv_wave_reduce_sum;
187 case llvm::Triple::dxil: {
188 if (QT->isUnsignedIntegerType())
189 return Intrinsic::dx_wave_reduce_usum;
190 return Intrinsic::dx_wave_reduce_sum;
191 }
192 default:
193 llvm_unreachable("Intrinsic WaveActiveSum"
194 " not supported by target architecture");
195 }
196}
197
198// Return wave active sum that corresponds to the QT scalar type
199static Intrinsic::ID getWaveActiveMaxIntrinsic(llvm::Triple::ArchType Arch,
200 CGHLSLRuntime &RT, QualType QT) {
201 switch (Arch) {
202 case llvm::Triple::spirv:
203 if (QT->isUnsignedIntegerType())
204 return Intrinsic::spv_wave_reduce_umax;
205 return Intrinsic::spv_wave_reduce_max;
206 case llvm::Triple::dxil: {
207 if (QT->isUnsignedIntegerType())
208 return Intrinsic::dx_wave_reduce_umax;
209 return Intrinsic::dx_wave_reduce_max;
210 }
211 default:
212 llvm_unreachable("Intrinsic WaveActiveMax"
213 " not supported by target architecture");
214 }
215}
216
217Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
218 const CallExpr *E,
219 ReturnValueSlot ReturnValue) {
220 if (!getLangOpts().HLSL)
221 return nullptr;
222
223 switch (BuiltinID) {
224 case Builtin::BI__builtin_hlsl_adduint64: {
225 Value *OpA = EmitScalarExpr(E: E->getArg(Arg: 0));
226 Value *OpB = EmitScalarExpr(E: E->getArg(Arg: 1));
227 QualType Arg0Ty = E->getArg(Arg: 0)->getType();
228 uint64_t NumElements = Arg0Ty->castAs<VectorType>()->getNumElements();
229 assert(Arg0Ty == E->getArg(1)->getType() &&
230 "AddUint64 operand types must match");
231 assert(Arg0Ty->hasIntegerRepresentation() &&
232 "AddUint64 operands must have an integer representation");
233 assert((NumElements == 2 || NumElements == 4) &&
234 "AddUint64 operands must have 2 or 4 elements");
235
236 llvm::Value *LowA;
237 llvm::Value *HighA;
238 llvm::Value *LowB;
239 llvm::Value *HighB;
240
241 // Obtain low and high words of inputs A and B
242 if (NumElements == 2) {
243 LowA = Builder.CreateExtractElement(Vec: OpA, Idx: (uint64_t)0, Name: "LowA");
244 HighA = Builder.CreateExtractElement(Vec: OpA, Idx: (uint64_t)1, Name: "HighA");
245 LowB = Builder.CreateExtractElement(Vec: OpB, Idx: (uint64_t)0, Name: "LowB");
246 HighB = Builder.CreateExtractElement(Vec: OpB, Idx: (uint64_t)1, Name: "HighB");
247 } else {
248 LowA = Builder.CreateShuffleVector(V: OpA, Mask: {0, 2}, Name: "LowA");
249 HighA = Builder.CreateShuffleVector(V: OpA, Mask: {1, 3}, Name: "HighA");
250 LowB = Builder.CreateShuffleVector(V: OpB, Mask: {0, 2}, Name: "LowB");
251 HighB = Builder.CreateShuffleVector(V: OpB, Mask: {1, 3}, Name: "HighB");
252 }
253
254 // Use an uadd_with_overflow to compute the sum of low words and obtain a
255 // carry value
256 llvm::Value *Carry;
257 llvm::Value *LowSum = EmitOverflowIntrinsic(
258 *this, Intrinsic::uadd_with_overflow, LowA, LowB, Carry);
259 llvm::Value *ZExtCarry =
260 Builder.CreateZExt(V: Carry, DestTy: HighA->getType(), Name: "CarryZExt");
261
262 // Sum the high words and the carry
263 llvm::Value *HighSum = Builder.CreateAdd(LHS: HighA, RHS: HighB, Name: "HighSum");
264 llvm::Value *HighSumPlusCarry =
265 Builder.CreateAdd(LHS: HighSum, RHS: ZExtCarry, Name: "HighSumPlusCarry");
266
267 if (NumElements == 4) {
268 return Builder.CreateShuffleVector(V1: LowSum, V2: HighSumPlusCarry, Mask: {0, 2, 1, 3},
269 Name: "hlsl.AddUint64");
270 }
271
272 llvm::Value *Result = PoisonValue::get(T: OpA->getType());
273 Result = Builder.CreateInsertElement(Vec: Result, NewElt: LowSum, Idx: (uint64_t)0,
274 Name: "hlsl.AddUint64.upto0");
275 Result = Builder.CreateInsertElement(Vec: Result, NewElt: HighSumPlusCarry, Idx: (uint64_t)1,
276 Name: "hlsl.AddUint64");
277 return Result;
278 }
279 case Builtin::BI__builtin_hlsl_resource_getpointer: {
280 Value *HandleOp = EmitScalarExpr(E: E->getArg(Arg: 0));
281 Value *IndexOp = EmitScalarExpr(E: E->getArg(Arg: 1));
282
283 llvm::Type *RetTy = ConvertType(E->getType());
284 return Builder.CreateIntrinsic(
285 RetTy, ID: CGM.getHLSLRuntime().getCreateResourceGetPointerIntrinsic(),
286 Args: ArrayRef<Value *>{HandleOp, IndexOp});
287 }
288 case Builtin::BI__builtin_hlsl_resource_uninitializedhandle: {
289 llvm::Type *HandleTy = CGM.getTypes().ConvertType(T: E->getType());
290 return llvm::PoisonValue::get(T: HandleTy);
291 }
292 case Builtin::BI__builtin_hlsl_resource_handlefrombinding: {
293 llvm::Type *HandleTy = CGM.getTypes().ConvertType(T: E->getType());
294 Value *RegisterOp = EmitScalarExpr(E: E->getArg(Arg: 1));
295 Value *SpaceOp = EmitScalarExpr(E: E->getArg(Arg: 2));
296 Value *RangeOp = EmitScalarExpr(E: E->getArg(Arg: 3));
297 Value *IndexOp = EmitScalarExpr(E: E->getArg(Arg: 4));
298 // FIXME: NonUniformResourceIndex bit is not yet implemented
299 // (llvm/llvm-project#135452)
300 Value *NonUniform =
301 llvm::ConstantInt::get(Ty: llvm::Type::getInt1Ty(C&: getLLVMContext()), V: false);
302
303 auto [IntrinsicID, HasNameArg] =
304 CGM.getHLSLRuntime().getCreateHandleFromBindingIntrinsic();
305 SmallVector<Value *> Args{SpaceOp, RegisterOp, RangeOp, IndexOp,
306 NonUniform};
307 if (HasNameArg)
308 Args.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 5)));
309 return Builder.CreateIntrinsic(RetTy: HandleTy, ID: IntrinsicID, Args);
310 }
311 case Builtin::BI__builtin_hlsl_resource_handlefromimplicitbinding: {
312 llvm::Type *HandleTy = CGM.getTypes().ConvertType(T: E->getType());
313 Value *SpaceOp = EmitScalarExpr(E: E->getArg(Arg: 1));
314 Value *RangeOp = EmitScalarExpr(E: E->getArg(Arg: 2));
315 Value *IndexOp = EmitScalarExpr(E: E->getArg(Arg: 3));
316 Value *OrderID = EmitScalarExpr(E: E->getArg(Arg: 4));
317 // FIXME: NonUniformResourceIndex bit is not yet implemented
318 // (llvm/llvm-project#135452)
319 Value *NonUniform =
320 llvm::ConstantInt::get(Ty: llvm::Type::getInt1Ty(C&: getLLVMContext()), V: false);
321
322 auto [IntrinsicID, HasNameArg] =
323 CGM.getHLSLRuntime().getCreateHandleFromImplicitBindingIntrinsic();
324 SmallVector<Value *> Args{OrderID, SpaceOp, RangeOp, IndexOp, NonUniform};
325 if (HasNameArg)
326 Args.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 5)));
327 return Builder.CreateIntrinsic(RetTy: HandleTy, ID: IntrinsicID, Args);
328 }
329 case Builtin::BI__builtin_hlsl_all: {
330 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
331 return Builder.CreateIntrinsic(
332 /*ReturnType=*/RetTy: llvm::Type::getInt1Ty(C&: getLLVMContext()),
333 ID: CGM.getHLSLRuntime().getAllIntrinsic(), Args: ArrayRef<Value *>{Op0}, FMFSource: nullptr,
334 Name: "hlsl.all");
335 }
336 case Builtin::BI__builtin_hlsl_and: {
337 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
338 Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1));
339 return Builder.CreateAnd(LHS: Op0, RHS: Op1, Name: "hlsl.and");
340 }
341 case Builtin::BI__builtin_hlsl_or: {
342 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
343 Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1));
344 return Builder.CreateOr(LHS: Op0, RHS: Op1, Name: "hlsl.or");
345 }
346 case Builtin::BI__builtin_hlsl_any: {
347 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
348 return Builder.CreateIntrinsic(
349 /*ReturnType=*/RetTy: llvm::Type::getInt1Ty(C&: getLLVMContext()),
350 ID: CGM.getHLSLRuntime().getAnyIntrinsic(), Args: ArrayRef<Value *>{Op0}, FMFSource: nullptr,
351 Name: "hlsl.any");
352 }
353 case Builtin::BI__builtin_hlsl_asdouble:
354 return handleAsDoubleBuiltin(CGF&: *this, E);
355 case Builtin::BI__builtin_hlsl_elementwise_clamp: {
356 Value *OpX = EmitScalarExpr(E: E->getArg(Arg: 0));
357 Value *OpMin = EmitScalarExpr(E: E->getArg(Arg: 1));
358 Value *OpMax = EmitScalarExpr(E: E->getArg(Arg: 2));
359
360 QualType Ty = E->getArg(Arg: 0)->getType();
361 if (auto *VecTy = Ty->getAs<VectorType>())
362 Ty = VecTy->getElementType();
363
364 Intrinsic::ID Intr;
365 if (Ty->isFloatingType()) {
366 Intr = CGM.getHLSLRuntime().getNClampIntrinsic();
367 } else if (Ty->isUnsignedIntegerType()) {
368 Intr = CGM.getHLSLRuntime().getUClampIntrinsic();
369 } else {
370 assert(Ty->isSignedIntegerType());
371 Intr = CGM.getHLSLRuntime().getSClampIntrinsic();
372 }
373 return Builder.CreateIntrinsic(
374 /*ReturnType=*/RetTy: OpX->getType(), ID: Intr,
375 Args: ArrayRef<Value *>{OpX, OpMin, OpMax}, FMFSource: nullptr, Name: "hlsl.clamp");
376 }
377 case Builtin::BI__builtin_hlsl_crossf16:
378 case Builtin::BI__builtin_hlsl_crossf32: {
379 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
380 Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1));
381 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
382 E->getArg(1)->getType()->hasFloatingRepresentation() &&
383 "cross operands must have a float representation");
384 // make sure each vector has exactly 3 elements
385 assert(
386 E->getArg(0)->getType()->castAs<VectorType>()->getNumElements() == 3 &&
387 E->getArg(1)->getType()->castAs<VectorType>()->getNumElements() == 3 &&
388 "input vectors must have 3 elements each");
389 return Builder.CreateIntrinsic(
390 /*ReturnType=*/RetTy: Op0->getType(), ID: CGM.getHLSLRuntime().getCrossIntrinsic(),
391 Args: ArrayRef<Value *>{Op0, Op1}, FMFSource: nullptr, Name: "hlsl.cross");
392 }
393 case Builtin::BI__builtin_hlsl_dot: {
394 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
395 Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1));
396 llvm::Type *T0 = Op0->getType();
397 llvm::Type *T1 = Op1->getType();
398
399 // If the arguments are scalars, just emit a multiply
400 if (!T0->isVectorTy() && !T1->isVectorTy()) {
401 if (T0->isFloatingPointTy())
402 return Builder.CreateFMul(L: Op0, R: Op1, Name: "hlsl.dot");
403
404 if (T0->isIntegerTy())
405 return Builder.CreateMul(LHS: Op0, RHS: Op1, Name: "hlsl.dot");
406
407 llvm_unreachable(
408 "Scalar dot product is only supported on ints and floats.");
409 }
410 // For vectors, validate types and emit the appropriate intrinsic
411 assert(CGM.getContext().hasSameUnqualifiedType(E->getArg(0)->getType(),
412 E->getArg(1)->getType()) &&
413 "Dot product operands must have the same type.");
414
415 auto *VecTy0 = E->getArg(Arg: 0)->getType()->castAs<VectorType>();
416 assert(VecTy0 && "Dot product argument must be a vector.");
417
418 return Builder.CreateIntrinsic(
419 /*ReturnType=*/RetTy: T0->getScalarType(),
420 ID: getDotProductIntrinsic(RT&: CGM.getHLSLRuntime(), QT: VecTy0->getElementType()),
421 Args: ArrayRef<Value *>{Op0, Op1}, FMFSource: nullptr, Name: "hlsl.dot");
422 }
423 case Builtin::BI__builtin_hlsl_dot4add_i8packed: {
424 Value *X = EmitScalarExpr(E: E->getArg(Arg: 0));
425 Value *Y = EmitScalarExpr(E: E->getArg(Arg: 1));
426 Value *Acc = EmitScalarExpr(E: E->getArg(Arg: 2));
427
428 Intrinsic::ID ID = CGM.getHLSLRuntime().getDot4AddI8PackedIntrinsic();
429 // Note that the argument order disagrees between the builtin and the
430 // intrinsic here.
431 return Builder.CreateIntrinsic(
432 /*ReturnType=*/RetTy: Acc->getType(), ID, Args: ArrayRef<Value *>{Acc, X, Y},
433 FMFSource: nullptr, Name: "hlsl.dot4add.i8packed");
434 }
435 case Builtin::BI__builtin_hlsl_dot4add_u8packed: {
436 Value *X = EmitScalarExpr(E: E->getArg(Arg: 0));
437 Value *Y = EmitScalarExpr(E: E->getArg(Arg: 1));
438 Value *Acc = EmitScalarExpr(E: E->getArg(Arg: 2));
439
440 Intrinsic::ID ID = CGM.getHLSLRuntime().getDot4AddU8PackedIntrinsic();
441 // Note that the argument order disagrees between the builtin and the
442 // intrinsic here.
443 return Builder.CreateIntrinsic(
444 /*ReturnType=*/RetTy: Acc->getType(), ID, Args: ArrayRef<Value *>{Acc, X, Y},
445 FMFSource: nullptr, Name: "hlsl.dot4add.u8packed");
446 }
447 case Builtin::BI__builtin_hlsl_elementwise_firstbithigh: {
448 Value *X = EmitScalarExpr(E: E->getArg(Arg: 0));
449
450 return Builder.CreateIntrinsic(
451 /*ReturnType=*/ConvertType(E->getType()),
452 getFirstBitHighIntrinsic(RT&: CGM.getHLSLRuntime(), QT: E->getArg(Arg: 0)->getType()),
453 ArrayRef<Value *>{X}, nullptr, "hlsl.firstbithigh");
454 }
455 case Builtin::BI__builtin_hlsl_elementwise_firstbitlow: {
456 Value *X = EmitScalarExpr(E: E->getArg(Arg: 0));
457
458 return Builder.CreateIntrinsic(
459 /*ReturnType=*/ConvertType(E->getType()),
460 CGM.getHLSLRuntime().getFirstBitLowIntrinsic(), ArrayRef<Value *>{X},
461 nullptr, "hlsl.firstbitlow");
462 }
463 case Builtin::BI__builtin_hlsl_lerp: {
464 Value *X = EmitScalarExpr(E: E->getArg(Arg: 0));
465 Value *Y = EmitScalarExpr(E: E->getArg(Arg: 1));
466 Value *S = EmitScalarExpr(E: E->getArg(Arg: 2));
467 if (!E->getArg(Arg: 0)->getType()->hasFloatingRepresentation())
468 llvm_unreachable("lerp operand must have a float representation");
469 return Builder.CreateIntrinsic(
470 /*ReturnType=*/RetTy: X->getType(), ID: CGM.getHLSLRuntime().getLerpIntrinsic(),
471 Args: ArrayRef<Value *>{X, Y, S}, FMFSource: nullptr, Name: "hlsl.lerp");
472 }
473 case Builtin::BI__builtin_hlsl_normalize: {
474 Value *X = EmitScalarExpr(E: E->getArg(Arg: 0));
475
476 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
477 "normalize operand must have a float representation");
478
479 return Builder.CreateIntrinsic(
480 /*ReturnType=*/RetTy: X->getType(),
481 ID: CGM.getHLSLRuntime().getNormalizeIntrinsic(), Args: ArrayRef<Value *>{X},
482 FMFSource: nullptr, Name: "hlsl.normalize");
483 }
484 case Builtin::BI__builtin_hlsl_elementwise_degrees: {
485 Value *X = EmitScalarExpr(E: E->getArg(Arg: 0));
486
487 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
488 "degree operand must have a float representation");
489
490 return Builder.CreateIntrinsic(
491 /*ReturnType=*/RetTy: X->getType(), ID: CGM.getHLSLRuntime().getDegreesIntrinsic(),
492 Args: ArrayRef<Value *>{X}, FMFSource: nullptr, Name: "hlsl.degrees");
493 }
494 case Builtin::BI__builtin_hlsl_elementwise_frac: {
495 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
496 if (!E->getArg(Arg: 0)->getType()->hasFloatingRepresentation())
497 llvm_unreachable("frac operand must have a float representation");
498 return Builder.CreateIntrinsic(
499 /*ReturnType=*/RetTy: Op0->getType(), ID: CGM.getHLSLRuntime().getFracIntrinsic(),
500 Args: ArrayRef<Value *>{Op0}, FMFSource: nullptr, Name: "hlsl.frac");
501 }
502 case Builtin::BI__builtin_hlsl_elementwise_isinf: {
503 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
504 llvm::Type *Xty = Op0->getType();
505 llvm::Type *retType = llvm::Type::getInt1Ty(C&: this->getLLVMContext());
506 if (Xty->isVectorTy()) {
507 auto *XVecTy = E->getArg(Arg: 0)->getType()->castAs<VectorType>();
508 retType = llvm::VectorType::get(
509 ElementType: retType, EC: ElementCount::getFixed(MinVal: XVecTy->getNumElements()));
510 }
511 if (!E->getArg(Arg: 0)->getType()->hasFloatingRepresentation())
512 llvm_unreachable("isinf operand must have a float representation");
513 return Builder.CreateIntrinsic(retType, Intrinsic::dx_isinf,
514 ArrayRef<Value *>{Op0}, nullptr, "dx.isinf");
515 }
516 case Builtin::BI__builtin_hlsl_mad: {
517 Value *M = EmitScalarExpr(E: E->getArg(Arg: 0));
518 Value *A = EmitScalarExpr(E: E->getArg(Arg: 1));
519 Value *B = EmitScalarExpr(E: E->getArg(Arg: 2));
520 if (E->getArg(0)->getType()->hasFloatingRepresentation())
521 return Builder.CreateIntrinsic(
522 /*ReturnType*/ M->getType(), Intrinsic::fmuladd,
523 ArrayRef<Value *>{M, A, B}, nullptr, "hlsl.fmad");
524
525 if (E->getArg(Arg: 0)->getType()->hasSignedIntegerRepresentation()) {
526 if (CGM.getTarget().getTriple().getArch() == llvm::Triple::dxil)
527 return Builder.CreateIntrinsic(
528 /*ReturnType*/ M->getType(), Intrinsic::dx_imad,
529 ArrayRef<Value *>{M, A, B}, nullptr, "dx.imad");
530
531 Value *Mul = Builder.CreateNSWMul(LHS: M, RHS: A);
532 return Builder.CreateNSWAdd(LHS: Mul, RHS: B);
533 }
534 assert(E->getArg(0)->getType()->hasUnsignedIntegerRepresentation());
535 if (CGM.getTarget().getTriple().getArch() == llvm::Triple::dxil)
536 return Builder.CreateIntrinsic(
537 /*ReturnType=*/M->getType(), Intrinsic::dx_umad,
538 ArrayRef<Value *>{M, A, B}, nullptr, "dx.umad");
539
540 Value *Mul = Builder.CreateNUWMul(LHS: M, RHS: A);
541 return Builder.CreateNUWAdd(LHS: Mul, RHS: B);
542 }
543 case Builtin::BI__builtin_hlsl_elementwise_rcp: {
544 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
545 if (!E->getArg(Arg: 0)->getType()->hasFloatingRepresentation())
546 llvm_unreachable("rcp operand must have a float representation");
547 llvm::Type *Ty = Op0->getType();
548 llvm::Type *EltTy = Ty->getScalarType();
549 Constant *One = Ty->isVectorTy()
550 ? ConstantVector::getSplat(
551 EC: ElementCount::getFixed(
552 MinVal: cast<FixedVectorType>(Val: Ty)->getNumElements()),
553 Elt: ConstantFP::get(Ty: EltTy, V: 1.0))
554 : ConstantFP::get(Ty: EltTy, V: 1.0);
555 return Builder.CreateFDiv(L: One, R: Op0, Name: "hlsl.rcp");
556 }
557 case Builtin::BI__builtin_hlsl_elementwise_rsqrt: {
558 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
559 if (!E->getArg(Arg: 0)->getType()->hasFloatingRepresentation())
560 llvm_unreachable("rsqrt operand must have a float representation");
561 return Builder.CreateIntrinsic(
562 /*ReturnType=*/RetTy: Op0->getType(), ID: CGM.getHLSLRuntime().getRsqrtIntrinsic(),
563 Args: ArrayRef<Value *>{Op0}, FMFSource: nullptr, Name: "hlsl.rsqrt");
564 }
565 case Builtin::BI__builtin_hlsl_elementwise_saturate: {
566 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
567 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
568 "saturate operand must have a float representation");
569 return Builder.CreateIntrinsic(
570 /*ReturnType=*/RetTy: Op0->getType(),
571 ID: CGM.getHLSLRuntime().getSaturateIntrinsic(), Args: ArrayRef<Value *>{Op0},
572 FMFSource: nullptr, Name: "hlsl.saturate");
573 }
574 case Builtin::BI__builtin_hlsl_select: {
575 Value *OpCond = EmitScalarExpr(E: E->getArg(Arg: 0));
576 RValue RValTrue = EmitAnyExpr(E: E->getArg(Arg: 1));
577 Value *OpTrue =
578 RValTrue.isScalar()
579 ? RValTrue.getScalarVal()
580 : RValTrue.getAggregatePointer(PointeeType: E->getArg(Arg: 1)->getType(), CGF&: *this);
581 RValue RValFalse = EmitAnyExpr(E: E->getArg(Arg: 2));
582 Value *OpFalse =
583 RValFalse.isScalar()
584 ? RValFalse.getScalarVal()
585 : RValFalse.getAggregatePointer(PointeeType: E->getArg(Arg: 2)->getType(), CGF&: *this);
586 if (auto *VTy = E->getType()->getAs<VectorType>()) {
587 if (!OpTrue->getType()->isVectorTy())
588 OpTrue =
589 Builder.CreateVectorSplat(VTy->getNumElements(), OpTrue, "splat");
590 if (!OpFalse->getType()->isVectorTy())
591 OpFalse =
592 Builder.CreateVectorSplat(VTy->getNumElements(), OpFalse, "splat");
593 }
594
595 Value *SelectVal =
596 Builder.CreateSelect(C: OpCond, True: OpTrue, False: OpFalse, Name: "hlsl.select");
597 if (!RValTrue.isScalar())
598 Builder.CreateStore(Val: SelectVal, Addr: ReturnValue.getAddress(),
599 IsVolatile: ReturnValue.isVolatile());
600
601 return SelectVal;
602 }
603 case Builtin::BI__builtin_hlsl_step: {
604 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
605 Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1));
606 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
607 E->getArg(1)->getType()->hasFloatingRepresentation() &&
608 "step operands must have a float representation");
609 return Builder.CreateIntrinsic(
610 /*ReturnType=*/RetTy: Op0->getType(), ID: CGM.getHLSLRuntime().getStepIntrinsic(),
611 Args: ArrayRef<Value *>{Op0, Op1}, FMFSource: nullptr, Name: "hlsl.step");
612 }
613 case Builtin::BI__builtin_hlsl_wave_active_all_true: {
614 Value *Op = EmitScalarExpr(E: E->getArg(Arg: 0));
615 assert(Op->getType()->isIntegerTy(1) &&
616 "Intrinsic WaveActiveAllTrue operand must be a bool");
617
618 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveAllTrueIntrinsic();
619 return EmitRuntimeCall(
620 callee: Intrinsic::getOrInsertDeclaration(M: &CGM.getModule(), id: ID), args: {Op});
621 }
622 case Builtin::BI__builtin_hlsl_wave_active_any_true: {
623 Value *Op = EmitScalarExpr(E: E->getArg(Arg: 0));
624 assert(Op->getType()->isIntegerTy(1) &&
625 "Intrinsic WaveActiveAnyTrue operand must be a bool");
626
627 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveAnyTrueIntrinsic();
628 return EmitRuntimeCall(
629 callee: Intrinsic::getOrInsertDeclaration(M: &CGM.getModule(), id: ID), args: {Op});
630 }
631 case Builtin::BI__builtin_hlsl_wave_active_count_bits: {
632 Value *OpExpr = EmitScalarExpr(E: E->getArg(Arg: 0));
633 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveCountBitsIntrinsic();
634 return EmitRuntimeCall(
635 callee: Intrinsic::getOrInsertDeclaration(M: &CGM.getModule(), id: ID),
636 args: ArrayRef{OpExpr});
637 }
638 case Builtin::BI__builtin_hlsl_wave_active_sum: {
639 // Due to the use of variadic arguments, explicitly retreive argument
640 Value *OpExpr = EmitScalarExpr(E: E->getArg(Arg: 0));
641 llvm::FunctionType *FT = llvm::FunctionType::get(
642 Result: OpExpr->getType(), Params: ArrayRef{OpExpr->getType()}, isVarArg: false);
643 Intrinsic::ID IID = getWaveActiveSumIntrinsic(
644 Arch: getTarget().getTriple().getArch(), RT&: CGM.getHLSLRuntime(),
645 QT: E->getArg(Arg: 0)->getType());
646
647 // Get overloaded name
648 std::string Name =
649 Intrinsic::getName(Id: IID, Tys: ArrayRef{OpExpr->getType()}, M: &CGM.getModule());
650 return EmitRuntimeCall(callee: CGM.CreateRuntimeFunction(Ty: FT, Name, ExtraAttrs: {},
651 /*Local=*/false,
652 /*AssumeConvergent=*/true),
653 args: ArrayRef{OpExpr}, name: "hlsl.wave.active.sum");
654 }
655 case Builtin::BI__builtin_hlsl_wave_active_max: {
656 // Due to the use of variadic arguments, explicitly retreive argument
657 Value *OpExpr = EmitScalarExpr(E: E->getArg(Arg: 0));
658 llvm::FunctionType *FT = llvm::FunctionType::get(
659 Result: OpExpr->getType(), Params: ArrayRef{OpExpr->getType()}, isVarArg: false);
660 Intrinsic::ID IID = getWaveActiveMaxIntrinsic(
661 Arch: getTarget().getTriple().getArch(), RT&: CGM.getHLSLRuntime(),
662 QT: E->getArg(Arg: 0)->getType());
663
664 // Get overloaded name
665 std::string Name =
666 Intrinsic::getName(Id: IID, Tys: ArrayRef{OpExpr->getType()}, M: &CGM.getModule());
667 return EmitRuntimeCall(callee: CGM.CreateRuntimeFunction(Ty: FT, Name, ExtraAttrs: {},
668 /*Local=*/false,
669 /*AssumeConvergent=*/true),
670 args: ArrayRef{OpExpr}, name: "hlsl.wave.active.max");
671 }
672 case Builtin::BI__builtin_hlsl_wave_get_lane_index: {
673 // We don't define a SPIR-V intrinsic, instead it is a SPIR-V built-in
674 // defined in SPIRVBuiltins.td. So instead we manually get the matching name
675 // for the DirectX intrinsic and the demangled builtin name
676 switch (CGM.getTarget().getTriple().getArch()) {
677 case llvm::Triple::dxil:
678 return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration(
679 &CGM.getModule(), Intrinsic::dx_wave_getlaneindex));
680 case llvm::Triple::spirv:
681 return EmitRuntimeCall(callee: CGM.CreateRuntimeFunction(
682 Ty: llvm::FunctionType::get(Result: IntTy, Params: {}, isVarArg: false),
683 Name: "__hlsl_wave_get_lane_index", ExtraAttrs: {}, Local: false, AssumeConvergent: true));
684 default:
685 llvm_unreachable(
686 "Intrinsic WaveGetLaneIndex not supported by target architecture");
687 }
688 }
689 case Builtin::BI__builtin_hlsl_wave_is_first_lane: {
690 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveIsFirstLaneIntrinsic();
691 return EmitRuntimeCall(
692 callee: Intrinsic::getOrInsertDeclaration(M: &CGM.getModule(), id: ID));
693 }
694 case Builtin::BI__builtin_hlsl_wave_get_lane_count: {
695 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveGetLaneCountIntrinsic();
696 return EmitRuntimeCall(
697 callee: Intrinsic::getOrInsertDeclaration(M: &CGM.getModule(), id: ID));
698 }
699 case Builtin::BI__builtin_hlsl_wave_read_lane_at: {
700 // Due to the use of variadic arguments we must explicitly retreive them and
701 // create our function type.
702 Value *OpExpr = EmitScalarExpr(E: E->getArg(Arg: 0));
703 Value *OpIndex = EmitScalarExpr(E: E->getArg(Arg: 1));
704 llvm::FunctionType *FT = llvm::FunctionType::get(
705 Result: OpExpr->getType(), Params: ArrayRef{OpExpr->getType(), OpIndex->getType()},
706 isVarArg: false);
707
708 // Get overloaded name
709 std::string Name =
710 Intrinsic::getName(Id: CGM.getHLSLRuntime().getWaveReadLaneAtIntrinsic(),
711 Tys: ArrayRef{OpExpr->getType()}, M: &CGM.getModule());
712 return EmitRuntimeCall(callee: CGM.CreateRuntimeFunction(Ty: FT, Name, ExtraAttrs: {},
713 /*Local=*/false,
714 /*AssumeConvergent=*/true),
715 args: ArrayRef{OpExpr, OpIndex}, name: "hlsl.wave.readlane");
716 }
717 case Builtin::BI__builtin_hlsl_elementwise_sign: {
718 auto *Arg0 = E->getArg(Arg: 0);
719 Value *Op0 = EmitScalarExpr(E: Arg0);
720 llvm::Type *Xty = Op0->getType();
721 llvm::Type *retType = llvm::Type::getInt32Ty(C&: this->getLLVMContext());
722 if (Xty->isVectorTy()) {
723 auto *XVecTy = Arg0->getType()->castAs<VectorType>();
724 retType = llvm::VectorType::get(
725 ElementType: retType, EC: ElementCount::getFixed(MinVal: XVecTy->getNumElements()));
726 }
727 assert((Arg0->getType()->hasFloatingRepresentation() ||
728 Arg0->getType()->hasIntegerRepresentation()) &&
729 "sign operand must have a float or int representation");
730
731 if (Arg0->getType()->hasUnsignedIntegerRepresentation()) {
732 Value *Cmp = Builder.CreateICmpEQ(LHS: Op0, RHS: ConstantInt::get(Ty: Xty, V: 0));
733 return Builder.CreateSelect(C: Cmp, True: ConstantInt::get(Ty: retType, V: 0),
734 False: ConstantInt::get(Ty: retType, V: 1), Name: "hlsl.sign");
735 }
736
737 return Builder.CreateIntrinsic(
738 RetTy: retType, ID: CGM.getHLSLRuntime().getSignIntrinsic(),
739 Args: ArrayRef<Value *>{Op0}, FMFSource: nullptr, Name: "hlsl.sign");
740 }
741 case Builtin::BI__builtin_hlsl_elementwise_radians: {
742 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
743 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
744 "radians operand must have a float representation");
745 return Builder.CreateIntrinsic(
746 /*ReturnType=*/RetTy: Op0->getType(),
747 ID: CGM.getHLSLRuntime().getRadiansIntrinsic(), Args: ArrayRef<Value *>{Op0},
748 FMFSource: nullptr, Name: "hlsl.radians");
749 }
750 case Builtin::BI__builtin_hlsl_buffer_update_counter: {
751 Value *ResHandle = EmitScalarExpr(E: E->getArg(Arg: 0));
752 Value *Offset = EmitScalarExpr(E: E->getArg(Arg: 1));
753 Value *OffsetI8 = Builder.CreateIntCast(V: Offset, DestTy: Int8Ty, isSigned: true);
754 return Builder.CreateIntrinsic(
755 /*ReturnType=*/RetTy: Offset->getType(),
756 ID: CGM.getHLSLRuntime().getBufferUpdateCounterIntrinsic(),
757 Args: ArrayRef<Value *>{ResHandle, OffsetI8}, FMFSource: nullptr);
758 }
759 case Builtin::BI__builtin_hlsl_elementwise_splitdouble: {
760
761 assert((E->getArg(0)->getType()->hasFloatingRepresentation() &&
762 E->getArg(1)->getType()->hasUnsignedIntegerRepresentation() &&
763 E->getArg(2)->getType()->hasUnsignedIntegerRepresentation()) &&
764 "asuint operands types mismatch");
765 return handleHlslSplitdouble(E, CGF: this);
766 }
767 case Builtin::BI__builtin_hlsl_elementwise_clip:
768 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
769 "clip operands types mismatch");
770 return handleHlslClip(E, CGF: this);
771 case Builtin::BI__builtin_hlsl_group_memory_barrier_with_group_sync: {
772 Intrinsic::ID ID =
773 CGM.getHLSLRuntime().getGroupMemoryBarrierWithGroupSyncIntrinsic();
774 return EmitRuntimeCall(
775 callee: Intrinsic::getOrInsertDeclaration(M: &CGM.getModule(), id: ID));
776 }
777 }
778 return nullptr;
779}
780

source code of clang/lib/CodeGen/CGHLSLBuiltins.cpp