//===- NVPTX.cpp ----------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "ABIInfoImpl.h"
#include "TargetInfo.h"
#include "llvm/IR/IntrinsicsNVPTX.h"

using namespace clang;
using namespace clang::CodeGen;

//===----------------------------------------------------------------------===//
// NVPTX ABI Implementation
//===----------------------------------------------------------------------===//

namespace {

class NVPTXTargetCodeGenInfo;

class NVPTXABIInfo : public ABIInfo {
  NVPTXTargetCodeGenInfo &CGInfo;

public:
  NVPTXABIInfo(CodeGenTypes &CGT, NVPTXTargetCodeGenInfo &Info)
      : ABIInfo(CGT), CGInfo(Info) {}

  ABIArgInfo classifyReturnType(QualType RetTy) const;
  ABIArgInfo classifyArgumentType(QualType Ty) const;

  void computeInfo(CGFunctionInfo &FI) const override;
  Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                    QualType Ty) const override;
  bool isUnsupportedType(QualType T) const;
  ABIArgInfo coerceToIntArrayWithLimit(QualType Ty, unsigned MaxSize) const;
};

class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo {
public:
  NVPTXTargetCodeGenInfo(CodeGenTypes &CGT)
      : TargetCodeGenInfo(std::make_unique<NVPTXABIInfo>(CGT, *this)) {}

  void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                           CodeGen::CodeGenModule &M) const override;
  bool shouldEmitStaticExternCAliases() const override;

  llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM,
                                 llvm::PointerType *T,
                                 QualType QT) const override;

  llvm::Type *getCUDADeviceBuiltinSurfaceDeviceType() const override {
    // On the device side, a surface reference is represented as an object
    // handle held in a 64-bit integer.
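    // (Illustrative note: this matches CUDA's cudaSurfaceObject_t, which is
    // likewise a 64-bit handle type.)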
    return llvm::Type::getInt64Ty(getABIInfo().getVMContext());
  }

  llvm::Type *getCUDADeviceBuiltinTextureDeviceType() const override {
    // On the device side, a texture reference is represented as an object
    // handle held in a 64-bit integer.
    return llvm::Type::getInt64Ty(getABIInfo().getVMContext());
  }

  bool emitCUDADeviceBuiltinSurfaceDeviceCopy(CodeGenFunction &CGF, LValue Dst,
                                              LValue Src) const override {
    emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
    return true;
  }

  bool emitCUDADeviceBuiltinTextureDeviceCopy(CodeGenFunction &CGF, LValue Dst,
                                              LValue Src) const override {
    emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
    return true;
  }

  // Adds an MDNode with GV, Name, and Operand as operands, and appends the
  // resulting node to the nvvm.annotations named metadata.
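  // For example (illustrative), addNVVMMetadata(F, "kernel", 1) appends the
  // entry !{ptr @F, !"kernel", i32 1} to the module's !nvvm.annotations.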
  static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
                              int Operand);

private:
  static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst,
                                           LValue Src) {
    llvm::Value *Handle = nullptr;
    llvm::Constant *C =
        llvm::dyn_cast<llvm::Constant>(Src.getAddress(CGF).emitRawPointer(CGF));
    // Look through an `addrspacecast` on the constant pointer, if any.
    if (auto *ASC = llvm::dyn_cast_or_null<llvm::AddrSpaceCastOperator>(C))
      C = llvm::cast<llvm::Constant>(ASC->getPointerOperand());
    if (auto *GV = llvm::dyn_cast_or_null<llvm::GlobalVariable>(C)) {
      // Load the handle from the given global variable using the
      // `nvvm.texsurf.handle.internal` intrinsic.
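      // For example (illustrative), for a surface reference global @surf this
      // emits roughly:
      //   %texsurf_handle = call i64 @llvm.nvvm.texsurf.handle.internal(ptr @surf)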
      Handle = CGF.EmitRuntimeCall(
          CGF.CGM.getIntrinsic(llvm::Intrinsic::nvvm_texsurf_handle_internal,
                               {GV->getType()}),
          {GV}, "texsurf_handle");
    } else
      Handle = CGF.EmitLoadOfScalar(Src, SourceLocation());
    CGF.EmitStoreOfScalar(Handle, Dst);
  }
};

/// Checks if the type is unsupported directly by the current target.
bool NVPTXABIInfo::isUnsupportedType(QualType T) const {
  ASTContext &Context = getContext();
  if (!Context.getTargetInfo().hasFloat16Type() && T->isFloat16Type())
    return true;
  if (!Context.getTargetInfo().hasFloat128Type() &&
      (T->isFloat128Type() ||
       (T->isRealFloatingType() && Context.getTypeSize(T) == 128)))
    return true;
  if (const auto *EIT = T->getAs<BitIntType>())
    return EIT->getNumBits() >
           (Context.getTargetInfo().hasInt128Type() ? 128U : 64U);
  if (!Context.getTargetInfo().hasInt128Type() && T->isIntegerType() &&
      Context.getTypeSize(T) > 64U)
    return true;
  if (const auto *AT = T->getAsArrayTypeUnsafe())
    return isUnsupportedType(AT->getElementType());
  const auto *RT = T->getAs<RecordType>();
  if (!RT)
    return false;
  const RecordDecl *RD = RT->getDecl();

  // If this is a C++ record, check the bases first.
  if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD))
    for (const CXXBaseSpecifier &I : CXXRD->bases())
      if (isUnsupportedType(I.getType()))
        return true;

  for (const FieldDecl *I : RD->fields())
    if (isUnsupportedType(I->getType()))
      return true;
  return false;
}

/// Coerce the given type into an array of integers whose element width does
/// not exceed MaxSize bits.
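/// For example (illustrative), a 128-bit type with 128-bit alignment and a
/// MaxSize of 64 is coerced to [2 x i64].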
ABIArgInfo NVPTXABIInfo::coerceToIntArrayWithLimit(QualType Ty,
                                                   unsigned MaxSize) const {
  // Alignment and Size are measured in bits.
  const uint64_t Size = getContext().getTypeSize(Ty);
  const uint64_t Alignment = getContext().getTypeAlign(Ty);
  const unsigned Div = std::min<unsigned>(MaxSize, Alignment);
  llvm::Type *IntType = llvm::Type::getIntNTy(getVMContext(), Div);
  const uint64_t NumElements = (Size + Div - 1) / Div;
  return ABIArgInfo::getDirect(llvm::ArrayType::get(IntType, NumElements));
}

ABIArgInfo NVPTXABIInfo::classifyReturnType(QualType RetTy) const {
  if (RetTy->isVoidType())
    return ABIArgInfo::getIgnore();

  if (getContext().getLangOpts().OpenMP &&
      getContext().getLangOpts().OpenMPIsTargetDevice &&
      isUnsupportedType(RetTy))
    return coerceToIntArrayWithLimit(RetTy, 64);

  // Note: this differs from the default ABI.
  if (!RetTy->isScalarType())
    return ABIArgInfo::getDirect();

  // Treat an enum type as its underlying type.
  if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
    RetTy = EnumTy->getDecl()->getIntegerType();

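  // For example (illustrative), a 'short' return value is widened to a full
  // register via getExtend (sign extension), while 'int' is returned directly.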
  return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
                                               : ABIArgInfo::getDirect());
}

ABIArgInfo NVPTXABIInfo::classifyArgumentType(QualType Ty) const {
  // Treat an enum type as its underlying type.
  if (const EnumType *EnumTy = Ty->getAs<EnumType>())
    Ty = EnumTy->getDecl()->getIntegerType();

  // Return aggregate types as indirect by value.
  if (isAggregateTypeForABI(Ty)) {
    // Under CUDA device compilation, tex/surf builtin types are replaced with
    // object types and passed directly.
    if (getContext().getLangOpts().CUDAIsDevice) {
      if (Ty->isCUDADeviceBuiltinSurfaceType())
        return ABIArgInfo::getDirect(
            CGInfo.getCUDADeviceBuiltinSurfaceDeviceType());
      if (Ty->isCUDADeviceBuiltinTextureType())
        return ABIArgInfo::getDirect(
            CGInfo.getCUDADeviceBuiltinTextureDeviceType());
    }
    return getNaturalAlignIndirect(Ty, /* byval */ true);
  }

  if (const auto *EIT = Ty->getAs<BitIntType>()) {
    if ((EIT->getNumBits() > 128) ||
        (!getContext().getTargetInfo().hasInt128Type() &&
         EIT->getNumBits() > 64))
      return getNaturalAlignIndirect(Ty, /* byval */ true);
  }

  return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
                                            : ABIArgInfo::getDirect());
}

void NVPTXABIInfo::computeInfo(CGFunctionInfo &FI) const {
  if (!getCXXABI().classifyReturnType(FI))
    FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
  for (auto &I : FI.arguments())
    I.info = classifyArgumentType(I.type);

  // Always honor user-specified calling convention.
  if (FI.getCallingConvention() != llvm::CallingConv::C)
    return;

  FI.setEffectiveCallingConvention(getRuntimeCC());
}

Address NVPTXABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                                QualType Ty) const {
  llvm_unreachable("NVPTX does not support varargs");
}

void NVPTXTargetCodeGenInfo::setTargetAttributes(
    const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
  if (GV->isDeclaration())
    return;
  const VarDecl *VD = dyn_cast_or_null<VarDecl>(D);
  if (VD) {
    if (M.getLangOpts().CUDA) {
      if (VD->getType()->isCUDADeviceBuiltinSurfaceType())
        addNVVMMetadata(GV, "surface", 1);
      else if (VD->getType()->isCUDADeviceBuiltinTextureType())
        addNVVMMetadata(GV, "texture", 1);
      return;
    }
  }

  const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
  if (!FD)
    return;

  llvm::Function *F = cast<llvm::Function>(GV);

  // Perform special handling in OpenCL mode.
  if (M.getLangOpts().OpenCL) {
    // Use OpenCL function attributes to check for kernel functions; by
    // default, all functions are device functions.
    if (FD->hasAttr<OpenCLKernelAttr>()) {
      // OpenCL __kernel functions get kernel metadata:
      // create a !{<func-ref>, metadata !"kernel", i32 1} node.
      addNVVMMetadata(F, "kernel", 1);
      // Kernel functions are also not subject to inlining.
      F->addFnAttr(llvm::Attribute::NoInline);
    }
  }

  // Perform special handling in CUDA mode.
  if (M.getLangOpts().CUDA) {
    // CUDA __global__ functions get a kernel metadata entry. Since
    // __global__ functions cannot be called from the device, we do not
    // need to set the noinline attribute.
    if (FD->hasAttr<CUDAGlobalAttr>()) {
      // Create !{<func-ref>, metadata !"kernel", i32 1} node
      addNVVMMetadata(F, "kernel", 1);
    }
    if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>())
      M.handleCUDALaunchBoundsAttr(F, Attr);
  }

  // Attach kernel metadata directly if compiling for NVPTX.
  if (FD->hasAttr<NVPTXKernelAttr>()) {
    addNVVMMetadata(F, "kernel", 1);
  }
}

void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
                                             StringRef Name, int Operand) {
  llvm::Module *M = GV->getParent();
  llvm::LLVMContext &Ctx = M->getContext();

  // Get the "nvvm.annotations" metadata node.
  llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");

  llvm::Metadata *MDVals[] = {
      llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name),
      llvm::ConstantAsMetadata::get(
          llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))};
  // Append the new metadata to nvvm.annotations.
  MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
}

bool NVPTXTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
  return false;
}

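// Returns the null pointer constant for the given pointer type. For pointers
// into the OpenCL local address space, the null value is produced by
// address-space-casting the generic null pointer rather than using an
// all-zeros pattern (presumably because local-space null is not bit-zero).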
llvm::Constant *
NVPTXTargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM,
                                       llvm::PointerType *PT,
                                       QualType QT) const {
  auto &Ctx = CGM.getContext();
  if (PT->getAddressSpace() != Ctx.getTargetAddressSpace(LangAS::opencl_local))
    return llvm::ConstantPointerNull::get(PT);

  auto NPT = llvm::PointerType::get(
      PT->getContext(), Ctx.getTargetAddressSpace(LangAS::opencl_generic));
  return llvm::ConstantExpr::getAddrSpaceCast(
      llvm::ConstantPointerNull::get(NPT), PT);
}
} // namespace

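// Handles the CUDA __launch_bounds__ attribute by emitting the corresponding
// nvvm.annotations entries. For example (illustrative),
// __launch_bounds__(128, 2) yields a "maxntidx" = 128 and a "minctasm" = 2
// annotation on the function.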
void CodeGenModule::handleCUDALaunchBoundsAttr(llvm::Function *F,
                                               const CUDALaunchBoundsAttr *Attr,
                                               int32_t *MaxThreadsVal,
                                               int32_t *MinBlocksVal,
                                               int32_t *MaxClusterRankVal) {
  llvm::APSInt MaxThreads(32);
  MaxThreads = Attr->getMaxThreads()->EvaluateKnownConstInt(getContext());
  if (MaxThreads > 0) {
    if (MaxThreadsVal)
      *MaxThreadsVal = MaxThreads.getExtValue();
    if (F) {
      // Create !{<func-ref>, metadata !"maxntidx", i32 <val>} node
      NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "maxntidx",
                                              MaxThreads.getExtValue());
    }
  }


  // MinBlocks and MaxBlocks are optional arguments for CUDALaunchBoundsAttr.
  // If a value was not specified in __launch_bounds__, or the user specified
  // a 0 value, we don't have to add a PTX directive.
  if (Attr->getMinBlocks()) {
    llvm::APSInt MinBlocks(32);
    MinBlocks = Attr->getMinBlocks()->EvaluateKnownConstInt(getContext());
    if (MinBlocks > 0) {
      if (MinBlocksVal)
        *MinBlocksVal = MinBlocks.getExtValue();
      if (F) {
        // Create !{<func-ref>, metadata !"minctasm", i32 <val>} node
        NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "minctasm",
                                                MinBlocks.getExtValue());
      }
    }
  }
  if (Attr->getMaxBlocks()) {
    llvm::APSInt MaxBlocks(32);
    MaxBlocks = Attr->getMaxBlocks()->EvaluateKnownConstInt(getContext());
    if (MaxBlocks > 0) {
      if (MaxClusterRankVal)
        *MaxClusterRankVal = MaxBlocks.getExtValue();
      if (F) {
        // Create !{<func-ref>, metadata !"maxclusterrank", i32 <val>} node
        NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "maxclusterrank",
                                                MaxBlocks.getExtValue());
      }
    }
  }
}

std::unique_ptr<TargetCodeGenInfo>
CodeGen::createNVPTXTargetCodeGenInfo(CodeGenModule &CGM) {
  return std::make_unique<NVPTXTargetCodeGenInfo>(CGM.getTypes());
}