//===--- NVPTX.cpp - Implement NVPTX target feature support --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements NVPTX TargetInfo objects.
//
//===----------------------------------------------------------------------===//

#include "NVPTX.h"
#include "Targets.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/MacroBuilder.h"
#include "clang/Basic/TargetBuiltins.h"
#include "llvm/ADT/StringSwitch.h"

using namespace clang;
using namespace clang::targets;

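// Table of NVPTX target builtins, expanded from BuiltinsNVPTX.def via the
// BUILTIN/LIBBUILTIN/TARGET_BUILTIN X-macros below.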
static constexpr Builtin::Info BuiltinInfo[] = {
#define BUILTIN(ID, TYPE, ATTRS)                                               \
  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
#define LIBBUILTIN(ID, TYPE, ATTRS, HEADER)                                    \
  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::HEADER, ALL_LANGUAGES},
#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
  {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
#include "clang/Basic/BuiltinsNVPTX.def"
};

const char *const NVPTXTargetInfo::GCCRegNames[] = {"r0"};

NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple,
                                 const TargetOptions &Opts,
                                 unsigned TargetPointerWidth)
    : TargetInfo(Triple) {
  assert((TargetPointerWidth == 32 || TargetPointerWidth == 64) &&
         "NVPTX only supports 32- and 64-bit modes.");

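  // Default to PTX 3.2; an explicit '+ptxNN' target feature written on the
  // command line overrides this.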
  PTXVersion = 32;
  for (const StringRef Feature : Opts.FeaturesAsWritten) {
    int PTXV;
    if (!Feature.starts_with("+ptx") ||
        Feature.drop_front(4).getAsInteger(10, PTXV))
      continue;
    PTXVersion = PTXV; // TODO: should it be max(PTXVersion, PTXV)?
  }

  TLSSupported = false;
  VLASupported = false;
  AddrSpaceMap = &NVPTXAddrSpaceMap;
  UseAddrSpaceMapMangling = true;
  // __bf16 is always available as a load/store only type.
  BFloat16Width = BFloat16Align = 16;
  BFloat16Format = &llvm::APFloat::BFloat();

  // Define available target features
  // These must be defined in sorted order!
  NoAsmVariants = true;
  GPU = CudaArch::UNUSED;

  // PTX supports f16 as a fundamental type.
  HasLegalHalfType = true;
  HasFloat16 = true;

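  // Pick the DataLayout string: 32-bit pointers throughout, 64-bit generic
  // pointers with 32-bit pointers in the shared/const/local address spaces
  // (p3/p4/p5) when short pointers are requested, or 64-bit throughout.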
  if (TargetPointerWidth == 32)
    resetDataLayout("e-p:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64");
  else if (Opts.NVPTXUseShortPointers)
    resetDataLayout(
        "e-p3:32:32-p4:32:32-p5:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64");
  else
    resetDataLayout("e-i64:64-i128:128-v16:16-v32:32-n16:32:64");

  // If possible, get a TargetInfo for our host triple, so we can match its
  // types.
  llvm::Triple HostTriple(Opts.HostTriple);
  if (!HostTriple.isNVPTX())
    HostTarget = AllocateTarget(llvm::Triple(Opts.HostTriple), Opts);

  // If no host target, make some guesses about the data layout and return.
  if (!HostTarget) {
    LongWidth = LongAlign = TargetPointerWidth;
    PointerWidth = PointerAlign = TargetPointerWidth;
    switch (TargetPointerWidth) {
    case 32:
      SizeType = TargetInfo::UnsignedInt;
      PtrDiffType = TargetInfo::SignedInt;
      IntPtrType = TargetInfo::SignedInt;
      break;
    case 64:
      SizeType = TargetInfo::UnsignedLong;
      PtrDiffType = TargetInfo::SignedLong;
      IntPtrType = TargetInfo::SignedLong;
      break;
    default:
      llvm_unreachable("TargetPointerWidth must be 32 or 64");
    }

    MaxAtomicInlineWidth = TargetPointerWidth;
    return;
  }

  // Copy properties from host target.
  PointerWidth = HostTarget->getPointerWidth(LangAS::Default);
  PointerAlign = HostTarget->getPointerAlign(LangAS::Default);
  BoolWidth = HostTarget->getBoolWidth();
  BoolAlign = HostTarget->getBoolAlign();
  IntWidth = HostTarget->getIntWidth();
  IntAlign = HostTarget->getIntAlign();
  HalfWidth = HostTarget->getHalfWidth();
  HalfAlign = HostTarget->getHalfAlign();
  FloatWidth = HostTarget->getFloatWidth();
  FloatAlign = HostTarget->getFloatAlign();
  DoubleWidth = HostTarget->getDoubleWidth();
  DoubleAlign = HostTarget->getDoubleAlign();
  LongWidth = HostTarget->getLongWidth();
  LongAlign = HostTarget->getLongAlign();
  LongLongWidth = HostTarget->getLongLongWidth();
  LongLongAlign = HostTarget->getLongLongAlign();
  MinGlobalAlign = HostTarget->getMinGlobalAlign(/* TypeSize = */ 0,
                                                 /* HasNonWeakDef = */ true);
  NewAlign = HostTarget->getNewAlign();
  DefaultAlignForAttributeAligned =
      HostTarget->getDefaultAlignForAttributeAligned();
  SizeType = HostTarget->getSizeType();
  IntMaxType = HostTarget->getIntMaxType();
  PtrDiffType = HostTarget->getPtrDiffType(LangAS::Default);
  IntPtrType = HostTarget->getIntPtrType();
  WCharType = HostTarget->getWCharType();
  WIntType = HostTarget->getWIntType();
  Char16Type = HostTarget->getChar16Type();
  Char32Type = HostTarget->getChar32Type();
  Int64Type = HostTarget->getInt64Type();
  SigAtomicType = HostTarget->getSigAtomicType();
  ProcessIDType = HostTarget->getProcessIDType();

  UseBitFieldTypeAlignment = HostTarget->useBitFieldTypeAlignment();
  UseZeroLengthBitfieldAlignment = HostTarget->useZeroLengthBitfieldAlignment();
  UseExplicitBitFieldAlignment = HostTarget->useExplicitBitFieldAlignment();
  ZeroLengthBitfieldBoundary = HostTarget->getZeroLengthBitfieldBoundary();

  // This is a bit of a lie, but it controls __GCC_ATOMIC_XXX_LOCK_FREE, and
  // we need those macros to be identical on host and device, because (among
  // other things) they affect which standard library classes are defined, and
  // we need all classes to be defined on both the host and device.
  MaxAtomicInlineWidth = HostTarget->getMaxAtomicInlineWidth();

  // Properties intentionally not copied from host:
  // - LargeArrayMinWidth, LargeArrayAlign: Not visible across the
  //   host/device boundary.
  // - SuitableAlign: Not visible across the host/device boundary, and may
  //   correctly be different on host/device, e.g. if host has wider vector
  //   types than device.
  // - LongDoubleWidth, LongDoubleAlign: nvptx's long double type is the same
  //   as its double type, but that's not necessarily true on the host.
  //   TODO: nvcc emits a warning when using long double on device; we should
  //   do the same.
}

ArrayRef<const char *> NVPTXTargetInfo::getGCCRegNames() const {
  return llvm::ArrayRef(GCCRegNames);
}

bool NVPTXTargetInfo::hasFeature(StringRef Feature) const {
  return llvm::StringSwitch<bool>(Feature)
      .Cases("ptx", "nvptx", true)
      .Default(false);
}

void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
                                       MacroBuilder &Builder) const {
  Builder.defineMacro("__PTX__");
  Builder.defineMacro("__NVPTX__");

  // Skip setting architecture dependent macros if undefined.
  if (GPU == CudaArch::UNUSED && !HostTarget)
    return;

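  // Define the architecture macros only when compiling for the device (or
  // when there is no host target at all).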
  if (Opts.CUDAIsDevice || Opts.OpenMPIsTargetDevice || !HostTarget) {
    // Set __CUDA_ARCH__ for the GPU specified.
    std::string CUDAArchCode = [this] {
      switch (GPU) {
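      // The AMD GFX enumerators share the CudaArch enum but are never valid
      // for an NVPTX target; they break out of the switch and reach the
      // llvm_unreachable below.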
      case CudaArch::GFX600:
      case CudaArch::GFX601:
      case CudaArch::GFX602:
      case CudaArch::GFX700:
      case CudaArch::GFX701:
      case CudaArch::GFX702:
      case CudaArch::GFX703:
      case CudaArch::GFX704:
      case CudaArch::GFX705:
      case CudaArch::GFX801:
      case CudaArch::GFX802:
      case CudaArch::GFX803:
      case CudaArch::GFX805:
      case CudaArch::GFX810:
      case CudaArch::GFX900:
      case CudaArch::GFX902:
      case CudaArch::GFX904:
      case CudaArch::GFX906:
      case CudaArch::GFX908:
      case CudaArch::GFX909:
      case CudaArch::GFX90a:
      case CudaArch::GFX90c:
      case CudaArch::GFX940:
      case CudaArch::GFX941:
      case CudaArch::GFX942:
      case CudaArch::GFX1010:
      case CudaArch::GFX1011:
      case CudaArch::GFX1012:
      case CudaArch::GFX1013:
      case CudaArch::GFX1030:
      case CudaArch::GFX1031:
      case CudaArch::GFX1032:
      case CudaArch::GFX1033:
      case CudaArch::GFX1034:
      case CudaArch::GFX1035:
      case CudaArch::GFX1036:
      case CudaArch::GFX1100:
      case CudaArch::GFX1101:
      case CudaArch::GFX1102:
      case CudaArch::GFX1103:
      case CudaArch::GFX1150:
      case CudaArch::GFX1151:
      case CudaArch::GFX1200:
      case CudaArch::GFX1201:
      case CudaArch::Generic:
      case CudaArch::LAST:
        break;
      case CudaArch::UNKNOWN:
        assert(false && "No GPU arch when compiling CUDA device code.");
        return "";
      case CudaArch::UNUSED:
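        // Fall through: an unspecified GPU reports the sm_20 value.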
      case CudaArch::SM_20:
        return "200";
      case CudaArch::SM_21:
        return "210";
      case CudaArch::SM_30:
        return "300";
      case CudaArch::SM_32_:
        return "320";
      case CudaArch::SM_35:
        return "350";
      case CudaArch::SM_37:
        return "370";
      case CudaArch::SM_50:
        return "500";
      case CudaArch::SM_52:
        return "520";
      case CudaArch::SM_53:
        return "530";
      case CudaArch::SM_60:
        return "600";
      case CudaArch::SM_61:
        return "610";
      case CudaArch::SM_62:
        return "620";
      case CudaArch::SM_70:
        return "700";
      case CudaArch::SM_72:
        return "720";
      case CudaArch::SM_75:
        return "750";
      case CudaArch::SM_80:
        return "800";
      case CudaArch::SM_86:
        return "860";
      case CudaArch::SM_87:
        return "870";
      case CudaArch::SM_89:
        return "890";
      case CudaArch::SM_90:
      case CudaArch::SM_90a:
        return "900";
      }
      llvm_unreachable("unhandled CudaArch");
    }();
    Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode);
    if (GPU == CudaArch::SM_90a)
      Builder.defineMacro("__CUDA_ARCH_FEAT_SM90_ALL", "1");
  }
}

ArrayRef<Builtin::Info> NVPTXTargetInfo::getTargetBuiltins() const {
  return llvm::ArrayRef(BuiltinInfo,
                        clang::NVPTX::LastTSBuiltin - Builtin::FirstTSBuiltin);
}