//===--- NVPTX.cpp - Implement NVPTX target feature support --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements NVPTX TargetInfo objects.
//
//===----------------------------------------------------------------------===//

#include "NVPTX.h"
#include "Targets.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/MacroBuilder.h"
#include "clang/Basic/TargetBuiltins.h"
#include "llvm/ADT/StringSwitch.h"

using namespace clang;
using namespace clang::targets;

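// Table of NVPTX target builtins, expanded from BuiltinsNVPTX.def via the
// BUILTIN/LIBBUILTIN/TARGET_BUILTIN X-macros below.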
static constexpr Builtin::Info BuiltinInfo[] = {
#define BUILTIN(ID, TYPE, ATTRS)                                               \
  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
#define LIBBUILTIN(ID, TYPE, ATTRS, HEADER)                                    \
  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::HEADER, ALL_LANGUAGES},
#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
  {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
#include "clang/Basic/BuiltinsNVPTX.def"
};

const char *const NVPTXTargetInfo::GCCRegNames[] = {"r0"};

NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple,
                                 const TargetOptions &Opts,
                                 unsigned TargetPointerWidth)
    : TargetInfo(Triple) {
  assert((TargetPointerWidth == 32 || TargetPointerWidth == 64) &&
         "NVPTX only supports 32- and 64-bit modes.");

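  // Default to PTX 3.2; an explicit '+ptxNN' target feature written on the
  // command line overrides this.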
  PTXVersion = 32;
  for (const StringRef Feature : Opts.FeaturesAsWritten) {
    int PTXV;
    if (!Feature.starts_with("+ptx") ||
        Feature.drop_front(4).getAsInteger(10, PTXV))
      continue;
    PTXVersion = PTXV; // TODO: should it be max(PTXVersion, PTXV)?
  }

  TLSSupported = false;
  VLASupported = false;
  AddrSpaceMap = &NVPTXAddrSpaceMap;
  UseAddrSpaceMapMangling = true;
  // __bf16 is always available as a load/store only type.
  BFloat16Width = BFloat16Align = 16;
  BFloat16Format = &llvm::APFloat::BFloat();

  // Define available target features
  // These must be defined in sorted order!
  NoAsmVariants = true;
  GPU = CudaArch::UNUSED;

  // PTX supports f16 as a fundamental type.
  HasLegalHalfType = true;
  HasFloat16 = true;

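  // Pick the DataLayout string: 32-bit pointers throughout, 64-bit generic
  // pointers with 32-bit pointers in the shared/const/local address spaces
  // (p3/p4/p5) when short pointers are requested, or 64-bit throughout.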
  if (TargetPointerWidth == 32)
    resetDataLayout("e-p:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64");
  else if (Opts.NVPTXUseShortPointers)
    resetDataLayout(
        "e-p3:32:32-p4:32:32-p5:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64");
  else
    resetDataLayout("e-i64:64-i128:128-v16:16-v32:32-n16:32:64");

  // If possible, get a TargetInfo for our host triple, so we can match its
  // types.
  llvm::Triple HostTriple(Opts.HostTriple);
  if (!HostTriple.isNVPTX())
    HostTarget = AllocateTarget(llvm::Triple(Opts.HostTriple), Opts);

  // If no host target, make some guesses about the data layout and return.
  if (!HostTarget) {
    LongWidth = LongAlign = TargetPointerWidth;
    PointerWidth = PointerAlign = TargetPointerWidth;
    switch (TargetPointerWidth) {
    case 32:
      SizeType = TargetInfo::UnsignedInt;
      PtrDiffType = TargetInfo::SignedInt;
      IntPtrType = TargetInfo::SignedInt;
      break;
    case 64:
      SizeType = TargetInfo::UnsignedLong;
      PtrDiffType = TargetInfo::SignedLong;
      IntPtrType = TargetInfo::SignedLong;
      break;
    default:
      llvm_unreachable("TargetPointerWidth must be 32 or 64");
    }

    MaxAtomicInlineWidth = TargetPointerWidth;
    return;
  }

  // Copy properties from host target.
  PointerWidth = HostTarget->getPointerWidth(LangAS::Default);
  PointerAlign = HostTarget->getPointerAlign(LangAS::Default);
  BoolWidth = HostTarget->getBoolWidth();
  BoolAlign = HostTarget->getBoolAlign();
  IntWidth = HostTarget->getIntWidth();
  IntAlign = HostTarget->getIntAlign();
  HalfWidth = HostTarget->getHalfWidth();
  HalfAlign = HostTarget->getHalfAlign();
  FloatWidth = HostTarget->getFloatWidth();
  FloatAlign = HostTarget->getFloatAlign();
  DoubleWidth = HostTarget->getDoubleWidth();
  DoubleAlign = HostTarget->getDoubleAlign();
  LongWidth = HostTarget->getLongWidth();
  LongAlign = HostTarget->getLongAlign();
  LongLongWidth = HostTarget->getLongLongWidth();
  LongLongAlign = HostTarget->getLongLongAlign();
  MinGlobalAlign = HostTarget->getMinGlobalAlign(/* TypeSize = */ 0,
                                                 /* HasNonWeakDef = */ true);
  NewAlign = HostTarget->getNewAlign();
  DefaultAlignForAttributeAligned =
      HostTarget->getDefaultAlignForAttributeAligned();
  SizeType = HostTarget->getSizeType();
  IntMaxType = HostTarget->getIntMaxType();
  PtrDiffType = HostTarget->getPtrDiffType(LangAS::Default);
  IntPtrType = HostTarget->getIntPtrType();
  WCharType = HostTarget->getWCharType();
  WIntType = HostTarget->getWIntType();
  Char16Type = HostTarget->getChar16Type();
  Char32Type = HostTarget->getChar32Type();
  Int64Type = HostTarget->getInt64Type();
  SigAtomicType = HostTarget->getSigAtomicType();
  ProcessIDType = HostTarget->getProcessIDType();

  UseBitFieldTypeAlignment = HostTarget->useBitFieldTypeAlignment();
  UseZeroLengthBitfieldAlignment = HostTarget->useZeroLengthBitfieldAlignment();
  UseExplicitBitFieldAlignment = HostTarget->useExplicitBitFieldAlignment();
  ZeroLengthBitfieldBoundary = HostTarget->getZeroLengthBitfieldBoundary();

  // This is a bit of a lie, but it controls __GCC_ATOMIC_XXX_LOCK_FREE, and
  // we need those macros to be identical on host and device, because (among
  // other things) they affect which standard library classes are defined, and
  // we need all classes to be defined on both the host and device.
  MaxAtomicInlineWidth = HostTarget->getMaxAtomicInlineWidth();

  // Properties intentionally not copied from host:
  // - LargeArrayMinWidth, LargeArrayAlign: Not visible across the
  //   host/device boundary.
  // - SuitableAlign: Not visible across the host/device boundary, and may
  //   correctly be different on host/device, e.g. if host has wider vector
  //   types than device.
  // - LongDoubleWidth, LongDoubleAlign: nvptx's long double type is the same
  //   as its double type, but that's not necessarily true on the host.
  //   TODO: nvcc emits a warning when using long double on device; we should
  //   do the same.
}

ArrayRef<const char *> NVPTXTargetInfo::getGCCRegNames() const {
  return llvm::ArrayRef(GCCRegNames);
}

bool NVPTXTargetInfo::hasFeature(StringRef Feature) const {
  return llvm::StringSwitch<bool>(Feature)
      .Cases("ptx", "nvptx", true)
      .Default(false);
}

void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
                                       MacroBuilder &Builder) const {
  Builder.defineMacro("__PTX__");
  Builder.defineMacro("__NVPTX__");

  // Skip setting architecture dependent macros if undefined.
  if (GPU == CudaArch::UNUSED && !HostTarget)
    return;

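  // Define the architecture macros only when compiling for the device (or
  // when there is no host target at all).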
  if (Opts.CUDAIsDevice || Opts.OpenMPIsTargetDevice || !HostTarget) {
    // Set __CUDA_ARCH__ for the GPU specified.
    std::string CUDAArchCode = [this] {
      switch (GPU) {
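      // The AMD GFX enumerators share the CudaArch enum but are never valid
      // for an NVPTX target; they break out of the switch and reach the
      // llvm_unreachable below.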
      case CudaArch::GFX600:
      case CudaArch::GFX601:
      case CudaArch::GFX602:
      case CudaArch::GFX700:
      case CudaArch::GFX701:
      case CudaArch::GFX702:
      case CudaArch::GFX703:
      case CudaArch::GFX704:
      case CudaArch::GFX705:
      case CudaArch::GFX801:
      case CudaArch::GFX802:
      case CudaArch::GFX803:
      case CudaArch::GFX805:
      case CudaArch::GFX810:
      case CudaArch::GFX900:
      case CudaArch::GFX902:
      case CudaArch::GFX904:
      case CudaArch::GFX906:
      case CudaArch::GFX908:
      case CudaArch::GFX909:
      case CudaArch::GFX90a:
      case CudaArch::GFX90c:
      case CudaArch::GFX940:
      case CudaArch::GFX941:
      case CudaArch::GFX942:
      case CudaArch::GFX1010:
      case CudaArch::GFX1011:
      case CudaArch::GFX1012:
      case CudaArch::GFX1013:
      case CudaArch::GFX1030:
      case CudaArch::GFX1031:
      case CudaArch::GFX1032:
      case CudaArch::GFX1033:
      case CudaArch::GFX1034:
      case CudaArch::GFX1035:
      case CudaArch::GFX1036:
      case CudaArch::GFX1100:
      case CudaArch::GFX1101:
      case CudaArch::GFX1102:
      case CudaArch::GFX1103:
      case CudaArch::GFX1150:
      case CudaArch::GFX1151:
      case CudaArch::GFX1200:
      case CudaArch::GFX1201:
      case CudaArch::Generic:
      case CudaArch::LAST:
        break;
      case CudaArch::UNKNOWN:
        assert(false && "No GPU arch when compiling CUDA device code.");
        return "";
      case CudaArch::UNUSED:
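        // Fall through: an unspecified GPU reports the sm_20 value.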
      case CudaArch::SM_20:
        return "200";
      case CudaArch::SM_21:
        return "210";
      case CudaArch::SM_30:
        return "300";
      case CudaArch::SM_32_:
        return "320";
      case CudaArch::SM_35:
        return "350";
      case CudaArch::SM_37:
        return "370";
      case CudaArch::SM_50:
        return "500";
      case CudaArch::SM_52:
        return "520";
      case CudaArch::SM_53:
        return "530";
      case CudaArch::SM_60:
        return "600";
      case CudaArch::SM_61:
        return "610";
      case CudaArch::SM_62:
        return "620";
      case CudaArch::SM_70:
        return "700";
      case CudaArch::SM_72:
        return "720";
      case CudaArch::SM_75:
        return "750";
      case CudaArch::SM_80:
        return "800";
      case CudaArch::SM_86:
        return "860";
      case CudaArch::SM_87:
        return "870";
      case CudaArch::SM_89:
        return "890";
      case CudaArch::SM_90:
      case CudaArch::SM_90a:
        return "900";
      }
      llvm_unreachable("unhandled CudaArch");
    }();
    Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode);
    if (GPU == CudaArch::SM_90a)
      Builder.defineMacro("__CUDA_ARCH_FEAT_SM90_ALL", "1");
  }
}

ArrayRef<Builtin::Info> NVPTXTargetInfo::getTargetBuiltins() const {
  return llvm::ArrayRef(BuiltinInfo,
                        clang::NVPTX::LastTSBuiltin - Builtin::FirstTSBuiltin);
}