1//===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements AMDGPU TargetInfo objects.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AMDGPU.h"
14#include "clang/Basic/Builtins.h"
15#include "clang/Basic/CodeGenOptions.h"
16#include "clang/Basic/Diagnostic.h"
17#include "clang/Basic/LangOptions.h"
18#include "clang/Basic/MacroBuilder.h"
19#include "clang/Basic/TargetBuiltins.h"
20#include "llvm/ADT/SmallString.h"
21using namespace clang;
22using namespace clang::targets;
23
24namespace clang {
25namespace targets {
26
27// If you edit the description strings, make sure you update
28// getPointerWidthV().
29
30static const char *const DataLayoutStringR600 =
31 "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
32 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1";
33
34static const char *const DataLayoutStringAMDGCN =
35 "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
36 "-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:"
37 "32-v48:64-v96:128"
38 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1"
39 "-ni:7:8:9";
40
41const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
42 llvm::AMDGPUAS::FLAT_ADDRESS, // Default
43 llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global
44 llvm::AMDGPUAS::LOCAL_ADDRESS, // opencl_local
45 llvm::AMDGPUAS::CONSTANT_ADDRESS, // opencl_constant
46 llvm::AMDGPUAS::PRIVATE_ADDRESS, // opencl_private
47 llvm::AMDGPUAS::FLAT_ADDRESS, // opencl_generic
48 llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global_device
49 llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global_host
50 llvm::AMDGPUAS::GLOBAL_ADDRESS, // cuda_device
51 llvm::AMDGPUAS::CONSTANT_ADDRESS, // cuda_constant
52 llvm::AMDGPUAS::LOCAL_ADDRESS, // cuda_shared
53 llvm::AMDGPUAS::GLOBAL_ADDRESS, // sycl_global
54 llvm::AMDGPUAS::GLOBAL_ADDRESS, // sycl_global_device
55 llvm::AMDGPUAS::GLOBAL_ADDRESS, // sycl_global_host
56 llvm::AMDGPUAS::LOCAL_ADDRESS, // sycl_local
57 llvm::AMDGPUAS::PRIVATE_ADDRESS, // sycl_private
58 llvm::AMDGPUAS::FLAT_ADDRESS, // ptr32_sptr
59 llvm::AMDGPUAS::FLAT_ADDRESS, // ptr32_uptr
60 llvm::AMDGPUAS::FLAT_ADDRESS, // ptr64
61 llvm::AMDGPUAS::FLAT_ADDRESS, // hlsl_groupshared
62};
63
64const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
65 llvm::AMDGPUAS::PRIVATE_ADDRESS, // Default
66 llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global
67 llvm::AMDGPUAS::LOCAL_ADDRESS, // opencl_local
68 llvm::AMDGPUAS::CONSTANT_ADDRESS, // opencl_constant
69 llvm::AMDGPUAS::PRIVATE_ADDRESS, // opencl_private
70 llvm::AMDGPUAS::FLAT_ADDRESS, // opencl_generic
71 llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global_device
72 llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global_host
73 llvm::AMDGPUAS::GLOBAL_ADDRESS, // cuda_device
74 llvm::AMDGPUAS::CONSTANT_ADDRESS, // cuda_constant
75 llvm::AMDGPUAS::LOCAL_ADDRESS, // cuda_shared
76 // SYCL address space values for this map are dummy
77 llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_global
78 llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_global_device
79 llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_global_host
80 llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_local
81 llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_private
82 llvm::AMDGPUAS::FLAT_ADDRESS, // ptr32_sptr
83 llvm::AMDGPUAS::FLAT_ADDRESS, // ptr32_uptr
84 llvm::AMDGPUAS::FLAT_ADDRESS, // ptr64
85 llvm::AMDGPUAS::FLAT_ADDRESS, // hlsl_groupshared
86
87};
88} // namespace targets
89} // namespace clang
90
91static constexpr Builtin::Info BuiltinInfo[] = {
92#define BUILTIN(ID, TYPE, ATTRS) \
93 {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
94#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \
95 {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
96#include "clang/Basic/BuiltinsAMDGPU.def"
97};
98
99const char *const AMDGPUTargetInfo::GCCRegNames[] = {
100 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
101 "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
102 "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
103 "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
104 "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
105 "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
106 "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
107 "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
108 "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
109 "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
110 "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
111 "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
112 "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
113 "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
114 "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
115 "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
116 "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
117 "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
118 "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
119 "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
120 "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
121 "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
122 "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
123 "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
124 "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
125 "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
126 "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
127 "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
128 "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
129 "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
130 "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
131 "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
132 "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
133 "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
134 "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
135 "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
136 "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
137 "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
138 "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
139 "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
140 "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
141 "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
142 "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
143 "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
144 "flat_scratch_lo", "flat_scratch_hi",
145 "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8",
146 "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17",
147 "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26",
148 "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35",
149 "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44",
150 "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53",
151 "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62",
152 "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71",
153 "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80",
154 "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89",
155 "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98",
156 "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107",
157 "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116",
158 "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125",
159 "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134",
160 "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143",
161 "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152",
162 "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161",
163 "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170",
164 "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179",
165 "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188",
166 "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197",
167 "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206",
168 "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215",
169 "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224",
170 "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233",
171 "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242",
172 "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251",
173 "a252", "a253", "a254", "a255"
174};
175
176ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
177 return llvm::ArrayRef(GCCRegNames);
178}
179
180bool AMDGPUTargetInfo::initFeatureMap(
181 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
182 const std::vector<std::string> &FeatureVec) const {
183
184 using namespace llvm::AMDGPU;
185 fillAMDGPUFeatureMap(GPU: CPU, T: getTriple(), Features);
186 if (!TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec))
187 return false;
188
189 // TODO: Should move this logic into TargetParser
190 std::string ErrorMsg;
191 if (!insertWaveSizeFeature(GPU: CPU, T: getTriple(), Features, ErrorMsg)) {
192 Diags.Report(diag::err_invalid_feature_combination) << ErrorMsg;
193 return false;
194 }
195
196 return true;
197}
198
199void AMDGPUTargetInfo::fillValidCPUList(
200 SmallVectorImpl<StringRef> &Values) const {
201 if (isAMDGCN(TT: getTriple()))
202 llvm::AMDGPU::fillValidArchListAMDGCN(Values);
203 else
204 llvm::AMDGPU::fillValidArchListR600(Values);
205}
206
207void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
208 AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
209}
210
211AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
212 const TargetOptions &Opts)
213 : TargetInfo(Triple),
214 GPUKind(isAMDGCN(TT: Triple) ?
215 llvm::AMDGPU::parseArchAMDGCN(CPU: Opts.CPU) :
216 llvm::AMDGPU::parseArchR600(CPU: Opts.CPU)),
217 GPUFeatures(isAMDGCN(TT: Triple) ?
218 llvm::AMDGPU::getArchAttrAMDGCN(AK: GPUKind) :
219 llvm::AMDGPU::getArchAttrR600(AK: GPUKind)) {
220 resetDataLayout(DL: isAMDGCN(TT: getTriple()) ? DataLayoutStringAMDGCN
221 : DataLayoutStringR600);
222
223 setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
224 !isAMDGCN(TT: Triple));
225 UseAddrSpaceMapMangling = true;
226
227 if (isAMDGCN(TT: Triple)) {
228 // __bf16 is always available as a load/store only type on AMDGCN.
229 BFloat16Width = BFloat16Align = 16;
230 BFloat16Format = &llvm::APFloat::BFloat();
231 }
232
233 HasLegalHalfType = true;
234 HasFloat16 = true;
235 WavefrontSize = GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32 ? 32 : 64;
236 AllowAMDGPUUnsafeFPAtomics = Opts.AllowAMDGPUUnsafeFPAtomics;
237
238 // Set pointer width and alignment for the generic address space.
239 PointerWidth = PointerAlign = getPointerWidthV(AS: LangAS::Default);
240 if (getMaxPointerWidth() == 64) {
241 LongWidth = LongAlign = 64;
242 SizeType = UnsignedLong;
243 PtrDiffType = SignedLong;
244 IntPtrType = SignedLong;
245 }
246
247 MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
248 CUMode = !(GPUFeatures & llvm::AMDGPU::FEATURE_WGP);
249 for (auto F : {"image-insts", "gws"})
250 ReadOnlyFeatures.insert(key: F);
251 HalfArgsAndReturns = true;
252}
253
254void AMDGPUTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) {
255 TargetInfo::adjust(Diags, Opts);
256 // ToDo: There are still a few places using default address space as private
257 // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
258 // can be removed from the following line.
259 setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
260 !isAMDGCN(TT: getTriple()));
261}
262
263ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
264 return llvm::ArrayRef(BuiltinInfo,
265 clang::AMDGPU::LastTSBuiltin - Builtin::FirstTSBuiltin);
266}
267
268void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
269 MacroBuilder &Builder) const {
270 Builder.defineMacro(Name: "__AMD__");
271 Builder.defineMacro(Name: "__AMDGPU__");
272
273 if (isAMDGCN(TT: getTriple()))
274 Builder.defineMacro(Name: "__AMDGCN__");
275 else
276 Builder.defineMacro(Name: "__R600__");
277
278 // Legacy HIP host code relies on these default attributes to be defined.
279 bool IsHIPHost = Opts.HIP && !Opts.CUDAIsDevice;
280 if (GPUKind == llvm::AMDGPU::GK_NONE && !IsHIPHost)
281 return;
282
283 llvm::SmallString<16> CanonName =
284 (isAMDGCN(TT: getTriple()) ? getArchNameAMDGCN(AK: GPUKind)
285 : getArchNameR600(AK: GPUKind));
286
287 // Sanitize the name of generic targets.
288 // e.g. gfx10-1-generic -> gfx10_1_generic
289 if (GPUKind >= llvm::AMDGPU::GK_AMDGCN_GENERIC_FIRST &&
290 GPUKind <= llvm::AMDGPU::GK_AMDGCN_GENERIC_LAST) {
291 std::replace(first: CanonName.begin(), last: CanonName.end(), old_value: '-', new_value: '_');
292 }
293
294 Builder.defineMacro(Name: Twine("__") + Twine(CanonName) + Twine("__"));
295 // Emit macros for gfx family e.g. gfx906 -> __GFX9__, gfx1030 -> __GFX10___
296 if (isAMDGCN(TT: getTriple()) && !IsHIPHost) {
297 assert(StringRef(CanonName).starts_with("gfx") &&
298 "Invalid amdgcn canonical name");
299 StringRef CanonFamilyName = getArchFamilyNameAMDGCN(AK: GPUKind);
300 Builder.defineMacro(Name: Twine("__") + Twine(CanonFamilyName.upper()) +
301 Twine("__"));
302 Builder.defineMacro(Name: "__amdgcn_processor__",
303 Value: Twine("\"") + Twine(CanonName) + Twine("\""));
304 Builder.defineMacro(Name: "__amdgcn_target_id__",
305 Value: Twine("\"") + Twine(*getTargetID()) + Twine("\""));
306 for (auto F : getAllPossibleTargetIDFeatures(T: getTriple(), Processor: CanonName)) {
307 auto Loc = OffloadArchFeatures.find(Key: F);
308 if (Loc != OffloadArchFeatures.end()) {
309 std::string NewF = F.str();
310 std::replace(first: NewF.begin(), last: NewF.end(), old_value: '-', new_value: '_');
311 Builder.defineMacro(Name: Twine("__amdgcn_feature_") + Twine(NewF) +
312 Twine("__"),
313 Value: Loc->second ? "1" : "0");
314 }
315 }
316 }
317
318 if (AllowAMDGPUUnsafeFPAtomics)
319 Builder.defineMacro(Name: "__AMDGCN_UNSAFE_FP_ATOMICS__");
320
321 // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
322 // removed in the near future.
323 if (hasFMAF())
324 Builder.defineMacro(Name: "__HAS_FMAF__");
325 if (hasFastFMAF())
326 Builder.defineMacro(Name: "FP_FAST_FMAF");
327 if (hasLDEXPF())
328 Builder.defineMacro(Name: "__HAS_LDEXPF__");
329 if (hasFP64())
330 Builder.defineMacro(Name: "__HAS_FP64__");
331 if (hasFastFMA())
332 Builder.defineMacro(Name: "FP_FAST_FMA");
333
334 Builder.defineMacro(Name: "__AMDGCN_WAVEFRONT_SIZE__", Value: Twine(WavefrontSize));
335 // ToDo: deprecate this macro for naming consistency.
336 Builder.defineMacro(Name: "__AMDGCN_WAVEFRONT_SIZE", Value: Twine(WavefrontSize));
337 Builder.defineMacro(Name: "__AMDGCN_CUMODE__", Value: Twine(CUMode));
338}
339
340void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
341 assert(HalfFormat == Aux->HalfFormat);
342 assert(FloatFormat == Aux->FloatFormat);
343 assert(DoubleFormat == Aux->DoubleFormat);
344
345 // On x86_64 long double is 80-bit extended precision format, which is
346 // not supported by AMDGPU. 128-bit floating point format is also not
347 // supported by AMDGPU. Therefore keep its own format for these two types.
348 auto SaveLongDoubleFormat = LongDoubleFormat;
349 auto SaveFloat128Format = Float128Format;
350 auto SaveLongDoubleWidth = LongDoubleWidth;
351 auto SaveLongDoubleAlign = LongDoubleAlign;
352 copyAuxTarget(Aux);
353 LongDoubleFormat = SaveLongDoubleFormat;
354 Float128Format = SaveFloat128Format;
355 LongDoubleWidth = SaveLongDoubleWidth;
356 LongDoubleAlign = SaveLongDoubleAlign;
357 // For certain builtin types support on the host target, claim they are
358 // support to pass the compilation of the host code during the device-side
359 // compilation.
360 // FIXME: As the side effect, we also accept `__float128` uses in the device
361 // code. To rejct these builtin types supported in the host target but not in
362 // the device target, one approach would support `device_builtin` attribute
363 // so that we could tell the device builtin types from the host ones. The
364 // also solves the different representations of the same builtin type, such
365 // as `size_t` in the MSVC environment.
366 if (Aux->hasFloat128Type()) {
367 HasFloat128 = true;
368 Float128Format = DoubleFormat;
369 }
370}
371

source code of clang/lib/Basic/Targets/AMDGPU.cpp