1 | //===--- AMDGPU.h - Declare AMDGPU target feature support -------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file declares AMDGPU TargetInfo objects. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #ifndef LLVM_CLANG_LIB_BASIC_TARGETS_AMDGPU_H |
14 | #define LLVM_CLANG_LIB_BASIC_TARGETS_AMDGPU_H |
15 | |
16 | #include "clang/Basic/TargetID.h" |
17 | #include "clang/Basic/TargetInfo.h" |
18 | #include "clang/Basic/TargetOptions.h" |
19 | #include "llvm/ADT/StringSet.h" |
20 | #include "llvm/Support/AMDGPUAddrSpace.h" |
21 | #include "llvm/Support/Compiler.h" |
22 | #include "llvm/TargetParser/TargetParser.h" |
23 | #include "llvm/TargetParser/Triple.h" |
24 | #include <optional> |
25 | |
26 | namespace clang { |
27 | namespace targets { |
28 | |
29 | class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : public TargetInfo { |
30 | |
31 | static const char *const GCCRegNames[]; |
32 | |
33 | static const LangASMap AMDGPUDefIsGenMap; |
34 | static const LangASMap AMDGPUDefIsPrivMap; |
35 | |
36 | llvm::AMDGPU::GPUKind GPUKind; |
37 | unsigned GPUFeatures; |
38 | unsigned WavefrontSize; |
39 | |
40 | /// Whether to use cumode or WGP mode. True for cumode. False for WGP mode. |
41 | bool CUMode; |
42 | |
43 | /// Whether having image instructions. |
44 | bool HasImage = false; |
45 | |
46 | /// Target ID is device name followed by optional feature name postfixed |
47 | /// by plus or minus sign delimitted by colon, e.g. gfx908:xnack+:sramecc-. |
48 | /// If the target ID contains feature+, map it to true. |
49 | /// If the target ID contains feature-, map it to false. |
50 | /// If the target ID does not contain a feature (default), do not map it. |
51 | llvm::StringMap<bool> OffloadArchFeatures; |
52 | std::string TargetID; |
53 | |
54 | bool hasFP64() const { |
55 | return getTriple().getArch() == llvm::Triple::amdgcn || |
56 | !!(GPUFeatures & llvm::AMDGPU::FEATURE_FP64); |
57 | } |
58 | |
59 | /// Has fast fma f32 |
60 | bool hasFastFMAF() const { |
61 | return !!(GPUFeatures & llvm::AMDGPU::FEATURE_FAST_FMA_F32); |
62 | } |
63 | |
64 | /// Has fast fma f64 |
65 | bool hasFastFMA() const { |
66 | return getTriple().getArch() == llvm::Triple::amdgcn; |
67 | } |
68 | |
69 | bool hasFMAF() const { |
70 | return getTriple().getArch() == llvm::Triple::amdgcn || |
71 | !!(GPUFeatures & llvm::AMDGPU::FEATURE_FMA); |
72 | } |
73 | |
74 | bool hasFullRateDenormalsF32() const { |
75 | return !!(GPUFeatures & llvm::AMDGPU::FEATURE_FAST_DENORMAL_F32); |
76 | } |
77 | |
78 | bool hasLDEXPF() const { |
79 | return getTriple().getArch() == llvm::Triple::amdgcn || |
80 | !!(GPUFeatures & llvm::AMDGPU::FEATURE_LDEXP); |
81 | } |
82 | |
83 | static bool isAMDGCN(const llvm::Triple &TT) { |
84 | return TT.getArch() == llvm::Triple::amdgcn; |
85 | } |
86 | |
87 | static bool isR600(const llvm::Triple &TT) { |
88 | return TT.getArch() == llvm::Triple::r600; |
89 | } |
90 | |
91 | public: |
92 | AMDGPUTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts); |
93 | |
94 | void setAddressSpaceMap(bool DefaultIsPrivate); |
95 | |
96 | void adjust(DiagnosticsEngine &Diags, LangOptions &Opts) override; |
97 | |
98 | uint64_t getPointerWidthV(LangAS AS) const override { |
99 | if (isR600(TT: getTriple())) |
100 | return 32; |
101 | unsigned TargetAS = getTargetAddressSpace(AS); |
102 | |
103 | if (TargetAS == llvm::AMDGPUAS::PRIVATE_ADDRESS || |
104 | TargetAS == llvm::AMDGPUAS::LOCAL_ADDRESS) |
105 | return 32; |
106 | |
107 | return 64; |
108 | } |
109 | |
110 | uint64_t getPointerAlignV(LangAS AddrSpace) const override { |
111 | return getPointerWidthV(AS: AddrSpace); |
112 | } |
113 | |
114 | uint64_t getMaxPointerWidth() const override { |
115 | return getTriple().getArch() == llvm::Triple::amdgcn ? 64 : 32; |
116 | } |
117 | |
118 | bool hasBFloat16Type() const override { return isAMDGCN(TT: getTriple()); } |
119 | |
120 | std::string_view getClobbers() const override { return "" ; } |
121 | |
122 | ArrayRef<const char *> getGCCRegNames() const override; |
123 | |
124 | ArrayRef<TargetInfo::GCCRegAlias> getGCCRegAliases() const override { |
125 | return std::nullopt; |
126 | } |
127 | |
128 | /// Accepted register names: (n, m is unsigned integer, n < m) |
129 | /// v |
130 | /// s |
131 | /// a |
132 | /// {vn}, {v[n]} |
133 | /// {sn}, {s[n]} |
134 | /// {an}, {a[n]} |
135 | /// {S} , where S is a special register name |
136 | ////{v[n:m]} |
137 | /// {s[n:m]} |
138 | /// {a[n:m]} |
139 | bool validateAsmConstraint(const char *&Name, |
140 | TargetInfo::ConstraintInfo &Info) const override { |
141 | static const ::llvm::StringSet<> SpecialRegs({ |
142 | "exec" , "vcc" , "flat_scratch" , "m0" , "scc" , "tba" , "tma" , |
143 | "flat_scratch_lo" , "flat_scratch_hi" , "vcc_lo" , "vcc_hi" , "exec_lo" , |
144 | "exec_hi" , "tma_lo" , "tma_hi" , "tba_lo" , "tba_hi" , |
145 | }); |
146 | |
147 | switch (*Name) { |
148 | case 'I': |
149 | Info.setRequiresImmediate(Min: -16, Max: 64); |
150 | return true; |
151 | case 'J': |
152 | Info.setRequiresImmediate(Min: -32768, Max: 32767); |
153 | return true; |
154 | case 'A': |
155 | case 'B': |
156 | case 'C': |
157 | Info.setRequiresImmediate(); |
158 | return true; |
159 | default: |
160 | break; |
161 | } |
162 | |
163 | StringRef S(Name); |
164 | |
165 | if (S == "DA" || S == "DB" ) { |
166 | Name++; |
167 | Info.setRequiresImmediate(); |
168 | return true; |
169 | } |
170 | |
171 | bool HasLeftParen = S.consume_front(Prefix: "{" ); |
172 | if (S.empty()) |
173 | return false; |
174 | if (S.front() != 'v' && S.front() != 's' && S.front() != 'a') { |
175 | if (!HasLeftParen) |
176 | return false; |
177 | auto E = S.find(C: '}'); |
178 | if (!SpecialRegs.count(Key: S.substr(Start: 0, N: E))) |
179 | return false; |
180 | S = S.drop_front(N: E + 1); |
181 | if (!S.empty()) |
182 | return false; |
183 | // Found {S} where S is a special register. |
184 | Info.setAllowsRegister(); |
185 | Name = S.data() - 1; |
186 | return true; |
187 | } |
188 | S = S.drop_front(); |
189 | if (!HasLeftParen) { |
190 | if (!S.empty()) |
191 | return false; |
192 | // Found s, v or a. |
193 | Info.setAllowsRegister(); |
194 | Name = S.data() - 1; |
195 | return true; |
196 | } |
197 | bool HasLeftBracket = S.consume_front(Prefix: "[" ); |
198 | unsigned long long N; |
199 | if (S.empty() || consumeUnsignedInteger(Str&: S, Radix: 10, Result&: N)) |
200 | return false; |
201 | if (S.consume_front(Prefix: ":" )) { |
202 | if (!HasLeftBracket) |
203 | return false; |
204 | unsigned long long M; |
205 | if (consumeUnsignedInteger(Str&: S, Radix: 10, Result&: M) || N >= M) |
206 | return false; |
207 | } |
208 | if (HasLeftBracket) { |
209 | if (!S.consume_front(Prefix: "]" )) |
210 | return false; |
211 | } |
212 | if (!S.consume_front(Prefix: "}" )) |
213 | return false; |
214 | if (!S.empty()) |
215 | return false; |
216 | // Found {vn}, {sn}, {an}, {v[n]}, {s[n]}, {a[n]}, {v[n:m]}, {s[n:m]} |
217 | // or {a[n:m]}. |
218 | Info.setAllowsRegister(); |
219 | Name = S.data() - 1; |
220 | return true; |
221 | } |
222 | |
223 | // \p Constraint will be left pointing at the last character of |
224 | // the constraint. In practice, it won't be changed unless the |
225 | // constraint is longer than one character. |
226 | std::string convertConstraint(const char *&Constraint) const override { |
227 | |
228 | StringRef S(Constraint); |
229 | if (S == "DA" || S == "DB" ) { |
230 | return std::string("^" ) + std::string(Constraint++, 2); |
231 | } |
232 | |
233 | const char *Begin = Constraint; |
234 | TargetInfo::ConstraintInfo Info("" , "" ); |
235 | if (validateAsmConstraint(Name&: Constraint, Info)) |
236 | return std::string(Begin).substr(pos: 0, n: Constraint - Begin + 1); |
237 | |
238 | Constraint = Begin; |
239 | return std::string(1, *Constraint); |
240 | } |
241 | |
242 | bool |
243 | initFeatureMap(llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, |
244 | StringRef CPU, |
245 | const std::vector<std::string> &FeatureVec) const override; |
246 | |
247 | ArrayRef<Builtin::Info> getTargetBuiltins() const override; |
248 | |
249 | bool useFP16ConversionIntrinsics() const override { return false; } |
250 | |
251 | void getTargetDefines(const LangOptions &Opts, |
252 | MacroBuilder &Builder) const override; |
253 | |
254 | BuiltinVaListKind getBuiltinVaListKind() const override { |
255 | return TargetInfo::CharPtrBuiltinVaList; |
256 | } |
257 | |
258 | bool isValidCPUName(StringRef Name) const override { |
259 | if (getTriple().getArch() == llvm::Triple::amdgcn) |
260 | return llvm::AMDGPU::parseArchAMDGCN(CPU: Name) != llvm::AMDGPU::GK_NONE; |
261 | return llvm::AMDGPU::parseArchR600(CPU: Name) != llvm::AMDGPU::GK_NONE; |
262 | } |
263 | |
264 | void fillValidCPUList(SmallVectorImpl<StringRef> &Values) const override; |
265 | |
266 | bool setCPU(const std::string &Name) override { |
267 | if (getTriple().getArch() == llvm::Triple::amdgcn) { |
268 | GPUKind = llvm::AMDGPU::parseArchAMDGCN(CPU: Name); |
269 | GPUFeatures = llvm::AMDGPU::getArchAttrAMDGCN(AK: GPUKind); |
270 | } else { |
271 | GPUKind = llvm::AMDGPU::parseArchR600(CPU: Name); |
272 | GPUFeatures = llvm::AMDGPU::getArchAttrR600(AK: GPUKind); |
273 | } |
274 | |
275 | return GPUKind != llvm::AMDGPU::GK_NONE; |
276 | } |
277 | |
278 | void setSupportedOpenCLOpts() override { |
279 | auto &Opts = getSupportedOpenCLOpts(); |
280 | Opts["cl_clang_storage_class_specifiers" ] = true; |
281 | Opts["__cl_clang_variadic_functions" ] = true; |
282 | Opts["__cl_clang_function_pointers" ] = true; |
283 | Opts["__cl_clang_non_portable_kernel_param_types" ] = true; |
284 | Opts["__cl_clang_bitfields" ] = true; |
285 | |
286 | bool IsAMDGCN = isAMDGCN(TT: getTriple()); |
287 | |
288 | Opts["cl_khr_fp64" ] = hasFP64(); |
289 | Opts["__opencl_c_fp64" ] = hasFP64(); |
290 | |
291 | if (IsAMDGCN || GPUKind >= llvm::AMDGPU::GK_CEDAR) { |
292 | Opts["cl_khr_byte_addressable_store" ] = true; |
293 | Opts["cl_khr_global_int32_base_atomics" ] = true; |
294 | Opts["cl_khr_global_int32_extended_atomics" ] = true; |
295 | Opts["cl_khr_local_int32_base_atomics" ] = true; |
296 | Opts["cl_khr_local_int32_extended_atomics" ] = true; |
297 | } |
298 | |
299 | if (IsAMDGCN) { |
300 | Opts["cl_khr_fp16" ] = true; |
301 | Opts["cl_khr_int64_base_atomics" ] = true; |
302 | Opts["cl_khr_int64_extended_atomics" ] = true; |
303 | Opts["cl_khr_mipmap_image" ] = true; |
304 | Opts["cl_khr_mipmap_image_writes" ] = true; |
305 | Opts["cl_khr_subgroups" ] = true; |
306 | Opts["cl_amd_media_ops" ] = true; |
307 | Opts["cl_amd_media_ops2" ] = true; |
308 | |
309 | Opts["__opencl_c_images" ] = true; |
310 | Opts["__opencl_c_3d_image_writes" ] = true; |
311 | Opts["cl_khr_3d_image_writes" ] = true; |
312 | } |
313 | } |
314 | |
315 | LangAS getOpenCLTypeAddrSpace(OpenCLTypeKind TK) const override { |
316 | switch (TK) { |
317 | case OCLTK_Image: |
318 | return LangAS::opencl_constant; |
319 | |
320 | case OCLTK_ClkEvent: |
321 | case OCLTK_Queue: |
322 | case OCLTK_ReserveID: |
323 | return LangAS::opencl_global; |
324 | |
325 | default: |
326 | return TargetInfo::getOpenCLTypeAddrSpace(TK); |
327 | } |
328 | } |
329 | |
330 | LangAS getOpenCLBuiltinAddressSpace(unsigned AS) const override { |
331 | switch (AS) { |
332 | case 0: |
333 | return LangAS::opencl_generic; |
334 | case 1: |
335 | return LangAS::opencl_global; |
336 | case 3: |
337 | return LangAS::opencl_local; |
338 | case 4: |
339 | return LangAS::opencl_constant; |
340 | case 5: |
341 | return LangAS::opencl_private; |
342 | default: |
343 | return getLangASFromTargetAS(TargetAS: AS); |
344 | } |
345 | } |
346 | |
347 | LangAS getCUDABuiltinAddressSpace(unsigned AS) const override { |
348 | switch (AS) { |
349 | case 0: |
350 | return LangAS::Default; |
351 | case 1: |
352 | return LangAS::cuda_device; |
353 | case 3: |
354 | return LangAS::cuda_shared; |
355 | case 4: |
356 | return LangAS::cuda_constant; |
357 | default: |
358 | return getLangASFromTargetAS(TargetAS: AS); |
359 | } |
360 | } |
361 | |
362 | std::optional<LangAS> getConstantAddressSpace() const override { |
363 | return getLangASFromTargetAS(TargetAS: llvm::AMDGPUAS::CONSTANT_ADDRESS); |
364 | } |
365 | |
366 | const llvm::omp::GV &getGridValue() const override { |
367 | switch (WavefrontSize) { |
368 | case 32: |
369 | return llvm::omp::getAMDGPUGridValues<32>(); |
370 | case 64: |
371 | return llvm::omp::getAMDGPUGridValues<64>(); |
372 | default: |
373 | llvm_unreachable("getGridValue not implemented for this wavesize" ); |
374 | } |
375 | } |
376 | |
377 | /// \returns Target specific vtbl ptr address space. |
378 | unsigned getVtblPtrAddressSpace() const override { |
379 | return static_cast<unsigned>(llvm::AMDGPUAS::CONSTANT_ADDRESS); |
380 | } |
381 | |
382 | /// \returns If a target requires an address within a target specific address |
383 | /// space \p AddressSpace to be converted in order to be used, then return the |
384 | /// corresponding target specific DWARF address space. |
385 | /// |
386 | /// \returns Otherwise return std::nullopt and no conversion will be emitted |
387 | /// in the DWARF. |
388 | std::optional<unsigned> |
389 | getDWARFAddressSpace(unsigned AddressSpace) const override { |
390 | const unsigned DWARF_Private = 1; |
391 | const unsigned DWARF_Local = 2; |
392 | if (AddressSpace == llvm::AMDGPUAS::PRIVATE_ADDRESS) { |
393 | return DWARF_Private; |
394 | } else if (AddressSpace == llvm::AMDGPUAS::LOCAL_ADDRESS) { |
395 | return DWARF_Local; |
396 | } else { |
397 | return std::nullopt; |
398 | } |
399 | } |
400 | |
401 | CallingConvCheckResult checkCallingConvention(CallingConv CC) const override { |
402 | switch (CC) { |
403 | default: |
404 | return CCCR_Warning; |
405 | case CC_C: |
406 | case CC_OpenCLKernel: |
407 | case CC_AMDGPUKernelCall: |
408 | return CCCR_OK; |
409 | } |
410 | } |
411 | |
412 | // In amdgcn target the null pointer in global, constant, and generic |
413 | // address space has value 0 but in private and local address space has |
414 | // value ~0. |
415 | uint64_t getNullPointerValue(LangAS AS) const override { |
416 | // FIXME: Also should handle region. |
417 | return (AS == LangAS::opencl_local || AS == LangAS::opencl_private || |
418 | AS == LangAS::sycl_local || AS == LangAS::sycl_private) |
419 | ? ~0 |
420 | : 0; |
421 | } |
422 | |
423 | void setAuxTarget(const TargetInfo *Aux) override; |
424 | |
425 | bool hasBitIntType() const override { return true; } |
426 | |
427 | // Record offload arch features since they are needed for defining the |
428 | // pre-defined macros. |
429 | bool handleTargetFeatures(std::vector<std::string> &Features, |
430 | DiagnosticsEngine &Diags) override { |
431 | auto TargetIDFeatures = |
432 | getAllPossibleTargetIDFeatures(T: getTriple(), Processor: getArchNameAMDGCN(AK: GPUKind)); |
433 | for (const auto &F : Features) { |
434 | assert(F.front() == '+' || F.front() == '-'); |
435 | if (F == "+wavefrontsize64" ) |
436 | WavefrontSize = 64; |
437 | else if (F == "+cumode" ) |
438 | CUMode = true; |
439 | else if (F == "-cumode" ) |
440 | CUMode = false; |
441 | else if (F == "+image-insts" ) |
442 | HasImage = true; |
443 | bool IsOn = F.front() == '+'; |
444 | StringRef Name = StringRef(F).drop_front(); |
445 | if (!llvm::is_contained(Range&: TargetIDFeatures, Element: Name)) |
446 | continue; |
447 | assert(!OffloadArchFeatures.contains(Name)); |
448 | OffloadArchFeatures[Name] = IsOn; |
449 | } |
450 | return true; |
451 | } |
452 | |
453 | std::optional<std::string> getTargetID() const override { |
454 | if (!isAMDGCN(TT: getTriple())) |
455 | return std::nullopt; |
456 | // When -target-cpu is not set, we assume generic code that it is valid |
457 | // for all GPU and use an empty string as target ID to represent that. |
458 | if (GPUKind == llvm::AMDGPU::GK_NONE) |
459 | return std::string("" ); |
460 | return getCanonicalTargetID(Processor: getArchNameAMDGCN(AK: GPUKind), |
461 | Features: OffloadArchFeatures); |
462 | } |
463 | |
464 | bool hasHIPImageSupport() const override { return HasImage; } |
465 | }; |
466 | |
467 | } // namespace targets |
468 | } // namespace clang |
469 | |
470 | #endif // LLVM_CLANG_LIB_BASIC_TARGETS_AMDGPU_H |
471 | |