1 | //===--- AMDGPU.h - Declare AMDGPU target feature support -------*- C++ -*-===// |
---|---|
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file declares AMDGPU TargetInfo objects. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #ifndef LLVM_CLANG_LIB_BASIC_TARGETS_AMDGPU_H |
14 | #define LLVM_CLANG_LIB_BASIC_TARGETS_AMDGPU_H |
15 | |
16 | #include "clang/Basic/TargetID.h" |
17 | #include "clang/Basic/TargetInfo.h" |
18 | #include "clang/Basic/TargetOptions.h" |
19 | #include "llvm/ADT/StringSet.h" |
20 | #include "llvm/Support/AMDGPUAddrSpace.h" |
21 | #include "llvm/Support/Compiler.h" |
22 | #include "llvm/TargetParser/TargetParser.h" |
23 | #include "llvm/TargetParser/Triple.h" |
24 | #include <optional> |
25 | |
26 | namespace clang { |
27 | namespace targets { |
28 | |
29 | class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : public TargetInfo { |
30 | |
31 | static const char *const GCCRegNames[]; |
32 | |
33 | static const LangASMap AMDGPUDefIsGenMap; |
34 | static const LangASMap AMDGPUDefIsPrivMap; |
35 | |
36 | llvm::AMDGPU::GPUKind GPUKind; |
37 | unsigned GPUFeatures; |
38 | unsigned WavefrontSize; |
39 | |
40 | /// Whether to use cumode or WGP mode. True for cumode. False for WGP mode. |
41 | bool CUMode; |
42 | |
43 | /// Whether having image instructions. |
44 | bool HasImage = false; |
45 | |
46 | /// Target ID is device name followed by optional feature name postfixed |
47 | /// by plus or minus sign delimited by colon, e.g. gfx908:xnack+:sramecc-. |
48 | /// If the target ID contains feature+, map it to true. |
49 | /// If the target ID contains feature-, map it to false. |
50 | /// If the target ID does not contain a feature (default), do not map it. |
51 | llvm::StringMap<bool> OffloadArchFeatures; |
52 | std::string TargetID; |
53 | |
54 | bool hasFP64() const { |
55 | return getTriple().isAMDGCN() || |
56 | !!(GPUFeatures & llvm::AMDGPU::FEATURE_FP64); |
57 | } |
58 | |
59 | /// Has fast fma f32 |
60 | bool hasFastFMAF() const { |
61 | return !!(GPUFeatures & llvm::AMDGPU::FEATURE_FAST_FMA_F32); |
62 | } |
63 | |
64 | /// Has fast fma f64 |
65 | bool hasFastFMA() const { return getTriple().isAMDGCN(); } |
66 | |
67 | bool hasFMAF() const { |
68 | return getTriple().isAMDGCN() || |
69 | !!(GPUFeatures & llvm::AMDGPU::FEATURE_FMA); |
70 | } |
71 | |
72 | bool hasFullRateDenormalsF32() const { |
73 | return !!(GPUFeatures & llvm::AMDGPU::FEATURE_FAST_DENORMAL_F32); |
74 | } |
75 | |
76 | bool hasLDEXPF() const { |
77 | return getTriple().isAMDGCN() || |
78 | !!(GPUFeatures & llvm::AMDGPU::FEATURE_LDEXP); |
79 | } |
80 | |
81 | static bool isAMDGCN(const llvm::Triple &TT) { return TT.isAMDGCN(); } |
82 | |
83 | static bool isR600(const llvm::Triple &TT) { |
84 | return TT.getArch() == llvm::Triple::r600; |
85 | } |
86 | |
87 | public: |
88 | AMDGPUTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts); |
89 | |
90 | void setAddressSpaceMap(bool DefaultIsPrivate); |
91 | |
92 | void adjust(DiagnosticsEngine &Diags, LangOptions &Opts) override; |
93 | |
94 | uint64_t getPointerWidthV(LangAS AS) const override { |
95 | if (isR600(TT: getTriple())) |
96 | return 32; |
97 | unsigned TargetAS = getTargetAddressSpace(AS); |
98 | |
99 | if (TargetAS == llvm::AMDGPUAS::PRIVATE_ADDRESS || |
100 | TargetAS == llvm::AMDGPUAS::LOCAL_ADDRESS) |
101 | return 32; |
102 | |
103 | return 64; |
104 | } |
105 | |
106 | uint64_t getPointerAlignV(LangAS AddrSpace) const override { |
107 | return getPointerWidthV(AS: AddrSpace); |
108 | } |
109 | |
110 | virtual bool isAddressSpaceSupersetOf(LangAS A, LangAS B) const override { |
111 | // The flat address space AS(0) is a superset of all the other address |
112 | // spaces used by the backend target. |
113 | return A == B || |
114 | ((A == LangAS::Default || |
115 | (isTargetAddressSpace(AS: A) && |
116 | toTargetAddressSpace(AS: A) == llvm::AMDGPUAS::FLAT_ADDRESS)) && |
117 | isTargetAddressSpace(AS: B) && |
118 | toTargetAddressSpace(AS: B) >= llvm::AMDGPUAS::FLAT_ADDRESS && |
119 | toTargetAddressSpace(AS: B) <= llvm::AMDGPUAS::PRIVATE_ADDRESS && |
120 | toTargetAddressSpace(AS: B) != llvm::AMDGPUAS::REGION_ADDRESS); |
121 | } |
122 | |
123 | uint64_t getMaxPointerWidth() const override { |
124 | return getTriple().isAMDGCN() ? 64 : 32; |
125 | } |
126 | |
127 | bool hasBFloat16Type() const override { return isAMDGCN(TT: getTriple()); } |
128 | |
129 | std::string_view getClobbers() const override { return ""; } |
130 | |
131 | ArrayRef<const char *> getGCCRegNames() const override; |
132 | |
133 | ArrayRef<TargetInfo::GCCRegAlias> getGCCRegAliases() const override { |
134 | return {}; |
135 | } |
136 | |
137 | /// Accepted register names: (n, m is unsigned integer, n < m) |
138 | /// v |
139 | /// s |
140 | /// a |
141 | /// {vn}, {v[n]} |
142 | /// {sn}, {s[n]} |
143 | /// {an}, {a[n]} |
144 | /// {S} , where S is a special register name |
145 | ////{v[n:m]} |
146 | /// {s[n:m]} |
147 | /// {a[n:m]} |
148 | bool validateAsmConstraint(const char *&Name, |
149 | TargetInfo::ConstraintInfo &Info) const override { |
150 | static const ::llvm::StringSet<> SpecialRegs({ |
151 | "exec", "vcc", "flat_scratch", "m0", "scc", "tba", "tma", |
152 | "flat_scratch_lo", "flat_scratch_hi", "vcc_lo", "vcc_hi", "exec_lo", |
153 | "exec_hi", "tma_lo", "tma_hi", "tba_lo", "tba_hi", |
154 | }); |
155 | |
156 | switch (*Name) { |
157 | case 'I': |
158 | Info.setRequiresImmediate(Min: -16, Max: 64); |
159 | return true; |
160 | case 'J': |
161 | Info.setRequiresImmediate(Min: -32768, Max: 32767); |
162 | return true; |
163 | case 'A': |
164 | case 'B': |
165 | case 'C': |
166 | Info.setRequiresImmediate(); |
167 | return true; |
168 | default: |
169 | break; |
170 | } |
171 | |
172 | StringRef S(Name); |
173 | |
174 | if (S == "DA"|| S == "DB") { |
175 | Name++; |
176 | Info.setRequiresImmediate(); |
177 | return true; |
178 | } |
179 | |
180 | bool HasLeftParen = S.consume_front(Prefix: "{"); |
181 | if (S.empty()) |
182 | return false; |
183 | if (S.front() != 'v' && S.front() != 's' && S.front() != 'a') { |
184 | if (!HasLeftParen) |
185 | return false; |
186 | auto E = S.find(C: '}'); |
187 | if (!SpecialRegs.count(Key: S.substr(Start: 0, N: E))) |
188 | return false; |
189 | S = S.drop_front(N: E + 1); |
190 | if (!S.empty()) |
191 | return false; |
192 | // Found {S} where S is a special register. |
193 | Info.setAllowsRegister(); |
194 | Name = S.data() - 1; |
195 | return true; |
196 | } |
197 | S = S.drop_front(); |
198 | if (!HasLeftParen) { |
199 | if (!S.empty()) |
200 | return false; |
201 | // Found s, v or a. |
202 | Info.setAllowsRegister(); |
203 | Name = S.data() - 1; |
204 | return true; |
205 | } |
206 | bool HasLeftBracket = S.consume_front(Prefix: "["); |
207 | unsigned long long N; |
208 | if (S.empty() || consumeUnsignedInteger(Str&: S, Radix: 10, Result&: N)) |
209 | return false; |
210 | if (S.consume_front(Prefix: ":")) { |
211 | if (!HasLeftBracket) |
212 | return false; |
213 | unsigned long long M; |
214 | if (consumeUnsignedInteger(Str&: S, Radix: 10, Result&: M) || N >= M) |
215 | return false; |
216 | } |
217 | if (HasLeftBracket) { |
218 | if (!S.consume_front(Prefix: "]")) |
219 | return false; |
220 | } |
221 | if (!S.consume_front(Prefix: "}")) |
222 | return false; |
223 | if (!S.empty()) |
224 | return false; |
225 | // Found {vn}, {sn}, {an}, {v[n]}, {s[n]}, {a[n]}, {v[n:m]}, {s[n:m]} |
226 | // or {a[n:m]}. |
227 | Info.setAllowsRegister(); |
228 | Name = S.data() - 1; |
229 | return true; |
230 | } |
231 | |
232 | // \p Constraint will be left pointing at the last character of |
233 | // the constraint. In practice, it won't be changed unless the |
234 | // constraint is longer than one character. |
235 | std::string convertConstraint(const char *&Constraint) const override { |
236 | |
237 | StringRef S(Constraint); |
238 | if (S == "DA"|| S == "DB") { |
239 | return std::string("^") + std::string(Constraint++, 2); |
240 | } |
241 | |
242 | const char *Begin = Constraint; |
243 | TargetInfo::ConstraintInfo Info("", ""); |
244 | if (validateAsmConstraint(Name&: Constraint, Info)) |
245 | return std::string(Begin).substr(pos: 0, n: Constraint - Begin + 1); |
246 | |
247 | Constraint = Begin; |
248 | return std::string(1, *Constraint); |
249 | } |
250 | |
251 | bool |
252 | initFeatureMap(llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, |
253 | StringRef CPU, |
254 | const std::vector<std::string> &FeatureVec) const override; |
255 | |
256 | llvm::SmallVector<Builtin::InfosShard> getTargetBuiltins() const override; |
257 | |
258 | bool useFP16ConversionIntrinsics() const override { return false; } |
259 | |
260 | void getTargetDefines(const LangOptions &Opts, |
261 | MacroBuilder &Builder) const override; |
262 | |
263 | BuiltinVaListKind getBuiltinVaListKind() const override { |
264 | return TargetInfo::CharPtrBuiltinVaList; |
265 | } |
266 | |
267 | bool isValidCPUName(StringRef Name) const override { |
268 | if (getTriple().isAMDGCN()) |
269 | return llvm::AMDGPU::parseArchAMDGCN(CPU: Name) != llvm::AMDGPU::GK_NONE; |
270 | return llvm::AMDGPU::parseArchR600(CPU: Name) != llvm::AMDGPU::GK_NONE; |
271 | } |
272 | |
273 | void fillValidCPUList(SmallVectorImpl<StringRef> &Values) const override; |
274 | |
275 | bool setCPU(const std::string &Name) override { |
276 | if (getTriple().isAMDGCN()) { |
277 | GPUKind = llvm::AMDGPU::parseArchAMDGCN(CPU: Name); |
278 | GPUFeatures = llvm::AMDGPU::getArchAttrAMDGCN(AK: GPUKind); |
279 | } else { |
280 | GPUKind = llvm::AMDGPU::parseArchR600(CPU: Name); |
281 | GPUFeatures = llvm::AMDGPU::getArchAttrR600(AK: GPUKind); |
282 | } |
283 | |
284 | return GPUKind != llvm::AMDGPU::GK_NONE; |
285 | } |
286 | |
287 | void setSupportedOpenCLOpts() override { |
288 | auto &Opts = getSupportedOpenCLOpts(); |
289 | Opts["cl_clang_storage_class_specifiers"] = true; |
290 | Opts["__cl_clang_variadic_functions"] = true; |
291 | Opts["__cl_clang_function_pointers"] = true; |
292 | Opts["__cl_clang_non_portable_kernel_param_types"] = true; |
293 | Opts["__cl_clang_bitfields"] = true; |
294 | |
295 | bool IsAMDGCN = isAMDGCN(TT: getTriple()); |
296 | |
297 | Opts["cl_khr_fp64"] = hasFP64(); |
298 | Opts["__opencl_c_fp64"] = hasFP64(); |
299 | |
300 | if (IsAMDGCN || GPUKind >= llvm::AMDGPU::GK_CEDAR) { |
301 | Opts["cl_khr_byte_addressable_store"] = true; |
302 | Opts["cl_khr_global_int32_base_atomics"] = true; |
303 | Opts["cl_khr_global_int32_extended_atomics"] = true; |
304 | Opts["cl_khr_local_int32_base_atomics"] = true; |
305 | Opts["cl_khr_local_int32_extended_atomics"] = true; |
306 | } |
307 | |
308 | if (IsAMDGCN) { |
309 | Opts["cl_khr_fp16"] = true; |
310 | Opts["cl_khr_int64_base_atomics"] = true; |
311 | Opts["cl_khr_int64_extended_atomics"] = true; |
312 | Opts["cl_khr_mipmap_image"] = true; |
313 | Opts["cl_khr_mipmap_image_writes"] = true; |
314 | Opts["cl_khr_subgroups"] = true; |
315 | Opts["cl_amd_media_ops"] = true; |
316 | Opts["cl_amd_media_ops2"] = true; |
317 | |
318 | Opts["__opencl_c_images"] = true; |
319 | Opts["__opencl_c_3d_image_writes"] = true; |
320 | Opts["cl_khr_3d_image_writes"] = true; |
321 | |
322 | Opts["__opencl_c_generic_address_space"] = |
323 | GPUKind >= llvm::AMDGPU::GK_GFX700; |
324 | } |
325 | } |
326 | |
327 | LangAS getOpenCLTypeAddrSpace(OpenCLTypeKind TK) const override { |
328 | switch (TK) { |
329 | case OCLTK_Image: |
330 | return LangAS::opencl_constant; |
331 | |
332 | case OCLTK_ClkEvent: |
333 | case OCLTK_Queue: |
334 | case OCLTK_ReserveID: |
335 | return LangAS::opencl_global; |
336 | |
337 | default: |
338 | return TargetInfo::getOpenCLTypeAddrSpace(TK); |
339 | } |
340 | } |
341 | |
342 | LangAS getOpenCLBuiltinAddressSpace(unsigned AS) const override { |
343 | switch (AS) { |
344 | case 0: |
345 | return LangAS::opencl_generic; |
346 | case 1: |
347 | return LangAS::opencl_global; |
348 | case 3: |
349 | return LangAS::opencl_local; |
350 | case 4: |
351 | return LangAS::opencl_constant; |
352 | case 5: |
353 | return LangAS::opencl_private; |
354 | default: |
355 | return getLangASFromTargetAS(TargetAS: AS); |
356 | } |
357 | } |
358 | |
359 | LangAS getCUDABuiltinAddressSpace(unsigned AS) const override { |
360 | switch (AS) { |
361 | case 0: |
362 | return LangAS::Default; |
363 | case 1: |
364 | return LangAS::cuda_device; |
365 | case 3: |
366 | return LangAS::cuda_shared; |
367 | case 4: |
368 | return LangAS::cuda_constant; |
369 | default: |
370 | return getLangASFromTargetAS(TargetAS: AS); |
371 | } |
372 | } |
373 | |
374 | std::optional<LangAS> getConstantAddressSpace() const override { |
375 | return getLangASFromTargetAS(TargetAS: llvm::AMDGPUAS::CONSTANT_ADDRESS); |
376 | } |
377 | |
378 | const llvm::omp::GV &getGridValue() const override { |
379 | switch (WavefrontSize) { |
380 | case 32: |
381 | return llvm::omp::getAMDGPUGridValues<32>(); |
382 | case 64: |
383 | return llvm::omp::getAMDGPUGridValues<64>(); |
384 | default: |
385 | llvm_unreachable("getGridValue not implemented for this wavesize"); |
386 | } |
387 | } |
388 | |
389 | /// \returns Target specific vtbl ptr address space. |
390 | unsigned getVtblPtrAddressSpace() const override { |
391 | return static_cast<unsigned>(llvm::AMDGPUAS::CONSTANT_ADDRESS); |
392 | } |
393 | |
394 | /// \returns If a target requires an address within a target specific address |
395 | /// space \p AddressSpace to be converted in order to be used, then return the |
396 | /// corresponding target specific DWARF address space. |
397 | /// |
398 | /// \returns Otherwise return std::nullopt and no conversion will be emitted |
399 | /// in the DWARF. |
400 | std::optional<unsigned> |
401 | getDWARFAddressSpace(unsigned AddressSpace) const override { |
402 | const unsigned DWARF_Private = 1; |
403 | const unsigned DWARF_Local = 2; |
404 | if (AddressSpace == llvm::AMDGPUAS::PRIVATE_ADDRESS) { |
405 | return DWARF_Private; |
406 | } else if (AddressSpace == llvm::AMDGPUAS::LOCAL_ADDRESS) { |
407 | return DWARF_Local; |
408 | } else { |
409 | return std::nullopt; |
410 | } |
411 | } |
412 | |
413 | CallingConvCheckResult checkCallingConvention(CallingConv CC) const override { |
414 | switch (CC) { |
415 | default: |
416 | return CCCR_Warning; |
417 | case CC_C: |
418 | case CC_DeviceKernel: |
419 | return CCCR_OK; |
420 | } |
421 | } |
422 | |
423 | // In amdgcn target the null pointer in global, constant, and generic |
424 | // address space has value 0 but in private and local address space has |
425 | // value ~0. |
426 | uint64_t getNullPointerValue(LangAS AS) const override { |
427 | // FIXME: Also should handle region. |
428 | return (AS == LangAS::opencl_local || AS == LangAS::opencl_private || |
429 | AS == LangAS::sycl_local || AS == LangAS::sycl_private) |
430 | ? ~0 |
431 | : 0; |
432 | } |
433 | |
434 | void setAuxTarget(const TargetInfo *Aux) override; |
435 | |
436 | bool hasBitIntType() const override { return true; } |
437 | |
438 | // Record offload arch features since they are needed for defining the |
439 | // pre-defined macros. |
440 | bool handleTargetFeatures(std::vector<std::string> &Features, |
441 | DiagnosticsEngine &Diags) override { |
442 | auto TargetIDFeatures = |
443 | getAllPossibleTargetIDFeatures(T: getTriple(), Processor: getArchNameAMDGCN(AK: GPUKind)); |
444 | for (const auto &F : Features) { |
445 | assert(F.front() == '+' || F.front() == '-'); |
446 | if (F == "+wavefrontsize64") |
447 | WavefrontSize = 64; |
448 | else if (F == "+cumode") |
449 | CUMode = true; |
450 | else if (F == "-cumode") |
451 | CUMode = false; |
452 | else if (F == "+image-insts") |
453 | HasImage = true; |
454 | bool IsOn = F.front() == '+'; |
455 | StringRef Name = StringRef(F).drop_front(); |
456 | if (!llvm::is_contained(Range&: TargetIDFeatures, Element: Name)) |
457 | continue; |
458 | assert(!OffloadArchFeatures.contains(Name)); |
459 | OffloadArchFeatures[Name] = IsOn; |
460 | } |
461 | return true; |
462 | } |
463 | |
464 | std::optional<std::string> getTargetID() const override { |
465 | if (!isAMDGCN(TT: getTriple())) |
466 | return std::nullopt; |
467 | // When -target-cpu is not set, we assume generic code that it is valid |
468 | // for all GPU and use an empty string as target ID to represent that. |
469 | if (GPUKind == llvm::AMDGPU::GK_NONE) |
470 | return std::string(""); |
471 | return getCanonicalTargetID(Processor: getArchNameAMDGCN(AK: GPUKind), |
472 | Features: OffloadArchFeatures); |
473 | } |
474 | |
475 | bool hasHIPImageSupport() const override { return HasImage; } |
476 | |
477 | std::pair<unsigned, unsigned> hardwareInterferenceSizes() const override { |
478 | // This is imprecise as the value can vary between 64, 128 (even 256!) bytes |
479 | // depending on the level of cache and the target architecture. We select |
480 | // the size that corresponds to the largest L1 cache line for all |
481 | // architectures. |
482 | return std::make_pair(x: 128, y: 128); |
483 | } |
484 | }; |
485 | |
486 | } // namespace targets |
487 | } // namespace clang |
488 | |
489 | #endif // LLVM_CLANG_LIB_BASIC_TARGETS_AMDGPU_H |
490 |
Definitions
- AMDGPUTargetInfo
- hasFP64
- hasFastFMAF
- hasFastFMA
- hasFMAF
- hasFullRateDenormalsF32
- hasLDEXPF
- isAMDGCN
- isR600
- getPointerWidthV
- getPointerAlignV
- isAddressSpaceSupersetOf
- getMaxPointerWidth
- hasBFloat16Type
- getClobbers
- getGCCRegAliases
- validateAsmConstraint
- convertConstraint
- useFP16ConversionIntrinsics
- getBuiltinVaListKind
- isValidCPUName
- setCPU
- setSupportedOpenCLOpts
- getOpenCLTypeAddrSpace
- getOpenCLBuiltinAddressSpace
- getCUDABuiltinAddressSpace
- getConstantAddressSpace
- getGridValue
- getVtblPtrAddressSpace
- getDWARFAddressSpace
- checkCallingConvention
- getNullPointerValue
- hasBitIntType
- handleTargetFeatures
- getTargetID
- hasHIPImageSupport
Improve your Profiling and Debugging skills
Find out more