| 1 | //===--- AMDGPU.h - Declare AMDGPU target feature support -------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file declares AMDGPU TargetInfo objects. |
| 10 | // |
| 11 | //===----------------------------------------------------------------------===// |
| 12 | |
| 13 | #ifndef LLVM_CLANG_LIB_BASIC_TARGETS_AMDGPU_H |
| 14 | #define LLVM_CLANG_LIB_BASIC_TARGETS_AMDGPU_H |
| 15 | |
| 16 | #include "clang/Basic/TargetID.h" |
| 17 | #include "clang/Basic/TargetInfo.h" |
| 18 | #include "clang/Basic/TargetOptions.h" |
| 19 | #include "llvm/ADT/StringSet.h" |
| 20 | #include "llvm/Support/AMDGPUAddrSpace.h" |
| 21 | #include "llvm/Support/Compiler.h" |
| 22 | #include "llvm/TargetParser/TargetParser.h" |
| 23 | #include "llvm/TargetParser/Triple.h" |
| 24 | #include <optional> |
| 25 | |
| 26 | namespace clang { |
| 27 | namespace targets { |
| 28 | |
| 29 | class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : public TargetInfo { |
| 30 | |
| 31 | static const char *const GCCRegNames[]; |
| 32 | |
| 33 | static const LangASMap AMDGPUDefIsGenMap; |
| 34 | static const LangASMap AMDGPUDefIsPrivMap; |
| 35 | |
| 36 | llvm::AMDGPU::GPUKind GPUKind; |
| 37 | unsigned GPUFeatures; |
| 38 | unsigned WavefrontSize; |
| 39 | |
| 40 | /// Whether to use cumode or WGP mode. True for cumode. False for WGP mode. |
| 41 | bool CUMode; |
| 42 | |
| 43 | /// Whether having image instructions. |
| 44 | bool HasImage = false; |
| 45 | |
| 46 | /// Target ID is device name followed by optional feature name postfixed |
| 47 | /// by plus or minus sign delimitted by colon, e.g. gfx908:xnack+:sramecc-. |
| 48 | /// If the target ID contains feature+, map it to true. |
| 49 | /// If the target ID contains feature-, map it to false. |
| 50 | /// If the target ID does not contain a feature (default), do not map it. |
| 51 | llvm::StringMap<bool> OffloadArchFeatures; |
| 52 | std::string TargetID; |
| 53 | |
| 54 | bool hasFP64() const { |
| 55 | return getTriple().isAMDGCN() || |
| 56 | !!(GPUFeatures & llvm::AMDGPU::FEATURE_FP64); |
| 57 | } |
| 58 | |
| 59 | /// Has fast fma f32 |
| 60 | bool hasFastFMAF() const { |
| 61 | return !!(GPUFeatures & llvm::AMDGPU::FEATURE_FAST_FMA_F32); |
| 62 | } |
| 63 | |
| 64 | /// Has fast fma f64 |
| 65 | bool hasFastFMA() const { return getTriple().isAMDGCN(); } |
| 66 | |
| 67 | bool hasFMAF() const { |
| 68 | return getTriple().isAMDGCN() || |
| 69 | !!(GPUFeatures & llvm::AMDGPU::FEATURE_FMA); |
| 70 | } |
| 71 | |
| 72 | bool hasFullRateDenormalsF32() const { |
| 73 | return !!(GPUFeatures & llvm::AMDGPU::FEATURE_FAST_DENORMAL_F32); |
| 74 | } |
| 75 | |
| 76 | bool hasLDEXPF() const { |
| 77 | return getTriple().isAMDGCN() || |
| 78 | !!(GPUFeatures & llvm::AMDGPU::FEATURE_LDEXP); |
| 79 | } |
| 80 | |
| 81 | static bool isAMDGCN(const llvm::Triple &TT) { return TT.isAMDGCN(); } |
| 82 | |
| 83 | static bool isR600(const llvm::Triple &TT) { |
| 84 | return TT.getArch() == llvm::Triple::r600; |
| 85 | } |
| 86 | |
| 87 | public: |
| 88 | AMDGPUTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts); |
| 89 | |
| 90 | void setAddressSpaceMap(bool DefaultIsPrivate); |
| 91 | |
| 92 | void adjust(DiagnosticsEngine &Diags, LangOptions &Opts) override; |
| 93 | |
| 94 | uint64_t getPointerWidthV(LangAS AS) const override { |
| 95 | if (isR600(TT: getTriple())) |
| 96 | return 32; |
| 97 | unsigned TargetAS = getTargetAddressSpace(AS); |
| 98 | |
| 99 | if (TargetAS == llvm::AMDGPUAS::PRIVATE_ADDRESS || |
| 100 | TargetAS == llvm::AMDGPUAS::LOCAL_ADDRESS) |
| 101 | return 32; |
| 102 | |
| 103 | return 64; |
| 104 | } |
| 105 | |
| 106 | uint64_t getPointerAlignV(LangAS AddrSpace) const override { |
| 107 | return getPointerWidthV(AS: AddrSpace); |
| 108 | } |
| 109 | |
| 110 | virtual bool isAddressSpaceSupersetOf(LangAS A, LangAS B) const override { |
| 111 | // The flat address space AS(0) is a superset of all the other address |
| 112 | // spaces used by the backend target. |
| 113 | return A == B || |
| 114 | ((A == LangAS::Default || |
| 115 | (isTargetAddressSpace(AS: A) && |
| 116 | toTargetAddressSpace(AS: A) == llvm::AMDGPUAS::FLAT_ADDRESS)) && |
| 117 | isTargetAddressSpace(AS: B) && |
| 118 | toTargetAddressSpace(AS: B) >= llvm::AMDGPUAS::FLAT_ADDRESS && |
| 119 | toTargetAddressSpace(AS: B) <= llvm::AMDGPUAS::PRIVATE_ADDRESS && |
| 120 | toTargetAddressSpace(AS: B) != llvm::AMDGPUAS::REGION_ADDRESS); |
| 121 | } |
| 122 | |
| 123 | uint64_t getMaxPointerWidth() const override { |
| 124 | return getTriple().isAMDGCN() ? 64 : 32; |
| 125 | } |
| 126 | |
| 127 | bool hasBFloat16Type() const override { return isAMDGCN(TT: getTriple()); } |
| 128 | |
| 129 | std::string_view getClobbers() const override { return "" ; } |
| 130 | |
| 131 | ArrayRef<const char *> getGCCRegNames() const override; |
| 132 | |
| 133 | ArrayRef<TargetInfo::GCCRegAlias> getGCCRegAliases() const override { |
| 134 | return {}; |
| 135 | } |
| 136 | |
| 137 | /// Accepted register names: (n, m is unsigned integer, n < m) |
| 138 | /// v |
| 139 | /// s |
| 140 | /// a |
| 141 | /// {vn}, {v[n]} |
| 142 | /// {sn}, {s[n]} |
| 143 | /// {an}, {a[n]} |
| 144 | /// {S} , where S is a special register name |
| 145 | ////{v[n:m]} |
| 146 | /// {s[n:m]} |
| 147 | /// {a[n:m]} |
| 148 | bool validateAsmConstraint(const char *&Name, |
| 149 | TargetInfo::ConstraintInfo &Info) const override { |
| 150 | static const ::llvm::StringSet<> SpecialRegs({ |
| 151 | "exec" , "vcc" , "flat_scratch" , "m0" , "scc" , "tba" , "tma" , |
| 152 | "flat_scratch_lo" , "flat_scratch_hi" , "vcc_lo" , "vcc_hi" , "exec_lo" , |
| 153 | "exec_hi" , "tma_lo" , "tma_hi" , "tba_lo" , "tba_hi" , |
| 154 | }); |
| 155 | |
| 156 | switch (*Name) { |
| 157 | case 'I': |
| 158 | Info.setRequiresImmediate(Min: -16, Max: 64); |
| 159 | return true; |
| 160 | case 'J': |
| 161 | Info.setRequiresImmediate(Min: -32768, Max: 32767); |
| 162 | return true; |
| 163 | case 'A': |
| 164 | case 'B': |
| 165 | case 'C': |
| 166 | Info.setRequiresImmediate(); |
| 167 | return true; |
| 168 | default: |
| 169 | break; |
| 170 | } |
| 171 | |
| 172 | StringRef S(Name); |
| 173 | |
| 174 | if (S == "DA" || S == "DB" ) { |
| 175 | Name++; |
| 176 | Info.setRequiresImmediate(); |
| 177 | return true; |
| 178 | } |
| 179 | |
| 180 | bool HasLeftParen = S.consume_front(Prefix: "{" ); |
| 181 | if (S.empty()) |
| 182 | return false; |
| 183 | if (S.front() != 'v' && S.front() != 's' && S.front() != 'a') { |
| 184 | if (!HasLeftParen) |
| 185 | return false; |
| 186 | auto E = S.find(C: '}'); |
| 187 | if (!SpecialRegs.count(Key: S.substr(Start: 0, N: E))) |
| 188 | return false; |
| 189 | S = S.drop_front(N: E + 1); |
| 190 | if (!S.empty()) |
| 191 | return false; |
| 192 | // Found {S} where S is a special register. |
| 193 | Info.setAllowsRegister(); |
| 194 | Name = S.data() - 1; |
| 195 | return true; |
| 196 | } |
| 197 | S = S.drop_front(); |
| 198 | if (!HasLeftParen) { |
| 199 | if (!S.empty()) |
| 200 | return false; |
| 201 | // Found s, v or a. |
| 202 | Info.setAllowsRegister(); |
| 203 | Name = S.data() - 1; |
| 204 | return true; |
| 205 | } |
| 206 | bool HasLeftBracket = S.consume_front(Prefix: "[" ); |
| 207 | unsigned long long N; |
| 208 | if (S.empty() || consumeUnsignedInteger(Str&: S, Radix: 10, Result&: N)) |
| 209 | return false; |
| 210 | if (S.consume_front(Prefix: ":" )) { |
| 211 | if (!HasLeftBracket) |
| 212 | return false; |
| 213 | unsigned long long M; |
| 214 | if (consumeUnsignedInteger(Str&: S, Radix: 10, Result&: M) || N >= M) |
| 215 | return false; |
| 216 | } |
| 217 | if (HasLeftBracket) { |
| 218 | if (!S.consume_front(Prefix: "]" )) |
| 219 | return false; |
| 220 | } |
| 221 | if (!S.consume_front(Prefix: "}" )) |
| 222 | return false; |
| 223 | if (!S.empty()) |
| 224 | return false; |
| 225 | // Found {vn}, {sn}, {an}, {v[n]}, {s[n]}, {a[n]}, {v[n:m]}, {s[n:m]} |
| 226 | // or {a[n:m]}. |
| 227 | Info.setAllowsRegister(); |
| 228 | Name = S.data() - 1; |
| 229 | return true; |
| 230 | } |
| 231 | |
| 232 | // \p Constraint will be left pointing at the last character of |
| 233 | // the constraint. In practice, it won't be changed unless the |
| 234 | // constraint is longer than one character. |
| 235 | std::string convertConstraint(const char *&Constraint) const override { |
| 236 | |
| 237 | StringRef S(Constraint); |
| 238 | if (S == "DA" || S == "DB" ) { |
| 239 | return std::string("^" ) + std::string(Constraint++, 2); |
| 240 | } |
| 241 | |
| 242 | const char *Begin = Constraint; |
| 243 | TargetInfo::ConstraintInfo Info("" , "" ); |
| 244 | if (validateAsmConstraint(Name&: Constraint, Info)) |
| 245 | return std::string(Begin).substr(pos: 0, n: Constraint - Begin + 1); |
| 246 | |
| 247 | Constraint = Begin; |
| 248 | return std::string(1, *Constraint); |
| 249 | } |
| 250 | |
| 251 | bool |
| 252 | initFeatureMap(llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, |
| 253 | StringRef CPU, |
| 254 | const std::vector<std::string> &FeatureVec) const override; |
| 255 | |
| 256 | llvm::SmallVector<Builtin::InfosShard> getTargetBuiltins() const override; |
| 257 | |
| 258 | bool useFP16ConversionIntrinsics() const override { return false; } |
| 259 | |
| 260 | void getTargetDefines(const LangOptions &Opts, |
| 261 | MacroBuilder &Builder) const override; |
| 262 | |
| 263 | BuiltinVaListKind getBuiltinVaListKind() const override { |
| 264 | return TargetInfo::CharPtrBuiltinVaList; |
| 265 | } |
| 266 | |
| 267 | bool isValidCPUName(StringRef Name) const override { |
| 268 | if (getTriple().isAMDGCN()) |
| 269 | return llvm::AMDGPU::parseArchAMDGCN(CPU: Name) != llvm::AMDGPU::GK_NONE; |
| 270 | return llvm::AMDGPU::parseArchR600(CPU: Name) != llvm::AMDGPU::GK_NONE; |
| 271 | } |
| 272 | |
| 273 | void fillValidCPUList(SmallVectorImpl<StringRef> &Values) const override; |
| 274 | |
| 275 | bool setCPU(const std::string &Name) override { |
| 276 | if (getTriple().isAMDGCN()) { |
| 277 | GPUKind = llvm::AMDGPU::parseArchAMDGCN(CPU: Name); |
| 278 | GPUFeatures = llvm::AMDGPU::getArchAttrAMDGCN(AK: GPUKind); |
| 279 | } else { |
| 280 | GPUKind = llvm::AMDGPU::parseArchR600(CPU: Name); |
| 281 | GPUFeatures = llvm::AMDGPU::getArchAttrR600(AK: GPUKind); |
| 282 | } |
| 283 | |
| 284 | return GPUKind != llvm::AMDGPU::GK_NONE; |
| 285 | } |
| 286 | |
| 287 | void setSupportedOpenCLOpts() override { |
| 288 | auto &Opts = getSupportedOpenCLOpts(); |
| 289 | Opts["cl_clang_storage_class_specifiers" ] = true; |
| 290 | Opts["__cl_clang_variadic_functions" ] = true; |
| 291 | Opts["__cl_clang_function_pointers" ] = true; |
| 292 | Opts["__cl_clang_non_portable_kernel_param_types" ] = true; |
| 293 | Opts["__cl_clang_bitfields" ] = true; |
| 294 | |
| 295 | bool IsAMDGCN = isAMDGCN(TT: getTriple()); |
| 296 | |
| 297 | Opts["cl_khr_fp64" ] = hasFP64(); |
| 298 | Opts["__opencl_c_fp64" ] = hasFP64(); |
| 299 | |
| 300 | if (IsAMDGCN || GPUKind >= llvm::AMDGPU::GK_CEDAR) { |
| 301 | Opts["cl_khr_byte_addressable_store" ] = true; |
| 302 | Opts["cl_khr_global_int32_base_atomics" ] = true; |
| 303 | Opts["cl_khr_global_int32_extended_atomics" ] = true; |
| 304 | Opts["cl_khr_local_int32_base_atomics" ] = true; |
| 305 | Opts["cl_khr_local_int32_extended_atomics" ] = true; |
| 306 | } |
| 307 | |
| 308 | if (IsAMDGCN) { |
| 309 | Opts["cl_khr_fp16" ] = true; |
| 310 | Opts["cl_khr_int64_base_atomics" ] = true; |
| 311 | Opts["cl_khr_int64_extended_atomics" ] = true; |
| 312 | Opts["cl_khr_mipmap_image" ] = true; |
| 313 | Opts["cl_khr_mipmap_image_writes" ] = true; |
| 314 | Opts["cl_khr_subgroups" ] = true; |
| 315 | Opts["cl_amd_media_ops" ] = true; |
| 316 | Opts["cl_amd_media_ops2" ] = true; |
| 317 | |
| 318 | Opts["__opencl_c_images" ] = true; |
| 319 | Opts["__opencl_c_3d_image_writes" ] = true; |
| 320 | Opts["cl_khr_3d_image_writes" ] = true; |
| 321 | |
| 322 | Opts["__opencl_c_generic_address_space" ] = |
| 323 | GPUKind >= llvm::AMDGPU::GK_GFX700; |
| 324 | } |
| 325 | } |
| 326 | |
| 327 | LangAS getOpenCLTypeAddrSpace(OpenCLTypeKind TK) const override { |
| 328 | switch (TK) { |
| 329 | case OCLTK_Image: |
| 330 | return LangAS::opencl_constant; |
| 331 | |
| 332 | case OCLTK_ClkEvent: |
| 333 | case OCLTK_Queue: |
| 334 | case OCLTK_ReserveID: |
| 335 | return LangAS::opencl_global; |
| 336 | |
| 337 | default: |
| 338 | return TargetInfo::getOpenCLTypeAddrSpace(TK); |
| 339 | } |
| 340 | } |
| 341 | |
| 342 | LangAS getOpenCLBuiltinAddressSpace(unsigned AS) const override { |
| 343 | switch (AS) { |
| 344 | case 0: |
| 345 | return LangAS::opencl_generic; |
| 346 | case 1: |
| 347 | return LangAS::opencl_global; |
| 348 | case 3: |
| 349 | return LangAS::opencl_local; |
| 350 | case 4: |
| 351 | return LangAS::opencl_constant; |
| 352 | case 5: |
| 353 | return LangAS::opencl_private; |
| 354 | default: |
| 355 | return getLangASFromTargetAS(TargetAS: AS); |
| 356 | } |
| 357 | } |
| 358 | |
| 359 | LangAS getCUDABuiltinAddressSpace(unsigned AS) const override { |
| 360 | switch (AS) { |
| 361 | case 0: |
| 362 | return LangAS::Default; |
| 363 | case 1: |
| 364 | return LangAS::cuda_device; |
| 365 | case 3: |
| 366 | return LangAS::cuda_shared; |
| 367 | case 4: |
| 368 | return LangAS::cuda_constant; |
| 369 | default: |
| 370 | return getLangASFromTargetAS(TargetAS: AS); |
| 371 | } |
| 372 | } |
| 373 | |
| 374 | std::optional<LangAS> getConstantAddressSpace() const override { |
| 375 | return getLangASFromTargetAS(TargetAS: llvm::AMDGPUAS::CONSTANT_ADDRESS); |
| 376 | } |
| 377 | |
| 378 | const llvm::omp::GV &getGridValue() const override { |
| 379 | switch (WavefrontSize) { |
| 380 | case 32: |
| 381 | return llvm::omp::getAMDGPUGridValues<32>(); |
| 382 | case 64: |
| 383 | return llvm::omp::getAMDGPUGridValues<64>(); |
| 384 | default: |
| 385 | llvm_unreachable("getGridValue not implemented for this wavesize" ); |
| 386 | } |
| 387 | } |
| 388 | |
| 389 | /// \returns Target specific vtbl ptr address space. |
| 390 | unsigned getVtblPtrAddressSpace() const override { |
| 391 | return static_cast<unsigned>(llvm::AMDGPUAS::CONSTANT_ADDRESS); |
| 392 | } |
| 393 | |
| 394 | /// \returns If a target requires an address within a target specific address |
| 395 | /// space \p AddressSpace to be converted in order to be used, then return the |
| 396 | /// corresponding target specific DWARF address space. |
| 397 | /// |
| 398 | /// \returns Otherwise return std::nullopt and no conversion will be emitted |
| 399 | /// in the DWARF. |
| 400 | std::optional<unsigned> |
| 401 | getDWARFAddressSpace(unsigned AddressSpace) const override { |
| 402 | const unsigned DWARF_Private = 1; |
| 403 | const unsigned DWARF_Local = 2; |
| 404 | if (AddressSpace == llvm::AMDGPUAS::PRIVATE_ADDRESS) { |
| 405 | return DWARF_Private; |
| 406 | } else if (AddressSpace == llvm::AMDGPUAS::LOCAL_ADDRESS) { |
| 407 | return DWARF_Local; |
| 408 | } else { |
| 409 | return std::nullopt; |
| 410 | } |
| 411 | } |
| 412 | |
| 413 | CallingConvCheckResult checkCallingConvention(CallingConv CC) const override { |
| 414 | switch (CC) { |
| 415 | default: |
| 416 | return CCCR_Warning; |
| 417 | case CC_C: |
| 418 | case CC_DeviceKernel: |
| 419 | return CCCR_OK; |
| 420 | } |
| 421 | } |
| 422 | |
| 423 | // In amdgcn target the null pointer in global, constant, and generic |
| 424 | // address space has value 0 but in private and local address space has |
| 425 | // value ~0. |
| 426 | uint64_t getNullPointerValue(LangAS AS) const override { |
| 427 | // FIXME: Also should handle region. |
| 428 | return (AS == LangAS::opencl_local || AS == LangAS::opencl_private || |
| 429 | AS == LangAS::sycl_local || AS == LangAS::sycl_private) |
| 430 | ? ~0 |
| 431 | : 0; |
| 432 | } |
| 433 | |
| 434 | void setAuxTarget(const TargetInfo *Aux) override; |
| 435 | |
| 436 | bool hasBitIntType() const override { return true; } |
| 437 | |
| 438 | // Record offload arch features since they are needed for defining the |
| 439 | // pre-defined macros. |
| 440 | bool handleTargetFeatures(std::vector<std::string> &Features, |
| 441 | DiagnosticsEngine &Diags) override { |
| 442 | auto TargetIDFeatures = |
| 443 | getAllPossibleTargetIDFeatures(T: getTriple(), Processor: getArchNameAMDGCN(AK: GPUKind)); |
| 444 | for (const auto &F : Features) { |
| 445 | assert(F.front() == '+' || F.front() == '-'); |
| 446 | if (F == "+wavefrontsize64" ) |
| 447 | WavefrontSize = 64; |
| 448 | else if (F == "+cumode" ) |
| 449 | CUMode = true; |
| 450 | else if (F == "-cumode" ) |
| 451 | CUMode = false; |
| 452 | else if (F == "+image-insts" ) |
| 453 | HasImage = true; |
| 454 | bool IsOn = F.front() == '+'; |
| 455 | StringRef Name = StringRef(F).drop_front(); |
| 456 | if (!llvm::is_contained(Range&: TargetIDFeatures, Element: Name)) |
| 457 | continue; |
| 458 | assert(!OffloadArchFeatures.contains(Name)); |
| 459 | OffloadArchFeatures[Name] = IsOn; |
| 460 | } |
| 461 | return true; |
| 462 | } |
| 463 | |
| 464 | std::optional<std::string> getTargetID() const override { |
| 465 | if (!isAMDGCN(TT: getTriple())) |
| 466 | return std::nullopt; |
| 467 | // When -target-cpu is not set, we assume generic code that it is valid |
| 468 | // for all GPU and use an empty string as target ID to represent that. |
| 469 | if (GPUKind == llvm::AMDGPU::GK_NONE) |
| 470 | return std::string("" ); |
| 471 | return getCanonicalTargetID(Processor: getArchNameAMDGCN(AK: GPUKind), |
| 472 | Features: OffloadArchFeatures); |
| 473 | } |
| 474 | |
| 475 | bool hasHIPImageSupport() const override { return HasImage; } |
| 476 | |
| 477 | std::pair<unsigned, unsigned> hardwareInterferenceSizes() const override { |
| 478 | // This is imprecise as the value can vary between 64, 128 (even 256!) bytes |
| 479 | // depending on the level of cache and the target architecture. We select |
| 480 | // the size that corresponds to the largest L1 cache line for all |
| 481 | // architectures. |
| 482 | return std::make_pair(x: 128, y: 128); |
| 483 | } |
| 484 | }; |
| 485 | |
| 486 | } // namespace targets |
| 487 | } // namespace clang |
| 488 | |
| 489 | #endif // LLVM_CLANG_LIB_BASIC_TARGETS_AMDGPU_H |
| 490 | |