1 | #include "clang/Basic/Cuda.h" |
2 | |
3 | #include "llvm/ADT/StringRef.h" |
4 | #include "llvm/ADT/Twine.h" |
5 | #include "llvm/Support/ErrorHandling.h" |
6 | #include "llvm/Support/VersionTuple.h" |
7 | |
8 | namespace clang { |
9 | |
10 | struct CudaVersionMapEntry { |
11 | const char *Name; |
12 | CudaVersion Version; |
13 | llvm::VersionTuple TVersion; |
14 | }; |
15 | #define CUDA_ENTRY(major, minor) \ |
16 | { \ |
17 | #major "." #minor, CudaVersion::CUDA_##major##minor, \ |
18 | llvm::VersionTuple(major, minor) \ |
19 | } |
20 | |
21 | static const CudaVersionMapEntry CudaNameVersionMap[] = { |
22 | CUDA_ENTRY(7, 0), |
23 | CUDA_ENTRY(7, 5), |
24 | CUDA_ENTRY(8, 0), |
25 | CUDA_ENTRY(9, 0), |
26 | CUDA_ENTRY(9, 1), |
27 | CUDA_ENTRY(9, 2), |
28 | CUDA_ENTRY(10, 0), |
29 | CUDA_ENTRY(10, 1), |
30 | CUDA_ENTRY(10, 2), |
31 | CUDA_ENTRY(11, 0), |
32 | CUDA_ENTRY(11, 1), |
33 | CUDA_ENTRY(11, 2), |
34 | CUDA_ENTRY(11, 3), |
35 | CUDA_ENTRY(11, 4), |
36 | CUDA_ENTRY(11, 5), |
37 | CUDA_ENTRY(11, 6), |
38 | CUDA_ENTRY(11, 7), |
39 | CUDA_ENTRY(11, 8), |
40 | CUDA_ENTRY(12, 0), |
41 | CUDA_ENTRY(12, 1), |
42 | CUDA_ENTRY(12, 2), |
43 | CUDA_ENTRY(12, 3), |
44 | {.Name: "" , .Version: CudaVersion::NEW, .TVersion: llvm::VersionTuple(std::numeric_limits<int>::max())}, |
45 | {.Name: "unknown" , .Version: CudaVersion::UNKNOWN, .TVersion: {}} // End of list tombstone. |
46 | }; |
47 | #undef CUDA_ENTRY |
48 | |
49 | const char *CudaVersionToString(CudaVersion V) { |
50 | for (auto *I = CudaNameVersionMap; I->Version != CudaVersion::UNKNOWN; ++I) |
51 | if (I->Version == V) |
52 | return I->Name; |
53 | |
54 | return CudaVersionToString(V: CudaVersion::UNKNOWN); |
55 | } |
56 | |
57 | CudaVersion CudaStringToVersion(const llvm::Twine &S) { |
58 | std::string VS = S.str(); |
59 | for (auto *I = CudaNameVersionMap; I->Version != CudaVersion::UNKNOWN; ++I) |
60 | if (I->Name == VS) |
61 | return I->Version; |
62 | return CudaVersion::UNKNOWN; |
63 | } |
64 | |
65 | CudaVersion ToCudaVersion(llvm::VersionTuple Version) { |
66 | for (auto *I = CudaNameVersionMap; I->Version != CudaVersion::UNKNOWN; ++I) |
67 | if (I->TVersion == Version) |
68 | return I->Version; |
69 | return CudaVersion::UNKNOWN; |
70 | } |
71 | |
72 | namespace { |
73 | struct CudaArchToStringMap { |
74 | CudaArch arch; |
75 | const char *arch_name; |
76 | const char *virtual_arch_name; |
77 | }; |
78 | } // namespace |
79 | |
80 | #define SM2(sm, ca) \ |
81 | { CudaArch::SM_##sm, "sm_" #sm, ca } |
82 | #define SM(sm) SM2(sm, "compute_" #sm) |
83 | #define GFX(gpu) \ |
84 | { CudaArch::GFX##gpu, "gfx" #gpu, "compute_amdgcn" } |
85 | static const CudaArchToStringMap arch_names[] = { |
86 | // clang-format off |
87 | {.arch: CudaArch::UNUSED, .arch_name: "" , .virtual_arch_name: "" }, |
88 | SM2(20, "compute_20" ), SM2(21, "compute_20" ), // Fermi |
89 | SM(30), {.arch: CudaArch::SM_32_, .arch_name: "sm_32" , .virtual_arch_name: "compute_32" }, SM(35), SM(37), // Kepler |
90 | SM(50), SM(52), SM(53), // Maxwell |
91 | SM(60), SM(61), SM(62), // Pascal |
92 | SM(70), SM(72), // Volta |
93 | SM(75), // Turing |
94 | SM(80), SM(86), // Ampere |
95 | SM(87), // Jetson/Drive AGX Orin |
96 | SM(89), // Ada Lovelace |
97 | SM(90), // Hopper |
98 | SM(90a), // Hopper |
99 | GFX(600), // gfx600 |
100 | GFX(601), // gfx601 |
101 | GFX(602), // gfx602 |
102 | GFX(700), // gfx700 |
103 | GFX(701), // gfx701 |
104 | GFX(702), // gfx702 |
105 | GFX(703), // gfx703 |
106 | GFX(704), // gfx704 |
107 | GFX(705), // gfx705 |
108 | GFX(801), // gfx801 |
109 | GFX(802), // gfx802 |
110 | GFX(803), // gfx803 |
111 | GFX(805), // gfx805 |
112 | GFX(810), // gfx810 |
113 | GFX(900), // gfx900 |
114 | GFX(902), // gfx902 |
115 | GFX(904), // gfx903 |
116 | GFX(906), // gfx906 |
117 | GFX(908), // gfx908 |
118 | GFX(909), // gfx909 |
119 | GFX(90a), // gfx90a |
120 | GFX(90c), // gfx90c |
121 | GFX(940), // gfx940 |
122 | GFX(941), // gfx941 |
123 | GFX(942), // gfx942 |
124 | GFX(1010), // gfx1010 |
125 | GFX(1011), // gfx1011 |
126 | GFX(1012), // gfx1012 |
127 | GFX(1013), // gfx1013 |
128 | GFX(1030), // gfx1030 |
129 | GFX(1031), // gfx1031 |
130 | GFX(1032), // gfx1032 |
131 | GFX(1033), // gfx1033 |
132 | GFX(1034), // gfx1034 |
133 | GFX(1035), // gfx1035 |
134 | GFX(1036), // gfx1036 |
135 | GFX(1100), // gfx1100 |
136 | GFX(1101), // gfx1101 |
137 | GFX(1102), // gfx1102 |
138 | GFX(1103), // gfx1103 |
139 | GFX(1150), // gfx1150 |
140 | GFX(1151), // gfx1151 |
141 | GFX(1200), // gfx1200 |
142 | GFX(1201), // gfx1201 |
143 | {.arch: CudaArch::Generic, .arch_name: "generic" , .virtual_arch_name: "" }, |
144 | // clang-format on |
145 | }; |
146 | #undef SM |
147 | #undef SM2 |
148 | #undef GFX |
149 | |
150 | const char *CudaArchToString(CudaArch A) { |
151 | auto result = std::find_if( |
152 | first: std::begin(arr: arch_names), last: std::end(arr: arch_names), |
153 | pred: [A](const CudaArchToStringMap &map) { return A == map.arch; }); |
154 | if (result == std::end(arr: arch_names)) |
155 | return "unknown" ; |
156 | return result->arch_name; |
157 | } |
158 | |
159 | const char *CudaArchToVirtualArchString(CudaArch A) { |
160 | auto result = std::find_if( |
161 | first: std::begin(arr: arch_names), last: std::end(arr: arch_names), |
162 | pred: [A](const CudaArchToStringMap &map) { return A == map.arch; }); |
163 | if (result == std::end(arr: arch_names)) |
164 | return "unknown" ; |
165 | return result->virtual_arch_name; |
166 | } |
167 | |
168 | CudaArch StringToCudaArch(llvm::StringRef S) { |
169 | auto result = std::find_if( |
170 | first: std::begin(arr: arch_names), last: std::end(arr: arch_names), |
171 | pred: [S](const CudaArchToStringMap &map) { return S == map.arch_name; }); |
172 | if (result == std::end(arr: arch_names)) |
173 | return CudaArch::UNKNOWN; |
174 | return result->arch; |
175 | } |
176 | |
177 | CudaVersion MinVersionForCudaArch(CudaArch A) { |
178 | if (A == CudaArch::UNKNOWN) |
179 | return CudaVersion::UNKNOWN; |
180 | |
181 | // AMD GPUs do not depend on CUDA versions. |
182 | if (IsAMDGpuArch(A)) |
183 | return CudaVersion::CUDA_70; |
184 | |
185 | switch (A) { |
186 | case CudaArch::SM_20: |
187 | case CudaArch::SM_21: |
188 | case CudaArch::SM_30: |
189 | case CudaArch::SM_32_: |
190 | case CudaArch::SM_35: |
191 | case CudaArch::SM_37: |
192 | case CudaArch::SM_50: |
193 | case CudaArch::SM_52: |
194 | case CudaArch::SM_53: |
195 | return CudaVersion::CUDA_70; |
196 | case CudaArch::SM_60: |
197 | case CudaArch::SM_61: |
198 | case CudaArch::SM_62: |
199 | return CudaVersion::CUDA_80; |
200 | case CudaArch::SM_70: |
201 | return CudaVersion::CUDA_90; |
202 | case CudaArch::SM_72: |
203 | return CudaVersion::CUDA_91; |
204 | case CudaArch::SM_75: |
205 | return CudaVersion::CUDA_100; |
206 | case CudaArch::SM_80: |
207 | return CudaVersion::CUDA_110; |
208 | case CudaArch::SM_86: |
209 | return CudaVersion::CUDA_111; |
210 | case CudaArch::SM_87: |
211 | return CudaVersion::CUDA_114; |
212 | case CudaArch::SM_89: |
213 | case CudaArch::SM_90: |
214 | return CudaVersion::CUDA_118; |
215 | case CudaArch::SM_90a: |
216 | return CudaVersion::CUDA_120; |
217 | default: |
218 | llvm_unreachable("invalid enum" ); |
219 | } |
220 | } |
221 | |
222 | CudaVersion MaxVersionForCudaArch(CudaArch A) { |
223 | // AMD GPUs do not depend on CUDA versions. |
224 | if (IsAMDGpuArch(A)) |
225 | return CudaVersion::NEW; |
226 | |
227 | switch (A) { |
228 | case CudaArch::UNKNOWN: |
229 | return CudaVersion::UNKNOWN; |
230 | case CudaArch::SM_20: |
231 | case CudaArch::SM_21: |
232 | return CudaVersion::CUDA_80; |
233 | case CudaArch::SM_30: |
234 | case CudaArch::SM_32_: |
235 | return CudaVersion::CUDA_102; |
236 | case CudaArch::SM_35: |
237 | case CudaArch::SM_37: |
238 | return CudaVersion::CUDA_118; |
239 | default: |
240 | return CudaVersion::NEW; |
241 | } |
242 | } |
243 | |
244 | bool CudaFeatureEnabled(llvm::VersionTuple Version, CudaFeature Feature) { |
245 | return CudaFeatureEnabled(ToCudaVersion(Version), Feature); |
246 | } |
247 | |
248 | bool CudaFeatureEnabled(CudaVersion Version, CudaFeature Feature) { |
249 | switch (Feature) { |
250 | case CudaFeature::CUDA_USES_NEW_LAUNCH: |
251 | return Version >= CudaVersion::CUDA_92; |
252 | case CudaFeature::CUDA_USES_FATBIN_REGISTER_END: |
253 | return Version >= CudaVersion::CUDA_101; |
254 | } |
255 | llvm_unreachable("Unknown CUDA feature." ); |
256 | } |
257 | } // namespace clang |
258 | |