1 | //===--- AMDGPU.cpp - AMDGPU ToolChain Implementations ----------*- C++ -*-===// |
---|---|
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "AMDGPU.h" |
10 | #include "clang/Basic/TargetID.h" |
11 | #include "clang/Config/config.h" |
12 | #include "clang/Driver/CommonArgs.h" |
13 | #include "clang/Driver/Compilation.h" |
14 | #include "clang/Driver/InputInfo.h" |
15 | #include "clang/Driver/Options.h" |
16 | #include "clang/Driver/SanitizerArgs.h" |
17 | #include "llvm/ADT/StringExtras.h" |
18 | #include "llvm/Option/ArgList.h" |
19 | #include "llvm/Support/Error.h" |
20 | #include "llvm/Support/LineIterator.h" |
21 | #include "llvm/Support/Path.h" |
22 | #include "llvm/Support/Process.h" |
23 | #include "llvm/Support/VirtualFileSystem.h" |
24 | #include "llvm/TargetParser/Host.h" |
25 | #include <optional> |
26 | #include <system_error> |
27 | |
28 | using namespace clang::driver; |
29 | using namespace clang::driver::tools; |
30 | using namespace clang::driver::toolchains; |
31 | using namespace clang; |
32 | using namespace llvm::opt; |
33 | |
34 | void RocmInstallationDetector::scanLibDevicePath(llvm::StringRef Path) { |
35 | assert(!Path.empty()); |
36 | |
37 | const StringRef Suffix(".bc"); |
38 | const StringRef Suffix2(".amdgcn.bc"); |
39 | |
40 | std::error_code EC; |
41 | for (llvm::vfs::directory_iterator LI = D.getVFS().dir_begin(Dir: Path, EC), LE; |
42 | !EC && LI != LE; LI = LI.increment(EC)) { |
43 | StringRef FilePath = LI->path(); |
44 | StringRef FileName = llvm::sys::path::filename(path: FilePath); |
45 | if (!FileName.ends_with(Suffix)) |
46 | continue; |
47 | |
48 | StringRef BaseName; |
49 | if (FileName.ends_with(Suffix: Suffix2)) |
50 | BaseName = FileName.drop_back(N: Suffix2.size()); |
51 | else if (FileName.ends_with(Suffix)) |
52 | BaseName = FileName.drop_back(N: Suffix.size()); |
53 | |
54 | const StringRef ABIVersionPrefix = "oclc_abi_version_"; |
55 | if (BaseName == "ocml") { |
56 | OCML = FilePath; |
57 | } else if (BaseName == "ockl") { |
58 | OCKL = FilePath; |
59 | } else if (BaseName == "opencl") { |
60 | OpenCL = FilePath; |
61 | } else if (BaseName == "asanrtl") { |
62 | AsanRTL = FilePath; |
63 | } else if (BaseName == "oclc_finite_only_off") { |
64 | FiniteOnly.Off = FilePath; |
65 | } else if (BaseName == "oclc_finite_only_on") { |
66 | FiniteOnly.On = FilePath; |
67 | } else if (BaseName == "oclc_daz_opt_on") { |
68 | DenormalsAreZero.On = FilePath; |
69 | } else if (BaseName == "oclc_daz_opt_off") { |
70 | DenormalsAreZero.Off = FilePath; |
71 | } else if (BaseName == "oclc_correctly_rounded_sqrt_on") { |
72 | CorrectlyRoundedSqrt.On = FilePath; |
73 | } else if (BaseName == "oclc_correctly_rounded_sqrt_off") { |
74 | CorrectlyRoundedSqrt.Off = FilePath; |
75 | } else if (BaseName == "oclc_unsafe_math_on") { |
76 | UnsafeMath.On = FilePath; |
77 | } else if (BaseName == "oclc_unsafe_math_off") { |
78 | UnsafeMath.Off = FilePath; |
79 | } else if (BaseName == "oclc_wavefrontsize64_on") { |
80 | WavefrontSize64.On = FilePath; |
81 | } else if (BaseName == "oclc_wavefrontsize64_off") { |
82 | WavefrontSize64.Off = FilePath; |
83 | } else if (BaseName.starts_with(Prefix: ABIVersionPrefix)) { |
84 | unsigned ABIVersionNumber; |
85 | if (BaseName.drop_front(N: ABIVersionPrefix.size()) |
86 | .getAsInteger(/*Redex=*/Radix: 0, Result&: ABIVersionNumber)) |
87 | continue; |
88 | ABIVersionMap[ABIVersionNumber] = FilePath.str(); |
89 | } else { |
90 | // Process all bitcode filenames that look like |
91 | // ocl_isa_version_XXX.amdgcn.bc |
92 | const StringRef DeviceLibPrefix = "oclc_isa_version_"; |
93 | if (!BaseName.starts_with(Prefix: DeviceLibPrefix)) |
94 | continue; |
95 | |
96 | StringRef IsaVersionNumber = |
97 | BaseName.drop_front(N: DeviceLibPrefix.size()); |
98 | |
99 | llvm::Twine GfxName = Twine("gfx") + IsaVersionNumber; |
100 | SmallString<8> Tmp; |
101 | LibDeviceMap.insert( |
102 | KV: std::make_pair(x: GfxName.toStringRef(Out&: Tmp), y: FilePath.str())); |
103 | } |
104 | } |
105 | } |
106 | |
107 | // Parse and extract version numbers from `.hipVersion`. Return `true` if |
108 | // the parsing fails. |
109 | bool RocmInstallationDetector::parseHIPVersionFile(llvm::StringRef V) { |
110 | SmallVector<StringRef, 4> VersionParts; |
111 | V.split(A&: VersionParts, Separator: '\n'); |
112 | unsigned Major = ~0U; |
113 | unsigned Minor = ~0U; |
114 | for (auto Part : VersionParts) { |
115 | auto Splits = Part.rtrim().split(Separator: '='); |
116 | if (Splits.first == "HIP_VERSION_MAJOR") { |
117 | if (Splits.second.getAsInteger(Radix: 0, Result&: Major)) |
118 | return true; |
119 | } else if (Splits.first == "HIP_VERSION_MINOR") { |
120 | if (Splits.second.getAsInteger(Radix: 0, Result&: Minor)) |
121 | return true; |
122 | } else if (Splits.first == "HIP_VERSION_PATCH") |
123 | VersionPatch = Splits.second.str(); |
124 | } |
125 | if (Major == ~0U || Minor == ~0U) |
126 | return true; |
127 | VersionMajorMinor = llvm::VersionTuple(Major, Minor); |
128 | DetectedVersion = |
129 | (Twine(Major) + "."+ Twine(Minor) + "."+ VersionPatch).str(); |
130 | return false; |
131 | } |
132 | |
133 | /// \returns a list of candidate directories for ROCm installation, which is |
134 | /// cached and populated only once. |
135 | const SmallVectorImpl<RocmInstallationDetector::Candidate> & |
136 | RocmInstallationDetector::getInstallationPathCandidates() { |
137 | |
138 | // Return the cached candidate list if it has already been populated. |
139 | if (!ROCmSearchDirs.empty()) |
140 | return ROCmSearchDirs; |
141 | |
142 | auto DoPrintROCmSearchDirs = [&]() { |
143 | if (PrintROCmSearchDirs) |
144 | for (auto Cand : ROCmSearchDirs) { |
145 | llvm::errs() << "ROCm installation search path: "<< Cand.Path << '\n'; |
146 | } |
147 | }; |
148 | |
149 | // For candidate specified by --rocm-path we do not do strict check, i.e., |
150 | // checking existence of HIP version file and device library files. |
151 | if (!RocmPathArg.empty()) { |
152 | ROCmSearchDirs.emplace_back(Args: RocmPathArg.str()); |
153 | DoPrintROCmSearchDirs(); |
154 | return ROCmSearchDirs; |
155 | } else if (std::optional<std::string> RocmPathEnv = |
156 | llvm::sys::Process::GetEnv(name: "ROCM_PATH")) { |
157 | if (!RocmPathEnv->empty()) { |
158 | ROCmSearchDirs.emplace_back(Args: std::move(*RocmPathEnv)); |
159 | DoPrintROCmSearchDirs(); |
160 | return ROCmSearchDirs; |
161 | } |
162 | } |
163 | |
164 | // Try to find relative to the compiler binary. |
165 | StringRef InstallDir = D.Dir; |
166 | |
167 | // Check both a normal Unix prefix position of the clang binary, as well as |
168 | // the Windows-esque layout the ROCm packages use with the host architecture |
169 | // subdirectory of bin. |
170 | auto DeduceROCmPath = [](StringRef ClangPath) { |
171 | // Strip off directory (usually bin) |
172 | StringRef ParentDir = llvm::sys::path::parent_path(path: ClangPath); |
173 | StringRef ParentName = llvm::sys::path::filename(path: ParentDir); |
174 | |
175 | // Some builds use bin/{host arch}, so go up again. |
176 | if (ParentName == "bin") { |
177 | ParentDir = llvm::sys::path::parent_path(path: ParentDir); |
178 | ParentName = llvm::sys::path::filename(path: ParentDir); |
179 | } |
180 | |
181 | // Some versions of the rocm llvm package install to /opt/rocm/llvm/bin |
182 | // Some versions of the aomp package install to /opt/rocm/aomp/bin |
183 | if (ParentName == "llvm"|| ParentName.starts_with(Prefix: "aomp")) |
184 | ParentDir = llvm::sys::path::parent_path(path: ParentDir); |
185 | |
186 | return Candidate(ParentDir.str(), /*StrictChecking=*/true); |
187 | }; |
188 | |
189 | // Deduce ROCm path by the path used to invoke clang. Do not resolve symbolic |
190 | // link of clang itself. |
191 | ROCmSearchDirs.emplace_back(Args: DeduceROCmPath(InstallDir)); |
192 | |
193 | // Deduce ROCm path by the real path of the invoked clang, resolving symbolic |
194 | // link of clang itself. |
195 | llvm::SmallString<256> RealClangPath; |
196 | llvm::sys::fs::real_path(path: D.getClangProgramPath(), output&: RealClangPath); |
197 | auto ParentPath = llvm::sys::path::parent_path(path: RealClangPath); |
198 | if (ParentPath != InstallDir) |
199 | ROCmSearchDirs.emplace_back(Args: DeduceROCmPath(ParentPath)); |
200 | |
201 | // Device library may be installed in clang or resource directory. |
202 | auto ClangRoot = llvm::sys::path::parent_path(path: InstallDir); |
203 | auto RealClangRoot = llvm::sys::path::parent_path(path: ParentPath); |
204 | ROCmSearchDirs.emplace_back(Args: ClangRoot.str(), /*StrictChecking=*/Args: true); |
205 | if (RealClangRoot != ClangRoot) |
206 | ROCmSearchDirs.emplace_back(Args: RealClangRoot.str(), /*StrictChecking=*/Args: true); |
207 | ROCmSearchDirs.emplace_back(Args: D.ResourceDir, |
208 | /*StrictChecking=*/Args: true); |
209 | |
210 | ROCmSearchDirs.emplace_back(Args: D.SysRoot + "/opt/rocm", |
211 | /*StrictChecking=*/Args: true); |
212 | |
213 | // Find the latest /opt/rocm-{release} directory. |
214 | std::error_code EC; |
215 | std::string LatestROCm; |
216 | llvm::VersionTuple LatestVer; |
217 | // Get ROCm version from ROCm directory name. |
218 | auto GetROCmVersion = [](StringRef DirName) { |
219 | llvm::VersionTuple V; |
220 | std::string VerStr = DirName.drop_front(N: strlen(s: "rocm-")).str(); |
221 | // The ROCm directory name follows the format of |
222 | // rocm-{major}.{minor}.{subMinor}[-{build}] |
223 | llvm::replace(Range&: VerStr, OldValue: '-', NewValue: '.'); |
224 | V.tryParse(string: VerStr); |
225 | return V; |
226 | }; |
227 | for (llvm::vfs::directory_iterator |
228 | File = D.getVFS().dir_begin(Dir: D.SysRoot + "/opt", EC), |
229 | FileEnd; |
230 | File != FileEnd && !EC; File.increment(EC)) { |
231 | llvm::StringRef FileName = llvm::sys::path::filename(path: File->path()); |
232 | if (!FileName.starts_with(Prefix: "rocm-")) |
233 | continue; |
234 | if (LatestROCm.empty()) { |
235 | LatestROCm = FileName.str(); |
236 | LatestVer = GetROCmVersion(LatestROCm); |
237 | continue; |
238 | } |
239 | auto Ver = GetROCmVersion(FileName); |
240 | if (LatestVer < Ver) { |
241 | LatestROCm = FileName.str(); |
242 | LatestVer = Ver; |
243 | } |
244 | } |
245 | if (!LatestROCm.empty()) |
246 | ROCmSearchDirs.emplace_back(Args: D.SysRoot + "/opt/"+ LatestROCm, |
247 | /*StrictChecking=*/Args: true); |
248 | |
249 | ROCmSearchDirs.emplace_back(Args: D.SysRoot + "/usr/local", |
250 | /*StrictChecking=*/Args: true); |
251 | ROCmSearchDirs.emplace_back(Args: D.SysRoot + "/usr", |
252 | /*StrictChecking=*/Args: true); |
253 | |
254 | DoPrintROCmSearchDirs(); |
255 | return ROCmSearchDirs; |
256 | } |
257 | |
258 | RocmInstallationDetector::RocmInstallationDetector( |
259 | const Driver &D, const llvm::Triple &HostTriple, |
260 | const llvm::opt::ArgList &Args, bool DetectHIPRuntime, bool DetectDeviceLib) |
261 | : D(D) { |
262 | Verbose = Args.hasArg(options::OPT_v); |
263 | RocmPathArg = Args.getLastArgValue(clang::driver::options::Id: OPT_rocm_path_EQ); |
264 | PrintROCmSearchDirs = |
265 | Args.hasArg(clang::driver::options::OPT_print_rocm_search_dirs); |
266 | RocmDeviceLibPathArg = |
267 | Args.getAllArgValues(clang::driver::options::Id: OPT_rocm_device_lib_path_EQ); |
268 | HIPPathArg = Args.getLastArgValue(clang::driver::options::Id: OPT_hip_path_EQ); |
269 | HIPStdParPathArg = |
270 | Args.getLastArgValue(clang::driver::options::Id: OPT_hipstdpar_path_EQ); |
271 | HasHIPStdParLibrary = |
272 | !HIPStdParPathArg.empty() && D.getVFS().exists(Path: HIPStdParPathArg + |
273 | "/hipstdpar_lib.hpp"); |
274 | HIPRocThrustPathArg = |
275 | Args.getLastArgValue(clang::driver::options::Id: OPT_hipstdpar_thrust_path_EQ); |
276 | HasRocThrustLibrary = !HIPRocThrustPathArg.empty() && |
277 | D.getVFS().exists(Path: HIPRocThrustPathArg + "/thrust"); |
278 | HIPRocPrimPathArg = |
279 | Args.getLastArgValue(clang::driver::options::Id: OPT_hipstdpar_prim_path_EQ); |
280 | HasRocPrimLibrary = !HIPRocPrimPathArg.empty() && |
281 | D.getVFS().exists(Path: HIPRocPrimPathArg + "/rocprim"); |
282 | |
283 | if (auto *A = Args.getLastArg(clang::driver::options::OPT_hip_version_EQ)) { |
284 | HIPVersionArg = A->getValue(); |
285 | unsigned Major = ~0U; |
286 | unsigned Minor = ~0U; |
287 | SmallVector<StringRef, 3> Parts; |
288 | HIPVersionArg.split(A&: Parts, Separator: '.'); |
289 | if (Parts.size()) |
290 | Parts[0].getAsInteger(Radix: 0, Result&: Major); |
291 | if (Parts.size() > 1) |
292 | Parts[1].getAsInteger(Radix: 0, Result&: Minor); |
293 | if (Parts.size() > 2) |
294 | VersionPatch = Parts[2].str(); |
295 | if (VersionPatch.empty()) |
296 | VersionPatch = "0"; |
297 | if (Major != ~0U && Minor == ~0U) |
298 | Minor = 0; |
299 | if (Major == ~0U || Minor == ~0U) |
300 | D.Diag(diag::DiagID: err_drv_invalid_value) |
301 | << A->getAsString(Args) << HIPVersionArg; |
302 | |
303 | VersionMajorMinor = llvm::VersionTuple(Major, Minor); |
304 | DetectedVersion = |
305 | (Twine(Major) + "."+ Twine(Minor) + "."+ VersionPatch).str(); |
306 | } else { |
307 | VersionPatch = DefaultVersionPatch; |
308 | VersionMajorMinor = |
309 | llvm::VersionTuple(DefaultVersionMajor, DefaultVersionMinor); |
310 | DetectedVersion = (Twine(DefaultVersionMajor) + "."+ |
311 | Twine(DefaultVersionMinor) + "."+ VersionPatch) |
312 | .str(); |
313 | } |
314 | |
315 | if (DetectHIPRuntime) |
316 | detectHIPRuntime(); |
317 | if (DetectDeviceLib) |
318 | detectDeviceLibrary(); |
319 | } |
320 | |
321 | void RocmInstallationDetector::detectDeviceLibrary() { |
322 | assert(LibDevicePath.empty()); |
323 | |
324 | if (!RocmDeviceLibPathArg.empty()) |
325 | LibDevicePath = RocmDeviceLibPathArg[RocmDeviceLibPathArg.size() - 1]; |
326 | else if (std::optional<std::string> LibPathEnv = |
327 | llvm::sys::Process::GetEnv(name: "HIP_DEVICE_LIB_PATH")) |
328 | LibDevicePath = std::move(*LibPathEnv); |
329 | |
330 | auto &FS = D.getVFS(); |
331 | if (!LibDevicePath.empty()) { |
332 | // Maintain compatability with HIP flag/envvar pointing directly at the |
333 | // bitcode library directory. This points directly at the library path instead |
334 | // of the rocm root installation. |
335 | if (!FS.exists(Path: LibDevicePath)) |
336 | return; |
337 | |
338 | scanLibDevicePath(Path: LibDevicePath); |
339 | HasDeviceLibrary = allGenericLibsValid() && !LibDeviceMap.empty(); |
340 | return; |
341 | } |
342 | |
343 | // Check device library exists at the given path. |
344 | auto CheckDeviceLib = [&](StringRef Path, bool StrictChecking) { |
345 | bool CheckLibDevice = (!NoBuiltinLibs || StrictChecking); |
346 | if (CheckLibDevice && !FS.exists(Path)) |
347 | return false; |
348 | |
349 | scanLibDevicePath(Path); |
350 | |
351 | if (!NoBuiltinLibs) { |
352 | // Check that the required non-target libraries are all available. |
353 | if (!allGenericLibsValid()) |
354 | return false; |
355 | |
356 | // Check that we have found at least one libdevice that we can link in |
357 | // if -nobuiltinlib hasn't been specified. |
358 | if (LibDeviceMap.empty()) |
359 | return false; |
360 | } |
361 | return true; |
362 | }; |
363 | |
364 | // Find device libraries in <LLVM_DIR>/lib/clang/<ver>/lib/amdgcn/bitcode |
365 | LibDevicePath = D.ResourceDir; |
366 | llvm::sys::path::append(path&: LibDevicePath, CLANG_INSTALL_LIBDIR_BASENAME, |
367 | b: "amdgcn", c: "bitcode"); |
368 | HasDeviceLibrary = CheckDeviceLib(LibDevicePath, true); |
369 | if (HasDeviceLibrary) |
370 | return; |
371 | |
372 | // Find device libraries in a legacy ROCm directory structure |
373 | // ${ROCM_ROOT}/amdgcn/bitcode/* |
374 | auto &ROCmDirs = getInstallationPathCandidates(); |
375 | for (const auto &Candidate : ROCmDirs) { |
376 | LibDevicePath = Candidate.Path; |
377 | llvm::sys::path::append(path&: LibDevicePath, a: "amdgcn", b: "bitcode"); |
378 | HasDeviceLibrary = CheckDeviceLib(LibDevicePath, Candidate.StrictChecking); |
379 | if (HasDeviceLibrary) |
380 | return; |
381 | } |
382 | } |
383 | |
384 | void RocmInstallationDetector::detectHIPRuntime() { |
385 | SmallVector<Candidate, 4> HIPSearchDirs; |
386 | if (!HIPPathArg.empty()) |
387 | HIPSearchDirs.emplace_back(Args: HIPPathArg.str()); |
388 | else if (std::optional<std::string> HIPPathEnv = |
389 | llvm::sys::Process::GetEnv(name: "HIP_PATH")) { |
390 | if (!HIPPathEnv->empty()) |
391 | HIPSearchDirs.emplace_back(Args: std::move(*HIPPathEnv)); |
392 | } |
393 | if (HIPSearchDirs.empty()) |
394 | HIPSearchDirs.append(RHS: getInstallationPathCandidates()); |
395 | auto &FS = D.getVFS(); |
396 | |
397 | for (const auto &Candidate : HIPSearchDirs) { |
398 | InstallPath = Candidate.Path; |
399 | if (InstallPath.empty() || !FS.exists(Path: InstallPath)) |
400 | continue; |
401 | |
402 | BinPath = InstallPath; |
403 | llvm::sys::path::append(path&: BinPath, a: "bin"); |
404 | IncludePath = InstallPath; |
405 | llvm::sys::path::append(path&: IncludePath, a: "include"); |
406 | LibPath = InstallPath; |
407 | llvm::sys::path::append(path&: LibPath, a: "lib"); |
408 | SharePath = InstallPath; |
409 | llvm::sys::path::append(path&: SharePath, a: "share"); |
410 | |
411 | // Get parent of InstallPath and append "share" |
412 | SmallString<0> ParentSharePath = llvm::sys::path::parent_path(path: InstallPath); |
413 | llvm::sys::path::append(path&: ParentSharePath, a: "share"); |
414 | |
415 | auto Append = [](SmallString<0> &path, const Twine &a, const Twine &b = "", |
416 | const Twine &c = "", const Twine &d = "") { |
417 | SmallString<0> newpath = path; |
418 | llvm::sys::path::append(path&: newpath, a, b, c, d); |
419 | return newpath; |
420 | }; |
421 | // If HIP version file can be found and parsed, use HIP version from there. |
422 | std::vector<SmallString<0>> VersionFilePaths = { |
423 | Append(SharePath, "hip", "version"), |
424 | InstallPath != D.SysRoot + "/usr/local" |
425 | ? Append(ParentSharePath, "hip", "version") |
426 | : SmallString<0>(), |
427 | Append(BinPath, ".hipVersion")}; |
428 | |
429 | for (const auto &VersionFilePath : VersionFilePaths) { |
430 | if (VersionFilePath.empty()) |
431 | continue; |
432 | llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> VersionFile = |
433 | FS.getBufferForFile(Name: VersionFilePath); |
434 | if (!VersionFile) |
435 | continue; |
436 | if (HIPVersionArg.empty() && VersionFile) |
437 | if (parseHIPVersionFile(V: (*VersionFile)->getBuffer())) |
438 | continue; |
439 | |
440 | HasHIPRuntime = true; |
441 | return; |
442 | } |
443 | // Otherwise, if -rocm-path is specified (no strict checking), use the |
444 | // default HIP version or specified by --hip-version. |
445 | if (!Candidate.StrictChecking) { |
446 | HasHIPRuntime = true; |
447 | return; |
448 | } |
449 | } |
450 | HasHIPRuntime = false; |
451 | } |
452 | |
453 | void RocmInstallationDetector::print(raw_ostream &OS) const { |
454 | if (hasHIPRuntime()) |
455 | OS << "Found HIP installation: "<< InstallPath << ", version " |
456 | << DetectedVersion << '\n'; |
457 | } |
458 | |
459 | void RocmInstallationDetector::AddHIPIncludeArgs(const ArgList &DriverArgs, |
460 | ArgStringList &CC1Args) const { |
461 | bool UsesRuntimeWrapper = VersionMajorMinor > llvm::VersionTuple(3, 5) && |
462 | !DriverArgs.hasArg(options::OPT_nohipwrapperinc); |
463 | bool HasHipStdPar = DriverArgs.hasArg(options::OPT_hipstdpar); |
464 | |
465 | if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) { |
466 | // HIP header includes standard library wrapper headers under clang |
467 | // cuda_wrappers directory. Since these wrapper headers include_next |
468 | // standard C++ headers, whereas libc++ headers include_next other clang |
469 | // headers. The include paths have to follow this order: |
470 | // - wrapper include path |
471 | // - standard C++ include path |
472 | // - other clang include path |
473 | // Since standard C++ and other clang include paths are added in other |
474 | // places after this function, here we only need to make sure wrapper |
475 | // include path is added. |
476 | // |
477 | // ROCm 3.5 does not fully support the wrapper headers. Therefore it needs |
478 | // a workaround. |
479 | SmallString<128> P(D.ResourceDir); |
480 | if (UsesRuntimeWrapper) |
481 | llvm::sys::path::append(path&: P, a: "include", b: "cuda_wrappers"); |
482 | CC1Args.push_back(Elt: "-internal-isystem"); |
483 | CC1Args.push_back(Elt: DriverArgs.MakeArgString(Str: P)); |
484 | } |
485 | |
486 | const auto HandleHipStdPar = [=, &DriverArgs, &CC1Args]() { |
487 | StringRef Inc = getIncludePath(); |
488 | auto &FS = D.getVFS(); |
489 | |
490 | if (!hasHIPStdParLibrary()) |
491 | if (!HIPStdParPathArg.empty() || |
492 | !FS.exists(Path: Inc + "/thrust/system/hip/hipstdpar/hipstdpar_lib.hpp")) { |
493 | D.Diag(diag::DiagID: err_drv_no_hipstdpar_lib); |
494 | return; |
495 | } |
496 | if (!HasRocThrustLibrary && !FS.exists(Path: Inc + "/thrust")) { |
497 | D.Diag(diag::DiagID: err_drv_no_hipstdpar_thrust_lib); |
498 | return; |
499 | } |
500 | if (!HasRocPrimLibrary && !FS.exists(Path: Inc + "/rocprim")) { |
501 | D.Diag(diag::DiagID: err_drv_no_hipstdpar_prim_lib); |
502 | return; |
503 | } |
504 | const char *ThrustPath; |
505 | if (HasRocThrustLibrary) |
506 | ThrustPath = DriverArgs.MakeArgString(Str: HIPRocThrustPathArg); |
507 | else |
508 | ThrustPath = DriverArgs.MakeArgString(Str: Inc + "/thrust"); |
509 | |
510 | const char *HIPStdParPath; |
511 | if (hasHIPStdParLibrary()) |
512 | HIPStdParPath = DriverArgs.MakeArgString(Str: HIPStdParPathArg); |
513 | else |
514 | HIPStdParPath = DriverArgs.MakeArgString(Str: StringRef(ThrustPath) + |
515 | "/system/hip/hipstdpar"); |
516 | |
517 | const char *PrimPath; |
518 | if (HasRocPrimLibrary) |
519 | PrimPath = DriverArgs.MakeArgString(Str: HIPRocPrimPathArg); |
520 | else |
521 | PrimPath = DriverArgs.MakeArgString(Str: getIncludePath() + "/rocprim"); |
522 | |
523 | CC1Args.append(IL: {"-idirafter", ThrustPath, "-idirafter", PrimPath, |
524 | "-idirafter", HIPStdParPath, "-include", |
525 | "hipstdpar_lib.hpp"}); |
526 | }; |
527 | |
528 | if (DriverArgs.hasArg(options::OPT_nogpuinc)) { |
529 | if (HasHipStdPar) |
530 | HandleHipStdPar(); |
531 | |
532 | return; |
533 | } |
534 | |
535 | if (!hasHIPRuntime()) { |
536 | D.Diag(diag::DiagID: err_drv_no_hip_runtime); |
537 | return; |
538 | } |
539 | |
540 | CC1Args.push_back(Elt: "-idirafter"); |
541 | CC1Args.push_back(Elt: DriverArgs.MakeArgString(Str: getIncludePath())); |
542 | if (UsesRuntimeWrapper) |
543 | CC1Args.append(IL: {"-include", "__clang_hip_runtime_wrapper.h"}); |
544 | if (HasHipStdPar) |
545 | HandleHipStdPar(); |
546 | } |
547 | |
548 | void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA, |
549 | const InputInfo &Output, |
550 | const InputInfoList &Inputs, |
551 | const ArgList &Args, |
552 | const char *LinkingOutput) const { |
553 | std::string Linker = getToolChain().GetLinkerPath(); |
554 | ArgStringList CmdArgs; |
555 | if (!Args.hasArg(options::OPT_r)) { |
556 | CmdArgs.push_back(Elt: "--no-undefined"); |
557 | CmdArgs.push_back(Elt: "-shared"); |
558 | } |
559 | |
560 | if (C.getDriver().isUsingLTO()) { |
561 | const bool ThinLTO = (C.getDriver().getLTOMode() == LTOK_Thin); |
562 | addLTOOptions(ToolChain: getToolChain(), Args, CmdArgs, Output, Inputs, IsThinLTO: ThinLTO); |
563 | } else if (Args.hasArg(options::OPT_mcpu_EQ)) { |
564 | CmdArgs.push_back(Elt: Args.MakeArgString( |
565 | Str: "-plugin-opt=mcpu="+ |
566 | getProcessorFromTargetID(getToolChain().getTriple(), |
567 | Args.getLastArgValue(options::Id: OPT_mcpu_EQ)))); |
568 | } |
569 | addLinkerCompressDebugSectionsOption(TC: getToolChain(), Args, CmdArgs); |
570 | getToolChain().AddFilePathLibArgs(Args, CmdArgs); |
571 | Args.AddAllArgs(Output&: CmdArgs, options::Id0: OPT_L); |
572 | AddLinkerInputs(TC: getToolChain(), Inputs, Args, CmdArgs, JA); |
573 | |
574 | // Always pass the target-id features to the LTO job. |
575 | std::vector<StringRef> Features; |
576 | getAMDGPUTargetFeatures(D: C.getDriver(), Triple: getToolChain().getTriple(), Args, |
577 | Features); |
578 | if (!Features.empty()) { |
579 | CmdArgs.push_back( |
580 | Elt: Args.MakeArgString(Str: "-plugin-opt=-mattr="+ llvm::join(R&: Features, Separator: ","))); |
581 | } |
582 | |
583 | if (Args.hasArg(options::OPT_stdlib)) |
584 | CmdArgs.append(IL: {"-lc", "-lm"}); |
585 | if (Args.hasArg(options::OPT_startfiles)) { |
586 | std::optional<std::string> IncludePath = getToolChain().getStdlibPath(); |
587 | if (!IncludePath) |
588 | IncludePath = "/lib"; |
589 | SmallString<128> P(*IncludePath); |
590 | llvm::sys::path::append(path&: P, a: "crt1.o"); |
591 | CmdArgs.push_back(Elt: Args.MakeArgString(Str: P)); |
592 | } |
593 | |
594 | CmdArgs.push_back(Elt: "-o"); |
595 | CmdArgs.push_back(Elt: Output.getFilename()); |
596 | C.addCommand(C: std::make_unique<Command>( |
597 | args: JA, args: *this, args: ResponseFileSupport::AtFileCurCP(), args: Args.MakeArgString(Str: Linker), |
598 | args&: CmdArgs, args: Inputs, args: Output)); |
599 | } |
600 | |
601 | void amdgpu::getAMDGPUTargetFeatures(const Driver &D, |
602 | const llvm::Triple &Triple, |
603 | const llvm::opt::ArgList &Args, |
604 | std::vector<StringRef> &Features) { |
605 | // Add target ID features to -target-feature options. No diagnostics should |
606 | // be emitted here since invalid target ID is diagnosed at other places. |
607 | StringRef TargetID; |
608 | if (Args.hasArg(options::OPT_mcpu_EQ)) |
609 | TargetID = Args.getLastArgValue(options::Id: OPT_mcpu_EQ); |
610 | else if (Args.hasArg(options::OPT_march_EQ)) |
611 | TargetID = Args.getLastArgValue(options::Id: OPT_march_EQ); |
612 | if (!TargetID.empty()) { |
613 | llvm::StringMap<bool> FeatureMap; |
614 | auto OptionalGpuArch = parseTargetID(T: Triple, OffloadArch: TargetID, FeatureMap: &FeatureMap); |
615 | if (OptionalGpuArch) { |
616 | StringRef GpuArch = *OptionalGpuArch; |
617 | // Iterate through all possible target ID features for the given GPU. |
618 | // If it is mapped to true, add +feature. |
619 | // If it is mapped to false, add -feature. |
620 | // If it is not in the map (default), do not add it |
621 | for (auto &&Feature : getAllPossibleTargetIDFeatures(T: Triple, Processor: GpuArch)) { |
622 | auto Pos = FeatureMap.find(Key: Feature); |
623 | if (Pos == FeatureMap.end()) |
624 | continue; |
625 | Features.push_back(x: Args.MakeArgStringRef( |
626 | Str: (Twine(Pos->second ? "+": "-") + Feature).str())); |
627 | } |
628 | } |
629 | } |
630 | |
631 | if (Args.hasFlag(options::OPT_mwavefrontsize64, |
632 | options::OPT_mno_wavefrontsize64, false)) |
633 | Features.push_back(x: "+wavefrontsize64"); |
634 | |
635 | if (Args.hasFlag(options::OPT_mamdgpu_precise_memory_op, |
636 | options::OPT_mno_amdgpu_precise_memory_op, false)) |
637 | Features.push_back(x: "+precise-memory"); |
638 | |
639 | handleTargetFeaturesGroup(D, Triple, Args, Features, |
640 | options::OPT_m_amdgpu_Features_Group); |
641 | } |
642 | |
643 | /// AMDGPU Toolchain |
644 | AMDGPUToolChain::AMDGPUToolChain(const Driver &D, const llvm::Triple &Triple, |
645 | const ArgList &Args) |
646 | : Generic_ELF(D, Triple, Args), |
647 | OptionsDefault( |
648 | {{options::OPT_O, "3"}, {options::OPT_cl_std_EQ, "CL1.2"}}) { |
649 | // Check code object version options. Emit warnings for legacy options |
650 | // and errors for the last invalid code object version options. |
651 | // It is done here to avoid repeated warning or error messages for |
652 | // each tool invocation. |
653 | checkAMDGPUCodeObjectVersion(D, Args); |
654 | } |
655 | |
656 | Tool *AMDGPUToolChain::buildLinker() const { |
657 | return new tools::amdgpu::Linker(*this); |
658 | } |
659 | |
660 | DerivedArgList * |
661 | AMDGPUToolChain::TranslateArgs(const DerivedArgList &Args, StringRef BoundArch, |
662 | Action::OffloadKind DeviceOffloadKind) const { |
663 | |
664 | DerivedArgList *DAL = |
665 | Generic_ELF::TranslateArgs(Args, BoundArch, DeviceOffloadKind); |
666 | |
667 | const OptTable &Opts = getDriver().getOpts(); |
668 | |
669 | if (!DAL) |
670 | DAL = new DerivedArgList(Args.getBaseArgs()); |
671 | |
672 | for (Arg *A : Args) |
673 | DAL->append(A); |
674 | |
675 | // Replace -mcpu=native with detected GPU. |
676 | Arg *LastMCPUArg = DAL->getLastArg(options::OPT_mcpu_EQ); |
677 | if (LastMCPUArg && StringRef(LastMCPUArg->getValue()) == "native") { |
678 | DAL->eraseArg(options::Id: OPT_mcpu_EQ); |
679 | auto GPUsOrErr = getSystemGPUArchs(Args); |
680 | if (!GPUsOrErr) { |
681 | getDriver().Diag(diag::DiagID: err_drv_undetermined_gpu_arch) |
682 | << llvm::Triple::getArchTypeName(Kind: getArch()) |
683 | << llvm::toString(E: GPUsOrErr.takeError()) << "-mcpu"; |
684 | } else { |
685 | auto &GPUs = *GPUsOrErr; |
686 | if (GPUs.size() > 1) { |
687 | getDriver().Diag(diag::DiagID: warn_drv_multi_gpu_arch) |
688 | << llvm::Triple::getArchTypeName(Kind: getArch()) |
689 | << llvm::join(R&: GPUs, Separator: ", ") << "-mcpu"; |
690 | } |
691 | DAL->AddJoinedArg(BaseArg: nullptr, Opt: Opts.getOption(options::Opt: OPT_mcpu_EQ), |
692 | Value: Args.MakeArgString(Str: GPUs.front())); |
693 | } |
694 | } |
695 | |
696 | checkTargetID(DriverArgs: *DAL); |
697 | |
698 | if (Args.getLastArgValue(options::Id: OPT_x) != "cl") |
699 | return DAL; |
700 | |
701 | // Phase 1 (.cl -> .bc) |
702 | if (Args.hasArg(options::OPT_c) && Args.hasArg(options::OPT_emit_llvm)) { |
703 | DAL->AddFlagArg(BaseArg: nullptr, Opt: Opts.getOption(Opt: getTriple().isArch64Bit() |
704 | ? options::OPT_m64 |
705 | : options::OPT_m32)); |
706 | |
707 | // Have to check OPT_O4, OPT_O0 & OPT_Ofast separately |
708 | // as they defined that way in Options.td |
709 | if (!Args.hasArg(options::OPT_O, options::OPT_O0, options::OPT_O4, |
710 | options::OPT_Ofast)) |
711 | DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_O), |
712 | getOptionDefault(options::OPT_O)); |
713 | } |
714 | |
715 | return DAL; |
716 | } |
717 | |
718 | bool AMDGPUToolChain::getDefaultDenormsAreZeroForTarget( |
719 | llvm::AMDGPU::GPUKind Kind) { |
720 | |
721 | // Assume nothing without a specific target. |
722 | if (Kind == llvm::AMDGPU::GK_NONE) |
723 | return false; |
724 | |
725 | const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(AK: Kind); |
726 | |
727 | // Default to enabling f32 denormals by default on subtargets where fma is |
728 | // fast with denormals |
729 | const bool BothDenormAndFMAFast = |
730 | (ArchAttr & llvm::AMDGPU::FEATURE_FAST_FMA_F32) && |
731 | (ArchAttr & llvm::AMDGPU::FEATURE_FAST_DENORMAL_F32); |
732 | return !BothDenormAndFMAFast; |
733 | } |
734 | |
735 | llvm::DenormalMode AMDGPUToolChain::getDefaultDenormalModeForType( |
736 | const llvm::opt::ArgList &DriverArgs, const JobAction &JA, |
737 | const llvm::fltSemantics *FPType) const { |
738 | // Denormals should always be enabled for f16 and f64. |
739 | if (!FPType || FPType != &llvm::APFloat::IEEEsingle()) |
740 | return llvm::DenormalMode::getIEEE(); |
741 | |
742 | if (JA.getOffloadingDeviceKind() == Action::OFK_HIP || |
743 | JA.getOffloadingDeviceKind() == Action::OFK_Cuda) { |
744 | auto Arch = getProcessorFromTargetID(T: getTriple(), OffloadArch: JA.getOffloadingArch()); |
745 | auto Kind = llvm::AMDGPU::parseArchAMDGCN(CPU: Arch); |
746 | if (FPType && FPType == &llvm::APFloat::IEEEsingle() && |
747 | DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero, |
748 | options::OPT_fno_gpu_flush_denormals_to_zero, |
749 | getDefaultDenormsAreZeroForTarget(Kind))) |
750 | return llvm::DenormalMode::getPreserveSign(); |
751 | |
752 | return llvm::DenormalMode::getIEEE(); |
753 | } |
754 | |
755 | const StringRef GpuArch = getGPUArch(DriverArgs); |
756 | auto Kind = llvm::AMDGPU::parseArchAMDGCN(CPU: GpuArch); |
757 | |
758 | // TODO: There are way too many flags that change this. Do we need to check |
759 | // them all? |
760 | bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) || |
761 | getDefaultDenormsAreZeroForTarget(Kind); |
762 | |
763 | // Outputs are flushed to zero (FTZ), preserving sign. Denormal inputs are |
764 | // also implicit treated as zero (DAZ). |
765 | return DAZ ? llvm::DenormalMode::getPreserveSign() : |
766 | llvm::DenormalMode::getIEEE(); |
767 | } |
768 | |
769 | bool AMDGPUToolChain::isWave64(const llvm::opt::ArgList &DriverArgs, |
770 | llvm::AMDGPU::GPUKind Kind) { |
771 | const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(AK: Kind); |
772 | bool HasWave32 = (ArchAttr & llvm::AMDGPU::FEATURE_WAVE32); |
773 | |
774 | return !HasWave32 || DriverArgs.hasFlag( |
775 | options::OPT_mwavefrontsize64, options::OPT_mno_wavefrontsize64, false); |
776 | } |
777 | |
778 | |
779 | /// ROCM Toolchain |
780 | ROCMToolChain::ROCMToolChain(const Driver &D, const llvm::Triple &Triple, |
781 | const ArgList &Args) |
782 | : AMDGPUToolChain(D, Triple, Args) { |
783 | RocmInstallation->detectDeviceLibrary(); |
784 | } |
785 | |
786 | void AMDGPUToolChain::addClangTargetOptions( |
787 | const llvm::opt::ArgList &DriverArgs, |
788 | llvm::opt::ArgStringList &CC1Args, |
789 | Action::OffloadKind DeviceOffloadingKind) const { |
790 | // Default to "hidden" visibility, as object level linking will not be |
791 | // supported for the foreseeable future. |
792 | if (!DriverArgs.hasArg(options::OPT_fvisibility_EQ, |
793 | options::OPT_fvisibility_ms_compat)) { |
794 | CC1Args.push_back(Elt: "-fvisibility=hidden"); |
795 | CC1Args.push_back(Elt: "-fapply-global-visibility-to-externs"); |
796 | } |
797 | } |
798 | |
799 | void AMDGPUToolChain::addClangWarningOptions(ArgStringList &CC1Args) const { |
800 | // AMDGPU does not support atomic lib call. Treat atomic alignment |
801 | // warnings as errors. |
802 | CC1Args.push_back(Elt: "-Werror=atomic-alignment"); |
803 | } |
804 | |
805 | StringRef |
806 | AMDGPUToolChain::getGPUArch(const llvm::opt::ArgList &DriverArgs) const { |
807 | return getProcessorFromTargetID( |
808 | getTriple(), DriverArgs.getLastArgValue(options::OPT_mcpu_EQ)); |
809 | } |
810 | |
811 | AMDGPUToolChain::ParsedTargetIDType |
812 | AMDGPUToolChain::getParsedTargetID(const llvm::opt::ArgList &DriverArgs) const { |
813 | StringRef TargetID = DriverArgs.getLastArgValue(options::OPT_mcpu_EQ); |
814 | if (TargetID.empty()) |
815 | return {.OptionalTargetID: std::nullopt, .OptionalGPUArch: std::nullopt, .OptionalFeatures: std::nullopt}; |
816 | |
817 | llvm::StringMap<bool> FeatureMap; |
818 | auto OptionalGpuArch = parseTargetID(T: getTriple(), OffloadArch: TargetID, FeatureMap: &FeatureMap); |
819 | if (!OptionalGpuArch) |
820 | return {.OptionalTargetID: TargetID.str(), .OptionalGPUArch: std::nullopt, .OptionalFeatures: std::nullopt}; |
821 | |
822 | return {.OptionalTargetID: TargetID.str(), .OptionalGPUArch: OptionalGpuArch->str(), .OptionalFeatures: FeatureMap}; |
823 | } |
824 | |
825 | void AMDGPUToolChain::checkTargetID( |
826 | const llvm::opt::ArgList &DriverArgs) const { |
827 | auto PTID = getParsedTargetID(DriverArgs); |
828 | if (PTID.OptionalTargetID && !PTID.OptionalGPUArch) { |
829 | getDriver().Diag(clang::diag::err_drv_bad_target_id) |
830 | << *PTID.OptionalTargetID; |
831 | } |
832 | } |
833 | |
834 | Expected<SmallVector<std::string>> |
835 | AMDGPUToolChain::getSystemGPUArchs(const ArgList &Args) const { |
836 | // Detect AMD GPUs availible on the system. |
837 | std::string Program; |
838 | if (Arg *A = Args.getLastArg(options::OPT_amdgpu_arch_tool_EQ)) |
839 | Program = A->getValue(); |
840 | else |
841 | Program = GetProgramPath(Name: "amdgpu-arch"); |
842 | |
843 | auto StdoutOrErr = executeToolChainProgram(Executable: Program); |
844 | if (!StdoutOrErr) |
845 | return StdoutOrErr.takeError(); |
846 | |
847 | SmallVector<std::string, 1> GPUArchs; |
848 | for (StringRef Arch : llvm::split(Str: (*StdoutOrErr)->getBuffer(), Separator: "\n")) |
849 | if (!Arch.empty()) |
850 | GPUArchs.push_back(Elt: Arch.str()); |
851 | |
852 | if (GPUArchs.empty()) |
853 | return llvm::createStringError(EC: std::error_code(), |
854 | S: "No AMD GPU detected in the system"); |
855 | |
856 | return std::move(GPUArchs); |
857 | } |
858 | |
859 | void ROCMToolChain::addClangTargetOptions( |
860 | const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, |
861 | Action::OffloadKind DeviceOffloadingKind) const { |
862 | AMDGPUToolChain::addClangTargetOptions(DriverArgs, CC1Args, |
863 | DeviceOffloadingKind); |
864 | |
865 | // For the OpenCL case where there is no offload target, accept -nostdlib to |
866 | // disable bitcode linking. |
867 | if (DeviceOffloadingKind == Action::OFK_None && |
868 | DriverArgs.hasArg(options::OPT_nostdlib)) |
869 | return; |
870 | |
871 | if (!DriverArgs.hasFlag(options::OPT_offloadlib, options::OPT_no_offloadlib, |
872 | true)) |
873 | return; |
874 | |
875 | // Get the device name and canonicalize it |
876 | const StringRef GpuArch = getGPUArch(DriverArgs); |
877 | auto Kind = llvm::AMDGPU::parseArchAMDGCN(CPU: GpuArch); |
878 | const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(AK: Kind); |
879 | StringRef LibDeviceFile = RocmInstallation->getLibDeviceFile(Gpu: CanonArch); |
880 | auto ABIVer = DeviceLibABIVersion::fromCodeObjectVersion( |
881 | CodeObjectVersion: getAMDGPUCodeObjectVersion(D: getDriver(), Args: DriverArgs)); |
882 | if (!RocmInstallation->checkCommonBitcodeLibs(GPUArch: CanonArch, LibDeviceFile, |
883 | ABIVer)) |
884 | return; |
885 | |
886 | bool Wave64 = isWave64(DriverArgs, Kind); |
887 | // TODO: There are way too many flags that change this. Do we need to check |
888 | // them all? |
889 | bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) || |
890 | getDefaultDenormsAreZeroForTarget(Kind); |
891 | bool FiniteOnly = DriverArgs.hasArg(options::OPT_cl_finite_math_only); |
892 | |
893 | bool UnsafeMathOpt = |
894 | DriverArgs.hasArg(options::OPT_cl_unsafe_math_optimizations); |
895 | bool FastRelaxedMath = DriverArgs.hasArg(options::OPT_cl_fast_relaxed_math); |
896 | bool CorrectSqrt = |
897 | DriverArgs.hasArg(options::OPT_cl_fp32_correctly_rounded_divide_sqrt); |
898 | |
899 | // GPU Sanitizer currently only supports ASan and is enabled through host |
900 | // ASan. |
901 | bool GPUSan = DriverArgs.hasFlag(options::OPT_fgpu_sanitize, |
902 | options::OPT_fno_gpu_sanitize, true) && |
903 | getSanitizerArgs(DriverArgs).needsAsanRt(); |
904 | |
905 | // Add the OpenCL specific bitcode library. |
906 | llvm::SmallVector<BitCodeLibraryInfo, 12> BCLibs; |
907 | BCLibs.emplace_back(Args: RocmInstallation->getOpenCLPath().str()); |
908 | |
909 | // Add the generic set of libraries. |
910 | BCLibs.append(RHS: RocmInstallation->getCommonBitcodeLibs( |
911 | DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt, |
912 | FastRelaxedMath, CorrectSqrt, ABIVer, GPUSan, isOpenMP: false)); |
913 | |
914 | for (auto [BCFile, Internalize] : BCLibs) { |
915 | if (Internalize) |
916 | CC1Args.push_back(Elt: "-mlink-builtin-bitcode"); |
917 | else |
918 | CC1Args.push_back(Elt: "-mlink-bitcode-file"); |
919 | CC1Args.push_back(Elt: DriverArgs.MakeArgString(Str: BCFile)); |
920 | } |
921 | } |
922 | |
923 | bool RocmInstallationDetector::checkCommonBitcodeLibs( |
924 | StringRef GPUArch, StringRef LibDeviceFile, |
925 | DeviceLibABIVersion ABIVer) const { |
926 | if (!hasDeviceLibrary()) { |
927 | D.Diag(diag::err_drv_no_rocm_device_lib) << 0; |
928 | return false; |
929 | } |
930 | if (LibDeviceFile.empty()) { |
931 | D.Diag(diag::err_drv_no_rocm_device_lib) << 1 << GPUArch; |
932 | return false; |
933 | } |
934 | if (ABIVer.requiresLibrary() && getABIVersionPath(ABIVer).empty()) { |
935 | // Starting from COV6, we will report minimum ROCm version requirement in |
936 | // the error message. |
937 | if (ABIVer.getAsCodeObjectVersion() < 6) |
938 | D.Diag(diag::err_drv_no_rocm_device_lib) << 2 << ABIVer.toString() << 0; |
939 | else |
940 | D.Diag(diag::err_drv_no_rocm_device_lib) |
941 | << 2 << ABIVer.toString() << 1 << "6.3"; |
942 | return false; |
943 | } |
944 | return true; |
945 | } |
946 | |
947 | llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12> |
948 | RocmInstallationDetector::getCommonBitcodeLibs( |
949 | const llvm::opt::ArgList &DriverArgs, StringRef LibDeviceFile, bool Wave64, |
950 | bool DAZ, bool FiniteOnly, bool UnsafeMathOpt, bool FastRelaxedMath, |
951 | bool CorrectSqrt, DeviceLibABIVersion ABIVer, bool GPUSan, |
952 | bool isOpenMP) const { |
953 | llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12> BCLibs; |
954 | |
955 | auto AddBCLib = [&](ToolChain::BitCodeLibraryInfo BCLib, |
956 | bool Internalize = true) { |
957 | BCLib.ShouldInternalize = Internalize; |
958 | BCLibs.emplace_back(Args&: BCLib); |
959 | }; |
960 | auto AddSanBCLibs = [&]() { |
961 | if (GPUSan) |
962 | AddBCLib(getAsanRTLPath(), false); |
963 | }; |
964 | |
965 | AddSanBCLibs(); |
966 | AddBCLib(getOCMLPath()); |
967 | if (!isOpenMP) |
968 | AddBCLib(getOCKLPath()); |
969 | else if (GPUSan && isOpenMP) |
970 | AddBCLib(getOCKLPath(), false); |
971 | AddBCLib(getDenormalsAreZeroPath(Enabled: DAZ)); |
972 | AddBCLib(getUnsafeMathPath(Enabled: UnsafeMathOpt || FastRelaxedMath)); |
973 | AddBCLib(getFiniteOnlyPath(Enabled: FiniteOnly || FastRelaxedMath)); |
974 | AddBCLib(getCorrectlyRoundedSqrtPath(Enabled: CorrectSqrt)); |
975 | AddBCLib(getWavefrontSize64Path(Enabled: Wave64)); |
976 | AddBCLib(LibDeviceFile); |
977 | auto ABIVerPath = getABIVersionPath(ABIVer); |
978 | if (!ABIVerPath.empty()) |
979 | AddBCLib(ABIVerPath); |
980 | |
981 | return BCLibs; |
982 | } |
983 | |
984 | llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12> |
985 | ROCMToolChain::getCommonDeviceLibNames(const llvm::opt::ArgList &DriverArgs, |
986 | const std::string &GPUArch, |
987 | bool isOpenMP) const { |
988 | auto Kind = llvm::AMDGPU::parseArchAMDGCN(CPU: GPUArch); |
989 | const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(AK: Kind); |
990 | |
991 | StringRef LibDeviceFile = RocmInstallation->getLibDeviceFile(Gpu: CanonArch); |
992 | auto ABIVer = DeviceLibABIVersion::fromCodeObjectVersion( |
993 | CodeObjectVersion: getAMDGPUCodeObjectVersion(D: getDriver(), Args: DriverArgs)); |
994 | if (!RocmInstallation->checkCommonBitcodeLibs(GPUArch: CanonArch, LibDeviceFile, |
995 | ABIVer)) |
996 | return {}; |
997 | |
998 | // If --hip-device-lib is not set, add the default bitcode libraries. |
999 | // TODO: There are way too many flags that change this. Do we need to check |
1000 | // them all? |
1001 | bool DAZ = DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero, |
1002 | options::OPT_fno_gpu_flush_denormals_to_zero, |
1003 | getDefaultDenormsAreZeroForTarget(Kind)); |
1004 | bool FiniteOnly = DriverArgs.hasFlag( |
1005 | options::OPT_ffinite_math_only, options::OPT_fno_finite_math_only, false); |
1006 | bool UnsafeMathOpt = |
1007 | DriverArgs.hasFlag(options::OPT_funsafe_math_optimizations, |
1008 | options::OPT_fno_unsafe_math_optimizations, false); |
1009 | bool FastRelaxedMath = DriverArgs.hasFlag(options::OPT_ffast_math, |
1010 | options::OPT_fno_fast_math, false); |
1011 | bool CorrectSqrt = DriverArgs.hasFlag( |
1012 | options::OPT_fhip_fp32_correctly_rounded_divide_sqrt, |
1013 | options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt, true); |
1014 | bool Wave64 = isWave64(DriverArgs, Kind); |
1015 | |
1016 | // GPU Sanitizer currently only supports ASan and is enabled through host |
1017 | // ASan. |
1018 | bool GPUSan = DriverArgs.hasFlag(options::OPT_fgpu_sanitize, |
1019 | options::OPT_fno_gpu_sanitize, true) && |
1020 | getSanitizerArgs(DriverArgs).needsAsanRt(); |
1021 | |
1022 | return RocmInstallation->getCommonBitcodeLibs( |
1023 | DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt, |
1024 | FastRelaxedMath, CorrectSqrt, ABIVer, GPUSan, isOpenMP); |
1025 | } |
1026 | |
1027 | bool AMDGPUToolChain::shouldSkipSanitizeOption( |
1028 | const ToolChain &TC, const llvm::opt::ArgList &DriverArgs, |
1029 | StringRef TargetID, const llvm::opt::Arg *A) const { |
1030 | // For actions without targetID, do nothing. |
1031 | if (TargetID.empty()) |
1032 | return false; |
1033 | Option O = A->getOption(); |
1034 | |
1035 | if (!O.matches(options::OPT_fsanitize_EQ)) |
1036 | return false; |
1037 | |
1038 | if (!DriverArgs.hasFlag(options::OPT_fgpu_sanitize, |
1039 | options::OPT_fno_gpu_sanitize, true)) |
1040 | return true; |
1041 | |
1042 | auto &Diags = TC.getDriver().getDiags(); |
1043 | |
1044 | // For simplicity, we only allow -fsanitize=address |
1045 | SanitizerMask K = parseSanitizerValue(Value: A->getValue(), /*AllowGroups=*/false); |
1046 | if (K != SanitizerKind::Address) |
1047 | return true; |
1048 | |
1049 | llvm::StringMap<bool> FeatureMap; |
1050 | auto OptionalGpuArch = parseTargetID(T: TC.getTriple(), OffloadArch: TargetID, FeatureMap: &FeatureMap); |
1051 | |
1052 | assert(OptionalGpuArch && "Invalid Target ID"); |
1053 | (void)OptionalGpuArch; |
1054 | auto Loc = FeatureMap.find(Key: "xnack"); |
1055 | if (Loc == FeatureMap.end() || !Loc->second) { |
1056 | Diags.Report( |
1057 | clang::diag::warn_drv_unsupported_option_for_offload_arch_req_feature) |
1058 | << A->getAsString(DriverArgs) << TargetID << "xnack+"; |
1059 | return true; |
1060 | } |
1061 | return false; |
1062 | } |
1063 |
Definitions
- scanLibDevicePath
- parseHIPVersionFile
- getInstallationPathCandidates
- RocmInstallationDetector
- detectDeviceLibrary
- detectHIPRuntime
- AddHIPIncludeArgs
- ConstructJob
- getAMDGPUTargetFeatures
- AMDGPUToolChain
- buildLinker
- TranslateArgs
- getDefaultDenormsAreZeroForTarget
- getDefaultDenormalModeForType
- isWave64
- ROCMToolChain
- addClangTargetOptions
- addClangWarningOptions
- getGPUArch
- getParsedTargetID
- checkTargetID
- getSystemGPUArchs
- addClangTargetOptions
- checkCommonBitcodeLibs
- getCommonBitcodeLibs
- getCommonDeviceLibNames
Update your C++ knowledge – Modern C++11/14/17 Training
Find out more