1 | //===--- AMDGPU.cpp - AMDGPU ToolChain Implementations ----------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "AMDGPU.h" |
10 | #include "CommonArgs.h" |
11 | #include "clang/Basic/TargetID.h" |
12 | #include "clang/Config/config.h" |
13 | #include "clang/Driver/Compilation.h" |
14 | #include "clang/Driver/DriverDiagnostic.h" |
15 | #include "clang/Driver/InputInfo.h" |
16 | #include "clang/Driver/Options.h" |
17 | #include "llvm/ADT/StringExtras.h" |
18 | #include "llvm/Option/ArgList.h" |
19 | #include "llvm/Support/Error.h" |
20 | #include "llvm/Support/LineIterator.h" |
21 | #include "llvm/Support/Path.h" |
22 | #include "llvm/Support/Process.h" |
23 | #include "llvm/Support/VirtualFileSystem.h" |
24 | #include "llvm/TargetParser/Host.h" |
25 | #include <optional> |
26 | #include <system_error> |
27 | |
28 | using namespace clang::driver; |
29 | using namespace clang::driver::tools; |
30 | using namespace clang::driver::toolchains; |
31 | using namespace clang; |
32 | using namespace llvm::opt; |
33 | |
34 | // Look for sub-directory starts with PackageName under ROCm candidate path. |
35 | // If there is one and only one matching sub-directory found, append the |
36 | // sub-directory to Path. If there is no matching sub-directory or there are |
37 | // more than one matching sub-directories, diagnose them. Returns the full |
38 | // path of the package if there is only one matching sub-directory, otherwise |
39 | // returns an empty string. |
40 | llvm::SmallString<0> |
41 | RocmInstallationDetector::findSPACKPackage(const Candidate &Cand, |
42 | StringRef PackageName) { |
43 | if (!Cand.isSPACK()) |
44 | return {}; |
45 | std::error_code EC; |
46 | std::string Prefix = Twine(PackageName + "-" + Cand.SPACKReleaseStr).str(); |
47 | llvm::SmallVector<llvm::SmallString<0>> SubDirs; |
48 | for (llvm::vfs::directory_iterator File = D.getVFS().dir_begin(Dir: Cand.Path, EC), |
49 | FileEnd; |
50 | File != FileEnd && !EC; File.increment(EC)) { |
51 | llvm::StringRef FileName = llvm::sys::path::filename(path: File->path()); |
52 | if (FileName.starts_with(Prefix)) { |
53 | SubDirs.push_back(Elt: FileName); |
54 | if (SubDirs.size() > 1) |
55 | break; |
56 | } |
57 | } |
58 | if (SubDirs.size() == 1) { |
59 | auto PackagePath = Cand.Path; |
60 | llvm::sys::path::append(path&: PackagePath, a: SubDirs[0]); |
61 | return PackagePath; |
62 | } |
63 | if (SubDirs.size() == 0 && Verbose) { |
64 | llvm::errs() << "SPACK package " << Prefix << " not found at " << Cand.Path |
65 | << '\n'; |
66 | return {}; |
67 | } |
68 | |
69 | if (SubDirs.size() > 1 && Verbose) { |
70 | llvm::errs() << "Cannot use SPACK package " << Prefix << " at " << Cand.Path |
71 | << " due to multiple installations for the same version\n" ; |
72 | } |
73 | return {}; |
74 | } |
75 | |
76 | void RocmInstallationDetector::scanLibDevicePath(llvm::StringRef Path) { |
77 | assert(!Path.empty()); |
78 | |
79 | const StringRef Suffix(".bc" ); |
80 | const StringRef Suffix2(".amdgcn.bc" ); |
81 | |
82 | std::error_code EC; |
83 | for (llvm::vfs::directory_iterator LI = D.getVFS().dir_begin(Dir: Path, EC), LE; |
84 | !EC && LI != LE; LI = LI.increment(EC)) { |
85 | StringRef FilePath = LI->path(); |
86 | StringRef FileName = llvm::sys::path::filename(path: FilePath); |
87 | if (!FileName.ends_with(Suffix)) |
88 | continue; |
89 | |
90 | StringRef BaseName; |
91 | if (FileName.ends_with(Suffix: Suffix2)) |
92 | BaseName = FileName.drop_back(N: Suffix2.size()); |
93 | else if (FileName.ends_with(Suffix)) |
94 | BaseName = FileName.drop_back(N: Suffix.size()); |
95 | |
96 | const StringRef ABIVersionPrefix = "oclc_abi_version_" ; |
97 | if (BaseName == "ocml" ) { |
98 | OCML = FilePath; |
99 | } else if (BaseName == "ockl" ) { |
100 | OCKL = FilePath; |
101 | } else if (BaseName == "opencl" ) { |
102 | OpenCL = FilePath; |
103 | } else if (BaseName == "hip" ) { |
104 | HIP = FilePath; |
105 | } else if (BaseName == "asanrtl" ) { |
106 | AsanRTL = FilePath; |
107 | } else if (BaseName == "oclc_finite_only_off" ) { |
108 | FiniteOnly.Off = FilePath; |
109 | } else if (BaseName == "oclc_finite_only_on" ) { |
110 | FiniteOnly.On = FilePath; |
111 | } else if (BaseName == "oclc_daz_opt_on" ) { |
112 | DenormalsAreZero.On = FilePath; |
113 | } else if (BaseName == "oclc_daz_opt_off" ) { |
114 | DenormalsAreZero.Off = FilePath; |
115 | } else if (BaseName == "oclc_correctly_rounded_sqrt_on" ) { |
116 | CorrectlyRoundedSqrt.On = FilePath; |
117 | } else if (BaseName == "oclc_correctly_rounded_sqrt_off" ) { |
118 | CorrectlyRoundedSqrt.Off = FilePath; |
119 | } else if (BaseName == "oclc_unsafe_math_on" ) { |
120 | UnsafeMath.On = FilePath; |
121 | } else if (BaseName == "oclc_unsafe_math_off" ) { |
122 | UnsafeMath.Off = FilePath; |
123 | } else if (BaseName == "oclc_wavefrontsize64_on" ) { |
124 | WavefrontSize64.On = FilePath; |
125 | } else if (BaseName == "oclc_wavefrontsize64_off" ) { |
126 | WavefrontSize64.Off = FilePath; |
127 | } else if (BaseName.starts_with(Prefix: ABIVersionPrefix)) { |
128 | unsigned ABIVersionNumber; |
129 | if (BaseName.drop_front(N: ABIVersionPrefix.size()) |
130 | .getAsInteger(/*Redex=*/Radix: 0, Result&: ABIVersionNumber)) |
131 | continue; |
132 | ABIVersionMap[ABIVersionNumber] = FilePath.str(); |
133 | } else { |
134 | // Process all bitcode filenames that look like |
135 | // ocl_isa_version_XXX.amdgcn.bc |
136 | const StringRef DeviceLibPrefix = "oclc_isa_version_" ; |
137 | if (!BaseName.starts_with(Prefix: DeviceLibPrefix)) |
138 | continue; |
139 | |
140 | StringRef IsaVersionNumber = |
141 | BaseName.drop_front(N: DeviceLibPrefix.size()); |
142 | |
143 | llvm::Twine GfxName = Twine("gfx" ) + IsaVersionNumber; |
144 | SmallString<8> Tmp; |
145 | LibDeviceMap.insert( |
146 | KV: std::make_pair(x: GfxName.toStringRef(Out&: Tmp), y: FilePath.str())); |
147 | } |
148 | } |
149 | } |
150 | |
151 | // Parse and extract version numbers from `.hipVersion`. Return `true` if |
152 | // the parsing fails. |
153 | bool RocmInstallationDetector::parseHIPVersionFile(llvm::StringRef V) { |
154 | SmallVector<StringRef, 4> VersionParts; |
155 | V.split(A&: VersionParts, Separator: '\n'); |
156 | unsigned Major = ~0U; |
157 | unsigned Minor = ~0U; |
158 | for (auto Part : VersionParts) { |
159 | auto Splits = Part.rtrim().split(Separator: '='); |
160 | if (Splits.first == "HIP_VERSION_MAJOR" ) { |
161 | if (Splits.second.getAsInteger(Radix: 0, Result&: Major)) |
162 | return true; |
163 | } else if (Splits.first == "HIP_VERSION_MINOR" ) { |
164 | if (Splits.second.getAsInteger(Radix: 0, Result&: Minor)) |
165 | return true; |
166 | } else if (Splits.first == "HIP_VERSION_PATCH" ) |
167 | VersionPatch = Splits.second.str(); |
168 | } |
169 | if (Major == ~0U || Minor == ~0U) |
170 | return true; |
171 | VersionMajorMinor = llvm::VersionTuple(Major, Minor); |
172 | DetectedVersion = |
173 | (Twine(Major) + "." + Twine(Minor) + "." + VersionPatch).str(); |
174 | return false; |
175 | } |
176 | |
177 | /// \returns a list of candidate directories for ROCm installation, which is |
178 | /// cached and populated only once. |
179 | const SmallVectorImpl<RocmInstallationDetector::Candidate> & |
180 | RocmInstallationDetector::getInstallationPathCandidates() { |
181 | |
182 | // Return the cached candidate list if it has already been populated. |
183 | if (!ROCmSearchDirs.empty()) |
184 | return ROCmSearchDirs; |
185 | |
186 | auto DoPrintROCmSearchDirs = [&]() { |
187 | if (PrintROCmSearchDirs) |
188 | for (auto Cand : ROCmSearchDirs) { |
189 | llvm::errs() << "ROCm installation search path" ; |
190 | if (Cand.isSPACK()) |
191 | llvm::errs() << " (Spack " << Cand.SPACKReleaseStr << ")" ; |
192 | llvm::errs() << ": " << Cand.Path << '\n'; |
193 | } |
194 | }; |
195 | |
196 | // For candidate specified by --rocm-path we do not do strict check, i.e., |
197 | // checking existence of HIP version file and device library files. |
198 | if (!RocmPathArg.empty()) { |
199 | ROCmSearchDirs.emplace_back(Args: RocmPathArg.str()); |
200 | DoPrintROCmSearchDirs(); |
201 | return ROCmSearchDirs; |
202 | } else if (std::optional<std::string> RocmPathEnv = |
203 | llvm::sys::Process::GetEnv(name: "ROCM_PATH" )) { |
204 | if (!RocmPathEnv->empty()) { |
205 | ROCmSearchDirs.emplace_back(Args: std::move(*RocmPathEnv)); |
206 | DoPrintROCmSearchDirs(); |
207 | return ROCmSearchDirs; |
208 | } |
209 | } |
210 | |
211 | // Try to find relative to the compiler binary. |
212 | StringRef InstallDir = D.Dir; |
213 | |
214 | // Check both a normal Unix prefix position of the clang binary, as well as |
215 | // the Windows-esque layout the ROCm packages use with the host architecture |
216 | // subdirectory of bin. |
217 | auto DeduceROCmPath = [](StringRef ClangPath) { |
218 | // Strip off directory (usually bin) |
219 | StringRef ParentDir = llvm::sys::path::parent_path(path: ClangPath); |
220 | StringRef ParentName = llvm::sys::path::filename(path: ParentDir); |
221 | |
222 | // Some builds use bin/{host arch}, so go up again. |
223 | if (ParentName == "bin" ) { |
224 | ParentDir = llvm::sys::path::parent_path(path: ParentDir); |
225 | ParentName = llvm::sys::path::filename(path: ParentDir); |
226 | } |
227 | |
228 | // Detect ROCm packages built with SPACK. |
229 | // clang is installed at |
230 | // <rocm_root>/llvm-amdgpu-<rocm_release_string>-<hash>/bin directory. |
231 | // We only consider the parent directory of llvm-amdgpu package as ROCm |
232 | // installation candidate for SPACK. |
233 | if (ParentName.starts_with(Prefix: "llvm-amdgpu-" )) { |
234 | auto SPACKPostfix = |
235 | ParentName.drop_front(N: strlen(s: "llvm-amdgpu-" )).split(Separator: '-'); |
236 | auto SPACKReleaseStr = SPACKPostfix.first; |
237 | if (!SPACKReleaseStr.empty()) { |
238 | ParentDir = llvm::sys::path::parent_path(path: ParentDir); |
239 | return Candidate(ParentDir.str(), /*StrictChecking=*/true, |
240 | SPACKReleaseStr); |
241 | } |
242 | } |
243 | |
244 | // Some versions of the rocm llvm package install to /opt/rocm/llvm/bin |
245 | // Some versions of the aomp package install to /opt/rocm/aomp/bin |
246 | if (ParentName == "llvm" || ParentName.starts_with(Prefix: "aomp" )) |
247 | ParentDir = llvm::sys::path::parent_path(path: ParentDir); |
248 | |
249 | return Candidate(ParentDir.str(), /*StrictChecking=*/true); |
250 | }; |
251 | |
252 | // Deduce ROCm path by the path used to invoke clang. Do not resolve symbolic |
253 | // link of clang itself. |
254 | ROCmSearchDirs.emplace_back(Args: DeduceROCmPath(InstallDir)); |
255 | |
256 | // Deduce ROCm path by the real path of the invoked clang, resolving symbolic |
257 | // link of clang itself. |
258 | llvm::SmallString<256> RealClangPath; |
259 | llvm::sys::fs::real_path(path: D.getClangProgramPath(), output&: RealClangPath); |
260 | auto ParentPath = llvm::sys::path::parent_path(path: RealClangPath); |
261 | if (ParentPath != InstallDir) |
262 | ROCmSearchDirs.emplace_back(Args: DeduceROCmPath(ParentPath)); |
263 | |
264 | // Device library may be installed in clang or resource directory. |
265 | auto ClangRoot = llvm::sys::path::parent_path(path: InstallDir); |
266 | auto RealClangRoot = llvm::sys::path::parent_path(path: ParentPath); |
267 | ROCmSearchDirs.emplace_back(Args: ClangRoot.str(), /*StrictChecking=*/Args: true); |
268 | if (RealClangRoot != ClangRoot) |
269 | ROCmSearchDirs.emplace_back(Args: RealClangRoot.str(), /*StrictChecking=*/Args: true); |
270 | ROCmSearchDirs.emplace_back(Args: D.ResourceDir, |
271 | /*StrictChecking=*/Args: true); |
272 | |
273 | ROCmSearchDirs.emplace_back(Args: D.SysRoot + "/opt/rocm" , |
274 | /*StrictChecking=*/Args: true); |
275 | |
276 | // Find the latest /opt/rocm-{release} directory. |
277 | std::error_code EC; |
278 | std::string LatestROCm; |
279 | llvm::VersionTuple LatestVer; |
280 | // Get ROCm version from ROCm directory name. |
281 | auto GetROCmVersion = [](StringRef DirName) { |
282 | llvm::VersionTuple V; |
283 | std::string VerStr = DirName.drop_front(N: strlen(s: "rocm-" )).str(); |
284 | // The ROCm directory name follows the format of |
285 | // rocm-{major}.{minor}.{subMinor}[-{build}] |
286 | std::replace(first: VerStr.begin(), last: VerStr.end(), old_value: '-', new_value: '.'); |
287 | V.tryParse(string: VerStr); |
288 | return V; |
289 | }; |
290 | for (llvm::vfs::directory_iterator |
291 | File = D.getVFS().dir_begin(Dir: D.SysRoot + "/opt" , EC), |
292 | FileEnd; |
293 | File != FileEnd && !EC; File.increment(EC)) { |
294 | llvm::StringRef FileName = llvm::sys::path::filename(path: File->path()); |
295 | if (!FileName.starts_with(Prefix: "rocm-" )) |
296 | continue; |
297 | if (LatestROCm.empty()) { |
298 | LatestROCm = FileName.str(); |
299 | LatestVer = GetROCmVersion(LatestROCm); |
300 | continue; |
301 | } |
302 | auto Ver = GetROCmVersion(FileName); |
303 | if (LatestVer < Ver) { |
304 | LatestROCm = FileName.str(); |
305 | LatestVer = Ver; |
306 | } |
307 | } |
308 | if (!LatestROCm.empty()) |
309 | ROCmSearchDirs.emplace_back(Args: D.SysRoot + "/opt/" + LatestROCm, |
310 | /*StrictChecking=*/Args: true); |
311 | |
312 | ROCmSearchDirs.emplace_back(Args: D.SysRoot + "/usr/local" , |
313 | /*StrictChecking=*/Args: true); |
314 | ROCmSearchDirs.emplace_back(Args: D.SysRoot + "/usr" , |
315 | /*StrictChecking=*/Args: true); |
316 | |
317 | DoPrintROCmSearchDirs(); |
318 | return ROCmSearchDirs; |
319 | } |
320 | |
321 | RocmInstallationDetector::RocmInstallationDetector( |
322 | const Driver &D, const llvm::Triple &HostTriple, |
323 | const llvm::opt::ArgList &Args, bool DetectHIPRuntime, bool DetectDeviceLib) |
324 | : D(D) { |
325 | Verbose = Args.hasArg(options::OPT_v); |
326 | RocmPathArg = Args.getLastArgValue(clang::driver::options::Id: OPT_rocm_path_EQ); |
327 | PrintROCmSearchDirs = |
328 | Args.hasArg(clang::driver::options::OPT_print_rocm_search_dirs); |
329 | RocmDeviceLibPathArg = |
330 | Args.getAllArgValues(clang::driver::options::Id: OPT_rocm_device_lib_path_EQ); |
331 | HIPPathArg = Args.getLastArgValue(clang::driver::options::Id: OPT_hip_path_EQ); |
332 | HIPStdParPathArg = |
333 | Args.getLastArgValue(clang::driver::options::Id: OPT_hipstdpar_path_EQ); |
334 | HasHIPStdParLibrary = |
335 | !HIPStdParPathArg.empty() && D.getVFS().exists(Path: HIPStdParPathArg + |
336 | "/hipstdpar_lib.hpp" ); |
337 | HIPRocThrustPathArg = |
338 | Args.getLastArgValue(clang::driver::options::Id: OPT_hipstdpar_thrust_path_EQ); |
339 | HasRocThrustLibrary = !HIPRocThrustPathArg.empty() && |
340 | D.getVFS().exists(Path: HIPRocThrustPathArg + "/thrust" ); |
341 | HIPRocPrimPathArg = |
342 | Args.getLastArgValue(clang::driver::options::Id: OPT_hipstdpar_prim_path_EQ); |
343 | HasRocPrimLibrary = !HIPRocPrimPathArg.empty() && |
344 | D.getVFS().exists(Path: HIPRocPrimPathArg + "/rocprim" ); |
345 | |
346 | if (auto *A = Args.getLastArg(clang::driver::options::OPT_hip_version_EQ)) { |
347 | HIPVersionArg = A->getValue(); |
348 | unsigned Major = ~0U; |
349 | unsigned Minor = ~0U; |
350 | SmallVector<StringRef, 3> Parts; |
351 | HIPVersionArg.split(A&: Parts, Separator: '.'); |
352 | if (Parts.size()) |
353 | Parts[0].getAsInteger(Radix: 0, Result&: Major); |
354 | if (Parts.size() > 1) |
355 | Parts[1].getAsInteger(Radix: 0, Result&: Minor); |
356 | if (Parts.size() > 2) |
357 | VersionPatch = Parts[2].str(); |
358 | if (VersionPatch.empty()) |
359 | VersionPatch = "0" ; |
360 | if (Major != ~0U && Minor == ~0U) |
361 | Minor = 0; |
362 | if (Major == ~0U || Minor == ~0U) |
363 | D.Diag(diag::DiagID: err_drv_invalid_value) |
364 | << A->getAsString(Args) << HIPVersionArg; |
365 | |
366 | VersionMajorMinor = llvm::VersionTuple(Major, Minor); |
367 | DetectedVersion = |
368 | (Twine(Major) + "." + Twine(Minor) + "." + VersionPatch).str(); |
369 | } else { |
370 | VersionPatch = DefaultVersionPatch; |
371 | VersionMajorMinor = |
372 | llvm::VersionTuple(DefaultVersionMajor, DefaultVersionMinor); |
373 | DetectedVersion = (Twine(DefaultVersionMajor) + "." + |
374 | Twine(DefaultVersionMinor) + "." + VersionPatch) |
375 | .str(); |
376 | } |
377 | |
378 | if (DetectHIPRuntime) |
379 | detectHIPRuntime(); |
380 | if (DetectDeviceLib) |
381 | detectDeviceLibrary(); |
382 | } |
383 | |
384 | void RocmInstallationDetector::detectDeviceLibrary() { |
385 | assert(LibDevicePath.empty()); |
386 | |
387 | if (!RocmDeviceLibPathArg.empty()) |
388 | LibDevicePath = RocmDeviceLibPathArg[RocmDeviceLibPathArg.size() - 1]; |
389 | else if (std::optional<std::string> LibPathEnv = |
390 | llvm::sys::Process::GetEnv(name: "HIP_DEVICE_LIB_PATH" )) |
391 | LibDevicePath = std::move(*LibPathEnv); |
392 | |
393 | auto &FS = D.getVFS(); |
394 | if (!LibDevicePath.empty()) { |
395 | // Maintain compatability with HIP flag/envvar pointing directly at the |
396 | // bitcode library directory. This points directly at the library path instead |
397 | // of the rocm root installation. |
398 | if (!FS.exists(Path: LibDevicePath)) |
399 | return; |
400 | |
401 | scanLibDevicePath(Path: LibDevicePath); |
402 | HasDeviceLibrary = allGenericLibsValid() && !LibDeviceMap.empty(); |
403 | return; |
404 | } |
405 | |
406 | // Check device library exists at the given path. |
407 | auto CheckDeviceLib = [&](StringRef Path, bool StrictChecking) { |
408 | bool CheckLibDevice = (!NoBuiltinLibs || StrictChecking); |
409 | if (CheckLibDevice && !FS.exists(Path)) |
410 | return false; |
411 | |
412 | scanLibDevicePath(Path); |
413 | |
414 | if (!NoBuiltinLibs) { |
415 | // Check that the required non-target libraries are all available. |
416 | if (!allGenericLibsValid()) |
417 | return false; |
418 | |
419 | // Check that we have found at least one libdevice that we can link in |
420 | // if -nobuiltinlib hasn't been specified. |
421 | if (LibDeviceMap.empty()) |
422 | return false; |
423 | } |
424 | return true; |
425 | }; |
426 | |
427 | // Find device libraries in <LLVM_DIR>/lib/clang/<ver>/lib/amdgcn/bitcode |
428 | LibDevicePath = D.ResourceDir; |
429 | llvm::sys::path::append(path&: LibDevicePath, CLANG_INSTALL_LIBDIR_BASENAME, |
430 | b: "amdgcn" , c: "bitcode" ); |
431 | HasDeviceLibrary = CheckDeviceLib(LibDevicePath, true); |
432 | if (HasDeviceLibrary) |
433 | return; |
434 | |
435 | // Find device libraries in a legacy ROCm directory structure |
436 | // ${ROCM_ROOT}/amdgcn/bitcode/* |
437 | auto &ROCmDirs = getInstallationPathCandidates(); |
438 | for (const auto &Candidate : ROCmDirs) { |
439 | LibDevicePath = Candidate.Path; |
440 | llvm::sys::path::append(path&: LibDevicePath, a: "amdgcn" , b: "bitcode" ); |
441 | HasDeviceLibrary = CheckDeviceLib(LibDevicePath, Candidate.StrictChecking); |
442 | if (HasDeviceLibrary) |
443 | return; |
444 | } |
445 | } |
446 | |
447 | void RocmInstallationDetector::detectHIPRuntime() { |
448 | SmallVector<Candidate, 4> HIPSearchDirs; |
449 | if (!HIPPathArg.empty()) |
450 | HIPSearchDirs.emplace_back(Args: HIPPathArg.str()); |
451 | else if (std::optional<std::string> HIPPathEnv = |
452 | llvm::sys::Process::GetEnv(name: "HIP_PATH" )) { |
453 | if (!HIPPathEnv->empty()) |
454 | HIPSearchDirs.emplace_back(Args: std::move(*HIPPathEnv)); |
455 | } |
456 | if (HIPSearchDirs.empty()) |
457 | HIPSearchDirs.append(RHS: getInstallationPathCandidates()); |
458 | auto &FS = D.getVFS(); |
459 | |
460 | for (const auto &Candidate : HIPSearchDirs) { |
461 | InstallPath = Candidate.Path; |
462 | if (InstallPath.empty() || !FS.exists(Path: InstallPath)) |
463 | continue; |
464 | // HIP runtime built by SPACK is installed to |
465 | // <rocm_root>/hip-<rocm_release_string>-<hash> directory. |
466 | auto SPACKPath = findSPACKPackage(Cand: Candidate, PackageName: "hip" ); |
467 | InstallPath = SPACKPath.empty() ? InstallPath : SPACKPath; |
468 | |
469 | BinPath = InstallPath; |
470 | llvm::sys::path::append(path&: BinPath, a: "bin" ); |
471 | IncludePath = InstallPath; |
472 | llvm::sys::path::append(path&: IncludePath, a: "include" ); |
473 | LibPath = InstallPath; |
474 | llvm::sys::path::append(path&: LibPath, a: "lib" ); |
475 | SharePath = InstallPath; |
476 | llvm::sys::path::append(path&: SharePath, a: "share" ); |
477 | |
478 | // Get parent of InstallPath and append "share" |
479 | SmallString<0> ParentSharePath = llvm::sys::path::parent_path(path: InstallPath); |
480 | llvm::sys::path::append(path&: ParentSharePath, a: "share" ); |
481 | |
482 | auto Append = [](SmallString<0> &path, const Twine &a, const Twine &b = "" , |
483 | const Twine &c = "" , const Twine &d = "" ) { |
484 | SmallString<0> newpath = path; |
485 | llvm::sys::path::append(path&: newpath, a, b, c, d); |
486 | return newpath; |
487 | }; |
488 | // If HIP version file can be found and parsed, use HIP version from there. |
489 | std::vector<SmallString<0>> VersionFilePaths = { |
490 | Append(SharePath, "hip" , "version" ), |
491 | InstallPath != D.SysRoot + "/usr/local" |
492 | ? Append(ParentSharePath, "hip" , "version" ) |
493 | : SmallString<0>(), |
494 | Append(BinPath, ".hipVersion" )}; |
495 | |
496 | for (const auto &VersionFilePath : VersionFilePaths) { |
497 | if (VersionFilePath.empty()) |
498 | continue; |
499 | llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> VersionFile = |
500 | FS.getBufferForFile(Name: VersionFilePath); |
501 | if (!VersionFile) |
502 | continue; |
503 | if (HIPVersionArg.empty() && VersionFile) |
504 | if (parseHIPVersionFile(V: (*VersionFile)->getBuffer())) |
505 | continue; |
506 | |
507 | HasHIPRuntime = true; |
508 | return; |
509 | } |
510 | // Otherwise, if -rocm-path is specified (no strict checking), use the |
511 | // default HIP version or specified by --hip-version. |
512 | if (!Candidate.StrictChecking) { |
513 | HasHIPRuntime = true; |
514 | return; |
515 | } |
516 | } |
517 | HasHIPRuntime = false; |
518 | } |
519 | |
520 | void RocmInstallationDetector::print(raw_ostream &OS) const { |
521 | if (hasHIPRuntime()) |
522 | OS << "Found HIP installation: " << InstallPath << ", version " |
523 | << DetectedVersion << '\n'; |
524 | } |
525 | |
526 | void RocmInstallationDetector::AddHIPIncludeArgs(const ArgList &DriverArgs, |
527 | ArgStringList &CC1Args) const { |
528 | bool UsesRuntimeWrapper = VersionMajorMinor > llvm::VersionTuple(3, 5) && |
529 | !DriverArgs.hasArg(options::OPT_nohipwrapperinc); |
530 | bool HasHipStdPar = DriverArgs.hasArg(options::OPT_hipstdpar); |
531 | |
532 | if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) { |
533 | // HIP header includes standard library wrapper headers under clang |
534 | // cuda_wrappers directory. Since these wrapper headers include_next |
535 | // standard C++ headers, whereas libc++ headers include_next other clang |
536 | // headers. The include paths have to follow this order: |
537 | // - wrapper include path |
538 | // - standard C++ include path |
539 | // - other clang include path |
540 | // Since standard C++ and other clang include paths are added in other |
541 | // places after this function, here we only need to make sure wrapper |
542 | // include path is added. |
543 | // |
544 | // ROCm 3.5 does not fully support the wrapper headers. Therefore it needs |
545 | // a workaround. |
546 | SmallString<128> P(D.ResourceDir); |
547 | if (UsesRuntimeWrapper) |
548 | llvm::sys::path::append(path&: P, a: "include" , b: "cuda_wrappers" ); |
549 | CC1Args.push_back(Elt: "-internal-isystem" ); |
550 | CC1Args.push_back(Elt: DriverArgs.MakeArgString(Str: P)); |
551 | } |
552 | |
553 | const auto HandleHipStdPar = [=, &DriverArgs, &CC1Args]() { |
554 | StringRef Inc = getIncludePath(); |
555 | auto &FS = D.getVFS(); |
556 | |
557 | if (!hasHIPStdParLibrary()) |
558 | if (!HIPStdParPathArg.empty() || |
559 | !FS.exists(Path: Inc + "/thrust/system/hip/hipstdpar/hipstdpar_lib.hpp" )) { |
560 | D.Diag(diag::DiagID: err_drv_no_hipstdpar_lib); |
561 | return; |
562 | } |
563 | if (!HasRocThrustLibrary && !FS.exists(Path: Inc + "/thrust" )) { |
564 | D.Diag(diag::DiagID: err_drv_no_hipstdpar_thrust_lib); |
565 | return; |
566 | } |
567 | if (!HasRocPrimLibrary && !FS.exists(Path: Inc + "/rocprim" )) { |
568 | D.Diag(diag::DiagID: err_drv_no_hipstdpar_prim_lib); |
569 | return; |
570 | } |
571 | const char *ThrustPath; |
572 | if (HasRocThrustLibrary) |
573 | ThrustPath = DriverArgs.MakeArgString(Str: HIPRocThrustPathArg); |
574 | else |
575 | ThrustPath = DriverArgs.MakeArgString(Str: Inc + "/thrust" ); |
576 | |
577 | const char *HIPStdParPath; |
578 | if (hasHIPStdParLibrary()) |
579 | HIPStdParPath = DriverArgs.MakeArgString(Str: HIPStdParPathArg); |
580 | else |
581 | HIPStdParPath = DriverArgs.MakeArgString(Str: StringRef(ThrustPath) + |
582 | "/system/hip/hipstdpar" ); |
583 | |
584 | const char *PrimPath; |
585 | if (HasRocPrimLibrary) |
586 | PrimPath = DriverArgs.MakeArgString(Str: HIPRocPrimPathArg); |
587 | else |
588 | PrimPath = DriverArgs.MakeArgString(Str: getIncludePath() + "/rocprim" ); |
589 | |
590 | CC1Args.append(IL: {"-idirafter" , ThrustPath, "-idirafter" , PrimPath, |
591 | "-idirafter" , HIPStdParPath, "-include" , |
592 | "hipstdpar_lib.hpp" }); |
593 | }; |
594 | |
595 | if (DriverArgs.hasArg(options::OPT_nogpuinc)) { |
596 | if (HasHipStdPar) |
597 | HandleHipStdPar(); |
598 | |
599 | return; |
600 | } |
601 | |
602 | if (!hasHIPRuntime()) { |
603 | D.Diag(diag::DiagID: err_drv_no_hip_runtime); |
604 | return; |
605 | } |
606 | |
607 | CC1Args.push_back(Elt: "-idirafter" ); |
608 | CC1Args.push_back(Elt: DriverArgs.MakeArgString(Str: getIncludePath())); |
609 | if (UsesRuntimeWrapper) |
610 | CC1Args.append(IL: {"-include" , "__clang_hip_runtime_wrapper.h" }); |
611 | if (HasHipStdPar) |
612 | HandleHipStdPar(); |
613 | } |
614 | |
615 | void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA, |
616 | const InputInfo &Output, |
617 | const InputInfoList &Inputs, |
618 | const ArgList &Args, |
619 | const char *LinkingOutput) const { |
620 | std::string Linker = getToolChain().GetLinkerPath(); |
621 | ArgStringList CmdArgs; |
622 | CmdArgs.push_back(Elt: "--no-undefined" ); |
623 | CmdArgs.push_back(Elt: "-shared" ); |
624 | |
625 | addLinkerCompressDebugSectionsOption(TC: getToolChain(), Args, CmdArgs); |
626 | Args.AddAllArgs(Output&: CmdArgs, options::Id0: OPT_L); |
627 | getToolChain().AddFilePathLibArgs(Args, CmdArgs); |
628 | AddLinkerInputs(TC: getToolChain(), Inputs, Args, CmdArgs, JA); |
629 | if (C.getDriver().isUsingLTO()) |
630 | addLTOOptions(ToolChain: getToolChain(), Args, CmdArgs, Output, Input: Inputs[0], |
631 | IsThinLTO: C.getDriver().getLTOMode() == LTOK_Thin); |
632 | else if (Args.hasArg(options::OPT_mcpu_EQ)) |
633 | CmdArgs.push_back(Elt: Args.MakeArgString( |
634 | Str: "-plugin-opt=mcpu=" + Args.getLastArgValue(options::Id: OPT_mcpu_EQ))); |
635 | CmdArgs.push_back(Elt: "-o" ); |
636 | CmdArgs.push_back(Elt: Output.getFilename()); |
637 | C.addCommand(C: std::make_unique<Command>( |
638 | args: JA, args: *this, args: ResponseFileSupport::AtFileCurCP(), args: Args.MakeArgString(Str: Linker), |
639 | args&: CmdArgs, args: Inputs, args: Output)); |
640 | } |
641 | |
642 | void amdgpu::getAMDGPUTargetFeatures(const Driver &D, |
643 | const llvm::Triple &Triple, |
644 | const llvm::opt::ArgList &Args, |
645 | std::vector<StringRef> &Features) { |
646 | // Add target ID features to -target-feature options. No diagnostics should |
647 | // be emitted here since invalid target ID is diagnosed at other places. |
648 | StringRef TargetID = Args.getLastArgValue(options::Id: OPT_mcpu_EQ); |
649 | if (!TargetID.empty()) { |
650 | llvm::StringMap<bool> FeatureMap; |
651 | auto OptionalGpuArch = parseTargetID(T: Triple, OffloadArch: TargetID, FeatureMap: &FeatureMap); |
652 | if (OptionalGpuArch) { |
653 | StringRef GpuArch = *OptionalGpuArch; |
654 | // Iterate through all possible target ID features for the given GPU. |
655 | // If it is mapped to true, add +feature. |
656 | // If it is mapped to false, add -feature. |
657 | // If it is not in the map (default), do not add it |
658 | for (auto &&Feature : getAllPossibleTargetIDFeatures(Triple, GpuArch)) { |
659 | auto Pos = FeatureMap.find(Feature); |
660 | if (Pos == FeatureMap.end()) |
661 | continue; |
662 | Features.push_back(Args.MakeArgStringRef( |
663 | (Twine(Pos->second ? "+" : "-" ) + Feature).str())); |
664 | } |
665 | } |
666 | } |
667 | |
668 | if (Args.hasFlag(options::OPT_mwavefrontsize64, |
669 | options::OPT_mno_wavefrontsize64, false)) |
670 | Features.push_back(x: "+wavefrontsize64" ); |
671 | |
672 | if (Args.hasFlag(options::OPT_mamdgpu_precise_memory_op, |
673 | options::OPT_mno_amdgpu_precise_memory_op, false)) |
674 | Features.push_back(x: "+precise-memory" ); |
675 | |
676 | handleTargetFeaturesGroup(D, Triple, Args, Features, |
677 | options::OPT_m_amdgpu_Features_Group); |
678 | } |
679 | |
680 | /// AMDGPU Toolchain |
681 | AMDGPUToolChain::AMDGPUToolChain(const Driver &D, const llvm::Triple &Triple, |
682 | const ArgList &Args) |
683 | : Generic_ELF(D, Triple, Args), |
684 | OptionsDefault( |
685 | {{options::OPT_O, "3" }, {options::OPT_cl_std_EQ, "CL1.2" }}) { |
686 | // Check code object version options. Emit warnings for legacy options |
687 | // and errors for the last invalid code object version options. |
688 | // It is done here to avoid repeated warning or error messages for |
689 | // each tool invocation. |
690 | checkAMDGPUCodeObjectVersion(D, Args); |
691 | } |
692 | |
693 | Tool *AMDGPUToolChain::buildLinker() const { |
694 | return new tools::amdgpu::Linker(*this); |
695 | } |
696 | |
697 | DerivedArgList * |
698 | AMDGPUToolChain::TranslateArgs(const DerivedArgList &Args, StringRef BoundArch, |
699 | Action::OffloadKind DeviceOffloadKind) const { |
700 | |
701 | DerivedArgList *DAL = |
702 | Generic_ELF::TranslateArgs(Args, BoundArch, DeviceOffloadKind); |
703 | |
704 | const OptTable &Opts = getDriver().getOpts(); |
705 | |
706 | if (!DAL) |
707 | DAL = new DerivedArgList(Args.getBaseArgs()); |
708 | |
709 | for (Arg *A : Args) |
710 | DAL->append(A); |
711 | |
712 | // Replace -mcpu=native with detected GPU. |
713 | Arg *LastMCPUArg = DAL->getLastArg(options::OPT_mcpu_EQ); |
714 | if (LastMCPUArg && StringRef(LastMCPUArg->getValue()) == "native" ) { |
715 | DAL->eraseArg(options::Id: OPT_mcpu_EQ); |
716 | auto GPUsOrErr = getSystemGPUArchs(Args); |
717 | if (!GPUsOrErr) { |
718 | getDriver().Diag(diag::DiagID: err_drv_undetermined_gpu_arch) |
719 | << llvm::Triple::getArchTypeName(Kind: getArch()) |
720 | << llvm::toString(E: GPUsOrErr.takeError()) << "-mcpu" ; |
721 | } else { |
722 | auto &GPUs = *GPUsOrErr; |
723 | if (GPUs.size() > 1) { |
724 | getDriver().Diag(diag::DiagID: warn_drv_multi_gpu_arch) |
725 | << llvm::Triple::getArchTypeName(Kind: getArch()) |
726 | << llvm::join(R&: GPUs, Separator: ", " ) << "-mcpu" ; |
727 | } |
728 | DAL->AddJoinedArg(BaseArg: nullptr, Opt: Opts.getOption(options::Opt: OPT_mcpu_EQ), |
729 | Value: Args.MakeArgString(Str: GPUs.front())); |
730 | } |
731 | } |
732 | |
733 | checkTargetID(DriverArgs: *DAL); |
734 | |
735 | if (!Args.getLastArgValue(options::Id: OPT_x).equals("cl" )) |
736 | return DAL; |
737 | |
738 | // Phase 1 (.cl -> .bc) |
739 | if (Args.hasArg(options::OPT_c) && Args.hasArg(options::OPT_emit_llvm)) { |
740 | DAL->AddFlagArg(BaseArg: nullptr, Opt: Opts.getOption(Opt: getTriple().isArch64Bit() |
741 | ? options::OPT_m64 |
742 | : options::OPT_m32)); |
743 | |
744 | // Have to check OPT_O4, OPT_O0 & OPT_Ofast separately |
745 | // as they defined that way in Options.td |
746 | if (!Args.hasArg(options::OPT_O, options::OPT_O0, options::OPT_O4, |
747 | options::OPT_Ofast)) |
748 | DAL->AddJoinedArg(BaseArg: nullptr, Opt: Opts.getOption(options::Opt: OPT_O), |
749 | Value: getOptionDefault(options::OptID: OPT_O)); |
750 | } |
751 | |
752 | return DAL; |
753 | } |
754 | |
755 | bool AMDGPUToolChain::getDefaultDenormsAreZeroForTarget( |
756 | llvm::AMDGPU::GPUKind Kind) { |
757 | |
758 | // Assume nothing without a specific target. |
759 | if (Kind == llvm::AMDGPU::GK_NONE) |
760 | return false; |
761 | |
762 | const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(AK: Kind); |
763 | |
764 | // Default to enabling f32 denormals by default on subtargets where fma is |
765 | // fast with denormals |
766 | const bool BothDenormAndFMAFast = |
767 | (ArchAttr & llvm::AMDGPU::FEATURE_FAST_FMA_F32) && |
768 | (ArchAttr & llvm::AMDGPU::FEATURE_FAST_DENORMAL_F32); |
769 | return !BothDenormAndFMAFast; |
770 | } |
771 | |
772 | llvm::DenormalMode AMDGPUToolChain::getDefaultDenormalModeForType( |
773 | const llvm::opt::ArgList &DriverArgs, const JobAction &JA, |
774 | const llvm::fltSemantics *FPType) const { |
775 | // Denormals should always be enabled for f16 and f64. |
776 | if (!FPType || FPType != &llvm::APFloat::IEEEsingle()) |
777 | return llvm::DenormalMode::getIEEE(); |
778 | |
779 | if (JA.getOffloadingDeviceKind() == Action::OFK_HIP || |
780 | JA.getOffloadingDeviceKind() == Action::OFK_Cuda) { |
781 | auto Arch = getProcessorFromTargetID(T: getTriple(), OffloadArch: JA.getOffloadingArch()); |
782 | auto Kind = llvm::AMDGPU::parseArchAMDGCN(CPU: Arch); |
783 | if (FPType && FPType == &llvm::APFloat::IEEEsingle() && |
784 | DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero, |
785 | options::OPT_fno_gpu_flush_denormals_to_zero, |
786 | getDefaultDenormsAreZeroForTarget(Kind))) |
787 | return llvm::DenormalMode::getPreserveSign(); |
788 | |
789 | return llvm::DenormalMode::getIEEE(); |
790 | } |
791 | |
792 | const StringRef GpuArch = getGPUArch(DriverArgs); |
793 | auto Kind = llvm::AMDGPU::parseArchAMDGCN(CPU: GpuArch); |
794 | |
795 | // TODO: There are way too many flags that change this. Do we need to check |
796 | // them all? |
797 | bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) || |
798 | getDefaultDenormsAreZeroForTarget(Kind); |
799 | |
800 | // Outputs are flushed to zero (FTZ), preserving sign. Denormal inputs are |
801 | // also implicit treated as zero (DAZ). |
802 | return DAZ ? llvm::DenormalMode::getPreserveSign() : |
803 | llvm::DenormalMode::getIEEE(); |
804 | } |
805 | |
806 | bool AMDGPUToolChain::isWave64(const llvm::opt::ArgList &DriverArgs, |
807 | llvm::AMDGPU::GPUKind Kind) { |
808 | const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(AK: Kind); |
809 | bool HasWave32 = (ArchAttr & llvm::AMDGPU::FEATURE_WAVE32); |
810 | |
811 | return !HasWave32 || DriverArgs.hasFlag( |
812 | options::OPT_mwavefrontsize64, options::OPT_mno_wavefrontsize64, false); |
813 | } |
814 | |
815 | |
816 | /// ROCM Toolchain |
817 | ROCMToolChain::ROCMToolChain(const Driver &D, const llvm::Triple &Triple, |
818 | const ArgList &Args) |
819 | : AMDGPUToolChain(D, Triple, Args) { |
820 | RocmInstallation->detectDeviceLibrary(); |
821 | } |
822 | |
823 | void AMDGPUToolChain::addClangTargetOptions( |
824 | const llvm::opt::ArgList &DriverArgs, |
825 | llvm::opt::ArgStringList &CC1Args, |
826 | Action::OffloadKind DeviceOffloadingKind) const { |
827 | // Default to "hidden" visibility, as object level linking will not be |
828 | // supported for the foreseeable future. |
829 | if (!DriverArgs.hasArg(options::OPT_fvisibility_EQ, |
830 | options::OPT_fvisibility_ms_compat)) { |
831 | CC1Args.push_back(Elt: "-fvisibility=hidden" ); |
832 | CC1Args.push_back(Elt: "-fapply-global-visibility-to-externs" ); |
833 | } |
834 | } |
835 | |
836 | void AMDGPUToolChain::addClangWarningOptions(ArgStringList &CC1Args) const { |
837 | // AMDGPU does not support atomic lib call. Treat atomic alignment |
838 | // warnings as errors. |
839 | CC1Args.push_back(Elt: "-Werror=atomic-alignment" ); |
840 | } |
841 | |
842 | StringRef |
843 | AMDGPUToolChain::getGPUArch(const llvm::opt::ArgList &DriverArgs) const { |
844 | return getProcessorFromTargetID( |
845 | getTriple(), DriverArgs.getLastArgValue(options::OPT_mcpu_EQ)); |
846 | } |
847 | |
848 | AMDGPUToolChain::ParsedTargetIDType |
849 | AMDGPUToolChain::getParsedTargetID(const llvm::opt::ArgList &DriverArgs) const { |
850 | StringRef TargetID = DriverArgs.getLastArgValue(options::OPT_mcpu_EQ); |
851 | if (TargetID.empty()) |
852 | return {.OptionalTargetID: std::nullopt, .OptionalGPUArch: std::nullopt, .OptionalFeatures: std::nullopt}; |
853 | |
854 | llvm::StringMap<bool> FeatureMap; |
855 | auto OptionalGpuArch = parseTargetID(T: getTriple(), OffloadArch: TargetID, FeatureMap: &FeatureMap); |
856 | if (!OptionalGpuArch) |
857 | return {.OptionalTargetID: TargetID.str(), .OptionalGPUArch: std::nullopt, .OptionalFeatures: std::nullopt}; |
858 | |
859 | return {.OptionalTargetID: TargetID.str(), .OptionalGPUArch: OptionalGpuArch->str(), .OptionalFeatures: FeatureMap}; |
860 | } |
861 | |
862 | void AMDGPUToolChain::checkTargetID( |
863 | const llvm::opt::ArgList &DriverArgs) const { |
864 | auto PTID = getParsedTargetID(DriverArgs); |
865 | if (PTID.OptionalTargetID && !PTID.OptionalGPUArch) { |
866 | getDriver().Diag(clang::diag::err_drv_bad_target_id) |
867 | << *PTID.OptionalTargetID; |
868 | } |
869 | } |
870 | |
871 | Expected<SmallVector<std::string>> |
872 | AMDGPUToolChain::getSystemGPUArchs(const ArgList &Args) const { |
873 | // Detect AMD GPUs availible on the system. |
874 | std::string Program; |
875 | if (Arg *A = Args.getLastArg(options::OPT_amdgpu_arch_tool_EQ)) |
876 | Program = A->getValue(); |
877 | else |
878 | Program = GetProgramPath(Name: "amdgpu-arch" ); |
879 | |
880 | auto StdoutOrErr = executeToolChainProgram(Executable: Program); |
881 | if (!StdoutOrErr) |
882 | return StdoutOrErr.takeError(); |
883 | |
884 | SmallVector<std::string, 1> GPUArchs; |
885 | for (StringRef Arch : llvm::split(Str: (*StdoutOrErr)->getBuffer(), Separator: "\n" )) |
886 | if (!Arch.empty()) |
887 | GPUArchs.push_back(Elt: Arch.str()); |
888 | |
889 | if (GPUArchs.empty()) |
890 | return llvm::createStringError(EC: std::error_code(), |
891 | Msg: "No AMD GPU detected in the system" ); |
892 | |
893 | return std::move(GPUArchs); |
894 | } |
895 | |
896 | void ROCMToolChain::addClangTargetOptions( |
897 | const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, |
898 | Action::OffloadKind DeviceOffloadingKind) const { |
899 | AMDGPUToolChain::addClangTargetOptions(DriverArgs, CC1Args, |
900 | DeviceOffloadingKind); |
901 | |
902 | // For the OpenCL case where there is no offload target, accept -nostdlib to |
903 | // disable bitcode linking. |
904 | if (DeviceOffloadingKind == Action::OFK_None && |
905 | DriverArgs.hasArg(options::OPT_nostdlib)) |
906 | return; |
907 | |
908 | if (DriverArgs.hasArg(options::OPT_nogpulib)) |
909 | return; |
910 | |
911 | // Get the device name and canonicalize it |
912 | const StringRef GpuArch = getGPUArch(DriverArgs); |
913 | auto Kind = llvm::AMDGPU::parseArchAMDGCN(CPU: GpuArch); |
914 | const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(AK: Kind); |
915 | StringRef LibDeviceFile = RocmInstallation->getLibDeviceFile(Gpu: CanonArch); |
916 | auto ABIVer = DeviceLibABIVersion::fromCodeObjectVersion( |
917 | CodeObjectVersion: getAMDGPUCodeObjectVersion(D: getDriver(), Args: DriverArgs)); |
918 | if (!RocmInstallation->checkCommonBitcodeLibs(GPUArch: CanonArch, LibDeviceFile, |
919 | ABIVer)) |
920 | return; |
921 | |
922 | bool Wave64 = isWave64(DriverArgs, Kind); |
923 | |
924 | // TODO: There are way too many flags that change this. Do we need to check |
925 | // them all? |
926 | bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) || |
927 | getDefaultDenormsAreZeroForTarget(Kind); |
928 | bool FiniteOnly = DriverArgs.hasArg(options::OPT_cl_finite_math_only); |
929 | |
930 | bool UnsafeMathOpt = |
931 | DriverArgs.hasArg(options::OPT_cl_unsafe_math_optimizations); |
932 | bool FastRelaxedMath = DriverArgs.hasArg(options::OPT_cl_fast_relaxed_math); |
933 | bool CorrectSqrt = |
934 | DriverArgs.hasArg(options::OPT_cl_fp32_correctly_rounded_divide_sqrt); |
935 | |
936 | // Add the OpenCL specific bitcode library. |
937 | llvm::SmallVector<std::string, 12> BCLibs; |
938 | BCLibs.push_back(Elt: RocmInstallation->getOpenCLPath().str()); |
939 | |
940 | // Add the generic set of libraries. |
941 | BCLibs.append(RHS: RocmInstallation->getCommonBitcodeLibs( |
942 | DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt, |
943 | FastRelaxedMath, CorrectSqrt, ABIVer, isOpenMP: false)); |
944 | |
945 | for (StringRef BCFile : BCLibs) { |
946 | CC1Args.push_back(Elt: "-mlink-builtin-bitcode" ); |
947 | CC1Args.push_back(Elt: DriverArgs.MakeArgString(Str: BCFile)); |
948 | } |
949 | } |
950 | |
951 | bool RocmInstallationDetector::checkCommonBitcodeLibs( |
952 | StringRef GPUArch, StringRef LibDeviceFile, |
953 | DeviceLibABIVersion ABIVer) const { |
954 | if (!hasDeviceLibrary()) { |
955 | D.Diag(diag::err_drv_no_rocm_device_lib) << 0; |
956 | return false; |
957 | } |
958 | if (LibDeviceFile.empty()) { |
959 | D.Diag(diag::err_drv_no_rocm_device_lib) << 1 << GPUArch; |
960 | return false; |
961 | } |
962 | if (ABIVer.requiresLibrary() && getABIVersionPath(ABIVer).empty()) { |
963 | D.Diag(diag::err_drv_no_rocm_device_lib) << 2 << ABIVer.toString(); |
964 | return false; |
965 | } |
966 | return true; |
967 | } |
968 | |
969 | llvm::SmallVector<std::string, 12> |
970 | RocmInstallationDetector::getCommonBitcodeLibs( |
971 | const llvm::opt::ArgList &DriverArgs, StringRef LibDeviceFile, bool Wave64, |
972 | bool DAZ, bool FiniteOnly, bool UnsafeMathOpt, bool FastRelaxedMath, |
973 | bool CorrectSqrt, DeviceLibABIVersion ABIVer, bool isOpenMP = false) const { |
974 | llvm::SmallVector<std::string, 12> BCLibs; |
975 | |
976 | auto AddBCLib = [&](StringRef BCFile) { BCLibs.push_back(Elt: BCFile.str()); }; |
977 | |
978 | AddBCLib(getOCMLPath()); |
979 | if (!isOpenMP) |
980 | AddBCLib(getOCKLPath()); |
981 | AddBCLib(getDenormalsAreZeroPath(Enabled: DAZ)); |
982 | AddBCLib(getUnsafeMathPath(Enabled: UnsafeMathOpt || FastRelaxedMath)); |
983 | AddBCLib(getFiniteOnlyPath(Enabled: FiniteOnly || FastRelaxedMath)); |
984 | AddBCLib(getCorrectlyRoundedSqrtPath(Enabled: CorrectSqrt)); |
985 | AddBCLib(getWavefrontSize64Path(Enabled: Wave64)); |
986 | AddBCLib(LibDeviceFile); |
987 | auto ABIVerPath = getABIVersionPath(ABIVer); |
988 | if (!ABIVerPath.empty()) |
989 | AddBCLib(ABIVerPath); |
990 | |
991 | return BCLibs; |
992 | } |
993 | |
994 | llvm::SmallVector<std::string, 12> |
995 | ROCMToolChain::getCommonDeviceLibNames(const llvm::opt::ArgList &DriverArgs, |
996 | const std::string &GPUArch, |
997 | bool isOpenMP) const { |
998 | auto Kind = llvm::AMDGPU::parseArchAMDGCN(CPU: GPUArch); |
999 | const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(AK: Kind); |
1000 | |
1001 | StringRef LibDeviceFile = RocmInstallation->getLibDeviceFile(Gpu: CanonArch); |
1002 | auto ABIVer = DeviceLibABIVersion::fromCodeObjectVersion( |
1003 | CodeObjectVersion: getAMDGPUCodeObjectVersion(D: getDriver(), Args: DriverArgs)); |
1004 | if (!RocmInstallation->checkCommonBitcodeLibs(GPUArch: CanonArch, LibDeviceFile, |
1005 | ABIVer)) |
1006 | return {}; |
1007 | |
1008 | // If --hip-device-lib is not set, add the default bitcode libraries. |
1009 | // TODO: There are way too many flags that change this. Do we need to check |
1010 | // them all? |
1011 | bool DAZ = DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero, |
1012 | options::OPT_fno_gpu_flush_denormals_to_zero, |
1013 | getDefaultDenormsAreZeroForTarget(Kind)); |
1014 | bool FiniteOnly = DriverArgs.hasFlag( |
1015 | options::OPT_ffinite_math_only, options::OPT_fno_finite_math_only, false); |
1016 | bool UnsafeMathOpt = |
1017 | DriverArgs.hasFlag(options::OPT_funsafe_math_optimizations, |
1018 | options::OPT_fno_unsafe_math_optimizations, false); |
1019 | bool FastRelaxedMath = DriverArgs.hasFlag(options::OPT_ffast_math, |
1020 | options::OPT_fno_fast_math, false); |
1021 | bool CorrectSqrt = DriverArgs.hasFlag( |
1022 | options::OPT_fhip_fp32_correctly_rounded_divide_sqrt, |
1023 | options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt, true); |
1024 | bool Wave64 = isWave64(DriverArgs, Kind); |
1025 | |
1026 | return RocmInstallation->getCommonBitcodeLibs( |
1027 | DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt, |
1028 | FastRelaxedMath, CorrectSqrt, ABIVer, isOpenMP); |
1029 | } |
1030 | |