1 | //===--- HIPAMD.cpp - HIP Tool and ToolChain Implementations ----*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "HIPAMD.h" |
10 | #include "AMDGPU.h" |
11 | #include "CommonArgs.h" |
12 | #include "HIPUtility.h" |
13 | #include "clang/Basic/Cuda.h" |
14 | #include "clang/Basic/TargetID.h" |
15 | #include "clang/Driver/Compilation.h" |
16 | #include "clang/Driver/Driver.h" |
17 | #include "clang/Driver/DriverDiagnostic.h" |
18 | #include "clang/Driver/InputInfo.h" |
19 | #include "clang/Driver/Options.h" |
20 | #include "clang/Driver/SanitizerArgs.h" |
21 | #include "llvm/Support/Alignment.h" |
22 | #include "llvm/Support/FileSystem.h" |
23 | #include "llvm/Support/Path.h" |
24 | #include "llvm/TargetParser/TargetParser.h" |
25 | |
26 | using namespace clang::driver; |
27 | using namespace clang::driver::toolchains; |
28 | using namespace clang::driver::tools; |
29 | using namespace clang; |
30 | using namespace llvm::opt; |
31 | |
32 | #if defined(_WIN32) || defined(_WIN64) |
33 | #define NULL_FILE "nul" |
34 | #else |
35 | #define NULL_FILE "/dev/null" |
36 | #endif |
37 | |
38 | static bool shouldSkipSanitizeOption(const ToolChain &TC, |
39 | const llvm::opt::ArgList &DriverArgs, |
40 | StringRef TargetID, |
41 | const llvm::opt::Arg *A) { |
42 | // For actions without targetID, do nothing. |
43 | if (TargetID.empty()) |
44 | return false; |
45 | Option O = A->getOption(); |
46 | if (!O.matches(options::ID: OPT_fsanitize_EQ)) |
47 | return false; |
48 | |
49 | if (!DriverArgs.hasFlag(options::OPT_fgpu_sanitize, |
50 | options::OPT_fno_gpu_sanitize, true)) |
51 | return true; |
52 | |
53 | auto &Diags = TC.getDriver().getDiags(); |
54 | |
55 | // For simplicity, we only allow -fsanitize=address |
56 | SanitizerMask K = parseSanitizerValue(Value: A->getValue(), /*AllowGroups=*/false); |
57 | if (K != SanitizerKind::Address) |
58 | return true; |
59 | |
60 | llvm::StringMap<bool> FeatureMap; |
61 | auto OptionalGpuArch = parseTargetID(T: TC.getTriple(), OffloadArch: TargetID, FeatureMap: &FeatureMap); |
62 | |
63 | assert(OptionalGpuArch && "Invalid Target ID" ); |
64 | (void)OptionalGpuArch; |
65 | auto Loc = FeatureMap.find(Key: "xnack" ); |
66 | if (Loc == FeatureMap.end() || !Loc->second) { |
67 | Diags.Report( |
68 | clang::diag::warn_drv_unsupported_option_for_offload_arch_req_feature) |
69 | << A->getAsString(Args: DriverArgs) << TargetID << "xnack+" ; |
70 | return true; |
71 | } |
72 | return false; |
73 | } |
74 | |
75 | void AMDGCN::Linker::constructLlvmLinkCommand(Compilation &C, |
76 | const JobAction &JA, |
77 | const InputInfoList &Inputs, |
78 | const InputInfo &Output, |
79 | const llvm::opt::ArgList &Args) const { |
80 | // Construct llvm-link command. |
81 | // The output from llvm-link is a bitcode file. |
82 | ArgStringList LlvmLinkArgs; |
83 | |
84 | assert(!Inputs.empty() && "Must have at least one input." ); |
85 | |
86 | LlvmLinkArgs.append(IL: {"-o" , Output.getFilename()}); |
87 | for (auto Input : Inputs) |
88 | LlvmLinkArgs.push_back(Elt: Input.getFilename()); |
89 | |
90 | // Look for archive of bundled bitcode in arguments, and add temporary files |
91 | // for the extracted archive of bitcode to inputs. |
92 | auto TargetID = Args.getLastArgValue(options::OPT_mcpu_EQ); |
93 | AddStaticDeviceLibsLinking(C, *this, JA, Inputs, Args, LlvmLinkArgs, "amdgcn" , |
94 | TargetID, /*IsBitCodeSDL=*/true); |
95 | |
96 | const char *LlvmLink = |
97 | Args.MakeArgString(Str: getToolChain().GetProgramPath(Name: "llvm-link" )); |
98 | C.addCommand(C: std::make_unique<Command>(args: JA, args: *this, args: ResponseFileSupport::None(), |
99 | args&: LlvmLink, args&: LlvmLinkArgs, args: Inputs, |
100 | args: Output)); |
101 | } |
102 | |
103 | void AMDGCN::Linker::constructLldCommand(Compilation &C, const JobAction &JA, |
104 | const InputInfoList &Inputs, |
105 | const InputInfo &Output, |
106 | const llvm::opt::ArgList &Args) const { |
107 | // Construct lld command. |
108 | // The output from ld.lld is an HSA code object file. |
109 | ArgStringList LldArgs{"-flavor" , |
110 | "gnu" , |
111 | "-m" , |
112 | "elf64_amdgpu" , |
113 | "--no-undefined" , |
114 | "-shared" , |
115 | "-plugin-opt=-amdgpu-internalize-symbols" }; |
116 | if (Args.hasArg(options::OPT_hipstdpar)) |
117 | LldArgs.push_back(Elt: "-plugin-opt=-amdgpu-enable-hipstdpar" ); |
118 | |
119 | auto &TC = getToolChain(); |
120 | auto &D = TC.getDriver(); |
121 | assert(!Inputs.empty() && "Must have at least one input." ); |
122 | bool IsThinLTO = D.getLTOMode(/*IsOffload=*/true) == LTOK_Thin; |
123 | addLTOOptions(ToolChain: TC, Args, CmdArgs&: LldArgs, Output, Input: Inputs[0], IsThinLTO); |
124 | |
125 | // Extract all the -m options |
126 | std::vector<llvm::StringRef> Features; |
127 | amdgpu::getAMDGPUTargetFeatures(D, Triple: TC.getTriple(), Args, Features); |
128 | |
129 | // Add features to mattr such as cumode |
130 | std::string MAttrString = "-plugin-opt=-mattr=" ; |
131 | for (auto OneFeature : unifyTargetFeatures(Features)) { |
132 | MAttrString.append(s: Args.MakeArgString(Str: OneFeature)); |
133 | if (OneFeature != Features.back()) |
134 | MAttrString.append(s: "," ); |
135 | } |
136 | if (!Features.empty()) |
137 | LldArgs.push_back(Elt: Args.MakeArgString(Str: MAttrString)); |
138 | |
139 | // ToDo: Remove this option after AMDGPU backend supports ISA-level linking. |
140 | // Since AMDGPU backend currently does not support ISA-level linking, all |
141 | // called functions need to be imported. |
142 | if (IsThinLTO) |
143 | LldArgs.push_back(Elt: Args.MakeArgString(Str: "-plugin-opt=-force-import-all" )); |
144 | |
145 | for (const Arg *A : Args.filtered(options::OPT_mllvm)) { |
146 | LldArgs.push_back( |
147 | Args.MakeArgString(Twine("-plugin-opt=" ) + A->getValue(0))); |
148 | } |
149 | |
150 | if (C.getDriver().isSaveTempsEnabled()) |
151 | LldArgs.push_back(Elt: "-save-temps" ); |
152 | |
153 | addLinkerCompressDebugSectionsOption(TC, Args, CmdArgs&: LldArgs); |
154 | |
155 | // Given that host and device linking happen in separate processes, the device |
156 | // linker doesn't always have the visibility as to which device symbols are |
157 | // needed by a program, especially for the device symbol dependencies that are |
158 | // introduced through the host symbol resolution. |
159 | // For example: host_A() (A.obj) --> host_B(B.obj) --> device_kernel_B() |
160 | // (B.obj) In this case, the device linker doesn't know that A.obj actually |
161 | // depends on the kernel functions in B.obj. When linking to static device |
162 | // library, the device linker may drop some of the device global symbols if |
163 | // they aren't referenced. As a workaround, we are adding to the |
164 | // --whole-archive flag such that all global symbols would be linked in. |
165 | LldArgs.push_back(Elt: "--whole-archive" ); |
166 | |
167 | for (auto *Arg : Args.filtered(options::OPT_Xoffload_linker)) { |
168 | StringRef ArgVal = Arg->getValue(1); |
169 | auto SplitArg = ArgVal.split("-mllvm=" ); |
170 | if (!SplitArg.second.empty()) { |
171 | LldArgs.push_back( |
172 | Args.MakeArgString(Twine("-plugin-opt=" ) + SplitArg.second)); |
173 | } else { |
174 | LldArgs.push_back(Args.MakeArgString(ArgVal)); |
175 | } |
176 | Arg->claim(); |
177 | } |
178 | |
179 | LldArgs.append(IL: {"-o" , Output.getFilename()}); |
180 | for (auto Input : Inputs) |
181 | LldArgs.push_back(Elt: Input.getFilename()); |
182 | |
183 | // Look for archive of bundled bitcode in arguments, and add temporary files |
184 | // for the extracted archive of bitcode to inputs. |
185 | auto TargetID = Args.getLastArgValue(options::OPT_mcpu_EQ); |
186 | AddStaticDeviceLibsLinking(C, *this, JA, Inputs, Args, LldArgs, "amdgcn" , |
187 | TargetID, /*IsBitCodeSDL=*/true); |
188 | |
189 | LldArgs.push_back(Elt: "--no-whole-archive" ); |
190 | |
191 | const char *Lld = Args.MakeArgString(Str: getToolChain().GetProgramPath(Name: "lld" )); |
192 | C.addCommand(C: std::make_unique<Command>(args: JA, args: *this, args: ResponseFileSupport::None(), |
193 | args&: Lld, args&: LldArgs, args: Inputs, args: Output)); |
194 | } |
195 | |
196 | // For amdgcn the inputs of the linker job are device bitcode and output is |
197 | // either an object file or bitcode (-emit-llvm). It calls llvm-link, opt, |
198 | // llc, then lld steps. |
199 | void AMDGCN::Linker::ConstructJob(Compilation &C, const JobAction &JA, |
200 | const InputInfo &Output, |
201 | const InputInfoList &Inputs, |
202 | const ArgList &Args, |
203 | const char *LinkingOutput) const { |
204 | if (Inputs.size() > 0 && |
205 | Inputs[0].getType() == types::TY_Image && |
206 | JA.getType() == types::TY_Object) |
207 | return HIP::constructGenerateObjFileFromHIPFatBinary(C, Output, Inputs, |
208 | Args, JA, T: *this); |
209 | |
210 | if (JA.getType() == types::TY_HIP_FATBIN) |
211 | return HIP::constructHIPFatbinCommand(C, JA, OutputFileName: Output.getFilename(), Inputs, |
212 | TCArgs: Args, T: *this); |
213 | |
214 | if (JA.getType() == types::TY_LLVM_BC) |
215 | return constructLlvmLinkCommand(C, JA, Inputs, Output, Args); |
216 | |
217 | return constructLldCommand(C, JA, Inputs, Output, Args); |
218 | } |
219 | |
220 | HIPAMDToolChain::HIPAMDToolChain(const Driver &D, const llvm::Triple &Triple, |
221 | const ToolChain &HostTC, const ArgList &Args) |
222 | : ROCMToolChain(D, Triple, Args), HostTC(HostTC) { |
223 | // Lookup binaries into the driver directory, this is used to |
224 | // discover the clang-offload-bundler executable. |
225 | getProgramPaths().push_back(Elt: getDriver().Dir); |
226 | |
227 | // Diagnose unsupported sanitizer options only once. |
228 | if (!Args.hasFlag(options::OPT_fgpu_sanitize, options::OPT_fno_gpu_sanitize, |
229 | true)) |
230 | return; |
231 | for (auto *A : Args.filtered(options::OPT_fsanitize_EQ)) { |
232 | SanitizerMask K = parseSanitizerValue(A->getValue(), /*AllowGroups=*/false); |
233 | if (K != SanitizerKind::Address) |
234 | D.getDiags().Report(clang::diag::warn_drv_unsupported_option_for_target) |
235 | << A->getAsString(Args) << getTriple().str(); |
236 | } |
237 | } |
238 | |
239 | void HIPAMDToolChain::addClangTargetOptions( |
240 | const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, |
241 | Action::OffloadKind DeviceOffloadingKind) const { |
242 | HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadKind: DeviceOffloadingKind); |
243 | |
244 | assert(DeviceOffloadingKind == Action::OFK_HIP && |
245 | "Only HIP offloading kinds are supported for GPUs." ); |
246 | |
247 | CC1Args.push_back(Elt: "-fcuda-is-device" ); |
248 | |
249 | if (!DriverArgs.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, |
250 | false)) |
251 | CC1Args.append(IL: {"-mllvm" , "-amdgpu-internalize-symbols" }); |
252 | if (DriverArgs.hasArgNoClaim(options::OPT_hipstdpar)) |
253 | CC1Args.append(IL: {"-mllvm" , "-amdgpu-enable-hipstdpar" }); |
254 | |
255 | StringRef MaxThreadsPerBlock = |
256 | DriverArgs.getLastArgValue(options::Id: OPT_gpu_max_threads_per_block_EQ); |
257 | if (!MaxThreadsPerBlock.empty()) { |
258 | std::string ArgStr = |
259 | (Twine("--gpu-max-threads-per-block=" ) + MaxThreadsPerBlock).str(); |
260 | CC1Args.push_back(Elt: DriverArgs.MakeArgStringRef(Str: ArgStr)); |
261 | } |
262 | |
263 | CC1Args.push_back(Elt: "-fcuda-allow-variadic-functions" ); |
264 | |
265 | // Default to "hidden" visibility, as object level linking will not be |
266 | // supported for the foreseeable future. |
267 | if (!DriverArgs.hasArg(options::OPT_fvisibility_EQ, |
268 | options::OPT_fvisibility_ms_compat)) { |
269 | CC1Args.append(IL: {"-fvisibility=hidden" }); |
270 | CC1Args.push_back(Elt: "-fapply-global-visibility-to-externs" ); |
271 | } |
272 | |
273 | for (auto BCFile : getDeviceLibs(Args: DriverArgs)) { |
274 | CC1Args.push_back(Elt: BCFile.ShouldInternalize ? "-mlink-builtin-bitcode" |
275 | : "-mlink-bitcode-file" ); |
276 | CC1Args.push_back(Elt: DriverArgs.MakeArgString(Str: BCFile.Path)); |
277 | } |
278 | } |
279 | |
280 | llvm::opt::DerivedArgList * |
281 | HIPAMDToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args, |
282 | StringRef BoundArch, |
283 | Action::OffloadKind DeviceOffloadKind) const { |
284 | DerivedArgList *DAL = |
285 | HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind); |
286 | if (!DAL) |
287 | DAL = new DerivedArgList(Args.getBaseArgs()); |
288 | |
289 | const OptTable &Opts = getDriver().getOpts(); |
290 | |
291 | for (Arg *A : Args) { |
292 | if (!shouldSkipSanitizeOption(TC: *this, DriverArgs: Args, TargetID: BoundArch, A)) |
293 | DAL->append(A); |
294 | } |
295 | |
296 | if (!BoundArch.empty()) { |
297 | DAL->eraseArg(options::Id: OPT_mcpu_EQ); |
298 | DAL->AddJoinedArg(BaseArg: nullptr, Opt: Opts.getOption(options::Opt: OPT_mcpu_EQ), Value: BoundArch); |
299 | checkTargetID(DriverArgs: *DAL); |
300 | } |
301 | |
302 | return DAL; |
303 | } |
304 | |
305 | Tool *HIPAMDToolChain::buildLinker() const { |
306 | assert(getTriple().getArch() == llvm::Triple::amdgcn); |
307 | return new tools::AMDGCN::Linker(*this); |
308 | } |
309 | |
310 | void HIPAMDToolChain::addClangWarningOptions(ArgStringList &CC1Args) const { |
311 | AMDGPUToolChain::addClangWarningOptions(CC1Args); |
312 | HostTC.addClangWarningOptions(CC1Args); |
313 | } |
314 | |
315 | ToolChain::CXXStdlibType |
316 | HIPAMDToolChain::GetCXXStdlibType(const ArgList &Args) const { |
317 | return HostTC.GetCXXStdlibType(Args); |
318 | } |
319 | |
320 | void HIPAMDToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs, |
321 | ArgStringList &CC1Args) const { |
322 | HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args); |
323 | } |
324 | |
325 | void HIPAMDToolChain::AddClangCXXStdlibIncludeArgs( |
326 | const ArgList &Args, ArgStringList &CC1Args) const { |
327 | HostTC.AddClangCXXStdlibIncludeArgs(DriverArgs: Args, CC1Args); |
328 | } |
329 | |
330 | void HIPAMDToolChain::AddIAMCUIncludeArgs(const ArgList &Args, |
331 | ArgStringList &CC1Args) const { |
332 | HostTC.AddIAMCUIncludeArgs(DriverArgs: Args, CC1Args); |
333 | } |
334 | |
335 | void HIPAMDToolChain::AddHIPIncludeArgs(const ArgList &DriverArgs, |
336 | ArgStringList &CC1Args) const { |
337 | RocmInstallation->AddHIPIncludeArgs(DriverArgs, CC1Args); |
338 | } |
339 | |
340 | SanitizerMask HIPAMDToolChain::getSupportedSanitizers() const { |
341 | // The HIPAMDToolChain only supports sanitizers in the sense that it allows |
342 | // sanitizer arguments on the command line if they are supported by the host |
343 | // toolchain. The HIPAMDToolChain will actually ignore any command line |
344 | // arguments for any of these "supported" sanitizers. That means that no |
345 | // sanitization of device code is actually supported at this time. |
346 | // |
347 | // This behavior is necessary because the host and device toolchains |
348 | // invocations often share the command line, so the device toolchain must |
349 | // tolerate flags meant only for the host toolchain. |
350 | return HostTC.getSupportedSanitizers(); |
351 | } |
352 | |
353 | VersionTuple HIPAMDToolChain::computeMSVCVersion(const Driver *D, |
354 | const ArgList &Args) const { |
355 | return HostTC.computeMSVCVersion(D, Args); |
356 | } |
357 | |
358 | llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12> |
359 | HIPAMDToolChain::getDeviceLibs(const llvm::opt::ArgList &DriverArgs) const { |
360 | llvm::SmallVector<BitCodeLibraryInfo, 12> BCLibs; |
361 | if (DriverArgs.hasArg(options::OPT_nogpulib)) |
362 | return {}; |
363 | ArgStringList LibraryPaths; |
364 | |
365 | // Find in --hip-device-lib-path and HIP_LIBRARY_PATH. |
366 | for (StringRef Path : RocmInstallation->getRocmDeviceLibPathArg()) |
367 | LibraryPaths.push_back(Elt: DriverArgs.MakeArgString(Str: Path)); |
368 | |
369 | addDirectoryList(Args: DriverArgs, CmdArgs&: LibraryPaths, ArgName: "" , EnvVar: "HIP_DEVICE_LIB_PATH" ); |
370 | |
371 | // Maintain compatability with --hip-device-lib. |
372 | auto BCLibArgs = DriverArgs.getAllArgValues(options::OPT_hip_device_lib_EQ); |
373 | if (!BCLibArgs.empty()) { |
374 | llvm::for_each(BCLibArgs, [&](StringRef BCName) { |
375 | StringRef FullName; |
376 | for (StringRef LibraryPath : LibraryPaths) { |
377 | SmallString<128> Path(LibraryPath); |
378 | llvm::sys::path::append(path&: Path, a: BCName); |
379 | FullName = Path; |
380 | if (llvm::sys::fs::exists(Path: FullName)) { |
381 | BCLibs.push_back(Elt: FullName); |
382 | return; |
383 | } |
384 | } |
385 | getDriver().Diag(diag::DiagID: err_drv_no_such_file) << BCName; |
386 | }); |
387 | } else { |
388 | if (!RocmInstallation->hasDeviceLibrary()) { |
389 | getDriver().Diag(diag::DiagID: err_drv_no_rocm_device_lib) << 0; |
390 | return {}; |
391 | } |
392 | StringRef GpuArch = getGPUArch(DriverArgs); |
393 | assert(!GpuArch.empty() && "Must have an explicit GPU arch." ); |
394 | |
395 | // If --hip-device-lib is not set, add the default bitcode libraries. |
396 | if (DriverArgs.hasFlag(options::OPT_fgpu_sanitize, |
397 | options::OPT_fno_gpu_sanitize, true) && |
398 | getSanitizerArgs(JobArgs: DriverArgs).needsAsanRt()) { |
399 | auto AsanRTL = RocmInstallation->getAsanRTLPath(); |
400 | if (AsanRTL.empty()) { |
401 | unsigned DiagID = getDriver().getDiags().getCustomDiagID( |
402 | L: DiagnosticsEngine::Error, |
403 | FormatString: "AMDGPU address sanitizer runtime library (asanrtl) is not found. " |
404 | "Please install ROCm device library which supports address " |
405 | "sanitizer" ); |
406 | getDriver().Diag(DiagID); |
407 | return {}; |
408 | } else |
409 | BCLibs.emplace_back(Args&: AsanRTL, /*ShouldInternalize=*/Args: false); |
410 | } |
411 | |
412 | // Add the HIP specific bitcode library. |
413 | BCLibs.push_back(Elt: RocmInstallation->getHIPPath()); |
414 | |
415 | // Add common device libraries like ocml etc. |
416 | for (StringRef N : getCommonDeviceLibNames(DriverArgs, GPUArch: GpuArch.str())) |
417 | BCLibs.emplace_back(Args&: N); |
418 | |
419 | // Add instrument lib. |
420 | auto InstLib = |
421 | DriverArgs.getLastArgValue(options::OPT_gpu_instrument_lib_EQ); |
422 | if (InstLib.empty()) |
423 | return BCLibs; |
424 | if (llvm::sys::fs::exists(InstLib)) |
425 | BCLibs.push_back(InstLib); |
426 | else |
427 | getDriver().Diag(diag::DiagID: err_drv_no_such_file) << InstLib; |
428 | } |
429 | |
430 | return BCLibs; |
431 | } |
432 | |
433 | void HIPAMDToolChain::checkTargetID( |
434 | const llvm::opt::ArgList &DriverArgs) const { |
435 | auto PTID = getParsedTargetID(DriverArgs); |
436 | if (PTID.OptionalTargetID && !PTID.OptionalGPUArch) { |
437 | getDriver().Diag(clang::diag::DiagID: err_drv_bad_target_id) |
438 | << *PTID.OptionalTargetID; |
439 | } |
440 | } |
441 | |