1 | //===--- HIPAMD.cpp - HIP Tool and ToolChain Implementations ----*- C++ -*-===// |
---|---|
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "HIPAMD.h" |
10 | #include "AMDGPU.h" |
11 | #include "HIPUtility.h" |
12 | #include "SPIRV.h" |
13 | #include "clang/Basic/Cuda.h" |
14 | #include "clang/Driver/CommonArgs.h" |
15 | #include "clang/Driver/Compilation.h" |
16 | #include "clang/Driver/Driver.h" |
17 | #include "clang/Driver/InputInfo.h" |
18 | #include "clang/Driver/Options.h" |
19 | #include "clang/Driver/SanitizerArgs.h" |
20 | #include "llvm/Support/FileSystem.h" |
21 | #include "llvm/Support/Path.h" |
22 | #include "llvm/TargetParser/TargetParser.h" |
23 | |
24 | using namespace clang::driver; |
25 | using namespace clang::driver::toolchains; |
26 | using namespace clang::driver::tools; |
27 | using namespace clang; |
28 | using namespace llvm::opt; |
29 | |
30 | #if defined(_WIN32) || defined(_WIN64) |
31 | #define NULL_FILE "nul" |
32 | #else |
33 | #define NULL_FILE "/dev/null" |
34 | #endif |
35 | |
36 | void AMDGCN::Linker::constructLlvmLinkCommand(Compilation &C, |
37 | const JobAction &JA, |
38 | const InputInfoList &Inputs, |
39 | const InputInfo &Output, |
40 | const llvm::opt::ArgList &Args) const { |
41 | // Construct llvm-link command. |
42 | // The output from llvm-link is a bitcode file. |
43 | ArgStringList LlvmLinkArgs; |
44 | |
45 | assert(!Inputs.empty() && "Must have at least one input."); |
46 | |
47 | LlvmLinkArgs.append(IL: {"-o", Output.getFilename()}); |
48 | for (auto Input : Inputs) |
49 | LlvmLinkArgs.push_back(Elt: Input.getFilename()); |
50 | |
51 | // Look for archive of bundled bitcode in arguments, and add temporary files |
52 | // for the extracted archive of bitcode to inputs. |
53 | auto TargetID = Args.getLastArgValue(options::OPT_mcpu_EQ); |
54 | AddStaticDeviceLibsLinking(C, *this, JA, Inputs, Args, LlvmLinkArgs, "amdgcn", |
55 | TargetID, /*IsBitCodeSDL=*/true); |
56 | |
57 | const char *LlvmLink = |
58 | Args.MakeArgString(Str: getToolChain().GetProgramPath(Name: "llvm-link")); |
59 | C.addCommand(C: std::make_unique<Command>(args: JA, args: *this, args: ResponseFileSupport::None(), |
60 | args&: LlvmLink, args&: LlvmLinkArgs, args: Inputs, |
61 | args: Output)); |
62 | } |
63 | |
64 | void AMDGCN::Linker::constructLldCommand(Compilation &C, const JobAction &JA, |
65 | const InputInfoList &Inputs, |
66 | const InputInfo &Output, |
67 | const llvm::opt::ArgList &Args) const { |
68 | // Construct lld command. |
69 | // The output from ld.lld is an HSA code object file. |
70 | ArgStringList LldArgs{"-flavor", |
71 | "gnu", |
72 | "-m", |
73 | "elf64_amdgpu", |
74 | "--no-undefined", |
75 | "-shared", |
76 | "-plugin-opt=-amdgpu-internalize-symbols"}; |
77 | if (Args.hasArg(options::OPT_hipstdpar)) |
78 | LldArgs.push_back(Elt: "-plugin-opt=-amdgpu-enable-hipstdpar"); |
79 | |
80 | auto &TC = getToolChain(); |
81 | auto &D = TC.getDriver(); |
82 | bool IsThinLTO = D.getOffloadLTOMode() == LTOK_Thin; |
83 | addLTOOptions(ToolChain: TC, Args, CmdArgs&: LldArgs, Output, Inputs, IsThinLTO); |
84 | |
85 | // Extract all the -m options |
86 | std::vector<llvm::StringRef> Features; |
87 | amdgpu::getAMDGPUTargetFeatures(D, Triple: TC.getTriple(), Args, Features); |
88 | |
89 | // Add features to mattr such as cumode |
90 | std::string MAttrString = "-plugin-opt=-mattr="; |
91 | for (auto OneFeature : unifyTargetFeatures(Features)) { |
92 | MAttrString.append(s: Args.MakeArgString(Str: OneFeature)); |
93 | if (OneFeature != Features.back()) |
94 | MAttrString.append(s: ","); |
95 | } |
96 | if (!Features.empty()) |
97 | LldArgs.push_back(Elt: Args.MakeArgString(Str: MAttrString)); |
98 | |
99 | // ToDo: Remove this option after AMDGPU backend supports ISA-level linking. |
100 | // Since AMDGPU backend currently does not support ISA-level linking, all |
101 | // called functions need to be imported. |
102 | if (IsThinLTO) { |
103 | LldArgs.push_back(Elt: Args.MakeArgString(Str: "-plugin-opt=-force-import-all")); |
104 | LldArgs.push_back(Elt: Args.MakeArgString(Str: "-plugin-opt=-avail-extern-to-local")); |
105 | } |
106 | |
107 | for (const Arg *A : Args.filtered(options::OPT_mllvm)) { |
108 | LldArgs.push_back( |
109 | Args.MakeArgString(Twine("-plugin-opt=") + A->getValue(0))); |
110 | } |
111 | |
112 | if (C.getDriver().isSaveTempsEnabled()) |
113 | LldArgs.push_back(Elt: "-save-temps"); |
114 | |
115 | addLinkerCompressDebugSectionsOption(TC, Args, CmdArgs&: LldArgs); |
116 | |
117 | // Given that host and device linking happen in separate processes, the device |
118 | // linker doesn't always have the visibility as to which device symbols are |
119 | // needed by a program, especially for the device symbol dependencies that are |
120 | // introduced through the host symbol resolution. |
121 | // For example: host_A() (A.obj) --> host_B(B.obj) --> device_kernel_B() |
122 | // (B.obj) In this case, the device linker doesn't know that A.obj actually |
123 | // depends on the kernel functions in B.obj. When linking to static device |
124 | // library, the device linker may drop some of the device global symbols if |
125 | // they aren't referenced. As a workaround, we are adding to the |
126 | // --whole-archive flag such that all global symbols would be linked in. |
127 | LldArgs.push_back(Elt: "--whole-archive"); |
128 | |
129 | for (auto *Arg : Args.filtered(options::OPT_Xoffload_linker)) { |
130 | StringRef ArgVal = Arg->getValue(1); |
131 | auto SplitArg = ArgVal.split("-mllvm="); |
132 | if (!SplitArg.second.empty()) { |
133 | LldArgs.push_back( |
134 | Args.MakeArgString(Twine("-plugin-opt=") + SplitArg.second)); |
135 | } else { |
136 | LldArgs.push_back(Args.MakeArgString(ArgVal)); |
137 | } |
138 | Arg->claim(); |
139 | } |
140 | |
141 | LldArgs.append(IL: {"-o", Output.getFilename()}); |
142 | for (auto Input : Inputs) |
143 | LldArgs.push_back(Elt: Input.getFilename()); |
144 | |
145 | // Look for archive of bundled bitcode in arguments, and add temporary files |
146 | // for the extracted archive of bitcode to inputs. |
147 | auto TargetID = Args.getLastArgValue(options::OPT_mcpu_EQ); |
148 | AddStaticDeviceLibsLinking(C, *this, JA, Inputs, Args, LldArgs, "amdgcn", |
149 | TargetID, /*IsBitCodeSDL=*/true); |
150 | |
151 | LldArgs.push_back(Elt: "--no-whole-archive"); |
152 | |
153 | const char *Lld = Args.MakeArgString(Str: getToolChain().GetProgramPath(Name: "lld")); |
154 | C.addCommand(C: std::make_unique<Command>(args: JA, args: *this, args: ResponseFileSupport::None(), |
155 | args&: Lld, args&: LldArgs, args: Inputs, args: Output)); |
156 | } |
157 | |
158 | // For SPIR-V the inputs for the job are device AMDGCN SPIR-V flavoured bitcode |
159 | // and the output is either a compiled SPIR-V binary or bitcode (-emit-llvm). It |
160 | // calls llvm-link and then the llvm-spirv translator. Once the SPIR-V BE will |
161 | // be promoted from experimental, we will switch to using that. TODO: consider |
162 | // if we want to run any targeted optimisations over IR here, over generic |
163 | // SPIR-V. |
164 | void AMDGCN::Linker::constructLinkAndEmitSpirvCommand( |
165 | Compilation &C, const JobAction &JA, const InputInfoList &Inputs, |
166 | const InputInfo &Output, const llvm::opt::ArgList &Args) const { |
167 | assert(!Inputs.empty() && "Must have at least one input."); |
168 | |
169 | constructLlvmLinkCommand(C, JA, Inputs, Output, Args); |
170 | |
171 | // Linked BC is now in Output |
172 | |
173 | // Emit SPIR-V binary. |
174 | llvm::opt::ArgStringList TrArgs{ |
175 | "--spirv-max-version=1.6", |
176 | "--spirv-ext=+all", |
177 | "--spirv-allow-unknown-intrinsics", |
178 | "--spirv-lower-const-expr", |
179 | "--spirv-preserve-auxdata", |
180 | "--spirv-debug-info-version=nonsemantic-shader-200"}; |
181 | SPIRV::constructTranslateCommand(C, T: *this, JA, Output, Input: Output, Args: TrArgs); |
182 | } |
183 | |
184 | // For amdgcn the inputs of the linker job are device bitcode and output is |
185 | // either an object file or bitcode (-emit-llvm). It calls llvm-link, opt, |
186 | // llc, then lld steps. |
187 | void AMDGCN::Linker::ConstructJob(Compilation &C, const JobAction &JA, |
188 | const InputInfo &Output, |
189 | const InputInfoList &Inputs, |
190 | const ArgList &Args, |
191 | const char *LinkingOutput) const { |
192 | if (Inputs.size() > 0 && |
193 | Inputs[0].getType() == types::TY_Image && |
194 | JA.getType() == types::TY_Object) |
195 | return HIP::constructGenerateObjFileFromHIPFatBinary(C, Output, Inputs, |
196 | Args, JA, T: *this); |
197 | |
198 | if (JA.getType() == types::TY_HIP_FATBIN) |
199 | return HIP::constructHIPFatbinCommand(C, JA, OutputFileName: Output.getFilename(), Inputs, |
200 | TCArgs: Args, T: *this); |
201 | |
202 | if (JA.getType() == types::TY_LLVM_BC) |
203 | return constructLlvmLinkCommand(C, JA, Inputs, Output, Args); |
204 | |
205 | if (getToolChain().getEffectiveTriple().isSPIRV()) |
206 | return constructLinkAndEmitSpirvCommand(C, JA, Inputs, Output, Args); |
207 | |
208 | return constructLldCommand(C, JA, Inputs, Output, Args); |
209 | } |
210 | |
211 | HIPAMDToolChain::HIPAMDToolChain(const Driver &D, const llvm::Triple &Triple, |
212 | const ToolChain &HostTC, const ArgList &Args) |
213 | : ROCMToolChain(D, Triple, Args), HostTC(HostTC) { |
214 | // Lookup binaries into the driver directory, this is used to |
215 | // discover the clang-offload-bundler executable. |
216 | getProgramPaths().push_back(Elt: getDriver().Dir); |
217 | // Diagnose unsupported sanitizer options only once. |
218 | diagnoseUnsupportedSanitizers(Args); |
219 | } |
220 | |
221 | void HIPAMDToolChain::addClangTargetOptions( |
222 | const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, |
223 | Action::OffloadKind DeviceOffloadingKind) const { |
224 | HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadKind: DeviceOffloadingKind); |
225 | |
226 | assert(DeviceOffloadingKind == Action::OFK_HIP && |
227 | "Only HIP offloading kinds are supported for GPUs."); |
228 | |
229 | CC1Args.append(IL: {"-fcuda-is-device", "-fno-threadsafe-statics"}); |
230 | |
231 | if (!DriverArgs.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, |
232 | false)) { |
233 | CC1Args.append(IL: {"-mllvm", "-amdgpu-internalize-symbols"}); |
234 | if (DriverArgs.hasArgNoClaim(options::OPT_hipstdpar)) |
235 | CC1Args.append(IL: {"-mllvm", "-amdgpu-enable-hipstdpar"}); |
236 | } |
237 | |
238 | StringRef MaxThreadsPerBlock = |
239 | DriverArgs.getLastArgValue(options::Id: OPT_gpu_max_threads_per_block_EQ); |
240 | if (!MaxThreadsPerBlock.empty()) { |
241 | std::string ArgStr = |
242 | (Twine("--gpu-max-threads-per-block=") + MaxThreadsPerBlock).str(); |
243 | CC1Args.push_back(Elt: DriverArgs.MakeArgStringRef(Str: ArgStr)); |
244 | } |
245 | |
246 | CC1Args.push_back(Elt: "-fcuda-allow-variadic-functions"); |
247 | |
248 | // Default to "hidden" visibility, as object level linking will not be |
249 | // supported for the foreseeable future. |
250 | if (!DriverArgs.hasArg(options::OPT_fvisibility_EQ, |
251 | options::OPT_fvisibility_ms_compat)) { |
252 | CC1Args.append(IL: {"-fvisibility=hidden"}); |
253 | CC1Args.push_back(Elt: "-fapply-global-visibility-to-externs"); |
254 | } |
255 | |
256 | if (getEffectiveTriple().isSPIRV()) { |
257 | // For SPIR-V we embed the command-line into the generated binary, in order |
258 | // to retrieve it at JIT time and be able to do target specific compilation |
259 | // with options that match the user-supplied ones. |
260 | if (!DriverArgs.hasArg(options::OPT_fembed_bitcode_marker)) |
261 | CC1Args.push_back(Elt: "-fembed-bitcode=marker"); |
262 | return; // No DeviceLibs for SPIR-V. |
263 | } |
264 | |
265 | for (auto BCFile : getDeviceLibs(Args: DriverArgs)) { |
266 | CC1Args.push_back(Elt: BCFile.ShouldInternalize ? "-mlink-builtin-bitcode" |
267 | : "-mlink-bitcode-file"); |
268 | CC1Args.push_back(Elt: DriverArgs.MakeArgString(Str: BCFile.Path)); |
269 | } |
270 | } |
271 | |
272 | llvm::opt::DerivedArgList * |
273 | HIPAMDToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args, |
274 | StringRef BoundArch, |
275 | Action::OffloadKind DeviceOffloadKind) const { |
276 | DerivedArgList *DAL = |
277 | HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind); |
278 | if (!DAL) |
279 | DAL = new DerivedArgList(Args.getBaseArgs()); |
280 | |
281 | const OptTable &Opts = getDriver().getOpts(); |
282 | |
283 | for (Arg *A : Args) { |
284 | if (!shouldSkipSanitizeOption(TC: *this, DriverArgs: Args, TargetID: BoundArch, A)) |
285 | DAL->append(A); |
286 | } |
287 | |
288 | if (!BoundArch.empty()) { |
289 | DAL->eraseArg(options::Id: OPT_mcpu_EQ); |
290 | DAL->AddJoinedArg(BaseArg: nullptr, Opt: Opts.getOption(options::Opt: OPT_mcpu_EQ), Value: BoundArch); |
291 | checkTargetID(DriverArgs: *DAL); |
292 | } |
293 | |
294 | if (!Args.hasArg(options::OPT_flto_partitions_EQ)) |
295 | DAL->AddJoinedArg(BaseArg: nullptr, Opt: Opts.getOption(options::Opt: OPT_flto_partitions_EQ), |
296 | Value: "8"); |
297 | |
298 | return DAL; |
299 | } |
300 | |
301 | Tool *HIPAMDToolChain::buildLinker() const { |
302 | assert(getTriple().isAMDGCN() || |
303 | getTriple().getArch() == llvm::Triple::spirv64); |
304 | return new tools::AMDGCN::Linker(*this); |
305 | } |
306 | |
307 | void HIPAMDToolChain::addClangWarningOptions(ArgStringList &CC1Args) const { |
308 | AMDGPUToolChain::addClangWarningOptions(CC1Args); |
309 | HostTC.addClangWarningOptions(CC1Args); |
310 | } |
311 | |
312 | ToolChain::CXXStdlibType |
313 | HIPAMDToolChain::GetCXXStdlibType(const ArgList &Args) const { |
314 | return HostTC.GetCXXStdlibType(Args); |
315 | } |
316 | |
317 | void HIPAMDToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs, |
318 | ArgStringList &CC1Args) const { |
319 | HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args); |
320 | } |
321 | |
322 | void HIPAMDToolChain::AddClangCXXStdlibIncludeArgs( |
323 | const ArgList &Args, ArgStringList &CC1Args) const { |
324 | HostTC.AddClangCXXStdlibIncludeArgs(DriverArgs: Args, CC1Args); |
325 | } |
326 | |
327 | void HIPAMDToolChain::AddIAMCUIncludeArgs(const ArgList &Args, |
328 | ArgStringList &CC1Args) const { |
329 | HostTC.AddIAMCUIncludeArgs(DriverArgs: Args, CC1Args); |
330 | } |
331 | |
332 | void HIPAMDToolChain::AddHIPIncludeArgs(const ArgList &DriverArgs, |
333 | ArgStringList &CC1Args) const { |
334 | RocmInstallation->AddHIPIncludeArgs(DriverArgs, CC1Args); |
335 | } |
336 | |
337 | SanitizerMask HIPAMDToolChain::getSupportedSanitizers() const { |
338 | // The HIPAMDToolChain only supports sanitizers in the sense that it allows |
339 | // sanitizer arguments on the command line if they are supported by the host |
340 | // toolchain. The HIPAMDToolChain will actually ignore any command line |
341 | // arguments for any of these "supported" sanitizers. That means that no |
342 | // sanitization of device code is actually supported at this time. |
343 | // |
344 | // This behavior is necessary because the host and device toolchains |
345 | // invocations often share the command line, so the device toolchain must |
346 | // tolerate flags meant only for the host toolchain. |
347 | return HostTC.getSupportedSanitizers(); |
348 | } |
349 | |
350 | VersionTuple HIPAMDToolChain::computeMSVCVersion(const Driver *D, |
351 | const ArgList &Args) const { |
352 | return HostTC.computeMSVCVersion(D, Args); |
353 | } |
354 | |
355 | llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12> |
356 | HIPAMDToolChain::getDeviceLibs(const llvm::opt::ArgList &DriverArgs) const { |
357 | llvm::SmallVector<BitCodeLibraryInfo, 12> BCLibs; |
358 | if (!DriverArgs.hasFlag(options::OPT_offloadlib, options::OPT_no_offloadlib, |
359 | true) || |
360 | getGPUArch(DriverArgs) == "amdgcnspirv") |
361 | return {}; |
362 | ArgStringList LibraryPaths; |
363 | |
364 | // Find in --hip-device-lib-path and HIP_LIBRARY_PATH. |
365 | for (StringRef Path : RocmInstallation->getRocmDeviceLibPathArg()) |
366 | LibraryPaths.push_back(Elt: DriverArgs.MakeArgString(Str: Path)); |
367 | |
368 | addDirectoryList(Args: DriverArgs, CmdArgs&: LibraryPaths, ArgName: "", EnvVar: "HIP_DEVICE_LIB_PATH"); |
369 | |
370 | // Maintain compatability with --hip-device-lib. |
371 | auto BCLibArgs = DriverArgs.getAllArgValues(options::OPT_hip_device_lib_EQ); |
372 | if (!BCLibArgs.empty()) { |
373 | llvm::for_each(BCLibArgs, [&](StringRef BCName) { |
374 | StringRef FullName; |
375 | for (StringRef LibraryPath : LibraryPaths) { |
376 | SmallString<128> Path(LibraryPath); |
377 | llvm::sys::path::append(path&: Path, a: BCName); |
378 | FullName = Path; |
379 | if (llvm::sys::fs::exists(Path: FullName)) { |
380 | BCLibs.emplace_back(Args&: FullName); |
381 | return; |
382 | } |
383 | } |
384 | getDriver().Diag(diag::DiagID: err_drv_no_such_file) << BCName; |
385 | }); |
386 | } else { |
387 | if (!RocmInstallation->hasDeviceLibrary()) { |
388 | getDriver().Diag(diag::DiagID: err_drv_no_rocm_device_lib) << 0; |
389 | return {}; |
390 | } |
391 | StringRef GpuArch = getGPUArch(DriverArgs); |
392 | assert(!GpuArch.empty() && "Must have an explicit GPU arch."); |
393 | |
394 | // Add common device libraries like ocml etc. |
395 | for (auto N : getCommonDeviceLibNames(DriverArgs, GPUArch: GpuArch.str())) |
396 | BCLibs.emplace_back(Args&: N); |
397 | |
398 | // Add instrument lib. |
399 | auto InstLib = |
400 | DriverArgs.getLastArgValue(options::OPT_gpu_instrument_lib_EQ); |
401 | if (InstLib.empty()) |
402 | return BCLibs; |
403 | if (llvm::sys::fs::exists(InstLib)) |
404 | BCLibs.emplace_back(InstLib); |
405 | else |
406 | getDriver().Diag(diag::DiagID: err_drv_no_such_file) << InstLib; |
407 | } |
408 | |
409 | return BCLibs; |
410 | } |
411 | |
412 | void HIPAMDToolChain::checkTargetID( |
413 | const llvm::opt::ArgList &DriverArgs) const { |
414 | auto PTID = getParsedTargetID(DriverArgs); |
415 | if (PTID.OptionalTargetID && !PTID.OptionalGPUArch && |
416 | PTID.OptionalTargetID != "amdgcnspirv") |
417 | getDriver().Diag(clang::diag::DiagID: err_drv_bad_target_id) |
418 | << *PTID.OptionalTargetID; |
419 | } |
420 |
Definitions
- constructLlvmLinkCommand
- constructLldCommand
- constructLinkAndEmitSpirvCommand
- ConstructJob
- HIPAMDToolChain
- addClangTargetOptions
- TranslateArgs
- buildLinker
- addClangWarningOptions
- GetCXXStdlibType
- AddClangSystemIncludeArgs
- AddClangCXXStdlibIncludeArgs
- AddIAMCUIncludeArgs
- AddHIPIncludeArgs
- getSupportedSanitizers
- computeMSVCVersion
- getDeviceLibs
Learn to use CMake with our Intro Training
Find out more