Cuda.cpp source code [clang/lib/Driver/ToolChains/Cuda.cpp]

1	//===--- Cuda.cpp - Cuda Tool and ToolChain Implementations ------ C++ --===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include "Cuda.h"
10	#include "CommonArgs.h"
11	#include "clang/Basic/Cuda.h"
12	#include "clang/Config/config.h"
13	#include "clang/Driver/Compilation.h"
14	#include "clang/Driver/Distro.h"
15	#include "clang/Driver/Driver.h"
16	#include "clang/Driver/DriverDiagnostic.h"
17	#include "clang/Driver/InputInfo.h"
18	#include "clang/Driver/Options.h"
19	#include "llvm/ADT/StringExtras.h"
20	#include "llvm/Option/ArgList.h"
21	#include "llvm/Support/FileSystem.h"
22	#include "llvm/Support/FormatAdapters.h"
23	#include "llvm/Support/FormatVariadic.h"
24	#include "llvm/Support/Path.h"
25	#include "llvm/Support/Process.h"
26	#include "llvm/Support/Program.h"
27	#include "llvm/Support/VirtualFileSystem.h"
28	#include "llvm/TargetParser/Host.h"
29	#include "llvm/TargetParser/TargetParser.h"
30	#include <system_error>
31
32	using namespace clang::driver;
33	using namespace clang::driver::toolchains;
34	using namespace clang::driver::tools;
35	using namespace clang;
36	using namespace llvm::opt;
37
38	namespace {
39
40	CudaVersion getCudaVersion(uint32_t raw_version) {
41	if (raw_version < `7050`)
42	return CudaVersion::CUDA_70;
43	if (raw_version < `8000`)
44	return CudaVersion::CUDA_75;
45	if (raw_version < `9000`)
46	return CudaVersion::CUDA_80;
47	if (raw_version < `9010`)
48	return CudaVersion::CUDA_90;
49	if (raw_version < `9020`)
50	return CudaVersion::CUDA_91;
51	if (raw_version < `10000`)
52	return CudaVersion::CUDA_92;
53	if (raw_version < `10010`)
54	return CudaVersion::CUDA_100;
55	if (raw_version < `10020`)
56	return CudaVersion::CUDA_101;
57	if (raw_version < `11000`)
58	return CudaVersion::CUDA_102;
59	if (raw_version < `11010`)
60	return CudaVersion::CUDA_110;
61	if (raw_version < `11020`)
62	return CudaVersion::CUDA_111;
63	if (raw_version < `11030`)
64	return CudaVersion::CUDA_112;
65	if (raw_version < `11040`)
66	return CudaVersion::CUDA_113;
67	if (raw_version < `11050`)
68	return CudaVersion::CUDA_114;
69	if (raw_version < `11060`)
70	return CudaVersion::CUDA_115;
71	if (raw_version < `11070`)
72	return CudaVersion::CUDA_116;
73	if (raw_version < `11080`)
74	return CudaVersion::CUDA_117;
75	if (raw_version < `11090`)
76	return CudaVersion::CUDA_118;
77	if (raw_version < `12010`)
78	return CudaVersion::CUDA_120;
79	if (raw_version < `12020`)
80	return CudaVersion::CUDA_121;
81	if (raw_version < `12030`)
82	return CudaVersion::CUDA_122;
83	if (raw_version < `12040`)
84	return CudaVersion::CUDA_123;
85	return CudaVersion::NEW;
86	}
87
88	CudaVersion parseCudaHFile(llvm::StringRef Input) {
89	// Helper lambda which skips the words if the line starts with them or returns
90	// std::nullopt otherwise.
91	auto StartsWithWords =
92	[](llvm::StringRef Line,
93	const SmallVector<StringRef, `3`> words) -> std::optional<StringRef> {
94	for (StringRef word : words) {
95	if (!Line.consume_front(Prefix: word))
96	return {};
97	Line = Line.ltrim();
98	}
99	return Line;
100	};
101
102	Input = Input.ltrim();
103	while (!Input.empty()) {
104	if (auto Line =
105	StartsWithWords (Input.ltrim(), {"#", "define", "CUDA_VERSION"})) {
106	uint32_t RawVersion;
107	Line ->consumeInteger(Radix: `10`, Result&: RawVersion);
108	return getCudaVersion(raw_version: RawVersion);
109	}
110	// Find next non-empty line.
111	Input = Input.drop_front(N: Input.find_first_of(Chars: "\n\r")).ltrim();
112	}
113	return CudaVersion::UNKNOWN;
114	}
115	} // namespace
116
117	void CudaInstallationDetector::WarnIfUnsupportedVersion() {
118	if (Version > CudaVersion::PARTIALLY_SUPPORTED) {
119	std::string VersionString = CudaVersionToString(V: Version);
120	if (!VersionString.empty())
121	VersionString.insert(pos: `0`, s: " ");
122	D.Diag(diag::DiagID: warn_drv_new_cuda_version)
123	<< VersionString
124	<< (CudaVersion::PARTIALLY_SUPPORTED != CudaVersion::FULLY_SUPPORTED)
125	<< CudaVersionToString(V: CudaVersion::PARTIALLY_SUPPORTED);
126	} else if (Version > CudaVersion::FULLY_SUPPORTED)
127	D.Diag(diag::DiagID: warn_drv_partially_supported_cuda_version)
128	<< CudaVersionToString(V: Version);
129	}
130
131	CudaInstallationDetector::CudaInstallationDetector(
132	const Driver &D, const llvm::Triple &HostTriple,
133	const llvm::opt::ArgList &Args)
134	: D(D) {
135	struct Candidate {
136	std::string Path;
137	bool StrictChecking;
138
139	Candidate(std::string Path, bool StrictChecking = false)
140	: Path (Path), StrictChecking(StrictChecking) {}
141	};
142	SmallVector<Candidate, `4`> Candidates;
143
144	// In decreasing order so we prefer newer versions to older versions.
145	std::initializer_list<const char *> Versions = {"8.0", "7.5", "7.0"};
146	auto &FS = D.getVFS();
147
148	if (Args.hasArg(clang::driver::options::OPT_cuda_path_EQ)) {
149	Candidates.emplace_back(
150	Args.getLastArgValue(clang::driver::options::Id: OPT_cuda_path_EQ).str());
151	} else if (HostTriple.isOSWindows()) {
152	for (const char *Ver : Versions)
153	Candidates.emplace_back(
154	Args: D.SysRoot + "/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v" +
155	Ver);
156	} else {
157	if (!Args.hasArg(clang::driver::options::OPT_cuda_path_ignore_env)) {
158	// Try to find ptxas binary. If the executable is located in a directory
159	// called 'bin/', its parent directory might be a good guess for a valid
160	// CUDA installation.
161	// However, some distributions might installs 'ptxas' to /usr/bin. In that
162	// case the candidate would be '/usr' which passes the following checks
163	// because '/usr/include' exists as well. To avoid this case, we always
164	// check for the directory potentially containing files for libdevice,
165	// even if the user passes -nocudalib.
166	if (llvm::ErrorOr<std::string> ptxas =
167	llvm::sys::findProgramByName(Name: "ptxas")) {
168	SmallString<`256`> ptxasAbsolutePath;
169	llvm::sys::fs::real_path(path: *ptxas, output&: ptxasAbsolutePath);
170
171	StringRef ptxasDir = llvm::sys::path::parent_path(path: ptxasAbsolutePath);
172	if (llvm::sys::path::filename(path: ptxasDir) == "bin")
173	Candidates.emplace_back(
174	Args: std::string (llvm::sys::path::parent_path(path: ptxasDir)),
175	/StrictChecking=/Args: true);
176	}
177	}
178
179	Candidates.emplace_back(Args: D.SysRoot + "/usr/local/cuda");
180	for (const char *Ver : Versions)
181	Candidates.emplace_back(Args: D.SysRoot + "/usr/local/cuda-" + Ver);
182
183	Distro Dist(FS, llvm::Triple (llvm::sys::getProcessTriple()));
184	if (Dist.IsDebian() \|\| Dist.IsUbuntu())
185	// Special case for Debian to have nvidia-cuda-toolkit work
186	// out of the box. More info on http://bugs.debian.org/882505
187	Candidates.emplace_back(Args: D.SysRoot + "/usr/lib/cuda");
188	}
189
190	bool NoCudaLib = Args.hasArg(options::OPT_nogpulib);
191
192	for (const auto &Candidate : Candidates) {
193	InstallPath = Candidate.Path;
194	if (InstallPath.empty() \|\| !FS.exists(Path: InstallPath))
195	continue;
196
197	BinPath = InstallPath + "/bin";
198	IncludePath = InstallPath + "/include";
199	LibDevicePath = InstallPath + "/nvvm/libdevice";
200
201	if (!(FS.exists(Path: IncludePath) && FS.exists(Path: BinPath)))
202	continue;
203	bool CheckLibDevice = (!NoCudaLib \|\| Candidate.StrictChecking);
204	if (CheckLibDevice && !FS.exists(Path: LibDevicePath))
205	continue;
206
207	Version = CudaVersion::UNKNOWN;
208	if (auto CudaHFile = FS.getBufferForFile(Name: InstallPath + "/include/cuda.h"))
209	Version = parseCudaHFile(Input: (*CudaHFile)->getBuffer());
210	// As the last resort, make an educated guess between CUDA-7.0, which had
211	// old-style libdevice bitcode, and an unknown recent CUDA version.
212	if (Version == CudaVersion::UNKNOWN) {
213	Version = FS.exists(Path: LibDevicePath + "/libdevice.10.bc")
214	? CudaVersion::NEW
215	: CudaVersion::CUDA_70;
216	}
217
218	if (Version >= CudaVersion::CUDA_90) {
219	// CUDA-9+ uses single libdevice file for all GPU variants.
220	std::string FilePath = LibDevicePath + "/libdevice.10.bc";
221	if (FS.exists(Path: FilePath)) {
222	for (int Arch = (int)CudaArch::SM_30, E = (int)CudaArch::LAST; Arch < E;
223	++Arch) {
224	CudaArch GpuArch = static_cast<CudaArch>(Arch);
225	if (!IsNVIDIAGpuArch(A: GpuArch))
226	continue;
227	std::string GpuArchName(CudaArchToString(A: GpuArch));
228	LibDeviceMap [GpuArchName] = FilePath;
229	}
230	}
231	} else {
232	std::error_code EC;
233	for (llvm::vfs::directory_iterator LI = FS.dir_begin(Dir: LibDevicePath, EC),
234	LE;
235	!EC && LI != LE; LI = LI.increment(EC)) {
236	StringRef FilePath = LI ->path();
237	StringRef FileName = llvm::sys::path::filename(path: FilePath);
238	// Process all bitcode filenames that look like
239	// libdevice.compute_XX.YY.bc
240	const StringRef LibDeviceName = "libdevice.";
241	if (!(FileName.starts_with(Prefix: LibDeviceName) && FileName.ends_with(Suffix: ".bc")))
242	continue;
243	StringRef GpuArch = FileName.slice(
244	Start: LibDeviceName.size(), End: FileName.find(C: `'.'`, From: LibDeviceName.size()));
245	LibDeviceMap [GpuArch] = FilePath.str();
246	// Insert map entries for specific devices with this compute
247	// capability. NVCC's choice of the libdevice library version is
248	// rather peculiar and depends on the CUDA version.
249	if (GpuArch == "compute_20") {
250	LibDeviceMap ["sm_20"] = std::string (FilePath);
251	LibDeviceMap ["sm_21"] = std::string (FilePath);
252	LibDeviceMap ["sm_32"] = std::string (FilePath);
253	} else if (GpuArch == "compute_30") {
254	LibDeviceMap ["sm_30"] = std::string (FilePath);
255	if (Version < CudaVersion::CUDA_80) {
256	LibDeviceMap ["sm_50"] = std::string (FilePath);
257	LibDeviceMap ["sm_52"] = std::string (FilePath);
258	LibDeviceMap ["sm_53"] = std::string (FilePath);
259	}
260	LibDeviceMap ["sm_60"] = std::string (FilePath);
261	LibDeviceMap ["sm_61"] = std::string (FilePath);
262	LibDeviceMap ["sm_62"] = std::string (FilePath);
263	} else if (GpuArch == "compute_35") {
264	LibDeviceMap ["sm_35"] = std::string (FilePath);
265	LibDeviceMap ["sm_37"] = std::string (FilePath);
266	} else if (GpuArch == "compute_50") {
267	if (Version >= CudaVersion::CUDA_80) {
268	LibDeviceMap ["sm_50"] = std::string (FilePath);
269	LibDeviceMap ["sm_52"] = std::string (FilePath);
270	LibDeviceMap ["sm_53"] = std::string (FilePath);
271	}
272	}
273	}
274	}
275
276	// Check that we have found at least one libdevice that we can link in if
277	// -nocudalib hasn't been specified.
278	if (LibDeviceMap.empty() && !NoCudaLib)
279	continue;
280
281	IsValid = true;
282	break;
283	}
284	}
285
286	void CudaInstallationDetector::AddCudaIncludeArgs(
287	const ArgList &DriverArgs, ArgStringList &CC1Args) const {
288	if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
289	// Add cuda_wrappers/ to our system include path. This lets us wrap*
290	// standard library headers.
291	SmallString<`128`> P(D.ResourceDir);
292	llvm::sys::path::append(path&: P, a: "include");
293	llvm::sys::path::append(path&: P, a: "cuda_wrappers");
294	CC1Args.push_back(Elt: "-internal-isystem");
295	CC1Args.push_back(Elt: DriverArgs.MakeArgString(Str: P));
296	}
297
298	if (DriverArgs.hasArg(options::OPT_nogpuinc))
299	return;
300
301	if (!isValid()) {
302	D.Diag(diag::DiagID: err_drv_no_cuda_installation);
303	return;
304	}
305
306	CC1Args.push_back(Elt: "-include");
307	CC1Args.push_back(Elt: "__clang_cuda_runtime_wrapper.h");
308	}
309
310	void CudaInstallationDetector::CheckCudaVersionSupportsArch(
311	CudaArch Arch) const {
312	if (Arch == CudaArch::UNKNOWN \|\| Version == CudaVersion::UNKNOWN \|\|
313	ArchsWithBadVersion [(int)Arch])
314	return;
315
316	auto MinVersion = MinVersionForCudaArch(A: Arch);
317	auto MaxVersion = MaxVersionForCudaArch(A: Arch);
318	if (Version < MinVersion \|\| Version > MaxVersion) {
319	ArchsWithBadVersion [(int)Arch] = true;
320	D.Diag(diag::DiagID: err_drv_cuda_version_unsupported)
321	<< CudaArchToString(A: Arch) << CudaVersionToString(V: MinVersion)
322	<< CudaVersionToString(V: MaxVersion) << InstallPath
323	<< CudaVersionToString(V: Version);
324	}
325	}
326
327	void CudaInstallationDetector::print(raw_ostream &OS) const {
328	if (isValid())
329	OS << "Found CUDA installation: " << InstallPath << ", version "
330	<< CudaVersionToString(V: Version) << "\n";
331	}
332
namespace {
/// Debug info level for the NVPTX devices. The host and the device itself may
/// need different debug info levels. This type controls what is emitted for
/// the devices: no debug info at all, debug directives only, or the same debug
/// info as for the host.
enum DeviceDebugInfoLevel {
  /// Do not emit debug info for the devices.
  DisableDebugInfo,
  /// Emit only debug directives.
  DebugDirectivesOnly,
  /// Use the same debug info level as for the host.
  EmitSameDebugInfoAsHost,
};
} // anonymous namespace
346
347	/// Define debug info level for the NVPTX devices. If the debug info for both
348	/// the host and device are disabled (-g0/-ggdb0 or no debug options at all). If
349	/// only debug directives are requested for the both host and device
350	/// (-gline-directvies-only), or the debug info only for the device is disabled
351	/// (optimization is on and --cuda-noopt-device-debug was not specified), the
352	/// debug directves only must be emitted for the device. Otherwise, use the same
353	/// debug info level just like for the host (with the limitations of only
354	/// supported DWARF2 standard).
355	static DeviceDebugInfoLevel mustEmitDebugInfo(const ArgList &Args) {
356	const Arg *A = Args.getLastArg(options::OPT_O_Group);
357	bool IsDebugEnabled = !A \|\| A->getOption().matches(options::ID: OPT_O0) \|\|
358	Args.hasFlag(options::OPT_cuda_noopt_device_debug,
359	options::OPT_no_cuda_noopt_device_debug,
360	/Default=/false);
361	if (const Arg *A = Args.getLastArg(options::OPT_g_Group)) {
362	const Option &Opt = A->getOption();
363	if (Opt.matches(options::ID: OPT_gN_Group)) {
364	if (Opt.matches(options::ID: OPT_g0) \|\| Opt.matches(options::ID: OPT_ggdb0))
365	return DisableDebugInfo;
366	if (Opt.matches(options::ID: OPT_gline_directives_only))
367	return DebugDirectivesOnly;
368	}
369	return IsDebugEnabled ? EmitSameDebugInfoAsHost : DebugDirectivesOnly;
370	}
371	return willEmitRemarks(Args) ? DebugDirectivesOnly : DisableDebugInfo;
372	}
373
374	void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
375	const InputInfo &Output,
376	const InputInfoList &Inputs,
377	const ArgList &Args,
378	const char LinkingOutput) const* {
379	const auto &TC =
380	static_cast<const toolchains::NVPTXToolChain &>(getToolChain());
381	assert(TC.getTriple().isNVPTX() && "Wrong platform");
382
383	StringRef GPUArchName;
384	// If this is a CUDA action we need to extract the device architecture
385	// from the Job's associated architecture, otherwise use the -march=arch
386	// option. This option may come from -Xopenmp-target flag or the default
387	// value.
388	if (JA.isDeviceOffloading(OKind: Action::OFK_Cuda)) {
389	GPUArchName = JA.getOffloadingArch();
390	} else {
391	GPUArchName = Args.getLastArgValue(options::Id: OPT_march_EQ);
392	if (GPUArchName.empty()) {
393	C.getDriver().Diag(diag::DiagID: err_drv_offload_missing_gpu_arch)
394	<< getToolChain().getArchName() << getShortName();
395	return;
396	}
397	}
398
399	// Obtain architecture from the action.
400	CudaArch gpu_arch = StringToCudaArch(S: GPUArchName);
401	assert(gpu_arch != CudaArch::UNKNOWN &&
402	"Device action expected to have an architecture.");
403
404	// Check that our installation's ptxas supports gpu_arch.
405	if (!Args.hasArg(options::OPT_no_cuda_version_check)) {
406	TC.CudaInstallation.CheckCudaVersionSupportsArch(Arch: gpu_arch);
407	}
408
409	ArgStringList CmdArgs;
410	CmdArgs.push_back(Elt: TC.getTriple().isArch64Bit() ? "-m64" : "-m32");
411	DeviceDebugInfoLevel DIKind = mustEmitDebugInfo(Args);
412	if (DIKind == EmitSameDebugInfoAsHost) {
413	// ptxas does not accept -g option if optimization is enabled, so
414	// we ignore the compiler's -O options if we want debug info.*
415	CmdArgs.push_back(Elt: "-g");
416	CmdArgs.push_back(Elt: "--dont-merge-basicblocks");
417	CmdArgs.push_back(Elt: "--return-at-end");
418	} else if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
419	// Map the -O we received to -O{0,1,2,3}.
420	//
421	// TODO: Perhaps we should map host -O2 to ptxas -O3. -O3 is ptxas's
422	// default, so it may correspond more closely to the spirit of clang -O2.
423
424	// -O3 seems like the least-bad option when -Osomething is specified to
425	// clang but it isn't handled below.
426	StringRef OOpt = "3";
427	if (A->getOption().matches(options::ID: OPT_O4) \|\|
428	A->getOption().matches(options::ID: OPT_Ofast))
429	OOpt = "3";
430	else if (A->getOption().matches(options::ID: OPT_O0))
431	OOpt = "0";
432	else if (A->getOption().matches(options::ID: OPT_O)) {
433	// -Os, -Oz, and -O(anything else) map to -O2, for lack of better options.
434	OOpt = llvm::StringSwitch<const char *>(A->getValue())
435	.Case(S: "1", Value: "1")
436	.Case(S: "2", Value: "2")
437	.Case(S: "3", Value: "3")
438	.Case(S: "s", Value: "2")
439	.Case(S: "z", Value: "2")
440	.Default(Value: "2");
441	}
442	CmdArgs.push_back(Elt: Args.MakeArgString(Str: llvm::Twine ("-O") + OOpt));
443	} else {
444	// If no -O was passed, pass -O0 to ptxas -- no opt flag should correspond
445	// to no optimizations, but ptxas's default is -O3.
446	CmdArgs.push_back(Elt: "-O0");
447	}
448	if (DIKind == DebugDirectivesOnly)
449	CmdArgs.push_back(Elt: "-lineinfo");
450
451	// Pass -v to ptxas if it was passed to the driver.
452	if (Args.hasArg(options::OPT_v))
453	CmdArgs.push_back(Elt: "-v");
454
455	CmdArgs.push_back(Elt: "--gpu-name");
456	CmdArgs.push_back(Elt: Args.MakeArgString(Str: CudaArchToString(A: gpu_arch)));
457	CmdArgs.push_back(Elt: "--output-file");
458	std::string OutputFileName = TC.getInputFilename(Input: Output);
459
460	// If we are invoking `nvlink` internally we need to output a `.cubin` file.
461	// FIXME: This should hopefully be removed if NVIDIA updates their tooling.
462	if (!C.getInputArgs().getLastArg(options::OPT_c)) {
463	SmallString<`256`> Filename(Output.getFilename());
464	llvm::sys::path::replace_extension(path&: Filename, extension: "cubin");
465	OutputFileName = Filename.str();
466	}
467	if (Output.isFilename() && OutputFileName != Output.getFilename())
468	C.addTempFile(Name: Args.MakeArgString(Str: OutputFileName));
469
470	CmdArgs.push_back(Elt: Args.MakeArgString(Str: OutputFileName));
471	for (const auto &II : Inputs)
472	CmdArgs.push_back(Elt: Args.MakeArgString(Str: II.getFilename()));
473
474	for (const auto &A : Args.getAllArgValues(options::OPT_Xcuda_ptxas))
475	CmdArgs.push_back(Args.MakeArgString(A));
476
477	bool Relocatable;
478	if (JA.isOffloading(OKind: Action::OFK_OpenMP))
479	// In OpenMP we need to generate relocatable code.
480	Relocatable = Args.hasFlag(options::OPT_fopenmp_relocatable_target,
481	options::OPT_fnoopenmp_relocatable_target,
482	/Default=/true);
483	else if (JA.isOffloading(OKind: Action::OFK_Cuda))
484	// In CUDA we generate relocatable code by default.
485	Relocatable = Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
486	/Default=/false);
487	else
488	// Otherwise, we are compiling directly and should create linkable output.
489	Relocatable = true;
490
491	if (Relocatable)
492	CmdArgs.push_back(Elt: "-c");
493
494	const char *Exec;
495	if (Arg *A = Args.getLastArg(options::OPT_ptxas_path_EQ))
496	Exec = A->getValue();
497	else
498	Exec = Args.MakeArgString(Str: TC.GetProgramPath(Name: "ptxas"));
499	C.addCommand(C: std::make_unique<Command>(
500	args: JA, args: *this,
501	args: ResponseFileSupport{.ResponseKind: ResponseFileSupport::RF_Full, .ResponseEncoding: llvm::sys::WEM_UTF8,
502	.ResponseFlag: "--options-file"},
503	args&: Exec, args&: CmdArgs, args: Inputs, args: Output));
504	}
505
506	static bool shouldIncludePTX(const ArgList &Args, StringRef InputArch) {
507	// The new driver does not include PTX by default to avoid overhead.
508	bool includePTX = !Args.hasFlag(options::OPT_offload_new_driver,
509	options::OPT_no_offload_new_driver, false);
510	for (Arg *A : Args.filtered(options::OPT_cuda_include_ptx_EQ,
511	options::OPT_no_cuda_include_ptx_EQ)) {
512	A->claim();
513	const StringRef ArchStr = A->getValue();
514	if (A->getOption().matches(options::OPT_cuda_include_ptx_EQ) &&
515	(ArchStr == "all" \|\| ArchStr == InputArch))
516	includePTX = true;
517	else if (A->getOption().matches(options::OPT_no_cuda_include_ptx_EQ) &&
518	(ArchStr == "all" \|\| ArchStr == InputArch))
519	includePTX = false;
520	}
521	return includePTX;
522	}
523
524	// All inputs to this linker must be from CudaDeviceActions, as we need to look
525	// at the Inputs' Actions in order to figure out which GPU architecture they
526	// correspond to.
527	void NVPTX::FatBinary::ConstructJob(Compilation &C, const JobAction &JA,
528	const InputInfo &Output,
529	const InputInfoList &Inputs,
530	const ArgList &Args,
531	const char LinkingOutput) const* {
532	const auto &TC =
533	static_cast<const toolchains::CudaToolChain &>(getToolChain());
534	assert(TC.getTriple().isNVPTX() && "Wrong platform");
535
536	ArgStringList CmdArgs;
537	if (TC.CudaInstallation.version() <= CudaVersion::CUDA_100)
538	CmdArgs.push_back(Elt: "--cuda");
539	CmdArgs.push_back(Elt: TC.getTriple().isArch64Bit() ? "-64" : "-32");
540	CmdArgs.push_back(Elt: Args.MakeArgString(Str: "--create"));
541	CmdArgs.push_back(Elt: Args.MakeArgString(Str: Output.getFilename()));
542	if (mustEmitDebugInfo(Args) == EmitSameDebugInfoAsHost)
543	CmdArgs.push_back(Elt: "-g");
544
545	for (const auto &II : Inputs) {
546	auto *A = II.getAction();
547	assert(A->getInputs().size() == `1` &&
548	"Device offload action is expected to have a single input");
549	const char *gpu_arch_str = A->getOffloadingArch();
550	assert(gpu_arch_str &&
551	"Device action expected to have associated a GPU architecture!");
552	CudaArch gpu_arch = StringToCudaArch(S: gpu_arch_str);
553
554	if (II.getType() == types::TY_PP_Asm &&
555	!shouldIncludePTX(Args, InputArch: gpu_arch_str))
556	continue;
557	// We need to pass an Arch of the form "sm_XX" for cubin files and
558	// "compute_XX" for ptx.
559	const char *Arch = (II.getType() == types::TY_PP_Asm)
560	? CudaArchToVirtualArchString(A: gpu_arch)
561	: gpu_arch_str;
562	CmdArgs.push_back(
563	Elt: Args.MakeArgString(Str: llvm::Twine ("--image=profile=") + Arch +
564	",file=" + getToolChain().getInputFilename(Input: II)));
565	}
566
567	for (const auto &A : Args.getAllArgValues(options::OPT_Xcuda_fatbinary))
568	CmdArgs.push_back(Args.MakeArgString(A));
569
570	const char *Exec = Args.MakeArgString(Str: TC.GetProgramPath(Name: "fatbinary"));
571	C.addCommand(C: std::make_unique<Command>(
572	args: JA, args: *this,
573	args: ResponseFileSupport{.ResponseKind: ResponseFileSupport::RF_Full, .ResponseEncoding: llvm::sys::WEM_UTF8,
574	.ResponseFlag: "--options-file"},
575	args&: Exec, args&: CmdArgs, args: Inputs, args: Output));
576	}
577
578	void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA,
579	const InputInfo &Output,
580	const InputInfoList &Inputs,
581	const ArgList &Args,
582	const char LinkingOutput) const* {
583	const auto &TC =
584	static_cast<const toolchains::NVPTXToolChain &>(getToolChain());
585	ArgStringList CmdArgs;
586
587	assert(TC.getTriple().isNVPTX() && "Wrong platform");
588
589	assert((Output.isFilename() \|\| Output.isNothing()) && "Invalid output.");
590	if (Output.isFilename()) {
591	CmdArgs.push_back(Elt: "-o");
592	CmdArgs.push_back(Elt: Output.getFilename());
593	}
594
595	if (mustEmitDebugInfo(Args) == EmitSameDebugInfoAsHost)
596	CmdArgs.push_back(Elt: "-g");
597
598	if (Args.hasArg(options::OPT_v))
599	CmdArgs.push_back(Elt: "-v");
600
601	StringRef GPUArch = Args.getLastArgValue(options::Id: OPT_march_EQ);
602	if (GPUArch.empty()) {
603	C.getDriver().Diag(diag::DiagID: err_drv_offload_missing_gpu_arch)
604	<< getToolChain().getArchName() << getShortName();
605	return;
606	}
607
608	CmdArgs.push_back(Elt: "-arch");
609	CmdArgs.push_back(Elt: Args.MakeArgString(Str: GPUArch));
610
611	// Add paths specified in LIBRARY_PATH environment variable as -L options.
612	addDirectoryList(Args, CmdArgs, ArgName: "-L", EnvVar: "LIBRARY_PATH");
613
614	// Add standard library search paths passed on the command line.
615	Args.AddAllArgs(Output&: CmdArgs, options::Id0: OPT_L);
616	getToolChain().AddFilePathLibArgs(Args, CmdArgs);
617
618	// Add paths for the default clang library path.
619	SmallString<`256`> DefaultLibPath =
620	llvm::sys::path::parent_path(path: TC.getDriver().Dir);
621	llvm::sys::path::append(path&: DefaultLibPath, CLANG_INSTALL_LIBDIR_BASENAME);
622	CmdArgs.push_back(Elt: Args.MakeArgString(Str: Twine("-L") + DefaultLibPath));
623
624	for (const auto &II : Inputs) {
625	if (II.getType() == types::TY_LLVM_IR \|\| II.getType() == types::TY_LTO_IR \|\|
626	II.getType() == types::TY_LTO_BC \|\| II.getType() == types::TY_LLVM_BC) {
627	C.getDriver().Diag(diag::DiagID: err_drv_no_linker_llvm_support)
628	<< getToolChain().getTripleString();
629	continue;
630	}
631
632	// The 'nvlink' application performs RDC-mode linking when given a '.o'
633	// file and device linking when given a '.cubin' file. We always want to
634	// perform device linking, so just rename any '.o' files.
635	// FIXME: This should hopefully be removed if NVIDIA updates their tooling.
636	if (II.isFilename()) {
637	auto InputFile = getToolChain().getInputFilename(Input: II);
638	if (llvm::sys::path::extension(path: InputFile) != ".cubin") {
639	// If there are no actions above this one then this is direct input and
640	// we can copy it. Otherwise the input is internal so a `.cubin` file
641	// should exist.
642	if (II.getAction() && II.getAction()->getInputs().size() == `0`) {
643	const char *CubinF =
644	Args.MakeArgString(Str: getToolChain().getDriver().GetTemporaryPath(
645	Prefix: llvm::sys::path::stem(path: InputFile), Suffix: "cubin"));
646	if (llvm::sys::fs::copy_file(From: InputFile, To: C.addTempFile(Name: CubinF)))
647	continue;
648
649	CmdArgs.push_back(Elt: CubinF);
650	} else {
651	SmallString<`256`> Filename(InputFile);
652	llvm::sys::path::replace_extension(path&: Filename, extension: "cubin");
653	CmdArgs.push_back(Elt: Args.MakeArgString(Str: Filename));
654	}
655	} else {
656	CmdArgs.push_back(Elt: Args.MakeArgString(Str: InputFile));
657	}
658	} else if (!II.isNothing()) {
659	II.getInputArg().renderAsInput(Args, Output&: CmdArgs);
660	}
661	}
662
663	C.addCommand(C: std::make_unique<Command>(
664	args: JA, args: *this,
665	args: ResponseFileSupport{.ResponseKind: ResponseFileSupport::RF_Full, .ResponseEncoding: llvm::sys::WEM_UTF8,
666	.ResponseFlag: "--options-file"},
667	args: Args.MakeArgString(Str: getToolChain().GetProgramPath(Name: "nvlink")), args&: CmdArgs,
668	args: Inputs, args: Output));
669	}
670
671	void NVPTX::getNVPTXTargetFeatures(const Driver &D, const llvm::Triple &Triple,
672	const llvm::opt::ArgList &Args,
673	std::vector<StringRef> &Features) {
674	if (Args.hasArg(options::OPT_cuda_feature_EQ)) {
675	StringRef PtxFeature =
676	Args.getLastArgValue(options::Id: OPT_cuda_feature_EQ, Default: "+ptx42");
677	Features.push_back(x: Args.MakeArgString(Str: PtxFeature));
678	return;
679	}
680	CudaInstallationDetector CudaInstallation(D, Triple, Args);
681
682	// New CUDA versions often introduce new instructions that are only supported
683	// by new PTX version, so we need to raise PTX level to enable them in NVPTX
684	// back-end.
685	const char PtxFeature = nullptr*;
686	switch (CudaInstallation.version()) {
687	#define CASE_CUDA_VERSION(CUDA_VER, PTX_VER) \
688	case CudaVersion::CUDA_##CUDA_VER: \
689	PtxFeature = "+ptx" #PTX_VER; \
690	break;
691	CASE_CUDA_VERSION(`123`, `83`);
692	CASE_CUDA_VERSION(`122`, `82`);
693	CASE_CUDA_VERSION(`121`, `81`);
694	CASE_CUDA_VERSION(`120`, `80`);
695	CASE_CUDA_VERSION(`118`, `78`);
696	CASE_CUDA_VERSION(`117`, `77`);
697	CASE_CUDA_VERSION(`116`, `76`);
698	CASE_CUDA_VERSION(`115`, `75`);
699	CASE_CUDA_VERSION(`114`, `74`);
700	CASE_CUDA_VERSION(`113`, `73`);
701	CASE_CUDA_VERSION(`112`, `72`);
702	CASE_CUDA_VERSION(`111`, `71`);
703	CASE_CUDA_VERSION(`110`, `70`);
704	CASE_CUDA_VERSION(`102`, `65`);
705	CASE_CUDA_VERSION(`101`, `64`);
706	CASE_CUDA_VERSION(`100`, `63`);
707	CASE_CUDA_VERSION(`92`, `61`);
708	CASE_CUDA_VERSION(`91`, `61`);
709	CASE_CUDA_VERSION(`90`, `60`);
710	#undef CASE_CUDA_VERSION
711	default:
712	PtxFeature = "+ptx42";
713	}
714	Features.push_back(x: PtxFeature);
715	}
716
717	/// NVPTX toolchain. Our assembler is ptxas, and our linker is nvlink. This
718	/// operates as a stand-alone version of the NVPTX tools without the host
719	/// toolchain.
720	NVPTXToolChain::NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,
721	const llvm::Triple &HostTriple,
722	const ArgList &Args, bool Freestanding = false)
723	: ToolChain (D, Triple, Args), CudaInstallation (D, HostTriple, Args),
724	Freestanding(Freestanding) {
725	if (CudaInstallation.isValid())
726	getProgramPaths().push_back(Elt: std::string (CudaInstallation.getBinPath()));
727	// Lookup binaries into the driver directory, this is used to
728	// discover the 'nvptx-arch' executable.
729	getProgramPaths().push_back(Elt: getDriver().Dir);
730	}
731
732	/// We only need the host triple to locate the CUDA binary utilities, use the
733	/// system's default triple if not provided.
734	NVPTXToolChain::NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,
735	const ArgList &Args)
736	: NVPTXToolChain (D, Triple, llvm::Triple (LLVM_HOST_TRIPLE), Args,
737	/Freestanding=/true) {}
738
739	llvm::opt::DerivedArgList *
740	NVPTXToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
741	StringRef BoundArch,
742	Action::OffloadKind OffloadKind) const {
743	DerivedArgList *DAL = ToolChain::TranslateArgs(Args, BoundArch, DeviceOffloadKind: OffloadKind);
744	if (!DAL)
745	DAL = new DerivedArgList (Args.getBaseArgs());
746
747	const OptTable &Opts = getDriver().getOpts();
748
749	for (Arg *A : Args)
750	if (!llvm::is_contained(Range&: *DAL, Element: A))
751	DAL->append(A);
752
753	if (!DAL->hasArg(options::OPT_march_EQ) && OffloadKind != Action::OFK_None) {
754	DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
755	CudaArchToString(CudaArch::CudaDefault));
756	} else if (DAL->getLastArgValue(options::OPT_march_EQ) == "generic" &&
757	OffloadKind == Action::OFK_None) {
758	DAL->eraseArg(options::OPT_march_EQ);
759	} else if (DAL->getLastArgValue(options::OPT_march_EQ) == "native") {
760	auto GPUsOrErr = getSystemGPUArchs(Args);
761	if (!GPUsOrErr) {
762	getDriver().Diag(diag::err_drv_undetermined_gpu_arch)
763	<< getArchName() << llvm::toString(GPUsOrErr.takeError()) << "-march";
764	} else {
765	if (GPUsOrErr->size() > `1`)
766	getDriver().Diag(diag::warn_drv_multi_gpu_arch)
767	<< getArchName() << llvm::join(*GPUsOrErr, ", ") << "-march";
768	DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
769	Args.MakeArgString(GPUsOrErr->front()));
770	}
771	}
772
773	return DAL;
774	}
775
776	void NVPTXToolChain::addClangTargetOptions(
777	const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
778	Action::OffloadKind DeviceOffloadingKind) const {
779	// If we are compiling with a standalone NVPTX toolchain we want to try to
780	// mimic a standard environment as much as possible. So we enable lowering
781	// ctor / dtor functions to global symbols that can be registered.
782	if (Freestanding)
783	CC1Args.append(IL: {"-mllvm", "--nvptx-lower-global-ctor-dtor"});
784	}
785
786	bool NVPTXToolChain::supportsDebugInfoOption(const llvm::opt::Arg A) const* {
787	const Option &O = A->getOption();
788	return (O.matches(options::OPT_gN_Group) &&
789	!O.matches(options::OPT_gmodules)) \|\|
790	O.matches(options::OPT_g_Flag) \|\|
791	O.matches(options::OPT_ggdbN_Group) \|\| O.matches(options::OPT_ggdb) \|\|
792	O.matches(options::OPT_gdwarf) \|\| O.matches(options::OPT_gdwarf_2) \|\|
793	O.matches(options::OPT_gdwarf_3) \|\| O.matches(options::OPT_gdwarf_4) \|\|
794	O.matches(options::OPT_gdwarf_5) \|\|
795	O.matches(options::OPT_gcolumn_info);
796	}
797
798	void NVPTXToolChain::adjustDebugInfoKind(
799	llvm::codegenoptions::DebugInfoKind &DebugInfoKind,
800	const ArgList &Args) const {
801	switch (mustEmitDebugInfo(Args)) {
802	case DisableDebugInfo:
803	DebugInfoKind = llvm::codegenoptions::NoDebugInfo;
804	break;
805	case DebugDirectivesOnly:
806	DebugInfoKind = llvm::codegenoptions::DebugDirectivesOnly;
807	break;
808	case EmitSameDebugInfoAsHost:
809	// Use same debug info level as the host.
810	break;
811	}
812	}
813
814	Expected<SmallVector<std::string>>
815	NVPTXToolChain::getSystemGPUArchs(const ArgList &Args) const {
816	// Detect NVIDIA GPUs availible on the system.
817	std::string Program;
818	if (Arg *A = Args.getLastArg(options::OPT_nvptx_arch_tool_EQ))
819	Program = A->getValue();
820	else
821	Program = GetProgramPath(Name: "nvptx-arch");
822
823	auto StdoutOrErr = executeToolChainProgram(Executable: Program);
824	if (!StdoutOrErr)
825	return StdoutOrErr.takeError();
826
827	SmallVector<std::string, `1`> GPUArchs;
828	for (StringRef Arch : llvm::split(Str: (*StdoutOrErr)->getBuffer(), Separator: "\n"))
829	if (!Arch.empty())
830	GPUArchs.push_back(Elt: Arch.str());
831
832	if (GPUArchs.empty())
833	return llvm::createStringError(EC: std::error_code (),
834	Msg: "No NVIDIA GPU detected in the system");
835
836	return std::move(GPUArchs);
837	}
838
839	/// CUDA toolchain. Our assembler is ptxas, and our "linker" is fatbinary,
840	/// which isn't properly a linker but nonetheless performs the step of stitching
841	/// together object files from the assembler into a single blob.
842
843	CudaToolChain::CudaToolChain(const Driver &D, const llvm::Triple &Triple,
844	const ToolChain &HostTC, const ArgList &Args)
845	: NVPTXToolChain (D, Triple, HostTC.getTriple(), Args), HostTC(HostTC) {}
846
847	void CudaToolChain::addClangTargetOptions(
848	const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
849	Action::OffloadKind DeviceOffloadingKind) const {
850	HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadKind: DeviceOffloadingKind);
851
852	StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
853	assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
854	assert((DeviceOffloadingKind == Action::OFK_OpenMP \|\|
855	DeviceOffloadingKind == Action::OFK_Cuda) &&
856	"Only OpenMP or CUDA offloading kinds are supported for NVIDIA GPUs.");
857
858	if (DeviceOffloadingKind == Action::OFK_Cuda) {
859	CC1Args.append(
860	IL: {"-fcuda-is-device", "-mllvm", "-enable-memcpyopt-without-libcalls"});
861
862	// Unsized function arguments used for variadics were introduced in CUDA-9.0
863	// We still do not support generating code that actually uses variadic
864	// arguments yet, but we do need to allow parsing them as recent CUDA
865	// headers rely on that. https://github.com/llvm/llvm-project/issues/58410
866	if (CudaInstallation.version() >= CudaVersion::CUDA_90)
867	CC1Args.push_back(Elt: "-fcuda-allow-variadic-functions");
868	}
869
870	if (DriverArgs.hasArg(options::OPT_nogpulib))
871	return;
872
873	if (DeviceOffloadingKind == Action::OFK_OpenMP &&
874	DriverArgs.hasArg(options::OPT_S))
875	return;
876
877	std::string LibDeviceFile = CudaInstallation.getLibDeviceFile(Gpu: GpuArch);
878	if (LibDeviceFile.empty()) {
879	getDriver().Diag(diag::err_drv_no_cuda_libdevice) << GpuArch;
880	return;
881	}
882
883	CC1Args.push_back(Elt: "-mlink-builtin-bitcode");
884	CC1Args.push_back(Elt: DriverArgs.MakeArgString(Str: LibDeviceFile));
885
886	clang::CudaVersion CudaInstallationVersion = CudaInstallation.version();
887
888	if (DriverArgs.hasFlag(options::OPT_fcuda_short_ptr,
889	options::OPT_fno_cuda_short_ptr, false))
890	CC1Args.append(IL: {"-mllvm", "--nvptx-short-ptr"});
891
892	if (CudaInstallationVersion >= CudaVersion::UNKNOWN)
893	CC1Args.push_back(
894	Elt: DriverArgs.MakeArgString(Str: Twine("-target-sdk-version=") +
895	CudaVersionToString(V: CudaInstallationVersion)));
896
897	if (DeviceOffloadingKind == Action::OFK_OpenMP) {
898	if (CudaInstallationVersion < CudaVersion::CUDA_92) {
899	getDriver().Diag(
900	diag::err_drv_omp_offload_target_cuda_version_not_support)
901	<< CudaVersionToString(CudaInstallationVersion);
902	return;
903	}
904
905	// Link the bitcode library late if we're using device LTO.
906	if (getDriver().isUsingLTO(/ IsOffload / true))
907	return;
908
909	addOpenMPDeviceRTL(D: getDriver(), DriverArgs, CC1Args, BitcodeSuffix: GpuArch.str(),
910	Triple: getTriple(), HostTC);
911	}
912	}
913
914	llvm::DenormalMode CudaToolChain::getDefaultDenormalModeForType(
915	const llvm::opt::ArgList &DriverArgs, const JobAction &JA,
916	const llvm::fltSemantics FPType) const* {
917	if (JA.getOffloadingDeviceKind() == Action::OFK_Cuda) {
918	if (FPType && FPType == &llvm::APFloat::IEEEsingle() &&
919	DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero,
920	options::OPT_fno_gpu_flush_denormals_to_zero, false))
921	return llvm::DenormalMode::getPreserveSign();
922	}
923
924	assert(JA.getOffloadingDeviceKind() != Action::OFK_Host);
925	return llvm::DenormalMode::getIEEE();
926	}
927
928	void CudaToolChain::AddCudaIncludeArgs(const ArgList &DriverArgs,
929	ArgStringList &CC1Args) const {
930	// Check our CUDA version if we're going to include the CUDA headers.
931	if (!DriverArgs.hasArg(options::OPT_nogpuinc) &&
932	!DriverArgs.hasArg(options::OPT_no_cuda_version_check)) {
933	StringRef Arch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
934	assert(!Arch.empty() && "Must have an explicit GPU arch.");
935	CudaInstallation.CheckCudaVersionSupportsArch(Arch: StringToCudaArch(S: Arch));
936	}
937	CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args);
938	}
939
940	std::string CudaToolChain::getInputFilename(const InputInfo &Input) const {
941	// Only object files are changed, for example assembly files keep their .s
942	// extensions. If the user requested device-only compilation don't change it.
943	if (Input.getType() != types::TY_Object \|\| getDriver().offloadDeviceOnly())
944	return ToolChain::getInputFilename(Input);
945
946	// Replace extension for object files with cubin because nvlink relies on
947	// these particular file names.
948	SmallString<`256`> Filename(ToolChain::getInputFilename(Input));
949	llvm::sys::path::replace_extension(path&: Filename, extension: "cubin");
950	return std::string(Filename);
951	}
952
953	llvm::opt::DerivedArgList *
954	CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
955	StringRef BoundArch,
956	Action::OffloadKind DeviceOffloadKind) const {
957	DerivedArgList *DAL =
958	HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
959	if (!DAL)
960	DAL = new DerivedArgList (Args.getBaseArgs());
961
962	const OptTable &Opts = getDriver().getOpts();
963
964	// For OpenMP device offloading, append derived arguments. Make sure
965	// flags are not duplicated.
966	// Also append the compute capability.
967	if (DeviceOffloadKind == Action::OFK_OpenMP) {
968	for (Arg *A : Args)
969	if (!llvm::is_contained(Range&: *DAL, Element: A))
970	DAL->append(A);
971
972	if (!DAL->hasArg(options::OPT_march_EQ)) {
973	StringRef Arch = BoundArch;
974	if (Arch.empty()) {
975	auto ArchsOrErr = getSystemGPUArchs(Args);
976	if (!ArchsOrErr) {
977	std::string ErrMsg =
978	llvm::formatv(Fmt: "{0}", Vals: llvm::fmt_consume(Item: ArchsOrErr.takeError()));
979	getDriver().Diag(diag::err_drv_undetermined_gpu_arch)
980	<< llvm::Triple::getArchTypeName(getArch()) << ErrMsg << "-march";
981	Arch = CudaArchToString(A: CudaArch::CudaDefault);
982	} else {
983	Arch = Args.MakeArgString(Str: ArchsOrErr ->front());
984	}
985	}
986	DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), Arch);
987	}
988
989	return DAL;
990	}
991
992	for (Arg *A : Args) {
993	// Make sure flags are not duplicated.
994	if (!llvm::is_contained(Range&: *DAL, Element: A)) {
995	DAL->append(A);
996	}
997	}
998
999	if (!BoundArch.empty()) {
1000	DAL->eraseArg(options::OPT_march_EQ);
1001	DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
1002	BoundArch);
1003	}
1004	return DAL;
1005	}
1006
1007	Tool NVPTXToolChain::buildAssembler() const* {
1008	return new tools::NVPTX::Assembler (*this);
1009	}
1010
1011	Tool NVPTXToolChain::buildLinker() const* {
1012	return new tools::NVPTX::Linker (*this);
1013	}
1014
1015	Tool CudaToolChain::buildAssembler() const* {
1016	return new tools::NVPTX::Assembler (*this);
1017	}
1018
1019	Tool CudaToolChain::buildLinker() const* {
1020	return new tools::NVPTX::FatBinary (*this);
1021	}
1022
1023	void CudaToolChain::addClangWarningOptions(ArgStringList &CC1Args) const {
1024	HostTC.addClangWarningOptions(CC1Args);
1025	}
1026
1027	ToolChain::CXXStdlibType
1028	CudaToolChain::GetCXXStdlibType(const ArgList &Args) const {
1029	return HostTC.GetCXXStdlibType(Args);
1030	}
1031
1032	void CudaToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
1033	ArgStringList &CC1Args) const {
1034	HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
1035
1036	if (!DriverArgs.hasArg(options::OPT_nogpuinc) && CudaInstallation.isValid())
1037	CC1Args.append(
1038	IL: {"-internal-isystem",
1039	DriverArgs.MakeArgString(Str: CudaInstallation.getIncludePath())});
1040	}
1041
1042	void CudaToolChain::AddClangCXXStdlibIncludeArgs(const ArgList &Args,
1043	ArgStringList &CC1Args) const {
1044	HostTC.AddClangCXXStdlibIncludeArgs(DriverArgs: Args, CC1Args);
1045	}
1046
1047	void CudaToolChain::AddIAMCUIncludeArgs(const ArgList &Args,
1048	ArgStringList &CC1Args) const {
1049	HostTC.AddIAMCUIncludeArgs(DriverArgs: Args, CC1Args);
1050	}
1051
1052	SanitizerMask CudaToolChain::getSupportedSanitizers() const {
1053	// The CudaToolChain only supports sanitizers in the sense that it allows
1054	// sanitizer arguments on the command line if they are supported by the host
1055	// toolchain. The CudaToolChain will actually ignore any command line
1056	// arguments for any of these "supported" sanitizers. That means that no
1057	// sanitization of device code is actually supported at this time.
1058	//
1059	// This behavior is necessary because the host and device toolchains
1060	// invocations often share the command line, so the device toolchain must
1061	// tolerate flags meant only for the host toolchain.
1062	return HostTC.getSupportedSanitizers();
1063	}
1064
1065	VersionTuple CudaToolChain::computeMSVCVersion(const Driver *D,
1066	const ArgList &Args) const {
1067	return HostTC.computeMSVCVersion(D, Args);
1068	}
1069

source code of clang/lib/Driver/ToolChains/Cuda.cpp