1 | //===-- clang-linker-wrapper/ClangLinkerWrapper.cpp - wrapper over linker-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===---------------------------------------------------------------------===// |
8 | // |
9 | // This tool works as a wrapper over a linking job. This tool is used to create |
10 | // linked device images for offloading. It scans the linker's input for embedded |
11 | // device offloading data stored in sections `.llvm.offloading` and extracts it |
12 | // as a temporary file. The extracted device files will then be passed to a |
13 | // device linking job to create a final device image. |
14 | // |
15 | //===---------------------------------------------------------------------===// |
16 | |
17 | #include "clang/Basic/Version.h" |
18 | #include "llvm/BinaryFormat/Magic.h" |
19 | #include "llvm/Bitcode/BitcodeWriter.h" |
20 | #include "llvm/CodeGen/CommandFlags.h" |
21 | #include "llvm/Frontend/Offloading/OffloadWrapper.h" |
22 | #include "llvm/Frontend/Offloading/Utility.h" |
23 | #include "llvm/IR/Constants.h" |
24 | #include "llvm/IR/DiagnosticPrinter.h" |
25 | #include "llvm/IR/Module.h" |
26 | #include "llvm/IRReader/IRReader.h" |
27 | #include "llvm/LTO/LTO.h" |
28 | #include "llvm/MC/TargetRegistry.h" |
29 | #include "llvm/Object/Archive.h" |
30 | #include "llvm/Object/ArchiveWriter.h" |
31 | #include "llvm/Object/Binary.h" |
32 | #include "llvm/Object/ELFObjectFile.h" |
33 | #include "llvm/Object/IRObjectFile.h" |
34 | #include "llvm/Object/ObjectFile.h" |
35 | #include "llvm/Object/OffloadBinary.h" |
36 | #include "llvm/Option/ArgList.h" |
37 | #include "llvm/Option/OptTable.h" |
38 | #include "llvm/Option/Option.h" |
39 | #include "llvm/Support/CommandLine.h" |
40 | #include "llvm/Support/Errc.h" |
41 | #include "llvm/Support/FileOutputBuffer.h" |
42 | #include "llvm/Support/FileSystem.h" |
43 | #include "llvm/Support/InitLLVM.h" |
44 | #include "llvm/Support/MemoryBuffer.h" |
45 | #include "llvm/Support/Parallel.h" |
46 | #include "llvm/Support/Path.h" |
47 | #include "llvm/Support/Program.h" |
48 | #include "llvm/Support/Signals.h" |
49 | #include "llvm/Support/SourceMgr.h" |
50 | #include "llvm/Support/StringSaver.h" |
51 | #include "llvm/Support/TargetSelect.h" |
52 | #include "llvm/Support/WithColor.h" |
53 | #include "llvm/Support/raw_ostream.h" |
54 | #include "llvm/Target/TargetMachine.h" |
55 | #include "llvm/TargetParser/Host.h" |
56 | #include <atomic> |
57 | #include <optional> |
58 | |
59 | using namespace llvm; |
60 | using namespace llvm::opt; |
61 | using namespace llvm::object; |
62 | |
63 | /// Path of the current binary. |
64 | static const char *LinkerExecutable; |
65 | |
66 | /// Save intermediary results. |
67 | static bool SaveTemps = false; |
68 | |
69 | /// Print arguments without executing. |
70 | static bool DryRun = false; |
71 | |
72 | /// Print verbose output. |
73 | static bool Verbose = false; |
74 | |
75 | /// Filename of the executable being created. |
76 | static StringRef ExecutableName; |
77 | |
78 | /// Binary path for the CUDA installation. |
79 | static std::string CudaBinaryPath; |
80 | |
81 | /// Mutex lock to protect writes to shared TempFiles in parallel. |
82 | static std::mutex TempFilesMutex; |
83 | |
84 | /// Temporary files created by the linker wrapper. |
85 | static std::list<SmallString<128>> TempFiles; |
86 | |
87 | /// Codegen flags for LTO backend. |
88 | static codegen::RegisterCodeGenFlags CodeGenFlags; |
89 | |
90 | /// Global flag to indicate that the LTO pipeline threw an error. |
91 | static std::atomic<bool> LTOError; |
92 | |
93 | using OffloadingImage = OffloadBinary::OffloadingImage; |
94 | |
95 | namespace llvm { |
96 | // Provide DenseMapInfo so that OffloadKind can be used in a DenseMap. |
97 | template <> struct DenseMapInfo<OffloadKind> { |
98 | static inline OffloadKind getEmptyKey() { return OFK_LAST; } |
99 | static inline OffloadKind getTombstoneKey() { |
100 | return static_cast<OffloadKind>(OFK_LAST + 1); |
101 | } |
102 | static unsigned getHashValue(const OffloadKind &Val) { return Val; } |
103 | |
104 | static bool isEqual(const OffloadKind &LHS, const OffloadKind &RHS) { |
105 | return LHS == RHS; |
106 | } |
107 | }; |
108 | } // namespace llvm |
109 | |
110 | namespace { |
111 | using std::error_code; |
112 | |
/// Extra option flags used by the linker wrapper. The bit positions must not
/// overlap with llvm::opt::DriverFlag.
enum WrapperFlags {
  /// Options only used by the linker wrapper.
  WrapperOnlyOption = 1 << 4,
  /// Options only used for device linking.
  DeviceOnlyOption = 1 << 5,
};
118 | |
119 | enum ID { |
120 | OPT_INVALID = 0, // This is not an option ID. |
121 | #define OPTION(...) LLVM_MAKE_OPT_ID(__VA_ARGS__), |
122 | #include "LinkerWrapperOpts.inc" |
123 | LastOption |
124 | #undef OPTION |
125 | }; |
126 | |
127 | #define PREFIX(NAME, VALUE) \ |
128 | static constexpr StringLiteral NAME##_init[] = VALUE; \ |
129 | static constexpr ArrayRef<StringLiteral> NAME(NAME##_init, \ |
130 | std::size(NAME##_init) - 1); |
131 | #include "LinkerWrapperOpts.inc" |
132 | #undef PREFIX |
133 | |
134 | static constexpr OptTable::Info InfoTable[] = { |
135 | #define OPTION(...) LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__), |
136 | #include "LinkerWrapperOpts.inc" |
137 | #undef OPTION |
138 | }; |
139 | |
140 | class WrapperOptTable : public opt::GenericOptTable { |
141 | public: |
142 | WrapperOptTable() : opt::GenericOptTable(InfoTable) {} |
143 | }; |
144 | |
145 | const OptTable &getOptTable() { |
146 | static const WrapperOptTable *Table = []() { |
147 | auto Result = std::make_unique<WrapperOptTable>(); |
148 | return Result.release(); |
149 | }(); |
150 | return *Table; |
151 | } |
152 | |
153 | void printCommands(ArrayRef<StringRef> CmdArgs) { |
154 | if (CmdArgs.empty()) |
155 | return; |
156 | |
157 | llvm::errs() << " \"" << CmdArgs.front() << "\" " ; |
158 | for (auto IC = std::next(CmdArgs.begin()), IE = CmdArgs.end(); IC != IE; ++IC) |
159 | llvm::errs() << *IC << (std::next(IC) != IE ? " " : "\n" ); |
160 | } |
161 | |
162 | [[noreturn]] void reportError(Error E) { |
163 | outs().flush(); |
164 | logAllUnhandledErrors(std::move(E), |
165 | WithColor::error(OS&: errs(), Prefix: LinkerExecutable)); |
166 | exit(EXIT_FAILURE); |
167 | } |
168 | |
169 | /// Create an extra user-specified \p OffloadFile. |
170 | /// TODO: We should find a way to wrap these as libraries instead. |
171 | Expected<OffloadFile> getInputBitcodeLibrary(StringRef Input) { |
172 | auto [Device, Path] = StringRef(Input).split(Separator: '='); |
173 | auto [String, Arch] = Device.rsplit(Separator: '-'); |
174 | auto [Kind, Triple] = String.split(Separator: '-'); |
175 | |
176 | llvm::ErrorOr<std::unique_ptr<MemoryBuffer>> ImageOrError = |
177 | llvm::MemoryBuffer::getFileOrSTDIN(Path); |
178 | if (std::error_code EC = ImageOrError.getError()) |
179 | return createFileError(F: Path, EC); |
180 | |
181 | OffloadingImage Image{}; |
182 | Image.TheImageKind = IMG_Bitcode; |
183 | Image.TheOffloadKind = getOffloadKind(Name: Kind); |
184 | Image.StringData["triple" ] = Triple; |
185 | Image.StringData["arch" ] = Arch; |
186 | Image.Image = std::move(*ImageOrError); |
187 | |
188 | std::unique_ptr<MemoryBuffer> Binary = |
189 | MemoryBuffer::getMemBufferCopy(OffloadBinary::write(Image)); |
190 | auto NewBinaryOrErr = OffloadBinary::create(*Binary); |
191 | if (!NewBinaryOrErr) |
192 | return NewBinaryOrErr.takeError(); |
193 | return OffloadFile(std::move(*NewBinaryOrErr), std::move(Binary)); |
194 | } |
195 | |
196 | std::string getMainExecutable(const char *Name) { |
197 | void *Ptr = (void *)(intptr_t)&getMainExecutable; |
198 | auto COWPath = sys::fs::getMainExecutable(argv0: Name, MainExecAddr: Ptr); |
199 | return sys::path::parent_path(path: COWPath).str(); |
200 | } |
201 | |
202 | /// Get a temporary filename suitable for output. |
203 | Expected<StringRef> createOutputFile(const Twine &Prefix, StringRef Extension) { |
204 | std::scoped_lock<decltype(TempFilesMutex)> Lock(TempFilesMutex); |
205 | SmallString<128> OutputFile; |
206 | if (SaveTemps) { |
207 | (Prefix + "." + Extension).toNullTerminatedStringRef(Out&: OutputFile); |
208 | } else { |
209 | if (std::error_code EC = |
210 | sys::fs::createTemporaryFile(Prefix, Extension, OutputFile)) |
211 | return createFileError(OutputFile, EC); |
212 | } |
213 | |
214 | TempFiles.emplace_back(std::move(OutputFile)); |
215 | return TempFiles.back(); |
216 | } |
217 | |
218 | /// Execute the command \p ExecutablePath with the arguments \p Args. |
219 | Error executeCommands(StringRef ExecutablePath, ArrayRef<StringRef> Args) { |
220 | if (Verbose || DryRun) |
221 | printCommands(CmdArgs: Args); |
222 | |
223 | if (!DryRun) |
224 | if (sys::ExecuteAndWait(Program: ExecutablePath, Args)) |
225 | return createStringError(EC: inconvertibleErrorCode(), |
226 | S: "'" + sys::path::filename(path: ExecutablePath) + "'" + |
227 | " failed" ); |
228 | return Error::success(); |
229 | } |
230 | |
231 | Expected<std::string> findProgram(StringRef Name, ArrayRef<StringRef> Paths) { |
232 | |
233 | ErrorOr<std::string> Path = sys::findProgramByName(Name, Paths); |
234 | if (!Path) |
235 | Path = sys::findProgramByName(Name); |
236 | if (!Path && DryRun) |
237 | return Name.str(); |
238 | if (!Path) |
239 | return createStringError(Path.getError(), |
240 | "Unable to find '" + Name + "' in path" ); |
241 | return *Path; |
242 | } |
243 | |
244 | /// Returns the hashed value for a constant string. |
245 | std::string getHash(StringRef Str) { |
246 | llvm::MD5 Hasher; |
247 | llvm::MD5::MD5Result Hash; |
248 | Hasher.update(Str); |
249 | Hasher.final(Result&: Hash); |
250 | return llvm::utohexstr(X: Hash.low(), /*LowerCase=*/true); |
251 | } |
252 | |
253 | /// Renames offloading entry sections in a relocatable link so they do not |
254 | /// conflict with a later link job. |
255 | Error relocateOffloadSection(const ArgList &Args, StringRef Output) { |
256 | llvm::Triple Triple( |
257 | Args.getLastArgValue(OPT_host_triple_EQ, sys::getDefaultTargetTriple())); |
258 | if (Triple.isOSWindows()) |
259 | return createStringError( |
260 | EC: inconvertibleErrorCode(), |
261 | Msg: "Relocatable linking is not supported on COFF targets" ); |
262 | |
263 | Expected<std::string> ObjcopyPath = |
264 | findProgram("llvm-objcopy" , {getMainExecutable("llvm-objcopy" )}); |
265 | if (!ObjcopyPath) |
266 | return ObjcopyPath.takeError(); |
267 | |
268 | // Use the linker output file to get a unique hash. This creates a unique |
269 | // identifier to rename the sections to that is deterministic to the contents. |
270 | auto BufferOrErr = DryRun ? MemoryBuffer::getMemBuffer(InputData: "" ) |
271 | : MemoryBuffer::getFileOrSTDIN(Output); |
272 | if (!BufferOrErr) |
273 | return createStringError(inconvertibleErrorCode(), "Failed to open %s" , |
274 | Output.str().c_str()); |
275 | std::string Suffix = "_" + getHash((*BufferOrErr)->getBuffer()); |
276 | |
277 | SmallVector<StringRef> ObjcopyArgs = { |
278 | *ObjcopyPath, |
279 | Output, |
280 | }; |
281 | |
282 | // Remove the old .llvm.offloading section to prevent further linking. |
283 | ObjcopyArgs.emplace_back("--remove-section" ); |
284 | ObjcopyArgs.emplace_back(".llvm.offloading" ); |
285 | for (StringRef Prefix : {"omp" , "cuda" , "hip" }) { |
286 | auto Section = (Prefix + "_offloading_entries" ).str(); |
287 | // Rename the offloading entires to make them private to this link unit. |
288 | ObjcopyArgs.emplace_back("--rename-section" ); |
289 | ObjcopyArgs.emplace_back( |
290 | Args.MakeArgString(Section + "=" + Section + Suffix)); |
291 | |
292 | // Rename the __start_ / __stop_ symbols appropriately to iterate over the |
293 | // newly renamed section containing the offloading entries. |
294 | ObjcopyArgs.emplace_back("--redefine-sym" ); |
295 | ObjcopyArgs.emplace_back(Args.MakeArgString("__start_" + Section + "=" + |
296 | "__start_" + Section + Suffix)); |
297 | ObjcopyArgs.emplace_back("--redefine-sym" ); |
298 | ObjcopyArgs.emplace_back(Args.MakeArgString("__stop_" + Section + "=" + |
299 | "__stop_" + Section + Suffix)); |
300 | } |
301 | |
302 | if (Error Err = executeCommands(*ObjcopyPath, ObjcopyArgs)) |
303 | return Err; |
304 | |
305 | return Error::success(); |
306 | } |
307 | |
308 | /// Runs the wrapped linker job with the newly created input. |
309 | Error runLinker(ArrayRef<StringRef> Files, const ArgList &Args) { |
310 | llvm::TimeTraceScope TimeScope("Execute host linker" ); |
311 | |
312 | // Render the linker arguments and add the newly created image. We add it |
313 | // after the output file to ensure it is linked with the correct libraries. |
314 | StringRef LinkerPath = Args.getLastArgValue(OPT_linker_path_EQ); |
315 | ArgStringList NewLinkerArgs; |
316 | for (const opt::Arg *Arg : Args) { |
317 | // Do not forward arguments only intended for the linker wrapper. |
318 | if (Arg->getOption().hasFlag(WrapperOnlyOption)) |
319 | continue; |
320 | |
321 | Arg->render(Args, NewLinkerArgs); |
322 | if (Arg->getOption().matches(OPT_o) || Arg->getOption().matches(OPT_out)) |
323 | llvm::transform(Files, std::back_inserter(NewLinkerArgs), |
324 | [&](StringRef Arg) { return Args.MakeArgString(Arg); }); |
325 | } |
326 | |
327 | SmallVector<StringRef> LinkerArgs({LinkerPath}); |
328 | for (StringRef Arg : NewLinkerArgs) |
329 | LinkerArgs.push_back(Arg); |
330 | if (Error Err = executeCommands(LinkerPath, LinkerArgs)) |
331 | return Err; |
332 | |
333 | if (Args.hasArg(OPT_relocatable)) |
334 | return relocateOffloadSection(Args, Output: ExecutableName); |
335 | |
336 | return Error::success(); |
337 | } |
338 | |
339 | void printVersion(raw_ostream &OS) { |
340 | OS << clang::getClangToolFullVersion(ToolName: "clang-linker-wrapper" ) << '\n'; |
341 | } |
342 | |
343 | namespace nvptx { |
344 | Expected<StringRef> |
345 | fatbinary(ArrayRef<std::pair<StringRef, StringRef>> InputFiles, |
346 | const ArgList &Args) { |
347 | llvm::TimeTraceScope TimeScope("NVPTX fatbinary" ); |
348 | // NVPTX uses the fatbinary program to bundle the linked images. |
349 | Expected<std::string> FatBinaryPath = |
350 | findProgram("fatbinary" , {CudaBinaryPath + "/bin" }); |
351 | if (!FatBinaryPath) |
352 | return FatBinaryPath.takeError(); |
353 | |
354 | llvm::Triple Triple( |
355 | Args.getLastArgValue(OPT_host_triple_EQ, sys::getDefaultTargetTriple())); |
356 | |
357 | // Create a new file to write the linked device image to. |
358 | auto TempFileOrErr = |
359 | createOutputFile(Prefix: sys::path::filename(path: ExecutableName), Extension: "fatbin" ); |
360 | if (!TempFileOrErr) |
361 | return TempFileOrErr.takeError(); |
362 | |
363 | SmallVector<StringRef, 16> CmdArgs; |
364 | CmdArgs.push_back(*FatBinaryPath); |
365 | CmdArgs.push_back(Triple.isArch64Bit() ? "-64" : "-32" ); |
366 | CmdArgs.push_back("--create" ); |
367 | CmdArgs.push_back(*TempFileOrErr); |
368 | for (const auto &[File, Arch] : InputFiles) |
369 | CmdArgs.push_back( |
370 | Args.MakeArgString("--image=profile=" + Arch + ",file=" + File)); |
371 | |
372 | if (Error Err = executeCommands(*FatBinaryPath, CmdArgs)) |
373 | return std::move(Err); |
374 | |
375 | return *TempFileOrErr; |
376 | } |
377 | } // namespace nvptx |
378 | |
379 | namespace amdgcn { |
380 | Expected<StringRef> |
381 | fatbinary(ArrayRef<std::pair<StringRef, StringRef>> InputFiles, |
382 | const ArgList &Args) { |
383 | llvm::TimeTraceScope TimeScope("AMDGPU Fatbinary" ); |
384 | |
385 | // AMDGPU uses the clang-offload-bundler to bundle the linked images. |
386 | Expected<std::string> OffloadBundlerPath = findProgram( |
387 | "clang-offload-bundler" , {getMainExecutable("clang-offload-bundler" )}); |
388 | if (!OffloadBundlerPath) |
389 | return OffloadBundlerPath.takeError(); |
390 | |
391 | llvm::Triple Triple( |
392 | Args.getLastArgValue(OPT_host_triple_EQ, sys::getDefaultTargetTriple())); |
393 | |
394 | // Create a new file to write the linked device image to. |
395 | auto TempFileOrErr = |
396 | createOutputFile(Prefix: sys::path::filename(path: ExecutableName), Extension: "hipfb" ); |
397 | if (!TempFileOrErr) |
398 | return TempFileOrErr.takeError(); |
399 | |
400 | BumpPtrAllocator Alloc; |
401 | StringSaver Saver(Alloc); |
402 | |
403 | SmallVector<StringRef, 16> CmdArgs; |
404 | CmdArgs.push_back(*OffloadBundlerPath); |
405 | CmdArgs.push_back("-type=o" ); |
406 | CmdArgs.push_back("-bundle-align=4096" ); |
407 | |
408 | if (Args.hasArg(OPT_compress)) |
409 | CmdArgs.push_back("-compress" ); |
410 | if (auto *Arg = Args.getLastArg(OPT_compression_level_eq)) |
411 | CmdArgs.push_back( |
412 | Args.MakeArgString(Str: Twine("-compression-level=" ) + Arg->getValue())); |
413 | |
414 | SmallVector<StringRef> Targets = {"-targets=host-x86_64-unknown-linux" }; |
415 | for (const auto &[File, Arch] : InputFiles) |
416 | Targets.push_back(Saver.save("hipv4-amdgcn-amd-amdhsa--" + Arch)); |
417 | CmdArgs.push_back(Saver.save(llvm::join(Targets, "," ))); |
418 | |
419 | #ifdef _WIN32 |
420 | CmdArgs.push_back("-input=NUL" ); |
421 | #else |
422 | CmdArgs.push_back("-input=/dev/null" ); |
423 | #endif |
424 | for (const auto &[File, Arch] : InputFiles) |
425 | CmdArgs.push_back(Saver.save("-input=" + File)); |
426 | |
427 | CmdArgs.push_back(Saver.save(S: "-output=" + *TempFileOrErr)); |
428 | |
429 | if (Error Err = executeCommands(*OffloadBundlerPath, CmdArgs)) |
430 | return std::move(Err); |
431 | |
432 | return *TempFileOrErr; |
433 | } |
434 | } // namespace amdgcn |
435 | |
436 | namespace generic { |
437 | Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args) { |
438 | llvm::TimeTraceScope TimeScope("Clang" ); |
439 | // Use `clang` to invoke the appropriate device tools. |
440 | Expected<std::string> ClangPath = |
441 | findProgram("clang" , {getMainExecutable("clang" )}); |
442 | if (!ClangPath) |
443 | return ClangPath.takeError(); |
444 | |
445 | const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ)); |
446 | StringRef Arch = Args.getLastArgValue(OPT_arch_EQ); |
447 | if (Arch.empty()) |
448 | Arch = "native" ; |
449 | // Create a new file to write the linked device image to. Assume that the |
450 | // input filename already has the device and architecture. |
451 | auto TempFileOrErr = |
452 | createOutputFile(Prefix: sys::path::filename(path: ExecutableName) + "." + |
453 | Triple.getArchName() + "." + Arch, |
454 | Extension: "img" ); |
455 | if (!TempFileOrErr) |
456 | return TempFileOrErr.takeError(); |
457 | |
458 | StringRef OptLevel = Args.getLastArgValue(OPT_opt_level, "O2" ); |
459 | SmallVector<StringRef, 16> CmdArgs{ |
460 | *ClangPath, |
461 | "--no-default-config" , |
462 | "-o" , |
463 | *TempFileOrErr, |
464 | Args.MakeArgString("--target=" + Triple.getTriple()), |
465 | Triple.isAMDGPU() ? Args.MakeArgString("-mcpu=" + Arch) |
466 | : Args.MakeArgString("-march=" + Arch), |
467 | Args.MakeArgString("-" + OptLevel), |
468 | }; |
469 | |
470 | if (!Triple.isNVPTX()) |
471 | CmdArgs.push_back("-Wl,--no-undefined" ); |
472 | |
473 | for (StringRef InputFile : InputFiles) |
474 | CmdArgs.push_back(InputFile); |
475 | |
476 | // If this is CPU offloading we copy the input libraries. |
477 | if (!Triple.isAMDGPU() && !Triple.isNVPTX()) { |
478 | CmdArgs.push_back("-Wl,-Bsymbolic" ); |
479 | CmdArgs.push_back("-shared" ); |
480 | ArgStringList LinkerArgs; |
481 | for (const opt::Arg *Arg : |
482 | Args.filtered(OPT_INPUT, OPT_library, OPT_library_path, OPT_rpath, |
483 | OPT_whole_archive, OPT_no_whole_archive)) { |
484 | // Sometimes needed libraries are passed by name, such as when using |
485 | // sanitizers. We need to check the file magic for any libraries. |
486 | if (Arg->getOption().matches(OPT_INPUT)) { |
487 | if (!sys::fs::exists(Arg->getValue()) || |
488 | sys::fs::is_directory(Arg->getValue())) |
489 | continue; |
490 | |
491 | file_magic Magic; |
492 | if (auto EC = identify_magic(Arg->getValue(), Magic)) |
493 | return createStringError(inconvertibleErrorCode(), |
494 | "Failed to open %s" , Arg->getValue()); |
495 | if (Magic != file_magic::archive && |
496 | Magic != file_magic::elf_shared_object) |
497 | continue; |
498 | } |
499 | if (Arg->getOption().matches(OPT_whole_archive)) |
500 | LinkerArgs.push_back(Args.MakeArgString("-Wl,--whole-archive" )); |
501 | else if (Arg->getOption().matches(OPT_no_whole_archive)) |
502 | LinkerArgs.push_back(Args.MakeArgString("-Wl,--no-whole-archive" )); |
503 | else |
504 | Arg->render(Args, LinkerArgs); |
505 | } |
506 | llvm::copy(LinkerArgs, std::back_inserter(CmdArgs)); |
507 | } |
508 | |
509 | // Pass on -mllvm options to the clang invocation. |
510 | for (const opt::Arg *Arg : Args.filtered(OPT_mllvm)) { |
511 | CmdArgs.push_back("-mllvm" ); |
512 | CmdArgs.push_back(Arg->getValue()); |
513 | } |
514 | |
515 | if (Args.hasArg(OPT_debug)) |
516 | CmdArgs.push_back("-g" ); |
517 | |
518 | if (SaveTemps) |
519 | CmdArgs.push_back("-save-temps" ); |
520 | |
521 | if (Verbose) |
522 | CmdArgs.push_back("-v" ); |
523 | |
524 | if (!CudaBinaryPath.empty()) |
525 | CmdArgs.push_back(Args.MakeArgString(Str: "--cuda-path=" + CudaBinaryPath)); |
526 | |
527 | for (StringRef Arg : Args.getAllArgValues(OPT_ptxas_arg)) |
528 | llvm::copy( |
529 | SmallVector<StringRef>({"-Xcuda-ptxas" , Args.MakeArgString(Arg)}), |
530 | std::back_inserter(CmdArgs)); |
531 | |
532 | for (StringRef Arg : Args.getAllArgValues(OPT_linker_arg_EQ)) |
533 | CmdArgs.push_back(Args.MakeArgString(Arg)); |
534 | |
535 | for (StringRef Arg : Args.getAllArgValues(OPT_builtin_bitcode_EQ)) { |
536 | if (llvm::Triple(Arg.split('=').first) == Triple) |
537 | CmdArgs.append({"-Xclang" , "-mlink-builtin-bitcode" , "-Xclang" , |
538 | Args.MakeArgString(Arg.split('=').second)}); |
539 | } |
540 | |
541 | // The OpenMPOpt pass can introduce new calls and is expensive, we do not want |
542 | // this when running CodeGen through clang. |
543 | if (Args.hasArg(OPT_clang_backend) || Args.hasArg(OPT_builtin_bitcode_EQ)) |
544 | CmdArgs.append({"-mllvm" , "-openmp-opt-disable" }); |
545 | |
546 | if (Error Err = executeCommands(*ClangPath, CmdArgs)) |
547 | return std::move(Err); |
548 | |
549 | return *TempFileOrErr; |
550 | } |
551 | } // namespace generic |
552 | |
553 | Expected<StringRef> linkDevice(ArrayRef<StringRef> InputFiles, |
554 | const ArgList &Args) { |
555 | const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ)); |
556 | switch (Triple.getArch()) { |
557 | case Triple::nvptx: |
558 | case Triple::nvptx64: |
559 | case Triple::amdgcn: |
560 | case Triple::x86: |
561 | case Triple::x86_64: |
562 | case Triple::aarch64: |
563 | case Triple::aarch64_be: |
564 | case Triple::ppc64: |
565 | case Triple::ppc64le: |
566 | case Triple::systemz: |
567 | return generic::clang(InputFiles, Args); |
568 | default: |
569 | return createStringError(EC: inconvertibleErrorCode(), |
570 | S: Triple.getArchName() + |
571 | " linking is not supported" ); |
572 | } |
573 | } |
574 | |
575 | void diagnosticHandler(const DiagnosticInfo &DI) { |
576 | std::string ErrStorage; |
577 | raw_string_ostream OS(ErrStorage); |
578 | DiagnosticPrinterRawOStream DP(OS); |
579 | DI.print(DP); |
580 | |
581 | switch (DI.getSeverity()) { |
582 | case DS_Error: |
583 | WithColor::error(OS&: errs(), Prefix: LinkerExecutable) << ErrStorage << "\n" ; |
584 | LTOError = true; |
585 | break; |
586 | case DS_Warning: |
587 | WithColor::warning(OS&: errs(), Prefix: LinkerExecutable) << ErrStorage << "\n" ; |
588 | break; |
589 | case DS_Note: |
590 | WithColor::note(OS&: errs(), Prefix: LinkerExecutable) << ErrStorage << "\n" ; |
591 | break; |
592 | case DS_Remark: |
593 | WithColor::remark(OS&: errs()) << ErrStorage << "\n" ; |
594 | break; |
595 | } |
596 | } |
597 | |
598 | // Get the list of target features from the input file and unify them such that |
599 | // if there are multiple +xxx or -xxx features we only keep the last one. |
600 | std::vector<std::string> getTargetFeatures(ArrayRef<OffloadFile> InputFiles) { |
601 | SmallVector<StringRef> Features; |
602 | for (const OffloadFile &File : InputFiles) { |
603 | for (auto Arg : llvm::split(File.getBinary()->getString("feature" ), "," )) |
604 | Features.emplace_back(Arg); |
605 | } |
606 | |
607 | // Only add a feature if it hasn't been seen before starting from the end. |
608 | std::vector<std::string> UnifiedFeatures; |
609 | DenseSet<StringRef> UsedFeatures; |
610 | for (StringRef Feature : llvm::reverse(Features)) { |
611 | if (UsedFeatures.insert(Feature.drop_front()).second) |
612 | UnifiedFeatures.push_back(Feature.str()); |
613 | } |
614 | |
615 | return UnifiedFeatures; |
616 | } |
617 | |
618 | template <typename ModuleHook = function_ref<bool(size_t, const Module &)>> |
619 | std::unique_ptr<lto::LTO> createLTO( |
620 | const ArgList &Args, const std::vector<std::string> &Features, |
621 | ModuleHook Hook = [](size_t, const Module &) { return true; }) { |
622 | const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ)); |
623 | // We need to remove AMD's target-id from the processor if present. |
624 | StringRef Arch = Args.getLastArgValue(OPT_arch_EQ).split(":" ).first; |
625 | lto::Config Conf; |
626 | lto::ThinBackend Backend; |
627 | // TODO: Handle index-only thin-LTO |
628 | Backend = |
629 | lto::createInProcessThinBackend(Parallelism: llvm::heavyweight_hardware_concurrency()); |
630 | |
631 | Conf.CPU = Arch.str(); |
632 | Conf.Options = codegen::InitTargetOptionsFromCodeGenFlags(TheTriple: Triple); |
633 | |
634 | StringRef OptLevel = Args.getLastArgValue(OPT_opt_level, "O2" ); |
635 | Conf.MAttrs = Features; |
636 | std::optional<CodeGenOptLevel> CGOptLevelOrNone = |
637 | CodeGenOpt::parseLevel(C: OptLevel[1]); |
638 | assert(CGOptLevelOrNone && "Invalid optimization level" ); |
639 | Conf.CGOptLevel = *CGOptLevelOrNone; |
640 | Conf.OptLevel = OptLevel[1] - '0'; |
641 | Conf.DefaultTriple = Triple.getTriple(); |
642 | |
643 | LTOError = false; |
644 | Conf.DiagHandler = diagnosticHandler; |
645 | |
646 | Conf.PTO.LoopVectorization = Conf.OptLevel > 1; |
647 | Conf.PTO.SLPVectorization = Conf.OptLevel > 1; |
648 | |
649 | if (SaveTemps) { |
650 | std::string TempName = (sys::path::filename(path: ExecutableName) + "." + |
651 | Triple.getTriple() + "." + Arch) |
652 | .str(); |
653 | Conf.PostInternalizeModuleHook = [=](size_t Task, const Module &M) { |
654 | std::string File = |
655 | !Task ? TempName + ".postlink.bc" |
656 | : TempName + "." + std::to_string(val: Task) + ".postlink.bc" ; |
657 | error_code EC; |
658 | raw_fd_ostream LinkedBitcode(File, EC, sys::fs::OF_None); |
659 | if (EC) |
660 | reportError(E: errorCodeToError(EC)); |
661 | WriteBitcodeToFile(M, Out&: LinkedBitcode); |
662 | return true; |
663 | }; |
664 | Conf.PreCodeGenModuleHook = [=](size_t Task, const Module &M) { |
665 | std::string File = |
666 | !Task ? TempName + ".postopt.bc" |
667 | : TempName + "." + std::to_string(val: Task) + ".postopt.bc" ; |
668 | error_code EC; |
669 | raw_fd_ostream LinkedBitcode(File, EC, sys::fs::OF_None); |
670 | if (EC) |
671 | reportError(E: errorCodeToError(EC)); |
672 | WriteBitcodeToFile(M, Out&: LinkedBitcode); |
673 | return true; |
674 | }; |
675 | } |
676 | Conf.PostOptModuleHook = Hook; |
677 | Conf.CGFileType = (Triple.isNVPTX() || SaveTemps) |
678 | ? CodeGenFileType::AssemblyFile |
679 | : CodeGenFileType::ObjectFile; |
680 | |
681 | // TODO: Handle remark files |
682 | Conf.HasWholeProgramVisibility = Args.hasArg(OPT_whole_program); |
683 | |
684 | return std::make_unique<lto::LTO>(std::move(Conf), Backend); |
685 | } |
686 | |
687 | // Returns true if \p S is valid as a C language identifier and will be given |
688 | // `__start_` and `__stop_` symbols. |
689 | bool isValidCIdentifier(StringRef S) { |
690 | return !S.empty() && (isAlpha(C: S[0]) || S[0] == '_') && |
691 | llvm::all_of(llvm::drop_begin(S), |
692 | [](char C) { return C == '_' || isAlnum(C); }); |
693 | } |
694 | |
695 | Error linkBitcodeFiles(SmallVectorImpl<OffloadFile> &InputFiles, |
696 | SmallVectorImpl<StringRef> &OutputFiles, |
697 | const ArgList &Args) { |
698 | llvm::TimeTraceScope TimeScope("Link bitcode files" ); |
699 | const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ)); |
700 | StringRef Arch = Args.getLastArgValue(OPT_arch_EQ); |
701 | |
702 | SmallVector<OffloadFile, 4> BitcodeInputFiles; |
703 | DenseSet<StringRef> StrongResolutions; |
704 | DenseSet<StringRef> UsedInRegularObj; |
705 | DenseSet<StringRef> UsedInSharedLib; |
706 | BumpPtrAllocator Alloc; |
707 | StringSaver Saver(Alloc); |
708 | |
709 | // Search for bitcode files in the input and create an LTO input file. If it |
710 | // is not a bitcode file, scan its symbol table for symbols we need to save. |
711 | for (OffloadFile &File : InputFiles) { |
712 | MemoryBufferRef Buffer = MemoryBufferRef(File.getBinary()->getImage(), "" ); |
713 | |
714 | file_magic Type = identify_magic(Buffer.getBuffer()); |
715 | switch (Type) { |
716 | case file_magic::bitcode: { |
717 | Expected<IRSymtabFile> IRSymtabOrErr = readIRSymtab(Buffer); |
718 | if (!IRSymtabOrErr) |
719 | return IRSymtabOrErr.takeError(); |
720 | |
721 | // Check for any strong resolutions we need to preserve. |
722 | for (unsigned I = 0; I != IRSymtabOrErr->Mods.size(); ++I) { |
723 | for (const auto &Sym : IRSymtabOrErr->TheReader.module_symbols(I)) { |
724 | if (!Sym.isFormatSpecific() && Sym.isGlobal() && !Sym.isWeak() && |
725 | !Sym.isUndefined()) |
726 | StrongResolutions.insert(Saver.save(Sym.Name)); |
727 | } |
728 | } |
729 | BitcodeInputFiles.emplace_back(std::move(File)); |
730 | continue; |
731 | } |
732 | case file_magic::elf_relocatable: |
733 | case file_magic::elf_shared_object: { |
734 | Expected<std::unique_ptr<ObjectFile>> ObjFile = |
735 | ObjectFile::createObjectFile(Buffer); |
736 | if (!ObjFile) |
737 | continue; |
738 | |
739 | for (SymbolRef Sym : (*ObjFile)->symbols()) { |
740 | Expected<StringRef> Name = Sym.getName(); |
741 | if (!Name) |
742 | return Name.takeError(); |
743 | |
744 | // Record if we've seen these symbols in any object or shared libraries. |
745 | if ((*ObjFile)->isRelocatableObject()) |
746 | UsedInRegularObj.insert(Saver.save(*Name)); |
747 | else |
748 | UsedInSharedLib.insert(Saver.save(*Name)); |
749 | } |
750 | continue; |
751 | } |
752 | default: |
753 | continue; |
754 | } |
755 | } |
756 | |
757 | if (BitcodeInputFiles.empty()) |
758 | return Error::success(); |
759 | |
760 | // Remove all the bitcode files that we moved from the original input. |
761 | llvm::erase_if(InputFiles, [](OffloadFile &F) { return !F.getBinary(); }); |
762 | |
763 | // LTO Module hook to output bitcode without running the backend. |
764 | SmallVector<StringRef> BitcodeOutput; |
765 | auto OutputBitcode = [&](size_t, const Module &M) { |
766 | auto TempFileOrErr = createOutputFile(Prefix: sys::path::filename(path: ExecutableName) + |
767 | "-jit-" + Triple.getTriple(), |
768 | Extension: "bc" ); |
769 | if (!TempFileOrErr) |
770 | reportError(E: TempFileOrErr.takeError()); |
771 | |
772 | std::error_code EC; |
773 | raw_fd_ostream LinkedBitcode(*TempFileOrErr, EC, sys::fs::OF_None); |
774 | if (EC) |
775 | reportError(E: errorCodeToError(EC)); |
776 | WriteBitcodeToFile(M, Out&: LinkedBitcode); |
777 | BitcodeOutput.push_back(*TempFileOrErr); |
778 | return false; |
779 | }; |
780 | |
781 | // We assume visibility of the whole program if every input file was bitcode. |
782 | auto Features = getTargetFeatures(BitcodeInputFiles); |
783 | auto LTOBackend = Args.hasArg(OPT_embed_bitcode) || |
784 | Args.hasArg(OPT_builtin_bitcode_EQ) || |
785 | Args.hasArg(OPT_clang_backend) |
786 | ? createLTO(Args, Features, OutputBitcode) |
787 | : createLTO(Args, Features); |
788 | |
789 | // We need to resolve the symbols so the LTO backend knows which symbols need |
790 | // to be kept or can be internalized. This is a simplified symbol resolution |
791 | // scheme to approximate the full resolution a linker would do. |
792 | uint64_t Idx = 0; |
793 | DenseSet<StringRef> PrevailingSymbols; |
794 | for (auto &BitcodeInput : BitcodeInputFiles) { |
795 | // Get a semi-unique buffer identifier for Thin-LTO. |
796 | StringRef Identifier = Saver.save( |
797 | std::to_string(Idx++) + "." + |
798 | BitcodeInput.getBinary()->getMemoryBufferRef().getBufferIdentifier()); |
799 | MemoryBufferRef Buffer = |
800 | MemoryBufferRef(BitcodeInput.getBinary()->getImage(), Identifier); |
801 | Expected<std::unique_ptr<lto::InputFile>> BitcodeFileOrErr = |
802 | llvm::lto::InputFile::create(Buffer); |
803 | if (!BitcodeFileOrErr) |
804 | return BitcodeFileOrErr.takeError(); |
805 | |
806 | // Save the input file and the buffer associated with its memory. |
807 | const auto Symbols = (*BitcodeFileOrErr)->symbols(); |
808 | SmallVector<lto::SymbolResolution, 16> Resolutions(Symbols.size()); |
809 | size_t Idx = 0; |
810 | for (auto &Sym : Symbols) { |
811 | lto::SymbolResolution &Res = Resolutions[Idx++]; |
812 | |
813 | // We will use this as the prevailing symbol definition in LTO unless |
814 | // it is undefined or another definition has already been used. |
815 | Res.Prevailing = |
816 | !Sym.isUndefined() && |
817 | !(Sym.isWeak() && StrongResolutions.contains(Sym.getName())) && |
818 | PrevailingSymbols.insert(Saver.save(Sym.getName())).second; |
819 | |
820 | // We need LTO to preserve the following global symbols: |
821 | // 1) Symbols used in regular objects. |
822 | // 2) Sections that will be given a __start/__stop symbol. |
823 | // 3) Prevailing symbols that are needed visible to external libraries. |
824 | Res.VisibleToRegularObj = |
825 | UsedInRegularObj.contains(Sym.getName()) || |
826 | isValidCIdentifier(Sym.getSectionName()) || |
827 | (Res.Prevailing && |
828 | (Sym.getVisibility() != GlobalValue::HiddenVisibility && |
829 | !Sym.canBeOmittedFromSymbolTable())); |
830 | |
831 | // Identify symbols that must be exported dynamically and can be |
832 | // referenced by other files. |
833 | Res.ExportDynamic = |
834 | Sym.getVisibility() != GlobalValue::HiddenVisibility && |
835 | (UsedInSharedLib.contains(Sym.getName()) || |
836 | !Sym.canBeOmittedFromSymbolTable()); |
837 | |
838 | // The final definition will reside in this linkage unit if the symbol is |
839 | // defined and local to the module. This only checks for bitcode files, |
840 | // full assertion will require complete symbol resolution. |
841 | Res.FinalDefinitionInLinkageUnit = |
842 | Sym.getVisibility() != GlobalValue::DefaultVisibility && |
843 | (!Sym.isUndefined() && !Sym.isCommon()); |
844 | |
845 | // We do not support linker redefined symbols (e.g. --wrap) for device |
846 | // image linking, so the symbols will not be changed after LTO. |
847 | Res.LinkerRedefined = false; |
848 | } |
849 | |
850 | // Add the bitcode file with its resolved symbols to the LTO job. |
851 | if (Error Err = LTOBackend->add(std::move(*BitcodeFileOrErr), Resolutions)) |
852 | return Err; |
853 | } |
854 | |
855 | // Run the LTO job to compile the bitcode. |
856 | size_t MaxTasks = LTOBackend->getMaxTasks(); |
857 | SmallVector<StringRef> Files(MaxTasks); |
858 | auto AddStream = |
859 | [&](size_t Task, |
860 | const Twine &ModuleName) -> std::unique_ptr<CachedFileStream> { |
861 | int FD = -1; |
862 | auto &TempFile = Files[Task]; |
863 | StringRef Extension = (Triple.isNVPTX() || SaveTemps) ? "s" : "o" ; |
864 | std::string TaskStr = Task ? "." + std::to_string(Task) : "" ; |
865 | auto TempFileOrErr = |
866 | createOutputFile(sys::path::filename(ExecutableName) + "." + |
867 | Triple.getTriple() + "." + Arch + TaskStr, |
868 | Extension); |
869 | if (!TempFileOrErr) |
870 | reportError(TempFileOrErr.takeError()); |
871 | TempFile = *TempFileOrErr; |
872 | if (std::error_code EC = sys::fs::openFileForWrite(TempFile, FD)) |
873 | reportError(errorCodeToError(EC)); |
874 | return std::make_unique<CachedFileStream>( |
875 | std::make_unique<llvm::raw_fd_ostream>(FD, true)); |
876 | }; |
877 | |
878 | if (Error Err = LTOBackend->run(AddStream)) |
879 | return Err; |
880 | |
881 | if (LTOError) |
882 | return createStringError(EC: inconvertibleErrorCode(), |
883 | Msg: "Errors encountered inside the LTO pipeline." ); |
884 | |
885 | // If we are embedding bitcode we only need the intermediate output. |
886 | bool SingleOutput = Files.size() == 1; |
887 | if (Args.hasArg(OPT_embed_bitcode)) { |
888 | if (BitcodeOutput.size() != 1 || !SingleOutput) |
889 | return createStringError(EC: inconvertibleErrorCode(), |
890 | Msg: "Cannot embed bitcode with multiple files." ); |
891 | OutputFiles.push_back(Elt: Args.MakeArgString(Str: BitcodeOutput.front())); |
892 | return Error::success(); |
893 | } |
894 | |
895 | // Append the new inputs to the device linker input. If the user requested an |
896 | // internalizing link we need to pass the bitcode to clang. |
897 | for (StringRef File : |
898 | Args.hasArg(OPT_clang_backend) || Args.hasArg(OPT_builtin_bitcode_EQ) |
899 | ? BitcodeOutput |
900 | : Files) |
901 | OutputFiles.push_back(File); |
902 | |
903 | return Error::success(); |
904 | } |
905 | |
906 | Expected<StringRef> writeOffloadFile(const OffloadFile &File) { |
907 | const OffloadBinary &Binary = *File.getBinary(); |
908 | |
909 | StringRef Prefix = |
910 | sys::path::stem(path: Binary.getMemoryBufferRef().getBufferIdentifier()); |
911 | StringRef Suffix = getImageKindName(Name: Binary.getImageKind()); |
912 | |
913 | auto TempFileOrErr = createOutputFile( |
914 | Prefix: Prefix + "-" + Binary.getTriple() + "-" + Binary.getArch(), Extension: Suffix); |
915 | if (!TempFileOrErr) |
916 | return TempFileOrErr.takeError(); |
917 | |
918 | Expected<std::unique_ptr<FileOutputBuffer>> OutputOrErr = |
919 | FileOutputBuffer::create(*TempFileOrErr, Binary.getImage().size()); |
920 | if (!OutputOrErr) |
921 | return OutputOrErr.takeError(); |
922 | std::unique_ptr<FileOutputBuffer> Output = std::move(*OutputOrErr); |
923 | llvm::copy(Binary.getImage(), Output->getBufferStart()); |
924 | if (Error E = Output->commit()) |
925 | return std::move(E); |
926 | |
927 | return *TempFileOrErr; |
928 | } |
929 | |
930 | // Compile the module to an object file using the appropriate target machine for |
931 | // the host triple. |
932 | Expected<StringRef> compileModule(Module &M, OffloadKind Kind) { |
933 | llvm::TimeTraceScope TimeScope("Compile module" ); |
934 | std::string Msg; |
935 | const Target *T = TargetRegistry::lookupTarget(Triple: M.getTargetTriple(), Error&: Msg); |
936 | if (!T) |
937 | return createStringError(EC: inconvertibleErrorCode(), S: Msg); |
938 | |
939 | auto Options = |
940 | codegen::InitTargetOptionsFromCodeGenFlags(TheTriple: Triple(M.getTargetTriple())); |
941 | StringRef CPU = "" ; |
942 | StringRef Features = "" ; |
943 | std::unique_ptr<TargetMachine> TM( |
944 | T->createTargetMachine(M.getTargetTriple(), CPU, Features, Options, |
945 | Reloc::PIC_, M.getCodeModel())); |
946 | |
947 | if (M.getDataLayout().isDefault()) |
948 | M.setDataLayout(TM->createDataLayout()); |
949 | |
950 | int FD = -1; |
951 | auto TempFileOrErr = |
952 | createOutputFile(Prefix: sys::path::filename(path: ExecutableName) + "." + |
953 | getOffloadKindName(Name: Kind) + ".image.wrapper" , |
954 | Extension: "o" ); |
955 | if (!TempFileOrErr) |
956 | return TempFileOrErr.takeError(); |
957 | if (std::error_code EC = sys::fs::openFileForWrite(Name: *TempFileOrErr, ResultFD&: FD)) |
958 | return errorCodeToError(EC); |
959 | |
960 | auto OS = std::make_unique<llvm::raw_fd_ostream>(FD, true); |
961 | |
962 | legacy::PassManager CodeGenPasses; |
963 | TargetLibraryInfoImpl TLII(Triple(M.getTargetTriple())); |
964 | CodeGenPasses.add(P: new TargetLibraryInfoWrapperPass(TLII)); |
965 | if (TM->addPassesToEmitFile(CodeGenPasses, *OS, nullptr, |
966 | CodeGenFileType::ObjectFile)) |
967 | return createStringError(EC: inconvertibleErrorCode(), |
968 | Msg: "Failed to execute host backend" ); |
969 | CodeGenPasses.run(M); |
970 | |
971 | return *TempFileOrErr; |
972 | } |
973 | |
974 | /// Creates the object file containing the device image and runtime |
975 | /// registration code from the device images stored in \p Images. |
976 | Expected<StringRef> |
977 | wrapDeviceImages(ArrayRef<std::unique_ptr<MemoryBuffer>> Buffers, |
978 | const ArgList &Args, OffloadKind Kind) { |
979 | llvm::TimeTraceScope TimeScope("Wrap bundled images" ); |
980 | |
981 | SmallVector<ArrayRef<char>, 4> BuffersToWrap; |
982 | for (const auto &Buffer : Buffers) |
983 | BuffersToWrap.emplace_back( |
984 | ArrayRef<char>(Buffer->getBufferStart(), Buffer->getBufferSize())); |
985 | |
986 | LLVMContext Context; |
987 | Module M("offload.wrapper.module" , Context); |
988 | M.setTargetTriple( |
989 | Args.getLastArgValue(OPT_host_triple_EQ, sys::getDefaultTargetTriple())); |
990 | |
991 | switch (Kind) { |
992 | case OFK_OpenMP: |
993 | if (Error Err = offloading::wrapOpenMPBinaries( |
994 | M, BuffersToWrap, |
995 | offloading::getOffloadEntryArray(M, "omp_offloading_entries" ), |
996 | /*Suffix=*/"" , /*Relocatable=*/Args.hasArg(OPT_relocatable))) |
997 | return std::move(Err); |
998 | break; |
999 | case OFK_Cuda: |
1000 | if (Error Err = offloading::wrapCudaBinary( |
1001 | M, Images: BuffersToWrap.front(), |
1002 | EntryArray: offloading::getOffloadEntryArray(M, "cuda_offloading_entries" ), |
1003 | /*Suffix=*/"" , /*EmitSurfacesAndTextures=*/false)) |
1004 | return std::move(Err); |
1005 | break; |
1006 | case OFK_HIP: |
1007 | if (Error Err = offloading::wrapHIPBinary( |
1008 | M, Images: BuffersToWrap.front(), |
1009 | EntryArray: offloading::getOffloadEntryArray(M, "hip_offloading_entries" ))) |
1010 | return std::move(Err); |
1011 | break; |
1012 | default: |
1013 | return createStringError(EC: inconvertibleErrorCode(), |
1014 | S: getOffloadKindName(Name: Kind) + |
1015 | " wrapping is not supported" ); |
1016 | } |
1017 | |
1018 | if (Args.hasArg(OPT_print_wrapped_module)) |
1019 | errs() << M; |
1020 | if (Args.hasArg(OPT_save_temps)) { |
1021 | int FD = -1; |
1022 | auto TempFileOrErr = |
1023 | createOutputFile(Prefix: sys::path::filename(path: ExecutableName) + "." + |
1024 | getOffloadKindName(Name: Kind) + ".image.wrapper" , |
1025 | Extension: "bc" ); |
1026 | if (!TempFileOrErr) |
1027 | return TempFileOrErr.takeError(); |
1028 | if (std::error_code EC = sys::fs::openFileForWrite(Name: *TempFileOrErr, ResultFD&: FD)) |
1029 | return errorCodeToError(EC); |
1030 | llvm::raw_fd_ostream OS(FD, true); |
1031 | WriteBitcodeToFile(M, Out&: OS); |
1032 | } |
1033 | |
1034 | auto FileOrErr = compileModule(M, Kind); |
1035 | if (!FileOrErr) |
1036 | return FileOrErr.takeError(); |
1037 | return *FileOrErr; |
1038 | } |
1039 | |
1040 | Expected<SmallVector<std::unique_ptr<MemoryBuffer>>> |
1041 | bundleOpenMP(ArrayRef<OffloadingImage> Images) { |
1042 | SmallVector<std::unique_ptr<MemoryBuffer>> Buffers; |
1043 | for (const OffloadingImage &Image : Images) |
1044 | Buffers.emplace_back( |
1045 | MemoryBuffer::getMemBufferCopy(OffloadBinary::write(Image))); |
1046 | |
1047 | return std::move(Buffers); |
1048 | } |
1049 | |
1050 | Expected<SmallVector<std::unique_ptr<MemoryBuffer>>> |
1051 | bundleCuda(ArrayRef<OffloadingImage> Images, const ArgList &Args) { |
1052 | SmallVector<std::pair<StringRef, StringRef>, 4> InputFiles; |
1053 | for (const OffloadingImage &Image : Images) |
1054 | InputFiles.emplace_back(std::make_pair(Image.Image->getBufferIdentifier(), |
1055 | Image.StringData.lookup("arch" ))); |
1056 | |
1057 | Triple TheTriple = Triple(Images.front().StringData.lookup("triple" )); |
1058 | auto FileOrErr = nvptx::fatbinary(InputFiles: InputFiles, Args); |
1059 | if (!FileOrErr) |
1060 | return FileOrErr.takeError(); |
1061 | |
1062 | llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> ImageOrError = |
1063 | llvm::MemoryBuffer::getFileOrSTDIN(*FileOrErr); |
1064 | |
1065 | SmallVector<std::unique_ptr<MemoryBuffer>> Buffers; |
1066 | if (std::error_code EC = ImageOrError.getError()) |
1067 | return createFileError(*FileOrErr, EC); |
1068 | Buffers.emplace_back(std::move(*ImageOrError)); |
1069 | |
1070 | return std::move(Buffers); |
1071 | } |
1072 | |
1073 | Expected<SmallVector<std::unique_ptr<MemoryBuffer>>> |
1074 | bundleHIP(ArrayRef<OffloadingImage> Images, const ArgList &Args) { |
1075 | SmallVector<std::pair<StringRef, StringRef>, 4> InputFiles; |
1076 | for (const OffloadingImage &Image : Images) |
1077 | InputFiles.emplace_back(std::make_pair(Image.Image->getBufferIdentifier(), |
1078 | Image.StringData.lookup("arch" ))); |
1079 | |
1080 | Triple TheTriple = Triple(Images.front().StringData.lookup("triple" )); |
1081 | auto FileOrErr = amdgcn::fatbinary(InputFiles: InputFiles, Args); |
1082 | if (!FileOrErr) |
1083 | return FileOrErr.takeError(); |
1084 | |
1085 | llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> ImageOrError = |
1086 | llvm::MemoryBuffer::getFileOrSTDIN(*FileOrErr); |
1087 | |
1088 | SmallVector<std::unique_ptr<MemoryBuffer>> Buffers; |
1089 | if (std::error_code EC = ImageOrError.getError()) |
1090 | return createFileError(*FileOrErr, EC); |
1091 | Buffers.emplace_back(std::move(*ImageOrError)); |
1092 | |
1093 | return std::move(Buffers); |
1094 | } |
1095 | |
1096 | /// Transforms the input \p Images into the binary format the runtime expects |
1097 | /// for the given \p Kind. |
1098 | Expected<SmallVector<std::unique_ptr<MemoryBuffer>>> |
1099 | bundleLinkedOutput(ArrayRef<OffloadingImage> Images, const ArgList &Args, |
1100 | OffloadKind Kind) { |
1101 | llvm::TimeTraceScope TimeScope("Bundle linked output" ); |
1102 | switch (Kind) { |
1103 | case OFK_OpenMP: |
1104 | return bundleOpenMP(Images); |
1105 | case OFK_Cuda: |
1106 | return bundleCuda(Images, Args); |
1107 | case OFK_HIP: |
1108 | return bundleHIP(Images, Args); |
1109 | default: |
1110 | return createStringError(EC: inconvertibleErrorCode(), |
1111 | S: getOffloadKindName(Name: Kind) + |
1112 | " bundling is not supported" ); |
1113 | } |
1114 | } |
1115 | |
1116 | /// Returns a new ArgList containg arguments used for the device linking phase. |
1117 | DerivedArgList getLinkerArgs(ArrayRef<OffloadFile> Input, |
1118 | const InputArgList &Args) { |
1119 | DerivedArgList DAL = DerivedArgList(DerivedArgList(Args)); |
1120 | for (Arg *A : Args) |
1121 | DAL.append(A); |
1122 | |
1123 | // Set the subarchitecture and target triple for this compilation. |
1124 | const OptTable &Tbl = getOptTable(); |
1125 | DAL.AddJoinedArg(nullptr, Tbl.getOption(OPT_arch_EQ), |
1126 | Args.MakeArgString(Input.front().getBinary()->getArch())); |
1127 | DAL.AddJoinedArg(nullptr, Tbl.getOption(OPT_triple_EQ), |
1128 | Args.MakeArgString(Input.front().getBinary()->getTriple())); |
1129 | |
1130 | // If every input file is bitcode we have whole program visibility as we do |
1131 | // only support static linking with bitcode. |
1132 | auto ContainsBitcode = [](const OffloadFile &F) { |
1133 | return identify_magic(magic: F.getBinary()->getImage()) == file_magic::bitcode; |
1134 | }; |
1135 | if (llvm::all_of(Input, ContainsBitcode)) |
1136 | DAL.AddFlagArg(nullptr, Tbl.getOption(OPT_whole_program)); |
1137 | |
1138 | // Forward '-Xoffload-linker' options to the appropriate backend. |
1139 | for (StringRef Arg : Args.getAllArgValues(OPT_device_linker_args_EQ)) { |
1140 | auto [Triple, Value] = Arg.split('='); |
1141 | if (Value.empty()) |
1142 | DAL.AddJoinedArg(nullptr, Tbl.getOption(OPT_linker_arg_EQ), |
1143 | Args.MakeArgString(Triple)); |
1144 | else if (Triple == DAL.getLastArgValue(OPT_triple_EQ)) |
1145 | DAL.AddJoinedArg(nullptr, Tbl.getOption(OPT_linker_arg_EQ), |
1146 | Args.MakeArgString(Value)); |
1147 | } |
1148 | |
1149 | return DAL; |
1150 | } |
1151 | |
1152 | /// Transforms all the extracted offloading input files into an image that can |
1153 | /// be registered by the runtime. |
1154 | Expected<SmallVector<StringRef>> linkAndWrapDeviceFiles( |
1155 | SmallVectorImpl<SmallVector<OffloadFile>> &LinkerInputFiles, |
1156 | const InputArgList &Args, char **Argv, int Argc) { |
1157 | llvm::TimeTraceScope TimeScope("Handle all device input" ); |
1158 | |
1159 | std::mutex ImageMtx; |
1160 | DenseMap<OffloadKind, SmallVector<OffloadingImage>> Images; |
1161 | auto Err = parallelForEachError(LinkerInputFiles, [&](auto &Input) -> Error { |
1162 | llvm::TimeTraceScope TimeScope("Link device input" ); |
1163 | |
1164 | // Each thread needs its own copy of the base arguments to maintain |
1165 | // per-device argument storage of synthetic strings. |
1166 | const OptTable &Tbl = getOptTable(); |
1167 | BumpPtrAllocator Alloc; |
1168 | StringSaver Saver(Alloc); |
1169 | auto BaseArgs = |
1170 | Tbl.parseArgs(Argc, Argv, Unknown: OPT_INVALID, Saver, ErrorFn: [](StringRef Err) { |
1171 | reportError(E: createStringError(EC: inconvertibleErrorCode(), S: Err)); |
1172 | }); |
1173 | auto LinkerArgs = getLinkerArgs(Input, BaseArgs); |
1174 | |
1175 | DenseSet<OffloadKind> ActiveOffloadKinds; |
1176 | for (const auto &File : Input) |
1177 | if (File.getBinary()->getOffloadKind() != OFK_None) |
1178 | ActiveOffloadKinds.insert(File.getBinary()->getOffloadKind()); |
1179 | |
1180 | // First link and remove all the input files containing bitcode. |
1181 | SmallVector<StringRef> InputFiles; |
1182 | if (Error Err = linkBitcodeFiles(Input, InputFiles, LinkerArgs)) |
1183 | return Err; |
1184 | |
1185 | // Write any remaining device inputs to an output file for the linker. |
1186 | for (const OffloadFile &File : Input) { |
1187 | auto FileNameOrErr = writeOffloadFile(File); |
1188 | if (!FileNameOrErr) |
1189 | return FileNameOrErr.takeError(); |
1190 | InputFiles.emplace_back(*FileNameOrErr); |
1191 | } |
1192 | |
1193 | // Link the remaining device files using the device linker. |
1194 | auto OutputOrErr = !Args.hasArg(OPT_embed_bitcode) |
1195 | ? linkDevice(InputFiles, LinkerArgs) |
1196 | : InputFiles.front(); |
1197 | if (!OutputOrErr) |
1198 | return OutputOrErr.takeError(); |
1199 | |
1200 | // Store the offloading image for each linked output file. |
1201 | for (OffloadKind Kind : ActiveOffloadKinds) { |
1202 | llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> FileOrErr = |
1203 | llvm::MemoryBuffer::getFileOrSTDIN(*OutputOrErr); |
1204 | if (std::error_code EC = FileOrErr.getError()) { |
1205 | if (DryRun) |
1206 | FileOrErr = MemoryBuffer::getMemBuffer("" ); |
1207 | else |
1208 | return createFileError(*OutputOrErr, EC); |
1209 | } |
1210 | |
1211 | std::scoped_lock<decltype(ImageMtx)> Guard(ImageMtx); |
1212 | OffloadingImage TheImage{}; |
1213 | TheImage.TheImageKind = |
1214 | Args.hasArg(OPT_embed_bitcode) ? IMG_Bitcode : IMG_Object; |
1215 | TheImage.TheOffloadKind = Kind; |
1216 | TheImage.StringData["triple" ] = |
1217 | Args.MakeArgString(LinkerArgs.getLastArgValue(OPT_triple_EQ)); |
1218 | TheImage.StringData["arch" ] = |
1219 | Args.MakeArgString(LinkerArgs.getLastArgValue(OPT_arch_EQ)); |
1220 | TheImage.Image = std::move(*FileOrErr); |
1221 | |
1222 | Images[Kind].emplace_back(std::move(TheImage)); |
1223 | } |
1224 | return Error::success(); |
1225 | }); |
1226 | if (Err) |
1227 | return std::move(Err); |
1228 | |
1229 | // Create a binary image of each offloading image and embed it into a new |
1230 | // object file. |
1231 | SmallVector<StringRef> WrappedOutput; |
1232 | for (auto &[Kind, Input] : Images) { |
1233 | // We sort the entries before bundling so they appear in a deterministic |
1234 | // order in the final binary. |
1235 | llvm::sort(Input, [](OffloadingImage &A, OffloadingImage &B) { |
1236 | return A.StringData["triple" ] > B.StringData["triple" ] || |
1237 | A.StringData["arch" ] > B.StringData["arch" ] || |
1238 | A.TheOffloadKind < B.TheOffloadKind; |
1239 | }); |
1240 | auto BundledImagesOrErr = bundleLinkedOutput(Input, Args, Kind); |
1241 | if (!BundledImagesOrErr) |
1242 | return BundledImagesOrErr.takeError(); |
1243 | auto OutputOrErr = wrapDeviceImages(*BundledImagesOrErr, Args, Kind); |
1244 | if (!OutputOrErr) |
1245 | return OutputOrErr.takeError(); |
1246 | WrappedOutput.push_back(*OutputOrErr); |
1247 | } |
1248 | |
1249 | return WrappedOutput; |
1250 | } |
1251 | |
1252 | std::optional<std::string> findFile(StringRef Dir, StringRef Root, |
1253 | const Twine &Name) { |
1254 | SmallString<128> Path; |
1255 | if (Dir.starts_with(Prefix: "=" )) |
1256 | sys::path::append(Path, Root, Dir.substr(Start: 1), Name); |
1257 | else |
1258 | sys::path::append(Path, Dir, Name); |
1259 | |
1260 | if (sys::fs::exists(Path)) |
1261 | return static_cast<std::string>(Path); |
1262 | return std::nullopt; |
1263 | } |
1264 | |
1265 | std::optional<std::string> |
1266 | findFromSearchPaths(StringRef Name, StringRef Root, |
1267 | ArrayRef<StringRef> SearchPaths) { |
1268 | for (StringRef Dir : SearchPaths) |
1269 | if (std::optional<std::string> File = findFile(Dir, Root, Name)) |
1270 | return File; |
1271 | return std::nullopt; |
1272 | } |
1273 | |
1274 | std::optional<std::string> |
1275 | searchLibraryBaseName(StringRef Name, StringRef Root, |
1276 | ArrayRef<StringRef> SearchPaths) { |
1277 | for (StringRef Dir : SearchPaths) { |
1278 | if (std::optional<std::string> File = |
1279 | findFile(Dir, Root, "lib" + Name + ".so" )) |
1280 | return File; |
1281 | if (std::optional<std::string> File = |
1282 | findFile(Dir, Root, "lib" + Name + ".a" )) |
1283 | return File; |
1284 | } |
1285 | return std::nullopt; |
1286 | } |
1287 | |
1288 | /// Search for static libraries in the linker's library path given input like |
1289 | /// `-lfoo` or `-l:libfoo.a`. |
1290 | std::optional<std::string> searchLibrary(StringRef Input, StringRef Root, |
1291 | ArrayRef<StringRef> SearchPaths) { |
1292 | if (Input.starts_with(":" ) || Input.ends_with(".lib" )) |
1293 | return findFromSearchPaths(Input.drop_front(), Root, SearchPaths); |
1294 | return searchLibraryBaseName(Input, Root, SearchPaths); |
1295 | } |
1296 | |
/// Flags recorded per symbol name while scanning input files. The values are
/// distinct bits so they can be combined and tested with bitwise operators.
enum Symbol : uint32_t {
  Sym_None = 0x0,      ///< No flag set.
  Sym_Undefined = 0x2, ///< Bit 1.
  Sym_Weak = 0x4,      ///< Bit 2.
};
1303 | |
1304 | /// Scan the symbols from a BitcodeFile \p Buffer and record if we need to |
1305 | /// extract any symbols from it. |
1306 | Expected<bool> getSymbolsFromBitcode(MemoryBufferRef Buffer, OffloadKind Kind, |
1307 | bool IsArchive, StringSaver &Saver, |
1308 | DenseMap<StringRef, Symbol> &Syms) { |
1309 | Expected<IRSymtabFile> IRSymtabOrErr = readIRSymtab(Buffer); |
1310 | if (!IRSymtabOrErr) |
1311 | return IRSymtabOrErr.takeError(); |
1312 | |
1313 | bool = !IsArchive; |
1314 | DenseMap<StringRef, Symbol> TmpSyms; |
1315 | for (unsigned I = 0; I != IRSymtabOrErr->Mods.size(); ++I) { |
1316 | for (const auto &Sym : IRSymtabOrErr->TheReader.module_symbols(I)) { |
1317 | if (Sym.isFormatSpecific() || !Sym.isGlobal()) |
1318 | continue; |
1319 | |
1320 | bool NewSymbol = Syms.count(Sym.getName()) == 0; |
1321 | auto OldSym = NewSymbol ? Sym_None : Syms[Sym.getName()]; |
1322 | |
1323 | // We will extract if it defines a currenlty undefined non-weak symbol. |
1324 | bool ResolvesStrongReference = |
1325 | ((OldSym & Sym_Undefined && !(OldSym & Sym_Weak)) && |
1326 | !Sym.isUndefined()); |
1327 | // We will extract if it defines a new global symbol visible to the host. |
1328 | // This is only necessary for code targeting an offloading language. |
1329 | bool NewGlobalSymbol = |
1330 | ((NewSymbol || (OldSym & Sym_Undefined)) && !Sym.isUndefined() && |
1331 | !Sym.canBeOmittedFromSymbolTable() && Kind != object::OFK_None && |
1332 | (Sym.getVisibility() != GlobalValue::HiddenVisibility)); |
1333 | ShouldExtract |= ResolvesStrongReference | NewGlobalSymbol; |
1334 | |
1335 | // Update this symbol in the "table" with the new information. |
1336 | if (OldSym & Sym_Undefined && !Sym.isUndefined()) |
1337 | TmpSyms[Saver.save(Sym.getName())] = |
1338 | static_cast<Symbol>(OldSym & ~Sym_Undefined); |
1339 | if (Sym.isUndefined() && NewSymbol) |
1340 | TmpSyms[Saver.save(Sym.getName())] = |
1341 | static_cast<Symbol>(OldSym | Sym_Undefined); |
1342 | if (Sym.isWeak()) |
1343 | TmpSyms[Saver.save(Sym.getName())] = |
1344 | static_cast<Symbol>(OldSym | Sym_Weak); |
1345 | } |
1346 | } |
1347 | |
1348 | // If the file gets extracted we update the table with the new symbols. |
1349 | if (ShouldExtract) |
1350 | Syms.insert(std::begin(TmpSyms), std::end(TmpSyms)); |
1351 | |
1352 | return ShouldExtract; |
1353 | } |
1354 | |
1355 | /// Scan the symbols from an ObjectFile \p Obj and record if we need to extract |
1356 | /// any symbols from it. |
1357 | Expected<bool> getSymbolsFromObject(const ObjectFile &Obj, OffloadKind Kind, |
1358 | bool IsArchive, StringSaver &Saver, |
1359 | DenseMap<StringRef, Symbol> &Syms) { |
1360 | bool = !IsArchive; |
1361 | DenseMap<StringRef, Symbol> TmpSyms; |
1362 | for (SymbolRef Sym : Obj.symbols()) { |
1363 | auto FlagsOrErr = Sym.getFlags(); |
1364 | if (!FlagsOrErr) |
1365 | return FlagsOrErr.takeError(); |
1366 | |
1367 | if (!(*FlagsOrErr & SymbolRef::SF_Global) || |
1368 | (*FlagsOrErr & SymbolRef::SF_FormatSpecific)) |
1369 | continue; |
1370 | |
1371 | auto NameOrErr = Sym.getName(); |
1372 | if (!NameOrErr) |
1373 | return NameOrErr.takeError(); |
1374 | |
1375 | bool NewSymbol = Syms.count(*NameOrErr) == 0; |
1376 | auto OldSym = NewSymbol ? Sym_None : Syms[*NameOrErr]; |
1377 | |
1378 | // We will extract if it defines a currenlty undefined non-weak symbol. |
1379 | bool ResolvesStrongReference = (OldSym & Sym_Undefined) && |
1380 | !(OldSym & Sym_Weak) && |
1381 | !(*FlagsOrErr & SymbolRef::SF_Undefined); |
1382 | |
1383 | // We will extract if it defines a new global symbol visible to the host. |
1384 | // This is only necessary for code targeting an offloading language. |
1385 | bool NewGlobalSymbol = |
1386 | ((NewSymbol || (OldSym & Sym_Undefined)) && |
1387 | !(*FlagsOrErr & SymbolRef::SF_Undefined) && Kind != object::OFK_None && |
1388 | !(*FlagsOrErr & SymbolRef::SF_Hidden)); |
1389 | ShouldExtract |= ResolvesStrongReference | NewGlobalSymbol; |
1390 | |
1391 | // Update this symbol in the "table" with the new information. |
1392 | if (OldSym & Sym_Undefined && !(*FlagsOrErr & SymbolRef::SF_Undefined)) |
1393 | TmpSyms[Saver.save(*NameOrErr)] = |
1394 | static_cast<Symbol>(OldSym & ~Sym_Undefined); |
1395 | if (*FlagsOrErr & SymbolRef::SF_Undefined && NewSymbol) |
1396 | TmpSyms[Saver.save(*NameOrErr)] = |
1397 | static_cast<Symbol>(OldSym | Sym_Undefined); |
1398 | if (*FlagsOrErr & SymbolRef::SF_Weak) |
1399 | TmpSyms[Saver.save(*NameOrErr)] = static_cast<Symbol>(OldSym | Sym_Weak); |
1400 | } |
1401 | |
1402 | // If the file gets extracted we update the table with the new symbols. |
1403 | if (ShouldExtract) |
1404 | Syms.insert(std::begin(TmpSyms), std::end(TmpSyms)); |
1405 | |
1406 | return ShouldExtract; |
1407 | } |
1408 | |
1409 | /// Attempt to 'resolve' symbols found in input files. We use this to |
1410 | /// determine if an archive member needs to be extracted. An archive member |
1411 | /// will be extracted if any of the following is true. |
1412 | /// 1) It defines an undefined symbol in a regular object filie. |
1413 | /// 2) It defines a global symbol without hidden visibility that has not |
1414 | /// yet been defined. |
1415 | Expected<bool> getSymbols(StringRef Image, OffloadKind Kind, bool IsArchive, |
1416 | StringSaver &Saver, |
1417 | DenseMap<StringRef, Symbol> &Syms) { |
1418 | MemoryBufferRef Buffer = MemoryBufferRef(Image, "" ); |
1419 | switch (identify_magic(magic: Image)) { |
1420 | case file_magic::bitcode: |
1421 | return getSymbolsFromBitcode(Buffer, Kind, IsArchive, Saver, Syms); |
1422 | case file_magic::elf_relocatable: { |
1423 | Expected<std::unique_ptr<ObjectFile>> ObjFile = |
1424 | ObjectFile::createObjectFile(Buffer); |
1425 | if (!ObjFile) |
1426 | return ObjFile.takeError(); |
1427 | return getSymbolsFromObject(**ObjFile, Kind, IsArchive, Saver, Syms); |
1428 | } |
1429 | default: |
1430 | return false; |
1431 | } |
1432 | } |
1433 | |
1434 | /// Search the input files and libraries for embedded device offloading code |
1435 | /// and add it to the list of files to be linked. Files coming from static |
1436 | /// libraries are only added to the input if they are used by an existing |
1437 | /// input file. Returns a list of input files intended for a single linking job. |
1438 | Expected<SmallVector<SmallVector<OffloadFile>>> |
1439 | getDeviceInput(const ArgList &Args) { |
1440 | llvm::TimeTraceScope TimeScope("ExtractDeviceCode" ); |
1441 | |
1442 | StringRef Root = Args.getLastArgValue(OPT_sysroot_EQ); |
1443 | SmallVector<StringRef> LibraryPaths; |
1444 | for (const opt::Arg *Arg : Args.filtered(OPT_library_path, OPT_libpath)) |
1445 | LibraryPaths.push_back(Arg->getValue()); |
1446 | |
1447 | BumpPtrAllocator Alloc; |
1448 | StringSaver Saver(Alloc); |
1449 | |
1450 | // Try to extract device code from the linker input files. |
1451 | bool WholeArchive = Args.hasArg(OPT_wholearchive_flag) ? true : false; |
1452 | SmallVector<OffloadFile> ; |
1453 | SmallVector<OffloadFile> ; |
1454 | for (const opt::Arg *Arg : Args.filtered( |
1455 | OPT_INPUT, OPT_library, OPT_whole_archive, OPT_no_whole_archive)) { |
1456 | if (Arg->getOption().matches(OPT_whole_archive) || |
1457 | Arg->getOption().matches(OPT_no_whole_archive)) { |
1458 | WholeArchive = Arg->getOption().matches(OPT_whole_archive); |
1459 | continue; |
1460 | } |
1461 | |
1462 | std::optional<std::string> Filename = |
1463 | Arg->getOption().matches(OPT_library) |
1464 | ? searchLibrary(Arg->getValue(), Root, LibraryPaths) |
1465 | : std::string(Arg->getValue()); |
1466 | |
1467 | if (!Filename && Arg->getOption().matches(OPT_library)) |
1468 | reportError(createStringError(inconvertibleErrorCode(), |
1469 | "unable to find library -l%s" , |
1470 | Arg->getValue())); |
1471 | |
1472 | if (!Filename || !sys::fs::exists(*Filename) || |
1473 | sys::fs::is_directory(*Filename)) |
1474 | continue; |
1475 | |
1476 | ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr = |
1477 | MemoryBuffer::getFileOrSTDIN(*Filename); |
1478 | if (std::error_code EC = BufferOrErr.getError()) |
1479 | return createFileError(*Filename, EC); |
1480 | |
1481 | MemoryBufferRef Buffer = **BufferOrErr; |
1482 | if (identify_magic(Buffer.getBuffer()) == file_magic::elf_shared_object) |
1483 | continue; |
1484 | |
1485 | SmallVector<OffloadFile> Binaries; |
1486 | if (Error Err = extractOffloadBinaries(Buffer, Binaries)) |
1487 | return std::move(Err); |
1488 | |
1489 | for (auto &OffloadFile : Binaries) { |
1490 | if (identify_magic(Buffer.getBuffer()) == file_magic::archive && |
1491 | !WholeArchive) |
1492 | ArchiveFilesToExtract.emplace_back(std::move(OffloadFile)); |
1493 | else |
1494 | ObjectFilesToExtract.emplace_back(std::move(OffloadFile)); |
1495 | } |
1496 | } |
1497 | |
1498 | // Link all standard input files and update the list of symbols. |
1499 | DenseMap<OffloadFile::TargetID, SmallVector<OffloadFile>> InputFiles; |
1500 | DenseMap<OffloadFile::TargetID, DenseMap<StringRef, Symbol>> Syms; |
1501 | for (OffloadFile &Binary : ObjectFilesToExtract) { |
1502 | if (!Binary.getBinary()) |
1503 | continue; |
1504 | |
1505 | SmallVector<OffloadFile::TargetID> CompatibleTargets = {Binary}; |
1506 | for (const auto &[ID, Input] : InputFiles) |
1507 | if (object::areTargetsCompatible(Binary, ID)) |
1508 | CompatibleTargets.emplace_back(ID); |
1509 | |
1510 | for (const auto &[Index, ID] : llvm::enumerate(CompatibleTargets)) { |
1511 | Expected<bool> ExtractOrErr = getSymbols( |
1512 | Binary.getBinary()->getImage(), Binary.getBinary()->getOffloadKind(), |
1513 | /*IsArchive=*/false, Saver, Syms[ID]); |
1514 | if (!ExtractOrErr) |
1515 | return ExtractOrErr.takeError(); |
1516 | |
1517 | // If another target needs this binary it must be copied instead. |
1518 | if (Index == CompatibleTargets.size() - 1) |
1519 | InputFiles[ID].emplace_back(std::move(Binary)); |
1520 | else |
1521 | InputFiles[ID].emplace_back(Binary.copy()); |
1522 | } |
1523 | } |
1524 | |
1525 | // Archive members only extract if they define needed symbols. We do this |
1526 | // after every regular input file so that libraries may be included out of |
1527 | // order. This follows 'ld.lld' semantics which are more lenient. |
1528 | bool = true; |
1529 | while (Extracted) { |
1530 | Extracted = false; |
1531 | for (OffloadFile &Binary : ArchiveFilesToExtract) { |
1532 | // If the binary was previously extracted it will be set to null. |
1533 | if (!Binary.getBinary()) |
1534 | continue; |
1535 | |
1536 | SmallVector<OffloadFile::TargetID> CompatibleTargets = {Binary}; |
1537 | for (const auto &[ID, Input] : InputFiles) |
1538 | if (object::areTargetsCompatible(Binary, ID)) |
1539 | CompatibleTargets.emplace_back(ID); |
1540 | |
1541 | for (const auto &[Index, ID] : llvm::enumerate(CompatibleTargets)) { |
1542 | // Only extract if we have an object matching this target. |
1543 | if (!InputFiles.count(ID)) |
1544 | continue; |
1545 | |
1546 | Expected<bool> ExtractOrErr = |
1547 | getSymbols(Binary.getBinary()->getImage(), |
1548 | Binary.getBinary()->getOffloadKind(), /*IsArchive=*/true, |
1549 | Saver, Syms[ID]); |
1550 | if (!ExtractOrErr) |
1551 | return ExtractOrErr.takeError(); |
1552 | |
1553 | Extracted = *ExtractOrErr; |
1554 | |
1555 | // Skip including the file if it is an archive that does not resolve |
1556 | // any symbols. |
1557 | if (!Extracted) |
1558 | continue; |
1559 | |
1560 | // If another target needs this binary it must be copied instead. |
1561 | if (Index == CompatibleTargets.size() - 1) |
1562 | InputFiles[ID].emplace_back(std::move(Binary)); |
1563 | else |
1564 | InputFiles[ID].emplace_back(Binary.copy()); |
1565 | } |
1566 | |
1567 | // If we extracted any files we need to check all the symbols again. |
1568 | if (Extracted) |
1569 | break; |
1570 | } |
1571 | } |
1572 | |
1573 | for (StringRef Library : Args.getAllArgValues(OPT_bitcode_library_EQ)) { |
1574 | auto FileOrErr = getInputBitcodeLibrary(Library); |
1575 | if (!FileOrErr) |
1576 | return FileOrErr.takeError(); |
1577 | InputFiles[*FileOrErr].push_back(std::move(*FileOrErr)); |
1578 | } |
1579 | |
1580 | SmallVector<SmallVector<OffloadFile>> InputsForTarget; |
1581 | for (auto &[ID, Input] : InputFiles) |
1582 | InputsForTarget.emplace_back(std::move(Input)); |
1583 | |
1584 | return std::move(InputsForTarget); |
1585 | } |
1586 | |
1587 | } // namespace |
1588 | |
1589 | int main(int Argc, char **Argv) { |
1590 | InitLLVM X(Argc, Argv); |
1591 | InitializeAllTargetInfos(); |
1592 | InitializeAllTargets(); |
1593 | InitializeAllTargetMCs(); |
1594 | InitializeAllAsmParsers(); |
1595 | InitializeAllAsmPrinters(); |
1596 | |
1597 | LinkerExecutable = Argv[0]; |
1598 | sys::PrintStackTraceOnErrorSignal(Argv0: Argv[0]); |
1599 | |
1600 | const OptTable &Tbl = getOptTable(); |
1601 | BumpPtrAllocator Alloc; |
1602 | StringSaver Saver(Alloc); |
1603 | auto Args = Tbl.parseArgs(Argc, Argv, Unknown: OPT_INVALID, Saver, ErrorFn: [&](StringRef Err) { |
1604 | reportError(E: createStringError(EC: inconvertibleErrorCode(), S: Err)); |
1605 | }); |
1606 | |
1607 | if (Args.hasArg(OPT_help) || Args.hasArg(OPT_help_hidden)) { |
1608 | Tbl.printHelp( |
1609 | outs(), |
1610 | "clang-linker-wrapper [options] -- <options to passed to the linker>" , |
1611 | "\nA wrapper utility over the host linker. It scans the input files\n" |
1612 | "for sections that require additional processing prior to linking.\n" |
1613 | "The will then transparently pass all arguments and input to the\n" |
1614 | "specified host linker to create the final binary.\n" , |
1615 | Args.hasArg(OPT_help_hidden), Args.hasArg(OPT_help_hidden)); |
1616 | return EXIT_SUCCESS; |
1617 | } |
1618 | if (Args.hasArg(OPT_v)) { |
1619 | printVersion(OS&: outs()); |
1620 | return EXIT_SUCCESS; |
1621 | } |
1622 | |
1623 | // This forwards '-mllvm' arguments to LLVM if present. |
1624 | SmallVector<const char *> NewArgv = {Argv[0]}; |
1625 | for (const opt::Arg *Arg : Args.filtered(OPT_mllvm)) |
1626 | NewArgv.push_back(Arg->getValue()); |
1627 | for (const opt::Arg *Arg : Args.filtered(OPT_offload_opt_eq_minus)) |
1628 | NewArgv.push_back(Args.MakeArgString(StringRef("-" ) + Arg->getValue())); |
1629 | cl::ParseCommandLineOptions(argc: NewArgv.size(), argv: &NewArgv[0]); |
1630 | |
1631 | Verbose = Args.hasArg(OPT_verbose); |
1632 | DryRun = Args.hasArg(OPT_dry_run); |
1633 | SaveTemps = Args.hasArg(OPT_save_temps); |
1634 | CudaBinaryPath = Args.getLastArgValue(Id: OPT_cuda_path_EQ).str(); |
1635 | |
1636 | llvm::Triple Triple( |
1637 | Args.getLastArgValue(Id: OPT_host_triple_EQ, Default: sys::getDefaultTargetTriple())); |
1638 | if (Args.hasArg(OPT_o)) |
1639 | ExecutableName = Args.getLastArgValue(Id: OPT_o, Default: "a.out" ); |
1640 | else if (Args.hasArg(OPT_out)) |
1641 | ExecutableName = Args.getLastArgValue(Id: OPT_out, Default: "a.exe" ); |
1642 | else |
1643 | ExecutableName = Triple.isOSWindows() ? "a.exe" : "a.out" ; |
1644 | |
1645 | parallel::strategy = hardware_concurrency(ThreadCount: 1); |
1646 | if (auto *Arg = Args.getLastArg(OPT_wrapper_jobs)) { |
1647 | unsigned Threads = 0; |
1648 | if (!llvm::to_integer(Arg->getValue(), Threads) || Threads == 0) |
1649 | reportError(createStringError( |
1650 | inconvertibleErrorCode(), "%s: expected a positive integer, got '%s'" , |
1651 | Arg->getSpelling().data(), Arg->getValue())); |
1652 | parallel::strategy = hardware_concurrency(ThreadCount: Threads); |
1653 | } |
1654 | |
1655 | if (Args.hasArg(OPT_wrapper_time_trace_eq)) { |
1656 | unsigned Granularity; |
1657 | Args.getLastArgValue(Id: OPT_wrapper_time_trace_granularity, Default: "500" ) |
1658 | .getAsInteger(10, Granularity); |
1659 | timeTraceProfilerInitialize(TimeTraceGranularity: Granularity, ProcName: Argv[0]); |
1660 | } |
1661 | |
1662 | { |
1663 | llvm::TimeTraceScope TimeScope("Execute linker wrapper" ); |
1664 | |
1665 | // Extract the device input files stored in the host fat binary. |
1666 | auto DeviceInputFiles = getDeviceInput(Args); |
1667 | if (!DeviceInputFiles) |
1668 | reportError(DeviceInputFiles.takeError()); |
1669 | |
1670 | // Link and wrap the device images extracted from the linker input. |
1671 | auto FilesOrErr = |
1672 | linkAndWrapDeviceFiles(*DeviceInputFiles, Args, Argv, Argc); |
1673 | if (!FilesOrErr) |
1674 | reportError(FilesOrErr.takeError()); |
1675 | |
1676 | // Run the host linking job with the rendered arguments. |
1677 | if (Error Err = runLinker(*FilesOrErr, Args)) |
1678 | reportError(E: std::move(Err)); |
1679 | } |
1680 | |
1681 | if (const opt::Arg *Arg = Args.getLastArg(OPT_wrapper_time_trace_eq)) { |
1682 | if (Error Err = timeTraceProfilerWrite(PreferredFileName: Arg->getValue(), FallbackFileName: ExecutableName)) |
1683 | reportError(E: std::move(Err)); |
1684 | timeTraceProfilerCleanup(); |
1685 | } |
1686 | |
1687 | // Remove the temporary files created. |
1688 | if (!SaveTemps) |
1689 | for (const auto &TempFile : TempFiles) |
1690 | if (std::error_code EC = sys::fs::remove(path: TempFile)) |
1691 | reportError(E: createFileError(F: TempFile, EC)); |
1692 | |
1693 | return EXIT_SUCCESS; |
1694 | } |
1695 | |