1 | //===--- HIPUtility.cpp - Common HIP Tool Chain Utilities -------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "HIPUtility.h" |
10 | #include "clang/Driver/CommonArgs.h" |
11 | #include "clang/Driver/Compilation.h" |
12 | #include "clang/Driver/Options.h" |
13 | #include "llvm/ADT/StringExtras.h" |
14 | #include "llvm/ADT/StringRef.h" |
15 | #include "llvm/Object/Archive.h" |
16 | #include "llvm/Object/ObjectFile.h" |
17 | #include "llvm/Support/MD5.h" |
18 | #include "llvm/Support/MemoryBuffer.h" |
19 | #include "llvm/Support/Path.h" |
20 | #include "llvm/Support/raw_ostream.h" |
21 | #include "llvm/TargetParser/Triple.h" |
22 | #include <deque> |
23 | #include <set> |
24 | |
25 | using namespace clang; |
26 | using namespace clang::driver; |
27 | using namespace clang::driver::tools; |
28 | using namespace llvm::opt; |
29 | using llvm::dyn_cast; |
30 | |
// Path of the platform's null device. It is used as the input file for the
// dummy host entry handed to clang-offload-bundler.
#if defined(_WIN32) || defined(_WIN64)
#define NULL_FILE "nul"
#else // !Windows
#define NULL_FILE "/dev/null"
#endif // defined(_WIN32) || defined(_WIN64)
36 | |
namespace {
// Alignment, in bytes, of HIP code objects. It is passed to
// clang-offload-bundler as -bundle-align and determines the .p2align of the
// embedded fat binary.
constexpr unsigned HIPCodeObjectAlign = 4096;
} // namespace
40 | |
41 | // Constructs a triple string for clang offload bundler. |
42 | static std::string normalizeForBundler(const llvm::Triple &T, |
43 | bool HasTargetID) { |
44 | return HasTargetID ? (T.getArchName() + "-" + T.getVendorName() + "-" + |
45 | T.getOSName() + "-" + T.getEnvironmentName()) |
46 | .str() |
47 | : T.normalize(Form: llvm::Triple::CanonicalForm::FOUR_IDENT); |
48 | } |
49 | |
50 | // Collect undefined __hip_fatbin* and __hip_gpubin_handle* symbols from all |
51 | // input object or archive files. |
52 | class HIPUndefinedFatBinSymbols { |
53 | public: |
54 | HIPUndefinedFatBinSymbols(const Compilation &C, |
55 | const llvm::opt::ArgList &Args_) |
56 | : C(C), Args(Args_), |
57 | DiagID(C.getDriver().getDiags().getCustomDiagID( |
58 | L: DiagnosticsEngine::Error, |
59 | FormatString: "Error collecting HIP undefined fatbin symbols: %0" )), |
60 | Quiet(C.getArgs().hasArg(options::OPT__HASH_HASH_HASH)), |
61 | Verbose(C.getArgs().hasArg(options::OPT_v)) { |
62 | populateSymbols(); |
63 | processStaticLibraries(); |
64 | if (Verbose) { |
65 | for (const auto &Name : FatBinSymbols) |
66 | llvm::errs() << "Found undefined HIP fatbin symbol: " << Name << "\n" ; |
67 | for (const auto &Name : GPUBinHandleSymbols) |
68 | llvm::errs() << "Found undefined HIP gpubin handle symbol: " << Name |
69 | << "\n" ; |
70 | } |
71 | } |
72 | |
73 | const std::set<std::string> &getFatBinSymbols() const { |
74 | return FatBinSymbols; |
75 | } |
76 | |
77 | const std::set<std::string> &getGPUBinHandleSymbols() const { |
78 | return GPUBinHandleSymbols; |
79 | } |
80 | |
81 | // Collect symbols from static libraries specified by -l options. |
82 | void processStaticLibraries() { |
83 | llvm::SmallVector<llvm::StringRef, 16> LibNames; |
84 | llvm::SmallVector<llvm::StringRef, 16> LibPaths; |
85 | llvm::SmallVector<llvm::StringRef, 16> ExactLibNames; |
86 | llvm::Triple Triple(C.getDriver().getTargetTriple()); |
87 | bool IsMSVC = Triple.isWindowsMSVCEnvironment(); |
88 | llvm::StringRef Ext = IsMSVC ? ".lib" : ".a" ; |
89 | |
90 | for (const auto *Arg : Args.filtered(options::OPT_l)) { |
91 | llvm::StringRef Value = Arg->getValue(); |
92 | if (Value.starts_with(":" )) |
93 | ExactLibNames.push_back(Value.drop_front()); |
94 | else |
95 | LibNames.push_back(Value); |
96 | } |
97 | for (const auto *Arg : Args.filtered(options::OPT_L)) { |
98 | auto Path = Arg->getValue(); |
99 | LibPaths.push_back(Path); |
100 | if (Verbose) |
101 | llvm::errs() << "HIP fatbin symbol search uses library path: " << Path |
102 | << "\n" ; |
103 | } |
104 | |
105 | auto ProcessLib = [&](llvm::StringRef LibName, bool IsExact) { |
106 | llvm::SmallString<256> FullLibName( |
107 | IsExact ? Twine(LibName).str() |
108 | : IsMSVC ? (Twine(LibName) + Ext).str() |
109 | : (Twine("lib" ) + LibName + Ext).str()); |
110 | |
111 | bool Found = false; |
112 | for (const auto Path : LibPaths) { |
113 | llvm::SmallString<256> FullPath = Path; |
114 | llvm::sys::path::append(path&: FullPath, a: FullLibName); |
115 | |
116 | if (llvm::sys::fs::exists(Path: FullPath)) { |
117 | if (Verbose) |
118 | llvm::errs() << "HIP fatbin symbol search found library: " |
119 | << FullPath << "\n" ; |
120 | auto BufferOrErr = llvm::MemoryBuffer::getFile(Filename: FullPath); |
121 | if (!BufferOrErr) { |
122 | errorHandler(Err: llvm::errorCodeToError(EC: BufferOrErr.getError())); |
123 | continue; |
124 | } |
125 | processInput(Buffer: BufferOrErr.get()->getMemBufferRef()); |
126 | Found = true; |
127 | break; |
128 | } |
129 | } |
130 | if (!Found && Verbose) |
131 | llvm::errs() << "HIP fatbin symbol search could not find library: " |
132 | << FullLibName << "\n" ; |
133 | }; |
134 | |
135 | for (const auto LibName : ExactLibNames) |
136 | ProcessLib(LibName, true); |
137 | |
138 | for (const auto LibName : LibNames) |
139 | ProcessLib(LibName, false); |
140 | } |
141 | |
142 | private: |
143 | const Compilation &C; |
144 | const llvm::opt::ArgList &Args; |
145 | unsigned DiagID; |
146 | bool Quiet; |
147 | bool Verbose; |
148 | std::set<std::string> FatBinSymbols; |
149 | std::set<std::string> GPUBinHandleSymbols; |
150 | std::set<std::string, std::less<>> DefinedFatBinSymbols; |
151 | std::set<std::string, std::less<>> DefinedGPUBinHandleSymbols; |
152 | const std::string FatBinPrefix = "__hip_fatbin" ; |
153 | const std::string GPUBinHandlePrefix = "__hip_gpubin_handle" ; |
154 | |
155 | void populateSymbols() { |
156 | std::deque<const Action *> WorkList; |
157 | std::set<const Action *> Visited; |
158 | |
159 | for (const auto &Action : C.getActions()) |
160 | WorkList.push_back(x: Action); |
161 | |
162 | while (!WorkList.empty()) { |
163 | const Action *CurrentAction = WorkList.front(); |
164 | WorkList.pop_front(); |
165 | |
166 | if (!CurrentAction || !Visited.insert(x: CurrentAction).second) |
167 | continue; |
168 | |
169 | if (const auto *IA = dyn_cast<InputAction>(Val: CurrentAction)) { |
170 | std::string ID = IA->getId().str(); |
171 | if (!ID.empty()) { |
172 | ID = llvm::utohexstr(X: llvm::MD5Hash(Str: ID), /*LowerCase=*/true); |
173 | FatBinSymbols.insert(x: (FatBinPrefix + Twine('_') + ID).str()); |
174 | GPUBinHandleSymbols.insert( |
175 | x: (GPUBinHandlePrefix + Twine('_') + ID).str()); |
176 | continue; |
177 | } |
178 | if (IA->getInputArg().getNumValues() == 0) |
179 | continue; |
180 | const char *Filename = IA->getInputArg().getValue(); |
181 | if (!Filename) |
182 | continue; |
183 | auto BufferOrErr = llvm::MemoryBuffer::getFile(Filename); |
184 | // Input action could be options to linker, therefore, ignore it |
185 | // if cannot read it. If it turns out to be a file that cannot be read, |
186 | // the error will be caught by the linker. |
187 | if (!BufferOrErr) |
188 | continue; |
189 | |
190 | processInput(Buffer: BufferOrErr.get()->getMemBufferRef()); |
191 | } else |
192 | llvm::append_range(C&: WorkList, R: CurrentAction->getInputs()); |
193 | } |
194 | } |
195 | |
196 | void processInput(const llvm::MemoryBufferRef &Buffer) { |
197 | // Try processing as object file first. |
198 | auto ObjFileOrErr = llvm::object::ObjectFile::createObjectFile(Object: Buffer); |
199 | if (ObjFileOrErr) { |
200 | processSymbols(Obj: **ObjFileOrErr); |
201 | return; |
202 | } |
203 | |
204 | // Then try processing as archive files. |
205 | llvm::consumeError(Err: ObjFileOrErr.takeError()); |
206 | auto ArchiveOrErr = llvm::object::Archive::create(Source: Buffer); |
207 | if (ArchiveOrErr) { |
208 | llvm::Error Err = llvm::Error::success(); |
209 | llvm::object::Archive &Archive = *ArchiveOrErr.get(); |
210 | for (auto &Child : Archive.children(Err)) { |
211 | auto ChildBufOrErr = Child.getMemoryBufferRef(); |
212 | if (ChildBufOrErr) |
213 | processInput(Buffer: *ChildBufOrErr); |
214 | else |
215 | errorHandler(Err: ChildBufOrErr.takeError()); |
216 | } |
217 | |
218 | if (Err) |
219 | errorHandler(Err: std::move(Err)); |
220 | return; |
221 | } |
222 | |
223 | // Ignore other files. |
224 | llvm::consumeError(Err: ArchiveOrErr.takeError()); |
225 | } |
226 | |
227 | void processSymbols(const llvm::object::ObjectFile &Obj) { |
228 | for (const auto &Symbol : Obj.symbols()) { |
229 | auto FlagOrErr = Symbol.getFlags(); |
230 | if (!FlagOrErr) { |
231 | errorHandler(Err: FlagOrErr.takeError()); |
232 | continue; |
233 | } |
234 | |
235 | auto NameOrErr = Symbol.getName(); |
236 | if (!NameOrErr) { |
237 | errorHandler(Err: NameOrErr.takeError()); |
238 | continue; |
239 | } |
240 | llvm::StringRef Name = *NameOrErr; |
241 | |
242 | bool isUndefined = |
243 | FlagOrErr.get() & llvm::object::SymbolRef::SF_Undefined; |
244 | bool isFatBinSymbol = Name.starts_with(Prefix: FatBinPrefix); |
245 | bool isGPUBinHandleSymbol = Name.starts_with(Prefix: GPUBinHandlePrefix); |
246 | |
247 | // Handling for defined symbols |
248 | if (!isUndefined) { |
249 | if (isFatBinSymbol) { |
250 | DefinedFatBinSymbols.insert(x: Name.str()); |
251 | FatBinSymbols.erase(x: Name.str()); |
252 | } else if (isGPUBinHandleSymbol) { |
253 | DefinedGPUBinHandleSymbols.insert(x: Name.str()); |
254 | GPUBinHandleSymbols.erase(x: Name.str()); |
255 | } |
256 | continue; |
257 | } |
258 | |
259 | // Add undefined symbols if they are not in the defined sets |
260 | if (isFatBinSymbol && |
261 | DefinedFatBinSymbols.find(x: Name) == DefinedFatBinSymbols.end()) |
262 | FatBinSymbols.insert(x: Name.str()); |
263 | else if (isGPUBinHandleSymbol && DefinedGPUBinHandleSymbols.find(x: Name) == |
264 | DefinedGPUBinHandleSymbols.end()) |
265 | GPUBinHandleSymbols.insert(x: Name.str()); |
266 | } |
267 | } |
268 | |
269 | void errorHandler(llvm::Error Err) { |
270 | if (Quiet) |
271 | return; |
272 | C.getDriver().Diag(DiagID) << llvm::toString(E: std::move(Err)); |
273 | } |
274 | }; |
275 | |
276 | // Construct a clang-offload-bundler command to bundle code objects for |
277 | // different devices into a HIP fat binary. |
278 | void HIP::constructHIPFatbinCommand(Compilation &C, const JobAction &JA, |
279 | llvm::StringRef OutputFileName, |
280 | const InputInfoList &Inputs, |
281 | const llvm::opt::ArgList &Args, |
282 | const Tool &T) { |
283 | // Construct clang-offload-bundler command to bundle object files for |
284 | // for different GPU archs. |
285 | ArgStringList BundlerArgs; |
286 | BundlerArgs.push_back(Elt: Args.MakeArgString(Str: "-type=o" )); |
287 | BundlerArgs.push_back( |
288 | Elt: Args.MakeArgString(Str: "-bundle-align=" + Twine(HIPCodeObjectAlign))); |
289 | |
290 | // ToDo: Remove the dummy host binary entry which is required by |
291 | // clang-offload-bundler. |
292 | std::string BundlerTargetArg = "-targets=host-x86_64-unknown-linux-gnu" ; |
293 | // AMDGCN: |
294 | // For code object version 2 and 3, the offload kind in bundle ID is 'hip' |
295 | // for backward compatibility. For code object version 4 and greater, the |
296 | // offload kind in bundle ID is 'hipv4'. |
297 | std::string OffloadKind = "hip" ; |
298 | auto &TT = T.getToolChain().getTriple(); |
299 | if (TT.isAMDGCN() && getAMDGPUCodeObjectVersion(D: C.getDriver(), Args) >= 4) |
300 | OffloadKind = OffloadKind + "v4" ; |
301 | for (const auto &II : Inputs) { |
302 | const auto *A = II.getAction(); |
303 | auto ArchStr = llvm::StringRef(A->getOffloadingArch()); |
304 | BundlerTargetArg += ',' + OffloadKind + '-'; |
305 | if (ArchStr == "amdgcnspirv" ) |
306 | BundlerTargetArg += |
307 | normalizeForBundler(T: llvm::Triple("spirv64-amd-amdhsa" ), HasTargetID: true); |
308 | else |
309 | BundlerTargetArg += normalizeForBundler(T: TT, HasTargetID: !ArchStr.empty()); |
310 | if (!ArchStr.empty()) |
311 | BundlerTargetArg += '-' + ArchStr.str(); |
312 | } |
313 | BundlerArgs.push_back(Elt: Args.MakeArgString(Str: BundlerTargetArg)); |
314 | |
315 | // Use a NULL file as input for the dummy host binary entry |
316 | std::string BundlerInputArg = "-input=" NULL_FILE; |
317 | BundlerArgs.push_back(Elt: Args.MakeArgString(Str: BundlerInputArg)); |
318 | for (const auto &II : Inputs) { |
319 | BundlerInputArg = std::string("-input=" ) + II.getFilename(); |
320 | BundlerArgs.push_back(Elt: Args.MakeArgString(Str: BundlerInputArg)); |
321 | } |
322 | |
323 | std::string Output = std::string(OutputFileName); |
324 | auto *BundlerOutputArg = |
325 | Args.MakeArgString(Str: std::string("-output=" ).append(str: Output)); |
326 | BundlerArgs.push_back(Elt: BundlerOutputArg); |
327 | |
328 | addOffloadCompressArgs(TCArgs: Args, CmdArgs&: BundlerArgs); |
329 | |
330 | const char *Bundler = Args.MakeArgString( |
331 | Str: T.getToolChain().GetProgramPath(Name: "clang-offload-bundler" )); |
332 | C.addCommand(C: std::make_unique<Command>( |
333 | args: JA, args: T, args: ResponseFileSupport::None(), args&: Bundler, args&: BundlerArgs, args: Inputs, |
334 | args: InputInfo(&JA, Args.MakeArgString(Str: Output)))); |
335 | } |
336 | |
337 | /// Add Generated HIP Object File which has device images embedded into the |
338 | /// host to the argument list for linking. Using MC directives, embed the |
339 | /// device code and also define symbols required by the code generation so that |
340 | /// the image can be retrieved at runtime. |
341 | void HIP::constructGenerateObjFileFromHIPFatBinary( |
342 | Compilation &C, const InputInfo &Output, const InputInfoList &Inputs, |
343 | const ArgList &Args, const JobAction &JA, const Tool &T) { |
344 | const Driver &D = C.getDriver(); |
345 | std::string Name = std::string(llvm::sys::path::stem(path: Output.getFilename())); |
346 | |
347 | // Create Temp Object File Generator, |
348 | // Offload Bundled file and Bundled Object file. |
349 | // Keep them if save-temps is enabled. |
350 | const char *ObjinFile; |
351 | const char *BundleFile; |
352 | if (D.isSaveTempsEnabled()) { |
353 | ObjinFile = C.getArgs().MakeArgString(Str: Name + ".mcin" ); |
354 | BundleFile = C.getArgs().MakeArgString(Str: Name + ".hipfb" ); |
355 | } else { |
356 | auto TmpNameMcin = D.GetTemporaryPath(Prefix: Name, Suffix: "mcin" ); |
357 | ObjinFile = C.addTempFile(Name: C.getArgs().MakeArgString(Str: TmpNameMcin)); |
358 | auto TmpNameFb = D.GetTemporaryPath(Prefix: Name, Suffix: "hipfb" ); |
359 | BundleFile = C.addTempFile(Name: C.getArgs().MakeArgString(Str: TmpNameFb)); |
360 | } |
361 | HIP::constructHIPFatbinCommand(C, JA, OutputFileName: BundleFile, Inputs, Args, T); |
362 | |
363 | // Create a buffer to write the contents of the temp obj generator. |
364 | std::string ObjBuffer; |
365 | llvm::raw_string_ostream ObjStream(ObjBuffer); |
366 | |
367 | auto HostTriple = |
368 | C.getSingleOffloadToolChain<Action::OFK_Host>()->getTriple(); |
369 | |
370 | HIPUndefinedFatBinSymbols Symbols(C, Args); |
371 | |
372 | std::string PrimaryHipFatbinSymbol; |
373 | std::string PrimaryGpuBinHandleSymbol; |
374 | bool FoundPrimaryHipFatbinSymbol = false; |
375 | bool FoundPrimaryGpuBinHandleSymbol = false; |
376 | |
377 | std::vector<std::string> AliasHipFatbinSymbols; |
378 | std::vector<std::string> AliasGpuBinHandleSymbols; |
379 | |
380 | // Iterate through symbols to find the primary ones and collect others for |
381 | // aliasing |
382 | for (const auto &Symbol : Symbols.getFatBinSymbols()) { |
383 | if (!FoundPrimaryHipFatbinSymbol) { |
384 | PrimaryHipFatbinSymbol = Symbol; |
385 | FoundPrimaryHipFatbinSymbol = true; |
386 | } else |
387 | AliasHipFatbinSymbols.push_back(x: Symbol); |
388 | } |
389 | |
390 | for (const auto &Symbol : Symbols.getGPUBinHandleSymbols()) { |
391 | if (!FoundPrimaryGpuBinHandleSymbol) { |
392 | PrimaryGpuBinHandleSymbol = Symbol; |
393 | FoundPrimaryGpuBinHandleSymbol = true; |
394 | } else |
395 | AliasGpuBinHandleSymbols.push_back(x: Symbol); |
396 | } |
397 | |
398 | // Add MC directives to embed target binaries. We ensure that each |
399 | // section and image is 16-byte aligned. This is not mandatory, but |
400 | // increases the likelihood of data to be aligned with a cache block |
401 | // in several main host machines. |
402 | ObjStream << "# HIP Object Generator\n" ; |
403 | ObjStream << "# *** Automatically generated by Clang ***\n" ; |
404 | if (FoundPrimaryGpuBinHandleSymbol) { |
405 | // Define the first gpubin handle symbol |
406 | if (HostTriple.isWindowsMSVCEnvironment()) |
407 | ObjStream << " .section .hip_gpubin_handle,\"dw\"\n" ; |
408 | else { |
409 | ObjStream << " .protected " << PrimaryGpuBinHandleSymbol << "\n" ; |
410 | ObjStream << " .type " << PrimaryGpuBinHandleSymbol << ",@object\n" ; |
411 | ObjStream << " .section .hip_gpubin_handle,\"aw\"\n" ; |
412 | } |
413 | ObjStream << " .globl " << PrimaryGpuBinHandleSymbol << "\n" ; |
414 | ObjStream << " .p2align 3\n" ; // Align 8 |
415 | ObjStream << PrimaryGpuBinHandleSymbol << ":\n" ; |
416 | ObjStream << " .zero 8\n" ; // Size 8 |
417 | |
418 | // Generate alias directives for other gpubin handle symbols |
419 | for (const auto &AliasSymbol : AliasGpuBinHandleSymbols) { |
420 | ObjStream << " .globl " << AliasSymbol << "\n" ; |
421 | ObjStream << " .set " << AliasSymbol << "," << PrimaryGpuBinHandleSymbol |
422 | << "\n" ; |
423 | } |
424 | } |
425 | if (FoundPrimaryHipFatbinSymbol) { |
426 | // Define the first fatbin symbol |
427 | if (HostTriple.isWindowsMSVCEnvironment()) |
428 | ObjStream << " .section .hip_fatbin,\"dw\"\n" ; |
429 | else { |
430 | ObjStream << " .protected " << PrimaryHipFatbinSymbol << "\n" ; |
431 | ObjStream << " .type " << PrimaryHipFatbinSymbol << ",@object\n" ; |
432 | ObjStream << " .section .hip_fatbin,\"a\",@progbits\n" ; |
433 | } |
434 | ObjStream << " .globl " << PrimaryHipFatbinSymbol << "\n" ; |
435 | ObjStream << " .p2align " << llvm::Log2(A: llvm::Align(HIPCodeObjectAlign)) |
436 | << "\n" ; |
437 | // Generate alias directives for other fatbin symbols |
438 | for (const auto &AliasSymbol : AliasHipFatbinSymbols) { |
439 | ObjStream << " .globl " << AliasSymbol << "\n" ; |
440 | ObjStream << " .set " << AliasSymbol << "," << PrimaryHipFatbinSymbol |
441 | << "\n" ; |
442 | } |
443 | ObjStream << PrimaryHipFatbinSymbol << ":\n" ; |
444 | ObjStream << " .incbin " ; |
445 | llvm::sys::printArg(OS&: ObjStream, Arg: BundleFile, /*Quote=*/true); |
446 | ObjStream << "\n" ; |
447 | } |
448 | if (HostTriple.isOSLinux() && HostTriple.isOSBinFormatELF()) |
449 | ObjStream << " .section .note.GNU-stack, \"\", @progbits\n" ; |
450 | |
451 | // Dump the contents of the temp object file gen if the user requested that. |
452 | // We support this option to enable testing of behavior with -###. |
453 | if (C.getArgs().hasArg(options::OPT_fhip_dump_offload_linker_script)) |
454 | llvm::errs() << ObjBuffer; |
455 | |
456 | // Open script file and write the contents. |
457 | std::error_code EC; |
458 | llvm::raw_fd_ostream Objf(ObjinFile, EC, llvm::sys::fs::OF_None); |
459 | |
460 | if (EC) { |
461 | D.Diag(clang::diag::DiagID: err_unable_to_make_temp) << EC.message(); |
462 | return; |
463 | } |
464 | |
465 | Objf << ObjBuffer; |
466 | |
467 | ArgStringList ClangArgs{"-target" , Args.MakeArgString(Str: HostTriple.normalize()), |
468 | "-o" , Output.getFilename(), |
469 | "-x" , "assembler" , |
470 | ObjinFile, "-c" }; |
471 | C.addCommand(C: std::make_unique<Command>(args: JA, args: T, args: ResponseFileSupport::None(), |
472 | args: D.getClangProgramPath(), args&: ClangArgs, |
473 | args: Inputs, args: Output, args: D.getPrependArg())); |
474 | } |
475 | |