1 | //===-LTOBackend.cpp - LLVM Link Time Optimizer Backend -------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements the "backend" phase of LTO, i.e. it performs |
10 | // optimization and code generation on a loaded module. It is generally used |
11 | // internally by the LTO class but can also be used independently, for example |
12 | // to implement a standalone ThinLTO backend. |
13 | // |
14 | //===----------------------------------------------------------------------===// |
15 | |
16 | #include "llvm/LTO/LTOBackend.h" |
17 | #include "llvm/Analysis/AliasAnalysis.h" |
18 | #include "llvm/Analysis/CGSCCPassManager.h" |
19 | #include "llvm/Analysis/ModuleSummaryAnalysis.h" |
20 | #include "llvm/Analysis/TargetLibraryInfo.h" |
21 | #include "llvm/Bitcode/BitcodeReader.h" |
22 | #include "llvm/Bitcode/BitcodeWriter.h" |
23 | #include "llvm/IR/LLVMRemarkStreamer.h" |
24 | #include "llvm/IR/LegacyPassManager.h" |
25 | #include "llvm/IR/PassManager.h" |
26 | #include "llvm/IR/Verifier.h" |
27 | #include "llvm/LTO/LTO.h" |
28 | #include "llvm/MC/TargetRegistry.h" |
29 | #include "llvm/Object/ModuleSymbolTable.h" |
30 | #include "llvm/Passes/PassBuilder.h" |
31 | #include "llvm/Passes/PassPlugin.h" |
32 | #include "llvm/Passes/StandardInstrumentations.h" |
33 | #include "llvm/Support/Error.h" |
34 | #include "llvm/Support/FileSystem.h" |
35 | #include "llvm/Support/MemoryBuffer.h" |
36 | #include "llvm/Support/Path.h" |
37 | #include "llvm/Support/Program.h" |
38 | #include "llvm/Support/ThreadPool.h" |
39 | #include "llvm/Support/ToolOutputFile.h" |
40 | #include "llvm/Support/VirtualFileSystem.h" |
41 | #include "llvm/Support/raw_ostream.h" |
42 | #include "llvm/Target/TargetMachine.h" |
43 | #include "llvm/TargetParser/SubtargetFeature.h" |
44 | #include "llvm/Transforms/IPO/WholeProgramDevirt.h" |
45 | #include "llvm/Transforms/Scalar/LoopPassManager.h" |
46 | #include "llvm/Transforms/Utils/FunctionImportUtils.h" |
47 | #include "llvm/Transforms/Utils/SplitModule.h" |
48 | #include <optional> |
49 | |
50 | using namespace llvm; |
51 | using namespace lto; |
52 | |
53 | #define DEBUG_TYPE "lto-backend" |
54 | |
/// Selects whether LLVM bitcode is embedded into the objects produced by LTO
/// and, if so, at which point of the pipeline the bitcode is captured.
enum class LTOBitcodeEmbedding {
  /// No bitcode is embedded (value 0).
  DoNotEmbed,
  /// Bitcode is embedded after all optimization passes have run (value 1).
  EmbedOptimized,
  /// Bitcode is embedded post merge, but before optimizations (value 2).
  EmbedPostMergePreOptimized,
};
60 | |
61 | static cl::opt<LTOBitcodeEmbedding> EmbedBitcode( |
62 | "lto-embed-bitcode" , cl::init(Val: LTOBitcodeEmbedding::DoNotEmbed), |
63 | cl::values(clEnumValN(LTOBitcodeEmbedding::DoNotEmbed, "none" , |
64 | "Do not embed" ), |
65 | clEnumValN(LTOBitcodeEmbedding::EmbedOptimized, "optimized" , |
66 | "Embed after all optimization passes" ), |
67 | clEnumValN(LTOBitcodeEmbedding::EmbedPostMergePreOptimized, |
68 | "post-merge-pre-opt" , |
69 | "Embed post merge, but before optimizations" )), |
70 | cl::desc("Embed LLVM bitcode in object files produced by LTO" )); |
71 | |
72 | static cl::opt<bool> ThinLTOAssumeMerged( |
73 | "thinlto-assume-merged" , cl::init(Val: false), |
74 | cl::desc("Assume the input has already undergone ThinLTO function " |
75 | "importing and the other pre-optimization pipeline changes." )); |
76 | |
77 | namespace llvm { |
78 | extern cl::opt<bool> NoPGOWarnMismatch; |
79 | } |
80 | |
81 | [[noreturn]] static void reportOpenError(StringRef Path, Twine Msg) { |
82 | errs() << "failed to open " << Path << ": " << Msg << '\n'; |
83 | errs().flush(); |
84 | exit(status: 1); |
85 | } |
86 | |
87 | Error Config::addSaveTemps(std::string OutputFileName, bool UseInputModulePath, |
88 | const DenseSet<StringRef> &SaveTempsArgs) { |
89 | ShouldDiscardValueNames = false; |
90 | |
91 | std::error_code EC; |
92 | if (SaveTempsArgs.empty() || SaveTempsArgs.contains(V: "resolution" )) { |
93 | ResolutionFile = |
94 | std::make_unique<raw_fd_ostream>(args: OutputFileName + "resolution.txt" , args&: EC, |
95 | args: sys::fs::OpenFlags::OF_TextWithCRLF); |
96 | if (EC) { |
97 | ResolutionFile.reset(); |
98 | return errorCodeToError(EC); |
99 | } |
100 | } |
101 | |
102 | auto setHook = [&](std::string PathSuffix, ModuleHookFn &Hook) { |
103 | // Keep track of the hook provided by the linker, which also needs to run. |
104 | ModuleHookFn LinkerHook = Hook; |
105 | Hook = [=](unsigned Task, const Module &M) { |
106 | // If the linker's hook returned false, we need to pass that result |
107 | // through. |
108 | if (LinkerHook && !LinkerHook(Task, M)) |
109 | return false; |
110 | |
111 | std::string PathPrefix; |
112 | // If this is the combined module (not a ThinLTO backend compile) or the |
113 | // user hasn't requested using the input module's path, emit to a file |
114 | // named from the provided OutputFileName with the Task ID appended. |
115 | if (M.getModuleIdentifier() == "ld-temp.o" || !UseInputModulePath) { |
116 | PathPrefix = OutputFileName; |
117 | if (Task != (unsigned)-1) |
118 | PathPrefix += utostr(X: Task) + "." ; |
119 | } else |
120 | PathPrefix = M.getModuleIdentifier() + "." ; |
121 | std::string Path = PathPrefix + PathSuffix + ".bc" ; |
122 | std::error_code EC; |
123 | raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::OF_None); |
124 | // Because -save-temps is a debugging feature, we report the error |
125 | // directly and exit. |
126 | if (EC) |
127 | reportOpenError(Path, Msg: EC.message()); |
128 | WriteBitcodeToFile(M, Out&: OS, /*ShouldPreserveUseListOrder=*/false); |
129 | return true; |
130 | }; |
131 | }; |
132 | |
133 | auto SaveCombinedIndex = |
134 | [=](const ModuleSummaryIndex &Index, |
135 | const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) { |
136 | std::string Path = OutputFileName + "index.bc" ; |
137 | std::error_code EC; |
138 | raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::OF_None); |
139 | // Because -save-temps is a debugging feature, we report the error |
140 | // directly and exit. |
141 | if (EC) |
142 | reportOpenError(Path, Msg: EC.message()); |
143 | writeIndexToFile(Index, Out&: OS); |
144 | |
145 | Path = OutputFileName + "index.dot" ; |
146 | raw_fd_ostream OSDot(Path, EC, sys::fs::OpenFlags::OF_None); |
147 | if (EC) |
148 | reportOpenError(Path, Msg: EC.message()); |
149 | Index.exportToDot(OS&: OSDot, GUIDPreservedSymbols); |
150 | return true; |
151 | }; |
152 | |
153 | if (SaveTempsArgs.empty()) { |
154 | setHook("0.preopt" , PreOptModuleHook); |
155 | setHook("1.promote" , PostPromoteModuleHook); |
156 | setHook("2.internalize" , PostInternalizeModuleHook); |
157 | setHook("3.import" , PostImportModuleHook); |
158 | setHook("4.opt" , PostOptModuleHook); |
159 | setHook("5.precodegen" , PreCodeGenModuleHook); |
160 | CombinedIndexHook = SaveCombinedIndex; |
161 | } else { |
162 | if (SaveTempsArgs.contains(V: "preopt" )) |
163 | setHook("0.preopt" , PreOptModuleHook); |
164 | if (SaveTempsArgs.contains(V: "promote" )) |
165 | setHook("1.promote" , PostPromoteModuleHook); |
166 | if (SaveTempsArgs.contains(V: "internalize" )) |
167 | setHook("2.internalize" , PostInternalizeModuleHook); |
168 | if (SaveTempsArgs.contains(V: "import" )) |
169 | setHook("3.import" , PostImportModuleHook); |
170 | if (SaveTempsArgs.contains(V: "opt" )) |
171 | setHook("4.opt" , PostOptModuleHook); |
172 | if (SaveTempsArgs.contains(V: "precodegen" )) |
173 | setHook("5.precodegen" , PreCodeGenModuleHook); |
174 | if (SaveTempsArgs.contains(V: "combinedindex" )) |
175 | CombinedIndexHook = SaveCombinedIndex; |
176 | } |
177 | |
178 | return Error::success(); |
179 | } |
180 | |
181 | #define HANDLE_EXTENSION(Ext) \ |
182 | llvm::PassPluginLibraryInfo get##Ext##PluginInfo(); |
183 | #include "llvm/Support/Extension.def" |
184 | |
185 | static void RegisterPassPlugins(ArrayRef<std::string> PassPlugins, |
186 | PassBuilder &PB) { |
187 | #define HANDLE_EXTENSION(Ext) \ |
188 | get##Ext##PluginInfo().RegisterPassBuilderCallbacks(PB); |
189 | #include "llvm/Support/Extension.def" |
190 | |
191 | // Load requested pass plugins and let them register pass builder callbacks |
192 | for (auto &PluginFN : PassPlugins) { |
193 | auto PassPlugin = PassPlugin::Load(Filename: PluginFN); |
194 | if (!PassPlugin) { |
195 | errs() << "Failed to load passes from '" << PluginFN |
196 | << "'. Request ignored.\n" ; |
197 | continue; |
198 | } |
199 | |
200 | PassPlugin->registerPassBuilderCallbacks(PB); |
201 | } |
202 | } |
203 | |
204 | static std::unique_ptr<TargetMachine> |
205 | createTargetMachine(const Config &Conf, const Target *TheTarget, Module &M) { |
206 | StringRef TheTriple = M.getTargetTriple(); |
207 | SubtargetFeatures Features; |
208 | Features.getDefaultSubtargetFeatures(Triple: Triple(TheTriple)); |
209 | for (const std::string &A : Conf.MAttrs) |
210 | Features.AddFeature(String: A); |
211 | |
212 | std::optional<Reloc::Model> RelocModel; |
213 | if (Conf.RelocModel) |
214 | RelocModel = *Conf.RelocModel; |
215 | else if (M.getModuleFlag(Key: "PIC Level" )) |
216 | RelocModel = |
217 | M.getPICLevel() == PICLevel::NotPIC ? Reloc::Static : Reloc::PIC_; |
218 | |
219 | std::optional<CodeModel::Model> CodeModel; |
220 | if (Conf.CodeModel) |
221 | CodeModel = *Conf.CodeModel; |
222 | else |
223 | CodeModel = M.getCodeModel(); |
224 | |
225 | std::unique_ptr<TargetMachine> TM(TheTarget->createTargetMachine( |
226 | TT: TheTriple, CPU: Conf.CPU, Features: Features.getString(), Options: Conf.Options, RM: RelocModel, |
227 | CM: CodeModel, OL: Conf.CGOptLevel)); |
228 | |
229 | assert(TM && "Failed to create target machine" ); |
230 | |
231 | if (std::optional<uint64_t> LargeDataThreshold = M.getLargeDataThreshold()) |
232 | TM->setLargeDataThreshold(*LargeDataThreshold); |
233 | |
234 | return TM; |
235 | } |
236 | |
237 | static void runNewPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM, |
238 | unsigned OptLevel, bool IsThinLTO, |
239 | ModuleSummaryIndex *ExportSummary, |
240 | const ModuleSummaryIndex *ImportSummary) { |
241 | auto FS = vfs::getRealFileSystem(); |
242 | std::optional<PGOOptions> PGOOpt; |
243 | if (!Conf.SampleProfile.empty()) |
244 | PGOOpt = PGOOptions(Conf.SampleProfile, "" , Conf.ProfileRemapping, |
245 | /*MemoryProfile=*/"" , FS, PGOOptions::SampleUse, |
246 | PGOOptions::NoCSAction, |
247 | PGOOptions::ColdFuncOpt::Default, true); |
248 | else if (Conf.RunCSIRInstr) { |
249 | PGOOpt = PGOOptions("" , Conf.CSIRProfile, Conf.ProfileRemapping, |
250 | /*MemoryProfile=*/"" , FS, PGOOptions::IRUse, |
251 | PGOOptions::CSIRInstr, PGOOptions::ColdFuncOpt::Default, |
252 | Conf.AddFSDiscriminator); |
253 | } else if (!Conf.CSIRProfile.empty()) { |
254 | PGOOpt = PGOOptions(Conf.CSIRProfile, "" , Conf.ProfileRemapping, |
255 | /*MemoryProfile=*/"" , FS, PGOOptions::IRUse, |
256 | PGOOptions::CSIRUse, PGOOptions::ColdFuncOpt::Default, |
257 | Conf.AddFSDiscriminator); |
258 | NoPGOWarnMismatch = !Conf.PGOWarnMismatch; |
259 | } else if (Conf.AddFSDiscriminator) { |
260 | PGOOpt = PGOOptions("" , "" , "" , /*MemoryProfile=*/"" , nullptr, |
261 | PGOOptions::NoAction, PGOOptions::NoCSAction, |
262 | PGOOptions::ColdFuncOpt::Default, true); |
263 | } |
264 | TM->setPGOOption(PGOOpt); |
265 | |
266 | LoopAnalysisManager LAM; |
267 | FunctionAnalysisManager FAM; |
268 | CGSCCAnalysisManager CGAM; |
269 | ModuleAnalysisManager MAM; |
270 | |
271 | PassInstrumentationCallbacks PIC; |
272 | StandardInstrumentations SI(Mod.getContext(), Conf.DebugPassManager, |
273 | Conf.VerifyEach); |
274 | SI.registerCallbacks(PIC, MAM: &MAM); |
275 | PassBuilder PB(TM, Conf.PTO, PGOOpt, &PIC); |
276 | |
277 | RegisterPassPlugins(PassPlugins: Conf.PassPlugins, PB); |
278 | |
279 | std::unique_ptr<TargetLibraryInfoImpl> TLII( |
280 | new TargetLibraryInfoImpl(Triple(TM->getTargetTriple()))); |
281 | if (Conf.Freestanding) |
282 | TLII->disableAllFunctions(); |
283 | FAM.registerPass(PassBuilder: [&] { return TargetLibraryAnalysis(*TLII); }); |
284 | |
285 | // Parse a custom AA pipeline if asked to. |
286 | if (!Conf.AAPipeline.empty()) { |
287 | AAManager AA; |
288 | if (auto Err = PB.parseAAPipeline(AA, PipelineText: Conf.AAPipeline)) { |
289 | report_fatal_error(reason: Twine("unable to parse AA pipeline description '" ) + |
290 | Conf.AAPipeline + "': " + toString(E: std::move(Err))); |
291 | } |
292 | // Register the AA manager first so that our version is the one used. |
293 | FAM.registerPass(PassBuilder: [&] { return std::move(AA); }); |
294 | } |
295 | |
296 | // Register all the basic analyses with the managers. |
297 | PB.registerModuleAnalyses(MAM); |
298 | PB.registerCGSCCAnalyses(CGAM); |
299 | PB.registerFunctionAnalyses(FAM); |
300 | PB.registerLoopAnalyses(LAM); |
301 | PB.crossRegisterProxies(LAM, FAM, CGAM, MAM); |
302 | |
303 | ModulePassManager MPM; |
304 | |
305 | if (!Conf.DisableVerify) |
306 | MPM.addPass(Pass: VerifierPass()); |
307 | |
308 | OptimizationLevel OL; |
309 | |
310 | switch (OptLevel) { |
311 | default: |
312 | llvm_unreachable("Invalid optimization level" ); |
313 | case 0: |
314 | OL = OptimizationLevel::O0; |
315 | break; |
316 | case 1: |
317 | OL = OptimizationLevel::O1; |
318 | break; |
319 | case 2: |
320 | OL = OptimizationLevel::O2; |
321 | break; |
322 | case 3: |
323 | OL = OptimizationLevel::O3; |
324 | break; |
325 | } |
326 | |
327 | // Parse a custom pipeline if asked to. |
328 | if (!Conf.OptPipeline.empty()) { |
329 | if (auto Err = PB.parsePassPipeline(MPM, PipelineText: Conf.OptPipeline)) { |
330 | report_fatal_error(reason: Twine("unable to parse pass pipeline description '" ) + |
331 | Conf.OptPipeline + "': " + toString(E: std::move(Err))); |
332 | } |
333 | } else if (IsThinLTO) { |
334 | MPM.addPass(Pass: PB.buildThinLTODefaultPipeline(Level: OL, ImportSummary)); |
335 | } else { |
336 | MPM.addPass(Pass: PB.buildLTODefaultPipeline(Level: OL, ExportSummary)); |
337 | } |
338 | |
339 | if (!Conf.DisableVerify) |
340 | MPM.addPass(Pass: VerifierPass()); |
341 | |
342 | MPM.run(IR&: Mod, AM&: MAM); |
343 | } |
344 | |
345 | bool lto::opt(const Config &Conf, TargetMachine *TM, unsigned Task, Module &Mod, |
346 | bool IsThinLTO, ModuleSummaryIndex *ExportSummary, |
347 | const ModuleSummaryIndex *ImportSummary, |
348 | const std::vector<uint8_t> &CmdArgs) { |
349 | if (EmbedBitcode == LTOBitcodeEmbedding::EmbedPostMergePreOptimized) { |
350 | // FIXME: the motivation for capturing post-merge bitcode and command line |
351 | // is replicating the compilation environment from bitcode, without needing |
352 | // to understand the dependencies (the functions to be imported). This |
353 | // assumes a clang - based invocation, case in which we have the command |
354 | // line. |
355 | // It's not very clear how the above motivation would map in the |
356 | // linker-based case, so we currently don't plumb the command line args in |
357 | // that case. |
358 | if (CmdArgs.empty()) |
359 | LLVM_DEBUG( |
360 | dbgs() << "Post-(Thin)LTO merge bitcode embedding was requested, but " |
361 | "command line arguments are not available" ); |
362 | llvm::embedBitcodeInModule(M&: Mod, Buf: llvm::MemoryBufferRef(), |
363 | /*EmbedBitcode*/ true, /*EmbedCmdline*/ true, |
364 | /*Cmdline*/ CmdArgs); |
365 | } |
366 | // FIXME: Plumb the combined index into the new pass manager. |
367 | runNewPMPasses(Conf, Mod, TM, OptLevel: Conf.OptLevel, IsThinLTO, ExportSummary, |
368 | ImportSummary); |
369 | return !Conf.PostOptModuleHook || Conf.PostOptModuleHook(Task, Mod); |
370 | } |
371 | |
372 | static void codegen(const Config &Conf, TargetMachine *TM, |
373 | AddStreamFn AddStream, unsigned Task, Module &Mod, |
374 | const ModuleSummaryIndex &CombinedIndex) { |
375 | if (Conf.PreCodeGenModuleHook && !Conf.PreCodeGenModuleHook(Task, Mod)) |
376 | return; |
377 | |
378 | if (EmbedBitcode == LTOBitcodeEmbedding::EmbedOptimized) |
379 | llvm::embedBitcodeInModule(M&: Mod, Buf: llvm::MemoryBufferRef(), |
380 | /*EmbedBitcode*/ true, |
381 | /*EmbedCmdline*/ false, |
382 | /*CmdArgs*/ std::vector<uint8_t>()); |
383 | |
384 | std::unique_ptr<ToolOutputFile> DwoOut; |
385 | SmallString<1024> DwoFile(Conf.SplitDwarfOutput); |
386 | if (!Conf.DwoDir.empty()) { |
387 | std::error_code EC; |
388 | if (auto EC = llvm::sys::fs::create_directories(path: Conf.DwoDir)) |
389 | report_fatal_error(reason: Twine("Failed to create directory " ) + Conf.DwoDir + |
390 | ": " + EC.message()); |
391 | |
392 | DwoFile = Conf.DwoDir; |
393 | sys::path::append(path&: DwoFile, a: std::to_string(val: Task) + ".dwo" ); |
394 | TM->Options.MCOptions.SplitDwarfFile = std::string(DwoFile); |
395 | } else |
396 | TM->Options.MCOptions.SplitDwarfFile = Conf.SplitDwarfFile; |
397 | |
398 | if (!DwoFile.empty()) { |
399 | std::error_code EC; |
400 | DwoOut = std::make_unique<ToolOutputFile>(args&: DwoFile, args&: EC, args: sys::fs::OF_None); |
401 | if (EC) |
402 | report_fatal_error(reason: Twine("Failed to open " ) + DwoFile + ": " + |
403 | EC.message()); |
404 | } |
405 | |
406 | Expected<std::unique_ptr<CachedFileStream>> StreamOrErr = |
407 | AddStream(Task, Mod.getModuleIdentifier()); |
408 | if (Error Err = StreamOrErr.takeError()) |
409 | report_fatal_error(Err: std::move(Err)); |
410 | std::unique_ptr<CachedFileStream> &Stream = *StreamOrErr; |
411 | TM->Options.ObjectFilenameForDebug = Stream->ObjectPathName; |
412 | |
413 | legacy::PassManager CodeGenPasses; |
414 | TargetLibraryInfoImpl TLII(Triple(Mod.getTargetTriple())); |
415 | CodeGenPasses.add(P: new TargetLibraryInfoWrapperPass(TLII)); |
416 | CodeGenPasses.add( |
417 | P: createImmutableModuleSummaryIndexWrapperPass(Index: &CombinedIndex)); |
418 | if (Conf.PreCodeGenPassesHook) |
419 | Conf.PreCodeGenPassesHook(CodeGenPasses); |
420 | if (TM->addPassesToEmitFile(CodeGenPasses, *Stream->OS, |
421 | DwoOut ? &DwoOut->os() : nullptr, |
422 | Conf.CGFileType)) |
423 | report_fatal_error(reason: "Failed to setup codegen" ); |
424 | CodeGenPasses.run(M&: Mod); |
425 | |
426 | if (DwoOut) |
427 | DwoOut->keep(); |
428 | } |
429 | |
430 | static void splitCodeGen(const Config &C, TargetMachine *TM, |
431 | AddStreamFn AddStream, |
432 | unsigned ParallelCodeGenParallelismLevel, Module &Mod, |
433 | const ModuleSummaryIndex &CombinedIndex) { |
434 | DefaultThreadPool CodegenThreadPool( |
435 | heavyweight_hardware_concurrency(ThreadCount: ParallelCodeGenParallelismLevel)); |
436 | unsigned ThreadCount = 0; |
437 | const Target *T = &TM->getTarget(); |
438 | |
439 | const auto HandleModulePartition = |
440 | [&](std::unique_ptr<Module> MPart) { |
441 | // We want to clone the module in a new context to multi-thread the |
442 | // codegen. We do it by serializing partition modules to bitcode |
443 | // (while still on the main thread, in order to avoid data races) and |
444 | // spinning up new threads which deserialize the partitions into |
445 | // separate contexts. |
446 | // FIXME: Provide a more direct way to do this in LLVM. |
447 | SmallString<0> BC; |
448 | raw_svector_ostream BCOS(BC); |
449 | WriteBitcodeToFile(M: *MPart, Out&: BCOS); |
450 | |
451 | // Enqueue the task |
452 | CodegenThreadPool.async( |
453 | F: [&](const SmallString<0> &BC, unsigned ThreadId) { |
454 | LTOLLVMContext Ctx(C); |
455 | Expected<std::unique_ptr<Module>> MOrErr = parseBitcodeFile( |
456 | Buffer: MemoryBufferRef(StringRef(BC.data(), BC.size()), "ld-temp.o" ), |
457 | Context&: Ctx); |
458 | if (!MOrErr) |
459 | report_fatal_error(reason: "Failed to read bitcode" ); |
460 | std::unique_ptr<Module> MPartInCtx = std::move(MOrErr.get()); |
461 | |
462 | std::unique_ptr<TargetMachine> TM = |
463 | createTargetMachine(Conf: C, TheTarget: T, M&: *MPartInCtx); |
464 | |
465 | codegen(Conf: C, TM: TM.get(), AddStream, Task: ThreadId, Mod&: *MPartInCtx, |
466 | CombinedIndex); |
467 | }, |
468 | // Pass BC using std::move to ensure that it get moved rather than |
469 | // copied into the thread's context. |
470 | ArgList: std::move(BC), ArgList: ThreadCount++); |
471 | }; |
472 | |
473 | // Try target-specific module splitting first, then fallback to the default. |
474 | if (!TM->splitModule(M&: Mod, NumParts: ParallelCodeGenParallelismLevel, |
475 | ModuleCallback: HandleModulePartition)) { |
476 | SplitModule(M&: Mod, N: ParallelCodeGenParallelismLevel, ModuleCallback: HandleModulePartition, |
477 | PreserveLocals: false); |
478 | } |
479 | |
480 | // Because the inner lambda (which runs in a worker thread) captures our local |
481 | // variables, we need to wait for the worker threads to terminate before we |
482 | // can leave the function scope. |
483 | CodegenThreadPool.wait(); |
484 | } |
485 | |
486 | static Expected<const Target *> initAndLookupTarget(const Config &C, |
487 | Module &Mod) { |
488 | if (!C.OverrideTriple.empty()) |
489 | Mod.setTargetTriple(C.OverrideTriple); |
490 | else if (Mod.getTargetTriple().empty()) |
491 | Mod.setTargetTriple(C.DefaultTriple); |
492 | |
493 | std::string Msg; |
494 | const Target *T = TargetRegistry::lookupTarget(Triple: Mod.getTargetTriple(), Error&: Msg); |
495 | if (!T) |
496 | return make_error<StringError>(Args&: Msg, Args: inconvertibleErrorCode()); |
497 | return T; |
498 | } |
499 | |
500 | Error lto::( |
501 | std::unique_ptr<ToolOutputFile> DiagOutputFile) { |
502 | // Make sure we flush the diagnostic remarks file in case the linker doesn't |
503 | // call the global destructors before exiting. |
504 | if (!DiagOutputFile) |
505 | return Error::success(); |
506 | DiagOutputFile->keep(); |
507 | DiagOutputFile->os().flush(); |
508 | return Error::success(); |
509 | } |
510 | |
511 | Error lto::backend(const Config &C, AddStreamFn AddStream, |
512 | unsigned ParallelCodeGenParallelismLevel, Module &Mod, |
513 | ModuleSummaryIndex &CombinedIndex) { |
514 | Expected<const Target *> TOrErr = initAndLookupTarget(C, Mod); |
515 | if (!TOrErr) |
516 | return TOrErr.takeError(); |
517 | |
518 | std::unique_ptr<TargetMachine> TM = createTargetMachine(Conf: C, TheTarget: *TOrErr, M&: Mod); |
519 | |
520 | LLVM_DEBUG(dbgs() << "Running regular LTO\n" ); |
521 | if (!C.CodeGenOnly) { |
522 | if (!opt(Conf: C, TM: TM.get(), Task: 0, Mod, /*IsThinLTO=*/false, |
523 | /*ExportSummary=*/&CombinedIndex, /*ImportSummary=*/nullptr, |
524 | /*CmdArgs*/ std::vector<uint8_t>())) |
525 | return Error::success(); |
526 | } |
527 | |
528 | if (ParallelCodeGenParallelismLevel == 1) { |
529 | codegen(Conf: C, TM: TM.get(), AddStream, Task: 0, Mod, CombinedIndex); |
530 | } else { |
531 | splitCodeGen(C, TM: TM.get(), AddStream, ParallelCodeGenParallelismLevel, Mod, |
532 | CombinedIndex); |
533 | } |
534 | return Error::success(); |
535 | } |
536 | |
537 | static void dropDeadSymbols(Module &Mod, const GVSummaryMapTy &DefinedGlobals, |
538 | const ModuleSummaryIndex &Index) { |
539 | std::vector<GlobalValue*> DeadGVs; |
540 | for (auto &GV : Mod.global_values()) |
541 | if (GlobalValueSummary *GVS = DefinedGlobals.lookup(Val: GV.getGUID())) |
542 | if (!Index.isGlobalValueLive(GVS)) { |
543 | DeadGVs.push_back(x: &GV); |
544 | convertToDeclaration(GV); |
545 | } |
546 | |
547 | // Now that all dead bodies have been dropped, delete the actual objects |
548 | // themselves when possible. |
549 | for (GlobalValue *GV : DeadGVs) { |
550 | GV->removeDeadConstantUsers(); |
551 | // Might reference something defined in native object (i.e. dropped a |
552 | // non-prevailing IR def, but we need to keep the declaration). |
553 | if (GV->use_empty()) |
554 | GV->eraseFromParent(); |
555 | } |
556 | } |
557 | |
558 | Error lto::thinBackend(const Config &Conf, unsigned Task, AddStreamFn AddStream, |
559 | Module &Mod, const ModuleSummaryIndex &CombinedIndex, |
560 | const FunctionImporter::ImportMapTy &ImportList, |
561 | const GVSummaryMapTy &DefinedGlobals, |
562 | MapVector<StringRef, BitcodeModule> *ModuleMap, |
563 | const std::vector<uint8_t> &CmdArgs) { |
564 | Expected<const Target *> TOrErr = initAndLookupTarget(C: Conf, Mod); |
565 | if (!TOrErr) |
566 | return TOrErr.takeError(); |
567 | |
568 | std::unique_ptr<TargetMachine> TM = createTargetMachine(Conf, TheTarget: *TOrErr, M&: Mod); |
569 | |
570 | // Setup optimization remarks. |
571 | auto DiagFileOrErr = lto::setupLLVMOptimizationRemarks( |
572 | Context&: Mod.getContext(), RemarksFilename: Conf.RemarksFilename, RemarksPasses: Conf.RemarksPasses, |
573 | RemarksFormat: Conf.RemarksFormat, RemarksWithHotness: Conf.RemarksWithHotness, RemarksHotnessThreshold: Conf.RemarksHotnessThreshold, |
574 | Count: Task); |
575 | if (!DiagFileOrErr) |
576 | return DiagFileOrErr.takeError(); |
577 | auto DiagnosticOutputFile = std::move(*DiagFileOrErr); |
578 | |
579 | // Set the partial sample profile ratio in the profile summary module flag of |
580 | // the module, if applicable. |
581 | Mod.setPartialSampleProfileRatio(CombinedIndex); |
582 | |
583 | LLVM_DEBUG(dbgs() << "Running ThinLTO\n" ); |
584 | if (Conf.CodeGenOnly) { |
585 | codegen(Conf, TM: TM.get(), AddStream, Task, Mod, CombinedIndex); |
586 | return finalizeOptimizationRemarks(DiagOutputFile: std::move(DiagnosticOutputFile)); |
587 | } |
588 | |
589 | if (Conf.PreOptModuleHook && !Conf.PreOptModuleHook(Task, Mod)) |
590 | return finalizeOptimizationRemarks(DiagOutputFile: std::move(DiagnosticOutputFile)); |
591 | |
592 | auto OptimizeAndCodegen = |
593 | [&](Module &Mod, TargetMachine *TM, |
594 | std::unique_ptr<ToolOutputFile> DiagnosticOutputFile) { |
595 | if (!opt(Conf, TM, Task, Mod, /*IsThinLTO=*/true, |
596 | /*ExportSummary=*/nullptr, /*ImportSummary=*/&CombinedIndex, |
597 | CmdArgs)) |
598 | return finalizeOptimizationRemarks(DiagOutputFile: std::move(DiagnosticOutputFile)); |
599 | |
600 | codegen(Conf, TM, AddStream, Task, Mod, CombinedIndex); |
601 | return finalizeOptimizationRemarks(DiagOutputFile: std::move(DiagnosticOutputFile)); |
602 | }; |
603 | |
604 | if (ThinLTOAssumeMerged) |
605 | return OptimizeAndCodegen(Mod, TM.get(), std::move(DiagnosticOutputFile)); |
606 | |
607 | // When linking an ELF shared object, dso_local should be dropped. We |
608 | // conservatively do this for -fpic. |
609 | bool ClearDSOLocalOnDeclarations = |
610 | TM->getTargetTriple().isOSBinFormatELF() && |
611 | TM->getRelocationModel() != Reloc::Static && |
612 | Mod.getPIELevel() == PIELevel::Default; |
613 | renameModuleForThinLTO(M&: Mod, Index: CombinedIndex, ClearDSOLocalOnDeclarations); |
614 | |
615 | dropDeadSymbols(Mod, DefinedGlobals, Index: CombinedIndex); |
616 | |
617 | thinLTOFinalizeInModule(TheModule&: Mod, DefinedGlobals, /*PropagateAttrs=*/true); |
618 | |
619 | if (Conf.PostPromoteModuleHook && !Conf.PostPromoteModuleHook(Task, Mod)) |
620 | return finalizeOptimizationRemarks(DiagOutputFile: std::move(DiagnosticOutputFile)); |
621 | |
622 | if (!DefinedGlobals.empty()) |
623 | thinLTOInternalizeModule(TheModule&: Mod, DefinedGlobals); |
624 | |
625 | if (Conf.PostInternalizeModuleHook && |
626 | !Conf.PostInternalizeModuleHook(Task, Mod)) |
627 | return finalizeOptimizationRemarks(DiagOutputFile: std::move(DiagnosticOutputFile)); |
628 | |
629 | auto ModuleLoader = [&](StringRef Identifier) { |
630 | assert(Mod.getContext().isODRUniquingDebugTypes() && |
631 | "ODR Type uniquing should be enabled on the context" ); |
632 | if (ModuleMap) { |
633 | auto I = ModuleMap->find(Key: Identifier); |
634 | assert(I != ModuleMap->end()); |
635 | return I->second.getLazyModule(Context&: Mod.getContext(), |
636 | /*ShouldLazyLoadMetadata=*/true, |
637 | /*IsImporting*/ true); |
638 | } |
639 | |
640 | ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> MBOrErr = |
641 | llvm::MemoryBuffer::getFile(Filename: Identifier); |
642 | if (!MBOrErr) |
643 | return Expected<std::unique_ptr<llvm::Module>>(make_error<StringError>( |
644 | Args: Twine("Error loading imported file " ) + Identifier + " : " , |
645 | Args: MBOrErr.getError())); |
646 | |
647 | Expected<BitcodeModule> BMOrErr = findThinLTOModule(MBRef: **MBOrErr); |
648 | if (!BMOrErr) |
649 | return Expected<std::unique_ptr<llvm::Module>>(make_error<StringError>( |
650 | Args: Twine("Error loading imported file " ) + Identifier + " : " + |
651 | toString(E: BMOrErr.takeError()), |
652 | Args: inconvertibleErrorCode())); |
653 | |
654 | Expected<std::unique_ptr<Module>> MOrErr = |
655 | BMOrErr->getLazyModule(Context&: Mod.getContext(), |
656 | /*ShouldLazyLoadMetadata=*/true, |
657 | /*IsImporting*/ true); |
658 | if (MOrErr) |
659 | (*MOrErr)->setOwnedMemoryBuffer(std::move(*MBOrErr)); |
660 | return MOrErr; |
661 | }; |
662 | |
663 | FunctionImporter Importer(CombinedIndex, ModuleLoader, |
664 | ClearDSOLocalOnDeclarations); |
665 | if (Error Err = Importer.importFunctions(M&: Mod, ImportList).takeError()) |
666 | return Err; |
667 | |
668 | // Do this after any importing so that imported code is updated. |
669 | updateMemProfAttributes(Mod, Index: CombinedIndex); |
670 | updatePublicTypeTestCalls(M&: Mod, WholeProgramVisibilityEnabledInLTO: CombinedIndex.withWholeProgramVisibility()); |
671 | |
672 | if (Conf.PostImportModuleHook && !Conf.PostImportModuleHook(Task, Mod)) |
673 | return finalizeOptimizationRemarks(DiagOutputFile: std::move(DiagnosticOutputFile)); |
674 | |
675 | return OptimizeAndCodegen(Mod, TM.get(), std::move(DiagnosticOutputFile)); |
676 | } |
677 | |
678 | BitcodeModule *lto::findThinLTOModule(MutableArrayRef<BitcodeModule> BMs) { |
679 | if (ThinLTOAssumeMerged && BMs.size() == 1) |
680 | return BMs.begin(); |
681 | |
682 | for (BitcodeModule &BM : BMs) { |
683 | Expected<BitcodeLTOInfo> LTOInfo = BM.getLTOInfo(); |
684 | if (LTOInfo && LTOInfo->IsThinLTO) |
685 | return &BM; |
686 | } |
687 | return nullptr; |
688 | } |
689 | |
690 | Expected<BitcodeModule> lto::findThinLTOModule(MemoryBufferRef MBRef) { |
691 | Expected<std::vector<BitcodeModule>> BMsOrErr = getBitcodeModuleList(Buffer: MBRef); |
692 | if (!BMsOrErr) |
693 | return BMsOrErr.takeError(); |
694 | |
695 | // The bitcode file may contain multiple modules, we want the one that is |
696 | // marked as being the ThinLTO module. |
697 | if (const BitcodeModule *Bm = lto::findThinLTOModule(BMs: *BMsOrErr)) |
698 | return *Bm; |
699 | |
700 | return make_error<StringError>(Args: "Could not find module summary" , |
701 | Args: inconvertibleErrorCode()); |
702 | } |
703 | |
704 | bool lto::initImportList(const Module &M, |
705 | const ModuleSummaryIndex &CombinedIndex, |
706 | FunctionImporter::ImportMapTy &ImportList) { |
707 | if (ThinLTOAssumeMerged) |
708 | return true; |
709 | // We can simply import the values mentioned in the combined index, since |
710 | // we should only invoke this using the individual indexes written out |
711 | // via a WriteIndexesThinBackend. |
712 | for (const auto &GlobalList : CombinedIndex) { |
713 | // Ignore entries for undefined references. |
714 | if (GlobalList.second.SummaryList.empty()) |
715 | continue; |
716 | |
717 | auto GUID = GlobalList.first; |
718 | for (const auto &Summary : GlobalList.second.SummaryList) { |
719 | // Skip the summaries for the importing module. These are included to |
720 | // e.g. record required linkage changes. |
721 | if (Summary->modulePath() == M.getModuleIdentifier()) |
722 | continue; |
723 | // Add an entry to provoke importing by thinBackend. |
724 | ImportList[Summary->modulePath()].insert(x: GUID); |
725 | } |
726 | } |
727 | return true; |
728 | } |
729 | |