1 | //===- JIT.cpp - Target independent JIT infrastructure --------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | //===----------------------------------------------------------------------===// |
10 | |
11 | #include "JIT.h" |
12 | |
13 | #include "Shared/Debug.h" |
14 | #include "Shared/Utils.h" |
15 | |
16 | #include "PluginInterface.h" |
17 | #include "omptarget.h" |
18 | |
19 | #include "llvm/ADT/SmallVector.h" |
20 | #include "llvm/CodeGen/CommandFlags.h" |
21 | #include "llvm/CodeGen/MachineModuleInfo.h" |
22 | #include "llvm/IR/LLVMContext.h" |
23 | #include "llvm/IR/LLVMRemarkStreamer.h" |
24 | #include "llvm/IR/LegacyPassManager.h" |
25 | #include "llvm/IRReader/IRReader.h" |
26 | #include "llvm/InitializePasses.h" |
27 | #include "llvm/MC/TargetRegistry.h" |
28 | #include "llvm/Object/IRObjectFile.h" |
29 | #include "llvm/Passes/OptimizationLevel.h" |
30 | #include "llvm/Passes/PassBuilder.h" |
31 | #include "llvm/Support/MemoryBuffer.h" |
32 | #include "llvm/Support/SourceMgr.h" |
33 | #include "llvm/Support/TargetSelect.h" |
34 | #include "llvm/Support/TimeProfiler.h" |
35 | #include "llvm/Support/ToolOutputFile.h" |
36 | #include "llvm/Support/raw_ostream.h" |
37 | #include "llvm/Target/TargetMachine.h" |
38 | #include "llvm/Target/TargetOptions.h" |
39 | #include "llvm/TargetParser/SubtargetFeature.h" |
40 | |
41 | #include <mutex> |
42 | #include <shared_mutex> |
43 | #include <system_error> |
44 | |
45 | using namespace llvm; |
46 | using namespace llvm::object; |
47 | using namespace omp; |
48 | using namespace omp::target; |
49 | |
50 | namespace { |
51 | |
52 | bool isImageBitcode(const __tgt_device_image &Image) { |
53 | StringRef Binary(reinterpret_cast<const char *>(Image.ImageStart), |
54 | utils::getPtrDiff(Image.ImageEnd, Image.ImageStart)); |
55 | |
56 | return identify_magic(magic: Binary) == file_magic::bitcode; |
57 | } |
58 | |
59 | Expected<std::unique_ptr<Module>> |
60 | createModuleFromMemoryBuffer(std::unique_ptr<MemoryBuffer> &MB, |
61 | LLVMContext &Context) { |
62 | SMDiagnostic Err; |
63 | auto Mod = parseIR(*MB, Err, Context); |
64 | if (!Mod) |
65 | return error::createOffloadError(error::ErrorCode::UNKNOWN, |
66 | "failed to create module" ); |
67 | return std::move(Mod); |
68 | } |
69 | Expected<std::unique_ptr<Module>> |
70 | createModuleFromImage(const __tgt_device_image &Image, LLVMContext &Context) { |
71 | StringRef Data((const char *)Image.ImageStart, |
72 | utils::getPtrDiff(Image.ImageEnd, Image.ImageStart)); |
73 | std::unique_ptr<MemoryBuffer> MB = MemoryBuffer::getMemBuffer( |
74 | InputData: Data, /*BufferName=*/"" , /*RequiresNullTerminator=*/false); |
75 | return createModuleFromMemoryBuffer(MB, Context); |
76 | } |
77 | |
78 | OptimizationLevel getOptLevel(unsigned OptLevel) { |
79 | switch (OptLevel) { |
80 | case 0: |
81 | return OptimizationLevel::O0; |
82 | case 1: |
83 | return OptimizationLevel::O1; |
84 | case 2: |
85 | return OptimizationLevel::O2; |
86 | case 3: |
87 | return OptimizationLevel::O3; |
88 | } |
89 | llvm_unreachable("Invalid optimization level" ); |
90 | } |
91 | |
92 | Expected<std::unique_ptr<TargetMachine>> |
93 | createTargetMachine(Module &M, std::string CPU, unsigned OptLevel) { |
94 | Triple TT(M.getTargetTriple()); |
95 | std::optional<CodeGenOptLevel> CGOptLevelOrNone = |
96 | CodeGenOpt::getLevel(OptLevel); |
97 | assert(CGOptLevelOrNone && "Invalid optimization level" ); |
98 | CodeGenOptLevel CGOptLevel = *CGOptLevelOrNone; |
99 | |
100 | std::string Msg; |
101 | const Target *T = TargetRegistry::lookupTarget(TheTriple: M.getTargetTriple(), Error&: Msg); |
102 | if (!T) |
103 | return error::createOffloadError(error::ErrorCode::INVALID_BINARY, |
104 | Msg.data()); |
105 | |
106 | SubtargetFeatures Features; |
107 | Features.getDefaultSubtargetFeatures(Triple: TT); |
108 | |
109 | std::optional<Reloc::Model> RelocModel; |
110 | if (M.getModuleFlag("PIC Level" )) |
111 | RelocModel = |
112 | M.getPICLevel() == PICLevel::NotPIC ? Reloc::Static : Reloc::PIC_; |
113 | |
114 | std::optional<CodeModel::Model> CodeModel = M.getCodeModel(); |
115 | |
116 | TargetOptions Options = codegen::InitTargetOptionsFromCodeGenFlags(TheTriple: TT); |
117 | |
118 | std::unique_ptr<TargetMachine> TM( |
119 | T->createTargetMachine(M.getTargetTriple(), CPU, Features.getString(), |
120 | Options, RelocModel, CodeModel, CGOptLevel)); |
121 | if (!TM) |
122 | return error::createOffloadError(error::ErrorCode::INVALID_BINARY, |
123 | "failed to create target machine" ); |
124 | return std::move(TM); |
125 | } |
126 | |
127 | } // namespace |
128 | |
129 | JITEngine::JITEngine(Triple::ArchType TA) : TT(Triple::getArchTypeName(TA)) { |
130 | codegen::RegisterCodeGenFlags(); |
131 | #ifdef LIBOMPTARGET_JIT_NVPTX |
132 | if (TT.isNVPTX()) { |
133 | LLVMInitializeNVPTXTargetInfo(); |
134 | LLVMInitializeNVPTXTarget(); |
135 | LLVMInitializeNVPTXTargetMC(); |
136 | LLVMInitializeNVPTXAsmPrinter(); |
137 | } |
138 | #endif |
139 | #ifdef LIBOMPTARGET_JIT_AMDGPU |
140 | if (TT.isAMDGPU()) { |
141 | LLVMInitializeAMDGPUTargetInfo(); |
142 | LLVMInitializeAMDGPUTarget(); |
143 | LLVMInitializeAMDGPUTargetMC(); |
144 | LLVMInitializeAMDGPUAsmPrinter(); |
145 | } |
146 | #endif |
147 | } |
148 | |
149 | void JITEngine::opt(TargetMachine *TM, TargetLibraryInfoImpl *TLII, Module &M, |
150 | unsigned OptLevel) { |
151 | PipelineTuningOptions PTO; |
152 | std::optional<PGOOptions> PGOOpt; |
153 | |
154 | LoopAnalysisManager LAM; |
155 | FunctionAnalysisManager FAM; |
156 | CGSCCAnalysisManager CGAM; |
157 | ModuleAnalysisManager MAM; |
158 | ModulePassManager MPM; |
159 | |
160 | PassBuilder PB(TM, PTO, PGOOpt, nullptr); |
161 | |
162 | FAM.registerPass([&] { return TargetLibraryAnalysis(*TLII); }); |
163 | |
164 | // Register all the basic analyses with the managers. |
165 | PB.registerModuleAnalyses(MAM); |
166 | PB.registerCGSCCAnalyses(CGAM); |
167 | PB.registerFunctionAnalyses(FAM); |
168 | PB.registerLoopAnalyses(LAM); |
169 | PB.crossRegisterProxies(LAM, FAM, CGAM, MAM); |
170 | |
171 | MPM.addPass(PB.buildPerModuleDefaultPipeline(getOptLevel(OptLevel))); |
172 | MPM.run(M, MAM); |
173 | } |
174 | |
175 | void JITEngine::codegen(TargetMachine *TM, TargetLibraryInfoImpl *TLII, |
176 | Module &M, raw_pwrite_stream &OS) { |
177 | legacy::PassManager PM; |
178 | PM.add(new TargetLibraryInfoWrapperPass(*TLII)); |
179 | MachineModuleInfoWrapperPass *MMIWP = new MachineModuleInfoWrapperPass(TM); |
180 | TM->addPassesToEmitFile(PM, OS, nullptr, |
181 | TT.isNVPTX() ? CodeGenFileType::AssemblyFile |
182 | : CodeGenFileType::ObjectFile, |
183 | /*DisableVerify=*/false, MMIWP); |
184 | |
185 | PM.run(M); |
186 | } |
187 | |
188 | Expected<std::unique_ptr<MemoryBuffer>> |
189 | JITEngine::backend(Module &M, const std::string &ComputeUnitKind, |
190 | unsigned OptLevel) { |
191 | |
192 | auto RemarksFileOrErr = setupLLVMOptimizationRemarks( |
193 | M.getContext(), /*RemarksFilename=*/"" , /*RemarksPasses=*/"" , |
194 | /*RemarksFormat=*/"" , /*RemarksWithHotness=*/false); |
195 | if (Error E = RemarksFileOrErr.takeError()) |
196 | return std::move(E); |
197 | if (*RemarksFileOrErr) |
198 | (*RemarksFileOrErr)->keep(); |
199 | |
200 | auto TMOrErr = createTargetMachine(M, ComputeUnitKind, OptLevel); |
201 | if (!TMOrErr) |
202 | return TMOrErr.takeError(); |
203 | |
204 | std::unique_ptr<TargetMachine> TM = std::move(*TMOrErr); |
205 | TargetLibraryInfoImpl TLII(TT); |
206 | |
207 | if (PreOptIRModuleFileName.isPresent()) { |
208 | std::error_code EC; |
209 | raw_fd_stream FD(PreOptIRModuleFileName.get(), EC); |
210 | if (EC) |
211 | return createStringError( |
212 | EC, "Could not open %s to write the pre-opt IR module\n" , |
213 | PreOptIRModuleFileName.get().c_str()); |
214 | M.print(FD, nullptr); |
215 | } |
216 | |
217 | if (!JITSkipOpt) |
218 | opt(TM.get(), &TLII, M, OptLevel); |
219 | |
220 | if (PostOptIRModuleFileName.isPresent()) { |
221 | std::error_code EC; |
222 | raw_fd_stream FD(PostOptIRModuleFileName.get(), EC); |
223 | if (EC) |
224 | return createStringError( |
225 | error::ErrorCode::HOST_IO, |
226 | "Could not open %s to write the post-opt IR module\n" , |
227 | PostOptIRModuleFileName.get().c_str()); |
228 | M.print(FD, nullptr); |
229 | } |
230 | |
231 | // Prepare the output buffer and stream for codegen. |
232 | SmallVector<char> CGOutputBuffer; |
233 | raw_svector_ostream OS(CGOutputBuffer); |
234 | |
235 | codegen(TM.get(), &TLII, M, OS); |
236 | |
237 | return MemoryBuffer::getMemBufferCopy(OS.str()); |
238 | } |
239 | |
240 | Expected<std::unique_ptr<MemoryBuffer>> |
241 | JITEngine::getOrCreateObjFile(const __tgt_device_image &Image, LLVMContext &Ctx, |
242 | const std::string &ComputeUnitKind) { |
243 | |
244 | // Check if the user replaces the module at runtime with a finished object. |
245 | if (ReplacementObjectFileName.isPresent()) { |
246 | auto MBOrErr = |
247 | MemoryBuffer::getFileOrSTDIN(ReplacementObjectFileName.get()); |
248 | if (!MBOrErr) |
249 | return createStringError(MBOrErr.getError(), |
250 | "Could not read replacement obj from %s\n" , |
251 | ReplacementModuleFileName.get().c_str()); |
252 | return std::move(*MBOrErr); |
253 | } |
254 | |
255 | Module *Mod = nullptr; |
256 | // Check if the user replaces the module at runtime or we read it from the |
257 | // image. |
258 | // TODO: Allow the user to specify images per device (Arch + ComputeUnitKind). |
259 | if (!ReplacementModuleFileName.isPresent()) { |
260 | auto ModOrErr = createModuleFromImage(Image, Ctx); |
261 | if (!ModOrErr) |
262 | return ModOrErr.takeError(); |
263 | Mod = ModOrErr->release(); |
264 | } else { |
265 | auto MBOrErr = |
266 | MemoryBuffer::getFileOrSTDIN(ReplacementModuleFileName.get()); |
267 | if (!MBOrErr) |
268 | return createStringError(MBOrErr.getError(), |
269 | "Could not read replacement module from %s\n" , |
270 | ReplacementModuleFileName.get().c_str()); |
271 | auto ModOrErr = createModuleFromMemoryBuffer(MBOrErr.get(), Ctx); |
272 | if (!ModOrErr) |
273 | return ModOrErr.takeError(); |
274 | Mod = ModOrErr->release(); |
275 | } |
276 | |
277 | return backend(*Mod, ComputeUnitKind, JITOptLevel); |
278 | } |
279 | |
280 | Expected<const __tgt_device_image *> |
281 | JITEngine::compile(const __tgt_device_image &Image, |
282 | const std::string &ComputeUnitKind, |
283 | PostProcessingFn PostProcessing) { |
284 | std::lock_guard<std::mutex> Lock(ComputeUnitMapMutex); |
285 | |
286 | // Check if we JITed this image for the given compute unit kind before. |
287 | ComputeUnitInfo &CUI = ComputeUnitMap[ComputeUnitKind]; |
288 | if (__tgt_device_image *JITedImage = CUI.TgtImageMap.lookup(&Image)) |
289 | return JITedImage; |
290 | |
291 | auto ObjMBOrErr = getOrCreateObjFile(Image, CUI.Context, ComputeUnitKind); |
292 | if (!ObjMBOrErr) |
293 | return ObjMBOrErr.takeError(); |
294 | |
295 | auto ImageMBOrErr = PostProcessing(std::move(*ObjMBOrErr)); |
296 | if (!ImageMBOrErr) |
297 | return ImageMBOrErr.takeError(); |
298 | |
299 | CUI.JITImages.push_back(std::move(*ImageMBOrErr)); |
300 | __tgt_device_image *&JITedImage = CUI.TgtImageMap[&Image]; |
301 | JITedImage = new __tgt_device_image(); |
302 | *JITedImage = Image; |
303 | |
304 | auto &ImageMB = CUI.JITImages.back(); |
305 | |
306 | JITedImage->ImageStart = const_cast<char *>(ImageMB->getBufferStart()); |
307 | JITedImage->ImageEnd = const_cast<char *>(ImageMB->getBufferEnd()); |
308 | |
309 | return JITedImage; |
310 | } |
311 | |
312 | Expected<const __tgt_device_image *> |
313 | JITEngine::process(const __tgt_device_image &Image, |
314 | target::plugin::GenericDeviceTy &Device) { |
315 | const std::string &ComputeUnitKind = Device.getComputeUnitKind(); |
316 | |
317 | PostProcessingFn PostProcessing = [&Device](std::unique_ptr<MemoryBuffer> MB) |
318 | -> Expected<std::unique_ptr<MemoryBuffer>> { |
319 | return Device.doJITPostProcessing(std::move(MB)); |
320 | }; |
321 | |
322 | if (isImageBitcode(Image)) |
323 | return compile(Image, ComputeUnitKind, PostProcessing); |
324 | |
325 | return &Image; |
326 | } |
327 | |