1//===---------- DeviceOffload.cpp - Device Offloading------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements offloading to CUDA devices.
10//
11//===----------------------------------------------------------------------===//
12
13#include "DeviceOffload.h"
14
15#include "clang/Basic/TargetOptions.h"
16#include "clang/CodeGen/ModuleBuilder.h"
17#include "clang/Frontend/CompilerInstance.h"
18
19#include "llvm/IR/LegacyPassManager.h"
20#include "llvm/MC/TargetRegistry.h"
21#include "llvm/Target/TargetMachine.h"
22
23namespace clang {
24
25IncrementalCUDADeviceParser::IncrementalCUDADeviceParser(
26 Interpreter &Interp, std::unique_ptr<CompilerInstance> Instance,
27 IncrementalParser &HostParser, llvm::LLVMContext &LLVMCtx,
28 llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS,
29 llvm::Error &Err)
30 : IncrementalParser(Interp, std::move(Instance), LLVMCtx, Err),
31 HostParser(HostParser), VFS(FS) {
32 if (Err)
33 return;
34 StringRef Arch = CI->getTargetOpts().CPU;
35 if (!Arch.starts_with(Prefix: "sm_") || Arch.substr(Start: 3).getAsInteger(Radix: 10, Result&: SMVersion)) {
36 Err = llvm::joinErrors(E1: std::move(Err), E2: llvm::make_error<llvm::StringError>(
37 Args: "Invalid CUDA architecture",
38 Args: llvm::inconvertibleErrorCode()));
39 return;
40 }
41}
42
43llvm::Expected<PartialTranslationUnit &>
44IncrementalCUDADeviceParser::Parse(llvm::StringRef Input) {
45 auto PTU = IncrementalParser::Parse(Input);
46 if (!PTU)
47 return PTU.takeError();
48
49 auto PTX = GeneratePTX();
50 if (!PTX)
51 return PTX.takeError();
52
53 auto Err = GenerateFatbinary();
54 if (Err)
55 return std::move(Err);
56
57 std::string FatbinFileName =
58 "/incr_module_" + std::to_string(val: PTUs.size()) + ".fatbin";
59 VFS->addFile(Path: FatbinFileName, ModificationTime: 0,
60 Buffer: llvm::MemoryBuffer::getMemBuffer(
61 InputData: llvm::StringRef(FatbinContent.data(), FatbinContent.size()),
62 BufferName: "", RequiresNullTerminator: false));
63
64 HostParser.getCI()->getCodeGenOpts().CudaGpuBinaryFileName = FatbinFileName;
65
66 FatbinContent.clear();
67
68 return PTU;
69}
70
71llvm::Expected<llvm::StringRef> IncrementalCUDADeviceParser::GeneratePTX() {
72 auto &PTU = PTUs.back();
73 std::string Error;
74
75 const llvm::Target *Target = llvm::TargetRegistry::lookupTarget(
76 Triple: PTU.TheModule->getTargetTriple(), Error);
77 if (!Target)
78 return llvm::make_error<llvm::StringError>(Args: std::move(Error),
79 Args: std::error_code());
80 llvm::TargetOptions TO = llvm::TargetOptions();
81 llvm::TargetMachine *TargetMachine = Target->createTargetMachine(
82 TT: PTU.TheModule->getTargetTriple(), CPU: getCI()->getTargetOpts().CPU, Features: "", Options: TO,
83 RM: llvm::Reloc::Model::PIC_);
84 PTU.TheModule->setDataLayout(TargetMachine->createDataLayout());
85
86 PTXCode.clear();
87 llvm::raw_svector_ostream dest(PTXCode);
88
89 llvm::legacy::PassManager PM;
90 if (TargetMachine->addPassesToEmitFile(PM, dest, nullptr,
91 llvm::CodeGenFileType::AssemblyFile)) {
92 return llvm::make_error<llvm::StringError>(
93 Args: "NVPTX backend cannot produce PTX code.",
94 Args: llvm::inconvertibleErrorCode());
95 }
96
97 if (!PM.run(M&: *PTU.TheModule))
98 return llvm::make_error<llvm::StringError>(Args: "Failed to emit PTX code.",
99 Args: llvm::inconvertibleErrorCode());
100
101 PTXCode += '\0';
102 while (PTXCode.size() % 8)
103 PTXCode += '\0';
104 return PTXCode.str();
105}
106
107llvm::Error IncrementalCUDADeviceParser::GenerateFatbinary() {
108 enum FatBinFlags {
109 AddressSize64 = 0x01,
110 HasDebugInfo = 0x02,
111 ProducerCuda = 0x04,
112 HostLinux = 0x10,
113 HostMac = 0x20,
114 HostWindows = 0x40
115 };
116
117 struct FatBinInnerHeader {
118 uint16_t Kind; // 0x00
119 uint16_t unknown02; // 0x02
120 uint32_t HeaderSize; // 0x04
121 uint32_t DataSize; // 0x08
122 uint32_t unknown0c; // 0x0c
123 uint32_t CompressedSize; // 0x10
124 uint32_t SubHeaderSize; // 0x14
125 uint16_t VersionMinor; // 0x18
126 uint16_t VersionMajor; // 0x1a
127 uint32_t CudaArch; // 0x1c
128 uint32_t unknown20; // 0x20
129 uint32_t unknown24; // 0x24
130 uint32_t Flags; // 0x28
131 uint32_t unknown2c; // 0x2c
132 uint32_t unknown30; // 0x30
133 uint32_t unknown34; // 0x34
134 uint32_t UncompressedSize; // 0x38
135 uint32_t unknown3c; // 0x3c
136 uint32_t unknown40; // 0x40
137 uint32_t unknown44; // 0x44
138 FatBinInnerHeader(uint32_t DataSize, uint32_t CudaArch, uint32_t Flags)
139 : Kind(1 /*PTX*/), unknown02(0x0101), HeaderSize(sizeof(*this)),
140 DataSize(DataSize), unknown0c(0), CompressedSize(0),
141 SubHeaderSize(HeaderSize - 8), VersionMinor(2), VersionMajor(4),
142 CudaArch(CudaArch), unknown20(0), unknown24(0), Flags(Flags),
143 unknown2c(0), unknown30(0), unknown34(0), UncompressedSize(0),
144 unknown3c(0), unknown40(0), unknown44(0) {}
145 };
146
147 struct FatBinHeader {
148 uint32_t Magic; // 0x00
149 uint16_t Version; // 0x04
150 uint16_t HeaderSize; // 0x06
151 uint32_t DataSize; // 0x08
152 uint32_t unknown0c; // 0x0c
153 public:
154 FatBinHeader(uint32_t DataSize)
155 : Magic(0xba55ed50), Version(1), HeaderSize(sizeof(*this)),
156 DataSize(DataSize), unknown0c(0) {}
157 };
158
159 FatBinHeader OuterHeader(sizeof(FatBinInnerHeader) + PTXCode.size());
160 FatbinContent.append(in_start: (char *)&OuterHeader,
161 in_end: ((char *)&OuterHeader) + OuterHeader.HeaderSize);
162
163 FatBinInnerHeader InnerHeader(PTXCode.size(), SMVersion,
164 FatBinFlags::AddressSize64 |
165 FatBinFlags::HostLinux);
166 FatbinContent.append(in_start: (char *)&InnerHeader,
167 in_end: ((char *)&InnerHeader) + InnerHeader.HeaderSize);
168
169 FatbinContent.append(in_start: PTXCode.begin(), in_end: PTXCode.end());
170
171 return llvm::Error::success();
172}
173
174IncrementalCUDADeviceParser::~IncrementalCUDADeviceParser() {}
175
176} // namespace clang
177

source code of clang/lib/Interpreter/DeviceOffload.cpp