1 | //===-RTLs/generic-64bit/src/rtl.cpp - Target RTLs Implementation - C++ -*-===// |
---|---|
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // RTL NextGen for generic 64-bit machine |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
#include <cassert>
#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <ffi.h>
#include <string>
#include <unistd.h>
#include <unordered_map>

#include "Shared/Debug.h"
#include "Shared/Environment.h"
#include "Utils/ELF.h"

#include "GlobalHandler.h"
#include "OpenMP/OMPT/Callback.h"
#include "PluginInterface.h"
#include "omptarget.h"

#include "llvm/ADT/SmallVector.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/Frontend/OpenMP/OMPDeviceConstants.h"
#include "llvm/Frontend/OpenMP/OMPGridValues.h"
#include "llvm/Support/DynamicLibrary.h"
33 | |
// Byte-order detection relies on the compiler-provided macros; the build is
// rejected when any of them is missing. Exactly one of LITTLEENDIAN_CPU or
// BIGENDIAN_CPU is defined when the byte order matches one of the two known
// orders.
#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) &&             \
    defined(__ORDER_BIG_ENDIAN__)
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define LITTLEENDIAN_CPU
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define BIGENDIAN_CPU
#endif
#else
#error "Missing preprocessor definitions for endianness detection."
#endif

// Number of devices reported by this plugin.
#define NUM_DEVICES 4
47 | |
48 | namespace llvm { |
49 | namespace omp { |
50 | namespace target { |
51 | namespace plugin { |
52 | |
53 | /// Forward declarations for all specialized data structures. |
54 | struct GenELF64KernelTy; |
55 | struct GenELF64DeviceTy; |
56 | struct GenELF64PluginTy; |
57 | |
58 | using llvm::sys::DynamicLibrary; |
59 | using namespace error; |
60 | |
61 | /// Class implementing kernel functionalities for GenELF64. |
62 | struct GenELF64KernelTy : public GenericKernelTy { |
63 | /// Construct the kernel with a name and an execution mode. |
64 | GenELF64KernelTy(const char *Name) : GenericKernelTy(Name), Func(nullptr) {} |
65 | |
66 | /// Initialize the kernel. |
67 | Error initImpl(GenericDeviceTy &Device, DeviceImageTy &Image) override { |
68 | // Functions have zero size. |
69 | GlobalTy Global(getName(), 0); |
70 | |
71 | // Get the metadata (address) of the kernel function. |
72 | GenericGlobalHandlerTy &GHandler = Device.Plugin.getGlobalHandler(); |
73 | if (auto Err = GHandler.getGlobalMetadataFromDevice(Device, Image, Global)) |
74 | return Err; |
75 | |
76 | // Check that the function pointer is valid. |
77 | if (!Global.getPtr()) |
78 | return Plugin::error(ErrorCode::INVALID_BINARY, |
79 | "invalid function for kernel %s", getName()); |
80 | |
81 | // Save the function pointer. |
82 | Func = (void (*)())Global.getPtr(); |
83 | |
84 | KernelEnvironment.Configuration.ExecMode = OMP_TGT_EXEC_MODE_GENERIC; |
85 | KernelEnvironment.Configuration.MayUseNestedParallelism = /*Unknown=*/2; |
86 | KernelEnvironment.Configuration.UseGenericStateMachine = /*Unknown=*/2; |
87 | |
88 | // Set the maximum number of threads to a single. |
89 | MaxNumThreads = 1; |
90 | return Plugin::success(); |
91 | } |
92 | |
93 | /// Launch the kernel using the libffi. |
94 | Error launchImpl(GenericDeviceTy &GenericDevice, uint32_t NumThreads[3], |
95 | uint32_t NumBlocks[3], KernelArgsTy &KernelArgs, |
96 | KernelLaunchParamsTy LaunchParams, |
97 | AsyncInfoWrapperTy &AsyncInfoWrapper) const override { |
98 | // Create a vector of ffi_types, one per argument. |
99 | SmallVector<ffi_type *, 16> ArgTypes(KernelArgs.NumArgs, &ffi_type_pointer); |
100 | ffi_type **ArgTypesPtr = (ArgTypes.size()) ? &ArgTypes[0] : nullptr; |
101 | |
102 | // Prepare the cif structure before running the kernel function. |
103 | ffi_cif Cif; |
104 | ffi_status Status = ffi_prep_cif(&Cif, FFI_DEFAULT_ABI, KernelArgs.NumArgs, |
105 | &ffi_type_void, ArgTypesPtr); |
106 | if (Status != FFI_OK) |
107 | return Plugin::error(ErrorCode::UNKNOWN, "error in ffi_prep_cif: %d", |
108 | Status); |
109 | |
110 | // Call the kernel function through libffi. |
111 | long Return; |
112 | ffi_call(cif: &Cif, fn: Func, rvalue: &Return, avalue: (void **)LaunchParams.Ptrs); |
113 | |
114 | return Plugin::success(); |
115 | } |
116 | |
117 | private: |
118 | /// The kernel function to execute. |
119 | void (*Func)(void); |
120 | }; |
121 | |
122 | /// Class implementing the GenELF64 device images properties. |
123 | struct GenELF64DeviceImageTy : public DeviceImageTy { |
124 | /// Create the GenELF64 image with the id and the target image pointer. |
125 | GenELF64DeviceImageTy(int32_t ImageId, GenericDeviceTy &Device, |
126 | const __tgt_device_image *TgtImage) |
127 | : DeviceImageTy(ImageId, Device, TgtImage), DynLib() {} |
128 | |
129 | /// Getter and setter for the dynamic library. |
130 | DynamicLibrary &getDynamicLibrary() { return DynLib; } |
131 | void setDynamicLibrary(const DynamicLibrary &Lib) { DynLib = Lib; } |
132 | |
133 | private: |
134 | /// The dynamic library that loaded the image. |
135 | DynamicLibrary DynLib; |
136 | }; |
137 | |
138 | /// Class implementing the device functionalities for GenELF64. |
139 | struct GenELF64DeviceTy : public GenericDeviceTy { |
140 | /// Create the device with a specific id. |
141 | GenELF64DeviceTy(GenericPluginTy &Plugin, int32_t DeviceId, |
142 | int32_t NumDevices) |
143 | : GenericDeviceTy(Plugin, DeviceId, NumDevices, GenELF64GridValues) {} |
144 | |
145 | ~GenELF64DeviceTy() {} |
146 | |
147 | /// Initialize the device, which is a no-op |
148 | Error initImpl(GenericPluginTy &Plugin) override { return Plugin::success(); } |
149 | |
150 | /// Deinitialize the device, which is a no-op |
151 | Error deinitImpl() override { return Plugin::success(); } |
152 | |
153 | /// See GenericDeviceTy::getComputeUnitKind(). |
154 | std::string getComputeUnitKind() const override { return "generic-64bit"; } |
155 | |
156 | /// Construct the kernel for a specific image on the device. |
157 | Expected<GenericKernelTy &> constructKernel(const char *Name) override { |
158 | // Allocate and construct the kernel. |
159 | GenELF64KernelTy *GenELF64Kernel = Plugin.allocate<GenELF64KernelTy>(); |
160 | if (!GenELF64Kernel) |
161 | return Plugin::error(ErrorCode::OUT_OF_RESOURCES, |
162 | "failed to allocate memory for GenELF64 kernel"); |
163 | |
164 | new (GenELF64Kernel) GenELF64KernelTy(Name); |
165 | |
166 | return *GenELF64Kernel; |
167 | } |
168 | |
169 | /// Set the current context to this device, which is a no-op. |
170 | Error setContext() override { return Plugin::success(); } |
171 | |
172 | /// Load the binary image into the device and allocate an image object. |
173 | Expected<DeviceImageTy *> loadBinaryImpl(const __tgt_device_image *TgtImage, |
174 | int32_t ImageId) override { |
175 | // Allocate and initialize the image object. |
176 | GenELF64DeviceImageTy *Image = Plugin.allocate<GenELF64DeviceImageTy>(); |
177 | new (Image) GenELF64DeviceImageTy(ImageId, *this, TgtImage); |
178 | |
179 | // Create a temporary file. |
180 | char TmpFileName[] = "/tmp/tmpfile_XXXXXX"; |
181 | int TmpFileFd = mkstemp(template: TmpFileName); |
182 | if (TmpFileFd == -1) |
183 | return Plugin::error(ErrorCode::HOST_IO, |
184 | "failed to create tmpfile for loading target image"); |
185 | |
186 | // Open the temporary file. |
187 | FILE *TmpFile = fdopen(fd: TmpFileFd, modes: "wb"); |
188 | if (!TmpFile) |
189 | return Plugin::error(ErrorCode::HOST_IO, |
190 | "failed to open tmpfile %s for loading target image", |
191 | TmpFileName); |
192 | |
193 | // Write the image into the temporary file. |
194 | size_t Written = fwrite(Image->getStart(), Image->getSize(), 1, TmpFile); |
195 | if (Written != 1) |
196 | return Plugin::error(ErrorCode::HOST_IO, |
197 | "failed to write target image to tmpfile %s", |
198 | TmpFileName); |
199 | |
200 | // Close the temporary file. |
201 | int Ret = fclose(stream: TmpFile); |
202 | if (Ret) |
203 | return Plugin::error(ErrorCode::HOST_IO, |
204 | "failed to close tmpfile %s with the target image", |
205 | TmpFileName); |
206 | |
207 | // Load the temporary file as a dynamic library. |
208 | std::string ErrMsg; |
209 | DynamicLibrary DynLib = |
210 | DynamicLibrary::getPermanentLibrary(filename: TmpFileName, errMsg: &ErrMsg); |
211 | |
212 | // Check if the loaded library is valid. |
213 | if (!DynLib.isValid()) |
214 | return Plugin::error(ErrorCode::INVALID_BINARY, |
215 | "failed to load target image: %s", ErrMsg.c_str()); |
216 | |
217 | // Save a reference of the image's dynamic library. |
218 | Image->setDynamicLibrary(DynLib); |
219 | |
220 | return Image; |
221 | } |
222 | |
223 | /// Allocate memory. Use std::malloc in all cases. |
224 | void *allocate(size_t Size, void *, TargetAllocTy Kind) override { |
225 | if (Size == 0) |
226 | return nullptr; |
227 | |
228 | void *MemAlloc = nullptr; |
229 | switch (Kind) { |
230 | case TARGET_ALLOC_DEFAULT: |
231 | case TARGET_ALLOC_DEVICE: |
232 | case TARGET_ALLOC_HOST: |
233 | case TARGET_ALLOC_SHARED: |
234 | case TARGET_ALLOC_DEVICE_NON_BLOCKING: |
235 | MemAlloc = std::malloc(size: Size); |
236 | break; |
237 | } |
238 | return MemAlloc; |
239 | } |
240 | |
241 | /// Free the memory. Use std::free in all cases. |
242 | int free(void *TgtPtr, TargetAllocTy Kind) override { |
243 | std::free(ptr: TgtPtr); |
244 | return OFFLOAD_SUCCESS; |
245 | } |
246 | |
247 | /// This plugin does nothing to lock buffers. Do not return an error, just |
248 | /// return the same pointer as the device pointer. |
249 | Expected<void *> dataLockImpl(void *HstPtr, int64_t Size) override { |
250 | return HstPtr; |
251 | } |
252 | |
253 | /// Nothing to do when unlocking the buffer. |
254 | Error dataUnlockImpl(void *HstPtr) override { return Plugin::success(); } |
255 | |
256 | /// Indicate that the buffer is not pinned. |
257 | Expected<bool> isPinnedPtrImpl(void *HstPtr, void *&BaseHstPtr, |
258 | void *&BaseDevAccessiblePtr, |
259 | size_t &BaseSize) const override { |
260 | return false; |
261 | } |
262 | |
263 | /// Submit data to the device (host to device transfer). |
264 | Error dataSubmitImpl(void *TgtPtr, const void *HstPtr, int64_t Size, |
265 | AsyncInfoWrapperTy &AsyncInfoWrapper) override { |
266 | std::memcpy(dest: TgtPtr, src: HstPtr, n: Size); |
267 | return Plugin::success(); |
268 | } |
269 | |
270 | /// Retrieve data from the device (device to host transfer). |
271 | Error dataRetrieveImpl(void *HstPtr, const void *TgtPtr, int64_t Size, |
272 | AsyncInfoWrapperTy &AsyncInfoWrapper) override { |
273 | std::memcpy(dest: HstPtr, src: TgtPtr, n: Size); |
274 | return Plugin::success(); |
275 | } |
276 | |
277 | /// Exchange data between two devices within the plugin. This function is not |
278 | /// supported in this plugin. |
279 | Error dataExchangeImpl(const void *SrcPtr, GenericDeviceTy &DstGenericDevice, |
280 | void *DstPtr, int64_t Size, |
281 | AsyncInfoWrapperTy &AsyncInfoWrapper) override { |
282 | // This function should never be called because the function |
283 | // GenELF64PluginTy::isDataExchangable() returns false. |
284 | return Plugin::error(ErrorCode::UNSUPPORTED, |
285 | "dataExchangeImpl not supported"); |
286 | } |
287 | |
288 | /// All functions are already synchronous. No need to do anything on this |
289 | /// synchronization function. |
290 | Error synchronizeImpl(__tgt_async_info &AsyncInfo) override { |
291 | return Plugin::success(); |
292 | } |
293 | |
294 | /// All functions are already synchronous. No need to do anything on this |
295 | /// query function. |
296 | Error queryAsyncImpl(__tgt_async_info &AsyncInfo) override { |
297 | return Plugin::success(); |
298 | } |
299 | |
300 | /// This plugin does not support interoperability |
301 | Error initAsyncInfoImpl(AsyncInfoWrapperTy &AsyncInfoWrapper) override { |
302 | return Plugin::error(ErrorCode::UNSUPPORTED, |
303 | "initAsyncInfoImpl not supported"); |
304 | } |
305 | |
306 | /// This plugin does not support interoperability |
307 | Error initDeviceInfoImpl(__tgt_device_info *DeviceInfo) override { |
308 | return Plugin::error(ErrorCode::UNSUPPORTED, |
309 | "initDeviceInfoImpl not supported"); |
310 | } |
311 | |
312 | /// This plugin does not support the event API. Do nothing without failing. |
313 | Error createEventImpl(void **EventPtrStorage) override { |
314 | *EventPtrStorage = nullptr; |
315 | return Plugin::success(); |
316 | } |
317 | Error destroyEventImpl(void *EventPtr) override { return Plugin::success(); } |
318 | Error recordEventImpl(void *EventPtr, |
319 | AsyncInfoWrapperTy &AsyncInfoWrapper) override { |
320 | return Plugin::success(); |
321 | } |
322 | Error waitEventImpl(void *EventPtr, |
323 | AsyncInfoWrapperTy &AsyncInfoWrapper) override { |
324 | return Plugin::success(); |
325 | } |
326 | Error syncEventImpl(void *EventPtr) override { return Plugin::success(); } |
327 | |
328 | /// Print information about the device. |
329 | Error obtainInfoImpl(InfoQueueTy &Info) override { |
330 | Info.add("Device Type", "Generic-elf-64bit"); |
331 | return Plugin::success(); |
332 | } |
333 | |
334 | /// This plugin should not setup the device environment or memory pool. |
335 | virtual bool shouldSetupDeviceEnvironment() const override { return false; }; |
336 | virtual bool shouldSetupDeviceMemoryPool() const override { return false; }; |
337 | |
338 | /// Getters and setters for stack size and heap size not relevant. |
339 | Error getDeviceStackSize(uint64_t &Value) override { |
340 | Value = 0; |
341 | return Plugin::success(); |
342 | } |
343 | Error setDeviceStackSize(uint64_t Value) override { |
344 | return Plugin::success(); |
345 | } |
346 | Error getDeviceHeapSize(uint64_t &Value) override { |
347 | Value = 0; |
348 | return Plugin::success(); |
349 | } |
350 | Error setDeviceHeapSize(uint64_t Value) override { return Plugin::success(); } |
351 | |
352 | private: |
353 | /// Grid values for Generic ELF64 plugins. |
354 | static constexpr GV GenELF64GridValues = { |
355 | .GV_Slot_Size: 1, // GV_Slot_Size |
356 | .GV_Warp_Size: 1, // GV_Warp_Size |
357 | .GV_Max_Teams: 1, // GV_Max_Teams |
358 | .GV_Default_Num_Teams: 1, // GV_Default_Num_Teams |
359 | .GV_SimpleBufferSize: 1, // GV_SimpleBufferSize |
360 | .GV_Max_WG_Size: 1, // GV_Max_WG_Size |
361 | .GV_Default_WG_Size: 1, // GV_Default_WG_Size |
362 | }; |
363 | }; |
364 | |
365 | class GenELF64GlobalHandlerTy final : public GenericGlobalHandlerTy { |
366 | public: |
367 | Error getGlobalMetadataFromDevice(GenericDeviceTy &GenericDevice, |
368 | DeviceImageTy &Image, |
369 | GlobalTy &DeviceGlobal) override { |
370 | const char *GlobalName = DeviceGlobal.getName().data(); |
371 | GenELF64DeviceImageTy &GenELF64Image = |
372 | static_cast<GenELF64DeviceImageTy &>(Image); |
373 | |
374 | // Get dynamic library that has loaded the device image. |
375 | DynamicLibrary &DynLib = GenELF64Image.getDynamicLibrary(); |
376 | |
377 | // Get the address of the symbol. |
378 | void *Addr = DynLib.getAddressOfSymbol(symbolName: GlobalName); |
379 | if (Addr == nullptr) { |
380 | return Plugin::error(ErrorCode::NOT_FOUND, "failed to load global '%s'", |
381 | GlobalName); |
382 | } |
383 | |
384 | // Save the pointer to the symbol. |
385 | DeviceGlobal.setPtr(Addr); |
386 | |
387 | return Plugin::success(); |
388 | } |
389 | }; |
390 | |
391 | /// Class implementing the plugin functionalities for GenELF64. |
392 | struct GenELF64PluginTy final : public GenericPluginTy { |
393 | /// Create the GenELF64 plugin. |
394 | GenELF64PluginTy() : GenericPluginTy(getTripleArch()) {} |
395 | |
396 | /// This class should not be copied. |
397 | GenELF64PluginTy(const GenELF64PluginTy &) = delete; |
398 | GenELF64PluginTy(GenELF64PluginTy &&) = delete; |
399 | |
400 | /// Initialize the plugin and return the number of devices. |
401 | Expected<int32_t> initImpl() override { |
402 | #ifdef USES_DYNAMIC_FFI |
403 | if (auto Err = Plugin::check(ffi_init(), "failed to initialize libffi")) |
404 | return std::move(Err); |
405 | #endif |
406 | |
407 | return NUM_DEVICES; |
408 | } |
409 | |
410 | /// Deinitialize the plugin. |
411 | Error deinitImpl() override { return Plugin::success(); } |
412 | |
413 | /// Creates a generic ELF device. |
414 | GenericDeviceTy *createDevice(GenericPluginTy &Plugin, int32_t DeviceId, |
415 | int32_t NumDevices) override { |
416 | return new GenELF64DeviceTy(Plugin, DeviceId, NumDevices); |
417 | } |
418 | |
419 | /// Creates a generic global handler. |
420 | GenericGlobalHandlerTy *createGlobalHandler() override { |
421 | return new GenELF64GlobalHandlerTy(); |
422 | } |
423 | |
424 | /// Get the ELF code to recognize the compatible binary images. |
425 | uint16_t getMagicElfBits() const override { |
426 | return utils::elf::getTargetMachine(); |
427 | } |
428 | |
429 | /// This plugin does not support exchanging data between two devices. |
430 | bool isDataExchangable(int32_t SrcDeviceId, int32_t DstDeviceId) override { |
431 | return false; |
432 | } |
433 | |
434 | /// All images (ELF-compatible) should be compatible with this plugin. |
435 | Expected<bool> isELFCompatible(uint32_t, StringRef) const override { |
436 | return true; |
437 | } |
438 | |
439 | Triple::ArchType getTripleArch() const override { |
440 | #if defined(__x86_64__) |
441 | return llvm::Triple::x86_64; |
442 | #elif defined(__s390x__) |
443 | return llvm::Triple::systemz; |
444 | #elif defined(__aarch64__) |
445 | #ifdef LITTLEENDIAN_CPU |
446 | return llvm::Triple::aarch64; |
447 | #else |
448 | return llvm::Triple::aarch64_be; |
449 | #endif |
450 | #elif defined(__powerpc64__) |
451 | #ifdef LITTLEENDIAN_CPU |
452 | return llvm::Triple::ppc64le; |
453 | #else |
454 | return llvm::Triple::ppc64; |
455 | #endif |
456 | #elif defined(__riscv) && (__riscv_xlen == 64) |
457 | return llvm::Triple::riscv64; |
458 | #elif defined(__loongarch__) && (__loongarch_grlen == 64) |
459 | return llvm::Triple::loongarch64; |
460 | #else |
461 | return llvm::Triple::UnknownArch; |
462 | #endif |
463 | } |
464 | |
465 | const char *getName() const override { return GETNAME(TARGET_NAME); } |
466 | }; |
467 | |
468 | template <typename... ArgsTy> |
469 | static Error Plugin::check(int32_t Code, const char *ErrMsg, ArgsTy... Args) { |
470 | if (Code == 0) |
471 | return Plugin::success(); |
472 | |
473 | return Plugin::error(ErrorCode::UNKNOWN, ErrMsg, Args..., |
474 | std::to_string(Code).data()); |
475 | } |
476 | |
477 | } // namespace plugin |
478 | } // namespace target |
479 | } // namespace omp |
480 | } // namespace llvm |
481 | |
482 | extern "C"{ |
483 | llvm::omp::target::plugin::GenericPluginTy *createPlugin_host() { |
484 | return new llvm::omp::target::plugin::GenELF64PluginTy(); |
485 | } |
486 | } |
487 |
Definitions
- GenELF64KernelTy
- GenELF64KernelTy
- initImpl
- launchImpl
- GenELF64DeviceImageTy
- GenELF64DeviceImageTy
- getDynamicLibrary
- setDynamicLibrary
- GenELF64DeviceTy
- GenELF64DeviceTy
- ~GenELF64DeviceTy
- initImpl
- deinitImpl
- getComputeUnitKind
- constructKernel
- setContext
- loadBinaryImpl
- allocate
- free
- dataLockImpl
- dataUnlockImpl
- isPinnedPtrImpl
- dataSubmitImpl
- dataRetrieveImpl
- dataExchangeImpl
- synchronizeImpl
- queryAsyncImpl
- initAsyncInfoImpl
- initDeviceInfoImpl
- createEventImpl
- destroyEventImpl
- recordEventImpl
- waitEventImpl
- syncEventImpl
- obtainInfoImpl
- shouldSetupDeviceEnvironment
- shouldSetupDeviceMemoryPool
- getDeviceStackSize
- setDeviceStackSize
- getDeviceHeapSize
- setDeviceHeapSize
- GenELF64GlobalHandlerTy
- getGlobalMetadataFromDevice
- GenELF64PluginTy
- GenELF64PluginTy
- GenELF64PluginTy
- GenELF64PluginTy
- initImpl
- deinitImpl
- createDevice
- createGlobalHandler
- getMagicElfBits
- isDataExchangable
- isELFCompatible
- getTripleArch
- getName
Improve your Profiling and Debugging skills
Find out more