//===--- cuda/dynamic_cuda/cuda.cpp ------------------------------ C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
// Implements a subset of the CUDA API by calling into the CUDA library via
// dlopen. The dlopen/dlsym calls are performed as part of the call to cuInit.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/Support/DynamicLibrary.h"
15
16#include "Shared/Debug.h"
17
18#include "DLWrap.h"
19#include "cuda.h"
20
21#include <memory>
22#include <string>
23#include <unordered_map>
24
25DLWRAP_INITIALIZE()
26
27DLWRAP_INTERNAL(cuInit, 1)
28
29DLWRAP(cuCtxGetDevice, 1)
30DLWRAP(cuDeviceGet, 2)
31DLWRAP(cuDeviceGetAttribute, 3)
32DLWRAP(cuDeviceGetCount, 1)
33DLWRAP(cuFuncGetAttribute, 3)
34
35// Device info
36DLWRAP(cuDeviceGetName, 3)
37DLWRAP(cuDeviceTotalMem, 2)
38DLWRAP(cuDriverGetVersion, 1)
39
40DLWRAP(cuGetErrorString, 2)
41DLWRAP(cuLaunchKernel, 11)
42DLWRAP(cuLaunchHostFunc, 3)
43
44DLWRAP(cuMemAlloc, 2)
45DLWRAP(cuMemAllocHost, 2)
46DLWRAP(cuMemAllocManaged, 3)
47DLWRAP(cuMemAllocAsync, 3)
48
49DLWRAP(cuMemcpyDtoDAsync, 4)
50DLWRAP(cuMemcpyDtoH, 3)
51DLWRAP(cuMemcpyDtoHAsync, 4)
52DLWRAP(cuMemcpyHtoD, 3)
53DLWRAP(cuMemcpyHtoDAsync, 4)
54
55DLWRAP(cuMemFree, 1)
56DLWRAP(cuMemFreeHost, 1)
57DLWRAP(cuMemFreeAsync, 2)
58
59DLWRAP(cuModuleGetFunction, 3)
60DLWRAP(cuModuleGetGlobal, 4)
61
62DLWRAP(cuModuleUnload, 1)
63DLWRAP(cuStreamCreate, 2)
64DLWRAP(cuStreamDestroy, 1)
65DLWRAP(cuStreamSynchronize, 1)
66DLWRAP(cuStreamQuery, 1)
67DLWRAP(cuStreamAddCallback, 4)
68DLWRAP(cuCtxSetCurrent, 1)
69DLWRAP(cuDevicePrimaryCtxRelease, 1)
70DLWRAP(cuDevicePrimaryCtxGetState, 3)
71DLWRAP(cuDevicePrimaryCtxSetFlags, 2)
72DLWRAP(cuDevicePrimaryCtxRetain, 2)
73DLWRAP(cuModuleLoadDataEx, 5)
74
75DLWRAP(cuDeviceCanAccessPeer, 3)
76DLWRAP(cuCtxEnablePeerAccess, 2)
77DLWRAP(cuMemcpyPeerAsync, 6)
78
79DLWRAP(cuCtxGetLimit, 2)
80DLWRAP(cuCtxSetLimit, 2)
81
82DLWRAP(cuEventCreate, 2)
83DLWRAP(cuEventRecord, 2)
84DLWRAP(cuStreamWaitEvent, 3)
85DLWRAP(cuEventSynchronize, 1)
86DLWRAP(cuEventDestroy, 1)
87
88DLWRAP_FINALIZE()
89
90DLWRAP(cuMemUnmap, 2)
91DLWRAP(cuMemRelease, 1)
92DLWRAP(cuMemAddressFree, 2)
93DLWRAP(cuMemGetInfo, 2)
94DLWRAP(cuMemAddressReserve, 5)
95DLWRAP(cuMemMap, 5)
96DLWRAP(cuMemCreate, 4)
97DLWRAP(cuMemSetAccess, 4)
98DLWRAP(cuMemGetAllocationGranularity, 3)
99
// Library name passed to the dynamic loader by checkForCUDA(). Only defined
// here when the build has not already supplied a value.
#if !defined(DYNAMIC_CUDA_PATH)
#define DYNAMIC_CUDA_PATH "libcuda.so"
#endif

// Fallback target name and debug-message prefix, each defined only when not
// already provided. GETNAME is expected to come from an included header.
#if !defined(TARGET_NAME)
#define TARGET_NAME CUDA
#endif
#if !defined(DEBUG_PREFIX)
#define DEBUG_PREFIX "Target " GETNAME(TARGET_NAME) " RTL"
#endif
110
111static bool checkForCUDA() {
112 // return true if dlopen succeeded and all functions found
113
114 // Prefer _v2 versions of functions if found in the library
115 std::unordered_map<std::string, const char *> TryFirst = {
116 {"cuMemAlloc", "cuMemAlloc_v2"},
117 {"cuMemFree", "cuMemFree_v2"},
118 {"cuMemcpyDtoH", "cuMemcpyDtoH_v2"},
119 {"cuMemcpyHtoD", "cuMemcpyHtoD_v2"},
120 {"cuStreamDestroy", "cuStreamDestroy_v2"},
121 {"cuModuleGetGlobal", "cuModuleGetGlobal_v2"},
122 {"cuMemcpyDtoHAsync", "cuMemcpyDtoHAsync_v2"},
123 {"cuMemcpyDtoDAsync", "cuMemcpyDtoDAsync_v2"},
124 {"cuMemcpyHtoDAsync", "cuMemcpyHtoDAsync_v2"},
125 {"cuDevicePrimaryCtxRelease", "cuDevicePrimaryCtxRelease_v2"},
126 {"cuDevicePrimaryCtxSetFlags", "cuDevicePrimaryCtxSetFlags_v2"},
127 };
128
129 const char *CudaLib = DYNAMIC_CUDA_PATH;
130 std::string ErrMsg;
131 auto DynlibHandle = std::make_unique<llvm::sys::DynamicLibrary>(
132 llvm::sys::DynamicLibrary::getPermanentLibrary(CudaLib, &ErrMsg));
133 if (!DynlibHandle->isValid()) {
134 DP("Unable to load library '%s': %s!\n", CudaLib, ErrMsg.c_str());
135 return false;
136 }
137
138 for (size_t I = 0; I < dlwrap::size(); I++) {
139 const char *Sym = dlwrap::symbol(I);
140
141 auto It = TryFirst.find(Sym);
142 if (It != TryFirst.end()) {
143 const char *First = It->second;
144 void *P = DynlibHandle->getAddressOfSymbol(First);
145 if (P) {
146 DP("Implementing %s with dlsym(%s) -> %p\n", Sym, First, P);
147 *dlwrap::pointer(I) = P;
148 continue;
149 }
150 }
151
152 void *P = DynlibHandle->getAddressOfSymbol(Sym);
153 if (P == nullptr) {
154 DP("Unable to find '%s' in '%s'!\n", Sym, CudaLib);
155 return false;
156 }
157 DP("Implementing %s with dlsym(%s) -> %p\n", Sym, Sym, P);
158
159 *dlwrap::pointer(I) = P;
160 }
161
162 return true;
163}
164
165CUresult cuInit(unsigned X) {
166 // Note: Called exactly once from cuda rtl.cpp in a global constructor so
167 // does not need to handle being called repeatedly or concurrently
168 if (!checkForCUDA()) {
169 return CUDA_ERROR_INVALID_HANDLE;
170 }
171 return dlwrap_cuInit(X);
172}
173

// Source: offload/plugins-nextgen/cuda/dynamic_cuda/cuda.cpp