//===--- cuda/dynamic_cuda/cuda.cpp ------------------------------ C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
// Implements a subset of the CUDA API by calling into the CUDA library via
// dlopen. The dlopen/dlsym calls are performed as part of the call to cuInit.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/Support/DynamicLibrary.h"
15
16#include "Shared/Debug.h"
17
18#include "DLWrap.h"
19#include "cuda.h"
20
21#include <memory>
22#include <string>
23#include <unordered_map>
24
25DLWRAP_INITIALIZE()
26
27DLWRAP_INTERNAL(cuInit, 1)
28
29DLWRAP(cuCtxGetDevice, 1)
30DLWRAP(cuDeviceGet, 2)
31DLWRAP(cuDeviceGetAttribute, 3)
32DLWRAP(cuDeviceGetCount, 1)
33DLWRAP(cuFuncGetAttribute, 3)
34
35// Device info
36DLWRAP(cuDeviceGetName, 3)
37DLWRAP(cuDeviceTotalMem, 2)
38DLWRAP(cuDriverGetVersion, 1)
39
40DLWRAP(cuGetErrorString, 2)
41DLWRAP(cuLaunchKernel, 11)
42
43DLWRAP(cuMemAlloc, 2)
44DLWRAP(cuMemAllocHost, 2)
45DLWRAP(cuMemAllocManaged, 3)
46DLWRAP(cuMemAllocAsync, 3)
47
48DLWRAP(cuMemcpyDtoDAsync, 4)
49DLWRAP(cuMemcpyDtoH, 3)
50DLWRAP(cuMemcpyDtoHAsync, 4)
51DLWRAP(cuMemcpyHtoD, 3)
52DLWRAP(cuMemcpyHtoDAsync, 4)
53
54DLWRAP(cuMemFree, 1)
55DLWRAP(cuMemFreeHost, 1)
56DLWRAP(cuMemFreeAsync, 2)
57
58DLWRAP(cuModuleGetFunction, 3)
59DLWRAP(cuModuleGetGlobal, 4)
60
61DLWRAP(cuModuleUnload, 1)
62DLWRAP(cuStreamCreate, 2)
63DLWRAP(cuStreamDestroy, 1)
64DLWRAP(cuStreamSynchronize, 1)
65DLWRAP(cuStreamQuery, 1)
66DLWRAP(cuCtxSetCurrent, 1)
67DLWRAP(cuDevicePrimaryCtxRelease, 1)
68DLWRAP(cuDevicePrimaryCtxGetState, 3)
69DLWRAP(cuDevicePrimaryCtxSetFlags, 2)
70DLWRAP(cuDevicePrimaryCtxRetain, 2)
71DLWRAP(cuModuleLoadDataEx, 5)
72
73DLWRAP(cuDeviceCanAccessPeer, 3)
74DLWRAP(cuCtxEnablePeerAccess, 2)
75DLWRAP(cuMemcpyPeerAsync, 6)
76
77DLWRAP(cuCtxGetLimit, 2)
78DLWRAP(cuCtxSetLimit, 2)
79
80DLWRAP(cuEventCreate, 2)
81DLWRAP(cuEventRecord, 2)
82DLWRAP(cuStreamWaitEvent, 3)
83DLWRAP(cuEventSynchronize, 1)
84DLWRAP(cuEventDestroy, 1)
85
86DLWRAP_FINALIZE()
87
88DLWRAP(cuMemUnmap, 2)
89DLWRAP(cuMemRelease, 1)
90DLWRAP(cuMemAddressFree, 2)
91DLWRAP(cuMemGetInfo, 2)
92DLWRAP(cuMemAddressReserve, 5)
93DLWRAP(cuMemMap, 5)
94DLWRAP(cuMemCreate, 4)
95DLWRAP(cuMemSetAccess, 4)
96DLWRAP(cuMemGetAllocationGranularity, 3)
97
// Library name handed to the dynamic loader by checkForCUDA(). A definition
// supplied before this point (e.g. on the compiler command line) wins.
#if !defined(DYNAMIC_CUDA_PATH)
#define DYNAMIC_CUDA_PATH "libcuda.so"
#endif

// Fallback target name and message prefix; presumably consumed by the DP()
// debug macro from Shared/Debug.h -- confirm against that header.
#if !defined(TARGET_NAME)
#define TARGET_NAME CUDA
#endif
#if !defined(DEBUG_PREFIX)
#define DEBUG_PREFIX "Target " GETNAME(TARGET_NAME) " RTL"
#endif
108
109static bool checkForCUDA() {
110 // return true if dlopen succeeded and all functions found
111
112 // Prefer _v2 versions of functions if found in the library
113 std::unordered_map<std::string, const char *> TryFirst = {
114 {"cuMemAlloc", "cuMemAlloc_v2"},
115 {"cuMemFree", "cuMemFree_v2"},
116 {"cuMemcpyDtoH", "cuMemcpyDtoH_v2"},
117 {"cuMemcpyHtoD", "cuMemcpyHtoD_v2"},
118 {"cuStreamDestroy", "cuStreamDestroy_v2"},
119 {"cuModuleGetGlobal", "cuModuleGetGlobal_v2"},
120 {"cuMemcpyDtoHAsync", "cuMemcpyDtoHAsync_v2"},
121 {"cuMemcpyDtoDAsync", "cuMemcpyDtoDAsync_v2"},
122 {"cuMemcpyHtoDAsync", "cuMemcpyHtoDAsync_v2"},
123 {"cuDevicePrimaryCtxRelease", "cuDevicePrimaryCtxRelease_v2"},
124 {"cuDevicePrimaryCtxSetFlags", "cuDevicePrimaryCtxSetFlags_v2"},
125 };
126
127 const char *CudaLib = DYNAMIC_CUDA_PATH;
128 std::string ErrMsg;
129 auto DynlibHandle = std::make_unique<llvm::sys::DynamicLibrary>(
130 llvm::sys::DynamicLibrary::getPermanentLibrary(CudaLib, &ErrMsg));
131 if (!DynlibHandle->isValid()) {
132 DP("Unable to load library '%s': %s!\n", CudaLib, ErrMsg.c_str());
133 return false;
134 }
135
136 for (size_t I = 0; I < dlwrap::size(); I++) {
137 const char *Sym = dlwrap::symbol(I);
138
139 auto It = TryFirst.find(Sym);
140 if (It != TryFirst.end()) {
141 const char *First = It->second;
142 void *P = DynlibHandle->getAddressOfSymbol(First);
143 if (P) {
144 DP("Implementing %s with dlsym(%s) -> %p\n", Sym, First, P);
145 *dlwrap::pointer(I) = P;
146 continue;
147 }
148 }
149
150 void *P = DynlibHandle->getAddressOfSymbol(Sym);
151 if (P == nullptr) {
152 DP("Unable to find '%s' in '%s'!\n", Sym, CudaLib);
153 return false;
154 }
155 DP("Implementing %s with dlsym(%s) -> %p\n", Sym, Sym, P);
156
157 *dlwrap::pointer(I) = P;
158 }
159
160 return true;
161}
162
163CUresult cuInit(unsigned X) {
164 // Note: Called exactly once from cuda rtl.cpp in a global constructor so
165 // does not need to handle being called repeatedly or concurrently
166 if (!checkForCUDA()) {
167 return CUDA_ERROR_INVALID_HANDLE;
168 }
169 return dlwrap_cuInit(X);
170}
171

// Source: offload/plugins-nextgen/cuda/dynamic_cuda/cuda.cpp