//===--- cuda/dynamic_cuda/cuda.cpp ------------------------------ C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
// Implements a subset of the CUDA API by calling into the CUDA library, which
// is opened via dlopen. The dlopen/dlsym calls are made as part of the call to
// cuInit.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/Support/DynamicLibrary.h"
15
16#include "Shared/Debug.h"
17
18#include "DLWrap.h"
19#include "cuda.h"
20
21#include <memory>
22#include <string>
23#include <unordered_map>
24
25DLWRAP_INITIALIZE()
26
27DLWRAP_INTERNAL(cuInit, 1)
28
29DLWRAP(cuCtxGetDevice, 1)
30DLWRAP(cuDeviceGet, 2)
31DLWRAP(cuDeviceGetAttribute, 3)
32DLWRAP(cuDeviceGetCount, 1)
33DLWRAP(cuFuncGetAttribute, 3)
34DLWRAP(cuFuncSetAttribute, 3)
35
36// Device info
37DLWRAP(cuDeviceGetName, 3)
38DLWRAP(cuDeviceTotalMem, 2)
39DLWRAP(cuDriverGetVersion, 1)
40
41DLWRAP(cuGetErrorString, 2)
42DLWRAP(cuLaunchKernel, 11)
43DLWRAP(cuLaunchHostFunc, 3)
44
45DLWRAP(cuMemAlloc, 2)
46DLWRAP(cuMemAllocHost, 2)
47DLWRAP(cuMemAllocManaged, 3)
48DLWRAP(cuMemAllocAsync, 3)
49
50DLWRAP(cuMemcpyDtoDAsync, 4)
51DLWRAP(cuMemcpyDtoH, 3)
52DLWRAP(cuMemcpyDtoHAsync, 4)
53DLWRAP(cuMemcpyHtoD, 3)
54DLWRAP(cuMemcpyHtoDAsync, 4)
55
56DLWRAP(cuMemFree, 1)
57DLWRAP(cuMemFreeHost, 1)
58DLWRAP(cuMemFreeAsync, 2)
59
60DLWRAP(cuModuleGetFunction, 3)
61DLWRAP(cuModuleGetGlobal, 4)
62
63DLWRAP(cuModuleUnload, 1)
64DLWRAP(cuStreamCreate, 2)
65DLWRAP(cuStreamDestroy, 1)
66DLWRAP(cuStreamSynchronize, 1)
67DLWRAP(cuStreamQuery, 1)
68DLWRAP(cuStreamAddCallback, 4)
69DLWRAP(cuCtxSetCurrent, 1)
70DLWRAP(cuDevicePrimaryCtxRelease, 1)
71DLWRAP(cuDevicePrimaryCtxGetState, 3)
72DLWRAP(cuDevicePrimaryCtxSetFlags, 2)
73DLWRAP(cuDevicePrimaryCtxRetain, 2)
74DLWRAP(cuModuleLoadDataEx, 5)
75
76DLWRAP(cuDeviceCanAccessPeer, 3)
77DLWRAP(cuCtxEnablePeerAccess, 2)
78DLWRAP(cuMemcpyPeerAsync, 6)
79
80DLWRAP(cuCtxGetLimit, 2)
81DLWRAP(cuCtxSetLimit, 2)
82
83DLWRAP(cuEventCreate, 2)
84DLWRAP(cuEventRecord, 2)
85DLWRAP(cuStreamWaitEvent, 3)
86DLWRAP(cuEventSynchronize, 1)
87DLWRAP(cuEventDestroy, 1)
88
89DLWRAP_FINALIZE()
90
91DLWRAP(cuMemUnmap, 2)
92DLWRAP(cuMemRelease, 1)
93DLWRAP(cuMemAddressFree, 2)
94DLWRAP(cuMemGetInfo, 2)
95DLWRAP(cuMemAddressReserve, 5)
96DLWRAP(cuMemMap, 5)
97DLWRAP(cuMemCreate, 4)
98DLWRAP(cuMemSetAccess, 4)
99DLWRAP(cuMemGetAllocationGranularity, 3)
100
// Library name (or path) handed to the dynamic loader in checkForCUDA().
// A definition supplied by the build takes precedence over this default.
#if !defined(DYNAMIC_CUDA_PATH)
#define DYNAMIC_CUDA_PATH "libcuda.so"
#endif

// Fallback definitions for the debug-message prefix; both are only provided
// when the build has not already defined them. Presumably DEBUG_PREFIX is
// consumed by the DP() macro from Shared/Debug.h -- verify there.
#if !defined(TARGET_NAME)
#define TARGET_NAME CUDA
#endif
#if !defined(DEBUG_PREFIX)
#define DEBUG_PREFIX "Target " GETNAME(TARGET_NAME) " RTL"
#endif
111
112static bool checkForCUDA() {
113 // return true if dlopen succeeded and all functions found
114
115 // Prefer _v2 versions of functions if found in the library
116 std::unordered_map<std::string, const char *> TryFirst = {
117 {"cuMemAlloc", "cuMemAlloc_v2"},
118 {"cuMemFree", "cuMemFree_v2"},
119 {"cuMemcpyDtoH", "cuMemcpyDtoH_v2"},
120 {"cuMemcpyHtoD", "cuMemcpyHtoD_v2"},
121 {"cuStreamDestroy", "cuStreamDestroy_v2"},
122 {"cuModuleGetGlobal", "cuModuleGetGlobal_v2"},
123 {"cuMemcpyDtoHAsync", "cuMemcpyDtoHAsync_v2"},
124 {"cuMemcpyDtoDAsync", "cuMemcpyDtoDAsync_v2"},
125 {"cuMemcpyHtoDAsync", "cuMemcpyHtoDAsync_v2"},
126 {"cuDevicePrimaryCtxRelease", "cuDevicePrimaryCtxRelease_v2"},
127 {"cuDevicePrimaryCtxSetFlags", "cuDevicePrimaryCtxSetFlags_v2"},
128 };
129
130 const char *CudaLib = DYNAMIC_CUDA_PATH;
131 std::string ErrMsg;
132 auto DynlibHandle = std::make_unique<llvm::sys::DynamicLibrary>(
133 llvm::sys::DynamicLibrary::getPermanentLibrary(CudaLib, &ErrMsg));
134 if (!DynlibHandle->isValid()) {
135 DP("Unable to load library '%s': %s!\n", CudaLib, ErrMsg.c_str());
136 return false;
137 }
138
139 for (size_t I = 0; I < dlwrap::size(); I++) {
140 const char *Sym = dlwrap::symbol(I);
141
142 auto It = TryFirst.find(Sym);
143 if (It != TryFirst.end()) {
144 const char *First = It->second;
145 void *P = DynlibHandle->getAddressOfSymbol(First);
146 if (P) {
147 DP("Implementing %s with dlsym(%s) -> %p\n", Sym, First, P);
148 *dlwrap::pointer(I) = P;
149 continue;
150 }
151 }
152
153 void *P = DynlibHandle->getAddressOfSymbol(Sym);
154 if (P == nullptr) {
155 DP("Unable to find '%s' in '%s'!\n", Sym, CudaLib);
156 return false;
157 }
158 DP("Implementing %s with dlsym(%s) -> %p\n", Sym, Sym, P);
159
160 *dlwrap::pointer(I) = P;
161 }
162
163 return true;
164}
165
166CUresult cuInit(unsigned X) {
167 // Note: Called exactly once from cuda rtl.cpp in a global constructor so
168 // does not need to handle being called repeatedly or concurrently
169 if (!checkForCUDA()) {
170 return CUDA_ERROR_INVALID_HANDLE;
171 }
172 return dlwrap_cuInit(X);
173}
174

source code of offload/plugins-nextgen/cuda/dynamic_cuda/cuda.cpp