| 1 | //===--- cuda/dynamic_cuda/cuda.h --------------------------------- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
// The parts of the CUDA API that are presently in use by the OpenMP CUDA plugin
| 10 | // |
| 11 | //===----------------------------------------------------------------------===// |
| 12 | |
| 13 | #ifndef DYNAMIC_CUDA_CUDA_H_INCLUDED |
| 14 | #define DYNAMIC_CUDA_CUDA_H_INCLUDED |
| 15 | |
| 16 | #include <cstddef> |
| 17 | #include <cstdint> |
| 18 | |
// Redirect the unsuffixed driver entry-point names to their `_v2` symbols.
// Because these are object-like macros, every later use of the short name in
// this header (and in any file that includes it) is rewritten to the `_v2`
// spelling by the preprocessor.
#define cuDevicePrimaryCtxRelease cuDevicePrimaryCtxRelease_v2
#define cuDevicePrimaryCtxSetFlags cuDevicePrimaryCtxSetFlags_v2
#define cuDeviceTotalMem cuDeviceTotalMem_v2
#define cuMemAlloc cuMemAlloc_v2
#define cuMemAllocHost cuMemAllocHost_v2
#define cuMemFree cuMemFree_v2
#define cuMemGetInfo cuMemGetInfo_v2
#define cuModuleGetGlobal cuModuleGetGlobal_v2
| 27 | |
// Opaque driver-side object tags. This header never defines these structs;
// they are only ever used through the pointer aliases below.
struct CUmod_st;
struct CUctx_st;
struct CUfunc_st;
struct CUstream_st;
struct CUevent_st;

// Scalar handle types.
using CUdevice = int;
using CUdeviceptr = uintptr_t;

// Pointer handle types for the opaque objects above.
using CUmodule = CUmod_st *;
using CUcontext = CUctx_st *;
using CUfunction = CUfunc_st *;
using CUstream = CUstream_st *;
using CUevent = CUevent_st *;

// Host callback signature: receives one untyped user-data pointer and returns
// nothing.
using CUhostFn = void (*)(void *userData);
| 36 | |
// Sentinel device value: the integer -2 converted to CUdevice.
#define CU_DEVICE_INVALID ((CUdevice)(-2))
| 38 | |
// Handle naming a generic memory allocation. It is a plain
// `unsigned long long`; the `_v1` name is the versioned spelling and the
// unsuffixed name is an alias for it.
typedef unsigned long long CUmemGenericAllocationHandle_v1;
typedef CUmemGenericAllocationHandle_v1 CUmemGenericAllocationHandle;
| 43 | |
// Selector passed as the `option` argument of cuMemGetAllocationGranularity
// (declared further down in this header).
typedef enum CUmemAllocationGranularity_flags_enum {
  CU_MEM_ALLOC_GRANULARITY_MINIMUM = 0x0,
  CU_MEM_ALLOC_GRANULARITY_RECOMMENDED = 0x1,
} CUmemAllocationGranularity_flags;
| 48 | |
// Access-protection values stored in CUmemAccessDesc::flags.
typedef enum CUmemAccess_flags_enum {
  CU_MEM_ACCESS_FLAGS_PROT_NONE = 0x0,
  CU_MEM_ACCESS_FLAGS_PROT_READ = 0x1,
  // Both low bits set (0x1 | 0x2).
  CU_MEM_ACCESS_FLAGS_PROT_READWRITE = 0x3,
  // Largest positive 32-bit signed value.
  // NOTE(review): presumably present to pin the enum's width - confirm.
  CU_MEM_ACCESS_FLAGS_PROT_MAX = 0x7FFFFFFF,
} CUmemAccess_flags;
| 55 | |
// Kind of location described by a CUmemLocation.
typedef enum CUmemLocationType_enum {
  CU_MEM_LOCATION_TYPE_INVALID = 0x0,
  CU_MEM_LOCATION_TYPE_DEVICE = 0x1,
  // Largest positive 32-bit signed value.
  // NOTE(review): presumably present to pin the enum's width - confirm.
  CU_MEM_LOCATION_TYPE_MAX = 0x7FFFFFFF,
} CUmemLocationType;
| 61 | |
| 62 | typedef struct CUmemLocation_st { |
| 63 | CUmemLocationType type; |
| 64 | int id; |
| 65 | } CUmemLocation_v1; |
| 66 | typedef CUmemLocation_v1 CUmemLocation; |
| 67 | |
| 68 | typedef struct CUmemAccessDesc_st { |
| 69 | CUmemLocation location; |
| 70 | CUmemAccess_flags flags; |
| 71 | } CUmemAccessDesc_v1; |
| 72 | |
| 73 | typedef CUmemAccessDesc_v1 CUmemAccessDesc; |
| 74 | |
// Allocation kind stored in CUmemAllocationProp::type.
typedef enum CUmemAllocationType_enum {
  CU_MEM_ALLOCATION_TYPE_INVALID = 0x0,
  CU_MEM_ALLOCATION_TYPE_PINNED = 0x1,
  // Largest positive 32-bit signed value.
  // NOTE(review): presumably present to pin the enum's width - confirm.
  CU_MEM_ALLOCATION_TYPE_MAX = 0x7FFFFFFF,
} CUmemAllocationType;
| 80 | |
// Handle kinds stored in CUmemAllocationProp::requestedHandleTypes. The
// non-zero, non-MAX values are distinct single bits (0x1, 0x2, 0x4).
typedef enum CUmemAllocationHandleType_enum {
  CU_MEM_HANDLE_TYPE_NONE = 0x0,
  CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR = 0x1,
  CU_MEM_HANDLE_TYPE_WIN32 = 0x2,
  CU_MEM_HANDLE_TYPE_WIN32_KMT = 0x4,
  // Largest positive 32-bit signed value.
  // NOTE(review): presumably present to pin the enum's width - confirm.
  CU_MEM_HANDLE_TYPE_MAX = 0x7FFFFFFF,
} CUmemAllocationHandleType;
| 88 | |
| 89 | typedef struct CUmemAllocationProp_st { |
| 90 | CUmemAllocationType type; |
| 91 | CUmemAllocationHandleType requestedHandleTypes; |
| 92 | CUmemLocation location; |
| 93 | |
| 94 | void *win32HandleMetaData; |
| 95 | struct { |
| 96 | unsigned char compressionType; |
| 97 | unsigned char gpuDirectRDMACapable; |
| 98 | unsigned short usage; |
| 99 | unsigned char reserved[4]; |
| 100 | } allocFlags; |
| 101 | } CUmemAllocationProp_v1; |
| 102 | typedef CUmemAllocationProp_v1 CUmemAllocationProp; |
| 103 | |
// Status code returned by every driver function declared in this header.
// Only the codes this header's users need are listed.
typedef enum cudaError_enum {
  CUDA_SUCCESS = 0,
  CUDA_ERROR_INVALID_VALUE = 1,
  CUDA_ERROR_NO_DEVICE = 100,
  CUDA_ERROR_INVALID_HANDLE = 400,
  CUDA_ERROR_NOT_FOUND = 500,
  CUDA_ERROR_NOT_READY = 600,
  CUDA_ERROR_TOO_MANY_PEERS = 711,
} CUresult;
| 113 | |
// Flag values for stream creation.
typedef enum CUstream_flags_enum {
  CU_STREAM_DEFAULT = 0x0,
  CU_STREAM_NON_BLOCKING = 0x1,
} CUstream_flags;
| 118 | |
// Limit selectors accepted by cuCtxGetLimit / cuCtxSetLimit (declared further
// down in this header).
typedef enum CUlimit_enum {
  CU_LIMIT_STACK_SIZE = 0x0,
  CU_LIMIT_PRINTF_FIFO_SIZE = 0x1,
  CU_LIMIT_MALLOC_HEAP_SIZE = 0x2,
  CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH = 0x3,
  CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT = 0x4,
  CU_LIMIT_MAX_L2_FETCH_GRANULARITY = 0x5,
  CU_LIMIT_PERSISTING_L2_CACHE_SIZE = 0x6,
  // No explicit value: one past the previous enumerator, i.e. 0x7.
  CU_LIMIT_MAX
} CUlimit;
| 129 | |
// Attribute selectors accepted by cuDeviceGetAttribute (declared further down
// in this header). Values run from 1 to 119; a few names are aliases that
// share a value with the enumerator listed just before them, and those are
// marked "alias" below.
typedef enum CUdevice_attribute_enum {
  // Launch geometry and per-block resources.
  CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 1,
  CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X = 2,
  CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y = 3,
  CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z = 4,
  CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X = 5,
  CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y = 6,
  CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z = 7,
  CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK = 8,
  CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK = 8, // alias
  CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY = 9,
  CU_DEVICE_ATTRIBUTE_WARP_SIZE = 10,
  CU_DEVICE_ATTRIBUTE_MAX_PITCH = 11,
  CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK = 12,
  CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK = 12, // alias

  // General device properties.
  CU_DEVICE_ATTRIBUTE_CLOCK_RATE = 13,
  CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT = 14,
  CU_DEVICE_ATTRIBUTE_GPU_OVERLAP = 15,
  CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16,
  CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT = 17,
  CU_DEVICE_ATTRIBUTE_INTEGRATED = 18,
  CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY = 19,
  CU_DEVICE_ATTRIBUTE_COMPUTE_MODE = 20,

  // Texture limits.
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH = 21,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH = 22,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT = 23,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH = 24,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT = 25,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH = 26,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH = 27,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT = 28,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS = 29,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH = 27,     // alias
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT = 28,    // alias
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES = 29, // alias

  CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT = 30,
  CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS = 31,
  CU_DEVICE_ATTRIBUTE_ECC_ENABLED = 32,
  CU_DEVICE_ATTRIBUTE_PCI_BUS_ID = 33,
  CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID = 34,
  CU_DEVICE_ATTRIBUTE_TCC_DRIVER = 35,
  CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE = 36,
  CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH = 37,
  CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE = 38,
  CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR = 39,
  CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT = 40,
  CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING = 41,

  // More texture limits.
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH = 42,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS = 43,
  CU_DEVICE_ATTRIBUTE_CAN_TEX2D_GATHER = 44,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH = 45,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT = 46,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE = 47,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE = 48,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE = 49,
  CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID = 50,
  CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT = 51,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH = 52,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH = 53,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS = 54,

  // Surface limits.
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH = 55,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH = 56,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT = 57,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH = 58,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT = 59,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH = 60,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH = 61,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS = 62,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH = 63,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT = 64,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS = 65,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH = 66,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH = 67,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS = 68,

  // Linear / mipmapped texture limits.
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH = 69,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH = 70,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT = 71,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH = 72,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH = 73,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT = 74,

  CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR = 75,
  CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR = 76,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH = 77,
  CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED = 78,
  CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED = 79,
  CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED = 80,
  CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR = 81,
  CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR = 82,
  CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY = 83,
  CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD = 84,
  CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID = 85,
  CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED = 86,
  CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO = 87,
  CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS = 88,
  CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS = 89,
  CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED = 90,
  CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM = 91,
  CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS = 92,
  CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS = 93,
  CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR = 94,
  CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH = 95,
  CU_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH = 96,
  CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN = 97,
  CU_DEVICE_ATTRIBUTE_CAN_FLUSH_REMOTE_WRITES = 98,
  CU_DEVICE_ATTRIBUTE_HOST_REGISTER_SUPPORTED = 99,
  CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES = 100,
  CU_DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST = 101,
  CU_DEVICE_ATTRIBUTE_VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED = 102,
  CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED = 102, // alias
  CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR_SUPPORTED = 103,
  CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_HANDLE_SUPPORTED = 104,
  CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_KMT_HANDLE_SUPPORTED = 105,
  CU_DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR = 106,
  CU_DEVICE_ATTRIBUTE_GENERIC_COMPRESSION_SUPPORTED = 107,
  CU_DEVICE_ATTRIBUTE_MAX_PERSISTING_L2_CACHE_SIZE = 108,
  CU_DEVICE_ATTRIBUTE_MAX_ACCESS_POLICY_WINDOW_SIZE = 109,
  CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WITH_CUDA_VMM_SUPPORTED = 110,
  CU_DEVICE_ATTRIBUTE_RESERVED_SHARED_MEMORY_PER_BLOCK = 111,
  CU_DEVICE_ATTRIBUTE_SPARSE_CUDA_ARRAY_SUPPORTED = 112,
  CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED = 113,
  CU_DEVICE_ATTRIBUTE_TIMELINE_SEMAPHORE_INTEROP_SUPPORTED = 114,
  CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED = 115,
  CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_SUPPORTED = 116,
  CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS = 117,
  CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING = 118,
  CU_DEVICE_ATTRIBUTE_MEMPOOL_SUPPORTED_HANDLE_TYPES = 119,

  // No explicit value: one past the previous enumerator, i.e. 120.
  CU_DEVICE_ATTRIBUTE_MAX,
} CUdevice_attribute;
| 258 | |
// Attribute selectors accepted by cuFuncGetAttribute (declared further down
// in this header). Only one selector is listed here.
typedef enum CUfunction_attribute_enum {
  CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 0,
} CUfunction_attribute;
| 262 | |
// Context flag values. CU_CTX_SCHED_MASK (0x07) covers the low three bits,
// which include the CU_CTX_SCHED_BLOCKING_SYNC bit (0x04).
typedef enum CUctx_flags_enum {
  CU_CTX_SCHED_BLOCKING_SYNC = 0x04,
  CU_CTX_SCHED_MASK = 0x07,
} CUctx_flags;
| 267 | |
// Memory-attach flag values; each is a distinct single bit.
// NOTE(review): presumably the values for the `unsigned int` flags argument of
// cuMemAllocManaged - confirm against the driver documentation.
typedef enum CUmemAttach_flags_enum {
  CU_MEM_ATTACH_GLOBAL = 0x1,
  CU_MEM_ATTACH_HOST = 0x2,
  CU_MEM_ATTACH_SINGLE = 0x4,
} CUmemAttach_flags;
| 273 | |
// Compute-mode values. The value 1 is intentionally not listed here.
typedef enum CUcomputeMode_enum {
  CU_COMPUTEMODE_DEFAULT = 0,
  CU_COMPUTEMODE_PROHIBITED = 2,
  CU_COMPUTEMODE_EXCLUSIVE_PROCESS = 3,
} CUcompute_mode;
| 279 | |
// Event flag values; the non-zero values are distinct single bits.
typedef enum CUevent_flags_enum {
  CU_EVENT_DEFAULT = 0x0,
  CU_EVENT_BLOCKING_SYNC = 0x1,
  CU_EVENT_DISABLE_TIMING = 0x2,
  CU_EVENT_INTERPROCESS = 0x4,
} CUevent_flags;
| 286 | |
// Marker pointer values with the small integer values 0, 1 and 2.
// They are declared as `static inline` variables rather than macros, so each
// translation unit that includes this header gets its own internal-linkage
// copy. Note that the pointers themselves are not const.
// NOTE(review): presumably used as entries of the final `void **` argument of
// cuLaunchKernel - confirm against the caller.
static inline void *CU_LAUNCH_PARAM_END = (void *)0x00;
static inline void *CU_LAUNCH_PARAM_BUFFER_POINTER = (void *)0x01;
static inline void *CU_LAUNCH_PARAM_BUFFER_SIZE = (void *)0x02;
| 290 | |
| 291 | typedef void (*CUstreamCallback)(CUstream, CUresult, void *); |
| 292 | |
| 293 | CUresult cuCtxGetDevice(CUdevice *); |
| 294 | CUresult cuDeviceGet(CUdevice *, int); |
| 295 | CUresult cuDeviceGetAttribute(int *, CUdevice_attribute, CUdevice); |
| 296 | CUresult cuDeviceGetCount(int *); |
| 297 | CUresult cuFuncGetAttribute(int *, CUfunction_attribute, CUfunction); |
| 298 | |
| 299 | // Device info |
| 300 | CUresult cuDeviceGetName(char *, int, CUdevice); |
| 301 | CUresult cuDeviceTotalMem(size_t *, CUdevice); |
| 302 | CUresult cuDriverGetVersion(int *); |
| 303 | |
| 304 | CUresult cuGetErrorString(CUresult, const char **); |
| 305 | CUresult cuInit(unsigned); |
| 306 | CUresult cuLaunchKernel(CUfunction, unsigned, unsigned, unsigned, unsigned, |
| 307 | unsigned, unsigned, unsigned, CUstream, void **, |
| 308 | void **); |
| 309 | CUresult cuLaunchHostFunc(CUstream, CUhostFn, void *); |
| 310 | |
| 311 | CUresult cuMemAlloc(CUdeviceptr *, size_t); |
| 312 | CUresult cuMemAllocHost(void **, size_t); |
| 313 | CUresult cuMemAllocManaged(CUdeviceptr *, size_t, unsigned int); |
| 314 | CUresult cuMemAllocAsync(CUdeviceptr *, size_t, CUstream); |
| 315 | |
| 316 | CUresult cuMemcpyDtoDAsync(CUdeviceptr, CUdeviceptr, size_t, CUstream); |
| 317 | CUresult cuMemcpyDtoH(void *, CUdeviceptr, size_t); |
| 318 | CUresult cuMemcpyDtoHAsync(void *, CUdeviceptr, size_t, CUstream); |
| 319 | CUresult cuMemcpyHtoD(CUdeviceptr, const void *, size_t); |
| 320 | CUresult cuMemcpyHtoDAsync(CUdeviceptr, const void *, size_t, CUstream); |
| 321 | |
| 322 | CUresult cuMemFree(CUdeviceptr); |
| 323 | CUresult cuMemFreeHost(void *); |
| 324 | CUresult cuMemFreeAsync(CUdeviceptr, CUstream); |
| 325 | |
| 326 | CUresult cuModuleGetFunction(CUfunction *, CUmodule, const char *); |
| 327 | CUresult cuModuleGetGlobal(CUdeviceptr *, size_t *, CUmodule, const char *); |
| 328 | |
| 329 | CUresult cuModuleUnload(CUmodule); |
| 330 | CUresult cuStreamCreate(CUstream *, unsigned); |
| 331 | CUresult cuStreamDestroy(CUstream); |
| 332 | CUresult cuStreamSynchronize(CUstream); |
| 333 | CUresult cuStreamQuery(CUstream); |
| 334 | CUresult cuStreamAddCallback(CUstream, CUstreamCallback, void *, unsigned int); |
| 335 | CUresult cuCtxSetCurrent(CUcontext); |
| 336 | CUresult cuDevicePrimaryCtxRelease(CUdevice); |
| 337 | CUresult cuDevicePrimaryCtxGetState(CUdevice, unsigned *, int *); |
| 338 | CUresult cuDevicePrimaryCtxSetFlags(CUdevice, unsigned); |
| 339 | CUresult cuDevicePrimaryCtxRetain(CUcontext *, CUdevice); |
| 340 | CUresult cuModuleLoadDataEx(CUmodule *, const void *, unsigned, void *, |
| 341 | void **); |
| 342 | |
| 343 | CUresult cuDeviceCanAccessPeer(int *, CUdevice, CUdevice); |
| 344 | CUresult cuCtxEnablePeerAccess(CUcontext, unsigned); |
| 345 | CUresult cuMemcpyPeerAsync(CUdeviceptr, CUcontext, CUdeviceptr, CUcontext, |
| 346 | size_t, CUstream); |
| 347 | |
| 348 | CUresult cuCtxGetLimit(size_t *, CUlimit); |
| 349 | CUresult cuCtxSetLimit(CUlimit, size_t); |
| 350 | |
| 351 | CUresult cuEventCreate(CUevent *, unsigned int); |
| 352 | CUresult cuEventRecord(CUevent, CUstream); |
| 353 | CUresult cuStreamWaitEvent(CUstream, CUevent, unsigned int); |
| 354 | CUresult cuEventSynchronize(CUevent); |
| 355 | CUresult cuEventDestroy(CUevent); |
| 356 | |
| 357 | CUresult cuMemUnmap(CUdeviceptr ptr, size_t size); |
| 358 | CUresult cuMemRelease(CUmemGenericAllocationHandle handle); |
| 359 | CUresult cuMemAddressFree(CUdeviceptr ptr, size_t size); |
| 360 | CUresult cuMemGetInfo(size_t *free, size_t *total); |
| 361 | CUresult cuMemAddressReserve(CUdeviceptr *ptr, size_t size, size_t alignment, |
| 362 | CUdeviceptr addr, unsigned long long flags); |
| 363 | CUresult cuMemMap(CUdeviceptr ptr, size_t size, size_t offset, |
| 364 | CUmemGenericAllocationHandle handle, |
| 365 | unsigned long long flags); |
| 366 | CUresult cuMemCreate(CUmemGenericAllocationHandle *handle, size_t size, |
| 367 | const CUmemAllocationProp *prop, unsigned long long flags); |
| 368 | CUresult cuMemSetAccess(CUdeviceptr ptr, size_t size, |
| 369 | const CUmemAccessDesc *desc, size_t count); |
| 370 | CUresult cuMemGetAllocationGranularity(size_t *granularity, |
| 371 | const CUmemAllocationProp *prop, |
| 372 | CUmemAllocationGranularity_flags option); |
| 373 | |
| 374 | #endif |
| 375 | |