| 1 | // | 
| 2 | // Redistribution and use in source and binary forms, with or without | 
| 3 | // modification, are permitted provided that the following conditions | 
| 4 | // are met: | 
| 5 | //  * Redistributions of source code must retain the above copyright | 
| 6 | //    notice, this list of conditions and the following disclaimer. | 
| 7 | //  * Redistributions in binary form must reproduce the above copyright | 
| 8 | //    notice, this list of conditions and the following disclaimer in the | 
| 9 | //    documentation and/or other materials provided with the distribution. | 
| 10 | //  * Neither the name of NVIDIA CORPORATION nor the names of its | 
| 11 | //    contributors may be used to endorse or promote products derived | 
| 12 | //    from this software without specific prior written permission. | 
| 13 | // | 
| 14 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY | 
| 15 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | 
| 16 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | 
| 17 | // PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR | 
| 18 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | 
| 19 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | 
| 20 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | 
| 21 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | 
| 22 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 
| 23 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 
| 24 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 
| 25 | // | 
| 26 | // Copyright (c) 2008-2021 NVIDIA Corporation. All rights reserved. | 
| 27 |  | 
| 28 |  | 
| 29 | #ifndef PXCUDACONTEXTMANAGER_PXCUDACONTEXTMANAGER_H | 
| 30 | #define PXCUDACONTEXTMANAGER_PXCUDACONTEXTMANAGER_H | 
| 31 |  | 
| 32 | #include "foundation/PxPreprocessor.h" | 
| 33 |  | 
| 34 | #if PX_SUPPORT_GPU_PHYSX | 
| 35 |  | 
| 36 | #include "foundation/PxSimpleTypes.h" | 
| 37 | #include "foundation/PxErrorCallback.h" | 
| 38 | #include "foundation/PxFlags.h" | 
| 39 | #include "task/PxTaskDefine.h" | 
| 40 | #include "cudamanager/PxCudaMemoryManager.h" | 
| 41 |  | 
| 42 | /* Forward decl to avoid inclusion of cuda.h */ | 
| 43 | typedef struct CUctx_st *CUcontext; | 
| 44 | typedef struct CUgraphicsResource_st *CUgraphicsResource; | 
| 45 | typedef int CUdevice; | 
| 46 |  | 
| 47 | namespace physx | 
| 48 | {  | 
| 49 | 	 | 
| 50 | /** \brief Possible graphic/CUDA interoperability modes for context */ | 
| 51 | struct PxCudaInteropMode | 
| 52 | { | 
| 53 |     /** | 
| 54 |      * \brief Possible graphic/CUDA interoperability modes for context | 
| 55 |      */ | 
| 56 | 	enum Enum | 
| 57 | 	{ | 
| 58 | 		NO_INTEROP = 0, | 
| 59 | 		D3D10_INTEROP, | 
| 60 | 		D3D11_INTEROP, | 
| 61 | 		OGL_INTEROP, | 
| 62 |  | 
| 63 | 		COUNT | 
| 64 | 	}; | 
| 65 | }; | 
| 66 |  | 
| 67 | struct PxCudaInteropRegisterFlag | 
| 68 | { | 
| 69 | 	enum Enum | 
| 70 | 	{ | 
| 71 | 		eNONE           = 0x00, | 
| 72 | 		eREAD_ONLY      = 0x01, | 
| 73 | 		eWRITE_DISCARD  = 0x02, | 
| 74 | 		eSURFACE_LDST   = 0x04, | 
| 75 | 		eTEXTURE_GATHER = 0x08 | 
| 76 | 	}; | 
| 77 | }; | 
| 78 |  | 
| 79 | /** | 
\brief Collection of set bits defined in PxCudaInteropRegisterFlag.

@see PxCudaInteropRegisterFlag
| 83 | */ | 
| 84 | typedef PxFlags<PxCudaInteropRegisterFlag::Enum, uint32_t> PxCudaInteropRegisterFlags; | 
| 85 | PX_FLAGS_OPERATORS(PxCudaInteropRegisterFlag::Enum, uint32_t) | 
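
/*
   Illustration only (not part of the original header): individual flag values can be
   combined through the operators generated by PX_FLAGS_OPERATORS above, e.g.

       physx::PxCudaInteropRegisterFlags flags =
           physx::PxCudaInteropRegisterFlag::eREAD_ONLY |
           physx::PxCudaInteropRegisterFlag::eTEXTURE_GATHER;
*/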
| 86 |  | 
| 87 | //! \brief Descriptor used to create a PxCudaContextManager | 
| 88 | class PxCudaContextManagerDesc | 
| 89 | { | 
| 90 | public: | 
| 91 |     /** | 
| 92 |      * \brief The CUDA context to manage | 
| 93 |      * | 
| 94 |      * If left NULL, the PxCudaContextManager will create a new context.  If | 
| 95 |      * graphicsDevice is also not NULL, this new CUDA context will be bound to | 
| 96 |      * that graphics device, enabling the use of CUDA/Graphics interop features. | 
| 97 |      * | 
| 98 |      * If ctx is not NULL, the specified context must be applied to the thread | 
| 99 |      * that is allocating the PxCudaContextManager at creation time (aka, it | 
| 100 |      * cannot be popped).  The PxCudaContextManager will take ownership of the | 
| 101 |      * context until the manager is released.  All access to the context must be | 
| 102 |      * gated by lock acquisition. | 
| 103 |      * | 
| 104 |      * If the user provides a context for the PxCudaContextManager, the context | 
| 105 |      * _must_ have either been created on the GPU ordinal returned by | 
| 106 |      * PxGetSuggestedCudaDeviceOrdinal() or on your graphics device. | 
| 107 |      * | 
| 108 |      * It is perfectly acceptable to allocate device or host pinned memory from | 
| 109 |      * the context outside the scope of the PxCudaMemoryManager, so long as you | 
| 110 |      * manage its eventual cleanup. | 
| 111 |      */ | 
| 112 | 	CUcontext            *ctx; | 
| 113 |  | 
| 114 |     /** | 
     * \brief D3D device pointer or OpenGL context handle
| 116 |      * | 
| 117 |      * Only applicable when ctx is NULL, thus forcing a new context to be | 
| 118 |      * created.  In that case, the created context will be bound to this | 
| 119 |      * graphics device. | 
| 120 |      */ | 
| 121 | 	void	             *graphicsDevice; | 
| 122 |  | 
| 123 | #if PX_SUPPORT_GPU_PHYSX | 
| 124 | 	/** | 
| 125 | 	  * \brief Application-specific GUID | 
| 126 | 	  * | 
	  * If your application employs PhysX modules that use CUDA, you need to use a GUID
	  * so that patches for new architectures can be released for your game.  You can
	  * obtain a GUID for your application from NVIDIA.
| 130 | 	  */ | 
| 131 | 	const char*			 appGUID; | 
| 132 | #endif | 
| 133 |     /** | 
| 134 |      * \brief The CUDA/Graphics interop mode of this context | 
| 135 |      * | 
| 136 |      * If ctx is NULL, this value describes the nature of the graphicsDevice | 
| 137 |      * pointer provided by the user.  Else it describes the nature of the | 
| 138 |      * context provided by the user. | 
| 139 |      */ | 
| 140 | 	PxCudaInteropMode::Enum interopMode; | 
| 141 |  | 
| 142 |  | 
| 143 |     /** | 
| 144 |      * \brief Size of persistent memory | 
| 145 |      * | 
| 146 |      * This memory is allocated up front and stays allocated until the | 
     * PxCudaContextManager is released.  The size is in bytes, must be a power of
     * two, and must be larger than the page size.  Set to 0 to use only dynamic pages.
     *
     * Note: On Windows Vista and later, each memory allocation adds overhead to
     * every CUDA work submission, so we recommend that you carefully tune this
     * initial base memory size to closely approximate the amount of memory your
     * application will consume.
     *
     * Note: This is currently not used by PxSceneFlag::eENABLE_GPU_DYNAMICS. Memory allocation properties are
     * configured for GPU rigid bodies using PxSceneDesc::gpuDynamicsConfig.
| 157 |      */ | 
| 158 | 	uint32_t	memoryBaseSize[PxCudaBufferMemorySpace::COUNT]; | 
| 159 |  | 
| 160 |     /** | 
| 161 |      * \brief Size of memory pages | 
| 162 |      * | 
     * The memory manager will dynamically grow and shrink in blocks that are
     * multiples of this page size.  The size must be a power of two and greater than 0.
     *
     * Note: This is currently not used by PxSceneFlag::eENABLE_GPU_DYNAMICS. Memory allocation properties are
     * configured for GPU rigid bodies using PxSceneDesc::gpuDynamicsConfig.
| 168 |      */ | 
| 169 | 	uint32_t	memoryPageSize[PxCudaBufferMemorySpace::COUNT]; | 
| 170 |  | 
| 171 |     /** | 
     * \brief Maximum size of memory that the memory manager will allocate
     *
     * Note: This is currently not used by PxSceneFlag::eENABLE_GPU_DYNAMICS. Memory allocation properties are
     * configured for GPU rigid bodies using PxSceneDesc::gpuDynamicsConfig.
| 176 |      */ | 
| 177 | 	uint32_t	maxMemorySize[PxCudaBufferMemorySpace::COUNT]; | 
| 178 |  | 
| 179 | 	PX_INLINE PxCudaContextManagerDesc() | 
| 180 | 	{ | 
| 181 | 		ctx = NULL; | 
| 182 | 		interopMode = PxCudaInteropMode::NO_INTEROP; | 
| 183 | 		graphicsDevice = 0; | 
| 184 | #if PX_SUPPORT_GPU_PHYSX | 
| 185 | 		appGUID  = NULL; | 
| 186 | #endif | 
| 187 | 		for(uint32_t i = 0; i < PxCudaBufferMemorySpace::COUNT; i++) | 
| 188 | 		{ | 
| 189 | 			memoryBaseSize[i] = 0; | 
| 190 | 			memoryPageSize[i] = 2 * 1024*1024; | 
| 191 | 			maxMemorySize[i] = UINT32_MAX; | 
| 192 | 		} | 
| 193 | 	} | 
| 194 | }; | 
| 195 |  | 
| 196 |  | 
| 197 | /** | 
| 198 |  * \brief Manages memory, thread locks, and task scheduling for a CUDA context | 
| 199 |  * | 
| 200 |  * A PxCudaContextManager manages access to a single CUDA context, allowing it to | 
| 201 |  * be shared between multiple scenes.   Memory allocations are dynamic: starting | 
| 202 |  * with an initial heap size and growing on demand by a configurable page size. | 
| 203 |  * The context must be acquired from the manager before using any CUDA APIs. | 
| 204 |  * | 
 * The PxCudaContextManager is based on the CUDA driver API and explicitly does not
| 206 |  * support the CUDA runtime API (aka, CUDART). | 
| 207 |  */ | 
| 208 | class PxCudaContextManager | 
| 209 | { | 
| 210 | public: | 
| 211 |     /** | 
| 212 |      * \brief Acquire the CUDA context for the current thread | 
| 213 |      * | 
| 214 |      * Acquisitions are allowed to be recursive within a single thread. | 
| 215 |      * You can acquire the context multiple times so long as you release | 
| 216 |      * it the same count. | 
| 217 |      * | 
| 218 |      * The context must be acquired before using most CUDA functions. | 
| 219 |      */ | 
| 220 |     virtual void acquireContext() = 0; | 
| 221 |  | 
| 222 |     /** | 
| 223 |      * \brief Release the CUDA context from the current thread | 
| 224 |      * | 
| 225 |      * The CUDA context should be released as soon as practically | 
| 226 |      * possible, to allow other CPU threads to work efficiently. | 
| 227 |      */ | 
| 228 |     virtual void releaseContext() = 0; | 
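
    /*
       Pairing sketch (illustration only): every acquireContext() must be matched by
       exactly one releaseContext() on the same thread.  For exception safety, prefer
       the PxScopedCudaLock helper declared at the end of this header.

           cudaContextManager->acquireContext();
           // ... CUDA driver API calls ...
           cudaContextManager->releaseContext();
    */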
| 229 |  | 
| 230 | 	/** | 
| 231 | 	* \brief Return the CUcontext | 
| 232 | 	*/ | 
| 233 | 	virtual CUcontext getContext() = 0; | 
| 234 |  | 
| 235 |     /** | 
| 236 |      * \brief Return the PxCudaMemoryManager instance associated with this | 
| 237 |      * CUDA context | 
     *
     * Note: This is currently not used by PxSceneFlag::eENABLE_GPU_DYNAMICS. Memory allocation properties are
     * configured for GPU rigid bodies using PxSceneDesc::gpuDynamicsConfig.
| 240 |      */ | 
| 241 | 	virtual PxCudaMemoryManager *getMemoryManager() = 0; | 
| 242 |  | 
| 243 |     /** | 
     * \brief Returns true if the context manager has a valid CUDA context
| 245 |      * | 
| 246 |      * This method should be called after creating a PxCudaContextManager, | 
| 247 |      * especially if the manager was responsible for allocating its own | 
| 248 |      * CUDA context (desc.ctx == NULL). | 
| 249 |      */ | 
| 250 |     virtual bool contextIsValid() const = 0; | 
| 251 |  | 
| 252 | 	/* Query CUDA context and device properties, without acquiring context */ | 
| 253 |  | 
| 254 |     virtual bool supportsArchSM10() const = 0;  //!< G80 | 
| 255 |     virtual bool supportsArchSM11() const = 0;  //!< G92 | 
| 256 |     virtual bool supportsArchSM12() const = 0;  //!< GT200 | 
| 257 |     virtual bool supportsArchSM13() const = 0;  //!< GT260 | 
| 258 |     virtual bool supportsArchSM20() const = 0;  //!< GF100 | 
| 259 |     virtual bool supportsArchSM30() const = 0;  //!< GK100 | 
| 260 | 	virtual bool supportsArchSM35() const = 0;  //!< GK110 | 
| 261 | 	virtual bool supportsArchSM50() const = 0;  //!< GM100 | 
| 262 | 	virtual bool supportsArchSM52() const = 0;  //!< GM200 | 
| 263 | 	virtual bool supportsArchSM60() const = 0;  //!< GP100 | 
| 264 | 	virtual bool isIntegrated() const = 0;      //!< true if GPU is an integrated (MCP) part | 
	virtual bool canMapHostMemory() const = 0;  //!< true if the GPU can map host memory (zero-copy)
	virtual int  getDriverVersion() const = 0;  //!< returns cached value of cuDriverGetVersion()
| 267 | 	virtual size_t getDeviceTotalMemBytes() const = 0; //!< returns cached value of device memory size | 
	virtual int	getMultiprocessorCount() const = 0; //!< returns cached value of SM unit count
| 269 |     virtual unsigned int getClockRate() const = 0; //!< returns cached value of SM clock frequency | 
| 270 |     virtual int  getSharedMemPerBlock() const = 0; //!< returns total amount of shared memory available per block in bytes | 
| 271 | 	virtual int  getSharedMemPerMultiprocessor() const = 0; //!< returns total amount of shared memory available per multiprocessor in bytes | 
| 272 | 	virtual unsigned int getMaxThreadsPerBlock() const = 0; //!< returns the maximum number of threads per block | 
| 273 |     virtual const char *getDeviceName() const = 0; //!< returns device name retrieved from driver | 
| 274 | 	virtual CUdevice getDevice() const = 0; //!< returns device handle retrieved from driver | 
| 275 | 	virtual PxCudaInteropMode::Enum getInteropMode() const = 0; //!< interop mode the context was created with | 
| 276 |  | 
| 277 | 	virtual void setUsingConcurrentStreams(bool) = 0; //!< turn on/off using concurrent streams for GPU work | 
| 278 | 	virtual bool getUsingConcurrentStreams() const = 0; //!< true if GPU work can run in concurrent streams | 
| 279 |     /* End query methods that don't require context to be acquired */ | 
| 280 |  | 
| 281 |     /** | 
| 282 |      * \brief Register a rendering resource with CUDA | 
| 283 |      * | 
| 284 |      * This function is called to register render resources (allocated | 
| 285 |      * from OpenGL) with CUDA so that the memory may be shared | 
| 286 |      * between the two systems.  This is only required for render | 
| 287 |      * resources that are designed for interop use.  In APEX, each | 
| 288 |      * render resource descriptor that could support interop has a | 
| 289 |      * 'registerInCUDA' boolean variable. | 
| 290 |      * | 
| 291 |      * The function must be called again any time your graphics device | 
| 292 |      * is reset, to re-register the resource. | 
| 293 |      * | 
| 294 |      * Returns true if the registration succeeded.  A registered | 
| 295 |      * resource must be unregistered before it can be released. | 
| 296 |      * | 
| 297 |      * \param resource [OUT] the handle to the resource that can be used with CUDA | 
     * \param buffer [IN] GLuint buffer index to be mapped to CUDA
     * \param flags [IN] CUDA interop registration flags
| 300 |      */ | 
| 301 |     virtual bool registerResourceInCudaGL(CUgraphicsResource &resource, uint32_t buffer, PxCudaInteropRegisterFlags flags = PxCudaInteropRegisterFlags()) = 0; | 
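
    /*
       Registration sketch (illustration only), assuming 'vbo' is a GLuint buffer
       created by the application's renderer and 'cudaContextManager' points to this
       manager:

           CUgraphicsResource cudaResource;
           bool registered = cudaContextManager->registerResourceInCudaGL(
               cudaResource, vbo, physx::PxCudaInteropRegisterFlag::eWRITE_DISCARD);

           // ... map the resource and use it from the CUDA driver API ...

           if(registered)
               cudaContextManager->unregisterResourceInCuda(cudaResource);
    */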
| 302 |  | 
| 303 |      /** | 
| 304 |      * \brief Register a rendering resource with CUDA | 
| 305 |      * | 
| 306 |      * This function is called to register render resources (allocated | 
| 307 |      * from Direct3D) with CUDA so that the memory may be shared | 
| 308 |      * between the two systems.  This is only required for render | 
| 309 |      * resources that are designed for interop use.  In APEX, each | 
| 310 |      * render resource descriptor that could support interop has a | 
| 311 |      * 'registerInCUDA' boolean variable. | 
| 312 |      * | 
| 313 |      * The function must be called again any time your graphics device | 
| 314 |      * is reset, to re-register the resource. | 
| 315 |      * | 
| 316 |      * Returns true if the registration succeeded.  A registered | 
| 317 |      * resource must be unregistered before it can be released. | 
| 318 |      * | 
| 319 |      * \param resource [OUT] the handle to the resource that can be used with CUDA | 
     * \param resourcePointer [IN] A pointer to the IDirect3DResource9, ID3D10Resource, or ID3D11Resource to be registered.
     * \param flags [IN] CUDA interop registration flags
| 322 |      */ | 
| 323 |     virtual bool registerResourceInCudaD3D(CUgraphicsResource &resource, void *resourcePointer, PxCudaInteropRegisterFlags flags = PxCudaInteropRegisterFlags()) = 0; | 
| 324 |  | 
| 325 |     /** | 
| 326 |      * \brief Unregister a rendering resource with CUDA | 
| 327 |      * | 
| 328 |      * If a render resource was successfully registered with CUDA using | 
| 329 |      * the registerResourceInCuda***() methods, this function must be called | 
     * to unregister the resource before it can be released.
| 331 |      */ | 
| 332 |     virtual bool unregisterResourceInCuda(CUgraphicsResource resource) = 0; | 
| 333 |  | 
| 334 | 	/** | 
| 335 | 	 * \brief Determine if the user has configured a dedicated PhysX GPU in the NV Control Panel | 
	 * \note If using CUDA Interop, this will always return 0
| 337 | 	 * \returns	1 if there is a dedicated GPU | 
| 338 | 	 *			0 if there is NOT a dedicated GPU | 
| 339 | 	 *			-1 if the routine is not implemented | 
| 340 | 	*/ | 
| 341 | 	virtual int	usingDedicatedGPU() const = 0; | 
| 342 |  | 
| 343 |     /** | 
| 344 |      * \brief Release the PxCudaContextManager | 
| 345 |      * | 
| 346 |      * When the manager instance is released, it also releases its | 
| 347 |      * PxCudaMemoryManager.  Before the memory manager is released, it  | 
	 * frees all allocated memory pages.  If the PxCudaContextManager
	 * created its own CUDA context, it also frees that context.
| 351 |      * | 
| 352 |      * Do not release the PxCudaContextManager if there are any scenes | 
| 353 |      * using it.  Those scenes must be released first. | 
| 354 |      * | 
| 355 |      */ | 
| 356 | 	virtual void release() = 0; | 
| 357 |  | 
| 358 | protected: | 
| 359 |  | 
| 360 |     /** | 
| 361 |      * \brief protected destructor, use release() method | 
| 362 |      */ | 
| 363 |     virtual ~PxCudaContextManager() {} | 
| 364 | }; | 
| 365 |  | 
| 366 | /** | 
| 367 |  * \brief Convenience class for holding CUDA lock within a scope | 
| 368 |  */ | 
| 369 | class PxScopedCudaLock | 
| 370 | { | 
| 371 | public: | 
| 372 |     /** | 
| 373 |      * \brief ScopedCudaLock constructor | 
| 374 |      */ | 
| 375 | 	PxScopedCudaLock(PxCudaContextManager& ctx) : mCtx(&ctx) | 
| 376 | 	{ | 
| 377 | 		mCtx->acquireContext(); | 
| 378 | 	} | 
| 379 |  | 
| 380 |     /** | 
| 381 |      * \brief ScopedCudaLock destructor | 
| 382 |      */ | 
| 383 | 	~PxScopedCudaLock() | 
| 384 | 	{ | 
| 385 | 		mCtx->releaseContext(); | 
| 386 | 	} | 
| 387 |  | 
| 388 | protected: | 
| 389 |  | 
| 390 |     /** | 
| 391 |      * \brief CUDA context manager pointer (initialized in the constructor) | 
| 392 |      */ | 
| 393 |     PxCudaContextManager* mCtx; | 
| 394 | }; | 
| 395 |  | 
| 396 | } // end physx namespace | 
| 397 |  | 
| 398 | #endif // PX_SUPPORT_GPU_PHYSX | 
| 399 | #endif // PXCUDACONTEXTMANAGER_PXCUDACONTEXTMANAGER_H | 
| 400 |  |