//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2021 NVIDIA Corporation. All rights reserved.


#ifndef PXCUDACONTEXTMANAGER_PXCUDACONTEXTMANAGER_H
#define PXCUDACONTEXTMANAGER_PXCUDACONTEXTMANAGER_H

#include "foundation/PxPreprocessor.h"

#if PX_SUPPORT_GPU_PHYSX

#include "foundation/PxSimpleTypes.h"
#include "foundation/PxErrorCallback.h"
#include "foundation/PxFlags.h"
#include "task/PxTaskDefine.h"
#include "cudamanager/PxCudaMemoryManager.h"

/* Forward decl to avoid inclusion of cuda.h */
typedef struct CUctx_st *CUcontext;
typedef struct CUgraphicsResource_st *CUgraphicsResource;
typedef int CUdevice;

namespace physx
{

/** \brief Possible graphic/CUDA interoperability modes for context */
struct PxCudaInteropMode
{
    /**
     * \brief Possible graphic/CUDA interoperability modes for context
     */
    enum Enum
    {
        NO_INTEROP = 0,
        D3D10_INTEROP,
        D3D11_INTEROP,
        OGL_INTEROP,

        COUNT
    };
};

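/** \brief Flags controlling how a graphics resource is registered for CUDA interop (see PxCudaInteropRegisterFlags) */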
struct PxCudaInteropRegisterFlag
{
    enum Enum
    {
        eNONE           = 0x00,
        eREAD_ONLY      = 0x01,
        eWRITE_DISCARD  = 0x02,
        eSURFACE_LDST   = 0x04,
        eTEXTURE_GATHER = 0x08
    };
};

/**
\brief Collection of set bits defined in PxCudaInteropRegisterFlag.

@see PxCudaInteropRegisterFlag
*/
typedef PxFlags<PxCudaInteropRegisterFlag::Enum, uint32_t> PxCudaInteropRegisterFlags;
PX_FLAGS_OPERATORS(PxCudaInteropRegisterFlag::Enum, uint32_t)
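
/*
 * Example (sketch, not part of the API): the register flags combine with the
 * standard PxFlags operators, e.g. to request a read-only registration that
 * supports texture gather:
 *
 *     PxCudaInteropRegisterFlags flags = PxCudaInteropRegisterFlag::eREAD_ONLY
 *                                      | PxCudaInteropRegisterFlag::eTEXTURE_GATHER;
 */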

//! \brief Descriptor used to create a PxCudaContextManager
class PxCudaContextManagerDesc
{
public:
    /**
     * \brief The CUDA context to manage
     *
     * If left NULL, the PxCudaContextManager will create a new context. If
     * graphicsDevice is also not NULL, this new CUDA context will be bound to
     * that graphics device, enabling the use of CUDA/Graphics interop features.
     *
     * If ctx is not NULL, the specified context must be applied to the thread
     * that is allocating the PxCudaContextManager at creation time (i.e., it
     * cannot be popped). The PxCudaContextManager will take ownership of the
     * context until the manager is released. All access to the context must be
     * gated by lock acquisition.
     *
     * If the user provides a context for the PxCudaContextManager, the context
     * _must_ have been created either on the GPU ordinal returned by
     * PxGetSuggestedCudaDeviceOrdinal() or on your graphics device.
     *
     * It is perfectly acceptable to allocate device or host pinned memory from
     * the context outside the scope of the PxCudaMemoryManager, so long as you
     * manage its eventual cleanup.
     */
    CUcontext *ctx;

    /**
     * \brief D3D device pointer or OpenGL context handle
     *
     * Only applicable when ctx is NULL, thus forcing a new context to be
     * created. In that case, the created context will be bound to this
     * graphics device.
     */
    void *graphicsDevice;

#if PX_SUPPORT_GPU_PHYSX
    /**
     * \brief Application-specific GUID
     *
     * If your application employs PhysX modules that use CUDA, you need to use a GUID
     * so that patches for new architectures can be released for your game. You can
     * obtain a GUID for your application from NVIDIA.
     */
    const char* appGUID;
#endif
    /**
     * \brief The CUDA/Graphics interop mode of this context
     *
     * If ctx is NULL, this value describes the nature of the graphicsDevice
     * pointer provided by the user. Otherwise, it describes the nature of the
     * context provided by the user.
     */
    PxCudaInteropMode::Enum interopMode;


    /**
     * \brief Size of persistent memory
     *
     * This memory is allocated up front and stays allocated until the
     * PxCudaContextManager is released. The size is given in bytes, must be a
     * power of two, and must be larger than the page size. Set to 0 to only
     * use dynamic pages.
     *
     * Note: On Vista O/S and above, there is a per-memory-allocation overhead
     * to every CUDA work submission, so we recommend that you carefully tune
     * this initial base memory size to closely approximate the amount of
     * memory your application will consume.
     *
     * Note: This is currently not used by PxSceneFlag::eENABLE_GPU_DYNAMICS. Memory
     * allocation properties are configured for GPU rigid bodies using
     * PxSceneDesc::gpuDynamicsConfig.
     */
    uint32_t memoryBaseSize[PxCudaBufferMemorySpace::COUNT];

    /**
     * \brief Size of memory pages
     *
     * The memory manager will dynamically grow and shrink in blocks that are a
     * multiple of this page size. The size must be a power of two and greater
     * than 0.
     *
     * Note: This is currently not used by PxSceneFlag::eENABLE_GPU_DYNAMICS. Memory
     * allocation properties are configured for GPU rigid bodies using
     * PxSceneDesc::gpuDynamicsConfig.
     */
    uint32_t memoryPageSize[PxCudaBufferMemorySpace::COUNT];

    /**
     * \brief Maximum size of memory that the memory manager will allocate
     *
     * Note: This is currently not used by PxSceneFlag::eENABLE_GPU_DYNAMICS. Memory
     * allocation properties are configured for GPU rigid bodies using
     * PxSceneDesc::gpuDynamicsConfig.
     */
    uint32_t maxMemorySize[PxCudaBufferMemorySpace::COUNT];

    PX_INLINE PxCudaContextManagerDesc()
    {
        ctx = NULL;
        interopMode = PxCudaInteropMode::NO_INTEROP;
        graphicsDevice = 0;
#if PX_SUPPORT_GPU_PHYSX
        appGUID = NULL;
#endif
        for(uint32_t i = 0; i < PxCudaBufferMemorySpace::COUNT; i++)
        {
            memoryBaseSize[i] = 0;
            memoryPageSize[i] = 2 * 1024*1024;
            maxMemorySize[i] = UINT32_MAX;
        }
    }
};
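
/*
 * Usage sketch (not part of this header's API): creating a context manager from
 * the descriptor above. PxCreateCudaContextManager() is assumed here to be the
 * factory function declared elsewhere in the SDK (e.g. PxGpu.h); check your SDK
 * version for its exact signature. 'foundation' is an existing PxFoundation.
 *
 *     PxCudaContextManagerDesc desc;            // defaults: new context, no interop
 *     PxCudaContextManager* mgr = PxCreateCudaContextManager(foundation, desc);
 *     if(mgr && !mgr->contextIsValid())
 *     {
 *         mgr->release();                       // CUDA context could not be created
 *         mgr = NULL;
 *     }
 */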


/**
 * \brief Manages memory, thread locks, and task scheduling for a CUDA context
 *
 * A PxCudaContextManager manages access to a single CUDA context, allowing it to
 * be shared between multiple scenes. Memory allocations are dynamic: starting
 * with an initial heap size and growing on demand by a configurable page size.
 * The context must be acquired from the manager before using any CUDA APIs.
 *
 * The PxCudaContextManager is based on the CUDA driver API and explicitly does not
 * support the CUDA runtime API (i.e., CUDART).
 */
class PxCudaContextManager
{
public:
    /**
     * \brief Acquire the CUDA context for the current thread
     *
     * Acquisitions are allowed to be recursive within a single thread.
     * You can acquire the context multiple times, as long as you release
     * it the same number of times.
     *
     * The context must be acquired before using most CUDA functions.
     */
    virtual void acquireContext() = 0;

    /**
     * \brief Release the CUDA context from the current thread
     *
     * The CUDA context should be released as soon as practically
     * possible, to allow other CPU threads to work efficiently.
     */
    virtual void releaseContext() = 0;
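
    /*
     * Typical pattern (sketch): every acquireContext() is paired with a
     * releaseContext() on the same thread; calls may nest. 'mgr' is assumed to
     * be a valid PxCudaContextManager pointer.
     *
     *     mgr->acquireContext();
     *     // ... issue CUDA driver API calls against mgr->getContext() ...
     *     mgr->releaseContext();
     *
     * See also PxScopedCudaLock below for a RAII helper.
     */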

    /**
     * \brief Return the CUcontext
     */
    virtual CUcontext getContext() = 0;

    /**
     * \brief Return the PxCudaMemoryManager instance associated with this
     * CUDA context
     *
     * Note: This is currently not used by PxSceneFlag::eENABLE_GPU_DYNAMICS. Memory
     * allocation properties are configured for GPU rigid bodies using
     * PxSceneDesc::gpuDynamicsConfig.
     */
    virtual PxCudaMemoryManager *getMemoryManager() = 0;

    /**
     * \brief Returns true if the context manager owns a valid CUDA context
     *
     * This method should be called after creating a PxCudaContextManager,
     * especially if the manager was responsible for allocating its own
     * CUDA context (desc.ctx == NULL).
     */
    virtual bool contextIsValid() const = 0;

    /* Query CUDA context and device properties, without acquiring context */

    virtual bool supportsArchSM10() const = 0;  //!< G80
    virtual bool supportsArchSM11() const = 0;  //!< G92
    virtual bool supportsArchSM12() const = 0;  //!< GT200
    virtual bool supportsArchSM13() const = 0;  //!< GT260
    virtual bool supportsArchSM20() const = 0;  //!< GF100
    virtual bool supportsArchSM30() const = 0;  //!< GK100
    virtual bool supportsArchSM35() const = 0;  //!< GK110
    virtual bool supportsArchSM50() const = 0;  //!< GM100
    virtual bool supportsArchSM52() const = 0;  //!< GM200
    virtual bool supportsArchSM60() const = 0;  //!< GP100
    virtual bool isIntegrated() const = 0;      //!< true if the GPU is an integrated (MCP) part
    virtual bool canMapHostMemory() const = 0;  //!< true if the GPU can map host memory (zero-copy)
    virtual int getDriverVersion() const = 0;   //!< returns cached value of cuDriverGetVersion()
    virtual size_t getDeviceTotalMemBytes() const = 0; //!< returns cached value of the device memory size
    virtual int getMultiprocessorCount() const = 0; //!< returns cached value of the SM unit count
    virtual unsigned int getClockRate() const = 0; //!< returns cached value of the SM clock frequency
    virtual int getSharedMemPerBlock() const = 0; //!< returns total amount of shared memory available per block in bytes
    virtual int getSharedMemPerMultiprocessor() const = 0; //!< returns total amount of shared memory available per multiprocessor in bytes
    virtual unsigned int getMaxThreadsPerBlock() const = 0; //!< returns the maximum number of threads per block
    virtual const char *getDeviceName() const = 0; //!< returns device name retrieved from the driver
    virtual CUdevice getDevice() const = 0; //!< returns device handle retrieved from the driver
    virtual PxCudaInteropMode::Enum getInteropMode() const = 0; //!< interop mode the context was created with

    virtual void setUsingConcurrentStreams(bool) = 0; //!< turn on/off using concurrent streams for GPU work
    virtual bool getUsingConcurrentStreams() const = 0; //!< true if GPU work can run in concurrent streams
    /* End query methods that don't require context to be acquired */
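
    /*
     * Example (sketch): the query methods above can be used without holding the
     * context lock, e.g. to log basic device information ('mgr' is assumed to be
     * a valid PxCudaContextManager pointer):
     *
     *     printf("GPU: %s, %d SMs, %zu bytes device memory, driver %d\n",
     *            mgr->getDeviceName(), mgr->getMultiprocessorCount(),
     *            mgr->getDeviceTotalMemBytes(), mgr->getDriverVersion());
     */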

    /**
     * \brief Register a rendering resource with CUDA
     *
     * This function is called to register render resources (allocated
     * from OpenGL) with CUDA so that the memory may be shared
     * between the two systems. This is only required for render
     * resources that are designed for interop use. In APEX, each
     * render resource descriptor that could support interop has a
     * 'registerInCUDA' boolean variable.
     *
     * The function must be called again any time your graphics device
     * is reset, to re-register the resource.
     *
     * Returns true if the registration succeeded. A registered
     * resource must be unregistered before it can be released.
     *
     * \param resource [OUT] the handle to the resource that can be used with CUDA
     * \param buffer [IN] GLuint buffer index to be mapped to CUDA
     * \param flags [IN] CUDA interop registration flags
     */
    virtual bool registerResourceInCudaGL(CUgraphicsResource &resource, uint32_t buffer, PxCudaInteropRegisterFlags flags = PxCudaInteropRegisterFlags()) = 0;

    /**
     * \brief Register a rendering resource with CUDA
     *
     * This function is called to register render resources (allocated
     * from Direct3D) with CUDA so that the memory may be shared
     * between the two systems. This is only required for render
     * resources that are designed for interop use. In APEX, each
     * render resource descriptor that could support interop has a
     * 'registerInCUDA' boolean variable.
     *
     * The function must be called again any time your graphics device
     * is reset, to re-register the resource.
     *
     * Returns true if the registration succeeded. A registered
     * resource must be unregistered before it can be released.
     *
     * \param resource [OUT] the handle to the resource that can be used with CUDA
     * \param resourcePointer [IN] a pointer to the IDirect3DResource9, ID3D10Device, or ID3D11Resource to be registered
     * \param flags [IN] CUDA interop registration flags
     */
    virtual bool registerResourceInCudaD3D(CUgraphicsResource &resource, void *resourcePointer, PxCudaInteropRegisterFlags flags = PxCudaInteropRegisterFlags()) = 0;

    /**
     * \brief Unregister a rendering resource with CUDA
     *
     * If a render resource was successfully registered with CUDA using
     * the registerResourceInCuda***() methods, this function must be called
     * to unregister the resource before it can be released.
     */
    virtual bool unregisterResourceInCuda(CUgraphicsResource resource) = 0;
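
    /*
     * Example (sketch): registering an OpenGL buffer object for interop and
     * unregistering it again before it is released. 'mgr' is assumed to be a
     * valid PxCudaContextManager pointer created with OGL_INTEROP, and
     * 'glBufferId' an existing GLuint buffer object.
     *
     *     CUgraphicsResource resource;
     *     mgr->acquireContext();
     *     if(mgr->registerResourceInCudaGL(resource, glBufferId))
     *     {
     *         // ... map and use the resource via the CUDA driver API ...
     *         mgr->unregisterResourceInCuda(resource);
     *     }
     *     mgr->releaseContext();
     */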

    /**
     * \brief Determine if the user has configured a dedicated PhysX GPU in the NV Control Panel
     * \note If using CUDA Interop, this will always return 0
     * \returns 1 if there is a dedicated GPU
     *          0 if there is NOT a dedicated GPU
     *         -1 if the routine is not implemented
     */
    virtual int usingDedicatedGPU() const = 0;

    /**
     * \brief Release the PxCudaContextManager
     *
     * When the manager instance is released, it also releases its
     * PxCudaMemoryManager. Before the memory manager is released, it
     * frees all allocated memory pages. If the PxCudaContextManager
     * created the CUDA context it was responsible for, it also frees
     * that context.
     *
     * Do not release the PxCudaContextManager if there are any scenes
     * using it. Those scenes must be released first.
     */
    virtual void release() = 0;

protected:

    /**
     * \brief Protected destructor, use release() method
     */
    virtual ~PxCudaContextManager() {}
};

/**
 * \brief Convenience class for holding the CUDA lock within a scope
 */
class PxScopedCudaLock
{
public:
    /**
     * \brief ScopedCudaLock constructor
     */
    PxScopedCudaLock(PxCudaContextManager& ctx) : mCtx(&ctx)
    {
        mCtx->acquireContext();
    }

    /**
     * \brief ScopedCudaLock destructor
     */
    ~PxScopedCudaLock()
    {
        mCtx->releaseContext();
    }

protected:

    /**
     * \brief CUDA context manager pointer (initialized in the constructor)
     */
    PxCudaContextManager* mCtx;
};
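
/*
 * Usage sketch: the scoped lock keeps the CUDA context acquired for the lifetime
 * of the local object ('mgr' is assumed to be a valid PxCudaContextManager
 * pointer):
 *
 *     {
 *         PxScopedCudaLock lock(*mgr);
 *         // context is acquired here; safe to issue CUDA driver API calls
 *     } // context released when 'lock' goes out of scope
 */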

} // end physx namespace

#endif // PX_SUPPORT_GPU_PHYSX
#endif // PXCUDACONTEXTMANAGER_PXCUDACONTEXTMANAGER_H
