1/*M///////////////////////////////////////////////////////////////////////////////////////
2//
3// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4//
5// By downloading, copying, installing or using the software you agree to this license.
6// If you do not agree to this license, do not download, install,
7// copy or use the software.
8//
9//
10// License Agreement
11// For Open Source Computer Vision Library
12//
13// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
16// Third party copyrights are property of their respective owners.
17//
18// Redistribution and use in source and binary forms, with or without modification,
19// are permitted provided that the following conditions are met:
20//
21// * Redistribution's of source code must retain the above copyright notice,
22// this list of conditions and the following disclaimer.
23//
24// * Redistribution's in binary form must reproduce the above copyright notice,
25// this list of conditions and the following disclaimer in the documentation
26// and/or other materials provided with the distribution.
27//
28// * The name of the copyright holders may not be used to endorse or promote products
29// derived from this software without specific prior written permission.
30//
31// This software is provided by the copyright holders and contributors "as is" and
32// any express or implied warranties, including, but not limited to, the implied
33// warranties of merchantability and fitness for a particular purpose are disclaimed.
34// In no event shall the Intel Corporation or contributors be liable for any direct,
35// indirect, incidental, special, exemplary, or consequential damages
36// (including, but not limited to, procurement of substitute goods or services;
37// loss of use, data, or profits; or business interruption) however caused
38// and on any theory of liability, whether in contract, strict liability,
39// or tort (including negligence or otherwise) arising in any way out of
40// the use of this software, even if advised of the possibility of such damage.
41//
42//M*/
43
44#ifndef OPENCV_CORE_CUDA_HPP
45#define OPENCV_CORE_CUDA_HPP
46
47#ifndef __cplusplus
48# error cuda.hpp header must be compiled as C++
49#endif
50
51#include "opencv2/core.hpp"
52#include "opencv2/core/cuda_types.hpp"
53
54/**
55 @defgroup cuda CUDA-accelerated Computer Vision
56 @{
57 @defgroup cudacore Core part
58 @{
59 @defgroup cudacore_init Initialization and Information
60 @defgroup cudacore_struct Data Structures
61 @}
62 @}
63 */
64
65namespace cv { namespace cuda {
66
67//! @addtogroup cudacore_struct
68//! @{
69
70//===================================================================================
71// GpuMat
72//===================================================================================
73
74/** @brief Base storage class for GPU memory with reference counting.
75
76Its interface matches the Mat interface with the following limitations:
77
78- no arbitrary dimensions support (only 2D)
79- no functions that return references to their data (because references on GPU are not valid for
80 CPU)
81- no expression templates technique support
82
83Beware that the latter limitation may lead to overloaded matrix operators that cause memory
84allocations. The GpuMat class is convertible to cuda::PtrStepSz and cuda::PtrStep so it can be
85passed directly to the kernel.
86
87@note In contrast with Mat, in most cases GpuMat::isContinuous() == false. This means that rows are
88aligned to a size depending on the hardware. Single-row GpuMat is always a continuous matrix.
89
90@note It is not recommended to leave static or global GpuMat variables allocated, that is, to rely
91on their destructors. The destruction order of such variables and of the CUDA context is undefined, and the
92GPU memory release function returns an error if the CUDA context has already been destroyed.
93
94Some member functions are described as a "Blocking Call" while others are described as a
95"Non-Blocking Call". Blocking functions are synchronous to the host: it is guaranteed that the GPU
96operation has finished when the function returns. Non-blocking functions are asynchronous to the
97host: they may return before the GPU operation has finished.
98
99Compared to their blocking counterparts, non-blocking functions accept Stream as an additional
100argument. If a non-default stream is passed, the GPU operation may overlap with operations in other
101streams.
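
A minimal sketch of a blocking host-to-device-to-host round trip (the sizes and fill values are
arbitrary placeholders; it assumes OpenCV was built with CUDA support):
@code
    cv::Mat h_src(480, 640, CV_8UC1, cv::Scalar(0));
    cv::cuda::GpuMat d_img;
    d_img.upload(h_src);            // blocking upload to device memory
    d_img.setTo(cv::Scalar(255));   // runs on the GPU
    cv::Mat h_dst;
    d_img.download(h_dst);          // blocking download back to host memory
@endcode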
102
103@sa Mat
104 */
105class CV_EXPORTS_W GpuMat
106{
107public:
108 class CV_EXPORTS_W Allocator
109 {
110 public:
111 virtual ~Allocator() {}
112
113 // allocator must fill data, step and refcount fields
114 virtual bool allocate(GpuMat* mat, int rows, int cols, size_t elemSize) = 0;
115 virtual void free(GpuMat* mat) = 0;
116 };
117
118 //! default allocator
119 CV_WRAP static GpuMat::Allocator* defaultAllocator();
120 CV_WRAP static void setDefaultAllocator(GpuMat::Allocator* allocator);
121
122 //! default constructor
123 CV_WRAP explicit GpuMat(GpuMat::Allocator* allocator = GpuMat::defaultAllocator());
124
125 //! constructs GpuMat of the specified size and type
126 CV_WRAP GpuMat(int rows, int cols, int type, GpuMat::Allocator* allocator = GpuMat::defaultAllocator());
127 CV_WRAP GpuMat(Size size, int type, GpuMat::Allocator* allocator = GpuMat::defaultAllocator());
128
129 //! constructs GpuMat and fills it with the specified value s
130 CV_WRAP GpuMat(int rows, int cols, int type, Scalar s, GpuMat::Allocator* allocator = GpuMat::defaultAllocator());
131 CV_WRAP GpuMat(Size size, int type, Scalar s, GpuMat::Allocator* allocator = GpuMat::defaultAllocator());
132
133 //! copy constructor
134 CV_WRAP GpuMat(const GpuMat& m);
135
136 //! constructor for GpuMat headers pointing to user-allocated data
137 GpuMat(int rows, int cols, int type, void* data, size_t step = Mat::AUTO_STEP);
138 GpuMat(Size size, int type, void* data, size_t step = Mat::AUTO_STEP);
139
140 //! creates a GpuMat header for a part of the bigger matrix
141 CV_WRAP GpuMat(const GpuMat& m, Range rowRange, Range colRange);
142 CV_WRAP GpuMat(const GpuMat& m, Rect roi);
143
144 //! builds GpuMat from host memory (Blocking call)
145 CV_WRAP explicit GpuMat(InputArray arr, GpuMat::Allocator* allocator = GpuMat::defaultAllocator());
146
147 //! destructor - calls release()
148 ~GpuMat();
149
150 //! assignment operators
151 GpuMat& operator =(const GpuMat& m);
152
153 //! allocates new GpuMat data unless the GpuMat already has specified size and type
154 CV_WRAP void create(int rows, int cols, int type);
155 CV_WRAP void create(Size size, int type);
156
157 //! decreases the reference counter and deallocates the data when the reference counter reaches 0
158 CV_WRAP void release();
159
160 //! swaps with other smart pointer
161 CV_WRAP void swap(GpuMat& mat);
162
163 /** @brief Performs data upload to GpuMat (Blocking call)
164
165 This function copies data from host memory to device memory. As this is a blocking call, it is
166 guaranteed that the copy operation has finished when this function returns.
167 */
168 CV_WRAP void upload(InputArray arr);
169
170 /** @brief Performs data upload to GpuMat (Non-Blocking call)
171
172 This function copies data from host memory to device memory. As this is a non-blocking call, the
173 function may return before the copy operation has finished.
174
175 The copy operation may be overlapped with operations in other non-default streams if \p stream is
176 not the default stream and \p arr is HostMem allocated with the HostMem::PAGE_LOCKED option.
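
    A minimal sketch of an asynchronous upload from page-locked host memory (assumes a CUDA-capable
    device; the sizes are arbitrary):
    @code
        cv::cuda::HostMem h_page_locked(480, 640, CV_8UC1, cv::cuda::HostMem::PAGE_LOCKED);
        cv::cuda::Stream stream;
        cv::cuda::GpuMat d_img;
        d_img.upload(h_page_locked, stream); // may return before the copy has finished
        // ... enqueue further GPU work on the same stream ...
        stream.waitForCompletion();          // synchronize before reusing h_page_locked on the host
    @endcode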
177 */
178 CV_WRAP void upload(InputArray arr, Stream& stream);
179
180 /** @brief Performs data download from GpuMat (Blocking call)
181
182 This function copies data from device memory to host memory. As this is a blocking call, it is
183 guaranteed that the copy operation has finished when this function returns.
184 */
185 CV_WRAP void download(OutputArray dst) const;
186
187 /** @brief Performs data download from GpuMat (Non-Blocking call)
188
189 This function copies data from device memory to host memory. As this is a non-blocking call, the
190 function may return before the copy operation has finished.
191
192 The copy operation may be overlapped with operations in other non-default streams if \p stream is
193 not the default stream and \p dst is HostMem allocated with HostMem::PAGE_LOCKED option.
194 */
195 CV_WRAP void download(OutputArray dst, Stream& stream) const;
196
197 //! returns deep copy of the GpuMat, i.e. the data is copied
198 CV_WRAP GpuMat clone() const;
199
200 //! copies the GpuMat content to device memory (Blocking call)
201 void copyTo(OutputArray dst) const;
202 //! bindings overload which copies the GpuMat content to device memory (Blocking call)
203 CV_WRAP void copyTo(CV_OUT GpuMat& dst) const {
204 copyTo(static_cast<OutputArray>(dst));
205 }
206
207 //! copies the GpuMat content to device memory (Non-Blocking call)
208 void copyTo(OutputArray dst, Stream& stream) const;
209 //! bindings overload which copies the GpuMat content to device memory (Non-Blocking call)
210 CV_WRAP void copyTo(CV_OUT GpuMat& dst, Stream& stream) const {
211 copyTo(static_cast<OutputArray>(dst), stream);
212 }
213
214 //! copies those GpuMat elements to "dst" that are marked with non-zero mask elements (Blocking call)
215 void copyTo(OutputArray dst, InputArray mask) const;
216 //! bindings overload which copies those GpuMat elements to "dst" that are marked with non-zero mask elements (Blocking call)
217 CV_WRAP void copyTo(CV_OUT GpuMat& dst, GpuMat& mask) const {
218 copyTo(static_cast<OutputArray>(dst), static_cast<InputArray>(mask));
219 }
220
221 //! copies those GpuMat elements to "dst" that are marked with non-zero mask elements (Non-Blocking call)
222 void copyTo(OutputArray dst, InputArray mask, Stream& stream) const;
223 //! bindings overload which copies those GpuMat elements to "dst" that are marked with non-zero mask elements (Non-Blocking call)
224 CV_WRAP void copyTo(CV_OUT GpuMat& dst, GpuMat& mask, Stream& stream) const {
225 copyTo(static_cast<OutputArray>(dst), static_cast<InputArray>(mask), stream);
226 }
227
228 //! sets all the GpuMat elements to s (Blocking call)
229 CV_WRAP GpuMat& setTo(Scalar s);
230
231 //! sets all the GpuMat elements to s (Non-Blocking call)
232 CV_WRAP GpuMat& setTo(Scalar s, Stream& stream);
233
234 //! sets some of the GpuMat elements to s, according to the mask (Blocking call)
235 CV_WRAP GpuMat& setTo(Scalar s, InputArray mask);
236
237 //! sets some of the GpuMat elements to s, according to the mask (Non-Blocking call)
238 CV_WRAP GpuMat& setTo(Scalar s, InputArray mask, Stream& stream);
239
240 //! converts GpuMat to another datatype (Blocking call)
241 void convertTo(OutputArray dst, int rtype) const;
242
243 //! converts GpuMat to another datatype (Non-Blocking call)
244 void convertTo(OutputArray dst, int rtype, Stream& stream) const;
245 //! bindings overload which converts GpuMat to another datatype (Non-Blocking call)
246 CV_WRAP void convertTo(CV_OUT GpuMat& dst, int rtype, Stream& stream) const {
247 convertTo(static_cast<OutputArray>(dst), rtype, stream);
248 }
249
250 //! converts GpuMat to another datatype with scaling (Blocking call)
251 void convertTo(OutputArray dst, int rtype, double alpha, double beta = 0.0) const;
252 //! bindings overload which converts GpuMat to another datatype with scaling (Blocking call)
253 CV_WRAP void convertTo(CV_OUT GpuMat& dst, int rtype, double alpha = 1.0, double beta = 0.0) const {
254 convertTo(static_cast<OutputArray>(dst), rtype, alpha, beta);
255 }
256
257 //! converts GpuMat to another datatype with scaling (Non-Blocking call)
258 void convertTo(OutputArray dst, int rtype, double alpha, Stream& stream) const;
259
260 //! converts GpuMat to another datatype with scaling (Non-Blocking call)
261 void convertTo(OutputArray dst, int rtype, double alpha, double beta, Stream& stream) const;
262 //! bindings overload which converts GpuMat to another datatype with scaling (Non-Blocking call)
263 CV_WRAP void convertTo(CV_OUT GpuMat& dst, int rtype, double alpha, double beta, Stream& stream) const {
264 convertTo(static_cast<OutputArray>(dst), rtype, alpha, beta, stream);
265 }
266
267 CV_WRAP void assignTo(GpuMat& m, int type = -1) const;
268
269 //! returns pointer to y-th row
270 uchar* ptr(int y = 0);
271 const uchar* ptr(int y = 0) const;
272
273 //! template version of the above method
274 template<typename _Tp> _Tp* ptr(int y = 0);
275 template<typename _Tp> const _Tp* ptr(int y = 0) const;
276
277 template <typename _Tp> operator PtrStepSz<_Tp>() const;
278 template <typename _Tp> operator PtrStep<_Tp>() const;
279
280 //! returns a new GpuMat header for the specified row
281 CV_WRAP GpuMat row(int y) const;
282
283 //! returns a new GpuMat header for the specified column
284 CV_WRAP GpuMat col(int x) const;
285
286 //! ... for the specified row span
287 CV_WRAP GpuMat rowRange(int startrow, int endrow) const;
288 CV_WRAP GpuMat rowRange(Range r) const;
289
290 //! ... for the specified column span
291 CV_WRAP GpuMat colRange(int startcol, int endcol) const;
292 CV_WRAP GpuMat colRange(Range r) const;
293
294 //! extracts a rectangular sub-GpuMat (this is a generalized form of row, rowRange etc.)
295 GpuMat operator ()(Range rowRange, Range colRange) const;
296 GpuMat operator ()(Rect roi) const;
297
298 //! creates alternative GpuMat header for the same data, with different
299 //! number of channels and/or different number of rows
300 CV_WRAP GpuMat reshape(int cn, int rows = 0) const;
301
302 //! locates GpuMat header within a parent GpuMat
303 CV_WRAP void locateROI(Size& wholeSize, Point& ofs) const;
304
305 //! moves/resizes the current GpuMat ROI inside the parent GpuMat
306 CV_WRAP GpuMat& adjustROI(int dtop, int dbottom, int dleft, int dright);
307
308 //! returns true iff the GpuMat data is continuous
309 //! (i.e. when there are no gaps between successive rows)
310 CV_WRAP bool isContinuous() const;
311
312 //! returns element size in bytes
313 CV_WRAP size_t elemSize() const;
314
315 //! returns the size of element channel in bytes
316 CV_WRAP size_t elemSize1() const;
317
318 //! returns element type
319 CV_WRAP int type() const;
320
321 //! returns element depth
322 CV_WRAP int depth() const;
323
324 //! returns number of channels
325 CV_WRAP int channels() const;
326
327 //! returns step/elemSize1()
328 CV_WRAP size_t step1() const;
329
330 //! returns GpuMat size : width == number of columns, height == number of rows
331 CV_WRAP Size size() const;
332
333 //! returns true if GpuMat data is NULL
334 CV_WRAP bool empty() const;
335
336 //! returns pointer to CUDA memory
337 CV_WRAP void* cudaPtr() const;
338
339 //! internal use method: updates the continuity flag
340 CV_WRAP void updateContinuityFlag();
341
342 /*! includes several bit-fields:
343 - the magic signature
344 - continuity flag
345 - depth
346 - number of channels
347 */
348 int flags;
349
350 //! the number of rows and columns
351 int rows, cols;
352
353 //! a distance between successive rows in bytes; includes the gap if any
354 CV_PROP size_t step;
355
356 //! pointer to the data
357 uchar* data;
358
359 //! pointer to the reference counter;
360 //! when GpuMat points to user-allocated data, the pointer is NULL
361 int* refcount;
362
363 //! helper fields used in locateROI and adjustROI
364 uchar* datastart;
365 const uchar* dataend;
366
367 //! allocator
368 Allocator* allocator;
369};
370
371struct CV_EXPORTS_W GpuData
372{
373 explicit GpuData(size_t _size);
374 ~GpuData();
375
376 GpuData(const GpuData&) = delete;
377 GpuData& operator=(const GpuData&) = delete;
378
379 GpuData(GpuData&&) = delete;
380 GpuData& operator=(GpuData&&) = delete;
381
382 uchar* data;
383 size_t size;
384};
385
386class CV_EXPORTS_W GpuMatND
387{
388public:
389 using SizeArray = std::vector<int>;
390 using StepArray = std::vector<size_t>;
391 using IndexArray = std::vector<int>;
392
393 //! destructor
394 ~GpuMatND();
395
396 //! default constructor
397 GpuMatND();
398
399 /** @overload
400 @param size Array of integers specifying an n-dimensional array shape.
401 @param type Array type. Use CV_8UC1, ..., CV_16FC4 to create 1-4 channel matrices, or
402 CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices.
403 */
404 GpuMatND(SizeArray size, int type);
405
406 /** @overload
407 @param size Array of integers specifying an n-dimensional array shape.
408 @param type Array type. Use CV_8UC1, ..., CV_16FC4 to create 1-4 channel matrices, or
409 CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices.
410 @param data Pointer to the user data. Matrix constructors that take data and step parameters do not
411 allocate matrix data. Instead, they just initialize the matrix header that points to the specified
412 data, which means that no data is copied. This operation is very efficient and can be used to
413 process external data using OpenCV functions. The external data is not automatically deallocated, so
414 you should take care of it.
415 @param step Array of size.size()-1 steps in case of a multi-dimensional array (the last step is always
416 set to the element size). If not specified, the matrix is assumed to be continuous.
417 */
418 GpuMatND(SizeArray size, int type, void* data, StepArray step = StepArray());
419
420 /** @brief Allocates GPU memory.
421 Suppose there is some GPU memory already allocated. In that case, this method may choose to reuse that
422 GPU memory under specific conditions: it must be of the same size and type, not externally allocated,
423 continuous (i.e., isContinuous() is true), and not a sub-matrix of another GpuMatND
424 (i.e., isSubmatrix() is false). In other words, this method guarantees that the GPU memory allocated by
425 this method is always continuous and is not a sub-region of another GpuMatND.
426 */
427 void create(SizeArray size, int type);
428
429 void release();
430
431 void swap(GpuMatND& m) noexcept;
432
433 /** @brief Creates a full copy of the array and the underlying data.
434 The method creates a full copy of the array. It mimics the behavior of Mat::clone(), i.e.
435 the original step is not taken into account. So, the array copy is a continuous array
436 occupying total()\*elemSize() bytes.
437 */
438 GpuMatND clone() const;
439
440 /** @overload
441 This overload is non-blocking, so it may return even if the copy operation is not finished.
442 */
443 GpuMatND clone(Stream& stream) const;
444
445 /** @brief Extracts a sub-matrix.
446 The operator makes a new header for the specified sub-array of \*this.
447 The operator is an O(1) operation, that is, no matrix data is copied.
448 @param ranges Array of selected ranges along each dimension.
449 */
450 GpuMatND operator()(const std::vector<Range>& ranges) const;
451
452 /** @brief Creates a GpuMat header for a 2D plane part of an n-dim matrix.
453 @note The returned GpuMat is constructed with the constructor for user-allocated data.
454 That is, it does not perform reference counting.
455 @note This function does not increment this GpuMatND's reference counter.
456 */
457 GpuMat createGpuMatHeader(IndexArray idx, Range rowRange, Range colRange) const;
458
459 /** @overload
460 Creates a GpuMat header if this GpuMatND is effectively 2D.
461 @note The returned GpuMat is constructed with the constructor for user-allocated data.
462 That is, it does not perform reference counting.
463 @note This function does not increment this GpuMatND's reference counter.
464 */
465 GpuMat createGpuMatHeader() const;
466
467 /** @brief Extracts a 2D plane part of an n-dim matrix.
468 It differs from createGpuMatHeader(IndexArray, Range, Range) in that it clones a part of this
469 GpuMatND to the returned GpuMat.
470 @note This operator does not increment this GpuMatND's reference counter.
471 */
472 GpuMat operator()(IndexArray idx, Range rowRange, Range colRange) const;
473
474 /** @brief Extracts a 2D plane part of an n-dim matrix if this GpuMatND is effectively 2D.
475 It differs from createGpuMatHeader() in that it clones a part of this GpuMatND.
476 @note This operator does not increment this GpuMatND's reference counter.
477 */
478 operator GpuMat() const;
479
480 GpuMatND(const GpuMatND&) = default;
481 GpuMatND& operator=(const GpuMatND&) = default;
482
483#if defined(__GNUC__) && __GNUC__ < 5
484 // error: function '...' defaulted on its first declaration with an exception-specification
485 // that differs from the implicit declaration '...'
486
487 GpuMatND(GpuMatND&&) = default;
488 GpuMatND& operator=(GpuMatND&&) = default;
489#else
490 GpuMatND(GpuMatND&&) noexcept = default;
491 GpuMatND& operator=(GpuMatND&&) noexcept = default;
492#endif
493
494 void upload(InputArray src);
495 void upload(InputArray src, Stream& stream);
496 void download(OutputArray dst) const;
497 void download(OutputArray dst, Stream& stream) const;
498
499 //! returns true iff the GpuMatND data is continuous
500 //! (i.e. when there are no gaps between successive rows)
501 bool isContinuous() const;
502
503 //! returns true if the matrix is a sub-matrix of another matrix
504 bool isSubmatrix() const;
505
506 //! returns element size in bytes
507 size_t elemSize() const;
508
509 //! returns the size of element channel in bytes
510 size_t elemSize1() const;
511
512 //! returns true if data is null
513 bool empty() const;
514
515 //! returns true if not empty and points to external (user-allocated) GPU memory
516 bool external() const;
517
518 //! returns pointer to the first byte of the GPU memory
519 uchar* getDevicePtr() const;
520
521 //! returns the total number of array elements
522 size_t total() const;
523
524 //! returns the size of underlying memory in bytes
525 size_t totalMemSize() const;
526
527 //! returns element type
528 int type() const;
529
530private:
531 //! internal use
532 void setFields(SizeArray size, int type, StepArray step = StepArray());
533
534public:
535 /*! includes several bit-fields:
536 - the magic signature
537 - continuity flag
538 - depth
539 - number of channels
540 */
541 int flags;
542
543 //! matrix dimensionality
544 int dims;
545
546 //! shape of this array
547 SizeArray size;
548
549 /*! step values
550 Their semantics are identical to the semantics of step for Mat.
551 */
552 StepArray step;
553
554private:
555 /*! internal use
556 If this GpuMatND holds external memory, this is empty.
557 */
558 std::shared_ptr<GpuData> data_;
559
560 /*! internal use
561 If this GpuMatND manages memory with reference counting, this value is
562 always equal to data_->data. If this GpuMatND holds external memory,
563 data_ is empty and data points to the external memory.
564 */
565 uchar* data;
566
567 /*! internal use
568 If this GpuMatND is a sub-matrix of a larger matrix, this value is the
569 byte offset of the first byte of the sub-matrix relative to the whole matrix.
570 */
571 size_t offset;
572};
573
574/** @brief Creates a continuous matrix.
575
576@param rows Row count.
577@param cols Column count.
578@param type Type of the matrix.
579@param arr Destination matrix. This parameter changes only if it has a proper type and area (
580\f$\texttt{rows} \times \texttt{cols}\f$ ).
581
582A matrix is called continuous if its elements are stored continuously, that is, without gaps at the
583end of each row.
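
For example (a sketch; the size and type are arbitrary):
@code
    cv::cuda::GpuMat buf;
    cv::cuda::createContinuous(480, 640, CV_8UC1, buf);
    CV_Assert(buf.isContinuous());
@endcode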
584 */
585CV_EXPORTS_W void createContinuous(int rows, int cols, int type, OutputArray arr);
586
587/** @brief Ensures that the size of a matrix is big enough and the matrix has a proper type.
588
589@param rows Minimum desired number of rows.
590@param cols Minimum desired number of columns.
591@param type Desired matrix type.
592@param arr Destination matrix.
593
594The function does not reallocate memory if the matrix has proper attributes already.
595 */
596CV_EXPORTS_W void ensureSizeIsEnough(int rows, int cols, int type, OutputArray arr);
597
598/** @brief Bindings overload to create a GpuMat from existing GPU memory.
599@param rows Row count.
600@param cols Column count.
601@param type Type of the matrix.
602@param cudaMemoryAddress Address of the allocated GPU memory on the device. This does not allocate matrix data. Instead, it just initializes the matrix header that points to the specified \a cudaMemoryAddress, which means that no data is copied. This operation is very efficient and can be used to process external data using OpenCV functions. The external data is not automatically deallocated, so you should take care of it.
603@param step Number of bytes each matrix row occupies. The value should include the padding bytes at the end of each row, if any. If the parameter is missing (set to Mat::AUTO_STEP ), no padding is assumed and the actual step is calculated as cols*elemSize(). See GpuMat::elemSize.
604@note Overload for generation of bindings only, not exported or intended for use internally from C++.
605 */
606CV_EXPORTS_W inline GpuMat createGpuMatFromCudaMemory(int rows, int cols, int type, size_t cudaMemoryAddress, size_t step = Mat::AUTO_STEP) {
607 return GpuMat(rows, cols, type, reinterpret_cast<void*>(cudaMemoryAddress), step);
608}
609
610 /** @overload
611@param size 2D array size: Size(cols, rows). In the Size() constructor, the number of rows and the number of columns go in the reverse order.
612@param type Type of the matrix.
613@param cudaMemoryAddress Address of the allocated GPU memory on the device. This does not allocate matrix data. Instead, it just initializes the matrix header that points to the specified \a cudaMemoryAddress, which means that no data is copied. This operation is very efficient and can be used to process external data using OpenCV functions. The external data is not automatically deallocated, so you should take care of it.
614@param step Number of bytes each matrix row occupies. The value should include the padding bytes at the end of each row, if any. If the parameter is missing (set to Mat::AUTO_STEP ), no padding is assumed and the actual step is calculated as cols*elemSize(). See GpuMat::elemSize.
615@note Overload for generation of bindings only, not exported or intended for use internally from C++.
616 */
617CV_EXPORTS_W inline GpuMat createGpuMatFromCudaMemory(Size size, int type, size_t cudaMemoryAddress, size_t step = Mat::AUTO_STEP) {
618 return GpuMat(size, type, reinterpret_cast<void*>(cudaMemoryAddress), step);
619}
620
621/** @brief BufferPool for use with CUDA streams
622
623BufferPool utilizes Stream's allocator to create new buffers for GpuMat's. It is
624only useful when enabled with #setBufferPoolUsage.
625
626@code
627 setBufferPoolUsage(true);
628@endcode
629
630@note #setBufferPoolUsage must be called \em before any Stream declaration.
631
632Users may specify a custom allocator for Stream and may implement their own stream-based
633functions utilizing the same underlying GPU memory management.
634
635If a custom allocator is not specified, BufferPool utilizes StackAllocator by
636default. StackAllocator allocates a chunk of GPU device memory beforehand,
637and when GpuMat is declared later on, it is given the pre-allocated memory.
638This kind of strategy reduces the number of calls for memory allocating APIs
639such as cudaMalloc or cudaMallocPitch.
640
641Below is an example that utilizes BufferPool with StackAllocator:
642
643@code
644 #include <opencv2/opencv.hpp>
645
646 using namespace cv;
647 using namespace cv::cuda;
648
649 int main()
650 {
651 setBufferPoolUsage(true); // Tell OpenCV that we are going to utilize BufferPool
652 setBufferPoolConfig(getDevice(), 1024 * 1024 * 64, 2); // Allocate 64 MB, 2 stacks (default is 10 MB, 5 stacks)
653
654 Stream stream1, stream2; // Each stream uses 1 stack
655 BufferPool pool1(stream1), pool2(stream2);
656
657 GpuMat d_src1 = pool1.getBuffer(4096, 4096, CV_8UC1); // 16MB
658 GpuMat d_dst1 = pool1.getBuffer(4096, 4096, CV_8UC3); // 48MB, pool1 is now full
659
660 GpuMat d_src2 = pool2.getBuffer(1024, 1024, CV_8UC1); // 1MB
661 GpuMat d_dst2 = pool2.getBuffer(1024, 1024, CV_8UC3); // 3MB
662
663 cvtColor(d_src1, d_dst1, cv::COLOR_GRAY2BGR, 0, stream1);
664 cvtColor(d_src2, d_dst2, cv::COLOR_GRAY2BGR, 0, stream2);
665 }
666@endcode
667
668If we allocate another GpuMat on pool1 in the above example, it will be carried out by
669the DefaultAllocator since the stack for pool1 is full.
670
671@code
672 GpuMat d_add1 = pool1.getBuffer(1024, 1024, CV_8UC1); // Stack for pool1 is full, memory is allocated with DefaultAllocator
673@endcode
674
675If a third stream is declared in the above example, allocating with #getBuffer
676within that stream will also be carried out by the DefaultAllocator because we've run out of
677stacks.
678
679@code
680 Stream stream3; // Only 2 stacks were allocated, we've run out of stacks
681 BufferPool pool3(stream3);
682 GpuMat d_src3 = pool3.getBuffer(1024, 1024, CV_8UC1); // Memory is allocated with DefaultAllocator
683@endcode
684
685@warning When utilizing StackAllocator, deallocation order is important.
686
687Just like a stack, deallocation must be done in LIFO order. Below is an example of
688erroneous usage that violates the LIFO rule. If OpenCV is compiled in Debug mode, this
689sample code will trigger a CV_Assert error.
690
691@code
692 int main()
693 {
694 setBufferPoolUsage(true); // Tell OpenCV that we are going to utilize BufferPool
695 Stream stream; // A default size (10 MB) stack is allocated to this stream
696 BufferPool pool(stream);
697
698 GpuMat mat1 = pool.getBuffer(1024, 1024, CV_8UC1); // Allocate mat1 (1MB)
699 GpuMat mat2 = pool.getBuffer(1024, 1024, CV_8UC1); // Allocate mat2 (1MB)
700
701 mat1.release(); // erroneous usage : mat2 must be deallocated before mat1
702 }
703@endcode
704
705Since C++ local variables are destroyed in the reverse order of construction,
706the code sample below satisfies the LIFO rule. Local GpuMat's are deallocated
707and the corresponding memory is automatically returned to the pool for later usage.
708
709@code
710 int main()
711 {
712 setBufferPoolUsage(true); // Tell OpenCV that we are going to utilize BufferPool
713 setBufferPoolConfig(getDevice(), 1024 * 1024 * 64, 2); // Allocate 64 MB, 2 stacks (default is 10 MB, 5 stacks)
714
715 Stream stream1, stream2; // Each stream uses 1 stack
716 BufferPool pool1(stream1), pool2(stream2);
717
718 for (int i = 0; i < 10; i++)
719 {
720 GpuMat d_src1 = pool1.getBuffer(4096, 4096, CV_8UC1); // 16MB
721 GpuMat d_dst1 = pool1.getBuffer(4096, 4096, CV_8UC3); // 48MB, pool1 is now full
722
723 GpuMat d_src2 = pool2.getBuffer(1024, 1024, CV_8UC1); // 1MB
724 GpuMat d_dst2 = pool2.getBuffer(1024, 1024, CV_8UC3); // 3MB
725
726 d_src1.setTo(Scalar(i), stream1);
727 d_src2.setTo(Scalar(i), stream2);
728
729 cvtColor(d_src1, d_dst1, cv::COLOR_GRAY2BGR, 0, stream1);
730 cvtColor(d_src2, d_dst2, cv::COLOR_GRAY2BGR, 0, stream2);
731 // The order of destruction of the local variables is:
732 // d_dst2 => d_src2 => d_dst1 => d_src1
733 // LIFO rule is satisfied, this code runs without error
734 }
735 }
736@endcode
737 */
738class CV_EXPORTS_W BufferPool
739{
740public:
741
742 //! Gets the BufferPool for the given stream.
743 CV_WRAP explicit BufferPool(Stream& stream);
744
745 //! Allocates a new GpuMat of given size and type.
746 CV_WRAP GpuMat getBuffer(int rows, int cols, int type);
747
748// WARNING: unreachable code using Ninja
749#if defined _MSC_VER && _MSC_VER >= 1920
750#pragma warning(push)
751#pragma warning(disable: 4702)
752#endif
753 //! Allocates a new GpuMat of given size and type.
754 CV_WRAP GpuMat getBuffer(Size size, int type) { return getBuffer(size.height, size.width, type); }
755#if defined _MSC_VER && _MSC_VER >= 1920
756#pragma warning(pop)
757#endif
758
759 //! Returns the allocator associated with the stream.
760 CV_WRAP Ptr<GpuMat::Allocator> getAllocator() const { return allocator_; }
761
762private:
763 Ptr<GpuMat::Allocator> allocator_;
764};
765
766//! BufferPool management (must be called before Stream creation)
767CV_EXPORTS_W void setBufferPoolUsage(bool on);
768CV_EXPORTS_W void setBufferPoolConfig(int deviceId, size_t stackSize, int stackCount);
769
770//===================================================================================
771// HostMem
772//===================================================================================
773
774/** @brief Class with reference counting wrapping special memory type allocation functions from CUDA.
775
776Its interface is also Mat-like but with additional memory type parameters.
777
778- **PAGE_LOCKED** sets a page locked memory type used commonly for fast and asynchronous
779 uploading/downloading data from/to GPU.
780- **SHARED** specifies a zero copy memory allocation that enables mapping the host memory to GPU
781 address space, if supported.
782- **WRITE_COMBINED** sets the write combined buffer that is not cached by CPU. Such buffers are
783 used to supply GPU with data when GPU only reads it. The advantage is a better CPU cache
784 utilization.
785
786@note The allocation size of such memory types is usually limited. For more details, see the *CUDA 2.2
787Pinned Memory APIs* document or the *CUDA C Programming Guide*.
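
A minimal sketch of allocating page-locked memory, filling it through a regular Mat header and
uploading it asynchronously (the size, type and fill value are arbitrary):
@code
    cv::cuda::HostMem pinned(1024, 1024, CV_32FC1, cv::cuda::HostMem::PAGE_LOCKED);
    cv::Mat header = pinned.createMatHeader();  // no data is copied
    header.setTo(cv::Scalar(1.0f));             // fill on the CPU side
    cv::cuda::Stream stream;
    cv::cuda::GpuMat d_buf;
    d_buf.upload(pinned, stream);               // asynchronous upload from pinned memory
    stream.waitForCompletion();
@endcode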
788 */
789class CV_EXPORTS_W HostMem
790{
791public:
792 enum AllocType { PAGE_LOCKED = 1, SHARED = 2, WRITE_COMBINED = 4 };
793
794 static MatAllocator* getAllocator(HostMem::AllocType alloc_type = HostMem::AllocType::PAGE_LOCKED);
795
796 CV_WRAP explicit HostMem(HostMem::AllocType alloc_type = HostMem::AllocType::PAGE_LOCKED);
797
798 HostMem(const HostMem& m);
799
800 CV_WRAP HostMem(int rows, int cols, int type, HostMem::AllocType alloc_type = HostMem::AllocType::PAGE_LOCKED);
801 CV_WRAP HostMem(Size size, int type, HostMem::AllocType alloc_type = HostMem::AllocType::PAGE_LOCKED);
802
803 //! creates a HostMem object from host memory, copying the data
804 CV_WRAP explicit HostMem(InputArray arr, HostMem::AllocType alloc_type = HostMem::AllocType::PAGE_LOCKED);
805
806 ~HostMem();
807
808 HostMem& operator =(const HostMem& m);
809
810 //! swaps with other smart pointer
811 CV_WRAP void swap(HostMem& b);
812
813 //! returns deep copy of the matrix, i.e. the data is copied
814 CV_WRAP HostMem clone() const;
815
816 //! allocates new matrix data unless the matrix already has specified size and type.
817 CV_WRAP void create(int rows, int cols, int type);
818 void create(Size size, int type);
819
820 //! creates alternative HostMem header for the same data, with different
821 //! number of channels and/or different number of rows
822 CV_WRAP HostMem reshape(int cn, int rows = 0) const;
823
824 //! decrements the reference counter and releases the memory if needed.
825 void release();
826
827 //! returns matrix header with disabled reference counting for HostMem data.
828 CV_WRAP Mat createMatHeader() const;
829
830 /** @brief Maps CPU memory to GPU address space and creates the cuda::GpuMat header without reference counting
831 for it.
832
833 This can be done only if memory was allocated with the SHARED flag and if it is supported by the
834 hardware. Laptops often share video and CPU memory, so address spaces can be mapped, which
835 eliminates an extra copy.
836 */
837 GpuMat createGpuMatHeader() const;
838
839 // Please see cv::Mat for descriptions
840 CV_WRAP bool isContinuous() const;
841 CV_WRAP size_t elemSize() const;
842 CV_WRAP size_t elemSize1() const;
843 CV_WRAP int type() const;
844 CV_WRAP int depth() const;
845 CV_WRAP int channels() const;
846 CV_WRAP size_t step1() const;
847 CV_WRAP Size size() const;
848 CV_WRAP bool empty() const;
849
850 // Please see cv::Mat for descriptions
851 int flags;
852 int rows, cols;
853 CV_PROP size_t step;
854
855 uchar* data;
856 int* refcount;
857
858 uchar* datastart;
859 const uchar* dataend;
860
861 AllocType alloc_type;
862};
863
864/** @brief Page-locks the memory of a matrix and maps it for the device(s).
865
866@param m Input matrix.
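
A sketch of page-locking an existing Mat to speed up asynchronous transfers; the matrix should be
unregistered with unregisterPageLocked before it is destroyed:
@code
    cv::Mat h_img(480, 640, CV_8UC1);
    cv::cuda::registerPageLocked(h_img);
    // ... asynchronous uploads/downloads involving h_img ...
    cv::cuda::unregisterPageLocked(h_img);
@endcode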
867 */
868CV_EXPORTS_W void registerPageLocked(Mat& m);
869
870/** @brief Unmaps the memory of a matrix and makes it pageable again.
871
872@param m Input matrix.
873 */
874CV_EXPORTS_W void unregisterPageLocked(Mat& m);
875
876//===================================================================================
877// Stream
878//===================================================================================
879
880/** @brief This class encapsulates a queue of asynchronous calls.
881
882@note Currently, you may face problems if an operation is enqueued twice with different data. Some
883functions use constant GPU memory, and the next call may update that memory before the previous one
884has finished. But calling different operations asynchronously is safe because each operation
885has its own constant buffer. Memory copy/upload/download/set operations to the buffers you hold are
886also safe.
887
888@note The Stream class is not thread-safe. Please use different Stream objects for different CPU threads.
889
890@code
891void thread1()
892{
893 cv::cuda::Stream stream1;
894 cv::cuda::func1(..., stream1);
895}
896
897void thread2()
898{
899 cv::cuda::Stream stream2;
900 cv::cuda::func2(..., stream2);
901}
902@endcode
903
904@note By default, all CUDA routines are launched in the Stream::Null() object if the stream is not specified by the user.
905In a multi-threaded environment, stream objects must be passed explicitly (see the previous note).
906 */
907class CV_EXPORTS_W Stream
908{
909 typedef void (Stream::*bool_type)() const;
910 void this_type_does_not_support_comparisons() const {}
911
912public:
913 typedef void (*StreamCallback)(int status, void* userData);
914
915 //! creates a new asynchronous stream
916 CV_WRAP Stream();
917
918 //! creates a new asynchronous stream with custom allocator
919 CV_WRAP Stream(const Ptr<GpuMat::Allocator>& allocator);
920
921 /** @brief creates a new Stream using the cudaFlags argument to determine the behaviors of the stream
922
923 @note The cudaFlags parameter is passed to the underlying API cudaStreamCreateWithFlags() and
924 supports the same parameter values.
925 @code
926 // creates an OpenCV cuda::Stream that manages an asynchronous, non-blocking,
927 // non-default CUDA stream
928 cv::cuda::Stream cvStream(cudaStreamNonBlocking);
929 @endcode
930 */
931 CV_WRAP Stream(const size_t cudaFlags);
932
933 /** @brief Returns true if the current stream queue is finished. Otherwise, it returns false.
934 */
935 CV_WRAP bool queryIfComplete() const;
936
937 /** @brief Blocks the current CPU thread until all operations in the stream are complete.
938 */
939 CV_WRAP void waitForCompletion();
940
941 /** @brief Makes a compute stream wait on an event.
942 */
943 CV_WRAP void waitEvent(const Event& event);
944
945 /** @brief Adds a callback to be called on the host after all currently enqueued items in the stream have
946 completed.
947
948 @note Callbacks must not make any CUDA API calls. Callbacks must not perform any synchronization
949 that may depend on outstanding device work or other callbacks that are not mandated to run earlier.
950 Callbacks without a mandated order (in independent streams) execute in undefined order and may be
951 serialized.
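
    A minimal sketch of a host callback (the callback body is a placeholder and must not call CUDA APIs):
    @code
        static void onStreamDone(int status, void* userData)
        {
            (void)status; (void)userData;
            // host-side work only, e.g. signal a condition variable
        }
        // ...
        cv::cuda::Stream stream;
        stream.enqueueHostCallback(onStreamDone, nullptr);
    @endcode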
952 */
953 void enqueueHostCallback(StreamCallback callback, void* userData);
954
955 //! return Stream object for default CUDA stream
956 CV_WRAP static Stream& Null();
957
958 //! returns true if stream object is not default (!= 0)
959 operator bool_type() const;
960
961 //! return Pointer to CUDA stream
962 CV_WRAP void* cudaPtr() const;
963
964 class Impl;
965
966private:
967 Ptr<Impl> impl_;
968 Stream(const Ptr<Impl>& impl);
969
970 friend struct StreamAccessor;
971 friend class BufferPool;
972 friend class DefaultDeviceInitializer;
973};
974
975
976/** @brief Bindings overload to create a Stream object from the address stored in an existing CUDA Runtime API stream pointer (cudaStream_t).
977@param cudaStreamMemoryAddress Memory address stored in a CUDA Runtime API stream pointer (cudaStream_t). The created Stream object does not perform any allocation or deallocation and simply wraps existing raw CUDA Runtime API stream pointer.
978@note Overload for generation of bindings only, not exported or intended for use internally from C++.
979 */
980CV_EXPORTS_W Stream wrapStream(size_t cudaStreamMemoryAddress);
981
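/** @brief Wrapper for a CUDA event, used for synchronization between streams and for timing GPU work.

A minimal sketch of timing asynchronous GPU work with a pair of events; someGpuOperation is a
placeholder for any operation that accepts a Stream:
@code
    cv::cuda::Stream stream;
    cv::cuda::Event start, stop;
    start.record(stream);
    // someGpuOperation(..., stream);   // placeholder for asynchronous GPU work
    stop.record(stream);
    stop.waitForCompletion();
    float ms = cv::cuda::Event::elapsedTime(start, stop);
@endcode
 */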
982class CV_EXPORTS_W Event
983{
984public:
985 enum CreateFlags
986 {
987 DEFAULT = 0x00, /**< Default event flag */
988 BLOCKING_SYNC = 0x01, /**< Event uses blocking synchronization */
989 DISABLE_TIMING = 0x02, /**< Event will not record timing data */
990 INTERPROCESS = 0x04 /**< Event is suitable for interprocess use. DisableTiming must be set */
991 };
992
993 CV_WRAP explicit Event(const Event::CreateFlags flags = Event::CreateFlags::DEFAULT);
994
995 //! records an event
996 CV_WRAP void record(Stream& stream = Stream::Null());
997
998 //! queries an event's status
999 CV_WRAP bool queryIfComplete() const;
1000
1001 //! waits for an event to complete
1002 CV_WRAP void waitForCompletion();
1003
1004 //! computes the elapsed time between events
1005 CV_WRAP static float elapsedTime(const Event& start, const Event& end);
1006
1007 class Impl;
1008
1009private:
1010 Ptr<Impl> impl_;
1011 Event(const Ptr<Impl>& impl);
1012
1013 friend struct EventAccessor;
1014};
1015CV_ENUM_FLAGS(Event::CreateFlags)
1016
1017//! @} cudacore_struct
1018
1019//===================================================================================
1020// Initialization & Info
1021//===================================================================================
1022
1023//! @addtogroup cudacore_init
1024//! @{
1025
1026/** @brief Returns the number of installed CUDA-enabled devices.
1027
1028Use this function before any other CUDA function calls. If OpenCV is compiled without CUDA support,
1029this function returns 0. If the CUDA driver is not installed, or is incompatible, this function
1030returns -1.
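
A typical guard before using anything else from this module (a sketch):
@code
    if (cv::cuda::getCudaEnabledDeviceCount() <= 0)
    {
        // no usable CUDA device, OpenCV built without CUDA, or incompatible driver:
        // fall back to the CPU code path
    }
@endcode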
1031 */
1032CV_EXPORTS_W int getCudaEnabledDeviceCount();
1033
1034/** @brief Sets a device and initializes it for the current thread.
1035
1036@param device System index of a CUDA device starting with 0.
1037
1038If the call to this function is omitted, a default device is initialized at the first CUDA usage.
1039 */
1040CV_EXPORTS_W void setDevice(int device);
1041
1042/** @brief Returns the current device index set by cuda::setDevice or initialized by default.
1043 */
1044CV_EXPORTS_W int getDevice();
1045
1046/** @brief Explicitly destroys and cleans up all resources associated with the current device in the current
1047process.
1048
1049Any subsequent API call to this device will reinitialize the device.
1050 */
1051CV_EXPORTS_W void resetDevice();
1052
1053/** @brief Enumeration providing CUDA computing features.
1054 */
1055enum FeatureSet
1056{
1057 FEATURE_SET_COMPUTE_10 = 10,
1058 FEATURE_SET_COMPUTE_11 = 11,
1059 FEATURE_SET_COMPUTE_12 = 12,
1060 FEATURE_SET_COMPUTE_13 = 13,
1061 FEATURE_SET_COMPUTE_20 = 20,
1062 FEATURE_SET_COMPUTE_21 = 21,
1063 FEATURE_SET_COMPUTE_30 = 30,
1064 FEATURE_SET_COMPUTE_32 = 32,
1065 FEATURE_SET_COMPUTE_35 = 35,
1066 FEATURE_SET_COMPUTE_50 = 50,
1067
1068 GLOBAL_ATOMICS = FEATURE_SET_COMPUTE_11,
1069 SHARED_ATOMICS = FEATURE_SET_COMPUTE_12,
1070 NATIVE_DOUBLE = FEATURE_SET_COMPUTE_13,
1071 WARP_SHUFFLE_FUNCTIONS = FEATURE_SET_COMPUTE_30,
1072 DYNAMIC_PARALLELISM = FEATURE_SET_COMPUTE_35
1073};
1074
1075//! checks whether current device supports the given feature
1076CV_EXPORTS bool deviceSupports(FeatureSet feature_set);
1077
1078/** @brief Class providing a set of static methods to check what NVIDIA\* card architecture the CUDA module was
1079built for.
1080
1081According to the CUDA C Programming Guide Version 3.2: "PTX code produced for some specific compute
1082capability can always be compiled to binary code of greater or equal compute capability".
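
A sketch of combining a build-time check with a run-time device check; the chosen feature set is
just an example:
@code
    if (cv::cuda::TargetArchs::builtWith(cv::cuda::FEATURE_SET_COMPUTE_30) &&
        cv::cuda::deviceSupports(cv::cuda::FEATURE_SET_COMPUTE_30))
    {
        // both the compiled module and the current device support this feature set
    }
@endcode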
1083 */
1084class CV_EXPORTS_W TargetArchs
1085{
1086public:
1087 /** @brief The following method checks whether the module was built with the support of the given feature:
1088
1089 @param feature_set Features to be checked. See cuda::FeatureSet.
1090 */
1091 static bool builtWith(FeatureSet feature_set);
1092
1093 /** @brief There is a set of methods to check whether the module contains intermediate (PTX) or binary CUDA
1094 code for the given architecture(s):
1095
1096 @param major Major compute capability version.
1097 @param minor Minor compute capability version.
1098 */
1099 CV_WRAP static bool has(int major, int minor);
1100 CV_WRAP static bool hasPtx(int major, int minor);
1101 CV_WRAP static bool hasBin(int major, int minor);
1102
1103 CV_WRAP static bool hasEqualOrLessPtx(int major, int minor);
1104 CV_WRAP static bool hasEqualOrGreater(int major, int minor);
1105 CV_WRAP static bool hasEqualOrGreaterPtx(int major, int minor);
1106 CV_WRAP static bool hasEqualOrGreaterBin(int major, int minor);
1107};
1108
1109/** @brief Class providing functionality for querying the specified GPU properties.
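
A sketch of enumerating devices and printing basic properties (assumes <iostream> is included):
@code
    int n = cv::cuda::getCudaEnabledDeviceCount();
    for (int i = 0; i < n; i++)
    {
        cv::cuda::DeviceInfo info(i);
        std::cout << i << ": " << info.name()
                  << " (compute " << info.majorVersion() << "." << info.minorVersion() << ")"
                  << (info.isCompatible() ? "" : " - incompatible with this build") << std::endl;
    }
@endcode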
1110 */
1111class CV_EXPORTS_W DeviceInfo
1112{
1113public:
1114 //! creates DeviceInfo object for the current GPU
1115 CV_WRAP DeviceInfo();
1116
1117 /** @brief The constructors.
1118
1119 @param device_id System index of the CUDA device starting with 0.
1120
1121 Constructs the DeviceInfo object for the specified device. If the device_id parameter is omitted, it
1122 constructs an object for the current device.
1123 */
1124 CV_WRAP DeviceInfo(int device_id);
1125
1126 /** @brief Returns system index of the CUDA device starting with 0.
1127 */
1128 CV_WRAP int deviceID() const;
1129
1130 //! ASCII string identifying device
1131 const char* name() const;
1132
1133 //! global memory available on device in bytes
1134 CV_WRAP size_t totalGlobalMem() const;
1135
1136 //! shared memory available per block in bytes
1137 CV_WRAP size_t sharedMemPerBlock() const;
1138
1139 //! 32-bit registers available per block
1140 CV_WRAP int regsPerBlock() const;
1141
1142 //! warp size in threads
1143 CV_WRAP int warpSize() const;
1144
1145 //! maximum pitch in bytes allowed by memory copies
1146 CV_WRAP size_t memPitch() const;
1147
1148 //! maximum number of threads per block
1149 CV_WRAP int maxThreadsPerBlock() const;
1150
1151 //! maximum size of each dimension of a block
1152 CV_WRAP Vec3i maxThreadsDim() const;
1153
1154 //! maximum size of each dimension of a grid
1155 CV_WRAP Vec3i maxGridSize() const;
1156
1157 //! clock frequency in kilohertz
1158 CV_WRAP int clockRate() const;
1159
1160 //! constant memory available on device in bytes
1161 CV_WRAP size_t totalConstMem() const;
1162
1163 //! major compute capability
1164 CV_WRAP int majorVersion() const;
1165
1166 //! minor compute capability
1167 CV_WRAP int minorVersion() const;
1168
1169 //! alignment requirement for textures
1170 CV_WRAP size_t textureAlignment() const;
1171
1172 //! pitch alignment requirement for texture references bound to pitched memory
1173 CV_WRAP size_t texturePitchAlignment() const;
1174
1175 //! number of multiprocessors on device
1176 CV_WRAP int multiProcessorCount() const;
1177
1178 //! specifies whether there is a run-time limit on kernels
1179 CV_WRAP bool kernelExecTimeoutEnabled() const;
1180
1181 //! device is integrated as opposed to discrete
1182 CV_WRAP bool integrated() const;
1183
1184 //! device can map host memory with cudaHostAlloc/cudaHostGetDevicePointer
1185 CV_WRAP bool canMapHostMemory() const;
1186
1187 enum ComputeMode
1188 {
1189 ComputeModeDefault, /**< default compute mode (Multiple threads can use cudaSetDevice with this device) */
1190 ComputeModeExclusive, /**< compute-exclusive-thread mode (Only one thread in one process will be able to use cudaSetDevice with this device) */
1191 ComputeModeProhibited, /**< compute-prohibited mode (No threads can use cudaSetDevice with this device) */
1192 ComputeModeExclusiveProcess /**< compute-exclusive-process mode (Many threads in one process will be able to use cudaSetDevice with this device) */
1193 };
1194
1195 //! compute mode
1196 CV_WRAP DeviceInfo::ComputeMode computeMode() const;
1197
1198 //! maximum 1D texture size
1199 CV_WRAP int maxTexture1D() const;
1200
1201 //! maximum 1D mipmapped texture size
1202 CV_WRAP int maxTexture1DMipmap() const;
1203
1204 //! maximum size for 1D textures bound to linear memory
1205 CV_WRAP int maxTexture1DLinear() const;
1206
1207 //! maximum 2D texture dimensions
1208 CV_WRAP Vec2i maxTexture2D() const;
1209
1210 //! maximum 2D mipmapped texture dimensions
1211 CV_WRAP Vec2i maxTexture2DMipmap() const;
1212
1213 //! maximum dimensions (width, height, pitch) for 2D textures bound to pitched memory
1214 CV_WRAP Vec3i maxTexture2DLinear() const;
1215
1216 //! maximum 2D texture dimensions if texture gather operations have to be performed
1217 CV_WRAP Vec2i maxTexture2DGather() const;
1218
1219 //! maximum 3D texture dimensions
1220 CV_WRAP Vec3i maxTexture3D() const;
1221
1222 //! maximum Cubemap texture dimensions
1223 CV_WRAP int maxTextureCubemap() const;
1224
1225 //! maximum 1D layered texture dimensions
1226 CV_WRAP Vec2i maxTexture1DLayered() const;
1227
1228 //! maximum 2D layered texture dimensions
1229 CV_WRAP Vec3i maxTexture2DLayered() const;
1230
1231 //! maximum Cubemap layered texture dimensions
1232 CV_WRAP Vec2i maxTextureCubemapLayered() const;
1233
1234 //! maximum 1D surface size
1235 CV_WRAP int maxSurface1D() const;
1236
1237 //! maximum 2D surface dimensions
1238 CV_WRAP Vec2i maxSurface2D() const;
1239
1240 //! maximum 3D surface dimensions
1241 CV_WRAP Vec3i maxSurface3D() const;
1242
1243 //! maximum 1D layered surface dimensions
1244 CV_WRAP Vec2i maxSurface1DLayered() const;
1245
1246 //! maximum 2D layered surface dimensions
1247 CV_WRAP Vec3i maxSurface2DLayered() const;
1248
1249 //! maximum Cubemap surface dimensions
1250 CV_WRAP int maxSurfaceCubemap() const;
1251
1252 //! maximum Cubemap layered surface dimensions
1253 CV_WRAP Vec2i maxSurfaceCubemapLayered() const;
1254
1255 //! alignment requirements for surfaces
1256 CV_WRAP size_t surfaceAlignment() const;
1257
1258 //! device can possibly execute multiple kernels concurrently
1259 CV_WRAP bool concurrentKernels() const;
1260
1261 //! device has ECC support enabled
1262 CV_WRAP bool ECCEnabled() const;
1263
1264 //! PCI bus ID of the device
1265 CV_WRAP int pciBusID() const;
1266
1267 //! PCI device ID of the device
1268 CV_WRAP int pciDeviceID() const;
1269
1270 //! PCI domain ID of the device
1271 CV_WRAP int pciDomainID() const;
1272
1273 //! true if device is a Tesla device using TCC driver, false otherwise
1274 CV_WRAP bool tccDriver() const;
1275
1276 //! number of asynchronous engines
1277 CV_WRAP int asyncEngineCount() const;
1278
1279 //! device shares a unified address space with the host
1280 CV_WRAP bool unifiedAddressing() const;
1281
1282 //! peak memory clock frequency in kilohertz
1283 CV_WRAP int memoryClockRate() const;
1284
1285 //! global memory bus width in bits
1286 CV_WRAP int memoryBusWidth() const;
1287
1288 //! size of L2 cache in bytes
1289 CV_WRAP int l2CacheSize() const;
1290
1291 //! maximum resident threads per multiprocessor
1292 CV_WRAP int maxThreadsPerMultiProcessor() const;
1293
1294 //! gets free and total device memory
1295 CV_WRAP void queryMemory(size_t& totalMemory, size_t& freeMemory) const;
1296 CV_WRAP size_t freeMemory() const;
1297 CV_WRAP size_t totalMemory() const;
1298
1299 /** @brief Provides information on CUDA feature support.
1300
1301 @param feature_set Features to be checked. See cuda::FeatureSet.
1302
1303 This function returns true if the device has the specified CUDA feature. Otherwise, it returns false.
1304 */
1305 bool supports(FeatureSet feature_set) const;
1306
1307 /** @brief Checks the CUDA module and device compatibility.
1308
1309 This function returns true if the CUDA module can be run on the specified device. Otherwise, it
1310 returns false.
1311 */
1312 CV_WRAP bool isCompatible() const;
1313
1314private:
1315 int device_id_;
1316};
1317
1318CV_EXPORTS_W void printCudaDeviceInfo(int device);
1319CV_EXPORTS_W void printShortCudaDeviceInfo(int device);
1320
1321/** @brief Converts an array to half-precision floating point numbers.
1322
1323@param _src input array.
1324@param _dst output array.
1325@param stream Stream for the asynchronous version.
1326@sa convertFp16
1327*/
1328CV_EXPORTS void convertFp16(InputArray _src, OutputArray _dst, Stream& stream = Stream::Null());
1329
1330//! @} cudacore_init
1331
1332}} // namespace cv { namespace cuda {
1333
1334
1335#include "opencv2/core/cuda.inl.hpp"
1336
1337#endif /* OPENCV_CORE_CUDA_HPP */
1338
