1/*M///////////////////////////////////////////////////////////////////////////////////////
2//
3// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4//
5// By downloading, copying, installing or using the software you agree to this license.
6// If you do not agree to this license, do not download, install,
7// copy or use the software.
8//
9//
10// License Agreement
11// For Open Source Computer Vision Library
12//
13// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
16// Third party copyrights are property of their respective owners.
17//
18// Redistribution and use in source and binary forms, with or without modification,
19// are permitted provided that the following conditions are met:
20//
21// * Redistribution's of source code must retain the above copyright notice,
22// this list of conditions and the following disclaimer.
23//
24// * Redistribution's in binary form must reproduce the above copyright notice,
25// this list of conditions and the following disclaimer in the documentation
26// and/or other materials provided with the distribution.
27//
28// * The name of the copyright holders may not be used to endorse or promote products
29// derived from this software without specific prior written permission.
30//
31// This software is provided by the copyright holders and contributors "as is" and
32// any express or implied warranties, including, but not limited to, the implied
33// warranties of merchantability and fitness for a particular purpose are disclaimed.
34// In no event shall the Intel Corporation or contributors be liable for any direct,
35// indirect, incidental, special, exemplary, or consequential damages
36// (including, but not limited to, procurement of substitute goods or services;
37// loss of use, data, or profits; or business interruption) however caused
38// and on any theory of liability, whether in contract, strict liability,
39// or tort (including negligence or otherwise) arising in any way out of
40// the use of this software, even if advised of the possibility of such damage.
41//
42//M*/
43
44#ifndef OPENCV_CORE_CUDA_HPP
45#define OPENCV_CORE_CUDA_HPP
46
47#ifndef __cplusplus
48# error cuda.hpp header must be compiled as C++
49#endif
50
51#include "opencv2/core.hpp"
52#include "opencv2/core/cuda_types.hpp"
53
54/**
55 @defgroup cuda CUDA-accelerated Computer Vision
56 @{
57 @defgroup cudacore Core part
58 @{
59 @defgroup cudacore_init Initialization and Information
60 @defgroup cudacore_struct Data Structures
61 @}
62 @}
63 */
64
65namespace cv { namespace cuda {
66
67//! @addtogroup cudacore_struct
68//! @{
69
70//===================================================================================
71// GpuMat
72//===================================================================================
73
74/** @brief Base storage class for GPU memory with reference counting.
75
76Its interface matches the Mat interface with the following limitations:
77
78- no arbitrary dimensions support (only 2D)
79- no functions that return references to their data (because references on GPU are not valid for
80 CPU)
81- no expression templates technique support
82
83Beware that the latter limitation may lead to overloaded matrix operators that cause memory
84allocations. The GpuMat class is convertible to cuda::PtrStepSz and cuda::PtrStep so it can be
85passed directly to the kernel.
86
87@note In contrast with Mat, in most cases GpuMat::isContinuous() == false . This means that rows are
88aligned to a size depending on the hardware. Single-row GpuMat is always a continuous matrix.
89
90@note You are not recommended to leave static or global GpuMat variables allocated, that is, to rely
91on its destructor. The destruction order of such variables and CUDA context is undefined. GPU memory
92release function returns error if the CUDA context has been destroyed before.
93
94Some member functions are described as a "Blocking Call" while some are described as a
95"Non-Blocking Call". Blocking functions are synchronous to host. It is guaranteed that the GPU
96operation is finished when the function returns. However, non-blocking functions are asynchronous to
97host. Those functions may return even if the GPU operation is not finished.
98
99Compared to their blocking counterpart, non-blocking functions accept Stream as an additional
100argument. If a non-default stream is passed, the GPU operation may overlap with operations in other
101streams.
102
103@sa Mat
104 */
105class CV_EXPORTS_W GpuMat
106{
107public:
108 class CV_EXPORTS_W Allocator
109 {
110 public:
111 virtual ~Allocator() {}
112
113 // allocator must fill data, step and refcount fields
114 virtual bool allocate(GpuMat* mat, int rows, int cols, size_t elemSize) = 0;
115 virtual void free(GpuMat* mat) = 0;
116 };
117
118 //! default allocator
119 CV_WRAP static GpuMat::Allocator* defaultAllocator();
120 CV_WRAP static void setDefaultAllocator(GpuMat::Allocator* allocator);
121 CV_WRAP static GpuMat::Allocator* getStdAllocator();
122
123 //! default constructor
124 CV_WRAP explicit GpuMat(GpuMat::Allocator* allocator = GpuMat::defaultAllocator());
125
126 //! constructs GpuMat of the specified size and type
127 CV_WRAP GpuMat(int rows, int cols, int type, GpuMat::Allocator* allocator = GpuMat::defaultAllocator());
128 CV_WRAP GpuMat(Size size, int type, GpuMat::Allocator* allocator = GpuMat::defaultAllocator());
129
130 //! constructs GpuMat and fills it with the specified value _s
131 CV_WRAP GpuMat(int rows, int cols, int type, Scalar s, GpuMat::Allocator* allocator = GpuMat::defaultAllocator());
132 CV_WRAP GpuMat(Size size, int type, Scalar s, GpuMat::Allocator* allocator = GpuMat::defaultAllocator());
133
134 //! copy constructor
135 CV_WRAP GpuMat(const GpuMat& m);
136
137 //! constructor for GpuMat headers pointing to user-allocated data
138 GpuMat(int rows, int cols, int type, void* data, size_t step = Mat::AUTO_STEP);
139 GpuMat(Size size, int type, void* data, size_t step = Mat::AUTO_STEP);
140
141 //! creates a GpuMat header for a part of the bigger matrix
142 CV_WRAP GpuMat(const GpuMat& m, Range rowRange, Range colRange);
143 CV_WRAP GpuMat(const GpuMat& m, Rect roi);
144
145 //! builds GpuMat from host memory (Blocking call)
146 CV_WRAP explicit GpuMat(InputArray arr, GpuMat::Allocator* allocator = GpuMat::defaultAllocator());
147
148 //! destructor - calls release()
149 ~GpuMat();
150
151 //! assignment operators
152 GpuMat& operator =(const GpuMat& m);
153
154 //! allocates new GpuMat data unless the GpuMat already has specified size and type
155 CV_WRAP void create(int rows, int cols, int type);
156 CV_WRAP void create(Size size, int type);
157
158 //! decreases reference counter, deallocate the data when reference counter reaches 0
159 CV_WRAP void release();
160
161 //! swaps with other smart pointer
162 CV_WRAP void swap(GpuMat& mat);
163
164 /** @brief Performs data upload to GpuMat (Blocking call)
165
166 This function copies data from host memory to device memory. As being a blocking call, it is
167 guaranteed that the copy operation is finished when this function returns.
168 */
169 CV_WRAP void upload(InputArray arr);
170
171 /** @brief Performs data upload to GpuMat (Non-Blocking call)
172
173 This function copies data from host memory to device memory. As being a non-blocking call, this
174 function may return even if the copy operation is not finished.
175
176 The copy operation may be overlapped with operations in other non-default streams if \p stream is
177 not the default stream and \p dst is HostMem allocated with HostMem::PAGE_LOCKED option.
178 */
179 CV_WRAP void upload(InputArray arr, Stream& stream);
180
181 /** @brief Performs data download from GpuMat (Blocking call)
182
183 This function copies data from device memory to host memory. As being a blocking call, it is
184 guaranteed that the copy operation is finished when this function returns.
185 */
186 CV_WRAP void download(OutputArray dst) const;
187
188 /** @brief Performs data download from GpuMat (Non-Blocking call)
189
190 This function copies data from device memory to host memory. As being a non-blocking call, this
191 function may return even if the copy operation is not finished.
192
193 The copy operation may be overlapped with operations in other non-default streams if \p stream is
194 not the default stream and \p dst is HostMem allocated with HostMem::PAGE_LOCKED option.
195 */
196 CV_WRAP void download(OutputArray dst, Stream& stream) const;
197
198 //! returns deep copy of the GpuMat, i.e. the data is copied
199 CV_WRAP GpuMat clone() const;
200
201 //! copies the GpuMat content to device memory (Blocking call)
202 void copyTo(OutputArray dst) const;
203 //! bindings overload which copies the GpuMat content to device memory (Blocking call)
204 CV_WRAP void copyTo(CV_OUT GpuMat& dst) const {
205 copyTo(dst: static_cast<OutputArray>(dst));
206 }
207
208 //! copies the GpuMat content to device memory (Non-Blocking call)
209 void copyTo(OutputArray dst, Stream& stream) const;
210 //! bindings overload which copies the GpuMat content to device memory (Non-Blocking call)
211 CV_WRAP void copyTo(CV_OUT GpuMat& dst, Stream& stream) const {
212 copyTo(dst: static_cast<OutputArray>(dst), stream);
213 }
214
215 //! copies those GpuMat elements to "m" that are marked with non-zero mask elements (Blocking call)
216 void copyTo(OutputArray dst, InputArray mask) const;
217 //! bindings overload which copies those GpuMat elements to "m" that are marked with non-zero mask elements (Blocking call)
218 CV_WRAP void copyTo(CV_OUT GpuMat& dst, GpuMat& mask) const {
219 copyTo(dst: static_cast<OutputArray>(dst), mask: static_cast<InputArray>(mask));
220 }
221
222 //! copies those GpuMat elements to "m" that are marked with non-zero mask elements (Non-Blocking call)
223 void copyTo(OutputArray dst, InputArray mask, Stream& stream) const;
224 //! bindings overload which copies those GpuMat elements to "m" that are marked with non-zero mask elements (Non-Blocking call)
225 CV_WRAP void copyTo(CV_OUT GpuMat& dst, GpuMat& mask, Stream& stream) const {
226 copyTo(dst: static_cast<OutputArray>(dst), mask: static_cast<InputArray>(mask), stream);
227 }
228
229 //! sets some of the GpuMat elements to s (Blocking call)
230 CV_WRAP GpuMat& setTo(Scalar s);
231
232 //! sets some of the GpuMat elements to s (Non-Blocking call)
233 CV_WRAP GpuMat& setTo(Scalar s, Stream& stream);
234
235 //! sets some of the GpuMat elements to s, according to the mask (Blocking call)
236 CV_WRAP GpuMat& setTo(Scalar s, InputArray mask);
237
238 //! sets some of the GpuMat elements to s, according to the mask (Non-Blocking call)
239 CV_WRAP GpuMat& setTo(Scalar s, InputArray mask, Stream& stream);
240
241 //! converts GpuMat to another datatype (Blocking call)
242 void convertTo(OutputArray dst, int rtype) const;
243 //! bindings overload which converts GpuMat to another datatype (Blocking call)
244 CV_WRAP void convertTo(CV_OUT GpuMat& dst, int rtype) const {
245 convertTo(dst: static_cast<OutputArray>(dst), rtype);
246 }
247
248 //! converts GpuMat to another datatype (Non-Blocking call)
249 void convertTo(OutputArray dst, int rtype, Stream& stream) const;
250 //! bindings overload which converts GpuMat to another datatype (Non-Blocking call)
251 CV_WRAP void convertTo(CV_OUT GpuMat& dst, int rtype, Stream& stream) const {
252 convertTo(dst: static_cast<OutputArray>(dst), rtype, stream);
253 }
254
255 //! converts GpuMat to another datatype with scaling (Blocking call)
256 void convertTo(OutputArray dst, int rtype, double alpha, double beta = 0.0) const;
257
258 //! bindings overload which converts GpuMat to another datatype with scaling(Blocking call)
259#ifdef OPENCV_BINDINGS_PARSER
260 CV_WRAP void convertTo(CV_OUT GpuMat& dst, int rtype, double alpha=1.0, double beta = 0.0) const {
261 convertTo(static_cast<OutputArray>(dst), rtype, alpha, beta);
262 }
263#endif
264
265 //! converts GpuMat to another datatype with scaling (Non-Blocking call)
266 void convertTo(OutputArray dst, int rtype, double alpha, Stream& stream) const;
267
268 //! converts GpuMat to another datatype with scaling (Non-Blocking call)
269 void convertTo(OutputArray dst, int rtype, double alpha, double beta, Stream& stream) const;
270 //! bindings overload which converts GpuMat to another datatype with scaling (Non-Blocking call)
271 CV_WRAP void convertTo(CV_OUT GpuMat& dst, int rtype, double alpha, double beta, Stream& stream) const {
272 convertTo(dst: static_cast<OutputArray>(dst), rtype, alpha, beta, stream);
273 }
274
275 CV_WRAP void assignTo(GpuMat& m, int type = -1) const;
276
277 //! returns pointer to y-th row
278 uchar* ptr(int y = 0);
279 const uchar* ptr(int y = 0) const;
280
281 //! template version of the above method
282 template<typename _Tp> _Tp* ptr(int y = 0);
283 template<typename _Tp> const _Tp* ptr(int y = 0) const;
284
285 template <typename _Tp> operator PtrStepSz<_Tp>() const;
286 template <typename _Tp> operator PtrStep<_Tp>() const;
287
288 //! returns a new GpuMat header for the specified row
289 CV_WRAP GpuMat row(int y) const;
290
291 //! returns a new GpuMat header for the specified column
292 CV_WRAP GpuMat col(int x) const;
293
294 //! ... for the specified row span
295 CV_WRAP GpuMat rowRange(int startrow, int endrow) const;
296 CV_WRAP GpuMat rowRange(Range r) const;
297
298 //! ... for the specified column span
299 CV_WRAP GpuMat colRange(int startcol, int endcol) const;
300 CV_WRAP GpuMat colRange(Range r) const;
301
302 //! extracts a rectangular sub-GpuMat (this is a generalized form of row, rowRange etc.)
303 GpuMat operator ()(Range rowRange, Range colRange) const;
304 GpuMat operator ()(Rect roi) const;
305
306 //! creates alternative GpuMat header for the same data, with different
307 //! number of channels and/or different number of rows
308 CV_WRAP GpuMat reshape(int cn, int rows = 0) const;
309
310 //! locates GpuMat header within a parent GpuMat
311 CV_WRAP void locateROI(Size& wholeSize, Point& ofs) const;
312
313 //! moves/resizes the current GpuMat ROI inside the parent GpuMat
314 CV_WRAP GpuMat& adjustROI(int dtop, int dbottom, int dleft, int dright);
315
316 //! returns true iff the GpuMat data is continuous
317 //! (i.e. when there are no gaps between successive rows)
318 CV_WRAP bool isContinuous() const;
319
320 //! returns element size in bytes
321 CV_WRAP size_t elemSize() const;
322
323 //! returns the size of element channel in bytes
324 CV_WRAP size_t elemSize1() const;
325
326 //! returns element type
327 CV_WRAP int type() const;
328
329 //! returns element type
330 CV_WRAP int depth() const;
331
332 //! returns number of channels
333 CV_WRAP int channels() const;
334
335 //! returns step/elemSize1()
336 CV_WRAP size_t step1() const;
337
338 //! returns GpuMat size : width == number of columns, height == number of rows
339 CV_WRAP Size size() const;
340
341 //! returns true if GpuMat data is NULL
342 CV_WRAP bool empty() const;
343
344 // returns pointer to cuda memory
345 CV_WRAP void* cudaPtr() const;
346
347 //! internal use method: updates the continuity flag
348 CV_WRAP void updateContinuityFlag();
349
350 /*! includes several bit-fields:
351 - the magic signature
352 - continuity flag
353 - depth
354 - number of channels
355 */
356 int flags;
357
358 //! the number of rows and columns
359 int rows, cols;
360
361 //! a distance between successive rows in bytes; includes the gap if any
362 CV_PROP size_t step;
363
364 //! pointer to the data
365 uchar* data;
366
367 //! pointer to the reference counter;
368 //! when GpuMat points to user-allocated data, the pointer is NULL
369 int* refcount;
370
371 //! helper fields used in locateROI and adjustROI
372 uchar* datastart;
373 const uchar* dataend;
374
375 //! allocator
376 Allocator* allocator;
377};
378
/** @brief Simple owner of a raw GPU memory buffer.

Non-copyable and non-movable; GpuMatND shares ownership of a GpuData through
std::shared_ptr, which provides the reference counting.
*/
struct CV_EXPORTS_W GpuData
{
    //! allocates a GPU buffer of _size bytes (allocation mechanism not visible in this header)
    explicit GpuData(size_t _size);
    //! releases the GPU buffer
    ~GpuData();

    GpuData(const GpuData&) = delete;
    GpuData& operator=(const GpuData&) = delete;

    GpuData(GpuData&&) = delete;
    GpuData& operator=(GpuData&&) = delete;

    //! pointer to the first byte of the buffer
    uchar* data;
    //! buffer size in bytes
    size_t size;
};
393
/** @brief n-dimensional array stored in GPU memory.

Memory is either reference counted (shared GpuData held via std::shared_ptr) or
external (user-allocated), in which case this class performs no reference counting
and the user is responsible for deallocation.
*/
class CV_EXPORTS_W GpuMatND
{
public:
    using SizeArray = std::vector<int>;
    using StepArray = std::vector<size_t>;
    using IndexArray = std::vector<int>;

    //! destructor
    ~GpuMatND();

    //! default constructor
    GpuMatND();

    /** @overload
    @param size Array of integers specifying an n-dimensional array shape.
    @param type Array type. Use CV_8UC1, ..., CV_16FC4 to create 1-4 channel matrices, or
    CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices.
    */
    GpuMatND(SizeArray size, int type);

    /** @overload
    @param size Array of integers specifying an n-dimensional array shape.
    @param type Array type. Use CV_8UC1, ..., CV_16FC4 to create 1-4 channel matrices, or
    CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices.
    @param data Pointer to the user data. Matrix constructors that take data and step parameters do not
    allocate matrix data. Instead, they just initialize the matrix header that points to the specified
    data, which means that no data is copied. This operation is very efficient and can be used to
    process external data using OpenCV functions. The external data is not automatically deallocated, so
    you should take care of it.
    @param step Array of _size.size() or _size.size()-1 steps in case of a multi-dimensional array
    (if specified, the last step must be equal to the element size, otherwise it will be added as such).
    If not specified, the matrix is assumed to be continuous.
    */
    GpuMatND(SizeArray size, int type, void* data, StepArray step = StepArray());

    /** @brief Allocates GPU memory.
    Suppose there is some GPU memory already allocated. In that case, this method may choose to reuse that
    GPU memory under the specific condition: it must be of the same size and type, not externally allocated,
    the GPU memory is continuous(i.e., isContinuous() is true), and is not a sub-matrix of another GpuMatND
    (i.e., isSubmatrix() is false). In other words, this method guarantees that the GPU memory allocated by
    this method is always continuous and is not a sub-region of another GpuMatND.
    */
    void create(SizeArray size, int type);

    //! releases this header's reference to the underlying GPU memory
    void release();

    //! swaps all fields with another GpuMatND; never throws
    void swap(GpuMatND& m) noexcept;

    /** @brief Creates a full copy of the array and the underlying data.
    The method creates a full copy of the array. It mimics the behavior of Mat::clone(), i.e.
    the original step is not taken into account. So, the array copy is a continuous array
    occupying total()\*elemSize() bytes.
    */
    GpuMatND clone() const;

    /** @overload
    This overload is non-blocking, so it may return even if the copy operation is not finished.
    */
    GpuMatND clone(Stream& stream) const;

    /** @brief Extracts a sub-matrix.
    The operator makes a new header for the specified sub-array of \*this.
    The operator is an O(1) operation, that is, no matrix data is copied.
    @param ranges Array of selected ranges along each dimension.
    */
    GpuMatND operator()(const std::vector<Range>& ranges) const;

    /** @brief Creates a GpuMat header for a 2D plane part of an n-dim matrix.
    @note The returned GpuMat is constructed with the constructor for user-allocated data.
    That is, It does not perform reference counting.
    @note This function does not increment this GpuMatND's reference counter.
    */
    GpuMat createGpuMatHeader(IndexArray idx, Range rowRange, Range colRange) const;

    /** @overload
    Creates a GpuMat header if this GpuMatND is effectively 2D.
    @note The returned GpuMat is constructed with the constructor for user-allocated data.
    That is, It does not perform reference counting.
    @note This function does not increment this GpuMatND's reference counter.
    */
    GpuMat createGpuMatHeader() const;

    /** @brief Extracts a 2D plane part of an n-dim matrix.
    It differs from createGpuMatHeader(IndexArray, Range, Range) in that it clones a part of this
    GpuMatND to the returned GpuMat.
    @note This operator does not increment this GpuMatND's reference counter;
    */
    GpuMat operator()(IndexArray idx, Range rowRange, Range colRange) const;

    /** @brief Extracts a 2D plane part of an n-dim matrix if this GpuMatND is effectively 2D.
    It differs from createGpuMatHeader() in that it clones a part of this GpuMatND.
    @note This operator does not increment this GpuMatND's reference counter;
    */
    operator GpuMat() const;

    GpuMatND(const GpuMatND&) = default;
    GpuMatND& operator=(const GpuMatND&) = default;

#if defined(__GNUC__) && __GNUC__ < 5
    // error: function '...' defaulted on its first declaration with an exception-specification
    // that differs from the implicit declaration '...'

    GpuMatND(GpuMatND&&) = default;
    GpuMatND& operator=(GpuMatND&&) = default;
#else
    GpuMatND(GpuMatND&&) noexcept = default;
    GpuMatND& operator=(GpuMatND&&) noexcept = default;
#endif

    //! copies data from host memory to this array
    void upload(InputArray src);
    //! @overload using \p stream; presumably non-blocking like GpuMat::upload — confirm in the implementation
    void upload(InputArray src, Stream& stream);
    //! copies data from this array to host memory
    void download(OutputArray dst) const;
    //! @overload using \p stream; presumably non-blocking like GpuMat::download — confirm in the implementation
    void download(OutputArray dst, Stream& stream) const;

    //! returns true iff the GpuMatND data is continuous
    //! (i.e. when there are no gaps between successive rows)
    bool isContinuous() const;

    //! returns true if the matrix is a sub-matrix of another matrix
    bool isSubmatrix() const;

    //! returns element size in bytes
    size_t elemSize() const;

    //! returns the size of element channel in bytes
    size_t elemSize1() const;

    //! returns true if data is null
    bool empty() const;

    //! returns true if not empty and points to external(user-allocated) gpu memory
    bool external() const;

    //! returns pointer to the first byte of the GPU memory
    uchar* getDevicePtr() const;

    //! returns the total number of array elements
    size_t total() const;

    //! returns the size of underlying memory in bytes
    size_t totalMemSize() const;

    //! returns element type
    int type() const;

private:
    //! internal use: initializes flags, dims, size and step from the given shape/type
    void setFields(SizeArray size, int type, StepArray step = StepArray());

public:
    /*! includes several bit-fields:
    - the magic signature
    - continuity flag
    - depth
    - number of channels
    */
    int flags;

    //! matrix dimensionality
    int dims;

    //! shape of this array
    SizeArray size;

    /*! step values
    Their semantics is identical to the semantics of step for Mat.
    */
    StepArray step;

private:
    /*! internal use
    If this GpuMatND holds external memory, this is empty.
    */
    std::shared_ptr<GpuData> data_;

    /*! internal use
    If this GpuMatND manages memory with reference counting, this value is
    always equal to data_->data. If this GpuMatND holds external memory,
    data_ is empty and data points to the external memory.
    */
    uchar* data;

    /*! internal use
    If this GpuMatND is a sub-matrix of a larger matrix, this value is the
    difference of the first byte between the sub-matrix and the whole matrix.
    */
    size_t offset;
};
582
/** @brief Creates a continuous matrix.

@param rows Row count.
@param cols Column count.
@param type Type of the matrix.
@param arr Destination matrix. This parameter changes only if it has a proper type and area (
\f$\texttt{rows} \times \texttt{cols}\f$ ).

Matrix is called continuous if its elements are stored continuously, that is, without gaps at the
end of each row.

@sa GpuMat::isContinuous
 */
CV_EXPORTS_W void createContinuous(int rows, int cols, int type, OutputArray arr);

/** @brief Ensures that the size of a matrix is big enough and the matrix has a proper type.

@param rows Minimum desired number of rows.
@param cols Minimum desired number of columns.
@param type Desired matrix type.
@param arr Destination matrix.

The function does not reallocate memory if the matrix has proper attributes already.
 */
CV_EXPORTS_W void ensureSizeIsEnough(int rows, int cols, int type, OutputArray arr);
606
607/** @brief Bindings overload to create a GpuMat from existing GPU memory.
608@param rows Row count.
609@param cols Column count.
610@param type Type of the matrix.
611@param cudaMemoryAddress Address of the allocated GPU memory on the device. This does not allocate matrix data. Instead, it just initializes the matrix header that points to the specified \a cudaMemoryAddress, which means that no data is copied. This operation is very efficient and can be used to process external data using OpenCV functions. The external data is not automatically deallocated, so you should take care of it.
612@param step Number of bytes each matrix row occupies. The value should include the padding bytes at the end of each row, if any. If the parameter is missing (set to Mat::AUTO_STEP ), no padding is assumed and the actual step is calculated as cols*elemSize(). See GpuMat::elemSize.
613@note Overload for generation of bindings only, not exported or intended for use internally from C++.
614 */
615CV_EXPORTS_W GpuMat inline createGpuMatFromCudaMemory(int rows, int cols, int type, size_t cudaMemoryAddress, size_t step = Mat::AUTO_STEP) {
616 return GpuMat(rows, cols, type, reinterpret_cast<void*>(cudaMemoryAddress), step);
617}
618
619 /** @overload
620@param size 2D array size: Size(cols, rows). In the Size() constructor, the number of rows and the number of columns go in the reverse order.
621@param type Type of the matrix.
622@param cudaMemoryAddress Address of the allocated GPU memory on the device. This does not allocate matrix data. Instead, it just initializes the matrix header that points to the specified \a cudaMemoryAddress, which means that no data is copied. This operation is very efficient and can be used to process external data using OpenCV functions. The external data is not automatically deallocated, so you should take care of it.
623@param step Number of bytes each matrix row occupies. The value should include the padding bytes at the end of each row, if any. If the parameter is missing (set to Mat::AUTO_STEP ), no padding is assumed and the actual step is calculated as cols*elemSize(). See GpuMat::elemSize.
624@note Overload for generation of bindings only, not exported or intended for use internally from C++.
625 */
626CV_EXPORTS_W inline GpuMat createGpuMatFromCudaMemory(Size size, int type, size_t cudaMemoryAddress, size_t step = Mat::AUTO_STEP) {
627 return GpuMat(size, type, reinterpret_cast<void*>(cudaMemoryAddress), step);
628}
629
630/** @brief BufferPool for use with CUDA streams
631
632BufferPool utilizes Stream's allocator to create new buffers for GpuMat's. It is
633only useful when enabled with #setBufferPoolUsage.
634
635@code
636 setBufferPoolUsage(true);
637@endcode
638
639@note #setBufferPoolUsage must be called \em before any Stream declaration.
640
641Users may specify custom allocator for Stream and may implement their own stream based
642functions utilizing the same underlying GPU memory management.
643
644If custom allocator is not specified, BufferPool utilizes StackAllocator by
645default. StackAllocator allocates a chunk of GPU device memory beforehand,
646and when GpuMat is declared later on, it is given the pre-allocated memory.
647This kind of strategy reduces the number of calls for memory allocating APIs
648such as cudaMalloc or cudaMallocPitch.
649
650Below is an example that utilizes BufferPool with StackAllocator:
651
652@code
653 #include <opencv2/opencv.hpp>
654
655 using namespace cv;
    using namespace cv::cuda;
657
658 int main()
659 {
660 setBufferPoolUsage(true); // Tell OpenCV that we are going to utilize BufferPool
661 setBufferPoolConfig(getDevice(), 1024 * 1024 * 64, 2); // Allocate 64 MB, 2 stacks (default is 10 MB, 5 stacks)
662
663 Stream stream1, stream2; // Each stream uses 1 stack
664 BufferPool pool1(stream1), pool2(stream2);
665
666 GpuMat d_src1 = pool1.getBuffer(4096, 4096, CV_8UC1); // 16MB
667 GpuMat d_dst1 = pool1.getBuffer(4096, 4096, CV_8UC3); // 48MB, pool1 is now full
668
669 GpuMat d_src2 = pool2.getBuffer(1024, 1024, CV_8UC1); // 1MB
670 GpuMat d_dst2 = pool2.getBuffer(1024, 1024, CV_8UC3); // 3MB
671
672 cvtColor(d_src1, d_dst1, cv::COLOR_GRAY2BGR, 0, stream1);
673 cvtColor(d_src2, d_dst2, cv::COLOR_GRAY2BGR, 0, stream2);
674 }
675@endcode
676
677If we allocate another GpuMat on pool1 in the above example, it will be carried out by
678the DefaultAllocator since the stack for pool1 is full.
679
680@code
681 GpuMat d_add1 = pool1.getBuffer(1024, 1024, CV_8UC1); // Stack for pool1 is full, memory is allocated with DefaultAllocator
682@endcode
683
684If a third stream is declared in the above example, allocating with #getBuffer
685within that stream will also be carried out by the DefaultAllocator because we've run out of
686stacks.
687
688@code
689 Stream stream3; // Only 2 stacks were allocated, we've run out of stacks
690 BufferPool pool3(stream3);
691 GpuMat d_src3 = pool3.getBuffer(1024, 1024, CV_8UC1); // Memory is allocated with DefaultAllocator
692@endcode
693
694@warning When utilizing StackAllocator, deallocation order is important.
695
696Just like a stack, deallocation must be done in LIFO order. Below is an example of
697erroneous usage that violates LIFO rule. If OpenCV is compiled in Debug mode, this
698sample code will emit CV_Assert error.
699
700@code
701 int main()
702 {
703 setBufferPoolUsage(true); // Tell OpenCV that we are going to utilize BufferPool
704 Stream stream; // A default size (10 MB) stack is allocated to this stream
705 BufferPool pool(stream);
706
707 GpuMat mat1 = pool.getBuffer(1024, 1024, CV_8UC1); // Allocate mat1 (1MB)
708 GpuMat mat2 = pool.getBuffer(1024, 1024, CV_8UC1); // Allocate mat2 (1MB)
709
710 mat1.release(); // erroneous usage : mat2 must be deallocated before mat1
711 }
712@endcode
713
714Since C++ local variables are destroyed in the reverse order of construction,
715the code sample below satisfies the LIFO rule. Local GpuMat's are deallocated
716and the corresponding memory is automatically returned to the pool for later usage.
717
718@code
719 int main()
720 {
721 setBufferPoolUsage(true); // Tell OpenCV that we are going to utilize BufferPool
722 setBufferPoolConfig(getDevice(), 1024 * 1024 * 64, 2); // Allocate 64 MB, 2 stacks (default is 10 MB, 5 stacks)
723
724 Stream stream1, stream2; // Each stream uses 1 stack
725 BufferPool pool1(stream1), pool2(stream2);
726
727 for (int i = 0; i < 10; i++)
728 {
729 GpuMat d_src1 = pool1.getBuffer(4096, 4096, CV_8UC1); // 16MB
730 GpuMat d_dst1 = pool1.getBuffer(4096, 4096, CV_8UC3); // 48MB, pool1 is now full
731
732 GpuMat d_src2 = pool2.getBuffer(1024, 1024, CV_8UC1); // 1MB
733 GpuMat d_dst2 = pool2.getBuffer(1024, 1024, CV_8UC3); // 3MB
734
735 d_src1.setTo(Scalar(i), stream1);
736 d_src2.setTo(Scalar(i), stream2);
737
738 cvtColor(d_src1, d_dst1, cv::COLOR_GRAY2BGR, 0, stream1);
739 cvtColor(d_src2, d_dst2, cv::COLOR_GRAY2BGR, 0, stream2);
740 // The order of destruction of the local variables is:
741 // d_dst2 => d_src2 => d_dst1 => d_src1
742 // LIFO rule is satisfied, this code runs without error
743 }
744 }
745@endcode
746 */
747class CV_EXPORTS_W BufferPool
748{
749public:
750
751 //! Gets the BufferPool for the given stream.
752 CV_WRAP explicit BufferPool(Stream& stream);
753
754 //! Allocates a new GpuMat of given size and type.
755 CV_WRAP GpuMat getBuffer(int rows, int cols, int type);
756
757// WARNING: unreachable code using Ninja
758#if defined _MSC_VER && _MSC_VER >= 1920
759#pragma warning(push)
760#pragma warning(disable: 4702)
761#endif
762 //! Allocates a new GpuMat of given size and type.
763 CV_WRAP GpuMat getBuffer(Size size, int type) { return getBuffer(rows: size.height, cols: size.width, type); }
764#if defined _MSC_VER && _MSC_VER >= 1920
765#pragma warning(pop)
766#endif
767
768 //! Returns the allocator associated with the stream.
769 CV_WRAP Ptr<GpuMat::Allocator> getAllocator() const { return allocator_; }
770
771private:
772 Ptr<GpuMat::Allocator> allocator_;
773};
774
//! BufferPool management (must be called before Stream creation)

//! Enables or disables use of the BufferPool mechanism for subsequently created streams.
CV_EXPORTS_W void setBufferPoolUsage(bool on);
//! Configures the stack size (in bytes) and the number of stacks the BufferPool
//! allocates for the given device.
CV_EXPORTS_W void setBufferPoolConfig(int deviceId, size_t stackSize, int stackCount);
778
779//===================================================================================
780// HostMem
781//===================================================================================
782
783/** @brief Class with reference counting wrapping special memory type allocation functions from CUDA.
784
785Its interface is also Mat-like but with additional memory type parameters.
786
787- **PAGE_LOCKED** sets a page locked memory type used commonly for fast and asynchronous
788 uploading/downloading data from/to GPU.
789- **SHARED** specifies a zero copy memory allocation that enables mapping the host memory to GPU
790 address space, if supported.
791- **WRITE_COMBINED** sets the write combined buffer that is not cached by CPU. Such buffers are
792 used to supply GPU with data when GPU only reads it. The advantage is a better CPU cache
793 utilization.
794
795@note Allocation size of such memory types is usually limited. For more details, see *CUDA 2.2
796Pinned Memory APIs* document or *CUDA C Programming Guide*.
797 */
class CV_EXPORTS_W HostMem
{
public:
    //! supported allocation types (see the class description above)
    enum AllocType { PAGE_LOCKED = 1, SHARED = 2, WRITE_COMBINED = 4 };

    //! returns a matrix allocator that produces host memory of the specified type
    static MatAllocator* getAllocator(HostMem::AllocType alloc_type = HostMem::AllocType::PAGE_LOCKED);

    //! creates an empty HostMem with the given allocation type
    CV_WRAP explicit HostMem(HostMem::AllocType alloc_type = HostMem::AllocType::PAGE_LOCKED);

    //! copy constructor; shares data and increments the reference counter
    HostMem(const HostMem& m);

    //! allocates a rows x cols matrix of the given type
    CV_WRAP HostMem(int rows, int cols, int type, HostMem::AllocType alloc_type = HostMem::AllocType::PAGE_LOCKED);
    CV_WRAP HostMem(Size size, int type, HostMem::AllocType alloc_type = HostMem::AllocType::PAGE_LOCKED);

    //! creates from host memory, copying the data from arr
    CV_WRAP explicit HostMem(InputArray arr, HostMem::AllocType alloc_type = HostMem::AllocType::PAGE_LOCKED);

    ~HostMem();

    //! assignment; shares data with m and releases previously held memory if needed
    HostMem& operator =(const HostMem& m);

    //! swaps with other smart pointer
    CV_WRAP void swap(HostMem& b);

    //! returns deep copy of the matrix, i.e. the data is copied
    CV_WRAP HostMem clone() const;

    //! allocates new matrix data unless the matrix already has specified size and type.
    CV_WRAP void create(int rows, int cols, int type);
    void create(Size size, int type);

    //! creates alternative HostMem header for the same data, with different
    //! number of channels and/or different number of rows
    CV_WRAP HostMem reshape(int cn, int rows = 0) const;

    //! decrements the reference counter and releases the memory if needed.
    void release();

    //! returns matrix header with disabled reference counting for HostMem data.
    CV_WRAP Mat createMatHeader() const;

    /** @brief Maps CPU memory to GPU address space and creates the cuda::GpuMat header without reference counting
    for it.

    This can be done only if memory was allocated with the SHARED flag and if it is supported by the
    hardware. Laptops often share video and CPU memory, so address spaces can be mapped, which
    eliminates an extra copy.
    */
    GpuMat createGpuMatHeader() const;

    // Please see cv::Mat for descriptions
    CV_WRAP bool isContinuous() const;
    CV_WRAP size_t elemSize() const;
    CV_WRAP size_t elemSize1() const;
    CV_WRAP int type() const;
    CV_WRAP int depth() const;
    CV_WRAP int channels() const;
    CV_WRAP size_t step1() const;
    CV_WRAP Size size() const;
    CV_WRAP bool empty() const;

    // Please see cv::Mat for descriptions
    int flags;
    int rows, cols;
    CV_PROP size_t step;

    uchar* data;        // pointer to the data
    int* refcount;      // pointer to the shared reference counter (NULL for external data)

    uchar* datastart;   // helper field used in locateROI and adjustROI
    const uchar* dataend;

    AllocType alloc_type; // memory type this object was allocated with
};
872
/** @brief Page-locks the memory of matrix and maps it for the device(s).

Page-locked (pinned) memory is used for fast and asynchronous uploading/downloading of data
from/to the GPU (see the HostMem class description above).

@param m Input matrix whose memory is page-locked.
 */
CV_EXPORTS_W void registerPageLocked(Mat& m);

/** @brief Unmaps the memory of matrix and makes it pageable again.

@param m Input matrix previously passed to cuda::registerPageLocked.
 */
CV_EXPORTS_W void unregisterPageLocked(Mat& m);
884
885//===================================================================================
886// Stream
887//===================================================================================
888
889/** @brief This class encapsulates a queue of asynchronous calls.
890
891@note Currently, you may face problems if an operation is enqueued twice with different data. Some
892functions use the constant GPU memory, and next call may update the memory before the previous one
893has been finished. But calling different operations asynchronously is safe because each operation
894has its own constant buffer. Memory copy/upload/download/set operations to the buffers you hold are
895also safe.
896
897@note The Stream class is not thread-safe. Please use different Stream objects for different CPU threads.
898
899@code
900void thread1()
901{
902 cv::cuda::Stream stream1;
903 cv::cuda::func1(..., stream1);
904}
905
906void thread2()
907{
908 cv::cuda::Stream stream2;
909 cv::cuda::func2(..., stream2);
910}
911@endcode
912
913@note By default all CUDA routines are launched in Stream::Null() object, if the stream is not specified by user.
914In multi-threading environment the stream objects must be passed explicitly (see previous note).
915 */
class CV_EXPORTS_W Stream
{
    // Safe-bool idiom: operator bool_type() below allows `if (stream)` tests while
    // preventing accidental comparisons/conversions between Stream objects.
    typedef void (Stream::*bool_type)() const;
    void this_type_does_not_support_comparisons() const {}

public:
    //! signature for host callbacks registered via enqueueHostCallback
    typedef void (*StreamCallback)(int status, void* userData);

    //! creates a new asynchronous stream
    CV_WRAP Stream();

    //! creates a new asynchronous stream with custom allocator
    CV_WRAP Stream(const Ptr<GpuMat::Allocator>& allocator);

    /** @brief creates a new Stream using the cudaFlags argument to determine the behaviors of the stream

    @note The cudaFlags parameter is passed to the underlying api cudaStreamCreateWithFlags() and
    supports the same parameter values.
    @code
    // creates an OpenCV cuda::Stream that manages an asynchronous, non-blocking,
    // non-default CUDA stream
    cv::cuda::Stream cvStream(cudaStreamNonBlocking);
    @endcode
    */
    CV_WRAP Stream(const size_t cudaFlags);

    /** @brief Returns true if the current stream queue is finished. Otherwise, it returns false.
    */
    CV_WRAP bool queryIfComplete() const;

    /** @brief Blocks the current CPU thread until all operations in the stream are complete.
    */
    CV_WRAP void waitForCompletion();

    /** @brief Makes a compute stream wait on an event.
    */
    CV_WRAP void waitEvent(const Event& event);

    /** @brief Adds a callback to be called on the host after all currently enqueued items in the stream have
    completed.

    @note Callbacks must not make any CUDA API calls. Callbacks must not perform any synchronization
    that may depend on outstanding device work or other callbacks that are not mandated to run earlier.
    Callbacks without a mandated order (in independent streams) execute in undefined order and may be
    serialized.
    */
    void enqueueHostCallback(StreamCallback callback, void* userData);

    //! return Stream object for default CUDA stream
    CV_WRAP static Stream& Null();

    //! returns true if stream object is not default (!= 0)
    operator bool_type() const;

    //! return Pointer to CUDA stream
    CV_WRAP void* cudaPtr() const;

    class Impl; // hidden implementation (pimpl)

private:
    Ptr<Impl> impl_;
    Stream(const Ptr<Impl>& impl); // used internally by friends below to wrap an existing Impl

    friend struct StreamAccessor;
    friend class BufferPool;
    friend class DefaultDeviceInitializer;
};
983
984
/** @brief Bindings overload to create a Stream object from the address stored in an existing CUDA Runtime API stream pointer (cudaStream_t).
@param cudaStreamMemoryAddress Memory address stored in a CUDA Runtime API stream pointer (cudaStream_t). The created Stream object does not perform any allocation or deallocation and simply wraps existing raw CUDA Runtime API stream pointer.
@note This overload exists for the generation of bindings only; it is not exported and not intended to be used from C++.
 */
CV_EXPORTS_W Stream wrapStream(size_t cudaStreamMemoryAddress);
990
/** @brief Wrapper for a CUDA event.

Events can be recorded into a Stream, queried or waited on, and used to measure the
elapsed time between two recorded events (see elapsedTime).
 */
class CV_EXPORTS_W Event
{
public:
    enum CreateFlags
    {
        DEFAULT        = 0x00,  /**< Default event flag */
        BLOCKING_SYNC  = 0x01,  /**< Event uses blocking synchronization */
        DISABLE_TIMING = 0x02,  /**< Event will not record timing data */
        INTERPROCESS   = 0x04   /**< Event is suitable for interprocess use. DisableTiming must be set */
    };

    CV_WRAP explicit Event(const Event::CreateFlags flags = Event::CreateFlags::DEFAULT);

    //! records an event
    CV_WRAP void record(Stream& stream = Stream::Null());

    //! queries an event's status
    CV_WRAP bool queryIfComplete() const;

    //! waits for an event to complete
    CV_WRAP void waitForCompletion();

    //! computes the elapsed time between events
    CV_WRAP static float elapsedTime(const Event& start, const Event& end);

    class Impl; // hidden implementation (pimpl)

private:
    Ptr<Impl> impl_;
    Event(const Ptr<Impl>& impl);

    friend struct EventAccessor;
};
//! allows CreateFlags values to be combined with bitwise operators
CV_ENUM_FLAGS(Event::CreateFlags)
1025
1026//! @} cudacore_struct
1027
1028//===================================================================================
1029// Initialization & Info
1030//===================================================================================
1031
1032//! @addtogroup cudacore_init
1033//! @{
1034
1035/** @brief Returns the number of installed CUDA-enabled devices.
1036
1037Use this function before any other CUDA functions calls. If OpenCV is compiled without CUDA support,
1038this function returns 0. If the CUDA driver is not installed, or is incompatible, this function
1039returns -1.
1040 */
1041CV_EXPORTS_W int getCudaEnabledDeviceCount();
1042
1043/** @brief Sets a device and initializes it for the current thread.
1044
1045@param device System index of a CUDA device starting with 0.
1046
If the call of this function is omitted, a default device is initialized at the first CUDA usage.
1048 */
1049CV_EXPORTS_W void setDevice(int device);
1050
1051/** @brief Returns the current device index set by cuda::setDevice or initialized by default.
1052 */
1053CV_EXPORTS_W int getDevice();
1054
1055/** @brief Explicitly destroys and cleans up all resources associated with the current device in the current
1056process.
1057
1058Any subsequent API call to this device will reinitialize the device.
1059 */
1060CV_EXPORTS_W void resetDevice();
1061
1062/** @brief Enumeration providing CUDA computing features.
1063 */
enum FeatureSet
{
    // Numeric values encode the CUDA compute capability as major*10 + minor.
    FEATURE_SET_COMPUTE_10 = 10,
    FEATURE_SET_COMPUTE_11 = 11,
    FEATURE_SET_COMPUTE_12 = 12,
    FEATURE_SET_COMPUTE_13 = 13,
    FEATURE_SET_COMPUTE_20 = 20,
    FEATURE_SET_COMPUTE_21 = 21,
    FEATURE_SET_COMPUTE_30 = 30,
    FEATURE_SET_COMPUTE_32 = 32,
    FEATURE_SET_COMPUTE_35 = 35,
    FEATURE_SET_COMPUTE_50 = 50,

    // Named aliases for the minimum compute capability providing each feature.
    GLOBAL_ATOMICS = FEATURE_SET_COMPUTE_11,
    SHARED_ATOMICS = FEATURE_SET_COMPUTE_12,
    NATIVE_DOUBLE = FEATURE_SET_COMPUTE_13,
    WARP_SHUFFLE_FUNCTIONS = FEATURE_SET_COMPUTE_30,
    DYNAMIC_PARALLELISM = FEATURE_SET_COMPUTE_35
};
1083
//! checks whether the current device supports the given feature (see cuda::FeatureSet)
CV_EXPORTS bool deviceSupports(FeatureSet feature_set);
1086
1087/** @brief Class providing a set of static methods to check what NVIDIA\* card architecture the CUDA module was
1088built for.
1089
1090According to the CUDA C Programming Guide Version 3.2: "PTX code produced for some specific compute
1091capability can always be compiled to binary code of greater or equal compute capability".
1092 */
class CV_EXPORTS_W TargetArchs
{
public:
    /** @brief The following method checks whether the module was built with the support of the given feature:

    @param feature_set Features to be checked. See cuda::FeatureSet.
    */
    static bool builtWith(FeatureSet feature_set);

    /** @brief There is a set of methods to check whether the module contains intermediate (PTX) or binary CUDA
    code for the given architecture(s):

    @param major Major compute capability version.
    @param minor Minor compute capability version.
    */
    CV_WRAP static bool has(int major, int minor);      //!< PTX or binary code for exactly (major, minor)
    CV_WRAP static bool hasPtx(int major, int minor);   //!< intermediate (PTX) code for exactly (major, minor)
    CV_WRAP static bool hasBin(int major, int minor);   //!< binary code for exactly (major, minor)

    CV_WRAP static bool hasEqualOrLessPtx(int major, int minor);    //!< PTX code for (major, minor) or a lower capability
    CV_WRAP static bool hasEqualOrGreater(int major, int minor);    //!< PTX or binary code for (major, minor) or a higher capability
    CV_WRAP static bool hasEqualOrGreaterPtx(int major, int minor); //!< PTX code for (major, minor) or a higher capability
    CV_WRAP static bool hasEqualOrGreaterBin(int major, int minor); //!< binary code for (major, minor) or a higher capability
};
1117
1118/** @brief Class providing functionality for querying the specified GPU properties.
1119 */
class CV_EXPORTS_W DeviceInfo
{
public:
    //! creates DeviceInfo object for the current GPU
    CV_WRAP DeviceInfo();

    /** @brief The constructors.

    @param device_id System index of the CUDA device starting with 0.

    Constructs the DeviceInfo object for the specified device. If the device_id parameter is
    omitted, it constructs an object for the current device.
    */
    CV_WRAP DeviceInfo(int device_id);

    /** @brief Returns system index of the CUDA device starting with 0.
    */
    CV_WRAP int deviceID() const;

    //! ASCII string identifying device
    const char* name() const;

    //! global memory available on device in bytes
    CV_WRAP size_t totalGlobalMem() const;

    //! shared memory available per block in bytes
    CV_WRAP size_t sharedMemPerBlock() const;

    //! 32-bit registers available per block
    CV_WRAP int regsPerBlock() const;

    //! warp size in threads
    CV_WRAP int warpSize() const;

    //! maximum pitch in bytes allowed by memory copies
    CV_WRAP size_t memPitch() const;

    //! maximum number of threads per block
    CV_WRAP int maxThreadsPerBlock() const;

    //! maximum size of each dimension of a block
    CV_WRAP Vec3i maxThreadsDim() const;

    //! maximum size of each dimension of a grid
    CV_WRAP Vec3i maxGridSize() const;

    //! clock frequency in kilohertz
    CV_WRAP int clockRate() const;

    //! constant memory available on device in bytes
    CV_WRAP size_t totalConstMem() const;

    //! major compute capability
    CV_WRAP int majorVersion() const;

    //! minor compute capability
    CV_WRAP int minorVersion() const;

    //! alignment requirement for textures
    CV_WRAP size_t textureAlignment() const;

    //! pitch alignment requirement for texture references bound to pitched memory
    CV_WRAP size_t texturePitchAlignment() const;

    //! number of multiprocessors on device
    CV_WRAP int multiProcessorCount() const;

    //! specifies whether there is a run time limit on kernels
    CV_WRAP bool kernelExecTimeoutEnabled() const;

    //! device is integrated as opposed to discrete
    CV_WRAP bool integrated() const;

    //! device can map host memory with cudaHostAlloc/cudaHostGetDevicePointer
    CV_WRAP bool canMapHostMemory() const;

    enum ComputeMode
    {
        ComputeModeDefault,         /**< default compute mode (Multiple threads can use cudaSetDevice with this device) */
        ComputeModeExclusive,       /**< compute-exclusive-thread mode (Only one thread in one process will be able to use cudaSetDevice with this device) */
        ComputeModeProhibited,      /**< compute-prohibited mode (No threads can use cudaSetDevice with this device) */
        ComputeModeExclusiveProcess /**< compute-exclusive-process mode (Many threads in one process will be able to use cudaSetDevice with this device) */
    };

    //! compute mode
    CV_WRAP DeviceInfo::ComputeMode computeMode() const;

    //! maximum 1D texture size
    CV_WRAP int maxTexture1D() const;

    //! maximum 1D mipmapped texture size
    CV_WRAP int maxTexture1DMipmap() const;

    //! maximum size for 1D textures bound to linear memory
    CV_WRAP int maxTexture1DLinear() const;

    //! maximum 2D texture dimensions
    CV_WRAP Vec2i maxTexture2D() const;

    //! maximum 2D mipmapped texture dimensions
    CV_WRAP Vec2i maxTexture2DMipmap() const;

    //! maximum dimensions (width, height, pitch) for 2D textures bound to pitched memory
    CV_WRAP Vec3i maxTexture2DLinear() const;

    //! maximum 2D texture dimensions if texture gather operations have to be performed
    CV_WRAP Vec2i maxTexture2DGather() const;

    //! maximum 3D texture dimensions
    CV_WRAP Vec3i maxTexture3D() const;

    //! maximum Cubemap texture dimensions
    CV_WRAP int maxTextureCubemap() const;

    //! maximum 1D layered texture dimensions
    CV_WRAP Vec2i maxTexture1DLayered() const;

    //! maximum 2D layered texture dimensions
    CV_WRAP Vec3i maxTexture2DLayered() const;

    //! maximum Cubemap layered texture dimensions
    CV_WRAP Vec2i maxTextureCubemapLayered() const;

    //! maximum 1D surface size
    CV_WRAP int maxSurface1D() const;

    //! maximum 2D surface dimensions
    CV_WRAP Vec2i maxSurface2D() const;

    //! maximum 3D surface dimensions
    CV_WRAP Vec3i maxSurface3D() const;

    //! maximum 1D layered surface dimensions
    CV_WRAP Vec2i maxSurface1DLayered() const;

    //! maximum 2D layered surface dimensions
    CV_WRAP Vec3i maxSurface2DLayered() const;

    //! maximum Cubemap surface dimensions
    CV_WRAP int maxSurfaceCubemap() const;

    //! maximum Cubemap layered surface dimensions
    CV_WRAP Vec2i maxSurfaceCubemapLayered() const;

    //! alignment requirements for surfaces
    CV_WRAP size_t surfaceAlignment() const;

    //! device can possibly execute multiple kernels concurrently
    CV_WRAP bool concurrentKernels() const;

    //! device has ECC support enabled
    CV_WRAP bool ECCEnabled() const;

    //! PCI bus ID of the device
    CV_WRAP int pciBusID() const;

    //! PCI device ID of the device
    CV_WRAP int pciDeviceID() const;

    //! PCI domain ID of the device
    CV_WRAP int pciDomainID() const;

    //! true if device is a Tesla device using TCC driver, false otherwise
    CV_WRAP bool tccDriver() const;

    //! number of asynchronous engines
    CV_WRAP int asyncEngineCount() const;

    //! device shares a unified address space with the host
    CV_WRAP bool unifiedAddressing() const;

    //! peak memory clock frequency in kilohertz
    CV_WRAP int memoryClockRate() const;

    //! global memory bus width in bits
    CV_WRAP int memoryBusWidth() const;

    //! size of L2 cache in bytes
    CV_WRAP int l2CacheSize() const;

    //! maximum resident threads per multiprocessor
    CV_WRAP int maxThreadsPerMultiProcessor() const;

    //! gets free and total device memory
    CV_WRAP void queryMemory(size_t& totalMemory, size_t& freeMemory) const;
    CV_WRAP size_t freeMemory() const;
    CV_WRAP size_t totalMemory() const;

    /** @brief Provides information on CUDA feature support.

    @param feature_set Features to be checked. See cuda::FeatureSet.

    This function returns true if the device has the specified CUDA feature. Otherwise, it returns false
    */
    bool supports(FeatureSet feature_set) const;

    /** @brief Checks the CUDA module and device compatibility.

    This function returns true if the CUDA module can be run on the specified device. Otherwise, it
    returns false .
    */
    CV_WRAP bool isCompatible() const;

private:
    int device_id_; // system index of the device this object describes
};
1326
//! prints detailed information about the given CUDA device to stdout
CV_EXPORTS_W void printCudaDeviceInfo(int device);
//! prints a short summary of the given CUDA device to stdout
CV_EXPORTS_W void printShortCudaDeviceInfo(int device);

/** @brief Converts an array to half precision floating number.

@param _src input array.
@param _dst output array.
@param stream Stream for the asynchronous version.
@sa convertFp16
*/
CV_EXPORTS void convertFp16(InputArray _src, OutputArray _dst, Stream& stream = Stream::Null());
1338
1339//! @} cudacore_init
1340
1341}} // namespace cv { namespace cuda {
1342
1343
1344#include "opencv2/core/cuda.inl.hpp"
1345
1346#endif /* OPENCV_CORE_CUDA_HPP */
1347

// source code of opencv/modules/core/include/opencv2/core/cuda.hpp