1/*M///////////////////////////////////////////////////////////////////////////////////////
2//
3// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4//
5// By downloading, copying, installing or using the software you agree to this license.
6// If you do not agree to this license, do not download, install,
7// copy or use the software.
8//
9//
10// License Agreement
11// For Open Source Computer Vision Library
12//
13// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
16// Third party copyrights are property of their respective owners.
17//
18// Redistribution and use in source and binary forms, with or without modification,
19// are permitted provided that the following conditions are met:
20//
21// * Redistribution's of source code must retain the above copyright notice,
22// this list of conditions and the following disclaimer.
23//
24// * Redistribution's in binary form must reproduce the above copyright notice,
25// this list of conditions and the following disclaimer in the documentation
26// and/or other materials provided with the distribution.
27//
28// * The name of the copyright holders may not be used to endorse or promote products
29// derived from this software without specific prior written permission.
30//
31// This software is provided by the copyright holders and contributors "as is" and
32// any express or implied warranties, including, but not limited to, the implied
33// warranties of merchantability and fitness for a particular purpose are disclaimed.
34// In no event shall the Intel Corporation or contributors be liable for any direct,
35// indirect, incidental, special, exemplary, or consequential damages
36// (including, but not limited to, procurement of substitute goods or services;
37// loss of use, data, or profits; or business interruption) however caused
38// and on any theory of liability, whether in contract, strict liability,
39// or tort (including negligence or otherwise) arising in any way out of
40// the use of this software, even if advised of the possibility of such damage.
41//
42//M*/
43
44#ifndef OPENCV_CORE_CUDA_HPP
45#define OPENCV_CORE_CUDA_HPP
46
47#ifndef __cplusplus
48# error cuda.hpp header must be compiled as C++
49#endif
50
51#include "opencv2/core.hpp"
52#include "opencv2/core/cuda_types.hpp"
53
54/**
55 @defgroup cuda CUDA-accelerated Computer Vision
56 @{
57 @defgroup cudacore Core part
58 @{
59 @defgroup cudacore_init Initialization and Information
60 @defgroup cudacore_struct Data Structures
61 @}
62 @}
63 */
64
65namespace cv { namespace cuda {
66
67//! @addtogroup cudacore_struct
68//! @{
69
70//===================================================================================
71// GpuMat
72//===================================================================================
73
74/** @brief Base storage class for GPU memory with reference counting.
75
76Its interface matches the Mat interface with the following limitations:
77
78- no arbitrary dimensions support (only 2D)
79- no functions that return references to their data (because references on GPU are not valid for
80 CPU)
81- no expression templates technique support
82
83Beware that the latter limitation may lead to overloaded matrix operators that cause memory
84allocations. The GpuMat class is convertible to cuda::PtrStepSz and cuda::PtrStep so it can be
85passed directly to the kernel.
86
87@note In contrast with Mat, in most cases GpuMat::isContinuous() == false. This means that rows are
88aligned to a size depending on the hardware. Single-row GpuMat is always a continuous matrix.
89
90@note It is not recommended to leave static or global GpuMat variables allocated, that is, to rely
91on their destructors. The destruction order of such variables and of the CUDA context is undefined, and the
92GPU memory release function returns an error if the CUDA context has already been destroyed.
93
94Some member functions are described as a "Blocking Call" while others are described as a
95"Non-Blocking Call". Blocking functions are synchronous to the host: it is guaranteed that the GPU
96operation has finished when the function returns. Non-blocking functions are asynchronous to the
97host: they may return before the GPU operation has finished.
98
99Compared to their blocking counterparts, non-blocking functions accept Stream as an additional
100argument. If a non-default stream is passed, the GPU operation may overlap with operations in other
101streams.
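
A minimal sketch of a blocking host-to-device-to-host round trip (the sizes and fill values are
arbitrary placeholders; it assumes OpenCV was built with CUDA support):
@code
    cv::Mat h_src(480, 640, CV_8UC1, cv::Scalar(0));
    cv::cuda::GpuMat d_img;
    d_img.upload(h_src);            // blocking upload to device memory
    d_img.setTo(cv::Scalar(255));   // runs on the GPU
    cv::Mat h_dst;
    d_img.download(h_dst);          // blocking download back to host memory
@endcode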
102
103@sa Mat
104 */
105class CV_EXPORTS_W GpuMat
106{
107public:
108 class CV_EXPORTS_W Allocator
109 {
110 public:
111 virtual ~Allocator() {}
112
113 // allocator must fill data, step and refcount fields
114 virtual bool allocate(GpuMat* mat, int rows, int cols, size_t elemSize) = 0;
115 virtual void free(GpuMat* mat) = 0;
116 };
117
118 //! default allocator
119 CV_WRAP static GpuMat::Allocator* defaultAllocator();
120 CV_WRAP static void setDefaultAllocator(GpuMat::Allocator* allocator);
121
122 //! default constructor
123 CV_WRAP explicit GpuMat(GpuMat::Allocator* allocator = GpuMat::defaultAllocator());
124
125 //! constructs GpuMat of the specified size and type
126 CV_WRAP GpuMat(int rows, int cols, int type, GpuMat::Allocator* allocator = GpuMat::defaultAllocator());
127 CV_WRAP GpuMat(Size size, int type, GpuMat::Allocator* allocator = GpuMat::defaultAllocator());
128
129 //! constructs GpuMat and fills it with the specified value s
130 CV_WRAP GpuMat(int rows, int cols, int type, Scalar s, GpuMat::Allocator* allocator = GpuMat::defaultAllocator());
131 CV_WRAP GpuMat(Size size, int type, Scalar s, GpuMat::Allocator* allocator = GpuMat::defaultAllocator());
132
133 //! copy constructor
134 CV_WRAP GpuMat(const GpuMat& m);
135
136 //! constructor for GpuMat headers pointing to user-allocated data
137 GpuMat(int rows, int cols, int type, void* data, size_t step = Mat::AUTO_STEP);
138 GpuMat(Size size, int type, void* data, size_t step = Mat::AUTO_STEP);
139
140 //! creates a GpuMat header for a part of the bigger matrix
141 CV_WRAP GpuMat(const GpuMat& m, Range rowRange, Range colRange);
142 CV_WRAP GpuMat(const GpuMat& m, Rect roi);
143
144 //! builds GpuMat from host memory (Blocking call)
145 CV_WRAP explicit GpuMat(InputArray arr, GpuMat::Allocator* allocator = GpuMat::defaultAllocator());
146
147 //! destructor - calls release()
148 ~GpuMat();
149
150 //! assignment operators
151 GpuMat& operator =(const GpuMat& m);
152
153 //! allocates new GpuMat data unless the GpuMat already has specified size and type
154 CV_WRAP void create(int rows, int cols, int type);
155 CV_WRAP void create(Size size, int type);
156
157 //! decreases the reference counter and deallocates the data when the reference counter reaches 0
158 CV_WRAP void release();
159
160 //! swaps with other smart pointer
161 CV_WRAP void swap(GpuMat& mat);
162
163 /** @brief Performs data upload to GpuMat (Blocking call)
164
165 This function copies data from host memory to device memory. As this is a blocking call, it is
166 guaranteed that the copy operation has finished when this function returns.
167 */
168 CV_WRAP void upload(InputArray arr);
169
170 /** @brief Performs data upload to GpuMat (Non-Blocking call)
171
172 This function copies data from host memory to device memory. As this is a non-blocking call, the
173 function may return before the copy operation has finished.
174
175 The copy operation may be overlapped with operations in other non-default streams if \p stream is
176 not the default stream and \p arr is HostMem allocated with the HostMem::PAGE_LOCKED option.
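
    A minimal sketch of an asynchronous upload from page-locked host memory (assumes a CUDA-capable
    device; the sizes are arbitrary):
    @code
        cv::cuda::HostMem h_page_locked(480, 640, CV_8UC1, cv::cuda::HostMem::PAGE_LOCKED);
        cv::cuda::Stream stream;
        cv::cuda::GpuMat d_img;
        d_img.upload(h_page_locked, stream); // may return before the copy has finished
        // ... enqueue further GPU work on the same stream ...
        stream.waitForCompletion();          // synchronize before reusing h_page_locked on the host
    @endcode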
177 */
178 CV_WRAP void upload(InputArray arr, Stream& stream);
179
180 /** @brief Performs data download from GpuMat (Blocking call)
181
182 This function copies data from device memory to host memory. As this is a blocking call, it is
183 guaranteed that the copy operation has finished when this function returns.
184 */
185 CV_WRAP void download(OutputArray dst) const;
186
187 /** @brief Performs data download from GpuMat (Non-Blocking call)
188
189 This function copies data from device memory to host memory. As this is a non-blocking call, the
190 function may return before the copy operation has finished.
191
192 The copy operation may be overlapped with operations in other non-default streams if \p stream is
193 not the default stream and \p dst is HostMem allocated with HostMem::PAGE_LOCKED option.
194 */
195 CV_WRAP void download(OutputArray dst, Stream& stream) const;
196
197 //! returns deep copy of the GpuMat, i.e. the data is copied
198 CV_WRAP GpuMat clone() const;
199
200 //! copies the GpuMat content to device memory (Blocking call)
201 void copyTo(OutputArray dst) const;
202 //! bindings overload which copies the GpuMat content to device memory (Blocking call)
203 CV_WRAP void copyTo(CV_OUT GpuMat& dst) const {
204 copyTo(static_cast<OutputArray>(dst));
205 }
206
207 //! copies the GpuMat content to device memory (Non-Blocking call)
208 void copyTo(OutputArray dst, Stream& stream) const;
209 //! bindings overload which copies the GpuMat content to device memory (Non-Blocking call)
210 CV_WRAP void copyTo(CV_OUT GpuMat& dst, Stream& stream) const {
211 copyTo(static_cast<OutputArray>(dst), stream);
212 }
213
214 //! copies those GpuMat elements to "dst" that are marked with non-zero mask elements (Blocking call)
215 void copyTo(OutputArray dst, InputArray mask) const;
216 //! bindings overload which copies those GpuMat elements to "dst" that are marked with non-zero mask elements (Blocking call)
217 CV_WRAP void copyTo(CV_OUT GpuMat& dst, GpuMat& mask) const {
218 copyTo(static_cast<OutputArray>(dst), static_cast<InputArray>(mask));
219 }
220
221 //! copies those GpuMat elements to "dst" that are marked with non-zero mask elements (Non-Blocking call)
222 void copyTo(OutputArray dst, InputArray mask, Stream& stream) const;
223 //! bindings overload which copies those GpuMat elements to "dst" that are marked with non-zero mask elements (Non-Blocking call)
224 CV_WRAP void copyTo(CV_OUT GpuMat& dst, GpuMat& mask, Stream& stream) const {
225 copyTo(static_cast<OutputArray>(dst), static_cast<InputArray>(mask), stream);
226 }
227
228 //! sets all the GpuMat elements to s (Blocking call)
229 CV_WRAP GpuMat& setTo(Scalar s);
230
231 //! sets all the GpuMat elements to s (Non-Blocking call)
232 CV_WRAP GpuMat& setTo(Scalar s, Stream& stream);
233
234 //! sets some of the GpuMat elements to s, according to the mask (Blocking call)
235 CV_WRAP GpuMat& setTo(Scalar s, InputArray mask);
236
237 //! sets some of the GpuMat elements to s, according to the mask (Non-Blocking call)
238 CV_WRAP GpuMat& setTo(Scalar s, InputArray mask, Stream& stream);
239
240 //! converts GpuMat to another datatype (Blocking call)
241 void convertTo(OutputArray dst, int rtype) const;
242
243 //! converts GpuMat to another datatype (Non-Blocking call)
244 void convertTo(OutputArray dst, int rtype, Stream& stream) const;
245 //! bindings overload which converts GpuMat to another datatype (Non-Blocking call)
246 CV_WRAP void convertTo(CV_OUT GpuMat& dst, int rtype, Stream& stream) const {
247 convertTo(static_cast<OutputArray>(dst), rtype, stream);
248 }
249
250 //! converts GpuMat to another datatype with scaling (Blocking call)
251 void convertTo(OutputArray dst, int rtype, double alpha, double beta = 0.0) const;
252 //! bindings overload which converts GpuMat to another datatype with scaling (Blocking call)
253 CV_WRAP void convertTo(CV_OUT GpuMat& dst, int rtype, double alpha = 1.0, double beta = 0.0) const {
254 convertTo(static_cast<OutputArray>(dst), rtype, alpha, beta);
255 }
256
257 //! converts GpuMat to another datatype with scaling (Non-Blocking call)
258 void convertTo(OutputArray dst, int rtype, double alpha, Stream& stream) const;
259
260 //! converts GpuMat to another datatype with scaling (Non-Blocking call)
261 void convertTo(OutputArray dst, int rtype, double alpha, double beta, Stream& stream) const;
262 //! bindings overload which converts GpuMat to another datatype with scaling (Non-Blocking call)
263 CV_WRAP void convertTo(CV_OUT GpuMat& dst, int rtype, double alpha, double beta, Stream& stream) const {
264 convertTo(static_cast<OutputArray>(dst), rtype, alpha, beta, stream);
265 }
266
267 CV_WRAP void assignTo(GpuMat& m, int type = -1) const;
268
269 //! returns pointer to y-th row
270 uchar* ptr(int y = 0);
271 const uchar* ptr(int y = 0) const;
272
273 //! template version of the above method
274 template<typename _Tp> _Tp* ptr(int y = 0);
275 template<typename _Tp> const _Tp* ptr(int y = 0) const;
276
277 template <typename _Tp> operator PtrStepSz<_Tp>() const;
278 template <typename _Tp> operator PtrStep<_Tp>() const;
279
280 //! returns a new GpuMat header for the specified row
281 CV_WRAP GpuMat row(int y) const;
282
283 //! returns a new GpuMat header for the specified column
284 CV_WRAP GpuMat col(int x) const;
285
286 //! ... for the specified row span
287 CV_WRAP GpuMat rowRange(int startrow, int endrow) const;
288 CV_WRAP GpuMat rowRange(Range r) const;
289
290 //! ... for the specified column span
291 CV_WRAP GpuMat colRange(int startcol, int endcol) const;
292 CV_WRAP GpuMat colRange(Range r) const;
293
294 //! extracts a rectangular sub-GpuMat (this is a generalized form of row, rowRange etc.)
295 GpuMat operator ()(Range rowRange, Range colRange) const;
296 GpuMat operator ()(Rect roi) const;
297
298 //! creates alternative GpuMat header for the same data, with different
299 //! number of channels and/or different number of rows
300 CV_WRAP GpuMat reshape(int cn, int rows = 0) const;
301
302 //! locates GpuMat header within a parent GpuMat
303 CV_WRAP void locateROI(Size& wholeSize, Point& ofs) const;
304
305 //! moves/resizes the current GpuMat ROI inside the parent GpuMat
306 CV_WRAP GpuMat& adjustROI(int dtop, int dbottom, int dleft, int dright);
307
308 //! returns true iff the GpuMat data is continuous
309 //! (i.e. when there are no gaps between successive rows)
310 CV_WRAP bool isContinuous() const;
311
312 //! returns element size in bytes
313 CV_WRAP size_t elemSize() const;
314
315 //! returns the size of element channel in bytes
316 CV_WRAP size_t elemSize1() const;
317
318 //! returns element type
319 CV_WRAP int type() const;
320
321 //! returns element depth
322 CV_WRAP int depth() const;
323
324 //! returns number of channels
325 CV_WRAP int channels() const;
326
327 //! returns step/elemSize1()
328 CV_WRAP size_t step1() const;
329
330 //! returns GpuMat size : width == number of columns, height == number of rows
331 CV_WRAP Size size() const;
332
333 //! returns true if GpuMat data is NULL
334 CV_WRAP bool empty() const;
335
336 //! returns pointer to CUDA memory
337 CV_WRAP void* cudaPtr() const;
338
339 //! internal use method: updates the continuity flag
340 CV_WRAP void updateContinuityFlag();
341
342 /*! includes several bit-fields:
343 - the magic signature
344 - continuity flag
345 - depth
346 - number of channels
347 */
348 int flags;
349
350 //! the number of rows and columns
351 int rows, cols;
352
353 //! a distance between successive rows in bytes; includes the gap if any
354 CV_PROP size_t step;
355
356 //! pointer to the data
357 uchar* data;
358
359 //! pointer to the reference counter;
360 //! when GpuMat points to user-allocated data, the pointer is NULL
361 int* refcount;
362
363 //! helper fields used in locateROI and adjustROI
364 uchar* datastart;
365 const uchar* dataend;
366
367 //! allocator
368 Allocator* allocator;
369};
370
371struct CV_EXPORTS_W GpuData
372{
373 explicit GpuData(size_t _size);
374 ~GpuData();
375
376 GpuData(const GpuData&) = delete;
377 GpuData& operator=(const GpuData&) = delete;
378
379 GpuData(GpuData&&) = delete;
380 GpuData& operator=(GpuData&&) = delete;
381
382 uchar* data;
383 size_t size;
384};
385
386class CV_EXPORTS_W GpuMatND
387{
388public:
389 using SizeArray = std::vector<int>;
390 using StepArray = std::vector<size_t>;
391 using IndexArray = std::vector<int>;
392
393 //! destructor
394 ~GpuMatND();
395
396 //! default constructor
397 GpuMatND();
398
399 /** @overload
400 @param size Array of integers specifying an n-dimensional array shape.
401 @param type Array type. Use CV_8UC1, ..., CV_16FC4 to create 1-4 channel matrices, or
402 CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices.
403 */
404 GpuMatND(SizeArray size, int type);
405
406 /** @overload
407 @param size Array of integers specifying an n-dimensional array shape.
408 @param type Array type. Use CV_8UC1, ..., CV_16FC4 to create 1-4 channel matrices, or
409 CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices.
410 @param data Pointer to the user data. Matrix constructors that take data and step parameters do not
411 allocate matrix data. Instead, they just initialize the matrix header that points to the specified
412 data, which means that no data is copied. This operation is very efficient and can be used to
413 process external data using OpenCV functions. The external data is not automatically deallocated, so
414 you should take care of it.
415 @param step Array of size.size()-1 steps in case of a multi-dimensional array (the last step is always
416 set to the element size). If not specified, the matrix is assumed to be continuous.
417 */
418 GpuMatND(SizeArray size, int type, void* data, StepArray step = StepArray());
419
420 /** @brief Allocates GPU memory.
421 Suppose there is some GPU memory already allocated. In that case, this method may choose to reuse that
422 GPU memory under specific conditions: it must be of the same size and type, not externally allocated,
423 continuous (i.e., isContinuous() is true), and not a sub-matrix of another GpuMatND
424 (i.e., isSubmatrix() is false). In other words, this method guarantees that the GPU memory allocated by
425 this method is always continuous and is not a sub-region of another GpuMatND.
426 */
427 void create(SizeArray size, int type);
428
429 void release();
430
431 void swap(GpuMatND& m) noexcept;
432
433 /** @brief Creates a full copy of the array and the underlying data.
434 The method creates a full copy of the array. It mimics the behavior of Mat::clone(), i.e.
435 the original step is not taken into account. So, the array copy is a continuous array
436 occupying total()\*elemSize() bytes.
437 */
438 GpuMatND clone() const;
439
440 /** @overload
441 This overload is non-blocking, so it may return even if the copy operation is not finished.
442 */
443 GpuMatND clone(Stream& stream) const;
444
445 /** @brief Extracts a sub-matrix.
446 The operator makes a new header for the specified sub-array of \*this.
447 The operator is an O(1) operation, that is, no matrix data is copied.
448 @param ranges Array of selected ranges along each dimension.
449 */
450 GpuMatND operator()(const std::vector<Range>& ranges) const;
451
452 /** @brief Creates a GpuMat header for a 2D plane part of an n-dim matrix.
453 @note The returned GpuMat is constructed with the constructor for user-allocated data.
454 That is, it does not perform reference counting.
455 @note This function does not increment this GpuMatND's reference counter.
456 */
457 GpuMat createGpuMatHeader(IndexArray idx, Range rowRange, Range colRange) const;
458
459 /** @overload
460 Creates a GpuMat header if this GpuMatND is effectively 2D.
461 @note The returned GpuMat is constructed with the constructor for user-allocated data.
462 That is, it does not perform reference counting.
463 @note This function does not increment this GpuMatND's reference counter.
464 */
465 GpuMat createGpuMatHeader() const;
466
467 /** @brief Extracts a 2D plane part of an n-dim matrix.
468 It differs from createGpuMatHeader(IndexArray, Range, Range) in that it clones a part of this
469 GpuMatND to the returned GpuMat.
470 @note This operator does not increment this GpuMatND's reference counter.
471 */
472 GpuMat operator()(IndexArray idx, Range rowRange, Range colRange) const;
473
474 /** @brief Extracts a 2D plane part of an n-dim matrix if this GpuMatND is effectively 2D.
475 It differs from createGpuMatHeader() in that it clones a part of this GpuMatND.
476 @note This operator does not increment this GpuMatND's reference counter.
477 */
478 operator GpuMat() const;
479
480 GpuMatND(const GpuMatND&) = default;
481 GpuMatND& operator=(const GpuMatND&) = default;
482
483#if defined(__GNUC__) && __GNUC__ < 5
484 // error: function '...' defaulted on its first declaration with an exception-specification
485 // that differs from the implicit declaration '...'
486
487 GpuMatND(GpuMatND&&) = default;
488 GpuMatND& operator=(GpuMatND&&) = default;
489#else
490 GpuMatND(GpuMatND&&) noexcept = default;
491 GpuMatND& operator=(GpuMatND&&) noexcept = default;
492#endif
493
494 void upload(InputArray src);
495 void upload(InputArray src, Stream& stream);
496 void download(OutputArray dst) const;
497 void download(OutputArray dst, Stream& stream) const;
498
499 //! returns true iff the GpuMatND data is continuous
500 //! (i.e. when there are no gaps between successive rows)
501 bool isContinuous() const;
502
503 //! returns true if the matrix is a sub-matrix of another matrix
504 bool isSubmatrix() const;
505
506 //! returns element size in bytes
507 size_t elemSize() const;
508
509 //! returns the size of element channel in bytes
510 size_t elemSize1() const;
511
512 //! returns true if data is null
513 bool empty() const;
514
515 //! returns true if not empty and points to external (user-allocated) GPU memory
516 bool external() const;
517
518 //! returns pointer to the first byte of the GPU memory
519 uchar* getDevicePtr() const;
520
521 //! returns the total number of array elements
522 size_t total() const;
523
524 //! returns the size of underlying memory in bytes
525 size_t totalMemSize() const;
526
527 //! returns element type
528 int type() const;
529
530private:
531 //! internal use
532 void setFields(SizeArray size, int type, StepArray step = StepArray());
533
534public:
535 /*! includes several bit-fields:
536 - the magic signature
537 - continuity flag
538 - depth
539 - number of channels
540 */
541 int flags;
542
543 //! matrix dimensionality
544 int dims;
545
546 //! shape of this array
547 SizeArray size;
548
549 /*! step values
550 Their semantics are identical to the semantics of step for Mat.
551 */
552 StepArray step;
553
554private:
555 /*! internal use
556 If this GpuMatND holds external memory, this is empty.
557 */
558 std::shared_ptr<GpuData> data_;
559
560 /*! internal use
561 If this GpuMatND manages memory with reference counting, this value is
562 always equal to data_->data. If this GpuMatND holds external memory,
563 data_ is empty and data points to the external memory.
564 */
565 uchar* data;
566
567 /*! internal use
568 If this GpuMatND is a sub-matrix of a larger matrix, this value is the
569 byte offset of the first byte of the sub-matrix relative to the whole matrix.
570 */
571 size_t offset;
572};
573
574/** @brief Creates a continuous matrix.
575
576@param rows Row count.
577@param cols Column count.
578@param type Type of the matrix.
579@param arr Destination matrix. This parameter changes only if it has a proper type and area (
580\f$\texttt{rows} \times \texttt{cols}\f$ ).
581
582A matrix is called continuous if its elements are stored continuously, that is, without gaps at the
583end of each row.
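
For example (a sketch; the size and type are arbitrary):
@code
    cv::cuda::GpuMat buf;
    cv::cuda::createContinuous(480, 640, CV_8UC1, buf);
    CV_Assert(buf.isContinuous());
@endcode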
584 */
585CV_EXPORTS_W void createContinuous(int rows, int cols, int type, OutputArray arr);
586
587/** @brief Ensures that the size of a matrix is big enough and the matrix has a proper type.
588
589@param rows Minimum desired number of rows.
590@param cols Minimum desired number of columns.
591@param type Desired matrix type.
592@param arr Destination matrix.
593
594The function does not reallocate memory if the matrix has proper attributes already.
595 */
596CV_EXPORTS_W void ensureSizeIsEnough(int rows, int cols, int type, OutputArray arr);
597
598/** @brief Bindings overload to create a GpuMat from existing GPU memory.
599@param rows Row count.
600@param cols Column count.
601@param type Type of the matrix.
602@param cudaMemoryAddress Address of the allocated GPU memory on the device. This does not allocate matrix data. Instead, it just initializes the matrix header that points to the specified \a cudaMemoryAddress, which means that no data is copied. This operation is very efficient and can be used to process external data using OpenCV functions. The external data is not automatically deallocated, so you should take care of it.
603@param step Number of bytes each matrix row occupies. The value should include the padding bytes at the end of each row, if any. If the parameter is missing (set to Mat::AUTO_STEP ), no padding is assumed and the actual step is calculated as cols*elemSize(). See GpuMat::elemSize.
604@note Overload for generation of bindings only, not exported or intended for use internally from C++.
605 */
606CV_EXPORTS_W inline GpuMat createGpuMatFromCudaMemory(int rows, int cols, int type, size_t cudaMemoryAddress, size_t step = Mat::AUTO_STEP) {
607 return GpuMat(rows, cols, type, reinterpret_cast<void*>(cudaMemoryAddress), step);
608}
609
610 /** @overload
611@param size 2D array size: Size(cols, rows). In the Size() constructor, the number of rows and the number of columns go in the reverse order.
612@param type Type of the matrix.
613@param cudaMemoryAddress Address of the allocated GPU memory on the device. This does not allocate matrix data. Instead, it just initializes the matrix header that points to the specified \a cudaMemoryAddress, which means that no data is copied. This operation is very efficient and can be used to process external data using OpenCV functions. The external data is not automatically deallocated, so you should take care of it.
614@param step Number of bytes each matrix row occupies. The value should include the padding bytes at the end of each row, if any. If the parameter is missing (set to Mat::AUTO_STEP ), no padding is assumed and the actual step is calculated as cols*elemSize(). See GpuMat::elemSize.
615@note Overload for generation of bindings only, not exported or intended for use internally from C++.
616 */
617CV_EXPORTS_W inline GpuMat createGpuMatFromCudaMemory(Size size, int type, size_t cudaMemoryAddress, size_t step = Mat::AUTO_STEP) {
618 return GpuMat(size, type, reinterpret_cast<void*>(cudaMemoryAddress), step);
619}
620
621/** @brief BufferPool for use with CUDA streams
622
623BufferPool utilizes Stream's allocator to create new buffers for GpuMat's. It is
624only useful when enabled with #setBufferPoolUsage.
625
626@code
627 setBufferPoolUsage(true);
628@endcode
629
630@note #setBufferPoolUsage must be called \em before any Stream declaration.
631
632Users may specify a custom allocator for Stream and may implement their own stream-based
633functions utilizing the same underlying GPU memory management.
634
635If a custom allocator is not specified, BufferPool utilizes StackAllocator by
636default. StackAllocator allocates a chunk of GPU device memory beforehand,
637and when GpuMat is declared later on, it is given the pre-allocated memory.
638This kind of strategy reduces the number of calls for memory allocating APIs
639such as cudaMalloc or cudaMallocPitch.
640
641Below is an example that utilizes BufferPool with StackAllocator:
642
643@code
644 #include <opencv2/opencv.hpp>
645
646 using namespace cv;
647 using namespace cv::cuda;
648
649 int main()
650 {
651 setBufferPoolUsage(true); // Tell OpenCV that we are going to utilize BufferPool
652 setBufferPoolConfig(getDevice(), 1024 * 1024 * 64, 2); // Allocate 64 MB, 2 stacks (default is 10 MB, 5 stacks)
653
654 Stream stream1, stream2; // Each stream uses 1 stack
655 BufferPool pool1(stream1), pool2(stream2);
656
657 GpuMat d_src1 = pool1.getBuffer(4096, 4096, CV_8UC1); // 16MB
658 GpuMat d_dst1 = pool1.getBuffer(4096, 4096, CV_8UC3); // 48MB, pool1 is now full
659
660 GpuMat d_src2 = pool2.getBuffer(1024, 1024, CV_8UC1); // 1MB
661 GpuMat d_dst2 = pool2.getBuffer(1024, 1024, CV_8UC3); // 3MB
662
663 cvtColor(d_src1, d_dst1, cv::COLOR_GRAY2BGR, 0, stream1);
664 cvtColor(d_src2, d_dst2, cv::COLOR_GRAY2BGR, 0, stream2);
665 }
666@endcode
667
668If we allocate another GpuMat on pool1 in the above example, it will be carried out by
669the DefaultAllocator since the stack for pool1 is full.
670
671@code
672 GpuMat d_add1 = pool1.getBuffer(1024, 1024, CV_8UC1); // Stack for pool1 is full, memory is allocated with DefaultAllocator
673@endcode
674
675If a third stream is declared in the above example, allocating with #getBuffer
676within that stream will also be carried out by the DefaultAllocator because we've run out of
677stacks.
678
679@code
680 Stream stream3; // Only 2 stacks were allocated, we've run out of stacks
681 BufferPool pool3(stream3);
682 GpuMat d_src3 = pool3.getBuffer(1024, 1024, CV_8UC1); // Memory is allocated with DefaultAllocator
683@endcode
684
685@warning When utilizing StackAllocator, deallocation order is important.
686
687Just like a stack, deallocation must be done in LIFO order. Below is an example of
688erroneous usage that violates the LIFO rule. If OpenCV is compiled in Debug mode, this
689sample code will trigger a CV_Assert error.
690
691@code
692 int main()
693 {
694 setBufferPoolUsage(true); // Tell OpenCV that we are going to utilize BufferPool
695 Stream stream; // A default size (10 MB) stack is allocated to this stream
696 BufferPool pool(stream);
697
698 GpuMat mat1 = pool.getBuffer(1024, 1024, CV_8UC1); // Allocate mat1 (1MB)
699 GpuMat mat2 = pool.getBuffer(1024, 1024, CV_8UC1); // Allocate mat2 (1MB)
700
701 mat1.release(); // erroneous usage : mat2 must be deallocated before mat1
702 }
703@endcode
704
705Since C++ local variables are destroyed in the reverse order of construction,
706the code sample below satisfies the LIFO rule. Local GpuMat's are deallocated
707and the corresponding memory is automatically returned to the pool for later usage.
708
709@code
710 int main()
711 {
712 setBufferPoolUsage(true); // Tell OpenCV that we are going to utilize BufferPool
713 setBufferPoolConfig(getDevice(), 1024 * 1024 * 64, 2); // Allocate 64 MB, 2 stacks (default is 10 MB, 5 stacks)
714
715 Stream stream1, stream2; // Each stream uses 1 stack
716 BufferPool pool1(stream1), pool2(stream2);
717
718 for (int i = 0; i < 10; i++)
719 {
720 GpuMat d_src1 = pool1.getBuffer(4096, 4096, CV_8UC1); // 16MB
721 GpuMat d_dst1 = pool1.getBuffer(4096, 4096, CV_8UC3); // 48MB, pool1 is now full
722
723 GpuMat d_src2 = pool2.getBuffer(1024, 1024, CV_8UC1); // 1MB
724 GpuMat d_dst2 = pool2.getBuffer(1024, 1024, CV_8UC3); // 3MB
725
726 d_src1.setTo(Scalar(i), stream1);
727 d_src2.setTo(Scalar(i), stream2);
728
729 cvtColor(d_src1, d_dst1, cv::COLOR_GRAY2BGR, 0, stream1);
730 cvtColor(d_src2, d_dst2, cv::COLOR_GRAY2BGR, 0, stream2);
731 // The order of destruction of the local variables is:
732 // d_dst2 => d_src2 => d_dst1 => d_src1
733 // LIFO rule is satisfied, this code runs without error
734 }
735 }
736@endcode
737 */
738class CV_EXPORTS_W BufferPool
739{
740public:
741
742 //! Gets the BufferPool for the given stream.
743 CV_WRAP explicit BufferPool(Stream& stream);
744
745 //! Allocates a new GpuMat of given size and type.
746 CV_WRAP GpuMat getBuffer(int rows, int cols, int type);
747
748// WARNING: unreachable code using Ninja
749#if defined _MSC_VER && _MSC_VER >= 1920
750#pragma warning(push)
751#pragma warning(disable: 4702)
752#endif
753 //! Allocates a new GpuMat of given size and type.
754 CV_WRAP GpuMat getBuffer(Size size, int type) { return getBuffer(size.height, size.width, type); }
755#if defined _MSC_VER && _MSC_VER >= 1920
756#pragma warning(pop)
757#endif
758
759 //! Returns the allocator associated with the stream.
760 CV_WRAP Ptr<GpuMat::Allocator> getAllocator() const { return allocator_; }
761
762private:
763 Ptr<GpuMat::Allocator> allocator_;
764};
765
766//! BufferPool management (must be called before Stream creation)
767CV_EXPORTS_W void setBufferPoolUsage(bool on);
768CV_EXPORTS_W void setBufferPoolConfig(int deviceId, size_t stackSize, int stackCount);
769
770//===================================================================================
771// HostMem
772//===================================================================================
773
774/** @brief Class with reference counting wrapping special memory type allocation functions from CUDA.
775
776Its interface is also Mat-like but with additional memory type parameters.
777
778- **PAGE_LOCKED** sets a page locked memory type used commonly for fast and asynchronous
779 uploading/downloading data from/to GPU.
780- **SHARED** specifies a zero copy memory allocation that enables mapping the host memory to GPU
781 address space, if supported.
782- **WRITE_COMBINED** sets the write combined buffer that is not cached by CPU. Such buffers are
783 used to supply GPU with data when GPU only reads it. The advantage is a better CPU cache
784 utilization.
785
786@note The allocation size of such memory types is usually limited. For more details, see the *CUDA 2.2
787Pinned Memory APIs* document or the *CUDA C Programming Guide*.
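
A minimal sketch of allocating page-locked memory, filling it through a regular Mat header and
uploading it asynchronously (the size, type and fill value are arbitrary):
@code
    cv::cuda::HostMem pinned(1024, 1024, CV_32FC1, cv::cuda::HostMem::PAGE_LOCKED);
    cv::Mat header = pinned.createMatHeader();  // no data is copied
    header.setTo(cv::Scalar(1.0f));             // fill on the CPU side
    cv::cuda::Stream stream;
    cv::cuda::GpuMat d_buf;
    d_buf.upload(pinned, stream);               // asynchronous upload from pinned memory
    stream.waitForCompletion();
@endcode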
788 */
789class CV_EXPORTS_W HostMem
790{
791public:
792 enum AllocType { PAGE_LOCKED = 1, SHARED = 2, WRITE_COMBINED = 4 };
793
794 static MatAllocator* getAllocator(HostMem::AllocType alloc_type = HostMem::AllocType::PAGE_LOCKED);
795
796 CV_WRAP explicit HostMem(HostMem::AllocType alloc_type = HostMem::AllocType::PAGE_LOCKED);
797
798 HostMem(const HostMem& m);
799
800 CV_WRAP HostMem(int rows, int cols, int type, HostMem::AllocType alloc_type = HostMem::AllocType::PAGE_LOCKED);
801 CV_WRAP HostMem(Size size, int type, HostMem::AllocType alloc_type = HostMem::AllocType::PAGE_LOCKED);
802
803 //! creates a HostMem object from host memory, copying the data
804 CV_WRAP explicit HostMem(InputArray arr, HostMem::AllocType alloc_type = HostMem::AllocType::PAGE_LOCKED);
805
806 ~HostMem();
807
808 HostMem& operator =(const HostMem& m);
809
810 //! swaps with other smart pointer
811 CV_WRAP void swap(HostMem& b);
812
813 //! returns deep copy of the matrix, i.e. the data is copied
814 CV_WRAP HostMem clone() const;
815
816 //! allocates new matrix data unless the matrix already has specified size and type.
817 CV_WRAP void create(int rows, int cols, int type);
818 void create(Size size, int type);
819
820 //! creates alternative HostMem header for the same data, with different
821 //! number of channels and/or different number of rows
822 CV_WRAP HostMem reshape(int cn, int rows = 0) const;
823
824 //! decrements the reference counter and releases the memory if needed.
825 void release();
826
827 //! returns matrix header with disabled reference counting for HostMem data.
828 CV_WRAP Mat createMatHeader() const;
829
830 /** @brief Maps CPU memory to GPU address space and creates the cuda::GpuMat header without reference counting
831 for it.
832
833 This can be done only if memory was allocated with the SHARED flag and if it is supported by the
834 hardware. Laptops often share video and CPU memory, so address spaces can be mapped, which
835 eliminates an extra copy.
836 */
837 GpuMat createGpuMatHeader() const;
838
839 // Please see cv::Mat for descriptions
840 CV_WRAP bool isContinuous() const;
841 CV_WRAP size_t elemSize() const;
842 CV_WRAP size_t elemSize1() const;
843 CV_WRAP int type() const;
844 CV_WRAP int depth() const;
845 CV_WRAP int channels() const;
846 CV_WRAP size_t step1() const;
847 CV_WRAP Size size() const;
848 CV_WRAP bool empty() const;
849
850 // Please see cv::Mat for descriptions
851 int flags;
852 int rows, cols;
853 CV_PROP size_t step;
854
855 uchar* data;
856 int* refcount;
857
858 uchar* datastart;
859 const uchar* dataend;
860
861 AllocType alloc_type;
862};
863
864/** @brief Page-locks the memory of a matrix and maps it for the device(s).
865
866@param m Input matrix.
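
A sketch of page-locking an existing Mat to speed up asynchronous transfers; the matrix should be
unregistered with unregisterPageLocked before it is destroyed:
@code
    cv::Mat h_img(480, 640, CV_8UC1);
    cv::cuda::registerPageLocked(h_img);
    // ... asynchronous uploads/downloads involving h_img ...
    cv::cuda::unregisterPageLocked(h_img);
@endcode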
867 */
868CV_EXPORTS_W void registerPageLocked(Mat& m);
869
870/** @brief Unmaps the memory of a matrix and makes it pageable again.
871
872@param m Input matrix.
873 */
874CV_EXPORTS_W void unregisterPageLocked(Mat& m);
875
876//===================================================================================
877// Stream
878//===================================================================================
879
880/** @brief This class encapsulates a queue of asynchronous calls.
881
882@note Currently, you may face problems if an operation is enqueued twice with different data. Some
883functions use constant GPU memory, and the next call may update that memory before the previous one
884has finished. But calling different operations asynchronously is safe because each operation
885has its own constant buffer. Memory copy/upload/download/set operations to the buffers you hold are
886also safe.
887
888@note The Stream class is not thread-safe. Please use different Stream objects for different CPU threads.
889
890@code
891void thread1()
892{
893 cv::cuda::Stream stream1;
894 cv::cuda::func1(..., stream1);
895}
896
897void thread2()
898{
899 cv::cuda::Stream stream2;
900 cv::cuda::func2(..., stream2);
901}
902@endcode
903
904@note By default, all CUDA routines are launched in the Stream::Null() object if the stream is not specified by the user.
905In a multi-threaded environment, stream objects must be passed explicitly (see the previous note).
906 */
907class CV_EXPORTS_W Stream
908{
909 typedef void (Stream::*bool_type)() const;
910 void this_type_does_not_support_comparisons() const {}
911
912public:
913 typedef void (*StreamCallback)(int status, void* userData);
914
915 //! creates a new asynchronous stream
916 CV_WRAP Stream();
917
918 //! creates a new asynchronous stream with custom allocator
919 CV_WRAP Stream(const Ptr<GpuMat::Allocator>& allocator);
920
921 /** @brief creates a new Stream using the cudaFlags argument to determine the behaviors of the stream
922
923 @note The cudaFlags parameter is passed to the underlying API cudaStreamCreateWithFlags() and
924 supports the same parameter values.
925 @code
926 // creates an OpenCV cuda::Stream that manages an asynchronous, non-blocking,
927 // non-default CUDA stream
928 cv::cuda::Stream cvStream(cudaStreamNonBlocking);
929 @endcode
930 */
931 CV_WRAP Stream(const size_t cudaFlags);
932
933 /** @brief Returns true if the current stream queue is finished. Otherwise, it returns false.
934 */
935 CV_WRAP bool queryIfComplete() const;
936
937 /** @brief Blocks the current CPU thread until all operations in the stream are complete.
938 */
939 CV_WRAP void waitForCompletion();
940
941 /** @brief Makes a compute stream wait on an event.
942 */
943 CV_WRAP void waitEvent(const Event& event);
944
945 /** @brief Adds a callback to be called on the host after all currently enqueued items in the stream have
946 completed.
947
948 @note Callbacks must not make any CUDA API calls. Callbacks must not perform any synchronization
949 that may depend on outstanding device work or other callbacks that are not mandated to run earlier.
950 Callbacks without a mandated order (in independent streams) execute in undefined order and may be
951 serialized.
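
    A minimal sketch of a host callback (the callback body is a placeholder and must not call CUDA APIs):
    @code
        static void onStreamDone(int status, void* userData)
        {
            (void)status; (void)userData;
            // host-side work only, e.g. signal a condition variable
        }
        // ...
        cv::cuda::Stream stream;
        stream.enqueueHostCallback(onStreamDone, nullptr);
    @endcode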
952 */
953 void enqueueHostCallback(StreamCallback callback, void* userData);
954
955 //! return Stream object for default CUDA stream
956 CV_WRAP static Stream& Null();
957
958 //! returns true if stream object is not default (!= 0)
959 operator bool_type() const;
960
961 //! return Pointer to CUDA stream
962 CV_WRAP void* cudaPtr() const;
963
964 class Impl;
965
966private:
967 Ptr<Impl> impl_;
968 Stream(const Ptr<Impl>& impl);
969
970 friend struct StreamAccessor;
971 friend class BufferPool;
972 friend class DefaultDeviceInitializer;
973};
974
975
976/** @brief Bindings overload to create a Stream object from the address stored in an existing CUDA Runtime API stream pointer (cudaStream_t).
977@param cudaStreamMemoryAddress Memory address stored in a CUDA Runtime API stream pointer (cudaStream_t). The created Stream object does not perform any allocation or deallocation and simply wraps existing raw CUDA Runtime API stream pointer.
978@note Overload for generation of bindings only, not exported or intended for use internally from C++.
979 */
980CV_EXPORTS_W Stream wrapStream(size_t cudaStreamMemoryAddress);
981
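/** @brief Wrapper for a CUDA event, used for synchronization between streams and for timing GPU work.

A minimal sketch of timing asynchronous GPU work with a pair of events; someGpuOperation is a
placeholder for any operation that accepts a Stream:
@code
    cv::cuda::Stream stream;
    cv::cuda::Event start, stop;
    start.record(stream);
    // someGpuOperation(..., stream);   // placeholder for asynchronous GPU work
    stop.record(stream);
    stop.waitForCompletion();
    float ms = cv::cuda::Event::elapsedTime(start, stop);
@endcode
 */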
982class CV_EXPORTS_W Event
983{
984public:
985 enum CreateFlags
986 {
987 DEFAULT = 0x00, /**< Default event flag */
988 BLOCKING_SYNC = 0x01, /**< Event uses blocking synchronization */
989 DISABLE_TIMING = 0x02, /**< Event will not record timing data */
990 INTERPROCESS = 0x04 /**< Event is suitable for interprocess use. DisableTiming must be set */
991 };
992
993 CV_WRAP explicit Event(const Event::CreateFlags flags = Event::CreateFlags::DEFAULT);
994
995 //! records an event
996 CV_WRAP void record(Stream& stream = Stream::Null());
997
998 //! queries an event's status
999 CV_WRAP bool queryIfComplete() const;
1000
1001 //! waits for an event to complete
1002 CV_WRAP void waitForCompletion();
1003
1004 //! computes the elapsed time between events
1005 CV_WRAP static float elapsedTime(const Event& start, const Event& end);
1006
1007 class Impl;
1008
1009private:
1010 Ptr<Impl> impl_;
1011 Event(const Ptr<Impl>& impl);
1012
1013 friend struct EventAccessor;
1014};
1015CV_ENUM_FLAGS(Event::CreateFlags)
1016
1017//! @} cudacore_struct
1018
1019//===================================================================================
1020// Initialization & Info
1021//===================================================================================
1022
1023//! @addtogroup cudacore_init
1024//! @{
1025
1026/** @brief Returns the number of installed CUDA-enabled devices.
1027
1028Use this function before any other CUDA function calls. If OpenCV is compiled without CUDA support,
1029this function returns 0. If the CUDA driver is not installed, or is incompatible, this function
1030returns -1.
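
A typical guard before using anything else from this module (a sketch):
@code
    if (cv::cuda::getCudaEnabledDeviceCount() <= 0)
    {
        // no usable CUDA device, OpenCV built without CUDA, or incompatible driver:
        // fall back to the CPU code path
    }
@endcode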
1031 */
1032CV_EXPORTS_W int getCudaEnabledDeviceCount();
1033
1034/** @brief Sets a device and initializes it for the current thread.
1035
1036@param device System index of a CUDA device starting with 0.
1037
1038If the call to this function is omitted, a default device is initialized at the first CUDA usage.
1039 */
1040CV_EXPORTS_W void setDevice(int device);
1041
1042/** @brief Returns the current device index set by cuda::setDevice or initialized by default.
1043 */
1044CV_EXPORTS_W int getDevice();
1045
1046/** @brief Explicitly destroys and cleans up all resources associated with the current device in the current
1047process.
1048
1049Any subsequent API call to this device will reinitialize the device.
1050 */
1051CV_EXPORTS_W void resetDevice();
1052
1053/** @brief Enumeration providing CUDA computing features.
1054 */
1055enum FeatureSet
1056{
1057 FEATURE_SET_COMPUTE_10 = 10,
1058 FEATURE_SET_COMPUTE_11 = 11,
1059 FEATURE_SET_COMPUTE_12 = 12,
1060 FEATURE_SET_COMPUTE_13 = 13,
1061 FEATURE_SET_COMPUTE_20 = 20,
1062 FEATURE_SET_COMPUTE_21 = 21,
1063 FEATURE_SET_COMPUTE_30 = 30,
1064 FEATURE_SET_COMPUTE_32 = 32,
1065 FEATURE_SET_COMPUTE_35 = 35,
1066 FEATURE_SET_COMPUTE_50 = 50,
1067
1068 GLOBAL_ATOMICS = FEATURE_SET_COMPUTE_11,
1069 SHARED_ATOMICS = FEATURE_SET_COMPUTE_12,
1070 NATIVE_DOUBLE = FEATURE_SET_COMPUTE_13,
1071 WARP_SHUFFLE_FUNCTIONS = FEATURE_SET_COMPUTE_30,
1072 DYNAMIC_PARALLELISM = FEATURE_SET_COMPUTE_35
1073};
1074
1075//! checks whether current device supports the given feature
1076CV_EXPORTS bool deviceSupports(FeatureSet feature_set);
1077
1078/** @brief Class providing a set of static methods to check what NVIDIA\* card architecture the CUDA module was
1079built for.
1080
1081According to the CUDA C Programming Guide Version 3.2: "PTX code produced for some specific compute
1082capability can always be compiled to binary code of greater or equal compute capability".
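
A sketch of combining a build-time check with a run-time device check; the chosen feature set is
just an example:
@code
    if (cv::cuda::TargetArchs::builtWith(cv::cuda::FEATURE_SET_COMPUTE_30) &&
        cv::cuda::deviceSupports(cv::cuda::FEATURE_SET_COMPUTE_30))
    {
        // both the compiled module and the current device support this feature set
    }
@endcode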
1083 */
1084class CV_EXPORTS_W TargetArchs
1085{
1086public:
1087 /** @brief The following method checks whether the module was built with the support of the given feature:
1088
1089 @param feature_set Features to be checked. See cuda::FeatureSet.
1090 */
1091 static bool builtWith(FeatureSet feature_set);
1092
1093 /** @brief There is a set of methods to check whether the module contains intermediate (PTX) or binary CUDA
1094 code for the given architecture(s):
1095
1096 @param major Major compute capability version.
1097 @param minor Minor compute capability version.
1098 */
1099 CV_WRAP static bool has(int major, int minor);
1100 CV_WRAP static bool hasPtx(int major, int minor);
1101 CV_WRAP static bool hasBin(int major, int minor);
1102
1103 CV_WRAP static bool hasEqualOrLessPtx(int major, int minor);
1104 CV_WRAP static bool hasEqualOrGreater(int major, int minor);
1105 CV_WRAP static bool hasEqualOrGreaterPtx(int major, int minor);
1106 CV_WRAP static bool hasEqualOrGreaterBin(int major, int minor);
1107};
1108
1109/** @brief Class providing functionality for querying the specified GPU properties.
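
A sketch of enumerating devices and printing basic properties (assumes <iostream> is included):
@code
    int n = cv::cuda::getCudaEnabledDeviceCount();
    for (int i = 0; i < n; i++)
    {
        cv::cuda::DeviceInfo info(i);
        std::cout << i << ": " << info.name()
                  << " (compute " << info.majorVersion() << "." << info.minorVersion() << ")"
                  << (info.isCompatible() ? "" : " - incompatible with this build") << std::endl;
    }
@endcode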
1110 */
1111class CV_EXPORTS_W DeviceInfo
1112{
1113public:
1114 //! creates DeviceInfo object for the current GPU
1115 CV_WRAP DeviceInfo();
1116
1117 /** @brief The constructors.
1118
1119 @param device_id System index of the CUDA device starting with 0.
1120
1121 Constructs the DeviceInfo object for the specified device. If the device_id parameter is omitted, it
1122 constructs an object for the current device.
1123 */
1124 CV_WRAP DeviceInfo(int device_id);
1125
1126 /** @brief Returns system index of the CUDA device starting with 0.
1127 */
1128 CV_WRAP int deviceID() const;
1129
1130 //! ASCII string identifying device
1131 const char* name() const;
1132
1133 //! global memory available on device in bytes
1134 CV_WRAP size_t totalGlobalMem() const;
1135
1136 //! shared memory available per block in bytes
1137 CV_WRAP size_t sharedMemPerBlock() const;
1138
1139 //! 32-bit registers available per block
1140 CV_WRAP int regsPerBlock() const;
1141
1142 //! warp size in threads
1143 CV_WRAP int warpSize() const;
1144
1145 //! maximum pitch in bytes allowed by memory copies
1146 CV_WRAP size_t memPitch() const;
1147
1148 //! maximum number of threads per block
1149 CV_WRAP int maxThreadsPerBlock() const;
1150
1151 //! maximum size of each dimension of a block
1152 CV_WRAP Vec3i maxThreadsDim() const;
1153
1154 //! maximum size of each dimension of a grid
1155 CV_WRAP Vec3i maxGridSize() const;
1156
1157 //! clock frequency in kilohertz
1158 CV_WRAP int clockRate() const;
1159
1160 //! constant memory available on device in bytes
1161 CV_WRAP size_t totalConstMem() const;
1162
1163 //! major compute capability
1164 CV_WRAP int majorVersion() const;
1165
1166 //! minor compute capability
1167 CV_WRAP int minorVersion() const;
1168
1169 //! alignment requirement for textures
1170 CV_WRAP size_t textureAlignment() const;
1171
1172 //! pitch alignment requirement for texture references bound to pitched memory
1173 CV_WRAP size_t texturePitchAlignment() const;
1174
1175 //! number of multiprocessors on device
1176 CV_WRAP int multiProcessorCount() const;
1177
1178 //! specifies whether there is a run-time limit on kernels
1179 CV_WRAP bool kernelExecTimeoutEnabled() const;
1180
1181 //! device is integrated as opposed to discrete
1182 CV_WRAP bool integrated() const;
1183
1184 //! device can map host memory with cudaHostAlloc/cudaHostGetDevicePointer
1185 CV_WRAP bool canMapHostMemory() const;
1186
1187 enum ComputeMode
1188 {
1189 ComputeModeDefault, /**< default compute mode (Multiple threads can use cudaSetDevice with this device) */
1190 ComputeModeExclusive, /**< compute-exclusive-thread mode (Only one thread in one process will be able to use cudaSetDevice with this device) */
1191 ComputeModeProhibited, /**< compute-prohibited mode (No threads can use cudaSetDevice with this device) */
1192 ComputeModeExclusiveProcess /**< compute-exclusive-process mode (Many threads in one process will be able to use cudaSetDevice with this device) */
1193 };
1194
1195 //! compute mode
1196 CV_WRAP DeviceInfo::ComputeMode computeMode() const;
1197
1198 //! maximum 1D texture size
1199 CV_WRAP int maxTexture1D() const;
1200
1201 //! maximum 1D mipmapped texture size
1202 CV_WRAP int maxTexture1DMipmap() const;
1203
1204 //! maximum size for 1D textures bound to linear memory
1205 CV_WRAP int maxTexture1DLinear() const;
1206
1207 //! maximum 2D texture dimensions
1208 CV_WRAP Vec2i maxTexture2D() const;
1209
1210 //! maximum 2D mipmapped texture dimensions
1211 CV_WRAP Vec2i maxTexture2DMipmap() const;
1212
1213 //! maximum dimensions (width, height, pitch) for 2D textures bound to pitched memory
1214 CV_WRAP Vec3i maxTexture2DLinear() const;
1215
1216 //! maximum 2D texture dimensions if texture gather operations have to be performed
1217 CV_WRAP Vec2i maxTexture2DGather() const;
1218
1219 //! maximum 3D texture dimensions
1220 CV_WRAP Vec3i maxTexture3D() const;
1221
1222 //! maximum Cubemap texture dimensions
1223 CV_WRAP int maxTextureCubemap() const;
1224
1225 //! maximum 1D layered texture dimensions
1226 CV_WRAP Vec2i maxTexture1DLayered() const;
1227
1228 //! maximum 2D layered texture dimensions
1229 CV_WRAP Vec3i maxTexture2DLayered() const;
1230
1231 //! maximum Cubemap layered texture dimensions
1232 CV_WRAP Vec2i maxTextureCubemapLayered() const;
1233
1234 //! maximum 1D surface size
1235 CV_WRAP int maxSurface1D() const;
1236
1237 //! maximum 2D surface dimensions
1238 CV_WRAP Vec2i maxSurface2D() const;
1239
1240 //! maximum 3D surface dimensions
1241 CV_WRAP Vec3i maxSurface3D() const;
1242
1243 //! maximum 1D layered surface dimensions
1244 CV_WRAP Vec2i maxSurface1DLayered() const;
1245
1246 //! maximum 2D layered surface dimensions
1247 CV_WRAP Vec3i maxSurface2DLayered() const;
1248
1249 //! maximum Cubemap surface dimensions
1250 CV_WRAP int maxSurfaceCubemap() const;
1251
1252 //! maximum Cubemap layered surface dimensions
1253 CV_WRAP Vec2i maxSurfaceCubemapLayered() const;
1254
1255 //! alignment requirements for surfaces
1256 CV_WRAP size_t surfaceAlignment() const;
1257
1258 //! device can possibly execute multiple kernels concurrently
1259 CV_WRAP bool concurrentKernels() const;
1260
1261 //! device has ECC support enabled
1262 CV_WRAP bool ECCEnabled() const;
1263
1264 //! PCI bus ID of the device
1265 CV_WRAP int pciBusID() const;
1266
1267 //! PCI device ID of the device
1268 CV_WRAP int pciDeviceID() const;
1269
1270 //! PCI domain ID of the device
1271 CV_WRAP int pciDomainID() const;
1272
1273 //! true if device is a Tesla device using TCC driver, false otherwise
1274 CV_WRAP bool tccDriver() const;
1275
1276 //! number of asynchronous engines
1277 CV_WRAP int asyncEngineCount() const;
1278
1279 //! device shares a unified address space with the host
1280 CV_WRAP bool unifiedAddressing() const;
1281
1282 //! peak memory clock frequency in kilohertz
1283 CV_WRAP int memoryClockRate() const;
1284
1285 //! global memory bus width in bits
1286 CV_WRAP int memoryBusWidth() const;
1287
1288 //! size of L2 cache in bytes
1289 CV_WRAP int l2CacheSize() const;
1290
1291 //! maximum resident threads per multiprocessor
1292 CV_WRAP int maxThreadsPerMultiProcessor() const;
1293
1294 //! gets free and total device memory
1295 CV_WRAP void queryMemory(size_t& totalMemory, size_t& freeMemory) const;
1296 CV_WRAP size_t freeMemory() const;
1297 CV_WRAP size_t totalMemory() const;
1298
1299 /** @brief Provides information on CUDA feature support.
1300
1301 @param feature_set Features to be checked. See cuda::FeatureSet.
1302
1303 This function returns true if the device has the specified CUDA feature. Otherwise, it returns false.
1304 */
1305 bool supports(FeatureSet feature_set) const;
1306
1307 /** @brief Checks the CUDA module and device compatibility.
1308
1309 This function returns true if the CUDA module can be run on the specified device. Otherwise, it
1310 returns false.
1311 */
1312 CV_WRAP bool isCompatible() const;
1313
1314private:
1315 int device_id_;
1316};
1317
1318CV_EXPORTS_W void printCudaDeviceInfo(int device);
1319CV_EXPORTS_W void printShortCudaDeviceInfo(int device);
1320
1321/** @brief Converts an array to half-precision floating point numbers.
1322
1323@param _src input array.
1324@param _dst output array.
1325@param stream Stream for the asynchronous version.
1326@sa convertFp16
1327*/
1328CV_EXPORTS void convertFp16(InputArray _src, OutputArray _dst, Stream& stream = Stream::Null());
1329
1330//! @} cudacore_init
1331
1332}} // namespace cv { namespace cuda {
1333
1334
1335#include "opencv2/core/cuda.inl.hpp"
1336
1337#endif /* OPENCV_CORE_CUDA_HPP */
1338
