1 | /*M/////////////////////////////////////////////////////////////////////////////////////// |
---|---|
2 | // |
3 | // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. |
4 | // |
5 | // By downloading, copying, installing or using the software you agree to this license. |
6 | // If you do not agree to this license, do not download, install, |
7 | // copy or use the software. |
8 | // |
9 | // |
10 | // License Agreement |
11 | // For Open Source Computer Vision Library |
12 | // |
13 | // Copyright (C) 2013, OpenCV Foundation, all rights reserved. |
14 | // Third party copyrights are property of their respective owners. |
15 | // |
16 | // Redistribution and use in source and binary forms, with or without modification, |
17 | // are permitted provided that the following conditions are met: |
18 | // |
19 | // * Redistribution's of source code must retain the above copyright notice, |
20 | // this list of conditions and the following disclaimer. |
21 | // |
22 | // * Redistribution's in binary form must reproduce the above copyright notice, |
23 | // this list of conditions and the following disclaimer in the documentation |
24 | // and/or other materials provided with the distribution. |
25 | // |
26 | // * The name of the copyright holders may not be used to endorse or promote products |
27 | // derived from this software without specific prior written permission. |
28 | // |
29 | // This software is provided by the copyright holders and contributors "as is" and |
30 | // any express or implied warranties, including, but not limited to, the implied |
31 | // warranties of merchantability and fitness for a particular purpose are disclaimed. |
32 | // In no event shall the OpenCV Foundation or contributors be liable for any direct, |
33 | // indirect, incidental, special, exemplary, or consequential damages |
34 | // (including, but not limited to, procurement of substitute goods or services; |
35 | // loss of use, data, or profits; or business interruption) however caused |
36 | // and on any theory of liability, whether in contract, strict liability, |
37 | // or tort (including negligence or otherwise) arising in any way out of |
38 | // the use of this software, even if advised of the possibility of such damage. |
39 | // |
40 | //M*/ |
41 | |
42 | #ifndef OPENCV_OPENCL_HPP |
43 | #define OPENCV_OPENCL_HPP |
44 | |
45 | #include "opencv2/core.hpp" |
46 | #include <typeinfo> |
47 | #include <typeindex> |
48 | |
49 | namespace cv { namespace ocl { |
50 | |
51 | //! @addtogroup core_opencl |
52 | //! @{ |
53 | |
54 | CV_EXPORTS_W bool haveOpenCL(); |
55 | CV_EXPORTS_W bool useOpenCL(); |
56 | CV_EXPORTS_W bool haveAmdBlas(); |
57 | CV_EXPORTS_W bool haveAmdFft(); |
58 | CV_EXPORTS_W void setUseOpenCL(bool flag); |
59 | CV_EXPORTS_W void finish(); |
60 | |
61 | CV_EXPORTS bool haveSVM(); |
62 | |
63 | class CV_EXPORTS Context; |
64 | class CV_EXPORTS_W_SIMPLE Device; |
65 | class CV_EXPORTS Kernel; |
66 | class CV_EXPORTS Program; |
67 | class CV_EXPORTS ProgramSource; |
68 | class CV_EXPORTS Queue; |
69 | class CV_EXPORTS PlatformInfo; |
70 | class CV_EXPORTS Image2D; |
71 | |
72 | class CV_EXPORTS_W_SIMPLE Device |
73 | { |
74 | public: |
75 | CV_WRAP Device() CV_NOEXCEPT; |
76 | explicit Device(void* d); |
77 | Device(const Device& d); |
78 | Device& operator = (const Device& d); |
79 | Device(Device&& d) CV_NOEXCEPT; |
80 | Device& operator = (Device&& d) CV_NOEXCEPT; |
81 | CV_WRAP ~Device(); |
82 | |
83 | void set(void* d); |
84 | |
85 | enum |
86 | { |
87 | TYPE_DEFAULT = (1 << 0), |
88 | TYPE_CPU = (1 << 1), |
89 | TYPE_GPU = (1 << 2), |
90 | TYPE_ACCELERATOR = (1 << 3), |
91 | TYPE_DGPU = TYPE_GPU + (1 << 16), |
92 | TYPE_IGPU = TYPE_GPU + (1 << 17), |
93 | TYPE_ALL = 0xFFFFFFFF |
94 | }; |
95 | |
96 | CV_WRAP String name() const; |
97 | CV_WRAP String extensions() const; |
98 | CV_WRAP bool isExtensionSupported(const String& extensionName) const; |
99 | CV_WRAP String version() const; |
100 | CV_WRAP String vendorName() const; |
101 | CV_WRAP String OpenCL_C_Version() const; |
102 | CV_WRAP String OpenCLVersion() const; |
103 | CV_WRAP int deviceVersionMajor() const; |
104 | CV_WRAP int deviceVersionMinor() const; |
105 | CV_WRAP String driverVersion() const; |
106 | void* ptr() const; |
107 | |
108 | CV_WRAP int type() const; |
109 | |
110 | CV_WRAP int addressBits() const; |
111 | CV_WRAP bool available() const; |
112 | CV_WRAP bool compilerAvailable() const; |
113 | CV_WRAP bool linkerAvailable() const; |
114 | |
115 | enum |
116 | { |
117 | FP_DENORM=(1 << 0), |
118 | FP_INF_NAN=(1 << 1), |
119 | FP_ROUND_TO_NEAREST=(1 << 2), |
120 | FP_ROUND_TO_ZERO=(1 << 3), |
121 | FP_ROUND_TO_INF=(1 << 4), |
122 | FP_FMA=(1 << 5), |
123 | FP_SOFT_FLOAT=(1 << 6), |
124 | FP_CORRECTLY_ROUNDED_DIVIDE_SQRT=(1 << 7) |
125 | }; |
126 | CV_WRAP int doubleFPConfig() const; |
127 | CV_WRAP int singleFPConfig() const; |
128 | CV_WRAP int halfFPConfig() const; |
129 | |
130 | /// true if 'cl_khr_fp64' extension is available |
131 | CV_WRAP bool hasFP64() const; |
132 | /// true if 'cl_khr_fp16' extension is available |
133 | CV_WRAP bool hasFP16() const; |
134 | |
135 | CV_WRAP bool endianLittle() const; |
136 | CV_WRAP bool errorCorrectionSupport() const; |
137 | |
138 | enum |
139 | { |
140 | EXEC_KERNEL=(1 << 0), |
141 | EXEC_NATIVE_KERNEL=(1 << 1) |
142 | }; |
143 | CV_WRAP int executionCapabilities() const; |
144 | |
145 | CV_WRAP size_t globalMemCacheSize() const; |
146 | |
147 | enum |
148 | { |
149 | NO_CACHE=0, |
150 | READ_ONLY_CACHE=1, |
151 | READ_WRITE_CACHE=2 |
152 | }; |
153 | CV_WRAP int globalMemCacheType() const; |
154 | CV_WRAP int globalMemCacheLineSize() const; |
155 | CV_WRAP size_t globalMemSize() const; |
156 | |
157 | CV_WRAP size_t localMemSize() const; |
158 | enum |
159 | { |
160 | NO_LOCAL_MEM=0, |
161 | LOCAL_IS_LOCAL=1, |
162 | LOCAL_IS_GLOBAL=2 |
163 | }; |
164 | CV_WRAP int localMemType() const; |
165 | CV_WRAP bool hostUnifiedMemory() const; |
166 | |
167 | CV_WRAP bool imageSupport() const; |
168 | |
169 | CV_WRAP bool imageFromBufferSupport() const; |
170 | uint imagePitchAlignment() const; |
171 | uint imageBaseAddressAlignment() const; |
172 | |
173 | /// deprecated, use isExtensionSupported() method (probably with "cl_khr_subgroups" value) |
174 | CV_WRAP bool intelSubgroupsSupport() const; |
175 | |
176 | CV_WRAP size_t image2DMaxWidth() const; |
177 | CV_WRAP size_t image2DMaxHeight() const; |
178 | |
179 | CV_WRAP size_t image3DMaxWidth() const; |
180 | CV_WRAP size_t image3DMaxHeight() const; |
181 | CV_WRAP size_t image3DMaxDepth() const; |
182 | |
183 | CV_WRAP size_t imageMaxBufferSize() const; |
184 | CV_WRAP size_t imageMaxArraySize() const; |
185 | |
186 | enum |
187 | { |
188 | UNKNOWN_VENDOR=0, |
189 | VENDOR_AMD=1, |
190 | VENDOR_INTEL=2, |
191 | VENDOR_NVIDIA=3 |
192 | }; |
193 | CV_WRAP int vendorID() const; |
194 | // FIXIT |
195 | // dev.isAMD() doesn't work for OpenCL CPU devices from AMD OpenCL platform. |
196 | // This method should use platform name instead of vendor name. |
197 | // After fix restore code in arithm.cpp: ocl_compare() |
198 | CV_WRAP inline bool isAMD() const { return vendorID() == VENDOR_AMD; } |
199 | CV_WRAP inline bool isIntel() const { return vendorID() == VENDOR_INTEL; } |
200 | CV_WRAP inline bool isNVidia() const { return vendorID() == VENDOR_NVIDIA; } |
201 | |
202 | CV_WRAP int maxClockFrequency() const; |
203 | CV_WRAP int maxComputeUnits() const; |
204 | CV_WRAP int maxConstantArgs() const; |
205 | CV_WRAP size_t maxConstantBufferSize() const; |
206 | |
207 | CV_WRAP size_t maxMemAllocSize() const; |
208 | CV_WRAP size_t maxParameterSize() const; |
209 | |
210 | CV_WRAP int maxReadImageArgs() const; |
211 | CV_WRAP int maxWriteImageArgs() const; |
212 | CV_WRAP int maxSamplers() const; |
213 | |
214 | CV_WRAP size_t maxWorkGroupSize() const; |
215 | CV_WRAP int maxWorkItemDims() const; |
216 | void maxWorkItemSizes(size_t*) const; |
217 | |
218 | CV_WRAP int memBaseAddrAlign() const; |
219 | |
220 | CV_WRAP int nativeVectorWidthChar() const; |
221 | CV_WRAP int nativeVectorWidthShort() const; |
222 | CV_WRAP int nativeVectorWidthInt() const; |
223 | CV_WRAP int nativeVectorWidthLong() const; |
224 | CV_WRAP int nativeVectorWidthFloat() const; |
225 | CV_WRAP int nativeVectorWidthDouble() const; |
226 | CV_WRAP int nativeVectorWidthHalf() const; |
227 | |
228 | CV_WRAP int preferredVectorWidthChar() const; |
229 | CV_WRAP int preferredVectorWidthShort() const; |
230 | CV_WRAP int preferredVectorWidthInt() const; |
231 | CV_WRAP int preferredVectorWidthLong() const; |
232 | CV_WRAP int preferredVectorWidthFloat() const; |
233 | CV_WRAP int preferredVectorWidthDouble() const; |
234 | CV_WRAP int preferredVectorWidthHalf() const; |
235 | |
236 | CV_WRAP size_t printfBufferSize() const; |
237 | CV_WRAP size_t profilingTimerResolution() const; |
238 | |
239 | CV_WRAP static const Device& getDefault(); |
240 | |
241 | /** |
242 | * @param d OpenCL handle (cl_device_id). clRetainDevice() is called on success. |
243 | * |
244 | * @note Ownership of the passed device is passed to OpenCV on success. |
245 | * The caller should additionally call `clRetainDevice` on it if it intends |
246 | * to continue using the device. |
247 | */ |
248 | static Device fromHandle(void* d); |
249 | |
250 | struct Impl; |
251 | inline Impl* getImpl() const { return (Impl*)p; } |
252 | inline bool empty() const { return !p; } |
253 | protected: |
254 | Impl* p; |
255 | }; |
256 | |
257 | |
258 | class CV_EXPORTS Context |
259 | { |
260 | public: |
261 | Context() CV_NOEXCEPT; |
262 | explicit Context(int dtype); //!< @deprecated |
263 | ~Context(); |
264 | Context(const Context& c); |
265 | Context& operator= (const Context& c); |
266 | Context(Context&& c) CV_NOEXCEPT; |
267 | Context& operator = (Context&& c) CV_NOEXCEPT; |
268 | |
269 | /** @deprecated */ |
270 | bool create(); |
271 | /** @deprecated */ |
272 | bool create(int dtype); |
273 | |
274 | size_t ndevices() const; |
275 | Device& device(size_t idx) const; |
276 | Program getProg(const ProgramSource& prog, |
277 | const String& buildopt, String& errmsg); |
278 | void unloadProg(Program& prog); |
279 | |
280 | |
281 | /** Get thread-local OpenCL context (initialize if necessary) */ |
282 | #if 0 // OpenCV 5.0 |
283 | static Context& getDefault(); |
284 | #else |
285 | static Context& getDefault(bool initialize = true); |
286 | #endif |
287 | |
288 | /** @returns cl_context value */ |
289 | void* ptr() const; |
290 | |
291 | /** |
292 | * @brief Get OpenCL context property specified on context creation |
293 | * @param propertyId Property id (CL_CONTEXT_* as defined in cl_context_properties type) |
294 | * @returns Property value if property was specified on clCreateContext, or NULL if context created without the property |
295 | */ |
296 | void* getOpenCLContextProperty(int propertyId) const; |
297 | |
298 | bool useSVM() const; |
299 | void setUseSVM(bool enabled); |
300 | |
301 | /** |
302 | * @param context OpenCL handle (cl_context). clRetainContext() is called on success |
303 | */ |
304 | static Context fromHandle(void* context); |
305 | static Context fromDevice(const ocl::Device& device); |
306 | static Context create(const std::string& configuration); |
307 | |
308 | void release(); |
309 | |
310 | class CV_EXPORTS UserContext { |
311 | public: |
312 | virtual ~UserContext(); |
313 | }; |
314 | template <typename T> |
315 | inline void setUserContext(const std::shared_ptr<T>& userContext) { |
316 | setUserContext(typeid(T), userContext); |
317 | } |
318 | template <typename T> |
319 | inline std::shared_ptr<T> getUserContext() { |
320 | return std::dynamic_pointer_cast<T>(getUserContext(typeId: typeid(T))); |
321 | } |
322 | void setUserContext(std::type_index typeId, const std::shared_ptr<UserContext>& userContext); |
323 | std::shared_ptr<UserContext> getUserContext(std::type_index typeId); |
324 | |
325 | struct Impl; |
326 | inline Impl* getImpl() const { return (Impl*)p; } |
327 | inline bool empty() const { return !p; } |
328 | // TODO OpenCV 5.0 |
329 | //protected: |
330 | Impl* p; |
331 | }; |
332 | |
333 | /** @deprecated */ |
334 | class CV_EXPORTS Platform |
335 | { |
336 | public: |
337 | Platform() CV_NOEXCEPT; |
338 | ~Platform(); |
339 | Platform(const Platform& p); |
340 | Platform& operator = (const Platform& p); |
341 | Platform(Platform&& p) CV_NOEXCEPT; |
342 | Platform& operator = (Platform&& p) CV_NOEXCEPT; |
343 | |
344 | void* ptr() const; |
345 | |
346 | /** @deprecated */ |
347 | static Platform& getDefault(); |
348 | |
349 | struct Impl; |
350 | inline Impl* getImpl() const { return (Impl*)p; } |
351 | inline bool empty() const { return !p; } |
352 | protected: |
353 | Impl* p; |
354 | }; |
355 | |
356 | /** @brief Attaches OpenCL context to OpenCV |
357 | @note |
358 | OpenCV will check if available OpenCL platform has platformName name, then assign context to |
359 | OpenCV and call `clRetainContext` function. The deviceID device will be used as target device and |
360 | new command queue will be created. |
361 | @param platformName name of OpenCL platform to attach, this string is used to check if platform is available to OpenCV at runtime |
362 | @param platformID ID of platform attached context was created for |
363 | @param context OpenCL context to be attached to OpenCV |
364 | @param deviceID ID of device, must be created from attached context |
365 | */ |
366 | CV_EXPORTS void attachContext(const String& platformName, void* platformID, void* context, void* deviceID); |
367 | |
368 | /** @brief Convert OpenCL buffer to UMat |
369 | @note |
370 | OpenCL buffer (cl_mem_buffer) should contain 2D image data, compatible with OpenCV. Memory |
371 | content is not copied from `clBuffer` to UMat. Instead, buffer handle assigned to UMat and |
372 | `clRetainMemObject` is called. |
373 | @param cl_mem_buffer source clBuffer handle |
374 | @param step num of bytes in single row |
375 | @param rows number of rows |
376 | @param cols number of cols |
377 | @param type OpenCV type of image |
378 | @param dst destination UMat |
379 | */ |
380 | CV_EXPORTS void convertFromBuffer(void* cl_mem_buffer, size_t step, int rows, int cols, int type, UMat& dst); |
381 | |
382 | /** @brief Convert OpenCL image2d_t to UMat |
383 | @note |
384 | OpenCL `image2d_t` (cl_mem_image), should be compatible with OpenCV UMat formats. Memory content |
385 | is copied from image to UMat with `clEnqueueCopyImageToBuffer` function. |
386 | @param cl_mem_image source image2d_t handle |
387 | @param dst destination UMat |
388 | */ |
389 | CV_EXPORTS void convertFromImage(void* cl_mem_image, UMat& dst); |
390 | |
391 | // TODO Move to internal header |
392 | /// @deprecated |
393 | void initializeContextFromHandle(Context& ctx, void* platform, void* context, void* device); |
394 | |
395 | class CV_EXPORTS Queue |
396 | { |
397 | public: |
398 | Queue() CV_NOEXCEPT; |
399 | explicit Queue(const Context& c, const Device& d=Device()); |
400 | ~Queue(); |
401 | Queue(const Queue& q); |
402 | Queue& operator = (const Queue& q); |
403 | Queue(Queue&& q) CV_NOEXCEPT; |
404 | Queue& operator = (Queue&& q) CV_NOEXCEPT; |
405 | |
406 | bool create(const Context& c=Context(), const Device& d=Device()); |
407 | void finish(); |
408 | void* ptr() const; |
409 | static Queue& getDefault(); |
410 | |
411 | /// @brief Returns OpenCL command queue with enable profiling mode support |
412 | const Queue& getProfilingQueue() const; |
413 | |
414 | struct Impl; friend struct Impl; |
415 | inline Impl* getImpl() const { return p; } |
416 | inline bool empty() const { return !p; } |
417 | protected: |
418 | Impl* p; |
419 | }; |
420 | |
421 | |
422 | class CV_EXPORTS KernelArg |
423 | { |
424 | public: |
425 | enum { LOCAL=1, READ_ONLY=2, WRITE_ONLY=4, READ_WRITE=6, CONSTANT=8, PTR_ONLY = 16, NO_SIZE=256 }; |
426 | KernelArg(int _flags, UMat* _m, int wscale=1, int iwscale=1, const void* _obj=0, size_t _sz=0); |
427 | KernelArg() CV_NOEXCEPT; |
428 | |
429 | static KernelArg Local(size_t localMemSize) |
430 | { return KernelArg(LOCAL, 0, 1, 1, 0, localMemSize); } |
431 | static KernelArg PtrWriteOnly(const UMat& m) |
432 | { return KernelArg(PTR_ONLY+WRITE_ONLY, (UMat*)&m); } |
433 | static KernelArg PtrReadOnly(const UMat& m) |
434 | { return KernelArg(PTR_ONLY+READ_ONLY, (UMat*)&m); } |
435 | static KernelArg PtrReadWrite(const UMat& m) |
436 | { return KernelArg(PTR_ONLY+READ_WRITE, (UMat*)&m); } |
437 | static KernelArg ReadWrite(const UMat& m, int wscale=1, int iwscale=1) |
438 | { return KernelArg(READ_WRITE, (UMat*)&m, wscale, iwscale); } |
439 | static KernelArg ReadWriteNoSize(const UMat& m, int wscale=1, int iwscale=1) |
440 | { return KernelArg(READ_WRITE+NO_SIZE, (UMat*)&m, wscale, iwscale); } |
441 | static KernelArg ReadOnly(const UMat& m, int wscale=1, int iwscale=1) |
442 | { return KernelArg(READ_ONLY, (UMat*)&m, wscale, iwscale); } |
443 | static KernelArg WriteOnly(const UMat& m, int wscale=1, int iwscale=1) |
444 | { return KernelArg(WRITE_ONLY, (UMat*)&m, wscale, iwscale); } |
445 | static KernelArg ReadOnlyNoSize(const UMat& m, int wscale=1, int iwscale=1) |
446 | { return KernelArg(READ_ONLY+NO_SIZE, (UMat*)&m, wscale, iwscale); } |
447 | static KernelArg WriteOnlyNoSize(const UMat& m, int wscale=1, int iwscale=1) |
448 | { return KernelArg(WRITE_ONLY+NO_SIZE, (UMat*)&m, wscale, iwscale); } |
449 | static KernelArg Constant(const Mat& m); |
450 | template<typename _Tp> static KernelArg Constant(const _Tp* arr, size_t n) |
451 | { return KernelArg(CONSTANT, 0, 1, 1, (void*)arr, n); } |
452 | |
453 | int flags; |
454 | UMat* m; |
455 | const void* obj; |
456 | size_t sz; |
457 | int wscale, iwscale; |
458 | }; |
459 | |
460 | |
461 | class CV_EXPORTS Kernel |
462 | { |
463 | public: |
464 | Kernel() CV_NOEXCEPT; |
465 | Kernel(const char* kname, const Program& prog); |
466 | Kernel(const char* kname, const ProgramSource& prog, |
467 | const String& buildopts = String(), String* errmsg=0); |
468 | ~Kernel(); |
469 | Kernel(const Kernel& k); |
470 | Kernel& operator = (const Kernel& k); |
471 | Kernel(Kernel&& k) CV_NOEXCEPT; |
472 | Kernel& operator = (Kernel&& k) CV_NOEXCEPT; |
473 | |
474 | bool empty() const; |
475 | bool create(const char* kname, const Program& prog); |
476 | bool create(const char* kname, const ProgramSource& prog, |
477 | const String& buildopts, String* errmsg=0); |
478 | |
479 | int set(int i, const void* value, size_t sz); |
480 | int set(int i, const Image2D& image2D); |
481 | int set(int i, const UMat& m); |
482 | int set(int i, const KernelArg& arg); |
483 | template<typename _Tp> int set(int i, const _Tp& value) |
484 | { return set(i, &value, sizeof(value)); } |
485 | |
486 | |
487 | protected: |
488 | template<typename _Tp0> inline |
489 | int set_args_(int i, const _Tp0& a0) { return set(i, a0); } |
490 | template<typename _Tp0, typename... _Tps> inline |
491 | int set_args_(int i, const _Tp0& a0, const _Tps&... rest_args) { i = set(i, a0); return set_args_(i, rest_args...); } |
492 | public: |
493 | /** @brief Setup OpenCL Kernel arguments. |
494 | Avoid direct using of set(i, ...) methods. |
495 | @code |
496 | bool ok = kernel |
497 | .args( |
498 | srcUMat, dstUMat, |
499 | (float)some_float_param |
500 | ).run(ndims, globalSize, localSize); |
501 | if (!ok) return false; |
502 | @endcode |
503 | */ |
504 | template<typename... _Tps> inline |
505 | Kernel& args(const _Tps&... kernel_args) { set_args_(0, kernel_args...); return *this; } |
506 | |
507 | /** @brief Run the OpenCL kernel (globalsize value may be adjusted) |
508 | |
509 | @param dims the work problem dimensions. It is the length of globalsize and localsize. It can be either 1, 2 or 3. |
510 | @param globalsize work items for each dimension. It is not the final globalsize passed to |
511 | OpenCL. Each dimension will be adjusted to the nearest integer divisible by the corresponding |
512 | value in localsize. If localsize is NULL, it will still be adjusted depending on dims. The |
513 | adjusted values are greater than or equal to the original values. |
514 | @param localsize work-group size for each dimension. |
515 | @param sync specify whether to wait for OpenCL computation to finish before return. |
516 | @param q command queue |
517 | |
518 | @note Use run_() if your kernel code doesn't support adjusted globalsize. |
519 | */ |
520 | bool run(int dims, size_t globalsize[], |
521 | size_t localsize[], bool sync, const Queue& q=Queue()); |
522 | |
523 | /** @brief Run the OpenCL kernel |
524 | * |
525 | * @param dims the work problem dimensions. It is the length of globalsize and localsize. It can be either 1, 2 or 3. |
526 | * @param globalsize work items for each dimension. This value is passed to OpenCL without changes. |
527 | * @param localsize work-group size for each dimension. |
528 | * @param sync specify whether to wait for OpenCL computation to finish before return. |
529 | * @param q command queue |
530 | */ |
531 | bool run_(int dims, size_t globalsize[], size_t localsize[], bool sync, const Queue& q=Queue()); |
532 | |
533 | bool runTask(bool sync, const Queue& q=Queue()); |
534 | |
535 | /** @brief Similar to synchronized run_() call with returning of kernel execution time |
536 | * |
537 | * Separate OpenCL command queue may be used (with CL_QUEUE_PROFILING_ENABLE) |
538 | * @return Execution time in nanoseconds or negative number on error |
539 | */ |
540 | int64 runProfiling(int dims, size_t globalsize[], size_t localsize[], const Queue& q=Queue()); |
541 | |
542 | size_t workGroupSize() const; |
543 | size_t preferedWorkGroupSizeMultiple() const; |
544 | bool compileWorkGroupSize(size_t wsz[]) const; |
545 | size_t localMemSize() const; |
546 | |
547 | void* ptr() const; |
548 | struct Impl; |
549 | |
550 | protected: |
551 | Impl* p; |
552 | }; |
553 | |
554 | class CV_EXPORTS Program |
555 | { |
556 | public: |
557 | Program() CV_NOEXCEPT; |
558 | Program(const ProgramSource& src, |
559 | const String& buildflags, String& errmsg); |
560 | Program(const Program& prog); |
561 | Program& operator = (const Program& prog); |
562 | Program(Program&& prog) CV_NOEXCEPT; |
563 | Program& operator = (Program&& prog) CV_NOEXCEPT; |
564 | ~Program(); |
565 | |
566 | bool create(const ProgramSource& src, |
567 | const String& buildflags, String& errmsg); |
568 | |
569 | void* ptr() const; |
570 | |
571 | /** |
572 | * @brief Query device-specific program binary. |
573 | * |
574 | * Returns RAW OpenCL executable binary without additional attachments. |
575 | * |
576 | * @sa ProgramSource::fromBinary |
577 | * |
578 | * @param[out] binary output buffer |
579 | */ |
580 | void getBinary(std::vector<char>& binary) const; |
581 | |
582 | struct Impl; friend struct Impl; |
583 | inline Impl* getImpl() const { return (Impl*)p; } |
584 | inline bool empty() const { return !p; } |
585 | protected: |
586 | Impl* p; |
587 | public: |
588 | #ifndef OPENCV_REMOVE_DEPRECATED_API |
589 | // TODO Remove this |
590 | CV_DEPRECATED bool read(const String& buf, const String& buildflags); // removed, use ProgramSource instead |
591 | CV_DEPRECATED bool write(String& buf) const; // removed, use getBinary() method instead (RAW OpenCL binary) |
592 | CV_DEPRECATED const ProgramSource& source() const; // implementation removed |
593 | CV_DEPRECATED String getPrefix() const; // deprecated, implementation replaced |
594 | CV_DEPRECATED static String getPrefix(const String& buildflags); // deprecated, implementation replaced |
595 | #endif |
596 | }; |
597 | |
598 | |
599 | class CV_EXPORTS ProgramSource |
600 | { |
601 | public: |
602 | typedef uint64 hash_t; // deprecated |
603 | |
604 | ProgramSource() CV_NOEXCEPT; |
605 | explicit ProgramSource(const String& module, const String& name, const String& codeStr, const String& codeHash); |
606 | explicit ProgramSource(const String& prog); // deprecated |
607 | explicit ProgramSource(const char* prog); // deprecated |
608 | ~ProgramSource(); |
609 | ProgramSource(const ProgramSource& prog); |
610 | ProgramSource& operator = (const ProgramSource& prog); |
611 | ProgramSource(ProgramSource&& prog) CV_NOEXCEPT; |
612 | ProgramSource& operator = (ProgramSource&& prog) CV_NOEXCEPT; |
613 | |
614 | const String& source() const; // deprecated |
615 | hash_t hash() const; // deprecated |
616 | |
617 | |
618 | /** @brief Describe OpenCL program binary. |
619 | * Do not call clCreateProgramWithBinary() and/or clBuildProgram(). |
620 | * |
621 | * Caller should guarantee binary buffer lifetime greater than ProgramSource object (and any of its copies). |
622 | * |
623 | * This kind of binary is not portable between platforms in general - it is specific to OpenCL vendor / device / driver version. |
624 | * |
625 | * @param module name of program owner module |
626 | * @param name unique name of program (module+name is used as key for OpenCL program caching) |
627 | * @param binary buffer address. See buffer lifetime requirement in description. |
628 | * @param size buffer size |
629 | * @param buildOptions additional program-related build options passed to clBuildProgram() |
630 | * @return created ProgramSource object |
631 | */ |
632 | static ProgramSource fromBinary(const String& module, const String& name, |
633 | const unsigned char* binary, const size_t size, |
634 | const cv::String& buildOptions = cv::String()); |
635 | |
636 | /** @brief Describe OpenCL program in SPIR format. |
637 | * Do not call clCreateProgramWithBinary() and/or clBuildProgram(). |
638 | * |
639 | * Supports SPIR 1.2 by default (pass '-spir-std=X.Y' in buildOptions to override this behavior) |
640 | * |
641 | * Caller should guarantee binary buffer lifetime greater than ProgramSource object (and any of its copies). |
642 | * |
643 | * Programs in this format are portable between OpenCL implementations with 'khr_spir' extension: |
644 | * https://www.khronos.org/registry/OpenCL/sdk/2.0/docs/man/xhtml/cl_khr_spir.html |
645 | * (but they are not portable between different platforms: 32-bit / 64-bit) |
646 | * |
647 | * Note: these programs can't support vendor specific extensions, like 'cl_intel_subgroups'. |
648 | * |
649 | * @param module name of program owner module |
650 | * @param name unique name of program (module+name is used as key for OpenCL program caching) |
651 | * @param binary buffer address. See buffer lifetime requirement in description. |
652 | * @param size buffer size |
653 | * @param buildOptions additional program-related build options passed to clBuildProgram() |
654 | * (these options are added automatically: '-x spir' and '-spir-std=1.2') |
655 | * @return created ProgramSource object. |
656 | */ |
657 | static ProgramSource fromSPIR(const String& module, const String& name, |
658 | const unsigned char* binary, const size_t size, |
659 | const cv::String& buildOptions = cv::String()); |
660 | |
661 | //OpenCL 2.1+ only |
662 | //static Program fromSPIRV(const String& module, const String& name, |
663 | // const unsigned char* binary, const size_t size, |
664 | // const cv::String& buildOptions = cv::String()); |
665 | |
666 | struct Impl; friend struct Impl; |
667 | inline Impl* getImpl() const { return (Impl*)p; } |
668 | inline bool empty() const { return !p; } |
669 | protected: |
670 | Impl* p; |
671 | }; |
672 | |
673 | class CV_EXPORTS PlatformInfo |
674 | { |
675 | public: |
676 | PlatformInfo() CV_NOEXCEPT; |
677 | /** |
678 | * @param id pointer cl_platform_id (cl_platform_id*) |
679 | */ |
680 | explicit PlatformInfo(void* id); |
681 | ~PlatformInfo(); |
682 | |
683 | PlatformInfo(const PlatformInfo& i); |
684 | PlatformInfo& operator =(const PlatformInfo& i); |
685 | PlatformInfo(PlatformInfo&& i) CV_NOEXCEPT; |
686 | PlatformInfo& operator = (PlatformInfo&& i) CV_NOEXCEPT; |
687 | |
688 | String name() const; |
689 | String vendor() const; |
690 | |
691 | /// See CL_PLATFORM_VERSION |
692 | String version() const; |
693 | int versionMajor() const; |
694 | int versionMinor() const; |
695 | |
696 | int deviceNumber() const; |
697 | void getDevice(Device& device, int d) const; |
698 | |
699 | struct Impl; |
700 | bool empty() const { return !p; } |
701 | protected: |
702 | Impl* p; |
703 | }; |
704 | |
705 | CV_EXPORTS CV_DEPRECATED const char* convertTypeStr(int sdepth, int ddepth, int cn, char* buf); |
706 | CV_EXPORTS const char* convertTypeStr(int sdepth, int ddepth, int cn, char* buf, size_t buf_size); |
707 | CV_EXPORTS const char* typeToStr(int t); |
708 | CV_EXPORTS const char* memopTypeToStr(int t); |
709 | CV_EXPORTS const char* vecopTypeToStr(int t); |
710 | CV_EXPORTS const char* getOpenCLErrorString(int errorCode); |
711 | CV_EXPORTS String kernelToStr(InputArray _kernel, int ddepth = -1, const char * name = NULL); |
712 | CV_EXPORTS void getPlatfomsInfo(std::vector<PlatformInfo>& platform_info); |
713 | |
714 | |
/// Strategy selector accepted by predictOptimalVectorWidth() / checkOptimalVectorWidth().
enum OclVectorStrategy
{
    // every matrix keeps its own vector width
    OCL_VECTOR_OWN     = 0,

    // every matrix gets the maximal vector width found among all matrices
    // (useful when the matrices have different data types)
    OCL_VECTOR_MAX     = 1,

    // strategy used when none is given explicitly
    OCL_VECTOR_DEFAULT = OCL_VECTOR_OWN
};
726 | |
727 | CV_EXPORTS int predictOptimalVectorWidth(InputArray src1, InputArray src2 = noArray(), InputArray src3 = noArray(), |
728 | InputArray src4 = noArray(), InputArray src5 = noArray(), InputArray src6 = noArray(), |
729 | InputArray src7 = noArray(), InputArray src8 = noArray(), InputArray src9 = noArray(), |
730 | OclVectorStrategy strat = OCL_VECTOR_DEFAULT); |
731 | |
732 | CV_EXPORTS int checkOptimalVectorWidth(const int *vectorWidths, |
733 | InputArray src1, InputArray src2 = noArray(), InputArray src3 = noArray(), |
734 | InputArray src4 = noArray(), InputArray src5 = noArray(), InputArray src6 = noArray(), |
735 | InputArray src7 = noArray(), InputArray src8 = noArray(), InputArray src9 = noArray(), |
736 | OclVectorStrategy strat = OCL_VECTOR_DEFAULT); |
737 | |
738 | // with OCL_VECTOR_MAX strategy |
739 | CV_EXPORTS int predictOptimalVectorWidthMax(InputArray src1, InputArray src2 = noArray(), InputArray src3 = noArray(), |
740 | InputArray src4 = noArray(), InputArray src5 = noArray(), InputArray src6 = noArray(), |
741 | InputArray src7 = noArray(), InputArray src8 = noArray(), InputArray src9 = noArray()); |
742 | |
743 | CV_EXPORTS void buildOptionsAddMatrixDescription(String& buildOptions, const String& name, InputArray _m); |
744 | |
745 | class CV_EXPORTS Image2D |
746 | { |
747 | public: |
748 | Image2D() CV_NOEXCEPT; |
749 | |
750 | /** |
751 | @param src UMat object from which to get image properties and data |
752 | @param norm flag to enable the use of normalized channel data types |
753 | @param alias flag indicating that the image should alias the src UMat. If true, changes to the |
754 | image or src will be reflected in both objects. |
755 | */ |
756 | explicit Image2D(const UMat &src, bool norm = false, bool alias = false); |
757 | Image2D(const Image2D & i); |
758 | ~Image2D(); |
759 | |
760 | Image2D & operator = (const Image2D & i); |
761 | Image2D(Image2D &&) CV_NOEXCEPT; |
762 | Image2D &operator=(Image2D &&) CV_NOEXCEPT; |
763 | |
764 | /** Indicates if creating an aliased image should succeed. |
765 | Depends on the underlying platform and the dimensions of the UMat. |
766 | */ |
767 | static bool canCreateAlias(const UMat &u); |
768 | |
769 | /** Indicates if the image format is supported. |
770 | */ |
771 | static bool isFormatSupported(int depth, int cn, bool norm); |
772 | |
773 | void* ptr() const; |
774 | protected: |
775 | struct Impl; |
776 | Impl* p; |
777 | }; |
778 | |
779 | class CV_EXPORTS Timer |
780 | { |
781 | public: |
782 | Timer(const Queue& q); |
783 | ~Timer(); |
784 | void start(); |
785 | void stop(); |
786 | |
787 | uint64 durationNS() const; ///< duration in nanoseconds |
788 | |
789 | protected: |
790 | struct Impl; |
791 | Impl* const p; |
792 | |
793 | private: |
794 | Timer(const Timer&); // disabled |
795 | Timer& operator=(const Timer&); // disabled |
796 | }; |
797 | |
798 | CV_EXPORTS MatAllocator* getOpenCLAllocator(); |
799 | |
800 | |
801 | class CV_EXPORTS_W OpenCLExecutionContext |
802 | { |
803 | public: |
804 | OpenCLExecutionContext() = default; |
805 | ~OpenCLExecutionContext() = default; |
806 | |
807 | OpenCLExecutionContext(const OpenCLExecutionContext&) = default; |
808 | OpenCLExecutionContext(OpenCLExecutionContext&&) = default; |
809 | |
810 | OpenCLExecutionContext& operator=(const OpenCLExecutionContext&) = default; |
811 | OpenCLExecutionContext& operator=(OpenCLExecutionContext&&) = default; |
812 | |
813 | /** Get associated ocl::Context */ |
814 | Context& getContext() const; |
815 | /** Get the single default associated ocl::Device */ |
816 | Device& getDevice() const; |
817 | /** Get the single ocl::Queue that is associated with the ocl::Context and |
818 | * the single default ocl::Device |
819 | */ |
820 | Queue& getQueue() const; |
821 | |
822 | bool useOpenCL() const; |
823 | void setUseOpenCL(bool flag); |
824 | |
825 | /** Get OpenCL execution context of current thread. |
826 | * |
827 | * Initialize OpenCL execution context if it is empty |
828 | * - create new |
829 | * - reuse context of the main thread (threadID = 0) |
830 | */ |
831 | static OpenCLExecutionContext& getCurrent(); |
832 | |
833 | /** Get OpenCL execution context of current thread (can be empty) */ |
834 | static OpenCLExecutionContext& getCurrentRef(); |
835 | |
836 | /** Bind this OpenCL execution context to current thread. |
837 | * |
838 | * Context can't be empty. |
839 | * |
840 | * @note clFinish is not called for queue of previous execution context |
841 | */ |
842 | void bind() const; |
843 | |
844 | /** Creates new execution context with same OpenCV context and device |
845 | * |
846 | * @param q OpenCL queue |
847 | */ |
848 | OpenCLExecutionContext cloneWithNewQueue(const ocl::Queue& q) const; |
849 | /** @overload */ |
850 | OpenCLExecutionContext cloneWithNewQueue() const; |
851 | |
852 | /** @brief Creates OpenCL execution context |
853 | * OpenCV will check if available OpenCL platform has platformName name, |
854 | * then assign context to OpenCV. |
855 | * The deviceID device will be used as target device and a new command queue will be created. |
856 | * |
857 | * @note On success, ownership of one reference of the context and device is taken. |
858 | * The caller should additionally call `clRetainContext` and/or `clRetainDevice` |
859 | * to increase the reference count if it wishes to continue using them. |
860 | * |
861 | * @param platformName name of OpenCL platform to attach, this string is used to check if platform is available to OpenCV at runtime |
862 | * @param platformID ID of platform attached context was created for (cl_platform_id) |
863 | * @param context OpenCL context to be attached to OpenCV (cl_context) |
864 | * @param deviceID OpenCL device (cl_device_id) |
865 | */ |
866 | static OpenCLExecutionContext create(const std::string& platformName, void* platformID, void* context, void* deviceID); |
867 | |
868 | /** @brief Creates OpenCL execution context |
869 | * |
870 | * @param context non-empty OpenCL context |
871 | * @param device non-empty OpenCL device (must be a part of context) |
872 | * @param queue non-empty OpenCL queue for provided context and device |
873 | */ |
874 | static OpenCLExecutionContext create(const Context& context, const Device& device, const ocl::Queue& queue); |
875 | /** @overload */ |
876 | static OpenCLExecutionContext create(const Context& context, const Device& device); |
877 | |
878 | struct Impl; |
879 | inline bool empty() const { return !p; } |
880 | void release(); |
881 | protected: |
882 | std::shared_ptr<Impl> p; |
883 | }; |
884 | |
885 | class OpenCLExecutionContextScope |
886 | { |
887 | OpenCLExecutionContext ctx_; |
888 | public: |
889 | inline OpenCLExecutionContextScope(const OpenCLExecutionContext& ctx) |
890 | { |
891 | CV_Assert(!ctx.empty()); |
892 | ctx_ = OpenCLExecutionContext::getCurrentRef(); |
893 | ctx.bind(); |
894 | } |
895 | |
896 | inline ~OpenCLExecutionContextScope() |
897 | { |
898 | if (!ctx_.empty()) |
899 | { |
900 | ctx_.bind(); |
901 | } |
902 | } |
903 | }; |
904 | |
// Helpers that are only declared when __OPENCV_BUILD is defined.
#ifdef __OPENCV_BUILD
namespace internal {

CV_EXPORTS bool isOpenCLForced();
// Yields true when isOpenCLForced() returns true; otherwise yields (condition).
// Because of short-circuit evaluation, (condition) is not evaluated in the first case.
#define OCL_FORCE_CHECK(condition) (cv::ocl::internal::isOpenCLForced() || (condition))

CV_EXPORTS bool isPerformanceCheckBypassed();
// Yields true when isPerformanceCheckBypassed() returns true; otherwise yields (condition).
// Because of short-circuit evaluation, (condition) is not evaluated in the first case.
#define OCL_PERFORMANCE_CHECK(condition) (cv::ocl::internal::isPerformanceCheckBypassed() || (condition))

// NOTE(review): presumably reports whether `u` is backed by an OpenCL buffer - confirm in the implementation.
CV_EXPORTS bool isCLBuffer(UMat& u);

} // namespace internal
#endif // __OPENCV_BUILD
918 | |
919 | //! @} |
920 | |
921 | }} |
922 | |
923 | #endif |
924 |
Definitions
- Device
- isAMD
- isIntel
- isNVidia
- getImpl
- empty
- Context
- UserContext
- setUserContext
- getUserContext
- getImpl
- empty
- Platform
- getImpl
- empty
- Queue
- getImpl
- empty
- KernelArg
- Local
- PtrWriteOnly
- PtrReadOnly
- PtrReadWrite
- ReadWrite
- ReadWriteNoSize
- ReadOnly
- WriteOnly
- ReadOnlyNoSize
- WriteOnlyNoSize
- Constant
- Kernel
- set
- set_args_
- set_args_
- args
- Program
- getImpl
- empty
- ProgramSource
- getImpl
- empty
- PlatformInfo
- empty
- OclVectorStrategy
- Image2D
- Timer
- OpenCLExecutionContext
- OpenCLExecutionContext
- ~OpenCLExecutionContext
- OpenCLExecutionContext
- OpenCLExecutionContext
- operator=
- operator=
- empty
- OpenCLExecutionContextScope
- OpenCLExecutionContextScope
Learn to use CMake with our Intro Training
Find out more