1 | /*M/////////////////////////////////////////////////////////////////////////////////////// |
---|---|
2 | // |
3 | // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. |
4 | // |
5 | // By downloading, copying, installing or using the software you agree to this license. |
6 | // If you do not agree to this license, do not download, install, |
7 | // copy or use the software. |
8 | // |
9 | // |
10 | // License Agreement |
11 | // For Open Source Computer Vision Library |
12 | // |
13 | // Copyright (C) 2013, OpenCV Foundation, all rights reserved. |
14 | // Third party copyrights are property of their respective owners. |
15 | // |
16 | // Redistribution and use in source and binary forms, with or without modification, |
17 | // are permitted provided that the following conditions are met: |
18 | // |
19 | // * Redistribution's of source code must retain the above copyright notice, |
20 | // this list of conditions and the following disclaimer. |
21 | // |
22 | // * Redistribution's in binary form must reproduce the above copyright notice, |
23 | // this list of conditions and the following disclaimer in the documentation |
24 | // and/or other materials provided with the distribution. |
25 | // |
26 | // * The name of the copyright holders may not be used to endorse or promote products |
27 | // derived from this software without specific prior written permission. |
28 | // |
29 | // This software is provided by the copyright holders and contributors "as is" and |
30 | // any express or implied warranties, including, but not limited to, the implied |
31 | // warranties of merchantability and fitness for a particular purpose are disclaimed. |
32 | // In no event shall the OpenCV Foundation or contributors be liable for any direct, |
33 | // indirect, incidental, special, exemplary, or consequential damages |
34 | // (including, but not limited to, procurement of substitute goods or services; |
35 | // loss of use, data, or profits; or business interruption) however caused |
36 | // and on any theory of liability, whether in contract, strict liability, |
37 | // or tort (including negligence or otherwise) arising in any way out of |
38 | // the use of this software, even if advised of the possibility of such damage. |
39 | // |
40 | //M*/ |
41 | |
42 | #ifndef OPENCV_OPENCL_HPP |
43 | #define OPENCV_OPENCL_HPP |
44 | |
45 | #include "opencv2/core.hpp" |
46 | #include <typeinfo> |
47 | #include <typeindex> |
48 | |
49 | namespace cv { namespace ocl { |
50 | |
51 | //! @addtogroup core_opencl |
52 | //! @{ |
53 | |
54 | CV_EXPORTS_W bool haveOpenCL(); |
55 | CV_EXPORTS_W bool useOpenCL(); |
56 | CV_EXPORTS_W bool haveAmdBlas(); |
57 | CV_EXPORTS_W bool haveAmdFft(); |
58 | CV_EXPORTS_W void setUseOpenCL(bool flag); |
59 | CV_EXPORTS_W void finish(); |
60 | |
61 | CV_EXPORTS bool haveSVM(); |
62 | |
63 | class CV_EXPORTS Context; |
64 | class CV_EXPORTS_W_SIMPLE Device; |
65 | class CV_EXPORTS Kernel; |
66 | class CV_EXPORTS Program; |
67 | class CV_EXPORTS ProgramSource; |
68 | class CV_EXPORTS Queue; |
69 | class CV_EXPORTS PlatformInfo; |
70 | class CV_EXPORTS Image2D; |
71 | |
72 | class CV_EXPORTS_W_SIMPLE Device |
73 | { |
74 | public: |
75 | CV_WRAP Device() CV_NOEXCEPT; |
76 | explicit Device(void* d); |
77 | Device(const Device& d); |
78 | Device& operator = (const Device& d); |
79 | Device(Device&& d) CV_NOEXCEPT; |
80 | Device& operator = (Device&& d) CV_NOEXCEPT; |
81 | CV_WRAP ~Device(); |
82 | |
83 | void set(void* d); |
84 | |
85 | enum |
86 | { |
87 | TYPE_DEFAULT = (1 << 0), |
88 | TYPE_CPU = (1 << 1), |
89 | TYPE_GPU = (1 << 2), |
90 | TYPE_ACCELERATOR = (1 << 3), |
91 | TYPE_DGPU = TYPE_GPU + (1 << 16), |
92 | TYPE_IGPU = TYPE_GPU + (1 << 17), |
93 | TYPE_ALL = 0xFFFFFFFF |
94 | }; |
95 | |
96 | CV_WRAP String name() const; |
97 | CV_WRAP String extensions() const; |
98 | CV_WRAP bool isExtensionSupported(const String& extensionName) const; |
99 | CV_WRAP String version() const; |
100 | CV_WRAP String vendorName() const; |
101 | CV_WRAP String OpenCL_C_Version() const; |
102 | CV_WRAP String OpenCLVersion() const; |
103 | CV_WRAP int deviceVersionMajor() const; |
104 | CV_WRAP int deviceVersionMinor() const; |
105 | CV_WRAP String driverVersion() const; |
106 | void* ptr() const; |
107 | |
108 | CV_WRAP int type() const; |
109 | |
110 | CV_WRAP int addressBits() const; |
111 | CV_WRAP bool available() const; |
112 | CV_WRAP bool compilerAvailable() const; |
113 | CV_WRAP bool linkerAvailable() const; |
114 | |
115 | enum |
116 | { |
117 | FP_DENORM=(1 << 0), |
118 | FP_INF_NAN=(1 << 1), |
119 | FP_ROUND_TO_NEAREST=(1 << 2), |
120 | FP_ROUND_TO_ZERO=(1 << 3), |
121 | FP_ROUND_TO_INF=(1 << 4), |
122 | FP_FMA=(1 << 5), |
123 | FP_SOFT_FLOAT=(1 << 6), |
124 | FP_CORRECTLY_ROUNDED_DIVIDE_SQRT=(1 << 7) |
125 | }; |
126 | CV_WRAP int doubleFPConfig() const; |
127 | CV_WRAP int singleFPConfig() const; |
128 | CV_WRAP int halfFPConfig() const; |
129 | |
130 | CV_WRAP bool endianLittle() const; |
131 | CV_WRAP bool errorCorrectionSupport() const; |
132 | |
133 | enum |
134 | { |
135 | EXEC_KERNEL=(1 << 0), |
136 | EXEC_NATIVE_KERNEL=(1 << 1) |
137 | }; |
138 | CV_WRAP int executionCapabilities() const; |
139 | |
140 | CV_WRAP size_t globalMemCacheSize() const; |
141 | |
142 | enum |
143 | { |
144 | NO_CACHE=0, |
145 | READ_ONLY_CACHE=1, |
146 | READ_WRITE_CACHE=2 |
147 | }; |
148 | CV_WRAP int globalMemCacheType() const; |
149 | CV_WRAP int globalMemCacheLineSize() const; |
150 | CV_WRAP size_t globalMemSize() const; |
151 | |
152 | CV_WRAP size_t localMemSize() const; |
153 | enum |
154 | { |
155 | NO_LOCAL_MEM=0, |
156 | LOCAL_IS_LOCAL=1, |
157 | LOCAL_IS_GLOBAL=2 |
158 | }; |
159 | CV_WRAP int localMemType() const; |
160 | CV_WRAP bool hostUnifiedMemory() const; |
161 | |
162 | CV_WRAP bool imageSupport() const; |
163 | |
164 | CV_WRAP bool imageFromBufferSupport() const; |
165 | uint imagePitchAlignment() const; |
166 | uint imageBaseAddressAlignment() const; |
167 | |
168 | /// deprecated, use isExtensionSupported() method (probably with "cl_khr_subgroups" value) |
169 | CV_WRAP bool intelSubgroupsSupport() const; |
170 | |
171 | CV_WRAP size_t image2DMaxWidth() const; |
172 | CV_WRAP size_t image2DMaxHeight() const; |
173 | |
174 | CV_WRAP size_t image3DMaxWidth() const; |
175 | CV_WRAP size_t image3DMaxHeight() const; |
176 | CV_WRAP size_t image3DMaxDepth() const; |
177 | |
178 | CV_WRAP size_t imageMaxBufferSize() const; |
179 | CV_WRAP size_t imageMaxArraySize() const; |
180 | |
181 | enum |
182 | { |
183 | UNKNOWN_VENDOR=0, |
184 | VENDOR_AMD=1, |
185 | VENDOR_INTEL=2, |
186 | VENDOR_NVIDIA=3 |
187 | }; |
188 | CV_WRAP int vendorID() const; |
189 | // FIXIT |
190 | // dev.isAMD() doesn't work for OpenCL CPU devices from AMD OpenCL platform. |
191 | // This method should use platform name instead of vendor name. |
192 | // After fix restore code in arithm.cpp: ocl_compare() |
193 | CV_WRAP inline bool isAMD() const { return vendorID() == VENDOR_AMD; } |
194 | CV_WRAP inline bool isIntel() const { return vendorID() == VENDOR_INTEL; } |
195 | CV_WRAP inline bool isNVidia() const { return vendorID() == VENDOR_NVIDIA; } |
196 | |
197 | CV_WRAP int maxClockFrequency() const; |
198 | CV_WRAP int maxComputeUnits() const; |
199 | CV_WRAP int maxConstantArgs() const; |
200 | CV_WRAP size_t maxConstantBufferSize() const; |
201 | |
202 | CV_WRAP size_t maxMemAllocSize() const; |
203 | CV_WRAP size_t maxParameterSize() const; |
204 | |
205 | CV_WRAP int maxReadImageArgs() const; |
206 | CV_WRAP int maxWriteImageArgs() const; |
207 | CV_WRAP int maxSamplers() const; |
208 | |
209 | CV_WRAP size_t maxWorkGroupSize() const; |
210 | CV_WRAP int maxWorkItemDims() const; |
211 | void maxWorkItemSizes(size_t*) const; |
212 | |
213 | CV_WRAP int memBaseAddrAlign() const; |
214 | |
215 | CV_WRAP int nativeVectorWidthChar() const; |
216 | CV_WRAP int nativeVectorWidthShort() const; |
217 | CV_WRAP int nativeVectorWidthInt() const; |
218 | CV_WRAP int nativeVectorWidthLong() const; |
219 | CV_WRAP int nativeVectorWidthFloat() const; |
220 | CV_WRAP int nativeVectorWidthDouble() const; |
221 | CV_WRAP int nativeVectorWidthHalf() const; |
222 | |
223 | CV_WRAP int preferredVectorWidthChar() const; |
224 | CV_WRAP int preferredVectorWidthShort() const; |
225 | CV_WRAP int preferredVectorWidthInt() const; |
226 | CV_WRAP int preferredVectorWidthLong() const; |
227 | CV_WRAP int preferredVectorWidthFloat() const; |
228 | CV_WRAP int preferredVectorWidthDouble() const; |
229 | CV_WRAP int preferredVectorWidthHalf() const; |
230 | |
231 | CV_WRAP size_t printfBufferSize() const; |
232 | CV_WRAP size_t profilingTimerResolution() const; |
233 | |
234 | CV_WRAP static const Device& getDefault(); |
235 | |
236 | /** |
237 | * @param d OpenCL handle (cl_device_id). clRetainDevice() is called on success. |
238 | * |
239 | * @note Ownership of the passed device is passed to OpenCV on success. |
240 | * The caller should additionally call `clRetainDevice` on it if it intends |
241 | * to continue using the device. |
242 | */ |
243 | static Device fromHandle(void* d); |
244 | |
245 | struct Impl; |
246 | inline Impl* getImpl() const { return (Impl*)p; } |
247 | inline bool empty() const { return !p; } |
248 | protected: |
249 | Impl* p; |
250 | }; |
251 | |
252 | |
253 | class CV_EXPORTS Context |
254 | { |
255 | public: |
256 | Context() CV_NOEXCEPT; |
257 | explicit Context(int dtype); //!< @deprecated |
258 | ~Context(); |
259 | Context(const Context& c); |
260 | Context& operator= (const Context& c); |
261 | Context(Context&& c) CV_NOEXCEPT; |
262 | Context& operator = (Context&& c) CV_NOEXCEPT; |
263 | |
264 | /** @deprecated */ |
265 | bool create(); |
266 | /** @deprecated */ |
267 | bool create(int dtype); |
268 | |
269 | size_t ndevices() const; |
270 | Device& device(size_t idx) const; |
271 | Program getProg(const ProgramSource& prog, |
272 | const String& buildopt, String& errmsg); |
273 | void unloadProg(Program& prog); |
274 | |
275 | |
276 | /** Get thread-local OpenCL context (initialize if necessary) */ |
277 | #if 0 // OpenCV 5.0 |
278 | static Context& getDefault(); |
279 | #else |
280 | static Context& getDefault(bool initialize = true); |
281 | #endif |
282 | |
283 | /** @returns cl_context value */ |
284 | void* ptr() const; |
285 | |
286 | /** |
287 | * @brief Get OpenCL context property specified on context creation |
288 | * @param propertyId Property id (CL_CONTEXT_* as defined in cl_context_properties type) |
289 | * @returns Property value if property was specified on clCreateContext, or NULL if context created without the property |
290 | */ |
291 | void* getOpenCLContextProperty(int propertyId) const; |
292 | |
293 | bool useSVM() const; |
294 | void setUseSVM(bool enabled); |
295 | |
296 | /** |
297 | * @param context OpenCL handle (cl_context). clRetainContext() is called on success |
298 | */ |
299 | static Context fromHandle(void* context); |
300 | static Context fromDevice(const ocl::Device& device); |
301 | static Context create(const std::string& configuration); |
302 | |
303 | void release(); |
304 | |
305 | class CV_EXPORTS UserContext { |
306 | public: |
307 | virtual ~UserContext(); |
308 | }; |
309 | template <typename T> |
310 | inline void setUserContext(const std::shared_ptr<T>& userContext) { |
311 | setUserContext(typeid(T), userContext); |
312 | } |
313 | template <typename T> |
314 | inline std::shared_ptr<T> getUserContext() { |
315 | return std::dynamic_pointer_cast<T>(getUserContext(typeId: typeid(T))); |
316 | } |
317 | void setUserContext(std::type_index typeId, const std::shared_ptr<UserContext>& userContext); |
318 | std::shared_ptr<UserContext> getUserContext(std::type_index typeId); |
319 | |
320 | struct Impl; |
321 | inline Impl* getImpl() const { return (Impl*)p; } |
322 | inline bool empty() const { return !p; } |
323 | // TODO OpenCV 5.0 |
324 | //protected: |
325 | Impl* p; |
326 | }; |
327 | |
328 | /** @deprecated */ |
329 | class CV_EXPORTS Platform |
330 | { |
331 | public: |
332 | Platform() CV_NOEXCEPT; |
333 | ~Platform(); |
334 | Platform(const Platform& p); |
335 | Platform& operator = (const Platform& p); |
336 | Platform(Platform&& p) CV_NOEXCEPT; |
337 | Platform& operator = (Platform&& p) CV_NOEXCEPT; |
338 | |
339 | void* ptr() const; |
340 | |
341 | /** @deprecated */ |
342 | static Platform& getDefault(); |
343 | |
344 | struct Impl; |
345 | inline Impl* getImpl() const { return (Impl*)p; } |
346 | inline bool empty() const { return !p; } |
347 | protected: |
348 | Impl* p; |
349 | }; |
350 | |
351 | /** @brief Attaches OpenCL context to OpenCV |
352 | @note |
353 | OpenCV will check if available OpenCL platform has platformName name, then assign context to |
354 | OpenCV and call `clRetainContext` function. The deviceID device will be used as target device and |
355 | new command queue will be created. |
356 | @param platformName name of OpenCL platform to attach, this string is used to check if platform is available to OpenCV at runtime |
357 | @param platformID ID of platform attached context was created for |
358 | @param context OpenCL context to be attached to OpenCV |
359 | @param deviceID ID of device, must be created from attached context |
360 | */ |
361 | CV_EXPORTS void attachContext(const String& platformName, void* platformID, void* context, void* deviceID); |
362 | |
363 | /** @brief Convert OpenCL buffer to UMat |
364 | @note |
365 | OpenCL buffer (cl_mem_buffer) should contain 2D image data, compatible with OpenCV. Memory |
366 | content is not copied from `clBuffer` to UMat. Instead, buffer handle assigned to UMat and |
367 | `clRetainMemObject` is called. |
368 | @param cl_mem_buffer source clBuffer handle |
369 | @param step num of bytes in single row |
370 | @param rows number of rows |
371 | @param cols number of cols |
372 | @param type OpenCV type of image |
373 | @param dst destination UMat |
374 | */ |
375 | CV_EXPORTS void convertFromBuffer(void* cl_mem_buffer, size_t step, int rows, int cols, int type, UMat& dst); |
376 | |
377 | /** @brief Convert OpenCL image2d_t to UMat |
378 | @note |
379 | OpenCL `image2d_t` (cl_mem_image), should be compatible with OpenCV UMat formats. Memory content |
380 | is copied from image to UMat with `clEnqueueCopyImageToBuffer` function. |
381 | @param cl_mem_image source image2d_t handle |
382 | @param dst destination UMat |
383 | */ |
384 | CV_EXPORTS void convertFromImage(void* cl_mem_image, UMat& dst); |
385 | |
386 | // TODO Move to internal header |
387 | /// @deprecated |
388 | void initializeContextFromHandle(Context& ctx, void* platform, void* context, void* device); |
389 | |
390 | class CV_EXPORTS Queue |
391 | { |
392 | public: |
393 | Queue() CV_NOEXCEPT; |
394 | explicit Queue(const Context& c, const Device& d=Device()); |
395 | ~Queue(); |
396 | Queue(const Queue& q); |
397 | Queue& operator = (const Queue& q); |
398 | Queue(Queue&& q) CV_NOEXCEPT; |
399 | Queue& operator = (Queue&& q) CV_NOEXCEPT; |
400 | |
401 | bool create(const Context& c=Context(), const Device& d=Device()); |
402 | void finish(); |
403 | void* ptr() const; |
404 | static Queue& getDefault(); |
405 | |
406 | /// @brief Returns OpenCL command queue with enable profiling mode support |
407 | const Queue& getProfilingQueue() const; |
408 | |
409 | struct Impl; friend struct Impl; |
410 | inline Impl* getImpl() const { return p; } |
411 | inline bool empty() const { return !p; } |
412 | protected: |
413 | Impl* p; |
414 | }; |
415 | |
416 | |
417 | class CV_EXPORTS KernelArg |
418 | { |
419 | public: |
420 | enum { LOCAL=1, READ_ONLY=2, WRITE_ONLY=4, READ_WRITE=6, CONSTANT=8, PTR_ONLY = 16, NO_SIZE=256 }; |
421 | KernelArg(int _flags, UMat* _m, int wscale=1, int iwscale=1, const void* _obj=0, size_t _sz=0); |
422 | KernelArg() CV_NOEXCEPT; |
423 | |
424 | static KernelArg Local(size_t localMemSize) |
425 | { return KernelArg(LOCAL, 0, 1, 1, 0, localMemSize); } |
426 | static KernelArg PtrWriteOnly(const UMat& m) |
427 | { return KernelArg(PTR_ONLY+WRITE_ONLY, (UMat*)&m); } |
428 | static KernelArg PtrReadOnly(const UMat& m) |
429 | { return KernelArg(PTR_ONLY+READ_ONLY, (UMat*)&m); } |
430 | static KernelArg PtrReadWrite(const UMat& m) |
431 | { return KernelArg(PTR_ONLY+READ_WRITE, (UMat*)&m); } |
432 | static KernelArg ReadWrite(const UMat& m, int wscale=1, int iwscale=1) |
433 | { return KernelArg(READ_WRITE, (UMat*)&m, wscale, iwscale); } |
434 | static KernelArg ReadWriteNoSize(const UMat& m, int wscale=1, int iwscale=1) |
435 | { return KernelArg(READ_WRITE+NO_SIZE, (UMat*)&m, wscale, iwscale); } |
436 | static KernelArg ReadOnly(const UMat& m, int wscale=1, int iwscale=1) |
437 | { return KernelArg(READ_ONLY, (UMat*)&m, wscale, iwscale); } |
438 | static KernelArg WriteOnly(const UMat& m, int wscale=1, int iwscale=1) |
439 | { return KernelArg(WRITE_ONLY, (UMat*)&m, wscale, iwscale); } |
440 | static KernelArg ReadOnlyNoSize(const UMat& m, int wscale=1, int iwscale=1) |
441 | { return KernelArg(READ_ONLY+NO_SIZE, (UMat*)&m, wscale, iwscale); } |
442 | static KernelArg WriteOnlyNoSize(const UMat& m, int wscale=1, int iwscale=1) |
443 | { return KernelArg(WRITE_ONLY+NO_SIZE, (UMat*)&m, wscale, iwscale); } |
444 | static KernelArg Constant(const Mat& m); |
445 | template<typename _Tp> static KernelArg Constant(const _Tp* arr, size_t n) |
446 | { return KernelArg(CONSTANT, 0, 1, 1, (void*)arr, n); } |
447 | |
448 | int flags; |
449 | UMat* m; |
450 | const void* obj; |
451 | size_t sz; |
452 | int wscale, iwscale; |
453 | }; |
454 | |
455 | |
456 | class CV_EXPORTS Kernel |
457 | { |
458 | public: |
459 | Kernel() CV_NOEXCEPT; |
460 | Kernel(const char* kname, const Program& prog); |
461 | Kernel(const char* kname, const ProgramSource& prog, |
462 | const String& buildopts = String(), String* errmsg=0); |
463 | ~Kernel(); |
464 | Kernel(const Kernel& k); |
465 | Kernel& operator = (const Kernel& k); |
466 | Kernel(Kernel&& k) CV_NOEXCEPT; |
467 | Kernel& operator = (Kernel&& k) CV_NOEXCEPT; |
468 | |
469 | bool empty() const; |
470 | bool create(const char* kname, const Program& prog); |
471 | bool create(const char* kname, const ProgramSource& prog, |
472 | const String& buildopts, String* errmsg=0); |
473 | |
474 | int set(int i, const void* value, size_t sz); |
475 | int set(int i, const Image2D& image2D); |
476 | int set(int i, const UMat& m); |
477 | int set(int i, const KernelArg& arg); |
478 | template<typename _Tp> int set(int i, const _Tp& value) |
479 | { return set(i, &value, sizeof(value)); } |
480 | |
481 | |
482 | protected: |
483 | template<typename _Tp0> inline |
484 | int set_args_(int i, const _Tp0& a0) { return set(i, a0); } |
485 | template<typename _Tp0, typename... _Tps> inline |
486 | int set_args_(int i, const _Tp0& a0, const _Tps&... rest_args) { i = set(i, a0); return set_args_(i, rest_args...); } |
487 | public: |
488 | /** @brief Setup OpenCL Kernel arguments. |
489 | Avoid direct using of set(i, ...) methods. |
490 | @code |
491 | bool ok = kernel |
492 | .args( |
493 | srcUMat, dstUMat, |
494 | (float)some_float_param |
495 | ).run(ndims, globalSize, localSize); |
496 | if (!ok) return false; |
497 | @endcode |
498 | */ |
499 | template<typename... _Tps> inline |
500 | Kernel& args(const _Tps&... kernel_args) { set_args_(0, kernel_args...); return *this; } |
501 | |
502 | /** @brief Run the OpenCL kernel (globalsize value may be adjusted) |
503 | |
504 | @param dims the work problem dimensions. It is the length of globalsize and localsize. It can be either 1, 2 or 3. |
505 | @param globalsize work items for each dimension. It is not the final globalsize passed to |
506 | OpenCL. Each dimension will be adjusted to the nearest integer divisible by the corresponding |
507 | value in localsize. If localsize is NULL, it will still be adjusted depending on dims. The |
508 | adjusted values are greater than or equal to the original values. |
509 | @param localsize work-group size for each dimension. |
510 | @param sync specify whether to wait for OpenCL computation to finish before return. |
511 | @param q command queue |
512 | |
513 | @note Use run_() if your kernel code doesn't support adjusted globalsize. |
514 | */ |
515 | bool run(int dims, size_t globalsize[], |
516 | size_t localsize[], bool sync, const Queue& q=Queue()); |
517 | |
518 | /** @brief Run the OpenCL kernel |
519 | * |
520 | * @param dims the work problem dimensions. It is the length of globalsize and localsize. It can be either 1, 2 or 3. |
521 | * @param globalsize work items for each dimension. This value is passed to OpenCL without changes. |
522 | * @param localsize work-group size for each dimension. |
523 | * @param sync specify whether to wait for OpenCL computation to finish before return. |
524 | * @param q command queue |
525 | */ |
526 | bool run_(int dims, size_t globalsize[], size_t localsize[], bool sync, const Queue& q=Queue()); |
527 | |
528 | bool runTask(bool sync, const Queue& q=Queue()); |
529 | |
530 | /** @brief Similar to synchronized run_() call with returning of kernel execution time |
531 | * |
532 | * Separate OpenCL command queue may be used (with CL_QUEUE_PROFILING_ENABLE) |
533 | * @return Execution time in nanoseconds or negative number on error |
534 | */ |
535 | int64 runProfiling(int dims, size_t globalsize[], size_t localsize[], const Queue& q=Queue()); |
536 | |
537 | size_t workGroupSize() const; |
538 | size_t preferedWorkGroupSizeMultiple() const; |
539 | bool compileWorkGroupSize(size_t wsz[]) const; |
540 | size_t localMemSize() const; |
541 | |
542 | void* ptr() const; |
543 | struct Impl; |
544 | |
545 | protected: |
546 | Impl* p; |
547 | }; |
548 | |
549 | class CV_EXPORTS Program |
550 | { |
551 | public: |
552 | Program() CV_NOEXCEPT; |
553 | Program(const ProgramSource& src, |
554 | const String& buildflags, String& errmsg); |
555 | Program(const Program& prog); |
556 | Program& operator = (const Program& prog); |
557 | Program(Program&& prog) CV_NOEXCEPT; |
558 | Program& operator = (Program&& prog) CV_NOEXCEPT; |
559 | ~Program(); |
560 | |
561 | bool create(const ProgramSource& src, |
562 | const String& buildflags, String& errmsg); |
563 | |
564 | void* ptr() const; |
565 | |
566 | /** |
567 | * @brief Query device-specific program binary. |
568 | * |
569 | * Returns RAW OpenCL executable binary without additional attachments. |
570 | * |
571 | * @sa ProgramSource::fromBinary |
572 | * |
573 | * @param[out] binary output buffer |
574 | */ |
575 | void getBinary(std::vector<char>& binary) const; |
576 | |
577 | struct Impl; friend struct Impl; |
578 | inline Impl* getImpl() const { return (Impl*)p; } |
579 | inline bool empty() const { return !p; } |
580 | protected: |
581 | Impl* p; |
582 | public: |
583 | #ifndef OPENCV_REMOVE_DEPRECATED_API |
584 | // TODO Remove this |
585 | CV_DEPRECATED bool read(const String& buf, const String& buildflags); // removed, use ProgramSource instead |
586 | CV_DEPRECATED bool write(String& buf) const; // removed, use getBinary() method instead (RAW OpenCL binary) |
587 | CV_DEPRECATED const ProgramSource& source() const; // implementation removed |
588 | CV_DEPRECATED String getPrefix() const; // deprecated, implementation replaced |
589 | CV_DEPRECATED static String getPrefix(const String& buildflags); // deprecated, implementation replaced |
590 | #endif |
591 | }; |
592 | |
593 | |
594 | class CV_EXPORTS ProgramSource |
595 | { |
596 | public: |
597 | typedef uint64 hash_t; // deprecated |
598 | |
599 | ProgramSource() CV_NOEXCEPT; |
600 | explicit ProgramSource(const String& module, const String& name, const String& codeStr, const String& codeHash); |
601 | explicit ProgramSource(const String& prog); // deprecated |
602 | explicit ProgramSource(const char* prog); // deprecated |
603 | ~ProgramSource(); |
604 | ProgramSource(const ProgramSource& prog); |
605 | ProgramSource& operator = (const ProgramSource& prog); |
606 | ProgramSource(ProgramSource&& prog) CV_NOEXCEPT; |
607 | ProgramSource& operator = (ProgramSource&& prog) CV_NOEXCEPT; |
608 | |
609 | const String& source() const; // deprecated |
610 | hash_t hash() const; // deprecated |
611 | |
612 | |
613 | /** @brief Describe OpenCL program binary. |
614 | * Do not call clCreateProgramWithBinary() and/or clBuildProgram(). |
615 | * |
616 | * Caller should guarantee binary buffer lifetime greater than ProgramSource object (and any of its copies). |
617 | * |
618 | * This kind of binary is not portable between platforms in general - it is specific to OpenCL vendor / device / driver version. |
619 | * |
620 | * @param module name of program owner module |
621 | * @param name unique name of program (module+name is used as key for OpenCL program caching) |
622 | * @param binary buffer address. See buffer lifetime requirement in description. |
623 | * @param size buffer size |
624 | * @param buildOptions additional program-related build options passed to clBuildProgram() |
625 | * @return created ProgramSource object |
626 | */ |
627 | static ProgramSource fromBinary(const String& module, const String& name, |
628 | const unsigned char* binary, const size_t size, |
629 | const cv::String& buildOptions = cv::String()); |
630 | |
631 | /** @brief Describe OpenCL program in SPIR format. |
632 | * Do not call clCreateProgramWithBinary() and/or clBuildProgram(). |
633 | * |
634 | * Supports SPIR 1.2 by default (pass '-spir-std=X.Y' in buildOptions to override this behavior) |
635 | * |
636 | * Caller should guarantee binary buffer lifetime greater than ProgramSource object (and any of its copies). |
637 | * |
638 | * Programs in this format are portable between OpenCL implementations with 'khr_spir' extension: |
639 | * https://www.khronos.org/registry/OpenCL/sdk/2.0/docs/man/xhtml/cl_khr_spir.html |
640 | * (but they are not portable between different platforms: 32-bit / 64-bit) |
641 | * |
642 | * Note: these programs can't support vendor specific extensions, like 'cl_intel_subgroups'. |
643 | * |
644 | * @param module name of program owner module |
645 | * @param name unique name of program (module+name is used as key for OpenCL program caching) |
646 | * @param binary buffer address. See buffer lifetime requirement in description. |
647 | * @param size buffer size |
648 | * @param buildOptions additional program-related build options passed to clBuildProgram() |
649 | * (these options are added automatically: '-x spir' and '-spir-std=1.2') |
650 | * @return created ProgramSource object. |
651 | */ |
652 | static ProgramSource fromSPIR(const String& module, const String& name, |
653 | const unsigned char* binary, const size_t size, |
654 | const cv::String& buildOptions = cv::String()); |
655 | |
656 | //OpenCL 2.1+ only |
657 | //static Program fromSPIRV(const String& module, const String& name, |
658 | // const unsigned char* binary, const size_t size, |
659 | // const cv::String& buildOptions = cv::String()); |
660 | |
661 | struct Impl; friend struct Impl; |
662 | inline Impl* getImpl() const { return (Impl*)p; } |
663 | inline bool empty() const { return !p; } |
664 | protected: |
665 | Impl* p; |
666 | }; |
667 | |
668 | class CV_EXPORTS PlatformInfo |
669 | { |
670 | public: |
671 | PlatformInfo() CV_NOEXCEPT; |
672 | /** |
673 | * @param id pointer cl_platform_id (cl_platform_id*) |
674 | */ |
675 | explicit PlatformInfo(void* id); |
676 | ~PlatformInfo(); |
677 | |
678 | PlatformInfo(const PlatformInfo& i); |
679 | PlatformInfo& operator =(const PlatformInfo& i); |
680 | PlatformInfo(PlatformInfo&& i) CV_NOEXCEPT; |
681 | PlatformInfo& operator = (PlatformInfo&& i) CV_NOEXCEPT; |
682 | |
683 | String name() const; |
684 | String vendor() const; |
685 | |
686 | /// See CL_PLATFORM_VERSION |
687 | String version() const; |
688 | int versionMajor() const; |
689 | int versionMinor() const; |
690 | |
691 | int deviceNumber() const; |
692 | void getDevice(Device& device, int d) const; |
693 | |
694 | struct Impl; |
695 | bool empty() const { return !p; } |
696 | protected: |
697 | Impl* p; |
698 | }; |
699 | |
700 | CV_EXPORTS CV_DEPRECATED const char* convertTypeStr(int sdepth, int ddepth, int cn, char* buf); |
701 | CV_EXPORTS const char* convertTypeStr(int sdepth, int ddepth, int cn, char* buf, size_t buf_size); |
702 | CV_EXPORTS const char* typeToStr(int t); |
703 | CV_EXPORTS const char* memopTypeToStr(int t); |
704 | CV_EXPORTS const char* vecopTypeToStr(int t); |
705 | CV_EXPORTS const char* getOpenCLErrorString(int errorCode); |
706 | CV_EXPORTS String kernelToStr(InputArray _kernel, int ddepth = -1, const char * name = NULL); |
707 | CV_EXPORTS void getPlatfomsInfo(std::vector<PlatformInfo>& platform_info); |
708 | |
709 | |
/** Strategy selector accepted by predictOptimalVectorWidth() and
 * checkOptimalVectorWidth().
 */
enum OclVectorStrategy
{
    /// all matrices have its own vector width
    OCL_VECTOR_OWN = 0,

    /// all matrices have maximal vector width among all matrices
    /// (useful for cases when matrices have different data types)
    OCL_VECTOR_MAX = 1,

    /// default strategy
    OCL_VECTOR_DEFAULT = OCL_VECTOR_OWN
};
721 | |
722 | CV_EXPORTS int predictOptimalVectorWidth(InputArray src1, InputArray src2 = noArray(), InputArray src3 = noArray(), |
723 | InputArray src4 = noArray(), InputArray src5 = noArray(), InputArray src6 = noArray(), |
724 | InputArray src7 = noArray(), InputArray src8 = noArray(), InputArray src9 = noArray(), |
725 | OclVectorStrategy strat = OCL_VECTOR_DEFAULT); |
726 | |
727 | CV_EXPORTS int checkOptimalVectorWidth(const int *vectorWidths, |
728 | InputArray src1, InputArray src2 = noArray(), InputArray src3 = noArray(), |
729 | InputArray src4 = noArray(), InputArray src5 = noArray(), InputArray src6 = noArray(), |
730 | InputArray src7 = noArray(), InputArray src8 = noArray(), InputArray src9 = noArray(), |
731 | OclVectorStrategy strat = OCL_VECTOR_DEFAULT); |
732 | |
733 | // with OCL_VECTOR_MAX strategy |
734 | CV_EXPORTS int predictOptimalVectorWidthMax(InputArray src1, InputArray src2 = noArray(), InputArray src3 = noArray(), |
735 | InputArray src4 = noArray(), InputArray src5 = noArray(), InputArray src6 = noArray(), |
736 | InputArray src7 = noArray(), InputArray src8 = noArray(), InputArray src9 = noArray()); |
737 | |
738 | CV_EXPORTS void buildOptionsAddMatrixDescription(String& buildOptions, const String& name, InputArray _m); |
739 | |
740 | class CV_EXPORTS Image2D |
741 | { |
742 | public: |
743 | Image2D() CV_NOEXCEPT; |
744 | |
745 | /** |
746 | @param src UMat object from which to get image properties and data |
747 | @param norm flag to enable the use of normalized channel data types |
748 | @param alias flag indicating that the image should alias the src UMat. If true, changes to the |
749 | image or src will be reflected in both objects. |
750 | */ |
751 | explicit Image2D(const UMat &src, bool norm = false, bool alias = false); |
752 | Image2D(const Image2D & i); |
753 | ~Image2D(); |
754 | |
755 | Image2D & operator = (const Image2D & i); |
756 | Image2D(Image2D &&) CV_NOEXCEPT; |
757 | Image2D &operator=(Image2D &&) CV_NOEXCEPT; |
758 | |
759 | /** Indicates if creating an aliased image should succeed. |
760 | Depends on the underlying platform and the dimensions of the UMat. |
761 | */ |
762 | static bool canCreateAlias(const UMat &u); |
763 | |
764 | /** Indicates if the image format is supported. |
765 | */ |
766 | static bool isFormatSupported(int depth, int cn, bool norm); |
767 | |
768 | void* ptr() const; |
769 | protected: |
770 | struct Impl; |
771 | Impl* p; |
772 | }; |
773 | |
774 | class CV_EXPORTS Timer // timer constructed from a Queue; the measured duration is reported in nanoseconds |
775 | { |
776 | public: |
777 | Timer(const Queue& q); ///< creates a timer for queue q |
778 | ~Timer(); |
779 | |
780 | // Copying is disabled; "= delete" rejects it at compile time, even from members and friends. |
781 | Timer(const Timer&) = delete; |
782 | Timer& operator=(const Timer&) = delete; |
783 | |
784 | void start(); |
785 | void stop(); |
786 | uint64 durationNS() const; ///< duration in nanoseconds |
787 | |
788 | protected: |
789 | struct Impl; ///< implementation type, only forward-declared here |
790 | Impl* const p; ///< implementation pointer; const, so it cannot be re-seated after construction |
791 | }; |
792 | |
793 | CV_EXPORTS MatAllocator* getOpenCLAllocator(); ///< NOTE(review): presumably the MatAllocator used for OpenCL-backed data - confirm in the implementation |
794 | |
795 | |
796 | class CV_EXPORTS_W OpenCLExecutionContext |
797 | { |
798 | public: |
799 | OpenCLExecutionContext() = default; ///< creates an empty context: p is null, so empty() returns true |
800 | ~OpenCLExecutionContext() = default; |
801 | // Copy and move are compiler-generated; a copy shares the same Impl through the std::shared_ptr member p. |
802 | OpenCLExecutionContext(const OpenCLExecutionContext&) = default; |
803 | OpenCLExecutionContext(OpenCLExecutionContext&&) = default; |
804 | |
805 | OpenCLExecutionContext& operator=(const OpenCLExecutionContext&) = default; |
806 | OpenCLExecutionContext& operator=(OpenCLExecutionContext&&) = default; |
807 | |
808 | /** Get the associated ocl::Context */ |
809 | Context& getContext() const; |
810 | /** Get the single default associated ocl::Device */ |
811 | Device& getDevice() const; |
812 | /** Get the single ocl::Queue that is associated with the ocl::Context and |
813 | * the single default ocl::Device |
814 | */ |
815 | Queue& getQueue() const; |
816 | |
817 | bool useOpenCL() const; ///< NOTE(review): presumably reports whether OpenCL use is enabled for this context - confirm |
818 | void setUseOpenCL(bool flag); ///< NOTE(review): presumably enables or disables OpenCL use for this context - confirm |
819 | |
820 | /** Get the OpenCL execution context of the current thread. |
821 | * |
822 | * Initializes the OpenCL execution context if it is empty, by either |
823 | * - creating a new one, or |
824 | * - reusing the context of the main thread (threadID = 0) |
825 | */ |
826 | static OpenCLExecutionContext& getCurrent(); |
827 | |
828 | /** Get the OpenCL execution context of the current thread (can be empty) */ |
829 | static OpenCLExecutionContext& getCurrentRef(); |
830 | |
831 | /** Bind this OpenCL execution context to the current thread. |
832 | * |
833 | * This context must not be empty. |
834 | * |
835 | * @note clFinish is not called for the queue of the previous execution context |
836 | */ |
837 | void bind() const; |
838 | |
839 | /** Creates a new execution context with the same context and device |
840 | * |
841 | * @param q OpenCL queue |
842 | */ |
843 | OpenCLExecutionContext cloneWithNewQueue(const ocl::Queue& q) const; |
844 | /** @overload */ |
845 | OpenCLExecutionContext cloneWithNewQueue() const; |
846 | |
847 | /** @brief Creates an OpenCL execution context |
848 | * OpenCV checks that an available OpenCL platform is named platformName, |
849 | * then assigns the context to OpenCV. |
850 | * The deviceID device will be used as the target device and a new command queue will be created. |
851 | * |
852 | * @note On success, ownership of one reference of the context and device is taken. |
853 | * The caller should additionally call `clRetainContext` and/or `clRetainDevice` |
854 | * to increase the reference count if it wishes to continue using them. |
855 | * |
856 | * @param platformName name of the OpenCL platform to attach; this string is used to check that the platform is available to OpenCV at runtime |
857 | * @param platformID ID of the platform the attached context was created for (cl_platform_id) |
858 | * @param context OpenCL context to be attached to OpenCV (cl_context) |
859 | * @param deviceID OpenCL device (cl_device_id) |
860 | */ |
861 | static OpenCLExecutionContext create(const std::string& platformName, void* platformID, void* context, void* deviceID); |
862 | |
863 | /** @brief Creates an OpenCL execution context |
864 | * |
865 | * @param context non-empty OpenCL context |
866 | * @param device non-empty OpenCL device (must be a part of context) |
867 | * @param queue non-empty OpenCL queue for the provided context and device |
868 | */ |
869 | static OpenCLExecutionContext create(const Context& context, const Device& device, const ocl::Queue& queue); |
870 | /** @overload */ |
871 | static OpenCLExecutionContext create(const Context& context, const Device& device); |
872 | |
873 | struct Impl; ///< implementation type; only forward-declared in this class |
874 | inline bool empty() const { return !p; } ///< true when no implementation object is attached (p is null) |
875 | void release(); ///< NOTE(review): presumably resets p so that empty() becomes true - confirm |
876 | protected: |
877 | std::shared_ptr<Impl> p; ///< shared implementation object; null for an empty context |
878 | }; |
879 | |
880 | class OpenCLExecutionContextScope // scope guard: binds ctx on construction and re-binds the saved context on destruction |
881 | { |
882 | OpenCLExecutionContext ctx_; ///< context that was current when the guard was created (may be empty) |
883 | public: |
884 | /** Saves the current thread's execution context, then binds ctx; asserts that ctx is not empty. */ |
885 | inline OpenCLExecutionContextScope(const OpenCLExecutionContext& ctx) |
886 | { |
887 | CV_Assert(!ctx.empty()); |
888 | ctx_ = OpenCLExecutionContext::getCurrentRef(); |
889 | ctx.bind(); |
890 | } |
891 | |
892 | // A copy would re-bind the saved context a second time when it is destroyed, so copying is disabled. |
893 | OpenCLExecutionContextScope(const OpenCLExecutionContextScope&) = delete; |
894 | OpenCLExecutionContextScope& operator=(const OpenCLExecutionContextScope&) = delete; |
895 | |
896 | /** Re-binds the saved context; if it was empty, the context bound by the constructor stays bound. */ |
897 | inline ~OpenCLExecutionContextScope() { if (!ctx_.empty()) ctx_.bind(); } |
898 | }; |
899 | |
900 | #ifdef __OPENCV_BUILD |
901 | namespace internal { // declarations compiled only when __OPENCV_BUILD is defined |
902 | // OCL_FORCE_CHECK(condition) is true when isOpenCLForced() returns true; otherwise it takes the value of condition. |
903 | CV_EXPORTS bool isOpenCLForced(); |
904 | #define OCL_FORCE_CHECK(condition) (cv::ocl::internal::isOpenCLForced() || (condition)) |
905 | // OCL_PERFORMANCE_CHECK(condition) is true when isPerformanceCheckBypassed() returns true; otherwise it takes the value of condition. |
906 | CV_EXPORTS bool isPerformanceCheckBypassed(); |
907 | #define OCL_PERFORMANCE_CHECK(condition) (cv::ocl::internal::isPerformanceCheckBypassed() || (condition)) |
908 | // NOTE(review): presumably reports whether u is backed by an OpenCL buffer - confirm in the implementation. |
909 | CV_EXPORTS bool isCLBuffer(UMat& u); |
910 | |
911 | } // namespace internal |
912 | #endif |
913 | |
914 | //! @} |
915 | |
916 | }} |
917 | |
918 | #endif |
919 |
Definitions
- Device
- isAMD
- isIntel
- isNVidia
- getImpl
- empty
- Context
- UserContext
- setUserContext
- getUserContext
- getImpl
- empty
- Platform
- getImpl
- empty
- Queue
- getImpl
- empty
- KernelArg
- Local
- PtrWriteOnly
- PtrReadOnly
- PtrReadWrite
- ReadWrite
- ReadWriteNoSize
- ReadOnly
- WriteOnly
- ReadOnlyNoSize
- WriteOnlyNoSize
- Constant
- Kernel
- set
- set_args_
- set_args_
- args
- Program
- getImpl
- empty
- ProgramSource
- getImpl
- empty
- PlatformInfo
- empty
- OclVectorStrategy
- Image2D
- Timer
- OpenCLExecutionContext
- OpenCLExecutionContext
- ~OpenCLExecutionContext
- OpenCLExecutionContext
- OpenCLExecutionContext
- operator=
- operator=
- empty
- OpenCLExecutionContextScope
- OpenCLExecutionContextScope
Improve your Profiling and Debugging skills
Find out more