1/*M///////////////////////////////////////////////////////////////////////////////////////
2//
3// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4//
5// By downloading, copying, installing or using the software you agree to this license.
6// If you do not agree to this license, do not download, install,
7// copy or use the software.
8//
9//
10// License Agreement
11// For Open Source Computer Vision Library
12//
13// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
14// Third party copyrights are property of their respective owners.
15//
16// Redistribution and use in source and binary forms, with or without modification,
17// are permitted provided that the following conditions are met:
18//
19// * Redistribution's of source code must retain the above copyright notice,
20// this list of conditions and the following disclaimer.
21//
22// * Redistribution's in binary form must reproduce the above copyright notice,
23// this list of conditions and the following disclaimer in the documentation
24// and/or other materials provided with the distribution.
25//
26// * The name of the copyright holders may not be used to endorse or promote products
27// derived from this software without specific prior written permission.
28//
29// This software is provided by the copyright holders and contributors "as is" and
30// any express or implied warranties, including, but not limited to, the implied
31// warranties of merchantability and fitness for a particular purpose are disclaimed.
32// In no event shall the OpenCV Foundation or contributors be liable for any direct,
33// indirect, incidental, special, exemplary, or consequential damages
34// (including, but not limited to, procurement of substitute goods or services;
35// loss of use, data, or profits; or business interruption) however caused
36// and on any theory of liability, whether in contract, strict liability,
37// or tort (including negligence or otherwise) arising in any way out of
38// the use of this software, even if advised of the possibility of such damage.
39//
40//M*/
41
42#include "precomp.hpp"
43
44#ifndef HAVE_OPENCL
45#include "ocl_disabled.impl.hpp"
46#else // HAVE_OPENCL
47
48#include <list>
49#include <map>
50#include <deque>
51#include <set>
52#include <string>
53#include <sstream>
54#include <fstream>
55#if !(defined _MSC_VER) || (defined _MSC_VER && _MSC_VER > 1700)
56#include <inttypes.h>
57#endif
58
59#include <opencv2/core/utils/configuration.private.hpp>
60
61#include <opencv2/core/utils/logger.defines.hpp>
62#undef CV_LOG_STRIP_LEVEL
63#define CV_LOG_STRIP_LEVEL CV_LOG_LEVEL_DEBUG + 1
64#include <opencv2/core/utils/logger.hpp>
65
66#include "opencv2/core/ocl_genbase.hpp"
67#include "opencl_kernels_core.hpp"
68
69#include "opencv2/core/utils/lock.private.hpp"
70#include "opencv2/core/utils/filesystem.hpp"
71#include "opencv2/core/utils/filesystem.private.hpp"
72
73#define CV__ALLOCATOR_STATS_LOG(...) CV_LOG_VERBOSE(NULL, 0, "OpenCL allocator: " << __VA_ARGS__)
74#include "opencv2/core/utils/allocator_stats.impl.hpp"
75#undef CV__ALLOCATOR_STATS_LOG
76
77#define CV_OPENCL_ALWAYS_SHOW_BUILD_LOG 0
78#define CV_OPENCL_SHOW_BUILD_OPTIONS 0
79#define CV_OPENCL_SHOW_BUILD_KERNELS 0
80
81#define CV_OPENCL_SHOW_RUN_KERNELS 0
82#define CV_OPENCL_SYNC_RUN_KERNELS 0
83#define CV_OPENCL_TRACE_CHECK 0
84
85#define CV_OPENCL_VALIDATE_BINARY_PROGRAMS 1
86
87#define CV_OPENCL_SHOW_SVM_ERROR_LOG 1
88#define CV_OPENCL_SHOW_SVM_LOG 0
89
90#include "opencv2/core/bufferpool.hpp"
91#ifndef LOG_BUFFER_POOL
92# if 0
93# define LOG_BUFFER_POOL printf
94# else
95# define LOG_BUFFER_POOL(...)
96# endif
97#endif
98
99#if CV_OPENCL_SHOW_SVM_LOG
100// TODO add timestamp logging
101#define CV_OPENCL_SVM_TRACE_P printf("line %d (ocl.cpp): ", __LINE__); printf
102#else
103#define CV_OPENCL_SVM_TRACE_P(...)
104#endif
105
106#if CV_OPENCL_SHOW_SVM_ERROR_LOG
107// TODO add timestamp logging
108#define CV_OPENCL_SVM_TRACE_ERROR_P printf("Error on line %d (ocl.cpp): ", __LINE__); printf
109#else
110#define CV_OPENCL_SVM_TRACE_ERROR_P(...)
111#endif
112
113#include "opencv2/core/opencl/runtime/opencl_clblas.hpp"
114#include "opencv2/core/opencl/runtime/opencl_clfft.hpp"
115
116#include "opencv2/core/opencl/runtime/opencl_core.hpp"
117
118#ifdef HAVE_OPENCL_SVM
119#include "opencv2/core/opencl/runtime/opencl_svm_20.hpp"
120#include "opencv2/core/opencl/runtime/opencl_svm_hsa_extension.hpp"
121#include "opencv2/core/opencl/opencl_svm.hpp"
122#endif
123
124#include "umatrix.hpp"
125
126namespace cv { namespace ocl {
127
// Adds intrusive reference counting to the enclosing class: addref()/release()
// plus the `refcount` data member. The macro intentionally ends without a
// semicolon; the use site terminates the `int refcount` declaration.
// release() destroys the object when CV_XADD reports 1 for the decrement,
// unless cv::__termination is set.
#define IMPLEMENT_REFCOUNTABLE() \
    void addref() { CV_XADD(&refcount, 1); } \
    void release() \
    { \
        if (CV_XADD(&refcount, -1) != 1) \
            return; \
        if (!cv::__termination) \
            delete this; \
    } \
    int refcount
132
133static cv::utils::AllocatorStatistics opencl_allocator_stats;
134
135CV_EXPORTS cv::utils::AllocatorStatisticsInterface& getOpenCLAllocatorStatistics();
136cv::utils::AllocatorStatisticsInterface& getOpenCLAllocatorStatistics()
137{
138 return opencl_allocator_stats;
139}
140
141#ifndef _DEBUG
142static bool isRaiseError()
143{
144 static bool initialized = false;
145 static bool value = false;
146 if (!initialized)
147 {
148 value = cv::utils::getConfigurationParameterBool(name: "OPENCV_OPENCL_RAISE_ERROR", defaultValue: false);
149 initialized = true;
150 }
151 return value;
152}
153#endif
154
155static void onOpenCLKernelBuildError()
156{
157 // NB: no need to cache this value
158 bool value = cv::utils::getConfigurationParameterBool(name: "OPENCV_OPENCL_ABORT_ON_BUILD_ERROR", defaultValue: false);
159 if (value)
160 {
161 fprintf(stderr, format: "Abort on OpenCL kernel build failure!\n");
162 abort();
163 }
164}
165
#if CV_OPENCL_TRACE_CHECK
// Prints the status code and the description of a checked OpenCL call to stdout.
static inline
void traceOpenCLCheck(cl_int status, const char* message)
{
    std::ostream& out = std::cout;
    out << "OpenCV(OpenCL:" << status << "): " << message;
    out << std::endl << std::flush;
}
#define CV_OCL_TRACE_CHECK_RESULT(status, message) traceOpenCLCheck(status, message)
#else
// Tracing is compiled out: the macro expands to nothing.
#define CV_OCL_TRACE_CHECK_RESULT(status, message) /* nothing */
#endif
176
// Builds the error text for a failed OpenCL call: symbolic error name, numeric code and call text.
#define CV_OCL_API_ERROR_MSG(check_result, msg) \
    cv::format("OpenCL error %s (%d) during call: %s", getOpenCLErrorString(check_result), check_result, msg)

// Traces the result and raises Error::OpenCLApiCallError when it is not CL_SUCCESS.
// `msg` must be convertible to const char* (enforced at compile time).
#define CV_OCL_CHECK_RESULT(check_result, msg) \
    do { \
        static_assert(std::is_convertible<decltype(msg), const char*>::value, "msg of CV_OCL_CHECK_RESULT must be const char*"); \
        CV_OCL_TRACE_CHECK_RESULT(check_result, msg); \
        if (check_result != CL_SUCCESS) \
        { \
            cv::String error_msg = CV_OCL_API_ERROR_MSG(check_result, msg); \
            CV_Error(Error::OpenCLApiCallError, error_msg); \
        } \
    } while (0)

// Evaluates `expr` (which is expected to store its status in `check_result`) and checks that status.
#define CV_OCL_CHECK_(expr, check_result) \
    do { \
        expr; \
        CV_OCL_CHECK_RESULT(check_result, #expr); \
    } while (0)

// Evaluates `expr` as a cl_int status and checks it.
#define CV_OCL_CHECK(expr) \
    do { \
        cl_int __cl_result = (expr); \
        CV_OCL_CHECK_RESULT(__cl_result, #expr); \
    } while (0)
194
#ifdef _DEBUG
// Debug builds: the "DBG" checks are the regular, always-raising checks.
#define CV_OCL_DBG_CHECK_RESULT(check_result, msg) CV_OCL_CHECK_RESULT(check_result, msg)
#define CV_OCL_DBG_CHECK(expr) CV_OCL_CHECK(expr)
#define CV_OCL_DBG_CHECK_(expr, check_result) CV_OCL_CHECK_(expr, check_result)
#else
// Non-debug builds: a failed call raises Error::OpenCLApiCallError only when isRaiseError() returns true.
// `msg` must be convertible to const char* (enforced at compile time).
#define CV_OCL_DBG_CHECK_RESULT(check_result, msg) \
    do { \
        static_assert(std::is_convertible<decltype(msg), const char*>::value, "msg of CV_OCL_DBG_CHECK_RESULT must be const char*"); \
        CV_OCL_TRACE_CHECK_RESULT(check_result, msg); \
        if (check_result != CL_SUCCESS && isRaiseError()) \
        { \
            cv::String error_msg = CV_OCL_API_ERROR_MSG(check_result, msg); \
            CV_Error(Error::OpenCLApiCallError, error_msg); \
        } \
    } while (0)
#define CV_OCL_DBG_CHECK_(expr, check_result) \
    do { \
        expr; \
        CV_OCL_DBG_CHECK_RESULT(check_result, #expr); \
    } while (0)
#define CV_OCL_DBG_CHECK(expr) \
    do { \
        cl_int __cl_result = (expr); \
        CV_OCL_DBG_CHECK_RESULT(__cl_result, #expr); \
    } while (0)
#endif
213
214
215static const bool CV_OPENCL_CACHE_ENABLE = utils::getConfigurationParameterBool(name: "OPENCV_OPENCL_CACHE_ENABLE", defaultValue: true);
216static const bool CV_OPENCL_CACHE_WRITE = utils::getConfigurationParameterBool(name: "OPENCV_OPENCL_CACHE_WRITE", defaultValue: true);
217static const bool CV_OPENCL_CACHE_LOCK_ENABLE = utils::getConfigurationParameterBool(name: "OPENCV_OPENCL_CACHE_LOCK_ENABLE", defaultValue: true);
218static const bool CV_OPENCL_CACHE_CLEANUP = utils::getConfigurationParameterBool(name: "OPENCV_OPENCL_CACHE_CLEANUP", defaultValue: true);
219
220#if CV_OPENCL_VALIDATE_BINARY_PROGRAMS
221static const bool CV_OPENCL_VALIDATE_BINARY_PROGRAMS_VALUE = utils::getConfigurationParameterBool(name: "OPENCV_OPENCL_VALIDATE_BINARY_PROGRAMS", defaultValue: false);
222#endif
223
224// Option to disable calls clEnqueueReadBufferRect / clEnqueueWriteBufferRect / clEnqueueCopyBufferRect
225static const bool CV_OPENCL_DISABLE_BUFFER_RECT_OPERATIONS = utils::getConfigurationParameterBool(name: "OPENCV_OPENCL_DISABLE_BUFFER_RECT_OPERATIONS",
226#ifdef __APPLE__
227 true
228#else
229 defaultValue: false
230#endif
231);
232
233static String getBuildExtraOptions()
234{
235 static String param_buildExtraOptions;
236 static bool initialized = false;
237 if (!initialized)
238 {
239 param_buildExtraOptions = utils::getConfigurationParameterString(name: "OPENCV_OPENCL_BUILD_EXTRA_OPTIONS", defaultValue: "");
240 initialized = true;
241 if (!param_buildExtraOptions.empty())
242 CV_LOG_WARNING(NULL, "OpenCL: using extra build options: '" << param_buildExtraOptions << "'");
243 }
244 return param_buildExtraOptions;
245}
246
247static const bool CV_OPENCL_ENABLE_MEM_USE_HOST_PTR = utils::getConfigurationParameterBool(name: "OPENCV_OPENCL_ENABLE_MEM_USE_HOST_PTR", defaultValue: true);
248static const size_t CV_OPENCL_ALIGNMENT_MEM_USE_HOST_PTR = utils::getConfigurationParameterSizeT(name: "OPENCV_OPENCL_ALIGNMENT_MEM_USE_HOST_PTR", defaultValue: 4);
249
250
251struct UMat2D
252{
253 UMat2D(const UMat& m)
254 {
255 offset = (int)m.offset;
256 step = (int)m.step;
257 rows = m.rows;
258 cols = m.cols;
259 }
260 int offset;
261 int step;
262 int rows;
263 int cols;
264};
265
266struct UMat3D
267{
268 UMat3D(const UMat& m)
269 {
270 offset = (int)m.offset;
271 step = (int)m.step.p[1];
272 slicestep = (int)m.step.p[0];
273 slices = (int)m.size.p[0];
274 rows = m.size.p[1];
275 cols = m.size.p[2];
276 }
277 int offset;
278 int slicestep;
279 int step;
280 int slices;
281 int rows;
282 int cols;
283};
284
285// Computes 64-bit "cyclic redundancy check" sum, as specified in ECMA-182
286static uint64 crc64( const uchar* data, size_t size, uint64 crc0=0 )
287{
288 static uint64 table[256];
289 static bool initialized = false;
290
291 if( !initialized )
292 {
293 for( int i = 0; i < 256; i++ )
294 {
295 uint64 c = i;
296 for( int j = 0; j < 8; j++ )
297 c = ((c & 1) ? CV_BIG_UINT(0xc96c5795d7870f42) : 0) ^ (c >> 1);
298 table[i] = c;
299 }
300 initialized = true;
301 }
302
303 uint64 crc = ~crc0;
304 for( size_t idx = 0; idx < size; idx++ )
305 crc = table[(uchar)crc ^ data[idx]] ^ (crc >> 8);
306
307 return ~crc;
308}
309
310#if OPENCV_HAVE_FILESYSTEM_SUPPORT
311struct OpenCLBinaryCacheConfigurator
312{
313 cv::String cache_path_;
314 cv::String cache_lock_filename_;
315 cv::Ptr<utils::fs::FileLock> cache_lock_;
316
317 typedef std::map<std::string, std::string> ContextCacheType;
318 ContextCacheType prepared_contexts_;
319 Mutex mutex_prepared_contexts_;
320
321 OpenCLBinaryCacheConfigurator()
322 {
323 CV_LOG_DEBUG(NULL, "Initializing OpenCL cache configuration...");
324 if (!CV_OPENCL_CACHE_ENABLE)
325 {
326 CV_LOG_INFO(NULL, "OpenCL cache is disabled");
327 return;
328 }
329 cache_path_ = utils::fs::getCacheDirectory(sub_directory_name: "opencl_cache", configuration_name: "OPENCV_OPENCL_CACHE_DIR");
330 if (cache_path_.empty())
331 {
332 CV_LOG_INFO(NULL, "Specify OPENCV_OPENCL_CACHE_DIR configuration parameter to enable OpenCL cache");
333 }
334 do
335 {
336 try
337 {
338 if (cache_path_.empty())
339 break;
340 if (cache_path_ == "disabled")
341 break;
342 if (!utils::fs::createDirectories(path: cache_path_))
343 {
344 CV_LOG_DEBUG(NULL, "Can't use OpenCL cache directory: " << cache_path_);
345 clear();
346 break;
347 }
348
349 if (CV_OPENCL_CACHE_LOCK_ENABLE)
350 {
351 cache_lock_filename_ = cache_path_ + ".lock";
352 if (!utils::fs::exists(path: cache_lock_filename_))
353 {
354 CV_LOG_DEBUG(NULL, "Creating lock file... (" << cache_lock_filename_ << ")");
355 std::ofstream lock_filename(cache_lock_filename_.c_str(), std::ios::out);
356 if (!lock_filename.is_open())
357 {
358 CV_LOG_WARNING(NULL, "Can't create lock file for OpenCL program cache: " << cache_lock_filename_);
359 break;
360 }
361 }
362
363 try
364 {
365 cache_lock_ = makePtr<utils::fs::FileLock>(a1: cache_lock_filename_.c_str());
366 CV_LOG_VERBOSE(NULL, 0, "Checking cache lock... (" << cache_lock_filename_ << ")");
367 {
368 utils::shared_lock_guard<utils::fs::FileLock> lock(*cache_lock_);
369 }
370 CV_LOG_VERBOSE(NULL, 0, "Checking cache lock... Done!");
371 }
372 catch (const cv::Exception& e)
373 {
374 CV_LOG_WARNING(NULL, "Can't create OpenCL program cache lock: " << cache_lock_filename_ << std::endl << e.what());
375 }
376 catch (...)
377 {
378 CV_LOG_WARNING(NULL, "Can't create OpenCL program cache lock: " << cache_lock_filename_);
379 }
380 }
381 else
382 {
383 if (CV_OPENCL_CACHE_WRITE)
384 {
385 CV_LOG_WARNING(NULL, "OpenCL cache lock is disabled while cache write is allowed "
386 "(not safe for multiprocess environment)");
387 }
388 else
389 {
390 CV_LOG_INFO(NULL, "OpenCL cache lock is disabled");
391 }
392 }
393 }
394 catch (const cv::Exception& e)
395 {
396 CV_LOG_WARNING(NULL, "Can't prepare OpenCL program cache: " << cache_path_ << std::endl << e.what());
397 clear();
398 }
399 } while (0);
400 if (!cache_path_.empty())
401 {
402 if (cache_lock_.empty() && CV_OPENCL_CACHE_LOCK_ENABLE)
403 {
404 CV_LOG_WARNING(NULL, "Initialized OpenCL cache directory, but interprocess synchronization lock is not available. "
405 "Consider to disable OpenCL cache: OPENCV_OPENCL_CACHE_DIR=disabled");
406 }
407 else
408 {
409 CV_LOG_INFO(NULL, "Successfully initialized OpenCL cache directory: " << cache_path_);
410 }
411 }
412 }
413
414 void clear()
415 {
416 cache_path_.clear();
417 cache_lock_filename_.clear();
418 cache_lock_.release();
419 }
420
421 std::string prepareCacheDirectoryForContext(const std::string& ctx_prefix,
422 const std::string& cleanup_prefix)
423 {
424 if (cache_path_.empty())
425 return std::string();
426
427 AutoLock lock(mutex_prepared_contexts_);
428
429 ContextCacheType::iterator found_it = prepared_contexts_.find(x: ctx_prefix);
430 if (found_it != prepared_contexts_.end())
431 return found_it->second;
432
433 CV_LOG_INFO(NULL, "Preparing OpenCL cache configuration for context: " << ctx_prefix);
434
435 std::string target_directory = cache_path_ + ctx_prefix + "/";
436 bool result = utils::fs::isDirectory(path: target_directory);
437 if (!result)
438 {
439 try
440 {
441 CV_LOG_VERBOSE(NULL, 0, "Creating directory: " << target_directory);
442 if (utils::fs::createDirectories(path: target_directory))
443 {
444 result = true;
445 }
446 else
447 {
448 CV_LOG_WARNING(NULL, "Can't create directory: " << target_directory);
449 }
450 }
451 catch (const cv::Exception& e)
452 {
453 CV_LOG_ERROR(NULL, "Can't create OpenCL program cache directory for context: " << target_directory << std::endl << e.what());
454 }
455 }
456 target_directory = result ? target_directory : std::string();
457 prepared_contexts_.insert(x: std::pair<std::string, std::string>(ctx_prefix, target_directory));
458
459 if (result && CV_OPENCL_CACHE_CLEANUP && CV_OPENCL_CACHE_WRITE && !cleanup_prefix.empty())
460 {
461 try
462 {
463 std::vector<String> entries;
464 utils::fs::glob_relative(directory: cache_path_, pattern: cleanup_prefix + "*", result&: entries, recursive: false, includeDirectories: true);
465 std::vector<String> remove_entries;
466 for (size_t i = 0; i < entries.size(); i++)
467 {
468 const String& name = entries[i];
469 if (0 == name.find(str: cleanup_prefix))
470 {
471 if (0 == name.find(str: ctx_prefix))
472 continue; // skip current
473 remove_entries.push_back(x: name);
474 }
475 }
476 if (!remove_entries.empty())
477 {
478 CV_LOG_WARNING(NULL, (remove_entries.size() == 1
479 ? "Detected OpenCL cache directory for other version of OpenCL device."
480 : "Detected OpenCL cache directories for other versions of OpenCL device.")
481 << " We assume that these directories are obsolete after OpenCL runtime/drivers upgrade.");
482 CV_LOG_WARNING(NULL, "Trying to remove these directories...");
483 for (size_t i = 0; i < remove_entries.size(); i++)
484 {
485 CV_LOG_WARNING(NULL, "- " << remove_entries[i]);
486 }
487 CV_LOG_WARNING(NULL, "Note: You can disable this behavior via this option: OPENCV_OPENCL_CACHE_CLEANUP=0");
488
489 for (size_t i = 0; i < remove_entries.size(); i++)
490 {
491 const String& name = remove_entries[i];
492 cv::String path = utils::fs::join(base: cache_path_, path: name);
493 try
494 {
495 utils::fs::remove_all(path);
496 CV_LOG_WARNING(NULL, "Removed: " << path);
497 }
498 catch (const cv::Exception& e)
499 {
500 CV_LOG_ERROR(NULL, "Exception during removal of obsolete OpenCL cache directory: " << path << std::endl << e.what());
501 }
502 }
503 }
504 }
505 catch (...)
506 {
507 CV_LOG_WARNING(NULL, "Can't check for obsolete OpenCL cache directories");
508 }
509 }
510
511 CV_LOG_VERBOSE(NULL, 1, " Result: " << (target_directory.empty() ? std::string("Failed") : target_directory));
512 return target_directory;
513 }
514
515 static OpenCLBinaryCacheConfigurator& getSingletonInstance()
516 {
517 CV_SINGLETON_LAZY_INIT_REF(OpenCLBinaryCacheConfigurator, new OpenCLBinaryCacheConfigurator());
518 }
519};
520class BinaryProgramFile
521{
522 enum { MAX_ENTRIES = 64 };
523
524 typedef unsigned int uint32_t;
525
526 struct CV_DECL_ALIGNED(4) FileHeader
527 {
528 uint32_t sourceSignatureSize;
529 //char sourceSignature[];
530 };
531
532 struct CV_DECL_ALIGNED(4) FileTable
533 {
534 uint32_t numberOfEntries;
535 //uint32_t firstEntryOffset[];
536 };
537
538 struct CV_DECL_ALIGNED(4) FileEntry
539 {
540 uint32_t nextEntryFileOffset; // 0 for the last entry in chain
541 uint32_t keySize;
542 uint32_t dataSize;
543 //char key[];
544 //char data[];
545 };
546
547 const std::string fileName_;
548 const char* const sourceSignature_;
549 const size_t sourceSignatureSize_;
550
551 std::fstream f;
552
553 uint32_t entryOffsets[MAX_ENTRIES];
554
555 uint32_t getHash(const std::string& options)
556 {
557 uint64 hash = crc64(data: (const uchar*)options.c_str(), size: options.size(), crc0: 0);
558 return hash & (MAX_ENTRIES - 1);
559 }
560
561 inline size_t getFileSize()
562 {
563 size_t pos = (size_t)f.tellg();
564 f.seekg(0, std::fstream::end);
565 size_t fileSize = (size_t)f.tellg();
566 f.seekg(pos, std::fstream::beg);
567 return fileSize;
568 }
569 inline uint32_t readUInt32()
570 {
571 uint32_t res = 0;
572 f.read(s: (char*)&res, n: sizeof(uint32_t));
573 CV_Assert(!f.fail());
574 return res;
575 }
576 inline void writeUInt32(const uint32_t value)
577 {
578 uint32_t v = value;
579 f.write(s: (char*)&v, n: sizeof(uint32_t));
580 CV_Assert(!f.fail());
581 }
582
583 inline void seekReadAbsolute(size_t pos)
584 {
585 f.seekg(pos, std::fstream::beg);
586 CV_Assert(!f.fail());
587 }
588 inline void seekReadRelative(size_t pos)
589 {
590 f.seekg(pos, std::fstream::cur);
591 CV_Assert(!f.fail());
592 }
593
594 inline void seekWriteAbsolute(size_t pos)
595 {
596 f.seekp(pos, std::fstream::beg);
597 CV_Assert(!f.fail());
598 }
599
600 void clearFile()
601 {
602 f.close();
603 if (0 != remove(filename: fileName_.c_str()))
604 CV_LOG_ERROR(NULL, "Can't remove: " << fileName_);
605 return;
606 }
607
608public:
609 BinaryProgramFile(const std::string& fileName, const char* sourceSignature)
610 : fileName_(fileName), sourceSignature_(sourceSignature), sourceSignatureSize_(sourceSignature_ ? strlen(s: sourceSignature_) : 0)
611 {
612 CV_StaticAssert(sizeof(uint32_t) == 4, "");
613 CV_Assert(sourceSignature_ != NULL);
614 CV_Assert(sourceSignatureSize_ > 0);
615 memset(s: entryOffsets, c: 0, n: sizeof(entryOffsets));
616
617 f.rdbuf()->pubsetbuf(s: 0, n: 0); // disable buffering
618 f.open(s: fileName_.c_str(), mode: std::ios::in|std::ios::out|std::ios::binary);
619 if(f.is_open() && getFileSize() > 0)
620 {
621 bool isValid = false;
622 try
623 {
624 uint32_t fileSourceSignatureSize = readUInt32();
625 if (fileSourceSignatureSize == sourceSignatureSize_)
626 {
627 cv::AutoBuffer<char> fileSourceSignature(fileSourceSignatureSize + 1);
628 f.read(s: fileSourceSignature.data(), n: fileSourceSignatureSize);
629 if (f.eof())
630 {
631 CV_LOG_ERROR(NULL, "Unexpected EOF");
632 }
633 else if (memcmp(s1: sourceSignature, s2: fileSourceSignature.data(), n: fileSourceSignatureSize) == 0)
634 {
635 isValid = true;
636 }
637 }
638 if (!isValid)
639 {
640 CV_LOG_ERROR(NULL, "Source code signature/hash mismatch (program source code has been changed/updated)");
641 }
642 }
643 catch (const cv::Exception& e)
644 {
645 CV_LOG_ERROR(NULL, "Can't open binary program file: " << fileName << " : " << e.what());
646 }
647 catch (...)
648 {
649 CV_LOG_ERROR(NULL, "Can't open binary program file: " << fileName << " : Unknown error");
650 }
651 if (!isValid)
652 {
653 clearFile();
654 }
655 else
656 {
657 seekReadAbsolute(pos: 0);
658 }
659 }
660 }
661
662 bool read(const std::string& key, std::vector<char>& buf)
663 {
664 if (!f.is_open())
665 return false;
666
667 size_t fileSize = getFileSize();
668 if (fileSize == 0)
669 {
670 CV_LOG_ERROR(NULL, "Invalid file (empty): " << fileName_);
671 clearFile();
672 return false;
673 }
674 seekReadAbsolute(pos: 0);
675
676 // bypass FileHeader
677 uint32_t fileSourceSignatureSize = readUInt32();
678 CV_Assert(fileSourceSignatureSize > 0);
679 seekReadRelative(pos: fileSourceSignatureSize);
680
681 uint32_t numberOfEntries = readUInt32();
682 CV_Assert(numberOfEntries > 0);
683 if (numberOfEntries != MAX_ENTRIES)
684 {
685 CV_LOG_ERROR(NULL, "Invalid file: " << fileName_);
686 clearFile();
687 return false;
688 }
689 f.read(s: (char*)&entryOffsets[0], n: sizeof(entryOffsets));
690 CV_Assert(!f.fail());
691
692 uint32_t entryNum = getHash(options: key);
693
694 uint32_t entryOffset = entryOffsets[entryNum];
695 FileEntry entry;
696 while (entryOffset > 0)
697 {
698 seekReadAbsolute(pos: entryOffset);
699 //CV_StaticAssert(sizeof(entry) == sizeof(uint32_t) * 3, "");
700 f.read(s: (char*)&entry, n: sizeof(entry));
701 CV_Assert(!f.fail());
702 cv::AutoBuffer<char> fileKey(entry.keySize + 1);
703 if (key.size() == entry.keySize)
704 {
705 if (entry.keySize > 0)
706 {
707 f.read(s: fileKey.data(), n: entry.keySize);
708 CV_Assert(!f.fail());
709 }
710 if (memcmp(s1: fileKey.data(), s2: key.c_str(), n: entry.keySize) == 0)
711 {
712 buf.resize(new_size: entry.dataSize);
713 f.read(s: &buf[0], n: entry.dataSize);
714 CV_Assert(!f.fail());
715 seekReadAbsolute(pos: 0);
716 CV_LOG_VERBOSE(NULL, 0, "Read...");
717 return true;
718 }
719 }
720 if (entry.nextEntryFileOffset == 0)
721 break;
722 entryOffset = entry.nextEntryFileOffset;
723 }
724 return false;
725 }
726
727 bool write(const std::string& key, std::vector<char>& buf)
728 {
729 if (!f.is_open())
730 {
731 f.open(s: fileName_.c_str(), mode: std::ios::in|std::ios::out|std::ios::binary);
732 if (!f.is_open())
733 {
734 f.open(s: fileName_.c_str(), mode: std::ios::out|std::ios::binary);
735 if (!f.is_open())
736 {
737 CV_LOG_ERROR(NULL, "Can't create file: " << fileName_);
738 return false;
739 }
740 }
741 }
742
743 size_t fileSize = getFileSize();
744 if (fileSize == 0)
745 {
746 // Write header
747 seekWriteAbsolute(pos: 0);
748 writeUInt32(value: (uint32_t)sourceSignatureSize_);
749 f.write(s: sourceSignature_, n: sourceSignatureSize_);
750 CV_Assert(!f.fail());
751
752 writeUInt32(value: MAX_ENTRIES);
753 memset(s: entryOffsets, c: 0, n: sizeof(entryOffsets));
754 f.write(s: (char*)entryOffsets, n: sizeof(entryOffsets));
755 CV_Assert(!f.fail());
756 f.flush();
757 CV_Assert(!f.fail());
758 f.close();
759 f.open(s: fileName_.c_str(), mode: std::ios::in|std::ios::out|std::ios::binary);
760 CV_Assert(f.is_open());
761 fileSize = getFileSize();
762 }
763 seekReadAbsolute(pos: 0);
764
765 // bypass FileHeader
766 uint32_t fileSourceSignatureSize = readUInt32();
767 CV_Assert(fileSourceSignatureSize == sourceSignatureSize_);
768 seekReadRelative(pos: fileSourceSignatureSize);
769
770 uint32_t numberOfEntries = readUInt32();
771 CV_Assert(numberOfEntries > 0);
772 if (numberOfEntries != MAX_ENTRIES)
773 {
774 CV_LOG_ERROR(NULL, "Invalid file: " << fileName_);
775 clearFile();
776 return false;
777 }
778 size_t tableEntriesOffset = (size_t)f.tellg();
779 f.read(s: (char*)&entryOffsets[0], n: sizeof(entryOffsets));
780 CV_Assert(!f.fail());
781
782 uint32_t entryNum = getHash(options: key);
783
784 uint32_t entryOffset = entryOffsets[entryNum];
785 FileEntry entry;
786 while (entryOffset > 0)
787 {
788 seekReadAbsolute(pos: entryOffset);
789 //CV_StaticAssert(sizeof(entry) == sizeof(uint32_t) * 3, "");
790 f.read(s: (char*)&entry, n: sizeof(entry));
791 CV_Assert(!f.fail());
792 cv::AutoBuffer<char> fileKey(entry.keySize + 1);
793 if (key.size() == entry.keySize)
794 {
795 if (entry.keySize > 0)
796 {
797 f.read(s: fileKey.data(), n: entry.keySize);
798 CV_Assert(!f.fail());
799 }
800 if (0 == memcmp(s1: fileKey.data(), s2: key.c_str(), n: entry.keySize))
801 {
802 // duplicate
803 CV_LOG_VERBOSE(NULL, 0, "Duplicate key ignored: " << fileName_);
804 return false;
805 }
806 }
807 if (entry.nextEntryFileOffset == 0)
808 break;
809 entryOffset = entry.nextEntryFileOffset;
810 }
811 seekReadAbsolute(pos: 0);
812 if (entryOffset > 0)
813 {
814 seekWriteAbsolute(pos: entryOffset);
815 entry.nextEntryFileOffset = (uint32_t)fileSize;
816 f.write(s: (char*)&entry, n: sizeof(entry));
817 CV_Assert(!f.fail());
818 }
819 else
820 {
821 entryOffsets[entryNum] = (uint32_t)fileSize;
822 seekWriteAbsolute(pos: tableEntriesOffset);
823 f.write(s: (char*)entryOffsets, n: sizeof(entryOffsets));
824 CV_Assert(!f.fail());
825 }
826 seekWriteAbsolute(pos: fileSize);
827 entry.nextEntryFileOffset = 0;
828 entry.dataSize = (uint32_t)buf.size();
829 entry.keySize = (uint32_t)key.size();
830 f.write(s: (char*)&entry, n: sizeof(entry));
831 CV_Assert(!f.fail());
832 f.write(s: key.c_str(), n: entry.keySize);
833 CV_Assert(!f.fail());
834 f.write(s: &buf[0], n: entry.dataSize);
835 CV_Assert(!f.fail());
836 f.flush();
837 CV_Assert(!f.fail());
838 CV_LOG_VERBOSE(NULL, 0, "Write... (" << buf.size() << " bytes)");
839 return true;
840 }
841};
842#endif // OPENCV_HAVE_FILESYSTEM_SUPPORT
843
844
845
846struct OpenCLExecutionContext::Impl
847{
848 ocl::Context context_;
849 int device_; // device index in context
850 ocl::Queue queue_;
851 int useOpenCL_;
852
853protected:
854 Impl() = delete;
855
856 void _init_device(cl_device_id deviceID)
857 {
858 CV_Assert(deviceID);
859 int ndevices = (int)context_.ndevices();
860 CV_Assert(ndevices > 0);
861 bool found = false;
862 for (int i = 0; i < ndevices; i++)
863 {
864 ocl::Device d = context_.device(idx: i);
865 cl_device_id dhandle = (cl_device_id)d.ptr();
866 if (dhandle == deviceID)
867 {
868 device_ = i;
869 found = true;
870 break;
871 }
872 }
873 CV_Assert(found && "OpenCL device can't work with passed OpenCL context");
874 }
875
876 void _init_device(const ocl::Device& device)
877 {
878 CV_Assert(device.ptr());
879 int ndevices = (int)context_.ndevices();
880 CV_Assert(ndevices > 0);
881 bool found = false;
882 for (int i = 0; i < ndevices; i++)
883 {
884 ocl::Device d = context_.device(idx: i);
885 if (d.getImpl() == device.getImpl())
886 {
887 device_ = i;
888 found = true;
889 break;
890 }
891 }
892 CV_Assert(found && "OpenCL device can't work with passed OpenCL context");
893 }
894
895public:
896 Impl(cl_platform_id platformID, cl_context context, cl_device_id deviceID)
897 : device_(0), useOpenCL_(-1)
898 {
899 CV_UNUSED(platformID);
900 CV_Assert(context);
901 CV_Assert(deviceID);
902
903 context_ = Context::fromHandle(context);
904 _init_device(deviceID);
905 queue_ = Queue(context_, context_.device(idx: device_));
906 }
907
908 Impl(const ocl::Context& context, const ocl::Device& device, const ocl::Queue& queue)
909 : device_(0), useOpenCL_(-1)
910 {
911 CV_Assert(context.ptr());
912 CV_Assert(device.ptr());
913
914 context_ = context;
915 _init_device(device);
916 queue_ = queue;
917 }
918
919 Impl(const ocl::Context& context, const ocl::Device& device)
920 : device_(0), useOpenCL_(-1)
921 {
922 CV_Assert(context.ptr());
923 CV_Assert(device.ptr());
924
925 context_ = context;
926 _init_device(device);
927 queue_ = Queue(context_, context_.device(idx: device_));
928 }
929
930 Impl(const ocl::Context& context, const int device, const ocl::Queue& queue)
931 : context_(context)
932 , device_(device)
933 , queue_(queue)
934 , useOpenCL_(-1)
935 {
936 // nothing
937 }
938 Impl(const Impl& other)
939 : context_(other.context_)
940 , device_(other.device_)
941 , queue_(other.queue_)
942 , useOpenCL_(-1)
943 {
944 // nothing
945 }
946
947 inline bool useOpenCL() const { return const_cast<Impl*>(this)->useOpenCL(); }
948 bool useOpenCL()
949 {
950 if (useOpenCL_ < 0)
951 {
952 try
953 {
954 useOpenCL_ = 0;
955 if (!context_.empty() && context_.ndevices() > 0)
956 {
957 const Device& d = context_.device(idx: device_);
958 useOpenCL_ = d.available();
959 }
960 }
961 catch (const cv::Exception&)
962 {
963 // nothing
964 }
965 if (!useOpenCL_)
966 CV_LOG_INFO(NULL, "OpenCL: can't use OpenCL execution context");
967 }
968 return useOpenCL_ > 0;
969 }
970
971 void setUseOpenCL(bool flag)
972 {
973 if (!flag)
974 useOpenCL_ = 0;
975 else
976 useOpenCL_ = -1;
977 }
978
979 static const std::shared_ptr<Impl>& getInitializedExecutionContext()
980 {
981 CV_TRACE_FUNCTION();
982
983 CV_LOG_INFO(NULL, "OpenCL: initializing thread execution context");
984
985 static bool initialized = false;
986 static std::shared_ptr<Impl> g_primaryExecutionContext;
987
988 if (!initialized)
989 {
990 cv::AutoLock lock(getInitializationMutex());
991 if (!initialized)
992 {
993 CV_LOG_INFO(NULL, "OpenCL: creating new execution context...");
994 try
995 {
996 Context c = ocl::Context::create(configuration: std::string());
997 if (c.ndevices())
998 {
999 int deviceId = 0;
1000 auto& d = c.device(idx: deviceId);
1001 if (d.available())
1002 {
1003 auto q = ocl::Queue(c, d);
1004 if (!q.ptr())
1005 {
1006 CV_LOG_ERROR(NULL, "OpenCL: Can't create default OpenCL queue");
1007 }
1008 else
1009 {
1010 g_primaryExecutionContext = std::make_shared<Impl>(args&: c, args&: deviceId, args&: q);
1011 CV_LOG_INFO(NULL, "OpenCL: device=" << d.name());
1012 }
1013 }
1014 else
1015 {
1016 CV_LOG_ERROR(NULL, "OpenCL: OpenCL device is not available (CL_DEVICE_AVAILABLE returns false)");
1017 }
1018 }
1019 else
1020 {
1021 CV_LOG_INFO(NULL, "OpenCL: context is not available/disabled");
1022 }
1023 }
1024 catch (const std::exception& e)
1025 {
1026 CV_LOG_INFO(NULL, "OpenCL: Can't initialize OpenCL context/device/queue: " << e.what());
1027 }
1028 catch (...)
1029 {
1030 CV_LOG_WARNING(NULL, "OpenCL: Can't initialize OpenCL context/device/queue: unknown C++ exception");
1031 }
1032 initialized = true;
1033 }
1034 }
1035 return g_primaryExecutionContext;
1036 }
1037};
1038
1039Context& OpenCLExecutionContext::getContext() const
1040{
1041 CV_Assert(p);
1042 return p->context_;
1043}
1044Device& OpenCLExecutionContext::getDevice() const
1045{
1046 CV_Assert(p);
1047 return p->context_.device(idx: p->device_);
1048}
1049Queue& OpenCLExecutionContext::getQueue() const
1050{
1051 CV_Assert(p);
1052 return p->queue_;
1053}
1054
1055bool OpenCLExecutionContext::useOpenCL() const
1056{
1057 if (p)
1058 return p->useOpenCL();
1059 return false;
1060}
1061void OpenCLExecutionContext::setUseOpenCL(bool flag)
1062{
1063 CV_Assert(p);
1064 p->setUseOpenCL(flag);
1065}
1066
1067/* static */
1068OpenCLExecutionContext& OpenCLExecutionContext::getCurrent()
1069{
1070 CV_TRACE_FUNCTION();
1071 CoreTLSData& data = getCoreTlsData();
1072 OpenCLExecutionContext& c = data.oclExecutionContext;
1073 if (!data.oclExecutionContextInitialized)
1074 {
1075 data.oclExecutionContextInitialized = true;
1076 if (c.empty() && haveOpenCL())
1077 c.p = Impl::getInitializedExecutionContext();
1078 }
1079 return c;
1080}
1081
1082/* static */
1083OpenCLExecutionContext& OpenCLExecutionContext::getCurrentRef()
1084{
1085 CV_TRACE_FUNCTION();
1086 CoreTLSData& data = getCoreTlsData();
1087 OpenCLExecutionContext& c = data.oclExecutionContext;
1088 return c;
1089}
1090
1091void OpenCLExecutionContext::bind() const
1092{
1093 CV_TRACE_FUNCTION();
1094 CV_Assert(p);
1095 CoreTLSData& data = getCoreTlsData();
1096 data.oclExecutionContext = *this;
1097 data.oclExecutionContextInitialized = true;
1098 data.useOpenCL = p->useOpenCL_; // propagate "-1", avoid call useOpenCL()
1099}
1100
1101
1102OpenCLExecutionContext OpenCLExecutionContext::cloneWithNewQueue() const
1103{
1104 CV_TRACE_FUNCTION();
1105 CV_Assert(p);
1106 const Queue q(getContext(), getDevice());
1107 return cloneWithNewQueue(q);
1108}
1109
1110OpenCLExecutionContext OpenCLExecutionContext::cloneWithNewQueue(const ocl::Queue& q) const
1111{
1112 CV_TRACE_FUNCTION();
1113 CV_Assert(p);
1114 CV_Assert(q.ptr() != NULL);
1115 OpenCLExecutionContext c;
1116 c.p = std::make_shared<Impl>(args&: p->context_, args&: p->device_, args: q);
1117 return c;
1118}
1119
1120/* static */
1121OpenCLExecutionContext OpenCLExecutionContext::create(const Context& context, const Device& device, const ocl::Queue& queue)
1122{
1123 CV_TRACE_FUNCTION();
1124 if (!haveOpenCL())
1125 CV_Error(cv::Error::OpenCLApiCallError, "OpenCL runtime is not available!");
1126
1127 CV_Assert(!context.empty());
1128 CV_Assert(context.ptr());
1129 CV_Assert(!device.empty());
1130 CV_Assert(device.ptr());
1131 OpenCLExecutionContext ctx;
1132 ctx.p = std::make_shared<OpenCLExecutionContext::Impl>(args: context, args: device, args: queue);
1133 return ctx;
1134
1135}
1136
1137/* static */
1138OpenCLExecutionContext OpenCLExecutionContext::create(const Context& context, const Device& device)
1139{
1140 CV_TRACE_FUNCTION();
1141 if (!haveOpenCL())
1142 CV_Error(cv::Error::OpenCLApiCallError, "OpenCL runtime is not available!");
1143
1144 CV_Assert(!context.empty());
1145 CV_Assert(context.ptr());
1146 CV_Assert(!device.empty());
1147 CV_Assert(device.ptr());
1148 OpenCLExecutionContext ctx;
1149 ctx.p = std::make_shared<OpenCLExecutionContext::Impl>(args: context, args: device);
1150 return ctx;
1151
1152}
1153
1154void OpenCLExecutionContext::release()
1155{
1156 CV_TRACE_FUNCTION();
1157 p.reset();
1158}
1159
1160
1161
1162// true if we have initialized OpenCL subsystem with available platforms
1163static bool g_isOpenCLInitialized = false;
1164static bool g_isOpenCLAvailable = false;
1165
1166bool haveOpenCL()
1167{
1168 CV_TRACE_FUNCTION();
1169
1170 if (!g_isOpenCLInitialized)
1171 {
1172 CV_TRACE_REGION("Init_OpenCL_Runtime");
1173 std::string envPath = utils::getConfigurationParameterString(name: "OPENCV_OPENCL_RUNTIME");
1174 if (!envPath.empty())
1175 {
1176 if (envPath == "disabled")
1177 {
1178 g_isOpenCLAvailable = false;
1179 g_isOpenCLInitialized = true;
1180 return false;
1181 }
1182 }
1183
1184 cv::AutoLock lock(getInitializationMutex());
1185 CV_LOG_INFO(NULL, "Initialize OpenCL runtime...");
1186 try
1187 {
1188 cl_uint n = 0;
1189 g_isOpenCLAvailable = ::clGetPlatformIDs(0, NULL, &n) == CL_SUCCESS;
1190 g_isOpenCLAvailable &= n > 0;
1191 CV_LOG_INFO(NULL, "OpenCL: found " << n << " platforms");
1192 }
1193 catch (...)
1194 {
1195 g_isOpenCLAvailable = false;
1196 }
1197 g_isOpenCLInitialized = true;
1198 }
1199 return g_isOpenCLAvailable;
1200}
1201
1202bool useOpenCL()
1203{
1204 CoreTLSData& data = getCoreTlsData();
1205 if (data.useOpenCL < 0)
1206 {
1207 try
1208 {
1209 data.useOpenCL = 0;
1210 if (haveOpenCL())
1211 {
1212 auto c = OpenCLExecutionContext::getCurrent();
1213 data.useOpenCL = c.useOpenCL();
1214 }
1215 }
1216 catch (...)
1217 {
1218 CV_LOG_INFO(NULL, "OpenCL: can't initialize thread OpenCL execution context");
1219 }
1220 }
1221 return data.useOpenCL > 0;
1222}
1223
1224bool isOpenCLActivated()
1225{
1226 if (!g_isOpenCLAvailable)
1227 return false; // prevent unnecessary OpenCL activation via useOpenCL()->haveOpenCL() calls
1228 return useOpenCL();
1229}
1230
1231void setUseOpenCL(bool flag)
1232{
1233 CV_TRACE_FUNCTION();
1234
1235 CoreTLSData& data = getCoreTlsData();
1236 auto& c = OpenCLExecutionContext::getCurrentRef();
1237 if (!c.empty())
1238 {
1239 c.setUseOpenCL(flag);
1240 data.useOpenCL = c.useOpenCL();
1241 }
1242 else
1243 {
1244 if (!flag)
1245 data.useOpenCL = 0;
1246 else
1247 data.useOpenCL = -1; // enabled by default (if context is not initialized)
1248 }
1249}
1250
1251
1252
#ifdef HAVE_CLAMDBLAS

/** Lazily-created singleton whose constructor runs clblasSetup() once and records whether it succeeded. */
class AmdBlasHelper
{
public:
    static AmdBlasHelper & getInstance()
    {
        CV_SINGLETON_LAZY_INIT_REF(AmdBlasHelper, new AmdBlasHelper())
    }

    /** Result of the one-time clBLAS setup attempt. */
    bool isAvailable() const
    {
        return g_isAmdBlasAvailable;
    }

    ~AmdBlasHelper()
    {
        // Do not tear down clBLAS.
        // The user application may still use clBLAS even after OpenCV is unloaded.
        /*try
        {
            clblasTeardown();
        }
        catch (...) { }*/
    }

protected:
    AmdBlasHelper()
    {
        if (g_isAmdBlasInitialized)
            return;

        AutoLock lock(getInitializationMutex());
        if (g_isAmdBlasInitialized)
            return;

        if (!haveOpenCL())
        {
            g_isAmdBlasAvailable = false;
        }
        else
        {
            try
            {
                g_isAmdBlasAvailable = clblasSetup() == clblasSuccess;
            }
            catch (...)
            {
                g_isAmdBlasAvailable = false;
            }
        }

        g_isAmdBlasInitialized = true;
    }

private:
    static bool g_isAmdBlasInitialized;
    static bool g_isAmdBlasAvailable;
};

bool AmdBlasHelper::g_isAmdBlasAvailable = false;
bool AmdBlasHelper::g_isAmdBlasInitialized = false;

/** Returns the availability recorded by the AmdBlasHelper singleton. */
bool haveAmdBlas()
{
    const AmdBlasHelper& helper = AmdBlasHelper::getInstance();
    return helper.isAvailable();
}

#else

/** Built without HAVE_CLAMDBLAS: always false. */
bool haveAmdBlas()
{
    return false;
}

#endif
1328
#ifdef HAVE_CLAMDFFT

/** Lazily-created singleton whose constructor probes clFFT once and records whether it is usable. */
class AmdFftHelper
{
public:
    static AmdFftHelper & getInstance()
    {
        CV_SINGLETON_LAZY_INIT_REF(AmdFftHelper, new AmdFftHelper())
    }

    /** Result of the one-time clFFT probe. */
    bool isAvailable() const
    {
        return g_isAmdFftAvailable;
    }

    ~AmdFftHelper()
    {
        // Do not tear down clFFT.
        // The user application may still use clFFT even after OpenCV is unloaded.
        /*try
        {
            clfftTeardown();
        }
        catch (...) { }*/
    }

protected:
    AmdFftHelper()
    {
        if (!g_isAmdFftInitialized)
        {
            AutoLock lock(getInitializationMutex());

            if (!g_isAmdFftInitialized)
            {
                if (haveOpenCL())
                {
                    try
                    {
                        cl_uint major = 0, minor = 0, patch = 0;
                        CV_Assert(clfftInitSetupData(&setupData) == CLFFT_SUCCESS);

                        // it throws exception in case AmdFft binaries are not found
                        CV_Assert(clfftGetVersion(&major, &minor, &patch) == CLFFT_SUCCESS);
                        g_isAmdFftAvailable = true;
                    }
                    catch (const Exception &)
                    {
                        g_isAmdFftAvailable = false;
                    }
                    catch (...)
                    {
                        // Same policy as AmdBlasHelper: no exception type may escape the probe,
                        // otherwise the "initialized" flag below would never be set.
                        g_isAmdFftAvailable = false;
                    }
                }
                else
                    g_isAmdFftAvailable = false;

                g_isAmdFftInitialized = true;
            }
        }
    }

private:
    static clfftSetupData setupData;
    static bool g_isAmdFftInitialized;
    static bool g_isAmdFftAvailable;
};

clfftSetupData AmdFftHelper::setupData;
bool AmdFftHelper::g_isAmdFftAvailable = false;
bool AmdFftHelper::g_isAmdFftInitialized = false;

/** Returns the availability recorded by the AmdFftHelper singleton. */
bool haveAmdFft()
{
    return AmdFftHelper::getInstance().isAvailable();
}

#else

/** Built without HAVE_CLAMDFFT: always false. */
bool haveAmdFft()
{
    return false;
}

#endif
1411
/** Compile-time answer: true only when the build defines HAVE_OPENCL_SVM. */
bool haveSVM()
{
#ifdef HAVE_OPENCL_SVM
    const bool svmCompiledIn = true;
#else
    const bool svmCompiledIn = false;
#endif
    return svmCompiledIn;
}
1420
1421void finish()
1422{
1423 Queue::getDefault().finish();
1424}
1425
1426/////////////////////////////////////////// Platform /////////////////////////////////////////////
1427
1428struct Platform::Impl
1429{
1430 Impl()
1431 {
1432 refcount = 1;
1433 handle = 0;
1434 initialized = false;
1435 }
1436
1437 ~Impl() {}
1438
1439 void init()
1440 {
1441 if( !initialized )
1442 {
1443 //cl_uint num_entries
1444 cl_uint n = 0;
1445 if( clGetPlatformIDs(1, &handle, &n) != CL_SUCCESS || n == 0 )
1446 handle = 0;
1447 if( handle != 0 )
1448 {
1449 char buf[1000];
1450 size_t len = 0;
1451 CV_OCL_DBG_CHECK(clGetPlatformInfo(handle, CL_PLATFORM_VENDOR, sizeof(buf), buf, &len));
1452 buf[len] = '\0';
1453 vendor = String(buf);
1454 }
1455
1456 initialized = true;
1457 }
1458 }
1459
1460 IMPLEMENT_REFCOUNTABLE();
1461
1462 cl_platform_id handle;
1463 String vendor;
1464 bool initialized;
1465};
1466
1467Platform::Platform() CV_NOEXCEPT
1468{
1469 p = 0;
1470}
1471
1472Platform::~Platform()
1473{
1474 if(p)
1475 p->release();
1476}
1477
1478Platform::Platform(const Platform& pl)
1479{
1480 p = (Impl*)pl.p;
1481 if(p)
1482 p->addref();
1483}
1484
1485Platform& Platform::operator = (const Platform& pl)
1486{
1487 Impl* newp = (Impl*)pl.p;
1488 if(newp)
1489 newp->addref();
1490 if(p)
1491 p->release();
1492 p = newp;
1493 return *this;
1494}
1495
1496Platform::Platform(Platform&& pl) CV_NOEXCEPT
1497{
1498 p = pl.p;
1499 pl.p = nullptr;
1500}
1501
1502Platform& Platform::operator = (Platform&& pl) CV_NOEXCEPT
1503{
1504 if (this != &pl) {
1505 if(p)
1506 p->release();
1507 p = pl.p;
1508 pl.p = nullptr;
1509 }
1510 return *this;
1511}
1512
1513void* Platform::ptr() const
1514{
1515 return p ? p->handle : 0;
1516}
1517
1518Platform& Platform::getDefault()
1519{
1520 CV_LOG_ONCE_WARNING(NULL, "OpenCL: Platform::getDefault() is deprecated and will be removed. Use cv::ocl::getPlatfomsInfo() for enumeration of available platforms");
1521 static Platform p;
1522 if( !p.p )
1523 {
1524 p.p = new Impl;
1525 p.p->init();
1526 }
1527 return p;
1528}
1529
1530/////////////////////////////////////// Device ////////////////////////////////////////////
1531
// Version has format:
//   OpenCL<space><major_version.minor_version><space><vendor-specific information>
// by specification
//   http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clGetDeviceInfo.html
//   http://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/clGetDeviceInfo.html
//   https://www.khronos.org/registry/OpenCL/sdk/1.1/docs/man/xhtml/clGetPlatformInfo.html
//   https://www.khronos.org/registry/OpenCL/sdk/1.2/docs/man/xhtml/clGetPlatformInfo.html
//
// Extracts the numeric major/minor pair from such a string.
//   version - string to parse
//   major   - receives the number between "OpenCL " and the first '.', or 0
//   minor   - receives the number following that '.', or 0
// Both outputs are 0 when the string does not start with "OpenCL " or has no '.'
// after the prefix. The vendor-specific tail is optional here: a bare "OpenCL X.Y"
// (exactly 10 characters) is accepted as well.
static void parseOpenCLVersion(const String &version, int &major, int &minor)
{
    major = minor = 0;

    static const char prefix[] = "OpenCL ";
    const size_t prefixLen = sizeof(prefix) - 1;
    const size_t minimalLen = prefixLen + 3; // shortest well-formed input: "OpenCL X.Y"

    if (version.length() < minimalLen)
        return;
    if (0 != strncmp(version.c_str(), prefix, prefixLen))
        return;

    const size_t dotPos = version.find('.', prefixLen);
    if (String::npos == dotPos)
        return;

    // atoi stops at the first non-digit, so the vendor tail after the minor number is ignored.
    const String majorText = version.substr(prefixLen, dotPos - prefixLen);
    const String minorText = version.substr(dotPos + 1);
    major = atoi(majorText.c_str());
    minor = atoi(minorText.c_str());
}
1555
1556struct Device::Impl
1557{
1558 Impl(void* d)
1559 : refcount(1)
1560 , handle(0)
1561 {
1562 try
1563 {
1564 cl_device_id device = (cl_device_id)d;
1565 _init(d: device);
1566 CV_OCL_CHECK(clRetainDevice(device)); // increment reference counter on success only
1567 }
1568 catch (...)
1569 {
1570 throw;
1571 }
1572 }
1573
1574 void _init(cl_device_id d)
1575 {
1576 handle = (cl_device_id)d;
1577
1578 name_ = getStrProp(CL_DEVICE_NAME);
1579 version_ = getStrProp(CL_DEVICE_VERSION);
1580 extensions_ = getStrProp(CL_DEVICE_EXTENSIONS);
1581 doubleFPConfig_ = getProp<cl_device_fp_config, int>(CL_DEVICE_DOUBLE_FP_CONFIG);
1582 halfFPConfig_ = getProp<cl_device_fp_config, int>(CL_DEVICE_HALF_FP_CONFIG);
1583 hostUnifiedMemory_ = getBoolProp(CL_DEVICE_HOST_UNIFIED_MEMORY);
1584 maxComputeUnits_ = getProp<cl_uint, int>(CL_DEVICE_MAX_COMPUTE_UNITS);
1585 maxWorkGroupSize_ = getProp<size_t, size_t>(CL_DEVICE_MAX_WORK_GROUP_SIZE);
1586 type_ = getProp<cl_device_type, int>(CL_DEVICE_TYPE);
1587 driverVersion_ = getStrProp(CL_DRIVER_VERSION);
1588 addressBits_ = getProp<cl_uint, int>(CL_DEVICE_ADDRESS_BITS);
1589
1590 String deviceVersion_ = getStrProp(CL_DEVICE_VERSION);
1591 parseOpenCLVersion(version: deviceVersion_, major&: deviceVersionMajor_, minor&: deviceVersionMinor_);
1592
1593 size_t pos = 0;
1594 while (pos < extensions_.size())
1595 {
1596 size_t pos2 = extensions_.find(c: ' ', pos: pos);
1597 if (pos2 == String::npos)
1598 pos2 = extensions_.size();
1599 if (pos2 > pos)
1600 {
1601 std::string extensionName = extensions_.substr(pos: pos, n: pos2 - pos);
1602 extensions_set_.insert(x: extensionName);
1603 }
1604 pos = pos2 + 1;
1605 }
1606
1607 khr_fp64_support_ = isExtensionSupported(extensionName: "cl_khr_fp64");
1608 khr_fp16_support_ = isExtensionSupported(extensionName: "cl_khr_fp16");
1609
1610 intelSubgroupsSupport_ = isExtensionSupported(extensionName: "cl_intel_subgroups");
1611
1612 vendorName_ = getStrProp(CL_DEVICE_VENDOR);
1613 if (vendorName_ == "Advanced Micro Devices, Inc." ||
1614 vendorName_ == "AMD")
1615 vendorID_ = VENDOR_AMD;
1616 else if (vendorName_ == "Intel(R) Corporation" || vendorName_ == "Intel" || vendorName_ == "Intel Inc." || strstr(haystack: name_.c_str(), needle: "Iris") != 0)
1617 vendorID_ = VENDOR_INTEL;
1618 else if (vendorName_ == "NVIDIA Corporation")
1619 vendorID_ = VENDOR_NVIDIA;
1620 else
1621 vendorID_ = UNKNOWN_VENDOR;
1622
1623 const size_t CV_OPENCL_DEVICE_MAX_WORK_GROUP_SIZE = utils::getConfigurationParameterSizeT(name: "OPENCV_OPENCL_DEVICE_MAX_WORK_GROUP_SIZE", defaultValue: 0);
1624 if (CV_OPENCL_DEVICE_MAX_WORK_GROUP_SIZE > 0)
1625 {
1626 const size_t new_maxWorkGroupSize = std::min(a: maxWorkGroupSize_, b: CV_OPENCL_DEVICE_MAX_WORK_GROUP_SIZE);
1627 if (new_maxWorkGroupSize != maxWorkGroupSize_)
1628 CV_LOG_WARNING(NULL, "OpenCL: using workgroup size: " << new_maxWorkGroupSize << " (was " << maxWorkGroupSize_ << ")");
1629 maxWorkGroupSize_ = new_maxWorkGroupSize;
1630 }
1631#if 0
1632 if (isExtensionSupported("cl_khr_spir"))
1633 {
1634#ifndef CL_DEVICE_SPIR_VERSIONS
1635#define CL_DEVICE_SPIR_VERSIONS 0x40E0
1636#endif
1637 cv::String spir_versions = getStrProp(CL_DEVICE_SPIR_VERSIONS);
1638 std::cout << spir_versions << std::endl;
1639 }
1640#endif
1641 }
1642
1643 ~Impl()
1644 {
1645#ifdef _WIN32
1646 if (!cv::__termination)
1647#endif
1648 {
1649 if (handle)
1650 {
1651 CV_OCL_CHECK(clReleaseDevice(handle));
1652 handle = 0;
1653 }
1654 }
1655 }
1656
1657 template<typename _TpCL, typename _TpOut>
1658 _TpOut getProp(cl_device_info prop) const
1659 {
1660 _TpCL temp=_TpCL();
1661 size_t sz = 0;
1662
1663 return clGetDeviceInfo(handle, prop, sizeof(temp), &temp, &sz) == CL_SUCCESS &&
1664 sz == sizeof(temp) ? _TpOut(temp) : _TpOut();
1665 }
1666
1667 bool getBoolProp(cl_device_info prop) const
1668 {
1669 cl_bool temp = CL_FALSE;
1670 size_t sz = 0;
1671
1672 return clGetDeviceInfo(handle, prop, sizeof(temp), &temp, &sz) == CL_SUCCESS &&
1673 sz == sizeof(temp) ? temp != 0 : false;
1674 }
1675
1676 String getStrProp(cl_device_info prop) const
1677 {
1678 char buf[4096];
1679 size_t sz=0;
1680 return clGetDeviceInfo(handle, prop, sizeof(buf)-16, buf, &sz) == CL_SUCCESS &&
1681 sz < sizeof(buf) ? String(buf) : String();
1682 }
1683
1684 bool isExtensionSupported(const std::string& extensionName) const
1685 {
1686 return extensions_set_.count(x: extensionName) > 0;
1687 }
1688
1689
1690 IMPLEMENT_REFCOUNTABLE();
1691
1692 cl_device_id handle;
1693
1694 String name_;
1695 String version_;
1696 std::string extensions_;
1697 int doubleFPConfig_;
1698 bool khr_fp64_support_;
1699 int halfFPConfig_;
1700 bool khr_fp16_support_;
1701 bool hostUnifiedMemory_;
1702 int maxComputeUnits_;
1703 size_t maxWorkGroupSize_;
1704 int type_;
1705 int addressBits_;
1706 int deviceVersionMajor_;
1707 int deviceVersionMinor_;
1708 String driverVersion_;
1709 String vendorName_;
1710 int vendorID_;
1711 bool intelSubgroupsSupport_;
1712
1713 std::set<std::string> extensions_set_;
1714};
1715
1716
1717Device::Device() CV_NOEXCEPT
1718{
1719 p = 0;
1720}
1721
1722Device::Device(void* d)
1723{
1724 p = 0;
1725 set(d);
1726}
1727
1728Device::Device(const Device& d)
1729{
1730 p = d.p;
1731 if(p)
1732 p->addref();
1733}
1734
1735Device& Device::operator = (const Device& d)
1736{
1737 Impl* newp = (Impl*)d.p;
1738 if(newp)
1739 newp->addref();
1740 if(p)
1741 p->release();
1742 p = newp;
1743 return *this;
1744}
1745
1746Device::Device(Device&& d) CV_NOEXCEPT
1747{
1748 p = d.p;
1749 d.p = nullptr;
1750}
1751
1752Device& Device::operator = (Device&& d) CV_NOEXCEPT
1753{
1754 if (this != &d) {
1755 if(p)
1756 p->release();
1757 p = d.p;
1758 d.p = nullptr;
1759 }
1760 return *this;
1761}
1762
1763Device::~Device()
1764{
1765 if(p)
1766 p->release();
1767}
1768
1769void Device::set(void* d)
1770{
1771 if(p)
1772 p->release();
1773 p = new Impl(d);
1774 if (p->handle)
1775 {
1776 CV_OCL_CHECK(clReleaseDevice((cl_device_id)d));
1777 }
1778}
1779
1780Device Device::fromHandle(void* d)
1781{
1782 Device device(d);
1783 return device;
1784}
1785
1786void* Device::ptr() const
1787{
1788 return p ? p->handle : 0;
1789}
1790
1791String Device::name() const
1792{ return p ? p->name_ : String(); }
1793
1794String Device::extensions() const
1795{ return p ? String(p->extensions_) : String(); }
1796
1797bool Device::isExtensionSupported(const String& extensionName) const
1798{ return p ? p->isExtensionSupported(extensionName) : false; }
1799
1800String Device::version() const
1801{ return p ? p->version_ : String(); }
1802
1803String Device::vendorName() const
1804{ return p ? p->vendorName_ : String(); }
1805
1806int Device::vendorID() const
1807{ return p ? p->vendorID_ : 0; }
1808
1809String Device::OpenCL_C_Version() const
1810{ return p ? p->getStrProp(CL_DEVICE_OPENCL_C_VERSION) : String(); }
1811
1812String Device::OpenCLVersion() const
1813{ return p ? p->getStrProp(CL_DEVICE_VERSION) : String(); }
1814
1815int Device::deviceVersionMajor() const
1816{ return p ? p->deviceVersionMajor_ : 0; }
1817
1818int Device::deviceVersionMinor() const
1819{ return p ? p->deviceVersionMinor_ : 0; }
1820
1821String Device::driverVersion() const
1822{ return p ? p->driverVersion_ : String(); }
1823
1824int Device::type() const
1825{ return p ? p->type_ : 0; }
1826
1827int Device::addressBits() const
1828{ return p ? p->addressBits_ : 0; }
1829
1830bool Device::available() const
1831{ return p ? p->getBoolProp(CL_DEVICE_AVAILABLE) : false; }
1832
1833bool Device::compilerAvailable() const
1834{ return p ? p->getBoolProp(CL_DEVICE_COMPILER_AVAILABLE) : false; }
1835
1836bool Device::linkerAvailable() const
1837#ifdef CL_VERSION_1_2
1838{ return p ? p->getBoolProp(CL_DEVICE_LINKER_AVAILABLE) : false; }
1839#else
1840{ CV_REQUIRE_OPENCL_1_2_ERROR; }
1841#endif
1842
1843int Device::doubleFPConfig() const
1844{ return p ? p->doubleFPConfig_ : 0; }
1845
1846int Device::singleFPConfig() const
1847{ return p ? p->getProp<cl_device_fp_config, int>(CL_DEVICE_SINGLE_FP_CONFIG) : 0; }
1848
1849int Device::halfFPConfig() const
1850{ return p ? p->halfFPConfig_ : 0; }
1851
1852bool Device::hasFP64() const
1853{ return p ? p->khr_fp64_support_ : false; }
1854bool Device::hasFP16() const
1855{ return p ? p->khr_fp16_support_ : false; }
1856
1857bool Device::endianLittle() const
1858{ return p ? p->getBoolProp(CL_DEVICE_ENDIAN_LITTLE) : false; }
1859
1860bool Device::errorCorrectionSupport() const
1861{ return p ? p->getBoolProp(CL_DEVICE_ERROR_CORRECTION_SUPPORT) : false; }
1862
1863int Device::executionCapabilities() const
1864{ return p ? p->getProp<cl_device_exec_capabilities, int>(CL_DEVICE_EXECUTION_CAPABILITIES) : 0; }
1865
1866size_t Device::globalMemCacheSize() const
1867{ return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_GLOBAL_MEM_CACHE_SIZE) : 0; }
1868
1869int Device::globalMemCacheType() const
1870{ return p ? p->getProp<cl_device_mem_cache_type, int>(CL_DEVICE_GLOBAL_MEM_CACHE_TYPE) : 0; }
1871
1872int Device::globalMemCacheLineSize() const
1873{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE) : 0; }
1874
1875size_t Device::globalMemSize() const
1876{ return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_GLOBAL_MEM_SIZE) : 0; }
1877
1878size_t Device::localMemSize() const
1879{ return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_LOCAL_MEM_SIZE) : 0; }
1880
1881int Device::localMemType() const
1882{ return p ? p->getProp<cl_device_local_mem_type, int>(CL_DEVICE_LOCAL_MEM_TYPE) : 0; }
1883
1884bool Device::hostUnifiedMemory() const
1885{ return p ? p->hostUnifiedMemory_ : false; }
1886
1887bool Device::imageSupport() const
1888{ return p ? p->getBoolProp(CL_DEVICE_IMAGE_SUPPORT) : false; }
1889
1890bool Device::imageFromBufferSupport() const
1891{
1892 return p ? p->isExtensionSupported(extensionName: "cl_khr_image2d_from_buffer") : false;
1893}
1894
1895uint Device::imagePitchAlignment() const
1896{
1897#ifdef CL_DEVICE_IMAGE_PITCH_ALIGNMENT
1898 return p ? p->getProp<cl_uint, uint>(CL_DEVICE_IMAGE_PITCH_ALIGNMENT) : 0;
1899#else
1900 return 0;
1901#endif
1902}
1903
1904uint Device::imageBaseAddressAlignment() const
1905{
1906#ifdef CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT
1907 return p ? p->getProp<cl_uint, uint>(CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT) : 0;
1908#else
1909 return 0;
1910#endif
1911}
1912
1913size_t Device::image2DMaxWidth() const
1914{ return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE2D_MAX_WIDTH) : 0; }
1915
1916size_t Device::image2DMaxHeight() const
1917{ return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE2D_MAX_HEIGHT) : 0; }
1918
1919size_t Device::image3DMaxWidth() const
1920{ return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE3D_MAX_WIDTH) : 0; }
1921
1922size_t Device::image3DMaxHeight() const
1923{ return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE3D_MAX_HEIGHT) : 0; }
1924
1925size_t Device::image3DMaxDepth() const
1926{ return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE3D_MAX_DEPTH) : 0; }
1927
1928size_t Device::imageMaxBufferSize() const
1929#ifdef CL_VERSION_1_2
1930{ return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE_MAX_BUFFER_SIZE) : 0; }
1931#else
1932{ CV_REQUIRE_OPENCL_1_2_ERROR; }
1933#endif
1934
1935size_t Device::imageMaxArraySize() const
1936#ifdef CL_VERSION_1_2
1937{ return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE_MAX_ARRAY_SIZE) : 0; }
1938#else
1939{ CV_REQUIRE_OPENCL_1_2_ERROR; }
1940#endif
1941
1942bool Device::intelSubgroupsSupport() const
1943{ return p ? p->intelSubgroupsSupport_ : false; }
1944
1945int Device::maxClockFrequency() const
1946{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_CLOCK_FREQUENCY) : 0; }
1947
1948int Device::maxComputeUnits() const
1949{ return p ? p->maxComputeUnits_ : 0; }
1950
1951int Device::maxConstantArgs() const
1952{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_CONSTANT_ARGS) : 0; }
1953
1954size_t Device::maxConstantBufferSize() const
1955{ return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE) : 0; }
1956
1957size_t Device::maxMemAllocSize() const
1958{ return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_MAX_MEM_ALLOC_SIZE) : 0; }
1959
1960size_t Device::maxParameterSize() const
1961{ return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_MAX_PARAMETER_SIZE) : 0; }
1962
1963int Device::maxReadImageArgs() const
1964{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_READ_IMAGE_ARGS) : 0; }
1965
1966int Device::maxWriteImageArgs() const
1967{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_WRITE_IMAGE_ARGS) : 0; }
1968
1969int Device::maxSamplers() const
1970{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_SAMPLERS) : 0; }
1971
1972size_t Device::maxWorkGroupSize() const
1973{ return p ? p->maxWorkGroupSize_ : 0; }
1974
1975int Device::maxWorkItemDims() const
1976{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS) : 0; }
1977
1978void Device::maxWorkItemSizes(size_t* sizes) const
1979{
1980 if(p)
1981 {
1982 const int MAX_DIMS = 32;
1983 size_t retsz = 0;
1984 CV_OCL_DBG_CHECK(clGetDeviceInfo(p->handle, CL_DEVICE_MAX_WORK_ITEM_SIZES,
1985 MAX_DIMS*sizeof(sizes[0]), &sizes[0], &retsz));
1986 }
1987}
1988
1989int Device::memBaseAddrAlign() const
1990{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_MEM_BASE_ADDR_ALIGN) : 0; }
1991
1992int Device::nativeVectorWidthChar() const
1993{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR) : 0; }
1994
1995int Device::nativeVectorWidthShort() const
1996{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT) : 0; }
1997
1998int Device::nativeVectorWidthInt() const
1999{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_INT) : 0; }
2000
2001int Device::nativeVectorWidthLong() const
2002{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG) : 0; }
2003
2004int Device::nativeVectorWidthFloat() const
2005{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT) : 0; }
2006
2007int Device::nativeVectorWidthDouble() const
2008{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE) : 0; }
2009
2010int Device::nativeVectorWidthHalf() const
2011{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF) : 0; }
2012
2013int Device::preferredVectorWidthChar() const
2014{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR) : 0; }
2015
2016int Device::preferredVectorWidthShort() const
2017{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT) : 0; }
2018
2019int Device::preferredVectorWidthInt() const
2020{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT) : 0; }
2021
2022int Device::preferredVectorWidthLong() const
2023{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG) : 0; }
2024
2025int Device::preferredVectorWidthFloat() const
2026{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT) : 0; }
2027
2028int Device::preferredVectorWidthDouble() const
2029{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE) : 0; }
2030
2031int Device::preferredVectorWidthHalf() const
2032{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF) : 0; }
2033
2034size_t Device::printfBufferSize() const
2035#ifdef CL_VERSION_1_2
2036{ return p ? p->getProp<size_t, size_t>(CL_DEVICE_PRINTF_BUFFER_SIZE) : 0; }
2037#else
2038{ CV_REQUIRE_OPENCL_1_2_ERROR; }
2039#endif
2040
2041
2042size_t Device::profilingTimerResolution() const
2043{ return p ? p->getProp<size_t, size_t>(CL_DEVICE_PROFILING_TIMER_RESOLUTION) : 0; }
2044
2045const Device& Device::getDefault()
2046{
2047 auto& c = OpenCLExecutionContext::getCurrent();
2048 if (!c.empty())
2049 {
2050 return c.getDevice();
2051 }
2052
2053 static Device dummy;
2054 return dummy;
2055}
2056
2057////////////////////////////////////// Context ///////////////////////////////////////////////////
2058
2059template <typename Functor, typename ObjectType>
2060inline cl_int getStringInfo(Functor f, ObjectType obj, cl_uint name, std::string& param)
2061{
2062 ::size_t required;
2063 cl_int err = f(obj, name, 0, NULL, &required);
2064 if (err != CL_SUCCESS)
2065 return err;
2066
2067 param.clear();
2068 if (required > 0)
2069 {
2070 AutoBuffer<char> buf(required + 1);
2071 char* ptr = buf.data(); // cleanup is not needed
2072 err = f(obj, name, required, ptr, NULL);
2073 if (err != CL_SUCCESS)
2074 return err;
2075 param = ptr;
2076 }
2077
2078 return CL_SUCCESS;
2079}
2080
// Splits `s` at every occurrence of `delim` into `elems` (previous content is discarded).
// Empty fields are kept, including a trailing one: "a:" yields {"a", ""}.
// An empty input yields an empty vector.
static void split(const std::string &s, char delim, std::vector<std::string> &elems)
{
    elems.clear();
    if (s.empty())
        return;

    std::string::size_type fieldStart = 0;
    for (;;)
    {
        const std::string::size_type delimPos = s.find(delim, fieldStart);
        if (delimPos == std::string::npos)
        {
            // Last field: everything after the final delimiter (possibly empty).
            elems.push_back(s.substr(fieldStart));
            break;
        }
        elems.push_back(s.substr(fieldStart, delimPos - fieldStart));
        fieldStart = delimPos + 1;
    }
}
2094
2095// Layout: <Platform>:<CPU|GPU|ACCELERATOR|nothing=GPU/CPU>:<deviceName>
2096// Sample: AMD:GPU:
2097// Sample: AMD:GPU:Tahiti
2098// Sample: :GPU|CPU: = '' = ':' = '::'
2099static bool parseOpenCLDeviceConfiguration(const std::string& configurationStr,
2100 std::string& platform, std::vector<std::string>& deviceTypes, std::string& deviceNameOrID)
2101{
2102 std::vector<std::string> parts;
2103 split(s: configurationStr, delim: ':', elems&: parts);
2104 if (parts.size() > 3)
2105 {
2106 CV_LOG_ERROR(NULL, "OpenCL: Invalid configuration string for OpenCL device: " << configurationStr);
2107 return false;
2108 }
2109 if (parts.size() > 2)
2110 deviceNameOrID = parts[2];
2111 if (parts.size() > 1)
2112 {
2113 split(s: parts[1], delim: '|', elems&: deviceTypes);
2114 }
2115 if (parts.size() > 0)
2116 {
2117 platform = parts[0];
2118 }
2119 return true;
2120}
2121
2122static cl_device_id selectOpenCLDevice(const std::string & configuration_ = std::string())
2123{
2124 std::string platform, deviceName;
2125 std::vector<std::string> deviceTypes;
2126
2127 std::string configuration(configuration_);
2128 if (configuration.empty())
2129 configuration = utils::getConfigurationParameterString(name: "OPENCV_OPENCL_DEVICE");
2130
2131 if (!configuration.empty() &&
2132 (configuration == "disabled" ||
2133 !parseOpenCLDeviceConfiguration(configurationStr: configuration, platform, deviceTypes, deviceNameOrID&: deviceName)
2134 ))
2135 return NULL;
2136
2137 bool isID = false;
2138 int deviceID = -1;
2139 if (deviceName.length() == 1)
2140 // We limit ID range to 0..9, because we want to write:
2141 // - '2500' to mean i5-2500
2142 // - '8350' to mean AMD FX-8350
2143 // - '650' to mean GeForce 650
2144 // To extend ID range change condition to '> 0'
2145 {
2146 isID = true;
2147 for (size_t i = 0; i < deviceName.length(); i++)
2148 {
2149 if (!isdigit(deviceName[i]))
2150 {
2151 isID = false;
2152 break;
2153 }
2154 }
2155 if (isID)
2156 {
2157 deviceID = atoi(nptr: deviceName.c_str());
2158 if (deviceID < 0)
2159 return NULL;
2160 }
2161 }
2162
2163 std::vector<cl_platform_id> platforms;
2164 {
2165 cl_uint numPlatforms = 0;
2166 CV_OCL_DBG_CHECK(clGetPlatformIDs(0, NULL, &numPlatforms));
2167
2168 if (numPlatforms == 0)
2169 return NULL;
2170 platforms.resize(new_size: (size_t)numPlatforms);
2171 CV_OCL_DBG_CHECK(clGetPlatformIDs(numPlatforms, &platforms[0], &numPlatforms));
2172 platforms.resize(new_size: numPlatforms);
2173 }
2174
2175 if (platform.length() > 0)
2176 {
2177 for (std::vector<cl_platform_id>::iterator currentPlatform = platforms.begin(); currentPlatform != platforms.end();)
2178 {
2179 std::string name;
2180 CV_OCL_DBG_CHECK(getStringInfo(clGetPlatformInfo, *currentPlatform, CL_PLATFORM_NAME, name));
2181 if (name.find(str: platform) != std::string::npos)
2182 {
2183 ++currentPlatform;
2184 }
2185 else
2186 {
2187 currentPlatform = platforms.erase(position: currentPlatform);
2188 }
2189 }
2190 if (platforms.size() == 0)
2191 {
2192 CV_LOG_ERROR(NULL, "OpenCL: Can't find OpenCL platform by name: " << platform);
2193 goto not_found;
2194 }
2195 }
2196 if (deviceTypes.size() == 0)
2197 {
2198 if (!isID)
2199 {
2200 deviceTypes.push_back(x: "GPU");
2201 if (!configuration.empty())
2202 deviceTypes.push_back(x: "CPU");
2203 }
2204 else
2205 deviceTypes.push_back(x: "ALL");
2206 }
2207 for (size_t t = 0; t < deviceTypes.size(); t++)
2208 {
2209 int deviceType = 0;
2210 std::string tempStrDeviceType = deviceTypes[t];
2211 std::transform(first: tempStrDeviceType.begin(), last: tempStrDeviceType.end(), result: tempStrDeviceType.begin(), unary_op: details::char_tolower);
2212
2213 if (tempStrDeviceType == "gpu" || tempStrDeviceType == "dgpu" || tempStrDeviceType == "igpu")
2214 deviceType = Device::TYPE_GPU;
2215 else if (tempStrDeviceType == "cpu")
2216 deviceType = Device::TYPE_CPU;
2217 else if (tempStrDeviceType == "accelerator")
2218 deviceType = Device::TYPE_ACCELERATOR;
2219 else if (tempStrDeviceType == "all")
2220 deviceType = Device::TYPE_ALL;
2221 else
2222 {
2223 CV_LOG_ERROR(NULL, "OpenCL: Unsupported device type for OpenCL device (GPU, CPU, ACCELERATOR): " << deviceTypes[t]);
2224 goto not_found;
2225 }
2226
2227 std::vector<cl_device_id> devices;
2228 for (std::vector<cl_platform_id>::iterator currentPlatform = platforms.begin(); currentPlatform != platforms.end(); ++currentPlatform)
2229 {
2230 cl_uint count = 0;
2231 cl_int status = clGetDeviceIDs(*currentPlatform, deviceType, 0, NULL, &count);
2232 if (!(status == CL_SUCCESS || status == CL_DEVICE_NOT_FOUND))
2233 {
2234 CV_OCL_DBG_CHECK_RESULT(status, "clGetDeviceIDs get count");
2235 }
2236 if (count == 0)
2237 continue;
2238 size_t base = devices.size();
2239 devices.resize(new_size: base + count);
2240 status = clGetDeviceIDs(*currentPlatform, deviceType, count, &devices[base], &count);
2241 if (!(status == CL_SUCCESS || status == CL_DEVICE_NOT_FOUND))
2242 {
2243 CV_OCL_DBG_CHECK_RESULT(status, "clGetDeviceIDs get IDs");
2244 }
2245 }
2246
2247 for (size_t i = (isID ? deviceID : 0);
2248 (isID ? (i == (size_t)deviceID) : true) && (i < devices.size());
2249 i++)
2250 {
2251 std::string name;
2252 CV_OCL_DBG_CHECK(getStringInfo(clGetDeviceInfo, devices[i], CL_DEVICE_NAME, name));
2253 cl_bool useGPU = true;
2254 if(tempStrDeviceType == "dgpu" || tempStrDeviceType == "igpu")
2255 {
2256 cl_bool isIGPU = CL_FALSE;
2257 CV_OCL_DBG_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_HOST_UNIFIED_MEMORY, sizeof(isIGPU), &isIGPU, NULL));
2258 useGPU = tempStrDeviceType == "dgpu" ? !isIGPU : isIGPU;
2259 }
2260 if ( (isID || name.find(str: deviceName) != std::string::npos) && useGPU)
2261 {
2262 // TODO check for OpenCL 1.1
2263 return devices[i];
2264 }
2265 }
2266 }
2267
2268not_found:
2269 if (configuration.empty())
2270 return NULL; // suppress messages on stderr
2271
2272 std::ostringstream msg;
2273 msg << "ERROR: Requested OpenCL device not found, check configuration: '" << configuration << "'" << std::endl
2274 << " Platform: " << (platform.length() == 0 ? "any" : platform) << std::endl
2275 << " Device types:";
2276 for (size_t t = 0; t < deviceTypes.size(); t++)
2277 msg << ' ' << deviceTypes[t];
2278
2279 msg << std::endl << " Device name: " << (deviceName.length() == 0 ? "any" : deviceName);
2280
2281 CV_LOG_ERROR(NULL, msg.str());
2282 return NULL;
2283}
2284
2285#ifdef HAVE_OPENCL_SVM
2286namespace svm {
2287
// Flags describing which kind of SVM allocation backs a buffer.
// The low 16 bits are intentionally not used by any of these values.
enum AllocatorFlags {
    OPENCL_SVM_COARSE_GRAIN_BUFFER = 0x10000, // clSVMAlloc + SVM map/unmap
    OPENCL_SVM_FINE_GRAIN_BUFFER   = 0x20000, // clSVMAlloc
    OPENCL_SVM_FINE_GRAIN_SYSTEM   = 0x30000, // direct access
    OPENCL_SVM_BUFFER_MASK         = 0x30000, // covers the three buffer kinds above
    OPENCL_SVM_BUFFER_MAP          = 0x40000
};
2295
2296static bool checkForceSVMUmatUsage()
2297{
2298 static bool initialized = false;
2299 static bool force = false;
2300 if (!initialized)
2301 {
2302 force = utils::getConfigurationParameterBool("OPENCV_OPENCL_SVM_FORCE_UMAT_USAGE", false);
2303 initialized = true;
2304 }
2305 return force;
2306}
2307static bool checkDisableSVMUMatUsage()
2308{
2309 static bool initialized = false;
2310 static bool force = false;
2311 if (!initialized)
2312 {
2313 force = utils::getConfigurationParameterBool("OPENCV_OPENCL_SVM_DISABLE_UMAT_USAGE", false);
2314 initialized = true;
2315 }
2316 return force;
2317}
2318static bool checkDisableSVM()
2319{
2320 static bool initialized = false;
2321 static bool force = false;
2322 if (!initialized)
2323 {
2324 force = utils::getConfigurationParameterBool("OPENCV_OPENCL_SVM_DISABLE", false);
2325 initialized = true;
2326 }
2327 return force;
2328}
2329// see SVMCapabilities
2330static unsigned int getSVMCapabilitiesMask()
2331{
2332 static bool initialized = false;
2333 static unsigned int mask = 0;
2334 if (!initialized)
2335 {
2336 const std::string envValue = utils::getConfigurationParameterString("OPENCV_OPENCL_SVM_CAPABILITIES_MASK");
2337 if (envValue.empty())
2338 {
2339 return ~0U; // all bits 1
2340 }
2341 mask = atoi(envValue.c_str());
2342 initialized = true;
2343 }
2344 return mask;
2345}
2346} // namespace
2347#endif
2348
2349static size_t getProgramCountLimit()
2350{
2351 static bool initialized = false;
2352 static size_t count = 0;
2353 if (!initialized)
2354 {
2355 count = utils::getConfigurationParameterSizeT(name: "OPENCV_OPENCL_PROGRAM_CACHE", defaultValue: 0);
2356 initialized = true;
2357 }
2358 return count;
2359}
2360
// Source of Context::Impl::contextId values; the Impl constructor increments it via CV_XADD.
static int g_contextId = 0;

// Buffer pool implementations held by Context::Impl through shared_ptr members.
class OpenCLBufferPoolImpl;
class OpenCLSVMBufferPoolImpl;
2365
2366struct Context::Impl
2367{
2368 static Context::Impl* get(Context& context) { return context.p; }
2369
2370 typedef std::deque<Context::Impl*> container_t;
2371 static container_t& getGlobalContainer()
2372 {
2373 // never delete this container (Impl lifetime is greater due to TLS storage)
2374 static container_t* g_contexts = new container_t();
2375 return *g_contexts;
2376 }
2377
2378protected:
2379 Impl(const std::string& configuration_)
2380 : refcount(1)
2381 , contextId(CV_XADD(&g_contextId, 1))
2382 , configuration(configuration_)
2383 , handle(0)
2384#ifdef HAVE_OPENCL_SVM
2385 , svmInitialized(false)
2386#endif
2387 {
2388 if (!haveOpenCL())
2389 CV_Error(cv::Error::OpenCLApiCallError, "OpenCL runtime is not available!");
2390
2391 cv::AutoLock lock(cv::getInitializationMutex());
2392 auto& container = getGlobalContainer();
2393 container.resize(new_size: std::max(a: container.size(), b: (size_t)contextId + 1));
2394 container[contextId] = this;
2395 }
2396
2397 ~Impl()
2398 {
2399#ifdef _WIN32
2400 if (!cv::__termination)
2401#endif
2402 {
2403 if (handle)
2404 {
2405 CV_OCL_DBG_CHECK(clReleaseContext(handle));
2406 handle = NULL;
2407 }
2408 devices.clear();
2409 }
2410
2411 userContextStorage.clear();
2412
2413 {
2414 cv::AutoLock lock(cv::getInitializationMutex());
2415 auto& container = getGlobalContainer();
2416 CV_CheckLT((size_t)contextId, container.size(), "");
2417 container[contextId] = NULL;
2418 }
2419 }
2420
2421 void init_device_list()
2422 {
2423 CV_Assert(handle);
2424
2425 cl_uint ndevices = 0;
2426 CV_OCL_CHECK(clGetContextInfo(handle, CL_CONTEXT_NUM_DEVICES, sizeof(ndevices), &ndevices, NULL));
2427 CV_Assert(ndevices > 0);
2428
2429 cv::AutoBuffer<cl_device_id> cl_devices(ndevices);
2430 size_t devices_ret_size = 0;
2431 CV_OCL_CHECK(clGetContextInfo(handle, CL_CONTEXT_DEVICES, cl_devices.size() * sizeof(cl_device_id), &cl_devices[0], &devices_ret_size));
2432 CV_CheckEQ(devices_ret_size, cl_devices.size() * sizeof(cl_device_id), "");
2433
2434 devices.clear();
2435 for (unsigned i = 0; i < ndevices; i++)
2436 {
2437 devices.emplace_back(args: Device::fromHandle(d: cl_devices[i]));
2438 }
2439 }
2440
2441 void __init_buffer_pools(); // w/o synchronization
2442 void _init_buffer_pools() const
2443 {
2444 if (!bufferPool_)
2445 {
2446 cv::AutoLock lock(cv::getInitializationMutex());
2447 if (!bufferPool_)
2448 {
2449 const_cast<Impl*>(this)->__init_buffer_pools();
2450 }
2451 }
2452 }
2453public:
2454 static Impl* findContext(const std::string& configuration)
2455 {
2456 CV_TRACE_FUNCTION();
2457 cv::AutoLock lock(cv::getInitializationMutex());
2458 auto& container = getGlobalContainer();
2459 if (configuration.empty() && !container.empty())
2460 return container[0];
2461 for (auto it = container.begin(); it != container.end(); ++it)
2462 {
2463 Impl* i = *it;
2464 if (i && i->configuration == configuration)
2465 {
2466 return i;
2467 }
2468 }
2469 return NULL;
2470 }
2471
2472 static Impl* findOrCreateContext(const std::string& configuration_)
2473 {
2474 CV_TRACE_FUNCTION();
2475 std::string configuration = configuration_;
2476 if (configuration_.empty())
2477 {
2478 const std::string c = utils::getConfigurationParameterString(name: "OPENCV_OPENCL_DEVICE");
2479 if (!c.empty())
2480 configuration = c;
2481 }
2482 Impl* impl = findContext(configuration);
2483 if (impl)
2484 {
2485 CV_LOG_INFO(NULL, "OpenCL: reuse context@" << impl->contextId << " for configuration: " << configuration)
2486 impl->addref();
2487 return impl;
2488 }
2489
2490 cl_device_id d = selectOpenCLDevice(configuration_: configuration);
2491 if (d == NULL)
2492 return NULL;
2493
2494 impl = new Impl(configuration);
2495 try
2496 {
2497 impl->createFromDevice(d);
2498 if (impl->handle)
2499 return impl;
2500 delete impl;
2501 return NULL;
2502 }
2503 catch (...)
2504 {
2505 delete impl;
2506 throw;
2507 }
2508 }
2509
2510 static Impl* findOrCreateContext(cl_context h)
2511 {
2512 CV_TRACE_FUNCTION();
2513
2514 CV_Assert(h);
2515
2516 std::string configuration = cv::format(fmt: "@ctx-%p", (void*)h);
2517 Impl* impl = findContext(configuration);
2518 if (impl)
2519 {
2520 CV_LOG_INFO(NULL, "OpenCL: reuse context@" << impl->contextId << " for configuration: " << configuration)
2521 impl->addref();
2522 return impl;
2523 }
2524
2525 impl = new Impl(configuration);
2526 try
2527 {
2528 CV_OCL_CHECK(clRetainContext(h));
2529 impl->handle = h;
2530 impl->init_device_list();
2531 return impl;
2532 }
2533 catch (...)
2534 {
2535 delete impl;
2536 throw;
2537 }
2538 }
2539
2540 static Impl* findOrCreateContext(const ocl::Device& device)
2541 {
2542 CV_TRACE_FUNCTION();
2543
2544 CV_Assert(!device.empty());
2545 cl_device_id d = (cl_device_id)device.ptr();
2546 CV_Assert(d);
2547
2548 std::string configuration = cv::format(fmt: "@dev-%p", (void*)d);
2549 Impl* impl = findContext(configuration);
2550 if (impl)
2551 {
2552 CV_LOG_INFO(NULL, "OpenCL: reuse context@" << impl->contextId << " for configuration: " << configuration)
2553 impl->addref();
2554 return impl;
2555 }
2556
2557 impl = new Impl(configuration);
2558 try
2559 {
2560 impl->createFromDevice(d);
2561 CV_Assert(impl->handle);
2562 return impl;
2563 }
2564 catch (...)
2565 {
2566 delete impl;
2567 throw;
2568 }
2569 }
2570
2571 void setDefault()
2572 {
2573 CV_TRACE_FUNCTION();
2574 cl_device_id d = selectOpenCLDevice();
2575
2576 if (d == NULL)
2577 return;
2578
2579 createFromDevice(d);
2580 }
2581
2582 void createFromDevice(cl_device_id d)
2583 {
2584 CV_TRACE_FUNCTION();
2585 CV_Assert(handle == NULL);
2586
2587 cl_platform_id pl = NULL;
2588 CV_OCL_DBG_CHECK(clGetDeviceInfo(d, CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &pl, NULL));
2589
2590 cl_context_properties prop[] =
2591 {
2592 CL_CONTEXT_PLATFORM, (cl_context_properties)pl,
2593 0
2594 };
2595
2596 // !!! in the current implementation force the number of devices to 1 !!!
2597 cl_uint nd = 1;
2598 cl_int status;
2599
2600 handle = clCreateContext(prop, nd, &d, 0, 0, &status);
2601 CV_OCL_DBG_CHECK_RESULT(status, "clCreateContext");
2602
2603 bool ok = handle != 0 && status == CL_SUCCESS;
2604 if( ok )
2605 {
2606 devices.resize(new_size: nd);
2607 devices[0].set(d);
2608 }
2609 else
2610 handle = NULL;
2611 }
2612
2613 Program getProg(const ProgramSource& src, const String& buildflags, String& errmsg);
2614
2615 void unloadProg(Program& prog)
2616 {
2617 cv::AutoLock lock(program_cache_mutex);
2618 for (CacheList::iterator i = cacheList.begin(); i != cacheList.end(); ++i)
2619 {
2620 phash_t::iterator it = phash.find(x: *i);
2621 if (it != phash.end())
2622 {
2623 if (it->second.ptr() == prog.ptr())
2624 {
2625 phash.erase(x: *i);
2626 cacheList.erase(position: i);
2627 return;
2628 }
2629 }
2630 }
2631 }
2632
2633 std::string& getPrefixString()
2634 {
2635 if (prefix.empty())
2636 {
2637 cv::AutoLock lock(program_cache_mutex);
2638 if (prefix.empty())
2639 {
2640 CV_Assert(!devices.empty());
2641 const Device& d = devices[0];
2642 int bits = d.addressBits();
2643 if (bits > 0 && bits != 64)
2644 prefix = cv::format(fmt: "%d-bit--", bits);
2645 prefix += d.vendorName() + "--" + d.name() + "--" + d.driverVersion();
2646 // sanitize chars
2647 for (size_t i = 0; i < prefix.size(); i++)
2648 {
2649 char c = prefix[i];
2650 if (!((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || c == '-'))
2651 {
2652 prefix[i] = '_';
2653 }
2654 }
2655 }
2656 }
2657 return prefix;
2658 }
2659
2660 std::string& getPrefixBase()
2661 {
2662 if (prefix_base.empty())
2663 {
2664 cv::AutoLock lock(program_cache_mutex);
2665 if (prefix_base.empty())
2666 {
2667 const Device& d = devices[0];
2668 int bits = d.addressBits();
2669 if (bits > 0 && bits != 64)
2670 prefix_base = cv::format(fmt: "%d-bit--", bits);
2671 prefix_base += d.vendorName() + "--" + d.name() + "--";
2672 // sanitize chars
2673 for (size_t i = 0; i < prefix_base.size(); i++)
2674 {
2675 char c = prefix_base[i];
2676 if (!((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || c == '-'))
2677 {
2678 prefix_base[i] = '_';
2679 }
2680 }
2681 }
2682 }
2683 return prefix_base;
2684 }
2685
2686 IMPLEMENT_REFCOUNTABLE();
2687
2688 const int contextId; // global unique ID
2689 const std::string configuration;
2690
2691 cl_context handle;
2692 std::vector<Device> devices;
2693
2694 std::string prefix;
2695 std::string prefix_base;
2696
2697 cv::Mutex program_cache_mutex;
2698 typedef std::map<std::string, Program> phash_t;
2699 phash_t phash;
2700 typedef std::list<cv::String> CacheList;
2701 CacheList cacheList;
2702
2703 std::shared_ptr<OpenCLBufferPoolImpl> bufferPool_;
2704 std::shared_ptr<OpenCLBufferPoolImpl> bufferPoolHostPtr_;
2705 OpenCLBufferPoolImpl& getBufferPool() const
2706 {
2707 _init_buffer_pools();
2708 CV_DbgAssert(bufferPool_);
2709 return *bufferPool_.get();
2710 }
2711 OpenCLBufferPoolImpl& getBufferPoolHostPtr() const
2712 {
2713 _init_buffer_pools();
2714 CV_DbgAssert(bufferPoolHostPtr_);
2715 return *bufferPoolHostPtr_.get();
2716 }
2717
2718 std::map<std::type_index, std::shared_ptr<UserContext>> userContextStorage;
2719 cv::Mutex userContextMutex;
2720 void setUserContext(std::type_index typeId, const std::shared_ptr<UserContext>& userContext) {
2721 cv::AutoLock lock(userContextMutex);
2722 userContextStorage[typeId] = userContext;
2723 }
2724 std::shared_ptr<UserContext> getUserContext(std::type_index typeId) {
2725 cv::AutoLock lock(userContextMutex);
2726 auto it = userContextStorage.find(x: typeId);
2727 if (it != userContextStorage.end())
2728 return it->second;
2729 else
2730 return nullptr;
2731 }
2732
2733#ifdef HAVE_OPENCL_SVM
2734 bool svmInitialized;
2735 bool svmAvailable;
2736 bool svmEnabled;
2737 svm::SVMCapabilities svmCapabilities;
2738 svm::SVMFunctions svmFunctions;
2739
2740 void svmInit()
2741 {
2742 CV_Assert(handle != NULL);
2743 const Device& device = devices[0];
2744 cl_device_svm_capabilities deviceCaps = 0;
2745 CV_Assert(((void)0, CL_DEVICE_SVM_CAPABILITIES == CL_DEVICE_SVM_CAPABILITIES_AMD)); // Check assumption
2746 cl_int status = clGetDeviceInfo((cl_device_id)device.ptr(), CL_DEVICE_SVM_CAPABILITIES, sizeof(deviceCaps), &deviceCaps, NULL);
2747 if (status != CL_SUCCESS)
2748 {
2749 CV_OPENCL_SVM_TRACE_ERROR_P("CL_DEVICE_SVM_CAPABILITIES via clGetDeviceInfo failed: %d\n", status);
2750 goto noSVM;
2751 }
2752 CV_OPENCL_SVM_TRACE_P("CL_DEVICE_SVM_CAPABILITIES returned: 0x%x\n", (int)deviceCaps);
2753 CV_Assert(((void)0, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER == CL_DEVICE_SVM_COARSE_GRAIN_BUFFER_AMD)); // Check assumption
2754 svmCapabilities.value_ =
2755 ((deviceCaps & CL_DEVICE_SVM_COARSE_GRAIN_BUFFER) ? svm::SVMCapabilities::SVM_COARSE_GRAIN_BUFFER : 0) |
2756 ((deviceCaps & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) ? svm::SVMCapabilities::SVM_FINE_GRAIN_BUFFER : 0) |
2757 ((deviceCaps & CL_DEVICE_SVM_FINE_GRAIN_SYSTEM) ? svm::SVMCapabilities::SVM_FINE_GRAIN_SYSTEM : 0) |
2758 ((deviceCaps & CL_DEVICE_SVM_ATOMICS) ? svm::SVMCapabilities::SVM_ATOMICS : 0);
2759 svmCapabilities.value_ &= svm::getSVMCapabilitiesMask();
2760 if (svmCapabilities.value_ == 0)
2761 {
2762 CV_OPENCL_SVM_TRACE_ERROR_P("svmCapabilities is empty\n");
2763 goto noSVM;
2764 }
2765 try
2766 {
2767 // Try OpenCL 2.0
2768 CV_OPENCL_SVM_TRACE_P("Try SVM from OpenCL 2.0 ...\n");
2769 void* ptr = clSVMAlloc(handle, CL_MEM_READ_WRITE, 100, 0);
2770 if (!ptr)
2771 {
2772 CV_OPENCL_SVM_TRACE_ERROR_P("clSVMAlloc returned NULL...\n");
2773 CV_Error(Error::StsBadArg, "clSVMAlloc returned NULL");
2774 }
2775 try
2776 {
2777 bool error = false;
2778 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
2779 if (CL_SUCCESS != clEnqueueSVMMap(q, CL_TRUE, CL_MAP_WRITE, ptr, 100, 0, NULL, NULL))
2780 {
2781 CV_OPENCL_SVM_TRACE_ERROR_P("clEnqueueSVMMap failed...\n");
2782 CV_Error(Error::StsBadArg, "clEnqueueSVMMap FAILED");
2783 }
2784 clFinish(q);
2785 try
2786 {
2787 ((int*)ptr)[0] = 100;
2788 }
2789 catch (...)
2790 {
2791 CV_OPENCL_SVM_TRACE_ERROR_P("SVM buffer access test FAILED\n");
2792 error = true;
2793 }
2794 if (CL_SUCCESS != clEnqueueSVMUnmap(q, ptr, 0, NULL, NULL))
2795 {
2796 CV_OPENCL_SVM_TRACE_ERROR_P("clEnqueueSVMUnmap failed...\n");
2797 CV_Error(Error::StsBadArg, "clEnqueueSVMUnmap FAILED");
2798 }
2799 clFinish(q);
2800 if (error)
2801 {
2802 CV_Error(Error::StsBadArg, "OpenCL SVM buffer access test was FAILED");
2803 }
2804 }
2805 catch (...)
2806 {
2807 CV_OPENCL_SVM_TRACE_ERROR_P("OpenCL SVM buffer access test was FAILED\n");
2808 clSVMFree(handle, ptr);
2809 throw;
2810 }
2811 clSVMFree(handle, ptr);
2812 svmFunctions.fn_clSVMAlloc = clSVMAlloc;
2813 svmFunctions.fn_clSVMFree = clSVMFree;
2814 svmFunctions.fn_clSetKernelArgSVMPointer = clSetKernelArgSVMPointer;
2815 //svmFunctions.fn_clSetKernelExecInfo = clSetKernelExecInfo;
2816 //svmFunctions.fn_clEnqueueSVMFree = clEnqueueSVMFree;
2817 svmFunctions.fn_clEnqueueSVMMemcpy = clEnqueueSVMMemcpy;
2818 svmFunctions.fn_clEnqueueSVMMemFill = clEnqueueSVMMemFill;
2819 svmFunctions.fn_clEnqueueSVMMap = clEnqueueSVMMap;
2820 svmFunctions.fn_clEnqueueSVMUnmap = clEnqueueSVMUnmap;
2821 }
2822 catch (...)
2823 {
2824 CV_OPENCL_SVM_TRACE_P("clSVMAlloc failed, trying HSA extension...\n");
2825 try
2826 {
2827 // Try HSA extension
2828 String extensions = device.extensions();
2829 if (extensions.find("cl_amd_svm") == String::npos)
2830 {
2831 CV_OPENCL_SVM_TRACE_P("Device extension doesn't have cl_amd_svm: %s\n", extensions.c_str());
2832 goto noSVM;
2833 }
2834 cl_platform_id p = NULL;
2835 CV_OCL_CHECK(status = clGetDeviceInfo((cl_device_id)device.ptr(), CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &p, NULL));
2836 svmFunctions.fn_clSVMAlloc = (clSVMAllocAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clSVMAllocAMD");
2837 svmFunctions.fn_clSVMFree = (clSVMFreeAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clSVMFreeAMD");
2838 svmFunctions.fn_clSetKernelArgSVMPointer = (clSetKernelArgSVMPointerAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clSetKernelArgSVMPointerAMD");
2839 //svmFunctions.fn_clSetKernelExecInfo = (clSetKernelExecInfoAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clSetKernelExecInfoAMD");
2840 //svmFunctions.fn_clEnqueueSVMFree = (clEnqueueSVMFreeAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMFreeAMD");
2841 svmFunctions.fn_clEnqueueSVMMemcpy = (clEnqueueSVMMemcpyAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMMemcpyAMD");
2842 svmFunctions.fn_clEnqueueSVMMemFill = (clEnqueueSVMMemFillAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMMemFillAMD");
2843 svmFunctions.fn_clEnqueueSVMMap = (clEnqueueSVMMapAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMMapAMD");
2844 svmFunctions.fn_clEnqueueSVMUnmap = (clEnqueueSVMUnmapAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMUnmapAMD");
2845 CV_Assert(svmFunctions.isValid());
2846 }
2847 catch (...)
2848 {
2849 CV_OPENCL_SVM_TRACE_P("Something is totally wrong\n");
2850 goto noSVM;
2851 }
2852 }
2853
2854 svmAvailable = true;
2855 svmEnabled = !svm::checkDisableSVM();
2856 svmInitialized = true;
2857 CV_OPENCL_SVM_TRACE_P("OpenCV OpenCL SVM support initialized\n");
2858 return;
2859 noSVM:
2860 CV_OPENCL_SVM_TRACE_P("OpenCL SVM is not detected\n");
2861 svmAvailable = false;
2862 svmEnabled = false;
2863 svmCapabilities.value_ = 0;
2864 svmInitialized = true;
2865 svmFunctions.fn_clSVMAlloc = NULL;
2866 return;
2867 }
2868
2869 std::shared_ptr<OpenCLSVMBufferPoolImpl> bufferPoolSVM_;
2870
2871 OpenCLSVMBufferPoolImpl& getBufferPoolSVM() const
2872 {
2873 _init_buffer_pools();
2874 CV_DbgAssert(bufferPoolSVM_);
2875 return *bufferPoolSVM_.get();
2876 }
2877#endif
2878
2879 friend class Program;
2880};
2881
2882
2883Context::Context() CV_NOEXCEPT
2884{
2885 p = 0;
2886}
2887
2888Context::~Context()
2889{
2890 release();
2891}
2892
2893// deprecated
2894Context::Context(int dtype)
2895{
2896 p = 0;
2897 create(dtype);
2898}
2899
2900void Context::release()
2901{
2902 if (p)
2903 {
2904 p->release();
2905 p = NULL;
2906 }
2907}
2908
2909bool Context::create()
2910{
2911 release();
2912 if (!haveOpenCL())
2913 return false;
2914 p = Impl::findOrCreateContext(configuration_: std::string());
2915 if (p && p->handle)
2916 return true;
2917 release();
2918 return false;
2919}
2920
2921// deprecated
2922bool Context::create(int dtype)
2923{
2924 if( !haveOpenCL() )
2925 return false;
2926 release();
2927 if (dtype == CL_DEVICE_TYPE_DEFAULT || (unsigned)dtype == (unsigned)CL_DEVICE_TYPE_ALL)
2928 {
2929 p = Impl::findOrCreateContext(configuration_: "");
2930 }
2931 else if (dtype == CL_DEVICE_TYPE_GPU)
2932 {
2933 p = Impl::findOrCreateContext(configuration_: ":GPU:");
2934 }
2935 else if (dtype == CL_DEVICE_TYPE_CPU)
2936 {
2937 p = Impl::findOrCreateContext(configuration_: ":CPU:");
2938 }
2939 else
2940 {
2941 CV_LOG_ERROR(NULL, "OpenCL: Can't recognize OpenCV device type=" << dtype);
2942 }
2943 if (p && !p->handle)
2944 {
2945 release();
2946 }
2947 return p != 0;
2948}
2949
2950Context::Context(const Context& c)
2951{
2952 p = (Impl*)c.p;
2953 if(p)
2954 p->addref();
2955}
2956
2957Context& Context::operator = (const Context& c)
2958{
2959 Impl* newp = (Impl*)c.p;
2960 if(newp)
2961 newp->addref();
2962 if(p)
2963 p->release();
2964 p = newp;
2965 return *this;
2966}
2967
2968Context::Context(Context&& c) CV_NOEXCEPT
2969{
2970 p = c.p;
2971 c.p = nullptr;
2972}
2973
2974Context& Context::operator = (Context&& c) CV_NOEXCEPT
2975{
2976 if (this != &c) {
2977 if(p)
2978 p->release();
2979 p = c.p;
2980 c.p = nullptr;
2981 }
2982 return *this;
2983}
2984
2985void* Context::ptr() const
2986{
2987 return p == NULL ? NULL : p->handle;
2988}
2989
2990size_t Context::ndevices() const
2991{
2992 return p ? p->devices.size() : 0;
2993}
2994
2995Device& Context::device(size_t idx) const
2996{
2997 static Device dummy;
2998 return !p || idx >= p->devices.size() ? dummy : p->devices[idx];
2999}
3000
3001Context& Context::getDefault(bool initialize)
3002{
3003 auto& c = OpenCLExecutionContext::getCurrent();
3004 if (!c.empty())
3005 {
3006 auto& ctx = c.getContext();
3007 return ctx;
3008 }
3009
3010 CV_UNUSED(initialize);
3011 static Context dummy;
3012 return dummy;
3013}
3014
3015Program Context::getProg(const ProgramSource& prog,
3016 const String& buildopts, String& errmsg)
3017{
3018 return p ? p->getProg(src: prog, buildflags: buildopts, errmsg) : Program();
3019}
3020
3021void Context::unloadProg(Program& prog)
3022{
3023 if (p)
3024 p->unloadProg(prog);
3025}
3026
3027/* static */
3028Context Context::fromHandle(void* context)
3029{
3030 Context ctx;
3031 ctx.p = Impl::findOrCreateContext(h: (cl_context)context);
3032 return ctx;
3033}
3034
3035/* static */
3036Context Context::fromDevice(const ocl::Device& device)
3037{
3038 Context ctx;
3039 ctx.p = Impl::findOrCreateContext(device);
3040 return ctx;
3041}
3042
3043/* static */
3044Context Context::create(const std::string& configuration)
3045{
3046 Context ctx;
3047 ctx.p = Impl::findOrCreateContext(configuration_: configuration);
3048 return ctx;
3049}
3050
3051void* Context::getOpenCLContextProperty(int propertyId) const
3052{
3053 if (p == NULL)
3054 return nullptr;
3055 ::size_t size = 0;
3056 CV_OCL_CHECK(clGetContextInfo(p->handle, CL_CONTEXT_PROPERTIES, 0, NULL, &size));
3057 std::vector<cl_context_properties> prop(size / sizeof(cl_context_properties), (cl_context_properties)0);
3058 CV_OCL_CHECK(clGetContextInfo(p->handle, CL_CONTEXT_PROPERTIES, size, prop.data(), NULL));
3059 for (size_t i = 0; i < prop.size(); i += 2)
3060 {
3061 if (prop[i] == (cl_context_properties)propertyId)
3062 {
3063 CV_LOG_DEBUG(NULL, "OpenCL: found context property=" << propertyId << ") => " << (void*)prop[i + 1]);
3064 return (void*)prop[i + 1];
3065 }
3066 }
3067 return nullptr;
3068}
3069
3070#ifdef HAVE_OPENCL_SVM
// Reports whether SVM usage is currently enabled for this context.
// Requires a non-empty context; SVM detection (svmInit) runs on first use.
bool Context::useSVM() const
{
    Context::Impl* i = p;
    CV_Assert(i);
    if (!i->svmInitialized)
        i->svmInit();
    return i->svmEnabled;
}
// Enables or disables SVM usage for this context.
// Requires a non-empty context; SVM detection (svmInit) runs on first use.
// Raises StsError when enabling is requested but SVM was not detected as available.
void Context::setUseSVM(bool enabled)
{
    Context::Impl* i = p;
    CV_Assert(i);
    if (!i->svmInitialized)
        i->svmInit();
    if (enabled && !i->svmAvailable)
    {
        CV_Error(Error::StsError, "OpenCL Shared Virtual Memory (SVM) is not supported by OpenCL device");
    }
    i->svmEnabled = enabled;
}
3091#else
3092bool Context::useSVM() const { return false; }
3093void Context::setUseSVM(bool enabled) { CV_Assert(!enabled); }
3094#endif
3095
3096#ifdef HAVE_OPENCL_SVM
3097namespace svm {
3098
// Returns the SVM capabilities recorded for 'context'.
// Requires a non-empty context; SVM detection (svmInit) runs on first use.
const SVMCapabilities getSVMCapabilitites(const ocl::Context& context)
{
    Context::Impl* i = context.p;
    CV_Assert(i);
    if (!i->svmInitialized)
        i->svmInit();
    return i->svmCapabilities;
}
3107
// Returns a pointer to the SVM function table stored in the context implementation.
// Asserts that SVM detection has already run and that an allocation function is set;
// this function does not trigger detection itself.
CV_EXPORTS const SVMFunctions* getSVMFunctions(const ocl::Context& context)
{
    Context::Impl* i = context.p;
    CV_Assert(i);
    CV_Assert(i->svmInitialized); // getSVMCapabilitites() must be called first
    CV_Assert(i->svmFunctions.fn_clSVMAlloc != NULL);
    return &i->svmFunctions;
}
3116
3117CV_EXPORTS bool useSVM(UMatUsageFlags usageFlags)
3118{
3119 if (checkForceSVMUmatUsage())
3120 return true;
3121 if (checkDisableSVMUMatUsage())
3122 return false;
3123 if ((usageFlags & USAGE_ALLOCATE_SHARED_MEMORY) != 0)
3124 return true;
3125 return false; // don't use SVM by default
3126}
3127
3128} // namespace cv::ocl::svm
3129#endif // HAVE_OPENCL_SVM
3130
// Out-of-line destructor definition; intentionally empty.
Context::UserContext::~UserContext()
{
}
3134
// Stores 'userContext' under 'typeId' in the implementation's user-context storage.
// Requires a non-empty context (asserted).
void Context::setUserContext(std::type_index typeId, const std::shared_ptr<Context::UserContext>& userContext)
{
    CV_Assert(p);
    p->setUserContext(typeId, userContext);
}
3140
// Returns the user context stored under 'typeId' in the implementation,
// as produced by Impl::getUserContext(). Requires a non-empty context (asserted).
std::shared_ptr<Context::UserContext> Context::getUserContext(std::type_index typeId)
{
    CV_Assert(p);
    return p->getUserContext(typeId);
}
3146
// Reads CL_PLATFORM_NAME for platform 'id' into 'name'.
// Uses the usual two-step query: first the required size, then the data itself.
// Failures of clGetPlatformInfo are reported through CV_OCL_CHECK.
static void get_platform_name(cl_platform_id id, String& name)
{
    // get platform name string length
    size_t sz = 0;
    CV_OCL_CHECK(clGetPlatformInfo(id, CL_PLATFORM_NAME, 0, 0, &sz));

    // get platform name string
    // (one extra byte is reserved for the terminator written below)
    AutoBuffer<char> buf(sz + 1);
    CV_OCL_CHECK(clGetPlatformInfo(id, CL_PLATFORM_NAME, sz, buf.data(), 0));

    // just in case, ensure trailing zero for ASCIIZ string
    buf[sz] = 0;

    name = buf.data();
}
3162
3163/*
3164// Attaches OpenCL context to OpenCV
3165*/
3166void attachContext(const String& platformName, void* platformID, void* context, void* deviceID)
3167{
3168 auto ctx = OpenCLExecutionContext::create(platformName, platformID, context, deviceID);
3169 ctx.bind();
3170}
3171
3172/* static */
3173OpenCLExecutionContext OpenCLExecutionContext::create(
3174 const std::string& platformName, void* platformID, void* context, void* deviceID
3175)
3176{
3177 if (!haveOpenCL())
3178 CV_Error(cv::Error::OpenCLApiCallError, "OpenCL runtime is not available!");
3179
3180 cl_uint cnt = 0;
3181 CV_OCL_CHECK(clGetPlatformIDs(0, 0, &cnt));
3182
3183 if (cnt == 0)
3184 CV_Error(cv::Error::OpenCLApiCallError, "No OpenCL platform available!");
3185
3186 std::vector<cl_platform_id> platforms(cnt);
3187
3188 CV_OCL_CHECK(clGetPlatformIDs(cnt, &platforms[0], 0));
3189
3190 bool platformAvailable = false;
3191
3192 // check if external platformName contained in list of available platforms in OpenCV
3193 for (unsigned int i = 0; i < cnt; i++)
3194 {
3195 String availablePlatformName;
3196 get_platform_name(id: platforms[i], name&: availablePlatformName);
3197 // external platform is found in the list of available platforms
3198 if (platformName == availablePlatformName)
3199 {
3200 platformAvailable = true;
3201 break;
3202 }
3203 }
3204
3205 if (!platformAvailable)
3206 CV_Error(cv::Error::OpenCLApiCallError, "No matched platforms available!");
3207
3208 // check if platformID corresponds to platformName
3209 String actualPlatformName;
3210 get_platform_name(id: (cl_platform_id)platformID, name&: actualPlatformName);
3211 if (platformName != actualPlatformName)
3212 CV_Error(cv::Error::OpenCLApiCallError, "No matched platforms available!");
3213
3214 OpenCLExecutionContext ctx;
3215 ctx.p = std::make_shared<OpenCLExecutionContext::Impl>(args: (cl_platform_id)platformID, args: (cl_context)context, args: (cl_device_id)deviceID);
3216 CV_OCL_CHECK(clReleaseContext((cl_context)context));
3217 CV_OCL_CHECK(clReleaseDevice((cl_device_id)deviceID));
3218 return ctx;
3219}
3220
3221void initializeContextFromHandle(Context& ctx, void* _platform, void* _context, void* _device)
3222{
3223 // internal call, less checks
3224 cl_platform_id platformID = (cl_platform_id)_platform;
3225 cl_context context = (cl_context)_context;
3226 cl_device_id deviceID = (cl_device_id)_device;
3227
3228 std::string platformName = PlatformInfo(&platformID).name();
3229
3230 auto clExecCtx = OpenCLExecutionContext::create(platformName, platformID, context, deviceID);
3231 CV_Assert(!clExecCtx.empty());
3232 ctx = clExecCtx.getContext();
3233}
3234
3235/////////////////////////////////////////// Queue /////////////////////////////////////////////
3236
3237struct Queue::Impl
3238{
3239 inline void __init()
3240 {
3241 refcount = 1;
3242 handle = 0;
3243 isProfilingQueue_ = false;
3244 }
3245
3246 Impl(cl_command_queue q)
3247 {
3248 __init();
3249 handle = q;
3250
3251 cl_command_queue_properties props = 0;
3252 CV_OCL_CHECK(clGetCommandQueueInfo(handle, CL_QUEUE_PROPERTIES, sizeof(cl_command_queue_properties), &props, NULL));
3253 isProfilingQueue_ = !!(props & CL_QUEUE_PROFILING_ENABLE);
3254 }
3255
3256 Impl(cl_command_queue q, bool isProfilingQueue)
3257 {
3258 __init();
3259 handle = q;
3260 isProfilingQueue_ = isProfilingQueue;
3261 }
3262
3263 Impl(const Context& c, const Device& d, bool withProfiling = false)
3264 {
3265 __init();
3266
3267 const Context* pc = &c;
3268 cl_context ch = (cl_context)pc->ptr();
3269 if( !ch )
3270 {
3271 pc = &Context::getDefault();
3272 ch = (cl_context)pc->ptr();
3273 }
3274 cl_device_id dh = (cl_device_id)d.ptr();
3275 if( !dh )
3276 dh = (cl_device_id)pc->device(idx: 0).ptr();
3277 cl_int retval = 0;
3278 cl_command_queue_properties props = withProfiling ? CL_QUEUE_PROFILING_ENABLE : 0;
3279 CV_OCL_DBG_CHECK_(handle = clCreateCommandQueue(ch, dh, props, &retval), retval);
3280 isProfilingQueue_ = withProfiling;
3281 }
3282
3283 ~Impl()
3284 {
3285#ifdef _WIN32
3286 if (!cv::__termination)
3287#endif
3288 {
3289 if(handle)
3290 {
3291 CV_OCL_DBG_CHECK(clFinish(handle));
3292 CV_OCL_DBG_CHECK(clReleaseCommandQueue(handle));
3293 handle = NULL;
3294 }
3295 }
3296 }
3297
3298 const cv::ocl::Queue& getProfilingQueue(const cv::ocl::Queue& self)
3299 {
3300 if (isProfilingQueue_)
3301 return self;
3302
3303 if (profiling_queue_.ptr())
3304 return profiling_queue_;
3305
3306 cl_context ctx = 0;
3307 CV_OCL_CHECK(clGetCommandQueueInfo(handle, CL_QUEUE_CONTEXT, sizeof(cl_context), &ctx, NULL));
3308
3309 cl_device_id device = 0;
3310 CV_OCL_CHECK(clGetCommandQueueInfo(handle, CL_QUEUE_DEVICE, sizeof(cl_device_id), &device, NULL));
3311
3312 cl_int result = CL_SUCCESS;
3313 cl_command_queue_properties props = CL_QUEUE_PROFILING_ENABLE;
3314 cl_command_queue q = clCreateCommandQueue(ctx, device, props, &result);
3315 CV_OCL_DBG_CHECK_RESULT(result, "clCreateCommandQueue(with CL_QUEUE_PROFILING_ENABLE)");
3316
3317 Queue queue;
3318 queue.p = new Impl(q, true);
3319 profiling_queue_ = queue;
3320
3321 return profiling_queue_;
3322 }
3323
3324 IMPLEMENT_REFCOUNTABLE();
3325
3326 cl_command_queue handle;
3327 bool isProfilingQueue_;
3328 cv::ocl::Queue profiling_queue_;
3329};
3330
3331Queue::Queue() CV_NOEXCEPT
3332{
3333 p = 0;
3334}
3335
3336Queue::Queue(const Context& c, const Device& d)
3337{
3338 p = 0;
3339 create(c, d);
3340}
3341
3342Queue::Queue(const Queue& q)
3343{
3344 p = q.p;
3345 if(p)
3346 p->addref();
3347}
3348
3349Queue& Queue::operator = (const Queue& q)
3350{
3351 Impl* newp = (Impl*)q.p;
3352 if(newp)
3353 newp->addref();
3354 if(p)
3355 p->release();
3356 p = newp;
3357 return *this;
3358}
3359
3360Queue::Queue(Queue&& q) CV_NOEXCEPT
3361{
3362 p = q.p;
3363 q.p = nullptr;
3364}
3365
3366Queue& Queue::operator = (Queue&& q) CV_NOEXCEPT
3367{
3368 if (this != &q) {
3369 if(p)
3370 p->release();
3371 p = q.p;
3372 q.p = nullptr;
3373 }
3374 return *this;
3375}
3376
3377Queue::~Queue()
3378{
3379 if(p)
3380 p->release();
3381}
3382
3383bool Queue::create(const Context& c, const Device& d)
3384{
3385 if(p)
3386 p->release();
3387 p = new Impl(c, d);
3388 return p->handle != 0;
3389}
3390
3391void Queue::finish()
3392{
3393 if(p && p->handle)
3394 {
3395 CV_OCL_DBG_CHECK(clFinish(p->handle));
3396 }
3397}
3398
3399const Queue& Queue::getProfilingQueue() const
3400{
3401 CV_Assert(p);
3402 return p->getProfilingQueue(self: *this);
3403}
3404
3405void* Queue::ptr() const
3406{
3407 return p ? p->handle : 0;
3408}
3409
3410Queue& Queue::getDefault()
3411{
3412 auto& c = OpenCLExecutionContext::getCurrent();
3413 if (!c.empty())
3414 {
3415 auto& q = c.getQueue();
3416 return q;
3417 }
3418 static Queue dummy;
3419 return dummy;
3420}
3421
3422static cl_command_queue getQueue(const Queue& q)
3423{
3424 cl_command_queue qq = (cl_command_queue)q.ptr();
3425 if(!qq)
3426 qq = (cl_command_queue)Queue::getDefault().ptr();
3427 return qq;
3428}
3429
3430/////////////////////////////////////////// KernelArg /////////////////////////////////////////////
3431
3432KernelArg::KernelArg() CV_NOEXCEPT
3433 : flags(0), m(0), obj(0), sz(0), wscale(1), iwscale(1)
3434{
3435}
3436
3437KernelArg::KernelArg(int _flags, UMat* _m, int _wscale, int _iwscale, const void* _obj, size_t _sz)
3438 : flags(_flags), m(_m), obj(_obj), sz(_sz), wscale(_wscale), iwscale(_iwscale)
3439{
3440 CV_Assert(_flags == LOCAL || _flags == CONSTANT || _m != NULL);
3441}
3442
3443KernelArg KernelArg::Constant(const Mat& m)
3444{
3445 CV_Assert(m.isContinuous());
3446 return KernelArg(CONSTANT, 0, 0, 0, m.ptr(), m.total()*m.elemSize());
3447}
3448
3449/////////////////////////////////////////// Kernel /////////////////////////////////////////////
3450
3451struct Kernel::Impl
3452{
3453 Impl(const char* kname, const Program& prog) :
3454 refcount(1), handle(NULL), isInProgress(false), isAsyncRun(false), nu(0)
3455 {
3456 cl_program ph = (cl_program)prog.ptr();
3457 cl_int retval = 0;
3458 name = kname;
3459 if (ph)
3460 {
3461 handle = clCreateKernel(ph, kname, &retval);
3462 CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clCreateKernel('%s')", kname).c_str());
3463 }
3464 for( int i = 0; i < MAX_ARRS; i++ )
3465 u[i] = 0;
3466 haveTempDstUMats = false;
3467 haveTempSrcUMats = false;
3468 }
3469
3470 void cleanupUMats()
3471 {
3472 bool exceptionOccurred = false;
3473 for( int i = 0; i < MAX_ARRS; i++ )
3474 {
3475 if( u[i] )
3476 {
3477 if( CV_XADD(&u[i]->urefcount, -1) == 1 )
3478 {
3479 u[i]->flags |= UMatData::ASYNC_CLEANUP;
3480 try
3481 {
3482 u[i]->currAllocator->deallocate(data: u[i]);
3483 }
3484 catch(const std::exception& exc)
3485 {
3486 // limited by legacy before C++11, therefore log and
3487 // remember some exception occurred to throw below
3488 CV_LOG_ERROR(NULL, "OCL: Unexpected C++ exception in OpenCL Kernel::Impl::cleanupUMats(): " << exc.what());
3489 exceptionOccurred = true;
3490 }
3491 }
3492 u[i] = 0;
3493 }
3494 }
3495 nu = 0;
3496 haveTempDstUMats = false;
3497 haveTempSrcUMats = false;
3498 CV_Assert(!exceptionOccurred);
3499 }
3500
3501 void addUMat(const UMat& m, bool dst)
3502 {
3503 CV_Assert(nu < MAX_ARRS && m.u && m.u->urefcount > 0);
3504 u[nu] = m.u;
3505 CV_XADD(&m.u->urefcount, 1);
3506 nu++;
3507 if(dst && m.u->tempUMat())
3508 haveTempDstUMats = true;
3509 if(m.u->originalUMatData == NULL && m.u->tempUMat())
3510 haveTempSrcUMats = true; // UMat is created on RAW memory (without proper lifetime management, even from Mat)
3511 }
3512
3513 /// Preserve image lifetime (while it is specified as Kernel argument)
3514 void registerImageArgument(int arg, const Image2D& image)
3515 {
3516 CV_CheckGE(arg, 0, "");
3517 if (arg < (int)shadow_images.size() && shadow_images[arg].ptr() != image.ptr()) // TODO future: replace ptr => impl (more strong check)
3518 {
3519 CV_Check(arg, !isInProgress, "ocl::Kernel: clearing of pending Image2D arguments is not allowed");
3520 }
3521 shadow_images.reserve(n: MAX_ARRS);
3522 shadow_images.resize(new_size: std::max(a: shadow_images.size(), b: (size_t)arg + 1));
3523 shadow_images[arg] = image;
3524 }
3525
3526 void finit(cl_event e)
3527 {
3528 CV_UNUSED(e);
3529 isInProgress = false;
3530 try
3531 {
3532 cleanupUMats();
3533 }
3534 catch(...)
3535 {
3536 release();
3537 throw;
3538 }
3539 release();
3540 }
3541
3542 bool run(int dims, size_t _globalsize[], size_t _localsize[],
3543 bool sync, int64* timeNS, const Queue& q);
3544
3545 ~Impl()
3546 {
3547 if(handle)
3548 {
3549 CV_OCL_DBG_CHECK(clReleaseKernel(handle));
3550 }
3551 }
3552
3553 IMPLEMENT_REFCOUNTABLE();
3554
3555 cv::String name;
3556 cl_kernel handle;
3557 enum { MAX_ARRS = 16 };
3558 UMatData* u[MAX_ARRS];
3559 bool isInProgress;
3560 bool isAsyncRun; // true if kernel was scheduled in async mode
3561 int nu;
3562 std::vector<Image2D> shadow_images;
3563 bool haveTempDstUMats;
3564 bool haveTempSrcUMats;
3565};
3566
3567}} // namespace cv::ocl
3568
3569extern "C" {
3570
3571static void CL_CALLBACK oclCleanupCallback(cl_event e, cl_int, void *p)
3572{
3573 try
3574 {
3575 ((cv::ocl::Kernel::Impl*)p)->finit(e);
3576 }
3577 catch (const cv::Exception& exc)
3578 {
3579 CV_LOG_ERROR(NULL, "OCL: Unexpected OpenCV exception in OpenCL callback: " << exc.what());
3580 }
3581 catch (const std::exception& exc)
3582 {
3583 CV_LOG_ERROR(NULL, "OCL: Unexpected C++ exception in OpenCL callback: " << exc.what());
3584 }
3585 catch (...)
3586 {
3587 CV_LOG_ERROR(NULL, "OCL: Unexpected unknown C++ exception in OpenCL callback");
3588 }
3589}
3590
3591}
3592
3593namespace cv { namespace ocl {
3594
3595Kernel::Kernel() CV_NOEXCEPT
3596{
3597 p = 0;
3598}
3599
3600Kernel::Kernel(const char* kname, const Program& prog)
3601{
3602 p = 0;
3603 create(kname, prog);
3604}
3605
3606Kernel::Kernel(const char* kname, const ProgramSource& src,
3607 const String& buildopts, String* errmsg)
3608{
3609 p = 0;
3610 create(kname, prog: src, buildopts, errmsg);
3611}
3612
3613Kernel::Kernel(const Kernel& k)
3614{
3615 p = k.p;
3616 if(p)
3617 p->addref();
3618}
3619
3620Kernel& Kernel::operator = (const Kernel& k)
3621{
3622 Impl* newp = (Impl*)k.p;
3623 if(newp)
3624 newp->addref();
3625 if(p)
3626 p->release();
3627 p = newp;
3628 return *this;
3629}
3630
3631Kernel::Kernel(Kernel&& k) CV_NOEXCEPT
3632{
3633 p = k.p;
3634 k.p = nullptr;
3635}
3636
3637Kernel& Kernel::operator = (Kernel&& k) CV_NOEXCEPT
3638{
3639 if (this != &k) {
3640 if(p)
3641 p->release();
3642 p = k.p;
3643 k.p = nullptr;
3644 }
3645 return *this;
3646}
3647
3648Kernel::~Kernel()
3649{
3650 if(p)
3651 p->release();
3652}
3653
3654bool Kernel::create(const char* kname, const Program& prog)
3655{
3656 if(p)
3657 p->release();
3658 p = new Impl(kname, prog);
3659 if(p->handle == 0)
3660 {
3661 p->release();
3662 p = 0;
3663 }
3664#ifdef CV_OPENCL_RUN_ASSERT // check kernel compilation fails
3665 CV_Assert(p);
3666#endif
3667 return p != 0;
3668}
3669
3670bool Kernel::create(const char* kname, const ProgramSource& src,
3671 const String& buildopts, String* errmsg)
3672{
3673 if(p)
3674 {
3675 p->release();
3676 p = 0;
3677 }
3678 String tempmsg;
3679 if( !errmsg ) errmsg = &tempmsg;
3680 const Program prog = Context::getDefault().getProg(prog: src, buildopts, errmsg&: *errmsg);
3681 return create(kname, prog);
3682}
3683
3684void* Kernel::ptr() const
3685{
3686 return p ? p->handle : 0;
3687}
3688
3689bool Kernel::empty() const
3690{
3691 return ptr() == 0;
3692}
3693
3694static cv::String dumpValue(size_t sz, const void* p)
3695{
3696 if (!p)
3697 return "NULL";
3698 if (sz == 2)
3699 return cv::format(fmt: "%d / %uu / 0x%04x", *(short*)p, *(unsigned short*)p, *(short*)p);
3700 if (sz == 4)
3701 return cv::format(fmt: "%d / %uu / 0x%08x / %g", *(int*)p, *(int*)p, *(int*)p, *(float*)p);
3702 if (sz == 8)
3703 return cv::format(fmt: "%lld / %lluu / 0x%16llx / %g", *(long long*)p, *(long long*)p, *(long long*)p, *(double*)p);
3704 return cv::format(fmt: "%p", p);
3705}
3706
3707int Kernel::set(int i, const void* value, size_t sz)
3708{
3709 if (!p || !p->handle)
3710 return -1;
3711 if (i < 0)
3712 return i;
3713 if( i == 0 )
3714 p->cleanupUMats();
3715
3716 cl_int retval = clSetKernelArg(p->handle, (cl_uint)i, sz, value);
3717 CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clSetKernelArg('%s', arg_index=%d, size=%d, value=%s)", p->name.c_str(), (int)i, (int)sz, dumpValue(sz, value).c_str()).c_str());
3718 if (retval != CL_SUCCESS)
3719 return -1;
3720 return i+1;
3721}
3722
3723int Kernel::set(int i, const Image2D& image2D)
3724{
3725 cl_mem h = (cl_mem)image2D.ptr();
3726 int res = set(i, value: &h, sz: sizeof(h));
3727 if (res >= 0)
3728 p->registerImageArgument(arg: i, image: image2D);
3729 return res;
3730}
3731
3732int Kernel::set(int i, const UMat& m)
3733{
3734 return set(i, arg: KernelArg(KernelArg::READ_WRITE, (UMat*)&m));
3735}
3736
3737int Kernel::set(int i, const KernelArg& arg)
3738{
3739 if( !p || !p->handle )
3740 return -1;
3741 if (i < 0)
3742 {
3743 CV_LOG_ERROR(NULL, cv::format("OpenCL: Kernel(%s)::set(arg_index=%d): negative arg_index",
3744 p->name.c_str(), (int)i));
3745 return i;
3746 }
3747 if( i == 0 )
3748 p->cleanupUMats();
3749 cl_int status = 0;
3750 if( arg.m )
3751 {
3752 AccessFlag accessFlags = ((arg.flags & KernelArg::READ_ONLY) ? ACCESS_READ : static_cast<AccessFlag>(0)) |
3753 ((arg.flags & KernelArg::WRITE_ONLY) ? ACCESS_WRITE : static_cast<AccessFlag>(0));
3754 bool ptronly = (arg.flags & KernelArg::PTR_ONLY) != 0;
3755 if (ptronly && arg.m->empty())
3756 {
3757 cl_mem h_null = (cl_mem)NULL;
3758 status = clSetKernelArg(p->handle, (cl_uint)i, sizeof(h_null), &h_null);
3759 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, cl_mem=NULL)", p->name.c_str(), (int)i).c_str());
3760 return i + 1;
3761 }
3762 cl_mem h = (cl_mem)arg.m->handle(accessFlags);
3763
3764 if (!h)
3765 {
3766 CV_LOG_ERROR(NULL, cv::format("OpenCL: Kernel(%s)::set(arg_index=%d, flags=%d): can't create cl_mem handle for passed UMat buffer (addr=%p)",
3767 p->name.c_str(), (int)i, (int)arg.flags, arg.m));
3768 p->release();
3769 p = 0;
3770 return -1;
3771 }
3772
3773#ifdef HAVE_OPENCL_SVM
3774 if ((arg.m->u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
3775 {
3776 const Context& ctx = Context::getDefault();
3777 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
3778 uchar*& svmDataPtr = (uchar*&)arg.m->u->handle;
3779 CV_OPENCL_SVM_TRACE_P("clSetKernelArgSVMPointer: %p\n", svmDataPtr);
3780#if 1 // TODO
3781 status = svmFns->fn_clSetKernelArgSVMPointer(p->handle, (cl_uint)i, svmDataPtr);
3782#else
3783 status = svmFns->fn_clSetKernelArgSVMPointer(p->handle, (cl_uint)i, &svmDataPtr);
3784#endif
3785 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArgSVMPointer('%s', arg_index=%d, ptr=%p)", p->name.c_str(), (int)i, (void*)svmDataPtr).c_str());
3786 }
3787 else
3788#endif
3789 {
3790 status = clSetKernelArg(p->handle, (cl_uint)i, sizeof(h), &h);
3791 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, cl_mem=%p)", p->name.c_str(), (int)i, (void*)h).c_str());
3792 }
3793
3794 if (ptronly)
3795 {
3796 i++;
3797 }
3798 else if( arg.m->dims <= 2 )
3799 {
3800 UMat2D u2d(*arg.m);
3801 status = clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u2d.step), &u2d.step);
3802 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, step_value=%d)", p->name.c_str(), (int)(i+1), (int)u2d.step).c_str());
3803 status = clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u2d.offset), &u2d.offset);
3804 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, offset_value=%d)", p->name.c_str(), (int)(i+2), (int)u2d.offset).c_str());
3805 i += 3;
3806
3807 if( !(arg.flags & KernelArg::NO_SIZE) )
3808 {
3809 int cols = u2d.cols*arg.wscale/arg.iwscale;
3810 status = clSetKernelArg(p->handle, (cl_uint)i, sizeof(u2d.rows), &u2d.rows);
3811 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, rows_value=%d)", p->name.c_str(), (int)i, (int)u2d.rows).c_str());
3812 status = clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(cols), &cols);
3813 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, cols_value=%d)", p->name.c_str(), (int)(i+1), (int)cols).c_str());
3814 i += 2;
3815 }
3816 }
3817 else
3818 {
3819 UMat3D u3d(*arg.m);
3820 status = clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u3d.slicestep), &u3d.slicestep);
3821 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, slicestep_value=%d)", p->name.c_str(), (int)(i+1), (int)u3d.slicestep).c_str());
3822 status = clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u3d.step), &u3d.step);
3823 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, step_value=%d)", p->name.c_str(), (int)(i+2), (int)u3d.step).c_str());
3824 status = clSetKernelArg(p->handle, (cl_uint)(i+3), sizeof(u3d.offset), &u3d.offset);
3825 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, offset_value=%d)", p->name.c_str(), (int)(i+3), (int)u3d.offset).c_str());
3826 i += 4;
3827 if( !(arg.flags & KernelArg::NO_SIZE) )
3828 {
3829 int cols = u3d.cols*arg.wscale/arg.iwscale;
3830 status = clSetKernelArg(p->handle, (cl_uint)i, sizeof(u3d.slices), &u3d.slices);
3831 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, slices_value=%d)", p->name.c_str(), (int)i, (int)u3d.slices).c_str());
3832 status = clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u3d.rows), &u3d.rows);
3833 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, rows_value=%d)", p->name.c_str(), (int)(i+1), (int)u3d.rows).c_str());
3834 status = clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u3d.cols), &cols);
3835 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, cols_value=%d)", p->name.c_str(), (int)(i+2), (int)cols).c_str());
3836 i += 3;
3837 }
3838 }
3839 p->addUMat(m: *arg.m, dst: !!(accessFlags & ACCESS_WRITE));
3840 return i;
3841 }
3842 status = clSetKernelArg(p->handle, (cl_uint)i, arg.sz, arg.obj);
3843 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, size=%d, obj=%p)", p->name.c_str(), (int)i, (int)arg.sz, (void*)arg.obj).c_str());
3844 return i+1;
3845}
3846
3847bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
3848 bool sync, const Queue& q)
3849{
3850 if (!p)
3851 return false;
3852
3853 size_t globalsize[CV_MAX_DIM] = {1,1,1};
3854 size_t total = 1;
3855 CV_Assert(_globalsize != NULL);
3856 for (int i = 0; i < dims; i++)
3857 {
3858 size_t val = _localsize ? _localsize[i] :
3859 dims == 1 ? 64 : dims == 2 ? (i == 0 ? 256 : 8) : dims == 3 ? (8>>(int)(i>0)) : 1;
3860 CV_Assert( val > 0 );
3861 total *= _globalsize[i];
3862 if (_globalsize[i] == 1 && !_localsize)
3863 val = 1;
3864 globalsize[i] = divUp(a: _globalsize[i], b: (unsigned int)val) * val;
3865 }
3866 CV_Assert(total > 0);
3867
3868 return p->run(dims, globalsize: globalsize, _localsize, sync, NULL, q);
3869}
3870
3871
3872bool Kernel::run_(int dims, size_t _globalsize[], size_t _localsize[],
3873 bool sync, const Queue& q)
3874{
3875 CV_Assert(p);
3876 return p->run(dims, _globalsize, _localsize, sync, NULL, q);
3877}
3878
3879
3880static bool isRaiseErrorOnReuseAsyncKernel()
3881{
3882 static bool initialized = false;
3883 static bool value = false;
3884 if (!initialized)
3885 {
3886 value = cv::utils::getConfigurationParameterBool(name: "OPENCV_OPENCL_RAISE_ERROR_REUSE_ASYNC_KERNEL", defaultValue: false);
3887 initialized = true;
3888 }
3889 return value;
3890}
3891
3892bool Kernel::Impl::run(int dims, size_t globalsize[], size_t localsize[],
3893 bool sync, int64* timeNS, const Queue& q)
3894{
3895 CV_INSTRUMENT_REGION_OPENCL_RUN(name.c_str());
3896
3897 if (!handle)
3898 {
3899 CV_LOG_ERROR(NULL, "OpenCL kernel has zero handle: " << name);
3900 return false;
3901 }
3902
3903 if (isAsyncRun)
3904 {
3905 CV_LOG_ERROR(NULL, "OpenCL kernel can't be reused in async mode: " << name);
3906 if (isRaiseErrorOnReuseAsyncKernel())
3907 CV_Assert(0);
3908 return false; // OpenCV 5.0: raise error
3909 }
3910 isAsyncRun = !sync;
3911
3912 if (isInProgress)
3913 {
3914 CV_LOG_ERROR(NULL, "Previous OpenCL kernel launch is not finished: " << name);
3915 if (isRaiseErrorOnReuseAsyncKernel())
3916 CV_Assert(0);
3917 return false; // OpenCV 5.0: raise error
3918 }
3919
3920#if CV_OPENCL_SYNC_RUN_KERNELS
3921 sync = true;
3922#endif
3923
3924 cl_command_queue qq = getQueue(q);
3925 if (haveTempDstUMats)
3926 sync = true;
3927 if (haveTempSrcUMats)
3928 sync = true;
3929 if (timeNS)
3930 sync = true;
3931 cl_event asyncEvent = 0;
3932 cl_int retval = clEnqueueNDRangeKernel(qq, handle, (cl_uint)dims,
3933 NULL, globalsize, localsize, 0, 0,
3934 (sync && !timeNS) ? 0 : &asyncEvent);
3935#if !CV_OPENCL_SHOW_RUN_KERNELS
3936 if (retval != CL_SUCCESS)
3937#endif
3938 {
3939 cv::String msg = cv::format(fmt: "clEnqueueNDRangeKernel('%s', dims=%d, globalsize=%zux%zux%zu, localsize=%s) sync=%s", name.c_str(), (int)dims,
3940 globalsize[0], (dims > 1 ? globalsize[1] : 1), (dims > 2 ? globalsize[2] : 1),
3941 (localsize ? cv::format(fmt: "%zux%zux%zu", localsize[0], (dims > 1 ? localsize[1] : 1), (dims > 2 ? localsize[2] : 1)) : cv::String("NULL")).c_str(),
3942 sync ? "true" : "false"
3943 );
3944 if (retval != CL_SUCCESS)
3945 {
3946 msg = CV_OCL_API_ERROR_MSG(retval, msg.c_str());
3947 }
3948#if CV_OPENCL_TRACE_CHECK
3949 CV_OCL_TRACE_CHECK_RESULT(retval, msg.c_str());
3950#else
3951 printf(format: "%s\n", msg.c_str());
3952 fflush(stdout);
3953#endif
3954 }
3955 if (sync || retval != CL_SUCCESS)
3956 {
3957 CV_OCL_DBG_CHECK(clFinish(qq));
3958 if (timeNS)
3959 {
3960 if (retval == CL_SUCCESS)
3961 {
3962 CV_OCL_DBG_CHECK(clWaitForEvents(1, &asyncEvent));
3963 cl_ulong startTime, stopTime;
3964 CV_OCL_CHECK(clGetEventProfilingInfo(asyncEvent, CL_PROFILING_COMMAND_START, sizeof(startTime), &startTime, NULL));
3965 CV_OCL_CHECK(clGetEventProfilingInfo(asyncEvent, CL_PROFILING_COMMAND_END, sizeof(stopTime), &stopTime, NULL));
3966 *timeNS = (int64)(stopTime - startTime);
3967 }
3968 else
3969 {
3970 *timeNS = -1;
3971 }
3972 }
3973 cleanupUMats();
3974 }
3975 else
3976 {
3977 addref();
3978 isInProgress = true;
3979 CV_OCL_CHECK(clSetEventCallback(asyncEvent, CL_COMPLETE, oclCleanupCallback, this));
3980 }
3981 if (asyncEvent)
3982 CV_OCL_DBG_CHECK(clReleaseEvent(asyncEvent));
3983 return retval == CL_SUCCESS;
3984}
3985
3986bool Kernel::runTask(bool sync, const Queue& q)
3987{
3988 if(!p || !p->handle || p->isInProgress)
3989 return false;
3990
3991 cl_command_queue qq = getQueue(q);
3992 cl_event asyncEvent = 0;
3993 cl_int retval = clEnqueueTask(qq, p->handle, 0, 0, sync ? 0 : &asyncEvent);
3994 CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clEnqueueTask('%s') sync=%s", p->name.c_str(), sync ? "true" : "false").c_str());
3995 if (sync || retval != CL_SUCCESS)
3996 {
3997 CV_OCL_DBG_CHECK(clFinish(qq));
3998 p->cleanupUMats();
3999 }
4000 else
4001 {
4002 p->addref();
4003 p->isInProgress = true;
4004 CV_OCL_CHECK(clSetEventCallback(asyncEvent, CL_COMPLETE, oclCleanupCallback, p));
4005 }
4006 if (asyncEvent)
4007 CV_OCL_DBG_CHECK(clReleaseEvent(asyncEvent));
4008 return retval == CL_SUCCESS;
4009}
4010
4011int64 Kernel::runProfiling(int dims, size_t globalsize[], size_t localsize[], const Queue& q_)
4012{
4013 CV_Assert(p && p->handle && !p->isInProgress);
4014 Queue q = q_.ptr() ? q_ : Queue::getDefault();
4015 CV_Assert(q.ptr());
4016 q.finish(); // call clFinish() on base queue
4017 Queue profilingQueue = q.getProfilingQueue();
4018 int64 timeNs = -1;
4019 bool res = p->run(dims, globalsize, localsize, sync: true, timeNS: &timeNs, q: profilingQueue);
4020 return res ? timeNs : -1;
4021}
4022
4023size_t Kernel::workGroupSize() const
4024{
4025 if(!p || !p->handle)
4026 return 0;
4027 size_t val = 0, retsz = 0;
4028 cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
4029 cl_int status = clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_WORK_GROUP_SIZE, sizeof(val), &val, &retsz);
4030 CV_OCL_CHECK_RESULT(status, "clGetKernelWorkGroupInfo(CL_KERNEL_WORK_GROUP_SIZE)");
4031 return status == CL_SUCCESS ? val : 0;
4032}
4033
4034size_t Kernel::preferedWorkGroupSizeMultiple() const
4035{
4036 if(!p || !p->handle)
4037 return 0;
4038 size_t val = 0, retsz = 0;
4039 cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
4040 cl_int status = clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(val), &val, &retsz);
4041 CV_OCL_CHECK_RESULT(status, "clGetKernelWorkGroupInfo(CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE)");
4042 return status == CL_SUCCESS ? val : 0;
4043}
4044
4045bool Kernel::compileWorkGroupSize(size_t wsz[]) const
4046{
4047 if(!p || !p->handle || !wsz)
4048 return 0;
4049 size_t retsz = 0;
4050 cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
4051 cl_int status = clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, sizeof(wsz[0])*3, wsz, &retsz);
4052 CV_OCL_CHECK_RESULT(status, "clGetKernelWorkGroupInfo(CL_KERNEL_COMPILE_WORK_GROUP_SIZE)");
4053 return status == CL_SUCCESS;
4054}
4055
4056size_t Kernel::localMemSize() const
4057{
4058 if(!p || !p->handle)
4059 return 0;
4060 size_t retsz = 0;
4061 cl_ulong val = 0;
4062 cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
4063 cl_int status = clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_LOCAL_MEM_SIZE, sizeof(val), &val, &retsz);
4064 CV_OCL_CHECK_RESULT(status, "clGetKernelWorkGroupInfo(CL_KERNEL_LOCAL_MEM_SIZE)");
4065 return status == CL_SUCCESS ? (size_t)val : 0;
4066}
4067
4068
4069
4070///////////////////////////////////////// ProgramSource ///////////////////////////////////////////////
4071
4072struct ProgramSource::Impl
4073{
4074 IMPLEMENT_REFCOUNTABLE();
4075
4076 enum KIND {
4077 PROGRAM_SOURCE_CODE = 0,
4078 PROGRAM_BINARIES,
4079 PROGRAM_SPIR,
4080 PROGRAM_SPIRV
4081 } kind_;
4082
4083 Impl(const String& src)
4084 {
4085 init(kind: PROGRAM_SOURCE_CODE, module: cv::String(), name: cv::String());
4086 initFromSource(codeStr: src, codeHash: cv::String());
4087 }
4088 Impl(const String& module, const String& name, const String& codeStr, const String& codeHash)
4089 {
4090 init(kind: PROGRAM_SOURCE_CODE, module, name);
4091 initFromSource(codeStr, codeHash);
4092 }
4093
4094 /// reset fields
4095 void init(enum KIND kind, const String& module, const String& name)
4096 {
4097 refcount = 1;
4098 kind_ = kind;
4099 module_ = module;
4100 name_ = name;
4101
4102 sourceAddr_ = NULL;
4103 sourceSize_ = 0;
4104 isHashUpdated = false;
4105 }
4106
4107 void initFromSource(const String& codeStr, const String& codeHash)
4108 {
4109 codeStr_ = codeStr;
4110 sourceHash_ = codeHash;
4111 if (sourceHash_.empty())
4112 {
4113 updateHash();
4114 }
4115 else
4116 {
4117 isHashUpdated = true;
4118 }
4119 }
4120
4121 void updateHash(const char* hashStr = NULL)
4122 {
4123 if (hashStr)
4124 {
4125 sourceHash_ = cv::String(hashStr);
4126 isHashUpdated = true;
4127 return;
4128 }
4129 uint64 hash = 0;
4130 switch (kind_)
4131 {
4132 case PROGRAM_SOURCE_CODE:
4133 if (sourceAddr_)
4134 {
4135 CV_Assert(codeStr_.empty());
4136 hash = crc64(data: sourceAddr_, size: sourceSize_); // static storage
4137 }
4138 else
4139 {
4140 CV_Assert(!codeStr_.empty());
4141 hash = crc64(data: (uchar*)codeStr_.c_str(), size: codeStr_.size());
4142 }
4143 break;
4144 case PROGRAM_BINARIES:
4145 case PROGRAM_SPIR:
4146 case PROGRAM_SPIRV:
4147 hash = crc64(data: sourceAddr_, size: sourceSize_);
4148 break;
4149 default:
4150 CV_Error(Error::StsInternal, "Internal error");
4151 }
4152 sourceHash_ = cv::format(fmt: "%08jx", (uintmax_t)hash);
4153 isHashUpdated = true;
4154 }
4155
4156 Impl(enum KIND kind,
4157 const String& module, const String& name,
4158 const unsigned char* binary, const size_t size,
4159 const cv::String& buildOptions = cv::String())
4160 {
4161 init(kind, module, name);
4162
4163 sourceAddr_ = binary;
4164 sourceSize_ = size;
4165
4166 buildOptions_ = buildOptions;
4167 }
4168
4169 static ProgramSource fromSourceWithStaticLifetime(const String& module, const String& name,
4170 const char* sourceCodeStaticStr, const char* hashStaticStr,
4171 const cv::String& buildOptions)
4172 {
4173 ProgramSource result;
4174 result.p = new Impl(PROGRAM_SOURCE_CODE, module, name,
4175 (const unsigned char*)sourceCodeStaticStr, strlen(s: sourceCodeStaticStr), buildOptions);
4176 result.p->updateHash(hashStr: hashStaticStr);
4177 return result;
4178 }
4179
4180 static ProgramSource fromBinary(const String& module, const String& name,
4181 const unsigned char* binary, const size_t size,
4182 const cv::String& buildOptions)
4183 {
4184 ProgramSource result;
4185 result.p = new Impl(PROGRAM_BINARIES, module, name, binary, size, buildOptions);
4186 return result;
4187 }
4188
4189 static ProgramSource fromSPIR(const String& module, const String& name,
4190 const unsigned char* binary, const size_t size,
4191 const cv::String& buildOptions)
4192 {
4193 ProgramSource result;
4194 result.p = new Impl(PROGRAM_SPIR, module, name, binary, size, buildOptions);
4195 return result;
4196 }
4197
4198 String module_;
4199 String name_;
4200
4201 // TODO std::vector<ProgramSource> includes_;
4202 String codeStr_; // PROGRAM_SOURCE_CODE only
4203
4204 const unsigned char* sourceAddr_;
4205 size_t sourceSize_;
4206
4207 cv::String buildOptions_;
4208
4209 String sourceHash_;
4210 bool isHashUpdated;
4211
4212 friend struct Program::Impl;
4213 friend struct internal::ProgramEntry;
4214 friend struct Context::Impl;
4215};
4216
4217
4218ProgramSource::ProgramSource() CV_NOEXCEPT
4219{
4220 p = 0;
4221}
4222
4223ProgramSource::ProgramSource(const String& module, const String& name, const String& codeStr, const String& codeHash)
4224{
4225 p = new Impl(module, name, codeStr, codeHash);
4226}
4227
4228ProgramSource::ProgramSource(const char* prog)
4229{
4230 p = new Impl(prog);
4231}
4232
4233ProgramSource::ProgramSource(const String& prog)
4234{
4235 p = new Impl(prog);
4236}
4237
4238ProgramSource::~ProgramSource()
4239{
4240 if(p)
4241 p->release();
4242}
4243
4244ProgramSource::ProgramSource(const ProgramSource& prog)
4245{
4246 p = prog.p;
4247 if(p)
4248 p->addref();
4249}
4250
4251ProgramSource& ProgramSource::operator = (const ProgramSource& prog)
4252{
4253 Impl* newp = (Impl*)prog.p;
4254 if(newp)
4255 newp->addref();
4256 if(p)
4257 p->release();
4258 p = newp;
4259 return *this;
4260}
4261
4262ProgramSource::ProgramSource(ProgramSource&& prog) CV_NOEXCEPT
4263{
4264 p = prog.p;
4265 prog.p = nullptr;
4266}
4267
4268ProgramSource& ProgramSource::operator = (ProgramSource&& prog) CV_NOEXCEPT
4269{
4270 if (this != &prog) {
4271 if(p)
4272 p->release();
4273 p = prog.p;
4274 prog.p = nullptr;
4275 }
4276 return *this;
4277}
4278
4279const String& ProgramSource::source() const
4280{
4281 CV_Assert(p);
4282 CV_Assert(p->kind_ == Impl::PROGRAM_SOURCE_CODE);
4283 CV_Assert(p->sourceAddr_ == NULL); // method returns reference - can't construct temporary object
4284 return p->codeStr_;
4285}
4286
4287ProgramSource::hash_t ProgramSource::hash() const
4288{
4289 CV_Error(Error::StsNotImplemented, "Removed method: ProgramSource::hash()");
4290}
4291
4292ProgramSource ProgramSource::fromBinary(const String& module, const String& name,
4293 const unsigned char* binary, const size_t size,
4294 const cv::String& buildOptions)
4295{
4296 CV_Assert(binary);
4297 CV_Assert(size > 0);
4298 return Impl::fromBinary(module, name, binary, size, buildOptions);
4299}
4300
4301ProgramSource ProgramSource::fromSPIR(const String& module, const String& name,
4302 const unsigned char* binary, const size_t size,
4303 const cv::String& buildOptions)
4304{
4305 CV_Assert(binary);
4306 CV_Assert(size > 0);
4307 return Impl::fromBinary(module, name, binary, size, buildOptions);
4308}
4309
4310
4311internal::ProgramEntry::operator ProgramSource&() const
4312{
4313 if (this->pProgramSource == NULL)
4314 {
4315 cv::AutoLock lock(cv::getInitializationMutex());
4316 if (this->pProgramSource == NULL)
4317 {
4318 ProgramSource ps = ProgramSource::Impl::fromSourceWithStaticLifetime(module: this->module, name: this->name, sourceCodeStaticStr: this->programCode, hashStaticStr: this->programHash, buildOptions: cv::String());
4319 ProgramSource* ptr = new ProgramSource(ps);
4320 const_cast<ProgramEntry*>(this)->pProgramSource = ptr;
4321 }
4322 }
4323 return *this->pProgramSource;
4324}
4325
4326
4327
4328/////////////////////////////////////////// Program /////////////////////////////////////////////
4329
4330static
4331cv::String joinBuildOptions(const cv::String& a, const cv::String& b)
4332{
4333 if (b.empty())
4334 return a;
4335 if (a.empty())
4336 return b;
4337 if (b[0] == ' ')
4338 return a + b;
4339 return a + (cv::String(" ") + b);
4340}
4341
4342struct Program::Impl
4343{
4344 IMPLEMENT_REFCOUNTABLE();
4345
4346 Impl(const ProgramSource& src,
4347 const String& _buildflags, String& errmsg) :
4348 refcount(1),
4349 handle(NULL),
4350 buildflags(_buildflags)
4351 {
4352 const ProgramSource::Impl* src_ = src.getImpl();
4353 CV_Assert(src_);
4354 sourceModule_ = src_->module_;
4355 sourceName_ = src_->name_;
4356 const Context ctx = Context::getDefault();
4357 Device device = ctx.device(idx: 0);
4358 if (ctx.ptr() == NULL || device.ptr() == NULL)
4359 return;
4360 buildflags = joinBuildOptions(a: buildflags, b: src_->buildOptions_);
4361 if (src.getImpl()->kind_ == ProgramSource::Impl::PROGRAM_SOURCE_CODE)
4362 {
4363 if (device.isAMD())
4364 buildflags = joinBuildOptions(a: buildflags, b: " -D AMD_DEVICE");
4365 else if (device.isIntel())
4366 buildflags = joinBuildOptions(a: buildflags, b: " -D INTEL_DEVICE");
4367 const String param_buildExtraOptions = getBuildExtraOptions();
4368 if (!param_buildExtraOptions.empty())
4369 buildflags = joinBuildOptions(a: buildflags, b: param_buildExtraOptions);
4370 }
4371#if CV_OPENCL_SHOW_BUILD_OPTIONS
4372 CV_LOG_INFO(NULL, "OpenCL program '" << sourceModule_ << "/" << sourceName_ << "' options:" << buildflags);
4373#endif
4374 compile(ctx, src_, errmsg);
4375#if CV_OPENCL_SHOW_BUILD_KERNELS
4376 if (handle)
4377 {
4378 size_t retsz = 0;
4379 char kernels_buffer[4096] = {0};
4380 cl_int result = clGetProgramInfo(handle, CL_PROGRAM_KERNEL_NAMES, sizeof(kernels_buffer), &kernels_buffer[0], &retsz);
4381 CV_OCL_DBG_CHECK_RESULT(result, cv::format("clGetProgramInfo(CL_PROGRAM_KERNEL_NAMES: %s/%s)", sourceModule_.c_str(), sourceName_.c_str()).c_str());
4382 if (result == CL_SUCCESS && retsz < sizeof(kernels_buffer))
4383 {
4384 kernels_buffer[retsz] = 0;
4385 CV_LOG_INFO(NULL, "OpenCL program '" << sourceModule_ << "/" << sourceName_ << "' kernels: '" << kernels_buffer << "'");
4386 }
4387 else
4388 {
4389 CV_LOG_ERROR(NULL, "OpenCL program '" << sourceModule_ << "/" << sourceName_ << "' can't retrieve kernel names!");
4390 }
4391 }
4392#endif
4393 }
4394
4395 bool compile(const Context& ctx, const ProgramSource::Impl* src_, String& errmsg)
4396 {
4397 CV_Assert(ctx.getImpl());
4398 CV_Assert(src_);
4399
4400 // We don't cache OpenCL binaries
4401 if (src_->kind_ == ProgramSource::Impl::PROGRAM_BINARIES)
4402 {
4403 CV_LOG_VERBOSE(NULL, 0, "Load program binary... " << src_->module_.c_str() << "/" << src_->name_.c_str());
4404 bool isLoaded = createFromBinary(ctx, binaryAddr: src_->sourceAddr_, binarySize: src_->sourceSize_, errmsg);
4405 return isLoaded;
4406 }
4407 return compileWithCache(ctx, src_, errmsg);
4408 }
4409
4410 bool compileWithCache(const Context& ctx, const ProgramSource::Impl* src_, String& errmsg)
4411 {
4412 CV_Assert(ctx.getImpl());
4413 CV_Assert(src_);
4414 CV_Assert(src_->kind_ != ProgramSource::Impl::PROGRAM_BINARIES);
4415
4416#if OPENCV_HAVE_FILESYSTEM_SUPPORT
4417 OpenCLBinaryCacheConfigurator& config = OpenCLBinaryCacheConfigurator::getSingletonInstance();
4418 const std::string base_dir = config.prepareCacheDirectoryForContext(
4419 ctx_prefix: ctx.getImpl()->getPrefixString(),
4420 cleanup_prefix: ctx.getImpl()->getPrefixBase()
4421 );
4422 const String& hash_str = src_->sourceHash_;
4423 cv::String fname;
4424 if (!base_dir.empty() && !src_->module_.empty() && !src_->name_.empty())
4425 {
4426 CV_Assert(!hash_str.empty());
4427 fname = src_->module_ + "--" + src_->name_ + "_" + hash_str + ".bin";
4428 fname = utils::fs::join(base: base_dir, path: fname);
4429 }
4430 const cv::Ptr<utils::fs::FileLock> fileLock = config.cache_lock_; // can be empty
4431 if (!fname.empty() && CV_OPENCL_CACHE_ENABLE)
4432 {
4433 try
4434 {
4435 std::vector<char> binaryBuf;
4436 bool res = false;
4437 {
4438 cv::utils::optional_shared_lock_guard<cv::utils::fs::FileLock> lock_fs(fileLock.get());
4439 BinaryProgramFile file(fname, hash_str.c_str());
4440 res = file.read(key: buildflags, buf&: binaryBuf);
4441 }
4442 if (res)
4443 {
4444 CV_Assert(!binaryBuf.empty());
4445 CV_LOG_VERBOSE(NULL, 0, "Load program binary from cache: " << src_->module_.c_str() << "/" << src_->name_.c_str());
4446 bool isLoaded = createFromBinary(ctx, buf: binaryBuf, errmsg);
4447 if (isLoaded)
4448 return true;
4449 }
4450 }
4451 catch (const cv::Exception& e)
4452 {
4453 CV_UNUSED(e);
4454 CV_LOG_VERBOSE(NULL, 0, "Can't load OpenCL binary: " + fname << std::endl << e.what());
4455 }
4456 catch (...)
4457 {
4458 CV_LOG_VERBOSE(NULL, 0, "Can't load OpenCL binary: " + fname);
4459 }
4460 }
4461#endif // OPENCV_HAVE_FILESYSTEM_SUPPORT
4462 CV_Assert(handle == NULL);
4463 if (src_->kind_ == ProgramSource::Impl::PROGRAM_SOURCE_CODE)
4464 {
4465 if (!buildFromSources(ctx, src_, errmsg))
4466 {
4467 return false;
4468 }
4469 }
4470 else if (src_->kind_ == ProgramSource::Impl::PROGRAM_SPIR)
4471 {
4472 buildflags = joinBuildOptions(a: buildflags, b: " -x spir");
4473 if ((cv::String(" ") + buildflags).find(s: " -spir-std=") == cv::String::npos)
4474 {
4475 buildflags = joinBuildOptions(a: buildflags, b: " -spir-std=1.2");
4476 }
4477 CV_LOG_VERBOSE(NULL, 0, "Load program SPIR binary... " << src_->module_.c_str() << "/" << src_->name_.c_str());
4478 bool isLoaded = createFromBinary(ctx, binaryAddr: src_->sourceAddr_, binarySize: src_->sourceSize_, errmsg);
4479 if (!isLoaded)
4480 return false;
4481 }
4482 else if (src_->kind_ == ProgramSource::Impl::PROGRAM_SPIRV)
4483 {
4484 CV_Error(Error::StsNotImplemented, "OpenCL: SPIR-V is not supported");
4485 }
4486 else
4487 {
4488 CV_Error(Error::StsInternal, "Internal error");
4489 }
4490 CV_Assert(handle != NULL);
4491#if OPENCV_HAVE_FILESYSTEM_SUPPORT
4492 if (!fname.empty() && CV_OPENCL_CACHE_WRITE)
4493 {
4494 try
4495 {
4496 std::vector<char> binaryBuf;
4497 getProgramBinary(buf&: binaryBuf);
4498 {
4499 cv::utils::optional_lock_guard<cv::utils::fs::FileLock> lock_fs(fileLock.get());
4500 BinaryProgramFile file(fname, hash_str.c_str());
4501 file.write(key: buildflags, buf&: binaryBuf);
4502 }
4503 }
4504 catch (const cv::Exception& e)
4505 {
4506 CV_LOG_WARNING(NULL, "Can't save OpenCL binary into cache: " + fname << std::endl << e.what());
4507 }
4508 catch (...)
4509 {
4510 CV_LOG_WARNING(NULL, "Can't save OpenCL binary into cache: " + fname);
4511 }
4512 }
4513#endif // OPENCV_HAVE_FILESYSTEM_SUPPORT
4514#if CV_OPENCL_VALIDATE_BINARY_PROGRAMS
4515 if (CV_OPENCL_VALIDATE_BINARY_PROGRAMS_VALUE)
4516 {
4517 std::vector<char> binaryBuf;
4518 getProgramBinary(buf&: binaryBuf);
4519 if (!binaryBuf.empty())
4520 {
4521 CV_OCL_DBG_CHECK(clReleaseProgram(handle));
4522 handle = NULL;
4523 createFromBinary(ctx, buf: binaryBuf, errmsg);
4524 }
4525 }
4526#endif
4527 return handle != NULL;
4528 }
4529
4530 void dumpBuildLog_(cl_int result, const cl_device_id* deviceList, String& errmsg)
4531 {
4532 AutoBuffer<char, 4096> buffer; buffer[0] = 0;
4533
4534 size_t retsz = 0;
4535 cl_int log_retval = clGetProgramBuildInfo(handle, deviceList[0],
4536 CL_PROGRAM_BUILD_LOG, 0, 0, &retsz);
4537 if (log_retval == CL_SUCCESS && retsz > 1)
4538 {
4539 buffer.resize(size: retsz + 16);
4540 log_retval = clGetProgramBuildInfo(handle, deviceList[0],
4541 CL_PROGRAM_BUILD_LOG, retsz+1, buffer.data(), &retsz);
4542 if (log_retval == CL_SUCCESS)
4543 {
4544 if (retsz < buffer.size())
4545 buffer[retsz] = 0;
4546 else
4547 buffer[buffer.size() - 1] = 0;
4548 }
4549 else
4550 {
4551 buffer[0] = 0;
4552 }
4553 }
4554
4555 errmsg = String(buffer.data());
4556 printf(format: "OpenCL program build log: %s/%s\nStatus %d: %s\n%s\n%s\n",
4557 sourceModule_.c_str(), sourceName_.c_str(),
4558 result, getOpenCLErrorString(errorCode: result),
4559 buildflags.c_str(), errmsg.c_str());
4560 fflush(stdout);
4561 }
4562
4563 bool buildFromSources(const Context& ctx, const ProgramSource::Impl* src_, String& errmsg)
4564 {
4565 CV_Assert(src_);
4566 CV_Assert(src_->kind_ == ProgramSource::Impl::PROGRAM_SOURCE_CODE);
4567 CV_Assert(handle == NULL);
4568 CV_INSTRUMENT_REGION_OPENCL_COMPILE(cv::format("Build OpenCL program: %s/%s %s options: %s",
4569 sourceModule_.c_str(), sourceName_.c_str(),
4570 src_->sourceHash_.c_str(), buildflags.c_str()).c_str());
4571
4572 CV_LOG_VERBOSE(NULL, 0, "Compile... " << sourceModule_.c_str() << "/" << sourceName_.c_str());
4573
4574 const char* srcptr = src_->sourceAddr_ ? ((const char*)src_->sourceAddr_) : src_->codeStr_.c_str();
4575 size_t srclen = src_->sourceAddr_ ? src_->sourceSize_ : src_->codeStr_.size();
4576 CV_Assert(srcptr != NULL);
4577 CV_Assert(srclen > 0);
4578
4579 cl_int retval = 0;
4580
4581 handle = clCreateProgramWithSource((cl_context)ctx.ptr(), 1, &srcptr, &srclen, &retval);
4582 CV_OCL_DBG_CHECK_RESULT(retval, "clCreateProgramWithSource");
4583 CV_Assert(handle || retval != CL_SUCCESS);
4584 if (handle && retval == CL_SUCCESS)
4585 {
4586 size_t n = ctx.ndevices();
4587 AutoBuffer<cl_device_id, 4> deviceListBuf(n + 1);
4588 cl_device_id* deviceList = deviceListBuf.data();
4589 for (size_t i = 0; i < n; i++)
4590 {
4591 deviceList[i] = (cl_device_id)(ctx.device(idx: i).ptr());
4592 }
4593
4594 retval = clBuildProgram(handle, (cl_uint)n, deviceList, buildflags.c_str(), 0, 0);
4595 CV_OCL_TRACE_CHECK_RESULT(/*don't throw: retval*/CL_SUCCESS, cv::format("clBuildProgram(source: %s)", buildflags.c_str()).c_str());
4596#if !CV_OPENCL_ALWAYS_SHOW_BUILD_LOG
4597 if (retval != CL_SUCCESS)
4598#endif
4599 {
4600 dumpBuildLog_(result: retval, deviceList, errmsg);
4601
4602 // don't remove "retval != CL_SUCCESS" condition here:
4603 // it would break CV_OPENCL_ALWAYS_SHOW_BUILD_LOG mode
4604 if (retval != CL_SUCCESS && handle)
4605 {
4606 CV_OCL_DBG_CHECK(clReleaseProgram(handle));
4607 handle = NULL;
4608 }
4609 if (retval != CL_SUCCESS &&
4610 sourceName_ != "dummy" // used for testing of compilation flags
4611 )
4612 {
4613 onOpenCLKernelBuildError();
4614 }
4615 }
4616#if CV_OPENCL_VALIDATE_BINARY_PROGRAMS
4617 if (handle && CV_OPENCL_VALIDATE_BINARY_PROGRAMS_VALUE)
4618 {
4619 CV_LOG_INFO(NULL, "OpenCL: query kernel names (build from sources)...");
4620 size_t retsz = 0;
4621 char kernels_buffer[4096] = {0};
4622 cl_int result = clGetProgramInfo(handle, CL_PROGRAM_KERNEL_NAMES, sizeof(kernels_buffer), &kernels_buffer[0], &retsz);
4623 if (retsz < sizeof(kernels_buffer))
4624 kernels_buffer[retsz] = 0;
4625 else
4626 kernels_buffer[0] = 0;
4627 CV_LOG_INFO(NULL, result << ": Kernels='" << kernels_buffer << "'");
4628 }
4629#endif
4630 }
4631 return handle != NULL;
4632 }
4633
4634 void getProgramBinary(std::vector<char>& buf)
4635 {
4636 CV_Assert(handle);
4637 size_t sz = 0;
4638 CV_OCL_CHECK(clGetProgramInfo(handle, CL_PROGRAM_BINARY_SIZES, sizeof(sz), &sz, NULL));
4639 buf.resize(new_size: sz);
4640 uchar* ptr = (uchar*)&buf[0];
4641 CV_OCL_CHECK(clGetProgramInfo(handle, CL_PROGRAM_BINARIES, sizeof(ptr), &ptr, NULL));
4642 }
4643
4644 bool createFromBinary(const Context& ctx, const std::vector<char>& buf, String& errmsg)
4645 {
4646 return createFromBinary(ctx, binaryAddr: (const unsigned char*)&buf[0], binarySize: buf.size(), errmsg);
4647 }
4648
4649 bool createFromBinary(const Context& ctx, const unsigned char* binaryAddr, const size_t binarySize, String& errmsg)
4650 {
4651 CV_Assert(handle == NULL);
4652 CV_INSTRUMENT_REGION_OPENCL_COMPILE("Load OpenCL program");
4653 CV_LOG_VERBOSE(NULL, 0, "Load from binary... (" << binarySize << " bytes)");
4654
4655 CV_Assert(binarySize > 0);
4656
4657 size_t ndevices = (int)ctx.ndevices();
4658 AutoBuffer<cl_device_id> devices_(ndevices);
4659 AutoBuffer<const uchar*> binaryPtrs_(ndevices);
4660 AutoBuffer<size_t> binarySizes_(ndevices);
4661
4662 cl_device_id* devices = devices_.data();
4663 const uchar** binaryPtrs = binaryPtrs_.data();
4664 size_t* binarySizes = binarySizes_.data();
4665 for (size_t i = 0; i < ndevices; i++)
4666 {
4667 devices[i] = (cl_device_id)ctx.device(idx: i).ptr();
4668 binaryPtrs[i] = binaryAddr;
4669 binarySizes[i] = binarySize;
4670 }
4671
4672 cl_int result = 0;
4673 handle = clCreateProgramWithBinary((cl_context)ctx.ptr(), (cl_uint)ndevices, devices_.data(),
4674 binarySizes, binaryPtrs, NULL, &result);
4675 if (result != CL_SUCCESS)
4676 {
4677 CV_LOG_ERROR(NULL, CV_OCL_API_ERROR_MSG(result, "clCreateProgramWithBinary"));
4678 if (handle)
4679 {
4680 CV_OCL_DBG_CHECK(clReleaseProgram(handle));
4681 handle = NULL;
4682 }
4683 }
4684 if (!handle)
4685 {
4686 return false;
4687 }
4688 // call clBuildProgram()
4689 {
4690 result = clBuildProgram(handle, (cl_uint)ndevices, devices_.data(), buildflags.c_str(), 0, 0);
4691 CV_OCL_DBG_CHECK_RESULT(result, cv::format("clBuildProgram(binary: %s/%s)", sourceModule_.c_str(), sourceName_.c_str()).c_str());
4692 if (result != CL_SUCCESS)
4693 {
4694 dumpBuildLog_(result, deviceList: devices, errmsg);
4695 if (handle)
4696 {
4697 CV_OCL_DBG_CHECK(clReleaseProgram(handle));
4698 handle = NULL;
4699 }
4700 return false;
4701 }
4702 }
4703 // check build status
4704 {
4705 cl_build_status build_status = CL_BUILD_NONE;
4706 size_t retsz = 0;
4707 CV_OCL_DBG_CHECK(result = clGetProgramBuildInfo(handle, devices[0], CL_PROGRAM_BUILD_STATUS,
4708 sizeof(build_status), &build_status, &retsz));
4709 if (result == CL_SUCCESS)
4710 {
4711 if (build_status == CL_BUILD_SUCCESS)
4712 {
4713 return true;
4714 }
4715 else
4716 {
4717 CV_LOG_WARNING(NULL, "clGetProgramBuildInfo() returns " << build_status);
4718 return false;
4719 }
4720 }
4721 else
4722 {
4723 CV_LOG_ERROR(NULL, CV_OCL_API_ERROR_MSG(result, "clGetProgramBuildInfo()"));
4724 if (handle)
4725 {
4726 CV_OCL_DBG_CHECK(clReleaseProgram(handle));
4727 handle = NULL;
4728 }
4729 }
4730 }
4731#if CV_OPENCL_VALIDATE_BINARY_PROGRAMS
4732 if (handle && CV_OPENCL_VALIDATE_BINARY_PROGRAMS_VALUE)
4733 {
4734 CV_LOG_INFO(NULL, "OpenCL: query kernel names (binary)...");
4735 size_t retsz = 0;
4736 char kernels_buffer[4096] = {0};
4737 result = clGetProgramInfo(handle, CL_PROGRAM_KERNEL_NAMES, sizeof(kernels_buffer), &kernels_buffer[0], &retsz);
4738 if (retsz < sizeof(kernels_buffer))
4739 kernels_buffer[retsz] = 0;
4740 else
4741 kernels_buffer[0] = 0;
4742 CV_LOG_INFO(NULL, result << ": Kernels='" << kernels_buffer << "'");
4743 }
4744#endif
4745 return handle != NULL;
4746 }
4747
4748 ~Impl()
4749 {
4750 if( handle )
4751 {
4752#ifdef _WIN32
4753 if (!cv::__termination)
4754#endif
4755 {
4756 clReleaseProgram(handle);
4757 }
4758 handle = NULL;
4759 }
4760 }
4761
// OpenCL program object. NULL until a build/load succeeds; released in ~Impl().
4762 cl_program handle;
4763
// Build options passed to clBuildProgram(); also used as the key when reading
// from / writing to the binary cache file.
4764 String buildflags;
// Module and name copied from the ProgramSource in the constructor;
// used in log and error messages.
4765 String sourceModule_;
4766 String sourceName_;
4767};
4768
4769
4770Program::Program() CV_NOEXCEPT
4771{
4772 p = 0;
4773}
4774
4775Program::Program(const ProgramSource& src,
4776 const String& buildflags, String& errmsg)
4777{
4778 p = 0;
4779 create(src, buildflags, errmsg);
4780}
4781
4782Program::Program(const Program& prog)
4783{
4784 p = prog.p;
4785 if(p)
4786 p->addref();
4787}
4788
4789Program& Program::operator = (const Program& prog)
4790{
4791 Impl* newp = (Impl*)prog.p;
4792 if(newp)
4793 newp->addref();
4794 if(p)
4795 p->release();
4796 p = newp;
4797 return *this;
4798}
4799
4800Program::Program(Program&& prog) CV_NOEXCEPT
4801{
4802 p = prog.p;
4803 prog.p = nullptr;
4804}
4805
4806Program& Program::operator = (Program&& prog) CV_NOEXCEPT
4807{
4808 if (this != &prog) {
4809 if(p)
4810 p->release();
4811 p = prog.p;
4812 prog.p = nullptr;
4813 }
4814 return *this;
4815}
4816
4817Program::~Program()
4818{
4819 if(p)
4820 p->release();
4821}
4822
4823bool Program::create(const ProgramSource& src,
4824 const String& buildflags, String& errmsg)
4825{
4826 if(p)
4827 {
4828 p->release();
4829 p = NULL;
4830 }
4831 p = new Impl(src, buildflags, errmsg);
4832 if(!p->handle)
4833 {
4834 p->release();
4835 p = 0;
4836 }
4837 return p != 0;
4838}
4839
4840void* Program::ptr() const
4841{
4842 return p ? p->handle : 0;
4843}
4844
4845#ifndef OPENCV_REMOVE_DEPRECATED_API
4846const ProgramSource& Program::source() const
4847{
4848 CV_Error(Error::StsNotImplemented, "Removed API");
4849}
4850
4851bool Program::read(const String& bin, const String& buildflags)
4852{
4853 CV_UNUSED(bin); CV_UNUSED(buildflags);
4854 CV_Error(Error::StsNotImplemented, "Removed API");
4855}
4856
4857bool Program::write(String& bin) const
4858{
4859 CV_UNUSED(bin);
4860 CV_Error(Error::StsNotImplemented, "Removed API");
4861}
4862
4863String Program::getPrefix() const
4864{
4865 if(!p)
4866 return String();
4867 Context::Impl* ctx_ = Context::getDefault().getImpl();
4868 CV_Assert(ctx_);
4869 return cv::format(fmt: "opencl=%s\nbuildflags=%s", ctx_->getPrefixString().c_str(), p->buildflags.c_str());
4870}
4871
4872String Program::getPrefix(const String& buildflags)
4873{
4874 Context::Impl* ctx_ = Context::getDefault().getImpl();
4875 CV_Assert(ctx_);
4876 return cv::format(fmt: "opencl=%s\nbuildflags=%s", ctx_->getPrefixString().c_str(), buildflags.c_str());
4877}
4878#endif // OPENCV_REMOVE_DEPRECATED_API
4879
4880void Program::getBinary(std::vector<char>& binary) const
4881{
4882 CV_Assert(p && "Empty program");
4883 p->getProgramBinary(buf&: binary);
4884}
4885
4886Program Context::Impl::getProg(const ProgramSource& src,
4887 const String& buildflags, String& errmsg)
4888{
4889 size_t limit = getProgramCountLimit();
4890 const ProgramSource::Impl* src_ = src.getImpl();
4891 CV_Assert(src_);
4892 String key = cv::format(fmt: "module=%s name=%s codehash=%s\nopencl=%s\nbuildflags=%s",
4893 src_->module_.c_str(), src_->name_.c_str(), src_->sourceHash_.c_str(),
4894 getPrefixString().c_str(),
4895 buildflags.c_str());
4896 {
4897 cv::AutoLock lock(program_cache_mutex);
4898 phash_t::iterator it = phash.find(x: key);
4899 if (it != phash.end())
4900 {
4901 // TODO LRU cache
4902 CacheList::iterator i = std::find(first: cacheList.begin(), last: cacheList.end(), val: key);
4903 if (i != cacheList.end() && i != cacheList.begin())
4904 {
4905 cacheList.erase(position: i);
4906 cacheList.push_front(x: key);
4907 }
4908 return it->second;
4909 }
4910 { // cleanup program cache
4911 size_t sz = phash.size();
4912 if (limit > 0 && sz >= limit)
4913 {
4914 static bool warningFlag = false;
4915 if (!warningFlag)
4916 {
4917 printf(format: "\nWARNING: OpenCV-OpenCL:\n"
4918 " In-memory cache for OpenCL programs is full, older programs will be unloaded.\n"
4919 " You can change cache size via OPENCV_OPENCL_PROGRAM_CACHE environment variable\n\n");
4920 warningFlag = true;
4921 }
4922 while (!cacheList.empty())
4923 {
4924 size_t c = phash.erase(x: cacheList.back());
4925 cacheList.pop_back();
4926 if (c != 0)
4927 break;
4928 }
4929 }
4930 }
4931 }
4932 Program prog(src, buildflags, errmsg);
4933 // Cache result of build failures too (to prevent unnecessary compiler invocations)
4934 {
4935 cv::AutoLock lock(program_cache_mutex);
4936 phash.insert(x: std::pair<std::string, Program>(key, prog));
4937 cacheList.push_front(x: key);
4938 }
4939 return prog;
4940}
4941
4942
4943//////////////////////////////////////////// OpenCLAllocator //////////////////////////////////////////////////
4944
/**
 * Minimal buffer pool interface: hands out and takes back buffers of type T.
 * The destructor is protected and non-virtual, so objects cannot be deleted
 * through a pointer to this interface.
 */
template<typename T>
class OpenCLBufferPool
{
protected:
    ~OpenCLBufferPool() {}

public:
    /** Returns a buffer able to hold at least `size` bytes. */
    virtual T allocate(size_t size) = 0;

    /** Gives a buffer obtained from allocate() back to the pool. */
    virtual void release(T buffer) = 0;
};
4954
4955template <typename Derived, typename BufferEntry, typename T>
4956class OpenCLBufferPoolBaseImpl : public BufferPoolController, public OpenCLBufferPool<T>
4957{
4958private:
4959 inline Derived& derived() { return *static_cast<Derived*>(this); }
4960protected:
4961 Mutex mutex_;
4962
4963 size_t currentReservedSize;
4964 size_t maxReservedSize;
4965
4966 std::list<BufferEntry> allocatedEntries_; // Allocated and used entries
4967 std::list<BufferEntry> reservedEntries_; // LRU order. Allocated, but not used entries
4968
4969 // synchronized
4970 bool _findAndRemoveEntryFromAllocatedList(CV_OUT BufferEntry& entry, T buffer)
4971 {
4972 typename std::list<BufferEntry>::iterator i = allocatedEntries_.begin();
4973 for (; i != allocatedEntries_.end(); ++i)
4974 {
4975 BufferEntry& e = *i;
4976 if (e.clBuffer_ == buffer)
4977 {
4978 entry = e;
4979 allocatedEntries_.erase(i);
4980 return true;
4981 }
4982 }
4983 return false;
4984 }
4985
4986 // synchronized
4987 bool _findAndRemoveEntryFromReservedList(CV_OUT BufferEntry& entry, const size_t size)
4988 {
4989 if (reservedEntries_.empty())
4990 return false;
4991 typename std::list<BufferEntry>::iterator i = reservedEntries_.begin();
4992 typename std::list<BufferEntry>::iterator result_pos = reservedEntries_.end();
4993 BufferEntry result;
4994 size_t minDiff = (size_t)(-1);
4995 for (; i != reservedEntries_.end(); ++i)
4996 {
4997 BufferEntry& e = *i;
4998 if (e.capacity_ >= size)
4999 {
5000 size_t diff = e.capacity_ - size;
5001 if (diff < std::max(a: (size_t)4096, b: size / 8) && (result_pos == reservedEntries_.end() || diff < minDiff))
5002 {
5003 minDiff = diff;
5004 result_pos = i;
5005 result = e;
5006 if (diff == 0)
5007 break;
5008 }
5009 }
5010 }
5011 if (result_pos != reservedEntries_.end())
5012 {
5013 //CV_DbgAssert(result == *result_pos);
5014 reservedEntries_.erase(result_pos);
5015 entry = result;
5016 currentReservedSize -= entry.capacity_;
5017 allocatedEntries_.push_back(entry);
5018 return true;
5019 }
5020 return false;
5021 }
5022
5023 // synchronized
5024 void _checkSizeOfReservedEntries()
5025 {
5026 while (currentReservedSize > maxReservedSize)
5027 {
5028 CV_DbgAssert(!reservedEntries_.empty());
5029 const BufferEntry& entry = reservedEntries_.back();
5030 CV_DbgAssert(currentReservedSize >= entry.capacity_);
5031 currentReservedSize -= entry.capacity_;
5032 derived()._releaseBufferEntry(entry);
5033 reservedEntries_.pop_back();
5034 }
5035 }
5036
5037 inline size_t _allocationGranularity(size_t size)
5038 {
5039 // heuristic values
5040 if (size < 1024*1024)
5041 return 4096; // don't work with buffers smaller than 4Kb (hidden allocation overhead issue)
5042 else if (size < 16*1024*1024)
5043 return 64*1024;
5044 else
5045 return 1024*1024;
5046 }
5047
5048public:
5049 OpenCLBufferPoolBaseImpl()
5050 : currentReservedSize(0),
5051 maxReservedSize(0)
5052 {
5053 // nothing
5054 }
5055 virtual ~OpenCLBufferPoolBaseImpl()
5056 {
5057 freeAllReservedBuffers();
5058 CV_Assert(reservedEntries_.empty());
5059 }
5060public:
5061 virtual T allocate(size_t size) CV_OVERRIDE
5062 {
5063 AutoLock locker(mutex_);
5064 BufferEntry entry;
5065 if (maxReservedSize > 0 && _findAndRemoveEntryFromReservedList(entry, size))
5066 {
5067 CV_DbgAssert(size <= entry.capacity_);
5068 LOG_BUFFER_POOL("Reuse reserved buffer: %p\n", entry.clBuffer_);
5069 }
5070 else
5071 {
5072 derived()._allocateBufferEntry(entry, size);
5073 }
5074 return entry.clBuffer_;
5075 }
5076 virtual void release(T buffer) CV_OVERRIDE
5077 {
5078 AutoLock locker(mutex_);
5079 BufferEntry entry;
5080 CV_Assert(_findAndRemoveEntryFromAllocatedList(entry, buffer));
5081 if (maxReservedSize == 0 || entry.capacity_ > maxReservedSize / 8)
5082 {
5083 derived()._releaseBufferEntry(entry);
5084 }
5085 else
5086 {
5087 reservedEntries_.push_front(entry);
5088 currentReservedSize += entry.capacity_;
5089 _checkSizeOfReservedEntries();
5090 }
5091 }
5092
5093 virtual size_t getReservedSize() const CV_OVERRIDE { return currentReservedSize; }
5094 virtual size_t getMaxReservedSize() const CV_OVERRIDE { return maxReservedSize; }
5095 virtual void setMaxReservedSize(size_t size) CV_OVERRIDE
5096 {
5097 AutoLock locker(mutex_);
5098 size_t oldMaxReservedSize = maxReservedSize;
5099 maxReservedSize = size;
5100 if (maxReservedSize < oldMaxReservedSize)
5101 {
5102 typename std::list<BufferEntry>::iterator i = reservedEntries_.begin();
5103 for (; i != reservedEntries_.end();)
5104 {
5105 const BufferEntry& entry = *i;
5106 if (entry.capacity_ > maxReservedSize / 8)
5107 {
5108 CV_DbgAssert(currentReservedSize >= entry.capacity_);
5109 currentReservedSize -= entry.capacity_;
5110 derived()._releaseBufferEntry(entry);
5111 i = reservedEntries_.erase(i);
5112 continue;
5113 }
5114 ++i;
5115 }
5116 _checkSizeOfReservedEntries();
5117 }
5118 }
5119 virtual void freeAllReservedBuffers() CV_OVERRIDE
5120 {
5121 AutoLock locker(mutex_);
5122 typename std::list<BufferEntry>::const_iterator i = reservedEntries_.begin();
5123 for (; i != reservedEntries_.end(); ++i)
5124 {
5125 const BufferEntry& entry = *i;
5126 derived()._releaseBufferEntry(entry);
5127 }
5128 reservedEntries_.clear();
5129 currentReservedSize = 0;
5130 }
5131};
5132
5133struct CLBufferEntry
5134{
5135 cl_mem clBuffer_;
5136 size_t capacity_;
5137 CLBufferEntry() : clBuffer_((cl_mem)NULL), capacity_(0) { }
5138};
5139
5140class OpenCLBufferPoolImpl CV_FINAL : public OpenCLBufferPoolBaseImpl<OpenCLBufferPoolImpl, CLBufferEntry, cl_mem>
5141{
5142public:
5143 typedef struct CLBufferEntry BufferEntry;
5144protected:
5145 int createFlags_;
5146public:
5147 OpenCLBufferPoolImpl(int createFlags = 0)
5148 : createFlags_(createFlags)
5149 {
5150 }
5151
5152 void _allocateBufferEntry(BufferEntry& entry, size_t size)
5153 {
5154 CV_DbgAssert(entry.clBuffer_ == NULL);
5155 entry.capacity_ = alignSize(sz: size, n: (int)_allocationGranularity(size));
5156 Context& ctx = Context::getDefault();
5157 cl_int retval = CL_SUCCESS;
5158 entry.clBuffer_ = clCreateBuffer((cl_context)ctx.ptr(), CL_MEM_READ_WRITE|createFlags_, entry.capacity_, 0, &retval);
5159 CV_OCL_CHECK_RESULT(retval, cv::format("clCreateBuffer(capacity=%lld) => %p", (long long int)entry.capacity_, (void*)entry.clBuffer_).c_str());
5160 CV_Assert(entry.clBuffer_ != NULL);
5161 if(retval == CL_SUCCESS)
5162 {
5163 CV_IMPL_ADD(CV_IMPL_OCL);
5164 }
5165 LOG_BUFFER_POOL("OpenCL allocate %lld (0x%llx) bytes: %p\n",
5166 (long long)entry.capacity_, (long long)entry.capacity_, entry.clBuffer_);
5167 allocatedEntries_.push_back(x: entry);
5168 }
5169
5170 void _releaseBufferEntry(const BufferEntry& entry)
5171 {
5172 CV_Assert(entry.capacity_ != 0);
5173 CV_Assert(entry.clBuffer_ != NULL);
5174 LOG_BUFFER_POOL("OpenCL release buffer: %p, %lld (0x%llx) bytes\n",
5175 entry.clBuffer_, (long long)entry.capacity_, (long long)entry.capacity_);
5176 CV_OCL_DBG_CHECK(clReleaseMemObject(entry.clBuffer_));
5177 }
5178};
5179
5180#ifdef HAVE_OPENCL_SVM
/** Pool bookkeeping record for one SVM allocation. */
struct CLSVMBufferEntry
{
    void* clBuffer_;    // the SVM pointer, NULL for a fresh entry
    size_t capacity_;   // capacity of the allocation in bytes, 0 for a fresh entry

    CLSVMBufferEntry()
        : clBuffer_(NULL),
          capacity_(0)
    {
    }
};
5187class OpenCLSVMBufferPoolImpl CV_FINAL : public OpenCLBufferPoolBaseImpl<OpenCLSVMBufferPoolImpl, CLSVMBufferEntry, void*>
5188{
5189public:
5190 typedef struct CLSVMBufferEntry BufferEntry;
5191public:
5192 OpenCLSVMBufferPoolImpl()
5193 {
5194 }
5195
5196 void _allocateBufferEntry(BufferEntry& entry, size_t size)
5197 {
5198 CV_DbgAssert(entry.clBuffer_ == NULL);
5199 entry.capacity_ = alignSize(size, (int)_allocationGranularity(size));
5200
5201 Context& ctx = Context::getDefault();
5202 const svm::SVMCapabilities svmCaps = svm::getSVMCapabilitites(ctx);
5203 bool isFineGrainBuffer = svmCaps.isSupportFineGrainBuffer();
5204 cl_svm_mem_flags memFlags = CL_MEM_READ_WRITE |
5205 (isFineGrainBuffer ? CL_MEM_SVM_FINE_GRAIN_BUFFER : 0);
5206
5207 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
5208 CV_DbgAssert(svmFns->isValid());
5209
5210 CV_OPENCL_SVM_TRACE_P("clSVMAlloc: %d\n", (int)entry.capacity_);
5211 void *buf = svmFns->fn_clSVMAlloc((cl_context)ctx.ptr(), memFlags, entry.capacity_, 0);
5212 CV_Assert(buf);
5213
5214 entry.clBuffer_ = buf;
5215 {
5216 CV_IMPL_ADD(CV_IMPL_OCL);
5217 }
5218 LOG_BUFFER_POOL("OpenCL SVM allocate %lld (0x%llx) bytes: %p\n",
5219 (long long)entry.capacity_, (long long)entry.capacity_, entry.clBuffer_);
5220 allocatedEntries_.push_back(entry);
5221 }
5222
5223 void _releaseBufferEntry(const BufferEntry& entry)
5224 {
5225 CV_Assert(entry.capacity_ != 0);
5226 CV_Assert(entry.clBuffer_ != NULL);
5227 LOG_BUFFER_POOL("OpenCL release SVM buffer: %p, %lld (0x%llx) bytes\n",
5228 entry.clBuffer_, (long long)entry.capacity_, (long long)entry.capacity_);
5229 Context& ctx = Context::getDefault();
5230 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
5231 CV_DbgAssert(svmFns->isValid());
5232 CV_OPENCL_SVM_TRACE_P("clSVMFree: %p\n", entry.clBuffer_);
5233 svmFns->fn_clSVMFree((cl_context)ctx.ptr(), entry.clBuffer_);
5234 }
5235};
5236#endif
5237
5238
5239
5240template <bool readAccess, bool writeAccess>
5241class AlignedDataPtr
5242{
5243protected:
5244 const size_t size_;
5245 uchar* const originPtr_;
5246 const size_t alignment_;
5247 uchar* ptr_;
5248 uchar* allocatedPtr_;
5249
5250public:
5251 AlignedDataPtr(uchar* ptr, size_t size, size_t alignment)
5252 : size_(size), originPtr_(ptr), alignment_(alignment), ptr_(ptr), allocatedPtr_(NULL)
5253 {
5254 CV_DbgAssert((alignment & (alignment - 1)) == 0); // check for 2^n
5255 CV_DbgAssert(!readAccess || ptr);
5256 if (((size_t)ptr_ & (alignment - 1)) != 0)
5257 {
5258 allocatedPtr_ = new uchar[size_ + alignment - 1];
5259 ptr_ = (uchar*)(((uintptr_t)allocatedPtr_ + (alignment - 1)) & ~(alignment - 1));
5260 if (readAccess)
5261 {
5262 memcpy(dest: ptr_, src: originPtr_, n: size_);
5263 }
5264 }
5265 }
5266
5267 uchar* getAlignedPtr() const
5268 {
5269 CV_DbgAssert(((size_t)ptr_ & (alignment_ - 1)) == 0);
5270 return ptr_;
5271 }
5272
5273 ~AlignedDataPtr()
5274 {
5275 if (allocatedPtr_)
5276 {
5277 if (writeAccess)
5278 {
5279 memcpy(dest: originPtr_, src: ptr_, n: size_);
5280 }
5281 delete[] allocatedPtr_;
5282 allocatedPtr_ = NULL;
5283 }
5284 ptr_ = NULL;
5285 }
5286private:
5287 AlignedDataPtr(const AlignedDataPtr&); // disabled
5288 AlignedDataPtr& operator=(const AlignedDataPtr&); // disabled
5289};
5290
5291template <bool readAccess, bool writeAccess>
5292class AlignedDataPtr2D
5293{
5294protected:
5295 const size_t size_;
5296 uchar* const originPtr_;
5297 const size_t alignment_;
5298 uchar* ptr_;
5299 uchar* allocatedPtr_;
5300 size_t rows_;
5301 size_t cols_;
5302 size_t step_;
5303
5304public:
5305 AlignedDataPtr2D(uchar* ptr, size_t rows, size_t cols, size_t step, size_t alignment, size_t extrabytes=0)
5306 : size_(rows*step), originPtr_(ptr), alignment_(alignment), ptr_(ptr), allocatedPtr_(NULL), rows_(rows), cols_(cols), step_(step)
5307 {
5308 CV_DbgAssert((alignment & (alignment - 1)) == 0); // check for 2^n
5309 CV_DbgAssert(!readAccess || ptr != NULL);
5310 if (ptr == 0 || ((size_t)ptr_ & (alignment - 1)) != 0)
5311 {
5312 allocatedPtr_ = new uchar[size_ + extrabytes + alignment - 1];
5313 ptr_ = (uchar*)(((uintptr_t)allocatedPtr_ + (alignment - 1)) & ~(alignment - 1));
5314 if (readAccess)
5315 {
5316 for (size_t i = 0; i < rows_; i++)
5317 memcpy(dest: ptr_ + i*step_, src: originPtr_ + i*step_, n: cols_);
5318 }
5319 }
5320 }
5321
5322 uchar* getAlignedPtr() const
5323 {
5324 CV_DbgAssert(((size_t)ptr_ & (alignment_ - 1)) == 0);
5325 return ptr_;
5326 }
5327
5328 ~AlignedDataPtr2D()
5329 {
5330 if (allocatedPtr_)
5331 {
5332 if (writeAccess)
5333 {
5334 for (size_t i = 0; i < rows_; i++)
5335 memcpy(dest: originPtr_ + i*step_, src: ptr_ + i*step_, n: cols_);
5336 }
5337 delete[] allocatedPtr_;
5338 allocatedPtr_ = NULL;
5339 }
5340 ptr_ = NULL;
5341 }
5342private:
5343 AlignedDataPtr2D(const AlignedDataPtr2D&); // disabled
5344 AlignedDataPtr2D& operator=(const AlignedDataPtr2D&); // disabled
5345};
5346
5347#ifndef CV_OPENCL_DATA_PTR_ALIGNMENT
5348#define CV_OPENCL_DATA_PTR_ALIGNMENT 16
5349#endif
5350
5351
5352void Context::Impl::__init_buffer_pools()
5353{
5354 bufferPool_ = std::make_shared<OpenCLBufferPoolImpl>(args: 0);
5355 OpenCLBufferPoolImpl& bufferPool = *bufferPool_.get();
5356 bufferPoolHostPtr_ = std::make_shared<OpenCLBufferPoolImpl>(CL_MEM_ALLOC_HOST_PTR);
5357 OpenCLBufferPoolImpl& bufferPoolHostPtr = *bufferPoolHostPtr_.get();
5358
5359 size_t defaultPoolSize = ocl::Device::getDefault().isIntel() ? 1 << 27 : 0;
5360 size_t poolSize = utils::getConfigurationParameterSizeT(name: "OPENCV_OPENCL_BUFFERPOOL_LIMIT", defaultValue: defaultPoolSize);
5361 bufferPool.setMaxReservedSize(poolSize);
5362 size_t poolSizeHostPtr = utils::getConfigurationParameterSizeT(name: "OPENCV_OPENCL_HOST_PTR_BUFFERPOOL_LIMIT", defaultValue: defaultPoolSize);
5363 bufferPoolHostPtr.setMaxReservedSize(poolSizeHostPtr);
5364
5365#ifdef HAVE_OPENCL_SVM
5366 bufferPoolSVM_ = std::make_shared<OpenCLSVMBufferPoolImpl>();
5367 OpenCLSVMBufferPoolImpl& bufferPoolSVM = *bufferPoolSVM_.get();
5368 size_t poolSizeSVM = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_SVM_BUFFERPOOL_LIMIT", defaultPoolSize);
5369 bufferPoolSVM.setMaxReservedSize(poolSizeSVM);
5370#endif
5371
5372 CV_LOG_INFO(NULL, "OpenCL: Initializing buffer pool for context@" << contextId << " with max capacity: poolSize=" << poolSize << " poolSizeHostPtr=" << poolSizeHostPtr);
5373}
5374
5375class OpenCLAllocator CV_FINAL : public MatAllocator
5376{
5377public:
5378 enum AllocatorFlags
5379 {
5380 ALLOCATOR_FLAGS_BUFFER_POOL_USED = 1 << 0,
5381 ALLOCATOR_FLAGS_BUFFER_POOL_HOST_PTR_USED = 1 << 1,
5382#ifdef HAVE_OPENCL_SVM
5383 ALLOCATOR_FLAGS_BUFFER_POOL_SVM_USED = 1 << 2,
5384#endif
5385 ALLOCATOR_FLAGS_EXTERNAL_BUFFER = 1 << 3 // convertFromBuffer()
5386 };
5387
5388 OpenCLAllocator()
5389 {
5390 matStdAllocator = Mat::getDefaultAllocator();
5391 }
5392 ~OpenCLAllocator()
5393 {
5394 flushCleanupQueue();
5395 }
5396
5397 UMatData* defaultAllocate(int dims, const int* sizes, int type, void* data, size_t* step,
5398 AccessFlag flags, UMatUsageFlags usageFlags) const
5399 {
5400 UMatData* u = matStdAllocator->allocate(dims, sizes, type, data, step, flags, usageFlags);
5401 return u;
5402 }
5403
5404 static bool isOpenCLMapForced() // force clEnqueueMapBuffer / clEnqueueUnmapMemObject OpenCL API
5405 {
5406 static bool value = cv::utils::getConfigurationParameterBool(name: "OPENCV_OPENCL_BUFFER_FORCE_MAPPING", defaultValue: false);
5407 return value;
5408 }
5409 static bool isOpenCLCopyingForced() // force clEnqueueReadBuffer[Rect] / clEnqueueWriteBuffer[Rect] OpenCL API
5410 {
5411 static bool value = cv::utils::getConfigurationParameterBool(name: "OPENCV_OPENCL_BUFFER_FORCE_COPYING", defaultValue: false);
5412 return value;
5413 }
5414
5415 void getBestFlags(const Context& ctx, AccessFlag /*flags*/, UMatUsageFlags usageFlags, int& createFlags, UMatData::MemoryFlag& flags0) const
5416 {
5417 const Device& dev = ctx.device(idx: 0);
5418 createFlags = 0;
5419 if ((usageFlags & USAGE_ALLOCATE_HOST_MEMORY) != 0)
5420 createFlags |= CL_MEM_ALLOC_HOST_PTR;
5421
5422 if (!isOpenCLCopyingForced() &&
5423 (isOpenCLMapForced() ||
5424 (dev.hostUnifiedMemory()
5425#ifndef __APPLE__
5426 || dev.isIntel()
5427#endif
5428 )
5429 )
5430 )
5431 flags0 = static_cast<UMatData::MemoryFlag>(0);
5432 else
5433 flags0 = UMatData::COPY_ON_MAP;
5434 }
5435
5436 UMatData* allocate(int dims, const int* sizes, int type,
5437 void* data, size_t* step, AccessFlag flags, UMatUsageFlags usageFlags) const CV_OVERRIDE
5438 {
5439 if(!useOpenCL())
5440 return defaultAllocate(dims, sizes, type, data, step, flags, usageFlags);
5441
5442 flushCleanupQueue();
5443
5444 CV_Assert(data == 0);
5445 size_t total = CV_ELEM_SIZE(type);
5446 for( int i = dims-1; i >= 0; i-- )
5447 {
5448 if( step )
5449 step[i] = total;
5450 total *= sizes[i];
5451 }
5452
5453 Context& ctx = Context::getDefault();
5454 if (!ctx.getImpl())
5455 return defaultAllocate(dims, sizes, type, data, step, flags, usageFlags);
5456 Context::Impl& ctxImpl = *ctx.getImpl();
5457
5458 int createFlags = 0;
5459 UMatData::MemoryFlag flags0 = static_cast<UMatData::MemoryFlag>(0);
5460 getBestFlags(ctx, flags, usageFlags, createFlags, flags0);
5461
5462 void* handle = NULL;
5463 int allocatorFlags = 0;
5464
5465#ifdef HAVE_OPENCL_SVM
5466 const svm::SVMCapabilities svmCaps = svm::getSVMCapabilitites(ctx);
5467 if (ctx.useSVM() && svm::useSVM(usageFlags) && !svmCaps.isNoSVMSupport())
5468 {
5469 allocatorFlags = ALLOCATOR_FLAGS_BUFFER_POOL_SVM_USED;
5470 handle = ctxImpl.getBufferPoolSVM().allocate(total);
5471
5472 // this property is constant, so single buffer pool can be used here
5473 bool isFineGrainBuffer = svmCaps.isSupportFineGrainBuffer();
5474 allocatorFlags |= isFineGrainBuffer ? svm::OPENCL_SVM_FINE_GRAIN_BUFFER : svm::OPENCL_SVM_COARSE_GRAIN_BUFFER;
5475 }
5476 else
5477#endif
5478 if (createFlags == 0)
5479 {
5480 allocatorFlags = ALLOCATOR_FLAGS_BUFFER_POOL_USED;
5481 handle = ctxImpl.getBufferPool().allocate(size: total);
5482 }
5483 else if (createFlags == CL_MEM_ALLOC_HOST_PTR)
5484 {
5485 allocatorFlags = ALLOCATOR_FLAGS_BUFFER_POOL_HOST_PTR_USED;
5486 handle = ctxImpl.getBufferPoolHostPtr().allocate(size: total);
5487 }
5488 else
5489 {
5490 CV_Assert(handle != NULL); // Unsupported, throw
5491 }
5492
5493 if (!handle)
5494 return defaultAllocate(dims, sizes, type, data, step, flags, usageFlags);
5495
5496 UMatData* u = new UMatData(this);
5497 u->data = 0;
5498 u->size = total;
5499 u->handle = handle;
5500 u->flags = flags0;
5501 u->allocatorFlags_ = allocatorFlags;
5502 u->allocatorContext = std::static_pointer_cast<void>(r: std::make_shared<ocl::Context>(args&: ctx));
5503 CV_DbgAssert(!u->tempUMat()); // for bufferPool.release() consistency in deallocate()
5504 u->markHostCopyObsolete(flag: true);
5505 opencl_allocator_stats.onAllocate(sz: u->size);
5506 return u;
5507 }
5508
// Attaches an OpenCL buffer to an existing UMatData whose content lives in host memory
// (u->origdata). Returns false when u is NULL or when no buffer could be created, true otherwise.
// When u->handle is already set no buffer is created; only the host-copy state and the
// statistics at the end of the function are updated.
// Non-SVM path: the buffer either wraps origdata (CL_MEM_USE_HOST_PTR) or is a copy of it
// (CL_MEM_COPY_HOST_PTR, recorded as TEMP_COPIED_UMAT).
5509 bool allocate(UMatData* u, AccessFlag accessFlags, UMatUsageFlags usageFlags) const CV_OVERRIDE
5510 {
5511 if(!u)
5512 return false;
5513
5514 flushCleanupQueue();
5515
5516 UMatDataAutoLock lock(u);
5517
5518 if(u->handle == 0)
5519 {
5520 CV_Assert(u->origdata != 0);
5521 Context& ctx = Context::getDefault();
5522 int createFlags = 0;
5523 UMatData::MemoryFlag flags0 = static_cast<UMatData::MemoryFlag>(0);
5524 getBestFlags(ctx, accessFlags, usageFlags, createFlags, flags0);
5525
5526 bool copyOnMap = (flags0 & UMatData::COPY_ON_MAP) != 0;
5527
5528 cl_context ctx_handle = (cl_context)ctx.ptr();
5529 int allocatorFlags = 0;
5530 UMatData::MemoryFlag tempUMatFlags = static_cast<UMatData::MemoryFlag>(0);
5531 void* handle = NULL;
5532 cl_int retval = CL_SUCCESS;
5533
5534#ifdef HAVE_OPENCL_SVM
5535 svm::SVMCapabilities svmCaps = svm::getSVMCapabilitites(ctx);
5536 bool useSVM = ctx.useSVM() && svm::useSVM(usageFlags);
5537 if (useSVM && svmCaps.isSupportFineGrainSystem())
5538 {
// Fine-grain system SVM: the host pointer itself is used as the handle, nothing is copied.
5539 allocatorFlags = svm::OPENCL_SVM_FINE_GRAIN_SYSTEM;
5540 tempUMatFlags = UMatData::TEMP_UMAT;
5541 handle = u->origdata;
5542 CV_OPENCL_SVM_TRACE_P("Use fine grain system: %d (%p)\n", (int)u->size, handle);
5543 }
5544 else if (useSVM && (svmCaps.isSupportFineGrainBuffer() || svmCaps.isSupportCoarseGrainBuffer()))
5545 {
// NOTE(review): with ACCESS_FAST set, no SVM buffer is created here; handle stays NULL
// and the function returns false at the check below.
5546 if (!(accessFlags & ACCESS_FAST)) // memcpy used
5547 {
5548 bool isFineGrainBuffer = svmCaps.isSupportFineGrainBuffer();
5549
5550 cl_svm_mem_flags memFlags = createFlags |
5551 (isFineGrainBuffer ? CL_MEM_SVM_FINE_GRAIN_BUFFER : 0);
5552
5553 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
5554 CV_DbgAssert(svmFns->isValid());
5555
5556 CV_OPENCL_SVM_TRACE_P("clSVMAlloc + copy: %d\n", (int)u->size);
5557 handle = svmFns->fn_clSVMAlloc((cl_context)ctx.ptr(), memFlags, u->size, 0);
5558 CV_Assert(handle);
5559
// Coarse-grain buffers are mapped (blocking) around the host-side memcpy; fine-grain
// buffers are written directly.
5560 cl_command_queue q = NULL;
5561 if (!isFineGrainBuffer)
5562 {
5563 q = (cl_command_queue)Queue::getDefault().ptr();
5564 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", handle, (int)u->size);
5565 cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_TRUE, CL_MAP_WRITE,
5566 handle, u->size,
5567 0, NULL, NULL);
5568 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMMap()");
5569
5570 }
5571 memcpy(handle, u->origdata, u->size);
5572 if (!isFineGrainBuffer)
5573 {
5574 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", handle);
5575 cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, handle, 0, NULL, NULL);
5576 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMUnmap()");
5577 }
5578
5579 tempUMatFlags = UMatData::TEMP_UMAT | UMatData::TEMP_COPIED_UMAT;
5580 allocatorFlags |= isFineGrainBuffer ? svm::OPENCL_SVM_FINE_GRAIN_BUFFER
5581 : svm::OPENCL_SVM_COARSE_GRAIN_BUFFER;
5582 }
5583 }
5584 else
5585#endif
5586 {
// In copy-on-map mode ACCESS_FAST is dropped, which keeps the copying fallback below available.
5587 if( copyOnMap )
5588 accessFlags &= ~ACCESS_FAST;
5589
5590 tempUMatFlags = UMatData::TEMP_UMAT;
// First choice: wrap origdata in place. Requires the feature to be enabled, origdata to be
// aligned to CV_OPENCL_ALIGNMENT_MEM_USE_HOST_PTR, no existing buffer on the original
// UMatData, and (Apple builds only) that copy-on-map is not in effect.
5591 if (
5592 #ifdef __APPLE__
5593 !copyOnMap &&
5594 #endif
5595 CV_OPENCL_ENABLE_MEM_USE_HOST_PTR
5596 // There are OpenCL runtime issues for less aligned data
5597 && (CV_OPENCL_ALIGNMENT_MEM_USE_HOST_PTR != 0
5598 && u->origdata == cv::alignPtr(ptr: u->origdata, n: (int)CV_OPENCL_ALIGNMENT_MEM_USE_HOST_PTR))
5599 // Avoid sharing of host memory between OpenCL buffers
5600 && !(u->originalUMatData && u->originalUMatData->handle)
5601 )
5602 {
5603 // Change the host-side origdata[size] to "pinned memory" that enables fast
5604 // DMA-transfers over PCIe to the device. Often used with clEnqueueMapBuffer/clEnqueueUnmapMemObject
5605 handle = clCreateBuffer(ctx_handle, CL_MEM_USE_HOST_PTR|(createFlags & ~CL_MEM_ALLOC_HOST_PTR),
5606 u->size, u->origdata, &retval);
5607 CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clCreateBuffer(CL_MEM_USE_HOST_PTR|(createFlags & ~CL_MEM_ALLOC_HOST_PTR), sz=%lld, origdata=%p) => %p",
5608 (long long int)u->size, u->origdata, (void*)handle).c_str());
5609 }
// Second choice (also the fallback when wrapping failed): create a buffer initialised
// with a copy of origdata. Skipped when ACCESS_FAST is still set.
5610 if((!handle || retval < 0) && !(accessFlags & ACCESS_FAST))
5611 {
5612 // Allocate device-side memory and immediately copy data from the host-side pointer origdata[size].
5613 // If createFlags=CL_MEM_ALLOC_HOST_PTR (aka cv::USAGE_ALLOCATE_HOST_MEMORY), then
5614 // additionally allocate a host-side "pinned" duplicate of the origdata that is
5615 // managed by OpenCL. This is potentially faster in unaligned/unmanaged scenarios.
5616 handle = clCreateBuffer(ctx_handle, CL_MEM_COPY_HOST_PTR|CL_MEM_READ_WRITE|createFlags,
5617 u->size, u->origdata, &retval);
5618 CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clCreateBuffer(CL_MEM_COPY_HOST_PTR|CL_MEM_READ_WRITE|createFlags, sz=%lld, origdata=%p) => %p",
5619 (long long int)u->size, u->origdata, (void*)handle).c_str());
5620 tempUMatFlags |= UMatData::TEMP_COPIED_UMAT;
5621 }
5622 }
5623 CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clCreateBuffer() => %p", (void*)handle).c_str());
5624 if(!handle || retval != CL_SUCCESS)
5625 return false;
// Take over the UMatData: the previous allocator is remembered so that deallocate_()
// can hand the object back to it.
5626 u->handle = handle;
5627 u->prevAllocator = u->currAllocator;
5628 u->currAllocator = this;
5629 u->flags |= tempUMatFlags | flags0;
5630 u->allocatorFlags_ = allocatorFlags;
5631 }
5632 if (!!(accessFlags & ACCESS_WRITE))
5633 u->markHostCopyObsolete(flag: true);
// NOTE(review): the statistics counter is updated on every successful call, including
// calls where u->handle was already set on entry.
5634 opencl_allocator_stats.onAllocate(sz: u->size);
5635 return true;
5636 }
5637
5638 /*void sync(UMatData* u) const
5639 {
5640 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
5641 UMatDataAutoLock lock(u);
5642
5643 if( u->hostCopyObsolete() && u->handle && u->refcount > 0 && u->origdata)
5644 {
5645 if( u->tempCopiedUMat() )
5646 {
5647 clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
5648 u->size, u->origdata, 0, 0, 0);
5649 }
5650 else
5651 {
5652 cl_int retval = 0;
5653 void* data = clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE,
5654 (CL_MAP_READ | CL_MAP_WRITE),
5655 0, u->size, 0, 0, 0, &retval);
5656 clEnqueueUnmapMemObject(q, (cl_mem)u->handle, data, 0, 0, 0);
5657 clFinish(q);
5658 }
5659 u->markHostCopyObsolete(false);
5660 }
5661 else if( u->copyOnMap() && u->deviceCopyObsolete() && u->data )
5662 {
5663 clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
5664 u->size, u->data, 0, 0, 0);
5665 }
5666 }*/
5667
5668 void deallocate(UMatData* u) const CV_OVERRIDE
5669 {
5670 if(!u)
5671 return;
5672
5673 CV_Assert(u->urefcount == 0);
5674 CV_Assert(u->refcount == 0 && "UMat deallocation error: some derived Mat is still alive");
5675
5676 CV_Assert(u->handle != 0);
5677 CV_Assert(u->mapcount == 0);
5678
5679 if (!!(u->flags & UMatData::ASYNC_CLEANUP))
5680 addToCleanupQueue(u);
5681 else
5682 deallocate_(u);
5683 }
5684
// Releases the OpenCL side of u immediately. Two cases:
//  * u->tempUMat(): the buffer was attached to host data by allocate(UMatData*, ...). If the
//    host copy is obsolete the content is first brought back into u->origdata, then the buffer
//    is released and u is handed to the previous allocator's deallocate().
//  * otherwise: the buffer was created by this allocator. It goes back to the pool recorded in
//    u->allocatorFlags_ (or is released directly) and u is deleted.
5685 void deallocate_(UMatData* u) const
5686 {
5687 CV_Assert(u);
5688 CV_Assert(u->handle);
// Buffers flagged ALLOCATOR_FLAGS_EXTERNAL_BUFFER are left out of the statistics.
5689 if ((u->allocatorFlags_ & ALLOCATOR_FLAGS_EXTERNAL_BUFFER) == 0)
5690 {
5691 opencl_allocator_stats.onFree(sz: u->size);
5692 }
5693
// On Windows, once process termination has started, return before touching any OpenCL API;
// nothing below is executed in that case.
5694#ifdef _WIN32
5695 if (cv::__termination) // process is not in consistent state (after ExitProcess call) and terminating
5696 return; // avoid any OpenCL calls
5697#endif
5698 if(u->tempUMat())
5699 {
5700 CV_Assert(u->origdata);
5701// UMatDataAutoLock lock(u);
5702
5703 if (u->hostCopyObsolete())
5704 {
5705#ifdef HAVE_OPENCL_SVM
5706 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
5707 {
5708 Context& ctx = Context::getDefault();
5709 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
5710 CV_DbgAssert(svmFns->isValid());
5711
5712 if( u->tempCopiedUMat() )
5713 {
5714 CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER ||
5715 (u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER);
5716 bool isFineGrainBuffer = (u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER;
5717 cl_command_queue q = NULL;
5718 if (!isFineGrainBuffer)
5719 {
5720 CV_DbgAssert(((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) == 0));
5721 q = (cl_command_queue)Queue::getDefault().ptr();
5722 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", u->handle, (int)u->size);
5723 cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_FALSE, CL_MAP_READ,
5724 u->handle, u->size,
5725 0, NULL, NULL);
5726 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMMap()");
5727 }
// NOTE(review): for fine-grain buffers q is still NULL at this clFinish() call and its
// return value is not checked.
5728 clFinish(q);
5729 memcpy(u->origdata, u->handle, u->size);
5730 if (!isFineGrainBuffer)
5731 {
5732 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle);
5733 cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle, 0, NULL, NULL);
5734 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMUnmap()");
5735 }
5736 }
5737 else
5738 {
5739 CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM);
5740 // nothing
5741 }
5742 }
5743 else
5744#endif
5745 {
5746 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
5747 if( u->tempCopiedUMat() )
5748 {
// The buffer is a separate copy: read it back into origdata with a blocking read.
5749 AlignedDataPtr<false, true> alignedPtr(u->origdata, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT);
5750 CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
5751 u->size, alignedPtr.getAlignedPtr(), 0, 0, 0));
5752 }
5753 else
5754 {
5755 cl_int retval = 0;
// This check is always true here: the code is already inside the tempUMat() branch.
5756 if (u->tempUMat())
5757 {
// The buffer wraps origdata: a blocking map followed by an unmap is issued, and the
// mapped pointer is required to be origdata itself.
5758 CV_Assert(u->mapcount == 0);
5759 flushCleanupQueue(); // workaround for CL_OUT_OF_RESOURCES problem (#9960)
5760 void* data = clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE,
5761 (CL_MAP_READ | CL_MAP_WRITE),
5762 0, u->size, 0, 0, 0, &retval);
5763 CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueMapBuffer(handle=%p, sz=%lld) => %p", (void*)u->handle, (long long int)u->size, data).c_str());
5764 CV_Assert(u->origdata == data && "Details: https://github.com/opencv/opencv/issues/6293");
5765 if (u->originalUMatData)
5766 {
5767 CV_Assert(u->originalUMatData->data == data);
5768 }
5769 retval = clEnqueueUnmapMemObject(q, (cl_mem)u->handle, data, 0, 0, 0);
5770 CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueUnmapMemObject(handle=%p, data=%p, [sz=%lld])", (void*)u->handle, data, (long long int)u->size).c_str());
5771 CV_OCL_DBG_CHECK(clFinish(q));
5772 }
5773 }
5774 }
5775 u->markHostCopyObsolete(flag: false);
5776 }
5777 else
5778 {
5779 // nothing
5780 }
// Release the buffer itself. For SVM only TEMP_COPIED_UMAT handles are freed with clSVMFree;
// in allocate(UMatData*, ...) the fine-grain-system handle is origdata itself.
5781#ifdef HAVE_OPENCL_SVM
5782 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
5783 {
5784 if( u->tempCopiedUMat() )
5785 {
5786 Context& ctx = Context::getDefault();
5787 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
5788 CV_DbgAssert(svmFns->isValid());
5789
5790 CV_OPENCL_SVM_TRACE_P("clSVMFree: %p\n", u->handle);
5791 svmFns->fn_clSVMFree((cl_context)ctx.ptr(), u->handle);
5792 }
5793 }
5794 else
5795#endif
5796 {
5797 cl_int retval = clReleaseMemObject((cl_mem)u->handle);
5798 CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clReleaseMemObject(ptr=%p)", (void*)u->handle).c_str());
5799 }
// Restore the state from before allocate(UMatData*, ...): previous allocator back in place,
// data pointing at origdata. A separate copy-on-map host buffer (see map()) is freed first.
5800 u->handle = 0;
5801 u->markDeviceCopyObsolete(flag: true);
5802 u->currAllocator = u->prevAllocator;
5803 u->prevAllocator = NULL;
5804 if(u->data && u->copyOnMap() && u->data != u->origdata)
5805 fastFree(ptr: u->data);
5806 u->data = u->origdata;
5807 u->currAllocator->deallocate(data: u);
5808 u = NULL;
5809 }
5810 else
5811 {
5812 CV_Assert(u->origdata == NULL);
5813 if(u->data && u->copyOnMap() && u->data != u->origdata)
5814 {
5815 fastFree(ptr: u->data);
5816 u->data = 0;
5817 u->markHostCopyObsolete(flag: true);
5818 }
// Return the handle to the pool it came from. The pool is reached through the Context
// stored in u->allocatorContext by allocate().
5819 if (u->allocatorFlags_ & ALLOCATOR_FLAGS_BUFFER_POOL_USED)
5820 {
5821 std::shared_ptr<ocl::Context> pCtx = std::static_pointer_cast<ocl::Context>(r: u->allocatorContext);
5822 CV_Assert(pCtx);
5823 ocl::Context& ctx = *pCtx.get();
5824 CV_Assert(ctx.getImpl());
5825 ctx.getImpl()->getBufferPool().release(buffer: (cl_mem)u->handle);
5826 }
5827 else if (u->allocatorFlags_ & ALLOCATOR_FLAGS_BUFFER_POOL_HOST_PTR_USED)
5828 {
5829 std::shared_ptr<ocl::Context> pCtx = std::static_pointer_cast<ocl::Context>(r: u->allocatorContext);
5830 CV_Assert(pCtx);
5831 ocl::Context& ctx = *pCtx.get();
5832 CV_Assert(ctx.getImpl());
5833 ctx.getImpl()->getBufferPoolHostPtr().release(buffer: (cl_mem)u->handle);
5834 }
5835#ifdef HAVE_OPENCL_SVM
5836 else if (u->allocatorFlags_ & ALLOCATOR_FLAGS_BUFFER_POOL_SVM_USED)
5837 {
5838 std::shared_ptr<ocl::Context> pCtx = std::static_pointer_cast<ocl::Context>(u->allocatorContext);
5839 CV_Assert(pCtx);
5840 ocl::Context& ctx = *pCtx.get();
5841 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM)
5842 {
5843 //nothing
5844 }
5845 else if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER ||
5846 (u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
5847 {
5848 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
5849 CV_DbgAssert(svmFns->isValid());
5850 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
5851
// A buffer that is still marked as mapped is unmapped before it goes back to the pool.
5852 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) != 0)
5853 {
5854 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle);
5855 cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle, 0, NULL, NULL);
5856 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMUnmap()");
5857 }
5858 }
5859 CV_Assert(ctx.getImpl());
5860 ctx.getImpl()->getBufferPoolSVM().release((void*)u->handle);
5861 }
5862#endif
5863 else
5864 {
// No pool flag set: release the memory object directly.
5865 CV_OCL_DBG_CHECK(clReleaseMemObject((cl_mem)u->handle));
5866 }
5867 u->handle = 0;
5868 u->markDeviceCopyObsolete(flag: true);
5869 delete u;
5870 u = NULL;
5871 }
// Both branches above end with u set to NULL.
5872 CV_Assert(u == NULL);
5873 }
5874
5875 // synchronized call (external UMatDataAutoLock, see UMat::getMat)
// Makes the buffer content reachable through u->data.
//  * Buffers without COPY_ON_MAP are mapped: SVM buffers use their handle as the data pointer
//    (coarse-grain ones are mapped first), other buffers go through clEnqueueMapBuffer. If the
//    mapping does not yield a pointer, the buffer is switched to COPY_ON_MAP for good.
//  * COPY_ON_MAP buffers get a host buffer from fastMalloc() when u->data is not set, and the
//    device content is read into it when ACCESS_READ is requested and the host copy is obsolete.
// ACCESS_WRITE marks the device copy obsolete up front.
5876 void map(UMatData* u, AccessFlag accessFlags) const CV_OVERRIDE
5877 {
5878 CV_Assert(u && u->handle);
5879
5880 if (!!(accessFlags & ACCESS_WRITE))
5881 u->markDeviceCopyObsolete(flag: true);
5882
5883 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
5884
5885 {
5886 if( !u->copyOnMap() )
5887 {
5888 // TODO
5889 // because there can be other map requests for the same UMat with different access flags,
5890 // we use the universal (read-write) access mode.
5891#ifdef HAVE_OPENCL_SVM
5892 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
5893 {
5894 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
5895 {
5896 Context& ctx = Context::getDefault();
5897 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
5898 CV_DbgAssert(svmFns->isValid());
5899
// OPENCL_SVM_BUFFER_MAP records that the coarse-grain buffer is currently mapped, so it is
// mapped only once.
5900 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) == 0)
5901 {
5902 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", u->handle, (int)u->size);
5903 cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_FALSE, CL_MAP_READ | CL_MAP_WRITE,
5904 u->handle, u->size,
5905 0, NULL, NULL);
5906 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMMap()");
5907 u->allocatorFlags_ |= svm::OPENCL_SVM_BUFFER_MAP;
5908 }
5909 }
// The map above is enqueued non-blocking (CL_FALSE); clFinish() waits for the queue before
// the handle is exposed as the host pointer.
5910 clFinish(q);
5911 u->data = (uchar*)u->handle;
5912 u->markHostCopyObsolete(false);
5913 u->markDeviceMemMapped(true);
5914 return;
5915 }
5916#endif
5917
5918 cl_int retval = CL_SUCCESS;
5919 if (!u->deviceMemMapped())
5920 {
5921 CV_Assert(u->refcount == 1);
// The increment lives inside the assertion expression: mapcount goes from 0 to 1 here.
5922 CV_Assert(u->mapcount++ == 0);
5923 u->data = (uchar*)clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE,
5924 (CL_MAP_READ | CL_MAP_WRITE),
5925 0, u->size, 0, 0, 0, &retval);
5926 CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clEnqueueMapBuffer(handle=%p, sz=%lld) => %p", (void*)u->handle, (long long int)u->size, u->data).c_str());
5927 }
5928 if (u->data && retval == CL_SUCCESS)
5929 {
5930 u->markHostCopyObsolete(flag: false);
5931 u->markDeviceMemMapped(flag: true);
5932 return;
5933 }
5934
5935 // TODO Is it really a good idea and was it tested well?
5936 // if map failed, switch to copy-on-map mode for the particular buffer
5937 u->flags |= UMatData::COPY_ON_MAP;
5938 }
5939
// Copy-on-map: make sure a host-side buffer exists; a freshly allocated one holds no valid data.
5940 if(!u->data)
5941 {
5942 u->data = (uchar*)fastMalloc(bufSize: u->size);
5943 u->markHostCopyObsolete(flag: true);
5944 }
5945 }
5946
// Refresh the host copy from the device (blocking read) only when the caller wants to read
// and the host copy is stale.
5947 if (!!(accessFlags & ACCESS_READ) && u->hostCopyObsolete())
5948 {
5949 AlignedDataPtr<false, true> alignedPtr(u->data, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT);
5950#ifdef HAVE_OPENCL_SVM
5951 CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == 0);
5952#endif
5953 cl_int retval = clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE,
5954 0, u->size, alignedPtr.getAlignedPtr(), 0, 0, 0);
5955 CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueReadBuffer(q, handle=%p, CL_TRUE, 0, sz=%lld, data=%p, 0, 0, 0)",
5956 (void*)u->handle, (long long int)u->size, alignedPtr.getAlignedPtr()).c_str());
5957 u->markHostCopyObsolete(flag: false);
5958 }
5959 }
5960
// Counterpart of map(). A NULL u is ignored; otherwise u must have a buffer handle (asserted)
// and is locked for the duration of the call.
//  * Mapped buffers (no COPY_ON_MAP, deviceMemMapped()): coarse-grain SVM buffers are unmapped;
//    regular buffers are unmapped only once u->refcount has dropped to 0.
//  * COPY_ON_MAP buffers whose device copy is obsolete: the host data is written back to the
//    device with a blocking write.
// In both cases the device copy ends up marked current and the host copy obsolete.
5961 void unmap(UMatData* u) const CV_OVERRIDE
5962 {
5963 if(!u)
5964 return;
5965
5966
5967 CV_Assert(u->handle != 0);
5968
5969 UMatDataAutoLock autolock(u);
5970
5971 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
5972 cl_int retval = 0;
5973 if( !u->copyOnMap() && u->deviceMemMapped() )
5974 {
5975 CV_Assert(u->data != NULL);
5976#ifdef HAVE_OPENCL_SVM
5977 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
5978 {
5979 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
5980 {
5981 Context& ctx = Context::getDefault();
5982 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
5983 CV_DbgAssert(svmFns->isValid());
5984
5985 CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) != 0);
5986 {
5987 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle);
5988 cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle,
5989 0, NULL, NULL);
5990 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMUnmap()");
5991 clFinish(q);
// Clear the "currently mapped" marker set by map().
5992 u->allocatorFlags_ &= ~svm::OPENCL_SVM_BUFFER_MAP;
5993 }
5994 }
// NOTE(review): unlike the non-SVM path below, this path does not call
// markDeviceMemMapped(false).
5995 if (u->refcount == 0)
5996 u->data = 0;
5997 u->markDeviceCopyObsolete(false);
5998 u->markHostCopyObsolete(true);
5999 return;
6000 }
6001#endif
6002 if (u->refcount == 0)
6003 {
// The decrement lives inside the assertion expression: mapcount goes from 1 back to 0 here.
6004 CV_Assert(u->mapcount-- == 1);
6005 retval = clEnqueueUnmapMemObject(q, (cl_mem)u->handle, u->data, 0, 0, 0);
6006 CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueUnmapMemObject(handle=%p, data=%p, [sz=%lld])", (void*)u->handle, u->data, (long long int)u->size).c_str());
6007 if (Device::getDefault().isAMD())
6008 {
6009 // required for multithreaded applications (see stitching test)
6010 CV_OCL_DBG_CHECK(clFinish(q));
6011 }
6012 u->markDeviceMemMapped(flag: false);
6013 u->data = 0;
6014 u->markDeviceCopyObsolete(flag: false);
6015 u->markHostCopyObsolete(flag: true);
6016 }
6017 }
6018 else if( u->copyOnMap() && u->deviceCopyObsolete() )
6019 {
// Copy-on-map: push the whole host buffer back to the device.
6020 AlignedDataPtr<true, false> alignedPtr(u->data, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT);
6021#ifdef HAVE_OPENCL_SVM
6022 CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == 0);
6023#endif
6024 retval = clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE,
6025 0, u->size, alignedPtr.getAlignedPtr(), 0, 0, 0);
6026 CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueWriteBuffer(q, handle=%p, CL_TRUE, 0, sz=%lld, data=%p, 0, 0, 0)",
6027 (void*)u->handle, (long long int)u->size, alignedPtr.getAlignedPtr()).c_str());
6028 u->markDeviceCopyObsolete(flag: false);
6029 u->markHostCopyObsolete(flag: true);
6030 }
6031 }
6032
6033 bool checkContinuous(int dims, const size_t sz[],
6034 const size_t srcofs[], const size_t srcstep[],
6035 const size_t dstofs[], const size_t dststep[],
6036 size_t& total, size_t new_sz[],
6037 size_t& srcrawofs, size_t new_srcofs[], size_t new_srcstep[],
6038 size_t& dstrawofs, size_t new_dstofs[], size_t new_dststep[]) const
6039 {
6040 bool iscontinuous = true;
6041 srcrawofs = srcofs ? srcofs[dims-1] : 0;
6042 dstrawofs = dstofs ? dstofs[dims-1] : 0;
6043 total = sz[dims-1];
6044 for( int i = dims-2; i >= 0; i-- )
6045 {
6046 if( i >= 0 && (total != srcstep[i] || total != dststep[i]) )
6047 iscontinuous = false;
6048 total *= sz[i];
6049 if( srcofs )
6050 srcrawofs += srcofs[i]*srcstep[i];
6051 if( dstofs )
6052 dstrawofs += dstofs[i]*dststep[i];
6053 }
6054
6055 if( !iscontinuous )
6056 {
6057 // OpenCL uses {x, y, z} order while OpenCV uses {z, y, x} order.
6058 if( dims == 2 )
6059 {
6060 new_sz[0] = sz[1]; new_sz[1] = sz[0]; new_sz[2] = 1;
6061 // we assume that new_... arrays are initialized by caller
6062 // with 0's, so there is no else branch
6063 if( srcofs )
6064 {
6065 new_srcofs[0] = srcofs[1];
6066 new_srcofs[1] = srcofs[0];
6067 new_srcofs[2] = 0;
6068 }
6069
6070 if( dstofs )
6071 {
6072 new_dstofs[0] = dstofs[1];
6073 new_dstofs[1] = dstofs[0];
6074 new_dstofs[2] = 0;
6075 }
6076
6077 new_srcstep[0] = srcstep[0]; new_srcstep[1] = 0;
6078 new_dststep[0] = dststep[0]; new_dststep[1] = 0;
6079 }
6080 else
6081 {
6082 // we could check for dims == 3 here,
6083 // but from user perspective this one is more informative
6084 CV_Assert(dims <= 3);
6085 new_sz[0] = sz[2]; new_sz[1] = sz[1]; new_sz[2] = sz[0];
6086 if( srcofs )
6087 {
6088 new_srcofs[0] = srcofs[2];
6089 new_srcofs[1] = srcofs[1];
6090 new_srcofs[2] = srcofs[0];
6091 }
6092
6093 if( dstofs )
6094 {
6095 new_dstofs[0] = dstofs[2];
6096 new_dstofs[1] = dstofs[1];
6097 new_dstofs[2] = dstofs[0];
6098 }
6099
6100 new_srcstep[0] = srcstep[1]; new_srcstep[1] = srcstep[0];
6101 new_dststep[0] = dststep[1]; new_dststep[1] = dststep[0];
6102 }
6103 }
6104 return iscontinuous;
6105 }
6106
// Copies a region of the buffer behind u into host memory at dstptr.
//   dims, sz  - number of dimensions and extent of the region
//   srcofs    - offset of the region inside the buffer (may be NULL)
//   srcstep   - strides of the buffer
//   dststep   - strides of the destination; the destination offset is always zero
// A NULL u is ignored. When u has a host copy that is not obsolete, the default Mat allocator
// performs the copy from it. Otherwise the data comes from the OpenCL buffer: directly from
// memory for SVM buffers, else with clEnqueueReadBuffer (continuous region) or
// clEnqueueReadBufferRect / a padded linear read (non-continuous region).
6107 void download(UMatData* u, void* dstptr, int dims, const size_t sz[],
6108 const size_t srcofs[], const size_t srcstep[],
6109 const size_t dststep[]) const CV_OVERRIDE
6110 {
6111 if(!u)
6112 return;
6113 UMatDataAutoLock autolock(u);
6114
6115 if( u->data && !u->hostCopyObsolete() )
6116 {
6117 Mat::getDefaultAllocator()->download(data: u, dst: dstptr, dims, sz, srcofs, srcstep, dststep);
6118 return;
6119 }
6120 CV_Assert( u->handle != 0 );
6121
6122 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
6123
// The new_* arrays are zero-initialised because checkContinuous() fills them only partially.
6124 size_t total = 0, new_sz[] = {0, 0, 0};
6125 size_t srcrawofs = 0, new_srcofs[] = {0, 0, 0}, new_srcstep[] = {0, 0, 0};
6126 size_t dstrawofs = 0, new_dstofs[] = {0, 0, 0}, new_dststep[] = {0, 0, 0};
6127
6128 bool iscontinuous = checkContinuous(dims, sz, srcofs, srcstep, dstofs: 0, dststep,
6129 total, new_sz,
6130 srcrawofs, new_srcofs, new_srcstep,
6131 dstrawofs, new_dstofs, new_dststep);
6132
6133#ifdef HAVE_OPENCL_SVM
6134 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
6135 {
6136 CV_DbgAssert(u->data == NULL || u->data == u->handle);
6137 Context& ctx = Context::getDefault();
6138 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
6139 CV_DbgAssert(svmFns->isValid());
6140
6141 CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) == 0);
// Coarse-grain SVM buffers are mapped for reading around the host-side copy.
6142 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
6143 {
6144 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", u->handle, (int)u->size);
6145 cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_FALSE, CL_MAP_READ,
6146 u->handle, u->size,
6147 0, NULL, NULL);
6148 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMMap()");
6149 }
6150 clFinish(q);
6151 if( iscontinuous )
6152 {
6153 memcpy(dstptr, (uchar*)u->handle + srcrawofs, total);
6154 }
6155 else
6156 {
6157 // This code is from MatAllocator::download()
6158 int isz[CV_MAX_DIM];
6159 uchar* srcptr = (uchar*)u->handle;
6160 for( int i = 0; i < dims; i++ )
6161 {
6162 CV_Assert( sz[i] <= (size_t)INT_MAX );
// NOTE(review): this early return skips the clEnqueueSVMUnmap() further down, so a
// coarse-grain buffer mapped above is left mapped when any extent is zero.
6163 if( sz[i] == 0 )
6164 return;
6165 if( srcofs )
6166 srcptr += srcofs[i]*(i <= dims-2 ? srcstep[i] : 1);
6167 isz[i] = (int)sz[i];
6168 }
6169
// Both regions are wrapped as CV_8U Mat headers and copied plane by plane.
6170 Mat src(dims, isz, CV_8U, srcptr, srcstep);
6171 Mat dst(dims, isz, CV_8U, dstptr, dststep);
6172
6173 const Mat* arrays[] = { &src, &dst };
6174 uchar* ptrs[2];
6175 NAryMatIterator it(arrays, ptrs, 2);
6176 size_t j, planesz = it.size;
6177
6178 for( j = 0; j < it.nplanes; j++, ++it )
6179 memcpy(ptrs[1], ptrs[0], planesz);
6180 }
6181 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
6182 {
6183 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle);
6184 cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle,
6185 0, NULL, NULL);
6186 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMUnmap()");
6187 clFinish(q);
6188 }
6189 }
6190 else
6191#endif
6192 {
6193 if( iscontinuous )
6194 {
// Continuous region: a single blocking linear read.
6195 AlignedDataPtr<false, true> alignedPtr((uchar*)dstptr, total, CV_OPENCL_DATA_PTR_ALIGNMENT);
6196 CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE,
6197 srcrawofs, total, alignedPtr.getAlignedPtr(), 0, 0, 0));
6198 }
6199 else if (CV_OPENCL_DISABLE_BUFFER_RECT_OPERATIONS)
6200 {
// Rect operations disabled: read one linear range that covers all rows, starting at an
// offset rounded down to the alignment, into a temporary buffer and copy the rows out.
// NOTE(review): the row loop runs over new_sz[1] only, so new_sz[2] is not taken into account.
6201 const size_t padding = CV_OPENCL_DATA_PTR_ALIGNMENT;
6202 size_t new_srcrawofs = srcrawofs & ~(padding-1);
6203 size_t membuf_ofs = srcrawofs - new_srcrawofs;
6204 AlignedDataPtr2D<false, false> alignedPtr(0, new_sz[1], new_srcstep[0], new_srcstep[0],
6205 CV_OPENCL_DATA_PTR_ALIGNMENT, padding*2);
6206 uchar* ptr = alignedPtr.getAlignedPtr();
6207
6208 CV_Assert(new_srcstep[0] >= new_sz[0]);
6209 total = alignSize(sz: new_srcstep[0]*new_sz[1] + membuf_ofs, n: padding);
// Clamp the read so that it does not run past the end of the buffer.
6210 total = std::min(a: total, b: u->size - new_srcrawofs);
6211 CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE,
6212 new_srcrawofs, total, ptr, 0, 0, 0));
6213 for( size_t i = 0; i < new_sz[1]; i++ )
6214 memcpy( dest: (uchar*)dstptr + i*new_dststep[0], src: ptr + i*new_srcstep[0] + membuf_ofs, n: new_sz[0]);
6215 }
6216 else
6217 {
// Non-continuous region: blocking rectangular read with the reordered origins, extents and
// row pitches from checkContinuous(); both slice pitches are passed as 0.
6218 AlignedDataPtr2D<false, true> alignedPtr((uchar*)dstptr, new_sz[1], new_sz[0], new_dststep[0], CV_OPENCL_DATA_PTR_ALIGNMENT);
6219 uchar* ptr = alignedPtr.getAlignedPtr();
6220
6221 CV_OCL_CHECK(clEnqueueReadBufferRect(q, (cl_mem)u->handle, CL_TRUE,
6222 new_srcofs, new_dstofs, new_sz,
6223 new_srcstep[0], 0,
6224 new_dststep[0], 0,
6225 ptr, 0, 0, 0));
6226 }
6227 }
6228 }
6229
6230 void upload(UMatData* u, const void* srcptr, int dims, const size_t sz[],
6231 const size_t dstofs[], const size_t dststep[],
6232 const size_t srcstep[]) const CV_OVERRIDE
6233 {
6234 if(!u)
6235 return;
6236
6237 // there should be no user-visible CPU copies of the UMat which we are going to copy to
6238 CV_Assert(u->refcount == 0 || u->tempUMat());
6239
6240 size_t total = 0, new_sz[] = {0, 0, 0};
6241 size_t srcrawofs = 0, new_srcofs[] = {0, 0, 0}, new_srcstep[] = {0, 0, 0};
6242 size_t dstrawofs = 0, new_dstofs[] = {0, 0, 0}, new_dststep[] = {0, 0, 0};
6243
6244 bool iscontinuous = checkContinuous(dims, sz, srcofs: 0, srcstep, dstofs, dststep,
6245 total, new_sz,
6246 srcrawofs, new_srcofs, new_srcstep,
6247 dstrawofs, new_dstofs, new_dststep);
6248
6249 UMatDataAutoLock autolock(u);
6250
6251 // if there is cached CPU copy of the GPU matrix,
6252 // we could use it as a destination.
6253 // we can do it in 2 cases:
6254 // 1. we overwrite the whole content
6255 // 2. we overwrite part of the matrix, but the GPU copy is out-of-date
6256 if( u->data && (u->hostCopyObsolete() < u->deviceCopyObsolete() || total == u->size))
6257 {
6258 Mat::getDefaultAllocator()->upload(data: u, src: srcptr, dims, sz, dstofs, dststep, srcstep);
6259 u->markHostCopyObsolete(flag: false);
6260 u->markDeviceCopyObsolete(flag: true);
6261 return;
6262 }
6263
6264 CV_Assert( u->handle != 0 );
6265 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
6266
6267#ifdef HAVE_OPENCL_SVM
6268 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
6269 {
6270 CV_DbgAssert(u->data == NULL || u->data == u->handle);
6271 Context& ctx = Context::getDefault();
6272 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
6273 CV_DbgAssert(svmFns->isValid());
6274
6275 CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) == 0);
6276 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
6277 {
6278 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", u->handle, (int)u->size);
6279 cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_FALSE, CL_MAP_WRITE,
6280 u->handle, u->size,
6281 0, NULL, NULL);
6282 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMMap()");
6283 }
6284 clFinish(q);
6285 if( iscontinuous )
6286 {
6287 memcpy((uchar*)u->handle + dstrawofs, srcptr, total);
6288 }
6289 else
6290 {
6291 // This code is from MatAllocator::upload()
6292 int isz[CV_MAX_DIM];
6293 uchar* dstptr = (uchar*)u->handle;
6294 for( int i = 0; i < dims; i++ )
6295 {
6296 CV_Assert( sz[i] <= (size_t)INT_MAX );
6297 if( sz[i] == 0 )
6298 return;
6299 if( dstofs )
6300 dstptr += dstofs[i]*(i <= dims-2 ? dststep[i] : 1);
6301 isz[i] = (int)sz[i];
6302 }
6303
6304 Mat src(dims, isz, CV_8U, (void*)srcptr, srcstep);
6305 Mat dst(dims, isz, CV_8U, dstptr, dststep);
6306
6307 const Mat* arrays[] = { &src, &dst };
6308 uchar* ptrs[2];
6309 NAryMatIterator it(arrays, ptrs, 2);
6310 size_t j, planesz = it.size;
6311
6312 for( j = 0; j < it.nplanes; j++, ++it )
6313 memcpy(ptrs[1], ptrs[0], planesz);
6314 }
6315 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
6316 {
6317 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle);
6318 cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle,
6319 0, NULL, NULL);
6320 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMUnmap()");
6321 clFinish(q);
6322 }
6323 }
6324 else
6325#endif
6326 {
6327 if( iscontinuous )
6328 {
6329 AlignedDataPtr<true, false> alignedPtr((uchar*)srcptr, total, CV_OPENCL_DATA_PTR_ALIGNMENT);
6330 cl_int retval = clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE,
6331 dstrawofs, total, alignedPtr.getAlignedPtr(), 0, 0, 0);
6332 CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueWriteBuffer(q, handle=%p, CL_TRUE, offset=%lld, sz=%lld, data=%p, 0, 0, 0)",
6333 (void*)u->handle, (long long int)dstrawofs, (long long int)u->size, alignedPtr.getAlignedPtr()).c_str());
6334 }
6335 else if (CV_OPENCL_DISABLE_BUFFER_RECT_OPERATIONS)
6336 {
6337 const size_t padding = CV_OPENCL_DATA_PTR_ALIGNMENT;
6338 size_t new_dstrawofs = dstrawofs & ~(padding-1);
6339 size_t membuf_ofs = dstrawofs - new_dstrawofs;
6340 AlignedDataPtr2D<false, false> alignedPtr(0, new_sz[1], new_dststep[0], new_dststep[0],
6341 CV_OPENCL_DATA_PTR_ALIGNMENT, padding*2);
6342 uchar* ptr = alignedPtr.getAlignedPtr();
6343
6344 CV_Assert(new_dststep[0] >= new_sz[0] && new_srcstep[0] >= new_sz[0]);
6345 total = alignSize(sz: new_dststep[0]*new_sz[1] + membuf_ofs, n: padding);
6346 total = std::min(a: total, b: u->size - new_dstrawofs);
6347 /*printf("new_sz0=%d, new_sz1=%d, membuf_ofs=%d, total=%d (%08x), new_dstrawofs=%d (%08x)\n",
6348 (int)new_sz[0], (int)new_sz[1], (int)membuf_ofs,
6349 (int)total, (int)total, (int)new_dstrawofs, (int)new_dstrawofs);*/
6350 CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE,
6351 new_dstrawofs, total, ptr, 0, 0, 0));
6352 for( size_t i = 0; i < new_sz[1]; i++ )
6353 memcpy( dest: ptr + i*new_dststep[0] + membuf_ofs, src: (uchar*)srcptr + i*new_srcstep[0], n: new_sz[0]);
6354 CV_OCL_CHECK(clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE,
6355 new_dstrawofs, total, ptr, 0, 0, 0));
6356 }
6357 else
6358 {
6359 AlignedDataPtr2D<true, false> alignedPtr((uchar*)srcptr, new_sz[1], new_sz[0], new_srcstep[0], CV_OPENCL_DATA_PTR_ALIGNMENT);
6360 uchar* ptr = alignedPtr.getAlignedPtr();
6361
6362 CV_OCL_CHECK(clEnqueueWriteBufferRect(q, (cl_mem)u->handle, CL_TRUE,
6363 new_dstofs, new_srcofs, new_sz,
6364 new_dststep[0], 0,
6365 new_srcstep[0], 0,
6366 ptr, 0, 0, 0));
6367 }
6368 }
6369 u->markHostCopyObsolete(flag: true);
6370#ifdef HAVE_OPENCL_SVM
6371 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER ||
6372 (u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM)
6373 {
6374 // nothing
6375 }
6376 else
6377#endif
6378 {
6379 u->markHostCopyObsolete(flag: true);
6380 }
6381 u->markDeviceCopyObsolete(flag: false);
6382 }
6383
6384 void copy(UMatData* src, UMatData* dst, int dims, const size_t sz[],
6385 const size_t srcofs[], const size_t srcstep[],
6386 const size_t dstofs[], const size_t dststep[], bool _sync) const CV_OVERRIDE
6387 {
6388 if(!src || !dst)
6389 return;
6390
6391 size_t total = 0, new_sz[] = {0, 0, 0};
6392 size_t srcrawofs = 0, new_srcofs[] = {0, 0, 0}, new_srcstep[] = {0, 0, 0};
6393 size_t dstrawofs = 0, new_dstofs[] = {0, 0, 0}, new_dststep[] = {0, 0, 0};
6394
6395 bool iscontinuous = checkContinuous(dims, sz, srcofs, srcstep, dstofs, dststep,
6396 total, new_sz,
6397 srcrawofs, new_srcofs, new_srcstep,
6398 dstrawofs, new_dstofs, new_dststep);
6399
6400 UMatDataAutoLock src_autolock(src, dst);
6401
6402 if( !src->handle || (src->data && src->hostCopyObsolete() < src->deviceCopyObsolete()) )
6403 {
6404 upload(u: dst, srcptr: src->data + srcrawofs, dims, sz, dstofs, dststep, srcstep);
6405 return;
6406 }
6407 if( !dst->handle || (dst->data && dst->hostCopyObsolete() < dst->deviceCopyObsolete()) )
6408 {
6409 download(u: src, dstptr: dst->data + dstrawofs, dims, sz, srcofs, srcstep, dststep);
6410 dst->markHostCopyObsolete(flag: false);
6411#ifdef HAVE_OPENCL_SVM
6412 if ((dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER ||
6413 (dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM)
6414 {
6415 // nothing
6416 }
6417 else
6418#endif
6419 {
6420 dst->markDeviceCopyObsolete(flag: true);
6421 }
6422 return;
6423 }
6424
6425 // there should be no user-visible CPU copies of the UMat which we are going to copy to
6426 CV_Assert(dst->refcount == 0);
6427 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
6428
6429 cl_int retval = CL_SUCCESS;
6430#ifdef HAVE_OPENCL_SVM
6431 if ((src->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0 ||
6432 (dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
6433 {
6434 if ((src->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0 &&
6435 (dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
6436 {
6437 Context& ctx = Context::getDefault();
6438 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
6439 CV_DbgAssert(svmFns->isValid());
6440
6441 if( iscontinuous )
6442 {
6443 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMemcpy: %p <-- %p (%d)\n",
6444 (uchar*)dst->handle + dstrawofs, (uchar*)src->handle + srcrawofs, (int)total);
6445 cl_int status = svmFns->fn_clEnqueueSVMMemcpy(q, CL_TRUE,
6446 (uchar*)dst->handle + dstrawofs, (uchar*)src->handle + srcrawofs,
6447 total, 0, NULL, NULL);
6448 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMMemcpy()");
6449 }
6450 else
6451 {
6452 clFinish(q);
6453 // This code is from MatAllocator::download()/upload()
6454 int isz[CV_MAX_DIM];
6455 uchar* srcptr = (uchar*)src->handle;
6456 for( int i = 0; i < dims; i++ )
6457 {
6458 CV_Assert( sz[i] <= (size_t)INT_MAX );
6459 if( sz[i] == 0 )
6460 return;
6461 if( srcofs )
6462 srcptr += srcofs[i]*(i <= dims-2 ? srcstep[i] : 1);
6463 isz[i] = (int)sz[i];
6464 }
6465 Mat m_src(dims, isz, CV_8U, srcptr, srcstep);
6466
6467 uchar* dstptr = (uchar*)dst->handle;
6468 for( int i = 0; i < dims; i++ )
6469 {
6470 if( dstofs )
6471 dstptr += dstofs[i]*(i <= dims-2 ? dststep[i] : 1);
6472 }
6473 Mat m_dst(dims, isz, CV_8U, dstptr, dststep);
6474
6475 const Mat* arrays[] = { &m_src, &m_dst };
6476 uchar* ptrs[2];
6477 NAryMatIterator it(arrays, ptrs, 2);
6478 size_t j, planesz = it.size;
6479
6480 for( j = 0; j < it.nplanes; j++, ++it )
6481 memcpy(ptrs[1], ptrs[0], planesz);
6482 }
6483 }
6484 else
6485 {
6486 if ((src->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
6487 {
6488 map(src, ACCESS_READ);
6489 upload(dst, src->data + srcrawofs, dims, sz, dstofs, dststep, srcstep);
6490 unmap(src);
6491 }
6492 else
6493 {
6494 map(dst, ACCESS_WRITE);
6495 download(src, dst->data + dstrawofs, dims, sz, srcofs, srcstep, dststep);
6496 unmap(dst);
6497 }
6498 }
6499 }
6500 else
6501#endif
6502 {
6503 if( iscontinuous )
6504 {
6505 retval = clEnqueueCopyBuffer(q, (cl_mem)src->handle, (cl_mem)dst->handle,
6506 srcrawofs, dstrawofs, total, 0, 0, 0);
6507 CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueCopyBuffer(q, src=%p, dst=%p, src_offset=%lld, dst_offset=%lld, sz=%lld, 0, 0, 0)",
6508 (void*)src->handle, (void*)dst->handle, (long long int)srcrawofs, (long long int)dstrawofs, (long long int)total).c_str());
6509 }
6510 else if (CV_OPENCL_DISABLE_BUFFER_RECT_OPERATIONS)
6511 {
6512 const size_t padding = CV_OPENCL_DATA_PTR_ALIGNMENT;
6513 size_t new_srcrawofs = srcrawofs & ~(padding-1);
6514 size_t srcmembuf_ofs = srcrawofs - new_srcrawofs;
6515 size_t new_dstrawofs = dstrawofs & ~(padding-1);
6516 size_t dstmembuf_ofs = dstrawofs - new_dstrawofs;
6517
6518 AlignedDataPtr2D<false, false> srcBuf(0, new_sz[1], new_srcstep[0], new_srcstep[0],
6519 CV_OPENCL_DATA_PTR_ALIGNMENT, padding*2);
6520 AlignedDataPtr2D<false, false> dstBuf(0, new_sz[1], new_dststep[0], new_dststep[0],
6521 CV_OPENCL_DATA_PTR_ALIGNMENT, padding*2);
6522 uchar* srcptr = srcBuf.getAlignedPtr();
6523 uchar* dstptr = dstBuf.getAlignedPtr();
6524
6525 CV_Assert(new_dststep[0] >= new_sz[0] && new_srcstep[0] >= new_sz[0]);
6526
6527 size_t src_total = alignSize(sz: new_srcstep[0]*new_sz[1] + srcmembuf_ofs, n: padding);
6528 src_total = std::min(a: src_total, b: src->size - new_srcrawofs);
6529 size_t dst_total = alignSize(sz: new_dststep[0]*new_sz[1] + dstmembuf_ofs, n: padding);
6530 dst_total = std::min(a: dst_total, b: dst->size - new_dstrawofs);
6531
6532 CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)src->handle, CL_TRUE,
6533 new_srcrawofs, src_total, srcptr, 0, 0, 0));
6534 CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)dst->handle, CL_TRUE,
6535 new_dstrawofs, dst_total, dstptr, 0, 0, 0));
6536
6537 for( size_t i = 0; i < new_sz[1]; i++ )
6538 memcpy( dest: dstptr + dstmembuf_ofs + i*new_dststep[0],
6539 src: srcptr + srcmembuf_ofs + i*new_srcstep[0], n: new_sz[0]);
6540 CV_OCL_CHECK(clEnqueueWriteBuffer(q, (cl_mem)dst->handle, CL_TRUE,
6541 new_dstrawofs, dst_total, dstptr, 0, 0, 0));
6542 }
6543 else
6544 {
6545 CV_OCL_CHECK(retval = clEnqueueCopyBufferRect(q, (cl_mem)src->handle, (cl_mem)dst->handle,
6546 new_srcofs, new_dstofs, new_sz,
6547 new_srcstep[0], 0,
6548 new_dststep[0], 0,
6549 0, 0, 0));
6550 }
6551 }
6552 if (retval == CL_SUCCESS)
6553 {
6554 CV_IMPL_ADD(CV_IMPL_OCL)
6555 }
6556
6557#ifdef HAVE_OPENCL_SVM
6558 if ((dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER ||
6559 (dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM)
6560 {
6561 // nothing
6562 }
6563 else
6564#endif
6565 {
6566 dst->markHostCopyObsolete(flag: true);
6567 }
6568 dst->markDeviceCopyObsolete(flag: false);
6569
6570 if( _sync )
6571 {
6572 CV_OCL_DBG_CHECK(clFinish(q));
6573 }
6574 }
6575
6576 BufferPoolController* getBufferPoolController(const char* id) const CV_OVERRIDE
6577 {
6578 ocl::Context ctx = Context::getDefault();
6579 if (ctx.empty())
6580 return NULL;
6581#ifdef HAVE_OPENCL_SVM
6582 if ((svm::checkForceSVMUmatUsage() && (id == NULL || strcmp(id, "OCL") == 0)) || (id != NULL && strcmp(id, "SVM") == 0))
6583 {
6584 return &ctx.getImpl()->getBufferPoolSVM();
6585 }
6586#endif
6587 if (id != NULL && strcmp(s1: id, s2: "HOST_ALLOC") == 0)
6588 {
6589 return &ctx.getImpl()->getBufferPoolHostPtr();
6590 }
6591 if (id != NULL && strcmp(s1: id, s2: "OCL") != 0)
6592 {
6593 CV_Error(cv::Error::StsBadArg, "getBufferPoolController(): unknown BufferPool ID\n");
6594 }
6595 return &ctx.getImpl()->getBufferPool();
6596 }
6597
    // Not referenced in the visible part of this class; presumably the standard host
    // (Mat) allocator used as a fallback -- TODO confirm against the constructor.
    MatAllocator* matStdAllocator;

    // Guards cleanupQueue; locked by flushCleanupQueue() and addToCleanupQueue().
    mutable cv::Mutex cleanupQueueMutex;
    // UMatData objects queued by addToCleanupQueue() and later passed to deallocate_()
    // by flushCleanupQueue().
    mutable std::deque<UMatData*> cleanupQueue;
6602
6603 void flushCleanupQueue() const
6604 {
6605 if (!cleanupQueue.empty())
6606 {
6607 std::deque<UMatData*> q;
6608 {
6609 cv::AutoLock lock(cleanupQueueMutex);
6610 q.swap(x&: cleanupQueue);
6611 }
6612 for (std::deque<UMatData*>::const_iterator i = q.begin(); i != q.end(); ++i)
6613 {
6614 deallocate_(u: *i);
6615 }
6616 }
6617 }
6618 void addToCleanupQueue(UMatData* u) const
6619 {
6620 //TODO: Validation check: CV_Assert(!u->tempUMat());
6621 {
6622 cv::AutoLock lock(cleanupQueueMutex);
6623 cleanupQueue.push_back(x: u);
6624 }
6625 }
6626};
6627
6628static OpenCLAllocator* getOpenCLAllocator_() // call once guarantee
6629{
6630 static OpenCLAllocator* g_allocator = new OpenCLAllocator(); // avoid destructor call (using of this object is too wide)
6631 return g_allocator;
6632}
6633MatAllocator* getOpenCLAllocator()
6634{
6635 CV_SINGLETON_LAZY_INIT(MatAllocator, getOpenCLAllocator_())
6636}
6637
6638}} // namespace cv::ocl
6639
6640
6641namespace cv {
6642
// the two functions below are implemented in umatrix.cpp
// Forward declaration. convertFromBuffer() below calls it with _steps = 0 and
// autoSteps = true to set the dimensions of the destination UMat.
void setSize( UMat& m, int _dims, const int* _sz, const size_t* _steps,
              bool autoSteps = false );
// Forward declaration. convertFromBuffer() below calls it once the UMatData has been
// attached to the destination UMat.
void finalizeHdr(UMat& m);
6647
6648} // namespace cv
6649
6650
6651namespace cv { namespace ocl {
6652
6653/*
6654// Convert OpenCL buffer memory to UMat
6655*/
6656void convertFromBuffer(void* cl_mem_buffer, size_t step, int rows, int cols, int type, UMat& dst)
6657{
6658 int d = 2;
6659 int sizes[] = { rows, cols };
6660
6661 CV_Assert(0 <= d && d <= CV_MAX_DIM);
6662
6663 dst.release();
6664
6665 dst.flags = (type & Mat::TYPE_MASK) | Mat::MAGIC_VAL;
6666 dst.usageFlags = USAGE_DEFAULT;
6667
6668 setSize(m&: dst, dims: d, sz: sizes, steps: 0, autoSteps: true);
6669 dst.offset = 0;
6670
6671 cl_mem memobj = (cl_mem)cl_mem_buffer;
6672 cl_mem_object_type mem_type = 0;
6673
6674 CV_OCL_CHECK(clGetMemObjectInfo(memobj, CL_MEM_TYPE, sizeof(cl_mem_object_type), &mem_type, 0));
6675
6676 CV_Assert(CL_MEM_OBJECT_BUFFER == mem_type);
6677
6678 size_t total = 0;
6679 CV_OCL_CHECK(clGetMemObjectInfo(memobj, CL_MEM_SIZE, sizeof(size_t), &total, 0));
6680
6681 CV_OCL_CHECK(clRetainMemObject(memobj));
6682
6683 CV_Assert((int)step >= cols * CV_ELEM_SIZE(type));
6684 CV_Assert(total >= rows * step);
6685
6686 // attach clBuffer to UMatData
6687 dst.u = new UMatData(getOpenCLAllocator());
6688 dst.u->data = 0;
6689 dst.u->allocatorFlags_ = OpenCLAllocator::ALLOCATOR_FLAGS_EXTERNAL_BUFFER; // not allocated from any OpenCV buffer pool
6690 dst.u->flags = static_cast<UMatData::MemoryFlag>(0);
6691 dst.u->handle = cl_mem_buffer;
6692 dst.u->origdata = 0;
6693 dst.u->prevAllocator = 0;
6694 dst.u->size = total;
6695
6696 finalizeHdr(m&: dst);
6697 dst.addref();
6698
6699 return;
6700} // convertFromBuffer()
6701
6702
6703/*
6704// Convert OpenCL image2d_t memory to UMat
6705*/
6706void convertFromImage(void* cl_mem_image, UMat& dst)
6707{
6708 cl_mem clImage = (cl_mem)cl_mem_image;
6709 cl_mem_object_type mem_type = 0;
6710
6711 CV_OCL_CHECK(clGetMemObjectInfo(clImage, CL_MEM_TYPE, sizeof(cl_mem_object_type), &mem_type, 0));
6712
6713 CV_Assert(CL_MEM_OBJECT_IMAGE2D == mem_type);
6714
6715 cl_image_format fmt = { .image_channel_order: 0, .image_channel_data_type: 0 };
6716 CV_OCL_CHECK(clGetImageInfo(clImage, CL_IMAGE_FORMAT, sizeof(cl_image_format), &fmt, 0));
6717
6718 int depth = CV_8U;
6719 switch (fmt.image_channel_data_type)
6720 {
6721 case CL_UNORM_INT8:
6722 case CL_UNSIGNED_INT8:
6723 depth = CV_8U;
6724 break;
6725
6726 case CL_SNORM_INT8:
6727 case CL_SIGNED_INT8:
6728 depth = CV_8S;
6729 break;
6730
6731 case CL_UNORM_INT16:
6732 case CL_UNSIGNED_INT16:
6733 depth = CV_16U;
6734 break;
6735
6736 case CL_SNORM_INT16:
6737 case CL_SIGNED_INT16:
6738 depth = CV_16S;
6739 break;
6740
6741 case CL_SIGNED_INT32:
6742 depth = CV_32S;
6743 break;
6744
6745 case CL_FLOAT:
6746 depth = CV_32F;
6747 break;
6748
6749 case CL_HALF_FLOAT:
6750 depth = CV_16F;
6751 break;
6752
6753 default:
6754 CV_Error(cv::Error::OpenCLApiCallError, "Not supported image_channel_data_type");
6755 }
6756
6757 int type = CV_8UC1;
6758 switch (fmt.image_channel_order)
6759 {
6760 case CL_R:
6761 case CL_A:
6762 case CL_INTENSITY:
6763 case CL_LUMINANCE:
6764 type = CV_MAKE_TYPE(depth, 1);
6765 break;
6766
6767 case CL_RG:
6768 case CL_RA:
6769 type = CV_MAKE_TYPE(depth, 2);
6770 break;
6771
6772 // CL_RGB has no mappings to OpenCV types because CL_RGB can only be used with
6773 // CL_UNORM_SHORT_565, CL_UNORM_SHORT_555, or CL_UNORM_INT_101010.
6774 /*case CL_RGB:
6775 type = CV_MAKE_TYPE(depth, 3);
6776 break;*/
6777
6778 case CL_RGBA:
6779 case CL_BGRA:
6780 case CL_ARGB:
6781 type = CV_MAKE_TYPE(depth, 4);
6782 break;
6783
6784 default:
6785 CV_Error(cv::Error::OpenCLApiCallError, "Not supported image_channel_order");
6786 break;
6787 }
6788
6789 size_t step = 0;
6790 CV_OCL_CHECK(clGetImageInfo(clImage, CL_IMAGE_ROW_PITCH, sizeof(size_t), &step, 0));
6791
6792 size_t w = 0;
6793 CV_OCL_CHECK(clGetImageInfo(clImage, CL_IMAGE_WIDTH, sizeof(size_t), &w, 0));
6794
6795 size_t h = 0;
6796 CV_OCL_CHECK(clGetImageInfo(clImage, CL_IMAGE_HEIGHT, sizeof(size_t), &h, 0));
6797
6798 dst.create(rows: (int)h, cols: (int)w, type);
6799
6800 cl_mem clBuffer = (cl_mem)dst.handle(accessFlags: ACCESS_READ);
6801
6802 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
6803
6804 size_t offset = 0;
6805 size_t src_origin[3] = { 0, 0, 0 };
6806 size_t region[3] = { w, h, 1 };
6807 CV_OCL_CHECK(clEnqueueCopyImageToBuffer(q, clImage, clBuffer, src_origin, region, offset, 0, NULL, NULL));
6808
6809 CV_OCL_CHECK(clFinish(q));
6810
6811 return;
6812} // convertFromImage()
6813
6814
6815///////////////////////////////////////////// Utility functions /////////////////////////////////////////////////
6816
6817static void getDevices(std::vector<cl_device_id>& devices, cl_platform_id platform)
6818{
6819 cl_uint numDevices = 0;
6820 cl_int status = clGetDeviceIDs(platform, (cl_device_type)Device::TYPE_ALL, 0, NULL, &numDevices);
6821 if (status != CL_DEVICE_NOT_FOUND) // Not an error if platform has no devices
6822 {
6823 CV_OCL_DBG_CHECK_RESULT(status,
6824 cv::format("clGetDeviceIDs(platform, Device::TYPE_ALL, num_entries=0, devices=NULL, numDevices=%p)", &numDevices).c_str());
6825 }
6826
6827 if (numDevices == 0)
6828 {
6829 devices.clear();
6830 return;
6831 }
6832
6833 devices.resize(new_size: (size_t)numDevices);
6834 CV_OCL_DBG_CHECK(clGetDeviceIDs(platform, (cl_device_type)Device::TYPE_ALL, numDevices, &devices[0], &numDevices));
6835}
6836
6837struct PlatformInfo::Impl
6838{
6839 Impl(void* id)
6840 {
6841 refcount = 1;
6842 handle = *(cl_platform_id*)id;
6843 getDevices(devices, platform: handle);
6844
6845 version_ = getStrProp(CL_PLATFORM_VERSION);
6846 parseOpenCLVersion(version: version_, major&: versionMajor_, minor&: versionMinor_);
6847 }
6848
6849 String getStrProp(cl_platform_info prop) const
6850 {
6851 char buf[1024];
6852 size_t sz=0;
6853 return clGetPlatformInfo(handle, prop, sizeof(buf)-16, buf, &sz) == CL_SUCCESS &&
6854 sz < sizeof(buf) ? String(buf) : String();
6855 }
6856
6857 IMPLEMENT_REFCOUNTABLE();
6858 std::vector<cl_device_id> devices;
6859 cl_platform_id handle;
6860
6861 String version_;
6862 int versionMajor_;
6863 int versionMinor_;
6864};
6865
6866PlatformInfo::PlatformInfo() CV_NOEXCEPT
6867{
6868 p = 0;
6869}
6870
6871PlatformInfo::PlatformInfo(void* platform_id)
6872{
6873 p = new Impl(platform_id);
6874}
6875
6876PlatformInfo::~PlatformInfo()
6877{
6878 if(p)
6879 p->release();
6880}
6881
6882PlatformInfo::PlatformInfo(const PlatformInfo& i)
6883{
6884 if (i.p)
6885 i.p->addref();
6886 p = i.p;
6887}
6888
6889PlatformInfo& PlatformInfo::operator =(const PlatformInfo& i)
6890{
6891 if (i.p != p)
6892 {
6893 if (i.p)
6894 i.p->addref();
6895 if (p)
6896 p->release();
6897 p = i.p;
6898 }
6899 return *this;
6900}
6901
6902PlatformInfo::PlatformInfo(PlatformInfo&& i) CV_NOEXCEPT
6903{
6904 p = i.p;
6905 i.p = nullptr;
6906}
6907
6908PlatformInfo& PlatformInfo::operator = (PlatformInfo&& i) CV_NOEXCEPT
6909{
6910 if (this != &i) {
6911 if(p)
6912 p->release();
6913 p = i.p;
6914 i.p = nullptr;
6915 }
6916 return *this;
6917}
6918
6919int PlatformInfo::deviceNumber() const
6920{
6921 return p ? (int)p->devices.size() : 0;
6922}
6923
6924void PlatformInfo::getDevice(Device& device, int d) const
6925{
6926 CV_Assert(p && d < (int)p->devices.size() );
6927 if(p)
6928 device.set(p->devices[d]);
6929}
6930
6931String PlatformInfo::name() const
6932{
6933 return p ? p->getStrProp(CL_PLATFORM_NAME) : String();
6934}
6935
6936String PlatformInfo::vendor() const
6937{
6938 return p ? p->getStrProp(CL_PLATFORM_VENDOR) : String();
6939}
6940
6941String PlatformInfo::version() const
6942{
6943 return p ? p->version_ : String();
6944}
6945
6946int PlatformInfo::versionMajor() const
6947{
6948 CV_Assert(p);
6949 return p->versionMajor_;
6950}
6951
6952int PlatformInfo::versionMinor() const
6953{
6954 CV_Assert(p);
6955 return p->versionMinor_;
6956}
6957
6958static void getPlatforms(std::vector<cl_platform_id>& platforms)
6959{
6960 cl_uint numPlatforms = 0;
6961 CV_OCL_DBG_CHECK(clGetPlatformIDs(0, NULL, &numPlatforms));
6962
6963 if (numPlatforms == 0)
6964 {
6965 platforms.clear();
6966 return;
6967 }
6968
6969 platforms.resize(new_size: (size_t)numPlatforms);
6970 CV_OCL_DBG_CHECK(clGetPlatformIDs(numPlatforms, &platforms[0], &numPlatforms));
6971}
6972
6973void getPlatfomsInfo(std::vector<PlatformInfo>& platformsInfo)
6974{
6975 std::vector<cl_platform_id> platforms;
6976 getPlatforms(platforms);
6977
6978 for (size_t i = 0; i < platforms.size(); i++)
6979 platformsInfo.push_back( x: PlatformInfo((void*)&platforms[i]) );
6980}
6981
6982const char* typeToStr(int type)
6983{
6984 static const char* tab[]=
6985 {
6986 "uchar", "uchar2", "uchar3", "uchar4", 0, 0, 0, "uchar8", 0, 0, 0, 0, 0, 0, 0, "uchar16",
6987 "char", "char2", "char3", "char4", 0, 0, 0, "char8", 0, 0, 0, 0, 0, 0, 0, "char16",
6988 "ushort", "ushort2", "ushort3", "ushort4", 0, 0, 0, "ushort8", 0, 0, 0, 0, 0, 0, 0, "ushort16",
6989 "short", "short2", "short3", "short4", 0, 0, 0, "short8", 0, 0, 0, 0, 0, 0, 0, "short16",
6990 "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
6991 "float", "float2", "float3", "float4", 0, 0, 0, "float8", 0, 0, 0, 0, 0, 0, 0, "float16",
6992 "double", "double2", "double3", "double4", 0, 0, 0, "double8", 0, 0, 0, 0, 0, 0, 0, "double16",
6993 "half", "half2", "half3", "half4", 0, 0, 0, "half8", 0, 0, 0, 0, 0, 0, 0, "half16",
6994 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
6995 };
6996 int cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type);
6997 const char* result = cn > 16 ? nullptr : tab[depth*16 + cn-1];
6998 CV_Assert(result);
6999 return result;
7000}
7001
7002const char* memopTypeToStr(int type)
7003{
7004 static const char* tab[] =
7005 {
7006 "uchar", "uchar2", "uchar3", "uchar4", 0, 0, 0, "uchar8", 0, 0, 0, 0, 0, 0, 0, "uchar16",
7007 "char", "char2", "char3", "char4", 0, 0, 0, "char8", 0, 0, 0, 0, 0, 0, 0, "char16",
7008 "ushort", "ushort2", "ushort3", "ushort4", 0, 0, 0, "ushort8", 0, 0, 0, 0, 0, 0, 0, "ushort16",
7009 "short", "short2", "short3", "short4", 0, 0, 0, "short8", 0, 0, 0, 0, 0, 0, 0, "short16",
7010 "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
7011 "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
7012 "ulong", "ulong2", "ulong3", "ulong4", 0, 0, 0, "ulong8", 0, 0, 0, 0, 0, 0, 0, "ulong16",
7013 "short", "short2", "short3", "short4", 0, 0, 0, "short8", 0, 0, 0, 0, 0, 0, 0, "short16",
7014 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
7015 };
7016 int cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type);
7017 const char* result = cn > 16 ? nullptr : tab[depth*16 + cn-1];
7018 CV_Assert(result);
7019 return result;
7020}
7021
7022const char* vecopTypeToStr(int type)
7023{
7024 static const char* tab[] =
7025 {
7026 "uchar", "short", "uchar3", "int", 0, 0, 0, "int2", 0, 0, 0, 0, 0, 0, 0, "int4",
7027 "char", "short", "char3", "int", 0, 0, 0, "int2", 0, 0, 0, 0, 0, 0, 0, "int4",
7028 "ushort", "int", "ushort3", "int2",0, 0, 0, "int4", 0, 0, 0, 0, 0, 0, 0, "int8",
7029 "short", "int", "short3", "int2", 0, 0, 0, "int4", 0, 0, 0, 0, 0, 0, 0, "int8",
7030 "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
7031 "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
7032 "ulong", "ulong2", "ulong3", "ulong4", 0, 0, 0, "ulong8", 0, 0, 0, 0, 0, 0, 0, "ulong16",
7033 "short", "short2", "short3", "short4", 0, 0, 0, "short8", 0, 0, 0, 0, 0, 0, 0, "short16",
7034 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
7035 };
7036 int cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type);
7037 const char* result = cn > 16 ? 0 : tab[depth*16 + cn-1];
7038 CV_Assert(result);
7039 return result;
7040}
7041
7042// Deprecated due to size of buf buffer being unknowable.
7043const char* convertTypeStr(int sdepth, int ddepth, int cn, char* buf)
7044{
7045 // Since the size of buf is not given, we assume 50 because that's what all callers use.
7046 constexpr size_t buf_max = 50;
7047
7048 return convertTypeStr(sdepth, ddepth, cn, buf, buf_size: buf_max);
7049}
7050
7051const char* convertTypeStr(int sdepth, int ddepth, int cn, char* buf, size_t buf_size)
7052{
7053 if( sdepth == ddepth )
7054 return "noconvert";
7055 const char *typestr = typeToStr(CV_MAKETYPE(ddepth, cn));
7056 if( ddepth >= CV_32F ||
7057 (ddepth == CV_32S && sdepth < CV_32S) ||
7058 (ddepth == CV_16S && sdepth <= CV_8S) ||
7059 (ddepth == CV_16U && sdepth == CV_8U))
7060 {
7061 snprintf(s: buf, maxlen: buf_size, format: "convert_%s", typestr);
7062 }
7063 else if( sdepth >= CV_32F )
7064 snprintf(s: buf, maxlen: buf_size, format: "convert_%s%s_rte", typestr, (ddepth < CV_32S ? "_sat" : ""));
7065 else
7066 snprintf(s: buf, maxlen: buf_size, format: "convert_%s_sat", typestr);
7067
7068 return buf;
7069}
7070
7071const char* getOpenCLErrorString(int errorCode)
7072{
7073#define CV_OCL_CODE(id) case id: return #id
7074#define CV_OCL_CODE_(id, name) case id: return #name
7075 switch (errorCode)
7076 {
7077 CV_OCL_CODE(CL_SUCCESS);
7078 CV_OCL_CODE(CL_DEVICE_NOT_FOUND);
7079 CV_OCL_CODE(CL_DEVICE_NOT_AVAILABLE);
7080 CV_OCL_CODE(CL_COMPILER_NOT_AVAILABLE);
7081 CV_OCL_CODE(CL_MEM_OBJECT_ALLOCATION_FAILURE);
7082 CV_OCL_CODE(CL_OUT_OF_RESOURCES);
7083 CV_OCL_CODE(CL_OUT_OF_HOST_MEMORY);
7084 CV_OCL_CODE(CL_PROFILING_INFO_NOT_AVAILABLE);
7085 CV_OCL_CODE(CL_MEM_COPY_OVERLAP);
7086 CV_OCL_CODE(CL_IMAGE_FORMAT_MISMATCH);
7087 CV_OCL_CODE(CL_IMAGE_FORMAT_NOT_SUPPORTED);
7088 CV_OCL_CODE(CL_BUILD_PROGRAM_FAILURE);
7089 CV_OCL_CODE(CL_MAP_FAILURE);
7090 CV_OCL_CODE(CL_MISALIGNED_SUB_BUFFER_OFFSET);
7091 CV_OCL_CODE(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST);
7092 CV_OCL_CODE(CL_COMPILE_PROGRAM_FAILURE);
7093 CV_OCL_CODE(CL_LINKER_NOT_AVAILABLE);
7094 CV_OCL_CODE(CL_LINK_PROGRAM_FAILURE);
7095 CV_OCL_CODE(CL_DEVICE_PARTITION_FAILED);
7096 CV_OCL_CODE(CL_KERNEL_ARG_INFO_NOT_AVAILABLE);
7097 CV_OCL_CODE(CL_INVALID_VALUE);
7098 CV_OCL_CODE(CL_INVALID_DEVICE_TYPE);
7099 CV_OCL_CODE(CL_INVALID_PLATFORM);
7100 CV_OCL_CODE(CL_INVALID_DEVICE);
7101 CV_OCL_CODE(CL_INVALID_CONTEXT);
7102 CV_OCL_CODE(CL_INVALID_QUEUE_PROPERTIES);
7103 CV_OCL_CODE(CL_INVALID_COMMAND_QUEUE);
7104 CV_OCL_CODE(CL_INVALID_HOST_PTR);
7105 CV_OCL_CODE(CL_INVALID_MEM_OBJECT);
7106 CV_OCL_CODE(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR);
7107 CV_OCL_CODE(CL_INVALID_IMAGE_SIZE);
7108 CV_OCL_CODE(CL_INVALID_SAMPLER);
7109 CV_OCL_CODE(CL_INVALID_BINARY);
7110 CV_OCL_CODE(CL_INVALID_BUILD_OPTIONS);
7111 CV_OCL_CODE(CL_INVALID_PROGRAM);
7112 CV_OCL_CODE(CL_INVALID_PROGRAM_EXECUTABLE);
7113 CV_OCL_CODE(CL_INVALID_KERNEL_NAME);
7114 CV_OCL_CODE(CL_INVALID_KERNEL_DEFINITION);
7115 CV_OCL_CODE(CL_INVALID_KERNEL);
7116 CV_OCL_CODE(CL_INVALID_ARG_INDEX);
7117 CV_OCL_CODE(CL_INVALID_ARG_VALUE);
7118 CV_OCL_CODE(CL_INVALID_ARG_SIZE);
7119 CV_OCL_CODE(CL_INVALID_KERNEL_ARGS);
7120 CV_OCL_CODE(CL_INVALID_WORK_DIMENSION);
7121 CV_OCL_CODE(CL_INVALID_WORK_GROUP_SIZE);
7122 CV_OCL_CODE(CL_INVALID_WORK_ITEM_SIZE);
7123 CV_OCL_CODE(CL_INVALID_GLOBAL_OFFSET);
7124 CV_OCL_CODE(CL_INVALID_EVENT_WAIT_LIST);
7125 CV_OCL_CODE(CL_INVALID_EVENT);
7126 CV_OCL_CODE(CL_INVALID_OPERATION);
7127 CV_OCL_CODE(CL_INVALID_GL_OBJECT);
7128 CV_OCL_CODE(CL_INVALID_BUFFER_SIZE);
7129 CV_OCL_CODE(CL_INVALID_MIP_LEVEL);
7130 CV_OCL_CODE(CL_INVALID_GLOBAL_WORK_SIZE);
7131 // OpenCL 1.1
7132 CV_OCL_CODE(CL_INVALID_PROPERTY);
7133 // OpenCL 1.2
7134 CV_OCL_CODE(CL_INVALID_IMAGE_DESCRIPTOR);
7135 CV_OCL_CODE(CL_INVALID_COMPILER_OPTIONS);
7136 CV_OCL_CODE(CL_INVALID_LINKER_OPTIONS);
7137 CV_OCL_CODE(CL_INVALID_DEVICE_PARTITION_COUNT);
7138 // OpenCL 2.0
7139 CV_OCL_CODE_(-69, CL_INVALID_PIPE_SIZE);
7140 CV_OCL_CODE_(-70, CL_INVALID_DEVICE_QUEUE);
7141 // Extensions
7142 CV_OCL_CODE_(-1000, CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR);
7143 CV_OCL_CODE_(-1001, CL_PLATFORM_NOT_FOUND_KHR);
7144 CV_OCL_CODE_(-1002, CL_INVALID_D3D10_DEVICE_KHR);
7145 CV_OCL_CODE_(-1003, CL_INVALID_D3D10_RESOURCE_KHR);
7146 CV_OCL_CODE_(-1004, CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR);
7147 CV_OCL_CODE_(-1005, CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR);
7148 default: return "Unknown OpenCL error";
7149 }
7150#undef CV_OCL_CODE
7151#undef CV_OCL_CODE_
7152}
7153
7154template <typename T>
7155static std::string kerToStr(const Mat & k)
7156{
7157 int width = k.cols - 1, depth = k.depth();
7158 const T * const data = k.ptr<T>();
7159
7160 std::ostringstream stream;
7161 stream.precision(prec: 10);
7162
7163 if (depth <= CV_8S)
7164 {
7165 for (int i = 0; i < width; ++i)
7166 stream << "DIG(" << (int)data[i] << ")";
7167 stream << "DIG(" << (int)data[width] << ")";
7168 }
7169 else if (depth == CV_32F)
7170 {
7171 stream.setf(std::ios_base::showpoint);
7172 for (int i = 0; i < width; ++i)
7173 stream << "DIG(" << data[i] << "f)";
7174 stream << "DIG(" << data[width] << "f)";
7175 }
7176 else if (depth == CV_16F)
7177 {
7178 stream.setf(std::ios_base::showpoint);
7179 for (int i = 0; i < width; ++i)
7180 stream << "DIG(" << (float)data[i] << "h)";
7181 stream << "DIG(" << (float)data[width] << "h)";
7182 }
7183 else
7184 {
7185 for (int i = 0; i < width; ++i)
7186 stream << "DIG(" << data[i] << ")";
7187 stream << "DIG(" << data[width] << ")";
7188 }
7189
7190 return stream.str();
7191}
7192
7193String kernelToStr(InputArray _kernel, int ddepth, const char * name)
7194{
7195 Mat kernel = _kernel.getMat().reshape(cn: 1, rows: 1);
7196
7197 int depth = kernel.depth();
7198 if (ddepth < 0)
7199 ddepth = depth;
7200
7201 if (ddepth != depth)
7202 kernel.convertTo(m: kernel, rtype: ddepth);
7203
7204 typedef std::string (* func_t)(const Mat &);
7205 static const func_t funcs[] = { kerToStr<uchar>, kerToStr<char>, kerToStr<ushort>, kerToStr<short>,
7206 kerToStr<int>, kerToStr<float>, kerToStr<double>, kerToStr<hfloat> };
7207 const func_t func = funcs[ddepth];
7208 CV_Assert(func != 0);
7209
7210 return cv::format(fmt: " -D %s=%s", name ? name : "COEFF", func(kernel).c_str());
7211}
7212
// Helper for checkOptimalVectorWidth(): for a non-empty input array, records its row
// width in elements (channels * columns), offset, step, byte divider and candidate
// vector width in the caller's local vectors. It makes the enclosing function return 1
// when the array is narrower than the candidate width, the candidate width is not
// positive, or (OCL_VECTOR_OWN strategy) the array type differs from ref_type.
// Relies on the caller's locals: vectorWidths, strat, ref_type, cols, offsets, steps,
// dividers, kercns.
#define PROCESS_SRC(src) \
    do \
    { \
        if (!(src).empty()) \
        { \
            CV_Assert((src).isMat() || (src).isUMat()); \
            Size csize = (src).size(); \
            int ctype = (src).type(); \
            int ccn = CV_MAT_CN(ctype); \
            int cdepth = CV_MAT_DEPTH(ctype); \
            int ckercn = vectorWidths[cdepth]; \
            int cwidth = ccn * csize.width; \
            if (cwidth < ckercn || ckercn <= 0) \
                return 1; \
            cols.push_back(cwidth); \
            if (strat == OCL_VECTOR_OWN && ctype != ref_type) \
                return 1; \
            offsets.push_back((src).offset()); \
            steps.push_back((src).step()); \
            dividers.push_back(ckercn * CV_ELEM_SIZE1(ctype)); \
            kercns.push_back(ckercn); \
        } \
    } \
    while ((void)0, 0)
7234
7235int predictOptimalVectorWidth(InputArray src1, InputArray src2, InputArray src3,
7236 InputArray src4, InputArray src5, InputArray src6,
7237 InputArray src7, InputArray src8, InputArray src9,
7238 OclVectorStrategy strat)
7239{
7240 const ocl::Device & d = ocl::Device::getDefault();
7241
7242 int vectorWidths[] = { d.preferredVectorWidthChar(), d.preferredVectorWidthChar(),
7243 d.preferredVectorWidthShort(), d.preferredVectorWidthShort(),
7244 d.preferredVectorWidthInt(), d.preferredVectorWidthFloat(),
7245 d.preferredVectorWidthDouble(), d.preferredVectorWidthHalf() };
7246
7247 // if the device says don't use vectors
7248 if (vectorWidths[0] == 1)
7249 {
7250 // it's heuristic
7251 vectorWidths[CV_8U] = vectorWidths[CV_8S] = 4;
7252 vectorWidths[CV_16U] = vectorWidths[CV_16S] = vectorWidths[CV_16F] = 2;
7253 vectorWidths[CV_32S] = vectorWidths[CV_32F] = vectorWidths[CV_64F] = 1;
7254 }
7255
7256 return checkOptimalVectorWidth(vectorWidths, src1, src2, src3, src4, src5, src6, src7, src8, src9, strat);
7257}
7258
7259int checkOptimalVectorWidth(const int *vectorWidths,
7260 InputArray src1, InputArray src2, InputArray src3,
7261 InputArray src4, InputArray src5, InputArray src6,
7262 InputArray src7, InputArray src8, InputArray src9,
7263 OclVectorStrategy strat)
7264{
7265 CV_Assert(vectorWidths);
7266
7267 int ref_type = src1.type();
7268
7269 std::vector<size_t> offsets, steps, cols;
7270 std::vector<int> dividers, kercns;
7271 PROCESS_SRC(src1);
7272 PROCESS_SRC(src2);
7273 PROCESS_SRC(src3);
7274 PROCESS_SRC(src4);
7275 PROCESS_SRC(src5);
7276 PROCESS_SRC(src6);
7277 PROCESS_SRC(src7);
7278 PROCESS_SRC(src8);
7279 PROCESS_SRC(src9);
7280
7281 size_t size = offsets.size();
7282
7283 for (size_t i = 0; i < size; ++i)
7284 while (offsets[i] % dividers[i] != 0 || steps[i] % dividers[i] != 0 || cols[i] % kercns[i] != 0)
7285 dividers[i] >>= 1, kercns[i] >>= 1;
7286
7287 // default strategy
7288 int kercn = *std::min_element(first: kercns.begin(), last: kercns.end());
7289
7290 return kercn;
7291}
7292
7293int predictOptimalVectorWidthMax(InputArray src1, InputArray src2, InputArray src3,
7294 InputArray src4, InputArray src5, InputArray src6,
7295 InputArray src7, InputArray src8, InputArray src9)
7296{
7297 return predictOptimalVectorWidth(src1, src2, src3, src4, src5, src6, src7, src8, src9, strat: OCL_VECTOR_MAX);
7298}
7299
7300#undef PROCESS_SRC
7301
7302
7303// TODO Make this as a method of OpenCL "BuildOptions" class
7304void buildOptionsAddMatrixDescription(String& buildOptions, const String& name, InputArray _m)
7305{
7306 if (!buildOptions.empty())
7307 buildOptions += " ";
7308 int type = _m.type(), depth = CV_MAT_DEPTH(type);
7309 buildOptions += format(
7310 fmt: "-D %s_T=%s -D %s_T1=%s -D %s_CN=%d -D %s_TSIZE=%d -D %s_T1SIZE=%d -D %s_DEPTH=%d",
7311 name.c_str(), ocl::typeToStr(type),
7312 name.c_str(), ocl::typeToStr(CV_MAKE_TYPE(depth, 1)),
7313 name.c_str(), (int)CV_MAT_CN(type),
7314 name.c_str(), (int)CV_ELEM_SIZE(type),
7315 name.c_str(), (int)CV_ELEM_SIZE1(type),
7316 name.c_str(), (int)depth
7317 );
7318}
7319
7320
7321struct Image2D::Impl
7322{
7323 Impl(const UMat &src, bool norm, bool alias)
7324 {
7325 handle = 0;
7326 refcount = 1;
7327 init(src, norm, alias);
7328 }
7329
7330 ~Impl()
7331 {
7332 if (handle)
7333 clReleaseMemObject(handle);
7334 }
7335
7336 static cl_image_format getImageFormat(int depth, int cn, bool norm)
7337 {
7338 cl_image_format format;
7339 static const int channelTypes[] = { CL_UNSIGNED_INT8, CL_SIGNED_INT8, CL_UNSIGNED_INT16,
7340 CL_SIGNED_INT16, CL_SIGNED_INT32, CL_FLOAT, -1, CL_HALF_FLOAT };
7341 static const int channelTypesNorm[] = { CL_UNORM_INT8, CL_SNORM_INT8, CL_UNORM_INT16,
7342 CL_SNORM_INT16, -1, -1, -1, -1 };
7343 // CL_RGB has no mappings to OpenCV types because CL_RGB can only be used with
7344 // CL_UNORM_SHORT_565, CL_UNORM_SHORT_555, or CL_UNORM_INT_101010.
7345 static const int channelOrders[] = { -1, CL_R, CL_RG, /*CL_RGB*/ -1, CL_RGBA };
7346
7347 int channelType = norm ? channelTypesNorm[depth] : channelTypes[depth];
7348 int channelOrder = channelOrders[cn];
7349 format.image_channel_data_type = (cl_channel_type)channelType;
7350 format.image_channel_order = (cl_channel_order)channelOrder;
7351 return format;
7352 }
7353
7354 static bool isFormatSupported(cl_image_format format)
7355 {
7356 if (!haveOpenCL())
7357 CV_Error(Error::OpenCLApiCallError, "OpenCL runtime not found!");
7358
7359 cl_context context = (cl_context)Context::getDefault().ptr();
7360 if (!context)
7361 return false;
7362
7363 // Figure out how many formats are supported by this context.
7364 cl_uint numFormats = 0;
7365 cl_int err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE,
7366 CL_MEM_OBJECT_IMAGE2D, numFormats,
7367 NULL, &numFormats);
7368 CV_OCL_DBG_CHECK_RESULT(err, "clGetSupportedImageFormats(CL_MEM_OBJECT_IMAGE2D, NULL)");
7369 if (numFormats > 0)
7370 {
7371 AutoBuffer<cl_image_format> formats(numFormats);
7372 err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE,
7373 CL_MEM_OBJECT_IMAGE2D, numFormats,
7374 formats.data(), NULL);
7375 CV_OCL_DBG_CHECK_RESULT(err, "clGetSupportedImageFormats(CL_MEM_OBJECT_IMAGE2D, formats)");
7376 for (cl_uint i = 0; i < numFormats; ++i)
7377 {
7378 if (!memcmp(s1: &formats[i], s2: &format, n: sizeof(format)))
7379 {
7380 return true;
7381 }
7382 }
7383 }
7384 return false;
7385 }
7386
7387 void init(const UMat &src, bool norm, bool alias)
7388 {
7389 if (!haveOpenCL())
7390 CV_Error(Error::OpenCLApiCallError, "OpenCL runtime not found!");
7391
7392 CV_Assert(!src.empty());
7393 CV_Assert(ocl::Device::getDefault().imageSupport());
7394
7395 int err, depth = src.depth(), cn = src.channels();
7396 CV_Assert(cn <= 4);
7397 cl_image_format format = getImageFormat(depth, cn, norm);
7398
7399 if (!isFormatSupported(format))
7400 CV_Error(Error::OpenCLApiCallError, "Image format is not supported");
7401
7402 if (alias && !src.handle(accessFlags: ACCESS_RW))
7403 CV_Error(Error::OpenCLApiCallError, "Incorrect UMat, handle is null");
7404
7405 cl_context context = (cl_context)Context::getDefault().ptr();
7406 cl_command_queue queue = (cl_command_queue)Queue::getDefault().ptr();
7407
7408#ifdef CL_VERSION_1_2
7409 // this enables backwards portability to
7410 // run on OpenCL 1.1 platform if library binaries are compiled with OpenCL 1.2 support
7411 const Device & d = ocl::Device::getDefault();
7412 int minor = d.deviceVersionMinor(), major = d.deviceVersionMajor();
7413 CV_Assert(!alias || canCreateAlias(src));
7414 if (1 < major || (1 == major && 2 <= minor))
7415 {
7416 cl_image_desc desc;
7417 desc.image_type = CL_MEM_OBJECT_IMAGE2D;
7418 desc.image_width = src.cols;
7419 desc.image_height = src.rows;
7420 desc.image_depth = 0;
7421 desc.image_array_size = 1;
7422 desc.image_row_pitch = alias ? src.step[0] : 0;
7423 desc.image_slice_pitch = 0;
7424 desc.buffer = alias ? (cl_mem)src.handle(accessFlags: ACCESS_RW) : 0;
7425 desc.num_mip_levels = 0;
7426 desc.num_samples = 0;
7427 handle = clCreateImage(context, CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
7428 }
7429 else
7430#endif
7431 {
7432 CV_SUPPRESS_DEPRECATED_START
7433 CV_Assert(!alias); // This is an OpenCL 1.2 extension
7434 handle = clCreateImage2D(context, CL_MEM_READ_WRITE, &format, src.cols, src.rows, 0, NULL, &err);
7435 CV_SUPPRESS_DEPRECATED_END
7436 }
7437 CV_OCL_DBG_CHECK_RESULT(err, "clCreateImage()");
7438
7439 size_t origin[] = { 0, 0, 0 };
7440 size_t region[] = { static_cast<size_t>(src.cols), static_cast<size_t>(src.rows), 1 };
7441
7442 cl_mem devData;
7443 if (!alias && !src.isContinuous())
7444 {
7445 devData = clCreateBuffer(context, CL_MEM_READ_ONLY, src.cols * src.rows * src.elemSize(), NULL, &err);
7446 CV_OCL_CHECK_RESULT(err, cv::format("clCreateBuffer(CL_MEM_READ_ONLY, sz=%lld) => %p",
7447 (long long int)(src.cols * src.rows * src.elemSize()), (void*)devData
7448 ).c_str());
7449
7450 const size_t roi[3] = {static_cast<size_t>(src.cols) * src.elemSize(), static_cast<size_t>(src.rows), 1};
7451 CV_OCL_CHECK(clEnqueueCopyBufferRect(queue, (cl_mem)src.handle(ACCESS_READ), devData, origin, origin,
7452 roi, src.step, 0, src.cols * src.elemSize(), 0, 0, NULL, NULL));
7453 CV_OCL_DBG_CHECK(clFlush(queue));
7454 }
7455 else
7456 {
7457 devData = (cl_mem)src.handle(accessFlags: ACCESS_READ);
7458 }
7459 CV_Assert(devData != NULL);
7460
7461 if (!alias)
7462 {
7463 CV_OCL_CHECK(clEnqueueCopyBufferToImage(queue, devData, handle, 0, origin, region, 0, NULL, 0));
7464 if (!src.isContinuous())
7465 {
7466 CV_OCL_DBG_CHECK(clFlush(queue));
7467 CV_OCL_DBG_CHECK(clReleaseMemObject(devData));
7468 }
7469 }
7470 }
7471
7472 IMPLEMENT_REFCOUNTABLE();
7473
7474 cl_mem handle;
7475};
7476
7477Image2D::Image2D() CV_NOEXCEPT
7478{
7479 p = NULL;
7480}
7481
7482Image2D::Image2D(const UMat &src, bool norm, bool alias)
7483{
7484 p = new Impl(src, norm, alias);
7485}
7486
7487bool Image2D::canCreateAlias(const UMat &m)
7488{
7489 bool ret = false;
7490 const Device & d = ocl::Device::getDefault();
7491 if (d.imageFromBufferSupport() && !m.empty())
7492 {
7493 // This is the required pitch alignment in pixels
7494 uint pitchAlign = d.imagePitchAlignment();
7495 if (pitchAlign && !(m.step % (pitchAlign * m.elemSize())))
7496 {
7497 // We don't currently handle the case where the buffer was created
7498 // with CL_MEM_USE_HOST_PTR
7499 if (!m.u->tempUMat())
7500 {
7501 ret = true;
7502 }
7503 }
7504 }
7505 return ret;
7506}
7507
7508bool Image2D::isFormatSupported(int depth, int cn, bool norm)
7509{
7510 cl_image_format format = Impl::getImageFormat(depth, cn, norm);
7511
7512 return Impl::isFormatSupported(format);
7513}
7514
7515Image2D::Image2D(const Image2D & i)
7516{
7517 p = i.p;
7518 if (p)
7519 p->addref();
7520}
7521
7522Image2D & Image2D::operator = (const Image2D & i)
7523{
7524 if (i.p != p)
7525 {
7526 if (i.p)
7527 i.p->addref();
7528 if (p)
7529 p->release();
7530 p = i.p;
7531 }
7532 return *this;
7533}
7534
7535Image2D::Image2D(Image2D&& i) CV_NOEXCEPT
7536{
7537 p = i.p;
7538 i.p = nullptr;
7539}
7540
7541Image2D& Image2D::operator = (Image2D&& i) CV_NOEXCEPT
7542{
7543 if (this != &i) {
7544 if (p)
7545 p->release();
7546 p = i.p;
7547 i.p = nullptr;
7548 }
7549 return *this;
7550}
7551
7552Image2D::~Image2D()
7553{
7554 if (p)
7555 p->release();
7556}
7557
7558void* Image2D::ptr() const
7559{
7560 return p ? p->handle : 0;
7561}
7562
7563bool internal::isOpenCLForced()
7564{
7565 static bool initialized = false;
7566 static bool value = false;
7567 if (!initialized)
7568 {
7569 value = utils::getConfigurationParameterBool(name: "OPENCV_OPENCL_FORCE", defaultValue: false);
7570 initialized = true;
7571 }
7572 return value;
7573}
7574
7575bool internal::isPerformanceCheckBypassed()
7576{
7577 static bool initialized = false;
7578 static bool value = false;
7579 if (!initialized)
7580 {
7581 value = utils::getConfigurationParameterBool(name: "OPENCV_OPENCL_PERF_CHECK_BYPASS", defaultValue: false);
7582 initialized = true;
7583 }
7584 return value;
7585}
7586
7587bool internal::isCLBuffer(UMat& u)
7588{
7589 void* h = u.handle(accessFlags: ACCESS_RW);
7590 if (!h)
7591 return true;
7592 CV_DbgAssert(u.u->currAllocator == getOpenCLAllocator());
7593#if 1
7594 if ((u.u->allocatorFlags_ & 0xffff0000) != 0) // OpenCL SVM flags are stored here
7595 return false;
7596#else
7597 cl_mem_object_type type = 0;
7598 cl_int ret = clGetMemObjectInfo((cl_mem)h, CL_MEM_TYPE, sizeof(type), &type, NULL);
7599 if (ret != CL_SUCCESS || type != CL_MEM_OBJECT_BUFFER)
7600 return false;
7601#endif
7602 return true;
7603}
7604
7605struct Timer::Impl
7606{
7607 const Queue queue;
7608
7609 Impl(const Queue& q)
7610 : queue(q)
7611 {
7612 }
7613
7614 ~Impl(){}
7615
7616 void start()
7617 {
7618 CV_OCL_DBG_CHECK(clFinish((cl_command_queue)queue.ptr()));
7619 timer.start();
7620 }
7621
7622 void stop()
7623 {
7624 CV_OCL_DBG_CHECK(clFinish((cl_command_queue)queue.ptr()));
7625 timer.stop();
7626 }
7627
7628 uint64 durationNS() const
7629 {
7630 return (uint64)(timer.getTimeSec() * 1e9);
7631 }
7632
7633 TickMeter timer;
7634};
7635
7636Timer::Timer(const Queue& q) : p(new Impl(q)) { }
7637Timer::~Timer() { delete p; }
7638
7639void Timer::start()
7640{
7641 CV_Assert(p);
7642 p->start();
7643}
7644
7645void Timer::stop()
7646{
7647 CV_Assert(p);
7648 p->stop();
7649}
7650
7651uint64 Timer::durationNS() const
7652{
7653 CV_Assert(p);
7654 return p->durationNS();
7655}
7656
7657}} // namespace
7658
7659#endif // HAVE_OPENCL
7660

Provided by KDAB

Privacy Policy
Update your C++ knowledge – Modern C++11/14/17 Training
Find out more

source code of opencv/modules/core/src/ocl.cpp