1 | // This file is part of OpenCV project. |
2 | // It is subject to the license terms in the LICENSE file found in the top-level directory |
3 | // of this distribution and at http://opencv.org/license.html |
4 | |
5 | |
6 | #include "precomp.hpp" |
7 | #include "opencl_kernels_core.hpp" |
8 | #include "opencv2/core/openvx/ovx_defs.hpp" |
9 | #include "stat.hpp" |
10 | |
11 | #ifndef OPENCV_IPP_MEAN |
12 | #undef HAVE_IPP |
13 | #undef CV_IPP_RUN_FAST |
14 | #define CV_IPP_RUN_FAST(f, ...) |
15 | #undef CV_IPP_RUN |
16 | #define CV_IPP_RUN(c, f, ...) |
17 | #endif // OPENCV_IPP_MEAN |
18 | |
19 | #include "mean.simd.hpp" |
20 | #include "mean.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content |
21 | |
22 | #ifndef OPENCV_IPP_MEAN |
23 | #undef HAVE_IPP |
24 | #undef CV_IPP_RUN_FAST |
25 | #define CV_IPP_RUN_FAST(f, ...) |
26 | #undef CV_IPP_RUN |
27 | #define CV_IPP_RUN(c, f, ...) |
28 | #endif // OPENCV_IPP_MEAN |
29 | |
30 | namespace cv { |
31 | |
#if defined HAVE_IPP
// IPP-accelerated implementation of cv::mean().
// Returns true when IPP computed the result (written to 'ret'),
// false to make the caller fall back to the generic implementation.
static bool ipp_mean( Mat &src, Mat &mask, Scalar &ret )
{
    CV_INSTRUMENT_REGION_IPP();

#if IPP_VERSION_X100 >= 700
    size_t total_size = src.total();
    int cn = src.channels();
    if (cn > 4)
        return false;  // IPP mean supports at most 4 channels
    // For continuous >2D data, view the whole array as a single rows x cols 2D plane.
    int rows = src.size[0], cols = rows ? (int)(total_size/rows) : 0;
    if( src.dims == 2 || (src.isContinuous() && mask.isContinuous() && cols > 0 && (size_t)rows*cols == total_size) )
    {
        IppiSize sz = { cols, rows };
        int type = src.type();
        if( !mask.empty() )
        {
            // Masked single-channel path: one call fills the scalar directly.
            typedef IppStatus (CV_STDCALL* ippiMaskMeanFuncC1)(const void *, int, const void *, int, IppiSize, Ipp64f *);
            ippiMaskMeanFuncC1 ippiMean_C1MR =
            type == CV_8UC1 ? (ippiMaskMeanFuncC1)ippiMean_8u_C1MR :
            type == CV_16UC1 ? (ippiMaskMeanFuncC1)ippiMean_16u_C1MR :
            type == CV_32FC1 ? (ippiMaskMeanFuncC1)ippiMean_32f_C1MR :
            0;
            if( ippiMean_C1MR )
            {
                Ipp64f res;
                if( CV_INSTRUMENT_FUN_IPP(ippiMean_C1MR, src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, &res) >= 0 )
                {
                    ret = Scalar(res);
                    return true;
                }
            }
            // Masked 3-channel path: the C3CMR variants process one channel
            // per call (channel index is 1-based), so call three times.
            typedef IppStatus (CV_STDCALL* ippiMaskMeanFuncC3)(const void *, int, const void *, int, IppiSize, int, Ipp64f *);
            ippiMaskMeanFuncC3 ippiMean_C3MR =
            type == CV_8UC3 ? (ippiMaskMeanFuncC3)ippiMean_8u_C3CMR :
            type == CV_16UC3 ? (ippiMaskMeanFuncC3)ippiMean_16u_C3CMR :
            type == CV_32FC3 ? (ippiMaskMeanFuncC3)ippiMean_32f_C3CMR :
            0;
            if( ippiMean_C3MR )
            {
                Ipp64f res1, res2, res3;
                if( CV_INSTRUMENT_FUN_IPP(ippiMean_C3MR, src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 1, &res1) >= 0 &&
                    CV_INSTRUMENT_FUN_IPP(ippiMean_C3MR, src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 2, &res2) >= 0 &&
                    CV_INSTRUMENT_FUN_IPP(ippiMean_C3MR, src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 3, &res3) >= 0 )
                {
                    ret = Scalar(res1, res2, res3);
                    return true;
                }
            }
        }
        else
        {
            // Unmasked path: 32F types take an algorithm hint, integer types do not,
            // hence two distinct function-pointer signatures.
            typedef IppStatus (CV_STDCALL* ippiMeanFuncHint)(const void*, int, IppiSize, double *, IppHintAlgorithm);
            typedef IppStatus (CV_STDCALL* ippiMeanFuncNoHint)(const void*, int, IppiSize, double *);
            ippiMeanFuncHint ippiMeanHint =
                type == CV_32FC1 ? (ippiMeanFuncHint)ippiMean_32f_C1R :
                type == CV_32FC3 ? (ippiMeanFuncHint)ippiMean_32f_C3R :
                type == CV_32FC4 ? (ippiMeanFuncHint)ippiMean_32f_C4R :
                0;
            ippiMeanFuncNoHint ippiMean =
                type == CV_8UC1 ? (ippiMeanFuncNoHint)ippiMean_8u_C1R :
                type == CV_8UC3 ? (ippiMeanFuncNoHint)ippiMean_8u_C3R :
                type == CV_8UC4 ? (ippiMeanFuncNoHint)ippiMean_8u_C4R :
                type == CV_16UC1 ? (ippiMeanFuncNoHint)ippiMean_16u_C1R :
                type == CV_16UC3 ? (ippiMeanFuncNoHint)ippiMean_16u_C3R :
                type == CV_16UC4 ? (ippiMeanFuncNoHint)ippiMean_16u_C4R :
                type == CV_16SC1 ? (ippiMeanFuncNoHint)ippiMean_16s_C1R :
                type == CV_16SC3 ? (ippiMeanFuncNoHint)ippiMean_16s_C3R :
                type == CV_16SC4 ? (ippiMeanFuncNoHint)ippiMean_16s_C4R :
                0;
            // Make sure only zero or one version of the function pointer is valid
            CV_Assert(!ippiMeanHint || !ippiMean);
            if( ippiMeanHint || ippiMean )
            {
                Ipp64f res[4];
                IppStatus status = ippiMeanHint ? CV_INSTRUMENT_FUN_IPP(ippiMeanHint, src.ptr(), (int)src.step[0], sz, res, ippAlgHintAccurate) :
                                CV_INSTRUMENT_FUN_IPP(ippiMean, src.ptr(), (int)src.step[0], sz, res);
                if( status >= 0 )
                {
                    for( int i = 0; i < cn; i++ )
                        ret[i] = res[i];
                    return true;
                }
            }
        }
    }
    return false;
#else
    return false;
#endif
}
#endif
124 | |
125 | Scalar mean(InputArray _src, InputArray _mask) |
126 | { |
127 | CV_INSTRUMENT_REGION(); |
128 | |
129 | Mat src = _src.getMat(), mask = _mask.getMat(); |
130 | CV_Assert( mask.empty() || mask.type() == CV_8U ); |
131 | |
132 | int k, cn = src.channels(), depth = src.depth(); |
133 | Scalar s; |
134 | |
135 | CV_IPP_RUN(IPP_VERSION_X100 >= 700, ipp_mean(src, mask, s), s) |
136 | |
137 | SumFunc func = getSumFunc(depth); |
138 | |
139 | CV_Assert( cn <= 4 && func != 0 ); |
140 | |
141 | const Mat* arrays[] = {&src, &mask, 0}; |
142 | uchar* ptrs[2] = {}; |
143 | NAryMatIterator it(arrays, ptrs); |
144 | int total = (int)it.size, blockSize = total, intSumBlockSize = 0; |
145 | int j, count = 0; |
146 | AutoBuffer<int> _buf; |
147 | int* buf = (int*)&s[0]; |
148 | bool blockSum = depth <= CV_16S; |
149 | size_t esz = 0, nz0 = 0; |
150 | |
151 | if( blockSum ) |
152 | { |
153 | intSumBlockSize = depth <= CV_8S ? (1 << 23) : (1 << 15); |
154 | blockSize = std::min(a: blockSize, b: intSumBlockSize); |
155 | _buf.allocate(size: cn); |
156 | buf = _buf.data(); |
157 | |
158 | for( k = 0; k < cn; k++ ) |
159 | buf[k] = 0; |
160 | esz = src.elemSize(); |
161 | } |
162 | |
163 | for( size_t i = 0; i < it.nplanes; i++, ++it ) |
164 | { |
165 | for( j = 0; j < total; j += blockSize ) |
166 | { |
167 | int bsz = std::min(a: total - j, b: blockSize); |
168 | int nz = func( ptrs[0], ptrs[1], (uchar*)buf, bsz, cn ); |
169 | count += nz; |
170 | nz0 += nz; |
171 | if( blockSum && (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) ) |
172 | { |
173 | for( k = 0; k < cn; k++ ) |
174 | { |
175 | s[k] += buf[k]; |
176 | buf[k] = 0; |
177 | } |
178 | count = 0; |
179 | } |
180 | ptrs[0] += bsz*esz; |
181 | if( ptrs[1] ) |
182 | ptrs[1] += bsz; |
183 | } |
184 | } |
185 | return s*(nz0 ? 1./nz0 : 0); |
186 | } |
187 | |
// Returns the SIMD-dispatched sum-of-values/sum-of-squares kernel for the
// given element depth; dispatch target is selected at runtime from the CPU
// feature set (CV_CPU_DISPATCH_MODES_ALL comes from mean.simd_declarations.hpp).
static SumSqrFunc getSumSqrFunc(int depth)
{
    CV_INSTRUMENT_REGION();
    CV_CPU_DISPATCH(getSumSqrFunc, (depth),
        CV_CPU_DISPATCH_MODES_ALL);
}
194 | |
#ifdef HAVE_OPENCL
/** OpenCL implementation of meanStdDev().
 *
 * Runs a reduction kernel that produces per-workgroup partial sums (and,
 * with a mask, per-group non-zero counts), then finishes the reduction on
 * the host. Returns false when the configuration is unsupported (cn > 4,
 * CV_64F without device double support, kernel build failure), letting the
 * caller fall back to the CPU path.
 *
 * NOTE: the original text of this block contained IDE parameter-name inlay
 * hints (e.g. "format(fmt: ...)", "k.args(kernel_args: ...)") which are not
 * valid C++; they have been removed.
 */
static bool ocl_meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv, InputArray _mask )
{
    CV_INSTRUMENT_REGION_OPENCL();

    bool haveMask = _mask.kind() != _InputArray::NONE;
    int nz = haveMask ? -1 : (int)_src.total();  // without a mask the count is known up front
    Scalar mean(0), stddev(0);
    const int cn = _src.channels();
    if (cn > 4)
        return false;

    {
        int type = _src.type(), depth = CV_MAT_DEPTH(type);
        bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0,
                isContinuous = _src.isContinuous(),
                isMaskContinuous = _mask.isContinuous();
        const ocl::Device &defDev = ocl::Device::getDefault();
        int groups = defDev.maxComputeUnits();
        if (defDev.isIntel())
        {
            // Empirical tuning for Intel GPUs: two workgroups per sub-slice.
            static const int subSliceEUCount = 10;
            groups = (groups / subSliceEUCount) * 2;
        }
        size_t wgs = defDev.maxWorkGroupSize();

        // Accumulator types: at least CV_32S for sums, at least CV_32F for
        // sums of squares, widened to the source depth when it is larger.
        int ddepth = std::max(CV_32S, depth), sqddepth = std::max(CV_32F, depth),
                dtype = CV_MAKE_TYPE(ddepth, cn),
                sqdtype = CV_MAKETYPE(sqddepth, cn);
        CV_Assert(!haveMask || _mask.type() == CV_8UC1);

        // Largest power of two not exceeding the max workgroup size.
        int wgs2_aligned = 1;
        while (wgs2_aligned < (int)wgs)
            wgs2_aligned <<= 1;
        wgs2_aligned >>= 1;

        if ( (!doubleSupport && depth == CV_64F) )
            return false;

        char cvt[2][50];
        String opts = format("-D srcT=%s -D srcT1=%s -D dstT=%s -D dstT1=%s -D sqddepth=%d"
                             " -D sqdstT=%s -D sqdstT1=%s -D convertToSDT=%s -D cn=%d%s%s"
                             " -D convertToDT=%s -D WGS=%d -D WGS2_ALIGNED=%d%s%s",
                             ocl::typeToStr(type), ocl::typeToStr(depth),
                             ocl::typeToStr(dtype), ocl::typeToStr(ddepth), sqddepth,
                             ocl::typeToStr(sqdtype), ocl::typeToStr(sqddepth),
                             ocl::convertTypeStr(depth, sqddepth, cn, cvt[0], sizeof(cvt[0])),
                             cn, isContinuous ? " -D HAVE_SRC_CONT" : "",
                             isMaskContinuous ? " -D HAVE_MASK_CONT" : "",
                             ocl::convertTypeStr(depth, ddepth, cn, cvt[1], sizeof(cvt[1])),
                             (int)wgs, wgs2_aligned, haveMask ? " -D HAVE_MASK" : "",
                             doubleSupport ? " -D DOUBLE_SUPPORT" : "");

        ocl::Kernel k("meanStdDev", ocl::core::meanstddev_oclsrc, opts);
        if (k.empty())
            return false;

        // Device buffer layout per group: [sums (dtype)] [sq sums (sqdtype)]
        // [nz count (int32, masked case only)].
        int dbsize = groups * ((haveMask ? CV_ELEM_SIZE1(CV_32S) : 0) +
                               CV_ELEM_SIZE(sqdtype) + CV_ELEM_SIZE(dtype));
        UMat src = _src.getUMat(), db(1, dbsize, CV_8UC1), mask = _mask.getUMat();

        ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
                dbarg = ocl::KernelArg::PtrWriteOnly(db),
                maskarg = ocl::KernelArg::ReadOnlyNoSize(mask);

        if (haveMask)
            k.args(srcarg, src.cols, (int)src.total(), groups, dbarg, maskarg);
        else
            k.args(srcarg, src.cols, (int)src.total(), groups, dbarg);

        size_t globalsize = groups * wgs;

        if(!k.run(1, &globalsize, &wgs, false))
            return false;

        // Host-side final reduction over the per-group partial results.
        typedef Scalar (* part_sum)(Mat m);
        part_sum funcs[3] = { ocl_part_sum<int>, ocl_part_sum<float>, ocl_part_sum<double> };
        Mat dbm = db.getMat(ACCESS_READ);

        mean = funcs[ddepth - CV_32S](Mat(1, groups, dtype, dbm.ptr()));
        stddev = funcs[sqddepth - CV_32S](Mat(1, groups, sqdtype, dbm.ptr() + groups * CV_ELEM_SIZE(dtype)));

        if (haveMask)
            nz = saturate_cast<int>(funcs[0](Mat(1, groups, CV_32SC1, dbm.ptr() +
                                                 groups * (CV_ELEM_SIZE(dtype) +
                                                           CV_ELEM_SIZE(sqdtype))))[0]);
    }

    double total = nz != 0 ? 1.0 / nz : 0;
    int k, j;
    for (int i = 0; i < cn; ++i)
    {
        mean[i] *= total;
        // Var = E[x^2] - E[x]^2; clamp at 0 to absorb rounding error.
        stddev[i] = std::sqrt(std::max(stddev[i] * total - mean[i] * mean[i], 0.));
    }

    // Write mean and stddev into the caller-provided outputs, zero-padding
    // any extra destination channels.
    for( j = 0; j < 2; j++ )
    {
        const double * const sptr = j == 0 ? &mean[0] : &stddev[0];
        _OutputArray _dst = j == 0 ? _mean : _sdv;
        if( !_dst.needed() )
            continue;

        if( !_dst.fixedSize() )
            _dst.create(cn, 1, CV_64F, -1, true);
        Mat dst = _dst.getMat();
        int dcn = (int)dst.total();
        CV_Assert( dst.type() == CV_64F && dst.isContinuous() &&
                   (dst.cols == 1 || dst.rows == 1) && dcn >= cn );
        double* dptr = dst.ptr<double>();
        for( k = 0; k < cn; k++ )
            dptr[k] = sptr[k];
        for( ; k < dcn; k++ )
            dptr[k] = 0;
    }

    return true;
}
#endif
314 | |
#ifdef HAVE_OPENVX
// OpenVX implementation of meanStdDev(). Only handles unmasked CV_8UC1 data
// that is 2D (or continuous and reshapeable to 2D); returns false otherwise
// so the caller can fall back to other implementations.
static bool openvx_meanStdDev(Mat& src, OutputArray _mean, OutputArray _sdv, Mat& mask)
{
    size_t total_size = src.total();
    // View continuous >2D data as a single rows x cols plane.
    int rows = src.size[0], cols = rows ? (int)(total_size / rows) : 0;
    if (src.type() != CV_8UC1|| !mask.empty() ||
        (src.dims != 2 && !(src.isContinuous() && cols > 0 && (size_t)rows*cols == total_size))
        )
        return false;

    try
    {
        ivx::Context ctx = ovx::getOpenVXContext();
#ifndef VX_VERSION_1_1
        if (ctx.vendorID() == VX_ID_KHRONOS)
            return false; // Do not use OpenVX meanStdDev estimation for sample 1.0.1 implementation due to lack of accuracy
#endif

        // Wrap the existing cv::Mat data as an OpenVX image (no copy).
        ivx::Image
            ia = ivx::Image::createFromHandle(ctx, VX_DF_IMAGE_U8,
                ivx::Image::createAddressing(cols, rows, 1, (vx_int32)(src.step[0])), src.ptr());

        vx_float32 mean_temp, stddev_temp;
        ivx::IVX_CHECK_STATUS(vxuMeanStdDev(ctx, ia, &mean_temp, &stddev_temp));

        // Store the scalar result in channel 0; zero-pad any extra channels
        // the caller's fixed-size output may have.
        if (_mean.needed())
        {
            if (!_mean.fixedSize())
                _mean.create(1, 1, CV_64F, -1, true);
            Mat mean = _mean.getMat();
            CV_Assert(mean.type() == CV_64F && mean.isContinuous() &&
                (mean.cols == 1 || mean.rows == 1) && mean.total() >= 1);
            double *pmean = mean.ptr<double>();
            pmean[0] = mean_temp;
            for (int c = 1; c < (int)mean.total(); c++)
                pmean[c] = 0;
        }

        if (_sdv.needed())
        {
            if (!_sdv.fixedSize())
                _sdv.create(1, 1, CV_64F, -1, true);
            Mat stddev = _sdv.getMat();
            CV_Assert(stddev.type() == CV_64F && stddev.isContinuous() &&
                (stddev.cols == 1 || stddev.rows == 1) && stddev.total() >= 1);
            double *pstddev = stddev.ptr<double>();
            pstddev[0] = stddev_temp;
            for (int c = 1; c < (int)stddev.total(); c++)
                pstddev[c] = 0;
        }
    }
    catch (const ivx::RuntimeError & e)
    {
        VX_DbgThrow(e.what());
    }
    catch (const ivx::WrapperError & e)
    {
        VX_DbgThrow(e.what());
    }

    return true;
}
#endif
378 | |
#ifdef HAVE_IPP
// IPP-accelerated implementation of meanStdDev().
// Returns true when IPP computed mean/stddev into the caller's outputs,
// false to fall back to the generic implementation. Several IPP versions
// have known defects in specific variants; those are excluded below.
static bool ipp_meanStdDev(Mat& src, OutputArray _mean, OutputArray _sdv, Mat& mask)
{
    CV_INSTRUMENT_REGION_IPP();

#if IPP_VERSION_X100 >= 700
    int cn = src.channels();

#if IPP_VERSION_X100 < 201801
    // IPP_DISABLE: C3C functions can read outside of allocated memory
    if (cn > 1)
        return false;
#endif
#if IPP_VERSION_X100 >= 201900 && IPP_VERSION_X100 < 201901
    // IPP_DISABLE: 32f C3C functions can read outside of allocated memory
    if (cn > 1 && src.depth() == CV_32F)
        return false;

    // SSE4.2 buffer overrun
#if defined(_WIN32) && !defined(_WIN64)
    // IPPICV doesn't have AVX2 in 32-bit builds
    // However cv::ipp::getIppTopFeatures() may return AVX2 value on AVX2 capable H/W
    // details #12959
#else
    if (cv::ipp::getIppTopFeatures() == ippCPUID_SSE42) // Linux x64 + OPENCV_IPP=SSE42 is affected too
#endif
    {
        if (src.depth() == CV_32F && src.dims > 1 && src.size[src.dims - 1] == 6)
            return false;
    }
#endif

    size_t total_size = src.total();
    // View continuous >2D data as a single rows x cols plane.
    int rows = src.size[0], cols = rows ? (int)(total_size/rows) : 0;
    if( src.dims == 2 || (src.isContinuous() && mask.isContinuous() && cols > 0 && (size_t)rows*cols == total_size) )
    {
        // Default to stack storage; redirected to the caller's output Mats
        // below when those are requested.
        Ipp64f mean_temp[3];
        Ipp64f stddev_temp[3];
        Ipp64f *pmean = &mean_temp[0];
        Ipp64f *pstddev = &stddev_temp[0];
        Mat mean, stddev;
        int dcn_mean = -1;
        if( _mean.needed() )
        {
            if( !_mean.fixedSize() )
                _mean.create(cn, 1, CV_64F, -1, true);
            mean = _mean.getMat();
            dcn_mean = (int)mean.total();
            pmean = mean.ptr<Ipp64f>();
        }
        int dcn_stddev = -1;
        if( _sdv.needed() )
        {
            if( !_sdv.fixedSize() )
                _sdv.create(cn, 1, CV_64F, -1, true);
            stddev = _sdv.getMat();
            dcn_stddev = (int)stddev.total();
            pstddev = stddev.ptr<Ipp64f>();
        }
        // Zero-pad destination channels beyond the source channel count.
        for( int c = cn; c < dcn_mean; c++ )
            pmean[c] = 0;
        for( int c = cn; c < dcn_stddev; c++ )
            pstddev[c] = 0;
        IppiSize sz = { cols, rows };
        int type = src.type();
        if( !mask.empty() )
        {
            // Masked single-channel path.
            typedef IppStatus (CV_STDCALL* ippiMaskMeanStdDevFuncC1)(const void *, int, const void *, int, IppiSize, Ipp64f *, Ipp64f *);
            ippiMaskMeanStdDevFuncC1 ippiMean_StdDev_C1MR =
            type == CV_8UC1 ? (ippiMaskMeanStdDevFuncC1)ippiMean_StdDev_8u_C1MR :
            type == CV_16UC1 ? (ippiMaskMeanStdDevFuncC1)ippiMean_StdDev_16u_C1MR :
            type == CV_32FC1 ? (ippiMaskMeanStdDevFuncC1)ippiMean_StdDev_32f_C1MR :
            0;
            if( ippiMean_StdDev_C1MR )
            {
                if( CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C1MR, src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, pmean, pstddev) >= 0 )
                {
                    return true;
                }
            }
            // Masked 3-channel path: one call per channel (1-based index).
            typedef IppStatus (CV_STDCALL* ippiMaskMeanStdDevFuncC3)(const void *, int, const void *, int, IppiSize, int, Ipp64f *, Ipp64f *);
            ippiMaskMeanStdDevFuncC3 ippiMean_StdDev_C3CMR =
            type == CV_8UC3 ? (ippiMaskMeanStdDevFuncC3)ippiMean_StdDev_8u_C3CMR :
            type == CV_16UC3 ? (ippiMaskMeanStdDevFuncC3)ippiMean_StdDev_16u_C3CMR :
            type == CV_32FC3 ? (ippiMaskMeanStdDevFuncC3)ippiMean_StdDev_32f_C3CMR :
            0;
            if( ippiMean_StdDev_C3CMR )
            {
                if( CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C3CMR, src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 1, &pmean[0], &pstddev[0]) >= 0 &&
                    CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C3CMR, src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 2, &pmean[1], &pstddev[1]) >= 0 &&
                    CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C3CMR, src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 3, &pmean[2], &pstddev[2]) >= 0 )
                {
                    return true;
                }
            }
        }
        else
        {
            // Unmasked single-channel path.
            typedef IppStatus (CV_STDCALL* ippiMeanStdDevFuncC1)(const void *, int, IppiSize, Ipp64f *, Ipp64f *);
            ippiMeanStdDevFuncC1 ippiMean_StdDev_C1R =
            type == CV_8UC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_8u_C1R :
            type == CV_16UC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_16u_C1R :
#if (IPP_VERSION_X100 >= 810)
            type == CV_32FC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_32f_C1R ://Aug 2013: bug in IPP 7.1, 8.0
#endif
            0;
            if( ippiMean_StdDev_C1R )
            {
                if( CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C1R, src.ptr(), (int)src.step[0], sz, pmean, pstddev) >= 0 )
                {
                    return true;
                }
            }
            // Unmasked 3-channel path: one call per channel (1-based index).
            typedef IppStatus (CV_STDCALL* ippiMeanStdDevFuncC3)(const void *, int, IppiSize, int, Ipp64f *, Ipp64f *);
            ippiMeanStdDevFuncC3 ippiMean_StdDev_C3CR =
            type == CV_8UC3 ? (ippiMeanStdDevFuncC3)ippiMean_StdDev_8u_C3CR :
            type == CV_16UC3 ? (ippiMeanStdDevFuncC3)ippiMean_StdDev_16u_C3CR :
            type == CV_32FC3 ? (ippiMeanStdDevFuncC3)ippiMean_StdDev_32f_C3CR :
            0;
            if( ippiMean_StdDev_C3CR )
            {
                if( CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C3CR, src.ptr(), (int)src.step[0], sz, 1, &pmean[0], &pstddev[0]) >= 0 &&
                    CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C3CR, src.ptr(), (int)src.step[0], sz, 2, &pmean[1], &pstddev[1]) >= 0 &&
                    CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C3CR, src.ptr(), (int)src.step[0], sz, 3, &pmean[2], &pstddev[2]) >= 0 )
                {
                    return true;
                }
            }
        }
    }
#else
    CV_UNUSED(src); CV_UNUSED(_mean); CV_UNUSED(_sdv); CV_UNUSED(mask);
#endif
    return false;
}
#endif
515 | |
516 | void meanStdDev(InputArray _src, OutputArray _mean, OutputArray _sdv, InputArray _mask) |
517 | { |
518 | CV_INSTRUMENT_REGION(); |
519 | |
520 | CV_Assert(!_src.empty()); |
521 | CV_Assert( _mask.empty() || _mask.type() == CV_8UC1 ); |
522 | |
523 | CV_OCL_RUN(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= 2, |
524 | ocl_meanStdDev(_src, _mean, _sdv, _mask)) |
525 | |
526 | Mat src = _src.getMat(), mask = _mask.getMat(); |
527 | |
528 | CV_Assert(mask.empty() || src.size == mask.size); |
529 | |
530 | CV_OVX_RUN(!ovx::skipSmallImages<VX_KERNEL_MEAN_STDDEV>(src.cols, src.rows), |
531 | openvx_meanStdDev(src, _mean, _sdv, mask)) |
532 | |
533 | CV_IPP_RUN(IPP_VERSION_X100 >= 700, ipp_meanStdDev(src, _mean, _sdv, mask)); |
534 | |
535 | int k, cn = src.channels(), depth = src.depth(); |
536 | Mat mean_mat, stddev_mat; |
537 | |
538 | if(_mean.needed()) |
539 | { |
540 | if( !_mean.fixedSize() ) |
541 | _mean.create(rows: cn, cols: 1, CV_64F, i: -1, allowTransposed: true); |
542 | |
543 | mean_mat = _mean.getMat(); |
544 | int dcn = (int)mean_mat.total(); |
545 | CV_Assert( mean_mat.type() == CV_64F && mean_mat.isContinuous() && |
546 | (mean_mat.cols == 1 || mean_mat.rows == 1) && dcn >= cn ); |
547 | |
548 | double* dptr = mean_mat.ptr<double>(); |
549 | for(k = cn ; k < dcn; k++ ) |
550 | dptr[k] = 0; |
551 | } |
552 | |
553 | if (_sdv.needed()) |
554 | { |
555 | if( !_sdv.fixedSize() ) |
556 | _sdv.create(rows: cn, cols: 1, CV_64F, i: -1, allowTransposed: true); |
557 | |
558 | stddev_mat = _sdv.getMat(); |
559 | int dcn = (int)stddev_mat.total(); |
560 | CV_Assert( stddev_mat.type() == CV_64F && stddev_mat.isContinuous() && |
561 | (stddev_mat.cols == 1 || stddev_mat.rows == 1) && dcn >= cn ); |
562 | |
563 | double* dptr = stddev_mat.ptr<double>(); |
564 | for(k = cn ; k < dcn; k++ ) |
565 | dptr[k] = 0; |
566 | |
567 | } |
568 | |
569 | if (src.isContinuous() && mask.isContinuous()) |
570 | { |
571 | CALL_HAL(meanStdDev, cv_hal_meanStdDev, src.data, 0, (int)src.total(), 1, src.type(), |
572 | _mean.needed() ? mean_mat.ptr<double>() : nullptr, |
573 | _sdv.needed() ? stddev_mat.ptr<double>() : nullptr, |
574 | mask.data, 0); |
575 | } |
576 | else |
577 | { |
578 | if (src.dims <= 2) |
579 | { |
580 | CALL_HAL(meanStdDev, cv_hal_meanStdDev, src.data, src.step, src.cols, src.rows, src.type(), |
581 | _mean.needed() ? mean_mat.ptr<double>() : nullptr, |
582 | _sdv.needed() ? stddev_mat.ptr<double>() : nullptr, |
583 | mask.data, mask.step); |
584 | } |
585 | } |
586 | |
587 | SumSqrFunc func = getSumSqrFunc(depth); |
588 | |
589 | CV_Assert( func != 0 ); |
590 | |
591 | const Mat* arrays[] = {&src, &mask, 0}; |
592 | uchar* ptrs[2] = {}; |
593 | NAryMatIterator it(arrays, ptrs); |
594 | int total = (int)it.size, blockSize = total, intSumBlockSize = 0; |
595 | int j, count = 0, nz0 = 0; |
596 | AutoBuffer<double> _buf(cn*4); |
597 | double *s = (double*)_buf.data(), *sq = s + cn; |
598 | int *sbuf = (int*)s, *sqbuf = (int*)sq; |
599 | bool blockSum = depth <= CV_16S, blockSqSum = depth <= CV_8S; |
600 | size_t esz = 0; |
601 | |
602 | for( k = 0; k < cn; k++ ) |
603 | s[k] = sq[k] = 0; |
604 | |
605 | if( blockSum ) |
606 | { |
607 | intSumBlockSize = 1 << 15; |
608 | blockSize = std::min(a: blockSize, b: intSumBlockSize); |
609 | sbuf = (int*)(sq + cn); |
610 | if( blockSqSum ) |
611 | sqbuf = sbuf + cn; |
612 | for( k = 0; k < cn; k++ ) |
613 | sbuf[k] = sqbuf[k] = 0; |
614 | esz = src.elemSize(); |
615 | } |
616 | |
617 | for( size_t i = 0; i < it.nplanes; i++, ++it ) |
618 | { |
619 | for( j = 0; j < total; j += blockSize ) |
620 | { |
621 | int bsz = std::min(a: total - j, b: blockSize); |
622 | int nz = func( ptrs[0], ptrs[1], (uchar*)sbuf, (uchar*)sqbuf, bsz, cn ); |
623 | count += nz; |
624 | nz0 += nz; |
625 | if( blockSum && (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) ) |
626 | { |
627 | for( k = 0; k < cn; k++ ) |
628 | { |
629 | s[k] += sbuf[k]; |
630 | sbuf[k] = 0; |
631 | } |
632 | if( blockSqSum ) |
633 | { |
634 | for( k = 0; k < cn; k++ ) |
635 | { |
636 | sq[k] += sqbuf[k]; |
637 | sqbuf[k] = 0; |
638 | } |
639 | } |
640 | count = 0; |
641 | } |
642 | ptrs[0] += bsz*esz; |
643 | if( ptrs[1] ) |
644 | ptrs[1] += bsz; |
645 | } |
646 | } |
647 | |
648 | double scale = nz0 ? 1./nz0 : 0.; |
649 | for( k = 0; k < cn; k++ ) |
650 | { |
651 | s[k] *= scale; |
652 | sq[k] = std::sqrt(x: std::max(a: sq[k]*scale - s[k]*s[k], b: 0.)); |
653 | } |
654 | |
655 | if (_mean.needed()) |
656 | { |
657 | const double* sptr = s; |
658 | double* dptr = mean_mat.ptr<double>(); |
659 | for( k = 0; k < cn; k++ ) |
660 | dptr[k] = sptr[k]; |
661 | } |
662 | |
663 | if (_sdv.needed()) |
664 | { |
665 | const double* sptr = sq; |
666 | double* dptr = stddev_mat.ptr<double>(); |
667 | for( k = 0; k < cn; k++ ) |
668 | dptr[k] = sptr[k]; |
669 | } |
670 | } |
671 | |
672 | } // namespace |
673 | |