// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html


#include "precomp.hpp"
#include "opencl_kernels_core.hpp"
#include "opencv2/core/openvx/ovx_defs.hpp"
#include "stat.hpp"

#ifndef OPENCV_IPP_MEAN
#undef HAVE_IPP
#undef CV_IPP_RUN_FAST
#define CV_IPP_RUN_FAST(f, ...)
#undef CV_IPP_RUN
#define CV_IPP_RUN(c, f, ...)
#endif // OPENCV_IPP_MEAN

#include "mean.simd.hpp"
#include "mean.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content

#ifndef OPENCV_IPP_MEAN
#undef HAVE_IPP
#undef CV_IPP_RUN_FAST
#define CV_IPP_RUN_FAST(f, ...)
#undef CV_IPP_RUN
#define CV_IPP_RUN(c, f, ...)
#endif // OPENCV_IPP_MEAN
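// Note: unless OPENCV_IPP_MEAN is defined at build time, the CV_IPP_RUN* macros
// above expand to nothing, so the IPP fast paths in this file are compiled out
// and the generic implementations below are always used.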

namespace cv {

#if defined HAVE_IPP
static bool ipp_mean( Mat &src, Mat &mask, Scalar &ret )
{
    CV_INSTRUMENT_REGION_IPP();

#if IPP_VERSION_X100 >= 700
    size_t total_size = src.total();
    int cn = src.channels();
    if (cn > 4)
        return false;
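    // Treat a continuous n-dimensional array as a single rows x cols 2-D image,
    // so that it can be passed to the 2-D IPP primitives below.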
    int rows = src.size[0], cols = rows ? (int)(total_size/rows) : 0;
    if( src.dims == 2 || (src.isContinuous() && mask.isContinuous() && cols > 0 && (size_t)rows*cols == total_size) )
    {
        IppiSize sz = { cols, rows };
        int type = src.type();
        if( !mask.empty() )
        {
            typedef IppStatus (CV_STDCALL* ippiMaskMeanFuncC1)(const void *, int, const void *, int, IppiSize, Ipp64f *);
            ippiMaskMeanFuncC1 ippiMean_C1MR =
                type == CV_8UC1 ? (ippiMaskMeanFuncC1)ippiMean_8u_C1MR :
                type == CV_16UC1 ? (ippiMaskMeanFuncC1)ippiMean_16u_C1MR :
                type == CV_32FC1 ? (ippiMaskMeanFuncC1)ippiMean_32f_C1MR :
                0;
            if( ippiMean_C1MR )
            {
                Ipp64f res;
                if( CV_INSTRUMENT_FUN_IPP(ippiMean_C1MR, src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, &res) >= 0 )
                {
                    ret = Scalar(res);
                    return true;
                }
            }
            typedef IppStatus (CV_STDCALL* ippiMaskMeanFuncC3)(const void *, int, const void *, int, IppiSize, int, Ipp64f *);
            ippiMaskMeanFuncC3 ippiMean_C3MR =
                type == CV_8UC3 ? (ippiMaskMeanFuncC3)ippiMean_8u_C3CMR :
                type == CV_16UC3 ? (ippiMaskMeanFuncC3)ippiMean_16u_C3CMR :
                type == CV_32FC3 ? (ippiMaskMeanFuncC3)ippiMean_32f_C3CMR :
                0;
            if( ippiMean_C3MR )
            {
                Ipp64f res1, res2, res3;
                if( CV_INSTRUMENT_FUN_IPP(ippiMean_C3MR, src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 1, &res1) >= 0 &&
                    CV_INSTRUMENT_FUN_IPP(ippiMean_C3MR, src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 2, &res2) >= 0 &&
                    CV_INSTRUMENT_FUN_IPP(ippiMean_C3MR, src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 3, &res3) >= 0 )
                {
                    ret = Scalar(res1, res2, res3);
                    return true;
                }
            }
        }
        else
        {
            typedef IppStatus (CV_STDCALL* ippiMeanFuncHint)(const void*, int, IppiSize, double *, IppHintAlgorithm);
            typedef IppStatus (CV_STDCALL* ippiMeanFuncNoHint)(const void*, int, IppiSize, double *);
            ippiMeanFuncHint ippiMeanHint =
                type == CV_32FC1 ? (ippiMeanFuncHint)ippiMean_32f_C1R :
                type == CV_32FC3 ? (ippiMeanFuncHint)ippiMean_32f_C3R :
                type == CV_32FC4 ? (ippiMeanFuncHint)ippiMean_32f_C4R :
                0;
            ippiMeanFuncNoHint ippiMean =
                type == CV_8UC1 ? (ippiMeanFuncNoHint)ippiMean_8u_C1R :
                type == CV_8UC3 ? (ippiMeanFuncNoHint)ippiMean_8u_C3R :
                type == CV_8UC4 ? (ippiMeanFuncNoHint)ippiMean_8u_C4R :
                type == CV_16UC1 ? (ippiMeanFuncNoHint)ippiMean_16u_C1R :
                type == CV_16UC3 ? (ippiMeanFuncNoHint)ippiMean_16u_C3R :
                type == CV_16UC4 ? (ippiMeanFuncNoHint)ippiMean_16u_C4R :
                type == CV_16SC1 ? (ippiMeanFuncNoHint)ippiMean_16s_C1R :
                type == CV_16SC3 ? (ippiMeanFuncNoHint)ippiMean_16s_C3R :
                type == CV_16SC4 ? (ippiMeanFuncNoHint)ippiMean_16s_C4R :
                0;
            // Make sure only zero or one version of the function pointer is valid
            CV_Assert(!ippiMeanHint || !ippiMean);
            if( ippiMeanHint || ippiMean )
            {
                Ipp64f res[4];
                IppStatus status = ippiMeanHint ? CV_INSTRUMENT_FUN_IPP(ippiMeanHint, src.ptr(), (int)src.step[0], sz, res, ippAlgHintAccurate) :
                                   CV_INSTRUMENT_FUN_IPP(ippiMean, src.ptr(), (int)src.step[0], sz, res);
                if( status >= 0 )
                {
                    for( int i = 0; i < cn; i++ )
                        ret[i] = res[i];
                    return true;
                }
            }
        }
    }
    return false;
#else
    return false;
#endif
}
#endif

Scalar mean(InputArray _src, InputArray _mask)
{
    CV_INSTRUMENT_REGION();

    Mat src = _src.getMat(), mask = _mask.getMat();
    CV_Assert( mask.empty() || mask.type() == CV_8U );

    int k, cn = src.channels(), depth = src.depth();
    Scalar s;

    CV_IPP_RUN(IPP_VERSION_X100 >= 700, ipp_mean(src, mask, s), s)

    SumFunc func = getSumFunc(depth);

    CV_Assert( cn <= 4 && func != 0 );

    const Mat* arrays[] = {&src, &mask, 0};
    uchar* ptrs[2] = {};
    NAryMatIterator it(arrays, ptrs);
    int total = (int)it.size, blockSize = total, intSumBlockSize = 0;
    int j, count = 0;
    AutoBuffer<int> _buf;
    int* buf = (int*)&s[0];
    bool blockSum = depth <= CV_16S;
    size_t esz = 0, nz0 = 0;

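    // For 8- and 16-bit inputs, partial sums are accumulated in 32-bit ints and
    // periodically flushed into the double-precision Scalar. This is safe because
    // for 8-bit data each sample is < 2^8, so 2^23 samples stay below 2^31, and
    // for 16-bit data each sample is < 2^16, hence the 2^15 block limit.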
    if( blockSum )
    {
        intSumBlockSize = depth <= CV_8S ? (1 << 23) : (1 << 15);
        blockSize = std::min(blockSize, intSumBlockSize);
        _buf.allocate(cn);
        buf = _buf.data();

        for( k = 0; k < cn; k++ )
            buf[k] = 0;
        esz = src.elemSize();
    }

    for( size_t i = 0; i < it.nplanes; i++, ++it )
    {
        for( j = 0; j < total; j += blockSize )
        {
            int bsz = std::min(total - j, blockSize);
            int nz = func( ptrs[0], ptrs[1], (uchar*)buf, bsz, cn );
            count += nz;
            nz0 += nz;
            if( blockSum && (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) )
            {
                for( k = 0; k < cn; k++ )
                {
                    s[k] += buf[k];
                    buf[k] = 0;
                }
                count = 0;
            }
            ptrs[0] += bsz*esz;
            if( ptrs[1] )
                ptrs[1] += bsz;
        }
    }
    return s*(nz0 ? 1./nz0 : 0);
}
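/* Usage sketch (illustrative only, not part of the library source; the file
   name and variable names are hypothetical):

     cv::Mat img = cv::imread("input.png");                // e.g. an 8UC3 image
     cv::Mat roiMask = cv::Mat::zeros(img.size(), CV_8UC1);
     roiMask(cv::Rect(10, 10, 64, 64)).setTo(255);         // select a 64x64 region
     cv::Scalar m = cv::mean(img, roiMask);                // per-channel mean over non-zero mask pixels
*/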

static SumSqrFunc getSumSqrFunc(int depth)
{
    CV_INSTRUMENT_REGION();
    CV_CPU_DISPATCH(getSumSqrFunc, (depth),
        CV_CPU_DISPATCH_MODES_ALL);
}

#ifdef HAVE_OPENCL
static bool ocl_meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv, InputArray _mask )
{
    CV_INSTRUMENT_REGION_OPENCL();

    bool haveMask = _mask.kind() != _InputArray::NONE;
    int nz = haveMask ? -1 : (int)_src.total();
    Scalar mean(0), stddev(0);
    const int cn = _src.channels();
    if (cn > 4)
        return false;

    {
        int type = _src.type(), depth = CV_MAT_DEPTH(type);
        bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0,
            isContinuous = _src.isContinuous(),
            isMaskContinuous = _mask.isContinuous();
        const ocl::Device &defDev = ocl::Device::getDefault();
        int groups = defDev.maxComputeUnits();
        if (defDev.isIntel())
        {
            static const int subSliceEUCount = 10;
            groups = (groups / subSliceEUCount) * 2;
        }
        size_t wgs = defDev.maxWorkGroupSize();

        int ddepth = std::max(CV_32S, depth), sqddepth = std::max(CV_32F, depth),
            dtype = CV_MAKE_TYPE(ddepth, cn),
            sqdtype = CV_MAKETYPE(sqddepth, cn);
        CV_Assert(!haveMask || _mask.type() == CV_8UC1);

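        // Round the maximum work-group size down to a power of two; the kernel's
        // reduction step apparently assumes a power-of-two work-group partition.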
        int wgs2_aligned = 1;
        while (wgs2_aligned < (int)wgs)
            wgs2_aligned <<= 1;
        wgs2_aligned >>= 1;

        if ( (!doubleSupport && depth == CV_64F) )
            return false;

        char cvt[2][50];
        String opts = format("-D srcT=%s -D srcT1=%s -D dstT=%s -D dstT1=%s -D sqddepth=%d"
                             " -D sqdstT=%s -D sqdstT1=%s -D convertToSDT=%s -D cn=%d%s%s"
                             " -D convertToDT=%s -D WGS=%d -D WGS2_ALIGNED=%d%s%s",
                             ocl::typeToStr(type), ocl::typeToStr(depth),
                             ocl::typeToStr(dtype), ocl::typeToStr(ddepth), sqddepth,
                             ocl::typeToStr(sqdtype), ocl::typeToStr(sqddepth),
                             ocl::convertTypeStr(depth, sqddepth, cn, cvt[0], sizeof(cvt[0])),
                             cn, isContinuous ? " -D HAVE_SRC_CONT" : "",
                             isMaskContinuous ? " -D HAVE_MASK_CONT" : "",
                             ocl::convertTypeStr(depth, ddepth, cn, cvt[1], sizeof(cvt[1])),
                             (int)wgs, wgs2_aligned, haveMask ? " -D HAVE_MASK" : "",
                             doubleSupport ? " -D DOUBLE_SUPPORT" : "");

        ocl::Kernel k("meanStdDev", ocl::core::meanstddev_oclsrc, opts);
        if (k.empty())
            return false;

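        // Per-group partial results are packed into one flat byte buffer:
        // 'groups' sums (dtype), then 'groups' squared sums (sqdtype), and,
        // when a mask is used, 'groups' non-zero counters (CV_32S).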
        int dbsize = groups * ((haveMask ? CV_ELEM_SIZE1(CV_32S) : 0) +
                               CV_ELEM_SIZE(sqdtype) + CV_ELEM_SIZE(dtype));
        UMat src = _src.getUMat(), db(1, dbsize, CV_8UC1), mask = _mask.getUMat();

        ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
                dbarg = ocl::KernelArg::PtrWriteOnly(db),
                maskarg = ocl::KernelArg::ReadOnlyNoSize(mask);

        if (haveMask)
            k.args(srcarg, src.cols, (int)src.total(), groups, dbarg, maskarg);
        else
            k.args(srcarg, src.cols, (int)src.total(), groups, dbarg);

        size_t globalsize = groups * wgs;

        if(!k.run(1, &globalsize, &wgs, false))
            return false;

        typedef Scalar (* part_sum)(Mat m);
        part_sum funcs[3] = { ocl_part_sum<int>, ocl_part_sum<float>, ocl_part_sum<double> };
        Mat dbm = db.getMat(ACCESS_READ);

        mean = funcs[ddepth - CV_32S](Mat(1, groups, dtype, dbm.ptr()));
        stddev = funcs[sqddepth - CV_32S](Mat(1, groups, sqdtype, dbm.ptr() + groups * CV_ELEM_SIZE(dtype)));

        if (haveMask)
            nz = saturate_cast<int>(funcs[0](Mat(1, groups, CV_32SC1, dbm.ptr() +
                                                 groups * (CV_ELEM_SIZE(dtype) +
                                                           CV_ELEM_SIZE(sqdtype))))[0]);
    }

    double total = nz != 0 ? 1.0 / nz : 0;
    int k, j;
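    // Standard deviation from raw moments: sqrt(E[x^2] - E[x]^2), clamped at
    // zero to guard against tiny negative values caused by rounding.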
    for (int i = 0; i < cn; ++i)
    {
        mean[i] *= total;
        stddev[i] = std::sqrt(std::max(stddev[i] * total - mean[i] * mean[i], 0.));
    }

    for( j = 0; j < 2; j++ )
    {
        const double * const sptr = j == 0 ? &mean[0] : &stddev[0];
        _OutputArray _dst = j == 0 ? _mean : _sdv;
        if( !_dst.needed() )
            continue;

        if( !_dst.fixedSize() )
            _dst.create(cn, 1, CV_64F, -1, true);
        Mat dst = _dst.getMat();
        int dcn = (int)dst.total();
        CV_Assert( dst.type() == CV_64F && dst.isContinuous() &&
                   (dst.cols == 1 || dst.rows == 1) && dcn >= cn );
        double* dptr = dst.ptr<double>();
        for( k = 0; k < cn; k++ )
            dptr[k] = sptr[k];
        for( ; k < dcn; k++ )
            dptr[k] = 0;
    }

    return true;
}
#endif

#ifdef HAVE_OPENVX
    static bool openvx_meanStdDev(Mat& src, OutputArray _mean, OutputArray _sdv, Mat& mask)
    {
        size_t total_size = src.total();
        int rows = src.size[0], cols = rows ? (int)(total_size / rows) : 0;
        if (src.type() != CV_8UC1 || !mask.empty() ||
            (src.dims != 2 && !(src.isContinuous() && cols > 0 && (size_t)rows*cols == total_size))
            )
            return false;

        try
        {
            ivx::Context ctx = ovx::getOpenVXContext();
#ifndef VX_VERSION_1_1
            if (ctx.vendorID() == VX_ID_KHRONOS)
                return false; // Do not use OpenVX meanStdDev estimation for sample 1.0.1 implementation due to lack of accuracy
#endif

            ivx::Image
                ia = ivx::Image::createFromHandle(ctx, VX_DF_IMAGE_U8,
                    ivx::Image::createAddressing(cols, rows, 1, (vx_int32)(src.step[0])), src.ptr());

            vx_float32 mean_temp, stddev_temp;
            ivx::IVX_CHECK_STATUS(vxuMeanStdDev(ctx, ia, &mean_temp, &stddev_temp));

            if (_mean.needed())
            {
                if (!_mean.fixedSize())
                    _mean.create(1, 1, CV_64F, -1, true);
                Mat mean = _mean.getMat();
                CV_Assert(mean.type() == CV_64F && mean.isContinuous() &&
                    (mean.cols == 1 || mean.rows == 1) && mean.total() >= 1);
                double *pmean = mean.ptr<double>();
                pmean[0] = mean_temp;
                for (int c = 1; c < (int)mean.total(); c++)
                    pmean[c] = 0;
            }

            if (_sdv.needed())
            {
                if (!_sdv.fixedSize())
                    _sdv.create(1, 1, CV_64F, -1, true);
                Mat stddev = _sdv.getMat();
                CV_Assert(stddev.type() == CV_64F && stddev.isContinuous() &&
                    (stddev.cols == 1 || stddev.rows == 1) && stddev.total() >= 1);
                double *pstddev = stddev.ptr<double>();
                pstddev[0] = stddev_temp;
                for (int c = 1; c < (int)stddev.total(); c++)
                    pstddev[c] = 0;
            }
        }
        catch (const ivx::RuntimeError & e)
        {
            VX_DbgThrow(e.what());
        }
        catch (const ivx::WrapperError & e)
        {
            VX_DbgThrow(e.what());
        }

        return true;
    }
#endif

#ifdef HAVE_IPP
static bool ipp_meanStdDev(Mat& src, OutputArray _mean, OutputArray _sdv, Mat& mask)
{
    CV_INSTRUMENT_REGION_IPP();

#if IPP_VERSION_X100 >= 700
    int cn = src.channels();

#if IPP_VERSION_X100 < 201801
    // IPP_DISABLE: C3C functions can read outside of allocated memory
    if (cn > 1)
        return false;
#endif
#if IPP_VERSION_X100 >= 201900 && IPP_VERSION_X100 < 201901
    // IPP_DISABLE: 32f C3C functions can read outside of allocated memory
    if (cn > 1 && src.depth() == CV_32F)
        return false;

    // SSE4.2 buffer overrun
#if defined(_WIN32) && !defined(_WIN64)
    // IPPICV doesn't have AVX2 in 32-bit builds
    // However cv::ipp::getIppTopFeatures() may return AVX2 value on AVX2-capable H/W
    // details #12959
#else
    if (cv::ipp::getIppTopFeatures() == ippCPUID_SSE42) // Linux x64 + OPENCV_IPP=SSE42 is affected too
#endif
    {
        if (src.depth() == CV_32F && src.dims > 1 && src.size[src.dims - 1] == 6)
            return false;
    }
#endif

    size_t total_size = src.total();
    int rows = src.size[0], cols = rows ? (int)(total_size/rows) : 0;
    if( src.dims == 2 || (src.isContinuous() && mask.isContinuous() && cols > 0 && (size_t)rows*cols == total_size) )
    {
        Ipp64f mean_temp[3];
        Ipp64f stddev_temp[3];
        Ipp64f *pmean = &mean_temp[0];
        Ipp64f *pstddev = &stddev_temp[0];
        Mat mean, stddev;
        int dcn_mean = -1;
        if( _mean.needed() )
        {
            if( !_mean.fixedSize() )
                _mean.create(cn, 1, CV_64F, -1, true);
            mean = _mean.getMat();
            dcn_mean = (int)mean.total();
            pmean = mean.ptr<Ipp64f>();
        }
        int dcn_stddev = -1;
        if( _sdv.needed() )
        {
            if( !_sdv.fixedSize() )
                _sdv.create(cn, 1, CV_64F, -1, true);
            stddev = _sdv.getMat();
            dcn_stddev = (int)stddev.total();
            pstddev = stddev.ptr<Ipp64f>();
        }
        for( int c = cn; c < dcn_mean; c++ )
            pmean[c] = 0;
        for( int c = cn; c < dcn_stddev; c++ )
            pstddev[c] = 0;
        IppiSize sz = { cols, rows };
        int type = src.type();
        if( !mask.empty() )
        {
            typedef IppStatus (CV_STDCALL* ippiMaskMeanStdDevFuncC1)(const void *, int, const void *, int, IppiSize, Ipp64f *, Ipp64f *);
            ippiMaskMeanStdDevFuncC1 ippiMean_StdDev_C1MR =
                type == CV_8UC1 ? (ippiMaskMeanStdDevFuncC1)ippiMean_StdDev_8u_C1MR :
                type == CV_16UC1 ? (ippiMaskMeanStdDevFuncC1)ippiMean_StdDev_16u_C1MR :
                type == CV_32FC1 ? (ippiMaskMeanStdDevFuncC1)ippiMean_StdDev_32f_C1MR :
                0;
            if( ippiMean_StdDev_C1MR )
            {
                if( CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C1MR, src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, pmean, pstddev) >= 0 )
                {
                    return true;
                }
            }
            typedef IppStatus (CV_STDCALL* ippiMaskMeanStdDevFuncC3)(const void *, int, const void *, int, IppiSize, int, Ipp64f *, Ipp64f *);
            ippiMaskMeanStdDevFuncC3 ippiMean_StdDev_C3CMR =
                type == CV_8UC3 ? (ippiMaskMeanStdDevFuncC3)ippiMean_StdDev_8u_C3CMR :
                type == CV_16UC3 ? (ippiMaskMeanStdDevFuncC3)ippiMean_StdDev_16u_C3CMR :
                type == CV_32FC3 ? (ippiMaskMeanStdDevFuncC3)ippiMean_StdDev_32f_C3CMR :
                0;
            if( ippiMean_StdDev_C3CMR )
            {
                if( CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C3CMR, src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 1, &pmean[0], &pstddev[0]) >= 0 &&
                    CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C3CMR, src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 2, &pmean[1], &pstddev[1]) >= 0 &&
                    CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C3CMR, src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 3, &pmean[2], &pstddev[2]) >= 0 )
                {
                    return true;
                }
            }
        }
        else
        {
            typedef IppStatus (CV_STDCALL* ippiMeanStdDevFuncC1)(const void *, int, IppiSize, Ipp64f *, Ipp64f *);
            ippiMeanStdDevFuncC1 ippiMean_StdDev_C1R =
                type == CV_8UC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_8u_C1R :
                type == CV_16UC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_16u_C1R :
#if (IPP_VERSION_X100 >= 810)
                type == CV_32FC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_32f_C1R : // Aug 2013: bug in IPP 7.1, 8.0
#endif
                0;
            if( ippiMean_StdDev_C1R )
            {
                if( CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C1R, src.ptr(), (int)src.step[0], sz, pmean, pstddev) >= 0 )
                {
                    return true;
                }
            }
            typedef IppStatus (CV_STDCALL* ippiMeanStdDevFuncC3)(const void *, int, IppiSize, int, Ipp64f *, Ipp64f *);
            ippiMeanStdDevFuncC3 ippiMean_StdDev_C3CR =
                type == CV_8UC3 ? (ippiMeanStdDevFuncC3)ippiMean_StdDev_8u_C3CR :
                type == CV_16UC3 ? (ippiMeanStdDevFuncC3)ippiMean_StdDev_16u_C3CR :
                type == CV_32FC3 ? (ippiMeanStdDevFuncC3)ippiMean_StdDev_32f_C3CR :
                0;
            if( ippiMean_StdDev_C3CR )
            {
                if( CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C3CR, src.ptr(), (int)src.step[0], sz, 1, &pmean[0], &pstddev[0]) >= 0 &&
                    CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C3CR, src.ptr(), (int)src.step[0], sz, 2, &pmean[1], &pstddev[1]) >= 0 &&
                    CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C3CR, src.ptr(), (int)src.step[0], sz, 3, &pmean[2], &pstddev[2]) >= 0 )
                {
                    return true;
                }
            }
        }
    }
#else
    CV_UNUSED(src); CV_UNUSED(_mean); CV_UNUSED(_sdv); CV_UNUSED(mask);
#endif
    return false;
}
#endif

void meanStdDev(InputArray _src, OutputArray _mean, OutputArray _sdv, InputArray _mask)
{
    CV_INSTRUMENT_REGION();

    CV_Assert(!_src.empty());
    CV_Assert( _mask.empty() || _mask.type() == CV_8UC1 );

    CV_OCL_RUN(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= 2,
               ocl_meanStdDev(_src, _mean, _sdv, _mask))

    Mat src = _src.getMat(), mask = _mask.getMat();

    CV_Assert(mask.empty() || src.size == mask.size);

    CV_OVX_RUN(!ovx::skipSmallImages<VX_KERNEL_MEAN_STDDEV>(src.cols, src.rows),
               openvx_meanStdDev(src, _mean, _sdv, mask))

    CV_IPP_RUN(IPP_VERSION_X100 >= 700, ipp_meanStdDev(src, _mean, _sdv, mask));

    int k, cn = src.channels(), depth = src.depth();
    Mat mean_mat, stddev_mat;

    if(_mean.needed())
    {
        if( !_mean.fixedSize() )
            _mean.create(cn, 1, CV_64F, -1, true);

        mean_mat = _mean.getMat();
        int dcn = (int)mean_mat.total();
        CV_Assert( mean_mat.type() == CV_64F && mean_mat.isContinuous() &&
                   (mean_mat.cols == 1 || mean_mat.rows == 1) && dcn >= cn );

        double* dptr = mean_mat.ptr<double>();
        for(k = cn ; k < dcn; k++ )
            dptr[k] = 0;
    }

    if (_sdv.needed())
    {
        if( !_sdv.fixedSize() )
            _sdv.create(cn, 1, CV_64F, -1, true);

        stddev_mat = _sdv.getMat();
        int dcn = (int)stddev_mat.total();
        CV_Assert( stddev_mat.type() == CV_64F && stddev_mat.isContinuous() &&
                   (stddev_mat.cols == 1 || stddev_mat.rows == 1) && dcn >= cn );

        double* dptr = stddev_mat.ptr<double>();
        for(k = cn ; k < dcn; k++ )
            dptr[k] = 0;

    }

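    // If a custom HAL implements cv_hal_meanStdDev, CALL_HAL returns from this
    // function with the HAL result; otherwise execution falls through to the
    // generic block-wise implementation below.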
    if (src.isContinuous() && mask.isContinuous())
    {
        CALL_HAL(meanStdDev, cv_hal_meanStdDev, src.data, 0, (int)src.total(), 1, src.type(),
                 _mean.needed() ? mean_mat.ptr<double>() : nullptr,
                 _sdv.needed() ? stddev_mat.ptr<double>() : nullptr,
                 mask.data, 0);
    }
    else
    {
        if (src.dims <= 2)
        {
            CALL_HAL(meanStdDev, cv_hal_meanStdDev, src.data, src.step, src.cols, src.rows, src.type(),
                     _mean.needed() ? mean_mat.ptr<double>() : nullptr,
                     _sdv.needed() ? stddev_mat.ptr<double>() : nullptr,
                     mask.data, mask.step);
        }
    }

    SumSqrFunc func = getSumSqrFunc(depth);

    CV_Assert( func != 0 );

    const Mat* arrays[] = {&src, &mask, 0};
    uchar* ptrs[2] = {};
    NAryMatIterator it(arrays, ptrs);
    int total = (int)it.size, blockSize = total, intSumBlockSize = 0;
    int j, count = 0, nz0 = 0;
    AutoBuffer<double> _buf(cn*4);
    double *s = (double*)_buf.data(), *sq = s + cn;
    int *sbuf = (int*)s, *sqbuf = (int*)sq;
    bool blockSum = depth <= CV_16S, blockSqSum = depth <= CV_8S;
    size_t esz = 0;

    for( k = 0; k < cn; k++ )
        s[k] = sq[k] = 0;

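    // Plain sums can be accumulated in 32-bit ints for 8- and 16-bit inputs, but
    // squared sums only for 8-bit ones: a squared 8-bit sample is < 2^16, so 2^15
    // of them stay below 2^31, whereas a single squared 16-bit sample can already
    // approach or exceed the int range.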
    if( blockSum )
    {
        intSumBlockSize = 1 << 15;
        blockSize = std::min(blockSize, intSumBlockSize);
        sbuf = (int*)(sq + cn);
        if( blockSqSum )
            sqbuf = sbuf + cn;
        for( k = 0; k < cn; k++ )
            sbuf[k] = sqbuf[k] = 0;
        esz = src.elemSize();
    }

    for( size_t i = 0; i < it.nplanes; i++, ++it )
    {
        for( j = 0; j < total; j += blockSize )
        {
            int bsz = std::min(total - j, blockSize);
            int nz = func( ptrs[0], ptrs[1], (uchar*)sbuf, (uchar*)sqbuf, bsz, cn );
            count += nz;
            nz0 += nz;
            if( blockSum && (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) )
            {
                for( k = 0; k < cn; k++ )
                {
                    s[k] += sbuf[k];
                    sbuf[k] = 0;
                }
                if( blockSqSum )
                {
                    for( k = 0; k < cn; k++ )
                    {
                        sq[k] += sqbuf[k];
                        sqbuf[k] = 0;
                    }
                }
                count = 0;
            }
            ptrs[0] += bsz*esz;
            if( ptrs[1] )
                ptrs[1] += bsz;
        }
    }

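    // Finalize: mean = sum/count, and the standard deviation comes from the raw
    // moments, sqrt(E[x^2] - E[x]^2), clamped at zero against rounding error.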
    double scale = nz0 ? 1./nz0 : 0.;
    for( k = 0; k < cn; k++ )
    {
        s[k] *= scale;
        sq[k] = std::sqrt(std::max(sq[k]*scale - s[k]*s[k], 0.));
    }

    if (_mean.needed())
    {
        const double* sptr = s;
        double* dptr = mean_mat.ptr<double>();
        for( k = 0; k < cn; k++ )
            dptr[k] = sptr[k];
    }

    if (_sdv.needed())
    {
        const double* sptr = sq;
        double* dptr = stddev_mat.ptr<double>();
        for( k = 0; k < cn; k++ )
            dptr[k] = sptr[k];
    }
}
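/* Usage sketch (illustrative only, not part of the library source; the file
   name and variable names are hypothetical):

     cv::Mat gray = cv::imread("input.png", cv::IMREAD_GRAYSCALE);
     cv::Scalar mu, sigma;
     cv::meanStdDev(gray, mu, sigma);
     // mu[0] is the mean intensity and sigma[0] the standard deviation; a mask
     // can be passed as the fourth argument to restrict the pixels considered.
*/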

} // namespace
