1 | // This file is part of OpenCV project. |
2 | // It is subject to the license terms in the LICENSE file found in the top-level directory |
3 | // of this distribution and at http://opencv.org/license.html |
4 | |
5 | |
6 | #include "precomp.hpp" |
7 | #include "opencl_kernels_core.hpp" |
8 | #include "opencv2/core/openvx/ovx_defs.hpp" |
9 | #include "stat.hpp" |
10 | |
11 | #ifndef OPENCV_IPP_MEAN |
12 | #undef HAVE_IPP |
13 | #undef CV_IPP_RUN_FAST |
14 | #define CV_IPP_RUN_FAST(f, ...) |
15 | #undef CV_IPP_RUN |
16 | #define CV_IPP_RUN(c, f, ...) |
17 | #endif // OPENCV_IPP_MEAN |
18 | |
19 | #include "mean.simd.hpp" |
20 | #include "mean.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content |
21 | |
22 | #ifndef OPENCV_IPP_MEAN |
23 | #undef HAVE_IPP |
24 | #undef CV_IPP_RUN_FAST |
25 | #define CV_IPP_RUN_FAST(f, ...) |
26 | #undef CV_IPP_RUN |
27 | #define CV_IPP_RUN(c, f, ...) |
28 | #endif // OPENCV_IPP_MEAN |
29 | |
30 | namespace cv { |
31 | |
#if defined HAVE_IPP
// IPP-accelerated implementation of cv::mean().
// Returns true when IPP computed the result (written to 'ret'),
// false to make the caller fall back to the generic implementation.
static bool ipp_mean( Mat &src, Mat &mask, Scalar &ret )
{
    CV_INSTRUMENT_REGION_IPP();

#if IPP_VERSION_X100 >= 700
    size_t total_size = src.total();
    int cn = src.channels();
    if (cn > 4)
        return false;  // IPP mean supports at most 4 channels
    // For continuous >2D data, view the whole array as a single rows x cols 2D plane.
    int rows = src.size[0], cols = rows ? (int)(total_size/rows) : 0;
    if( src.dims == 2 || (src.isContinuous() && mask.isContinuous() && cols > 0 && (size_t)rows*cols == total_size) )
    {
        IppiSize sz = { cols, rows };
        int type = src.type();
        if( !mask.empty() )
        {
            // Masked single-channel path: one call fills the scalar directly.
            typedef IppStatus (CV_STDCALL* ippiMaskMeanFuncC1)(const void *, int, const void *, int, IppiSize, Ipp64f *);
            ippiMaskMeanFuncC1 ippiMean_C1MR =
            type == CV_8UC1 ? (ippiMaskMeanFuncC1)ippiMean_8u_C1MR :
            type == CV_16UC1 ? (ippiMaskMeanFuncC1)ippiMean_16u_C1MR :
            type == CV_32FC1 ? (ippiMaskMeanFuncC1)ippiMean_32f_C1MR :
            0;
            if( ippiMean_C1MR )
            {
                Ipp64f res;
                if( CV_INSTRUMENT_FUN_IPP(ippiMean_C1MR, src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, &res) >= 0 )
                {
                    ret = Scalar(res);
                    return true;
                }
            }
            // Masked 3-channel path: the C3CMR variants process one channel
            // per call (channel index is 1-based), so call three times.
            typedef IppStatus (CV_STDCALL* ippiMaskMeanFuncC3)(const void *, int, const void *, int, IppiSize, int, Ipp64f *);
            ippiMaskMeanFuncC3 ippiMean_C3MR =
            type == CV_8UC3 ? (ippiMaskMeanFuncC3)ippiMean_8u_C3CMR :
            type == CV_16UC3 ? (ippiMaskMeanFuncC3)ippiMean_16u_C3CMR :
            type == CV_32FC3 ? (ippiMaskMeanFuncC3)ippiMean_32f_C3CMR :
            0;
            if( ippiMean_C3MR )
            {
                Ipp64f res1, res2, res3;
                if( CV_INSTRUMENT_FUN_IPP(ippiMean_C3MR, src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 1, &res1) >= 0 &&
                    CV_INSTRUMENT_FUN_IPP(ippiMean_C3MR, src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 2, &res2) >= 0 &&
                    CV_INSTRUMENT_FUN_IPP(ippiMean_C3MR, src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 3, &res3) >= 0 )
                {
                    ret = Scalar(res1, res2, res3);
                    return true;
                }
            }
        }
        else
        {
            // Unmasked path: 32F types take an algorithm hint, integer types do not,
            // hence two distinct function-pointer signatures.
            typedef IppStatus (CV_STDCALL* ippiMeanFuncHint)(const void*, int, IppiSize, double *, IppHintAlgorithm);
            typedef IppStatus (CV_STDCALL* ippiMeanFuncNoHint)(const void*, int, IppiSize, double *);
            ippiMeanFuncHint ippiMeanHint =
                type == CV_32FC1 ? (ippiMeanFuncHint)ippiMean_32f_C1R :
                type == CV_32FC3 ? (ippiMeanFuncHint)ippiMean_32f_C3R :
                type == CV_32FC4 ? (ippiMeanFuncHint)ippiMean_32f_C4R :
                0;
            ippiMeanFuncNoHint ippiMean =
                type == CV_8UC1 ? (ippiMeanFuncNoHint)ippiMean_8u_C1R :
                type == CV_8UC3 ? (ippiMeanFuncNoHint)ippiMean_8u_C3R :
                type == CV_8UC4 ? (ippiMeanFuncNoHint)ippiMean_8u_C4R :
                type == CV_16UC1 ? (ippiMeanFuncNoHint)ippiMean_16u_C1R :
                type == CV_16UC3 ? (ippiMeanFuncNoHint)ippiMean_16u_C3R :
                type == CV_16UC4 ? (ippiMeanFuncNoHint)ippiMean_16u_C4R :
                type == CV_16SC1 ? (ippiMeanFuncNoHint)ippiMean_16s_C1R :
                type == CV_16SC3 ? (ippiMeanFuncNoHint)ippiMean_16s_C3R :
                type == CV_16SC4 ? (ippiMeanFuncNoHint)ippiMean_16s_C4R :
                0;
            // Make sure only zero or one version of the function pointer is valid
            CV_Assert(!ippiMeanHint || !ippiMean);
            if( ippiMeanHint || ippiMean )
            {
                Ipp64f res[4];
                IppStatus status = ippiMeanHint ? CV_INSTRUMENT_FUN_IPP(ippiMeanHint, src.ptr(), (int)src.step[0], sz, res, ippAlgHintAccurate) :
                                CV_INSTRUMENT_FUN_IPP(ippiMean, src.ptr(), (int)src.step[0], sz, res);
                if( status >= 0 )
                {
                    for( int i = 0; i < cn; i++ )
                        ret[i] = res[i];
                    return true;
                }
            }
        }
    }
    return false;
#else
    return false;
#endif
}
#endif
124 | |
125 | Scalar mean(InputArray _src, InputArray _mask) |
126 | { |
127 | CV_INSTRUMENT_REGION(); |
128 | |
129 | Mat src = _src.getMat(), mask = _mask.getMat(); |
130 | CV_Assert( mask.empty() || mask.type() == CV_8U ); |
131 | |
132 | int k, cn = src.channels(), depth = src.depth(); |
133 | Scalar s; |
134 | |
135 | CV_IPP_RUN(IPP_VERSION_X100 >= 700, ipp_mean(src, mask, s), s) |
136 | |
137 | SumFunc func = getSumFunc(depth); |
138 | |
139 | CV_Assert( cn <= 4 && func != 0 ); |
140 | |
141 | const Mat* arrays[] = {&src, &mask, 0}; |
142 | uchar* ptrs[2] = {}; |
143 | NAryMatIterator it(arrays, ptrs); |
144 | int total = (int)it.size, blockSize = total, intSumBlockSize = 0; |
145 | int j, count = 0; |
146 | AutoBuffer<int> _buf; |
147 | int* buf = (int*)&s[0]; |
148 | bool blockSum = depth <= CV_16S; |
149 | size_t esz = 0, nz0 = 0; |
150 | |
151 | if( blockSum ) |
152 | { |
153 | intSumBlockSize = depth <= CV_8S ? (1 << 23) : (1 << 15); |
154 | blockSize = std::min(a: blockSize, b: intSumBlockSize); |
155 | _buf.allocate(size: cn); |
156 | buf = _buf.data(); |
157 | |
158 | for( k = 0; k < cn; k++ ) |
159 | buf[k] = 0; |
160 | esz = src.elemSize(); |
161 | } |
162 | |
163 | for( size_t i = 0; i < it.nplanes; i++, ++it ) |
164 | { |
165 | for( j = 0; j < total; j += blockSize ) |
166 | { |
167 | int bsz = std::min(a: total - j, b: blockSize); |
168 | int nz = func( ptrs[0], ptrs[1], (uchar*)buf, bsz, cn ); |
169 | count += nz; |
170 | nz0 += nz; |
171 | if( blockSum && (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) ) |
172 | { |
173 | for( k = 0; k < cn; k++ ) |
174 | { |
175 | s[k] += buf[k]; |
176 | buf[k] = 0; |
177 | } |
178 | count = 0; |
179 | } |
180 | ptrs[0] += bsz*esz; |
181 | if( ptrs[1] ) |
182 | ptrs[1] += bsz; |
183 | } |
184 | } |
185 | return s*(nz0 ? 1./nz0 : 0); |
186 | } |
187 | |
// Returns the SIMD-dispatched sum-of-values/sum-of-squares kernel for the
// given element depth; dispatch target is selected at runtime from the CPU
// feature set (CV_CPU_DISPATCH_MODES_ALL comes from mean.simd_declarations.hpp).
static SumSqrFunc getSumSqrFunc(int depth)
{
    CV_INSTRUMENT_REGION();
    CV_CPU_DISPATCH(getSumSqrFunc, (depth),
        CV_CPU_DISPATCH_MODES_ALL);
}
194 | |
#ifdef HAVE_OPENCL
/** OpenCL implementation of meanStdDev().
 *
 * Runs a reduction kernel that produces per-workgroup partial sums (and,
 * with a mask, per-group non-zero counts), then finishes the reduction on
 * the host. Returns false when the configuration is unsupported (cn > 4,
 * CV_64F without device double support, kernel build failure), letting the
 * caller fall back to the CPU path.
 *
 * NOTE: the original text of this block contained IDE parameter-name inlay
 * hints (e.g. "format(fmt: ...)", "k.args(kernel_args: ...)") which are not
 * valid C++; they have been removed.
 */
static bool ocl_meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv, InputArray _mask )
{
    CV_INSTRUMENT_REGION_OPENCL();

    bool haveMask = _mask.kind() != _InputArray::NONE;
    int nz = haveMask ? -1 : (int)_src.total();  // without a mask the count is known up front
    Scalar mean(0), stddev(0);
    const int cn = _src.channels();
    if (cn > 4)
        return false;

    {
        int type = _src.type(), depth = CV_MAT_DEPTH(type);
        bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0,
                isContinuous = _src.isContinuous(),
                isMaskContinuous = _mask.isContinuous();
        const ocl::Device &defDev = ocl::Device::getDefault();
        int groups = defDev.maxComputeUnits();
        if (defDev.isIntel())
        {
            // Empirical tuning for Intel GPUs: two workgroups per sub-slice.
            static const int subSliceEUCount = 10;
            groups = (groups / subSliceEUCount) * 2;
        }
        size_t wgs = defDev.maxWorkGroupSize();

        // Accumulator types: at least CV_32S for sums, at least CV_32F for
        // sums of squares, widened to the source depth when it is larger.
        int ddepth = std::max(CV_32S, depth), sqddepth = std::max(CV_32F, depth),
                dtype = CV_MAKE_TYPE(ddepth, cn),
                sqdtype = CV_MAKETYPE(sqddepth, cn);
        CV_Assert(!haveMask || _mask.type() == CV_8UC1);

        // Largest power of two not exceeding the max workgroup size.
        int wgs2_aligned = 1;
        while (wgs2_aligned < (int)wgs)
            wgs2_aligned <<= 1;
        wgs2_aligned >>= 1;

        if ( (!doubleSupport && depth == CV_64F) )
            return false;

        char cvt[2][50];
        String opts = format("-D srcT=%s -D srcT1=%s -D dstT=%s -D dstT1=%s -D sqddepth=%d"
                             " -D sqdstT=%s -D sqdstT1=%s -D convertToSDT=%s -D cn=%d%s%s"
                             " -D convertToDT=%s -D WGS=%d -D WGS2_ALIGNED=%d%s%s",
                             ocl::typeToStr(type), ocl::typeToStr(depth),
                             ocl::typeToStr(dtype), ocl::typeToStr(ddepth), sqddepth,
                             ocl::typeToStr(sqdtype), ocl::typeToStr(sqddepth),
                             ocl::convertTypeStr(depth, sqddepth, cn, cvt[0], sizeof(cvt[0])),
                             cn, isContinuous ? " -D HAVE_SRC_CONT" : "",
                             isMaskContinuous ? " -D HAVE_MASK_CONT" : "",
                             ocl::convertTypeStr(depth, ddepth, cn, cvt[1], sizeof(cvt[1])),
                             (int)wgs, wgs2_aligned, haveMask ? " -D HAVE_MASK" : "",
                             doubleSupport ? " -D DOUBLE_SUPPORT" : "");

        ocl::Kernel k("meanStdDev", ocl::core::meanstddev_oclsrc, opts);
        if (k.empty())
            return false;

        // Device buffer layout per group: [sums (dtype)] [sq sums (sqdtype)]
        // [nz count (int32, masked case only)].
        int dbsize = groups * ((haveMask ? CV_ELEM_SIZE1(CV_32S) : 0) +
                               CV_ELEM_SIZE(sqdtype) + CV_ELEM_SIZE(dtype));
        UMat src = _src.getUMat(), db(1, dbsize, CV_8UC1), mask = _mask.getUMat();

        ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
                dbarg = ocl::KernelArg::PtrWriteOnly(db),
                maskarg = ocl::KernelArg::ReadOnlyNoSize(mask);

        if (haveMask)
            k.args(srcarg, src.cols, (int)src.total(), groups, dbarg, maskarg);
        else
            k.args(srcarg, src.cols, (int)src.total(), groups, dbarg);

        size_t globalsize = groups * wgs;

        if(!k.run(1, &globalsize, &wgs, false))
            return false;

        // Host-side final reduction over the per-group partial results.
        typedef Scalar (* part_sum)(Mat m);
        part_sum funcs[3] = { ocl_part_sum<int>, ocl_part_sum<float>, ocl_part_sum<double> };
        Mat dbm = db.getMat(ACCESS_READ);

        mean = funcs[ddepth - CV_32S](Mat(1, groups, dtype, dbm.ptr()));
        stddev = funcs[sqddepth - CV_32S](Mat(1, groups, sqdtype, dbm.ptr() + groups * CV_ELEM_SIZE(dtype)));

        if (haveMask)
            nz = saturate_cast<int>(funcs[0](Mat(1, groups, CV_32SC1, dbm.ptr() +
                                                 groups * (CV_ELEM_SIZE(dtype) +
                                                           CV_ELEM_SIZE(sqdtype))))[0]);
    }

    double total = nz != 0 ? 1.0 / nz : 0;
    int k, j;
    for (int i = 0; i < cn; ++i)
    {
        mean[i] *= total;
        // Var = E[x^2] - E[x]^2; clamp at 0 to absorb rounding error.
        stddev[i] = std::sqrt(std::max(stddev[i] * total - mean[i] * mean[i], 0.));
    }

    // Write mean and stddev into the caller-provided outputs, zero-padding
    // any extra destination channels.
    for( j = 0; j < 2; j++ )
    {
        const double * const sptr = j == 0 ? &mean[0] : &stddev[0];
        _OutputArray _dst = j == 0 ? _mean : _sdv;
        if( !_dst.needed() )
            continue;

        if( !_dst.fixedSize() )
            _dst.create(cn, 1, CV_64F, -1, true);
        Mat dst = _dst.getMat();
        int dcn = (int)dst.total();
        CV_Assert( dst.type() == CV_64F && dst.isContinuous() &&
                   (dst.cols == 1 || dst.rows == 1) && dcn >= cn );
        double* dptr = dst.ptr<double>();
        for( k = 0; k < cn; k++ )
            dptr[k] = sptr[k];
        for( ; k < dcn; k++ )
            dptr[k] = 0;
    }

    return true;
}
#endif
314 | |
#ifdef HAVE_OPENVX
// OpenVX implementation of meanStdDev(). Only handles unmasked CV_8UC1 data
// that is 2D (or continuous and reshapeable to 2D); returns false otherwise
// so the caller can fall back to other implementations.
static bool openvx_meanStdDev(Mat& src, OutputArray _mean, OutputArray _sdv, Mat& mask)
{
    size_t total_size = src.total();
    // View continuous >2D data as a single rows x cols plane.
    int rows = src.size[0], cols = rows ? (int)(total_size / rows) : 0;
    if (src.type() != CV_8UC1|| !mask.empty() ||
        (src.dims != 2 && !(src.isContinuous() && cols > 0 && (size_t)rows*cols == total_size))
        )
        return false;

    try
    {
        ivx::Context ctx = ovx::getOpenVXContext();
#ifndef VX_VERSION_1_1
        if (ctx.vendorID() == VX_ID_KHRONOS)
            return false; // Do not use OpenVX meanStdDev estimation for sample 1.0.1 implementation due to lack of accuracy
#endif

        // Wrap the existing cv::Mat data as an OpenVX image (no copy).
        ivx::Image
            ia = ivx::Image::createFromHandle(ctx, VX_DF_IMAGE_U8,
                ivx::Image::createAddressing(cols, rows, 1, (vx_int32)(src.step[0])), src.ptr());

        vx_float32 mean_temp, stddev_temp;
        ivx::IVX_CHECK_STATUS(vxuMeanStdDev(ctx, ia, &mean_temp, &stddev_temp));

        // Store the scalar result in channel 0; zero-pad any extra channels
        // the caller's fixed-size output may have.
        if (_mean.needed())
        {
            if (!_mean.fixedSize())
                _mean.create(1, 1, CV_64F, -1, true);
            Mat mean = _mean.getMat();
            CV_Assert(mean.type() == CV_64F && mean.isContinuous() &&
                (mean.cols == 1 || mean.rows == 1) && mean.total() >= 1);
            double *pmean = mean.ptr<double>();
            pmean[0] = mean_temp;
            for (int c = 1; c < (int)mean.total(); c++)
                pmean[c] = 0;
        }

        if (_sdv.needed())
        {
            if (!_sdv.fixedSize())
                _sdv.create(1, 1, CV_64F, -1, true);
            Mat stddev = _sdv.getMat();
            CV_Assert(stddev.type() == CV_64F && stddev.isContinuous() &&
                (stddev.cols == 1 || stddev.rows == 1) && stddev.total() >= 1);
            double *pstddev = stddev.ptr<double>();
            pstddev[0] = stddev_temp;
            for (int c = 1; c < (int)stddev.total(); c++)
                pstddev[c] = 0;
        }
    }
    catch (const ivx::RuntimeError & e)
    {
        VX_DbgThrow(e.what());
    }
    catch (const ivx::WrapperError & e)
    {
        VX_DbgThrow(e.what());
    }

    return true;
}
#endif
378 | |
#ifdef HAVE_IPP
// IPP-accelerated implementation of meanStdDev().
// Returns true when IPP computed mean/stddev into the caller's outputs,
// false to fall back to the generic implementation. Several IPP versions
// have known defects in specific variants; those are excluded below.
static bool ipp_meanStdDev(Mat& src, OutputArray _mean, OutputArray _sdv, Mat& mask)
{
    CV_INSTRUMENT_REGION_IPP();

#if IPP_VERSION_X100 >= 700
    int cn = src.channels();

#if IPP_VERSION_X100 < 201801
    // IPP_DISABLE: C3C functions can read outside of allocated memory
    if (cn > 1)
        return false;
#endif
#if IPP_VERSION_X100 >= 201900 && IPP_VERSION_X100 < 201901
    // IPP_DISABLE: 32f C3C functions can read outside of allocated memory
    if (cn > 1 && src.depth() == CV_32F)
        return false;

    // SSE4.2 buffer overrun
#if defined(_WIN32) && !defined(_WIN64)
    // IPPICV doesn't have AVX2 in 32-bit builds
    // However cv::ipp::getIppTopFeatures() may return AVX2 value on AVX2 capable H/W
    // details #12959
#else
    if (cv::ipp::getIppTopFeatures() == ippCPUID_SSE42) // Linux x64 + OPENCV_IPP=SSE42 is affected too
#endif
    {
        if (src.depth() == CV_32F && src.dims > 1 && src.size[src.dims - 1] == 6)
            return false;
    }
#endif

    size_t total_size = src.total();
    // View continuous >2D data as a single rows x cols plane.
    int rows = src.size[0], cols = rows ? (int)(total_size/rows) : 0;
    if( src.dims == 2 || (src.isContinuous() && mask.isContinuous() && cols > 0 && (size_t)rows*cols == total_size) )
    {
        // Default to stack storage; redirected to the caller's output Mats
        // below when those are requested.
        Ipp64f mean_temp[3];
        Ipp64f stddev_temp[3];
        Ipp64f *pmean = &mean_temp[0];
        Ipp64f *pstddev = &stddev_temp[0];
        Mat mean, stddev;
        int dcn_mean = -1;
        if( _mean.needed() )
        {
            if( !_mean.fixedSize() )
                _mean.create(cn, 1, CV_64F, -1, true);
            mean = _mean.getMat();
            dcn_mean = (int)mean.total();
            pmean = mean.ptr<Ipp64f>();
        }
        int dcn_stddev = -1;
        if( _sdv.needed() )
        {
            if( !_sdv.fixedSize() )
                _sdv.create(cn, 1, CV_64F, -1, true);
            stddev = _sdv.getMat();
            dcn_stddev = (int)stddev.total();
            pstddev = stddev.ptr<Ipp64f>();
        }
        // Zero-pad destination channels beyond the source channel count.
        for( int c = cn; c < dcn_mean; c++ )
            pmean[c] = 0;
        for( int c = cn; c < dcn_stddev; c++ )
            pstddev[c] = 0;
        IppiSize sz = { cols, rows };
        int type = src.type();
        if( !mask.empty() )
        {
            // Masked single-channel path.
            typedef IppStatus (CV_STDCALL* ippiMaskMeanStdDevFuncC1)(const void *, int, const void *, int, IppiSize, Ipp64f *, Ipp64f *);
            ippiMaskMeanStdDevFuncC1 ippiMean_StdDev_C1MR =
            type == CV_8UC1 ? (ippiMaskMeanStdDevFuncC1)ippiMean_StdDev_8u_C1MR :
            type == CV_16UC1 ? (ippiMaskMeanStdDevFuncC1)ippiMean_StdDev_16u_C1MR :
            type == CV_32FC1 ? (ippiMaskMeanStdDevFuncC1)ippiMean_StdDev_32f_C1MR :
            0;
            if( ippiMean_StdDev_C1MR )
            {
                if( CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C1MR, src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, pmean, pstddev) >= 0 )
                {
                    return true;
                }
            }
            // Masked 3-channel path: one call per channel (1-based index).
            typedef IppStatus (CV_STDCALL* ippiMaskMeanStdDevFuncC3)(const void *, int, const void *, int, IppiSize, int, Ipp64f *, Ipp64f *);
            ippiMaskMeanStdDevFuncC3 ippiMean_StdDev_C3CMR =
            type == CV_8UC3 ? (ippiMaskMeanStdDevFuncC3)ippiMean_StdDev_8u_C3CMR :
            type == CV_16UC3 ? (ippiMaskMeanStdDevFuncC3)ippiMean_StdDev_16u_C3CMR :
            type == CV_32FC3 ? (ippiMaskMeanStdDevFuncC3)ippiMean_StdDev_32f_C3CMR :
            0;
            if( ippiMean_StdDev_C3CMR )
            {
                if( CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C3CMR, src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 1, &pmean[0], &pstddev[0]) >= 0 &&
                    CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C3CMR, src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 2, &pmean[1], &pstddev[1]) >= 0 &&
                    CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C3CMR, src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 3, &pmean[2], &pstddev[2]) >= 0 )
                {
                    return true;
                }
            }
        }
        else
        {
            // Unmasked single-channel path.
            typedef IppStatus (CV_STDCALL* ippiMeanStdDevFuncC1)(const void *, int, IppiSize, Ipp64f *, Ipp64f *);
            ippiMeanStdDevFuncC1 ippiMean_StdDev_C1R =
            type == CV_8UC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_8u_C1R :
            type == CV_16UC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_16u_C1R :
#if (IPP_VERSION_X100 >= 810)
            type == CV_32FC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_32f_C1R ://Aug 2013: bug in IPP 7.1, 8.0
#endif
            0;
            if( ippiMean_StdDev_C1R )
            {
                if( CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C1R, src.ptr(), (int)src.step[0], sz, pmean, pstddev) >= 0 )
                {
                    return true;
                }
            }
            // Unmasked 3-channel path: one call per channel (1-based index).
            typedef IppStatus (CV_STDCALL* ippiMeanStdDevFuncC3)(const void *, int, IppiSize, int, Ipp64f *, Ipp64f *);
            ippiMeanStdDevFuncC3 ippiMean_StdDev_C3CR =
            type == CV_8UC3 ? (ippiMeanStdDevFuncC3)ippiMean_StdDev_8u_C3CR :
            type == CV_16UC3 ? (ippiMeanStdDevFuncC3)ippiMean_StdDev_16u_C3CR :
            type == CV_32FC3 ? (ippiMeanStdDevFuncC3)ippiMean_StdDev_32f_C3CR :
            0;
            if( ippiMean_StdDev_C3CR )
            {
                if( CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C3CR, src.ptr(), (int)src.step[0], sz, 1, &pmean[0], &pstddev[0]) >= 0 &&
                    CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C3CR, src.ptr(), (int)src.step[0], sz, 2, &pmean[1], &pstddev[1]) >= 0 &&
                    CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C3CR, src.ptr(), (int)src.step[0], sz, 3, &pmean[2], &pstddev[2]) >= 0 )
                {
                    return true;
                }
            }
        }
    }
#else
    CV_UNUSED(src); CV_UNUSED(_mean); CV_UNUSED(_sdv); CV_UNUSED(mask);
#endif
    return false;
}
#endif
515 | |
516 | void meanStdDev(InputArray _src, OutputArray _mean, OutputArray _sdv, InputArray _mask) |
517 | { |
518 | CV_INSTRUMENT_REGION(); |
519 | |
520 | CV_Assert(!_src.empty()); |
521 | CV_Assert( _mask.empty() || _mask.type() == CV_8UC1 ); |
522 | |
523 | CV_OCL_RUN(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= 2, |
524 | ocl_meanStdDev(_src, _mean, _sdv, _mask)) |
525 | |
526 | Mat src = _src.getMat(), mask = _mask.getMat(); |
527 | |
528 | CV_Assert(mask.empty() || src.size == mask.size); |
529 | |
530 | CV_OVX_RUN(!ovx::skipSmallImages<VX_KERNEL_MEAN_STDDEV>(src.cols, src.rows), |
531 | openvx_meanStdDev(src, _mean, _sdv, mask)) |
532 | |
533 | CV_IPP_RUN(IPP_VERSION_X100 >= 700, ipp_meanStdDev(src, _mean, _sdv, mask)); |
534 | |
535 | int k, cn = src.channels(), depth = src.depth(); |
536 | Mat mean_mat, stddev_mat; |
537 | |
538 | if(_mean.needed()) |
539 | { |
540 | if( !_mean.fixedSize() ) |
541 | _mean.create(rows: cn, cols: 1, CV_64F, i: -1, allowTransposed: true); |
542 | |
543 | mean_mat = _mean.getMat(); |
544 | int dcn = (int)mean_mat.total(); |
545 | CV_Assert( mean_mat.type() == CV_64F && mean_mat.isContinuous() && |
546 | (mean_mat.cols == 1 || mean_mat.rows == 1) && dcn >= cn ); |
547 | |
548 | double* dptr = mean_mat.ptr<double>(); |
549 | for(k = cn ; k < dcn; k++ ) |
550 | dptr[k] = 0; |
551 | } |
552 | |
553 | if (_sdv.needed()) |
554 | { |
555 | if( !_sdv.fixedSize() ) |
556 | _sdv.create(rows: cn, cols: 1, CV_64F, i: -1, allowTransposed: true); |
557 | |
558 | stddev_mat = _sdv.getMat(); |
559 | int dcn = (int)stddev_mat.total(); |
560 | CV_Assert( stddev_mat.type() == CV_64F && stddev_mat.isContinuous() && |
561 | (stddev_mat.cols == 1 || stddev_mat.rows == 1) && dcn >= cn ); |
562 | |
563 | double* dptr = stddev_mat.ptr<double>(); |
564 | for(k = cn ; k < dcn; k++ ) |
565 | dptr[k] = 0; |
566 | |
567 | } |
568 | |
569 | if (src.isContinuous() && mask.isContinuous()) |
570 | { |
571 | CALL_HAL(meanStdDev, cv_hal_meanStdDev, src.data, 0, (int)src.total(), 1, src.type(), |
572 | _mean.needed() ? mean_mat.ptr<double>() : nullptr, |
573 | _sdv.needed() ? stddev_mat.ptr<double>() : nullptr, |
574 | mask.data, 0); |
575 | } |
576 | else |
577 | { |
578 | if (src.dims <= 2) |
579 | { |
580 | CALL_HAL(meanStdDev, cv_hal_meanStdDev, src.data, src.step, src.cols, src.rows, src.type(), |
581 | _mean.needed() ? mean_mat.ptr<double>() : nullptr, |
582 | _sdv.needed() ? stddev_mat.ptr<double>() : nullptr, |
583 | mask.data, mask.step); |
584 | } |
585 | } |
586 | |
587 | SumSqrFunc func = getSumSqrFunc(depth); |
588 | |
589 | CV_Assert( func != 0 ); |
590 | |
591 | const Mat* arrays[] = {&src, &mask, 0}; |
592 | uchar* ptrs[2] = {}; |
593 | NAryMatIterator it(arrays, ptrs); |
594 | int total = (int)it.size, blockSize = total, intSumBlockSize = 0; |
595 | int j, count = 0, nz0 = 0; |
596 | AutoBuffer<double> _buf(cn*4); |
597 | double *s = (double*)_buf.data(), *sq = s + cn; |
598 | int *sbuf = (int*)s, *sqbuf = (int*)sq; |
599 | bool blockSum = depth <= CV_16S, blockSqSum = depth <= CV_8S; |
600 | size_t esz = 0; |
601 | |
602 | for( k = 0; k < cn; k++ ) |
603 | s[k] = sq[k] = 0; |
604 | |
605 | if( blockSum ) |
606 | { |
607 | intSumBlockSize = 1 << 15; |
608 | blockSize = std::min(a: blockSize, b: intSumBlockSize); |
609 | sbuf = (int*)(sq + cn); |
610 | if( blockSqSum ) |
611 | sqbuf = sbuf + cn; |
612 | for( k = 0; k < cn; k++ ) |
613 | sbuf[k] = sqbuf[k] = 0; |
614 | esz = src.elemSize(); |
615 | } |
616 | |
617 | for( size_t i = 0; i < it.nplanes; i++, ++it ) |
618 | { |
619 | for( j = 0; j < total; j += blockSize ) |
620 | { |
621 | int bsz = std::min(a: total - j, b: blockSize); |
622 | int nz = func( ptrs[0], ptrs[1], (uchar*)sbuf, (uchar*)sqbuf, bsz, cn ); |
623 | count += nz; |
624 | nz0 += nz; |
625 | if( blockSum && (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) ) |
626 | { |
627 | for( k = 0; k < cn; k++ ) |
628 | { |
629 | s[k] += sbuf[k]; |
630 | sbuf[k] = 0; |
631 | } |
632 | if( blockSqSum ) |
633 | { |
634 | for( k = 0; k < cn; k++ ) |
635 | { |
636 | sq[k] += sqbuf[k]; |
637 | sqbuf[k] = 0; |
638 | } |
639 | } |
640 | count = 0; |
641 | } |
642 | ptrs[0] += bsz*esz; |
643 | if( ptrs[1] ) |
644 | ptrs[1] += bsz; |
645 | } |
646 | } |
647 | |
648 | double scale = nz0 ? 1./nz0 : 0.; |
649 | for( k = 0; k < cn; k++ ) |
650 | { |
651 | s[k] *= scale; |
652 | sq[k] = std::sqrt(x: std::max(a: sq[k]*scale - s[k]*s[k], b: 0.)); |
653 | } |
654 | |
655 | if (_mean.needed()) |
656 | { |
657 | const double* sptr = s; |
658 | double* dptr = mean_mat.ptr<double>(); |
659 | for( k = 0; k < cn; k++ ) |
660 | dptr[k] = sptr[k]; |
661 | } |
662 | |
663 | if (_sdv.needed()) |
664 | { |
665 | const double* sptr = sq; |
666 | double* dptr = stddev_mat.ptr<double>(); |
667 | for( k = 0; k < cn; k++ ) |
668 | dptr[k] = sptr[k]; |
669 | } |
670 | } |
671 | |
672 | } // namespace |
673 | |