count_non_zero.dispatch.cpp source code [opencv/modules/core/src/count_non_zero.dispatch.cpp]

1	// This file is part of OpenCV project.
2	// It is subject to the license terms in the LICENSE file found in the top-level directory
3	// of this distribution and at http://opencv.org/license.html
4
5
6	#include "precomp.hpp"
7	#include "opencl_kernels_core.hpp"
8	#include "stat.hpp"
9
10	#include "count_non_zero.simd.hpp"
11	#include "count_non_zero.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content
12
13	namespace cv {
14
15	static CountNonZeroFunc getCountNonZeroTab(int depth)
16	{
17	CV_INSTRUMENT_REGION();
18	CV_CPU_DISPATCH(getCountNonZeroTab, (depth),
19	CV_CPU_DISPATCH_MODES_ALL);
20	}
21
22	#ifdef HAVE_OPENCL
23	static bool ocl_countNonZero( InputArray _src, int & res )
24	{
25	int type = _src.type(), depth = CV_MAT_DEPTH(type), kercn = ocl::predictOptimalVectorWidth(src1: _src);
26	bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > `0`;
27
28	if (depth == CV_64F && !doubleSupport)
29	return false;
30
31	int dbsize = ocl::Device::getDefault().maxComputeUnits();
32	size_t wgs = ocl::Device::getDefault().maxWorkGroupSize();
33
34	int wgs2_aligned = `1`;
35	while (wgs2_aligned < (int)wgs)
36	wgs2_aligned <<= `1`;
37	wgs2_aligned >>= `1`;
38
39	ocl::Kernel k("reduce", ocl::core::reduce_oclsrc,
40	format("-D srcT=%s -D srcT1=%s -D cn=1 -D OP_COUNT_NON_ZERO"
41	" -D WGS=%d -D kercn=%d -D WGS2_ALIGNED=%d%s%s",
42	ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)),
43	ocl::typeToStr(depth), (int)wgs, kercn,
44	wgs2_aligned, doubleSupport ? " -D DOUBLE_SUPPORT" : "",
45	_src.isContinuous() ? " -D HAVE_SRC_CONT" : ""));
46	if (k.empty())
47	return false;
48
49	UMat src = _src.getUMat(), db(`1`, dbsize, CV_32SC1);
50	k.args(kernel_args: ocl::KernelArg::ReadOnlyNoSize(m: src), kernel_args: src.cols, kernel_args: (int)src.total(),
51	kernel_args: dbsize, kernel_args: ocl::KernelArg::PtrWriteOnly(m: db));
52
53	size_t globalsize = dbsize * wgs;
54	if (k.run(dims: `1`, globalsize: &globalsize, localsize: &wgs, sync: true))
55	return res = saturate_cast<int>(v: cv::sum(src: db.getMat(flags: ACCESS_READ))[`0`]), true;
56	return false;
57	}
58	#endif
59
60	#if defined HAVE_IPP
61	static bool ipp_countNonZero( Mat &src, int &res )
62	{
63	CV_INSTRUMENT_REGION_IPP();
64
65	#if IPP_VERSION_X100 < 201801
66	// Poor performance of SSE42
67	if(cv::ipp::getIppTopFeatures() == ippCPUID_SSE42)
68	return false;
69	#endif
70
71	Ipp32s count = `0`;
72	int depth = src.depth();
73
74	if(src.dims <= `2`)
75	{
76	IppStatus status;
77	IppiSize size = {.width: src.cols*src.channels(), .height: src.rows};
78
79	if(depth == CV_8U)
80	status = CV_INSTRUMENT_FUN_IPP(ippiCountInRange_8u_C1R, (const Ipp8u )src.ptr(), (int*)src.step, size, &count, `0`, `0`);
81	else if(depth == CV_32F)
82	status = CV_INSTRUMENT_FUN_IPP(ippiCountInRange_32f_C1R, (const Ipp32f )src.ptr(), (int*)src.step, size, &count, `0`, `0`);
83	else
84	return false;
85
86	if(status < `0`)
87	return false;
88
89	res = size.width*size.height - count;
90	}
91	else
92	{
93	IppStatus status;
94	const Mat *arrays[] = {&src, NULL};
95	Mat planes[`1`];
96	NAryMatIterator it(arrays, planes, `1`);
97	IppiSize size = {.width: (int)it.size*src.channels(), .height: `1`};
98	res = `0`;
99	for (size_t i = `0`; i < it.nplanes; i++, ++it)
100	{
101	if(depth == CV_8U)
102	status = CV_INSTRUMENT_FUN_IPP(ippiCountInRange_8u_C1R, it.planes->ptr<Ipp8u>(), (int)it.planes->step, size, &count, `0`, `0`);
103	else if(depth == CV_32F)
104	status = CV_INSTRUMENT_FUN_IPP(ippiCountInRange_32f_C1R, it.planes->ptr<Ipp32f>(), (int)it.planes->step, size, &count, `0`, `0`);
105	else
106	return false;
107
108	if(status < `0` \|\| (int)it.planes->total()*src.channels() < count)
109	return false;
110
111	res += (int)it.planes->total()*src.channels() - count;
112	}
113	}
114
115	return true;
116	}
117	#endif
118
119	int countNonZero(InputArray _src)
120	{
121	CV_INSTRUMENT_REGION();
122
123	int type = _src.type(), cn = CV_MAT_CN(type);
124	CV_Assert( cn == `1` );
125
126	#if defined HAVE_OPENCL \|\| defined HAVE_IPP
127	int res = -`1`;
128	#endif
129
130	#ifdef HAVE_OPENCL
131	CV_OCL_RUN_(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= `2`,
132	ocl_countNonZero(_src, res),
133	res)
134	#endif
135
136	Mat src = _src.getMat();
137	CV_IPP_RUN_FAST(ipp_countNonZero(src, res), res);
138
139	CountNonZeroFunc func = getCountNonZeroTab(depth: src.depth());
140	CV_Assert( func != `0` );
141
142	const Mat* arrays[] = {&src, `0`};
143	uchar* ptrs[`1`] = {};
144	NAryMatIterator it(arrays, ptrs);
145	int total = (int)it.size, nz = `0`;
146
147	for( size_t i = `0`; i < it.nplanes; i++, ++it )
148	nz += func( ptrs[`0`], total );
149
150	return nz;
151	}
152
153	void findNonZero(InputArray _src, OutputArray _idx)
154	{
155	Mat src = _src.getMat();
156	CV_Assert( src.channels() == `1` && src.dims == `2` );
157
158	int depth = src.depth();
159	std::vector<Point> idxvec;
160	int rows = src.rows, cols = src.cols;
161	AutoBuffer<int> buf_(cols + `1`);
162	int* buf = buf_.data();
163
164	for( int i = `0`; i < rows; i++ )
165	{
166	int j, k = `0`;
167	const uchar* ptr8 = src.ptr(y: i);
168	if( depth == CV_8U \|\| depth == CV_8S )
169	{
170	for( j = `0`; j < cols; j++ )
171	if( ptr8[j] != `0` ) buf[k++] = j;
172	}
173	else if( depth == CV_16U \|\| depth == CV_16S )
174	{
175	const ushort* ptr16 = (const ushort*)ptr8;
176	for( j = `0`; j < cols; j++ )
177	if( ptr16[j] != `0` ) buf[k++] = j;
178	}
179	else if( depth == CV_32S )
180	{
181	const int* ptr32s = (const int*)ptr8;
182	for( j = `0`; j < cols; j++ )
183	if( ptr32s[j] != `0` ) buf[k++] = j;
184	}
185	else if( depth == CV_32F )
186	{
187	const float* ptr32f = (const float*)ptr8;
188	for( j = `0`; j < cols; j++ )
189	if( ptr32f[j] != `0` ) buf[k++] = j;
190	}
191	else
192	{
193	const double* ptr64f = (const double*)ptr8;
194	for( j = `0`; j < cols; j++ )
195	if( ptr64f[j] != `0` ) buf[k++] = j;
196	}
197
198	if( k > `0` )
199	{
200	size_t sz = idxvec.size();
201	idxvec.resize(new_size: sz + k);
202	for( j = `0`; j < k; j++ )
203	idxvec [sz + j] = Point (buf[j], i);
204	}
205	}
206
207	if( idxvec.empty() \|\| (_idx.kind() == _InputArray::MAT && !_idx.getMatRef().isContinuous()) )
208	_idx.release();
209
210	if( !idxvec.empty() )
211	Mat (idxvec).copyTo(m: _idx);
212	}
213
214	} // namespace
215

source code of opencv/modules/core/src/count_non_zero.dispatch.cpp