1// This file is part of OpenCV project.
2// It is subject to the license terms in the LICENSE file found in the top-level directory
3// of this distribution and at http://opencv.org/license.html
4
5
6#include "precomp.hpp"
7#include "opencl_kernels_core.hpp"
8#include "stat.hpp"
9
10#include "count_non_zero.simd.hpp"
11#include "count_non_zero.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content
12
13namespace cv {
14
15static CountNonZeroFunc getCountNonZeroTab(int depth)
16{
17 CV_INSTRUMENT_REGION();
18 CV_CPU_DISPATCH(getCountNonZeroTab, (depth),
19 CV_CPU_DISPATCH_MODES_ALL);
20}
21
22#ifdef HAVE_OPENCL
23static bool ocl_countNonZero( InputArray _src, int & res )
24{
25 int type = _src.type(), depth = CV_MAT_DEPTH(type), kercn = ocl::predictOptimalVectorWidth(src1: _src);
26 bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
27
28 if (depth == CV_64F && !doubleSupport)
29 return false;
30
31 int dbsize = ocl::Device::getDefault().maxComputeUnits();
32 size_t wgs = ocl::Device::getDefault().maxWorkGroupSize();
33
34 int wgs2_aligned = 1;
35 while (wgs2_aligned < (int)wgs)
36 wgs2_aligned <<= 1;
37 wgs2_aligned >>= 1;
38
39 ocl::Kernel k("reduce", ocl::core::reduce_oclsrc,
40 format("-D srcT=%s -D srcT1=%s -D cn=1 -D OP_COUNT_NON_ZERO"
41 " -D WGS=%d -D kercn=%d -D WGS2_ALIGNED=%d%s%s",
42 ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)),
43 ocl::typeToStr(depth), (int)wgs, kercn,
44 wgs2_aligned, doubleSupport ? " -D DOUBLE_SUPPORT" : "",
45 _src.isContinuous() ? " -D HAVE_SRC_CONT" : ""));
46 if (k.empty())
47 return false;
48
49 UMat src = _src.getUMat(), db(1, dbsize, CV_32SC1);
50 k.args(kernel_args: ocl::KernelArg::ReadOnlyNoSize(m: src), kernel_args: src.cols, kernel_args: (int)src.total(),
51 kernel_args: dbsize, kernel_args: ocl::KernelArg::PtrWriteOnly(m: db));
52
53 size_t globalsize = dbsize * wgs;
54 if (k.run(dims: 1, globalsize: &globalsize, localsize: &wgs, sync: true))
55 return res = saturate_cast<int>(v: cv::sum(src: db.getMat(flags: ACCESS_READ))[0]), true;
56 return false;
57}
58#endif
59
60#if defined HAVE_IPP
61static bool ipp_countNonZero( Mat &src, int &res )
62{
63 CV_INSTRUMENT_REGION_IPP();
64
65#if IPP_VERSION_X100 < 201801
66 // Poor performance of SSE42
67 if(cv::ipp::getIppTopFeatures() == ippCPUID_SSE42)
68 return false;
69#endif
70
71 Ipp32s count = 0;
72 int depth = src.depth();
73
74 if(src.dims <= 2)
75 {
76 IppStatus status;
77 IppiSize size = {.width: src.cols*src.channels(), .height: src.rows};
78
79 if(depth == CV_8U)
80 status = CV_INSTRUMENT_FUN_IPP(ippiCountInRange_8u_C1R, (const Ipp8u *)src.ptr(), (int)src.step, size, &count, 0, 0);
81 else if(depth == CV_32F)
82 status = CV_INSTRUMENT_FUN_IPP(ippiCountInRange_32f_C1R, (const Ipp32f *)src.ptr(), (int)src.step, size, &count, 0, 0);
83 else
84 return false;
85
86 if(status < 0)
87 return false;
88
89 res = size.width*size.height - count;
90 }
91 else
92 {
93 IppStatus status;
94 const Mat *arrays[] = {&src, NULL};
95 Mat planes[1];
96 NAryMatIterator it(arrays, planes, 1);
97 IppiSize size = {.width: (int)it.size*src.channels(), .height: 1};
98 res = 0;
99 for (size_t i = 0; i < it.nplanes; i++, ++it)
100 {
101 if(depth == CV_8U)
102 status = CV_INSTRUMENT_FUN_IPP(ippiCountInRange_8u_C1R, it.planes->ptr<Ipp8u>(), (int)it.planes->step, size, &count, 0, 0);
103 else if(depth == CV_32F)
104 status = CV_INSTRUMENT_FUN_IPP(ippiCountInRange_32f_C1R, it.planes->ptr<Ipp32f>(), (int)it.planes->step, size, &count, 0, 0);
105 else
106 return false;
107
108 if(status < 0 || (int)it.planes->total()*src.channels() < count)
109 return false;
110
111 res += (int)it.planes->total()*src.channels() - count;
112 }
113 }
114
115 return true;
116}
117#endif
118
119int countNonZero(InputArray _src)
120{
121 CV_INSTRUMENT_REGION();
122
123 int type = _src.type(), cn = CV_MAT_CN(type);
124 CV_Assert( cn == 1 );
125
126#if defined HAVE_OPENCL || defined HAVE_IPP
127 int res = -1;
128#endif
129
130#ifdef HAVE_OPENCL
131 CV_OCL_RUN_(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= 2,
132 ocl_countNonZero(_src, res),
133 res)
134#endif
135
136 Mat src = _src.getMat();
137 CV_IPP_RUN_FAST(ipp_countNonZero(src, res), res);
138
139 CountNonZeroFunc func = getCountNonZeroTab(depth: src.depth());
140 CV_Assert( func != 0 );
141
142 const Mat* arrays[] = {&src, 0};
143 uchar* ptrs[1] = {};
144 NAryMatIterator it(arrays, ptrs);
145 int total = (int)it.size, nz = 0;
146
147 for( size_t i = 0; i < it.nplanes; i++, ++it )
148 nz += func( ptrs[0], total );
149
150 return nz;
151}
152
153void findNonZero(InputArray _src, OutputArray _idx)
154{
155 Mat src = _src.getMat();
156 CV_Assert( src.channels() == 1 && src.dims == 2 );
157
158 int depth = src.depth();
159 std::vector<Point> idxvec;
160 int rows = src.rows, cols = src.cols;
161 AutoBuffer<int> buf_(cols + 1);
162 int* buf = buf_.data();
163
164 for( int i = 0; i < rows; i++ )
165 {
166 int j, k = 0;
167 const uchar* ptr8 = src.ptr(y: i);
168 if( depth == CV_8U || depth == CV_8S )
169 {
170 for( j = 0; j < cols; j++ )
171 if( ptr8[j] != 0 ) buf[k++] = j;
172 }
173 else if( depth == CV_16U || depth == CV_16S )
174 {
175 const ushort* ptr16 = (const ushort*)ptr8;
176 for( j = 0; j < cols; j++ )
177 if( ptr16[j] != 0 ) buf[k++] = j;
178 }
179 else if( depth == CV_32S )
180 {
181 const int* ptr32s = (const int*)ptr8;
182 for( j = 0; j < cols; j++ )
183 if( ptr32s[j] != 0 ) buf[k++] = j;
184 }
185 else if( depth == CV_32F )
186 {
187 const float* ptr32f = (const float*)ptr8;
188 for( j = 0; j < cols; j++ )
189 if( ptr32f[j] != 0 ) buf[k++] = j;
190 }
191 else
192 {
193 const double* ptr64f = (const double*)ptr8;
194 for( j = 0; j < cols; j++ )
195 if( ptr64f[j] != 0 ) buf[k++] = j;
196 }
197
198 if( k > 0 )
199 {
200 size_t sz = idxvec.size();
201 idxvec.resize(new_size: sz + k);
202 for( j = 0; j < k; j++ )
203 idxvec[sz + j] = Point(buf[j], i);
204 }
205 }
206
207 if( idxvec.empty() || (_idx.kind() == _InputArray::MAT && !_idx.getMatRef().isContinuous()) )
208 _idx.release();
209
210 if( !idxvec.empty() )
211 Mat(idxvec).copyTo(m: _idx);
212}
213
214} // namespace
215

// source code of opencv/modules/core/src/count_non_zero.dispatch.cpp