1// This file is part of OpenCV project.
2// It is subject to the license terms in the LICENSE file found in the top-level directory
3// of this distribution and at http://opencv.org/license.html
4
5
6#include "precomp.hpp"
7#include "opencl_kernels_core.hpp"
8#include "stat.hpp"
9
10#include "norm.simd.hpp"
11#include "norm.simd_declarations.hpp"
12
13/****************************************************************************************\
14* norm *
15\****************************************************************************************/
16
17namespace cv { namespace hal {
18
19extern const uchar popCountTable[256] =
20{
21 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
22 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
23 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
24 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
25 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
26 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
27 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
28 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
29};
30
31static const uchar popCountTable2[] =
32{
33 0, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3,
34 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3,
35 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
36 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
37 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
38 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
39 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
40 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4
41};
42
43static const uchar popCountTable4[] =
44{
45 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
46 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
47 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
48 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
49 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
50 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
51 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
52 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
53};
54
55
56int normHamming(const uchar* a, int n, int cellSize)
57{
58 int output;
59 CALL_HAL_RET(normHamming8u, cv_hal_normHamming8u, output, a, n, cellSize);
60
61 if( cellSize == 1 )
62 return normHamming(a, n);
63 const uchar* tab = 0;
64 if( cellSize == 2 )
65 tab = popCountTable2;
66 else if( cellSize == 4 )
67 tab = popCountTable4;
68 else
69 return -1;
70 int i = 0;
71 int result = 0;
72#if (CV_SIMD || CV_SIMD_SCALABLE)
73 v_uint64 t = vx_setzero_u64();
74 if ( cellSize == 2)
75 {
76 v_uint16 mask = v_reinterpret_as_u16(a: vx_setall_u8(v: 0x55));
77 for(; i <= n - VTraits<v_uint8>::vlanes(); i += VTraits<v_uint8>::vlanes())
78 {
79 v_uint16 a0 = v_reinterpret_as_u16(a: vx_load(ptr: a + i));
80 t = v_add(a: t, b: v_popcount(a: v_reinterpret_as_u64(a: v_and(a: v_or(a: a0, b: v_shr<1>(a: a0)), b: mask))));
81 }
82 }
83 else // cellSize == 4
84 {
85 v_uint16 mask = v_reinterpret_as_u16(a: vx_setall_u8(v: 0x11));
86 for(; i <= n - VTraits<v_uint8>::vlanes(); i += VTraits<v_uint8>::vlanes())
87 {
88 v_uint16 a0 = v_reinterpret_as_u16(a: vx_load(ptr: a + i));
89 v_uint16 a1 = v_or(a: a0, b: v_shr<2>(a: a0));
90 t = v_add(a: t, b: v_popcount(a: v_reinterpret_as_u64(a: v_and(a: v_or(a: a1, b: v_shr<1>(a: a1)), b: mask))));
91
92 }
93 }
94 result += (int)v_reduce_sum(a: t);
95 vx_cleanup();
96#elif CV_ENABLE_UNROLLED
97 for( ; i <= n - 4; i += 4 )
98 result += tab[a[i]] + tab[a[i+1]] + tab[a[i+2]] + tab[a[i+3]];
99#endif
100 for( ; i < n; i++ )
101 result += tab[a[i]];
102 return result;
103}
104
105int normHamming(const uchar* a, const uchar* b, int n, int cellSize)
106{
107 int output;
108 CALL_HAL_RET(normHammingDiff8u, cv_hal_normHammingDiff8u, output, a, b, n, cellSize);
109
110 if( cellSize == 1 )
111 return normHamming(a, b, n);
112 const uchar* tab = 0;
113 if( cellSize == 2 )
114 tab = popCountTable2;
115 else if( cellSize == 4 )
116 tab = popCountTable4;
117 else
118 return -1;
119 int i = 0;
120 int result = 0;
121#if (CV_SIMD || CV_SIMD_SCALABLE)
122 v_uint64 t = vx_setzero_u64();
123 if ( cellSize == 2)
124 {
125 v_uint16 mask = v_reinterpret_as_u16(a: vx_setall_u8(v: 0x55));
126 for(; i <= n - VTraits<v_uint8>::vlanes(); i += VTraits<v_uint8>::vlanes())
127 {
128 v_uint16 ab0 = v_reinterpret_as_u16(a: v_xor(a: vx_load(ptr: a + i), b: vx_load(ptr: b + i)));
129 t = v_add(a: t, b: v_popcount(a: v_reinterpret_as_u64(a: v_and(a: v_or(a: ab0, b: v_shr<1>(a: ab0)), b: mask))));
130 }
131 }
132 else // cellSize == 4
133 {
134 v_uint16 mask = v_reinterpret_as_u16(a: vx_setall_u8(v: 0x11));
135 for(; i <= n - VTraits<v_uint8>::vlanes(); i += VTraits<v_uint8>::vlanes())
136 {
137 v_uint16 ab0 = v_reinterpret_as_u16(a: v_xor(a: vx_load(ptr: a + i), b: vx_load(ptr: b + i)));
138 v_uint16 ab1 = v_or(a: ab0, b: v_shr<2>(a: ab0));
139 t = v_add(a: t, b: v_popcount(a: v_reinterpret_as_u64(a: v_and(a: v_or(a: ab1, b: v_shr<1>(a: ab1)), b: mask))));
140 }
141 }
142 result += (int)v_reduce_sum(a: t);
143 vx_cleanup();
144#elif CV_ENABLE_UNROLLED
145 for( ; i <= n - 4; i += 4 )
146 result += tab[a[i] ^ b[i]] + tab[a[i+1] ^ b[i+1]] +
147 tab[a[i+2] ^ b[i+2]] + tab[a[i+3] ^ b[i+3]];
148#endif
149 for( ; i < n; i++ )
150 result += tab[a[i] ^ b[i]];
151 return result;
152}
153
154float normL2Sqr_(const float* a, const float* b, int n)
155{
156 int j = 0; float d = 0.f;
157#if (CV_SIMD || CV_SIMD_SCALABLE)
158 v_float32 v_d0 = vx_setzero_f32(), v_d1 = vx_setzero_f32();
159 v_float32 v_d2 = vx_setzero_f32(), v_d3 = vx_setzero_f32();
160 for (; j <= n - 4 * VTraits<v_float32>::vlanes(); j += 4 * VTraits<v_float32>::vlanes())
161 {
162 v_float32 t0 = v_sub(a: vx_load(ptr: a + j), b: vx_load(ptr: b + j));
163 v_float32 t1 = v_sub(a: vx_load(ptr: a + j + VTraits<v_float32>::vlanes()), b: vx_load(ptr: b + j + VTraits<v_float32>::vlanes()));
164 v_d0 = v_muladd(a: t0, b: t0, c: v_d0);
165 v_float32 t2 = v_sub(a: vx_load(ptr: a + j + 2 * VTraits<v_float32>::vlanes()), b: vx_load(ptr: b + j + 2 * VTraits<v_float32>::vlanes()));
166 v_d1 = v_muladd(a: t1, b: t1, c: v_d1);
167 v_float32 t3 = v_sub(a: vx_load(ptr: a + j + 3 * VTraits<v_float32>::vlanes()), b: vx_load(ptr: b + j + 3 * VTraits<v_float32>::vlanes()));
168 v_d2 = v_muladd(a: t2, b: t2, c: v_d2);
169 v_d3 = v_muladd(a: t3, b: t3, c: v_d3);
170 }
171 d = v_reduce_sum(a: v_add(a: v_add(a: v_add(a: v_d0, b: v_d1), b: v_d2), b: v_d3));
172#endif
173 for( ; j < n; j++ )
174 {
175 float t = a[j] - b[j];
176 d += t*t;
177 }
178 return d;
179}
180
181
182float normL1_(const float* a, const float* b, int n)
183{
184 int j = 0; float d = 0.f;
185#if (CV_SIMD || CV_SIMD_SCALABLE)
186 v_float32 v_d0 = vx_setzero_f32(), v_d1 = vx_setzero_f32();
187 v_float32 v_d2 = vx_setzero_f32(), v_d3 = vx_setzero_f32();
188 for (; j <= n - 4 * VTraits<v_float32>::vlanes(); j += 4 * VTraits<v_float32>::vlanes())
189 {
190 v_d0 = v_add(a: v_d0, b: v_absdiff(a: vx_load(ptr: a + j), b: vx_load(ptr: b + j)));
191 v_d1 = v_add(a: v_d1, b: v_absdiff(a: vx_load(ptr: a + j + VTraits<v_float32>::vlanes()), b: vx_load(ptr: b + j + VTraits<v_float32>::vlanes())));
192 v_d2 = v_add(a: v_d2, b: v_absdiff(a: vx_load(ptr: a + j + 2 * VTraits<v_float32>::vlanes()), b: vx_load(ptr: b + j + 2 * VTraits<v_float32>::vlanes())));
193 v_d3 = v_add(a: v_d3, b: v_absdiff(a: vx_load(ptr: a + j + 3 * VTraits<v_float32>::vlanes()), b: vx_load(ptr: b + j + 3 * VTraits<v_float32>::vlanes())));
194 }
195 d = v_reduce_sum(a: v_add(a: v_add(a: v_add(a: v_d0, b: v_d1), b: v_d2), b: v_d3));
196#endif
197 for( ; j < n; j++ )
198 d += std::abs(x: a[j] - b[j]);
199 return d;
200}
201
202int normL1_(const uchar* a, const uchar* b, int n)
203{
204 int j = 0, d = 0;
205#if (CV_SIMD || CV_SIMD_SCALABLE)
206 for (; j <= n - 4 * VTraits<v_uint8>::vlanes(); j += 4 * VTraits<v_uint8>::vlanes())
207 d += v_reduce_sad(a: vx_load(ptr: a + j), b: vx_load(ptr: b + j)) +
208 v_reduce_sad(a: vx_load(ptr: a + j + VTraits<v_uint8>::vlanes()), b: vx_load(ptr: b + j + VTraits<v_uint8>::vlanes())) +
209 v_reduce_sad(a: vx_load(ptr: a + j + 2 * VTraits<v_uint8>::vlanes()), b: vx_load(ptr: b + j + 2 * VTraits<v_uint8>::vlanes())) +
210 v_reduce_sad(a: vx_load(ptr: a + j + 3 * VTraits<v_uint8>::vlanes()), b: vx_load(ptr: b + j + 3 * VTraits<v_uint8>::vlanes()));
211#endif
212 for( ; j < n; j++ )
213 d += std::abs(x: a[j] - b[j]);
214 return d;
215}
216
217} //cv::hal
218
219//==================================================================================================
220
221typedef int (*NormFunc)(const uchar*, const uchar*, uchar*, int, int);
222typedef int (*NormDiffFunc)(const uchar*, const uchar*, const uchar*, uchar*, int, int);
223
224#ifdef HAVE_OPENCL
225
226static bool ocl_norm( InputArray _src, int normType, InputArray _mask, double & result )
227{
228 const ocl::Device & d = ocl::Device::getDefault();
229
230#ifdef __ANDROID__
231 if (d.isNVidia())
232 return false;
233#endif
234 const int cn = _src.channels();
235 if (cn > 4)
236 return false;
237 int type = _src.type(), depth = CV_MAT_DEPTH(type);
238 bool doubleSupport = d.doubleFPConfig() > 0,
239 haveMask = _mask.kind() != _InputArray::NONE;
240
241 if (depth >= CV_16F)
242 return false; // TODO: support FP16
243
244 if ( !(normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR) ||
245 (!doubleSupport && depth == CV_64F))
246 return false;
247
248 UMat src = _src.getUMat();
249
250 if (normType == NORM_INF)
251 {
252 if (!ocl_minMaxIdx(_src, NULL, maxVal: &result, NULL, NULL, _mask,
253 ddepth: std::max(a: depth, CV_32S), absValues: depth != CV_8U && depth != CV_16U))
254 return false;
255 }
256 else if (normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR)
257 {
258 Scalar sc;
259 bool unstype = depth == CV_8U || depth == CV_16U;
260
261 if ( !ocl_sum(src: haveMask ? src : src.reshape(cn: 1), res&: sc, sum_op: normType == NORM_L2 || normType == NORM_L2SQR ?
262 OCL_OP_SUM_SQR : (unstype ? OCL_OP_SUM : OCL_OP_SUM_ABS), _mask) )
263 return false;
264
265 double s = 0.0;
266 for (int i = 0; i < (haveMask ? cn : 1); ++i)
267 s += sc[i];
268
269 result = normType == NORM_L1 || normType == NORM_L2SQR ? s : std::sqrt(x: s);
270 }
271
272 return true;
273}
274
275#endif
276
277static NormFunc getNormFunc(int normType, int depth) {
278 CV_INSTRUMENT_REGION();
279 CV_CPU_DISPATCH(getNormFunc, (normType, depth), CV_CPU_DISPATCH_MODES_ALL);
280}
281static NormDiffFunc getNormDiffFunc(int normType, int depth) {
282 CV_INSTRUMENT_REGION();
283 CV_CPU_DISPATCH(getNormDiffFunc, (normType, depth), CV_CPU_DISPATCH_MODES_ALL);
284}
285
286double norm( InputArray _src, int normType, InputArray _mask )
287{
288 CV_INSTRUMENT_REGION();
289
290 normType &= NORM_TYPE_MASK;
291 CV_Assert( normType == NORM_INF || normType == NORM_L1 ||
292 normType == NORM_L2 || normType == NORM_L2SQR ||
293 ((normType == NORM_HAMMING || normType == NORM_HAMMING2) && _src.type() == CV_8U) );
294
295#if defined HAVE_OPENCL
296 double _result = 0;
297#endif
298
299#ifdef HAVE_OPENCL
300 CV_OCL_RUN_(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= 2,
301 ocl_norm(_src, normType, _mask, result&: _result),
302 _result)
303#endif
304
305 Mat src = _src.getMat(), mask = _mask.getMat();
306 int depth = src.depth(), cn = src.channels();
307 if( src.dims <= 2 )
308 {
309 double result;
310 CALL_HAL_RET(norm, cv_hal_norm, result, src.data, src.step, mask.data, mask.step, src.cols, src.rows, src.type(), normType);
311 }
312 else if( src.isContinuous() && mask.isContinuous() )
313 {
314 double result;
315 CALL_HAL_RET(norm, cv_hal_norm, result, src.data, 0, mask.data, 0, (int)src.total(), 1, src.type(), normType);
316 }
317
318 NormFunc func = getNormFunc(normType: normType >> 1, depth: depth == CV_16F ? CV_32F : depth);
319 CV_Assert( (normType >> 1) >= 3 || func != 0 );
320
321 if( src.isContinuous() && mask.empty() )
322 {
323 size_t len = src.total()*cn;
324 if( len == (size_t)(int)len )
325 {
326 if( depth == CV_32F )
327 {
328 const uchar* data = src.ptr<const uchar>();
329
330 if( normType == NORM_L2 || normType == NORM_L2SQR || normType == NORM_L1 )
331 {
332 double result = 0;
333 func(data, 0, (uchar*)&result, (int)len, 1);
334 return normType == NORM_L2 ? std::sqrt(x: result) : result;
335 }
336 if( normType == NORM_INF )
337 {
338 float result = 0;
339 func(data, 0, (uchar*)&result, (int)len, 1);
340 return result;
341 }
342 }
343 if( depth == CV_8U )
344 {
345 const uchar* data = src.ptr<uchar>();
346
347 if( normType == NORM_HAMMING )
348 {
349 return hal::normHamming(a: data, n: (int)len, cellSize: 1);
350 }
351
352 if( normType == NORM_HAMMING2 )
353 {
354 return hal::normHamming(a: data, n: (int)len, cellSize: 2);
355 }
356 }
357 }
358 }
359
360 CV_Assert( mask.empty() || mask.type() == CV_8U );
361
362 if( normType == NORM_HAMMING || normType == NORM_HAMMING2 )
363 {
364 if( !mask.empty() )
365 {
366 Mat temp;
367 bitwise_and(src1: src, src2: mask, dst: temp);
368 return norm(src: temp, normType);
369 }
370 int cellSize = normType == NORM_HAMMING ? 1 : 2;
371
372 const Mat* arrays[] = {&src, 0};
373 uchar* ptrs[1] = {};
374 NAryMatIterator it(arrays, ptrs);
375 int total = (int)it.size;
376 int result = 0;
377
378 for( size_t i = 0; i < it.nplanes; i++, ++it )
379 {
380 result += hal::normHamming(a: ptrs[0], n: total, cellSize);
381 }
382
383 return result;
384 }
385
386 const Mat* arrays[] = {&src, &mask, 0};
387 uchar* ptrs[2] = {};
388 union
389 {
390 double d;
391 int i;
392 float f;
393 }
394 result;
395 result.d = 0;
396 NAryMatIterator it(arrays, ptrs);
397 CV_CheckLT((size_t)it.size, (size_t)INT_MAX, "");
398
399 if ((normType == NORM_L1 && depth <= CV_16S) ||
400 ((normType == NORM_L2 || normType == NORM_L2SQR) && depth <= CV_8S))
401 {
402 // special case to handle "integer" overflow in accumulator
403 const size_t esz = src.elemSize();
404 const int total = (int)it.size;
405 const int intSumBlockSize = (normType == NORM_L1 && depth <= CV_8S ? (1 << 23) : (1 << 15))/cn;
406 const int blockSize = std::min(a: total, b: intSumBlockSize);
407 int isum = 0;
408 int count = 0;
409
410 for (size_t i = 0; i < it.nplanes; i++, ++it)
411 {
412 for (int j = 0; j < total; j += blockSize)
413 {
414 int bsz = std::min(a: total - j, b: blockSize);
415 func(ptrs[0], ptrs[1], (uchar*)&isum, bsz, cn);
416 count += bsz;
417 if (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total))
418 {
419 result.d += isum;
420 isum = 0;
421 count = 0;
422 }
423 ptrs[0] += bsz*esz;
424 if (ptrs[1])
425 ptrs[1] += bsz;
426 }
427 }
428 }
429 else if (depth == CV_16F)
430 {
431 const size_t esz = src.elemSize();
432 const int total = (int)it.size;
433 const int blockSize = std::min(a: total, b: divUp(a: 1024, b: cn));
434 AutoBuffer<float, 1026/*divUp(1024,3)*3*/> fltbuf(blockSize * cn);
435 float* data0 = fltbuf.data();
436 for (size_t i = 0; i < it.nplanes; i++, ++it)
437 {
438 for (int j = 0; j < total; j += blockSize)
439 {
440 int bsz = std::min(a: total - j, b: blockSize);
441 hal::cvt16f32f(src: (const hfloat*)ptrs[0], dst: data0, len: bsz * cn);
442 func((uchar*)data0, ptrs[1], (uchar*)&result.f, bsz, cn);
443 ptrs[0] += bsz*esz;
444 if (ptrs[1])
445 ptrs[1] += bsz;
446 }
447 }
448 }
449 else
450 {
451 // generic implementation
452 for (size_t i = 0; i < it.nplanes; i++, ++it)
453 {
454 func(ptrs[0], ptrs[1], (uchar*)&result, (int)it.size, cn);
455 }
456 }
457
458 if( normType == NORM_INF )
459 {
460 if(depth == CV_64F)
461 return result.d;
462 else if (depth == CV_32F || depth == CV_16F)
463 return result.f;
464 else
465 return result.i;
466 }
467 else if( normType == NORM_L2 )
468 return std::sqrt(x: result.d);
469
470 return result.d;
471}
472
473//==================================================================================================
474
475#ifdef HAVE_OPENCL
476static bool ocl_norm( InputArray _src1, InputArray _src2, int normType, InputArray _mask, double & result )
477{
478#ifdef __ANDROID__
479 if (ocl::Device::getDefault().isNVidia())
480 return false;
481#endif
482
483 Scalar sc1, sc2;
484 int cn = _src1.channels();
485 if (cn > 4)
486 return false;
487 int type = _src1.type(), depth = CV_MAT_DEPTH(type);
488 bool relative = (normType & NORM_RELATIVE) != 0;
489 normType &= ~NORM_RELATIVE;
490 bool normsum = normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR;
491
492#ifdef __APPLE__
493 if(normType == NORM_L1 && type == CV_16UC3 && !_mask.empty())
494 return false;
495#endif
496
497 if (normsum)
498 {
499 if (!ocl_sum(src: _src1, res&: sc1, sum_op: normType == NORM_L2 || normType == NORM_L2SQR ?
500 OCL_OP_SUM_SQR : OCL_OP_SUM, _mask, _src2, calc2: relative, res2: sc2))
501 return false;
502 }
503 else
504 {
505 if (!ocl_minMaxIdx(src: _src1, NULL, maxVal: &sc1[0], NULL, NULL, _mask, ddepth: std::max(CV_32S, b: depth),
506 absValues: false, _src2, maxVal2: relative ? &sc2[0] : NULL))
507 return false;
508 cn = 1;
509 }
510
511 double s2 = 0;
512 for (int i = 0; i < cn; ++i)
513 {
514 result += sc1[i];
515 if (relative)
516 s2 += sc2[i];
517 }
518
519 if (normType == NORM_L2)
520 {
521 result = std::sqrt(x: result);
522 if (relative)
523 s2 = std::sqrt(x: s2);
524 }
525
526 if (relative)
527 result /= (s2 + DBL_EPSILON);
528
529 return true;
530} // ocl_norm()
531#endif // HAVE_OPENCL
532
533double norm( InputArray _src1, InputArray _src2, int normType, InputArray _mask )
534{
535 CV_INSTRUMENT_REGION();
536
537 CV_CheckTypeEQ(_src1.type(), _src2.type(), "Input type mismatch");
538 CV_Assert(_src1.sameSize(_src2));
539
540#if defined HAVE_OPENCL
541 double _result = 0;
542#endif
543
544#ifdef HAVE_OPENCL
545 CV_OCL_RUN_(OCL_PERFORMANCE_CHECK(_src1.isUMat()),
546 ocl_norm(_src1, _src2, normType, _mask, result&: _result),
547 _result)
548#endif
549
550 Mat src1 = _src1.getMat(), src2 = _src2.getMat(), mask = _mask.getMat();
551 int depth = src1.depth(), cn = src1.channels();
552 if( src1.dims <= 2 )
553 {
554 double result;
555 CALL_HAL_RET(normDiff, cv_hal_normDiff, result, src1.data, src1.step, src2.data, src2.step, mask.data, mask.step, src1.cols, src1.rows, src1.type(), normType);
556 }
557 else if( src1.isContinuous() && src2.isContinuous() && mask.isContinuous() )
558 {
559 double result;
560 CALL_HAL_RET(normDiff, cv_hal_normDiff, result, src1.data, 0, src2.data, 0, mask.data, 0, (int)src1.total(), 1, src1.type(), normType);
561 }
562
563 if( normType & CV_RELATIVE )
564 {
565 return norm(_src1, _src2, normType: normType & ~CV_RELATIVE, _mask)/(norm(src: _src2, normType, _mask) + DBL_EPSILON);
566 }
567
568 normType &= 7;
569 CV_Assert( normType == NORM_INF || normType == NORM_L1 ||
570 normType == NORM_L2 || normType == NORM_L2SQR ||
571 ((normType == NORM_HAMMING || normType == NORM_HAMMING2) && src1.type() == CV_8U) );
572
573 NormDiffFunc func = getNormDiffFunc(normType: normType >> 1, depth: depth == CV_16F ? CV_32F : depth);
574 CV_Assert( (normType >> 1) >= 3 || func != 0 );
575
576 if( src1.isContinuous() && src2.isContinuous() && mask.empty() )
577 {
578 size_t len = src1.total()*src1.channels();
579 if( len == (size_t)(int)len )
580 {
581 if( src1.depth() == CV_32F )
582 {
583 const uchar* data1 = src1.ptr<const uchar>();
584 const uchar* data2 = src2.ptr<const uchar>();
585
586 if( normType == NORM_L2 || normType == NORM_L2SQR || normType == NORM_L1 )
587 {
588 double result = 0;
589 func(data1, data2, 0, (uchar*)&result, (int)len, 1);
590 return normType == NORM_L2 ? std::sqrt(x: result) : result;
591 }
592 if( normType == NORM_INF )
593 {
594 float result = 0;
595 func(data1, data2, 0, (uchar*)&result, (int)len, 1);
596 return result;
597 }
598 }
599 }
600 }
601
602 CV_Assert( mask.empty() || mask.type() == CV_8U );
603
604 if( normType == NORM_HAMMING || normType == NORM_HAMMING2 )
605 {
606 if( !mask.empty() )
607 {
608 Mat temp;
609 bitwise_xor(src1, src2, dst: temp);
610 bitwise_and(src1: temp, src2: mask, dst: temp);
611 return norm(src: temp, normType);
612 }
613 int cellSize = normType == NORM_HAMMING ? 1 : 2;
614
615 const Mat* arrays[] = {&src1, &src2, 0};
616 uchar* ptrs[2] = {};
617 NAryMatIterator it(arrays, ptrs);
618 int total = (int)it.size;
619 int result = 0;
620
621 for( size_t i = 0; i < it.nplanes; i++, ++it )
622 {
623 result += hal::normHamming(a: ptrs[0], b: ptrs[1], n: total, cellSize);
624 }
625
626 return result;
627 }
628
629 const Mat* arrays[] = {&src1, &src2, &mask, 0};
630 uchar* ptrs[3] = {};
631 union
632 {
633 double d;
634 float f;
635 int i;
636 unsigned u;
637 }
638 result;
639 result.d = 0;
640 NAryMatIterator it(arrays, ptrs);
641 CV_CheckLT((size_t)it.size, (size_t)INT_MAX, "");
642
643 if ((normType == NORM_L1 && depth <= CV_16S) ||
644 ((normType == NORM_L2 || normType == NORM_L2SQR) && depth <= CV_8S))
645 {
646 // special case to handle "integer" overflow in accumulator
647 const size_t esz = src1.elemSize();
648 const int total = (int)it.size;
649 const int intSumBlockSize = (normType == NORM_L1 && depth <= CV_8S ? (1 << 23) : (1 << 15))/cn;
650 const int blockSize = std::min(a: total, b: intSumBlockSize);
651 int isum = 0;
652 int count = 0;
653
654 for (size_t i = 0; i < it.nplanes; i++, ++it)
655 {
656 for (int j = 0; j < total; j += blockSize)
657 {
658 int bsz = std::min(a: total - j, b: blockSize);
659 func(ptrs[0], ptrs[1], ptrs[2], (uchar*)&isum, bsz, cn);
660 count += bsz;
661 if (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total))
662 {
663 result.d += isum;
664 isum = 0;
665 count = 0;
666 }
667 ptrs[0] += bsz*esz;
668 ptrs[1] += bsz*esz;
669 if (ptrs[2])
670 ptrs[2] += bsz;
671 }
672 }
673 }
674 else if (depth == CV_16F)
675 {
676 const size_t esz = src1.elemSize();
677 const int total = (int)it.size;
678 const int blockSize = std::min(a: total, b: divUp(a: 512, b: cn));
679 AutoBuffer<float, 1026/*divUp(512,3)*3*2*/> fltbuf(blockSize * cn * 2);
680 float* data0 = fltbuf.data();
681 float* data1 = fltbuf.data() + blockSize * cn;
682 for (size_t i = 0; i < it.nplanes; i++, ++it)
683 {
684 for (int j = 0; j < total; j += blockSize)
685 {
686 int bsz = std::min(a: total - j, b: blockSize);
687 hal::cvt16f32f(src: (const hfloat*)ptrs[0], dst: data0, len: bsz * cn);
688 hal::cvt16f32f(src: (const hfloat*)ptrs[1], dst: data1, len: bsz * cn);
689 func((uchar*)data0, (uchar*)data1, ptrs[2], (uchar*)&result.f, bsz, cn);
690 ptrs[0] += bsz*esz;
691 ptrs[1] += bsz*esz;
692 if (ptrs[2])
693 ptrs[2] += bsz;
694 }
695 }
696 }
697 else
698 {
699 // generic implementation
700 for (size_t i = 0; i < it.nplanes; i++, ++it)
701 {
702 func(ptrs[0], ptrs[1], ptrs[2], (uchar*)&result, (int)it.size, cn);
703 }
704 }
705
706 if( normType == NORM_INF )
707 {
708 if (depth == CV_64F)
709 return result.d;
710 else if (depth == CV_32F || depth == CV_16F)
711 return result.f;
712 else
713 return result.u;
714 }
715 else if( normType == NORM_L2 )
716 return std::sqrt(x: result.d);
717
718 return result.d;
719}
720
721cv::Hamming::ResultType Hamming::operator()( const unsigned char* a, const unsigned char* b, int size ) const
722{
723 return cv::hal::normHamming(a, b, n: size);
724}
725
726double PSNR(InputArray _src1, InputArray _src2, double R)
727{
728 CV_INSTRUMENT_REGION();
729
730 //Input arrays must have depth CV_8U
731 CV_Assert( _src1.type() == _src2.type() );
732
733 double diff = std::sqrt(x: norm(_src1, _src2, normType: NORM_L2SQR)/(_src1.total()*_src1.channels()));
734 return 20*log10(x: R/(diff+DBL_EPSILON));
735}
736
737
738#ifdef HAVE_OPENCL
739static bool ocl_normalize( InputArray _src, InputOutputArray _dst, InputArray _mask, int dtype,
740 double scale, double delta )
741{
742 UMat src = _src.getUMat();
743
744 if( _mask.empty() )
745 src.convertTo( m: _dst, rtype: dtype, alpha: scale, beta: delta );
746 else if (src.channels() <= 4)
747 {
748 const ocl::Device & dev = ocl::Device::getDefault();
749
750 int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), cn = CV_MAT_CN(stype),
751 ddepth = CV_MAT_DEPTH(dtype), wdepth = std::max(CV_32F, b: std::max(a: sdepth, b: ddepth)),
752 rowsPerWI = dev.isIntel() ? 4 : 1;
753
754 float fscale = static_cast<float>(scale), fdelta = static_cast<float>(delta);
755 bool haveScale = std::fabs(x: scale - 1) > DBL_EPSILON,
756 haveZeroScale = !(std::fabs(x: scale) > DBL_EPSILON),
757 haveDelta = std::fabs(x: delta) > DBL_EPSILON,
758 doubleSupport = dev.doubleFPConfig() > 0;
759
760 if (!haveScale && !haveDelta && stype == dtype)
761 {
762 _src.copyTo(arr: _dst, mask: _mask);
763 return true;
764 }
765 if (haveZeroScale)
766 {
767 _dst.setTo(value: Scalar(delta), mask: _mask);
768 return true;
769 }
770
771 if ((sdepth == CV_64F || ddepth == CV_64F) && !doubleSupport)
772 return false;
773
774 char cvt[2][50];
775 String opts = format(fmt: "-D srcT=%s -D dstT=%s -D convertToWT=%s -D cn=%d -D rowsPerWI=%d"
776 " -D convertToDT=%s -D workT=%s%s%s%s -D srcT1=%s -D dstT1=%s",
777 ocl::typeToStr(t: stype), ocl::typeToStr(t: dtype),
778 ocl::convertTypeStr(sdepth, ddepth: wdepth, cn, buf: cvt[0], buf_size: sizeof(cvt[0])), cn,
779 rowsPerWI, ocl::convertTypeStr(sdepth: wdepth, ddepth, cn, buf: cvt[1], buf_size: sizeof(cvt[1])),
780 ocl::typeToStr(CV_MAKE_TYPE(wdepth, cn)),
781 doubleSupport ? " -D DOUBLE_SUPPORT" : "",
782 haveScale ? " -D HAVE_SCALE" : "",
783 haveDelta ? " -D HAVE_DELTA" : "",
784 ocl::typeToStr(t: sdepth), ocl::typeToStr(t: ddepth));
785
786 ocl::Kernel k("normalizek", ocl::core::normalize_oclsrc, opts);
787 if (k.empty())
788 return false;
789
790 UMat mask = _mask.getUMat(), dst = _dst.getUMat();
791
792 ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(m: src),
793 maskarg = ocl::KernelArg::ReadOnlyNoSize(m: mask),
794 dstarg = ocl::KernelArg::ReadWrite(m: dst);
795
796 if (haveScale)
797 {
798 if (haveDelta)
799 k.args(kernel_args: srcarg, kernel_args: maskarg, kernel_args: dstarg, kernel_args: fscale, kernel_args: fdelta);
800 else
801 k.args(kernel_args: srcarg, kernel_args: maskarg, kernel_args: dstarg, kernel_args: fscale);
802 }
803 else
804 {
805 if (haveDelta)
806 k.args(kernel_args: srcarg, kernel_args: maskarg, kernel_args: dstarg, kernel_args: fdelta);
807 else
808 k.args(kernel_args: srcarg, kernel_args: maskarg, kernel_args: dstarg);
809 }
810
811 size_t globalsize[2] = { (size_t)src.cols, ((size_t)src.rows + rowsPerWI - 1) / rowsPerWI };
812 return k.run(dims: 2, globalsize, NULL, sync: false);
813 }
814 else
815 {
816 UMat temp;
817 src.convertTo( m: temp, rtype: dtype, alpha: scale, beta: delta );
818 temp.copyTo( m: _dst, mask: _mask );
819 }
820
821 return true;
822} // ocl_normalize
823#endif // HAVE_OPENCL
824
825void normalize(InputArray _src, InputOutputArray _dst, double a, double b,
826 int norm_type, int rtype, InputArray _mask)
827{
828 CV_INSTRUMENT_REGION();
829
830 double scale = 1, shift = 0;
831 int type = _src.type(), depth = CV_MAT_DEPTH(type);
832
833 if( rtype < 0 )
834 rtype = _dst.fixedType() ? _dst.depth() : depth;
835
836 if( norm_type == CV_MINMAX )
837 {
838 double smin = 0, smax = 0;
839 double dmin = MIN( a, b ), dmax = MAX( a, b );
840 minMaxIdx( src: _src, minVal: &smin, maxVal: &smax, minIdx: 0, maxIdx: 0, mask: _mask );
841 scale = (dmax - dmin)*(smax - smin > DBL_EPSILON ? 1./(smax - smin) : 0);
842 if( rtype == CV_32F )
843 {
844 scale = (float)scale;
845 shift = (float)dmin - (float)(smin*scale);
846 }
847 else
848 shift = dmin - smin*scale;
849 }
850 else if( norm_type == CV_L2 || norm_type == CV_L1 || norm_type == CV_C )
851 {
852 scale = norm( _src, normType: norm_type, _mask );
853 scale = scale > DBL_EPSILON ? a/scale : 0.;
854 shift = 0;
855 }
856 else
857 CV_Error( cv::Error::StsBadArg, "Unknown/unsupported norm type" );
858
859 CV_OCL_RUN(_dst.isUMat(),
860 ocl_normalize(_src, _dst, _mask, dtype: rtype, scale, delta: shift))
861
862 Mat src = _src.getMat();
863 if( _mask.empty() )
864 src.convertTo( m: _dst, rtype, alpha: scale, beta: shift );
865 else
866 {
867 Mat temp;
868 src.convertTo( m: temp, rtype, alpha: scale, beta: shift );
869 temp.copyTo( m: _dst, mask: _mask );
870 }
871}
872
873} // namespace
874

source code of opencv/modules/core/src/norm.dispatch.cpp