norm.dispatch.cpp source code [opencv/modules/core/src/norm.dispatch.cpp]

1	// This file is part of OpenCV project.
2	// It is subject to the license terms in the LICENSE file found in the top-level directory
3	// of this distribution and at http://opencv.org/license.html
4
5
6	#include "precomp.hpp"
7	#include "opencl_kernels_core.hpp"
8	#include "stat.hpp"
9
10	#include "norm.simd.hpp"
11	#include "norm.simd_declarations.hpp"
12
13	/**************************************************************************************\
14	* norm *
15	\**************************************************************************************/
16
17	namespace cv { namespace hal {
18
19	extern const uchar popCountTable[`256`] =
20	{
21	`0`, `1`, `1`, `2`, `1`, `2`, `2`, `3`, `1`, `2`, `2`, `3`, `2`, `3`, `3`, `4`, `1`, `2`, `2`, `3`, `2`, `3`, `3`, `4`, `2`, `3`, `3`, `4`, `3`, `4`, `4`, `5`,
22	`1`, `2`, `2`, `3`, `2`, `3`, `3`, `4`, `2`, `3`, `3`, `4`, `3`, `4`, `4`, `5`, `2`, `3`, `3`, `4`, `3`, `4`, `4`, `5`, `3`, `4`, `4`, `5`, `4`, `5`, `5`, `6`,
23	`1`, `2`, `2`, `3`, `2`, `3`, `3`, `4`, `2`, `3`, `3`, `4`, `3`, `4`, `4`, `5`, `2`, `3`, `3`, `4`, `3`, `4`, `4`, `5`, `3`, `4`, `4`, `5`, `4`, `5`, `5`, `6`,
24	`2`, `3`, `3`, `4`, `3`, `4`, `4`, `5`, `3`, `4`, `4`, `5`, `4`, `5`, `5`, `6`, `3`, `4`, `4`, `5`, `4`, `5`, `5`, `6`, `4`, `5`, `5`, `6`, `5`, `6`, `6`, `7`,
25	`1`, `2`, `2`, `3`, `2`, `3`, `3`, `4`, `2`, `3`, `3`, `4`, `3`, `4`, `4`, `5`, `2`, `3`, `3`, `4`, `3`, `4`, `4`, `5`, `3`, `4`, `4`, `5`, `4`, `5`, `5`, `6`,
26	`2`, `3`, `3`, `4`, `3`, `4`, `4`, `5`, `3`, `4`, `4`, `5`, `4`, `5`, `5`, `6`, `3`, `4`, `4`, `5`, `4`, `5`, `5`, `6`, `4`, `5`, `5`, `6`, `5`, `6`, `6`, `7`,
27	`2`, `3`, `3`, `4`, `3`, `4`, `4`, `5`, `3`, `4`, `4`, `5`, `4`, `5`, `5`, `6`, `3`, `4`, `4`, `5`, `4`, `5`, `5`, `6`, `4`, `5`, `5`, `6`, `5`, `6`, `6`, `7`,
28	`3`, `4`, `4`, `5`, `4`, `5`, `5`, `6`, `4`, `5`, `5`, `6`, `5`, `6`, `6`, `7`, `4`, `5`, `5`, `6`, `5`, `6`, `6`, `7`, `5`, `6`, `6`, `7`, `6`, `7`, `7`, `8`
29	};
30
31	static const uchar popCountTable2[] =
32	{
33	`0`, `1`, `1`, `1`, `1`, `2`, `2`, `2`, `1`, `2`, `2`, `2`, `1`, `2`, `2`, `2`, `1`, `2`, `2`, `2`, `2`, `3`, `3`, `3`, `2`, `3`, `3`, `3`, `2`, `3`, `3`, `3`,
34	`1`, `2`, `2`, `2`, `2`, `3`, `3`, `3`, `2`, `3`, `3`, `3`, `2`, `3`, `3`, `3`, `1`, `2`, `2`, `2`, `2`, `3`, `3`, `3`, `2`, `3`, `3`, `3`, `2`, `3`, `3`, `3`,
35	`1`, `2`, `2`, `2`, `2`, `3`, `3`, `3`, `2`, `3`, `3`, `3`, `2`, `3`, `3`, `3`, `2`, `3`, `3`, `3`, `3`, `4`, `4`, `4`, `3`, `4`, `4`, `4`, `3`, `4`, `4`, `4`,
36	`2`, `3`, `3`, `3`, `3`, `4`, `4`, `4`, `3`, `4`, `4`, `4`, `3`, `4`, `4`, `4`, `2`, `3`, `3`, `3`, `3`, `4`, `4`, `4`, `3`, `4`, `4`, `4`, `3`, `4`, `4`, `4`,
37	`1`, `2`, `2`, `2`, `2`, `3`, `3`, `3`, `2`, `3`, `3`, `3`, `2`, `3`, `3`, `3`, `2`, `3`, `3`, `3`, `3`, `4`, `4`, `4`, `3`, `4`, `4`, `4`, `3`, `4`, `4`, `4`,
38	`2`, `3`, `3`, `3`, `3`, `4`, `4`, `4`, `3`, `4`, `4`, `4`, `3`, `4`, `4`, `4`, `2`, `3`, `3`, `3`, `3`, `4`, `4`, `4`, `3`, `4`, `4`, `4`, `3`, `4`, `4`, `4`,
39	`1`, `2`, `2`, `2`, `2`, `3`, `3`, `3`, `2`, `3`, `3`, `3`, `2`, `3`, `3`, `3`, `2`, `3`, `3`, `3`, `3`, `4`, `4`, `4`, `3`, `4`, `4`, `4`, `3`, `4`, `4`, `4`,
40	`2`, `3`, `3`, `3`, `3`, `4`, `4`, `4`, `3`, `4`, `4`, `4`, `3`, `4`, `4`, `4`, `2`, `3`, `3`, `3`, `3`, `4`, `4`, `4`, `3`, `4`, `4`, `4`, `3`, `4`, `4`, `4`
41	};
42
43	static const uchar popCountTable4[] =
44	{
45	`0`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`,
46	`1`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `1`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`,
47	`1`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `1`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`,
48	`1`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `1`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`,
49	`1`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `1`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`,
50	`1`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `1`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`,
51	`1`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `1`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`,
52	`1`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `1`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`
53	};
54
55
56	int normHamming(const uchar* a, int n, int cellSize)
57	{
58	int output;
59	CALL_HAL_RET(normHamming8u, cv_hal_normHamming8u, output, a, n, cellSize);
60
61	if( cellSize == `1` )
62	return normHamming(a, n);
63	const uchar* tab = `0`;
64	if( cellSize == `2` )
65	tab = popCountTable2;
66	else if( cellSize == `4` )
67	tab = popCountTable4;
68	else
69	return -`1`;
70	int i = `0`;
71	int result = `0`;
72	#if (CV_SIMD \|\| CV_SIMD_SCALABLE)
73	v_uint64 t = vx_setzero_u64();
74	if ( cellSize == `2`)
75	{
76	v_uint16 mask = v_reinterpret_as_u16(a: vx_setall_u8(v: `0x55`));
77	for(; i <= n - VTraits<v_uint8>::vlanes(); i += VTraits<v_uint8>::vlanes())
78	{
79	v_uint16 a0 = v_reinterpret_as_u16(a: vx_load(ptr: a + i));
80	t = v_add(a: t, b: v_popcount(a: v_reinterpret_as_u64(a: v_and(a: v_or(a: a0, b: v_shr<`1`>(a: a0)), b: mask))));
81	}
82	}
83	else // cellSize == 4
84	{
85	v_uint16 mask = v_reinterpret_as_u16(a: vx_setall_u8(v: `0x11`));
86	for(; i <= n - VTraits<v_uint8>::vlanes(); i += VTraits<v_uint8>::vlanes())
87	{
88	v_uint16 a0 = v_reinterpret_as_u16(a: vx_load(ptr: a + i));
89	v_uint16 a1 = v_or(a: a0, b: v_shr<`2`>(a: a0));
90	t = v_add(a: t, b: v_popcount(a: v_reinterpret_as_u64(a: v_and(a: v_or(a: a1, b: v_shr<`1`>(a: a1)), b: mask))));
91
92	}
93	}
94	result += (int)v_reduce_sum(a: t);
95	vx_cleanup();
96	#elif CV_ENABLE_UNROLLED
97	for( ; i <= n - `4`; i += `4` )
98	result += tab[a[i]] + tab[a[i+`1`]] + tab[a[i+`2`]] + tab[a[i+`3`]];
99	#endif
100	for( ; i < n; i++ )
101	result += tab[a[i]];
102	return result;
103	}
104
105	int normHamming(const uchar* a, const uchar* b, int n, int cellSize)
106	{
107	int output;
108	CALL_HAL_RET(normHammingDiff8u, cv_hal_normHammingDiff8u, output, a, b, n, cellSize);
109
110	if( cellSize == `1` )
111	return normHamming(a, b, n);
112	const uchar* tab = `0`;
113	if( cellSize == `2` )
114	tab = popCountTable2;
115	else if( cellSize == `4` )
116	tab = popCountTable4;
117	else
118	return -`1`;
119	int i = `0`;
120	int result = `0`;
121	#if (CV_SIMD \|\| CV_SIMD_SCALABLE)
122	v_uint64 t = vx_setzero_u64();
123	if ( cellSize == `2`)
124	{
125	v_uint16 mask = v_reinterpret_as_u16(a: vx_setall_u8(v: `0x55`));
126	for(; i <= n - VTraits<v_uint8>::vlanes(); i += VTraits<v_uint8>::vlanes())
127	{
128	v_uint16 ab0 = v_reinterpret_as_u16(a: v_xor(a: vx_load(ptr: a + i), b: vx_load(ptr: b + i)));
129	t = v_add(a: t, b: v_popcount(a: v_reinterpret_as_u64(a: v_and(a: v_or(a: ab0, b: v_shr<`1`>(a: ab0)), b: mask))));
130	}
131	}
132	else // cellSize == 4
133	{
134	v_uint16 mask = v_reinterpret_as_u16(a: vx_setall_u8(v: `0x11`));
135	for(; i <= n - VTraits<v_uint8>::vlanes(); i += VTraits<v_uint8>::vlanes())
136	{
137	v_uint16 ab0 = v_reinterpret_as_u16(a: v_xor(a: vx_load(ptr: a + i), b: vx_load(ptr: b + i)));
138	v_uint16 ab1 = v_or(a: ab0, b: v_shr<`2`>(a: ab0));
139	t = v_add(a: t, b: v_popcount(a: v_reinterpret_as_u64(a: v_and(a: v_or(a: ab1, b: v_shr<`1`>(a: ab1)), b: mask))));
140	}
141	}
142	result += (int)v_reduce_sum(a: t);
143	vx_cleanup();
144	#elif CV_ENABLE_UNROLLED
145	for( ; i <= n - `4`; i += `4` )
146	result += tab[a[i] ^ b[i]] + tab[a[i+`1`] ^ b[i+`1`]] +
147	tab[a[i+`2`] ^ b[i+`2`]] + tab[a[i+`3`] ^ b[i+`3`]];
148	#endif
149	for( ; i < n; i++ )
150	result += tab[a[i] ^ b[i]];
151	return result;
152	}
153
// Returns the squared L2 distance between two float arrays:
// sum over j in [0, n) of (a[j] - b[j])^2, accumulated in single precision.
//
// a, b - input arrays, both read for n elements
// n    - number of elements
//
// With SIMD enabled the bulk is processed four vectors per iteration using
// four independent accumulators; the scalar loop handles the remainder.
float normL2Sqr_(const float* a, const float* b, int n)
{
    int j = 0; float d = 0.f;
#if (CV_SIMD || CV_SIMD_SCALABLE)
    v_float32 v_d0 = vx_setzero_f32(), v_d1 = vx_setzero_f32();
    v_float32 v_d2 = vx_setzero_f32(), v_d3 = vx_setzero_f32();
    for (; j <= n - 4 * VTraits<v_float32>::vlanes(); j += 4 * VTraits<v_float32>::vlanes())
    {
        v_float32 t0 = v_sub(vx_load(a + j), vx_load(b + j));
        v_float32 t1 = v_sub(vx_load(a + j + VTraits<v_float32>::vlanes()), vx_load(b + j + VTraits<v_float32>::vlanes()));
        v_d0 = v_muladd(t0, t0, v_d0);
        v_float32 t2 = v_sub(vx_load(a + j + 2 * VTraits<v_float32>::vlanes()), vx_load(b + j + 2 * VTraits<v_float32>::vlanes()));
        v_d1 = v_muladd(t1, t1, v_d1);
        v_float32 t3 = v_sub(vx_load(a + j + 3 * VTraits<v_float32>::vlanes()), vx_load(b + j + 3 * VTraits<v_float32>::vlanes()));
        v_d2 = v_muladd(t2, t2, v_d2);
        v_d3 = v_muladd(t3, t3, v_d3);
    }
    d = v_reduce_sum(v_add(v_add(v_add(v_d0, v_d1), v_d2), v_d3));
#endif
    for( ; j < n; j++ )
    {
        float t = a[j] - b[j];
        d += t*t;
    }
    return d;
}
180
181
// Returns the L1 distance between two float arrays:
// sum over j in [0, n) of |a[j] - b[j]|, accumulated in single precision.
//
// a, b - input arrays, both read for n elements
// n    - number of elements
//
// With SIMD enabled the bulk is processed four vectors per iteration using
// four independent accumulators; the scalar loop handles the remainder.
float normL1_(const float* a, const float* b, int n)
{
    int j = 0; float d = 0.f;
#if (CV_SIMD || CV_SIMD_SCALABLE)
    v_float32 v_d0 = vx_setzero_f32(), v_d1 = vx_setzero_f32();
    v_float32 v_d2 = vx_setzero_f32(), v_d3 = vx_setzero_f32();
    for (; j <= n - 4 * VTraits<v_float32>::vlanes(); j += 4 * VTraits<v_float32>::vlanes())
    {
        v_d0 = v_add(v_d0, v_absdiff(vx_load(a + j), vx_load(b + j)));
        v_d1 = v_add(v_d1, v_absdiff(vx_load(a + j + VTraits<v_float32>::vlanes()), vx_load(b + j + VTraits<v_float32>::vlanes())));
        v_d2 = v_add(v_d2, v_absdiff(vx_load(a + j + 2 * VTraits<v_float32>::vlanes()), vx_load(b + j + 2 * VTraits<v_float32>::vlanes())));
        v_d3 = v_add(v_d3, v_absdiff(vx_load(a + j + 3 * VTraits<v_float32>::vlanes()), vx_load(b + j + 3 * VTraits<v_float32>::vlanes())));
    }
    d = v_reduce_sum(v_add(v_add(v_add(v_d0, v_d1), v_d2), v_d3));
#endif
    for( ; j < n; j++ )
        d += std::abs(a[j] - b[j]);
    return d;
}
201
202	int normL1_(const uchar* a, const uchar* b, int n)
203	{
204	int j = `0`, d = `0`;
205	#if (CV_SIMD \|\| CV_SIMD_SCALABLE)
206	for (; j <= n - `4` * VTraits<v_uint8>::vlanes(); j += `4` * VTraits<v_uint8>::vlanes())
207	d += v_reduce_sad(a: vx_load(ptr: a + j), b: vx_load(ptr: b + j)) +
208	v_reduce_sad(a: vx_load(ptr: a + j + VTraits<v_uint8>::vlanes()), b: vx_load(ptr: b + j + VTraits<v_uint8>::vlanes())) +
209	v_reduce_sad(a: vx_load(ptr: a + j + `2` * VTraits<v_uint8>::vlanes()), b: vx_load(ptr: b + j + `2` * VTraits<v_uint8>::vlanes())) +
210	v_reduce_sad(a: vx_load(ptr: a + j + `3` * VTraits<v_uint8>::vlanes()), b: vx_load(ptr: b + j + `3` * VTraits<v_uint8>::vlanes()));
211	#endif
212	for( ; j < n; j++ )
213	d += std::abs(x: a[j] - b[j]);
214	return d;
215	}
216
217	} //cv::hal
218
219	//==================================================================================================
220
221	typedef int (NormFunc)(const* uchar, const* uchar, uchar, int, int);
222	typedef int (NormDiffFunc)(const* uchar, const* uchar, const* uchar, uchar, int, int);
223
#ifdef HAVE_OPENCL

// OpenCL path for the single-array norm.
//
// _src     - input array (at most 4 channels are handled)
// normType - NORM_INF, NORM_L1, NORM_L2 or NORM_L2SQR
// _mask    - optional mask
// result   - receives the norm on success
//
// Returns false when this path does not handle the request (unsupported
// depth, channel count, norm type or device capability, or a failed OpenCL
// helper); the caller is then expected to fall back to the CPU path.
static bool ocl_norm( InputArray _src, int normType, InputArray _mask, double & result )
{
    const ocl::Device & d = ocl::Device::getDefault();

#ifdef __ANDROID__
    if (d.isNVidia())
        return false;
#endif
    const int cn = _src.channels();
    if (cn > 4)
        return false;
    int type = _src.type(), depth = CV_MAT_DEPTH(type);
    bool doubleSupport = d.doubleFPConfig() > 0,
            haveMask = _mask.kind() != _InputArray::NONE;

    if (depth >= CV_16F)
        return false; // TODO: support FP16

    if ( !(normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR) ||
         (!doubleSupport && depth == CV_64F))
        return false;

    UMat src = _src.getUMat();

    if (normType == NORM_INF)
    {
        // The infinity norm is the maximum absolute value; unsigned depths need no abs().
        if (!ocl_minMaxIdx(_src, NULL, &result, NULL, NULL, _mask,
                           std::max(depth, CV_32S), depth != CV_8U && depth != CV_16U))
            return false;
    }
    else if (normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR)
    {
        Scalar sc;
        bool unstype = depth == CV_8U || depth == CV_16U;

        // Without a mask the array is reshaped to one channel, so the whole sum lands in sc[0].
        if ( !ocl_sum(haveMask ? src : src.reshape(1), sc, normType == NORM_L2 || normType == NORM_L2SQR ?
                      OCL_OP_SUM_SQR : (unstype ? OCL_OP_SUM : OCL_OP_SUM_ABS), _mask) )
            return false;

        double s = 0.0;
        for (int i = 0; i < (haveMask ? cn : 1); ++i)
            s += sc[i];

        result = normType == NORM_L1 || normType == NORM_L2SQR ? s : std::sqrt(s);
    }

    return true;
}

#endif
276
277	static NormFunc getNormFunc(int normType, int depth) {
278	CV_INSTRUMENT_REGION();
279	CV_CPU_DISPATCH(getNormFunc, (normType, depth), CV_CPU_DISPATCH_MODES_ALL);
280	}
281	static NormDiffFunc getNormDiffFunc(int normType, int depth) {
282	CV_INSTRUMENT_REGION();
283	CV_CPU_DISPATCH(getNormDiffFunc, (normType, depth), CV_CPU_DISPATCH_MODES_ALL);
284	}
285
286	double norm( InputArray _src, int normType, InputArray _mask )
287	{
288	CV_INSTRUMENT_REGION();
289
290	normType &= NORM_TYPE_MASK;
291	CV_Assert( normType == NORM_INF \|\| normType == NORM_L1 \|\|
292	normType == NORM_L2 \|\| normType == NORM_L2SQR \|\|
293	((normType == NORM_HAMMING \|\| normType == NORM_HAMMING2) && _src.type() == CV_8U) );
294
295	#if defined HAVE_OPENCL
296	double _result = `0`;
297	#endif
298
299	#ifdef HAVE_OPENCL
300	CV_OCL_RUN_(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= `2`,
301	ocl_norm(_src, normType, _mask, result&: _result),
302	_result)
303	#endif
304
305	Mat src = _src.getMat(), mask = _mask.getMat();
306	int depth = src.depth(), cn = src.channels();
307	if( src.dims <= `2` )
308	{
309	double result;
310	CALL_HAL_RET(norm, cv_hal_norm, result, src.data, src.step, mask.data, mask.step, src.cols, src.rows, src.type(), normType);
311	}
312	else if( src.isContinuous() && mask.isContinuous() )
313	{
314	double result;
315	CALL_HAL_RET(norm, cv_hal_norm, result, src.data, `0`, mask.data, `0`, (int)src.total(), `1`, src.type(), normType);
316	}
317
318	NormFunc func = getNormFunc(normType: normType >> `1`, depth: depth == CV_16F ? CV_32F : depth);
319	CV_Assert( (normType >> `1`) >= `3` \|\| func != `0` );
320
321	if( src.isContinuous() && mask.empty() )
322	{
323	size_t len = src.total()*cn;
324	if( len == (size_t)(int)len )
325	{
326	if( depth == CV_32F )
327	{
328	const uchar* data = src.ptr<const uchar>();
329
330	if( normType == NORM_L2 \|\| normType == NORM_L2SQR \|\| normType == NORM_L1 )
331	{
332	double result = `0`;
333	func(data, `0`, (uchar)&result, (int*)len, `1`);
334	return normType == NORM_L2 ? std::sqrt(x: result) : result;
335	}
336	if( normType == NORM_INF )
337	{
338	float result = `0`;
339	func(data, `0`, (uchar)&result, (int*)len, `1`);
340	return result;
341	}
342	}
343	if( depth == CV_8U )
344	{
345	const uchar* data = src.ptr<uchar>();
346
347	if( normType == NORM_HAMMING )
348	{
349	return hal::normHamming(a: data, n: (int)len, cellSize: `1`);
350	}
351
352	if( normType == NORM_HAMMING2 )
353	{
354	return hal::normHamming(a: data, n: (int)len, cellSize: `2`);
355	}
356	}
357	}
358	}
359
360	CV_Assert( mask.empty() \|\| mask.type() == CV_8U );
361
362	if( normType == NORM_HAMMING \|\| normType == NORM_HAMMING2 )
363	{
364	if( !mask.empty() )
365	{
366	Mat temp;
367	bitwise_and(src1: src, src2: mask, dst: temp);
368	return norm(src: temp, normType);
369	}
370	int cellSize = normType == NORM_HAMMING ? `1` : `2`;
371
372	const Mat* arrays[] = {&src, `0`};
373	uchar* ptrs[`1`] = {};
374	NAryMatIterator it(arrays, ptrs);
375	int total = (int)it.size;
376	int result = `0`;
377
378	for( size_t i = `0`; i < it.nplanes; i++, ++it )
379	{
380	result += hal::normHamming(a: ptrs[`0`], n: total, cellSize);
381	}
382
383	return result;
384	}
385
386	const Mat* arrays[] = {&src, &mask, `0`};
387	uchar* ptrs[`2`] = {};
388	union
389	{
390	double d;
391	int i;
392	float f;
393	}
394	result;
395	result.d = `0`;
396	NAryMatIterator it(arrays, ptrs);
397	CV_CheckLT((size_t)it.size, (size_t)INT_MAX, "");
398
399	if ((normType == NORM_L1 && depth <= CV_16S) \|\|
400	((normType == NORM_L2 \|\| normType == NORM_L2SQR) && depth <= CV_8S))
401	{
402	// special case to handle "integer" overflow in accumulator
403	const size_t esz = src.elemSize();
404	const int total = (int)it.size;
405	const int intSumBlockSize = (normType == NORM_L1 && depth <= CV_8S ? (`1` << `23`) : (`1` << `15`))/cn;
406	const int blockSize = std::min(a: total, b: intSumBlockSize);
407	int isum = `0`;
408	int count = `0`;
409
410	for (size_t i = `0`; i < it.nplanes; i++, ++it)
411	{
412	for (int j = `0`; j < total; j += blockSize)
413	{
414	int bsz = std::min(a: total - j, b: blockSize);
415	func(ptrs[`0`], ptrs[`1`], (uchar*)&isum, bsz, cn);
416	count += bsz;
417	if (count + blockSize >= intSumBlockSize \|\| (i+`1` >= it.nplanes && j+bsz >= total))
418	{
419	result.d += isum;
420	isum = `0`;
421	count = `0`;
422	}
423	ptrs[`0`] += bsz*esz;
424	if (ptrs[`1`])
425	ptrs[`1`] += bsz;
426	}
427	}
428	}
429	else if (depth == CV_16F)
430	{
431	const size_t esz = src.elemSize();
432	const int total = (int)it.size;
433	const int blockSize = std::min(a: total, b: divUp(a: `1024`, b: cn));
434	AutoBuffer<float, `1026`/divUp(1024,3)3/> fltbuf(blockSize cn);
435	float* data0 = fltbuf.data();
436	for (size_t i = `0`; i < it.nplanes; i++, ++it)
437	{
438	for (int j = `0`; j < total; j += blockSize)
439	{
440	int bsz = std::min(a: total - j, b: blockSize);
441	hal::cvt16f32f(src: (const hfloat)ptrs[`0`], dst: data0, len: bsz cn);
442	func((uchar)data0, ptrs[`1`], (uchar)&result.f, bsz, cn);
443	ptrs[`0`] += bsz*esz;
444	if (ptrs[`1`])
445	ptrs[`1`] += bsz;
446	}
447	}
448	}
449	else
450	{
451	// generic implementation
452	for (size_t i = `0`; i < it.nplanes; i++, ++it)
453	{
454	func(ptrs[`0`], ptrs[`1`], (uchar)&result, (int*)it.size, cn);
455	}
456	}
457
458	if( normType == NORM_INF )
459	{
460	if(depth == CV_64F)
461	return result.d;
462	else if (depth == CV_32F \|\| depth == CV_16F)
463	return result.f;
464	else
465	return result.i;
466	}
467	else if( normType == NORM_L2 )
468	return std::sqrt(x: result.d);
469
470	return result.d;
471	}
472
473	//==================================================================================================
474
#ifdef HAVE_OPENCL
// OpenCL path for the norm of the difference of two arrays.
//
// _src1, _src2 - input arrays (at most 4 channels are handled)
// normType     - norm type, optionally combined with NORM_RELATIVE
// _mask        - optional mask
// result       - receives the norm on success; left untouched on failure
//
// Returns false when this path does not handle the request or an OpenCL
// helper fails; the caller is then expected to fall back to the CPU path.
static bool ocl_norm( InputArray _src1, InputArray _src2, int normType, InputArray _mask, double & result )
{
#ifdef __ANDROID__
    if (ocl::Device::getDefault().isNVidia())
        return false;
#endif

    Scalar sc1, sc2;
    int cn = _src1.channels();
    if (cn > 4)
        return false;
    int type = _src1.type(), depth = CV_MAT_DEPTH(type);
    bool relative = (normType & NORM_RELATIVE) != 0;
    normType &= ~NORM_RELATIVE;
    bool normsum = normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR;

#ifdef __APPLE__
    if(normType == NORM_L1 && type == CV_16UC3 && !_mask.empty())
        return false;
#endif

    if (normsum)
    {
        if (!ocl_sum(_src1, sc1, normType == NORM_L2 || normType == NORM_L2SQR ?
                     OCL_OP_SUM_SQR : OCL_OP_SUM, _mask, _src2, relative, sc2))
            return false;
    }
    else
    {
        if (!ocl_minMaxIdx(_src1, NULL, &sc1[0], NULL, NULL, _mask, std::max(CV_32S, depth),
                           false, _src2, relative ? &sc2[0] : NULL))
            return false;
        cn = 1; // the min/max helper yields a single value in element 0
    }

    // Accumulate in locals so the outcome does not depend on the value the
    // caller left in `result`.
    double s1 = 0, s2 = 0;
    for (int i = 0; i < cn; ++i)
    {
        s1 += sc1[i];
        if (relative)
            s2 += sc2[i];
    }

    if (normType == NORM_L2)
    {
        s1 = std::sqrt(s1);
        if (relative)
            s2 = std::sqrt(s2);
    }

    if (relative)
        s1 /= (s2 + DBL_EPSILON); // the epsilon guards against division by zero

    result = s1;
    return true;
} // ocl_norm()
#endif // HAVE_OPENCL
532
533	double norm( InputArray _src1, InputArray _src2, int normType, InputArray _mask )
534	{
535	CV_INSTRUMENT_REGION();
536
537	CV_CheckTypeEQ(_src1.type(), _src2.type(), "Input type mismatch");
538	CV_Assert(_src1.sameSize(_src2));
539
540	#if defined HAVE_OPENCL
541	double _result = `0`;
542	#endif
543
544	#ifdef HAVE_OPENCL
545	CV_OCL_RUN_(OCL_PERFORMANCE_CHECK(_src1.isUMat()),
546	ocl_norm(_src1, _src2, normType, _mask, result&: _result),
547	_result)
548	#endif
549
550	Mat src1 = _src1.getMat(), src2 = _src2.getMat(), mask = _mask.getMat();
551	int depth = src1.depth(), cn = src1.channels();
552	if( src1.dims <= `2` )
553	{
554	double result;
555	CALL_HAL_RET(normDiff, cv_hal_normDiff, result, src1.data, src1.step, src2.data, src2.step, mask.data, mask.step, src1.cols, src1.rows, src1.type(), normType);
556	}
557	else if( src1.isContinuous() && src2.isContinuous() && mask.isContinuous() )
558	{
559	double result;
560	CALL_HAL_RET(normDiff, cv_hal_normDiff, result, src1.data, `0`, src2.data, `0`, mask.data, `0`, (int)src1.total(), `1`, src1.type(), normType);
561	}
562
563	if( normType & CV_RELATIVE )
564	{
565	return norm(_src1, _src2, normType: normType & ~CV_RELATIVE, _mask)/(norm(src: _src2, normType, _mask) + DBL_EPSILON);
566	}
567
568	normType &= `7`;
569	CV_Assert( normType == NORM_INF \|\| normType == NORM_L1 \|\|
570	normType == NORM_L2 \|\| normType == NORM_L2SQR \|\|
571	((normType == NORM_HAMMING \|\| normType == NORM_HAMMING2) && src1.type() == CV_8U) );
572
573	NormDiffFunc func = getNormDiffFunc(normType: normType >> `1`, depth: depth == CV_16F ? CV_32F : depth);
574	CV_Assert( (normType >> `1`) >= `3` \|\| func != `0` );
575
576	if( src1.isContinuous() && src2.isContinuous() && mask.empty() )
577	{
578	size_t len = src1.total()*src1.channels();
579	if( len == (size_t)(int)len )
580	{
581	if( src1.depth() == CV_32F )
582	{
583	const uchar* data1 = src1.ptr<const uchar>();
584	const uchar* data2 = src2.ptr<const uchar>();
585
586	if( normType == NORM_L2 \|\| normType == NORM_L2SQR \|\| normType == NORM_L1 )
587	{
588	double result = `0`;
589	func(data1, data2, `0`, (uchar)&result, (int*)len, `1`);
590	return normType == NORM_L2 ? std::sqrt(x: result) : result;
591	}
592	if( normType == NORM_INF )
593	{
594	float result = `0`;
595	func(data1, data2, `0`, (uchar)&result, (int*)len, `1`);
596	return result;
597	}
598	}
599	}
600	}
601
602	CV_Assert( mask.empty() \|\| mask.type() == CV_8U );
603
604	if( normType == NORM_HAMMING \|\| normType == NORM_HAMMING2 )
605	{
606	if( !mask.empty() )
607	{
608	Mat temp;
609	bitwise_xor(src1, src2, dst: temp);
610	bitwise_and(src1: temp, src2: mask, dst: temp);
611	return norm(src: temp, normType);
612	}
613	int cellSize = normType == NORM_HAMMING ? `1` : `2`;
614
615	const Mat* arrays[] = {&src1, &src2, `0`};
616	uchar* ptrs[`2`] = {};
617	NAryMatIterator it(arrays, ptrs);
618	int total = (int)it.size;
619	int result = `0`;
620
621	for( size_t i = `0`; i < it.nplanes; i++, ++it )
622	{
623	result += hal::normHamming(a: ptrs[`0`], b: ptrs[`1`], n: total, cellSize);
624	}
625
626	return result;
627	}
628
629	const Mat* arrays[] = {&src1, &src2, &mask, `0`};
630	uchar* ptrs[`3`] = {};
631	union
632	{
633	double d;
634	float f;
635	int i;
636	unsigned u;
637	}
638	result;
639	result.d = `0`;
640	NAryMatIterator it(arrays, ptrs);
641	CV_CheckLT((size_t)it.size, (size_t)INT_MAX, "");
642
643	if ((normType == NORM_L1 && depth <= CV_16S) \|\|
644	((normType == NORM_L2 \|\| normType == NORM_L2SQR) && depth <= CV_8S))
645	{
646	// special case to handle "integer" overflow in accumulator
647	const size_t esz = src1.elemSize();
648	const int total = (int)it.size;
649	const int intSumBlockSize = (normType == NORM_L1 && depth <= CV_8S ? (`1` << `23`) : (`1` << `15`))/cn;
650	const int blockSize = std::min(a: total, b: intSumBlockSize);
651	int isum = `0`;
652	int count = `0`;
653
654	for (size_t i = `0`; i < it.nplanes; i++, ++it)
655	{
656	for (int j = `0`; j < total; j += blockSize)
657	{
658	int bsz = std::min(a: total - j, b: blockSize);
659	func(ptrs[`0`], ptrs[`1`], ptrs[`2`], (uchar*)&isum, bsz, cn);
660	count += bsz;
661	if (count + blockSize >= intSumBlockSize \|\| (i+`1` >= it.nplanes && j+bsz >= total))
662	{
663	result.d += isum;
664	isum = `0`;
665	count = `0`;
666	}
667	ptrs[`0`] += bsz*esz;
668	ptrs[`1`] += bsz*esz;
669	if (ptrs[`2`])
670	ptrs[`2`] += bsz;
671	}
672	}
673	}
674	else if (depth == CV_16F)
675	{
676	const size_t esz = src1.elemSize();
677	const int total = (int)it.size;
678	const int blockSize = std::min(a: total, b: divUp(a: `512`, b: cn));
679	AutoBuffer<float, `1026`/divUp(512,3)32/> fltbuf(blockSize * cn * `2`);
680	float* data0 = fltbuf.data();
681	float* data1 = fltbuf.data() + blockSize * cn;
682	for (size_t i = `0`; i < it.nplanes; i++, ++it)
683	{
684	for (int j = `0`; j < total; j += blockSize)
685	{
686	int bsz = std::min(a: total - j, b: blockSize);
687	hal::cvt16f32f(src: (const hfloat)ptrs[`0`], dst: data0, len: bsz cn);
688	hal::cvt16f32f(src: (const hfloat)ptrs[`1`], dst: data1, len: bsz cn);
689	func((uchar)data0, (uchar)data1, ptrs[`2`], (uchar*)&result.f, bsz, cn);
690	ptrs[`0`] += bsz*esz;
691	ptrs[`1`] += bsz*esz;
692	if (ptrs[`2`])
693	ptrs[`2`] += bsz;
694	}
695	}
696	}
697	else
698	{
699	// generic implementation
700	for (size_t i = `0`; i < it.nplanes; i++, ++it)
701	{
702	func(ptrs[`0`], ptrs[`1`], ptrs[`2`], (uchar)&result, (int*)it.size, cn);
703	}
704	}
705
706	if( normType == NORM_INF )
707	{
708	if (depth == CV_64F)
709	return result.d;
710	else if (depth == CV_32F \|\| depth == CV_16F)
711	return result.f;
712	else
713	return result.u;
714	}
715	else if( normType == NORM_L2 )
716	return std::sqrt(x: result.d);
717
718	return result.d;
719	}
720
721	cv::Hamming::ResultType Hamming::operator()( const unsigned char* a, const unsigned char* b, int size ) const
722	{
723	return cv::hal::normHamming(a, b, n: size);
724	}
725
726	double PSNR(InputArray _src1, InputArray _src2, double R)
727	{
728	CV_INSTRUMENT_REGION();
729
730	//Input arrays must have depth CV_8U
731	CV_Assert( _src1.type() == _src2.type() );
732
733	double diff = std::sqrt(x: norm(_src1, _src2, normType: NORM_L2SQR)/(_src1.total()*_src1.channels()));
734	return `20`*log10(x: R/(diff+DBL_EPSILON));
735	}
736
737
#ifdef HAVE_OPENCL
// OpenCL path for normalize(): writes src * scale + delta, converted to
// `dtype`, into _dst, restricted to the masked elements when a mask is given.
//
// _src  - input array
// _dst  - output array
// _mask - optional mask
// dtype - destination type passed to convertTo / used for the kernel types
// scale - multiplicative factor
// delta - additive offset
//
// Returns false when the kernel cannot be used (no double support for 64F
// data, or the kernel failed to build); true otherwise.
static bool ocl_normalize( InputArray _src, InputOutputArray _dst, InputArray _mask, int dtype,
                           double scale, double delta )
{
    UMat src = _src.getUMat();

    if( _mask.empty() )
        src.convertTo( _dst, dtype, scale, delta );
    else if (src.channels() <= 4)
    {
        const ocl::Device & dev = ocl::Device::getDefault();

        int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), cn = CV_MAT_CN(stype),
                ddepth = CV_MAT_DEPTH(dtype), wdepth = std::max(CV_32F, std::max(sdepth, ddepth)),
                rowsPerWI = dev.isIntel() ? 4 : 1;

        float fscale = static_cast<float>(scale), fdelta = static_cast<float>(delta);
        bool haveScale = std::fabs(scale - 1) > DBL_EPSILON,
                haveZeroScale = !(std::fabs(scale) > DBL_EPSILON),
                haveDelta = std::fabs(delta) > DBL_EPSILON,
                doubleSupport = dev.doubleFPConfig() > 0;

        // Identity transform with the same type: a masked copy is enough.
        if (!haveScale && !haveDelta && stype == dtype)
        {
            _src.copyTo(_dst, _mask);
            return true;
        }
        // Zero scale: every masked element becomes the constant delta.
        if (haveZeroScale)
        {
            _dst.setTo(Scalar(delta), _mask);
            return true;
        }

        if ((sdepth == CV_64F || ddepth == CV_64F) && !doubleSupport)
            return false;

        char cvt[2][50];
        String opts = format("-D srcT=%s -D dstT=%s -D convertToWT=%s -D cn=%d -D rowsPerWI=%d"
                             " -D convertToDT=%s -D workT=%s%s%s%s -D srcT1=%s -D dstT1=%s",
                             ocl::typeToStr(stype), ocl::typeToStr(dtype),
                             ocl::convertTypeStr(sdepth, wdepth, cn, cvt[0], sizeof(cvt[0])), cn,
                             rowsPerWI, ocl::convertTypeStr(wdepth, ddepth, cn, cvt[1], sizeof(cvt[1])),
                             ocl::typeToStr(CV_MAKE_TYPE(wdepth, cn)),
                             doubleSupport ? " -D DOUBLE_SUPPORT" : "",
                             haveScale ? " -D HAVE_SCALE" : "",
                             haveDelta ? " -D HAVE_DELTA" : "",
                             ocl::typeToStr(sdepth), ocl::typeToStr(ddepth));

        ocl::Kernel k("normalizek", ocl::core::normalize_oclsrc, opts);
        if (k.empty())
            return false;

        UMat mask = _mask.getUMat(), dst = _dst.getUMat();

        ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
                maskarg = ocl::KernelArg::ReadOnlyNoSize(mask),
                dstarg = ocl::KernelArg::ReadWrite(dst);

        // The argument list has to match the HAVE_SCALE / HAVE_DELTA build options.
        if (haveScale)
        {
            if (haveDelta)
                k.args(srcarg, maskarg, dstarg, fscale, fdelta);
            else
                k.args(srcarg, maskarg, dstarg, fscale);
        }
        else
        {
            if (haveDelta)
                k.args(srcarg, maskarg, dstarg, fdelta);
            else
                k.args(srcarg, maskarg, dstarg);
        }

        // Each work-item handles rowsPerWI rows, so round the row count up.
        size_t globalsize[2] = { (size_t)src.cols, ((size_t)src.rows + rowsPerWI - 1) / rowsPerWI };
        return k.run(2, globalsize, NULL, false);
    }
    else
    {
        // More than 4 channels: convert into a temporary, then do a masked copy.
        UMat temp;
        src.convertTo( temp, dtype, scale, delta );
        temp.copyTo( _dst, _mask );
    }

    return true;
} // ocl_normalize
#endif // HAVE_OPENCL
824
825	void normalize(InputArray _src, InputOutputArray _dst, double a, double b,
826	int norm_type, int rtype, InputArray _mask)
827	{
828	CV_INSTRUMENT_REGION();
829
830	double scale = `1`, shift = `0`;
831	int type = _src.type(), depth = CV_MAT_DEPTH(type);
832
833	if( rtype < `0` )
834	rtype = _dst.fixedType() ? _dst.depth() : depth;
835
836	if( norm_type == CV_MINMAX )
837	{
838	double smin = `0`, smax = `0`;
839	double dmin = MIN( a, b ), dmax = MAX( a, b );
840	minMaxIdx( src: _src, minVal: &smin, maxVal: &smax, minIdx: `0`, maxIdx: `0`, mask: _mask );
841	scale = (dmax - dmin)*(smax - smin > DBL_EPSILON ? `1.`/(smax - smin) : `0`);
842	if( rtype == CV_32F )
843	{
844	scale = (float)scale;
845	shift = (float)dmin - (float)(smin*scale);
846	}
847	else
848	shift = dmin - smin*scale;
849	}
850	else if( norm_type == CV_L2 \|\| norm_type == CV_L1 \|\| norm_type == CV_C )
851	{
852	scale = norm( _src, normType: norm_type, _mask );
853	scale = scale > DBL_EPSILON ? a/scale : `0.`;
854	shift = `0`;
855	}
856	else
857	CV_Error( cv::Error::StsBadArg, "Unknown/unsupported norm type" );
858
859	CV_OCL_RUN(_dst.isUMat(),
860	ocl_normalize(_src, _dst, _mask, dtype: rtype, scale, delta: shift))
861
862	Mat src = _src.getMat();
863	if( _mask.empty() )
864	src.convertTo( m: _dst, rtype, alpha: scale, beta: shift );
865	else
866	{
867	Mat temp;
868	src.convertTo( m: temp, rtype, alpha: scale, beta: shift );
869	temp.copyTo( m: _dst, mask: _mask );
870	}
871	}
872
873	} // namespace
874

source code of opencv/modules/core/src/norm.dispatch.cpp