convert_scale.simd.hpp source code [opencv/modules/core/src/convert_scale.simd.hpp]

1	// This file is part of OpenCV project.
2	// It is subject to the license terms in the LICENSE file found in the top-level directory
3	// of this distribution and at http://opencv.org/license.html
4
5
6	#include "precomp.hpp"
7	#include "convert.hpp"
8
9	namespace cv {
10	CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
11
// Returns the cvtScaleAbs* wrapper stored in the lookup table for `depth`
// (the table entry may be null when no wrapper is registered for that slot).
12	BinaryFunc getCvtScaleAbsFunc(int depth);
// Returns the cvtScale* wrapper selected by the (destination depth, source depth) pair.
13	BinaryFunc getConvertScaleFunc(int sdepth, int ddepth);
14
15	#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
16
17	/**************************************************************************************\
18	* convertScale[Abs] *
19	\**************************************************************************************/
20
21	template<typename _Ts, typename _Td> inline void
22	cvtabs_32f( const _Ts* src, size_t sstep, _Td* dst, size_t dstep,
23	Size size, float a, float b )
24	{
25	#if (CV_SIMD \|\| CV_SIMD_SCALABLE)
26	v_float32 va = vx_setall_f32(v: a), vb = vx_setall_f32(v: b);
27	const int VECSZ = VTraits<v_float32>::vlanes()*`2`;
28	#endif
29	sstep /= sizeof(src[`0`]);
30	dstep /= sizeof(dst[`0`]);
31
32	for( int i = `0`; i < size.height; i++, src += sstep, dst += dstep )
33	{
34	int j = `0`;
35	#if (CV_SIMD \|\| CV_SIMD_SCALABLE)
36	for( ; j < size.width; j += VECSZ )
37	{
38	if( j > size.width - VECSZ )
39	{
40	if( j == `0` \|\| src == (_Ts*)dst )
41	break;
42	j = size.width - VECSZ;
43	}
44	v_float32 v0, v1;
45	vx_load_pair_as(src + j, v0, v1);
46	v0 = v_fma(a: v0, b: va, c: vb);
47	v1 = v_fma(a: v1, b: va, c: vb);
48	v_store_pair_as(dst + j, v_abs(x: v0), v_abs(x: v1));
49	}
50	#endif
51	for( ; j < size.width; j++ )
52	dst[j] = saturate_cast<_Td>(std::abs(src[j]*a + b));
53	}
54	}
55
56	// variant for conversions 16f <-> ... w/o unrolling
57	template<typename _Ts, typename _Td> inline void
58	cvtabs1_32f( const _Ts* src, size_t sstep, _Td* dst, size_t dstep,
59	Size size, float a, float b )
60	{
61	#if (CV_SIMD \|\| CV_SIMD_SCALABLE)
62	v_float32 va = vx_setall_f32(v: a), vb = vx_setall_f32(v: b);
63	const int VECSZ = VTraits<v_float32>::vlanes()*`2`;
64	#endif
65	sstep /= sizeof(src[`0`]);
66	dstep /= sizeof(dst[`0`]);
67
68	for( int i = `0`; i < size.height; i++, src += sstep, dst += dstep )
69	{
70	int j = `0`;
71	#if (CV_SIMD \|\| CV_SIMD_SCALABLE)
72	for( ; j < size.width; j += VECSZ )
73	{
74	if( j > size.width - VECSZ )
75	{
76	if( j == `0` \|\| src == (_Ts*)dst )
77	break;
78	j = size.width - VECSZ;
79	}
80	v_float32 v0;
81	vx_load_as(src + j, v0);
82	v0 = v_fma(a: v0, b: va, c: vb);
83	v_store_as(dst + j, v_abs(x: v0));
84	}
85	#endif
86	for( ; j < size.width; j++ )
87	dst[j] = saturate_cast<_Td>(src[j]*a + b);
88	}
89	}
90
91	template<typename _Ts, typename _Td> inline void
92	cvt_32f( const _Ts* src, size_t sstep, _Td* dst, size_t dstep,
93	Size size, float a, float b )
94	{
95	#if (CV_SIMD \|\| CV_SIMD_SCALABLE)
96	v_float32 va = vx_setall_f32(v: a), vb = vx_setall_f32(v: b);
97	const int VECSZ = VTraits<v_float32>::vlanes()*`2`;
98	#endif
99	sstep /= sizeof(src[`0`]);
100	dstep /= sizeof(dst[`0`]);
101
102	for( int i = `0`; i < size.height; i++, src += sstep, dst += dstep )
103	{
104	int j = `0`;
105	#if (CV_SIMD \|\| CV_SIMD_SCALABLE)
106	for( ; j < size.width; j += VECSZ )
107	{
108	if( j > size.width - VECSZ )
109	{
110	if( j == `0` \|\| src == (_Ts*)dst )
111	break;
112	j = size.width - VECSZ;
113	}
114	v_float32 v0, v1;
115	vx_load_pair_as(src + j, v0, v1);
116	v0 = v_fma(a: v0, b: va, c: vb);
117	v1 = v_fma(a: v1, b: va, c: vb);
118	v_store_pair_as(dst + j, v0, v1);
119	}
120	#endif
121	for( ; j < size.width; j++ )
122	dst[j] = saturate_cast<_Td>(src[j]*a + b);
123	}
124	}
125
126	// variant for conversions 16f <-> ... w/o unrolling
127	template<typename _Ts, typename _Td> inline void
128	cvt1_32f( const _Ts* src, size_t sstep, _Td* dst, size_t dstep,
129	Size size, float a, float b )
130	{
131	#if (CV_SIMD \|\| CV_SIMD_SCALABLE)
132	v_float32 va = vx_setall_f32(v: a), vb = vx_setall_f32(v: b);
133	const int VECSZ = VTraits<v_float32>::vlanes();
134	#endif
135	sstep /= sizeof(src[`0`]);
136	dstep /= sizeof(dst[`0`]);
137
138	for( int i = `0`; i < size.height; i++, src += sstep, dst += dstep )
139	{
140	int j = `0`;
141	#if (CV_SIMD \|\| CV_SIMD_SCALABLE)
142	for( ; j < size.width; j += VECSZ )
143	{
144	if( j > size.width - VECSZ )
145	{
146	if( j == `0` \|\| src == (_Ts*)dst )
147	break;
148	j = size.width - VECSZ;
149	}
150	v_float32 v0;
151	vx_load_as(src + j, v0);
152	v0 = v_fma(a: v0, b: va, c: vb);
153	v_store_as(dst + j, v0);
154	}
155	#endif
156	for( ; j < size.width; j++ )
157	dst[j] = saturate_cast<_Td>(src[j]*a + b);
158	}
159	}
160
161
162	template<typename _Ts, typename _Td> inline void
163	cvt_64f( const _Ts* src, size_t sstep, _Td* dst, size_t dstep,
164	Size size, double a, double b )
165	{
166	#if (CV_SIMD_64F \|\| CV_SIMD_SCALABLE_64F)
167	v_float64 va = vx_setall_f64(v: a), vb = vx_setall_f64(v: b);
168	const int VECSZ = VTraits<v_float64>::vlanes()*`2`;
169	#endif
170	sstep /= sizeof(src[`0`]);
171	dstep /= sizeof(dst[`0`]);
172
173	for( int i = `0`; i < size.height; i++, src += sstep, dst += dstep )
174	{
175	int j = `0`;
176	#if (CV_SIMD_64F \|\| CV_SIMD_SCALABLE_64F)
177	for( ; j < size.width; j += VECSZ )
178	{
179	if( j > size.width - VECSZ )
180	{
181	if( j == `0` \|\| src == (_Ts*)dst )
182	break;
183	j = size.width - VECSZ;
184	}
185	v_float64 v0, v1;
186	vx_load_pair_as(src + j, v0, v1);
187	v0 = v_fma(a: v0, b: va, c: vb);
188	v1 = v_fma(a: v1, b: va, c: vb);
189	v_store_pair_as(dst + j, v0, v1);
190	}
191	#endif
192	for( ; j < size.width; j++ )
193	dst[j] = saturate_cast<_Td>(src[j]*a + b);
194	}
195	}
196
197	//==================================================================================================
198
// Defines the static wrapper cvtScaleAbs<suffix>. The wrapper reinterprets the
// untyped byte pointers as stype/dtype arrays, reads the two doubles behind
// scale_ (multiplier first, offset second), converts them to wtype and forwards
// everything to the kernel `cvt`. The second source pointer/step pair is unused.
#define DEF_CVT_SCALE_ABS_FUNC(suffix, cvt, stype, dtype, wtype) \
static void cvtScaleAbs##suffix( const uchar* src_, size_t sstep, const uchar*, size_t, \
                                 uchar* dst_, size_t dstep, Size size, void* scale_) \
{ \
    const double* params = static_cast<const double*>(scale_); \
    const wtype alpha = (wtype)params[0]; \
    const wtype beta = (wtype)params[1]; \
    cvt(reinterpret_cast<const stype*>(src_), sstep, \
        reinterpret_cast<dtype*>(dst_), dstep, size, alpha, beta); \
}
208
209
// Defines the static wrapper cvtScale<suffix>. The wrapper reinterprets the
// untyped byte pointers as stype/dtype arrays, reads the two doubles behind
// scale_ (multiplier first, offset second), converts them to wtype and forwards
// everything to the kernel `cvt`. The second source pointer/step pair is unused.
#define DEF_CVT_SCALE_FUNC(suffix, cvt, stype, dtype, wtype) \
static void cvtScale##suffix( const uchar* src_, size_t sstep, const uchar*, size_t, \
                              uchar* dst_, size_t dstep, Size size, void* scale_) \
{ \
    const double* params = static_cast<const double*>(scale_); \
    const wtype alpha = (wtype)params[0]; \
    const wtype beta = (wtype)params[1]; \
    cvt(reinterpret_cast<const stype*>(src_), sstep, \
        reinterpret_cast<dtype*>(dst_), dstep, size, alpha, beta); \
}
219
// Wrapper instantiations. Argument order for both macros:
//   (function-name suffix, kernel template, source element type,
//    destination element type, type the scale/offset are converted to).

// Scale + absolute value wrappers; every one of them writes uchar.
220	DEF_CVT_SCALE_ABS_FUNC(`8u`, cvtabs_32f, uchar, uchar, float)
221	DEF_CVT_SCALE_ABS_FUNC(`8s8u`, cvtabs_32f, schar, uchar, float)
222	DEF_CVT_SCALE_ABS_FUNC(`16u8u`, cvtabs_32f, ushort, uchar, float)
223	DEF_CVT_SCALE_ABS_FUNC(`16s8u`, cvtabs_32f, short, uchar, float)
224	DEF_CVT_SCALE_ABS_FUNC(`32s8u`, cvtabs_32f, int, uchar, float)
225	DEF_CVT_SCALE_ABS_FUNC(`32f8u`, cvtabs_32f, float, uchar, float)
226	DEF_CVT_SCALE_ABS_FUNC(`64f8u`, cvtabs_32f, double, uchar, float)
227
// Scale wrappers, destination type uchar.
228	DEF_CVT_SCALE_FUNC(`8u`, cvt_32f, uchar, uchar, float)
229	DEF_CVT_SCALE_FUNC(`8s8u`, cvt_32f, schar, uchar, float)
230	DEF_CVT_SCALE_FUNC(`16u8u`, cvt_32f, ushort, uchar, float)
231	DEF_CVT_SCALE_FUNC(`16s8u`, cvt_32f, short, uchar, float)
232	DEF_CVT_SCALE_FUNC(`32s8u`, cvt_32f, int, uchar, float)
233	DEF_CVT_SCALE_FUNC(`32f8u`, cvt_32f, float, uchar, float)
234	DEF_CVT_SCALE_FUNC(`64f8u`, cvt_32f, double, uchar, float)
235	DEF_CVT_SCALE_FUNC(`16f8u`, cvt_32f, hfloat, uchar, float)
236
// Scale wrappers, destination type schar.
237	DEF_CVT_SCALE_FUNC(`8u8s`, cvt_32f, uchar, schar, float)
238	DEF_CVT_SCALE_FUNC(`8s`, cvt_32f, schar, schar, float)
239	DEF_CVT_SCALE_FUNC(`16u8s`, cvt_32f, ushort, schar, float)
240	DEF_CVT_SCALE_FUNC(`16s8s`, cvt_32f, short, schar, float)
241	DEF_CVT_SCALE_FUNC(`32s8s`, cvt_32f, int, schar, float)
242	DEF_CVT_SCALE_FUNC(`32f8s`, cvt_32f, float, schar, float)
243	DEF_CVT_SCALE_FUNC(`64f8s`, cvt_32f, double, schar, float)
244	DEF_CVT_SCALE_FUNC(`16f8s`, cvt_32f, hfloat, schar, float)
245
// Scale wrappers, destination type ushort (the hfloat source uses the non-unrolled kernel).
246	DEF_CVT_SCALE_FUNC(`8u16u`, cvt_32f, uchar, ushort, float)
247	DEF_CVT_SCALE_FUNC(`8s16u`, cvt_32f, schar, ushort, float)
248	DEF_CVT_SCALE_FUNC(`16u`, cvt_32f, ushort, ushort, float)
249	DEF_CVT_SCALE_FUNC(`16s16u`, cvt_32f, short, ushort, float)
250	DEF_CVT_SCALE_FUNC(`32s16u`, cvt_32f, int, ushort, float)
251	DEF_CVT_SCALE_FUNC(`32f16u`, cvt_32f, float, ushort, float)
252	DEF_CVT_SCALE_FUNC(`64f16u`, cvt_32f, double, ushort, float)
253	DEF_CVT_SCALE_FUNC(`16f16u`, cvt1_32f, hfloat, ushort, float)
254
// Scale wrappers, destination type short (the hfloat source uses the non-unrolled kernel).
255	DEF_CVT_SCALE_FUNC(`8u16s`, cvt_32f, uchar, short, float)
256	DEF_CVT_SCALE_FUNC(`8s16s`, cvt_32f, schar, short, float)
257	DEF_CVT_SCALE_FUNC(`16u16s`, cvt_32f, ushort, short, float)
258	DEF_CVT_SCALE_FUNC(`16s`, cvt_32f, short, short, float)
259	DEF_CVT_SCALE_FUNC(`32s16s`, cvt_32f, int, short, float)
260	DEF_CVT_SCALE_FUNC(`32f16s`, cvt_32f, float, short, float)
261	DEF_CVT_SCALE_FUNC(`64f16s`, cvt_32f, double, short, float)
262	DEF_CVT_SCALE_FUNC(`16f16s`, cvt1_32f, hfloat, short, float)
263
// Scale wrappers, destination type int; int and double sources go through the double-precision kernel.
264	DEF_CVT_SCALE_FUNC(`8u32s`, cvt_32f, uchar, int, float)
265	DEF_CVT_SCALE_FUNC(`8s32s`, cvt_32f, schar, int, float)
266	DEF_CVT_SCALE_FUNC(`16u32s`, cvt_32f, ushort, int, float)
267	DEF_CVT_SCALE_FUNC(`16s32s`, cvt_32f, short, int, float)
268	DEF_CVT_SCALE_FUNC(`32s`, cvt_64f, int, int, double)
269	DEF_CVT_SCALE_FUNC(`32f32s`, cvt_32f, float, int, float)
270	DEF_CVT_SCALE_FUNC(`64f32s`, cvt_64f, double, int, double)
271	DEF_CVT_SCALE_FUNC(`16f32s`, cvt1_32f, hfloat, int, float)
272
// Scale wrappers, destination type float; the double source goes through the double-precision kernel.
273	DEF_CVT_SCALE_FUNC(`8u32f`, cvt_32f, uchar, float, float)
274	DEF_CVT_SCALE_FUNC(`8s32f`, cvt_32f, schar, float, float)
275	DEF_CVT_SCALE_FUNC(`16u32f`, cvt_32f, ushort, float, float)
276	DEF_CVT_SCALE_FUNC(`16s32f`, cvt_32f, short, float, float)
277	DEF_CVT_SCALE_FUNC(`32s32f`, cvt_32f, int, float, float)
278	DEF_CVT_SCALE_FUNC(`32f`, cvt_32f, float, float, float)
279	DEF_CVT_SCALE_FUNC(`64f32f`, cvt_64f, double, float, double)
280	DEF_CVT_SCALE_FUNC(`16f32f`, cvt1_32f, hfloat, float, float)
281
// Scale wrappers, destination type double; all of them use the double-precision kernel.
282	DEF_CVT_SCALE_FUNC(`8u64f`, cvt_64f, uchar, double, double)
283	DEF_CVT_SCALE_FUNC(`8s64f`, cvt_64f, schar, double, double)
284	DEF_CVT_SCALE_FUNC(`16u64f`, cvt_64f, ushort, double, double)
285	DEF_CVT_SCALE_FUNC(`16s64f`, cvt_64f, short, double, double)
286	DEF_CVT_SCALE_FUNC(`32s64f`, cvt_64f, int, double, double)
287	DEF_CVT_SCALE_FUNC(`32f64f`, cvt_64f, float, double, double)
288	DEF_CVT_SCALE_FUNC(`64f`, cvt_64f, double, double, double)
289	DEF_CVT_SCALE_FUNC(`16f64f`, cvt_64f, hfloat, double, double)
290
// Scale wrappers, destination type hfloat; non-unrolled kernel except for the double source.
291	DEF_CVT_SCALE_FUNC(`8u16f`, cvt1_32f, uchar, hfloat, float)
292	DEF_CVT_SCALE_FUNC(`8s16f`, cvt1_32f, schar, hfloat, float)
293	DEF_CVT_SCALE_FUNC(`16u16f`, cvt1_32f, ushort, hfloat, float)
294	DEF_CVT_SCALE_FUNC(`16s16f`, cvt1_32f, short, hfloat, float)
295	DEF_CVT_SCALE_FUNC(`32s16f`, cvt1_32f, int, hfloat, float)
296	DEF_CVT_SCALE_FUNC(`32f16f`, cvt1_32f, float, hfloat, float)
297	DEF_CVT_SCALE_FUNC(`64f16f`, cvt_64f, double, hfloat, double)
298	DEF_CVT_SCALE_FUNC(`16f`, cvt1_32f, hfloat, hfloat, float)
299
300	BinaryFunc getCvtScaleAbsFunc(int depth)
301	{
302	static BinaryFunc cvtScaleAbsTab[CV_DEPTH_MAX] =
303	{
304	(BinaryFunc)cvtScaleAbs8u, (BinaryFunc)cvtScaleAbs8s8u, (BinaryFunc)cvtScaleAbs16u8u,
305	(BinaryFunc)cvtScaleAbs16s8u, (BinaryFunc)cvtScaleAbs32s8u, (BinaryFunc)cvtScaleAbs32f8u,
306	(BinaryFunc)cvtScaleAbs64f8u, `0`
307	};
308
309	return cvtScaleAbsTab[depth];
310	}
311
312	BinaryFunc getConvertScaleFunc(int sdepth, int ddepth)
313	{
314	static BinaryFunc cvtScaleTab[CV_DEPTH_MAX][CV_DEPTH_MAX] =
315	{
316	{
317	(BinaryFunc)GET_OPTIMIZED(cvtScale8u), (BinaryFunc)GET_OPTIMIZED(cvtScale8s8u), (BinaryFunc)GET_OPTIMIZED(cvtScale16u8u),
318	(BinaryFunc)GET_OPTIMIZED(cvtScale16s8u), (BinaryFunc)GET_OPTIMIZED(cvtScale32s8u), (BinaryFunc)GET_OPTIMIZED(cvtScale32f8u),
319	(BinaryFunc)cvtScale64f8u, (BinaryFunc)cvtScale16f8u
320	},
321	{
322	(BinaryFunc)GET_OPTIMIZED(cvtScale8u8s), (BinaryFunc)GET_OPTIMIZED(cvtScale8s), (BinaryFunc)GET_OPTIMIZED(cvtScale16u8s),
323	(BinaryFunc)GET_OPTIMIZED(cvtScale16s8s), (BinaryFunc)GET_OPTIMIZED(cvtScale32s8s), (BinaryFunc)GET_OPTIMIZED(cvtScale32f8s),
324	(BinaryFunc)cvtScale64f8s, (BinaryFunc)cvtScale16f8s
325	},
326	{
327	(BinaryFunc)GET_OPTIMIZED(cvtScale8u16u), (BinaryFunc)GET_OPTIMIZED(cvtScale8s16u), (BinaryFunc)GET_OPTIMIZED(cvtScale16u),
328	(BinaryFunc)GET_OPTIMIZED(cvtScale16s16u), (BinaryFunc)GET_OPTIMIZED(cvtScale32s16u), (BinaryFunc)GET_OPTIMIZED(cvtScale32f16u),
329	(BinaryFunc)cvtScale64f16u, (BinaryFunc)cvtScale16f16u
330	},
331	{
332	(BinaryFunc)GET_OPTIMIZED(cvtScale8u16s), (BinaryFunc)GET_OPTIMIZED(cvtScale8s16s), (BinaryFunc)GET_OPTIMIZED(cvtScale16u16s),
333	(BinaryFunc)GET_OPTIMIZED(cvtScale16s), (BinaryFunc)GET_OPTIMIZED(cvtScale32s16s), (BinaryFunc)GET_OPTIMIZED(cvtScale32f16s),
334	(BinaryFunc)cvtScale64f16s, (BinaryFunc)cvtScale16f16s
335	},
336	{
337	(BinaryFunc)GET_OPTIMIZED(cvtScale8u32s), (BinaryFunc)GET_OPTIMIZED(cvtScale8s32s), (BinaryFunc)GET_OPTIMIZED(cvtScale16u32s),
338	(BinaryFunc)GET_OPTIMIZED(cvtScale16s32s), (BinaryFunc)GET_OPTIMIZED(cvtScale32s), (BinaryFunc)GET_OPTIMIZED(cvtScale32f32s),
339	(BinaryFunc)cvtScale64f32s, (BinaryFunc)cvtScale16f32s
340	},
341	{
342	(BinaryFunc)GET_OPTIMIZED(cvtScale8u32f), (BinaryFunc)GET_OPTIMIZED(cvtScale8s32f), (BinaryFunc)GET_OPTIMIZED(cvtScale16u32f),
343	(BinaryFunc)GET_OPTIMIZED(cvtScale16s32f), (BinaryFunc)GET_OPTIMIZED(cvtScale32s32f), (BinaryFunc)GET_OPTIMIZED(cvtScale32f),
344	(BinaryFunc)cvtScale64f32f, (BinaryFunc)cvtScale16f32f
345	},
346	{
347	(BinaryFunc)cvtScale8u64f, (BinaryFunc)cvtScale8s64f, (BinaryFunc)cvtScale16u64f,
348	(BinaryFunc)cvtScale16s64f, (BinaryFunc)cvtScale32s64f, (BinaryFunc)cvtScale32f64f,
349	(BinaryFunc)cvtScale64f, (BinaryFunc)cvtScale16f64f
350	},
351	{
352	(BinaryFunc)cvtScale8u16f, (BinaryFunc)cvtScale8s16f, (BinaryFunc)cvtScale16u16f,
353	(BinaryFunc)cvtScale16s16f, (BinaryFunc)cvtScale32s16f, (BinaryFunc)cvtScale32f16f,
354	(BinaryFunc)cvtScale64f16f, (BinaryFunc)cvtScale16f
355	},
356	};
357
358	return cvtScaleTab[CV_MAT_DEPTH(ddepth)][CV_MAT_DEPTH(sdepth)];
359	}
360
361	#endif
362
363	CV_CPU_OPTIMIZATION_NAMESPACE_END
364	} // namespace
365

Provided by KDAB

Learn to use CMake with our Intro Training

Find out more

Definitions

source code of opencv/modules/core/src/convert_scale.simd.hpp