1// This file is part of OpenCV project.
2// It is subject to the license terms in the LICENSE file found in the top-level directory
3// of this distribution and at http://opencv.org/license.html
4
5
6#include "precomp.hpp"
7#include "convert.hpp"
8
9namespace cv {
10CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
11
12BinaryFunc getCvtScaleAbsFunc(int depth);
13BinaryFunc getConvertScaleFunc(int sdepth, int ddepth);
14
15#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
16
17/****************************************************************************************\
18* convertScale[Abs] *
19\****************************************************************************************/
20
21template<typename _Ts, typename _Td> inline void
22cvtabs_32f( const _Ts* src, size_t sstep, _Td* dst, size_t dstep,
23 Size size, float a, float b )
24{
25#if (CV_SIMD || CV_SIMD_SCALABLE)
26 v_float32 va = vx_setall_f32(v: a), vb = vx_setall_f32(v: b);
27 const int VECSZ = VTraits<v_float32>::vlanes()*2;
28#endif
29 sstep /= sizeof(src[0]);
30 dstep /= sizeof(dst[0]);
31
32 for( int i = 0; i < size.height; i++, src += sstep, dst += dstep )
33 {
34 int j = 0;
35#if (CV_SIMD || CV_SIMD_SCALABLE)
36 for( ; j < size.width; j += VECSZ )
37 {
38 if( j > size.width - VECSZ )
39 {
40 if( j == 0 || src == (_Ts*)dst )
41 break;
42 j = size.width - VECSZ;
43 }
44 v_float32 v0, v1;
45 vx_load_pair_as(src + j, v0, v1);
46 v0 = v_fma(a: v0, b: va, c: vb);
47 v1 = v_fma(a: v1, b: va, c: vb);
48 v_store_pair_as(dst + j, v_abs(x: v0), v_abs(x: v1));
49 }
50#endif
51 for( ; j < size.width; j++ )
52 dst[j] = saturate_cast<_Td>(std::abs(src[j]*a + b));
53 }
54}
55
56// variant for conversions 16f <-> ... w/o unrolling
57template<typename _Ts, typename _Td> inline void
58cvtabs1_32f( const _Ts* src, size_t sstep, _Td* dst, size_t dstep,
59 Size size, float a, float b )
60{
61#if (CV_SIMD || CV_SIMD_SCALABLE)
62 v_float32 va = vx_setall_f32(v: a), vb = vx_setall_f32(v: b);
63 const int VECSZ = VTraits<v_float32>::vlanes()*2;
64#endif
65 sstep /= sizeof(src[0]);
66 dstep /= sizeof(dst[0]);
67
68 for( int i = 0; i < size.height; i++, src += sstep, dst += dstep )
69 {
70 int j = 0;
71#if (CV_SIMD || CV_SIMD_SCALABLE)
72 for( ; j < size.width; j += VECSZ )
73 {
74 if( j > size.width - VECSZ )
75 {
76 if( j == 0 || src == (_Ts*)dst )
77 break;
78 j = size.width - VECSZ;
79 }
80 v_float32 v0;
81 vx_load_as(src + j, v0);
82 v0 = v_fma(a: v0, b: va, c: vb);
83 v_store_as(dst + j, v_abs(x: v0));
84 }
85#endif
86 for( ; j < size.width; j++ )
87 dst[j] = saturate_cast<_Td>(src[j]*a + b);
88 }
89}
90
91template<typename _Ts, typename _Td> inline void
92cvt_32f( const _Ts* src, size_t sstep, _Td* dst, size_t dstep,
93 Size size, float a, float b )
94{
95#if (CV_SIMD || (CV_SIMD_SCALABLE && !(defined(__GNUC__) && !defined(__clang__))) )
96 v_float32 va = vx_setall_f32(v: a), vb = vx_setall_f32(v: b);
97 const int VECSZ = VTraits<v_float32>::vlanes()*2;
98#endif
99 sstep /= sizeof(src[0]);
100 dstep /= sizeof(dst[0]);
101
102 for( int i = 0; i < size.height; i++, src += sstep, dst += dstep )
103 {
104 int j = 0;
105// Excluding GNU in CV_SIMD_SCALABLE because of "opencv/issues/26936"
106#if (CV_SIMD || (CV_SIMD_SCALABLE && !(defined(__GNUC__) && !defined(__clang__))) )
107 for( ; j < size.width; j += VECSZ )
108 {
109 if( j > size.width - VECSZ )
110 {
111 if( j == 0 || src == (_Ts*)dst )
112 break;
113 j = size.width - VECSZ;
114 }
115 v_float32 v0, v1;
116 vx_load_pair_as(src + j, v0, v1);
117 v0 = v_fma(a: v0, b: va, c: vb);
118 v1 = v_fma(a: v1, b: va, c: vb);
119 v_store_pair_as(dst + j, v0, v1);
120 }
121#endif
122 for( ; j < size.width; j++ )
123 dst[j] = saturate_cast<_Td>(src[j]*a + b);
124 }
125}
126
127// variant for conversions 16f <-> ... w/o unrolling
128template<typename _Ts, typename _Td> inline void
129cvt1_32f( const _Ts* src, size_t sstep, _Td* dst, size_t dstep,
130 Size size, float a, float b )
131{
132#if (CV_SIMD || CV_SIMD_SCALABLE)
133 v_float32 va = vx_setall_f32(v: a), vb = vx_setall_f32(v: b);
134 const int VECSZ = VTraits<v_float32>::vlanes();
135#endif
136 sstep /= sizeof(src[0]);
137 dstep /= sizeof(dst[0]);
138
139 for( int i = 0; i < size.height; i++, src += sstep, dst += dstep )
140 {
141 int j = 0;
142#if (CV_SIMD || CV_SIMD_SCALABLE)
143 for( ; j < size.width; j += VECSZ )
144 {
145 if( j > size.width - VECSZ )
146 {
147 if( j == 0 || src == (_Ts*)dst )
148 break;
149 j = size.width - VECSZ;
150 }
151 v_float32 v0;
152 vx_load_as(src + j, v0);
153 v0 = v_fma(a: v0, b: va, c: vb);
154 v_store_as(dst + j, v0);
155 }
156#endif
157 for( ; j < size.width; j++ )
158 dst[j] = saturate_cast<_Td>(src[j]*a + b);
159 }
160}
161
162
163template<typename _Ts, typename _Td> inline void
164cvt_64f( const _Ts* src, size_t sstep, _Td* dst, size_t dstep,
165 Size size, double a, double b )
166{
167#if (CV_SIMD_64F || (CV_SIMD_SCALABLE_64F && !(defined(__GNUC__) && !defined(__clang__))) )
168 v_float64 va = vx_setall_f64(v: a), vb = vx_setall_f64(v: b);
169 const int VECSZ = VTraits<v_float64>::vlanes()*2;
170#endif
171 sstep /= sizeof(src[0]);
172 dstep /= sizeof(dst[0]);
173
174 for( int i = 0; i < size.height; i++, src += sstep, dst += dstep )
175 {
176 int j = 0;
177// Excluding GNU in CV_SIMD_SCALABLE because of "opencv/issues/26936"
178#if (CV_SIMD_64F || (CV_SIMD_SCALABLE_64F && !(defined(__GNUC__) && !defined(__clang__))) )
179 for( ; j < size.width; j += VECSZ )
180 {
181 if( j > size.width - VECSZ )
182 {
183 if( j == 0 || src == (_Ts*)dst )
184 break;
185 j = size.width - VECSZ;
186 }
187 v_float64 v0, v1;
188 vx_load_pair_as(src + j, v0, v1);
189 v0 = v_fma(a: v0, b: va, c: vb);
190 v1 = v_fma(a: v1, b: va, c: vb);
191 v_store_pair_as(dst + j, v0, v1);
192 }
193#endif
194 for( ; j < size.width; j++ )
195 dst[j] = saturate_cast<_Td>(src[j]*a + b);
196 }
197}
198
199//==================================================================================================
200
// Defines a static function cvtScaleAbs<suffix> with the untyped kernel signature
// used by the dispatch table: it reinterprets the byte pointers as stype/dtype,
// reads the two scale parameters from scale_ (an array of doubles) and forwards
// them, cast to wtype, to the typed kernel `cvt`. The second source pointer and
// its step are ignored.
#define DEF_CVT_SCALE_ABS_FUNC(suffix, cvt, stype, dtype, wtype) \
static void cvtScaleAbs##suffix( const uchar* src_, size_t sstep, const uchar*, size_t, \
                                 uchar* dst_, size_t dstep, Size size, void* scale_) \
{ \
    double* params = (double*)scale_; \
    const stype* typedSrc = (const stype*)src_; \
    dtype* typedDst = (dtype*)dst_; \
    cvt(typedSrc, sstep, typedDst, dstep, size, \
        (wtype)params[0], (wtype)params[1]); \
}
210
211
// Defines a static function cvtScale<suffix> with the untyped kernel signature
// used by the dispatch table: it reinterprets the byte pointers as stype/dtype,
// reads the two scale parameters from scale_ (an array of doubles) and forwards
// them, cast to wtype, to the typed kernel `cvt`. The second source pointer and
// its step are ignored.
#define DEF_CVT_SCALE_FUNC(suffix, cvt, stype, dtype, wtype) \
static void cvtScale##suffix( const uchar* src_, size_t sstep, const uchar*, size_t, \
                              uchar* dst_, size_t dstep, Size size, void* scale_) \
{ \
    double* params = (double*)scale_; \
    const stype* typedSrc = (const stype*)src_; \
    dtype* typedDst = (dtype*)dst_; \
    cvt(typedSrc, sstep, typedDst, dstep, size, \
        (wtype)params[0], (wtype)params[1]); \
}
221
222DEF_CVT_SCALE_ABS_FUNC(8u, cvtabs_32f, uchar, uchar, float)
223DEF_CVT_SCALE_ABS_FUNC(8s8u, cvtabs_32f, schar, uchar, float)
224DEF_CVT_SCALE_ABS_FUNC(16u8u, cvtabs_32f, ushort, uchar, float)
225DEF_CVT_SCALE_ABS_FUNC(16s8u, cvtabs_32f, short, uchar, float)
226DEF_CVT_SCALE_ABS_FUNC(32s8u, cvtabs_32f, int, uchar, float)
227DEF_CVT_SCALE_ABS_FUNC(32f8u, cvtabs_32f, float, uchar, float)
228DEF_CVT_SCALE_ABS_FUNC(64f8u, cvtabs_32f, double, uchar, float)
229
230DEF_CVT_SCALE_FUNC(8u, cvt_32f, uchar, uchar, float)
231DEF_CVT_SCALE_FUNC(8s8u, cvt_32f, schar, uchar, float)
232DEF_CVT_SCALE_FUNC(16u8u, cvt_32f, ushort, uchar, float)
233DEF_CVT_SCALE_FUNC(16s8u, cvt_32f, short, uchar, float)
234DEF_CVT_SCALE_FUNC(32s8u, cvt_32f, int, uchar, float)
235DEF_CVT_SCALE_FUNC(32f8u, cvt_32f, float, uchar, float)
236DEF_CVT_SCALE_FUNC(64f8u, cvt_32f, double, uchar, float)
237DEF_CVT_SCALE_FUNC(16f8u, cvt_32f, hfloat, uchar, float)
238
239DEF_CVT_SCALE_FUNC(8u8s, cvt_32f, uchar, schar, float)
240DEF_CVT_SCALE_FUNC(8s, cvt_32f, schar, schar, float)
241DEF_CVT_SCALE_FUNC(16u8s, cvt_32f, ushort, schar, float)
242DEF_CVT_SCALE_FUNC(16s8s, cvt_32f, short, schar, float)
243DEF_CVT_SCALE_FUNC(32s8s, cvt_32f, int, schar, float)
244DEF_CVT_SCALE_FUNC(32f8s, cvt_32f, float, schar, float)
245DEF_CVT_SCALE_FUNC(64f8s, cvt_32f, double, schar, float)
246DEF_CVT_SCALE_FUNC(16f8s, cvt_32f, hfloat, schar, float)
247
248DEF_CVT_SCALE_FUNC(8u16u, cvt_32f, uchar, ushort, float)
249DEF_CVT_SCALE_FUNC(8s16u, cvt_32f, schar, ushort, float)
250DEF_CVT_SCALE_FUNC(16u, cvt_32f, ushort, ushort, float)
251DEF_CVT_SCALE_FUNC(16s16u, cvt_32f, short, ushort, float)
252DEF_CVT_SCALE_FUNC(32s16u, cvt_32f, int, ushort, float)
253DEF_CVT_SCALE_FUNC(32f16u, cvt_32f, float, ushort, float)
254DEF_CVT_SCALE_FUNC(64f16u, cvt_32f, double, ushort, float)
255DEF_CVT_SCALE_FUNC(16f16u, cvt1_32f, hfloat, ushort, float)
256
257DEF_CVT_SCALE_FUNC(8u16s, cvt_32f, uchar, short, float)
258DEF_CVT_SCALE_FUNC(8s16s, cvt_32f, schar, short, float)
259DEF_CVT_SCALE_FUNC(16u16s, cvt_32f, ushort, short, float)
260DEF_CVT_SCALE_FUNC(16s, cvt_32f, short, short, float)
261DEF_CVT_SCALE_FUNC(32s16s, cvt_32f, int, short, float)
262DEF_CVT_SCALE_FUNC(32f16s, cvt_32f, float, short, float)
263DEF_CVT_SCALE_FUNC(64f16s, cvt_32f, double, short, float)
264DEF_CVT_SCALE_FUNC(16f16s, cvt1_32f, hfloat, short, float)
265
266DEF_CVT_SCALE_FUNC(8u32s, cvt_32f, uchar, int, float)
267DEF_CVT_SCALE_FUNC(8s32s, cvt_32f, schar, int, float)
268DEF_CVT_SCALE_FUNC(16u32s, cvt_32f, ushort, int, float)
269DEF_CVT_SCALE_FUNC(16s32s, cvt_32f, short, int, float)
270DEF_CVT_SCALE_FUNC(32s, cvt_64f, int, int, double)
271DEF_CVT_SCALE_FUNC(32f32s, cvt_32f, float, int, float)
272DEF_CVT_SCALE_FUNC(64f32s, cvt_64f, double, int, double)
273DEF_CVT_SCALE_FUNC(16f32s, cvt1_32f, hfloat, int, float)
274
275DEF_CVT_SCALE_FUNC(8u32f, cvt_32f, uchar, float, float)
276DEF_CVT_SCALE_FUNC(8s32f, cvt_32f, schar, float, float)
277DEF_CVT_SCALE_FUNC(16u32f, cvt_32f, ushort, float, float)
278DEF_CVT_SCALE_FUNC(16s32f, cvt_32f, short, float, float)
279DEF_CVT_SCALE_FUNC(32s32f, cvt_32f, int, float, float)
280DEF_CVT_SCALE_FUNC(32f, cvt_32f, float, float, float)
281DEF_CVT_SCALE_FUNC(64f32f, cvt_64f, double, float, double)
282DEF_CVT_SCALE_FUNC(16f32f, cvt1_32f, hfloat, float, float)
283
284DEF_CVT_SCALE_FUNC(8u64f, cvt_64f, uchar, double, double)
285DEF_CVT_SCALE_FUNC(8s64f, cvt_64f, schar, double, double)
286DEF_CVT_SCALE_FUNC(16u64f, cvt_64f, ushort, double, double)
287DEF_CVT_SCALE_FUNC(16s64f, cvt_64f, short, double, double)
288DEF_CVT_SCALE_FUNC(32s64f, cvt_64f, int, double, double)
289DEF_CVT_SCALE_FUNC(32f64f, cvt_64f, float, double, double)
290DEF_CVT_SCALE_FUNC(64f, cvt_64f, double, double, double)
291DEF_CVT_SCALE_FUNC(16f64f, cvt_64f, hfloat, double, double)
292
293DEF_CVT_SCALE_FUNC(8u16f, cvt1_32f, uchar, hfloat, float)
294DEF_CVT_SCALE_FUNC(8s16f, cvt1_32f, schar, hfloat, float)
295DEF_CVT_SCALE_FUNC(16u16f, cvt1_32f, ushort, hfloat, float)
296DEF_CVT_SCALE_FUNC(16s16f, cvt1_32f, short, hfloat, float)
297DEF_CVT_SCALE_FUNC(32s16f, cvt1_32f, int, hfloat, float)
298DEF_CVT_SCALE_FUNC(32f16f, cvt1_32f, float, hfloat, float)
299DEF_CVT_SCALE_FUNC(64f16f, cvt_64f, double, hfloat, double)
300DEF_CVT_SCALE_FUNC(16f, cvt1_32f, hfloat, hfloat, float)
301
302BinaryFunc getCvtScaleAbsFunc(int depth)
303{
304 static BinaryFunc cvtScaleAbsTab[CV_DEPTH_MAX] =
305 {
306 (BinaryFunc)cvtScaleAbs8u, (BinaryFunc)cvtScaleAbs8s8u, (BinaryFunc)cvtScaleAbs16u8u,
307 (BinaryFunc)cvtScaleAbs16s8u, (BinaryFunc)cvtScaleAbs32s8u, (BinaryFunc)cvtScaleAbs32f8u,
308 (BinaryFunc)cvtScaleAbs64f8u, 0
309 };
310
311 return cvtScaleAbsTab[depth];
312}
313
314BinaryFunc getConvertScaleFunc(int sdepth, int ddepth)
315{
316 static BinaryFunc cvtScaleTab[CV_DEPTH_MAX][CV_DEPTH_MAX] =
317 {
318 {
319 (BinaryFunc)GET_OPTIMIZED(cvtScale8u), (BinaryFunc)GET_OPTIMIZED(cvtScale8s8u), (BinaryFunc)GET_OPTIMIZED(cvtScale16u8u),
320 (BinaryFunc)GET_OPTIMIZED(cvtScale16s8u), (BinaryFunc)GET_OPTIMIZED(cvtScale32s8u), (BinaryFunc)GET_OPTIMIZED(cvtScale32f8u),
321 (BinaryFunc)cvtScale64f8u, (BinaryFunc)cvtScale16f8u
322 },
323 {
324 (BinaryFunc)GET_OPTIMIZED(cvtScale8u8s), (BinaryFunc)GET_OPTIMIZED(cvtScale8s), (BinaryFunc)GET_OPTIMIZED(cvtScale16u8s),
325 (BinaryFunc)GET_OPTIMIZED(cvtScale16s8s), (BinaryFunc)GET_OPTIMIZED(cvtScale32s8s), (BinaryFunc)GET_OPTIMIZED(cvtScale32f8s),
326 (BinaryFunc)cvtScale64f8s, (BinaryFunc)cvtScale16f8s
327 },
328 {
329 (BinaryFunc)GET_OPTIMIZED(cvtScale8u16u), (BinaryFunc)GET_OPTIMIZED(cvtScale8s16u), (BinaryFunc)GET_OPTIMIZED(cvtScale16u),
330 (BinaryFunc)GET_OPTIMIZED(cvtScale16s16u), (BinaryFunc)GET_OPTIMIZED(cvtScale32s16u), (BinaryFunc)GET_OPTIMIZED(cvtScale32f16u),
331 (BinaryFunc)cvtScale64f16u, (BinaryFunc)cvtScale16f16u
332 },
333 {
334 (BinaryFunc)GET_OPTIMIZED(cvtScale8u16s), (BinaryFunc)GET_OPTIMIZED(cvtScale8s16s), (BinaryFunc)GET_OPTIMIZED(cvtScale16u16s),
335 (BinaryFunc)GET_OPTIMIZED(cvtScale16s), (BinaryFunc)GET_OPTIMIZED(cvtScale32s16s), (BinaryFunc)GET_OPTIMIZED(cvtScale32f16s),
336 (BinaryFunc)cvtScale64f16s, (BinaryFunc)cvtScale16f16s
337 },
338 {
339 (BinaryFunc)GET_OPTIMIZED(cvtScale8u32s), (BinaryFunc)GET_OPTIMIZED(cvtScale8s32s), (BinaryFunc)GET_OPTIMIZED(cvtScale16u32s),
340 (BinaryFunc)GET_OPTIMIZED(cvtScale16s32s), (BinaryFunc)GET_OPTIMIZED(cvtScale32s), (BinaryFunc)GET_OPTIMIZED(cvtScale32f32s),
341 (BinaryFunc)cvtScale64f32s, (BinaryFunc)cvtScale16f32s
342 },
343 {
344 (BinaryFunc)GET_OPTIMIZED(cvtScale8u32f), (BinaryFunc)GET_OPTIMIZED(cvtScale8s32f), (BinaryFunc)GET_OPTIMIZED(cvtScale16u32f),
345 (BinaryFunc)GET_OPTIMIZED(cvtScale16s32f), (BinaryFunc)GET_OPTIMIZED(cvtScale32s32f), (BinaryFunc)GET_OPTIMIZED(cvtScale32f),
346 (BinaryFunc)cvtScale64f32f, (BinaryFunc)cvtScale16f32f
347 },
348 {
349 (BinaryFunc)cvtScale8u64f, (BinaryFunc)cvtScale8s64f, (BinaryFunc)cvtScale16u64f,
350 (BinaryFunc)cvtScale16s64f, (BinaryFunc)cvtScale32s64f, (BinaryFunc)cvtScale32f64f,
351 (BinaryFunc)cvtScale64f, (BinaryFunc)cvtScale16f64f
352 },
353 {
354 (BinaryFunc)cvtScale8u16f, (BinaryFunc)cvtScale8s16f, (BinaryFunc)cvtScale16u16f,
355 (BinaryFunc)cvtScale16s16f, (BinaryFunc)cvtScale32s16f, (BinaryFunc)cvtScale32f16f,
356 (BinaryFunc)cvtScale64f16f, (BinaryFunc)cvtScale16f
357 },
358 };
359
360 return cvtScaleTab[CV_MAT_DEPTH(ddepth)][CV_MAT_DEPTH(sdepth)];
361}
362
363#endif
364
365CV_CPU_OPTIMIZATION_NAMESPACE_END
366} // namespace
367

// source code of opencv/modules/core/src/convert_scale.simd.hpp