1 | // This file is part of OpenCV project. |
2 | // It is subject to the license terms in the LICENSE file found in the top-level directory |
3 | // of this distribution and at http://opencv.org/license.html |
4 | |
5 | |
6 | #include "precomp.hpp" |
7 | #include "convert.hpp" |
8 | |
9 | namespace cv { |
10 | CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN |
11 | |
// Lookup entry points; both are defined further down in this file.
// getCvtScaleAbsFunc: returns the cvtScaleAbs* kernel selected by the given depth.
// getConvertScaleFunc: returns the cvtScale* kernel selected by a source/destination depth pair.
12 | BinaryFunc getCvtScaleAbsFunc(int depth); |
13 | BinaryFunc getConvertScaleFunc(int sdepth, int ddepth); |
14 | |
15 | #ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY |
16 | |
17 | /****************************************************************************************\ |
18 | * convertScale[Abs] * |
19 | \****************************************************************************************/ |
20 | |
21 | template<typename _Ts, typename _Td> inline void |
22 | cvtabs_32f( const _Ts* src, size_t sstep, _Td* dst, size_t dstep, |
23 | Size size, float a, float b ) |
24 | { |
25 | #if (CV_SIMD || CV_SIMD_SCALABLE) |
26 | v_float32 va = vx_setall_f32(v: a), vb = vx_setall_f32(v: b); |
27 | const int VECSZ = VTraits<v_float32>::vlanes()*2; |
28 | #endif |
29 | sstep /= sizeof(src[0]); |
30 | dstep /= sizeof(dst[0]); |
31 | |
32 | for( int i = 0; i < size.height; i++, src += sstep, dst += dstep ) |
33 | { |
34 | int j = 0; |
35 | #if (CV_SIMD || CV_SIMD_SCALABLE) |
36 | for( ; j < size.width; j += VECSZ ) |
37 | { |
38 | if( j > size.width - VECSZ ) |
39 | { |
40 | if( j == 0 || src == (_Ts*)dst ) |
41 | break; |
42 | j = size.width - VECSZ; |
43 | } |
44 | v_float32 v0, v1; |
45 | vx_load_pair_as(src + j, v0, v1); |
46 | v0 = v_fma(a: v0, b: va, c: vb); |
47 | v1 = v_fma(a: v1, b: va, c: vb); |
48 | v_store_pair_as(dst + j, v_abs(x: v0), v_abs(x: v1)); |
49 | } |
50 | #endif |
51 | for( ; j < size.width; j++ ) |
52 | dst[j] = saturate_cast<_Td>(std::abs(src[j]*a + b)); |
53 | } |
54 | } |
55 | |
56 | // variant for conversions 16f <-> ... w/o unrolling |
57 | template<typename _Ts, typename _Td> inline void |
58 | cvtabs1_32f( const _Ts* src, size_t sstep, _Td* dst, size_t dstep, |
59 | Size size, float a, float b ) |
60 | { |
61 | #if (CV_SIMD || CV_SIMD_SCALABLE) |
62 | v_float32 va = vx_setall_f32(v: a), vb = vx_setall_f32(v: b); |
63 | const int VECSZ = VTraits<v_float32>::vlanes()*2; |
64 | #endif |
65 | sstep /= sizeof(src[0]); |
66 | dstep /= sizeof(dst[0]); |
67 | |
68 | for( int i = 0; i < size.height; i++, src += sstep, dst += dstep ) |
69 | { |
70 | int j = 0; |
71 | #if (CV_SIMD || CV_SIMD_SCALABLE) |
72 | for( ; j < size.width; j += VECSZ ) |
73 | { |
74 | if( j > size.width - VECSZ ) |
75 | { |
76 | if( j == 0 || src == (_Ts*)dst ) |
77 | break; |
78 | j = size.width - VECSZ; |
79 | } |
80 | v_float32 v0; |
81 | vx_load_as(src + j, v0); |
82 | v0 = v_fma(a: v0, b: va, c: vb); |
83 | v_store_as(dst + j, v_abs(x: v0)); |
84 | } |
85 | #endif |
86 | for( ; j < size.width; j++ ) |
87 | dst[j] = saturate_cast<_Td>(src[j]*a + b); |
88 | } |
89 | } |
90 | |
91 | template<typename _Ts, typename _Td> inline void |
92 | cvt_32f( const _Ts* src, size_t sstep, _Td* dst, size_t dstep, |
93 | Size size, float a, float b ) |
94 | { |
95 | #if (CV_SIMD || CV_SIMD_SCALABLE) |
96 | v_float32 va = vx_setall_f32(v: a), vb = vx_setall_f32(v: b); |
97 | const int VECSZ = VTraits<v_float32>::vlanes()*2; |
98 | #endif |
99 | sstep /= sizeof(src[0]); |
100 | dstep /= sizeof(dst[0]); |
101 | |
102 | for( int i = 0; i < size.height; i++, src += sstep, dst += dstep ) |
103 | { |
104 | int j = 0; |
105 | #if (CV_SIMD || CV_SIMD_SCALABLE) |
106 | for( ; j < size.width; j += VECSZ ) |
107 | { |
108 | if( j > size.width - VECSZ ) |
109 | { |
110 | if( j == 0 || src == (_Ts*)dst ) |
111 | break; |
112 | j = size.width - VECSZ; |
113 | } |
114 | v_float32 v0, v1; |
115 | vx_load_pair_as(src + j, v0, v1); |
116 | v0 = v_fma(a: v0, b: va, c: vb); |
117 | v1 = v_fma(a: v1, b: va, c: vb); |
118 | v_store_pair_as(dst + j, v0, v1); |
119 | } |
120 | #endif |
121 | for( ; j < size.width; j++ ) |
122 | dst[j] = saturate_cast<_Td>(src[j]*a + b); |
123 | } |
124 | } |
125 | |
126 | // variant for conversions 16f <-> ... w/o unrolling |
127 | template<typename _Ts, typename _Td> inline void |
128 | cvt1_32f( const _Ts* src, size_t sstep, _Td* dst, size_t dstep, |
129 | Size size, float a, float b ) |
130 | { |
131 | #if (CV_SIMD || CV_SIMD_SCALABLE) |
132 | v_float32 va = vx_setall_f32(v: a), vb = vx_setall_f32(v: b); |
133 | const int VECSZ = VTraits<v_float32>::vlanes(); |
134 | #endif |
135 | sstep /= sizeof(src[0]); |
136 | dstep /= sizeof(dst[0]); |
137 | |
138 | for( int i = 0; i < size.height; i++, src += sstep, dst += dstep ) |
139 | { |
140 | int j = 0; |
141 | #if (CV_SIMD || CV_SIMD_SCALABLE) |
142 | for( ; j < size.width; j += VECSZ ) |
143 | { |
144 | if( j > size.width - VECSZ ) |
145 | { |
146 | if( j == 0 || src == (_Ts*)dst ) |
147 | break; |
148 | j = size.width - VECSZ; |
149 | } |
150 | v_float32 v0; |
151 | vx_load_as(src + j, v0); |
152 | v0 = v_fma(a: v0, b: va, c: vb); |
153 | v_store_as(dst + j, v0); |
154 | } |
155 | #endif |
156 | for( ; j < size.width; j++ ) |
157 | dst[j] = saturate_cast<_Td>(src[j]*a + b); |
158 | } |
159 | } |
160 | |
161 | |
162 | template<typename _Ts, typename _Td> inline void |
163 | cvt_64f( const _Ts* src, size_t sstep, _Td* dst, size_t dstep, |
164 | Size size, double a, double b ) |
165 | { |
166 | #if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F) |
167 | v_float64 va = vx_setall_f64(v: a), vb = vx_setall_f64(v: b); |
168 | const int VECSZ = VTraits<v_float64>::vlanes()*2; |
169 | #endif |
170 | sstep /= sizeof(src[0]); |
171 | dstep /= sizeof(dst[0]); |
172 | |
173 | for( int i = 0; i < size.height; i++, src += sstep, dst += dstep ) |
174 | { |
175 | int j = 0; |
176 | #if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F) |
177 | for( ; j < size.width; j += VECSZ ) |
178 | { |
179 | if( j > size.width - VECSZ ) |
180 | { |
181 | if( j == 0 || src == (_Ts*)dst ) |
182 | break; |
183 | j = size.width - VECSZ; |
184 | } |
185 | v_float64 v0, v1; |
186 | vx_load_pair_as(src + j, v0, v1); |
187 | v0 = v_fma(a: v0, b: va, c: vb); |
188 | v1 = v_fma(a: v1, b: va, c: vb); |
189 | v_store_pair_as(dst + j, v0, v1); |
190 | } |
191 | #endif |
192 | for( ; j < size.width; j++ ) |
193 | dst[j] = saturate_cast<_Td>(src[j]*a + b); |
194 | } |
195 | } |
196 | |
197 | //================================================================================================== |
198 | |
// Generates the static wrapper cvtScaleAbs<suffix>: it takes untyped
// uchar*/void* arguments and forwards them to the typed kernel `cvt`.
//   suffix - appended to "cvtScaleAbs" to form the function name
//   cvt    - kernel invoked as cvt(src, sstep, dst, dstep, size, a, b)
//   stype  - element type the source pointer is reinterpreted as
//   dtype  - element type the destination pointer is reinterpreted as
//   wtype  - type both coefficients are cast to before the call
// scale_ is read as an array of two doubles, passed to the kernel in order.
// The second (unnamed) pointer/step pair of the signature is ignored.
#define DEF_CVT_SCALE_ABS_FUNC(suffix, cvt, stype, dtype, wtype) \
static void cvtScaleAbs##suffix( const uchar* src_, size_t sstep, const uchar*, size_t, \
                                 uchar* dst_, size_t dstep, Size size, void* scale_) \
{ \
    const double* const coeffs = (const double*)scale_; \
    const wtype alpha = (wtype)coeffs[0]; \
    const wtype beta = (wtype)coeffs[1]; \
    cvt((const stype*)src_, sstep, (dtype*)dst_, dstep, size, alpha, beta); \
}
208 | |
209 | |
// Generates the static wrapper cvtScale<suffix>: it takes untyped
// uchar*/void* arguments and forwards them to the typed kernel `cvt`.
//   suffix - appended to "cvtScale" to form the function name
//   cvt    - kernel invoked as cvt(src, sstep, dst, dstep, size, a, b)
//   stype  - element type the source pointer is reinterpreted as
//   dtype  - element type the destination pointer is reinterpreted as
//   wtype  - type both coefficients are cast to before the call
// scale_ is read as an array of two doubles, passed to the kernel in order.
// The second (unnamed) pointer/step pair of the signature is ignored.
#define DEF_CVT_SCALE_FUNC(suffix, cvt, stype, dtype, wtype) \
static void cvtScale##suffix( const uchar* src_, size_t sstep, const uchar*, size_t, \
                              uchar* dst_, size_t dstep, Size size, void* scale_) \
{ \
    const double* const coeffs = (const double*)scale_; \
    const wtype alpha = (wtype)coeffs[0]; \
    const wtype beta = (wtype)coeffs[1]; \
    cvt((const stype*)src_, sstep, (dtype*)dst_, dstep, size, alpha, beta); \
}
219 | |
220 | DEF_CVT_SCALE_ABS_FUNC(8u, cvtabs_32f, uchar, uchar, float) |
221 | DEF_CVT_SCALE_ABS_FUNC(8s8u, cvtabs_32f, schar, uchar, float) |
222 | DEF_CVT_SCALE_ABS_FUNC(16u8u, cvtabs_32f, ushort, uchar, float) |
223 | DEF_CVT_SCALE_ABS_FUNC(16s8u, cvtabs_32f, short, uchar, float) |
224 | DEF_CVT_SCALE_ABS_FUNC(32s8u, cvtabs_32f, int, uchar, float) |
225 | DEF_CVT_SCALE_ABS_FUNC(32f8u, cvtabs_32f, float, uchar, float) |
226 | DEF_CVT_SCALE_ABS_FUNC(64f8u, cvtabs_32f, double, uchar, float) |
227 | |
228 | DEF_CVT_SCALE_FUNC(8u, cvt_32f, uchar, uchar, float) |
229 | DEF_CVT_SCALE_FUNC(8s8u, cvt_32f, schar, uchar, float) |
230 | DEF_CVT_SCALE_FUNC(16u8u, cvt_32f, ushort, uchar, float) |
231 | DEF_CVT_SCALE_FUNC(16s8u, cvt_32f, short, uchar, float) |
232 | DEF_CVT_SCALE_FUNC(32s8u, cvt_32f, int, uchar, float) |
233 | DEF_CVT_SCALE_FUNC(32f8u, cvt_32f, float, uchar, float) |
234 | DEF_CVT_SCALE_FUNC(64f8u, cvt_32f, double, uchar, float) |
235 | DEF_CVT_SCALE_FUNC(16f8u, cvt_32f, hfloat, uchar, float) |
236 | |
237 | DEF_CVT_SCALE_FUNC(8u8s, cvt_32f, uchar, schar, float) |
238 | DEF_CVT_SCALE_FUNC(8s, cvt_32f, schar, schar, float) |
239 | DEF_CVT_SCALE_FUNC(16u8s, cvt_32f, ushort, schar, float) |
240 | DEF_CVT_SCALE_FUNC(16s8s, cvt_32f, short, schar, float) |
241 | DEF_CVT_SCALE_FUNC(32s8s, cvt_32f, int, schar, float) |
242 | DEF_CVT_SCALE_FUNC(32f8s, cvt_32f, float, schar, float) |
243 | DEF_CVT_SCALE_FUNC(64f8s, cvt_32f, double, schar, float) |
244 | DEF_CVT_SCALE_FUNC(16f8s, cvt_32f, hfloat, schar, float) |
245 | |
246 | DEF_CVT_SCALE_FUNC(8u16u, cvt_32f, uchar, ushort, float) |
247 | DEF_CVT_SCALE_FUNC(8s16u, cvt_32f, schar, ushort, float) |
248 | DEF_CVT_SCALE_FUNC(16u, cvt_32f, ushort, ushort, float) |
249 | DEF_CVT_SCALE_FUNC(16s16u, cvt_32f, short, ushort, float) |
250 | DEF_CVT_SCALE_FUNC(32s16u, cvt_32f, int, ushort, float) |
251 | DEF_CVT_SCALE_FUNC(32f16u, cvt_32f, float, ushort, float) |
252 | DEF_CVT_SCALE_FUNC(64f16u, cvt_32f, double, ushort, float) |
253 | DEF_CVT_SCALE_FUNC(16f16u, cvt1_32f, hfloat, ushort, float) |
254 | |
255 | DEF_CVT_SCALE_FUNC(8u16s, cvt_32f, uchar, short, float) |
256 | DEF_CVT_SCALE_FUNC(8s16s, cvt_32f, schar, short, float) |
257 | DEF_CVT_SCALE_FUNC(16u16s, cvt_32f, ushort, short, float) |
258 | DEF_CVT_SCALE_FUNC(16s, cvt_32f, short, short, float) |
259 | DEF_CVT_SCALE_FUNC(32s16s, cvt_32f, int, short, float) |
260 | DEF_CVT_SCALE_FUNC(32f16s, cvt_32f, float, short, float) |
261 | DEF_CVT_SCALE_FUNC(64f16s, cvt_32f, double, short, float) |
262 | DEF_CVT_SCALE_FUNC(16f16s, cvt1_32f, hfloat, short, float) |
263 | |
264 | DEF_CVT_SCALE_FUNC(8u32s, cvt_32f, uchar, int, float) |
265 | DEF_CVT_SCALE_FUNC(8s32s, cvt_32f, schar, int, float) |
266 | DEF_CVT_SCALE_FUNC(16u32s, cvt_32f, ushort, int, float) |
267 | DEF_CVT_SCALE_FUNC(16s32s, cvt_32f, short, int, float) |
268 | DEF_CVT_SCALE_FUNC(32s, cvt_64f, int, int, double) |
269 | DEF_CVT_SCALE_FUNC(32f32s, cvt_32f, float, int, float) |
270 | DEF_CVT_SCALE_FUNC(64f32s, cvt_64f, double, int, double) |
271 | DEF_CVT_SCALE_FUNC(16f32s, cvt1_32f, hfloat, int, float) |
272 | |
273 | DEF_CVT_SCALE_FUNC(8u32f, cvt_32f, uchar, float, float) |
274 | DEF_CVT_SCALE_FUNC(8s32f, cvt_32f, schar, float, float) |
275 | DEF_CVT_SCALE_FUNC(16u32f, cvt_32f, ushort, float, float) |
276 | DEF_CVT_SCALE_FUNC(16s32f, cvt_32f, short, float, float) |
277 | DEF_CVT_SCALE_FUNC(32s32f, cvt_32f, int, float, float) |
278 | DEF_CVT_SCALE_FUNC(32f, cvt_32f, float, float, float) |
279 | DEF_CVT_SCALE_FUNC(64f32f, cvt_64f, double, float, double) |
280 | DEF_CVT_SCALE_FUNC(16f32f, cvt1_32f, hfloat, float, float) |
281 | |
282 | DEF_CVT_SCALE_FUNC(8u64f, cvt_64f, uchar, double, double) |
283 | DEF_CVT_SCALE_FUNC(8s64f, cvt_64f, schar, double, double) |
284 | DEF_CVT_SCALE_FUNC(16u64f, cvt_64f, ushort, double, double) |
285 | DEF_CVT_SCALE_FUNC(16s64f, cvt_64f, short, double, double) |
286 | DEF_CVT_SCALE_FUNC(32s64f, cvt_64f, int, double, double) |
287 | DEF_CVT_SCALE_FUNC(32f64f, cvt_64f, float, double, double) |
288 | DEF_CVT_SCALE_FUNC(64f, cvt_64f, double, double, double) |
289 | DEF_CVT_SCALE_FUNC(16f64f, cvt_64f, hfloat, double, double) |
290 | |
291 | DEF_CVT_SCALE_FUNC(8u16f, cvt1_32f, uchar, hfloat, float) |
292 | DEF_CVT_SCALE_FUNC(8s16f, cvt1_32f, schar, hfloat, float) |
293 | DEF_CVT_SCALE_FUNC(16u16f, cvt1_32f, ushort, hfloat, float) |
294 | DEF_CVT_SCALE_FUNC(16s16f, cvt1_32f, short, hfloat, float) |
295 | DEF_CVT_SCALE_FUNC(32s16f, cvt1_32f, int, hfloat, float) |
296 | DEF_CVT_SCALE_FUNC(32f16f, cvt1_32f, float, hfloat, float) |
297 | DEF_CVT_SCALE_FUNC(64f16f, cvt_64f, double, hfloat, double) |
298 | DEF_CVT_SCALE_FUNC(16f, cvt1_32f, hfloat, hfloat, float) |
299 | |
300 | BinaryFunc getCvtScaleAbsFunc(int depth) |
301 | { |
302 | static BinaryFunc cvtScaleAbsTab[CV_DEPTH_MAX] = |
303 | { |
304 | (BinaryFunc)cvtScaleAbs8u, (BinaryFunc)cvtScaleAbs8s8u, (BinaryFunc)cvtScaleAbs16u8u, |
305 | (BinaryFunc)cvtScaleAbs16s8u, (BinaryFunc)cvtScaleAbs32s8u, (BinaryFunc)cvtScaleAbs32f8u, |
306 | (BinaryFunc)cvtScaleAbs64f8u, 0 |
307 | }; |
308 | |
309 | return cvtScaleAbsTab[depth]; |
310 | } |
311 | |
312 | BinaryFunc getConvertScaleFunc(int sdepth, int ddepth) |
313 | { |
314 | static BinaryFunc cvtScaleTab[CV_DEPTH_MAX][CV_DEPTH_MAX] = |
315 | { |
316 | { |
317 | (BinaryFunc)GET_OPTIMIZED(cvtScale8u), (BinaryFunc)GET_OPTIMIZED(cvtScale8s8u), (BinaryFunc)GET_OPTIMIZED(cvtScale16u8u), |
318 | (BinaryFunc)GET_OPTIMIZED(cvtScale16s8u), (BinaryFunc)GET_OPTIMIZED(cvtScale32s8u), (BinaryFunc)GET_OPTIMIZED(cvtScale32f8u), |
319 | (BinaryFunc)cvtScale64f8u, (BinaryFunc)cvtScale16f8u |
320 | }, |
321 | { |
322 | (BinaryFunc)GET_OPTIMIZED(cvtScale8u8s), (BinaryFunc)GET_OPTIMIZED(cvtScale8s), (BinaryFunc)GET_OPTIMIZED(cvtScale16u8s), |
323 | (BinaryFunc)GET_OPTIMIZED(cvtScale16s8s), (BinaryFunc)GET_OPTIMIZED(cvtScale32s8s), (BinaryFunc)GET_OPTIMIZED(cvtScale32f8s), |
324 | (BinaryFunc)cvtScale64f8s, (BinaryFunc)cvtScale16f8s |
325 | }, |
326 | { |
327 | (BinaryFunc)GET_OPTIMIZED(cvtScale8u16u), (BinaryFunc)GET_OPTIMIZED(cvtScale8s16u), (BinaryFunc)GET_OPTIMIZED(cvtScale16u), |
328 | (BinaryFunc)GET_OPTIMIZED(cvtScale16s16u), (BinaryFunc)GET_OPTIMIZED(cvtScale32s16u), (BinaryFunc)GET_OPTIMIZED(cvtScale32f16u), |
329 | (BinaryFunc)cvtScale64f16u, (BinaryFunc)cvtScale16f16u |
330 | }, |
331 | { |
332 | (BinaryFunc)GET_OPTIMIZED(cvtScale8u16s), (BinaryFunc)GET_OPTIMIZED(cvtScale8s16s), (BinaryFunc)GET_OPTIMIZED(cvtScale16u16s), |
333 | (BinaryFunc)GET_OPTIMIZED(cvtScale16s), (BinaryFunc)GET_OPTIMIZED(cvtScale32s16s), (BinaryFunc)GET_OPTIMIZED(cvtScale32f16s), |
334 | (BinaryFunc)cvtScale64f16s, (BinaryFunc)cvtScale16f16s |
335 | }, |
336 | { |
337 | (BinaryFunc)GET_OPTIMIZED(cvtScale8u32s), (BinaryFunc)GET_OPTIMIZED(cvtScale8s32s), (BinaryFunc)GET_OPTIMIZED(cvtScale16u32s), |
338 | (BinaryFunc)GET_OPTIMIZED(cvtScale16s32s), (BinaryFunc)GET_OPTIMIZED(cvtScale32s), (BinaryFunc)GET_OPTIMIZED(cvtScale32f32s), |
339 | (BinaryFunc)cvtScale64f32s, (BinaryFunc)cvtScale16f32s |
340 | }, |
341 | { |
342 | (BinaryFunc)GET_OPTIMIZED(cvtScale8u32f), (BinaryFunc)GET_OPTIMIZED(cvtScale8s32f), (BinaryFunc)GET_OPTIMIZED(cvtScale16u32f), |
343 | (BinaryFunc)GET_OPTIMIZED(cvtScale16s32f), (BinaryFunc)GET_OPTIMIZED(cvtScale32s32f), (BinaryFunc)GET_OPTIMIZED(cvtScale32f), |
344 | (BinaryFunc)cvtScale64f32f, (BinaryFunc)cvtScale16f32f |
345 | }, |
346 | { |
347 | (BinaryFunc)cvtScale8u64f, (BinaryFunc)cvtScale8s64f, (BinaryFunc)cvtScale16u64f, |
348 | (BinaryFunc)cvtScale16s64f, (BinaryFunc)cvtScale32s64f, (BinaryFunc)cvtScale32f64f, |
349 | (BinaryFunc)cvtScale64f, (BinaryFunc)cvtScale16f64f |
350 | }, |
351 | { |
352 | (BinaryFunc)cvtScale8u16f, (BinaryFunc)cvtScale8s16f, (BinaryFunc)cvtScale16u16f, |
353 | (BinaryFunc)cvtScale16s16f, (BinaryFunc)cvtScale32s16f, (BinaryFunc)cvtScale32f16f, |
354 | (BinaryFunc)cvtScale64f16f, (BinaryFunc)cvtScale16f |
355 | }, |
356 | }; |
357 | |
358 | return cvtScaleTab[CV_MAT_DEPTH(ddepth)][CV_MAT_DEPTH(sdepth)]; |
359 | } |
360 | |
361 | #endif |
362 | |
363 | CV_CPU_OPTIMIZATION_NAMESPACE_END |
364 | } // namespace |
365 | |