/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2014-2015, Itseez Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

/* ////////////////////////////////////////////////////////////////////
//
//  Geometrical transforms on images and matrices: rotation, zoom etc.
//
// */
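// A minimal usage sketch of the public entry point implemented in this file
// (illustrative only; the horizontal-flip map and the names src/mapx/mapy/dst
// are made up for the example):
//
//     Mat src = imread("input.png");
//     Mat mapx(src.size(), CV_32FC1), mapy(src.size(), CV_32FC1);
//     for (int y = 0; y < src.rows; y++)
//         for (int x = 0; x < src.cols; x++)
//         {
//             mapx.at<float>(y, x) = (float)(src.cols - 1 - x);  // flip horizontally
//             mapy.at<float>(y, x) = (float)y;
//         }
//     Mat dst;
//     remap(src, dst, mapx, mapy, INTER_LINEAR, BORDER_CONSTANT, Scalar());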

#include "precomp.hpp"
#include "opencl_kernels_imgproc.hpp"
#include "hal_replacement.hpp"
#include <opencv2/core/utils/configuration.private.hpp>
#include "opencv2/core/hal/intrin.hpp"
#include "opencv2/core/softfloat.hpp"
#include "imgwarp.hpp"

using namespace cv;

namespace cv
{

#if defined (HAVE_IPP) && (!IPP_DISABLE_WARPAFFINE || !IPP_DISABLE_WARPPERSPECTIVE || !IPP_DISABLE_REMAP)
typedef IppStatus (CV_STDCALL* ippiSetFunc)(const void*, void *, int, IppiSize);

template <int channels, typename Type>
bool IPPSetSimple(cv::Scalar value, void *dataPointer, int step, IppiSize &size, ippiSetFunc func)
{
    CV_INSTRUMENT_REGION_IPP();

    Type values[channels];
    for( int i = 0; i < channels; i++ )
        values[i] = saturate_cast<Type>(value[i]);
    return func(values, dataPointer, step, size) >= 0;
}

static bool IPPSet(const cv::Scalar &value, void *dataPointer, int step, IppiSize &size, int channels, int depth)
{
    CV_INSTRUMENT_REGION_IPP();

    if( channels == 1 )
    {
        switch( depth )
        {
        case CV_8U:
            return CV_INSTRUMENT_FUN_IPP(ippiSet_8u_C1R, saturate_cast<Ipp8u>(value[0]), (Ipp8u *)dataPointer, step, size) >= 0;
        case CV_16U:
            return CV_INSTRUMENT_FUN_IPP(ippiSet_16u_C1R, saturate_cast<Ipp16u>(value[0]), (Ipp16u *)dataPointer, step, size) >= 0;
        case CV_32F:
            return CV_INSTRUMENT_FUN_IPP(ippiSet_32f_C1R, saturate_cast<Ipp32f>(value[0]), (Ipp32f *)dataPointer, step, size) >= 0;
        }
    }
    else
    {
        if( channels == 3 )
        {
            switch( depth )
            {
            case CV_8U:
                return IPPSetSimple<3, Ipp8u>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_8u_C3R);
            case CV_16U:
                return IPPSetSimple<3, Ipp16u>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_16u_C3R);
            case CV_32F:
                return IPPSetSimple<3, Ipp32f>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_32f_C3R);
            }
        }
        else if( channels == 4 )
        {
            switch( depth )
            {
            case CV_8U:
                return IPPSetSimple<4, Ipp8u>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_8u_C4R);
            case CV_16U:
                return IPPSetSimple<4, Ipp16u>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_16u_C4R);
            case CV_32F:
                return IPPSetSimple<4, Ipp32f>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_32f_C4R);
            }
        }
    }
    return false;
}
#endif

/************** interpolation formulas and tables ***************/

const int INTER_REMAP_COEF_BITS=15;
const int INTER_REMAP_COEF_SCALE=1 << INTER_REMAP_COEF_BITS;
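// The integer interpolation tables below store each floating-point weight w in
// Q15 fixed point, i.e. as saturate_cast<short>(w*INTER_REMAP_COEF_SCALE) with
// INTER_REMAP_COEF_SCALE == 1 << 15 == 32768, so a weight of 0.25 becomes 8192.
// Accumulated sums are brought back with a rounding right shift,
// (sum + (1 << 14)) >> 15, implemented by FixedPtCast further down.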

static uchar NNDeltaTab_i[INTER_TAB_SIZE2][2];

static float BilinearTab_f[INTER_TAB_SIZE2][2][2];
static short BilinearTab_i[INTER_TAB_SIZE2][2][2];

#if CV_SIMD128
static short BilinearTab_iC4_buf[INTER_TAB_SIZE2+2][2][8];
static short (*BilinearTab_iC4)[2][8] = (short (*)[2][8])alignPtr(BilinearTab_iC4_buf, 16);
#endif

static float BicubicTab_f[INTER_TAB_SIZE2][4][4];
static short BicubicTab_i[INTER_TAB_SIZE2][4][4];

static float Lanczos4Tab_f[INTER_TAB_SIZE2][8][8];
static short Lanczos4Tab_i[INTER_TAB_SIZE2][8][8];

static inline void interpolateLinear( float x, float* coeffs )
{
    coeffs[0] = 1.f - x;
    coeffs[1] = x;
}

static inline void interpolateCubic( float x, float* coeffs )
{
    const float A = -0.75f;

    coeffs[0] = ((A*(x + 1) - 5*A)*(x + 1) + 8*A)*(x + 1) - 4*A;
    coeffs[1] = ((A + 2)*x - (A + 3))*x*x + 1;
    coeffs[2] = ((A + 2)*(1 - x) - (A + 3))*(1 - x)*(1 - x) + 1;
    coeffs[3] = 1.f - coeffs[0] - coeffs[1] - coeffs[2];
}
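
// interpolateLanczos4() below evaluates the 8-tap Lanczos-4 window at offsets
// x+3-i for i = 0..7. The cs[][] table folds in sin/cos values at multiples of
// pi/4 so that only one sin/cos pair is computed per call, and the final loop
// normalizes the eight weights so they sum to 1 (up to float rounding).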
static inline void interpolateLanczos4( float x, float* coeffs )
{
    static const double s45 = 0.70710678118654752440084436210485;
    static const double cs[][2]=
    {{1, 0}, {-s45, -s45}, {0, 1}, {s45, -s45}, {-1, 0}, {s45, s45}, {0, -1}, {-s45, s45}};

    if( x < FLT_EPSILON )
    {
        for( int i = 0; i < 8; i++ )
            coeffs[i] = 0;
        coeffs[3] = 1;
        return;
    }

    float sum = 0;
    double y0=-(x+3)*CV_PI*0.25, s0 = std::sin(y0), c0 = std::cos(y0);
    for(int i = 0; i < 8; i++ )
    {
        double y = -(x+3-i)*CV_PI*0.25;
        coeffs[i] = (float)((cs[i][0]*s0 + cs[i][1]*c0)/(y*y));
        sum += coeffs[i];
    }

    sum = 1.f/sum;
    for(int i = 0; i < 8; i++ )
        coeffs[i] *= sum;
}

static void initInterTab1D(int method, float* tab, int tabsz)
{
    float scale = 1.f/tabsz;
    if( method == INTER_LINEAR )
    {
        for( int i = 0; i < tabsz; i++, tab += 2 )
            interpolateLinear( i*scale, tab );
    }
    else if( method == INTER_CUBIC )
    {
        for( int i = 0; i < tabsz; i++, tab += 4 )
            interpolateCubic( i*scale, tab );
    }
    else if( method == INTER_LANCZOS4 )
    {
        for( int i = 0; i < tabsz; i++, tab += 8 )
            interpolateLanczos4( i*scale, tab );
    }
    else
        CV_Error( cv::Error::StsBadArg, "Unknown interpolation method" );
}

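// initInterTab2D() builds the 2D interpolation tables as outer products of the
// 1D tables above: for every fractional position on the
// INTER_TAB_SIZE x INTER_TAB_SIZE grid it stores a ksize x ksize block of
// weights, both as floats and in Q15 fixed point. Because each fixed-point
// weight is rounded independently, an integer block may not sum to exactly
// INTER_REMAP_COEF_SCALE; the correction step below pushes the leftover
// difference into one of the four central taps so that the sum is exact.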
static const void* initInterTab2D( int method, bool fixpt )
{
    static bool inittab[INTER_MAX+1] = {false};
    float* tab = 0;
    short* itab = 0;
    int ksize = 0;
    if( method == INTER_LINEAR )
        tab = BilinearTab_f[0][0], itab = BilinearTab_i[0][0], ksize=2;
    else if( method == INTER_CUBIC )
        tab = BicubicTab_f[0][0], itab = BicubicTab_i[0][0], ksize=4;
    else if( method == INTER_LANCZOS4 )
        tab = Lanczos4Tab_f[0][0], itab = Lanczos4Tab_i[0][0], ksize=8;
    else
        CV_Error( cv::Error::StsBadArg, "Unknown/unsupported interpolation type" );

    if( !inittab[method] )
    {
        AutoBuffer<float> _tab(8*INTER_TAB_SIZE);
        int i, j, k1, k2;
        initInterTab1D(method, _tab.data(), INTER_TAB_SIZE);
        for( i = 0; i < INTER_TAB_SIZE; i++ )
            for( j = 0; j < INTER_TAB_SIZE; j++, tab += ksize*ksize, itab += ksize*ksize )
            {
                int isum = 0;
                NNDeltaTab_i[i*INTER_TAB_SIZE+j][0] = j < INTER_TAB_SIZE/2;
                NNDeltaTab_i[i*INTER_TAB_SIZE+j][1] = i < INTER_TAB_SIZE/2;

                for( k1 = 0; k1 < ksize; k1++ )
                {
                    float vy = _tab[i*ksize + k1];
                    for( k2 = 0; k2 < ksize; k2++ )
                    {
                        float v = vy*_tab[j*ksize + k2];
                        tab[k1*ksize + k2] = v;
                        isum += itab[k1*ksize + k2] = saturate_cast<short>(v*INTER_REMAP_COEF_SCALE);
                    }
                }

                if( isum != INTER_REMAP_COEF_SCALE )
                {
                    int diff = isum - INTER_REMAP_COEF_SCALE;
                    int ksize2 = ksize/2, Mk1=ksize2, Mk2=ksize2, mk1=ksize2, mk2=ksize2;
                    for( k1 = ksize2; k1 < ksize2+2; k1++ )
                        for( k2 = ksize2; k2 < ksize2+2; k2++ )
                        {
                            if( itab[k1*ksize+k2] < itab[mk1*ksize+mk2] )
                                mk1 = k1, mk2 = k2;
                            else if( itab[k1*ksize+k2] > itab[Mk1*ksize+Mk2] )
                                Mk1 = k1, Mk2 = k2;
                        }
                    if( diff < 0 )
                        itab[Mk1*ksize + Mk2] = (short)(itab[Mk1*ksize + Mk2] - diff);
                    else
                        itab[mk1*ksize + mk2] = (short)(itab[mk1*ksize + mk2] - diff);
                }
            }
        tab -= INTER_TAB_SIZE2*ksize*ksize;
        itab -= INTER_TAB_SIZE2*ksize*ksize;
#if CV_SIMD128
        if( method == INTER_LINEAR )
        {
            for( i = 0; i < INTER_TAB_SIZE2; i++ )
                for( j = 0; j < 4; j++ )
                {
                    BilinearTab_iC4[i][0][j*2] = BilinearTab_i[i][0][0];
                    BilinearTab_iC4[i][0][j*2+1] = BilinearTab_i[i][0][1];
                    BilinearTab_iC4[i][1][j*2] = BilinearTab_i[i][1][0];
                    BilinearTab_iC4[i][1][j*2+1] = BilinearTab_i[i][1][1];
                }
        }
#endif
        inittab[method] = true;
    }
    return fixpt ? (const void*)itab : (const void*)tab;
}

#ifndef __MINGW32__
static bool initAllInterTab2D()
{
    return  initInterTab2D( INTER_LINEAR, false ) &&
            initInterTab2D( INTER_LINEAR, true ) &&
            initInterTab2D( INTER_CUBIC, false ) &&
            initInterTab2D( INTER_CUBIC, true ) &&
            initInterTab2D( INTER_LANCZOS4, false ) &&
            initInterTab2D( INTER_LANCZOS4, true );
}

static volatile bool doInitAllInterTab2D = initAllInterTab2D();
#endif

template<typename ST, typename DT> struct Cast
{
    typedef ST type1;
    typedef DT rtype;

    DT operator()(ST val) const { return saturate_cast<DT>(val); }
};

template<typename ST, typename DT, int bits> struct FixedPtCast
{
    typedef ST type1;
    typedef DT rtype;
    enum { SHIFT = bits, DELTA = 1 << (bits-1) };

    DT operator()(ST val) const { return saturate_cast<DT>((val + DELTA)>>SHIFT); }
};

static inline int clip(int x, int a, int b)
{
    return x >= a ? (x < b ? x : b-1) : a;
}

/****************************************************************************************\
*                       General warping (affine, perspective, remap)                    *
\****************************************************************************************/
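
// The remap kernels below receive the coordinate map split into two parts:
// _xy is a CV_16SC2 matrix with the integer (top-left) source coordinates for
// each destination pixel, and _fxy, where used, is a CV_16UC1 matrix with the
// index of the fractional offset into the tables built by initInterTab2D().
// When WARP_RELATIVE_MAP is used (isRelative == true), the map holds
// displacements, so the destination coordinates plus _offset are added back in.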

template<typename T, bool isRelative>
static void remapNearest( const Mat& _src, Mat& _dst, const Mat& _xy,
                          int borderType, const Scalar& _borderValue, const Point& _offset )
{
    Size ssize = _src.size(), dsize = _dst.size();
    const int cn = _src.channels();
    const T* S0 = _src.ptr<T>();
    T cval[CV_CN_MAX];
    size_t sstep = _src.step/sizeof(S0[0]);

    for(int k = 0; k < cn; k++ )
        cval[k] = saturate_cast<T>(_borderValue[k & 3]);

    unsigned width1 = ssize.width, height1 = ssize.height;

    if( _dst.isContinuous() && _xy.isContinuous() && !isRelative )
    {
        dsize.width *= dsize.height;
        dsize.height = 1;
    }

    for(int dy = 0; dy < dsize.height; dy++ )
    {
        T* D = _dst.ptr<T>(dy);
        const short* XY = _xy.ptr<short>(dy);
        const int off_y = isRelative ? (_offset.y+dy) : 0;
        if( cn == 1 )
        {
            for(int dx = 0; dx < dsize.width; dx++ )
            {
                const int off_x = isRelative ? (_offset.x+dx) : 0;
                int sx = XY[dx*2]+off_x, sy = XY[dx*2+1]+off_y;
                if( (unsigned)sx < width1 && (unsigned)sy < height1 )
                    D[dx] = S0[sy*sstep + sx];
                else
                {
                    if( borderType == BORDER_REPLICATE )
                    {
                        sx = clip(sx, 0, ssize.width);
                        sy = clip(sy, 0, ssize.height);
                        D[dx] = S0[sy*sstep + sx];
                    }
                    else if( borderType == BORDER_CONSTANT )
                        D[dx] = cval[0];
                    else if( borderType != BORDER_TRANSPARENT )
                    {
                        sx = borderInterpolate(sx, ssize.width, borderType);
                        sy = borderInterpolate(sy, ssize.height, borderType);
                        D[dx] = S0[sy*sstep + sx];
                    }
                }
            }
        }
        else
        {
            for(int dx = 0; dx < dsize.width; dx++, D += cn )
            {
                const int off_x = isRelative ? (_offset.x+dx) : 0;
                int sx = XY[dx*2]+off_x, sy = XY[dx*2+1]+off_y;
                const T *S;
                if( (unsigned)sx < width1 && (unsigned)sy < height1 )
                {
                    if( cn == 3 )
                    {
                        S = S0 + sy*sstep + sx*3;
                        D[0] = S[0], D[1] = S[1], D[2] = S[2];
                    }
                    else if( cn == 4 )
                    {
                        S = S0 + sy*sstep + sx*4;
                        D[0] = S[0], D[1] = S[1], D[2] = S[2], D[3] = S[3];
                    }
                    else
                    {
                        S = S0 + sy*sstep + sx*cn;
                        for(int k = 0; k < cn; k++ )
                            D[k] = S[k];
                    }
                }
                else if( borderType != BORDER_TRANSPARENT )
                {
                    if( borderType == BORDER_REPLICATE )
                    {
                        sx = clip(sx, 0, ssize.width);
                        sy = clip(sy, 0, ssize.height);
                        S = S0 + sy*sstep + sx*cn;
                    }
                    else if( borderType == BORDER_CONSTANT )
                        S = &cval[0];
                    else
                    {
                        sx = borderInterpolate(sx, ssize.width, borderType);
                        sy = borderInterpolate(sy, ssize.height, borderType);
                        S = S0 + sy*sstep + sx*cn;
                    }
                    for(int k = 0; k < cn; k++ )
                        D[k] = S[k];
                }
            }
        }
    }
}

template<bool>
struct RemapNoVec
{
    int operator()( const Mat&, void*, const short*, const ushort*,
                    const void*, int, cv::Point& ) const { return 0; }
};

#if CV_SIMD128

typedef unsigned short CV_DECL_ALIGNED(1) unaligned_ushort;
typedef int CV_DECL_ALIGNED(1) unaligned_int;

template<bool isRelative>
struct RemapVec_8u
{
    int operator()( const Mat& _src, void* _dst, const short* XY,
                    const ushort* FXY, const void* _wtab, int width, const Point& _offset ) const
    {
        int cn = _src.channels(), x = 0, sstep = (int)_src.step;
        if( (cn != 1 && cn != 3 && cn != 4) || sstep >= 0x8000 )
            return 0;

        const uchar *S0 = _src.ptr(), *S1 = _src.ptr(1);
        const short* wtab = cn == 1 ? (const short*)_wtab : &BilinearTab_iC4[0][0][0];
        uchar* D = (uchar*)_dst;
        v_int32x4 delta = v_setall_s32(INTER_REMAP_COEF_SCALE / 2);
        v_int16x8 xy2ofs = v_reinterpret_as_s16(v_setall_s32(cn + (sstep << 16)));
        int CV_DECL_ALIGNED(16) iofs0[4], iofs1[4];
        const uchar* src_limit_8bytes = _src.datalimit - VTraits<v_int16x8>::vlanes();
#define CV_PICK_AND_PACK_RGB(ptr, offset, result)                                \
        {                                                                         \
            const uchar* const p = ((const uchar*)ptr) + (offset);                \
            if (p <= src_limit_8bytes)                                            \
            {                                                                     \
                v_uint8x16 rrggbb, dummy;                                         \
                v_uint16x8 rrggbb8, dummy8;                                       \
                v_uint8x16 rgb0 = v_reinterpret_as_u8(v_int32x4(*(unaligned_int*)(p), 0, 0, 0));     \
                v_uint8x16 rgb1 = v_reinterpret_as_u8(v_int32x4(*(unaligned_int*)(p + 3), 0, 0, 0)); \
                v_zip(rgb0, rgb1, rrggbb, dummy);                                 \
                v_expand(rrggbb, rrggbb8, dummy8);                                \
                result = v_reinterpret_as_s16(rrggbb8);                           \
            }                                                                     \
            else                                                                  \
            {                                                                     \
                result = v_int16x8((short)p[0], (short)p[3], /* r0r1 */           \
                                   (short)p[1], (short)p[4], /* g0g1 */           \
                                   (short)p[2], (short)p[5], /* b0b1 */ 0, 0);    \
            }                                                                     \
        }
#define CV_PICK_AND_PACK_RGBA(ptr, offset, result)                                \
        {                                                                         \
            const uchar* const p = ((const uchar*)ptr) + (offset);                \
            CV_DbgAssert(p <= src_limit_8bytes);                                  \
            v_uint8x16 rrggbbaa, dummy;                                           \
            v_uint16x8 rrggbbaa8, dummy8;                                         \
            v_uint8x16 rgba0 = v_reinterpret_as_u8(v_int32x4(*(unaligned_int*)(p), 0, 0, 0));        \
            v_uint8x16 rgba1 = v_reinterpret_as_u8(v_int32x4(*(unaligned_int*)(p + VTraits<v_int32x4>::vlanes()), 0, 0, 0)); \
            v_zip(rgba0, rgba1, rrggbbaa, dummy);                                 \
            v_expand(rrggbbaa, rrggbbaa8, dummy8);                                \
            result = v_reinterpret_as_s16(rrggbbaa8);                             \
        }
#define CV_PICK_AND_PACK4(base,offset)                                                             \
        v_uint16x8(*(unaligned_ushort*)(base + offset[0]), *(unaligned_ushort*)(base + offset[1]), \
                   *(unaligned_ushort*)(base + offset[2]), *(unaligned_ushort*)(base + offset[3]), \
                   0, 0, 0, 0)

        const short _rel_offset_x = static_cast<short>(_offset.x);
        const short _rel_offset_y = static_cast<short>(_offset.y);
        v_int16x8 v_dxy0(_rel_offset_x, _rel_offset_y, _rel_offset_x, _rel_offset_y, _rel_offset_x, _rel_offset_y, _rel_offset_x, _rel_offset_y);
        v_int16x8 v_dxy1 = v_dxy0;
        v_dxy0 = v_add(v_dxy0, v_int16x8(0, 0, 1, 0, 2, 0, 3, 0));
        v_dxy1 = v_add(v_dxy1, v_int16x8(4, 0, 5, 0, 6, 0, 7, 0));
        if( cn == 1 )
        {
            for( ; x <= width - 8; x += 8 )
            {
                v_int16x8 _xy0 = v_load(XY + x*2);
                v_int16x8 _xy1 = v_load(XY + x*2 + 8);
                if (isRelative)
                {
                    const short x_s16 = static_cast<short>(x);
                    v_int16x8 v_dxy01(x_s16, 0, x_s16, 0, x_s16, 0, x_s16, 0);
                    _xy0 = v_add(_xy0, v_add(v_dxy01, v_dxy0));
                    _xy1 = v_add(_xy1, v_add(v_dxy01, v_dxy1));
                }
                v_int32x4 v0, v1, v2, v3, a0, b0, c0, d0, a1, b1, c1, d1, a2, b2, c2, d2;

                v_int32x4 xy0 = v_dotprod( _xy0, xy2ofs );
                v_int32x4 xy1 = v_dotprod( _xy1, xy2ofs );
                v_store( iofs0, xy0 );
                v_store( iofs1, xy1 );

                v_uint16x8 stub, dummy;
                v_uint16x8 vec16;
                vec16 = CV_PICK_AND_PACK4(S0, iofs0);
                v_expand(v_reinterpret_as_u8(vec16), stub, dummy);
                v0 = v_reinterpret_as_s32(stub);
                vec16 = CV_PICK_AND_PACK4(S1, iofs0);
                v_expand(v_reinterpret_as_u8(vec16), stub, dummy);
                v1 = v_reinterpret_as_s32(stub);

                v_zip(v_load_low((int*)(wtab + FXY[x] * 4)), v_load_low((int*)(wtab + FXY[x + 1] * 4)), a0, a1);
                v_zip(v_load_low((int*)(wtab + FXY[x + 2] * 4)), v_load_low((int*)(wtab + FXY[x + 3] * 4)), b0, b1);
                v_recombine(a0, b0, a2, b2);
                v1 = v_dotprod(v_reinterpret_as_s16(v1), v_reinterpret_as_s16(b2), delta);
                v0 = v_dotprod(v_reinterpret_as_s16(v0), v_reinterpret_as_s16(a2), v1);

                vec16 = CV_PICK_AND_PACK4(S0, iofs1);
                v_expand(v_reinterpret_as_u8(vec16), stub, dummy);
                v2 = v_reinterpret_as_s32(stub);
                vec16 = CV_PICK_AND_PACK4(S1, iofs1);
                v_expand(v_reinterpret_as_u8(vec16), stub, dummy);
                v3 = v_reinterpret_as_s32(stub);

                v_zip(v_load_low((int*)(wtab + FXY[x + 4] * 4)), v_load_low((int*)(wtab + FXY[x + 5] * 4)), c0, c1);
                v_zip(v_load_low((int*)(wtab + FXY[x + 6] * 4)), v_load_low((int*)(wtab + FXY[x + 7] * 4)), d0, d1);
                v_recombine(c0, d0, c2, d2);
                v3 = v_dotprod(v_reinterpret_as_s16(v3), v_reinterpret_as_s16(d2), delta);
                v2 = v_dotprod(v_reinterpret_as_s16(v2), v_reinterpret_as_s16(c2), v3);

                v0 = v_shr<INTER_REMAP_COEF_BITS>(v0);
                v2 = v_shr<INTER_REMAP_COEF_BITS>(v2);
                v_pack_u_store(D + x, v_pack(v0, v2));
            }
        }
        else if( cn == 3 )
        {
            for( ; x <= width - 5; x += 4, D += 12 )
            {
                v_int16x8 u0, v0, u1, v1;
                v_int16x8 _xy0 = v_load(XY + x * 2);
                if (isRelative)
                {
                    const short x_s16 = static_cast<short>(x);
                    v_int16x8 v_dxy01(x_s16, 0, x_s16, 0, x_s16, 0, x_s16, 0);
                    _xy0 = v_add(_xy0, v_add(v_dxy01, v_dxy0));
                }

                v_int32x4 xy0 = v_dotprod(_xy0, xy2ofs);
                v_store(iofs0, xy0);

                int offset0 = FXY[x] * 16;
                int offset1 = FXY[x + 1] * 16;
                int offset2 = FXY[x + 2] * 16;
                int offset3 = FXY[x + 3] * 16;
                v_int16x8 w00 = v_load(wtab + offset0);
                v_int16x8 w01 = v_load(wtab + offset0 + 8);
                v_int16x8 w10 = v_load(wtab + offset1);
                v_int16x8 w11 = v_load(wtab + offset1 + 8);

                CV_PICK_AND_PACK_RGB(S0, iofs0[0], u0);
                CV_PICK_AND_PACK_RGB(S1, iofs0[0], v0);
                CV_PICK_AND_PACK_RGB(S0, iofs0[1], u1);
                CV_PICK_AND_PACK_RGB(S1, iofs0[1], v1);

                v_int32x4 result0 = v_shr<INTER_REMAP_COEF_BITS>(v_dotprod(u0, w00, v_dotprod(v0, w01, delta)));
                v_int32x4 result1 = v_shr<INTER_REMAP_COEF_BITS>(v_dotprod(u1, w10, v_dotprod(v1, w11, delta)));

                result0 = v_rotate_left<1>(result0);
                v_int16x8 result8 = v_pack(result0, result1);
                v_uint8x16 result16 = v_pack_u(result8, result8);
                v_store_low(D, v_rotate_right<1>(result16));

                w00 = v_load(wtab + offset2);
                w01 = v_load(wtab + offset2 + 8);
                w10 = v_load(wtab + offset3);
                w11 = v_load(wtab + offset3 + 8);
                CV_PICK_AND_PACK_RGB(S0, iofs0[2], u0);
                CV_PICK_AND_PACK_RGB(S1, iofs0[2], v0);
                CV_PICK_AND_PACK_RGB(S0, iofs0[3], u1);
                CV_PICK_AND_PACK_RGB(S1, iofs0[3], v1);

                result0 = v_shr<INTER_REMAP_COEF_BITS>(v_dotprod(u0, w00, v_dotprod(v0, w01, delta)));
                result1 = v_shr<INTER_REMAP_COEF_BITS>(v_dotprod(u1, w10, v_dotprod(v1, w11, delta)));

                result0 = v_rotate_left<1>(result0);
                result8 = v_pack(result0, result1);
                result16 = v_pack_u(result8, result8);
                v_store_low(D + 6, v_rotate_right<1>(result16));
            }
        }
        else if( cn == 4 )
        {
            for( ; x <= width - 4; x += 4, D += 16 )
            {
                v_int16x8 _xy0 = v_load(XY + x * 2);
                if (isRelative)
                {
                    const short x_s16 = static_cast<short>(x);
                    v_int16x8 v_dxy01(x_s16, 0, x_s16, 0, x_s16, 0, x_s16, 0);
                    _xy0 = v_add(_xy0, v_add(v_dxy01, v_dxy0));
                }
                v_int16x8 u0, v0, u1, v1;

                v_int32x4 xy0 = v_dotprod( _xy0, xy2ofs );
                v_store(iofs0, xy0);

                int offset0 = FXY[x] * 16;
                int offset1 = FXY[x + 1] * 16;
                int offset2 = FXY[x + 2] * 16;
                int offset3 = FXY[x + 3] * 16;

                v_int16x8 w00 = v_load(wtab + offset0);
                v_int16x8 w01 = v_load(wtab + offset0 + 8);
                v_int16x8 w10 = v_load(wtab + offset1);
                v_int16x8 w11 = v_load(wtab + offset1 + 8);
                CV_PICK_AND_PACK_RGBA(S0, iofs0[0], u0);
                CV_PICK_AND_PACK_RGBA(S1, iofs0[0], v0);
                CV_PICK_AND_PACK_RGBA(S0, iofs0[1], u1);
                CV_PICK_AND_PACK_RGBA(S1, iofs0[1], v1);

                v_int32x4 result0 = v_shr<INTER_REMAP_COEF_BITS>(v_dotprod(u0, w00, v_dotprod(v0, w01, delta)));
                v_int32x4 result1 = v_shr<INTER_REMAP_COEF_BITS>(v_dotprod(u1, w10, v_dotprod(v1, w11, delta)));
                v_int16x8 result8 = v_pack(result0, result1);
                v_pack_u_store(D, result8);

                w00 = v_load(wtab + offset2);
                w01 = v_load(wtab + offset2 + 8);
                w10 = v_load(wtab + offset3);
                w11 = v_load(wtab + offset3 + 8);
                CV_PICK_AND_PACK_RGBA(S0, iofs0[2], u0);
                CV_PICK_AND_PACK_RGBA(S1, iofs0[2], v0);
                CV_PICK_AND_PACK_RGBA(S0, iofs0[3], u1);
                CV_PICK_AND_PACK_RGBA(S1, iofs0[3], v1);

                result0 = v_shr<INTER_REMAP_COEF_BITS>(v_dotprod(u0, w00, v_dotprod(v0, w01, delta)));
                result1 = v_shr<INTER_REMAP_COEF_BITS>(v_dotprod(u1, w10, v_dotprod(v1, w11, delta)));
                result8 = v_pack(result0, result1);
                v_pack_u_store(D + 8, result8);
            }
        }

        return x;
    }
};

#else

template<bool isRelative> using RemapVec_8u = RemapNoVec<isRelative>;

#endif

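// remapBilinear() walks each destination row and splits it into runs of
// "inlier" pixels, whose 2x2 source neighborhood lies entirely inside the
// image, and border pixels. Inlier runs take the fast path (optionally
// vectorized by VecOp); border runs fall back to per-pixel handling of the
// requested borderType.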
template<class CastOp, class VecOp, typename AT, bool isRelative>
static void remapBilinear( const Mat& _src, Mat& _dst, const Mat& _xy,
                           const Mat& _fxy, const void* _wtab,
                           int borderType, const Scalar& _borderValue, const Point& _offset )
{
    typedef typename CastOp::rtype T;
    typedef typename CastOp::type1 WT;
    Size ssize = _src.size(), dsize = _dst.size();
    const int cn = _src.channels();
    const AT* wtab = (const AT*)_wtab;
    const T* S0 = _src.ptr<T>();
    size_t sstep = _src.step/sizeof(S0[0]);
    T cval[CV_CN_MAX];
    CastOp castOp;
    VecOp vecOp;

    for(int k = 0; k < cn; k++ )
        cval[k] = saturate_cast<T>(_borderValue[k & 3]);

    unsigned width1 = std::max(ssize.width-1, 0), height1 = std::max(ssize.height-1, 0);
    CV_Assert( !ssize.empty() );
#if CV_SIMD128
    if( _src.type() == CV_8UC3 )
        width1 = std::max(ssize.width-2, 0);
#endif

    for(int dy = 0; dy < dsize.height; dy++ )
    {
        T* D = _dst.ptr<T>(dy);
        const short* XY = _xy.ptr<short>(dy);
        const ushort* FXY = _fxy.ptr<ushort>(dy);
        int X0 = 0;
        bool prevInlier = false;
        const int off_y = (isRelative ? (_offset.y+dy) : 0);
        for(int dx = 0; dx <= dsize.width; dx++ )
        {
            bool curInlier = dx < dsize.width ?
                (unsigned)XY[dx*2]+(isRelative ? (_offset.x+dx) : 0) < width1 &&
                (unsigned)XY[dx*2+1]+off_y < height1 : !prevInlier;
            if( curInlier == prevInlier )
                continue;

            int X1 = dx;
            dx = X0;
            X0 = X1;
            prevInlier = curInlier;

            if( !curInlier )
            {
                Point subOffset(_offset.x+dx, _offset.y+dy);
                int len = vecOp( _src, D, XY + dx*2, FXY + dx, wtab, X1 - dx, subOffset );
                D += len*cn;
                dx += len;

                if( cn == 1 )
                {
                    for( ; dx < X1; dx++, D++ )
                    {
                        int sx = XY[dx*2]+(isRelative ? (_offset.x+dx) : 0), sy = XY[dx*2+1]+off_y;
                        const AT* w = wtab + FXY[dx]*4;
                        const T* S = S0 + sy*sstep + sx;
                        *D = castOp(WT(S[0]*w[0] + S[1]*w[1] + S[sstep]*w[2] + S[sstep+1]*w[3]));
                    }
                }
                else if( cn == 2 )
                    for( ; dx < X1; dx++, D += 2 )
                    {
                        int sx = XY[dx*2]+(isRelative ? (_offset.x+dx) : 0), sy = XY[dx*2+1]+off_y;
                        const AT* w = wtab + FXY[dx]*4;
                        const T* S = S0 + sy*sstep + sx*2;
                        WT t0 = S[0]*w[0] + S[2]*w[1] + S[sstep]*w[2] + S[sstep+2]*w[3];
                        WT t1 = S[1]*w[0] + S[3]*w[1] + S[sstep+1]*w[2] + S[sstep+3]*w[3];
                        D[0] = castOp(t0); D[1] = castOp(t1);
                    }
                else if( cn == 3 )
                    for( ; dx < X1; dx++, D += 3 )
                    {
                        int sx = XY[dx*2]+(isRelative ? (_offset.x+dx) : 0), sy = XY[dx*2+1]+off_y;
                        const AT* w = wtab + FXY[dx]*4;
                        const T* S = S0 + sy*sstep + sx*3;
                        WT t0 = S[0]*w[0] + S[3]*w[1] + S[sstep]*w[2] + S[sstep+3]*w[3];
                        WT t1 = S[1]*w[0] + S[4]*w[1] + S[sstep+1]*w[2] + S[sstep+4]*w[3];
                        WT t2 = S[2]*w[0] + S[5]*w[1] + S[sstep+2]*w[2] + S[sstep+5]*w[3];
                        D[0] = castOp(t0); D[1] = castOp(t1); D[2] = castOp(t2);
                    }
                else if( cn == 4 )
                    for( ; dx < X1; dx++, D += 4 )
                    {
                        int sx = XY[dx*2]+(isRelative ? (_offset.x+dx) : 0), sy = XY[dx*2+1]+off_y;
                        const AT* w = wtab + FXY[dx]*4;
                        const T* S = S0 + sy*sstep + sx*4;
                        WT t0 = S[0]*w[0] + S[4]*w[1] + S[sstep]*w[2] + S[sstep+4]*w[3];
                        WT t1 = S[1]*w[0] + S[5]*w[1] + S[sstep+1]*w[2] + S[sstep+5]*w[3];
                        D[0] = castOp(t0); D[1] = castOp(t1);
                        t0 = S[2]*w[0] + S[6]*w[1] + S[sstep+2]*w[2] + S[sstep+6]*w[3];
                        t1 = S[3]*w[0] + S[7]*w[1] + S[sstep+3]*w[2] + S[sstep+7]*w[3];
                        D[2] = castOp(t0); D[3] = castOp(t1);
                    }
                else
                    for( ; dx < X1; dx++, D += cn )
                    {
                        int sx = XY[dx*2]+(isRelative ? (_offset.x+dx) : 0), sy = XY[dx*2+1]+off_y;
                        const AT* w = wtab + FXY[dx]*4;
                        const T* S = S0 + sy*sstep + sx*cn;
                        for(int k = 0; k < cn; k++ )
                        {
                            WT t0 = S[k]*w[0] + S[k+cn]*w[1] + S[sstep+k]*w[2] + S[sstep+k+cn]*w[3];
                            D[k] = castOp(t0);
                        }
                    }
            }
            else
            {
                if (borderType == BORDER_TRANSPARENT) {
                    for (; dx < X1; dx++, D += cn) {
                        if (dx >= dsize.width) continue;
                        const int sx = XY[dx * 2]+(isRelative ? (_offset.x+dx) : 0), sy = XY[dx * 2 + 1]+off_y;
                        // If the mapped point is still within bounds, it did not get computed
                        // because it lacked 4 neighbors. Still, it can be computed with an
                        // approximate formula. If it is outside, the point is left untouched.
                        if (sx >= 0 && sx <= ssize.width - 1 && sy >= 0 && sy <= ssize.height - 1) {
                            const AT* w = wtab + FXY[dx] * 4;
                            WT w_tot = 0;
                            if (sx >= 0 && sy >= 0) w_tot += w[0];
                            if (sy >= 0 && sx < ssize.width - 1) w_tot += w[1];
                            if (sx >= 0 && sy < ssize.height - 1) w_tot += w[2];
                            if (sx < ssize.width - 1 && sy < ssize.height - 1) w_tot += w[3];
                            if (w_tot == 0.f) continue;
                            const WT w_tot_ini = (WT)w[0] + w[1] + w[2] + w[3];
                            const T* S = S0 + sy * sstep + sx * cn;
                            for (int k = 0; k < cn; k++) {
                                WT t0 = 0;
                                if (sx >= 0 && sy >= 0) t0 += S[k] * w[0];
                                if (sy >= 0 && sx < ssize.width - 1) t0 += S[k + cn] * w[1];
                                if (sx >= 0 && sy < ssize.height - 1) t0 += S[sstep + k] * w[2];
                                if (sx < ssize.width - 1 && sy < ssize.height - 1) t0 += S[sstep + k + cn] * w[3];
                                t0 = (WT)(t0 * (float)w_tot_ini / w_tot);
                                D[k] = castOp(t0);
                            }
                        }
                    }
                    continue;
                }

                if( cn == 1 )
                    for( ; dx < X1; dx++, D++ )
                    {
                        int sx = XY[dx*2]+(isRelative ? (_offset.x+dx) : 0), sy = XY[dx*2+1]+off_y;
                        if( borderType == BORDER_CONSTANT &&
                            (sx >= ssize.width || sx+1 < 0 ||
                             sy >= ssize.height || sy+1 < 0) )
                        {
                            D[0] = cval[0];
                        }
                        else
                        {
                            int sx0, sx1, sy0, sy1;
                            T v0, v1, v2, v3;
                            const AT* w = wtab + FXY[dx]*4;
                            if( borderType == BORDER_REPLICATE )
                            {
                                sx0 = clip(sx, 0, ssize.width);
                                sx1 = clip(sx+1, 0, ssize.width);
                                sy0 = clip(sy, 0, ssize.height);
                                sy1 = clip(sy+1, 0, ssize.height);
                                v0 = S0[sy0*sstep + sx0];
                                v1 = S0[sy0*sstep + sx1];
                                v2 = S0[sy1*sstep + sx0];
                                v3 = S0[sy1*sstep + sx1];
                            }
                            else
                            {
                                sx0 = borderInterpolate(sx, ssize.width, borderType);
                                sx1 = borderInterpolate(sx+1, ssize.width, borderType);
                                sy0 = borderInterpolate(sy, ssize.height, borderType);
                                sy1 = borderInterpolate(sy+1, ssize.height, borderType);
                                v0 = sx0 >= 0 && sy0 >= 0 ? S0[sy0*sstep + sx0] : cval[0];
                                v1 = sx1 >= 0 && sy0 >= 0 ? S0[sy0*sstep + sx1] : cval[0];
                                v2 = sx0 >= 0 && sy1 >= 0 ? S0[sy1*sstep + sx0] : cval[0];
                                v3 = sx1 >= 0 && sy1 >= 0 ? S0[sy1*sstep + sx1] : cval[0];
                            }
                            D[0] = castOp(WT(v0*w[0] + v1*w[1] + v2*w[2] + v3*w[3]));
                        }
                    }
                else
                    for( ; dx < X1; dx++, D += cn )
                    {
                        int sx = XY[dx*2]+(isRelative ? (_offset.x+dx) : 0), sy = XY[dx*2+1]+off_y;
                        if( borderType == BORDER_CONSTANT &&
                            (sx >= ssize.width || sx+1 < 0 ||
                             sy >= ssize.height || sy+1 < 0) )
                        {
                            for(int k = 0; k < cn; k++ )
                                D[k] = cval[k];
                        }
                        else
                        {
                            int sx0, sx1, sy0, sy1;
                            const T *v0, *v1, *v2, *v3;
                            const AT* w = wtab + FXY[dx]*4;
                            if( borderType == BORDER_REPLICATE )
                            {
                                sx0 = clip(sx, 0, ssize.width);
                                sx1 = clip(sx+1, 0, ssize.width);
                                sy0 = clip(sy, 0, ssize.height);
                                sy1 = clip(sy+1, 0, ssize.height);
                                v0 = S0 + sy0*sstep + sx0*cn;
                                v1 = S0 + sy0*sstep + sx1*cn;
                                v2 = S0 + sy1*sstep + sx0*cn;
                                v3 = S0 + sy1*sstep + sx1*cn;
                            }
                            else
                            {
                                sx0 = borderInterpolate(sx, ssize.width, borderType);
                                sx1 = borderInterpolate(sx+1, ssize.width, borderType);
                                sy0 = borderInterpolate(sy, ssize.height, borderType);
                                sy1 = borderInterpolate(sy+1, ssize.height, borderType);
                                v0 = sx0 >= 0 && sy0 >= 0 ? S0 + sy0*sstep + sx0*cn : &cval[0];
                                v1 = sx1 >= 0 && sy0 >= 0 ? S0 + sy0*sstep + sx1*cn : &cval[0];
                                v2 = sx0 >= 0 && sy1 >= 0 ? S0 + sy1*sstep + sx0*cn : &cval[0];
                                v3 = sx1 >= 0 && sy1 >= 0 ? S0 + sy1*sstep + sx1*cn : &cval[0];
                            }
                            for(int k = 0; k < cn; k++ )
                                D[k] = castOp(WT(v0[k]*w[0] + v1[k]*w[1] + v2[k]*w[2] + v3[k]*w[3]));
                        }
                    }
            }
        }
    }
}

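// remapBicubic() samples a 4x4 neighborhood around (sx, sy). In the border
// branch the accumulator is seeded with cval*ONE, and every available sample
// contributes (S - cval)*w, so taps rejected by borderInterpolate() under a
// constant border implicitly contribute the border value without extra
// bookkeeping. remapLanczos4() below uses the same scheme with an 8x8
// neighborhood.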
template<class CastOp, typename AT, int ONE, bool isRelative>
static void remapBicubic( const Mat& _src, Mat& _dst, const Mat& _xy,
                          const Mat& _fxy, const void* _wtab,
                          int borderType, const Scalar& _borderValue, const Point& _offset )
{
    typedef typename CastOp::rtype T;
    typedef typename CastOp::type1 WT;
    Size ssize = _src.size(), dsize = _dst.size();
    const int cn = _src.channels();
    const AT* wtab = (const AT*)_wtab;
    const T* S0 = _src.ptr<T>();
    size_t sstep = _src.step/sizeof(S0[0]);
    T cval[CV_CN_MAX];
    CastOp castOp;

    for(int k = 0; k < cn; k++ )
        cval[k] = saturate_cast<T>(_borderValue[k & 3]);

    int borderType1 = borderType != BORDER_TRANSPARENT ? borderType : BORDER_REFLECT_101;

    unsigned width1 = std::max(ssize.width-3, 0), height1 = std::max(ssize.height-3, 0);

    if( _dst.isContinuous() && _xy.isContinuous() && _fxy.isContinuous() && !isRelative )
    {
        dsize.width *= dsize.height;
        dsize.height = 1;
    }

    for(int dy = 0; dy < dsize.height; dy++ )
    {
        T* D = _dst.ptr<T>(dy);
        const short* XY = _xy.ptr<short>(dy);
        const ushort* FXY = _fxy.ptr<ushort>(dy);
        const int off_y = isRelative ? (_offset.y+dy) : 0;
        for(int dx = 0; dx < dsize.width; dx++, D += cn )
        {
            const int off_x = isRelative ? (_offset.x+dx) : 0;
            int sx = XY[dx*2]-1+off_x, sy = XY[dx*2+1]-1+off_y;
            const AT* w = wtab + FXY[dx]*16;
            if( (unsigned)sx < width1 && (unsigned)sy < height1 )
            {
                const T* S = S0 + sy*sstep + sx*cn;
                for(int k = 0; k < cn; k++ )
                {
                    WT sum = S[0]*w[0] + S[cn]*w[1] + S[cn*2]*w[2] + S[cn*3]*w[3];
                    S += sstep;
                    sum += S[0]*w[4] + S[cn]*w[5] + S[cn*2]*w[6] + S[cn*3]*w[7];
                    S += sstep;
                    sum += S[0]*w[8] + S[cn]*w[9] + S[cn*2]*w[10] + S[cn*3]*w[11];
                    S += sstep;
                    sum += S[0]*w[12] + S[cn]*w[13] + S[cn*2]*w[14] + S[cn*3]*w[15];
                    S -= sstep * 3 - 1;
                    D[k] = castOp(sum);
                }
            }
            else
            {
                int x[4], y[4];
                if( borderType == BORDER_TRANSPARENT &&
                    ((unsigned)(sx+1) >= (unsigned)ssize.width ||
                     (unsigned)(sy+1) >= (unsigned)ssize.height) )
                    continue;

                if( borderType1 == BORDER_CONSTANT &&
                    (sx >= ssize.width || sx+4 <= 0 ||
                     sy >= ssize.height || sy+4 <= 0))
                {
                    for(int k = 0; k < cn; k++ )
                        D[k] = cval[k];
                    continue;
                }

                for(int i = 0; i < 4; i++ )
                {
                    x[i] = borderInterpolate(sx + i, ssize.width, borderType1)*cn;
                    y[i] = borderInterpolate(sy + i, ssize.height, borderType1);
                }

                for(int k = 0; k < cn; k++, S0++, w -= 16 )
                {
                    WT cv = cval[k], sum = cv*ONE;
                    for(int i = 0; i < 4; i++, w += 4 )
                    {
                        int yi = y[i];
                        if( yi < 0 )
                            continue;
                        const T* S = S0 + yi*sstep;
                        if( x[0] >= 0 )
                            sum += (S[x[0]] - cv)*w[0];
                        if( x[1] >= 0 )
                            sum += (S[x[1]] - cv)*w[1];
                        if( x[2] >= 0 )
                            sum += (S[x[2]] - cv)*w[2];
                        if( x[3] >= 0 )
                            sum += (S[x[3]] - cv)*w[3];
                    }
                    D[k] = castOp(sum);
                }
                S0 -= cn;
            }
        }
    }
}


template<class CastOp, typename AT, int ONE, bool isRelative>
static void remapLanczos4( const Mat& _src, Mat& _dst, const Mat& _xy,
                           const Mat& _fxy, const void* _wtab,
                           int borderType, const Scalar& _borderValue, const Point& _offset )
{
    typedef typename CastOp::rtype T;
    typedef typename CastOp::type1 WT;
    Size ssize = _src.size(), dsize = _dst.size();
    const int cn = _src.channels();
    const AT* wtab = (const AT*)_wtab;
    const T* S0 = _src.ptr<T>();
    size_t sstep = _src.step/sizeof(S0[0]);
    T cval[CV_CN_MAX];
    CastOp castOp;

    for(int k = 0; k < cn; k++ )
        cval[k] = saturate_cast<T>(_borderValue[k & 3]);

    int borderType1 = borderType != BORDER_TRANSPARENT ? borderType : BORDER_REFLECT_101;

    unsigned width1 = std::max(ssize.width-7, 0), height1 = std::max(ssize.height-7, 0);

    if( _dst.isContinuous() && _xy.isContinuous() && _fxy.isContinuous() && !isRelative )
    {
        dsize.width *= dsize.height;
        dsize.height = 1;
    }

    for(int dy = 0; dy < dsize.height; dy++ )
    {
        T* D = _dst.ptr<T>(dy);
        const short* XY = _xy.ptr<short>(dy);
        const ushort* FXY = _fxy.ptr<ushort>(dy);
        const int off_y = isRelative ? (_offset.y+dy) : 0;
        for(int dx = 0; dx < dsize.width; dx++, D += cn )
        {
            const int off_x = isRelative ? (_offset.x+dx) : 0;
            int sx = XY[dx*2]-3+off_x, sy = XY[dx*2+1]-3+off_y;
            const AT* w = wtab + FXY[dx]*64;
            if( (unsigned)sx < width1 && (unsigned)sy < height1 )
            {
                const T* S = S0 + sy*sstep + sx*cn;
                for(int k = 0; k < cn; k++ )
                {
                    WT sum = 0;
                    for( int r = 0; r < 8; r++, S += sstep, w += 8 )
                        sum += S[0]*w[0] + S[cn]*w[1] + S[cn*2]*w[2] + S[cn*3]*w[3] +
                               S[cn*4]*w[4] + S[cn*5]*w[5] + S[cn*6]*w[6] + S[cn*7]*w[7];
                    w -= 64;
                    S -= sstep*8 - 1;
                    D[k] = castOp(sum);
                }
            }
            else
            {
                int x[8], y[8];
                if( borderType == BORDER_TRANSPARENT &&
                    ((unsigned)(sx+3) >= (unsigned)ssize.width ||
                     (unsigned)(sy+3) >= (unsigned)ssize.height) )
                    continue;

                if( borderType1 == BORDER_CONSTANT &&
                    (sx >= ssize.width || sx+8 <= 0 ||
                     sy >= ssize.height || sy+8 <= 0))
                {
                    for(int k = 0; k < cn; k++ )
                        D[k] = cval[k];
                    continue;
                }

                for(int i = 0; i < 8; i++ )
                {
                    x[i] = borderInterpolate(sx + i, ssize.width, borderType1)*cn;
                    y[i] = borderInterpolate(sy + i, ssize.height, borderType1);
                }

                for(int k = 0; k < cn; k++, S0++, w -= 64 )
                {
                    WT cv = cval[k], sum = cv*ONE;
                    for(int i = 0; i < 8; i++, w += 8 )
                    {
                        int yi = y[i];
                        if( yi < 0 )
                            continue;
                        const T* S1 = S0 + yi*sstep;
                        if( x[0] >= 0 )
                            sum += (S1[x[0]] - cv)*w[0];
                        if( x[1] >= 0 )
                            sum += (S1[x[1]] - cv)*w[1];
                        if( x[2] >= 0 )
                            sum += (S1[x[2]] - cv)*w[2];
                        if( x[3] >= 0 )
                            sum += (S1[x[3]] - cv)*w[3];
                        if( x[4] >= 0 )
                            sum += (S1[x[4]] - cv)*w[4];
                        if( x[5] >= 0 )
                            sum += (S1[x[5]] - cv)*w[5];
                        if( x[6] >= 0 )
                            sum += (S1[x[6]] - cv)*w[6];
                        if( x[7] >= 0 )
                            sum += (S1[x[7]] - cv)*w[7];
                    }
                    D[k] = castOp(sum);
                }
                S0 -= cn;
            }
        }
    }
}


typedef void (*RemapNNFunc)(const Mat& _src, Mat& _dst, const Mat& _xy,
                            int borderType, const Scalar& _borderValue, const Point& _offset);

typedef void (*RemapFunc)(const Mat& _src, Mat& _dst, const Mat& _xy,
                          const Mat& _fxy, const void* _wtab,
                          int borderType, const Scalar& _borderValue, const Point& _offset);

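// RemapInvoker processes the destination image in tiles of at most 1 << 14
// pixels (brows0 x bcols0, with brows0 capped at 128 rows) so that the
// temporary coordinate buffer bufxy and the fractional-index buffer bufa stay
// small. Floating-point maps are converted on the fly to the packed
// CV_16SC2 + CV_16UC1 representation expected by the per-tile kernels.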
class RemapInvoker :
    public ParallelLoopBody
{
public:
    RemapInvoker(const Mat& _src, Mat& _dst, const Mat *_m1,
                 const Mat *_m2, int _borderType, const Scalar &_borderValue,
                 int _planar_input, RemapNNFunc _nnfunc, RemapFunc _ifunc, const void *_ctab) :
        ParallelLoopBody(), src(&_src), dst(&_dst), m1(_m1), m2(_m2),
        borderType(_borderType), borderValue(_borderValue),
        planar_input(_planar_input), nnfunc(_nnfunc), ifunc(_ifunc), ctab(_ctab)
    {
    }

    virtual void operator() (const Range& range) const CV_OVERRIDE
    {
        int x, y, x1, y1;
        const int buf_size = 1 << 14;
        int brows0 = std::min(128, dst->rows), map_depth = m1->depth();
        int bcols0 = std::min(buf_size/brows0, dst->cols);
        brows0 = std::min(buf_size/bcols0, dst->rows);

        Mat _bufxy(brows0, bcols0, CV_16SC2), _bufa;
        if( !nnfunc )
            _bufa.create(brows0, bcols0, CV_16UC1);

        for( y = range.start; y < range.end; y += brows0 )
        {
            for( x = 0; x < dst->cols; x += bcols0 )
            {
                int brows = std::min(brows0, range.end - y);
                int bcols = std::min(bcols0, dst->cols - x);
                Mat dpart(*dst, Rect(x, y, bcols, brows));
                Mat bufxy(_bufxy, Rect(0, 0, bcols, brows));

                if( nnfunc )
                {
                    if( m1->type() == CV_16SC2 && m2->empty() ) // the data is already in the right format
                        bufxy = (*m1)(Rect(x, y, bcols, brows));
                    else if( map_depth != CV_32F )
                    {
                        for( y1 = 0; y1 < brows; y1++ )
                        {
                            short* XY = bufxy.ptr<short>(y1);
                            const short* sXY = m1->ptr<short>(y+y1) + x*2;
                            const ushort* sA = m2->ptr<ushort>(y+y1) + x;

                            for( x1 = 0; x1 < bcols; x1++ )
                            {
                                int a = sA[x1] & (INTER_TAB_SIZE2-1);
                                XY[x1*2] = sXY[x1*2] + NNDeltaTab_i[a][0];
                                XY[x1*2+1] = sXY[x1*2+1] + NNDeltaTab_i[a][1];
                            }
                        }
                    }
                    else if( !planar_input )
                        (*m1)(Rect(x, y, bcols, brows)).convertTo(bufxy, bufxy.depth());
                    else
                    {
                        for( y1 = 0; y1 < brows; y1++ )
                        {
                            short* XY = bufxy.ptr<short>(y1);
                            const float* sX = m1->ptr<float>(y+y1) + x;
                            const float* sY = m2->ptr<float>(y+y1) + x;
                            x1 = 0;

                            #if CV_SIMD128
                            {
                                int span = VTraits<v_float32x4>::vlanes();
                                for( ; x1 <= bcols - span * 2; x1 += span * 2 )
                                {
                                    v_int32x4 ix0 = v_round(v_load(sX + x1));
                                    v_int32x4 iy0 = v_round(v_load(sY + x1));
                                    v_int32x4 ix1 = v_round(v_load(sX + x1 + span));
                                    v_int32x4 iy1 = v_round(v_load(sY + x1 + span));

                                    v_int16x8 dx, dy;
                                    dx = v_pack(ix0, ix1);
                                    dy = v_pack(iy0, iy1);
                                    v_store_interleave(XY + x1 * 2, dx, dy);
                                }
                            }
                            #endif
                            for( ; x1 < bcols; x1++ )
                            {
                                XY[x1*2] = saturate_cast<short>(sX[x1]);
                                XY[x1*2+1] = saturate_cast<short>(sY[x1]);
                            }
                        }
                    }
                    nnfunc( *src, dpart, bufxy, borderType, borderValue, Point(x, y) );
                    continue;
                }

                Mat bufa(_bufa, Rect(0, 0, bcols, brows));
                for( y1 = 0; y1 < brows; y1++ )
                {
                    short* XY = bufxy.ptr<short>(y1);
                    ushort* A = bufa.ptr<ushort>(y1);

                    if( m1->type() == CV_16SC2 && (m2->type() == CV_16UC1 || m2->type() == CV_16SC1) )
                    {
                        bufxy = (*m1)(Rect(x, y, bcols, brows));

                        const ushort* sA = m2->ptr<ushort>(y+y1) + x;
                        x1 = 0;

                        #if CV_SIMD128
                        {
                            v_uint16x8 v_scale = v_setall_u16(INTER_TAB_SIZE2 - 1);
                            int span = VTraits<v_uint16x8>::vlanes();
                            for( ; x1 <= bcols - span; x1 += span )
                                v_store((unsigned short*)(A + x1), v_and(v_load(sA + x1), v_scale));
                        }
                        #endif
                        for( ; x1 < bcols; x1++ )
                            A[x1] = (ushort)(sA[x1] & (INTER_TAB_SIZE2-1));
                    }
                    else if( planar_input )
                    {
                        const float* sX = m1->ptr<float>(y+y1) + x;
                        const float* sY = m2->ptr<float>(y+y1) + x;

                        x1 = 0;
                        #if CV_SIMD128
                        {
                            v_float32x4 v_scale = v_setall_f32((float)INTER_TAB_SIZE);
                            v_int32x4 v_scale2 = v_setall_s32(INTER_TAB_SIZE - 1);
                            int span = VTraits<v_float32x4>::vlanes();
                            for( ; x1 <= bcols - span * 2; x1 += span * 2 )
                            {
                                v_int32x4 v_sx0 = v_round(v_mul(v_scale, v_load(sX + x1)));
                                v_int32x4 v_sy0 = v_round(v_mul(v_scale, v_load(sY + x1)));
                                v_int32x4 v_sx1 = v_round(v_mul(v_scale, v_load(sX + x1 + span)));
                                v_int32x4 v_sy1 = v_round(v_mul(v_scale, v_load(sY + x1 + span)));
                                v_uint16x8 v_sx8 = v_reinterpret_as_u16(v_pack(v_and(v_sx0, v_scale2), v_and(v_sx1, v_scale2)));
                                v_uint16x8 v_sy8 = v_reinterpret_as_u16(v_pack(v_and(v_sy0, v_scale2), v_and(v_sy1, v_scale2)));
                                v_uint16x8 v_v = v_or(v_shl<INTER_BITS>(v_sy8), v_sx8);
                                v_store(A + x1, v_v);

                                v_int16x8 v_d0 = v_pack(v_shr<INTER_BITS>(v_sx0), v_shr<INTER_BITS>(v_sx1));
                                v_int16x8 v_d1 = v_pack(v_shr<INTER_BITS>(v_sy0), v_shr<INTER_BITS>(v_sy1));
                                v_store_interleave(XY + (x1 << 1), v_d0, v_d1);
                            }
                        }
                        #endif
                        for( ; x1 < bcols; x1++ )
                        {
                            int sx = cvRound(sX[x1]*static_cast<float>(INTER_TAB_SIZE));
                            int sy = cvRound(sY[x1]*static_cast<float>(INTER_TAB_SIZE));
                            int v = (sy & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE + (sx & (INTER_TAB_SIZE-1));
                            XY[x1*2] = saturate_cast<short>(sx >> INTER_BITS);
                            XY[x1*2+1] = saturate_cast<short>(sy >> INTER_BITS);
                            A[x1] = (ushort)v;
                        }
                    }
                    else
                    {
                        const float* sXY = m1->ptr<float>(y+y1) + x*2;
                        x1 = 0;

                        #if CV_SIMD128
                        {
                            v_float32x4 v_scale = v_setall_f32((float)INTER_TAB_SIZE);
                            v_int32x4 v_scale2 = v_setall_s32(INTER_TAB_SIZE - 1), v_scale3 = v_setall_s32(INTER_TAB_SIZE);
                            int span = VTraits<v_float32x4>::vlanes();
                            for( ; x1 <= bcols - span * 2; x1 += span * 2 )
                            {
                                v_float32x4 v_fx, v_fy;
                                v_load_deinterleave(sXY + (x1 << 1), v_fx, v_fy);
                                v_int32x4 v_sx0 = v_round(v_mul(v_fx, v_scale));
                                v_int32x4 v_sy0 = v_round(v_mul(v_fy, v_scale));
                                v_load_deinterleave(sXY + ((x1 + span) << 1), v_fx, v_fy);
                                v_int32x4 v_sx1 = v_round(v_mul(v_fx, v_scale));
                                v_int32x4 v_sy1 = v_round(v_mul(v_fy, v_scale));
                                v_int32x4 v_v0 = v_muladd(v_scale3, (v_and(v_sy0, v_scale2)), (v_and(v_sx0, v_scale2)));
                                v_int32x4 v_v1 = v_muladd(v_scale3, (v_and(v_sy1, v_scale2)), (v_and(v_sx1, v_scale2)));
                                v_uint16x8 v_v8 = v_reinterpret_as_u16(v_pack(v_v0, v_v1));
                                v_store(A + x1, v_v8);
                                v_int16x8 v_dx = v_pack(v_shr<INTER_BITS>(v_sx0), v_shr<INTER_BITS>(v_sx1));
                                v_int16x8 v_dy = v_pack(v_shr<INTER_BITS>(v_sy0), v_shr<INTER_BITS>(v_sy1));
                                v_store_interleave(XY + (x1 << 1), v_dx, v_dy);
                            }
                        }
                        #endif

                        for( ; x1 < bcols; x1++ )
                        {
                            int sx = cvRound(sXY[x1*2]*static_cast<float>(INTER_TAB_SIZE));
                            int sy = cvRound(sXY[x1*2+1]*static_cast<float>(INTER_TAB_SIZE));
                            int v = (sy & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE + (sx & (INTER_TAB_SIZE-1));
                            XY[x1*2] = saturate_cast<short>(sx >> INTER_BITS);
                            XY[x1*2+1] = saturate_cast<short>(sy >> INTER_BITS);
                            A[x1] = (ushort)v;
                        }
                    }
                }
                ifunc(*src, dpart, bufxy, bufa, ctab, borderType, borderValue, Point(x, y));
            }
        }
    }

private:
    const Mat* src;
    Mat* dst;
    const Mat *m1, *m2;
    int borderType;
    Scalar borderValue;
    int planar_input;
    RemapNNFunc nnfunc;
    RemapFunc ifunc;
    const void *ctab;
};
#ifdef HAVE_OPENCL

static bool ocl_remap(InputArray _src, OutputArray _dst, InputArray _map1, InputArray _map2,
                      int interpolation, int borderType, const Scalar& borderValue)
{
    const bool hasRelativeFlag = ((interpolation & WARP_RELATIVE_MAP) != 0);
    interpolation &= ~WARP_RELATIVE_MAP;

    const ocl::Device & dev = ocl::Device::getDefault();
    int cn = _src.channels(), type = _src.type(), depth = _src.depth(),
        rowsPerWI = dev.isIntel() ? 4 : 1;

    if (borderType == BORDER_TRANSPARENT || !(interpolation == INTER_LINEAR || interpolation == INTER_NEAREST)
        || _map1.type() == CV_16SC1 || _map2.type() == CV_16SC1)
        return false;

    UMat src = _src.getUMat(), map1 = _map1.getUMat(), map2 = _map2.getUMat();

    if( (map1.type() == CV_16SC2 && (map2.type() == CV_16UC1 || map2.empty())) ||
        (map2.type() == CV_16SC2 && (map1.type() == CV_16UC1 || map1.empty())) )
    {
        if (map1.type() != CV_16SC2)
            std::swap(map1, map2);
    }
    else
        CV_Assert( map1.type() == CV_32FC2 || (map1.type() == CV_32FC1 && map2.type() == CV_32FC1) );

    _dst.create(map1.size(), type);
    UMat dst = _dst.getUMat();

    String kernelName = "remap";
    if (map1.type() == CV_32FC2 && map2.empty())
        kernelName += "_32FC2";
    else if (map1.type() == CV_16SC2)
    {
        kernelName += "_16SC2";
        if (!map2.empty())
            kernelName += "_16UC1";
    }
    else if (map1.type() == CV_32FC1 && map2.type() == CV_32FC1)
        kernelName += "_2_32FC1";
    else
        CV_Error(Error::StsBadArg, "Unsupported map types");

    static const char * const interMap[] = { "INTER_NEAREST", "INTER_LINEAR", "INTER_CUBIC", "INTER_LINEAR", "INTER_LANCZOS" };
    static const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP",
                                              "BORDER_REFLECT_101", "BORDER_TRANSPARENT" };
    String buildOptions = format("-D %s -D %s -D T=%s -D ROWS_PER_WI=%d -D WARP_RELATIVE=%d",
                                 interMap[interpolation], borderMap[borderType],
                                 ocl::typeToStr(type), rowsPerWI,
                                 hasRelativeFlag ? 1 : 0);

    if (interpolation != INTER_NEAREST)
    {
        char cvt[3][50];
        int wdepth = std::max(CV_32F, depth);
        buildOptions = buildOptions
                       + format(" -D WT=%s -D CONVERT_TO_T=%s -D CONVERT_TO_WT=%s"
                                " -D CONVERT_TO_WT2=%s -D WT2=%s",
                                ocl::typeToStr(CV_MAKE_TYPE(wdepth, cn)),
                                ocl::convertTypeStr(wdepth, depth, cn, cvt[0], sizeof(cvt[0])),
                                ocl::convertTypeStr(depth, wdepth, cn, cvt[1], sizeof(cvt[1])),
                                ocl::convertTypeStr(CV_32S, wdepth, 2, cvt[2], sizeof(cvt[2])),
                                ocl::typeToStr(CV_MAKE_TYPE(wdepth, 2)));
    }
    int scalarcn = cn == 3 ? 4 : cn;
    int sctype = CV_MAKETYPE(depth, scalarcn);
    buildOptions += format(" -D T=%s -D T1=%s -D CN=%d -D ST=%s -D SRC_DEPTH=%d",
                           ocl::typeToStr(type), ocl::typeToStr(depth),
                           cn, ocl::typeToStr(sctype), depth);

    ocl::Kernel k(kernelName.c_str(), ocl::imgproc::remap_oclsrc, buildOptions);

    Mat scalar(1, 1, sctype, borderValue);
    ocl::KernelArg srcarg = ocl::KernelArg::ReadOnly(src), dstarg = ocl::KernelArg::WriteOnly(dst),
            map1arg = ocl::KernelArg::ReadOnlyNoSize(map1),
            scalararg = ocl::KernelArg::Constant((void*)scalar.ptr(), scalar.elemSize());

    if (map2.empty())
        k.args(srcarg, dstarg, map1arg, scalararg);
    else
        k.args(srcarg, dstarg, map1arg, ocl::KernelArg::ReadOnlyNoSize(map2), scalararg);

    size_t globalThreads[2] = { (size_t)dst.cols, ((size_t)dst.rows + rowsPerWI - 1) / rowsPerWI };
    return k.run(2, globalThreads, NULL, false);
}

#if 0
/**
@deprecated with old version of cv::linearPolar
*/
static bool ocl_linearPolar(InputArray _src, OutputArray _dst,
    Point2f center, double maxRadius, int flags)
{
    UMat src_with_border; // don't scope this variable (it holds image data)

    UMat mapx, mapy, r, cp_sp;
    UMat src = _src.getUMat();
    _dst.create(src.size(), src.type());
    Size dsize = src.size();
    r.create(Size(1, dsize.width), CV_32F);
    cp_sp.create(Size(1, dsize.height), CV_32FC2);

    mapx.create(dsize, CV_32F);
    mapy.create(dsize, CV_32F);
    size_t w = dsize.width;
    size_t h = dsize.height;
    String buildOptions;
    unsigned mem_size = 32;
    if (flags & cv::WARP_INVERSE_MAP)
    {
        buildOptions = "-D InverseMap";
    }
    else
    {
        buildOptions = format("-D ForwardMap -D MEM_SIZE=%d", mem_size);
    }
    String retval;
    ocl::Program p(ocl::imgproc::linearPolar_oclsrc, buildOptions, retval);
    ocl::Kernel k("linearPolar", p);
    ocl::KernelArg ocl_mapx = ocl::KernelArg::PtrReadWrite(mapx), ocl_mapy = ocl::KernelArg::PtrReadWrite(mapy);
    ocl::KernelArg ocl_cp_sp = ocl::KernelArg::PtrReadWrite(cp_sp);
    ocl::KernelArg ocl_r = ocl::KernelArg::PtrReadWrite(r);

    if (!(flags & cv::WARP_INVERSE_MAP))
    {
        ocl::Kernel computeAngleRadius_Kernel("computeAngleRadius", p);
        float PI2_height = (float) CV_2PI / dsize.height;
        float maxRadius_width = (float) maxRadius / dsize.width;
        computeAngleRadius_Kernel.args(ocl_cp_sp, ocl_r, maxRadius_width, PI2_height, (unsigned)dsize.width, (unsigned)dsize.height);
        size_t max_dim = max(h, w);
        computeAngleRadius_Kernel.run(1, &max_dim, NULL, false);
        k.args(ocl_mapx, ocl_mapy, ocl_cp_sp, ocl_r, center.x, center.y, (unsigned)dsize.width, (unsigned)dsize.height);
    }
    else
    {
        const int ANGLE_BORDER = 1;

        cv::copyMakeBorder(src, src_with_border, ANGLE_BORDER, ANGLE_BORDER, 0, 0, BORDER_WRAP);
        src = src_with_border;
        Size ssize = src_with_border.size();
        ssize.height -= 2 * ANGLE_BORDER;
        float ascale = ssize.height / ((float)CV_2PI);
        float pscale = ssize.width / ((float) maxRadius);

        k.args(ocl_mapx, ocl_mapy, ascale, pscale, center.x, center.y, ANGLE_BORDER, (unsigned)dsize.width, (unsigned)dsize.height);
    }
    size_t globalThreads[2] = { (size_t)dsize.width , (size_t)dsize.height };
    size_t localThreads[2] = { mem_size , mem_size };
    k.run(2, globalThreads, localThreads, false);
    remap(src, _dst, mapx, mapy, flags & cv::INTER_MAX, (flags & cv::WARP_FILL_OUTLIERS) ? cv::BORDER_CONSTANT : cv::BORDER_TRANSPARENT);
    return true;
}
static bool ocl_logPolar(InputArray _src, OutputArray _dst,
    Point2f center, double M, int flags)
{
    if (M <= 0)
        CV_Error(cv::Error::StsOutOfRange, "M should be >0");
    UMat src_with_border; // don't scope this variable (it holds image data)

    UMat mapx, mapy, r, cp_sp;
    UMat src = _src.getUMat();
    _dst.create(src.size(), src.type());
    Size dsize = src.size();
    r.create(Size(1, dsize.width), CV_32F);
    cp_sp.create(Size(1, dsize.height), CV_32FC2);

    mapx.create(dsize, CV_32F);
    mapy.create(dsize, CV_32F);
    size_t w = dsize.width;
    size_t h = dsize.height;
    String buildOptions;
    unsigned mem_size = 32;
    if (flags & cv::WARP_INVERSE_MAP)
    {
        buildOptions = "-D InverseMap";
    }
    else
    {
        buildOptions = format("-D ForwardMap -D MEM_SIZE=%d", mem_size);
    }
    String retval;
    ocl::Program p(ocl::imgproc::logPolar_oclsrc, buildOptions, retval);
    //ocl::Program p(ocl::imgproc::my_linearPolar_oclsrc, buildOptions, retval);
    //printf("%s\n", retval);
    ocl::Kernel k("logPolar", p);
    ocl::KernelArg ocl_mapx = ocl::KernelArg::PtrReadWrite(mapx), ocl_mapy = ocl::KernelArg::PtrReadWrite(mapy);
    ocl::KernelArg ocl_cp_sp = ocl::KernelArg::PtrReadWrite(cp_sp);
    ocl::KernelArg ocl_r = ocl::KernelArg::PtrReadWrite(r);

    if (!(flags & cv::WARP_INVERSE_MAP))
    {
        ocl::Kernel computeAngleRadius_Kernel("computeAngleRadius", p);
        float PI2_height = (float) CV_2PI / dsize.height;

        computeAngleRadius_Kernel.args(ocl_cp_sp, ocl_r, (float)M, PI2_height, (unsigned)dsize.width, (unsigned)dsize.height);
        size_t max_dim = max(h, w);
        computeAngleRadius_Kernel.run(1, &max_dim, NULL, false);
        k.args(ocl_mapx, ocl_mapy, ocl_cp_sp, ocl_r, center.x, center.y, (unsigned)dsize.width, (unsigned)dsize.height);
    }
    else
    {
        const int ANGLE_BORDER = 1;

        cv::copyMakeBorder(src, src_with_border, ANGLE_BORDER, ANGLE_BORDER, 0, 0, BORDER_WRAP);
        src = src_with_border;
        Size ssize = src_with_border.size();
        ssize.height -= 2 * ANGLE_BORDER;
        float ascale = ssize.height / ((float)CV_2PI);

        k.args(ocl_mapx, ocl_mapy, ascale, (float)M, center.x, center.y, ANGLE_BORDER, (unsigned)dsize.width, (unsigned)dsize.height);
    }
    size_t globalThreads[2] = { (size_t)dsize.width , (size_t)dsize.height };
    size_t localThreads[2] = { mem_size , mem_size };
    k.run(2, globalThreads, localThreads, false);
    remap(src, _dst, mapx, mapy, flags & cv::INTER_MAX, (flags & cv::WARP_FILL_OUTLIERS) ? cv::BORDER_CONSTANT : cv::BORDER_TRANSPARENT);
    return true;
}
#endif

#endif

#if defined HAVE_IPP && !IPP_DISABLE_REMAP

typedef IppStatus (CV_STDCALL * ippiRemap)(const void * pSrc, IppiSize srcSize, int srcStep, IppiRect srcRoi,
                                           const Ipp32f* pxMap, int xMapStep, const Ipp32f* pyMap, int yMapStep,
                                           void * pDst, int dstStep, IppiSize dstRoiSize, int interpolation);

class IPPRemapInvoker :
    public ParallelLoopBody
{
public:
    IPPRemapInvoker(Mat & _src, Mat & _dst, Mat & _xmap, Mat & _ymap, ippiRemap _ippFunc,
                    int _ippInterpolation, int _borderType, const Scalar & _borderValue, bool * _ok) :
        ParallelLoopBody(), src(_src), dst(_dst), map1(_xmap), map2(_ymap), ippFunc(_ippFunc),
        ippInterpolation(_ippInterpolation), borderType(_borderType), borderValue(_borderValue), ok(_ok)
    {
        *ok = true;
    }

    virtual void operator() (const Range & range) const CV_OVERRIDE
    {
        IppiRect srcRoiRect = { 0, 0, src.cols, src.rows };
        Mat dstRoi = dst.rowRange(range);
        IppiSize dstRoiSize = ippiSize(dstRoi.size());
        int type = dst.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);

        if (borderType == BORDER_CONSTANT &&
                !IPPSet(borderValue, dstRoi.ptr(), (int)dstRoi.step, dstRoiSize, cn, depth))
        {
            *ok = false;
            return;
        }

        if (CV_INSTRUMENT_FUN_IPP(ippFunc, src.ptr(), ippiSize(src.size()), (int)src.step, srcRoiRect,
                                  map1.ptr<Ipp32f>(), (int)map1.step, map2.ptr<Ipp32f>(), (int)map2.step,
                                  dstRoi.ptr(), (int)dstRoi.step, dstRoiSize, ippInterpolation) < 0)
            *ok = false;
        else
        {
            CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
        }
    }

private:
    Mat & src, & dst, & map1, & map2;
    ippiRemap ippFunc;
    int ippInterpolation, borderType;
    Scalar borderValue;
    bool * ok;
};

#endif

}
1628
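// Usage sketch (illustrative only, not part of the library): cv::remap takes one
// coordinate per output pixel and gathers src(mapx(x,y), mapy(x,y)) into dst(x,y).
// A minimal horizontal-flip example with float maps:
//
//     #include <opencv2/imgproc.hpp>
//     cv::Mat src(480, 640, CV_8UC3, cv::Scalar::all(0));   // placeholder input
//     cv::Mat mapx(src.size(), CV_32FC1), mapy(src.size(), CV_32FC1);
//     for (int y = 0; y < src.rows; ++y)
//         for (int x = 0; x < src.cols; ++x)
//         {
//             mapx.at<float>(y, x) = (float)(src.cols - 1 - x);
//             mapy.at<float>(y, x) = (float)y;
//         }
//     cv::Mat dst;
//     cv::remap(src, dst, mapx, mapy, cv::INTER_LINEAR, cv::BORDER_CONSTANT);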
void cv::remap( InputArray _src, OutputArray _dst,
                InputArray _map1, InputArray _map2,
                int interpolation, int borderType, const Scalar& borderValue )
{
    CV_INSTRUMENT_REGION();

    const bool hasRelativeFlag = ((interpolation & WARP_RELATIVE_MAP) != 0);

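    // Descriptive note (added comment): the tables below are indexed as
    // tab[relative][depth], where 'relative' is 1 when WARP_RELATIVE_MAP is set
    // and 'depth' is the CV_8U..CV_64F source depth; null entries mark depths
    // without a specialized implementation (e.g. CV_16F).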
1637 static RemapNNFunc nn_tab[2][8] =
1638 {
1639 {
1640 remapNearest<uchar, false>, remapNearest<schar, false>, remapNearest<ushort, false>, remapNearest<short, false>,
1641 remapNearest<int, false>, remapNearest<float, false>, remapNearest<double, false>, 0
1642 },
1643 {
1644 remapNearest<uchar, true>, remapNearest<schar, true>, remapNearest<ushort, true>, remapNearest<short, true>,
1645 remapNearest<int, true>, remapNearest<float, true>, remapNearest<double, true>, 0
1646 }
1647 };
1648
1649 static RemapFunc linear_tab[2][8] =
1650 {
1651 {
1652 remapBilinear<FixedPtCast<int, uchar, INTER_REMAP_COEF_BITS>, RemapVec_8u<false>, short, false>, 0,
1653 remapBilinear<Cast<float, ushort>, RemapNoVec<false>, float, false>,
1654 remapBilinear<Cast<float, short>, RemapNoVec<false>, float, false>, 0,
1655 remapBilinear<Cast<float, float>, RemapNoVec<false>, float, false>,
1656 remapBilinear<Cast<double, double>, RemapNoVec<false>, float, false>, 0
1657 },
1658 {
1659 remapBilinear<FixedPtCast<int, uchar, INTER_REMAP_COEF_BITS>, RemapVec_8u<true>, short, true>, 0,
1660 remapBilinear<Cast<float, ushort>, RemapNoVec<true>, float, true>,
1661 remapBilinear<Cast<float, short>, RemapNoVec<true>, float, true>, 0,
1662 remapBilinear<Cast<float, float>, RemapNoVec<true>, float, true>,
1663 remapBilinear<Cast<double, double>, RemapNoVec<true>, float, true>, 0
1664 }
1665 };
1666
1667 static RemapFunc cubic_tab[2][8] =
1668 {
1669 {
1670 remapBicubic<FixedPtCast<int, uchar, INTER_REMAP_COEF_BITS>, short, INTER_REMAP_COEF_SCALE, false>, 0,
1671 remapBicubic<Cast<float, ushort>, float, 1, false>,
1672 remapBicubic<Cast<float, short>, float, 1, false>, 0,
1673 remapBicubic<Cast<float, float>, float, 1, false>,
1674 remapBicubic<Cast<double, double>, float, 1, false>, 0
1675 },
1676 {
1677 remapBicubic<FixedPtCast<int, uchar, INTER_REMAP_COEF_BITS>, short, INTER_REMAP_COEF_SCALE, true>, 0,
1678 remapBicubic<Cast<float, ushort>, float, 1, true>,
1679 remapBicubic<Cast<float, short>, float, 1, true>, 0,
1680 remapBicubic<Cast<float, float>, float, 1, true>,
1681 remapBicubic<Cast<double, double>, float, 1, true>, 0
1682 }
1683};
1684
1685 static RemapFunc lanczos4_tab[2][8] =
1686 {
1687 {
1688 remapLanczos4<FixedPtCast<int, uchar, INTER_REMAP_COEF_BITS>, short, INTER_REMAP_COEF_SCALE, false>, 0,
1689 remapLanczos4<Cast<float, ushort>, float, 1, false>,
1690 remapLanczos4<Cast<float, short>, float, 1, false>, 0,
1691 remapLanczos4<Cast<float, float>, float, 1, false>,
1692 remapLanczos4<Cast<double, double>, float, 1, false>, 0
1693 },
1694 {
1695 remapLanczos4<FixedPtCast<int, uchar, INTER_REMAP_COEF_BITS>, short, INTER_REMAP_COEF_SCALE, true>, 0,
1696 remapLanczos4<Cast<float, ushort>, float, 1, true>,
1697 remapLanczos4<Cast<float, short>, float, 1, true>, 0,
1698 remapLanczos4<Cast<float, float>, float, 1, true>,
1699 remapLanczos4<Cast<double, double>, float, 1, true>, 0
1700 }
1701};
1702
    CV_Assert( !_map1.empty() );
    CV_Assert( _map2.empty() || (_map2.size() == _map1.size()));

    CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat(),
               ocl_remap(_src, _dst, _map1, _map2, interpolation, borderType, borderValue))

    Mat src = _src.getMat(), map1 = _map1.getMat(), map2 = _map2.getMat();
    _dst.create( map1.size(), src.type() );
    Mat dst = _dst.getMat();

    CV_Assert( dst.cols < SHRT_MAX && dst.rows < SHRT_MAX && src.cols < SHRT_MAX && src.rows < SHRT_MAX );

    if( dst.data == src.data )
        src = src.clone();

    if ((map1.type() == CV_32FC1) && (map2.type() == CV_32FC1))
    {
        CALL_HAL(remap32f, cv_hal_remap32f, src.type(), src.data, src.step, src.cols, src.rows, dst.data, dst.step, dst.cols, dst.rows,
                 map1.ptr<float>(), map1.step, map2.ptr<float>(), map2.step, interpolation, borderType, borderValue.val);
    }
    if ((map1.type() == CV_32FC2) && map2.empty())
    {
        CALL_HAL(remap32fc2, cv_hal_remap32fc2, src.type(), src.data, src.step, src.cols, src.rows, dst.data, dst.step, dst.cols, dst.rows,
                 map1.ptr<float>(), map1.step, interpolation, borderType, borderValue.val);
    }
    if ((map1.type() == CV_16SC2) && (map2.empty() || map2.type() == CV_16UC1))
    {
        CALL_HAL(remap16s, cv_hal_remap16s, src.type(), src.data, src.step, src.cols, src.rows, dst.data, dst.step, dst.cols, dst.rows,
                 map1.ptr<short>(), map1.step, map2.ptr<ushort>(), map2.step, interpolation, borderType, borderValue.val);
    }

    interpolation &= ~WARP_RELATIVE_MAP;
    if( interpolation == INTER_AREA )
        interpolation = INTER_LINEAR;
1737
1738 int type = src.type(), depth = CV_MAT_DEPTH(type);
1739
1740#if defined HAVE_IPP && !IPP_DISABLE_REMAP
1741 CV_IPP_CHECK()
1742 {
1743 if ((interpolation == INTER_LINEAR || interpolation == INTER_CUBIC || interpolation == INTER_NEAREST) &&
1744 map1.type() == CV_32FC1 && map2.type() == CV_32FC1 &&
1745 (borderType == BORDER_CONSTANT || borderType == BORDER_TRANSPARENT))
1746 {
1747 int ippInterpolation =
1748 interpolation == INTER_NEAREST ? IPPI_INTER_NN :
1749 interpolation == INTER_LINEAR ? IPPI_INTER_LINEAR : IPPI_INTER_CUBIC;
1750
1751 ippiRemap ippFunc =
1752 type == CV_8UC1 ? (ippiRemap)ippiRemap_8u_C1R :
1753 type == CV_8UC3 ? (ippiRemap)ippiRemap_8u_C3R :
1754 type == CV_8UC4 ? (ippiRemap)ippiRemap_8u_C4R :
1755 type == CV_16UC1 ? (ippiRemap)ippiRemap_16u_C1R :
1756 type == CV_16UC3 ? (ippiRemap)ippiRemap_16u_C3R :
1757 type == CV_16UC4 ? (ippiRemap)ippiRemap_16u_C4R :
1758 type == CV_32FC1 ? (ippiRemap)ippiRemap_32f_C1R :
1759 type == CV_32FC3 ? (ippiRemap)ippiRemap_32f_C3R :
1760 type == CV_32FC4 ? (ippiRemap)ippiRemap_32f_C4R : 0;
1761
1762 if (ippFunc)
1763 {
1764 bool ok;
1765 IPPRemapInvoker invoker(src, dst, map1, map2, ippFunc, ippInterpolation,
1766 borderType, borderValue, &ok);
1767 Range range(0, dst.rows);
1768 parallel_for_(range, invoker, dst.total() / (double)(1 << 16));
1769
1770 if (ok)
1771 {
1772 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
1773 return;
1774 }
1775 setIppErrorStatus();
1776 }
1777 }
1778 }
1779#endif
1780
    RemapNNFunc nnfunc = 0;
    RemapFunc ifunc = 0;
    const void* ctab = 0;
    bool fixpt = depth == CV_8U;
    bool planar_input = false;

    const int relativeOptionIndex = (hasRelativeFlag ? 1 : 0);
    if( interpolation == INTER_NEAREST )
    {
        nnfunc = nn_tab[relativeOptionIndex][depth];
        CV_Assert( nnfunc != 0 );
    }
    else
    {
        if( interpolation == INTER_LINEAR )
            ifunc = linear_tab[relativeOptionIndex][depth];
        else if( interpolation == INTER_CUBIC ){
            ifunc = cubic_tab[relativeOptionIndex][depth];
            CV_Assert( _src.channels() <= 4 );
        }
        else if( interpolation == INTER_LANCZOS4 ){
            ifunc = lanczos4_tab[relativeOptionIndex][depth];
            CV_Assert( _src.channels() <= 4 );
        }
        else
            CV_Error( cv::Error::StsBadArg, "Unknown interpolation method" );
        CV_Assert( ifunc != 0 );
        ctab = initInterTab2D( interpolation, fixpt );
    }

    const Mat *m1 = &map1, *m2 = &map2;

    if( (map1.type() == CV_16SC2 && (map2.type() == CV_16UC1 || map2.type() == CV_16SC1 || map2.empty())) ||
        (map2.type() == CV_16SC2 && (map1.type() == CV_16UC1 || map1.type() == CV_16SC1 || map1.empty())) )
    {
        if( map1.type() != CV_16SC2 )
            std::swap(m1, m2);
    }
    else
    {
        CV_Assert( ((map1.type() == CV_32FC2 || map1.type() == CV_16SC2) && map2.empty()) ||
            (map1.type() == CV_32FC1 && map2.type() == CV_32FC1) );
        planar_input = map1.channels() == 1;
    }

    RemapInvoker invoker(src, dst, m1, m2,
                         borderType, borderValue, planar_input, nnfunc, ifunc,
                         ctab);
    parallel_for_(Range(0, dst.rows), invoker, dst.total()/(double)(1<<16));
}
1831
1832
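// Usage sketch (illustrative only, not from the original source): convertMaps is
// typically used to turn a pair of CV_32FC1 maps into the fixed-point
// CV_16SC2 + CV_16UC1 representation once, so that repeated remap() calls over
// the same geometry run faster:
//
//     cv::Mat mapx, mapy;        // CV_32FC1 maps prepared elsewhere
//     cv::Mat map1, map2;
//     cv::convertMaps(mapx, mapy, map1, map2, CV_16SC2, false);
//     cv::remap(src, dst, map1, map2, cv::INTER_LINEAR);   // src/dst as in the example above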
void cv::convertMaps( InputArray _map1, InputArray _map2,
                      OutputArray _dstmap1, OutputArray _dstmap2,
                      int dstm1type, bool nninterpolate )
{
    CV_INSTRUMENT_REGION();

    Mat map1 = _map1.getMat(), map2 = _map2.getMat(), dstmap1, dstmap2;
    Size size = map1.size();
    const Mat *m1 = &map1, *m2 = &map2;
    int m1type = m1->type(), m2type = m2->type();

    CV_Assert( (m1type == CV_16SC2 && (nninterpolate || m2type == CV_16UC1 || m2type == CV_16SC1)) ||
               (m2type == CV_16SC2 && (nninterpolate || m1type == CV_16UC1 || m1type == CV_16SC1)) ||
               (m1type == CV_32FC1 && m2type == CV_32FC1) ||
               (m1type == CV_32FC2 && m2->empty()) );

    if( m2type == CV_16SC2 )
    {
        std::swap( m1, m2 );
        std::swap( m1type, m2type );
    }

    if( dstm1type <= 0 )
        dstm1type = m1type == CV_16SC2 ? CV_32FC2 : CV_16SC2;
    CV_Assert( dstm1type == CV_16SC2 || dstm1type == CV_32FC1 || dstm1type == CV_32FC2 );
    _dstmap1.create( size, dstm1type );
    dstmap1 = _dstmap1.getMat();

    if( !nninterpolate && dstm1type != CV_32FC2 )
    {
        _dstmap2.create( size, dstm1type == CV_16SC2 ? CV_16UC1 : CV_32FC1 );
        dstmap2 = _dstmap2.getMat();
    }
    else
        _dstmap2.release();

    if( m1type == dstm1type || (nninterpolate &&
        ((m1type == CV_16SC2 && dstm1type == CV_32FC2) ||
        (m1type == CV_32FC2 && dstm1type == CV_16SC2))) )
    {
        m1->convertTo( dstmap1, dstmap1.type() );
        if( !dstmap2.empty() && dstmap2.type() == m2->type() )
            m2->copyTo( dstmap2 );
        return;
    }

    if( m1type == CV_32FC1 && dstm1type == CV_32FC2 )
    {
        Mat vdata[] = { *m1, *m2 };
        merge( vdata, 2, dstmap1 );
        return;
    }

    if( m1type == CV_32FC2 && dstm1type == CV_32FC1 )
    {
        Mat mv[] = { dstmap1, dstmap2 };
        split( *m1, mv );
        return;
    }

    if( m1->isContinuous() && (m2->empty() || m2->isContinuous()) &&
        dstmap1.isContinuous() && (dstmap2.empty() || dstmap2.isContinuous()) )
    {
        size.width *= size.height;
        size.height = 1;
    }

#if CV_TRY_SSE4_1
    bool useSSE4_1 = CV_CPU_HAS_SUPPORT_SSE4_1;
#endif

    const float scale = 1.f/static_cast<float>(INTER_TAB_SIZE);
    int x, y;
    for( y = 0; y < size.height; y++ )
    {
        const float* src1f = m1->ptr<float>(y);
        const float* src2f = m2->ptr<float>(y);
        const short* src1 = (const short*)src1f;
        const ushort* src2 = (const ushort*)src2f;

        float* dst1f = dstmap1.ptr<float>(y);
        float* dst2f = dstmap2.ptr<float>(y);
        short* dst1 = (short*)dst1f;
        ushort* dst2 = (ushort*)dst2f;
        x = 0;

        if( m1type == CV_32FC1 && dstm1type == CV_16SC2 )
        {
            if( nninterpolate )
            {
                #if CV_TRY_SSE4_1
                if (useSSE4_1)
                    opt_SSE4_1::convertMaps_nninterpolate32f1c16s_SSE41(src1f, src2f, dst1, size.width);
                else
                #endif
                {
                    #if CV_SIMD128
                    {
                        int span = VTraits<v_int16x8>::vlanes();
                        for( ; x <= size.width - span; x += span )
                        {
                            v_int16x8 v_dst[2];
                            #define CV_PACK_MAP(X) v_pack(v_round(v_load(X)), v_round(v_load((X)+4)))
                            v_dst[0] = CV_PACK_MAP(src1f + x);
                            v_dst[1] = CV_PACK_MAP(src2f + x);
                            #undef CV_PACK_MAP
                            v_store_interleave(dst1 + (x << 1), v_dst[0], v_dst[1]);
                        }
                    }
                    #endif
                    for( ; x < size.width; x++ )
                    {
                        dst1[x*2] = saturate_cast<short>(src1f[x]);
                        dst1[x*2+1] = saturate_cast<short>(src2f[x]);
                    }
                }
            }
            else
            {
                #if CV_TRY_SSE4_1
                if (useSSE4_1)
                    opt_SSE4_1::convertMaps_32f1c16s_SSE41(src1f, src2f, dst1, dst2, size.width);
                else
                #endif
                {
                    #if CV_SIMD128
                    {
                        v_float32x4 v_scale = v_setall_f32((float)INTER_TAB_SIZE);
                        v_int32x4 v_mask = v_setall_s32(INTER_TAB_SIZE - 1);
                        v_int32x4 v_scale3 = v_setall_s32(INTER_TAB_SIZE);
                        int span = VTraits<v_float32x4>::vlanes();
                        for( ; x <= size.width - span * 2; x += span * 2 )
                        {
                            v_int32x4 v_ix0 = v_round(v_mul(v_scale, v_load(src1f + x)));
                            v_int32x4 v_ix1 = v_round(v_mul(v_scale, v_load(src1f + x + span)));
                            v_int32x4 v_iy0 = v_round(v_mul(v_scale, v_load(src2f + x)));
                            v_int32x4 v_iy1 = v_round(v_mul(v_scale, v_load(src2f + x + span)));

                            v_int16x8 v_dst[2];
                            v_dst[0] = v_pack(v_shr<INTER_BITS>(v_ix0), v_shr<INTER_BITS>(v_ix1));
                            v_dst[1] = v_pack(v_shr<INTER_BITS>(v_iy0), v_shr<INTER_BITS>(v_iy1));
                            v_store_interleave(dst1 + (x << 1), v_dst[0], v_dst[1]);

                            v_int32x4 v_dst0 = v_muladd(v_scale3, (v_and(v_iy0, v_mask)), (v_and(v_ix0, v_mask)));
                            v_int32x4 v_dst1 = v_muladd(v_scale3, (v_and(v_iy1, v_mask)), (v_and(v_ix1, v_mask)));
                            v_store(dst2 + x, v_pack_u(v_dst0, v_dst1));
                        }
                    }
                    #endif
                    for( ; x < size.width; x++ )
                    {
                        int ix = saturate_cast<int>(src1f[x]*static_cast<float>(INTER_TAB_SIZE));
                        int iy = saturate_cast<int>(src2f[x]*static_cast<float>(INTER_TAB_SIZE));
                        dst1[x*2] = saturate_cast<short>(ix >> INTER_BITS);
                        dst1[x*2+1] = saturate_cast<short>(iy >> INTER_BITS);
                        dst2[x] = (ushort)((iy & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE + (ix & (INTER_TAB_SIZE-1)));
                    }
                }
            }
        }
1993 else if( m1type == CV_32FC2 && dstm1type == CV_16SC2 )
1994 {
1995 #if CV_TRY_SSE4_1
1996 if( useSSE4_1 )
1997 opt_SSE4_1::convertMaps_32f2c16s_SSE41(src1f, dst1, dst2, width: size.width);
1998 else
1999 #endif
2000 {
2001 #if CV_SIMD128
2002 {
2003 v_float32x4 v_scale = v_setall_f32(v: (float)INTER_TAB_SIZE);
2004 v_int32x4 v_mask = v_setall_s32(v: INTER_TAB_SIZE - 1);
2005 v_int32x4 v_scale3 = v_setall_s32(v: INTER_TAB_SIZE);
2006 int span = VTraits<v_uint16x8>::vlanes();
2007 for (; x <= size.width - span; x += span )
2008 {
2009 v_float32x4 v_src0[2], v_src1[2];
2010 v_load_deinterleave(ptr: src1f + (x << 1), a&: v_src0[0], b&: v_src0[1]);
2011 v_load_deinterleave(ptr: src1f + (x << 1) + span, a&: v_src1[0], b&: v_src1[1]);
2012 v_int32x4 v_ix0 = v_round(a: v_mul(a: v_src0[0], b: v_scale));
2013 v_int32x4 v_ix1 = v_round(a: v_mul(a: v_src1[0], b: v_scale));
2014 v_int32x4 v_iy0 = v_round(a: v_mul(a: v_src0[1], b: v_scale));
2015 v_int32x4 v_iy1 = v_round(a: v_mul(a: v_src1[1], b: v_scale));
2016
2017 v_int16x8 v_dst[2];
2018 v_dst[0] = v_pack(a: v_shr<INTER_BITS>(a: v_ix0), b: v_shr<INTER_BITS>(a: v_ix1));
2019 v_dst[1] = v_pack(a: v_shr<INTER_BITS>(a: v_iy0), b: v_shr<INTER_BITS>(a: v_iy1));
2020 v_store_interleave(ptr: dst1 + (x << 1), a0: v_dst[0], b0: v_dst[1]);
2021
2022 v_store(ptr: dst2 + x, a: v_pack_u(
2023 a: v_muladd(a: v_scale3, b: (v_and(a: v_iy0, b: v_mask)), c: (v_and(a: v_ix0, b: v_mask))),
2024 b: v_muladd(a: v_scale3, b: (v_and(a: v_iy1, b: v_mask)), c: (v_and(a: v_ix1, b: v_mask)))));
2025 }
2026 }
2027 #endif
2028 for( ; x < size.width; x++ )
2029 {
2030 int ix = saturate_cast<int>(v: src1f[x*2]*static_cast<float>(INTER_TAB_SIZE));
2031 int iy = saturate_cast<int>(v: src1f[x*2+1]*static_cast<float>(INTER_TAB_SIZE));
2032 dst1[x*2] = saturate_cast<short>(v: ix >> INTER_BITS);
2033 dst1[x*2+1] = saturate_cast<short>(v: iy >> INTER_BITS);
2034 dst2[x] = (ushort)((iy & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE + (ix & (INTER_TAB_SIZE-1)));
2035 }
2036 }
2037 }
2038 else if( m1type == CV_16SC2 && dstm1type == CV_32FC1 )
2039 {
2040 #if CV_SIMD128
2041 {
2042 v_uint16x8 v_mask2 = v_setall_u16(v: INTER_TAB_SIZE2-1);
2043 v_uint32x4 v_zero = v_setzero_u32(), v_mask = v_setall_u32(v: INTER_TAB_SIZE-1);
2044 v_float32x4 v_scale = v_setall_f32(v: scale);
2045 int span = VTraits<v_float32x4>::vlanes();
2046 for( ; x <= size.width - span * 2; x += span * 2 )
2047 {
2048 v_uint32x4 v_fxy1, v_fxy2;
2049 if ( src2 )
2050 {
2051 v_uint16x8 v_src2 = v_and(a: v_load(ptr: src2 + x), b: v_mask2);
2052 v_expand(a: v_src2, b0&: v_fxy1, b1&: v_fxy2);
2053 }
2054 else
2055 v_fxy1 = v_fxy2 = v_zero;
2056
2057 v_int16x8 v_src[2];
2058 v_int32x4 v_src0[2], v_src1[2];
2059 v_load_deinterleave(ptr: src1 + (x << 1), a0&: v_src[0], b0&: v_src[1]);
2060 v_expand(a: v_src[0], b0&: v_src0[0], b1&: v_src0[1]);
2061 v_expand(a: v_src[1], b0&: v_src1[0], b1&: v_src1[1]);
2062 #define CV_COMPUTE_MAP_X(X, FXY) v_muladd(v_scale, v_cvt_f32(v_reinterpret_as_s32(v_and((FXY), v_mask))),\
2063 v_cvt_f32(v_reinterpret_as_s32(X)))
2064 #define CV_COMPUTE_MAP_Y(Y, FXY) v_muladd(v_scale, v_cvt_f32(v_reinterpret_as_s32(v_shr<INTER_BITS>((FXY)))),\
2065 v_cvt_f32(v_reinterpret_as_s32(Y)))
2066 v_float32x4 v_dst1 = CV_COMPUTE_MAP_X(v_src0[0], v_fxy1);
2067 v_float32x4 v_dst2 = CV_COMPUTE_MAP_Y(v_src1[0], v_fxy1);
2068 v_store(ptr: dst1f + x, a: v_dst1);
2069 v_store(ptr: dst2f + x, a: v_dst2);
2070
2071 v_dst1 = CV_COMPUTE_MAP_X(v_src0[1], v_fxy2);
2072 v_dst2 = CV_COMPUTE_MAP_Y(v_src1[1], v_fxy2);
2073 v_store(ptr: dst1f + x + span, a: v_dst1);
2074 v_store(ptr: dst2f + x + span, a: v_dst2);
2075 #undef CV_COMPUTE_MAP_X
2076 #undef CV_COMPUTE_MAP_Y
2077 }
2078 }
2079 #endif
2080 for( ; x < size.width; x++ )
2081 {
2082 int fxy = src2 ? src2[x] & (INTER_TAB_SIZE2-1) : 0;
2083 dst1f[x] = src1[x*2] + (fxy & (INTER_TAB_SIZE-1))*scale;
2084 dst2f[x] = src1[x*2+1] + (fxy >> INTER_BITS)*scale;
2085 }
2086 }
2087 else if( m1type == CV_16SC2 && dstm1type == CV_32FC2 )
2088 {
2089 #if CV_SIMD128
2090 {
2091 v_int16x8 v_mask2 = v_setall_s16(v: INTER_TAB_SIZE2-1);
2092 v_int32x4 v_zero = v_setzero_s32(), v_mask = v_setall_s32(v: INTER_TAB_SIZE-1);
2093 v_float32x4 v_scale = v_setall_f32(v: scale);
2094 int span = VTraits<v_int16x8>::vlanes();
2095 for( ; x <= size.width - span; x += span )
2096 {
2097 v_int32x4 v_fxy1, v_fxy2;
2098 if (src2)
2099 {
2100 v_int16x8 v_src2 = v_and(a: v_load(ptr: (short *)src2 + x), b: v_mask2);
2101 v_expand(a: v_src2, b0&: v_fxy1, b1&: v_fxy2);
2102 }
2103 else
2104 v_fxy1 = v_fxy2 = v_zero;
2105
2106 v_int16x8 v_src[2];
2107 v_int32x4 v_src0[2], v_src1[2];
2108 v_float32x4 v_dst[2];
2109 v_load_deinterleave(ptr: src1 + (x << 1), a0&: v_src[0], b0&: v_src[1]);
2110 v_expand(a: v_src[0], b0&: v_src0[0], b1&: v_src0[1]);
2111 v_expand(a: v_src[1], b0&: v_src1[0], b1&: v_src1[1]);
2112
2113 #define CV_COMPUTE_MAP_X(X, FXY) v_muladd(v_scale, v_cvt_f32(v_and((FXY), v_mask)), v_cvt_f32(X))
2114 #define CV_COMPUTE_MAP_Y(Y, FXY) v_muladd(v_scale, v_cvt_f32(v_shr<INTER_BITS>((FXY))), v_cvt_f32(Y))
2115 v_dst[0] = CV_COMPUTE_MAP_X(v_src0[0], v_fxy1);
2116 v_dst[1] = CV_COMPUTE_MAP_Y(v_src1[0], v_fxy1);
2117 v_store_interleave(ptr: dst1f + (x << 1), a: v_dst[0], b: v_dst[1]);
2118
2119 v_dst[0] = CV_COMPUTE_MAP_X(v_src0[1], v_fxy2);
2120 v_dst[1] = CV_COMPUTE_MAP_Y(v_src1[1], v_fxy2);
2121 v_store_interleave(ptr: dst1f + (x << 1) + span, a: v_dst[0], b: v_dst[1]);
2122 #undef CV_COMPUTE_MAP_X
2123 #undef CV_COMPUTE_MAP_Y
2124 }
2125 }
2126 #endif
2127 for( ; x < size.width; x++ )
2128 {
2129 int fxy = src2 ? src2[x] & (INTER_TAB_SIZE2-1): 0;
2130 dst1f[x*2] = src1[x*2] + (fxy & (INTER_TAB_SIZE-1))*scale;
2131 dst1f[x*2+1] = src1[x*2+1] + (fxy >> INTER_BITS)*scale;
2132 }
2133 }
2134 else
2135 CV_Error( cv::Error::StsNotImplemented, "Unsupported combination of input/output matrices" );
2136 }
2137}
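// Descriptive note (added comment): in the fixed-point representation produced
// above, the CV_16SC2 map stores the integer part of each (x, y) source
// coordinate, while the CV_16UC1 map stores the fractional part quantized to an
// INTER_TAB_SIZE x INTER_TAB_SIZE grid, packed as
//     (dy & (INTER_TAB_SIZE-1)) * INTER_TAB_SIZE + (dx & (INTER_TAB_SIZE-1)),
// i.e. an index into the interpolation-coefficient tables used by remap().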
2138
2139
2140namespace cv
2141{
2142
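// Descriptive note (added comment): WarpAffineInvoker processes the destination
// in BLOCK_SZ x BLOCK_SZ tiles. For every tile it builds a short-integer
// coordinate map (plus interpolation-table indices for non-NN modes) with the
// warpAffineBlockline helpers and then reuses remap() on that tile, so the
// actual sampling and border handling live in a single place.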
2143class WarpAffineInvoker :
2144 public ParallelLoopBody
2145{
2146public:
2147 WarpAffineInvoker(const Mat &_src, Mat &_dst, int _interpolation, int _borderType,
2148 const Scalar &_borderValue, int *_adelta, int *_bdelta, const double *_M) :
2149 ParallelLoopBody(), src(_src), dst(_dst), interpolation(_interpolation),
2150 borderType(_borderType), borderValue(_borderValue), adelta(_adelta), bdelta(_bdelta),
2151 M(_M)
2152 {
2153 }
2154
2155 virtual void operator() (const Range& range) const CV_OVERRIDE
2156 {
2157 const int BLOCK_SZ = 64;
2158 AutoBuffer<short, 0> __XY(BLOCK_SZ * BLOCK_SZ * 2), __A(BLOCK_SZ * BLOCK_SZ);
2159 short *XY = __XY.data(), *A = __A.data();
2160 const int AB_BITS = MAX(10, (int)INTER_BITS);
2161 const int AB_SCALE = 1 << AB_BITS;
2162 int round_delta = interpolation == INTER_NEAREST ? AB_SCALE/2 : AB_SCALE/INTER_TAB_SIZE/2, x, y, y1;
2163
2164 int bh0 = std::min(a: BLOCK_SZ/2, b: dst.rows);
2165 int bw0 = std::min(a: BLOCK_SZ*BLOCK_SZ/bh0, b: dst.cols);
2166 bh0 = std::min(a: BLOCK_SZ*BLOCK_SZ/bw0, b: dst.rows);
2167
2168 for( y = range.start; y < range.end; y += bh0 )
2169 {
2170 for( x = 0; x < dst.cols; x += bw0 )
2171 {
2172 int bw = std::min( a: bw0, b: dst.cols - x);
2173 int bh = std::min( a: bh0, b: range.end - y);
2174
2175 Mat _XY(bh, bw, CV_16SC2, XY);
2176 Mat dpart(dst, Rect(x, y, bw, bh));
2177
2178 for( y1 = 0; y1 < bh; y1++ )
2179 {
2180 short* xy = XY + y1*bw*2;
2181 int X0 = saturate_cast<int>(v: (M[1]*(y + y1) + M[2])*AB_SCALE) + round_delta;
2182 int Y0 = saturate_cast<int>(v: (M[4]*(y + y1) + M[5])*AB_SCALE) + round_delta;
2183
2184 if( interpolation == INTER_NEAREST )
2185 hal::warpAffineBlocklineNN(adelta: adelta + x, bdelta: bdelta + x, xy, X0, Y0, bw);
2186 else
2187 hal::warpAffineBlockline(adelta: adelta + x, bdelta: bdelta + x, xy, alpha: A + y1*bw, X0, Y0, bw);
2188 }
2189
2190 if( interpolation == INTER_NEAREST )
2191 remap( src: src, dst: dpart, map1: _XY, map2: Mat(), interpolation, borderType, borderValue );
2192 else
2193 {
2194 Mat _matA(bh, bw, CV_16U, A);
2195 remap( src: src, dst: dpart, map1: _XY, map2: _matA, interpolation, borderType, borderValue );
2196 }
2197 }
2198 }
2199 }
2200
2201private:
2202 Mat src;
2203 Mat dst;
2204 int interpolation, borderType;
2205 Scalar borderValue;
2206 int *adelta, *bdelta;
2207 const double *M;
2208};
2209
2210
2211#if defined (HAVE_IPP) && IPP_VERSION_X100 >= 810 && !IPP_DISABLE_WARPAFFINE
2212typedef IppStatus (CV_STDCALL* ippiWarpAffineBackFunc)(const void*, IppiSize, int, IppiRect, void *, int, IppiRect, double [2][3], int);
2213
2214class IPPWarpAffineInvoker :
2215 public ParallelLoopBody
2216{
2217public:
2218 IPPWarpAffineInvoker(Mat &_src, Mat &_dst, double (&_coeffs)[2][3], int &_interpolation, int _borderType,
2219 const Scalar &_borderValue, ippiWarpAffineBackFunc _func, bool *_ok) :
2220 ParallelLoopBody(), src(_src), dst(_dst), mode(_interpolation), coeffs(_coeffs),
2221 borderType(_borderType), borderValue(_borderValue), func(_func), ok(_ok)
2222 {
2223 *ok = true;
2224 }
2225
2226 virtual void operator() (const Range& range) const CV_OVERRIDE
2227 {
2228 IppiSize srcsize = { src.cols, src.rows };
2229 IppiRect srcroi = { 0, 0, src.cols, src.rows };
2230 IppiRect dstroi = { 0, range.start, dst.cols, range.end - range.start };
2231 int cnn = src.channels();
2232 if( borderType == BORDER_CONSTANT )
2233 {
2234 IppiSize setSize = { dst.cols, range.end - range.start };
2235 void *dataPointer = dst.ptr(range.start);
2236 if( !IPPSet( borderValue, dataPointer, (int)dst.step[0], setSize, cnn, src.depth() ) )
2237 {
2238 *ok = false;
2239 return;
2240 }
2241 }
2242
2243 // Aug 2013: problem in IPP 7.1, 8.0 : sometimes function return ippStsCoeffErr
2244 IppStatus status = CV_INSTRUMENT_FUN_IPP(func,( src.ptr(), srcsize, (int)src.step[0], srcroi, dst.ptr(),
2245 (int)dst.step[0], dstroi, coeffs, mode ));
2246 if( status < 0)
2247 *ok = false;
2248 else
2249 {
2250 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
2251 }
2252 }
2253private:
2254 Mat &src;
2255 Mat &dst;
2256 int mode;
2257 double (&coeffs)[2][3];
2258 int borderType;
2259 Scalar borderValue;
2260 ippiWarpAffineBackFunc func;
2261 bool *ok;
2262 const IPPWarpAffineInvoker& operator= (const IPPWarpAffineInvoker&);
2263};
2264#endif
2265
2266#ifdef HAVE_OPENCL
2267
2268enum { OCL_OP_PERSPECTIVE = 1, OCL_OP_AFFINE = 0 };
2269
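// Descriptive note (added comment): ocl_warpTransform_cols4 is a specialized
// OpenCL path that only triggers on Intel devices for CV_8UC1 images whose
// destination width is a multiple of 4, with a constant border and
// nearest/linear/cubic interpolation; everything else falls through to the
// generic ocl_warpTransform below or to the CPU implementation.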
2270static bool ocl_warpTransform_cols4(InputArray _src, OutputArray _dst, InputArray _M0,
2271 Size dsize, int flags, int borderType, const Scalar& borderValue,
2272 int op_type)
2273{
2274 CV_Assert(op_type == OCL_OP_AFFINE || op_type == OCL_OP_PERSPECTIVE);
2275 const ocl::Device & dev = ocl::Device::getDefault();
2276 int type = _src.type(), dtype = _dst.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
2277
2278 int interpolation = flags & INTER_MAX;
2279 if( interpolation == INTER_AREA )
2280 interpolation = INTER_LINEAR;
2281
2282 if ( !dev.isIntel() || !(type == CV_8UC1) ||
2283 !(dtype == CV_8UC1) || !(_dst.cols() % 4 == 0) ||
2284 !(borderType == cv::BORDER_CONSTANT &&
2285 (interpolation == cv::INTER_NEAREST || interpolation == cv::INTER_LINEAR || interpolation == cv::INTER_CUBIC)))
2286 return false;
2287
2288 const char * const warp_op[2] = { "Affine", "Perspective" };
2289 const char * const interpolationMap[3] = { "nearest", "linear", "cubic" };
2290 ocl::ProgramSource program = ocl::imgproc::warp_transform_oclsrc;
2291 String kernelName = format(fmt: "warp%s_%s_8u", warp_op[op_type], interpolationMap[interpolation]);
2292
2293 bool is32f = (interpolation == INTER_CUBIC || interpolation == INTER_LINEAR) && op_type == OCL_OP_AFFINE;
2294 int wdepth = interpolation == INTER_NEAREST ? depth : std::max(a: is32f ? CV_32F : CV_32S, b: depth);
2295 int sctype = CV_MAKETYPE(wdepth, cn);
2296
2297 ocl::Kernel k;
2298 String opts = format(fmt: "-D ST=%s", ocl::typeToStr(t: sctype));
2299
2300 k.create(kname: kernelName.c_str(), prog: program, buildopts: opts);
2301 if (k.empty())
2302 return false;
2303
2304 float borderBuf[] = { 0, 0, 0, 0 };
2305 scalarToRawData(s: borderValue, buf: borderBuf, type: sctype);
2306
2307 UMat src = _src.getUMat(), M0;
2308 _dst.create( sz: dsize.empty() ? src.size() : dsize, type: src.type() );
2309 UMat dst = _dst.getUMat();
2310
2311 if (src.u == dst.u)
2312 src = src.clone();
2313
2314 float M[9] = {0};
2315 int matRows = (op_type == OCL_OP_AFFINE ? 2 : 3);
2316 Mat matM(matRows, 3, CV_32F, M), M1 = _M0.getMat();
2317 CV_Assert( (M1.type() == CV_32F || M1.type() == CV_64F) && M1.rows == matRows && M1.cols == 3 );
2318 M1.convertTo(m: matM, rtype: matM.type());
2319
2320 if( !(flags & WARP_INVERSE_MAP) )
2321 {
2322 if (op_type == OCL_OP_PERSPECTIVE)
2323 invert(src: matM, dst: matM);
2324 else
2325 {
2326 float D = M[0]*M[4] - M[1]*M[3];
2327 D = D != 0 ? 1.f/D : 0;
2328 float A11 = M[4]*D, A22=M[0]*D;
2329 M[0] = A11; M[1] *= -D;
2330 M[3] *= -D; M[4] = A22;
2331 float b1 = -M[0]*M[2] - M[1]*M[5];
2332 float b2 = -M[3]*M[2] - M[4]*M[5];
2333 M[2] = b1; M[5] = b2;
2334 }
2335 }
2336 matM.convertTo(m: M0, CV_32F);
2337
2338 k.args(kernel_args: ocl::KernelArg::ReadOnly(m: src), kernel_args: ocl::KernelArg::WriteOnly(m: dst), kernel_args: ocl::KernelArg::PtrReadOnly(m: M0),
2339 kernel_args: ocl::KernelArg(ocl::KernelArg::CONSTANT, 0, 0, 0, borderBuf, CV_ELEM_SIZE(sctype)));
2340
2341 size_t globalThreads[2];
2342 globalThreads[0] = (size_t)(dst.cols / 4);
2343 globalThreads[1] = (size_t)dst.rows;
2344
2345 return k.run(dims: 2, globalsize: globalThreads, NULL, sync: false);
2346}
2347
2348static bool ocl_warpTransform(InputArray _src, OutputArray _dst, InputArray _M0,
2349 Size dsize, int flags, int borderType, const Scalar& borderValue,
2350 int op_type)
2351{
2352 CV_Assert(op_type == OCL_OP_AFFINE || op_type == OCL_OP_PERSPECTIVE);
2353 const ocl::Device & dev = ocl::Device::getDefault();
2354
2355 int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
2356 const bool doubleSupport = dev.doubleFPConfig() > 0;
2357
2358 int interpolation = flags & INTER_MAX;
2359 if( interpolation == INTER_AREA )
2360 interpolation = INTER_LINEAR;
2361 int rowsPerWI = dev.isIntel() && op_type == OCL_OP_AFFINE && interpolation <= INTER_LINEAR ? 4 : 1;
2362
2363 if ( !(borderType == cv::BORDER_CONSTANT &&
2364 (interpolation == cv::INTER_NEAREST || interpolation == cv::INTER_LINEAR || interpolation == cv::INTER_CUBIC)) ||
2365 (!doubleSupport && depth == CV_64F) || cn > 4)
2366 return false;
2367
2368 bool useDouble = depth == CV_64F;
2369
2370 const char * const interpolationMap[3] = { "NEAREST", "LINEAR", "CUBIC" };
2371 ocl::ProgramSource program = op_type == OCL_OP_AFFINE ?
2372 ocl::imgproc::warp_affine_oclsrc : ocl::imgproc::warp_perspective_oclsrc;
2373 const char * const kernelName = op_type == OCL_OP_AFFINE ? "warpAffine" : "warpPerspective";
2374
2375 int scalarcn = cn == 3 ? 4 : cn;
2376 bool is32f = !dev.isAMD() && (interpolation == INTER_CUBIC || interpolation == INTER_LINEAR) && op_type == OCL_OP_AFFINE;
2377 int wdepth = interpolation == INTER_NEAREST ? depth : std::max(a: is32f ? CV_32F : CV_32S, b: depth);
2378 int sctype = CV_MAKETYPE(wdepth, scalarcn);
2379
2380 ocl::Kernel k;
2381 String opts;
2382 if (interpolation == INTER_NEAREST)
2383 {
2384 opts = format(fmt: "-D INTER_NEAREST -D T=%s%s -D CT=%s -D T1=%s -D ST=%s -D CN=%d -D ROWS_PER_WI=%d",
2385 ocl::typeToStr(t: type),
2386 doubleSupport ? " -D DOUBLE_SUPPORT" : "",
2387 useDouble ? "double" : "float",
2388 ocl::typeToStr(CV_MAT_DEPTH(type)),
2389 ocl::typeToStr(t: sctype), cn, rowsPerWI);
2390 }
2391 else
2392 {
2393 char cvt[2][50];
2394 opts = format(fmt: "-D INTER_%s -D T=%s -D T1=%s -D ST=%s -D WT=%s -D SRC_DEPTH=%d"
2395 " -D CONVERT_TO_WT=%s -D CONVERT_TO_T=%s%s -D CT=%s -D CN=%d -D ROWS_PER_WI=%d",
2396 interpolationMap[interpolation], ocl::typeToStr(t: type),
2397 ocl::typeToStr(CV_MAT_DEPTH(type)),
2398 ocl::typeToStr(t: sctype),
2399 ocl::typeToStr(CV_MAKE_TYPE(wdepth, cn)), depth,
2400 ocl::convertTypeStr(sdepth: depth, ddepth: wdepth, cn, buf: cvt[0], buf_size: sizeof(cvt[0])),
2401 ocl::convertTypeStr(sdepth: wdepth, ddepth: depth, cn, buf: cvt[1], buf_size: sizeof(cvt[1])),
2402 doubleSupport ? " -D DOUBLE_SUPPORT" : "",
2403 useDouble ? "double" : "float",
2404 cn, rowsPerWI);
2405 }
2406
2407 k.create(kname: kernelName, prog: program, buildopts: opts);
2408 if (k.empty())
2409 return false;
2410
2411 double borderBuf[] = { 0, 0, 0, 0 };
2412 scalarToRawData(s: borderValue, buf: borderBuf, type: sctype);
2413
2414 UMat src = _src.getUMat(), M0;
2415 _dst.create( sz: dsize.empty() ? src.size() : dsize, type: src.type() );
2416 UMat dst = _dst.getUMat();
2417
2418 if (src.u == dst.u)
2419 src = src.clone();
2420
2421 double M[9] = {0};
2422 int matRows = (op_type == OCL_OP_AFFINE ? 2 : 3);
2423 Mat matM(matRows, 3, CV_64F, M), M1 = _M0.getMat();
2424 CV_Assert( (M1.type() == CV_32F || M1.type() == CV_64F) &&
2425 M1.rows == matRows && M1.cols == 3 );
2426 M1.convertTo(m: matM, rtype: matM.type());
2427
2428 if( !(flags & WARP_INVERSE_MAP) )
2429 {
2430 if (op_type == OCL_OP_PERSPECTIVE)
2431 invert(src: matM, dst: matM);
2432 else
2433 {
2434 double D = M[0]*M[4] - M[1]*M[3];
2435 D = D != 0 ? 1./D : 0;
2436 double A11 = M[4]*D, A22=M[0]*D;
2437 M[0] = A11; M[1] *= -D;
2438 M[3] *= -D; M[4] = A22;
2439 double b1 = -M[0]*M[2] - M[1]*M[5];
2440 double b2 = -M[3]*M[2] - M[4]*M[5];
2441 M[2] = b1; M[5] = b2;
2442 }
2443 }
2444 matM.convertTo(m: M0, rtype: useDouble ? CV_64F : CV_32F);
2445
2446 k.args(kernel_args: ocl::KernelArg::ReadOnly(m: src), kernel_args: ocl::KernelArg::WriteOnly(m: dst), kernel_args: ocl::KernelArg::PtrReadOnly(m: M0),
2447 kernel_args: ocl::KernelArg(ocl::KernelArg::CONSTANT, 0, 0, 0, borderBuf, CV_ELEM_SIZE(sctype)));
2448
2449 size_t globalThreads[2] = { (size_t)dst.cols, ((size_t)dst.rows + rowsPerWI - 1) / rowsPerWI };
2450 return k.run(dims: 2, globalsize: globalThreads, NULL, sync: false);
2451}
2452
2453#endif
2454
2455#ifdef HAVE_IPP
2456#define IPP_WARPAFFINE_PARALLEL 1
2457
2458#ifdef HAVE_IPP_IW
2459
2460class ipp_warpAffineParallel: public ParallelLoopBody
2461{
2462public:
2463 ipp_warpAffineParallel(::ipp::IwiImage &src, ::ipp::IwiImage &dst, IppiInterpolationType _inter, double (&_coeffs)[2][3], ::ipp::IwiBorderType _borderType, IwTransDirection _iwTransDirection, bool *_ok):m_src(src), m_dst(dst)
2464 {
2465 pOk = _ok;
2466
2467 inter = _inter;
2468 borderType = _borderType;
2469 iwTransDirection = _iwTransDirection;
2470
2471 for( int i = 0; i < 2; i++ )
2472 for( int j = 0; j < 3; j++ )
2473 coeffs[i][j] = _coeffs[i][j];
2474
2475 *pOk = true;
2476 }
2477 ~ipp_warpAffineParallel() {}
2478
2479 virtual void operator() (const Range& range) const CV_OVERRIDE
2480 {
2481 CV_INSTRUMENT_REGION_IPP();
2482
2483 if(*pOk == false)
2484 return;
2485
2486 try
2487 {
2488 ::ipp::IwiTile tile = ::ipp::IwiRoi(0, range.start, m_dst.m_size.width, range.end - range.start);
2489 CV_INSTRUMENT_FUN_IPP(::ipp::iwiWarpAffine, m_src, m_dst, coeffs, iwTransDirection, inter, ::ipp::IwiWarpAffineParams(), borderType, tile);
2490 }
2491 catch(const ::ipp::IwException &)
2492 {
2493 *pOk = false;
2494 return;
2495 }
2496 }
2497private:
2498 ::ipp::IwiImage &m_src;
2499 ::ipp::IwiImage &m_dst;
2500
2501 IppiInterpolationType inter;
2502 double coeffs[2][3];
2503 ::ipp::IwiBorderType borderType;
2504 IwTransDirection iwTransDirection;
2505
2506 bool *pOk;
2507 const ipp_warpAffineParallel& operator= (const ipp_warpAffineParallel&);
2508};
2509
2510#endif
2511
2512static bool ipp_warpAffine( InputArray _src, OutputArray _dst, int interpolation, int borderType, const Scalar & borderValue, InputArray _M, int flags )
2513{
2514#ifdef HAVE_IPP_IW
2515 CV_INSTRUMENT_REGION_IPP();
2516
2517 if (!cv::ipp::useIPP_NotExact())
2518 return false;
2519
2520 IppiInterpolationType ippInter = ippiGetInterpolation(inter: interpolation);
2521 if((int)ippInter < 0)
2522 return false;
2523
2524 // Acquire data and begin processing
2525 try
2526 {
2527 Mat src = _src.getMat();
2528 Mat dst = _dst.getMat();
2529 ::ipp::IwiImage iwSrc = ippiGetImage(src);
2530 ::ipp::IwiImage iwDst = ippiGetImage(src: dst);
2531 ::ipp::IwiBorderType ippBorder(ippiGetBorderType(borderTypeNI: borderType), ippiGetValue(scalar: borderValue));
2532 IwTransDirection iwTransDirection;
2533 if(!ippBorder)
2534 return false;
2535
2536 if( !(flags & WARP_INVERSE_MAP) )
2537 iwTransDirection = iwTransForward;
2538 else
2539 iwTransDirection = iwTransInverse;
2540
2541 Mat M = _M.getMat();
2542 double coeffs[2][3];
2543 for( int i = 0; i < 2; i++ )
2544 for( int j = 0; j < 3; j++ )
2545 coeffs[i][j] = M.at<double>(i0: i, i1: j);
2546
2547 const int threads = ippiSuggestThreadsNum(image: iwDst, multiplier: 2);
2548
2549 if(IPP_WARPAFFINE_PARALLEL && threads > 1)
2550 {
2551 bool ok = true;
2552 Range range(0, (int)iwDst.m_size.height);
2553 ipp_warpAffineParallel invoker(iwSrc, iwDst, ippInter, coeffs, ippBorder, iwTransDirection, &ok);
2554 if(!ok)
2555 return false;
2556
2557 parallel_for_(range, body: invoker, nstripes: threads*4);
2558
2559 if(!ok)
2560 return false;
2561 } else {
2562 CV_INSTRUMENT_FUN_IPP(::ipp::iwiWarpAffine, iwSrc, iwDst, coeffs, iwTransDirection, ippInter, ::ipp::IwiWarpAffineParams(), ippBorder);
2563 }
2564
2565 }
2566 catch (const ::ipp::IwException &)
2567 {
2568 return false;
2569 }
2570
2571 return true;
2572#else
2573 CV_UNUSED(_src); CV_UNUSED(_dst); CV_UNUSED(interpolation);
2574 CV_UNUSED(borderType); CV_UNUSED(borderValue); CV_UNUSED(_M); CV_UNUSED(flags);
2575 return false;
2576#endif
2577}
2578
2579#endif
2580
2581namespace hal {
2582
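// Descriptive note (added comment): the fallback below evaluates the affine map
// in fixed point. With AB_BITS fractional bits (AB_SCALE = 1 << AB_BITS),
//     X = (M[0]*x + M[1]*y + M[2]) * AB_SCALE
//     Y = (M[3]*x + M[4]*y + M[5]) * AB_SCALE
// is split into per-column increments adelta[x] = M[0]*x*AB_SCALE,
// bdelta[x] = M[3]*x*AB_SCALE and a per-row offset (X0, Y0); the blockline
// helpers then shift these down to integer coordinates plus INTER_BITS of
// sub-pixel precision.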
2583void warpAffine(int src_type,
2584 const uchar * src_data, size_t src_step, int src_width, int src_height,
2585 uchar * dst_data, size_t dst_step, int dst_width, int dst_height,
2586 const double M[6], int interpolation, int borderType, const double borderValue[4])
2587{
2588 CALL_HAL(warpAffine, cv_hal_warpAffine, src_type, src_data, src_step, src_width, src_height, dst_data, dst_step, dst_width, dst_height, M, interpolation, borderType, borderValue);
2589
2590 Mat src(Size(src_width, src_height), src_type, const_cast<uchar*>(src_data), src_step);
2591 Mat dst(Size(dst_width, dst_height), src_type, dst_data, dst_step);
2592
2593 int x;
2594 AutoBuffer<int> _abdelta(dst.cols*2);
2595 int* adelta = &_abdelta[0], *bdelta = adelta + dst.cols;
2596 const int AB_BITS = MAX(10, (int)INTER_BITS);
2597 const int AB_SCALE = 1 << AB_BITS;
2598
2599 for( x = 0; x < dst.cols; x++ )
2600 {
2601 adelta[x] = saturate_cast<int>(v: M[0]*x*AB_SCALE);
2602 bdelta[x] = saturate_cast<int>(v: M[3]*x*AB_SCALE);
2603 }
2604
2605 Range range(0, dst.rows);
2606 WarpAffineInvoker invoker(src, dst, interpolation, borderType,
2607 Scalar(borderValue[0], borderValue[1], borderValue[2], borderValue[3]),
2608 adelta, bdelta, M);
2609 parallel_for_(range, body: invoker, nstripes: dst.total()/(double)(1<<16));
2610}
2611
void warpAffineBlocklineNN(int *adelta, int *bdelta, short* xy, int X0, int Y0, int bw)
{
    CALL_HAL(warpAffineBlocklineNN, cv_hal_warpAffineBlocklineNN, adelta, bdelta, xy, X0, Y0, bw);

    constexpr int AB_BITS = MAX(10, static_cast<int>(INTER_BITS));
    int x1 = 0;
#if (CV_SIMD || CV_SIMD_SCALABLE)
    {
        const v_int32 v_X0 = vx_setall_s32(X0);
        const v_int32 v_Y0 = vx_setall_s32(Y0);
        const int step = VTraits<v_int16>::vlanes();
        for (; x1 <= bw - step; x1 += step)
        {
            v_int16 v_X = v_pack(v_shr<AB_BITS>(v_add(v_X0, vx_load(adelta + x1))),
                                 v_shr<AB_BITS>(v_add(v_X0, vx_load(adelta + x1 + step / 2))));
            v_int16 v_Y = v_pack(v_shr<AB_BITS>(v_add(v_Y0, vx_load(bdelta + x1))),
                                 v_shr<AB_BITS>(v_add(v_Y0, vx_load(bdelta + x1 + step / 2))));
            v_store_interleave(xy + 2 * x1, v_X, v_Y);
        }
    }
#endif
    for (; x1 < bw; x1++)
    {
        const int X = (X0 + adelta[x1]) >> AB_BITS;
        const int Y = (Y0 + bdelta[x1]) >> AB_BITS;
        xy[x1 * 2] = saturate_cast<short>(X);
        xy[x1 * 2 + 1] = saturate_cast<short>(Y);
    }
}

void warpAffineBlockline(int *adelta, int *bdelta, short* xy, short* alpha, int X0, int Y0, int bw)
{
    CALL_HAL(warpAffineBlockline, cv_hal_warpAffineBlockline, adelta, bdelta, xy, alpha, X0, Y0, bw);

    const int AB_BITS = MAX(10, (int)INTER_BITS);
    int x1 = 0;
    #if CV_TRY_AVX2
    bool useAVX2 = CV_CPU_HAS_SUPPORT_AVX2;
    if ( useAVX2 )
        x1 = opt_AVX2::warpAffineBlockline(adelta, bdelta, xy, alpha, X0, Y0, bw);
    #endif
    #if CV_TRY_LASX
    bool useLASX = CV_CPU_HAS_SUPPORT_LASX;
    if ( useLASX )
        x1 = opt_LASX::warpAffineBlockline(adelta, bdelta, xy, alpha, X0, Y0, bw);
    #endif
    {
        #if CV_SIMD128
        {
            v_int32x4 v__X0 = v_setall_s32(X0), v__Y0 = v_setall_s32(Y0);
            v_int32x4 v_mask = v_setall_s32(INTER_TAB_SIZE - 1);
            int span = VTraits<v_float32x4>::vlanes();
            for( ; x1 <= bw - span * 2; x1 += span * 2 )
            {
                v_int32x4 v_X0 = v_shr<AB_BITS - INTER_BITS>(v_add(v__X0, v_load(adelta + x1)));
                v_int32x4 v_Y0 = v_shr<AB_BITS - INTER_BITS>(v_add(v__Y0, v_load(bdelta + x1)));
                v_int32x4 v_X1 = v_shr<AB_BITS - INTER_BITS>(v_add(v__X0, v_load(adelta + x1 + span)));
                v_int32x4 v_Y1 = v_shr<AB_BITS - INTER_BITS>(v_add(v__Y0, v_load(bdelta + x1 + span)));

                v_int16x8 v_xy[2];
                v_xy[0] = v_pack(v_shr<INTER_BITS>(v_X0), v_shr<INTER_BITS>(v_X1));
                v_xy[1] = v_pack(v_shr<INTER_BITS>(v_Y0), v_shr<INTER_BITS>(v_Y1));
                v_store_interleave(xy + (x1 << 1), v_xy[0], v_xy[1]);

                v_int32x4 v_alpha0 = v_or(v_shl<INTER_BITS>(v_and(v_Y0, v_mask)), v_and(v_X0, v_mask));
                v_int32x4 v_alpha1 = v_or(v_shl<INTER_BITS>(v_and(v_Y1, v_mask)), v_and(v_X1, v_mask));
                v_store(alpha + x1, v_pack(v_alpha0, v_alpha1));
            }
        }
        #endif
        for( ; x1 < bw; x1++ )
        {
            int X = (X0 + adelta[x1]) >> (AB_BITS - INTER_BITS);
            int Y = (Y0 + bdelta[x1]) >> (AB_BITS - INTER_BITS);
            xy[x1*2] = saturate_cast<short>(X >> INTER_BITS);
            xy[x1*2+1] = saturate_cast<short>(Y >> INTER_BITS);
            alpha[x1] = (short)((Y & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE +
                                (X & (INTER_TAB_SIZE-1)));
        }
    }
}
2693
2694} // hal::
2695} // cv::
2696
2697
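// Usage sketch (illustrative only, not from the original source): the public
// warpAffine entry point below is normally driven by a 2x3 matrix, e.g. a
// rotation about the image center:
//
//     cv::Mat src(480, 640, CV_8UC3, cv::Scalar::all(0));   // placeholder input
//     cv::Point2f center(src.cols * 0.5f, src.rows * 0.5f);
//     cv::Mat R = cv::getRotationMatrix2D(center, /*angle=*/30.0, /*scale=*/1.0);
//     cv::Mat dst;
//     cv::warpAffine(src, dst, R, src.size(), cv::INTER_LINEAR,
//                    cv::BORDER_CONSTANT, cv::Scalar());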
void cv::warpAffine( InputArray _src, OutputArray _dst,
                     InputArray _M0, Size dsize,
                     int flags, int borderType, const Scalar& borderValue )
{
    CV_INSTRUMENT_REGION();

    int interpolation = flags & INTER_MAX;
    CV_Assert( _src.channels() <= 4 || (interpolation != INTER_LANCZOS4 &&
                                        interpolation != INTER_CUBIC) );

    CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat() &&
               _src.cols() <= SHRT_MAX && _src.rows() <= SHRT_MAX,
               ocl_warpTransform_cols4(_src, _dst, _M0, dsize, flags, borderType,
                                       borderValue, OCL_OP_AFFINE))

    CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat(),
               ocl_warpTransform(_src, _dst, _M0, dsize, flags, borderType,
                                 borderValue, OCL_OP_AFFINE))

    Mat src = _src.getMat(), M0 = _M0.getMat();
    _dst.create( dsize.empty() ? src.size() : dsize, src.type() );
    Mat dst = _dst.getMat();
    CV_Assert( src.cols > 0 && src.rows > 0 );
    if( dst.data == src.data )
        src = src.clone();

    double M[6] = {0};
    Mat matM(2, 3, CV_64F, M);
    if( interpolation == INTER_AREA )
        interpolation = INTER_LINEAR;

    CV_Assert( (M0.type() == CV_32F || M0.type() == CV_64F) && M0.rows == 2 && M0.cols == 3 );
    M0.convertTo(matM, matM.type());

    CV_IPP_RUN_FAST(ipp_warpAffine(src, dst, interpolation, borderType, borderValue, matM, flags));

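    // Added comment: unless WARP_INVERSE_MAP is given, the forward matrix
    // M = [A | b] is inverted in place to [A^-1 | -A^-1*b] (A is the 2x2 linear
    // part, b the translation), because the per-pixel loop maps destination
    // coordinates back into the source image.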
    if( !(flags & WARP_INVERSE_MAP) )
    {
        double D = M[0]*M[4] - M[1]*M[3];
        D = D != 0 ? 1./D : 0;
        double A11 = M[4]*D, A22=M[0]*D;
        M[0] = A11; M[1] *= -D;
        M[3] *= -D; M[4] = A22;
        double b1 = -M[0]*M[2] - M[1]*M[5];
        double b2 = -M[3]*M[2] - M[4]*M[5];
        M[2] = b1; M[5] = b2;
    }
2745
2746#if defined (HAVE_IPP) && IPP_VERSION_X100 >= 810 && !IPP_DISABLE_WARPAFFINE
2747 CV_IPP_CHECK()
2748 {
2749 int type = src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
2750 if( ( depth == CV_8U || depth == CV_16U || depth == CV_32F ) &&
2751 ( cn == 1 || cn == 3 || cn == 4 ) &&
2752 ( interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC) &&
2753 ( borderType == cv::BORDER_TRANSPARENT || borderType == cv::BORDER_CONSTANT) )
2754 {
2755 ippiWarpAffineBackFunc ippFunc = 0;
2756 if ((flags & WARP_INVERSE_MAP) != 0)
2757 {
2758 ippFunc =
2759 type == CV_8UC1 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_8u_C1R :
2760 type == CV_8UC3 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_8u_C3R :
2761 type == CV_8UC4 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_8u_C4R :
2762 type == CV_16UC1 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_16u_C1R :
2763 type == CV_16UC3 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_16u_C3R :
2764 type == CV_16UC4 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_16u_C4R :
2765 type == CV_32FC1 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_32f_C1R :
2766 type == CV_32FC3 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_32f_C3R :
2767 type == CV_32FC4 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_32f_C4R :
2768 0;
2769 }
2770 else
2771 {
2772 ippFunc =
2773 type == CV_8UC1 ? (ippiWarpAffineBackFunc)ippiWarpAffine_8u_C1R :
2774 type == CV_8UC3 ? (ippiWarpAffineBackFunc)ippiWarpAffine_8u_C3R :
2775 type == CV_8UC4 ? (ippiWarpAffineBackFunc)ippiWarpAffine_8u_C4R :
2776 type == CV_16UC1 ? (ippiWarpAffineBackFunc)ippiWarpAffine_16u_C1R :
2777 type == CV_16UC3 ? (ippiWarpAffineBackFunc)ippiWarpAffine_16u_C3R :
2778 type == CV_16UC4 ? (ippiWarpAffineBackFunc)ippiWarpAffine_16u_C4R :
2779 type == CV_32FC1 ? (ippiWarpAffineBackFunc)ippiWarpAffine_32f_C1R :
2780 type == CV_32FC3 ? (ippiWarpAffineBackFunc)ippiWarpAffine_32f_C3R :
2781 type == CV_32FC4 ? (ippiWarpAffineBackFunc)ippiWarpAffine_32f_C4R :
2782 0;
2783 }
2784 int mode =
2785 interpolation == INTER_LINEAR ? IPPI_INTER_LINEAR :
2786 interpolation == INTER_NEAREST ? IPPI_INTER_NN :
2787 interpolation == INTER_CUBIC ? IPPI_INTER_CUBIC :
2788 0;
2789 CV_Assert(mode && ippFunc);
2790
2791 double coeffs[2][3];
2792 for( int i = 0; i < 2; i++ )
2793 for( int j = 0; j < 3; j++ )
2794 coeffs[i][j] = matM.at<double>(i, j);
2795
2796 bool ok;
2797 Range range(0, dst.rows);
2798 IPPWarpAffineInvoker invoker(src, dst, coeffs, mode, borderType, borderValue, ippFunc, &ok);
2799 parallel_for_(range, invoker, dst.total()/(double)(1<<16));
2800 if( ok )
2801 {
2802 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
2803 return;
2804 }
2805 setIppErrorStatus();
2806 }
2807 }
2808#endif
2809
    hal::warpAffine(src.type(), src.data, src.step, src.cols, src.rows, dst.data, dst.step, dst.cols, dst.rows,
                    M, interpolation, borderType, borderValue.val);
}
2813
2814
2815namespace cv
2816{
2817#if CV_SIMD128_64F
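// Descriptive note (added comment): the two SIMD helpers below compute one
// destination row of a perspective warp. For column x1 they evaluate
//     W = W0 + M[6]*x1,   X = (X0 + M[0]*x1)/W,   Y = (Y0 + M[3]*x1)/W
// (clamped to the int range, with W == 0 mapped to 0). The NN variant stores the
// rounded integer coordinates; the other variant scales by INTER_TAB_SIZE and
// additionally emits the packed sub-pixel indices into 'alpha'.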
2818void WarpPerspectiveLine_ProcessNN_CV_SIMD(const double *M, short* xy, double X0, double Y0, double W0, int bw)
2819{
2820 const v_float64x2 v_M0 = v_setall_f64(v: M[0]);
2821 const v_float64x2 v_M3 = v_setall_f64(v: M[3]);
2822 const v_float64x2 v_M6 = v_setall_f64(v: M[6]);
2823 const v_float64x2 v_intmax = v_setall_f64(v: (double)INT_MAX);
2824 const v_float64x2 v_intmin = v_setall_f64(v: (double)INT_MIN);
2825 const v_float64x2 v_2 = v_setall_f64(v: 2.0);
2826 const v_float64x2 v_zero = v_setzero_f64();
2827 const v_float64x2 v_1 = v_setall_f64(v: 1.0);
2828
2829 int x1 = 0;
2830 v_float64x2 v_X0d = v_setall_f64(v: X0);
2831 v_float64x2 v_Y0d = v_setall_f64(v: Y0);
2832 v_float64x2 v_W0 = v_setall_f64(v: W0);
2833 v_float64x2 v_x1(0.0, 1.0);
2834
2835 for (; x1 <= bw - 16; x1 += 16)
2836 {
2837 // 0-3
2838 v_int32x4 v_X0, v_Y0;
2839 {
2840 v_float64x2 v_W = v_muladd(a: v_M6, b: v_x1, c: v_W0);
2841 v_W = v_select(mask: v_ne(a: v_W, b: v_zero), a: v_div(a: v_1, b: v_W), b: v_zero);
2842 v_float64x2 v_fX0 = v_max(a: v_intmin, b: v_min(a: v_intmax, b: v_mul(a: v_muladd(a: v_M0, b: v_x1, c: v_X0d), b: v_W)));
2843 v_float64x2 v_fY0 = v_max(a: v_intmin, b: v_min(a: v_intmax, b: v_mul(a: v_muladd(a: v_M3, b: v_x1, c: v_Y0d), b: v_W)));
2844 v_x1 = v_add(a: v_x1, b: v_2);
2845
2846 v_W = v_muladd(a: v_M6, b: v_x1, c: v_W0);
2847 v_W = v_select(mask: v_ne(a: v_W, b: v_zero), a: v_div(a: v_1, b: v_W), b: v_zero);
2848 v_float64x2 v_fX1 = v_max(a: v_intmin, b: v_min(a: v_intmax, b: v_mul(a: v_muladd(a: v_M0, b: v_x1, c: v_X0d), b: v_W)));
2849 v_float64x2 v_fY1 = v_max(a: v_intmin, b: v_min(a: v_intmax, b: v_mul(a: v_muladd(a: v_M3, b: v_x1, c: v_Y0d), b: v_W)));
2850 v_x1 = v_add(a: v_x1, b: v_2);
2851
2852 v_X0 = v_round(a: v_fX0, b: v_fX1);
2853 v_Y0 = v_round(a: v_fY0, b: v_fY1);
2854 }
2855
2856 // 4-7
2857 v_int32x4 v_X1, v_Y1;
2858 {
2859 v_float64x2 v_W = v_muladd(a: v_M6, b: v_x1, c: v_W0);
2860 v_W = v_select(mask: v_ne(a: v_W, b: v_zero), a: v_div(a: v_1, b: v_W), b: v_zero);
2861 v_float64x2 v_fX0 = v_max(a: v_intmin, b: v_min(a: v_intmax, b: v_mul(a: v_muladd(a: v_M0, b: v_x1, c: v_X0d), b: v_W)));
2862 v_float64x2 v_fY0 = v_max(a: v_intmin, b: v_min(a: v_intmax, b: v_mul(a: v_muladd(a: v_M3, b: v_x1, c: v_Y0d), b: v_W)));
2863 v_x1 = v_add(a: v_x1, b: v_2);
2864
2865 v_W = v_muladd(a: v_M6, b: v_x1, c: v_W0);
2866 v_W = v_select(mask: v_ne(a: v_W, b: v_zero), a: v_div(a: v_1, b: v_W), b: v_zero);
2867 v_float64x2 v_fX1 = v_max(a: v_intmin, b: v_min(a: v_intmax, b: v_mul(a: v_muladd(a: v_M0, b: v_x1, c: v_X0d), b: v_W)));
2868 v_float64x2 v_fY1 = v_max(a: v_intmin, b: v_min(a: v_intmax, b: v_mul(a: v_muladd(a: v_M3, b: v_x1, c: v_Y0d), b: v_W)));
2869 v_x1 = v_add(a: v_x1, b: v_2);
2870
2871 v_X1 = v_round(a: v_fX0, b: v_fX1);
2872 v_Y1 = v_round(a: v_fY0, b: v_fY1);
2873 }
2874
2875 // 8-11
2876 v_int32x4 v_X2, v_Y2;
2877 {
2878 v_float64x2 v_W = v_muladd(a: v_M6, b: v_x1, c: v_W0);
2879 v_W = v_select(mask: v_ne(a: v_W, b: v_zero), a: v_div(a: v_1, b: v_W), b: v_zero);
2880 v_float64x2 v_fX0 = v_max(a: v_intmin, b: v_min(a: v_intmax, b: v_mul(a: v_muladd(a: v_M0, b: v_x1, c: v_X0d), b: v_W)));
2881 v_float64x2 v_fY0 = v_max(a: v_intmin, b: v_min(a: v_intmax, b: v_mul(a: v_muladd(a: v_M3, b: v_x1, c: v_Y0d), b: v_W)));
2882 v_x1 = v_add(a: v_x1, b: v_2);
2883
2884 v_W = v_muladd(a: v_M6, b: v_x1, c: v_W0);
2885 v_W = v_select(mask: v_ne(a: v_W, b: v_zero), a: v_div(a: v_1, b: v_W), b: v_zero);
2886 v_float64x2 v_fX1 = v_max(a: v_intmin, b: v_min(a: v_intmax, b: v_mul(a: v_muladd(a: v_M0, b: v_x1, c: v_X0d), b: v_W)));
2887 v_float64x2 v_fY1 = v_max(a: v_intmin, b: v_min(a: v_intmax, b: v_mul(a: v_muladd(a: v_M3, b: v_x1, c: v_Y0d), b: v_W)));
2888 v_x1 = v_add(a: v_x1, b: v_2);
2889
2890 v_X2 = v_round(a: v_fX0, b: v_fX1);
2891 v_Y2 = v_round(a: v_fY0, b: v_fY1);
2892 }
2893
2894 // 12-15
2895 v_int32x4 v_X3, v_Y3;
2896 {
2897 v_float64x2 v_W = v_muladd(a: v_M6, b: v_x1, c: v_W0);
2898 v_W = v_select(mask: v_ne(a: v_W, b: v_zero), a: v_div(a: v_1, b: v_W), b: v_zero);
2899 v_float64x2 v_fX0 = v_max(a: v_intmin, b: v_min(a: v_intmax, b: v_mul(a: v_muladd(a: v_M0, b: v_x1, c: v_X0d), b: v_W)));
2900 v_float64x2 v_fY0 = v_max(a: v_intmin, b: v_min(a: v_intmax, b: v_mul(a: v_muladd(a: v_M3, b: v_x1, c: v_Y0d), b: v_W)));
2901 v_x1 = v_add(a: v_x1, b: v_2);
2902
2903 v_W = v_muladd(a: v_M6, b: v_x1, c: v_W0);
2904 v_W = v_select(mask: v_ne(a: v_W, b: v_zero), a: v_div(a: v_1, b: v_W), b: v_zero);
2905 v_float64x2 v_fX1 = v_max(a: v_intmin, b: v_min(a: v_intmax, b: v_mul(a: v_muladd(a: v_M0, b: v_x1, c: v_X0d), b: v_W)));
2906 v_float64x2 v_fY1 = v_max(a: v_intmin, b: v_min(a: v_intmax, b: v_mul(a: v_muladd(a: v_M3, b: v_x1, c: v_Y0d), b: v_W)));
2907 v_x1 = v_add(a: v_x1, b: v_2);
2908
2909 v_X3 = v_round(a: v_fX0, b: v_fX1);
2910 v_Y3 = v_round(a: v_fY0, b: v_fY1);
2911 }
2912
2913 // convert to 16s
2914 v_X0 = v_reinterpret_as_s32(a: v_pack(a: v_X0, b: v_X1));
2915 v_X1 = v_reinterpret_as_s32(a: v_pack(a: v_X2, b: v_X3));
2916 v_Y0 = v_reinterpret_as_s32(a: v_pack(a: v_Y0, b: v_Y1));
2917 v_Y1 = v_reinterpret_as_s32(a: v_pack(a: v_Y2, b: v_Y3));
2918
2919 v_store_interleave(ptr: xy + x1 * 2, a0: (v_reinterpret_as_s16)(a: v_X0), b0: (v_reinterpret_as_s16)(a: v_Y0));
2920 v_store_interleave(ptr: xy + x1 * 2 + 16, a0: (v_reinterpret_as_s16)(a: v_X1), b0: (v_reinterpret_as_s16)(a: v_Y1));
2921 }
2922
2923 for( ; x1 < bw; x1++ )
2924 {
2925 double W = W0 + M[6]*x1;
2926 W = W ? 1./W : 0;
2927 double fX = std::max(a: (double)INT_MIN, b: std::min(a: (double)INT_MAX, b: (X0 + M[0]*x1)*W));
2928 double fY = std::max(a: (double)INT_MIN, b: std::min(a: (double)INT_MAX, b: (Y0 + M[3]*x1)*W));
2929 int X = saturate_cast<int>(v: fX);
2930 int Y = saturate_cast<int>(v: fY);
2931
2932 xy[x1*2] = saturate_cast<short>(v: X);
2933 xy[x1*2+1] = saturate_cast<short>(v: Y);
2934 }
2935}
2936
2937void WarpPerspectiveLine_Process_CV_SIMD(const double *M, short* xy, short* alpha, double X0, double Y0, double W0, int bw)
2938{
2939 const v_float64x2 v_M0 = v_setall_f64(v: M[0]);
2940 const v_float64x2 v_M3 = v_setall_f64(v: M[3]);
2941 const v_float64x2 v_M6 = v_setall_f64(v: M[6]);
2942 const v_float64x2 v_intmax = v_setall_f64(v: (double)INT_MAX);
2943 const v_float64x2 v_intmin = v_setall_f64(v: (double)INT_MIN);
2944 const v_float64x2 v_2 = v_setall_f64(v: 2.0);
2945 const v_float64x2 v_zero = v_setzero_f64();
2946 const v_float64x2 v_its = v_setall_f64(v: (double)INTER_TAB_SIZE);
2947 const v_int32x4 v_itsi1 = v_setall_s32(v: INTER_TAB_SIZE - 1);
2948
2949 int x1 = 0;
2950
2951 v_float64x2 v_X0d = v_setall_f64(v: X0);
2952 v_float64x2 v_Y0d = v_setall_f64(v: Y0);
2953 v_float64x2 v_W0 = v_setall_f64(v: W0);
2954 v_float64x2 v_x1(0.0, 1.0);
2955
2956 for (; x1 <= bw - 16; x1 += 16)
2957 {
2958 // 0-3
2959 v_int32x4 v_X0, v_Y0;
2960 {
2961 v_float64x2 v_W = v_muladd(a: v_M6, b: v_x1, c: v_W0);
2962 v_W = v_select(mask: v_ne(a: v_W, b: v_zero), a: v_div(a: v_its, b: v_W), b: v_zero);
2963 v_float64x2 v_fX0 = v_max(a: v_intmin, b: v_min(a: v_intmax, b: v_mul(a: v_muladd(a: v_M0, b: v_x1, c: v_X0d), b: v_W)));
2964 v_float64x2 v_fY0 = v_max(a: v_intmin, b: v_min(a: v_intmax, b: v_mul(a: v_muladd(a: v_M3, b: v_x1, c: v_Y0d), b: v_W)));
2965 v_x1 = v_add(a: v_x1, b: v_2);
2966
2967 v_W = v_muladd(a: v_M6, b: v_x1, c: v_W0);
2968 v_W = v_select(mask: v_ne(a: v_W, b: v_zero), a: v_div(a: v_its, b: v_W), b: v_zero);
            v_float64x2 v_fX1 = v_max(v_intmin, v_min(v_intmax, v_mul(v_muladd(v_M0, v_x1, v_X0d), v_W)));
            v_float64x2 v_fY1 = v_max(v_intmin, v_min(v_intmax, v_mul(v_muladd(v_M3, v_x1, v_Y0d), v_W)));
            v_x1 = v_add(v_x1, v_2);

            v_X0 = v_round(v_fX0, v_fX1);
            v_Y0 = v_round(v_fY0, v_fY1);
        }

        // 4-7
        v_int32x4 v_X1, v_Y1;
        {
            v_float64x2 v_W = v_muladd(v_M6, v_x1, v_W0);
            v_W = v_select(v_ne(v_W, v_zero), v_div(v_its, v_W), v_zero);
            v_float64x2 v_fX0 = v_max(v_intmin, v_min(v_intmax, v_mul(v_muladd(v_M0, v_x1, v_X0d), v_W)));
            v_float64x2 v_fY0 = v_max(v_intmin, v_min(v_intmax, v_mul(v_muladd(v_M3, v_x1, v_Y0d), v_W)));
            v_x1 = v_add(v_x1, v_2);

            v_W = v_muladd(v_M6, v_x1, v_W0);
            v_W = v_select(v_ne(v_W, v_zero), v_div(v_its, v_W), v_zero);
            v_float64x2 v_fX1 = v_max(v_intmin, v_min(v_intmax, v_mul(v_muladd(v_M0, v_x1, v_X0d), v_W)));
            v_float64x2 v_fY1 = v_max(v_intmin, v_min(v_intmax, v_mul(v_muladd(v_M3, v_x1, v_Y0d), v_W)));
            v_x1 = v_add(v_x1, v_2);

            v_X1 = v_round(v_fX0, v_fX1);
            v_Y1 = v_round(v_fY0, v_fY1);
        }

        // 8-11
        v_int32x4 v_X2, v_Y2;
        {
            v_float64x2 v_W = v_muladd(v_M6, v_x1, v_W0);
            v_W = v_select(v_ne(v_W, v_zero), v_div(v_its, v_W), v_zero);
            v_float64x2 v_fX0 = v_max(v_intmin, v_min(v_intmax, v_mul(v_muladd(v_M0, v_x1, v_X0d), v_W)));
            v_float64x2 v_fY0 = v_max(v_intmin, v_min(v_intmax, v_mul(v_muladd(v_M3, v_x1, v_Y0d), v_W)));
            v_x1 = v_add(v_x1, v_2);

            v_W = v_muladd(v_M6, v_x1, v_W0);
            v_W = v_select(v_ne(v_W, v_zero), v_div(v_its, v_W), v_zero);
            v_float64x2 v_fX1 = v_max(v_intmin, v_min(v_intmax, v_mul(v_muladd(v_M0, v_x1, v_X0d), v_W)));
            v_float64x2 v_fY1 = v_max(v_intmin, v_min(v_intmax, v_mul(v_muladd(v_M3, v_x1, v_Y0d), v_W)));
            v_x1 = v_add(v_x1, v_2);

            v_X2 = v_round(v_fX0, v_fX1);
            v_Y2 = v_round(v_fY0, v_fY1);
        }

        // 12-15
        v_int32x4 v_X3, v_Y3;
        {
            v_float64x2 v_W = v_muladd(v_M6, v_x1, v_W0);
            v_W = v_select(v_ne(v_W, v_zero), v_div(v_its, v_W), v_zero);
            v_float64x2 v_fX0 = v_max(v_intmin, v_min(v_intmax, v_mul(v_muladd(v_M0, v_x1, v_X0d), v_W)));
            v_float64x2 v_fY0 = v_max(v_intmin, v_min(v_intmax, v_mul(v_muladd(v_M3, v_x1, v_Y0d), v_W)));
            v_x1 = v_add(v_x1, v_2);

            v_W = v_muladd(v_M6, v_x1, v_W0);
            v_W = v_select(v_ne(v_W, v_zero), v_div(v_its, v_W), v_zero);
            v_float64x2 v_fX1 = v_max(v_intmin, v_min(v_intmax, v_mul(v_muladd(v_M0, v_x1, v_X0d), v_W)));
            v_float64x2 v_fY1 = v_max(v_intmin, v_min(v_intmax, v_mul(v_muladd(v_M3, v_x1, v_Y0d), v_W)));
            v_x1 = v_add(v_x1, v_2);

            v_X3 = v_round(v_fX0, v_fX1);
            v_Y3 = v_round(v_fY0, v_fY1);
        }

        // store alpha
        v_int32x4 v_alpha0 = v_add(v_shl<INTER_BITS>(v_and(v_Y0, v_itsi1)), v_and(v_X0, v_itsi1));
        v_int32x4 v_alpha1 = v_add(v_shl<INTER_BITS>(v_and(v_Y1, v_itsi1)), v_and(v_X1, v_itsi1));
        v_store(alpha + x1, v_pack(v_alpha0, v_alpha1));

        v_alpha0 = v_add(v_shl<INTER_BITS>(v_and(v_Y2, v_itsi1)), v_and(v_X2, v_itsi1));
        v_alpha1 = v_add(v_shl<INTER_BITS>(v_and(v_Y3, v_itsi1)), v_and(v_X3, v_itsi1));
        v_store(alpha + x1 + 8, v_pack(v_alpha0, v_alpha1));

        // convert to 16s
        v_X0 = v_reinterpret_as_s32(v_pack(v_shr<INTER_BITS>(v_X0), v_shr<INTER_BITS>(v_X1)));
        v_X1 = v_reinterpret_as_s32(v_pack(v_shr<INTER_BITS>(v_X2), v_shr<INTER_BITS>(v_X3)));
        v_Y0 = v_reinterpret_as_s32(v_pack(v_shr<INTER_BITS>(v_Y0), v_shr<INTER_BITS>(v_Y1)));
        v_Y1 = v_reinterpret_as_s32(v_pack(v_shr<INTER_BITS>(v_Y2), v_shr<INTER_BITS>(v_Y3)));

        v_store_interleave(xy + x1 * 2, v_reinterpret_as_s16(v_X0), v_reinterpret_as_s16(v_Y0));
        v_store_interleave(xy + x1 * 2 + 16, v_reinterpret_as_s16(v_X1), v_reinterpret_as_s16(v_Y1));
    }

    for( ; x1 < bw; x1++ )
    {
        double W = W0 + M[6]*x1;
        W = W ? static_cast<double>(INTER_TAB_SIZE)/W : 0;
        double fX = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0]*x1)*W));
        double fY = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3]*x1)*W));
        int X = saturate_cast<int>(fX);
        int Y = saturate_cast<int>(fY);

        xy[x1*2] = saturate_cast<short>(X >> INTER_BITS);
        xy[x1*2+1] = saturate_cast<short>(Y >> INTER_BITS);
        alpha[x1] = (short)((Y & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE +
                            (X & (INTER_TAB_SIZE-1)));
    }
}
#endif

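/* Note on the fixed-point map layout produced above (grounded in the scalar fallback loop):
 * for every destination x the projective mapping
 *
 *     X = (X0 + M[0]*x) / (W0 + M[6]*x) * INTER_TAB_SIZE
 *     Y = (Y0 + M[3]*x) / (W0 + M[6]*x) * INTER_TAB_SIZE
 *
 * is rounded to integers; the high bits (X >> INTER_BITS, Y >> INTER_BITS) are stored in the
 * interleaved xy[] coordinate map, while the low INTER_BITS bits of both coordinates are packed
 * into alpha[] = (Y & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE + (X & (INTER_TAB_SIZE-1)),
 * i.e. an index into the interpolation-weight tables consumed by remap().
 */
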
class WarpPerspectiveInvoker :
    public ParallelLoopBody
{
public:
    WarpPerspectiveInvoker(const Mat &_src, Mat &_dst, const double *_M, int _interpolation,
                           int _borderType, const Scalar &_borderValue) :
        ParallelLoopBody(), src(_src), dst(_dst), M(_M), interpolation(_interpolation),
        borderType(_borderType), borderValue(_borderValue)
    {
#if defined(_MSC_VER) && _MSC_VER == 1800 /* MSVS 2013 */ && CV_AVX
        // details: https://github.com/opencv/opencv/issues/11026
        borderValue.val[2] = _borderValue.val[2];
        borderValue.val[3] = _borderValue.val[3];
#endif
    }

    virtual void operator() (const Range& range) const CV_OVERRIDE
    {
        const int BLOCK_SZ = 32;
        short XY[BLOCK_SZ*BLOCK_SZ*2], A[BLOCK_SZ*BLOCK_SZ];
        int x, y, y1, width = dst.cols, height = dst.rows;

        int bh0 = std::min(BLOCK_SZ/2, height);
        int bw0 = std::min(BLOCK_SZ*BLOCK_SZ/bh0, width);
        bh0 = std::min(BLOCK_SZ*BLOCK_SZ/bw0, height);

        for( y = range.start; y < range.end; y += bh0 )
        {
            for( x = 0; x < width; x += bw0 )
            {
                int bw = std::min( bw0, width - x);
                int bh = std::min( bh0, range.end - y); // height

                Mat _XY(bh, bw, CV_16SC2, XY);
                Mat dpart(dst, Rect(x, y, bw, bh));

                for( y1 = 0; y1 < bh; y1++ )
                {
                    short* xy = XY + y1*bw*2;
                    double X0 = M[0]*x + M[1]*(y + y1) + M[2];
                    double Y0 = M[3]*x + M[4]*(y + y1) + M[5];
                    double W0 = M[6]*x + M[7]*(y + y1) + M[8];

                    if( interpolation == INTER_NEAREST )
                        hal::warpPerspectiveBlocklineNN(M, xy, X0, Y0, W0, bw);
                    else
                        hal::warpPerspectiveBlockline(M, xy, A + y1*bw, X0, Y0, W0, bw);
                }

                if( interpolation == INTER_NEAREST )
                    remap( src, dpart, _XY, Mat(), interpolation, borderType, borderValue );
                else
                {
                    Mat _matA(bh, bw, CV_16U, A);
                    remap( src, dpart, _XY, _matA, interpolation, borderType, borderValue );
                }
            }
        }
    }

private:
    Mat src;
    Mat dst;
    const double* M;
    int interpolation, borderType;
    Scalar borderValue;
};

#if defined (HAVE_IPP) && IPP_VERSION_X100 >= 810 && !IPP_DISABLE_WARPPERSPECTIVE
typedef IppStatus (CV_STDCALL* ippiWarpPerspectiveFunc)(const void*, IppiSize, int, IppiRect, void *, int, IppiRect, double [3][3], int);

class IPPWarpPerspectiveInvoker :
    public ParallelLoopBody
{
public:
    IPPWarpPerspectiveInvoker(Mat &_src, Mat &_dst, double (&_coeffs)[3][3], int &_interpolation,
                              int &_borderType, const Scalar &_borderValue, ippiWarpPerspectiveFunc _func, bool *_ok) :
        ParallelLoopBody(), src(_src), dst(_dst), mode(_interpolation), coeffs(_coeffs),
        borderType(_borderType), borderValue(_borderValue), func(_func), ok(_ok)
    {
        *ok = true;
    }

    virtual void operator() (const Range& range) const CV_OVERRIDE
    {
        IppiSize srcsize = {src.cols, src.rows};
        IppiRect srcroi = {0, 0, src.cols, src.rows};
        IppiRect dstroi = {0, range.start, dst.cols, range.end - range.start};
        int cnn = src.channels();

        if( borderType == BORDER_CONSTANT )
        {
            IppiSize setSize = {dst.cols, range.end - range.start};
            void *dataPointer = dst.ptr(range.start);
            if( !IPPSet( borderValue, dataPointer, (int)dst.step[0], setSize, cnn, src.depth() ) )
            {
                *ok = false;
                return;
            }
        }
        IppStatus status = CV_INSTRUMENT_FUN_IPP(func,(src.ptr(), srcsize, (int)src.step[0], srcroi, dst.ptr(), (int)dst.step[0], dstroi, coeffs, mode));
        if (status != ippStsNoErr)
            *ok = false;
        else
        {
            CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
        }
    }
private:
    Mat &src;
    Mat &dst;
    int mode;
    double (&coeffs)[3][3];
    int borderType;
    const Scalar borderValue;
    ippiWarpPerspectiveFunc func;
    bool *ok;

    const IPPWarpPerspectiveInvoker& operator= (const IPPWarpPerspectiveInvoker&);
};
#endif

namespace hal {

void warpPerspective(int src_type,
                     const uchar * src_data, size_t src_step, int src_width, int src_height,
                     uchar * dst_data, size_t dst_step, int dst_width, int dst_height,
                     const double M[9], int interpolation, int borderType, const double borderValue[4])
{
    CALL_HAL(warpPerspective, cv_hal_warpPerspective, src_type, src_data, src_step, src_width, src_height, dst_data, dst_step, dst_width, dst_height, M, interpolation, borderType, borderValue);
    Mat src(Size(src_width, src_height), src_type, const_cast<uchar*>(src_data), src_step);
    Mat dst(Size(dst_width, dst_height), src_type, dst_data, dst_step);

    Range range(0, dst.rows);
    WarpPerspectiveInvoker invoker(src, dst, M, interpolation, borderType, Scalar(borderValue[0], borderValue[1], borderValue[2], borderValue[3]));
    parallel_for_(range, invoker, dst.total()/(double)(1<<16));
}

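/* The two block-line helpers below compute one row of the coordinate map for a tile.
 * Each tries, in order: an external HAL implementation (CALL_HAL), a runtime-dispatched
 * SSE4.1 path, the universal-intrinsics path above (CV_SIMD128_64F), and finally a plain
 * scalar loop.
 */
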
void warpPerspectiveBlocklineNN(const double *M, short* xy, double X0, double Y0, double W0, int bw)
{
    CALL_HAL(warpPerspectiveBlocklineNN, cv_hal_warpPerspectiveBlocklineNN, M, xy, X0, Y0, W0, bw);

    #if CV_TRY_SSE4_1
    Ptr<opt_SSE4_1::WarpPerspectiveLine_SSE4> pwarp_impl_sse4;
    if(CV_CPU_HAS_SUPPORT_SSE4_1)
        pwarp_impl_sse4 = opt_SSE4_1::WarpPerspectiveLine_SSE4::getImpl(M);

    if (pwarp_impl_sse4)
        pwarp_impl_sse4->processNN(M, xy, X0, Y0, W0, bw);
    else
    #endif
    {
        #if CV_SIMD128_64F
        WarpPerspectiveLine_ProcessNN_CV_SIMD(M, xy, X0, Y0, W0, bw);
        #else
        for( int x1 = 0; x1 < bw; x1++ )
        {
            double W = W0 + M[6]*x1;
            W = W ? 1./W : 0;
            double fX = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0]*x1)*W));
            double fY = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3]*x1)*W));
            int X = saturate_cast<int>(fX);
            int Y = saturate_cast<int>(fY);

            xy[x1*2] = saturate_cast<short>(X);
            xy[x1*2+1] = saturate_cast<short>(Y);
        }
        #endif
    }
}

void warpPerspectiveBlockline(const double *M, short* xy, short* alpha, double X0, double Y0, double W0, int bw)
{
    CALL_HAL(warpPerspectiveBlockline, cv_hal_warpPerspectiveBlockline, M, xy, alpha, X0, Y0, W0, bw);

    #if CV_TRY_SSE4_1
    Ptr<opt_SSE4_1::WarpPerspectiveLine_SSE4> pwarp_impl_sse4;
    if(CV_CPU_HAS_SUPPORT_SSE4_1)
        pwarp_impl_sse4 = opt_SSE4_1::WarpPerspectiveLine_SSE4::getImpl(M);

    if (pwarp_impl_sse4)
        pwarp_impl_sse4->process(M, xy, alpha, X0, Y0, W0, bw);
    else
    #endif
    {
        #if CV_SIMD128_64F
        WarpPerspectiveLine_Process_CV_SIMD(M, xy, alpha, X0, Y0, W0, bw);
        #else
        for( int x1 = 0; x1 < bw; x1++ )
        {
            double W = W0 + M[6]*x1;
            W = W ? INTER_TAB_SIZE/W : 0;
            double fX = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0]*x1)*W));
            double fY = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3]*x1)*W));
            int X = saturate_cast<int>(fX);
            int Y = saturate_cast<int>(fY);

            xy[x1*2] = saturate_cast<short>(X >> INTER_BITS);
            xy[x1*2+1] = saturate_cast<short>(Y >> INTER_BITS);
            alpha[x1] = (short)((Y & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE +
                                (X & (INTER_TAB_SIZE-1)));
        }
        #endif
    }
}

} // hal::
} // cv::

void cv::warpPerspective( InputArray _src, OutputArray _dst, InputArray _M0,
                          Size dsize, int flags, int borderType, const Scalar& borderValue )
{
    CV_INSTRUMENT_REGION();

    CV_Assert( _src.total() > 0 );

    CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat() &&
               _src.cols() <= SHRT_MAX && _src.rows() <= SHRT_MAX,
               ocl_warpTransform_cols4(_src, _dst, _M0, dsize, flags, borderType, borderValue,
                                       OCL_OP_PERSPECTIVE))

    CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat(),
               ocl_warpTransform(_src, _dst, _M0, dsize, flags, borderType, borderValue,
                                 OCL_OP_PERSPECTIVE))

    Mat src = _src.getMat(), M0 = _M0.getMat();
    _dst.create( dsize.empty() ? src.size() : dsize, src.type() );
    Mat dst = _dst.getMat();

    if( dst.data == src.data )
        src = src.clone();

    double M[9];
    Mat matM(3, 3, CV_64F, M);
    int interpolation = flags & INTER_MAX;
    if( interpolation == INTER_AREA )
        interpolation = INTER_LINEAR;

    CV_Assert( (M0.type() == CV_32F || M0.type() == CV_64F) && M0.rows == 3 && M0.cols == 3 );
    M0.convertTo(matM, matM.type());

#if defined (HAVE_IPP) && IPP_VERSION_X100 >= 810 && !IPP_DISABLE_WARPPERSPECTIVE
    CV_IPP_CHECK()
    {
        int type = src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
        if( (depth == CV_8U || depth == CV_16U || depth == CV_32F) &&
            (cn == 1 || cn == 3 || cn == 4) &&
            ( borderType == cv::BORDER_TRANSPARENT || borderType == cv::BORDER_CONSTANT ) &&
            (interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC))
        {
            ippiWarpPerspectiveFunc ippFunc = 0;
            if ((flags & WARP_INVERSE_MAP) != 0)
            {
                ippFunc = type == CV_8UC1 ? (ippiWarpPerspectiveFunc)ippiWarpPerspectiveBack_8u_C1R :
                    type == CV_8UC3 ? (ippiWarpPerspectiveFunc)ippiWarpPerspectiveBack_8u_C3R :
                    type == CV_8UC4 ? (ippiWarpPerspectiveFunc)ippiWarpPerspectiveBack_8u_C4R :
                    type == CV_16UC1 ? (ippiWarpPerspectiveFunc)ippiWarpPerspectiveBack_16u_C1R :
                    type == CV_16UC3 ? (ippiWarpPerspectiveFunc)ippiWarpPerspectiveBack_16u_C3R :
                    type == CV_16UC4 ? (ippiWarpPerspectiveFunc)ippiWarpPerspectiveBack_16u_C4R :
                    type == CV_32FC1 ? (ippiWarpPerspectiveFunc)ippiWarpPerspectiveBack_32f_C1R :
                    type == CV_32FC3 ? (ippiWarpPerspectiveFunc)ippiWarpPerspectiveBack_32f_C3R :
                    type == CV_32FC4 ? (ippiWarpPerspectiveFunc)ippiWarpPerspectiveBack_32f_C4R : 0;
            }
            else
            {
                ippFunc = type == CV_8UC1 ? (ippiWarpPerspectiveFunc)ippiWarpPerspective_8u_C1R :
                    type == CV_8UC3 ? (ippiWarpPerspectiveFunc)ippiWarpPerspective_8u_C3R :
                    type == CV_8UC4 ? (ippiWarpPerspectiveFunc)ippiWarpPerspective_8u_C4R :
                    type == CV_16UC1 ? (ippiWarpPerspectiveFunc)ippiWarpPerspective_16u_C1R :
                    type == CV_16UC3 ? (ippiWarpPerspectiveFunc)ippiWarpPerspective_16u_C3R :
                    type == CV_16UC4 ? (ippiWarpPerspectiveFunc)ippiWarpPerspective_16u_C4R :
                    type == CV_32FC1 ? (ippiWarpPerspectiveFunc)ippiWarpPerspective_32f_C1R :
                    type == CV_32FC3 ? (ippiWarpPerspectiveFunc)ippiWarpPerspective_32f_C3R :
                    type == CV_32FC4 ? (ippiWarpPerspectiveFunc)ippiWarpPerspective_32f_C4R : 0;
            }
            int mode =
                interpolation == INTER_NEAREST ? IPPI_INTER_NN :
                interpolation == INTER_LINEAR ? IPPI_INTER_LINEAR :
                interpolation == INTER_CUBIC ? IPPI_INTER_CUBIC : 0;
            CV_Assert(mode && ippFunc);

            double coeffs[3][3];
            for( int i = 0; i < 3; i++ )
                for( int j = 0; j < 3; j++ )
                    coeffs[i][j] = matM.at<double>(i, j);

            bool ok;
            Range range(0, dst.rows);
            IPPWarpPerspectiveInvoker invoker(src, dst, coeffs, mode, borderType, borderValue, ippFunc, &ok);
            parallel_for_(range, invoker, dst.total()/(double)(1<<16));
            if( ok )
            {
                CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
                return;
            }
            setIppErrorStatus();
        }
    }
#endif

    if( !(flags & WARP_INVERSE_MAP) )
        invert(matM, matM);

    hal::warpPerspective(src.type(), src.data, src.step, src.cols, src.rows, dst.data, dst.step, dst.cols, dst.rows,
                         matM.ptr<double>(), interpolation, borderType, borderValue.val);
}

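/* A minimal usage sketch for cv::warpPerspective (illustrative only; the 3x3 matrix values
 * below are arbitrary placeholders, not taken from this file):
 *
 *     cv::Mat src(480, 640, CV_8UC3, cv::Scalar::all(128));
 *     cv::Matx33d H(1.0,  0.2, 10.0,
 *                   0.1,  1.0, 20.0,
 *                   1e-4, 1e-4, 1.0);
 *     cv::Mat dst;
 *     // forward warp; add cv::WARP_INVERSE_MAP to flags if H already maps dst -> src
 *     cv::warpPerspective(src, dst, H, src.size(),
 *                         cv::INTER_LINEAR, cv::BORDER_CONSTANT, cv::Scalar());
 */
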
cv::Matx23d cv::getRotationMatrix2D_(Point2f center, double angle, double scale)
{
    CV_INSTRUMENT_REGION();

    angle *= CV_PI/180;
    double alpha = std::cos(angle)*scale;
    double beta = std::sin(angle)*scale;

    Matx23d M(
        alpha, beta, (1-alpha)*center.x - beta*center.y,
        -beta, alpha, beta*center.x + (1-alpha)*center.y
    );
    return M;
}

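/* Usage sketch for the public rotation-matrix helper (illustrative only; image size, angle
 * and scale are placeholders):
 *
 *     cv::Mat img(400, 400, CV_8UC1, cv::Scalar(0));
 *     cv::Point2f c(img.cols * 0.5f, img.rows * 0.5f);
 *     cv::Mat R = cv::getRotationMatrix2D(c, 30.0, 1.0);   // 2x3 CV_64F
 *     cv::Mat rotated;
 *     cv::warpAffine(img, rotated, R, img.size());
 */
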
/* Calculates coefficients of perspective transformation
 * which maps (xi,yi) to (ui,vi), (i=1,2,3,4):
 *
 *      c00*xi + c01*yi + c02
 * ui = ---------------------
 *      c20*xi + c21*yi + c22
 *
 *      c10*xi + c11*yi + c12
 * vi = ---------------------
 *      c20*xi + c21*yi + c22
 *
 * Coefficients are calculated by solving one of 2 linear systems:
 *  / x0 y0  1  0  0  0 -x0*u0 -y0*u0 \ /c00\ /u0\
 *  | x1 y1  1  0  0  0 -x1*u1 -y1*u1 | |c01| |u1|
 *  | x2 y2  1  0  0  0 -x2*u2 -y2*u2 | |c02| |u2|
 *  | x3 y3  1  0  0  0 -x3*u3 -y3*u3 |.|c10|=|u3|,
 *  |  0  0  0 x0 y0  1 -x0*v0 -y0*v0 | |c11| |v0|
 *  |  0  0  0 x1 y1  1 -x1*v1 -y1*v1 | |c12| |v1|
 *  |  0  0  0 x2 y2  1 -x2*v2 -y2*v2 | |c20| |v2|
 *  \  0  0  0 x3 y3  1 -x3*v3 -y3*v3 / \c21/ \v3/
 *
 * where:
 *   cij - matrix coefficients, c22 = 1
 *
 * or
 *
 *  / x0 y0  1  0  0  0 -x0*u0 -y0*u0 -u0 \ /c00\ /0\
 *  | x1 y1  1  0  0  0 -x1*u1 -y1*u1 -u1 | |c01| |0|
 *  | x2 y2  1  0  0  0 -x2*u2 -y2*u2 -u2 | |c02| |0|
 *  | x3 y3  1  0  0  0 -x3*u3 -y3*u3 -u3 |.|c10|=|0|,
 *  |  0  0  0 x0 y0  1 -x0*v0 -y0*v0 -v0 | |c11| |0|
 *  |  0  0  0 x1 y1  1 -x1*v1 -y1*v1 -v1 | |c12| |0|
 *  |  0  0  0 x2 y2  1 -x2*v2 -y2*v2 -v2 | |c20| |0|
 *  \  0  0  0 x3 y3  1 -x3*v3 -y3*v3 -v3 / |c21| \0/
 *                                          \c22/
 *
 * where:
 *   cij - matrix coefficients, c00^2 + c01^2 + c02^2 + c10^2 + c11^2 + c12^2 + c20^2 + c21^2 + c22^2 = 1
 */
cv::Mat cv::getPerspectiveTransform(const Point2f src[], const Point2f dst[], int solveMethod)
{
    CV_INSTRUMENT_REGION();

    // try c22 = 1
    Mat M(3, 3, CV_64F), X8(8, 1, CV_64F, M.ptr());
    double a[8][8], b[8];
    Mat A(8, 8, CV_64F, a), B(8, 1, CV_64F, b);

    for( int i = 0; i < 4; ++i )
    {
        a[i][0] = a[i+4][3] = src[i].x;
        a[i][1] = a[i+4][4] = src[i].y;
        a[i][2] = a[i+4][5] = 1;
        a[i][3] = a[i][4] = a[i][5] =
        a[i+4][0] = a[i+4][1] = a[i+4][2] = 0;
        a[i][6] = -src[i].x*dst[i].x;
        a[i][7] = -src[i].y*dst[i].x;
        a[i+4][6] = -src[i].x*dst[i].y;
        a[i+4][7] = -src[i].y*dst[i].y;
        b[i] = dst[i].x;
        b[i+4] = dst[i].y;
    }

    if (solve(A, B, X8, solveMethod) && norm(A * X8, B) < 1e-8)
    {
        M.ptr<double>()[8] = 1.;

        return M;
    }

    // c00^2 + c01^2 + c02^2 + c10^2 + c11^2 + c12^2 + c20^2 + c21^2 + c22^2 = 1
    hconcat(A, -B, A);

    Mat AtA;
    mulTransposed(A, AtA, true);

    Mat D, U;
    SVDecomp(AtA, D, U, noArray());

    Mat X9(9, 1, CV_64F, M.ptr());
    U.col(8).copyTo(X9);

    return M;
}

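/* Usage sketch (illustrative; the point coordinates are arbitrary placeholders):
 *
 *     std::vector<cv::Point2f> srcPts = { {0.f, 0.f}, {639.f, 0.f}, {639.f, 479.f}, {0.f, 479.f} };
 *     std::vector<cv::Point2f> dstPts = { {20.f, 10.f}, {600.f, 30.f}, {620.f, 470.f}, {10.f, 460.f} };
 *     cv::Mat H = cv::getPerspectiveTransform(srcPts, dstPts);   // 3x3 CV_64F
 *     // H maps srcPts[i] onto dstPts[i]; pass it to cv::warpPerspective or
 *     // cv::perspectiveTransform. solveMethod (e.g. cv::DECOMP_LU) selects the solver
 *     // used for the 8x8 system above.
 */
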
/* Calculates coefficients of affine transformation
 * which maps (xi,yi) to (ui,vi), (i=1,2,3):
 *
 * ui = c00*xi + c01*yi + c02
 *
 * vi = c10*xi + c11*yi + c12
 *
 * Coefficients are calculated by solving linear system:
 *  / x0 y0  1  0  0  0 \ /c00\ /u0\
 *  | x1 y1  1  0  0  0 | |c01| |u1|
 *  | x2 y2  1  0  0  0 | |c02| |u2|
 *  |  0  0  0 x0 y0  1 | |c10| |v0|
 *  |  0  0  0 x1 y1  1 | |c11| |v1|
 *  \  0  0  0 x2 y2  1 / |c12| |v2|
 *
 * where:
 *   cij - matrix coefficients
 */

cv::Mat cv::getAffineTransform( const Point2f src[], const Point2f dst[] )
{
    Mat M(2, 3, CV_64F), X(6, 1, CV_64F, M.ptr());
    double a[6*6], b[6];
    Mat A(6, 6, CV_64F, a), B(6, 1, CV_64F, b);

    for( int i = 0; i < 3; i++ )
    {
        int j = i*12;
        int k = i*12+6;
        a[j] = a[k+3] = src[i].x;
        a[j+1] = a[k+4] = src[i].y;
        a[j+2] = a[k+5] = 1;
        a[j+3] = a[j+4] = a[j+5] = 0;
        a[k] = a[k+1] = a[k+2] = 0;
        b[i*2] = dst[i].x;
        b[i*2+1] = dst[i].y;
    }

    solve( A, B, X );
    return M;
}

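/* Usage sketch (illustrative; the three point pairs are arbitrary placeholders):
 *
 *     cv::Point2f srcTri[3] = { {0.f, 0.f}, {100.f, 0.f}, {0.f, 100.f} };
 *     cv::Point2f dstTri[3] = { {10.f, 10.f}, {110.f, 20.f}, {5.f, 115.f} };
 *     cv::Mat A = cv::getAffineTransform(srcTri, dstTri);   // 2x3 CV_64F
 *     // A maps srcTri[i] onto dstTri[i]; use it with cv::warpAffine or cv::transform.
 */
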
void cv::invertAffineTransform(InputArray _matM, OutputArray __iM)
{
    Mat matM = _matM.getMat();
    CV_Assert(matM.rows == 2 && matM.cols == 3);
    __iM.create(2, 3, matM.type());
    Mat _iM = __iM.getMat();

    if( matM.type() == CV_32F )
    {
        const softfloat* M = matM.ptr<softfloat>();
        softfloat* iM = _iM.ptr<softfloat>();
        int step = (int)(matM.step/sizeof(M[0])), istep = (int)(_iM.step/sizeof(iM[0]));

        softdouble D = M[0]*M[step+1] - M[1]*M[step];
        D = D != 0. ? softdouble(1.)/D : softdouble(0.);
        softdouble A11 = M[step+1]*D, A22 = M[0]*D, A12 = -M[1]*D, A21 = -M[step]*D;
        softdouble b1 = -A11*M[2] - A12*M[step+2];
        softdouble b2 = -A21*M[2] - A22*M[step+2];

        iM[0] = A11; iM[1] = A12; iM[2] = b1;
        iM[istep] = A21; iM[istep+1] = A22; iM[istep+2] = b2;
    }
    else if( matM.type() == CV_64F )
    {
        const softdouble* M = matM.ptr<softdouble>();
        softdouble* iM = _iM.ptr<softdouble>();
        int step = (int)(matM.step/sizeof(M[0])), istep = (int)(_iM.step/sizeof(iM[0]));

        softdouble D = M[0]*M[step+1] - M[1]*M[step];
        D = D != 0. ? softdouble(1.)/D : softdouble(0.);
        softdouble A11 = M[step+1]*D, A22 = M[0]*D, A12 = -M[1]*D, A21 = -M[step]*D;
        softdouble b1 = -A11*M[2] - A12*M[step+2];
        softdouble b2 = -A21*M[2] - A22*M[step+2];

        iM[0] = A11; iM[1] = A12; iM[2] = b1;
        iM[istep] = A21; iM[istep+1] = A22; iM[istep+2] = b2;
    }
    else
        CV_Error( cv::Error::StsUnsupportedFormat, "" );
}

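/* The 2x3 inverse computed above is the closed form
 *     [A | b]^-1 = [A^-1 | -A^-1*b],  with  A^-1 = 1/det(A) * [ a11 -a01; -a10 a00 ].
 * A small usage sketch (illustrative; the transform is a placeholder):
 *
 *     cv::Mat A = cv::getRotationMatrix2D(cv::Point2f(50.f, 50.f), 15.0, 1.0);
 *     cv::Mat Ainv;
 *     cv::invertAffineTransform(A, Ainv);   // also 2x3, same type as A
 */
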
cv::Mat cv::getPerspectiveTransform(InputArray _src, InputArray _dst, int solveMethod)
{
    Mat src = _src.getMat(), dst = _dst.getMat();
    CV_Assert(src.checkVector(2, CV_32F) == 4 && dst.checkVector(2, CV_32F) == 4);
    return getPerspectiveTransform((const Point2f*)src.data, (const Point2f*)dst.data, solveMethod);
}

cv::Mat cv::getAffineTransform(InputArray _src, InputArray _dst)
{
    Mat src = _src.getMat(), dst = _dst.getMat();
    CV_Assert(src.checkVector(2, CV_32F) == 3 && dst.checkVector(2, CV_32F) == 3);
    return getAffineTransform((const Point2f*)src.data, (const Point2f*)dst.data);
}

CV_IMPL void
cvWarpAffine( const CvArr* srcarr, CvArr* dstarr, const CvMat* marr,
              int flags, CvScalar fillval )
{
    cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
    cv::Mat matrix = cv::cvarrToMat(marr);
    CV_Assert( src.type() == dst.type() );
    cv::warpAffine( src, dst, matrix, dst.size(), flags,
        (flags & cv::WARP_FILL_OUTLIERS) ? cv::BORDER_CONSTANT : cv::BORDER_TRANSPARENT,
        fillval );
}

CV_IMPL void
cvWarpPerspective( const CvArr* srcarr, CvArr* dstarr, const CvMat* marr,
                   int flags, CvScalar fillval )
{
    cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
    cv::Mat matrix = cv::cvarrToMat(marr);
    CV_Assert( src.type() == dst.type() );
    cv::warpPerspective( src, dst, matrix, dst.size(), flags,
        (flags & cv::WARP_FILL_OUTLIERS) ? cv::BORDER_CONSTANT : cv::BORDER_TRANSPARENT,
        fillval );
}

CV_IMPL void
cvRemap( const CvArr* srcarr, CvArr* dstarr,
         const CvArr* _mapx, const CvArr* _mapy,
         int flags, CvScalar fillval )
{
    cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr), dst0 = dst;
    cv::Mat mapx = cv::cvarrToMat(_mapx), mapy = cv::cvarrToMat(_mapy);
    CV_Assert( src.type() == dst.type() && dst.size() == mapx.size() );
    cv::remap( src, dst, mapx, mapy, flags & cv::INTER_MAX,
        (flags & cv::WARP_FILL_OUTLIERS) ? cv::BORDER_CONSTANT : cv::BORDER_TRANSPARENT,
        fillval );
    CV_Assert( dst0.data == dst.data );
}


CV_IMPL CvMat*
cv2DRotationMatrix( CvPoint2D32f center, double angle,
                    double scale, CvMat* matrix )
{
    cv::Mat M0 = cv::cvarrToMat(matrix), M = cv::getRotationMatrix2D(center, angle, scale);
    CV_Assert( M.size() == M0.size() );
    M.convertTo(M0, M0.type());
    return matrix;
}


CV_IMPL CvMat*
cvGetPerspectiveTransform( const CvPoint2D32f* src,
                           const CvPoint2D32f* dst,
                           CvMat* matrix )
{
    cv::Mat M0 = cv::cvarrToMat(matrix),
        M = cv::getPerspectiveTransform((const cv::Point2f*)src, (const cv::Point2f*)dst);
    CV_Assert( M.size() == M0.size() );
    M.convertTo(M0, M0.type());
    return matrix;
}


CV_IMPL CvMat*
cvGetAffineTransform( const CvPoint2D32f* src,
                      const CvPoint2D32f* dst,
                      CvMat* matrix )
{
    cv::Mat M0 = cv::cvarrToMat(matrix),
        M = cv::getAffineTransform((const cv::Point2f*)src, (const cv::Point2f*)dst);
    CV_Assert( M.size() == M0.size() );
    M.convertTo(M0, M0.type());
    return matrix;
}


CV_IMPL void
cvConvertMaps( const CvArr* arr1, const CvArr* arr2, CvArr* dstarr1, CvArr* dstarr2 )
{
    cv::Mat map1 = cv::cvarrToMat(arr1), map2;
    cv::Mat dstmap1 = cv::cvarrToMat(dstarr1), dstmap2;

    if( arr2 )
        map2 = cv::cvarrToMat(arr2);
    if( dstarr2 )
    {
        dstmap2 = cv::cvarrToMat(dstarr2);
        if( dstmap2.type() == CV_16SC1 )
            dstmap2 = cv::Mat(dstmap2.size(), CV_16UC1, dstmap2.ptr(), dstmap2.step);
    }

    cv::convertMaps( map1, map2, dstmap1, dstmap2, dstmap1.type(), false );
}

/****************************************************************************************
PkLab.net 2018 based on cv::linearPolar from OpenCV by J.L. Blanco, Apr 2009
****************************************************************************************/
void cv::warpPolar(InputArray _src, OutputArray _dst, Size dsize,
                   Point2f center, double maxRadius, int flags)
{
    // if the destination size is not given, compute it proportionally so that
    // the result keeps roughly the same area as the source's bounding circle
    if ((dsize.width <= 0) && (dsize.height <= 0))
    {
        dsize.width = cvRound(maxRadius);
        dsize.height = cvRound(maxRadius * CV_PI);
    }
    else if (dsize.height <= 0)
    {
        dsize.height = cvRound(dsize.width * CV_PI);
    }

    Mat mapx, mapy;
    mapx.create(dsize, CV_32F);
    mapy.create(dsize, CV_32F);
    bool semiLog = (flags & WARP_POLAR_LOG) != 0;

    if (!(flags & cv::WARP_INVERSE_MAP))
    {
        CV_Assert(!dsize.empty());
        double Kangle = CV_2PI / dsize.height;
        int phi, rho;

        // precalculate scaled rho
        Mat rhos = Mat(1, dsize.width, CV_32F);
        float* bufRhos = (float*)(rhos.data);
        if (semiLog)
        {
            double Kmag = std::log(maxRadius) / dsize.width;
            for (rho = 0; rho < dsize.width; rho++)
                bufRhos[rho] = (float)(std::exp(rho * Kmag) - 1.0);

        }
        else
        {
            double Kmag = maxRadius / dsize.width;
            for (rho = 0; rho < dsize.width; rho++)
                bufRhos[rho] = (float)(rho * Kmag);
        }

        for (phi = 0; phi < dsize.height; phi++)
        {
            double KKy = Kangle * phi;
            double cp = std::cos(KKy);
            double sp = std::sin(KKy);
            float* mx = (float*)(mapx.data + phi*mapx.step);
            float* my = (float*)(mapy.data + phi*mapy.step);

            for (rho = 0; rho < dsize.width; rho++)
            {
                double x = bufRhos[rho] * cp + center.x;
                double y = bufRhos[rho] * sp + center.y;

                mx[rho] = (float)x;
                my[rho] = (float)y;
            }
        }
        remap(_src, _dst, mapx, mapy, flags & cv::INTER_MAX, (flags & cv::WARP_FILL_OUTLIERS) ? cv::BORDER_CONSTANT : cv::BORDER_TRANSPARENT);
    }
    else
    {
        const int ANGLE_BORDER = 1;
        cv::copyMakeBorder(_src, _dst, ANGLE_BORDER, ANGLE_BORDER, 0, 0, BORDER_WRAP);
        Mat src = _dst.getMat();
        Size ssize = _dst.size();
        ssize.height -= 2 * ANGLE_BORDER;
        CV_Assert(!ssize.empty());
        const double Kangle = CV_2PI / ssize.height;
        double Kmag;
        if (semiLog)
            Kmag = std::log(maxRadius) / ssize.width;
        else
            Kmag = maxRadius / ssize.width;

        Mat bufx, bufy, bufp, bufa;

        bufx = Mat(1, dsize.width, CV_32F);
        bufy = Mat(1, dsize.width, CV_32F);
        bufp = Mat(1, dsize.width, CV_32F);
        bufa = Mat(1, dsize.width, CV_32F);

        for (int x = 0; x < dsize.width; x++)
            bufx.at<float>(0, x) = (float)x - center.x;

        cv::parallel_for_(cv::Range(0, dsize.height), [&](const cv::Range& range) {
            for (int y = range.start; y < range.end; ++y) {
                Mat local_bufx = bufx.clone();
                Mat local_bufy = Mat(1, dsize.width, CV_32F);
                Mat local_bufp = Mat(1, dsize.width, CV_32F);
                Mat local_bufa = Mat(1, dsize.width, CV_32F);

                for (int x = 0; x < dsize.width; x++) {
                    local_bufy.at<float>(0, x) = static_cast<float>(y) - center.y;
                }

                cartToPolar(local_bufx, local_bufy, local_bufp, local_bufa, false);

                if (semiLog) {
                    local_bufp += 1.f;
                    log(local_bufp, local_bufp);
                }

                float* mx = (float*)(mapx.data + y * mapx.step);
                float* my = (float*)(mapy.data + y * mapy.step);

                for (int x = 0; x < dsize.width; x++) {
                    double rho = local_bufp.at<float>(0, x) / Kmag;
                    double phi = local_bufa.at<float>(0, x) / Kangle;
                    mx[x] = static_cast<float>(rho);
                    my[x] = static_cast<float>(phi) + ANGLE_BORDER;
                }
            }
        });

        remap(src, _dst, mapx, mapy, flags & cv::INTER_MAX,
              (flags & cv::WARP_FILL_OUTLIERS) ? cv::BORDER_CONSTANT : cv::BORDER_TRANSPARENT);
    }
}
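
/* Usage sketch for cv::warpPolar (illustrative only; image size and radius are placeholders):
 *
 *     cv::Mat img(512, 512, CV_8UC3, cv::Scalar::all(0));
 *     cv::Point2f c(img.cols * 0.5f, img.rows * 0.5f);
 *     double maxR = img.cols * 0.5;
 *     cv::Mat polar, back;
 *     // linear polar unwrap; add cv::WARP_POLAR_LOG for a semilog (log-polar) mapping
 *     cv::warpPolar(img, polar, cv::Size(), c, maxR, cv::INTER_LINEAR);
 *     // inverse mapping back to Cartesian coordinates
 *     cv::warpPolar(polar, back, img.size(), c, maxR,
 *                   cv::INTER_LINEAR | cv::WARP_INVERSE_MAP);
 */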

void cv::linearPolar( InputArray _src, OutputArray _dst,
                      Point2f center, double maxRadius, int flags )
{
    warpPolar(_src, _dst, _src.size(), center, maxRadius, flags & ~WARP_POLAR_LOG);
}

void cv::logPolar( InputArray _src, OutputArray _dst,
                   Point2f center, double maxRadius, int flags )
{
    Size ssize = _src.size();
    double M = maxRadius > 0 ? std::exp(ssize.width / maxRadius) : 1;
    warpPolar(_src, _dst, ssize, center, M, flags | WARP_POLAR_LOG);
}
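
/* Note: linearPolar and logPolar are kept as thin wrappers that forward to warpPolar.
 * For logPolar, the legacy magnitude-scale parameter is converted above into the radius
 * expected by warpPolar via exp(src.cols / scale), so new code should prefer calling
 * warpPolar directly with WARP_POLAR_LOG.
 */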

CV_IMPL
void cvLinearPolar( const CvArr* srcarr, CvArr* dstarr,
                    CvPoint2D32f center, double maxRadius, int flags )
{
    Mat src = cvarrToMat(srcarr);
    Mat dst = cvarrToMat(dstarr);

    CV_Assert(src.size == dst.size);
    CV_Assert(src.type() == dst.type());

    cv::linearPolar(src, dst, center, maxRadius, flags);
}

CV_IMPL
void cvLogPolar( const CvArr* srcarr, CvArr* dstarr,
                 CvPoint2D32f center, double M, int flags )
{
    Mat src = cvarrToMat(srcarr);
    Mat dst = cvarrToMat(dstarr);

    CV_Assert(src.size == dst.size);
    CV_Assert(src.type() == dst.type());

    cv::logPolar(src, dst, center, M, flags);
}

/* End of file. */