/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                          License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "precomp.hpp"
#include "opencv2/core/hal/intrin.hpp"

namespace cv {
CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
// forward declarations
Ptr<ParallelLoopBody> getInitUndistortRectifyMapComputer(Size _size, Mat &_map1, Mat &_map2, int _m1type,
                                                         const double* _ir, Matx33d &_matTilt,
                                                         double _u0, double _v0, double _fx, double _fy,
                                                         double _k1, double _k2, double _p1, double _p2,
                                                         double _k3, double _k4, double _k5, double _k6,
                                                         double _s1, double _s2, double _s3, double _s4);


#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
namespace
{
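// Row-parallel body that fills the undistort/rectify maps. Each destination
// row is traced back through the inverse rectified projection (ir), pushed
// through the full distortion model (rational radial k1..k6, tangential
// p1/p2, thin-prism s1..s4, and the tilt homography matTilt), then projected
// to source pixel coordinates with (fx, fy, u0, v0).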
class initUndistortRectifyMapComputer : public ParallelLoopBody
{
public:
    initUndistortRectifyMapComputer(
        Size _size, Mat &_map1, Mat &_map2, int _m1type,
        const double* _ir, Matx33d &_matTilt,
        double _u0, double _v0, double _fx, double _fy,
        double _k1, double _k2, double _p1, double _p2,
        double _k3, double _k4, double _k5, double _k6,
        double _s1, double _s2, double _s3, double _s4)
      : size(_size),
        map1(_map1),
        map2(_map2),
        m1type(_m1type),
        ir(_ir),
        matTilt(_matTilt),
        u0(_u0),
        v0(_v0),
        fx(_fx),
        fy(_fy),
        k1(_k1),
        k2(_k2),
        p1(_p1),
        p2(_p2),
        k3(_k3),
        k4(_k4),
        k5(_k5),
        k6(_k6),
        s1(_s1),
        s2(_s2),
        s3(_s3),
        s4(_s4) {
#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F)
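        // Precompute per-lane column offsets: lane i of s_x/s_y/s_w holds
        // i times the per-column step of x, y and w, so two v_float64
        // batches can be advanced with a single broadcast + add.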
        for (int i = 0; i < 2 * VTraits<v_float64>::vlanes(); ++i)
        {
            s_x[i] = ir[0] * i;
            s_y[i] = ir[3] * i;
            s_w[i] = ir[6] * i;
        }
#endif
    }

    void operator()( const cv::Range& range ) const CV_OVERRIDE
    {
        CV_INSTRUMENT_REGION();

        const int begin = range.start;
        const int end = range.end;

        for( int i = begin; i < end; i++ )
        {
            float* m1f = map1.ptr<float>(i);
            float* m2f = map2.empty() ? 0 : map2.ptr<float>(i);
            short* m1 = (short*)m1f;
            ushort* m2 = (ushort*)m2f;
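            // Homogeneous coordinates of pixel (0, i) under ir, the inverted
            // rectified projection set up by the caller; ir is row-major 3x3,
            // so the per-column increments are ir[0], ir[3] and ir[6].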
            double _x = i*ir[1] + ir[2], _y = i*ir[4] + ir[5], _w = i*ir[7] + ir[8];

            int j = 0;

            if (m1type == CV_16SC2)
                CV_Assert(m1 != NULL && m2 != NULL);
            else if (m1type == CV_32FC1)
                CV_Assert(m1f != NULL && m2f != NULL);
            else
                CV_Assert(m1 != NULL);

#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F)
            const v_float64 v_one = vx_setall_f64(1.0);
            for (; j <= size.width - 2*VTraits<v_float64>::vlanes(); j += 2*VTraits<v_float64>::vlanes(), _x += 2*VTraits<v_float64>::vlanes() * ir[0], _y += 2*VTraits<v_float64>::vlanes() * ir[3], _w += 2*VTraits<v_float64>::vlanes() * ir[6])
            {
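                // Two v_float64 batches per iteration. Perspective-divide the
                // homogeneous coordinates to get normalized camera coordinates
                // (x, y) = ((_x + s_x)/w, (_y + s_y)/w).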
                v_float64 m_0, m_1, m_2, m_3;
                m_2 = v_div(v_one, v_add(vx_setall_f64(_w), vx_load(this->s_w)));
                m_3 = v_div(v_one, v_add(vx_setall_f64(_w), vx_load(this->s_w + VTraits<v_float64>::vlanes())));
                m_0 = vx_setall_f64(_x); m_1 = vx_setall_f64(_y);
                v_float64 x_0 = v_mul(v_add(m_0, vx_load(this->s_x)), m_2);
                v_float64 x_1 = v_mul(v_add(m_0, vx_load(this->s_x + VTraits<v_float64>::vlanes())), m_3);
                v_float64 y_0 = v_mul(v_add(m_1, vx_load(this->s_y)), m_2);
                v_float64 y_1 = v_mul(v_add(m_1, vx_load(this->s_y + VTraits<v_float64>::vlanes())), m_3);

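                // x^2, y^2 and the squared radius r^2 = x^2 + y^2.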
                v_float64 xd_0 = v_mul(x_0, x_0);
                v_float64 yd_0 = v_mul(y_0, y_0);
                v_float64 xd_1 = v_mul(x_1, x_1);
                v_float64 yd_1 = v_mul(y_1, y_1);

                v_float64 r2_0 = v_add(xd_0, yd_0);
                v_float64 r2_1 = v_add(xd_1, yd_1);

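                // Rational radial factor, both polynomials in Horner form:
                // kr = (1 + k1*r^2 + k2*r^4 + k3*r^6) / (1 + k4*r^2 + k5*r^4 + k6*r^6)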
                m_1 = vx_setall_f64(k3);
                m_2 = vx_setall_f64(k2);
                m_3 = vx_setall_f64(k1);
                m_0 = v_muladd(v_muladd(v_muladd(m_1, r2_0, m_2), r2_0, m_3), r2_0, v_one);
                m_1 = v_muladd(v_muladd(v_muladd(m_1, r2_1, m_2), r2_1, m_3), r2_1, v_one);
                m_3 = vx_setall_f64(k6);
                m_2 = vx_setall_f64(k5);
                m_0 = v_div(m_0, v_muladd(v_muladd(v_muladd(m_3, r2_0, m_2), r2_0, vx_setall_f64(this->k4)), r2_0, v_one));
                m_1 = v_div(m_1, v_muladd(v_muladd(v_muladd(m_3, r2_1, m_2), r2_1, vx_setall_f64(this->k4)), r2_1, v_one));

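                // Tangential building blocks: xd = r^2 + 2*x^2, yd = r^2 + 2*y^2,
                // and m_2/m_3 = 2*x*y for the two batches.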
                m_3 = vx_setall_f64(2.0);
                xd_0 = v_muladd(m_3, xd_0, r2_0);
                yd_0 = v_muladd(m_3, yd_0, r2_0);
                xd_1 = v_muladd(m_3, xd_1, r2_1);
                yd_1 = v_muladd(m_3, yd_1, r2_1);
                m_2 = v_mul(v_mul(x_0, y_0), m_3);
                m_3 = v_mul(v_mul(x_1, y_1), m_3);

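                // Scale the normalized coordinates by the radial factor kr.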
                x_0 = v_mul(x_0, m_0); y_0 = v_mul(y_0, m_0); x_1 = v_mul(x_1, m_1); y_1 = v_mul(y_1, m_1);

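                // Add the tangential terms:
                // xd = x*kr + p2*(r^2 + 2*x^2) + 2*p1*x*y
                // yd = y*kr + p1*(r^2 + 2*y^2) + 2*p2*x*y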
                m_0 = vx_setall_f64(p1);
                m_1 = vx_setall_f64(p2);
                xd_0 = v_muladd(xd_0, m_1, x_0);
                yd_0 = v_muladd(yd_0, m_0, y_0);
                xd_1 = v_muladd(xd_1, m_1, x_1);
                yd_1 = v_muladd(yd_1, m_0, y_1);

                xd_0 = v_muladd(m_0, m_2, xd_0);
                yd_0 = v_muladd(m_1, m_2, yd_0);
                xd_1 = v_muladd(m_0, m_3, xd_1);
                yd_1 = v_muladd(m_1, m_3, yd_1);

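                // Thin-prism distortion: xd += s1*r^2 + s2*r^4, yd += s3*r^2 + s4*r^4.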
                m_0 = v_mul(r2_0, r2_0);
                m_1 = v_mul(r2_1, r2_1);
                m_2 = vx_setall_f64(s2);
                m_3 = vx_setall_f64(s1);
                xd_0 = v_muladd(m_3, r2_0, v_muladd(m_2, m_0, xd_0));
                xd_1 = v_muladd(m_3, r2_1, v_muladd(m_2, m_1, xd_1));
                m_2 = vx_setall_f64(s4);
                m_3 = vx_setall_f64(s3);
                yd_0 = v_muladd(m_3, r2_0, v_muladd(m_2, m_0, yd_0));
                yd_1 = v_muladd(m_3, r2_1, v_muladd(m_2, m_1, yd_1));

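                // Apply the tilt homography matTilt to (xd, yd, 1). The third
                // row lands in r2_0/r2_1, whose reciprocal is guarded against
                // division by zero before the perspective divide.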
                m_0 = vx_setall_f64(matTilt.val[0]);
                m_1 = vx_setall_f64(matTilt.val[1]);
                m_2 = vx_setall_f64(matTilt.val[2]);
                x_0 = v_muladd(m_0, xd_0, v_muladd(m_1, yd_0, m_2));
                x_1 = v_muladd(m_0, xd_1, v_muladd(m_1, yd_1, m_2));
                m_0 = vx_setall_f64(matTilt.val[3]);
                m_1 = vx_setall_f64(matTilt.val[4]);
                m_2 = vx_setall_f64(matTilt.val[5]);
                y_0 = v_muladd(m_0, xd_0, v_muladd(m_1, yd_0, m_2));
                y_1 = v_muladd(m_0, xd_1, v_muladd(m_1, yd_1, m_2));
                m_0 = vx_setall_f64(matTilt.val[6]);
                m_1 = vx_setall_f64(matTilt.val[7]);
                m_2 = vx_setall_f64(matTilt.val[8]);
                r2_0 = v_muladd(m_0, xd_0, v_muladd(m_1, yd_0, m_2));
                r2_1 = v_muladd(m_0, xd_1, v_muladd(m_1, yd_1, m_2));
                m_0 = vx_setzero_f64();
                r2_0 = v_select(v_eq(r2_0, m_0), v_one, v_div(v_one, r2_0));
                r2_1 = v_select(v_eq(r2_1, m_0), v_one, v_div(v_one, r2_1));

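                // Project back to pixel coordinates: u = fx*x' + u0, v = fy*y' + v0.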
                m_0 = vx_setall_f64(fx);
                m_1 = vx_setall_f64(u0);
                m_2 = vx_setall_f64(fy);
                m_3 = vx_setall_f64(v0);
                x_0 = v_muladd(v_mul(m_0, r2_0), x_0, m_1);
                y_0 = v_muladd(v_mul(m_2, r2_0), y_0, m_3);
                x_1 = v_muladd(v_mul(m_0, r2_1), x_1, m_1);
                y_1 = v_muladd(v_mul(m_2, r2_1), y_1, m_3);

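                // Store in the layout requested by m1type.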
                if (m1type == CV_32FC1)
                {
                    v_store(&m1f[j], v_cvt_f32(x_0, x_1));
                    v_store(&m2f[j], v_cvt_f32(y_0, y_1));
                }
                else if (m1type == CV_32FC2)
                {
                    v_float32 mf0, mf1;
                    v_zip(v_cvt_f32(x_0, x_1), v_cvt_f32(y_0, y_1), mf0, mf1);
                    v_store(&m1f[j * 2], mf0);
                    v_store(&m1f[j * 2 + VTraits<v_float32>::vlanes()], mf1);
                }
                else // m1type == CV_16SC2
                {
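                    // Fixed-point encoding: the integer pixel position goes to
                    // map1 (CV_16SC2) and the INTER_TAB_SIZE x INTER_TAB_SIZE
                    // interpolation-table index to map2 (CV_16UC1).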
                    m_0 = vx_setall_f64(INTER_TAB_SIZE);
                    x_0 = v_mul(x_0, m_0); x_1 = v_mul(x_1, m_0); y_0 = v_mul(y_0, m_0); y_1 = v_mul(y_1, m_0);

                    v_int32 mask = vx_setall_s32(INTER_TAB_SIZE - 1);
                    v_int32 iu = v_round(x_0, x_1);
                    v_int32 iv = v_round(y_0, y_1);

                    v_pack_u_store(&m2[j], v_add(v_and(iu, mask), v_mul(v_and(iv, mask), vx_setall_s32(INTER_TAB_SIZE))));
                    v_int32 out0, out1;
                    v_zip(v_shr<INTER_BITS>(iu), v_shr<INTER_BITS>(iv), out0, out1);
                    v_store(&m1[j * 2], v_pack(out0, out1));
                }
            }

            vx_cleanup();
#endif
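            // Scalar tail; when SIMD is unavailable this loop covers the whole
            // row. Same distortion model as the vectorized path above.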
            for( ; j < size.width; j++, _x += ir[0], _y += ir[3], _w += ir[6] )
            {
                double w = 1./_w, x = _x*w, y = _y*w;
                double x2 = x*x, y2 = y*y;
                double r2 = x2 + y2, _2xy = 2*x*y;
                double kr = (1 + ((k3*r2 + k2)*r2 + k1)*r2)/(1 + ((k6*r2 + k5)*r2 + k4)*r2);
                double xd = (x*kr + p1*_2xy + p2*(r2 + 2*x2) + s1*r2 + s2*r2*r2);
                double yd = (y*kr + p1*(r2 + 2*y2) + p2*_2xy + s3*r2 + s4*r2*r2);
                Vec3d vecTilt = matTilt*cv::Vec3d(xd, yd, 1);
                double invProj = vecTilt(2) ? 1./vecTilt(2) : 1;
                double u = fx*invProj*vecTilt(0) + u0;
                double v = fy*invProj*vecTilt(1) + v0;
                if( m1type == CV_16SC2 )
                {
                    int iu = saturate_cast<int>(u*static_cast<double>(INTER_TAB_SIZE));
                    int iv = saturate_cast<int>(v*static_cast<double>(INTER_TAB_SIZE));
                    m1[j*2] = (short)(iu >> INTER_BITS);
                    m1[j*2+1] = (short)(iv >> INTER_BITS);
                    m2[j] = (ushort)((iv & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE + (iu & (INTER_TAB_SIZE-1)));
                }
                else if( m1type == CV_32FC1 )
                {
                    m1f[j] = (float)u;
                    m2f[j] = (float)v;
                }
                else
                {
                    m1f[j*2] = (float)u;
                    m1f[j*2+1] = (float)v;
                }
            }
        }
    }

private:
    Size size;
    Mat &map1;
    Mat &map2;
    int m1type;
    const double* ir;
    Matx33d &matTilt;
    double u0;
    double v0;
    double fx;
    double fy;
    double k1;
    double k2;
    double p1;
    double p2;
    double k3;
    double k4;
    double k5;
    double k6;
    double s1;
    double s2;
    double s3;
    double s4;
#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F)
    double s_x[2*VTraits<v_float64>::max_nlanes];
    double s_y[2*VTraits<v_float64>::max_nlanes];
    double s_w[2*VTraits<v_float64>::max_nlanes];
#endif
};
}

Ptr<ParallelLoopBody> getInitUndistortRectifyMapComputer(Size _size, Mat &_map1, Mat &_map2, int _m1type,
                                                         const double* _ir, Matx33d &_matTilt,
                                                         double _u0, double _v0, double _fx, double _fy,
                                                         double _k1, double _k2, double _p1, double _p2,
                                                         double _k3, double _k4, double _k5, double _k6,
                                                         double _s1, double _s2, double _s3, double _s4)
{
    CV_INSTRUMENT_REGION();

    return Ptr<initUndistortRectifyMapComputer>(new initUndistortRectifyMapComputer(_size, _map1, _map2, _m1type, _ir, _matTilt, _u0, _v0, _fx, _fy,
                                                                                    _k1, _k2, _p1, _p2, _k3, _k4, _k5, _k6, _s1, _s2, _s3, _s4));
}

#endif
CV_CPU_OPTIMIZATION_NAMESPACE_END
}
/* End of file */