| 1 | /*M/////////////////////////////////////////////////////////////////////////////////////// |
| 2 | // |
| 3 | // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. |
| 4 | // |
| 5 | // By downloading, copying, installing or using the software you agree to this license. |
| 6 | // If you do not agree to this license, do not download, install, |
| 7 | // copy or use the software. |
| 8 | // |
| 9 | // |
| 10 | // License Agreement |
| 11 | // For Open Source Computer Vision Library |
| 12 | // |
| 13 | // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. |
| 14 | // Copyright (C) 2009, Willow Garage Inc., all rights reserved. |
| 15 | // Third party copyrights are property of their respective owners. |
| 16 | // |
| 17 | // Redistribution and use in source and binary forms, with or without modification, |
| 18 | // are permitted provided that the following conditions are met: |
| 19 | // |
| 20 | // * Redistribution's of source code must retain the above copyright notice, |
| 21 | // this list of conditions and the following disclaimer. |
| 22 | // |
| 23 | // * Redistribution's in binary form must reproduce the above copyright notice, |
| 24 | // this list of conditions and the following disclaimer in the documentation |
| 25 | // and/or other materials provided with the distribution. |
| 26 | // |
| 27 | // * The name of the copyright holders may not be used to endorse or promote products |
| 28 | // derived from this software without specific prior written permission. |
| 29 | // |
| 30 | // This software is provided by the copyright holders and contributors "as is" and |
| 31 | // any express or implied warranties, including, but not limited to, the implied |
| 32 | // warranties of merchantability and fitness for a particular purpose are disclaimed. |
| 33 | // In no event shall the Intel Corporation or contributors be liable for any direct, |
| 34 | // indirect, incidental, special, exemplary, or consequential damages |
| 35 | // (including, but not limited to, procurement of substitute goods or services; |
| 36 | // loss of use, data, or profits; or business interruption) however caused |
| 37 | // and on any theory of liability, whether in contract, strict liability, |
| 38 | // or tort (including negligence or otherwise) arising in any way out of |
| 39 | // the use of this software, even if advised of the possibility of such damage. |
| 40 | // |
| 41 | //M*/ |
| 42 | |
| 43 | #include "precomp.hpp" |
| 44 | #include "opencv2/core/hal/intrin.hpp" |
| 45 | |
| 46 | namespace cv { |
| 47 | CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN |
| 48 | // forward declarations |
// Factory for the per-row undistort/rectify map computer implemented below.
// `_ir` is the 3x3 inverse rectification transform (row-major, 9 doubles),
// `_matTilt` the tilt-distortion matrix, (_u0,_v0,_fx,_fy) the camera
// intrinsics, and _k1.._k6/_p1,_p2/_s1.._s4 the radial / tangential /
// thin-prism distortion coefficients. Defined here so the CPU-dispatch
// machinery can declare it even when only declarations are compiled.
Ptr<ParallelLoopBody> getInitUndistortRectifyMapComputer(Size _size, Mat &_map1, Mat &_map2, int _m1type,
                                                         const double* _ir, Matx33d &_matTilt,
                                                         double _u0, double _v0, double _fx, double _fy,
                                                         double _k1, double _k2, double _p1, double _p2,
                                                         double _k3, double _k4, double _k5, double _k6,
                                                         double _s1, double _s2, double _s3, double _s4);
| 55 | |
| 56 | |
| 57 | #ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY |
| 58 | namespace |
| 59 | { |
| 60 | class initUndistortRectifyMapComputer : public ParallelLoopBody |
| 61 | { |
| 62 | public: |
| 63 | initUndistortRectifyMapComputer( |
| 64 | Size _size, Mat &_map1, Mat &_map2, int _m1type, |
| 65 | const double* _ir, Matx33d &_matTilt, |
| 66 | double _u0, double _v0, double _fx, double _fy, |
| 67 | double _k1, double _k2, double _p1, double _p2, |
| 68 | double _k3, double _k4, double _k5, double _k6, |
| 69 | double _s1, double _s2, double _s3, double _s4) |
| 70 | : size(_size), |
| 71 | map1(_map1), |
| 72 | map2(_map2), |
| 73 | m1type(_m1type), |
| 74 | ir(_ir), |
| 75 | matTilt(_matTilt), |
| 76 | u0(_u0), |
| 77 | v0(_v0), |
| 78 | fx(_fx), |
| 79 | fy(_fy), |
| 80 | k1(_k1), |
| 81 | k2(_k2), |
| 82 | p1(_p1), |
| 83 | p2(_p2), |
| 84 | k3(_k3), |
| 85 | k4(_k4), |
| 86 | k5(_k5), |
| 87 | k6(_k6), |
| 88 | s1(_s1), |
| 89 | s2(_s2), |
| 90 | s3(_s3), |
| 91 | s4(_s4) { |
| 92 | #if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F) |
| 93 | for (int i = 0; i < 2 * VTraits<v_float64>::vlanes(); ++i) |
| 94 | { |
| 95 | s_x[i] = ir[0] * i; |
| 96 | s_y[i] = ir[3] * i; |
| 97 | s_w[i] = ir[6] * i; |
| 98 | } |
| 99 | #endif |
| 100 | } |
| 101 | |
| 102 | void operator()( const cv::Range& range ) const CV_OVERRIDE |
| 103 | { |
| 104 | CV_INSTRUMENT_REGION(); |
| 105 | |
| 106 | const int begin = range.start; |
| 107 | const int end = range.end; |
| 108 | |
| 109 | for( int i = begin; i < end; i++ ) |
| 110 | { |
| 111 | float* m1f = map1.ptr<float>(y: i); |
| 112 | float* m2f = map2.empty() ? 0 : map2.ptr<float>(y: i); |
| 113 | short* m1 = (short*)m1f; |
| 114 | ushort* m2 = (ushort*)m2f; |
| 115 | double _x = i*ir[1] + ir[2], _y = i*ir[4] + ir[5], _w = i*ir[7] + ir[8]; |
| 116 | |
| 117 | int j = 0; |
| 118 | |
| 119 | if (m1type == CV_16SC2) |
| 120 | CV_Assert(m1 != NULL && m2 != NULL); |
| 121 | else if (m1type == CV_32FC1) |
| 122 | CV_Assert(m1f != NULL && m2f != NULL); |
| 123 | else |
| 124 | CV_Assert(m1 != NULL); |
| 125 | |
| 126 | #if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F) |
| 127 | const v_float64 v_one = vx_setall_f64(v: 1.0); |
| 128 | for (; j <= size.width - 2*VTraits<v_float64>::vlanes(); j += 2*VTraits<v_float64>::vlanes(), _x += 2*VTraits<v_float64>::vlanes() * ir[0], _y += 2*VTraits<v_float64>::vlanes() * ir[3], _w += 2*VTraits<v_float64>::vlanes() * ir[6]) |
| 129 | { |
| 130 | v_float64 m_0, m_1, m_2, m_3; |
| 131 | m_2 = v_div(a: v_one, b: v_add(a: vx_setall_f64(v: _w), b: vx_load(ptr: this->s_w))); |
| 132 | m_3 = v_div(a: v_one, b: v_add(a: vx_setall_f64(v: _w), b: vx_load(ptr: this->s_w + VTraits<v_float64>::vlanes()))); |
| 133 | m_0 = vx_setall_f64(v: _x); m_1 = vx_setall_f64(v: _y); |
| 134 | v_float64 x_0 = v_mul(a: v_add(a: m_0, b: vx_load(ptr: this->s_x)), b: m_2); |
| 135 | v_float64 x_1 = v_mul(a: v_add(a: m_0, b: vx_load(ptr: this->s_x + VTraits<v_float64>::vlanes())), b: m_3); |
| 136 | v_float64 y_0 = v_mul(a: v_add(a: m_1, b: vx_load(ptr: this->s_y)), b: m_2); |
| 137 | v_float64 y_1 = v_mul(a: v_add(a: m_1, b: vx_load(ptr: this->s_y + VTraits<v_float64>::vlanes())), b: m_3); |
| 138 | |
| 139 | v_float64 xd_0 = v_mul(a: x_0, b: x_0); |
| 140 | v_float64 yd_0 = v_mul(a: y_0, b: y_0); |
| 141 | v_float64 xd_1 = v_mul(a: x_1, b: x_1); |
| 142 | v_float64 yd_1 = v_mul(a: y_1, b: y_1); |
| 143 | |
| 144 | v_float64 r2_0 = v_add(a: xd_0, b: yd_0); |
| 145 | v_float64 r2_1 = v_add(a: xd_1, b: yd_1); |
| 146 | |
| 147 | m_1 = vx_setall_f64(v: k3); |
| 148 | m_2 = vx_setall_f64(v: k2); |
| 149 | m_3 = vx_setall_f64(v: k1); |
| 150 | m_0 = v_muladd(a: v_muladd(a: v_muladd(a: m_1, b: r2_0, c: m_2), b: r2_0, c: m_3), b: r2_0, c: v_one); |
| 151 | m_1 = v_muladd(a: v_muladd(a: v_muladd(a: m_1, b: r2_1, c: m_2), b: r2_1, c: m_3), b: r2_1, c: v_one); |
| 152 | m_3 = vx_setall_f64(v: k6); |
| 153 | m_2 = vx_setall_f64(v: k5); |
| 154 | m_0 = v_div(a: m_0, b: v_muladd(a: v_muladd(a: v_muladd(a: m_3, b: r2_0, c: m_2), b: r2_0, c: vx_setall_f64(v: this->k4)), b: r2_0, c: v_one)); |
| 155 | m_1 = v_div(a: m_1, b: v_muladd(a: v_muladd(a: v_muladd(a: m_3, b: r2_1, c: m_2), b: r2_1, c: vx_setall_f64(v: this->k4)), b: r2_1, c: v_one)); |
| 156 | |
| 157 | m_3 = vx_setall_f64(v: 2.0); |
| 158 | xd_0 = v_muladd(a: m_3, b: xd_0, c: r2_0); |
| 159 | yd_0 = v_muladd(a: m_3, b: yd_0, c: r2_0); |
| 160 | xd_1 = v_muladd(a: m_3, b: xd_1, c: r2_1); |
| 161 | yd_1 = v_muladd(a: m_3, b: yd_1, c: r2_1); |
| 162 | m_2 = v_mul(a: v_mul(a: x_0, b: y_0), b: m_3); |
| 163 | m_3 = v_mul(a: v_mul(a: x_1, b: y_1), b: m_3); |
| 164 | |
| 165 | x_0 = v_mul(a: x_0, b: m_0); y_0 = v_mul(a: y_0, b: m_0); x_1 = v_mul(a: x_1, b: m_1); y_1 = v_mul(a: y_1, b: m_1); |
| 166 | |
| 167 | m_0 = vx_setall_f64(v: p1); |
| 168 | m_1 = vx_setall_f64(v: p2); |
| 169 | xd_0 = v_muladd(a: xd_0, b: m_1, c: x_0); |
| 170 | yd_0 = v_muladd(a: yd_0, b: m_0, c: y_0); |
| 171 | xd_1 = v_muladd(a: xd_1, b: m_1, c: x_1); |
| 172 | yd_1 = v_muladd(a: yd_1, b: m_0, c: y_1); |
| 173 | |
| 174 | xd_0 = v_muladd(a: m_0, b: m_2, c: xd_0); |
| 175 | yd_0 = v_muladd(a: m_1, b: m_2, c: yd_0); |
| 176 | xd_1 = v_muladd(a: m_0, b: m_3, c: xd_1); |
| 177 | yd_1 = v_muladd(a: m_1, b: m_3, c: yd_1); |
| 178 | |
| 179 | m_0 = v_mul(a: r2_0, b: r2_0); |
| 180 | m_1 = v_mul(a: r2_1, b: r2_1); |
| 181 | m_2 = vx_setall_f64(v: s2); |
| 182 | m_3 = vx_setall_f64(v: s1); |
| 183 | xd_0 = v_muladd(a: m_3, b: r2_0, c: v_muladd(a: m_2, b: m_0, c: xd_0)); |
| 184 | xd_1 = v_muladd(a: m_3, b: r2_1, c: v_muladd(a: m_2, b: m_1, c: xd_1)); |
| 185 | m_2 = vx_setall_f64(v: s4); |
| 186 | m_3 = vx_setall_f64(v: s3); |
| 187 | yd_0 = v_muladd(a: m_3, b: r2_0, c: v_muladd(a: m_2, b: m_0, c: yd_0)); |
| 188 | yd_1 = v_muladd(a: m_3, b: r2_1, c: v_muladd(a: m_2, b: m_1, c: yd_1)); |
| 189 | |
| 190 | m_0 = vx_setall_f64(v: matTilt.val[0]); |
| 191 | m_1 = vx_setall_f64(v: matTilt.val[1]); |
| 192 | m_2 = vx_setall_f64(v: matTilt.val[2]); |
| 193 | x_0 = v_muladd(a: m_0, b: xd_0, c: v_muladd(a: m_1, b: yd_0, c: m_2)); |
| 194 | x_1 = v_muladd(a: m_0, b: xd_1, c: v_muladd(a: m_1, b: yd_1, c: m_2)); |
| 195 | m_0 = vx_setall_f64(v: matTilt.val[3]); |
| 196 | m_1 = vx_setall_f64(v: matTilt.val[4]); |
| 197 | m_2 = vx_setall_f64(v: matTilt.val[5]); |
| 198 | y_0 = v_muladd(a: m_0, b: xd_0, c: v_muladd(a: m_1, b: yd_0, c: m_2)); |
| 199 | y_1 = v_muladd(a: m_0, b: xd_1, c: v_muladd(a: m_1, b: yd_1, c: m_2)); |
| 200 | m_0 = vx_setall_f64(v: matTilt.val[6]); |
| 201 | m_1 = vx_setall_f64(v: matTilt.val[7]); |
| 202 | m_2 = vx_setall_f64(v: matTilt.val[8]); |
| 203 | r2_0 = v_muladd(a: m_0, b: xd_0, c: v_muladd(a: m_1, b: yd_0, c: m_2)); |
| 204 | r2_1 = v_muladd(a: m_0, b: xd_1, c: v_muladd(a: m_1, b: yd_1, c: m_2)); |
| 205 | m_0 = vx_setzero_f64(); |
| 206 | r2_0 = v_select(mask: v_eq(a: r2_0, b: m_0), a: v_one, b: v_div(a: v_one, b: r2_0)); |
| 207 | r2_1 = v_select(mask: v_eq(a: r2_1, b: m_0), a: v_one, b: v_div(a: v_one, b: r2_1)); |
| 208 | |
| 209 | m_0 = vx_setall_f64(v: fx); |
| 210 | m_1 = vx_setall_f64(v: u0); |
| 211 | m_2 = vx_setall_f64(v: fy); |
| 212 | m_3 = vx_setall_f64(v: v0); |
| 213 | x_0 = v_muladd(a: v_mul(a: m_0, b: r2_0), b: x_0, c: m_1); |
| 214 | y_0 = v_muladd(a: v_mul(a: m_2, b: r2_0), b: y_0, c: m_3); |
| 215 | x_1 = v_muladd(a: v_mul(a: m_0, b: r2_1), b: x_1, c: m_1); |
| 216 | y_1 = v_muladd(a: v_mul(a: m_2, b: r2_1), b: y_1, c: m_3); |
| 217 | |
| 218 | if (m1type == CV_32FC1) |
| 219 | { |
| 220 | v_store(ptr: &m1f[j], a: v_cvt_f32(a: x_0, b: x_1)); |
| 221 | v_store(ptr: &m2f[j], a: v_cvt_f32(a: y_0, b: y_1)); |
| 222 | } |
| 223 | else if (m1type == CV_32FC2) |
| 224 | { |
| 225 | v_float32 mf0, mf1; |
| 226 | v_zip(a: v_cvt_f32(a: x_0, b: x_1), b: v_cvt_f32(a: y_0, b: y_1), ab0&: mf0, ab1&: mf1); |
| 227 | v_store(ptr: &m1f[j * 2], a: mf0); |
| 228 | v_store(ptr: &m1f[j * 2 + VTraits<v_float32>::vlanes()], a: mf1); |
| 229 | } |
| 230 | else // m1type == CV_16SC2 |
| 231 | { |
| 232 | m_0 = vx_setall_f64(v: INTER_TAB_SIZE); |
| 233 | x_0 = v_mul(a: x_0, b: m_0); x_1 = v_mul(a: x_1, b: m_0); y_0 = v_mul(a: y_0, b: m_0); y_1 = v_mul(a: y_1, b: m_0); |
| 234 | |
| 235 | v_int32 mask = vx_setall_s32(v: INTER_TAB_SIZE - 1); |
| 236 | v_int32 iu = v_round(a: x_0, b: x_1); |
| 237 | v_int32 iv = v_round(a: y_0, b: y_1); |
| 238 | |
| 239 | v_pack_u_store(ptr: &m2[j], a: v_add(a: v_and(a: iu, b: mask), b: v_mul(a: v_and(a: iv, b: mask), b: vx_setall_s32(v: INTER_TAB_SIZE)))); |
| 240 | v_int32 out0, out1; |
| 241 | v_zip(a: v_shr<INTER_BITS>(a: iu), b: v_shr<INTER_BITS>(a: iv), ab0&: out0, ab1&: out1); |
| 242 | v_store(ptr: &m1[j * 2], a: v_pack(a: out0, b: out1)); |
| 243 | } |
| 244 | } |
| 245 | |
| 246 | vx_cleanup(); |
| 247 | #endif |
| 248 | for( ; j < size.width; j++, _x += ir[0], _y += ir[3], _w += ir[6] ) |
| 249 | { |
| 250 | double w = 1./_w, x = _x*w, y = _y*w; |
| 251 | double x2 = x*x, y2 = y*y; |
| 252 | double r2 = x2 + y2, _2xy = 2*x*y; |
| 253 | double kr = (1 + ((k3*r2 + k2)*r2 + k1)*r2)/(1 + ((k6*r2 + k5)*r2 + k4)*r2); |
| 254 | double xd = (x*kr + p1*_2xy + p2*(r2 + 2*x2) + s1*r2+s2*r2*r2); |
| 255 | double yd = (y*kr + p1*(r2 + 2*y2) + p2*_2xy + s3*r2+s4*r2*r2); |
| 256 | Vec3d vecTilt = matTilt*cv::Vec3d(xd, yd, 1); |
| 257 | double invProj = vecTilt(2) ? 1./vecTilt(2) : 1; |
| 258 | double u = fx*invProj*vecTilt(0) + u0; |
| 259 | double v = fy*invProj*vecTilt(1) + v0; |
| 260 | if( m1type == CV_16SC2 ) |
| 261 | { |
| 262 | int iu = saturate_cast<int>(v: u*static_cast<double>(INTER_TAB_SIZE)); |
| 263 | int iv = saturate_cast<int>(v: v*static_cast<double>(INTER_TAB_SIZE)); |
| 264 | m1[j*2] = (short)(iu >> INTER_BITS); |
| 265 | m1[j*2+1] = (short)(iv >> INTER_BITS); |
| 266 | m2[j] = (ushort)((iv & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE + (iu & (INTER_TAB_SIZE-1))); |
| 267 | } |
| 268 | else if( m1type == CV_32FC1 ) |
| 269 | { |
| 270 | m1f[j] = (float)u; |
| 271 | m2f[j] = (float)v; |
| 272 | } |
| 273 | else |
| 274 | { |
| 275 | m1f[j*2] = (float)u; |
| 276 | m1f[j*2+1] = (float)v; |
| 277 | } |
| 278 | } |
| 279 | } |
| 280 | } |
| 281 | |
| 282 | private: |
| 283 | Size size; |
| 284 | Mat &map1; |
| 285 | Mat &map2; |
| 286 | int m1type; |
| 287 | const double* ir; |
| 288 | Matx33d &matTilt; |
| 289 | double u0; |
| 290 | double v0; |
| 291 | double fx; |
| 292 | double fy; |
| 293 | double k1; |
| 294 | double k2; |
| 295 | double p1; |
| 296 | double p2; |
| 297 | double k3; |
| 298 | double k4; |
| 299 | double k5; |
| 300 | double k6; |
| 301 | double s1; |
| 302 | double s2; |
| 303 | double s3; |
| 304 | double s4; |
| 305 | #if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F) |
| 306 | double s_x[2*VTraits<v_float64>::max_nlanes]; |
| 307 | double s_y[2*VTraits<v_float64>::max_nlanes]; |
| 308 | double s_w[2*VTraits<v_float64>::max_nlanes]; |
| 309 | #endif |
| 310 | }; |
| 311 | } |
| 312 | |
| 313 | Ptr<ParallelLoopBody> getInitUndistortRectifyMapComputer(Size _size, Mat &_map1, Mat &_map2, int _m1type, |
| 314 | const double* _ir, Matx33d &_matTilt, |
| 315 | double _u0, double _v0, double _fx, double _fy, |
| 316 | double _k1, double _k2, double _p1, double _p2, |
| 317 | double _k3, double _k4, double _k5, double _k6, |
| 318 | double _s1, double _s2, double _s3, double _s4) |
| 319 | { |
| 320 | CV_INSTRUMENT_REGION(); |
| 321 | |
| 322 | return Ptr<initUndistortRectifyMapComputer>(new initUndistortRectifyMapComputer(_size, _map1, _map2, _m1type, _ir, _matTilt, _u0, _v0, _fx, _fy, |
| 323 | _k1, _k2, _p1, _p2, _k3, _k4, _k5, _k6, _s1, _s2, _s3, _s4)); |
| 324 | } |
| 325 | |
| 326 | #endif |
| 327 | CV_CPU_OPTIMIZATION_NAMESPACE_END |
| 328 | } |
| 329 | /* End of file */ |
| 330 | |