1 | // This file is part of OpenCV project. |
---|---|
2 | // It is subject to the license terms in the LICENSE file found in the top-level directory |
3 | // of this distribution and at http://opencv.org/license.html. |
4 | // |
5 | // Copyright (c) 2006-2010, Rob Hess <hess@eecs.oregonstate.edu> |
6 | // Copyright (C) 2009, Willow Garage Inc., all rights reserved. |
7 | // Copyright (C) 2020, Intel Corporation, all rights reserved. |
8 | |
9 | /**********************************************************************************************\ |
10 | Implementation of SIFT is based on the code from http://blogs.oregonstate.edu/hess/code/sift/ |
11 | Below is the original copyright. |
12 | Patent US6711293 expired in March 2020. |
13 | |
14 | // Copyright (c) 2006-2010, Rob Hess <hess@eecs.oregonstate.edu> |
15 | // All rights reserved. |
16 | |
17 | // The following patent has been issued for methods embodied in this |
18 | // software: "Method and apparatus for identifying scale invariant features |
19 | // in an image and use of same for locating an object in an image," David |
20 | // G. Lowe, US Patent 6,711,293 (March 23, 2004). Provisional application |
21 | // filed March 8, 1999. Asignee: The University of British Columbia. For |
22 | // further details, contact David Lowe (lowe@cs.ubc.ca) or the |
23 | // University-Industry Liaison Office of the University of British |
24 | // Columbia. |
25 | |
26 | // Note that restrictions imposed by this patent (and possibly others) |
27 | // exist independently of and may be in conflict with the freedoms granted |
28 | // in this license, which refers to copyright of the program, not patents |
29 | // for any methods that it implements. Both copyright and patent law must |
30 | // be obeyed to legally use and redistribute this program and it is not the |
31 | // purpose of this license to induce you to infringe any patents or other |
32 | // property right claims or to contest validity of any such claims. If you |
33 | // redistribute or use the program, then this license merely protects you |
34 | // from committing copyright infringement. It does not protect you from |
35 | // committing patent infringement. So, before you do anything with this |
36 | // program, make sure that you have permission to do so not merely in terms |
37 | // of copyright, but also in terms of patent law. |
38 | |
39 | // Please note that this license is not to be understood as a guarantee |
40 | // either. If you use the program according to this license, but in |
41 | // conflict with patent law, it does not mean that the licensor will refund |
42 | // you for any losses that you incur if you are sued for your patent |
43 | // infringement. |
44 | |
45 | // Redistribution and use in source and binary forms, with or without |
46 | // modification, are permitted provided that the following conditions are |
47 | // met: |
48 | // * Redistributions of source code must retain the above copyright and |
49 | // patent notices, this list of conditions and the following |
50 | // disclaimer. |
51 | // * Redistributions in binary form must reproduce the above copyright |
52 | // notice, this list of conditions and the following disclaimer in |
53 | // the documentation and/or other materials provided with the |
54 | // distribution. |
55 | // * Neither the name of Oregon State University nor the names of its |
56 | // contributors may be used to endorse or promote products derived |
57 | // from this software without specific prior written permission. |
58 | |
59 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS |
60 | // IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED |
61 | // TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A |
62 | // PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
63 | // HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
64 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
65 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
66 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF |
67 | // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
68 | // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
69 | // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
70 | \**********************************************************************************************/ |
71 | |
72 | #include "precomp.hpp" |
73 | |
74 | #include <opencv2/core/hal/hal.hpp> |
75 | #include "opencv2/core/hal/intrin.hpp" |
76 | #include <opencv2/core/utils/buffer_area.private.hpp> |
77 | |
78 | namespace cv { |
79 | |
80 | #if !defined(CV_CPU_DISPATCH_MODE) || !defined(CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY) |
81 | /******************************* Defs and macros *****************************/ |
82 | |
83 | // default width of descriptor histogram array |
84 | static const int SIFT_DESCR_WIDTH = 4; |
85 | |
86 | // default number of bins per histogram in descriptor array |
87 | static const int SIFT_DESCR_HIST_BINS = 8; |
88 | |
89 | // assumed gaussian blur for input image |
90 | static const float SIFT_INIT_SIGMA = 0.5f; |
91 | |
92 | // width of border in which to ignore keypoints |
93 | static const int SIFT_IMG_BORDER = 5; |
94 | |
95 | // maximum steps of keypoint interpolation before failure |
96 | static const int SIFT_MAX_INTERP_STEPS = 5; |
97 | |
98 | // default number of bins in histogram for orientation assignment |
99 | static const int SIFT_ORI_HIST_BINS = 36; |
100 | |
101 | // determines gaussian sigma for orientation assignment |
102 | static const float SIFT_ORI_SIG_FCTR = 1.5f; |
103 | |
104 | // determines the radius of the region used in orientation assignment |
105 | static const float SIFT_ORI_RADIUS = 4.5f; // 3 * SIFT_ORI_SIG_FCTR; |
106 | |
107 | // orientation magnitude relative to max that results in new feature |
108 | static const float SIFT_ORI_PEAK_RATIO = 0.8f; |
109 | |
110 | // determines the size of a single descriptor orientation histogram |
111 | static const float SIFT_DESCR_SCL_FCTR = 3.f; |
112 | |
113 | // threshold on magnitude of elements of descriptor vector |
114 | static const float SIFT_DESCR_MAG_THR = 0.2f; |
115 | |
116 | // factor used to convert floating-point descriptor to unsigned char |
117 | static const float SIFT_INT_DESCR_FCTR = 512.f; |
118 | |
119 | #define DoG_TYPE_SHORT 0 |
120 | #if DoG_TYPE_SHORT |
121 | // intermediate type used for DoG pyramids |
122 | typedef short sift_wt; |
123 | static const int SIFT_FIXPT_SCALE = 48; |
124 | #else |
125 | // intermediate type used for DoG pyramids |
126 | typedef float sift_wt; |
127 | static const int SIFT_FIXPT_SCALE = 1; |
128 | #endif |
129 | |
130 | #endif // definitions and macros |
131 | |
132 | |
133 | CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN |
134 | |
135 | void findScaleSpaceExtrema( |
136 | int octave, |
137 | int layer, |
138 | int threshold, |
139 | int idx, |
140 | int step, |
141 | int cols, |
142 | int nOctaveLayers, |
143 | double contrastThreshold, |
144 | double edgeThreshold, |
145 | double sigma, |
146 | const std::vector<Mat>& gauss_pyr, |
147 | const std::vector<Mat>& dog_pyr, |
148 | std::vector<KeyPoint>& kpts, |
149 | const cv::Range& range); |
150 | |
151 | void calcSIFTDescriptor( |
152 | const Mat& img, Point2f ptf, float ori, float scl, |
153 | const int d, const int n, Mat& dst, int row |
154 | ); |
155 | |
156 | |
157 | #ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY |
158 | |
159 | // Computes a gradient orientation histogram at a specified pixel |
160 | static |
161 | float calcOrientationHist( |
162 | const Mat& img, Point pt, int radius, |
163 | float sigma, float* hist, int n |
164 | ) |
165 | { |
166 | CV_TRACE_FUNCTION(); |
167 | |
168 | int i, j, k, len = (radius*2+1)*(radius*2+1); |
169 | |
170 | float expf_scale = -1.f/(2.f * sigma * sigma); |
171 | |
172 | cv::utils::BufferArea area; |
173 | float *X = 0, *Y = 0, *Mag, *Ori = 0, *W = 0, *temphist = 0; |
174 | area.allocate(ptr&: X, count: len, CV_SIMD_WIDTH); |
175 | area.allocate(ptr&: Y, count: len, CV_SIMD_WIDTH); |
176 | area.allocate(ptr&: Ori, count: len, CV_SIMD_WIDTH); |
177 | area.allocate(ptr&: W, count: len, CV_SIMD_WIDTH); |
178 | area.allocate(ptr&: temphist, count: n+4, CV_SIMD_WIDTH); |
179 | area.commit(); |
180 | temphist += 2; |
181 | Mag = X; |
182 | |
183 | for( i = 0; i < n; i++ ) |
184 | temphist[i] = 0.f; |
185 | |
186 | for( i = -radius, k = 0; i <= radius; i++ ) |
187 | { |
188 | int y = pt.y + i; |
189 | if( y <= 0 || y >= img.rows - 1 ) |
190 | continue; |
191 | for( j = -radius; j <= radius; j++ ) |
192 | { |
193 | int x = pt.x + j; |
194 | if( x <= 0 || x >= img.cols - 1 ) |
195 | continue; |
196 | |
197 | float dx = (float)(img.at<sift_wt>(i0: y, i1: x+1) - img.at<sift_wt>(i0: y, i1: x-1)); |
198 | float dy = (float)(img.at<sift_wt>(i0: y-1, i1: x) - img.at<sift_wt>(i0: y+1, i1: x)); |
199 | |
200 | X[k] = dx; Y[k] = dy; W[k] = (i*i + j*j)*expf_scale; |
201 | k++; |
202 | } |
203 | } |
204 | |
205 | len = k; |
206 | |
207 | // compute gradient values, orientations and the weights over the pixel neighborhood |
208 | cv::hal::exp32f(src: W, dst: W, n: len); |
209 | cv::hal::fastAtan2(y: Y, x: X, dst: Ori, n: len, angleInDegrees: true); |
210 | cv::hal::magnitude32f(x: X, y: Y, dst: Mag, n: len); |
211 | |
212 | k = 0; |
213 | #if (CV_SIMD || CV_SIMD_SCALABLE) |
214 | const int vecsize = VTraits<v_float32>::vlanes(); |
215 | v_float32 nd360 = vx_setall_f32(v: n/360.f); |
216 | v_int32 __n = vx_setall_s32(v: n); |
217 | int CV_DECL_ALIGNED(CV_SIMD_WIDTH) bin_buf[VTraits<v_float32>::max_nlanes]; |
218 | float CV_DECL_ALIGNED(CV_SIMD_WIDTH) w_mul_mag_buf[VTraits<v_float32>::max_nlanes]; |
219 | |
220 | for( ; k <= len - vecsize; k += vecsize ) |
221 | { |
222 | v_float32 w = vx_load_aligned( ptr: W + k ); |
223 | v_float32 mag = vx_load_aligned( ptr: Mag + k ); |
224 | v_float32 ori = vx_load_aligned( ptr: Ori + k ); |
225 | v_int32 bin = v_round( a: v_mul(a: nd360, b: ori) ); |
226 | |
227 | bin = v_select(mask: v_ge(a: bin, b: __n), a: v_sub(a: bin, b: __n), b: bin); |
228 | bin = v_select(mask: v_lt(a: bin, b: vx_setzero_s32()), a: v_add(a: bin, b: __n), b: bin); |
229 | |
230 | w = v_mul(a: w, b: mag); |
231 | v_store_aligned(ptr: bin_buf, a: bin); |
232 | v_store_aligned(ptr: w_mul_mag_buf, a: w); |
233 | for(int vi = 0; vi < vecsize; vi++) |
234 | { |
235 | temphist[bin_buf[vi]] += w_mul_mag_buf[vi]; |
236 | } |
237 | } |
238 | #endif |
239 | for( ; k < len; k++ ) |
240 | { |
241 | int bin = cvRound(value: (n/360.f)*Ori[k]); |
242 | if( bin >= n ) |
243 | bin -= n; |
244 | if( bin < 0 ) |
245 | bin += n; |
246 | temphist[bin] += W[k]*Mag[k]; |
247 | } |
248 | |
249 | // smooth the histogram |
250 | temphist[-1] = temphist[n-1]; |
251 | temphist[-2] = temphist[n-2]; |
252 | temphist[n] = temphist[0]; |
253 | temphist[n+1] = temphist[1]; |
254 | |
255 | i = 0; |
256 | #if (CV_SIMD || CV_SIMD_SCALABLE) |
257 | v_float32 d_1_16 = vx_setall_f32(v: 1.f/16.f); |
258 | v_float32 d_4_16 = vx_setall_f32(v: 4.f/16.f); |
259 | v_float32 d_6_16 = vx_setall_f32(v: 6.f/16.f); |
260 | for( ; i <= n - VTraits<v_float32>::vlanes(); i += VTraits<v_float32>::vlanes() ) |
261 | { |
262 | v_float32 tn2 = vx_load_aligned(ptr: temphist + i-2); |
263 | v_float32 tn1 = vx_load(ptr: temphist + i-1); |
264 | v_float32 t0 = vx_load(ptr: temphist + i); |
265 | v_float32 t1 = vx_load(ptr: temphist + i+1); |
266 | v_float32 t2 = vx_load(ptr: temphist + i+2); |
267 | v_float32 _hist = v_fma(a: v_add(a: tn2, b: t2), b: d_1_16, |
268 | c: v_fma(a: v_add(a: tn1, b: t1), b: d_4_16, c: v_mul(a: t0, b: d_6_16))); |
269 | v_store(ptr: hist + i, a: _hist); |
270 | } |
271 | #endif |
272 | for( ; i < n; i++ ) |
273 | { |
274 | hist[i] = (temphist[i-2] + temphist[i+2])*(1.f/16.f) + |
275 | (temphist[i-1] + temphist[i+1])*(4.f/16.f) + |
276 | temphist[i]*(6.f/16.f); |
277 | } |
278 | |
279 | float maxval = hist[0]; |
280 | for( i = 1; i < n; i++ ) |
281 | maxval = std::max(a: maxval, b: hist[i]); |
282 | |
283 | return maxval; |
284 | } |
285 | |
286 | |
287 | // |
288 | // Interpolates a scale-space extremum's location and scale to subpixel |
289 | // accuracy to form an image feature. Rejects features with low contrast. |
290 | // Based on Section 4 of Lowe's paper. |
291 | static |
292 | bool adjustLocalExtrema( |
293 | const std::vector<Mat>& dog_pyr, KeyPoint& kpt, int octv, |
294 | int& layer, int& r, int& c, int nOctaveLayers, |
295 | float contrastThreshold, float edgeThreshold, float sigma |
296 | ) |
297 | { |
298 | CV_TRACE_FUNCTION(); |
299 | |
300 | const float img_scale = 1.f/(255*SIFT_FIXPT_SCALE); |
301 | const float deriv_scale = img_scale*0.5f; |
302 | const float second_deriv_scale = img_scale; |
303 | const float cross_deriv_scale = img_scale*0.25f; |
304 | |
305 | float xi=0, xr=0, xc=0, contr=0; |
306 | int i = 0; |
307 | |
308 | for( ; i < SIFT_MAX_INTERP_STEPS; i++ ) |
309 | { |
310 | int idx = octv*(nOctaveLayers+2) + layer; |
311 | const Mat& img = dog_pyr[idx]; |
312 | const Mat& prev = dog_pyr[idx-1]; |
313 | const Mat& next = dog_pyr[idx+1]; |
314 | |
315 | Vec3f dD((img.at<sift_wt>(i0: r, i1: c+1) - img.at<sift_wt>(i0: r, i1: c-1))*deriv_scale, |
316 | (img.at<sift_wt>(i0: r+1, i1: c) - img.at<sift_wt>(i0: r-1, i1: c))*deriv_scale, |
317 | (next.at<sift_wt>(i0: r, i1: c) - prev.at<sift_wt>(i0: r, i1: c))*deriv_scale); |
318 | |
319 | float v2 = (float)img.at<sift_wt>(i0: r, i1: c)*2; |
320 | float dxx = (img.at<sift_wt>(i0: r, i1: c+1) + img.at<sift_wt>(i0: r, i1: c-1) - v2)*second_deriv_scale; |
321 | float dyy = (img.at<sift_wt>(i0: r+1, i1: c) + img.at<sift_wt>(i0: r-1, i1: c) - v2)*second_deriv_scale; |
322 | float dss = (next.at<sift_wt>(i0: r, i1: c) + prev.at<sift_wt>(i0: r, i1: c) - v2)*second_deriv_scale; |
323 | float dxy = (img.at<sift_wt>(i0: r+1, i1: c+1) - img.at<sift_wt>(i0: r+1, i1: c-1) - |
324 | img.at<sift_wt>(i0: r-1, i1: c+1) + img.at<sift_wt>(i0: r-1, i1: c-1))*cross_deriv_scale; |
325 | float dxs = (next.at<sift_wt>(i0: r, i1: c+1) - next.at<sift_wt>(i0: r, i1: c-1) - |
326 | prev.at<sift_wt>(i0: r, i1: c+1) + prev.at<sift_wt>(i0: r, i1: c-1))*cross_deriv_scale; |
327 | float dys = (next.at<sift_wt>(i0: r+1, i1: c) - next.at<sift_wt>(i0: r-1, i1: c) - |
328 | prev.at<sift_wt>(i0: r+1, i1: c) + prev.at<sift_wt>(i0: r-1, i1: c))*cross_deriv_scale; |
329 | |
330 | Matx33f H(dxx, dxy, dxs, |
331 | dxy, dyy, dys, |
332 | dxs, dys, dss); |
333 | |
334 | Vec3f X = H.solve(rhs: dD, method: DECOMP_LU); |
335 | |
336 | xi = -X[2]; |
337 | xr = -X[1]; |
338 | xc = -X[0]; |
339 | |
340 | if( std::abs(x: xi) < 0.5f && std::abs(x: xr) < 0.5f && std::abs(x: xc) < 0.5f ) |
341 | break; |
342 | |
343 | if( std::abs(x: xi) > (float)(INT_MAX/3) || |
344 | std::abs(x: xr) > (float)(INT_MAX/3) || |
345 | std::abs(x: xc) > (float)(INT_MAX/3) ) |
346 | return false; |
347 | |
348 | c += cvRound(value: xc); |
349 | r += cvRound(value: xr); |
350 | layer += cvRound(value: xi); |
351 | |
352 | if( layer < 1 || layer > nOctaveLayers || |
353 | c < SIFT_IMG_BORDER || c >= img.cols - SIFT_IMG_BORDER || |
354 | r < SIFT_IMG_BORDER || r >= img.rows - SIFT_IMG_BORDER ) |
355 | return false; |
356 | } |
357 | |
358 | // ensure convergence of interpolation |
359 | if( i >= SIFT_MAX_INTERP_STEPS ) |
360 | return false; |
361 | |
362 | { |
363 | int idx = octv*(nOctaveLayers+2) + layer; |
364 | const Mat& img = dog_pyr[idx]; |
365 | const Mat& prev = dog_pyr[idx-1]; |
366 | const Mat& next = dog_pyr[idx+1]; |
367 | Matx31f dD((img.at<sift_wt>(i0: r, i1: c+1) - img.at<sift_wt>(i0: r, i1: c-1))*deriv_scale, |
368 | (img.at<sift_wt>(i0: r+1, i1: c) - img.at<sift_wt>(i0: r-1, i1: c))*deriv_scale, |
369 | (next.at<sift_wt>(i0: r, i1: c) - prev.at<sift_wt>(i0: r, i1: c))*deriv_scale); |
370 | float t = dD.dot(M: Matx31f(xc, xr, xi)); |
371 | |
372 | contr = img.at<sift_wt>(i0: r, i1: c)*img_scale + t * 0.5f; |
373 | if( std::abs( x: contr ) * nOctaveLayers < contrastThreshold ) |
374 | return false; |
375 | |
376 | // principal curvatures are computed using the trace and det of Hessian |
377 | float v2 = img.at<sift_wt>(i0: r, i1: c)*2.f; |
378 | float dxx = (img.at<sift_wt>(i0: r, i1: c+1) + img.at<sift_wt>(i0: r, i1: c-1) - v2)*second_deriv_scale; |
379 | float dyy = (img.at<sift_wt>(i0: r+1, i1: c) + img.at<sift_wt>(i0: r-1, i1: c) - v2)*second_deriv_scale; |
380 | float dxy = (img.at<sift_wt>(i0: r+1, i1: c+1) - img.at<sift_wt>(i0: r+1, i1: c-1) - |
381 | img.at<sift_wt>(i0: r-1, i1: c+1) + img.at<sift_wt>(i0: r-1, i1: c-1)) * cross_deriv_scale; |
382 | float tr = dxx + dyy; |
383 | float det = dxx * dyy - dxy * dxy; |
384 | |
385 | if( det <= 0 || tr*tr*edgeThreshold >= (edgeThreshold + 1)*(edgeThreshold + 1)*det ) |
386 | return false; |
387 | } |
388 | |
389 | kpt.pt.x = (c + xc) * (1 << octv); |
390 | kpt.pt.y = (r + xr) * (1 << octv); |
391 | kpt.octave = octv + (layer << 8) + (cvRound(value: (xi + 0.5)*255) << 16); |
392 | kpt.size = sigma*powf(x: 2.f, y: (layer + xi) / nOctaveLayers)*(1 << octv)*2; |
393 | kpt.response = std::abs(x: contr); |
394 | |
395 | return true; |
396 | } |
397 | |
398 | namespace { |
399 | |
400 | class findScaleSpaceExtremaT |
401 | { |
402 | public: |
403 | findScaleSpaceExtremaT( |
404 | int _o, |
405 | int _i, |
406 | int _threshold, |
407 | int _idx, |
408 | int _step, |
409 | int _cols, |
410 | int _nOctaveLayers, |
411 | double _contrastThreshold, |
412 | double _edgeThreshold, |
413 | double _sigma, |
414 | const std::vector<Mat>& _gauss_pyr, |
415 | const std::vector<Mat>& _dog_pyr, |
416 | std::vector<KeyPoint>& kpts) |
417 | |
418 | : o(_o), |
419 | i(_i), |
420 | threshold(_threshold), |
421 | idx(_idx), |
422 | step(_step), |
423 | cols(_cols), |
424 | nOctaveLayers(_nOctaveLayers), |
425 | contrastThreshold(_contrastThreshold), |
426 | edgeThreshold(_edgeThreshold), |
427 | sigma(_sigma), |
428 | gauss_pyr(_gauss_pyr), |
429 | dog_pyr(_dog_pyr), |
430 | kpts_(kpts) |
431 | { |
432 | // nothing |
433 | } |
434 | void process(const cv::Range& range) |
435 | { |
436 | CV_TRACE_FUNCTION(); |
437 | |
438 | const int begin = range.start; |
439 | const int end = range.end; |
440 | |
441 | static const int n = SIFT_ORI_HIST_BINS; |
442 | float CV_DECL_ALIGNED(CV_SIMD_WIDTH) hist[n]; |
443 | |
444 | const Mat& img = dog_pyr[idx]; |
445 | const Mat& prev = dog_pyr[idx-1]; |
446 | const Mat& next = dog_pyr[idx+1]; |
447 | |
448 | for( int r = begin; r < end; r++) |
449 | { |
450 | const sift_wt* currptr = img.ptr<sift_wt>(y: r); |
451 | const sift_wt* prevptr = prev.ptr<sift_wt>(y: r); |
452 | const sift_wt* nextptr = next.ptr<sift_wt>(y: r); |
453 | int c = SIFT_IMG_BORDER; |
454 | |
455 | #if (CV_SIMD || CV_SIMD_SCALABLE) && !(DoG_TYPE_SHORT) |
456 | const int vecsize = VTraits<v_float32>::vlanes(); |
457 | for( ; c <= cols-SIFT_IMG_BORDER - vecsize; c += vecsize) |
458 | { |
459 | v_float32 val = vx_load(ptr: &currptr[c]); |
460 | v_float32 _00,_01,_02; |
461 | v_float32 _10, _12; |
462 | v_float32 _20,_21,_22; |
463 | |
464 | v_float32 vmin,vmax; |
465 | |
466 | |
467 | v_float32 cond = v_gt(a: v_abs(x: val), b: vx_setall_f32(v: (float)this->threshold)); |
468 | if (!v_check_any(a: cond)) |
469 | { |
470 | continue; |
471 | } |
472 | |
473 | _00 = vx_load(ptr: &currptr[c-step-1]); _01 = vx_load(ptr: &currptr[c-step]); _02 = vx_load(ptr: &currptr[c-step+1]); |
474 | _10 = vx_load(ptr: &currptr[c -1]); _12 = vx_load(ptr: &currptr[c +1]); |
475 | _20 = vx_load(ptr: &currptr[c+step-1]); _21 = vx_load(ptr: &currptr[c+step]); _22 = vx_load(ptr: &currptr[c+step+1]); |
476 | |
477 | vmax = v_max(a: v_max(a: v_max(a: _00,b: _01),b: v_max(a: _02,b: _10)),b: v_max(a: v_max(a: _12,b: _20),b: v_max(a: _21,b: _22))); |
478 | vmin = v_min(a: v_min(a: v_min(a: _00,b: _01),b: v_min(a: _02,b: _10)),b: v_min(a: v_min(a: _12,b: _20),b: v_min(a: _21,b: _22))); |
479 | |
480 | v_float32 condp = v_and(a: v_and(a: cond, b: v_gt(a: val, b: vx_setall_f32(v: 0))), b: v_ge(a: val, b: vmax)); |
481 | v_float32 condm = v_and(a: v_and(a: cond, b: v_lt(a: val, b: vx_setall_f32(v: 0))), b: v_le(a: val, b: vmin)); |
482 | |
483 | cond = v_or(a: condp, b: condm); |
484 | if (!v_check_any(a: cond)) |
485 | { |
486 | continue; |
487 | } |
488 | |
489 | _00 = vx_load(ptr: &prevptr[c-step-1]); _01 = vx_load(ptr: &prevptr[c-step]); _02 = vx_load(ptr: &prevptr[c-step+1]); |
490 | _10 = vx_load(ptr: &prevptr[c -1]); _12 = vx_load(ptr: &prevptr[c +1]); |
491 | _20 = vx_load(ptr: &prevptr[c+step-1]); _21 = vx_load(ptr: &prevptr[c+step]); _22 = vx_load(ptr: &prevptr[c+step+1]); |
492 | |
493 | vmax = v_max(a: v_max(a: v_max(a: _00,b: _01),b: v_max(a: _02,b: _10)),b: v_max(a: v_max(a: _12,b: _20),b: v_max(a: _21,b: _22))); |
494 | vmin = v_min(a: v_min(a: v_min(a: _00,b: _01),b: v_min(a: _02,b: _10)),b: v_min(a: v_min(a: _12,b: _20),b: v_min(a: _21,b: _22))); |
495 | |
496 | condp = v_and(a: condp, b: v_ge(a: val, b: vmax)); |
497 | condm = v_and(a: condm, b: v_le(a: val, b: vmin)); |
498 | |
499 | cond = v_or(a: condp, b: condm); |
500 | if (!v_check_any(a: cond)) |
501 | { |
502 | continue; |
503 | } |
504 | |
505 | v_float32 _11p = vx_load(ptr: &prevptr[c]); |
506 | v_float32 _11n = vx_load(ptr: &nextptr[c]); |
507 | |
508 | v_float32 max_middle = v_max(a: _11n,b: _11p); |
509 | v_float32 min_middle = v_min(a: _11n,b: _11p); |
510 | |
511 | _00 = vx_load(ptr: &nextptr[c-step-1]); _01 = vx_load(ptr: &nextptr[c-step]); _02 = vx_load(ptr: &nextptr[c-step+1]); |
512 | _10 = vx_load(ptr: &nextptr[c -1]); _12 = vx_load(ptr: &nextptr[c +1]); |
513 | _20 = vx_load(ptr: &nextptr[c+step-1]); _21 = vx_load(ptr: &nextptr[c+step]); _22 = vx_load(ptr: &nextptr[c+step+1]); |
514 | |
515 | vmax = v_max(a: v_max(a: v_max(a: _00,b: _01),b: v_max(a: _02,b: _10)),b: v_max(a: v_max(a: _12,b: _20),b: v_max(a: _21,b: _22))); |
516 | vmin = v_min(a: v_min(a: v_min(a: _00,b: _01),b: v_min(a: _02,b: _10)),b: v_min(a: v_min(a: _12,b: _20),b: v_min(a: _21,b: _22))); |
517 | |
518 | condp = v_and(a: condp, b: v_ge(a: val, b: v_max(a: vmax, b: max_middle))); |
519 | condm = v_and(a: condm, b: v_le(a: val, b: v_min(a: vmin, b: min_middle))); |
520 | |
521 | cond = v_or(a: condp, b: condm); |
522 | if (!v_check_any(a: cond)) |
523 | { |
524 | continue; |
525 | } |
526 | |
527 | int mask = v_signmask(a: cond); |
528 | for (int k = 0; k<vecsize;k++) |
529 | { |
530 | if ((mask & (1<<k)) == 0) |
531 | continue; |
532 | |
533 | CV_TRACE_REGION("pixel_candidate_simd"); |
534 | |
535 | KeyPoint kpt; |
536 | int r1 = r, c1 = c+k, layer = i; |
537 | if( !adjustLocalExtrema(dog_pyr, kpt, octv: o, layer, r&: r1, c&: c1, |
538 | nOctaveLayers, contrastThreshold: (float)contrastThreshold, |
539 | edgeThreshold: (float)edgeThreshold, sigma: (float)sigma) ) |
540 | continue; |
541 | float scl_octv = kpt.size*0.5f/(1 << o); |
542 | float omax = calcOrientationHist(img: gauss_pyr[o*(nOctaveLayers+3) + layer], |
543 | pt: Point(c1, r1), |
544 | radius: cvRound(value: SIFT_ORI_RADIUS * scl_octv), |
545 | sigma: SIFT_ORI_SIG_FCTR * scl_octv, |
546 | hist, n); |
547 | float mag_thr = (float)(omax * SIFT_ORI_PEAK_RATIO); |
548 | for( int j = 0; j < n; j++ ) |
549 | { |
550 | int l = j > 0 ? j - 1 : n - 1; |
551 | int r2 = j < n-1 ? j + 1 : 0; |
552 | |
553 | if( hist[j] > hist[l] && hist[j] > hist[r2] && hist[j] >= mag_thr ) |
554 | { |
555 | float bin = j + 0.5f * (hist[l]-hist[r2]) / (hist[l] - 2*hist[j] + hist[r2]); |
556 | bin = bin < 0 ? n + bin : bin >= n ? bin - n : bin; |
557 | kpt.angle = 360.f - (float)((360.f/n) * bin); |
558 | if(std::abs(x: kpt.angle - 360.f) < FLT_EPSILON) |
559 | kpt.angle = 0.f; |
560 | |
561 | kpts_.push_back(x: kpt); |
562 | } |
563 | } |
564 | } |
565 | } |
566 | |
567 | #endif //CV_SIMD && !(DoG_TYPE_SHORT) |
568 | |
569 | // vector loop reminder, better predictibility and less branch density |
570 | for( ; c < cols-SIFT_IMG_BORDER; c++) |
571 | { |
572 | sift_wt val = currptr[c]; |
573 | if (std::abs(x: val) <= threshold) |
574 | continue; |
575 | |
576 | sift_wt _00,_01,_02; |
577 | sift_wt _10, _12; |
578 | sift_wt _20,_21,_22; |
579 | _00 = currptr[c-step-1]; _01 = currptr[c-step]; _02 = currptr[c-step+1]; |
580 | _10 = currptr[c -1]; _12 = currptr[c +1]; |
581 | _20 = currptr[c+step-1]; _21 = currptr[c+step]; _22 = currptr[c+step+1]; |
582 | |
583 | bool calculate = false; |
584 | if (val > 0) |
585 | { |
586 | sift_wt vmax = std::max(a: std::max(a: std::max(a: _00,b: _01),b: std::max(a: _02,b: _10)),b: std::max(a: std::max(a: _12,b: _20),b: std::max(a: _21,b: _22))); |
587 | if (val >= vmax) |
588 | { |
589 | _00 = prevptr[c-step-1]; _01 = prevptr[c-step]; _02 = prevptr[c-step+1]; |
590 | _10 = prevptr[c -1]; _12 = prevptr[c +1]; |
591 | _20 = prevptr[c+step-1]; _21 = prevptr[c+step]; _22 = prevptr[c+step+1]; |
592 | vmax = std::max(a: std::max(a: std::max(a: _00,b: _01),b: std::max(a: _02,b: _10)),b: std::max(a: std::max(a: _12,b: _20),b: std::max(a: _21,b: _22))); |
593 | if (val >= vmax) |
594 | { |
595 | _00 = nextptr[c-step-1]; _01 = nextptr[c-step]; _02 = nextptr[c-step+1]; |
596 | _10 = nextptr[c -1]; _12 = nextptr[c +1]; |
597 | _20 = nextptr[c+step-1]; _21 = nextptr[c+step]; _22 = nextptr[c+step+1]; |
598 | vmax = std::max(a: std::max(a: std::max(a: _00,b: _01),b: std::max(a: _02,b: _10)),b: std::max(a: std::max(a: _12,b: _20),b: std::max(a: _21,b: _22))); |
599 | if (val >= vmax) |
600 | { |
601 | sift_wt _11p = prevptr[c], _11n = nextptr[c]; |
602 | calculate = (val >= std::max(a: _11p,b: _11n)); |
603 | } |
604 | } |
605 | } |
606 | |
607 | } else { // val cant be zero here (first abs took care of zero), must be negative |
608 | sift_wt vmin = std::min(a: std::min(a: std::min(a: _00,b: _01),b: std::min(a: _02,b: _10)),b: std::min(a: std::min(a: _12,b: _20),b: std::min(a: _21,b: _22))); |
609 | if (val <= vmin) |
610 | { |
611 | _00 = prevptr[c-step-1]; _01 = prevptr[c-step]; _02 = prevptr[c-step+1]; |
612 | _10 = prevptr[c -1]; _12 = prevptr[c +1]; |
613 | _20 = prevptr[c+step-1]; _21 = prevptr[c+step]; _22 = prevptr[c+step+1]; |
614 | vmin = std::min(a: std::min(a: std::min(a: _00,b: _01),b: std::min(a: _02,b: _10)),b: std::min(a: std::min(a: _12,b: _20),b: std::min(a: _21,b: _22))); |
615 | if (val <= vmin) |
616 | { |
617 | _00 = nextptr[c-step-1]; _01 = nextptr[c-step]; _02 = nextptr[c-step+1]; |
618 | _10 = nextptr[c -1]; _12 = nextptr[c +1]; |
619 | _20 = nextptr[c+step-1]; _21 = nextptr[c+step]; _22 = nextptr[c+step+1]; |
620 | vmin = std::min(a: std::min(a: std::min(a: _00,b: _01),b: std::min(a: _02,b: _10)),b: std::min(a: std::min(a: _12,b: _20),b: std::min(a: _21,b: _22))); |
621 | if (val <= vmin) |
622 | { |
623 | sift_wt _11p = prevptr[c], _11n = nextptr[c]; |
624 | calculate = (val <= std::min(a: _11p,b: _11n)); |
625 | } |
626 | } |
627 | } |
628 | } |
629 | |
630 | if (calculate) |
631 | { |
632 | CV_TRACE_REGION("pixel_candidate"); |
633 | |
634 | KeyPoint kpt; |
635 | int r1 = r, c1 = c, layer = i; |
636 | if( !adjustLocalExtrema(dog_pyr, kpt, octv: o, layer, r&: r1, c&: c1, |
637 | nOctaveLayers, contrastThreshold: (float)contrastThreshold, |
638 | edgeThreshold: (float)edgeThreshold, sigma: (float)sigma) ) |
639 | continue; |
640 | float scl_octv = kpt.size*0.5f/(1 << o); |
641 | float omax = calcOrientationHist(img: gauss_pyr[o*(nOctaveLayers+3) + layer], |
642 | pt: Point(c1, r1), |
643 | radius: cvRound(value: SIFT_ORI_RADIUS * scl_octv), |
644 | sigma: SIFT_ORI_SIG_FCTR * scl_octv, |
645 | hist, n); |
646 | float mag_thr = (float)(omax * SIFT_ORI_PEAK_RATIO); |
647 | for( int j = 0; j < n; j++ ) |
648 | { |
649 | int l = j > 0 ? j - 1 : n - 1; |
650 | int r2 = j < n-1 ? j + 1 : 0; |
651 | |
652 | if( hist[j] > hist[l] && hist[j] > hist[r2] && hist[j] >= mag_thr ) |
653 | { |
654 | float bin = j + 0.5f * (hist[l]-hist[r2]) / (hist[l] - 2*hist[j] + hist[r2]); |
655 | bin = bin < 0 ? n + bin : bin >= n ? bin - n : bin; |
656 | kpt.angle = 360.f - (float)((360.f/n) * bin); |
657 | if(std::abs(x: kpt.angle - 360.f) < FLT_EPSILON) |
658 | kpt.angle = 0.f; |
659 | |
660 | kpts_.push_back(x: kpt); |
661 | } |
662 | } |
663 | } |
664 | } |
665 | } |
666 | } |
667 | private: |
668 | int o, i; |
669 | int threshold; |
670 | int idx, step, cols; |
671 | int nOctaveLayers; |
672 | double contrastThreshold; |
673 | double edgeThreshold; |
674 | double sigma; |
675 | const std::vector<Mat>& gauss_pyr; |
676 | const std::vector<Mat>& dog_pyr; |
677 | std::vector<KeyPoint>& kpts_; |
678 | }; |
679 | |
680 | } // namespace |
681 | |
682 | |
683 | void findScaleSpaceExtrema( |
684 | int octave, |
685 | int layer, |
686 | int threshold, |
687 | int idx, |
688 | int step, |
689 | int cols, |
690 | int nOctaveLayers, |
691 | double contrastThreshold, |
692 | double edgeThreshold, |
693 | double sigma, |
694 | const std::vector<Mat>& gauss_pyr, |
695 | const std::vector<Mat>& dog_pyr, |
696 | std::vector<KeyPoint>& kpts, |
697 | const cv::Range& range) |
698 | { |
699 | CV_TRACE_FUNCTION(); |
700 | |
701 | findScaleSpaceExtremaT(octave, layer, threshold, idx, |
702 | step, cols, |
703 | nOctaveLayers, contrastThreshold, edgeThreshold, sigma, |
704 | gauss_pyr, dog_pyr, |
705 | kpts) |
706 | .process(range); |
707 | } |
708 | |
709 | void calcSIFTDescriptor( |
710 | const Mat& img, Point2f ptf, float ori, float scl, |
711 | const int d, const int n, Mat& dstMat, int row |
712 | ) |
713 | { |
714 | CV_TRACE_FUNCTION(); |
715 | |
716 | Point pt(cvRound(value: ptf.x), cvRound(value: ptf.y)); |
717 | float cos_t = cosf(x: ori*(float)(CV_PI/180)); |
718 | float sin_t = sinf(x: ori*(float)(CV_PI/180)); |
719 | float bins_per_rad = n / 360.f; |
720 | float exp_scale = -1.f/(d * d * 0.5f); |
721 | float hist_width = SIFT_DESCR_SCL_FCTR * scl; |
722 | int radius = cvRound(value: hist_width * 1.4142135623730951f * (d + 1) * 0.5f); |
723 | // Clip the radius to the diagonal of the image to avoid autobuffer too large exception |
724 | radius = std::min(a: radius, b: (int)std::sqrt(x: ((double) img.cols)*img.cols + ((double) img.rows)*img.rows)); |
725 | cos_t /= hist_width; |
726 | sin_t /= hist_width; |
727 | |
728 | int i, j, k; |
729 | const int len = (radius*2+1)*(radius*2+1); |
730 | const int len_hist = (d+2)*(d+2)*(n+2); |
731 | const int len_ddn = d * d * n; |
732 | int rows = img.rows, cols = img.cols; |
733 | |
734 | cv::utils::BufferArea area; |
735 | float *X = 0, *Y = 0, *Mag, *Ori = 0, *W = 0, *RBin = 0, *CBin = 0, *hist = 0, *rawDst = 0; |
736 | area.allocate(ptr&: X, count: len, CV_SIMD_WIDTH); |
737 | area.allocate(ptr&: Y, count: len, CV_SIMD_WIDTH); |
738 | area.allocate(ptr&: Ori, count: len, CV_SIMD_WIDTH); |
739 | area.allocate(ptr&: W, count: len, CV_SIMD_WIDTH); |
740 | area.allocate(ptr&: RBin, count: len, CV_SIMD_WIDTH); |
741 | area.allocate(ptr&: CBin, count: len, CV_SIMD_WIDTH); |
742 | area.allocate(ptr&: hist, count: len_hist, CV_SIMD_WIDTH); |
743 | area.allocate(ptr&: rawDst, count: len_ddn, CV_SIMD_WIDTH); |
744 | area.commit(); |
745 | Mag = Y; |
746 | |
747 | for( i = 0; i < d+2; i++ ) |
748 | { |
749 | for( j = 0; j < d+2; j++ ) |
750 | for( k = 0; k < n+2; k++ ) |
751 | hist[(i*(d+2) + j)*(n+2) + k] = 0.; |
752 | } |
753 | |
754 | for( i = -radius, k = 0; i <= radius; i++ ) |
755 | for( j = -radius; j <= radius; j++ ) |
756 | { |
757 | // Calculate sample's histogram array coords rotated relative to ori. |
758 | // Subtract 0.5 so samples that fall e.g. in the center of row 1 (i.e. |
759 | // r_rot = 1.5) have full weight placed in row 1 after interpolation. |
760 | float c_rot = j * cos_t - i * sin_t; |
761 | float r_rot = j * sin_t + i * cos_t; |
762 | float rbin = r_rot + d/2 - 0.5f; |
763 | float cbin = c_rot + d/2 - 0.5f; |
764 | int r = pt.y + i, c = pt.x + j; |
765 | |
766 | if( rbin > -1 && rbin < d && cbin > -1 && cbin < d && |
767 | r > 0 && r < rows - 1 && c > 0 && c < cols - 1 ) |
768 | { |
769 | float dx = (float)(img.at<sift_wt>(i0: r, i1: c+1) - img.at<sift_wt>(i0: r, i1: c-1)); |
770 | float dy = (float)(img.at<sift_wt>(i0: r-1, i1: c) - img.at<sift_wt>(i0: r+1, i1: c)); |
771 | X[k] = dx; Y[k] = dy; RBin[k] = rbin; CBin[k] = cbin; |
772 | W[k] = (c_rot * c_rot + r_rot * r_rot)*exp_scale; |
773 | k++; |
774 | } |
775 | } |
776 | |
777 | const int len_left = k; |
778 | cv::hal::fastAtan2(y: Y, x: X, dst: Ori, n: len_left, angleInDegrees: true); |
779 | cv::hal::magnitude32f(x: X, y: Y, dst: Mag, n: len_left); |
780 | cv::hal::exp32f(src: W, dst: W, n: len_left); |
781 | |
782 | k = 0; |
783 | #if (CV_SIMD || CV_SIMD_SCALABLE) |
784 | { |
785 | const int vecsize = VTraits<v_float32>::vlanes(); |
786 | int CV_DECL_ALIGNED(CV_SIMD_WIDTH) idx_buf[VTraits<v_float32>::max_nlanes]; |
787 | float CV_DECL_ALIGNED(CV_SIMD_WIDTH) rco_buf[8*VTraits<v_float32>::max_nlanes]; |
788 | const v_float32 __ori = vx_setall_f32(v: ori); |
789 | const v_float32 __bins_per_rad = vx_setall_f32(v: bins_per_rad); |
790 | const v_int32 __n = vx_setall_s32(v: n); |
791 | const v_int32 __1 = vx_setall_s32(v: 1); |
792 | const v_int32 __d_plus_2 = vx_setall_s32(v: d+2); |
793 | const v_int32 __n_plus_2 = vx_setall_s32(v: n+2); |
794 | for( ; k <= len_left - vecsize; k += vecsize ) |
795 | { |
796 | v_float32 rbin = vx_load_aligned(ptr: RBin + k); |
797 | v_float32 cbin = vx_load_aligned(ptr: CBin + k); |
798 | v_float32 obin = v_mul(a: v_sub(a: vx_load_aligned(ptr: Ori + k), b: __ori), b: __bins_per_rad); |
799 | v_float32 mag = v_mul(a: vx_load_aligned(ptr: Mag + k), b: vx_load_aligned(ptr: W + k)); |
800 | |
801 | v_int32 r0 = v_floor(a: rbin); |
802 | v_int32 c0 = v_floor(a: cbin); |
803 | v_int32 o0 = v_floor(a: obin); |
804 | rbin = v_sub(a: rbin, b: v_cvt_f32(a: r0)); |
805 | cbin = v_sub(a: cbin, b: v_cvt_f32(a: c0)); |
806 | obin = v_sub(a: obin, b: v_cvt_f32(a: o0)); |
807 | |
808 | o0 = v_select(mask: v_lt(a: o0, b: vx_setzero_s32()), a: v_add(a: o0, b: __n), b: o0); |
809 | o0 = v_select(mask: v_ge(a: o0, b: __n), a: v_sub(a: o0, b: __n), b: o0); |
810 | |
811 | v_float32 v_r1 = v_mul(a: mag, b: rbin), v_r0 = v_sub(a: mag, b: v_r1); |
812 | v_float32 v_rc11 = v_mul(a: v_r1, b: cbin), v_rc10 = v_sub(a: v_r1, b: v_rc11); |
813 | v_float32 v_rc01 = v_mul(a: v_r0, b: cbin), v_rc00 = v_sub(a: v_r0, b: v_rc01); |
814 | v_float32 v_rco111 = v_mul(a: v_rc11, b: obin), v_rco110 = v_sub(a: v_rc11, b: v_rco111); |
815 | v_float32 v_rco101 = v_mul(a: v_rc10, b: obin), v_rco100 = v_sub(a: v_rc10, b: v_rco101); |
816 | v_float32 v_rco011 = v_mul(a: v_rc01, b: obin), v_rco010 = v_sub(a: v_rc01, b: v_rco011); |
817 | v_float32 v_rco001 = v_mul(a: v_rc00, b: obin), v_rco000 = v_sub(a: v_rc00, b: v_rco001); |
818 | |
819 | v_int32 idx = v_muladd(a: v_muladd(a: v_add(a: r0, b: __1), b: __d_plus_2, c: v_add(a: c0, b: __1)), b: __n_plus_2, c: o0); |
820 | v_store_aligned(ptr: idx_buf, a: idx); |
821 | |
822 | v_store_aligned(ptr: rco_buf, a: v_rco000); |
823 | v_store_aligned(ptr: rco_buf+vecsize, a: v_rco001); |
824 | v_store_aligned(ptr: rco_buf+vecsize*2, a: v_rco010); |
825 | v_store_aligned(ptr: rco_buf+vecsize*3, a: v_rco011); |
826 | v_store_aligned(ptr: rco_buf+vecsize*4, a: v_rco100); |
827 | v_store_aligned(ptr: rco_buf+vecsize*5, a: v_rco101); |
828 | v_store_aligned(ptr: rco_buf+vecsize*6, a: v_rco110); |
829 | v_store_aligned(ptr: rco_buf+vecsize*7, a: v_rco111); |
830 | |
831 | for(int id = 0; id < vecsize; id++) |
832 | { |
833 | hist[idx_buf[id]] += rco_buf[id]; |
834 | hist[idx_buf[id]+1] += rco_buf[vecsize + id]; |
835 | hist[idx_buf[id]+(n+2)] += rco_buf[2*vecsize + id]; |
836 | hist[idx_buf[id]+(n+3)] += rco_buf[3*vecsize + id]; |
837 | hist[idx_buf[id]+(d+2)*(n+2)] += rco_buf[4*vecsize + id]; |
838 | hist[idx_buf[id]+(d+2)*(n+2)+1] += rco_buf[5*vecsize + id]; |
839 | hist[idx_buf[id]+(d+3)*(n+2)] += rco_buf[6*vecsize + id]; |
840 | hist[idx_buf[id]+(d+3)*(n+2)+1] += rco_buf[7*vecsize + id]; |
841 | } |
842 | } |
843 | } |
844 | #endif |
845 | for( ; k < len_left; k++ ) |
846 | { |
847 | float rbin = RBin[k], cbin = CBin[k]; |
848 | float obin = (Ori[k] - ori)*bins_per_rad; |
849 | float mag = Mag[k]*W[k]; |
850 | |
851 | int r0 = cvFloor( value: rbin ); |
852 | int c0 = cvFloor( value: cbin ); |
853 | int o0 = cvFloor( value: obin ); |
854 | rbin -= r0; |
855 | cbin -= c0; |
856 | obin -= o0; |
857 | |
858 | if( o0 < 0 ) |
859 | o0 += n; |
860 | if( o0 >= n ) |
861 | o0 -= n; |
862 | |
863 | // histogram update using tri-linear interpolation |
864 | float v_r1 = mag*rbin, v_r0 = mag - v_r1; |
865 | float v_rc11 = v_r1*cbin, v_rc10 = v_r1 - v_rc11; |
866 | float v_rc01 = v_r0*cbin, v_rc00 = v_r0 - v_rc01; |
867 | float v_rco111 = v_rc11*obin, v_rco110 = v_rc11 - v_rco111; |
868 | float v_rco101 = v_rc10*obin, v_rco100 = v_rc10 - v_rco101; |
869 | float v_rco011 = v_rc01*obin, v_rco010 = v_rc01 - v_rco011; |
870 | float v_rco001 = v_rc00*obin, v_rco000 = v_rc00 - v_rco001; |
871 | |
872 | int idx = ((r0+1)*(d+2) + c0+1)*(n+2) + o0; |
873 | hist[idx] += v_rco000; |
874 | hist[idx+1] += v_rco001; |
875 | hist[idx+(n+2)] += v_rco010; |
876 | hist[idx+(n+3)] += v_rco011; |
877 | hist[idx+(d+2)*(n+2)] += v_rco100; |
878 | hist[idx+(d+2)*(n+2)+1] += v_rco101; |
879 | hist[idx+(d+3)*(n+2)] += v_rco110; |
880 | hist[idx+(d+3)*(n+2)+1] += v_rco111; |
881 | } |
882 | |
883 | // finalize histogram, since the orientation histograms are circular |
884 | for( i = 0; i < d; i++ ) |
885 | for( j = 0; j < d; j++ ) |
886 | { |
887 | int idx = ((i+1)*(d+2) + (j+1))*(n+2); |
888 | hist[idx] += hist[idx+n]; |
889 | hist[idx+1] += hist[idx+n+1]; |
890 | for( k = 0; k < n; k++ ) |
891 | rawDst[(i*d + j)*n + k] = hist[idx+k]; |
892 | } |
893 | // copy histogram to the descriptor, |
894 | // apply hysteresis thresholding |
895 | // and scale the result, so that it can be easily converted |
896 | // to byte array |
897 | float nrm2 = 0; |
898 | k = 0; |
899 | #if (CV_SIMD || CV_SIMD_SCALABLE) |
900 | { |
901 | v_float32 __nrm2 = vx_setzero_f32(); |
902 | v_float32 __rawDst; |
903 | for( ; k <= len_ddn - VTraits<v_float32>::vlanes(); k += VTraits<v_float32>::vlanes() ) |
904 | { |
905 | __rawDst = vx_load_aligned(ptr: rawDst + k); |
906 | __nrm2 = v_fma(a: __rawDst, b: __rawDst, c: __nrm2); |
907 | } |
908 | nrm2 = (float)v_reduce_sum(a: __nrm2); |
909 | } |
910 | #endif |
911 | for( ; k < len_ddn; k++ ) |
912 | nrm2 += rawDst[k]*rawDst[k]; |
913 | |
914 | const float thr = std::sqrt(x: nrm2)*SIFT_DESCR_MAG_THR; |
915 | |
916 | i = 0, nrm2 = 0; |
917 | #if 0 //CV_AVX2 |
918 | // This code cannot be enabled because it sums nrm2 in a different order, |
919 | // thus producing slightly different results |
920 | { |
921 | float CV_DECL_ALIGNED(CV_SIMD_WIDTH) nrm2_buf[8]; |
922 | __m256 __dst; |
923 | __m256 __nrm2 = _mm256_setzero_ps(); |
924 | __m256 __thr = _mm256_set1_ps(thr); |
925 | for( ; i <= len_ddn - 8; i += 8 ) |
926 | { |
927 | __dst = _mm256_loadu_ps(&rawDst[i]); |
928 | __dst = _mm256_min_ps(__dst, __thr); |
929 | _mm256_storeu_ps(&rawDst[i], __dst); |
930 | #if CV_FMA3 |
931 | __nrm2 = _mm256_fmadd_ps(__dst, __dst, __nrm2); |
932 | #else |
933 | __nrm2 = _mm256_add_ps(__nrm2, _mm256_mul_ps(__dst, __dst)); |
934 | #endif |
935 | } |
936 | _mm256_store_ps(nrm2_buf, __nrm2); |
937 | nrm2 = nrm2_buf[0] + nrm2_buf[1] + nrm2_buf[2] + nrm2_buf[3] + |
938 | nrm2_buf[4] + nrm2_buf[5] + nrm2_buf[6] + nrm2_buf[7]; |
939 | } |
940 | #endif |
941 | for( ; i < len_ddn; i++ ) |
942 | { |
943 | float val = std::min(a: rawDst[i], b: thr); |
944 | rawDst[i] = val; |
945 | nrm2 += val*val; |
946 | } |
947 | nrm2 = SIFT_INT_DESCR_FCTR/std::max(a: std::sqrt(x: nrm2), FLT_EPSILON); |
948 | |
949 | #if 1 |
950 | k = 0; |
951 | if( dstMat.type() == CV_32F ) |
952 | { |
953 | float* dst = dstMat.ptr<float>(y: row); |
954 | #if (CV_SIMD || CV_SIMD_SCALABLE) |
955 | v_float32 __dst; |
956 | v_float32 __min = vx_setzero_f32(); |
957 | v_float32 __max = vx_setall_f32(v: 255.0f); // max of uchar |
958 | v_float32 __nrm2 = vx_setall_f32(v: nrm2); |
959 | for( k = 0; k <= len_ddn - VTraits<v_float32>::vlanes(); k += VTraits<v_float32>::vlanes() ) |
960 | { |
961 | __dst = vx_load_aligned(ptr: rawDst + k); |
962 | __dst = v_min(a: v_max(a: v_cvt_f32(a: v_round(a: v_mul(a: __dst, b: __nrm2))), b: __min), b: __max); |
963 | v_store(ptr: dst + k, a: __dst); |
964 | } |
965 | #endif |
966 | #if defined(__GNUC__) && __GNUC__ >= 9 |
967 | #pragma GCC diagnostic push |
968 | #pragma GCC diagnostic ignored "-Waggressive-loop-optimizations" // iteration XX invokes undefined behavior |
969 | #endif |
970 | for( ; k < len_ddn; k++ ) |
971 | { |
972 | dst[k] = saturate_cast<uchar>(v: rawDst[k]*nrm2); |
973 | } |
974 | #if defined(__GNUC__) && __GNUC__ >= 9 |
975 | #pragma GCC diagnostic pop |
976 | #endif |
977 | } |
978 | else // CV_8U |
979 | { |
980 | uint8_t* dst = dstMat.ptr<uint8_t>(y: row); |
981 | #if (CV_SIMD || CV_SIMD_SCALABLE) |
982 | v_float32 __dst0, __dst1; |
983 | v_uint16 __pack01; |
984 | v_float32 __nrm2 = vx_setall_f32(v: nrm2); |
985 | for( k = 0; k <= len_ddn - VTraits<v_float32>::vlanes() * 2; k += VTraits<v_float32>::vlanes() * 2 ) |
986 | { |
987 | __dst0 = vx_load_aligned(ptr: rawDst + k); |
988 | __dst1 = vx_load_aligned(ptr: rawDst + k + VTraits<v_float32>::vlanes()); |
989 | |
990 | __pack01 = v_pack_u(a: v_round(a: v_mul(a: __dst0, b: __nrm2)), b: v_round(a: v_mul(a: __dst1, b: __nrm2))); |
991 | v_pack_store(ptr: dst + k, a: __pack01); |
992 | } |
993 | #endif |
994 | |
995 | #if defined(__GNUC__) && __GNUC__ >= 9 |
996 | #pragma GCC diagnostic push |
997 | #pragma GCC diagnostic ignored "-Waggressive-loop-optimizations" // iteration XX invokes undefined behavior |
998 | #endif |
999 | for( ; k < len_ddn; k++ ) |
1000 | { |
1001 | dst[k] = saturate_cast<uchar>(v: rawDst[k]*nrm2); |
1002 | } |
1003 | #if defined(__GNUC__) && __GNUC__ >= 9 |
1004 | #pragma GCC diagnostic pop |
1005 | #endif |
1006 | } |
1007 | #else |
1008 | float nrm1 = 0; |
1009 | for( k = 0; k < len_ddn; k++ ) |
1010 | { |
1011 | rawDst[k] *= nrm2; |
1012 | nrm1 += rawDst[k]; |
1013 | } |
1014 | nrm1 = 1.f/std::max(nrm1, FLT_EPSILON); |
1015 | if( dstMat.type() == CV_32F ) |
1016 | { |
1017 | float *dst = dstMat.ptr<float>(row); |
1018 | for( k = 0; k < len_ddn; k++ ) |
1019 | { |
1020 | dst[k] = std::sqrt(rawDst[k] * nrm1); |
1021 | } |
1022 | } |
1023 | else // CV_8U |
1024 | { |
1025 | uint8_t *dst = dstMat.ptr<uint8_t>(row); |
1026 | for( k = 0; k < len_ddn; k++ ) |
1027 | { |
1028 | dst[k] = saturate_cast<uchar>(std::sqrt(rawDst[k] * nrm1)*SIFT_INT_DESCR_FCTR); |
1029 | } |
1030 | } |
1031 | #endif |
1032 | } |
1033 | |
1034 | #endif |
1035 | CV_CPU_OPTIMIZATION_NAMESPACE_END |
1036 | } // namespace |
1037 |
Definitions
- SIFT_DESCR_WIDTH
- SIFT_DESCR_HIST_BINS
- SIFT_INIT_SIGMA
- SIFT_IMG_BORDER
- SIFT_MAX_INTERP_STEPS
- SIFT_ORI_HIST_BINS
- SIFT_ORI_SIG_FCTR
- SIFT_ORI_RADIUS
- SIFT_ORI_PEAK_RATIO
- SIFT_DESCR_SCL_FCTR
- SIFT_DESCR_MAG_THR
- SIFT_INT_DESCR_FCTR
- SIFT_FIXPT_SCALE
- calcOrientationHist
- adjustLocalExtrema
- findScaleSpaceExtremaT
- findScaleSpaceExtremaT
- process
- findScaleSpaceExtrema
Improve your Profiling and Debugging skills
Find out more