1 | /*M/////////////////////////////////////////////////////////////////////////////////////// |
---|---|
2 | // |
3 | // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. |
4 | // |
5 | // By downloading, copying, installing or using the software you agree to this license. |
6 | // If you do not agree to this license, do not download, install, |
7 | // copy or use the software. |
8 | // |
9 | // |
10 | // License Agreement |
11 | // For Open Source Computer Vision Library |
12 | // |
13 | // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. |
14 | // Copyright (C) 2009, Willow Garage Inc., all rights reserved. |
15 | // Third party copyrights are property of their respective owners. |
16 | // |
17 | // Redistribution and use in source and binary forms, with or without modification, |
18 | // are permitted provided that the following conditions are met: |
19 | // |
20 | // * Redistribution's of source code must retain the above copyright notice, |
21 | // this list of conditions and the following disclaimer. |
22 | // |
23 | // * Redistribution's in binary form must reproduce the above copyright notice, |
24 | // this list of conditions and the following disclaimer in the documentation |
25 | // and/or other materials provided with the distribution. |
26 | // |
27 | // * The name of the copyright holders may not be used to endorse or promote products |
28 | // derived from this software without specific prior written permission. |
29 | // |
30 | // This software is provided by the copyright holders and contributors "as is" and |
31 | // any express or implied warranties, including, but not limited to, the implied |
32 | // warranties of merchantability and fitness for a particular purpose are disclaimed. |
33 | // In no event shall the Intel Corporation or contributors be liable for any direct, |
34 | // indirect, incidental, special, exemplary, or consequential damages |
35 | // (including, but not limited to, procurement of substitute goods or services; |
36 | // loss of use, data, or profits; or business interruption) however caused |
37 | // and on any theory of liability, whether in contract, strict liability, |
38 | // or tort (including negligence or otherwise) arising in any way out of |
39 | // the use of this software, even if advised of the possibility of such damage. |
40 | // |
41 | //M*/ |
42 | |
43 | #include "precomp.hpp" |
44 | #include "opencl_kernels_video.hpp" |
45 | #include "opencv2/core/hal/intrin.hpp" |
46 | |
47 | #if defined __APPLE__ || defined __ANDROID__ |
48 | #define SMALL_LOCALSIZE |
49 | #endif |
50 | |
51 | // |
52 | // 2D dense optical flow algorithm from the following paper: |
53 | // Gunnar Farneback. "Two-Frame Motion Estimation Based on Polynomial Expansion". |
54 | // Proceedings of the 13th Scandinavian Conference on Image Analysis, Gothenburg, Sweden |
55 | // |
56 | |
57 | namespace cv |
58 | { |
59 | |
60 | static void |
61 | FarnebackPrepareGaussian(int n, double sigma, float *g, float *xg, float *xxg, |
62 | double &ig11, double &ig03, double &ig33, double &ig55) |
63 | { |
64 | if( sigma < FLT_EPSILON ) |
65 | sigma = n*0.3; |
66 | |
67 | double s = 0.; |
68 | for (int x = -n; x <= n; x++) |
69 | { |
70 | g[x] = (float)std::exp(x: -x*x/(2*sigma*sigma)); |
71 | s += g[x]; |
72 | } |
73 | |
74 | s = 1./s; |
75 | for (int x = -n; x <= n; x++) |
76 | { |
77 | g[x] = (float)(g[x]*s); |
78 | xg[x] = (float)(x*g[x]); |
79 | xxg[x] = (float)(x*x*g[x]); |
80 | } |
81 | |
82 | Mat_<double> G(6, 6); |
83 | G.setTo(value: 0); |
84 | |
85 | for (int y = -n; y <= n; y++) |
86 | { |
87 | for (int x = -n; x <= n; x++) |
88 | { |
89 | G(0,0) += g[y]*g[x]; |
90 | G(1,1) += g[y]*g[x]*x*x; |
91 | G(3,3) += g[y]*g[x]*x*x*x*x; |
92 | G(5,5) += g[y]*g[x]*x*x*y*y; |
93 | } |
94 | } |
95 | |
96 | //G[0][0] = 1.; |
97 | G(2,2) = G(0,3) = G(0,4) = G(3,0) = G(4,0) = G(1,1); |
98 | G(4,4) = G(3,3); |
99 | G(3,4) = G(4,3) = G(5,5); |
100 | |
101 | // invG: |
102 | // [ x e e ] |
103 | // [ y ] |
104 | // [ y ] |
105 | // [ e z ] |
106 | // [ e z ] |
107 | // [ u ] |
108 | Mat_<double> invG = G.inv(method: DECOMP_CHOLESKY); |
109 | |
110 | ig11 = invG(1,1); |
111 | ig03 = invG(0,3); |
112 | ig33 = invG(3,3); |
113 | ig55 = invG(5,5); |
114 | } |
115 | |
116 | static void |
117 | FarnebackPolyExp( const Mat& src, Mat& dst, int n, double sigma ) |
118 | { |
119 | int k, x, y; |
120 | |
121 | CV_Assert( src.type() == CV_32FC1 ); |
122 | int width = src.cols; |
123 | int height = src.rows; |
124 | AutoBuffer<float> kbuf(n*6 + 3), _row((width + n*2)*3); |
125 | float* g = kbuf.data() + n; |
126 | float* xg = g + n*2 + 1; |
127 | float* xxg = xg + n*2 + 1; |
128 | float *row = _row.data() + n*3; |
129 | double ig11, ig03, ig33, ig55; |
130 | |
131 | FarnebackPrepareGaussian(n, sigma, g, xg, xxg, ig11, ig03, ig33, ig55); |
132 | |
133 | dst.create( rows: height, cols: width, CV_32FC(5)); |
134 | |
135 | for( y = 0; y < height; y++ ) |
136 | { |
137 | float g0 = g[0], g1, g2; |
138 | const float *srow0 = src.ptr<float>(y), *srow1 = 0; |
139 | float *drow = dst.ptr<float>(y); |
140 | |
141 | // vertical part of convolution |
142 | for( x = 0; x < width; x++ ) |
143 | { |
144 | row[x*3] = srow0[x]*g0; |
145 | row[x*3+1] = row[x*3+2] = 0.f; |
146 | } |
147 | |
148 | for( k = 1; k <= n; k++ ) |
149 | { |
150 | g0 = g[k]; g1 = xg[k]; g2 = xxg[k]; |
151 | srow0 = src.ptr<float>(y: std::max(a: y-k,b: 0)); |
152 | srow1 = src.ptr<float>(y: std::min(a: y+k,b: height-1)); |
153 | |
154 | for( x = 0; x < width; x++ ) |
155 | { |
156 | float p = srow0[x] + srow1[x]; |
157 | float t0 = row[x*3] + g0*p; |
158 | float t1 = row[x*3+1] + g1*(srow1[x] - srow0[x]); |
159 | float t2 = row[x*3+2] + g2*p; |
160 | |
161 | row[x*3] = t0; |
162 | row[x*3+1] = t1; |
163 | row[x*3+2] = t2; |
164 | } |
165 | } |
166 | |
167 | // horizontal part of convolution |
168 | for( x = 0; x < n*3; x++ ) |
169 | { |
170 | row[-1-x] = row[2-x]; |
171 | row[width*3+x] = row[width*3+x-3]; |
172 | } |
173 | |
174 | for( x = 0; x < width; x++ ) |
175 | { |
176 | g0 = g[0]; |
177 | // r1 ~ 1, r2 ~ x, r3 ~ y, r4 ~ x^2, r5 ~ y^2, r6 ~ xy |
178 | double b1 = row[x*3]*g0, b2 = 0, b3 = row[x*3+1]*g0, |
179 | b4 = 0, b5 = row[x*3+2]*g0, b6 = 0; |
180 | |
181 | for( k = 1; k <= n; k++ ) |
182 | { |
183 | double tg = row[(x+k)*3] + row[(x-k)*3]; |
184 | g0 = g[k]; |
185 | b1 += tg*g0; |
186 | b4 += tg*xxg[k]; |
187 | b2 += (row[(x+k)*3] - row[(x-k)*3])*xg[k]; |
188 | b3 += (row[(x+k)*3+1] + row[(x-k)*3+1])*g0; |
189 | b6 += (row[(x+k)*3+1] - row[(x-k)*3+1])*xg[k]; |
190 | b5 += (row[(x+k)*3+2] + row[(x-k)*3+2])*g0; |
191 | } |
192 | |
193 | // do not store r1 |
194 | drow[x*5+1] = (float)(b2*ig11); |
195 | drow[x*5] = (float)(b3*ig11); |
196 | drow[x*5+3] = (float)(b1*ig03 + b4*ig33); |
197 | drow[x*5+2] = (float)(b1*ig03 + b5*ig33); |
198 | drow[x*5+4] = (float)(b6*ig55); |
199 | } |
200 | } |
201 | |
202 | row -= n*3; |
203 | } |
204 | |
205 | |
206 | /*static void |
207 | FarnebackPolyExpPyr( const Mat& src0, Vector<Mat>& pyr, int maxlevel, int n, double sigma ) |
208 | { |
209 | Vector<Mat> imgpyr; |
210 | buildPyramid( src0, imgpyr, maxlevel ); |
211 | |
212 | for( int i = 0; i <= maxlevel; i++ ) |
213 | FarnebackPolyExp( imgpyr[i], pyr[i], n, sigma ); |
214 | }*/ |
215 | |
216 | |
217 | static void |
218 | FarnebackUpdateMatrices( const Mat& _R0, const Mat& _R1, const Mat& _flow, Mat& matM, int _y0, int _y1 ) |
219 | { |
220 | const int BORDER = 5; |
221 | static const float border[BORDER] = {0.14f, 0.14f, 0.4472f, 0.4472f, 0.4472f}; |
222 | |
223 | int x, y, width = _flow.cols, height = _flow.rows; |
224 | const float* R1 = _R1.ptr<float>(); |
225 | size_t step1 = _R1.step/sizeof(R1[0]); |
226 | |
227 | matM.create(rows: height, cols: width, CV_32FC(5)); |
228 | |
229 | for( y = _y0; y < _y1; y++ ) |
230 | { |
231 | const float* flow = _flow.ptr<float>(y); |
232 | const float* R0 = _R0.ptr<float>(y); |
233 | float* M = matM.ptr<float>(y); |
234 | |
235 | for( x = 0; x < width; x++ ) |
236 | { |
237 | float dx = flow[x*2], dy = flow[x*2+1]; |
238 | float fx = x + dx, fy = y + dy; |
239 | |
240 | #if 1 |
241 | int x1 = cvFloor(value: fx), y1 = cvFloor(value: fy); |
242 | const float* ptr = R1 + y1*step1 + x1*5; |
243 | float r2, r3, r4, r5, r6; |
244 | |
245 | fx -= x1; fy -= y1; |
246 | |
247 | if( (unsigned)x1 < (unsigned)(width-1) && |
248 | (unsigned)y1 < (unsigned)(height-1) ) |
249 | { |
250 | float a00 = (1.f-fx)*(1.f-fy), a01 = fx*(1.f-fy), |
251 | a10 = (1.f-fx)*fy, a11 = fx*fy; |
252 | |
253 | r2 = a00*ptr[0] + a01*ptr[5] + a10*ptr[step1] + a11*ptr[step1+5]; |
254 | r3 = a00*ptr[1] + a01*ptr[6] + a10*ptr[step1+1] + a11*ptr[step1+6]; |
255 | r4 = a00*ptr[2] + a01*ptr[7] + a10*ptr[step1+2] + a11*ptr[step1+7]; |
256 | r5 = a00*ptr[3] + a01*ptr[8] + a10*ptr[step1+3] + a11*ptr[step1+8]; |
257 | r6 = a00*ptr[4] + a01*ptr[9] + a10*ptr[step1+4] + a11*ptr[step1+9]; |
258 | |
259 | r4 = (R0[x*5+2] + r4)*0.5f; |
260 | r5 = (R0[x*5+3] + r5)*0.5f; |
261 | r6 = (R0[x*5+4] + r6)*0.25f; |
262 | } |
263 | #else |
264 | int x1 = cvRound(fx), y1 = cvRound(fy); |
265 | const float* ptr = R1 + y1*step1 + x1*5; |
266 | float r2, r3, r4, r5, r6; |
267 | |
268 | if( (unsigned)x1 < (unsigned)width && |
269 | (unsigned)y1 < (unsigned)height ) |
270 | { |
271 | r2 = ptr[0]; |
272 | r3 = ptr[1]; |
273 | r4 = (R0[x*5+2] + ptr[2])*0.5f; |
274 | r5 = (R0[x*5+3] + ptr[3])*0.5f; |
275 | r6 = (R0[x*5+4] + ptr[4])*0.25f; |
276 | } |
277 | #endif |
278 | else |
279 | { |
280 | r2 = r3 = 0.f; |
281 | r4 = R0[x*5+2]; |
282 | r5 = R0[x*5+3]; |
283 | r6 = R0[x*5+4]*0.5f; |
284 | } |
285 | |
286 | r2 = (R0[x*5] - r2)*0.5f; |
287 | r3 = (R0[x*5+1] - r3)*0.5f; |
288 | |
289 | r2 += r4*dy + r6*dx; |
290 | r3 += r6*dy + r5*dx; |
291 | |
292 | if( (unsigned)(x - BORDER) >= (unsigned)(width - BORDER*2) || |
293 | (unsigned)(y - BORDER) >= (unsigned)(height - BORDER*2)) |
294 | { |
295 | float scale = (x < BORDER ? border[x] : 1.f)* |
296 | (x >= width - BORDER ? border[width - x - 1] : 1.f)* |
297 | (y < BORDER ? border[y] : 1.f)* |
298 | (y >= height - BORDER ? border[height - y - 1] : 1.f); |
299 | |
300 | r2 *= scale; r3 *= scale; r4 *= scale; |
301 | r5 *= scale; r6 *= scale; |
302 | } |
303 | |
304 | M[x*5] = r4*r4 + r6*r6; // G(1,1) |
305 | M[x*5+1] = (r4 + r5)*r6; // G(1,2)=G(2,1) |
306 | M[x*5+2] = r5*r5 + r6*r6; // G(2,2) |
307 | M[x*5+3] = r4*r2 + r6*r3; // h(1) |
308 | M[x*5+4] = r6*r2 + r5*r3; // h(2) |
309 | } |
310 | } |
311 | } |
312 | |
313 | |
314 | static void |
315 | FarnebackUpdateFlow_Blur( const Mat& _R0, const Mat& _R1, |
316 | Mat& _flow, Mat& matM, int block_size, |
317 | bool update_matrices ) |
318 | { |
319 | int x, y, width = _flow.cols, height = _flow.rows; |
320 | int m = block_size/2; |
321 | int y0 = 0, y1; |
322 | int min_update_stripe = std::max(a: (1 << 10)/width, b: block_size); |
323 | double scale = 1./(block_size*block_size); |
324 | |
325 | AutoBuffer<double> _vsum((width+m*2+2)*5); |
326 | double* vsum = _vsum.data() + (m+1)*5; |
327 | |
328 | // init vsum |
329 | const float* srow0 = matM.ptr<float>(); |
330 | for( x = 0; x < width*5; x++ ) |
331 | vsum[x] = srow0[x]*(m+2); |
332 | |
333 | for( y = 1; y < m; y++ ) |
334 | { |
335 | srow0 = matM.ptr<float>(y: std::min(a: y,b: height-1)); |
336 | for( x = 0; x < width*5; x++ ) |
337 | vsum[x] += srow0[x]; |
338 | } |
339 | |
340 | // compute blur(G)*flow=blur(h) |
341 | for( y = 0; y < height; y++ ) |
342 | { |
343 | double g11, g12, g22, h1, h2; |
344 | float* flow = _flow.ptr<float>(y); |
345 | |
346 | srow0 = matM.ptr<float>(y: std::max(a: y-m-1,b: 0)); |
347 | const float* srow1 = matM.ptr<float>(y: std::min(a: y+m,b: height-1)); |
348 | |
349 | // vertical blur |
350 | for( x = 0; x < width*5; x++ ) |
351 | vsum[x] += srow1[x] - srow0[x]; |
352 | |
353 | // update borders |
354 | for( x = 0; x < (m+1)*5; x++ ) |
355 | { |
356 | vsum[-1-x] = vsum[4-x]; |
357 | vsum[width*5+x] = vsum[width*5+x-5]; |
358 | } |
359 | |
360 | // init g** and h* |
361 | g11 = vsum[0]*(m+2); |
362 | g12 = vsum[1]*(m+2); |
363 | g22 = vsum[2]*(m+2); |
364 | h1 = vsum[3]*(m+2); |
365 | h2 = vsum[4]*(m+2); |
366 | |
367 | for( x = 1; x < m; x++ ) |
368 | { |
369 | g11 += vsum[x*5]; |
370 | g12 += vsum[x*5+1]; |
371 | g22 += vsum[x*5+2]; |
372 | h1 += vsum[x*5+3]; |
373 | h2 += vsum[x*5+4]; |
374 | } |
375 | |
376 | // horizontal blur |
377 | for( x = 0; x < width; x++ ) |
378 | { |
379 | g11 += vsum[(x+m)*5] - vsum[(x-m)*5 - 5]; |
380 | g12 += vsum[(x+m)*5 + 1] - vsum[(x-m)*5 - 4]; |
381 | g22 += vsum[(x+m)*5 + 2] - vsum[(x-m)*5 - 3]; |
382 | h1 += vsum[(x+m)*5 + 3] - vsum[(x-m)*5 - 2]; |
383 | h2 += vsum[(x+m)*5 + 4] - vsum[(x-m)*5 - 1]; |
384 | |
385 | double g11_ = g11*scale; |
386 | double g12_ = g12*scale; |
387 | double g22_ = g22*scale; |
388 | double h1_ = h1*scale; |
389 | double h2_ = h2*scale; |
390 | |
391 | double idet = 1./(g11_*g22_ - g12_*g12_+1e-3); |
392 | |
393 | flow[x*2] = (float)((g11_*h2_-g12_*h1_)*idet); |
394 | flow[x*2+1] = (float)((g22_*h1_-g12_*h2_)*idet); |
395 | } |
396 | |
397 | y1 = y == height - 1 ? height : y - block_size; |
398 | if( update_matrices && (y1 == height || y1 >= y0 + min_update_stripe) ) |
399 | { |
400 | FarnebackUpdateMatrices( _R0, _R1, _flow, matM, y0: y0, y1: y1 ); |
401 | y0 = y1; |
402 | } |
403 | } |
404 | } |
405 | |
406 | |
407 | static void |
408 | FarnebackUpdateFlow_GaussianBlur( const Mat& _R0, const Mat& _R1, |
409 | Mat& _flow, Mat& matM, int block_size, |
410 | bool update_matrices ) |
411 | { |
412 | int x, y, i, width = _flow.cols, height = _flow.rows; |
413 | int m = block_size/2; |
414 | int y0 = 0, y1; |
415 | int min_update_stripe = std::max(a: (1 << 10)/width, b: block_size); |
416 | double sigma = m*0.3, s = 1; |
417 | |
418 | AutoBuffer<float> _vsum((width+m*2+2)*5 + 16), _hsum(width*5 + 16); |
419 | AutoBuffer<float> _kernel((m+1)*5 + 16); |
420 | AutoBuffer<const float*> _srow(m*2+1); |
421 | float *vsum = alignPtr(ptr: _vsum.data() + (m+1)*5, n: 16), *hsum = alignPtr(ptr: _hsum.data(), n: 16); |
422 | float* kernel = _kernel.data(); |
423 | const float** srow = _srow.data(); |
424 | kernel[0] = (float)s; |
425 | |
426 | for( i = 1; i <= m; i++ ) |
427 | { |
428 | float t = (float)std::exp(x: -i*i/(2*sigma*sigma) ); |
429 | kernel[i] = t; |
430 | s += t*2; |
431 | } |
432 | |
433 | s = 1./s; |
434 | for( i = 0; i <= m; i++ ) |
435 | kernel[i] = (float)(kernel[i]*s); |
436 | |
437 | #if CV_SIMD128 |
438 | float* simd_kernel = alignPtr(ptr: kernel + m+1, n: 16); |
439 | { |
440 | for( i = 0; i <= m; i++ ) |
441 | v_store(ptr: simd_kernel + i*4, a: v_setall_f32(v: kernel[i])); |
442 | } |
443 | #endif |
444 | |
445 | // compute blur(G)*flow=blur(h) |
446 | for( y = 0; y < height; y++ ) |
447 | { |
448 | double g11, g12, g22, h1, h2; |
449 | float* flow = _flow.ptr<float>(y); |
450 | |
451 | // vertical blur |
452 | for( i = 0; i <= m; i++ ) |
453 | { |
454 | srow[m-i] = matM.ptr<float>(y: std::max(a: y-i,b: 0)); |
455 | srow[m+i] = matM.ptr<float>(y: std::min(a: y+i,b: height-1)); |
456 | } |
457 | |
458 | x = 0; |
459 | #if CV_SIMD128 |
460 | { |
461 | for( ; x <= width*5 - 16; x += 16 ) |
462 | { |
463 | const float *sptr0 = srow[m], *sptr1; |
464 | v_float32x4 g4 = v_load(ptr: simd_kernel); |
465 | v_float32x4 s0, s1, s2, s3; |
466 | s0 = v_mul(a: v_load(ptr: sptr0 + x), b: g4); |
467 | s1 = v_mul(a: v_load(ptr: sptr0 + x + 4), b: g4); |
468 | s2 = v_mul(a: v_load(ptr: sptr0 + x + 8), b: g4); |
469 | s3 = v_mul(a: v_load(ptr: sptr0 + x + 12), b: g4); |
470 | |
471 | for( i = 1; i <= m; i++ ) |
472 | { |
473 | v_float32x4 x0, x1; |
474 | sptr0 = srow[m+i], sptr1 = srow[m-i]; |
475 | g4 = v_load(ptr: simd_kernel + i*4); |
476 | x0 = v_add(a: v_load(ptr: sptr0 + x), b: v_load(ptr: sptr1 + x)); |
477 | x1 = v_add(a: v_load(ptr: sptr0 + x + 4), b: v_load(ptr: sptr1 + x + 4)); |
478 | s0 = v_muladd(a: x0, b: g4, c: s0); |
479 | s1 = v_muladd(a: x1, b: g4, c: s1); |
480 | x0 = v_add(a: v_load(ptr: sptr0 + x + 8), b: v_load(ptr: sptr1 + x + 8)); |
481 | x1 = v_add(a: v_load(ptr: sptr0 + x + 12), b: v_load(ptr: sptr1 + x + 12)); |
482 | s2 = v_muladd(a: x0, b: g4, c: s2); |
483 | s3 = v_muladd(a: x1, b: g4, c: s3); |
484 | } |
485 | |
486 | v_store(ptr: vsum + x, a: s0); |
487 | v_store(ptr: vsum + x + 4, a: s1); |
488 | v_store(ptr: vsum + x + 8, a: s2); |
489 | v_store(ptr: vsum + x + 12, a: s3); |
490 | } |
491 | |
492 | for( ; x <= width*5 - 4; x += 4 ) |
493 | { |
494 | const float *sptr0 = srow[m], *sptr1; |
495 | v_float32x4 g4 = v_load(ptr: simd_kernel); |
496 | v_float32x4 s0 = v_mul(a: v_load(ptr: sptr0 + x), b: g4); |
497 | |
498 | for( i = 1; i <= m; i++ ) |
499 | { |
500 | sptr0 = srow[m+i], sptr1 = srow[m-i]; |
501 | g4 = v_load(ptr: simd_kernel + i*4); |
502 | v_float32x4 x0 = v_add(a: v_load(ptr: sptr0 + x), b: v_load(ptr: sptr1 + x)); |
503 | s0 = v_muladd(a: x0, b: g4, c: s0); |
504 | } |
505 | v_store(ptr: vsum + x, a: s0); |
506 | } |
507 | } |
508 | #endif |
509 | for( ; x < width*5; x++ ) |
510 | { |
511 | float s0 = srow[m][x]*kernel[0]; |
512 | for( i = 1; i <= m; i++ ) |
513 | s0 += (srow[m+i][x] + srow[m-i][x])*kernel[i]; |
514 | vsum[x] = s0; |
515 | } |
516 | |
517 | // update borders |
518 | for( x = 0; x < m*5; x++ ) |
519 | { |
520 | vsum[-1-x] = vsum[4-x]; |
521 | vsum[width*5+x] = vsum[width*5+x-5]; |
522 | } |
523 | |
524 | // horizontal blur |
525 | x = 0; |
526 | #if CV_SIMD128 |
527 | { |
528 | for( ; x <= width*5 - 8; x += 8 ) |
529 | { |
530 | v_float32x4 g4 = v_load(ptr: simd_kernel); |
531 | v_float32x4 s0 = v_mul(a: v_load(ptr: vsum + x), b: g4); |
532 | v_float32x4 s1 = v_mul(a: v_load(ptr: vsum + x + 4), b: g4); |
533 | |
534 | for( i = 1; i <= m; i++ ) |
535 | { |
536 | g4 = v_load(ptr: simd_kernel + i*4); |
537 | v_float32x4 x0 = v_add(a: v_load(ptr: vsum + x - i * 5), b: v_load(ptr: vsum + x + i * 5)); |
538 | v_float32x4 x1 = v_add(a: v_load(ptr: vsum + x - i * 5 + 4), b: v_load(ptr: vsum + x + i * 5 + 4)); |
539 | s0 = v_muladd(a: x0, b: g4, c: s0); |
540 | s1 = v_muladd(a: x1, b: g4, c: s1); |
541 | } |
542 | |
543 | v_store(ptr: hsum + x, a: s0); |
544 | v_store(ptr: hsum + x + 4, a: s1); |
545 | } |
546 | } |
547 | #endif |
548 | for( ; x < width*5; x++ ) |
549 | { |
550 | float sum = vsum[x]*kernel[0]; |
551 | for( i = 1; i <= m; i++ ) |
552 | sum += kernel[i]*(vsum[x - i*5] + vsum[x + i*5]); |
553 | hsum[x] = sum; |
554 | } |
555 | |
556 | for( x = 0; x < width; x++ ) |
557 | { |
558 | g11 = hsum[x*5]; |
559 | g12 = hsum[x*5+1]; |
560 | g22 = hsum[x*5+2]; |
561 | h1 = hsum[x*5+3]; |
562 | h2 = hsum[x*5+4]; |
563 | |
564 | double idet = 1./(g11*g22 - g12*g12 + 1e-3); |
565 | |
566 | flow[x*2] = (float)((g11*h2-g12*h1)*idet); |
567 | flow[x*2+1] = (float)((g22*h1-g12*h2)*idet); |
568 | } |
569 | |
570 | y1 = y == height - 1 ? height : y - block_size; |
571 | if( update_matrices && (y1 == height || y1 >= y0 + min_update_stripe) ) |
572 | { |
573 | FarnebackUpdateMatrices( _R0, _R1, _flow, matM, y0: y0, y1: y1 ); |
574 | y0 = y1; |
575 | } |
576 | } |
577 | } |
578 | |
579 | } |
580 | |
581 | namespace cv |
582 | { |
583 | namespace |
584 | { |
585 | class FarnebackOpticalFlowImpl : public FarnebackOpticalFlow |
586 | { |
587 | public: |
588 | FarnebackOpticalFlowImpl(int numLevels=5, double pyrScale=0.5, bool fastPyramids=false, int winSize=13, |
589 | int numIters=10, int polyN=5, double polySigma=1.1, int flags=0) : |
590 | numLevels_(numLevels), pyrScale_(pyrScale), fastPyramids_(fastPyramids), winSize_(winSize), |
591 | numIters_(numIters), polyN_(polyN), polySigma_(polySigma), flags_(flags) |
592 | { |
593 | } |
594 | |
595 | virtual int getNumLevels() const CV_OVERRIDE { return numLevels_; } |
596 | virtual void setNumLevels(int numLevels) CV_OVERRIDE { numLevels_ = numLevels; } |
597 | |
598 | virtual double getPyrScale() const CV_OVERRIDE { return pyrScale_; } |
599 | virtual void setPyrScale(double pyrScale) CV_OVERRIDE { pyrScale_ = pyrScale; } |
600 | |
601 | virtual bool getFastPyramids() const CV_OVERRIDE { return fastPyramids_; } |
602 | virtual void setFastPyramids(bool fastPyramids) CV_OVERRIDE { fastPyramids_ = fastPyramids; } |
603 | |
604 | virtual int getWinSize() const CV_OVERRIDE { return winSize_; } |
605 | virtual void setWinSize(int winSize) CV_OVERRIDE { winSize_ = winSize; } |
606 | |
607 | virtual int getNumIters() const CV_OVERRIDE { return numIters_; } |
608 | virtual void setNumIters(int numIters) CV_OVERRIDE { numIters_ = numIters; } |
609 | |
610 | virtual int getPolyN() const CV_OVERRIDE { return polyN_; } |
611 | virtual void setPolyN(int polyN) CV_OVERRIDE { polyN_ = polyN; } |
612 | |
613 | virtual double getPolySigma() const CV_OVERRIDE { return polySigma_; } |
614 | virtual void setPolySigma(double polySigma) CV_OVERRIDE { polySigma_ = polySigma; } |
615 | |
616 | virtual int getFlags() const CV_OVERRIDE { return flags_; } |
617 | virtual void setFlags(int flags) CV_OVERRIDE { flags_ = flags; } |
618 | |
619 | virtual void calc(InputArray I0, InputArray I1, InputOutputArray flow) CV_OVERRIDE; |
620 | |
621 | virtual String getDefaultName() const CV_OVERRIDE { return "DenseOpticalFlow.FarnebackOpticalFlow"; } |
622 | |
623 | private: |
624 | int numLevels_; |
625 | double pyrScale_; |
626 | bool fastPyramids_; |
627 | int winSize_; |
628 | int numIters_; |
629 | int polyN_; |
630 | double polySigma_; |
631 | int flags_; |
632 | |
633 | #ifdef HAVE_OPENCL |
634 | bool operator ()(const UMat &frame0, const UMat &frame1, UMat &flowx, UMat &flowy) |
635 | { |
636 | CV_Assert(frame0.channels() == 1 && frame1.channels() == 1); |
637 | CV_Assert(frame0.size() == frame1.size()); |
638 | CV_Assert(polyN_ == 5 || polyN_ == 7); |
639 | CV_Assert(!fastPyramids_ || std::abs(pyrScale_ - 0.5) < 1e-6); |
640 | |
641 | const int min_size = 32; |
642 | |
643 | Size size = frame0.size(); |
644 | UMat prevFlowX, prevFlowY, curFlowX, curFlowY; |
645 | |
646 | UMat flowx0 = flowx; |
647 | UMat flowy0 = flowy; |
648 | |
649 | // Crop unnecessary levels |
650 | double scale = 1; |
651 | int numLevelsCropped = 0; |
652 | for (; numLevelsCropped < numLevels_; numLevelsCropped++) |
653 | { |
654 | scale *= pyrScale_; |
655 | if (size.width*scale < min_size || size.height*scale < min_size) |
656 | break; |
657 | } |
658 | |
659 | frame0.convertTo(m: frames_[0], CV_32F); |
660 | frame1.convertTo(m: frames_[1], CV_32F); |
661 | |
662 | if (fastPyramids_) |
663 | { |
664 | // Build Gaussian pyramids using pyrDown() |
665 | pyramid0_.resize(new_size: numLevelsCropped + 1); |
666 | pyramid1_.resize(new_size: numLevelsCropped + 1); |
667 | pyramid0_[0] = frames_[0]; |
668 | pyramid1_[0] = frames_[1]; |
669 | for (int i = 1; i <= numLevelsCropped; ++i) |
670 | { |
671 | pyrDown(src: pyramid0_[i - 1], dst: pyramid0_[i]); |
672 | pyrDown(src: pyramid1_[i - 1], dst: pyramid1_[i]); |
673 | } |
674 | } |
675 | |
676 | setPolynomialExpansionConsts(n: polyN_, sigma: polySigma_); |
677 | |
678 | for (int k = numLevelsCropped; k >= 0; k--) |
679 | { |
680 | scale = 1; |
681 | for (int i = 0; i < k; i++) |
682 | scale *= pyrScale_; |
683 | |
684 | double sigma = (1./scale - 1) * 0.5; |
685 | int smoothSize = cvRound(value: sigma*5) | 1; |
686 | smoothSize = std::max(a: smoothSize, b: 3); |
687 | |
688 | int width = cvRound(value: size.width*scale); |
689 | int height = cvRound(value: size.height*scale); |
690 | |
691 | if (fastPyramids_) |
692 | { |
693 | width = pyramid0_[k].cols; |
694 | height = pyramid0_[k].rows; |
695 | } |
696 | |
697 | if (k > 0) |
698 | { |
699 | curFlowX.create(rows: height, cols: width, CV_32F); |
700 | curFlowY.create(rows: height, cols: width, CV_32F); |
701 | } |
702 | else |
703 | { |
704 | curFlowX = flowx0; |
705 | curFlowY = flowy0; |
706 | } |
707 | |
708 | if (prevFlowX.empty()) |
709 | { |
710 | if (flags_ & cv::OPTFLOW_USE_INITIAL_FLOW) |
711 | { |
712 | resize(src: flowx0, dst: curFlowX, dsize: Size(width, height), fx: 0, fy: 0, interpolation: INTER_LINEAR); |
713 | resize(src: flowy0, dst: curFlowY, dsize: Size(width, height), fx: 0, fy: 0, interpolation: INTER_LINEAR); |
714 | multiply(src1: scale, src2: curFlowX, dst: curFlowX); |
715 | multiply(src1: scale, src2: curFlowY, dst: curFlowY); |
716 | } |
717 | else |
718 | { |
719 | curFlowX.setTo(value: 0); |
720 | curFlowY.setTo(value: 0); |
721 | } |
722 | } |
723 | else |
724 | { |
725 | resize(src: prevFlowX, dst: curFlowX, dsize: Size(width, height), fx: 0, fy: 0, interpolation: INTER_LINEAR); |
726 | resize(src: prevFlowY, dst: curFlowY, dsize: Size(width, height), fx: 0, fy: 0, interpolation: INTER_LINEAR); |
727 | multiply(src1: 1./pyrScale_, src2: curFlowX, dst: curFlowX); |
728 | multiply(src1: 1./pyrScale_, src2: curFlowY, dst: curFlowY); |
729 | } |
730 | |
731 | UMat M = allocMatFromBuf(rows: 5*height, cols: width, CV_32F, mat&: M_); |
732 | UMat bufM = allocMatFromBuf(rows: 5*height, cols: width, CV_32F, mat&: bufM_); |
733 | UMat R[2] = |
734 | { |
735 | allocMatFromBuf(rows: 5*height, cols: width, CV_32F, mat&: R_[0]), |
736 | allocMatFromBuf(rows: 5*height, cols: width, CV_32F, mat&: R_[1]) |
737 | }; |
738 | |
739 | if (fastPyramids_) |
740 | { |
741 | if (!polynomialExpansionOcl(src: pyramid0_[k], dst&: R[0])) |
742 | return false; |
743 | if (!polynomialExpansionOcl(src: pyramid1_[k], dst&: R[1])) |
744 | return false; |
745 | } |
746 | else |
747 | { |
748 | UMat blurredFrame[2] = |
749 | { |
750 | allocMatFromBuf(rows: size.height, cols: size.width, CV_32F, mat&: blurredFrame_[0]), |
751 | allocMatFromBuf(rows: size.height, cols: size.width, CV_32F, mat&: blurredFrame_[1]) |
752 | }; |
753 | UMat pyrLevel[2] = |
754 | { |
755 | allocMatFromBuf(rows: height, cols: width, CV_32F, mat&: pyrLevel_[0]), |
756 | allocMatFromBuf(rows: height, cols: width, CV_32F, mat&: pyrLevel_[1]) |
757 | }; |
758 | |
759 | setGaussianBlurKernel(smoothSize, sigma); |
760 | |
761 | for (int i = 0; i < 2; i++) |
762 | { |
763 | if (!gaussianBlurOcl(src: frames_[i], ksizeHalf: smoothSize/2, dst&: blurredFrame[i])) |
764 | return false; |
765 | resize(src: blurredFrame[i], dst: pyrLevel[i], dsize: Size(width, height), fx: INTER_LINEAR); |
766 | if (!polynomialExpansionOcl(src: pyrLevel[i], dst&: R[i])) |
767 | return false; |
768 | } |
769 | } |
770 | |
771 | if (!updateMatricesOcl(flowx: curFlowX, flowy: curFlowY, R0: R[0], R1: R[1], M)) |
772 | return false; |
773 | |
774 | if (flags_ & OPTFLOW_FARNEBACK_GAUSSIAN) |
775 | setGaussianBlurKernel(smoothSize: winSize_, sigma: winSize_/2*0.3f); |
776 | for (int i = 0; i < numIters_; i++) |
777 | { |
778 | if (flags_ & OPTFLOW_FARNEBACK_GAUSSIAN) |
779 | { |
780 | if (!updateFlow_gaussianBlur(R0: R[0], R1: R[1], flowx&: curFlowX, flowy&: curFlowY, M, bufM, blockSize: winSize_, updateMatrices: i < numIters_-1)) |
781 | return false; |
782 | } |
783 | else |
784 | { |
785 | if (!updateFlow_boxFilter(R0: R[0], R1: R[1], flowx&: curFlowX, flowy&: curFlowY, M, bufM, blockSize: winSize_, updateMatrices: i < numIters_-1)) |
786 | return false; |
787 | } |
788 | } |
789 | |
790 | prevFlowX = curFlowX; |
791 | prevFlowY = curFlowY; |
792 | } |
793 | |
794 | flowx = curFlowX; |
795 | flowy = curFlowY; |
796 | return true; |
797 | } |
798 | virtual void collectGarbage() CV_OVERRIDE { |
799 | releaseMemory(); |
800 | } |
801 | void releaseMemory() |
802 | { |
803 | frames_[0].release(); |
804 | frames_[1].release(); |
805 | pyrLevel_[0].release(); |
806 | pyrLevel_[1].release(); |
807 | M_.release(); |
808 | bufM_.release(); |
809 | R_[0].release(); |
810 | R_[1].release(); |
811 | blurredFrame_[0].release(); |
812 | blurredFrame_[1].release(); |
813 | pyramid0_.clear(); |
814 | pyramid1_.clear(); |
815 | } |
816 | private: |
817 | UMat m_g; |
818 | UMat m_xg; |
819 | UMat m_xxg; |
820 | |
821 | double m_igd[4]; |
822 | float m_ig[4]; |
823 | void setPolynomialExpansionConsts(int n, double sigma) |
824 | { |
825 | std::vector<float> buf(n*6 + 3); |
826 | float* g = &buf[0] + n; |
827 | float* xg = g + n*2 + 1; |
828 | float* xxg = xg + n*2 + 1; |
829 | |
830 | FarnebackPrepareGaussian(n, sigma, g, xg, xxg, ig11&: m_igd[0], ig03&: m_igd[1], ig33&: m_igd[2], ig55&: m_igd[3]); |
831 | |
832 | cv::Mat t_g(1, n + 1, CV_32FC1, g); t_g.copyTo(m: m_g); |
833 | cv::Mat t_xg(1, n + 1, CV_32FC1, xg); t_xg.copyTo(m: m_xg); |
834 | cv::Mat t_xxg(1, n + 1, CV_32FC1, xxg); t_xxg.copyTo(m: m_xxg); |
835 | |
836 | m_ig[0] = static_cast<float>(m_igd[0]); |
837 | m_ig[1] = static_cast<float>(m_igd[1]); |
838 | m_ig[2] = static_cast<float>(m_igd[2]); |
839 | m_ig[3] = static_cast<float>(m_igd[3]); |
840 | } |
841 | private: |
842 | UMat m_gKer; |
843 | inline void setGaussianBlurKernel(int smoothSize, double sigma) |
844 | { |
845 | Mat g = getGaussianKernel(ksize: smoothSize, sigma, CV_32F); |
846 | Mat gKer(1, smoothSize/2 + 1, CV_32FC1, g.ptr<float>(y: smoothSize/2)); |
847 | gKer.copyTo(m: m_gKer); |
848 | } |
849 | private: |
850 | UMat frames_[2]; |
851 | UMat pyrLevel_[2], M_, bufM_, R_[2], blurredFrame_[2]; |
852 | std::vector<UMat> pyramid0_, pyramid1_; |
853 | |
854 | static UMat allocMatFromBuf(int rows, int cols, int type, UMat &mat) |
855 | { |
856 | if (!mat.empty() && mat.type() == type && mat.rows >= rows && mat.cols >= cols) |
857 | return mat(Rect(0, 0, cols, rows)); |
858 | return mat = UMat(rows, cols, type); |
859 | } |
860 | private: |
861 | #define DIVUP(total, grain) (((total) + (grain) - 1) / (grain)) |
862 | |
863 | bool gaussianBlurOcl(const UMat &src, int ksizeHalf, UMat &dst) |
864 | { |
865 | #ifdef SMALL_LOCALSIZE |
866 | size_t localsize[2] = { 128, 1}; |
867 | #else |
868 | size_t localsize[2] = { 256, 1}; |
869 | #endif |
870 | size_t globalsize[2] = { (size_t)src.cols, (size_t)src.rows}; |
871 | int smem_size = (int)((localsize[0] + 2*ksizeHalf) * sizeof(float)); |
872 | ocl::Kernel kernel; |
873 | if (!kernel.create("gaussianBlur", cv::ocl::video::optical_flow_farneback_oclsrc, "")) |
874 | return false; |
875 | |
876 | CV_Assert(dst.size() == src.size()); |
877 | int idxArg = 0; |
878 | idxArg = kernel.set(i: idxArg, arg: ocl::KernelArg::PtrReadOnly(m: src)); |
879 | idxArg = kernel.set(i: idxArg, value: (int)(src.step / src.elemSize())); |
880 | idxArg = kernel.set(i: idxArg, arg: ocl::KernelArg::PtrWriteOnly(m: dst)); |
881 | idxArg = kernel.set(i: idxArg, value: (int)(dst.step / dst.elemSize())); |
882 | idxArg = kernel.set(i: idxArg, value: dst.rows); |
883 | idxArg = kernel.set(i: idxArg, value: dst.cols); |
884 | idxArg = kernel.set(i: idxArg, arg: ocl::KernelArg::PtrReadOnly(m: m_gKer)); |
885 | idxArg = kernel.set(i: idxArg, value: (int)ksizeHalf); |
886 | kernel.set(i: idxArg, value: (void *)NULL, sz: smem_size); |
887 | return kernel.run(dims: 2, globalsize, localsize, sync: false); |
888 | } |
889 | bool gaussianBlur5Ocl(const UMat &src, int ksizeHalf, UMat &dst) |
890 | { |
891 | int height = src.rows / 5; |
892 | #ifdef SMALL_LOCALSIZE |
893 | size_t localsize[2] = { 128, 1}; |
894 | #else |
895 | size_t localsize[2] = { 256, 1}; |
896 | #endif |
897 | size_t globalsize[2] = { (size_t)src.cols, (size_t)height}; |
898 | int smem_size = (int)((localsize[0] + 2*ksizeHalf) * 5 * sizeof(float)); |
899 | ocl::Kernel kernel; |
900 | if (!kernel.create("gaussianBlur5", cv::ocl::video::optical_flow_farneback_oclsrc, "")) |
901 | return false; |
902 | |
903 | int idxArg = 0; |
904 | idxArg = kernel.set(i: idxArg, arg: ocl::KernelArg::PtrReadOnly(m: src)); |
905 | idxArg = kernel.set(i: idxArg, value: (int)(src.step / src.elemSize())); |
906 | idxArg = kernel.set(i: idxArg, arg: ocl::KernelArg::PtrWriteOnly(m: dst)); |
907 | idxArg = kernel.set(i: idxArg, value: (int)(dst.step / dst.elemSize())); |
908 | idxArg = kernel.set(i: idxArg, value: height); |
909 | idxArg = kernel.set(i: idxArg, value: src.cols); |
910 | idxArg = kernel.set(i: idxArg, arg: ocl::KernelArg::PtrReadOnly(m: m_gKer)); |
911 | idxArg = kernel.set(i: idxArg, value: (int)ksizeHalf); |
912 | kernel.set(i: idxArg, value: (void *)NULL, sz: smem_size); |
913 | return kernel.run(dims: 2, globalsize, localsize, sync: false); |
914 | } |
915 | bool polynomialExpansionOcl(const UMat &src, UMat &dst) |
916 | { |
917 | #ifdef SMALL_LOCALSIZE |
918 | size_t localsize[2] = { 128, 1}; |
919 | #else |
920 | size_t localsize[2] = { 256, 1}; |
921 | #endif |
922 | size_t globalsize[2] = { DIVUP((size_t)src.cols, localsize[0] - 2*polyN_) * localsize[0], (size_t)src.rows}; |
923 | |
924 | #if 0 |
925 | const cv::ocl::Device &device = cv::ocl::Device::getDefault(); |
926 | bool useDouble = (0 != device.doubleFPConfig()); |
927 | |
928 | cv::String build_options = cv::format("-D polyN=%d -D USE_DOUBLE=%d", polyN_, useDouble ? 1 : 0); |
929 | #else |
930 | cv::String build_options = cv::format(fmt: "-D polyN=%d", polyN_); |
931 | #endif |
932 | ocl::Kernel kernel; |
933 | if (!kernel.create("polynomialExpansion", cv::ocl::video::optical_flow_farneback_oclsrc, build_options)) |
934 | return false; |
935 | |
936 | int smem_size = (int)(3 * localsize[0] * sizeof(float)); |
937 | int idxArg = 0; |
938 | idxArg = kernel.set(i: idxArg, arg: ocl::KernelArg::PtrReadOnly(m: src)); |
939 | idxArg = kernel.set(i: idxArg, value: (int)(src.step / src.elemSize())); |
940 | idxArg = kernel.set(i: idxArg, arg: ocl::KernelArg::PtrWriteOnly(m: dst)); |
941 | idxArg = kernel.set(i: idxArg, value: (int)(dst.step / dst.elemSize())); |
942 | idxArg = kernel.set(i: idxArg, value: src.rows); |
943 | idxArg = kernel.set(i: idxArg, value: src.cols); |
944 | idxArg = kernel.set(i: idxArg, arg: ocl::KernelArg::PtrReadOnly(m: m_g)); |
945 | idxArg = kernel.set(i: idxArg, arg: ocl::KernelArg::PtrReadOnly(m: m_xg)); |
946 | idxArg = kernel.set(i: idxArg, arg: ocl::KernelArg::PtrReadOnly(m: m_xxg)); |
947 | idxArg = kernel.set(i: idxArg, value: (void *)NULL, sz: smem_size); |
948 | kernel.set(i: idxArg, value: (void *)m_ig, sz: 4 * sizeof(float)); |
949 | return kernel.run(dims: 2, globalsize, localsize, sync: false); |
950 | } |
951 | bool boxFilter5Ocl(const UMat &src, int ksizeHalf, UMat &dst) |
952 | { |
953 | int height = src.rows / 5; |
954 | #ifdef SMALL_LOCALSIZE |
955 | size_t localsize[2] = { 128, 1}; |
956 | #else |
957 | size_t localsize[2] = { 256, 1}; |
958 | #endif |
959 | size_t globalsize[2] = { (size_t)src.cols, (size_t)height}; |
960 | |
961 | ocl::Kernel kernel; |
962 | if (!kernel.create("boxFilter5", cv::ocl::video::optical_flow_farneback_oclsrc, "")) |
963 | return false; |
964 | |
965 | int smem_size = (int)((localsize[0] + 2*ksizeHalf) * 5 * sizeof(float)); |
966 | |
967 | int idxArg = 0; |
968 | idxArg = kernel.set(i: idxArg, arg: ocl::KernelArg::PtrReadOnly(m: src)); |
969 | idxArg = kernel.set(i: idxArg, value: (int)(src.step / src.elemSize())); |
970 | idxArg = kernel.set(i: idxArg, arg: ocl::KernelArg::PtrWriteOnly(m: dst)); |
971 | idxArg = kernel.set(i: idxArg, value: (int)(dst.step / dst.elemSize())); |
972 | idxArg = kernel.set(i: idxArg, value: height); |
973 | idxArg = kernel.set(i: idxArg, value: src.cols); |
974 | idxArg = kernel.set(i: idxArg, value: (int)ksizeHalf); |
975 | kernel.set(i: idxArg, value: (void *)NULL, sz: smem_size); |
976 | return kernel.run(dims: 2, globalsize, localsize, sync: false); |
977 | } |
978 | |
979 | bool updateFlowOcl(const UMat &M, UMat &flowx, UMat &flowy) |
980 | { |
981 | #ifdef SMALL_LOCALSIZE |
982 | size_t localsize[2] = { 32, 4}; |
983 | #else |
984 | size_t localsize[2] = { 32, 8}; |
985 | #endif |
986 | size_t globalsize[2] = { (size_t)flowx.cols, (size_t)flowx.rows}; |
987 | |
988 | ocl::Kernel kernel; |
989 | if (!kernel.create("updateFlow", cv::ocl::video::optical_flow_farneback_oclsrc, "")) |
990 | return false; |
991 | |
992 | int idxArg = 0; |
993 | idxArg = kernel.set(i: idxArg, arg: ocl::KernelArg::PtrWriteOnly(m: M)); |
994 | idxArg = kernel.set(i: idxArg, value: (int)(M.step / M.elemSize())); |
995 | idxArg = kernel.set(i: idxArg, arg: ocl::KernelArg::PtrReadOnly(m: flowx)); |
996 | idxArg = kernel.set(i: idxArg, value: (int)(flowx.step / flowx.elemSize())); |
997 | idxArg = kernel.set(i: idxArg, arg: ocl::KernelArg::PtrReadOnly(m: flowy)); |
998 | idxArg = kernel.set(i: idxArg, value: (int)(flowy.step / flowy.elemSize())); |
999 | idxArg = kernel.set(i: idxArg, value: (int)flowy.rows); |
1000 | kernel.set(i: idxArg, value: (int)flowy.cols); |
1001 | return kernel.run(dims: 2, globalsize, localsize, sync: false); |
1002 | } |
1003 | bool updateMatricesOcl(const UMat &flowx, const UMat &flowy, const UMat &R0, const UMat &R1, UMat &M) |
1004 | { |
1005 | #ifdef SMALL_LOCALSIZE |
1006 | size_t localsize[2] = { 32, 4}; |
1007 | #else |
1008 | size_t localsize[2] = { 32, 8}; |
1009 | #endif |
1010 | size_t globalsize[2] = { (size_t)flowx.cols, (size_t)flowx.rows}; |
1011 | |
1012 | ocl::Kernel kernel; |
1013 | if (!kernel.create("updateMatrices", cv::ocl::video::optical_flow_farneback_oclsrc, "")) |
1014 | return false; |
1015 | |
1016 | int idxArg = 0; |
1017 | idxArg = kernel.set(i: idxArg, arg: ocl::KernelArg::PtrReadOnly(m: flowx)); |
1018 | idxArg = kernel.set(i: idxArg, value: (int)(flowx.step / flowx.elemSize())); |
1019 | idxArg = kernel.set(i: idxArg, arg: ocl::KernelArg::PtrReadOnly(m: flowy)); |
1020 | idxArg = kernel.set(i: idxArg, value: (int)(flowy.step / flowy.elemSize())); |
1021 | idxArg = kernel.set(i: idxArg, value: (int)flowx.rows); |
1022 | idxArg = kernel.set(i: idxArg, value: (int)flowx.cols); |
1023 | idxArg = kernel.set(i: idxArg, arg: ocl::KernelArg::PtrReadOnly(m: R0)); |
1024 | idxArg = kernel.set(i: idxArg, value: (int)(R0.step / R0.elemSize())); |
1025 | idxArg = kernel.set(i: idxArg, arg: ocl::KernelArg::PtrReadOnly(m: R1)); |
1026 | idxArg = kernel.set(i: idxArg, value: (int)(R1.step / R1.elemSize())); |
1027 | idxArg = kernel.set(i: idxArg, arg: ocl::KernelArg::PtrWriteOnly(m: M)); |
1028 | kernel.set(i: idxArg, value: (int)(M.step / M.elemSize())); |
1029 | return kernel.run(dims: 2, globalsize, localsize, sync: false); |
1030 | } |
1031 | |
1032 | bool updateFlow_boxFilter( |
1033 | const UMat& R0, const UMat& R1, UMat& flowx, UMat &flowy, |
1034 | UMat& M, UMat &bufM, int blockSize, bool updateMatrices) |
1035 | { |
1036 | if (!boxFilter5Ocl(src: M, ksizeHalf: blockSize/2, dst&: bufM)) |
1037 | return false; |
1038 | swap(a&: M, b&: bufM); |
1039 | if (!updateFlowOcl(M, flowx, flowy)) |
1040 | return false; |
1041 | if (updateMatrices) |
1042 | if (!updateMatricesOcl(flowx, flowy, R0, R1, M)) |
1043 | return false; |
1044 | return true; |
1045 | } |
1046 | bool updateFlow_gaussianBlur( |
1047 | const UMat& R0, const UMat& R1, UMat& flowx, UMat& flowy, |
1048 | UMat& M, UMat &bufM, int blockSize, bool updateMatrices) |
1049 | { |
1050 | if (!gaussianBlur5Ocl(src: M, ksizeHalf: blockSize/2, dst&: bufM)) |
1051 | return false; |
1052 | swap(a&: M, b&: bufM); |
1053 | if (!updateFlowOcl(M, flowx, flowy)) |
1054 | return false; |
1055 | if (updateMatrices) |
1056 | if (!updateMatricesOcl(flowx, flowy, R0, R1, M)) |
1057 | return false; |
1058 | return true; |
1059 | } |
1060 | bool calc_ocl( InputArray _prev0, InputArray _next0, |
1061 | InputOutputArray _flow0) |
1062 | { |
1063 | if ((5 != polyN_) && (7 != polyN_)) |
1064 | return false; |
1065 | if (_next0.size() != _prev0.size()) |
1066 | return false; |
1067 | int typePrev = _prev0.type(); |
1068 | int typeNext = _next0.type(); |
1069 | if ((1 != CV_MAT_CN(typePrev)) || (1 != CV_MAT_CN(typeNext))) |
1070 | return false; |
1071 | |
1072 | std::vector<UMat> flowar; |
1073 | |
1074 | // If flag is set, check for integrity; if not set, allocate memory space |
1075 | if (flags_ & OPTFLOW_USE_INITIAL_FLOW) |
1076 | { |
1077 | if (_flow0.empty() || _flow0.size() != _prev0.size() || _flow0.channels() != 2 || |
1078 | _flow0.depth() != CV_32F) |
1079 | return false; |
1080 | split(m: _flow0, mv: flowar); |
1081 | } |
1082 | else |
1083 | { |
1084 | flowar.push_back(x: UMat(_prev0.size(), CV_32FC1)); |
1085 | flowar.push_back(x: UMat(_prev0.size(), CV_32FC1)); |
1086 | } |
1087 | if(!this->operator()(frame0: _prev0.getUMat(), frame1: _next0.getUMat(), flowx&: flowar[0], flowy&: flowar[1])){ |
1088 | return false; |
1089 | } |
1090 | merge(mv: flowar, dst: _flow0); |
1091 | return true; |
1092 | } |
1093 | #else // HAVE_OPENCL |
1094 | virtual void collectGarbage() CV_OVERRIDE {} |
1095 | #endif |
1096 | }; |
1097 | |
1098 | void FarnebackOpticalFlowImpl::calc(InputArray _prev0, InputArray _next0, |
1099 | InputOutputArray _flow0) |
1100 | { |
1101 | CV_INSTRUMENT_REGION(); |
1102 | |
1103 | CV_OCL_RUN(_flow0.isUMat() && |
1104 | ocl::Image2D::isFormatSupported(CV_32F, cn: 1, norm: false), |
1105 | calc_ocl(_prev0,_next0,_flow0)) |
1106 | Mat prev0 = _prev0.getMat(), next0 = _next0.getMat(); |
1107 | const int min_size = 32; |
1108 | const Mat* img[2] = { &prev0, &next0 }; |
1109 | |
1110 | int i, k; |
1111 | double scale; |
1112 | Mat prevFlow, flow, fimg; |
1113 | int levels = numLevels_; |
1114 | |
1115 | CV_Assert( prev0.size() == next0.size() && prev0.channels() == next0.channels() && |
1116 | prev0.channels() == 1 && pyrScale_ < 1 ); |
1117 | |
1118 | // If flag is set, check for integrity; if not set, allocate memory space |
1119 | if( flags_ & OPTFLOW_USE_INITIAL_FLOW ) |
1120 | CV_Assert( _flow0.size() == prev0.size() && _flow0.channels() == 2 && |
1121 | _flow0.depth() == CV_32F ); |
1122 | else |
1123 | _flow0.create( sz: prev0.size(), CV_32FC2 ); |
1124 | |
1125 | Mat flow0 = _flow0.getMat(); |
1126 | |
1127 | for( k = 0, scale = 1; k < levels; k++ ) |
1128 | { |
1129 | scale *= pyrScale_; |
1130 | if( prev0.cols*scale < min_size || prev0.rows*scale < min_size ) |
1131 | break; |
1132 | } |
1133 | |
1134 | levels = k; |
1135 | |
1136 | for( k = levels; k >= 0; k-- ) |
1137 | { |
1138 | for( i = 0, scale = 1; i < k; i++ ) |
1139 | scale *= pyrScale_; |
1140 | |
1141 | double sigma = (1./scale-1)*0.5; |
1142 | int smooth_sz = cvRound(value: sigma*5)|1; |
1143 | smooth_sz = std::max(a: smooth_sz, b: 3); |
1144 | |
1145 | int width = cvRound(value: prev0.cols*scale); |
1146 | int height = cvRound(value: prev0.rows*scale); |
1147 | |
1148 | if( k > 0 ) |
1149 | flow.create( rows: height, cols: width, CV_32FC2 ); |
1150 | else |
1151 | flow = flow0; |
1152 | |
1153 | if( prevFlow.empty() ) |
1154 | { |
1155 | if( flags_ & OPTFLOW_USE_INITIAL_FLOW ) |
1156 | { |
1157 | resize( src: flow0, dst: flow, dsize: Size(width, height), fx: 0, fy: 0, interpolation: INTER_AREA ); |
1158 | flow *= scale; |
1159 | } |
1160 | else |
1161 | flow = Mat::zeros( rows: height, cols: width, CV_32FC2 ); |
1162 | } |
1163 | else |
1164 | { |
1165 | resize( src: prevFlow, dst: flow, dsize: Size(width, height), fx: 0, fy: 0, interpolation: INTER_LINEAR ); |
1166 | flow *= 1./pyrScale_; |
1167 | } |
1168 | |
1169 | Mat R[2], I, M; |
1170 | for( i = 0; i < 2; i++ ) |
1171 | { |
1172 | img[i]->convertTo(m: fimg, CV_32F); |
1173 | GaussianBlur(src: fimg, dst: fimg, ksize: Size(smooth_sz, smooth_sz), sigmaX: sigma, sigmaY: sigma); |
1174 | resize( src: fimg, dst: I, dsize: Size(width, height), fx: INTER_LINEAR ); |
1175 | FarnebackPolyExp( src: I, dst&: R[i], n: polyN_, sigma: polySigma_ ); |
1176 | } |
1177 | |
1178 | FarnebackUpdateMatrices( R0: R[0], R1: R[1], flow: flow, matM&: M, y0: 0, y1: flow.rows ); |
1179 | |
1180 | for( i = 0; i < numIters_; i++ ) |
1181 | { |
1182 | if( flags_ & OPTFLOW_FARNEBACK_GAUSSIAN ) |
1183 | FarnebackUpdateFlow_GaussianBlur( R0: R[0], R1: R[1], flow&: flow, matM&: M, block_size: winSize_, update_matrices: i < numIters_ - 1 ); |
1184 | else |
1185 | FarnebackUpdateFlow_Blur( R0: R[0], R1: R[1], flow&: flow, matM&: M, block_size: winSize_, update_matrices: i < numIters_ - 1 ); |
1186 | } |
1187 | |
1188 | prevFlow = flow; |
1189 | } |
1190 | } |
1191 | } // namespace |
1192 | } // namespace cv |
1193 | |
1194 | void cv::calcOpticalFlowFarneback( InputArray _prev0, InputArray _next0, |
1195 | InputOutputArray _flow0, double pyr_scale, int levels, int winsize, |
1196 | int iterations, int poly_n, double poly_sigma, int flags ) |
1197 | { |
1198 | CV_INSTRUMENT_REGION(); |
1199 | |
1200 | Ptr<cv::FarnebackOpticalFlow> optflow; |
1201 | optflow = makePtr<FarnebackOpticalFlowImpl>(a1: levels,a1: pyr_scale,a1: false,a1: winsize,a1: iterations,a1: poly_n,a1: poly_sigma,a1: flags); |
1202 | optflow->calc(I0: _prev0,I1: _next0,flow: _flow0); |
1203 | } |
1204 | |
1205 | |
1206 | cv::Ptr<cv::FarnebackOpticalFlow> cv::FarnebackOpticalFlow::create(int numLevels, double pyrScale, bool fastPyramids, int winSize, |
1207 | int numIters, int polyN, double polySigma, int flags) |
1208 | { |
1209 | return makePtr<FarnebackOpticalFlowImpl>(a1: numLevels, a1: pyrScale, a1: fastPyramids, a1: winSize, |
1210 | a1: numIters, a1: polyN, a1: polySigma, a1: flags); |
1211 | } |
1212 |
Definitions
- FarnebackPrepareGaussian
- FarnebackPolyExp
- FarnebackUpdateMatrices
- FarnebackUpdateFlow_Blur
- FarnebackUpdateFlow_GaussianBlur
- FarnebackOpticalFlowImpl
- FarnebackOpticalFlowImpl
- getNumLevels
- setNumLevels
- getPyrScale
- setPyrScale
- getFastPyramids
- setFastPyramids
- getWinSize
- setWinSize
- getNumIters
- setNumIters
- getPolyN
- setPolyN
- getPolySigma
- setPolySigma
- getFlags
- setFlags
- getDefaultName
- operator ()
- collectGarbage
- releaseMemory
- setPolynomialExpansionConsts
- setGaussianBlurKernel
- allocMatFromBuf
- gaussianBlurOcl
- gaussianBlur5Ocl
- polynomialExpansionOcl
- boxFilter5Ocl
- updateFlowOcl
- updateMatricesOcl
- updateFlow_boxFilter
- updateFlow_gaussianBlur
- calc_ocl
- calc
- calcOpticalFlowFarneback
Learn to use CMake with our Intro Training
Find out more