1 | /*M/////////////////////////////////////////////////////////////////////////////////////// |
2 | // |
3 | // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. |
4 | // |
5 | // By downloading, copying, installing or using the software you agree to this license. |
6 | // If you do not agree to this license, do not download, install, |
7 | // copy or use the software. |
8 | // |
9 | // |
10 | // Intel License Agreement |
11 | // For Open Source Computer Vision Library |
12 | // |
13 | // Copyright (C) 2000, Intel Corporation, all rights reserved. |
14 | // Copyright (C) 2014, Itseez Inc., all rights reserved. |
15 | // Third party copyrights are property of their respective owners. |
16 | // |
17 | // Redistribution and use in source and binary forms, with or without modification, |
18 | // are permitted provided that the following conditions are met: |
19 | // |
20 | // * Redistribution's of source code must retain the above copyright notice, |
21 | // this list of conditions and the following disclaimer. |
22 | // |
23 | // * Redistribution's in binary form must reproduce the above copyright notice, |
24 | // this list of conditions and the following disclaimer in the documentation |
25 | // and/or other materials provided with the distribution. |
26 | // |
27 | // * The name of Intel Corporation may not be used to endorse or promote products |
28 | // derived from this software without specific prior written permission. |
29 | // |
30 | // This software is provided by the copyright holders and contributors "as is" and |
31 | // any express or implied warranties, including, but not limited to, the implied |
32 | // warranties of merchantability and fitness for a particular purpose are disclaimed. |
33 | // In no event shall the Intel Corporation or contributors be liable for any direct, |
34 | // indirect, incidental, special, exemplary, or consequential damages |
35 | // (including, but not limited to, procurement of substitute goods or services; |
36 | // loss of use, data, or profits; or business interruption) however caused |
37 | // and on any theory of liability, whether in contract, strict liability, |
38 | // or tort (including negligence or otherwise) arising in any way out of |
39 | // the use of this software, even if advised of the possibility of such damage. |
40 | // |
41 | //M*/ |
42 | |
43 | #include "precomp.hpp" |
44 | #include "opencl_kernels_imgproc.hpp" |
45 | #include "opencv2/core/hal/intrin.hpp" |
46 | #include <deque> |
47 | |
48 | #include "opencv2/core/openvx/ovx_defs.hpp" |
49 | |
50 | namespace cv |
51 | { |
52 | |
53 | #ifdef HAVE_IPP |
54 | static bool ipp_Canny(const Mat& src , const Mat& dx_, const Mat& dy_, Mat& dst, float low, float high, bool L2gradient, int aperture_size) |
55 | { |
56 | #ifdef HAVE_IPP_IW |
57 | CV_INSTRUMENT_REGION_IPP(); |
58 | |
59 | #if IPP_DISABLE_PERF_CANNY_MT |
60 | if(cv::getNumThreads()>1) |
61 | return false; |
62 | #endif |
63 | |
64 | ::ipp::IwiSize size(dst.cols, dst.rows); |
65 | IppDataType type = ippiGetDataType(depth: dst.depth()); |
66 | int channels = dst.channels(); |
67 | IppNormType norm = (L2gradient)?ippNormL2:ippNormL1; |
68 | |
69 | if(size.width <= 3 || size.height <= 3) |
70 | return false; |
71 | |
72 | if(channels != 1) |
73 | return false; |
74 | |
75 | if(type != ipp8u) |
76 | return false; |
77 | |
78 | if(src.empty()) |
79 | { |
80 | try |
81 | { |
82 | ::ipp::IwiImage iwSrcDx; |
83 | ::ipp::IwiImage iwSrcDy; |
84 | ::ipp::IwiImage iwDst; |
85 | |
86 | ippiGetImage(src: dx_, dst&: iwSrcDx); |
87 | ippiGetImage(src: dy_, dst&: iwSrcDy); |
88 | ippiGetImage(src: dst, dst&: iwDst); |
89 | |
90 | CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterCannyDeriv, iwSrcDx, iwSrcDy, iwDst, low, high, ::ipp::IwiFilterCannyDerivParams(norm)); |
91 | } |
92 | catch (const ::ipp::IwException &) |
93 | { |
94 | return false; |
95 | } |
96 | } |
97 | else |
98 | { |
99 | IppiMaskSize kernel; |
100 | |
101 | if(aperture_size == 3) |
102 | kernel = ippMskSize3x3; |
103 | else if(aperture_size == 5) |
104 | kernel = ippMskSize5x5; |
105 | else |
106 | return false; |
107 | |
108 | try |
109 | { |
110 | ::ipp::IwiImage iwSrc; |
111 | ::ipp::IwiImage iwDst; |
112 | |
113 | ippiGetImage(src, dst&: iwSrc); |
114 | ippiGetImage(src: dst, dst&: iwDst); |
115 | |
116 | CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterCanny, iwSrc, iwDst, low, high, ::ipp::IwiFilterCannyParams(ippFilterSobel, kernel, norm), ippBorderRepl); |
117 | } |
118 | catch (const ::ipp::IwException &) |
119 | { |
120 | return false; |
121 | } |
122 | } |
123 | |
124 | return true; |
125 | #else |
126 | CV_UNUSED(src); CV_UNUSED(dx_); CV_UNUSED(dy_); CV_UNUSED(dst); CV_UNUSED(low); CV_UNUSED(high); CV_UNUSED(L2gradient); CV_UNUSED(aperture_size); |
127 | return false; |
128 | #endif |
129 | } |
130 | #endif |
131 | |
132 | #ifdef HAVE_OPENCL |
133 | |
134 | template <bool useCustomDeriv> |
135 | static bool ocl_Canny(InputArray _src, const UMat& dx_, const UMat& dy_, OutputArray _dst, float low_thresh, float high_thresh, |
136 | int aperture_size, bool L2gradient, int cn, const Size & size) |
137 | { |
138 | CV_INSTRUMENT_REGION_OPENCL(); |
139 | |
140 | UMat map; |
141 | |
142 | const ocl::Device &dev = ocl::Device::getDefault(); |
143 | int max_wg_size = (int)dev.maxWorkGroupSize(); |
144 | |
145 | int lSizeX = 32; |
146 | int lSizeY = max_wg_size / 32; |
147 | |
148 | if (lSizeY == 0) |
149 | { |
150 | lSizeX = 16; |
151 | lSizeY = max_wg_size / 16; |
152 | } |
153 | if (lSizeY == 0) |
154 | { |
155 | lSizeY = 1; |
156 | } |
157 | |
158 | if (aperture_size == 7) |
159 | { |
160 | low_thresh = low_thresh / 16.0f; |
161 | high_thresh = high_thresh / 16.0f; |
162 | } |
163 | |
164 | if (L2gradient) |
165 | { |
166 | low_thresh = std::min(a: 32767.0f, b: low_thresh); |
167 | high_thresh = std::min(a: 32767.0f, b: high_thresh); |
168 | |
169 | if (low_thresh > 0) |
170 | low_thresh *= low_thresh; |
171 | if (high_thresh > 0) |
172 | high_thresh *= high_thresh; |
173 | } |
174 | int low = cvFloor(value: low_thresh), high = cvFloor(value: high_thresh); |
175 | |
176 | if (!useCustomDeriv && |
177 | aperture_size == 3 && !_src.isSubmatrix()) |
178 | { |
179 | /* |
180 | stage1_with_sobel: |
181 | Sobel operator |
182 | Calc magnitudes |
183 | Non maxima suppression |
184 | Double thresholding |
185 | */ |
186 | char cvt[50]; |
187 | ocl::Kernel with_sobel("stage1_with_sobel" , ocl::imgproc::canny_oclsrc, |
188 | format("-D WITH_SOBEL -D cn=%d -D TYPE=%s -D convert_floatN=%s -D floatN=%s -D GRP_SIZEX=%d -D GRP_SIZEY=%d%s" , |
189 | cn, ocl::memopTypeToStr(_src.depth()), |
190 | ocl::convertTypeStr(_src.depth(), CV_32F, cn, cvt, sizeof(cvt)), |
191 | ocl::typeToStr(CV_MAKE_TYPE(CV_32F, cn)), |
192 | lSizeX, lSizeY, |
193 | L2gradient ? " -D L2GRAD" : "" )); |
194 | if (with_sobel.empty()) |
195 | return false; |
196 | |
197 | UMat src = _src.getUMat(); |
198 | map.create(size, CV_32S); |
199 | with_sobel.args(kernel_args: ocl::KernelArg::ReadOnly(m: src), |
200 | kernel_args: ocl::KernelArg::WriteOnlyNoSize(m: map), |
201 | kernel_args: (float) low, kernel_args: (float) high); |
202 | |
203 | size_t globalsize[2] = { (size_t)size.width, (size_t)size.height }, |
204 | localsize[2] = { (size_t)lSizeX, (size_t)lSizeY }; |
205 | |
206 | if (!with_sobel.run(dims: 2, globalsize, localsize, sync: false)) |
207 | return false; |
208 | } |
209 | else |
210 | { |
211 | /* |
212 | stage1_without_sobel: |
213 | Calc magnitudes |
214 | Non maxima suppression |
215 | Double thresholding |
216 | */ |
217 | double scale = 1.0; |
218 | if (aperture_size == 7) |
219 | { |
220 | scale = 1 / 16.0; |
221 | } |
222 | |
223 | UMat dx, dy; |
224 | if (!useCustomDeriv) |
225 | { |
226 | Sobel(src: _src, dst: dx, CV_16S, dx: 1, dy: 0, ksize: aperture_size, scale, delta: 0, borderType: BORDER_REPLICATE); |
227 | Sobel(src: _src, dst: dy, CV_16S, dx: 0, dy: 1, ksize: aperture_size, scale, delta: 0, borderType: BORDER_REPLICATE); |
228 | } |
229 | else |
230 | { |
231 | dx = dx_; |
232 | dy = dy_; |
233 | } |
234 | |
235 | ocl::Kernel without_sobel("stage1_without_sobel" , ocl::imgproc::canny_oclsrc, |
236 | format("-D WITHOUT_SOBEL -D cn=%d -D GRP_SIZEX=%d -D GRP_SIZEY=%d%s" , |
237 | cn, lSizeX, lSizeY, L2gradient ? " -D L2GRAD" : "" )); |
238 | if (without_sobel.empty()) |
239 | return false; |
240 | |
241 | map.create(size, CV_32S); |
242 | without_sobel.args(kernel_args: ocl::KernelArg::ReadOnlyNoSize(m: dx), kernel_args: ocl::KernelArg::ReadOnlyNoSize(m: dy), |
243 | kernel_args: ocl::KernelArg::WriteOnly(m: map), |
244 | kernel_args: low, kernel_args: high); |
245 | |
246 | size_t globalsize[2] = { (size_t)size.width, (size_t)size.height }, |
247 | localsize[2] = { (size_t)lSizeX, (size_t)lSizeY }; |
248 | |
249 | if (!without_sobel.run(dims: 2, globalsize, localsize, sync: false)) |
250 | return false; |
251 | } |
252 | |
253 | int PIX_PER_WI = 8; |
254 | /* |
255 | stage2: |
256 | hysteresis (add weak edges if they are connected with strong edges) |
257 | */ |
258 | |
259 | int sizey = lSizeY / PIX_PER_WI; |
260 | if (sizey == 0) |
261 | sizey = 1; |
262 | |
263 | size_t globalsize[2] = { (size_t)size.width, ((size_t)size.height + PIX_PER_WI - 1) / PIX_PER_WI }, localsize[2] = { (size_t)lSizeX, (size_t)sizey }; |
264 | |
265 | ocl::Kernel edgesHysteresis("stage2_hysteresis" , ocl::imgproc::canny_oclsrc, |
266 | format("-D STAGE2 -D PIX_PER_WI=%d -D LOCAL_X=%d -D LOCAL_Y=%d" , |
267 | PIX_PER_WI, lSizeX, sizey)); |
268 | |
269 | if (edgesHysteresis.empty()) |
270 | return false; |
271 | |
272 | edgesHysteresis.args(kernel_args: ocl::KernelArg::ReadWrite(m: map)); |
273 | if (!edgesHysteresis.run(dims: 2, globalsize, localsize, sync: false)) |
274 | return false; |
275 | |
276 | // get edges |
277 | |
278 | ocl::Kernel getEdgesKernel("getEdges" , ocl::imgproc::canny_oclsrc, |
279 | format("-D GET_EDGES -D PIX_PER_WI=%d" , PIX_PER_WI)); |
280 | if (getEdgesKernel.empty()) |
281 | return false; |
282 | |
283 | _dst.create(sz: size, CV_8UC1); |
284 | UMat dst = _dst.getUMat(); |
285 | |
286 | getEdgesKernel.args(kernel_args: ocl::KernelArg::ReadOnly(m: map), kernel_args: ocl::KernelArg::WriteOnlyNoSize(m: dst)); |
287 | |
288 | return getEdgesKernel.run(dims: 2, globalsize, NULL, sync: false); |
289 | } |
290 | |
291 | #endif |
292 | |
// Marks the map cell pointed to by `map` as a confirmed edge (value 2) and
// queues the cell on `stack` for hysteresis tracking. Expands to a single
// parenthesized expression so it is safe as the body of an unbraced `if`.
#define CANNY_PUSH(map, stack) (*(map) = 2, (stack).push_back(map))

// If magnitude `m` exceeds `high`, the cell becomes a confirmed edge (see
// CANNY_PUSH); otherwise it is set to 0 ("might belong to an edge").
// Wrapped in do/while(0) so the macro behaves as exactly one statement.
#define CANNY_CHECK(m, high, map, stack) \
    do { \
        if ((m) > (high)) \
            CANNY_PUSH(map, stack); \
        else \
            *(map) = 0; \
    } while (0)
300 | |
301 | class parallelCanny : public ParallelLoopBody |
302 | { |
303 | public: |
304 | parallelCanny(const Mat &_src, Mat &_map, std::deque<uchar*> &borderPeaksParallel, |
305 | int _low, int _high, int _aperture_size, bool _L2gradient) : |
306 | src(_src), src2(_src), map(_map), _borderPeaksParallel(borderPeaksParallel), |
307 | low(_low), high(_high), aperture_size(_aperture_size), L2gradient(_L2gradient) |
308 | { |
309 | #if (CV_SIMD || CV_SIMD_SCALABLE) |
310 | for(int i = 0; i < VTraits<v_int8>::vlanes(); ++i) |
311 | { |
312 | smask[i] = 0; |
313 | smask[i + VTraits<v_int8>::vlanes()] = (schar)-1; |
314 | } |
315 | if (true) |
316 | _map.create(rows: src.rows + 2, cols: (int)alignSize(sz: (size_t)(src.cols + CV_SIMD_WIDTH + 1), CV_SIMD_WIDTH), CV_8UC1); |
317 | else |
318 | #endif |
319 | _map.create(rows: src.rows + 2, cols: src.cols + 2, CV_8UC1); |
320 | map = _map; |
321 | map.row(y: 0).setTo(value: 1); |
322 | map.row(y: src.rows + 1).setTo(value: 1); |
323 | mapstep = map.cols; |
324 | needGradient = true; |
325 | cn = src.channels(); |
326 | } |
327 | |
328 | parallelCanny(const Mat &_dx, const Mat &_dy, Mat &_map, std::deque<uchar*> &borderPeaksParallel, |
329 | int _low, int _high, bool _L2gradient) : |
330 | src(_dx), src2(_dy), map(_map), _borderPeaksParallel(borderPeaksParallel), |
331 | low(_low), high(_high), aperture_size(0), L2gradient(_L2gradient) |
332 | { |
333 | #if (CV_SIMD || CV_SIMD_SCALABLE) |
334 | for(int i = 0; i < VTraits<v_int8>::vlanes(); ++i) |
335 | { |
336 | smask[i] = 0; |
337 | smask[i + VTraits<v_int8>::vlanes()] = (schar)-1; |
338 | } |
339 | if (true) |
340 | _map.create(rows: src.rows + 2, cols: (int)alignSize(sz: (size_t)(src.cols + CV_SIMD_WIDTH + 1), CV_SIMD_WIDTH), CV_8UC1); |
341 | else |
342 | #endif |
343 | _map.create(rows: src.rows + 2, cols: src.cols + 2, CV_8UC1); |
344 | map = _map; |
345 | map.row(y: 0).setTo(value: 1); |
346 | map.row(y: src.rows + 1).setTo(value: 1); |
347 | mapstep = map.cols; |
348 | needGradient = false; |
349 | cn = src.channels(); |
350 | } |
351 | |
352 | ~parallelCanny() {} |
353 | |
354 | parallelCanny& operator=(const parallelCanny&) { return *this; } |
355 | |
356 | void operator()(const Range &boundaries) const CV_OVERRIDE |
357 | { |
358 | CV_TRACE_FUNCTION(); |
359 | |
360 | CV_DbgAssert(cn > 0); |
361 | |
362 | Mat dx, dy; |
363 | AutoBuffer<short> dxMax(0), dyMax(0); |
364 | std::deque<uchar*> stack, borderPeaksLocal; |
365 | const int rowStart = max(a: 0, b: boundaries.start - 1), rowEnd = min(a: src.rows, b: boundaries.end + 1); |
366 | int *_mag_p, *_mag_a, *_mag_n; |
367 | short *_dx, *_dy, *_dx_a = NULL, *_dy_a = NULL, *_dx_n = NULL, *_dy_n = NULL; |
368 | uchar *_pmap; |
369 | double scale = 1.0; |
370 | |
371 | CV_TRACE_REGION("gradient" ) |
372 | if(needGradient) |
373 | { |
374 | if (aperture_size == 7) |
375 | { |
376 | scale = 1 / 16.0; |
377 | } |
378 | Sobel(src: src.rowRange(startrow: rowStart, endrow: rowEnd), dst: dx, CV_16S, dx: 1, dy: 0, ksize: aperture_size, scale, delta: 0, borderType: BORDER_REPLICATE); |
379 | Sobel(src: src.rowRange(startrow: rowStart, endrow: rowEnd), dst: dy, CV_16S, dx: 0, dy: 1, ksize: aperture_size, scale, delta: 0, borderType: BORDER_REPLICATE); |
380 | } |
381 | else |
382 | { |
383 | dx = src.rowRange(startrow: rowStart, endrow: rowEnd); |
384 | dy = src2.rowRange(startrow: rowStart, endrow: rowEnd); |
385 | } |
386 | |
387 | CV_TRACE_REGION_NEXT("magnitude" ); |
388 | if(cn > 1) |
389 | { |
390 | dxMax.allocate(size: 2 * dx.cols); |
391 | dyMax.allocate(size: 2 * dy.cols); |
392 | _dx_a = dxMax.data(); |
393 | _dx_n = _dx_a + dx.cols; |
394 | _dy_a = dyMax.data(); |
395 | _dy_n = _dy_a + dy.cols; |
396 | } |
397 | |
398 | // _mag_p: previous row, _mag_a: actual row, _mag_n: next row |
399 | #if (CV_SIMD || CV_SIMD_SCALABLE) |
400 | AutoBuffer<int> buffer(3 * (mapstep * cn + CV_SIMD_WIDTH)); |
401 | _mag_p = alignPtr(ptr: buffer.data() + 1, CV_SIMD_WIDTH); |
402 | _mag_a = alignPtr(ptr: _mag_p + mapstep * cn, CV_SIMD_WIDTH); |
403 | _mag_n = alignPtr(ptr: _mag_a + mapstep * cn, CV_SIMD_WIDTH); |
404 | #else |
405 | AutoBuffer<int> buffer(3 * (mapstep * cn)); |
406 | _mag_p = buffer.data() + 1; |
407 | _mag_a = _mag_p + mapstep * cn; |
408 | _mag_n = _mag_a + mapstep * cn; |
409 | #endif |
410 | |
411 | // For the first time when just 2 rows are filled and for left and right borders |
412 | if(rowStart == boundaries.start) |
413 | memset(s: _mag_n - 1, c: 0, n: mapstep * sizeof(int)); |
414 | else |
415 | _mag_n[src.cols] = _mag_n[-1] = 0; |
416 | |
417 | _mag_a[src.cols] = _mag_a[-1] = _mag_p[src.cols] = _mag_p[-1] = 0; |
418 | |
419 | // calculate magnitude and angle of gradient, perform non-maxima suppression. |
420 | // fill the map with one of the following values: |
421 | // 0 - the pixel might belong to an edge |
422 | // 1 - the pixel can not belong to an edge |
423 | // 2 - the pixel does belong to an edge |
424 | for (int i = rowStart; i <= boundaries.end; ++i) |
425 | { |
426 | // Scroll the ring buffer |
427 | std::swap(a&: _mag_n, b&: _mag_a); |
428 | std::swap(a&: _mag_n, b&: _mag_p); |
429 | |
430 | if(i < rowEnd) |
431 | { |
432 | // Next row calculation |
433 | _dx = dx.ptr<short>(y: i - rowStart); |
434 | _dy = dy.ptr<short>(y: i - rowStart); |
435 | |
436 | if (L2gradient) |
437 | { |
438 | int j = 0, width = src.cols * cn; |
439 | #if (CV_SIMD || CV_SIMD_SCALABLE) |
440 | for ( ; j <= width - VTraits<v_int16>::vlanes(); j += VTraits<v_int16>::vlanes()) |
441 | { |
442 | v_int16 v_dx = vx_load(ptr: (const short*)(_dx + j)); |
443 | v_int16 v_dy = vx_load(ptr: (const short*)(_dy + j)); |
444 | |
445 | v_int32 v_dxp_low, v_dxp_high; |
446 | v_int32 v_dyp_low, v_dyp_high; |
447 | v_expand(a: v_dx, b0&: v_dxp_low, b1&: v_dxp_high); |
448 | v_expand(a: v_dy, b0&: v_dyp_low, b1&: v_dyp_high); |
449 | |
450 | v_store_aligned(ptr: (int *)(_mag_n + j), a: v_add(a: v_mul(a: v_dxp_low, b: v_dxp_low), b: v_mul(a: v_dyp_low, b: v_dyp_low))); |
451 | v_store_aligned(ptr: (int *)(_mag_n + j + VTraits<v_int32>::vlanes()), a: v_add(a: v_mul(a: v_dxp_high, b: v_dxp_high), b: v_mul(a: v_dyp_high, b: v_dyp_high))); |
452 | } |
453 | #endif |
454 | for ( ; j < width; ++j) |
455 | _mag_n[j] = int(_dx[j])*_dx[j] + int(_dy[j])*_dy[j]; |
456 | } |
457 | else |
458 | { |
459 | int j = 0, width = src.cols * cn; |
460 | #if (CV_SIMD || CV_SIMD_SCALABLE) |
461 | for(; j <= width - VTraits<v_int16>::vlanes(); j += VTraits<v_int16>::vlanes()) |
462 | { |
463 | v_int16 v_dx = vx_load(ptr: (const short *)(_dx + j)); |
464 | v_int16 v_dy = vx_load(ptr: (const short *)(_dy + j)); |
465 | |
466 | v_dx = v_reinterpret_as_s16(a: v_abs(x: v_dx)); |
467 | v_dy = v_reinterpret_as_s16(a: v_abs(x: v_dy)); |
468 | |
469 | v_int32 v_dx_ml, v_dy_ml, v_dx_mh, v_dy_mh; |
470 | v_expand(a: v_dx, b0&: v_dx_ml, b1&: v_dx_mh); |
471 | v_expand(a: v_dy, b0&: v_dy_ml, b1&: v_dy_mh); |
472 | |
473 | v_store_aligned(ptr: (int *)(_mag_n + j), a: v_add(a: v_dx_ml, b: v_dy_ml)); |
474 | v_store_aligned(ptr: (int *)(_mag_n + j + VTraits<v_int32>::vlanes()), a: v_add(a: v_dx_mh, b: v_dy_mh)); |
475 | } |
476 | #endif |
477 | for ( ; j < width; ++j) |
478 | _mag_n[j] = std::abs(x: int(_dx[j])) + std::abs(x: int(_dy[j])); |
479 | } |
480 | |
481 | if(cn > 1) |
482 | { |
483 | std::swap(a&: _dx_n, b&: _dx_a); |
484 | std::swap(a&: _dy_n, b&: _dy_a); |
485 | |
486 | for(int j = 0, jn = 0; j < src.cols; ++j, jn += cn) |
487 | { |
488 | int maxIdx = jn; |
489 | for(int k = 1; k < cn; ++k) |
490 | if(_mag_n[jn + k] > _mag_n[maxIdx]) maxIdx = jn + k; |
491 | |
492 | _mag_n[j] = _mag_n[maxIdx]; |
493 | _dx_n[j] = _dx[maxIdx]; |
494 | _dy_n[j] = _dy[maxIdx]; |
495 | } |
496 | |
497 | _mag_n[src.cols] = 0; |
498 | } |
499 | |
500 | // at the very beginning we do not have a complete ring |
501 | // buffer of 3 magnitude rows for non-maxima suppression |
502 | if (i <= boundaries.start) |
503 | continue; |
504 | } |
505 | else |
506 | { |
507 | memset(s: _mag_n - 1, c: 0, n: mapstep * sizeof(int)); |
508 | |
509 | if(cn > 1) |
510 | { |
511 | std::swap(a&: _dx_n, b&: _dx_a); |
512 | std::swap(a&: _dy_n, b&: _dy_a); |
513 | } |
514 | } |
515 | |
516 | // From here actual src row is (i - 1) |
517 | // Set left and right border to 1 |
518 | #if (CV_SIMD || CV_SIMD_SCALABLE) |
519 | if (true) |
520 | _pmap = map.ptr<uchar>(y: i) + CV_SIMD_WIDTH; |
521 | else |
522 | #endif |
523 | _pmap = map.ptr<uchar>(y: i) + 1; |
524 | |
525 | _pmap[src.cols] =_pmap[-1] = 1; |
526 | |
527 | if(cn == 1) |
528 | { |
529 | _dx = dx.ptr<short>(y: i - rowStart - 1); |
530 | _dy = dy.ptr<short>(y: i - rowStart - 1); |
531 | } |
532 | else |
533 | { |
534 | _dx = _dx_a; |
535 | _dy = _dy_a; |
536 | } |
537 | |
538 | const int TG22 = 13573; |
539 | int j = 0; |
540 | #if (CV_SIMD || CV_SIMD_SCALABLE) |
541 | { |
542 | const v_int32 v_low = vx_setall_s32(v: low); |
543 | const v_int8 v_one = vx_setall_s8(v: 1); |
544 | |
545 | for (; j <= src.cols - VTraits<v_int8>::vlanes(); j += VTraits<v_int8>::vlanes()) |
546 | { |
547 | v_store_aligned(ptr: (signed char*)(_pmap + j), a: v_one); |
548 | v_int8 v_cmp = v_pack(a: v_pack(a: v_gt(a: vx_load_aligned(ptr: (const int *)(_mag_a + j)), b: v_low), |
549 | b: v_gt(a: vx_load_aligned(ptr: (const int *)(_mag_a + j + VTraits<v_int32>::vlanes())), b: v_low)), |
550 | b: v_pack(a: v_gt(a: vx_load_aligned(ptr: (const int *)(_mag_a + j + 2 * VTraits<v_int32>::vlanes())), b: v_low), |
551 | b: v_gt(a: vx_load_aligned(ptr: (const int *)(_mag_a + j + 3 * VTraits<v_int32>::vlanes())), b: v_low))); |
552 | while (v_check_any(a: v_cmp)) |
553 | { |
554 | int l = v_scan_forward(a: v_cmp); |
555 | v_cmp = v_and(a: v_cmp, b: vx_load(ptr: smask + VTraits<v_int8>::vlanes() - 1 - l)); |
556 | int k = j + l; |
557 | |
558 | int m = _mag_a[k]; |
559 | short xs = _dx[k]; |
560 | short ys = _dy[k]; |
561 | int x = (int)std::abs(x: xs); |
562 | int y = (int)std::abs(x: ys) << 15; |
563 | |
564 | int tg22x = x * TG22; |
565 | |
566 | if (y < tg22x) |
567 | { |
568 | if (m > _mag_a[k - 1] && m >= _mag_a[k + 1]) |
569 | { |
570 | CANNY_CHECK(m, high, (_pmap+k), stack); |
571 | } |
572 | } |
573 | else |
574 | { |
575 | int tg67x = tg22x + (x << 16); |
576 | if (y > tg67x) |
577 | { |
578 | if (m > _mag_p[k] && m >= _mag_n[k]) |
579 | { |
580 | CANNY_CHECK(m, high, (_pmap+k), stack); |
581 | } |
582 | } |
583 | else |
584 | { |
585 | int s = (xs ^ ys) < 0 ? -1 : 1; |
586 | if(m > _mag_p[k - s] && m > _mag_n[k + s]) |
587 | { |
588 | CANNY_CHECK(m, high, (_pmap+k), stack); |
589 | } |
590 | } |
591 | } |
592 | } |
593 | } |
594 | } |
595 | #endif |
596 | for (; j < src.cols; j++) |
597 | { |
598 | int m = _mag_a[j]; |
599 | |
600 | if (m > low) |
601 | { |
602 | short xs = _dx[j]; |
603 | short ys = _dy[j]; |
604 | int x = (int)std::abs(x: xs); |
605 | int y = (int)std::abs(x: ys) << 15; |
606 | |
607 | int tg22x = x * TG22; |
608 | |
609 | if (y < tg22x) |
610 | { |
611 | if (m > _mag_a[j - 1] && m >= _mag_a[j + 1]) |
612 | { |
613 | CANNY_CHECK(m, high, (_pmap+j), stack); |
614 | continue; |
615 | } |
616 | } |
617 | else |
618 | { |
619 | int tg67x = tg22x + (x << 16); |
620 | if (y > tg67x) |
621 | { |
622 | if (m > _mag_p[j] && m >= _mag_n[j]) |
623 | { |
624 | CANNY_CHECK(m, high, (_pmap+j), stack); |
625 | continue; |
626 | } |
627 | } |
628 | else |
629 | { |
630 | int s = (xs ^ ys) < 0 ? -1 : 1; |
631 | if(m > _mag_p[j - s] && m > _mag_n[j + s]) |
632 | { |
633 | CANNY_CHECK(m, high, (_pmap+j), stack); |
634 | continue; |
635 | } |
636 | } |
637 | } |
638 | } |
639 | _pmap[j] = 1; |
640 | } |
641 | } |
642 | |
643 | // Not for first row of first slice or last row of last slice |
644 | uchar *pmapLower = (rowStart == 0) ? map.data : (map.data + (boundaries.start + 2) * mapstep); |
645 | uint pmapDiff = (uint)(((rowEnd == src.rows) ? map.datalimit : (map.data + boundaries.end * mapstep)) - pmapLower); |
646 | |
647 | // now track the edges (hysteresis thresholding) |
648 | CV_TRACE_REGION_NEXT("hysteresis" ); |
649 | while (!stack.empty()) |
650 | { |
651 | uchar *m = stack.back(); |
652 | stack.pop_back(); |
653 | |
654 | // Stops thresholding from expanding to other slices by sending pixels in the borders of each |
655 | // slice in a queue to be serially processed later. |
656 | if((unsigned)(m - pmapLower) < pmapDiff) |
657 | { |
658 | if (!m[-mapstep-1]) CANNY_PUSH((m-mapstep-1), stack); |
659 | if (!m[-mapstep]) CANNY_PUSH((m-mapstep), stack); |
660 | if (!m[-mapstep+1]) CANNY_PUSH((m-mapstep+1), stack); |
661 | if (!m[-1]) CANNY_PUSH((m-1), stack); |
662 | if (!m[1]) CANNY_PUSH((m+1), stack); |
663 | if (!m[mapstep-1]) CANNY_PUSH((m+mapstep-1), stack); |
664 | if (!m[mapstep]) CANNY_PUSH((m+mapstep), stack); |
665 | if (!m[mapstep+1]) CANNY_PUSH((m+mapstep+1), stack); |
666 | } |
667 | else |
668 | { |
669 | borderPeaksLocal.push_back(x: m); |
670 | ptrdiff_t mapstep2 = m < pmapLower ? mapstep : -mapstep; |
671 | |
672 | if (!m[-1]) CANNY_PUSH((m-1), stack); |
673 | if (!m[1]) CANNY_PUSH((m+1), stack); |
674 | if (!m[mapstep2-1]) CANNY_PUSH((m+mapstep2-1), stack); |
675 | if (!m[mapstep2]) CANNY_PUSH((m+mapstep2), stack); |
676 | if (!m[mapstep2+1]) CANNY_PUSH((m+mapstep2+1), stack); |
677 | } |
678 | } |
679 | |
680 | if(!borderPeaksLocal.empty()) |
681 | { |
682 | AutoLock lock(mutex); |
683 | _borderPeaksParallel.insert(position: _borderPeaksParallel.end(), first: borderPeaksLocal.begin(), last: borderPeaksLocal.end()); |
684 | } |
685 | } |
686 | |
687 | private: |
688 | const Mat &src, &src2; |
689 | Mat ↦ |
690 | std::deque<uchar*> &_borderPeaksParallel; |
691 | int low, high, aperture_size; |
692 | bool L2gradient, needGradient; |
693 | ptrdiff_t mapstep; |
694 | int cn; |
695 | mutable Mutex mutex; |
696 | #if (CV_SIMD || CV_SIMD_SCALABLE) |
697 | schar smask[2*VTraits<v_int8>::max_nlanes]; |
698 | #endif |
699 | }; |
700 | |
701 | class finalPass : public ParallelLoopBody |
702 | { |
703 | |
704 | public: |
705 | finalPass(const Mat &_map, Mat &_dst) : |
706 | map(_map), dst(_dst) |
707 | { |
708 | dst = _dst; |
709 | } |
710 | |
711 | ~finalPass() {} |
712 | |
713 | void operator()(const Range &boundaries) const CV_OVERRIDE |
714 | { |
715 | // the final pass, form the final image |
716 | for (int i = boundaries.start; i < boundaries.end; i++) |
717 | { |
718 | int j = 0; |
719 | uchar *pdst = dst.ptr<uchar>(y: i); |
720 | const uchar *pmap = map.ptr<uchar>(y: i + 1); |
721 | #if (CV_SIMD || CV_SIMD_SCALABLE) |
722 | if (true) |
723 | pmap += CV_SIMD_WIDTH; |
724 | else |
725 | #endif |
726 | pmap += 1; |
727 | #if (CV_SIMD || CV_SIMD_SCALABLE) |
728 | { |
729 | const v_uint8 v_zero = vx_setzero_u8(); |
730 | const v_uint8 v_ff = v_not(a: v_zero); |
731 | const v_uint8 v_two = vx_setall_u8(v: 2); |
732 | |
733 | for (; j <= dst.cols - VTraits<v_uint8>::vlanes(); j += VTraits<v_uint8>::vlanes()) |
734 | { |
735 | v_uint8 v_pmap = vx_load_aligned(ptr: (const unsigned char*)(pmap + j)); |
736 | v_pmap = v_select(mask: v_eq(a: v_pmap, b: v_two), a: v_ff, b: v_zero); |
737 | v_store(ptr: (pdst + j), a: v_pmap); |
738 | } |
739 | |
740 | if (j <= dst.cols - VTraits<v_uint8>::vlanes()/2) |
741 | { |
742 | v_uint8 v_pmap = vx_load_low(ptr: (const unsigned char*)(pmap + j)); |
743 | v_pmap = v_select(mask: v_eq(a: v_pmap, b: v_two), a: v_ff, b: v_zero); |
744 | v_store_low(ptr: (pdst + j), a: v_pmap); |
745 | j += VTraits<v_uint8>::vlanes()/2; |
746 | } |
747 | } |
748 | #endif |
749 | for (; j < dst.cols; j++) |
750 | { |
751 | pdst[j] = (uchar)-(pmap[j] >> 1); |
752 | } |
753 | } |
754 | } |
755 | |
756 | private: |
757 | const Mat ↦ |
758 | Mat &dst; |
759 | |
760 | finalPass(const finalPass&); // = delete |
761 | finalPass& operator=(const finalPass&); // = delete |
762 | }; |
763 | |
764 | #ifdef HAVE_OPENVX |
765 | namespace ovx { |
766 | template <> inline bool skipSmallImages<VX_KERNEL_CANNY_EDGE_DETECTOR>(int w, int h) { return w*h < 640 * 480; } |
767 | } |
768 | static bool openvx_canny(const Mat& src, Mat& dst, int loVal, int hiVal, int kSize, bool useL2) |
769 | { |
770 | using namespace ivx; |
771 | |
772 | Context context = ovx::getOpenVXContext(); |
773 | try |
774 | { |
775 | Image _src = Image::createFromHandle( |
776 | context, |
777 | Image::matTypeToFormat(src.type()), |
778 | Image::createAddressing(src), |
779 | src.data ); |
780 | Image _dst = Image::createFromHandle( |
781 | context, |
782 | Image::matTypeToFormat(dst.type()), |
783 | Image::createAddressing(dst), |
784 | dst.data ); |
785 | Threshold threshold = Threshold::createRange(context, VX_TYPE_UINT8, saturate_cast<uchar>(loVal), saturate_cast<uchar>(hiVal)); |
786 | |
787 | #if 0 |
788 | // the code below is disabled because vxuCannyEdgeDetector() |
789 | // ignores context attribute VX_CONTEXT_IMMEDIATE_BORDER |
790 | |
791 | // FIXME: may fail in multithread case |
792 | border_t prevBorder = context.immediateBorder(); |
793 | context.setImmediateBorder(VX_BORDER_REPLICATE); |
794 | IVX_CHECK_STATUS( vxuCannyEdgeDetector(context, _src, threshold, kSize, (useL2 ? VX_NORM_L2 : VX_NORM_L1), _dst) ); |
795 | context.setImmediateBorder(prevBorder); |
796 | #else |
797 | // alternative code without vxuCannyEdgeDetector() |
798 | Graph graph = Graph::create(context); |
799 | ivx::Node node = ivx::Node(vxCannyEdgeDetectorNode(graph, _src, threshold, kSize, (useL2 ? VX_NORM_L2 : VX_NORM_L1), _dst) ); |
800 | node.setBorder(VX_BORDER_REPLICATE); |
801 | graph.verify(); |
802 | graph.process(); |
803 | #endif |
804 | |
805 | #ifdef VX_VERSION_1_1 |
806 | _src.swapHandle(); |
807 | _dst.swapHandle(); |
808 | #endif |
809 | } |
810 | catch(const WrapperError& e) |
811 | { |
812 | VX_DbgThrow(e.what()); |
813 | } |
814 | catch(const RuntimeError& e) |
815 | { |
816 | VX_DbgThrow(e.what()); |
817 | } |
818 | |
819 | return true; |
820 | } |
821 | #endif // HAVE_OPENVX |
822 | |
823 | void Canny( InputArray _src, OutputArray _dst, |
824 | double low_thresh, double high_thresh, |
825 | int aperture_size, bool L2gradient ) |
826 | { |
827 | CV_INSTRUMENT_REGION(); |
828 | |
829 | CV_Assert( _src.depth() == CV_8U ); |
830 | |
831 | const Size size = _src.size(); |
832 | |
833 | // we don't support inplace parameters in case with RGB/BGR src |
834 | CV_Assert((_dst.getObj() != _src.getObj() || _src.type() == CV_8UC1) && "Inplace parameters are not supported" ); |
835 | |
836 | _dst.create(sz: size, CV_8U); |
837 | |
838 | // backward compatibility |
839 | const int CV_CANNY_L2_GRADIENT = (1 << 31); |
840 | if (!L2gradient && (aperture_size & CV_CANNY_L2_GRADIENT) == CV_CANNY_L2_GRADIENT) |
841 | { |
842 | aperture_size &= ~CV_CANNY_L2_GRADIENT; |
843 | L2gradient = true; |
844 | } |
845 | |
846 | if ((aperture_size & 1) == 0 || (aperture_size != -1 && (aperture_size < 3 || aperture_size > 7))) |
847 | CV_Error(cv::Error::StsBadFlag, "Aperture size should be odd between 3 and 7" ); |
848 | |
849 | if (aperture_size == 7) |
850 | { |
851 | low_thresh = low_thresh / 16.0; |
852 | high_thresh = high_thresh / 16.0; |
853 | } |
854 | |
855 | if (low_thresh > high_thresh) |
856 | std::swap(a&: low_thresh, b&: high_thresh); |
857 | |
858 | CV_OCL_RUN(_dst.isUMat() && (_src.channels() == 1 || _src.channels() == 3), |
859 | ocl_Canny<false>(_src, dx_: UMat(), dy_: UMat(), _dst, low_thresh: (float)low_thresh, high_thresh: (float)high_thresh, aperture_size, L2gradient, cn: _src.channels(), size)) |
860 | |
861 | Mat src0 = _src.getMat(), dst = _dst.getMat(); |
862 | Mat src(src0.size(), src0.type(), src0.data, src0.step); |
863 | |
864 | CALL_HAL(canny, cv_hal_canny, src.data, src.step, dst.data, dst.step, src.cols, src.rows, src.channels(), |
865 | low_thresh, high_thresh, aperture_size, L2gradient); |
866 | |
867 | CV_OVX_RUN( |
868 | false && /* disabling due to accuracy issues */ |
869 | src.type() == CV_8UC1 && |
870 | !src.isSubmatrix() && |
871 | src.cols >= aperture_size && |
872 | src.rows >= aperture_size && |
873 | !ovx::skipSmallImages<VX_KERNEL_CANNY_EDGE_DETECTOR>(src.cols, src.rows), |
874 | openvx_canny( |
875 | src, |
876 | dst, |
877 | cvFloor(low_thresh), |
878 | cvFloor(high_thresh), |
879 | aperture_size, |
880 | L2gradient ) ) |
881 | |
882 | CV_IPP_RUN_FAST(ipp_Canny(src, Mat(), Mat(), dst, (float)low_thresh, (float)high_thresh, L2gradient, aperture_size)) |
883 | |
884 | if (L2gradient) |
885 | { |
886 | low_thresh = std::min(a: 32767.0, b: low_thresh); |
887 | high_thresh = std::min(a: 32767.0, b: high_thresh); |
888 | |
889 | if (low_thresh > 0) low_thresh *= low_thresh; |
890 | if (high_thresh > 0) high_thresh *= high_thresh; |
891 | } |
892 | int low = cvFloor(value: low_thresh); |
893 | int high = cvFloor(value: high_thresh); |
894 | |
895 | // If Scharr filter: aperture size is 3, ksize2 is 1 |
896 | int ksize2 = aperture_size < 0 ? 1 : aperture_size / 2; |
897 | // Minimum number of threads should be 1, maximum should not exceed number of CPU's, because of overhead |
898 | int numOfThreads = std::max(a: 1, b: std::min(a: getNumThreads(), b: getNumberOfCPUs())); |
899 | // Make a fallback for pictures with too few rows. |
900 | int grainSize = src.rows / numOfThreads; |
901 | int minGrainSize = 2 * (ksize2 + 1); |
902 | if (grainSize < minGrainSize) |
903 | numOfThreads = std::max(a: 1, b: src.rows / minGrainSize); |
904 | |
905 | Mat map; |
906 | std::deque<uchar*> stack; |
907 | |
908 | parallel_for_(range: Range(0, src.rows), body: parallelCanny(src, map, stack, low, high, aperture_size, L2gradient), nstripes: numOfThreads); |
909 | |
910 | CV_TRACE_REGION("global_hysteresis" ); |
911 | // now track the edges (hysteresis thresholding) |
912 | ptrdiff_t mapstep = map.cols; |
913 | |
914 | while (!stack.empty()) |
915 | { |
916 | uchar* m = stack.back(); |
917 | stack.pop_back(); |
918 | |
919 | if (!m[-mapstep-1]) CANNY_PUSH((m-mapstep-1), stack); |
920 | if (!m[-mapstep]) CANNY_PUSH((m-mapstep), stack); |
921 | if (!m[-mapstep+1]) CANNY_PUSH((m-mapstep+1), stack); |
922 | if (!m[-1]) CANNY_PUSH((m-1), stack); |
923 | if (!m[1]) CANNY_PUSH((m+1), stack); |
924 | if (!m[mapstep-1]) CANNY_PUSH((m+mapstep-1), stack); |
925 | if (!m[mapstep]) CANNY_PUSH((m+mapstep), stack); |
926 | if (!m[mapstep+1]) CANNY_PUSH((m+mapstep+1), stack); |
927 | } |
928 | |
929 | CV_TRACE_REGION_NEXT("finalPass" ); |
930 | parallel_for_(range: Range(0, src.rows), body: finalPass(map, dst), nstripes: src.total()/(double)(1<<16)); |
931 | } |
932 | |
933 | void Canny( InputArray _dx, InputArray _dy, OutputArray _dst, |
934 | double low_thresh, double high_thresh, |
935 | bool L2gradient ) |
936 | { |
937 | CV_INSTRUMENT_REGION(); |
938 | |
939 | CV_Assert(_dx.dims() == 2); |
940 | CV_Assert(_dx.type() == CV_16SC1 || _dx.type() == CV_16SC3); |
941 | CV_Assert(_dy.type() == _dx.type()); |
942 | CV_Assert(_dx.sameSize(_dy)); |
943 | |
944 | if (low_thresh > high_thresh) |
945 | std::swap(a&: low_thresh, b&: high_thresh); |
946 | |
947 | const Size size = _dx.size(); |
948 | |
949 | CV_OCL_RUN(_dst.isUMat(), |
950 | ocl_Canny<true>(src: UMat(), dx_: _dx.getUMat(), dy_: _dy.getUMat(), _dst, low_thresh: (float)low_thresh, high_thresh: (float)high_thresh, aperture_size: 0, L2gradient, cn: _dx.channels(), size)) |
951 | |
952 | _dst.create(sz: size, CV_8U); |
953 | Mat dst = _dst.getMat(); |
954 | |
955 | Mat dx = _dx.getMat(); |
956 | Mat dy = _dy.getMat(); |
957 | |
958 | CV_IPP_RUN_FAST(ipp_Canny(Mat(), dx, dy, dst, (float)low_thresh, (float)high_thresh, L2gradient, 0)) |
959 | |
960 | if (L2gradient) |
961 | { |
962 | low_thresh = std::min(a: 32767.0, b: low_thresh); |
963 | high_thresh = std::min(a: 32767.0, b: high_thresh); |
964 | |
965 | if (low_thresh > 0) low_thresh *= low_thresh; |
966 | if (high_thresh > 0) high_thresh *= high_thresh; |
967 | } |
968 | |
969 | int low = cvFloor(value: low_thresh); |
970 | int high = cvFloor(value: high_thresh); |
971 | |
972 | std::deque<uchar*> stack; |
973 | Mat map; |
974 | |
975 | // Minimum number of threads should be 1, maximum should not exceed number of CPU's, because of overhead |
976 | int numOfThreads = std::max(a: 1, b: std::min(a: getNumThreads(), b: getNumberOfCPUs())); |
977 | if (dx.rows / numOfThreads < 3) |
978 | numOfThreads = std::max(a: 1, b: dx.rows / 3); |
979 | |
980 | parallel_for_(range: Range(0, dx.rows), body: parallelCanny(dx, dy, map, stack, low, high, L2gradient), nstripes: numOfThreads); |
981 | |
982 | CV_TRACE_REGION("global_hysteresis" ) |
983 | // now track the edges (hysteresis thresholding) |
984 | ptrdiff_t mapstep = map.cols; |
985 | |
986 | while (!stack.empty()) |
987 | { |
988 | uchar* m = stack.back(); |
989 | stack.pop_back(); |
990 | |
991 | if (!m[-mapstep-1]) CANNY_PUSH((m-mapstep-1), stack); |
992 | if (!m[-mapstep]) CANNY_PUSH((m-mapstep), stack); |
993 | if (!m[-mapstep+1]) CANNY_PUSH((m-mapstep+1), stack); |
994 | if (!m[-1]) CANNY_PUSH((m-1), stack); |
995 | if (!m[1]) CANNY_PUSH((m+1), stack); |
996 | if (!m[mapstep-1]) CANNY_PUSH((m+mapstep-1), stack); |
997 | if (!m[mapstep]) CANNY_PUSH((m+mapstep), stack); |
998 | if (!m[mapstep+1]) CANNY_PUSH((m+mapstep+1), stack); |
999 | } |
1000 | |
1001 | CV_TRACE_REGION_NEXT("finalPass" ); |
1002 | parallel_for_(range: Range(0, dx.rows), body: finalPass(map, dst), nstripes: dx.total()/(double)(1<<16)); |
1003 | } |
1004 | |
1005 | } // namespace cv |
1006 | |
1007 | void cvCanny( const CvArr* image, CvArr* edges, double threshold1, |
1008 | double threshold2, int aperture_size ) |
1009 | { |
1010 | cv::Mat src = cv::cvarrToMat(arr: image), dst = cv::cvarrToMat(arr: edges); |
1011 | CV_Assert( src.size == dst.size && src.depth() == CV_8U && dst.type() == CV_8U ); |
1012 | |
1013 | cv::Canny(src: src, dst: dst, low_thresh: threshold1, high_thresh: threshold2, aperture_size: aperture_size & 255, |
1014 | L2gradient: (aperture_size & CV_CANNY_L2_GRADIENT) != 0); |
1015 | } |
1016 | |
1017 | /* End of file. */ |
1018 | |