1/*M///////////////////////////////////////////////////////////////////////////////////////
2//
3// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4//
5// By downloading, copying, installing or using the software you agree to this license.
6// If you do not agree to this license, do not download, install,
7// copy or use the software.
8//
9//
10// Intel License Agreement
11// For Open Source Computer Vision Library
12//
13// Copyright (C) 2000, Intel Corporation, all rights reserved.
14// Copyright (C) 2014, Itseez Inc., all rights reserved.
15// Third party copyrights are property of their respective owners.
16//
17// Redistribution and use in source and binary forms, with or without modification,
18// are permitted provided that the following conditions are met:
19//
20// * Redistribution's of source code must retain the above copyright notice,
21// this list of conditions and the following disclaimer.
22//
23// * Redistribution's in binary form must reproduce the above copyright notice,
24// this list of conditions and the following disclaimer in the documentation
25// and/or other materials provided with the distribution.
26//
27// * The name of Intel Corporation may not be used to endorse or promote products
28// derived from this software without specific prior written permission.
29//
30// This software is provided by the copyright holders and contributors "as is" and
31// any express or implied warranties, including, but not limited to, the implied
32// warranties of merchantability and fitness for a particular purpose are disclaimed.
33// In no event shall the Intel Corporation or contributors be liable for any direct,
34// indirect, incidental, special, exemplary, or consequential damages
35// (including, but not limited to, procurement of substitute goods or services;
36// loss of use, data, or profits; or business interruption) however caused
37// and on any theory of liability, whether in contract, strict liability,
38// or tort (including negligence or otherwise) arising in any way out of
39// the use of this software, even if advised of the possibility of such damage.
40//
41//M*/
42
43#include "precomp.hpp"
44#include "opencl_kernels_imgproc.hpp"
45#include "opencv2/core/hal/intrin.hpp"
46#include <deque>
47
48namespace cv
49{
50
51#ifdef HAVE_IPP
52static bool ipp_Canny(const Mat& src , const Mat& dx_, const Mat& dy_, Mat& dst, float low, float high, bool L2gradient, int aperture_size)
53{
54#ifdef HAVE_IPP_IW
55 CV_INSTRUMENT_REGION_IPP();
56
57#if IPP_DISABLE_PERF_CANNY_MT
58 if(cv::getNumThreads()>1)
59 return false;
60#endif
61
62 ::ipp::IwiSize size(dst.cols, dst.rows);
63 IppDataType type = ippiGetDataType(depth: dst.depth());
64 int channels = dst.channels();
65 IppNormType norm = (L2gradient)?ippNormL2:ippNormL1;
66
67 if(size.width <= 3 || size.height <= 3)
68 return false;
69
70 if(channels != 1)
71 return false;
72
73 if(type != ipp8u)
74 return false;
75
76 if(src.empty())
77 {
78 try
79 {
80 ::ipp::IwiImage iwSrcDx;
81 ::ipp::IwiImage iwSrcDy;
82 ::ipp::IwiImage iwDst;
83
84 ippiGetImage(src: dx_, dst&: iwSrcDx);
85 ippiGetImage(src: dy_, dst&: iwSrcDy);
86 ippiGetImage(src: dst, dst&: iwDst);
87
88 CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterCannyDeriv, iwSrcDx, iwSrcDy, iwDst, low, high, ::ipp::IwiFilterCannyDerivParams(norm));
89 }
90 catch (const ::ipp::IwException &)
91 {
92 return false;
93 }
94 }
95 else
96 {
97 IppiMaskSize kernel;
98
99 if(aperture_size == 3)
100 kernel = ippMskSize3x3;
101 else if(aperture_size == 5)
102 kernel = ippMskSize5x5;
103 else
104 return false;
105
106 try
107 {
108 ::ipp::IwiImage iwSrc;
109 ::ipp::IwiImage iwDst;
110
111 ippiGetImage(src, dst&: iwSrc);
112 ippiGetImage(src: dst, dst&: iwDst);
113
114 CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterCanny, iwSrc, iwDst, low, high, ::ipp::IwiFilterCannyParams(ippFilterSobel, kernel, norm), ippBorderRepl);
115 }
116 catch (const ::ipp::IwException &)
117 {
118 return false;
119 }
120 }
121
122 return true;
123#else
124 CV_UNUSED(src); CV_UNUSED(dx_); CV_UNUSED(dy_); CV_UNUSED(dst); CV_UNUSED(low); CV_UNUSED(high); CV_UNUSED(L2gradient); CV_UNUSED(aperture_size);
125 return false;
126#endif
127}
128#endif
129
130#ifdef HAVE_OPENCL
131
132template <bool useCustomDeriv>
133static bool ocl_Canny(InputArray _src, const UMat& dx_, const UMat& dy_, OutputArray _dst, float low_thresh, float high_thresh,
134 int aperture_size, bool L2gradient, int cn, const Size & size)
135{
136 CV_INSTRUMENT_REGION_OPENCL();
137
138 UMat map;
139
140 const ocl::Device &dev = ocl::Device::getDefault();
141 int max_wg_size = (int)dev.maxWorkGroupSize();
142
143 int lSizeX = 32;
144 int lSizeY = max_wg_size / 32;
145
146 if (lSizeY == 0)
147 {
148 lSizeX = 16;
149 lSizeY = max_wg_size / 16;
150 }
151 if (lSizeY == 0)
152 {
153 lSizeY = 1;
154 }
155
156 if (aperture_size == 7)
157 {
158 low_thresh = low_thresh / 16.0f;
159 high_thresh = high_thresh / 16.0f;
160 }
161
162 if (L2gradient)
163 {
164 low_thresh = std::min(a: 32767.0f, b: low_thresh);
165 high_thresh = std::min(a: 32767.0f, b: high_thresh);
166
167 if (low_thresh > 0)
168 low_thresh *= low_thresh;
169 if (high_thresh > 0)
170 high_thresh *= high_thresh;
171 }
172 int low = cvFloor(value: low_thresh), high = cvFloor(value: high_thresh);
173
174 if (!useCustomDeriv &&
175 aperture_size == 3 && !_src.isSubmatrix())
176 {
177 /*
178 stage1_with_sobel:
179 Sobel operator
180 Calc magnitudes
181 Non maxima suppression
182 Double thresholding
183 */
184 char cvt[50];
185 ocl::Kernel with_sobel("stage1_with_sobel", ocl::imgproc::canny_oclsrc,
186 format("-D WITH_SOBEL -D cn=%d -D TYPE=%s -D convert_floatN=%s -D floatN=%s -D GRP_SIZEX=%d -D GRP_SIZEY=%d%s",
187 cn, ocl::memopTypeToStr(_src.depth()),
188 ocl::convertTypeStr(_src.depth(), CV_32F, cn, cvt, sizeof(cvt)),
189 ocl::typeToStr(CV_MAKE_TYPE(CV_32F, cn)),
190 lSizeX, lSizeY,
191 L2gradient ? " -D L2GRAD" : ""));
192 if (with_sobel.empty())
193 return false;
194
195 UMat src = _src.getUMat();
196 map.create(size, CV_32S);
197 with_sobel.args(kernel_args: ocl::KernelArg::ReadOnly(m: src),
198 kernel_args: ocl::KernelArg::WriteOnlyNoSize(m: map),
199 kernel_args: (float) low, kernel_args: (float) high);
200
201 size_t globalsize[2] = { (size_t)size.width, (size_t)size.height },
202 localsize[2] = { (size_t)lSizeX, (size_t)lSizeY };
203
204 if (!with_sobel.run(dims: 2, globalsize, localsize, sync: false))
205 return false;
206 }
207 else
208 {
209 /*
210 stage1_without_sobel:
211 Calc magnitudes
212 Non maxima suppression
213 Double thresholding
214 */
215 double scale = 1.0;
216 if (aperture_size == 7)
217 {
218 scale = 1 / 16.0;
219 }
220
221 UMat dx, dy;
222 if (!useCustomDeriv)
223 {
224 Sobel(src: _src, dst: dx, CV_16S, dx: 1, dy: 0, ksize: aperture_size, scale, delta: 0, borderType: BORDER_REPLICATE);
225 Sobel(src: _src, dst: dy, CV_16S, dx: 0, dy: 1, ksize: aperture_size, scale, delta: 0, borderType: BORDER_REPLICATE);
226 }
227 else
228 {
229 dx = dx_;
230 dy = dy_;
231 }
232
233 ocl::Kernel without_sobel("stage1_without_sobel", ocl::imgproc::canny_oclsrc,
234 format("-D WITHOUT_SOBEL -D cn=%d -D GRP_SIZEX=%d -D GRP_SIZEY=%d%s",
235 cn, lSizeX, lSizeY, L2gradient ? " -D L2GRAD" : ""));
236 if (without_sobel.empty())
237 return false;
238
239 map.create(size, CV_32S);
240 without_sobel.args(kernel_args: ocl::KernelArg::ReadOnlyNoSize(m: dx), kernel_args: ocl::KernelArg::ReadOnlyNoSize(m: dy),
241 kernel_args: ocl::KernelArg::WriteOnly(m: map),
242 kernel_args: low, kernel_args: high);
243
244 size_t globalsize[2] = { (size_t)size.width, (size_t)size.height },
245 localsize[2] = { (size_t)lSizeX, (size_t)lSizeY };
246
247 if (!without_sobel.run(dims: 2, globalsize, localsize, sync: false))
248 return false;
249 }
250
251 int PIX_PER_WI = 8;
252 /*
253 stage2:
254 hysteresis (add weak edges if they are connected with strong edges)
255 */
256
257 int sizey = lSizeY / PIX_PER_WI;
258 if (sizey == 0)
259 sizey = 1;
260
261 size_t globalsize[2] = { (size_t)size.width, ((size_t)size.height + PIX_PER_WI - 1) / PIX_PER_WI }, localsize[2] = { (size_t)lSizeX, (size_t)sizey };
262
263 ocl::Kernel edgesHysteresis("stage2_hysteresis", ocl::imgproc::canny_oclsrc,
264 format("-D STAGE2 -D PIX_PER_WI=%d -D LOCAL_X=%d -D LOCAL_Y=%d",
265 PIX_PER_WI, lSizeX, sizey));
266
267 if (edgesHysteresis.empty())
268 return false;
269
270 edgesHysteresis.args(kernel_args: ocl::KernelArg::ReadWrite(m: map));
271 if (!edgesHysteresis.run(dims: 2, globalsize, localsize, sync: false))
272 return false;
273
274 // get edges
275
276 ocl::Kernel getEdgesKernel("getEdges", ocl::imgproc::canny_oclsrc,
277 format("-D GET_EDGES -D PIX_PER_WI=%d", PIX_PER_WI));
278 if (getEdgesKernel.empty())
279 return false;
280
281 _dst.create(sz: size, CV_8UC1);
282 UMat dst = _dst.getUMat();
283
284 getEdgesKernel.args(kernel_args: ocl::KernelArg::ReadOnly(m: map), kernel_args: ocl::KernelArg::WriteOnlyNoSize(m: dst));
285
286 return getEdgesKernel.run(dims: 2, globalsize, NULL, sync: false);
287}
288
289#endif
290
// Marks the map cell addressed by `map` as a confirmed edge (value 2) and queues the
// cell on `stack` so its neighbours are visited during hysteresis tracking.
// `map` is evaluated twice, so it must be free of side effects.
#define CANNY_PUSH(map, stack) ((void)(*(map) = 2), (stack).push_back(map))

// Classifies a cell that survived non-maxima suppression: a magnitude `m` above
// `high` makes it a confirmed edge (pushed via CANNY_PUSH); otherwise the cell is
// set to 0, i.e. it may still become an edge if connected to a confirmed one.
// Arguments are parenthesized so arbitrary expressions expand with the intended
// precedence, and the body is a single statement safe in any if/else context.
#define CANNY_CHECK(m, high, map, stack) \
    do { \
        if ((m) > (high)) \
            CANNY_PUSH(map, stack); \
        else \
            *(map) = 0; \
    } while (0)
298
299class parallelCanny : public ParallelLoopBody
300{
301public:
302 parallelCanny(const Mat &_src, Mat &_map, std::deque<uchar*> &borderPeaksParallel,
303 int _low, int _high, int _aperture_size, bool _L2gradient) :
304 src(_src), src2(_src), map(_map), _borderPeaksParallel(borderPeaksParallel),
305 low(_low), high(_high), aperture_size(_aperture_size), L2gradient(_L2gradient)
306 {
307#if (CV_SIMD || CV_SIMD_SCALABLE)
308 for(int i = 0; i < VTraits<v_int8>::vlanes(); ++i)
309 {
310 smask[i] = 0;
311 smask[i + VTraits<v_int8>::vlanes()] = (schar)-1;
312 }
313 if (true)
314 _map.create(rows: src.rows + 2, cols: (int)alignSize(sz: (size_t)(src.cols + CV_SIMD_WIDTH + 1), CV_SIMD_WIDTH), CV_8UC1);
315 else
316#endif
317 _map.create(rows: src.rows + 2, cols: src.cols + 2, CV_8UC1);
318 map = _map;
319 map.row(y: 0).setTo(value: 1);
320 map.row(y: src.rows + 1).setTo(value: 1);
321 mapstep = map.cols;
322 needGradient = true;
323 cn = src.channels();
324 }
325
326 parallelCanny(const Mat &_dx, const Mat &_dy, Mat &_map, std::deque<uchar*> &borderPeaksParallel,
327 int _low, int _high, bool _L2gradient) :
328 src(_dx), src2(_dy), map(_map), _borderPeaksParallel(borderPeaksParallel),
329 low(_low), high(_high), aperture_size(0), L2gradient(_L2gradient)
330 {
331#if (CV_SIMD || CV_SIMD_SCALABLE)
332 for(int i = 0; i < VTraits<v_int8>::vlanes(); ++i)
333 {
334 smask[i] = 0;
335 smask[i + VTraits<v_int8>::vlanes()] = (schar)-1;
336 }
337 if (true)
338 _map.create(rows: src.rows + 2, cols: (int)alignSize(sz: (size_t)(src.cols + CV_SIMD_WIDTH + 1), CV_SIMD_WIDTH), CV_8UC1);
339 else
340#endif
341 _map.create(rows: src.rows + 2, cols: src.cols + 2, CV_8UC1);
342 map = _map;
343 map.row(y: 0).setTo(value: 1);
344 map.row(y: src.rows + 1).setTo(value: 1);
345 mapstep = map.cols;
346 needGradient = false;
347 cn = src.channels();
348 }
349
350 ~parallelCanny() {}
351
352 parallelCanny& operator=(const parallelCanny&) { return *this; }
353
354 void operator()(const Range &boundaries) const CV_OVERRIDE
355 {
356 CV_TRACE_FUNCTION();
357
358 CV_DbgAssert(cn > 0);
359
360 Mat dx, dy;
361 AutoBuffer<short> dxMax(0), dyMax(0);
362 std::deque<uchar*> stack, borderPeaksLocal;
363 const int rowStart = max(a: 0, b: boundaries.start - 1), rowEnd = min(a: src.rows, b: boundaries.end + 1);
364 int *_mag_p, *_mag_a, *_mag_n;
365 short *_dx, *_dy, *_dx_a = NULL, *_dy_a = NULL, *_dx_n = NULL, *_dy_n = NULL;
366 uchar *_pmap;
367 double scale = 1.0;
368
369 CV_TRACE_REGION("gradient")
370 if(needGradient)
371 {
372 if (aperture_size == 7)
373 {
374 scale = 1 / 16.0;
375 }
376 Sobel(src: src.rowRange(startrow: rowStart, endrow: rowEnd), dst: dx, CV_16S, dx: 1, dy: 0, ksize: aperture_size, scale, delta: 0, borderType: BORDER_REPLICATE);
377 Sobel(src: src.rowRange(startrow: rowStart, endrow: rowEnd), dst: dy, CV_16S, dx: 0, dy: 1, ksize: aperture_size, scale, delta: 0, borderType: BORDER_REPLICATE);
378 }
379 else
380 {
381 dx = src.rowRange(startrow: rowStart, endrow: rowEnd);
382 dy = src2.rowRange(startrow: rowStart, endrow: rowEnd);
383 }
384
385 CV_TRACE_REGION_NEXT("magnitude");
386 if(cn > 1)
387 {
388 dxMax.allocate(size: 2 * dx.cols);
389 dyMax.allocate(size: 2 * dy.cols);
390 _dx_a = dxMax.data();
391 _dx_n = _dx_a + dx.cols;
392 _dy_a = dyMax.data();
393 _dy_n = _dy_a + dy.cols;
394 }
395
396 // _mag_p: previous row, _mag_a: actual row, _mag_n: next row
397#if (CV_SIMD || CV_SIMD_SCALABLE)
398 AutoBuffer<int> buffer(3 * (mapstep * cn + CV_SIMD_WIDTH));
399 _mag_p = alignPtr(ptr: buffer.data() + 1, CV_SIMD_WIDTH);
400 _mag_a = alignPtr(ptr: _mag_p + mapstep * cn, CV_SIMD_WIDTH);
401 _mag_n = alignPtr(ptr: _mag_a + mapstep * cn, CV_SIMD_WIDTH);
402#else
403 AutoBuffer<int> buffer(3 * (mapstep * cn));
404 _mag_p = buffer.data() + 1;
405 _mag_a = _mag_p + mapstep * cn;
406 _mag_n = _mag_a + mapstep * cn;
407#endif
408
409 // For the first time when just 2 rows are filled and for left and right borders
410 if(rowStart == boundaries.start)
411 memset(s: _mag_n - 1, c: 0, n: mapstep * sizeof(int));
412 else
413 _mag_n[src.cols] = _mag_n[-1] = 0;
414
415 _mag_a[src.cols] = _mag_a[-1] = _mag_p[src.cols] = _mag_p[-1] = 0;
416
417 // calculate magnitude and angle of gradient, perform non-maxima suppression.
418 // fill the map with one of the following values:
419 // 0 - the pixel might belong to an edge
420 // 1 - the pixel can not belong to an edge
421 // 2 - the pixel does belong to an edge
422 for (int i = rowStart; i <= boundaries.end; ++i)
423 {
424 // Scroll the ring buffer
425 std::swap(a&: _mag_n, b&: _mag_a);
426 std::swap(a&: _mag_n, b&: _mag_p);
427
428 if(i < rowEnd)
429 {
430 // Next row calculation
431 _dx = dx.ptr<short>(y: i - rowStart);
432 _dy = dy.ptr<short>(y: i - rowStart);
433
434 if (L2gradient)
435 {
436 int j = 0, width = src.cols * cn;
437#if (CV_SIMD || CV_SIMD_SCALABLE)
438 for ( ; j <= width - VTraits<v_int16>::vlanes(); j += VTraits<v_int16>::vlanes())
439 {
440 v_int16 v_dx = vx_load(ptr: (const short*)(_dx + j));
441 v_int16 v_dy = vx_load(ptr: (const short*)(_dy + j));
442
443 v_int32 v_dxp_low, v_dxp_high;
444 v_int32 v_dyp_low, v_dyp_high;
445 v_expand(a: v_dx, b0&: v_dxp_low, b1&: v_dxp_high);
446 v_expand(a: v_dy, b0&: v_dyp_low, b1&: v_dyp_high);
447
448 v_store_aligned(ptr: (int *)(_mag_n + j), a: v_add(a: v_mul(a: v_dxp_low, b: v_dxp_low), b: v_mul(a: v_dyp_low, b: v_dyp_low)));
449 v_store_aligned(ptr: (int *)(_mag_n + j + VTraits<v_int32>::vlanes()), a: v_add(a: v_mul(a: v_dxp_high, b: v_dxp_high), b: v_mul(a: v_dyp_high, b: v_dyp_high)));
450 }
451#endif
452 for ( ; j < width; ++j)
453 _mag_n[j] = int(_dx[j])*_dx[j] + int(_dy[j])*_dy[j];
454 }
455 else
456 {
457 int j = 0, width = src.cols * cn;
458#if (CV_SIMD || CV_SIMD_SCALABLE)
459 for(; j <= width - VTraits<v_int16>::vlanes(); j += VTraits<v_int16>::vlanes())
460 {
461 v_int16 v_dx = vx_load(ptr: (const short *)(_dx + j));
462 v_int16 v_dy = vx_load(ptr: (const short *)(_dy + j));
463
464 v_dx = v_reinterpret_as_s16(a: v_abs(x: v_dx));
465 v_dy = v_reinterpret_as_s16(a: v_abs(x: v_dy));
466
467 v_int32 v_dx_ml, v_dy_ml, v_dx_mh, v_dy_mh;
468 v_expand(a: v_dx, b0&: v_dx_ml, b1&: v_dx_mh);
469 v_expand(a: v_dy, b0&: v_dy_ml, b1&: v_dy_mh);
470
471 v_store_aligned(ptr: (int *)(_mag_n + j), a: v_add(a: v_dx_ml, b: v_dy_ml));
472 v_store_aligned(ptr: (int *)(_mag_n + j + VTraits<v_int32>::vlanes()), a: v_add(a: v_dx_mh, b: v_dy_mh));
473 }
474#endif
475 for ( ; j < width; ++j)
476 _mag_n[j] = std::abs(x: int(_dx[j])) + std::abs(x: int(_dy[j]));
477 }
478
479 if(cn > 1)
480 {
481 std::swap(a&: _dx_n, b&: _dx_a);
482 std::swap(a&: _dy_n, b&: _dy_a);
483
484 for(int j = 0, jn = 0; j < src.cols; ++j, jn += cn)
485 {
486 int maxIdx = jn;
487 for(int k = 1; k < cn; ++k)
488 if(_mag_n[jn + k] > _mag_n[maxIdx]) maxIdx = jn + k;
489
490 _mag_n[j] = _mag_n[maxIdx];
491 _dx_n[j] = _dx[maxIdx];
492 _dy_n[j] = _dy[maxIdx];
493 }
494
495 _mag_n[src.cols] = 0;
496 }
497
498 // at the very beginning we do not have a complete ring
499 // buffer of 3 magnitude rows for non-maxima suppression
500 if (i <= boundaries.start)
501 continue;
502 }
503 else
504 {
505 memset(s: _mag_n - 1, c: 0, n: mapstep * sizeof(int));
506
507 if(cn > 1)
508 {
509 std::swap(a&: _dx_n, b&: _dx_a);
510 std::swap(a&: _dy_n, b&: _dy_a);
511 }
512 }
513
514 // From here actual src row is (i - 1)
515 // Set left and right border to 1
516#if (CV_SIMD || CV_SIMD_SCALABLE)
517 if (true)
518 _pmap = map.ptr<uchar>(y: i) + CV_SIMD_WIDTH;
519 else
520#endif
521 _pmap = map.ptr<uchar>(y: i) + 1;
522
523 _pmap[src.cols] =_pmap[-1] = 1;
524
525 if(cn == 1)
526 {
527 _dx = dx.ptr<short>(y: i - rowStart - 1);
528 _dy = dy.ptr<short>(y: i - rowStart - 1);
529 }
530 else
531 {
532 _dx = _dx_a;
533 _dy = _dy_a;
534 }
535
536 const int TG22 = 13573;
537 int j = 0;
538#if (CV_SIMD || CV_SIMD_SCALABLE)
539 {
540 const v_int32 v_low = vx_setall_s32(v: low);
541 const v_int8 v_one = vx_setall_s8(v: 1);
542
543 for (; j <= src.cols - VTraits<v_int8>::vlanes(); j += VTraits<v_int8>::vlanes())
544 {
545 v_store_aligned(ptr: (signed char*)(_pmap + j), a: v_one);
546 v_int8 v_cmp = v_pack(a: v_pack(a: v_gt(a: vx_load_aligned(ptr: (const int *)(_mag_a + j)), b: v_low),
547 b: v_gt(a: vx_load_aligned(ptr: (const int *)(_mag_a + j + VTraits<v_int32>::vlanes())), b: v_low)),
548 b: v_pack(a: v_gt(a: vx_load_aligned(ptr: (const int *)(_mag_a + j + 2 * VTraits<v_int32>::vlanes())), b: v_low),
549 b: v_gt(a: vx_load_aligned(ptr: (const int *)(_mag_a + j + 3 * VTraits<v_int32>::vlanes())), b: v_low)));
550 while (v_check_any(a: v_cmp))
551 {
552 int l = v_scan_forward(a: v_cmp);
553 v_cmp = v_and(a: v_cmp, b: vx_load(ptr: smask + VTraits<v_int8>::vlanes() - 1 - l));
554 int k = j + l;
555
556 int m = _mag_a[k];
557 short xs = _dx[k];
558 short ys = _dy[k];
559 int x = (int)std::abs(x: xs);
560 int y = (int)std::abs(x: ys) << 15;
561
562 int tg22x = x * TG22;
563
564 if (y < tg22x)
565 {
566 if (m > _mag_a[k - 1] && m >= _mag_a[k + 1])
567 {
568 CANNY_CHECK(m, high, (_pmap+k), stack);
569 }
570 }
571 else
572 {
573 int tg67x = tg22x + (x << 16);
574 if (y > tg67x)
575 {
576 if (m > _mag_p[k] && m >= _mag_n[k])
577 {
578 CANNY_CHECK(m, high, (_pmap+k), stack);
579 }
580 }
581 else
582 {
583 int s = (xs ^ ys) < 0 ? -1 : 1;
584 if(m > _mag_p[k - s] && m > _mag_n[k + s])
585 {
586 CANNY_CHECK(m, high, (_pmap+k), stack);
587 }
588 }
589 }
590 }
591 }
592 }
593#endif
594 for (; j < src.cols; j++)
595 {
596 int m = _mag_a[j];
597
598 if (m > low)
599 {
600 short xs = _dx[j];
601 short ys = _dy[j];
602 int x = (int)std::abs(x: xs);
603 int y = (int)std::abs(x: ys) << 15;
604
605 int tg22x = x * TG22;
606
607 if (y < tg22x)
608 {
609 if (m > _mag_a[j - 1] && m >= _mag_a[j + 1])
610 {
611 CANNY_CHECK(m, high, (_pmap+j), stack);
612 continue;
613 }
614 }
615 else
616 {
617 int tg67x = tg22x + (x << 16);
618 if (y > tg67x)
619 {
620 if (m > _mag_p[j] && m >= _mag_n[j])
621 {
622 CANNY_CHECK(m, high, (_pmap+j), stack);
623 continue;
624 }
625 }
626 else
627 {
628 int s = (xs ^ ys) < 0 ? -1 : 1;
629 if(m > _mag_p[j - s] && m > _mag_n[j + s])
630 {
631 CANNY_CHECK(m, high, (_pmap+j), stack);
632 continue;
633 }
634 }
635 }
636 }
637 _pmap[j] = 1;
638 }
639 }
640
641 // Not for first row of first slice or last row of last slice
642 uchar *pmapLower = (rowStart == 0) ? map.data : (map.data + (boundaries.start + 2) * mapstep);
643 uint pmapDiff = (uint)(((rowEnd == src.rows) ? map.datalimit : (map.data + boundaries.end * mapstep)) - pmapLower);
644
645 // now track the edges (hysteresis thresholding)
646 CV_TRACE_REGION_NEXT("hysteresis");
647 while (!stack.empty())
648 {
649 uchar *m = stack.back();
650 stack.pop_back();
651
652 // Stops thresholding from expanding to other slices by sending pixels in the borders of each
653 // slice in a queue to be serially processed later.
654 if((unsigned)(m - pmapLower) < pmapDiff)
655 {
656 if (!m[-mapstep-1]) CANNY_PUSH((m-mapstep-1), stack);
657 if (!m[-mapstep]) CANNY_PUSH((m-mapstep), stack);
658 if (!m[-mapstep+1]) CANNY_PUSH((m-mapstep+1), stack);
659 if (!m[-1]) CANNY_PUSH((m-1), stack);
660 if (!m[1]) CANNY_PUSH((m+1), stack);
661 if (!m[mapstep-1]) CANNY_PUSH((m+mapstep-1), stack);
662 if (!m[mapstep]) CANNY_PUSH((m+mapstep), stack);
663 if (!m[mapstep+1]) CANNY_PUSH((m+mapstep+1), stack);
664 }
665 else
666 {
667 borderPeaksLocal.push_back(x: m);
668 ptrdiff_t mapstep2 = m < pmapLower ? mapstep : -mapstep;
669
670 if (!m[-1]) CANNY_PUSH((m-1), stack);
671 if (!m[1]) CANNY_PUSH((m+1), stack);
672 if (!m[mapstep2-1]) CANNY_PUSH((m+mapstep2-1), stack);
673 if (!m[mapstep2]) CANNY_PUSH((m+mapstep2), stack);
674 if (!m[mapstep2+1]) CANNY_PUSH((m+mapstep2+1), stack);
675 }
676 }
677
678 if(!borderPeaksLocal.empty())
679 {
680 AutoLock lock(mutex);
681 _borderPeaksParallel.insert(position: _borderPeaksParallel.end(), first: borderPeaksLocal.begin(), last: borderPeaksLocal.end());
682 }
683 }
684
685private:
686 const Mat &src, &src2;
687 Mat &map;
688 std::deque<uchar*> &_borderPeaksParallel;
689 int low, high, aperture_size;
690 bool L2gradient, needGradient;
691 ptrdiff_t mapstep;
692 int cn;
693 mutable Mutex mutex;
694#if (CV_SIMD || CV_SIMD_SCALABLE)
695 schar smask[2*VTraits<v_int8>::max_nlanes];
696#endif
697};
698
699class finalPass : public ParallelLoopBody
700{
701
702public:
703 finalPass(const Mat &_map, Mat &_dst) :
704 map(_map), dst(_dst)
705 {
706 dst = _dst;
707 }
708
709 ~finalPass() {}
710
711 void operator()(const Range &boundaries) const CV_OVERRIDE
712 {
713 // the final pass, form the final image
714 for (int i = boundaries.start; i < boundaries.end; i++)
715 {
716 int j = 0;
717 uchar *pdst = dst.ptr<uchar>(y: i);
718 const uchar *pmap = map.ptr<uchar>(y: i + 1);
719#if (CV_SIMD || CV_SIMD_SCALABLE)
720 if (true)
721 pmap += CV_SIMD_WIDTH;
722 else
723#endif
724 pmap += 1;
725#if (CV_SIMD || CV_SIMD_SCALABLE)
726 {
727 const v_uint8 v_zero = vx_setzero_u8();
728 const v_uint8 v_ff = v_not(a: v_zero);
729 const v_uint8 v_two = vx_setall_u8(v: 2);
730
731 for (; j <= dst.cols - VTraits<v_uint8>::vlanes(); j += VTraits<v_uint8>::vlanes())
732 {
733 v_uint8 v_pmap = vx_load_aligned(ptr: (const unsigned char*)(pmap + j));
734 v_pmap = v_select(mask: v_eq(a: v_pmap, b: v_two), a: v_ff, b: v_zero);
735 v_store(ptr: (pdst + j), a: v_pmap);
736 }
737
738 if (j <= dst.cols - VTraits<v_uint8>::vlanes()/2)
739 {
740 v_uint8 v_pmap = vx_load_low(ptr: (const unsigned char*)(pmap + j));
741 v_pmap = v_select(mask: v_eq(a: v_pmap, b: v_two), a: v_ff, b: v_zero);
742 v_store_low(ptr: (pdst + j), a: v_pmap);
743 j += VTraits<v_uint8>::vlanes()/2;
744 }
745 }
746#endif
747 for (; j < dst.cols; j++)
748 {
749 pdst[j] = (uchar)-(pmap[j] >> 1);
750 }
751 }
752 }
753
754private:
755 const Mat &map;
756 Mat &dst;
757
758 finalPass(const finalPass&); // = delete
759 finalPass& operator=(const finalPass&); // = delete
760};
761
762void Canny( InputArray _src, OutputArray _dst,
763 double low_thresh, double high_thresh,
764 int aperture_size, bool L2gradient )
765{
766 CV_INSTRUMENT_REGION();
767
768 CV_Assert( _src.depth() == CV_8U );
769
770 const Size size = _src.size();
771
772 // we don't support inplace parameters in case with RGB/BGR src
773 CV_Assert((_dst.getObj() != _src.getObj() || _src.type() == CV_8UC1) && "Inplace parameters are not supported");
774
775 _dst.create(sz: size, CV_8U);
776
777 // backward compatibility
778 const int CV_CANNY_L2_GRADIENT = (1 << 31);
779 if (!L2gradient && (aperture_size & CV_CANNY_L2_GRADIENT) == CV_CANNY_L2_GRADIENT)
780 {
781 aperture_size &= ~CV_CANNY_L2_GRADIENT;
782 L2gradient = true;
783 }
784
785 if ((aperture_size & 1) == 0 || (aperture_size != -1 && (aperture_size < 3 || aperture_size > 7)))
786 CV_Error(cv::Error::StsBadFlag, "Aperture size should be odd between 3 and 7");
787
788 if (aperture_size == 7)
789 {
790 low_thresh = low_thresh / 16.0;
791 high_thresh = high_thresh / 16.0;
792 }
793
794 if (low_thresh > high_thresh)
795 std::swap(a&: low_thresh, b&: high_thresh);
796
797 CV_OCL_RUN(_dst.isUMat() && (_src.channels() == 1 || _src.channels() == 3),
798 ocl_Canny<false>(_src, dx_: UMat(), dy_: UMat(), _dst, low_thresh: (float)low_thresh, high_thresh: (float)high_thresh, aperture_size, L2gradient, cn: _src.channels(), size))
799
800 Mat src0 = _src.getMat(), dst = _dst.getMat();
801 Mat src(src0.size(), src0.type(), src0.data, src0.step);
802
803 CALL_HAL(canny, cv_hal_canny, src.data, src.step, dst.data, dst.step, src.cols, src.rows, src.channels(),
804 low_thresh, high_thresh, aperture_size, L2gradient);
805
806 CV_IPP_RUN_FAST(ipp_Canny(src, Mat(), Mat(), dst, (float)low_thresh, (float)high_thresh, L2gradient, aperture_size))
807
808 if (L2gradient)
809 {
810 low_thresh = std::min(a: 32767.0, b: low_thresh);
811 high_thresh = std::min(a: 32767.0, b: high_thresh);
812
813 if (low_thresh > 0) low_thresh *= low_thresh;
814 if (high_thresh > 0) high_thresh *= high_thresh;
815 }
816 int low = cvFloor(value: low_thresh);
817 int high = cvFloor(value: high_thresh);
818
819 // If Scharr filter: aperture size is 3, ksize2 is 1
820 int ksize2 = aperture_size < 0 ? 1 : aperture_size / 2;
821 // Minimum number of threads should be 1, maximum should not exceed number of CPU's, because of overhead
822 int numOfThreads = std::max(a: 1, b: std::min(a: getNumThreads(), b: getNumberOfCPUs()));
823 // Make a fallback for pictures with too few rows.
824 int grainSize = src.rows / numOfThreads;
825 int minGrainSize = 2 * (ksize2 + 1);
826 if (grainSize < minGrainSize)
827 numOfThreads = std::max(a: 1, b: src.rows / minGrainSize);
828
829 Mat map;
830 std::deque<uchar*> stack;
831
832 parallel_for_(range: Range(0, src.rows), body: parallelCanny(src, map, stack, low, high, aperture_size, L2gradient), nstripes: numOfThreads);
833
834 CV_TRACE_REGION("global_hysteresis");
835 // now track the edges (hysteresis thresholding)
836 ptrdiff_t mapstep = map.cols;
837
838 while (!stack.empty())
839 {
840 uchar* m = stack.back();
841 stack.pop_back();
842
843 if (!m[-mapstep-1]) CANNY_PUSH((m-mapstep-1), stack);
844 if (!m[-mapstep]) CANNY_PUSH((m-mapstep), stack);
845 if (!m[-mapstep+1]) CANNY_PUSH((m-mapstep+1), stack);
846 if (!m[-1]) CANNY_PUSH((m-1), stack);
847 if (!m[1]) CANNY_PUSH((m+1), stack);
848 if (!m[mapstep-1]) CANNY_PUSH((m+mapstep-1), stack);
849 if (!m[mapstep]) CANNY_PUSH((m+mapstep), stack);
850 if (!m[mapstep+1]) CANNY_PUSH((m+mapstep+1), stack);
851 }
852
853 CV_TRACE_REGION_NEXT("finalPass");
854 parallel_for_(range: Range(0, src.rows), body: finalPass(map, dst), nstripes: src.total()/(double)(1<<16));
855}
856
857void Canny( InputArray _dx, InputArray _dy, OutputArray _dst,
858 double low_thresh, double high_thresh,
859 bool L2gradient )
860{
861 CV_INSTRUMENT_REGION();
862
863 CV_Assert(_dx.dims() == 2);
864 CV_Assert(_dx.type() == CV_16SC1 || _dx.type() == CV_16SC3);
865 CV_Assert(_dy.type() == _dx.type());
866 CV_Assert(_dx.sameSize(_dy));
867
868 if (low_thresh > high_thresh)
869 std::swap(a&: low_thresh, b&: high_thresh);
870
871 const Size size = _dx.size();
872
873 CV_OCL_RUN(_dst.isUMat(),
874 ocl_Canny<true>(src: UMat(), dx_: _dx.getUMat(), dy_: _dy.getUMat(), _dst, low_thresh: (float)low_thresh, high_thresh: (float)high_thresh, aperture_size: 0, L2gradient, cn: _dx.channels(), size))
875
876 _dst.create(sz: size, CV_8U);
877 Mat dst = _dst.getMat();
878
879 Mat dx = _dx.getMat();
880 Mat dy = _dy.getMat();
881
882 CV_IPP_RUN_FAST(ipp_Canny(Mat(), dx, dy, dst, (float)low_thresh, (float)high_thresh, L2gradient, 0))
883
884 if (L2gradient)
885 {
886 low_thresh = std::min(a: 32767.0, b: low_thresh);
887 high_thresh = std::min(a: 32767.0, b: high_thresh);
888
889 if (low_thresh > 0) low_thresh *= low_thresh;
890 if (high_thresh > 0) high_thresh *= high_thresh;
891 }
892
893 int low = cvFloor(value: low_thresh);
894 int high = cvFloor(value: high_thresh);
895
896 std::deque<uchar*> stack;
897 Mat map;
898
899 // Minimum number of threads should be 1, maximum should not exceed number of CPU's, because of overhead
900 int numOfThreads = std::max(a: 1, b: std::min(a: getNumThreads(), b: getNumberOfCPUs()));
901 if (dx.rows / numOfThreads < 3)
902 numOfThreads = std::max(a: 1, b: dx.rows / 3);
903
904 parallel_for_(range: Range(0, dx.rows), body: parallelCanny(dx, dy, map, stack, low, high, L2gradient), nstripes: numOfThreads);
905
906 CV_TRACE_REGION("global_hysteresis")
907 // now track the edges (hysteresis thresholding)
908 ptrdiff_t mapstep = map.cols;
909
910 while (!stack.empty())
911 {
912 uchar* m = stack.back();
913 stack.pop_back();
914
915 if (!m[-mapstep-1]) CANNY_PUSH((m-mapstep-1), stack);
916 if (!m[-mapstep]) CANNY_PUSH((m-mapstep), stack);
917 if (!m[-mapstep+1]) CANNY_PUSH((m-mapstep+1), stack);
918 if (!m[-1]) CANNY_PUSH((m-1), stack);
919 if (!m[1]) CANNY_PUSH((m+1), stack);
920 if (!m[mapstep-1]) CANNY_PUSH((m+mapstep-1), stack);
921 if (!m[mapstep]) CANNY_PUSH((m+mapstep), stack);
922 if (!m[mapstep+1]) CANNY_PUSH((m+mapstep+1), stack);
923 }
924
925 CV_TRACE_REGION_NEXT("finalPass");
926 parallel_for_(range: Range(0, dx.rows), body: finalPass(map, dst), nstripes: dx.total()/(double)(1<<16));
927}
928
929} // namespace cv
930
931void cvCanny( const CvArr* image, CvArr* edges, double threshold1,
932 double threshold2, int aperture_size )
933{
934 cv::Mat src = cv::cvarrToMat(arr: image), dst = cv::cvarrToMat(arr: edges);
935 CV_Assert( src.size == dst.size && src.depth() == CV_8U && dst.type() == CV_8U );
936
937 cv::Canny(src: src, dst: dst, low_thresh: threshold1, high_thresh: threshold2, aperture_size: aperture_size & 255,
938 L2gradient: (aperture_size & CV_CANNY_L2_GRADIENT) != 0);
939}
940
941/* End of file. */
942

source code of opencv/modules/imgproc/src/canny.cpp