1 | /*M/////////////////////////////////////////////////////////////////////////////////////// |
2 | // |
3 | // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. |
4 | // |
5 | // By downloading, copying, installing or using the software you agree to this license. |
6 | // If you do not agree to this license, do not download, install, |
7 | // copy or use the software. |
8 | // |
9 | // |
10 | // License Agreement |
11 | // For Open Source Computer Vision Library |
12 | // |
13 | // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. |
14 | // Copyright (C) 2009, Willow Garage Inc., all rights reserved. |
15 | // Copyright (C) 2014-2015, Itseez Inc., all rights reserved. |
16 | // Third party copyrights are property of their respective owners. |
17 | // |
18 | // Redistribution and use in source and binary forms, with or without modification, |
19 | // are permitted provided that the following conditions are met: |
20 | // |
21 | // * Redistribution's of source code must retain the above copyright notice, |
22 | // this list of conditions and the following disclaimer. |
23 | // |
24 | // * Redistribution's in binary form must reproduce the above copyright notice, |
25 | // this list of conditions and the following disclaimer in the documentation |
26 | // and/or other materials provided with the distribution. |
27 | // |
28 | // * The name of the copyright holders may not be used to endorse or promote products |
29 | // derived from this software without specific prior written permission. |
30 | // |
31 | // This software is provided by the copyright holders and contributors "as is" and |
32 | // any express or implied warranties, including, but not limited to, the implied |
33 | // warranties of merchantability and fitness for a particular purpose are disclaimed. |
34 | // In no event shall the Intel Corporation or contributors be liable for any direct, |
35 | // indirect, incidental, special, exemplary, or consequential damages |
36 | // (including, but not limited to, procurement of substitute goods or services; |
37 | // loss of use, data, or profits; or business interruption) however caused |
38 | // and on any theory of liability, whether in contract, strict liability, |
39 | // or tort (including negligence or otherwise) arising in any way out of |
40 | // the use of this software, even if advised of the possibility of such damage. |
41 | // |
42 | //M*/ |
43 | |
44 | #include "precomp.hpp" |
45 | |
46 | #include <opencv2/core/utils/logger.hpp> |
47 | |
48 | #include <opencv2/core/utils/configuration.private.hpp> |
49 | |
50 | #include <vector> |
51 | #include <iostream> |
52 | |
53 | #include "opencv2/core/hal/intrin.hpp" |
54 | #include "opencl_kernels_imgproc.hpp" |
55 | |
56 | #include "opencv2/core/openvx/ovx_defs.hpp" |
57 | |
58 | #include "filter.hpp" |
59 | |
60 | #include "opencv2/core/softfloat.hpp" |
61 | |
62 | namespace cv { |
63 | #include "fixedpoint.inl.hpp" |
64 | } |
65 | |
66 | #include "smooth.simd.hpp" |
67 | #include "smooth.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content |
68 | |
69 | namespace cv { |
70 | |
71 | /****************************************************************************************\ |
72 | Gaussian Blur |
73 | \****************************************************************************************/ |
74 | |
75 | /** |
76 | * Bit-exact in terms of softfloat computations |
77 | * |
78 | * returns sum of kernel values. Should be equal to 1.0 |
79 | */ |
80 | static |
81 | softdouble getGaussianKernelBitExact(std::vector<softdouble>& result, int n, double sigma) |
82 | { |
83 | CV_Assert(n > 0); |
84 | //TODO: incorrect SURF implementation requests kernel with n = 20 (PATCH_SZ): https://github.com/opencv/opencv/issues/15856 |
85 | //CV_Assert((n & 1) == 1); // odd |
86 | |
87 | if (sigma <= 0) |
88 | { |
89 | if (n == 1) |
90 | { |
91 | result = std::vector<softdouble>(1, softdouble::one()); |
92 | return softdouble::one(); |
93 | } |
94 | else if (n == 3) |
95 | { |
96 | softdouble v3[] = { |
97 | softdouble::fromRaw(a: 0x3fd0000000000000), // 0.25 |
98 | softdouble::fromRaw(a: 0x3fe0000000000000), // 0.5 |
99 | softdouble::fromRaw(a: 0x3fd0000000000000) // 0.25 |
100 | }; |
101 | result.assign(first: v3, last: v3 + 3); |
102 | return softdouble::one(); |
103 | } |
104 | else if (n == 5) |
105 | { |
106 | softdouble v5[] = { |
107 | softdouble::fromRaw(a: 0x3fb0000000000000), // 0.0625 |
108 | softdouble::fromRaw(a: 0x3fd0000000000000), // 0.25 |
109 | softdouble::fromRaw(a: 0x3fd8000000000000), // 0.375 |
110 | softdouble::fromRaw(a: 0x3fd0000000000000), // 0.25 |
111 | softdouble::fromRaw(a: 0x3fb0000000000000) // 0.0625 |
112 | }; |
113 | result.assign(first: v5, last: v5 + 5); |
114 | return softdouble::one(); |
115 | } |
116 | else if (n == 7) |
117 | { |
118 | softdouble v7[] = { |
119 | softdouble::fromRaw(a: 0x3fa0000000000000), // 0.03125 |
120 | softdouble::fromRaw(a: 0x3fbc000000000000), // 0.109375 |
121 | softdouble::fromRaw(a: 0x3fcc000000000000), // 0.21875 |
122 | softdouble::fromRaw(a: 0x3fd2000000000000), // 0.28125 |
123 | softdouble::fromRaw(a: 0x3fcc000000000000), // 0.21875 |
124 | softdouble::fromRaw(a: 0x3fbc000000000000), // 0.109375 |
125 | softdouble::fromRaw(a: 0x3fa0000000000000) // 0.03125 |
126 | }; |
127 | result.assign(first: v7, last: v7 + 7); |
128 | return softdouble::one(); |
129 | } |
130 | else if (n == 9) |
131 | { |
132 | softdouble v9[] = { |
133 | softdouble::fromRaw(a: 0x3f90000000000000), // 4 / 256 |
134 | softdouble::fromRaw(a: 0x3faa000000000000), // 13 / 256 |
135 | softdouble::fromRaw(a: 0x3fbe000000000000), // 30 / 256 |
136 | softdouble::fromRaw(a: 0x3fc9800000000000), // 51 / 256 |
137 | softdouble::fromRaw(a: 0x3fce000000000000), // 60 / 256 |
138 | softdouble::fromRaw(a: 0x3fc9800000000000), // 51 / 256 |
139 | softdouble::fromRaw(a: 0x3fbe000000000000), // 30 / 256 |
140 | softdouble::fromRaw(a: 0x3faa000000000000), // 13 / 256 |
141 | softdouble::fromRaw(a: 0x3f90000000000000) // 4 / 256 |
142 | }; |
143 | result.assign(first: v9, last: v9 + 9); |
144 | return softdouble::one(); |
145 | } |
146 | } |
147 | |
148 | softdouble sd_0_15 = softdouble::fromRaw(a: 0x3fc3333333333333); // 0.15 |
149 | softdouble sd_0_35 = softdouble::fromRaw(a: 0x3fd6666666666666); // 0.35 |
150 | softdouble sd_minus_0_125 = softdouble::fromRaw(a: 0xbfc0000000000000); // -0.5*0.25 |
151 | |
152 | softdouble sigmaX = sigma > 0 ? softdouble(sigma) : mulAdd(a: softdouble(n), b: sd_0_15, c: sd_0_35);// softdouble(((n-1)*0.5 - 1)*0.3 + 0.8) |
153 | softdouble scale2X = sd_minus_0_125/(sigmaX*sigmaX); |
154 | |
155 | int n2_ = (n - 1) / 2; |
156 | cv::AutoBuffer<softdouble> values(n2_ + 1); |
157 | softdouble sum = softdouble::zero(); |
158 | for (int i = 0, x = 1 - n; i < n2_; i++, x+=2) |
159 | { |
160 | // x = i - (n - 1)*0.5 |
161 | // t = std::exp(scale2X*x*x) |
162 | softdouble t = exp(a: softdouble(x*x)*scale2X); |
163 | values[i] = t; |
164 | sum += t; |
165 | } |
166 | sum *= softdouble(2); |
167 | //values[n2_] = softdouble::one(); // x=0 in exp(softdouble(x*x)*scale2X); |
168 | sum += softdouble::one(); |
169 | if ((n & 1) == 0) |
170 | { |
171 | //values[n2_ + 1] = softdouble::one(); |
172 | sum += softdouble::one(); |
173 | } |
174 | |
175 | // normalize: sum(k[i]) = 1 |
176 | softdouble mul1 = softdouble::one()/sum; |
177 | |
178 | result.resize(new_size: n); |
179 | |
180 | softdouble sum2 = softdouble::zero(); |
181 | for (int i = 0; i < n2_; i++ ) |
182 | { |
183 | softdouble t = values[i] * mul1; |
184 | result[i] = t; |
185 | result[n - 1 - i] = t; |
186 | sum2 += t; |
187 | } |
188 | sum2 *= softdouble(2); |
189 | result[n2_] = /*values[n2_]*/ softdouble::one() * mul1; |
190 | sum2 += result[n2_]; |
191 | if ((n & 1) == 0) |
192 | { |
193 | result[n2_ + 1] = result[n2_]; |
194 | sum2 += result[n2_]; |
195 | } |
196 | |
197 | return sum2; |
198 | } |
199 | |
200 | Mat getGaussianKernel(int n, double sigma, int ktype) |
201 | { |
202 | CV_CheckDepth(ktype, ktype == CV_32F || ktype == CV_64F, "" ); |
203 | Mat kernel(n, 1, ktype); |
204 | |
205 | std::vector<softdouble> kernel_bitexact; |
206 | getGaussianKernelBitExact(result&: kernel_bitexact, n, sigma); |
207 | |
208 | if (ktype == CV_32F) |
209 | { |
210 | for (int i = 0; i < n; i++) |
211 | kernel.at<float>(i0: i) = (float)kernel_bitexact[i]; |
212 | } |
213 | else |
214 | { |
215 | CV_DbgAssert(ktype == CV_64F); |
216 | for (int i = 0; i < n; i++) |
217 | kernel.at<double>(i0: i) = kernel_bitexact[i]; |
218 | } |
219 | |
220 | return kernel; |
221 | } |
222 | |
223 | static |
224 | softdouble getGaussianKernelFixedPoint_ED(CV_OUT std::vector<int64_t>& result, const std::vector<softdouble> kernel_bitexact, int fractionBits) |
225 | { |
226 | const int n = (int)kernel_bitexact.size(); |
227 | CV_Assert((n & 1) == 1); // odd |
228 | |
229 | CV_CheckGT(fractionBits, 0, "" ); |
230 | CV_CheckLE(fractionBits, 32, "" ); |
231 | |
232 | int64_t fractionMultiplier = CV_BIG_INT(1) << fractionBits; |
233 | softdouble fractionMultiplier_sd(fractionMultiplier); |
234 | |
235 | result.resize(new_size: n); |
236 | |
237 | int n2_ = n / 2; // n is odd |
238 | softdouble err = softdouble::zero(); |
239 | int64_t sum = 0; |
240 | for (int i = 0; i < n2_; i++) |
241 | { |
242 | //softdouble err0 = err; |
243 | softdouble adj_v = kernel_bitexact[i] * fractionMultiplier_sd + err; |
244 | int64_t v0 = cvRound(a: adj_v); // cvFloor() provides bad results |
245 | err = adj_v - softdouble(v0); |
246 | //printf("%3d: adj_v=%8.3f(%8.3f+%8.3f) v0=%d ed_err=%8.3f\n", i, (double)adj_v, (double)(kernel_bitexact[i] * fractionMultiplier_sd), (double)err0, (int)v0, (double)err); |
247 | |
248 | result[i] = v0; |
249 | result[n - 1 - i] = v0; |
250 | sum += v0; |
251 | } |
252 | sum *= 2; |
253 | softdouble adj_v_center = kernel_bitexact[n2_] * fractionMultiplier_sd + err; |
254 | int64_t v_center = fractionMultiplier - sum; |
255 | result[n2_] = v_center; |
256 | //printf("center = %g ===> %g ===> %g\n", (double)(kernel_bitexact[n2_] * fractionMultiplier), (double)adj_v_center, (double)v_center); |
257 | return (adj_v_center - softdouble(v_center)); |
258 | } |
259 | |
260 | static void getGaussianKernel(int n, double sigma, int ktype, Mat& res) { res = getGaussianKernel(n, sigma, ktype); } |
261 | template <typename FT> static void getGaussianKernel(int n, double sigma, int, std::vector<FT>& res) |
262 | { |
263 | std::vector<softdouble> res_sd; |
264 | softdouble s0 = getGaussianKernelBitExact(result&: res_sd, n, sigma); |
265 | CV_UNUSED(s0); |
266 | |
267 | std::vector<int64_t> fixed_256; |
268 | softdouble approx_err = getGaussianKernelFixedPoint_ED(fixed_256, res_sd, FT::fixedShift); |
269 | CV_UNUSED(approx_err); |
270 | |
271 | res.resize(n); |
272 | for (int i = 0; i < n; i++) |
273 | { |
274 | res[i] = FT::fromRaw((typename FT::raw_t)fixed_256[i]); |
275 | //printf("%03d: %d\n", i, res[i].raw()); |
276 | } |
277 | } |
278 | |
279 | template <typename T> |
280 | static void createGaussianKernels( T & kx, T & ky, int type, Size &ksize, |
281 | double sigma1, double sigma2 ) |
282 | { |
283 | int depth = CV_MAT_DEPTH(type); |
284 | if( sigma2 <= 0 ) |
285 | sigma2 = sigma1; |
286 | |
287 | // automatic detection of kernel size from sigma |
288 | if( ksize.width <= 0 && sigma1 > 0 ) |
289 | ksize.width = cvRound(value: sigma1*(depth == CV_8U ? 3 : 4)*2 + 1)|1; |
290 | if( ksize.height <= 0 && sigma2 > 0 ) |
291 | ksize.height = cvRound(value: sigma2*(depth == CV_8U ? 3 : 4)*2 + 1)|1; |
292 | |
293 | CV_Assert( ksize.width > 0 && ksize.width % 2 == 1 && |
294 | ksize.height > 0 && ksize.height % 2 == 1 ); |
295 | |
296 | sigma1 = std::max( a: sigma1, b: 0. ); |
297 | sigma2 = std::max( a: sigma2, b: 0. ); |
298 | |
299 | getGaussianKernel( ksize.width, sigma1, std::max(a: depth, CV_32F), kx ); |
300 | if( ksize.height == ksize.width && std::abs(x: sigma1 - sigma2) < DBL_EPSILON ) |
301 | ky = kx; |
302 | else |
303 | getGaussianKernel( ksize.height, sigma2, std::max(a: depth, CV_32F), ky ); |
304 | } |
305 | |
306 | Ptr<FilterEngine> createGaussianFilter( int type, Size ksize, |
307 | double sigma1, double sigma2, |
308 | int borderType ) |
309 | { |
310 | Mat kx, ky; |
311 | createGaussianKernels(kx, ky, type, ksize, sigma1, sigma2); |
312 | |
313 | return createSeparableLinearFilter( srcType: type, dstType: type, rowKernel: kx, columnKernel: ky, anchor: Point(-1,-1), delta: 0, rowBorderType: borderType ); |
314 | } |
315 | |
316 | #ifdef HAVE_OPENCL |
317 | |
318 | static bool ocl_GaussianBlur_8UC1(InputArray _src, OutputArray _dst, Size ksize, int ddepth, |
319 | InputArray _kernelX, InputArray _kernelY, int borderType) |
320 | { |
321 | const ocl::Device & dev = ocl::Device::getDefault(); |
322 | int type = _src.type(), sdepth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); |
323 | |
324 | if ( !(dev.isIntel() && (type == CV_8UC1) && |
325 | (_src.offset() == 0) && (_src.step() % 4 == 0) && |
326 | ((ksize.width == 5 && (_src.cols() % 4 == 0)) || |
327 | (ksize.width == 3 && (_src.cols() % 16 == 0) && (_src.rows() % 2 == 0)))) ) |
328 | return false; |
329 | |
330 | Mat kernelX = _kernelX.getMat().reshape(cn: 1, rows: 1); |
331 | if (kernelX.cols % 2 != 1) |
332 | return false; |
333 | Mat kernelY = _kernelY.getMat().reshape(cn: 1, rows: 1); |
334 | if (kernelY.cols % 2 != 1) |
335 | return false; |
336 | |
337 | if (ddepth < 0) |
338 | ddepth = sdepth; |
339 | |
340 | Size size = _src.size(); |
341 | size_t globalsize[2] = { 0, 0 }; |
342 | size_t localsize[2] = { 0, 0 }; |
343 | |
344 | if (ksize.width == 3) |
345 | { |
346 | globalsize[0] = size.width / 16; |
347 | globalsize[1] = size.height / 2; |
348 | } |
349 | else if (ksize.width == 5) |
350 | { |
351 | globalsize[0] = size.width / 4; |
352 | globalsize[1] = size.height / 1; |
353 | } |
354 | |
355 | const char * const borderMap[] = { "BORDER_CONSTANT" , "BORDER_REPLICATE" , "BORDER_REFLECT" , 0, "BORDER_REFLECT_101" }; |
356 | char build_opts[1024]; |
357 | snprintf(s: build_opts, maxlen: sizeof(build_opts), format: "-D %s %s%s" , borderMap[borderType & ~BORDER_ISOLATED], |
358 | ocl::kernelToStr(kernel: kernelX, CV_32F, name: "KERNEL_MATRIX_X" ).c_str(), |
359 | ocl::kernelToStr(kernel: kernelY, CV_32F, name: "KERNEL_MATRIX_Y" ).c_str()); |
360 | |
361 | ocl::Kernel kernel; |
362 | |
363 | if (ksize.width == 3) |
364 | kernel.create("gaussianBlur3x3_8UC1_cols16_rows2" , cv::ocl::imgproc::gaussianBlur3x3_oclsrc, build_opts); |
365 | else if (ksize.width == 5) |
366 | kernel.create("gaussianBlur5x5_8UC1_cols4" , cv::ocl::imgproc::gaussianBlur5x5_oclsrc, build_opts); |
367 | |
368 | if (kernel.empty()) |
369 | return false; |
370 | |
371 | UMat src = _src.getUMat(); |
372 | _dst.create(sz: size, CV_MAKETYPE(ddepth, cn)); |
373 | if (!(_dst.offset() == 0 && _dst.step() % 4 == 0)) |
374 | return false; |
375 | UMat dst = _dst.getUMat(); |
376 | |
377 | int idxArg = kernel.set(i: 0, arg: ocl::KernelArg::PtrReadOnly(m: src)); |
378 | idxArg = kernel.set(i: idxArg, value: (int)src.step); |
379 | idxArg = kernel.set(i: idxArg, arg: ocl::KernelArg::PtrWriteOnly(m: dst)); |
380 | idxArg = kernel.set(i: idxArg, value: (int)dst.step); |
381 | idxArg = kernel.set(i: idxArg, value: (int)dst.rows); |
382 | idxArg = kernel.set(i: idxArg, value: (int)dst.cols); |
383 | |
384 | return kernel.run(dims: 2, globalsize, localsize: (localsize[0] == 0) ? NULL : localsize, sync: false); |
385 | } |
386 | |
387 | #endif |
388 | |
389 | #ifdef HAVE_OPENVX |
390 | |
391 | namespace ovx { |
392 | template <> inline bool skipSmallImages<VX_KERNEL_GAUSSIAN_3x3>(int w, int h) { return w*h < 320 * 240; } |
393 | } |
394 | static bool openvx_gaussianBlur(InputArray _src, OutputArray _dst, Size ksize, |
395 | double sigma1, double sigma2, int borderType) |
396 | { |
397 | if (sigma2 <= 0) |
398 | sigma2 = sigma1; |
399 | // automatic detection of kernel size from sigma |
400 | if (ksize.width <= 0 && sigma1 > 0) |
401 | ksize.width = cvRound(sigma1*6 + 1) | 1; |
402 | if (ksize.height <= 0 && sigma2 > 0) |
403 | ksize.height = cvRound(sigma2*6 + 1) | 1; |
404 | |
405 | if (_src.type() != CV_8UC1 || |
406 | _src.cols() < 3 || _src.rows() < 3 || |
407 | ksize.width != 3 || ksize.height != 3) |
408 | return false; |
409 | |
410 | sigma1 = std::max(sigma1, 0.); |
411 | sigma2 = std::max(sigma2, 0.); |
412 | |
413 | if (!(sigma1 == 0.0 || (sigma1 - 0.8) < DBL_EPSILON) || !(sigma2 == 0.0 || (sigma2 - 0.8) < DBL_EPSILON) || |
414 | ovx::skipSmallImages<VX_KERNEL_GAUSSIAN_3x3>(_src.cols(), _src.rows())) |
415 | return false; |
416 | |
417 | Mat src = _src.getMat(); |
418 | Mat dst = _dst.getMat(); |
419 | |
420 | if ((borderType & BORDER_ISOLATED) == 0 && src.isSubmatrix()) |
421 | return false; //Process isolated borders only |
422 | vx_enum border; |
423 | switch (borderType & ~BORDER_ISOLATED) |
424 | { |
425 | case BORDER_CONSTANT: |
426 | border = VX_BORDER_CONSTANT; |
427 | break; |
428 | case BORDER_REPLICATE: |
429 | border = VX_BORDER_REPLICATE; |
430 | break; |
431 | default: |
432 | return false; |
433 | } |
434 | |
435 | try |
436 | { |
437 | ivx::Context ctx = ovx::getOpenVXContext(); |
438 | |
439 | Mat a; |
440 | if (dst.data != src.data) |
441 | a = src; |
442 | else |
443 | src.copyTo(a); |
444 | |
445 | ivx::Image |
446 | ia = ivx::Image::createFromHandle(ctx, VX_DF_IMAGE_U8, |
447 | ivx::Image::createAddressing(a.cols, a.rows, 1, (vx_int32)(a.step)), a.data), |
448 | ib = ivx::Image::createFromHandle(ctx, VX_DF_IMAGE_U8, |
449 | ivx::Image::createAddressing(dst.cols, dst.rows, 1, (vx_int32)(dst.step)), dst.data); |
450 | |
451 | //ATTENTION: VX_CONTEXT_IMMEDIATE_BORDER attribute change could lead to strange issues in multi-threaded environments |
452 | //since OpenVX standard says nothing about thread-safety for now |
453 | ivx::border_t prevBorder = ctx.immediateBorder(); |
454 | ctx.setImmediateBorder(border, (vx_uint8)(0)); |
455 | ivx::IVX_CHECK_STATUS(vxuGaussian3x3(ctx, ia, ib)); |
456 | ctx.setImmediateBorder(prevBorder); |
457 | } |
458 | catch (const ivx::RuntimeError & e) |
459 | { |
460 | VX_DbgThrow(e.what()); |
461 | } |
462 | catch (const ivx::WrapperError & e) |
463 | { |
464 | VX_DbgThrow(e.what()); |
465 | } |
466 | return true; |
467 | } |
468 | |
469 | #endif |
470 | |
471 | #ifdef ENABLE_IPP_GAUSSIAN_BLUR // see CMake's OPENCV_IPP_GAUSSIAN_BLUR option |
472 | |
// Switches (1 = disabled) checked by ipp_GaussianBlur() when IPP suggests a single thread.
#define IPP_DISABLE_GAUSSIAN_BLUR_LARGE_KERNELS_1TH 1
#define IPP_DISABLE_GAUSSIAN_BLUR_16SC4_1TH         1
#define IPP_DISABLE_GAUSSIAN_BLUR_32FC4_1TH         1

// IW 2017u2 has a bug which doesn't allow use of partial inMem with tiling,
// so the tiled (parallel) path is enabled only from IPP 2019 on.
#if IPP_VERSION_X100 >= 201900
#define IPP_GAUSSIANBLUR_PARALLEL 1
#else
#define IPP_GAUSSIANBLUR_PARALLEL 0
#endif
483 | |
484 | #ifdef HAVE_IPP_IW |
485 | |
486 | class ipp_gaussianBlurParallel: public ParallelLoopBody |
487 | { |
488 | public: |
489 | ipp_gaussianBlurParallel(::ipp::IwiImage &src, ::ipp::IwiImage &dst, int kernelSize, float sigma, ::ipp::IwiBorderType &border, bool *pOk): |
490 | m_src(src), m_dst(dst), m_kernelSize(kernelSize), m_sigma(sigma), m_border(border), m_pOk(pOk) { |
491 | *m_pOk = true; |
492 | } |
493 | ~ipp_gaussianBlurParallel() |
494 | { |
495 | } |
496 | |
497 | virtual void operator() (const Range& range) const CV_OVERRIDE |
498 | { |
499 | CV_INSTRUMENT_REGION_IPP(); |
500 | |
501 | if(!*m_pOk) |
502 | return; |
503 | |
504 | try |
505 | { |
506 | ::ipp::IwiTile tile = ::ipp::IwiRoi(0, range.start, m_dst.m_size.width, range.end - range.start); |
507 | CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterGaussian, m_src, m_dst, m_kernelSize, m_sigma, ::ipp::IwDefault(), m_border, tile); |
508 | } |
509 | catch(const ::ipp::IwException &) |
510 | { |
511 | *m_pOk = false; |
512 | return; |
513 | } |
514 | } |
515 | private: |
516 | ::ipp::IwiImage &m_src; |
517 | ::ipp::IwiImage &m_dst; |
518 | |
519 | int m_kernelSize; |
520 | float m_sigma; |
521 | ::ipp::IwiBorderType &m_border; |
522 | |
523 | volatile bool *m_pOk; |
524 | const ipp_gaussianBlurParallel& operator= (const ipp_gaussianBlurParallel&); |
525 | }; |
526 | |
527 | #endif |
528 | |
529 | static bool ipp_GaussianBlur(cv::Mat& src, cv::Mat& dst, Size ksize, |
530 | double sigma1, double sigma2, int borderType ) |
531 | { |
532 | #ifdef HAVE_IPP_IW |
533 | CV_INSTRUMENT_REGION_IPP(); |
534 | |
535 | #if IPP_VERSION_X100 < 201800 && ((defined _MSC_VER && defined _M_IX86) || (defined __GNUC__ && defined __i386__)) |
536 | CV_UNUSED(src); CV_UNUSED(dst); CV_UNUSED(ksize); CV_UNUSED(sigma1); CV_UNUSED(sigma2); CV_UNUSED(borderType); |
537 | return false; // bug on ia32 |
538 | #else |
539 | if(sigma1 != sigma2) |
540 | return false; |
541 | |
542 | if(sigma1 < FLT_EPSILON) |
543 | return false; |
544 | |
545 | if(ksize.width != ksize.height) |
546 | return false; |
547 | |
548 | // Acquire data and begin processing |
549 | try |
550 | { |
551 | ::ipp::IwiImage iwSrc = ippiGetImage(src); |
552 | ::ipp::IwiImage iwDst = ippiGetImage(dst); |
553 | ::ipp::IwiBorderSize borderSize = ::ipp::iwiSizeToBorderSize(ippiGetSize(ksize)); |
554 | ::ipp::IwiBorderType ippBorder(ippiGetBorder(iwSrc, borderType, borderSize)); |
555 | if(!ippBorder) |
556 | return false; |
557 | |
558 | const int threads = ippiSuggestThreadsNum(iwDst, 2); |
559 | |
560 | if (IPP_DISABLE_GAUSSIAN_BLUR_LARGE_KERNELS_1TH && (threads == 1 && ksize.width > 25)) |
561 | return false; |
562 | if (IPP_DISABLE_GAUSSIAN_BLUR_16SC4_1TH && (threads == 1 && src.type() == CV_16SC4)) |
563 | return false; |
564 | if (IPP_DISABLE_GAUSSIAN_BLUR_32FC4_1TH && (threads == 1 && src.type() == CV_32FC4)) |
565 | return false; |
566 | |
567 | if(IPP_GAUSSIANBLUR_PARALLEL && threads > 1 && iwSrc.m_size.height/(threads * 4) >= ksize.height/2) { |
568 | bool ok; |
569 | ipp_gaussianBlurParallel invoker(iwSrc, iwDst, ksize.width, (float) sigma1, ippBorder, &ok); |
570 | |
571 | if(!ok) |
572 | return false; |
573 | const Range range(0, (int) iwDst.m_size.height); |
574 | parallel_for_(range, invoker, threads*4); |
575 | |
576 | if(!ok) |
577 | return false; |
578 | } else { |
579 | CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterGaussian, iwSrc, iwDst, ksize.width, sigma1, ::ipp::IwDefault(), ippBorder); |
580 | } |
581 | } |
582 | catch (const ::ipp::IwException &) |
583 | { |
584 | return false; |
585 | } |
586 | |
587 | return true; |
588 | #endif |
589 | #else |
590 | CV_UNUSED(src); CV_UNUSED(dst); CV_UNUSED(ksize); CV_UNUSED(sigma1); CV_UNUSED(sigma2); CV_UNUSED(borderType); |
591 | return false; |
592 | #endif |
593 | } |
594 | #endif |
595 | |
596 | template<typename T> |
597 | static bool validateGaussianBlurKernel(std::vector<T>& kernel) |
598 | { |
599 | softdouble validation_sum = softdouble::zero(); |
600 | for (size_t i = 0; i < kernel.size(); i++) |
601 | { |
602 | validation_sum += softdouble((double)kernel[i]); |
603 | } |
604 | |
605 | bool isValid = validation_sum == softdouble::one(); |
606 | return isValid; |
607 | } |
608 | |
609 | void GaussianBlur(InputArray _src, OutputArray _dst, Size ksize, |
610 | double sigma1, double sigma2, |
611 | int borderType, AlgorithmHint hint) |
612 | { |
613 | CV_INSTRUMENT_REGION(); |
614 | |
615 | if (hint == cv::ALGO_HINT_DEFAULT) |
616 | hint = cv::getDefaultAlgorithmHint(); |
617 | |
618 | CV_Assert(!_src.empty()); |
619 | |
620 | int type = _src.type(); |
621 | Size size = _src.size(); |
622 | _dst.create( sz: size, type ); |
623 | |
624 | if( (borderType & ~BORDER_ISOLATED) != BORDER_CONSTANT && |
625 | ((borderType & BORDER_ISOLATED) != 0 || !_src.getMat().isSubmatrix()) ) |
626 | { |
627 | if( size.height == 1 ) |
628 | ksize.height = 1; |
629 | if( size.width == 1 ) |
630 | ksize.width = 1; |
631 | } |
632 | |
633 | if( ksize.width == 1 && ksize.height == 1 ) |
634 | { |
635 | _src.copyTo(arr: _dst); |
636 | return; |
637 | } |
638 | |
639 | if (sigma2 <= 0) |
640 | sigma2 = sigma1; |
641 | |
642 | bool useOpenCL = ocl::isOpenCLActivated() && _dst.isUMat() && _src.dims() <= 2 && |
643 | _src.rows() >= ksize.height && _src.cols() >= ksize.width && |
644 | ksize.width > 1 && ksize.height > 1; |
645 | CV_UNUSED(useOpenCL); |
646 | |
647 | int sdepth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); |
648 | |
649 | Mat kx, ky; |
650 | createGaussianKernels(kx, ky, type, ksize, sigma1, sigma2); |
651 | |
652 | CV_OCL_RUN(useOpenCL && sdepth == CV_8U && |
653 | ((ksize.width == 3 && ksize.height == 3) || |
654 | (ksize.width == 5 && ksize.height == 5)), |
655 | ocl_GaussianBlur_8UC1(_src, _dst, ksize, CV_MAT_DEPTH(type), kernelX: kx, kernelY: ky, borderType) |
656 | ); |
657 | |
658 | if(sdepth == CV_8U && ((borderType & BORDER_ISOLATED) || !_src.isSubmatrix())) |
659 | { |
660 | std::vector<ufixedpoint16> fkx, fky; |
661 | createGaussianKernels(kx&: fkx, ky&: fky, type, ksize, sigma1, sigma2); |
662 | |
663 | static bool param_check_gaussian_blur_bitexact_kernels = utils::getConfigurationParameterBool(name: "OPENCV_GAUSSIANBLUR_CHECK_BITEXACT_KERNELS" , defaultValue: false); |
664 | if (param_check_gaussian_blur_bitexact_kernels && !validateGaussianBlurKernel(kernel&: fkx)) |
665 | { |
666 | CV_LOG_INFO(NULL, "GaussianBlur: bit-exact fx kernel can't be applied: ksize=" << ksize << " sigma=" << Size2d(sigma1, sigma2)); |
667 | } |
668 | else if (param_check_gaussian_blur_bitexact_kernels && !validateGaussianBlurKernel(kernel&: fky)) |
669 | { |
670 | CV_LOG_INFO(NULL, "GaussianBlur: bit-exact fy kernel can't be applied: ksize=" << ksize << " sigma=" << Size2d(sigma1, sigma2)); |
671 | } |
672 | else |
673 | { |
674 | CV_OCL_RUN(useOpenCL, |
675 | ocl_sepFilter2D_BitExact(_src, _dst, ddepth: sdepth, |
676 | ksize, |
677 | fkx: (const uint16_t*)&fkx[0], fky: (const uint16_t*)&fky[0], |
678 | anchor: Point(-1, -1), delta: 0, borderType, |
679 | shift_bits: 8/*shift_bits*/) |
680 | ); |
681 | |
682 | Mat src = _src.getMat(); |
683 | Mat dst = _dst.getMat(); |
684 | |
685 | if (src.data == dst.data) |
686 | src = src.clone(); |
687 | |
688 | if ((sigma1 == 0.0) && (sigma2 == 0.0) && (ksize.height == ksize.width)) |
689 | { |
690 | Point ofs; |
691 | Size wsz(src.cols, src.rows); |
692 | Mat src2 = src; |
693 | if(!(borderType & BORDER_ISOLATED)) |
694 | src2.locateROI( wholeSize&: wsz, ofs ); |
695 | |
696 | CALL_HAL(gaussianBlurBinomial, cv_hal_gaussianBlurBinomial, src2.ptr(), src2.step, dst.ptr(), dst.step, src2.cols, src2.rows, sdepth, cn, |
697 | ofs.x, ofs.y, wsz.width - src2.cols - ofs.x, wsz.height - src2.rows - ofs.y, ksize.width, |
698 | borderType & ~BORDER_ISOLATED); |
699 | } |
700 | |
701 | if (hint == ALGO_HINT_APPROX) |
702 | { |
703 | Point ofs; |
704 | Size wsz(src.cols, src.rows); |
705 | if(!(borderType & BORDER_ISOLATED)) |
706 | src.locateROI( wholeSize&: wsz, ofs ); |
707 | |
708 | CALL_HAL(gaussianBlur, cv_hal_gaussianBlur, src.ptr(), src.step, dst.ptr(), dst.step, src.cols, src.rows, sdepth, cn, |
709 | ofs.x, ofs.y, wsz.width - src.cols - ofs.x, wsz.height - src.rows - ofs.y, ksize.width, ksize.height, |
710 | sigma1, sigma2, borderType & ~BORDER_ISOLATED); |
711 | |
712 | #ifdef ENABLE_IPP_GAUSSIAN_BLUR |
713 | // IPP is not bit-exact to OpenCV implementation |
714 | CV_IPP_RUN_FAST(ipp_GaussianBlur(src, dst, ksize, sigma1, sigma2, borderType)); |
715 | #endif |
716 | CV_OVX_RUN(true, |
717 | openvx_gaussianBlur(src, dst, ksize, sigma1, sigma2, borderType)) |
718 | } |
719 | |
720 | CV_CPU_DISPATCH(GaussianBlurFixedPoint, (src, dst, (const uint16_t*)&fkx[0], (int)fkx.size(), (const uint16_t*)&fky[0], (int)fky.size(), borderType), |
721 | CV_CPU_DISPATCH_MODES_ALL); |
722 | |
723 | return; |
724 | } |
725 | } |
726 | if(sdepth == CV_16U && ((borderType & BORDER_ISOLATED) || !_src.isSubmatrix())) |
727 | { |
728 | CV_LOG_INFO(NULL, "GaussianBlur: running bit-exact version..." ); |
729 | |
730 | std::vector<ufixedpoint32> fkx, fky; |
731 | createGaussianKernels(kx&: fkx, ky&: fky, type, ksize, sigma1, sigma2); |
732 | |
733 | static bool param_check_gaussian_blur_bitexact_kernels = utils::getConfigurationParameterBool(name: "OPENCV_GAUSSIANBLUR_CHECK_BITEXACT_KERNELS" , defaultValue: false); |
734 | if (param_check_gaussian_blur_bitexact_kernels && !validateGaussianBlurKernel(kernel&: fkx)) |
735 | { |
736 | CV_LOG_INFO(NULL, "GaussianBlur: bit-exact fx kernel can't be applied: ksize=" << ksize << " sigma=" << Size2d(sigma1, sigma2)); |
737 | } |
738 | else if (param_check_gaussian_blur_bitexact_kernels && !validateGaussianBlurKernel(kernel&: fky)) |
739 | { |
740 | CV_LOG_INFO(NULL, "GaussianBlur: bit-exact fy kernel can't be applied: ksize=" << ksize << " sigma=" << Size2d(sigma1, sigma2)); |
741 | } |
742 | else |
743 | { |
744 | // TODO: implement ocl_sepFilter2D_BitExact -- how to deal with bdepth? |
745 | // CV_OCL_RUN(useOpenCL, |
746 | // ocl_sepFilter2D_BitExact(_src, _dst, sdepth, |
747 | // ksize, |
748 | // (const uint32_t*)&fkx[0], (const uint32_t*)&fky[0], |
749 | // Point(-1, -1), 0, borderType, |
750 | // 16/*shift_bits*/) |
751 | // ); |
752 | |
753 | Mat src = _src.getMat(); |
754 | Mat dst = _dst.getMat(); |
755 | |
756 | if (src.data == dst.data) |
757 | src = src.clone(); |
758 | |
759 | if ((sigma1 == 0.0) && (sigma2 == 0.0) && (ksize.height == ksize.width)) |
760 | { |
761 | Point ofs; |
762 | Size wsz(src.cols, src.rows); |
763 | Mat src2 = src; |
764 | if(!(borderType & BORDER_ISOLATED)) |
765 | src2.locateROI( wholeSize&: wsz, ofs ); |
766 | |
767 | CALL_HAL(gaussianBlurBinomial, cv_hal_gaussianBlurBinomial, src2.ptr(), src2.step, dst.ptr(), dst.step, src2.cols, src2.rows, sdepth, cn, |
768 | ofs.x, ofs.y, wsz.width - src2.cols - ofs.x, wsz.height - src2.rows - ofs.y, ksize.width, borderType&~BORDER_ISOLATED); |
769 | } |
770 | |
771 | if (hint == ALGO_HINT_APPROX) |
772 | { |
773 | Point ofs; |
774 | Size wsz(src.cols, src.rows); |
775 | if(!(borderType & BORDER_ISOLATED)) |
776 | src.locateROI( wholeSize&: wsz, ofs ); |
777 | |
778 | CALL_HAL(gaussianBlur, cv_hal_gaussianBlur, src.ptr(), src.step, dst.ptr(), dst.step, src.cols, src.rows, sdepth, cn, |
779 | ofs.x, ofs.y, wsz.width - src.cols - ofs.x, wsz.height - src.rows - ofs.y, ksize.width, ksize.height, |
780 | sigma1, sigma2, borderType & ~BORDER_ISOLATED); |
781 | |
782 | #ifdef ENABLE_IPP_GAUSSIAN_BLUR |
783 | // IPP is not bit-exact to OpenCV implementation |
784 | CV_IPP_RUN_FAST(ipp_GaussianBlur(src, dst, ksize, sigma1, sigma2, borderType)); |
785 | #endif |
786 | CV_OVX_RUN(true, |
787 | openvx_gaussianBlur(src, dst, ksize, sigma1, sigma2, borderType)) |
788 | } |
789 | |
790 | CV_CPU_DISPATCH(GaussianBlurFixedPoint, (src, dst, (const uint32_t*)&fkx[0], (int)fkx.size(), (const uint32_t*)&fky[0], (int)fky.size(), borderType), |
791 | CV_CPU_DISPATCH_MODES_ALL); |
792 | |
793 | return; |
794 | } |
795 | } |
796 | |
797 | #ifdef HAVE_OPENCL |
798 | if (useOpenCL) |
799 | { |
800 | sepFilter2D(src: _src, dst: _dst, ddepth: sdepth, kernelX: kx, kernelY: ky, anchor: Point(-1, -1), delta: 0, borderType); |
801 | return; |
802 | } |
803 | #endif |
804 | |
805 | Mat src = _src.getMat(); |
806 | Mat dst = _dst.getMat(); |
807 | |
808 | Point ofs; |
809 | Size wsz(src.cols, src.rows); |
810 | if(!(borderType & BORDER_ISOLATED)) |
811 | src.locateROI( wholeSize&: wsz, ofs ); |
812 | |
813 | CALL_HAL(gaussianBlur, cv_hal_gaussianBlur, src.ptr(), src.step, dst.ptr(), dst.step, src.cols, src.rows, sdepth, cn, |
814 | ofs.x, ofs.y, wsz.width - src.cols - ofs.x, wsz.height - src.rows - ofs.y, ksize.width, ksize.height, |
815 | sigma1, sigma2, borderType & ~BORDER_ISOLATED); |
816 | |
817 | CV_OVX_RUN(true, |
818 | openvx_gaussianBlur(src, dst, ksize, sigma1, sigma2, borderType)) |
819 | |
820 | #if defined ENABLE_IPP_GAUSSIAN_BLUR |
821 | // IPP is not bit-exact to OpenCV implementation |
822 | CV_IPP_RUN_FAST(ipp_GaussianBlur(src, dst, ksize, sigma1, sigma2, borderType)); |
823 | #endif |
824 | |
825 | sepFilter2D(src, dst, ddepth: sdepth, kernelX: kx, kernelY: ky, anchor: Point(-1, -1), delta: 0, borderType); |
826 | } |
827 | |
828 | } // namespace |
829 | |
830 | ////////////////////////////////////////////////////////////////////////////////////////// |
831 | |
832 | CV_IMPL void |
833 | cvSmooth( const void* srcarr, void* dstarr, int smooth_type, |
834 | int param1, int param2, double param3, double param4 ) |
835 | { |
836 | cv::Mat src = cv::cvarrToMat(arr: srcarr), dst0 = cv::cvarrToMat(arr: dstarr), dst = dst0; |
837 | |
838 | CV_Assert( dst.size() == src.size() && |
839 | (smooth_type == CV_BLUR_NO_SCALE || dst.type() == src.type()) ); |
840 | |
841 | if( param2 <= 0 ) |
842 | param2 = param1; |
843 | |
844 | if( smooth_type == CV_BLUR || smooth_type == CV_BLUR_NO_SCALE ) |
845 | cv::boxFilter( src, dst, ddepth: dst.depth(), ksize: cv::Size(param1, param2), anchor: cv::Point(-1,-1), |
846 | normalize: smooth_type == CV_BLUR, borderType: cv::BORDER_REPLICATE ); |
847 | else if( smooth_type == CV_GAUSSIAN ) |
848 | cv::GaussianBlur( src: src, dst: dst, ksize: cv::Size(param1, param2), sigma1: param3, sigma2: param4, borderType: cv::BORDER_REPLICATE ); |
849 | else if( smooth_type == CV_MEDIAN ) |
850 | cv::medianBlur( src, dst, ksize: param1 ); |
851 | else |
852 | cv::bilateralFilter( src, dst, d: param1, sigmaColor: param3, sigmaSpace: param4, borderType: cv::BORDER_REPLICATE ); |
853 | |
854 | if( dst.data != dst0.data ) |
855 | CV_Error( cv::Error::StsUnmatchedFormats, "The destination image does not have the proper type" ); |
856 | } |
857 | |
858 | /* End of file. */ |
859 | |