smooth.dispatch.cpp source code [opencv/modules/imgproc/src/smooth.dispatch.cpp]

/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2014-2015, Itseez Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
43
44	#include "precomp.hpp"
45
46	#include <opencv2/core/utils/logger.hpp>
47
48	#include <opencv2/core/utils/configuration.private.hpp>
49
50	#include <vector>
51	#include <iostream>
52
53	#include "opencv2/core/hal/intrin.hpp"
54	#include "opencl_kernels_imgproc.hpp"
55
56	#include "opencv2/core/openvx/ovx_defs.hpp"
57
58	#include "filter.hpp"
59
60	#include "opencv2/core/softfloat.hpp"
61
62	namespace cv {
63	#include "fixedpoint.inl.hpp"
64	}
65
66	#include "smooth.simd.hpp"
67	#include "smooth.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content
68
69	namespace cv {
70
71	/**************************************************************************************\
72	Gaussian Blur
73	\**************************************************************************************/
74
75	/**
76	* Bit-exact in terms of softfloat computations
77	*
78	* returns sum of kernel values. Should be equal to 1.0
79	*/
80	static
81	softdouble getGaussianKernelBitExact(std::vector<softdouble>& result, int n, double sigma)
82	{
83	CV_Assert(n > `0`);
84	//TODO: incorrect SURF implementation requests kernel with n = 20 (PATCH_SZ): https://github.com/opencv/opencv/issues/15856
85	//CV_Assert((n & 1) == 1); // odd
86
87	if (sigma <= `0`)
88	{
89	if (n == `1`)
90	{
91	result = std::vector<softdouble>(`1`, softdouble::one());
92	return softdouble::one();
93	}
94	else if (n == `3`)
95	{
96	softdouble v3[] = {
97	softdouble::fromRaw(a: `0x3fd0000000000000`), // 0.25
98	softdouble::fromRaw(a: `0x3fe0000000000000`), // 0.5
99	softdouble::fromRaw(a: `0x3fd0000000000000`) // 0.25
100	};
101	result.assign(first: v3, last: v3 + `3`);
102	return softdouble::one();
103	}
104	else if (n == `5`)
105	{
106	softdouble v5[] = {
107	softdouble::fromRaw(a: `0x3fb0000000000000`), // 0.0625
108	softdouble::fromRaw(a: `0x3fd0000000000000`), // 0.25
109	softdouble::fromRaw(a: `0x3fd8000000000000`), // 0.375
110	softdouble::fromRaw(a: `0x3fd0000000000000`), // 0.25
111	softdouble::fromRaw(a: `0x3fb0000000000000`) // 0.0625
112	};
113	result.assign(first: v5, last: v5 + `5`);
114	return softdouble::one();
115	}
116	else if (n == `7`)
117	{
118	softdouble v7[] = {
119	softdouble::fromRaw(a: `0x3fa0000000000000`), // 0.03125
120	softdouble::fromRaw(a: `0x3fbc000000000000`), // 0.109375
121	softdouble::fromRaw(a: `0x3fcc000000000000`), // 0.21875
122	softdouble::fromRaw(a: `0x3fd2000000000000`), // 0.28125
123	softdouble::fromRaw(a: `0x3fcc000000000000`), // 0.21875
124	softdouble::fromRaw(a: `0x3fbc000000000000`), // 0.109375
125	softdouble::fromRaw(a: `0x3fa0000000000000`) // 0.03125
126	};
127	result.assign(first: v7, last: v7 + `7`);
128	return softdouble::one();
129	}
130	else if (n == `9`)
131	{
132	softdouble v9[] = {
133	softdouble::fromRaw(a: `0x3f90000000000000`), // 4 / 256
134	softdouble::fromRaw(a: `0x3faa000000000000`), // 13 / 256
135	softdouble::fromRaw(a: `0x3fbe000000000000`), // 30 / 256
136	softdouble::fromRaw(a: `0x3fc9800000000000`), // 51 / 256
137	softdouble::fromRaw(a: `0x3fce000000000000`), // 60 / 256
138	softdouble::fromRaw(a: `0x3fc9800000000000`), // 51 / 256
139	softdouble::fromRaw(a: `0x3fbe000000000000`), // 30 / 256
140	softdouble::fromRaw(a: `0x3faa000000000000`), // 13 / 256
141	softdouble::fromRaw(a: `0x3f90000000000000`) // 4 / 256
142	};
143	result.assign(first: v9, last: v9 + `9`);
144	return softdouble::one();
145	}
146	}
147
148	softdouble sd_0_15 = softdouble::fromRaw(a: `0x3fc3333333333333`); // 0.15
149	softdouble sd_0_35 = softdouble::fromRaw(a: `0x3fd6666666666666`); // 0.35
150	softdouble sd_minus_0_125 = softdouble::fromRaw(a: `0xbfc0000000000000`); // -0.50.25*
151
152	softdouble sigmaX = sigma > `0` ? softdouble (sigma) : mulAdd(a: softdouble (n), b: sd_0_15, c: sd_0_35);// softdouble(((n-1)0.5 - 1)0.3 + 0.8)
153	softdouble scale2X = sd_minus_0_125 /(sigmaX *sigmaX);
154
155	int n2_ = (n - `1`) / `2`;
156	cv::AutoBuffer<softdouble> values(n2_ + `1`);
157	softdouble sum = softdouble::zero();
158	for (int i = `0`, x = `1` - n; i < n2_; i++, x+=`2`)
159	{
160	// x = i - (n - 1)0.5*
161	// t = std::exp(scale2Xxx)
162	softdouble t = exp(a: softdouble (xx)scale2X);
163	values [i] = t;
164	sum += t;
165	}
166	sum *= softdouble (`2`);
167	//values[n2_] = softdouble::one(); // x=0 in exp(softdouble(xx)scale2X);
168	sum += softdouble::one();
169	if ((n & `1`) == `0`)
170	{
171	//values[n2_ + 1] = softdouble::one();
172	sum += softdouble::one();
173	}
174
175	// normalize: sum(k[i]) = 1
176	softdouble mul1 = softdouble::one()/sum;
177
178	result.resize(new_size: n);
179
180	softdouble sum2 = softdouble::zero();
181	for (int i = `0`; i < n2_; i++ )
182	{
183	softdouble t = values [i] * mul1;
184	result [i] = t;
185	result [n - `1` - i] = t;
186	sum2 += t;
187	}
188	sum2 *= softdouble (`2`);
189	result [n2_] = /values[n2_]/ softdouble::one() * mul1;
190	sum2 += result [n2_];
191	if ((n & `1`) == `0`)
192	{
193	result [n2_ + `1`] = result [n2_];
194	sum2 += result [n2_];
195	}
196
197	return sum2;
198	}
199
200	Mat getGaussianKernel(int n, double sigma, int ktype)
201	{
202	CV_CheckDepth(ktype, ktype == CV_32F \|\| ktype == CV_64F, "");
203	Mat kernel(n, `1`, ktype);
204
205	std::vector<softdouble> kernel_bitexact;
206	getGaussianKernelBitExact(result&: kernel_bitexact, n, sigma);
207
208	if (ktype == CV_32F)
209	{
210	for (int i = `0`; i < n; i++)
211	kernel.at<float>(i0: i) = (float)kernel_bitexact [i];
212	}
213	else
214	{
215	CV_DbgAssert(ktype == CV_64F);
216	for (int i = `0`; i < n; i++)
217	kernel.at<double>(i0: i) = kernel_bitexact [i];
218	}
219
220	return kernel;
221	}
222
223	static
224	softdouble getGaussianKernelFixedPoint_ED(CV_OUT std::vector<int64_t>& result, const std::vector<softdouble> kernel_bitexact, int fractionBits)
225	{
226	const int n = (int)kernel_bitexact.size();
227	CV_Assert((n & `1`) == `1`); // odd
228
229	CV_CheckGT(fractionBits, `0`, "");
230	CV_CheckLE(fractionBits, `32`, "");
231
232	int64_t fractionMultiplier = CV_BIG_INT(`1`) << fractionBits;
233	softdouble fractionMultiplier_sd(fractionMultiplier);
234
235	result.resize(new_size: n);
236
237	int n2_ = n / `2`; // n is odd
238	softdouble err = softdouble::zero();
239	int64_t sum = `0`;
240	for (int i = `0`; i < n2_; i++)
241	{
242	//softdouble err0 = err;
243	softdouble adj_v = kernel_bitexact [i] * fractionMultiplier_sd + err;
244	int64_t v0 = cvRound(a: adj_v); // cvFloor() provides bad results
245	err = adj_v - softdouble (v0);
246	//printf("%3d: adj_v=%8.3f(%8.3f+%8.3f) v0=%d ed_err=%8.3f\n", i, (double)adj_v, (double)(kernel_bitexact[i] fractionMultiplier_sd), (double)err0, (int)v0, (double)err);*
247
248	result [i] = v0;
249	result [n - `1` - i] = v0;
250	sum += v0;
251	}
252	sum *= `2`;
253	softdouble adj_v_center = kernel_bitexact [n2_] * fractionMultiplier_sd + err;
254	int64_t v_center = fractionMultiplier - sum;
255	result [n2_] = v_center;
256	//printf("center = %g ===> %g ===> %g\n", (double)(kernel_bitexact[n2_] fractionMultiplier), (double)adj_v_center, (double)v_center);*
257	return (adj_v_center - softdouble (v_center));
258	}
259
260	static void getGaussianKernel(int n, double sigma, int ktype, Mat& res) { res = getGaussianKernel(n, sigma, ktype); }
261	template <typename FT> static void getGaussianKernel(int n, double sigma, int, std::vector<FT>& res)
262	{
263	std::vector<softdouble> res_sd;
264	softdouble s0 = getGaussianKernelBitExact(result&: res_sd, n, sigma);
265	CV_UNUSED(s0);
266
267	std::vector<int64_t> fixed_256;
268	softdouble approx_err = getGaussianKernelFixedPoint_ED(fixed_256, res_sd, FT::fixedShift);
269	CV_UNUSED(approx_err);
270
271	res.resize(n);
272	for (int i = `0`; i < n; i++)
273	{
274	res[i] = FT::fromRaw((typename FT::raw_t)fixed_256 [i]);
275	//printf("%03d: %d\n", i, res[i].raw());
276	}
277	}
278
279	template <typename T>
280	static void createGaussianKernels( T & kx, T & ky, int type, Size &ksize,
281	double sigma1, double sigma2 )
282	{
283	int depth = CV_MAT_DEPTH(type);
284	if( sigma2 <= `0` )
285	sigma2 = sigma1;
286
287	// automatic detection of kernel size from sigma
288	if( ksize.width <= `0` && sigma1 > `0` )
289	ksize.width = cvRound(value: sigma1(depth == CV_8U ? `3` : `4`)`2` + `1`)\|`1`;
290	if( ksize.height <= `0` && sigma2 > `0` )
291	ksize.height = cvRound(value: sigma2(depth == CV_8U ? `3` : `4`)`2` + `1`)\|`1`;
292
293	CV_Assert( ksize.width > `0` && ksize.width % `2` == `1` &&
294	ksize.height > `0` && ksize.height % `2` == `1` );
295
296	sigma1 = std::max( a: sigma1, b: `0.` );
297	sigma2 = std::max( a: sigma2, b: `0.` );
298
299	getGaussianKernel( ksize.width, sigma1, std::max(a: depth, CV_32F), kx );
300	if( ksize.height == ksize.width && std::abs(x: sigma1 - sigma2) < DBL_EPSILON )
301	ky = kx;
302	else
303	getGaussianKernel( ksize.height, sigma2, std::max(a: depth, CV_32F), ky );
304	}
305
306	Ptr<FilterEngine> createGaussianFilter( int type, Size ksize,
307	double sigma1, double sigma2,
308	int borderType )
309	{
310	Mat kx, ky;
311	createGaussianKernels(kx, ky, type, ksize, sigma1, sigma2);
312
313	return createSeparableLinearFilter( srcType: type, dstType: type, rowKernel: kx, columnKernel: ky, anchor: Point (-`1`,-`1`), delta: `0`, rowBorderType: borderType );
314	}
315
316	#ifdef HAVE_OPENCL
317
318	static bool ocl_GaussianBlur_8UC1(InputArray _src, OutputArray _dst, Size ksize, int ddepth,
319	InputArray _kernelX, InputArray _kernelY, int borderType)
320	{
321	const ocl::Device & dev = ocl::Device::getDefault();
322	int type = _src.type(), sdepth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
323
324	if ( !(dev.isIntel() && (type == CV_8UC1) &&
325	(_src.offset() == `0`) && (_src.step() % `4` == `0`) &&
326	((ksize.width == `5` && (_src.cols() % `4` == `0`)) \|\|
327	(ksize.width == `3` && (_src.cols() % `16` == `0`) && (_src.rows() % `2` == `0`)))) )
328	return false;
329
330	Mat kernelX = _kernelX.getMat().reshape(cn: `1`, rows: `1`);
331	if (kernelX.cols % `2` != `1`)
332	return false;
333	Mat kernelY = _kernelY.getMat().reshape(cn: `1`, rows: `1`);
334	if (kernelY.cols % `2` != `1`)
335	return false;
336
337	if (ddepth < `0`)
338	ddepth = sdepth;
339
340	Size size = _src.size();
341	size_t globalsize[`2`] = { `0`, `0` };
342	size_t localsize[`2`] = { `0`, `0` };
343
344	if (ksize.width == `3`)
345	{
346	globalsize[`0`] = size.width / `16`;
347	globalsize[`1`] = size.height / `2`;
348	}
349	else if (ksize.width == `5`)
350	{
351	globalsize[`0`] = size.width / `4`;
352	globalsize[`1`] = size.height / `1`;
353	}
354
355	const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", `0`, "BORDER_REFLECT_101" };
356	char build_opts[`1024`];
357	snprintf(s: build_opts, maxlen: sizeof(build_opts), format: "-D %s %s%s", borderMap[borderType & ~BORDER_ISOLATED],
358	ocl::kernelToStr(kernel: kernelX, CV_32F, name: "KERNEL_MATRIX_X").c_str(),
359	ocl::kernelToStr(kernel: kernelY, CV_32F, name: "KERNEL_MATRIX_Y").c_str());
360
361	ocl::Kernel kernel;
362
363	if (ksize.width == `3`)
364	kernel.create("gaussianBlur3x3_8UC1_cols16_rows2", cv::ocl::imgproc::gaussianBlur3x3_oclsrc, build_opts);
365	else if (ksize.width == `5`)
366	kernel.create("gaussianBlur5x5_8UC1_cols4", cv::ocl::imgproc::gaussianBlur5x5_oclsrc, build_opts);
367
368	if (kernel.empty())
369	return false;
370
371	UMat src = _src.getUMat();
372	_dst.create(sz: size, CV_MAKETYPE(ddepth, cn));
373	if (!(_dst.offset() == `0` && _dst.step() % `4` == `0`))
374	return false;
375	UMat dst = _dst.getUMat();
376
377	int idxArg = kernel.set(i: `0`, arg: ocl::KernelArg::PtrReadOnly(m: src));
378	idxArg = kernel.set(i: idxArg, value: (int)src.step);
379	idxArg = kernel.set(i: idxArg, arg: ocl::KernelArg::PtrWriteOnly(m: dst));
380	idxArg = kernel.set(i: idxArg, value: (int)dst.step);
381	idxArg = kernel.set(i: idxArg, value: (int)dst.rows);
382	idxArg = kernel.set(i: idxArg, value: (int)dst.cols);
383
384	return kernel.run(dims: `2`, globalsize, localsize: (localsize[`0`] == `0`) ? NULL : localsize, sync: false);
385	}
386
387	#endif
388
389	#ifdef HAVE_OPENVX
390
391	namespace ovx {
392	template <> inline bool skipSmallImages<VX_KERNEL_GAUSSIAN_3x3>(int w, int h) { return wh < `320` `240`; }
393	}
394	static bool openvx_gaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
395	double sigma1, double sigma2, int borderType)
396	{
397	if (sigma2 <= `0`)
398	sigma2 = sigma1;
399	// automatic detection of kernel size from sigma
400	if (ksize.width <= `0` && sigma1 > `0`)
401	ksize.width = cvRound(sigma1*`6` + `1`) \| `1`;
402	if (ksize.height <= `0` && sigma2 > `0`)
403	ksize.height = cvRound(sigma2*`6` + `1`) \| `1`;
404
405	if (_src.type() != CV_8UC1 \|\|
406	_src.cols() < `3` \|\| _src.rows() < `3` \|\|
407	ksize.width != `3` \|\| ksize.height != `3`)
408	return false;
409
410	sigma1 = std::max(sigma1, `0.`);
411	sigma2 = std::max(sigma2, `0.`);
412
413	if (!(sigma1 == `0.0` \|\| (sigma1 - `0.8`) < DBL_EPSILON) \|\| !(sigma2 == `0.0` \|\| (sigma2 - `0.8`) < DBL_EPSILON) \|\|
414	ovx::skipSmallImages<VX_KERNEL_GAUSSIAN_3x3>(_src.cols(), _src.rows()))
415	return false;
416
417	Mat src = _src.getMat();
418	Mat dst = _dst.getMat();
419
420	if ((borderType & BORDER_ISOLATED) == `0` && src.isSubmatrix())
421	return false; //Process isolated borders only
422	vx_enum border;
423	switch (borderType & ~BORDER_ISOLATED)
424	{
425	case BORDER_CONSTANT:
426	border = VX_BORDER_CONSTANT;
427	break;
428	case BORDER_REPLICATE:
429	border = VX_BORDER_REPLICATE;
430	break;
431	default:
432	return false;
433	}
434
435	try
436	{
437	ivx::Context ctx = ovx::getOpenVXContext();
438
439	Mat a;
440	if (dst.data != src.data)
441	a = src;
442	else
443	src.copyTo(a);
444
445	ivx::Image
446	ia = ivx::Image::createFromHandle(ctx, VX_DF_IMAGE_U8,
447	ivx::Image::createAddressing(a.cols, a.rows, `1`, (vx_int32)(a.step)), a.data),
448	ib = ivx::Image::createFromHandle(ctx, VX_DF_IMAGE_U8,
449	ivx::Image::createAddressing(dst.cols, dst.rows, `1`, (vx_int32)(dst.step)), dst.data);
450
451	//ATTENTION: VX_CONTEXT_IMMEDIATE_BORDER attribute change could lead to strange issues in multi-threaded environments
452	//since OpenVX standard says nothing about thread-safety for now
453	ivx::border_t prevBorder = ctx.immediateBorder();
454	ctx.setImmediateBorder(border, (vx_uint8)(`0`));
455	ivx::IVX_CHECK_STATUS(vxuGaussian3x3(ctx, ia, ib));
456	ctx.setImmediateBorder(prevBorder);
457	}
458	catch (const ivx::RuntimeError & e)
459	{
460	VX_DbgThrow(e.what());
461	}
462	catch (const ivx::WrapperError & e)
463	{
464	VX_DbgThrow(e.what());
465	}
466	return true;
467	}
468
469	#endif
470
471	#ifdef ENABLE_IPP_GAUSSIAN_BLUR // see CMake's OPENCV_IPP_GAUSSIAN_BLUR option
472
473	#define IPP_DISABLE_GAUSSIAN_BLUR_LARGE_KERNELS_1TH 1
474	#define IPP_DISABLE_GAUSSIAN_BLUR_16SC4_1TH 1
475	#define IPP_DISABLE_GAUSSIAN_BLUR_32FC4_1TH 1
476
477	// IW 2017u2 has bug which doesn't allow use of partial inMem with tiling
478	#if IPP_VERSION_X100 < 201900
479	#define IPP_GAUSSIANBLUR_PARALLEL 0
480	#else
481	#define IPP_GAUSSIANBLUR_PARALLEL 1
482	#endif
483
484	#ifdef HAVE_IPP_IW
485
486	class ipp_gaussianBlurParallel: public ParallelLoopBody
487	{
488	public:
489	ipp_gaussianBlurParallel(::ipp::IwiImage &src, ::ipp::IwiImage &dst, int kernelSize, float sigma, ::ipp::IwiBorderType &border, bool *pOk):
490	m_src(src), m_dst(dst), m_kernelSize(kernelSize), m_sigma(sigma), m_border(border), m_pOk(pOk) {
491	m_pOk = true*;
492	}
493	~ipp_gaussianBlurParallel()
494	{
495	}
496
497	virtual void operator() (const Range& range) const CV_OVERRIDE
498	{
499	CV_INSTRUMENT_REGION_IPP();
500
501	if(!*m_pOk)
502	return;
503
504	try
505	{
506	::ipp::IwiTile tile = ::ipp::IwiRoi(`0`, range.start, m_dst.m_size.width, range.end - range.start);
507	CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterGaussian, m_src, m_dst, m_kernelSize, m_sigma, ::ipp::IwDefault(), m_border, tile);
508	}
509	catch(const ::ipp::IwException &)
510	{
511	m_pOk = false*;
512	return;
513	}
514	}
515	private:
516	::ipp::IwiImage &m_src;
517	::ipp::IwiImage &m_dst;
518
519	int m_kernelSize;
520	float m_sigma;
521	::ipp::IwiBorderType &m_border;
522
523	volatile bool *m_pOk;
524	const ipp_gaussianBlurParallel& operator= (const ipp_gaussianBlurParallel&);
525	};
526
527	#endif
528
529	static bool ipp_GaussianBlur(cv::Mat& src, cv::Mat& dst, Size ksize,
530	double sigma1, double sigma2, int borderType )
531	{
532	#ifdef HAVE_IPP_IW
533	CV_INSTRUMENT_REGION_IPP();
534
535	#if IPP_VERSION_X100 < 201800 && ((defined _MSC_VER && defined _M_IX86) \|\| (defined __GNUC__ && defined __i386__))
536	CV_UNUSED(src); CV_UNUSED(dst); CV_UNUSED(ksize); CV_UNUSED(sigma1); CV_UNUSED(sigma2); CV_UNUSED(borderType);
537	return false; // bug on ia32
538	#else
539	if(sigma1 != sigma2)
540	return false;
541
542	if(sigma1 < FLT_EPSILON)
543	return false;
544
545	if(ksize.width != ksize.height)
546	return false;
547
548	// Acquire data and begin processing
549	try
550	{
551	::ipp::IwiImage iwSrc = ippiGetImage(src);
552	::ipp::IwiImage iwDst = ippiGetImage(dst);
553	::ipp::IwiBorderSize borderSize = ::ipp::iwiSizeToBorderSize(ippiGetSize(ksize));
554	::ipp::IwiBorderType ippBorder(ippiGetBorder(iwSrc, borderType, borderSize));
555	if(!ippBorder)
556	return false;
557
558	const int threads = ippiSuggestThreadsNum(iwDst, `2`);
559
560	if (IPP_DISABLE_GAUSSIAN_BLUR_LARGE_KERNELS_1TH && (threads == `1` && ksize.width > `25`))
561	return false;
562	if (IPP_DISABLE_GAUSSIAN_BLUR_16SC4_1TH && (threads == `1` && src.type() == CV_16SC4))
563	return false;
564	if (IPP_DISABLE_GAUSSIAN_BLUR_32FC4_1TH && (threads == `1` && src.type() == CV_32FC4))
565	return false;
566
567	if(IPP_GAUSSIANBLUR_PARALLEL && threads > `1` && iwSrc.m_size.height/(threads * `4`) >= ksize.height/`2`) {
568	bool ok;
569	ipp_gaussianBlurParallel invoker(iwSrc, iwDst, ksize.width, (float) sigma1, ippBorder, &ok);
570
571	if(!ok)
572	return false;
573	const Range range(`0`, (int) iwDst.m_size.height);
574	parallel_for_(range, invoker, threads*`4`);
575
576	if(!ok)
577	return false;
578	} else {
579	CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterGaussian, iwSrc, iwDst, ksize.width, sigma1, ::ipp::IwDefault(), ippBorder);
580	}
581	}
582	catch (const ::ipp::IwException &)
583	{
584	return false;
585	}
586
587	return true;
588	#endif
589	#else
590	CV_UNUSED(src); CV_UNUSED(dst); CV_UNUSED(ksize); CV_UNUSED(sigma1); CV_UNUSED(sigma2); CV_UNUSED(borderType);
591	return false;
592	#endif
593	}
594	#endif
595
596	template<typename T>
597	static bool validateGaussianBlurKernel(std::vector<T>& kernel)
598	{
599	softdouble validation_sum = softdouble::zero();
600	for (size_t i = `0`; i < kernel.size(); i++)
601	{
602	validation_sum += softdouble((double)kernel[i]);
603	}
604
605	bool isValid = validation_sum == softdouble::one();
606	return isValid;
607	}
608
609	void GaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
610	double sigma1, double sigma2,
611	int borderType, AlgorithmHint hint)
612	{
613	CV_INSTRUMENT_REGION();
614
615	if (hint == cv::ALGO_HINT_DEFAULT)
616	hint = cv::getDefaultAlgorithmHint();
617
618	CV_Assert(!_src.empty());
619
620	int type = _src.type();
621	Size size = _src.size();
622	_dst.create( sz: size, type );
623
624	if( (borderType & ~BORDER_ISOLATED) != BORDER_CONSTANT &&
625	((borderType & BORDER_ISOLATED) != `0` \|\| !_src.getMat().isSubmatrix()) )
626	{
627	if( size.height == `1` )
628	ksize.height = `1`;
629	if( size.width == `1` )
630	ksize.width = `1`;
631	}
632
633	if( ksize.width == `1` && ksize.height == `1` )
634	{
635	_src.copyTo(arr: _dst);
636	return;
637	}
638
639	if (sigma2 <= `0`)
640	sigma2 = sigma1;
641
642	bool useOpenCL = ocl::isOpenCLActivated() && _dst.isUMat() && _src.dims() <= `2` &&
643	_src.rows() >= ksize.height && _src.cols() >= ksize.width &&
644	ksize.width > `1` && ksize.height > `1`;
645	CV_UNUSED(useOpenCL);
646
647	int sdepth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
648
649	Mat kx, ky;
650	createGaussianKernels(kx, ky, type, ksize, sigma1, sigma2);
651
652	CV_OCL_RUN(useOpenCL && sdepth == CV_8U &&
653	((ksize.width == `3` && ksize.height == `3`) \|\|
654	(ksize.width == `5` && ksize.height == `5`)),
655	ocl_GaussianBlur_8UC1(_src, _dst, ksize, CV_MAT_DEPTH(type), kernelX: kx, kernelY: ky, borderType)
656	);
657
658	if(sdepth == CV_8U && ((borderType & BORDER_ISOLATED) \|\| !_src.isSubmatrix()))
659	{
660	std::vector<ufixedpoint16> fkx, fky;
661	createGaussianKernels(kx&: fkx, ky&: fky, type, ksize, sigma1, sigma2);
662
663	static bool param_check_gaussian_blur_bitexact_kernels = utils::getConfigurationParameterBool(name: "OPENCV_GAUSSIANBLUR_CHECK_BITEXACT_KERNELS", defaultValue: false);
664	if (param_check_gaussian_blur_bitexact_kernels && !validateGaussianBlurKernel(kernel&: fkx))
665	{
666	CV_LOG_INFO(NULL, "GaussianBlur: bit-exact fx kernel can't be applied: ksize=" << ksize << " sigma=" << Size2d (sigma1, sigma2));
667	}
668	else if (param_check_gaussian_blur_bitexact_kernels && !validateGaussianBlurKernel(kernel&: fky))
669	{
670	CV_LOG_INFO(NULL, "GaussianBlur: bit-exact fy kernel can't be applied: ksize=" << ksize << " sigma=" << Size2d (sigma1, sigma2));
671	}
672	else
673	{
674	CV_OCL_RUN(useOpenCL,
675	ocl_sepFilter2D_BitExact(_src, _dst, ddepth: sdepth,
676	ksize,
677	fkx: (const uint16_t)&fkx [`0`], fky: (const* uint16_t*)&fky [`0`],
678	anchor: Point (-`1`, -`1`), delta: `0`, borderType,
679	shift_bits: `8`/shift_bits/)
680	);
681
682	Mat src = _src.getMat();
683	Mat dst = _dst.getMat();
684
685	if (src.data == dst.data)
686	src = src.clone();
687
688	if ((sigma1 == `0.0`) && (sigma2 == `0.0`) && (ksize.height == ksize.width))
689	{
690	Point ofs;
691	Size wsz(src.cols, src.rows);
692	Mat src2 = src;
693	if(!(borderType & BORDER_ISOLATED))
694	src2.locateROI( wholeSize&: wsz, ofs );
695
696	CALL_HAL(gaussianBlurBinomial, cv_hal_gaussianBlurBinomial, src2.ptr(), src2.step, dst.ptr(), dst.step, src2.cols, src2.rows, sdepth, cn,
697	ofs.x, ofs.y, wsz.width - src2.cols - ofs.x, wsz.height - src2.rows - ofs.y, ksize.width,
698	borderType & ~BORDER_ISOLATED);
699	}
700
701	if (hint == ALGO_HINT_APPROX)
702	{
703	Point ofs;
704	Size wsz(src.cols, src.rows);
705	if(!(borderType & BORDER_ISOLATED))
706	src.locateROI( wholeSize&: wsz, ofs );
707
708	CALL_HAL(gaussianBlur, cv_hal_gaussianBlur, src.ptr(), src.step, dst.ptr(), dst.step, src.cols, src.rows, sdepth, cn,
709	ofs.x, ofs.y, wsz.width - src.cols - ofs.x, wsz.height - src.rows - ofs.y, ksize.width, ksize.height,
710	sigma1, sigma2, borderType & ~BORDER_ISOLATED);
711
712	#ifdef ENABLE_IPP_GAUSSIAN_BLUR
713	// IPP is not bit-exact to OpenCV implementation
714	CV_IPP_RUN_FAST(ipp_GaussianBlur(src, dst, ksize, sigma1, sigma2, borderType));
715	#endif
716	CV_OVX_RUN(true,
717	openvx_gaussianBlur(src, dst, ksize, sigma1, sigma2, borderType))
718	}
719
720	CV_CPU_DISPATCH(GaussianBlurFixedPoint, (src, dst, (const uint16_t)&fkx[`0`], (int)fkx.size(), (const* uint16_t)&fky[`0`], (int*)fky.size(), borderType),
721	CV_CPU_DISPATCH_MODES_ALL);
722
723	return;
724	}
725	}
726	if(sdepth == CV_16U && ((borderType & BORDER_ISOLATED) \|\| !_src.isSubmatrix()))
727	{
728	CV_LOG_INFO(NULL, "GaussianBlur: running bit-exact version...");
729
730	std::vector<ufixedpoint32> fkx, fky;
731	createGaussianKernels(kx&: fkx, ky&: fky, type, ksize, sigma1, sigma2);
732
733	static bool param_check_gaussian_blur_bitexact_kernels = utils::getConfigurationParameterBool(name: "OPENCV_GAUSSIANBLUR_CHECK_BITEXACT_KERNELS", defaultValue: false);
734	if (param_check_gaussian_blur_bitexact_kernels && !validateGaussianBlurKernel(kernel&: fkx))
735	{
736	CV_LOG_INFO(NULL, "GaussianBlur: bit-exact fx kernel can't be applied: ksize=" << ksize << " sigma=" << Size2d (sigma1, sigma2));
737	}
738	else if (param_check_gaussian_blur_bitexact_kernels && !validateGaussianBlurKernel(kernel&: fky))
739	{
740	CV_LOG_INFO(NULL, "GaussianBlur: bit-exact fy kernel can't be applied: ksize=" << ksize << " sigma=" << Size2d (sigma1, sigma2));
741	}
742	else
743	{
744	// TODO: implement ocl_sepFilter2D_BitExact -- how to deal with bdepth?
745	// CV_OCL_RUN(useOpenCL,
746	// ocl_sepFilter2D_BitExact(_src, _dst, sdepth,
747	// ksize,
748	// (const uint32_t)&fkx[0], (const uint32_t)&fky[0],
749	// Point(-1, -1), 0, borderType,
750	// 16/shift_bits/)
751	// );
752
753	Mat src = _src.getMat();
754	Mat dst = _dst.getMat();
755
756	if (src.data == dst.data)
757	src = src.clone();
758
759	if ((sigma1 == `0.0`) && (sigma2 == `0.0`) && (ksize.height == ksize.width))
760	{
761	Point ofs;
762	Size wsz(src.cols, src.rows);
763	Mat src2 = src;
764	if(!(borderType & BORDER_ISOLATED))
765	src2.locateROI( wholeSize&: wsz, ofs );
766
767	CALL_HAL(gaussianBlurBinomial, cv_hal_gaussianBlurBinomial, src2.ptr(), src2.step, dst.ptr(), dst.step, src2.cols, src2.rows, sdepth, cn,
768	ofs.x, ofs.y, wsz.width - src2.cols - ofs.x, wsz.height - src2.rows - ofs.y, ksize.width, borderType&~BORDER_ISOLATED);
769	}
770
771	if (hint == ALGO_HINT_APPROX)
772	{
773	Point ofs;
774	Size wsz(src.cols, src.rows);
775	if(!(borderType & BORDER_ISOLATED))
776	src.locateROI( wholeSize&: wsz, ofs );
777
778	CALL_HAL(gaussianBlur, cv_hal_gaussianBlur, src.ptr(), src.step, dst.ptr(), dst.step, src.cols, src.rows, sdepth, cn,
779	ofs.x, ofs.y, wsz.width - src.cols - ofs.x, wsz.height - src.rows - ofs.y, ksize.width, ksize.height,
780	sigma1, sigma2, borderType & ~BORDER_ISOLATED);
781
782	#ifdef ENABLE_IPP_GAUSSIAN_BLUR
783	// IPP is not bit-exact to OpenCV implementation
784	CV_IPP_RUN_FAST(ipp_GaussianBlur(src, dst, ksize, sigma1, sigma2, borderType));
785	#endif
786	CV_OVX_RUN(true,
787	openvx_gaussianBlur(src, dst, ksize, sigma1, sigma2, borderType))
788	}
789
790	CV_CPU_DISPATCH(GaussianBlurFixedPoint, (src, dst, (const uint32_t)&fkx[`0`], (int)fkx.size(), (const* uint32_t)&fky[`0`], (int*)fky.size(), borderType),
791	CV_CPU_DISPATCH_MODES_ALL);
792
793	return;
794	}
795	}
796
797	#ifdef HAVE_OPENCL
798	if (useOpenCL)
799	{
800	sepFilter2D(src: _src, dst: _dst, ddepth: sdepth, kernelX: kx, kernelY: ky, anchor: Point (-`1`, -`1`), delta: `0`, borderType);
801	return;
802	}
803	#endif
804
805	Mat src = _src.getMat();
806	Mat dst = _dst.getMat();
807
808	Point ofs;
809	Size wsz(src.cols, src.rows);
810	if(!(borderType & BORDER_ISOLATED))
811	src.locateROI( wholeSize&: wsz, ofs );
812
813	CALL_HAL(gaussianBlur, cv_hal_gaussianBlur, src.ptr(), src.step, dst.ptr(), dst.step, src.cols, src.rows, sdepth, cn,
814	ofs.x, ofs.y, wsz.width - src.cols - ofs.x, wsz.height - src.rows - ofs.y, ksize.width, ksize.height,
815	sigma1, sigma2, borderType & ~BORDER_ISOLATED);
816
817	CV_OVX_RUN(true,
818	openvx_gaussianBlur(src, dst, ksize, sigma1, sigma2, borderType))
819
820	#if defined ENABLE_IPP_GAUSSIAN_BLUR
821	// IPP is not bit-exact to OpenCV implementation
822	CV_IPP_RUN_FAST(ipp_GaussianBlur(src, dst, ksize, sigma1, sigma2, borderType));
823	#endif
824
825	sepFilter2D(src, dst, ddepth: sdepth, kernelX: kx, kernelY: ky, anchor: Point (-`1`, -`1`), delta: `0`, borderType);
826	}
827
828	} // namespace
829
830	//////////////////////////////////////////////////////////////////////////////////////////
831
832	CV_IMPL void
833	cvSmooth( const void* srcarr, void* dstarr, int smooth_type,
834	int param1, int param2, double param3, double param4 )
835	{
836	cv::Mat src = cv::cvarrToMat(arr: srcarr), dst0 = cv::cvarrToMat(arr: dstarr), dst = dst0;
837
838	CV_Assert( dst.size() == src.size() &&
839	(smooth_type == CV_BLUR_NO_SCALE \|\| dst.type() == src.type()) );
840
841	if( param2 <= `0` )
842	param2 = param1;
843
844	if( smooth_type == CV_BLUR \|\| smooth_type == CV_BLUR_NO_SCALE )
845	cv::boxFilter( src, dst, ddepth: dst.depth(), ksize: cv::Size (param1, param2), anchor: cv::Point (-`1`,-`1`),
846	normalize: smooth_type == CV_BLUR, borderType: cv::BORDER_REPLICATE );
847	else if( smooth_type == CV_GAUSSIAN )
848	cv::GaussianBlur( src: src, dst: dst, ksize: cv::Size (param1, param2), sigma1: param3, sigma2: param4, borderType: cv::BORDER_REPLICATE );
849	else if( smooth_type == CV_MEDIAN )
850	cv::medianBlur( src, dst, ksize: param1 );
851	else
852	cv::bilateralFilter( src, dst, d: param1, sigmaColor: param3, sigmaSpace: param4, borderType: cv::BORDER_REPLICATE );
853
854	if( dst.data != dst0.data )
855	CV_Error( cv::Error::StsUnmatchedFormats, "The destination image does not have the proper type" );
856	}
857
/* End of file. */
859

Provided by KDAB

Improve your Profiling and Debugging skills

Find out more

Definitions

source code of opencv/modules/imgproc/src/smooth.dispatch.cpp