| 1 | // This file is part of OpenCV project. |
| 2 | // It is subject to the license terms in the LICENSE file found in the top-level directory |
| 3 | // of this distribution and at http://opencv.org/license.html |
| 4 | |
| 5 | |
| 6 | #include "precomp.hpp" |
| 7 | #include "opencl_kernels_core.hpp" |
| 8 | #include "stat.hpp" |
| 9 | |
| 10 | #include "norm.simd.hpp" |
| 11 | #include "norm.simd_declarations.hpp" |
| 12 | |
| 13 | /****************************************************************************************\ |
| 14 | * norm * |
| 15 | \****************************************************************************************/ |
| 16 | |
| 17 | namespace cv { namespace hal { |
| 18 | |
| 19 | extern const uchar popCountTable[256] = |
| 20 | { |
| 21 | 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, |
| 22 | 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, |
| 23 | 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, |
| 24 | 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, |
| 25 | 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, |
| 26 | 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, |
| 27 | 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, |
| 28 | 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 |
| 29 | }; |
| 30 | |
| 31 | static const uchar popCountTable2[] = |
| 32 | { |
| 33 | 0, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, |
| 34 | 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, |
| 35 | 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, |
| 36 | 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, |
| 37 | 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, |
| 38 | 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, |
| 39 | 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, |
| 40 | 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4 |
| 41 | }; |
| 42 | |
| 43 | static const uchar popCountTable4[] = |
| 44 | { |
| 45 | 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, |
| 46 | 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, |
| 47 | 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, |
| 48 | 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, |
| 49 | 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, |
| 50 | 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, |
| 51 | 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, |
| 52 | 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 |
| 53 | }; |
| 54 | |
| 55 | |
| 56 | int normHamming(const uchar* a, int n, int cellSize) |
| 57 | { |
| 58 | int output; |
| 59 | CALL_HAL_RET(normHamming8u, cv_hal_normHamming8u, output, a, n, cellSize); |
| 60 | |
| 61 | if( cellSize == 1 ) |
| 62 | return normHamming(a, n); |
| 63 | const uchar* tab = 0; |
| 64 | if( cellSize == 2 ) |
| 65 | tab = popCountTable2; |
| 66 | else if( cellSize == 4 ) |
| 67 | tab = popCountTable4; |
| 68 | else |
| 69 | return -1; |
| 70 | int i = 0; |
| 71 | int result = 0; |
| 72 | #if (CV_SIMD || CV_SIMD_SCALABLE) |
| 73 | v_uint64 t = vx_setzero_u64(); |
| 74 | if ( cellSize == 2) |
| 75 | { |
| 76 | v_uint16 mask = v_reinterpret_as_u16(a: vx_setall_u8(v: 0x55)); |
| 77 | for(; i <= n - VTraits<v_uint8>::vlanes(); i += VTraits<v_uint8>::vlanes()) |
| 78 | { |
| 79 | v_uint16 a0 = v_reinterpret_as_u16(a: vx_load(ptr: a + i)); |
| 80 | t = v_add(a: t, b: v_popcount(a: v_reinterpret_as_u64(a: v_and(a: v_or(a: a0, b: v_shr<1>(a: a0)), b: mask)))); |
| 81 | } |
| 82 | } |
| 83 | else // cellSize == 4 |
| 84 | { |
| 85 | v_uint16 mask = v_reinterpret_as_u16(a: vx_setall_u8(v: 0x11)); |
| 86 | for(; i <= n - VTraits<v_uint8>::vlanes(); i += VTraits<v_uint8>::vlanes()) |
| 87 | { |
| 88 | v_uint16 a0 = v_reinterpret_as_u16(a: vx_load(ptr: a + i)); |
| 89 | v_uint16 a1 = v_or(a: a0, b: v_shr<2>(a: a0)); |
| 90 | t = v_add(a: t, b: v_popcount(a: v_reinterpret_as_u64(a: v_and(a: v_or(a: a1, b: v_shr<1>(a: a1)), b: mask)))); |
| 91 | |
| 92 | } |
| 93 | } |
| 94 | result += (int)v_reduce_sum(a: t); |
| 95 | vx_cleanup(); |
| 96 | #elif CV_ENABLE_UNROLLED |
| 97 | for( ; i <= n - 4; i += 4 ) |
| 98 | result += tab[a[i]] + tab[a[i+1]] + tab[a[i+2]] + tab[a[i+3]]; |
| 99 | #endif |
| 100 | for( ; i < n; i++ ) |
| 101 | result += tab[a[i]]; |
| 102 | return result; |
| 103 | } |
| 104 | |
| 105 | int normHamming(const uchar* a, const uchar* b, int n, int cellSize) |
| 106 | { |
| 107 | int output; |
| 108 | CALL_HAL_RET(normHammingDiff8u, cv_hal_normHammingDiff8u, output, a, b, n, cellSize); |
| 109 | |
| 110 | if( cellSize == 1 ) |
| 111 | return normHamming(a, b, n); |
| 112 | const uchar* tab = 0; |
| 113 | if( cellSize == 2 ) |
| 114 | tab = popCountTable2; |
| 115 | else if( cellSize == 4 ) |
| 116 | tab = popCountTable4; |
| 117 | else |
| 118 | return -1; |
| 119 | int i = 0; |
| 120 | int result = 0; |
| 121 | #if (CV_SIMD || CV_SIMD_SCALABLE) |
| 122 | v_uint64 t = vx_setzero_u64(); |
| 123 | if ( cellSize == 2) |
| 124 | { |
| 125 | v_uint16 mask = v_reinterpret_as_u16(a: vx_setall_u8(v: 0x55)); |
| 126 | for(; i <= n - VTraits<v_uint8>::vlanes(); i += VTraits<v_uint8>::vlanes()) |
| 127 | { |
| 128 | v_uint16 ab0 = v_reinterpret_as_u16(a: v_xor(a: vx_load(ptr: a + i), b: vx_load(ptr: b + i))); |
| 129 | t = v_add(a: t, b: v_popcount(a: v_reinterpret_as_u64(a: v_and(a: v_or(a: ab0, b: v_shr<1>(a: ab0)), b: mask)))); |
| 130 | } |
| 131 | } |
| 132 | else // cellSize == 4 |
| 133 | { |
| 134 | v_uint16 mask = v_reinterpret_as_u16(a: vx_setall_u8(v: 0x11)); |
| 135 | for(; i <= n - VTraits<v_uint8>::vlanes(); i += VTraits<v_uint8>::vlanes()) |
| 136 | { |
| 137 | v_uint16 ab0 = v_reinterpret_as_u16(a: v_xor(a: vx_load(ptr: a + i), b: vx_load(ptr: b + i))); |
| 138 | v_uint16 ab1 = v_or(a: ab0, b: v_shr<2>(a: ab0)); |
| 139 | t = v_add(a: t, b: v_popcount(a: v_reinterpret_as_u64(a: v_and(a: v_or(a: ab1, b: v_shr<1>(a: ab1)), b: mask)))); |
| 140 | } |
| 141 | } |
| 142 | result += (int)v_reduce_sum(a: t); |
| 143 | vx_cleanup(); |
| 144 | #elif CV_ENABLE_UNROLLED |
| 145 | for( ; i <= n - 4; i += 4 ) |
| 146 | result += tab[a[i] ^ b[i]] + tab[a[i+1] ^ b[i+1]] + |
| 147 | tab[a[i+2] ^ b[i+2]] + tab[a[i+3] ^ b[i+3]]; |
| 148 | #endif |
| 149 | for( ; i < n; i++ ) |
| 150 | result += tab[a[i] ^ b[i]]; |
| 151 | return result; |
| 152 | } |
| 153 | |
| 154 | float normL2Sqr_(const float* a, const float* b, int n) |
| 155 | { |
| 156 | int j = 0; float d = 0.f; |
| 157 | #if (CV_SIMD || CV_SIMD_SCALABLE) |
| 158 | v_float32 v_d0 = vx_setzero_f32(), v_d1 = vx_setzero_f32(); |
| 159 | v_float32 v_d2 = vx_setzero_f32(), v_d3 = vx_setzero_f32(); |
| 160 | for (; j <= n - 4 * VTraits<v_float32>::vlanes(); j += 4 * VTraits<v_float32>::vlanes()) |
| 161 | { |
| 162 | v_float32 t0 = v_sub(a: vx_load(ptr: a + j), b: vx_load(ptr: b + j)); |
| 163 | v_float32 t1 = v_sub(a: vx_load(ptr: a + j + VTraits<v_float32>::vlanes()), b: vx_load(ptr: b + j + VTraits<v_float32>::vlanes())); |
| 164 | v_d0 = v_muladd(a: t0, b: t0, c: v_d0); |
| 165 | v_float32 t2 = v_sub(a: vx_load(ptr: a + j + 2 * VTraits<v_float32>::vlanes()), b: vx_load(ptr: b + j + 2 * VTraits<v_float32>::vlanes())); |
| 166 | v_d1 = v_muladd(a: t1, b: t1, c: v_d1); |
| 167 | v_float32 t3 = v_sub(a: vx_load(ptr: a + j + 3 * VTraits<v_float32>::vlanes()), b: vx_load(ptr: b + j + 3 * VTraits<v_float32>::vlanes())); |
| 168 | v_d2 = v_muladd(a: t2, b: t2, c: v_d2); |
| 169 | v_d3 = v_muladd(a: t3, b: t3, c: v_d3); |
| 170 | } |
| 171 | d = v_reduce_sum(a: v_add(a: v_add(a: v_add(a: v_d0, b: v_d1), b: v_d2), b: v_d3)); |
| 172 | #endif |
| 173 | for( ; j < n; j++ ) |
| 174 | { |
| 175 | float t = a[j] - b[j]; |
| 176 | d += t*t; |
| 177 | } |
| 178 | return d; |
| 179 | } |
| 180 | |
| 181 | |
| 182 | float normL1_(const float* a, const float* b, int n) |
| 183 | { |
| 184 | int j = 0; float d = 0.f; |
| 185 | #if (CV_SIMD || CV_SIMD_SCALABLE) |
| 186 | v_float32 v_d0 = vx_setzero_f32(), v_d1 = vx_setzero_f32(); |
| 187 | v_float32 v_d2 = vx_setzero_f32(), v_d3 = vx_setzero_f32(); |
| 188 | for (; j <= n - 4 * VTraits<v_float32>::vlanes(); j += 4 * VTraits<v_float32>::vlanes()) |
| 189 | { |
| 190 | v_d0 = v_add(a: v_d0, b: v_absdiff(a: vx_load(ptr: a + j), b: vx_load(ptr: b + j))); |
| 191 | v_d1 = v_add(a: v_d1, b: v_absdiff(a: vx_load(ptr: a + j + VTraits<v_float32>::vlanes()), b: vx_load(ptr: b + j + VTraits<v_float32>::vlanes()))); |
| 192 | v_d2 = v_add(a: v_d2, b: v_absdiff(a: vx_load(ptr: a + j + 2 * VTraits<v_float32>::vlanes()), b: vx_load(ptr: b + j + 2 * VTraits<v_float32>::vlanes()))); |
| 193 | v_d3 = v_add(a: v_d3, b: v_absdiff(a: vx_load(ptr: a + j + 3 * VTraits<v_float32>::vlanes()), b: vx_load(ptr: b + j + 3 * VTraits<v_float32>::vlanes()))); |
| 194 | } |
| 195 | d = v_reduce_sum(a: v_add(a: v_add(a: v_add(a: v_d0, b: v_d1), b: v_d2), b: v_d3)); |
| 196 | #endif |
| 197 | for( ; j < n; j++ ) |
| 198 | d += std::abs(x: a[j] - b[j]); |
| 199 | return d; |
| 200 | } |
| 201 | |
| 202 | int normL1_(const uchar* a, const uchar* b, int n) |
| 203 | { |
| 204 | int j = 0, d = 0; |
| 205 | #if (CV_SIMD || CV_SIMD_SCALABLE) |
| 206 | for (; j <= n - 4 * VTraits<v_uint8>::vlanes(); j += 4 * VTraits<v_uint8>::vlanes()) |
| 207 | d += v_reduce_sad(a: vx_load(ptr: a + j), b: vx_load(ptr: b + j)) + |
| 208 | v_reduce_sad(a: vx_load(ptr: a + j + VTraits<v_uint8>::vlanes()), b: vx_load(ptr: b + j + VTraits<v_uint8>::vlanes())) + |
| 209 | v_reduce_sad(a: vx_load(ptr: a + j + 2 * VTraits<v_uint8>::vlanes()), b: vx_load(ptr: b + j + 2 * VTraits<v_uint8>::vlanes())) + |
| 210 | v_reduce_sad(a: vx_load(ptr: a + j + 3 * VTraits<v_uint8>::vlanes()), b: vx_load(ptr: b + j + 3 * VTraits<v_uint8>::vlanes())); |
| 211 | #endif |
| 212 | for( ; j < n; j++ ) |
| 213 | d += std::abs(x: a[j] - b[j]); |
| 214 | return d; |
| 215 | } |
| 216 | |
| 217 | } //cv::hal |
| 218 | |
| 219 | //================================================================================================== |
| 220 | |
| 221 | typedef int (*NormFunc)(const uchar*, const uchar*, uchar*, int, int); |
| 222 | typedef int (*NormDiffFunc)(const uchar*, const uchar*, const uchar*, uchar*, int, int); |
| 223 | |
| 224 | #ifdef HAVE_OPENCL |
| 225 | |
| 226 | static bool ocl_norm( InputArray _src, int normType, InputArray _mask, double & result ) |
| 227 | { |
| 228 | const ocl::Device & d = ocl::Device::getDefault(); |
| 229 | |
| 230 | #ifdef __ANDROID__ |
| 231 | if (d.isNVidia()) |
| 232 | return false; |
| 233 | #endif |
| 234 | const int cn = _src.channels(); |
| 235 | if (cn > 4) |
| 236 | return false; |
| 237 | int type = _src.type(), depth = CV_MAT_DEPTH(type); |
| 238 | bool doubleSupport = d.doubleFPConfig() > 0, |
| 239 | haveMask = _mask.kind() != _InputArray::NONE; |
| 240 | |
| 241 | if (depth >= CV_16F) |
| 242 | return false; // TODO: support FP16 |
| 243 | |
| 244 | if ( !(normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR) || |
| 245 | (!doubleSupport && depth == CV_64F)) |
| 246 | return false; |
| 247 | |
| 248 | UMat src = _src.getUMat(); |
| 249 | |
| 250 | if (normType == NORM_INF) |
| 251 | { |
| 252 | if (!ocl_minMaxIdx(_src, NULL, maxVal: &result, NULL, NULL, _mask, |
| 253 | ddepth: std::max(a: depth, CV_32S), absValues: depth != CV_8U && depth != CV_16U)) |
| 254 | return false; |
| 255 | } |
| 256 | else if (normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR) |
| 257 | { |
| 258 | Scalar sc; |
| 259 | bool unstype = depth == CV_8U || depth == CV_16U; |
| 260 | |
| 261 | if ( !ocl_sum(src: haveMask ? src : src.reshape(cn: 1), res&: sc, sum_op: normType == NORM_L2 || normType == NORM_L2SQR ? |
| 262 | OCL_OP_SUM_SQR : (unstype ? OCL_OP_SUM : OCL_OP_SUM_ABS), _mask) ) |
| 263 | return false; |
| 264 | |
| 265 | double s = 0.0; |
| 266 | for (int i = 0; i < (haveMask ? cn : 1); ++i) |
| 267 | s += sc[i]; |
| 268 | |
| 269 | result = normType == NORM_L1 || normType == NORM_L2SQR ? s : std::sqrt(x: s); |
| 270 | } |
| 271 | |
| 272 | return true; |
| 273 | } |
| 274 | |
| 275 | #endif |
| 276 | |
| 277 | static NormFunc getNormFunc(int normType, int depth) { |
| 278 | CV_INSTRUMENT_REGION(); |
| 279 | CV_CPU_DISPATCH(getNormFunc, (normType, depth), CV_CPU_DISPATCH_MODES_ALL); |
| 280 | } |
| 281 | static NormDiffFunc getNormDiffFunc(int normType, int depth) { |
| 282 | CV_INSTRUMENT_REGION(); |
| 283 | CV_CPU_DISPATCH(getNormDiffFunc, (normType, depth), CV_CPU_DISPATCH_MODES_ALL); |
| 284 | } |
| 285 | |
| 286 | double norm( InputArray _src, int normType, InputArray _mask ) |
| 287 | { |
| 288 | CV_INSTRUMENT_REGION(); |
| 289 | |
| 290 | normType &= NORM_TYPE_MASK; |
| 291 | CV_Assert( normType == NORM_INF || normType == NORM_L1 || |
| 292 | normType == NORM_L2 || normType == NORM_L2SQR || |
| 293 | ((normType == NORM_HAMMING || normType == NORM_HAMMING2) && _src.type() == CV_8U) ); |
| 294 | |
| 295 | #if defined HAVE_OPENCL |
| 296 | double _result = 0; |
| 297 | #endif |
| 298 | |
| 299 | #ifdef HAVE_OPENCL |
| 300 | CV_OCL_RUN_(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= 2, |
| 301 | ocl_norm(_src, normType, _mask, result&: _result), |
| 302 | _result) |
| 303 | #endif |
| 304 | |
| 305 | Mat src = _src.getMat(), mask = _mask.getMat(); |
| 306 | int depth = src.depth(), cn = src.channels(); |
| 307 | if( src.dims <= 2 ) |
| 308 | { |
| 309 | double result; |
| 310 | CALL_HAL_RET(norm, cv_hal_norm, result, src.data, src.step, mask.data, mask.step, src.cols, src.rows, src.type(), normType); |
| 311 | } |
| 312 | else if( src.isContinuous() && mask.isContinuous() ) |
| 313 | { |
| 314 | double result; |
| 315 | CALL_HAL_RET(norm, cv_hal_norm, result, src.data, 0, mask.data, 0, (int)src.total(), 1, src.type(), normType); |
| 316 | } |
| 317 | |
| 318 | NormFunc func = getNormFunc(normType: normType >> 1, depth: depth == CV_16F ? CV_32F : depth); |
| 319 | CV_Assert( (normType >> 1) >= 3 || func != 0 ); |
| 320 | |
| 321 | if( src.isContinuous() && mask.empty() ) |
| 322 | { |
| 323 | size_t len = src.total()*cn; |
| 324 | if( len == (size_t)(int)len ) |
| 325 | { |
| 326 | if( depth == CV_32F ) |
| 327 | { |
| 328 | const uchar* data = src.ptr<const uchar>(); |
| 329 | |
| 330 | if( normType == NORM_L2 || normType == NORM_L2SQR || normType == NORM_L1 ) |
| 331 | { |
| 332 | double result = 0; |
| 333 | func(data, 0, (uchar*)&result, (int)len, 1); |
| 334 | return normType == NORM_L2 ? std::sqrt(x: result) : result; |
| 335 | } |
| 336 | if( normType == NORM_INF ) |
| 337 | { |
| 338 | float result = 0; |
| 339 | func(data, 0, (uchar*)&result, (int)len, 1); |
| 340 | return result; |
| 341 | } |
| 342 | } |
| 343 | if( depth == CV_8U ) |
| 344 | { |
| 345 | const uchar* data = src.ptr<uchar>(); |
| 346 | |
| 347 | if( normType == NORM_HAMMING ) |
| 348 | { |
| 349 | return hal::normHamming(a: data, n: (int)len, cellSize: 1); |
| 350 | } |
| 351 | |
| 352 | if( normType == NORM_HAMMING2 ) |
| 353 | { |
| 354 | return hal::normHamming(a: data, n: (int)len, cellSize: 2); |
| 355 | } |
| 356 | } |
| 357 | } |
| 358 | } |
| 359 | |
| 360 | CV_Assert( mask.empty() || mask.type() == CV_8U ); |
| 361 | |
| 362 | if( normType == NORM_HAMMING || normType == NORM_HAMMING2 ) |
| 363 | { |
| 364 | if( !mask.empty() ) |
| 365 | { |
| 366 | Mat temp; |
| 367 | bitwise_and(src1: src, src2: mask, dst: temp); |
| 368 | return norm(src: temp, normType); |
| 369 | } |
| 370 | int cellSize = normType == NORM_HAMMING ? 1 : 2; |
| 371 | |
| 372 | const Mat* arrays[] = {&src, 0}; |
| 373 | uchar* ptrs[1] = {}; |
| 374 | NAryMatIterator it(arrays, ptrs); |
| 375 | int total = (int)it.size; |
| 376 | int result = 0; |
| 377 | |
| 378 | for( size_t i = 0; i < it.nplanes; i++, ++it ) |
| 379 | { |
| 380 | result += hal::normHamming(a: ptrs[0], n: total, cellSize); |
| 381 | } |
| 382 | |
| 383 | return result; |
| 384 | } |
| 385 | |
| 386 | const Mat* arrays[] = {&src, &mask, 0}; |
| 387 | uchar* ptrs[2] = {}; |
| 388 | union |
| 389 | { |
| 390 | double d; |
| 391 | int i; |
| 392 | float f; |
| 393 | } |
| 394 | result; |
| 395 | result.d = 0; |
| 396 | NAryMatIterator it(arrays, ptrs); |
| 397 | CV_CheckLT((size_t)it.size, (size_t)INT_MAX, "" ); |
| 398 | |
| 399 | if ((normType == NORM_L1 && depth <= CV_16S) || |
| 400 | ((normType == NORM_L2 || normType == NORM_L2SQR) && depth <= CV_8S)) |
| 401 | { |
| 402 | // special case to handle "integer" overflow in accumulator |
| 403 | const size_t esz = src.elemSize(); |
| 404 | const int total = (int)it.size; |
| 405 | const int intSumBlockSize = (normType == NORM_L1 && depth <= CV_8S ? (1 << 23) : (1 << 15))/cn; |
| 406 | const int blockSize = std::min(a: total, b: intSumBlockSize); |
| 407 | int isum = 0; |
| 408 | int count = 0; |
| 409 | |
| 410 | for (size_t i = 0; i < it.nplanes; i++, ++it) |
| 411 | { |
| 412 | for (int j = 0; j < total; j += blockSize) |
| 413 | { |
| 414 | int bsz = std::min(a: total - j, b: blockSize); |
| 415 | func(ptrs[0], ptrs[1], (uchar*)&isum, bsz, cn); |
| 416 | count += bsz; |
| 417 | if (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) |
| 418 | { |
| 419 | result.d += isum; |
| 420 | isum = 0; |
| 421 | count = 0; |
| 422 | } |
| 423 | ptrs[0] += bsz*esz; |
| 424 | if (ptrs[1]) |
| 425 | ptrs[1] += bsz; |
| 426 | } |
| 427 | } |
| 428 | } |
| 429 | else if (depth == CV_16F) |
| 430 | { |
| 431 | const size_t esz = src.elemSize(); |
| 432 | const int total = (int)it.size; |
| 433 | const int blockSize = std::min(a: total, b: divUp(a: 1024, b: cn)); |
| 434 | AutoBuffer<float, 1026/*divUp(1024,3)*3*/> fltbuf(blockSize * cn); |
| 435 | float* data0 = fltbuf.data(); |
| 436 | for (size_t i = 0; i < it.nplanes; i++, ++it) |
| 437 | { |
| 438 | for (int j = 0; j < total; j += blockSize) |
| 439 | { |
| 440 | int bsz = std::min(a: total - j, b: blockSize); |
| 441 | hal::cvt16f32f(src: (const hfloat*)ptrs[0], dst: data0, len: bsz * cn); |
| 442 | func((uchar*)data0, ptrs[1], (uchar*)&result.f, bsz, cn); |
| 443 | ptrs[0] += bsz*esz; |
| 444 | if (ptrs[1]) |
| 445 | ptrs[1] += bsz; |
| 446 | } |
| 447 | } |
| 448 | } |
| 449 | else |
| 450 | { |
| 451 | // generic implementation |
| 452 | for (size_t i = 0; i < it.nplanes; i++, ++it) |
| 453 | { |
| 454 | func(ptrs[0], ptrs[1], (uchar*)&result, (int)it.size, cn); |
| 455 | } |
| 456 | } |
| 457 | |
| 458 | if( normType == NORM_INF ) |
| 459 | { |
| 460 | if(depth == CV_64F) |
| 461 | return result.d; |
| 462 | else if (depth == CV_32F || depth == CV_16F) |
| 463 | return result.f; |
| 464 | else |
| 465 | return result.i; |
| 466 | } |
| 467 | else if( normType == NORM_L2 ) |
| 468 | return std::sqrt(x: result.d); |
| 469 | |
| 470 | return result.d; |
| 471 | } |
| 472 | |
| 473 | //================================================================================================== |
| 474 | |
| 475 | #ifdef HAVE_OPENCL |
| 476 | static bool ocl_norm( InputArray _src1, InputArray _src2, int normType, InputArray _mask, double & result ) |
| 477 | { |
| 478 | #ifdef __ANDROID__ |
| 479 | if (ocl::Device::getDefault().isNVidia()) |
| 480 | return false; |
| 481 | #endif |
| 482 | |
| 483 | Scalar sc1, sc2; |
| 484 | int cn = _src1.channels(); |
| 485 | if (cn > 4) |
| 486 | return false; |
| 487 | int type = _src1.type(), depth = CV_MAT_DEPTH(type); |
| 488 | bool relative = (normType & NORM_RELATIVE) != 0; |
| 489 | normType &= ~NORM_RELATIVE; |
| 490 | bool normsum = normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR; |
| 491 | |
| 492 | #ifdef __APPLE__ |
| 493 | if(normType == NORM_L1 && type == CV_16UC3 && !_mask.empty()) |
| 494 | return false; |
| 495 | #endif |
| 496 | |
| 497 | if (normsum) |
| 498 | { |
| 499 | if (!ocl_sum(src: _src1, res&: sc1, sum_op: normType == NORM_L2 || normType == NORM_L2SQR ? |
| 500 | OCL_OP_SUM_SQR : OCL_OP_SUM, _mask, _src2, calc2: relative, res2: sc2)) |
| 501 | return false; |
| 502 | } |
| 503 | else |
| 504 | { |
| 505 | if (!ocl_minMaxIdx(src: _src1, NULL, maxVal: &sc1[0], NULL, NULL, _mask, ddepth: std::max(CV_32S, b: depth), |
| 506 | absValues: false, _src2, maxVal2: relative ? &sc2[0] : NULL)) |
| 507 | return false; |
| 508 | cn = 1; |
| 509 | } |
| 510 | |
| 511 | double s2 = 0; |
| 512 | for (int i = 0; i < cn; ++i) |
| 513 | { |
| 514 | result += sc1[i]; |
| 515 | if (relative) |
| 516 | s2 += sc2[i]; |
| 517 | } |
| 518 | |
| 519 | if (normType == NORM_L2) |
| 520 | { |
| 521 | result = std::sqrt(x: result); |
| 522 | if (relative) |
| 523 | s2 = std::sqrt(x: s2); |
| 524 | } |
| 525 | |
| 526 | if (relative) |
| 527 | result /= (s2 + DBL_EPSILON); |
| 528 | |
| 529 | return true; |
| 530 | } // ocl_norm() |
| 531 | #endif // HAVE_OPENCL |
| 532 | |
| 533 | double norm( InputArray _src1, InputArray _src2, int normType, InputArray _mask ) |
| 534 | { |
| 535 | CV_INSTRUMENT_REGION(); |
| 536 | |
| 537 | CV_CheckTypeEQ(_src1.type(), _src2.type(), "Input type mismatch" ); |
| 538 | CV_Assert(_src1.sameSize(_src2)); |
| 539 | |
| 540 | #if defined HAVE_OPENCL |
| 541 | double _result = 0; |
| 542 | #endif |
| 543 | |
| 544 | #ifdef HAVE_OPENCL |
| 545 | CV_OCL_RUN_(OCL_PERFORMANCE_CHECK(_src1.isUMat()), |
| 546 | ocl_norm(_src1, _src2, normType, _mask, result&: _result), |
| 547 | _result) |
| 548 | #endif |
| 549 | |
| 550 | Mat src1 = _src1.getMat(), src2 = _src2.getMat(), mask = _mask.getMat(); |
| 551 | int depth = src1.depth(), cn = src1.channels(); |
| 552 | if( src1.dims <= 2 ) |
| 553 | { |
| 554 | double result; |
| 555 | CALL_HAL_RET(normDiff, cv_hal_normDiff, result, src1.data, src1.step, src2.data, src2.step, mask.data, mask.step, src1.cols, src1.rows, src1.type(), normType); |
| 556 | } |
| 557 | else if( src1.isContinuous() && src2.isContinuous() && mask.isContinuous() ) |
| 558 | { |
| 559 | double result; |
| 560 | CALL_HAL_RET(normDiff, cv_hal_normDiff, result, src1.data, 0, src2.data, 0, mask.data, 0, (int)src1.total(), 1, src1.type(), normType); |
| 561 | } |
| 562 | |
| 563 | if( normType & CV_RELATIVE ) |
| 564 | { |
| 565 | return norm(_src1, _src2, normType: normType & ~CV_RELATIVE, _mask)/(norm(src: _src2, normType, _mask) + DBL_EPSILON); |
| 566 | } |
| 567 | |
| 568 | normType &= 7; |
| 569 | CV_Assert( normType == NORM_INF || normType == NORM_L1 || |
| 570 | normType == NORM_L2 || normType == NORM_L2SQR || |
| 571 | ((normType == NORM_HAMMING || normType == NORM_HAMMING2) && src1.type() == CV_8U) ); |
| 572 | |
| 573 | NormDiffFunc func = getNormDiffFunc(normType: normType >> 1, depth: depth == CV_16F ? CV_32F : depth); |
| 574 | CV_Assert( (normType >> 1) >= 3 || func != 0 ); |
| 575 | |
| 576 | if( src1.isContinuous() && src2.isContinuous() && mask.empty() ) |
| 577 | { |
| 578 | size_t len = src1.total()*src1.channels(); |
| 579 | if( len == (size_t)(int)len ) |
| 580 | { |
| 581 | if( src1.depth() == CV_32F ) |
| 582 | { |
| 583 | const uchar* data1 = src1.ptr<const uchar>(); |
| 584 | const uchar* data2 = src2.ptr<const uchar>(); |
| 585 | |
| 586 | if( normType == NORM_L2 || normType == NORM_L2SQR || normType == NORM_L1 ) |
| 587 | { |
| 588 | double result = 0; |
| 589 | func(data1, data2, 0, (uchar*)&result, (int)len, 1); |
| 590 | return normType == NORM_L2 ? std::sqrt(x: result) : result; |
| 591 | } |
| 592 | if( normType == NORM_INF ) |
| 593 | { |
| 594 | float result = 0; |
| 595 | func(data1, data2, 0, (uchar*)&result, (int)len, 1); |
| 596 | return result; |
| 597 | } |
| 598 | } |
| 599 | } |
| 600 | } |
| 601 | |
| 602 | CV_Assert( mask.empty() || mask.type() == CV_8U ); |
| 603 | |
| 604 | if( normType == NORM_HAMMING || normType == NORM_HAMMING2 ) |
| 605 | { |
| 606 | if( !mask.empty() ) |
| 607 | { |
| 608 | Mat temp; |
| 609 | bitwise_xor(src1, src2, dst: temp); |
| 610 | bitwise_and(src1: temp, src2: mask, dst: temp); |
| 611 | return norm(src: temp, normType); |
| 612 | } |
| 613 | int cellSize = normType == NORM_HAMMING ? 1 : 2; |
| 614 | |
| 615 | const Mat* arrays[] = {&src1, &src2, 0}; |
| 616 | uchar* ptrs[2] = {}; |
| 617 | NAryMatIterator it(arrays, ptrs); |
| 618 | int total = (int)it.size; |
| 619 | int result = 0; |
| 620 | |
| 621 | for( size_t i = 0; i < it.nplanes; i++, ++it ) |
| 622 | { |
| 623 | result += hal::normHamming(a: ptrs[0], b: ptrs[1], n: total, cellSize); |
| 624 | } |
| 625 | |
| 626 | return result; |
| 627 | } |
| 628 | |
| 629 | const Mat* arrays[] = {&src1, &src2, &mask, 0}; |
| 630 | uchar* ptrs[3] = {}; |
| 631 | union |
| 632 | { |
| 633 | double d; |
| 634 | float f; |
| 635 | int i; |
| 636 | unsigned u; |
| 637 | } |
| 638 | result; |
| 639 | result.d = 0; |
| 640 | NAryMatIterator it(arrays, ptrs); |
| 641 | CV_CheckLT((size_t)it.size, (size_t)INT_MAX, "" ); |
| 642 | |
| 643 | if ((normType == NORM_L1 && depth <= CV_16S) || |
| 644 | ((normType == NORM_L2 || normType == NORM_L2SQR) && depth <= CV_8S)) |
| 645 | { |
| 646 | // special case to handle "integer" overflow in accumulator |
| 647 | const size_t esz = src1.elemSize(); |
| 648 | const int total = (int)it.size; |
| 649 | const int intSumBlockSize = (normType == NORM_L1 && depth <= CV_8S ? (1 << 23) : (1 << 15))/cn; |
| 650 | const int blockSize = std::min(a: total, b: intSumBlockSize); |
| 651 | int isum = 0; |
| 652 | int count = 0; |
| 653 | |
| 654 | for (size_t i = 0; i < it.nplanes; i++, ++it) |
| 655 | { |
| 656 | for (int j = 0; j < total; j += blockSize) |
| 657 | { |
| 658 | int bsz = std::min(a: total - j, b: blockSize); |
| 659 | func(ptrs[0], ptrs[1], ptrs[2], (uchar*)&isum, bsz, cn); |
| 660 | count += bsz; |
| 661 | if (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) |
| 662 | { |
| 663 | result.d += isum; |
| 664 | isum = 0; |
| 665 | count = 0; |
| 666 | } |
| 667 | ptrs[0] += bsz*esz; |
| 668 | ptrs[1] += bsz*esz; |
| 669 | if (ptrs[2]) |
| 670 | ptrs[2] += bsz; |
| 671 | } |
| 672 | } |
| 673 | } |
| 674 | else if (depth == CV_16F) |
| 675 | { |
| 676 | const size_t esz = src1.elemSize(); |
| 677 | const int total = (int)it.size; |
| 678 | const int blockSize = std::min(a: total, b: divUp(a: 512, b: cn)); |
| 679 | AutoBuffer<float, 1026/*divUp(512,3)*3*2*/> fltbuf(blockSize * cn * 2); |
| 680 | float* data0 = fltbuf.data(); |
| 681 | float* data1 = fltbuf.data() + blockSize * cn; |
| 682 | for (size_t i = 0; i < it.nplanes; i++, ++it) |
| 683 | { |
| 684 | for (int j = 0; j < total; j += blockSize) |
| 685 | { |
| 686 | int bsz = std::min(a: total - j, b: blockSize); |
| 687 | hal::cvt16f32f(src: (const hfloat*)ptrs[0], dst: data0, len: bsz * cn); |
| 688 | hal::cvt16f32f(src: (const hfloat*)ptrs[1], dst: data1, len: bsz * cn); |
| 689 | func((uchar*)data0, (uchar*)data1, ptrs[2], (uchar*)&result.f, bsz, cn); |
| 690 | ptrs[0] += bsz*esz; |
| 691 | ptrs[1] += bsz*esz; |
| 692 | if (ptrs[2]) |
| 693 | ptrs[2] += bsz; |
| 694 | } |
| 695 | } |
| 696 | } |
| 697 | else |
| 698 | { |
| 699 | // generic implementation |
| 700 | for (size_t i = 0; i < it.nplanes; i++, ++it) |
| 701 | { |
| 702 | func(ptrs[0], ptrs[1], ptrs[2], (uchar*)&result, (int)it.size, cn); |
| 703 | } |
| 704 | } |
| 705 | |
| 706 | if( normType == NORM_INF ) |
| 707 | { |
| 708 | if (depth == CV_64F) |
| 709 | return result.d; |
| 710 | else if (depth == CV_32F || depth == CV_16F) |
| 711 | return result.f; |
| 712 | else |
| 713 | return result.u; |
| 714 | } |
| 715 | else if( normType == NORM_L2 ) |
| 716 | return std::sqrt(x: result.d); |
| 717 | |
| 718 | return result.d; |
| 719 | } |
| 720 | |
| 721 | cv::Hamming::ResultType Hamming::operator()( const unsigned char* a, const unsigned char* b, int size ) const |
| 722 | { |
| 723 | return cv::hal::normHamming(a, b, n: size); |
| 724 | } |
| 725 | |
| 726 | double PSNR(InputArray _src1, InputArray _src2, double R) |
| 727 | { |
| 728 | CV_INSTRUMENT_REGION(); |
| 729 | |
| 730 | //Input arrays must have depth CV_8U |
| 731 | CV_Assert( _src1.type() == _src2.type() ); |
| 732 | |
| 733 | double diff = std::sqrt(x: norm(_src1, _src2, normType: NORM_L2SQR)/(_src1.total()*_src1.channels())); |
| 734 | return 20*log10(x: R/(diff+DBL_EPSILON)); |
| 735 | } |
| 736 | |
| 737 | |
| 738 | #ifdef HAVE_OPENCL |
| 739 | static bool ocl_normalize( InputArray _src, InputOutputArray _dst, InputArray _mask, int dtype, |
| 740 | double scale, double delta ) |
| 741 | { |
| 742 | UMat src = _src.getUMat(); |
| 743 | |
| 744 | if( _mask.empty() ) |
| 745 | src.convertTo( m: _dst, rtype: dtype, alpha: scale, beta: delta ); |
| 746 | else if (src.channels() <= 4) |
| 747 | { |
| 748 | const ocl::Device & dev = ocl::Device::getDefault(); |
| 749 | |
| 750 | int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), cn = CV_MAT_CN(stype), |
| 751 | ddepth = CV_MAT_DEPTH(dtype), wdepth = std::max(CV_32F, b: std::max(a: sdepth, b: ddepth)), |
| 752 | rowsPerWI = dev.isIntel() ? 4 : 1; |
| 753 | |
| 754 | float fscale = static_cast<float>(scale), fdelta = static_cast<float>(delta); |
| 755 | bool haveScale = std::fabs(x: scale - 1) > DBL_EPSILON, |
| 756 | haveZeroScale = !(std::fabs(x: scale) > DBL_EPSILON), |
| 757 | haveDelta = std::fabs(x: delta) > DBL_EPSILON, |
| 758 | doubleSupport = dev.doubleFPConfig() > 0; |
| 759 | |
| 760 | if (!haveScale && !haveDelta && stype == dtype) |
| 761 | { |
| 762 | _src.copyTo(arr: _dst, mask: _mask); |
| 763 | return true; |
| 764 | } |
| 765 | if (haveZeroScale) |
| 766 | { |
| 767 | _dst.setTo(value: Scalar(delta), mask: _mask); |
| 768 | return true; |
| 769 | } |
| 770 | |
| 771 | if ((sdepth == CV_64F || ddepth == CV_64F) && !doubleSupport) |
| 772 | return false; |
| 773 | |
| 774 | char cvt[2][50]; |
| 775 | String opts = format(fmt: "-D srcT=%s -D dstT=%s -D convertToWT=%s -D cn=%d -D rowsPerWI=%d" |
| 776 | " -D convertToDT=%s -D workT=%s%s%s%s -D srcT1=%s -D dstT1=%s" , |
| 777 | ocl::typeToStr(t: stype), ocl::typeToStr(t: dtype), |
| 778 | ocl::convertTypeStr(sdepth, ddepth: wdepth, cn, buf: cvt[0], buf_size: sizeof(cvt[0])), cn, |
| 779 | rowsPerWI, ocl::convertTypeStr(sdepth: wdepth, ddepth, cn, buf: cvt[1], buf_size: sizeof(cvt[1])), |
| 780 | ocl::typeToStr(CV_MAKE_TYPE(wdepth, cn)), |
| 781 | doubleSupport ? " -D DOUBLE_SUPPORT" : "" , |
| 782 | haveScale ? " -D HAVE_SCALE" : "" , |
| 783 | haveDelta ? " -D HAVE_DELTA" : "" , |
| 784 | ocl::typeToStr(t: sdepth), ocl::typeToStr(t: ddepth)); |
| 785 | |
| 786 | ocl::Kernel k("normalizek" , ocl::core::normalize_oclsrc, opts); |
| 787 | if (k.empty()) |
| 788 | return false; |
| 789 | |
| 790 | UMat mask = _mask.getUMat(), dst = _dst.getUMat(); |
| 791 | |
| 792 | ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(m: src), |
| 793 | maskarg = ocl::KernelArg::ReadOnlyNoSize(m: mask), |
| 794 | dstarg = ocl::KernelArg::ReadWrite(m: dst); |
| 795 | |
| 796 | if (haveScale) |
| 797 | { |
| 798 | if (haveDelta) |
| 799 | k.args(kernel_args: srcarg, kernel_args: maskarg, kernel_args: dstarg, kernel_args: fscale, kernel_args: fdelta); |
| 800 | else |
| 801 | k.args(kernel_args: srcarg, kernel_args: maskarg, kernel_args: dstarg, kernel_args: fscale); |
| 802 | } |
| 803 | else |
| 804 | { |
| 805 | if (haveDelta) |
| 806 | k.args(kernel_args: srcarg, kernel_args: maskarg, kernel_args: dstarg, kernel_args: fdelta); |
| 807 | else |
| 808 | k.args(kernel_args: srcarg, kernel_args: maskarg, kernel_args: dstarg); |
| 809 | } |
| 810 | |
| 811 | size_t globalsize[2] = { (size_t)src.cols, ((size_t)src.rows + rowsPerWI - 1) / rowsPerWI }; |
| 812 | return k.run(dims: 2, globalsize, NULL, sync: false); |
| 813 | } |
| 814 | else |
| 815 | { |
| 816 | UMat temp; |
| 817 | src.convertTo( m: temp, rtype: dtype, alpha: scale, beta: delta ); |
| 818 | temp.copyTo( m: _dst, mask: _mask ); |
| 819 | } |
| 820 | |
| 821 | return true; |
| 822 | } // ocl_normalize |
| 823 | #endif // HAVE_OPENCL |
| 824 | |
| 825 | void normalize(InputArray _src, InputOutputArray _dst, double a, double b, |
| 826 | int norm_type, int rtype, InputArray _mask) |
| 827 | { |
| 828 | CV_INSTRUMENT_REGION(); |
| 829 | |
| 830 | double scale = 1, shift = 0; |
| 831 | int type = _src.type(), depth = CV_MAT_DEPTH(type); |
| 832 | |
| 833 | if( rtype < 0 ) |
| 834 | rtype = _dst.fixedType() ? _dst.depth() : depth; |
| 835 | |
| 836 | if( norm_type == CV_MINMAX ) |
| 837 | { |
| 838 | double smin = 0, smax = 0; |
| 839 | double dmin = MIN( a, b ), dmax = MAX( a, b ); |
| 840 | minMaxIdx( src: _src, minVal: &smin, maxVal: &smax, minIdx: 0, maxIdx: 0, mask: _mask ); |
| 841 | scale = (dmax - dmin)*(smax - smin > DBL_EPSILON ? 1./(smax - smin) : 0); |
| 842 | if( rtype == CV_32F ) |
| 843 | { |
| 844 | scale = (float)scale; |
| 845 | shift = (float)dmin - (float)(smin*scale); |
| 846 | } |
| 847 | else |
| 848 | shift = dmin - smin*scale; |
| 849 | } |
| 850 | else if( norm_type == CV_L2 || norm_type == CV_L1 || norm_type == CV_C ) |
| 851 | { |
| 852 | scale = norm( _src, normType: norm_type, _mask ); |
| 853 | scale = scale > DBL_EPSILON ? a/scale : 0.; |
| 854 | shift = 0; |
| 855 | } |
| 856 | else |
| 857 | CV_Error( cv::Error::StsBadArg, "Unknown/unsupported norm type" ); |
| 858 | |
| 859 | CV_OCL_RUN(_dst.isUMat(), |
| 860 | ocl_normalize(_src, _dst, _mask, dtype: rtype, scale, delta: shift)) |
| 861 | |
| 862 | Mat src = _src.getMat(); |
| 863 | if( _mask.empty() ) |
| 864 | src.convertTo( m: _dst, rtype, alpha: scale, beta: shift ); |
| 865 | else |
| 866 | { |
| 867 | Mat temp; |
| 868 | src.convertTo( m: temp, rtype, alpha: scale, beta: shift ); |
| 869 | temp.copyTo( m: _dst, mask: _mask ); |
| 870 | } |
| 871 | } |
| 872 | |
| 873 | } // namespace |
| 874 | |