| 1 | // Copyright (c) 2019-2022, The rav1e contributors. All rights reserved |
| 2 | // |
| 3 | // This source code is subject to the terms of the BSD 2 Clause License and |
| 4 | // the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
| 5 | // was not distributed with this source code in the LICENSE file, you can |
| 6 | // obtain it at www.aomedia.org/license/software. If the Alliance for Open |
| 7 | // Media Patent License 1.0 was not distributed with this source code in the |
| 8 | // PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
| 9 | |
// Select the distortion-metric implementation at compile time: hand-written
// assembly for x86_64 (NASM) or aarch64 (NEON) builds, otherwise the
// portable Rust fallback defined in the `rust` module below.
cfg_if::cfg_if! {
  if #[cfg(nasm_x86_64)] {
    pub use crate::asm::x86::dist::*;
  } else if #[cfg(asm_neon)] {
    pub use crate::asm::aarch64::dist::*;
  } else {
    pub use self::rust::*;
  }
}
| 19 | |
| 20 | pub(crate) mod rust { |
| 21 | use crate::activity::apply_ssim_boost; |
| 22 | use crate::cpu_features::CpuFeatureLevel; |
| 23 | use crate::tiling::*; |
| 24 | use crate::util::*; |
| 25 | |
| 26 | use crate::encoder::IMPORTANCE_BLOCK_SIZE; |
| 27 | use crate::rdo::DistortionScale; |
| 28 | |
| 29 | /// Compute the sum of absolute differences over a block. |
| 30 | /// w and h can be at most 128, the size of the largest block. |
| 31 | pub fn get_sad<T: Pixel>( |
| 32 | plane_org: &PlaneRegion<'_, T>, plane_ref: &PlaneRegion<'_, T>, w: usize, |
| 33 | h: usize, _bit_depth: usize, _cpu: CpuFeatureLevel, |
| 34 | ) -> u32 { |
| 35 | debug_assert!(w <= 128 && h <= 128); |
| 36 | let plane_org = |
| 37 | plane_org.subregion(Area::Rect { x: 0, y: 0, width: w, height: h }); |
| 38 | let plane_ref = |
| 39 | plane_ref.subregion(Area::Rect { x: 0, y: 0, width: w, height: h }); |
| 40 | |
| 41 | plane_org |
| 42 | .rows_iter() |
| 43 | .zip(plane_ref.rows_iter()) |
| 44 | .map(|(src, dst)| { |
| 45 | src |
| 46 | .iter() |
| 47 | .zip(dst) |
| 48 | .map(|(&p1, &p2)| i32::cast_from(p1).abs_diff(i32::cast_from(p2))) |
| 49 | .sum::<u32>() |
| 50 | }) |
| 51 | .sum() |
| 52 | } |
| 53 | |
| 54 | #[inline (always)] |
| 55 | const fn butterfly(a: i32, b: i32) -> (i32, i32) { |
| 56 | ((a + b), (a - b)) |
| 57 | } |
| 58 | |
| 59 | #[inline (always)] |
| 60 | #[allow (clippy::identity_op, clippy::erasing_op)] |
| 61 | fn hadamard4_1d< |
| 62 | const LEN: usize, |
| 63 | const N: usize, |
| 64 | const STRIDE0: usize, |
| 65 | const STRIDE1: usize, |
| 66 | >( |
| 67 | data: &mut [i32; LEN], |
| 68 | ) { |
| 69 | for i in 0..N { |
| 70 | let sub: &mut [i32] = &mut data[i * STRIDE0..]; |
| 71 | let (a0, a1) = butterfly(sub[0 * STRIDE1], sub[1 * STRIDE1]); |
| 72 | let (a2, a3) = butterfly(sub[2 * STRIDE1], sub[3 * STRIDE1]); |
| 73 | let (b0, b2) = butterfly(a0, a2); |
| 74 | let (b1, b3) = butterfly(a1, a3); |
| 75 | sub[0 * STRIDE1] = b0; |
| 76 | sub[1 * STRIDE1] = b1; |
| 77 | sub[2 * STRIDE1] = b2; |
| 78 | sub[3 * STRIDE1] = b3; |
| 79 | } |
| 80 | } |
| 81 | |
| 82 | #[inline (always)] |
| 83 | #[allow (clippy::identity_op, clippy::erasing_op)] |
| 84 | fn hadamard8_1d< |
| 85 | const LEN: usize, |
| 86 | const N: usize, |
| 87 | const STRIDE0: usize, |
| 88 | const STRIDE1: usize, |
| 89 | >( |
| 90 | data: &mut [i32; LEN], |
| 91 | ) { |
| 92 | for i in 0..N { |
| 93 | let sub: &mut [i32] = &mut data[i * STRIDE0..]; |
| 94 | |
| 95 | let (a0, a1) = butterfly(sub[0 * STRIDE1], sub[1 * STRIDE1]); |
| 96 | let (a2, a3) = butterfly(sub[2 * STRIDE1], sub[3 * STRIDE1]); |
| 97 | let (a4, a5) = butterfly(sub[4 * STRIDE1], sub[5 * STRIDE1]); |
| 98 | let (a6, a7) = butterfly(sub[6 * STRIDE1], sub[7 * STRIDE1]); |
| 99 | |
| 100 | let (b0, b2) = butterfly(a0, a2); |
| 101 | let (b1, b3) = butterfly(a1, a3); |
| 102 | let (b4, b6) = butterfly(a4, a6); |
| 103 | let (b5, b7) = butterfly(a5, a7); |
| 104 | |
| 105 | let (c0, c4) = butterfly(b0, b4); |
| 106 | let (c1, c5) = butterfly(b1, b5); |
| 107 | let (c2, c6) = butterfly(b2, b6); |
| 108 | let (c3, c7) = butterfly(b3, b7); |
| 109 | |
| 110 | sub[0 * STRIDE1] = c0; |
| 111 | sub[1 * STRIDE1] = c1; |
| 112 | sub[2 * STRIDE1] = c2; |
| 113 | sub[3 * STRIDE1] = c3; |
| 114 | sub[4 * STRIDE1] = c4; |
| 115 | sub[5 * STRIDE1] = c5; |
| 116 | sub[6 * STRIDE1] = c6; |
| 117 | sub[7 * STRIDE1] = c7; |
| 118 | } |
| 119 | } |
| 120 | |
| 121 | #[inline (always)] |
| 122 | fn hadamard2d<const LEN: usize, const W: usize, const H: usize>( |
| 123 | data: &mut [i32; LEN], |
| 124 | ) { |
| 125 | /*Vertical transform.*/ |
| 126 | let vert_func = if H == 4 { |
| 127 | hadamard4_1d::<LEN, W, 1, H> |
| 128 | } else { |
| 129 | hadamard8_1d::<LEN, W, 1, H> |
| 130 | }; |
| 131 | vert_func(data); |
| 132 | /*Horizontal transform.*/ |
| 133 | let horz_func = if W == 4 { |
| 134 | hadamard4_1d::<LEN, H, W, 1> |
| 135 | } else { |
| 136 | hadamard8_1d::<LEN, H, W, 1> |
| 137 | }; |
| 138 | horz_func(data); |
| 139 | } |
| 140 | |
| 141 | // SAFETY: The length of data must be 16. |
| 142 | unsafe fn hadamard4x4(data: &mut [i32]) { |
| 143 | hadamard2d::<{ 4 * 4 }, 4, 4>(&mut *(data.as_mut_ptr() as *mut [i32; 16])); |
| 144 | } |
| 145 | |
| 146 | // SAFETY: The length of data must be 64. |
| 147 | unsafe fn hadamard8x8(data: &mut [i32]) { |
| 148 | hadamard2d::<{ 8 * 8 }, 8, 8>(&mut *(data.as_mut_ptr() as *mut [i32; 64])); |
| 149 | } |
| 150 | |
  /// Sum of absolute transformed differences over a block.
  /// w and h can be at most 128, the size of the largest block.
  /// Use the sum of 4x4 and 8x8 hadamard transforms for the transform, but
  /// revert to sad on edges when these transforms do not fit into w and h.
  /// 4x4 transforms instead of 8x8 transforms when width or height < 8.
  pub fn get_satd<T: Pixel>(
    plane_org: &PlaneRegion<'_, T>, plane_ref: &PlaneRegion<'_, T>, w: usize,
    h: usize, _bit_depth: usize, _cpu: CpuFeatureLevel,
  ) -> u32 {
    assert!(w <= 128 && h <= 128);
    assert!(plane_org.rect().width >= w && plane_org.rect().height >= h);
    assert!(plane_ref.rect().width >= w && plane_ref.rect().height >= h);

    // Size of hadamard transform should be 4x4 or 8x8
    // 4x* and *x4 use 4x4 and all other use 8x8
    let size: usize = w.min(h).min(8);
    let tx2d = if size == 4 { hadamard4x4 } else { hadamard8x8 };

    // 64-bit accumulator: per-chunk coefficient sums are added across up to
    // a 128x128 area before normalization.
    let mut sum: u64 = 0;

    // Loop over chunks the size of the chosen transform
    for chunk_y in (0..h).step_by(size) {
      let chunk_h = (h - chunk_y).min(size);
      for chunk_x in (0..w).step_by(size) {
        let chunk_w = (w - chunk_x).min(size);
        let chunk_area: Area = Area::Rect {
          x: chunk_x as isize,
          y: chunk_y as isize,
          width: chunk_w,
          height: chunk_h,
        };
        let chunk_org = plane_org.subregion(chunk_area);
        let chunk_ref = plane_ref.subregion(chunk_area);

        // Revert to sad on edge blocks (frame edges)
        if chunk_w != size || chunk_h != size {
          sum += get_sad(
            &chunk_org, &chunk_ref, chunk_w, chunk_h, _bit_depth, _cpu,
          ) as u64;
          continue;
        }

        // Stack buffer sized for the largest (8x8) transform, sliced down
        // to the active transform size (size * size elements).
        let buf: &mut [i32] = &mut [0; 8 * 8][..size * size];

        // Move the difference of the transforms to a buffer
        for (row_diff, (row_org, row_ref)) in buf
          .chunks_mut(size)
          .zip(chunk_org.rows_iter().zip(chunk_ref.rows_iter()))
        {
          for (diff, (a, b)) in
            row_diff.iter_mut().zip(row_org.iter().zip(row_ref.iter()))
          {
            *diff = i32::cast_from(*a) - i32::cast_from(*b);
          }
        }

        // Perform the hadamard transform on the differences
        // SAFETY: A sufficient number elements exist for the size of the transform.
        unsafe {
          tx2d(buf);
        }

        // Sum the absolute values of the transformed differences
        sum += buf.iter().map(|a| a.unsigned_abs() as u64).sum::<u64>();
      }
    }

    // Normalize the results
    // ln = log2(size), so the shift is a rounded division by the transform
    // size, compensating part of the Hadamard transform's gain.
    let ln = msb(size as i32) as u64;
    ((sum + (1 << ln >> 1)) >> ln) as u32
  }
| 222 | |
  /// Number of bits rounded off before summing in `get_weighted_sse`
  pub const GET_WEIGHTED_SSE_SHIFT: u8 = 8;

  /// Computes weighted sum of squared error.
  ///
  /// Each scale is applied to a 4x4 region in the provided inputs. Each scale
  /// value is a fixed point number, currently [`DistortionScale`].
  ///
  /// Implementations can require alignment (`bw` (block width) for [`src1`] and
  /// [`src2`] and `bw/4` for `scale`).
  #[inline (never)]
  pub fn get_weighted_sse<T: Pixel>(
    src1: &PlaneRegion<'_, T>, src2: &PlaneRegion<'_, T>, scale: &[u32],
    scale_stride: usize, w: usize, h: usize, _bit_depth: usize,
    _cpu: CpuFeatureLevel,
  ) -> u64 {
    let src1 = src1.subregion(Area::Rect { x: 0, y: 0, width: w, height: h });
    // Always chunk and apply scaling on the sse of squares the size of
    // decimated/sub-sampled importance block sizes.
    // Warning: Changing this will require changing/disabling assembly.
    let chunk_size: usize = IMPORTANCE_BLOCK_SIZE >> 1;

    // Iterator of a row of scales, stretched out to be per row
    let scales = scale.chunks_exact(scale_stride);

    // `vert_windows(n).step_by(n)` (and the horizontal equivalent inside)
    // walks disjoint `chunk_size`-sized strips, pairing each chunk of src1
    // with the matching chunk of src2 and its entry in `scales`.
    let sse = src1
      .vert_windows(chunk_size)
      .step_by(chunk_size)
      .zip(src2.vert_windows(chunk_size).step_by(chunk_size))
      .zip(scales)
      .map(|((row1, row2), scales)| {
        row1
          .horz_windows(chunk_size)
          .step_by(chunk_size)
          .zip(row2.horz_windows(chunk_size).step_by(chunk_size))
          .zip(scales)
          .map(|((chunk1, chunk2), &scale)| {
            // Plain (unweighted) SSE over one chunk.
            let sum = chunk1
              .rows_iter()
              .zip(chunk2.rows_iter())
              .map(|(chunk_row1, chunk_row2)| {
                chunk_row1
                  .iter()
                  .zip(chunk_row2)
                  .map(|(&a, &b)| {
                    let c = i32::cast_from(a) - i32::cast_from(b);
                    (c * c) as u32
                  })
                  .sum::<u32>()
              })
              .sum::<u32>();
            // Apply the fixed-point scale, rounding to nearest before
            // dropping GET_WEIGHTED_SSE_SHIFT bits.
            (sum as u64 * scale as u64 + (1 << GET_WEIGHTED_SSE_SHIFT >> 1))
              >> GET_WEIGHTED_SSE_SHIFT
          })
          .sum::<u64>()
      })
      .sum::<u64>();

    // Rounded division that removes the remaining fixed-point scaling
    // factor of DistortionScale.
    let den = DistortionScale::new(1, 1 << GET_WEIGHTED_SSE_SHIFT).0 as u64;
    (sse + (den >> 1)) / den
  }
| 284 | |
  /// Number of bits of precision used in `AREA_DIVISORS`
  const AREA_DIVISOR_BITS: u8 = 14;

  /// Lookup table for 2^`AREA_DIVISOR_BITS` / (1 + x)
  #[rustfmt::skip]
  const AREA_DIVISORS: [u16; 64] = [
    16384, 8192, 5461, 4096, 3277, 2731, 2341, 2048, 1820, 1638, 1489, 1365,
    1260, 1170, 1092, 1024, 964, 910, 862, 819, 780, 745, 712, 683,
    655, 630, 607, 585, 565, 546, 529, 512, 496, 482, 468, 455,
    443, 431, 420, 410, 400, 390, 381, 372, 364, 356, 349, 341,
    334, 328, 321, 315, 309, 303, 298, 293, 287, 282, 278, 273,
    269, 264, 260, 256,
  ];

  /// Computes a distortion metric of the sum of squares weighted by activity.
  /// w and h should be <= 8.
  ///
  /// `src`/`dst` must be full-resolution (undecimated) plane regions; the
  /// result is the SSE weighted through `apply_ssim_boost` using the
  /// per-plane variances.
  #[inline (never)]
  pub fn cdef_dist_kernel<T: Pixel>(
    src: &PlaneRegion<'_, T>, dst: &PlaneRegion<'_, T>, w: usize, h: usize,
    bit_depth: usize, _cpu: CpuFeatureLevel,
  ) -> u32 {
    // TODO: Investigate using different constants in ssim boost for block sizes
    // smaller than 8x8.

    debug_assert!(src.plane_cfg.xdec == 0);
    debug_assert!(src.plane_cfg.ydec == 0);
    debug_assert!(dst.plane_cfg.xdec == 0);
    debug_assert!(dst.plane_cfg.ydec == 0);

    // Limit kernel to 8x8
    debug_assert!(w <= 8);
    debug_assert!(h <= 8);

    // Compute the following summations.
    let mut sum_s: u32 = 0; // sum(src_{i,j})
    let mut sum_d: u32 = 0; // sum(dst_{i,j})
    let mut sum_s2: u32 = 0; // sum(src_{i,j}^2)
    let mut sum_d2: u32 = 0; // sum(dst_{i,j}^2)
    let mut sum_sd: u32 = 0; // sum(src_{i,j} * dst_{i,j})
    for (row1, row2) in src.rows_iter().take(h).zip(dst.rows_iter()) {
      for (s, d) in row1[..w].iter().zip(row2) {
        let s: u32 = u32::cast_from(*s);
        let d: u32 = u32::cast_from(*d);
        sum_s += s;
        sum_d += d;

        sum_s2 += s * s;
        sum_d2 += d * d;
        sum_sd += s * d;
      }
    }

    // To get the distortion, compute sum of squared error and apply a weight
    // based on the variance of the two planes.
    // SSE via the expansion sum((s-d)^2) = sum(s^2) + sum(d^2) - 2*sum(s*d).
    let sse = sum_d2 + sum_s2 - 2 * sum_sd;

    // Convert to 64-bits to avoid overflow when squaring
    let sum_s = sum_s as u64;
    let sum_d = sum_d as u64;

    // Calculate the variance (more accurately variance*area) of each plane.
    // var[iance] = avg(X^2) - avg(X)^2 = sum(X^2) / n - sum(X)^2 / n^2
    // (n = # samples i.e. area)
    // var * n = sum(X^2) - sum(X)^2 / n
    // When w and h are powers of two, this can be done via shifting.
    let div = AREA_DIVISORS[w * h - 1] as u64;
    let div_shift = AREA_DIVISOR_BITS;
    // Due to rounding, negative values can occur when w or h aren't powers of
    // two. Saturate to avoid underflow.
    let mut svar = sum_s2.saturating_sub(
      ((sum_s * sum_s * div + (1 << div_shift >> 1)) >> div_shift) as u32,
    );
    let mut dvar = sum_d2.saturating_sub(
      ((sum_d * sum_d * div + (1 << div_shift >> 1)) >> div_shift) as u32,
    );

    // Scale variances up to 8x8 size.
    // scaled variance = var * (8x8) / wxh
    // For 8x8, this is a nop. For powers of 2, this is doable with shifting.
    // var * div >> (AREA_DIVISOR_BITS - 6) == var * (2^14 / area) / 2^8
    //                                      == var * 64 / area (rounded).
    // TODO: It should be possible and faster to do this adjustment in ssim boost
    let scale_shift = AREA_DIVISOR_BITS - 6;
    svar =
      ((svar as u64 * div + (1 << scale_shift >> 1)) >> scale_shift) as u32;
    dvar =
      ((dvar as u64 * div + (1 << scale_shift >> 1)) >> scale_shift) as u32;

    apply_ssim_boost(sse, svar, dvar, bit_depth)
  }
| 373 | } |
| 374 | |
| 375 | #[cfg (test)] |
| 376 | pub mod test { |
| 377 | use super::*; |
| 378 | use crate::cpu_features::CpuFeatureLevel; |
| 379 | use crate::frame::*; |
| 380 | use crate::tiling::Area; |
| 381 | use crate::util::Pixel; |
| 382 | |
| 383 | // Generate plane data for get_sad_same() |
| 384 | fn setup_planes<T: Pixel>() -> (Plane<T>, Plane<T>) { |
| 385 | // Two planes with different strides |
| 386 | let mut input_plane = Plane::new(640, 480, 0, 0, 128 + 8, 128 + 8); |
| 387 | let mut rec_plane = Plane::new(640, 480, 0, 0, 2 * 128 + 8, 2 * 128 + 8); |
| 388 | |
| 389 | // Make the test pattern robust to data alignment |
| 390 | let xpad_off = |
| 391 | (input_plane.cfg.xorigin - input_plane.cfg.xpad) as i32 - 8i32; |
| 392 | |
| 393 | for (i, row) in |
| 394 | input_plane.data.chunks_mut(input_plane.cfg.stride).enumerate() |
| 395 | { |
| 396 | for (j, pixel) in row.iter_mut().enumerate() { |
| 397 | let val = ((j + i) as i32 - xpad_off) & 255i32; |
| 398 | assert!(val >= u8::MIN.into() && val <= u8::MAX.into()); |
| 399 | *pixel = T::cast_from(val); |
| 400 | } |
| 401 | } |
| 402 | |
| 403 | for (i, row) in rec_plane.data.chunks_mut(rec_plane.cfg.stride).enumerate() |
| 404 | { |
| 405 | for (j, pixel) in row.iter_mut().enumerate() { |
| 406 | let val = (j as i32 - i as i32 - xpad_off) & 255i32; |
| 407 | assert!(val >= u8::MIN.into() && val <= u8::MAX.into()); |
| 408 | *pixel = T::cast_from(val); |
| 409 | } |
| 410 | } |
| 411 | |
| 412 | (input_plane, rec_plane) |
| 413 | } |
| 414 | |
| 415 | // Regression and validation test for SAD computation |
| 416 | fn get_sad_same_inner<T: Pixel>() { |
| 417 | // dynamic allocation: test |
| 418 | let blocks: Vec<(usize, usize, u32)> = vec![ |
| 419 | (4, 4, 1912), |
| 420 | (4, 8, 4296), |
| 421 | (8, 4, 3496), |
| 422 | (8, 8, 7824), |
| 423 | (8, 16, 16592), |
| 424 | (16, 8, 14416), |
| 425 | (16, 16, 31136), |
| 426 | (16, 32, 60064), |
| 427 | (32, 16, 59552), |
| 428 | (32, 32, 120128), |
| 429 | (32, 64, 186688), |
| 430 | (64, 32, 250176), |
| 431 | (64, 64, 438912), |
| 432 | (64, 128, 654272), |
| 433 | (128, 64, 1016768), |
| 434 | (128, 128, 1689792), |
| 435 | (4, 16, 8680), |
| 436 | (16, 4, 6664), |
| 437 | (8, 32, 31056), |
| 438 | (32, 8, 27600), |
| 439 | (16, 64, 93344), |
| 440 | (64, 16, 116384), |
| 441 | ]; |
| 442 | |
| 443 | let bit_depth: usize = 8; |
| 444 | let (input_plane, rec_plane) = setup_planes::<T>(); |
| 445 | |
| 446 | for (w, h, distortion) in blocks { |
| 447 | let area = Area::StartingAt { x: 32, y: 40 }; |
| 448 | |
| 449 | let input_region = input_plane.region(area); |
| 450 | let rec_region = rec_plane.region(area); |
| 451 | |
| 452 | assert_eq!( |
| 453 | distortion, |
| 454 | get_sad( |
| 455 | &input_region, |
| 456 | &rec_region, |
| 457 | w, |
| 458 | h, |
| 459 | bit_depth, |
| 460 | CpuFeatureLevel::default() |
| 461 | ) |
| 462 | ); |
| 463 | } |
| 464 | } |
| 465 | |
| 466 | #[test ] |
| 467 | fn get_sad_same_u8() { |
| 468 | get_sad_same_inner::<u8>(); |
| 469 | } |
| 470 | |
| 471 | #[test ] |
| 472 | fn get_sad_same_u16() { |
| 473 | get_sad_same_inner::<u16>(); |
| 474 | } |
| 475 | |
| 476 | fn get_satd_same_inner<T: Pixel>() { |
| 477 | let blocks: Vec<(usize, usize, u32)> = vec![ |
| 478 | (4, 4, 1408), |
| 479 | (4, 8, 2016), |
| 480 | (8, 4, 1816), |
| 481 | (8, 8, 3984), |
| 482 | (8, 16, 5136), |
| 483 | (16, 8, 4864), |
| 484 | (16, 16, 9984), |
| 485 | (16, 32, 13824), |
| 486 | (32, 16, 13760), |
| 487 | (32, 32, 27952), |
| 488 | (32, 64, 37168), |
| 489 | (64, 32, 45104), |
| 490 | (64, 64, 84176), |
| 491 | (64, 128, 127920), |
| 492 | (128, 64, 173680), |
| 493 | (128, 128, 321456), |
| 494 | (4, 16, 3136), |
| 495 | (16, 4, 2632), |
| 496 | (8, 32, 7056), |
| 497 | (32, 8, 6624), |
| 498 | (16, 64, 18432), |
| 499 | (64, 16, 21312), |
| 500 | ]; |
| 501 | |
| 502 | let bit_depth: usize = 8; |
| 503 | let (input_plane, rec_plane) = setup_planes::<T>(); |
| 504 | |
| 505 | for (w, h, distortion) in blocks { |
| 506 | let area = Area::StartingAt { x: 32, y: 40 }; |
| 507 | |
| 508 | let input_region = input_plane.region(area); |
| 509 | let rec_region = rec_plane.region(area); |
| 510 | |
| 511 | assert_eq!( |
| 512 | distortion, |
| 513 | get_satd( |
| 514 | &input_region, |
| 515 | &rec_region, |
| 516 | w, |
| 517 | h, |
| 518 | bit_depth, |
| 519 | CpuFeatureLevel::default() |
| 520 | ) |
| 521 | ); |
| 522 | } |
| 523 | } |
| 524 | |
| 525 | #[test ] |
| 526 | fn get_satd_same_u8() { |
| 527 | get_satd_same_inner::<u8>(); |
| 528 | } |
| 529 | |
| 530 | #[test ] |
| 531 | fn get_satd_same_u16() { |
| 532 | get_satd_same_inner::<u16>(); |
| 533 | } |
| 534 | } |
| 535 | |