| 1 | // Copyright (c) 2017-2022, The rav1e contributors. All rights reserved |
| 2 | // |
| 3 | // This source code is subject to the terms of the BSD 2 Clause License and |
| 4 | // the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
| 5 | // was not distributed with this source code in the LICENSE file, you can |
| 6 | // obtain it at www.aomedia.org/license/software. If the Alliance for Open |
| 7 | // Media Patent License 1.0 was not distributed with this source code in the |
| 8 | // PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
| 9 | |
| 10 | use crate::frame::*; |
| 11 | use crate::rdo::DistortionScale; |
| 12 | use crate::tiling::*; |
| 13 | use crate::util::*; |
| 14 | use itertools::izip; |
| 15 | |
| 16 | #[derive (Debug, Default, Clone)] |
| 17 | pub struct ActivityMask { |
| 18 | variances: Box<[u32]>, |
| 19 | } |
| 20 | |
| 21 | impl ActivityMask { |
| 22 | #[profiling::function ] |
| 23 | pub fn from_plane<T: Pixel>(luma_plane: &Plane<T>) -> ActivityMask { |
| 24 | let PlaneConfig { width, height, .. } = luma_plane.cfg; |
| 25 | |
| 26 | // Width and height are padded to 8×8 block size. |
| 27 | let w_in_imp_b = width.align_power_of_two_and_shift(3); |
| 28 | let h_in_imp_b = height.align_power_of_two_and_shift(3); |
| 29 | |
| 30 | let aligned_luma = Rect { |
| 31 | x: 0_isize, |
| 32 | y: 0_isize, |
| 33 | width: w_in_imp_b << 3, |
| 34 | height: h_in_imp_b << 3, |
| 35 | }; |
| 36 | let luma = PlaneRegion::new(luma_plane, aligned_luma); |
| 37 | |
| 38 | let mut variances = Vec::with_capacity(w_in_imp_b * h_in_imp_b); |
| 39 | |
| 40 | for y in 0..h_in_imp_b { |
| 41 | for x in 0..w_in_imp_b { |
| 42 | let block_rect = Area::Rect { |
| 43 | x: (x << 3) as isize, |
| 44 | y: (y << 3) as isize, |
| 45 | width: 8, |
| 46 | height: 8, |
| 47 | }; |
| 48 | |
| 49 | let block = luma.subregion(block_rect); |
| 50 | let variance = variance_8x8(&block); |
| 51 | variances.push(variance); |
| 52 | } |
| 53 | } |
| 54 | ActivityMask { variances: variances.into_boxed_slice() } |
| 55 | } |
| 56 | |
| 57 | #[profiling::function ] |
| 58 | pub fn fill_scales( |
| 59 | &self, bit_depth: usize, activity_scales: &mut Box<[DistortionScale]>, |
| 60 | ) { |
| 61 | for (dst, &src) in activity_scales.iter_mut().zip(self.variances.iter()) { |
| 62 | *dst = ssim_boost(src, src, bit_depth); |
| 63 | } |
| 64 | } |
| 65 | } |
| 66 | |
| 67 | // Adapted from the source variance calculation in `cdef_dist_wxh_8x8`. |
| 68 | #[inline (never)] |
| 69 | fn variance_8x8<T: Pixel>(src: &PlaneRegion<'_, T>) -> u32 { |
| 70 | debug_assert!(src.plane_cfg.xdec == 0); |
| 71 | debug_assert!(src.plane_cfg.ydec == 0); |
| 72 | |
| 73 | // Sum into columns to improve auto-vectorization |
| 74 | let mut sum_s_cols: [u16; 8] = [0; 8]; |
| 75 | let mut sum_s2_cols: [u32; 8] = [0; 8]; |
| 76 | |
| 77 | // Check upfront that 8 rows are available. |
| 78 | let _row = &src[7]; |
| 79 | |
| 80 | for j in 0..8 { |
| 81 | let row = &src[j][0..8]; |
| 82 | for (sum_s, sum_s2, s) in izip!(&mut sum_s_cols, &mut sum_s2_cols, row) { |
| 83 | // Don't convert directly to u32 to allow better vectorization |
| 84 | let s: u16 = u16::cast_from(*s); |
| 85 | *sum_s += s; |
| 86 | |
| 87 | // Convert to u32 to avoid overflows when multiplying |
| 88 | let s: u32 = s as u32; |
| 89 | *sum_s2 += s * s; |
| 90 | } |
| 91 | } |
| 92 | |
| 93 | // Sum together the sum of columns |
| 94 | let sum_s = sum_s_cols.iter().copied().map(u64::from).sum::<u64>(); |
| 95 | let sum_s2 = sum_s2_cols.iter().copied().map(u64::from).sum::<u64>(); |
| 96 | |
| 97 | // Use sums to calculate variance |
| 98 | u32::try_from(sum_s2 - ((sum_s * sum_s + 32) >> 6)).unwrap_or(u32::MAX) |
| 99 | } |
| 100 | |
| 101 | /// `rsqrt` result stored in fixed point w/ scaling such that: |
| 102 | /// `rsqrt = output.rsqrt_norm / (1 << output.shift)` |
| 103 | struct RsqrtOutput { |
| 104 | norm: u16, |
| 105 | shift: u8, |
| 106 | } |
| 107 | |
| 108 | /// Fixed point `rsqrt` for `ssim_boost` |
| 109 | fn ssim_boost_rsqrt(x: u64) -> RsqrtOutput { |
| 110 | const INSHIFT: u8 = 16; |
| 111 | const OUTSHIFT: u8 = 14; |
| 112 | |
| 113 | let k = ((ILog::ilog(x) - 1) >> 1) as i16; |
| 114 | /*t is x in the range [0.25, 1) in QINSHIFT, or x*2^(-s). |
| 115 | Shift by log2(x) - log2(0.25*(1 << INSHIFT)) to ensure 0.25 lower bound.*/ |
| 116 | let s: i16 = 2 * k - (INSHIFT as i16 - 2); |
| 117 | let t: u16 = if s > 0 { x >> s } else { x << -s } as u16; |
| 118 | |
| 119 | /*We want to express od_rsqrt() in terms of od_rsqrt_norm(), which is |
| 120 | defined as (2^OUTSHIFT)/sqrt(t*(2^-INSHIFT)) with t=x*(2^-s). |
| 121 | This simplifies to 2^(OUTSHIFT+(INSHIFT/2)+(s/2))/sqrt(x), so the caller |
| 122 | needs to shift right by OUTSHIFT + INSHIFT/2 + s/2.*/ |
| 123 | let rsqrt_shift: u8 = (OUTSHIFT as i16 + ((s + INSHIFT as i16) >> 1)) as u8; |
| 124 | |
| 125 | #[inline (always)] |
| 126 | const fn mult16_16_q15(a: i32, b: i32) -> i32 { |
| 127 | (a * b) >> 15 |
| 128 | } |
| 129 | |
| 130 | /* Reciprocal sqrt approximation where the input is in the range [0.25,1) in |
| 131 | Q16 and the output is in the range (1.0, 2.0] in Q14). */ |
| 132 | |
| 133 | /* Range of n is [-16384,32767] ([-0.5,1) in Q15). */ |
| 134 | let n: i32 = t as i32 - 32768; |
| 135 | debug_assert!(n >= -16384); |
| 136 | |
| 137 | /* Get a rough guess for the root. |
| 138 | The optimal minimax quadratic approximation (using relative error) is |
| 139 | r = 1.437799046117536+n*(-0.823394375837328+n*0.4096419668459485). |
| 140 | Coefficients here, and the final result r, are Q14. */ |
| 141 | let rsqrt: i32 = 23557 + mult16_16_q15(n, -13490 + mult16_16_q15(n, 6711)); |
| 142 | |
| 143 | debug_assert!((16384..32768).contains(&rsqrt)); |
| 144 | RsqrtOutput { norm: rsqrt as u16, shift: rsqrt_shift } |
| 145 | } |
| 146 | |
| 147 | #[inline (always)] |
| 148 | pub fn ssim_boost(svar: u32, dvar: u32, bit_depth: usize) -> DistortionScale { |
| 149 | DistortionScale(apply_ssim_boost( |
| 150 | input:DistortionScale::default().0, |
| 151 | svar, |
| 152 | dvar, |
| 153 | bit_depth, |
| 154 | )) |
| 155 | } |
| 156 | |
| 157 | /// Apply ssim boost to a given input |
| 158 | #[inline (always)] |
| 159 | pub fn apply_ssim_boost( |
| 160 | input: u32, svar: u32, dvar: u32, bit_depth: usize, |
| 161 | ) -> u32 { |
| 162 | let coeff_shift = bit_depth - 8; |
| 163 | |
| 164 | // Scale dvar and svar to lbd range to prevent overflows. |
| 165 | let svar = (svar >> (2 * coeff_shift)) as u64; |
| 166 | let dvar = (dvar >> (2 * coeff_shift)) as u64; |
| 167 | |
| 168 | // The constants are such that when source and destination variance are equal, |
| 169 | // ssim_boost ~= (x/2)^(-1/3) where x = variance / scale and the scale is |
| 170 | // (maximum variance / sample range) << (bit depth - 8). |
| 171 | // C2 is the variance floor, equivalent to a flat block of mean valued samples |
| 172 | // with a single maximum value sample. |
| 173 | const C1: u64 = 3355; |
| 174 | const C2: u64 = 16128; |
| 175 | const C3: u64 = 12338; |
| 176 | const RATIO_SHIFT: u8 = 14; |
| 177 | const RATIO: u64 = (((C1 << (RATIO_SHIFT + 1)) / C3) + 1) >> 1; |
| 178 | |
| 179 | // C1 (svar + dvar + C2) |
| 180 | // input * ---- * -------------------------- |
| 181 | // C3 sqrt(C1^2 + svar * dvar) |
| 182 | let rsqrt = ssim_boost_rsqrt((C1 * C1) + svar * dvar); |
| 183 | ((input as u64 |
| 184 | * (((RATIO * (svar + dvar + C2)) * rsqrt.norm as u64) >> RATIO_SHIFT)) |
| 185 | >> rsqrt.shift) as u32 |
| 186 | } |
| 187 | |
| 188 | #[cfg (test)] |
| 189 | mod ssim_boost_tests { |
| 190 | use super::*; |
| 191 | use interpolate_name::interpolate_test; |
| 192 | use rand::Rng; |
| 193 | |
| 194 | /// Test to make sure extreme values of `ssim_boost` don't overflow. |
| 195 | #[test ] |
| 196 | fn overflow_test() { |
| 197 | // Test variance for 8x8 region with a bit depth of 12 |
| 198 | let max_pix_diff = (1 << 12) - 1; |
| 199 | let max_pix_sse = max_pix_diff * max_pix_diff; |
| 200 | let max_variance = max_pix_diff * 8 * 8 / 4; |
| 201 | apply_ssim_boost(max_pix_sse * 8 * 8, max_variance, max_variance, 12); |
| 202 | } |
| 203 | |
| 204 | /// Floating point reference version of `ssim_boost` |
| 205 | fn reference_ssim_boost(svar: u32, dvar: u32, bit_depth: usize) -> f64 { |
| 206 | let coeff_shift = bit_depth - 8; |
| 207 | let var_scale = 1f64 / (1 << (2 * coeff_shift)) as f64; |
| 208 | let svar = svar as f64 * var_scale; |
| 209 | let dvar = dvar as f64 * var_scale; |
| 210 | // These constants are from ssim boost and need to be updated if the |
| 211 | // constants in ssim boost change. |
| 212 | const C1: f64 = 3355f64; |
| 213 | const C2: f64 = 16128f64; |
| 214 | const C3: f64 = 12338f64; |
| 215 | const RATIO: f64 = C1 / C3; |
| 216 | |
| 217 | RATIO * (svar + dvar + C2) / f64::sqrt(C1.mul_add(C1, svar * dvar)) |
| 218 | } |
| 219 | |
| 220 | /// Test that `ssim_boost` has sufficient accuracy. |
| 221 | #[test ] |
| 222 | fn accuracy_test() { |
| 223 | let mut rng = rand::thread_rng(); |
| 224 | |
| 225 | let mut max_relative_error = 0f64; |
| 226 | let bd = 12; |
| 227 | |
| 228 | // Test different log scale ranges for the variance. |
| 229 | // Each scale is tested multiple times with randomized variances. |
| 230 | for scale in 0..(bd + 3 * 2 - 2) { |
| 231 | for _ in 0..40 { |
| 232 | let svar = rng.gen_range(0..(1 << scale)); |
| 233 | let dvar = rng.gen_range(0..(1 << scale)); |
| 234 | |
| 235 | let float = reference_ssim_boost(svar, dvar, 12); |
| 236 | let fixed = |
| 237 | apply_ssim_boost(1 << 23, svar, dvar, 12) as f64 / (1 << 23) as f64; |
| 238 | |
| 239 | // Compare the two versions |
| 240 | max_relative_error = |
| 241 | max_relative_error.max(f64::abs(1f64 - fixed / float)); |
| 242 | } |
| 243 | } |
| 244 | |
| 245 | assert!( |
| 246 | max_relative_error < 0.05, |
| 247 | "SSIM boost error too high. Measured max relative error: {}." , |
| 248 | max_relative_error |
| 249 | ); |
| 250 | } |
| 251 | |
| 252 | #[interpolate_test(8, 8)] |
| 253 | #[interpolate_test(10, 10)] |
| 254 | #[interpolate_test(12, 12)] |
| 255 | fn reciprocal_cube_root_test(bd: usize) { |
| 256 | let mut max_relative_error = 0f64; |
| 257 | |
| 258 | let scale = ((1 << bd) - 1) << (6 - 2 + bd - 8); |
| 259 | for svar in scale..(scale << 2) { |
| 260 | let float = ((scale << 1) as f64 / svar as f64).cbrt(); |
| 261 | let fixed = |
| 262 | apply_ssim_boost(1 << 23, svar, svar, bd) as f64 / (1 << 23) as f64; |
| 263 | |
| 264 | // Compare the two versions |
| 265 | max_relative_error = |
| 266 | max_relative_error.max(f64::abs(1f64 - fixed / float)); |
| 267 | } |
| 268 | |
| 269 | assert!( |
| 270 | max_relative_error < 0.0273, |
| 271 | "SSIM boost error too high. Measured max relative error: {}." , |
| 272 | max_relative_error |
| 273 | ); |
| 274 | } |
| 275 | } |
| 276 | |