1// Copyright (c) 2017-2022, The rav1e contributors. All rights reserved
2//
3// This source code is subject to the terms of the BSD 2 Clause License and
4// the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
5// was not distributed with this source code in the LICENSE file, you can
6// obtain it at www.aomedia.org/license/software. If the Alliance for Open
7// Media Patent License 1.0 was not distributed with this source code in the
8// PATENTS file, you can obtain it at www.aomedia.org/license/patent.
9
10use crate::frame::*;
11use crate::rdo::DistortionScale;
12use crate::tiling::*;
13use crate::util::*;
14use itertools::izip;
15
16#[derive(Debug, Default, Clone)]
17pub struct ActivityMask {
18 variances: Box<[u32]>,
19}
20
21impl ActivityMask {
22 #[profiling::function]
23 pub fn from_plane<T: Pixel>(luma_plane: &Plane<T>) -> ActivityMask {
24 let PlaneConfig { width, height, .. } = luma_plane.cfg;
25
26 // Width and height are padded to 8×8 block size.
27 let w_in_imp_b = width.align_power_of_two_and_shift(3);
28 let h_in_imp_b = height.align_power_of_two_and_shift(3);
29
30 let aligned_luma = Rect {
31 x: 0_isize,
32 y: 0_isize,
33 width: w_in_imp_b << 3,
34 height: h_in_imp_b << 3,
35 };
36 let luma = PlaneRegion::new(luma_plane, aligned_luma);
37
38 let mut variances = Vec::with_capacity(w_in_imp_b * h_in_imp_b);
39
40 for y in 0..h_in_imp_b {
41 for x in 0..w_in_imp_b {
42 let block_rect = Area::Rect {
43 x: (x << 3) as isize,
44 y: (y << 3) as isize,
45 width: 8,
46 height: 8,
47 };
48
49 let block = luma.subregion(block_rect);
50 let variance = variance_8x8(&block);
51 variances.push(variance);
52 }
53 }
54 ActivityMask { variances: variances.into_boxed_slice() }
55 }
56
57 #[profiling::function]
58 pub fn fill_scales(
59 &self, bit_depth: usize, activity_scales: &mut Box<[DistortionScale]>,
60 ) {
61 for (dst, &src) in activity_scales.iter_mut().zip(self.variances.iter()) {
62 *dst = ssim_boost(src, src, bit_depth);
63 }
64 }
65}
66
67// Adapted from the source variance calculation in `cdef_dist_wxh_8x8`.
68#[inline(never)]
69fn variance_8x8<T: Pixel>(src: &PlaneRegion<'_, T>) -> u32 {
70 debug_assert!(src.plane_cfg.xdec == 0);
71 debug_assert!(src.plane_cfg.ydec == 0);
72
73 // Sum into columns to improve auto-vectorization
74 let mut sum_s_cols: [u16; 8] = [0; 8];
75 let mut sum_s2_cols: [u32; 8] = [0; 8];
76
77 // Check upfront that 8 rows are available.
78 let _row = &src[7];
79
80 for j in 0..8 {
81 let row = &src[j][0..8];
82 for (sum_s, sum_s2, s) in izip!(&mut sum_s_cols, &mut sum_s2_cols, row) {
83 // Don't convert directly to u32 to allow better vectorization
84 let s: u16 = u16::cast_from(*s);
85 *sum_s += s;
86
87 // Convert to u32 to avoid overflows when multiplying
88 let s: u32 = s as u32;
89 *sum_s2 += s * s;
90 }
91 }
92
93 // Sum together the sum of columns
94 let sum_s = sum_s_cols.iter().copied().map(u64::from).sum::<u64>();
95 let sum_s2 = sum_s2_cols.iter().copied().map(u64::from).sum::<u64>();
96
97 // Use sums to calculate variance
98 u32::try_from(sum_s2 - ((sum_s * sum_s + 32) >> 6)).unwrap_or(u32::MAX)
99}
100
101/// `rsqrt` result stored in fixed point w/ scaling such that:
102/// `rsqrt = output.rsqrt_norm / (1 << output.shift)`
103struct RsqrtOutput {
104 norm: u16,
105 shift: u8,
106}
107
108/// Fixed point `rsqrt` for `ssim_boost`
109fn ssim_boost_rsqrt(x: u64) -> RsqrtOutput {
110 const INSHIFT: u8 = 16;
111 const OUTSHIFT: u8 = 14;
112
113 let k = ((ILog::ilog(x) - 1) >> 1) as i16;
114 /*t is x in the range [0.25, 1) in QINSHIFT, or x*2^(-s).
115 Shift by log2(x) - log2(0.25*(1 << INSHIFT)) to ensure 0.25 lower bound.*/
116 let s: i16 = 2 * k - (INSHIFT as i16 - 2);
117 let t: u16 = if s > 0 { x >> s } else { x << -s } as u16;
118
119 /*We want to express od_rsqrt() in terms of od_rsqrt_norm(), which is
120 defined as (2^OUTSHIFT)/sqrt(t*(2^-INSHIFT)) with t=x*(2^-s).
121 This simplifies to 2^(OUTSHIFT+(INSHIFT/2)+(s/2))/sqrt(x), so the caller
122 needs to shift right by OUTSHIFT + INSHIFT/2 + s/2.*/
123 let rsqrt_shift: u8 = (OUTSHIFT as i16 + ((s + INSHIFT as i16) >> 1)) as u8;
124
125 #[inline(always)]
126 const fn mult16_16_q15(a: i32, b: i32) -> i32 {
127 (a * b) >> 15
128 }
129
130 /* Reciprocal sqrt approximation where the input is in the range [0.25,1) in
131 Q16 and the output is in the range (1.0, 2.0] in Q14). */
132
133 /* Range of n is [-16384,32767] ([-0.5,1) in Q15). */
134 let n: i32 = t as i32 - 32768;
135 debug_assert!(n >= -16384);
136
137 /* Get a rough guess for the root.
138 The optimal minimax quadratic approximation (using relative error) is
139 r = 1.437799046117536+n*(-0.823394375837328+n*0.4096419668459485).
140 Coefficients here, and the final result r, are Q14. */
141 let rsqrt: i32 = 23557 + mult16_16_q15(n, -13490 + mult16_16_q15(n, 6711));
142
143 debug_assert!((16384..32768).contains(&rsqrt));
144 RsqrtOutput { norm: rsqrt as u16, shift: rsqrt_shift }
145}
146
147#[inline(always)]
148pub fn ssim_boost(svar: u32, dvar: u32, bit_depth: usize) -> DistortionScale {
149 DistortionScale(apply_ssim_boost(
150 input:DistortionScale::default().0,
151 svar,
152 dvar,
153 bit_depth,
154 ))
155}
156
157/// Apply ssim boost to a given input
158#[inline(always)]
159pub fn apply_ssim_boost(
160 input: u32, svar: u32, dvar: u32, bit_depth: usize,
161) -> u32 {
162 let coeff_shift = bit_depth - 8;
163
164 // Scale dvar and svar to lbd range to prevent overflows.
165 let svar = (svar >> (2 * coeff_shift)) as u64;
166 let dvar = (dvar >> (2 * coeff_shift)) as u64;
167
168 // The constants are such that when source and destination variance are equal,
169 // ssim_boost ~= (x/2)^(-1/3) where x = variance / scale and the scale is
170 // (maximum variance / sample range) << (bit depth - 8).
171 // C2 is the variance floor, equivalent to a flat block of mean valued samples
172 // with a single maximum value sample.
173 const C1: u64 = 3355;
174 const C2: u64 = 16128;
175 const C3: u64 = 12338;
176 const RATIO_SHIFT: u8 = 14;
177 const RATIO: u64 = (((C1 << (RATIO_SHIFT + 1)) / C3) + 1) >> 1;
178
179 // C1 (svar + dvar + C2)
180 // input * ---- * --------------------------
181 // C3 sqrt(C1^2 + svar * dvar)
182 let rsqrt = ssim_boost_rsqrt((C1 * C1) + svar * dvar);
183 ((input as u64
184 * (((RATIO * (svar + dvar + C2)) * rsqrt.norm as u64) >> RATIO_SHIFT))
185 >> rsqrt.shift) as u32
186}
187
188#[cfg(test)]
189mod ssim_boost_tests {
190 use super::*;
191 use interpolate_name::interpolate_test;
192 use rand::Rng;
193
194 /// Test to make sure extreme values of `ssim_boost` don't overflow.
195 #[test]
196 fn overflow_test() {
197 // Test variance for 8x8 region with a bit depth of 12
198 let max_pix_diff = (1 << 12) - 1;
199 let max_pix_sse = max_pix_diff * max_pix_diff;
200 let max_variance = max_pix_diff * 8 * 8 / 4;
201 apply_ssim_boost(max_pix_sse * 8 * 8, max_variance, max_variance, 12);
202 }
203
204 /// Floating point reference version of `ssim_boost`
205 fn reference_ssim_boost(svar: u32, dvar: u32, bit_depth: usize) -> f64 {
206 let coeff_shift = bit_depth - 8;
207 let var_scale = 1f64 / (1 << (2 * coeff_shift)) as f64;
208 let svar = svar as f64 * var_scale;
209 let dvar = dvar as f64 * var_scale;
210 // These constants are from ssim boost and need to be updated if the
211 // constants in ssim boost change.
212 const C1: f64 = 3355f64;
213 const C2: f64 = 16128f64;
214 const C3: f64 = 12338f64;
215 const RATIO: f64 = C1 / C3;
216
217 RATIO * (svar + dvar + C2) / f64::sqrt(C1.mul_add(C1, svar * dvar))
218 }
219
220 /// Test that `ssim_boost` has sufficient accuracy.
221 #[test]
222 fn accuracy_test() {
223 let mut rng = rand::thread_rng();
224
225 let mut max_relative_error = 0f64;
226 let bd = 12;
227
228 // Test different log scale ranges for the variance.
229 // Each scale is tested multiple times with randomized variances.
230 for scale in 0..(bd + 3 * 2 - 2) {
231 for _ in 0..40 {
232 let svar = rng.gen_range(0..(1 << scale));
233 let dvar = rng.gen_range(0..(1 << scale));
234
235 let float = reference_ssim_boost(svar, dvar, 12);
236 let fixed =
237 apply_ssim_boost(1 << 23, svar, dvar, 12) as f64 / (1 << 23) as f64;
238
239 // Compare the two versions
240 max_relative_error =
241 max_relative_error.max(f64::abs(1f64 - fixed / float));
242 }
243 }
244
245 assert!(
246 max_relative_error < 0.05,
247 "SSIM boost error too high. Measured max relative error: {}.",
248 max_relative_error
249 );
250 }
251
252 #[interpolate_test(8, 8)]
253 #[interpolate_test(10, 10)]
254 #[interpolate_test(12, 12)]
255 fn reciprocal_cube_root_test(bd: usize) {
256 let mut max_relative_error = 0f64;
257
258 let scale = ((1 << bd) - 1) << (6 - 2 + bd - 8);
259 for svar in scale..(scale << 2) {
260 let float = ((scale << 1) as f64 / svar as f64).cbrt();
261 let fixed =
262 apply_ssim_boost(1 << 23, svar, svar, bd) as f64 / (1 << 23) as f64;
263
264 // Compare the two versions
265 max_relative_error =
266 max_relative_error.max(f64::abs(1f64 - fixed / float));
267 }
268
269 assert!(
270 max_relative_error < 0.0273,
271 "SSIM boost error too high. Measured max relative error: {}.",
272 max_relative_error
273 );
274 }
275}
276