| 1 | // Copyright (c) 2019-2022, The rav1e contributors. All rights reserved |
| 2 | // |
| 3 | // This source code is subject to the terms of the BSD 2 Clause License and |
| 4 | // the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
| 5 | // was not distributed with this source code in the LICENSE file, you can |
| 6 | // obtain it at www.aomedia.org/license/software. If the Alliance for Open |
| 7 | // Media Patent License 1.0 was not distributed with this source code in the |
| 8 | // PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
| 9 | |
| 10 | use crate::api::color::ChromaSampling; |
| 11 | use crate::api::ContextInner; |
| 12 | use crate::encoder::TEMPORAL_DELIMITER; |
| 13 | use crate::quantize::{ac_q, dc_q, select_ac_qi, select_dc_qi}; |
| 14 | use crate::util::{ |
| 15 | bexp64, bexp_q24, blog64, clamp, q24_to_q57, q57, q57_to_q24, Pixel, |
| 16 | }; |
| 17 | use std::cmp; |
| 18 | |
// How many frame sub-types carry their own rate-model parameters.
// FRAME_SUBTYPE_SEF is deliberately not counted: Show Existing Frame frames
// need no parameter tracking.
pub const FRAME_NSUBTYPES: usize = 4;

// Frame sub-type indices. The first FRAME_NSUBTYPES of them index the
// per-sub-type parameter arrays.
pub const FRAME_SUBTYPE_I: usize = 0;
pub const FRAME_SUBTYPE_P: usize = 1;
// The B sub-types may be unreferenced in some builds, hence the allow.
#[allow(unused)]
pub const FRAME_SUBTYPE_B0: usize = 2;
#[allow(unused)]
pub const FRAME_SUBTYPE_B1: usize = 3;
// Show Existing Frame: the one slot past the tracked sub-types.
pub const FRAME_SUBTYPE_SEF: usize = FRAME_NSUBTYPES;

// Values stored in RCState::twopass_state.
const PASS_SINGLE: i32 = 0;
const PASS_1: i32 = 1;
const PASS_2: i32 = 2;
const PASS_2_PLUS_1: i32 = 3;

// Magic value that opens the 2-pass stats file.
const TWOPASS_MAGIC: i32 = 0x5032_4156;
// Version number of the 2-pass stats file format.
const TWOPASS_VERSION: i32 = 1;
// Summary header size in bytes:
//   4 byte magic + 4 byte version + 4 byte TU count + 4 byte SEF frame count
//   + FRAME_NSUBTYPES*(4 byte frame count + 1 byte exp + 8 byte scale_sum)
pub(crate) const TWOPASS_HEADER_SZ: usize =
  4 + 4 + 4 + 4 + FRAME_NSUBTYPES * (4 + 1 + 8);
// Per-frame packet size in bytes:
//   4 byte frame type (show_frame and fti jointly coded)
//   + 4 byte log_scale_q24
const TWOPASS_PACKET_SZ: usize = 4 + 4;

// NOTE(review): presumably a bit count associated with Show Existing Frame
// frames; confirm against its users, which are not visible here.
const SEF_BITS: i64 = 24;

// The scale of AV1 quantizer tables (relative to the pixel domain), i.e., Q3.
pub(crate) const QSCALE: i32 = 3;

// Lower bound applied to the actual I and B frame delays. Staying at or above
// it keeps us in the range of values where later incrementing the delay works
// as designed. 10 is not an exact choice, but a good working trade-off.
const INTER_DELAY_TARGET_MIN: i32 = 10;
| 57 | |
// The base quantizer for a frame is adjusted based on the frame type using the
// formula (log_qp*mqp + dqp), where log_qp is the base-2 logarithm of the
// "linear" quantizer (the actual factor by which coefficients are divided).
// Because log_qp has an implicit offset built in based on the scale of the
// coefficients (which depends on the pixel bit depth and the transform
// scale), we normalize the quantizer to the equivalent for 8-bit pixels with
// orthonormal transforms for the purposes of rate modeling.
//
// MQP_Q12 holds the multiplier (mqp) in Q12, indexed by frame sub-type
// (I, P, B0, B1). Every entry is currently 1.0.
const MQP_Q12: &[i32; FRAME_NSUBTYPES] = &[
  // TODO: Use a const function once f64 operations in const functions are
  // stable.
  // FRAME_SUBTYPE_I
  (1.0 * (1 << 12) as f64) as i32,
  // FRAME_SUBTYPE_P
  (1.0 * (1 << 12) as f64) as i32,
  // FRAME_SUBTYPE_B0
  (1.0 * (1 << 12) as f64) as i32,
  // FRAME_SUBTYPE_B1
  (1.0 * (1 << 12) as f64) as i32,
];

// DQP_Q57 holds the additive offset (dqp) in Q57, indexed by frame sub-type.
// The ratio 33_810_170.0 / 86_043_287.0 was derived by approximating the median
// of a change of 15 quantizer steps in the quantizer tables.
// The offsets are -1, 0, +1 and +2 times that ratio for I, P, B0 and B1.
const DQP_Q57: &[i64; FRAME_NSUBTYPES] = &[
  // FRAME_SUBTYPE_I
  (-(33_810_170.0 / 86_043_287.0) * (1i64 << 57) as f64) as i64,
  // FRAME_SUBTYPE_P
  (0.0 * (1i64 << 57) as f64) as i64,
  // FRAME_SUBTYPE_B0
  ((33_810_170.0 / 86_043_287.0) * (1i64 << 57) as f64) as i64,
  // FRAME_SUBTYPE_B1
  (2.0 * (33_810_170.0 / 86_043_287.0) * (1i64 << 57) as f64) as i64,
];
| 82 | |
// For 8-bit-depth inter frames, log_q_y is derived from log_target_q with a
// linear model:
// log_q_y = log_target_q + (log_target_q >> 32) * Q_MODEL_MUL + Q_MODEL_ADD
// Derivation of the linear models:
// https://github.com/xiph/rav1e/blob/d02bdbd3b0b7b2cb9fc301031cc6a4e67a567a5c/doc/quantizer-weight-analysis.ipynb
//
// Both tables are indexed with `chroma_sampling as usize`; the per-entry
// labels below give the sampling each slot is meant for.
// NOTE(review): the labels assume the ChromaSampling discriminants are ordered
// 4:2:0, 4:2:2, 4:4:4, 4:0:0 -- the enum is not visible here, confirm.

// Additive term of the model, in the same Q57 log domain as log_target_q.
#[rustfmt::skip]
const Q_MODEL_ADD: [i64; 4] = [
  // 4:2:0
  -0x24_4FE7_ECB3_DD90,
  // 4:2:2
  -0x37_41DA_38AD_0924,
  // 4:4:4
  -0x70_83BD_A626_311C,
  // 4:0:0
  0,
];
// Multiplicative term of the model; it is applied to (log_target_q >> 32).
#[rustfmt::skip]
const Q_MODEL_MUL: [i64; 4] = [
  // 4:2:0
  0x8A0_50DD,
  // 4:2:2
  0x887_7666,
  // 4:4:4
  0x8D4_A712,
  // 4:0:0
  0,
];
| 110 | |
// Knots of a piecewise-linear approximation of tan(pi*alpha) in Q12.
// Entry k corresponds to alpha = k/36 (e.g. entry 9 is tan(pi/4) = 4096).
#[rustfmt::skip]
const ROUGH_TAN_LOOKUP: &[u16; 18] = &[
      0,   358,   722,  1098,  1491,  1910,
   2365,  2868,  3437,  4096,  4881,  5850,
   7094,  8784, 11254, 15286, 23230, 46817
];

// A digital approximation of a 2nd-order low-pass Bessel follower.
// We use this for rate control because it has fast reaction time, but is
// critically damped.
pub struct IIRBessel2 {
  // Feedback coefficients applied to the two previous outputs (Q24).
  c: [i32; 2],
  // Gain applied to the weighted sum of the current and previous inputs (Q24).
  g: i32,
  // The two most recent inputs, newest first.
  x: [i32; 2],
  // The two most recent outputs, newest first.
  y: [i32; 2],
}

// Approximates tan(pi*alpha) by linear interpolation in ROUGH_TAN_LOOKUP.
//
// alpha is Q24 in the range [0,0.5).
// The return value is 5.12.
fn warp_alpha(alpha: i32) -> i32 {
  // Index of the knot at or below alpha; capped so that i + 1 stays in the
  // table (inputs past the last segment are extrapolated along it).
  let i: i32 = ((alpha * 36) >> 24).min(16);
  let t0 = i64::from(ROUGH_TAN_LOOKUP[i as usize]);
  let t1 = i64::from(ROUGH_TAN_LOOKUP[i as usize + 1]);
  // Position inside the segment, Q24.
  let d = i64::from(alpha * 36 - (i << 24));
  // The knot difference must be widened *before* the shift: every adjacent
  // difference in the table exceeds 255, so shifting it left by 8 while it
  // is still a u16 silently drops its high bits and corrupts the slope.
  let slope = (t1 - t0) << 8;
  (((t0 << 32) + slope * d) >> 32) as i32
}
| 137 | |
| 138 | // Compute Bessel filter coefficients with the specified delay. |
| 139 | // Return: Filter parameters (c[0], c[1], g). |
| 140 | fn iir_bessel2_get_parameters(delay: i32) -> (i32, i32, i32) { |
| 141 | // This borrows some code from an unreleased version of Postfish. |
| 142 | // See the recipe at http://unicorn.us.com/alex/2polefilters.html for details |
| 143 | // on deriving the filter coefficients. |
| 144 | // alpha is Q24 |
| 145 | let alpha = (1 << 24) / delay; |
| 146 | // warp is 7.12 (5.12? the max value is 70386 in Q12). |
| 147 | let warp = warp_alpha(alpha).max(1) as i64; |
| 148 | // k1 is 9.12 (6.12?) |
| 149 | let k1 = 3 * warp; |
| 150 | // k2 is 16.24 (11.24?) |
| 151 | let k2 = k1 * warp; |
| 152 | // d is 16.15 (10.15?) |
| 153 | let d = ((((1 << 12) + k1) << 12) + k2 + 256) >> 9; |
| 154 | // a is 0.32, since d is larger than both 1.0 and k2 |
| 155 | let a = (k2 << 23) / d; |
| 156 | // ik2 is 25.24 |
| 157 | let ik2 = (1i64 << 48) / k2; |
| 158 | // b1 is Q56; in practice, the integer ranges between -2 and 2. |
| 159 | let b1 = 2 * a * (ik2 - (1i64 << 24)); |
| 160 | // b2 is Q56; in practice, the integer ranges between -2 and 2. |
| 161 | let b2 = (1i64 << 56) - ((4 * a) << 24) - b1; |
| 162 | // All of the filter parameters are Q24. |
| 163 | ( |
| 164 | ((b1 + (1i64 << 31)) >> 32) as i32, |
| 165 | ((b2 + (1i64 << 31)) >> 32) as i32, |
| 166 | ((a + 128) >> 8) as i32, |
| 167 | ) |
| 168 | } |
| 169 | |
| 170 | impl IIRBessel2 { |
| 171 | pub fn new(delay: i32, value: i32) -> IIRBessel2 { |
| 172 | let (c0, c1, g) = iir_bessel2_get_parameters(delay); |
| 173 | IIRBessel2 { c: [c0, c1], g, x: [value, value], y: [value, value] } |
| 174 | } |
| 175 | |
| 176 | // Re-initialize Bessel filter coefficients with the specified delay. |
| 177 | // This does not alter the x/y state, but changes the reaction time of the |
| 178 | // filter. |
| 179 | // Altering the time constant of a reactive filter without altering internal |
| 180 | // state is something that has to be done carefully, but our design operates |
| 181 | // at high enough delays and with small enough time constant changes to make |
| 182 | // it safe. |
| 183 | pub fn reinit(&mut self, delay: i32) { |
| 184 | let (c0, c1, g) = iir_bessel2_get_parameters(delay); |
| 185 | self.c[0] = c0; |
| 186 | self.c[1] = c1; |
| 187 | self.g = g; |
| 188 | } |
| 189 | |
| 190 | pub fn update(&mut self, x: i32) -> i32 { |
| 191 | let c0 = self.c[0] as i64; |
| 192 | let c1 = self.c[1] as i64; |
| 193 | let g = self.g as i64; |
| 194 | let x0 = self.x[0] as i64; |
| 195 | let x1 = self.x[1] as i64; |
| 196 | let y0 = self.y[0] as i64; |
| 197 | let y1 = self.y[1] as i64; |
| 198 | let ya = |
| 199 | ((((x as i64) + x0 * 2 + x1) * g + y0 * c0 + y1 * c1 + (1i64 << 23)) |
| 200 | >> 24) as i32; |
| 201 | self.x[1] = self.x[0]; |
| 202 | self.x[0] = x; |
| 203 | self.y[1] = self.y[0]; |
| 204 | self.y[0] = ya; |
| 205 | ya |
| 206 | } |
| 207 | } |
| 208 | |
// Per-frame statistics recorded in pass 1 and consumed in pass 2.
#[derive(Copy, Clone)]
struct RCFrameMetrics {
  // The log base 2 of the scale factor for this frame in Q24 format.
  log_scale_q24: i32,
  // The frame type from pass 1
  fti: usize,
  // Whether or not the frame was hidden in pass 1
  show_frame: bool,
  // TODO: The input frame number corresponding to this frame in the input.
  // input_frameno: u32
  // TODO vfr: PTS
}

impl RCFrameMetrics {
  // All-zero metrics: frame type 0, zero log-scale, not shown.
  const fn new() -> Self {
    Self { show_frame: false, fti: 0, log_scale_q24: 0 }
  }
}
| 227 | |
/// Rate control pass summary
///
/// It contains encoding information related to the whole previous
/// encoding pass.
#[derive (Debug, Default, Clone)]
pub struct RCSummary {
  // The number of TUs (temporal units) in the pass.
  pub(crate) ntus: i32,
  // The frame count per sub-type; the extra slot past FRAME_NSUBTYPES is
  // the Show Existing Frame count.
  nframes: [i32; FRAME_NSUBTYPES + 1],
  // The rate-model exponent per tracked sub-type.
  exp: [u8; FRAME_NSUBTYPES],
  // The sum of the scale values per tracked sub-type.
  scale_sum: [i64; FRAME_NSUBTYPES],
  // The sum of all entries of nframes.
  pub(crate) total: i32,
}
| 240 | |
| 241 | // Backing storage to deserialize Summary and Per-Frame pass data |
| 242 | // |
| 243 | // Can store up to a full header size since it is the largest of the two |
| 244 | // packet kinds. |
| 245 | pub(crate) struct RCDeserialize { |
| 246 | // The current byte position in the frame metrics buffer. |
| 247 | pass2_buffer_pos: usize, |
| 248 | // In pass 2, this represents the number of bytes that are available in the |
| 249 | // input buffer. |
| 250 | pass2_buffer_fill: usize, |
| 251 | // Buffer for current frame metrics in pass 2. |
| 252 | pass2_buffer: [u8; TWOPASS_HEADER_SZ], |
| 253 | } |
| 254 | |
| 255 | impl Default for RCDeserialize { |
| 256 | fn default() -> Self { |
| 257 | RCDeserialize { |
| 258 | pass2_buffer: [0; TWOPASS_HEADER_SZ], |
| 259 | pass2_buffer_pos: 0, |
| 260 | pass2_buffer_fill: 0, |
| 261 | } |
| 262 | } |
| 263 | } |
| 264 | |
| 265 | impl RCDeserialize { |
| 266 | // Fill the backing storage by reading enough bytes from the |
| 267 | // buf slice until goal bytes are available for parsing. |
| 268 | // |
| 269 | // goal must be at most TWOPASS_HEADER_SZ. |
| 270 | pub(crate) fn buffer_fill( |
| 271 | &mut self, buf: &[u8], consumed: usize, goal: usize, |
| 272 | ) -> usize { |
| 273 | let mut consumed = consumed; |
| 274 | while self.pass2_buffer_fill < goal && consumed < buf.len() { |
| 275 | self.pass2_buffer[self.pass2_buffer_fill] = buf[consumed]; |
| 276 | self.pass2_buffer_fill += 1; |
| 277 | consumed += 1; |
| 278 | } |
| 279 | consumed |
| 280 | } |
| 281 | |
| 282 | // Read the next n bytes as i64. |
| 283 | // n must be within 1 and 8 |
| 284 | fn unbuffer_val(&mut self, n: usize) -> i64 { |
| 285 | let mut bytes = n; |
| 286 | let mut ret = 0; |
| 287 | let mut shift = 0; |
| 288 | while bytes > 0 { |
| 289 | bytes -= 1; |
| 290 | ret |= (self.pass2_buffer[self.pass2_buffer_pos] as i64) << shift; |
| 291 | self.pass2_buffer_pos += 1; |
| 292 | shift += 8; |
| 293 | } |
| 294 | ret |
| 295 | } |
| 296 | |
| 297 | // Read metrics for the next frame. |
| 298 | fn parse_metrics(&mut self) -> Result<RCFrameMetrics, String> { |
| 299 | debug_assert!(self.pass2_buffer_fill >= TWOPASS_PACKET_SZ); |
| 300 | let ft_val = self.unbuffer_val(4); |
| 301 | let show_frame = (ft_val >> 31) != 0; |
| 302 | let fti = (ft_val & 0x7FFFFFFF) as usize; |
| 303 | // Make sure the frame type is valid. |
| 304 | if fti > FRAME_NSUBTYPES { |
| 305 | return Err("Invalid frame type" .to_string()); |
| 306 | } |
| 307 | let log_scale_q24 = self.unbuffer_val(4) as i32; |
| 308 | Ok(RCFrameMetrics { log_scale_q24, fti, show_frame }) |
| 309 | } |
| 310 | |
| 311 | // Read the summary header data. |
| 312 | pub(crate) fn parse_summary(&mut self) -> Result<RCSummary, String> { |
| 313 | // check the magic value and version number. |
| 314 | if self.unbuffer_val(4) != TWOPASS_MAGIC as i64 { |
| 315 | return Err("Magic value mismatch" .to_string()); |
| 316 | } |
| 317 | if self.unbuffer_val(4) != TWOPASS_VERSION as i64 { |
| 318 | return Err("Version number mismatch" .to_string()); |
| 319 | } |
| 320 | let mut s = |
| 321 | RCSummary { ntus: self.unbuffer_val(4) as i32, ..Default::default() }; |
| 322 | |
| 323 | // Make sure the file claims to have at least one TU. |
| 324 | // Otherwise we probably got the placeholder data from an aborted |
| 325 | // pass 1. |
| 326 | if s.ntus < 1 { |
| 327 | return Err("No TUs found in first pass summary" .to_string()); |
| 328 | } |
| 329 | let mut total: i32 = 0; |
| 330 | for nframes in s.nframes.iter_mut() { |
| 331 | let n = self.unbuffer_val(4) as i32; |
| 332 | if n < 0 { |
| 333 | return Err("Got negative frame count" .to_string()); |
| 334 | } |
| 335 | total = total |
| 336 | .checked_add(n) |
| 337 | .ok_or_else(|| "Frame count too large" .to_string())?; |
| 338 | |
| 339 | *nframes = n; |
| 340 | } |
| 341 | |
| 342 | // We can't have more TUs than frames. |
| 343 | if s.ntus > total { |
| 344 | return Err("More TUs than frames" .to_string()); |
| 345 | } |
| 346 | |
| 347 | s.total = total; |
| 348 | |
| 349 | for exp in s.exp.iter_mut() { |
| 350 | *exp = self.unbuffer_val(1) as u8; |
| 351 | } |
| 352 | |
| 353 | for scale_sum in s.scale_sum.iter_mut() { |
| 354 | *scale_sum = self.unbuffer_val(8); |
| 355 | if *scale_sum < 0 { |
| 356 | return Err("Got negative scale sum" .to_string()); |
| 357 | } |
| 358 | } |
| 359 | Ok(s) |
| 360 | } |
| 361 | } |
| 362 | |
// The complete rate-control state: the user-supplied constraints, the bit
// reservoir, the per-frame-subtype rate model, and the 2-pass bookkeeping.
pub struct RCState {
  // The target bit-rate in bits per second.
  // A value <= 0 means rate control is not active (see select_qi()).
  target_bitrate: i32,
  // The number of TUs over which to distribute the reservoir usage.
  // We use TUs because in our leaky bucket model, we only add bits to the
  // reservoir on TU boundaries.
  reservoir_frame_delay: i32,
  // Whether or not the reservoir_frame_delay was explicitly specified by the
  // user, or is the default value.
  reservoir_frame_delay_is_set: bool,
  // The maximum quantizer index to allow (for the luma AC coefficients, other
  // quantizers will still be adjusted to match).
  maybe_ac_qi_max: Option<u8>,
  // The minimum quantizer index to allow (for the luma AC coefficients).
  ac_qi_min: u8,
  // Will we drop frames to meet bitrate requirements?
  drop_frames: bool,
  // Do we respect the maximum reservoir fullness?
  cap_overflow: bool,
  // Can the reservoir go negative?
  cap_underflow: bool,
  // The log of the first-pass base quantizer.
  pass1_log_base_q: i64,
  // Two-pass mode state.
  // PASS_SINGLE => 1-pass encoding.
  // PASS_1 => 1st pass of 2-pass encoding.
  // PASS_2 => 2nd pass of 2-pass encoding.
  // PASS_2_PLUS_1 => 2nd pass of 2-pass encoding, but also emitting pass 1
  // data again.
  twopass_state: i32,
  // The log of the number of pixels in a frame in Q57 format.
  log_npixels: i64,
  // The target average bits per Temporal Unit (input frame).
  bits_per_tu: i64,
  // The current bit reservoir fullness (bits available to be used).
  reservoir_fullness: i64,
  // The target buffer fullness.
  // This is where we'd like to be by the last keyframe that appears in the
  // next reservoir_frame_delay frames.
  reservoir_target: i64,
  // The maximum buffer fullness (total size of the buffer).
  reservoir_max: i64,
  // The log of estimated scale factor for the rate model in Q57 format.
  //
  // TODO: Convert to Q23 or figure out a better way to avoid overflow
  // once 2-pass mode is introduced, if required.
  log_scale: [i64; FRAME_NSUBTYPES],
  // The exponent used in the rate model in Q6 format.
  exp: [u8; FRAME_NSUBTYPES],
  // The log of an estimated scale factor used to obtain the real framerate,
  // for VFR sources or, e.g., 12 fps content doubled to 24 fps, etc.
  // TODO vfr: log_vfr_scale: i64,

  // Second-order lowpass filters to track scale and VFR.
  scalefilter: [IIRBessel2; FRAME_NSUBTYPES],
  // TODO vfr: vfrfilter: IIRBessel2,

  // The number of frames of each type we have seen, for filter adaptation
  // purposes.
  // These are only 32 bits to guarantee that we can sum the scales over the
  // whole file without overflow in a 64-bit int.
  // That limits us to 2.268 years at 60 fps (minus 33% with re-ordering).
  nframes: [i32; FRAME_NSUBTYPES + 1],
  // One delay per inter frame sub-type; each starts at
  // INTER_DELAY_TARGET_MIN in new().
  // NOTE(review): presumably the current delay of the matching scale filter,
  // raised towards inter_delay_target as frames are seen -- confirm against
  // the update code, which is not visible here.
  inter_delay: [i32; FRAME_NSUBTYPES - 1],
  // Initialized to half of reservoir_frame_delay in new().
  inter_delay_target: i32,
  // The total accumulated estimation bias.
  rate_bias: i64,
  // The number of (non-Show Existing Frame) frames that have been encoded.
  nencoded_frames: i64,
  // The number of Show Existing Frames that have been emitted.
  nsef_frames: i64,
  // Buffer for current frame metrics in pass 1.
  pass1_buffer: [u8; TWOPASS_HEADER_SZ],
  // Whether or not the user has retrieved the pass 1 data for the last frame.
  // For PASS_1 or PASS_2_PLUS_1 encoding, this is set to false after each
  // frame is encoded, and must be set to true by calling twopass_out() before
  // the next frame can be encoded.
  pub pass1_data_retrieved: bool,
  // Marks whether or not the user has retrieved the summary data at the end of
  // the encode.
  pass1_summary_retrieved: bool,
  // Whether or not the user has provided enough data to encode in the second
  // pass.
  // For PASS_2 or PASS_2_PLUS_1 encoding, this is set to false after each
  // frame, and must be set to true by calling twopass_in() before the next
  // frame can be encoded.
  pass2_data_ready: bool,
  // TODO: Add a way to force the next frame to be a keyframe in 2-pass mode.
  // Right now we are relying on keyframe detection to detect the same
  // keyframes.

  // The metrics for the previous frame.
  prev_metrics: RCFrameMetrics,
  // The metrics for the current frame.
  cur_metrics: RCFrameMetrics,
  // The buffered metrics for future frames.
  frame_metrics: Vec<RCFrameMetrics>,
  // The total number of frames still in use in the circular metric buffer.
  nframe_metrics: usize,
  // The index of the current frame in the circular metric buffer.
  frame_metrics_head: usize,
  // Data deserialization
  des: RCDeserialize,
  // The TU count encoded so far.
  ntus: i32,
  // The TU count for the whole file.
  ntus_total: i32,
  // The remaining TU count.
  ntus_left: i32,
  // The frame count of each frame subtype in the whole file.
  nframes_total: [i32; FRAME_NSUBTYPES + 1],
  // The sum of those counts.
  nframes_total_total: i32,
  // The number of frames of each subtype yet to be processed.
  nframes_left: [i32; FRAME_NSUBTYPES + 1],
  // The sum of the scale values for each frame subtype.
  scale_sum: [i64; FRAME_NSUBTYPES],
  // The number of TUs represented by the current scale sums.
  scale_window_ntus: i32,
  // The frame count of each frame subtype in the current scale window.
  scale_window_nframes: [i32; FRAME_NSUBTYPES + 1],
  // The sum of the scale values for each frame subtype in the current window.
  scale_window_sum: [i64; FRAME_NSUBTYPES],
}
| 484 | |
// The quantizer choice for one frame, as produced by
// QuantizerParameters::new_from_log_q().
// TODO: Separate qi values for each color plane.
pub struct QuantizerParameters {
  // The full-precision, unmodulated log quantizer upon which our modulated
  // quantizer indices are based.
  // This is only used to limit sudden quality changes from frame to frame, and
  // as such is not adjusted when we encounter buffer overrun or underrun.
  pub log_base_q: i64,
  // The full-precision log quantizer modulated by the current frame type upon
  // which our quantizer indices are based (including any adjustments to
  // prevent buffer overrun or underrun).
  // This is used when estimating the scale parameter once we know the actual
  // bit usage of a frame.
  pub log_target_q: i64,
  // The DC quantizer index for each plane, in Y, U, V order.
  // The chroma entries are 0 for monochrome (4:0:0) content.
  pub dc_qi: [u8; 3],
  // The AC quantizer index for each plane, in Y, U, V order; entry 0 is the
  // base quantizer index. The chroma entries are 0 for monochrome content.
  pub ac_qi: [u8; 3],
  // Derived from log_target_q plus the mean-scale term passed to
  // new_from_log_q().
  // NOTE(review): presumably the rate-distortion Lagrange multiplier --
  // confirm against its users.
  pub lambda: f64,
  // One scale factor per plane (Y, U, V), computed from the difference
  // between log_target_q and that plane's log quantizer.
  pub dist_scale: [f64; 3],
}
| 503 | |
// Turns a Q57 base-2 logarithm x into the natural-log exponent of the
// squared linear value: exp(x * Q57_SQUARE_EXP_SCALE) == (2^(x / 2^57))^2.
const Q57_SQUARE_EXP_SCALE: f64 =
  (2.0 * ::std::f64::consts::LN_2) / ((1i64 << 57) as f64);
| 506 | |
| 507 | // Daala style log-offset for chroma quantizers |
| 508 | // TODO: Optimal offsets for more configurations than just BT.709 |
| 509 | fn chroma_offset( |
| 510 | log_target_q: i64, chroma_sampling: ChromaSampling, |
| 511 | ) -> (i64, i64) { |
| 512 | let x: i64 = log_target_q.max(0); |
| 513 | // Gradient optimized for CIEDE2000+PSNR on subset3 |
| 514 | let y: i64 = match chroma_sampling { |
| 515 | ChromaSampling::Cs400 => 0, |
| 516 | ChromaSampling::Cs420 => (x >> 2) + (x >> 6), // 0.266 |
| 517 | ChromaSampling::Cs422 => (x >> 3) + (x >> 4) - (x >> 7), // 0.180 |
| 518 | ChromaSampling::Cs444 => (x >> 4) + (x >> 5) + (x >> 8), // 0.098 |
| 519 | }; |
| 520 | // blog64(7) - blog64(4); blog64(5) - blog64(4) |
| 521 | (0x19D_5D9F_D501_0B37 - y, 0xA4_D3C2_5E68_DC58 - y) |
| 522 | } |
| 523 | |
| 524 | impl QuantizerParameters { |
| 525 | fn new_from_log_q( |
| 526 | log_base_q: i64, log_target_q: i64, bit_depth: usize, |
| 527 | chroma_sampling: ChromaSampling, is_intra: bool, |
| 528 | log_isqrt_mean_scale: i64, |
| 529 | ) -> QuantizerParameters { |
| 530 | let scale = log_isqrt_mean_scale + q57(QSCALE + bit_depth as i32 - 8); |
| 531 | |
| 532 | let mut log_q_y = log_target_q; |
| 533 | if !is_intra && bit_depth == 8 { |
| 534 | log_q_y = log_target_q |
| 535 | + (log_target_q >> 32) * Q_MODEL_MUL[chroma_sampling as usize] |
| 536 | + Q_MODEL_ADD[chroma_sampling as usize]; |
| 537 | } |
| 538 | |
| 539 | let quantizer = bexp64(log_q_y + scale); |
| 540 | let (offset_u, offset_v) = |
| 541 | chroma_offset(log_q_y + log_isqrt_mean_scale, chroma_sampling); |
| 542 | let mono = chroma_sampling == ChromaSampling::Cs400; |
| 543 | let log_q_u = log_q_y + offset_u; |
| 544 | let log_q_v = log_q_y + offset_v; |
| 545 | let quantizer_u = bexp64(log_q_u + scale); |
| 546 | let quantizer_v = bexp64(log_q_v + scale); |
| 547 | let lambda = (::std::f64::consts::LN_2 / 6.0) |
| 548 | * (((log_target_q + log_isqrt_mean_scale) as f64) |
| 549 | * Q57_SQUARE_EXP_SCALE) |
| 550 | .exp(); |
| 551 | |
| 552 | let scale = |q| bexp64((log_target_q - q) * 2 + q57(16)) as f64 / 65536.; |
| 553 | let dist_scale = [scale(log_q_y), scale(log_q_u), scale(log_q_v)]; |
| 554 | |
| 555 | let base_q_idx = select_ac_qi(quantizer, bit_depth).max(1); |
| 556 | |
| 557 | // delta_q only gets 6 bits + a sign bit, so it can differ by 63 at most. |
| 558 | let min_qi = base_q_idx.saturating_sub(63).max(1); |
| 559 | let max_qi = base_q_idx.saturating_add(63).min(255); |
| 560 | let clamp_qi = |qi: u8| qi.clamp(min_qi, max_qi); |
| 561 | |
| 562 | QuantizerParameters { |
| 563 | log_base_q, |
| 564 | log_target_q, |
| 565 | // TODO: Allow lossless mode; i.e. qi == 0. |
| 566 | dc_qi: [ |
| 567 | clamp_qi(select_dc_qi(quantizer, bit_depth)), |
| 568 | if mono { 0 } else { clamp_qi(select_dc_qi(quantizer_u, bit_depth)) }, |
| 569 | if mono { 0 } else { clamp_qi(select_dc_qi(quantizer_v, bit_depth)) }, |
| 570 | ], |
| 571 | ac_qi: [ |
| 572 | base_q_idx, |
| 573 | if mono { 0 } else { clamp_qi(select_ac_qi(quantizer_u, bit_depth)) }, |
| 574 | if mono { 0 } else { clamp_qi(select_ac_qi(quantizer_v, bit_depth)) }, |
| 575 | ], |
| 576 | lambda, |
| 577 | dist_scale, |
| 578 | } |
| 579 | } |
| 580 | } |
| 581 | |
| 582 | impl RCState { |
| 583 | pub fn new( |
| 584 | frame_width: i32, frame_height: i32, framerate_num: i64, |
| 585 | framerate_den: i64, target_bitrate: i32, maybe_ac_qi_max: Option<u8>, |
| 586 | ac_qi_min: u8, max_key_frame_interval: i32, |
| 587 | maybe_reservoir_frame_delay: Option<i32>, |
| 588 | ) -> RCState { |
| 589 | // The default buffer size is set equal to 1.5x the keyframe interval, or 240 |
| 590 | // frames; whichever is smaller, with a minimum of 12. |
| 591 | // For user set values, we enforce a minimum of 12. |
| 592 | // The interval is short enough to allow reaction, but long enough to allow |
| 593 | // looking into the next GOP (avoiding the case where the last frames |
| 594 | // before an I-frame get starved), in most cases. |
| 595 | // The 12 frame minimum gives us some chance to distribute bit estimation |
| 596 | // errors in the worst case. |
| 597 | let reservoir_frame_delay = maybe_reservoir_frame_delay |
| 598 | .unwrap_or_else(|| ((max_key_frame_interval * 3) >> 1).min(240)) |
| 599 | .max(12); |
| 600 | // TODO: What are the limits on these? |
| 601 | let npixels = (frame_width as i64) * (frame_height as i64); |
| 602 | // Insane framerates or frame sizes mean insane bitrates. |
| 603 | // Let's not get carried away. |
| 604 | // We also subtract 16 bits from each temporal unit to account for the |
| 605 | // temporal delimiter, whose bits are not included in the frame sizes |
| 606 | // reported to update_state(). |
| 607 | // TODO: Support constraints imposed by levels. |
| 608 | let bits_per_tu = clamp( |
| 609 | (target_bitrate as i64) * framerate_den / framerate_num, |
| 610 | 40, |
| 611 | 0x4000_0000_0000, |
| 612 | ) - (TEMPORAL_DELIMITER.len() * 8) as i64; |
| 613 | let reservoir_max = bits_per_tu * (reservoir_frame_delay as i64); |
| 614 | // Start with a buffer fullness and fullness target of 50%. |
| 615 | let reservoir_target = (reservoir_max + 1) >> 1; |
| 616 | // Pick exponents and initial scales for quantizer selection. |
| 617 | let ibpp = npixels / bits_per_tu; |
| 618 | // These have been derived by encoding many clips at every quantizer |
| 619 | // and running a piecewise-linear regression in binary log space. |
| 620 | let (i_exp, i_log_scale) = if ibpp < 1 { |
| 621 | (48u8, blog64(36) - q57(QSCALE)) |
| 622 | } else if ibpp < 4 { |
| 623 | (61u8, blog64(55) - q57(QSCALE)) |
| 624 | } else { |
| 625 | (77u8, blog64(129) - q57(QSCALE)) |
| 626 | }; |
| 627 | let (p_exp, p_log_scale) = if ibpp < 2 { |
| 628 | (69u8, blog64(32) - q57(QSCALE)) |
| 629 | } else if ibpp < 139 { |
| 630 | (104u8, blog64(84) - q57(QSCALE)) |
| 631 | } else { |
| 632 | (83u8, blog64(19) - q57(QSCALE)) |
| 633 | }; |
| 634 | let (b0_exp, b0_log_scale) = if ibpp < 2 { |
| 635 | (84u8, blog64(30) - q57(QSCALE)) |
| 636 | } else if ibpp < 92 { |
| 637 | (120u8, blog64(68) - q57(QSCALE)) |
| 638 | } else { |
| 639 | (68u8, blog64(4) - q57(QSCALE)) |
| 640 | }; |
| 641 | let (b1_exp, b1_log_scale) = if ibpp < 2 { |
| 642 | (87u8, blog64(27) - q57(QSCALE)) |
| 643 | } else if ibpp < 126 { |
| 644 | (139u8, blog64(84) - q57(QSCALE)) |
| 645 | } else { |
| 646 | (61u8, blog64(1) - q57(QSCALE)) |
| 647 | }; |
| 648 | |
| 649 | // TODO: Add support for "golden" P frames. |
| 650 | RCState { |
| 651 | target_bitrate, |
| 652 | reservoir_frame_delay, |
| 653 | reservoir_frame_delay_is_set: maybe_reservoir_frame_delay.is_some(), |
| 654 | maybe_ac_qi_max, |
| 655 | ac_qi_min, |
| 656 | drop_frames: false, |
| 657 | cap_overflow: true, |
| 658 | cap_underflow: false, |
| 659 | pass1_log_base_q: 0, |
| 660 | twopass_state: PASS_SINGLE, |
| 661 | log_npixels: blog64(npixels), |
| 662 | bits_per_tu, |
| 663 | reservoir_fullness: reservoir_target, |
| 664 | reservoir_target, |
| 665 | reservoir_max, |
| 666 | log_scale: [i_log_scale, p_log_scale, b0_log_scale, b1_log_scale], |
| 667 | exp: [i_exp, p_exp, b0_exp, b1_exp], |
| 668 | scalefilter: [ |
| 669 | IIRBessel2::new(4, q57_to_q24(i_log_scale)), |
| 670 | IIRBessel2::new(INTER_DELAY_TARGET_MIN, q57_to_q24(p_log_scale)), |
| 671 | IIRBessel2::new(INTER_DELAY_TARGET_MIN, q57_to_q24(b0_log_scale)), |
| 672 | IIRBessel2::new(INTER_DELAY_TARGET_MIN, q57_to_q24(b1_log_scale)), |
| 673 | ], |
| 674 | // TODO VFR |
| 675 | nframes: [0; FRAME_NSUBTYPES + 1], |
| 676 | inter_delay: [INTER_DELAY_TARGET_MIN; FRAME_NSUBTYPES - 1], |
| 677 | inter_delay_target: reservoir_frame_delay >> 1, |
| 678 | rate_bias: 0, |
| 679 | nencoded_frames: 0, |
| 680 | nsef_frames: 0, |
| 681 | pass1_buffer: [0; TWOPASS_HEADER_SZ], |
| 682 | pass1_data_retrieved: true, |
| 683 | pass1_summary_retrieved: false, |
| 684 | pass2_data_ready: false, |
| 685 | prev_metrics: RCFrameMetrics::new(), |
| 686 | cur_metrics: RCFrameMetrics::new(), |
| 687 | frame_metrics: Vec::new(), |
| 688 | nframe_metrics: 0, |
| 689 | frame_metrics_head: 0, |
| 690 | ntus: 0, |
| 691 | ntus_total: 0, |
| 692 | ntus_left: 0, |
| 693 | nframes_total: [0; FRAME_NSUBTYPES + 1], |
| 694 | nframes_total_total: 0, |
| 695 | nframes_left: [0; FRAME_NSUBTYPES + 1], |
| 696 | scale_sum: [0; FRAME_NSUBTYPES], |
| 697 | scale_window_ntus: 0, |
| 698 | scale_window_nframes: [0; FRAME_NSUBTYPES + 1], |
| 699 | scale_window_sum: [0; FRAME_NSUBTYPES], |
| 700 | des: RCDeserialize::default(), |
| 701 | } |
| 702 | } |
| 703 | |
| 704 | pub(crate) fn select_first_pass_qi( |
| 705 | &self, bit_depth: usize, fti: usize, chroma_sampling: ChromaSampling, |
| 706 | ) -> QuantizerParameters { |
| 707 | // Adjust the quantizer for the frame type, result is Q57: |
| 708 | let log_q = ((self.pass1_log_base_q + (1i64 << 11)) >> 12) |
| 709 | * (MQP_Q12[fti] as i64) |
| 710 | + DQP_Q57[fti]; |
| 711 | QuantizerParameters::new_from_log_q( |
| 712 | self.pass1_log_base_q, |
| 713 | log_q, |
| 714 | bit_depth, |
| 715 | chroma_sampling, |
| 716 | fti == 0, |
| 717 | 0, |
| 718 | ) |
| 719 | } |
| 720 | |
// Chooses the quantizer parameters for the next frame of subtype `fti`.
//
// - If `target_bitrate <= 0`, rate control is inactive and the quantizer is
//   derived directly from `ctx.config.quantizer` and the frame subtype.
// - In the first pass of 2-pass mode, the fixed first-pass quantizer from
//   `select_first_pass_qi` is returned.
// - Otherwise a base quantizer is found by bisection so that the modelled
//   bit usage over the upcoming buffer window matches the bits available.
//   It is then limited relative to `maybe_prev_log_base_q` (when given),
//   modulated by frame subtype, checked against reservoir overflow and
//   underflow, and finally clamped to the configured quantizer index limits.
//
// `log_isqrt_mean_scale` is forwarded unchanged to
// `QuantizerParameters::new_from_log_q`.
// TODO: Separate quantizers for Cb and Cr.
#[profiling::function ]
pub(crate) fn select_qi<T: Pixel>(
  &self, ctx: &ContextInner<T>, output_frameno: u64, fti: usize,
  maybe_prev_log_base_q: Option<i64>, log_isqrt_mean_scale: i64,
) -> QuantizerParameters {
  // Is rate control active?
  if self.target_bitrate <= 0 {
    // Rate control is not active.
    // Derive quantizer directly from frame type.
    let bit_depth = ctx.config.bit_depth;
    let chroma_sampling = ctx.config.chroma_sampling;
    let (log_base_q, log_q) =
      Self::calc_flat_quantizer(ctx.config.quantizer as u8, bit_depth, fti);
    QuantizerParameters::new_from_log_q(
      log_base_q,
      log_q,
      bit_depth,
      chroma_sampling,
      fti == 0,
      log_isqrt_mean_scale,
    )
  } else {
    // Number of frames of each subtype (plus Show Existing Frame frames in
    // the last slot) expected in the window we are budgeting for.
    let mut nframes: [i32; FRAME_NSUBTYPES + 1] = [0; FRAME_NSUBTYPES + 1];
    // Per-subtype log scale estimates used by the rate model below.
    let mut log_scale: [i64; FRAME_NSUBTYPES] = self.log_scale;
    let mut reservoir_tus = self.reservoir_frame_delay.min(self.ntus_left);
    let mut reservoir_frames = 0;
    // Scale estimate for the current frame, taken from the low-pass filter
    // output (Q24) and shifted up by 33 bits to Q57.
    let mut log_cur_scale = (self.scalefilter[fti].y[0] as i64) << 33;
    match self.twopass_state {
      // First pass of 2-pass mode: use a fixed base quantizer.
      PASS_1 => {
        return self.select_first_pass_qi(
          ctx.config.bit_depth,
          fti,
          ctx.config.chroma_sampling,
        );
      }
      // Second pass of 2-pass mode: we know exactly how much of each frame
      // type there is in the current buffer window, and have estimates for
      // the scales.
      PASS_2 | PASS_2_PLUS_1 => {
        // Work on local copies so the adjustments below do not modify the
        // persistent window statistics.
        let mut scale_window_sum: [i64; FRAME_NSUBTYPES] =
          self.scale_window_sum;
        let mut scale_window_nframes: [i32; FRAME_NSUBTYPES + 1] =
          self.scale_window_nframes;
        // Intentionally exclude Show Existing Frame frames from this.
        for ftj in 0..FRAME_NSUBTYPES {
          reservoir_frames += scale_window_nframes[ftj];
        }
        // If we're approaching the end of the file, add some slack to keep
        // us from slamming into a rail.
        // Our rate accuracy goes down, but it keeps the result sensible.
        // We position the target where the first forced keyframe beyond the
        // end of the file would be (for consistency with 1-pass mode).
        // TODO: let mut buf_pad = self.reservoir_frame_delay.min(...);
        // if buf_delay < buf_pad {
        // buf_pad -= buf_delay;
        // }
        // else ...
        // Otherwise, search for the last keyframe in the buffer window and
        // target that.
        // Currently we only do this when using a finite buffer.
        // We could save the position of the last keyframe in the stream in
        // the summary data and do it with a whole-file buffer as well, but
        // it isn't likely to make a difference.
        if !self.frame_metrics.is_empty() {
          // Index one past the newest entry in the circular buffer.
          let mut fm_tail = self.frame_metrics_head + self.nframe_metrics;
          if fm_tail >= self.frame_metrics.len() {
            fm_tail -= self.frame_metrics.len();
          }
          // Scan backwards from the tail, wrapping around the buffer.
          let mut fmi = fm_tail;
          loop {
            if fmi == 0 {
              fmi += self.frame_metrics.len();
            }
            fmi -= 1;
            // Stop before we remove the first frame.
            if fmi == self.frame_metrics_head {
              break;
            }
            // If we find a keyframe, remove it and everything past it.
            if self.frame_metrics[fmi].fti == FRAME_SUBTYPE_I {
              while fmi != fm_tail {
                let m = &self.frame_metrics[fmi];
                let ftj = m.fti;
                scale_window_nframes[ftj] -= 1;
                if ftj < FRAME_NSUBTYPES {
                  scale_window_sum[ftj] -= bexp_q24(m.log_scale_q24);
                  reservoir_frames -= 1;
                }
                if m.show_frame {
                  reservoir_tus -= 1;
                }
                fmi += 1;
                if fmi >= self.frame_metrics.len() {
                  fmi = 0;
                }
              }
              // And stop scanning backwards.
              break;
            }
          }
        }
        nframes = scale_window_nframes;
        // If we're not using the same frame type as in pass 1 (because
        // someone changed some encoding parameters), remove that scale
        // estimate.
        // We'll add a replacement for the correct frame type below.
        if self.cur_metrics.fti != fti {
          scale_window_nframes[self.cur_metrics.fti] -= 1;
          if self.cur_metrics.fti != FRAME_SUBTYPE_SEF {
            scale_window_sum[self.cur_metrics.fti] -=
              bexp_q24(self.cur_metrics.log_scale_q24);
          }
        } else {
          // Same frame type as in pass 1: use the measured pass 1 scale for
          // the current frame instead of the filtered estimate.
          log_cur_scale = (self.cur_metrics.log_scale_q24 as i64) << 33;
        }
        // If we're approaching the end of the file, add some slack to keep
        // us from slamming into a rail.
        // Our rate accuracy goes down, but it keeps the result sensible.
        // We position the target where the first forced keyframe beyond the
        // end of the file would be (for consistency with 1-pass mode).
        if reservoir_tus >= self.ntus_left
          && self.ntus_total as u64
            > ctx.gop_input_frameno_start[&output_frameno]
        {
          let nfinal_gop_tus = self.ntus_total
            - (ctx.gop_input_frameno_start[&output_frameno] as i32);
          if ctx.config.max_key_frame_interval as i32 > nfinal_gop_tus {
            let reservoir_pad = (ctx.config.max_key_frame_interval as i32
              - nfinal_gop_tus)
              .min(self.reservoir_frame_delay - reservoir_tus);
            let (guessed_reservoir_frames, guessed_reservoir_tus) = ctx
              .guess_frame_subtypes(
                &mut nframes,
                reservoir_tus + reservoir_pad,
              );
            reservoir_frames = guessed_reservoir_frames;
            reservoir_tus = guessed_reservoir_tus;
          }
        }
        // Blend in the low-pass filtered scale according to how many
        // frames of each type we need to add compared to the actual sums in
        // our window.
        for ftj in 0..FRAME_NSUBTYPES {
          let scale = scale_window_sum[ftj]
            + bexp_q24(self.scalefilter[ftj].y[0])
              * (nframes[ftj] - scale_window_nframes[ftj]) as i64;
          log_scale[ftj] = if nframes[ftj] > 0 {
            blog64(scale) - blog64(nframes[ftj] as i64) - q57(24)
          } else {
            -self.log_npixels
          };
        }
      }
      // Single pass.
      _ => {
        // Figure out how to re-distribute bits so that we hit our fullness
        // target before the last keyframe in our current buffer window
        // (after the current frame), or the end of the buffer window,
        // whichever comes first.
        // Count the various types and classes of frames.
        let (guessed_reservoir_frames, guessed_reservoir_tus) =
          ctx.guess_frame_subtypes(&mut nframes, self.reservoir_frame_delay);
        reservoir_frames = guessed_reservoir_frames;
        reservoir_tus = guessed_reservoir_tus;
        // TODO: Scale for VFR.
      }
    }
    // If we've been missing our target, add a penalty term.
    let rate_bias = (self.rate_bias / (self.nencoded_frames + 100))
      * (reservoir_frames as i64);
    // rate_total is the total bits available over the next
    // reservoir_tus TUs.
    let rate_total = self.reservoir_fullness - self.reservoir_target
      + rate_bias
      + (reservoir_tus as i64) * self.bits_per_tu;
    // Find a target quantizer that meets our rate target for the
    // specific mix of frame types we'll have over the next
    // reservoir_frame frames.
    // We model the rate<->quantizer relationship as
    // rate = scale*(quantizer**-exp)
    // In this case, we have our desired rate, an exponent selected in
    // setup, and a scale that's been measured over our frame history,
    // so we're solving for the quantizer.
    // Exponentiation with arbitrary exponents is expensive, so we work
    // in the binary log domain (binary exp and log aren't too bad):
    // rate = exp2(log2(scale) - log2(quantizer)*exp)
    // There's no easy closed form solution, so we bisection search for it.
    let bit_depth = ctx.config.bit_depth;
    let chroma_sampling = ctx.config.chroma_sampling;
    // TODO: Proper handling of lossless.
    let mut log_qlo = blog64(ac_q(self.ac_qi_min, 0, bit_depth).get() as i64)
      - q57(QSCALE + bit_depth as i32 - 8);
    // The AC quantizer tables map to values larger than the DC quantizer
    // tables, so we use that as the upper bound to make sure we can use
    // the full table if needed.
    let mut log_qhi = blog64(
      ac_q(self.maybe_ac_qi_max.unwrap_or(255), 0, bit_depth).get() as i64,
    ) - q57(QSCALE + bit_depth as i32 - 8);
    let mut log_base_q = (log_qlo + log_qhi) >> 1;
    while log_qlo < log_qhi {
      // Count bits contributed by each frame type using the model.
      let mut bits = 0i64;
      for ftj in 0..FRAME_NSUBTYPES {
        // Modulate base quantizer by frame type.
        let log_q = ((log_base_q + (1i64 << 11)) >> 12)
          * (MQP_Q12[ftj] as i64)
          + DQP_Q57[ftj];
        // All the fields here are Q57 except for the exponent, which is
        // Q6.
        bits += (nframes[ftj] as i64)
          * bexp64(
            log_scale[ftj] + self.log_npixels
              - ((log_q + 32) >> 6) * (self.exp[ftj] as i64),
          );
      }
      // The number of bits for Show Existing Frame frames is constant.
      bits += (nframes[FRAME_SUBTYPE_SEF] as i64) * SEF_BITS;
      // Too many bits: raise the lower bound (coarser quantizer).
      // Too few bits: lower the upper bound (finer quantizer).
      let diff = bits - rate_total;
      if diff > 0 {
        log_qlo = log_base_q + 1;
      } else if diff < 0 {
        log_qhi = log_base_q - 1;
      } else {
        break;
      }
      log_base_q = (log_qlo + log_qhi) >> 1;
    }
    // If this was not one of the initial frames, limit the change in
    // base quantizer to within [0.8*Q, 1.2*Q] where Q is the previous
    // frame's base quantizer.
    // NOTE(review): the same constant is used on both sides and looks like
    // log2(1.25) in Q57, so the upper bound appears to be 1.25*Q rather
    // than 1.2*Q; confirm and adjust the comment above if so.
    if let Some(prev_log_base_q) = maybe_prev_log_base_q {
      log_base_q = clamp(
        log_base_q,
        prev_log_base_q - 0xA4_D3C2_5E68_DC58,
        prev_log_base_q + 0xA4_D3C2_5E68_DC58,
      );
    }
    // Modulate base quantizer by frame type.
    let mut log_q = ((log_base_q + (1i64 << 11)) >> 12)
      * (MQP_Q12[fti] as i64)
      + DQP_Q57[fti];
    // The above allocation looks only at the total rate we'll accumulate
    // in the next reservoir_frame_delay frames.
    // However, we could overflow the bit reservoir on the very next
    // frame.
    // Check for that here if we're not using a soft target.
    if self.cap_overflow {
      // Allow 3% of the buffer for prediction error.
      // This should be plenty, and we don't mind if we go a bit over.
      // We only want to keep these bits from being completely wasted.
      let margin = (self.reservoir_max + 31) >> 5;
      // We want to use at least this many bits next frame.
      let soft_limit = self.reservoir_fullness + self.bits_per_tu
        - (self.reservoir_max - margin);
      if soft_limit > 0 {
        let log_soft_limit = blog64(soft_limit);
        // If we're predicting we won't use that many bits...
        // TODO: When using frame re-ordering, we should include the rate
        // for all of the frames in the current TU.
        // When there is more than one frame, there will be no direct
        // solution for the required adjustment, however.
        let log_scale_pixels = log_cur_scale + self.log_npixels;
        let exp = self.exp[fti] as i64;
        let mut log_q_exp = ((log_q + 32) >> 6) * exp;
        if log_scale_pixels - log_q_exp < log_soft_limit {
          // Scale the adjustment based on how far into the margin we are.
          // NOTE(review): `margin` is the divisor here and would be zero
          // if `reservoir_max` were zero; presumably that cannot happen
          // while rate control is active, but confirm.
          log_q_exp += ((log_scale_pixels - log_soft_limit - log_q_exp)
            >> 32)
            * ((margin.min(soft_limit) << 32) / margin);
          log_q = ((log_q_exp + (exp >> 1)) / exp) << 6;
        }
      }
    }
    // We just checked we don't overflow the reservoir next frame, now
    // check we don't underflow and bust the budget (when not using a
    // soft target).
    if self.maybe_ac_qi_max.is_none() {
      // Compute the maximum number of bits we can use in the next frame.
      // Allow 50% of the rate for a single frame for prediction error.
      // This may not be enough for keyframes or sudden changes in
      // complexity.
      let log_hard_limit =
        blog64(self.reservoir_fullness + (self.bits_per_tu >> 1));
      // If we're predicting we'll use more than this...
      // TODO: When using frame re-ordering, we should include the rate
      // for all of the frames in the current TU.
      // When there is more than one frame, there will be no direct
      // solution for the required adjustment, however.
      let log_scale_pixels = log_cur_scale + self.log_npixels;
      let exp = self.exp[fti] as i64;
      let mut log_q_exp = ((log_q + 32) >> 6) * exp;
      if log_scale_pixels - log_q_exp > log_hard_limit {
        // Force the target to hit our limit exactly.
        log_q_exp = log_scale_pixels - log_hard_limit;
        log_q = ((log_q_exp + (exp >> 1)) / exp) << 6;
        // If that target is unreasonable, oh well; we'll have to drop.
      }
    }

    // Finally, keep both the base and the per-frame quantizer within the
    // configured maximum and minimum quantizer indices.
    if let Some(qi_max) = self.maybe_ac_qi_max {
      let (max_log_base_q, max_log_q) =
        Self::calc_flat_quantizer(qi_max, ctx.config.bit_depth, fti);
      log_base_q = cmp::min(log_base_q, max_log_base_q);
      log_q = cmp::min(log_q, max_log_q);
    }
    if self.ac_qi_min > 0 {
      let (min_log_base_q, min_log_q) =
        Self::calc_flat_quantizer(self.ac_qi_min, ctx.config.bit_depth, fti);
      log_base_q = cmp::max(log_base_q, min_log_base_q);
      log_q = cmp::max(log_q, min_log_q);
    }
    QuantizerParameters::new_from_log_q(
      log_base_q,
      log_q,
      bit_depth,
      chroma_sampling,
      fti == 0,
      log_isqrt_mean_scale,
    )
  }
}
| 1044 | |
| 1045 | // Computes a quantizer directly from the frame type and base quantizer index, |
| 1046 | // without consideration for rate control. |
| 1047 | fn calc_flat_quantizer( |
| 1048 | base_qi: u8, bit_depth: usize, fti: usize, |
| 1049 | ) -> (i64, i64) { |
| 1050 | // TODO: Rename "quantizer" something that indicates it is a quantizer |
| 1051 | // index, and move it somewhere more sensible (or choose a better way to |
| 1052 | // parameterize a "quality" configuration parameter). |
| 1053 | |
| 1054 | // We use the AC quantizer as the source quantizer since its quantizer |
| 1055 | // tables have unique entries, while the DC tables do not. |
| 1056 | let ac_quantizer = ac_q(base_qi, 0, bit_depth).get() as i64; |
| 1057 | // Pick the nearest DC entry since an exact match may be unavailable. |
| 1058 | let dc_qi = select_dc_qi(ac_quantizer, bit_depth); |
| 1059 | let dc_quantizer = dc_q(dc_qi, 0, bit_depth).get() as i64; |
| 1060 | // Get the log quantizers as Q57. |
| 1061 | let log_ac_q = blog64(ac_quantizer) - q57(QSCALE + bit_depth as i32 - 8); |
| 1062 | let log_dc_q = blog64(dc_quantizer) - q57(QSCALE + bit_depth as i32 - 8); |
| 1063 | // Target the midpoint of the chosen entries. |
| 1064 | let log_base_q = (log_ac_q + log_dc_q + 1) >> 1; |
| 1065 | // Adjust the quantizer for the frame type, result is Q57: |
| 1066 | let log_q = ((log_base_q + (1i64 << 11)) >> 12) * (MQP_Q12[fti] as i64) |
| 1067 | + DQP_Q57[fti]; |
| 1068 | (log_base_q, log_q) |
| 1069 | } |
| 1070 | |
// Updates the rate control state after a frame of subtype `fti` has been
// coded (or trial coded) using `bits` bits.
//
// - `bits`: size of the coded frame; for non-SEF frames a value `<= 0` is
//   treated as a dropped frame.
// - `show_frame`: shown frames add `bits_per_tu` to the reservoir.
// - `log_target_q`: log quantizer the frame was coded with; combined with
//   `exp[fti]` to estimate the scale for this frame type.
// - `trial`: when set, only the scale estimate is refreshed; frame counts,
//   the reservoir and the rate bias are left untouched.
// - `droppable`: whether the caller allows this frame to be dropped.
//
// Returns `true` if the frame was (or must be) dropped.
// Nothing is updated and `false` is returned when `target_bitrate <= 0`.
//
// Panics if `trial` is set but `needs_trial_encode(fti)` is false or
// `bits <= 0`.
#[profiling::function ]
pub fn update_state(
  &mut self, bits: i64, fti: usize, show_frame: bool, log_target_q: i64,
  trial: bool, droppable: bool,
) -> bool {
  if trial {
    assert!(self.needs_trial_encode(fti));
    assert!(bits > 0);
  }
  let mut dropped = false;
  // Update rate control only if rate control is active.
  if self.target_bitrate > 0 {
    let mut estimated_bits = 0;
    let mut bits = bits;
    let mut droppable = droppable;
    // Default scale used when no bits were coded (SEF or dropped frame).
    let mut log_scale = q57(-64);
    // Drop frames is also disabled for now in the case of infinite-buffer
    // two-pass mode.
    // NOTE(review): the condition below tests for a NON-empty
    // `frame_metrics`, which elsewhere in this file indicates a finite
    // buffer, so it appears to contradict the comment above; confirm which
    // one is intended.
    if !self.drop_frames
      || fti == FRAME_SUBTYPE_SEF
      || (self.twopass_state == PASS_2
        || self.twopass_state == PASS_2_PLUS_1)
        && !self.frame_metrics.is_empty()
    {
      droppable = false;
    }
    if fti == FRAME_SUBTYPE_SEF {
      debug_assert!(bits == SEF_BITS);
      debug_assert!(show_frame);
      // Please don't make trial encodes of a SEF.
      debug_assert!(!trial);
      estimated_bits = SEF_BITS;
      self.nsef_frames += 1;
    } else {
      let log_q_exp = ((log_target_q + 32) >> 6) * (self.exp[fti] as i64);
      let prev_log_scale = self.log_scale[fti];
      if bits <= 0 {
        // We didn't code any blocks in this frame.
        bits = 0;
        dropped = true;
        // TODO: Adjust VFR rate based on drop count.
      } else {
        // Compute the estimated scale factor for this frame type.
        let log_bits = blog64(bits);
        log_scale = (log_bits - self.log_npixels + log_q_exp).min(q57(16));
        // What the model (with the previous scale) predicted for this frame.
        estimated_bits =
          bexp64(prev_log_scale + self.log_npixels - log_q_exp);
        if !trial {
          self.nencoded_frames += 1;
        }
      }
    }
    let log_scale_q24 = q57_to_q24(log_scale);
    // Special two-pass processing.
    if self.twopass_state == PASS_2 || self.twopass_state == PASS_2_PLUS_1 {
      // Pass 2 mode:
      if !trial {
        // Move the current metrics back one frame.
        self.prev_metrics = self.cur_metrics;
        // Back out the last frame's statistics from the sliding window.
        let ftj = self.prev_metrics.fti;
        self.nframes_left[ftj] -= 1;
        self.scale_window_nframes[ftj] -= 1;
        if ftj < FRAME_NSUBTYPES {
          self.scale_window_sum[ftj] -=
            bexp_q24(self.prev_metrics.log_scale_q24);
        }
        if self.prev_metrics.show_frame {
          self.ntus_left -= 1;
          self.scale_window_ntus -= 1;
        }
        // Free the corresponding entry in the circular buffer.
        if !self.frame_metrics.is_empty() {
          self.nframe_metrics -= 1;
          self.frame_metrics_head += 1;
          if self.frame_metrics_head >= self.frame_metrics.len() {
            self.frame_metrics_head = 0;
          }
        }
        // Mark us ready for the next 2-pass packet.
        self.pass2_data_ready = false;
        // Update state, so the user doesn't have to keep calling
        // twopass_in() after they've fed in all the data when we're using
        // a finite buffer.
        // Any error from twopass_in() is deliberately ignored here.
        self.twopass_in(None).unwrap_or(0);
      }
    }
    if self.twopass_state == PASS_1 || self.twopass_state == PASS_2_PLUS_1 {
      // Pass 1 mode: save the metrics for this frame.
      self.prev_metrics.log_scale_q24 = log_scale_q24;
      self.prev_metrics.fti = fti;
      self.prev_metrics.show_frame = show_frame;
      self.pass1_data_retrieved = false;
    }
    // Common to all passes:
    if fti != FRAME_SUBTYPE_SEF && bits > 0 {
      // If this is the first example of the given frame type we've seen,
      // we immediately replace the default scale factor guess with the
      // estimate we just computed using the first frame.
      if trial || self.nframes[fti] <= 0 {
        let f = &mut self.scalefilter[fti];
        let x = log_scale_q24;
        f.x[0] = x;
        f.x[1] = x;
        f.y[0] = x;
        f.y[1] = x;
        self.log_scale[fti] = log_scale;
        // TODO: Duplicate regular P frame state for first golden P frame.
      } else {
        // Lengthen the time constant for the inter filters as we collect
        // more frame statistics, until we reach our target.
        if fti > 0
          && self.inter_delay[fti - 1] < self.inter_delay_target
          && self.nframes[fti] >= self.inter_delay[fti - 1]
        {
          self.inter_delay[fti - 1] += 1;
          self.scalefilter[fti].reinit(self.inter_delay[fti - 1]);
        }
        // Update the low-pass scale filter for this frame type regardless
        // of whether or not we will ultimately drop this frame.
        self.log_scale[fti] =
          q24_to_q57(self.scalefilter[fti].update(log_scale_q24));
      }
      // If this frame busts our budget, it must be dropped.
      if droppable && self.reservoir_fullness + self.bits_per_tu < bits {
        // TODO: Adjust VFR rate based on drop count.
        bits = 0;
        dropped = true;
      } else {
        // TODO: Update a low-pass filter to estimate the "real" frame rate
        // taking timestamps and drops into account.
        // This is only done if the frame is coded, as it needs the final
        // count of dropped frames.
        // NOTE(review): this branch is intentionally empty apart from the
        // TODO above.
      }
    }
    if !trial {
      // Increment the frame count for filter adaptation purposes.
      // NOTE(review): the `!trial` test below is redundant inside this
      // block; the remaining check saturates the counter at i32::MAX.
      if !trial && self.nframes[fti] < ::std::i32::MAX {
        self.nframes[fti] += 1;
      }
      self.reservoir_fullness -= bits;
      if show_frame {
        self.reservoir_fullness += self.bits_per_tu;
        // TODO: Properly account for temporal delimiter bits.
      }
      // If we're too quick filling the buffer and overflow is capped, that
      // rate is lost forever.
      if self.cap_overflow {
        self.reservoir_fullness =
          self.reservoir_fullness.min(self.reservoir_max);
      }
      // If we're too quick draining the buffer and underflow is capped,
      // don't try to make up that rate later.
      if self.cap_underflow {
        self.reservoir_fullness = self.reservoir_fullness.max(0);
      }
      // Adjust the bias for the real bits we've used.
      self.rate_bias += estimated_bits - bits;
    }
  }
  dropped
}
| 1233 | |
| 1234 | pub const fn needs_trial_encode(&self, fti: usize) -> bool { |
| 1235 | self.target_bitrate > 0 && self.nframes[fti] == 0 |
| 1236 | } |
| 1237 | |
| 1238 | pub(crate) const fn ready(&self) -> bool { |
| 1239 | match self.twopass_state { |
| 1240 | PASS_SINGLE => true, |
| 1241 | PASS_1 => self.pass1_data_retrieved, |
| 1242 | PASS_2 => self.pass2_data_ready, |
| 1243 | _ => self.pass1_data_retrieved && self.pass2_data_ready, |
| 1244 | } |
| 1245 | } |
| 1246 | |
| 1247 | fn buffer_val(&mut self, val: i64, bytes: usize, cur_pos: usize) -> usize { |
| 1248 | let mut val = val; |
| 1249 | let mut bytes = bytes; |
| 1250 | let mut cur_pos = cur_pos; |
| 1251 | while bytes > 0 { |
| 1252 | bytes -= 1; |
| 1253 | self.pass1_buffer[cur_pos] = val as u8; |
| 1254 | cur_pos += 1; |
| 1255 | val >>= 8; |
| 1256 | } |
| 1257 | cur_pos |
| 1258 | } |
| 1259 | |
| 1260 | pub(crate) fn select_pass1_log_base_q<T: Pixel>( |
| 1261 | &self, ctx: &ContextInner<T>, output_frameno: u64, |
| 1262 | ) -> i64 { |
| 1263 | assert_eq!(self.twopass_state, PASS_SINGLE); |
| 1264 | self.select_qi(ctx, output_frameno, FRAME_SUBTYPE_I, None, 0).log_base_q |
| 1265 | } |
| 1266 | |
| 1267 | // Initialize the first pass and emit a placeholder summary |
| 1268 | pub(crate) fn init_first_pass( |
| 1269 | &mut self, maybe_pass1_log_base_q: Option<i64>, |
| 1270 | ) { |
| 1271 | if let Some(pass1_log_base_q) = maybe_pass1_log_base_q { |
| 1272 | assert_eq!(self.twopass_state, PASS_SINGLE); |
| 1273 | // Pick first-pass qi for scale calculations. |
| 1274 | self.pass1_log_base_q = pass1_log_base_q; |
| 1275 | } else { |
| 1276 | debug_assert!(self.twopass_state == PASS_2); |
| 1277 | } |
| 1278 | self.twopass_state += PASS_1; |
| 1279 | } |
| 1280 | |
| 1281 | // Prepare a placeholder summary |
| 1282 | fn emit_placeholder_summary(&mut self) -> &[u8] { |
| 1283 | // Fill in dummy summary values. |
| 1284 | let mut cur_pos = 0; |
| 1285 | cur_pos = self.buffer_val(TWOPASS_MAGIC as i64, 4, cur_pos); |
| 1286 | cur_pos = self.buffer_val(TWOPASS_VERSION as i64, 4, cur_pos); |
| 1287 | cur_pos = self.buffer_val(0, TWOPASS_HEADER_SZ - 8, cur_pos); |
| 1288 | debug_assert!(cur_pos == TWOPASS_HEADER_SZ); |
| 1289 | self.pass1_data_retrieved = true; |
| 1290 | &self.pass1_buffer[..cur_pos] |
| 1291 | } |
| 1292 | |
| 1293 | // Frame-specific pass data |
| 1294 | pub(crate) fn emit_frame_data(&mut self) -> Option<&[u8]> { |
| 1295 | let mut cur_pos = 0; |
| 1296 | let fti = self.prev_metrics.fti; |
| 1297 | if fti < FRAME_NSUBTYPES { |
| 1298 | self.scale_sum[fti] += bexp_q24(self.prev_metrics.log_scale_q24); |
| 1299 | } |
| 1300 | if self.prev_metrics.show_frame { |
| 1301 | self.ntus += 1; |
| 1302 | } |
| 1303 | // If we have encoded too many frames, prevent us from reaching the |
| 1304 | // ready state required to encode more. |
| 1305 | if self.nencoded_frames + self.nsef_frames >= std::i32::MAX as i64 { |
| 1306 | None? |
| 1307 | } |
| 1308 | cur_pos = self.buffer_val( |
| 1309 | (self.prev_metrics.show_frame as i64) << 31 |
| 1310 | | self.prev_metrics.fti as i64, |
| 1311 | 4, |
| 1312 | cur_pos, |
| 1313 | ); |
| 1314 | cur_pos = |
| 1315 | self.buffer_val(self.prev_metrics.log_scale_q24 as i64, 4, cur_pos); |
| 1316 | debug_assert!(cur_pos == TWOPASS_PACKET_SZ); |
| 1317 | self.pass1_data_retrieved = true; |
| 1318 | Some(&self.pass1_buffer[..cur_pos]) |
| 1319 | } |
| 1320 | |
| 1321 | // Summary of the whole encoding process. |
| 1322 | pub(crate) fn emit_summary(&mut self) -> &[u8] { |
| 1323 | let mut cur_pos = 0; |
| 1324 | cur_pos = self.buffer_val(TWOPASS_MAGIC as i64, 4, cur_pos); |
| 1325 | cur_pos = self.buffer_val(TWOPASS_VERSION as i64, 4, cur_pos); |
| 1326 | cur_pos = self.buffer_val(self.ntus as i64, 4, cur_pos); |
| 1327 | for fti in 0..=FRAME_NSUBTYPES { |
| 1328 | cur_pos = self.buffer_val(self.nframes[fti] as i64, 4, cur_pos); |
| 1329 | } |
| 1330 | for fti in 0..FRAME_NSUBTYPES { |
| 1331 | cur_pos = self.buffer_val(self.exp[fti] as i64, 1, cur_pos); |
| 1332 | } |
| 1333 | for fti in 0..FRAME_NSUBTYPES { |
| 1334 | cur_pos = self.buffer_val(self.scale_sum[fti], 8, cur_pos); |
| 1335 | } |
| 1336 | debug_assert!(cur_pos == TWOPASS_HEADER_SZ); |
| 1337 | self.pass1_summary_retrieved = true; |
| 1338 | &self.pass1_buffer[..cur_pos] |
| 1339 | } |
| 1340 | |
| 1341 | // Emit either summary or frame-specific data depending on the previous call |
| 1342 | pub(crate) fn twopass_out( |
| 1343 | &mut self, done_processing: bool, |
| 1344 | ) -> Option<&[u8]> { |
| 1345 | if !self.pass1_data_retrieved { |
| 1346 | if self.twopass_state != PASS_1 && self.twopass_state != PASS_2_PLUS_1 { |
| 1347 | Some(self.emit_placeholder_summary()) |
| 1348 | } else { |
| 1349 | self.emit_frame_data() |
| 1350 | } |
| 1351 | } else if done_processing && !self.pass1_summary_retrieved { |
| 1352 | Some(self.emit_summary()) |
| 1353 | } else { |
| 1354 | // The data for this frame has already been retrieved. |
| 1355 | None |
| 1356 | } |
| 1357 | } |
| 1358 | |
| 1359 | // Initialize the rate control for second pass encoding |
| 1360 | pub(crate) fn init_second_pass(&mut self) { |
| 1361 | if self.twopass_state == PASS_SINGLE || self.twopass_state == PASS_1 { |
| 1362 | // Initialize the second pass. |
| 1363 | self.twopass_state += PASS_2; |
| 1364 | // If the user requested a finite buffer, reserve the space required for |
| 1365 | // it. |
| 1366 | if self.reservoir_frame_delay_is_set { |
| 1367 | debug_assert!(self.reservoir_frame_delay > 0); |
| 1368 | // reservoir_frame_delay counts in TUs, but RCFrameMetrics are stored |
| 1369 | // per frame (including Show Existing Frame frames). |
| 1370 | // When re-ordering, we will have more frames than TUs. |
| 1371 | // How many more? |
| 1372 | // That depends on the re-ordering scheme used. |
| 1373 | // Doubling the number of TUs and adding a fixed latency equal to the |
| 1374 | // maximum number of reference frames we can store should be |
| 1375 | // sufficient for any reasonable scheme, and keeps this code from |
| 1376 | // depending too closely on the details of the scheme currently used |
| 1377 | // by rav1e. |
| 1378 | let nmetrics = (self.reservoir_frame_delay as usize) * 2 + 8; |
| 1379 | self.frame_metrics.reserve_exact(nmetrics); |
| 1380 | self.frame_metrics.resize(nmetrics, RCFrameMetrics::new()); |
| 1381 | } |
| 1382 | } |
| 1383 | } |
| 1384 | |
| 1385 | pub(crate) fn setup_second_pass(&mut self, s: &RCSummary) { |
| 1386 | self.ntus_total = s.ntus; |
| 1387 | self.ntus_left = s.ntus; |
| 1388 | self.nframes_total = s.nframes; |
| 1389 | self.nframes_left = s.nframes; |
| 1390 | self.nframes_total_total = s.nframes.iter().sum(); |
| 1391 | if self.frame_metrics.is_empty() { |
| 1392 | self.reservoir_frame_delay = s.ntus; |
| 1393 | self.scale_window_nframes = self.nframes_total; |
| 1394 | self.scale_window_sum = s.scale_sum; |
| 1395 | self.reservoir_max = |
| 1396 | self.bits_per_tu * (self.reservoir_frame_delay as i64); |
| 1397 | self.reservoir_target = (self.reservoir_max + 1) >> 1; |
| 1398 | self.reservoir_fullness = self.reservoir_target; |
| 1399 | } else { |
| 1400 | self.reservoir_frame_delay = self.reservoir_frame_delay.min(s.ntus); |
| 1401 | } |
| 1402 | self.exp = s.exp; |
| 1403 | } |
| 1404 | |
| 1405 | // Parse the rate control summary |
| 1406 | // |
| 1407 | // It returns the amount of data consumed in the process or |
| 1408 | // an empty error on parsing failure. |
| 1409 | fn twopass_parse_summary(&mut self, buf: &[u8]) -> Result<usize, String> { |
| 1410 | let consumed = self.des.buffer_fill(buf, 0, TWOPASS_HEADER_SZ); |
| 1411 | if self.des.pass2_buffer_fill >= TWOPASS_HEADER_SZ { |
| 1412 | self.des.pass2_buffer_pos = 0; |
| 1413 | |
| 1414 | let s = self.des.parse_summary()?; |
| 1415 | |
| 1416 | self.setup_second_pass(&s); |
| 1417 | |
| 1418 | // Got a valid header. |
| 1419 | // Set up pass 2. |
| 1420 | // Clear the header data from the buffer to make room for the |
| 1421 | // packet data. |
| 1422 | self.des.pass2_buffer_fill = 0; |
| 1423 | } |
| 1424 | |
| 1425 | Ok(consumed) |
| 1426 | } |
| 1427 | |
| 1428 | // Return the size of the first buffer twopass_in expects |
| 1429 | // |
| 1430 | // It is the summary size (constant) + the number of frame data packets |
| 1431 | // (variable depending on the configuration) it needs to starts encoding. |
| 1432 | pub(crate) fn twopass_first_packet_size(&self) -> usize { |
| 1433 | let frames_needed = if !self.frame_metrics.is_empty() { |
| 1434 | // If we're not using whole-file buffering, we need at least one |
| 1435 | // frame per buffer slot. |
| 1436 | self.reservoir_frame_delay as usize |
| 1437 | } else { |
| 1438 | // Otherwise we need just one. |
| 1439 | 1 |
| 1440 | }; |
| 1441 | |
| 1442 | TWOPASS_HEADER_SZ + frames_needed * TWOPASS_PACKET_SZ |
| 1443 | } |
| 1444 | |
| 1445 | // Return the number of frame data packets to be parsed before |
| 1446 | // the encoding process can continue. |
| 1447 | pub(crate) fn twopass_in_frames_needed(&self) -> i32 { |
| 1448 | if self.target_bitrate <= 0 { |
| 1449 | return 0; |
| 1450 | } |
| 1451 | if self.frame_metrics.is_empty() { |
| 1452 | return i32::from(!self.pass2_data_ready); |
| 1453 | } |
| 1454 | let mut cur_scale_window_nframes = 0; |
| 1455 | let mut cur_nframes_left = 0; |
| 1456 | for fti in 0..=FRAME_NSUBTYPES { |
| 1457 | cur_scale_window_nframes += self.scale_window_nframes[fti]; |
| 1458 | cur_nframes_left += self.nframes_left[fti]; |
| 1459 | } |
| 1460 | |
| 1461 | (self.reservoir_frame_delay - self.scale_window_ntus) |
| 1462 | .clamp(0, cur_nframes_left - cur_scale_window_nframes) |
| 1463 | } |
| 1464 | |
| 1465 | pub(crate) fn parse_frame_data_packet( |
| 1466 | &mut self, buf: &[u8], |
| 1467 | ) -> Result<(), String> { |
| 1468 | if buf.len() != TWOPASS_PACKET_SZ { |
| 1469 | return Err("Incorrect buffer size" .to_string()); |
| 1470 | } |
| 1471 | |
| 1472 | self.des.buffer_fill(buf, 0, TWOPASS_PACKET_SZ); |
| 1473 | self.des.pass2_buffer_pos = 0; |
| 1474 | let m = self.des.parse_metrics()?; |
| 1475 | self.des.pass2_buffer_fill = 0; |
| 1476 | |
| 1477 | if self.frame_metrics.is_empty() { |
| 1478 | // We're using a whole-file buffer. |
| 1479 | self.cur_metrics = m; |
| 1480 | self.pass2_data_ready = true; |
| 1481 | } else { |
| 1482 | // Safety check |
| 1483 | let frames_needed = self.twopass_in_frames_needed(); |
| 1484 | |
| 1485 | if frames_needed > 0 { |
| 1486 | if self.nframe_metrics >= self.frame_metrics.len() { |
| 1487 | return Err( |
| 1488 | "Read too many frames without finding enough TUs" .to_string(), |
| 1489 | ); |
| 1490 | } |
| 1491 | |
| 1492 | let mut fmi = self.frame_metrics_head + self.nframe_metrics; |
| 1493 | if fmi >= self.frame_metrics.len() { |
| 1494 | fmi -= self.frame_metrics.len(); |
| 1495 | } |
| 1496 | self.nframe_metrics += 1; |
| 1497 | self.frame_metrics[fmi] = m; |
| 1498 | // And accumulate the statistics over the window. |
| 1499 | self.scale_window_nframes[m.fti] += 1; |
| 1500 | if m.fti < FRAME_NSUBTYPES { |
| 1501 | self.scale_window_sum[m.fti] += bexp_q24(m.log_scale_q24); |
| 1502 | } |
| 1503 | if m.show_frame { |
| 1504 | self.scale_window_ntus += 1; |
| 1505 | } |
| 1506 | if frames_needed == 1 { |
| 1507 | self.pass2_data_ready = true; |
| 1508 | self.cur_metrics = self.frame_metrics[self.frame_metrics_head]; |
| 1509 | } |
| 1510 | } else { |
| 1511 | return Err("No frames needed" .to_string()); |
| 1512 | } |
| 1513 | } |
| 1514 | |
| 1515 | Ok(()) |
| 1516 | } |
| 1517 | |
| 1518 | // Parse the rate control per-frame data |
| 1519 | // |
| 1520 | // If no buffer is passed return the amount of data it expects |
| 1521 | // to consume next. |
| 1522 | // |
| 1523 | // If a properly sized buffer is passed it returns the amount of data |
| 1524 | // consumed in the process or an empty error on parsing failure. |
| 1525 | fn twopass_parse_frame_data( |
| 1526 | &mut self, maybe_buf: Option<&[u8]>, mut consumed: usize, |
| 1527 | ) -> Result<usize, String> { |
| 1528 | { |
| 1529 | if self.frame_metrics.is_empty() { |
| 1530 | // We're using a whole-file buffer. |
| 1531 | if let Some(buf) = maybe_buf { |
| 1532 | consumed = self.des.buffer_fill(buf, consumed, TWOPASS_PACKET_SZ); |
| 1533 | if self.des.pass2_buffer_fill >= TWOPASS_PACKET_SZ { |
| 1534 | self.des.pass2_buffer_pos = 0; |
| 1535 | // Read metrics for the next frame. |
| 1536 | self.cur_metrics = self.des.parse_metrics()?; |
| 1537 | // Clear the buffer for the next frame. |
| 1538 | self.des.pass2_buffer_fill = 0; |
| 1539 | self.pass2_data_ready = true; |
| 1540 | } |
| 1541 | } else { |
| 1542 | return Ok(TWOPASS_PACKET_SZ - self.des.pass2_buffer_fill); |
| 1543 | } |
| 1544 | } else { |
| 1545 | // We're using a finite buffer. |
| 1546 | let mut cur_scale_window_nframes = 0; |
| 1547 | let mut cur_nframes_left = 0; |
| 1548 | |
| 1549 | for fti in 0..=FRAME_NSUBTYPES { |
| 1550 | cur_scale_window_nframes += self.scale_window_nframes[fti]; |
| 1551 | cur_nframes_left += self.nframes_left[fti]; |
| 1552 | } |
| 1553 | |
| 1554 | let mut frames_needed = self.twopass_in_frames_needed(); |
| 1555 | while frames_needed > 0 { |
| 1556 | if let Some(buf) = maybe_buf { |
| 1557 | consumed = self.des.buffer_fill(buf, consumed, TWOPASS_PACKET_SZ); |
| 1558 | if self.des.pass2_buffer_fill >= TWOPASS_PACKET_SZ { |
| 1559 | self.des.pass2_buffer_pos = 0; |
| 1560 | // Read the metrics for the next frame. |
| 1561 | let m = self.des.parse_metrics()?; |
| 1562 | // Add them to the circular buffer. |
| 1563 | if self.nframe_metrics >= self.frame_metrics.len() { |
| 1564 | return Err( |
| 1565 | "Read too many frames without finding enough TUs" |
| 1566 | .to_string(), |
| 1567 | ); |
| 1568 | } |
| 1569 | let mut fmi = self.frame_metrics_head + self.nframe_metrics; |
| 1570 | if fmi >= self.frame_metrics.len() { |
| 1571 | fmi -= self.frame_metrics.len(); |
| 1572 | } |
| 1573 | self.nframe_metrics += 1; |
| 1574 | self.frame_metrics[fmi] = m; |
| 1575 | // And accumulate the statistics over the window. |
| 1576 | self.scale_window_nframes[m.fti] += 1; |
| 1577 | cur_scale_window_nframes += 1; |
| 1578 | if m.fti < FRAME_NSUBTYPES { |
| 1579 | self.scale_window_sum[m.fti] += bexp_q24(m.log_scale_q24); |
| 1580 | } |
| 1581 | if m.show_frame { |
| 1582 | self.scale_window_ntus += 1; |
| 1583 | } |
| 1584 | frames_needed = (self.reservoir_frame_delay |
| 1585 | - self.scale_window_ntus) |
| 1586 | .clamp(0, cur_nframes_left - cur_scale_window_nframes); |
| 1587 | // Clear the buffer for the next frame. |
| 1588 | self.des.pass2_buffer_fill = 0; |
| 1589 | } else { |
| 1590 | // Go back for more data. |
| 1591 | break; |
| 1592 | } |
| 1593 | } else { |
| 1594 | return Ok( |
| 1595 | TWOPASS_PACKET_SZ * (frames_needed as usize) |
| 1596 | - self.des.pass2_buffer_fill, |
| 1597 | ); |
| 1598 | } |
| 1599 | } |
| 1600 | // If we've got all the frames we need, fill in the current metrics. |
| 1601 | // We're ready to go. |
| 1602 | if frames_needed <= 0 { |
| 1603 | self.cur_metrics = self.frame_metrics[self.frame_metrics_head]; |
| 1604 | // Mark us ready for the next frame. |
| 1605 | self.pass2_data_ready = true; |
| 1606 | } |
| 1607 | } |
| 1608 | } |
| 1609 | |
| 1610 | Ok(consumed) |
| 1611 | } |
| 1612 | |
| 1613 | // If called without a buffer it will return the size of the next |
| 1614 | // buffer it expects. |
| 1615 | // |
| 1616 | // If called with a buffer it will consume it fully. |
| 1617 | // It returns Ok(0) if the buffer had been parsed or Err(()) |
| 1618 | // if the buffer hadn't been enough or other errors happened. |
| 1619 | pub(crate) fn twopass_in( |
| 1620 | &mut self, maybe_buf: Option<&[u8]>, |
| 1621 | ) -> Result<usize, String> { |
| 1622 | let mut consumed = 0; |
| 1623 | self.init_second_pass(); |
| 1624 | // If we haven't got a valid summary header yet, try to parse one. |
| 1625 | if self.nframes_total[FRAME_SUBTYPE_I] == 0 { |
| 1626 | self.pass2_data_ready = false; |
| 1627 | if let Some(buf) = maybe_buf { |
| 1628 | consumed = self.twopass_parse_summary(buf)? |
| 1629 | } else { |
| 1630 | return Ok(self.twopass_first_packet_size()); |
| 1631 | } |
| 1632 | } |
| 1633 | if self.nframes_total[FRAME_SUBTYPE_I] > 0 { |
| 1634 | if self.nencoded_frames + self.nsef_frames |
| 1635 | >= self.nframes_total_total as i64 |
| 1636 | { |
| 1637 | // We don't want any more data after the last frame, and we don't want |
| 1638 | // to allow any more frames to be encoded. |
| 1639 | self.pass2_data_ready = false; |
| 1640 | } else if !self.pass2_data_ready { |
| 1641 | return self.twopass_parse_frame_data(maybe_buf, consumed); |
| 1642 | } |
| 1643 | } |
| 1644 | Ok(consumed) |
| 1645 | } |
| 1646 | } |
| 1647 | |