// Copyright (c) 2019-2022, The rav1e contributors. All rights reserved
//
// This source code is subject to the terms of the BSD 2 Clause License and
// the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
// was not distributed with this source code in the LICENSE file, you can
// obtain it at www.aomedia.org/license/software. If the Alliance for Open
// Media Patent License 1.0 was not distributed with this source code in the
// PATENTS file, you can obtain it at www.aomedia.org/license/patent.

use crate::api::color::ChromaSampling;
use crate::api::ContextInner;
use crate::encoder::TEMPORAL_DELIMITER;
use crate::quantize::{ac_q, dc_q, select_ac_qi, select_dc_qi};
use crate::util::{
  bexp64, bexp_q24, blog64, clamp, q24_to_q57, q57, q57_to_q24, Pixel,
};
use std::cmp;

// The number of frame sub-types for which we track distinct parameters.
// This does not include FRAME_SUBTYPE_SEF, because we don't need to do any
// parameter tracking for Show Existing Frame frames.
pub const FRAME_NSUBTYPES: usize = 4;

pub const FRAME_SUBTYPE_I: usize = 0;
pub const FRAME_SUBTYPE_P: usize = 1;
#[allow(unused)]
pub const FRAME_SUBTYPE_B0: usize = 2;
#[allow(unused)]
pub const FRAME_SUBTYPE_B1: usize = 3;
pub const FRAME_SUBTYPE_SEF: usize = 4;

const PASS_SINGLE: i32 = 0;
const PASS_1: i32 = 1;
const PASS_2: i32 = 2;
const PASS_2_PLUS_1: i32 = 3;

// Magic value at the start of the 2-pass stats file
const TWOPASS_MAGIC: i32 = 0x50324156;
// Version number for the 2-pass stats file
const TWOPASS_VERSION: i32 = 1;
// 4 byte magic + 4 byte version + 4 byte TU count + 4 byte SEF frame count
// + FRAME_NSUBTYPES*(4 byte frame count + 1 byte exp + 8 byte scale_sum)
pub(crate) const TWOPASS_HEADER_SZ: usize = 16 + FRAME_NSUBTYPES * (4 + 1 + 8);
// 4 byte frame type (show_frame and fti jointly coded) + 4 byte log_scale_q24
const TWOPASS_PACKET_SZ: usize = 8;
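
// Illustrative sanity check (an addition for exposition, not part of the
// original sources): with FRAME_NSUBTYPES = 4, the header layout described
// above works out to 16 + 4*(4 + 1 + 8) = 68 bytes, and each per-frame
// packet to 8 bytes.
#[cfg(test)]
#[test]
fn twopass_layout_sizes_example() {
  assert_eq!(TWOPASS_HEADER_SZ, 16 + FRAME_NSUBTYPES * (4 + 1 + 8));
  assert_eq!(TWOPASS_HEADER_SZ, 68);
  assert_eq!(TWOPASS_PACKET_SZ, 8);
}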

const SEF_BITS: i64 = 24;

// The scale of AV1 quantizer tables (relative to the pixel domain), i.e., Q3.
pub(crate) const QSCALE: i32 = 3;

// We clamp the actual I and B frame delays to a minimum of 10 to work
// within the range of values where later incrementing the delay works as
// designed.
// 10 is not an exact choice, but rather a good working trade-off.
const INTER_DELAY_TARGET_MIN: i32 = 10;

// The base quantizer for a frame is adjusted based on the frame type using the
// formula (log_qp*mqp + dqp), where log_qp is the base-2 logarithm of the
// "linear" quantizer (the actual factor by which coefficients are divided).
// Because log_qp has an implicit offset built in based on the scale of the
// coefficients (which depends on the pixel bit depth and the transform
// scale), we normalize the quantizer to the equivalent for 8-bit pixels with
// orthonormal transforms for the purposes of rate modeling.
const MQP_Q12: &[i32; FRAME_NSUBTYPES] = &[
  // TODO: Use a const function once f64 operations in const functions are
  // stable.
  (1.0 * (1 << 12) as f64) as i32,
  (1.0 * (1 << 12) as f64) as i32,
  (1.0 * (1 << 12) as f64) as i32,
  (1.0 * (1 << 12) as f64) as i32,
];

// The ratio 33_810_170.0 / 86_043_287.0 was derived by approximating the median
// of a change of 15 quantizer steps in the quantizer tables.
const DQP_Q57: &[i64; FRAME_NSUBTYPES] = &[
  (-(33_810_170.0 / 86_043_287.0) * (1i64 << 57) as f64) as i64,
  (0.0 * (1i64 << 57) as f64) as i64,
  ((33_810_170.0 / 86_043_287.0) * (1i64 << 57) as f64) as i64,
  (2.0 * (33_810_170.0 / 86_043_287.0) * (1i64 << 57) as f64) as i64,
];

// For 8-bit-depth inter frames, log_q_y is derived from log_target_q with a
// linear model:
// log_q_y = log_target_q + (log_target_q >> 32) * Q_MODEL_MUL + Q_MODEL_ADD
// Derivation of the linear models:
// https://github.com/xiph/rav1e/blob/d02bdbd3b0b7b2cb9fc301031cc6a4e67a567a5c/doc/quantizer-weight-analysis.ipynb
#[rustfmt::skip]
const Q_MODEL_ADD: [i64; 4] = [
  // 4:2:0
  -0x24_4FE7_ECB3_DD90,
  // 4:2:2
  -0x37_41DA_38AD_0924,
  // 4:4:4
  -0x70_83BD_A626_311C,
  // 4:0:0
  0,
];
#[rustfmt::skip]
const Q_MODEL_MUL: [i64; 4] = [
  // 4:2:0
  0x8A0_50DD,
  // 4:2:2
  0x887_7666,
  // 4:4:4
  0x8D4_A712,
  // 4:0:0
  0,
];

#[rustfmt::skip]
const ROUGH_TAN_LOOKUP: &[u16; 18] = &[
     0,   358,   722,  1098,  1491,  1910,
  2365,  2868,  3437,  4096,  4881,  5850,
  7094,  8784, 11254, 15286, 23230, 46817
];

// A digital approximation of a 2nd-order low-pass Bessel follower.
// We use this for rate control because it has fast reaction time, but is
// critically damped.
pub struct IIRBessel2 {
  c: [i32; 2],
  g: i32,
  x: [i32; 2],
  y: [i32; 2],
}

// alpha is Q24 in the range [0,0.5).
// The return value is 5.12.
fn warp_alpha(alpha: i32) -> i32 {
  let i: i32 = ((alpha * 36) >> 24).min(16);
  let t0: u16 = ROUGH_TAN_LOOKUP[i as usize];
  let t1: u16 = ROUGH_TAN_LOOKUP[i as usize + 1];
  let d: i32 = alpha * 36 - (i << 24);
  ((((t0 as i64) << 32) + (((t1 - t0) << 8) as i64) * (d as i64)) >> 32) as i32
}
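
// Illustrative spot checks (added for exposition, not from the original
// sources): the lookup above holds tan() in 5-degree steps in Q12, so
// alpha = 0 maps to tan(0) = 0, and alpha = 0.25 in Q24 falls exactly on
// the table entry for tan(pi/4) = 1.0, i.e. 4096 in Q12.
#[cfg(test)]
#[test]
fn warp_alpha_examples() {
  assert_eq!(warp_alpha(0), 0);
  assert_eq!(warp_alpha(1 << 22), 1 << 12);
}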

// Compute Bessel filter coefficients with the specified delay.
// Return: Filter parameters (c[0], c[1], g).
fn iir_bessel2_get_parameters(delay: i32) -> (i32, i32, i32) {
  // This borrows some code from an unreleased version of Postfish.
  // See the recipe at http://unicorn.us.com/alex/2polefilters.html for details
  // on deriving the filter coefficients.
  // alpha is Q24
  let alpha = (1 << 24) / delay;
  // warp is 7.12 (5.12? the max value is 70386 in Q12).
  let warp = warp_alpha(alpha).max(1) as i64;
  // k1 is 9.12 (6.12?)
  let k1 = 3 * warp;
  // k2 is 16.24 (11.24?)
  let k2 = k1 * warp;
  // d is 16.15 (10.15?)
  let d = ((((1 << 12) + k1) << 12) + k2 + 256) >> 9;
  // a is 0.32, since d is larger than both 1.0 and k2
  let a = (k2 << 23) / d;
  // ik2 is 25.24
  let ik2 = (1i64 << 48) / k2;
  // b1 is Q56; in practice, the integer ranges between -2 and 2.
  let b1 = 2 * a * (ik2 - (1i64 << 24));
  // b2 is Q56; in practice, the integer ranges between -2 and 2.
  let b2 = (1i64 << 56) - ((4 * a) << 24) - b1;
  // All of the filter parameters are Q24.
  (
    ((b1 + (1i64 << 31)) >> 32) as i32,
    ((b2 + (1i64 << 31)) >> 32) as i32,
    ((a + 128) >> 8) as i32,
  )
}

impl IIRBessel2 {
  pub fn new(delay: i32, value: i32) -> IIRBessel2 {
    let (c0, c1, g) = iir_bessel2_get_parameters(delay);
    IIRBessel2 { c: [c0, c1], g, x: [value, value], y: [value, value] }
  }

  // Re-initialize Bessel filter coefficients with the specified delay.
  // This does not alter the x/y state, but changes the reaction time of the
  // filter.
  // Altering the time constant of a reactive filter without altering internal
  // state is something that has to be done carefully, but our design operates
  // at high enough delays and with small enough time constant changes to make
  // it safe.
  pub fn reinit(&mut self, delay: i32) {
    let (c0, c1, g) = iir_bessel2_get_parameters(delay);
    self.c[0] = c0;
    self.c[1] = c1;
    self.g = g;
  }

  pub fn update(&mut self, x: i32) -> i32 {
    let c0 = self.c[0] as i64;
    let c1 = self.c[1] as i64;
    let g = self.g as i64;
    let x0 = self.x[0] as i64;
    let x1 = self.x[1] as i64;
    let y0 = self.y[0] as i64;
    let y1 = self.y[1] as i64;
    let ya =
      ((((x as i64) + x0 * 2 + x1) * g + y0 * c0 + y1 * c1 + (1i64 << 23))
        >> 24) as i32;
    self.x[1] = self.x[0];
    self.x[0] = x;
    self.y[1] = self.y[0];
    self.y[0] = ya;
    ya
  }
}
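
// Illustrative sketch (added for exposition, not from the original sources):
// the follower is designed with unity DC gain, so with its state initialized
// to a value and that same value fed repeatedly, the output stays at
// essentially that value; only coefficient rounding introduces a small
// offset. The tolerance below is a loose assumption, not a measured bound.
#[cfg(test)]
#[test]
fn iir_bessel2_constant_input_example() {
  let v = 1 << 20;
  let mut f = IIRBessel2::new(INTER_DELAY_TARGET_MIN, v);
  let mut y = v;
  for _ in 0..100 {
    y = f.update(v);
  }
  assert!((y - v).abs() < 64);
}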

#[derive(Copy, Clone)]
struct RCFrameMetrics {
  // The log base 2 of the scale factor for this frame in Q24 format.
  log_scale_q24: i32,
  // The frame type from pass 1
  fti: usize,
  // Whether or not the frame was hidden in pass 1
  show_frame: bool,
  // TODO: The input frame number corresponding to this frame in the input.
  // input_frameno: u32
  // TODO vfr: PTS
}

impl RCFrameMetrics {
  const fn new() -> RCFrameMetrics {
    RCFrameMetrics { log_scale_q24: 0, fti: 0, show_frame: false }
  }
}

/// Rate control pass summary
///
/// It contains encoding information related to the whole previous
/// encoding pass.
#[derive(Debug, Default, Clone)]
pub struct RCSummary {
  pub(crate) ntus: i32,
  nframes: [i32; FRAME_NSUBTYPES + 1],
  exp: [u8; FRAME_NSUBTYPES],
  scale_sum: [i64; FRAME_NSUBTYPES],
  pub(crate) total: i32,
}

// Backing storage to deserialize Summary and Per-Frame pass data
//
// Can store up to a full header size since it is the larger of the two
// packet kinds.
pub(crate) struct RCDeserialize {
  // The current byte position in the frame metrics buffer.
  pass2_buffer_pos: usize,
  // In pass 2, this represents the number of bytes that are available in the
  // input buffer.
  pass2_buffer_fill: usize,
  // Buffer for current frame metrics in pass 2.
  pass2_buffer: [u8; TWOPASS_HEADER_SZ],
}

impl Default for RCDeserialize {
  fn default() -> Self {
    RCDeserialize {
      pass2_buffer: [0; TWOPASS_HEADER_SZ],
      pass2_buffer_pos: 0,
      pass2_buffer_fill: 0,
    }
  }
}

impl RCDeserialize {
  // Fill the backing storage by reading enough bytes from the
  // buf slice until goal bytes are available for parsing.
  //
  // goal must be at most TWOPASS_HEADER_SZ.
  pub(crate) fn buffer_fill(
    &mut self, buf: &[u8], consumed: usize, goal: usize,
  ) -> usize {
    let mut consumed = consumed;
    while self.pass2_buffer_fill < goal && consumed < buf.len() {
      self.pass2_buffer[self.pass2_buffer_fill] = buf[consumed];
      self.pass2_buffer_fill += 1;
      consumed += 1;
    }
    consumed
  }

  // Read the next n bytes as i64.
  // n must be between 1 and 8.
  fn unbuffer_val(&mut self, n: usize) -> i64 {
    let mut bytes = n;
    let mut ret = 0;
    let mut shift = 0;
    while bytes > 0 {
      bytes -= 1;
      ret |= (self.pass2_buffer[self.pass2_buffer_pos] as i64) << shift;
      self.pass2_buffer_pos += 1;
      shift += 8;
    }
    ret
  }

  // Read metrics for the next frame.
  fn parse_metrics(&mut self) -> Result<RCFrameMetrics, String> {
    debug_assert!(self.pass2_buffer_fill >= TWOPASS_PACKET_SZ);
    let ft_val = self.unbuffer_val(4);
    let show_frame = (ft_val >> 31) != 0;
    let fti = (ft_val & 0x7FFFFFFF) as usize;
    // Make sure the frame type is valid.
    if fti > FRAME_NSUBTYPES {
      return Err("Invalid frame type".to_string());
    }
    let log_scale_q24 = self.unbuffer_val(4) as i32;
    Ok(RCFrameMetrics { log_scale_q24, fti, show_frame })
  }

  // Read the summary header data.
  pub(crate) fn parse_summary(&mut self) -> Result<RCSummary, String> {
    // Check the magic value and version number.
    if self.unbuffer_val(4) != TWOPASS_MAGIC as i64 {
      return Err("Magic value mismatch".to_string());
    }
    if self.unbuffer_val(4) != TWOPASS_VERSION as i64 {
      return Err("Version number mismatch".to_string());
    }
    let mut s =
      RCSummary { ntus: self.unbuffer_val(4) as i32, ..Default::default() };

    // Make sure the file claims to have at least one TU.
    // Otherwise we probably got the placeholder data from an aborted
    // pass 1.
    if s.ntus < 1 {
      return Err("No TUs found in first pass summary".to_string());
    }
    let mut total: i32 = 0;
    for nframes in s.nframes.iter_mut() {
      let n = self.unbuffer_val(4) as i32;
      if n < 0 {
        return Err("Got negative frame count".to_string());
      }
      total = total
        .checked_add(n)
        .ok_or_else(|| "Frame count too large".to_string())?;

      *nframes = n;
    }

    // We can't have more TUs than frames.
    if s.ntus > total {
      return Err("More TUs than frames".to_string());
    }

    s.total = total;

    for exp in s.exp.iter_mut() {
      *exp = self.unbuffer_val(1) as u8;
    }

    for scale_sum in s.scale_sum.iter_mut() {
      *scale_sum = self.unbuffer_val(8);
      if *scale_sum < 0 {
        return Err("Got negative scale sum".to_string());
      }
    }
    Ok(s)
  }
}
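
// Illustrative sketch (added for exposition, not from the original sources):
// values are stored least-significant byte first, so the four bytes
// 0x78 0x56 0x34 0x12 decode to 0x1234_5678.
#[cfg(test)]
#[test]
fn rc_deserialize_little_endian_example() {
  let mut des = RCDeserialize::default();
  let consumed = des.buffer_fill(&[0x78, 0x56, 0x34, 0x12], 0, 4);
  assert_eq!(consumed, 4);
  assert_eq!(des.unbuffer_val(4), 0x1234_5678);
}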

pub struct RCState {
  // The target bit-rate in bits per second.
  target_bitrate: i32,
  // The number of TUs over which to distribute the reservoir usage.
  // We use TUs because in our leaky bucket model, we only add bits to the
  // reservoir on TU boundaries.
  reservoir_frame_delay: i32,
  // Whether or not the reservoir_frame_delay was explicitly specified by the
  // user, or is the default value.
  reservoir_frame_delay_is_set: bool,
  // The maximum quantizer index to allow (for the luma AC coefficients, other
  // quantizers will still be adjusted to match).
  maybe_ac_qi_max: Option<u8>,
  // The minimum quantizer index to allow (for the luma AC coefficients).
  ac_qi_min: u8,
  // Will we drop frames to meet bitrate requirements?
  drop_frames: bool,
  // Do we respect the maximum reservoir fullness?
  cap_overflow: bool,
  // Can the reservoir go negative?
  cap_underflow: bool,
  // The log of the first-pass base quantizer.
  pass1_log_base_q: i64,
  // Two-pass mode state.
  // PASS_SINGLE => 1-pass encoding.
  // PASS_1 => 1st pass of 2-pass encoding.
  // PASS_2 => 2nd pass of 2-pass encoding.
  // PASS_2_PLUS_1 => 2nd pass of 2-pass encoding, but also emitting pass 1
  // data again.
  twopass_state: i32,
  // The log of the number of pixels in a frame in Q57 format.
  log_npixels: i64,
  // The target average bits per Temporal Unit (input frame).
  bits_per_tu: i64,
  // The current bit reservoir fullness (bits available to be used).
  reservoir_fullness: i64,
  // The target buffer fullness.
  // This is where we'd like to be by the last keyframe that appears in the
  // next reservoir_frame_delay frames.
  reservoir_target: i64,
  // The maximum buffer fullness (total size of the buffer).
  reservoir_max: i64,
  // The log of estimated scale factor for the rate model in Q57 format.
  //
  // TODO: Convert to Q23 or figure out a better way to avoid overflow
  // once 2-pass mode is introduced, if required.
  log_scale: [i64; FRAME_NSUBTYPES],
  // The exponent used in the rate model in Q6 format.
  exp: [u8; FRAME_NSUBTYPES],
  // The log of an estimated scale factor used to obtain the real framerate,
  // for VFR sources or, e.g., 12 fps content doubled to 24 fps, etc.
  // TODO vfr: log_vfr_scale: i64,
  // Second-order lowpass filters to track scale and VFR.
  scalefilter: [IIRBessel2; FRAME_NSUBTYPES],
  // TODO vfr: vfrfilter: IIRBessel2,
  // The number of frames of each type we have seen, for filter adaptation
  // purposes.
  // These are only 32 bits to guarantee that we can sum the scales over the
  // whole file without overflow in a 64-bit int.
  // That limits us to 2.268 years at 60 fps (minus 33% with re-ordering).
  nframes: [i32; FRAME_NSUBTYPES + 1],
  inter_delay: [i32; FRAME_NSUBTYPES - 1],
  inter_delay_target: i32,
  // The total accumulated estimation bias.
  rate_bias: i64,
  // The number of (non-Show Existing Frame) frames that have been encoded.
  nencoded_frames: i64,
  // The number of Show Existing Frames that have been emitted.
  nsef_frames: i64,
  // Buffer for current frame metrics in pass 1.
  pass1_buffer: [u8; TWOPASS_HEADER_SZ],
  // Whether or not the user has retrieved the pass 1 data for the last frame.
  // For PASS_1 or PASS_2_PLUS_1 encoding, this is set to false after each
  // frame is encoded, and must be set to true by calling twopass_out() before
  // the next frame can be encoded.
  pub pass1_data_retrieved: bool,
  // Marks whether or not the user has retrieved the summary data at the end of
  // the encode.
  pass1_summary_retrieved: bool,
  // Whether or not the user has provided enough data to encode in the second
  // pass.
  // For PASS_2 or PASS_2_PLUS_1 encoding, this is set to false after each
  // frame, and must be set to true by calling twopass_in() before the next
  // frame can be encoded.
  pass2_data_ready: bool,
  // TODO: Add a way to force the next frame to be a keyframe in 2-pass mode.
  // Right now we are relying on keyframe detection to detect the same
  // keyframes.
  // The metrics for the previous frame.
  prev_metrics: RCFrameMetrics,
  // The metrics for the current frame.
  cur_metrics: RCFrameMetrics,
  // The buffered metrics for future frames.
  frame_metrics: Vec<RCFrameMetrics>,
  // The total number of frames still in use in the circular metric buffer.
  nframe_metrics: usize,
  // The index of the current frame in the circular metric buffer.
  frame_metrics_head: usize,
  // Data deserialization
  des: RCDeserialize,
  // The TU count encoded so far.
  ntus: i32,
  // The TU count for the whole file.
  ntus_total: i32,
  // The remaining TU count.
  ntus_left: i32,
  // The frame count of each frame subtype in the whole file.
  nframes_total: [i32; FRAME_NSUBTYPES + 1],
  // The sum of those counts.
  nframes_total_total: i32,
  // The number of frames of each subtype yet to be processed.
  nframes_left: [i32; FRAME_NSUBTYPES + 1],
  // The sum of the scale values for each frame subtype.
  scale_sum: [i64; FRAME_NSUBTYPES],
  // The number of TUs represented by the current scale sums.
  scale_window_ntus: i32,
  // The frame count of each frame subtype in the current scale window.
  scale_window_nframes: [i32; FRAME_NSUBTYPES + 1],
  // The sum of the scale values for each frame subtype in the current window.
  scale_window_sum: [i64; FRAME_NSUBTYPES],
}

// TODO: Separate qi values for each color plane.
pub struct QuantizerParameters {
  // The full-precision, unmodulated log quantizer upon which our modulated
  // quantizer indices are based.
  // This is only used to limit sudden quality changes from frame to frame, and
  // as such is not adjusted when we encounter buffer overrun or underrun.
  pub log_base_q: i64,
  // The full-precision log quantizer modulated by the current frame type upon
  // which our quantizer indices are based (including any adjustments to
  // prevent buffer overrun or underrun).
  // This is used when estimating the scale parameter once we know the actual
  // bit usage of a frame.
  pub log_target_q: i64,
  pub dc_qi: [u8; 3],
  pub ac_qi: [u8; 3],
  pub lambda: f64,
  pub dist_scale: [f64; 3],
}

const Q57_SQUARE_EXP_SCALE: f64 =
  (2.0 * ::std::f64::consts::LN_2) / ((1i64 << 57) as f64);

// Daala style log-offset for chroma quantizers
// TODO: Optimal offsets for more configurations than just BT.709
fn chroma_offset(
  log_target_q: i64, chroma_sampling: ChromaSampling,
) -> (i64, i64) {
  let x: i64 = log_target_q.max(0);
  // Gradient optimized for CIEDE2000+PSNR on subset3
  let y: i64 = match chroma_sampling {
    ChromaSampling::Cs400 => 0,
    ChromaSampling::Cs420 => (x >> 2) + (x >> 6), // 0.266
    ChromaSampling::Cs422 => (x >> 3) + (x >> 4) - (x >> 7), // 0.180
    ChromaSampling::Cs444 => (x >> 4) + (x >> 5) + (x >> 8), // 0.098
  };
  // blog64(7) - blog64(4); blog64(5) - blog64(4)
  (0x19D_5D9F_D501_0B37 - y, 0xA4_D3C2_5E68_DC58 - y)
}
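
// Illustrative numeric check (added for exposition, not from the original
// sources): the two Q57 constants returned above match the identities noted
// in the comment, blog64(7) - blog64(4) ~= log2(7/4) and
// blog64(5) - blog64(4) ~= log2(5/4). The tolerance is a loose assumption.
#[cfg(test)]
#[test]
fn chroma_offset_constants_example() {
  let q57_one = (1i64 << 57) as f64;
  let offset_u_base = 0x19D_5D9F_D501_0B37i64 as f64 / q57_one;
  let offset_v_base = 0xA4_D3C2_5E68_DC58i64 as f64 / q57_one;
  assert!((offset_u_base - (7f64 / 4.0).log2()).abs() < 1e-5);
  assert!((offset_v_base - (5f64 / 4.0).log2()).abs() < 1e-5);
}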

impl QuantizerParameters {
  fn new_from_log_q(
    log_base_q: i64, log_target_q: i64, bit_depth: usize,
    chroma_sampling: ChromaSampling, is_intra: bool,
    log_isqrt_mean_scale: i64,
  ) -> QuantizerParameters {
    let scale = log_isqrt_mean_scale + q57(QSCALE + bit_depth as i32 - 8);

    let mut log_q_y = log_target_q;
    if !is_intra && bit_depth == 8 {
      log_q_y = log_target_q
        + (log_target_q >> 32) * Q_MODEL_MUL[chroma_sampling as usize]
        + Q_MODEL_ADD[chroma_sampling as usize];
    }

    let quantizer = bexp64(log_q_y + scale);
    let (offset_u, offset_v) =
      chroma_offset(log_q_y + log_isqrt_mean_scale, chroma_sampling);
    let mono = chroma_sampling == ChromaSampling::Cs400;
    let log_q_u = log_q_y + offset_u;
    let log_q_v = log_q_y + offset_v;
    let quantizer_u = bexp64(log_q_u + scale);
    let quantizer_v = bexp64(log_q_v + scale);
    let lambda = (::std::f64::consts::LN_2 / 6.0)
      * (((log_target_q + log_isqrt_mean_scale) as f64)
        * Q57_SQUARE_EXP_SCALE)
        .exp();

    let scale = |q| bexp64((log_target_q - q) * 2 + q57(16)) as f64 / 65536.;
    let dist_scale = [scale(log_q_y), scale(log_q_u), scale(log_q_v)];

    let base_q_idx = select_ac_qi(quantizer, bit_depth).max(1);

    // delta_q only gets 6 bits + a sign bit, so it can differ by 63 at most.
    let min_qi = base_q_idx.saturating_sub(63).max(1);
    let max_qi = base_q_idx.saturating_add(63).min(255);
    let clamp_qi = |qi: u8| qi.clamp(min_qi, max_qi);

    QuantizerParameters {
      log_base_q,
      log_target_q,
      // TODO: Allow lossless mode; i.e. qi == 0.
      dc_qi: [
        clamp_qi(select_dc_qi(quantizer, bit_depth)),
        if mono { 0 } else { clamp_qi(select_dc_qi(quantizer_u, bit_depth)) },
        if mono { 0 } else { clamp_qi(select_dc_qi(quantizer_v, bit_depth)) },
      ],
      ac_qi: [
        base_q_idx,
        if mono { 0 } else { clamp_qi(select_ac_qi(quantizer_u, bit_depth)) },
        if mono { 0 } else { clamp_qi(select_ac_qi(quantizer_v, bit_depth)) },
      ],
      lambda,
      dist_scale,
    }
  }
}
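
// Minimal sketch (added for exposition, not from the original sources): for
// 4:0:0 input the chroma quantizer index slots are left at 0 and only the
// luma entries are meaningful. The target quantizer chosen here (a linear
// quantizer of roughly 400) is an arbitrary mid-range assumption.
#[cfg(test)]
#[test]
fn quantizer_parameters_mono_example() {
  let log_q = blog64(400) - q57(QSCALE);
  let params = QuantizerParameters::new_from_log_q(
    log_q,
    log_q,
    8,
    ChromaSampling::Cs400,
    true,
    0,
  );
  assert!(params.ac_qi[0] >= 1);
  assert_eq!(params.ac_qi[1], 0);
  assert_eq!(params.ac_qi[2], 0);
  assert_eq!(params.dc_qi[1], 0);
  assert!(params.lambda > 0.0);
}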

impl RCState {
  pub fn new(
    frame_width: i32, frame_height: i32, framerate_num: i64,
    framerate_den: i64, target_bitrate: i32, maybe_ac_qi_max: Option<u8>,
    ac_qi_min: u8, max_key_frame_interval: i32,
    maybe_reservoir_frame_delay: Option<i32>,
  ) -> RCState {
    // The default buffer size is set equal to 1.5x the keyframe interval or
    // 240 frames, whichever is smaller, with a minimum of 12.
    // For user-set values, we enforce a minimum of 12.
    // The interval is short enough to allow reaction, but long enough to allow
    // looking into the next GOP (avoiding the case where the last frames
    // before an I-frame get starved), in most cases.
    // The 12 frame minimum gives us some chance to distribute bit estimation
    // errors in the worst case.
    let reservoir_frame_delay = maybe_reservoir_frame_delay
      .unwrap_or_else(|| ((max_key_frame_interval * 3) >> 1).min(240))
      .max(12);
    // TODO: What are the limits on these?
    let npixels = (frame_width as i64) * (frame_height as i64);
    // Insane framerates or frame sizes mean insane bitrates.
    // Let's not get carried away.
    // We also subtract 16 bits from each temporal unit to account for the
    // temporal delimiter, whose bits are not included in the frame sizes
    // reported to update_state().
    // TODO: Support constraints imposed by levels.
    let bits_per_tu = clamp(
      (target_bitrate as i64) * framerate_den / framerate_num,
      40,
      0x4000_0000_0000,
    ) - (TEMPORAL_DELIMITER.len() * 8) as i64;
    let reservoir_max = bits_per_tu * (reservoir_frame_delay as i64);
    // Start with a buffer fullness and fullness target of 50%.
    let reservoir_target = (reservoir_max + 1) >> 1;
    // Pick exponents and initial scales for quantizer selection.
    let ibpp = npixels / bits_per_tu;
    // These have been derived by encoding many clips at every quantizer
    // and running a piecewise-linear regression in binary log space.
    let (i_exp, i_log_scale) = if ibpp < 1 {
      (48u8, blog64(36) - q57(QSCALE))
    } else if ibpp < 4 {
      (61u8, blog64(55) - q57(QSCALE))
    } else {
      (77u8, blog64(129) - q57(QSCALE))
    };
    let (p_exp, p_log_scale) = if ibpp < 2 {
      (69u8, blog64(32) - q57(QSCALE))
    } else if ibpp < 139 {
      (104u8, blog64(84) - q57(QSCALE))
    } else {
      (83u8, blog64(19) - q57(QSCALE))
    };
    let (b0_exp, b0_log_scale) = if ibpp < 2 {
      (84u8, blog64(30) - q57(QSCALE))
    } else if ibpp < 92 {
      (120u8, blog64(68) - q57(QSCALE))
    } else {
      (68u8, blog64(4) - q57(QSCALE))
    };
    let (b1_exp, b1_log_scale) = if ibpp < 2 {
      (87u8, blog64(27) - q57(QSCALE))
    } else if ibpp < 126 {
      (139u8, blog64(84) - q57(QSCALE))
    } else {
      (61u8, blog64(1) - q57(QSCALE))
    };

    // TODO: Add support for "golden" P frames.
    RCState {
      target_bitrate,
      reservoir_frame_delay,
      reservoir_frame_delay_is_set: maybe_reservoir_frame_delay.is_some(),
      maybe_ac_qi_max,
      ac_qi_min,
      drop_frames: false,
      cap_overflow: true,
      cap_underflow: false,
      pass1_log_base_q: 0,
      twopass_state: PASS_SINGLE,
      log_npixels: blog64(npixels),
      bits_per_tu,
      reservoir_fullness: reservoir_target,
      reservoir_target,
      reservoir_max,
      log_scale: [i_log_scale, p_log_scale, b0_log_scale, b1_log_scale],
      exp: [i_exp, p_exp, b0_exp, b1_exp],
      scalefilter: [
        IIRBessel2::new(4, q57_to_q24(i_log_scale)),
        IIRBessel2::new(INTER_DELAY_TARGET_MIN, q57_to_q24(p_log_scale)),
        IIRBessel2::new(INTER_DELAY_TARGET_MIN, q57_to_q24(b0_log_scale)),
        IIRBessel2::new(INTER_DELAY_TARGET_MIN, q57_to_q24(b1_log_scale)),
      ],
      // TODO VFR
      nframes: [0; FRAME_NSUBTYPES + 1],
      inter_delay: [INTER_DELAY_TARGET_MIN; FRAME_NSUBTYPES - 1],
      inter_delay_target: reservoir_frame_delay >> 1,
      rate_bias: 0,
      nencoded_frames: 0,
      nsef_frames: 0,
      pass1_buffer: [0; TWOPASS_HEADER_SZ],
      pass1_data_retrieved: true,
      pass1_summary_retrieved: false,
      pass2_data_ready: false,
      prev_metrics: RCFrameMetrics::new(),
      cur_metrics: RCFrameMetrics::new(),
      frame_metrics: Vec::new(),
      nframe_metrics: 0,
      frame_metrics_head: 0,
      ntus: 0,
      ntus_total: 0,
      ntus_left: 0,
      nframes_total: [0; FRAME_NSUBTYPES + 1],
      nframes_total_total: 0,
      nframes_left: [0; FRAME_NSUBTYPES + 1],
      scale_sum: [0; FRAME_NSUBTYPES],
      scale_window_ntus: 0,
      scale_window_nframes: [0; FRAME_NSUBTYPES + 1],
      scale_window_sum: [0; FRAME_NSUBTYPES],
      des: RCDeserialize::default(),
    }
  }

  pub(crate) fn select_first_pass_qi(
    &self, bit_depth: usize, fti: usize, chroma_sampling: ChromaSampling,
  ) -> QuantizerParameters {
    // Adjust the quantizer for the frame type, result is Q57:
    let log_q = ((self.pass1_log_base_q + (1i64 << 11)) >> 12)
      * (MQP_Q12[fti] as i64)
      + DQP_Q57[fti];
    QuantizerParameters::new_from_log_q(
      self.pass1_log_base_q,
      log_q,
      bit_depth,
      chroma_sampling,
      fti == 0,
      0,
    )
  }

  // TODO: Separate quantizers for Cb and Cr.
  #[profiling::function]
  pub(crate) fn select_qi<T: Pixel>(
    &self, ctx: &ContextInner<T>, output_frameno: u64, fti: usize,
    maybe_prev_log_base_q: Option<i64>, log_isqrt_mean_scale: i64,
  ) -> QuantizerParameters {
    // Is rate control active?
    if self.target_bitrate <= 0 {
      // Rate control is not active.
      // Derive quantizer directly from frame type.
      let bit_depth = ctx.config.bit_depth;
      let chroma_sampling = ctx.config.chroma_sampling;
      let (log_base_q, log_q) =
        Self::calc_flat_quantizer(ctx.config.quantizer as u8, bit_depth, fti);
      QuantizerParameters::new_from_log_q(
        log_base_q,
        log_q,
        bit_depth,
        chroma_sampling,
        fti == 0,
        log_isqrt_mean_scale,
      )
    } else {
      let mut nframes: [i32; FRAME_NSUBTYPES + 1] = [0; FRAME_NSUBTYPES + 1];
      let mut log_scale: [i64; FRAME_NSUBTYPES] = self.log_scale;
      let mut reservoir_tus = self.reservoir_frame_delay.min(self.ntus_left);
      let mut reservoir_frames = 0;
      let mut log_cur_scale = (self.scalefilter[fti].y[0] as i64) << 33;
      match self.twopass_state {
        // First pass of 2-pass mode: use a fixed base quantizer.
        PASS_1 => {
          return self.select_first_pass_qi(
            ctx.config.bit_depth,
            fti,
            ctx.config.chroma_sampling,
          );
        }
        // Second pass of 2-pass mode: we know exactly how much of each frame
        // type there is in the current buffer window, and have estimates for
        // the scales.
        PASS_2 | PASS_2_PLUS_1 => {
          let mut scale_window_sum: [i64; FRAME_NSUBTYPES] =
            self.scale_window_sum;
          let mut scale_window_nframes: [i32; FRAME_NSUBTYPES + 1] =
            self.scale_window_nframes;
          // Intentionally exclude Show Existing Frame frames from this.
          for ftj in 0..FRAME_NSUBTYPES {
            reservoir_frames += scale_window_nframes[ftj];
          }
          // If we're approaching the end of the file, add some slack to keep
          // us from slamming into a rail.
          // Our rate accuracy goes down, but it keeps the result sensible.
          // We position the target where the first forced keyframe beyond the
          // end of the file would be (for consistency with 1-pass mode).
          // TODO: let mut buf_pad = self.reservoir_frame_delay.min(...);
          // if buf_delay < buf_pad {
          //   buf_pad -= buf_delay;
          // }
          // else ...
          // Otherwise, search for the last keyframe in the buffer window and
          // target that.
          // Currently we only do this when using a finite buffer.
          // We could save the position of the last keyframe in the stream in
          // the summary data and do it with a whole-file buffer as well, but
          // it isn't likely to make a difference.
          if !self.frame_metrics.is_empty() {
            let mut fm_tail = self.frame_metrics_head + self.nframe_metrics;
            if fm_tail >= self.frame_metrics.len() {
              fm_tail -= self.frame_metrics.len();
            }
            let mut fmi = fm_tail;
            loop {
              if fmi == 0 {
                fmi += self.frame_metrics.len();
              }
              fmi -= 1;
              // Stop before we remove the first frame.
              if fmi == self.frame_metrics_head {
                break;
              }
              // If we find a keyframe, remove it and everything past it.
              if self.frame_metrics[fmi].fti == FRAME_SUBTYPE_I {
                while fmi != fm_tail {
                  let m = &self.frame_metrics[fmi];
                  let ftj = m.fti;
                  scale_window_nframes[ftj] -= 1;
                  if ftj < FRAME_NSUBTYPES {
                    scale_window_sum[ftj] -= bexp_q24(m.log_scale_q24);
                    reservoir_frames -= 1;
                  }
                  if m.show_frame {
                    reservoir_tus -= 1;
                  }
                  fmi += 1;
                  if fmi >= self.frame_metrics.len() {
                    fmi = 0;
                  }
                }
                // And stop scanning backwards.
                break;
              }
            }
          }
          nframes = scale_window_nframes;
          // If we're not using the same frame type as in pass 1 (because
          // someone changed some encoding parameters), remove that scale
          // estimate.
          // We'll add a replacement for the correct frame type below.
          if self.cur_metrics.fti != fti {
            scale_window_nframes[self.cur_metrics.fti] -= 1;
            if self.cur_metrics.fti != FRAME_SUBTYPE_SEF {
              scale_window_sum[self.cur_metrics.fti] -=
                bexp_q24(self.cur_metrics.log_scale_q24);
            }
          } else {
            log_cur_scale = (self.cur_metrics.log_scale_q24 as i64) << 33;
          }
          // If we're approaching the end of the file, add some slack to keep
          // us from slamming into a rail.
          // Our rate accuracy goes down, but it keeps the result sensible.
          // We position the target where the first forced keyframe beyond the
          // end of the file would be (for consistency with 1-pass mode).
          if reservoir_tus >= self.ntus_left
            && self.ntus_total as u64
              > ctx.gop_input_frameno_start[&output_frameno]
          {
            let nfinal_gop_tus = self.ntus_total
              - (ctx.gop_input_frameno_start[&output_frameno] as i32);
            if ctx.config.max_key_frame_interval as i32 > nfinal_gop_tus {
              let reservoir_pad = (ctx.config.max_key_frame_interval as i32
                - nfinal_gop_tus)
                .min(self.reservoir_frame_delay - reservoir_tus);
              let (guessed_reservoir_frames, guessed_reservoir_tus) = ctx
                .guess_frame_subtypes(
                  &mut nframes,
                  reservoir_tus + reservoir_pad,
                );
              reservoir_frames = guessed_reservoir_frames;
              reservoir_tus = guessed_reservoir_tus;
            }
          }
          // Blend in the low-pass filtered scale according to how many
          // frames of each type we need to add compared to the actual sums in
          // our window.
          for ftj in 0..FRAME_NSUBTYPES {
            let scale = scale_window_sum[ftj]
              + bexp_q24(self.scalefilter[ftj].y[0])
                * (nframes[ftj] - scale_window_nframes[ftj]) as i64;
            log_scale[ftj] = if nframes[ftj] > 0 {
              blog64(scale) - blog64(nframes[ftj] as i64) - q57(24)
            } else {
              -self.log_npixels
            };
          }
        }
        // Single pass.
        _ => {
          // Figure out how to re-distribute bits so that we hit our fullness
          // target before the last keyframe in our current buffer window
          // (after the current frame), or the end of the buffer window,
          // whichever comes first.
          // Count the various types and classes of frames.
          let (guessed_reservoir_frames, guessed_reservoir_tus) =
            ctx.guess_frame_subtypes(&mut nframes, self.reservoir_frame_delay);
          reservoir_frames = guessed_reservoir_frames;
          reservoir_tus = guessed_reservoir_tus;
          // TODO: Scale for VFR.
        }
      }
      // If we've been missing our target, add a penalty term.
      let rate_bias = (self.rate_bias / (self.nencoded_frames + 100))
        * (reservoir_frames as i64);
      // rate_total is the total bits available over the next
      // reservoir_tus TUs.
      let rate_total = self.reservoir_fullness - self.reservoir_target
        + rate_bias
        + (reservoir_tus as i64) * self.bits_per_tu;
      // Find a target quantizer that meets our rate target for the
      // specific mix of frame types we'll have over the next
      // reservoir_frames frames.
      // We model the rate<->quantizer relationship as
      // rate = scale*(quantizer**-exp)
      // In this case, we have our desired rate, an exponent selected in
      // setup, and a scale that's been measured over our frame history,
      // so we're solving for the quantizer.
      // Exponentiation with arbitrary exponents is expensive, so we work
      // in the binary log domain (binary exp and log aren't too bad):
      // rate = exp2(log2(scale) - log2(quantizer)*exp)
      // There's no easy closed form solution, so we bisection search for it.
      let bit_depth = ctx.config.bit_depth;
      let chroma_sampling = ctx.config.chroma_sampling;
      // TODO: Proper handling of lossless.
      let mut log_qlo = blog64(ac_q(self.ac_qi_min, 0, bit_depth).get() as i64)
        - q57(QSCALE + bit_depth as i32 - 8);
      // The AC quantizer tables map to values larger than the DC quantizer
      // tables, so we use that as the upper bound to make sure we can use
      // the full table if needed.
      let mut log_qhi = blog64(
        ac_q(self.maybe_ac_qi_max.unwrap_or(255), 0, bit_depth).get() as i64,
      ) - q57(QSCALE + bit_depth as i32 - 8);
      let mut log_base_q = (log_qlo + log_qhi) >> 1;
      while log_qlo < log_qhi {
        // Count bits contributed by each frame type using the model.
        let mut bits = 0i64;
        for ftj in 0..FRAME_NSUBTYPES {
          // Modulate base quantizer by frame type.
          let log_q = ((log_base_q + (1i64 << 11)) >> 12)
            * (MQP_Q12[ftj] as i64)
            + DQP_Q57[ftj];
          // All the fields here are Q57 except for the exponent, which is
          // Q6.
          bits += (nframes[ftj] as i64)
            * bexp64(
              log_scale[ftj] + self.log_npixels
                - ((log_q + 32) >> 6) * (self.exp[ftj] as i64),
            );
        }
        // The number of bits for Show Existing Frame frames is constant.
        bits += (nframes[FRAME_SUBTYPE_SEF] as i64) * SEF_BITS;
        let diff = bits - rate_total;
        if diff > 0 {
          log_qlo = log_base_q + 1;
        } else if diff < 0 {
          log_qhi = log_base_q - 1;
        } else {
          break;
        }
        log_base_q = (log_qlo + log_qhi) >> 1;
      }
      // If this was not one of the initial frames, limit the change in
      // base quantizer to within [0.8*Q, 1.25*Q] where Q is the previous
      // frame's base quantizer.
      if let Some(prev_log_base_q) = maybe_prev_log_base_q {
        log_base_q = clamp(
          log_base_q,
          prev_log_base_q - 0xA4_D3C2_5E68_DC58,
          prev_log_base_q + 0xA4_D3C2_5E68_DC58,
        );
      }
      // Modulate base quantizer by frame type.
      let mut log_q = ((log_base_q + (1i64 << 11)) >> 12)
        * (MQP_Q12[fti] as i64)
        + DQP_Q57[fti];
      // The above allocation looks only at the total rate we'll accumulate
      // in the next reservoir_frame_delay frames.
      // However, we could overflow the bit reservoir on the very next
      // frame.
      // Check for that here if we're not using a soft target.
      if self.cap_overflow {
        // Allow 3% of the buffer for prediction error.
        // This should be plenty, and we don't mind if we go a bit over.
        // We only want to keep these bits from being completely wasted.
        let margin = (self.reservoir_max + 31) >> 5;
        // We want to use at least this many bits next frame.
        let soft_limit = self.reservoir_fullness + self.bits_per_tu
          - (self.reservoir_max - margin);
        if soft_limit > 0 {
          let log_soft_limit = blog64(soft_limit);
          // If we're predicting we won't use that many bits...
          // TODO: When using frame re-ordering, we should include the rate
          // for all of the frames in the current TU.
          // When there is more than one frame, there will be no direct
          // solution for the required adjustment, however.
          let log_scale_pixels = log_cur_scale + self.log_npixels;
          let exp = self.exp[fti] as i64;
          let mut log_q_exp = ((log_q + 32) >> 6) * exp;
          if log_scale_pixels - log_q_exp < log_soft_limit {
            // Scale the adjustment based on how far into the margin we are.
            log_q_exp += ((log_scale_pixels - log_soft_limit - log_q_exp)
              >> 32)
              * ((margin.min(soft_limit) << 32) / margin);
            log_q = ((log_q_exp + (exp >> 1)) / exp) << 6;
          }
        }
      }
      // We just checked we don't overflow the reservoir next frame, now
      // check we don't underflow and bust the budget (when not using a
      // soft target).
      if self.maybe_ac_qi_max.is_none() {
        // Compute the maximum number of bits we can use in the next frame.
        // Allow 50% of the rate for a single frame for prediction error.
        // This may not be enough for keyframes or sudden changes in
        // complexity.
        let log_hard_limit =
          blog64(self.reservoir_fullness + (self.bits_per_tu >> 1));
        // If we're predicting we'll use more than this...
        // TODO: When using frame re-ordering, we should include the rate
        // for all of the frames in the current TU.
        // When there is more than one frame, there will be no direct
        // solution for the required adjustment, however.
        let log_scale_pixels = log_cur_scale + self.log_npixels;
        let exp = self.exp[fti] as i64;
        let mut log_q_exp = ((log_q + 32) >> 6) * exp;
        if log_scale_pixels - log_q_exp > log_hard_limit {
          // Force the target to hit our limit exactly.
          log_q_exp = log_scale_pixels - log_hard_limit;
          log_q = ((log_q_exp + (exp >> 1)) / exp) << 6;
          // If that target is unreasonable, oh well; we'll have to drop.
        }
      }

      if let Some(qi_max) = self.maybe_ac_qi_max {
        let (max_log_base_q, max_log_q) =
          Self::calc_flat_quantizer(qi_max, ctx.config.bit_depth, fti);
        log_base_q = cmp::min(log_base_q, max_log_base_q);
        log_q = cmp::min(log_q, max_log_q);
      }
      if self.ac_qi_min > 0 {
        let (min_log_base_q, min_log_q) =
          Self::calc_flat_quantizer(self.ac_qi_min, ctx.config.bit_depth, fti);
        log_base_q = cmp::max(log_base_q, min_log_base_q);
        log_q = cmp::max(log_q, min_log_q);
      }
      QuantizerParameters::new_from_log_q(
        log_base_q,
        log_q,
        bit_depth,
        chroma_sampling,
        fti == 0,
        log_isqrt_mean_scale,
      )
    }
  }

  // Computes a quantizer directly from the frame type and base quantizer index,
  // without consideration for rate control.
  fn calc_flat_quantizer(
    base_qi: u8, bit_depth: usize, fti: usize,
  ) -> (i64, i64) {
    // TODO: Rename "quantizer" something that indicates it is a quantizer
    // index, and move it somewhere more sensible (or choose a better way to
    // parameterize a "quality" configuration parameter).

    // We use the AC quantizer as the source quantizer since its quantizer
    // tables have unique entries, while the DC tables do not.
    let ac_quantizer = ac_q(base_qi, 0, bit_depth).get() as i64;
    // Pick the nearest DC entry since an exact match may be unavailable.
    let dc_qi = select_dc_qi(ac_quantizer, bit_depth);
    let dc_quantizer = dc_q(dc_qi, 0, bit_depth).get() as i64;
    // Get the log quantizers as Q57.
    let log_ac_q = blog64(ac_quantizer) - q57(QSCALE + bit_depth as i32 - 8);
    let log_dc_q = blog64(dc_quantizer) - q57(QSCALE + bit_depth as i32 - 8);
    // Target the midpoint of the chosen entries.
    let log_base_q = (log_ac_q + log_dc_q + 1) >> 1;
    // Adjust the quantizer for the frame type, result is Q57:
    let log_q = ((log_base_q + (1i64 << 11)) >> 12) * (MQP_Q12[fti] as i64)
      + DQP_Q57[fti];
    (log_base_q, log_q)
  }

  #[profiling::function]
  pub fn update_state(
    &mut self, bits: i64, fti: usize, show_frame: bool, log_target_q: i64,
    trial: bool, droppable: bool,
  ) -> bool {
    if trial {
      assert!(self.needs_trial_encode(fti));
      assert!(bits > 0);
    }
    let mut dropped = false;
    // Update rate control only if rate control is active.
    if self.target_bitrate > 0 {
      let mut estimated_bits = 0;
      let mut bits = bits;
      let mut droppable = droppable;
      let mut log_scale = q57(-64);
      // Frame dropping is also disabled for now in the case of
      // infinite-buffer two-pass mode.
      if !self.drop_frames
        || fti == FRAME_SUBTYPE_SEF
        || (self.twopass_state == PASS_2
          || self.twopass_state == PASS_2_PLUS_1)
          && !self.frame_metrics.is_empty()
      {
        droppable = false;
      }
      if fti == FRAME_SUBTYPE_SEF {
        debug_assert!(bits == SEF_BITS);
        debug_assert!(show_frame);
        // Please don't make trial encodes of a SEF.
        debug_assert!(!trial);
        estimated_bits = SEF_BITS;
        self.nsef_frames += 1;
      } else {
        let log_q_exp = ((log_target_q + 32) >> 6) * (self.exp[fti] as i64);
        let prev_log_scale = self.log_scale[fti];
        if bits <= 0 {
          // We didn't code any blocks in this frame.
          bits = 0;
          dropped = true;
          // TODO: Adjust VFR rate based on drop count.
        } else {
          // Compute the estimated scale factor for this frame type.
          let log_bits = blog64(bits);
          log_scale = (log_bits - self.log_npixels + log_q_exp).min(q57(16));
          estimated_bits =
            bexp64(prev_log_scale + self.log_npixels - log_q_exp);
          if !trial {
            self.nencoded_frames += 1;
          }
        }
      }
      let log_scale_q24 = q57_to_q24(log_scale);
      // Special two-pass processing.
      if self.twopass_state == PASS_2 || self.twopass_state == PASS_2_PLUS_1 {
        // Pass 2 mode:
        if !trial {
          // Move the current metrics back one frame.
          self.prev_metrics = self.cur_metrics;
          // Back out the last frame's statistics from the sliding window.
          let ftj = self.prev_metrics.fti;
          self.nframes_left[ftj] -= 1;
          self.scale_window_nframes[ftj] -= 1;
          if ftj < FRAME_NSUBTYPES {
            self.scale_window_sum[ftj] -=
              bexp_q24(self.prev_metrics.log_scale_q24);
          }
          if self.prev_metrics.show_frame {
            self.ntus_left -= 1;
            self.scale_window_ntus -= 1;
          }
          // Free the corresponding entry in the circular buffer.
          if !self.frame_metrics.is_empty() {
            self.nframe_metrics -= 1;
            self.frame_metrics_head += 1;
            if self.frame_metrics_head >= self.frame_metrics.len() {
              self.frame_metrics_head = 0;
            }
          }
          // Mark us ready for the next 2-pass packet.
          self.pass2_data_ready = false;
          // Update state, so the user doesn't have to keep calling
          // twopass_in() after they've fed in all the data when we're using
          // a finite buffer.
          self.twopass_in(None).unwrap_or(0);
        }
      }
      if self.twopass_state == PASS_1 || self.twopass_state == PASS_2_PLUS_1 {
        // Pass 1 mode: save the metrics for this frame.
        self.prev_metrics.log_scale_q24 = log_scale_q24;
        self.prev_metrics.fti = fti;
        self.prev_metrics.show_frame = show_frame;
        self.pass1_data_retrieved = false;
      }
      // Common to all passes:
      if fti != FRAME_SUBTYPE_SEF && bits > 0 {
        // If this is the first example of the given frame type we've seen,
        // we immediately replace the default scale factor guess with the
        // estimate we just computed using the first frame.
        if trial || self.nframes[fti] <= 0 {
          let f = &mut self.scalefilter[fti];
          let x = log_scale_q24;
          f.x[0] = x;
          f.x[1] = x;
          f.y[0] = x;
          f.y[1] = x;
          self.log_scale[fti] = log_scale;
          // TODO: Duplicate regular P frame state for first golden P frame.
        } else {
          // Lengthen the time constant for the inter filters as we collect
          // more frame statistics, until we reach our target.
          if fti > 0
            && self.inter_delay[fti - 1] < self.inter_delay_target
            && self.nframes[fti] >= self.inter_delay[fti - 1]
          {
            self.inter_delay[fti - 1] += 1;
            self.scalefilter[fti].reinit(self.inter_delay[fti - 1]);
          }
          // Update the low-pass scale filter for this frame type regardless
          // of whether or not we will ultimately drop this frame.
          self.log_scale[fti] =
            q24_to_q57(self.scalefilter[fti].update(log_scale_q24));
        }
        // If this frame busts our budget, it must be dropped.
        if droppable && self.reservoir_fullness + self.bits_per_tu < bits {
          // TODO: Adjust VFR rate based on drop count.
          bits = 0;
          dropped = true;
        } else {
          // TODO: Update a low-pass filter to estimate the "real" frame rate
          // taking timestamps and drops into account.
          // This is only done if the frame is coded, as it needs the final
          // count of dropped frames.
        }
      }
      if !trial {
        // Increment the frame count for filter adaptation purposes.
        if !trial && self.nframes[fti] < ::std::i32::MAX {
          self.nframes[fti] += 1;
        }
        self.reservoir_fullness -= bits;
        if show_frame {
          self.reservoir_fullness += self.bits_per_tu;
          // TODO: Properly account for temporal delimiter bits.
        }
        // If we're too quick filling the buffer and overflow is capped, that
        // rate is lost forever.
        if self.cap_overflow {
          self.reservoir_fullness =
            self.reservoir_fullness.min(self.reservoir_max);
        }
        // If we're too quick draining the buffer and underflow is capped,
        // don't try to make up that rate later.
        if self.cap_underflow {
          self.reservoir_fullness = self.reservoir_fullness.max(0);
        }
        // Adjust the bias for the real bits we've used.
        self.rate_bias += estimated_bits - bits;
      }
    }
    dropped
  }

  pub const fn needs_trial_encode(&self, fti: usize) -> bool {
    self.target_bitrate > 0 && self.nframes[fti] == 0
  }

  pub(crate) const fn ready(&self) -> bool {
    match self.twopass_state {
      PASS_SINGLE => true,
      PASS_1 => self.pass1_data_retrieved,
      PASS_2 => self.pass2_data_ready,
      _ => self.pass1_data_retrieved && self.pass2_data_ready,
    }
  }

  fn buffer_val(&mut self, val: i64, bytes: usize, cur_pos: usize) -> usize {
    let mut val = val;
    let mut bytes = bytes;
    let mut cur_pos = cur_pos;
    while bytes > 0 {
      bytes -= 1;
      self.pass1_buffer[cur_pos] = val as u8;
      cur_pos += 1;
      val >>= 8;
    }
    cur_pos
  }

  pub(crate) fn select_pass1_log_base_q<T: Pixel>(
    &self, ctx: &ContextInner<T>, output_frameno: u64,
  ) -> i64 {
    assert_eq!(self.twopass_state, PASS_SINGLE);
    self.select_qi(ctx, output_frameno, FRAME_SUBTYPE_I, None, 0).log_base_q
  }

  // Initialize the first pass and emit a placeholder summary
  pub(crate) fn init_first_pass(
    &mut self, maybe_pass1_log_base_q: Option<i64>,
  ) {
    if let Some(pass1_log_base_q) = maybe_pass1_log_base_q {
      assert_eq!(self.twopass_state, PASS_SINGLE);
      // Pick first-pass qi for scale calculations.
      self.pass1_log_base_q = pass1_log_base_q;
    } else {
      debug_assert!(self.twopass_state == PASS_2);
    }
    self.twopass_state += PASS_1;
  }

  // Prepare a placeholder summary
  fn emit_placeholder_summary(&mut self) -> &[u8] {
    // Fill in dummy summary values.
    let mut cur_pos = 0;
    cur_pos = self.buffer_val(TWOPASS_MAGIC as i64, 4, cur_pos);
    cur_pos = self.buffer_val(TWOPASS_VERSION as i64, 4, cur_pos);
    cur_pos = self.buffer_val(0, TWOPASS_HEADER_SZ - 8, cur_pos);
    debug_assert!(cur_pos == TWOPASS_HEADER_SZ);
    self.pass1_data_retrieved = true;
    &self.pass1_buffer[..cur_pos]
  }

  // Frame-specific pass data
  pub(crate) fn emit_frame_data(&mut self) -> Option<&[u8]> {
    let mut cur_pos = 0;
    let fti = self.prev_metrics.fti;
    if fti < FRAME_NSUBTYPES {
      self.scale_sum[fti] += bexp_q24(self.prev_metrics.log_scale_q24);
    }
    if self.prev_metrics.show_frame {
      self.ntus += 1;
    }
    // If we have encoded too many frames, prevent us from reaching the
    // ready state required to encode more.
    if self.nencoded_frames + self.nsef_frames >= std::i32::MAX as i64 {
      None?
    }
    cur_pos = self.buffer_val(
      (self.prev_metrics.show_frame as i64) << 31
        | self.prev_metrics.fti as i64,
      4,
      cur_pos,
    );
    cur_pos =
      self.buffer_val(self.prev_metrics.log_scale_q24 as i64, 4, cur_pos);
    debug_assert!(cur_pos == TWOPASS_PACKET_SZ);
    self.pass1_data_retrieved = true;
    Some(&self.pass1_buffer[..cur_pos])
  }

  // Summary of the whole encoding process.
  pub(crate) fn emit_summary(&mut self) -> &[u8] {
    let mut cur_pos = 0;
    cur_pos = self.buffer_val(TWOPASS_MAGIC as i64, 4, cur_pos);
    cur_pos = self.buffer_val(TWOPASS_VERSION as i64, 4, cur_pos);
    cur_pos = self.buffer_val(self.ntus as i64, 4, cur_pos);
    for fti in 0..=FRAME_NSUBTYPES {
      cur_pos = self.buffer_val(self.nframes[fti] as i64, 4, cur_pos);
    }
    for fti in 0..FRAME_NSUBTYPES {
      cur_pos = self.buffer_val(self.exp[fti] as i64, 1, cur_pos);
    }
    for fti in 0..FRAME_NSUBTYPES {
      cur_pos = self.buffer_val(self.scale_sum[fti], 8, cur_pos);
    }
    debug_assert!(cur_pos == TWOPASS_HEADER_SZ);
    self.pass1_summary_retrieved = true;
    &self.pass1_buffer[..cur_pos]
  }

  // Emit either summary or frame-specific data depending on the previous call
  pub(crate) fn twopass_out(
    &mut self, done_processing: bool,
  ) -> Option<&[u8]> {
    if !self.pass1_data_retrieved {
      if self.twopass_state != PASS_1 && self.twopass_state != PASS_2_PLUS_1 {
        Some(self.emit_placeholder_summary())
      } else {
        self.emit_frame_data()
      }
    } else if done_processing && !self.pass1_summary_retrieved {
      Some(self.emit_summary())
    } else {
      // The data for this frame has already been retrieved.
      None
    }
  }

  // Initialize the rate control for second pass encoding
  pub(crate) fn init_second_pass(&mut self) {
    if self.twopass_state == PASS_SINGLE || self.twopass_state == PASS_1 {
      // Initialize the second pass.
      self.twopass_state += PASS_2;
      // If the user requested a finite buffer, reserve the space required for
      // it.
      if self.reservoir_frame_delay_is_set {
        debug_assert!(self.reservoir_frame_delay > 0);
        // reservoir_frame_delay counts in TUs, but RCFrameMetrics are stored
        // per frame (including Show Existing Frame frames).
        // When re-ordering, we will have more frames than TUs.
        // How many more?
        // That depends on the re-ordering scheme used.
        // Doubling the number of TUs and adding a fixed latency equal to the
        // maximum number of reference frames we can store should be
        // sufficient for any reasonable scheme, and keeps this code from
        // depending too closely on the details of the scheme currently used
        // by rav1e.
        let nmetrics = (self.reservoir_frame_delay as usize) * 2 + 8;
        self.frame_metrics.reserve_exact(nmetrics);
        self.frame_metrics.resize(nmetrics, RCFrameMetrics::new());
      }
    }
  }

  pub(crate) fn setup_second_pass(&mut self, s: &RCSummary) {
    self.ntus_total = s.ntus;
    self.ntus_left = s.ntus;
    self.nframes_total = s.nframes;
    self.nframes_left = s.nframes;
    self.nframes_total_total = s.nframes.iter().sum();
    if self.frame_metrics.is_empty() {
      self.reservoir_frame_delay = s.ntus;
      self.scale_window_nframes = self.nframes_total;
      self.scale_window_sum = s.scale_sum;
      self.reservoir_max =
        self.bits_per_tu * (self.reservoir_frame_delay as i64);
      self.reservoir_target = (self.reservoir_max + 1) >> 1;
      self.reservoir_fullness = self.reservoir_target;
    } else {
      self.reservoir_frame_delay = self.reservoir_frame_delay.min(s.ntus);
    }
    self.exp = s.exp;
  }

  // Parse the rate control summary
  //
  // It returns the amount of data consumed in the process, or an error
  // message on parsing failure.
  fn twopass_parse_summary(&mut self, buf: &[u8]) -> Result<usize, String> {
    let consumed = self.des.buffer_fill(buf, 0, TWOPASS_HEADER_SZ);
    if self.des.pass2_buffer_fill >= TWOPASS_HEADER_SZ {
      self.des.pass2_buffer_pos = 0;

      let s = self.des.parse_summary()?;

      self.setup_second_pass(&s);

      // Got a valid header.
      // Set up pass 2.
      // Clear the header data from the buffer to make room for the
      // packet data.
      self.des.pass2_buffer_fill = 0;
    }

    Ok(consumed)
  }

  // Return the size of the first buffer twopass_in expects
  //
  // It is the summary size (constant) + the number of frame data packets
  // (variable depending on the configuration) it needs to start encoding.
  pub(crate) fn twopass_first_packet_size(&self) -> usize {
    let frames_needed = if !self.frame_metrics.is_empty() {
      // If we're not using whole-file buffering, we need at least one
      // frame per buffer slot.
      self.reservoir_frame_delay as usize
    } else {
      // Otherwise we need just one.
      1
    };

    TWOPASS_HEADER_SZ + frames_needed * TWOPASS_PACKET_SZ
  }

  // Return the number of frame data packets to be parsed before
  // the encoding process can continue.
  pub(crate) fn twopass_in_frames_needed(&self) -> i32 {
    if self.target_bitrate <= 0 {
      return 0;
    }
    if self.frame_metrics.is_empty() {
      return i32::from(!self.pass2_data_ready);
    }
    let mut cur_scale_window_nframes = 0;
    let mut cur_nframes_left = 0;
    for fti in 0..=FRAME_NSUBTYPES {
      cur_scale_window_nframes += self.scale_window_nframes[fti];
      cur_nframes_left += self.nframes_left[fti];
    }

    (self.reservoir_frame_delay - self.scale_window_ntus)
      .clamp(0, cur_nframes_left - cur_scale_window_nframes)
  }

  pub(crate) fn parse_frame_data_packet(
    &mut self, buf: &[u8],
  ) -> Result<(), String> {
    if buf.len() != TWOPASS_PACKET_SZ {
      return Err("Incorrect buffer size".to_string());
    }

    self.des.buffer_fill(buf, 0, TWOPASS_PACKET_SZ);
    self.des.pass2_buffer_pos = 0;
    let m = self.des.parse_metrics()?;
    self.des.pass2_buffer_fill = 0;

    if self.frame_metrics.is_empty() {
      // We're using a whole-file buffer.
      self.cur_metrics = m;
      self.pass2_data_ready = true;
    } else {
      // Safety check
      let frames_needed = self.twopass_in_frames_needed();

      if frames_needed > 0 {
        if self.nframe_metrics >= self.frame_metrics.len() {
          return Err(
            "Read too many frames without finding enough TUs".to_string(),
          );
        }

        let mut fmi = self.frame_metrics_head + self.nframe_metrics;
        if fmi >= self.frame_metrics.len() {
          fmi -= self.frame_metrics.len();
        }
        self.nframe_metrics += 1;
        self.frame_metrics[fmi] = m;
        // And accumulate the statistics over the window.
        self.scale_window_nframes[m.fti] += 1;
        if m.fti < FRAME_NSUBTYPES {
          self.scale_window_sum[m.fti] += bexp_q24(m.log_scale_q24);
        }
        if m.show_frame {
          self.scale_window_ntus += 1;
        }
        if frames_needed == 1 {
          self.pass2_data_ready = true;
          self.cur_metrics = self.frame_metrics[self.frame_metrics_head];
        }
      } else {
        return Err("No frames needed".to_string());
      }
    }

    Ok(())
  }

  // Parse the rate control per-frame data
  //
  // If no buffer is passed, it returns the amount of data it expects
  // to consume next.
  //
  // If a properly sized buffer is passed, it returns the amount of data
  // consumed in the process, or an error message on parsing failure.
  fn twopass_parse_frame_data(
    &mut self, maybe_buf: Option<&[u8]>, mut consumed: usize,
  ) -> Result<usize, String> {
    {
      if self.frame_metrics.is_empty() {
        // We're using a whole-file buffer.
        if let Some(buf) = maybe_buf {
          consumed = self.des.buffer_fill(buf, consumed, TWOPASS_PACKET_SZ);
          if self.des.pass2_buffer_fill >= TWOPASS_PACKET_SZ {
            self.des.pass2_buffer_pos = 0;
            // Read metrics for the next frame.
            self.cur_metrics = self.des.parse_metrics()?;
            // Clear the buffer for the next frame.
            self.des.pass2_buffer_fill = 0;
            self.pass2_data_ready = true;
          }
        } else {
          return Ok(TWOPASS_PACKET_SZ - self.des.pass2_buffer_fill);
        }
      } else {
        // We're using a finite buffer.
        let mut cur_scale_window_nframes = 0;
        let mut cur_nframes_left = 0;

        for fti in 0..=FRAME_NSUBTYPES {
          cur_scale_window_nframes += self.scale_window_nframes[fti];
          cur_nframes_left += self.nframes_left[fti];
        }

        let mut frames_needed = self.twopass_in_frames_needed();
        while frames_needed > 0 {
          if let Some(buf) = maybe_buf {
            consumed = self.des.buffer_fill(buf, consumed, TWOPASS_PACKET_SZ);
            if self.des.pass2_buffer_fill >= TWOPASS_PACKET_SZ {
              self.des.pass2_buffer_pos = 0;
              // Read the metrics for the next frame.
              let m = self.des.parse_metrics()?;
              // Add them to the circular buffer.
              if self.nframe_metrics >= self.frame_metrics.len() {
                return Err(
                  "Read too many frames without finding enough TUs"
                    .to_string(),
                );
              }
              let mut fmi = self.frame_metrics_head + self.nframe_metrics;
              if fmi >= self.frame_metrics.len() {
                fmi -= self.frame_metrics.len();
              }
              self.nframe_metrics += 1;
              self.frame_metrics[fmi] = m;
              // And accumulate the statistics over the window.
              self.scale_window_nframes[m.fti] += 1;
              cur_scale_window_nframes += 1;
              if m.fti < FRAME_NSUBTYPES {
                self.scale_window_sum[m.fti] += bexp_q24(m.log_scale_q24);
              }
              if m.show_frame {
                self.scale_window_ntus += 1;
              }
              frames_needed = (self.reservoir_frame_delay
                - self.scale_window_ntus)
                .clamp(0, cur_nframes_left - cur_scale_window_nframes);
              // Clear the buffer for the next frame.
              self.des.pass2_buffer_fill = 0;
            } else {
              // Go back for more data.
              break;
            }
          } else {
            return Ok(
              TWOPASS_PACKET_SZ * (frames_needed as usize)
                - self.des.pass2_buffer_fill,
            );
          }
        }
        // If we've got all the frames we need, fill in the current metrics.
        // We're ready to go.
        if frames_needed <= 0 {
          self.cur_metrics = self.frame_metrics[self.frame_metrics_head];
          // Mark us ready for the next frame.
          self.pass2_data_ready = true;
        }
      }
    }

    Ok(consumed)
  }

  // If called without a buffer, it will return the size of the next
  // buffer it expects.
  //
  // If called with a buffer, it will consume it fully.
  // It returns the number of bytes consumed on success, or an error
  // message if the data was insufficient or could not be parsed.
  pub(crate) fn twopass_in(
    &mut self, maybe_buf: Option<&[u8]>,
  ) -> Result<usize, String> {
    let mut consumed = 0;
    self.init_second_pass();
    // If we haven't got a valid summary header yet, try to parse one.
    if self.nframes_total[FRAME_SUBTYPE_I] == 0 {
      self.pass2_data_ready = false;
      if let Some(buf) = maybe_buf {
        consumed = self.twopass_parse_summary(buf)?
      } else {
        return Ok(self.twopass_first_packet_size());
      }
    }
    if self.nframes_total[FRAME_SUBTYPE_I] > 0 {
      if self.nencoded_frames + self.nsef_frames
        >= self.nframes_total_total as i64
      {
        // We don't want any more data after the last frame, and we don't want
        // to allow any more frames to be encoded.
        self.pass2_data_ready = false;
      } else if !self.pass2_data_ready {
        return self.twopass_parse_frame_data(maybe_buf, consumed);
      }
    }
    Ok(consumed)
  }
}
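
// The sketches below are illustrative additions (not part of the original
// sources): they exercise a few of the invariants documented in the comments
// above, using assumed but plausible encode parameters (1920x1080 at
// 30000/1001 fps, 5 Mbps). They are a minimal sanity harness, not a
// definitive test suite for the rate controller.
#[cfg(test)]
mod rate_examples {
  use super::*;

  fn sample_rc() -> RCState {
    RCState::new(1920, 1080, 30000, 1001, 5_000_000, None, 0, 240, None)
  }

  // The reservoir starts at its 50% target, and its size is the per-TU
  // budget times the reservoir delay, exactly as new() sets it up.
  #[test]
  fn reservoir_initialization() {
    let rc = sample_rc();
    assert_eq!(
      rc.reservoir_max,
      rc.bits_per_tu * rc.reservoir_frame_delay as i64
    );
    assert_eq!(rc.reservoir_target, (rc.reservoir_max + 1) >> 1);
    assert_eq!(rc.reservoir_fullness, rc.reservoir_target);
  }

  // With equal MQP entries, the DQP offsets order the per-type quantizers:
  // I frames get a lower log quantizer than P frames, which in turn get a
  // lower one than B1 frames.
  #[test]
  fn flat_quantizer_frame_type_ordering() {
    let (_, log_q_i) = RCState::calc_flat_quantizer(100, 8, FRAME_SUBTYPE_I);
    let (_, log_q_p) = RCState::calc_flat_quantizer(100, 8, FRAME_SUBTYPE_P);
    let (_, log_q_b1) = RCState::calc_flat_quantizer(100, 8, FRAME_SUBTYPE_B1);
    assert!(log_q_i < log_q_p);
    assert!(log_q_p < log_q_b1);
  }

  // A frame data packet round-trips through buffer_val()/parse_metrics():
  // show_frame and fti are jointly coded in the first 4 bytes, and
  // log_scale_q24 follows in the next 4.
  #[test]
  fn frame_packet_roundtrip() {
    let mut rc = sample_rc();
    let mut pos = rc.buffer_val((1i64 << 31) | FRAME_SUBTYPE_P as i64, 4, 0);
    pos = rc.buffer_val(1234, 4, pos);
    assert_eq!(pos, TWOPASS_PACKET_SZ);
    let mut des = RCDeserialize::default();
    des.buffer_fill(
      &rc.pass1_buffer[..TWOPASS_PACKET_SZ],
      0,
      TWOPASS_PACKET_SZ,
    );
    let m = des.parse_metrics().unwrap();
    assert!(m.show_frame);
    assert_eq!(m.fti, FRAME_SUBTYPE_P);
    assert_eq!(m.log_scale_q24, 1234);
  }

  // With whole-file buffering (no explicit reservoir delay requested), the
  // first two-pass input is the summary header plus a single frame packet.
  #[test]
  fn first_packet_size_whole_file() {
    let rc = sample_rc();
    assert_eq!(
      rc.twopass_first_packet_size(),
      TWOPASS_HEADER_SZ + TWOPASS_PACKET_SZ
    );
  }
}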