1 | // Copyright (c) 2019-2022, The rav1e contributors. All rights reserved |
2 | // |
3 | // This source code is subject to the terms of the BSD 2 Clause License and |
4 | // the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
5 | // was not distributed with this source code in the LICENSE file, you can |
6 | // obtain it at www.aomedia.org/license/software. If the Alliance for Open |
7 | // Media Patent License 1.0 was not distributed with this source code in the |
8 | // PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
9 | |
10 | use crate::api::color::ChromaSampling; |
11 | use crate::api::ContextInner; |
12 | use crate::encoder::TEMPORAL_DELIMITER; |
13 | use crate::quantize::{ac_q, dc_q, select_ac_qi, select_dc_qi}; |
14 | use crate::util::{ |
15 | bexp64, bexp_q24, blog64, clamp, q24_to_q57, q57, q57_to_q24, Pixel, |
16 | }; |
17 | use std::cmp; |
18 | |
// The number of frame sub-types for which we track distinct parameters.
// This does not include FRAME_SUBTYPE_SEF, because we don't need to do any
// parameter tracking for Show Existing Frame frames.
pub const FRAME_NSUBTYPES: usize = 4;

// Frame sub-type indices.
// Values below FRAME_NSUBTYPES index the per-sub-type tables in this file
// (arrays of length FRAME_NSUBTYPES).
// Sub-type 0 is the intra sub-type; the rate control code treats frames of
// this sub-type as keyframes.
pub const FRAME_SUBTYPE_I: usize = 0;
// NOTE(review): P, B0 and B1 are presumably the inter-frame sub-types in
// order of increasing pyramid depth -- confirm against the encoder.
pub const FRAME_SUBTYPE_P: usize = 1;
#[allow (unused)]
pub const FRAME_SUBTYPE_B0: usize = 2;
#[allow (unused)]
pub const FRAME_SUBTYPE_B1: usize = 3;
// Show Existing Frame.
// This equals FRAME_NSUBTYPES, so it is only a valid index for the arrays of
// length FRAME_NSUBTYPES + 1 (the per-sub-type frame counters).
pub const FRAME_SUBTYPE_SEF: usize = 4;
31 | |
// Values taken by RCState::twopass_state.
// 1-pass encoding.
const PASS_SINGLE: i32 = 0;
// 1st pass of 2-pass encoding.
const PASS_1: i32 = 1;
// 2nd pass of 2-pass encoding.
const PASS_2: i32 = 2;
// 2nd pass of 2-pass encoding, but also emitting pass 1 data again.
const PASS_2_PLUS_1: i32 = 3;

// Magic value at the start of the 2-pass stats file.
// Read from the most to the least significant byte, the value spells "P2AV"
// in ASCII.
const TWOPASS_MAGIC: i32 = 0x50324156;
// Version number for the 2-pass stats file
const TWOPASS_VERSION: i32 = 1;
// 4 byte magic + 4 byte version + 4 byte TU count + 4 byte SEF frame count
// + FRAME_NSUBTYPES*(4 byte frame count + 1 byte exp + 8 byte scale_sum)
// With FRAME_NSUBTYPES == 4 this is 68 bytes.
pub(crate) const TWOPASS_HEADER_SZ: usize = 16 + FRAME_NSUBTYPES * (4 + 1 + 8);
// 4 byte frame type (show_frame and fti jointly coded) + 4 byte log_scale_q24
// In the frame type word, show_frame is bit 31 and fti is the low 31 bits.
const TWOPASS_PACKET_SZ: usize = 8;
46 | |
// NOTE(review): presumably the number of bits accounted for each Show
// Existing Frame; it is not referenced in this part of the file, so confirm
// at its use site.
const SEF_BITS: i64 = 24;

// The scale of AV1 quantizer tables (relative to the pixel domain), i.e., Q3.
pub(crate) const QSCALE: i32 = 3;

// We clamp the actual I and B frame delays to a minimum of 10 to work
// within the range of values where later incrementing the delay works as
// designed.
// 10 is not an exact choice, but rather a good working trade-off.
// This is also the initial delay of the inter-frame scale filters and of
// every RCState::inter_delay entry.
const INTER_DELAY_TARGET_MIN: i32 = 10;
57 | |
// The base quantizer for a frame is adjusted based on the frame type using the
// formula (log_qp*mqp + dqp), where log_qp is the base-2 logarithm of the
// "linear" quantizer (the actual factor by which coefficients are divided).
// Because log_qp has an implicit offset built in based on the scale of the
// coefficients (which depends on the pixel bit depth and the transform
// scale), we normalize the quantizer to the equivalent for 8-bit pixels with
// orthonormal transforms for the purposes of rate modeling.
//
// MQP_Q12 holds the multiplier (mqp) for each frame sub-type in Q12.
// Every entry is currently 1.0, i.e. 4096.
const MQP_Q12: &[i32; FRAME_NSUBTYPES] = &[
  // TODO: Use a const function once f64 operations in const functions are
  // stable.
  (1.0 * (1 << 12) as f64) as i32,
  (1.0 * (1 << 12) as f64) as i32,
  (1.0 * (1 << 12) as f64) as i32,
  (1.0 * (1 << 12) as f64) as i32,
];

// The ratio 33_810_170.0 / 86_043_287.0 was derived by approximating the median
// of a change of 15 quantizer steps in the quantizer tables.
//
// DQP_Q57 holds the additive offset (dqp) for each frame sub-type in Q57:
// -1, 0, +1 and +2 times that ratio (about 0.393) for sub-types 0 to 3.
const DQP_Q57: &[i64; FRAME_NSUBTYPES] = &[
  (-(33_810_170.0 / 86_043_287.0) * (1i64 << 57) as f64) as i64,
  (0.0 * (1i64 << 57) as f64) as i64,
  ((33_810_170.0 / 86_043_287.0) * (1i64 << 57) as f64) as i64,
  (2.0 * (33_810_170.0 / 86_043_287.0) * (1i64 << 57) as f64) as i64,
];
82 | |
// For 8-bit-depth inter frames, log_q_y is derived from log_target_q with a
// linear model:
//  log_q_y = log_target_q + (log_target_q >> 32) * Q_MODEL_MUL + Q_MODEL_ADD
// Derivation of the linear models:
//  https://github.com/xiph/rav1e/blob/d02bdbd3b0b7b2cb9fc301031cc6a4e67a567a5c/doc/quantizer-weight-analysis.ipynb
//
// Both tables are indexed with `chroma_sampling as usize`; the per-entry
// comments give the sampling each slot is meant for.
// Q_MODEL_ADD is the constant term of the model, added directly to the Q57
// log quantizer.
#[rustfmt::skip]
const Q_MODEL_ADD: [i64; 4] = [
  // 4:2:0
  -0x24_4FE7_ECB3_DD90,
  // 4:2:2
  -0x37_41DA_38AD_0924,
  // 4:4:4
  -0x70_83BD_A626_311C,
  // 4:0:0
  0,
];
// Q_MODEL_MUL is the slope of the model.
// It multiplies the Q57 log quantizer after a right shift by 32, so an entry
// m scales log_target_q by roughly m / 2^32.
#[rustfmt::skip]
const Q_MODEL_MUL: [i64; 4] = [
  // 4:2:0
  0x8A0_50DD,
  // 4:2:2
  0x887_7666,
  // 4:4:4
  0x8D4_A712,
  // 4:0:0
  0,
];
110 | |
// Coarse tangent table used by warp_alpha(), in Q12.
// Entry i holds tan(i * 5 degrees); e.g. entry 9 is 4096, i.e.
// tan(45 degrees) = 1.0.
#[rustfmt::skip]
const ROUGH_TAN_LOOKUP: &[u16; 18] = &[
      0,   358,   722,  1098,  1491,  1910,
   2365,  2868,  3437,  4096,  4881,  5850,
   7094,  8784, 11254, 15286, 23230, 46817
];

// A digital approximation of a 2nd-order low-pass Bessel follower.
// We use this for rate control because it has fast reaction time, but is
// critically damped.
pub struct IIRBessel2 {
  // Feedback coefficients applied to y[0] and y[1], in Q24.
  c: [i32; 2],
  // Gain applied to the input term (x + 2*x[0] + x[1]), in Q24.
  g: i32,
  // The two most recent inputs, newest first.
  x: [i32; 2],
  // The two most recent outputs, newest first.
  y: [i32; 2],
}

// Approximates tan(alpha * pi) by linear interpolation of ROUGH_TAN_LOOKUP.
// alpha is Q24 in the range [0,0.5).
// The return value is 5.12.
fn warp_alpha(alpha: i32) -> i32 {
  // Scale alpha so that one table interval spans exactly 1 << 24.
  let scaled = alpha * 36;
  // Clamp to the last interval so that seg + 1 stays inside the table.
  let seg = (scaled >> 24).min(16);
  let t0 = i64::from(ROUGH_TAN_LOOKUP[seg as usize]);
  let t1 = i64::from(ROUGH_TAN_LOOKUP[seg as usize + 1]);
  // Position inside the interval, in Q24.
  let frac = i64::from(scaled - (seg << 24));
  // The table entries are widened to 64 bits *before* the slope is shifted:
  // every table step is larger than 255, so shifting the difference left by 8
  // while it is still a u16 silently drops its high bits and flattens the
  // interpolated slope.
  (((t0 << 32) + ((t1 - t0) << 8) * frac) >> 32) as i32
}
137 | |
138 | // Compute Bessel filter coefficients with the specified delay. |
139 | // Return: Filter parameters (c[0], c[1], g). |
140 | fn iir_bessel2_get_parameters(delay: i32) -> (i32, i32, i32) { |
141 | // This borrows some code from an unreleased version of Postfish. |
142 | // See the recipe at http://unicorn.us.com/alex/2polefilters.html for details |
143 | // on deriving the filter coefficients. |
144 | // alpha is Q24 |
145 | let alpha = (1 << 24) / delay; |
146 | // warp is 7.12 (5.12? the max value is 70386 in Q12). |
147 | let warp = warp_alpha(alpha).max(1) as i64; |
148 | // k1 is 9.12 (6.12?) |
149 | let k1 = 3 * warp; |
150 | // k2 is 16.24 (11.24?) |
151 | let k2 = k1 * warp; |
152 | // d is 16.15 (10.15?) |
153 | let d = ((((1 << 12) + k1) << 12) + k2 + 256) >> 9; |
154 | // a is 0.32, since d is larger than both 1.0 and k2 |
155 | let a = (k2 << 23) / d; |
156 | // ik2 is 25.24 |
157 | let ik2 = (1i64 << 48) / k2; |
158 | // b1 is Q56; in practice, the integer ranges between -2 and 2. |
159 | let b1 = 2 * a * (ik2 - (1i64 << 24)); |
160 | // b2 is Q56; in practice, the integer ranges between -2 and 2. |
161 | let b2 = (1i64 << 56) - ((4 * a) << 24) - b1; |
162 | // All of the filter parameters are Q24. |
163 | ( |
164 | ((b1 + (1i64 << 31)) >> 32) as i32, |
165 | ((b2 + (1i64 << 31)) >> 32) as i32, |
166 | ((a + 128) >> 8) as i32, |
167 | ) |
168 | } |
169 | |
170 | impl IIRBessel2 { |
171 | pub fn new(delay: i32, value: i32) -> IIRBessel2 { |
172 | let (c0, c1, g) = iir_bessel2_get_parameters(delay); |
173 | IIRBessel2 { c: [c0, c1], g, x: [value, value], y: [value, value] } |
174 | } |
175 | |
176 | // Re-initialize Bessel filter coefficients with the specified delay. |
177 | // This does not alter the x/y state, but changes the reaction time of the |
178 | // filter. |
179 | // Altering the time constant of a reactive filter without altering internal |
180 | // state is something that has to be done carefully, but our design operates |
181 | // at high enough delays and with small enough time constant changes to make |
182 | // it safe. |
183 | pub fn reinit(&mut self, delay: i32) { |
184 | let (c0, c1, g) = iir_bessel2_get_parameters(delay); |
185 | self.c[0] = c0; |
186 | self.c[1] = c1; |
187 | self.g = g; |
188 | } |
189 | |
190 | pub fn update(&mut self, x: i32) -> i32 { |
191 | let c0 = self.c[0] as i64; |
192 | let c1 = self.c[1] as i64; |
193 | let g = self.g as i64; |
194 | let x0 = self.x[0] as i64; |
195 | let x1 = self.x[1] as i64; |
196 | let y0 = self.y[0] as i64; |
197 | let y1 = self.y[1] as i64; |
198 | let ya = |
199 | ((((x as i64) + x0 * 2 + x1) * g + y0 * c0 + y1 * c1 + (1i64 << 23)) |
200 | >> 24) as i32; |
201 | self.x[1] = self.x[0]; |
202 | self.x[0] = x; |
203 | self.y[1] = self.y[0]; |
204 | self.y[0] = ya; |
205 | ya |
206 | } |
207 | } |
208 | |
// Per-frame statistics carried from the first pass to the second pass.
#[derive(Copy, Clone)]
struct RCFrameMetrics {
  // Base-2 log of this frame's scale factor, in Q24.
  log_scale_q24: i32,
  // Frame sub-type assigned in pass 1.
  fti: usize,
  // Whether pass 1 showed the frame (false for hidden frames).
  show_frame: bool,
  // TODO: The input frame number corresponding to this frame in the input.
  // input_frameno: u32
  // TODO vfr: PTS
}

impl RCFrameMetrics {
  // An all-zero placeholder: sub-type 0, zero log scale, not shown.
  const fn new() -> RCFrameMetrics {
    Self { show_frame: false, fti: 0, log_scale_q24: 0 }
  }
}
227 | |
/// Rate control pass summary
///
/// It contains encoding information related to the whole previous
/// encoding pass.
#[derive (Debug, Default, Clone)]
pub struct RCSummary {
  // The number of TUs in the pass; the parser rejects values below 1.
  pub(crate) ntus: i32,
  // The frame count of each frame subtype, with one extra slot beyond the
  // FRAME_NSUBTYPES tracked sub-types (index FRAME_SUBTYPE_SEF).
  nframes: [i32; FRAME_NSUBTYPES + 1],
  // One byte per tracked sub-type, read verbatim from the header.
  // NOTE(review): presumably the rate model exponents (see RCState::exp).
  exp: [u8; FRAME_NSUBTYPES],
  // The sum of the scale values for each tracked sub-type; never negative.
  scale_sum: [i64; FRAME_NSUBTYPES],
  // The sum of all entries of nframes.
  pub(crate) total: i32,
}
240 | |
// Backing storage to deserialize Summary and Per-Frame pass data
//
// Can store up to a full header size since it is the largest of the two
// packet kinds.
// Bytes are appended at pass2_buffer_fill by buffer_fill() and consumed from
// pass2_buffer_pos by the parsing methods.
pub(crate) struct RCDeserialize {
  // The current byte position in the frame metrics buffer.
  pass2_buffer_pos: usize,
  // In pass 2, this represents the number of bytes that are available in the
  // input buffer.
  pass2_buffer_fill: usize,
  // Buffer for current frame metrics in pass 2.
  pass2_buffer: [u8; TWOPASS_HEADER_SZ],
}
254 | |
255 | impl Default for RCDeserialize { |
256 | fn default() -> Self { |
257 | RCDeserialize { |
258 | pass2_buffer: [0; TWOPASS_HEADER_SZ], |
259 | pass2_buffer_pos: 0, |
260 | pass2_buffer_fill: 0, |
261 | } |
262 | } |
263 | } |
264 | |
265 | impl RCDeserialize { |
266 | // Fill the backing storage by reading enough bytes from the |
267 | // buf slice until goal bytes are available for parsing. |
268 | // |
269 | // goal must be at most TWOPASS_HEADER_SZ. |
270 | pub(crate) fn buffer_fill( |
271 | &mut self, buf: &[u8], consumed: usize, goal: usize, |
272 | ) -> usize { |
273 | let mut consumed = consumed; |
274 | while self.pass2_buffer_fill < goal && consumed < buf.len() { |
275 | self.pass2_buffer[self.pass2_buffer_fill] = buf[consumed]; |
276 | self.pass2_buffer_fill += 1; |
277 | consumed += 1; |
278 | } |
279 | consumed |
280 | } |
281 | |
282 | // Read the next n bytes as i64. |
283 | // n must be within 1 and 8 |
284 | fn unbuffer_val(&mut self, n: usize) -> i64 { |
285 | let mut bytes = n; |
286 | let mut ret = 0; |
287 | let mut shift = 0; |
288 | while bytes > 0 { |
289 | bytes -= 1; |
290 | ret |= (self.pass2_buffer[self.pass2_buffer_pos] as i64) << shift; |
291 | self.pass2_buffer_pos += 1; |
292 | shift += 8; |
293 | } |
294 | ret |
295 | } |
296 | |
297 | // Read metrics for the next frame. |
298 | fn parse_metrics(&mut self) -> Result<RCFrameMetrics, String> { |
299 | debug_assert!(self.pass2_buffer_fill >= TWOPASS_PACKET_SZ); |
300 | let ft_val = self.unbuffer_val(4); |
301 | let show_frame = (ft_val >> 31) != 0; |
302 | let fti = (ft_val & 0x7FFFFFFF) as usize; |
303 | // Make sure the frame type is valid. |
304 | if fti > FRAME_NSUBTYPES { |
305 | return Err("Invalid frame type" .to_string()); |
306 | } |
307 | let log_scale_q24 = self.unbuffer_val(4) as i32; |
308 | Ok(RCFrameMetrics { log_scale_q24, fti, show_frame }) |
309 | } |
310 | |
311 | // Read the summary header data. |
312 | pub(crate) fn parse_summary(&mut self) -> Result<RCSummary, String> { |
313 | // check the magic value and version number. |
314 | if self.unbuffer_val(4) != TWOPASS_MAGIC as i64 { |
315 | return Err("Magic value mismatch" .to_string()); |
316 | } |
317 | if self.unbuffer_val(4) != TWOPASS_VERSION as i64 { |
318 | return Err("Version number mismatch" .to_string()); |
319 | } |
320 | let mut s = |
321 | RCSummary { ntus: self.unbuffer_val(4) as i32, ..Default::default() }; |
322 | |
323 | // Make sure the file claims to have at least one TU. |
324 | // Otherwise we probably got the placeholder data from an aborted |
325 | // pass 1. |
326 | if s.ntus < 1 { |
327 | return Err("No TUs found in first pass summary" .to_string()); |
328 | } |
329 | let mut total: i32 = 0; |
330 | for nframes in s.nframes.iter_mut() { |
331 | let n = self.unbuffer_val(4) as i32; |
332 | if n < 0 { |
333 | return Err("Got negative frame count" .to_string()); |
334 | } |
335 | total = total |
336 | .checked_add(n) |
337 | .ok_or_else(|| "Frame count too large" .to_string())?; |
338 | |
339 | *nframes = n; |
340 | } |
341 | |
342 | // We can't have more TUs than frames. |
343 | if s.ntus > total { |
344 | return Err("More TUs than frames" .to_string()); |
345 | } |
346 | |
347 | s.total = total; |
348 | |
349 | for exp in s.exp.iter_mut() { |
350 | *exp = self.unbuffer_val(1) as u8; |
351 | } |
352 | |
353 | for scale_sum in s.scale_sum.iter_mut() { |
354 | *scale_sum = self.unbuffer_val(8); |
355 | if *scale_sum < 0 { |
356 | return Err("Got negative scale sum" .to_string()); |
357 | } |
358 | } |
359 | Ok(s) |
360 | } |
361 | } |
362 | |
// The complete state of the rate controller: the user-supplied targets and
// limits, the bit reservoir, the per-frame-sub-type rate model, and the
// bookkeeping needed for two-pass encoding.
pub struct RCState {
  // The target bit-rate in bits per second.
  // Rate control is considered inactive when this is not positive.
  target_bitrate: i32,
  // The number of TUs over which to distribute the reservoir usage.
  // We use TUs because in our leaky bucket model, we only add bits to the
  // reservoir on TU boundaries.
  reservoir_frame_delay: i32,
  // Whether or not the reservoir_frame_delay was explicitly specified by the
  // user, or is the default value.
  reservoir_frame_delay_is_set: bool,
  // The maximum quantizer index to allow (for the luma AC coefficients, other
  // quantizers will still be adjusted to match).
  maybe_ac_qi_max: Option<u8>,
  // The minimum quantizer index to allow (for the luma AC coefficients).
  ac_qi_min: u8,
  // Will we drop frames to meet bitrate requirements?
  drop_frames: bool,
  // Do we respect the maximum reservoir fullness?
  cap_overflow: bool,
  // Can the reservoir go negative?
  cap_underflow: bool,
  // The log of the first-pass base quantizer.
  pass1_log_base_q: i64,
  // Two-pass mode state.
  // PASS_SINGLE => 1-pass encoding.
  // PASS_1 => 1st pass of 2-pass encoding.
  // PASS_2 => 2nd pass of 2-pass encoding.
  // PASS_2_PLUS_1 => 2nd pass of 2-pass encoding, but also emitting pass 1
  //  data again.
  twopass_state: i32,
  // The log of the number of pixels in a frame in Q57 format.
  log_npixels: i64,
  // The target average bits per Temporal Unit (input frame).
  bits_per_tu: i64,
  // The current bit reservoir fullness (bits available to be used).
  reservoir_fullness: i64,
  // The target buffer fullness.
  // This is where we'd like to be by the last keyframe that appears in the
  // next reservoir_frame_delay frames.
  reservoir_target: i64,
  // The maximum buffer fullness (total size of the buffer).
  reservoir_max: i64,
  // The log of estimated scale factor for the rate model in Q57 format.
  //
  // TODO: Convert to Q23 or figure out a better way to avoid overflow
  // once 2-pass mode is introduced, if required.
  log_scale: [i64; FRAME_NSUBTYPES],
  // The exponent used in the rate model in Q6 format.
  exp: [u8; FRAME_NSUBTYPES],
  // The log of an estimated scale factor used to obtain the real framerate,
  // for VFR sources or, e.g., 12 fps content doubled to 24 fps, etc.
  // TODO vfr: log_vfr_scale: i64,
  // Second-order lowpass filters to track scale and VFR.
  scalefilter: [IIRBessel2; FRAME_NSUBTYPES],
  // TODO vfr: vfrfilter: IIRBessel2,
  // The number of frames of each type we have seen, for filter adaptation
  // purposes.
  // These are only 32 bits to guarantee that we can sum the scales over the
  // whole file without overflow in a 64-bit int.
  // That limits us to 2.268 years at 60 fps (minus 33% with re-ordering).
  nframes: [i32; FRAME_NSUBTYPES + 1],
  // One delay value per sub-type except the first; each entry starts out at
  // INTER_DELAY_TARGET_MIN.
  // NOTE(review): presumably the current scalefilter delay of each inter
  // sub-type -- confirm where the filters are re-initialized.
  inter_delay: [i32; FRAME_NSUBTYPES - 1],
  // Starts out as half of reservoir_frame_delay.
  // NOTE(review): presumably the value the inter_delay entries grow towards.
  inter_delay_target: i32,
  // The total accumulated estimation bias.
  rate_bias: i64,
  // The number of (non-Show Existing Frame) frames that have been encoded.
  nencoded_frames: i64,
  // The number of Show Existing Frames that have been emitted.
  nsef_frames: i64,
  // Buffer for current frame metrics in pass 1.
  pass1_buffer: [u8; TWOPASS_HEADER_SZ],
  // Whether or not the user has retrieved the pass 1 data for the last frame.
  // For PASS_1 or PASS_2_PLUS_1 encoding, this is set to false after each
  // frame is encoded, and must be set to true by calling twopass_out() before
  // the next frame can be encoded.
  pub pass1_data_retrieved: bool,
  // Marks whether or not the user has retrieved the summary data at the end of
  // the encode.
  pass1_summary_retrieved: bool,
  // Whether or not the user has provided enough data to encode in the second
  // pass.
  // For PASS_2 or PASS_2_PLUS_1 encoding, this is set to false after each
  // frame, and must be set to true by calling twopass_in() before the next
  // frame can be encoded.
  pass2_data_ready: bool,
  // TODO: Add a way to force the next frame to be a keyframe in 2-pass mode.
  // Right now we are relying on keyframe detection to detect the same
  // keyframes.
  // The metrics for the previous frame.
  prev_metrics: RCFrameMetrics,
  // The metrics for the current frame.
  cur_metrics: RCFrameMetrics,
  // The buffered metrics for future frames.
  // This is used as a circular buffer; see nframe_metrics and
  // frame_metrics_head.
  frame_metrics: Vec<RCFrameMetrics>,
  // The total number of frames still in use in the circular metric buffer.
  nframe_metrics: usize,
  // The index of the current frame in the circular metric buffer.
  frame_metrics_head: usize,
  // Data deserialization
  des: RCDeserialize,
  // The TU count encoded so far.
  ntus: i32,
  // The TU count for the whole file.
  ntus_total: i32,
  // The remaining TU count.
  ntus_left: i32,
  // The frame count of each frame subtype in the whole file.
  nframes_total: [i32; FRAME_NSUBTYPES + 1],
  // The sum of those counts.
  nframes_total_total: i32,
  // The number of frames of each subtype yet to be processed.
  nframes_left: [i32; FRAME_NSUBTYPES + 1],
  // The sum of the scale values for each frame subtype.
  scale_sum: [i64; FRAME_NSUBTYPES],
  // The number of TUs represented by the current scale sums.
  scale_window_ntus: i32,
  // The frame count of each frame subtype in the current scale window.
  scale_window_nframes: [i32; FRAME_NSUBTYPES + 1],
  // The sum of the scale values for each frame subtype in the current window.
  scale_window_sum: [i64; FRAME_NSUBTYPES],
}
484 | |
// The quantizer choice for one frame, as produced by the rate controller.
// TODO: Separate qi values for each color plane.
pub struct QuantizerParameters {
  // The full-precision, unmodulated log quantizer upon which our modulated
  // quantizer indices are based.
  // This is only used to limit sudden quality changes from frame to frame, and
  // as such is not adjusted when we encounter buffer overrun or underrun.
  pub log_base_q: i64,
  // The full-precision log quantizer modulated by the current frame type upon
  // which our quantizer indices are based (including any adjustments to
  // prevent buffer overrun or underrun).
  // This is used when estimating the scale parameter once we know the actual
  // bit usage of a frame.
  pub log_target_q: i64,
  // The DC quantizer index of each plane, in the order Y, U, V.
  // The chroma entries are 0 for monochrome (4:0:0) content.
  pub dc_qi: [u8; 3],
  // The AC quantizer index of each plane, in the order Y, U, V.
  // The luma entry is the base index and is at least 1; the chroma entries
  // are 0 for monochrome (4:0:0) content.
  pub ac_qi: [u8; 3],
  // The rate-distortion multiplier derived from log_target_q.
  pub lambda: f64,
  // A per-plane distortion weight, in the order Y, U, V, derived from the
  // difference between log_target_q and the log quantizer of the plane.
  pub dist_scale: [f64; 3],
}
503 | |
// Converts a Q57 base-2 logarithm into the natural logarithm of the square:
// for x in Q57, exp(x * Q57_SQUARE_EXP_SCALE) == (2^(x / 2^57))^2.
const Q57_SQUARE_EXP_SCALE: f64 =
  (2.0 * ::std::f64::consts::LN_2) / ((1i64 << 57) as f64);
506 | |
507 | // Daala style log-offset for chroma quantizers |
508 | // TODO: Optimal offsets for more configurations than just BT.709 |
509 | fn chroma_offset( |
510 | log_target_q: i64, chroma_sampling: ChromaSampling, |
511 | ) -> (i64, i64) { |
512 | let x: i64 = log_target_q.max(0); |
513 | // Gradient optimized for CIEDE2000+PSNR on subset3 |
514 | let y: i64 = match chroma_sampling { |
515 | ChromaSampling::Cs400 => 0, |
516 | ChromaSampling::Cs420 => (x >> 2) + (x >> 6), // 0.266 |
517 | ChromaSampling::Cs422 => (x >> 3) + (x >> 4) - (x >> 7), // 0.180 |
518 | ChromaSampling::Cs444 => (x >> 4) + (x >> 5) + (x >> 8), // 0.098 |
519 | }; |
520 | // blog64(7) - blog64(4); blog64(5) - blog64(4) |
521 | (0x19D_5D9F_D501_0B37 - y, 0xA4_D3C2_5E68_DC58 - y) |
522 | } |
523 | |
524 | impl QuantizerParameters { |
525 | fn new_from_log_q( |
526 | log_base_q: i64, log_target_q: i64, bit_depth: usize, |
527 | chroma_sampling: ChromaSampling, is_intra: bool, |
528 | log_isqrt_mean_scale: i64, |
529 | ) -> QuantizerParameters { |
530 | let scale = log_isqrt_mean_scale + q57(QSCALE + bit_depth as i32 - 8); |
531 | |
532 | let mut log_q_y = log_target_q; |
533 | if !is_intra && bit_depth == 8 { |
534 | log_q_y = log_target_q |
535 | + (log_target_q >> 32) * Q_MODEL_MUL[chroma_sampling as usize] |
536 | + Q_MODEL_ADD[chroma_sampling as usize]; |
537 | } |
538 | |
539 | let quantizer = bexp64(log_q_y + scale); |
540 | let (offset_u, offset_v) = |
541 | chroma_offset(log_q_y + log_isqrt_mean_scale, chroma_sampling); |
542 | let mono = chroma_sampling == ChromaSampling::Cs400; |
543 | let log_q_u = log_q_y + offset_u; |
544 | let log_q_v = log_q_y + offset_v; |
545 | let quantizer_u = bexp64(log_q_u + scale); |
546 | let quantizer_v = bexp64(log_q_v + scale); |
547 | let lambda = (::std::f64::consts::LN_2 / 6.0) |
548 | * (((log_target_q + log_isqrt_mean_scale) as f64) |
549 | * Q57_SQUARE_EXP_SCALE) |
550 | .exp(); |
551 | |
552 | let scale = |q| bexp64((log_target_q - q) * 2 + q57(16)) as f64 / 65536.; |
553 | let dist_scale = [scale(log_q_y), scale(log_q_u), scale(log_q_v)]; |
554 | |
555 | let base_q_idx = select_ac_qi(quantizer, bit_depth).max(1); |
556 | |
557 | // delta_q only gets 6 bits + a sign bit, so it can differ by 63 at most. |
558 | let min_qi = base_q_idx.saturating_sub(63).max(1); |
559 | let max_qi = base_q_idx.saturating_add(63).min(255); |
560 | let clamp_qi = |qi: u8| qi.clamp(min_qi, max_qi); |
561 | |
562 | QuantizerParameters { |
563 | log_base_q, |
564 | log_target_q, |
565 | // TODO: Allow lossless mode; i.e. qi == 0. |
566 | dc_qi: [ |
567 | clamp_qi(select_dc_qi(quantizer, bit_depth)), |
568 | if mono { 0 } else { clamp_qi(select_dc_qi(quantizer_u, bit_depth)) }, |
569 | if mono { 0 } else { clamp_qi(select_dc_qi(quantizer_v, bit_depth)) }, |
570 | ], |
571 | ac_qi: [ |
572 | base_q_idx, |
573 | if mono { 0 } else { clamp_qi(select_ac_qi(quantizer_u, bit_depth)) }, |
574 | if mono { 0 } else { clamp_qi(select_ac_qi(quantizer_v, bit_depth)) }, |
575 | ], |
576 | lambda, |
577 | dist_scale, |
578 | } |
579 | } |
580 | } |
581 | |
582 | impl RCState { |
583 | pub fn new( |
584 | frame_width: i32, frame_height: i32, framerate_num: i64, |
585 | framerate_den: i64, target_bitrate: i32, maybe_ac_qi_max: Option<u8>, |
586 | ac_qi_min: u8, max_key_frame_interval: i32, |
587 | maybe_reservoir_frame_delay: Option<i32>, |
588 | ) -> RCState { |
589 | // The default buffer size is set equal to 1.5x the keyframe interval, or 240 |
590 | // frames; whichever is smaller, with a minimum of 12. |
591 | // For user set values, we enforce a minimum of 12. |
592 | // The interval is short enough to allow reaction, but long enough to allow |
593 | // looking into the next GOP (avoiding the case where the last frames |
594 | // before an I-frame get starved), in most cases. |
595 | // The 12 frame minimum gives us some chance to distribute bit estimation |
596 | // errors in the worst case. |
597 | let reservoir_frame_delay = maybe_reservoir_frame_delay |
598 | .unwrap_or_else(|| ((max_key_frame_interval * 3) >> 1).min(240)) |
599 | .max(12); |
600 | // TODO: What are the limits on these? |
601 | let npixels = (frame_width as i64) * (frame_height as i64); |
602 | // Insane framerates or frame sizes mean insane bitrates. |
603 | // Let's not get carried away. |
604 | // We also subtract 16 bits from each temporal unit to account for the |
605 | // temporal delimiter, whose bits are not included in the frame sizes |
606 | // reported to update_state(). |
607 | // TODO: Support constraints imposed by levels. |
608 | let bits_per_tu = clamp( |
609 | (target_bitrate as i64) * framerate_den / framerate_num, |
610 | 40, |
611 | 0x4000_0000_0000, |
612 | ) - (TEMPORAL_DELIMITER.len() * 8) as i64; |
613 | let reservoir_max = bits_per_tu * (reservoir_frame_delay as i64); |
614 | // Start with a buffer fullness and fullness target of 50%. |
615 | let reservoir_target = (reservoir_max + 1) >> 1; |
616 | // Pick exponents and initial scales for quantizer selection. |
617 | let ibpp = npixels / bits_per_tu; |
618 | // These have been derived by encoding many clips at every quantizer |
619 | // and running a piecewise-linear regression in binary log space. |
620 | let (i_exp, i_log_scale) = if ibpp < 1 { |
621 | (48u8, blog64(36) - q57(QSCALE)) |
622 | } else if ibpp < 4 { |
623 | (61u8, blog64(55) - q57(QSCALE)) |
624 | } else { |
625 | (77u8, blog64(129) - q57(QSCALE)) |
626 | }; |
627 | let (p_exp, p_log_scale) = if ibpp < 2 { |
628 | (69u8, blog64(32) - q57(QSCALE)) |
629 | } else if ibpp < 139 { |
630 | (104u8, blog64(84) - q57(QSCALE)) |
631 | } else { |
632 | (83u8, blog64(19) - q57(QSCALE)) |
633 | }; |
634 | let (b0_exp, b0_log_scale) = if ibpp < 2 { |
635 | (84u8, blog64(30) - q57(QSCALE)) |
636 | } else if ibpp < 92 { |
637 | (120u8, blog64(68) - q57(QSCALE)) |
638 | } else { |
639 | (68u8, blog64(4) - q57(QSCALE)) |
640 | }; |
641 | let (b1_exp, b1_log_scale) = if ibpp < 2 { |
642 | (87u8, blog64(27) - q57(QSCALE)) |
643 | } else if ibpp < 126 { |
644 | (139u8, blog64(84) - q57(QSCALE)) |
645 | } else { |
646 | (61u8, blog64(1) - q57(QSCALE)) |
647 | }; |
648 | |
649 | // TODO: Add support for "golden" P frames. |
650 | RCState { |
651 | target_bitrate, |
652 | reservoir_frame_delay, |
653 | reservoir_frame_delay_is_set: maybe_reservoir_frame_delay.is_some(), |
654 | maybe_ac_qi_max, |
655 | ac_qi_min, |
656 | drop_frames: false, |
657 | cap_overflow: true, |
658 | cap_underflow: false, |
659 | pass1_log_base_q: 0, |
660 | twopass_state: PASS_SINGLE, |
661 | log_npixels: blog64(npixels), |
662 | bits_per_tu, |
663 | reservoir_fullness: reservoir_target, |
664 | reservoir_target, |
665 | reservoir_max, |
666 | log_scale: [i_log_scale, p_log_scale, b0_log_scale, b1_log_scale], |
667 | exp: [i_exp, p_exp, b0_exp, b1_exp], |
668 | scalefilter: [ |
669 | IIRBessel2::new(4, q57_to_q24(i_log_scale)), |
670 | IIRBessel2::new(INTER_DELAY_TARGET_MIN, q57_to_q24(p_log_scale)), |
671 | IIRBessel2::new(INTER_DELAY_TARGET_MIN, q57_to_q24(b0_log_scale)), |
672 | IIRBessel2::new(INTER_DELAY_TARGET_MIN, q57_to_q24(b1_log_scale)), |
673 | ], |
674 | // TODO VFR |
675 | nframes: [0; FRAME_NSUBTYPES + 1], |
676 | inter_delay: [INTER_DELAY_TARGET_MIN; FRAME_NSUBTYPES - 1], |
677 | inter_delay_target: reservoir_frame_delay >> 1, |
678 | rate_bias: 0, |
679 | nencoded_frames: 0, |
680 | nsef_frames: 0, |
681 | pass1_buffer: [0; TWOPASS_HEADER_SZ], |
682 | pass1_data_retrieved: true, |
683 | pass1_summary_retrieved: false, |
684 | pass2_data_ready: false, |
685 | prev_metrics: RCFrameMetrics::new(), |
686 | cur_metrics: RCFrameMetrics::new(), |
687 | frame_metrics: Vec::new(), |
688 | nframe_metrics: 0, |
689 | frame_metrics_head: 0, |
690 | ntus: 0, |
691 | ntus_total: 0, |
692 | ntus_left: 0, |
693 | nframes_total: [0; FRAME_NSUBTYPES + 1], |
694 | nframes_total_total: 0, |
695 | nframes_left: [0; FRAME_NSUBTYPES + 1], |
696 | scale_sum: [0; FRAME_NSUBTYPES], |
697 | scale_window_ntus: 0, |
698 | scale_window_nframes: [0; FRAME_NSUBTYPES + 1], |
699 | scale_window_sum: [0; FRAME_NSUBTYPES], |
700 | des: RCDeserialize::default(), |
701 | } |
702 | } |
703 | |
704 | pub(crate) fn select_first_pass_qi( |
705 | &self, bit_depth: usize, fti: usize, chroma_sampling: ChromaSampling, |
706 | ) -> QuantizerParameters { |
707 | // Adjust the quantizer for the frame type, result is Q57: |
708 | let log_q = ((self.pass1_log_base_q + (1i64 << 11)) >> 12) |
709 | * (MQP_Q12[fti] as i64) |
710 | + DQP_Q57[fti]; |
711 | QuantizerParameters::new_from_log_q( |
712 | self.pass1_log_base_q, |
713 | log_q, |
714 | bit_depth, |
715 | chroma_sampling, |
716 | fti == 0, |
717 | 0, |
718 | ) |
719 | } |
720 | |
721 | // TODO: Separate quantizers for Cb and Cr. |
722 | #[profiling::function ] |
723 | pub(crate) fn select_qi<T: Pixel>( |
724 | &self, ctx: &ContextInner<T>, output_frameno: u64, fti: usize, |
725 | maybe_prev_log_base_q: Option<i64>, log_isqrt_mean_scale: i64, |
726 | ) -> QuantizerParameters { |
727 | // Is rate control active? |
728 | if self.target_bitrate <= 0 { |
729 | // Rate control is not active. |
730 | // Derive quantizer directly from frame type. |
731 | let bit_depth = ctx.config.bit_depth; |
732 | let chroma_sampling = ctx.config.chroma_sampling; |
733 | let (log_base_q, log_q) = |
734 | Self::calc_flat_quantizer(ctx.config.quantizer as u8, bit_depth, fti); |
735 | QuantizerParameters::new_from_log_q( |
736 | log_base_q, |
737 | log_q, |
738 | bit_depth, |
739 | chroma_sampling, |
740 | fti == 0, |
741 | log_isqrt_mean_scale, |
742 | ) |
743 | } else { |
744 | let mut nframes: [i32; FRAME_NSUBTYPES + 1] = [0; FRAME_NSUBTYPES + 1]; |
745 | let mut log_scale: [i64; FRAME_NSUBTYPES] = self.log_scale; |
746 | let mut reservoir_tus = self.reservoir_frame_delay.min(self.ntus_left); |
747 | let mut reservoir_frames = 0; |
748 | let mut log_cur_scale = (self.scalefilter[fti].y[0] as i64) << 33; |
749 | match self.twopass_state { |
750 | // First pass of 2-pass mode: use a fixed base quantizer. |
751 | PASS_1 => { |
752 | return self.select_first_pass_qi( |
753 | ctx.config.bit_depth, |
754 | fti, |
755 | ctx.config.chroma_sampling, |
756 | ); |
757 | } |
758 | // Second pass of 2-pass mode: we know exactly how much of each frame |
759 | // type there is in the current buffer window, and have estimates for |
760 | // the scales. |
761 | PASS_2 | PASS_2_PLUS_1 => { |
762 | let mut scale_window_sum: [i64; FRAME_NSUBTYPES] = |
763 | self.scale_window_sum; |
764 | let mut scale_window_nframes: [i32; FRAME_NSUBTYPES + 1] = |
765 | self.scale_window_nframes; |
766 | // Intentionally exclude Show Existing Frame frames from this. |
767 | for ftj in 0..FRAME_NSUBTYPES { |
768 | reservoir_frames += scale_window_nframes[ftj]; |
769 | } |
770 | // If we're approaching the end of the file, add some slack to keep |
771 | // us from slamming into a rail. |
772 | // Our rate accuracy goes down, but it keeps the result sensible. |
773 | // We position the target where the first forced keyframe beyond the |
774 | // end of the file would be (for consistency with 1-pass mode). |
775 | // TODO: let mut buf_pad = self.reservoir_frame_delay.min(...); |
776 | // if buf_delay < buf_pad { |
777 | // buf_pad -= buf_delay; |
778 | // } |
779 | // else ... |
780 | // Otherwise, search for the last keyframe in the buffer window and |
781 | // target that. |
782 | // Currently we only do this when using a finite buffer. |
783 | // We could save the position of the last keyframe in the stream in |
784 | // the summary data and do it with a whole-file buffer as well, but |
785 | // it isn't likely to make a difference. |
786 | if !self.frame_metrics.is_empty() { |
787 | let mut fm_tail = self.frame_metrics_head + self.nframe_metrics; |
788 | if fm_tail >= self.frame_metrics.len() { |
789 | fm_tail -= self.frame_metrics.len(); |
790 | } |
791 | let mut fmi = fm_tail; |
792 | loop { |
793 | if fmi == 0 { |
794 | fmi += self.frame_metrics.len(); |
795 | } |
796 | fmi -= 1; |
797 | // Stop before we remove the first frame. |
798 | if fmi == self.frame_metrics_head { |
799 | break; |
800 | } |
801 | // If we find a keyframe, remove it and everything past it. |
802 | if self.frame_metrics[fmi].fti == FRAME_SUBTYPE_I { |
803 | while fmi != fm_tail { |
804 | let m = &self.frame_metrics[fmi]; |
805 | let ftj = m.fti; |
806 | scale_window_nframes[ftj] -= 1; |
807 | if ftj < FRAME_NSUBTYPES { |
808 | scale_window_sum[ftj] -= bexp_q24(m.log_scale_q24); |
809 | reservoir_frames -= 1; |
810 | } |
811 | if m.show_frame { |
812 | reservoir_tus -= 1; |
813 | } |
814 | fmi += 1; |
815 | if fmi >= self.frame_metrics.len() { |
816 | fmi = 0; |
817 | } |
818 | } |
819 | // And stop scanning backwards. |
820 | break; |
821 | } |
822 | } |
823 | } |
824 | nframes = scale_window_nframes; |
825 | // If we're not using the same frame type as in pass 1 (because |
826 | // someone changed some encoding parameters), remove that scale |
827 | // estimate. |
828 | // We'll add a replacement for the correct frame type below. |
829 | if self.cur_metrics.fti != fti { |
830 | scale_window_nframes[self.cur_metrics.fti] -= 1; |
831 | if self.cur_metrics.fti != FRAME_SUBTYPE_SEF { |
832 | scale_window_sum[self.cur_metrics.fti] -= |
833 | bexp_q24(self.cur_metrics.log_scale_q24); |
834 | } |
835 | } else { |
836 | log_cur_scale = (self.cur_metrics.log_scale_q24 as i64) << 33; |
837 | } |
838 | // If we're approaching the end of the file, add some slack to keep |
839 | // us from slamming into a rail. |
840 | // Our rate accuracy goes down, but it keeps the result sensible. |
841 | // We position the target where the first forced keyframe beyond the |
842 | // end of the file would be (for consistency with 1-pass mode). |
843 | if reservoir_tus >= self.ntus_left |
844 | && self.ntus_total as u64 |
845 | > ctx.gop_input_frameno_start[&output_frameno] |
846 | { |
847 | let nfinal_gop_tus = self.ntus_total |
848 | - (ctx.gop_input_frameno_start[&output_frameno] as i32); |
849 | if ctx.config.max_key_frame_interval as i32 > nfinal_gop_tus { |
850 | let reservoir_pad = (ctx.config.max_key_frame_interval as i32 |
851 | - nfinal_gop_tus) |
852 | .min(self.reservoir_frame_delay - reservoir_tus); |
853 | let (guessed_reservoir_frames, guessed_reservoir_tus) = ctx |
854 | .guess_frame_subtypes( |
855 | &mut nframes, |
856 | reservoir_tus + reservoir_pad, |
857 | ); |
858 | reservoir_frames = guessed_reservoir_frames; |
859 | reservoir_tus = guessed_reservoir_tus; |
860 | } |
861 | } |
862 | // Blend in the low-pass filtered scale according to how many |
863 | // frames of each type we need to add compared to the actual sums in |
864 | // our window. |
865 | for ftj in 0..FRAME_NSUBTYPES { |
866 | let scale = scale_window_sum[ftj] |
867 | + bexp_q24(self.scalefilter[ftj].y[0]) |
868 | * (nframes[ftj] - scale_window_nframes[ftj]) as i64; |
869 | log_scale[ftj] = if nframes[ftj] > 0 { |
870 | blog64(scale) - blog64(nframes[ftj] as i64) - q57(24) |
871 | } else { |
872 | -self.log_npixels |
873 | }; |
874 | } |
875 | } |
876 | // Single pass. |
877 | _ => { |
878 | // Figure out how to re-distribute bits so that we hit our fullness |
879 | // target before the last keyframe in our current buffer window |
880 | // (after the current frame), or the end of the buffer window, |
881 | // whichever comes first. |
882 | // Count the various types and classes of frames. |
883 | let (guessed_reservoir_frames, guessed_reservoir_tus) = |
884 | ctx.guess_frame_subtypes(&mut nframes, self.reservoir_frame_delay); |
885 | reservoir_frames = guessed_reservoir_frames; |
886 | reservoir_tus = guessed_reservoir_tus; |
887 | // TODO: Scale for VFR. |
888 | } |
889 | } |
890 | // If we've been missing our target, add a penalty term. |
891 | let rate_bias = (self.rate_bias / (self.nencoded_frames + 100)) |
892 | * (reservoir_frames as i64); |
893 | // rate_total is the total bits available over the next |
894 | // reservoir_tus TUs. |
895 | let rate_total = self.reservoir_fullness - self.reservoir_target |
896 | + rate_bias |
897 | + (reservoir_tus as i64) * self.bits_per_tu; |
898 | // Find a target quantizer that meets our rate target for the |
899 | // specific mix of frame types we'll have over the next |
900 | // reservoir_frame frames. |
901 | // We model the rate<->quantizer relationship as |
902 | // rate = scale*(quantizer**-exp) |
903 | // In this case, we have our desired rate, an exponent selected in |
904 | // setup, and a scale that's been measured over our frame history, |
905 | // so we're solving for the quantizer. |
906 | // Exponentiation with arbitrary exponents is expensive, so we work |
907 | // in the binary log domain (binary exp and log aren't too bad): |
908 | // rate = exp2(log2(scale) - log2(quantizer)*exp) |
// There's no easy closed form solution, so we bisection search for it.
910 | let bit_depth = ctx.config.bit_depth; |
911 | let chroma_sampling = ctx.config.chroma_sampling; |
912 | // TODO: Proper handling of lossless. |
913 | let mut log_qlo = blog64(ac_q(self.ac_qi_min, 0, bit_depth).get() as i64) |
914 | - q57(QSCALE + bit_depth as i32 - 8); |
915 | // The AC quantizer tables map to values larger than the DC quantizer |
916 | // tables, so we use that as the upper bound to make sure we can use |
917 | // the full table if needed. |
918 | let mut log_qhi = blog64( |
919 | ac_q(self.maybe_ac_qi_max.unwrap_or(255), 0, bit_depth).get() as i64, |
920 | ) - q57(QSCALE + bit_depth as i32 - 8); |
921 | let mut log_base_q = (log_qlo + log_qhi) >> 1; |
922 | while log_qlo < log_qhi { |
923 | // Count bits contributed by each frame type using the model. |
924 | let mut bits = 0i64; |
925 | for ftj in 0..FRAME_NSUBTYPES { |
926 | // Modulate base quantizer by frame type. |
927 | let log_q = ((log_base_q + (1i64 << 11)) >> 12) |
928 | * (MQP_Q12[ftj] as i64) |
929 | + DQP_Q57[ftj]; |
930 | // All the fields here are Q57 except for the exponent, which is |
931 | // Q6. |
932 | bits += (nframes[ftj] as i64) |
933 | * bexp64( |
934 | log_scale[ftj] + self.log_npixels |
935 | - ((log_q + 32) >> 6) * (self.exp[ftj] as i64), |
936 | ); |
937 | } |
938 | // The number of bits for Show Existing Frame frames is constant. |
939 | bits += (nframes[FRAME_SUBTYPE_SEF] as i64) * SEF_BITS; |
940 | let diff = bits - rate_total; |
941 | if diff > 0 { |
942 | log_qlo = log_base_q + 1; |
943 | } else if diff < 0 { |
944 | log_qhi = log_base_q - 1; |
945 | } else { |
946 | break; |
947 | } |
948 | log_base_q = (log_qlo + log_qhi) >> 1; |
949 | } |
950 | // If this was not one of the initial frames, limit the change in |
951 | // base quantizer to within [0.8*Q, 1.2*Q] where Q is the previous |
952 | // frame's base quantizer. |
953 | if let Some(prev_log_base_q) = maybe_prev_log_base_q { |
954 | log_base_q = clamp( |
955 | log_base_q, |
956 | prev_log_base_q - 0xA4_D3C2_5E68_DC58, |
957 | prev_log_base_q + 0xA4_D3C2_5E68_DC58, |
958 | ); |
959 | } |
960 | // Modulate base quantizer by frame type. |
961 | let mut log_q = ((log_base_q + (1i64 << 11)) >> 12) |
962 | * (MQP_Q12[fti] as i64) |
963 | + DQP_Q57[fti]; |
964 | // The above allocation looks only at the total rate we'll accumulate |
965 | // in the next reservoir_frame_delay frames. |
966 | // However, we could overflow the bit reservoir on the very next |
967 | // frame. |
968 | // Check for that here if we're not using a soft target. |
969 | if self.cap_overflow { |
970 | // Allow 3% of the buffer for prediction error. |
971 | // This should be plenty, and we don't mind if we go a bit over. |
972 | // We only want to keep these bits from being completely wasted. |
973 | let margin = (self.reservoir_max + 31) >> 5; |
974 | // We want to use at least this many bits next frame. |
975 | let soft_limit = self.reservoir_fullness + self.bits_per_tu |
976 | - (self.reservoir_max - margin); |
977 | if soft_limit > 0 { |
978 | let log_soft_limit = blog64(soft_limit); |
979 | // If we're predicting we won't use that many bits... |
980 | // TODO: When using frame re-ordering, we should include the rate |
981 | // for all of the frames in the current TU. |
982 | // When there is more than one frame, there will be no direct |
983 | // solution for the required adjustment, however. |
984 | let log_scale_pixels = log_cur_scale + self.log_npixels; |
985 | let exp = self.exp[fti] as i64; |
986 | let mut log_q_exp = ((log_q + 32) >> 6) * exp; |
987 | if log_scale_pixels - log_q_exp < log_soft_limit { |
988 | // Scale the adjustment based on how far into the margin we are. |
989 | log_q_exp += ((log_scale_pixels - log_soft_limit - log_q_exp) |
990 | >> 32) |
991 | * ((margin.min(soft_limit) << 32) / margin); |
992 | log_q = ((log_q_exp + (exp >> 1)) / exp) << 6; |
993 | } |
994 | } |
995 | } |
996 | // We just checked we don't overflow the reservoir next frame, now |
997 | // check we don't underflow and bust the budget (when not using a |
998 | // soft target). |
999 | if self.maybe_ac_qi_max.is_none() { |
1000 | // Compute the maximum number of bits we can use in the next frame. |
1001 | // Allow 50% of the rate for a single frame for prediction error. |
1002 | // This may not be enough for keyframes or sudden changes in |
1003 | // complexity. |
1004 | let log_hard_limit = |
1005 | blog64(self.reservoir_fullness + (self.bits_per_tu >> 1)); |
1006 | // If we're predicting we'll use more than this... |
1007 | // TODO: When using frame re-ordering, we should include the rate |
1008 | // for all of the frames in the current TU. |
1009 | // When there is more than one frame, there will be no direct |
1010 | // solution for the required adjustment, however. |
1011 | let log_scale_pixels = log_cur_scale + self.log_npixels; |
1012 | let exp = self.exp[fti] as i64; |
1013 | let mut log_q_exp = ((log_q + 32) >> 6) * exp; |
1014 | if log_scale_pixels - log_q_exp > log_hard_limit { |
1015 | // Force the target to hit our limit exactly. |
1016 | log_q_exp = log_scale_pixels - log_hard_limit; |
1017 | log_q = ((log_q_exp + (exp >> 1)) / exp) << 6; |
1018 | // If that target is unreasonable, oh well; we'll have to drop. |
1019 | } |
1020 | } |
1021 | |
1022 | if let Some(qi_max) = self.maybe_ac_qi_max { |
1023 | let (max_log_base_q, max_log_q) = |
1024 | Self::calc_flat_quantizer(qi_max, ctx.config.bit_depth, fti); |
1025 | log_base_q = cmp::min(log_base_q, max_log_base_q); |
1026 | log_q = cmp::min(log_q, max_log_q); |
1027 | } |
1028 | if self.ac_qi_min > 0 { |
1029 | let (min_log_base_q, min_log_q) = |
1030 | Self::calc_flat_quantizer(self.ac_qi_min, ctx.config.bit_depth, fti); |
1031 | log_base_q = cmp::max(log_base_q, min_log_base_q); |
1032 | log_q = cmp::max(log_q, min_log_q); |
1033 | } |
1034 | QuantizerParameters::new_from_log_q( |
1035 | log_base_q, |
1036 | log_q, |
1037 | bit_depth, |
1038 | chroma_sampling, |
1039 | fti == 0, |
1040 | log_isqrt_mean_scale, |
1041 | ) |
1042 | } |
1043 | } |
1044 | |
1045 | // Computes a quantizer directly from the frame type and base quantizer index, |
1046 | // without consideration for rate control. |
1047 | fn calc_flat_quantizer( |
1048 | base_qi: u8, bit_depth: usize, fti: usize, |
1049 | ) -> (i64, i64) { |
1050 | // TODO: Rename "quantizer" something that indicates it is a quantizer |
1051 | // index, and move it somewhere more sensible (or choose a better way to |
1052 | // parameterize a "quality" configuration parameter). |
1053 | |
1054 | // We use the AC quantizer as the source quantizer since its quantizer |
1055 | // tables have unique entries, while the DC tables do not. |
1056 | let ac_quantizer = ac_q(base_qi, 0, bit_depth).get() as i64; |
1057 | // Pick the nearest DC entry since an exact match may be unavailable. |
1058 | let dc_qi = select_dc_qi(ac_quantizer, bit_depth); |
1059 | let dc_quantizer = dc_q(dc_qi, 0, bit_depth).get() as i64; |
1060 | // Get the log quantizers as Q57. |
1061 | let log_ac_q = blog64(ac_quantizer) - q57(QSCALE + bit_depth as i32 - 8); |
1062 | let log_dc_q = blog64(dc_quantizer) - q57(QSCALE + bit_depth as i32 - 8); |
1063 | // Target the midpoint of the chosen entries. |
1064 | let log_base_q = (log_ac_q + log_dc_q + 1) >> 1; |
1065 | // Adjust the quantizer for the frame type, result is Q57: |
1066 | let log_q = ((log_base_q + (1i64 << 11)) >> 12) * (MQP_Q12[fti] as i64) |
1067 | + DQP_Q57[fti]; |
1068 | (log_base_q, log_q) |
1069 | } |
1070 | |
1071 | #[profiling::function ] |
1072 | pub fn update_state( |
1073 | &mut self, bits: i64, fti: usize, show_frame: bool, log_target_q: i64, |
1074 | trial: bool, droppable: bool, |
1075 | ) -> bool { |
1076 | if trial { |
1077 | assert!(self.needs_trial_encode(fti)); |
1078 | assert!(bits > 0); |
1079 | } |
1080 | let mut dropped = false; |
1081 | // Update rate control only if rate control is active. |
1082 | if self.target_bitrate > 0 { |
1083 | let mut estimated_bits = 0; |
1084 | let mut bits = bits; |
1085 | let mut droppable = droppable; |
1086 | let mut log_scale = q57(-64); |
1087 | // Drop frames is also disabled for now in the case of infinite-buffer |
1088 | // two-pass mode. |
1089 | if !self.drop_frames |
1090 | || fti == FRAME_SUBTYPE_SEF |
1091 | || (self.twopass_state == PASS_2 |
1092 | || self.twopass_state == PASS_2_PLUS_1) |
1093 | && !self.frame_metrics.is_empty() |
1094 | { |
1095 | droppable = false; |
1096 | } |
1097 | if fti == FRAME_SUBTYPE_SEF { |
1098 | debug_assert!(bits == SEF_BITS); |
1099 | debug_assert!(show_frame); |
1100 | // Please don't make trial encodes of a SEF. |
1101 | debug_assert!(!trial); |
1102 | estimated_bits = SEF_BITS; |
1103 | self.nsef_frames += 1; |
1104 | } else { |
1105 | let log_q_exp = ((log_target_q + 32) >> 6) * (self.exp[fti] as i64); |
1106 | let prev_log_scale = self.log_scale[fti]; |
1107 | if bits <= 0 { |
1108 | // We didn't code any blocks in this frame. |
1109 | bits = 0; |
1110 | dropped = true; |
1111 | // TODO: Adjust VFR rate based on drop count. |
1112 | } else { |
1113 | // Compute the estimated scale factor for this frame type. |
1114 | let log_bits = blog64(bits); |
1115 | log_scale = (log_bits - self.log_npixels + log_q_exp).min(q57(16)); |
1116 | estimated_bits = |
1117 | bexp64(prev_log_scale + self.log_npixels - log_q_exp); |
1118 | if !trial { |
1119 | self.nencoded_frames += 1; |
1120 | } |
1121 | } |
1122 | } |
1123 | let log_scale_q24 = q57_to_q24(log_scale); |
1124 | // Special two-pass processing. |
1125 | if self.twopass_state == PASS_2 || self.twopass_state == PASS_2_PLUS_1 { |
1126 | // Pass 2 mode: |
1127 | if !trial { |
1128 | // Move the current metrics back one frame. |
1129 | self.prev_metrics = self.cur_metrics; |
1130 | // Back out the last frame's statistics from the sliding window. |
1131 | let ftj = self.prev_metrics.fti; |
1132 | self.nframes_left[ftj] -= 1; |
1133 | self.scale_window_nframes[ftj] -= 1; |
1134 | if ftj < FRAME_NSUBTYPES { |
1135 | self.scale_window_sum[ftj] -= |
1136 | bexp_q24(self.prev_metrics.log_scale_q24); |
1137 | } |
1138 | if self.prev_metrics.show_frame { |
1139 | self.ntus_left -= 1; |
1140 | self.scale_window_ntus -= 1; |
1141 | } |
1142 | // Free the corresponding entry in the circular buffer. |
1143 | if !self.frame_metrics.is_empty() { |
1144 | self.nframe_metrics -= 1; |
1145 | self.frame_metrics_head += 1; |
1146 | if self.frame_metrics_head >= self.frame_metrics.len() { |
1147 | self.frame_metrics_head = 0; |
1148 | } |
1149 | } |
1150 | // Mark us ready for the next 2-pass packet. |
1151 | self.pass2_data_ready = false; |
1152 | // Update state, so the user doesn't have to keep calling |
1153 | // twopass_in() after they've fed in all the data when we're using |
1154 | // a finite buffer. |
1155 | self.twopass_in(None).unwrap_or(0); |
1156 | } |
1157 | } |
1158 | if self.twopass_state == PASS_1 || self.twopass_state == PASS_2_PLUS_1 { |
1159 | // Pass 1 mode: save the metrics for this frame. |
1160 | self.prev_metrics.log_scale_q24 = log_scale_q24; |
1161 | self.prev_metrics.fti = fti; |
1162 | self.prev_metrics.show_frame = show_frame; |
1163 | self.pass1_data_retrieved = false; |
1164 | } |
1165 | // Common to all passes: |
1166 | if fti != FRAME_SUBTYPE_SEF && bits > 0 { |
1167 | // If this is the first example of the given frame type we've seen, |
1168 | // we immediately replace the default scale factor guess with the |
1169 | // estimate we just computed using the first frame. |
1170 | if trial || self.nframes[fti] <= 0 { |
1171 | let f = &mut self.scalefilter[fti]; |
1172 | let x = log_scale_q24; |
1173 | f.x[0] = x; |
1174 | f.x[1] = x; |
1175 | f.y[0] = x; |
1176 | f.y[1] = x; |
1177 | self.log_scale[fti] = log_scale; |
1178 | // TODO: Duplicate regular P frame state for first golden P frame. |
1179 | } else { |
1180 | // Lengthen the time constant for the inter filters as we collect |
1181 | // more frame statistics, until we reach our target. |
1182 | if fti > 0 |
1183 | && self.inter_delay[fti - 1] < self.inter_delay_target |
1184 | && self.nframes[fti] >= self.inter_delay[fti - 1] |
1185 | { |
1186 | self.inter_delay[fti - 1] += 1; |
1187 | self.scalefilter[fti].reinit(self.inter_delay[fti - 1]); |
1188 | } |
1189 | // Update the low-pass scale filter for this frame type regardless |
1190 | // of whether or not we will ultimately drop this frame. |
1191 | self.log_scale[fti] = |
1192 | q24_to_q57(self.scalefilter[fti].update(log_scale_q24)); |
1193 | } |
1194 | // If this frame busts our budget, it must be dropped. |
1195 | if droppable && self.reservoir_fullness + self.bits_per_tu < bits { |
1196 | // TODO: Adjust VFR rate based on drop count. |
1197 | bits = 0; |
1198 | dropped = true; |
1199 | } else { |
1200 | // TODO: Update a low-pass filter to estimate the "real" frame rate |
1201 | // taking timestamps and drops into account. |
1202 | // This is only done if the frame is coded, as it needs the final |
1203 | // count of dropped frames. |
1204 | } |
1205 | } |
1206 | if !trial { |
1207 | // Increment the frame count for filter adaptation purposes. |
1208 | if !trial && self.nframes[fti] < ::std::i32::MAX { |
1209 | self.nframes[fti] += 1; |
1210 | } |
1211 | self.reservoir_fullness -= bits; |
1212 | if show_frame { |
1213 | self.reservoir_fullness += self.bits_per_tu; |
1214 | // TODO: Properly account for temporal delimiter bits. |
1215 | } |
1216 | // If we're too quick filling the buffer and overflow is capped, that |
1217 | // rate is lost forever. |
1218 | if self.cap_overflow { |
1219 | self.reservoir_fullness = |
1220 | self.reservoir_fullness.min(self.reservoir_max); |
1221 | } |
1222 | // If we're too quick draining the buffer and underflow is capped, |
1223 | // don't try to make up that rate later. |
1224 | if self.cap_underflow { |
1225 | self.reservoir_fullness = self.reservoir_fullness.max(0); |
1226 | } |
1227 | // Adjust the bias for the real bits we've used. |
1228 | self.rate_bias += estimated_bits - bits; |
1229 | } |
1230 | } |
1231 | dropped |
1232 | } |
1233 | |
1234 | pub const fn needs_trial_encode(&self, fti: usize) -> bool { |
1235 | self.target_bitrate > 0 && self.nframes[fti] == 0 |
1236 | } |
1237 | |
1238 | pub(crate) const fn ready(&self) -> bool { |
1239 | match self.twopass_state { |
1240 | PASS_SINGLE => true, |
1241 | PASS_1 => self.pass1_data_retrieved, |
1242 | PASS_2 => self.pass2_data_ready, |
1243 | _ => self.pass1_data_retrieved && self.pass2_data_ready, |
1244 | } |
1245 | } |
1246 | |
1247 | fn buffer_val(&mut self, val: i64, bytes: usize, cur_pos: usize) -> usize { |
1248 | let mut val = val; |
1249 | let mut bytes = bytes; |
1250 | let mut cur_pos = cur_pos; |
1251 | while bytes > 0 { |
1252 | bytes -= 1; |
1253 | self.pass1_buffer[cur_pos] = val as u8; |
1254 | cur_pos += 1; |
1255 | val >>= 8; |
1256 | } |
1257 | cur_pos |
1258 | } |
1259 | |
1260 | pub(crate) fn select_pass1_log_base_q<T: Pixel>( |
1261 | &self, ctx: &ContextInner<T>, output_frameno: u64, |
1262 | ) -> i64 { |
1263 | assert_eq!(self.twopass_state, PASS_SINGLE); |
1264 | self.select_qi(ctx, output_frameno, FRAME_SUBTYPE_I, None, 0).log_base_q |
1265 | } |
1266 | |
1267 | // Initialize the first pass and emit a placeholder summary |
1268 | pub(crate) fn init_first_pass( |
1269 | &mut self, maybe_pass1_log_base_q: Option<i64>, |
1270 | ) { |
1271 | if let Some(pass1_log_base_q) = maybe_pass1_log_base_q { |
1272 | assert_eq!(self.twopass_state, PASS_SINGLE); |
1273 | // Pick first-pass qi for scale calculations. |
1274 | self.pass1_log_base_q = pass1_log_base_q; |
1275 | } else { |
1276 | debug_assert!(self.twopass_state == PASS_2); |
1277 | } |
1278 | self.twopass_state += PASS_1; |
1279 | } |
1280 | |
1281 | // Prepare a placeholder summary |
1282 | fn emit_placeholder_summary(&mut self) -> &[u8] { |
1283 | // Fill in dummy summary values. |
1284 | let mut cur_pos = 0; |
1285 | cur_pos = self.buffer_val(TWOPASS_MAGIC as i64, 4, cur_pos); |
1286 | cur_pos = self.buffer_val(TWOPASS_VERSION as i64, 4, cur_pos); |
1287 | cur_pos = self.buffer_val(0, TWOPASS_HEADER_SZ - 8, cur_pos); |
1288 | debug_assert!(cur_pos == TWOPASS_HEADER_SZ); |
1289 | self.pass1_data_retrieved = true; |
1290 | &self.pass1_buffer[..cur_pos] |
1291 | } |
1292 | |
1293 | // Frame-specific pass data |
1294 | pub(crate) fn emit_frame_data(&mut self) -> Option<&[u8]> { |
1295 | let mut cur_pos = 0; |
1296 | let fti = self.prev_metrics.fti; |
1297 | if fti < FRAME_NSUBTYPES { |
1298 | self.scale_sum[fti] += bexp_q24(self.prev_metrics.log_scale_q24); |
1299 | } |
1300 | if self.prev_metrics.show_frame { |
1301 | self.ntus += 1; |
1302 | } |
1303 | // If we have encoded too many frames, prevent us from reaching the |
1304 | // ready state required to encode more. |
1305 | if self.nencoded_frames + self.nsef_frames >= std::i32::MAX as i64 { |
1306 | None? |
1307 | } |
1308 | cur_pos = self.buffer_val( |
1309 | (self.prev_metrics.show_frame as i64) << 31 |
1310 | | self.prev_metrics.fti as i64, |
1311 | 4, |
1312 | cur_pos, |
1313 | ); |
1314 | cur_pos = |
1315 | self.buffer_val(self.prev_metrics.log_scale_q24 as i64, 4, cur_pos); |
1316 | debug_assert!(cur_pos == TWOPASS_PACKET_SZ); |
1317 | self.pass1_data_retrieved = true; |
1318 | Some(&self.pass1_buffer[..cur_pos]) |
1319 | } |
1320 | |
1321 | // Summary of the whole encoding process. |
1322 | pub(crate) fn emit_summary(&mut self) -> &[u8] { |
1323 | let mut cur_pos = 0; |
1324 | cur_pos = self.buffer_val(TWOPASS_MAGIC as i64, 4, cur_pos); |
1325 | cur_pos = self.buffer_val(TWOPASS_VERSION as i64, 4, cur_pos); |
1326 | cur_pos = self.buffer_val(self.ntus as i64, 4, cur_pos); |
1327 | for fti in 0..=FRAME_NSUBTYPES { |
1328 | cur_pos = self.buffer_val(self.nframes[fti] as i64, 4, cur_pos); |
1329 | } |
1330 | for fti in 0..FRAME_NSUBTYPES { |
1331 | cur_pos = self.buffer_val(self.exp[fti] as i64, 1, cur_pos); |
1332 | } |
1333 | for fti in 0..FRAME_NSUBTYPES { |
1334 | cur_pos = self.buffer_val(self.scale_sum[fti], 8, cur_pos); |
1335 | } |
1336 | debug_assert!(cur_pos == TWOPASS_HEADER_SZ); |
1337 | self.pass1_summary_retrieved = true; |
1338 | &self.pass1_buffer[..cur_pos] |
1339 | } |
1340 | |
1341 | // Emit either summary or frame-specific data depending on the previous call |
1342 | pub(crate) fn twopass_out( |
1343 | &mut self, done_processing: bool, |
1344 | ) -> Option<&[u8]> { |
1345 | if !self.pass1_data_retrieved { |
1346 | if self.twopass_state != PASS_1 && self.twopass_state != PASS_2_PLUS_1 { |
1347 | Some(self.emit_placeholder_summary()) |
1348 | } else { |
1349 | self.emit_frame_data() |
1350 | } |
1351 | } else if done_processing && !self.pass1_summary_retrieved { |
1352 | Some(self.emit_summary()) |
1353 | } else { |
1354 | // The data for this frame has already been retrieved. |
1355 | None |
1356 | } |
1357 | } |
1358 | |
1359 | // Initialize the rate control for second pass encoding |
1360 | pub(crate) fn init_second_pass(&mut self) { |
1361 | if self.twopass_state == PASS_SINGLE || self.twopass_state == PASS_1 { |
1362 | // Initialize the second pass. |
1363 | self.twopass_state += PASS_2; |
1364 | // If the user requested a finite buffer, reserve the space required for |
1365 | // it. |
1366 | if self.reservoir_frame_delay_is_set { |
1367 | debug_assert!(self.reservoir_frame_delay > 0); |
1368 | // reservoir_frame_delay counts in TUs, but RCFrameMetrics are stored |
1369 | // per frame (including Show Existing Frame frames). |
1370 | // When re-ordering, we will have more frames than TUs. |
1371 | // How many more? |
1372 | // That depends on the re-ordering scheme used. |
1373 | // Doubling the number of TUs and adding a fixed latency equal to the |
1374 | // maximum number of reference frames we can store should be |
1375 | // sufficient for any reasonable scheme, and keeps this code from |
1376 | // depending too closely on the details of the scheme currently used |
1377 | // by rav1e. |
1378 | let nmetrics = (self.reservoir_frame_delay as usize) * 2 + 8; |
1379 | self.frame_metrics.reserve_exact(nmetrics); |
1380 | self.frame_metrics.resize(nmetrics, RCFrameMetrics::new()); |
1381 | } |
1382 | } |
1383 | } |
1384 | |
1385 | pub(crate) fn setup_second_pass(&mut self, s: &RCSummary) { |
1386 | self.ntus_total = s.ntus; |
1387 | self.ntus_left = s.ntus; |
1388 | self.nframes_total = s.nframes; |
1389 | self.nframes_left = s.nframes; |
1390 | self.nframes_total_total = s.nframes.iter().sum(); |
1391 | if self.frame_metrics.is_empty() { |
1392 | self.reservoir_frame_delay = s.ntus; |
1393 | self.scale_window_nframes = self.nframes_total; |
1394 | self.scale_window_sum = s.scale_sum; |
1395 | self.reservoir_max = |
1396 | self.bits_per_tu * (self.reservoir_frame_delay as i64); |
1397 | self.reservoir_target = (self.reservoir_max + 1) >> 1; |
1398 | self.reservoir_fullness = self.reservoir_target; |
1399 | } else { |
1400 | self.reservoir_frame_delay = self.reservoir_frame_delay.min(s.ntus); |
1401 | } |
1402 | self.exp = s.exp; |
1403 | } |
1404 | |
1405 | // Parse the rate control summary |
1406 | // |
1407 | // It returns the amount of data consumed in the process or |
1408 | // an empty error on parsing failure. |
1409 | fn twopass_parse_summary(&mut self, buf: &[u8]) -> Result<usize, String> { |
1410 | let consumed = self.des.buffer_fill(buf, 0, TWOPASS_HEADER_SZ); |
1411 | if self.des.pass2_buffer_fill >= TWOPASS_HEADER_SZ { |
1412 | self.des.pass2_buffer_pos = 0; |
1413 | |
1414 | let s = self.des.parse_summary()?; |
1415 | |
1416 | self.setup_second_pass(&s); |
1417 | |
1418 | // Got a valid header. |
1419 | // Set up pass 2. |
1420 | // Clear the header data from the buffer to make room for the |
1421 | // packet data. |
1422 | self.des.pass2_buffer_fill = 0; |
1423 | } |
1424 | |
1425 | Ok(consumed) |
1426 | } |
1427 | |
1428 | // Return the size of the first buffer twopass_in expects |
1429 | // |
1430 | // It is the summary size (constant) + the number of frame data packets |
1431 | // (variable depending on the configuration) it needs to starts encoding. |
1432 | pub(crate) fn twopass_first_packet_size(&self) -> usize { |
1433 | let frames_needed = if !self.frame_metrics.is_empty() { |
1434 | // If we're not using whole-file buffering, we need at least one |
1435 | // frame per buffer slot. |
1436 | self.reservoir_frame_delay as usize |
1437 | } else { |
1438 | // Otherwise we need just one. |
1439 | 1 |
1440 | }; |
1441 | |
1442 | TWOPASS_HEADER_SZ + frames_needed * TWOPASS_PACKET_SZ |
1443 | } |
1444 | |
1445 | // Return the number of frame data packets to be parsed before |
1446 | // the encoding process can continue. |
1447 | pub(crate) fn twopass_in_frames_needed(&self) -> i32 { |
1448 | if self.target_bitrate <= 0 { |
1449 | return 0; |
1450 | } |
1451 | if self.frame_metrics.is_empty() { |
1452 | return i32::from(!self.pass2_data_ready); |
1453 | } |
1454 | let mut cur_scale_window_nframes = 0; |
1455 | let mut cur_nframes_left = 0; |
1456 | for fti in 0..=FRAME_NSUBTYPES { |
1457 | cur_scale_window_nframes += self.scale_window_nframes[fti]; |
1458 | cur_nframes_left += self.nframes_left[fti]; |
1459 | } |
1460 | |
1461 | (self.reservoir_frame_delay - self.scale_window_ntus) |
1462 | .clamp(0, cur_nframes_left - cur_scale_window_nframes) |
1463 | } |
1464 | |
1465 | pub(crate) fn parse_frame_data_packet( |
1466 | &mut self, buf: &[u8], |
1467 | ) -> Result<(), String> { |
1468 | if buf.len() != TWOPASS_PACKET_SZ { |
1469 | return Err("Incorrect buffer size" .to_string()); |
1470 | } |
1471 | |
1472 | self.des.buffer_fill(buf, 0, TWOPASS_PACKET_SZ); |
1473 | self.des.pass2_buffer_pos = 0; |
1474 | let m = self.des.parse_metrics()?; |
1475 | self.des.pass2_buffer_fill = 0; |
1476 | |
1477 | if self.frame_metrics.is_empty() { |
1478 | // We're using a whole-file buffer. |
1479 | self.cur_metrics = m; |
1480 | self.pass2_data_ready = true; |
1481 | } else { |
1482 | // Safety check |
1483 | let frames_needed = self.twopass_in_frames_needed(); |
1484 | |
1485 | if frames_needed > 0 { |
1486 | if self.nframe_metrics >= self.frame_metrics.len() { |
1487 | return Err( |
1488 | "Read too many frames without finding enough TUs" .to_string(), |
1489 | ); |
1490 | } |
1491 | |
1492 | let mut fmi = self.frame_metrics_head + self.nframe_metrics; |
1493 | if fmi >= self.frame_metrics.len() { |
1494 | fmi -= self.frame_metrics.len(); |
1495 | } |
1496 | self.nframe_metrics += 1; |
1497 | self.frame_metrics[fmi] = m; |
1498 | // And accumulate the statistics over the window. |
1499 | self.scale_window_nframes[m.fti] += 1; |
1500 | if m.fti < FRAME_NSUBTYPES { |
1501 | self.scale_window_sum[m.fti] += bexp_q24(m.log_scale_q24); |
1502 | } |
1503 | if m.show_frame { |
1504 | self.scale_window_ntus += 1; |
1505 | } |
1506 | if frames_needed == 1 { |
1507 | self.pass2_data_ready = true; |
1508 | self.cur_metrics = self.frame_metrics[self.frame_metrics_head]; |
1509 | } |
1510 | } else { |
1511 | return Err("No frames needed" .to_string()); |
1512 | } |
1513 | } |
1514 | |
1515 | Ok(()) |
1516 | } |
1517 | |
1518 | // Parse the rate control per-frame data |
1519 | // |
1520 | // If no buffer is passed return the amount of data it expects |
1521 | // to consume next. |
1522 | // |
1523 | // If a properly sized buffer is passed it returns the amount of data |
1524 | // consumed in the process or an empty error on parsing failure. |
1525 | fn twopass_parse_frame_data( |
1526 | &mut self, maybe_buf: Option<&[u8]>, mut consumed: usize, |
1527 | ) -> Result<usize, String> { |
1528 | { |
1529 | if self.frame_metrics.is_empty() { |
1530 | // We're using a whole-file buffer. |
1531 | if let Some(buf) = maybe_buf { |
1532 | consumed = self.des.buffer_fill(buf, consumed, TWOPASS_PACKET_SZ); |
1533 | if self.des.pass2_buffer_fill >= TWOPASS_PACKET_SZ { |
1534 | self.des.pass2_buffer_pos = 0; |
1535 | // Read metrics for the next frame. |
1536 | self.cur_metrics = self.des.parse_metrics()?; |
1537 | // Clear the buffer for the next frame. |
1538 | self.des.pass2_buffer_fill = 0; |
1539 | self.pass2_data_ready = true; |
1540 | } |
1541 | } else { |
1542 | return Ok(TWOPASS_PACKET_SZ - self.des.pass2_buffer_fill); |
1543 | } |
1544 | } else { |
1545 | // We're using a finite buffer. |
1546 | let mut cur_scale_window_nframes = 0; |
1547 | let mut cur_nframes_left = 0; |
1548 | |
1549 | for fti in 0..=FRAME_NSUBTYPES { |
1550 | cur_scale_window_nframes += self.scale_window_nframes[fti]; |
1551 | cur_nframes_left += self.nframes_left[fti]; |
1552 | } |
1553 | |
1554 | let mut frames_needed = self.twopass_in_frames_needed(); |
1555 | while frames_needed > 0 { |
1556 | if let Some(buf) = maybe_buf { |
1557 | consumed = self.des.buffer_fill(buf, consumed, TWOPASS_PACKET_SZ); |
1558 | if self.des.pass2_buffer_fill >= TWOPASS_PACKET_SZ { |
1559 | self.des.pass2_buffer_pos = 0; |
1560 | // Read the metrics for the next frame. |
1561 | let m = self.des.parse_metrics()?; |
1562 | // Add them to the circular buffer. |
1563 | if self.nframe_metrics >= self.frame_metrics.len() { |
1564 | return Err( |
1565 | "Read too many frames without finding enough TUs" |
1566 | .to_string(), |
1567 | ); |
1568 | } |
1569 | let mut fmi = self.frame_metrics_head + self.nframe_metrics; |
1570 | if fmi >= self.frame_metrics.len() { |
1571 | fmi -= self.frame_metrics.len(); |
1572 | } |
1573 | self.nframe_metrics += 1; |
1574 | self.frame_metrics[fmi] = m; |
1575 | // And accumulate the statistics over the window. |
1576 | self.scale_window_nframes[m.fti] += 1; |
1577 | cur_scale_window_nframes += 1; |
1578 | if m.fti < FRAME_NSUBTYPES { |
1579 | self.scale_window_sum[m.fti] += bexp_q24(m.log_scale_q24); |
1580 | } |
1581 | if m.show_frame { |
1582 | self.scale_window_ntus += 1; |
1583 | } |
1584 | frames_needed = (self.reservoir_frame_delay |
1585 | - self.scale_window_ntus) |
1586 | .clamp(0, cur_nframes_left - cur_scale_window_nframes); |
1587 | // Clear the buffer for the next frame. |
1588 | self.des.pass2_buffer_fill = 0; |
1589 | } else { |
1590 | // Go back for more data. |
1591 | break; |
1592 | } |
1593 | } else { |
1594 | return Ok( |
1595 | TWOPASS_PACKET_SZ * (frames_needed as usize) |
1596 | - self.des.pass2_buffer_fill, |
1597 | ); |
1598 | } |
1599 | } |
1600 | // If we've got all the frames we need, fill in the current metrics. |
1601 | // We're ready to go. |
1602 | if frames_needed <= 0 { |
1603 | self.cur_metrics = self.frame_metrics[self.frame_metrics_head]; |
1604 | // Mark us ready for the next frame. |
1605 | self.pass2_data_ready = true; |
1606 | } |
1607 | } |
1608 | } |
1609 | |
1610 | Ok(consumed) |
1611 | } |
1612 | |
1613 | // If called without a buffer it will return the size of the next |
1614 | // buffer it expects. |
1615 | // |
1616 | // If called with a buffer it will consume it fully. |
// It returns Ok with the amount of data consumed if the buffer had been
// parsed, or Err with a message if an error happened.
1619 | pub(crate) fn twopass_in( |
1620 | &mut self, maybe_buf: Option<&[u8]>, |
1621 | ) -> Result<usize, String> { |
1622 | let mut consumed = 0; |
1623 | self.init_second_pass(); |
1624 | // If we haven't got a valid summary header yet, try to parse one. |
1625 | if self.nframes_total[FRAME_SUBTYPE_I] == 0 { |
1626 | self.pass2_data_ready = false; |
1627 | if let Some(buf) = maybe_buf { |
1628 | consumed = self.twopass_parse_summary(buf)? |
1629 | } else { |
1630 | return Ok(self.twopass_first_packet_size()); |
1631 | } |
1632 | } |
1633 | if self.nframes_total[FRAME_SUBTYPE_I] > 0 { |
1634 | if self.nencoded_frames + self.nsef_frames |
1635 | >= self.nframes_total_total as i64 |
1636 | { |
1637 | // We don't want any more data after the last frame, and we don't want |
1638 | // to allow any more frames to be encoded. |
1639 | self.pass2_data_ready = false; |
1640 | } else if !self.pass2_data_ready { |
1641 | return self.twopass_parse_frame_data(maybe_buf, consumed); |
1642 | } |
1643 | } |
1644 | Ok(consumed) |
1645 | } |
1646 | } |
1647 | |