1// Copyright (c) 2020-2022, The rav1e contributors. All rights reserved
2//
3// This source code is subject to the terms of the BSD 2 Clause License and
4// the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
5// was not distributed with this source code in the LICENSE file, you can
6// obtain it at www.aomedia.org/license/software. If the Alliance for Open
7// Media Patent License 1.0 was not distributed with this source code in the
8// PATENTS file, you can obtain it at www.aomedia.org/license/patent.
9
10use itertools::*;
11
12use crate::api::color::*;
13use crate::api::config::GrainTableSegment;
14use crate::api::{Rational, SpeedSettings};
15use crate::encoder::Tune;
16use crate::serialize::{Deserialize, Serialize};
17
18use std::fmt;
19
// Largest accepted value for `rdo_lookahead_frames`.
// We add 1 to rdo_lookahead_frames in a bunch of places, so the limit is kept
// one below `usize::MAX` (presumably so that this addition cannot overflow).
pub(crate) const MAX_RDO_LOOKAHEAD_FRAMES: usize = usize::MAX - 1;
// Largest accepted value for `max_key_frame_interval`; it is also the value
// used to represent an "infinite" keyframe interval.
// The limit is a third of `i32::MAX` due to the math in RCState::new()
// regarding the reservoir frame delay.
pub(crate) const MAX_MAX_KEY_FRAME_INTERVAL: u64 = i32::MAX as u64 / 3;
24
/// Encoder settings which impact the produced bitstream.
///
/// A fully populated value can be obtained from `Default::default()` or from
/// `EncoderConfig::with_speed_preset()`; individual fields can then be
/// adjusted as needed.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct EncoderConfig {
  // output size
  /// Width of the frames in pixels.
  pub width: usize,
  /// Height of the frames in pixels.
  pub height: usize,
  /// Sample aspect ratio (for anamorphic video).
  ///
  /// Used together with `width` and `height` by `render_size()`.
  pub sample_aspect_ratio: Rational,
  /// Video time base.
  ///
  /// `frame_rate()` reports the reciprocal of this value.
  pub time_base: Rational,

  // data format and ancillary color information
  /// Bit depth.
  pub bit_depth: usize,
  /// Chroma subsampling.
  pub chroma_sampling: ChromaSampling,
  /// Chroma sample position.
  pub chroma_sample_position: ChromaSamplePosition,
  /// Pixel value range.
  pub pixel_range: PixelRange,
  /// Content color description (primaries, transfer characteristics, matrix).
  ///
  /// `is_hdr()` inspects the transfer characteristics stored here.
  pub color_description: Option<ColorDescription>,
  /// HDR mastering display parameters.
  pub mastering_display: Option<MasteringDisplay>,
  /// HDR content light parameters.
  pub content_light: Option<ContentLight>,

  /// AV1 level index to target (0-31).
  /// If None, allow the encoder to decide.
  /// Currently, rav1e is unable to guarantee that the output bitstream
  /// meets the rate limitations of the specified level.
  pub level_idx: Option<u8>,

  /// Enable signaling timing info in the bitstream.
  pub enable_timing_info: bool,

  /// Still picture mode flag.
  pub still_picture: bool,

  /// Flag to force all frames to be error resilient.
  pub error_resilient: bool,

  /// Interval between switch frames (0 to disable).
  pub switch_frame_interval: u64,

  // encoder configuration
  /// The *minimum* interval between two keyframes.
  pub min_key_frame_interval: u64,
  /// The *maximum* interval between two keyframes.
  ///
  /// `set_key_frame_interval()` stores `MAX_MAX_KEY_FRAME_INTERVAL` here when
  /// it is asked for a maximum of 0 (an "infinite" interval).
  pub max_key_frame_interval: u64,
  /// The number of temporal units over which to distribute the reservoir
  /// usage.
  pub reservoir_frame_delay: Option<i32>,
  /// Flag to enable low latency mode.
  ///
  /// In this mode the frame reordering is disabled.
  pub low_latency: bool,
  /// The base quantizer to use.
  pub quantizer: usize,
  /// The minimum allowed base quantizer to use in bitrate mode.
  pub min_quantizer: u8,
  /// The target bitrate for the bitrate mode.
  pub bitrate: i32,
  /// Metric to tune the quality for.
  pub tune: Tune,
  /// Parameters for grain synthesis.
  ///
  /// Each segment applies to the timestamps `t` for which
  /// `start_time <= t < end_time`.
  pub film_grain_params: Option<Vec<GrainTableSegment>>,
  /// Number of tiles horizontally. Must be a power of two.
  ///
  /// Overridden by [`tiles`], if present.
  ///
  /// [`tiles`]: #structfield.tiles
  pub tile_cols: usize,
  /// Number of tiles vertically. Must be a power of two.
  ///
  /// Overridden by [`tiles`], if present.
  ///
  /// [`tiles`]: #structfield.tiles
  pub tile_rows: usize,
  /// Total number of tiles desired.
  ///
  /// Encoder will try to optimally split to reach this number of tiles,
  /// rounded up. Overrides [`tile_cols`] and [`tile_rows`].
  ///
  /// [`tile_cols`]: #structfield.tile_cols
  /// [`tile_rows`]: #structfield.tile_rows
  pub tiles: usize,

  /// Settings which affect the encoding speed vs. quality trade-off.
  pub speed_settings: SpeedSettings,
}
118
119/// Default preset for `EncoderConfig`: it is a balance between quality and
120/// speed. See [`with_speed_preset()`].
121///
122/// [`with_speed_preset()`]: struct.EncoderConfig.html#method.with_speed_preset
123impl Default for EncoderConfig {
124 fn default() -> Self {
125 const DEFAULT_SPEED: u8 = 6;
126 Self::with_speed_preset(DEFAULT_SPEED)
127 }
128}
129
130impl EncoderConfig {
131 /// This is a preset which provides default settings according to a speed
132 /// value in the specific range 0–10. Each speed value corresponds to a
133 /// different preset. See [`from_preset()`]. If the input value is greater
134 /// than 10, it will result in the same settings as 10.
135 ///
136 /// [`from_preset()`]: struct.SpeedSettings.html#method.from_preset
137 pub fn with_speed_preset(speed: u8) -> Self {
138 EncoderConfig {
139 width: 640,
140 height: 480,
141 sample_aspect_ratio: Rational { num: 1, den: 1 },
142 time_base: Rational { num: 1, den: 30 },
143
144 bit_depth: 8,
145 chroma_sampling: ChromaSampling::Cs420,
146 chroma_sample_position: ChromaSamplePosition::Unknown,
147 pixel_range: Default::default(),
148 color_description: None,
149 mastering_display: None,
150 content_light: None,
151
152 level_idx: None,
153
154 enable_timing_info: false,
155
156 still_picture: false,
157
158 error_resilient: false,
159 switch_frame_interval: 0,
160
161 min_key_frame_interval: 12,
162 max_key_frame_interval: 240,
163 min_quantizer: 0,
164 reservoir_frame_delay: None,
165 low_latency: false,
166 quantizer: 100,
167 bitrate: 0,
168 tune: Tune::default(),
169 film_grain_params: None,
170 tile_cols: 0,
171 tile_rows: 0,
172 tiles: 0,
173 speed_settings: SpeedSettings::from_preset(speed),
174 }
175 }
176
177 /// Sets the minimum and maximum keyframe interval, handling special cases as needed.
178 pub fn set_key_frame_interval(
179 &mut self, min_interval: u64, max_interval: u64,
180 ) {
181 self.min_key_frame_interval = min_interval;
182
183 // Map an input value of 0 to an infinite interval
184 self.max_key_frame_interval = if max_interval == 0 {
185 MAX_MAX_KEY_FRAME_INTERVAL
186 } else {
187 max_interval
188 };
189 }
190
191 /// Returns the video frame rate computed from [`time_base`].
192 ///
193 /// [`time_base`]: #structfield.time_base
194 pub fn frame_rate(&self) -> f64 {
195 Rational::from_reciprocal(self.time_base).as_f64()
196 }
197
198 /// Computes the render width and height of the stream based
199 /// on [`width`], [`height`], and [`sample_aspect_ratio`].
200 ///
201 /// [`width`]: #structfield.width
202 /// [`height`]: #structfield.height
203 /// [`sample_aspect_ratio`]: #structfield.sample_aspect_ratio
204 pub fn render_size(&self) -> (usize, usize) {
205 let sar = self.sample_aspect_ratio.as_f64();
206
207 if sar > 1.0 {
208 ((self.width as f64 * sar).round() as usize, self.height)
209 } else {
210 (self.width, (self.height as f64 / sar).round() as usize)
211 }
212 }
213
214 /// Is temporal RDO enabled ?
215 #[inline]
216 pub const fn temporal_rdo(&self) -> bool {
217 // Note: This function is called frequently, unlike most other functions here.
218
219 // `compute_distortion_scale` computes a scaling factor for the distortion
220 // of an 8x8 block (4x4 blocks simply use the scaling of the enclosing 8x8
221 // block). As long as distortion is always computed on <= 8x8 blocks, this
222 // has the property that the scaled distortion of a 2Nx2N block is always
223 // equal to the sum of the scaled distortions of the NxN sub-blocks it's
224 // made of, this is a necessary property to be able to do RDO between
225 // multiple partition sizes properly. Unfortunately, when tx domain
226 // distortion is used, distortion is only known at the tx block level which
227 // might be bigger than 8x8. So temporal RDO is always disabled in that case.
228 !self.speed_settings.transform.tx_domain_distortion
229 }
230
231 /// Describes whether the output is targeted as HDR
232 pub fn is_hdr(&self) -> bool {
233 self
234 .color_description
235 .map(|colors| {
236 colors.transfer_characteristics == TransferCharacteristics::SMPTE2084
237 })
238 .unwrap_or(false)
239 }
240
241 pub(crate) fn get_film_grain_at(
242 &self, timestamp: u64,
243 ) -> Option<&GrainTableSegment> {
244 self.film_grain_params.as_ref().and_then(|entries| {
245 entries.iter().find(|entry| {
246 timestamp >= entry.start_time && timestamp < entry.end_time
247 })
248 })
249 }
250
251 pub(crate) fn get_film_grain_mut_at(
252 &mut self, timestamp: u64,
253 ) -> Option<&mut GrainTableSegment> {
254 self.film_grain_params.as_mut().and_then(|entries| {
255 entries.iter_mut().find(|entry| {
256 timestamp >= entry.start_time && timestamp < entry.end_time
257 })
258 })
259 }
260}
261
262impl fmt::Display for EncoderConfig {
263 fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
264 let pairs = [
265 ("keyint_min", self.min_key_frame_interval.to_string()),
266 ("keyint_max", self.max_key_frame_interval.to_string()),
267 ("quantizer", self.quantizer.to_string()),
268 ("bitrate", self.bitrate.to_string()),
269 ("min_quantizer", self.min_quantizer.to_string()),
270 ("low_latency", self.low_latency.to_string()),
271 ("tune", self.tune.to_string()),
272 (
273 "rdo_lookahead_frames",
274 self.speed_settings.rdo_lookahead_frames.to_string(),
275 ),
276 (
277 "multiref",
278 (!self.low_latency || self.speed_settings.multiref).to_string(),
279 ),
280 ("fast_deblock", self.speed_settings.fast_deblock.to_string()),
281 (
282 "scene_detection_mode",
283 self.speed_settings.scene_detection_mode.to_string(),
284 ),
285 ("cdef", self.speed_settings.cdef.to_string()),
286 ("lrf", self.speed_settings.lrf.to_string()),
287 ("enable_timing_info", self.enable_timing_info.to_string()),
288 (
289 "min_block_size",
290 self.speed_settings.partition.partition_range.min.to_string(),
291 ),
292 (
293 "max_block_size",
294 self.speed_settings.partition.partition_range.max.to_string(),
295 ),
296 (
297 "encode_bottomup",
298 self.speed_settings.partition.encode_bottomup.to_string(),
299 ),
300 (
301 "non_square_partition_max_threshold",
302 self
303 .speed_settings
304 .partition
305 .non_square_partition_max_threshold
306 .to_string(),
307 ),
308 (
309 "reduced_tx_set",
310 self.speed_settings.transform.reduced_tx_set.to_string(),
311 ),
312 (
313 "tx_domain_distortion",
314 self.speed_settings.transform.tx_domain_distortion.to_string(),
315 ),
316 (
317 "tx_domain_rate",
318 self.speed_settings.transform.tx_domain_rate.to_string(),
319 ),
320 (
321 "rdo_tx_decision",
322 self.speed_settings.transform.rdo_tx_decision.to_string(),
323 ),
324 (
325 "prediction_modes",
326 self.speed_settings.prediction.prediction_modes.to_string(),
327 ),
328 (
329 "fine_directional_intra",
330 self.speed_settings.prediction.fine_directional_intra.to_string(),
331 ),
332 (
333 "include_near_mvs",
334 self.speed_settings.motion.include_near_mvs.to_string(),
335 ),
336 (
337 "use_satd_subpel",
338 self.speed_settings.motion.use_satd_subpel.to_string(),
339 ),
340 ];
341 write!(
342 f,
343 "{}",
344 pairs.iter().map(|pair| format!("{}={}", pair.0, pair.1)).join(" ")
345 )
346 }
347}
348