encoder.rs source code [crates/rav1e/src/api/config/encoder.rs]

1	// Copyright (c) 2020-2022, The rav1e contributors. All rights reserved
2	//
3	// This source code is subject to the terms of the BSD 2 Clause License and
4	// the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
5	// was not distributed with this source code in the LICENSE file, you can
6	// obtain it at www.aomedia.org/license/software. If the Alliance for Open
7	// Media Patent License 1.0 was not distributed with this source code in the
8	// PATENTS file, you can obtain it at www.aomedia.org/license/patent.
9
10	use itertools::*;
11
12	use crate::api::color::*;
13	use crate::api::config::GrainTableSegment;
14	use crate::api::{Rational, SpeedSettings};
15	use crate::encoder::Tune;
16	use crate::serialize::{Deserialize, Serialize};
17
18	use std::fmt;
19
/// Upper bound accepted for `rdo_lookahead_frames`.
///
/// We add 1 to `rdo_lookahead_frames` in a bunch of places, so the limit is
/// kept one below `usize::MAX` (presumably so that the addition cannot
/// overflow).
pub(crate) const MAX_RDO_LOOKAHEAD_FRAMES: usize = usize::MAX - 1;
/// Upper bound accepted for the maximum keyframe interval.
///
/// The limit is due to the math in `RCState::new()` regarding the reservoir
/// frame delay.
pub(crate) const MAX_MAX_KEY_FRAME_INTERVAL: u64 = i32::MAX as u64 / 3;
24
25	/// Encoder settings which impact the produced bitstream.
26	#[derive(Clone, Debug, Serialize, Deserialize)]
27	pub struct EncoderConfig {
28	// output size
29	/// Width of the frames in pixels.
30	pub width: usize,
31	/// Height of the frames in pixels.
32	pub height: usize,
33	/// Sample aspect ratio (for anamorphic video).
34	pub sample_aspect_ratio: Rational,
35	/// Video time base.
36	pub time_base: Rational,
37
38	// data format and ancillary color information
39	/// Bit depth.
40	pub bit_depth: usize,
41	/// Chroma subsampling.
42	pub chroma_sampling: ChromaSampling,
43	/// Chroma sample position.
44	pub chroma_sample_position: ChromaSamplePosition,
45	/// Pixel value range.
46	pub pixel_range: PixelRange,
47	/// Content color description (primaries, transfer characteristics, matrix).
48	pub color_description: Option<ColorDescription>,
49	/// HDR mastering display parameters.
50	pub mastering_display: Option<MasteringDisplay>,
51	/// HDR content light parameters.
52	pub content_light: Option<ContentLight>,
53
54	/// AV1 level index to target (0-31).
55	/// If None, allow the encoder to decide.
56	/// Currently, rav1e is unable to guarantee that the output bitstream
57	/// meets the rate limitations of the specified level.
58	pub level_idx: Option<u8>,
59
60	/// Enable signaling timing info in the bitstream.
61	pub enable_timing_info: bool,
62
63	/// Still picture mode flag.
64	pub still_picture: bool,
65
66	/// Flag to force all frames to be error resilient.
67	pub error_resilient: bool,
68
69	/// Interval between switch frames (0 to disable)
70	pub switch_frame_interval: u64,
71
72	// encoder configuration
73	/// The minimum* interval between two keyframes*
74	pub min_key_frame_interval: u64,
75	/// The maximum* interval between two keyframes*
76	pub max_key_frame_interval: u64,
77	/// The number of temporal units over which to distribute the reservoir
78	/// usage.
79	pub reservoir_frame_delay: Option<i32>,
80	/// Flag to enable low latency mode.
81	///
82	/// In this mode the frame reordering is disabled.
83	pub low_latency: bool,
84	/// The base quantizer to use.
85	pub quantizer: usize,
86	/// The minimum allowed base quantizer to use in bitrate mode.
87	pub min_quantizer: u8,
88	/// The target bitrate for the bitrate mode.
89	pub bitrate: i32,
90	/// Metric to tune the quality for.
91	pub tune: Tune,
92	/// Parameters for grain synthesis.
93	pub film_grain_params: Option<Vec<GrainTableSegment>>,
94	/// Number of tiles horizontally. Must be a power of two.
95	///
96	/// Overridden by [`tiles`], if present.
97	///
98	/// [`tiles`]: #structfield.tiles
99	pub tile_cols: usize,
100	/// Number of tiles vertically. Must be a power of two.
101	///
102	/// Overridden by [`tiles`], if present.
103	///
104	/// [`tiles`]: #structfield.tiles
105	pub tile_rows: usize,
106	/// Total number of tiles desired.
107	///
108	/// Encoder will try to optimally split to reach this number of tiles,
109	/// rounded up. Overrides [`tile_cols`] and [`tile_rows`].
110	///
111	/// [`tile_cols`]: #structfield.tile_cols
112	/// [`tile_rows`]: #structfield.tile_rows
113	pub tiles: usize,
114
115	/// Settings which affect the encoding speed vs. quality trade-off.
116	pub speed_settings: SpeedSettings,
117	}
118
119	/// Default preset for `EncoderConfig`: it is a balance between quality and
120	/// speed. See [`with_speed_preset()`].
121	///
122	/// [`with_speed_preset()`]: struct.EncoderConfig.html#method.with_speed_preset
123	impl Default for EncoderConfig {
124	fn default() -> Self {
125	const DEFAULT_SPEED: u8 = `6`;
126	Self::with_speed_preset(DEFAULT_SPEED)
127	}
128	}
129
130	impl EncoderConfig {
131	/// This is a preset which provides default settings according to a speed
132	/// value in the specific range 0–10. Each speed value corresponds to a
133	/// different preset. See [`from_preset()`]. If the input value is greater
134	/// than 10, it will result in the same settings as 10.
135	///
136	/// [`from_preset()`]: struct.SpeedSettings.html#method.from_preset
137	pub fn with_speed_preset(speed: u8) -> Self {
138	EncoderConfig {
139	width: `640`,
140	height: `480`,
141	sample_aspect_ratio: Rational { num: `1`, den: `1` },
142	time_base: Rational { num: `1`, den: `30` },
143
144	bit_depth: `8`,
145	chroma_sampling: ChromaSampling::Cs420,
146	chroma_sample_position: ChromaSamplePosition::Unknown,
147	pixel_range: Default::default(),
148	color_description: None,
149	mastering_display: None,
150	content_light: None,
151
152	level_idx: None,
153
154	enable_timing_info: `false`,
155
156	still_picture: `false`,
157
158	error_resilient: `false`,
159	switch_frame_interval: `0`,
160
161	min_key_frame_interval: `12`,
162	max_key_frame_interval: `240`,
163	min_quantizer: `0`,
164	reservoir_frame_delay: None,
165	low_latency: `false`,
166	quantizer: `100`,
167	bitrate: `0`,
168	tune: Tune::default(),
169	film_grain_params: None,
170	tile_cols: `0`,
171	tile_rows: `0`,
172	tiles: `0`,
173	speed_settings: SpeedSettings::from_preset(speed),
174	}
175	}
176
177	/// Sets the minimum and maximum keyframe interval, handling special cases as needed.
178	pub fn set_key_frame_interval(
179	&mut self, min_interval: u64, max_interval: u64,
180	) {
181	self.min_key_frame_interval = min_interval;
182
183	// Map an input value of 0 to an infinite interval
184	self.max_key_frame_interval = if max_interval == `0` {
185	MAX_MAX_KEY_FRAME_INTERVAL
186	} else {
187	max_interval
188	};
189	}
190
191	/// Returns the video frame rate computed from [`time_base`].
192	///
193	/// [`time_base`]: #structfield.time_base
194	pub fn frame_rate(&self) -> f64 {
195	Rational::from_reciprocal(self.time_base).as_f64()
196	}
197
198	/// Computes the render width and height of the stream based
199	/// on [`width`], [`height`], and [`sample_aspect_ratio`].
200	///
201	/// [`width`]: #structfield.width
202	/// [`height`]: #structfield.height
203	/// [`sample_aspect_ratio`]: #structfield.sample_aspect_ratio
204	pub fn render_size(&self) -> (usize, usize) {
205	let sar = self.sample_aspect_ratio.as_f64();
206
207	if sar > `1.0` {
208	((self.width as f64 * sar).round() as usize, self.height)
209	} else {
210	(self.width, (self.height as f64 / sar).round() as usize)
211	}
212	}
213
214	/// Is temporal RDO enabled ?
215	#[inline]
216	pub const fn temporal_rdo(&self) -> bool {
217	// Note: This function is called frequently, unlike most other functions here.
218
219	// `compute_distortion_scale` computes a scaling factor for the distortion
220	// of an 8x8 block (4x4 blocks simply use the scaling of the enclosing 8x8
221	// block). As long as distortion is always computed on <= 8x8 blocks, this
222	// has the property that the scaled distortion of a 2Nx2N block is always
223	// equal to the sum of the scaled distortions of the NxN sub-blocks it's
224	// made of, this is a necessary property to be able to do RDO between
225	// multiple partition sizes properly. Unfortunately, when tx domain
226	// distortion is used, distortion is only known at the tx block level which
227	// might be bigger than 8x8. So temporal RDO is always disabled in that case.
228	!self.speed_settings.transform.tx_domain_distortion
229	}
230
231	/// Describes whether the output is targeted as HDR
232	pub fn is_hdr(&self) -> bool {
233	self
234	.color_description
235	.map(\|colors\| {
236	colors.transfer_characteristics == TransferCharacteristics::SMPTE2084
237	})
238	.unwrap_or(`false`)
239	}
240
241	pub(crate) fn get_film_grain_at(
242	&self, timestamp: u64,
243	) -> Option<&GrainTableSegment> {
244	self.film_grain_params.as_ref().and_then(\|entries\| {
245	entries.iter().find(\|entry\| {
246	timestamp >= entry.start_time && timestamp < entry.end_time
247	})
248	})
249	}
250
251	pub(crate) fn get_film_grain_mut_at(
252	&mut self, timestamp: u64,
253	) -> Option<&mut GrainTableSegment> {
254	self.film_grain_params.as_mut().and_then(\|entries\| {
255	entries.iter_mut().find(\|entry\| {
256	timestamp >= entry.start_time && timestamp < entry.end_time
257	})
258	})
259	}
260	}
261
262	impl fmt::Display for EncoderConfig {
263	fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
264	let pairs = [
265	("keyint_min", self.min_key_frame_interval.to_string()),
266	("keyint_max", self.max_key_frame_interval.to_string()),
267	("quantizer", self.quantizer.to_string()),
268	("bitrate", self.bitrate.to_string()),
269	("min_quantizer", self.min_quantizer.to_string()),
270	("low_latency", self.low_latency.to_string()),
271	("tune", self.tune.to_string()),
272	(
273	"rdo_lookahead_frames",
274	self.speed_settings.rdo_lookahead_frames.to_string(),
275	),
276	(
277	"multiref",
278	(!self.low_latency \|\| self.speed_settings.multiref).to_string(),
279	),
280	("fast_deblock", self.speed_settings.fast_deblock.to_string()),
281	(
282	"scene_detection_mode",
283	self.speed_settings.scene_detection_mode.to_string(),
284	),
285	("cdef", self.speed_settings.cdef.to_string()),
286	("lrf", self.speed_settings.lrf.to_string()),
287	("enable_timing_info", self.enable_timing_info.to_string()),
288	(
289	"min_block_size",
290	self.speed_settings.partition.partition_range.min.to_string(),
291	),
292	(
293	"max_block_size",
294	self.speed_settings.partition.partition_range.max.to_string(),
295	),
296	(
297	"encode_bottomup",
298	self.speed_settings.partition.encode_bottomup.to_string(),
299	),
300	(
301	"non_square_partition_max_threshold",
302	self
303	.speed_settings
304	.partition
305	.non_square_partition_max_threshold
306	.to_string(),
307	),
308	(
309	"reduced_tx_set",
310	self.speed_settings.transform.reduced_tx_set.to_string(),
311	),
312	(
313	"tx_domain_distortion",
314	self.speed_settings.transform.tx_domain_distortion.to_string(),
315	),
316	(
317	"tx_domain_rate",
318	self.speed_settings.transform.tx_domain_rate.to_string(),
319	),
320	(
321	"rdo_tx_decision",
322	self.speed_settings.transform.rdo_tx_decision.to_string(),
323	),
324	(
325	"prediction_modes",
326	self.speed_settings.prediction.prediction_modes.to_string(),
327	),
328	(
329	"fine_directional_intra",
330	self.speed_settings.prediction.fine_directional_intra.to_string(),
331	),
332	(
333	"include_near_mvs",
334	self.speed_settings.motion.include_near_mvs.to_string(),
335	),
336	(
337	"use_satd_subpel",
338	self.speed_settings.motion.use_satd_subpel.to_string(),
339	),
340	];
341	write!(
342	f,
343	"{}",
344	pairs.iter().map(\|pair\| format!("{}={}", pair.0, pair.1)).join(" ")
345	)
346	}
347	}
348