| 1 | // Copyright (c) 2020-2022, The rav1e contributors. All rights reserved |
| 2 | // |
| 3 | // This source code is subject to the terms of the BSD 2 Clause License and |
| 4 | // the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
| 5 | // was not distributed with this source code in the LICENSE file, you can |
| 6 | // obtain it at www.aomedia.org/license/software. If the Alliance for Open |
| 7 | // Media Patent License 1.0 was not distributed with this source code in the |
| 8 | // PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
| 9 | |
| 10 | use itertools::*; |
| 11 | |
| 12 | use crate::api::color::*; |
| 13 | use crate::api::config::GrainTableSegment; |
| 14 | use crate::api::{Rational, SpeedSettings}; |
| 15 | use crate::encoder::Tune; |
| 16 | use crate::serialize::{Deserialize, Serialize}; |
| 17 | |
| 18 | use std::fmt; |
| 19 | |
// Leaves room for the `+ 1` that gets applied to `rdo_lookahead_frames` in a
// bunch of places.
pub(crate) const MAX_RDO_LOOKAHEAD_FRAMES: usize = usize::MAX - 1;
// Bounded because of the math in `RCState::new()` regarding the reservoir
// frame delay.
pub(crate) const MAX_MAX_KEY_FRAME_INTERVAL: u64 = (i32::MAX / 3) as u64;
| 24 | |
| 25 | /// Encoder settings which impact the produced bitstream. |
| 26 | #[derive (Clone, Debug, Serialize, Deserialize)] |
| 27 | pub struct EncoderConfig { |
| 28 | // output size |
| 29 | /// Width of the frames in pixels. |
| 30 | pub width: usize, |
| 31 | /// Height of the frames in pixels. |
| 32 | pub height: usize, |
| 33 | /// Sample aspect ratio (for anamorphic video). |
| 34 | pub sample_aspect_ratio: Rational, |
| 35 | /// Video time base. |
| 36 | pub time_base: Rational, |
| 37 | |
| 38 | // data format and ancillary color information |
| 39 | /// Bit depth. |
| 40 | pub bit_depth: usize, |
| 41 | /// Chroma subsampling. |
| 42 | pub chroma_sampling: ChromaSampling, |
| 43 | /// Chroma sample position. |
| 44 | pub chroma_sample_position: ChromaSamplePosition, |
| 45 | /// Pixel value range. |
| 46 | pub pixel_range: PixelRange, |
| 47 | /// Content color description (primaries, transfer characteristics, matrix). |
| 48 | pub color_description: Option<ColorDescription>, |
| 49 | /// HDR mastering display parameters. |
| 50 | pub mastering_display: Option<MasteringDisplay>, |
| 51 | /// HDR content light parameters. |
| 52 | pub content_light: Option<ContentLight>, |
| 53 | |
| 54 | /// AV1 level index to target (0-31). |
| 55 | /// If None, allow the encoder to decide. |
| 56 | /// Currently, rav1e is unable to guarantee that the output bitstream |
| 57 | /// meets the rate limitations of the specified level. |
| 58 | pub level_idx: Option<u8>, |
| 59 | |
| 60 | /// Enable signaling timing info in the bitstream. |
| 61 | pub enable_timing_info: bool, |
| 62 | |
| 63 | /// Still picture mode flag. |
| 64 | pub still_picture: bool, |
| 65 | |
| 66 | /// Flag to force all frames to be error resilient. |
| 67 | pub error_resilient: bool, |
| 68 | |
| 69 | /// Interval between switch frames (0 to disable) |
| 70 | pub switch_frame_interval: u64, |
| 71 | |
| 72 | // encoder configuration |
| 73 | /// The *minimum* interval between two keyframes |
| 74 | pub min_key_frame_interval: u64, |
| 75 | /// The *maximum* interval between two keyframes |
| 76 | pub max_key_frame_interval: u64, |
| 77 | /// The number of temporal units over which to distribute the reservoir |
| 78 | /// usage. |
| 79 | pub reservoir_frame_delay: Option<i32>, |
| 80 | /// Flag to enable low latency mode. |
| 81 | /// |
| 82 | /// In this mode the frame reordering is disabled. |
| 83 | pub low_latency: bool, |
| 84 | /// The base quantizer to use. |
| 85 | pub quantizer: usize, |
| 86 | /// The minimum allowed base quantizer to use in bitrate mode. |
| 87 | pub min_quantizer: u8, |
| 88 | /// The target bitrate for the bitrate mode. |
| 89 | pub bitrate: i32, |
| 90 | /// Metric to tune the quality for. |
| 91 | pub tune: Tune, |
| 92 | /// Parameters for grain synthesis. |
| 93 | pub film_grain_params: Option<Vec<GrainTableSegment>>, |
| 94 | /// Number of tiles horizontally. Must be a power of two. |
| 95 | /// |
| 96 | /// Overridden by [`tiles`], if present. |
| 97 | /// |
| 98 | /// [`tiles`]: #structfield.tiles |
| 99 | pub tile_cols: usize, |
| 100 | /// Number of tiles vertically. Must be a power of two. |
| 101 | /// |
| 102 | /// Overridden by [`tiles`], if present. |
| 103 | /// |
| 104 | /// [`tiles`]: #structfield.tiles |
| 105 | pub tile_rows: usize, |
| 106 | /// Total number of tiles desired. |
| 107 | /// |
| 108 | /// Encoder will try to optimally split to reach this number of tiles, |
| 109 | /// rounded up. Overrides [`tile_cols`] and [`tile_rows`]. |
| 110 | /// |
| 111 | /// [`tile_cols`]: #structfield.tile_cols |
| 112 | /// [`tile_rows`]: #structfield.tile_rows |
| 113 | pub tiles: usize, |
| 114 | |
| 115 | /// Settings which affect the encoding speed vs. quality trade-off. |
| 116 | pub speed_settings: SpeedSettings, |
| 117 | } |
| 118 | |
| 119 | /// Default preset for `EncoderConfig`: it is a balance between quality and |
| 120 | /// speed. See [`with_speed_preset()`]. |
| 121 | /// |
| 122 | /// [`with_speed_preset()`]: struct.EncoderConfig.html#method.with_speed_preset |
| 123 | impl Default for EncoderConfig { |
| 124 | fn default() -> Self { |
| 125 | const DEFAULT_SPEED: u8 = 6; |
| 126 | Self::with_speed_preset(DEFAULT_SPEED) |
| 127 | } |
| 128 | } |
| 129 | |
| 130 | impl EncoderConfig { |
| 131 | /// This is a preset which provides default settings according to a speed |
| 132 | /// value in the specific range 0–10. Each speed value corresponds to a |
| 133 | /// different preset. See [`from_preset()`]. If the input value is greater |
| 134 | /// than 10, it will result in the same settings as 10. |
| 135 | /// |
| 136 | /// [`from_preset()`]: struct.SpeedSettings.html#method.from_preset |
| 137 | pub fn with_speed_preset(speed: u8) -> Self { |
| 138 | EncoderConfig { |
| 139 | width: 640, |
| 140 | height: 480, |
| 141 | sample_aspect_ratio: Rational { num: 1, den: 1 }, |
| 142 | time_base: Rational { num: 1, den: 30 }, |
| 143 | |
| 144 | bit_depth: 8, |
| 145 | chroma_sampling: ChromaSampling::Cs420, |
| 146 | chroma_sample_position: ChromaSamplePosition::Unknown, |
| 147 | pixel_range: Default::default(), |
| 148 | color_description: None, |
| 149 | mastering_display: None, |
| 150 | content_light: None, |
| 151 | |
| 152 | level_idx: None, |
| 153 | |
| 154 | enable_timing_info: false, |
| 155 | |
| 156 | still_picture: false, |
| 157 | |
| 158 | error_resilient: false, |
| 159 | switch_frame_interval: 0, |
| 160 | |
| 161 | min_key_frame_interval: 12, |
| 162 | max_key_frame_interval: 240, |
| 163 | min_quantizer: 0, |
| 164 | reservoir_frame_delay: None, |
| 165 | low_latency: false, |
| 166 | quantizer: 100, |
| 167 | bitrate: 0, |
| 168 | tune: Tune::default(), |
| 169 | film_grain_params: None, |
| 170 | tile_cols: 0, |
| 171 | tile_rows: 0, |
| 172 | tiles: 0, |
| 173 | speed_settings: SpeedSettings::from_preset(speed), |
| 174 | } |
| 175 | } |
| 176 | |
| 177 | /// Sets the minimum and maximum keyframe interval, handling special cases as needed. |
| 178 | pub fn set_key_frame_interval( |
| 179 | &mut self, min_interval: u64, max_interval: u64, |
| 180 | ) { |
| 181 | self.min_key_frame_interval = min_interval; |
| 182 | |
| 183 | // Map an input value of 0 to an infinite interval |
| 184 | self.max_key_frame_interval = if max_interval == 0 { |
| 185 | MAX_MAX_KEY_FRAME_INTERVAL |
| 186 | } else { |
| 187 | max_interval |
| 188 | }; |
| 189 | } |
| 190 | |
| 191 | /// Returns the video frame rate computed from [`time_base`]. |
| 192 | /// |
| 193 | /// [`time_base`]: #structfield.time_base |
| 194 | pub fn frame_rate(&self) -> f64 { |
| 195 | Rational::from_reciprocal(self.time_base).as_f64() |
| 196 | } |
| 197 | |
| 198 | /// Computes the render width and height of the stream based |
| 199 | /// on [`width`], [`height`], and [`sample_aspect_ratio`]. |
| 200 | /// |
| 201 | /// [`width`]: #structfield.width |
| 202 | /// [`height`]: #structfield.height |
| 203 | /// [`sample_aspect_ratio`]: #structfield.sample_aspect_ratio |
| 204 | pub fn render_size(&self) -> (usize, usize) { |
| 205 | let sar = self.sample_aspect_ratio.as_f64(); |
| 206 | |
| 207 | if sar > 1.0 { |
| 208 | ((self.width as f64 * sar).round() as usize, self.height) |
| 209 | } else { |
| 210 | (self.width, (self.height as f64 / sar).round() as usize) |
| 211 | } |
| 212 | } |
| 213 | |
| 214 | /// Is temporal RDO enabled ? |
| 215 | #[inline ] |
| 216 | pub const fn temporal_rdo(&self) -> bool { |
| 217 | // Note: This function is called frequently, unlike most other functions here. |
| 218 | |
| 219 | // `compute_distortion_scale` computes a scaling factor for the distortion |
| 220 | // of an 8x8 block (4x4 blocks simply use the scaling of the enclosing 8x8 |
| 221 | // block). As long as distortion is always computed on <= 8x8 blocks, this |
| 222 | // has the property that the scaled distortion of a 2Nx2N block is always |
| 223 | // equal to the sum of the scaled distortions of the NxN sub-blocks it's |
| 224 | // made of, this is a necessary property to be able to do RDO between |
| 225 | // multiple partition sizes properly. Unfortunately, when tx domain |
| 226 | // distortion is used, distortion is only known at the tx block level which |
| 227 | // might be bigger than 8x8. So temporal RDO is always disabled in that case. |
| 228 | !self.speed_settings.transform.tx_domain_distortion |
| 229 | } |
| 230 | |
| 231 | /// Describes whether the output is targeted as HDR |
| 232 | pub fn is_hdr(&self) -> bool { |
| 233 | self |
| 234 | .color_description |
| 235 | .map(|colors| { |
| 236 | colors.transfer_characteristics == TransferCharacteristics::SMPTE2084 |
| 237 | }) |
| 238 | .unwrap_or(false) |
| 239 | } |
| 240 | |
| 241 | pub(crate) fn get_film_grain_at( |
| 242 | &self, timestamp: u64, |
| 243 | ) -> Option<&GrainTableSegment> { |
| 244 | self.film_grain_params.as_ref().and_then(|entries| { |
| 245 | entries.iter().find(|entry| { |
| 246 | timestamp >= entry.start_time && timestamp < entry.end_time |
| 247 | }) |
| 248 | }) |
| 249 | } |
| 250 | |
| 251 | pub(crate) fn get_film_grain_mut_at( |
| 252 | &mut self, timestamp: u64, |
| 253 | ) -> Option<&mut GrainTableSegment> { |
| 254 | self.film_grain_params.as_mut().and_then(|entries| { |
| 255 | entries.iter_mut().find(|entry| { |
| 256 | timestamp >= entry.start_time && timestamp < entry.end_time |
| 257 | }) |
| 258 | }) |
| 259 | } |
| 260 | } |
| 261 | |
| 262 | impl fmt::Display for EncoderConfig { |
| 263 | fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { |
| 264 | let pairs = [ |
| 265 | ("keyint_min" , self.min_key_frame_interval.to_string()), |
| 266 | ("keyint_max" , self.max_key_frame_interval.to_string()), |
| 267 | ("quantizer" , self.quantizer.to_string()), |
| 268 | ("bitrate" , self.bitrate.to_string()), |
| 269 | ("min_quantizer" , self.min_quantizer.to_string()), |
| 270 | ("low_latency" , self.low_latency.to_string()), |
| 271 | ("tune" , self.tune.to_string()), |
| 272 | ( |
| 273 | "rdo_lookahead_frames" , |
| 274 | self.speed_settings.rdo_lookahead_frames.to_string(), |
| 275 | ), |
| 276 | ( |
| 277 | "multiref" , |
| 278 | (!self.low_latency || self.speed_settings.multiref).to_string(), |
| 279 | ), |
| 280 | ("fast_deblock" , self.speed_settings.fast_deblock.to_string()), |
| 281 | ( |
| 282 | "scene_detection_mode" , |
| 283 | self.speed_settings.scene_detection_mode.to_string(), |
| 284 | ), |
| 285 | ("cdef" , self.speed_settings.cdef.to_string()), |
| 286 | ("lrf" , self.speed_settings.lrf.to_string()), |
| 287 | ("enable_timing_info" , self.enable_timing_info.to_string()), |
| 288 | ( |
| 289 | "min_block_size" , |
| 290 | self.speed_settings.partition.partition_range.min.to_string(), |
| 291 | ), |
| 292 | ( |
| 293 | "max_block_size" , |
| 294 | self.speed_settings.partition.partition_range.max.to_string(), |
| 295 | ), |
| 296 | ( |
| 297 | "encode_bottomup" , |
| 298 | self.speed_settings.partition.encode_bottomup.to_string(), |
| 299 | ), |
| 300 | ( |
| 301 | "non_square_partition_max_threshold" , |
| 302 | self |
| 303 | .speed_settings |
| 304 | .partition |
| 305 | .non_square_partition_max_threshold |
| 306 | .to_string(), |
| 307 | ), |
| 308 | ( |
| 309 | "reduced_tx_set" , |
| 310 | self.speed_settings.transform.reduced_tx_set.to_string(), |
| 311 | ), |
| 312 | ( |
| 313 | "tx_domain_distortion" , |
| 314 | self.speed_settings.transform.tx_domain_distortion.to_string(), |
| 315 | ), |
| 316 | ( |
| 317 | "tx_domain_rate" , |
| 318 | self.speed_settings.transform.tx_domain_rate.to_string(), |
| 319 | ), |
| 320 | ( |
| 321 | "rdo_tx_decision" , |
| 322 | self.speed_settings.transform.rdo_tx_decision.to_string(), |
| 323 | ), |
| 324 | ( |
| 325 | "prediction_modes" , |
| 326 | self.speed_settings.prediction.prediction_modes.to_string(), |
| 327 | ), |
| 328 | ( |
| 329 | "fine_directional_intra" , |
| 330 | self.speed_settings.prediction.fine_directional_intra.to_string(), |
| 331 | ), |
| 332 | ( |
| 333 | "include_near_mvs" , |
| 334 | self.speed_settings.motion.include_near_mvs.to_string(), |
| 335 | ), |
| 336 | ( |
| 337 | "use_satd_subpel" , |
| 338 | self.speed_settings.motion.use_satd_subpel.to_string(), |
| 339 | ), |
| 340 | ]; |
| 341 | write!( |
| 342 | f, |
| 343 | " {}" , |
| 344 | pairs.iter().map(|pair| format!(" {}= {}" , pair.0, pair.1)).join(" " ) |
| 345 | ) |
| 346 | } |
| 347 | } |
| 348 | |