| 1 | // Copyright (c) 2018-2023, The rav1e contributors. All rights reserved |
| 2 | // |
| 3 | // This source code is subject to the terms of the BSD 2 Clause License and |
| 4 | // the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
| 5 | // was not distributed with this source code in the LICENSE file, you can |
| 6 | // obtain it at www.aomedia.org/license/software. If the Alliance for Open |
| 7 | // Media Patent License 1.0 was not distributed with this source code in the |
| 8 | // PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
| 9 | |
| 10 | use crate::activity::*; |
| 11 | use crate::api::config::GrainTableSegment; |
| 12 | use crate::api::*; |
| 13 | use crate::cdef::*; |
| 14 | use crate::context::*; |
| 15 | use crate::deblock::*; |
| 16 | use crate::ec::*; |
| 17 | use crate::frame::*; |
| 18 | use crate::header::*; |
| 19 | use crate::lrf::*; |
| 20 | use crate::mc::{FilterMode, MotionVector}; |
| 21 | use crate::me::*; |
| 22 | use crate::partition::PartitionType::*; |
| 23 | use crate::partition::RefType::*; |
| 24 | use crate::partition::*; |
| 25 | use crate::predict::{ |
| 26 | luma_ac, AngleDelta, IntraEdgeFilterParameters, IntraParam, PredictionMode, |
| 27 | }; |
| 28 | use crate::quantize::*; |
| 29 | use crate::rate::{ |
| 30 | QuantizerParameters, FRAME_SUBTYPE_I, FRAME_SUBTYPE_P, QSCALE, |
| 31 | }; |
| 32 | use crate::rdo::*; |
| 33 | use crate::segmentation::*; |
| 34 | use crate::serialize::{Deserialize, Serialize}; |
| 35 | use crate::stats::EncoderStats; |
| 36 | use crate::tiling::*; |
| 37 | use crate::transform::*; |
| 38 | use crate::util::*; |
| 39 | use crate::wasm_bindgen::*; |
| 40 | |
| 41 | use arg_enum_proc_macro::ArgEnum; |
| 42 | use arrayvec::*; |
| 43 | use bitstream_io::{BigEndian, BitWrite, BitWriter}; |
| 44 | use rayon::iter::*; |
| 45 | |
| 46 | use std::collections::VecDeque; |
| 47 | use std::io::Write; |
| 48 | use std::mem::MaybeUninit; |
| 49 | use std::sync::Arc; |
| 50 | use std::{fmt, io, mem}; |
| 51 | |
/// Strategy used to choose CDEF strengths; stored per frame in
/// `FrameInvariants::cdef_search_method`.
///
/// NOTE(review): the variant descriptions are inferred from the names only;
/// the code that interprets them is not visible here — confirm.
#[allow(dead_code)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CDEFSearchMethod {
  /// Presumably derives the strengths directly from the quantizer.
  PickFromQ,
  /// Presumably a reduced search over candidate strengths.
  FastSearch,
  /// Presumably an exhaustive search over candidate strengths.
  FullSearch,
}
| 59 | |
/// Evaluates the quadratic `a*q^2 + b*q + c`, rounds it to the nearest
/// integer and limits the result to `0..=max`.
///
/// The lower bound is checked first: a value below zero yields `0`, a value
/// above `max` yields `max`, anything else is returned unchanged.
#[inline(always)]
fn poly2(q: f32, a: f32, b: f32, c: f32, max: i32) -> i32 {
  // Fused multiply-adds keep the polynomial evaluation to two rounding steps.
  let value = (q * q).mul_add(a, q.mul_add(b, c)).round() as i32;
  if value < 0 {
    0
  } else if value > max {
    max
  } else {
    value
  }
}
| 64 | |
/// Two-byte sequence exported for use as a temporal delimiter.
///
/// NOTE(review): presumably a serialized AV1 temporal delimiter OBU (header
/// byte followed by a zero size field) — confirm against the AV1 spec.
pub static TEMPORAL_DELIMITER: [u8; 2] = [0x12, 0x00];

// Layer limits; their product sizes the per-operating-point arrays in
// `Sequence`.
const MAX_NUM_TEMPORAL_LAYERS: usize = 8;
const MAX_NUM_SPATIAL_LAYERS: usize = 4;
const MAX_NUM_OPERATING_POINTS: usize =
  MAX_NUM_TEMPORAL_LAYERS * MAX_NUM_SPATIAL_LAYERS;

/// Size of blocks for the importance computation, in pixels.
pub const IMPORTANCE_BLOCK_SIZE: usize =
  1 << (IMPORTANCE_BLOCK_TO_BLOCK_SHIFT + BLOCK_TO_PLANE_SHIFT);
| 75 | |
/// A frame kept in a reference slot, together with the per-frame state that
/// is stored alongside it.
///
/// NOTE(review): apart from `order_hint`, the field descriptions are inferred
/// from the names and from the matching fields of `FrameState`; confirm
/// against the code that fills this struct in.
#[derive(Debug, Clone)]
pub struct ReferenceFrame<T: Pixel> {
  /// Order hint of the frame; compared against other hints with
  /// `Sequence::get_relative_dist`.
  pub order_hint: u32,
  pub width: u32,
  pub height: u32,
  pub render_width: u32,
  pub render_height: u32,
  /// Presumably the frame's pixel data — confirm.
  pub frame: Arc<Frame<T>>,
  /// Presumably the half-resolution luma plane — confirm.
  pub input_hres: Arc<Plane<T>>,
  /// Presumably the quarter-resolution luma plane — confirm.
  pub input_qres: Arc<Plane<T>>,
  pub cdfs: CDFContext,
  pub frame_me_stats: RefMEStats,
  pub output_frameno: u64,
  pub segmentation: SegmentationState,
}
| 91 | |
/// The `REF_FRAMES` reference slots and a deblocking state per slot.
#[derive(Debug, Clone, Default)]
pub struct ReferenceFramesSet<T: Pixel> {
  /// One optional reference frame per slot; `None` marks an empty slot.
  pub frames: [Option<Arc<ReferenceFrame<T>>>; REF_FRAMES],
  /// Deblocking state stored for each slot.
  pub deblock: [DeblockState; REF_FRAMES],
}
| 97 | |
| 98 | impl<T: Pixel> ReferenceFramesSet<T> { |
| 99 | pub fn new() -> Self { |
| 100 | Self { frames: Default::default(), deblock: Default::default() } |
| 101 | } |
| 102 | } |
| 103 | |
/// Metric the encoder is tuned for.
///
/// `Psychovisual` is the default. `FrameInvariants::new` only enables
/// transform-domain distortion when the tune is `Psnr`.
#[wasm_bindgen]
#[derive(
  ArgEnum, Copy, Clone, Debug, PartialEq, Eq, Serialize, Deserialize, Default,
)]
#[repr(C)]
pub enum Tune {
  Psnr,
  #[default]
  Psychovisual,
}
| 114 | |
// Values `Sequence::new` stores in `Sequence::frame_id_length` and
// `Sequence::delta_frame_id_length`.
const FRAME_ID_LENGTH: u32 = 15;
const DELTA_FRAME_ID_LENGTH: u32 = 14;
| 117 | |
/// OBU Sequence header of AV1
///
/// Built from an `EncoderConfig` by `Sequence::new`.
#[derive(Copy, Clone, Debug)]
pub struct Sequence {
  /// 2 for 12-bit or 4:2:2 input, 1 for 4:4:4 input, otherwise 0.
  pub profile: u8,
  /// Number of bits needed to represent the configured frame width.
  pub num_bits_width: u32,
  /// Number of bits needed to represent the configured frame height.
  pub num_bits_height: u32,
  pub bit_depth: usize,
  pub chroma_sampling: ChromaSampling,
  pub chroma_sample_position: ChromaSamplePosition,
  pub pixel_range: PixelRange,
  pub color_description: Option<ColorDescription>,
  pub mastering_display: Option<MasteringDisplay>,
  pub content_light: Option<ContentLight>,
  pub max_frame_width: u32,
  pub max_frame_height: u32,
  pub frame_id_numbers_present_flag: bool,
  pub frame_id_length: u32,
  pub delta_frame_id_length: u32,
  pub use_128x128_superblock: bool,
  pub order_hint_bits_minus_1: u32,
  /// 0 - force off
  /// 1 - force on
  /// 2 - adaptive
  pub force_screen_content_tools: u32,
  /// 0 - Not to force. MV can be in 1/4 or 1/8
  /// 1 - force to integer
  /// 2 - adaptive
  pub force_integer_mv: u32,
  /// Video is a single frame still picture
  pub still_picture: bool,
  /// Use reduced header for still picture
  pub reduced_still_picture_hdr: bool,
  /// enables/disables filter_intra
  pub enable_filter_intra: bool,
  /// enables/disables corner/edge filtering and upsampling
  pub enable_intra_edge_filter: bool,
  /// enables/disables interintra_compound
  pub enable_interintra_compound: bool,
  /// enables/disables masked compound
  pub enable_masked_compound: bool,
  /// 0 - disable dual interpolation filter
  /// 1 - enable vert/horiz filter selection
  pub enable_dual_filter: bool,
  /// 0 - disable order hint, and related tools
  /// jnt_comp, ref_frame_mvs, frame_sign_bias
  /// if 0, enable_jnt_comp and
  /// enable_ref_frame_mvs must be set to 0.
  pub enable_order_hint: bool,
  /// 0 - disable joint compound modes
  /// 1 - enable it
  pub enable_jnt_comp: bool,
  /// 0 - disable ref frame mvs
  /// 1 - enable it
  pub enable_ref_frame_mvs: bool,
  /// 0 - disable warped motion for sequence
  /// 1 - enable it for the sequence
  pub enable_warped_motion: bool,
  /// 0 - Disable superres for the sequence, and disable
  ///     transmitting per-frame superres enabled flag.
  /// 1 - Enable superres for the sequence, and also
  ///     enable per-frame flag to denote if superres is
  ///     enabled for that frame.
  pub enable_superres: bool,
  /// To turn on/off CDEF
  pub enable_cdef: bool,
  /// To turn on/off loop restoration
  pub enable_restoration: bool,
  /// To turn on/off larger-than-superblock loop restoration units
  pub enable_large_lru: bool,
  /// allow encoder to delay loop filter RDO/coding until after frame reconstruction is complete
  pub enable_delayed_loopfilter_rdo: bool,
  pub operating_points_cnt_minus_1: usize,
  pub operating_point_idc: [u16; MAX_NUM_OPERATING_POINTS],
  pub display_model_info_present_flag: bool,
  pub decoder_model_info_present_flag: bool,
  /// Level index per operating point: the configured `level_idx` for every
  /// entry, or 31 when none is configured.
  pub level_idx: [u8; MAX_NUM_OPERATING_POINTS],
  /// seq_tier in the spec. One bit: 0 or 1.
  pub tier: [usize; MAX_NUM_OPERATING_POINTS],
  /// True when the configuration carries a non-empty film grain table.
  pub film_grain_params_present: bool,
  pub timing_info_present: bool,
  pub tiling: TilingInfo,
  pub time_base: Rational,
}
| 201 | |
| 202 | impl Sequence { |
| 203 | /// # Panics |
| 204 | /// |
| 205 | /// Panics if the resulting tile sizes would be too large. |
| 206 | pub fn new(config: &EncoderConfig) -> Sequence { |
| 207 | let width_bits = 32 - (config.width as u32).leading_zeros(); |
| 208 | let height_bits = 32 - (config.height as u32).leading_zeros(); |
| 209 | assert!(width_bits <= 16); |
| 210 | assert!(height_bits <= 16); |
| 211 | |
| 212 | let profile = if config.bit_depth == 12 |
| 213 | || config.chroma_sampling == ChromaSampling::Cs422 |
| 214 | { |
| 215 | 2 |
| 216 | } else { |
| 217 | u8::from(config.chroma_sampling == ChromaSampling::Cs444) |
| 218 | }; |
| 219 | |
| 220 | let operating_point_idc: [u16; MAX_NUM_OPERATING_POINTS] = |
| 221 | [0; MAX_NUM_OPERATING_POINTS]; |
| 222 | let level_idx: [u8; MAX_NUM_OPERATING_POINTS] = |
| 223 | if let Some(level_idx) = config.level_idx { |
| 224 | [level_idx; MAX_NUM_OPERATING_POINTS] |
| 225 | } else { |
| 226 | [31; MAX_NUM_OPERATING_POINTS] |
| 227 | }; |
| 228 | let tier: [usize; MAX_NUM_OPERATING_POINTS] = |
| 229 | [0; MAX_NUM_OPERATING_POINTS]; |
| 230 | |
| 231 | // Restoration filters are not useful for very small frame sizes, |
| 232 | // so disable them in that case. |
| 233 | let enable_restoration_filters = config.width >= 32 && config.height >= 32; |
| 234 | let use_128x128_superblock = false; |
| 235 | |
| 236 | let frame_rate = config.frame_rate(); |
| 237 | let sb_size_log2 = Self::sb_size_log2(use_128x128_superblock); |
| 238 | |
| 239 | let mut tiling = TilingInfo::from_target_tiles( |
| 240 | sb_size_log2, |
| 241 | config.width, |
| 242 | config.height, |
| 243 | frame_rate, |
| 244 | TilingInfo::tile_log2(1, config.tile_cols).unwrap(), |
| 245 | TilingInfo::tile_log2(1, config.tile_rows).unwrap(), |
| 246 | config.chroma_sampling == ChromaSampling::Cs422, |
| 247 | ); |
| 248 | |
| 249 | if config.tiles > 0 { |
| 250 | let mut tile_rows_log2 = 0; |
| 251 | let mut tile_cols_log2 = 0; |
| 252 | while (tile_rows_log2 < tiling.max_tile_rows_log2) |
| 253 | || (tile_cols_log2 < tiling.max_tile_cols_log2) |
| 254 | { |
| 255 | tiling = TilingInfo::from_target_tiles( |
| 256 | sb_size_log2, |
| 257 | config.width, |
| 258 | config.height, |
| 259 | frame_rate, |
| 260 | tile_cols_log2, |
| 261 | tile_rows_log2, |
| 262 | config.chroma_sampling == ChromaSampling::Cs422, |
| 263 | ); |
| 264 | |
| 265 | if tiling.rows * tiling.cols >= config.tiles { |
| 266 | break; |
| 267 | }; |
| 268 | |
| 269 | if ((tiling.tile_height_sb >= tiling.tile_width_sb) |
| 270 | && (tiling.tile_rows_log2 < tiling.max_tile_rows_log2)) |
| 271 | || (tile_cols_log2 >= tiling.max_tile_cols_log2) |
| 272 | { |
| 273 | tile_rows_log2 += 1; |
| 274 | } else { |
| 275 | tile_cols_log2 += 1; |
| 276 | } |
| 277 | } |
| 278 | } |
| 279 | |
| 280 | Sequence { |
| 281 | tiling, |
| 282 | profile, |
| 283 | num_bits_width: width_bits, |
| 284 | num_bits_height: height_bits, |
| 285 | bit_depth: config.bit_depth, |
| 286 | chroma_sampling: config.chroma_sampling, |
| 287 | chroma_sample_position: config.chroma_sample_position, |
| 288 | pixel_range: config.pixel_range, |
| 289 | color_description: config.color_description, |
| 290 | mastering_display: config.mastering_display, |
| 291 | content_light: config.content_light, |
| 292 | max_frame_width: config.width as u32, |
| 293 | max_frame_height: config.height as u32, |
| 294 | frame_id_numbers_present_flag: false, |
| 295 | frame_id_length: FRAME_ID_LENGTH, |
| 296 | delta_frame_id_length: DELTA_FRAME_ID_LENGTH, |
| 297 | use_128x128_superblock, |
| 298 | order_hint_bits_minus_1: 5, |
| 299 | force_screen_content_tools: if config.still_picture { 2 } else { 0 }, |
| 300 | force_integer_mv: 2, |
| 301 | still_picture: config.still_picture, |
| 302 | reduced_still_picture_hdr: config.still_picture, |
| 303 | enable_filter_intra: false, |
| 304 | enable_intra_edge_filter: true, |
| 305 | enable_interintra_compound: false, |
| 306 | enable_masked_compound: false, |
| 307 | enable_dual_filter: false, |
| 308 | enable_order_hint: !config.still_picture, |
| 309 | enable_jnt_comp: false, |
| 310 | enable_ref_frame_mvs: false, |
| 311 | enable_warped_motion: false, |
| 312 | enable_superres: false, |
| 313 | enable_cdef: config.speed_settings.cdef && enable_restoration_filters, |
| 314 | enable_restoration: config.speed_settings.lrf |
| 315 | && enable_restoration_filters, |
| 316 | enable_large_lru: true, |
| 317 | enable_delayed_loopfilter_rdo: true, |
| 318 | operating_points_cnt_minus_1: 0, |
| 319 | operating_point_idc, |
| 320 | display_model_info_present_flag: false, |
| 321 | decoder_model_info_present_flag: false, |
| 322 | level_idx, |
| 323 | tier, |
| 324 | film_grain_params_present: config |
| 325 | .film_grain_params |
| 326 | .as_ref() |
| 327 | .map(|entries| !entries.is_empty()) |
| 328 | .unwrap_or(false), |
| 329 | timing_info_present: config.enable_timing_info, |
| 330 | time_base: config.time_base, |
| 331 | } |
| 332 | } |
| 333 | |
| 334 | pub const fn get_relative_dist(&self, a: u32, b: u32) -> i32 { |
| 335 | let diff = a as i32 - b as i32; |
| 336 | let m = 1 << self.order_hint_bits_minus_1; |
| 337 | (diff & (m - 1)) - (diff & m) |
| 338 | } |
| 339 | |
| 340 | pub fn get_skip_mode_allowed<T: Pixel>( |
| 341 | &self, fi: &FrameInvariants<T>, inter_cfg: &InterConfig, |
| 342 | reference_select: bool, |
| 343 | ) -> bool { |
| 344 | if fi.intra_only || !reference_select || !self.enable_order_hint { |
| 345 | return false; |
| 346 | } |
| 347 | |
| 348 | let mut forward_idx: isize = -1; |
| 349 | let mut backward_idx: isize = -1; |
| 350 | let mut forward_hint = 0; |
| 351 | let mut backward_hint = 0; |
| 352 | |
| 353 | for i in inter_cfg.allowed_ref_frames().iter().map(|rf| rf.to_index()) { |
| 354 | if let Some(ref rec) = fi.rec_buffer.frames[fi.ref_frames[i] as usize] { |
| 355 | let ref_hint = rec.order_hint; |
| 356 | |
| 357 | if self.get_relative_dist(ref_hint, fi.order_hint) < 0 { |
| 358 | if forward_idx < 0 |
| 359 | || self.get_relative_dist(ref_hint, forward_hint) > 0 |
| 360 | { |
| 361 | forward_idx = i as isize; |
| 362 | forward_hint = ref_hint; |
| 363 | } |
| 364 | } else if self.get_relative_dist(ref_hint, fi.order_hint) > 0 |
| 365 | && (backward_idx < 0 |
| 366 | || self.get_relative_dist(ref_hint, backward_hint) > 0) |
| 367 | { |
| 368 | backward_idx = i as isize; |
| 369 | backward_hint = ref_hint; |
| 370 | } |
| 371 | } |
| 372 | } |
| 373 | |
| 374 | if forward_idx < 0 { |
| 375 | false |
| 376 | } else if backward_idx >= 0 { |
| 377 | // set skip_mode_frame |
| 378 | true |
| 379 | } else { |
| 380 | let mut second_forward_idx: isize = -1; |
| 381 | let mut second_forward_hint = 0; |
| 382 | |
| 383 | for i in inter_cfg.allowed_ref_frames().iter().map(|rf| rf.to_index()) { |
| 384 | if let Some(ref rec) = fi.rec_buffer.frames[fi.ref_frames[i] as usize] |
| 385 | { |
| 386 | let ref_hint = rec.order_hint; |
| 387 | |
| 388 | if self.get_relative_dist(ref_hint, forward_hint) < 0 |
| 389 | && (second_forward_idx < 0 |
| 390 | || self.get_relative_dist(ref_hint, second_forward_hint) > 0) |
| 391 | { |
| 392 | second_forward_idx = i as isize; |
| 393 | second_forward_hint = ref_hint; |
| 394 | } |
| 395 | } |
| 396 | } |
| 397 | |
| 398 | // TODO: Set skip_mode_frame, when second_forward_idx is not less than 0. |
| 399 | second_forward_idx >= 0 |
| 400 | } |
| 401 | } |
| 402 | |
| 403 | #[inline (always)] |
| 404 | const fn sb_size_log2(use_128x128_superblock: bool) -> usize { |
| 405 | 6 + (use_128x128_superblock as usize) |
| 406 | } |
| 407 | } |
| 408 | |
/// Mutable per-frame encoding state: the input and reconstructed frames and
/// the contexts and filter states that go with them.
#[derive(Debug, Clone)]
pub struct FrameState<T: Pixel> {
  /// log2 of the superblock size, copied from `fi.sb_size_log2()`.
  pub sb_size_log2: usize,
  pub input: Arc<Frame<T>>,
  pub input_hres: Arc<Plane<T>>, // half-resolution version of input luma
  pub input_qres: Arc<Plane<T>>, // quarter-resolution version of input luma
  pub rec: Arc<Frame<T>>,
  pub cdfs: CDFContext,
  pub context_update_tile_id: usize, // tile id used for the CDFContext
  pub max_tile_size_bytes: u32,
  pub deblock: DeblockState,
  pub segmentation: SegmentationState,
  pub restoration: RestorationState,
  // Because we only reference these within a tile context,
  // these are stored per-tile for easier access.
  pub frame_me_stats: RefMEStats,
  pub enc_stats: EncoderStats,
}
| 427 | |
| 428 | impl<T: Pixel> FrameState<T> { |
| 429 | pub fn new(fi: &FrameInvariants<T>) -> Self { |
| 430 | // TODO(negge): Use fi.cfg.chroma_sampling when we store VideoDetails in FrameInvariants |
| 431 | FrameState::new_with_frame( |
| 432 | fi, |
| 433 | Arc::new(Frame::new(fi.width, fi.height, fi.sequence.chroma_sampling)), |
| 434 | ) |
| 435 | } |
| 436 | |
| 437 | /// Similar to [`FrameState::new_with_frame`], but takes an `me_stats` |
| 438 | /// and `rec` to enable reusing the same underlying allocations to create |
| 439 | /// a `FrameState` |
| 440 | /// |
| 441 | /// This function primarily exists for [`estimate_inter_costs`], and so |
| 442 | /// it does not create hres or qres versions of `frame` as downscaling is |
| 443 | /// somewhat expensive and are not needed for [`estimate_inter_costs`]. |
| 444 | pub fn new_with_frame_and_me_stats_and_rec( |
| 445 | fi: &FrameInvariants<T>, frame: Arc<Frame<T>>, me_stats: RefMEStats, |
| 446 | rec: Arc<Frame<T>>, |
| 447 | ) -> Self { |
| 448 | let rs = RestorationState::new(fi, &frame); |
| 449 | |
| 450 | let hres = Plane::new(0, 0, 0, 0, 0, 0); |
| 451 | let qres = Plane::new(0, 0, 0, 0, 0, 0); |
| 452 | |
| 453 | Self { |
| 454 | sb_size_log2: fi.sb_size_log2(), |
| 455 | input: frame, |
| 456 | input_hres: Arc::new(hres), |
| 457 | input_qres: Arc::new(qres), |
| 458 | rec, |
| 459 | cdfs: CDFContext::new(0), |
| 460 | context_update_tile_id: 0, |
| 461 | max_tile_size_bytes: 0, |
| 462 | deblock: Default::default(), |
| 463 | segmentation: Default::default(), |
| 464 | restoration: rs, |
| 465 | frame_me_stats: me_stats, |
| 466 | enc_stats: Default::default(), |
| 467 | } |
| 468 | } |
| 469 | |
| 470 | pub fn new_with_frame( |
| 471 | fi: &FrameInvariants<T>, frame: Arc<Frame<T>>, |
| 472 | ) -> Self { |
| 473 | let rs = RestorationState::new(fi, &frame); |
| 474 | let luma_width = frame.planes[0].cfg.width; |
| 475 | let luma_height = frame.planes[0].cfg.height; |
| 476 | |
| 477 | let hres = frame.planes[0].downsampled(fi.width, fi.height); |
| 478 | let qres = hres.downsampled(fi.width, fi.height); |
| 479 | |
| 480 | Self { |
| 481 | sb_size_log2: fi.sb_size_log2(), |
| 482 | input: frame, |
| 483 | input_hres: Arc::new(hres), |
| 484 | input_qres: Arc::new(qres), |
| 485 | rec: Arc::new(Frame::new( |
| 486 | luma_width, |
| 487 | luma_height, |
| 488 | fi.sequence.chroma_sampling, |
| 489 | )), |
| 490 | cdfs: CDFContext::new(0), |
| 491 | context_update_tile_id: 0, |
| 492 | max_tile_size_bytes: 0, |
| 493 | deblock: Default::default(), |
| 494 | segmentation: Default::default(), |
| 495 | restoration: rs, |
| 496 | frame_me_stats: FrameMEStats::new_arc_array(fi.w_in_b, fi.h_in_b), |
| 497 | enc_stats: Default::default(), |
| 498 | } |
| 499 | } |
| 500 | |
  /// Runs `f` on a `TileStateMut` created at superblock offset (0, 0) with
  /// the width and height of plane 0 of `rec`, and returns whatever `f`
  /// returns.
  ///
  /// # Panics
  ///
  /// Panics with "poisoned lock" if acquiring write access to
  /// `frame_me_stats` fails.
  pub fn apply_tile_state_mut<F, R>(&mut self, f: F) -> R
  where
    F: FnOnce(&mut TileStateMut<'_, T>) -> R,
  {
    let PlaneConfig { width, height, .. } = self.rec.planes[0].cfg;
    let sbo_0 = PlaneSuperBlockOffset(SuperBlockOffset { x: 0, y: 0 });
    // Clone the stats handle first so the write guard borrows the local
    // clone rather than `self`, which is handed mutably to
    // `TileStateMut::new` below.
    let frame_me_stats = self.frame_me_stats.clone();
    let frame_me_stats = &mut *frame_me_stats.write().expect("poisoned lock");
    let ts = &mut TileStateMut::new(
      self,
      sbo_0,
      self.sb_size_log2,
      width,
      height,
      frame_me_stats,
    );

    f(ts)
  }
| 520 | } |
| 521 | |
/// Deblocking filter parameters.
///
/// NOTE(review): fields without a comment are not interpreted anywhere in
/// the visible code; their meaning is presumably that of the same-named AV1
/// loop filter syntax elements — confirm.
#[derive(Copy, Clone, Debug)]
pub struct DeblockState {
  pub levels: [u8; MAX_PLANES + 1], // Y vertical edges, Y horizontal, U, V
  pub sharpness: u8,
  pub deltas_enabled: bool,
  pub delta_updates_enabled: bool,
  /// One delta per reference frame slot.
  pub ref_deltas: [i8; REF_FRAMES],
  pub mode_deltas: [i8; 2],
  pub block_deltas_enabled: bool,
  pub block_delta_shift: u8,
  pub block_delta_multi: bool,
}
| 534 | |
| 535 | impl Default for DeblockState { |
| 536 | fn default() -> Self { |
| 537 | DeblockState { |
| 538 | levels: [8, 8, 4, 4], |
| 539 | sharpness: 0, |
| 540 | deltas_enabled: false, // requires delta_q_enabled |
| 541 | delta_updates_enabled: false, |
| 542 | ref_deltas: [1, 0, 0, 0, 0, -1, -1, -1], |
| 543 | mode_deltas: [0, 0], |
| 544 | block_deltas_enabled: false, |
| 545 | block_delta_shift: 0, |
| 546 | block_delta_multi: false, |
| 547 | } |
| 548 | } |
| 549 | } |
| 550 | |
/// Segmentation parameters and per-segment feature data.
#[derive(Copy, Clone, Debug, Default)]
pub struct SegmentationState {
  pub enabled: bool,
  pub update_data: bool,
  pub update_map: bool,
  pub preskip: bool,
  pub last_active_segid: u8,
  /// Per-segment flags, indexed by segment and then by `SegLvl` feature.
  pub features: [[bool; SegLvl::SEG_LVL_MAX as usize]; 8],
  /// Per-segment feature values, indexed like `features`. The
  /// `SEG_LVL_ALT_Q` entry is read by `update_threshold`.
  pub data: [[i16; SegLvl::SEG_LVL_MAX as usize]; 8],
  /// Filled by `update_threshold`: entry `i` is derived from the quantizers
  /// of segments `i` and `i + 1`; entries beyond the last such pair are
  /// `DistortionScale(0)`.
  pub threshold: [DistortionScale; 7],
  pub min_segment: u8,
  /// Highest segment index considered by `update_threshold`.
  pub max_segment: u8,
}
| 564 | |
| 565 | impl SegmentationState { |
| 566 | #[profiling::function ] |
| 567 | pub fn update_threshold(&mut self, base_q_idx: u8, bd: usize) { |
| 568 | let base_ac_q = ac_q(base_q_idx, 0, bd).get() as u64; |
| 569 | let real_ac_q = ArrayVec::<_, MAX_SEGMENTS>::from_iter( |
| 570 | self.data[..=self.max_segment as usize].iter().map(|data| { |
| 571 | ac_q(base_q_idx, data[SegLvl::SEG_LVL_ALT_Q as usize] as i8, bd).get() |
| 572 | as u64 |
| 573 | }), |
| 574 | ); |
| 575 | self.threshold.fill(DistortionScale(0)); |
| 576 | for ((q1, q2), threshold) in |
| 577 | real_ac_q.iter().skip(1).zip(&real_ac_q).zip(&mut self.threshold) |
| 578 | { |
| 579 | *threshold = DistortionScale::new(base_ac_q.pow(2), q1 * q2); |
| 580 | } |
| 581 | } |
| 582 | |
| 583 | #[cfg (feature = "dump_lookahead_data" )] |
| 584 | pub fn dump_threshold( |
| 585 | &self, data_location: std::path::PathBuf, input_frameno: u64, |
| 586 | ) { |
| 587 | use byteorder::{NativeEndian, WriteBytesExt}; |
| 588 | let file_name = format!("{:010}-thresholds" , input_frameno); |
| 589 | let max_segment = self.max_segment; |
| 590 | // dynamic allocation: debugging only |
| 591 | let mut buf = vec![]; |
| 592 | buf.write_u64::<NativeEndian>(max_segment as u64).unwrap(); |
| 593 | for &v in &self.threshold[..max_segment as usize] { |
| 594 | buf.write_u32::<NativeEndian>(v.0).unwrap(); |
| 595 | } |
| 596 | ::std::fs::write(data_location.join(file_name).with_extension("bin" ), buf) |
| 597 | .unwrap(); |
| 598 | } |
| 599 | } |
| 600 | |
// Frame Invariants are invariant inside a frame
/// Per-frame parameters that stay fixed while a single frame is encoded.
///
/// NOTE(review): most fields mirror frame header syntax elements by name;
/// only the fields commented below are described from visible code.
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct FrameInvariants<T: Pixel> {
  pub sequence: Arc<Sequence>,
  pub config: Arc<EncoderConfig>,
  pub width: usize,
  pub height: usize,
  pub render_width: u32,
  pub render_height: u32,
  /// True when the frame size differs from the sequence's maximum frame
  /// size.
  pub frame_size_override_flag: bool,
  pub render_and_frame_size_different: bool,
  pub sb_width: usize,
  pub sb_height: usize,
  /// Frame width in blocks; `CodedFrameData::new` halves it to count
  /// importance blocks.
  pub w_in_b: usize,
  /// Frame height in blocks; `CodedFrameData::new` halves it to count
  /// importance blocks.
  pub h_in_b: usize,
  pub input_frameno: u64,
  /// Order hint of this frame; compared against reference order hints in
  /// `Sequence::get_skip_mode_allowed`.
  pub order_hint: u32,
  pub show_frame: bool,
  pub showable_frame: bool,
  pub error_resilient: bool,
  pub intra_only: bool,
  pub allow_high_precision_mv: bool,
  pub frame_type: FrameType,
  pub frame_to_show_map_idx: u32,
  pub use_reduced_tx_set: bool,
  pub reference_mode: ReferenceMode,
  pub use_prev_frame_mvs: bool,
  pub partition_range: PartitionRange,
  pub globalmv_transformation_type: [GlobalMVMode; INTER_REFS_PER_FRAME],
  pub num_tg: usize,
  pub large_scale_tile: bool,
  pub disable_cdf_update: bool,
  pub allow_screen_content_tools: u32,
  pub force_integer_mv: u32,
  pub primary_ref_frame: u32,
  pub refresh_frame_flags: u32, // a bitmask that specifies which
  // reference frame slots will be updated with the current frame
  // after it is decoded.
  pub allow_intrabc: bool,
  pub use_ref_frame_mvs: bool,
  pub is_filter_switchable: bool,
  pub is_motion_mode_switchable: bool,
  pub disable_frame_end_update_cdf: bool,
  pub allow_warped_motion: bool,
  pub cdef_search_method: CDEFSearchMethod,
  pub cdef_damping: u8,
  pub cdef_bits: u8,
  pub cdef_y_strengths: [u8; 8],
  pub cdef_uv_strengths: [u8; 8],
  pub delta_q_present: bool,
  /// For each inter reference, the index of the `rec_buffer` slot it uses.
  pub ref_frames: [u8; INTER_REFS_PER_FRAME],
  pub ref_frame_sign_bias: [bool; INTER_REFS_PER_FRAME],
  /// Reference slots available to this frame.
  pub rec_buffer: ReferenceFramesSet<T>,
  pub base_q_idx: u8,
  pub dc_delta_q: [i8; 3],
  pub ac_delta_q: [i8; 3],
  pub lambda: f64,
  pub me_lambda: f64,
  pub dist_scale: [DistortionScale; 3],
  pub me_range_scale: u8,
  pub use_tx_domain_distortion: bool,
  pub use_tx_domain_rate: bool,
  pub idx_in_group_output: u64,
  pub pyramid_level: u64,
  pub enable_early_exit: bool,
  pub tx_mode_select: bool,
  pub enable_inter_txfm_split: bool,
  pub default_filter: FilterMode,
  pub enable_segmentation: bool,
  pub t35_metadata: Box<[T35]>,
  /// Target CPU feature level.
  pub cpu_feature_level: crate::cpu_features::CpuFeatureLevel,

  // These will be set if this is a coded (non-SEF) frame.
  // We do not need them for SEFs.
  pub coded_frame_data: Option<CodedFrameData<T>>,
}
| 679 | |
/// These frame invariants are only used on coded frames, i.e. non-SEFs.
/// They are stored separately to avoid useless allocations
/// when we do not need them.
///
/// Currently this consists only of lookahead data.
/// This may change in the future.
#[derive(Debug, Clone)]
pub struct CodedFrameData<T: Pixel> {
  /// The lookahead version of `rec_buffer`, used for storing and propagating
  /// the original reference frames (rather than reconstructed ones). The
  /// lookahead uses both `rec_buffer` and `lookahead_rec_buffer`, where
  /// `rec_buffer` contains the current frame's reference frames and
  /// `lookahead_rec_buffer` contains the next frame's reference frames.
  pub lookahead_rec_buffer: ReferenceFramesSet<T>,
  /// Frame width in importance blocks.
  pub w_in_imp_b: usize,
  /// Frame height in importance blocks.
  pub h_in_imp_b: usize,
  /// Intra prediction cost estimations for each importance block.
  pub lookahead_intra_costs: Box<[u32]>,
  /// Future importance values for each importance block. That is, a value
  /// indicating how much future frames depend on the block (for example, via
  /// inter-prediction).
  pub block_importances: Box<[f32]>,
  /// Pre-computed distortion_scale.
  pub distortion_scales: Box<[DistortionScale]>,
  /// Pre-computed activity_scale.
  pub activity_scales: Box<[DistortionScale]>,
  pub activity_mask: ActivityMask,
  /// Combined metric of activity and distortion
  pub spatiotemporal_scores: Box<[DistortionScale]>,
}
| 712 | |
| 713 | impl<T: Pixel> CodedFrameData<T> { |
| 714 | pub fn new(fi: &FrameInvariants<T>) -> CodedFrameData<T> { |
| 715 | // Width and height are padded to 8×8 block size. |
| 716 | let w_in_imp_b = fi.w_in_b / 2; |
| 717 | let h_in_imp_b = fi.h_in_b / 2; |
| 718 | |
| 719 | CodedFrameData { |
| 720 | lookahead_rec_buffer: ReferenceFramesSet::new(), |
| 721 | w_in_imp_b, |
| 722 | h_in_imp_b, |
| 723 | // This is never used before it is assigned |
| 724 | lookahead_intra_costs: Box::new([]), |
| 725 | // dynamic allocation: once per frame |
| 726 | block_importances: vec![0.; w_in_imp_b * h_in_imp_b].into_boxed_slice(), |
| 727 | distortion_scales: vec![ |
| 728 | DistortionScale::default(); |
| 729 | w_in_imp_b * h_in_imp_b |
| 730 | ] |
| 731 | .into_boxed_slice(), |
| 732 | activity_scales: vec![ |
| 733 | DistortionScale::default(); |
| 734 | w_in_imp_b * h_in_imp_b |
| 735 | ] |
| 736 | .into_boxed_slice(), |
| 737 | activity_mask: Default::default(), |
| 738 | spatiotemporal_scores: Default::default(), |
| 739 | } |
| 740 | } |
| 741 | |
| 742 | // Assumes that we have already computed activity scales and distortion scales |
| 743 | // Returns -0.5 log2(mean(scale)) |
| 744 | #[profiling::function ] |
| 745 | pub fn compute_spatiotemporal_scores(&mut self) -> i64 { |
| 746 | let mut scores = self |
| 747 | .distortion_scales |
| 748 | .iter() |
| 749 | .zip(self.activity_scales.iter()) |
| 750 | .map(|(&d, &a)| d * a) |
| 751 | .collect::<Box<_>>(); |
| 752 | |
| 753 | let inv_mean = DistortionScale::inv_mean(&scores); |
| 754 | |
| 755 | for score in scores.iter_mut() { |
| 756 | *score *= inv_mean; |
| 757 | } |
| 758 | |
| 759 | for scale in self.distortion_scales.iter_mut() { |
| 760 | *scale *= inv_mean; |
| 761 | } |
| 762 | |
| 763 | self.spatiotemporal_scores = scores; |
| 764 | |
| 765 | inv_mean.blog64() >> 1 |
| 766 | } |
| 767 | |
| 768 | // Assumes that we have already computed distortion_scales |
| 769 | // Returns -0.5 log2(mean(scale)) |
| 770 | #[profiling::function ] |
| 771 | pub fn compute_temporal_scores(&mut self) -> i64 { |
| 772 | let inv_mean = DistortionScale::inv_mean(&self.distortion_scales); |
| 773 | for scale in self.distortion_scales.iter_mut() { |
| 774 | *scale *= inv_mean; |
| 775 | } |
| 776 | self.spatiotemporal_scores = self.distortion_scales.clone(); |
| 777 | inv_mean.blog64() >> 1 |
| 778 | } |
| 779 | |
| 780 | #[cfg (feature = "dump_lookahead_data" )] |
| 781 | pub fn dump_scales( |
| 782 | &self, data_location: std::path::PathBuf, scales: Scales, |
| 783 | input_frameno: u64, |
| 784 | ) { |
| 785 | use byteorder::{NativeEndian, WriteBytesExt}; |
| 786 | let file_name = format!( |
| 787 | "{:010}-{}" , |
| 788 | input_frameno, |
| 789 | match scales { |
| 790 | Scales::ActivityScales => "activity_scales" , |
| 791 | Scales::DistortionScales => "distortion_scales" , |
| 792 | Scales::SpatiotemporalScales => "spatiotemporal_scales" , |
| 793 | } |
| 794 | ); |
| 795 | // dynamic allocation: debugging only |
| 796 | let mut buf = vec![]; |
| 797 | buf.write_u64::<NativeEndian>(self.w_in_imp_b as u64).unwrap(); |
| 798 | buf.write_u64::<NativeEndian>(self.h_in_imp_b as u64).unwrap(); |
| 799 | for &v in match scales { |
| 800 | Scales::ActivityScales => &self.activity_scales[..], |
| 801 | Scales::DistortionScales => &self.distortion_scales[..], |
| 802 | Scales::SpatiotemporalScales => &self.spatiotemporal_scores[..], |
| 803 | } { |
| 804 | buf.write_u32::<NativeEndian>(v.0).unwrap(); |
| 805 | } |
| 806 | ::std::fs::write(data_location.join(file_name).with_extension("bin" ), buf) |
| 807 | .unwrap(); |
| 808 | } |
| 809 | } |
| 810 | |
/// Selects which scale array `CodedFrameData::dump_scales` writes out.
#[cfg(feature = "dump_lookahead_data")]
pub enum Scales {
  /// Dump `activity_scales`.
  ActivityScales,
  /// Dump `distortion_scales`.
  DistortionScales,
  /// Dump `spatiotemporal_scores`.
  SpatiotemporalScales,
}
| 817 | |
/// Derives the level within the pyramid for a frame at coding order
/// position `pos`.
///
/// The level is `pyramid_depth` minus the number of trailing zero bits of
/// `pos`, with that count capped at `pyramid_depth`. For example, with a
/// pyramid of depth 2 the 2 least significant bits of the position give:
///
/// * `00` -> 0
/// * `01` -> 2
/// * `10` -> 1
/// * `11` -> 2
pub(crate) const fn pos_to_lvl(pos: u64, pyramid_depth: u64) -> u64 {
  // Setting bit `pyramid_depth` caps the trailing-zero count at that depth.
  let capped = pos | (1u64 << pyramid_depth);
  let low_zeros = capped.trailing_zeros() as u64;
  pyramid_depth - low_zeros
}
| 828 | |
| 829 | impl<T: Pixel> FrameInvariants<T> { |
| 830 | #[allow (clippy::erasing_op, clippy::identity_op)] |
| 831 | /// # Panics |
| 832 | /// |
| 833 | /// - If the size of `T` does not match the sequence's bit depth |
| 834 | pub fn new(config: Arc<EncoderConfig>, sequence: Arc<Sequence>) -> Self { |
| 835 | assert!( |
| 836 | sequence.bit_depth <= mem::size_of::<T>() * 8, |
| 837 | "bit depth cannot fit into u8" |
| 838 | ); |
| 839 | |
| 840 | let (width, height) = (config.width, config.height); |
| 841 | let frame_size_override_flag = width as u32 != sequence.max_frame_width |
| 842 | || height as u32 != sequence.max_frame_height; |
| 843 | |
| 844 | let (render_width, render_height) = config.render_size(); |
| 845 | let render_and_frame_size_different = |
| 846 | render_width != width || render_height != height; |
| 847 | |
| 848 | let use_reduced_tx_set = config.speed_settings.transform.reduced_tx_set; |
| 849 | let use_tx_domain_distortion = config.tune == Tune::Psnr |
| 850 | && config.speed_settings.transform.tx_domain_distortion; |
| 851 | let use_tx_domain_rate = config.speed_settings.transform.tx_domain_rate; |
| 852 | |
| 853 | let w_in_b = 2 * config.width.align_power_of_two_and_shift(3); // MiCols, ((width+7)/8)<<3 >> MI_SIZE_LOG2 |
| 854 | let h_in_b = 2 * config.height.align_power_of_two_and_shift(3); // MiRows, ((height+7)/8)<<3 >> MI_SIZE_LOG2 |
| 855 | |
| 856 | Self { |
| 857 | width, |
| 858 | height, |
| 859 | render_width: render_width as u32, |
| 860 | render_height: render_height as u32, |
| 861 | frame_size_override_flag, |
| 862 | render_and_frame_size_different, |
| 863 | sb_width: width.align_power_of_two_and_shift(6), |
| 864 | sb_height: height.align_power_of_two_and_shift(6), |
| 865 | w_in_b, |
| 866 | h_in_b, |
| 867 | input_frameno: 0, |
| 868 | order_hint: 0, |
| 869 | show_frame: true, |
| 870 | showable_frame: !sequence.reduced_still_picture_hdr, |
| 871 | error_resilient: false, |
| 872 | intra_only: true, |
| 873 | allow_high_precision_mv: false, |
| 874 | frame_type: FrameType::KEY, |
| 875 | frame_to_show_map_idx: 0, |
| 876 | use_reduced_tx_set, |
| 877 | reference_mode: ReferenceMode::SINGLE, |
| 878 | use_prev_frame_mvs: false, |
| 879 | partition_range: config.speed_settings.partition.partition_range, |
| 880 | globalmv_transformation_type: [GlobalMVMode::IDENTITY; |
| 881 | INTER_REFS_PER_FRAME], |
| 882 | num_tg: 1, |
| 883 | large_scale_tile: false, |
| 884 | disable_cdf_update: false, |
| 885 | allow_screen_content_tools: sequence.force_screen_content_tools, |
| 886 | force_integer_mv: 1, |
| 887 | primary_ref_frame: PRIMARY_REF_NONE, |
| 888 | refresh_frame_flags: ALL_REF_FRAMES_MASK, |
| 889 | allow_intrabc: false, |
| 890 | use_ref_frame_mvs: false, |
| 891 | is_filter_switchable: false, |
| 892 | is_motion_mode_switchable: false, // 0: only the SIMPLE motion mode will be used. |
| 893 | disable_frame_end_update_cdf: sequence.reduced_still_picture_hdr, |
| 894 | allow_warped_motion: false, |
| 895 | cdef_search_method: CDEFSearchMethod::PickFromQ, |
| 896 | cdef_damping: 3, |
| 897 | cdef_bits: 0, |
| 898 | cdef_y_strengths: [ |
| 899 | 0 * 4 + 0, |
| 900 | 1 * 4 + 0, |
| 901 | 2 * 4 + 1, |
| 902 | 3 * 4 + 1, |
| 903 | 5 * 4 + 2, |
| 904 | 7 * 4 + 3, |
| 905 | 10 * 4 + 3, |
| 906 | 13 * 4 + 3, |
| 907 | ], |
| 908 | cdef_uv_strengths: [ |
| 909 | 0 * 4 + 0, |
| 910 | 1 * 4 + 0, |
| 911 | 2 * 4 + 1, |
| 912 | 3 * 4 + 1, |
| 913 | 5 * 4 + 2, |
| 914 | 7 * 4 + 3, |
| 915 | 10 * 4 + 3, |
| 916 | 13 * 4 + 3, |
| 917 | ], |
| 918 | delta_q_present: false, |
| 919 | ref_frames: [0; INTER_REFS_PER_FRAME], |
| 920 | ref_frame_sign_bias: [false; INTER_REFS_PER_FRAME], |
| 921 | rec_buffer: ReferenceFramesSet::new(), |
| 922 | base_q_idx: config.quantizer as u8, |
| 923 | dc_delta_q: [0; 3], |
| 924 | ac_delta_q: [0; 3], |
| 925 | lambda: 0.0, |
| 926 | dist_scale: Default::default(), |
| 927 | me_lambda: 0.0, |
| 928 | me_range_scale: 1, |
| 929 | use_tx_domain_distortion, |
| 930 | use_tx_domain_rate, |
| 931 | idx_in_group_output: 0, |
| 932 | pyramid_level: 0, |
| 933 | enable_early_exit: true, |
| 934 | tx_mode_select: false, |
| 935 | default_filter: FilterMode::REGULAR, |
| 936 | cpu_feature_level: Default::default(), |
| 937 | enable_segmentation: config.speed_settings.segmentation |
| 938 | != SegmentationLevel::Disabled, |
| 939 | enable_inter_txfm_split: config |
| 940 | .speed_settings |
| 941 | .transform |
| 942 | .enable_inter_tx_split, |
| 943 | t35_metadata: Box::new([]), |
| 944 | sequence, |
| 945 | config, |
| 946 | coded_frame_data: None, |
| 947 | } |
| 948 | } |
| 949 | |
| 950 | pub fn new_key_frame( |
| 951 | config: Arc<EncoderConfig>, sequence: Arc<Sequence>, |
| 952 | gop_input_frameno_start: u64, t35_metadata: Box<[T35]>, |
| 953 | ) -> Self { |
| 954 | let tx_mode_select = config.speed_settings.transform.rdo_tx_decision; |
| 955 | let mut fi = Self::new(config, sequence); |
| 956 | fi.input_frameno = gop_input_frameno_start; |
| 957 | fi.tx_mode_select = tx_mode_select; |
| 958 | fi.coded_frame_data = Some(CodedFrameData::new(&fi)); |
| 959 | fi.t35_metadata = t35_metadata; |
| 960 | fi |
| 961 | } |
| 962 | |
| 963 | /// Returns the created `FrameInvariants`, or `None` if this should be |
| 964 | /// a placeholder frame. |
| 965 | pub(crate) fn new_inter_frame( |
| 966 | previous_coded_fi: &Self, inter_cfg: &InterConfig, |
| 967 | gop_input_frameno_start: u64, output_frameno_in_gop: u64, |
| 968 | next_keyframe_input_frameno: u64, error_resilient: bool, |
| 969 | t35_metadata: Box<[T35]>, |
| 970 | ) -> Option<Self> { |
| 971 | let input_frameno = inter_cfg |
| 972 | .get_input_frameno(output_frameno_in_gop, gop_input_frameno_start); |
| 973 | if input_frameno >= next_keyframe_input_frameno { |
| 974 | // This is an invalid frame. We set it as a placeholder in the FI list. |
| 975 | return None; |
| 976 | } |
| 977 | |
| 978 | // We have this special thin clone method to avoid cloning the |
| 979 | // quite large lookahead data for SEFs, when it is not needed. |
| 980 | let mut fi = previous_coded_fi.clone_without_coded_data(); |
| 981 | fi.intra_only = false; |
| 982 | fi.force_integer_mv = 0; // note: should be 1 if fi.intra_only is true |
| 983 | fi.idx_in_group_output = |
| 984 | inter_cfg.get_idx_in_group_output(output_frameno_in_gop); |
| 985 | fi.tx_mode_select = fi.enable_inter_txfm_split; |
| 986 | |
| 987 | let show_existing_frame = |
| 988 | inter_cfg.get_show_existing_frame(fi.idx_in_group_output); |
| 989 | if !show_existing_frame { |
| 990 | fi.coded_frame_data = previous_coded_fi.coded_frame_data.clone(); |
| 991 | } |
| 992 | |
| 993 | fi.order_hint = |
| 994 | inter_cfg.get_order_hint(output_frameno_in_gop, fi.idx_in_group_output); |
| 995 | |
| 996 | fi.pyramid_level = inter_cfg.get_level(fi.idx_in_group_output); |
| 997 | |
| 998 | fi.frame_type = if (inter_cfg.switch_frame_interval > 0) |
| 999 | && (output_frameno_in_gop % inter_cfg.switch_frame_interval == 0) |
| 1000 | && (fi.pyramid_level == 0) |
| 1001 | { |
| 1002 | FrameType::SWITCH |
| 1003 | } else { |
| 1004 | FrameType::INTER |
| 1005 | }; |
| 1006 | fi.error_resilient = |
| 1007 | if fi.frame_type == FrameType::SWITCH { true } else { error_resilient }; |
| 1008 | |
| 1009 | fi.frame_size_override_flag = if fi.frame_type == FrameType::SWITCH { |
| 1010 | true |
| 1011 | } else if fi.sequence.reduced_still_picture_hdr { |
| 1012 | false |
| 1013 | } else if fi.frame_type == FrameType::INTER |
| 1014 | && !fi.error_resilient |
| 1015 | && fi.render_and_frame_size_different |
| 1016 | { |
| 1017 | // force frame_size_with_refs() code path if render size != frame size |
| 1018 | true |
| 1019 | } else { |
| 1020 | fi.width as u32 != fi.sequence.max_frame_width |
| 1021 | || fi.height as u32 != fi.sequence.max_frame_height |
| 1022 | }; |
| 1023 | |
| 1024 | // this is the slot that the current frame is going to be saved into |
| 1025 | let slot_idx = inter_cfg.get_slot_idx(fi.pyramid_level, fi.order_hint); |
| 1026 | fi.show_frame = inter_cfg.get_show_frame(fi.idx_in_group_output); |
| 1027 | fi.t35_metadata = if fi.show_frame { t35_metadata } else { Box::new([]) }; |
| 1028 | fi.frame_to_show_map_idx = slot_idx; |
| 1029 | fi.refresh_frame_flags = if fi.frame_type == FrameType::SWITCH { |
| 1030 | ALL_REF_FRAMES_MASK |
| 1031 | } else if fi.is_show_existing_frame() { |
| 1032 | 0 |
| 1033 | } else { |
| 1034 | 1 << slot_idx |
| 1035 | }; |
| 1036 | |
| 1037 | let second_ref_frame = |
| 1038 | if fi.idx_in_group_output == 0 { LAST2_FRAME } else { ALTREF_FRAME }; |
| 1039 | let ref_in_previous_group = LAST3_FRAME; |
| 1040 | |
| 1041 | // reuse probability estimates from previous frames only in top level frames |
| 1042 | fi.primary_ref_frame = if fi.error_resilient || (fi.pyramid_level > 2) { |
| 1043 | PRIMARY_REF_NONE |
| 1044 | } else { |
| 1045 | (ref_in_previous_group.to_index()) as u32 |
| 1046 | }; |
| 1047 | |
| 1048 | if fi.pyramid_level == 0 { |
| 1049 | // level 0 has no forward references |
| 1050 | // default to last P frame |
| 1051 | fi.ref_frames = [ |
| 1052 | // calculations done relative to the slot_idx for this frame. |
| 1053 | // the last four frames can be found by subtracting from the current slot_idx |
| 1054 | // add 4 to prevent underflow |
| 1055 | // TODO: maybe use order_hint here like in get_slot_idx? |
| 1056 | // this is the previous P frame |
| 1057 | (slot_idx + 4 - 1) as u8 % 4 |
| 1058 | ; INTER_REFS_PER_FRAME]; |
| 1059 | if inter_cfg.multiref { |
| 1060 | // use the second-previous p frame as a second reference frame |
| 1061 | fi.ref_frames[second_ref_frame.to_index()] = |
| 1062 | (slot_idx + 4 - 2) as u8 % 4; |
| 1063 | } |
| 1064 | } else { |
| 1065 | debug_assert!(inter_cfg.multiref); |
| 1066 | |
| 1067 | // fill in defaults |
| 1068 | // default to backwards reference in lower level |
| 1069 | fi.ref_frames = [{ |
| 1070 | let oh = fi.order_hint |
| 1071 | - (inter_cfg.group_input_len as u32 >> fi.pyramid_level); |
| 1072 | let lvl1 = pos_to_lvl(oh as u64, inter_cfg.pyramid_depth); |
| 1073 | if lvl1 == 0 { |
| 1074 | ((oh >> inter_cfg.pyramid_depth) % 4) as u8 |
| 1075 | } else { |
| 1076 | 3 + lvl1 as u8 |
| 1077 | } |
| 1078 | }; INTER_REFS_PER_FRAME]; |
| 1079 | // use forward reference in lower level as a second reference frame |
| 1080 | fi.ref_frames[second_ref_frame.to_index()] = { |
| 1081 | let oh = fi.order_hint |
| 1082 | + (inter_cfg.group_input_len as u32 >> fi.pyramid_level); |
| 1083 | let lvl2 = pos_to_lvl(oh as u64, inter_cfg.pyramid_depth); |
| 1084 | if lvl2 == 0 { |
| 1085 | ((oh >> inter_cfg.pyramid_depth) % 4) as u8 |
| 1086 | } else { |
| 1087 | 3 + lvl2 as u8 |
| 1088 | } |
| 1089 | }; |
| 1090 | // use a reference to the previous frame in the same level |
| 1091 | // (horizontally) as a third reference |
| 1092 | fi.ref_frames[ref_in_previous_group.to_index()] = slot_idx as u8; |
| 1093 | } |
| 1094 | |
| 1095 | fi.set_ref_frame_sign_bias(); |
| 1096 | |
| 1097 | fi.reference_mode = if inter_cfg.multiref && fi.idx_in_group_output != 0 { |
| 1098 | ReferenceMode::SELECT |
| 1099 | } else { |
| 1100 | ReferenceMode::SINGLE |
| 1101 | }; |
| 1102 | fi.input_frameno = input_frameno; |
| 1103 | fi.me_range_scale = (inter_cfg.group_input_len >> fi.pyramid_level) as u8; |
| 1104 | |
| 1105 | if fi.show_frame || fi.showable_frame { |
| 1106 | let cur_frame_time = fi.frame_timestamp(); |
| 1107 | // Increment the film grain seed for the next frame |
| 1108 | if let Some(params) = |
| 1109 | Arc::make_mut(&mut fi.config).get_film_grain_mut_at(cur_frame_time) |
| 1110 | { |
| 1111 | params.random_seed = params.random_seed.wrapping_add(3248); |
| 1112 | if params.random_seed == 0 { |
| 1113 | params.random_seed = DEFAULT_GRAIN_SEED; |
| 1114 | } |
| 1115 | } |
| 1116 | } |
| 1117 | |
| 1118 | Some(fi) |
| 1119 | } |
| 1120 | |
| 1121 | pub fn is_show_existing_frame(&self) -> bool { |
| 1122 | self.coded_frame_data.is_none() |
| 1123 | } |
| 1124 | |
| 1125 | pub fn clone_without_coded_data(&self) -> Self { |
| 1126 | Self { |
| 1127 | coded_frame_data: None, |
| 1128 | |
| 1129 | sequence: self.sequence.clone(), |
| 1130 | config: self.config.clone(), |
| 1131 | width: self.width, |
| 1132 | height: self.height, |
| 1133 | render_width: self.render_width, |
| 1134 | render_height: self.render_height, |
| 1135 | frame_size_override_flag: self.frame_size_override_flag, |
| 1136 | render_and_frame_size_different: self.render_and_frame_size_different, |
| 1137 | sb_width: self.sb_width, |
| 1138 | sb_height: self.sb_height, |
| 1139 | w_in_b: self.w_in_b, |
| 1140 | h_in_b: self.h_in_b, |
| 1141 | input_frameno: self.input_frameno, |
| 1142 | order_hint: self.order_hint, |
| 1143 | show_frame: self.show_frame, |
| 1144 | showable_frame: self.showable_frame, |
| 1145 | error_resilient: self.error_resilient, |
| 1146 | intra_only: self.intra_only, |
| 1147 | allow_high_precision_mv: self.allow_high_precision_mv, |
| 1148 | frame_type: self.frame_type, |
| 1149 | frame_to_show_map_idx: self.frame_to_show_map_idx, |
| 1150 | use_reduced_tx_set: self.use_reduced_tx_set, |
| 1151 | reference_mode: self.reference_mode, |
| 1152 | use_prev_frame_mvs: self.use_prev_frame_mvs, |
| 1153 | partition_range: self.partition_range, |
| 1154 | globalmv_transformation_type: self.globalmv_transformation_type, |
| 1155 | num_tg: self.num_tg, |
| 1156 | large_scale_tile: self.large_scale_tile, |
| 1157 | disable_cdf_update: self.disable_cdf_update, |
| 1158 | allow_screen_content_tools: self.allow_screen_content_tools, |
| 1159 | force_integer_mv: self.force_integer_mv, |
| 1160 | primary_ref_frame: self.primary_ref_frame, |
| 1161 | refresh_frame_flags: self.refresh_frame_flags, |
| 1162 | allow_intrabc: self.allow_intrabc, |
| 1163 | use_ref_frame_mvs: self.use_ref_frame_mvs, |
| 1164 | is_filter_switchable: self.is_filter_switchable, |
| 1165 | is_motion_mode_switchable: self.is_motion_mode_switchable, |
| 1166 | disable_frame_end_update_cdf: self.disable_frame_end_update_cdf, |
| 1167 | allow_warped_motion: self.allow_warped_motion, |
| 1168 | cdef_search_method: self.cdef_search_method, |
| 1169 | cdef_damping: self.cdef_damping, |
| 1170 | cdef_bits: self.cdef_bits, |
| 1171 | cdef_y_strengths: self.cdef_y_strengths, |
| 1172 | cdef_uv_strengths: self.cdef_uv_strengths, |
| 1173 | delta_q_present: self.delta_q_present, |
| 1174 | ref_frames: self.ref_frames, |
| 1175 | ref_frame_sign_bias: self.ref_frame_sign_bias, |
| 1176 | rec_buffer: self.rec_buffer.clone(), |
| 1177 | base_q_idx: self.base_q_idx, |
| 1178 | dc_delta_q: self.dc_delta_q, |
| 1179 | ac_delta_q: self.ac_delta_q, |
| 1180 | lambda: self.lambda, |
| 1181 | me_lambda: self.me_lambda, |
| 1182 | dist_scale: self.dist_scale, |
| 1183 | me_range_scale: self.me_range_scale, |
| 1184 | use_tx_domain_distortion: self.use_tx_domain_distortion, |
| 1185 | use_tx_domain_rate: self.use_tx_domain_rate, |
| 1186 | idx_in_group_output: self.idx_in_group_output, |
| 1187 | pyramid_level: self.pyramid_level, |
| 1188 | enable_early_exit: self.enable_early_exit, |
| 1189 | tx_mode_select: self.tx_mode_select, |
| 1190 | enable_inter_txfm_split: self.enable_inter_txfm_split, |
| 1191 | default_filter: self.default_filter, |
| 1192 | enable_segmentation: self.enable_segmentation, |
| 1193 | t35_metadata: self.t35_metadata.clone(), |
| 1194 | cpu_feature_level: self.cpu_feature_level, |
| 1195 | } |
| 1196 | } |
| 1197 | |
| 1198 | pub fn set_ref_frame_sign_bias(&mut self) { |
| 1199 | for i in 0..INTER_REFS_PER_FRAME { |
| 1200 | self.ref_frame_sign_bias[i] = if !self.sequence.enable_order_hint { |
| 1201 | false |
| 1202 | } else if let Some(ref rec) = |
| 1203 | self.rec_buffer.frames[self.ref_frames[i] as usize] |
| 1204 | { |
| 1205 | let hint = rec.order_hint; |
| 1206 | self.sequence.get_relative_dist(hint, self.order_hint) > 0 |
| 1207 | } else { |
| 1208 | false |
| 1209 | }; |
| 1210 | } |
| 1211 | } |
| 1212 | |
| 1213 | pub fn get_frame_subtype(&self) -> usize { |
| 1214 | if self.frame_type == FrameType::KEY { |
| 1215 | FRAME_SUBTYPE_I |
| 1216 | } else { |
| 1217 | FRAME_SUBTYPE_P + (self.pyramid_level as usize) |
| 1218 | } |
| 1219 | } |
| 1220 | |
| 1221 | fn pick_strength_from_q(&mut self, qps: &QuantizerParameters) { |
| 1222 | self.cdef_damping = 3 + (self.base_q_idx >> 6); |
| 1223 | let q = bexp64(qps.log_target_q + q57(QSCALE)) as f32; |
| 1224 | /* These coefficients were trained on libaom. */ |
| 1225 | let (y_f1, y_f2, uv_f1, uv_f2) = if !self.intra_only { |
| 1226 | ( |
| 1227 | poly2(q, -0.0000023593946_f32, 0.0068615186_f32, 0.02709886_f32, 15), |
| 1228 | poly2(q, -0.00000057629734_f32, 0.0013993345_f32, 0.03831067_f32, 3), |
| 1229 | poly2(q, -0.0000007095069_f32, 0.0034628846_f32, 0.00887099_f32, 15), |
| 1230 | poly2(q, 0.00000023874085_f32, 0.00028223585_f32, 0.05576307_f32, 3), |
| 1231 | ) |
| 1232 | } else { |
| 1233 | ( |
| 1234 | poly2(q, 0.0000033731974_f32, 0.008070594_f32, 0.0187634_f32, 15), |
| 1235 | poly2(q, 0.0000029167343_f32, 0.0027798624_f32, 0.0079405_f32, 3), |
| 1236 | poly2(q, -0.0000130790995_f32, 0.012892405_f32, -0.00748388_f32, 15), |
| 1237 | poly2(q, 0.0000032651783_f32, 0.00035520183_f32, 0.00228092_f32, 3), |
| 1238 | ) |
| 1239 | }; |
| 1240 | self.cdef_y_strengths[0] = (y_f1 * CDEF_SEC_STRENGTHS as i32 + y_f2) as u8; |
| 1241 | self.cdef_uv_strengths[0] = |
| 1242 | (uv_f1 * CDEF_SEC_STRENGTHS as i32 + uv_f2) as u8; |
| 1243 | } |
| 1244 | |
| 1245 | pub fn set_quantizers(&mut self, qps: &QuantizerParameters) { |
| 1246 | self.base_q_idx = qps.ac_qi[0]; |
| 1247 | let base_q_idx = self.base_q_idx as i32; |
| 1248 | for pi in 0..3 { |
| 1249 | self.dc_delta_q[pi] = (qps.dc_qi[pi] as i32 - base_q_idx) as i8; |
| 1250 | self.ac_delta_q[pi] = (qps.ac_qi[pi] as i32 - base_q_idx) as i8; |
| 1251 | } |
| 1252 | self.lambda = |
| 1253 | qps.lambda * ((1 << (2 * (self.sequence.bit_depth - 8))) as f64); |
| 1254 | self.me_lambda = self.lambda.sqrt(); |
| 1255 | self.dist_scale = qps.dist_scale.map(DistortionScale::from); |
| 1256 | |
| 1257 | match self.cdef_search_method { |
| 1258 | CDEFSearchMethod::PickFromQ => { |
| 1259 | self.pick_strength_from_q(qps); |
| 1260 | } |
| 1261 | // TODO: implement FastSearch and FullSearch |
| 1262 | _ => unreachable!(), |
| 1263 | } |
| 1264 | } |
| 1265 | |
| 1266 | #[inline (always)] |
| 1267 | pub fn sb_size_log2(&self) -> usize { |
| 1268 | self.sequence.tiling.sb_size_log2 |
| 1269 | } |
| 1270 | |
| 1271 | pub fn film_grain_params(&self) -> Option<&GrainTableSegment> { |
| 1272 | if !(self.show_frame || self.showable_frame) { |
| 1273 | return None; |
| 1274 | } |
| 1275 | let cur_frame_time = self.frame_timestamp(); |
| 1276 | self.config.get_film_grain_at(cur_frame_time) |
| 1277 | } |
| 1278 | |
| 1279 | pub fn frame_timestamp(&self) -> u64 { |
| 1280 | // I don't know why this is the base unit for a timestamp but it is. 1/10000000 of a second. |
| 1281 | const TIMESTAMP_BASE_UNIT: u64 = 10_000_000; |
| 1282 | |
| 1283 | self.input_frameno * TIMESTAMP_BASE_UNIT * self.sequence.time_base.num |
| 1284 | / self.sequence.time_base.den |
| 1285 | } |
| 1286 | } |
| 1287 | |
| 1288 | impl<T: Pixel> fmt::Display for FrameInvariants<T> { |
| 1289 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
| 1290 | write!(f, "Input Frame {} - {}" , self.input_frameno, self.frame_type) |
| 1291 | } |
| 1292 | } |
| 1293 | |
| 1294 | /// # Errors |
| 1295 | /// |
| 1296 | /// - If the frame packet cannot be written to |
| 1297 | pub fn write_temporal_delimiter(packet: &mut dyn io::Write) -> io::Result<()> { |
| 1298 | packet.write_all(&TEMPORAL_DELIMITER)?; |
| 1299 | Ok(()) |
| 1300 | } |
| 1301 | |
| 1302 | fn write_key_frame_obus<T: Pixel>( |
| 1303 | packet: &mut dyn io::Write, fi: &FrameInvariants<T>, obu_extension: u32, |
| 1304 | ) -> io::Result<()> { |
| 1305 | let mut buf1 = Vec::new(); |
| 1306 | let mut buf2 = Vec::new(); |
| 1307 | { |
| 1308 | let mut bw2 = BitWriter::endian(&mut buf2, BigEndian); |
| 1309 | bw2.write_sequence_header_obu(fi)?; |
| 1310 | bw2.write_bit(true)?; // trailing bit |
| 1311 | bw2.byte_align()?; |
| 1312 | } |
| 1313 | |
| 1314 | { |
| 1315 | let mut bw1 = BitWriter::endian(&mut buf1, BigEndian); |
| 1316 | bw1.write_obu_header(ObuType::OBU_SEQUENCE_HEADER, obu_extension)?; |
| 1317 | } |
| 1318 | packet.write_all(&buf1).unwrap(); |
| 1319 | buf1.clear(); |
| 1320 | |
| 1321 | { |
| 1322 | let mut bw1 = BitWriter::endian(&mut buf1, BigEndian); |
| 1323 | bw1.write_uleb128(buf2.len() as u64)?; |
| 1324 | } |
| 1325 | |
| 1326 | packet.write_all(&buf1).unwrap(); |
| 1327 | buf1.clear(); |
| 1328 | |
| 1329 | packet.write_all(&buf2).unwrap(); |
| 1330 | buf2.clear(); |
| 1331 | |
| 1332 | if fi.sequence.content_light.is_some() { |
| 1333 | let mut bw1 = BitWriter::endian(&mut buf1, BigEndian); |
| 1334 | bw1.write_sequence_metadata_obu( |
| 1335 | ObuMetaType::OBU_META_HDR_CLL, |
| 1336 | &fi.sequence, |
| 1337 | )?; |
| 1338 | packet.write_all(&buf1).unwrap(); |
| 1339 | buf1.clear(); |
| 1340 | } |
| 1341 | |
| 1342 | if fi.sequence.mastering_display.is_some() { |
| 1343 | let mut bw1 = BitWriter::endian(&mut buf1, BigEndian); |
| 1344 | bw1.write_sequence_metadata_obu( |
| 1345 | ObuMetaType::OBU_META_HDR_MDCV, |
| 1346 | &fi.sequence, |
| 1347 | )?; |
| 1348 | packet.write_all(&buf1).unwrap(); |
| 1349 | buf1.clear(); |
| 1350 | } |
| 1351 | |
| 1352 | Ok(()) |
| 1353 | } |
| 1354 | |
| 1355 | /// Write into `dst` the difference between the blocks at `src1` and `src2` |
| 1356 | fn diff<T: Pixel>( |
| 1357 | dst: &mut [MaybeUninit<i16>], src1: &PlaneRegion<'_, T>, |
| 1358 | src2: &PlaneRegion<'_, T>, |
| 1359 | ) { |
| 1360 | debug_assert!(dst.len() % src1.rect().width == 0); |
| 1361 | debug_assert_eq!(src1.rows_iter().count(), src1.rect().height); |
| 1362 | |
| 1363 | let width: usize = src1.rect().width; |
| 1364 | let height: usize = src1.rect().height; |
| 1365 | |
| 1366 | if width == 0 |
| 1367 | || width != src2.rect().width |
| 1368 | || height == 0 |
| 1369 | || src1.rows_iter().len() != src2.rows_iter().len() |
| 1370 | { |
| 1371 | debug_assert!(false); |
| 1372 | return; |
| 1373 | } |
| 1374 | |
| 1375 | for ((l: &mut [MaybeUninit], s1: &[T]), s2: &[T]) in |
| 1376 | dst.chunks_exact_mut(chunk_size:width).zip(src1.rows_iter()).zip(src2.rows_iter()) |
| 1377 | { |
| 1378 | for ((r: &mut MaybeUninit, v1: &T), v2: &T) in l.iter_mut().zip(s1).zip(s2) { |
| 1379 | r.write(val:i16::cast_from(*v1) - i16::cast_from(*v2)); |
| 1380 | } |
| 1381 | } |
| 1382 | } |
| 1383 | |
| 1384 | fn get_qidx<T: Pixel>( |
| 1385 | fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>, cw: &ContextWriter, |
| 1386 | tile_bo: TileBlockOffset, |
| 1387 | ) -> u8 { |
| 1388 | let mut qidx: u8 = fi.base_q_idx; |
| 1389 | let sidx: usize = cw.bc.blocks[tile_bo].segmentation_idx as usize; |
| 1390 | if ts.segmentation.features[sidx][SegLvl::SEG_LVL_ALT_Q as usize] { |
| 1391 | let delta: i16 = ts.segmentation.data[sidx][SegLvl::SEG_LVL_ALT_Q as usize]; |
| 1392 | qidx = clamp((qidx as i16) + delta, min:0, max:255) as u8; |
| 1393 | } |
| 1394 | qidx |
| 1395 | } |
| 1396 | |
| 1397 | /// For a transform block, |
| 1398 | /// predict, transform, quantize, write coefficients to a bitstream, |
| 1399 | /// dequantize, inverse-transform. |
| 1400 | /// |
| 1401 | /// # Panics |
| 1402 | /// |
| 1403 | /// - If the block size is invalid for subsampling |
| 1404 | /// - If a tx type other than DCT is used for 64x64 blocks |
| 1405 | pub fn encode_tx_block<T: Pixel, W: Writer>( |
| 1406 | fi: &FrameInvariants<T>, |
| 1407 | ts: &mut TileStateMut<'_, T>, |
| 1408 | cw: &mut ContextWriter, |
| 1409 | w: &mut W, |
| 1410 | p: usize, |
| 1411 | // Offset in the luma plane of the partition enclosing this block. |
| 1412 | tile_partition_bo: TileBlockOffset, |
| 1413 | // tx block position within a partition, unit: tx block number |
| 1414 | bx: usize, |
| 1415 | by: usize, |
| 1416 | // Offset in the luma plane where this tx block is colocated. Note that for |
| 1417 | // a chroma block, this offset might be outside of the current partition. |
| 1418 | // For example in 4:2:0, four 4x4 luma partitions share one 4x4 chroma block, |
| 1419 | // this block is part of the last 4x4 partition, but its `tx_bo` offset |
| 1420 | // matches the offset of the first 4x4 partition. |
| 1421 | tx_bo: TileBlockOffset, |
| 1422 | mode: PredictionMode, |
| 1423 | tx_size: TxSize, |
| 1424 | tx_type: TxType, |
| 1425 | bsize: BlockSize, |
| 1426 | po: PlaneOffset, |
| 1427 | skip: bool, |
| 1428 | qidx: u8, |
| 1429 | ac: &[i16], |
| 1430 | pred_intra_param: IntraParam, |
| 1431 | rdo_type: RDOType, |
| 1432 | need_recon_pixel: bool, |
| 1433 | ) -> (bool, ScaledDistortion) { |
| 1434 | let PlaneConfig { xdec, ydec, .. } = ts.input.planes[p].cfg; |
| 1435 | let tile_rect = ts.tile_rect().decimated(xdec, ydec); |
| 1436 | let area = Area::BlockRect { |
| 1437 | bo: tx_bo.0, |
| 1438 | width: tx_size.width(), |
| 1439 | height: tx_size.height(), |
| 1440 | }; |
| 1441 | |
| 1442 | if tx_bo.0.x >= ts.mi_width || tx_bo.0.y >= ts.mi_height { |
| 1443 | return (false, ScaledDistortion::zero()); |
| 1444 | } |
| 1445 | |
| 1446 | debug_assert!(tx_bo.0.x < ts.mi_width); |
| 1447 | debug_assert!(tx_bo.0.y < ts.mi_height); |
| 1448 | |
| 1449 | debug_assert!( |
| 1450 | tx_size.sqr() <= TxSize::TX_32X32 || tx_type == TxType::DCT_DCT |
| 1451 | ); |
| 1452 | |
| 1453 | let plane_bsize = bsize.subsampled_size(xdec, ydec).unwrap(); |
| 1454 | |
| 1455 | debug_assert!(p != 0 || !mode.is_intra() || tx_size.block_size() == plane_bsize || need_recon_pixel, |
| 1456 | "mode.is_intra()= {:#?}, plane= {:#?}, tx_size.block_size()= {:#?}, plane_bsize= {:#?}, need_recon_pixel= {:#?}" , |
| 1457 | mode.is_intra(), p, tx_size.block_size(), plane_bsize, need_recon_pixel); |
| 1458 | |
| 1459 | let ief_params = if mode.is_directional() |
| 1460 | && fi.sequence.enable_intra_edge_filter |
| 1461 | { |
| 1462 | let (plane_xdec, plane_ydec) = if p == 0 { (0, 0) } else { (xdec, ydec) }; |
| 1463 | let above_block_info = |
| 1464 | ts.above_block_info(tile_partition_bo, plane_xdec, plane_ydec); |
| 1465 | let left_block_info = |
| 1466 | ts.left_block_info(tile_partition_bo, plane_xdec, plane_ydec); |
| 1467 | Some(IntraEdgeFilterParameters::new(p, above_block_info, left_block_info)) |
| 1468 | } else { |
| 1469 | None |
| 1470 | }; |
| 1471 | |
| 1472 | let frame_bo = ts.to_frame_block_offset(tx_bo); |
| 1473 | let rec = &mut ts.rec.planes[p]; |
| 1474 | |
| 1475 | if mode.is_intra() { |
| 1476 | let bit_depth = fi.sequence.bit_depth; |
| 1477 | let mut edge_buf = Aligned::uninit_array(); |
| 1478 | let edge_buf = get_intra_edges( |
| 1479 | &mut edge_buf, |
| 1480 | &rec.as_const(), |
| 1481 | tile_partition_bo, |
| 1482 | bx, |
| 1483 | by, |
| 1484 | bsize, |
| 1485 | po, |
| 1486 | tx_size, |
| 1487 | bit_depth, |
| 1488 | Some(mode), |
| 1489 | fi.sequence.enable_intra_edge_filter, |
| 1490 | pred_intra_param, |
| 1491 | ); |
| 1492 | |
| 1493 | mode.predict_intra( |
| 1494 | tile_rect, |
| 1495 | &mut rec.subregion_mut(area), |
| 1496 | tx_size, |
| 1497 | bit_depth, |
| 1498 | ac, |
| 1499 | pred_intra_param, |
| 1500 | ief_params, |
| 1501 | &edge_buf, |
| 1502 | fi.cpu_feature_level, |
| 1503 | ); |
| 1504 | } |
| 1505 | |
| 1506 | if skip { |
| 1507 | return (false, ScaledDistortion::zero()); |
| 1508 | } |
| 1509 | |
| 1510 | let coded_tx_area = av1_get_coded_tx_size(tx_size).area(); |
| 1511 | let mut residual = Aligned::<[MaybeUninit<i16>; 64 * 64]>::uninit_array(); |
| 1512 | let mut coeffs = Aligned::<[MaybeUninit<T::Coeff>; 64 * 64]>::uninit_array(); |
| 1513 | let mut qcoeffs = |
| 1514 | Aligned::<[MaybeUninit<T::Coeff>; 32 * 32]>::uninit_array(); |
| 1515 | let mut rcoeffs = |
| 1516 | Aligned::<[MaybeUninit<T::Coeff>; 32 * 32]>::uninit_array(); |
| 1517 | let residual = &mut residual.data[..tx_size.area()]; |
| 1518 | let coeffs = &mut coeffs.data[..tx_size.area()]; |
| 1519 | let qcoeffs = init_slice_repeat_mut( |
| 1520 | &mut qcoeffs.data[..coded_tx_area], |
| 1521 | T::Coeff::cast_from(0), |
| 1522 | ); |
| 1523 | let rcoeffs = &mut rcoeffs.data[..coded_tx_area]; |
| 1524 | |
| 1525 | let (visible_tx_w, visible_tx_h) = clip_visible_bsize( |
| 1526 | (fi.width + xdec) >> xdec, |
| 1527 | (fi.height + ydec) >> ydec, |
| 1528 | tx_size.block_size(), |
| 1529 | (frame_bo.0.x << MI_SIZE_LOG2) >> xdec, |
| 1530 | (frame_bo.0.y << MI_SIZE_LOG2) >> ydec, |
| 1531 | ); |
| 1532 | |
| 1533 | if visible_tx_w != 0 && visible_tx_h != 0 { |
| 1534 | diff( |
| 1535 | residual, |
| 1536 | &ts.input_tile.planes[p].subregion(area), |
| 1537 | &rec.subregion(area), |
| 1538 | ); |
| 1539 | } else { |
| 1540 | residual.fill(MaybeUninit::new(0)); |
| 1541 | } |
| 1542 | // SAFETY: `diff()` inits `tx_size.area()` elements when it matches size of `subregion(area)` |
| 1543 | let residual = unsafe { slice_assume_init_mut(residual) }; |
| 1544 | |
| 1545 | forward_transform( |
| 1546 | residual, |
| 1547 | coeffs, |
| 1548 | tx_size.width(), |
| 1549 | tx_size, |
| 1550 | tx_type, |
| 1551 | fi.sequence.bit_depth, |
| 1552 | fi.cpu_feature_level, |
| 1553 | ); |
| 1554 | // SAFETY: forward_transform initialized coeffs |
| 1555 | let coeffs = unsafe { slice_assume_init_mut(coeffs) }; |
| 1556 | |
| 1557 | let eob = ts.qc.quantize(coeffs, qcoeffs, tx_size, tx_type); |
| 1558 | |
| 1559 | let has_coeff = if need_recon_pixel || rdo_type.needs_coeff_rate() { |
| 1560 | debug_assert!((((fi.w_in_b - frame_bo.0.x) << MI_SIZE_LOG2) >> xdec) >= 4); |
| 1561 | debug_assert!((((fi.h_in_b - frame_bo.0.y) << MI_SIZE_LOG2) >> ydec) >= 4); |
| 1562 | let frame_clipped_txw: usize = |
| 1563 | (((fi.w_in_b - frame_bo.0.x) << MI_SIZE_LOG2) >> xdec) |
| 1564 | .min(tx_size.width()); |
| 1565 | let frame_clipped_txh: usize = |
| 1566 | (((fi.h_in_b - frame_bo.0.y) << MI_SIZE_LOG2) >> ydec) |
| 1567 | .min(tx_size.height()); |
| 1568 | |
| 1569 | cw.write_coeffs_lv_map( |
| 1570 | w, |
| 1571 | p, |
| 1572 | tx_bo, |
| 1573 | qcoeffs, |
| 1574 | eob, |
| 1575 | mode, |
| 1576 | tx_size, |
| 1577 | tx_type, |
| 1578 | plane_bsize, |
| 1579 | xdec, |
| 1580 | ydec, |
| 1581 | fi.use_reduced_tx_set, |
| 1582 | frame_clipped_txw, |
| 1583 | frame_clipped_txh, |
| 1584 | ) |
| 1585 | } else { |
| 1586 | true |
| 1587 | }; |
| 1588 | |
| 1589 | // Reconstruct |
| 1590 | dequantize( |
| 1591 | qidx, |
| 1592 | qcoeffs, |
| 1593 | eob, |
| 1594 | rcoeffs, |
| 1595 | tx_size, |
| 1596 | fi.sequence.bit_depth, |
| 1597 | fi.dc_delta_q[p], |
| 1598 | fi.ac_delta_q[p], |
| 1599 | fi.cpu_feature_level, |
| 1600 | ); |
| 1601 | // SAFETY: dequantize initialized rcoeffs |
| 1602 | let rcoeffs = unsafe { slice_assume_init_mut(rcoeffs) }; |
| 1603 | |
| 1604 | if eob == 0 { |
| 1605 | // All zero coefficients is a no-op |
| 1606 | } else if !fi.use_tx_domain_distortion || need_recon_pixel { |
| 1607 | inverse_transform_add( |
| 1608 | rcoeffs, |
| 1609 | &mut rec.subregion_mut(area), |
| 1610 | eob, |
| 1611 | tx_size, |
| 1612 | tx_type, |
| 1613 | fi.sequence.bit_depth, |
| 1614 | fi.cpu_feature_level, |
| 1615 | ); |
| 1616 | } |
| 1617 | |
| 1618 | let tx_dist = |
| 1619 | if rdo_type.needs_tx_dist() && visible_tx_w != 0 && visible_tx_h != 0 { |
| 1620 | // Store tx-domain distortion of this block |
| 1621 | // rcoeffs above 32 rows/cols aren't held in the array, because they are |
| 1622 | // always 0. The first 32x32 is stored first in coeffs so we can iterate |
| 1623 | // over coeffs and rcoeffs for the first 32 rows/cols. For the |
| 1624 | // coefficients above 32 rows/cols, we iterate over the rest of coeffs |
| 1625 | // with the assumption that rcoeff coefficients are zero. |
| 1626 | let mut raw_tx_dist = coeffs |
| 1627 | .iter() |
| 1628 | .zip(rcoeffs.iter()) |
| 1629 | .map(|(&a, &b)| { |
| 1630 | let c = i32::cast_from(a) - i32::cast_from(b); |
| 1631 | (c * c) as u64 |
| 1632 | }) |
| 1633 | .sum::<u64>() |
| 1634 | + coeffs[rcoeffs.len()..] |
| 1635 | .iter() |
| 1636 | .map(|&a| { |
| 1637 | let c = i32::cast_from(a); |
| 1638 | (c * c) as u64 |
| 1639 | }) |
| 1640 | .sum::<u64>(); |
| 1641 | |
| 1642 | let tx_dist_scale_bits = 2 * (3 - get_log_tx_scale(tx_size)); |
| 1643 | let tx_dist_scale_rounding_offset = 1 << (tx_dist_scale_bits - 1); |
| 1644 | |
| 1645 | raw_tx_dist = |
| 1646 | (raw_tx_dist + tx_dist_scale_rounding_offset) >> tx_dist_scale_bits; |
| 1647 | |
| 1648 | if rdo_type == RDOType::TxDistEstRate { |
| 1649 | // look up rate and distortion in table |
| 1650 | let estimated_rate = |
| 1651 | estimate_rate(fi.base_q_idx, tx_size, raw_tx_dist); |
| 1652 | w.add_bits_frac(estimated_rate as u32); |
| 1653 | } |
| 1654 | |
| 1655 | let bias = distortion_scale(fi, ts.to_frame_block_offset(tx_bo), bsize); |
| 1656 | RawDistortion::new(raw_tx_dist) * bias * fi.dist_scale[p] |
| 1657 | } else { |
| 1658 | ScaledDistortion::zero() |
| 1659 | }; |
| 1660 | |
| 1661 | (has_coeff, tx_dist) |
| 1662 | } |
| 1663 | |
| 1664 | /// # Panics |
| 1665 | /// |
| 1666 | /// - If the block size is invalid for subsampling |
| 1667 | #[profiling::function ] |
| 1668 | pub fn motion_compensate<T: Pixel>( |
| 1669 | fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>, |
| 1670 | cw: &mut ContextWriter, luma_mode: PredictionMode, ref_frames: [RefType; 2], |
| 1671 | mvs: [MotionVector; 2], bsize: BlockSize, tile_bo: TileBlockOffset, |
| 1672 | luma_only: bool, |
| 1673 | ) { |
| 1674 | debug_assert!(!luma_mode.is_intra()); |
| 1675 | |
| 1676 | let PlaneConfig { xdec: u_xdec, ydec: u_ydec, .. } = ts.input.planes[1].cfg; |
| 1677 | |
| 1678 | // Inter mode prediction can take place once for a whole partition, |
| 1679 | // instead of each tx-block. |
| 1680 | let num_planes = 1 |
| 1681 | + if !luma_only |
| 1682 | && has_chroma( |
| 1683 | tile_bo, |
| 1684 | bsize, |
| 1685 | u_xdec, |
| 1686 | u_ydec, |
| 1687 | fi.sequence.chroma_sampling, |
| 1688 | ) { |
| 1689 | 2 |
| 1690 | } else { |
| 1691 | 0 |
| 1692 | }; |
| 1693 | |
| 1694 | let luma_tile_rect = ts.tile_rect(); |
| 1695 | let compound_buffer = &mut ts.inter_compound_buffers; |
| 1696 | for p in 0..num_planes { |
| 1697 | let plane_bsize = if p == 0 { |
| 1698 | bsize |
| 1699 | } else { |
| 1700 | bsize.subsampled_size(u_xdec, u_ydec).unwrap() |
| 1701 | }; |
| 1702 | |
| 1703 | let rec = &mut ts.rec.planes[p]; |
| 1704 | let po = tile_bo.plane_offset(rec.plane_cfg); |
| 1705 | let &PlaneConfig { xdec, ydec, .. } = rec.plane_cfg; |
| 1706 | let tile_rect = luma_tile_rect.decimated(xdec, ydec); |
| 1707 | |
| 1708 | let area = Area::BlockStartingAt { bo: tile_bo.0 }; |
| 1709 | if p > 0 && bsize < BlockSize::BLOCK_8X8 { |
| 1710 | let mut some_use_intra = false; |
| 1711 | if bsize == BlockSize::BLOCK_4X4 || bsize == BlockSize::BLOCK_4X8 { |
| 1712 | some_use_intra |= |
| 1713 | cw.bc.blocks[tile_bo.with_offset(-1, 0)].mode.is_intra(); |
| 1714 | }; |
| 1715 | if !some_use_intra && bsize == BlockSize::BLOCK_4X4 |
| 1716 | || bsize == BlockSize::BLOCK_8X4 |
| 1717 | { |
| 1718 | some_use_intra |= |
| 1719 | cw.bc.blocks[tile_bo.with_offset(0, -1)].mode.is_intra(); |
| 1720 | }; |
| 1721 | if !some_use_intra && bsize == BlockSize::BLOCK_4X4 { |
| 1722 | some_use_intra |= |
| 1723 | cw.bc.blocks[tile_bo.with_offset(-1, -1)].mode.is_intra(); |
| 1724 | }; |
| 1725 | |
| 1726 | if some_use_intra { |
| 1727 | luma_mode.predict_inter( |
| 1728 | fi, |
| 1729 | tile_rect, |
| 1730 | p, |
| 1731 | po, |
| 1732 | &mut rec.subregion_mut(area), |
| 1733 | plane_bsize.width(), |
| 1734 | plane_bsize.height(), |
| 1735 | ref_frames, |
| 1736 | mvs, |
| 1737 | compound_buffer, |
| 1738 | ); |
| 1739 | } else { |
| 1740 | assert!(u_xdec == 1 && u_ydec == 1); |
| 1741 | // TODO: these are absolutely only valid for 4:2:0 |
| 1742 | if bsize == BlockSize::BLOCK_4X4 { |
| 1743 | let mv0 = cw.bc.blocks[tile_bo.with_offset(-1, -1)].mv; |
| 1744 | let rf0 = cw.bc.blocks[tile_bo.with_offset(-1, -1)].ref_frames; |
| 1745 | let mv1 = cw.bc.blocks[tile_bo.with_offset(0, -1)].mv; |
| 1746 | let rf1 = cw.bc.blocks[tile_bo.with_offset(0, -1)].ref_frames; |
| 1747 | let po1 = PlaneOffset { x: po.x + 2, y: po.y }; |
| 1748 | let area1 = Area::StartingAt { x: po1.x, y: po1.y }; |
| 1749 | let mv2 = cw.bc.blocks[tile_bo.with_offset(-1, 0)].mv; |
| 1750 | let rf2 = cw.bc.blocks[tile_bo.with_offset(-1, 0)].ref_frames; |
| 1751 | let po2 = PlaneOffset { x: po.x, y: po.y + 2 }; |
| 1752 | let area2 = Area::StartingAt { x: po2.x, y: po2.y }; |
| 1753 | let po3 = PlaneOffset { x: po.x + 2, y: po.y + 2 }; |
| 1754 | let area3 = Area::StartingAt { x: po3.x, y: po3.y }; |
| 1755 | luma_mode.predict_inter( |
| 1756 | fi, |
| 1757 | tile_rect, |
| 1758 | p, |
| 1759 | po, |
| 1760 | &mut rec.subregion_mut(area), |
| 1761 | 2, |
| 1762 | 2, |
| 1763 | rf0, |
| 1764 | mv0, |
| 1765 | compound_buffer, |
| 1766 | ); |
| 1767 | luma_mode.predict_inter( |
| 1768 | fi, |
| 1769 | tile_rect, |
| 1770 | p, |
| 1771 | po1, |
| 1772 | &mut rec.subregion_mut(area1), |
| 1773 | 2, |
| 1774 | 2, |
| 1775 | rf1, |
| 1776 | mv1, |
| 1777 | compound_buffer, |
| 1778 | ); |
| 1779 | luma_mode.predict_inter( |
| 1780 | fi, |
| 1781 | tile_rect, |
| 1782 | p, |
| 1783 | po2, |
| 1784 | &mut rec.subregion_mut(area2), |
| 1785 | 2, |
| 1786 | 2, |
| 1787 | rf2, |
| 1788 | mv2, |
| 1789 | compound_buffer, |
| 1790 | ); |
| 1791 | luma_mode.predict_inter( |
| 1792 | fi, |
| 1793 | tile_rect, |
| 1794 | p, |
| 1795 | po3, |
| 1796 | &mut rec.subregion_mut(area3), |
| 1797 | 2, |
| 1798 | 2, |
| 1799 | ref_frames, |
| 1800 | mvs, |
| 1801 | compound_buffer, |
| 1802 | ); |
| 1803 | } |
| 1804 | if bsize == BlockSize::BLOCK_8X4 { |
| 1805 | let mv1 = cw.bc.blocks[tile_bo.with_offset(0, -1)].mv; |
| 1806 | let rf1 = cw.bc.blocks[tile_bo.with_offset(0, -1)].ref_frames; |
| 1807 | luma_mode.predict_inter( |
| 1808 | fi, |
| 1809 | tile_rect, |
| 1810 | p, |
| 1811 | po, |
| 1812 | &mut rec.subregion_mut(area), |
| 1813 | 4, |
| 1814 | 2, |
| 1815 | rf1, |
| 1816 | mv1, |
| 1817 | compound_buffer, |
| 1818 | ); |
| 1819 | let po3 = PlaneOffset { x: po.x, y: po.y + 2 }; |
| 1820 | let area3 = Area::StartingAt { x: po3.x, y: po3.y }; |
| 1821 | luma_mode.predict_inter( |
| 1822 | fi, |
| 1823 | tile_rect, |
| 1824 | p, |
| 1825 | po3, |
| 1826 | &mut rec.subregion_mut(area3), |
| 1827 | 4, |
| 1828 | 2, |
| 1829 | ref_frames, |
| 1830 | mvs, |
| 1831 | compound_buffer, |
| 1832 | ); |
| 1833 | } |
| 1834 | if bsize == BlockSize::BLOCK_4X8 { |
| 1835 | let mv2 = cw.bc.blocks[tile_bo.with_offset(-1, 0)].mv; |
| 1836 | let rf2 = cw.bc.blocks[tile_bo.with_offset(-1, 0)].ref_frames; |
| 1837 | luma_mode.predict_inter( |
| 1838 | fi, |
| 1839 | tile_rect, |
| 1840 | p, |
| 1841 | po, |
| 1842 | &mut rec.subregion_mut(area), |
| 1843 | 2, |
| 1844 | 4, |
| 1845 | rf2, |
| 1846 | mv2, |
| 1847 | compound_buffer, |
| 1848 | ); |
| 1849 | let po3 = PlaneOffset { x: po.x + 2, y: po.y }; |
| 1850 | let area3 = Area::StartingAt { x: po3.x, y: po3.y }; |
| 1851 | luma_mode.predict_inter( |
| 1852 | fi, |
| 1853 | tile_rect, |
| 1854 | p, |
| 1855 | po3, |
| 1856 | &mut rec.subregion_mut(area3), |
| 1857 | 2, |
| 1858 | 4, |
| 1859 | ref_frames, |
| 1860 | mvs, |
| 1861 | compound_buffer, |
| 1862 | ); |
| 1863 | } |
| 1864 | } |
| 1865 | } else { |
| 1866 | luma_mode.predict_inter( |
| 1867 | fi, |
| 1868 | tile_rect, |
| 1869 | p, |
| 1870 | po, |
| 1871 | &mut rec.subregion_mut(area), |
| 1872 | plane_bsize.width(), |
| 1873 | plane_bsize.height(), |
| 1874 | ref_frames, |
| 1875 | mvs, |
| 1876 | compound_buffer, |
| 1877 | ); |
| 1878 | } |
| 1879 | } |
| 1880 | } |
| 1881 | |
| 1882 | pub fn save_block_motion<T: Pixel>( |
| 1883 | ts: &mut TileStateMut<'_, T>, bsize: BlockSize, tile_bo: TileBlockOffset, |
| 1884 | ref_frame: usize, mv: MotionVector, |
| 1885 | ) { |
| 1886 | let tile_me_stats: &mut TileMEStatsMut<'_> = &mut ts.me_stats[ref_frame]; |
| 1887 | let tile_bo_x_end: usize = (tile_bo.0.x + bsize.width_mi()).min(ts.mi_width); |
| 1888 | let tile_bo_y_end: usize = (tile_bo.0.y + bsize.height_mi()).min(ts.mi_height); |
| 1889 | for mi_y: usize in tile_bo.0.y..tile_bo_y_end { |
| 1890 | for mi_x: usize in tile_bo.0.x..tile_bo_x_end { |
| 1891 | tile_me_stats[mi_y][mi_x].mv = mv; |
| 1892 | } |
| 1893 | } |
| 1894 | } |
| 1895 | |
| 1896 | #[profiling::function ] |
| 1897 | pub fn encode_block_pre_cdef<T: Pixel, W: Writer>( |
| 1898 | seq: &Sequence, ts: &TileStateMut<'_, T>, cw: &mut ContextWriter, w: &mut W, |
| 1899 | bsize: BlockSize, tile_bo: TileBlockOffset, skip: bool, |
| 1900 | ) -> bool { |
| 1901 | cw.bc.blocks.set_skip(tile_bo, bsize, skip); |
| 1902 | if ts.segmentation.enabled |
| 1903 | && ts.segmentation.update_map |
| 1904 | && ts.segmentation.preskip |
| 1905 | { |
| 1906 | cw.write_segmentation( |
| 1907 | w, |
| 1908 | tile_bo, |
| 1909 | bsize, |
| 1910 | false, |
| 1911 | ts.segmentation.last_active_segid, |
| 1912 | ); |
| 1913 | } |
| 1914 | cw.write_skip(w, tile_bo, skip); |
| 1915 | if ts.segmentation.enabled |
| 1916 | && ts.segmentation.update_map |
| 1917 | && !ts.segmentation.preskip |
| 1918 | { |
| 1919 | cw.write_segmentation( |
| 1920 | w, |
| 1921 | tile_bo, |
| 1922 | bsize, |
| 1923 | skip, |
| 1924 | ts.segmentation.last_active_segid, |
| 1925 | ); |
| 1926 | } |
| 1927 | if !skip && seq.enable_cdef { |
| 1928 | cw.bc.cdef_coded = true; |
| 1929 | } |
| 1930 | cw.bc.cdef_coded |
| 1931 | } |
| 1932 | |
/// Writes the mode information of a block that follows the skip flag, then
/// codes its residual.
///
/// In order, this function:
/// - stores block size, mode, tx size, reference frames and motion vectors
///   in the block context of `cw`;
/// - writes deblock deltas when `cw.bc.code_deltas` asks for them;
/// - writes the inter/intra mode symbols, motion vectors and DRL symbols;
/// - writes the transform size when `fi.tx_mode_select` is set;
/// - updates `enc_stats`, if provided;
/// - calls `motion_compensate` + `write_tx_tree` for inter blocks, or
///   `write_tx_blocks` for intra blocks.
///
/// Returns the `(has_coeff, distortion)` pair produced by `write_tx_tree` or
/// `write_tx_blocks`.
///
/// # Panics
///
/// - If chroma and luma do not match for inter modes
/// - If an invalid motion vector is found
/// - If `luma_mode` is `NEW_NEWMV` and `mv_stack` holds fewer than two
///   candidates
/// - If `chroma_mode` is CFL and `bsize` does not allow CFL
#[profiling::function ]
pub fn encode_block_post_cdef<T: Pixel, W: Writer>(
  fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>,
  cw: &mut ContextWriter, w: &mut W, luma_mode: PredictionMode,
  chroma_mode: PredictionMode, angle_delta: AngleDelta,
  ref_frames: [RefType; 2], mvs: [MotionVector; 2], bsize: BlockSize,
  tile_bo: TileBlockOffset, skip: bool, cfl: CFLParams, tx_size: TxSize,
  tx_type: TxType, mode_context: usize, mv_stack: &[CandidateMV],
  rdo_type: RDOType, need_recon_pixel: bool,
  enc_stats: Option<&mut EncoderStats>,
) -> (bool, ScaledDistortion) {
  // Monochrome (4:0:0) input has a single plane, everything else has three.
  let planes =
    if fi.sequence.chroma_sampling == ChromaSampling::Cs400 { 1 } else { 3 };
  let is_inter = !luma_mode.is_intra();
  if is_inter {
    assert!(luma_mode == chroma_mode);
  };
  let sb_size = if fi.sequence.use_128x128_superblock {
    BlockSize::BLOCK_128X128
  } else {
    BlockSize::BLOCK_64X64
  };
  let PlaneConfig { xdec, ydec, .. } = ts.input.planes[1].cfg;
  if skip {
    cw.bc.reset_skip_context(
      tile_bo,
      bsize,
      xdec,
      ydec,
      fi.sequence.chroma_sampling,
    );
  }
  // Publish this block's decisions in the block context before any symbol
  // that follows is written.
  cw.bc.blocks.set_block_size(tile_bo, bsize);
  cw.bc.blocks.set_mode(tile_bo, bsize, luma_mode);
  cw.bc.blocks.set_tx_size(tile_bo, bsize, tx_size);
  cw.bc.blocks.set_ref_frames(tile_bo, bsize, ref_frames);
  cw.bc.blocks.set_motion_vectors(tile_bo, bsize, mvs);

  //write_q_deltas();
  // Deblock deltas are skipped for a skipped block that is as large as the
  // superblock; `code_deltas` is cleared afterwards in every case.
  if cw.bc.code_deltas
    && ts.deblock.block_deltas_enabled
    && (bsize < sb_size || !skip)
  {
    cw.write_block_deblock_deltas(
      w,
      tile_bo,
      ts.deblock.block_delta_multi,
      planes,
    );
  }
  cw.bc.code_deltas = false;

  if fi.frame_type.has_inter() {
    cw.write_is_inter(w, tile_bo, is_inter);
    if is_inter {
      cw.fill_neighbours_ref_counts(tile_bo);
      cw.write_ref_frames(w, fi, tile_bo);

      if luma_mode.is_compound() {
        cw.write_compound_mode(w, luma_mode, mode_context);
      } else {
        cw.write_inter_mode(w, luma_mode, mode_context);
      }

      // For the NEWMV family the first candidate of the stack is always the
      // one used as the prediction.
      let ref_mv_idx = 0;
      let num_mv_found = mv_stack.len();

      if luma_mode == PredictionMode::NEWMV
        || luma_mode == PredictionMode::NEW_NEWMV
      {
        if luma_mode == PredictionMode::NEW_NEWMV {
          assert!(num_mv_found >= 2);
        }
        // `ref_mv_idx` is 0 here, so `drl_mode` is always false and at most
        // one DRL symbol is written before the loop breaks.
        for idx in 0..2 {
          if num_mv_found > idx + 1 {
            let drl_mode = ref_mv_idx > idx;
            let ctx: usize = (mv_stack[idx].weight < REF_CAT_LEVEL) as usize
              + (mv_stack[idx + 1].weight < REF_CAT_LEVEL) as usize;
            cw.write_drl_mode(w, drl_mode, ctx);
            if !drl_mode {
              break;
            }
          }
        }
      }

      // Motion vectors are coded against these predictors; they default to
      // zero vectors when the candidate stack is empty.
      let ref_mvs = if num_mv_found > 0 {
        [mv_stack[ref_mv_idx].this_mv, mv_stack[ref_mv_idx].comp_mv]
      } else {
        [MotionVector::default(); 2]
      };

      let mv_precision = if fi.force_integer_mv != 0 {
        MvSubpelPrecision::MV_SUBPEL_NONE
      } else if fi.allow_high_precision_mv {
        MvSubpelPrecision::MV_SUBPEL_HIGH_PRECISION
      } else {
        MvSubpelPrecision::MV_SUBPEL_LOW_PRECISION
      };

      // Only the "NEW" component(s) of a mode carry an explicit vector.
      if luma_mode == PredictionMode::NEWMV
        || luma_mode == PredictionMode::NEW_NEWMV
        || luma_mode == PredictionMode::NEW_NEARESTMV
      {
        cw.write_mv(w, mvs[0], ref_mvs[0], mv_precision);
      }
      if luma_mode == PredictionMode::NEW_NEWMV
        || luma_mode == PredictionMode::NEAREST_NEWMV
      {
        cw.write_mv(w, mvs[1], ref_mvs[1], mv_precision);
      }

      if luma_mode.has_nearmv() {
        // Shadows the outer `ref_mv_idx`: NEAR modes select their candidate
        // through the mode itself.
        let ref_mv_idx = luma_mode.ref_mv_idx();
        if luma_mode != PredictionMode::NEAR0MV {
          assert!(num_mv_found > ref_mv_idx);
        }

        for idx in 1..3 {
          if num_mv_found > idx + 1 {
            let drl_mode = ref_mv_idx > idx;
            let ctx: usize = (mv_stack[idx].weight < REF_CAT_LEVEL) as usize
              + (mv_stack[idx + 1].weight < REF_CAT_LEVEL) as usize;

            cw.write_drl_mode(w, drl_mode, ctx);
            if !drl_mode {
              break;
            }
          }
        }
        // Sanity check: the vector being used must be the selected
        // candidate, or zero when there is at most one candidate.
        if mv_stack.len() > 1 {
          assert!(mv_stack[ref_mv_idx].this_mv.row == mvs[0].row);
          assert!(mv_stack[ref_mv_idx].this_mv.col == mvs[0].col);
        } else {
          assert!(0 == mvs[0].row);
          assert!(0 == mvs[0].col);
        }
      } else if luma_mode == PredictionMode::NEARESTMV {
        // Sanity check: NEARESTMV must use the first candidate, or zero when
        // the stack is empty.
        if mv_stack.is_empty() {
          assert_eq!(mvs[0].row, 0);
          assert_eq!(mvs[0].col, 0);
        } else {
          assert_eq!(mvs[0].row, mv_stack[0].this_mv.row);
          assert_eq!(mvs[0].col, mv_stack[0].this_mv.col);
        }
      }
    } else {
      cw.write_intra_mode(w, bsize, luma_mode);
    }
  } else {
    // Frame type without inter prediction: only the intra mode is written.
    cw.write_intra_mode_kf(w, tile_bo, luma_mode);
  }

  if !is_inter {
    // Angle deltas are only written for directional modes on blocks of at
    // least 8x8.
    if luma_mode.is_directional() && bsize >= BlockSize::BLOCK_8X8 {
      cw.write_angle_delta(w, angle_delta.y, luma_mode);
    }
    if has_chroma(tile_bo, bsize, xdec, ydec, fi.sequence.chroma_sampling) {
      cw.write_intra_uv_mode(w, chroma_mode, luma_mode, bsize);
      if chroma_mode.is_cfl() {
        assert!(bsize.cfl_allowed());
        cw.write_cfl_alphas(w, cfl);
      }
      if chroma_mode.is_directional() && bsize >= BlockSize::BLOCK_8X8 {
        cw.write_angle_delta(w, angle_delta.uv, chroma_mode);
      }
    }

    // NOTE(review): the literal `false` is presumably the "palette enabled"
    // flag, i.e. palette mode is always signalled as off - confirm against
    // `write_use_palette_mode`.
    if fi.allow_screen_content_tools > 0
      && bsize >= BlockSize::BLOCK_8X8
      && bsize.width() <= 64
      && bsize.height() <= 64
    {
      cw.write_use_palette_mode(
        w,
        false,
        bsize,
        tile_bo,
        luma_mode,
        chroma_mode,
        xdec,
        ydec,
        fi.sequence.chroma_sampling,
      );
    }

    if fi.sequence.enable_filter_intra
      && luma_mode == PredictionMode::DC_PRED
      && bsize.width() <= 32
      && bsize.height() <= 32
    {
      cw.write_use_filter_intra(w, false, bsize); // turn off FILTER_INTRA
    }
  }

  // write tx_size here
  if fi.tx_mode_select {
    if bsize > BlockSize::BLOCK_4X4 && (!is_inter || !skip) {
      if !is_inter {
        cw.write_tx_size_intra(w, tile_bo, bsize, tx_size);
        cw.bc.update_tx_size_context(tile_bo, bsize, tx_size, false);
      } else {
        // write var_tx_size
        // if here, bsize > BLOCK_4X4 && is_inter && !skip && !Lossless
        debug_assert!(fi.tx_mode_select);
        debug_assert!(bsize > BlockSize::BLOCK_4X4);
        debug_assert!(is_inter);
        debug_assert!(!skip);
        let max_tx_size = max_txsize_rect_lookup[bsize as usize];
        debug_assert!(max_tx_size.block_size() <= BlockSize::BLOCK_64X64);

        //TODO: "&& tx_size.block_size() < bsize" will be replaced with tx-split info for a partition
        // once it is available.
        let txfm_split =
          fi.enable_inter_txfm_split && tx_size.block_size() < bsize;

        // TODO: Revise write_tx_size_inter() for txfm_split = true
        cw.write_tx_size_inter(
          w,
          tile_bo,
          bsize,
          max_tx_size,
          txfm_split,
          0,
          0,
          0,
        );
      }
    } else {
      // Nothing is written for 4x4 blocks and skipped inter blocks; only the
      // tx size context is updated.
      debug_assert!(bsize == BlockSize::BLOCK_4X4 || (is_inter && skip));
      cw.bc.update_tx_size_context(tile_bo, bsize, tx_size, is_inter && skip);
    }
  }

  // Every counter is incremented by the area of the transform size.
  if let Some(enc_stats) = enc_stats {
    let pixels = tx_size.area();
    enc_stats.block_size_counts[bsize as usize] += pixels;
    enc_stats.tx_type_counts[tx_type as usize] += pixels;
    enc_stats.luma_pred_mode_counts[luma_mode as usize] += pixels;
    enc_stats.chroma_pred_mode_counts[chroma_mode as usize] += pixels;
    if skip {
      enc_stats.skip_block_count += pixels;
    }
  }

  // When the intra edge filter is enabled, remember the modes and reference
  // types of every MI unit of this block that lies inside the tile.
  if fi.sequence.enable_intra_edge_filter {
    for y in 0..bsize.height_mi() {
      if tile_bo.0.y + y >= ts.mi_height {
        continue;
      }
      for x in 0..bsize.width_mi() {
        if tile_bo.0.x + x >= ts.mi_width {
          continue;
        }
        let bi = &mut ts.coded_block_info[tile_bo.0.y + y][tile_bo.0.x + x];
        bi.luma_mode = luma_mode;
        bi.chroma_mode = chroma_mode;
        bi.reference_types = ref_frames;
      }
    }
  }

  if is_inter {
    // Inter: predict the whole block first, then code the residual.
    motion_compensate(
      fi, ts, cw, luma_mode, ref_frames, mvs, bsize, tile_bo, false,
    );
    write_tx_tree(
      fi,
      ts,
      cw,
      w,
      luma_mode,
      angle_delta.y,
      tile_bo,
      bsize,
      tx_size,
      tx_type,
      skip,
      false,
      rdo_type,
      need_recon_pixel,
    )
  } else {
    // Intra: no prediction is done here before the transform blocks are
    // coded.
    write_tx_blocks(
      fi,
      ts,
      cw,
      w,
      luma_mode,
      chroma_mode,
      angle_delta,
      tile_bo,
      bsize,
      tx_size,
      tx_type,
      skip,
      cfl,
      false,
      rdo_type,
      need_recon_pixel,
    )
  }
}
| 2240 | |
| 2241 | /// # Panics |
| 2242 | /// |
| 2243 | /// - If attempting to encode a lossless block (not yet supported) |
| 2244 | pub fn write_tx_blocks<T: Pixel, W: Writer>( |
| 2245 | fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>, |
| 2246 | cw: &mut ContextWriter, w: &mut W, luma_mode: PredictionMode, |
| 2247 | chroma_mode: PredictionMode, angle_delta: AngleDelta, |
| 2248 | tile_bo: TileBlockOffset, bsize: BlockSize, tx_size: TxSize, |
| 2249 | tx_type: TxType, skip: bool, cfl: CFLParams, luma_only: bool, |
| 2250 | rdo_type: RDOType, need_recon_pixel: bool, |
| 2251 | ) -> (bool, ScaledDistortion) { |
| 2252 | let bw = bsize.width_mi() / tx_size.width_mi(); |
| 2253 | let bh = bsize.height_mi() / tx_size.height_mi(); |
| 2254 | let qidx = get_qidx(fi, ts, cw, tile_bo); |
| 2255 | |
| 2256 | // TODO: Lossless is not yet supported. |
| 2257 | if !skip { |
| 2258 | assert_ne!(qidx, 0); |
| 2259 | } |
| 2260 | |
| 2261 | let PlaneConfig { xdec, ydec, .. } = ts.input.planes[1].cfg; |
| 2262 | let mut ac = Aligned::<[MaybeUninit<i16>; 32 * 32]>::uninit_array(); |
| 2263 | let mut partition_has_coeff: bool = false; |
| 2264 | let mut tx_dist = ScaledDistortion::zero(); |
| 2265 | let do_chroma = |
| 2266 | has_chroma(tile_bo, bsize, xdec, ydec, fi.sequence.chroma_sampling); |
| 2267 | |
| 2268 | ts.qc.update( |
| 2269 | qidx, |
| 2270 | tx_size, |
| 2271 | luma_mode.is_intra(), |
| 2272 | fi.sequence.bit_depth, |
| 2273 | fi.dc_delta_q[0], |
| 2274 | 0, |
| 2275 | ); |
| 2276 | |
| 2277 | for by in 0..bh { |
| 2278 | for bx in 0..bw { |
| 2279 | let tx_bo = TileBlockOffset(BlockOffset { |
| 2280 | x: tile_bo.0.x + bx * tx_size.width_mi(), |
| 2281 | y: tile_bo.0.y + by * tx_size.height_mi(), |
| 2282 | }); |
| 2283 | if tx_bo.0.x >= ts.mi_width || tx_bo.0.y >= ts.mi_height { |
| 2284 | continue; |
| 2285 | } |
| 2286 | let po = tx_bo.plane_offset(&ts.input.planes[0].cfg); |
| 2287 | let (has_coeff, dist) = encode_tx_block( |
| 2288 | fi, |
| 2289 | ts, |
| 2290 | cw, |
| 2291 | w, |
| 2292 | 0, |
| 2293 | tile_bo, |
| 2294 | bx, |
| 2295 | by, |
| 2296 | tx_bo, |
| 2297 | luma_mode, |
| 2298 | tx_size, |
| 2299 | tx_type, |
| 2300 | bsize, |
| 2301 | po, |
| 2302 | skip, |
| 2303 | qidx, |
| 2304 | &[], |
| 2305 | IntraParam::AngleDelta(angle_delta.y), |
| 2306 | rdo_type, |
| 2307 | need_recon_pixel, |
| 2308 | ); |
| 2309 | partition_has_coeff |= has_coeff; |
| 2310 | tx_dist += dist; |
| 2311 | } |
| 2312 | } |
| 2313 | |
| 2314 | if !do_chroma |
| 2315 | || luma_only |
| 2316 | || fi.sequence.chroma_sampling == ChromaSampling::Cs400 |
| 2317 | { |
| 2318 | return (partition_has_coeff, tx_dist); |
| 2319 | }; |
| 2320 | debug_assert!(has_chroma( |
| 2321 | tile_bo, |
| 2322 | bsize, |
| 2323 | xdec, |
| 2324 | ydec, |
| 2325 | fi.sequence.chroma_sampling |
| 2326 | )); |
| 2327 | |
| 2328 | let uv_tx_size = bsize.largest_chroma_tx_size(xdec, ydec); |
| 2329 | |
| 2330 | let mut bw_uv = (bw * tx_size.width_mi()) >> xdec; |
| 2331 | let mut bh_uv = (bh * tx_size.height_mi()) >> ydec; |
| 2332 | |
| 2333 | if bw_uv == 0 || bh_uv == 0 { |
| 2334 | bw_uv = 1; |
| 2335 | bh_uv = 1; |
| 2336 | } |
| 2337 | |
| 2338 | bw_uv /= uv_tx_size.width_mi(); |
| 2339 | bh_uv /= uv_tx_size.height_mi(); |
| 2340 | |
| 2341 | let ac_data = if chroma_mode.is_cfl() { |
| 2342 | luma_ac(&mut ac.data, ts, tile_bo, bsize, tx_size, fi) |
| 2343 | } else { |
| 2344 | [].as_slice() |
| 2345 | }; |
| 2346 | |
| 2347 | let uv_tx_type = if uv_tx_size.width() >= 32 || uv_tx_size.height() >= 32 { |
| 2348 | TxType::DCT_DCT |
| 2349 | } else { |
| 2350 | uv_intra_mode_to_tx_type_context(chroma_mode) |
| 2351 | }; |
| 2352 | |
| 2353 | for p in 1..3 { |
| 2354 | ts.qc.update( |
| 2355 | qidx, |
| 2356 | uv_tx_size, |
| 2357 | true, |
| 2358 | fi.sequence.bit_depth, |
| 2359 | fi.dc_delta_q[p], |
| 2360 | fi.ac_delta_q[p], |
| 2361 | ); |
| 2362 | let alpha = cfl.alpha(p - 1); |
| 2363 | for by in 0..bh_uv { |
| 2364 | for bx in 0..bw_uv { |
| 2365 | let tx_bo = TileBlockOffset(BlockOffset { |
| 2366 | x: tile_bo.0.x + ((bx * uv_tx_size.width_mi()) << xdec) |
| 2367 | - ((bw * tx_size.width_mi() == 1) as usize) * xdec, |
| 2368 | y: tile_bo.0.y + ((by * uv_tx_size.height_mi()) << ydec) |
| 2369 | - ((bh * tx_size.height_mi() == 1) as usize) * ydec, |
| 2370 | }); |
| 2371 | |
| 2372 | let mut po = tile_bo.plane_offset(&ts.input.planes[p].cfg); |
| 2373 | po.x += (bx * uv_tx_size.width()) as isize; |
| 2374 | po.y += (by * uv_tx_size.height()) as isize; |
| 2375 | let (has_coeff, dist) = encode_tx_block( |
| 2376 | fi, |
| 2377 | ts, |
| 2378 | cw, |
| 2379 | w, |
| 2380 | p, |
| 2381 | tile_bo, |
| 2382 | bx, |
| 2383 | by, |
| 2384 | tx_bo, |
| 2385 | chroma_mode, |
| 2386 | uv_tx_size, |
| 2387 | uv_tx_type, |
| 2388 | bsize, |
| 2389 | po, |
| 2390 | skip, |
| 2391 | qidx, |
| 2392 | ac_data, |
| 2393 | if chroma_mode.is_cfl() { |
| 2394 | IntraParam::Alpha(alpha) |
| 2395 | } else { |
| 2396 | IntraParam::AngleDelta(angle_delta.uv) |
| 2397 | }, |
| 2398 | rdo_type, |
| 2399 | need_recon_pixel, |
| 2400 | ); |
| 2401 | partition_has_coeff |= has_coeff; |
| 2402 | tx_dist += dist; |
| 2403 | } |
| 2404 | } |
| 2405 | } |
| 2406 | |
| 2407 | (partition_has_coeff, tx_dist) |
| 2408 | } |
| 2409 | |
| 2410 | pub fn write_tx_tree<T: Pixel, W: Writer>( |
| 2411 | fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>, |
| 2412 | cw: &mut ContextWriter, w: &mut W, luma_mode: PredictionMode, |
| 2413 | angle_delta_y: i8, tile_bo: TileBlockOffset, bsize: BlockSize, |
| 2414 | tx_size: TxSize, tx_type: TxType, skip: bool, luma_only: bool, |
| 2415 | rdo_type: RDOType, need_recon_pixel: bool, |
| 2416 | ) -> (bool, ScaledDistortion) { |
| 2417 | if skip { |
| 2418 | return (false, ScaledDistortion::zero()); |
| 2419 | } |
| 2420 | let bw = bsize.width_mi() / tx_size.width_mi(); |
| 2421 | let bh = bsize.height_mi() / tx_size.height_mi(); |
| 2422 | let qidx = get_qidx(fi, ts, cw, tile_bo); |
| 2423 | |
| 2424 | let PlaneConfig { xdec, ydec, .. } = ts.input.planes[1].cfg; |
| 2425 | let ac = &[0i16; 0]; |
| 2426 | let mut partition_has_coeff: bool = false; |
| 2427 | let mut tx_dist = ScaledDistortion::zero(); |
| 2428 | |
| 2429 | ts.qc.update( |
| 2430 | qidx, |
| 2431 | tx_size, |
| 2432 | luma_mode.is_intra(), |
| 2433 | fi.sequence.bit_depth, |
| 2434 | fi.dc_delta_q[0], |
| 2435 | 0, |
| 2436 | ); |
| 2437 | |
| 2438 | // TODO: If tx-parition more than only 1-level, this code does not work. |
| 2439 | // It should recursively traverse the tx block that are split recursivelty by calling write_tx_tree(), |
| 2440 | // as defined in https://aomediacodec.github.io/av1-spec/#transform-tree-syntax |
| 2441 | for by in 0..bh { |
| 2442 | for bx in 0..bw { |
| 2443 | let tx_bo = TileBlockOffset(BlockOffset { |
| 2444 | x: tile_bo.0.x + bx * tx_size.width_mi(), |
| 2445 | y: tile_bo.0.y + by * tx_size.height_mi(), |
| 2446 | }); |
| 2447 | if tx_bo.0.x >= ts.mi_width || tx_bo.0.y >= ts.mi_height { |
| 2448 | continue; |
| 2449 | } |
| 2450 | |
| 2451 | let po = tx_bo.plane_offset(&ts.input.planes[0].cfg); |
| 2452 | let (has_coeff, dist) = encode_tx_block( |
| 2453 | fi, |
| 2454 | ts, |
| 2455 | cw, |
| 2456 | w, |
| 2457 | 0, |
| 2458 | tile_bo, |
| 2459 | 0, |
| 2460 | 0, |
| 2461 | tx_bo, |
| 2462 | luma_mode, |
| 2463 | tx_size, |
| 2464 | tx_type, |
| 2465 | bsize, |
| 2466 | po, |
| 2467 | skip, |
| 2468 | qidx, |
| 2469 | ac, |
| 2470 | IntraParam::AngleDelta(angle_delta_y), |
| 2471 | rdo_type, |
| 2472 | need_recon_pixel, |
| 2473 | ); |
| 2474 | partition_has_coeff |= has_coeff; |
| 2475 | tx_dist += dist; |
| 2476 | } |
| 2477 | } |
| 2478 | |
| 2479 | if !has_chroma(tile_bo, bsize, xdec, ydec, fi.sequence.chroma_sampling) |
| 2480 | || luma_only |
| 2481 | || fi.sequence.chroma_sampling == ChromaSampling::Cs400 |
| 2482 | { |
| 2483 | return (partition_has_coeff, tx_dist); |
| 2484 | }; |
| 2485 | debug_assert!(has_chroma( |
| 2486 | tile_bo, |
| 2487 | bsize, |
| 2488 | xdec, |
| 2489 | ydec, |
| 2490 | fi.sequence.chroma_sampling |
| 2491 | )); |
| 2492 | |
| 2493 | let max_tx_size = max_txsize_rect_lookup[bsize as usize]; |
| 2494 | debug_assert!(max_tx_size.block_size() <= BlockSize::BLOCK_64X64); |
| 2495 | let uv_tx_size = bsize.largest_chroma_tx_size(xdec, ydec); |
| 2496 | |
| 2497 | let mut bw_uv = max_tx_size.width_mi() >> xdec; |
| 2498 | let mut bh_uv = max_tx_size.height_mi() >> ydec; |
| 2499 | |
| 2500 | if bw_uv == 0 || bh_uv == 0 { |
| 2501 | bw_uv = 1; |
| 2502 | bh_uv = 1; |
| 2503 | } |
| 2504 | |
| 2505 | bw_uv /= uv_tx_size.width_mi(); |
| 2506 | bh_uv /= uv_tx_size.height_mi(); |
| 2507 | |
| 2508 | let uv_tx_type = if partition_has_coeff { |
| 2509 | tx_type.uv_inter(uv_tx_size) |
| 2510 | } else { |
| 2511 | TxType::DCT_DCT |
| 2512 | }; |
| 2513 | |
| 2514 | for p in 1..3 { |
| 2515 | ts.qc.update( |
| 2516 | qidx, |
| 2517 | uv_tx_size, |
| 2518 | false, |
| 2519 | fi.sequence.bit_depth, |
| 2520 | fi.dc_delta_q[p], |
| 2521 | fi.ac_delta_q[p], |
| 2522 | ); |
| 2523 | |
| 2524 | for by in 0..bh_uv { |
| 2525 | for bx in 0..bw_uv { |
| 2526 | let tx_bo = TileBlockOffset(BlockOffset { |
| 2527 | x: tile_bo.0.x + ((bx * uv_tx_size.width_mi()) << xdec) |
| 2528 | - (max_tx_size.width_mi() == 1) as usize * xdec, |
| 2529 | y: tile_bo.0.y + ((by * uv_tx_size.height_mi()) << ydec) |
| 2530 | - (max_tx_size.height_mi() == 1) as usize * ydec, |
| 2531 | }); |
| 2532 | |
| 2533 | let mut po = tile_bo.plane_offset(&ts.input.planes[p].cfg); |
| 2534 | po.x += (bx * uv_tx_size.width()) as isize; |
| 2535 | po.y += (by * uv_tx_size.height()) as isize; |
| 2536 | let (has_coeff, dist) = encode_tx_block( |
| 2537 | fi, |
| 2538 | ts, |
| 2539 | cw, |
| 2540 | w, |
| 2541 | p, |
| 2542 | tile_bo, |
| 2543 | bx, |
| 2544 | by, |
| 2545 | tx_bo, |
| 2546 | luma_mode, |
| 2547 | uv_tx_size, |
| 2548 | uv_tx_type, |
| 2549 | bsize, |
| 2550 | po, |
| 2551 | skip, |
| 2552 | qidx, |
| 2553 | ac, |
| 2554 | IntraParam::AngleDelta(angle_delta_y), |
| 2555 | rdo_type, |
| 2556 | need_recon_pixel, |
| 2557 | ); |
| 2558 | partition_has_coeff |= has_coeff; |
| 2559 | tx_dist += dist; |
| 2560 | } |
| 2561 | } |
| 2562 | } |
| 2563 | |
| 2564 | (partition_has_coeff, tx_dist) |
| 2565 | } |
| 2566 | |
| 2567 | #[profiling::function ] |
| 2568 | pub fn encode_block_with_modes<T: Pixel, W: Writer>( |
| 2569 | fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>, |
| 2570 | cw: &mut ContextWriter, w_pre_cdef: &mut W, w_post_cdef: &mut W, |
| 2571 | bsize: BlockSize, tile_bo: TileBlockOffset, |
| 2572 | mode_decision: &PartitionParameters, rdo_type: RDOType, |
| 2573 | enc_stats: Option<&mut EncoderStats>, |
| 2574 | ) { |
| 2575 | let (mode_luma, mode_chroma) = |
| 2576 | (mode_decision.pred_mode_luma, mode_decision.pred_mode_chroma); |
| 2577 | let cfl = mode_decision.pred_cfl_params; |
| 2578 | let ref_frames = mode_decision.ref_frames; |
| 2579 | let mvs = mode_decision.mvs; |
| 2580 | let mut skip = mode_decision.skip; |
| 2581 | let mut cdef_coded = cw.bc.cdef_coded; |
| 2582 | |
| 2583 | // Set correct segmentation ID before encoding and before |
| 2584 | // rdo_tx_size_type(). |
| 2585 | cw.bc.blocks.set_segmentation_idx(tile_bo, bsize, mode_decision.sidx); |
| 2586 | |
| 2587 | let mut mv_stack = ArrayVec::<CandidateMV, 9>::new(); |
| 2588 | let is_compound = ref_frames[1] != NONE_FRAME; |
| 2589 | let mode_context = |
| 2590 | cw.find_mvrefs(tile_bo, ref_frames, &mut mv_stack, bsize, fi, is_compound); |
| 2591 | |
| 2592 | let (tx_size, tx_type) = if !mode_decision.skip && !mode_decision.has_coeff { |
| 2593 | skip = true; |
| 2594 | rdo_tx_size_type( |
| 2595 | fi, ts, cw, bsize, tile_bo, mode_luma, ref_frames, mvs, skip, |
| 2596 | ) |
| 2597 | } else { |
| 2598 | (mode_decision.tx_size, mode_decision.tx_type) |
| 2599 | }; |
| 2600 | |
| 2601 | cdef_coded = encode_block_pre_cdef( |
| 2602 | &fi.sequence, |
| 2603 | ts, |
| 2604 | cw, |
| 2605 | if cdef_coded { w_post_cdef } else { w_pre_cdef }, |
| 2606 | bsize, |
| 2607 | tile_bo, |
| 2608 | skip, |
| 2609 | ); |
| 2610 | encode_block_post_cdef( |
| 2611 | fi, |
| 2612 | ts, |
| 2613 | cw, |
| 2614 | if cdef_coded { w_post_cdef } else { w_pre_cdef }, |
| 2615 | mode_luma, |
| 2616 | mode_chroma, |
| 2617 | mode_decision.angle_delta, |
| 2618 | ref_frames, |
| 2619 | mvs, |
| 2620 | bsize, |
| 2621 | tile_bo, |
| 2622 | skip, |
| 2623 | cfl, |
| 2624 | tx_size, |
| 2625 | tx_type, |
| 2626 | mode_context, |
| 2627 | &mv_stack, |
| 2628 | rdo_type, |
| 2629 | true, |
| 2630 | enc_stats, |
| 2631 | ); |
| 2632 | } |
| 2633 | |
/// Chooses the partitioning of the block at `tile_bo` by recursive
/// rate-distortion search and leaves the winning encoding in the writers.
///
/// The whole block is first evaluated as `PARTITION_NONE` (unless it must be
/// split). If splitting is allowed, the horizontal, vertical and four-way
/// split candidates are then tried in turn; each trial starts from the same
/// checkpoint of `cw`, `w_pre_cdef` and `w_post_cdef` and recurses into this
/// function for every child block.
///
/// * `ref_rd_cost` - cost bound from the caller; together with the best cost
///   found so far it is used to abandon a candidate early when
///   `fi.enable_early_exit` is set.
/// * `enc_stats` - forwarded to every block that is actually encoded.
///
/// Returns the best RD cost and partition type found. `part_modes` is only
/// populated in the result when the best type is `PARTITION_NONE`. When
/// `tile_bo` lies outside the tile the result carries `std::f64::MAX` and
/// `PARTITION_INVALID`.
///
/// # Panics
///
/// Panics if `fi.partition_range.max` exceeds 64x64, or if no valid
/// partition was found for the block.
#[profiling::function]
fn encode_partition_bottomup<T: Pixel, W: Writer>(
  fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>,
  cw: &mut ContextWriter, w_pre_cdef: &mut W, w_post_cdef: &mut W,
  bsize: BlockSize, tile_bo: TileBlockOffset, ref_rd_cost: f64,
  inter_cfg: &InterConfig, enc_stats: &mut EncoderStats,
) -> PartitionGroupParameters {
  let rdo_type = RDOType::PixelDistRealRate;
  let mut rd_cost = std::f64::MAX;
  let mut best_rd = std::f64::MAX;
  let mut rdo_output = PartitionGroupParameters {
    rd_cost,
    part_type: PartitionType::PARTITION_INVALID,
    part_modes: ArrayVec::new(),
  };

  // Nothing to encode when the block origin is outside the tile.
  if tile_bo.0.x >= ts.mi_width || tile_bo.0.y >= ts.mi_height {
    return rdo_output;
  }

  let is_square = bsize.is_sqr();
  let hbs = bsize.width_mi() / 2;
  // Whether the right / bottom half of the block starts inside the tile.
  let has_cols = tile_bo.0.x + hbs < ts.mi_width;
  let has_rows = tile_bo.0.y + hbs < ts.mi_height;
  // Whether the block extends past the right / bottom tile edge.
  let is_straddle_x = tile_bo.0.x + bsize.width_mi() > ts.mi_width;
  let is_straddle_y = tile_bo.0.y + bsize.height_mi() > ts.mi_height;

  // TODO: Update for 128x128 superblocks
  assert!(fi.partition_range.max <= BlockSize::BLOCK_64X64);

  // A square block is forced to split when it is larger than the configured
  // maximum or when one of its halves starts outside the tile.
  let must_split =
    is_square && (bsize > fi.partition_range.max || !has_cols || !has_rows);

  let can_split = // FIXME: sub-8x8 inter blocks not supported for non-4:2:0 sampling
    if fi.frame_type.has_inter() &&
      fi.sequence.chroma_sampling != ChromaSampling::Cs420 &&
      bsize <= BlockSize::BLOCK_8X8 {
      false
    } else {
      (bsize > fi.partition_range.min && is_square) || must_split
    };

  assert!(bsize >= BlockSize::BLOCK_8X8 || !can_split);

  let mut best_partition = PartitionType::PARTITION_INVALID;

  // State every candidate partition is evaluated from.
  let cw_checkpoint = cw.checkpoint(&tile_bo, fi.sequence.chroma_sampling);
  let w_pre_checkpoint = w_pre_cdef.checkpoint();
  let w_post_checkpoint = w_post_cdef.checkpoint();

  // Code the whole block
  if !must_split {
    // Cost of signalling PARTITION_NONE; the partition symbol is only
    // written for square blocks of at least 8x8.
    let cost = if bsize >= BlockSize::BLOCK_8X8 && is_square {
      let w: &mut W = if cw.bc.cdef_coded { w_post_cdef } else { w_pre_cdef };
      let tell = w.tell_frac();
      cw.write_partition(w, tile_bo, PartitionType::PARTITION_NONE, bsize);
      compute_rd_cost(fi, w.tell_frac() - tell, ScaledDistortion::zero())
    } else {
      0.0
    };

    let mode_decision =
      rdo_mode_decision(fi, ts, cw, bsize, tile_bo, inter_cfg);

    if !mode_decision.pred_mode_luma.is_intra() {
      // Fill the saved motion structure
      save_block_motion(
        ts,
        mode_decision.bsize,
        mode_decision.bo,
        mode_decision.ref_frames[0].to_index(),
        mode_decision.mvs[0],
      );
    }

    rd_cost = mode_decision.rd_cost + cost;

    // PARTITION_NONE is the baseline the split candidates have to beat.
    best_partition = PartitionType::PARTITION_NONE;
    best_rd = rd_cost;
    rdo_output.part_modes.push(mode_decision.clone());

    // No alternative will be tried, so this is the final encoding.
    if !can_split {
      encode_block_with_modes(
        fi,
        ts,
        cw,
        w_pre_cdef,
        w_post_cdef,
        bsize,
        tile_bo,
        &mode_decision,
        rdo_type,
        Some(enc_stats),
      );
    }
  } // if !must_split

  let mut early_exit = false;

  // Test all partition types other than PARTITION_NONE by comparing their RD costs
  if can_split {
    debug_assert!(is_square);

    // Candidate order matters: PARTITION_SPLIT is always pushed last.
    let mut partition_types = ArrayVec::<PartitionType, 3>::new();
    if bsize
      <= fi.config.speed_settings.partition.non_square_partition_max_threshold
      || is_straddle_x
      || is_straddle_y
    {
      if has_cols {
        partition_types.push(PartitionType::PARTITION_HORZ);
      }
      if !(fi.sequence.chroma_sampling == ChromaSampling::Cs422) && has_rows {
        partition_types.push(PartitionType::PARTITION_VERT);
      }
    }
    partition_types.push(PartitionType::PARTITION_SPLIT);

    for partition in partition_types {
      // (!has_rows || !has_cols) --> must_split
      debug_assert!((has_rows && has_cols) || must_split);
      // (!has_rows && has_cols) --> partition != PartitionType::PARTITION_VERT
      debug_assert!(
        has_rows || !has_cols || (partition != PartitionType::PARTITION_VERT)
      );
      // (has_rows && !has_cols) --> partition != PartitionType::PARTITION_HORZ
      debug_assert!(
        !has_rows || has_cols || (partition != PartitionType::PARTITION_HORZ)
      );
      // (!has_rows && !has_cols) --> partition == PartitionType::PARTITION_SPLIT
      debug_assert!(
        has_rows || has_cols || (partition == PartitionType::PARTITION_SPLIT)
      );

      // Discard whatever the previous candidate wrote.
      cw.rollback(&cw_checkpoint);
      w_pre_cdef.rollback(&w_pre_checkpoint);
      w_post_cdef.rollback(&w_post_checkpoint);

      let subsize = bsize.subsize(partition).unwrap();
      let hbsw = subsize.width_mi(); // Half the block size width in blocks
      let hbsh = subsize.height_mi(); // Half the block size height in blocks
      let mut child_modes = ArrayVec::<PartitionParameters, 4>::new();
      rd_cost = 0.0;

      // Start the candidate's cost with the rate of its partition symbol.
      if bsize >= BlockSize::BLOCK_8X8 {
        let w: &mut W =
          if cw.bc.cdef_coded { w_post_cdef } else { w_pre_cdef };
        let tell = w.tell_frac();
        cw.write_partition(w, tile_bo, partition, bsize);
        rd_cost =
          compute_rd_cost(fi, w.tell_frac() - tell, ScaledDistortion::zero());
      }

      // Origins of the four quadrants; get_sub_partitions() selects the
      // ones that apply to `partition`.
      let four_partitions = [
        tile_bo,
        TileBlockOffset(BlockOffset { x: tile_bo.0.x + hbsw, y: tile_bo.0.y }),
        TileBlockOffset(BlockOffset { x: tile_bo.0.x, y: tile_bo.0.y + hbsh }),
        TileBlockOffset(BlockOffset {
          x: tile_bo.0.x + hbsw,
          y: tile_bo.0.y + hbsh,
        }),
      ];
      let partitions = get_sub_partitions(&four_partitions, partition);

      early_exit = false;
      // If either of horz or vert partition types is being tested,
      // two partitioned rectangles, defined in 'partitions', of the current block
      // is passed to encode_partition_bottomup()
      for offset in partitions {
        // Children that start outside the tile contribute nothing.
        if offset.0.x >= ts.mi_width || offset.0.y >= ts.mi_height {
          continue;
        }
        // The best cost so far becomes the child's reference cost.
        let child_rdo_output = encode_partition_bottomup(
          fi,
          ts,
          cw,
          w_pre_cdef,
          w_post_cdef,
          subsize,
          offset,
          best_rd,
          inter_cfg,
          enc_stats,
        );
        let cost = child_rdo_output.rd_cost;
        assert!(cost >= 0.0);

        // std::f64::MAX marks a child that produced no cost; it is not
        // accumulated.
        if cost != std::f64::MAX {
          rd_cost += cost;
          // Abandon the candidate once it can no longer win. Forced splits
          // are never abandoned.
          if !must_split
            && fi.enable_early_exit
            && (rd_cost >= best_rd || rd_cost >= ref_rd_cost)
          {
            assert!(cost != std::f64::MAX);
            early_exit = true;
            break;
          } else if partition != PartitionType::PARTITION_SPLIT {
            // HORZ/VERT children are kept so the winner can be re-encoded.
            child_modes.push(child_rdo_output.part_modes[0].clone());
          }
        }
      }

      if !early_exit && rd_cost < best_rd {
        best_rd = rd_cost;
        best_partition = partition;
        if partition != PartitionType::PARTITION_SPLIT {
          assert!(!child_modes.is_empty());
          rdo_output.part_modes = child_modes;
        }
      }
    }

    debug_assert!(
      early_exit || best_partition != PartitionType::PARTITION_INVALID
    );

    // If the best partition is not PARTITION_SPLIT, recode it
    // NOTE(review): a winning PARTITION_SPLIT is not recoded; since it is
    // the last candidate tried, the writers presumably still hold its
    // encoding - confirm.
    if best_partition != PartitionType::PARTITION_SPLIT {
      assert!(!rdo_output.part_modes.is_empty());
      cw.rollback(&cw_checkpoint);
      w_pre_cdef.rollback(&w_pre_checkpoint);
      w_post_cdef.rollback(&w_post_checkpoint);

      assert!(best_partition != PartitionType::PARTITION_NONE || !must_split);
      let subsize = bsize.subsize(best_partition).unwrap();

      if bsize >= BlockSize::BLOCK_8X8 {
        let w: &mut W =
          if cw.bc.cdef_coded { w_post_cdef } else { w_pre_cdef };
        cw.write_partition(w, tile_bo, best_partition, bsize);
      }
      for mode in rdo_output.part_modes.clone() {
        assert!(subsize == mode.bsize);

        if !mode.pred_mode_luma.is_intra() {
          save_block_motion(
            ts,
            mode.bsize,
            mode.bo,
            mode.ref_frames[0].to_index(),
            mode.mvs[0],
          );
        }

        // FIXME: redundant block re-encode
        encode_block_with_modes(
          fi,
          ts,
          cw,
          w_pre_cdef,
          w_post_cdef,
          mode.bsize,
          mode.bo,
          &mode,
          rdo_type,
          Some(enc_stats),
        );
      }
    }
  } // if can_split {

  assert!(best_partition != PartitionType::PARTITION_INVALID);

  // Record the final partitioning in the block context. For a split of a
  // block larger than 8x8 this is skipped.
  if is_square
    && bsize >= BlockSize::BLOCK_8X8
    && (bsize == BlockSize::BLOCK_8X8
      || best_partition != PartitionType::PARTITION_SPLIT)
  {
    cw.bc.update_partition_context(
      tile_bo,
      bsize.subsize(best_partition).unwrap(),
      bsize,
    );
  }

  rdo_output.rd_cost = best_rd;
  rdo_output.part_type = best_partition;

  // Only a PARTITION_NONE result reports its mode decision to the caller.
  if best_partition != PartitionType::PARTITION_NONE {
    rdo_output.part_modes.clear();
  }
  rdo_output
}
| 2917 | |
/// Encodes the block at `tile_bo`, deciding top-down whether to split it.
///
/// Blocks that must split are split without a search. Blocks that may split
/// get one `rdo_partition_decision()` call that chooses between
/// `PARTITION_SPLIT` and `PARTITION_NONE`. All other blocks are coded as
/// `PARTITION_NONE`. The function then writes the partition symbol and
/// either encodes the block or recurses into its sub-blocks.
///
/// * `block_output` - a decision already made for this block by the
///   parent's partition search, or `None` when there is none.
/// * `enc_stats` - forwarded to every block that is actually encoded.
///
/// Returns immediately when `tile_bo` lies outside the tile.
fn encode_partition_topdown<T: Pixel, W: Writer>(
  fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>,
  cw: &mut ContextWriter, w_pre_cdef: &mut W, w_post_cdef: &mut W,
  bsize: BlockSize, tile_bo: TileBlockOffset,
  block_output: &Option<PartitionGroupParameters>, inter_cfg: &InterConfig,
  enc_stats: &mut EncoderStats,
) {
  if tile_bo.0.x >= ts.mi_width || tile_bo.0.y >= ts.mi_height {
    return;
  }
  let is_square = bsize.is_sqr();
  let rdo_type = RDOType::PixelDistRealRate;
  let hbs = bsize.width_mi() / 2;
  // Whether the right / bottom half of the block starts inside the tile.
  let has_cols = tile_bo.0.x + hbs < ts.mi_width;
  let has_rows = tile_bo.0.y + hbs < ts.mi_height;

  // TODO: Update for 128x128 superblocks
  debug_assert!(fi.partition_range.max <= BlockSize::BLOCK_64X64);

  // A square block is forced to split when it is larger than the configured
  // maximum or when one of its halves starts outside the tile.
  let must_split =
    is_square && (bsize > fi.partition_range.max || !has_cols || !has_rows);

  let can_split = // FIXME: sub-8x8 inter blocks not supported for non-4:2:0 sampling
    if fi.frame_type.has_inter() &&
      fi.sequence.chroma_sampling != ChromaSampling::Cs420 &&
      bsize <= BlockSize::BLOCK_8X8 {
      false
    } else {
      (bsize > fi.partition_range.min && is_square) || must_split
    };

  // Start from the caller's decision, or from an empty placeholder.
  let mut rdo_output =
    block_output.clone().unwrap_or_else(|| PartitionGroupParameters {
      part_type: PartitionType::PARTITION_INVALID,
      rd_cost: std::f64::MAX,
      part_modes: ArrayVec::new(),
    });

  let partition = if must_split {
    PartitionType::PARTITION_SPLIT
  } else if can_split {
    debug_assert!(bsize.is_sqr());

    // Blocks of sizes within the supported range are subjected to a partitioning decision
    rdo_output = rdo_partition_decision(
      fi,
      ts,
      cw,
      w_pre_cdef,
      w_post_cdef,
      bsize,
      tile_bo,
      &rdo_output,
      &[PartitionType::PARTITION_SPLIT, PartitionType::PARTITION_NONE],
      rdo_type,
      inter_cfg,
    );
    rdo_output.part_type
  } else {
    // Blocks of sizes below the supported range are encoded directly
    PartitionType::PARTITION_NONE
  };

  debug_assert!(partition != PartitionType::PARTITION_INVALID);

  let subsize = bsize.subsize(partition).unwrap();

  // The partition symbol is only written for square blocks of at least 8x8.
  if bsize >= BlockSize::BLOCK_8X8 && is_square {
    let w: &mut W = if cw.bc.cdef_coded { w_post_cdef } else { w_pre_cdef };
    cw.write_partition(w, tile_bo, partition, bsize);
  }

  match partition {
    PartitionType::PARTITION_NONE => {
      // Declared here so a freshly computed decision outlives the borrow
      // held by `part_decision`.
      let rdo_decision;
      let part_decision =
        if let Some(part_mode) = rdo_output.part_modes.first() {
          // The optimal prediction mode is known from a previous iteration
          part_mode
        } else {
          // Make a prediction mode decision for blocks encoded with no rdo_partition_decision call (e.g. edges)
          rdo_decision =
            rdo_mode_decision(fi, ts, cw, bsize, tile_bo, inter_cfg);
          &rdo_decision
        };

      let mut mode_luma = part_decision.pred_mode_luma;
      let mut mode_chroma = part_decision.pred_mode_chroma;

      let cfl = part_decision.pred_cfl_params;
      let skip = part_decision.skip;
      let ref_frames = part_decision.ref_frames;
      let mvs = part_decision.mvs;
      let mut cdef_coded = cw.bc.cdef_coded;

      // Set correct segmentation ID before encoding and before
      // rdo_tx_size_type().
      cw.bc.blocks.set_segmentation_idx(tile_bo, bsize, part_decision.sidx);

      // NOTE: Cannot avoid calling rdo_tx_size_type() here again,
      // because, with top-down partition RDO, the neighboring contexts
      // of current partition can change, i.e. neighboring partitions can split down more.
      let (tx_size, tx_type) = rdo_tx_size_type(
        fi, ts, cw, bsize, tile_bo, mode_luma, ref_frames, mvs, skip,
      );

      let mut mv_stack = ArrayVec::<CandidateMV, 9>::new();
      let is_compound = ref_frames[1] != NONE_FRAME;
      let mode_context = cw.find_mvrefs(
        tile_bo,
        ref_frames,
        &mut mv_stack,
        bsize,
        fi,
        is_compound,
      );

      // The inter mode is re-derived from the chosen motion vectors and the
      // candidate list that was just built for the current context.
      // TODO: proper remap when is_compound is true
      if !mode_luma.is_intra() {
        if is_compound && mode_luma != PredictionMode::GLOBAL_GLOBALMV {
          // NOTE(review): mv_stack[0] and mv_stack[1] are indexed without a
          // length check, unlike entries 2 and 3 below - presumably
          // find_mvrefs() always yields at least two candidates for
          // compound references; confirm.
          let match0 = mv_stack[0].this_mv.row == mvs[0].row
            && mv_stack[0].this_mv.col == mvs[0].col;
          let match1 = mv_stack[0].comp_mv.row == mvs[1].row
            && mv_stack[0].comp_mv.col == mvs[1].col;

          let match2 = mv_stack[1].this_mv.row == mvs[0].row
            && mv_stack[1].this_mv.col == mvs[0].col;
          let match3 = mv_stack[1].comp_mv.row == mvs[1].row
            && mv_stack[1].comp_mv.col == mvs[1].col;

          let match4 = mv_stack.len() > 2 && mv_stack[2].this_mv == mvs[0];
          let match5 = mv_stack.len() > 2 && mv_stack[2].comp_mv == mvs[1];

          let match6 = mv_stack.len() > 3 && mv_stack[3].this_mv == mvs[0];
          let match7 = mv_stack.len() > 3 && mv_stack[3].comp_mv == mvs[1];

          // A candidate matching both vectors wins first; after that a
          // match on only one vector of candidate 0 is paired with a new
          // vector for the other.
          mode_luma = if match0 && match1 {
            PredictionMode::NEAREST_NEARESTMV
          } else if match2 && match3 {
            PredictionMode::NEAR_NEAR0MV
          } else if match4 && match5 {
            PredictionMode::NEAR_NEAR1MV
          } else if match6 && match7 {
            PredictionMode::NEAR_NEAR2MV
          } else if match0 {
            PredictionMode::NEAREST_NEWMV
          } else if match1 {
            PredictionMode::NEW_NEARESTMV
          } else {
            PredictionMode::NEW_NEWMV
          };

          // Two zero vectors become GLOBAL_GLOBALMV unless the nearest
          // candidate already matched them.
          if mode_luma != PredictionMode::NEAREST_NEARESTMV
            && mvs[0].row == 0
            && mvs[0].col == 0
            && mvs[1].row == 0
            && mvs[1].col == 0
          {
            mode_luma = PredictionMode::GLOBAL_GLOBALMV;
          }
          mode_chroma = mode_luma;
        } else if !is_compound && mode_luma != PredictionMode::GLOBALMV {
          mode_luma = PredictionMode::NEWMV;
          // The loop does not stop at the first hit, so the last matching
          // candidate among the first four determines the mode.
          for (c, m) in mv_stack.iter().take(4).zip(
            [
              PredictionMode::NEARESTMV,
              PredictionMode::NEAR0MV,
              PredictionMode::NEAR1MV,
              PredictionMode::NEAR2MV,
            ]
            .iter(),
          ) {
            if c.this_mv.row == mvs[0].row && c.this_mv.col == mvs[0].col {
              mode_luma = *m;
            }
          }
          // A zero vector that matched no candidate is remapped according
          // to how many candidates exist.
          if mode_luma == PredictionMode::NEWMV
            && mvs[0].row == 0
            && mvs[0].col == 0
          {
            mode_luma = if mv_stack.is_empty() {
              PredictionMode::NEARESTMV
            } else if mv_stack.len() == 1 {
              PredictionMode::NEAR0MV
            } else {
              PredictionMode::GLOBALMV
            };
          }
          mode_chroma = mode_luma;
        }

        save_block_motion(
          ts,
          part_decision.bsize,
          part_decision.bo,
          part_decision.ref_frames[0].to_index(),
          part_decision.mvs[0],
        );
      }

      // FIXME: every final block that has gone through the RDO decision process is encoded twice
      cdef_coded = encode_block_pre_cdef(
        &fi.sequence,
        ts,
        cw,
        if cdef_coded { w_post_cdef } else { w_pre_cdef },
        bsize,
        tile_bo,
        skip,
      );
      encode_block_post_cdef(
        fi,
        ts,
        cw,
        if cdef_coded { w_post_cdef } else { w_pre_cdef },
        mode_luma,
        mode_chroma,
        part_decision.angle_delta,
        ref_frames,
        mvs,
        bsize,
        tile_bo,
        skip,
        cfl,
        tx_size,
        tx_type,
        mode_context,
        &mv_stack,
        RDOType::PixelDistRealRate,
        true,
        Some(enc_stats),
      );
    }
    PARTITION_SPLIT | PARTITION_HORZ | PARTITION_VERT => {
      if !rdo_output.part_modes.is_empty() {
        debug_assert!(can_split && !must_split);

        // The optimal prediction modes for each split block is known from an rdo_partition_decision() call
        for mode in rdo_output.part_modes {
          // Each block is subjected to a new splitting decision
          encode_partition_topdown(
            fi,
            ts,
            cw,
            w_pre_cdef,
            w_post_cdef,
            subsize,
            mode.bo,
            // Hand the child its known mode as a PARTITION_NONE decision.
            &Some(PartitionGroupParameters {
              rd_cost: mode.rd_cost,
              part_type: PartitionType::PARTITION_NONE,
              part_modes: [mode][..].try_into().unwrap(),
            }),
            inter_cfg,
            enc_stats,
          );
        }
      } else {
        // Forced split: no per-child decisions exist, so the children are
        // derived from the block geometry and start without a decision.
        debug_assert!(must_split);
        let hbsw = subsize.width_mi(); // Half the block size width in blocks
        let hbsh = subsize.height_mi(); // Half the block size height in blocks
        let four_partitions = [
          tile_bo,
          TileBlockOffset(BlockOffset {
            x: tile_bo.0.x + hbsw,
            y: tile_bo.0.y,
          }),
          TileBlockOffset(BlockOffset {
            x: tile_bo.0.x,
            y: tile_bo.0.y + hbsh,
          }),
          TileBlockOffset(BlockOffset {
            x: tile_bo.0.x + hbsw,
            y: tile_bo.0.y + hbsh,
          }),
        ];
        let partitions = get_sub_partitions(&four_partitions, partition);

        partitions.iter().for_each(|&offset| {
          encode_partition_topdown(
            fi,
            ts,
            cw,
            w_pre_cdef,
            w_post_cdef,
            subsize,
            offset,
            &None,
            inter_cfg,
            enc_stats,
          );
        });
      }
    }
    _ => unreachable!(),
  }

  // Record the final partitioning in the block context. For a split of a
  // block larger than 8x8 this is skipped.
  if is_square
    && bsize >= BlockSize::BLOCK_8X8
    && (bsize == BlockSize::BLOCK_8X8
      || partition != PartitionType::PARTITION_SPLIT)
  {
    cw.bc.update_partition_context(tile_bo, subsize, bsize);
  }
}
| 3223 | |
| 3224 | fn get_initial_cdfcontext<T: Pixel>(fi: &FrameInvariants<T>) -> CDFContext { |
| 3225 | let cdf: Option = if fi.primary_ref_frame == PRIMARY_REF_NONE { |
| 3226 | None |
| 3227 | } else { |
| 3228 | let ref_frame_idx: usize = fi.ref_frames[fi.primary_ref_frame as usize] as usize; |
| 3229 | let ref_frame: Option<&Arc>> = fi.rec_buffer.frames[ref_frame_idx].as_ref(); |
| 3230 | ref_frame.map(|rec: &Arc>| rec.cdfs) |
| 3231 | }; |
| 3232 | |
| 3233 | // return the retrieved instance if any, a new one otherwise |
| 3234 | cdf.unwrap_or_else(|| CDFContext::new(quantizer:fi.base_q_idx)) |
| 3235 | } |
| 3236 | |
| 3237 | #[profiling::function ] |
| 3238 | fn encode_tile_group<T: Pixel>( |
| 3239 | fi: &FrameInvariants<T>, fs: &mut FrameState<T>, inter_cfg: &InterConfig, |
| 3240 | ) -> Vec<u8> { |
| 3241 | let planes = |
| 3242 | if fi.sequence.chroma_sampling == ChromaSampling::Cs400 { 1 } else { 3 }; |
| 3243 | let mut blocks = FrameBlocks::new(fi.w_in_b, fi.h_in_b); |
| 3244 | let ti = &fi.sequence.tiling; |
| 3245 | |
| 3246 | let initial_cdf = get_initial_cdfcontext(fi); |
| 3247 | // dynamic allocation: once per frame |
| 3248 | let mut cdfs = vec![initial_cdf; ti.tile_count()]; |
| 3249 | |
| 3250 | let (raw_tiles, stats): (Vec<_>, Vec<_>) = ti |
| 3251 | .tile_iter_mut(fs, &mut blocks) |
| 3252 | .zip(cdfs.iter_mut()) |
| 3253 | .collect::<Vec<_>>() |
| 3254 | .into_par_iter() |
| 3255 | .map(|(mut ctx, cdf)| { |
| 3256 | encode_tile(fi, &mut ctx.ts, cdf, &mut ctx.tb, inter_cfg) |
| 3257 | }) |
| 3258 | .unzip(); |
| 3259 | |
| 3260 | for tile_stats in stats { |
| 3261 | fs.enc_stats += &tile_stats; |
| 3262 | } |
| 3263 | |
| 3264 | /* Frame deblocking operates over a single large tile wrapping the |
| 3265 | * frame rather than the frame itself so that deblocking is |
| 3266 | * available inside RDO when needed */ |
| 3267 | /* TODO: Don't apply if lossless */ |
| 3268 | let levels = fs.apply_tile_state_mut(|ts| { |
| 3269 | let rec = &mut ts.rec; |
| 3270 | deblock_filter_optimize( |
| 3271 | fi, |
| 3272 | &rec.as_const(), |
| 3273 | &ts.input.as_tile(), |
| 3274 | &blocks.as_tile_blocks(), |
| 3275 | fi.width, |
| 3276 | fi.height, |
| 3277 | ) |
| 3278 | }); |
| 3279 | fs.deblock.levels = levels; |
| 3280 | |
| 3281 | if fs.deblock.levels[0] != 0 || fs.deblock.levels[1] != 0 { |
| 3282 | fs.apply_tile_state_mut(|ts| { |
| 3283 | let rec = &mut ts.rec; |
| 3284 | deblock_filter_frame( |
| 3285 | ts.deblock, |
| 3286 | rec, |
| 3287 | &blocks.as_tile_blocks(), |
| 3288 | fi.width, |
| 3289 | fi.height, |
| 3290 | fi.sequence.bit_depth, |
| 3291 | planes, |
| 3292 | ); |
| 3293 | }); |
| 3294 | } |
| 3295 | |
| 3296 | if fi.sequence.enable_restoration { |
| 3297 | // Until the loop filters are better pipelined, we'll need to keep |
| 3298 | // around a copy of both the deblocked and cdeffed frame. |
| 3299 | let deblocked_frame = (*fs.rec).clone(); |
| 3300 | |
| 3301 | /* TODO: Don't apply if lossless */ |
| 3302 | if fi.sequence.enable_cdef { |
| 3303 | fs.apply_tile_state_mut(|ts| { |
| 3304 | let rec = &mut ts.rec; |
| 3305 | cdef_filter_tile(fi, &deblocked_frame, &blocks.as_tile_blocks(), rec); |
| 3306 | }); |
| 3307 | } |
| 3308 | /* TODO: Don't apply if lossless */ |
| 3309 | fs.restoration.lrf_filter_frame( |
| 3310 | Arc::get_mut(&mut fs.rec).unwrap(), |
| 3311 | &deblocked_frame, |
| 3312 | fi, |
| 3313 | ); |
| 3314 | } else { |
| 3315 | /* TODO: Don't apply if lossless */ |
| 3316 | if fi.sequence.enable_cdef { |
| 3317 | let deblocked_frame = (*fs.rec).clone(); |
| 3318 | fs.apply_tile_state_mut(|ts| { |
| 3319 | let rec = &mut ts.rec; |
| 3320 | cdef_filter_tile(fi, &deblocked_frame, &blocks.as_tile_blocks(), rec); |
| 3321 | }); |
| 3322 | } |
| 3323 | } |
| 3324 | |
| 3325 | let (idx_max, max_len) = raw_tiles |
| 3326 | .iter() |
| 3327 | .map(Vec::len) |
| 3328 | .enumerate() |
| 3329 | .max_by_key(|&(_, len)| len) |
| 3330 | .unwrap(); |
| 3331 | |
| 3332 | if !fi.disable_frame_end_update_cdf { |
| 3333 | // use the biggest tile (in bytes) for CDF update |
| 3334 | fs.context_update_tile_id = idx_max; |
| 3335 | fs.cdfs = cdfs[idx_max]; |
| 3336 | fs.cdfs.reset_counts(); |
| 3337 | } |
| 3338 | |
| 3339 | let max_tile_size_bytes = ((ILog::ilog(max_len) + 7) / 8) as u32; |
| 3340 | debug_assert!(max_tile_size_bytes > 0 && max_tile_size_bytes <= 4); |
| 3341 | fs.max_tile_size_bytes = max_tile_size_bytes; |
| 3342 | |
| 3343 | build_raw_tile_group(ti, &raw_tiles, max_tile_size_bytes) |
| 3344 | } |
| 3345 | |
| 3346 | fn build_raw_tile_group( |
| 3347 | ti: &TilingInfo, raw_tiles: &[Vec<u8>], max_tile_size_bytes: u32, |
| 3348 | ) -> Vec<u8> { |
| 3349 | // <https://aomediacodec.github.io/av1-spec/#general-tile-group-obu-syntax> |
| 3350 | let mut raw: Vec = Vec::new(); |
| 3351 | let mut bw: BitWriter<&mut Vec, BigEndian> = BitWriter::endian(&mut raw, _endian:BigEndian); |
| 3352 | if ti.cols * ti.rows > 1 { |
| 3353 | // tile_start_and_end_present_flag |
| 3354 | bw.write_bit(false).unwrap(); |
| 3355 | } |
| 3356 | bw.byte_align().unwrap(); |
| 3357 | for (i: usize, raw_tile: &Vec) in raw_tiles.iter().enumerate() { |
| 3358 | let last: usize = raw_tiles.len() - 1; |
| 3359 | if i != last { |
| 3360 | let tile_size_minus_1: usize = raw_tile.len() - 1; |
| 3361 | bw.write_le(max_tile_size_bytes, payload:tile_size_minus_1 as u64).unwrap(); |
| 3362 | } |
| 3363 | bw.write_bytes(buf:raw_tile).unwrap(); |
| 3364 | } |
| 3365 | raw |
| 3366 | } |
| 3367 | |
/// A superblock whose symbols have been recorded but not yet written to the
/// tile's final writer.
///
/// Entries wait in a queue until the loop filter decisions they depend on
/// are available; `check_lf_queue()` then replays them in order.
pub struct SBSQueueEntry {
  /// Offset of the superblock within its tile.
  pub sbo: TileSuperBlockOffset,
  /// Per-plane loop restoration unit index for this superblock. A value of
  /// -1 makes the plane ignored in LRU coding/RDO decisions.
  pub lru_index: [i32; MAX_PLANES],
  /// Whether a CDEF index is written for this superblock.
  pub cdef_coded: bool,
  /// Recorded symbols that are replayed before the CDEF index.
  pub w_pre_cdef: WriterBase<WriterRecorder>,
  /// Recorded symbols that are replayed after the CDEF index.
  pub w_post_cdef: WriterBase<WriterRecorder>,
}
| 3375 | |
| 3376 | #[profiling::function ] |
| 3377 | fn check_lf_queue<T: Pixel>( |
| 3378 | fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>, |
| 3379 | cw: &mut ContextWriter, w: &mut WriterBase<WriterEncoder>, |
| 3380 | sbs_q: &mut VecDeque<SBSQueueEntry>, last_lru_ready: &mut [i32; 3], |
| 3381 | last_lru_rdoed: &mut [i32; 3], last_lru_coded: &mut [i32; 3], |
| 3382 | deblock_p: bool, |
| 3383 | ) { |
| 3384 | let mut check_queue = true; |
| 3385 | let planes = if fi.sequence.chroma_sampling == ChromaSampling::Cs400 { |
| 3386 | 1 |
| 3387 | } else { |
| 3388 | MAX_PLANES |
| 3389 | }; |
| 3390 | |
| 3391 | // Walk queue from the head, see if anything is ready for RDO and flush |
| 3392 | while check_queue { |
| 3393 | if let Some(qe) = sbs_q.front_mut() { |
| 3394 | for pli in 0..planes { |
| 3395 | if qe.lru_index[pli] > last_lru_ready[pli] { |
| 3396 | check_queue = false; |
| 3397 | break; |
| 3398 | } |
| 3399 | } |
| 3400 | if check_queue { |
| 3401 | // yes, this entry is ready |
| 3402 | if qe.cdef_coded || fi.sequence.enable_restoration { |
| 3403 | // only RDO once for a given LRU. |
| 3404 | |
| 3405 | // One quirk worth noting: LRUs in different planes |
| 3406 | // may be different sizes; eg, one chroma LRU may |
| 3407 | // cover four luma LRUs. However, we won't get here |
| 3408 | // until all are ready for RDO because the smaller |
| 3409 | // ones all fit inside the biggest, and the biggest |
| 3410 | // doesn't trigger until everything is done. |
| 3411 | |
| 3412 | // RDO happens on all LRUs within the confines of the |
| 3413 | // biggest, all together. If any of this SB's planes' |
| 3414 | // LRUs are RDOed, in actuality they all are. |
| 3415 | |
| 3416 | // SBs tagged with a lru index of -1 are ignored in |
| 3417 | // LRU coding/rdoing decisions (but still need to rdo |
| 3418 | // for cdef). |
| 3419 | let mut already_rdoed = false; |
| 3420 | for pli in 0..planes { |
| 3421 | if qe.lru_index[pli] != -1 |
| 3422 | && qe.lru_index[pli] <= last_lru_rdoed[pli] |
| 3423 | { |
| 3424 | already_rdoed = true; |
| 3425 | break; |
| 3426 | } |
| 3427 | } |
| 3428 | if !already_rdoed { |
| 3429 | rdo_loop_decision(qe.sbo, fi, ts, cw, w, deblock_p); |
| 3430 | for pli in 0..planes { |
| 3431 | if qe.lru_index[pli] != -1 |
| 3432 | && last_lru_rdoed[pli] < qe.lru_index[pli] |
| 3433 | { |
| 3434 | last_lru_rdoed[pli] = qe.lru_index[pli]; |
| 3435 | } |
| 3436 | } |
| 3437 | } |
| 3438 | } |
| 3439 | // write LRF information |
| 3440 | if !fi.allow_intrabc && fi.sequence.enable_restoration { |
| 3441 | // TODO: also disallow if lossless |
| 3442 | for pli in 0..planes { |
| 3443 | if qe.lru_index[pli] != -1 |
| 3444 | && last_lru_coded[pli] < qe.lru_index[pli] |
| 3445 | { |
| 3446 | last_lru_coded[pli] = qe.lru_index[pli]; |
| 3447 | cw.write_lrf(w, &mut ts.restoration, qe.sbo, pli); |
| 3448 | } |
| 3449 | } |
| 3450 | } |
| 3451 | // Now that loop restoration is coded, we can replay the initial block bits |
| 3452 | qe.w_pre_cdef.replay(w); |
| 3453 | // Now code CDEF into the middle of the block |
| 3454 | if qe.cdef_coded { |
| 3455 | let cdef_index = cw.bc.blocks.get_cdef(qe.sbo); |
| 3456 | cw.write_cdef(w, cdef_index, fi.cdef_bits); |
| 3457 | // Code queued symbols that come after the CDEF index |
| 3458 | qe.w_post_cdef.replay(w); |
| 3459 | } |
| 3460 | sbs_q.pop_front(); |
| 3461 | } |
| 3462 | } else { |
| 3463 | check_queue = false; |
| 3464 | } |
| 3465 | } |
| 3466 | } |
| 3467 | |
| 3468 | #[profiling::function ] |
| 3469 | fn encode_tile<'a, T: Pixel>( |
| 3470 | fi: &FrameInvariants<T>, ts: &'a mut TileStateMut<'_, T>, |
| 3471 | fc: &'a mut CDFContext, blocks: &'a mut TileBlocksMut<'a>, |
| 3472 | inter_cfg: &InterConfig, |
| 3473 | ) -> (Vec<u8>, EncoderStats) { |
| 3474 | let mut enc_stats = EncoderStats::default(); |
| 3475 | let mut w = WriterEncoder::new(); |
| 3476 | let planes = |
| 3477 | if fi.sequence.chroma_sampling == ChromaSampling::Cs400 { 1 } else { 3 }; |
| 3478 | |
| 3479 | let bc = BlockContext::new(blocks); |
| 3480 | let mut cw = ContextWriter::new(fc, bc); |
| 3481 | let mut sbs_q: VecDeque<SBSQueueEntry> = VecDeque::new(); |
| 3482 | let mut last_lru_ready = [-1; 3]; |
| 3483 | let mut last_lru_rdoed = [-1; 3]; |
| 3484 | let mut last_lru_coded = [-1; 3]; |
| 3485 | |
| 3486 | // main loop |
| 3487 | for sby in 0..ts.sb_height { |
| 3488 | cw.bc.reset_left_contexts(planes); |
| 3489 | |
| 3490 | for sbx in 0..ts.sb_width { |
| 3491 | cw.fc_log.clear(); |
| 3492 | |
| 3493 | let tile_sbo = TileSuperBlockOffset(SuperBlockOffset { x: sbx, y: sby }); |
| 3494 | let mut sbs_qe = SBSQueueEntry { |
| 3495 | sbo: tile_sbo, |
| 3496 | lru_index: [-1; MAX_PLANES], |
| 3497 | cdef_coded: false, |
| 3498 | w_pre_cdef: WriterRecorder::new(), |
| 3499 | w_post_cdef: WriterRecorder::new(), |
| 3500 | }; |
| 3501 | |
| 3502 | let tile_bo = tile_sbo.block_offset(0, 0); |
| 3503 | cw.bc.cdef_coded = false; |
| 3504 | cw.bc.code_deltas = fi.delta_q_present; |
| 3505 | |
| 3506 | let is_straddle_sbx = |
| 3507 | tile_bo.0.x + BlockSize::BLOCK_64X64.width_mi() > ts.mi_width; |
| 3508 | let is_straddle_sby = |
| 3509 | tile_bo.0.y + BlockSize::BLOCK_64X64.height_mi() > ts.mi_height; |
| 3510 | |
| 3511 | // Encode SuperBlock |
| 3512 | if fi.config.speed_settings.partition.encode_bottomup |
| 3513 | || is_straddle_sbx |
| 3514 | || is_straddle_sby |
| 3515 | { |
| 3516 | encode_partition_bottomup( |
| 3517 | fi, |
| 3518 | ts, |
| 3519 | &mut cw, |
| 3520 | &mut sbs_qe.w_pre_cdef, |
| 3521 | &mut sbs_qe.w_post_cdef, |
| 3522 | BlockSize::BLOCK_64X64, |
| 3523 | tile_bo, |
| 3524 | std::f64::MAX, |
| 3525 | inter_cfg, |
| 3526 | &mut enc_stats, |
| 3527 | ); |
| 3528 | } else { |
| 3529 | encode_partition_topdown( |
| 3530 | fi, |
| 3531 | ts, |
| 3532 | &mut cw, |
| 3533 | &mut sbs_qe.w_pre_cdef, |
| 3534 | &mut sbs_qe.w_post_cdef, |
| 3535 | BlockSize::BLOCK_64X64, |
| 3536 | tile_bo, |
| 3537 | &None, |
| 3538 | inter_cfg, |
| 3539 | &mut enc_stats, |
| 3540 | ); |
| 3541 | } |
| 3542 | |
| 3543 | { |
| 3544 | let mut check_queue = false; |
| 3545 | // queue our superblock for when the LRU is complete |
| 3546 | sbs_qe.cdef_coded = cw.bc.cdef_coded; |
| 3547 | for pli in 0..planes { |
| 3548 | if let Some((lru_x, lru_y)) = |
| 3549 | ts.restoration.planes[pli].restoration_unit_index(tile_sbo, false) |
| 3550 | { |
| 3551 | let lru_index = ts.restoration.planes[pli] |
| 3552 | .restoration_unit_countable(lru_x, lru_y) |
| 3553 | as i32; |
| 3554 | sbs_qe.lru_index[pli] = lru_index; |
| 3555 | if ts.restoration.planes[pli] |
| 3556 | .restoration_unit_last_sb_for_rdo(fi, ts.sbo, tile_sbo) |
| 3557 | { |
| 3558 | last_lru_ready[pli] = lru_index; |
| 3559 | check_queue = true; |
| 3560 | } |
| 3561 | } else { |
| 3562 | // we're likely in an area stretched into a new tile |
| 3563 | // tag this SB to be ignored in LRU decisions |
| 3564 | sbs_qe.lru_index[pli] = -1; |
| 3565 | check_queue = true; |
| 3566 | } |
| 3567 | } |
| 3568 | sbs_q.push_back(sbs_qe); |
| 3569 | |
| 3570 | if check_queue && !fi.sequence.enable_delayed_loopfilter_rdo { |
| 3571 | check_lf_queue( |
| 3572 | fi, |
| 3573 | ts, |
| 3574 | &mut cw, |
| 3575 | &mut w, |
| 3576 | &mut sbs_q, |
| 3577 | &mut last_lru_ready, |
| 3578 | &mut last_lru_rdoed, |
| 3579 | &mut last_lru_coded, |
| 3580 | true, |
| 3581 | ); |
| 3582 | } |
| 3583 | } |
| 3584 | } |
| 3585 | } |
| 3586 | |
| 3587 | if fi.sequence.enable_delayed_loopfilter_rdo { |
| 3588 | // Solve deblocking for just this tile |
| 3589 | /* TODO: Don't apply if lossless */ |
| 3590 | let deblock_levels = deblock_filter_optimize( |
| 3591 | fi, |
| 3592 | &ts.rec.as_const(), |
| 3593 | &ts.input_tile, |
| 3594 | &cw.bc.blocks.as_const(), |
| 3595 | fi.width, |
| 3596 | fi.height, |
| 3597 | ); |
| 3598 | |
| 3599 | if deblock_levels[0] != 0 || deblock_levels[1] != 0 { |
| 3600 | // copy reconstruction to a temp frame to restore it later |
| 3601 | let rec_copy = if planes == 3 { |
| 3602 | vec![ |
| 3603 | ts.rec.planes[0].scratch_copy(), |
| 3604 | ts.rec.planes[1].scratch_copy(), |
| 3605 | ts.rec.planes[2].scratch_copy(), |
| 3606 | ] |
| 3607 | } else { |
| 3608 | vec![ts.rec.planes[0].scratch_copy()] |
| 3609 | }; |
| 3610 | |
| 3611 | // copy ts.deblock because we need to set some of our own values here |
| 3612 | let mut deblock_copy = *ts.deblock; |
| 3613 | deblock_copy.levels = deblock_levels; |
| 3614 | |
| 3615 | // temporarily deblock the reference |
| 3616 | deblock_filter_frame( |
| 3617 | &deblock_copy, |
| 3618 | &mut ts.rec, |
| 3619 | &cw.bc.blocks.as_const(), |
| 3620 | fi.width, |
| 3621 | fi.height, |
| 3622 | fi.sequence.bit_depth, |
| 3623 | planes, |
| 3624 | ); |
| 3625 | |
| 3626 | // rdo lf and write |
| 3627 | check_lf_queue( |
| 3628 | fi, |
| 3629 | ts, |
| 3630 | &mut cw, |
| 3631 | &mut w, |
| 3632 | &mut sbs_q, |
| 3633 | &mut last_lru_ready, |
| 3634 | &mut last_lru_rdoed, |
| 3635 | &mut last_lru_coded, |
| 3636 | false, |
| 3637 | ); |
| 3638 | |
| 3639 | // copy original reference back in |
| 3640 | for pli in 0..planes { |
| 3641 | let dst = &mut ts.rec.planes[pli]; |
| 3642 | let src = &rec_copy[pli]; |
| 3643 | for (dst_row, src_row) in dst.rows_iter_mut().zip(src.rows_iter()) { |
| 3644 | for (out, input) in dst_row.iter_mut().zip(src_row) { |
| 3645 | *out = *input; |
| 3646 | } |
| 3647 | } |
| 3648 | } |
| 3649 | } else { |
| 3650 | // rdo lf and write |
| 3651 | check_lf_queue( |
| 3652 | fi, |
| 3653 | ts, |
| 3654 | &mut cw, |
| 3655 | &mut w, |
| 3656 | &mut sbs_q, |
| 3657 | &mut last_lru_ready, |
| 3658 | &mut last_lru_rdoed, |
| 3659 | &mut last_lru_coded, |
| 3660 | false, |
| 3661 | ); |
| 3662 | } |
| 3663 | } |
| 3664 | |
| 3665 | assert!( |
| 3666 | sbs_q.is_empty(), |
| 3667 | "Superblock queue not empty in tile at offset {}: {}" , |
| 3668 | ts.sbo.0.x, |
| 3669 | ts.sbo.0.y |
| 3670 | ); |
| 3671 | (w.done(), enc_stats) |
| 3672 | } |
| 3673 | |
| 3674 | #[allow (unused)] |
| 3675 | fn write_tile_group_header(tile_start_and_end_present_flag: bool) -> Vec<u8> { |
| 3676 | let mut buf: Vec = Vec::new(); |
| 3677 | { |
| 3678 | let mut bw: BitWriter<&mut Vec, BigEndian> = BitWriter::endian(&mut buf, _endian:BigEndian); |
| 3679 | bw.write_bit(tile_start_and_end_present_flag).unwrap(); |
| 3680 | bw.byte_align().unwrap(); |
| 3681 | } |
| 3682 | buf |
| 3683 | } |
| 3684 | |
| 3685 | /// Write a packet containing only the placeholder that tells the decoder |
| 3686 | /// to present the already decoded frame present at `frame_to_show_map_idx` |
| 3687 | /// |
| 3688 | /// See `av1-spec` Section 6.8.2 and 7.18. |
| 3689 | /// |
| 3690 | /// # Panics |
| 3691 | /// |
| 3692 | /// - If the frame packets cannot be written |
| 3693 | #[profiling::function ] |
| 3694 | pub fn encode_show_existing_frame<T: Pixel>( |
| 3695 | fi: &FrameInvariants<T>, fs: &mut FrameState<T>, inter_cfg: &InterConfig, |
| 3696 | ) -> Vec<u8> { |
| 3697 | debug_assert!(fi.is_show_existing_frame()); |
| 3698 | let obu_extension = 0; |
| 3699 | |
| 3700 | let mut packet = Vec::new(); |
| 3701 | |
| 3702 | if fi.frame_type == FrameType::KEY { |
| 3703 | write_key_frame_obus(&mut packet, fi, obu_extension).unwrap(); |
| 3704 | } |
| 3705 | |
| 3706 | for t35 in fi.t35_metadata.iter() { |
| 3707 | let mut t35_buf = Vec::new(); |
| 3708 | let mut t35_bw = BitWriter::endian(&mut t35_buf, BigEndian); |
| 3709 | t35_bw.write_t35_metadata_obu(t35).unwrap(); |
| 3710 | packet.write_all(&t35_buf).unwrap(); |
| 3711 | t35_buf.clear(); |
| 3712 | } |
| 3713 | |
| 3714 | let mut buf1 = Vec::new(); |
| 3715 | let mut buf2 = Vec::new(); |
| 3716 | { |
| 3717 | let mut bw2 = BitWriter::endian(&mut buf2, BigEndian); |
| 3718 | bw2.write_frame_header_obu(fi, fs, inter_cfg).unwrap(); |
| 3719 | } |
| 3720 | |
| 3721 | { |
| 3722 | let mut bw1 = BitWriter::endian(&mut buf1, BigEndian); |
| 3723 | bw1.write_obu_header(ObuType::OBU_FRAME_HEADER, obu_extension).unwrap(); |
| 3724 | } |
| 3725 | packet.write_all(&buf1).unwrap(); |
| 3726 | buf1.clear(); |
| 3727 | |
| 3728 | { |
| 3729 | let mut bw1 = BitWriter::endian(&mut buf1, BigEndian); |
| 3730 | bw1.write_uleb128(buf2.len() as u64).unwrap(); |
| 3731 | } |
| 3732 | packet.write_all(&buf1).unwrap(); |
| 3733 | buf1.clear(); |
| 3734 | |
| 3735 | packet.write_all(&buf2).unwrap(); |
| 3736 | buf2.clear(); |
| 3737 | |
| 3738 | let map_idx = fi.frame_to_show_map_idx as usize; |
| 3739 | if let Some(ref rec) = fi.rec_buffer.frames[map_idx] { |
| 3740 | let fs_rec = Arc::get_mut(&mut fs.rec).unwrap(); |
| 3741 | let planes = |
| 3742 | if fi.sequence.chroma_sampling == ChromaSampling::Cs400 { 1 } else { 3 }; |
| 3743 | for p in 0..planes { |
| 3744 | fs_rec.planes[p].data.copy_from_slice(&rec.frame.planes[p].data); |
| 3745 | } |
| 3746 | } |
| 3747 | packet |
| 3748 | } |
| 3749 | |
| 3750 | fn get_initial_segmentation<T: Pixel>( |
| 3751 | fi: &FrameInvariants<T>, |
| 3752 | ) -> SegmentationState { |
| 3753 | let segmentation: Option = if fi.primary_ref_frame == PRIMARY_REF_NONE { |
| 3754 | None |
| 3755 | } else { |
| 3756 | let ref_frame_idx: usize = fi.ref_frames[fi.primary_ref_frame as usize] as usize; |
| 3757 | let ref_frame: Option<&Arc>> = fi.rec_buffer.frames[ref_frame_idx].as_ref(); |
| 3758 | ref_frame.map(|rec: &Arc>| rec.segmentation) |
| 3759 | }; |
| 3760 | |
| 3761 | // return the retrieved instance if any, a new one otherwise |
| 3762 | segmentation.unwrap_or_default() |
| 3763 | } |
| 3764 | |
| 3765 | /// # Panics |
| 3766 | /// |
| 3767 | /// - If the frame packets cannot be written |
| 3768 | #[profiling::function ] |
| 3769 | pub fn encode_frame<T: Pixel>( |
| 3770 | fi: &FrameInvariants<T>, fs: &mut FrameState<T>, inter_cfg: &InterConfig, |
| 3771 | ) -> Vec<u8> { |
| 3772 | debug_assert!(!fi.is_show_existing_frame()); |
| 3773 | let obu_extension = 0; |
| 3774 | |
| 3775 | let mut packet = Vec::new(); |
| 3776 | |
| 3777 | if fi.enable_segmentation { |
| 3778 | fs.segmentation = get_initial_segmentation(fi); |
| 3779 | segmentation_optimize(fi, fs); |
| 3780 | } |
| 3781 | let tile_group = encode_tile_group(fi, fs, inter_cfg); |
| 3782 | |
| 3783 | if fi.frame_type == FrameType::KEY { |
| 3784 | write_key_frame_obus(&mut packet, fi, obu_extension).unwrap(); |
| 3785 | } |
| 3786 | |
| 3787 | for t35 in fi.t35_metadata.iter() { |
| 3788 | let mut t35_buf = Vec::new(); |
| 3789 | let mut t35_bw = BitWriter::endian(&mut t35_buf, BigEndian); |
| 3790 | t35_bw.write_t35_metadata_obu(t35).unwrap(); |
| 3791 | packet.write_all(&t35_buf).unwrap(); |
| 3792 | t35_buf.clear(); |
| 3793 | } |
| 3794 | |
| 3795 | let mut buf1 = Vec::new(); |
| 3796 | let mut buf2 = Vec::new(); |
| 3797 | { |
| 3798 | let mut bw2 = BitWriter::endian(&mut buf2, BigEndian); |
| 3799 | bw2.write_frame_header_obu(fi, fs, inter_cfg).unwrap(); |
| 3800 | } |
| 3801 | |
| 3802 | { |
| 3803 | let mut bw1 = BitWriter::endian(&mut buf1, BigEndian); |
| 3804 | bw1.write_obu_header(ObuType::OBU_FRAME, obu_extension).unwrap(); |
| 3805 | } |
| 3806 | packet.write_all(&buf1).unwrap(); |
| 3807 | buf1.clear(); |
| 3808 | |
| 3809 | { |
| 3810 | let mut bw1 = BitWriter::endian(&mut buf1, BigEndian); |
| 3811 | bw1.write_uleb128((buf2.len() + tile_group.len()) as u64).unwrap(); |
| 3812 | } |
| 3813 | packet.write_all(&buf1).unwrap(); |
| 3814 | buf1.clear(); |
| 3815 | |
| 3816 | packet.write_all(&buf2).unwrap(); |
| 3817 | buf2.clear(); |
| 3818 | |
| 3819 | packet.write_all(&tile_group).unwrap(); |
| 3820 | packet |
| 3821 | } |
| 3822 | |
| 3823 | pub fn update_rec_buffer<T: Pixel>( |
| 3824 | output_frameno: u64, fi: &mut FrameInvariants<T>, fs: &FrameState<T>, |
| 3825 | ) { |
| 3826 | let rfs: Arc> = Arc::new(data:ReferenceFrame { |
| 3827 | order_hint: fi.order_hint, |
| 3828 | width: fi.width as u32, |
| 3829 | height: fi.height as u32, |
| 3830 | render_width: fi.render_width, |
| 3831 | render_height: fi.render_height, |
| 3832 | frame: fs.rec.clone(), |
| 3833 | input_hres: fs.input_hres.clone(), |
| 3834 | input_qres: fs.input_qres.clone(), |
| 3835 | cdfs: fs.cdfs, |
| 3836 | frame_me_stats: fs.frame_me_stats.clone(), |
| 3837 | output_frameno, |
| 3838 | segmentation: fs.segmentation, |
| 3839 | }); |
| 3840 | for i: usize in 0..REF_FRAMES { |
| 3841 | if (fi.refresh_frame_flags & (1 << i)) != 0 { |
| 3842 | fi.rec_buffer.frames[i] = Some(Arc::clone(&rfs)); |
| 3843 | fi.rec_buffer.deblock[i] = fs.deblock; |
| 3844 | } |
| 3845 | } |
| 3846 | } |
| 3847 | |
#[cfg(test)]
mod test {
  use super::*;

  /// `PARTITION_SPLIT` must be the final entry of `RAV1E_PARTITION_TYPES`.
  #[test]
  fn check_partition_types_order() {
    let final_entry = RAV1E_PARTITION_TYPES.last();
    assert_eq!(final_entry, Some(&PartitionType::PARTITION_SPLIT));
  }
}
| 3860 | |