| 1 | // Copyright (c) 2017-2022, The rav1e contributors. All rights reserved |
| 2 | // |
| 3 | // This source code is subject to the terms of the BSD 2 Clause License and |
| 4 | // the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
| 5 | // was not distributed with this source code in the LICENSE file, you can |
| 6 | // obtain it at www.aomedia.org/license/software. If the Alliance for Open |
| 7 | // Media Patent License 1.0 was not distributed with this source code in the |
| 8 | // PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
| 9 | |
| 10 | use crate::api::InterConfig; |
| 11 | use crate::context::{ |
| 12 | BlockOffset, PlaneBlockOffset, SuperBlockOffset, TileBlockOffset, |
| 13 | TileSuperBlockOffset, MAX_SB_SIZE_LOG2, MIB_SIZE_LOG2, MI_SIZE, |
| 14 | MI_SIZE_LOG2, SB_SIZE, |
| 15 | }; |
| 16 | use crate::dist::*; |
| 17 | use crate::frame::*; |
| 18 | use crate::mc::MotionVector; |
| 19 | use crate::partition::*; |
| 20 | use crate::predict::PredictionMode; |
| 21 | use crate::tiling::*; |
| 22 | use crate::util::ILog; |
| 23 | use crate::util::{clamp, Pixel}; |
| 24 | use crate::FrameInvariants; |
| 25 | |
| 26 | use arrayvec::*; |
| 27 | use std::ops::{Index, IndexMut}; |
| 28 | use std::sync::{Arc, RwLock, RwLockReadGuard, RwLockWriteGuard}; |
| 29 | |
| 30 | #[derive (Debug, Copy, Clone, Default)] |
| 31 | pub struct MEStats { |
| 32 | pub mv: MotionVector, |
| 33 | /// sad value, on the scale of a 128x128 block |
| 34 | pub normalized_sad: u32, |
| 35 | } |
| 36 | |
| 37 | #[derive (Debug, Clone)] |
| 38 | pub struct FrameMEStats { |
| 39 | stats: Box<[MEStats]>, |
| 40 | pub cols: usize, |
| 41 | pub rows: usize, |
| 42 | } |
| 43 | |
| 44 | /// cbindgen:ignore |
| 45 | pub type RefMEStats = Arc<RwLock<[FrameMEStats; REF_FRAMES]>>; |
| 46 | /// cbindgen:ignore |
| 47 | pub type ReadGuardMEStats<'a> = |
| 48 | RwLockReadGuard<'a, [FrameMEStats; REF_FRAMES]>; |
| 49 | /// cbindgen:ignore |
| 50 | pub type WriteGuardMEStats<'a> = |
| 51 | RwLockWriteGuard<'a, [FrameMEStats; REF_FRAMES]>; |
| 52 | |
| 53 | impl FrameMEStats { |
| 54 | #[inline ] |
| 55 | pub fn rows_iter(&self) -> std::slice::ChunksExact<'_, MEStats> { |
| 56 | self.stats.chunks_exact(self.cols) |
| 57 | } |
| 58 | |
| 59 | pub fn new(cols: usize, rows: usize) -> Self { |
| 60 | Self { |
| 61 | // dynamic allocation: once per frame |
| 62 | stats: vec![MEStats::default(); cols * rows].into_boxed_slice(), |
| 63 | cols, |
| 64 | rows, |
| 65 | } |
| 66 | } |
| 67 | pub fn new_arc_array(cols: usize, rows: usize) -> RefMEStats { |
| 68 | Arc::new(RwLock::new([ |
| 69 | FrameMEStats::new(cols, rows), |
| 70 | FrameMEStats::new(cols, rows), |
| 71 | FrameMEStats::new(cols, rows), |
| 72 | FrameMEStats::new(cols, rows), |
| 73 | FrameMEStats::new(cols, rows), |
| 74 | FrameMEStats::new(cols, rows), |
| 75 | FrameMEStats::new(cols, rows), |
| 76 | FrameMEStats::new(cols, rows), |
| 77 | ])) |
| 78 | } |
| 79 | } |
| 80 | |
| 81 | impl Index<usize> for FrameMEStats { |
| 82 | type Output = [MEStats]; |
| 83 | #[inline ] |
| 84 | fn index(&self, index: usize) -> &Self::Output { |
| 85 | &self.stats[index * self.cols..(index + 1) * self.cols] |
| 86 | } |
| 87 | } |
| 88 | |
| 89 | impl IndexMut<usize> for FrameMEStats { |
| 90 | #[inline ] |
| 91 | fn index_mut(&mut self, index: usize) -> &mut Self::Output { |
| 92 | &mut self.stats[index * self.cols..(index + 1) * self.cols] |
| 93 | } |
| 94 | } |
| 95 | |
| 96 | /// Result of motion search. |
| 97 | #[derive (Debug, Copy, Clone)] |
| 98 | pub struct MotionSearchResult { |
| 99 | /// Motion vector chosen by the motion search. |
| 100 | pub mv: MotionVector, |
| 101 | /// Rate distortion data associated with `mv`. |
| 102 | pub rd: MVCandidateRD, |
| 103 | } |
| 104 | |
| 105 | impl MotionSearchResult { |
| 106 | /// Creates an 'empty' value. |
| 107 | /// |
| 108 | /// To be considered empty, cost is set higher than any naturally occurring |
| 109 | /// cost value. The idea is that comparing to any valid rd output, the search |
| 110 | /// result will always be replaced. |
| 111 | #[inline (always)] |
| 112 | pub fn empty() -> MotionSearchResult { |
| 113 | MotionSearchResult { |
| 114 | mv: MotionVector::default(), |
| 115 | rd: MVCandidateRD::empty(), |
| 116 | } |
| 117 | } |
| 118 | |
| 119 | /// Check if the value should be considered to be empty. |
| 120 | #[inline (always)] |
| 121 | const fn is_empty(&self) -> bool { |
| 122 | self.rd.cost == u64::MAX |
| 123 | } |
| 124 | } |
| 125 | |
| 126 | /// Holds data from computing rate distortion of a motion vector. |
| 127 | #[derive (Debug, Copy, Clone)] |
| 128 | pub struct MVCandidateRD { |
| 129 | /// Rate distortion cost of the motion vector. |
| 130 | pub cost: u64, |
| 131 | /// Distortion metric value for the motion vector. |
| 132 | pub sad: u32, |
| 133 | } |
| 134 | |
| 135 | impl MVCandidateRD { |
| 136 | /// Creates an 'empty' value. |
| 137 | /// |
| 138 | /// To be considered empty, cost is set higher than any naturally occurring |
| 139 | /// cost value. The idea is that comparing to any valid rd output, the search |
| 140 | /// result will always be replaced. |
| 141 | #[inline (always)] |
| 142 | const fn empty() -> MVCandidateRD { |
| 143 | MVCandidateRD { sad: u32::MAX, cost: u64::MAX } |
| 144 | } |
| 145 | } |
| 146 | |
| 147 | #[derive (Debug, Copy, Clone, Eq, PartialEq)] |
| 148 | pub enum MVSamplingMode { |
| 149 | INIT, |
| 150 | CORNER { right: bool, bottom: bool }, |
| 151 | } |
| 152 | |
| 153 | pub fn estimate_tile_motion<T: Pixel>( |
| 154 | fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>, |
| 155 | inter_cfg: &InterConfig, |
| 156 | ) { |
| 157 | let init_size = MIB_SIZE_LOG2; |
| 158 | |
| 159 | let mut prev_ssdec: Option<u8> = None; |
| 160 | for mv_size_in_b_log2 in (2..=init_size).rev() { |
| 161 | let init = mv_size_in_b_log2 == init_size; |
| 162 | |
| 163 | // Choose subsampling. Pass one is quarter res and pass two is at half res. |
| 164 | let ssdec = match init_size - mv_size_in_b_log2 { |
| 165 | 0 => 2, |
| 166 | 1 => 1, |
| 167 | _ => 0, |
| 168 | }; |
| 169 | |
| 170 | let new_subsampling = |
| 171 | if let Some(prev) = prev_ssdec { prev != ssdec } else { false }; |
| 172 | prev_ssdec = Some(ssdec); |
| 173 | |
| 174 | // 0.5 and 0.125 are a fudge factors |
| 175 | let lambda = (fi.me_lambda * 256.0 / (1 << (2 * ssdec)) as f64 |
| 176 | * if ssdec == 0 { 0.5 } else { 0.125 }) as u32; |
| 177 | |
| 178 | for sby in 0..ts.sb_height { |
| 179 | for sbx in 0..ts.sb_width { |
| 180 | let mut tested_frames_flags = 0; |
| 181 | for &ref_frame in inter_cfg.allowed_ref_frames() { |
| 182 | let frame_flag = 1 << fi.ref_frames[ref_frame.to_index()]; |
| 183 | if tested_frames_flags & frame_flag == frame_flag { |
| 184 | continue; |
| 185 | } |
| 186 | tested_frames_flags |= frame_flag; |
| 187 | |
| 188 | let tile_bo = |
| 189 | TileSuperBlockOffset(SuperBlockOffset { x: sbx, y: sby }) |
| 190 | .block_offset(0, 0); |
| 191 | |
| 192 | if new_subsampling { |
| 193 | refine_subsampled_sb_motion( |
| 194 | fi, |
| 195 | ts, |
| 196 | ref_frame, |
| 197 | mv_size_in_b_log2 + 1, |
| 198 | tile_bo, |
| 199 | ssdec, |
| 200 | lambda, |
| 201 | ); |
| 202 | } |
| 203 | |
| 204 | estimate_sb_motion( |
| 205 | fi, |
| 206 | ts, |
| 207 | ref_frame, |
| 208 | mv_size_in_b_log2, |
| 209 | tile_bo, |
| 210 | init, |
| 211 | ssdec, |
| 212 | lambda, |
| 213 | ); |
| 214 | } |
| 215 | } |
| 216 | } |
| 217 | } |
| 218 | } |
| 219 | |
| 220 | fn estimate_sb_motion<T: Pixel>( |
| 221 | fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>, ref_frame: RefType, |
| 222 | mv_size_in_b_log2: usize, tile_bo: TileBlockOffset, init: bool, ssdec: u8, |
| 223 | lambda: u32, |
| 224 | ) { |
| 225 | let pix_offset = tile_bo.to_luma_plane_offset(); |
| 226 | let sb_h: usize = SB_SIZE.min(ts.height - pix_offset.y as usize); |
| 227 | let sb_w: usize = SB_SIZE.min(ts.width - pix_offset.x as usize); |
| 228 | |
| 229 | let mv_size = MI_SIZE << mv_size_in_b_log2; |
| 230 | |
| 231 | // Process in blocks, cropping at edges. |
| 232 | for y in (0..sb_h).step_by(mv_size) { |
| 233 | for x in (0..sb_w).step_by(mv_size) { |
| 234 | let corner: MVSamplingMode = if init { |
| 235 | MVSamplingMode::INIT |
| 236 | } else { |
| 237 | // Processing the block a size up produces data that can be used by |
| 238 | // the right and bottom corners. |
| 239 | MVSamplingMode::CORNER { |
| 240 | right: x & mv_size == mv_size, |
| 241 | bottom: y & mv_size == mv_size, |
| 242 | } |
| 243 | }; |
| 244 | |
| 245 | let sub_bo = tile_bo |
| 246 | .with_offset(x as isize >> MI_SIZE_LOG2, y as isize >> MI_SIZE_LOG2); |
| 247 | |
| 248 | // Clamp to frame edge, rounding up in the case of subsampling. |
| 249 | // The rounding makes some assumptions about how subsampling is done. |
| 250 | let w = mv_size.min(sb_w - x + (1 << ssdec) - 1) >> ssdec; |
| 251 | let h = mv_size.min(sb_h - y + (1 << ssdec) - 1) >> ssdec; |
| 252 | |
| 253 | // Run motion estimation. |
| 254 | // Note that the initial search (init) instructs the called function to |
| 255 | // perform a more extensive search. |
| 256 | if let Some(results) = estimate_motion( |
| 257 | fi, |
| 258 | ts, |
| 259 | w, |
| 260 | h, |
| 261 | sub_bo, |
| 262 | ref_frame, |
| 263 | None, |
| 264 | corner, |
| 265 | init, |
| 266 | ssdec, |
| 267 | Some(lambda), |
| 268 | ) { |
| 269 | // normalize sad to 128x128 block |
| 270 | let sad = (((results.rd.sad as u64) << (MAX_SB_SIZE_LOG2 * 2)) |
| 271 | / (w * h) as u64) as u32; |
| 272 | save_me_stats( |
| 273 | ts, |
| 274 | mv_size_in_b_log2, |
| 275 | sub_bo, |
| 276 | ref_frame, |
| 277 | MEStats { mv: results.mv, normalized_sad: sad }, |
| 278 | ); |
| 279 | } |
| 280 | } |
| 281 | } |
| 282 | } |
| 283 | |
| 284 | fn refine_subsampled_sb_motion<T: Pixel>( |
| 285 | fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>, ref_frame: RefType, |
| 286 | mv_size_in_b_log2: usize, tile_bo: TileBlockOffset, ssdec: u8, lambda: u32, |
| 287 | ) { |
| 288 | let pix_offset = tile_bo.to_luma_plane_offset(); |
| 289 | let sb_h: usize = SB_SIZE.min(ts.height - pix_offset.y as usize); |
| 290 | let sb_w: usize = SB_SIZE.min(ts.width - pix_offset.x as usize); |
| 291 | |
| 292 | let mv_size = MI_SIZE << mv_size_in_b_log2; |
| 293 | |
| 294 | // Process in blocks, cropping at edges. |
| 295 | for y in (0..sb_h).step_by(mv_size) { |
| 296 | for x in (0..sb_w).step_by(mv_size) { |
| 297 | let sub_bo = tile_bo |
| 298 | .with_offset(x as isize >> MI_SIZE_LOG2, y as isize >> MI_SIZE_LOG2); |
| 299 | |
| 300 | // Clamp to frame edge, rounding up in the case of subsampling. |
| 301 | // The rounding makes some assumptions about how subsampling is done. |
| 302 | let w = mv_size.min(sb_w - x + (1 << ssdec) - 1) >> ssdec; |
| 303 | let h = mv_size.min(sb_h - y + (1 << ssdec) - 1) >> ssdec; |
| 304 | |
| 305 | // Refine the existing motion estimate |
| 306 | if let Some(results) = refine_subsampled_motion_estimate( |
| 307 | fi, ts, w, h, sub_bo, ref_frame, ssdec, lambda, |
| 308 | ) { |
| 309 | // normalize sad to 128x128 block |
| 310 | let sad = (((results.rd.sad as u64) << (MAX_SB_SIZE_LOG2 * 2)) |
| 311 | / (w * h) as u64) as u32; |
| 312 | save_me_stats( |
| 313 | ts, |
| 314 | mv_size_in_b_log2, |
| 315 | sub_bo, |
| 316 | ref_frame, |
| 317 | MEStats { mv: results.mv, normalized_sad: sad }, |
| 318 | ); |
| 319 | } |
| 320 | } |
| 321 | } |
| 322 | } |
| 323 | |
| 324 | fn save_me_stats<T: Pixel>( |
| 325 | ts: &mut TileStateMut<'_, T>, mv_size_in_b_log2: usize, |
| 326 | tile_bo: TileBlockOffset, ref_frame: RefType, stats: MEStats, |
| 327 | ) { |
| 328 | let size_in_b: usize = 1 << mv_size_in_b_log2; |
| 329 | let tile_me_stats: &mut TileMEStatsMut<'_> = &mut ts.me_stats[ref_frame.to_index()]; |
| 330 | let tile_bo_x_end: usize = (tile_bo.0.x + size_in_b).min(ts.mi_width); |
| 331 | let tile_bo_y_end: usize = (tile_bo.0.y + size_in_b).min(ts.mi_height); |
| 332 | for mi_y: usize in tile_bo.0.y..tile_bo_y_end { |
| 333 | for a: &mut MEStats in tile_me_stats[mi_y][tile_bo.0.x..tile_bo_x_end].iter_mut() { |
| 334 | *a = stats; |
| 335 | } |
| 336 | } |
| 337 | } |
| 338 | |
| 339 | fn get_mv_range( |
| 340 | w_in_b: usize, h_in_b: usize, bo: PlaneBlockOffset, blk_w: usize, |
| 341 | blk_h: usize, |
| 342 | ) -> (isize, isize, isize, isize) { |
| 343 | let border_w: isize = 128 + blk_w as isize * 8; |
| 344 | let border_h: isize = 128 + blk_h as isize * 8; |
| 345 | let mvx_min: isize = -(bo.0.x as isize) * (8 * MI_SIZE) as isize - border_w; |
| 346 | let mvx_max: isize = ((w_in_b - bo.0.x) as isize - (blk_w / MI_SIZE) as isize) |
| 347 | * (8 * MI_SIZE) as isize |
| 348 | + border_w; |
| 349 | let mvy_min: isize = -(bo.0.y as isize) * (8 * MI_SIZE) as isize - border_h; |
| 350 | let mvy_max: isize = ((h_in_b - bo.0.y) as isize - (blk_h / MI_SIZE) as isize) |
| 351 | * (8 * MI_SIZE) as isize |
| 352 | + border_h; |
| 353 | |
| 354 | // <https://aomediacodec.github.io/av1-spec/#assign-mv-semantics> |
| 355 | use crate::context::{MV_LOW, MV_UPP}; |
| 356 | ( |
| 357 | mvx_min.max(MV_LOW as isize + 1), |
| 358 | mvx_max.min(MV_UPP as isize - 1), |
| 359 | mvy_min.max(MV_LOW as isize + 1), |
| 360 | mvy_max.min(MV_UPP as isize - 1), |
| 361 | ) |
| 362 | } |
| 363 | |
| 364 | struct MotionEstimationSubsets { |
| 365 | min_sad: u32, |
| 366 | median: Option<MotionVector>, |
| 367 | subset_b: ArrayVec<MotionVector, 5>, |
| 368 | subset_c: ArrayVec<MotionVector, 5>, |
| 369 | } |
| 370 | |
| 371 | impl MotionEstimationSubsets { |
| 372 | fn all_mvs(&self) -> ArrayVec<MotionVector, 11> { |
| 373 | let mut all: ArrayVec = ArrayVec::new(); |
| 374 | if let Some(median: MotionVector) = self.median { |
| 375 | all.push(element:median); |
| 376 | } |
| 377 | |
| 378 | all.extend(self.subset_b.iter().copied()); |
| 379 | all.extend(self.subset_c.iter().copied()); |
| 380 | |
| 381 | all |
| 382 | } |
| 383 | } |
| 384 | |
| 385 | #[profiling::function ] |
| 386 | fn get_subset_predictors( |
| 387 | tile_bo: TileBlockOffset, tile_me_stats: &TileMEStats<'_>, |
| 388 | frame_ref_opt: Option<ReadGuardMEStats<'_>>, ref_frame_id: usize, |
| 389 | pix_w: usize, pix_h: usize, mvx_min: isize, mvx_max: isize, mvy_min: isize, |
| 390 | mvy_max: isize, corner: MVSamplingMode, ssdec: u8, |
| 391 | ) -> MotionEstimationSubsets { |
| 392 | let mut min_sad: u32 = u32::MAX; |
| 393 | let mut subset_b = ArrayVec::<MotionVector, 5>::new(); |
| 394 | let mut subset_c = ArrayVec::<MotionVector, 5>::new(); |
| 395 | |
| 396 | // rounded up width in blocks |
| 397 | let w = ((pix_w << ssdec) + MI_SIZE - 1) >> MI_SIZE_LOG2; |
| 398 | let h = ((pix_h << ssdec) + MI_SIZE - 1) >> MI_SIZE_LOG2; |
| 399 | |
| 400 | // Get predictors from the same frame. |
| 401 | |
| 402 | let clipped_half_w = (w >> 1).min(tile_me_stats.cols() - 1 - tile_bo.0.x); |
| 403 | let clipped_half_h = (h >> 1).min(tile_me_stats.rows() - 1 - tile_bo.0.y); |
| 404 | |
| 405 | let mut process_cand = |stats: MEStats| -> MotionVector { |
| 406 | min_sad = min_sad.min(stats.normalized_sad); |
| 407 | let mv = stats.mv.quantize_to_fullpel(); |
| 408 | MotionVector { |
| 409 | col: clamp(mv.col as isize, mvx_min, mvx_max) as i16, |
| 410 | row: clamp(mv.row as isize, mvy_min, mvy_max) as i16, |
| 411 | } |
| 412 | }; |
| 413 | |
| 414 | // Sample the middle of all block edges bordering this one. |
| 415 | // Note: If motion vectors haven't been precomputed to a given blocksize, then |
| 416 | // the right and bottom edges will be duplicates of the center predictor when |
| 417 | // processing in raster order. |
| 418 | |
| 419 | // left |
| 420 | if tile_bo.0.x > 0 { |
| 421 | subset_b.push(process_cand( |
| 422 | tile_me_stats[tile_bo.0.y + clipped_half_h][tile_bo.0.x - 1], |
| 423 | )); |
| 424 | } |
| 425 | // top |
| 426 | if tile_bo.0.y > 0 { |
| 427 | subset_b.push(process_cand( |
| 428 | tile_me_stats[tile_bo.0.y - 1][tile_bo.0.x + clipped_half_w], |
| 429 | )); |
| 430 | } |
| 431 | |
| 432 | // Sampling far right and far bottom edges was tested, but had worse results |
| 433 | // without an extensive threshold test (with threshold being applied after |
| 434 | // checking median and the best of each subset). |
| 435 | |
| 436 | // right |
| 437 | if let MVSamplingMode::CORNER { right: true, bottom: _ } = corner { |
| 438 | if tile_bo.0.x + w < tile_me_stats.cols() { |
| 439 | subset_b.push(process_cand( |
| 440 | tile_me_stats[tile_bo.0.y + clipped_half_h][tile_bo.0.x + w], |
| 441 | )); |
| 442 | } |
| 443 | } |
| 444 | // bottom |
| 445 | if let MVSamplingMode::CORNER { right: _, bottom: true } = corner { |
| 446 | if tile_bo.0.y + h < tile_me_stats.rows() { |
| 447 | subset_b.push(process_cand( |
| 448 | tile_me_stats[tile_bo.0.y + h][tile_bo.0.x + clipped_half_w], |
| 449 | )); |
| 450 | } |
| 451 | } |
| 452 | |
| 453 | let median = if corner != MVSamplingMode::INIT { |
| 454 | // Sample the center of the current block. |
| 455 | Some(process_cand( |
| 456 | tile_me_stats[tile_bo.0.y + clipped_half_h] |
| 457 | [tile_bo.0.x + clipped_half_w], |
| 458 | )) |
| 459 | } else if subset_b.len() != 3 { |
| 460 | None |
| 461 | } else { |
| 462 | let mut rows: ArrayVec<i16, 3> = subset_b.iter().map(|&a| a.row).collect(); |
| 463 | let mut cols: ArrayVec<i16, 3> = subset_b.iter().map(|&a| a.col).collect(); |
| 464 | rows.as_mut_slice().sort_unstable(); |
| 465 | cols.as_mut_slice().sort_unstable(); |
| 466 | Some(MotionVector { row: rows[1], col: cols[1] }) |
| 467 | }; |
| 468 | |
| 469 | // Zero motion vector, don't use add_cand since it skips zero vectors. |
| 470 | subset_b.push(MotionVector::default()); |
| 471 | |
| 472 | // EPZS subset C predictors. |
| 473 | // Sample the middle of bordering side of the left, right, top and bottom |
| 474 | // blocks of the previous frame. |
| 475 | // Sample the middle of this block in the previous frame. |
| 476 | |
| 477 | if let Some(frame_me_stats) = frame_ref_opt { |
| 478 | let prev_frame = &frame_me_stats[ref_frame_id]; |
| 479 | |
| 480 | let frame_bo = PlaneBlockOffset(BlockOffset { |
| 481 | x: tile_me_stats.x() + tile_bo.0.x, |
| 482 | y: tile_me_stats.y() + tile_bo.0.y, |
| 483 | }); |
| 484 | let clipped_half_w = (w >> 1).min(prev_frame.cols - 1 - frame_bo.0.x); |
| 485 | let clipped_half_h = (h >> 1).min(prev_frame.rows - 1 - frame_bo.0.y); |
| 486 | |
| 487 | // left |
| 488 | if frame_bo.0.x > 0 { |
| 489 | subset_c.push(process_cand( |
| 490 | prev_frame[frame_bo.0.y + clipped_half_h][frame_bo.0.x - 1], |
| 491 | )); |
| 492 | } |
| 493 | // top |
| 494 | if frame_bo.0.y > 0 { |
| 495 | subset_c.push(process_cand( |
| 496 | prev_frame[frame_bo.0.y - 1][frame_bo.0.x + clipped_half_w], |
| 497 | )); |
| 498 | } |
| 499 | // right |
| 500 | if frame_bo.0.x + w < prev_frame.cols { |
| 501 | subset_c.push(process_cand( |
| 502 | prev_frame[frame_bo.0.y + clipped_half_h][frame_bo.0.x + w], |
| 503 | )); |
| 504 | } |
| 505 | // bottom |
| 506 | if frame_bo.0.y + h < prev_frame.rows { |
| 507 | subset_c.push(process_cand( |
| 508 | prev_frame[frame_bo.0.y + h][frame_bo.0.x + clipped_half_w], |
| 509 | )); |
| 510 | } |
| 511 | |
| 512 | subset_c.push(process_cand( |
| 513 | prev_frame[frame_bo.0.y + clipped_half_h][frame_bo.0.x + clipped_half_w], |
| 514 | )); |
| 515 | } |
| 516 | |
| 517 | // Undo normalization to 128x128 block size |
| 518 | let min_sad = ((min_sad as u64 * (pix_w * pix_h) as u64) |
| 519 | >> (MAX_SB_SIZE_LOG2 * 2)) as u32; |
| 520 | |
| 521 | let dec_mv = |mv: MotionVector| MotionVector { |
| 522 | col: mv.col >> ssdec, |
| 523 | row: mv.row >> ssdec, |
| 524 | }; |
| 525 | let median = median.map(dec_mv); |
| 526 | for mv in subset_b.iter_mut() { |
| 527 | *mv = dec_mv(*mv); |
| 528 | } |
| 529 | for mv in subset_c.iter_mut() { |
| 530 | *mv = dec_mv(*mv); |
| 531 | } |
| 532 | |
| 533 | MotionEstimationSubsets { min_sad, median, subset_b, subset_c } |
| 534 | } |
| 535 | |
| 536 | pub fn estimate_motion<T: Pixel>( |
| 537 | fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>, w: usize, h: usize, |
| 538 | tile_bo: TileBlockOffset, ref_frame: RefType, |
| 539 | pmv: Option<[MotionVector; 2]>, corner: MVSamplingMode, |
| 540 | extensive_search: bool, ssdec: u8, lambda: Option<u32>, |
| 541 | ) -> Option<MotionSearchResult> { |
| 542 | if let Some(ref rec) = |
| 543 | fi.rec_buffer.frames[fi.ref_frames[ref_frame.to_index()] as usize] |
| 544 | { |
| 545 | let frame_bo = ts.to_frame_block_offset(tile_bo); |
| 546 | let (mvx_min, mvx_max, mvy_min, mvy_max) = |
| 547 | get_mv_range(fi.w_in_b, fi.h_in_b, frame_bo, w << ssdec, h << ssdec); |
| 548 | |
| 549 | let lambda = lambda.unwrap_or({ |
| 550 | // 0.5 is a fudge factor |
| 551 | (fi.me_lambda * 256.0 * 0.5) as u32 |
| 552 | }); |
| 553 | |
| 554 | let global_mv = [MotionVector { row: 0, col: 0 }; 2]; |
| 555 | |
| 556 | let po = frame_bo.to_luma_plane_offset(); |
| 557 | let (mvx_min, mvx_max, mvy_min, mvy_max) = |
| 558 | (mvx_min >> ssdec, mvx_max >> ssdec, mvy_min >> ssdec, mvy_max >> ssdec); |
| 559 | let po = PlaneOffset { x: po.x >> ssdec, y: po.y >> ssdec }; |
| 560 | let p_ref = match ssdec { |
| 561 | 0 => &rec.frame.planes[0], |
| 562 | 1 => &rec.input_hres, |
| 563 | 2 => &rec.input_qres, |
| 564 | _ => unimplemented!(), |
| 565 | }; |
| 566 | |
| 567 | let org_region = &match ssdec { |
| 568 | 0 => ts.input_tile.planes[0] |
| 569 | .subregion(Area::BlockStartingAt { bo: tile_bo.0 }), |
| 570 | 1 => ts.input_hres.region(Area::StartingAt { x: po.x, y: po.y }), |
| 571 | 2 => ts.input_qres.region(Area::StartingAt { x: po.x, y: po.y }), |
| 572 | _ => unimplemented!(), |
| 573 | }; |
| 574 | |
| 575 | let mut best: MotionSearchResult = full_pixel_me( |
| 576 | fi, |
| 577 | ts, |
| 578 | org_region, |
| 579 | p_ref, |
| 580 | tile_bo, |
| 581 | po, |
| 582 | lambda, |
| 583 | pmv.unwrap_or(global_mv), |
| 584 | w, |
| 585 | h, |
| 586 | mvx_min, |
| 587 | mvx_max, |
| 588 | mvy_min, |
| 589 | mvy_max, |
| 590 | ref_frame, |
| 591 | corner, |
| 592 | extensive_search, |
| 593 | ssdec, |
| 594 | ); |
| 595 | |
| 596 | if let Some(pmv) = pmv { |
| 597 | let use_satd: bool = fi.config.speed_settings.motion.use_satd_subpel; |
| 598 | if use_satd { |
| 599 | best.rd = get_fullpel_mv_rd( |
| 600 | fi, |
| 601 | po, |
| 602 | org_region, |
| 603 | p_ref, |
| 604 | fi.sequence.bit_depth, |
| 605 | pmv, |
| 606 | lambda, |
| 607 | use_satd, |
| 608 | mvx_min, |
| 609 | mvx_max, |
| 610 | mvy_min, |
| 611 | mvy_max, |
| 612 | w, |
| 613 | h, |
| 614 | best.mv, |
| 615 | ); |
| 616 | } |
| 617 | |
| 618 | sub_pixel_me( |
| 619 | fi, po, org_region, p_ref, lambda, pmv, mvx_min, mvx_max, mvy_min, |
| 620 | mvy_max, w, h, use_satd, &mut best, ref_frame, |
| 621 | ); |
| 622 | } |
| 623 | |
| 624 | // Scale motion vectors to full res size |
| 625 | best.mv = best.mv << ssdec; |
| 626 | |
| 627 | Some(best) |
| 628 | } else { |
| 629 | None |
| 630 | } |
| 631 | } |
| 632 | |
| 633 | /// Refine motion estimation that was computed one level of subsampling up. |
| 634 | fn refine_subsampled_motion_estimate<T: Pixel>( |
| 635 | fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>, w: usize, h: usize, |
| 636 | tile_bo: TileBlockOffset, ref_frame: RefType, ssdec: u8, lambda: u32, |
| 637 | ) -> Option<MotionSearchResult> { |
| 638 | if let Some(ref rec) = |
| 639 | fi.rec_buffer.frames[fi.ref_frames[ref_frame.to_index()] as usize] |
| 640 | { |
| 641 | let frame_bo = ts.to_frame_block_offset(tile_bo); |
| 642 | let (mvx_min, mvx_max, mvy_min, mvy_max) = |
| 643 | get_mv_range(fi.w_in_b, fi.h_in_b, frame_bo, w << ssdec, h << ssdec); |
| 644 | |
| 645 | let pmv = [MotionVector { row: 0, col: 0 }; 2]; |
| 646 | |
| 647 | let po = frame_bo.to_luma_plane_offset(); |
| 648 | let (mvx_min, mvx_max, mvy_min, mvy_max) = |
| 649 | (mvx_min >> ssdec, mvx_max >> ssdec, mvy_min >> ssdec, mvy_max >> ssdec); |
| 650 | let po = PlaneOffset { x: po.x >> ssdec, y: po.y >> ssdec }; |
| 651 | let p_ref = match ssdec { |
| 652 | 0 => &rec.frame.planes[0], |
| 653 | 1 => &rec.input_hres, |
| 654 | 2 => &rec.input_qres, |
| 655 | _ => unimplemented!(), |
| 656 | }; |
| 657 | |
| 658 | let org_region = &match ssdec { |
| 659 | 0 => ts.input_tile.planes[0] |
| 660 | .subregion(Area::BlockStartingAt { bo: tile_bo.0 }), |
| 661 | 1 => ts.input_hres.region(Area::StartingAt { x: po.x, y: po.y }), |
| 662 | 2 => ts.input_qres.region(Area::StartingAt { x: po.x, y: po.y }), |
| 663 | _ => unimplemented!(), |
| 664 | }; |
| 665 | |
| 666 | let mv = |
| 667 | ts.me_stats[ref_frame.to_index()][tile_bo.0.y][tile_bo.0.x].mv >> ssdec; |
| 668 | |
| 669 | // Given a motion vector at 0 at higher subsampling: |
| 670 | // | -1 | 0 | 1 | |
| 671 | // then the vectors at -1 to 2 should be tested at the current subsampling. |
| 672 | // |-------------| |
| 673 | // | -2 -1 | 0 1 | 2 3 | |
| 674 | // This corresponds to a 4x4 full search. |
| 675 | let x_lo = po.x + (mv.col as isize / 8 - 1).max(mvx_min / 8); |
| 676 | let x_hi = po.x + (mv.col as isize / 8 + 2).min(mvx_max / 8); |
| 677 | let y_lo = po.y + (mv.row as isize / 8 - 1).max(mvy_min / 8); |
| 678 | let y_hi = po.y + (mv.row as isize / 8 + 2).min(mvy_max / 8); |
| 679 | let mut results = full_search( |
| 680 | fi, x_lo, x_hi, y_lo, y_hi, w, h, org_region, p_ref, po, 1, lambda, pmv, |
| 681 | ); |
| 682 | |
| 683 | // Scale motion vectors to full res size |
| 684 | results.mv = results.mv << ssdec; |
| 685 | |
| 686 | Some(results) |
| 687 | } else { |
| 688 | None |
| 689 | } |
| 690 | } |
| 691 | |
| 692 | #[profiling::function ] |
| 693 | fn full_pixel_me<T: Pixel>( |
| 694 | fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>, |
| 695 | org_region: &PlaneRegion<T>, p_ref: &Plane<T>, tile_bo: TileBlockOffset, |
| 696 | po: PlaneOffset, lambda: u32, pmv: [MotionVector; 2], w: usize, h: usize, |
| 697 | mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize, |
| 698 | ref_frame: RefType, corner: MVSamplingMode, extensive_search: bool, |
| 699 | ssdec: u8, |
| 700 | ) -> MotionSearchResult { |
| 701 | let ref_frame_id = ref_frame.to_index(); |
| 702 | let tile_me_stats = &ts.me_stats[ref_frame_id].as_const(); |
| 703 | let frame_ref = fi.rec_buffer.frames[fi.ref_frames[0] as usize] |
| 704 | .as_ref() |
| 705 | .map(|frame_ref| frame_ref.frame_me_stats.read().expect("poisoned lock" )); |
| 706 | let subsets = get_subset_predictors( |
| 707 | tile_bo, |
| 708 | tile_me_stats, |
| 709 | frame_ref, |
| 710 | ref_frame_id, |
| 711 | w, |
| 712 | h, |
| 713 | mvx_min, |
| 714 | mvx_max, |
| 715 | mvy_min, |
| 716 | mvy_max, |
| 717 | corner, |
| 718 | ssdec, |
| 719 | ); |
| 720 | |
| 721 | let try_cands = |predictors: &[MotionVector], |
| 722 | best: &mut MotionSearchResult| { |
| 723 | let mut results = get_best_predictor( |
| 724 | fi, |
| 725 | po, |
| 726 | org_region, |
| 727 | p_ref, |
| 728 | predictors, |
| 729 | fi.sequence.bit_depth, |
| 730 | pmv, |
| 731 | lambda, |
| 732 | mvx_min, |
| 733 | mvx_max, |
| 734 | mvy_min, |
| 735 | mvy_max, |
| 736 | w, |
| 737 | h, |
| 738 | ); |
| 739 | fullpel_diamond_search( |
| 740 | fi, |
| 741 | po, |
| 742 | org_region, |
| 743 | p_ref, |
| 744 | &mut results, |
| 745 | fi.sequence.bit_depth, |
| 746 | pmv, |
| 747 | lambda, |
| 748 | mvx_min, |
| 749 | mvx_max, |
| 750 | mvy_min, |
| 751 | mvy_max, |
| 752 | w, |
| 753 | h, |
| 754 | ); |
| 755 | |
| 756 | if results.rd.cost < best.rd.cost { |
| 757 | *best = results; |
| 758 | } |
| 759 | }; |
| 760 | |
| 761 | let mut best: MotionSearchResult = MotionSearchResult::empty(); |
| 762 | if !extensive_search { |
| 763 | try_cands(&subsets.all_mvs(), &mut best); |
| 764 | best |
| 765 | } else { |
| 766 | // Perform a more thorough search before resorting to full search. |
| 767 | // Search the median, the best mvs of neighboring blocks, and motion vectors |
| 768 | // from the previous frame. Stop once a candidate with a sad less than a |
| 769 | // threshold is found. |
| 770 | |
| 771 | let thresh = (subsets.min_sad as f32 * 1.2) as u32 |
| 772 | + (((w * h) as u32) << (fi.sequence.bit_depth - 8)); |
| 773 | |
| 774 | if let Some(median) = subsets.median { |
| 775 | try_cands(&[median], &mut best); |
| 776 | |
| 777 | if best.rd.sad < thresh { |
| 778 | return best; |
| 779 | } |
| 780 | } |
| 781 | |
| 782 | try_cands(&subsets.subset_b, &mut best); |
| 783 | |
| 784 | if best.rd.sad < thresh { |
| 785 | return best; |
| 786 | } |
| 787 | |
| 788 | try_cands(&subsets.subset_c, &mut best); |
| 789 | |
| 790 | if best.rd.sad < thresh { |
| 791 | return best; |
| 792 | } |
| 793 | |
| 794 | // Preform UMH search, either as the last possible search when full search |
| 795 | // is disabled, or as the last search before resorting to full search. |
| 796 | uneven_multi_hex_search( |
| 797 | fi, |
| 798 | po, |
| 799 | org_region, |
| 800 | p_ref, |
| 801 | &mut best, |
| 802 | fi.sequence.bit_depth, |
| 803 | pmv, |
| 804 | lambda, |
| 805 | mvx_min, |
| 806 | mvx_max, |
| 807 | mvy_min, |
| 808 | mvy_max, |
| 809 | w, |
| 810 | h, |
| 811 | // Use 24, since it is the largest range that x264 uses. |
| 812 | 24, |
| 813 | ); |
| 814 | |
| 815 | if !fi.config.speed_settings.motion.me_allow_full_search |
| 816 | || best.rd.sad < thresh |
| 817 | { |
| 818 | return best; |
| 819 | } |
| 820 | |
| 821 | { |
| 822 | let range_x = (192 * fi.me_range_scale as isize) >> ssdec; |
| 823 | let range_y = (64 * fi.me_range_scale as isize) >> ssdec; |
| 824 | let x_lo = po.x + (-range_x).max(mvx_min / 8); |
| 825 | let x_hi = po.x + (range_x).min(mvx_max / 8); |
| 826 | let y_lo = po.y + (-range_y).max(mvy_min / 8); |
| 827 | let y_hi = po.y + (range_y).min(mvy_max / 8); |
| 828 | |
| 829 | let results = full_search( |
| 830 | fi, |
| 831 | x_lo, |
| 832 | x_hi, |
| 833 | y_lo, |
| 834 | y_hi, |
| 835 | w, |
| 836 | h, |
| 837 | org_region, |
| 838 | p_ref, |
| 839 | po, |
| 840 | // Full search is run at quarter resolution, except for short edges. |
| 841 | // When subsampling is lower than usual, the step size is raised so that |
| 842 | // the number of search locations stays the same. |
| 843 | 4 >> ssdec, |
| 844 | lambda, |
| 845 | [MotionVector::default(); 2], |
| 846 | ); |
| 847 | |
| 848 | if results.rd.cost < best.rd.cost { |
| 849 | results |
| 850 | } else { |
| 851 | best |
| 852 | } |
| 853 | } |
| 854 | } |
| 855 | } |
| 856 | |
| 857 | fn sub_pixel_me<T: Pixel>( |
| 858 | fi: &FrameInvariants<T>, po: PlaneOffset, org_region: &PlaneRegion<T>, |
| 859 | p_ref: &Plane<T>, lambda: u32, pmv: [MotionVector; 2], mvx_min: isize, |
| 860 | mvx_max: isize, mvy_min: isize, mvy_max: isize, w: usize, h: usize, |
| 861 | use_satd: bool, best: &mut MotionSearchResult, ref_frame: RefType, |
| 862 | ) { |
| 863 | subpel_diamond_search( |
| 864 | fi, |
| 865 | po, |
| 866 | org_region, |
| 867 | p_ref, |
| 868 | fi.sequence.bit_depth, |
| 869 | pmv, |
| 870 | lambda, |
| 871 | mvx_min, |
| 872 | mvx_max, |
| 873 | mvy_min, |
| 874 | mvy_max, |
| 875 | w, |
| 876 | h, |
| 877 | use_satd, |
| 878 | current:best, |
| 879 | ref_frame, |
| 880 | ); |
| 881 | } |
| 882 | |
| 883 | #[profiling::function ] |
| 884 | fn get_best_predictor<T: Pixel>( |
| 885 | fi: &FrameInvariants<T>, po: PlaneOffset, org_region: &PlaneRegion<T>, |
| 886 | p_ref: &Plane<T>, predictors: &[MotionVector], bit_depth: usize, |
| 887 | pmv: [MotionVector; 2], lambda: u32, mvx_min: isize, mvx_max: isize, |
| 888 | mvy_min: isize, mvy_max: isize, w: usize, h: usize, |
| 889 | ) -> MotionSearchResult { |
| 890 | let mut best: MotionSearchResult = MotionSearchResult::empty(); |
| 891 | |
| 892 | for &init_mv: MotionVector in predictors.iter() { |
| 893 | let rd: MVCandidateRD = get_fullpel_mv_rd( |
| 894 | fi, po, org_region, p_ref, bit_depth, pmv, lambda, use_satd:false, mvx_min, |
| 895 | mvx_max, mvy_min, mvy_max, w, h, cand_mv:init_mv, |
| 896 | ); |
| 897 | |
| 898 | if rd.cost < best.rd.cost { |
| 899 | best.mv = init_mv; |
| 900 | best.rd = rd; |
| 901 | } |
| 902 | } |
| 903 | |
| 904 | best |
| 905 | } |
| 906 | |
| 907 | /// Declares an array of motion vectors in structure of arrays syntax. |
| 908 | /// Compared to [`search_pattern_subpel`], this version creates motion vectors |
| 909 | /// in fullpel resolution (x8). |
| 910 | macro_rules! search_pattern { |
| 911 | ($field_a:ident: [$($ll_a:expr),*], $field_b:ident: [$($ll_b:expr),*]) => { |
| 912 | [ $(MotionVector { $field_a: $ll_a << 3, $field_b: $ll_b << 3 } ),*] |
| 913 | }; |
| 914 | } |
| 915 | |
| 916 | /// Declares an array of motion vectors in structure of arrays syntax. |
| 917 | macro_rules! search_pattern_subpel { |
| 918 | ($field_a:ident: [$($ll_a:expr),*], $field_b:ident: [$($ll_b:expr),*]) => { |
| 919 | [ $(MotionVector { $field_a: $ll_a, $field_b: $ll_b } ),*] |
| 920 | }; |
| 921 | } |
| 922 | |
| 923 | /// Diamond pattern of radius 1 as shown below. For fullpel search, use |
| 924 | /// `DIAMOND_R1_PATTERN_FULLPEL` since it has been scaled for fullpel search. |
| 925 | /// ```text |
| 926 | /// X |
| 927 | /// XoX |
| 928 | /// X |
| 929 | /// ``` |
| 930 | /// 'X's are motion candidates and the 'o' is the center. |
| 931 | /// |
| 932 | const DIAMOND_R1_PATTERN_SUBPEL: [MotionVector; 4] = search_pattern_subpel!( |
| 933 | col: [ 0, 1, 0, -1], |
| 934 | row: [ 1, 0, -1, 0] |
| 935 | ); |
| 936 | /// Diamond pattern of radius 1 as shown below. Unlike `DIAMOND_R1_PATTERN`, the |
| 937 | /// vectors have been shifted fullpel scale. |
| 938 | /// ```text |
| 939 | /// X |
| 940 | /// XoX |
| 941 | /// X |
| 942 | /// ``` |
| 943 | /// 'X's are motion candidates and the 'o' is the center. |
| 944 | const DIAMOND_R1_PATTERN: [MotionVector; 4] = search_pattern!( |
| 945 | col: [ 0, 1, 0, -1], |
| 946 | row: [ 1, 0, -1, 0] |
| 947 | ); |
| 948 | |
| 949 | /// Run a full pixel diamond search. The search is run on multiple step sizes. |
| 950 | /// |
| 951 | /// For each step size, candidate motion vectors are examined for improvement |
| 952 | /// to the current search location. The search location is moved to the best |
| 953 | /// candidate (if any). This is repeated until the search location stops moving. |
| 954 | #[profiling::function ] |
| 955 | fn fullpel_diamond_search<T: Pixel>( |
| 956 | fi: &FrameInvariants<T>, po: PlaneOffset, org_region: &PlaneRegion<T>, |
| 957 | p_ref: &Plane<T>, current: &mut MotionSearchResult, bit_depth: usize, |
| 958 | pmv: [MotionVector; 2], lambda: u32, mvx_min: isize, mvx_max: isize, |
| 959 | mvy_min: isize, mvy_max: isize, w: usize, h: usize, |
| 960 | ) { |
| 961 | // Define the initial and the final scale (log2) of the diamond. |
| 962 | let (mut diamond_radius_log2, diamond_radius_end_log2) = (1u8, 0u8); |
| 963 | |
| 964 | loop { |
| 965 | // Find the best candidate from the diamond pattern. |
| 966 | let mut best_cand: MotionSearchResult = MotionSearchResult::empty(); |
| 967 | for &offset in &DIAMOND_R1_PATTERN { |
| 968 | let cand_mv = current.mv + (offset << diamond_radius_log2); |
| 969 | let rd = get_fullpel_mv_rd( |
| 970 | fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min, |
| 971 | mvx_max, mvy_min, mvy_max, w, h, cand_mv, |
| 972 | ); |
| 973 | |
| 974 | if rd.cost < best_cand.rd.cost { |
| 975 | best_cand.mv = cand_mv; |
| 976 | best_cand.rd = rd; |
| 977 | } |
| 978 | } |
| 979 | |
| 980 | // Continue the search at this scale until the can't find a better candidate |
| 981 | // to use. |
| 982 | if current.rd.cost <= best_cand.rd.cost { |
| 983 | if diamond_radius_log2 == diamond_radius_end_log2 { |
| 984 | break; |
| 985 | } else { |
| 986 | diamond_radius_log2 -= 1; |
| 987 | } |
| 988 | } else { |
| 989 | *current = best_cand; |
| 990 | } |
| 991 | } |
| 992 | |
| 993 | assert!(!current.is_empty()); |
| 994 | } |
| 995 | |
| 996 | /// A hexagon pattern around a center point. The pattern is ordered so that the |
| 997 | /// offsets circle around the center. This is done to allow pruning locations |
| 998 | /// covered by the last iteration. |
| 999 | /// ```text |
| 1000 | /// 21012 |
| 1001 | /// 2 X X |
| 1002 | /// 1 |
| 1003 | /// 0 X o X |
| 1004 | /// 1 |
| 1005 | /// 2 X X |
| 1006 | /// ``` |
| 1007 | /// 'X's are motion candidates and the 'o' is the center. |
| 1008 | /// |
| 1009 | /// The illustration below shows the process of a hexagon search. |
| 1010 | /// ```text |
| 1011 | /// Step 1 Step 2 |
| 1012 | /// 1 1 1 1 2 |
| 1013 | /// |
| 1014 | /// 1(0)1 => 1 0(1)2 |
| 1015 | /// |
| 1016 | /// 1 1 1 1 2 |
| 1017 | /// ``` |
| 1018 | /// The search above has gone through the following steps. |
| 1019 | /// 1. Search '1' elements for better candidates than the center '0'. |
| 1020 | /// 2. Recenter around the best candidate ('(1)') and hexagon candidates that |
| 1021 | /// don't overlap with the previous search step (labeled '2'). |
| 1022 | const HEXAGON_PATTERN: [MotionVector; 6] = search_pattern!( |
| 1023 | col: [ 0, 2, 2, 0, -2, -2], |
| 1024 | row: [ -2, -1, 1, 2, 1, -1] |
| 1025 | ); |
| 1026 | |
| 1027 | /// A small square pattern around a center point. |
| 1028 | /// ```text |
| 1029 | /// 101 |
| 1030 | /// 1 XXX |
| 1031 | /// 0 XoX |
| 1032 | /// 1 XXX |
| 1033 | /// ``` |
| 1034 | /// 'X's are motion candidates and the 'o' is the center. |
| 1035 | const SQUARE_REFINE_PATTERN: [MotionVector; 8] = search_pattern!( |
| 1036 | col: [ -1, 0, 1, -1, 1, -1, 0, 1], |
| 1037 | row: [ 1, 1, 1, 0, 0, -1, -1, -1] |
| 1038 | ); |
| 1039 | |
| 1040 | /// Perform hexagon search and refine afterwards. |
| 1041 | /// |
| 1042 | /// In the hexagon search stage, candidate motion vectors are examined for |
| 1043 | /// improvement to the current search location. The search location is moved to |
| 1044 | /// the best candidate (if any). This is repeated until the search location |
| 1045 | /// stops moving. |
| 1046 | /// |
| 1047 | /// Refinement uses a square pattern that fits between the hexagon candidates. |
| 1048 | /// |
| 1049 | /// The hexagon pattern is defined by [`HEXAGON_PATTERN`] and the refinement |
| 1050 | /// is defined by [`SQUARE_REFINE_PATTERN`]. |
| 1051 | /// |
| 1052 | /// `current` provides the initial search location and serves as |
| 1053 | /// the output for the final search results. |
| 1054 | #[profiling::function ] |
| 1055 | fn hexagon_search<T: Pixel>( |
| 1056 | fi: &FrameInvariants<T>, po: PlaneOffset, org_region: &PlaneRegion<T>, |
| 1057 | p_ref: &Plane<T>, current: &mut MotionSearchResult, bit_depth: usize, |
| 1058 | pmv: [MotionVector; 2], lambda: u32, mvx_min: isize, mvx_max: isize, |
| 1059 | mvy_min: isize, mvy_max: isize, w: usize, h: usize, |
| 1060 | ) { |
| 1061 | // The first iteration of hexagon search is implemented separate from |
| 1062 | // subsequent iterations, which overlap with previous iterations. |
| 1063 | |
| 1064 | // Holds what candidate is used (if any). This is used to determine which |
| 1065 | // candidates have already been tested in a previous iteration and can be |
| 1066 | // skipped. |
| 1067 | let mut best_cand_idx: usize = 0; |
| 1068 | let mut best_cand: MotionSearchResult = MotionSearchResult::empty(); |
| 1069 | |
| 1070 | // First iteration of hexagon search. There are six candidates to consider. |
| 1071 | for i in 0..6 { |
| 1072 | let cand_mv = current.mv + HEXAGON_PATTERN[i]; |
| 1073 | let rd = get_fullpel_mv_rd( |
| 1074 | fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min, |
| 1075 | mvx_max, mvy_min, mvy_max, w, h, cand_mv, |
| 1076 | ); |
| 1077 | |
| 1078 | if rd.cost < best_cand.rd.cost { |
| 1079 | best_cand_idx = i; |
| 1080 | best_cand.mv = cand_mv; |
| 1081 | best_cand.rd = rd; |
| 1082 | } |
| 1083 | } |
| 1084 | |
| 1085 | // Run additional iterations of hexagon search until the search location |
| 1086 | // doesn't update. |
| 1087 | while best_cand.rd.cost < current.rd.cost { |
| 1088 | // Update the search location. |
| 1089 | *current = best_cand; |
| 1090 | best_cand = MotionSearchResult::empty(); |
| 1091 | |
| 1092 | // Save the index/direction taken in the previous iteration to the current |
| 1093 | // search location. |
| 1094 | let center_cand_idx = best_cand_idx; |
| 1095 | |
| 1096 | // Look only at candidates that don't overlap with previous iterations. This |
| 1097 | // corresponds with the three offsets (2D) with the closest direction to |
| 1098 | // that traveled by the previous iteration. HEXAGON_PATTERN has clockwise |
| 1099 | // order, so the last direction -1, +0, and +1 (mod 6) give the indices for |
| 1100 | // these offsets. |
| 1101 | for idx_offset_mod6 in 5..=7 { |
| 1102 | let i = (center_cand_idx + idx_offset_mod6) % 6; |
| 1103 | let cand_mv = current.mv + HEXAGON_PATTERN[i]; |
| 1104 | |
| 1105 | let rd = get_fullpel_mv_rd( |
| 1106 | fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min, |
| 1107 | mvx_max, mvy_min, mvy_max, w, h, cand_mv, |
| 1108 | ); |
| 1109 | |
| 1110 | if rd.cost < best_cand.rd.cost { |
| 1111 | best_cand_idx = i; |
| 1112 | best_cand.mv = cand_mv; |
| 1113 | best_cand.rd = rd; |
| 1114 | } |
| 1115 | } |
| 1116 | } |
| 1117 | |
| 1118 | // Refine the motion after completing hexagon search. |
| 1119 | let mut best_cand: MotionSearchResult = MotionSearchResult::empty(); |
| 1120 | for &offset in &SQUARE_REFINE_PATTERN { |
| 1121 | let cand_mv = current.mv + offset; |
| 1122 | let rd = get_fullpel_mv_rd( |
| 1123 | fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min, |
| 1124 | mvx_max, mvy_min, mvy_max, w, h, cand_mv, |
| 1125 | ); |
| 1126 | |
| 1127 | if rd.cost < best_cand.rd.cost { |
| 1128 | best_cand.mv = cand_mv; |
| 1129 | best_cand.rd = rd; |
| 1130 | } |
| 1131 | } |
| 1132 | if best_cand.rd.cost < current.rd.cost { |
| 1133 | *current = best_cand; |
| 1134 | } |
| 1135 | |
| 1136 | assert!(!current.is_empty()); |
| 1137 | } |
| 1138 | |
| 1139 | /// Uneven multi-hexagon search pattern around a center point. Used for locating |
| 1140 | /// irregular movement. |
| 1141 | /// ```text |
| 1142 | /// X |
| 1143 | /// X X |
| 1144 | /// X X |
| 1145 | /// X X |
| 1146 | /// X o X |
| 1147 | /// X X |
| 1148 | /// X X |
| 1149 | /// X X |
| 1150 | /// X |
| 1151 | /// ``` |
| 1152 | /// 'X's are motion candidates and the 'o' is the center. |
| 1153 | const UMH_PATTERN: [MotionVector; 16] = search_pattern!( |
| 1154 | col: [ -2, -1, 0, 1, 2, 3, 4, 3, 2, 1, 0, -1, -2, 3, -4, -3], |
| 1155 | row: [ 4, 4, 4, 4, 4, 2, 0, -2, -4, -4, -4, -4, -4, -2, 0, 2] |
| 1156 | ); |
| 1157 | |
| 1158 | /// Perform an uneven multi-hexagon search. There are 4 stages: |
| 1159 | /// 1. Unsymmetrical-cross search: Search the horizontal and vertical directions |
| 1160 | /// for the general direction of the motion. |
| 1161 | /// 2. A 5x5 full search is done to refine the current candidate. |
| 1162 | /// 3. Uneven multi-hexagon search. See [`UMH_PATTERN`]. |
| 1163 | /// 4. Refinement using standard hexagon search. |
| 1164 | /// |
| 1165 | /// `current` provides the initial search location and serves as |
| 1166 | /// the output for the final search results. |
| 1167 | /// |
| 1168 | /// `me_range` parameter determines how far these stages can search. |
| 1169 | #[profiling::function ] |
| 1170 | fn uneven_multi_hex_search<T: Pixel>( |
| 1171 | fi: &FrameInvariants<T>, po: PlaneOffset, org_region: &PlaneRegion<T>, |
| 1172 | p_ref: &Plane<T>, current: &mut MotionSearchResult, bit_depth: usize, |
| 1173 | pmv: [MotionVector; 2], lambda: u32, mvx_min: isize, mvx_max: isize, |
| 1174 | mvy_min: isize, mvy_max: isize, w: usize, h: usize, me_range: i16, |
| 1175 | ) { |
| 1176 | assert!(!current.is_empty()); |
| 1177 | |
| 1178 | // Search in a cross pattern to obtain a rough approximate of motion. |
| 1179 | // The cross is split into a horizontal and vertical component. Video content |
| 1180 | // tends to have more horizontal motion, so the horizontal part of the cross |
| 1181 | // is twice as large as the vertical half. |
| 1182 | // X - |
| 1183 | // | <- me_range/2 |
| 1184 | // X | |
| 1185 | // X X X XoX X X X - |
| 1186 | // X |
| 1187 | // |
| 1188 | // X |
| 1189 | // |------| |
| 1190 | // \ |
| 1191 | // me_range |
| 1192 | let center = current.mv; |
| 1193 | |
| 1194 | // The larger, horizontal, part of the cross search. |
| 1195 | for i in (1..=me_range).step_by(2) { |
| 1196 | const HORIZONTAL_LINE: [MotionVector; 2] = search_pattern!( |
| 1197 | col: [ 0, 0], |
| 1198 | row: [-1, 1] |
| 1199 | ); |
| 1200 | |
| 1201 | for &offset in &HORIZONTAL_LINE { |
| 1202 | let cand_mv = center + offset * i; |
| 1203 | let rd = get_fullpel_mv_rd( |
| 1204 | fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min, |
| 1205 | mvx_max, mvy_min, mvy_max, w, h, cand_mv, |
| 1206 | ); |
| 1207 | |
| 1208 | if rd.cost < current.rd.cost { |
| 1209 | current.mv = cand_mv; |
| 1210 | current.rd = rd; |
| 1211 | } |
| 1212 | } |
| 1213 | } |
| 1214 | |
| 1215 | // The smaller, vertical, part of the cross search |
| 1216 | for i in (1..=me_range >> 1).step_by(2) { |
| 1217 | const VERTICAL_LINE: [MotionVector; 2] = search_pattern!( |
| 1218 | col: [-1, 1], |
| 1219 | row: [ 0, 0] |
| 1220 | ); |
| 1221 | |
| 1222 | for &offset in &VERTICAL_LINE { |
| 1223 | let cand_mv = center + offset * i; |
| 1224 | let rd = get_fullpel_mv_rd( |
| 1225 | fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min, |
| 1226 | mvx_max, mvy_min, mvy_max, w, h, cand_mv, |
| 1227 | ); |
| 1228 | |
| 1229 | if rd.cost < current.rd.cost { |
| 1230 | current.mv = cand_mv; |
| 1231 | current.rd = rd; |
| 1232 | } |
| 1233 | } |
| 1234 | } |
| 1235 | |
| 1236 | // 5x5 full search. Search a 5x5 square region around the current best mv. |
| 1237 | let center = current.mv; |
| 1238 | for row in -2..=2 { |
| 1239 | for col in -2..=2 { |
| 1240 | if row == 0 && col == 0 { |
| 1241 | continue; |
| 1242 | } |
| 1243 | let cand_mv = center + MotionVector { row, col }; |
| 1244 | let rd = get_fullpel_mv_rd( |
| 1245 | fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min, |
| 1246 | mvx_max, mvy_min, mvy_max, w, h, cand_mv, |
| 1247 | ); |
| 1248 | |
| 1249 | if rd.cost < current.rd.cost { |
| 1250 | current.mv = cand_mv; |
| 1251 | current.rd = rd; |
| 1252 | } |
| 1253 | } |
| 1254 | } |
| 1255 | |
| 1256 | // Run the hexagons in uneven multi-hexagon. The hexagonal pattern is tested |
| 1257 | // around the best vector at multiple scales. |
| 1258 | // Example of the UMH pattern run on a scale of 1 and 2: |
| 1259 | // 2 - |
| 1260 | // | <- me_range |
| 1261 | // 2 2 | |
| 1262 | // | |
| 1263 | // 2 1 2 | |
| 1264 | // 1 1 | |
| 1265 | // 2 1 1 2 | |
| 1266 | // 1 1 | |
| 1267 | // 2 1 o 1 2 | |
| 1268 | // 1 1 | |
| 1269 | // 2 1 1 2 | |
| 1270 | // 1 1 | |
| 1271 | // 2 1 2 | |
| 1272 | // | |
| 1273 | // 2 2 | |
| 1274 | // | |
| 1275 | // 2 - |
| 1276 | // |---------------| |
| 1277 | // \ |
| 1278 | // me_range |
| 1279 | let center = current.mv; |
| 1280 | |
| 1281 | // Divide by 4, the radius of the UMH's hexagon. |
| 1282 | let iterations = me_range >> 2; |
| 1283 | for i in 1..=iterations { |
| 1284 | for &offset in &UMH_PATTERN { |
| 1285 | let cand_mv = center + offset * i; |
| 1286 | let rd = get_fullpel_mv_rd( |
| 1287 | fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min, |
| 1288 | mvx_max, mvy_min, mvy_max, w, h, cand_mv, |
| 1289 | ); |
| 1290 | |
| 1291 | if rd.cost < current.rd.cost { |
| 1292 | current.mv = cand_mv; |
| 1293 | current.rd = rd; |
| 1294 | } |
| 1295 | } |
| 1296 | } |
| 1297 | |
| 1298 | // Refine the search results using a 'normal' hexagon search. |
| 1299 | hexagon_search( |
| 1300 | fi, po, org_region, p_ref, current, bit_depth, pmv, lambda, mvx_min, |
| 1301 | mvx_max, mvy_min, mvy_max, w, h, |
| 1302 | ); |
| 1303 | } |
| 1304 | |
| 1305 | /// Run a subpixel diamond search. The search is run on multiple step sizes. |
| 1306 | /// |
| 1307 | /// For each step size, candidate motion vectors are examined for improvement |
| 1308 | /// to the current search location. The search location is moved to the best |
| 1309 | /// candidate (if any). This is repeated until the search location stops moving. |
| 1310 | #[profiling::function ] |
| 1311 | fn subpel_diamond_search<T: Pixel>( |
| 1312 | fi: &FrameInvariants<T>, po: PlaneOffset, org_region: &PlaneRegion<T>, |
| 1313 | _p_ref: &Plane<T>, bit_depth: usize, pmv: [MotionVector; 2], lambda: u32, |
| 1314 | mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize, w: usize, |
| 1315 | h: usize, use_satd: bool, current: &mut MotionSearchResult, |
| 1316 | ref_frame: RefType, |
| 1317 | ) { |
| 1318 | use crate::util::Aligned; |
| 1319 | |
| 1320 | // Motion compensation assembly has special requirements for edges |
| 1321 | let mc_w = w.next_power_of_two(); |
| 1322 | let mc_h = (h + 1) & !1; |
| 1323 | |
| 1324 | // Metadata for subpel scratch pad. |
| 1325 | let cfg = PlaneConfig::new(mc_w, mc_h, 0, 0, 0, 0, std::mem::size_of::<T>()); |
| 1326 | // Stack allocation for subpel scratch pad. |
| 1327 | // SAFETY: We write to the array below before reading from it. |
| 1328 | let mut buf: Aligned<[T; 128 * 128]> = unsafe { Aligned::uninitialized() }; |
| 1329 | let mut tmp_region = PlaneRegionMut::from_slice( |
| 1330 | &mut buf.data, |
| 1331 | &cfg, |
| 1332 | Rect { x: 0, y: 0, width: cfg.width, height: cfg.height }, |
| 1333 | ); |
| 1334 | |
| 1335 | // start at 1/2 pel and end at 1/4 or 1/8 pel |
| 1336 | let (mut diamond_radius_log2, diamond_radius_end_log2) = |
| 1337 | (2u8, u8::from(!fi.allow_high_precision_mv)); |
| 1338 | |
| 1339 | loop { |
| 1340 | // Find the best candidate from the diamond pattern. |
| 1341 | let mut best_cand: MotionSearchResult = MotionSearchResult::empty(); |
| 1342 | for &offset in &DIAMOND_R1_PATTERN_SUBPEL { |
| 1343 | let cand_mv = current.mv + (offset << diamond_radius_log2); |
| 1344 | |
| 1345 | let rd = get_subpel_mv_rd( |
| 1346 | fi, |
| 1347 | po, |
| 1348 | org_region, |
| 1349 | bit_depth, |
| 1350 | pmv, |
| 1351 | lambda, |
| 1352 | use_satd, |
| 1353 | mvx_min, |
| 1354 | mvx_max, |
| 1355 | mvy_min, |
| 1356 | mvy_max, |
| 1357 | w, |
| 1358 | h, |
| 1359 | cand_mv, |
| 1360 | &mut tmp_region, |
| 1361 | ref_frame, |
| 1362 | ); |
| 1363 | |
| 1364 | if rd.cost < best_cand.rd.cost { |
| 1365 | best_cand.mv = cand_mv; |
| 1366 | best_cand.rd = rd; |
| 1367 | } |
| 1368 | } |
| 1369 | |
| 1370 | // Continue the search at this scale until a better candidate isn't found. |
| 1371 | if current.rd.cost <= best_cand.rd.cost { |
| 1372 | if diamond_radius_log2 == diamond_radius_end_log2 { |
| 1373 | break; |
| 1374 | } else { |
| 1375 | diamond_radius_log2 -= 1; |
| 1376 | } |
| 1377 | } else { |
| 1378 | *current = best_cand; |
| 1379 | } |
| 1380 | } |
| 1381 | |
| 1382 | assert!(!current.is_empty()); |
| 1383 | } |
| 1384 | |
| 1385 | #[inline ] |
| 1386 | fn get_fullpel_mv_rd<T: Pixel>( |
| 1387 | fi: &FrameInvariants<T>, po: PlaneOffset, org_region: &PlaneRegion<T>, |
| 1388 | p_ref: &Plane<T>, bit_depth: usize, pmv: [MotionVector; 2], lambda: u32, |
| 1389 | use_satd: bool, mvx_min: isize, mvx_max: isize, mvy_min: isize, |
| 1390 | mvy_max: isize, w: usize, h: usize, cand_mv: MotionVector, |
| 1391 | ) -> MVCandidateRD { |
| 1392 | if (cand_mv.col as isize) < mvx_min |
| 1393 | || (cand_mv.col as isize) > mvx_max |
| 1394 | || (cand_mv.row as isize) < mvy_min |
| 1395 | || (cand_mv.row as isize) > mvy_max |
| 1396 | { |
| 1397 | return MVCandidateRD::empty(); |
| 1398 | } |
| 1399 | |
| 1400 | // Convert the motion vector into an full pixel offset. |
| 1401 | let plane_ref: PlaneRegion<'_, T> = p_ref.region(Area::StartingAt { |
| 1402 | x: po.x + (cand_mv.col / 8) as isize, |
| 1403 | y: po.y + (cand_mv.row / 8) as isize, |
| 1404 | }); |
| 1405 | compute_mv_rd( |
| 1406 | fi, pmv, lambda, use_satd, bit_depth, w, h, cand_mv, plane_org:org_region, |
| 1407 | &plane_ref, |
| 1408 | ) |
| 1409 | } |
| 1410 | |
| 1411 | fn get_subpel_mv_rd<T: Pixel>( |
| 1412 | fi: &FrameInvariants<T>, po: PlaneOffset, org_region: &PlaneRegion<T>, |
| 1413 | bit_depth: usize, pmv: [MotionVector; 2], lambda: u32, use_satd: bool, |
| 1414 | mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize, w: usize, |
| 1415 | h: usize, cand_mv: MotionVector, tmp_region: &mut PlaneRegionMut<T>, |
| 1416 | ref_frame: RefType, |
| 1417 | ) -> MVCandidateRD { |
| 1418 | if (cand_mv.col as isize) < mvx_min |
| 1419 | || (cand_mv.col as isize) > mvx_max |
| 1420 | || (cand_mv.row as isize) < mvy_min |
| 1421 | || (cand_mv.row as isize) > mvy_max |
| 1422 | { |
| 1423 | return MVCandidateRD::empty(); |
| 1424 | } |
| 1425 | |
| 1426 | let tmp_width = tmp_region.rect().width; |
| 1427 | let tmp_height = tmp_region.rect().height; |
| 1428 | let tile_rect = |
| 1429 | TileRect { x: 0, y: 0, width: tmp_width, height: tmp_height }; |
| 1430 | |
| 1431 | PredictionMode::NEWMV.predict_inter_single( |
| 1432 | fi, tile_rect, 0, po, tmp_region, |
| 1433 | // motion comp's w & h on edges can be different than distortion's |
| 1434 | tmp_width, tmp_height, ref_frame, cand_mv, |
| 1435 | ); |
| 1436 | let plane_ref = tmp_region.as_const(); |
| 1437 | compute_mv_rd( |
| 1438 | fi, pmv, lambda, use_satd, bit_depth, w, h, cand_mv, org_region, |
| 1439 | &plane_ref, |
| 1440 | ) |
| 1441 | } |
| 1442 | |
| 1443 | /// Compute the rate distortion stats for a motion vector. |
| 1444 | #[inline (always)] |
| 1445 | fn compute_mv_rd<T: Pixel>( |
| 1446 | fi: &FrameInvariants<T>, pmv: [MotionVector; 2], lambda: u32, |
| 1447 | use_satd: bool, bit_depth: usize, w: usize, h: usize, cand_mv: MotionVector, |
| 1448 | plane_org: &PlaneRegion<'_, T>, plane_ref: &PlaneRegion<'_, T>, |
| 1449 | ) -> MVCandidateRD { |
| 1450 | let sad: u32 = if use_satd { |
| 1451 | get_satd(plane_org, plane_ref, w, h, bit_depth, _cpu:fi.cpu_feature_level) |
| 1452 | } else { |
| 1453 | get_sad(plane_org, plane_ref, w, h, bit_depth, _cpu:fi.cpu_feature_level) |
| 1454 | }; |
| 1455 | |
| 1456 | let rate1: u32 = get_mv_rate(a:cand_mv, b:pmv[0], fi.allow_high_precision_mv); |
| 1457 | let rate2: u32 = get_mv_rate(a:cand_mv, b:pmv[1], fi.allow_high_precision_mv); |
| 1458 | let rate: u32 = rate1.min(rate2 + 1); |
| 1459 | |
| 1460 | MVCandidateRD { cost: 256 * sad as u64 + rate as u64 * lambda as u64, sad } |
| 1461 | } |
| 1462 | |
| 1463 | #[profiling::function ] |
| 1464 | fn full_search<T: Pixel>( |
| 1465 | fi: &FrameInvariants<T>, x_lo: isize, x_hi: isize, y_lo: isize, y_hi: isize, |
| 1466 | w: usize, h: usize, org_region: &PlaneRegion<T>, p_ref: &Plane<T>, |
| 1467 | po: PlaneOffset, step: usize, lambda: u32, pmv: [MotionVector; 2], |
| 1468 | ) -> MotionSearchResult { |
| 1469 | let search_region = p_ref.region(Area::Rect { |
| 1470 | x: x_lo, |
| 1471 | y: y_lo, |
| 1472 | width: (x_hi - x_lo) as usize + w, |
| 1473 | height: (y_hi - y_lo) as usize + h, |
| 1474 | }); |
| 1475 | |
| 1476 | let mut best: MotionSearchResult = MotionSearchResult::empty(); |
| 1477 | |
| 1478 | // Select rectangular regions within search region with vert+horz windows |
| 1479 | for vert_window in search_region.vert_windows(h).step_by(step) { |
| 1480 | for ref_window in vert_window.horz_windows(w).step_by(step) { |
| 1481 | let &Rect { x, y, .. } = ref_window.rect(); |
| 1482 | |
| 1483 | let mv = MotionVector { |
| 1484 | row: 8 * (y as i16 - po.y as i16), |
| 1485 | col: 8 * (x as i16 - po.x as i16), |
| 1486 | }; |
| 1487 | |
| 1488 | let rd = compute_mv_rd( |
| 1489 | fi, |
| 1490 | pmv, |
| 1491 | lambda, |
| 1492 | false, |
| 1493 | fi.sequence.bit_depth, |
| 1494 | w, |
| 1495 | h, |
| 1496 | mv, |
| 1497 | org_region, |
| 1498 | &ref_window, |
| 1499 | ); |
| 1500 | |
| 1501 | if rd.cost < best.rd.cost { |
| 1502 | best.rd = rd; |
| 1503 | best.mv = mv; |
| 1504 | } |
| 1505 | } |
| 1506 | } |
| 1507 | |
| 1508 | best |
| 1509 | } |
| 1510 | |
| 1511 | #[inline (always)] |
| 1512 | fn get_mv_rate( |
| 1513 | a: MotionVector, b: MotionVector, allow_high_precision_mv: bool, |
| 1514 | ) -> u32 { |
| 1515 | #[inline (always)] |
| 1516 | fn diff_to_rate(diff: i16, allow_high_precision_mv: bool) -> u32 { |
| 1517 | let d: i16 = if allow_high_precision_mv { diff } else { diff >> 1 }; |
| 1518 | 2 * ILog::ilog(self:d.abs()) as u32 |
| 1519 | } |
| 1520 | |
| 1521 | diff_to_rate(diff:a.row - b.row, allow_high_precision_mv) |
| 1522 | + diff_to_rate(diff:a.col - b.col, allow_high_precision_mv) |
| 1523 | } |
| 1524 | |