1// Copyright (c) 2017-2022, The rav1e contributors. All rights reserved
2//
3// This source code is subject to the terms of the BSD 2 Clause License and
4// the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
5// was not distributed with this source code in the LICENSE file, you can
6// obtain it at www.aomedia.org/license/software. If the Alliance for Open
7// Media Patent License 1.0 was not distributed with this source code in the
8// PATENTS file, you can obtain it at www.aomedia.org/license/patent.
9
10use crate::api::InterConfig;
11use crate::context::{
12 BlockOffset, PlaneBlockOffset, SuperBlockOffset, TileBlockOffset,
13 TileSuperBlockOffset, MAX_SB_SIZE_LOG2, MIB_SIZE_LOG2, MI_SIZE,
14 MI_SIZE_LOG2, SB_SIZE,
15};
16use crate::dist::*;
17use crate::frame::*;
18use crate::mc::MotionVector;
19use crate::partition::*;
20use crate::predict::PredictionMode;
21use crate::tiling::*;
22use crate::util::ILog;
23use crate::util::{clamp, Pixel};
24use crate::FrameInvariants;
25
26use arrayvec::*;
27use std::ops::{Index, IndexMut};
28use std::sync::{Arc, RwLock, RwLockReadGuard, RwLockWriteGuard};
29
30#[derive(Debug, Copy, Clone, Default)]
31pub struct MEStats {
32 pub mv: MotionVector,
33 /// sad value, on the scale of a 128x128 block
34 pub normalized_sad: u32,
35}
36
37#[derive(Debug, Clone)]
38pub struct FrameMEStats {
39 stats: Box<[MEStats]>,
40 pub cols: usize,
41 pub rows: usize,
42}
43
44/// cbindgen:ignore
45pub type RefMEStats = Arc<RwLock<[FrameMEStats; REF_FRAMES]>>;
46/// cbindgen:ignore
47pub type ReadGuardMEStats<'a> =
48 RwLockReadGuard<'a, [FrameMEStats; REF_FRAMES]>;
49/// cbindgen:ignore
50pub type WriteGuardMEStats<'a> =
51 RwLockWriteGuard<'a, [FrameMEStats; REF_FRAMES]>;
52
53impl FrameMEStats {
54 #[inline]
55 pub fn rows_iter(&self) -> std::slice::ChunksExact<'_, MEStats> {
56 self.stats.chunks_exact(self.cols)
57 }
58
59 pub fn new(cols: usize, rows: usize) -> Self {
60 Self {
61 // dynamic allocation: once per frame
62 stats: vec![MEStats::default(); cols * rows].into_boxed_slice(),
63 cols,
64 rows,
65 }
66 }
67 pub fn new_arc_array(cols: usize, rows: usize) -> RefMEStats {
68 Arc::new(RwLock::new([
69 FrameMEStats::new(cols, rows),
70 FrameMEStats::new(cols, rows),
71 FrameMEStats::new(cols, rows),
72 FrameMEStats::new(cols, rows),
73 FrameMEStats::new(cols, rows),
74 FrameMEStats::new(cols, rows),
75 FrameMEStats::new(cols, rows),
76 FrameMEStats::new(cols, rows),
77 ]))
78 }
79}
80
81impl Index<usize> for FrameMEStats {
82 type Output = [MEStats];
83 #[inline]
84 fn index(&self, index: usize) -> &Self::Output {
85 &self.stats[index * self.cols..(index + 1) * self.cols]
86 }
87}
88
89impl IndexMut<usize> for FrameMEStats {
90 #[inline]
91 fn index_mut(&mut self, index: usize) -> &mut Self::Output {
92 &mut self.stats[index * self.cols..(index + 1) * self.cols]
93 }
94}
95
96/// Result of motion search.
97#[derive(Debug, Copy, Clone)]
98pub struct MotionSearchResult {
99 /// Motion vector chosen by the motion search.
100 pub mv: MotionVector,
101 /// Rate distortion data associated with `mv`.
102 pub rd: MVCandidateRD,
103}
104
105impl MotionSearchResult {
106 /// Creates an 'empty' value.
107 ///
108 /// To be considered empty, cost is set higher than any naturally occurring
109 /// cost value. The idea is that comparing to any valid rd output, the search
110 /// result will always be replaced.
111 #[inline(always)]
112 pub fn empty() -> MotionSearchResult {
113 MotionSearchResult {
114 mv: MotionVector::default(),
115 rd: MVCandidateRD::empty(),
116 }
117 }
118
119 /// Check if the value should be considered to be empty.
120 #[inline(always)]
121 const fn is_empty(&self) -> bool {
122 self.rd.cost == u64::MAX
123 }
124}
125
126/// Holds data from computing rate distortion of a motion vector.
127#[derive(Debug, Copy, Clone)]
128pub struct MVCandidateRD {
129 /// Rate distortion cost of the motion vector.
130 pub cost: u64,
131 /// Distortion metric value for the motion vector.
132 pub sad: u32,
133}
134
135impl MVCandidateRD {
136 /// Creates an 'empty' value.
137 ///
138 /// To be considered empty, cost is set higher than any naturally occurring
139 /// cost value. The idea is that comparing to any valid rd output, the search
140 /// result will always be replaced.
141 #[inline(always)]
142 const fn empty() -> MVCandidateRD {
143 MVCandidateRD { sad: u32::MAX, cost: u64::MAX }
144 }
145}
146
147#[derive(Debug, Copy, Clone, Eq, PartialEq)]
148pub enum MVSamplingMode {
149 INIT,
150 CORNER { right: bool, bottom: bool },
151}
152
153pub fn estimate_tile_motion<T: Pixel>(
154 fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>,
155 inter_cfg: &InterConfig,
156) {
157 let init_size = MIB_SIZE_LOG2;
158
159 let mut prev_ssdec: Option<u8> = None;
160 for mv_size_in_b_log2 in (2..=init_size).rev() {
161 let init = mv_size_in_b_log2 == init_size;
162
163 // Choose subsampling. Pass one is quarter res and pass two is at half res.
164 let ssdec = match init_size - mv_size_in_b_log2 {
165 0 => 2,
166 1 => 1,
167 _ => 0,
168 };
169
170 let new_subsampling =
171 if let Some(prev) = prev_ssdec { prev != ssdec } else { false };
172 prev_ssdec = Some(ssdec);
173
174 // 0.5 and 0.125 are a fudge factors
175 let lambda = (fi.me_lambda * 256.0 / (1 << (2 * ssdec)) as f64
176 * if ssdec == 0 { 0.5 } else { 0.125 }) as u32;
177
178 for sby in 0..ts.sb_height {
179 for sbx in 0..ts.sb_width {
180 let mut tested_frames_flags = 0;
181 for &ref_frame in inter_cfg.allowed_ref_frames() {
182 let frame_flag = 1 << fi.ref_frames[ref_frame.to_index()];
183 if tested_frames_flags & frame_flag == frame_flag {
184 continue;
185 }
186 tested_frames_flags |= frame_flag;
187
188 let tile_bo =
189 TileSuperBlockOffset(SuperBlockOffset { x: sbx, y: sby })
190 .block_offset(0, 0);
191
192 if new_subsampling {
193 refine_subsampled_sb_motion(
194 fi,
195 ts,
196 ref_frame,
197 mv_size_in_b_log2 + 1,
198 tile_bo,
199 ssdec,
200 lambda,
201 );
202 }
203
204 estimate_sb_motion(
205 fi,
206 ts,
207 ref_frame,
208 mv_size_in_b_log2,
209 tile_bo,
210 init,
211 ssdec,
212 lambda,
213 );
214 }
215 }
216 }
217 }
218}
219
220fn estimate_sb_motion<T: Pixel>(
221 fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>, ref_frame: RefType,
222 mv_size_in_b_log2: usize, tile_bo: TileBlockOffset, init: bool, ssdec: u8,
223 lambda: u32,
224) {
225 let pix_offset = tile_bo.to_luma_plane_offset();
226 let sb_h: usize = SB_SIZE.min(ts.height - pix_offset.y as usize);
227 let sb_w: usize = SB_SIZE.min(ts.width - pix_offset.x as usize);
228
229 let mv_size = MI_SIZE << mv_size_in_b_log2;
230
231 // Process in blocks, cropping at edges.
232 for y in (0..sb_h).step_by(mv_size) {
233 for x in (0..sb_w).step_by(mv_size) {
234 let corner: MVSamplingMode = if init {
235 MVSamplingMode::INIT
236 } else {
237 // Processing the block a size up produces data that can be used by
238 // the right and bottom corners.
239 MVSamplingMode::CORNER {
240 right: x & mv_size == mv_size,
241 bottom: y & mv_size == mv_size,
242 }
243 };
244
245 let sub_bo = tile_bo
246 .with_offset(x as isize >> MI_SIZE_LOG2, y as isize >> MI_SIZE_LOG2);
247
248 // Clamp to frame edge, rounding up in the case of subsampling.
249 // The rounding makes some assumptions about how subsampling is done.
250 let w = mv_size.min(sb_w - x + (1 << ssdec) - 1) >> ssdec;
251 let h = mv_size.min(sb_h - y + (1 << ssdec) - 1) >> ssdec;
252
253 // Run motion estimation.
254 // Note that the initial search (init) instructs the called function to
255 // perform a more extensive search.
256 if let Some(results) = estimate_motion(
257 fi,
258 ts,
259 w,
260 h,
261 sub_bo,
262 ref_frame,
263 None,
264 corner,
265 init,
266 ssdec,
267 Some(lambda),
268 ) {
269 // normalize sad to 128x128 block
270 let sad = (((results.rd.sad as u64) << (MAX_SB_SIZE_LOG2 * 2))
271 / (w * h) as u64) as u32;
272 save_me_stats(
273 ts,
274 mv_size_in_b_log2,
275 sub_bo,
276 ref_frame,
277 MEStats { mv: results.mv, normalized_sad: sad },
278 );
279 }
280 }
281 }
282}
283
284fn refine_subsampled_sb_motion<T: Pixel>(
285 fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>, ref_frame: RefType,
286 mv_size_in_b_log2: usize, tile_bo: TileBlockOffset, ssdec: u8, lambda: u32,
287) {
288 let pix_offset = tile_bo.to_luma_plane_offset();
289 let sb_h: usize = SB_SIZE.min(ts.height - pix_offset.y as usize);
290 let sb_w: usize = SB_SIZE.min(ts.width - pix_offset.x as usize);
291
292 let mv_size = MI_SIZE << mv_size_in_b_log2;
293
294 // Process in blocks, cropping at edges.
295 for y in (0..sb_h).step_by(mv_size) {
296 for x in (0..sb_w).step_by(mv_size) {
297 let sub_bo = tile_bo
298 .with_offset(x as isize >> MI_SIZE_LOG2, y as isize >> MI_SIZE_LOG2);
299
300 // Clamp to frame edge, rounding up in the case of subsampling.
301 // The rounding makes some assumptions about how subsampling is done.
302 let w = mv_size.min(sb_w - x + (1 << ssdec) - 1) >> ssdec;
303 let h = mv_size.min(sb_h - y + (1 << ssdec) - 1) >> ssdec;
304
305 // Refine the existing motion estimate
306 if let Some(results) = refine_subsampled_motion_estimate(
307 fi, ts, w, h, sub_bo, ref_frame, ssdec, lambda,
308 ) {
309 // normalize sad to 128x128 block
310 let sad = (((results.rd.sad as u64) << (MAX_SB_SIZE_LOG2 * 2))
311 / (w * h) as u64) as u32;
312 save_me_stats(
313 ts,
314 mv_size_in_b_log2,
315 sub_bo,
316 ref_frame,
317 MEStats { mv: results.mv, normalized_sad: sad },
318 );
319 }
320 }
321 }
322}
323
324fn save_me_stats<T: Pixel>(
325 ts: &mut TileStateMut<'_, T>, mv_size_in_b_log2: usize,
326 tile_bo: TileBlockOffset, ref_frame: RefType, stats: MEStats,
327) {
328 let size_in_b: usize = 1 << mv_size_in_b_log2;
329 let tile_me_stats: &mut TileMEStatsMut<'_> = &mut ts.me_stats[ref_frame.to_index()];
330 let tile_bo_x_end: usize = (tile_bo.0.x + size_in_b).min(ts.mi_width);
331 let tile_bo_y_end: usize = (tile_bo.0.y + size_in_b).min(ts.mi_height);
332 for mi_y: usize in tile_bo.0.y..tile_bo_y_end {
333 for a: &mut MEStats in tile_me_stats[mi_y][tile_bo.0.x..tile_bo_x_end].iter_mut() {
334 *a = stats;
335 }
336 }
337}
338
339fn get_mv_range(
340 w_in_b: usize, h_in_b: usize, bo: PlaneBlockOffset, blk_w: usize,
341 blk_h: usize,
342) -> (isize, isize, isize, isize) {
343 let border_w: isize = 128 + blk_w as isize * 8;
344 let border_h: isize = 128 + blk_h as isize * 8;
345 let mvx_min: isize = -(bo.0.x as isize) * (8 * MI_SIZE) as isize - border_w;
346 let mvx_max: isize = ((w_in_b - bo.0.x) as isize - (blk_w / MI_SIZE) as isize)
347 * (8 * MI_SIZE) as isize
348 + border_w;
349 let mvy_min: isize = -(bo.0.y as isize) * (8 * MI_SIZE) as isize - border_h;
350 let mvy_max: isize = ((h_in_b - bo.0.y) as isize - (blk_h / MI_SIZE) as isize)
351 * (8 * MI_SIZE) as isize
352 + border_h;
353
354 // <https://aomediacodec.github.io/av1-spec/#assign-mv-semantics>
355 use crate::context::{MV_LOW, MV_UPP};
356 (
357 mvx_min.max(MV_LOW as isize + 1),
358 mvx_max.min(MV_UPP as isize - 1),
359 mvy_min.max(MV_LOW as isize + 1),
360 mvy_max.min(MV_UPP as isize - 1),
361 )
362}
363
364struct MotionEstimationSubsets {
365 min_sad: u32,
366 median: Option<MotionVector>,
367 subset_b: ArrayVec<MotionVector, 5>,
368 subset_c: ArrayVec<MotionVector, 5>,
369}
370
371impl MotionEstimationSubsets {
372 fn all_mvs(&self) -> ArrayVec<MotionVector, 11> {
373 let mut all: ArrayVec = ArrayVec::new();
374 if let Some(median: MotionVector) = self.median {
375 all.push(element:median);
376 }
377
378 all.extend(self.subset_b.iter().copied());
379 all.extend(self.subset_c.iter().copied());
380
381 all
382 }
383}
384
385#[profiling::function]
386fn get_subset_predictors(
387 tile_bo: TileBlockOffset, tile_me_stats: &TileMEStats<'_>,
388 frame_ref_opt: Option<ReadGuardMEStats<'_>>, ref_frame_id: usize,
389 pix_w: usize, pix_h: usize, mvx_min: isize, mvx_max: isize, mvy_min: isize,
390 mvy_max: isize, corner: MVSamplingMode, ssdec: u8,
391) -> MotionEstimationSubsets {
392 let mut min_sad: u32 = u32::MAX;
393 let mut subset_b = ArrayVec::<MotionVector, 5>::new();
394 let mut subset_c = ArrayVec::<MotionVector, 5>::new();
395
396 // rounded up width in blocks
397 let w = ((pix_w << ssdec) + MI_SIZE - 1) >> MI_SIZE_LOG2;
398 let h = ((pix_h << ssdec) + MI_SIZE - 1) >> MI_SIZE_LOG2;
399
400 // Get predictors from the same frame.
401
402 let clipped_half_w = (w >> 1).min(tile_me_stats.cols() - 1 - tile_bo.0.x);
403 let clipped_half_h = (h >> 1).min(tile_me_stats.rows() - 1 - tile_bo.0.y);
404
405 let mut process_cand = |stats: MEStats| -> MotionVector {
406 min_sad = min_sad.min(stats.normalized_sad);
407 let mv = stats.mv.quantize_to_fullpel();
408 MotionVector {
409 col: clamp(mv.col as isize, mvx_min, mvx_max) as i16,
410 row: clamp(mv.row as isize, mvy_min, mvy_max) as i16,
411 }
412 };
413
414 // Sample the middle of all block edges bordering this one.
415 // Note: If motion vectors haven't been precomputed to a given blocksize, then
416 // the right and bottom edges will be duplicates of the center predictor when
417 // processing in raster order.
418
419 // left
420 if tile_bo.0.x > 0 {
421 subset_b.push(process_cand(
422 tile_me_stats[tile_bo.0.y + clipped_half_h][tile_bo.0.x - 1],
423 ));
424 }
425 // top
426 if tile_bo.0.y > 0 {
427 subset_b.push(process_cand(
428 tile_me_stats[tile_bo.0.y - 1][tile_bo.0.x + clipped_half_w],
429 ));
430 }
431
432 // Sampling far right and far bottom edges was tested, but had worse results
433 // without an extensive threshold test (with threshold being applied after
434 // checking median and the best of each subset).
435
436 // right
437 if let MVSamplingMode::CORNER { right: true, bottom: _ } = corner {
438 if tile_bo.0.x + w < tile_me_stats.cols() {
439 subset_b.push(process_cand(
440 tile_me_stats[tile_bo.0.y + clipped_half_h][tile_bo.0.x + w],
441 ));
442 }
443 }
444 // bottom
445 if let MVSamplingMode::CORNER { right: _, bottom: true } = corner {
446 if tile_bo.0.y + h < tile_me_stats.rows() {
447 subset_b.push(process_cand(
448 tile_me_stats[tile_bo.0.y + h][tile_bo.0.x + clipped_half_w],
449 ));
450 }
451 }
452
453 let median = if corner != MVSamplingMode::INIT {
454 // Sample the center of the current block.
455 Some(process_cand(
456 tile_me_stats[tile_bo.0.y + clipped_half_h]
457 [tile_bo.0.x + clipped_half_w],
458 ))
459 } else if subset_b.len() != 3 {
460 None
461 } else {
462 let mut rows: ArrayVec<i16, 3> = subset_b.iter().map(|&a| a.row).collect();
463 let mut cols: ArrayVec<i16, 3> = subset_b.iter().map(|&a| a.col).collect();
464 rows.as_mut_slice().sort_unstable();
465 cols.as_mut_slice().sort_unstable();
466 Some(MotionVector { row: rows[1], col: cols[1] })
467 };
468
469 // Zero motion vector, don't use add_cand since it skips zero vectors.
470 subset_b.push(MotionVector::default());
471
472 // EPZS subset C predictors.
473 // Sample the middle of bordering side of the left, right, top and bottom
474 // blocks of the previous frame.
475 // Sample the middle of this block in the previous frame.
476
477 if let Some(frame_me_stats) = frame_ref_opt {
478 let prev_frame = &frame_me_stats[ref_frame_id];
479
480 let frame_bo = PlaneBlockOffset(BlockOffset {
481 x: tile_me_stats.x() + tile_bo.0.x,
482 y: tile_me_stats.y() + tile_bo.0.y,
483 });
484 let clipped_half_w = (w >> 1).min(prev_frame.cols - 1 - frame_bo.0.x);
485 let clipped_half_h = (h >> 1).min(prev_frame.rows - 1 - frame_bo.0.y);
486
487 // left
488 if frame_bo.0.x > 0 {
489 subset_c.push(process_cand(
490 prev_frame[frame_bo.0.y + clipped_half_h][frame_bo.0.x - 1],
491 ));
492 }
493 // top
494 if frame_bo.0.y > 0 {
495 subset_c.push(process_cand(
496 prev_frame[frame_bo.0.y - 1][frame_bo.0.x + clipped_half_w],
497 ));
498 }
499 // right
500 if frame_bo.0.x + w < prev_frame.cols {
501 subset_c.push(process_cand(
502 prev_frame[frame_bo.0.y + clipped_half_h][frame_bo.0.x + w],
503 ));
504 }
505 // bottom
506 if frame_bo.0.y + h < prev_frame.rows {
507 subset_c.push(process_cand(
508 prev_frame[frame_bo.0.y + h][frame_bo.0.x + clipped_half_w],
509 ));
510 }
511
512 subset_c.push(process_cand(
513 prev_frame[frame_bo.0.y + clipped_half_h][frame_bo.0.x + clipped_half_w],
514 ));
515 }
516
517 // Undo normalization to 128x128 block size
518 let min_sad = ((min_sad as u64 * (pix_w * pix_h) as u64)
519 >> (MAX_SB_SIZE_LOG2 * 2)) as u32;
520
521 let dec_mv = |mv: MotionVector| MotionVector {
522 col: mv.col >> ssdec,
523 row: mv.row >> ssdec,
524 };
525 let median = median.map(dec_mv);
526 for mv in subset_b.iter_mut() {
527 *mv = dec_mv(*mv);
528 }
529 for mv in subset_c.iter_mut() {
530 *mv = dec_mv(*mv);
531 }
532
533 MotionEstimationSubsets { min_sad, median, subset_b, subset_c }
534}
535
536pub fn estimate_motion<T: Pixel>(
537 fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>, w: usize, h: usize,
538 tile_bo: TileBlockOffset, ref_frame: RefType,
539 pmv: Option<[MotionVector; 2]>, corner: MVSamplingMode,
540 extensive_search: bool, ssdec: u8, lambda: Option<u32>,
541) -> Option<MotionSearchResult> {
542 if let Some(ref rec) =
543 fi.rec_buffer.frames[fi.ref_frames[ref_frame.to_index()] as usize]
544 {
545 let frame_bo = ts.to_frame_block_offset(tile_bo);
546 let (mvx_min, mvx_max, mvy_min, mvy_max) =
547 get_mv_range(fi.w_in_b, fi.h_in_b, frame_bo, w << ssdec, h << ssdec);
548
549 let lambda = lambda.unwrap_or({
550 // 0.5 is a fudge factor
551 (fi.me_lambda * 256.0 * 0.5) as u32
552 });
553
554 let global_mv = [MotionVector { row: 0, col: 0 }; 2];
555
556 let po = frame_bo.to_luma_plane_offset();
557 let (mvx_min, mvx_max, mvy_min, mvy_max) =
558 (mvx_min >> ssdec, mvx_max >> ssdec, mvy_min >> ssdec, mvy_max >> ssdec);
559 let po = PlaneOffset { x: po.x >> ssdec, y: po.y >> ssdec };
560 let p_ref = match ssdec {
561 0 => &rec.frame.planes[0],
562 1 => &rec.input_hres,
563 2 => &rec.input_qres,
564 _ => unimplemented!(),
565 };
566
567 let org_region = &match ssdec {
568 0 => ts.input_tile.planes[0]
569 .subregion(Area::BlockStartingAt { bo: tile_bo.0 }),
570 1 => ts.input_hres.region(Area::StartingAt { x: po.x, y: po.y }),
571 2 => ts.input_qres.region(Area::StartingAt { x: po.x, y: po.y }),
572 _ => unimplemented!(),
573 };
574
575 let mut best: MotionSearchResult = full_pixel_me(
576 fi,
577 ts,
578 org_region,
579 p_ref,
580 tile_bo,
581 po,
582 lambda,
583 pmv.unwrap_or(global_mv),
584 w,
585 h,
586 mvx_min,
587 mvx_max,
588 mvy_min,
589 mvy_max,
590 ref_frame,
591 corner,
592 extensive_search,
593 ssdec,
594 );
595
596 if let Some(pmv) = pmv {
597 let use_satd: bool = fi.config.speed_settings.motion.use_satd_subpel;
598 if use_satd {
599 best.rd = get_fullpel_mv_rd(
600 fi,
601 po,
602 org_region,
603 p_ref,
604 fi.sequence.bit_depth,
605 pmv,
606 lambda,
607 use_satd,
608 mvx_min,
609 mvx_max,
610 mvy_min,
611 mvy_max,
612 w,
613 h,
614 best.mv,
615 );
616 }
617
618 sub_pixel_me(
619 fi, po, org_region, p_ref, lambda, pmv, mvx_min, mvx_max, mvy_min,
620 mvy_max, w, h, use_satd, &mut best, ref_frame,
621 );
622 }
623
624 // Scale motion vectors to full res size
625 best.mv = best.mv << ssdec;
626
627 Some(best)
628 } else {
629 None
630 }
631}
632
633/// Refine motion estimation that was computed one level of subsampling up.
634fn refine_subsampled_motion_estimate<T: Pixel>(
635 fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>, w: usize, h: usize,
636 tile_bo: TileBlockOffset, ref_frame: RefType, ssdec: u8, lambda: u32,
637) -> Option<MotionSearchResult> {
638 if let Some(ref rec) =
639 fi.rec_buffer.frames[fi.ref_frames[ref_frame.to_index()] as usize]
640 {
641 let frame_bo = ts.to_frame_block_offset(tile_bo);
642 let (mvx_min, mvx_max, mvy_min, mvy_max) =
643 get_mv_range(fi.w_in_b, fi.h_in_b, frame_bo, w << ssdec, h << ssdec);
644
645 let pmv = [MotionVector { row: 0, col: 0 }; 2];
646
647 let po = frame_bo.to_luma_plane_offset();
648 let (mvx_min, mvx_max, mvy_min, mvy_max) =
649 (mvx_min >> ssdec, mvx_max >> ssdec, mvy_min >> ssdec, mvy_max >> ssdec);
650 let po = PlaneOffset { x: po.x >> ssdec, y: po.y >> ssdec };
651 let p_ref = match ssdec {
652 0 => &rec.frame.planes[0],
653 1 => &rec.input_hres,
654 2 => &rec.input_qres,
655 _ => unimplemented!(),
656 };
657
658 let org_region = &match ssdec {
659 0 => ts.input_tile.planes[0]
660 .subregion(Area::BlockStartingAt { bo: tile_bo.0 }),
661 1 => ts.input_hres.region(Area::StartingAt { x: po.x, y: po.y }),
662 2 => ts.input_qres.region(Area::StartingAt { x: po.x, y: po.y }),
663 _ => unimplemented!(),
664 };
665
666 let mv =
667 ts.me_stats[ref_frame.to_index()][tile_bo.0.y][tile_bo.0.x].mv >> ssdec;
668
669 // Given a motion vector at 0 at higher subsampling:
670 // | -1 | 0 | 1 |
671 // then the vectors at -1 to 2 should be tested at the current subsampling.
672 // |-------------|
673 // | -2 -1 | 0 1 | 2 3 |
674 // This corresponds to a 4x4 full search.
675 let x_lo = po.x + (mv.col as isize / 8 - 1).max(mvx_min / 8);
676 let x_hi = po.x + (mv.col as isize / 8 + 2).min(mvx_max / 8);
677 let y_lo = po.y + (mv.row as isize / 8 - 1).max(mvy_min / 8);
678 let y_hi = po.y + (mv.row as isize / 8 + 2).min(mvy_max / 8);
679 let mut results = full_search(
680 fi, x_lo, x_hi, y_lo, y_hi, w, h, org_region, p_ref, po, 1, lambda, pmv,
681 );
682
683 // Scale motion vectors to full res size
684 results.mv = results.mv << ssdec;
685
686 Some(results)
687 } else {
688 None
689 }
690}
691
692#[profiling::function]
693fn full_pixel_me<T: Pixel>(
694 fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>,
695 org_region: &PlaneRegion<T>, p_ref: &Plane<T>, tile_bo: TileBlockOffset,
696 po: PlaneOffset, lambda: u32, pmv: [MotionVector; 2], w: usize, h: usize,
697 mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
698 ref_frame: RefType, corner: MVSamplingMode, extensive_search: bool,
699 ssdec: u8,
700) -> MotionSearchResult {
701 let ref_frame_id = ref_frame.to_index();
702 let tile_me_stats = &ts.me_stats[ref_frame_id].as_const();
703 let frame_ref = fi.rec_buffer.frames[fi.ref_frames[0] as usize]
704 .as_ref()
705 .map(|frame_ref| frame_ref.frame_me_stats.read().expect("poisoned lock"));
706 let subsets = get_subset_predictors(
707 tile_bo,
708 tile_me_stats,
709 frame_ref,
710 ref_frame_id,
711 w,
712 h,
713 mvx_min,
714 mvx_max,
715 mvy_min,
716 mvy_max,
717 corner,
718 ssdec,
719 );
720
721 let try_cands = |predictors: &[MotionVector],
722 best: &mut MotionSearchResult| {
723 let mut results = get_best_predictor(
724 fi,
725 po,
726 org_region,
727 p_ref,
728 predictors,
729 fi.sequence.bit_depth,
730 pmv,
731 lambda,
732 mvx_min,
733 mvx_max,
734 mvy_min,
735 mvy_max,
736 w,
737 h,
738 );
739 fullpel_diamond_search(
740 fi,
741 po,
742 org_region,
743 p_ref,
744 &mut results,
745 fi.sequence.bit_depth,
746 pmv,
747 lambda,
748 mvx_min,
749 mvx_max,
750 mvy_min,
751 mvy_max,
752 w,
753 h,
754 );
755
756 if results.rd.cost < best.rd.cost {
757 *best = results;
758 }
759 };
760
761 let mut best: MotionSearchResult = MotionSearchResult::empty();
762 if !extensive_search {
763 try_cands(&subsets.all_mvs(), &mut best);
764 best
765 } else {
766 // Perform a more thorough search before resorting to full search.
767 // Search the median, the best mvs of neighboring blocks, and motion vectors
768 // from the previous frame. Stop once a candidate with a sad less than a
769 // threshold is found.
770
771 let thresh = (subsets.min_sad as f32 * 1.2) as u32
772 + (((w * h) as u32) << (fi.sequence.bit_depth - 8));
773
774 if let Some(median) = subsets.median {
775 try_cands(&[median], &mut best);
776
777 if best.rd.sad < thresh {
778 return best;
779 }
780 }
781
782 try_cands(&subsets.subset_b, &mut best);
783
784 if best.rd.sad < thresh {
785 return best;
786 }
787
788 try_cands(&subsets.subset_c, &mut best);
789
790 if best.rd.sad < thresh {
791 return best;
792 }
793
794 // Preform UMH search, either as the last possible search when full search
795 // is disabled, or as the last search before resorting to full search.
796 uneven_multi_hex_search(
797 fi,
798 po,
799 org_region,
800 p_ref,
801 &mut best,
802 fi.sequence.bit_depth,
803 pmv,
804 lambda,
805 mvx_min,
806 mvx_max,
807 mvy_min,
808 mvy_max,
809 w,
810 h,
811 // Use 24, since it is the largest range that x264 uses.
812 24,
813 );
814
815 if !fi.config.speed_settings.motion.me_allow_full_search
816 || best.rd.sad < thresh
817 {
818 return best;
819 }
820
821 {
822 let range_x = (192 * fi.me_range_scale as isize) >> ssdec;
823 let range_y = (64 * fi.me_range_scale as isize) >> ssdec;
824 let x_lo = po.x + (-range_x).max(mvx_min / 8);
825 let x_hi = po.x + (range_x).min(mvx_max / 8);
826 let y_lo = po.y + (-range_y).max(mvy_min / 8);
827 let y_hi = po.y + (range_y).min(mvy_max / 8);
828
829 let results = full_search(
830 fi,
831 x_lo,
832 x_hi,
833 y_lo,
834 y_hi,
835 w,
836 h,
837 org_region,
838 p_ref,
839 po,
840 // Full search is run at quarter resolution, except for short edges.
841 // When subsampling is lower than usual, the step size is raised so that
842 // the number of search locations stays the same.
843 4 >> ssdec,
844 lambda,
845 [MotionVector::default(); 2],
846 );
847
848 if results.rd.cost < best.rd.cost {
849 results
850 } else {
851 best
852 }
853 }
854 }
855}
856
857fn sub_pixel_me<T: Pixel>(
858 fi: &FrameInvariants<T>, po: PlaneOffset, org_region: &PlaneRegion<T>,
859 p_ref: &Plane<T>, lambda: u32, pmv: [MotionVector; 2], mvx_min: isize,
860 mvx_max: isize, mvy_min: isize, mvy_max: isize, w: usize, h: usize,
861 use_satd: bool, best: &mut MotionSearchResult, ref_frame: RefType,
862) {
863 subpel_diamond_search(
864 fi,
865 po,
866 org_region,
867 p_ref,
868 fi.sequence.bit_depth,
869 pmv,
870 lambda,
871 mvx_min,
872 mvx_max,
873 mvy_min,
874 mvy_max,
875 w,
876 h,
877 use_satd,
878 current:best,
879 ref_frame,
880 );
881}
882
883#[profiling::function]
884fn get_best_predictor<T: Pixel>(
885 fi: &FrameInvariants<T>, po: PlaneOffset, org_region: &PlaneRegion<T>,
886 p_ref: &Plane<T>, predictors: &[MotionVector], bit_depth: usize,
887 pmv: [MotionVector; 2], lambda: u32, mvx_min: isize, mvx_max: isize,
888 mvy_min: isize, mvy_max: isize, w: usize, h: usize,
889) -> MotionSearchResult {
890 let mut best: MotionSearchResult = MotionSearchResult::empty();
891
892 for &init_mv: MotionVector in predictors.iter() {
893 let rd: MVCandidateRD = get_fullpel_mv_rd(
894 fi, po, org_region, p_ref, bit_depth, pmv, lambda, use_satd:false, mvx_min,
895 mvx_max, mvy_min, mvy_max, w, h, cand_mv:init_mv,
896 );
897
898 if rd.cost < best.rd.cost {
899 best.mv = init_mv;
900 best.rd = rd;
901 }
902 }
903
904 best
905}
906
907/// Declares an array of motion vectors in structure of arrays syntax.
908/// Compared to [`search_pattern_subpel`], this version creates motion vectors
909/// in fullpel resolution (x8).
910macro_rules! search_pattern {
911 ($field_a:ident: [$($ll_a:expr),*], $field_b:ident: [$($ll_b:expr),*]) => {
912 [ $(MotionVector { $field_a: $ll_a << 3, $field_b: $ll_b << 3 } ),*]
913 };
914}
915
916/// Declares an array of motion vectors in structure of arrays syntax.
917macro_rules! search_pattern_subpel {
918 ($field_a:ident: [$($ll_a:expr),*], $field_b:ident: [$($ll_b:expr),*]) => {
919 [ $(MotionVector { $field_a: $ll_a, $field_b: $ll_b } ),*]
920 };
921}
922
923/// Diamond pattern of radius 1 as shown below. For fullpel search, use
924/// `DIAMOND_R1_PATTERN_FULLPEL` since it has been scaled for fullpel search.
925/// ```text
926/// X
927/// XoX
928/// X
929/// ```
930/// 'X's are motion candidates and the 'o' is the center.
931///
932const DIAMOND_R1_PATTERN_SUBPEL: [MotionVector; 4] = search_pattern_subpel!(
933 col: [ 0, 1, 0, -1],
934 row: [ 1, 0, -1, 0]
935);
936/// Diamond pattern of radius 1 as shown below. Unlike `DIAMOND_R1_PATTERN`, the
937/// vectors have been shifted fullpel scale.
938/// ```text
939/// X
940/// XoX
941/// X
942/// ```
943/// 'X's are motion candidates and the 'o' is the center.
944const DIAMOND_R1_PATTERN: [MotionVector; 4] = search_pattern!(
945 col: [ 0, 1, 0, -1],
946 row: [ 1, 0, -1, 0]
947);
948
949/// Run a full pixel diamond search. The search is run on multiple step sizes.
950///
951/// For each step size, candidate motion vectors are examined for improvement
952/// to the current search location. The search location is moved to the best
953/// candidate (if any). This is repeated until the search location stops moving.
954#[profiling::function]
955fn fullpel_diamond_search<T: Pixel>(
956 fi: &FrameInvariants<T>, po: PlaneOffset, org_region: &PlaneRegion<T>,
957 p_ref: &Plane<T>, current: &mut MotionSearchResult, bit_depth: usize,
958 pmv: [MotionVector; 2], lambda: u32, mvx_min: isize, mvx_max: isize,
959 mvy_min: isize, mvy_max: isize, w: usize, h: usize,
960) {
961 // Define the initial and the final scale (log2) of the diamond.
962 let (mut diamond_radius_log2, diamond_radius_end_log2) = (1u8, 0u8);
963
964 loop {
965 // Find the best candidate from the diamond pattern.
966 let mut best_cand: MotionSearchResult = MotionSearchResult::empty();
967 for &offset in &DIAMOND_R1_PATTERN {
968 let cand_mv = current.mv + (offset << diamond_radius_log2);
969 let rd = get_fullpel_mv_rd(
970 fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min,
971 mvx_max, mvy_min, mvy_max, w, h, cand_mv,
972 );
973
974 if rd.cost < best_cand.rd.cost {
975 best_cand.mv = cand_mv;
976 best_cand.rd = rd;
977 }
978 }
979
980 // Continue the search at this scale until the can't find a better candidate
981 // to use.
982 if current.rd.cost <= best_cand.rd.cost {
983 if diamond_radius_log2 == diamond_radius_end_log2 {
984 break;
985 } else {
986 diamond_radius_log2 -= 1;
987 }
988 } else {
989 *current = best_cand;
990 }
991 }
992
993 assert!(!current.is_empty());
994}
995
996/// A hexagon pattern around a center point. The pattern is ordered so that the
997/// offsets circle around the center. This is done to allow pruning locations
998/// covered by the last iteration.
999/// ```text
1000/// 21012
1001/// 2 X X
1002/// 1
1003/// 0 X o X
1004/// 1
1005/// 2 X X
1006/// ```
1007/// 'X's are motion candidates and the 'o' is the center.
1008///
1009/// The illustration below shows the process of a hexagon search.
1010/// ```text
1011/// Step 1 Step 2
1012/// 1 1 1 1 2
1013///
1014/// 1(0)1 => 1 0(1)2
1015///
1016/// 1 1 1 1 2
1017/// ```
1018/// The search above has gone through the following steps.
1019/// 1. Search '1' elements for better candidates than the center '0'.
1020/// 2. Recenter around the best candidate ('(1)') and hexagon candidates that
1021/// don't overlap with the previous search step (labeled '2').
1022const HEXAGON_PATTERN: [MotionVector; 6] = search_pattern!(
1023 col: [ 0, 2, 2, 0, -2, -2],
1024 row: [ -2, -1, 1, 2, 1, -1]
1025);
1026
1027/// A small square pattern around a center point.
1028/// ```text
1029/// 101
1030/// 1 XXX
1031/// 0 XoX
1032/// 1 XXX
1033/// ```
1034/// 'X's are motion candidates and the 'o' is the center.
1035const SQUARE_REFINE_PATTERN: [MotionVector; 8] = search_pattern!(
1036 col: [ -1, 0, 1, -1, 1, -1, 0, 1],
1037 row: [ 1, 1, 1, 0, 0, -1, -1, -1]
1038);
1039
1040/// Perform hexagon search and refine afterwards.
1041///
1042/// In the hexagon search stage, candidate motion vectors are examined for
1043/// improvement to the current search location. The search location is moved to
1044/// the best candidate (if any). This is repeated until the search location
1045/// stops moving.
1046///
1047/// Refinement uses a square pattern that fits between the hexagon candidates.
1048///
1049/// The hexagon pattern is defined by [`HEXAGON_PATTERN`] and the refinement
1050/// is defined by [`SQUARE_REFINE_PATTERN`].
1051///
1052/// `current` provides the initial search location and serves as
1053/// the output for the final search results.
1054#[profiling::function]
1055fn hexagon_search<T: Pixel>(
1056 fi: &FrameInvariants<T>, po: PlaneOffset, org_region: &PlaneRegion<T>,
1057 p_ref: &Plane<T>, current: &mut MotionSearchResult, bit_depth: usize,
1058 pmv: [MotionVector; 2], lambda: u32, mvx_min: isize, mvx_max: isize,
1059 mvy_min: isize, mvy_max: isize, w: usize, h: usize,
1060) {
1061 // The first iteration of hexagon search is implemented separate from
1062 // subsequent iterations, which overlap with previous iterations.
1063
1064 // Holds what candidate is used (if any). This is used to determine which
1065 // candidates have already been tested in a previous iteration and can be
1066 // skipped.
1067 let mut best_cand_idx: usize = 0;
1068 let mut best_cand: MotionSearchResult = MotionSearchResult::empty();
1069
1070 // First iteration of hexagon search. There are six candidates to consider.
1071 for i in 0..6 {
1072 let cand_mv = current.mv + HEXAGON_PATTERN[i];
1073 let rd = get_fullpel_mv_rd(
1074 fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min,
1075 mvx_max, mvy_min, mvy_max, w, h, cand_mv,
1076 );
1077
1078 if rd.cost < best_cand.rd.cost {
1079 best_cand_idx = i;
1080 best_cand.mv = cand_mv;
1081 best_cand.rd = rd;
1082 }
1083 }
1084
1085 // Run additional iterations of hexagon search until the search location
1086 // doesn't update.
1087 while best_cand.rd.cost < current.rd.cost {
1088 // Update the search location.
1089 *current = best_cand;
1090 best_cand = MotionSearchResult::empty();
1091
1092 // Save the index/direction taken in the previous iteration to the current
1093 // search location.
1094 let center_cand_idx = best_cand_idx;
1095
1096 // Look only at candidates that don't overlap with previous iterations. This
1097 // corresponds with the three offsets (2D) with the closest direction to
1098 // that traveled by the previous iteration. HEXAGON_PATTERN has clockwise
1099 // order, so the last direction -1, +0, and +1 (mod 6) give the indices for
1100 // these offsets.
1101 for idx_offset_mod6 in 5..=7 {
1102 let i = (center_cand_idx + idx_offset_mod6) % 6;
1103 let cand_mv = current.mv + HEXAGON_PATTERN[i];
1104
1105 let rd = get_fullpel_mv_rd(
1106 fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min,
1107 mvx_max, mvy_min, mvy_max, w, h, cand_mv,
1108 );
1109
1110 if rd.cost < best_cand.rd.cost {
1111 best_cand_idx = i;
1112 best_cand.mv = cand_mv;
1113 best_cand.rd = rd;
1114 }
1115 }
1116 }
1117
1118 // Refine the motion after completing hexagon search.
1119 let mut best_cand: MotionSearchResult = MotionSearchResult::empty();
1120 for &offset in &SQUARE_REFINE_PATTERN {
1121 let cand_mv = current.mv + offset;
1122 let rd = get_fullpel_mv_rd(
1123 fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min,
1124 mvx_max, mvy_min, mvy_max, w, h, cand_mv,
1125 );
1126
1127 if rd.cost < best_cand.rd.cost {
1128 best_cand.mv = cand_mv;
1129 best_cand.rd = rd;
1130 }
1131 }
1132 if best_cand.rd.cost < current.rd.cost {
1133 *current = best_cand;
1134 }
1135
1136 assert!(!current.is_empty());
1137}
1138
1139/// Uneven multi-hexagon search pattern around a center point. Used for locating
1140/// irregular movement.
1141/// ```text
1142/// X
1143/// X X
1144/// X X
1145/// X X
1146/// X o X
1147/// X X
1148/// X X
1149/// X X
1150/// X
1151/// ```
1152/// 'X's are motion candidates and the 'o' is the center.
1153const UMH_PATTERN: [MotionVector; 16] = search_pattern!(
1154 col: [ -2, -1, 0, 1, 2, 3, 4, 3, 2, 1, 0, -1, -2, 3, -4, -3],
1155 row: [ 4, 4, 4, 4, 4, 2, 0, -2, -4, -4, -4, -4, -4, -2, 0, 2]
1156);
1157
1158/// Perform an uneven multi-hexagon search. There are 4 stages:
1159/// 1. Unsymmetrical-cross search: Search the horizontal and vertical directions
1160/// for the general direction of the motion.
1161/// 2. A 5x5 full search is done to refine the current candidate.
1162/// 3. Uneven multi-hexagon search. See [`UMH_PATTERN`].
1163/// 4. Refinement using standard hexagon search.
1164///
1165/// `current` provides the initial search location and serves as
1166/// the output for the final search results.
1167///
1168/// `me_range` parameter determines how far these stages can search.
1169#[profiling::function]
1170fn uneven_multi_hex_search<T: Pixel>(
1171 fi: &FrameInvariants<T>, po: PlaneOffset, org_region: &PlaneRegion<T>,
1172 p_ref: &Plane<T>, current: &mut MotionSearchResult, bit_depth: usize,
1173 pmv: [MotionVector; 2], lambda: u32, mvx_min: isize, mvx_max: isize,
1174 mvy_min: isize, mvy_max: isize, w: usize, h: usize, me_range: i16,
1175) {
1176 assert!(!current.is_empty());
1177
1178 // Search in a cross pattern to obtain a rough approximate of motion.
1179 // The cross is split into a horizontal and vertical component. Video content
1180 // tends to have more horizontal motion, so the horizontal part of the cross
1181 // is twice as large as the vertical half.
1182 // X -
1183 // | <- me_range/2
1184 // X |
1185 // X X X XoX X X X -
1186 // X
1187 //
1188 // X
1189 // |------|
1190 // \
1191 // me_range
1192 let center = current.mv;
1193
1194 // The larger, horizontal, part of the cross search.
1195 for i in (1..=me_range).step_by(2) {
1196 const HORIZONTAL_LINE: [MotionVector; 2] = search_pattern!(
1197 col: [ 0, 0],
1198 row: [-1, 1]
1199 );
1200
1201 for &offset in &HORIZONTAL_LINE {
1202 let cand_mv = center + offset * i;
1203 let rd = get_fullpel_mv_rd(
1204 fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min,
1205 mvx_max, mvy_min, mvy_max, w, h, cand_mv,
1206 );
1207
1208 if rd.cost < current.rd.cost {
1209 current.mv = cand_mv;
1210 current.rd = rd;
1211 }
1212 }
1213 }
1214
1215 // The smaller, vertical, part of the cross search
1216 for i in (1..=me_range >> 1).step_by(2) {
1217 const VERTICAL_LINE: [MotionVector; 2] = search_pattern!(
1218 col: [-1, 1],
1219 row: [ 0, 0]
1220 );
1221
1222 for &offset in &VERTICAL_LINE {
1223 let cand_mv = center + offset * i;
1224 let rd = get_fullpel_mv_rd(
1225 fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min,
1226 mvx_max, mvy_min, mvy_max, w, h, cand_mv,
1227 );
1228
1229 if rd.cost < current.rd.cost {
1230 current.mv = cand_mv;
1231 current.rd = rd;
1232 }
1233 }
1234 }
1235
1236 // 5x5 full search. Search a 5x5 square region around the current best mv.
1237 let center = current.mv;
1238 for row in -2..=2 {
1239 for col in -2..=2 {
1240 if row == 0 && col == 0 {
1241 continue;
1242 }
1243 let cand_mv = center + MotionVector { row, col };
1244 let rd = get_fullpel_mv_rd(
1245 fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min,
1246 mvx_max, mvy_min, mvy_max, w, h, cand_mv,
1247 );
1248
1249 if rd.cost < current.rd.cost {
1250 current.mv = cand_mv;
1251 current.rd = rd;
1252 }
1253 }
1254 }
1255
1256 // Run the hexagons in uneven multi-hexagon. The hexagonal pattern is tested
1257 // around the best vector at multiple scales.
1258 // Example of the UMH pattern run on a scale of 1 and 2:
1259 // 2 -
1260 // | <- me_range
1261 // 2 2 |
1262 // |
1263 // 2 1 2 |
1264 // 1 1 |
1265 // 2 1 1 2 |
1266 // 1 1 |
1267 // 2 1 o 1 2 |
1268 // 1 1 |
1269 // 2 1 1 2 |
1270 // 1 1 |
1271 // 2 1 2 |
1272 // |
1273 // 2 2 |
1274 // |
1275 // 2 -
1276 // |---------------|
1277 // \
1278 // me_range
1279 let center = current.mv;
1280
1281 // Divide by 4, the radius of the UMH's hexagon.
1282 let iterations = me_range >> 2;
1283 for i in 1..=iterations {
1284 for &offset in &UMH_PATTERN {
1285 let cand_mv = center + offset * i;
1286 let rd = get_fullpel_mv_rd(
1287 fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min,
1288 mvx_max, mvy_min, mvy_max, w, h, cand_mv,
1289 );
1290
1291 if rd.cost < current.rd.cost {
1292 current.mv = cand_mv;
1293 current.rd = rd;
1294 }
1295 }
1296 }
1297
1298 // Refine the search results using a 'normal' hexagon search.
1299 hexagon_search(
1300 fi, po, org_region, p_ref, current, bit_depth, pmv, lambda, mvx_min,
1301 mvx_max, mvy_min, mvy_max, w, h,
1302 );
1303}
1304
1305/// Run a subpixel diamond search. The search is run on multiple step sizes.
1306///
1307/// For each step size, candidate motion vectors are examined for improvement
1308/// to the current search location. The search location is moved to the best
1309/// candidate (if any). This is repeated until the search location stops moving.
1310#[profiling::function]
1311fn subpel_diamond_search<T: Pixel>(
1312 fi: &FrameInvariants<T>, po: PlaneOffset, org_region: &PlaneRegion<T>,
1313 _p_ref: &Plane<T>, bit_depth: usize, pmv: [MotionVector; 2], lambda: u32,
1314 mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize, w: usize,
1315 h: usize, use_satd: bool, current: &mut MotionSearchResult,
1316 ref_frame: RefType,
1317) {
1318 use crate::util::Aligned;
1319
1320 // Motion compensation assembly has special requirements for edges
1321 let mc_w = w.next_power_of_two();
1322 let mc_h = (h + 1) & !1;
1323
1324 // Metadata for subpel scratch pad.
1325 let cfg = PlaneConfig::new(mc_w, mc_h, 0, 0, 0, 0, std::mem::size_of::<T>());
1326 // Stack allocation for subpel scratch pad.
1327 // SAFETY: We write to the array below before reading from it.
1328 let mut buf: Aligned<[T; 128 * 128]> = unsafe { Aligned::uninitialized() };
1329 let mut tmp_region = PlaneRegionMut::from_slice(
1330 &mut buf.data,
1331 &cfg,
1332 Rect { x: 0, y: 0, width: cfg.width, height: cfg.height },
1333 );
1334
1335 // start at 1/2 pel and end at 1/4 or 1/8 pel
1336 let (mut diamond_radius_log2, diamond_radius_end_log2) =
1337 (2u8, u8::from(!fi.allow_high_precision_mv));
1338
1339 loop {
1340 // Find the best candidate from the diamond pattern.
1341 let mut best_cand: MotionSearchResult = MotionSearchResult::empty();
1342 for &offset in &DIAMOND_R1_PATTERN_SUBPEL {
1343 let cand_mv = current.mv + (offset << diamond_radius_log2);
1344
1345 let rd = get_subpel_mv_rd(
1346 fi,
1347 po,
1348 org_region,
1349 bit_depth,
1350 pmv,
1351 lambda,
1352 use_satd,
1353 mvx_min,
1354 mvx_max,
1355 mvy_min,
1356 mvy_max,
1357 w,
1358 h,
1359 cand_mv,
1360 &mut tmp_region,
1361 ref_frame,
1362 );
1363
1364 if rd.cost < best_cand.rd.cost {
1365 best_cand.mv = cand_mv;
1366 best_cand.rd = rd;
1367 }
1368 }
1369
1370 // Continue the search at this scale until a better candidate isn't found.
1371 if current.rd.cost <= best_cand.rd.cost {
1372 if diamond_radius_log2 == diamond_radius_end_log2 {
1373 break;
1374 } else {
1375 diamond_radius_log2 -= 1;
1376 }
1377 } else {
1378 *current = best_cand;
1379 }
1380 }
1381
1382 assert!(!current.is_empty());
1383}
1384
1385#[inline]
1386fn get_fullpel_mv_rd<T: Pixel>(
1387 fi: &FrameInvariants<T>, po: PlaneOffset, org_region: &PlaneRegion<T>,
1388 p_ref: &Plane<T>, bit_depth: usize, pmv: [MotionVector; 2], lambda: u32,
1389 use_satd: bool, mvx_min: isize, mvx_max: isize, mvy_min: isize,
1390 mvy_max: isize, w: usize, h: usize, cand_mv: MotionVector,
1391) -> MVCandidateRD {
1392 if (cand_mv.col as isize) < mvx_min
1393 || (cand_mv.col as isize) > mvx_max
1394 || (cand_mv.row as isize) < mvy_min
1395 || (cand_mv.row as isize) > mvy_max
1396 {
1397 return MVCandidateRD::empty();
1398 }
1399
1400 // Convert the motion vector into an full pixel offset.
1401 let plane_ref: PlaneRegion<'_, T> = p_ref.region(Area::StartingAt {
1402 x: po.x + (cand_mv.col / 8) as isize,
1403 y: po.y + (cand_mv.row / 8) as isize,
1404 });
1405 compute_mv_rd(
1406 fi, pmv, lambda, use_satd, bit_depth, w, h, cand_mv, plane_org:org_region,
1407 &plane_ref,
1408 )
1409}
1410
1411fn get_subpel_mv_rd<T: Pixel>(
1412 fi: &FrameInvariants<T>, po: PlaneOffset, org_region: &PlaneRegion<T>,
1413 bit_depth: usize, pmv: [MotionVector; 2], lambda: u32, use_satd: bool,
1414 mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize, w: usize,
1415 h: usize, cand_mv: MotionVector, tmp_region: &mut PlaneRegionMut<T>,
1416 ref_frame: RefType,
1417) -> MVCandidateRD {
1418 if (cand_mv.col as isize) < mvx_min
1419 || (cand_mv.col as isize) > mvx_max
1420 || (cand_mv.row as isize) < mvy_min
1421 || (cand_mv.row as isize) > mvy_max
1422 {
1423 return MVCandidateRD::empty();
1424 }
1425
1426 let tmp_width = tmp_region.rect().width;
1427 let tmp_height = tmp_region.rect().height;
1428 let tile_rect =
1429 TileRect { x: 0, y: 0, width: tmp_width, height: tmp_height };
1430
1431 PredictionMode::NEWMV.predict_inter_single(
1432 fi, tile_rect, 0, po, tmp_region,
1433 // motion comp's w & h on edges can be different than distortion's
1434 tmp_width, tmp_height, ref_frame, cand_mv,
1435 );
1436 let plane_ref = tmp_region.as_const();
1437 compute_mv_rd(
1438 fi, pmv, lambda, use_satd, bit_depth, w, h, cand_mv, org_region,
1439 &plane_ref,
1440 )
1441}
1442
1443/// Compute the rate distortion stats for a motion vector.
1444#[inline(always)]
1445fn compute_mv_rd<T: Pixel>(
1446 fi: &FrameInvariants<T>, pmv: [MotionVector; 2], lambda: u32,
1447 use_satd: bool, bit_depth: usize, w: usize, h: usize, cand_mv: MotionVector,
1448 plane_org: &PlaneRegion<'_, T>, plane_ref: &PlaneRegion<'_, T>,
1449) -> MVCandidateRD {
1450 let sad: u32 = if use_satd {
1451 get_satd(plane_org, plane_ref, w, h, bit_depth, _cpu:fi.cpu_feature_level)
1452 } else {
1453 get_sad(plane_org, plane_ref, w, h, bit_depth, _cpu:fi.cpu_feature_level)
1454 };
1455
1456 let rate1: u32 = get_mv_rate(a:cand_mv, b:pmv[0], fi.allow_high_precision_mv);
1457 let rate2: u32 = get_mv_rate(a:cand_mv, b:pmv[1], fi.allow_high_precision_mv);
1458 let rate: u32 = rate1.min(rate2 + 1);
1459
1460 MVCandidateRD { cost: 256 * sad as u64 + rate as u64 * lambda as u64, sad }
1461}
1462
1463#[profiling::function]
1464fn full_search<T: Pixel>(
1465 fi: &FrameInvariants<T>, x_lo: isize, x_hi: isize, y_lo: isize, y_hi: isize,
1466 w: usize, h: usize, org_region: &PlaneRegion<T>, p_ref: &Plane<T>,
1467 po: PlaneOffset, step: usize, lambda: u32, pmv: [MotionVector; 2],
1468) -> MotionSearchResult {
1469 let search_region = p_ref.region(Area::Rect {
1470 x: x_lo,
1471 y: y_lo,
1472 width: (x_hi - x_lo) as usize + w,
1473 height: (y_hi - y_lo) as usize + h,
1474 });
1475
1476 let mut best: MotionSearchResult = MotionSearchResult::empty();
1477
1478 // Select rectangular regions within search region with vert+horz windows
1479 for vert_window in search_region.vert_windows(h).step_by(step) {
1480 for ref_window in vert_window.horz_windows(w).step_by(step) {
1481 let &Rect { x, y, .. } = ref_window.rect();
1482
1483 let mv = MotionVector {
1484 row: 8 * (y as i16 - po.y as i16),
1485 col: 8 * (x as i16 - po.x as i16),
1486 };
1487
1488 let rd = compute_mv_rd(
1489 fi,
1490 pmv,
1491 lambda,
1492 false,
1493 fi.sequence.bit_depth,
1494 w,
1495 h,
1496 mv,
1497 org_region,
1498 &ref_window,
1499 );
1500
1501 if rd.cost < best.rd.cost {
1502 best.rd = rd;
1503 best.mv = mv;
1504 }
1505 }
1506 }
1507
1508 best
1509}
1510
1511#[inline(always)]
1512fn get_mv_rate(
1513 a: MotionVector, b: MotionVector, allow_high_precision_mv: bool,
1514) -> u32 {
1515 #[inline(always)]
1516 fn diff_to_rate(diff: i16, allow_high_precision_mv: bool) -> u32 {
1517 let d: i16 = if allow_high_precision_mv { diff } else { diff >> 1 };
1518 2 * ILog::ilog(self:d.abs()) as u32
1519 }
1520
1521 diff_to_rate(diff:a.row - b.row, allow_high_precision_mv)
1522 + diff_to_rate(diff:a.col - b.col, allow_high_precision_mv)
1523}
1524