me.rs source code [crates/rav1e/src/me.rs]

1	// Copyright (c) 2017-2022, The rav1e contributors. All rights reserved
2	//
3	// This source code is subject to the terms of the BSD 2 Clause License and
4	// the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
5	// was not distributed with this source code in the LICENSE file, you can
6	// obtain it at www.aomedia.org/license/software. If the Alliance for Open
7	// Media Patent License 1.0 was not distributed with this source code in the
8	// PATENTS file, you can obtain it at www.aomedia.org/license/patent.
9
10	use crate::api::InterConfig;
11	use crate::context::{
12	BlockOffset, PlaneBlockOffset, SuperBlockOffset, TileBlockOffset,
13	TileSuperBlockOffset, MAX_SB_SIZE_LOG2, MIB_SIZE_LOG2, MI_SIZE,
14	MI_SIZE_LOG2, SB_SIZE,
15	};
16	use crate::dist::*;
17	use crate::frame::*;
18	use crate::mc::MotionVector;
19	use crate::partition::*;
20	use crate::predict::PredictionMode;
21	use crate::tiling::*;
22	use crate::util::ILog;
23	use crate::util::{clamp, Pixel};
24	use crate::FrameInvariants;
25
26	use arrayvec::*;
27	use std::ops::{Index, IndexMut};
28	use std::sync::{Arc, RwLock, RwLockReadGuard, RwLockWriteGuard};
29
30	#[derive(Debug, Copy, Clone, Default)]
31	pub struct MEStats {
32	pub mv: MotionVector,
33	/// sad value, on the scale of a 128x128 block
34	pub normalized_sad: u32,
35	}
36
37	#[derive(Debug, Clone)]
38	pub struct FrameMEStats {
39	stats: Box<[MEStats]>,
40	pub cols: usize,
41	pub rows: usize,
42	}
43
44	/// cbindgen:ignore
45	pub type RefMEStats = Arc<RwLock<[FrameMEStats; REF_FRAMES]>>;
46	/// cbindgen:ignore
47	pub type ReadGuardMEStats<'a> =
48	RwLockReadGuard<'a, [FrameMEStats; REF_FRAMES]>;
49	/// cbindgen:ignore
50	pub type WriteGuardMEStats<'a> =
51	RwLockWriteGuard<'a, [FrameMEStats; REF_FRAMES]>;
52
53	impl FrameMEStats {
54	#[inline]
55	pub fn rows_iter(&self) -> std::slice::ChunksExact<'_, MEStats> {
56	self.stats.chunks_exact(self.cols)
57	}
58
59	pub fn new(cols: usize, rows: usize) -> Self {
60	Self {
61	// dynamic allocation: once per frame
62	stats: vec![MEStats::default(); cols * rows].into_boxed_slice(),
63	cols,
64	rows,
65	}
66	}
67	pub fn new_arc_array(cols: usize, rows: usize) -> RefMEStats {
68	Arc::new(RwLock::new([
69	FrameMEStats::new(cols, rows),
70	FrameMEStats::new(cols, rows),
71	FrameMEStats::new(cols, rows),
72	FrameMEStats::new(cols, rows),
73	FrameMEStats::new(cols, rows),
74	FrameMEStats::new(cols, rows),
75	FrameMEStats::new(cols, rows),
76	FrameMEStats::new(cols, rows),
77	]))
78	}
79	}
80
81	impl Index<usize> for FrameMEStats {
82	type Output = [MEStats];
83	#[inline]
84	fn index(&self, index: usize) -> &Self::Output {
85	&self.stats[index * self.cols..(index + `1`) * self.cols]
86	}
87	}
88
89	impl IndexMut<usize> for FrameMEStats {
90	#[inline]
91	fn index_mut(&mut self, index: usize) -> &mut Self::Output {
92	&mut self.stats[index * self.cols..(index + `1`) * self.cols]
93	}
94	}
95
96	/// Result of motion search.
97	#[derive(Debug, Copy, Clone)]
98	pub struct MotionSearchResult {
99	/// Motion vector chosen by the motion search.
100	pub mv: MotionVector,
101	/// Rate distortion data associated with `mv`.
102	pub rd: MVCandidateRD,
103	}
104
105	impl MotionSearchResult {
106	/// Creates an 'empty' value.
107	///
108	/// To be considered empty, cost is set higher than any naturally occurring
109	/// cost value. The idea is that comparing to any valid rd output, the search
110	/// result will always be replaced.
111	#[inline(always)]
112	pub fn empty() -> MotionSearchResult {
113	MotionSearchResult {
114	mv: MotionVector::default(),
115	rd: MVCandidateRD::empty(),
116	}
117	}
118
119	/// Check if the value should be considered to be empty.
120	#[inline(always)]
121	const fn is_empty(&self) -> bool {
122	self.rd.cost == u64::MAX
123	}
124	}
125
126	/// Holds data from computing rate distortion of a motion vector.
127	#[derive(Debug, Copy, Clone)]
128	pub struct MVCandidateRD {
129	/// Rate distortion cost of the motion vector.
130	pub cost: u64,
131	/// Distortion metric value for the motion vector.
132	pub sad: u32,
133	}
134
135	impl MVCandidateRD {
136	/// Creates an 'empty' value.
137	///
138	/// To be considered empty, cost is set higher than any naturally occurring
139	/// cost value. The idea is that comparing to any valid rd output, the search
140	/// result will always be replaced.
141	#[inline(always)]
142	const fn empty() -> MVCandidateRD {
143	MVCandidateRD { sad: u32::MAX, cost: u64::MAX }
144	}
145	}
146
147	#[derive(Debug, Copy, Clone, Eq, PartialEq)]
148	pub enum MVSamplingMode {
149	INIT,
150	CORNER { right: bool, bottom: bool },
151	}
152
153	pub fn estimate_tile_motion<T: Pixel>(
154	fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>,
155	inter_cfg: &InterConfig,
156	) {
157	let init_size = MIB_SIZE_LOG2;
158
159	let mut prev_ssdec: Option<u8> = None;
160	for mv_size_in_b_log2 in (`2`..=init_size).rev() {
161	let init = mv_size_in_b_log2 == init_size;
162
163	// Choose subsampling. Pass one is quarter res and pass two is at half res.
164	let ssdec = match init_size - mv_size_in_b_log2 {
165	`0` => `2`,
166	`1` => `1`,
167	_ => `0`,
168	};
169
170	let new_subsampling =
171	if let Some(prev) = prev_ssdec { prev != ssdec } else { `false` };
172	prev_ssdec = Some(ssdec);
173
174	// 0.5 and 0.125 are a fudge factors
175	let lambda = (fi.me_lambda * `256.0` / (`1` << (`2` * ssdec)) as f64
176	* if ssdec == `0` { `0.5` } else { `0.125` }) as u32;
177
178	for sby in `0`..ts.sb_height {
179	for sbx in `0`..ts.sb_width {
180	let mut tested_frames_flags = `0`;
181	for &ref_frame in inter_cfg.allowed_ref_frames() {
182	let frame_flag = `1` << fi.ref_frames[ref_frame.to_index()];
183	if tested_frames_flags & frame_flag == frame_flag {
184	continue;
185	}
186	tested_frames_flags \|= frame_flag;
187
188	let tile_bo =
189	TileSuperBlockOffset(SuperBlockOffset { x: sbx, y: sby })
190	.block_offset(`0`, `0`);
191
192	if new_subsampling {
193	refine_subsampled_sb_motion(
194	fi,
195	ts,
196	ref_frame,
197	mv_size_in_b_log2 + `1`,
198	tile_bo,
199	ssdec,
200	lambda,
201	);
202	}
203
204	estimate_sb_motion(
205	fi,
206	ts,
207	ref_frame,
208	mv_size_in_b_log2,
209	tile_bo,
210	init,
211	ssdec,
212	lambda,
213	);
214	}
215	}
216	}
217	}
218	}
219
220	fn estimate_sb_motion<T: Pixel>(
221	fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>, ref_frame: RefType,
222	mv_size_in_b_log2: usize, tile_bo: TileBlockOffset, init: bool, ssdec: u8,
223	lambda: u32,
224	) {
225	let pix_offset = tile_bo.to_luma_plane_offset();
226	let sb_h: usize = SB_SIZE.min(ts.height - pix_offset.y as usize);
227	let sb_w: usize = SB_SIZE.min(ts.width - pix_offset.x as usize);
228
229	let mv_size = MI_SIZE << mv_size_in_b_log2;
230
231	// Process in blocks, cropping at edges.
232	for y in (`0`..sb_h).step_by(mv_size) {
233	for x in (`0`..sb_w).step_by(mv_size) {
234	let corner: MVSamplingMode = if init {
235	MVSamplingMode::INIT
236	} else {
237	// Processing the block a size up produces data that can be used by
238	// the right and bottom corners.
239	MVSamplingMode::CORNER {
240	right: x & mv_size == mv_size,
241	bottom: y & mv_size == mv_size,
242	}
243	};
244
245	let sub_bo = tile_bo
246	.with_offset(x as isize >> MI_SIZE_LOG2, y as isize >> MI_SIZE_LOG2);
247
248	// Clamp to frame edge, rounding up in the case of subsampling.
249	// The rounding makes some assumptions about how subsampling is done.
250	let w = mv_size.min(sb_w - x + (`1` << ssdec) - `1`) >> ssdec;
251	let h = mv_size.min(sb_h - y + (`1` << ssdec) - `1`) >> ssdec;
252
253	// Run motion estimation.
254	// Note that the initial search (init) instructs the called function to
255	// perform a more extensive search.
256	if let Some(results) = estimate_motion(
257	fi,
258	ts,
259	w,
260	h,
261	sub_bo,
262	ref_frame,
263	None,
264	corner,
265	init,
266	ssdec,
267	Some(lambda),
268	) {
269	// normalize sad to 128x128 block
270	let sad = (((results.rd.sad as u64) << (MAX_SB_SIZE_LOG2 * `2`))
271	/ (w * h) as u64) as u32;
272	save_me_stats(
273	ts,
274	mv_size_in_b_log2,
275	sub_bo,
276	ref_frame,
277	MEStats { mv: results.mv, normalized_sad: sad },
278	);
279	}
280	}
281	}
282	}
283
284	fn refine_subsampled_sb_motion<T: Pixel>(
285	fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>, ref_frame: RefType,
286	mv_size_in_b_log2: usize, tile_bo: TileBlockOffset, ssdec: u8, lambda: u32,
287	) {
288	let pix_offset = tile_bo.to_luma_plane_offset();
289	let sb_h: usize = SB_SIZE.min(ts.height - pix_offset.y as usize);
290	let sb_w: usize = SB_SIZE.min(ts.width - pix_offset.x as usize);
291
292	let mv_size = MI_SIZE << mv_size_in_b_log2;
293
294	// Process in blocks, cropping at edges.
295	for y in (`0`..sb_h).step_by(mv_size) {
296	for x in (`0`..sb_w).step_by(mv_size) {
297	let sub_bo = tile_bo
298	.with_offset(x as isize >> MI_SIZE_LOG2, y as isize >> MI_SIZE_LOG2);
299
300	// Clamp to frame edge, rounding up in the case of subsampling.
301	// The rounding makes some assumptions about how subsampling is done.
302	let w = mv_size.min(sb_w - x + (`1` << ssdec) - `1`) >> ssdec;
303	let h = mv_size.min(sb_h - y + (`1` << ssdec) - `1`) >> ssdec;
304
305	// Refine the existing motion estimate
306	if let Some(results) = refine_subsampled_motion_estimate(
307	fi, ts, w, h, sub_bo, ref_frame, ssdec, lambda,
308	) {
309	// normalize sad to 128x128 block
310	let sad = (((results.rd.sad as u64) << (MAX_SB_SIZE_LOG2 * `2`))
311	/ (w * h) as u64) as u32;
312	save_me_stats(
313	ts,
314	mv_size_in_b_log2,
315	sub_bo,
316	ref_frame,
317	MEStats { mv: results.mv, normalized_sad: sad },
318	);
319	}
320	}
321	}
322	}
323
324	fn save_me_stats<T: Pixel>(
325	ts: &mut TileStateMut<'_, T>, mv_size_in_b_log2: usize,
326	tile_bo: TileBlockOffset, ref_frame: RefType, stats: MEStats,
327	) {
328	let size_in_b: usize = `1` << mv_size_in_b_log2;
329	let tile_me_stats: &mut TileMEStatsMut<'_> = &mut ts.me_stats[ref_frame.to_index()];
330	let tile_bo_x_end: usize = (tile_bo.0.x + size_in_b).min(ts.mi_width);
331	let tile_bo_y_end: usize = (tile_bo.0.y + size_in_b).min(ts.mi_height);
332	for mi_y: usize in tile_bo.0.y..tile_bo_y_end {
333	for a: &mut MEStats in tile_me_stats[mi_y][tile_bo.0.x..tile_bo_x_end].iter_mut() {
334	*a = stats;
335	}
336	}
337	}
338
339	fn get_mv_range(
340	w_in_b: usize, h_in_b: usize, bo: PlaneBlockOffset, blk_w: usize,
341	blk_h: usize,
342	) -> (isize, isize, isize, isize) {
343	let border_w: isize = `128` + blk_w as isize * `8`;
344	let border_h: isize = `128` + blk_h as isize * `8`;
345	let mvx_min: isize = -(bo.0.x as isize) * (`8` * MI_SIZE) as isize - border_w;
346	let mvx_max: isize = ((w_in_b - bo.0.x) as isize - (blk_w / MI_SIZE) as isize)
347	* (`8` * MI_SIZE) as isize
348	+ border_w;
349	let mvy_min: isize = -(bo.0.y as isize) * (`8` * MI_SIZE) as isize - border_h;
350	let mvy_max: isize = ((h_in_b - bo.0.y) as isize - (blk_h / MI_SIZE) as isize)
351	* (`8` * MI_SIZE) as isize
352	+ border_h;
353
354	// <https://aomediacodec.github.io/av1-spec/#assign-mv-semantics>
355	use crate::context::{MV_LOW, MV_UPP};
356	(
357	mvx_min.max(MV_LOW as isize + `1`),
358	mvx_max.min(MV_UPP as isize - `1`),
359	mvy_min.max(MV_LOW as isize + `1`),
360	mvy_max.min(MV_UPP as isize - `1`),
361	)
362	}
363
364	struct MotionEstimationSubsets {
365	min_sad: u32,
366	median: Option<MotionVector>,
367	subset_b: ArrayVec<MotionVector, `5`>,
368	subset_c: ArrayVec<MotionVector, `5`>,
369	}
370
371	impl MotionEstimationSubsets {
372	fn all_mvs(&self) -> ArrayVec<MotionVector, `11`> {
373	let mut all: ArrayVec = ArrayVec::new();
374	if let Some(median: MotionVector) = self.median {
375	all.push(element:median);
376	}
377
378	all.extend(self.subset_b.iter().copied());
379	all.extend(self.subset_c.iter().copied());
380
381	all
382	}
383	}
384
385	#[profiling::function]
386	fn get_subset_predictors(
387	tile_bo: TileBlockOffset, tile_me_stats: &TileMEStats<'_>,
388	frame_ref_opt: Option<ReadGuardMEStats<'_>>, ref_frame_id: usize,
389	pix_w: usize, pix_h: usize, mvx_min: isize, mvx_max: isize, mvy_min: isize,
390	mvy_max: isize, corner: MVSamplingMode, ssdec: u8,
391	) -> MotionEstimationSubsets {
392	let mut min_sad: u32 = u32::MAX;
393	let mut subset_b = ArrayVec::<MotionVector, `5`>::new();
394	let mut subset_c = ArrayVec::<MotionVector, `5`>::new();
395
396	// rounded up width in blocks
397	let w = ((pix_w << ssdec) + MI_SIZE - `1`) >> MI_SIZE_LOG2;
398	let h = ((pix_h << ssdec) + MI_SIZE - `1`) >> MI_SIZE_LOG2;
399
400	// Get predictors from the same frame.
401
402	let clipped_half_w = (w >> `1`).min(tile_me_stats.cols() - `1` - tile_bo.0.x);
403	let clipped_half_h = (h >> `1`).min(tile_me_stats.rows() - `1` - tile_bo.0.y);
404
405	let mut process_cand = \|stats: MEStats\| -> MotionVector {
406	min_sad = min_sad.min(stats.normalized_sad);
407	let mv = stats.mv.quantize_to_fullpel();
408	MotionVector {
409	col: clamp(mv.col as isize, mvx_min, mvx_max) as i16,
410	row: clamp(mv.row as isize, mvy_min, mvy_max) as i16,
411	}
412	};
413
414	// Sample the middle of all block edges bordering this one.
415	// Note: If motion vectors haven't been precomputed to a given blocksize, then
416	// the right and bottom edges will be duplicates of the center predictor when
417	// processing in raster order.
418
419	// left
420	if tile_bo.0.x > `0` {
421	subset_b.push(process_cand(
422	tile_me_stats[tile_bo.0.y + clipped_half_h][tile_bo.0.x - `1`],
423	));
424	}
425	// top
426	if tile_bo.0.y > `0` {
427	subset_b.push(process_cand(
428	tile_me_stats[tile_bo.0.y - `1`][tile_bo.0.x + clipped_half_w],
429	));
430	}
431
432	// Sampling far right and far bottom edges was tested, but had worse results
433	// without an extensive threshold test (with threshold being applied after
434	// checking median and the best of each subset).
435
436	// right
437	if let MVSamplingMode::CORNER { right: `true`, bottom: _ } = corner {
438	if tile_bo.0.x + w < tile_me_stats.cols() {
439	subset_b.push(process_cand(
440	tile_me_stats[tile_bo.0.y + clipped_half_h][tile_bo.0.x + w],
441	));
442	}
443	}
444	// bottom
445	if let MVSamplingMode::CORNER { right: _, bottom: `true` } = corner {
446	if tile_bo.0.y + h < tile_me_stats.rows() {
447	subset_b.push(process_cand(
448	tile_me_stats[tile_bo.0.y + h][tile_bo.0.x + clipped_half_w],
449	));
450	}
451	}
452
453	let median = if corner != MVSamplingMode::INIT {
454	// Sample the center of the current block.
455	Some(process_cand(
456	tile_me_stats[tile_bo.0.y + clipped_half_h]
457	[tile_bo.0.x + clipped_half_w],
458	))
459	} else if subset_b.len() != `3` {
460	None
461	} else {
462	let mut rows: ArrayVec<i16, `3`> = subset_b.iter().map(\|&a\| a.row).collect();
463	let mut cols: ArrayVec<i16, `3`> = subset_b.iter().map(\|&a\| a.col).collect();
464	rows.as_mut_slice().sort_unstable();
465	cols.as_mut_slice().sort_unstable();
466	Some(MotionVector { row: rows[`1`], col: cols[`1`] })
467	};
468
469	// Zero motion vector, don't use add_cand since it skips zero vectors.
470	subset_b.push(MotionVector::default());
471
472	// EPZS subset C predictors.
473	// Sample the middle of bordering side of the left, right, top and bottom
474	// blocks of the previous frame.
475	// Sample the middle of this block in the previous frame.
476
477	if let Some(frame_me_stats) = frame_ref_opt {
478	let prev_frame = &frame_me_stats[ref_frame_id];
479
480	let frame_bo = PlaneBlockOffset(BlockOffset {
481	x: tile_me_stats.x() + tile_bo.0.x,
482	y: tile_me_stats.y() + tile_bo.0.y,
483	});
484	let clipped_half_w = (w >> `1`).min(prev_frame.cols - `1` - frame_bo.0.x);
485	let clipped_half_h = (h >> `1`).min(prev_frame.rows - `1` - frame_bo.0.y);
486
487	// left
488	if frame_bo.0.x > `0` {
489	subset_c.push(process_cand(
490	prev_frame[frame_bo.0.y + clipped_half_h][frame_bo.0.x - `1`],
491	));
492	}
493	// top
494	if frame_bo.0.y > `0` {
495	subset_c.push(process_cand(
496	prev_frame[frame_bo.0.y - `1`][frame_bo.0.x + clipped_half_w],
497	));
498	}
499	// right
500	if frame_bo.0.x + w < prev_frame.cols {
501	subset_c.push(process_cand(
502	prev_frame[frame_bo.0.y + clipped_half_h][frame_bo.0.x + w],
503	));
504	}
505	// bottom
506	if frame_bo.0.y + h < prev_frame.rows {
507	subset_c.push(process_cand(
508	prev_frame[frame_bo.0.y + h][frame_bo.0.x + clipped_half_w],
509	));
510	}
511
512	subset_c.push(process_cand(
513	prev_frame[frame_bo.0.y + clipped_half_h][frame_bo.0.x + clipped_half_w],
514	));
515	}
516
517	// Undo normalization to 128x128 block size
518	let min_sad = ((min_sad as u64 * (pix_w * pix_h) as u64)
519	>> (MAX_SB_SIZE_LOG2 * `2`)) as u32;
520
521	let dec_mv = \|mv: MotionVector\| MotionVector {
522	col: mv.col >> ssdec,
523	row: mv.row >> ssdec,
524	};
525	let median = median.map(dec_mv);
526	for mv in subset_b.iter_mut() {
527	mv = dec_mv(mv);
528	}
529	for mv in subset_c.iter_mut() {
530	mv = dec_mv(mv);
531	}
532
533	MotionEstimationSubsets { min_sad, median, subset_b, subset_c }
534	}
535
536	pub fn estimate_motion<T: Pixel>(
537	fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>, w: usize, h: usize,
538	tile_bo: TileBlockOffset, ref_frame: RefType,
539	pmv: Option<[MotionVector; `2`]>, corner: MVSamplingMode,
540	extensive_search: bool, ssdec: u8, lambda: Option<u32>,
541	) -> Option<MotionSearchResult> {
542	if let Some(ref rec) =
543	fi.rec_buffer.frames[fi.ref_frames[ref_frame.to_index()] as usize]
544	{
545	let frame_bo = ts.to_frame_block_offset(tile_bo);
546	let (mvx_min, mvx_max, mvy_min, mvy_max) =
547	get_mv_range(fi.w_in_b, fi.h_in_b, frame_bo, w << ssdec, h << ssdec);
548
549	let lambda = lambda.unwrap_or({
550	// 0.5 is a fudge factor
551	(fi.me_lambda * `256.0` * `0.5`) as u32
552	});
553
554	let global_mv = [MotionVector { row: `0`, col: `0` }; `2`];
555
556	let po = frame_bo.to_luma_plane_offset();
557	let (mvx_min, mvx_max, mvy_min, mvy_max) =
558	(mvx_min >> ssdec, mvx_max >> ssdec, mvy_min >> ssdec, mvy_max >> ssdec);
559	let po = PlaneOffset { x: po.x >> ssdec, y: po.y >> ssdec };
560	let p_ref = match ssdec {
561	`0` => &rec.frame.planes[`0`],
562	`1` => &rec.input_hres,
563	`2` => &rec.input_qres,
564	_ => unimplemented!(),
565	};
566
567	let org_region = &match ssdec {
568	`0` => ts.input_tile.planes[`0`]
569	.subregion(Area::BlockStartingAt { bo: tile_bo.0 }),
570	`1` => ts.input_hres.region(Area::StartingAt { x: po.x, y: po.y }),
571	`2` => ts.input_qres.region(Area::StartingAt { x: po.x, y: po.y }),
572	_ => unimplemented!(),
573	};
574
575	let mut best: MotionSearchResult = full_pixel_me(
576	fi,
577	ts,
578	org_region,
579	p_ref,
580	tile_bo,
581	po,
582	lambda,
583	pmv.unwrap_or(global_mv),
584	w,
585	h,
586	mvx_min,
587	mvx_max,
588	mvy_min,
589	mvy_max,
590	ref_frame,
591	corner,
592	extensive_search,
593	ssdec,
594	);
595
596	if let Some(pmv) = pmv {
597	let use_satd: bool = fi.config.speed_settings.motion.use_satd_subpel;
598	if use_satd {
599	best.rd = get_fullpel_mv_rd(
600	fi,
601	po,
602	org_region,
603	p_ref,
604	fi.sequence.bit_depth,
605	pmv,
606	lambda,
607	use_satd,
608	mvx_min,
609	mvx_max,
610	mvy_min,
611	mvy_max,
612	w,
613	h,
614	best.mv,
615	);
616	}
617
618	sub_pixel_me(
619	fi, po, org_region, p_ref, lambda, pmv, mvx_min, mvx_max, mvy_min,
620	mvy_max, w, h, use_satd, &mut best, ref_frame,
621	);
622	}
623
624	// Scale motion vectors to full res size
625	best.mv = best.mv << ssdec;
626
627	Some(best)
628	} else {
629	None
630	}
631	}
632
633	/// Refine motion estimation that was computed one level of subsampling up.
634	fn refine_subsampled_motion_estimate<T: Pixel>(
635	fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>, w: usize, h: usize,
636	tile_bo: TileBlockOffset, ref_frame: RefType, ssdec: u8, lambda: u32,
637	) -> Option<MotionSearchResult> {
638	if let Some(ref rec) =
639	fi.rec_buffer.frames[fi.ref_frames[ref_frame.to_index()] as usize]
640	{
641	let frame_bo = ts.to_frame_block_offset(tile_bo);
642	let (mvx_min, mvx_max, mvy_min, mvy_max) =
643	get_mv_range(fi.w_in_b, fi.h_in_b, frame_bo, w << ssdec, h << ssdec);
644
645	let pmv = [MotionVector { row: `0`, col: `0` }; `2`];
646
647	let po = frame_bo.to_luma_plane_offset();
648	let (mvx_min, mvx_max, mvy_min, mvy_max) =
649	(mvx_min >> ssdec, mvx_max >> ssdec, mvy_min >> ssdec, mvy_max >> ssdec);
650	let po = PlaneOffset { x: po.x >> ssdec, y: po.y >> ssdec };
651	let p_ref = match ssdec {
652	`0` => &rec.frame.planes[`0`],
653	`1` => &rec.input_hres,
654	`2` => &rec.input_qres,
655	_ => unimplemented!(),
656	};
657
658	let org_region = &match ssdec {
659	`0` => ts.input_tile.planes[`0`]
660	.subregion(Area::BlockStartingAt { bo: tile_bo.0 }),
661	`1` => ts.input_hres.region(Area::StartingAt { x: po.x, y: po.y }),
662	`2` => ts.input_qres.region(Area::StartingAt { x: po.x, y: po.y }),
663	_ => unimplemented!(),
664	};
665
666	let mv =
667	ts.me_stats[ref_frame.to_index()][tile_bo.0.y][tile_bo.0.x].mv >> ssdec;
668
669	// Given a motion vector at 0 at higher subsampling:
670	// \| -1 \| 0 \| 1 \|
671	// then the vectors at -1 to 2 should be tested at the current subsampling.
672	// \|-------------\|
673	// \| -2 -1 \| 0 1 \| 2 3 \|
674	// This corresponds to a 4x4 full search.
675	let x_lo = po.x + (mv.col as isize / `8` - `1`).max(mvx_min / `8`);
676	let x_hi = po.x + (mv.col as isize / `8` + `2`).min(mvx_max / `8`);
677	let y_lo = po.y + (mv.row as isize / `8` - `1`).max(mvy_min / `8`);
678	let y_hi = po.y + (mv.row as isize / `8` + `2`).min(mvy_max / `8`);
679	let mut results = full_search(
680	fi, x_lo, x_hi, y_lo, y_hi, w, h, org_region, p_ref, po, `1`, lambda, pmv,
681	);
682
683	// Scale motion vectors to full res size
684	results.mv = results.mv << ssdec;
685
686	Some(results)
687	} else {
688	None
689	}
690	}
691
692	#[profiling::function]
693	fn full_pixel_me<T: Pixel>(
694	fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>,
695	org_region: &PlaneRegion<T>, p_ref: &Plane<T>, tile_bo: TileBlockOffset,
696	po: PlaneOffset, lambda: u32, pmv: [MotionVector; `2`], w: usize, h: usize,
697	mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
698	ref_frame: RefType, corner: MVSamplingMode, extensive_search: bool,
699	ssdec: u8,
700	) -> MotionSearchResult {
701	let ref_frame_id = ref_frame.to_index();
702	let tile_me_stats = &ts.me_stats[ref_frame_id].as_const();
703	let frame_ref = fi.rec_buffer.frames[fi.ref_frames[`0`] as usize]
704	.as_ref()
705	.map(\|frame_ref\| frame_ref.frame_me_stats.read().expect("poisoned lock"));
706	let subsets = get_subset_predictors(
707	tile_bo,
708	tile_me_stats,
709	frame_ref,
710	ref_frame_id,
711	w,
712	h,
713	mvx_min,
714	mvx_max,
715	mvy_min,
716	mvy_max,
717	corner,
718	ssdec,
719	);
720
721	let try_cands = \|predictors: &[MotionVector],
722	best: &mut MotionSearchResult\| {
723	let mut results = get_best_predictor(
724	fi,
725	po,
726	org_region,
727	p_ref,
728	predictors,
729	fi.sequence.bit_depth,
730	pmv,
731	lambda,
732	mvx_min,
733	mvx_max,
734	mvy_min,
735	mvy_max,
736	w,
737	h,
738	);
739	fullpel_diamond_search(
740	fi,
741	po,
742	org_region,
743	p_ref,
744	&mut results,
745	fi.sequence.bit_depth,
746	pmv,
747	lambda,
748	mvx_min,
749	mvx_max,
750	mvy_min,
751	mvy_max,
752	w,
753	h,
754	);
755
756	if results.rd.cost < best.rd.cost {
757	*best = results;
758	}
759	};
760
761	let mut best: MotionSearchResult = MotionSearchResult::empty();
762	if !extensive_search {
763	try_cands(&subsets.all_mvs(), &mut best);
764	best
765	} else {
766	// Perform a more thorough search before resorting to full search.
767	// Search the median, the best mvs of neighboring blocks, and motion vectors
768	// from the previous frame. Stop once a candidate with a sad less than a
769	// threshold is found.
770
771	let thresh = (subsets.min_sad as f32 * `1.2`) as u32
772	+ (((w * h) as u32) << (fi.sequence.bit_depth - `8`));
773
774	if let Some(median) = subsets.median {
775	try_cands(&[median], &mut best);
776
777	if best.rd.sad < thresh {
778	return best;
779	}
780	}
781
782	try_cands(&subsets.subset_b, &mut best);
783
784	if best.rd.sad < thresh {
785	return best;
786	}
787
788	try_cands(&subsets.subset_c, &mut best);
789
790	if best.rd.sad < thresh {
791	return best;
792	}
793
794	// Preform UMH search, either as the last possible search when full search
795	// is disabled, or as the last search before resorting to full search.
796	uneven_multi_hex_search(
797	fi,
798	po,
799	org_region,
800	p_ref,
801	&mut best,
802	fi.sequence.bit_depth,
803	pmv,
804	lambda,
805	mvx_min,
806	mvx_max,
807	mvy_min,
808	mvy_max,
809	w,
810	h,
811	// Use 24, since it is the largest range that x264 uses.
812	`24`,
813	);
814
815	if !fi.config.speed_settings.motion.me_allow_full_search
816	\|\| best.rd.sad < thresh
817	{
818	return best;
819	}
820
821	{
822	let range_x = (`192` * fi.me_range_scale as isize) >> ssdec;
823	let range_y = (`64` * fi.me_range_scale as isize) >> ssdec;
824	let x_lo = po.x + (-range_x).max(mvx_min / `8`);
825	let x_hi = po.x + (range_x).min(mvx_max / `8`);
826	let y_lo = po.y + (-range_y).max(mvy_min / `8`);
827	let y_hi = po.y + (range_y).min(mvy_max / `8`);
828
829	let results = full_search(
830	fi,
831	x_lo,
832	x_hi,
833	y_lo,
834	y_hi,
835	w,
836	h,
837	org_region,
838	p_ref,
839	po,
840	// Full search is run at quarter resolution, except for short edges.
841	// When subsampling is lower than usual, the step size is raised so that
842	// the number of search locations stays the same.
843	`4` >> ssdec,
844	lambda,
845	[MotionVector::default(); `2`],
846	);
847
848	if results.rd.cost < best.rd.cost {
849	results
850	} else {
851	best
852	}
853	}
854	}
855	}
856
857	fn sub_pixel_me<T: Pixel>(
858	fi: &FrameInvariants<T>, po: PlaneOffset, org_region: &PlaneRegion<T>,
859	p_ref: &Plane<T>, lambda: u32, pmv: [MotionVector; `2`], mvx_min: isize,
860	mvx_max: isize, mvy_min: isize, mvy_max: isize, w: usize, h: usize,
861	use_satd: bool, best: &mut MotionSearchResult, ref_frame: RefType,
862	) {
863	subpel_diamond_search(
864	fi,
865	po,
866	org_region,
867	p_ref,
868	fi.sequence.bit_depth,
869	pmv,
870	lambda,
871	mvx_min,
872	mvx_max,
873	mvy_min,
874	mvy_max,
875	w,
876	h,
877	use_satd,
878	current:best,
879	ref_frame,
880	);
881	}
882
883	#[profiling::function]
884	fn get_best_predictor<T: Pixel>(
885	fi: &FrameInvariants<T>, po: PlaneOffset, org_region: &PlaneRegion<T>,
886	p_ref: &Plane<T>, predictors: &[MotionVector], bit_depth: usize,
887	pmv: [MotionVector; `2`], lambda: u32, mvx_min: isize, mvx_max: isize,
888	mvy_min: isize, mvy_max: isize, w: usize, h: usize,
889	) -> MotionSearchResult {
890	let mut best: MotionSearchResult = MotionSearchResult::empty();
891
892	for &init_mv: MotionVector in predictors.iter() {
893	let rd: MVCandidateRD = get_fullpel_mv_rd(
894	fi, po, org_region, p_ref, bit_depth, pmv, lambda, use_satd:`false`, mvx_min,
895	mvx_max, mvy_min, mvy_max, w, h, cand_mv:init_mv,
896	);
897
898	if rd.cost < best.rd.cost {
899	best.mv = init_mv;
900	best.rd = rd;
901	}
902	}
903
904	best
905	}
906
907	/// Declares an array of motion vectors in structure of arrays syntax.
908	/// Compared to [`search_pattern_subpel`], this version creates motion vectors
909	/// in fullpel resolution (x8).
910	macro_rules! search_pattern {
911	($field_a:ident: [$($ll_a:expr),], $field_b:ident: [$($ll_b:expr),]) => {
912	[ $(MotionVector { $field_a: $ll_a << `3`, $field_b: $ll_b << `3` } ),*]
913	};
914	}
915
916	/// Declares an array of motion vectors in structure of arrays syntax.
917	macro_rules! search_pattern_subpel {
918	($field_a:ident: [$($ll_a:expr),], $field_b:ident: [$($ll_b:expr),]) => {
919	[ $(MotionVector { $field_a: $ll_a, $field_b: $ll_b } ),*]
920	};
921	}
922
923	/// Diamond pattern of radius 1 as shown below. For fullpel search, use
924	/// `DIAMOND_R1_PATTERN_FULLPEL` since it has been scaled for fullpel search.
925	/// ```text
926	/// X
927	/// XoX
928	/// X
929	/// ```
930	/// 'X's are motion candidates and the 'o' is the center.
931	///
932	const DIAMOND_R1_PATTERN_SUBPEL: [MotionVector; `4`] = search_pattern_subpel!(
933	col: [ `0`, `1`, `0`, `-1`],
934	row: [ `1`, `0`, `-1`, `0`]
935	);
936	/// Diamond pattern of radius 1 as shown below. Unlike `DIAMOND_R1_PATTERN`, the
937	/// vectors have been shifted fullpel scale.
938	/// ```text
939	/// X
940	/// XoX
941	/// X
942	/// ```
943	/// 'X's are motion candidates and the 'o' is the center.
944	const DIAMOND_R1_PATTERN: [MotionVector; `4`] = search_pattern!(
945	col: [ `0`, `1`, `0`, `-1`],
946	row: [ `1`, `0`, `-1`, `0`]
947	);
948
949	/// Run a full pixel diamond search. The search is run on multiple step sizes.
950	///
951	/// For each step size, candidate motion vectors are examined for improvement
952	/// to the current search location. The search location is moved to the best
953	/// candidate (if any). This is repeated until the search location stops moving.
954	#[profiling::function]
955	fn fullpel_diamond_search<T: Pixel>(
956	fi: &FrameInvariants<T>, po: PlaneOffset, org_region: &PlaneRegion<T>,
957	p_ref: &Plane<T>, current: &mut MotionSearchResult, bit_depth: usize,
958	pmv: [MotionVector; `2`], lambda: u32, mvx_min: isize, mvx_max: isize,
959	mvy_min: isize, mvy_max: isize, w: usize, h: usize,
960	) {
961	// Define the initial and the final scale (log2) of the diamond.
962	let (mut diamond_radius_log2, diamond_radius_end_log2) = (`1u8`, `0u8`);
963
964	loop {
965	// Find the best candidate from the diamond pattern.
966	let mut best_cand: MotionSearchResult = MotionSearchResult::empty();
967	for &offset in &DIAMOND_R1_PATTERN {
968	let cand_mv = current.mv + (offset << diamond_radius_log2);
969	let rd = get_fullpel_mv_rd(
970	fi, po, org_region, p_ref, bit_depth, pmv, lambda, `false`, mvx_min,
971	mvx_max, mvy_min, mvy_max, w, h, cand_mv,
972	);
973
974	if rd.cost < best_cand.rd.cost {
975	best_cand.mv = cand_mv;
976	best_cand.rd = rd;
977	}
978	}
979
980	// Continue the search at this scale until the can't find a better candidate
981	// to use.
982	if current.rd.cost <= best_cand.rd.cost {
983	if diamond_radius_log2 == diamond_radius_end_log2 {
984	break;
985	} else {
986	diamond_radius_log2 -= `1`;
987	}
988	} else {
989	*current = best_cand;
990	}
991	}
992
993	assert!(!current.is_empty());
994	}
995
996	/// A hexagon pattern around a center point. The pattern is ordered so that the
997	/// offsets circle around the center. This is done to allow pruning locations
998	/// covered by the last iteration.
999	/// ```text
1000	/// 21012
1001	/// 2 X X
1002	/// 1
1003	/// 0 X o X
1004	/// 1
1005	/// 2 X X
1006	/// ```
1007	/// 'X's are motion candidates and the 'o' is the center.
1008	///
1009	/// The illustration below shows the process of a hexagon search.
1010	/// ```text
1011	/// Step 1 Step 2
1012	/// 1 1 1 1 2
1013	///
1014	/// 1(0)1 => 1 0(1)2
1015	///
1016	/// 1 1 1 1 2
1017	/// ```
1018	/// The search above has gone through the following steps.
1019	/// 1. Search '1' elements for better candidates than the center '0'.
1020	/// 2. Recenter around the best candidate ('(1)') and hexagon candidates that
1021	/// don't overlap with the previous search step (labeled '2').
1022	const HEXAGON_PATTERN: [MotionVector; `6`] = search_pattern!(
1023	col: [ `0`, `2`, `2`, `0`, `-2`, `-2`],
1024	row: [ `-2`, `-1`, `1`, `2`, `1`, `-1`]
1025	);
1026
1027	/// A small square pattern around a center point.
1028	/// ```text
1029	/// 101
1030	/// 1 XXX
1031	/// 0 XoX
1032	/// 1 XXX
1033	/// ```
1034	/// 'X's are motion candidates and the 'o' is the center.
1035	const SQUARE_REFINE_PATTERN: [MotionVector; `8`] = search_pattern!(
1036	col: [ `-1`, `0`, `1`, `-1`, `1`, `-1`, `0`, `1`],
1037	row: [ `1`, `1`, `1`, `0`, `0`, `-1`, `-1`, `-1`]
1038	);
1039
1040	/// Perform hexagon search and refine afterwards.
1041	///
1042	/// In the hexagon search stage, candidate motion vectors are examined for
1043	/// improvement to the current search location. The search location is moved to
1044	/// the best candidate (if any). This is repeated until the search location
1045	/// stops moving.
1046	///
1047	/// Refinement uses a square pattern that fits between the hexagon candidates.
1048	///
1049	/// The hexagon pattern is defined by [`HEXAGON_PATTERN`] and the refinement
1050	/// is defined by [`SQUARE_REFINE_PATTERN`].
1051	///
1052	/// `current` provides the initial search location and serves as
1053	/// the output for the final search results.
1054	#[profiling::function]
1055	fn hexagon_search<T: Pixel>(
1056	fi: &FrameInvariants<T>, po: PlaneOffset, org_region: &PlaneRegion<T>,
1057	p_ref: &Plane<T>, current: &mut MotionSearchResult, bit_depth: usize,
1058	pmv: [MotionVector; `2`], lambda: u32, mvx_min: isize, mvx_max: isize,
1059	mvy_min: isize, mvy_max: isize, w: usize, h: usize,
1060	) {
1061	// The first iteration of hexagon search is implemented separate from
1062	// subsequent iterations, which overlap with previous iterations.
1063
1064	// Holds what candidate is used (if any). This is used to determine which
1065	// candidates have already been tested in a previous iteration and can be
1066	// skipped.
1067	let mut best_cand_idx: usize = `0`;
1068	let mut best_cand: MotionSearchResult = MotionSearchResult::empty();
1069
1070	// First iteration of hexagon search. There are six candidates to consider.
1071	for i in `0`..`6` {
1072	let cand_mv = current.mv + HEXAGON_PATTERN[i];
1073	let rd = get_fullpel_mv_rd(
1074	fi, po, org_region, p_ref, bit_depth, pmv, lambda, `false`, mvx_min,
1075	mvx_max, mvy_min, mvy_max, w, h, cand_mv,
1076	);
1077
1078	if rd.cost < best_cand.rd.cost {
1079	best_cand_idx = i;
1080	best_cand.mv = cand_mv;
1081	best_cand.rd = rd;
1082	}
1083	}
1084
1085	// Run additional iterations of hexagon search until the search location
1086	// doesn't update.
1087	while best_cand.rd.cost < current.rd.cost {
1088	// Update the search location.
1089	*current = best_cand;
1090	best_cand = MotionSearchResult::empty();
1091
1092	// Save the index/direction taken in the previous iteration to the current
1093	// search location.
1094	let center_cand_idx = best_cand_idx;
1095
1096	// Look only at candidates that don't overlap with previous iterations. This
1097	// corresponds with the three offsets (2D) with the closest direction to
1098	// that traveled by the previous iteration. HEXAGON_PATTERN has clockwise
1099	// order, so the last direction -1, +0, and +1 (mod 6) give the indices for
1100	// these offsets.
1101	for idx_offset_mod6 in `5`..=`7` {
1102	let i = (center_cand_idx + idx_offset_mod6) % `6`;
1103	let cand_mv = current.mv + HEXAGON_PATTERN[i];
1104
1105	let rd = get_fullpel_mv_rd(
1106	fi, po, org_region, p_ref, bit_depth, pmv, lambda, `false`, mvx_min,
1107	mvx_max, mvy_min, mvy_max, w, h, cand_mv,
1108	);
1109
1110	if rd.cost < best_cand.rd.cost {
1111	best_cand_idx = i;
1112	best_cand.mv = cand_mv;
1113	best_cand.rd = rd;
1114	}
1115	}
1116	}
1117
1118	// Refine the motion after completing hexagon search.
1119	let mut best_cand: MotionSearchResult = MotionSearchResult::empty();
1120	for &offset in &SQUARE_REFINE_PATTERN {
1121	let cand_mv = current.mv + offset;
1122	let rd = get_fullpel_mv_rd(
1123	fi, po, org_region, p_ref, bit_depth, pmv, lambda, `false`, mvx_min,
1124	mvx_max, mvy_min, mvy_max, w, h, cand_mv,
1125	);
1126
1127	if rd.cost < best_cand.rd.cost {
1128	best_cand.mv = cand_mv;
1129	best_cand.rd = rd;
1130	}
1131	}
1132	if best_cand.rd.cost < current.rd.cost {
1133	*current = best_cand;
1134	}
1135
1136	assert!(!current.is_empty());
1137	}
1138
1139	/// Uneven multi-hexagon search pattern around a center point. Used for locating
1140	/// irregular movement.
1141	/// ```text
1142	/// X
1143	/// X X
1144	/// X X
1145	/// X X
1146	/// X o X
1147	/// X X
1148	/// X X
1149	/// X X
1150	/// X
1151	/// ```
1152	/// 'X's are motion candidates and the 'o' is the center.
1153	const UMH_PATTERN: [MotionVector; `16`] = search_pattern!(
1154	col: [ `-2`, `-1`, `0`, `1`, `2`, `3`, `4`, `3`, `2`, `1`, `0`, `-1`, `-2`, `3`, `-4`, `-3`],
1155	row: [ `4`, `4`, `4`, `4`, `4`, `2`, `0`, `-2`, `-4`, `-4`, `-4`, `-4`, `-4`, `-2`, `0`, `2`]
1156	);
1157
1158	/// Perform an uneven multi-hexagon search. There are 4 stages:
1159	/// 1. Unsymmetrical-cross search: Search the horizontal and vertical directions
1160	/// for the general direction of the motion.
1161	/// 2. A 5x5 full search is done to refine the current candidate.
1162	/// 3. Uneven multi-hexagon search. See [`UMH_PATTERN`].
1163	/// 4. Refinement using standard hexagon search.
1164	///
1165	/// `current` provides the initial search location and serves as
1166	/// the output for the final search results.
1167	///
1168	/// `me_range` parameter determines how far these stages can search.
1169	#[profiling::function]
1170	fn uneven_multi_hex_search<T: Pixel>(
1171	fi: &FrameInvariants<T>, po: PlaneOffset, org_region: &PlaneRegion<T>,
1172	p_ref: &Plane<T>, current: &mut MotionSearchResult, bit_depth: usize,
1173	pmv: [MotionVector; `2`], lambda: u32, mvx_min: isize, mvx_max: isize,
1174	mvy_min: isize, mvy_max: isize, w: usize, h: usize, me_range: i16,
1175	) {
1176	assert!(!current.is_empty());
1177
1178	// Search in a cross pattern to obtain a rough approximate of motion.
1179	// The cross is split into a horizontal and vertical component. Video content
1180	// tends to have more horizontal motion, so the horizontal part of the cross
1181	// is twice as large as the vertical half.
1182	// X -
1183	// \| <- me_range/2
1184	// X \|
1185	// X X X XoX X X X -
1186	// X
1187	//
1188	// X
1189	// \|------\|
1190	// \
1191	// me_range
1192	let center = current.mv;
1193
1194	// The larger, horizontal, part of the cross search.
1195	for i in (`1`..=me_range).step_by(`2`) {
1196	const HORIZONTAL_LINE: [MotionVector; `2`] = search_pattern!(
1197	col: [ `0`, `0`],
1198	row: [`-1`, `1`]
1199	);
1200
1201	for &offset in &HORIZONTAL_LINE {
1202	let cand_mv = center + offset * i;
1203	let rd = get_fullpel_mv_rd(
1204	fi, po, org_region, p_ref, bit_depth, pmv, lambda, `false`, mvx_min,
1205	mvx_max, mvy_min, mvy_max, w, h, cand_mv,
1206	);
1207
1208	if rd.cost < current.rd.cost {
1209	current.mv = cand_mv;
1210	current.rd = rd;
1211	}
1212	}
1213	}
1214
1215	// The smaller, vertical, part of the cross search
1216	for i in (`1`..=me_range >> `1`).step_by(`2`) {
1217	const VERTICAL_LINE: [MotionVector; `2`] = search_pattern!(
1218	col: [`-1`, `1`],
1219	row: [ `0`, `0`]
1220	);
1221
1222	for &offset in &VERTICAL_LINE {
1223	let cand_mv = center + offset * i;
1224	let rd = get_fullpel_mv_rd(
1225	fi, po, org_region, p_ref, bit_depth, pmv, lambda, `false`, mvx_min,
1226	mvx_max, mvy_min, mvy_max, w, h, cand_mv,
1227	);
1228
1229	if rd.cost < current.rd.cost {
1230	current.mv = cand_mv;
1231	current.rd = rd;
1232	}
1233	}
1234	}
1235
1236	// 5x5 full search. Search a 5x5 square region around the current best mv.
1237	let center = current.mv;
1238	for row in `-2`..=`2` {
1239	for col in `-2`..=`2` {
1240	if row == `0` && col == `0` {
1241	continue;
1242	}
1243	let cand_mv = center + MotionVector { row, col };
1244	let rd = get_fullpel_mv_rd(
1245	fi, po, org_region, p_ref, bit_depth, pmv, lambda, `false`, mvx_min,
1246	mvx_max, mvy_min, mvy_max, w, h, cand_mv,
1247	);
1248
1249	if rd.cost < current.rd.cost {
1250	current.mv = cand_mv;
1251	current.rd = rd;
1252	}
1253	}
1254	}
1255
1256	// Run the hexagons in uneven multi-hexagon. The hexagonal pattern is tested
1257	// around the best vector at multiple scales.
1258	// Example of the UMH pattern run on a scale of 1 and 2:
1259	// 2 -
1260	// \| <- me_range
1261	// 2 2 \|
1262	// \|
1263	// 2 1 2 \|
1264	// 1 1 \|
1265	// 2 1 1 2 \|
1266	// 1 1 \|
1267	// 2 1 o 1 2 \|
1268	// 1 1 \|
1269	// 2 1 1 2 \|
1270	// 1 1 \|
1271	// 2 1 2 \|
1272	// \|
1273	// 2 2 \|
1274	// \|
1275	// 2 -
1276	// \|---------------\|
1277	// \
1278	// me_range
1279	let center = current.mv;
1280
1281	// Divide by 4, the radius of the UMH's hexagon.
1282	let iterations = me_range >> `2`;
1283	for i in `1`..=iterations {
1284	for &offset in &UMH_PATTERN {
1285	let cand_mv = center + offset * i;
1286	let rd = get_fullpel_mv_rd(
1287	fi, po, org_region, p_ref, bit_depth, pmv, lambda, `false`, mvx_min,
1288	mvx_max, mvy_min, mvy_max, w, h, cand_mv,
1289	);
1290
1291	if rd.cost < current.rd.cost {
1292	current.mv = cand_mv;
1293	current.rd = rd;
1294	}
1295	}
1296	}
1297
1298	// Refine the search results using a 'normal' hexagon search.
1299	hexagon_search(
1300	fi, po, org_region, p_ref, current, bit_depth, pmv, lambda, mvx_min,
1301	mvx_max, mvy_min, mvy_max, w, h,
1302	);
1303	}
1304
1305	/// Run a subpixel diamond search. The search is run on multiple step sizes.
1306	///
1307	/// For each step size, candidate motion vectors are examined for improvement
1308	/// to the current search location. The search location is moved to the best
1309	/// candidate (if any). This is repeated until the search location stops moving.
1310	#[profiling::function]
1311	fn subpel_diamond_search<T: Pixel>(
1312	fi: &FrameInvariants<T>, po: PlaneOffset, org_region: &PlaneRegion<T>,
1313	_p_ref: &Plane<T>, bit_depth: usize, pmv: [MotionVector; `2`], lambda: u32,
1314	mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize, w: usize,
1315	h: usize, use_satd: bool, current: &mut MotionSearchResult,
1316	ref_frame: RefType,
1317	) {
1318	use crate::util::Aligned;
1319
1320	// Motion compensation assembly has special requirements for edges
1321	let mc_w = w.next_power_of_two();
1322	let mc_h = (h + `1`) & !`1`;
1323
1324	// Metadata for subpel scratch pad.
1325	let cfg = PlaneConfig::new(mc_w, mc_h, `0`, `0`, `0`, `0`, std::mem::size_of::<T>());
1326	// Stack allocation for subpel scratch pad.
1327	// SAFETY: We write to the array below before reading from it.
1328	let mut buf: Aligned<[T; `128` * `128`]> = unsafe { Aligned::uninitialized() };
1329	let mut tmp_region = PlaneRegionMut::from_slice(
1330	&mut buf.data,
1331	&cfg,
1332	Rect { x: `0`, y: `0`, width: cfg.width, height: cfg.height },
1333	);
1334
1335	// start at 1/2 pel and end at 1/4 or 1/8 pel
1336	let (mut diamond_radius_log2, diamond_radius_end_log2) =
1337	(`2u8`, u8::from(!fi.allow_high_precision_mv));
1338
1339	loop {
1340	// Find the best candidate from the diamond pattern.
1341	let mut best_cand: MotionSearchResult = MotionSearchResult::empty();
1342	for &offset in &DIAMOND_R1_PATTERN_SUBPEL {
1343	let cand_mv = current.mv + (offset << diamond_radius_log2);
1344
1345	let rd = get_subpel_mv_rd(
1346	fi,
1347	po,
1348	org_region,
1349	bit_depth,
1350	pmv,
1351	lambda,
1352	use_satd,
1353	mvx_min,
1354	mvx_max,
1355	mvy_min,
1356	mvy_max,
1357	w,
1358	h,
1359	cand_mv,
1360	&mut tmp_region,
1361	ref_frame,
1362	);
1363
1364	if rd.cost < best_cand.rd.cost {
1365	best_cand.mv = cand_mv;
1366	best_cand.rd = rd;
1367	}
1368	}
1369
1370	// Continue the search at this scale until a better candidate isn't found.
1371	if current.rd.cost <= best_cand.rd.cost {
1372	if diamond_radius_log2 == diamond_radius_end_log2 {
1373	break;
1374	} else {
1375	diamond_radius_log2 -= `1`;
1376	}
1377	} else {
1378	*current = best_cand;
1379	}
1380	}
1381
1382	assert!(!current.is_empty());
1383	}
1384
1385	#[inline]
1386	fn get_fullpel_mv_rd<T: Pixel>(
1387	fi: &FrameInvariants<T>, po: PlaneOffset, org_region: &PlaneRegion<T>,
1388	p_ref: &Plane<T>, bit_depth: usize, pmv: [MotionVector; `2`], lambda: u32,
1389	use_satd: bool, mvx_min: isize, mvx_max: isize, mvy_min: isize,
1390	mvy_max: isize, w: usize, h: usize, cand_mv: MotionVector,
1391	) -> MVCandidateRD {
1392	if (cand_mv.col as isize) < mvx_min
1393	\|\| (cand_mv.col as isize) > mvx_max
1394	\|\| (cand_mv.row as isize) < mvy_min
1395	\|\| (cand_mv.row as isize) > mvy_max
1396	{
1397	return MVCandidateRD::empty();
1398	}
1399
1400	// Convert the motion vector into an full pixel offset.
1401	let plane_ref: PlaneRegion<'_, T> = p_ref.region(Area::StartingAt {
1402	x: po.x + (cand_mv.col / `8`) as isize,
1403	y: po.y + (cand_mv.row / `8`) as isize,
1404	});
1405	compute_mv_rd(
1406	fi, pmv, lambda, use_satd, bit_depth, w, h, cand_mv, plane_org:org_region,
1407	&plane_ref,
1408	)
1409	}
1410
1411	fn get_subpel_mv_rd<T: Pixel>(
1412	fi: &FrameInvariants<T>, po: PlaneOffset, org_region: &PlaneRegion<T>,
1413	bit_depth: usize, pmv: [MotionVector; `2`], lambda: u32, use_satd: bool,
1414	mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize, w: usize,
1415	h: usize, cand_mv: MotionVector, tmp_region: &mut PlaneRegionMut<T>,
1416	ref_frame: RefType,
1417	) -> MVCandidateRD {
1418	if (cand_mv.col as isize) < mvx_min
1419	\|\| (cand_mv.col as isize) > mvx_max
1420	\|\| (cand_mv.row as isize) < mvy_min
1421	\|\| (cand_mv.row as isize) > mvy_max
1422	{
1423	return MVCandidateRD::empty();
1424	}
1425
1426	let tmp_width = tmp_region.rect().width;
1427	let tmp_height = tmp_region.rect().height;
1428	let tile_rect =
1429	TileRect { x: `0`, y: `0`, width: tmp_width, height: tmp_height };
1430
1431	PredictionMode::NEWMV.predict_inter_single(
1432	fi, tile_rect, `0`, po, tmp_region,
1433	// motion comp's w & h on edges can be different than distortion's
1434	tmp_width, tmp_height, ref_frame, cand_mv,
1435	);
1436	let plane_ref = tmp_region.as_const();
1437	compute_mv_rd(
1438	fi, pmv, lambda, use_satd, bit_depth, w, h, cand_mv, org_region,
1439	&plane_ref,
1440	)
1441	}
1442
1443	/// Compute the rate distortion stats for a motion vector.
1444	#[inline(always)]
1445	fn compute_mv_rd<T: Pixel>(
1446	fi: &FrameInvariants<T>, pmv: [MotionVector; `2`], lambda: u32,
1447	use_satd: bool, bit_depth: usize, w: usize, h: usize, cand_mv: MotionVector,
1448	plane_org: &PlaneRegion<'_, T>, plane_ref: &PlaneRegion<'_, T>,
1449	) -> MVCandidateRD {
1450	let sad: u32 = if use_satd {
1451	get_satd(plane_org, plane_ref, w, h, bit_depth, _cpu:fi.cpu_feature_level)
1452	} else {
1453	get_sad(plane_org, plane_ref, w, h, bit_depth, _cpu:fi.cpu_feature_level)
1454	};
1455
1456	let rate1: u32 = get_mv_rate(a:cand_mv, b:pmv[`0`], fi.allow_high_precision_mv);
1457	let rate2: u32 = get_mv_rate(a:cand_mv, b:pmv[`1`], fi.allow_high_precision_mv);
1458	let rate: u32 = rate1.min(rate2 + `1`);
1459
1460	MVCandidateRD { cost: `256` * sad as u64 + rate as u64 * lambda as u64, sad }
1461	}
1462
1463	#[profiling::function]
1464	fn full_search<T: Pixel>(
1465	fi: &FrameInvariants<T>, x_lo: isize, x_hi: isize, y_lo: isize, y_hi: isize,
1466	w: usize, h: usize, org_region: &PlaneRegion<T>, p_ref: &Plane<T>,
1467	po: PlaneOffset, step: usize, lambda: u32, pmv: [MotionVector; `2`],
1468	) -> MotionSearchResult {
1469	let search_region = p_ref.region(Area::Rect {
1470	x: x_lo,
1471	y: y_lo,
1472	width: (x_hi - x_lo) as usize + w,
1473	height: (y_hi - y_lo) as usize + h,
1474	});
1475
1476	let mut best: MotionSearchResult = MotionSearchResult::empty();
1477
1478	// Select rectangular regions within search region with vert+horz windows
1479	for vert_window in search_region.vert_windows(h).step_by(step) {
1480	for ref_window in vert_window.horz_windows(w).step_by(step) {
1481	let &Rect { x, y, .. } = ref_window.rect();
1482
1483	let mv = MotionVector {
1484	row: `8` * (y as i16 - po.y as i16),
1485	col: `8` * (x as i16 - po.x as i16),
1486	};
1487
1488	let rd = compute_mv_rd(
1489	fi,
1490	pmv,
1491	lambda,
1492	`false`,
1493	fi.sequence.bit_depth,
1494	w,
1495	h,
1496	mv,
1497	org_region,
1498	&ref_window,
1499	);
1500
1501	if rd.cost < best.rd.cost {
1502	best.rd = rd;
1503	best.mv = mv;
1504	}
1505	}
1506	}
1507
1508	best
1509	}
1510
1511	#[inline(always)]
1512	fn get_mv_rate(
1513	a: MotionVector, b: MotionVector, allow_high_precision_mv: bool,
1514	) -> u32 {
1515	#[inline(always)]
1516	fn diff_to_rate(diff: i16, allow_high_precision_mv: bool) -> u32 {
1517	let d: i16 = if allow_high_precision_mv { diff } else { diff >> `1` };
1518	`2` * ILog::ilog(self:d.abs()) as u32
1519	}
1520
1521	diff_to_rate(diff:a.row - b.row, allow_high_precision_mv)
1522	+ diff_to_rate(diff:a.col - b.col, allow_high_precision_mv)
1523	}
1524