1// Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
2// Copyright (c) 2017-2022, The rav1e contributors. All rights reserved
3//
4// This source code is subject to the terms of the BSD 2 Clause License and
5// the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6// was not distributed with this source code in the LICENSE file, you can
7// obtain it at www.aomedia.org/license/software. If the Alliance for Open
8// Media Patent License 1.0 was not distributed with this source code in the
9// PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10
11#![allow(non_camel_case_types)]
12
13use crate::api::*;
14use crate::cdef::*;
15use crate::context::*;
16use crate::cpu_features::CpuFeatureLevel;
17use crate::deblock::*;
18use crate::dist::*;
19use crate::ec::{Writer, WriterCounter, OD_BITRES};
20use crate::encode_block_with_modes;
21use crate::encoder::{FrameInvariants, IMPORTANCE_BLOCK_SIZE};
22use crate::frame::Frame;
23use crate::frame::*;
24use crate::header::ReferenceMode;
25use crate::lrf::*;
26use crate::mc::MotionVector;
27use crate::me::estimate_motion;
28use crate::me::MVSamplingMode;
29use crate::me::MotionSearchResult;
30use crate::motion_compensate;
31use crate::partition::PartitionType::*;
32use crate::partition::RefType::*;
33use crate::partition::*;
34use crate::predict::{
35 luma_ac, AngleDelta, IntraEdgeFilterParameters, IntraParam, PredictionMode,
36 RAV1E_INTER_COMPOUND_MODES, RAV1E_INTER_MODES_MINIMAL, RAV1E_INTRA_MODES,
37};
38use crate::rdo_tables::*;
39use crate::tiling::*;
40use crate::transform::{TxSet, TxSize, TxType, RAV1E_TX_TYPES};
41use crate::util::{init_slice_repeat_mut, Aligned, Pixel};
42use crate::write_tx_blocks;
43use crate::write_tx_tree;
44use crate::Tune;
45use crate::{encode_block_post_cdef, encode_block_pre_cdef};
46
47use arrayvec::*;
48use itertools::izip;
49use std::fmt;
50use std::mem::MaybeUninit;
51
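/// Selects how distortion and rate are measured during RDO: pixel-domain or
/// transform-domain distortion, paired with either the exact entropy-coder
/// rate or a rate estimated from the transform-domain distortion.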
52#[derive(Copy, Clone, PartialEq, Eq)]
53pub enum RDOType {
54 PixelDistRealRate,
55 TxDistRealRate,
56 TxDistEstRate,
57}
58
59impl RDOType {
60 #[inline]
61 pub const fn needs_tx_dist(self) -> bool {
62 match self {
63 // Pixel-domain distortion and exact ec rate
64 RDOType::PixelDistRealRate => false,
65 // Tx-domain distortion and exact ec rate
66 RDOType::TxDistRealRate => true,
67 // Tx-domain distortion and txdist-based rate
68 RDOType::TxDistEstRate => true,
69 }
70 }
71 #[inline]
72 pub const fn needs_coeff_rate(self) -> bool {
73 match self {
74 RDOType::PixelDistRealRate => true,
75 RDOType::TxDistRealRate => true,
76 RDOType::TxDistEstRate => false,
77 }
78 }
79}
80
81#[derive(Clone)]
82pub struct PartitionGroupParameters {
83 pub rd_cost: f64,
84 pub part_type: PartitionType,
85 pub part_modes: ArrayVec<PartitionParameters, 4>,
86}
87
88#[derive(Clone, Debug)]
89pub struct PartitionParameters {
90 pub rd_cost: f64,
91 pub bo: TileBlockOffset,
92 pub bsize: BlockSize,
93 pub pred_mode_luma: PredictionMode,
94 pub pred_mode_chroma: PredictionMode,
95 pub pred_cfl_params: CFLParams,
96 pub angle_delta: AngleDelta,
97 pub ref_frames: [RefType; 2],
98 pub mvs: [MotionVector; 2],
99 pub skip: bool,
100 pub has_coeff: bool,
101 pub tx_size: TxSize,
102 pub tx_type: TxType,
103 pub sidx: u8,
104}
105
106impl Default for PartitionParameters {
107 fn default() -> Self {
108 PartitionParameters {
109 rd_cost: std::f64::MAX,
110 bo: TileBlockOffset::default(),
111 bsize: BlockSize::BLOCK_32X32,
112 pred_mode_luma: PredictionMode::default(),
113 pred_mode_chroma: PredictionMode::default(),
114 pred_cfl_params: CFLParams::default(),
115 angle_delta: AngleDelta::default(),
116 ref_frames: [RefType::INTRA_FRAME, RefType::NONE_FRAME],
117 mvs: [MotionVector::default(); 2],
118 skip: false,
119 has_coeff: true,
120 tx_size: TxSize::TX_4X4,
121 tx_type: TxType::DCT_DCT,
122 sidx: 0,
123 }
124 }
125}
126
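/// Estimate the rate of a transform block from a fast distortion measure by
/// linear interpolation over the precomputed `RDO_RATE_TABLE`.
///
/// Conceptually (the code below implements this with an 8-bit fixed-point
/// slope and clamps the result at zero):
///
/// ```text
/// rate(d) ≈ y0 + (d - x0) * (y1 - y0) / (x1 - x0)
/// ```
///
/// where `(x0, y0)` and `(x1, y1)` are the two rate-table bins bracketing `d`.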
127pub fn estimate_rate(qindex: u8, ts: TxSize, fast_distortion: u64) -> u64 {
128 let bs_index: usize = ts as usize;
129 let q_bin_idx: usize = (qindex as usize) / RDO_QUANT_DIV;
130 let bin_idx_down: u64 =
131 ((fast_distortion) / RATE_EST_BIN_SIZE).min((RDO_NUM_BINS - 2) as u64);
132 let bin_idx_up: u64 = (bin_idx_down + 1).min((RDO_NUM_BINS - 1) as u64);
133 let x0: i64 = (bin_idx_down * RATE_EST_BIN_SIZE) as i64;
134 let x1: i64 = (bin_idx_up * RATE_EST_BIN_SIZE) as i64;
  let y0: i64 =
    RDO_RATE_TABLE[q_bin_idx][bs_index][bin_idx_down as usize] as i64;
  let y1: i64 =
    RDO_RATE_TABLE[q_bin_idx][bs_index][bin_idx_up as usize] as i64;
137 let slope: i64 = ((y1 - y0) << 8) / (x1 - x0);
138 (y0 + (((fast_distortion as i64 - x0) * slope) >> 8)).max(0) as u64
139}
140
141#[allow(unused)]
142pub fn cdef_dist_wxh<T: Pixel, F: Fn(Area, BlockSize) -> DistortionScale>(
143 src1: &PlaneRegion<'_, T>, src2: &PlaneRegion<'_, T>, w: usize, h: usize,
144 bit_depth: usize, compute_bias: F, cpu: CpuFeatureLevel,
145) -> Distortion {
146 debug_assert!(src1.plane_cfg.xdec == 0);
147 debug_assert!(src1.plane_cfg.ydec == 0);
148 debug_assert!(src2.plane_cfg.xdec == 0);
149 debug_assert!(src2.plane_cfg.ydec == 0);
150
151 let mut sum = Distortion::zero();
152 for y in (0..h).step_by(8) {
153 for x in (0..w).step_by(8) {
154 let kernel_h = (h - y).min(8);
155 let kernel_w = (w - x).min(8);
156 let area = Area::StartingAt { x: x as isize, y: y as isize };
157
158 let value = RawDistortion(cdef_dist_kernel(
159 &src1.subregion(area),
160 &src2.subregion(area),
161 kernel_w,
162 kernel_h,
163 bit_depth,
164 cpu,
165 ) as u64);
166
167 // cdef is always called on non-subsampled planes, so BLOCK_8X8 is
168 // correct here.
169 sum += value * compute_bias(area, BlockSize::BLOCK_8X8);
170 }
171 }
172 sum
173}
174
175/// Sum of Squared Error for a wxh block
176/// Currently limited to w and h of valid blocks
177pub fn sse_wxh<T: Pixel, F: Fn(Area, BlockSize) -> DistortionScale>(
178 src1: &PlaneRegion<'_, T>, src2: &PlaneRegion<'_, T>, w: usize, h: usize,
179 compute_bias: F, bit_depth: usize, cpu: CpuFeatureLevel,
180) -> Distortion {
181 // See get_weighted_sse in src/dist.rs.
182 // Provide a scale to get_weighted_sse for each square region of this size.
183 const CHUNK_SIZE: usize = IMPORTANCE_BLOCK_SIZE >> 1;
184
185 // To bias the distortion correctly, compute it in blocks up to the size
186 // importance block size in a non-subsampled plane.
187 let imp_block_w = CHUNK_SIZE << src1.plane_cfg.xdec;
188 let imp_block_h = CHUNK_SIZE << src1.plane_cfg.ydec;
189
190 let imp_bsize = BlockSize::from_width_and_height(imp_block_w, imp_block_h);
191
192 let n_imp_blocks_w = (w + CHUNK_SIZE - 1) / CHUNK_SIZE;
193 let n_imp_blocks_h = (h + CHUNK_SIZE - 1) / CHUNK_SIZE;
194
195 // TODO: Copying biases into a buffer is slow. It would be best if biases were
196 // passed directly. To do this, we would need different versions of the
197 // weighted sse function for decimated/subsampled data. Also requires
198 // eliminating use of unbiased sse.
199 // It should also be noted that the current copy code does not auto-vectorize.
200
201 // Copy biases into a buffer.
202 let mut buf_storage = Aligned::new(
203 [MaybeUninit::<u32>::uninit(); 128 / CHUNK_SIZE * 128 / CHUNK_SIZE],
204 );
205 let buf_stride = n_imp_blocks_w.next_power_of_two();
206 let buf = init_slice_repeat_mut(
207 &mut buf_storage.data[..buf_stride * n_imp_blocks_h],
208 0,
209 );
210
211 for block_y in 0..n_imp_blocks_h {
212 for block_x in 0..n_imp_blocks_w {
213 let block = Area::StartingAt {
214 x: (block_x * CHUNK_SIZE) as isize,
215 y: (block_y * CHUNK_SIZE) as isize,
216 };
217 buf[block_y * buf_stride + block_x] = compute_bias(block, imp_bsize).0;
218 }
219 }
220
221 Distortion(get_weighted_sse(
222 src1, src2, buf, buf_stride, w, h, bit_depth, cpu,
223 ))
224}
225
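/// Clip a block of size `bsize` positioned at `(x, y)` (in pixels) against
/// the frame dimensions, returning the visible width and height in pixels.
///
/// For example, a 64x64 block starting at `x = 1900` in a 1920-pixel-wide
/// frame has `1920 - 1900 = 20` visible columns.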
226pub const fn clip_visible_bsize(
227 frame_w: usize, frame_h: usize, bsize: BlockSize, x: usize, y: usize,
228) -> (usize, usize) {
229 let blk_w: usize = bsize.width();
230 let blk_h: usize = bsize.height();
231
232 let visible_w: usize = if x + blk_w <= frame_w {
233 blk_w
234 } else if x >= frame_w {
235 0
236 } else {
237 frame_w - x
238 };
239
240 let visible_h: usize = if y + blk_h <= frame_h {
241 blk_h
242 } else if y >= frame_h {
243 0
244 } else {
245 frame_h - y
246 };
247
248 (visible_w, visible_h)
249}
250
251// Compute the pixel-domain distortion for an encode
252fn compute_distortion<T: Pixel>(
253 fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>, bsize: BlockSize,
254 is_chroma_block: bool, tile_bo: TileBlockOffset, luma_only: bool,
255) -> ScaledDistortion {
256 let area = Area::BlockStartingAt { bo: tile_bo.0 };
257 let input_region = ts.input_tile.planes[0].subregion(area);
258 let rec_region = ts.rec.planes[0].subregion(area);
259
  // clip a block to have visible pixels only
261 let frame_bo = ts.to_frame_block_offset(tile_bo);
262 let (visible_w, visible_h) = clip_visible_bsize(
263 fi.width,
264 fi.height,
265 bsize,
266 frame_bo.0.x << MI_SIZE_LOG2,
267 frame_bo.0.y << MI_SIZE_LOG2,
268 );
269
270 if visible_w == 0 || visible_h == 0 {
271 return ScaledDistortion::zero();
272 }
273
274 let mut distortion = match fi.config.tune {
275 Tune::Psychovisual => cdef_dist_wxh(
276 &input_region,
277 &rec_region,
278 visible_w,
279 visible_h,
280 fi.sequence.bit_depth,
281 |bias_area, bsize| {
282 distortion_scale(
283 fi,
284 input_region.subregion(bias_area).frame_block_offset(),
285 bsize,
286 )
287 },
288 fi.cpu_feature_level,
289 ),
290 Tune::Psnr => sse_wxh(
291 &input_region,
292 &rec_region,
293 visible_w,
294 visible_h,
295 |bias_area, bsize| {
296 distortion_scale(
297 fi,
298 input_region.subregion(bias_area).frame_block_offset(),
299 bsize,
300 )
301 },
302 fi.sequence.bit_depth,
303 fi.cpu_feature_level,
304 ),
305 } * fi.dist_scale[0];
306
307 if is_chroma_block
308 && !luma_only
309 && fi.sequence.chroma_sampling != ChromaSampling::Cs400
310 {
311 let PlaneConfig { xdec, ydec, .. } = ts.input.planes[1].cfg;
312 let chroma_w = if bsize.width() >= 8 || xdec == 0 {
313 (visible_w + xdec) >> xdec
314 } else {
315 (4 + visible_w + xdec) >> xdec
316 };
317 let chroma_h = if bsize.height() >= 8 || ydec == 0 {
318 (visible_h + ydec) >> ydec
319 } else {
320 (4 + visible_h + ydec) >> ydec
321 };
322
323 for p in 1..3 {
324 let input_region = ts.input_tile.planes[p].subregion(area);
325 let rec_region = ts.rec.planes[p].subregion(area);
326 distortion += sse_wxh(
327 &input_region,
328 &rec_region,
329 chroma_w,
330 chroma_h,
331 |bias_area, bsize| {
332 distortion_scale(
333 fi,
334 input_region.subregion(bias_area).frame_block_offset(),
335 bsize,
336 )
337 },
338 fi.sequence.bit_depth,
339 fi.cpu_feature_level,
340 ) * fi.dist_scale[p];
341 }
342 }
343 distortion
344}
345
346// Compute the transform-domain distortion for an encode
347fn compute_tx_distortion<T: Pixel>(
348 fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>, bsize: BlockSize,
349 is_chroma_block: bool, tile_bo: TileBlockOffset, tx_dist: ScaledDistortion,
350 skip: bool, luma_only: bool,
351) -> ScaledDistortion {
352 assert!(fi.config.tune == Tune::Psnr);
353 let area = Area::BlockStartingAt { bo: tile_bo.0 };
354 let input_region = ts.input_tile.planes[0].subregion(area);
355 let rec_region = ts.rec.planes[0].subregion(area);
356
357 let (visible_w, visible_h) = if !skip {
358 (bsize.width(), bsize.height())
359 } else {
360 let frame_bo = ts.to_frame_block_offset(tile_bo);
361 clip_visible_bsize(
362 fi.width,
363 fi.height,
364 bsize,
365 frame_bo.0.x << MI_SIZE_LOG2,
366 frame_bo.0.y << MI_SIZE_LOG2,
367 )
368 };
369
370 if visible_w == 0 || visible_h == 0 {
371 return ScaledDistortion::zero();
372 }
373
374 let mut distortion = if skip {
375 sse_wxh(
376 &input_region,
377 &rec_region,
378 visible_w,
379 visible_h,
380 |bias_area, bsize| {
381 distortion_scale(
382 fi,
383 input_region.subregion(bias_area).frame_block_offset(),
384 bsize,
385 )
386 },
387 fi.sequence.bit_depth,
388 fi.cpu_feature_level,
389 ) * fi.dist_scale[0]
390 } else {
391 tx_dist
392 };
393
394 if is_chroma_block
395 && !luma_only
396 && skip
397 && fi.sequence.chroma_sampling != ChromaSampling::Cs400
398 {
399 let PlaneConfig { xdec, ydec, .. } = ts.input.planes[1].cfg;
400 let chroma_w = if bsize.width() >= 8 || xdec == 0 {
401 (visible_w + xdec) >> xdec
402 } else {
403 (4 + visible_w + xdec) >> xdec
404 };
405 let chroma_h = if bsize.height() >= 8 || ydec == 0 {
406 (visible_h + ydec) >> ydec
407 } else {
408 (4 + visible_h + ydec) >> ydec
409 };
410
411 for p in 1..3 {
412 let input_region = ts.input_tile.planes[p].subregion(area);
413 let rec_region = ts.rec.planes[p].subregion(area);
414 distortion += sse_wxh(
415 &input_region,
416 &rec_region,
417 chroma_w,
418 chroma_h,
419 |bias_area, bsize| {
420 distortion_scale(
421 fi,
422 input_region.subregion(bias_area).frame_block_offset(),
423 bsize,
424 )
425 },
426 fi.sequence.bit_depth,
427 fi.cpu_feature_level,
428 ) * fi.dist_scale[p];
429 }
430 }
431 distortion
432}
433
/// Compute a scaling factor to multiply the distortion of a block by;
/// this factor is determined using temporal RDO.
///
/// # Panics
///
/// - If called with a `bsize` larger than 8x8
440/// - If the coded frame data doesn't exist on the `FrameInvariants`
441pub fn distortion_scale<T: Pixel>(
442 fi: &FrameInvariants<T>, frame_bo: PlaneBlockOffset, bsize: BlockSize,
443) -> DistortionScale {
444 if !fi.config.temporal_rdo() {
445 return DistortionScale::default();
446 }
447 // EncoderConfig::temporal_rdo() should always return false in situations
448 // where distortion is computed on > 8x8 blocks, so we should never hit this
449 // assert.
450 assert!(bsize <= BlockSize::BLOCK_8X8);
451
452 let x: usize = frame_bo.0.x >> IMPORTANCE_BLOCK_TO_BLOCK_SHIFT;
453 let y: usize = frame_bo.0.y >> IMPORTANCE_BLOCK_TO_BLOCK_SHIFT;
454
455 let coded_data: &CodedFrameData = fi.coded_frame_data.as_ref().unwrap();
456 coded_data.distortion_scales[y * coded_data.w_in_imp_b + x]
457}
458
459/// # Panics
460///
461/// - If the coded frame data doesn't exist on the `FrameInvariants`
462pub fn spatiotemporal_scale<T: Pixel>(
463 fi: &FrameInvariants<T>, frame_bo: PlaneBlockOffset, bsize: BlockSize,
464) -> DistortionScale {
465 if !fi.config.temporal_rdo() && fi.config.tune != Tune::Psychovisual {
466 return DistortionScale::default();
467 }
468
469 let coded_data = fi.coded_frame_data.as_ref().unwrap();
470
471 let x0 = frame_bo.0.x >> IMPORTANCE_BLOCK_TO_BLOCK_SHIFT;
472 let y0 = frame_bo.0.y >> IMPORTANCE_BLOCK_TO_BLOCK_SHIFT;
473 let x1 = (x0 + bsize.width_imp_b()).min(coded_data.w_in_imp_b);
474 let y1 = (y0 + bsize.height_imp_b()).min(coded_data.h_in_imp_b);
475 let den = (((x1 - x0) * (y1 - y0)) as u64) << DistortionScale::SHIFT;
476
477 // calling this on each slice individually improves autovectorization
478 // compared to using `Iterator::take`
479 #[inline(always)]
480 fn take_slice<T>(slice: &[T], n: usize) -> &[T] {
481 slice.get(..n).unwrap_or(slice)
482 }
483
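  // Each scale is Q14 fixed point, so each product below is Q28; dividing the
  // Q28 sum by `den` (the block count shifted left by SHIFT) yields a Q14
  // mean, rounded to nearest via the `den >> 1` term.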
484 let mut sum = 0;
485 for y in y0..y1 {
486 sum += take_slice(
487 &coded_data.distortion_scales[y * coded_data.w_in_imp_b..][x0..x1],
488 MAX_SB_IN_IMP_B,
489 )
490 .iter()
491 .zip(
492 take_slice(
493 &coded_data.activity_scales[y * coded_data.w_in_imp_b..][x0..x1],
494 MAX_SB_IN_IMP_B,
495 )
496 .iter(),
497 )
498 .map(|(d, a)| d.0 as u64 * a.0 as u64)
499 .sum::<u64>();
500 }
501 DistortionScale(((sum + (den >> 1)) / den) as u32)
502}
503
504pub fn distortion_scale_for(
505 propagate_cost: f64, intra_cost: f64,
506) -> DistortionScale {
507 // The mbtree paper \cite{mbtree} uses the following formula:
508 //
509 // QP_delta = -strength * log2(1 + (propagate_cost / intra_cost))
510 //
511 // Since this is H.264, this corresponds to the following quantizer:
512 //
513 // Q' = Q * 2^(QP_delta/6)
514 //
  // Since lambda is proportional to Q^2, this means we want to minimize:
516 //
517 // D + lambda' * R
518 // = D + 2^(QP_delta / 3) * lambda * R
519 //
520 // If we want to keep lambda fixed, we can instead scale distortion and
521 // minimize:
522 //
523 // D * scale + lambda * R
524 //
525 // where:
526 //
527 // scale = 2^(QP_delta / -3)
528 // = (1 + (propagate_cost / intra_cost))^(strength / 3)
529 //
530 // The original paper empirically chooses strength = 2.0, but strength = 1.0
  // seems to work best in rav1e currently; this may have something to do with
532 // the fact that they use 16x16 blocks whereas our "importance blocks" are
533 // 8x8, but everything should be scale invariant here so that's weird.
534 //
535 // @article{mbtree,
536 // title={A novel macroblock-tree algorithm for high-performance
537 // optimization of dependent video coding in H.264/AVC},
538 // author={Garrett-Glaser, Jason},
539 // journal={Tech. Rep.},
540 // year={2009},
541 // url={https://pdfs.semanticscholar.org/032f/1ab7d9db385780a02eb2d579af8303b266d2.pdf}
542 // }
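  //
  // As a worked example: with strength = 1.0, a block whose propagated cost
  // equals its intra cost gets frac = 2 and thus a distortion scale of
  // 2^(1/3) ≈ 1.26, i.e. its distortion is weighted about 26% more heavily.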
543
544 if intra_cost == 0. {
545 return DistortionScale::default(); // no scaling
546 }
547
548 let strength = 1.0; // empirical, see comment above
549 let frac = (intra_cost + propagate_cost) / intra_cost;
550 frac.powf(strength / 3.0).into()
551}
552
553/// Fixed point arithmetic version of distortion scale
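///
/// Values are stored in Q14: `1 << 14` represents 1.0. Illustrative only
/// (not compiled as a doctest):
///
/// ```ignore
/// assert_eq!(DistortionScale::new(1, 1).0, 1 << 14); // 1.0
/// assert_eq!(DistortionScale::new(3, 2).0, 3 << 13); // 1.5
/// ```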
554#[repr(transparent)]
555#[derive(Copy, Clone)]
556pub struct DistortionScale(pub u32);
557
558#[repr(transparent)]
559pub struct RawDistortion(u64);
560
561#[repr(transparent)]
562pub struct Distortion(pub u64);
563
564#[repr(transparent)]
565pub struct ScaledDistortion(u64);
566
567impl DistortionScale {
568 /// Bits past the radix point
569 const SHIFT: u32 = 14;
570 /// Number of bits used. Determines the max value.
571 /// 28 bits is quite excessive.
572 const BITS: u32 = 28;
573 /// Maximum internal value
574 const MAX: u64 = (1 << Self::BITS) - 1;
575
576 #[inline]
577 pub const fn new(num: u64, den: u64) -> Self {
578 let raw = (num << Self::SHIFT).saturating_add(den / 2) / den;
579 let mask = (raw <= Self::MAX) as u64;
580 Self((mask * raw + (1 - mask) * Self::MAX) as u32)
581 }
582
583 pub fn inv_mean(slice: &[Self]) -> Self {
584 use crate::util::{bexp64, blog32_q11};
585 let sum = slice.iter().map(|&s| blog32_q11(s.0) as i64).sum::<i64>();
586 let log_inv_mean_q11 =
587 (Self::SHIFT << 11) as i64 - sum / slice.len() as i64;
588 Self(
589 bexp64((log_inv_mean_q11 + (Self::SHIFT << 11) as i64) << (57 - 11))
590 .clamp(1, (1 << Self::BITS) - 1) as u32,
591 )
592 }
593
594 /// Binary logarithm in Q11
595 #[inline]
596 pub const fn blog16(self) -> i16 {
597 use crate::util::blog32_q11;
598 (blog32_q11(self.0) - ((Self::SHIFT as i32) << 11)) as i16
599 }
600
601 /// Binary logarithm in Q57
602 #[inline]
603 pub const fn blog64(self) -> i64 {
604 use crate::util::{blog64, q57};
605 blog64(self.0 as i64) - q57(Self::SHIFT as i32)
606 }
607
608 /// Multiply, round and shift
609 /// Internal implementation, so don't use multiply trait.
610 #[inline]
611 pub const fn mul_u64(self, dist: u64) -> u64 {
612 (self.0 as u64 * dist + (1 << Self::SHIFT >> 1)) >> Self::SHIFT
613 }
614}
615
616impl std::ops::Mul for DistortionScale {
617 type Output = Self;
618
619 /// Multiply, round and shift
620 #[inline]
621 fn mul(self, rhs: Self) -> Self {
622 Self(
623 (((self.0 as u64 * rhs.0 as u64) + (1 << (Self::SHIFT - 1)))
624 >> Self::SHIFT)
        .clamp(1, (1 << Self::BITS) - 1) as u32,
626 )
627 }
628}
629
630impl std::ops::MulAssign for DistortionScale {
631 fn mul_assign(&mut self, rhs: Self) {
632 *self = *self * rhs;
633 }
634}
635
636// Default value for DistortionScale is a fixed point 1
637impl Default for DistortionScale {
638 #[inline]
639 fn default() -> Self {
640 Self(1 << Self::SHIFT)
641 }
642}
643
644impl fmt::Debug for DistortionScale {
645 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
646 write!(f, "{}", f64::from(*self))
647 }
648}
649
650impl From<f64> for DistortionScale {
651 #[inline]
652 fn from(scale: f64) -> Self {
653 let den: u64 = 1 << (Self::SHIFT + 1);
654 Self::new((scale * den as f64) as u64, den)
655 }
656}
657
658impl From<DistortionScale> for f64 {
659 #[inline]
660 fn from(scale: DistortionScale) -> Self {
661 scale.0 as f64 / (1 << DistortionScale::SHIFT) as f64
662 }
663}
664
665impl RawDistortion {
666 #[inline]
667 pub const fn new(dist: u64) -> Self {
668 Self(dist)
669 }
670}
671
672impl std::ops::Mul<DistortionScale> for RawDistortion {
673 type Output = Distortion;
674 #[inline]
675 fn mul(self, rhs: DistortionScale) -> Distortion {
676 Distortion(rhs.mul_u64(self.0))
677 }
678}
679
680impl Distortion {
681 #[inline]
682 pub const fn zero() -> Self {
683 Self(0)
684 }
685}
686
687impl std::ops::Mul<DistortionScale> for Distortion {
688 type Output = ScaledDistortion;
689 #[inline]
690 fn mul(self, rhs: DistortionScale) -> ScaledDistortion {
691 ScaledDistortion(rhs.mul_u64(self.0))
692 }
693}
694
695impl std::ops::AddAssign for Distortion {
696 #[inline]
697 fn add_assign(&mut self, other: Self) {
698 self.0 += other.0;
699 }
700}
701
702impl ScaledDistortion {
703 #[inline]
704 pub const fn zero() -> Self {
705 Self(0)
706 }
707}
708
709impl std::ops::AddAssign for ScaledDistortion {
710 #[inline]
711 fn add_assign(&mut self, other: Self) {
712 self.0 += other.0;
713 }
714}
715
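/// Compute the rate-distortion cost `D + lambda * R`, where `rate` is given
/// in fixed point with `OD_BITRES` fractional bits and `lambda` is the
/// frame-level Lagrange multiplier.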
716pub fn compute_rd_cost<T: Pixel>(
717 fi: &FrameInvariants<T>, rate: u32, distortion: ScaledDistortion,
718) -> f64 {
719 let rate_in_bits: f64 = (rate as f64) / ((1 << OD_BITRES) as f64);
  fi.lambda.mul_add(rate_in_bits, distortion.0 as f64)
721}
722
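/// Choose a transform size and type for the block via RDO.
///
/// For intra blocks, when `fi.tx_mode_select` and the `rdo_tx_decision` speed
/// setting are enabled, up to two additional levels of split transform sizes
/// are also searched; otherwise only the transform type is searched (or the
/// defaults are returned directly).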
723pub fn rdo_tx_size_type<T: Pixel>(
724 fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>,
725 cw: &mut ContextWriter, bsize: BlockSize, tile_bo: TileBlockOffset,
726 luma_mode: PredictionMode, ref_frames: [RefType; 2], mvs: [MotionVector; 2],
727 skip: bool,
728) -> (TxSize, TxType) {
729 let is_inter = !luma_mode.is_intra();
730 let mut tx_size = max_txsize_rect_lookup[bsize as usize];
731
732 if fi.enable_inter_txfm_split && is_inter && !skip {
733 tx_size = sub_tx_size_map[tx_size as usize]; // Always choose one level split size
734 }
735
736 let mut best_tx_type = TxType::DCT_DCT;
737 let mut best_tx_size = tx_size;
738 let mut best_rd = std::f64::MAX;
739
740 let do_rdo_tx_size = fi.tx_mode_select
741 && fi.config.speed_settings.transform.rdo_tx_decision
742 && !is_inter;
743 let rdo_tx_depth = if do_rdo_tx_size { 2 } else { 0 };
744 let mut cw_checkpoint: Option<ContextWriterCheckpoint> = None;
745
746 for _ in 0..=rdo_tx_depth {
747 let tx_set = get_tx_set(tx_size, is_inter, fi.use_reduced_tx_set);
748
749 let do_rdo_tx_type = tx_set > TxSet::TX_SET_DCTONLY
750 && fi.config.speed_settings.transform.rdo_tx_decision
751 && !is_inter
752 && !skip;
753
754 if !do_rdo_tx_size && !do_rdo_tx_type {
755 return (best_tx_size, best_tx_type);
756 };
757
758 let tx_types =
759 if do_rdo_tx_type { RAV1E_TX_TYPES } else { &[TxType::DCT_DCT] };
760
761 // Luma plane transform type decision
762 let (tx_type, rd_cost) = rdo_tx_type_decision(
763 fi,
764 ts,
765 cw,
766 &mut cw_checkpoint,
767 luma_mode,
768 ref_frames,
769 mvs,
770 bsize,
771 tile_bo,
772 tx_size,
773 tx_set,
774 tx_types,
775 best_rd,
776 );
777
778 if rd_cost < best_rd {
779 best_tx_size = tx_size;
780 best_tx_type = tx_type;
781 best_rd = rd_cost;
782 }
783
784 debug_assert!(tx_size.width_log2() <= bsize.width_log2());
785 debug_assert!(tx_size.height_log2() <= bsize.height_log2());
786 debug_assert!(
787 tx_size.sqr() <= TxSize::TX_32X32 || tx_type == TxType::DCT_DCT
788 );
789
790 let next_tx_size = sub_tx_size_map[tx_size as usize];
791
792 if next_tx_size == tx_size {
793 break;
794 } else {
795 tx_size = next_tx_size;
796 };
797 }
798
799 (best_tx_size, best_tx_type)
800}
801
802#[inline]
803const fn dmv_in_range(mv: MotionVector, ref_mv: MotionVector) -> bool {
804 let diff_row: i32 = mv.row as i32 - ref_mv.row as i32;
805 let diff_col: i32 = mv.col as i32 - ref_mv.col as i32;
806 diff_row >= MV_LOW
807 && diff_row <= MV_UPP
808 && diff_col >= MV_LOW
809 && diff_col <= MV_UPP
810}
811
812#[inline]
813#[profiling::function]
814fn luma_chroma_mode_rdo<T: Pixel>(
815 luma_mode: PredictionMode, fi: &FrameInvariants<T>, bsize: BlockSize,
816 tile_bo: TileBlockOffset, ts: &mut TileStateMut<'_, T>,
817 cw: &mut ContextWriter, rdo_type: RDOType,
818 cw_checkpoint: &ContextWriterCheckpoint, best: &mut PartitionParameters,
819 mvs: [MotionVector; 2], ref_frames: [RefType; 2],
820 mode_set_chroma: &[PredictionMode], luma_mode_is_intra: bool,
821 mode_context: usize, mv_stack: &ArrayVec<CandidateMV, 9>,
822 angle_delta: AngleDelta,
823) {
824 let PlaneConfig { xdec, ydec, .. } = ts.input.planes[1].cfg;
825
826 let is_chroma_block =
827 has_chroma(tile_bo, bsize, xdec, ydec, fi.sequence.chroma_sampling);
828
829 if !luma_mode_is_intra {
830 let ref_mvs = if mv_stack.is_empty() {
831 [MotionVector::default(); 2]
832 } else {
833 [mv_stack[0].this_mv, mv_stack[0].comp_mv]
834 };
835
836 if (luma_mode == PredictionMode::NEWMV
837 || luma_mode == PredictionMode::NEW_NEWMV
838 || luma_mode == PredictionMode::NEW_NEARESTMV)
839 && !dmv_in_range(mvs[0], ref_mvs[0])
840 {
841 return;
842 }
843
844 if (luma_mode == PredictionMode::NEW_NEWMV
845 || luma_mode == PredictionMode::NEAREST_NEWMV)
846 && !dmv_in_range(mvs[1], ref_mvs[1])
847 {
848 return;
849 }
850 }
851
852 // Find the best chroma prediction mode for the current luma prediction mode
853 let mut chroma_rdo = |skip: bool| -> bool {
854 use crate::segmentation::select_segment;
855
856 let mut zero_distortion = false;
857
858 for sidx in select_segment(fi, ts, tile_bo, bsize, skip) {
859 cw.bc.blocks.set_segmentation_idx(tile_bo, bsize, sidx);
860
861 let (tx_size, tx_type) = rdo_tx_size_type(
862 fi, ts, cw, bsize, tile_bo, luma_mode, ref_frames, mvs, skip,
863 );
864 for &chroma_mode in mode_set_chroma.iter() {
865 let wr = &mut WriterCounter::new();
866 let tell = wr.tell_frac();
867
868 if bsize >= BlockSize::BLOCK_8X8 && bsize.is_sqr() {
869 cw.write_partition(
870 wr,
871 tile_bo,
872 PartitionType::PARTITION_NONE,
873 bsize,
874 );
875 }
876
877 // TODO(yushin): luma and chroma would have different decision based on chroma format
878 let need_recon_pixel =
879 luma_mode_is_intra && tx_size.block_size() != bsize;
880
881 encode_block_pre_cdef(&fi.sequence, ts, cw, wr, bsize, tile_bo, skip);
882 let (has_coeff, tx_dist) = encode_block_post_cdef(
883 fi,
884 ts,
885 cw,
886 wr,
887 luma_mode,
888 chroma_mode,
889 angle_delta,
890 ref_frames,
891 mvs,
892 bsize,
893 tile_bo,
894 skip,
895 CFLParams::default(),
896 tx_size,
897 tx_type,
898 mode_context,
899 mv_stack,
900 rdo_type,
901 need_recon_pixel,
902 None,
903 );
904
905 let rate = wr.tell_frac() - tell;
906 let distortion = if fi.use_tx_domain_distortion && !need_recon_pixel {
907 compute_tx_distortion(
908 fi,
909 ts,
910 bsize,
911 is_chroma_block,
912 tile_bo,
913 tx_dist,
914 skip,
915 false,
916 )
917 } else {
918 compute_distortion(fi, ts, bsize, is_chroma_block, tile_bo, false)
919 };
920 let is_zero_dist = distortion.0 == 0;
921 let rd = compute_rd_cost(fi, rate, distortion);
922 if rd < best.rd_cost {
923 //if rd < best.rd_cost || luma_mode == PredictionMode::NEW_NEWMV {
924 best.rd_cost = rd;
925 best.pred_mode_luma = luma_mode;
926 best.pred_mode_chroma = chroma_mode;
927 best.angle_delta = angle_delta;
928 best.ref_frames = ref_frames;
929 best.mvs = mvs;
930 best.skip = skip;
931 best.has_coeff = has_coeff;
932 best.tx_size = tx_size;
933 best.tx_type = tx_type;
934 best.sidx = sidx;
935 zero_distortion = is_zero_dist;
936 }
937
938 cw.rollback(cw_checkpoint);
939 }
940 }
941
942 zero_distortion
943 };
944
945 // Don't skip when using intra modes
946 let zero_distortion =
947 if !luma_mode_is_intra { chroma_rdo(true) } else { false };
948 // early skip
949 if !zero_distortion {
950 chroma_rdo(false);
951 }
952}
953
954/// RDO-based mode decision
955///
956/// # Panics
957///
958/// - If the best RD found is negative.
959/// This should never happen and indicates a development error.
960#[profiling::function]
961pub fn rdo_mode_decision<T: Pixel>(
962 fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>,
963 cw: &mut ContextWriter, bsize: BlockSize, tile_bo: TileBlockOffset,
964 inter_cfg: &InterConfig,
965) -> PartitionParameters {
966 let PlaneConfig { xdec, ydec, .. } = ts.input.planes[1].cfg;
967 let cw_checkpoint = cw.checkpoint(&tile_bo, fi.sequence.chroma_sampling);
968
969 let rdo_type = if fi.use_tx_domain_rate {
970 RDOType::TxDistEstRate
971 } else if fi.use_tx_domain_distortion {
972 RDOType::TxDistRealRate
973 } else {
974 RDOType::PixelDistRealRate
975 };
976
977 let mut best = if fi.frame_type.has_inter() {
978 assert!(fi.frame_type != FrameType::KEY);
979
980 inter_frame_rdo_mode_decision(
981 fi,
982 ts,
983 cw,
984 bsize,
985 tile_bo,
986 inter_cfg,
987 &cw_checkpoint,
988 rdo_type,
989 )
990 } else {
991 PartitionParameters::default()
992 };
993
994 let is_chroma_block =
995 has_chroma(tile_bo, bsize, xdec, ydec, fi.sequence.chroma_sampling);
996
997 if !best.skip {
998 best = intra_frame_rdo_mode_decision(
999 fi,
1000 ts,
1001 cw,
1002 bsize,
1003 tile_bo,
1004 &cw_checkpoint,
1005 rdo_type,
1006 best,
1007 is_chroma_block,
1008 );
1009 }
1010
1011 if best.pred_mode_luma.is_intra() && is_chroma_block && bsize.cfl_allowed() {
1012 cw.bc.blocks.set_segmentation_idx(tile_bo, bsize, best.sidx);
1013
1014 let chroma_mode = PredictionMode::UV_CFL_PRED;
1015 let cw_checkpoint = cw.checkpoint(&tile_bo, fi.sequence.chroma_sampling);
1016 let mut wr = WriterCounter::new();
1017 let angle_delta = AngleDelta { y: best.angle_delta.y, uv: 0 };
1018
1019 write_tx_blocks(
1020 fi,
1021 ts,
1022 cw,
1023 &mut wr,
1024 best.pred_mode_luma,
1025 best.pred_mode_luma,
1026 angle_delta,
1027 tile_bo,
1028 bsize,
1029 best.tx_size,
1030 best.tx_type,
1031 false,
1032 CFLParams::default(),
1033 true,
1034 rdo_type,
1035 true,
1036 );
1037 cw.rollback(&cw_checkpoint);
1038 if fi.sequence.chroma_sampling != ChromaSampling::Cs400 {
1039 if let Some(cfl) = rdo_cfl_alpha(ts, tile_bo, bsize, best.tx_size, fi) {
1040 let mut wr = WriterCounter::new();
1041 let tell = wr.tell_frac();
1042
1043 encode_block_pre_cdef(
1044 &fi.sequence,
1045 ts,
1046 cw,
1047 &mut wr,
1048 bsize,
1049 tile_bo,
1050 best.skip,
1051 );
1052 let (has_coeff, _) = encode_block_post_cdef(
1053 fi,
1054 ts,
1055 cw,
1056 &mut wr,
1057 best.pred_mode_luma,
1058 chroma_mode,
1059 angle_delta,
1060 best.ref_frames,
1061 best.mvs,
1062 bsize,
1063 tile_bo,
1064 best.skip,
1065 cfl,
1066 best.tx_size,
1067 best.tx_type,
1068 0,
1069 &[],
1070 rdo_type,
1071 true, // For CFL, luma should be always reconstructed.
1072 None,
1073 );
1074
1075 let rate = wr.tell_frac() - tell;
1076
1077 // For CFL, tx-domain distortion is not an option.
1078 let distortion =
1079 compute_distortion(fi, ts, bsize, is_chroma_block, tile_bo, false);
1080 let rd = compute_rd_cost(fi, rate, distortion);
1081 if rd < best.rd_cost {
1082 best.rd_cost = rd;
1083 best.pred_mode_chroma = chroma_mode;
1084 best.angle_delta = angle_delta;
1085 best.has_coeff = has_coeff;
1086 best.pred_cfl_params = cfl;
1087 }
1088
1089 cw.rollback(&cw_checkpoint);
1090 }
1091 }
1092 }
1093
1094 cw.bc.blocks.set_mode(tile_bo, bsize, best.pred_mode_luma);
1095 cw.bc.blocks.set_ref_frames(tile_bo, bsize, best.ref_frames);
1096 cw.bc.blocks.set_motion_vectors(tile_bo, bsize, best.mvs);
1097
1098 assert!(best.rd_cost >= 0_f64);
1099
1100 PartitionParameters {
1101 bo: tile_bo,
1102 bsize,
1103 pred_mode_luma: best.pred_mode_luma,
1104 pred_mode_chroma: best.pred_mode_chroma,
1105 pred_cfl_params: best.pred_cfl_params,
1106 angle_delta: best.angle_delta,
1107 ref_frames: best.ref_frames,
1108 mvs: best.mvs,
1109 rd_cost: best.rd_cost,
1110 skip: best.skip,
1111 has_coeff: best.has_coeff,
1112 tx_size: best.tx_size,
1113 tx_type: best.tx_type,
1114 sidx: best.sidx,
1115 }
1116}
1117
1118#[profiling::function]
1119fn inter_frame_rdo_mode_decision<T: Pixel>(
1120 fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>,
1121 cw: &mut ContextWriter, bsize: BlockSize, tile_bo: TileBlockOffset,
1122 inter_cfg: &InterConfig, cw_checkpoint: &ContextWriterCheckpoint,
1123 rdo_type: RDOType,
1124) -> PartitionParameters {
1125 let mut best = PartitionParameters::default();
1126
1127 // we can never have more than 7 reference frame sets
1128 let mut ref_frames_set = ArrayVec::<_, 7>::new();
1129 // again, max of 7 ref slots
1130 let mut ref_slot_set = ArrayVec::<_, 7>::new();
1131 // our implementation never returns more than 3 at the moment
1132 let mut mvs_from_me = ArrayVec::<_, 3>::new();
1133 let mut fwdref = None;
1134 let mut bwdref = None;
1135
1136 for i in inter_cfg.allowed_ref_frames().iter().copied() {
1137 // Don't search LAST3 since it's used only for probs
1138 if i == LAST3_FRAME {
1139 continue;
1140 }
1141
1142 if !ref_slot_set.contains(&fi.ref_frames[i.to_index()]) {
1143 if fwdref.is_none() && i.is_fwd_ref() {
1144 fwdref = Some(ref_frames_set.len());
1145 }
1146 if bwdref.is_none() && i.is_bwd_ref() {
1147 bwdref = Some(ref_frames_set.len());
1148 }
1149 ref_frames_set.push([i, NONE_FRAME]);
1150 let slot_idx = fi.ref_frames[i.to_index()];
1151 ref_slot_set.push(slot_idx);
1152 }
1153 }
1154 assert!(!ref_frames_set.is_empty());
1155
1156 let mut inter_mode_set = ArrayVec::<(PredictionMode, usize), 20>::new();
1157 let mut mvs_set = ArrayVec::<[MotionVector; 2], 20>::new();
1158 let mut satds = ArrayVec::<u32, 20>::new();
1159 let mut mv_stacks = ArrayVec::<_, 20>::new();
1160 let mut mode_contexts = ArrayVec::<_, 7>::new();
1161
1162 for (i, &ref_frames) in ref_frames_set.iter().enumerate() {
1163 let mut mv_stack = ArrayVec::<CandidateMV, 9>::new();
1164 mode_contexts.push(cw.find_mvrefs(
1165 tile_bo,
1166 ref_frames,
1167 &mut mv_stack,
1168 bsize,
1169 fi,
1170 false,
1171 ));
1172
1173 let mut pmv = [MotionVector::default(); 2];
1174 if !mv_stack.is_empty() {
1175 pmv[0] = mv_stack[0].this_mv;
1176 }
1177 if mv_stack.len() > 1 {
1178 pmv[1] = mv_stack[1].this_mv;
1179 }
1180
1181 let res = estimate_motion(
1182 fi,
1183 ts,
1184 bsize.width(),
1185 bsize.height(),
1186 tile_bo,
1187 ref_frames[0],
1188 Some(pmv),
1189 MVSamplingMode::CORNER { right: true, bottom: true },
1190 false,
1191 0,
1192 None,
1193 )
1194 .unwrap_or_else(MotionSearchResult::empty);
1195 let b_me = res.mv;
1196
1197 mvs_from_me.push([b_me, MotionVector::default()]);
1198
1199 for &x in RAV1E_INTER_MODES_MINIMAL {
1200 inter_mode_set.push((x, i));
1201 }
1202 if !mv_stack.is_empty() {
1203 inter_mode_set.push((PredictionMode::NEAR0MV, i));
1204 }
1205 if mv_stack.len() >= 2 {
1206 inter_mode_set.push((PredictionMode::GLOBALMV, i));
1207 }
1208 let include_near_mvs = fi.config.speed_settings.motion.include_near_mvs;
1209 if include_near_mvs {
1210 if mv_stack.len() >= 3 {
1211 inter_mode_set.push((PredictionMode::NEAR1MV, i));
1212 }
1213 if mv_stack.len() >= 4 {
1214 inter_mode_set.push((PredictionMode::NEAR2MV, i));
1215 }
1216 }
1217 let same_row_col = |x: &CandidateMV| {
1218 x.this_mv.row == mvs_from_me[i][0].row
1219 && x.this_mv.col == mvs_from_me[i][0].col
1220 };
1221 if !mv_stack
1222 .iter()
1223 .take(if include_near_mvs { 4 } else { 2 })
1224 .any(same_row_col)
1225 && (mvs_from_me[i][0].row != 0 || mvs_from_me[i][0].col != 0)
1226 {
1227 inter_mode_set.push((PredictionMode::NEWMV, i));
1228 }
1229
1230 mv_stacks.push(mv_stack);
1231 }
1232
1233 let sz = bsize.width_mi().min(bsize.height_mi());
1234
1235 // To use non single reference modes, block width and height must be greater than 4.
1236 if fi.reference_mode != ReferenceMode::SINGLE && sz >= 2 {
1237 // Adding compound candidate
1238 if let Some(r0) = fwdref {
1239 if let Some(r1) = bwdref {
1240 let ref_frames = [ref_frames_set[r0][0], ref_frames_set[r1][0]];
1241 ref_frames_set.push(ref_frames);
1242 let mv0 = mvs_from_me[r0][0];
1243 let mv1 = mvs_from_me[r1][0];
1244 mvs_from_me.push([mv0, mv1]);
1245 let mut mv_stack = ArrayVec::<CandidateMV, 9>::new();
1246 mode_contexts.push(cw.find_mvrefs(
1247 tile_bo,
1248 ref_frames,
1249 &mut mv_stack,
1250 bsize,
1251 fi,
1252 true,
1253 ));
1254 for &x in RAV1E_INTER_COMPOUND_MODES {
1255 // exclude any NEAR mode based on speed setting
1256 if fi.config.speed_settings.motion.include_near_mvs
1257 || !x.has_nearmv()
1258 {
1259 let mv_stack_idx = ref_frames_set.len() - 1;
1260 // exclude NEAR modes if the mv_stack is too short
1261 if !(x.has_nearmv() && x.ref_mv_idx() >= mv_stack.len()) {
1262 inter_mode_set.push((x, mv_stack_idx));
1263 }
1264 }
1265 }
1266 mv_stacks.push(mv_stack);
1267 }
1268 }
1269 }
1270
1271 let num_modes_rdo = if fi.config.speed_settings.prediction.prediction_modes
1272 >= PredictionModesSetting::ComplexAll
1273 {
1274 inter_mode_set.len()
1275 } else {
1276 9 // This number is determined by AWCY test
1277 };
1278
1279 inter_mode_set.iter().for_each(|&(luma_mode, i)| {
1280 let mvs = match luma_mode {
1281 PredictionMode::NEWMV | PredictionMode::NEW_NEWMV => mvs_from_me[i],
1282 PredictionMode::NEARESTMV | PredictionMode::NEAREST_NEARESTMV => {
1283 if !mv_stacks[i].is_empty() {
1284 [mv_stacks[i][0].this_mv, mv_stacks[i][0].comp_mv]
1285 } else {
1286 [MotionVector::default(); 2]
1287 }
1288 }
1289 PredictionMode::NEAR0MV | PredictionMode::NEAR_NEAR0MV => {
1290 if mv_stacks[i].len() > 1 {
1291 [mv_stacks[i][1].this_mv, mv_stacks[i][1].comp_mv]
1292 } else {
1293 [MotionVector::default(); 2]
1294 }
1295 }
1296 PredictionMode::NEAR1MV
1297 | PredictionMode::NEAR2MV
1298 | PredictionMode::NEAR_NEAR1MV
1299 | PredictionMode::NEAR_NEAR2MV => [
1300 mv_stacks[i][luma_mode.ref_mv_idx()].this_mv,
1301 mv_stacks[i][luma_mode.ref_mv_idx()].comp_mv,
1302 ],
1303 PredictionMode::NEAREST_NEWMV => {
1304 [mv_stacks[i][0].this_mv, mvs_from_me[i][1]]
1305 }
1306 PredictionMode::NEW_NEARESTMV => {
1307 [mvs_from_me[i][0], mv_stacks[i][0].comp_mv]
1308 }
1309 PredictionMode::GLOBALMV | PredictionMode::GLOBAL_GLOBALMV => {
1310 [MotionVector::default(); 2]
1311 }
1312 _ => {
1313 unimplemented!();
1314 }
1315 };
1316 mvs_set.push(mvs);
1317
1318 // Calculate SATD for each mode
1319 if num_modes_rdo != inter_mode_set.len() {
1320 let tile_rect = ts.tile_rect();
1321 let rec = &mut ts.rec.planes[0];
1322 let po = tile_bo.plane_offset(rec.plane_cfg);
1323 let mut rec_region =
1324 rec.subregion_mut(Area::BlockStartingAt { bo: tile_bo.0 });
1325
1326 luma_mode.predict_inter(
1327 fi,
1328 tile_rect,
1329 0,
1330 po,
1331 &mut rec_region,
1332 bsize.width(),
1333 bsize.height(),
1334 ref_frames_set[i],
1335 mvs,
1336 &mut ts.inter_compound_buffers,
1337 );
1338
1339 let plane_org = ts.input_tile.planes[0]
1340 .subregion(Area::BlockStartingAt { bo: tile_bo.0 });
1341 let plane_ref = rec_region.as_const();
1342
1343 let satd = get_satd(
1344 &plane_org,
1345 &plane_ref,
1346 bsize.width(),
1347 bsize.height(),
1348 fi.sequence.bit_depth,
1349 fi.cpu_feature_level,
1350 );
1351 satds.push(satd);
1352 } else {
1353 satds.push(0);
1354 }
1355 });
1356
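  // When only a subset of the candidate modes will go through full RDO,
  // order them by prediction SATD so the most promising ones are tried first.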
1357 let mut sorted =
1358 izip!(inter_mode_set, mvs_set, satds).collect::<ArrayVec<_, 20>>();
1359 if num_modes_rdo != sorted.len() {
1360 sorted.sort_by_key(|((_mode, _i), _mvs, satd)| *satd);
1361 }
1362
1363 sorted.iter().take(num_modes_rdo).for_each(
1364 |&((luma_mode, i), mvs, _satd)| {
1365 let mode_set_chroma = ArrayVec::from([luma_mode]);
1366
1367 luma_chroma_mode_rdo(
1368 luma_mode,
1369 fi,
1370 bsize,
1371 tile_bo,
1372 ts,
1373 cw,
1374 rdo_type,
1375 cw_checkpoint,
1376 &mut best,
1377 mvs,
1378 ref_frames_set[i],
1379 &mode_set_chroma,
1380 false,
1381 mode_contexts[i],
1382 &mv_stacks[i],
1383 AngleDelta::default(),
1384 );
1385 },
1386 );
1387
1388 best
1389}
1390
1391#[profiling::function]
1392fn intra_frame_rdo_mode_decision<T: Pixel>(
1393 fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>,
1394 cw: &mut ContextWriter, bsize: BlockSize, tile_bo: TileBlockOffset,
1395 cw_checkpoint: &ContextWriterCheckpoint, rdo_type: RDOType,
1396 mut best: PartitionParameters, is_chroma_block: bool,
1397) -> PartitionParameters {
1398 let mut modes = ArrayVec::<_, INTRA_MODES>::new();
1399
1400 // Reduce number of prediction modes at higher speed levels
1401 let num_modes_rdo = if (fi.frame_type == FrameType::KEY
1402 && fi.config.speed_settings.prediction.prediction_modes
1403 >= PredictionModesSetting::ComplexKeyframes)
1404 || (fi.frame_type.has_inter()
1405 && fi.config.speed_settings.prediction.prediction_modes
1406 >= PredictionModesSetting::ComplexAll)
1407 {
1408 7
1409 } else {
1410 3
1411 };
1412
1413 let intra_mode_set = RAV1E_INTRA_MODES;
1414
1415 // Find mode with lowest rate cost
1416 {
1417 use crate::ec::cdf_to_pdf;
1418
1419 let probs_all = cdf_to_pdf(if fi.frame_type.has_inter() {
1420 cw.get_cdf_intra_mode(bsize)
1421 } else {
1422 cw.get_cdf_intra_mode_kf(tile_bo)
1423 });
1424
1425 modes.try_extend_from_slice(intra_mode_set).unwrap();
    // Sort by descending symbol probability, i.e. ascending signaling cost.
    modes.sort_by_key(|&a| !probs_all[a as usize]);
1427 }
1428
1429 // If tx partition (i.e. fi.tx_mode_select) is enabled, the below intra prediction screening
1430 // may be improved by emulating prediction for each tx block.
1431 {
1432 let satds = {
      // FIXME: If tx partition is used, this whole satds block should be fixed
1434 let tx_size = bsize.tx_size();
1435 let mut edge_buf = Aligned::uninit_array();
1436 let edge_buf = {
1437 let rec = &ts.rec.planes[0].as_const();
1438 let po = tile_bo.plane_offset(rec.plane_cfg);
1439 // FIXME: If tx partition is used, get_intra_edges() should be called for each tx block
1440 get_intra_edges(
1441 &mut edge_buf,
1442 rec,
1443 tile_bo,
1444 0,
1445 0,
1446 bsize,
1447 po,
1448 tx_size,
1449 fi.sequence.bit_depth,
1450 None,
1451 fi.sequence.enable_intra_edge_filter,
1452 IntraParam::None,
1453 )
1454 };
1455
1456 let ief_params = if fi.sequence.enable_intra_edge_filter {
1457 let above_block_info = ts.above_block_info(tile_bo, 0, 0);
1458 let left_block_info = ts.left_block_info(tile_bo, 0, 0);
1459 Some(IntraEdgeFilterParameters::new(
1460 0,
1461 above_block_info,
1462 left_block_info,
1463 ))
1464 } else {
1465 None
1466 };
1467
1468 let mut satds_all = [0; INTRA_MODES];
1469 for &luma_mode in modes.iter().skip(num_modes_rdo / 2) {
1470 let tile_rect = ts.tile_rect();
1471 let rec = &mut ts.rec.planes[0];
1472 let mut rec_region =
1473 rec.subregion_mut(Area::BlockStartingAt { bo: tile_bo.0 });
1474 // FIXME: If tx partition is used, luma_mode.predict_intra() should be called for each tx block
1475 luma_mode.predict_intra(
1476 tile_rect,
1477 &mut rec_region,
1478 tx_size,
1479 fi.sequence.bit_depth,
1480 &[0i16; 2],
1481 IntraParam::None,
1482 if luma_mode.is_directional() { ief_params } else { None },
1483 &edge_buf,
1484 fi.cpu_feature_level,
1485 );
1486
1487 let plane_org = ts.input_tile.planes[0]
1488 .subregion(Area::BlockStartingAt { bo: tile_bo.0 });
1489 let plane_ref = rec_region.as_const();
1490
1491 satds_all[luma_mode as usize] = get_satd(
1492 &plane_org,
1493 &plane_ref,
1494 tx_size.width(),
1495 tx_size.height(),
1496 fi.sequence.bit_depth,
1497 fi.cpu_feature_level,
1498 );
1499 }
1500 satds_all
1501 };
1502
1503 modes[num_modes_rdo / 2..].sort_by_key(|&a| satds[a as usize]);
1504 }
1505
1506 debug_assert!(num_modes_rdo >= 1);
1507
1508 modes.iter().take(num_modes_rdo).for_each(|&luma_mode| {
1509 let mvs = [MotionVector::default(); 2];
1510 let ref_frames = [INTRA_FRAME, NONE_FRAME];
1511 let mut mode_set_chroma = ArrayVec::<_, 2>::new();
1512 mode_set_chroma.push(luma_mode);
1513 if is_chroma_block && luma_mode != PredictionMode::DC_PRED {
1514 mode_set_chroma.push(PredictionMode::DC_PRED);
1515 }
1516 luma_chroma_mode_rdo(
1517 luma_mode,
1518 fi,
1519 bsize,
1520 tile_bo,
1521 ts,
1522 cw,
1523 rdo_type,
1524 cw_checkpoint,
1525 &mut best,
1526 mvs,
1527 ref_frames,
1528 &mode_set_chroma,
1529 true,
1530 0,
1531 &ArrayVec::<CandidateMV, 9>::new(),
1532 AngleDelta::default(),
1533 );
1534 });
1535
1536 if fi.config.speed_settings.prediction.fine_directional_intra
1537 && bsize >= BlockSize::BLOCK_8X8
1538 {
1539 // Find the best angle delta for the current best prediction mode
1540 let luma_deltas = best.pred_mode_luma.angle_delta_count();
1541 let chroma_deltas = best.pred_mode_chroma.angle_delta_count();
1542
1543 let mvs = [MotionVector::default(); 2];
1544 let ref_frames = [INTRA_FRAME, NONE_FRAME];
1545 let mode_set_chroma = [best.pred_mode_chroma];
1546 let mv_stack = ArrayVec::<_, 9>::new();
1547 let mut best_angle_delta = best.angle_delta;
1548 let mut angle_delta_rdo = |y, uv| -> AngleDelta {
1549 if best.angle_delta.y != y || best.angle_delta.uv != uv {
1550 luma_chroma_mode_rdo(
1551 best.pred_mode_luma,
1552 fi,
1553 bsize,
1554 tile_bo,
1555 ts,
1556 cw,
1557 rdo_type,
1558 cw_checkpoint,
1559 &mut best,
1560 mvs,
1561 ref_frames,
1562 &mode_set_chroma,
1563 true,
1564 0,
1565 &mv_stack,
1566 AngleDelta { y, uv },
1567 );
1568 }
1569 best.angle_delta
1570 };
1571
1572 for i in 0..luma_deltas {
1573 let angle_delta_y =
1574 if luma_deltas == 1 { 0 } else { i - MAX_ANGLE_DELTA as i8 };
1575 best_angle_delta = angle_delta_rdo(angle_delta_y, best_angle_delta.uv);
1576 }
1577 for j in 0..chroma_deltas {
1578 let angle_delta_uv =
1579 if chroma_deltas == 1 { 0 } else { j - MAX_ANGLE_DELTA as i8 };
1580 best_angle_delta = angle_delta_rdo(best_angle_delta.y, angle_delta_uv);
1581 }
1582 }
1583
1584 best
1585}
1586
1587/// # Panics
1588///
1589/// - If the block size is invalid for subsampling.
1590#[profiling::function]
1591pub fn rdo_cfl_alpha<T: Pixel>(
1592 ts: &mut TileStateMut<'_, T>, tile_bo: TileBlockOffset, bsize: BlockSize,
1593 luma_tx_size: TxSize, fi: &FrameInvariants<T>,
1594) -> Option<CFLParams> {
1595 let PlaneConfig { xdec, ydec, .. } = ts.input.planes[1].cfg;
1596 let uv_tx_size = bsize.largest_chroma_tx_size(xdec, ydec);
1597 debug_assert!(
1598 bsize.subsampled_size(xdec, ydec).unwrap() == uv_tx_size.block_size()
1599 );
1600
1601 let frame_bo = ts.to_frame_block_offset(tile_bo);
1602 let (visible_tx_w, visible_tx_h) = clip_visible_bsize(
1603 (fi.width + xdec) >> xdec,
1604 (fi.height + ydec) >> ydec,
1605 uv_tx_size.block_size(),
1606 (frame_bo.0.x << MI_SIZE_LOG2) >> xdec,
1607 (frame_bo.0.y << MI_SIZE_LOG2) >> ydec,
1608 );
1609
1610 if visible_tx_w == 0 || visible_tx_h == 0 {
1611 return None;
1612 };
1613 let mut ac = Aligned::<[MaybeUninit<i16>; 32 * 32]>::uninit_array();
1614 let ac = luma_ac(&mut ac.data, ts, tile_bo, bsize, luma_tx_size, fi);
1615 let best_alpha: ArrayVec<i16, 2> = (1..3)
1616 .map(|p| {
1617 let &PlaneConfig { xdec, ydec, .. } = ts.rec.planes[p].plane_cfg;
1618 let tile_rect = ts.tile_rect().decimated(xdec, ydec);
1619 let rec = &mut ts.rec.planes[p];
1620 let input = &ts.input_tile.planes[p];
1621 let po = tile_bo.plane_offset(rec.plane_cfg);
1622 let mut edge_buf = Aligned::uninit_array();
1623 let edge_buf = get_intra_edges(
1624 &mut edge_buf,
1625 &rec.as_const(),
1626 tile_bo,
1627 0,
1628 0,
1629 bsize,
1630 po,
1631 uv_tx_size,
1632 fi.sequence.bit_depth,
1633 Some(PredictionMode::UV_CFL_PRED),
1634 fi.sequence.enable_intra_edge_filter,
1635 IntraParam::None,
1636 );
1637 let mut alpha_cost = |alpha: i16| -> u64 {
1638 let mut rec_region =
1639 rec.subregion_mut(Area::BlockStartingAt { bo: tile_bo.0 });
1640 PredictionMode::UV_CFL_PRED.predict_intra(
1641 tile_rect,
1642 &mut rec_region,
1643 uv_tx_size,
1644 fi.sequence.bit_depth,
1645 ac,
1646 IntraParam::Alpha(alpha),
1647 None,
1648 &edge_buf,
1649 fi.cpu_feature_level,
1650 );
1651 sse_wxh(
1652 &input.subregion(Area::BlockStartingAt { bo: tile_bo.0 }),
1653 &rec_region.as_const(),
1654 visible_tx_w,
1655 visible_tx_h,
1656 |_, _| DistortionScale::default(), // We're not doing RDO here.
1657 fi.sequence.bit_depth,
1658 fi.cpu_feature_level,
1659 )
1660 .0
1661 };
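      // Greedy search over alpha magnitudes 1..=16 in both signs; stop early
      // once improvements no longer keep pace with the magnitude searched
      // (`count < alpha`).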
1662 let mut best = (alpha_cost(0), 0);
1663 let mut count = 2;
1664 for alpha in 1i16..=16i16 {
1665 let cost = (alpha_cost(alpha), alpha_cost(-alpha));
1666 if cost.0 < best.0 {
1667 best = (cost.0, alpha);
1668 count += 2;
1669 }
1670 if cost.1 < best.0 {
1671 best = (cost.1, -alpha);
1672 count += 2;
1673 }
1674 if count < alpha {
1675 break;
1676 }
1677 }
1678 best.1
1679 })
1680 .collect();
1681
1682 if best_alpha[0] == 0 && best_alpha[1] == 0 {
1683 None
1684 } else {
1685 Some(CFLParams::from_alpha(best_alpha[0], best_alpha[1]))
1686 }
1687}
1688
1689/// RDO-based transform type decision
1690/// If `cw_checkpoint` is `None`, a checkpoint for cw's (`ContextWriter`) current
1691/// state is created and stored for later use.
1692///
1693/// # Panics
1694///
1695/// - If a writer checkpoint is never created before or within the function.
1696/// This should never happen and indicates a development error.
1697/// - If the best RD found is negative.
1698/// This should never happen and indicates a development error.
1699pub fn rdo_tx_type_decision<T: Pixel>(
1700 fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>,
1701 cw: &mut ContextWriter, cw_checkpoint: &mut Option<ContextWriterCheckpoint>,
1702 mode: PredictionMode, ref_frames: [RefType; 2], mvs: [MotionVector; 2],
1703 bsize: BlockSize, tile_bo: TileBlockOffset, tx_size: TxSize, tx_set: TxSet,
1704 tx_types: &[TxType], cur_best_rd: f64,
1705) -> (TxType, f64) {
1706 let mut best_type = TxType::DCT_DCT;
1707 let mut best_rd = std::f64::MAX;
1708
1709 let PlaneConfig { xdec, ydec, .. } = ts.input.planes[1].cfg;
1710 let is_chroma_block =
1711 has_chroma(tile_bo, bsize, xdec, ydec, fi.sequence.chroma_sampling);
1712
1713 let is_inter = !mode.is_intra();
1714
1715 if cw_checkpoint.is_none() {
1716 // Only run the first call
1717 // Prevents creating multiple checkpoints for own version of cw
1718 *cw_checkpoint =
1719 Some(cw.checkpoint(&tile_bo, fi.sequence.chroma_sampling));
1720 }
1721
1722 let rdo_type = if fi.use_tx_domain_distortion {
1723 RDOType::TxDistRealRate
1724 } else {
1725 RDOType::PixelDistRealRate
1726 };
1727 let need_recon_pixel = tx_size.block_size() != bsize && !is_inter;
1728
1729 let mut first_iteration = true;
1730 for &tx_type in tx_types {
1731 // Skip unsupported transform types
1732 if av1_tx_used[tx_set as usize][tx_type as usize] == 0 {
1733 continue;
1734 }
1735
1736 if is_inter {
1737 motion_compensate(
1738 fi, ts, cw, mode, ref_frames, mvs, bsize, tile_bo, true,
1739 );
1740 }
1741
1742 let mut wr = WriterCounter::new();
1743 let tell = wr.tell_frac();
1744 let (_, tx_dist) = if is_inter {
1745 write_tx_tree(
1746 fi,
1747 ts,
1748 cw,
1749 &mut wr,
1750 mode,
1751 0,
1752 tile_bo,
1753 bsize,
1754 tx_size,
1755 tx_type,
1756 false,
1757 true,
1758 rdo_type,
1759 need_recon_pixel,
1760 )
1761 } else {
1762 write_tx_blocks(
1763 fi,
1764 ts,
1765 cw,
1766 &mut wr,
1767 mode,
1768 mode,
1769 AngleDelta::default(),
1770 tile_bo,
1771 bsize,
1772 tx_size,
1773 tx_type,
1774 false,
1775 CFLParams::default(), // Unused.
1776 true,
1777 rdo_type,
1778 need_recon_pixel,
1779 )
1780 };
1781
1782 let rate = wr.tell_frac() - tell;
1783 let distortion = if fi.use_tx_domain_distortion {
1784 compute_tx_distortion(
1785 fi,
1786 ts,
1787 bsize,
1788 is_chroma_block,
1789 tile_bo,
1790 tx_dist,
1791 false,
1792 true,
1793 )
1794 } else {
1795 compute_distortion(fi, ts, bsize, is_chroma_block, tile_bo, true)
1796 };
1797 cw.rollback(cw_checkpoint.as_ref().unwrap());
1798
1799 let rd = compute_rd_cost(fi, rate, distortion);
1800
1801 if first_iteration {
1802 // We use an optimization to early exit after testing the first
1803 // transform type if the cost is higher than the existing best.
      // The idea is that if this transform size is not better than the
1805 // previous size, it is not worth testing remaining modes for this size.
1806 if rd > cur_best_rd {
1807 break;
1808 }
1809 first_iteration = false;
1810 }
1811
1812 if rd < best_rd {
1813 best_rd = rd;
1814 best_type = tx_type;
1815 }
1816 }
1817
1818 assert!(best_rd >= 0_f64);
1819
1820 (best_type, best_rd)
1821}
1822
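/// Select the sub-block offsets used by `partition` from the four quadrant
/// offsets: `PARTITION_NONE` keeps only the first, `PARTITION_VERT` adds the
/// right half, `PARTITION_HORZ` adds the bottom half, and `PARTITION_SPLIT`
/// uses all four.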
1823pub fn get_sub_partitions(
1824 four_partitions: &[TileBlockOffset; 4], partition: PartitionType,
1825) -> ArrayVec<TileBlockOffset, 4> {
  let mut partition_offsets = ArrayVec::<TileBlockOffset, 4>::new();

  partition_offsets.push(four_partitions[0]);
1829
1830 if partition == PARTITION_NONE {
1831 return partition_offsets;
1832 }
1833 if partition == PARTITION_VERT || partition == PARTITION_SPLIT {
    partition_offsets.push(four_partitions[1]);
1835 };
1836 if partition == PARTITION_HORZ || partition == PARTITION_SPLIT {
    partition_offsets.push(four_partitions[2]);
1838 };
1839 if partition == PARTITION_SPLIT {
    partition_offsets.push(four_partitions[3]);
1841 };
1842
1843 partition_offsets
1844}
1845
1846#[inline(always)]
1847fn rdo_partition_none<T: Pixel>(
1848 fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>,
1849 cw: &mut ContextWriter, bsize: BlockSize, tile_bo: TileBlockOffset,
1850 inter_cfg: &InterConfig, child_modes: &mut ArrayVec<PartitionParameters, 4>,
1851) -> f64 {
1852 debug_assert!(tile_bo.0.x < ts.mi_width && tile_bo.0.y < ts.mi_height);
1853
  let mode = rdo_mode_decision(fi, ts, cw, bsize, tile_bo, inter_cfg);
  let cost = mode.rd_cost;

  child_modes.push(mode);
1858
1859 cost
1860}
1861
1862// VERTICAL, HORIZONTAL or simple SPLIT
1863#[inline(always)]
1864fn rdo_partition_simple<T: Pixel, W: Writer>(
1865 fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>,
1866 cw: &mut ContextWriter, w_pre_cdef: &mut W, w_post_cdef: &mut W,
1867 bsize: BlockSize, tile_bo: TileBlockOffset, inter_cfg: &InterConfig,
1868 partition: PartitionType, rdo_type: RDOType, best_rd: f64,
1869 child_modes: &mut ArrayVec<PartitionParameters, 4>,
1870) -> Option<f64> {
1871 debug_assert!(tile_bo.0.x < ts.mi_width && tile_bo.0.y < ts.mi_height);
1872 let subsize = bsize.subsize(partition).unwrap();
1873
1874 let cost = if bsize >= BlockSize::BLOCK_8X8 {
1875 let w: &mut W = if cw.bc.cdef_coded { w_post_cdef } else { w_pre_cdef };
1876 let tell = w.tell_frac();
1877 cw.write_partition(w, tile_bo, partition, bsize);
1878 compute_rd_cost(fi, w.tell_frac() - tell, ScaledDistortion::zero())
1879 } else {
1880 0.0
1881 };
1882
1883 let hbsw = subsize.width_mi(); // Half the block size width in blocks
1884 let hbsh = subsize.height_mi(); // Half the block size height in blocks
1885 let four_partitions = [
1886 tile_bo,
1887 TileBlockOffset(BlockOffset { x: tile_bo.0.x + hbsw, y: tile_bo.0.y }),
1888 TileBlockOffset(BlockOffset { x: tile_bo.0.x, y: tile_bo.0.y + hbsh }),
1889 TileBlockOffset(BlockOffset {
1890 x: tile_bo.0.x + hbsw,
1891 y: tile_bo.0.y + hbsh,
1892 }),
1893 ];
1894
1895 let partitions = get_sub_partitions(&four_partitions, partition);
1896
1897 let mut rd_cost_sum = 0.0;
1898
1899 for offset in partitions {
1900 let hbs = subsize.width_mi() >> 1;
1901 let has_cols = offset.0.x + hbs < ts.mi_width;
1902 let has_rows = offset.0.y + hbs < ts.mi_height;
1903
1904 if has_cols && has_rows {
1905 let mode_decision =
1906 rdo_mode_decision(fi, ts, cw, subsize, offset, inter_cfg);
1907
1908 rd_cost_sum += mode_decision.rd_cost;
1909
1910 if fi.enable_early_exit && rd_cost_sum > best_rd {
1911 return None;
1912 }
1913 if subsize >= BlockSize::BLOCK_8X8 && subsize.is_sqr() {
1914 let w: &mut W =
1915 if cw.bc.cdef_coded { w_post_cdef } else { w_pre_cdef };
1916 cw.write_partition(w, offset, PartitionType::PARTITION_NONE, subsize);
1917 }
1918 encode_block_with_modes(
1919 fi,
1920 ts,
1921 cw,
1922 w_pre_cdef,
1923 w_post_cdef,
1924 subsize,
1925 offset,
1926 &mode_decision,
1927 rdo_type,
1928 None,
1929 );
1930 child_modes.push(mode_decision);
1931 } else {
1932 //rd_cost_sum += std::f64::MAX;
1933 return None;
1934 }
1935 }
1936
1937 Some(cost + rd_cost_sum)
1938}
1939
1940/// RDO-based single level partitioning decision
1941///
1942/// # Panics
1943///
1944/// - If the best RD found is negative.
1945/// This should never happen, and indicates a development error.
1946#[profiling::function]
1947pub fn rdo_partition_decision<T: Pixel, W: Writer>(
1948 fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>,
1949 cw: &mut ContextWriter, w_pre_cdef: &mut W, w_post_cdef: &mut W,
1950 bsize: BlockSize, tile_bo: TileBlockOffset,
1951 cached_block: &PartitionGroupParameters, partition_types: &[PartitionType],
1952 rdo_type: RDOType, inter_cfg: &InterConfig,
1953) -> PartitionGroupParameters {
1954 let mut best_partition = cached_block.part_type;
1955 let mut best_rd = cached_block.rd_cost;
1956 let mut best_pred_modes = cached_block.part_modes.clone();
1957
1958 let cw_checkpoint = cw.checkpoint(&tile_bo, fi.sequence.chroma_sampling);
1959 let w_pre_checkpoint = w_pre_cdef.checkpoint();
1960 let w_post_checkpoint = w_post_cdef.checkpoint();
1961
1962 for &partition in partition_types {
1963 // Do not re-encode results we already have
1964 if partition == cached_block.part_type {
1965 continue;
1966 }
1967
1968 let mut child_modes = ArrayVec::<_, 4>::new();
1969
1970 let cost = match partition {
1971 PARTITION_NONE if bsize <= BlockSize::BLOCK_64X64 => {
1972 Some(rdo_partition_none(
1973 fi,
1974 ts,
1975 cw,
1976 bsize,
1977 tile_bo,
1978 inter_cfg,
1979 &mut child_modes,
1980 ))
1981 }
1982 PARTITION_SPLIT | PARTITION_HORZ | PARTITION_VERT => {
1983 rdo_partition_simple(
1984 fi,
1985 ts,
1986 cw,
1987 w_pre_cdef,
1988 w_post_cdef,
1989 bsize,
1990 tile_bo,
1991 inter_cfg,
1992 partition,
1993 rdo_type,
1994 best_rd,
1995 &mut child_modes,
1996 )
1997 }
1998 _ => {
1999 unreachable!();
2000 }
2001 };
2002
2003 if let Some(rd) = cost {
2004 if rd < best_rd {
2005 best_rd = rd;
2006 best_partition = partition;
2007 best_pred_modes = child_modes.clone();
2008 }
2009 }
2010 cw.rollback(&cw_checkpoint);
2011 w_pre_cdef.rollback(&w_pre_checkpoint);
2012 w_post_cdef.rollback(&w_post_checkpoint);
2013 }
2014
2015 assert!(best_rd >= 0_f64);
2016
2017 PartitionGroupParameters {
2018 rd_cost: best_rd,
2019 part_type: best_partition,
2020 part_modes: best_pred_modes,
2021 }
2022}
2023
2024#[profiling::function]
2025fn rdo_loop_plane_error<T: Pixel>(
2026 base_sbo: TileSuperBlockOffset, offset_sbo: TileSuperBlockOffset,
2027 sb_w: usize, sb_h: usize, fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>,
2028 blocks: &TileBlocks<'_>, test: &Frame<T>, src: &Tile<'_, T>, pli: usize,
2029) -> ScaledDistortion {
2030 let sb_w_blocks =
2031 if fi.sequence.use_128x128_superblock { 16 } else { 8 } * sb_w;
2032 let sb_h_blocks =
2033 if fi.sequence.use_128x128_superblock { 16 } else { 8 } * sb_h;
  // Each direction block is 8x8 in luma, and potentially smaller if
  // subsampled in chroma. Accumulate error only over in-frame, unpadded
  // pixels.
2036 let mut err = Distortion::zero();
2037 for by in 0..sb_h_blocks {
2038 for bx in 0..sb_w_blocks {
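      // Each iteration covers an 8x8 luma area; shift by 1 to convert the
      // 8x8-unit index into 4x4 (MI) block units.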
2039 let loop_bo = offset_sbo.block_offset(bx << 1, by << 1);
2040 if loop_bo.0.x < blocks.cols() && loop_bo.0.y < blocks.rows() {
2041 let src_plane = &src.planes[pli];
2042 let test_plane = &test.planes[pli];
2043 let PlaneConfig { xdec, ydec, .. } = *src_plane.plane_cfg;
2044 debug_assert_eq!(xdec, test_plane.cfg.xdec);
2045 debug_assert_eq!(ydec, test_plane.cfg.ydec);
2046
2047 // Unfortunately, our distortion biases are only available via
2048 // Frame-absolute addressing, so we need a block offset
2049 // relative to the full frame origin (not the tile or analysis
2050 // area)
2051 let frame_bo = (base_sbo + offset_sbo).block_offset(bx << 1, by << 1);
2052 let bias = distortion_scale(
2053 fi,
2054 ts.to_frame_block_offset(frame_bo),
2055 BlockSize::BLOCK_8X8,
2056 );
2057
2058 let src_region =
2059 src_plane.subregion(Area::BlockStartingAt { bo: loop_bo.0 });
2060 let test_region =
2061 test_plane.region(Area::BlockStartingAt { bo: loop_bo.0 });
2062
2063 err += if pli == 0 {
          // For loop filters, we intentionally use cdef_dist even with
2065 // `--tune Psnr`. Using SSE instead gives no PSNR gain but has a
2066 // significant negative impact on other metrics and visual quality.
2067 RawDistortion(cdef_dist_kernel(
2068 &src_region,
2069 &test_region,
2070 8,
2071 8,
2072 fi.sequence.bit_depth,
2073 fi.cpu_feature_level,
2074 ) as u64)
2075 * bias
2076 } else {
2077 sse_wxh(
2078 &src_region,
2079 &test_region,
2080 8 >> xdec,
2081 8 >> ydec,
2082 |_, _| bias,
2083 fi.sequence.bit_depth,
2084 fi.cpu_feature_level,
2085 )
2086 };
2087 }
2088 }
2089 }
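  // Scale the accumulated raw distortion by this plane's distortion weight.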
2090 err * fi.dist_scale[pli]
2091}
2092
/// Passed a superblock offset representing the upper-left corner of the
/// LRU area we're optimizing. This area covers the largest LRU present in
/// any plane, and may therefore span multiple superblocks as well as
/// multiple complete, smaller LRUs in the other planes.
2097///
2098/// # Panics
2099///
2100/// - If both CDEF and LRF are disabled.
2101#[profiling::function]
2102pub fn rdo_loop_decision<T: Pixel, W: Writer>(
2103 base_sbo: TileSuperBlockOffset, fi: &FrameInvariants<T>,
2104 ts: &mut TileStateMut<'_, T>, cw: &mut ContextWriter, w: &mut W,
2105 deblock_p: bool,
2106) {
2107 let planes = if fi.sequence.chroma_sampling == ChromaSampling::Cs400 {
2108 1
2109 } else {
2110 MAX_PLANES
2111 };
2112 assert!(fi.sequence.enable_cdef || fi.sequence.enable_restoration);
2113 // Determine area of optimization: Which plane has the largest LRUs?
2114 // How many LRUs for each?
  let mut sb_w = 1; // how many superblocks wide the largest LRU is,
                    // i.e. how many SB columns we're processing
  let mut sb_h = 1; // how many superblocks tall the largest LRU is,
                    // i.e. how many SB rows we're processing
  let mut lru_w = [0; MAX_PLANES]; // how many LRUs across, per plane
  let mut lru_h = [0; MAX_PLANES]; // how many LRUs down, per plane
2121 for pli in 0..planes {
2122 let sb_h_shift = ts.restoration.planes[pli].rp_cfg.sb_h_shift;
2123 let sb_v_shift = ts.restoration.planes[pli].rp_cfg.sb_v_shift;
2124 if sb_w < (1 << sb_h_shift) {
2125 sb_w = 1 << sb_h_shift;
2126 }
2127 if sb_h < (1 << sb_v_shift) {
2128 sb_h = 1 << sb_v_shift;
2129 }
2130 }
2131 for pli in 0..planes {
2132 let sb_h_shift = ts.restoration.planes[pli].rp_cfg.sb_h_shift;
2133 let sb_v_shift = ts.restoration.planes[pli].rp_cfg.sb_v_shift;
2134 lru_w[pli] = sb_w / (1 << sb_h_shift);
2135 lru_h[pli] = sb_h / (1 << sb_v_shift);
2136 }
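  // For example, if one plane's LRUs span 2 superblocks horizontally
  // (sb_h_shift == 1) while another plane's span only 1, then sb_w == 2,
  // and lru_w is 1 for the first plane and 2 for the second.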
2137
2138 // The superblock width/height determinations may be calling for us
2139 // to compute over superblocks that do not actually exist in the
2140 // frame (off the right or lower edge). Trim sb width/height down
2141 // to actual superblocks. Note that these last superblocks on the
2142 // right/bottom may themselves still span the edge of the frame, but
2143 // they do hold at least some visible pixels.
2144 sb_w = sb_w.min(ts.sb_width - base_sbo.0.x);
2145 sb_h = sb_h.min(ts.sb_height - base_sbo.0.y);
2146
  // We also need to know the visible Y pixel limits (the sb_w/sb_h
  // figures above only tell us how many pixels are allocated, possibly
  // extending beyond the visible frame).
2150 let crop_w =
2151 fi.width - ((ts.sbo.0.x + base_sbo.0.x) << SUPERBLOCK_TO_PLANE_SHIFT);
2152 let crop_h =
2153 fi.height - ((ts.sbo.0.y + base_sbo.0.y) << SUPERBLOCK_TO_PLANE_SHIFT);
2154 let pixel_w = crop_w.min(sb_w << SUPERBLOCK_TO_PLANE_SHIFT);
2155 let pixel_h = crop_h.min(sb_h << SUPERBLOCK_TO_PLANE_SHIFT);
2156
2157 // Based on `RestorationState::new`
2158 const MAX_SB_SHIFT: usize = 4;
2159 const MAX_SB_SIZE: usize = 1 << MAX_SB_SHIFT;
2160 const MAX_LRU_SIZE: usize = MAX_SB_SIZE;
2161
2162 // Static allocation relies on the "minimal LRU area for all N planes" invariant.
2163 let mut best_index = [-1; MAX_SB_SIZE * MAX_SB_SIZE];
2164 let mut best_lrf =
2165 [[RestorationFilter::None; MAX_PLANES]; MAX_LRU_SIZE * MAX_LRU_SIZE];
2166
  // Due to imprecision in the reconstruction parameter solver, we need
  // to make sure we don't fall into a limit cycle.  Track the best cost
  // seen so far for each LRU so we can break out if a new solution fails
  // to improve on it at the reconstruction stage.
2171 let mut best_lrf_cost = [[-1.0; MAX_PLANES]; MAX_LRU_SIZE * MAX_LRU_SIZE];
2172
  // A subregion of the TileBlocks covering our working frame area.
  // Note that the size of this subset is what tells CDEF the actual
  // coded size.
2176 let mut tileblocks_subset = cw.bc.blocks.subregion_mut(
2177 base_sbo.block_offset(0, 0).0.x,
2178 base_sbo.block_offset(0, 0).0.y,
2179 sb_w << SUPERBLOCK_TO_BLOCK_SHIFT,
2180 sb_h << SUPERBLOCK_TO_BLOCK_SHIFT,
2181 );
2182
  // cdef doesn't run on superblocks that are completely skipped.
  // Determine which superblocks are marked as skipped so we can avoid
  // filtering them. If all blocks are skipped, we can also avoid some of
  // the overhead of setting up for cdef.
2187 let mut cdef_skip = [true; MAX_SB_SIZE * MAX_SB_SIZE];
2188 let mut cdef_skip_all = true;
2189 if fi.sequence.enable_cdef {
2190 for sby in 0..sb_h {
2191 for sbx in 0..sb_w {
2192 let blocks = tileblocks_subset.subregion(16 * sbx, 16 * sby, 16, 16);
2193 let mut skip = true;
2194 for y in 0..blocks.rows() {
2195 for block in blocks[y].iter() {
2196 skip &= block.skip;
2197 }
2198 }
2199 cdef_skip[sby * MAX_SB_SIZE + sbx] = skip;
2200 cdef_skip_all &= skip;
2201 }
2202 }
2203 }
2204
  // Unlike cdef, loop restoration runs regardless of whether blocks are
  // skipped. However, the most significant improvement generally comes
  // from non-skipped blocks, so the LRU search is only performed when at
  // least one block in the LRU is not skipped.
2209 // This should be the same as `cdef_skip_all`, except when cdef is disabled.
2210 let mut lru_skip_all = true;
2211 let mut lru_skip = [[true; MAX_PLANES]; MAX_LRU_SIZE * MAX_LRU_SIZE];
2212 if fi.sequence.enable_restoration {
2213 if fi.config.speed_settings.lru_on_skip {
2214 lru_skip_all = false;
2215 lru_skip = [[false; MAX_PLANES]; MAX_LRU_SIZE * MAX_LRU_SIZE];
2216 } else {
2217 for pli in 0..planes {
2218 // width, in sb, of an LRU in this plane
2219 let lru_sb_w = 1 << ts.restoration.planes[pli].rp_cfg.sb_h_shift;
2220 // height, in sb, of an LRU in this plane
2221 let lru_sb_h = 1 << ts.restoration.planes[pli].rp_cfg.sb_v_shift;
2222 for lru_y in 0..lru_h[pli] {
2223 // number of LRUs vertically
2224 for lru_x in 0..lru_w[pli] {
2225 // number of LRUs horizontally
2226
2227 let loop_sbo = TileSuperBlockOffset(SuperBlockOffset {
2228 x: lru_x * lru_sb_w,
2229 y: lru_y * lru_sb_h,
2230 });
2231
2232 if !ts.restoration.has_restoration_unit(
2233 base_sbo + loop_sbo,
2234 pli,
2235 false,
2236 ) {
2237 continue;
2238 }
2239
2240 let start = loop_sbo.block_offset(0, 0).0;
2241 let size = TileSuperBlockOffset(SuperBlockOffset {
2242 x: lru_sb_w,
2243 y: lru_sb_h,
2244 })
2245 .block_offset(0, 0)
2246 .0;
2247
2248 let blocks =
2249 tileblocks_subset.subregion(start.x, start.y, size.x, size.y);
2250 let mut skip = true;
2251 for y in 0..blocks.rows() {
2252 for block in blocks[y].iter() {
2253 skip &= block.skip;
2254 }
2255 }
2256 lru_skip[lru_y * MAX_LRU_SIZE + lru_x][pli] = skip;
2257 lru_skip_all &= skip;
2258 }
2259 }
2260 }
2261 }
2262 }
2263
2264 // Return early if all blocks are skipped for lru and cdef.
2265 if lru_skip_all && cdef_skip_all {
2266 return;
2267 }
2268
2269 // Loop filter RDO is an iterative process and we need temporary
2270 // scratch data to hold the results of deblocking, cdef, and the
2271 // loop reconstruction filter so that each can be partially updated
2272 // without recomputing the entire stack. Construct
2273 // largest-LRU-sized frames for each, accounting for padding
2274 // required by deblocking, cdef and [optionally] LR.
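  // The copied area is rounded up to a whole multiple of 8 pixels in each
  // dimension; the error and analysis code below works on 8x8 units.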
2275 let mut rec_subset = ts
2276 .rec
2277 .subregion(Area::BlockRect {
2278 bo: base_sbo.block_offset(0, 0).0,
2279 width: (pixel_w + 7) >> 3 << 3,
2280 height: (pixel_h + 7) >> 3 << 3,
2281 })
2282 .scratch_copy();
2283
  // The source is read-only; no copy is needed, just the subregion (but
  // do rebase the origin to zero so it matches the other copies/new
  // backing frames).
2286 let src_subset = ts
2287 .input_tile
2288 .subregion(Area::BlockRect {
2289 bo: base_sbo.block_offset(0, 0).0,
2290 width: (pixel_w + 7) >> 3 << 3,
2291 height: (pixel_h + 7) >> 3 << 3,
2292 })
2293 .home();
2294
2295 if deblock_p {
2296 // Find a good deblocking filter solution for the passed in area.
2297 // This is not RDO of deblocking itself, merely a solution to get
2298 // better results from CDEF/LRF RDO.
2299 let deblock_levels = deblock_filter_optimize(
2300 fi,
2301 &rec_subset.as_tile(),
2302 &src_subset,
2303 &tileblocks_subset.as_const(),
2304 crop_w,
2305 crop_h,
2306 );
2307
2308 // Deblock the contents of our reconstruction copy.
2309 if deblock_levels[0] != 0 || deblock_levels[1] != 0 {
2310 // copy ts.deblock because we need to set some of our own values here
2311 let mut deblock_copy = *ts.deblock;
2312 deblock_copy.levels = deblock_levels;
2313
2314 // finally, deblock the temp frame
2315 deblock_filter_frame(
2316 &deblock_copy,
2317 &mut rec_subset.as_tile_mut(),
2318 &tileblocks_subset.as_const(),
2319 crop_w,
2320 crop_h,
2321 fi.sequence.bit_depth,
2322 planes,
2323 );
2324 }
2325 }
2326
2327 let mut cdef_work =
2328 if !cdef_skip_all { Some(rec_subset.clone()) } else { None };
2329 let mut lrf_work = if !lru_skip_all {
2330 Some(Frame {
2331 planes: {
2332 let new_plane = |pli: usize| {
2333 let PlaneConfig { xdec, ydec, width, height, .. } =
2334 rec_subset.planes[pli].cfg;
2335 Plane::new(width, height, xdec, ydec, 0, 0)
2336 };
2337 [new_plane(0), new_plane(1), new_plane(2)]
2338 },
2339 })
2340 } else {
2341 None
2342 };
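  // LRF gets its own scratch output frame: the restoration filter reads
  // from the (possibly CDEF-filtered) input and writes its result here, so
  // the input planes are never filtered in place.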
2343
2344 // Precompute directional analysis for CDEF
2345 let cdef_data = {
2346 if cdef_work.is_some() {
2347 Some((
2348 &rec_subset,
2349 cdef_analyze_superblock_range(
2350 fi,
2351 &rec_subset,
2352 &tileblocks_subset.as_const(),
2353 sb_w,
2354 sb_h,
2355 ),
2356 ))
2357 } else {
2358 None
2359 }
2360 };
2361
2362 // CDEF/LRF decision iteration
2363 // Start with a default of CDEF 0 and RestorationFilter::None
  // Try all CDEF options for each sb with the current LRF; if a new
  // CDEF+LRF choice is better, select it.
  // Then try all LRF options with the current CDEFs; if a new CDEF+LRF
  // choice is better, select it.
  // If the LRF choice changed for any plane, repeat until nothing changes.
  // Limiting iterations, and where we break, based on the speed setting
  // is still on the TODO list ;-)
2368 let mut cdef_change = true;
2369 let mut lrf_change = true;
2370 while cdef_change || lrf_change {
2371 // search for improved cdef indices, superblock by superblock, if cdef is enabled.
2372 if let (Some((rec_copy, cdef_dirs)), Some(cdef_ref)) =
2373 (&cdef_data, &mut cdef_work.as_mut())
2374 {
2375 for sby in 0..sb_h {
2376 for sbx in 0..sb_w {
2377 // determine whether this superblock can be skipped
2378 if cdef_skip[sby * MAX_SB_SIZE + sbx] {
2379 continue;
2380 }
2381
2382 let prev_best_index = best_index[sby * sb_w + sbx];
2383 let mut best_cost = -1.;
2384 let mut best_new_index = -1i8;
2385
2386 /* offset of the superblock we're currently testing within the larger
2387 analysis area */
2388 let loop_sbo =
2389 TileSuperBlockOffset(SuperBlockOffset { x: sbx, y: sby });
2390
2391 /* cdef index testing loop */
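          // The frame signals 1 << cdef_bits strength presets (at most 8);
          // evaluate the RD cost of each preset for this superblock.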
2392 for cdef_index in 0..(1 << fi.cdef_bits) {
2393 let mut err = ScaledDistortion::zero();
2394 let mut rate = 0;
2395
2396 cdef_filter_superblock(
2397 fi,
2398 &rec_subset,
2399 &mut cdef_ref.as_tile_mut(),
2400 &tileblocks_subset.as_const(),
2401 loop_sbo,
2402 cdef_index,
2403 &cdef_dirs[sby * sb_w + sbx],
2404 );
2405 // apply LRF if any
2406 for pli in 0..planes {
2407 // We need the cropped-to-visible-frame area of this SB
2408 let wh =
2409 if fi.sequence.use_128x128_superblock { 128 } else { 64 };
2410 let PlaneConfig { xdec, ydec, .. } = cdef_ref.planes[pli].cfg;
2411 let vis_width = (wh >> xdec).min(
2412 (crop_w >> xdec)
2413 - loop_sbo.plane_offset(&cdef_ref.planes[pli].cfg).x
2414 as usize,
2415 );
2416 let vis_height = (wh >> ydec).min(
2417 (crop_h >> ydec)
2418 - loop_sbo.plane_offset(&cdef_ref.planes[pli].cfg).y
2419 as usize,
2420 );
2421 // which LRU are we currently testing against?
2422 if let (Some((lru_x, lru_y)), Some(lrf_ref)) = {
2423 let rp = &ts.restoration.planes[pli];
2424 (
2425 rp.restoration_unit_offset(base_sbo, loop_sbo, false),
2426 &mut lrf_work,
2427 )
2428 } {
2429 // We have a valid LRU, apply LRF, compute error
2430 match best_lrf[lru_y * lru_w[pli] + lru_x][pli] {
2431 RestorationFilter::None {} => {
2432 err += rdo_loop_plane_error(
2433 base_sbo,
2434 loop_sbo,
2435 1,
2436 1,
2437 fi,
2438 ts,
2439 &tileblocks_subset.as_const(),
2440 cdef_ref,
2441 &src_subset,
2442 pli,
2443 );
2444 rate += if fi.sequence.enable_restoration {
2445 cw.fc.count_lrf_switchable(
2446 w,
2447 &ts.restoration.as_const(),
2448 best_lrf[lru_y * lru_w[pli] + lru_x][pli],
2449 pli,
2450 )
2451 } else {
                    0 // no relative cost differences between different
                      // CDEF params. If cdef is on, it's a wash.
2454 };
2455 }
2456 RestorationFilter::Sgrproj { set, xqd } => {
2457 // only run on this single superblock
2458 let loop_po =
2459 loop_sbo.plane_offset(&cdef_ref.planes[pli].cfg);
2460 // todo: experiment with borrowing border pixels
2461 // rather than edge-extending. Right now this is
2462 // hard-clipping to the superblock boundary.
2463 setup_integral_image(
2464 &mut ts.integral_buffer,
2465 SOLVE_IMAGE_STRIDE,
2466 vis_width,
2467 vis_height,
2468 vis_width,
2469 vis_height,
2470 &cdef_ref.planes[pli].slice(loop_po),
2471 &cdef_ref.planes[pli].slice(loop_po),
2472 );
2473 sgrproj_stripe_filter(
2474 set,
2475 xqd,
2476 fi,
2477 &ts.integral_buffer,
2478 SOLVE_IMAGE_STRIDE,
2479 &cdef_ref.planes[pli].slice(loop_po),
2480 &mut lrf_ref.planes[pli].region_mut(Area::Rect {
2481 x: loop_po.x,
2482 y: loop_po.y,
2483 width: vis_width,
2484 height: vis_height,
2485 }),
2486 );
2487 err += rdo_loop_plane_error(
2488 base_sbo,
2489 loop_sbo,
2490 1,
2491 1,
2492 fi,
2493 ts,
2494 &tileblocks_subset.as_const(),
2495 lrf_ref,
2496 &src_subset,
2497 pli,
2498 );
2499 rate += cw.fc.count_lrf_switchable(
2500 w,
2501 &ts.restoration.as_const(),
2502 best_lrf[lru_y * lru_w[pli] + lru_x][pli],
2503 pli,
2504 );
2505 }
2506 RestorationFilter::Wiener { .. } => unreachable!(), // coming soon
2507 }
2508 } else {
2509 // No actual LRU here, compute error directly from CDEF output.
2510 err += rdo_loop_plane_error(
2511 base_sbo,
2512 loop_sbo,
2513 1,
2514 1,
2515 fi,
2516 ts,
2517 &tileblocks_subset.as_const(),
2518 cdef_ref,
2519 &src_subset,
2520 pli,
2521 );
                  // no relative cost differences between different
2523 // CDEF params. If cdef is on, it's a wash.
2524 // rate += 0;
2525 }
2526 }
2527
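            // Fold the distortion accumulated over all planes and the
            // signaling rate into a single cost for this cdef_index.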
2528 let cost = compute_rd_cost(fi, rate, err);
2529 if best_cost < 0. || cost < best_cost {
2530 best_cost = cost;
2531 best_new_index = cdef_index as i8;
2532 }
2533 }
2534
2535 // Did we change any preexisting choices?
2536 if best_new_index != prev_best_index {
2537 cdef_change = true;
2538 best_index[sby * sb_w + sbx] = best_new_index;
2539 tileblocks_subset.set_cdef(loop_sbo, best_new_index as u8);
2540 }
2541
2542 let mut cdef_ref_tm = TileMut::new(
2543 cdef_ref,
2544 TileRect {
2545 x: 0,
2546 y: 0,
2547 width: cdef_ref.planes[0].cfg.width,
2548 height: cdef_ref.planes[0].cfg.height,
2549 },
2550 );
2551
2552 // Keep cdef output up to date; we need it for restoration
2553 // both below and above (padding)
2554 cdef_filter_superblock(
2555 fi,
2556 rec_copy,
2557 &mut cdef_ref_tm,
2558 &tileblocks_subset.as_const(),
2559 loop_sbo,
2560 best_index[sby * sb_w + sbx] as u8,
2561 &cdef_dirs[sby * sb_w + sbx],
2562 );
2563 }
2564 }
2565 }
2566
2567 if !cdef_change {
2568 break;
2569 }
2570 cdef_change = false;
2571 lrf_change = false;
2572
2573 // search for improved restoration filter parameters if restoration is enabled
2574 if let Some(lrf_ref) = &mut lrf_work.as_mut() {
2575 let lrf_input = if cdef_work.is_some() {
2576 // When CDEF is enabled, we pull from the CDEF output
2577 cdef_work.as_ref().unwrap()
2578 } else {
2579 // When CDEF is disabled, we pull from the [optionally
2580 // deblocked] reconstruction
2581 &rec_subset
2582 };
2583 for pli in 0..planes {
2584 // Nominal size of LRU in pixels before clipping to visible frame
2585 let unit_size = ts.restoration.planes[pli].rp_cfg.unit_size;
2586 // width, in sb, of an LRU in this plane
2587 let lru_sb_w = 1 << ts.restoration.planes[pli].rp_cfg.sb_h_shift;
2588 // height, in sb, of an LRU in this plane
2589 let lru_sb_h = 1 << ts.restoration.planes[pli].rp_cfg.sb_v_shift;
2590 let PlaneConfig { xdec, ydec, .. } = lrf_ref.planes[pli].cfg;
2591 for lru_y in 0..lru_h[pli] {
2592 // number of LRUs vertically
2593 for lru_x in 0..lru_w[pli] {
2594 // number of LRUs horizontally
2595
2596 // determine whether this lru should be skipped
2597 if lru_skip[lru_y * MAX_LRU_SIZE + lru_x][pli] {
2598 continue;
2599 }
2600
2601 let loop_sbo = TileSuperBlockOffset(SuperBlockOffset {
2602 x: lru_x * lru_sb_w,
2603 y: lru_y * lru_sb_h,
2604 });
2605 if ts.restoration.has_restoration_unit(
2606 base_sbo + loop_sbo,
2607 pli,
2608 false,
2609 ) {
2610 let src_plane = &src_subset.planes[pli]; // uncompressed input for reference
2611 let lrf_in_plane = &lrf_input.planes[pli];
2612 let lrf_po = loop_sbo.plane_offset(src_plane.plane_cfg);
2613 let mut best_new_lrf = best_lrf[lru_y * lru_w[pli] + lru_x][pli];
2614 let mut best_cost =
2615 best_lrf_cost[lru_y * lru_w[pli] + lru_x][pli];
2616
2617 // Check the no filter option
2618 {
2619 let err = rdo_loop_plane_error(
2620 base_sbo,
2621 loop_sbo,
2622 lru_sb_w,
2623 lru_sb_h,
2624 fi,
2625 ts,
2626 &tileblocks_subset.as_const(),
2627 lrf_input,
2628 &src_subset,
2629 pli,
2630 );
2631 let rate = cw.fc.count_lrf_switchable(
2632 w,
2633 &ts.restoration.as_const(),
2634 best_new_lrf,
2635 pli,
2636 );
2637
2638 let cost = compute_rd_cost(fi, rate, err);
2639 // Was this choice actually an improvement?
2640 if best_cost < 0. || cost < best_cost {
2641 best_cost = cost;
2642 best_lrf_cost[lru_y * lru_w[pli] + lru_x][pli] = cost;
2643 best_new_lrf = RestorationFilter::None;
2644 }
2645 }
2646
2647 // Look for a self guided filter
2648 // We need the cropped-to-visible-frame computation area of this LRU
2649 let vis_width = unit_size.min(
2650 (crop_w >> xdec)
2651 - loop_sbo.plane_offset(&lrf_ref.planes[pli].cfg).x as usize,
2652 );
2653 let vis_height = unit_size.min(
2654 (crop_h >> ydec)
2655 - loop_sbo.plane_offset(&lrf_ref.planes[pli].cfg).y as usize,
2656 );
2657
2658 // todo: experiment with borrowing border pixels
2659 // rather than edge-extending. Right now this is
2660 // hard-clipping to the superblock boundary.
2661 setup_integral_image(
2662 &mut ts.integral_buffer,
2663 SOLVE_IMAGE_STRIDE,
2664 vis_width,
2665 vis_height,
2666 vis_width,
2667 vis_height,
2668 &lrf_in_plane.slice(lrf_po),
2669 &lrf_in_plane.slice(lrf_po),
2670 );
2671
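            // Try each self-guided parameter set allowed at this speed
            // level: sgrproj_solve fits the xqd weights for the set, and
            // the filtered result is then costed against the best so far.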
2672 for &set in get_sgr_sets(fi.config.speed_settings.sgr_complexity)
2673 {
2674 let (xqd0, xqd1) = sgrproj_solve(
2675 set,
2676 fi,
2677 &ts.integral_buffer,
2678 &src_plane
2679 .subregion(Area::StartingAt { x: lrf_po.x, y: lrf_po.y }),
2680 &lrf_in_plane.slice(lrf_po),
2681 vis_width,
2682 vis_height,
2683 );
2684 let current_lrf =
2685 RestorationFilter::Sgrproj { set, xqd: [xqd0, xqd1] };
2686 if let RestorationFilter::Sgrproj { set, xqd } = current_lrf {
2687 sgrproj_stripe_filter(
2688 set,
2689 xqd,
2690 fi,
2691 &ts.integral_buffer,
2692 SOLVE_IMAGE_STRIDE,
2693 &lrf_in_plane.slice(lrf_po),
2694 &mut lrf_ref.planes[pli].region_mut(Area::Rect {
2695 x: lrf_po.x,
2696 y: lrf_po.y,
2697 width: vis_width,
2698 height: vis_height,
2699 }),
2700 );
2701 }
2702 let err = rdo_loop_plane_error(
2703 base_sbo,
2704 loop_sbo,
2705 lru_sb_w,
2706 lru_sb_h,
2707 fi,
2708 ts,
2709 &tileblocks_subset.as_const(),
2710 lrf_ref,
2711 &src_subset,
2712 pli,
2713 );
2714 let rate = cw.fc.count_lrf_switchable(
2715 w,
2716 &ts.restoration.as_const(),
2717 current_lrf,
2718 pli,
2719 );
2720 let cost = compute_rd_cost(fi, rate, err);
2721 if cost < best_cost {
2722 best_cost = cost;
2723 best_lrf_cost[lru_y * lru_w[pli] + lru_x][pli] = cost;
2724 best_new_lrf = current_lrf;
2725 }
2726 }
2727
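            // If the best filter for this LRU changed, record the new
            // choice, write it into the tile's restoration state, and flag
            // another CDEF/LRF iteration.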
2728 if best_lrf[lru_y * lru_w[pli] + lru_x][pli]
2729 .notequal(best_new_lrf)
2730 {
2731 best_lrf[lru_y * lru_w[pli] + lru_x][pli] = best_new_lrf;
2732 lrf_change = true;
2733 if let Some(ru) = ts.restoration.planes[pli]
2734 .restoration_unit_mut(base_sbo + loop_sbo)
2735 {
2736 ru.filter = best_new_lrf;
2737 }
2738 }
2739 }
2740 }
2741 }
2742 }
2743 }
2744 }
2745}
2746
2747#[test]
2748fn estimate_rate_test() {
2749 assert_eq!(estimate_rate(0, TxSize::TX_4X4, 0), RDO_RATE_TABLE[0][0][0]);
2750}
2751