1// Copyright (c) 2017-2022, The rav1e contributors. All rights reserved
2//
3// This source code is subject to the terms of the BSD 2 Clause License and
4// the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
5// was not distributed with this source code in the LICENSE file, you can
6// obtain it at www.aomedia.org/license/software. If the Alliance for Open
7// Media Patent License 1.0 was not distributed with this source code in the
8// PATENTS file, you can obtain it at www.aomedia.org/license/patent.
9
10#![allow(non_upper_case_globals)]
11#![allow(non_camel_case_types)]
12#![allow(dead_code)]
13
14use std::mem::MaybeUninit;
15
// Re-export exactly one implementation of the prediction entry points:
// `crate::asm::x86::predict` when `nasm_x86_64` is set, otherwise
// `crate::asm::aarch64::predict` when `asm_neon` is set, otherwise the
// pure-Rust implementation from the `rust` submodule of this file.
cfg_if::cfg_if! {
  if #[cfg(nasm_x86_64)] {
    pub use crate::asm::x86::predict::*;
  } else if #[cfg(asm_neon)] {
    pub use crate::asm::aarch64::predict::*;
  } else {
    pub use self::rust::*;
  }
}
25
26use crate::context::{TileBlockOffset, MAX_SB_SIZE_LOG2, MAX_TX_SIZE};
27use crate::cpu_features::CpuFeatureLevel;
28use crate::encoder::FrameInvariants;
29use crate::frame::*;
30use crate::mc::*;
31use crate::partition::*;
32use crate::tiling::*;
33use crate::transform::*;
34use crate::util::*;
35use std::convert::TryInto;
36
/// Factor by which an angle delta is multiplied before it is added to the
/// base angle of an intra mode (see `PredictionMode::predict_intra`).
pub const ANGLE_STEP: i8 = 3;
38
/// Intra prediction modes listed for the encoder.
///
/// `UV_CFL_PRED` is intentionally absent from this list.
// TODO: Review the order of this list.
// The order impacts compression efficiency.
pub static RAV1E_INTRA_MODES: &[PredictionMode] = &[
  PredictionMode::DC_PRED,
  PredictionMode::H_PRED,
  PredictionMode::V_PRED,
  PredictionMode::SMOOTH_PRED,
  PredictionMode::SMOOTH_H_PRED,
  PredictionMode::SMOOTH_V_PRED,
  PredictionMode::PAETH_PRED,
  PredictionMode::D45_PRED,
  PredictionMode::D135_PRED,
  PredictionMode::D113_PRED,
  PredictionMode::D157_PRED,
  PredictionMode::D203_PRED,
  PredictionMode::D67_PRED,
];
56
/// Smallest set of single-reference inter modes: only `NEARESTMV`.
pub static RAV1E_INTER_MODES_MINIMAL: &[PredictionMode] =
  &[PredictionMode::NEARESTMV];
59
/// Compound (two-reference) inter modes listed for the encoder.
///
/// The `NEAR_NEW*` and `NEW_NEAR*` variants of [`PredictionMode`] are not
/// part of this list.
pub static RAV1E_INTER_COMPOUND_MODES: &[PredictionMode] = &[
  PredictionMode::GLOBAL_GLOBALMV,
  PredictionMode::NEAREST_NEARESTMV,
  PredictionMode::NEW_NEWMV,
  PredictionMode::NEAREST_NEWMV,
  PredictionMode::NEW_NEARESTMV,
  PredictionMode::NEAR_NEAR0MV,
  PredictionMode::NEAR_NEAR1MV,
  PredictionMode::NEAR_NEAR2MV,
];
70
/// Intra and inter prediction modes.
///
/// The declaration order is load-bearing: the derived ordering is used by
/// `is_intra` (everything before `NEARESTMV`), `is_compound` (everything from
/// `NEAREST_NEARESTMV` on) and `is_directional` (`V_PRED` through
/// `D67_PRED`), and `ref_mv_idx` subtracts discriminants of the `NEAR*`
/// variants. Do not reorder variants without revisiting those methods.
// There are more modes than in the spec because every allowed
// drl index for NEAR modes is considered its own mode.
#[derive(Copy, Clone, Debug, PartialEq, PartialOrd, Eq, Ord, Default)]
pub enum PredictionMode {
  #[default]
  DC_PRED, // Average of above and left pixels
  V_PRED,      // Vertical
  H_PRED,      // Horizontal
  D45_PRED,    // Directional 45 degree
  D135_PRED,   // Directional 135 degree
  D113_PRED,   // Directional 113 degree
  D157_PRED,   // Directional 157 degree
  D203_PRED,   // Directional 203 degree
  D67_PRED,    // Directional 67 degree
  SMOOTH_PRED, // Combination of horizontal and vertical interpolation
  SMOOTH_V_PRED,
  SMOOTH_H_PRED,
  PAETH_PRED,
  UV_CFL_PRED,
  // Single-reference inter modes start here (first non-intra variant).
  NEARESTMV,
  NEAR0MV,
  NEAR1MV,
  NEAR2MV,
  GLOBALMV,
  NEWMV,
  // Compound ref compound modes
  NEAREST_NEARESTMV,
  NEAR_NEAR0MV,
  NEAR_NEAR1MV,
  NEAR_NEAR2MV,
  NEAREST_NEWMV,
  NEW_NEARESTMV,
  NEAR_NEW0MV,
  NEAR_NEW1MV,
  NEAR_NEW2MV,
  NEW_NEAR0MV,
  NEW_NEAR1MV,
  NEW_NEAR2MV,
  GLOBAL_GLOBALMV,
  NEW_NEWMV,
}
112
/// Number of variants in [`PredictionMode`].
// This is a higher number than in the spec and cannot be used
// for bitstream writing purposes.
pub const PREDICTION_MODES: usize = 34;
116
/// Classification of a block position produced by `PredictionVariant::new`
/// from a tile-relative `(x, y)`; it selects which flavour of a predictor is
/// dispatched.
#[derive(Copy, Clone, Debug)]
pub enum PredictionVariant {
  /// `x == 0` and `y == 0`.
  NONE,
  /// `x != 0` and `y == 0`.
  LEFT,
  /// `x == 0` and `y != 0`.
  TOP,
  /// `x != 0` and `y != 0`.
  BOTH,
}
124
125impl PredictionVariant {
126 #[inline]
127 const fn new(x: usize, y: usize) -> Self {
128 match (x, y) {
129 (0, 0) => PredictionVariant::NONE,
130 (_, 0) => PredictionVariant::LEFT,
131 (0, _) => PredictionVariant::TOP,
132 _ => PredictionVariant::BOTH,
133 }
134 }
135}
136
137pub const fn intra_mode_to_angle(mode: PredictionMode) -> isize {
138 match mode {
139 PredictionMode::V_PRED => 90,
140 PredictionMode::H_PRED => 180,
141 PredictionMode::D45_PRED => 45,
142 PredictionMode::D135_PRED => 135,
143 PredictionMode::D113_PRED => 113,
144 PredictionMode::D157_PRED => 157,
145 PredictionMode::D203_PRED => 203,
146 PredictionMode::D67_PRED => 67,
147 _ => 0,
148 }
149}
150
151impl PredictionMode {
152 #[inline]
153 pub fn is_compound(self) -> bool {
154 self >= PredictionMode::NEAREST_NEARESTMV
155 }
156 #[inline]
157 pub fn has_nearmv(self) -> bool {
158 self == PredictionMode::NEAR0MV
159 || self == PredictionMode::NEAR1MV
160 || self == PredictionMode::NEAR2MV
161 || self == PredictionMode::NEAR_NEAR0MV
162 || self == PredictionMode::NEAR_NEAR1MV
163 || self == PredictionMode::NEAR_NEAR2MV
164 || self == PredictionMode::NEAR_NEW0MV
165 || self == PredictionMode::NEAR_NEW1MV
166 || self == PredictionMode::NEAR_NEW2MV
167 || self == PredictionMode::NEW_NEAR0MV
168 || self == PredictionMode::NEW_NEAR1MV
169 || self == PredictionMode::NEW_NEAR2MV
170 }
171 #[inline]
172 pub fn has_newmv(self) -> bool {
173 self == PredictionMode::NEWMV
174 || self == PredictionMode::NEW_NEWMV
175 || self == PredictionMode::NEAREST_NEWMV
176 || self == PredictionMode::NEW_NEARESTMV
177 || self == PredictionMode::NEAR_NEW0MV
178 || self == PredictionMode::NEAR_NEW1MV
179 || self == PredictionMode::NEAR_NEW2MV
180 || self == PredictionMode::NEW_NEAR0MV
181 || self == PredictionMode::NEW_NEAR1MV
182 || self == PredictionMode::NEW_NEAR2MV
183 }
184 #[inline]
185 pub fn ref_mv_idx(self) -> usize {
186 if self == PredictionMode::NEAR0MV
187 || self == PredictionMode::NEAR1MV
188 || self == PredictionMode::NEAR2MV
189 {
190 self as usize - PredictionMode::NEAR0MV as usize + 1
191 } else if self == PredictionMode::NEAR_NEAR0MV
192 || self == PredictionMode::NEAR_NEAR1MV
193 || self == PredictionMode::NEAR_NEAR2MV
194 {
195 self as usize - PredictionMode::NEAR_NEAR0MV as usize + 1
196 } else {
197 1
198 }
199 }
200
201 /// # Panics
202 ///
203 /// - If called on an inter `PredictionMode`
204 pub fn predict_intra<T: Pixel>(
205 self, tile_rect: TileRect, dst: &mut PlaneRegionMut<'_, T>,
206 tx_size: TxSize, bit_depth: usize, ac: &[i16], intra_param: IntraParam,
207 ief_params: Option<IntraEdgeFilterParameters>, edge_buf: &IntraEdge<T>,
208 cpu: CpuFeatureLevel,
209 ) {
210 assert!(self.is_intra());
211 let &Rect { x: frame_x, y: frame_y, .. } = dst.rect();
212 debug_assert!(frame_x >= 0 && frame_y >= 0);
213 // x and y are expressed relative to the tile
214 let x = frame_x as usize - tile_rect.x;
215 let y = frame_y as usize - tile_rect.y;
216
217 let variant = PredictionVariant::new(x, y);
218
219 let alpha = match intra_param {
220 IntraParam::Alpha(val) => val,
221 _ => 0,
222 };
223 let angle_delta = match intra_param {
224 IntraParam::AngleDelta(val) => val,
225 _ => 0,
226 };
227
228 let mode = match self {
229 PredictionMode::PAETH_PRED => match variant {
230 PredictionVariant::NONE => PredictionMode::DC_PRED,
231 PredictionVariant::TOP => PredictionMode::V_PRED,
232 PredictionVariant::LEFT => PredictionMode::H_PRED,
233 PredictionVariant::BOTH => PredictionMode::PAETH_PRED,
234 },
235 PredictionMode::UV_CFL_PRED if alpha == 0 => PredictionMode::DC_PRED,
236 _ => self,
237 };
238
239 let angle = match mode {
240 PredictionMode::UV_CFL_PRED => alpha as isize,
241 _ => intra_mode_to_angle(mode) + (angle_delta * ANGLE_STEP) as isize,
242 };
243
244 dispatch_predict_intra::<T>(
245 mode, variant, dst, tx_size, bit_depth, ac, angle, ief_params, edge_buf,
246 cpu,
247 );
248 }
249
250 #[inline]
251 pub fn is_intra(self) -> bool {
252 self < PredictionMode::NEARESTMV
253 }
254
255 #[inline]
256 pub fn is_cfl(self) -> bool {
257 self == PredictionMode::UV_CFL_PRED
258 }
259
260 #[inline]
261 pub fn is_directional(self) -> bool {
262 self >= PredictionMode::V_PRED && self <= PredictionMode::D67_PRED
263 }
264
265 #[inline(always)]
266 pub const fn angle_delta_count(self) -> i8 {
267 match self {
268 PredictionMode::V_PRED
269 | PredictionMode::H_PRED
270 | PredictionMode::D45_PRED
271 | PredictionMode::D135_PRED
272 | PredictionMode::D113_PRED
273 | PredictionMode::D157_PRED
274 | PredictionMode::D203_PRED
275 | PredictionMode::D67_PRED => 7,
276 _ => 1,
277 }
278 }
279
280 // Used by inter prediction to extract the fractional component of a mv and
281 // obtain the correct PlaneSlice to operate on.
282 #[inline]
283 fn get_mv_params<T: Pixel>(
284 rec_plane: &Plane<T>, po: PlaneOffset, mv: MotionVector,
285 ) -> (i32, i32, PlaneSlice<T>) {
286 let &PlaneConfig { xdec, ydec, .. } = &rec_plane.cfg;
287 let row_offset = mv.row as i32 >> (3 + ydec);
288 let col_offset = mv.col as i32 >> (3 + xdec);
289 let row_frac = ((mv.row as i32) << (1 - ydec)) & 0xf;
290 let col_frac = ((mv.col as i32) << (1 - xdec)) & 0xf;
291 let qo = PlaneOffset {
292 x: po.x + col_offset as isize - 3,
293 y: po.y + row_offset as isize - 3,
294 };
295 (row_frac, col_frac, rec_plane.slice(qo).clamp().subslice(3, 3))
296 }
297
298 /// Inter prediction with a single reference (i.e. not compound mode)
299 ///
300 /// # Panics
301 ///
302 /// - If called on an intra `PredictionMode`
303 pub fn predict_inter_single<T: Pixel>(
304 self, fi: &FrameInvariants<T>, tile_rect: TileRect, p: usize,
305 po: PlaneOffset, dst: &mut PlaneRegionMut<'_, T>, width: usize,
306 height: usize, ref_frame: RefType, mv: MotionVector,
307 ) {
308 assert!(!self.is_intra());
309 let frame_po = tile_rect.to_frame_plane_offset(po);
310
311 let mode = fi.default_filter;
312
313 if let Some(ref rec) =
314 fi.rec_buffer.frames[fi.ref_frames[ref_frame.to_index()] as usize]
315 {
316 let (row_frac, col_frac, src) =
317 PredictionMode::get_mv_params(&rec.frame.planes[p], frame_po, mv);
318 put_8tap(
319 dst,
320 src,
321 width,
322 height,
323 col_frac,
324 row_frac,
325 mode,
326 mode,
327 fi.sequence.bit_depth,
328 fi.cpu_feature_level,
329 );
330 }
331 }
332
333 /// Inter prediction with two references.
334 ///
335 /// # Panics
336 ///
337 /// - If called on an intra `PredictionMode`
338 pub fn predict_inter_compound<T: Pixel>(
339 self, fi: &FrameInvariants<T>, tile_rect: TileRect, p: usize,
340 po: PlaneOffset, dst: &mut PlaneRegionMut<'_, T>, width: usize,
341 height: usize, ref_frames: [RefType; 2], mvs: [MotionVector; 2],
342 buffer: &mut InterCompoundBuffers,
343 ) {
344 assert!(!self.is_intra());
345 let frame_po = tile_rect.to_frame_plane_offset(po);
346
347 let mode = fi.default_filter;
348
349 for i in 0..2 {
350 if let Some(ref rec) =
351 fi.rec_buffer.frames[fi.ref_frames[ref_frames[i].to_index()] as usize]
352 {
353 let (row_frac, col_frac, src) = PredictionMode::get_mv_params(
354 &rec.frame.planes[p],
355 frame_po,
356 mvs[i],
357 );
358 prep_8tap(
359 buffer.get_buffer_mut(i),
360 src,
361 width,
362 height,
363 col_frac,
364 row_frac,
365 mode,
366 mode,
367 fi.sequence.bit_depth,
368 fi.cpu_feature_level,
369 );
370 }
371 }
372 mc_avg(
373 dst,
374 buffer.get_buffer(0),
375 buffer.get_buffer(1),
376 width,
377 height,
378 fi.sequence.bit_depth,
379 fi.cpu_feature_level,
380 );
381 }
382
383 /// Inter prediction that determines whether compound mode is being used based
384 /// on the second [`RefType`] in [`ref_frames`].
385 pub fn predict_inter<T: Pixel>(
386 self, fi: &FrameInvariants<T>, tile_rect: TileRect, p: usize,
387 po: PlaneOffset, dst: &mut PlaneRegionMut<'_, T>, width: usize,
388 height: usize, ref_frames: [RefType; 2], mvs: [MotionVector; 2],
389 compound_buffer: &mut InterCompoundBuffers,
390 ) {
391 let is_compound = ref_frames[1] != RefType::INTRA_FRAME
392 && ref_frames[1] != RefType::NONE_FRAME;
393
394 if !is_compound {
395 self.predict_inter_single(
396 fi,
397 tile_rect,
398 p,
399 po,
400 dst,
401 width,
402 height,
403 ref_frames[0],
404 mvs[0],
405 )
406 } else {
407 self.predict_inter_compound(
408 fi,
409 tile_rect,
410 p,
411 po,
412 dst,
413 width,
414 height,
415 ref_frames,
416 mvs,
417 compound_buffer,
418 );
419 }
420 }
421}
422
/// A pair of buffers holding the interpolation of two references. Use for
/// compound inter prediction.
#[derive(Debug)]
pub struct InterCompoundBuffers {
  // Both buffers stored back to back; each occupies `BUFFER_SIZE` elements.
  data: AlignedBoxedSlice<i16>,
}
429
430impl InterCompoundBuffers {
431 // Size of one of the two buffers used.
432 const BUFFER_SIZE: usize = 1 << (2 * MAX_SB_SIZE_LOG2);
433
434 /// Get the buffer for eith
435 #[inline]
436 fn get_buffer_mut(&mut self, i: usize) -> &mut [i16] {
437 match i {
438 0 => &mut self.data[0..Self::BUFFER_SIZE],
439 1 => &mut self.data[Self::BUFFER_SIZE..2 * Self::BUFFER_SIZE],
440 _ => panic!(),
441 }
442 }
443
444 #[inline]
445 fn get_buffer(&self, i: usize) -> &[i16] {
446 match i {
447 0 => &self.data[0..Self::BUFFER_SIZE],
448 1 => &self.data[Self::BUFFER_SIZE..2 * Self::BUFFER_SIZE],
449 _ => panic!(),
450 }
451 }
452}
453
454impl Default for InterCompoundBuffers {
455 fn default() -> Self {
456 Self { data: AlignedBoxedSlice::new(len:2 * Self::BUFFER_SIZE, val:0) }
457 }
458}
459
/// Inter-intra prediction modes.
// NOTE(review): the last variant looks like a count sentinel rather than a
// real mode — confirm before matching on it.
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd)]
pub enum InterIntraMode {
  II_DC_PRED,
  II_V_PRED,
  II_H_PRED,
  II_SMOOTH_PRED,
  INTERINTRA_MODES,
}
468
/// Ways of combining two predictions in compound mode.
// NOTE(review): the last variant looks like a count sentinel rather than a
// real type — confirm before matching on it.
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd)]
pub enum CompoundType {
  COMPOUND_AVERAGE,
  COMPOUND_WEDGE,
  COMPOUND_DIFFWTD,
  COMPOUND_TYPES,
}
476
/// Motion modes.
// NOTE(review): the last variant looks like a count sentinel rather than a
// real mode — confirm before matching on it.
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd)]
pub enum MotionMode {
  SIMPLE_TRANSLATION,
  OBMC_CAUSAL,   // 2-sided OBMC
  WARPED_CAUSAL, // 2-sided WARPED
  MOTION_MODES,
}
484
/// Palette sizes, from two to eight colors.
// NOTE(review): the last variant looks like a count sentinel rather than a
// real size — confirm before matching on it.
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd)]
pub enum PaletteSize {
  TWO_COLORS,
  THREE_COLORS,
  FOUR_COLORS,
  FIVE_COLORS,
  SIX_COLORS,
  SEVEN_COLORS,
  EIGHT_COLORS,
  PALETTE_SIZES,
}
496
/// Palette color slots, first through eighth.
// NOTE(review): the last variant looks like a count sentinel rather than a
// real color slot — confirm before matching on it.
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd)]
pub enum PaletteColor {
  PALETTE_COLOR_ONE,
  PALETTE_COLOR_TWO,
  PALETTE_COLOR_THREE,
  PALETTE_COLOR_FOUR,
  PALETTE_COLOR_FIVE,
  PALETTE_COLOR_SIX,
  PALETTE_COLOR_SEVEN,
  PALETTE_COLOR_EIGHT,
  PALETTE_COLORS,
}
509
/// Filter-intra prediction modes.
// NOTE(review): the last variant looks like a count sentinel rather than a
// real mode — confirm before matching on it.
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd)]
pub enum FilterIntraMode {
  FILTER_DC_PRED,
  FILTER_V_PRED,
  FILTER_H_PRED,
  FILTER_D157_PRED,
  FILTER_PAETH_PRED,
  FILTER_INTRA_MODES,
}
519
/// Extra parameter passed to `PredictionMode::predict_intra`.
#[derive(Copy, Clone, Debug)]
pub enum IntraParam {
  /// Angle delta; multiplied by `ANGLE_STEP` and added to the mode's base
  /// angle.
  AngleDelta(i8),
  /// Alpha used by `UV_CFL_PRED`; a value of 0 makes `predict_intra` fall
  /// back to `DC_PRED`.
  Alpha(i16),
  /// No parameter: both the angle delta and alpha are treated as 0.
  None,
}
526
/// A pair of angle deltas.
// NOTE(review): `y` / `uv` presumably stand for the luma and chroma planes —
// confirm against the callers.
#[derive(Debug, Clone, Copy, Default)]
pub struct AngleDelta {
  pub y: i8,
  pub uv: i8,
}
532
/// Information about the above and left neighbours that `use_smooth_filter`
/// consults.
#[derive(Copy, Clone, Default)]
pub struct IntraEdgeFilterParameters {
  /// Plane index; 0 selects the neighbours' luma mode, anything else their
  /// chroma mode.
  pub plane: usize,
  /// Reference types of the above neighbour, if there is one.
  pub above_ref_frame_types: Option<[RefType; 2]>,
  /// Reference types of the left neighbour, if there is one.
  pub left_ref_frame_types: Option<[RefType; 2]>,
  /// Prediction mode of the above neighbour for `plane`, if there is one.
  pub above_mode: Option<PredictionMode>,
  /// Prediction mode of the left neighbour for `plane`, if there is one.
  pub left_mode: Option<PredictionMode>,
}
541
542impl IntraEdgeFilterParameters {
543 pub fn new(
544 plane: usize, above_ctx: Option<CodedBlockInfo>,
545 left_ctx: Option<CodedBlockInfo>,
546 ) -> Self {
547 IntraEdgeFilterParameters {
548 plane,
549 above_mode: match above_ctx {
550 Some(bi) => match plane {
551 0 => bi.luma_mode,
552 _ => bi.chroma_mode,
553 }
554 .into(),
555 None => None,
556 },
557 left_mode: match left_ctx {
558 Some(bi) => match plane {
559 0 => bi.luma_mode,
560 _ => bi.chroma_mode,
561 }
562 .into(),
563 None => None,
564 },
565 above_ref_frame_types: above_ctx.map(|bi| bi.reference_types),
566 left_ref_frame_types: left_ctx.map(|bi| bi.reference_types),
567 }
568 }
569
570 /// # Panics
571 ///
572 /// - If the appropriate ref frame types are not set on `self`
573 pub fn use_smooth_filter(self) -> bool {
574 let above_smooth = match self.above_mode {
575 Some(PredictionMode::SMOOTH_PRED)
576 | Some(PredictionMode::SMOOTH_V_PRED)
577 | Some(PredictionMode::SMOOTH_H_PRED) => {
578 self.plane == 0
579 || self.above_ref_frame_types.unwrap()[0] == RefType::INTRA_FRAME
580 }
581 _ => false,
582 };
583
584 let left_smooth = match self.left_mode {
585 Some(PredictionMode::SMOOTH_PRED)
586 | Some(PredictionMode::SMOOTH_V_PRED)
587 | Some(PredictionMode::SMOOTH_H_PRED) => {
588 self.plane == 0
589 || self.left_ref_frame_types.unwrap()[0] == RefType::INTRA_FRAME
590 }
591 _ => false,
592 };
593
594 above_smooth || left_smooth
595 }
596}
597
// Weights are quadratic from '1' to '1 / block_size', scaled by 2^sm_weight_log2_scale.
// With a value of 8 the full weight is 256, so every table entry (a `u8`)
// is strictly below full scale.
const sm_weight_log2_scale: u8 = 8;
600
// Smooth predictor weights
// The weights for a block dimension `bs` start at index `bs` and span `bs`
// entries, which is why the smooth predictors slice this table with
// `[width..]` / `[height..]`.
#[rustfmt::skip]
static sm_weight_arrays: [u8; 2 * MAX_TX_SIZE] = [
    // Unused, because we always offset by bs, which is at least 2.
    0, 0,
    // bs = 2
    255, 128,
    // bs = 4
    255, 149, 85, 64,
    // bs = 8
    255, 197, 146, 105, 73, 50, 37, 32,
    // bs = 16
    255, 225, 196, 170, 145, 123, 102, 84, 68, 54, 43, 33, 26, 20, 17, 16,
    // bs = 32
    255, 240, 225, 210, 196, 182, 169, 157, 145, 133, 122, 111, 101, 92, 83, 74,
    66, 59, 52, 45, 39, 34, 29, 25, 21, 17, 14, 12, 10, 9, 8, 8,
    // bs = 64
    255, 248, 240, 233, 225, 218, 210, 203, 196, 189, 182, 176, 169, 163, 156,
    150, 144, 138, 133, 127, 121, 116, 111, 106, 101, 96, 91, 86, 82, 77, 73, 69,
    65, 61, 57, 54, 50, 47, 44, 41, 38, 35, 32, 29, 27, 25, 22, 20, 18, 16, 15,
    13, 12, 10, 9, 8, 7, 6, 6, 5, 5, 4, 4, 4,
];
623
/// Multiplies `alpha_q3` by `ac_pred_q3` and divides the product by 64,
/// rounding the magnitude to nearest (halves away from zero) and then
/// restoring the sign.
#[inline(always)]
const fn get_scaled_luma_q0(alpha_q3: i16, ac_pred_q3: i16) -> i32 {
  let product_q6 = alpha_q3 as i32 * ac_pred_q3 as i32;
  // Round on the magnitude so the result is symmetric around zero.
  let magnitude_q0 = (product_q6.abs() + 32) >> 6;
  if product_q6 >= 0 {
    magnitude_q0
  } else {
    -magnitude_q0
  }
}
634
/// Fills `ac` with the luma AC coefficients for the chroma block that
/// corresponds to `bsize` at `tile_bo`, reading reconstructed luma from
/// `ts.rec.planes[0]`.
///
/// The chroma subsampling is taken from `ts.input.planes[1]`, and the actual
/// computation is delegated to the `pred_cfl_ac` instantiation matching that
/// subsampling.
///
/// # Returns
///
/// Initialized luma AC coefficients
///
/// # Panics
///
/// - If the block size is invalid for subsampling
/// - If `ac` is shorter than the area of the subsampled block
///
pub fn luma_ac<'ac, T: Pixel>(
  ac: &'ac mut [MaybeUninit<i16>], ts: &mut TileStateMut<'_, T>,
  tile_bo: TileBlockOffset, bsize: BlockSize, tx_size: TxSize,
  fi: &FrameInvariants<T>,
) -> &'ac mut [i16] {
  use crate::context::MI_SIZE_LOG2;

  let PlaneConfig { xdec, ydec, .. } = ts.input.planes[1].cfg;
  let plane_bsize = bsize.subsampled_size(xdec, ydec).unwrap();

  // ensure ac has the right length, so there aren't any uninitialized elements at the end
  let ac = &mut ac[..plane_bsize.area()];

  // Sub-8x8 blocks read luma from an adjusted block offset.
  let bo = if bsize.is_sub8x8(xdec, ydec) {
    let offset = bsize.sub8x8_offset(xdec, ydec);
    tile_bo.with_offset(offset.0, offset.1)
  } else {
    tile_bo
  };
  let rec = &ts.rec.planes[0];
  let luma = &rec.subregion(Area::BlockStartingAt { bo: bo.0 });
  let frame_bo = ts.to_frame_block_offset(bo);

  // Block dimensions limited to the part that lies inside the frame.
  let frame_clipped_bw: usize =
    ((fi.w_in_b - frame_bo.0.x) << MI_SIZE_LOG2).min(bsize.width());
  let frame_clipped_bh: usize =
    ((fi.h_in_b - frame_bo.0.y) << MI_SIZE_LOG2).min(bsize.height());

  // Similar to 'MaxLumaW' and 'MaxLumaH' stated in https://aomediacodec.github.io/av1-spec/#transform-block-semantics
  // For blocks larger than 8 in a dimension, the clipped size is rounded up
  // to a whole number of transform blocks.
  let max_luma_w = if bsize.width() > BlockSize::BLOCK_8X8.width() {
    let txw_log2 = tx_size.width_log2();
    ((frame_clipped_bw + (1 << txw_log2) - 1) >> txw_log2) << txw_log2
  } else {
    bsize.width()
  };
  let max_luma_h = if bsize.height() > BlockSize::BLOCK_8X8.height() {
    let txh_log2 = tx_size.height_log2();
    ((frame_clipped_bh + (1 << txh_log2) - 1) >> txh_log2) << txh_log2
  } else {
    bsize.height()
  };

  // Padding expressed in units of 4 chroma samples (`pred_cfl_ac` multiplies
  // it by 4 again).
  let w_pad = (bsize.width() - max_luma_w) >> (2 + xdec);
  let h_pad = (bsize.height() - max_luma_h) >> (2 + ydec);
  let cpu = fi.cpu_feature_level;

  // Any subsampling other than (0, 0) and (1, 0) uses the <1, 1> variant.
  (match (xdec, ydec) {
    (0, 0) => pred_cfl_ac::<T, 0, 0>,
    (1, 0) => pred_cfl_ac::<T, 1, 0>,
    (_, _) => pred_cfl_ac::<T, 1, 1>,
  })(ac, luma, plane_bsize, w_pad, h_pad, cpu);

  // SAFETY: it relies on individual pred_cfl_ac implementations to initialize the ac
  unsafe { slice_assume_init_mut(ac) }
}
698
699pub(crate) mod rust {
700 use super::*;
701 use crate::context::MAX_TX_SIZE;
702 use crate::cpu_features::CpuFeatureLevel;
703 use crate::tiling::PlaneRegionMut;
704 use crate::transform::TxSize;
705 use crate::util::round_shift;
706 use crate::Pixel;
707 use std::mem::{size_of, MaybeUninit};
708
  /// Pure-Rust dispatcher: runs the predictor for `mode` / `variant` on
  /// `dst`, using the edge pixels in `edge_buf`.
  ///
  /// `angle` is the prediction angle for directional modes and carries the
  /// CfL alpha for `UV_CFL_PRED`. `_cpu` is unused here.
  ///
  /// # Panics
  ///
  /// - If `mode` is not one of the intra modes handled below
  #[inline(always)]
  pub fn dispatch_predict_intra<T: Pixel>(
    mode: PredictionMode, variant: PredictionVariant,
    dst: &mut PlaneRegionMut<'_, T>, tx_size: TxSize, bit_depth: usize,
    ac: &[i16], angle: isize, ief_params: Option<IntraEdgeFilterParameters>,
    edge_buf: &IntraEdge<T>, _cpu: CpuFeatureLevel,
  ) {
    let width = tx_size.width();
    let height = tx_size.height();

    // left pixels are ordered from bottom to top and right-aligned
    let (left, top_left, above) = edge_buf.as_slices();

    let above_slice = above;
    // Last `height` left pixels (the whole slice if it is shorter).
    let left_slice = &left[left.len().saturating_sub(height)..];
    // Last `width + height` left pixels, used by the directional predictor.
    let left_and_left_below_slice =
      &left[left.len().saturating_sub(width + height)..];

    match mode {
      PredictionMode::DC_PRED => {
        (match variant {
          PredictionVariant::NONE => pred_dc_128,
          PredictionVariant::LEFT => pred_dc_left,
          PredictionVariant::TOP => pred_dc_top,
          PredictionVariant::BOTH => pred_dc,
        })(dst, above_slice, left_slice, width, height, bit_depth)
      }
      // Exactly vertical / horizontal angles use the plain copy predictors;
      // any other angle for these modes falls through to the directional arm.
      PredictionMode::V_PRED if angle == 90 => {
        pred_v(dst, above_slice, width, height)
      }
      PredictionMode::H_PRED if angle == 180 => {
        pred_h(dst, left_slice, width, height)
      }
      PredictionMode::H_PRED
      | PredictionMode::V_PRED
      | PredictionMode::D45_PRED
      | PredictionMode::D135_PRED
      | PredictionMode::D113_PRED
      | PredictionMode::D157_PRED
      | PredictionMode::D203_PRED
      | PredictionMode::D67_PRED => pred_directional(
        dst,
        above_slice,
        left_and_left_below_slice,
        top_left,
        angle as usize,
        width,
        height,
        bit_depth,
        ief_params,
      ),
      PredictionMode::SMOOTH_PRED => {
        pred_smooth(dst, above_slice, left_slice, width, height)
      }
      PredictionMode::SMOOTH_V_PRED => {
        pred_smooth_v(dst, above_slice, left_slice, width, height)
      }
      PredictionMode::SMOOTH_H_PRED => {
        pred_smooth_h(dst, above_slice, left_slice, width, height)
      }
      PredictionMode::PAETH_PRED => {
        pred_paeth(dst, above_slice, left_slice, top_left[0], width, height)
      }
      // For CfL, `angle` carries alpha (see `PredictionMode::predict_intra`).
      PredictionMode::UV_CFL_PRED => (match variant {
        PredictionVariant::NONE => pred_cfl_128,
        PredictionVariant::LEFT => pred_cfl_left,
        PredictionVariant::TOP => pred_cfl_top,
        PredictionVariant::BOTH => pred_cfl,
      })(
        dst,
        ac,
        angle as i16,
        above_slice,
        left_slice,
        width,
        height,
        bit_depth,
      ),
      _ => unimplemented!(),
    }
  }
790
791 pub(crate) fn pred_dc<T: Pixel>(
792 output: &mut PlaneRegionMut<'_, T>, above: &[T], left: &[T], width: usize,
793 height: usize, _bit_depth: usize,
794 ) {
795 let edges = left[..height].iter().chain(above[..width].iter());
796 let len = (width + height) as u32;
797 let avg = (edges.fold(0u32, |acc, &v| {
798 let v: u32 = v.into();
799 v + acc
800 }) + (len >> 1))
801 / len;
802 let avg = T::cast_from(avg);
803
804 for line in output.rows_iter_mut().take(height) {
805 line[..width].fill(avg);
806 }
807 }
808
809 pub(crate) fn pred_dc_128<T: Pixel>(
810 output: &mut PlaneRegionMut<'_, T>, _above: &[T], _left: &[T],
811 width: usize, height: usize, bit_depth: usize,
812 ) {
813 let v = T::cast_from(128u32 << (bit_depth - 8));
814 for line in output.rows_iter_mut().take(height) {
815 line[..width].fill(v);
816 }
817 }
818
819 pub(crate) fn pred_dc_left<T: Pixel>(
820 output: &mut PlaneRegionMut<'_, T>, _above: &[T], left: &[T],
821 width: usize, height: usize, _bit_depth: usize,
822 ) {
823 let sum = left[..].iter().fold(0u32, |acc, &v| {
824 let v: u32 = v.into();
825 v + acc
826 });
827 let avg = T::cast_from((sum + (height >> 1) as u32) / height as u32);
828 for line in output.rows_iter_mut().take(height) {
829 line[..width].fill(avg);
830 }
831 }
832
833 pub(crate) fn pred_dc_top<T: Pixel>(
834 output: &mut PlaneRegionMut<'_, T>, above: &[T], _left: &[T],
835 width: usize, height: usize, _bit_depth: usize,
836 ) {
837 let sum = above[..width].iter().fold(0u32, |acc, &v| {
838 let v: u32 = v.into();
839 v + acc
840 });
841 let avg = T::cast_from((sum + (width >> 1) as u32) / width as u32);
842 for line in output.rows_iter_mut().take(height) {
843 line[..width].fill(avg);
844 }
845 }
846
847 pub(crate) fn pred_h<T: Pixel>(
848 output: &mut PlaneRegionMut<'_, T>, left: &[T], width: usize,
849 height: usize,
850 ) {
851 for (line, l) in output.rows_iter_mut().zip(left[..height].iter().rev()) {
852 line[..width].fill(*l);
853 }
854 }
855
856 pub(crate) fn pred_v<T: Pixel>(
857 output: &mut PlaneRegionMut<'_, T>, above: &[T], width: usize,
858 height: usize,
859 ) {
860 for line in output.rows_iter_mut().take(height) {
861 line[..width].copy_from_slice(&above[..width])
862 }
863 }
864
865 pub(crate) fn pred_paeth<T: Pixel>(
866 output: &mut PlaneRegionMut<'_, T>, above: &[T], left: &[T],
867 above_left: T, width: usize, height: usize,
868 ) {
869 for r in 0..height {
870 let row = &mut output[r];
871 for c in 0..width {
872 // Top-left pixel is fixed in libaom
873 let raw_top_left: i32 = above_left.into();
874 let raw_left: i32 = left[height - 1 - r].into();
875 let raw_top: i32 = above[c].into();
876
877 let p_base = raw_top + raw_left - raw_top_left;
878 let p_left = (p_base - raw_left).abs();
879 let p_top = (p_base - raw_top).abs();
880 let p_top_left = (p_base - raw_top_left).abs();
881
882 // Return nearest to base of left, top and top_left
883 if p_left <= p_top && p_left <= p_top_left {
884 row[c] = T::cast_from(raw_left);
885 } else if p_top <= p_top_left {
886 row[c] = T::cast_from(raw_top);
887 } else {
888 row[c] = T::cast_from(raw_top_left);
889 }
890 }
891 }
892 }
893
894 pub(crate) fn pred_smooth<T: Pixel>(
895 output: &mut PlaneRegionMut<'_, T>, above: &[T], left: &[T], width: usize,
896 height: usize,
897 ) {
898 let below_pred = left[0]; // estimated by bottom-left pixel
899 let right_pred = above[width - 1]; // estimated by top-right pixel
900 let sm_weights_w = &sm_weight_arrays[width..];
901 let sm_weights_h = &sm_weight_arrays[height..];
902
903 let log2_scale = 1 + sm_weight_log2_scale;
904 let scale = 1_u16 << sm_weight_log2_scale;
905
906 // Weights sanity checks
907 assert!((sm_weights_w[0] as u16) < scale);
908 assert!((sm_weights_h[0] as u16) < scale);
909 assert!((scale - sm_weights_w[width - 1] as u16) < scale);
910 assert!((scale - sm_weights_h[height - 1] as u16) < scale);
911 // ensures no overflow when calculating predictor
912 assert!(log2_scale as usize + size_of::<T>() < 31);
913
914 for r in 0..height {
915 let row = &mut output[r];
916 for c in 0..width {
917 let pixels = [above[c], below_pred, left[height - 1 - r], right_pred];
918
919 let weights = [
920 sm_weights_h[r] as u16,
921 scale - sm_weights_h[r] as u16,
922 sm_weights_w[c] as u16,
923 scale - sm_weights_w[c] as u16,
924 ];
925
926 assert!(
927 scale >= (sm_weights_h[r] as u16)
928 && scale >= (sm_weights_w[c] as u16)
929 );
930
931 // Sum up weighted pixels
932 let mut this_pred: u32 = weights
933 .iter()
934 .zip(pixels.iter())
935 .map(|(w, p)| {
936 let p: u32 = (*p).into();
937 (*w as u32) * p
938 })
939 .sum();
940 this_pred = (this_pred + (1 << (log2_scale - 1))) >> log2_scale;
941
942 row[c] = T::cast_from(this_pred);
943 }
944 }
945 }
946
947 pub(crate) fn pred_smooth_h<T: Pixel>(
948 output: &mut PlaneRegionMut<'_, T>, above: &[T], left: &[T], width: usize,
949 height: usize,
950 ) {
951 let right_pred = above[width - 1]; // estimated by top-right pixel
952 let sm_weights = &sm_weight_arrays[width..];
953
954 let log2_scale = sm_weight_log2_scale;
955 let scale = 1_u16 << sm_weight_log2_scale;
956
957 // Weights sanity checks
958 assert!((sm_weights[0] as u16) < scale);
959 assert!((scale - sm_weights[width - 1] as u16) < scale);
960 // ensures no overflow when calculating predictor
961 assert!(log2_scale as usize + size_of::<T>() < 31);
962
963 for r in 0..height {
964 let row = &mut output[r];
965 for c in 0..width {
966 let pixels = [left[height - 1 - r], right_pred];
967 let weights = [sm_weights[c] as u16, scale - sm_weights[c] as u16];
968
969 assert!(scale >= sm_weights[c] as u16);
970
971 let mut this_pred: u32 = weights
972 .iter()
973 .zip(pixels.iter())
974 .map(|(w, p)| {
975 let p: u32 = (*p).into();
976 (*w as u32) * p
977 })
978 .sum();
979 this_pred = (this_pred + (1 << (log2_scale - 1))) >> log2_scale;
980
981 row[c] = T::cast_from(this_pred);
982 }
983 }
984 }
985
986 pub(crate) fn pred_smooth_v<T: Pixel>(
987 output: &mut PlaneRegionMut<'_, T>, above: &[T], left: &[T], width: usize,
988 height: usize,
989 ) {
990 let below_pred = left[0]; // estimated by bottom-left pixel
991 let sm_weights = &sm_weight_arrays[height..];
992
993 let log2_scale = sm_weight_log2_scale;
994 let scale = 1_u16 << sm_weight_log2_scale;
995
996 // Weights sanity checks
997 assert!((sm_weights[0] as u16) < scale);
998 assert!((scale - sm_weights[height - 1] as u16) < scale);
999 // ensures no overflow when calculating predictor
1000 assert!(log2_scale as usize + size_of::<T>() < 31);
1001
1002 for r in 0..height {
1003 let row = &mut output[r];
1004 for c in 0..width {
1005 let pixels = [above[c], below_pred];
1006 let weights = [sm_weights[r] as u16, scale - sm_weights[r] as u16];
1007
1008 assert!(scale >= sm_weights[r] as u16);
1009
1010 let mut this_pred: u32 = weights
1011 .iter()
1012 .zip(pixels.iter())
1013 .map(|(w, p)| {
1014 let p: u32 = (*p).into();
1015 (*w as u32) * p
1016 })
1017 .sum();
1018 this_pred = (this_pred + (1 << (log2_scale - 1))) >> log2_scale;
1019
1020 row[c] = T::cast_from(this_pred);
1021 }
1022 }
1023 }
1024
  /// Computes the CfL AC coefficients for a chroma block of size
  /// `plane_bsize` from reconstructed `luma`.
  ///
  /// Each output is the luma value (the sum of the 1, 2 or 4 luma samples
  /// covered by `XDEC` / `YDEC`) scaled by a left shift of
  /// `3 - XDEC - YDEC`; the rounded average of all outputs is then
  /// subtracted from each of them. `w_pad` / `h_pad` are given in units of 4
  /// chroma samples and limit how far into `luma` the function reads;
  /// positions beyond that limit repeat the last permitted column or row.
  ///
  /// Every element of `ac[..plane_bsize.area()]` is initialized on return.
  /// `_cpu` is unused here.
  pub(crate) fn pred_cfl_ac<T: Pixel, const XDEC: usize, const YDEC: usize>(
    ac: &mut [MaybeUninit<i16>], luma: &PlaneRegion<'_, T>,
    plane_bsize: BlockSize, w_pad: usize, h_pad: usize, _cpu: CpuFeatureLevel,
  ) {
    let max_luma_w = (plane_bsize.width() - w_pad * 4) << XDEC;
    let max_luma_h = (plane_bsize.height() - h_pad * 4) << YDEC;
    // Largest luma coordinate at which a sample group may start.
    let max_luma_x: usize = max_luma_w.max(8) - (1 << XDEC);
    let max_luma_y: usize = max_luma_h.max(8) - (1 << YDEC);
    let mut sum: i32 = 0;

    let ac = &mut ac[..plane_bsize.area()];

    for (sub_y, ac_rows) in
      ac.chunks_exact_mut(plane_bsize.width()).enumerate()
    {
      for (sub_x, ac_item) in ac_rows.iter_mut().enumerate() {
        // Refer to https://aomediacodec.github.io/av1-spec/#predict-chroma-from-luma-process
        let luma_y = sub_y << YDEC;
        let luma_x = sub_x << XDEC;
        let y = luma_y.min(max_luma_y);
        let x = luma_x.min(max_luma_x);
        let mut sample: i16 = i16::cast_from(luma[y][x]);
        if XDEC != 0 {
          sample += i16::cast_from(luma[y][x + 1]);
        }
        if YDEC != 0 {
          debug_assert!(XDEC != 0);
          sample += i16::cast_from(luma[y + 1][x])
            + i16::cast_from(luma[y + 1][x + 1]);
        }
        // Bring every subsampling to the same scale: 1, 2 or 4 summed
        // samples are shifted by 3, 2 or 1 bits respectively.
        sample <<= 3 - XDEC - YDEC;
        ac_item.write(sample);
        sum += sample as i32;
      }
    }
    // SAFETY: the loop above has initialized all items
    let ac = unsafe { slice_assume_init_mut(ac) };
    let shift = plane_bsize.width_log2() + plane_bsize.height_log2();
    // Rounded mean over the whole block.
    let average = ((sum + (1 << (shift - 1))) >> shift) as i16;

    for val in ac {
      *val -= average;
    }
  }
1069
1070 pub(crate) fn pred_cfl_inner<T: Pixel>(
1071 output: &mut PlaneRegionMut<'_, T>, ac: &[i16], alpha: i16, width: usize,
1072 height: usize, bit_depth: usize,
1073 ) {
1074 if alpha == 0 {
1075 return;
1076 }
1077 debug_assert!(ac.len() >= width * height);
1078 assert!(output.plane_cfg.stride >= width);
1079 assert!(output.rows_iter().len() >= height);
1080
1081 let sample_max = (1 << bit_depth) - 1;
1082 let avg: i32 = output[0][0].into();
1083
1084 for (line, luma) in
1085 output.rows_iter_mut().zip(ac.chunks_exact(width)).take(height)
1086 {
1087 for (v, &l) in line[..width].iter_mut().zip(luma[..width].iter()) {
1088 *v = T::cast_from(
1089 (avg + get_scaled_luma_q0(alpha, l)).clamp(0, sample_max),
1090 );
1091 }
1092 }
1093 }
1094
1095 pub(crate) fn pred_cfl<T: Pixel>(
1096 output: &mut PlaneRegionMut<'_, T>, ac: &[i16], alpha: i16, above: &[T],
1097 left: &[T], width: usize, height: usize, bit_depth: usize,
1098 ) {
1099 pred_dc(output, above, left, width, height, bit_depth);
1100 pred_cfl_inner(output, ac, alpha, width, height, bit_depth);
1101 }
1102
1103 pub(crate) fn pred_cfl_128<T: Pixel>(
1104 output: &mut PlaneRegionMut<'_, T>, ac: &[i16], alpha: i16, above: &[T],
1105 left: &[T], width: usize, height: usize, bit_depth: usize,
1106 ) {
1107 pred_dc_128(output, above, left, width, height, bit_depth);
1108 pred_cfl_inner(output, ac, alpha, width, height, bit_depth);
1109 }
1110
1111 pub(crate) fn pred_cfl_left<T: Pixel>(
1112 output: &mut PlaneRegionMut<'_, T>, ac: &[i16], alpha: i16, above: &[T],
1113 left: &[T], width: usize, height: usize, bit_depth: usize,
1114 ) {
1115 pred_dc_left(output, above, left, width, height, bit_depth);
1116 pred_cfl_inner(output, ac, alpha, width, height, bit_depth);
1117 }
1118
1119 pub(crate) fn pred_cfl_top<T: Pixel>(
1120 output: &mut PlaneRegionMut<'_, T>, ac: &[i16], alpha: i16, above: &[T],
1121 left: &[T], width: usize, height: usize, bit_depth: usize,
1122 ) {
1123 pred_dc_top(output, above, left, width, height, bit_depth);
1124 pred_cfl_inner(output, ac, alpha, width, height, bit_depth);
1125 }
1126
/// Selects the intra edge filter strength (`0` = no filtering, up to `3`).
///
/// The decision depends on `width + height`, on whether the smooth filter
/// variant is in use, and on the magnitude of `angle_delta`; the sign of
/// `angle_delta` is ignored.
pub(crate) const fn select_ief_strength(
  width: usize, height: usize, smooth_filter: bool, angle_delta: isize,
) -> u8 {
  let block_wh = width + height;
  let abs_delta = angle_delta.unsigned_abs();

  if smooth_filter {
    match block_wh {
      0..=8 => {
        if abs_delta >= 64 {
          2
        } else if abs_delta >= 40 {
          1
        } else {
          0
        }
      }
      9..=16 => {
        if abs_delta >= 48 {
          2
        } else if abs_delta >= 20 {
          1
        } else {
          0
        }
      }
      17..=24 => {
        if abs_delta >= 4 {
          3
        } else {
          0
        }
      }
      _ => 3,
    }
  } else {
    match block_wh {
      0..=8 => {
        if abs_delta >= 56 {
          1
        } else {
          0
        }
      }
      9..=16 => {
        if abs_delta >= 40 {
          1
        } else {
          0
        }
      }
      17..=24 => {
        if abs_delta >= 32 {
          3
        } else if abs_delta >= 16 {
          2
        } else if abs_delta >= 8 {
          1
        } else {
          0
        }
      }
      25..=32 => {
        // This size class is always filtered, whatever the angle delta.
        if abs_delta >= 32 {
          3
        } else if abs_delta >= 4 {
          2
        } else {
          1
        }
      }
      _ => 3,
    }
  }
}
1192
/// Decides whether an intra edge should be upsampled.
///
/// Upsampling is only selected for a non-zero angle delta whose magnitude is
/// below 40, and only when `width + height` is at most 8 (smooth filter) or
/// at most 16 (otherwise). The sign of `angle_delta` is ignored.
pub(crate) const fn select_ief_upsample(
  width: usize, height: usize, smooth_filter: bool, angle_delta: isize,
) -> bool {
  let block_wh = width + height;
  let abs_delta = angle_delta.unsigned_abs();
  let size_limit = if smooth_filter { 8 } else { 16 };

  abs_delta != 0 && abs_delta < 40 && block_wh <= size_limit
}
1207
1208 pub(crate) fn filter_edge<T: Pixel>(
1209 size: usize, strength: u8, edge: &mut [T],
1210 ) {
1211 const INTRA_EDGE_KERNEL: [[u32; 5]; 3] =
1212 [[0, 4, 8, 4, 0], [0, 5, 6, 5, 0], [2, 4, 4, 4, 2]];
1213
1214 if strength == 0 {
1215 return;
1216 }
1217
1218 // Copy the edge buffer to avoid predicting from
1219 // just-filtered samples.
1220 let mut edge_filtered = [MaybeUninit::<T>::uninit(); MAX_TX_SIZE * 4 + 1];
1221 let edge_filtered =
1222 init_slice_repeat_mut(&mut edge_filtered[..edge.len()], T::zero());
1223 edge_filtered.copy_from_slice(&edge[..edge.len()]);
1224
1225 for i in 1..size {
1226 let mut s = 0;
1227
1228 for j in 0..INTRA_EDGE_KERNEL[0].len() {
1229 let k = (i + j).saturating_sub(2).min(size - 1);
1230 s += INTRA_EDGE_KERNEL[(strength - 1) as usize][j]
1231 * edge[k].to_u32().unwrap();
1232 }
1233
1234 edge_filtered[i] = T::cast_from((s + 8) >> 4);
1235 }
1236 edge.copy_from_slice(edge_filtered);
1237 }
1238
1239 pub(crate) fn upsample_edge<T: Pixel>(
1240 size: usize, edge: &mut [T], bit_depth: usize,
1241 ) {
1242 // The input edge should be valid in the -1..size range,
1243 // where the -1 index is the top-left edge pixel. Since
1244 // negative indices are unsafe in Rust, the caller is
1245 // expected to globally offset it by 1, which makes the
1246 // input range 0..=size.
1247 let mut dup = [MaybeUninit::<T>::uninit(); MAX_TX_SIZE];
1248 let dup = init_slice_repeat_mut(&mut dup[..size + 3], T::zero());
1249 dup[0] = edge[0];
1250 dup[1..=size + 1].copy_from_slice(&edge[0..=size]);
1251 dup[size + 2] = edge[size];
1252
1253 // Past here the edge is being filtered, and its
1254 // effective range is shifted from -1..size to
1255 // -2..2*size-1. Again, because this is safe Rust,
1256 // we cannot use negative indices, and the actual range
1257 // will be 0..=2*size. The caller is expected to adjust
1258 // its indices on receipt of the filtered edge.
1259 edge[0] = dup[0];
1260
1261 for i in 0..size {
1262 let mut s = -dup[i].to_i32().unwrap()
1263 + (9 * dup[i + 1].to_i32().unwrap())
1264 + (9 * dup[i + 2].to_i32().unwrap())
1265 - dup[i + 3].to_i32().unwrap();
1266 s = ((s + 8) / 16).clamp(0, (1 << bit_depth) - 1);
1267
1268 edge[2 * i + 1] = T::cast_from(s);
1269 edge[2 * i + 2] = dup[i + 2];
1270 }
1271 }
1272
/// Looks up the directional-prediction derivative for `p_angle`.
///
/// Only the angles listed in the table below have an entry; every other
/// value of `p_angle` yields `0`.
pub(crate) const fn dr_intra_derivative(p_angle: usize) -> usize {
  // (angle, derivative) pairs.
  const ANGLE_TO_DERIVATIVE: [(usize, usize); 27] = [
    (3, 1023),
    (6, 547),
    (9, 372),
    (14, 273),
    (17, 215),
    (20, 178),
    (23, 151),
    (26, 132),
    (29, 116),
    (32, 102),
    (36, 90),
    (39, 80),
    (42, 71),
    (45, 64),
    (48, 57),
    (51, 51),
    (54, 45),
    (58, 40),
    (61, 35),
    (64, 31),
    (67, 27),
    (70, 23),
    (73, 19),
    (76, 15),
    (81, 11),
    (84, 7),
    (87, 3),
  ];

  // Iterator adaptors are not usable in a `const fn`, hence the manual scan.
  let mut entry = 0;
  while entry < ANGLE_TO_DERIVATIVE.len() {
    let (angle, derivative) = ANGLE_TO_DERIVATIVE[entry];
    if angle == p_angle {
      return derivative;
    }
    entry += 1;
  }
  0
}
1305
1306 pub(crate) fn pred_directional<T: Pixel>(
1307 output: &mut PlaneRegionMut<'_, T>, above: &[T], left: &[T],
1308 top_left: &[T], p_angle: usize, width: usize, height: usize,
1309 bit_depth: usize, ief_params: Option<IntraEdgeFilterParameters>,
1310 ) {
1311 let sample_max = (1 << bit_depth) - 1;
1312
1313 let max_x = output.plane_cfg.width as isize - 1;
1314 let max_y = output.plane_cfg.height as isize - 1;
1315
1316 let mut upsample_above = false;
1317 let mut upsample_left = false;
1318
1319 let mut above_edge: &[T] = above;
1320 let mut left_edge: &[T] = left;
1321 let top_left_edge: T = top_left[0];
1322
1323 let enable_edge_filter = ief_params.is_some();
1324
1325 // Initialize above and left edge buffers of the largest possible needed size if upsampled
1326 // The first value is the top left pixel, also mutable and indexed at -1 in the spec
1327 let mut above_filtered = [MaybeUninit::<T>::uninit(); MAX_TX_SIZE * 4 + 1];
1328 let above_filtered = init_slice_repeat_mut(
1329 &mut above_filtered[..=(width + height) * 2],
1330 T::zero(),
1331 );
1332 let mut left_filtered = [MaybeUninit::<T>::uninit(); MAX_TX_SIZE * 4 + 1];
1333 let left_filtered = init_slice_repeat_mut(
1334 &mut left_filtered[..=(width + height) * 2],
1335 T::zero(),
1336 );
1337
1338 if enable_edge_filter {
1339 let above_len = above.len().min(above_filtered.len() - 1);
1340 let left_len = left.len().min(left_filtered.len() - 1);
1341 above_filtered[1..=above_len].clone_from_slice(&above[..above_len]);
1342 for i in 1..=left_len {
1343 left_filtered[i] = left[left.len() - i];
1344 }
1345
1346 let smooth_filter = ief_params.unwrap().use_smooth_filter();
1347
1348 if p_angle != 90 && p_angle != 180 {
1349 above_filtered[0] = top_left_edge;
1350 left_filtered[0] = top_left_edge;
1351
1352 let num_px = (
1353 width.min((max_x - output.rect().x + 1).try_into().unwrap())
1354 + if p_angle < 90 { height } else { 0 }
1355 + 1, // above
1356 height.min((max_y - output.rect().y + 1).try_into().unwrap())
1357 + if p_angle > 180 { width } else { 0 }
1358 + 1, // left
1359 );
1360
1361 let filter_strength = select_ief_strength(
1362 width,
1363 height,
1364 smooth_filter,
1365 p_angle as isize - 90,
1366 );
1367 filter_edge(num_px.0, filter_strength, above_filtered);
1368 let filter_strength = select_ief_strength(
1369 width,
1370 height,
1371 smooth_filter,
1372 p_angle as isize - 180,
1373 );
1374 filter_edge(num_px.1, filter_strength, left_filtered);
1375 }
1376
1377 let num_px = (
1378 width + if p_angle < 90 { height } else { 0 }, // above
1379 height + if p_angle > 180 { width } else { 0 }, // left
1380 );
1381
1382 upsample_above = select_ief_upsample(
1383 width,
1384 height,
1385 smooth_filter,
1386 p_angle as isize - 90,
1387 );
1388 if upsample_above {
1389 upsample_edge(num_px.0, above_filtered, bit_depth);
1390 }
1391 upsample_left = select_ief_upsample(
1392 width,
1393 height,
1394 smooth_filter,
1395 p_angle as isize - 180,
1396 );
1397 if upsample_left {
1398 upsample_edge(num_px.1, left_filtered, bit_depth);
1399 }
1400
1401 left_filtered.reverse();
1402 above_edge = above_filtered;
1403 left_edge = left_filtered;
1404 }
1405
1406 let dx = if p_angle < 90 {
1407 dr_intra_derivative(p_angle)
1408 } else if p_angle > 90 && p_angle < 180 {
1409 dr_intra_derivative(180 - p_angle)
1410 } else {
1411 0 // undefined
1412 };
1413
1414 let dy = if p_angle > 90 && p_angle < 180 {
1415 dr_intra_derivative(p_angle - 90)
1416 } else if p_angle > 180 {
1417 dr_intra_derivative(270 - p_angle)
1418 } else {
1419 0 // undefined
1420 };
1421
1422 // edge buffer index offsets applied due to the fact
1423 // that we cannot safely use negative indices in Rust
1424 let upsample_above = upsample_above as usize;
1425 let upsample_left = upsample_left as usize;
1426 let offset_above = (enable_edge_filter as usize) << upsample_above;
1427 let offset_left = (enable_edge_filter as usize) << upsample_left;
1428
1429 if p_angle < 90 {
1430 for i in 0..height {
1431 let row = &mut output[i];
1432 for j in 0..width {
1433 let idx = (i + 1) * dx;
1434 let base = (idx >> (6 - upsample_above)) + (j << upsample_above);
1435 let shift = (((idx << upsample_above) >> 1) & 31) as i32;
1436 let max_base_x = (height + width - 1) << upsample_above;
1437 let v = (if base < max_base_x {
1438 let a: i32 = above_edge[base + offset_above].into();
1439 let b: i32 = above_edge[base + 1 + offset_above].into();
1440 round_shift(a * (32 - shift) + b * shift, 5)
1441 } else {
1442 let c: i32 = above_edge[max_base_x + offset_above].into();
1443 c
1444 })
1445 .clamp(0, sample_max);
1446 row[j] = T::cast_from(v);
1447 }
1448 }
1449 } else if p_angle > 90 && p_angle < 180 {
1450 for i in 0..height {
1451 let row = &mut output[i];
1452 for j in 0..width {
1453 let idx = (j << 6) as isize - ((i + 1) * dx) as isize;
1454 let base = idx >> (6 - upsample_above);
1455 if base >= -(1 << upsample_above) {
1456 let shift = (((idx << upsample_above) >> 1) & 31) as i32;
1457 let a: i32 = if !enable_edge_filter && base < 0 {
1458 top_left_edge
1459 } else {
1460 above_edge[(base + offset_above as isize) as usize]
1461 }
1462 .into();
1463 let b: i32 =
1464 above_edge[(base + 1 + offset_above as isize) as usize].into();
1465 let v = round_shift(a * (32 - shift) + b * shift, 5)
1466 .clamp(0, sample_max);
1467 row[j] = T::cast_from(v);
1468 } else {
1469 let idx = (i << 6) as isize - ((j + 1) * dy) as isize;
1470 let base = idx >> (6 - upsample_left);
1471 let shift = (((idx << upsample_left) >> 1) & 31) as i32;
1472 let l = left_edge.len() - 1;
1473 let a: i32 = if !enable_edge_filter && base < 0 {
1474 top_left_edge
1475 } else if (base + offset_left as isize) == -2 {
1476 left_edge[0]
1477 } else {
1478 left_edge[l - (base + offset_left as isize) as usize]
1479 }
1480 .into();
1481 let b: i32 = if (base + offset_left as isize) == -2 {
1482 left_edge[1]
1483 } else {
1484 left_edge[l - (base + offset_left as isize + 1) as usize]
1485 }
1486 .into();
1487 let v = round_shift(a * (32 - shift) + b * shift, 5)
1488 .clamp(0, sample_max);
1489 row[j] = T::cast_from(v);
1490 }
1491 }
1492 }
1493 } else if p_angle > 180 {
1494 for i in 0..height {
1495 let row = &mut output[i];
1496 for j in 0..width {
1497 let idx = (j + 1) * dy;
1498 let base = (idx >> (6 - upsample_left)) + (i << upsample_left);
1499 let shift = (((idx << upsample_left) >> 1) & 31) as i32;
1500 let l = left_edge.len() - 1;
1501 let a: i32 = left_edge[l.saturating_sub(base + offset_left)].into();
1502 let b: i32 =
1503 left_edge[l.saturating_sub(base + offset_left + 1)].into();
1504 let v =
1505 round_shift(a * (32 - shift) + b * shift, 5).clamp(0, sample_max);
1506 row[j] = T::cast_from(v);
1507 }
1508 }
1509 }
1510 }
1511}
1512
#[cfg(test)]
mod test {
  use super::*;
  use crate::predict::rust::*;
  use crate::util::Aligned;
  use num_traits::*;

  /// Asserts that the top-left 4x4 block of a plane buffer laid out with 32
  /// samples per row holds `want` everywhere.
  fn assert_top_left_4x4(data: &[u16], want: u16) {
    for row in data.chunks(32).take(4) {
      for &px in &row[..4] {
        assert_eq!(px, want);
      }
    }
  }

  /// Checks every 8-bit 4x4 predictor against known-good output.
  #[test]
  fn pred_matches_u8() {
    let edge_buf =
      Aligned::from_fn(|i| (i + 32).saturating_sub(MAX_TX_SIZE * 2).as_());
    let (all_left, top_left, above) = IntraEdge::mock(&edge_buf).as_slices();
    let left = &all_left[all_left.len() - 4..];

    let mut output = Plane::from_slice(&[0u8; 4 * 4], 4);

    // DC family: the whole block takes a single value.
    pred_dc(&mut output.as_region_mut(), above, left, 4, 4, 8);
    assert_eq!(&output.data[..], [32u8; 16]);

    pred_dc_top(&mut output.as_region_mut(), above, left, 4, 4, 8);
    assert_eq!(&output.data[..], [35u8; 16]);

    pred_dc_left(&mut output.as_region_mut(), above, left, 4, 4, 8);
    assert_eq!(&output.data[..], [30u8; 16]);

    pred_dc_128(&mut output.as_region_mut(), above, left, 4, 4, 8);
    assert_eq!(&output.data[..], [128u8; 16]);

    // Vertical and horizontal prediction.
    pred_v(&mut output.as_region_mut(), above, 4, 4);
    assert_eq!(
      &output.data[..],
      [33, 34, 35, 36, 33, 34, 35, 36, 33, 34, 35, 36, 33, 34, 35, 36]
    );

    pred_h(&mut output.as_region_mut(), left, 4, 4);
    assert_eq!(
      &output.data[..],
      [31, 31, 31, 31, 30, 30, 30, 30, 29, 29, 29, 29, 28, 28, 28, 28]
    );

    // Paeth and the smooth family.
    pred_paeth(&mut output.as_region_mut(), above, left, top_left[0], 4, 4);
    assert_eq!(
      &output.data[..],
      [32, 34, 35, 36, 30, 32, 32, 36, 29, 32, 32, 32, 28, 28, 32, 32]
    );

    pred_smooth(&mut output.as_region_mut(), above, left, 4, 4);
    assert_eq!(
      &output.data[..],
      [32, 34, 35, 35, 30, 32, 33, 34, 29, 31, 32, 32, 29, 30, 32, 32]
    );

    pred_smooth_h(&mut output.as_region_mut(), above, left, 4, 4);
    assert_eq!(
      &output.data[..],
      [31, 33, 34, 35, 30, 33, 34, 35, 29, 32, 34, 34, 28, 31, 33, 34]
    );

    pred_smooth_v(&mut output.as_region_mut(), above, left, 4, 4);
    assert_eq!(
      &output.data[..],
      [33, 34, 35, 36, 31, 31, 32, 33, 30, 30, 30, 31, 29, 30, 30, 30]
    );

    // Directional prediction without edge filtering, one expected block per
    // angle below 90 degrees.
    let left = &all_left[all_left.len() - 8..];
    let angles = [
      3, 6, 9, 14, 17, 20, 23, 26, 29, 32, 36, 39, 42, 45, 48, 51, 54, 58, 61,
      64, 67, 70, 73, 76, 81, 84, 87,
    ];
    let expected = [
      [40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40],
      [40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40],
      [39, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40],
      [37, 38, 39, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40],
      [36, 37, 38, 39, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40],
      [36, 37, 38, 39, 39, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40],
      [35, 36, 37, 38, 38, 39, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40],
      [35, 36, 37, 38, 37, 38, 39, 40, 39, 40, 40, 40, 40, 40, 40, 40],
      [35, 36, 37, 38, 37, 38, 39, 40, 38, 39, 40, 40, 40, 40, 40, 40],
      [35, 36, 37, 38, 36, 37, 38, 39, 38, 39, 40, 40, 39, 40, 40, 40],
      [34, 35, 36, 37, 36, 37, 38, 39, 37, 38, 39, 40, 39, 40, 40, 40],
      [34, 35, 36, 37, 36, 37, 38, 39, 37, 38, 39, 40, 38, 39, 40, 40],
      [34, 35, 36, 37, 35, 36, 37, 38, 36, 37, 38, 39, 37, 38, 39, 40],
      [34, 35, 36, 37, 35, 36, 37, 38, 36, 37, 38, 39, 37, 38, 39, 40],
      [34, 35, 36, 37, 35, 36, 37, 38, 36, 37, 38, 39, 37, 38, 39, 40],
      [34, 35, 36, 37, 35, 36, 37, 38, 35, 36, 37, 38, 36, 37, 38, 39],
      [34, 35, 36, 37, 34, 35, 36, 37, 35, 36, 37, 38, 36, 37, 38, 39],
      [34, 35, 36, 37, 34, 35, 36, 37, 35, 36, 37, 38, 36, 37, 38, 39],
      [34, 35, 36, 37, 34, 35, 36, 37, 35, 36, 37, 38, 35, 36, 37, 38],
      [33, 34, 35, 36, 34, 35, 36, 37, 34, 35, 36, 37, 35, 36, 37, 38],
      [33, 34, 35, 36, 34, 35, 36, 37, 34, 35, 36, 37, 35, 36, 37, 38],
      [33, 34, 35, 36, 34, 35, 36, 37, 34, 35, 36, 37, 34, 35, 36, 37],
      [33, 34, 35, 36, 34, 35, 36, 37, 34, 35, 36, 37, 34, 35, 36, 37],
      [33, 34, 35, 36, 33, 34, 35, 36, 34, 35, 36, 37, 34, 35, 36, 37],
      [33, 34, 35, 36, 33, 34, 35, 36, 34, 35, 36, 37, 34, 35, 36, 37],
      [33, 34, 35, 36, 33, 34, 35, 36, 33, 34, 35, 36, 33, 34, 35, 36],
      [33, 34, 35, 36, 33, 34, 35, 36, 33, 34, 35, 36, 33, 34, 35, 36],
    ];
    for (&angle, expected) in angles.iter().zip(expected.iter()) {
      pred_directional(
        &mut output.as_region_mut(),
        above,
        left,
        top_left,
        angle,
        4,
        4,
        8,
        None,
      );
      assert_eq!(&output.data[..], expected);
    }
  }

  /// With every edge sample at the 12-bit maximum, each predictor must
  /// reproduce that maximum across the whole 4x4 block.
  #[test]
  fn pred_max() {
    let max12bit: u16 = 4096 - 1;
    let above = [max12bit; 32];
    let left = [max12bit; 32];

    let mut o = Plane::from_slice(&vec![0u16; 32 * 32], 32);

    pred_dc(&mut o.as_region_mut(), &above[..4], &left[..4], 4, 4, 16);
    assert_top_left_4x4(&o.data[..], max12bit);

    pred_h(&mut o.as_region_mut(), &left[..4], 4, 4);
    assert_top_left_4x4(&o.data[..], max12bit);

    pred_v(&mut o.as_region_mut(), &above[..4], 4, 4);
    assert_top_left_4x4(&o.data[..], max12bit);

    let above_left = max12bit;

    pred_paeth(
      &mut o.as_region_mut(),
      &above[..4],
      &left[..4],
      above_left,
      4,
      4,
    );
    assert_top_left_4x4(&o.data[..], max12bit);

    pred_smooth(&mut o.as_region_mut(), &above[..4], &left[..4], 4, 4);
    assert_top_left_4x4(&o.data[..], max12bit);

    pred_smooth_h(&mut o.as_region_mut(), &above[..4], &left[..4], 4, 4);
    assert_top_left_4x4(&o.data[..], max12bit);

    pred_smooth_v(&mut o.as_region_mut(), &above[..4], &left[..4], 4, 4);
    assert_top_left_4x4(&o.data[..], max12bit);
  }
}
1701