1 | // Copyright (c) 2018-2022, The rav1e contributors. All rights reserved |
2 | // |
3 | // This source code is subject to the terms of the BSD 2 Clause License and |
4 | // the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
5 | // was not distributed with this source code in the LICENSE file, you can |
6 | // obtain it at www.aomedia.org/license/software. If the Alliance for Open |
7 | // Media Patent License 1.0 was not distributed with this source code in the |
8 | // PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
9 | |
10 | use crate::cpu_features::CpuFeatureLevel; |
11 | use crate::util::*; |
12 | |
13 | use super::TxType; |
14 | |
// Select the forward-transform backend at compile time: hand-written
// x86_64 assembly when built with nasm, NEON assembly on aarch64, and
// otherwise the portable Rust fallback defined in `self::rust` below.
cfg_if::cfg_if! {
  if #[cfg(nasm_x86_64)] {
    pub use crate::asm::x86::transform::forward::*;
  } else if #[cfg(asm_neon)] {
    pub use crate::asm::aarch64::transform::forward::*;
  } else {
    pub use self::rust::*;
  }
}
24 | |
25 | pub mod rust { |
26 | use super::*; |
27 | use std::mem::MaybeUninit; |
28 | |
29 | use crate::transform::forward_shared::*; |
30 | use crate::transform::{av1_round_shift_array, valid_av1_transform, TxSize}; |
31 | use simd_helpers::cold_for_target_arch; |
32 | |
  // Signature shared by every 1-D forward transform kernel: transforms a
  // single row or column of coefficients in place.
  type TxfmFunc = fn(&mut [i32]);

  // NOTE(review): presumably expands to the 1-D transform kernels and the
  // `get_func` dispatcher used by `forward_transform` below — confirm
  // against `crate::transform::forward_shared`.
  impl_1d_tx!();
36 | |
37 | impl TxOperations for i32 { |
38 | fn zero() -> Self { |
39 | 0 |
40 | } |
41 | |
42 | fn tx_mul<const SHIFT: i32>(self, mul: i32) -> Self { |
43 | ((self * mul) + (1 << SHIFT >> 1)) >> SHIFT |
44 | } |
45 | |
46 | fn rshift1(self) -> Self { |
47 | (self + i32::from(self < 0)) >> 1 |
48 | } |
49 | |
50 | fn add(self, b: Self) -> Self { |
51 | self + b |
52 | } |
53 | |
54 | fn sub(self, b: Self) -> Self { |
55 | self - b |
56 | } |
57 | |
58 | fn add_avg(self, b: Self) -> Self { |
59 | (self + b) >> 1 |
60 | } |
61 | |
62 | fn sub_avg(self, b: Self) -> Self { |
63 | (self - b) >> 1 |
64 | } |
65 | } |
66 | |
67 | /// # Panics |
68 | /// |
69 | /// - If called with an invalid combination of `tx_size` and `tx_type` |
70 | #[cold_for_target_arch ("x86_64" )] |
71 | pub fn forward_transform<T: Coefficient>( |
72 | input: &[i16], output: &mut [MaybeUninit<T>], stride: usize, |
73 | tx_size: TxSize, tx_type: TxType, bd: usize, _cpu: CpuFeatureLevel, |
74 | ) { |
75 | assert!(valid_av1_transform(tx_size, tx_type)); |
76 | |
77 | // Note when assigning txfm_size_col, we use the txfm_size from the |
78 | // row configuration and vice versa. This is intentionally done to |
79 | // accurately perform rectangular transforms. When the transform is |
80 | // rectangular, the number of columns will be the same as the |
81 | // txfm_size stored in the row cfg struct. It will make no difference |
82 | // for square transforms. |
83 | let txfm_size_col = tx_size.width(); |
84 | let txfm_size_row = tx_size.height(); |
85 | |
86 | let mut buf = Aligned::<[MaybeUninit<i32>; 64 * 64]>::uninit_array(); |
87 | let buf = &mut buf.data[..txfm_size_col * txfm_size_row]; |
88 | |
89 | let cfg = Txfm2DFlipCfg::fwd(tx_type, tx_size, bd); |
90 | |
91 | let txfm_func_col = get_func(cfg.txfm_type_col); |
92 | let txfm_func_row = get_func(cfg.txfm_type_row); |
93 | |
94 | // Columns |
95 | for c in 0..txfm_size_col { |
96 | let mut col_coeffs = Aligned::<[MaybeUninit<i32>; 64]>::uninit_array(); |
97 | let col_coeffs = &mut col_coeffs.data[..txfm_size_row]; |
98 | if cfg.ud_flip { |
99 | // flip upside down |
100 | for r in 0..txfm_size_row { |
101 | col_coeffs[r] |
102 | .write((input[(txfm_size_row - r - 1) * stride + c]).into()); |
103 | } |
104 | } else { |
105 | for r in 0..txfm_size_row { |
106 | col_coeffs[r].write((input[r * stride + c]).into()); |
107 | } |
108 | } |
109 | // SAFETY: The loops above have initialized all txfm_size_row elements |
110 | let col_coeffs = unsafe { slice_assume_init_mut(col_coeffs) }; |
111 | |
112 | av1_round_shift_array(col_coeffs, txfm_size_row, -cfg.shift[0]); |
113 | txfm_func_col(col_coeffs); |
114 | av1_round_shift_array(col_coeffs, txfm_size_row, -cfg.shift[1]); |
115 | if cfg.lr_flip { |
116 | for r in 0..txfm_size_row { |
117 | // flip from left to right |
118 | buf[r * txfm_size_col + (txfm_size_col - c - 1)] |
119 | .write(col_coeffs[r]); |
120 | } |
121 | } else { |
122 | for r in 0..txfm_size_row { |
123 | buf[r * txfm_size_col + c].write(col_coeffs[r]); |
124 | } |
125 | } |
126 | } |
127 | // SAFETY: The loops above have initialized the entire buf |
128 | let buf = unsafe { slice_assume_init_mut(buf) }; |
129 | |
130 | // Rows |
131 | for (r, row_coeffs) in buf.chunks_exact_mut(txfm_size_col).enumerate() { |
132 | txfm_func_row(row_coeffs); |
133 | av1_round_shift_array(row_coeffs, txfm_size_col, -cfg.shift[2]); |
134 | |
135 | // Store output in at most 32x32 chunks so that the first 32x32 |
136 | // coefficients are stored first. When we don't have 64 rows, there is no |
137 | // change in order. With 64 rows, the chunks are in this order |
138 | // - First 32 rows and first 32 cols |
139 | // - Last 32 rows and first 32 cols |
140 | // - First 32 rows and last 32 cols |
141 | // - Last 32 rows and last 32 cols |
142 | |
143 | // Output is grouped into 32x32 chunks so a stride of at most 32 is |
144 | // used for each chunk. |
145 | let output_stride = txfm_size_row.min(32); |
146 | |
147 | // Split the first 32 rows from the last 32 rows |
148 | let output = &mut output |
149 | [(r >= 32) as usize * output_stride * txfm_size_col.min(32)..]; |
150 | |
151 | for cg in (0..txfm_size_col).step_by(32) { |
152 | // Split the first 32 cols from the last 32 cols |
153 | let output = &mut output[txfm_size_row * cg..]; |
154 | |
155 | for c in 0..txfm_size_col.min(32) { |
156 | output[c * output_stride + (r & 31)] |
157 | .write(T::cast_from(row_coeffs[c + cg])); |
158 | } |
159 | } |
160 | } |
161 | } |
162 | } |
163 | |