cdef.rs source code [crates/rav1e/src/cdef.rs]

1	// Copyright (c) 2017-2022, The rav1e contributors. All rights reserved
2	//
3	// This source code is subject to the terms of the BSD 2 Clause License and
4	// the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
5	// was not distributed with this source code in the LICENSE file, you can
6	// obtain it at www.aomedia.org/license/software. If the Alliance for Open
7	// Media Patent License 1.0 was not distributed with this source code in the
8	// PATENTS file, you can obtain it at www.aomedia.org/license/patent.
9
10	use crate::color::ChromaSampling::Cs400;
11	use crate::context::*;
12	use crate::encoder::FrameInvariants;
13	use crate::frame::*;
14	use crate::tiling::*;
15	use crate::util::{clamp, msb, CastFromPrimitive, Pixel};
16
17	use crate::cpu_features::CpuFeatureLevel;
18	use std::cmp;
19
20	cfg_if::cfg_if! {
21	if #[cfg(nasm_x86_64)] {
22	pub(crate) use crate::asm::x86::cdef::*;
23	} else if #[cfg(asm_neon)] {
24	pub(crate) use crate::asm::aarch64::cdef::*;
25	} else {
26	pub(crate) use self::rust::*;
27	}
28	}
29
/// Sentinel written into the padded `u16` filter input for pixels that are
/// not available. The filter skips this value when tracking the running
/// maximum of the taps it reads.
pub const CDEF_VERY_LARGE: u16 = 0x8000;
// These values match dav1d; flags indicating where padding exists
/// Padding pixels exist to the left of the block.
pub const CDEF_HAVE_LEFT: u8 = 1 << 0;
/// Padding pixels exist to the right of the block.
pub const CDEF_HAVE_RIGHT: u8 = 1 << 1;
/// Padding pixels exist above the block.
pub const CDEF_HAVE_TOP: u8 = 1 << 2;
/// Padding pixels exist below the block.
pub const CDEF_HAVE_BOTTOM: u8 = 1 << 3;
/// Padding pixels exist on all four sides of the block.
pub const CDEF_HAVE_ALL: u8 =
  CDEF_HAVE_LEFT | CDEF_HAVE_RIGHT | CDEF_HAVE_TOP | CDEF_HAVE_BOTTOM;

/// Radix used to split a combined CDEF strength value: the primary strength
/// is `strength / CDEF_SEC_STRENGTHS` and the secondary strength is
/// `strength % CDEF_SEC_STRENGTHS`.
pub(crate) const CDEF_SEC_STRENGTHS: u8 = 4;
40
/// Direction analysis results for the 8x8 grid of 8x8 luma blocks that make
/// up one superblock.
///
/// Both tables are indexed as `[bx][by]`, i.e. block column first.
pub struct CdefDirections {
  // Detected direction index of each block, as returned by `cdef_find_dir`.
  dir: [[u8; 8]; 8],
  // Variance estimate of each block, as reported by `cdef_find_dir`.
  var: [[i32; 8]; 8],
}
45
46	pub(crate) mod rust {
47	use super::*;
48
49	use simd_helpers::cold_for_target_arch;
50
// Instead of dividing by n between 2 and 8, we multiply by 3*5*7*8/n
// (i.e. 840/n). The output is then 840 times larger, but we don't care
// for finding the max. Index 0 is unused padding so that the table can be
// indexed directly by n.
const CDEF_DIV_TABLE: [i32; 9] = [0, 840, 420, 280, 210, 168, 140, 120, 105];
55
/// Returns the position and value of the first instance of the max element in
/// a slice as a tuple.
///
/// # Arguments
///
/// * `elems` - A non-empty slice of integers
///
/// # Panics
///
/// Panics if `elems` is empty
#[inline]
fn first_max_element(elems: &[i32]) -> (usize, i32) {
  // `max_by_key` yields the *last* maximal item, so the index is wrapped in
  // `Reverse`: among equal values the smallest index has the greatest key
  // and the first occurrence is selected.
  let (max_idx, max_value) = elems
    .iter()
    .enumerate()
    .max_by_key(|&(i, v)| (v, cmp::Reverse(i)))
    .expect("first_max_element requires a non-empty slice");
  (max_idx, *max_value)
}
76
77	// Detect direction. 0 means 45-degree up-right, 2 is horizontal, and so on.
78	// The search minimizes the weighted variance along all the lines in a
79	// particular direction, i.e. the squared error between the input and a
80	// "predicted" block where each pixel is replaced by the average along a line
81	// in a particular direction. Since each direction have the same sum(x^2) term,
82	// that term is never computed. See Section 2, step 2, of:
83	// http://jmvalin.ca/notes/intra_paint.pdf
84	pub fn cdef_find_dir<T: Pixel>(
85	img: &PlaneSlice<'_, T>, var: &mut u32, coeff_shift: usize,
86	_cpu: CpuFeatureLevel,
87	) -> i32 {
88	let mut cost: [i32; `8`] = [`0`; `8`];
89	let mut partial: [[i32; `15`]; `8`] = [[`0`; `15`]; `8`];
90	for i in `0`..`8` {
91	for j in `0`..`8` {
92	let p: i32 = i32::cast_from(img[i][j]);
93	// We subtract 128 here to reduce the maximum range of the squared
94	// partial sums.
95	debug_assert!(p >> coeff_shift <= `255`);
96	let x = (p >> coeff_shift) - `128`;
97	partial[`0`][i + j] += x;
98	partial[`1`][i + j / `2`] += x;
99	partial[`2`][i] += x;
100	partial[`3`][`3` + i - j / `2`] += x;
101	partial[`4`][`7` + i - j] += x;
102	partial[`5`][`3` - i / `2` + j] += x;
103	partial[`6`][j] += x;
104	partial[`7`][i / `2` + j] += x;
105	}
106	}
107	for i in `0`..`8` {
108	cost[`2`] += partial[`2`][i] * partial[`2`][i];
109	cost[`6`] += partial[`6`][i] * partial[`6`][i];
110	}
111	cost[`2`] *= CDEF_DIV_TABLE[`8`];
112	cost[`6`] *= CDEF_DIV_TABLE[`8`];
113	for i in `0`..`7` {
114	cost[`0`] += (partial[`0`][i] * partial[`0`][i]
115	+ partial[`0`][`14` - i] * partial[`0`][`14` - i])
116	* CDEF_DIV_TABLE[i + `1`];
117	cost[`4`] += (partial[`4`][i] * partial[`4`][i]
118	+ partial[`4`][`14` - i] * partial[`4`][`14` - i])
119	* CDEF_DIV_TABLE[i + `1`];
120	}
121	cost[`0`] += partial[`0`][`7`] * partial[`0`][`7`] * CDEF_DIV_TABLE[`8`];
122	cost[`4`] += partial[`4`][`7`] * partial[`4`][`7`] * CDEF_DIV_TABLE[`8`];
123	for i in (`1`..`8`).step_by(`2`) {
124	for j in `0`..`5` {
125	cost[i] += partial[i][`3` + j] * partial[i][`3` + j];
126	}
127	cost[i] *= CDEF_DIV_TABLE[`8`];
128	for j in `0`..`3` {
129	cost[i] += (partial[i][j] * partial[i][j]
130	+ partial[i][`10` - j] * partial[i][`10` - j])
131	* CDEF_DIV_TABLE[`2` * j + `2`];
132	}
133	}
134
135	let (best_dir, best_cost) = first_max_element(&cost);
136	// Difference between the optimal variance and the variance along the
137	// orthogonal direction. Again, the sum(x^2) terms cancel out.
138	// We'd normally divide by 840, but dividing by 1024 is close enough
139	// for what we're going to do with this. /*
140	var = ((best_cost - cost[(best_dir + `4`) & `7`]) >> `10`) as u32*;
141
142	best_dir as i32
143	}
144
145	#[inline(always)]
146	fn constrain(diff: i32, threshold: i32, damping: i32) -> i32 {
147	if threshold != `0` {
148	let shift = cmp::max(`0`, damping - msb(threshold));
149	let magnitude = (threshold - (diff.abs() >> shift)).clamp(`0`, diff.abs());
150
151	if diff < `0` {
152	-magnitude
153	} else {
154	magnitude
155	}
156	} else {
157	`0`
158	}
159	}
160
161	pub unsafe fn pad_into_tmp16<T: Pixel>(
162	dst: *mut u16, dst_stride: isize, src: *const T, src_stride: isize,
163	block_width: usize, block_height: usize, edges: u8,
164	) {
165	let mut w = block_width;
166	let mut h = block_height;
167	let (dst_col, src_col) = if (edges & CDEF_HAVE_LEFT) != `0` {
168	w += `2`;
169	(dst, src.offset(`-2`))
170	} else {
171	(dst.offset(`2`), src)
172	};
173	if (edges & CDEF_HAVE_RIGHT) != `0` {
174	w += `2`;
175	};
176
177	let (mut dst_ptr, mut src_ptr) = if (edges & CDEF_HAVE_TOP) != `0` {
178	h += `2`;
179	(dst_col, src_col.offset(`-2` * src_stride))
180	} else {
181	(dst_col.offset(`2` * dst_stride), src_col)
182	};
183	if (edges & CDEF_HAVE_BOTTOM) != `0` {
184	h += `2`;
185	};
186
187	for _y in `0`..h {
188	for x in `0`..w {
189	dst_ptr.add(x) = u16::cast_from(src_ptr.add(x));
190	}
191	src_ptr = src_ptr.offset(src_stride);
192	dst_ptr = dst_ptr.offset(dst_stride);
193	}
194	}
195
196	#[cold_for_target_arch("x86_64")]
197	#[allow(clippy::erasing_op, clippy::identity_op, clippy::neg_multiply)]
198	pub(crate) unsafe fn cdef_filter_block<T: Pixel, U: Pixel>(
199	dst: &mut PlaneRegionMut<'_, T>, input: *const U, istride: isize,
200	pri_strength: i32, sec_strength: i32, dir: usize, damping: i32,
201	bit_depth: usize, xdec: usize, ydec: usize, edges: u8,
202	_cpu: CpuFeatureLevel,
203	) {
204	if edges != CDEF_HAVE_ALL {
205	// slowpath for unpadded border[s]
206	let tmpstride = `2` + (`8` >> xdec) + `2`;
207	let mut tmp = [CDEF_VERY_LARGE; (`2` + `8` + `2`) * (`2` + `8` + `2`)];
208	// copy in what pixels we have/are allowed to use
209	pad_into_tmp16(
210	tmp.as_mut_ptr(), // points to padding* upper left*
211	tmpstride,
212	input, // points to block* upper left*
213	istride,
214	`8` >> xdec,
215	`8` >> ydec,
216	edges,
217	);
218	cdef_filter_block(
219	dst,
220	tmp.as_ptr().offset(`2` * tmpstride + `2`),
221	tmpstride,
222	pri_strength,
223	sec_strength,
224	dir,
225	damping,
226	bit_depth,
227	xdec,
228	ydec,
229	CDEF_HAVE_ALL,
230	_cpu,
231	);
232	} else {
233	let xsize = (`8` >> xdec) as isize;
234	let ysize = (`8` >> ydec) as isize;
235	let coeff_shift = bit_depth - `8`;
236	let cdef_pri_taps = [[`4`, `2`], [`3`, `3`]];
237	let cdef_sec_taps = [[`2`, `1`], [`2`, `1`]];
238	let pri_taps =
239	cdef_pri_taps[((pri_strength >> coeff_shift) & `1`) as usize];
240	let sec_taps =
241	cdef_sec_taps[((pri_strength >> coeff_shift) & `1`) as usize];
242	let cdef_directions = [
243	[`-1` * istride + `1`, `-2` * istride + `2`],
244	[`0` * istride + `1`, `-1` * istride + `2`],
245	[`0` * istride + `1`, `0` * istride + `2`],
246	[`0` * istride + `1`, `1` * istride + `2`],
247	[`1` * istride + `1`, `2` * istride + `2`],
248	[`1` * istride + `0`, `2` * istride + `1`],
249	[`1` * istride + `0`, `2` * istride + `0`],
250	[`1` * istride + `0`, `2` * istride - `1`],
251	];
252	for i in `0`..ysize {
253	for j in `0`..xsize {
254	let ptr_in = input.offset(i * istride + j);
255	let x = i32::cast_from(*ptr_in);
256	let mut sum: i32 = `0`;
257	let mut max = x;
258	let mut min = x;
259	for k in `0`..`2usize` {
260	let cdef_dirs = [
261	cdef_directions[dir][k],
262	cdef_directions[(dir + `2`) & `7`][k],
263	cdef_directions[(dir + `6`) & `7`][k],
264	];
265	let pri_tap = pri_taps[k];
266	let p = [
267	i32::cast_from(*ptr_in.offset(cdef_dirs[`0`])),
268	i32::cast_from(*ptr_in.offset(-cdef_dirs[`0`])),
269	];
270	for p_elem in p.iter() {
271	sum += pri_tap * constrain(*p_elem - x, pri_strength, damping);
272	if p_elem != CDEF_VERY_LARGE as i32* {
273	max = cmp::max(*p_elem, max);
274	}
275	min = cmp::min(*p_elem, min);
276	}
277
278	let s = [
279	i32::cast_from(*ptr_in.offset(cdef_dirs[`1`])),
280	i32::cast_from(*ptr_in.offset(-cdef_dirs[`1`])),
281	i32::cast_from(*ptr_in.offset(cdef_dirs[`2`])),
282	i32::cast_from(*ptr_in.offset(-cdef_dirs[`2`])),
283	];
284	let sec_tap = sec_taps[k];
285	for s_elem in s.iter() {
286	if s_elem != CDEF_VERY_LARGE as i32* {
287	max = cmp::max(*s_elem, max);
288	}
289	min = cmp::min(*s_elem, min);
290	sum += sec_tap * constrain(*s_elem - x, sec_strength, damping);
291	}
292	}
293	let v = x + ((`8` + sum - (sum < `0`) as i32) >> `4`);
294	dst[i as usize][j as usize] = T::cast_from(clamp(v, min, max));
295	}
296	}
297	}
298	}
299
#[cfg(test)]
mod test {
  use super::*;

  // Ties must resolve to the first occurrence of the maximum.
  #[test]
  fn check_max_element() {
    assert_eq!(first_max_element(&[-1, -1, 1, 2, 3, 4, 6, 6]), (6, 6));
    assert_eq!(first_max_element(&[-1, -1, 1, 2, 3, 4, 7, 6]), (6, 7));
    assert_eq!(first_max_element(&[0, 0]), (0, 0));
  }
}
311	}
312
313	// We use the variance of an 8x8 block to adjust the effective filter strength.
314	#[inline]
315	fn adjust_strength(strength: i32, var: i32) -> i32 {
316	let i: i32 = if (var >> `6`) != `0` { cmp::min(v1:msb(var >> `6`), v2:`12`) } else { `0` };
317	if var != `0` {
318	(strength * (`4` + i) + `8`) >> `4`
319	} else {
320	`0`
321	}
322	}
323
324	#[profiling::function]
325	pub fn cdef_analyze_superblock_range<T: Pixel>(
326	fi: &FrameInvariants<T>, in_frame: &Frame<T>, blocks: &TileBlocks<'_>,
327	sb_w: usize, sb_h: usize,
328	) -> Vec<CdefDirections> {
329	let mut ret: Vec = Vec::<CdefDirections>::with_capacity(sb_h * sb_w);
330	for sby: usize in `0`..sb_h {
331	for sbx: usize in `0`..sb_w {
332	let sbo: TileSuperBlockOffset = TileSuperBlockOffset(SuperBlockOffset { x: sbx, y: sby });
333	ret.push(cdef_analyze_superblock(fi, in_frame, blocks, sbo));
334	}
335	}
336	ret
337	}
338
339	#[profiling::function]
340	pub fn cdef_analyze_superblock<T: Pixel>(
341	fi: &FrameInvariants<T>, in_frame: &Frame<T>, blocks: &TileBlocks<'_>,
342	sbo: TileSuperBlockOffset,
343	) -> CdefDirections {
344	let coeff_shift = fi.sequence.bit_depth - `8`;
345	let mut dir: CdefDirections =
346	CdefDirections { dir: [[`0`; `8`]; `8`], var: [[`0`; `8`]; `8`] };
347	// Each direction block is 8x8 in y, and direction computation only looks at y
348	for by in `0`..`8` {
349	for bx in `0`..`8` {
350	let block_offset = sbo.block_offset(bx << `1`, by << `1`);
351	if block_offset.0.x < blocks.cols() && block_offset.0.y < blocks.rows() {
352	let skip = blocks[block_offset].skip
353	& blocks[sbo.block_offset(`2` * bx + `1`, `2` * by)].skip
354	& blocks[sbo.block_offset(`2` * bx, `2` * by + `1`)].skip
355	& blocks[sbo.block_offset(`2` * bx + `1`, `2` * by + `1`)].skip;
356
357	if !skip {
358	let mut var: u32 = `0`;
359	let in_plane = &in_frame.planes[`0`];
360	let in_po = sbo.plane_offset(&in_plane.cfg);
361	let in_slice = in_plane.slice(in_po);
362	dir.dir[bx][by] = cdef_find_dir::<T>(
363	&in_slice.reslice(`8` * bx as isize, `8` * by as isize),
364	&mut var,
365	coeff_shift,
366	fi.cpu_feature_level,
367	) as u8;
368	dir.var[bx][by] = var as i32;
369	}
370	}
371	}
372	}
373	dir
374	}
375
376	// input: A Frame of reconstructed/deblocked pixels prepared to
377	// undergo CDEF. Note that the input is a Frame and not a Tile due to
378	// Tiles not allowing [supervised] out-of-rect access for padding
379	// pixels. This will be corrected at some point in the future.
380
381	// tile_sbo: specifies an offset into the output Tile, not an
382	// absolute offset in the visible frame. The Tile's own offset is
383	// added to this in order to address into the input Frame.
384
385	// tb: the TileBlocks associated with the filtered region; the
386	// provided blocks co-locate with the output region. The TileBlocks
387	// provide by-[super]qblock CDEF parameters.
388
389	// output: TileMut destination for filtered pixels. The output's
390	// rect specifies the region of the input to be processed (x and y
391	// are relative to the input Frame's origin). Note that an
392	// additional area of 2 pixels of padding is used for CDEF. When
393	// these pixels are unavailable (beyond the visible frame or at a
394	// tile boundary), the filtering process ignores input pixels that
395	// don't exist.
396
397	/// # Panics
398	///
399	/// - If called with invalid parameters
400	#[profiling::function]
401	pub fn cdef_filter_superblock<T: Pixel>(
402	fi: &FrameInvariants<T>, input: &Frame<T>, output: &mut TileMut<'_, T>,
403	blocks: &TileBlocks<'_>, tile_sbo: TileSuperBlockOffset, cdef_index: u8,
404	cdef_dirs: &CdefDirections,
405	) {
406	let bit_depth = fi.sequence.bit_depth;
407	let coeff_shift = fi.sequence.bit_depth as i32 - `8`;
408	let cdef_damping = fi.cdef_damping as i32;
409	let cdef_y_strength = fi.cdef_y_strengths[cdef_index as usize];
410	let cdef_uv_strength = fi.cdef_uv_strengths[cdef_index as usize];
411	let cdef_pri_y_strength = (cdef_y_strength / CDEF_SEC_STRENGTHS) as i32;
412	let mut cdef_sec_y_strength = (cdef_y_strength % CDEF_SEC_STRENGTHS) as i32;
413	let cdef_pri_uv_strength = (cdef_uv_strength / CDEF_SEC_STRENGTHS) as i32;
414	let planes = if fi.sequence.chroma_sampling == Cs400 { `1` } else { `3` };
415	let mut cdef_sec_uv_strength =
416	(cdef_uv_strength % CDEF_SEC_STRENGTHS) as i32;
417	if cdef_sec_y_strength == `3` {
418	cdef_sec_y_strength += `1`;
419	}
420	if cdef_sec_uv_strength == `3` {
421	cdef_sec_uv_strength += `1`;
422	}
423
424	let tile_rect = *output.planes[`0`].rect();
425	let input_xoffset =
426	tile_rect.x + tile_sbo.plane_offset(&input.planes[`0`].cfg).x;
427	let input_yoffset =
428	tile_rect.y + tile_sbo.plane_offset(&input.planes[`0`].cfg).y;
429	let input_xavail = input.planes[`0`].cfg.width as isize - input_xoffset;
430	let input_yavail = input.planes[`0`].cfg.height as isize - input_yoffset;
431
432	/ determine what edge padding we have, and what padding we don't.*
433	* We don't pad here, but rather tell the filter_block call what it
434	* needs to do, then let it handle the specifics (following dav1d's
435	* lead). We make one assumption that's not obvious: Because the
436	* cdef clipping area is rounded up to an even 8x8 luma block, we
437	* don't need to guard against having only one (as opposed to two)
438	* pixels of padding past the current block boundary. The padding
439	* is all-or-nothing. */
440
441	// Slightly harder than in dav1d; we're not always doing full-frame.
442	let have_top_p =
443	if tile_sbo.0.y as isize + tile_rect.y > `0` { CDEF_HAVE_TOP } else { `0` };
444	let have_left_p =
445	if tile_sbo.0.x as isize + tile_rect.x > `0` { CDEF_HAVE_LEFT } else { `0` };
446	let mut edges = have_top_p \| CDEF_HAVE_BOTTOM;
447
448	// Each direction block is 8x8 in y, potentially smaller if subsampled in chroma
449	for by in `0`..`8usize` {
450	if by + `1` >= (input_yavail as usize >> `3`) {
451	edges &= !CDEF_HAVE_BOTTOM
452	};
453	edges &= !CDEF_HAVE_LEFT;
454	edges \|= have_left_p;
455	edges \|= CDEF_HAVE_RIGHT;
456	for bx in `0`..`8usize` {
457	if bx + `1` >= (input_xavail as usize >> `3`) {
458	edges &= !CDEF_HAVE_RIGHT
459	};
460	let block_offset = tile_sbo.block_offset(bx << `1`, by << `1`);
461	if block_offset.0.x < blocks.cols() && block_offset.0.y < blocks.rows() {
462	let skip = blocks[block_offset].skip
463	& blocks[tile_sbo.block_offset(`2` * bx + `1`, `2` * by)].skip
464	& blocks[tile_sbo.block_offset(`2` * bx, `2` * by + `1`)].skip
465	& blocks[tile_sbo.block_offset(`2` * bx + `1`, `2` * by + `1`)].skip;
466	let dir = cdef_dirs.dir[bx][by];
467	let var = cdef_dirs.var[bx][by];
468	for p in `0`..planes {
469	let out_plane = &mut output.planes[p];
470	let in_plane = &input.planes[p];
471	let xdec = in_plane.cfg.xdec;
472	let ydec = in_plane.cfg.ydec;
473	let xsize = `8` >> xdec;
474	let ysize = `8` >> ydec;
475	let in_po = PlaneOffset {
476	x: (input_xoffset >> xdec) + (bx * xsize) as isize,
477	y: (input_yoffset >> ydec) + (by * ysize) as isize,
478	};
479	let in_stride = in_plane.cfg.stride;
480	let in_slice = &in_plane.slice(in_po);
481
482	let out_block = &mut out_plane.subregion_mut(Area::BlockRect {
483	bo: tile_sbo.block_offset(`2` * bx, `2` * by).0,
484	width: xsize,
485	height: ysize,
486	});
487
488	if !skip {
489	let local_pri_strength;
490	let local_sec_strength;
491	let mut local_damping: i32 = cdef_damping + coeff_shift;
492	// See `Cdef_Uv_Dir` constant lookup table in Section 7.15.1
493	// <https://aomediacodec.github.io/av1-spec/#cdef-block-process>
494	let local_dir = if p == `0` {
495	local_pri_strength =
496	adjust_strength(cdef_pri_y_strength << coeff_shift, var);
497	local_sec_strength = cdef_sec_y_strength << coeff_shift;
498	if cdef_pri_y_strength != `0` {
499	dir as usize
500	} else {
501	`0`
502	}
503	} else {
504	local_pri_strength = cdef_pri_uv_strength << coeff_shift;
505	local_sec_strength = cdef_sec_uv_strength << coeff_shift;
506	local_damping -= `1`;
507	if cdef_pri_uv_strength != `0` {
508	if xdec != ydec {
509	[`7`, `0`, `2`, `4`, `5`, `6`, `6`, `6`][dir as usize]
510	} else {
511	dir as usize
512	}
513	} else {
514	`0`
515	}
516	};
517
518	// SAFETY: `cdef_filter_block` may call Assembly code.
519	// The asserts here verify that we are not calling it
520	// with invalid parameters.
521	unsafe {
522	assert!(
523	input.planes[p].cfg.width as isize
524	>= in_po.x
525	+ xsize as isize
526	+ if edges & CDEF_HAVE_RIGHT > `0` { `2` } else { `0` }
527	);
528	assert!(
529	`0` <= in_po.x - if edges & CDEF_HAVE_LEFT > `0` { `2` } else { `0` }
530	);
531	assert!(
532	input.planes[p].cfg.height as isize
533	>= in_po.y
534	+ ysize as isize
535	+ if edges & CDEF_HAVE_BOTTOM > `0` { `2` } else { `0` }
536	);
537	assert!(
538	`0` <= in_po.y - if edges & CDEF_HAVE_TOP > `0` { `2` } else { `0` }
539	);
540
541	cdef_filter_block(
542	out_block,
543	in_slice.as_ptr(),
544	in_stride as isize,
545	local_pri_strength,
546	local_sec_strength,
547	local_dir,
548	local_damping,
549	bit_depth,
550	xdec,
551	ydec,
552	edges,
553	fi.cpu_feature_level,
554	);
555	}
556	} else {
557	// no filtering, but we need to copy input to output
558	for i in `0`..ysize {
559	for j in `0`..xsize {
560	out_block[i][j] = in_slice[i][j];
561	}
562	}
563	}
564	}
565	}
566	edges \|= CDEF_HAVE_LEFT;
567	}
568	edges \|= CDEF_HAVE_TOP;
569	}
570	}
571
572	// The purpose of CDEF is to perform deringing based on the detected
573	// direction of blocks. CDEF parameters are stored for each 64 by 64
574	// block of pixels. The CDEF filter is applied on each 8 by 8 block
575	// of pixels. Reference:
576	// http://av1-spec.argondesign.com/av1-spec/av1-spec.html#cdef-process
577
578	// input: A Frame of reconstructed/deblocked pixels prepared to
579	// undergo CDEF. cdef_filter_tile acts on a subset of these input
580	// pixels, as specified by the PlaneRegion rect of the output. Note
581	// that the input is a Frame and not a Tile due to Tiles not
582	// allowing [supervised] out-of-rect access for padding pixels.
583	// This will be corrected at some point in the future.
584
585	// tb: the TileBlocks associated with the filtered region; the
586	// provided blocks co-locate with the output region.
587
588	// output: TileMut destination for filtered pixels. The output's
589	// rect specifies the region of the input to be processed (x and y
590	// are relative to the input Frame's origin). Note that an
591	// additional area of 2 pixels of padding is used for CDEF. When
592	// these pixels are unavailable (beyond the visible frame or at a
593	// tile boundary), the filtering process ignores input pixels that
594	// don't exist.
595
596	#[profiling::function]
597	pub fn cdef_filter_tile<T: Pixel>(
598	fi: &FrameInvariants<T>, input: &Frame<T>, tb: &TileBlocks,
599	output: &mut TileMut<'_, T>,
600	) {
601	// Each filter block is 64x64, except right and/or bottom for non-multiple-of-64 sizes.
602	// FIXME: 128x128 SB support will break this, we need FilterBlockOffset etc.
603
604	// No need to guard against having fewer actual coded blocks than
605	// the output.rect() area. Inner code already guards this case.
606	let fb_width = (output.planes[`0`].rect().width + `63`) / `64`;
607	let fb_height = (output.planes[`0`].rect().height + `63`) / `64`;
608
609	// should parallelize this
610	for fby in `0`..fb_height {
611	for fbx in `0`..fb_width {
612	// tile_sbo is treated as an offset into the Tiles' plane
613	// regions, not as an absolute offset in the visible frame. The
614	// Tile's own offset is added to this in order to address into
615	// the input Frame.
616	let tile_sbo = TileSuperBlockOffset(SuperBlockOffset { x: fbx, y: fby });
617	let cdef_index = tb.get_cdef(tile_sbo);
618	let cdef_dirs = cdef_analyze_superblock(fi, input, tb, tile_sbo);
619
620	cdef_filter_superblock(
621	fi, input, output, tb, tile_sbo, cdef_index, &cdef_dirs,
622	);
623	}
624	}
625	}
626