1// Copyright (c) 2018-2022, The rav1e contributors. All rights reserved
2//
3// This source code is subject to the terms of the BSD 2 Clause License and
4// the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
5// was not distributed with this source code in the LICENSE file, you can
6// obtain it at www.aomedia.org/license/software. If the Alliance for Open
7// Media Patent License 1.0 was not distributed with this source code in the
8// PATENTS file, you can obtain it at www.aomedia.org/license/patent.
9
10use crate::api::FrameType;
11use crate::color::ChromaSampling::Cs400;
12use crate::context::*;
13use crate::encoder::FrameInvariants;
14use crate::partition::RefType::*;
15use crate::predict::PredictionMode::*;
16use crate::quantize::*;
17use crate::tiling::*;
18use crate::util::{clamp, ILog, Pixel};
19use crate::DeblockState;
20use rayon::iter::*;
21use std::cmp;
22
23fn deblock_adjusted_level(
24 deblock: &DeblockState, block: &Block, pli: usize, vertical: bool,
25) -> usize {
26 let idx = if pli == 0 { usize::from(!vertical) } else { pli + 1 };
27
28 let level = if deblock.block_deltas_enabled {
29 // By-block filter strength delta, if the feature is active.
30 let block_delta = if deblock.block_delta_multi {
31 block.deblock_deltas[idx] << deblock.block_delta_shift
32 } else {
33 block.deblock_deltas[0] << deblock.block_delta_shift
34 };
35
36 // Add to frame-specified filter strength (Y-vertical, Y-horizontal, U, V)
37 clamp(block_delta + deblock.levels[idx] as i8, 0, MAX_LOOP_FILTER as i8)
38 as u8
39 } else {
40 deblock.levels[idx]
41 };
42
43 // if fi.seg_feaure_active {
44 // rav1e does not yet support segments or segment features
45 // }
46
47 // Are delta modifiers for specific references and modes active? If so, add them too.
48 if deblock.deltas_enabled {
49 let mode = block.mode;
50 let reference = block.ref_frames[0];
51 let mode_type = usize::from(
52 mode >= NEARESTMV && mode != GLOBALMV && mode != GLOBAL_GLOBALMV,
53 );
54 let l5 = level >> 5;
55 clamp(
56 level as i32
57 + ((deblock.ref_deltas[reference.to_index()] as i32) << l5)
58 + if reference == INTRA_FRAME {
59 0
60 } else {
61 (deblock.mode_deltas[mode_type] as i32) << l5
62 },
63 0,
64 MAX_LOOP_FILTER as i32,
65 ) as usize
66 } else {
67 level as usize
68 }
69}
70
71#[inline]
72fn deblock_left<'a, T: Pixel>(
73 blocks: &'a TileBlocks, in_bo: TileBlockOffset, p: &PlaneRegion<T>,
74) -> &'a Block {
75 let xdec: usize = p.plane_cfg.xdec;
76 let ydec: usize = p.plane_cfg.ydec;
77
78 // subsampled chroma uses odd mi row/col
79 // We already know we're not at the upper/left corner, so prev_block is in frame
80 &blocks[in_bo.0.y | ydec][(in_bo.0.x | xdec) - (1 << xdec)]
81}
82
83#[inline]
84fn deblock_up<'a, T: Pixel>(
85 blocks: &'a TileBlocks, in_bo: TileBlockOffset, p: &PlaneRegion<T>,
86) -> &'a Block {
87 let xdec: usize = p.plane_cfg.xdec;
88 let ydec: usize = p.plane_cfg.ydec;
89
90 // subsampled chroma uses odd mi row/col
91 &blocks[(in_bo.0.y | ydec) - (1 << ydec)][in_bo.0.x | xdec]
92}
93
94// Must be called on a tx edge, and not on a frame edge. This is enforced above the call.
95fn deblock_size<T: Pixel>(
96 block: &Block, prev_block: &Block, p: &PlaneRegion<T>, pli: usize,
97 vertical: bool, block_edge: bool,
98) -> usize {
99 let xdec = p.plane_cfg.xdec;
100 let ydec = p.plane_cfg.ydec;
101
102 // filter application is conditional on skip and block edge
103 if !(block_edge
104 || !block.skip
105 || !prev_block.skip
106 || block.ref_frames[0] == INTRA_FRAME
107 || prev_block.ref_frames[0] == INTRA_FRAME)
108 {
109 0
110 } else {
111 let (txsize, prev_txsize) = if pli == 0 {
112 (block.txsize, prev_block.txsize)
113 } else {
114 (
115 block.bsize.largest_chroma_tx_size(xdec, ydec),
116 prev_block.bsize.largest_chroma_tx_size(xdec, ydec),
117 )
118 };
119 let (tx_n, prev_tx_n) = if vertical {
120 (cmp::max(txsize.width_mi(), 1), cmp::max(prev_txsize.width_mi(), 1))
121 } else {
122 (cmp::max(txsize.height_mi(), 1), cmp::max(prev_txsize.height_mi(), 1))
123 };
124 cmp::min(
125 if pli == 0 { 14 } else { 6 },
126 cmp::min(tx_n, prev_tx_n) << MI_SIZE_LOG2,
127 )
128 }
129}
130
131// Must be called on a tx edge
132#[inline]
133fn deblock_level(
134 deblock: &DeblockState, block: &Block, prev_block: &Block, pli: usize,
135 vertical: bool,
136) -> usize {
137 let level: usize = deblock_adjusted_level(deblock, block, pli, vertical);
138 if level == 0 {
139 deblock_adjusted_level(deblock, prev_block, pli, vertical)
140 } else {
141 level
142 }
143}
144
145// four taps, 4 outputs (two are trivial)
146#[inline]
147fn filter_narrow2_4(
148 p1: i32, p0: i32, q0: i32, q1: i32, shift: usize,
149) -> [i32; 4] {
150 let filter0 = clamp(p1 - q1, -128 << shift, (128 << shift) - 1);
151 let filter1 =
152 clamp(filter0 + 3 * (q0 - p0) + 4, -128 << shift, (128 << shift) - 1) >> 3;
153 // be certain our optimization removing a clamp is sound
154 debug_assert!({
155 let base =
156 clamp(filter0 + 3 * (q0 - p0), -128 << shift, (128 << shift) - 1);
157 let test = clamp(base + 4, -128 << shift, (128 << shift) - 1) >> 3;
158 filter1 == test
159 });
160 let filter2 =
161 clamp(filter0 + 3 * (q0 - p0) + 3, -128 << shift, (128 << shift) - 1) >> 3;
162 // be certain our optimization removing a clamp is sound
163 debug_assert!({
164 let base =
165 clamp(filter0 + 3 * (q0 - p0), -128 << shift, (128 << shift) - 1);
166 let test = clamp(base + 3, -128 << shift, (128 << shift) - 1) >> 3;
167 filter2 == test
168 });
169 [
170 p1,
171 clamp(p0 + filter2, 0, (256 << shift) - 1),
172 clamp(q0 - filter1, 0, (256 << shift) - 1),
173 q1,
174 ]
175}
176
177// six taps, 6 outputs (four are trivial)
178#[inline]
179fn filter_narrow2_6(
180 p2: i32, p1: i32, p0: i32, q0: i32, q1: i32, q2: i32, shift: usize,
181) -> [i32; 6] {
182 let x: [i32; 4] = filter_narrow2_4(p1, p0, q0, q1, shift);
183 [p2, x[0], x[1], x[2], x[3], q2]
184}
185
186// 12 taps, 12 outputs (ten are trivial)
187#[inline]
188fn filter_narrow2_12(
189 p5: i32, p4: i32, p3: i32, p2: i32, p1: i32, p0: i32, q0: i32, q1: i32,
190 q2: i32, q3: i32, q4: i32, q5: i32, shift: usize,
191) -> [i32; 12] {
192 let x: [i32; 4] = filter_narrow2_4(p1, p0, q0, q1, shift);
193 [p5, p4, p3, p2, x[0], x[1], x[2], x[3], q2, q3, q4, q5]
194}
195
196// four taps, 4 outputs
197#[inline]
198fn filter_narrow4_4(
199 p1: i32, p0: i32, q0: i32, q1: i32, shift: usize,
200) -> [i32; 4] {
201 let filter1 =
202 clamp(3 * (q0 - p0) + 4, -128 << shift, (128 << shift) - 1) >> 3;
203 // be certain our optimization removing a clamp is sound
204 debug_assert!({
205 let base = clamp(3 * (q0 - p0), -128 << shift, (128 << shift) - 1);
206 let test = clamp(base + 4, -128 << shift, (128 << shift) - 1) >> 3;
207 filter1 == test
208 });
209 let filter2 =
210 clamp(3 * (q0 - p0) + 3, -128 << shift, (128 << shift) - 1) >> 3;
211 // be certain our optimization removing a clamp is sound
212 debug_assert!({
213 let base = clamp(3 * (q0 - p0), -128 << shift, (128 << shift) - 1);
214 let test = clamp(base + 3, -128 << shift, (128 << shift) - 1) >> 3;
215 filter2 == test
216 });
217 let filter3 = (filter1 + 1) >> 1;
218 [
219 clamp(p1 + filter3, 0, (256 << shift) - 1),
220 clamp(p0 + filter2, 0, (256 << shift) - 1),
221 clamp(q0 - filter1, 0, (256 << shift) - 1),
222 clamp(q1 - filter3, 0, (256 << shift) - 1),
223 ]
224}
225
226// six taps, 6 outputs (two are trivial)
227#[inline]
228fn filter_narrow4_6(
229 p2: i32, p1: i32, p0: i32, q0: i32, q1: i32, q2: i32, shift: usize,
230) -> [i32; 6] {
231 let x: [i32; 4] = filter_narrow4_4(p1, p0, q0, q1, shift);
232 [p2, x[0], x[1], x[2], x[3], q2]
233}
234
235// 12 taps, 12 outputs (eight are trivial)
236#[inline]
237fn filter_narrow4_12(
238 p5: i32, p4: i32, p3: i32, p2: i32, p1: i32, p0: i32, q0: i32, q1: i32,
239 q2: i32, q3: i32, q4: i32, q5: i32, shift: usize,
240) -> [i32; 12] {
241 let x: [i32; 4] = filter_narrow4_4(p1, p0, q0, q1, shift);
242 [p5, p4, p3, p2, x[0], x[1], x[2], x[3], q2, q3, q4, q5]
243}
244
// six taps, 4 outputs
//
// Each output is a weighted sum of the taps whose weights add up to 8,
// rounded to nearest by adding 4 before the shift by 3.
#[rustfmt::skip]
#[inline]
const fn filter_wide6_4(
  p2: i32, p1: i32, p0: i32, q0: i32, q1: i32, q2: i32
) -> [i32; 4] {
  const ROUND: i32 = 1 << 2;
  let out_p1 = (3*p2 + 2*p1 + 2*p0 + q0 + ROUND) >> 3;
  let out_p0 = (p2 + 2*p1 + 2*p0 + 2*q0 + q1 + ROUND) >> 3;
  let out_q0 = (p1 + 2*p0 + 2*q0 + 2*q1 + q2 + ROUND) >> 3;
  let out_q1 = (p0 + 2*q0 + 2*q1 + 3*q2 + ROUND) >> 3;
  [out_p1, out_p0, out_q0, out_q1]
}
258
// eight taps, 6 outputs
//
// Each output is a weighted sum of the taps whose weights add up to 8,
// rounded to nearest by adding 4 before the shift by 3.
#[rustfmt::skip]
#[inline]
const fn filter_wide8_6(
  p3: i32, p2: i32, p1: i32, p0: i32, q0: i32, q1: i32, q2: i32, q3: i32
) -> [i32; 6] {
  const ROUND: i32 = 1 << 2;
  let out_p2 = (3*p3 + 2*p2 + p1 + p0 + q0 + ROUND) >> 3;
  let out_p1 = (2*p3 + p2 + 2*p1 + p0 + q0 + q1 + ROUND) >> 3;
  let out_p0 = (p3 + p2 + p1 + 2*p0 + q0 + q1 + q2 + ROUND) >> 3;
  let out_q0 = (p2 + p1 + p0 + 2*q0 + q1 + q2 + q3 + ROUND) >> 3;
  let out_q1 = (p1 + p0 + q0 + 2*q1 + q2 + 2*q3 + ROUND) >> 3;
  let out_q2 = (p0 + q0 + q1 + 2*q2 + 3*q3 + ROUND) >> 3;
  [out_p2, out_p1, out_p0, out_q0, out_q1, out_q2]
}
274
275// 12 taps, 12 outputs (six are trivial)
276#[inline]
277const fn filter_wide8_12(
278 p5: i32, p4: i32, p3: i32, p2: i32, p1: i32, p0: i32, q0: i32, q1: i32,
279 q2: i32, q3: i32, q4: i32, q5: i32,
280) -> [i32; 12] {
281 let x: [i32; 6] = filter_wide8_6(p3, p2, p1, p0, q0, q1, q2, q3);
282 [p5, p4, p3, x[0], x[1], x[2], x[3], x[4], x[5], q3, q4, q5]
283}
284
// fourteen taps, 12 outputs
//
// Each output is a weighted sum of the taps whose weights add up to 16,
// rounded to nearest by adding 8 before the shift by 4.
#[rustfmt::skip]
#[inline]
const fn filter_wide14_12(
  p6: i32, p5: i32, p4: i32, p3: i32, p2: i32, p1: i32, p0: i32, q0: i32,
  q1: i32, q2: i32, q3: i32, q4: i32, q5: i32, q6: i32
) -> [i32; 12] {
  const ROUND: i32 = 1 << 3;
  let out_p5 = (7*p6 + 2*p5 + 2*p4 + p3 + p2 + p1 + p0 + q0 + ROUND) >> 4;
  let out_p4 = (5*p6 + 2*p5 + 2*p4 + 2*p3 + p2 + p1 + p0 + q0 + q1 + ROUND) >> 4;
  let out_p3 = (4*p6 + p5 + 2*p4 + 2*p3 + 2*p2 + p1 + p0 + q0 + q1 + q2 + ROUND) >> 4;
  let out_p2 = (3*p6 + p5 + p4 + 2*p3 + 2*p2 + 2*p1 + p0 + q0 + q1 + q2 + q3 + ROUND) >> 4;
  let out_p1 = (2*p6 + p5 + p4 + p3 + 2*p2 + 2*p1 + 2*p0 + q0 + q1 + q2 + q3 + q4 + ROUND) >> 4;
  let out_p0 = (p6 + p5 + p4 + p3 + p2 + 2*p1 + 2*p0 + 2*q0 + q1 + q2 + q3 + q4 + q5 + ROUND) >> 4;
  let out_q0 = (p5 + p4 + p3 + p2 + p1 + 2*p0 + 2*q0 + 2*q1 + q2 + q3 + q4 + q5 + q6 + ROUND) >> 4;
  let out_q1 = (p4 + p3 + p2 + p1 + p0 + 2*q0 + 2*q1 + 2*q2 + q3 + q4 + q5 + 2*q6 + ROUND) >> 4;
  let out_q2 = (p3 + p2 + p1 + p0 + q0 + 2*q1 + 2*q2 + 2*q3 + q4 + q5 + 3*q6 + ROUND) >> 4;
  let out_q3 = (p2 + p1 + p0 + q0 + q1 + 2*q2 + 2*q3 + 2*q4 + q5 + 4*q6 + ROUND) >> 4;
  let out_q4 = (p1 + p0 + q0 + q1 + q2 + 2*q3 + 2*q4 + 2*q5 + 5*q6 + ROUND) >> 4;
  let out_q5 = (p0 + q0 + q1 + q2 + q3 + 2*q4 + 2*q5 + 7*q6 + ROUND) >> 4;
  [
    out_p5, out_p4, out_p3, out_p2, out_p1, out_p0,
    out_q0, out_q1, out_q2, out_q3, out_q4, out_q5,
  ]
}
307
308#[inline]
309fn copy_horizontal<T: Pixel>(
310 dst: &mut PlaneRegionMut<'_, T>, x: usize, y: usize, src: &[i32],
311) {
312 let row: &mut [T] = &mut dst[y][x..];
313 for (dst: &mut T, src: &i32) in row.iter_mut().take(src.len()).zip(src) {
314 *dst = T::cast_from(*src);
315 }
316}
317
318#[inline]
319fn copy_vertical<T: Pixel>(
320 dst: &mut PlaneRegionMut<'_, T>, x: usize, y: usize, src: &[i32],
321) {
322 for (i: usize, v: &i32) in src.iter().enumerate() {
323 let p: &mut T = &mut dst[y + i][x];
324 *p = T::cast_from(*v);
325 }
326}
327
// Sum of squared differences between two equally sized arrays.
//
// Differences and squares are computed in `i64`, so the result cannot
// overflow an intermediate `i32` whatever the input values are.
#[inline]
fn stride_sse<const LEN: usize>(a: &[i32; LEN], b: &[i32; LEN]) -> i64 {
  a.iter()
    .zip(b)
    .map(|(&x, &y)| {
      let diff = i64::from(x) - i64::from(y);
      diff * diff
    })
    .sum::<i64>()
}
332
// Scales a filter level up by `shift` bits to obtain the limit value.
#[inline]
const fn _level_to_limit(level: i32, shift: usize) -> i32 {
  let limit = level << shift;
  limit
}
337
// Converts a limit value back to a level: divides by `2^shift`, rounding up.
#[inline]
const fn limit_to_level(limit: i32, shift: usize) -> i32 {
  let unit = 1 << shift;
  (limit + unit - 1) >> shift
}
342
// Computes the blimit value for a filter level: `3 * level + 4`, scaled up
// by `shift` bits.
#[inline]
const fn _level_to_blimit(level: i32, shift: usize) -> i32 {
  let unscaled = 3 * level + 4;
  unscaled << shift
}
347
// Converts a blimit value back to a level: divides by `2^shift` rounding up,
// then subtracts 2 and divides by 3 (integer division, truncating toward
// zero).
#[inline]
const fn blimit_to_level(blimit: i32, shift: usize) -> i32 {
  let unit = 1 << shift;
  let unscaled = (blimit + unit - 1) >> shift;
  (unscaled - 2) / 3
}
352
// Computes the thresh value for a filter level: the level divided by 16
// (low four bits dropped), scaled up by `shift` bits.
#[inline]
const fn _level_to_thresh(level: i32, shift: usize) -> i32 {
  (level >> 4) << shift
}
357
// Converts a thresh value back to a level: divides by `2^shift` rounding up,
// then multiplies by 16.
#[inline]
const fn thresh_to_level(thresh: i32, shift: usize) -> i32 {
  let unit = 1 << shift;
  ((thresh + unit - 1) >> shift) << 4
}
362
363#[inline]
364fn nhev4(p1: i32, p0: i32, q0: i32, q1: i32, shift: usize) -> usize {
365 thresh_to_level(thresh:cmp::max((p1 - p0).abs(), (q1 - q0).abs()), shift) as usize
366}
367
368#[inline]
369fn mask4(p1: i32, p0: i32, q0: i32, q1: i32, shift: usize) -> usize {
370 cmp::max(
371 v1:limit_to_level(cmp::max((p1 - p0).abs(), (q1 - q0).abs()), shift),
372 v2:blimit_to_level((p0 - q0).abs() * 2 + (p1 - q1).abs() / 2, shift),
373 ) as usize
374}
375
376#[inline]
377fn deblock_size4_inner(
378 [p1: i32, p0: i32, q0: i32, q1: i32]: [i32; 4], level: usize, bd: usize,
379) -> Option<[i32; 4]> {
380 if mask4(p1, p0, q0, q1, shift:bd - 8) <= level {
381 let x: [i32; 4] = if nhev4(p1, p0, q0, q1, shift:bd - 8) <= level {
382 filter_narrow4_4(p1, p0, q0, q1, shift:bd - 8)
383 } else {
384 filter_narrow2_4(p1, p0, q0, q1, shift:bd - 8)
385 };
386 Some(x)
387 } else {
388 None
389 }
390}
391
392// Assumes rec[0] is set 2 taps back from the edge
393fn deblock_v_size4<T: Pixel>(
394 rec: &mut PlaneRegionMut<'_, T>, level: usize, bd: usize,
395) {
396 for y: usize in 0..4 {
397 let p: &[T] = &rec[y];
398 let vals: [i32; 4] = [p[0].as_(), p[1].as_(), p[2].as_(), p[3].as_()];
399 if let Some(data: [i32; 4]) = deblock_size4_inner(vals, level, bd) {
400 copy_horizontal(dst:rec, x:0, y, &data);
401 }
402 }
403}
404
405// Assumes rec[0] is set 2 taps back from the edge
406fn deblock_h_size4<T: Pixel>(
407 rec: &mut PlaneRegionMut<'_, T>, level: usize, bd: usize,
408) {
409 for x: usize in 0..4 {
410 let vals: [i32; 4] =
411 [rec[0][x].as_(), rec[1][x].as_(), rec[2][x].as_(), rec[3][x].as_()];
412 if let Some(data: [i32; 4]) = deblock_size4_inner(vals, level, bd) {
413 copy_vertical(dst:rec, x, y:0, &data);
414 }
415 }
416}
417
418// Assumes rec[0] and src[0] are set 2 taps back from the edge.
419// Accesses four taps, accumulates four pixels into the tally
420fn sse_size4<T: Pixel>(
421 rec: &PlaneRegion<'_, T>, src: &PlaneRegion<'_, T>,
422 tally: &mut [i64; MAX_LOOP_FILTER + 2], horizontal_p: bool, bd: usize,
423) {
424 for i in 0..4 {
425 let (p1, p0, q0, q1, a) = if horizontal_p {
426 (
427 rec[0][i].as_(),
428 rec[1][i].as_(),
429 rec[2][i].as_(),
430 rec[3][i].as_(),
431 [src[0][i].as_(), src[1][i].as_(), src[2][i].as_(), src[3][i].as_()],
432 )
433 } else {
434 (
435 rec[i][0].as_(),
436 rec[i][1].as_(),
437 rec[i][2].as_(),
438 rec[i][3].as_(),
439 [src[i][0].as_(), src[i][1].as_(), src[i][2].as_(), src[i][3].as_()],
440 )
441 };
442
443 // three possibilities: no filter, narrow2 and narrow4
444 // All possibilities produce four outputs
445 let none: [_; 4] = [p1, p0, q0, q1];
446 let narrow2 = filter_narrow2_4(p1, p0, q0, q1, bd - 8);
447 let narrow4 = filter_narrow4_4(p1, p0, q0, q1, bd - 8);
448
449 // mask4 sets the dividing line for filter vs no filter
450 // nhev4 sets the dividing line between narrow2 and narrow4
451 let mask = clamp(mask4(p1, p0, q0, q1, bd - 8), 1, MAX_LOOP_FILTER + 1);
452 let nhev = clamp(nhev4(p1, p0, q0, q1, bd - 8), mask, MAX_LOOP_FILTER + 1);
453
454 // sse for each; short-circuit the 'special' no-op cases.
455 let sse_none = stride_sse(&a, &none);
456 let sse_narrow2 =
457 if nhev != mask { stride_sse(&a, &narrow2) } else { sse_none };
458 let sse_narrow4 = if nhev <= MAX_LOOP_FILTER {
459 stride_sse(&a, &narrow4)
460 } else {
461 sse_none
462 };
463
464 // accumulate possible filter values into the tally
465 // level 0 is a special case
466 tally[0] += sse_none;
467 tally[mask] -= sse_none;
468 tally[mask] += sse_narrow2;
469 tally[nhev] -= sse_narrow2;
470 tally[nhev] += sse_narrow4;
471 }
472}
473
474#[inline]
475fn mask6(
476 p2: i32, p1: i32, p0: i32, q0: i32, q1: i32, q2: i32, shift: usize,
477) -> usize {
478 cmp::max(
479 v1:limit_to_level(
480 cmp::max(
481 (p2 - p1).abs(),
482 cmp::max((p1 - p0).abs(), cmp::max((q2 - q1).abs(), (q1 - q0).abs())),
483 ),
484 shift,
485 ),
486 v2:blimit_to_level((p0 - q0).abs() * 2 + (p1 - q1).abs() / 2, shift),
487 ) as usize
488}
489
// Largest absolute difference between an outer tap and the edge tap on its
// own side (`p1`, `p2` against `p0`; `q1`, `q2` against `q0`). Callers
// compare it against a flatness threshold to select the wide filter.
#[inline]
fn flat6(p2: i32, p1: i32, p0: i32, q0: i32, q1: i32, q2: i32) -> usize {
  (p1 - p0)
    .abs()
    .max((q1 - q0).abs())
    .max((p2 - p0).abs())
    .max((q2 - q0).abs()) as usize
}
497
498#[inline]
499fn deblock_size6_inner(
500 [p2: i32, p1: i32, p0: i32, q0: i32, q1: i32, q2: i32]: [i32; 6], level: usize, bd: usize,
501) -> Option<[i32; 4]> {
502 if mask6(p2, p1, p0, q0, q1, q2, shift:bd - 8) <= level {
503 let flat: usize = 1 << (bd - 8);
504 let x: [i32; 4] = if flat6(p2, p1, p0, q0, q1, q2) <= flat {
505 filter_wide6_4(p2, p1, p0, q0, q1, q2)
506 } else if nhev4(p1, p0, q0, q1, shift:bd - 8) <= level {
507 filter_narrow4_4(p1, p0, q0, q1, shift:bd - 8)
508 } else {
509 filter_narrow2_4(p1, p0, q0, q1, shift:bd - 8)
510 };
511 Some(x)
512 } else {
513 None
514 }
515}
516
517// Assumes slice[0] is set 3 taps back from the edge
518fn deblock_v_size6<T: Pixel>(
519 rec: &mut PlaneRegionMut<'_, T>, level: usize, bd: usize,
520) {
521 for y: usize in 0..4 {
522 let p: &[T] = &rec[y];
523 let vals: [i32; 6] =
524 [p[0].as_(), p[1].as_(), p[2].as_(), p[3].as_(), p[4].as_(), p[5].as_()];
525 if let Some(data: [i32; 4]) = deblock_size6_inner(vals, level, bd) {
526 copy_horizontal(dst:rec, x:1, y, &data);
527 }
528 }
529}
530
531// Assumes slice[0] is set 3 taps back from the edge
532fn deblock_h_size6<T: Pixel>(
533 rec: &mut PlaneRegionMut<'_, T>, level: usize, bd: usize,
534) {
535 for x: usize in 0..4 {
536 let vals: [i32; 6] = [
537 rec[0][x].as_(),
538 rec[1][x].as_(),
539 rec[2][x].as_(),
540 rec[3][x].as_(),
541 rec[4][x].as_(),
542 rec[5][x].as_(),
543 ];
544 if let Some(data: [i32; 4]) = deblock_size6_inner(vals, level, bd) {
545 copy_vertical(dst:rec, x, y:1, &data);
546 }
547 }
548}
549
550// Assumes rec[0] and src[0] are set 3 taps back from the edge.
551// Accesses six taps, accumulates four pixels into the tally
552fn sse_size6<T: Pixel>(
553 rec: &PlaneRegion<'_, T>, src: &PlaneRegion<'_, T>,
554 tally: &mut [i64; MAX_LOOP_FILTER + 2], horizontal_p: bool, bd: usize,
555) {
556 let flat = 1 << (bd - 8);
557 for i in 0..4 {
558 let (p2, p1, p0, q0, q1, q2, a) = if horizontal_p {
559 // six taps
560 (
561 rec[0][i].as_(),
562 rec[1][i].as_(),
563 rec[2][i].as_(),
564 rec[3][i].as_(),
565 rec[4][i].as_(),
566 rec[5][i].as_(),
567 // four pixels to compare so offset one forward
568 [src[1][i].as_(), src[2][i].as_(), src[3][i].as_(), src[4][i].as_()],
569 )
570 } else {
571 // six taps
572 (
573 rec[i][0].as_(),
574 rec[i][1].as_(),
575 rec[i][2].as_(),
576 rec[i][3].as_(),
577 rec[i][4].as_(),
578 rec[i][5].as_(),
579 // four pixels to compare so offset one forward
580 [src[i][1].as_(), src[i][2].as_(), src[i][3].as_(), src[i][4].as_()],
581 )
582 };
583
584 // Four possibilities: no filter, wide6, narrow2 and narrow4
585 // All possibilities produce four outputs
586 let none: [_; 4] = [p1, p0, q0, q1];
587 let wide6 = filter_wide6_4(p2, p1, p0, q0, q1, q2);
588 let narrow2 = filter_narrow2_4(p1, p0, q0, q1, bd - 8);
589 let narrow4 = filter_narrow4_4(p1, p0, q0, q1, bd - 8);
590
591 // mask6 sets the dividing line for filter vs no filter
592 // flat6 decides between wide and narrow filters (unrelated to level)
593 // nhev4 sets the dividing line between narrow2 and narrow4
594 let mask =
595 clamp(mask6(p2, p1, p0, q0, q1, q2, bd - 8), 1, MAX_LOOP_FILTER + 1);
596 let flatp = flat6(p2, p1, p0, q0, q1, q2) <= flat;
597 let nhev = clamp(nhev4(p1, p0, q0, q1, bd - 8), mask, MAX_LOOP_FILTER + 1);
598
599 // sse for each; short-circuit the 'special' no-op cases.
600 let sse_none = stride_sse(&a, &none);
601 let sse_wide6 = if flatp && mask <= MAX_LOOP_FILTER {
602 stride_sse(&a, &wide6)
603 } else {
604 sse_none
605 };
606 let sse_narrow2 =
607 if !flatp && nhev != mask { stride_sse(&a, &narrow2) } else { sse_none };
608 let sse_narrow4 = if !flatp && nhev <= MAX_LOOP_FILTER {
609 stride_sse(&a, &narrow4)
610 } else {
611 sse_none
612 };
613
614 // accumulate possible filter values into the tally
615 tally[0] += sse_none;
616 tally[mask] -= sse_none;
617 if flatp {
618 tally[mask] += sse_wide6;
619 } else {
620 tally[mask] += sse_narrow2;
621 tally[nhev] -= sse_narrow2;
622 tally[nhev] += sse_narrow4;
623 }
624 }
625}
626
627#[inline]
628fn mask8(
629 p3: i32, p2: i32, p1: i32, p0: i32, q0: i32, q1: i32, q2: i32, q3: i32,
630 shift: usize,
631) -> usize {
632 cmp::max(
633 v1:limit_to_level(
634 cmp::max(
635 (p3 - p2).abs(),
636 cmp::max(
637 (p2 - p1).abs(),
638 cmp::max(
639 (p1 - p0).abs(),
640 cmp::max(
641 (q3 - q2).abs(),
642 cmp::max((q2 - q1).abs(), (q1 - q0).abs()),
643 ),
644 ),
645 ),
646 ),
647 shift,
648 ),
649 v2:blimit_to_level((p0 - q0).abs() * 2 + (p1 - q1).abs() / 2, shift),
650 ) as usize
651}
652
// Largest absolute difference between an outer tap and the edge tap on its
// own side (`p1..p3` against `p0`; `q1..q3` against `q0`). Callers compare
// it against a flatness threshold to select the wide filter.
#[inline]
fn flat8(
  p3: i32, p2: i32, p1: i32, p0: i32, q0: i32, q1: i32, q2: i32, q3: i32,
) -> usize {
  (p1 - p0)
    .abs()
    .max((q1 - q0).abs())
    .max((p2 - p0).abs())
    .max((q2 - q0).abs())
    .max((p3 - p0).abs())
    .max((q3 - q0).abs()) as usize
}
668
669#[inline]
670fn deblock_size8_inner(
671 [p3: i32, p2: i32, p1: i32, p0: i32, q0: i32, q1: i32, q2: i32, q3: i32]: [i32; 8], level: usize, bd: usize,
672) -> Option<[i32; 6]> {
673 if mask8(p3, p2, p1, p0, q0, q1, q2, q3, shift:bd - 8) <= level {
674 let flat: usize = 1 << (bd - 8);
675 let x: [i32; 6] = if flat8(p3, p2, p1, p0, q0, q1, q2, q3) <= flat {
676 filter_wide8_6(p3, p2, p1, p0, q0, q1, q2, q3)
677 } else if nhev4(p1, p0, q0, q1, shift:bd - 8) <= level {
678 filter_narrow4_6(p2, p1, p0, q0, q1, q2, shift:bd - 8)
679 } else {
680 filter_narrow2_6(p2, p1, p0, q0, q1, q2, shift:bd - 8)
681 };
682 Some(x)
683 } else {
684 None
685 }
686}
687
688// Assumes rec[0] is set 4 taps back from the edge
689fn deblock_v_size8<T: Pixel>(
690 rec: &mut PlaneRegionMut<'_, T>, level: usize, bd: usize,
691) {
692 for y: usize in 0..4 {
693 let p: &[T] = &rec[y];
694 let vals: [i32; 8] = [
695 p[0].as_(),
696 p[1].as_(),
697 p[2].as_(),
698 p[3].as_(),
699 p[4].as_(),
700 p[5].as_(),
701 p[6].as_(),
702 p[7].as_(),
703 ];
704 if let Some(data: [i32; 6]) = deblock_size8_inner(vals, level, bd) {
705 copy_horizontal(dst:rec, x:1, y, &data);
706 }
707 }
708}
709
710// Assumes rec[0] is set 4 taps back from the edge
711fn deblock_h_size8<T: Pixel>(
712 rec: &mut PlaneRegionMut<'_, T>, level: usize, bd: usize,
713) {
714 for x: usize in 0..4 {
715 let vals: [i32; 8] = [
716 rec[0][x].as_(),
717 rec[1][x].as_(),
718 rec[2][x].as_(),
719 rec[3][x].as_(),
720 rec[4][x].as_(),
721 rec[5][x].as_(),
722 rec[6][x].as_(),
723 rec[7][x].as_(),
724 ];
725 if let Some(data: [i32; 6]) = deblock_size8_inner(vals, level, bd) {
726 copy_vertical(dst:rec, x, y:1, &data);
727 }
728 }
729}
730
731// Assumes rec[0] and src[0] are set 4 taps back from the edge.
732// Accesses eight taps, accumulates six pixels into the tally
733fn sse_size8<T: Pixel>(
734 rec: &PlaneRegion<'_, T>, src: &PlaneRegion<'_, T>,
735 tally: &mut [i64; MAX_LOOP_FILTER + 2], horizontal_p: bool, bd: usize,
736) {
737 let flat = 1 << (bd - 8);
738
739 for i in 0..4 {
740 let (p3, p2, p1, p0, q0, q1, q2, q3, a) = if horizontal_p {
741 // eight taps
742 (
743 rec[0][i].as_(),
744 rec[1][i].as_(),
745 rec[2][i].as_(),
746 rec[3][i].as_(),
747 rec[4][i].as_(),
748 rec[5][i].as_(),
749 rec[6][i].as_(),
750 rec[7][i].as_(),
751 // six pixels to compare so offset one forward
752 [
753 src[1][i].as_(),
754 src[2][i].as_(),
755 src[3][i].as_(),
756 src[4][i].as_(),
757 src[5][i].as_(),
758 src[6][i].as_(),
759 ],
760 )
761 } else {
762 // eight taps
763 (
764 rec[i][0].as_(),
765 rec[i][1].as_(),
766 rec[i][2].as_(),
767 rec[i][3].as_(),
768 rec[i][4].as_(),
769 rec[i][5].as_(),
770 rec[i][6].as_(),
771 rec[i][7].as_(),
772 // six pixels to compare so offset one forward
773 [
774 src[i][1].as_(),
775 src[i][2].as_(),
776 src[i][3].as_(),
777 src[i][4].as_(),
778 src[i][5].as_(),
779 src[i][6].as_(),
780 ],
781 )
782 };
783
784 // Four possibilities: no filter, wide8, narrow2 and narrow4
785 let none: [_; 6] = [p2, p1, p0, q0, q1, q2];
786 let wide8: [_; 6] = filter_wide8_6(p3, p2, p1, p0, q0, q1, q2, q3);
787 let narrow2: [_; 6] = filter_narrow2_6(p2, p1, p0, q0, q1, q2, bd - 8);
788 let narrow4: [_; 6] = filter_narrow4_6(p2, p1, p0, q0, q1, q2, bd - 8);
789
790 // mask8 sets the dividing line for filter vs no filter
791 // flat8 decides between wide and narrow filters (unrelated to level)
792 // nhev4 sets the dividing line between narrow2 and narrow4
793 let mask = clamp(
794 mask8(p3, p2, p1, p0, q0, q1, q2, q3, bd - 8),
795 1,
796 MAX_LOOP_FILTER + 1,
797 );
798 let flatp = flat8(p3, p2, p1, p0, q0, q1, q2, q3) <= flat;
799 let nhev = clamp(nhev4(p1, p0, q0, q1, bd - 8), mask, MAX_LOOP_FILTER + 1);
800
801 // sse for each; short-circuit the 'special' no-op cases.
802 let sse_none = stride_sse(&a, &none);
803 let sse_wide8 = if flatp && mask <= MAX_LOOP_FILTER {
804 stride_sse(&a, &wide8)
805 } else {
806 sse_none
807 };
808 let sse_narrow2 =
809 if !flatp && nhev != mask { stride_sse(&a, &narrow2) } else { sse_none };
810 let sse_narrow4 = if !flatp && nhev <= MAX_LOOP_FILTER {
811 stride_sse(&a, &narrow4)
812 } else {
813 sse_none
814 };
815
816 // accumulate possible filter values into the tally
817 tally[0] += sse_none;
818 tally[mask] -= sse_none;
819 if flatp {
820 tally[mask] += sse_wide8;
821 } else {
822 tally[mask] += sse_narrow2;
823 tally[nhev] -= sse_narrow2;
824 tally[nhev] += sse_narrow4;
825 }
826 }
827}
828
// Largest absolute difference between the three outermost taps on each side
// and the edge tap on that side (`p4..p6` against `p0`; `q4..q6` against
// `q0`). Callers compare it against a flatness threshold to choose between
// the 14-tap and 8-tap wide filters.
#[inline]
fn flat14_outer(
  p6: i32, p5: i32, p4: i32, p0: i32, q0: i32, q4: i32, q5: i32, q6: i32,
) -> usize {
  (p4 - p0)
    .abs()
    .max((q4 - q0).abs())
    .max((p5 - p0).abs())
    .max((q5 - q0).abs())
    .max((p6 - p0).abs())
    .max((q6 - q0).abs()) as usize
}
844
845#[inline]
846fn deblock_size14_inner(
847 [p6: i32, p5: i32, p4: i32, p3: i32, p2: i32, p1: i32, p0: i32, q0: i32, q1: i32, q2: i32, q3: i32, q4: i32, q5: i32, q6: i32]: [i32; 14],
848 level: usize, bd: usize,
849) -> Option<[i32; 12]> {
850 // 'mask' test
851 if mask8(p3, p2, p1, p0, q0, q1, q2, q3, bd - 8) <= level {
852 let flat = 1 << (bd - 8);
853 // inner flatness test
854 let x = if flat8(p3, p2, p1, p0, q0, q1, q2, q3) <= flat {
855 // outer flatness test
856 if flat14_outer(p6, p5, p4, p0, q0, q4, q5, q6) <= flat {
857 // sufficient flatness across 14 pixel width; run full-width filter
858 filter_wide14_12(
859 p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6,
860 )
861 } else {
862 // only flat in inner area, run 8-tap
863 filter_wide8_12(p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5)
864 }
865 } else if nhev4(p1, p0, q0, q1, bd - 8) <= level {
866 // not flat, run narrow filter
867 filter_narrow4_12(p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, bd - 8)
868 } else {
869 filter_narrow2_12(p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, bd - 8)
870 };
871 Some(x)
872 } else {
873 None
874 }
875}
876
877// Assumes rec[0] is set 7 taps back from the edge
878fn deblock_v_size14<T: Pixel>(
879 rec: &mut PlaneRegionMut<'_, T>, level: usize, bd: usize,
880) {
881 for y: usize in 0..4 {
882 let p: &[T] = &rec[y];
883 let vals: [i32; 14] = [
884 p[0].as_(),
885 p[1].as_(),
886 p[2].as_(),
887 p[3].as_(),
888 p[4].as_(),
889 p[5].as_(),
890 p[6].as_(),
891 p[7].as_(),
892 p[8].as_(),
893 p[9].as_(),
894 p[10].as_(),
895 p[11].as_(),
896 p[12].as_(),
897 p[13].as_(),
898 ];
899 if let Some(data: [i32; 12]) = deblock_size14_inner(vals, level, bd) {
900 copy_horizontal(dst:rec, x:1, y, &data);
901 }
902 }
903}
904
905// Assumes rec[0] is set 7 taps back from the edge
906fn deblock_h_size14<T: Pixel>(
907 rec: &mut PlaneRegionMut<'_, T>, level: usize, bd: usize,
908) {
909 for x: usize in 0..4 {
910 let vals: [i32; 14] = [
911 rec[0][x].as_(),
912 rec[1][x].as_(),
913 rec[2][x].as_(),
914 rec[3][x].as_(),
915 rec[4][x].as_(),
916 rec[5][x].as_(),
917 rec[6][x].as_(),
918 rec[7][x].as_(),
919 rec[8][x].as_(),
920 rec[9][x].as_(),
921 rec[10][x].as_(),
922 rec[11][x].as_(),
923 rec[12][x].as_(),
924 rec[13][x].as_(),
925 ];
926 if let Some(data: [i32; 12]) = deblock_size14_inner(vals, level, bd) {
927 copy_vertical(dst:rec, x, y:1, &data);
928 }
929 }
930}
931
932// Assumes rec[0] and src[0] are set 7 taps back from the edge.
933// Accesses fourteen taps, accumulates twelve pixels into the tally
934fn sse_size14<T: Pixel>(
935 rec: &PlaneRegion<'_, T>, src: &PlaneRegion<'_, T>,
936 tally: &mut [i64; MAX_LOOP_FILTER + 2], horizontal_p: bool, bd: usize,
937) {
938 let flat = 1 << (bd - 8);
939 for i in 0..4 {
940 let (p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, a) =
941 if horizontal_p {
942 // 14 taps
943 (
944 rec[0][i].as_(),
945 rec[1][i].as_(),
946 rec[2][i].as_(),
947 rec[3][i].as_(),
948 rec[4][i].as_(),
949 rec[5][i].as_(),
950 rec[6][i].as_(),
951 rec[7][i].as_(),
952 rec[8][i].as_(),
953 rec[9][i].as_(),
954 rec[10][i].as_(),
955 rec[11][i].as_(),
956 rec[12][i].as_(),
957 rec[13][i].as_(),
958 // 12 pixels to compare so offset one forward
959 [
960 src[1][i].as_(),
961 src[2][i].as_(),
962 src[3][i].as_(),
963 src[4][i].as_(),
964 src[5][i].as_(),
965 src[6][i].as_(),
966 src[7][i].as_(),
967 src[8][i].as_(),
968 src[9][i].as_(),
969 src[10][i].as_(),
970 src[11][i].as_(),
971 src[12][i].as_(),
972 ],
973 )
974 } else {
975 // 14 taps
976 (
977 rec[i][0].as_(),
978 rec[i][1].as_(),
979 rec[i][2].as_(),
980 rec[i][3].as_(),
981 rec[i][4].as_(),
982 rec[i][5].as_(),
983 rec[i][6].as_(),
984 rec[i][7].as_(),
985 rec[i][8].as_(),
986 rec[i][9].as_(),
987 rec[i][10].as_(),
988 rec[i][11].as_(),
989 rec[i][12].as_(),
990 rec[i][13].as_(),
991 // 12 pixels to compare so offset one forward
992 [
993 src[i][1].as_(),
994 src[i][2].as_(),
995 src[i][3].as_(),
996 src[i][4].as_(),
997 src[i][5].as_(),
998 src[i][6].as_(),
999 src[i][7].as_(),
1000 src[i][8].as_(),
1001 src[i][9].as_(),
1002 src[i][10].as_(),
1003 src[i][11].as_(),
1004 src[i][12].as_(),
1005 ],
1006 )
1007 };
1008
1009 // Five possibilities: no filter, wide14, wide8, narrow2 and narrow4
1010 let none: [i32; 12] = [p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5];
1011 let wide14 =
1012 filter_wide14_12(p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6);
1013 let wide8 =
1014 filter_wide8_12(p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5);
1015 let narrow2 = filter_narrow2_12(
1016 p5,
1017 p4,
1018 p3,
1019 p2,
1020 p1,
1021 p0,
1022 q0,
1023 q1,
1024 q2,
1025 q3,
1026 q4,
1027 q5,
1028 bd - 8,
1029 );
1030 let narrow4 = filter_narrow4_12(
1031 p5,
1032 p4,
1033 p3,
1034 p2,
1035 p1,
1036 p0,
1037 q0,
1038 q1,
1039 q2,
1040 q3,
1041 q4,
1042 q5,
1043 bd - 8,
1044 );
1045
1046 // mask8 sets the dividing line for filter vs no filter
1047 // flat8 decides between wide and narrow filters (unrelated to level)
1048 // flat14 decides between wide14 and wide8 filters
1049 // nhev4 sets the dividing line between narrow2 and narrow4
1050 let mask = clamp(
1051 mask8(p3, p2, p1, p0, q0, q1, q2, q3, bd - 8),
1052 1,
1053 MAX_LOOP_FILTER + 1,
1054 );
1055 let flat8p = flat8(p3, p2, p1, p0, q0, q1, q2, q3) <= flat;
1056 let flat14p = flat14_outer(p6, p5, p4, p0, q0, q4, q5, q6) <= flat;
1057 let nhev = clamp(nhev4(p1, p0, q0, q1, bd - 8), mask, MAX_LOOP_FILTER + 1);
1058
1059 // sse for each; short-circuit the 'special' no-op cases.
1060 let sse_none = stride_sse(&a, &none);
1061 let sse_wide8 = if flat8p && !flat14p && mask <= MAX_LOOP_FILTER {
1062 stride_sse(&a, &wide8)
1063 } else {
1064 sse_none
1065 };
1066 let sse_wide14 = if flat8p && flat14p && mask <= MAX_LOOP_FILTER {
1067 stride_sse(&a, &wide14)
1068 } else {
1069 sse_none
1070 };
1071 let sse_narrow2 = if !flat8p && nhev != mask {
1072 stride_sse(&a, &narrow2)
1073 } else {
1074 sse_none
1075 };
1076 let sse_narrow4 = if !flat8p && nhev <= MAX_LOOP_FILTER {
1077 stride_sse(&a, &narrow4)
1078 } else {
1079 sse_none
1080 };
1081
1082 // accumulate possible filter values into the tally
1083 tally[0] += sse_none;
1084 tally[mask] -= sse_none;
1085 if flat8p {
1086 if flat14p {
1087 tally[mask] += sse_wide14;
1088 } else {
1089 tally[mask] += sse_wide8;
1090 }
1091 } else {
1092 tally[mask] += sse_narrow2;
1093 tally[nhev] -= sse_narrow2;
1094 tally[nhev] += sse_narrow4;
1095 }
1096 }
1097}
1098
1099fn filter_v_edge<T: Pixel>(
1100 deblock: &DeblockState, blocks: &TileBlocks, bo: TileBlockOffset,
1101 p: &mut PlaneRegionMut<T>, pli: usize, bd: usize, xdec: usize, ydec: usize,
1102) {
1103 let block = &blocks[bo];
1104 let txsize = if pli == 0 {
1105 block.txsize
1106 } else {
1107 block.bsize.largest_chroma_tx_size(xdec, ydec)
1108 };
1109 let tx_edge = bo.0.x >> xdec & (txsize.width_mi() - 1) == 0;
1110 if tx_edge {
1111 let prev_block = deblock_left(blocks, bo, &p.as_const());
1112 let block_edge = bo.0.x & (block.n4_w as usize - 1) == 0;
1113 let filter_size =
1114 deblock_size(block, prev_block, &p.as_const(), pli, true, block_edge);
1115 if filter_size > 0 {
1116 let level = deblock_level(deblock, block, prev_block, pli, true);
1117 if level > 0 {
1118 let po = bo.plane_offset(p.plane_cfg);
1119 let mut plane_region = p.subregion_mut(Area::Rect {
1120 x: po.x - (filter_size >> 1) as isize,
1121 y: po.y,
1122 width: filter_size,
1123 height: 4,
1124 });
1125 match filter_size {
1126 4 => {
1127 deblock_v_size4(&mut plane_region, level, bd);
1128 }
1129 6 => {
1130 deblock_v_size6(&mut plane_region, level, bd);
1131 }
1132 8 => {
1133 deblock_v_size8(&mut plane_region, level, bd);
1134 }
1135 14 => {
1136 deblock_v_size14(&mut plane_region, level, bd);
1137 }
1138 _ => unreachable!(),
1139 }
1140 }
1141 }
1142 }
1143}
1144
1145fn sse_v_edge<T: Pixel>(
1146 blocks: &TileBlocks, bo: TileBlockOffset, rec_plane: &PlaneRegion<T>,
1147 src_plane: &PlaneRegion<T>, tally: &mut [i64; MAX_LOOP_FILTER + 2],
1148 pli: usize, bd: usize, xdec: usize, ydec: usize,
1149) {
1150 let block = &blocks[bo];
1151 let txsize = if pli == 0 {
1152 block.txsize
1153 } else {
1154 block.bsize.largest_chroma_tx_size(xdec, ydec)
1155 };
1156 let tx_edge = bo.0.x >> xdec & (txsize.width_mi() - 1) == 0;
1157 if tx_edge {
1158 let prev_block = deblock_left(blocks, bo, rec_plane);
1159 let block_edge = bo.0.x & (block.n4_w as usize - 1) == 0;
1160 let filter_size =
1161 deblock_size(block, prev_block, rec_plane, pli, true, block_edge);
1162 if filter_size > 0 {
1163 let po = bo.plane_offset(rec_plane.plane_cfg); // rec and src have identical subsampling
1164 let rec_region = rec_plane.subregion(Area::Rect {
1165 x: po.x - (filter_size >> 1) as isize,
1166 y: po.y,
1167 width: filter_size,
1168 height: 4,
1169 });
1170 let src_region = src_plane.subregion(Area::Rect {
1171 x: po.x - (filter_size >> 1) as isize,
1172 y: po.y,
1173 width: filter_size,
1174 height: 4,
1175 });
1176 match filter_size {
1177 4 => {
1178 sse_size4(&rec_region, &src_region, tally, false, bd);
1179 }
1180 6 => {
1181 sse_size6(&rec_region, &src_region, tally, false, bd);
1182 }
1183 8 => {
1184 sse_size8(&rec_region, &src_region, tally, false, bd);
1185 }
1186 14 => {
1187 sse_size14(&rec_region, &src_region, tally, false, bd);
1188 }
1189 _ => unreachable!(),
1190 }
1191 }
1192 }
1193}
1194
1195fn filter_h_edge<T: Pixel>(
1196 deblock: &DeblockState, blocks: &TileBlocks, bo: TileBlockOffset,
1197 p: &mut PlaneRegionMut<T>, pli: usize, bd: usize, xdec: usize, ydec: usize,
1198) {
1199 let block = &blocks[bo];
1200 let txsize = if pli == 0 {
1201 block.txsize
1202 } else {
1203 block.bsize.largest_chroma_tx_size(xdec, ydec)
1204 };
1205 let tx_edge = bo.0.y >> ydec & (txsize.height_mi() - 1) == 0;
1206 if tx_edge {
1207 let prev_block = deblock_up(blocks, bo, &p.as_const());
1208 let block_edge = bo.0.y & (block.n4_h as usize - 1) == 0;
1209 let filter_size =
1210 deblock_size(block, prev_block, &p.as_const(), pli, false, block_edge);
1211 if filter_size > 0 {
1212 let level = deblock_level(deblock, block, prev_block, pli, false);
1213 if level > 0 {
1214 let po = bo.plane_offset(p.plane_cfg);
1215 let mut plane_region = p.subregion_mut(Area::Rect {
1216 x: po.x,
1217 y: po.y - (filter_size >> 1) as isize,
1218 width: 4,
1219 height: filter_size,
1220 });
1221 match filter_size {
1222 4 => {
1223 deblock_h_size4(&mut plane_region, level, bd);
1224 }
1225 6 => {
1226 deblock_h_size6(&mut plane_region, level, bd);
1227 }
1228 8 => {
1229 deblock_h_size8(&mut plane_region, level, bd);
1230 }
1231 14 => {
1232 deblock_h_size14(&mut plane_region, level, bd);
1233 }
1234 _ => unreachable!(),
1235 }
1236 }
1237 }
1238 }
1239}
1240
1241fn sse_h_edge<T: Pixel>(
1242 blocks: &TileBlocks, bo: TileBlockOffset, rec_plane: &PlaneRegion<T>,
1243 src_plane: &PlaneRegion<T>, tally: &mut [i64; MAX_LOOP_FILTER + 2],
1244 pli: usize, bd: usize, xdec: usize, ydec: usize,
1245) {
1246 let block = &blocks[bo];
1247 let txsize = if pli == 0 {
1248 block.txsize
1249 } else {
1250 block.bsize.largest_chroma_tx_size(xdec, ydec)
1251 };
1252 let tx_edge = bo.0.y >> ydec & (txsize.height_mi() - 1) == 0;
1253 if tx_edge {
1254 let prev_block = deblock_up(blocks, bo, rec_plane);
1255 let block_edge = bo.0.y & (block.n4_h as usize - 1) == 0;
1256 let filter_size =
1257 deblock_size(block, prev_block, rec_plane, pli, true, block_edge);
1258 if filter_size > 0 {
1259 let po = bo.plane_offset(rec_plane.plane_cfg); // rec and src have identical subsampling
1260 let rec_region = rec_plane.subregion(Area::Rect {
1261 x: po.x,
1262 y: po.y - (filter_size >> 1) as isize,
1263 width: 4,
1264 height: filter_size,
1265 });
1266 let src_region = src_plane.subregion(Area::Rect {
1267 x: po.x,
1268 y: po.y - (filter_size >> 1) as isize,
1269 width: 4,
1270 height: filter_size,
1271 });
1272
1273 match filter_size {
1274 4 => {
1275 sse_size4(&rec_region, &src_region, tally, true, bd);
1276 }
1277 6 => {
1278 sse_size6(&rec_region, &src_region, tally, true, bd);
1279 }
1280 8 => {
1281 sse_size8(&rec_region, &src_region, tally, true, bd);
1282 }
1283 14 => {
1284 sse_size14(&rec_region, &src_region, tally, true, bd);
1285 }
1286 _ => unreachable!(),
1287 }
1288 }
1289 }
1290}
1291
1292// Deblocks all edges, vertical and horizontal, in a single plane
1293#[profiling::function]
1294pub fn deblock_plane<T: Pixel>(
1295 deblock: &DeblockState, p: &mut PlaneRegionMut<T>, pli: usize,
1296 blocks: &TileBlocks, crop_w: usize, crop_h: usize, bd: usize,
1297) {
1298 let xdec = p.plane_cfg.xdec;
1299 let ydec = p.plane_cfg.ydec;
1300 assert!(xdec <= 1 && ydec <= 1);
1301
1302 match pli {
1303 0 => {
1304 if deblock.levels[0] == 0 && deblock.levels[1] == 0 {
1305 return;
1306 }
1307 }
1308 1 => {
1309 if deblock.levels[2] == 0 {
1310 return;
1311 }
1312 }
1313 2 => {
1314 if deblock.levels[3] == 0 {
1315 return;
1316 }
1317 }
1318 _ => return,
1319 }
1320
1321 let rect = p.rect();
1322 let cols = (cmp::min(
1323 blocks.cols(),
1324 ((crop_w - rect.x as usize) + MI_SIZE - 1) >> MI_SIZE_LOG2,
1325 ) + (1 << xdec >> 1))
1326 >> xdec
1327 << xdec; // Clippy can go suck an egg
1328 let rows = (cmp::min(
1329 blocks.rows(),
1330 ((crop_h - rect.y as usize) + MI_SIZE - 1) >> MI_SIZE_LOG2,
1331 ) + (1 << ydec >> 1))
1332 >> ydec
1333 << ydec; // Clippy can go suck an egg
1334
1335 // vertical edge filtering leads horizontal by one full MI-sized
1336 // row (and horizontal filtering doesn't happen along the upper
1337 // edge). Unroll to avoid corner-cases.
1338 if rows > 0 {
1339 for x in (1 << xdec..cols).step_by(1 << xdec) {
1340 filter_v_edge(
1341 deblock,
1342 blocks,
1343 TileBlockOffset(BlockOffset { x, y: 0 }),
1344 p,
1345 pli,
1346 bd,
1347 xdec,
1348 ydec,
1349 );
1350 }
1351 if rows > 1 << ydec {
1352 for x in (1 << xdec..cols).step_by(1 << xdec) {
1353 filter_v_edge(
1354 deblock,
1355 blocks,
1356 TileBlockOffset(BlockOffset { x, y: 1 << ydec }),
1357 p,
1358 pli,
1359 bd,
1360 xdec,
1361 ydec,
1362 );
1363 }
1364 }
1365 }
1366
1367 // filter rows where vertical and horizontal edge filtering both
1368 // happen (horizontal edge filtering lags vertical by one row).
1369 for y in ((2 << ydec)..rows).step_by(1 << ydec) {
1370 // Check for vertical edge at first MI block boundary on this row
1371 if cols > 1 << xdec {
1372 filter_v_edge(
1373 deblock,
1374 blocks,
1375 TileBlockOffset(BlockOffset { x: 1 << xdec, y }),
1376 p,
1377 pli,
1378 bd,
1379 xdec,
1380 ydec,
1381 );
1382 }
1383 // run the rest of the row with both vertical and horizontal edge filtering.
1384 // Horizontal lags vertical edge by one row and two columns.
1385 for x in (2 << xdec..cols).step_by(1 << xdec) {
1386 filter_v_edge(
1387 deblock,
1388 blocks,
1389 TileBlockOffset(BlockOffset { x, y }),
1390 p,
1391 pli,
1392 bd,
1393 xdec,
1394 ydec,
1395 );
1396 filter_h_edge(
1397 deblock,
1398 blocks,
1399 TileBlockOffset(BlockOffset {
1400 x: x - (2 << xdec),
1401 y: y - (1 << ydec),
1402 }),
1403 p,
1404 pli,
1405 bd,
1406 xdec,
1407 ydec,
1408 );
1409 }
1410 // ..and the last two horizontal edges for the row
1411 if cols >= 2 << xdec {
1412 filter_h_edge(
1413 deblock,
1414 blocks,
1415 TileBlockOffset(BlockOffset {
1416 x: cols - (2 << xdec),
1417 y: y - (1 << ydec),
1418 }),
1419 p,
1420 pli,
1421 bd,
1422 xdec,
1423 ydec,
1424 );
1425 }
1426 if cols >= 1 << xdec {
1427 filter_h_edge(
1428 deblock,
1429 blocks,
1430 TileBlockOffset(BlockOffset {
1431 x: cols - (1 << xdec),
1432 y: y - (1 << ydec),
1433 }),
1434 p,
1435 pli,
1436 bd,
1437 xdec,
1438 ydec,
1439 );
1440 }
1441 }
1442
1443 // Last horizontal row, vertical is already complete
1444 if rows > 1 << ydec {
1445 for x in (0..cols).step_by(1 << xdec) {
1446 filter_h_edge(
1447 deblock,
1448 blocks,
1449 TileBlockOffset(BlockOffset { x, y: rows - (1 << ydec) }),
1450 p,
1451 pli,
1452 bd,
1453 xdec,
1454 ydec,
1455 );
1456 }
1457 }
1458}
1459
1460// sse count of all edges in a single plane, accumulates into vertical and horizontal counts
1461fn sse_plane<T: Pixel>(
1462 rec: &PlaneRegion<T>, src: &PlaneRegion<T>,
1463 v_sse: &mut [i64; MAX_LOOP_FILTER + 2],
1464 h_sse: &mut [i64; MAX_LOOP_FILTER + 2], pli: usize, blocks: &TileBlocks,
1465 crop_w: usize, crop_h: usize, bd: usize,
1466) {
1467 let xdec = rec.plane_cfg.xdec;
1468 let ydec = rec.plane_cfg.ydec;
1469 assert!(xdec <= 1 && ydec <= 1);
1470 let rect = rec.rect();
1471 let cols = (cmp::min(
1472 blocks.cols(),
1473 (crop_w - rect.x as usize + MI_SIZE - 1) >> MI_SIZE_LOG2,
1474 ) + (1 << xdec >> 1))
1475 >> xdec
1476 << xdec; // Clippy can go suck an egg
1477 let rows = (cmp::min(
1478 blocks.rows(),
1479 (crop_h - rect.y as usize + MI_SIZE - 1) >> MI_SIZE_LOG2,
1480 ) + (1 << ydec >> 1))
1481 >> ydec
1482 << ydec; // Clippy can go suck an egg
1483
1484 // No horizontal edge filtering along top of frame
1485 for x in (1 << xdec..cols).step_by(1 << xdec) {
1486 sse_v_edge(
1487 blocks,
1488 TileBlockOffset(BlockOffset { x, y: 0 }),
1489 rec,
1490 src,
1491 v_sse,
1492 pli,
1493 bd,
1494 xdec,
1495 ydec,
1496 );
1497 }
1498
1499 // Unlike actual filtering, we're counting horizontal and vertical
1500 // as separable cases. No need to lag the horizontal processing
1501 // behind vertical.
1502 for y in (1 << ydec..rows).step_by(1 << ydec) {
1503 // No vertical filtering along left edge of frame
1504 sse_h_edge(
1505 blocks,
1506 TileBlockOffset(BlockOffset { x: 0, y }),
1507 rec,
1508 src,
1509 h_sse,
1510 pli,
1511 bd,
1512 xdec,
1513 ydec,
1514 );
1515 for x in (1 << xdec..cols).step_by(1 << xdec) {
1516 sse_v_edge(
1517 blocks,
1518 TileBlockOffset(BlockOffset { x, y }),
1519 rec,
1520 src,
1521 v_sse,
1522 pli,
1523 bd,
1524 xdec,
1525 ydec,
1526 );
1527 sse_h_edge(
1528 blocks,
1529 TileBlockOffset(BlockOffset { x, y }),
1530 rec,
1531 src,
1532 h_sse,
1533 pli,
1534 bd,
1535 xdec,
1536 ydec,
1537 );
1538 }
1539 }
1540}
1541
1542// Deblocks all edges in all planes of a frame
1543#[profiling::function]
1544pub fn deblock_filter_frame<T: Pixel>(
1545 deblock: &DeblockState, tile: &mut TileMut<T>, blocks: &TileBlocks,
1546 crop_w: usize, crop_h: usize, bd: usize, planes: usize,
1547) {
1548 tile.planes[..planes].par_iter_mut().enumerate().for_each(|(pli: usize, plane: &mut PlaneRegionMut<'_, T>)| {
1549 deblock_plane(deblock, p:plane, pli, blocks, crop_w, crop_h, bd);
1550 });
1551}
1552
1553fn sse_optimize<T: Pixel>(
1554 rec: &Tile<T>, input: &Tile<T>, blocks: &TileBlocks, crop_w: usize,
1555 crop_h: usize, bd: usize, monochrome: bool,
1556) -> [u8; 4] {
1557 // i64 allows us to accumulate a total of ~ 35 bits worth of pixels
1558 assert!(
1559 ILog::ilog(input.planes[0].plane_cfg.width)
1560 + ILog::ilog(input.planes[0].plane_cfg.height)
1561 < 35
1562 );
1563 let mut level = [0; 4];
1564 let planes = if monochrome { 1 } else { MAX_PLANES };
1565
1566 for pli in 0..planes {
1567 let mut v_tally: [i64; MAX_LOOP_FILTER + 2] = [0; MAX_LOOP_FILTER + 2];
1568 let mut h_tally: [i64; MAX_LOOP_FILTER + 2] = [0; MAX_LOOP_FILTER + 2];
1569
1570 sse_plane(
1571 &rec.planes[pli],
1572 &input.planes[pli],
1573 &mut v_tally,
1574 &mut h_tally,
1575 pli,
1576 blocks,
1577 crop_w,
1578 crop_h,
1579 bd,
1580 );
1581
1582 for i in 1..=MAX_LOOP_FILTER {
1583 v_tally[i] += v_tally[i - 1];
1584 h_tally[i] += h_tally[i - 1];
1585 }
1586
1587 match pli {
1588 0 => {
1589 let mut best_v = 999;
1590 let mut best_h = 999;
1591 for i in 0..=MAX_LOOP_FILTER {
1592 if best_v == 999 || v_tally[best_v] > v_tally[i] {
1593 best_v = i;
1594 };
1595 if best_h == 999 || h_tally[best_h] > h_tally[i] {
1596 best_h = i;
1597 };
1598 }
1599 level[0] = best_v as u8;
1600 level[1] = best_h as u8;
1601 }
1602 1 | 2 => {
1603 let mut best = 999;
1604 for i in 0..=MAX_LOOP_FILTER {
1605 if best == 999
1606 || v_tally[best] + h_tally[best] > v_tally[i] + h_tally[i]
1607 {
1608 best = i;
1609 };
1610 }
1611 level[pli + 1] = best as u8;
1612 }
1613 _ => unreachable!(),
1614 }
1615 }
1616 level
1617}
1618
1619#[profiling::function]
1620pub fn deblock_filter_optimize<T: Pixel, U: Pixel>(
1621 fi: &FrameInvariants<T>, rec: &Tile<U>, input: &Tile<U>,
1622 blocks: &TileBlocks, crop_w: usize, crop_h: usize,
1623) -> [u8; 4] {
1624 if fi.config.speed_settings.fast_deblock {
1625 let q = ac_q(fi.base_q_idx, 0, fi.sequence.bit_depth).get() as i32;
1626 let level = clamp(
1627 match fi.sequence.bit_depth {
1628 8 => {
1629 if fi.frame_type == FrameType::KEY {
1630 (q * 17563 - 421_574 + (1 << 18 >> 1)) >> 18
1631 } else {
1632 (q * 6017 + 650_707 + (1 << 18 >> 1)) >> 18
1633 }
1634 }
1635 10 => {
1636 if fi.frame_type == FrameType::KEY {
1637 ((q * 20723 + 4_060_632 + (1 << 20 >> 1)) >> 20) - 4
1638 } else {
1639 (q * 20723 + 4_060_632 + (1 << 20 >> 1)) >> 20
1640 }
1641 }
1642 12 => {
1643 if fi.frame_type == FrameType::KEY {
1644 ((q * 20723 + 16_242_526 + (1 << 22 >> 1)) >> 22) - 4
1645 } else {
1646 (q * 20723 + 16_242_526 + (1 << 22 >> 1)) >> 22
1647 }
1648 }
1649 _ => unreachable!(),
1650 },
1651 0,
1652 MAX_LOOP_FILTER as i32,
1653 ) as u8;
1654 [level; 4]
1655 } else {
1656 // Deblocking happens in 4x4 (luma) units; luma x,y are clipped to
1657 // the *crop frame* of the entire frame by 4x4 block.
1658 sse_optimize(
1659 rec,
1660 input,
1661 blocks,
1662 crop_w,
1663 crop_h,
1664 fi.sequence.bit_depth,
1665 fi.sequence.chroma_sampling == Cs400,
1666 )
1667 }
1668}
1669