1 | // Copyright (c) 2018-2022, The rav1e contributors. All rights reserved |
2 | // |
3 | // This source code is subject to the terms of the BSD 2 Clause License and |
4 | // the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
5 | // was not distributed with this source code in the LICENSE file, you can |
6 | // obtain it at www.aomedia.org/license/software. If the Alliance for Open |
7 | // Media Patent License 1.0 was not distributed with this source code in the |
8 | // PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
9 | |
10 | use crate::api::FrameType; |
11 | use crate::color::ChromaSampling::Cs400; |
12 | use crate::context::*; |
13 | use crate::encoder::FrameInvariants; |
14 | use crate::partition::RefType::*; |
15 | use crate::predict::PredictionMode::*; |
16 | use crate::quantize::*; |
17 | use crate::tiling::*; |
18 | use crate::util::{clamp, ILog, Pixel}; |
19 | use crate::DeblockState; |
20 | use rayon::iter::*; |
21 | use std::cmp; |
22 | |
23 | fn deblock_adjusted_level( |
24 | deblock: &DeblockState, block: &Block, pli: usize, vertical: bool, |
25 | ) -> usize { |
26 | let idx = if pli == 0 { usize::from(!vertical) } else { pli + 1 }; |
27 | |
28 | let level = if deblock.block_deltas_enabled { |
29 | // By-block filter strength delta, if the feature is active. |
30 | let block_delta = if deblock.block_delta_multi { |
31 | block.deblock_deltas[idx] << deblock.block_delta_shift |
32 | } else { |
33 | block.deblock_deltas[0] << deblock.block_delta_shift |
34 | }; |
35 | |
36 | // Add to frame-specified filter strength (Y-vertical, Y-horizontal, U, V) |
37 | clamp(block_delta + deblock.levels[idx] as i8, 0, MAX_LOOP_FILTER as i8) |
38 | as u8 |
39 | } else { |
40 | deblock.levels[idx] |
41 | }; |
42 | |
43 | // if fi.seg_feaure_active { |
44 | // rav1e does not yet support segments or segment features |
45 | // } |
46 | |
47 | // Are delta modifiers for specific references and modes active? If so, add them too. |
48 | if deblock.deltas_enabled { |
49 | let mode = block.mode; |
50 | let reference = block.ref_frames[0]; |
51 | let mode_type = usize::from( |
52 | mode >= NEARESTMV && mode != GLOBALMV && mode != GLOBAL_GLOBALMV, |
53 | ); |
54 | let l5 = level >> 5; |
55 | clamp( |
56 | level as i32 |
57 | + ((deblock.ref_deltas[reference.to_index()] as i32) << l5) |
58 | + if reference == INTRA_FRAME { |
59 | 0 |
60 | } else { |
61 | (deblock.mode_deltas[mode_type] as i32) << l5 |
62 | }, |
63 | 0, |
64 | MAX_LOOP_FILTER as i32, |
65 | ) as usize |
66 | } else { |
67 | level as usize |
68 | } |
69 | } |
70 | |
71 | #[inline ] |
72 | fn deblock_left<'a, T: Pixel>( |
73 | blocks: &'a TileBlocks, in_bo: TileBlockOffset, p: &PlaneRegion<T>, |
74 | ) -> &'a Block { |
75 | let xdec: usize = p.plane_cfg.xdec; |
76 | let ydec: usize = p.plane_cfg.ydec; |
77 | |
78 | // subsampled chroma uses odd mi row/col |
79 | // We already know we're not at the upper/left corner, so prev_block is in frame |
80 | &blocks[in_bo.0.y | ydec][(in_bo.0.x | xdec) - (1 << xdec)] |
81 | } |
82 | |
83 | #[inline ] |
84 | fn deblock_up<'a, T: Pixel>( |
85 | blocks: &'a TileBlocks, in_bo: TileBlockOffset, p: &PlaneRegion<T>, |
86 | ) -> &'a Block { |
87 | let xdec: usize = p.plane_cfg.xdec; |
88 | let ydec: usize = p.plane_cfg.ydec; |
89 | |
90 | // subsampled chroma uses odd mi row/col |
91 | &blocks[(in_bo.0.y | ydec) - (1 << ydec)][in_bo.0.x | xdec] |
92 | } |
93 | |
94 | // Must be called on a tx edge, and not on a frame edge. This is enforced above the call. |
95 | fn deblock_size<T: Pixel>( |
96 | block: &Block, prev_block: &Block, p: &PlaneRegion<T>, pli: usize, |
97 | vertical: bool, block_edge: bool, |
98 | ) -> usize { |
99 | let xdec = p.plane_cfg.xdec; |
100 | let ydec = p.plane_cfg.ydec; |
101 | |
102 | // filter application is conditional on skip and block edge |
103 | if !(block_edge |
104 | || !block.skip |
105 | || !prev_block.skip |
106 | || block.ref_frames[0] == INTRA_FRAME |
107 | || prev_block.ref_frames[0] == INTRA_FRAME) |
108 | { |
109 | 0 |
110 | } else { |
111 | let (txsize, prev_txsize) = if pli == 0 { |
112 | (block.txsize, prev_block.txsize) |
113 | } else { |
114 | ( |
115 | block.bsize.largest_chroma_tx_size(xdec, ydec), |
116 | prev_block.bsize.largest_chroma_tx_size(xdec, ydec), |
117 | ) |
118 | }; |
119 | let (tx_n, prev_tx_n) = if vertical { |
120 | (cmp::max(txsize.width_mi(), 1), cmp::max(prev_txsize.width_mi(), 1)) |
121 | } else { |
122 | (cmp::max(txsize.height_mi(), 1), cmp::max(prev_txsize.height_mi(), 1)) |
123 | }; |
124 | cmp::min( |
125 | if pli == 0 { 14 } else { 6 }, |
126 | cmp::min(tx_n, prev_tx_n) << MI_SIZE_LOG2, |
127 | ) |
128 | } |
129 | } |
130 | |
131 | // Must be called on a tx edge |
132 | #[inline ] |
133 | fn deblock_level( |
134 | deblock: &DeblockState, block: &Block, prev_block: &Block, pli: usize, |
135 | vertical: bool, |
136 | ) -> usize { |
137 | let level: usize = deblock_adjusted_level(deblock, block, pli, vertical); |
138 | if level == 0 { |
139 | deblock_adjusted_level(deblock, prev_block, pli, vertical) |
140 | } else { |
141 | level |
142 | } |
143 | } |
144 | |
/// Narrow filter: four taps, 4 outputs (two are trivial).
///
/// Only `p0` and `q0` are modified; `p1` and `q1` pass through. `shift`
/// scales the clamp ranges (callers in this file pass `bd - 8`).
#[inline]
fn filter_narrow2_4(
  p1: i32, p0: i32, q0: i32, q1: i32, shift: usize,
) -> [i32; 4] {
  // Signed range of the filter term and largest representable pixel value.
  let lo = -128i32 << shift;
  let hi = (128i32 << shift) - 1;
  let pixel_max = (256i32 << shift) - 1;

  let filter0 = (p1 - q1).clamp(lo, hi);
  let base = filter0 + 3 * (q0 - p0);

  // The rounding offset is folded in before a single clamp.
  let filter1 = (base + 4).clamp(lo, hi) >> 3;
  // be certain our optimization removing a clamp is sound
  debug_assert!(filter1 == (base.clamp(lo, hi) + 4).clamp(lo, hi) >> 3);

  let filter2 = (base + 3).clamp(lo, hi) >> 3;
  // be certain our optimization removing a clamp is sound
  debug_assert!(filter2 == (base.clamp(lo, hi) + 3).clamp(lo, hi) >> 3);

  [
    p1,
    (p0 + filter2).clamp(0, pixel_max),
    (q0 - filter1).clamp(0, pixel_max),
    q1,
  ]
}
176 | |
177 | // six taps, 6 outputs (four are trivial) |
178 | #[inline ] |
179 | fn filter_narrow2_6( |
180 | p2: i32, p1: i32, p0: i32, q0: i32, q1: i32, q2: i32, shift: usize, |
181 | ) -> [i32; 6] { |
182 | let x: [i32; 4] = filter_narrow2_4(p1, p0, q0, q1, shift); |
183 | [p2, x[0], x[1], x[2], x[3], q2] |
184 | } |
185 | |
186 | // 12 taps, 12 outputs (ten are trivial) |
187 | #[inline ] |
188 | fn filter_narrow2_12( |
189 | p5: i32, p4: i32, p3: i32, p2: i32, p1: i32, p0: i32, q0: i32, q1: i32, |
190 | q2: i32, q3: i32, q4: i32, q5: i32, shift: usize, |
191 | ) -> [i32; 12] { |
192 | let x: [i32; 4] = filter_narrow2_4(p1, p0, q0, q1, shift); |
193 | [p5, p4, p3, p2, x[0], x[1], x[2], x[3], q2, q3, q4, q5] |
194 | } |
195 | |
/// Narrow filter: four taps, 4 outputs, all of them modified.
///
/// `shift` scales the clamp ranges (callers in this file pass `bd - 8`).
#[inline]
fn filter_narrow4_4(
  p1: i32, p0: i32, q0: i32, q1: i32, shift: usize,
) -> [i32; 4] {
  // Signed range of the filter term and largest representable pixel value.
  let lo = -128i32 << shift;
  let hi = (128i32 << shift) - 1;
  let pixel_max = (256i32 << shift) - 1;

  let base = 3 * (q0 - p0);

  // The rounding offset is folded in before a single clamp.
  let filter1 = (base + 4).clamp(lo, hi) >> 3;
  // be certain our optimization removing a clamp is sound
  debug_assert!(filter1 == (base.clamp(lo, hi) + 4).clamp(lo, hi) >> 3);

  let filter2 = (base + 3).clamp(lo, hi) >> 3;
  // be certain our optimization removing a clamp is sound
  debug_assert!(filter2 == (base.clamp(lo, hi) + 3).clamp(lo, hi) >> 3);

  // The outer taps move by half of the inner adjustment, rounded.
  let filter3 = (filter1 + 1) >> 1;

  [
    (p1 + filter3).clamp(0, pixel_max),
    (p0 + filter2).clamp(0, pixel_max),
    (q0 - filter1).clamp(0, pixel_max),
    (q1 - filter3).clamp(0, pixel_max),
  ]
}
225 | |
226 | // six taps, 6 outputs (two are trivial) |
227 | #[inline ] |
228 | fn filter_narrow4_6( |
229 | p2: i32, p1: i32, p0: i32, q0: i32, q1: i32, q2: i32, shift: usize, |
230 | ) -> [i32; 6] { |
231 | let x: [i32; 4] = filter_narrow4_4(p1, p0, q0, q1, shift); |
232 | [p2, x[0], x[1], x[2], x[3], q2] |
233 | } |
234 | |
235 | // 12 taps, 12 outputs (eight are trivial) |
236 | #[inline ] |
237 | fn filter_narrow4_12( |
238 | p5: i32, p4: i32, p3: i32, p2: i32, p1: i32, p0: i32, q0: i32, q1: i32, |
239 | q2: i32, q3: i32, q4: i32, q5: i32, shift: usize, |
240 | ) -> [i32; 12] { |
241 | let x: [i32; 4] = filter_narrow4_4(p1, p0, q0, q1, shift); |
242 | [p5, p4, p3, p2, x[0], x[1], x[2], x[3], q2, q3, q4, q5] |
243 | } |
244 | |
/// Wide filter: six taps, 4 outputs (replacements for `p1`, `p0`, `q0`, `q1`).
///
/// Every output is a weighted sum whose weights total 8, rounded and
/// shifted right by 3.
#[rustfmt::skip]
#[inline]
const fn filter_wide6_4(
  p2: i32, p1: i32, p0: i32, q0: i32, q1: i32, q2: i32
) -> [i32; 4] {
  // Half of the divisor, added before the shift to round to nearest.
  const ROUND: i32 = 1 << 2;

  let new_p1 = 3 * p2 + 2 * p1 + 2 * p0 + q0 + ROUND;
  let new_p0 = p2 + 2 * p1 + 2 * p0 + 2 * q0 + q1 + ROUND;
  let new_q0 = p1 + 2 * p0 + 2 * q0 + 2 * q1 + q2 + ROUND;
  let new_q1 = p0 + 2 * q0 + 2 * q1 + 3 * q2 + ROUND;

  [new_p1 >> 3, new_p0 >> 3, new_q0 >> 3, new_q1 >> 3]
}
258 | |
/// Wide filter: eight taps, 6 outputs (replacements for `p2` through `q2`).
///
/// Every output is a weighted sum whose weights total 8, rounded and
/// shifted right by 3.
#[rustfmt::skip]
#[inline]
const fn filter_wide8_6(
  p3: i32, p2: i32, p1: i32, p0: i32, q0: i32, q1: i32, q2: i32, q3: i32
) -> [i32; 6] {
  // Half of the divisor, added before the shift to round to nearest.
  const ROUND: i32 = 1 << 2;

  let new_p2 = 3 * p3 + 2 * p2 + p1 + p0 + q0 + ROUND;
  let new_p1 = 2 * p3 + p2 + 2 * p1 + p0 + q0 + q1 + ROUND;
  let new_p0 = p3 + p2 + p1 + 2 * p0 + q0 + q1 + q2 + ROUND;
  let new_q0 = p2 + p1 + p0 + 2 * q0 + q1 + q2 + q3 + ROUND;
  let new_q1 = p1 + p0 + q0 + 2 * q1 + q2 + 2 * q3 + ROUND;
  let new_q2 = p0 + q0 + q1 + 2 * q2 + 3 * q3 + ROUND;

  [
    new_p2 >> 3, new_p1 >> 3, new_p0 >> 3,
    new_q0 >> 3, new_q1 >> 3, new_q2 >> 3,
  ]
}
274 | |
275 | // 12 taps, 12 outputs (six are trivial) |
276 | #[inline ] |
277 | const fn filter_wide8_12( |
278 | p5: i32, p4: i32, p3: i32, p2: i32, p1: i32, p0: i32, q0: i32, q1: i32, |
279 | q2: i32, q3: i32, q4: i32, q5: i32, |
280 | ) -> [i32; 12] { |
281 | let x: [i32; 6] = filter_wide8_6(p3, p2, p1, p0, q0, q1, q2, q3); |
282 | [p5, p4, p3, x[0], x[1], x[2], x[3], x[4], x[5], q3, q4, q5] |
283 | } |
284 | |
/// Wide filter: fourteen taps, 12 outputs (replacements for `p5` through
/// `q5`).
///
/// Every output is a weighted sum whose weights total 16, rounded and
/// shifted right by 4.
#[rustfmt::skip]
#[inline]
const fn filter_wide14_12(
  p6: i32, p5: i32, p4: i32, p3: i32, p2: i32, p1: i32, p0: i32, q0: i32,
  q1: i32, q2: i32, q3: i32, q4: i32, q5: i32, q6: i32
) -> [i32; 12] {
  // Half of the divisor, added before the shift to round to nearest.
  const ROUND: i32 = 1 << 3;

  let new_p5 = 7 * p6 + 2 * p5 + 2 * p4 + p3 + p2 + p1 + p0 + q0 + ROUND;
  let new_p4 = 5 * p6 + 2 * p5 + 2 * p4 + 2 * p3 + p2 + p1 + p0 + q0 + q1 + ROUND;
  let new_p3 = 4 * p6 + p5 + 2 * p4 + 2 * p3 + 2 * p2 + p1 + p0 + q0 + q1 + q2 + ROUND;
  let new_p2 = 3 * p6 + p5 + p4 + 2 * p3 + 2 * p2 + 2 * p1 + p0 + q0 + q1 + q2 + q3 + ROUND;
  let new_p1 = 2 * p6 + p5 + p4 + p3 + 2 * p2 + 2 * p1 + 2 * p0 + q0 + q1 + q2 + q3 + q4 + ROUND;
  let new_p0 = p6 + p5 + p4 + p3 + p2 + 2 * p1 + 2 * p0 + 2 * q0 + q1 + q2 + q3 + q4 + q5 + ROUND;
  let new_q0 = p5 + p4 + p3 + p2 + p1 + 2 * p0 + 2 * q0 + 2 * q1 + q2 + q3 + q4 + q5 + q6 + ROUND;
  let new_q1 = p4 + p3 + p2 + p1 + p0 + 2 * q0 + 2 * q1 + 2 * q2 + q3 + q4 + q5 + 2 * q6 + ROUND;
  let new_q2 = p3 + p2 + p1 + p0 + q0 + 2 * q1 + 2 * q2 + 2 * q3 + q4 + q5 + 3 * q6 + ROUND;
  let new_q3 = p2 + p1 + p0 + q0 + q1 + 2 * q2 + 2 * q3 + 2 * q4 + q5 + 4 * q6 + ROUND;
  let new_q4 = p1 + p0 + q0 + q1 + q2 + 2 * q3 + 2 * q4 + 2 * q5 + 5 * q6 + ROUND;
  let new_q5 = p0 + q0 + q1 + q2 + q3 + 2 * q4 + 2 * q5 + 7 * q6 + ROUND;

  [
    new_p5 >> 4, new_p4 >> 4, new_p3 >> 4, new_p2 >> 4, new_p1 >> 4, new_p0 >> 4,
    new_q0 >> 4, new_q1 >> 4, new_q2 >> 4, new_q3 >> 4, new_q4 >> 4, new_q5 >> 4,
  ]
}
307 | |
308 | #[inline ] |
309 | fn copy_horizontal<T: Pixel>( |
310 | dst: &mut PlaneRegionMut<'_, T>, x: usize, y: usize, src: &[i32], |
311 | ) { |
312 | let row: &mut [T] = &mut dst[y][x..]; |
313 | for (dst: &mut T, src: &i32) in row.iter_mut().take(src.len()).zip(src) { |
314 | *dst = T::cast_from(*src); |
315 | } |
316 | } |
317 | |
318 | #[inline ] |
319 | fn copy_vertical<T: Pixel>( |
320 | dst: &mut PlaneRegionMut<'_, T>, x: usize, y: usize, src: &[i32], |
321 | ) { |
322 | for (i: usize, v: &i32) in src.iter().enumerate() { |
323 | let p: &mut T = &mut dst[y + i][x]; |
324 | *p = T::cast_from(*v); |
325 | } |
326 | } |
327 | |
/// Returns the sum of squared differences between `a` and `b`.
///
/// The sum is accumulated in `i32` and widened to `i64` only at the end.
#[inline]
fn stride_sse<const LEN: usize>(a: &[i32; LEN], b: &[i32; LEN]) -> i64 {
  a.iter()
    .zip(b)
    .map(|(&x, &y)| {
      let diff = x - y;
      diff * diff
    })
    .sum::<i32>() as i64
}
332 | |
/// Scales a filter level up to a limit value: `level << shift`.
#[inline]
const fn _level_to_limit(level: i32, shift: usize) -> i32 {
  let limit = level << shift;
  limit
}
337 | |
/// Converts a limit value to a level: divides by `2^shift`, rounding up.
#[inline]
const fn limit_to_level(limit: i32, shift: usize) -> i32 {
  // Adding `2^shift - 1` before the shift turns the floor into a ceiling.
  let bumped = limit + (1 << shift) - 1;
  bumped >> shift
}
342 | |
/// Converts a filter level to a blimit value: `(3 * level + 4) << shift`.
#[inline]
const fn _level_to_blimit(level: i32, shift: usize) -> i32 {
  let unscaled = 3 * level + 4;
  unscaled << shift
}
347 | |
/// Converts a blimit value to a level: divides by `2^shift` rounding up,
/// subtracts 2, then divides by 3 (truncating toward zero).
#[inline]
const fn blimit_to_level(blimit: i32, shift: usize) -> i32 {
  let unscaled = (blimit + (1 << shift) - 1) >> shift;
  (unscaled - 2) / 3
}
352 | |
/// Converts a filter level to a threshold value: `(level >> 4) << shift`.
#[inline]
const fn _level_to_thresh(level: i32, shift: usize) -> i32 {
  let coarse = level >> 4;
  coarse << shift
}
357 | |
/// Converts a threshold value to a level: divides by `2^shift` rounding up,
/// then multiplies by 16.
#[inline]
const fn thresh_to_level(thresh: i32, shift: usize) -> i32 {
  let unscaled = (thresh + (1 << shift) - 1) >> shift;
  unscaled << 4
}
362 | |
363 | #[inline ] |
364 | fn nhev4(p1: i32, p0: i32, q0: i32, q1: i32, shift: usize) -> usize { |
365 | thresh_to_level(thresh:cmp::max((p1 - p0).abs(), (q1 - q0).abs()), shift) as usize |
366 | } |
367 | |
368 | #[inline ] |
369 | fn mask4(p1: i32, p0: i32, q0: i32, q1: i32, shift: usize) -> usize { |
370 | cmp::max( |
371 | v1:limit_to_level(cmp::max((p1 - p0).abs(), (q1 - q0).abs()), shift), |
372 | v2:blimit_to_level((p0 - q0).abs() * 2 + (p1 - q1).abs() / 2, shift), |
373 | ) as usize |
374 | } |
375 | |
376 | #[inline ] |
377 | fn deblock_size4_inner( |
378 | [p1: i32, p0: i32, q0: i32, q1: i32]: [i32; 4], level: usize, bd: usize, |
379 | ) -> Option<[i32; 4]> { |
380 | if mask4(p1, p0, q0, q1, shift:bd - 8) <= level { |
381 | let x: [i32; 4] = if nhev4(p1, p0, q0, q1, shift:bd - 8) <= level { |
382 | filter_narrow4_4(p1, p0, q0, q1, shift:bd - 8) |
383 | } else { |
384 | filter_narrow2_4(p1, p0, q0, q1, shift:bd - 8) |
385 | }; |
386 | Some(x) |
387 | } else { |
388 | None |
389 | } |
390 | } |
391 | |
392 | // Assumes rec[0] is set 2 taps back from the edge |
393 | fn deblock_v_size4<T: Pixel>( |
394 | rec: &mut PlaneRegionMut<'_, T>, level: usize, bd: usize, |
395 | ) { |
396 | for y: usize in 0..4 { |
397 | let p: &[T] = &rec[y]; |
398 | let vals: [i32; 4] = [p[0].as_(), p[1].as_(), p[2].as_(), p[3].as_()]; |
399 | if let Some(data: [i32; 4]) = deblock_size4_inner(vals, level, bd) { |
400 | copy_horizontal(dst:rec, x:0, y, &data); |
401 | } |
402 | } |
403 | } |
404 | |
405 | // Assumes rec[0] is set 2 taps back from the edge |
406 | fn deblock_h_size4<T: Pixel>( |
407 | rec: &mut PlaneRegionMut<'_, T>, level: usize, bd: usize, |
408 | ) { |
409 | for x: usize in 0..4 { |
410 | let vals: [i32; 4] = |
411 | [rec[0][x].as_(), rec[1][x].as_(), rec[2][x].as_(), rec[3][x].as_()]; |
412 | if let Some(data: [i32; 4]) = deblock_size4_inner(vals, level, bd) { |
413 | copy_vertical(dst:rec, x, y:0, &data); |
414 | } |
415 | } |
416 | } |
417 | |
418 | // Assumes rec[0] and src[0] are set 2 taps back from the edge. |
419 | // Accesses four taps, accumulates four pixels into the tally |
420 | fn sse_size4<T: Pixel>( |
421 | rec: &PlaneRegion<'_, T>, src: &PlaneRegion<'_, T>, |
422 | tally: &mut [i64; MAX_LOOP_FILTER + 2], horizontal_p: bool, bd: usize, |
423 | ) { |
424 | for i in 0..4 { |
425 | let (p1, p0, q0, q1, a) = if horizontal_p { |
426 | ( |
427 | rec[0][i].as_(), |
428 | rec[1][i].as_(), |
429 | rec[2][i].as_(), |
430 | rec[3][i].as_(), |
431 | [src[0][i].as_(), src[1][i].as_(), src[2][i].as_(), src[3][i].as_()], |
432 | ) |
433 | } else { |
434 | ( |
435 | rec[i][0].as_(), |
436 | rec[i][1].as_(), |
437 | rec[i][2].as_(), |
438 | rec[i][3].as_(), |
439 | [src[i][0].as_(), src[i][1].as_(), src[i][2].as_(), src[i][3].as_()], |
440 | ) |
441 | }; |
442 | |
443 | // three possibilities: no filter, narrow2 and narrow4 |
444 | // All possibilities produce four outputs |
445 | let none: [_; 4] = [p1, p0, q0, q1]; |
446 | let narrow2 = filter_narrow2_4(p1, p0, q0, q1, bd - 8); |
447 | let narrow4 = filter_narrow4_4(p1, p0, q0, q1, bd - 8); |
448 | |
449 | // mask4 sets the dividing line for filter vs no filter |
450 | // nhev4 sets the dividing line between narrow2 and narrow4 |
451 | let mask = clamp(mask4(p1, p0, q0, q1, bd - 8), 1, MAX_LOOP_FILTER + 1); |
452 | let nhev = clamp(nhev4(p1, p0, q0, q1, bd - 8), mask, MAX_LOOP_FILTER + 1); |
453 | |
454 | // sse for each; short-circuit the 'special' no-op cases. |
455 | let sse_none = stride_sse(&a, &none); |
456 | let sse_narrow2 = |
457 | if nhev != mask { stride_sse(&a, &narrow2) } else { sse_none }; |
458 | let sse_narrow4 = if nhev <= MAX_LOOP_FILTER { |
459 | stride_sse(&a, &narrow4) |
460 | } else { |
461 | sse_none |
462 | }; |
463 | |
464 | // accumulate possible filter values into the tally |
465 | // level 0 is a special case |
466 | tally[0] += sse_none; |
467 | tally[mask] -= sse_none; |
468 | tally[mask] += sse_narrow2; |
469 | tally[nhev] -= sse_narrow2; |
470 | tally[nhev] += sse_narrow4; |
471 | } |
472 | } |
473 | |
474 | #[inline ] |
475 | fn mask6( |
476 | p2: i32, p1: i32, p0: i32, q0: i32, q1: i32, q2: i32, shift: usize, |
477 | ) -> usize { |
478 | cmp::max( |
479 | v1:limit_to_level( |
480 | cmp::max( |
481 | (p2 - p1).abs(), |
482 | cmp::max((p1 - p0).abs(), cmp::max((q2 - q1).abs(), (q1 - q0).abs())), |
483 | ), |
484 | shift, |
485 | ), |
486 | v2:blimit_to_level((p0 - q0).abs() * 2 + (p1 - q1).abs() / 2, shift), |
487 | ) as usize |
488 | } |
489 | |
/// Returns the largest absolute deviation of `p1`/`p2` from `p0` and of
/// `q1`/`q2` from `q0`.
///
/// Callers in this file compare the result against a flatness threshold to
/// decide whether the wide filter applies.
#[inline]
fn flat6(p2: i32, p1: i32, p0: i32, q0: i32, q1: i32, q2: i32) -> usize {
  (p1 - p0)
    .abs()
    .max((q1 - q0).abs())
    .max((p2 - p0).abs())
    .max((q2 - q0).abs()) as usize
}
497 | |
498 | #[inline ] |
499 | fn deblock_size6_inner( |
500 | [p2: i32, p1: i32, p0: i32, q0: i32, q1: i32, q2: i32]: [i32; 6], level: usize, bd: usize, |
501 | ) -> Option<[i32; 4]> { |
502 | if mask6(p2, p1, p0, q0, q1, q2, shift:bd - 8) <= level { |
503 | let flat: usize = 1 << (bd - 8); |
504 | let x: [i32; 4] = if flat6(p2, p1, p0, q0, q1, q2) <= flat { |
505 | filter_wide6_4(p2, p1, p0, q0, q1, q2) |
506 | } else if nhev4(p1, p0, q0, q1, shift:bd - 8) <= level { |
507 | filter_narrow4_4(p1, p0, q0, q1, shift:bd - 8) |
508 | } else { |
509 | filter_narrow2_4(p1, p0, q0, q1, shift:bd - 8) |
510 | }; |
511 | Some(x) |
512 | } else { |
513 | None |
514 | } |
515 | } |
516 | |
517 | // Assumes slice[0] is set 3 taps back from the edge |
518 | fn deblock_v_size6<T: Pixel>( |
519 | rec: &mut PlaneRegionMut<'_, T>, level: usize, bd: usize, |
520 | ) { |
521 | for y: usize in 0..4 { |
522 | let p: &[T] = &rec[y]; |
523 | let vals: [i32; 6] = |
524 | [p[0].as_(), p[1].as_(), p[2].as_(), p[3].as_(), p[4].as_(), p[5].as_()]; |
525 | if let Some(data: [i32; 4]) = deblock_size6_inner(vals, level, bd) { |
526 | copy_horizontal(dst:rec, x:1, y, &data); |
527 | } |
528 | } |
529 | } |
530 | |
531 | // Assumes slice[0] is set 3 taps back from the edge |
532 | fn deblock_h_size6<T: Pixel>( |
533 | rec: &mut PlaneRegionMut<'_, T>, level: usize, bd: usize, |
534 | ) { |
535 | for x: usize in 0..4 { |
536 | let vals: [i32; 6] = [ |
537 | rec[0][x].as_(), |
538 | rec[1][x].as_(), |
539 | rec[2][x].as_(), |
540 | rec[3][x].as_(), |
541 | rec[4][x].as_(), |
542 | rec[5][x].as_(), |
543 | ]; |
544 | if let Some(data: [i32; 4]) = deblock_size6_inner(vals, level, bd) { |
545 | copy_vertical(dst:rec, x, y:1, &data); |
546 | } |
547 | } |
548 | } |
549 | |
550 | // Assumes rec[0] and src[0] are set 3 taps back from the edge. |
551 | // Accesses six taps, accumulates four pixels into the tally |
552 | fn sse_size6<T: Pixel>( |
553 | rec: &PlaneRegion<'_, T>, src: &PlaneRegion<'_, T>, |
554 | tally: &mut [i64; MAX_LOOP_FILTER + 2], horizontal_p: bool, bd: usize, |
555 | ) { |
556 | let flat = 1 << (bd - 8); |
557 | for i in 0..4 { |
558 | let (p2, p1, p0, q0, q1, q2, a) = if horizontal_p { |
559 | // six taps |
560 | ( |
561 | rec[0][i].as_(), |
562 | rec[1][i].as_(), |
563 | rec[2][i].as_(), |
564 | rec[3][i].as_(), |
565 | rec[4][i].as_(), |
566 | rec[5][i].as_(), |
567 | // four pixels to compare so offset one forward |
568 | [src[1][i].as_(), src[2][i].as_(), src[3][i].as_(), src[4][i].as_()], |
569 | ) |
570 | } else { |
571 | // six taps |
572 | ( |
573 | rec[i][0].as_(), |
574 | rec[i][1].as_(), |
575 | rec[i][2].as_(), |
576 | rec[i][3].as_(), |
577 | rec[i][4].as_(), |
578 | rec[i][5].as_(), |
579 | // four pixels to compare so offset one forward |
580 | [src[i][1].as_(), src[i][2].as_(), src[i][3].as_(), src[i][4].as_()], |
581 | ) |
582 | }; |
583 | |
584 | // Four possibilities: no filter, wide6, narrow2 and narrow4 |
585 | // All possibilities produce four outputs |
586 | let none: [_; 4] = [p1, p0, q0, q1]; |
587 | let wide6 = filter_wide6_4(p2, p1, p0, q0, q1, q2); |
588 | let narrow2 = filter_narrow2_4(p1, p0, q0, q1, bd - 8); |
589 | let narrow4 = filter_narrow4_4(p1, p0, q0, q1, bd - 8); |
590 | |
591 | // mask6 sets the dividing line for filter vs no filter |
592 | // flat6 decides between wide and narrow filters (unrelated to level) |
593 | // nhev4 sets the dividing line between narrow2 and narrow4 |
594 | let mask = |
595 | clamp(mask6(p2, p1, p0, q0, q1, q2, bd - 8), 1, MAX_LOOP_FILTER + 1); |
596 | let flatp = flat6(p2, p1, p0, q0, q1, q2) <= flat; |
597 | let nhev = clamp(nhev4(p1, p0, q0, q1, bd - 8), mask, MAX_LOOP_FILTER + 1); |
598 | |
599 | // sse for each; short-circuit the 'special' no-op cases. |
600 | let sse_none = stride_sse(&a, &none); |
601 | let sse_wide6 = if flatp && mask <= MAX_LOOP_FILTER { |
602 | stride_sse(&a, &wide6) |
603 | } else { |
604 | sse_none |
605 | }; |
606 | let sse_narrow2 = |
607 | if !flatp && nhev != mask { stride_sse(&a, &narrow2) } else { sse_none }; |
608 | let sse_narrow4 = if !flatp && nhev <= MAX_LOOP_FILTER { |
609 | stride_sse(&a, &narrow4) |
610 | } else { |
611 | sse_none |
612 | }; |
613 | |
614 | // accumulate possible filter values into the tally |
615 | tally[0] += sse_none; |
616 | tally[mask] -= sse_none; |
617 | if flatp { |
618 | tally[mask] += sse_wide6; |
619 | } else { |
620 | tally[mask] += sse_narrow2; |
621 | tally[nhev] -= sse_narrow2; |
622 | tally[nhev] += sse_narrow4; |
623 | } |
624 | } |
625 | } |
626 | |
627 | #[inline ] |
628 | fn mask8( |
629 | p3: i32, p2: i32, p1: i32, p0: i32, q0: i32, q1: i32, q2: i32, q3: i32, |
630 | shift: usize, |
631 | ) -> usize { |
632 | cmp::max( |
633 | v1:limit_to_level( |
634 | cmp::max( |
635 | (p3 - p2).abs(), |
636 | cmp::max( |
637 | (p2 - p1).abs(), |
638 | cmp::max( |
639 | (p1 - p0).abs(), |
640 | cmp::max( |
641 | (q3 - q2).abs(), |
642 | cmp::max((q2 - q1).abs(), (q1 - q0).abs()), |
643 | ), |
644 | ), |
645 | ), |
646 | ), |
647 | shift, |
648 | ), |
649 | v2:blimit_to_level((p0 - q0).abs() * 2 + (p1 - q1).abs() / 2, shift), |
650 | ) as usize |
651 | } |
652 | |
/// Returns the largest absolute deviation of `p1..=p3` from `p0` and of
/// `q1..=q3` from `q0`.
///
/// Callers in this file compare the result against a flatness threshold to
/// decide whether a wide filter applies.
#[inline]
fn flat8(
  p3: i32, p2: i32, p1: i32, p0: i32, q0: i32, q1: i32, q2: i32, q3: i32,
) -> usize {
  (p1 - p0)
    .abs()
    .max((q1 - q0).abs())
    .max((p2 - p0).abs())
    .max((q2 - q0).abs())
    .max((p3 - p0).abs())
    .max((q3 - q0).abs()) as usize
}
668 | |
669 | #[inline ] |
670 | fn deblock_size8_inner( |
671 | [p3: i32, p2: i32, p1: i32, p0: i32, q0: i32, q1: i32, q2: i32, q3: i32]: [i32; 8], level: usize, bd: usize, |
672 | ) -> Option<[i32; 6]> { |
673 | if mask8(p3, p2, p1, p0, q0, q1, q2, q3, shift:bd - 8) <= level { |
674 | let flat: usize = 1 << (bd - 8); |
675 | let x: [i32; 6] = if flat8(p3, p2, p1, p0, q0, q1, q2, q3) <= flat { |
676 | filter_wide8_6(p3, p2, p1, p0, q0, q1, q2, q3) |
677 | } else if nhev4(p1, p0, q0, q1, shift:bd - 8) <= level { |
678 | filter_narrow4_6(p2, p1, p0, q0, q1, q2, shift:bd - 8) |
679 | } else { |
680 | filter_narrow2_6(p2, p1, p0, q0, q1, q2, shift:bd - 8) |
681 | }; |
682 | Some(x) |
683 | } else { |
684 | None |
685 | } |
686 | } |
687 | |
688 | // Assumes rec[0] is set 4 taps back from the edge |
689 | fn deblock_v_size8<T: Pixel>( |
690 | rec: &mut PlaneRegionMut<'_, T>, level: usize, bd: usize, |
691 | ) { |
692 | for y: usize in 0..4 { |
693 | let p: &[T] = &rec[y]; |
694 | let vals: [i32; 8] = [ |
695 | p[0].as_(), |
696 | p[1].as_(), |
697 | p[2].as_(), |
698 | p[3].as_(), |
699 | p[4].as_(), |
700 | p[5].as_(), |
701 | p[6].as_(), |
702 | p[7].as_(), |
703 | ]; |
704 | if let Some(data: [i32; 6]) = deblock_size8_inner(vals, level, bd) { |
705 | copy_horizontal(dst:rec, x:1, y, &data); |
706 | } |
707 | } |
708 | } |
709 | |
710 | // Assumes rec[0] is set 4 taps back from the edge |
711 | fn deblock_h_size8<T: Pixel>( |
712 | rec: &mut PlaneRegionMut<'_, T>, level: usize, bd: usize, |
713 | ) { |
714 | for x: usize in 0..4 { |
715 | let vals: [i32; 8] = [ |
716 | rec[0][x].as_(), |
717 | rec[1][x].as_(), |
718 | rec[2][x].as_(), |
719 | rec[3][x].as_(), |
720 | rec[4][x].as_(), |
721 | rec[5][x].as_(), |
722 | rec[6][x].as_(), |
723 | rec[7][x].as_(), |
724 | ]; |
725 | if let Some(data: [i32; 6]) = deblock_size8_inner(vals, level, bd) { |
726 | copy_vertical(dst:rec, x, y:1, &data); |
727 | } |
728 | } |
729 | } |
730 | |
731 | // Assumes rec[0] and src[0] are set 4 taps back from the edge. |
732 | // Accesses eight taps, accumulates six pixels into the tally |
733 | fn sse_size8<T: Pixel>( |
734 | rec: &PlaneRegion<'_, T>, src: &PlaneRegion<'_, T>, |
735 | tally: &mut [i64; MAX_LOOP_FILTER + 2], horizontal_p: bool, bd: usize, |
736 | ) { |
737 | let flat = 1 << (bd - 8); |
738 | |
739 | for i in 0..4 { |
740 | let (p3, p2, p1, p0, q0, q1, q2, q3, a) = if horizontal_p { |
741 | // eight taps |
742 | ( |
743 | rec[0][i].as_(), |
744 | rec[1][i].as_(), |
745 | rec[2][i].as_(), |
746 | rec[3][i].as_(), |
747 | rec[4][i].as_(), |
748 | rec[5][i].as_(), |
749 | rec[6][i].as_(), |
750 | rec[7][i].as_(), |
751 | // six pixels to compare so offset one forward |
752 | [ |
753 | src[1][i].as_(), |
754 | src[2][i].as_(), |
755 | src[3][i].as_(), |
756 | src[4][i].as_(), |
757 | src[5][i].as_(), |
758 | src[6][i].as_(), |
759 | ], |
760 | ) |
761 | } else { |
762 | // eight taps |
763 | ( |
764 | rec[i][0].as_(), |
765 | rec[i][1].as_(), |
766 | rec[i][2].as_(), |
767 | rec[i][3].as_(), |
768 | rec[i][4].as_(), |
769 | rec[i][5].as_(), |
770 | rec[i][6].as_(), |
771 | rec[i][7].as_(), |
772 | // six pixels to compare so offset one forward |
773 | [ |
774 | src[i][1].as_(), |
775 | src[i][2].as_(), |
776 | src[i][3].as_(), |
777 | src[i][4].as_(), |
778 | src[i][5].as_(), |
779 | src[i][6].as_(), |
780 | ], |
781 | ) |
782 | }; |
783 | |
784 | // Four possibilities: no filter, wide8, narrow2 and narrow4 |
785 | let none: [_; 6] = [p2, p1, p0, q0, q1, q2]; |
786 | let wide8: [_; 6] = filter_wide8_6(p3, p2, p1, p0, q0, q1, q2, q3); |
787 | let narrow2: [_; 6] = filter_narrow2_6(p2, p1, p0, q0, q1, q2, bd - 8); |
788 | let narrow4: [_; 6] = filter_narrow4_6(p2, p1, p0, q0, q1, q2, bd - 8); |
789 | |
790 | // mask8 sets the dividing line for filter vs no filter |
791 | // flat8 decides between wide and narrow filters (unrelated to level) |
792 | // nhev4 sets the dividing line between narrow2 and narrow4 |
793 | let mask = clamp( |
794 | mask8(p3, p2, p1, p0, q0, q1, q2, q3, bd - 8), |
795 | 1, |
796 | MAX_LOOP_FILTER + 1, |
797 | ); |
798 | let flatp = flat8(p3, p2, p1, p0, q0, q1, q2, q3) <= flat; |
799 | let nhev = clamp(nhev4(p1, p0, q0, q1, bd - 8), mask, MAX_LOOP_FILTER + 1); |
800 | |
801 | // sse for each; short-circuit the 'special' no-op cases. |
802 | let sse_none = stride_sse(&a, &none); |
803 | let sse_wide8 = if flatp && mask <= MAX_LOOP_FILTER { |
804 | stride_sse(&a, &wide8) |
805 | } else { |
806 | sse_none |
807 | }; |
808 | let sse_narrow2 = |
809 | if !flatp && nhev != mask { stride_sse(&a, &narrow2) } else { sse_none }; |
810 | let sse_narrow4 = if !flatp && nhev <= MAX_LOOP_FILTER { |
811 | stride_sse(&a, &narrow4) |
812 | } else { |
813 | sse_none |
814 | }; |
815 | |
816 | // accumulate possible filter values into the tally |
817 | tally[0] += sse_none; |
818 | tally[mask] -= sse_none; |
819 | if flatp { |
820 | tally[mask] += sse_wide8; |
821 | } else { |
822 | tally[mask] += sse_narrow2; |
823 | tally[nhev] -= sse_narrow2; |
824 | tally[nhev] += sse_narrow4; |
825 | } |
826 | } |
827 | } |
828 | |
/// Returns the largest absolute deviation of `p4..=p6` from `p0` and of
/// `q4..=q6` from `q0`.
///
/// Callers in this file use it as the outer flatness test that gates the
/// fourteen-tap filter.
#[inline]
fn flat14_outer(
  p6: i32, p5: i32, p4: i32, p0: i32, q0: i32, q4: i32, q5: i32, q6: i32,
) -> usize {
  (p4 - p0)
    .abs()
    .max((q4 - q0).abs())
    .max((p5 - p0).abs())
    .max((q5 - q0).abs())
    .max((p6 - p0).abs())
    .max((q6 - q0).abs()) as usize
}
844 | |
845 | #[inline ] |
846 | fn deblock_size14_inner( |
847 | [p6: i32, p5: i32, p4: i32, p3: i32, p2: i32, p1: i32, p0: i32, q0: i32, q1: i32, q2: i32, q3: i32, q4: i32, q5: i32, q6: i32]: [i32; 14], |
848 | level: usize, bd: usize, |
849 | ) -> Option<[i32; 12]> { |
850 | // 'mask' test |
851 | if mask8(p3, p2, p1, p0, q0, q1, q2, q3, bd - 8) <= level { |
852 | let flat = 1 << (bd - 8); |
853 | // inner flatness test |
854 | let x = if flat8(p3, p2, p1, p0, q0, q1, q2, q3) <= flat { |
855 | // outer flatness test |
856 | if flat14_outer(p6, p5, p4, p0, q0, q4, q5, q6) <= flat { |
857 | // sufficient flatness across 14 pixel width; run full-width filter |
858 | filter_wide14_12( |
859 | p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, |
860 | ) |
861 | } else { |
862 | // only flat in inner area, run 8-tap |
863 | filter_wide8_12(p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5) |
864 | } |
865 | } else if nhev4(p1, p0, q0, q1, bd - 8) <= level { |
866 | // not flat, run narrow filter |
867 | filter_narrow4_12(p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, bd - 8) |
868 | } else { |
869 | filter_narrow2_12(p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, bd - 8) |
870 | }; |
871 | Some(x) |
872 | } else { |
873 | None |
874 | } |
875 | } |
876 | |
877 | // Assumes rec[0] is set 7 taps back from the edge |
878 | fn deblock_v_size14<T: Pixel>( |
879 | rec: &mut PlaneRegionMut<'_, T>, level: usize, bd: usize, |
880 | ) { |
881 | for y: usize in 0..4 { |
882 | let p: &[T] = &rec[y]; |
883 | let vals: [i32; 14] = [ |
884 | p[0].as_(), |
885 | p[1].as_(), |
886 | p[2].as_(), |
887 | p[3].as_(), |
888 | p[4].as_(), |
889 | p[5].as_(), |
890 | p[6].as_(), |
891 | p[7].as_(), |
892 | p[8].as_(), |
893 | p[9].as_(), |
894 | p[10].as_(), |
895 | p[11].as_(), |
896 | p[12].as_(), |
897 | p[13].as_(), |
898 | ]; |
899 | if let Some(data: [i32; 12]) = deblock_size14_inner(vals, level, bd) { |
900 | copy_horizontal(dst:rec, x:1, y, &data); |
901 | } |
902 | } |
903 | } |
904 | |
905 | // Assumes rec[0] is set 7 taps back from the edge |
906 | fn deblock_h_size14<T: Pixel>( |
907 | rec: &mut PlaneRegionMut<'_, T>, level: usize, bd: usize, |
908 | ) { |
909 | for x: usize in 0..4 { |
910 | let vals: [i32; 14] = [ |
911 | rec[0][x].as_(), |
912 | rec[1][x].as_(), |
913 | rec[2][x].as_(), |
914 | rec[3][x].as_(), |
915 | rec[4][x].as_(), |
916 | rec[5][x].as_(), |
917 | rec[6][x].as_(), |
918 | rec[7][x].as_(), |
919 | rec[8][x].as_(), |
920 | rec[9][x].as_(), |
921 | rec[10][x].as_(), |
922 | rec[11][x].as_(), |
923 | rec[12][x].as_(), |
924 | rec[13][x].as_(), |
925 | ]; |
926 | if let Some(data: [i32; 12]) = deblock_size14_inner(vals, level, bd) { |
927 | copy_vertical(dst:rec, x, y:1, &data); |
928 | } |
929 | } |
930 | } |
931 | |
932 | // Assumes rec[0] and src[0] are set 7 taps back from the edge. |
933 | // Accesses fourteen taps, accumulates twelve pixels into the tally |
934 | fn sse_size14<T: Pixel>( |
935 | rec: &PlaneRegion<'_, T>, src: &PlaneRegion<'_, T>, |
936 | tally: &mut [i64; MAX_LOOP_FILTER + 2], horizontal_p: bool, bd: usize, |
937 | ) { |
938 | let flat = 1 << (bd - 8); |
939 | for i in 0..4 { |
940 | let (p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, a) = |
941 | if horizontal_p { |
942 | // 14 taps |
943 | ( |
944 | rec[0][i].as_(), |
945 | rec[1][i].as_(), |
946 | rec[2][i].as_(), |
947 | rec[3][i].as_(), |
948 | rec[4][i].as_(), |
949 | rec[5][i].as_(), |
950 | rec[6][i].as_(), |
951 | rec[7][i].as_(), |
952 | rec[8][i].as_(), |
953 | rec[9][i].as_(), |
954 | rec[10][i].as_(), |
955 | rec[11][i].as_(), |
956 | rec[12][i].as_(), |
957 | rec[13][i].as_(), |
958 | // 12 pixels to compare so offset one forward |
959 | [ |
960 | src[1][i].as_(), |
961 | src[2][i].as_(), |
962 | src[3][i].as_(), |
963 | src[4][i].as_(), |
964 | src[5][i].as_(), |
965 | src[6][i].as_(), |
966 | src[7][i].as_(), |
967 | src[8][i].as_(), |
968 | src[9][i].as_(), |
969 | src[10][i].as_(), |
970 | src[11][i].as_(), |
971 | src[12][i].as_(), |
972 | ], |
973 | ) |
974 | } else { |
975 | // 14 taps |
976 | ( |
977 | rec[i][0].as_(), |
978 | rec[i][1].as_(), |
979 | rec[i][2].as_(), |
980 | rec[i][3].as_(), |
981 | rec[i][4].as_(), |
982 | rec[i][5].as_(), |
983 | rec[i][6].as_(), |
984 | rec[i][7].as_(), |
985 | rec[i][8].as_(), |
986 | rec[i][9].as_(), |
987 | rec[i][10].as_(), |
988 | rec[i][11].as_(), |
989 | rec[i][12].as_(), |
990 | rec[i][13].as_(), |
991 | // 12 pixels to compare so offset one forward |
992 | [ |
993 | src[i][1].as_(), |
994 | src[i][2].as_(), |
995 | src[i][3].as_(), |
996 | src[i][4].as_(), |
997 | src[i][5].as_(), |
998 | src[i][6].as_(), |
999 | src[i][7].as_(), |
1000 | src[i][8].as_(), |
1001 | src[i][9].as_(), |
1002 | src[i][10].as_(), |
1003 | src[i][11].as_(), |
1004 | src[i][12].as_(), |
1005 | ], |
1006 | ) |
1007 | }; |
1008 | |
1009 | // Five possibilities: no filter, wide14, wide8, narrow2 and narrow4 |
1010 | let none: [i32; 12] = [p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5]; |
1011 | let wide14 = |
1012 | filter_wide14_12(p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6); |
1013 | let wide8 = |
1014 | filter_wide8_12(p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5); |
1015 | let narrow2 = filter_narrow2_12( |
1016 | p5, |
1017 | p4, |
1018 | p3, |
1019 | p2, |
1020 | p1, |
1021 | p0, |
1022 | q0, |
1023 | q1, |
1024 | q2, |
1025 | q3, |
1026 | q4, |
1027 | q5, |
1028 | bd - 8, |
1029 | ); |
1030 | let narrow4 = filter_narrow4_12( |
1031 | p5, |
1032 | p4, |
1033 | p3, |
1034 | p2, |
1035 | p1, |
1036 | p0, |
1037 | q0, |
1038 | q1, |
1039 | q2, |
1040 | q3, |
1041 | q4, |
1042 | q5, |
1043 | bd - 8, |
1044 | ); |
1045 | |
1046 | // mask8 sets the dividing line for filter vs no filter |
1047 | // flat8 decides between wide and narrow filters (unrelated to level) |
1048 | // flat14 decides between wide14 and wide8 filters |
1049 | // nhev4 sets the dividing line between narrow2 and narrow4 |
1050 | let mask = clamp( |
1051 | mask8(p3, p2, p1, p0, q0, q1, q2, q3, bd - 8), |
1052 | 1, |
1053 | MAX_LOOP_FILTER + 1, |
1054 | ); |
1055 | let flat8p = flat8(p3, p2, p1, p0, q0, q1, q2, q3) <= flat; |
1056 | let flat14p = flat14_outer(p6, p5, p4, p0, q0, q4, q5, q6) <= flat; |
1057 | let nhev = clamp(nhev4(p1, p0, q0, q1, bd - 8), mask, MAX_LOOP_FILTER + 1); |
1058 | |
1059 | // sse for each; short-circuit the 'special' no-op cases. |
1060 | let sse_none = stride_sse(&a, &none); |
1061 | let sse_wide8 = if flat8p && !flat14p && mask <= MAX_LOOP_FILTER { |
1062 | stride_sse(&a, &wide8) |
1063 | } else { |
1064 | sse_none |
1065 | }; |
1066 | let sse_wide14 = if flat8p && flat14p && mask <= MAX_LOOP_FILTER { |
1067 | stride_sse(&a, &wide14) |
1068 | } else { |
1069 | sse_none |
1070 | }; |
1071 | let sse_narrow2 = if !flat8p && nhev != mask { |
1072 | stride_sse(&a, &narrow2) |
1073 | } else { |
1074 | sse_none |
1075 | }; |
1076 | let sse_narrow4 = if !flat8p && nhev <= MAX_LOOP_FILTER { |
1077 | stride_sse(&a, &narrow4) |
1078 | } else { |
1079 | sse_none |
1080 | }; |
1081 | |
1082 | // accumulate possible filter values into the tally |
1083 | tally[0] += sse_none; |
1084 | tally[mask] -= sse_none; |
1085 | if flat8p { |
1086 | if flat14p { |
1087 | tally[mask] += sse_wide14; |
1088 | } else { |
1089 | tally[mask] += sse_wide8; |
1090 | } |
1091 | } else { |
1092 | tally[mask] += sse_narrow2; |
1093 | tally[nhev] -= sse_narrow2; |
1094 | tally[nhev] += sse_narrow4; |
1095 | } |
1096 | } |
1097 | } |
1098 | |
1099 | fn filter_v_edge<T: Pixel>( |
1100 | deblock: &DeblockState, blocks: &TileBlocks, bo: TileBlockOffset, |
1101 | p: &mut PlaneRegionMut<T>, pli: usize, bd: usize, xdec: usize, ydec: usize, |
1102 | ) { |
1103 | let block = &blocks[bo]; |
1104 | let txsize = if pli == 0 { |
1105 | block.txsize |
1106 | } else { |
1107 | block.bsize.largest_chroma_tx_size(xdec, ydec) |
1108 | }; |
1109 | let tx_edge = bo.0.x >> xdec & (txsize.width_mi() - 1) == 0; |
1110 | if tx_edge { |
1111 | let prev_block = deblock_left(blocks, bo, &p.as_const()); |
1112 | let block_edge = bo.0.x & (block.n4_w as usize - 1) == 0; |
1113 | let filter_size = |
1114 | deblock_size(block, prev_block, &p.as_const(), pli, true, block_edge); |
1115 | if filter_size > 0 { |
1116 | let level = deblock_level(deblock, block, prev_block, pli, true); |
1117 | if level > 0 { |
1118 | let po = bo.plane_offset(p.plane_cfg); |
1119 | let mut plane_region = p.subregion_mut(Area::Rect { |
1120 | x: po.x - (filter_size >> 1) as isize, |
1121 | y: po.y, |
1122 | width: filter_size, |
1123 | height: 4, |
1124 | }); |
1125 | match filter_size { |
1126 | 4 => { |
1127 | deblock_v_size4(&mut plane_region, level, bd); |
1128 | } |
1129 | 6 => { |
1130 | deblock_v_size6(&mut plane_region, level, bd); |
1131 | } |
1132 | 8 => { |
1133 | deblock_v_size8(&mut plane_region, level, bd); |
1134 | } |
1135 | 14 => { |
1136 | deblock_v_size14(&mut plane_region, level, bd); |
1137 | } |
1138 | _ => unreachable!(), |
1139 | } |
1140 | } |
1141 | } |
1142 | } |
1143 | } |
1144 | |
1145 | fn sse_v_edge<T: Pixel>( |
1146 | blocks: &TileBlocks, bo: TileBlockOffset, rec_plane: &PlaneRegion<T>, |
1147 | src_plane: &PlaneRegion<T>, tally: &mut [i64; MAX_LOOP_FILTER + 2], |
1148 | pli: usize, bd: usize, xdec: usize, ydec: usize, |
1149 | ) { |
1150 | let block = &blocks[bo]; |
1151 | let txsize = if pli == 0 { |
1152 | block.txsize |
1153 | } else { |
1154 | block.bsize.largest_chroma_tx_size(xdec, ydec) |
1155 | }; |
1156 | let tx_edge = bo.0.x >> xdec & (txsize.width_mi() - 1) == 0; |
1157 | if tx_edge { |
1158 | let prev_block = deblock_left(blocks, bo, rec_plane); |
1159 | let block_edge = bo.0.x & (block.n4_w as usize - 1) == 0; |
1160 | let filter_size = |
1161 | deblock_size(block, prev_block, rec_plane, pli, true, block_edge); |
1162 | if filter_size > 0 { |
1163 | let po = bo.plane_offset(rec_plane.plane_cfg); // rec and src have identical subsampling |
1164 | let rec_region = rec_plane.subregion(Area::Rect { |
1165 | x: po.x - (filter_size >> 1) as isize, |
1166 | y: po.y, |
1167 | width: filter_size, |
1168 | height: 4, |
1169 | }); |
1170 | let src_region = src_plane.subregion(Area::Rect { |
1171 | x: po.x - (filter_size >> 1) as isize, |
1172 | y: po.y, |
1173 | width: filter_size, |
1174 | height: 4, |
1175 | }); |
1176 | match filter_size { |
1177 | 4 => { |
1178 | sse_size4(&rec_region, &src_region, tally, false, bd); |
1179 | } |
1180 | 6 => { |
1181 | sse_size6(&rec_region, &src_region, tally, false, bd); |
1182 | } |
1183 | 8 => { |
1184 | sse_size8(&rec_region, &src_region, tally, false, bd); |
1185 | } |
1186 | 14 => { |
1187 | sse_size14(&rec_region, &src_region, tally, false, bd); |
1188 | } |
1189 | _ => unreachable!(), |
1190 | } |
1191 | } |
1192 | } |
1193 | } |
1194 | |
1195 | fn filter_h_edge<T: Pixel>( |
1196 | deblock: &DeblockState, blocks: &TileBlocks, bo: TileBlockOffset, |
1197 | p: &mut PlaneRegionMut<T>, pli: usize, bd: usize, xdec: usize, ydec: usize, |
1198 | ) { |
1199 | let block = &blocks[bo]; |
1200 | let txsize = if pli == 0 { |
1201 | block.txsize |
1202 | } else { |
1203 | block.bsize.largest_chroma_tx_size(xdec, ydec) |
1204 | }; |
1205 | let tx_edge = bo.0.y >> ydec & (txsize.height_mi() - 1) == 0; |
1206 | if tx_edge { |
1207 | let prev_block = deblock_up(blocks, bo, &p.as_const()); |
1208 | let block_edge = bo.0.y & (block.n4_h as usize - 1) == 0; |
1209 | let filter_size = |
1210 | deblock_size(block, prev_block, &p.as_const(), pli, false, block_edge); |
1211 | if filter_size > 0 { |
1212 | let level = deblock_level(deblock, block, prev_block, pli, false); |
1213 | if level > 0 { |
1214 | let po = bo.plane_offset(p.plane_cfg); |
1215 | let mut plane_region = p.subregion_mut(Area::Rect { |
1216 | x: po.x, |
1217 | y: po.y - (filter_size >> 1) as isize, |
1218 | width: 4, |
1219 | height: filter_size, |
1220 | }); |
1221 | match filter_size { |
1222 | 4 => { |
1223 | deblock_h_size4(&mut plane_region, level, bd); |
1224 | } |
1225 | 6 => { |
1226 | deblock_h_size6(&mut plane_region, level, bd); |
1227 | } |
1228 | 8 => { |
1229 | deblock_h_size8(&mut plane_region, level, bd); |
1230 | } |
1231 | 14 => { |
1232 | deblock_h_size14(&mut plane_region, level, bd); |
1233 | } |
1234 | _ => unreachable!(), |
1235 | } |
1236 | } |
1237 | } |
1238 | } |
1239 | } |
1240 | |
1241 | fn sse_h_edge<T: Pixel>( |
1242 | blocks: &TileBlocks, bo: TileBlockOffset, rec_plane: &PlaneRegion<T>, |
1243 | src_plane: &PlaneRegion<T>, tally: &mut [i64; MAX_LOOP_FILTER + 2], |
1244 | pli: usize, bd: usize, xdec: usize, ydec: usize, |
1245 | ) { |
1246 | let block = &blocks[bo]; |
1247 | let txsize = if pli == 0 { |
1248 | block.txsize |
1249 | } else { |
1250 | block.bsize.largest_chroma_tx_size(xdec, ydec) |
1251 | }; |
1252 | let tx_edge = bo.0.y >> ydec & (txsize.height_mi() - 1) == 0; |
1253 | if tx_edge { |
1254 | let prev_block = deblock_up(blocks, bo, rec_plane); |
1255 | let block_edge = bo.0.y & (block.n4_h as usize - 1) == 0; |
1256 | let filter_size = |
1257 | deblock_size(block, prev_block, rec_plane, pli, true, block_edge); |
1258 | if filter_size > 0 { |
1259 | let po = bo.plane_offset(rec_plane.plane_cfg); // rec and src have identical subsampling |
1260 | let rec_region = rec_plane.subregion(Area::Rect { |
1261 | x: po.x, |
1262 | y: po.y - (filter_size >> 1) as isize, |
1263 | width: 4, |
1264 | height: filter_size, |
1265 | }); |
1266 | let src_region = src_plane.subregion(Area::Rect { |
1267 | x: po.x, |
1268 | y: po.y - (filter_size >> 1) as isize, |
1269 | width: 4, |
1270 | height: filter_size, |
1271 | }); |
1272 | |
1273 | match filter_size { |
1274 | 4 => { |
1275 | sse_size4(&rec_region, &src_region, tally, true, bd); |
1276 | } |
1277 | 6 => { |
1278 | sse_size6(&rec_region, &src_region, tally, true, bd); |
1279 | } |
1280 | 8 => { |
1281 | sse_size8(&rec_region, &src_region, tally, true, bd); |
1282 | } |
1283 | 14 => { |
1284 | sse_size14(&rec_region, &src_region, tally, true, bd); |
1285 | } |
1286 | _ => unreachable!(), |
1287 | } |
1288 | } |
1289 | } |
1290 | } |
1291 | |
1292 | // Deblocks all edges, vertical and horizontal, in a single plane |
1293 | #[profiling::function ] |
1294 | pub fn deblock_plane<T: Pixel>( |
1295 | deblock: &DeblockState, p: &mut PlaneRegionMut<T>, pli: usize, |
1296 | blocks: &TileBlocks, crop_w: usize, crop_h: usize, bd: usize, |
1297 | ) { |
1298 | let xdec = p.plane_cfg.xdec; |
1299 | let ydec = p.plane_cfg.ydec; |
1300 | assert!(xdec <= 1 && ydec <= 1); |
1301 | |
1302 | match pli { |
1303 | 0 => { |
1304 | if deblock.levels[0] == 0 && deblock.levels[1] == 0 { |
1305 | return; |
1306 | } |
1307 | } |
1308 | 1 => { |
1309 | if deblock.levels[2] == 0 { |
1310 | return; |
1311 | } |
1312 | } |
1313 | 2 => { |
1314 | if deblock.levels[3] == 0 { |
1315 | return; |
1316 | } |
1317 | } |
1318 | _ => return, |
1319 | } |
1320 | |
1321 | let rect = p.rect(); |
1322 | let cols = (cmp::min( |
1323 | blocks.cols(), |
1324 | ((crop_w - rect.x as usize) + MI_SIZE - 1) >> MI_SIZE_LOG2, |
1325 | ) + (1 << xdec >> 1)) |
1326 | >> xdec |
1327 | << xdec; // Clippy can go suck an egg |
1328 | let rows = (cmp::min( |
1329 | blocks.rows(), |
1330 | ((crop_h - rect.y as usize) + MI_SIZE - 1) >> MI_SIZE_LOG2, |
1331 | ) + (1 << ydec >> 1)) |
1332 | >> ydec |
1333 | << ydec; // Clippy can go suck an egg |
1334 | |
1335 | // vertical edge filtering leads horizontal by one full MI-sized |
1336 | // row (and horizontal filtering doesn't happen along the upper |
1337 | // edge). Unroll to avoid corner-cases. |
1338 | if rows > 0 { |
1339 | for x in (1 << xdec..cols).step_by(1 << xdec) { |
1340 | filter_v_edge( |
1341 | deblock, |
1342 | blocks, |
1343 | TileBlockOffset(BlockOffset { x, y: 0 }), |
1344 | p, |
1345 | pli, |
1346 | bd, |
1347 | xdec, |
1348 | ydec, |
1349 | ); |
1350 | } |
1351 | if rows > 1 << ydec { |
1352 | for x in (1 << xdec..cols).step_by(1 << xdec) { |
1353 | filter_v_edge( |
1354 | deblock, |
1355 | blocks, |
1356 | TileBlockOffset(BlockOffset { x, y: 1 << ydec }), |
1357 | p, |
1358 | pli, |
1359 | bd, |
1360 | xdec, |
1361 | ydec, |
1362 | ); |
1363 | } |
1364 | } |
1365 | } |
1366 | |
1367 | // filter rows where vertical and horizontal edge filtering both |
1368 | // happen (horizontal edge filtering lags vertical by one row). |
1369 | for y in ((2 << ydec)..rows).step_by(1 << ydec) { |
1370 | // Check for vertical edge at first MI block boundary on this row |
1371 | if cols > 1 << xdec { |
1372 | filter_v_edge( |
1373 | deblock, |
1374 | blocks, |
1375 | TileBlockOffset(BlockOffset { x: 1 << xdec, y }), |
1376 | p, |
1377 | pli, |
1378 | bd, |
1379 | xdec, |
1380 | ydec, |
1381 | ); |
1382 | } |
1383 | // run the rest of the row with both vertical and horizontal edge filtering. |
1384 | // Horizontal lags vertical edge by one row and two columns. |
1385 | for x in (2 << xdec..cols).step_by(1 << xdec) { |
1386 | filter_v_edge( |
1387 | deblock, |
1388 | blocks, |
1389 | TileBlockOffset(BlockOffset { x, y }), |
1390 | p, |
1391 | pli, |
1392 | bd, |
1393 | xdec, |
1394 | ydec, |
1395 | ); |
1396 | filter_h_edge( |
1397 | deblock, |
1398 | blocks, |
1399 | TileBlockOffset(BlockOffset { |
1400 | x: x - (2 << xdec), |
1401 | y: y - (1 << ydec), |
1402 | }), |
1403 | p, |
1404 | pli, |
1405 | bd, |
1406 | xdec, |
1407 | ydec, |
1408 | ); |
1409 | } |
1410 | // ..and the last two horizontal edges for the row |
1411 | if cols >= 2 << xdec { |
1412 | filter_h_edge( |
1413 | deblock, |
1414 | blocks, |
1415 | TileBlockOffset(BlockOffset { |
1416 | x: cols - (2 << xdec), |
1417 | y: y - (1 << ydec), |
1418 | }), |
1419 | p, |
1420 | pli, |
1421 | bd, |
1422 | xdec, |
1423 | ydec, |
1424 | ); |
1425 | } |
1426 | if cols >= 1 << xdec { |
1427 | filter_h_edge( |
1428 | deblock, |
1429 | blocks, |
1430 | TileBlockOffset(BlockOffset { |
1431 | x: cols - (1 << xdec), |
1432 | y: y - (1 << ydec), |
1433 | }), |
1434 | p, |
1435 | pli, |
1436 | bd, |
1437 | xdec, |
1438 | ydec, |
1439 | ); |
1440 | } |
1441 | } |
1442 | |
1443 | // Last horizontal row, vertical is already complete |
1444 | if rows > 1 << ydec { |
1445 | for x in (0..cols).step_by(1 << xdec) { |
1446 | filter_h_edge( |
1447 | deblock, |
1448 | blocks, |
1449 | TileBlockOffset(BlockOffset { x, y: rows - (1 << ydec) }), |
1450 | p, |
1451 | pli, |
1452 | bd, |
1453 | xdec, |
1454 | ydec, |
1455 | ); |
1456 | } |
1457 | } |
1458 | } |
1459 | |
1460 | // sse count of all edges in a single plane, accumulates into vertical and horizontal counts |
1461 | fn sse_plane<T: Pixel>( |
1462 | rec: &PlaneRegion<T>, src: &PlaneRegion<T>, |
1463 | v_sse: &mut [i64; MAX_LOOP_FILTER + 2], |
1464 | h_sse: &mut [i64; MAX_LOOP_FILTER + 2], pli: usize, blocks: &TileBlocks, |
1465 | crop_w: usize, crop_h: usize, bd: usize, |
1466 | ) { |
1467 | let xdec = rec.plane_cfg.xdec; |
1468 | let ydec = rec.plane_cfg.ydec; |
1469 | assert!(xdec <= 1 && ydec <= 1); |
1470 | let rect = rec.rect(); |
1471 | let cols = (cmp::min( |
1472 | blocks.cols(), |
1473 | (crop_w - rect.x as usize + MI_SIZE - 1) >> MI_SIZE_LOG2, |
1474 | ) + (1 << xdec >> 1)) |
1475 | >> xdec |
1476 | << xdec; // Clippy can go suck an egg |
1477 | let rows = (cmp::min( |
1478 | blocks.rows(), |
1479 | (crop_h - rect.y as usize + MI_SIZE - 1) >> MI_SIZE_LOG2, |
1480 | ) + (1 << ydec >> 1)) |
1481 | >> ydec |
1482 | << ydec; // Clippy can go suck an egg |
1483 | |
1484 | // No horizontal edge filtering along top of frame |
1485 | for x in (1 << xdec..cols).step_by(1 << xdec) { |
1486 | sse_v_edge( |
1487 | blocks, |
1488 | TileBlockOffset(BlockOffset { x, y: 0 }), |
1489 | rec, |
1490 | src, |
1491 | v_sse, |
1492 | pli, |
1493 | bd, |
1494 | xdec, |
1495 | ydec, |
1496 | ); |
1497 | } |
1498 | |
1499 | // Unlike actual filtering, we're counting horizontal and vertical |
1500 | // as separable cases. No need to lag the horizontal processing |
1501 | // behind vertical. |
1502 | for y in (1 << ydec..rows).step_by(1 << ydec) { |
1503 | // No vertical filtering along left edge of frame |
1504 | sse_h_edge( |
1505 | blocks, |
1506 | TileBlockOffset(BlockOffset { x: 0, y }), |
1507 | rec, |
1508 | src, |
1509 | h_sse, |
1510 | pli, |
1511 | bd, |
1512 | xdec, |
1513 | ydec, |
1514 | ); |
1515 | for x in (1 << xdec..cols).step_by(1 << xdec) { |
1516 | sse_v_edge( |
1517 | blocks, |
1518 | TileBlockOffset(BlockOffset { x, y }), |
1519 | rec, |
1520 | src, |
1521 | v_sse, |
1522 | pli, |
1523 | bd, |
1524 | xdec, |
1525 | ydec, |
1526 | ); |
1527 | sse_h_edge( |
1528 | blocks, |
1529 | TileBlockOffset(BlockOffset { x, y }), |
1530 | rec, |
1531 | src, |
1532 | h_sse, |
1533 | pli, |
1534 | bd, |
1535 | xdec, |
1536 | ydec, |
1537 | ); |
1538 | } |
1539 | } |
1540 | } |
1541 | |
1542 | // Deblocks all edges in all planes of a frame |
1543 | #[profiling::function ] |
1544 | pub fn deblock_filter_frame<T: Pixel>( |
1545 | deblock: &DeblockState, tile: &mut TileMut<T>, blocks: &TileBlocks, |
1546 | crop_w: usize, crop_h: usize, bd: usize, planes: usize, |
1547 | ) { |
1548 | tile.planes[..planes].par_iter_mut().enumerate().for_each(|(pli: usize, plane: &mut PlaneRegionMut<'_, T>)| { |
1549 | deblock_plane(deblock, p:plane, pli, blocks, crop_w, crop_h, bd); |
1550 | }); |
1551 | } |
1552 | |
1553 | fn sse_optimize<T: Pixel>( |
1554 | rec: &Tile<T>, input: &Tile<T>, blocks: &TileBlocks, crop_w: usize, |
1555 | crop_h: usize, bd: usize, monochrome: bool, |
1556 | ) -> [u8; 4] { |
1557 | // i64 allows us to accumulate a total of ~ 35 bits worth of pixels |
1558 | assert!( |
1559 | ILog::ilog(input.planes[0].plane_cfg.width) |
1560 | + ILog::ilog(input.planes[0].plane_cfg.height) |
1561 | < 35 |
1562 | ); |
1563 | let mut level = [0; 4]; |
1564 | let planes = if monochrome { 1 } else { MAX_PLANES }; |
1565 | |
1566 | for pli in 0..planes { |
1567 | let mut v_tally: [i64; MAX_LOOP_FILTER + 2] = [0; MAX_LOOP_FILTER + 2]; |
1568 | let mut h_tally: [i64; MAX_LOOP_FILTER + 2] = [0; MAX_LOOP_FILTER + 2]; |
1569 | |
1570 | sse_plane( |
1571 | &rec.planes[pli], |
1572 | &input.planes[pli], |
1573 | &mut v_tally, |
1574 | &mut h_tally, |
1575 | pli, |
1576 | blocks, |
1577 | crop_w, |
1578 | crop_h, |
1579 | bd, |
1580 | ); |
1581 | |
1582 | for i in 1..=MAX_LOOP_FILTER { |
1583 | v_tally[i] += v_tally[i - 1]; |
1584 | h_tally[i] += h_tally[i - 1]; |
1585 | } |
1586 | |
1587 | match pli { |
1588 | 0 => { |
1589 | let mut best_v = 999; |
1590 | let mut best_h = 999; |
1591 | for i in 0..=MAX_LOOP_FILTER { |
1592 | if best_v == 999 || v_tally[best_v] > v_tally[i] { |
1593 | best_v = i; |
1594 | }; |
1595 | if best_h == 999 || h_tally[best_h] > h_tally[i] { |
1596 | best_h = i; |
1597 | }; |
1598 | } |
1599 | level[0] = best_v as u8; |
1600 | level[1] = best_h as u8; |
1601 | } |
1602 | 1 | 2 => { |
1603 | let mut best = 999; |
1604 | for i in 0..=MAX_LOOP_FILTER { |
1605 | if best == 999 |
1606 | || v_tally[best] + h_tally[best] > v_tally[i] + h_tally[i] |
1607 | { |
1608 | best = i; |
1609 | }; |
1610 | } |
1611 | level[pli + 1] = best as u8; |
1612 | } |
1613 | _ => unreachable!(), |
1614 | } |
1615 | } |
1616 | level |
1617 | } |
1618 | |
1619 | #[profiling::function ] |
1620 | pub fn deblock_filter_optimize<T: Pixel, U: Pixel>( |
1621 | fi: &FrameInvariants<T>, rec: &Tile<U>, input: &Tile<U>, |
1622 | blocks: &TileBlocks, crop_w: usize, crop_h: usize, |
1623 | ) -> [u8; 4] { |
1624 | if fi.config.speed_settings.fast_deblock { |
1625 | let q = ac_q(fi.base_q_idx, 0, fi.sequence.bit_depth).get() as i32; |
1626 | let level = clamp( |
1627 | match fi.sequence.bit_depth { |
1628 | 8 => { |
1629 | if fi.frame_type == FrameType::KEY { |
1630 | (q * 17563 - 421_574 + (1 << 18 >> 1)) >> 18 |
1631 | } else { |
1632 | (q * 6017 + 650_707 + (1 << 18 >> 1)) >> 18 |
1633 | } |
1634 | } |
1635 | 10 => { |
1636 | if fi.frame_type == FrameType::KEY { |
1637 | ((q * 20723 + 4_060_632 + (1 << 20 >> 1)) >> 20) - 4 |
1638 | } else { |
1639 | (q * 20723 + 4_060_632 + (1 << 20 >> 1)) >> 20 |
1640 | } |
1641 | } |
1642 | 12 => { |
1643 | if fi.frame_type == FrameType::KEY { |
1644 | ((q * 20723 + 16_242_526 + (1 << 22 >> 1)) >> 22) - 4 |
1645 | } else { |
1646 | (q * 20723 + 16_242_526 + (1 << 22 >> 1)) >> 22 |
1647 | } |
1648 | } |
1649 | _ => unreachable!(), |
1650 | }, |
1651 | 0, |
1652 | MAX_LOOP_FILTER as i32, |
1653 | ) as u8; |
1654 | [level; 4] |
1655 | } else { |
1656 | // Deblocking happens in 4x4 (luma) units; luma x,y are clipped to |
1657 | // the *crop frame* of the entire frame by 4x4 block. |
1658 | sse_optimize( |
1659 | rec, |
1660 | input, |
1661 | blocks, |
1662 | crop_w, |
1663 | crop_h, |
1664 | fi.sequence.bit_depth, |
1665 | fi.sequence.chroma_sampling == Cs400, |
1666 | ) |
1667 | } |
1668 | } |
1669 | |