1 | // Copyright (c) 2018-2022, The rav1e contributors. All rights reserved |
2 | // |
3 | // This source code is subject to the terms of the BSD 2 Clause License and |
4 | // the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
5 | // was not distributed with this source code in the LICENSE file, you can |
6 | // obtain it at www.aomedia.org/license/software. If the Alliance for Open |
7 | // Media Patent License 1.0 was not distributed with this source code in the |
8 | // PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
9 | |
10 | cfg_if::cfg_if! { |
11 | if #[cfg(nasm_x86_64)] { |
12 | pub use crate::asm::x86::transform::inverse::*; |
13 | } else if #[cfg(asm_neon)] { |
14 | pub use crate::asm::aarch64::transform::inverse::*; |
15 | } else { |
16 | pub use self::rust::*; |
17 | } |
18 | } |
19 | |
20 | use crate::tiling::PlaneRegionMut; |
21 | use crate::util::*; |
22 | |
23 | // TODO: move 1d txfm code to rust module. |
24 | |
25 | use super::clamp_value; |
26 | use super::consts::*; |
27 | use super::get_1d_tx_types; |
28 | use super::get_rect_tx_log_ratio; |
29 | use super::half_btf; |
30 | use super::TxSize; |
31 | use super::TxType; |
32 | |
/// One-dimensional 4-point inverse Walsh-Hadamard transform.
///
/// Reads `input[..4]` and writes the four results to `output[..4]`; any
/// further elements of either slice are left untouched. `_range` is
/// ignored: no intermediate clamping is applied here.
///
/// # Panics
///
/// - If `input` or `output` have fewer than 4 items.
pub fn av1_iwht4(input: &[i32], output: &mut [i32], _range: usize) {
  assert!(input.len() >= 4);
  assert!(output.len() >= 4);

  // <https://aomediacodec.github.io/av1-spec/#inverse-walsh-hadamard-transform-process>
  let x0 = input[0];
  let x1 = input[1];
  let x2 = input[2];
  let x3 = input[3];
  let s0 = x0 + x1;
  let s2 = x2 - x3;
  // Arithmetic shift: a negative difference rounds toward -infinity.
  let s4 = (s0 - s2) >> 1;
  let s3 = s4 - x3;
  let s1 = s4 - x1;
  output[0] = s0 - s3;
  output[1] = s3;
  output[2] = s1;
  output[3] = s2 + s1;
}
55 | |
/// Fixed-point cosine table used as butterfly weights by the inverse
/// transforms in this file.
///
/// Entry `i` corresponds to `cos(i * PI / 128)` scaled by
/// `1 << INV_COS_BIT` and rounded, e.g. `COSPI_INV[0] == 4096` and
/// `COSPI_INV[32] == 2896` (about `4096 * cos(PI / 4)`).
static COSPI_INV: [i32; 64] = [
  4096, 4095, 4091, 4085, 4076, 4065, 4052, 4036, 4017, 3996, 3973, 3948,
  3920, 3889, 3857, 3822, 3784, 3745, 3703, 3659, 3612, 3564, 3513, 3461,
  3406, 3349, 3290, 3229, 3166, 3102, 3035, 2967, 2896, 2824, 2751, 2675,
  2598, 2520, 2440, 2359, 2276, 2191, 2106, 2019, 1931, 1842, 1751, 1660,
  1567, 1474, 1380, 1285, 1189, 1092, 995, 897, 799, 700, 601, 501, 401, 301,
  201, 101,
];

/// Fixed-point sine table used by `av1_iadst4`.
///
/// The entries match `sin(i * PI / 9) * (2 * sqrt(2) / 3)` scaled by
/// `1 << 12` and rounded; index 0 is a placeholder and is never read in
/// this file.
static SINPI_INV: [i32; 5] = [0, 1321, 2482, 3344, 3803];

/// Number of fractional bits carried by the `COSPI_INV` weights; passed as
/// the rounding shift to `half_btf`.
const INV_COS_BIT: usize = 12;
68 | |
69 | /// # Panics |
70 | /// |
71 | /// - If `input` or `output` have fewer than 4 items. |
72 | pub fn av1_idct4(input: &[i32], output: &mut [i32], range: usize) { |
73 | assert!(input.len() >= 4); |
74 | assert!(output.len() >= 4); |
75 | |
76 | // stage 1 |
77 | let stg1: [i32; 4] = [input[0], input[2], input[1], input[3]]; |
78 | |
79 | // stage 2 |
80 | let stg2: [i32; 4] = [ |
81 | half_btf(w0:COSPI_INV[32], in0:stg1[0], w1:COSPI_INV[32], in1:stg1[1], INV_COS_BIT), |
82 | half_btf(w0:COSPI_INV[32], in0:stg1[0], -COSPI_INV[32], in1:stg1[1], INV_COS_BIT), |
83 | half_btf(w0:COSPI_INV[48], in0:stg1[2], -COSPI_INV[16], in1:stg1[3], INV_COS_BIT), |
84 | half_btf(w0:COSPI_INV[16], in0:stg1[2], w1:COSPI_INV[48], in1:stg1[3], INV_COS_BIT), |
85 | ]; |
86 | |
87 | // stage 3 |
88 | output[0] = clamp_value(value:stg2[0] + stg2[3], bit:range); |
89 | output[1] = clamp_value(value:stg2[1] + stg2[2], bit:range); |
90 | output[2] = clamp_value(value:stg2[1] - stg2[2], bit:range); |
91 | output[3] = clamp_value(value:stg2[0] - stg2[3], bit:range); |
92 | } |
93 | |
94 | pub fn av1_iflipadst4(input: &[i32], output: &mut [i32], range: usize) { |
95 | av1_iadst4(input, output, range); |
96 | output[..4].reverse(); |
97 | } |
98 | |
99 | /// # Panics |
100 | /// |
101 | /// - If `input` or `output` have fewer than 4 items. |
102 | #[inline (always)] |
103 | pub fn av1_iadst4(input: &[i32], output: &mut [i32], _range: usize) { |
104 | assert!(input.len() >= 4); |
105 | assert!(output.len() >= 4); |
106 | |
107 | let bit = 12; |
108 | |
109 | let x0 = input[0]; |
110 | let x1 = input[1]; |
111 | let x2 = input[2]; |
112 | let x3 = input[3]; |
113 | |
114 | // stage 1 |
115 | let s0 = SINPI_INV[1] * x0; |
116 | let s1 = SINPI_INV[2] * x0; |
117 | let s2 = SINPI_INV[3] * x1; |
118 | let s3 = SINPI_INV[4] * x2; |
119 | let s4 = SINPI_INV[1] * x2; |
120 | let s5 = SINPI_INV[2] * x3; |
121 | let s6 = SINPI_INV[4] * x3; |
122 | |
123 | // stage 2 |
124 | let s7 = (x0 - x2) + x3; |
125 | |
126 | // stage 3 |
127 | let s0 = s0 + s3; |
128 | let s1 = s1 - s4; |
129 | let s3 = s2; |
130 | let s2 = SINPI_INV[3] * s7; |
131 | |
132 | // stage 4 |
133 | let s0 = s0 + s5; |
134 | let s1 = s1 - s6; |
135 | |
136 | // stage 5 |
137 | let x0 = s0 + s3; |
138 | let x1 = s1 + s3; |
139 | let x2 = s2; |
140 | let x3 = s0 + s1; |
141 | |
142 | // stage 6 |
143 | let x3 = x3 - s3; |
144 | |
145 | output[0] = round_shift(x0, bit); |
146 | output[1] = round_shift(x1, bit); |
147 | output[2] = round_shift(x2, bit); |
148 | output[3] = round_shift(x3, bit); |
149 | } |
150 | |
151 | pub fn av1_iidentity4(input: &[i32], output: &mut [i32], _range: usize) { |
152 | outputimpl Iterator [..4] |
153 | .iter_mut() |
154 | .zip(input[..4].iter()) |
155 | .for_each(|(outp: &mut i32, inp: &i32)| *outp = round_shift(value:SQRT2 * *inp, bit:12)); |
156 | } |
157 | |
158 | /// # Panics |
159 | /// |
160 | /// - If `input` or `output` have fewer than 8 items. |
161 | pub fn av1_idct8(input: &[i32], output: &mut [i32], range: usize) { |
162 | assert!(input.len() >= 8); |
163 | assert!(output.len() >= 8); |
164 | |
165 | // call idct4 |
166 | let temp_in = [input[0], input[2], input[4], input[6]]; |
167 | let mut temp_out: [i32; 4] = [0; 4]; |
168 | av1_idct4(&temp_in, &mut temp_out, range); |
169 | |
170 | // stage 0 |
171 | |
172 | // stage 1 |
173 | let stg1 = [input[1], input[5], input[3], input[7]]; |
174 | |
175 | // stage 2 |
176 | let stg2 = [ |
177 | half_btf(COSPI_INV[56], stg1[0], -COSPI_INV[8], stg1[3], INV_COS_BIT), |
178 | half_btf(COSPI_INV[24], stg1[1], -COSPI_INV[40], stg1[2], INV_COS_BIT), |
179 | half_btf(COSPI_INV[40], stg1[1], COSPI_INV[24], stg1[2], INV_COS_BIT), |
180 | half_btf(COSPI_INV[8], stg1[0], COSPI_INV[56], stg1[3], INV_COS_BIT), |
181 | ]; |
182 | |
183 | // stage 3 |
184 | let stg3 = [ |
185 | clamp_value(stg2[0] + stg2[1], range), |
186 | clamp_value(stg2[0] - stg2[1], range), |
187 | clamp_value(-stg2[2] + stg2[3], range), |
188 | clamp_value(stg2[2] + stg2[3], range), |
189 | ]; |
190 | |
191 | // stage 4 |
192 | let stg4 = [ |
193 | stg3[0], |
194 | half_btf(-COSPI_INV[32], stg3[1], COSPI_INV[32], stg3[2], INV_COS_BIT), |
195 | half_btf(COSPI_INV[32], stg3[1], COSPI_INV[32], stg3[2], INV_COS_BIT), |
196 | stg3[3], |
197 | ]; |
198 | |
199 | // stage 5 |
200 | output[0] = clamp_value(temp_out[0] + stg4[3], range); |
201 | output[1] = clamp_value(temp_out[1] + stg4[2], range); |
202 | output[2] = clamp_value(temp_out[2] + stg4[1], range); |
203 | output[3] = clamp_value(temp_out[3] + stg4[0], range); |
204 | output[4] = clamp_value(temp_out[3] - stg4[0], range); |
205 | output[5] = clamp_value(temp_out[2] - stg4[1], range); |
206 | output[6] = clamp_value(temp_out[1] - stg4[2], range); |
207 | output[7] = clamp_value(temp_out[0] - stg4[3], range); |
208 | } |
209 | |
210 | pub fn av1_iflipadst8(input: &[i32], output: &mut [i32], range: usize) { |
211 | av1_iadst8(input, output, range); |
212 | output[..8].reverse(); |
213 | } |
214 | |
215 | /// # Panics |
216 | /// |
217 | /// - If `input` or `output` have fewer than 8 items. |
218 | #[inline (always)] |
219 | pub fn av1_iadst8(input: &[i32], output: &mut [i32], range: usize) { |
220 | assert!(input.len() >= 8); |
221 | assert!(output.len() >= 8); |
222 | |
223 | // stage 1 |
224 | let stg1 = [ |
225 | input[7], input[0], input[5], input[2], input[3], input[4], input[1], |
226 | input[6], |
227 | ]; |
228 | |
229 | // stage 2 |
230 | let stg2 = [ |
231 | half_btf(COSPI_INV[4], stg1[0], COSPI_INV[60], stg1[1], INV_COS_BIT), |
232 | half_btf(COSPI_INV[60], stg1[0], -COSPI_INV[4], stg1[1], INV_COS_BIT), |
233 | half_btf(COSPI_INV[20], stg1[2], COSPI_INV[44], stg1[3], INV_COS_BIT), |
234 | half_btf(COSPI_INV[44], stg1[2], -COSPI_INV[20], stg1[3], INV_COS_BIT), |
235 | half_btf(COSPI_INV[36], stg1[4], COSPI_INV[28], stg1[5], INV_COS_BIT), |
236 | half_btf(COSPI_INV[28], stg1[4], -COSPI_INV[36], stg1[5], INV_COS_BIT), |
237 | half_btf(COSPI_INV[52], stg1[6], COSPI_INV[12], stg1[7], INV_COS_BIT), |
238 | half_btf(COSPI_INV[12], stg1[6], -COSPI_INV[52], stg1[7], INV_COS_BIT), |
239 | ]; |
240 | |
241 | // stage 3 |
242 | let stg3 = [ |
243 | clamp_value(stg2[0] + stg2[4], range), |
244 | clamp_value(stg2[1] + stg2[5], range), |
245 | clamp_value(stg2[2] + stg2[6], range), |
246 | clamp_value(stg2[3] + stg2[7], range), |
247 | clamp_value(stg2[0] - stg2[4], range), |
248 | clamp_value(stg2[1] - stg2[5], range), |
249 | clamp_value(stg2[2] - stg2[6], range), |
250 | clamp_value(stg2[3] - stg2[7], range), |
251 | ]; |
252 | |
253 | // stage 4 |
254 | let stg4 = [ |
255 | stg3[0], |
256 | stg3[1], |
257 | stg3[2], |
258 | stg3[3], |
259 | half_btf(COSPI_INV[16], stg3[4], COSPI_INV[48], stg3[5], INV_COS_BIT), |
260 | half_btf(COSPI_INV[48], stg3[4], -COSPI_INV[16], stg3[5], INV_COS_BIT), |
261 | half_btf(-COSPI_INV[48], stg3[6], COSPI_INV[16], stg3[7], INV_COS_BIT), |
262 | half_btf(COSPI_INV[16], stg3[6], COSPI_INV[48], stg3[7], INV_COS_BIT), |
263 | ]; |
264 | |
265 | // stage 5 |
266 | let stg5 = [ |
267 | clamp_value(stg4[0] + stg4[2], range), |
268 | clamp_value(stg4[1] + stg4[3], range), |
269 | clamp_value(stg4[0] - stg4[2], range), |
270 | clamp_value(stg4[1] - stg4[3], range), |
271 | clamp_value(stg4[4] + stg4[6], range), |
272 | clamp_value(stg4[5] + stg4[7], range), |
273 | clamp_value(stg4[4] - stg4[6], range), |
274 | clamp_value(stg4[5] - stg4[7], range), |
275 | ]; |
276 | |
277 | // stage 6 |
278 | let stg6 = [ |
279 | stg5[0], |
280 | stg5[1], |
281 | half_btf(COSPI_INV[32], stg5[2], COSPI_INV[32], stg5[3], INV_COS_BIT), |
282 | half_btf(COSPI_INV[32], stg5[2], -COSPI_INV[32], stg5[3], INV_COS_BIT), |
283 | stg5[4], |
284 | stg5[5], |
285 | half_btf(COSPI_INV[32], stg5[6], COSPI_INV[32], stg5[7], INV_COS_BIT), |
286 | half_btf(COSPI_INV[32], stg5[6], -COSPI_INV[32], stg5[7], INV_COS_BIT), |
287 | ]; |
288 | |
289 | // stage 7 |
290 | output[0] = stg6[0]; |
291 | output[1] = -stg6[4]; |
292 | output[2] = stg6[6]; |
293 | output[3] = -stg6[2]; |
294 | output[4] = stg6[3]; |
295 | output[5] = -stg6[7]; |
296 | output[6] = stg6[5]; |
297 | output[7] = -stg6[1]; |
298 | } |
299 | |
/// One-dimensional 8-point inverse identity transform: each of the first
/// eight inputs is doubled. `_range` is ignored.
///
/// # Panics
///
/// - If `input` or `output` have fewer than 8 items.
pub fn av1_iidentity8(input: &[i32], output: &mut [i32], _range: usize) {
  output[..8]
    .iter_mut()
    .zip(input[..8].iter())
    .for_each(|(outp, inp)| *outp = 2 * *inp);
}
306 | |
307 | fn av1_idct16(input: &[i32], output: &mut [i32], range: usize) { |
308 | assert!(input.len() >= 16); |
309 | assert!(output.len() >= 16); |
310 | |
311 | // call idct8 |
312 | let temp_in = [ |
313 | input[0], input[2], input[4], input[6], input[8], input[10], input[12], |
314 | input[14], |
315 | ]; |
316 | let mut temp_out: [i32; 8] = [0; 8]; |
317 | av1_idct8(&temp_in, &mut temp_out, range); |
318 | |
319 | // stage 1 |
320 | let stg1 = [ |
321 | input[1], input[9], input[5], input[13], input[3], input[11], input[7], |
322 | input[15], |
323 | ]; |
324 | |
325 | // stage 2 |
326 | let stg2 = [ |
327 | half_btf(COSPI_INV[60], stg1[0], -COSPI_INV[4], stg1[7], INV_COS_BIT), |
328 | half_btf(COSPI_INV[28], stg1[1], -COSPI_INV[36], stg1[6], INV_COS_BIT), |
329 | half_btf(COSPI_INV[44], stg1[2], -COSPI_INV[20], stg1[5], INV_COS_BIT), |
330 | half_btf(COSPI_INV[12], stg1[3], -COSPI_INV[52], stg1[4], INV_COS_BIT), |
331 | half_btf(COSPI_INV[52], stg1[3], COSPI_INV[12], stg1[4], INV_COS_BIT), |
332 | half_btf(COSPI_INV[20], stg1[2], COSPI_INV[44], stg1[5], INV_COS_BIT), |
333 | half_btf(COSPI_INV[36], stg1[1], COSPI_INV[28], stg1[6], INV_COS_BIT), |
334 | half_btf(COSPI_INV[4], stg1[0], COSPI_INV[60], stg1[7], INV_COS_BIT), |
335 | ]; |
336 | |
337 | // stage 3 |
338 | let stg3 = [ |
339 | clamp_value(stg2[0] + stg2[1], range), |
340 | clamp_value(stg2[0] - stg2[1], range), |
341 | clamp_value(-stg2[2] + stg2[3], range), |
342 | clamp_value(stg2[2] + stg2[3], range), |
343 | clamp_value(stg2[4] + stg2[5], range), |
344 | clamp_value(stg2[4] - stg2[5], range), |
345 | clamp_value(-stg2[6] + stg2[7], range), |
346 | clamp_value(stg2[6] + stg2[7], range), |
347 | ]; |
348 | |
349 | // stage 4 |
350 | let stg4 = [ |
351 | stg3[0], |
352 | half_btf(-COSPI_INV[16], stg3[1], COSPI_INV[48], stg3[6], INV_COS_BIT), |
353 | half_btf(-COSPI_INV[48], stg3[2], -COSPI_INV[16], stg3[5], INV_COS_BIT), |
354 | stg3[3], |
355 | stg3[4], |
356 | half_btf(-COSPI_INV[16], stg3[2], COSPI_INV[48], stg3[5], INV_COS_BIT), |
357 | half_btf(COSPI_INV[48], stg3[1], COSPI_INV[16], stg3[6], INV_COS_BIT), |
358 | stg3[7], |
359 | ]; |
360 | |
361 | // stage 5 |
362 | let stg5 = [ |
363 | clamp_value(stg4[0] + stg4[3], range), |
364 | clamp_value(stg4[1] + stg4[2], range), |
365 | clamp_value(stg4[1] - stg4[2], range), |
366 | clamp_value(stg4[0] - stg4[3], range), |
367 | clamp_value(-stg4[4] + stg4[7], range), |
368 | clamp_value(-stg4[5] + stg4[6], range), |
369 | clamp_value(stg4[5] + stg4[6], range), |
370 | clamp_value(stg4[4] + stg4[7], range), |
371 | ]; |
372 | |
373 | // stage 6 |
374 | let stg6 = [ |
375 | stg5[0], |
376 | stg5[1], |
377 | half_btf(-COSPI_INV[32], stg5[2], COSPI_INV[32], stg5[5], INV_COS_BIT), |
378 | half_btf(-COSPI_INV[32], stg5[3], COSPI_INV[32], stg5[4], INV_COS_BIT), |
379 | half_btf(COSPI_INV[32], stg5[3], COSPI_INV[32], stg5[4], INV_COS_BIT), |
380 | half_btf(COSPI_INV[32], stg5[2], COSPI_INV[32], stg5[5], INV_COS_BIT), |
381 | stg5[6], |
382 | stg5[7], |
383 | ]; |
384 | |
385 | // stage 7 |
386 | output[0] = clamp_value(temp_out[0] + stg6[7], range); |
387 | output[1] = clamp_value(temp_out[1] + stg6[6], range); |
388 | output[2] = clamp_value(temp_out[2] + stg6[5], range); |
389 | output[3] = clamp_value(temp_out[3] + stg6[4], range); |
390 | output[4] = clamp_value(temp_out[4] + stg6[3], range); |
391 | output[5] = clamp_value(temp_out[5] + stg6[2], range); |
392 | output[6] = clamp_value(temp_out[6] + stg6[1], range); |
393 | output[7] = clamp_value(temp_out[7] + stg6[0], range); |
394 | output[8] = clamp_value(temp_out[7] - stg6[0], range); |
395 | output[9] = clamp_value(temp_out[6] - stg6[1], range); |
396 | output[10] = clamp_value(temp_out[5] - stg6[2], range); |
397 | output[11] = clamp_value(temp_out[4] - stg6[3], range); |
398 | output[12] = clamp_value(temp_out[3] - stg6[4], range); |
399 | output[13] = clamp_value(temp_out[2] - stg6[5], range); |
400 | output[14] = clamp_value(temp_out[1] - stg6[6], range); |
401 | output[15] = clamp_value(temp_out[0] - stg6[7], range); |
402 | } |
403 | |
404 | pub fn av1_iflipadst16(input: &[i32], output: &mut [i32], range: usize) { |
405 | av1_iadst16(input, output, range); |
406 | output[..16].reverse(); |
407 | } |
408 | |
409 | #[inline (always)] |
410 | fn av1_iadst16(input: &[i32], output: &mut [i32], range: usize) { |
411 | assert!(input.len() >= 16); |
412 | assert!(output.len() >= 16); |
413 | |
414 | // stage 1 |
415 | let stg1 = [ |
416 | input[15], input[0], input[13], input[2], input[11], input[4], input[9], |
417 | input[6], input[7], input[8], input[5], input[10], input[3], input[12], |
418 | input[1], input[14], |
419 | ]; |
420 | |
421 | // stage 2 |
422 | let stg2 = [ |
423 | half_btf(COSPI_INV[2], stg1[0], COSPI_INV[62], stg1[1], INV_COS_BIT), |
424 | half_btf(COSPI_INV[62], stg1[0], -COSPI_INV[2], stg1[1], INV_COS_BIT), |
425 | half_btf(COSPI_INV[10], stg1[2], COSPI_INV[54], stg1[3], INV_COS_BIT), |
426 | half_btf(COSPI_INV[54], stg1[2], -COSPI_INV[10], stg1[3], INV_COS_BIT), |
427 | half_btf(COSPI_INV[18], stg1[4], COSPI_INV[46], stg1[5], INV_COS_BIT), |
428 | half_btf(COSPI_INV[46], stg1[4], -COSPI_INV[18], stg1[5], INV_COS_BIT), |
429 | half_btf(COSPI_INV[26], stg1[6], COSPI_INV[38], stg1[7], INV_COS_BIT), |
430 | half_btf(COSPI_INV[38], stg1[6], -COSPI_INV[26], stg1[7], INV_COS_BIT), |
431 | half_btf(COSPI_INV[34], stg1[8], COSPI_INV[30], stg1[9], INV_COS_BIT), |
432 | half_btf(COSPI_INV[30], stg1[8], -COSPI_INV[34], stg1[9], INV_COS_BIT), |
433 | half_btf(COSPI_INV[42], stg1[10], COSPI_INV[22], stg1[11], INV_COS_BIT), |
434 | half_btf(COSPI_INV[22], stg1[10], -COSPI_INV[42], stg1[11], INV_COS_BIT), |
435 | half_btf(COSPI_INV[50], stg1[12], COSPI_INV[14], stg1[13], INV_COS_BIT), |
436 | half_btf(COSPI_INV[14], stg1[12], -COSPI_INV[50], stg1[13], INV_COS_BIT), |
437 | half_btf(COSPI_INV[58], stg1[14], COSPI_INV[6], stg1[15], INV_COS_BIT), |
438 | half_btf(COSPI_INV[6], stg1[14], -COSPI_INV[58], stg1[15], INV_COS_BIT), |
439 | ]; |
440 | |
441 | // stage 3 |
442 | let stg3 = [ |
443 | clamp_value(stg2[0] + stg2[8], range), |
444 | clamp_value(stg2[1] + stg2[9], range), |
445 | clamp_value(stg2[2] + stg2[10], range), |
446 | clamp_value(stg2[3] + stg2[11], range), |
447 | clamp_value(stg2[4] + stg2[12], range), |
448 | clamp_value(stg2[5] + stg2[13], range), |
449 | clamp_value(stg2[6] + stg2[14], range), |
450 | clamp_value(stg2[7] + stg2[15], range), |
451 | clamp_value(stg2[0] - stg2[8], range), |
452 | clamp_value(stg2[1] - stg2[9], range), |
453 | clamp_value(stg2[2] - stg2[10], range), |
454 | clamp_value(stg2[3] - stg2[11], range), |
455 | clamp_value(stg2[4] - stg2[12], range), |
456 | clamp_value(stg2[5] - stg2[13], range), |
457 | clamp_value(stg2[6] - stg2[14], range), |
458 | clamp_value(stg2[7] - stg2[15], range), |
459 | ]; |
460 | |
461 | // stage 4 |
462 | let stg4 = [ |
463 | stg3[0], |
464 | stg3[1], |
465 | stg3[2], |
466 | stg3[3], |
467 | stg3[4], |
468 | stg3[5], |
469 | stg3[6], |
470 | stg3[7], |
471 | half_btf(COSPI_INV[8], stg3[8], COSPI_INV[56], stg3[9], INV_COS_BIT), |
472 | half_btf(COSPI_INV[56], stg3[8], -COSPI_INV[8], stg3[9], INV_COS_BIT), |
473 | half_btf(COSPI_INV[40], stg3[10], COSPI_INV[24], stg3[11], INV_COS_BIT), |
474 | half_btf(COSPI_INV[24], stg3[10], -COSPI_INV[40], stg3[11], INV_COS_BIT), |
475 | half_btf(-COSPI_INV[56], stg3[12], COSPI_INV[8], stg3[13], INV_COS_BIT), |
476 | half_btf(COSPI_INV[8], stg3[12], COSPI_INV[56], stg3[13], INV_COS_BIT), |
477 | half_btf(-COSPI_INV[24], stg3[14], COSPI_INV[40], stg3[15], INV_COS_BIT), |
478 | half_btf(COSPI_INV[40], stg3[14], COSPI_INV[24], stg3[15], INV_COS_BIT), |
479 | ]; |
480 | |
481 | // stage 5 |
482 | let stg5 = [ |
483 | clamp_value(stg4[0] + stg4[4], range), |
484 | clamp_value(stg4[1] + stg4[5], range), |
485 | clamp_value(stg4[2] + stg4[6], range), |
486 | clamp_value(stg4[3] + stg4[7], range), |
487 | clamp_value(stg4[0] - stg4[4], range), |
488 | clamp_value(stg4[1] - stg4[5], range), |
489 | clamp_value(stg4[2] - stg4[6], range), |
490 | clamp_value(stg4[3] - stg4[7], range), |
491 | clamp_value(stg4[8] + stg4[12], range), |
492 | clamp_value(stg4[9] + stg4[13], range), |
493 | clamp_value(stg4[10] + stg4[14], range), |
494 | clamp_value(stg4[11] + stg4[15], range), |
495 | clamp_value(stg4[8] - stg4[12], range), |
496 | clamp_value(stg4[9] - stg4[13], range), |
497 | clamp_value(stg4[10] - stg4[14], range), |
498 | clamp_value(stg4[11] - stg4[15], range), |
499 | ]; |
500 | |
501 | // stage 6 |
502 | let stg6 = [ |
503 | stg5[0], |
504 | stg5[1], |
505 | stg5[2], |
506 | stg5[3], |
507 | half_btf(COSPI_INV[16], stg5[4], COSPI_INV[48], stg5[5], INV_COS_BIT), |
508 | half_btf(COSPI_INV[48], stg5[4], -COSPI_INV[16], stg5[5], INV_COS_BIT), |
509 | half_btf(-COSPI_INV[48], stg5[6], COSPI_INV[16], stg5[7], INV_COS_BIT), |
510 | half_btf(COSPI_INV[16], stg5[6], COSPI_INV[48], stg5[7], INV_COS_BIT), |
511 | stg5[8], |
512 | stg5[9], |
513 | stg5[10], |
514 | stg5[11], |
515 | half_btf(COSPI_INV[16], stg5[12], COSPI_INV[48], stg5[13], INV_COS_BIT), |
516 | half_btf(COSPI_INV[48], stg5[12], -COSPI_INV[16], stg5[13], INV_COS_BIT), |
517 | half_btf(-COSPI_INV[48], stg5[14], COSPI_INV[16], stg5[15], INV_COS_BIT), |
518 | half_btf(COSPI_INV[16], stg5[14], COSPI_INV[48], stg5[15], INV_COS_BIT), |
519 | ]; |
520 | |
521 | // stage 7 |
522 | let stg7 = [ |
523 | clamp_value(stg6[0] + stg6[2], range), |
524 | clamp_value(stg6[1] + stg6[3], range), |
525 | clamp_value(stg6[0] - stg6[2], range), |
526 | clamp_value(stg6[1] - stg6[3], range), |
527 | clamp_value(stg6[4] + stg6[6], range), |
528 | clamp_value(stg6[5] + stg6[7], range), |
529 | clamp_value(stg6[4] - stg6[6], range), |
530 | clamp_value(stg6[5] - stg6[7], range), |
531 | clamp_value(stg6[8] + stg6[10], range), |
532 | clamp_value(stg6[9] + stg6[11], range), |
533 | clamp_value(stg6[8] - stg6[10], range), |
534 | clamp_value(stg6[9] - stg6[11], range), |
535 | clamp_value(stg6[12] + stg6[14], range), |
536 | clamp_value(stg6[13] + stg6[15], range), |
537 | clamp_value(stg6[12] - stg6[14], range), |
538 | clamp_value(stg6[13] - stg6[15], range), |
539 | ]; |
540 | |
541 | // stage 8 |
542 | let stg8 = [ |
543 | stg7[0], |
544 | stg7[1], |
545 | half_btf(COSPI_INV[32], stg7[2], COSPI_INV[32], stg7[3], INV_COS_BIT), |
546 | half_btf(COSPI_INV[32], stg7[2], -COSPI_INV[32], stg7[3], INV_COS_BIT), |
547 | stg7[4], |
548 | stg7[5], |
549 | half_btf(COSPI_INV[32], stg7[6], COSPI_INV[32], stg7[7], INV_COS_BIT), |
550 | half_btf(COSPI_INV[32], stg7[6], -COSPI_INV[32], stg7[7], INV_COS_BIT), |
551 | stg7[8], |
552 | stg7[9], |
553 | half_btf(COSPI_INV[32], stg7[10], COSPI_INV[32], stg7[11], INV_COS_BIT), |
554 | half_btf(COSPI_INV[32], stg7[10], -COSPI_INV[32], stg7[11], INV_COS_BIT), |
555 | stg7[12], |
556 | stg7[13], |
557 | half_btf(COSPI_INV[32], stg7[14], COSPI_INV[32], stg7[15], INV_COS_BIT), |
558 | half_btf(COSPI_INV[32], stg7[14], -COSPI_INV[32], stg7[15], INV_COS_BIT), |
559 | ]; |
560 | |
561 | // stage 9 |
562 | output[0] = stg8[0]; |
563 | output[1] = -stg8[8]; |
564 | output[2] = stg8[12]; |
565 | output[3] = -stg8[4]; |
566 | output[4] = stg8[6]; |
567 | output[5] = -stg8[14]; |
568 | output[6] = stg8[10]; |
569 | output[7] = -stg8[2]; |
570 | output[8] = stg8[3]; |
571 | output[9] = -stg8[11]; |
572 | output[10] = stg8[15]; |
573 | output[11] = -stg8[7]; |
574 | output[12] = stg8[5]; |
575 | output[13] = -stg8[13]; |
576 | output[14] = stg8[9]; |
577 | output[15] = -stg8[1]; |
578 | } |
579 | |
580 | fn av1_iidentity16(input: &[i32], output: &mut [i32], _range: usize) { |
581 | outputimpl Iterator [..16] |
582 | .iter_mut() |
583 | .zip(input[..16].iter()) |
584 | .for_each(|(outp: &mut i32, inp: &i32)| *outp = round_shift(value:SQRT2 * 2 * *inp, bit:12)); |
585 | } |
586 | |
587 | fn av1_idct32(input: &[i32], output: &mut [i32], range: usize) { |
588 | assert!(input.len() >= 32); |
589 | assert!(output.len() >= 32); |
590 | |
591 | // stage 1; |
592 | let stg1 = [ |
593 | input[0], input[16], input[8], input[24], input[4], input[20], input[12], |
594 | input[28], input[2], input[18], input[10], input[26], input[6], input[22], |
595 | input[14], input[30], input[1], input[17], input[9], input[25], input[5], |
596 | input[21], input[13], input[29], input[3], input[19], input[11], |
597 | input[27], input[7], input[23], input[15], input[31], |
598 | ]; |
599 | |
600 | // stage 2 |
601 | let stg2 = [ |
602 | stg1[0], |
603 | stg1[1], |
604 | stg1[2], |
605 | stg1[3], |
606 | stg1[4], |
607 | stg1[5], |
608 | stg1[6], |
609 | stg1[7], |
610 | stg1[8], |
611 | stg1[9], |
612 | stg1[10], |
613 | stg1[11], |
614 | stg1[12], |
615 | stg1[13], |
616 | stg1[14], |
617 | stg1[15], |
618 | half_btf(COSPI_INV[62], stg1[16], -COSPI_INV[2], stg1[31], INV_COS_BIT), |
619 | half_btf(COSPI_INV[30], stg1[17], -COSPI_INV[34], stg1[30], INV_COS_BIT), |
620 | half_btf(COSPI_INV[46], stg1[18], -COSPI_INV[18], stg1[29], INV_COS_BIT), |
621 | half_btf(COSPI_INV[14], stg1[19], -COSPI_INV[50], stg1[28], INV_COS_BIT), |
622 | half_btf(COSPI_INV[54], stg1[20], -COSPI_INV[10], stg1[27], INV_COS_BIT), |
623 | half_btf(COSPI_INV[22], stg1[21], -COSPI_INV[42], stg1[26], INV_COS_BIT), |
624 | half_btf(COSPI_INV[38], stg1[22], -COSPI_INV[26], stg1[25], INV_COS_BIT), |
625 | half_btf(COSPI_INV[6], stg1[23], -COSPI_INV[58], stg1[24], INV_COS_BIT), |
626 | half_btf(COSPI_INV[58], stg1[23], COSPI_INV[6], stg1[24], INV_COS_BIT), |
627 | half_btf(COSPI_INV[26], stg1[22], COSPI_INV[38], stg1[25], INV_COS_BIT), |
628 | half_btf(COSPI_INV[42], stg1[21], COSPI_INV[22], stg1[26], INV_COS_BIT), |
629 | half_btf(COSPI_INV[10], stg1[20], COSPI_INV[54], stg1[27], INV_COS_BIT), |
630 | half_btf(COSPI_INV[50], stg1[19], COSPI_INV[14], stg1[28], INV_COS_BIT), |
631 | half_btf(COSPI_INV[18], stg1[18], COSPI_INV[46], stg1[29], INV_COS_BIT), |
632 | half_btf(COSPI_INV[34], stg1[17], COSPI_INV[30], stg1[30], INV_COS_BIT), |
633 | half_btf(COSPI_INV[2], stg1[16], COSPI_INV[62], stg1[31], INV_COS_BIT), |
634 | ]; |
635 | |
636 | // stage 3 |
637 | let stg3 = [ |
638 | stg2[0], |
639 | stg2[1], |
640 | stg2[2], |
641 | stg2[3], |
642 | stg2[4], |
643 | stg2[5], |
644 | stg2[6], |
645 | stg2[7], |
646 | half_btf(COSPI_INV[60], stg2[8], -COSPI_INV[4], stg2[15], INV_COS_BIT), |
647 | half_btf(COSPI_INV[28], stg2[9], -COSPI_INV[36], stg2[14], INV_COS_BIT), |
648 | half_btf(COSPI_INV[44], stg2[10], -COSPI_INV[20], stg2[13], INV_COS_BIT), |
649 | half_btf(COSPI_INV[12], stg2[11], -COSPI_INV[52], stg2[12], INV_COS_BIT), |
650 | half_btf(COSPI_INV[52], stg2[11], COSPI_INV[12], stg2[12], INV_COS_BIT), |
651 | half_btf(COSPI_INV[20], stg2[10], COSPI_INV[44], stg2[13], INV_COS_BIT), |
652 | half_btf(COSPI_INV[36], stg2[9], COSPI_INV[28], stg2[14], INV_COS_BIT), |
653 | half_btf(COSPI_INV[4], stg2[8], COSPI_INV[60], stg2[15], INV_COS_BIT), |
654 | clamp_value(stg2[16] + stg2[17], range), |
655 | clamp_value(stg2[16] - stg2[17], range), |
656 | clamp_value(-stg2[18] + stg2[19], range), |
657 | clamp_value(stg2[18] + stg2[19], range), |
658 | clamp_value(stg2[20] + stg2[21], range), |
659 | clamp_value(stg2[20] - stg2[21], range), |
660 | clamp_value(-stg2[22] + stg2[23], range), |
661 | clamp_value(stg2[22] + stg2[23], range), |
662 | clamp_value(stg2[24] + stg2[25], range), |
663 | clamp_value(stg2[24] - stg2[25], range), |
664 | clamp_value(-stg2[26] + stg2[27], range), |
665 | clamp_value(stg2[26] + stg2[27], range), |
666 | clamp_value(stg2[28] + stg2[29], range), |
667 | clamp_value(stg2[28] - stg2[29], range), |
668 | clamp_value(-stg2[30] + stg2[31], range), |
669 | clamp_value(stg2[30] + stg2[31], range), |
670 | ]; |
671 | |
672 | // stage 4 |
673 | let stg4 = [ |
674 | stg3[0], |
675 | stg3[1], |
676 | stg3[2], |
677 | stg3[3], |
678 | half_btf(COSPI_INV[56], stg3[4], -COSPI_INV[8], stg3[7], INV_COS_BIT), |
679 | half_btf(COSPI_INV[24], stg3[5], -COSPI_INV[40], stg3[6], INV_COS_BIT), |
680 | half_btf(COSPI_INV[40], stg3[5], COSPI_INV[24], stg3[6], INV_COS_BIT), |
681 | half_btf(COSPI_INV[8], stg3[4], COSPI_INV[56], stg3[7], INV_COS_BIT), |
682 | clamp_value(stg3[8] + stg3[9], range), |
683 | clamp_value(stg3[8] - stg3[9], range), |
684 | clamp_value(-stg3[10] + stg3[11], range), |
685 | clamp_value(stg3[10] + stg3[11], range), |
686 | clamp_value(stg3[12] + stg3[13], range), |
687 | clamp_value(stg3[12] - stg3[13], range), |
688 | clamp_value(-stg3[14] + stg3[15], range), |
689 | clamp_value(stg3[14] + stg3[15], range), |
690 | stg3[16], |
691 | half_btf(-COSPI_INV[8], stg3[17], COSPI_INV[56], stg3[30], INV_COS_BIT), |
692 | half_btf(-COSPI_INV[56], stg3[18], -COSPI_INV[8], stg3[29], INV_COS_BIT), |
693 | stg3[19], |
694 | stg3[20], |
695 | half_btf(-COSPI_INV[40], stg3[21], COSPI_INV[24], stg3[26], INV_COS_BIT), |
696 | half_btf(-COSPI_INV[24], stg3[22], -COSPI_INV[40], stg3[25], INV_COS_BIT), |
697 | stg3[23], |
698 | stg3[24], |
699 | half_btf(-COSPI_INV[40], stg3[22], COSPI_INV[24], stg3[25], INV_COS_BIT), |
700 | half_btf(COSPI_INV[24], stg3[21], COSPI_INV[40], stg3[26], INV_COS_BIT), |
701 | stg3[27], |
702 | stg3[28], |
703 | half_btf(-COSPI_INV[8], stg3[18], COSPI_INV[56], stg3[29], INV_COS_BIT), |
704 | half_btf(COSPI_INV[56], stg3[17], COSPI_INV[8], stg3[30], INV_COS_BIT), |
705 | stg3[31], |
706 | ]; |
707 | |
708 | // stage 5 |
709 | let stg5 = [ |
710 | half_btf(COSPI_INV[32], stg4[0], COSPI_INV[32], stg4[1], INV_COS_BIT), |
711 | half_btf(COSPI_INV[32], stg4[0], -COSPI_INV[32], stg4[1], INV_COS_BIT), |
712 | half_btf(COSPI_INV[48], stg4[2], -COSPI_INV[16], stg4[3], INV_COS_BIT), |
713 | half_btf(COSPI_INV[16], stg4[2], COSPI_INV[48], stg4[3], INV_COS_BIT), |
714 | clamp_value(stg4[4] + stg4[5], range), |
715 | clamp_value(stg4[4] - stg4[5], range), |
716 | clamp_value(-stg4[6] + stg4[7], range), |
717 | clamp_value(stg4[6] + stg4[7], range), |
718 | stg4[8], |
719 | half_btf(-COSPI_INV[16], stg4[9], COSPI_INV[48], stg4[14], INV_COS_BIT), |
720 | half_btf(-COSPI_INV[48], stg4[10], -COSPI_INV[16], stg4[13], INV_COS_BIT), |
721 | stg4[11], |
722 | stg4[12], |
723 | half_btf(-COSPI_INV[16], stg4[10], COSPI_INV[48], stg4[13], INV_COS_BIT), |
724 | half_btf(COSPI_INV[48], stg4[9], COSPI_INV[16], stg4[14], INV_COS_BIT), |
725 | stg4[15], |
726 | clamp_value(stg4[16] + stg4[19], range), |
727 | clamp_value(stg4[17] + stg4[18], range), |
728 | clamp_value(stg4[17] - stg4[18], range), |
729 | clamp_value(stg4[16] - stg4[19], range), |
730 | clamp_value(-stg4[20] + stg4[23], range), |
731 | clamp_value(-stg4[21] + stg4[22], range), |
732 | clamp_value(stg4[21] + stg4[22], range), |
733 | clamp_value(stg4[20] + stg4[23], range), |
734 | clamp_value(stg4[24] + stg4[27], range), |
735 | clamp_value(stg4[25] + stg4[26], range), |
736 | clamp_value(stg4[25] - stg4[26], range), |
737 | clamp_value(stg4[24] - stg4[27], range), |
738 | clamp_value(-stg4[28] + stg4[31], range), |
739 | clamp_value(-stg4[29] + stg4[30], range), |
740 | clamp_value(stg4[29] + stg4[30], range), |
741 | clamp_value(stg4[28] + stg4[31], range), |
742 | ]; |
743 | |
744 | // stage 6 |
745 | let stg6 = [ |
746 | clamp_value(stg5[0] + stg5[3], range), |
747 | clamp_value(stg5[1] + stg5[2], range), |
748 | clamp_value(stg5[1] - stg5[2], range), |
749 | clamp_value(stg5[0] - stg5[3], range), |
750 | stg5[4], |
751 | half_btf(-COSPI_INV[32], stg5[5], COSPI_INV[32], stg5[6], INV_COS_BIT), |
752 | half_btf(COSPI_INV[32], stg5[5], COSPI_INV[32], stg5[6], INV_COS_BIT), |
753 | stg5[7], |
754 | clamp_value(stg5[8] + stg5[11], range), |
755 | clamp_value(stg5[9] + stg5[10], range), |
756 | clamp_value(stg5[9] - stg5[10], range), |
757 | clamp_value(stg5[8] - stg5[11], range), |
758 | clamp_value(-stg5[12] + stg5[15], range), |
759 | clamp_value(-stg5[13] + stg5[14], range), |
760 | clamp_value(stg5[13] + stg5[14], range), |
761 | clamp_value(stg5[12] + stg5[15], range), |
762 | stg5[16], |
763 | stg5[17], |
764 | half_btf(-COSPI_INV[16], stg5[18], COSPI_INV[48], stg5[29], INV_COS_BIT), |
765 | half_btf(-COSPI_INV[16], stg5[19], COSPI_INV[48], stg5[28], INV_COS_BIT), |
766 | half_btf(-COSPI_INV[48], stg5[20], -COSPI_INV[16], stg5[27], INV_COS_BIT), |
767 | half_btf(-COSPI_INV[48], stg5[21], -COSPI_INV[16], stg5[26], INV_COS_BIT), |
768 | stg5[22], |
769 | stg5[23], |
770 | stg5[24], |
771 | stg5[25], |
772 | half_btf(-COSPI_INV[16], stg5[21], COSPI_INV[48], stg5[26], INV_COS_BIT), |
773 | half_btf(-COSPI_INV[16], stg5[20], COSPI_INV[48], stg5[27], INV_COS_BIT), |
774 | half_btf(COSPI_INV[48], stg5[19], COSPI_INV[16], stg5[28], INV_COS_BIT), |
775 | half_btf(COSPI_INV[48], stg5[18], COSPI_INV[16], stg5[29], INV_COS_BIT), |
776 | stg5[30], |
777 | stg5[31], |
778 | ]; |
779 | |
780 | // stage 7 |
781 | let stg7 = [ |
782 | clamp_value(stg6[0] + stg6[7], range), |
783 | clamp_value(stg6[1] + stg6[6], range), |
784 | clamp_value(stg6[2] + stg6[5], range), |
785 | clamp_value(stg6[3] + stg6[4], range), |
786 | clamp_value(stg6[3] - stg6[4], range), |
787 | clamp_value(stg6[2] - stg6[5], range), |
788 | clamp_value(stg6[1] - stg6[6], range), |
789 | clamp_value(stg6[0] - stg6[7], range), |
790 | stg6[8], |
791 | stg6[9], |
792 | half_btf(-COSPI_INV[32], stg6[10], COSPI_INV[32], stg6[13], INV_COS_BIT), |
793 | half_btf(-COSPI_INV[32], stg6[11], COSPI_INV[32], stg6[12], INV_COS_BIT), |
794 | half_btf(COSPI_INV[32], stg6[11], COSPI_INV[32], stg6[12], INV_COS_BIT), |
795 | half_btf(COSPI_INV[32], stg6[10], COSPI_INV[32], stg6[13], INV_COS_BIT), |
796 | stg6[14], |
797 | stg6[15], |
798 | clamp_value(stg6[16] + stg6[23], range), |
799 | clamp_value(stg6[17] + stg6[22], range), |
800 | clamp_value(stg6[18] + stg6[21], range), |
801 | clamp_value(stg6[19] + stg6[20], range), |
802 | clamp_value(stg6[19] - stg6[20], range), |
803 | clamp_value(stg6[18] - stg6[21], range), |
804 | clamp_value(stg6[17] - stg6[22], range), |
805 | clamp_value(stg6[16] - stg6[23], range), |
806 | clamp_value(-stg6[24] + stg6[31], range), |
807 | clamp_value(-stg6[25] + stg6[30], range), |
808 | clamp_value(-stg6[26] + stg6[29], range), |
809 | clamp_value(-stg6[27] + stg6[28], range), |
810 | clamp_value(stg6[27] + stg6[28], range), |
811 | clamp_value(stg6[26] + stg6[29], range), |
812 | clamp_value(stg6[25] + stg6[30], range), |
813 | clamp_value(stg6[24] + stg6[31], range), |
814 | ]; |
815 | |
816 | // stage 8 |
817 | let stg8 = [ |
818 | clamp_value(stg7[0] + stg7[15], range), |
819 | clamp_value(stg7[1] + stg7[14], range), |
820 | clamp_value(stg7[2] + stg7[13], range), |
821 | clamp_value(stg7[3] + stg7[12], range), |
822 | clamp_value(stg7[4] + stg7[11], range), |
823 | clamp_value(stg7[5] + stg7[10], range), |
824 | clamp_value(stg7[6] + stg7[9], range), |
825 | clamp_value(stg7[7] + stg7[8], range), |
826 | clamp_value(stg7[7] - stg7[8], range), |
827 | clamp_value(stg7[6] - stg7[9], range), |
828 | clamp_value(stg7[5] - stg7[10], range), |
829 | clamp_value(stg7[4] - stg7[11], range), |
830 | clamp_value(stg7[3] - stg7[12], range), |
831 | clamp_value(stg7[2] - stg7[13], range), |
832 | clamp_value(stg7[1] - stg7[14], range), |
833 | clamp_value(stg7[0] - stg7[15], range), |
834 | stg7[16], |
835 | stg7[17], |
836 | stg7[18], |
837 | stg7[19], |
838 | half_btf(-COSPI_INV[32], stg7[20], COSPI_INV[32], stg7[27], INV_COS_BIT), |
839 | half_btf(-COSPI_INV[32], stg7[21], COSPI_INV[32], stg7[26], INV_COS_BIT), |
840 | half_btf(-COSPI_INV[32], stg7[22], COSPI_INV[32], stg7[25], INV_COS_BIT), |
841 | half_btf(-COSPI_INV[32], stg7[23], COSPI_INV[32], stg7[24], INV_COS_BIT), |
842 | half_btf(COSPI_INV[32], stg7[23], COSPI_INV[32], stg7[24], INV_COS_BIT), |
843 | half_btf(COSPI_INV[32], stg7[22], COSPI_INV[32], stg7[25], INV_COS_BIT), |
844 | half_btf(COSPI_INV[32], stg7[21], COSPI_INV[32], stg7[26], INV_COS_BIT), |
845 | half_btf(COSPI_INV[32], stg7[20], COSPI_INV[32], stg7[27], INV_COS_BIT), |
846 | stg7[28], |
847 | stg7[29], |
848 | stg7[30], |
849 | stg7[31], |
850 | ]; |
851 | |
852 | // stage 9 |
853 | output[0] = clamp_value(stg8[0] + stg8[31], range); |
854 | output[1] = clamp_value(stg8[1] + stg8[30], range); |
855 | output[2] = clamp_value(stg8[2] + stg8[29], range); |
856 | output[3] = clamp_value(stg8[3] + stg8[28], range); |
857 | output[4] = clamp_value(stg8[4] + stg8[27], range); |
858 | output[5] = clamp_value(stg8[5] + stg8[26], range); |
859 | output[6] = clamp_value(stg8[6] + stg8[25], range); |
860 | output[7] = clamp_value(stg8[7] + stg8[24], range); |
861 | output[8] = clamp_value(stg8[8] + stg8[23], range); |
862 | output[9] = clamp_value(stg8[9] + stg8[22], range); |
863 | output[10] = clamp_value(stg8[10] + stg8[21], range); |
864 | output[11] = clamp_value(stg8[11] + stg8[20], range); |
865 | output[12] = clamp_value(stg8[12] + stg8[19], range); |
866 | output[13] = clamp_value(stg8[13] + stg8[18], range); |
867 | output[14] = clamp_value(stg8[14] + stg8[17], range); |
868 | output[15] = clamp_value(stg8[15] + stg8[16], range); |
869 | output[16] = clamp_value(stg8[15] - stg8[16], range); |
870 | output[17] = clamp_value(stg8[14] - stg8[17], range); |
871 | output[18] = clamp_value(stg8[13] - stg8[18], range); |
872 | output[19] = clamp_value(stg8[12] - stg8[19], range); |
873 | output[20] = clamp_value(stg8[11] - stg8[20], range); |
874 | output[21] = clamp_value(stg8[10] - stg8[21], range); |
875 | output[22] = clamp_value(stg8[9] - stg8[22], range); |
876 | output[23] = clamp_value(stg8[8] - stg8[23], range); |
877 | output[24] = clamp_value(stg8[7] - stg8[24], range); |
878 | output[25] = clamp_value(stg8[6] - stg8[25], range); |
879 | output[26] = clamp_value(stg8[5] - stg8[26], range); |
880 | output[27] = clamp_value(stg8[4] - stg8[27], range); |
881 | output[28] = clamp_value(stg8[3] - stg8[28], range); |
882 | output[29] = clamp_value(stg8[2] - stg8[29], range); |
883 | output[30] = clamp_value(stg8[1] - stg8[30], range); |
884 | output[31] = clamp_value(stg8[0] - stg8[31], range); |
885 | } |
886 | |
/// 32-point inverse identity transform: writes `4 * input[i]` to `output[i]`
/// for each of the first 32 coefficients.
///
/// Elements of `output` beyond index 31 are left untouched. `_range` is
/// accepted only so the signature matches the other 1-D inverse transforms
/// in this file; it is not read.
///
/// # Panics
///
/// - If `input` or `output` have fewer than 32 items.
fn av1_iidentity32(input: &[i32], output: &mut [i32], _range: usize) {
  // Slicing both sides to exactly 32 elements enforces the length
  // precondition up front and lets the zipped loop run without per-element
  // bounds checks.
  output[..32]
    .iter_mut()
    .zip(input[..32].iter())
    .for_each(|(outp, inp)| *outp = 4 * *inp);
}
893 | |
894 | fn av1_idct64(input: &[i32], output: &mut [i32], range: usize) { |
895 | assert!(input.len() >= 64); |
896 | assert!(output.len() >= 64); |
897 | |
898 | // stage 1; |
899 | let stg1 = [ |
900 | input[0], input[32], input[16], input[48], input[8], input[40], input[24], |
901 | input[56], input[4], input[36], input[20], input[52], input[12], |
902 | input[44], input[28], input[60], input[2], input[34], input[18], |
903 | input[50], input[10], input[42], input[26], input[58], input[6], |
904 | input[38], input[22], input[54], input[14], input[46], input[30], |
905 | input[62], input[1], input[33], input[17], input[49], input[9], input[41], |
906 | input[25], input[57], input[5], input[37], input[21], input[53], |
907 | input[13], input[45], input[29], input[61], input[3], input[35], |
908 | input[19], input[51], input[11], input[43], input[27], input[59], |
909 | input[7], input[39], input[23], input[55], input[15], input[47], |
910 | input[31], input[63], |
911 | ]; |
912 | |
913 | // stage 2 |
914 | let stg2 = [ |
915 | stg1[0], |
916 | stg1[1], |
917 | stg1[2], |
918 | stg1[3], |
919 | stg1[4], |
920 | stg1[5], |
921 | stg1[6], |
922 | stg1[7], |
923 | stg1[8], |
924 | stg1[9], |
925 | stg1[10], |
926 | stg1[11], |
927 | stg1[12], |
928 | stg1[13], |
929 | stg1[14], |
930 | stg1[15], |
931 | stg1[16], |
932 | stg1[17], |
933 | stg1[18], |
934 | stg1[19], |
935 | stg1[20], |
936 | stg1[21], |
937 | stg1[22], |
938 | stg1[23], |
939 | stg1[24], |
940 | stg1[25], |
941 | stg1[26], |
942 | stg1[27], |
943 | stg1[28], |
944 | stg1[29], |
945 | stg1[30], |
946 | stg1[31], |
947 | half_btf(COSPI_INV[63], stg1[32], -COSPI_INV[1], stg1[63], INV_COS_BIT), |
948 | half_btf(COSPI_INV[31], stg1[33], -COSPI_INV[33], stg1[62], INV_COS_BIT), |
949 | half_btf(COSPI_INV[47], stg1[34], -COSPI_INV[17], stg1[61], INV_COS_BIT), |
950 | half_btf(COSPI_INV[15], stg1[35], -COSPI_INV[49], stg1[60], INV_COS_BIT), |
951 | half_btf(COSPI_INV[55], stg1[36], -COSPI_INV[9], stg1[59], INV_COS_BIT), |
952 | half_btf(COSPI_INV[23], stg1[37], -COSPI_INV[41], stg1[58], INV_COS_BIT), |
953 | half_btf(COSPI_INV[39], stg1[38], -COSPI_INV[25], stg1[57], INV_COS_BIT), |
954 | half_btf(COSPI_INV[7], stg1[39], -COSPI_INV[57], stg1[56], INV_COS_BIT), |
955 | half_btf(COSPI_INV[59], stg1[40], -COSPI_INV[5], stg1[55], INV_COS_BIT), |
956 | half_btf(COSPI_INV[27], stg1[41], -COSPI_INV[37], stg1[54], INV_COS_BIT), |
957 | half_btf(COSPI_INV[43], stg1[42], -COSPI_INV[21], stg1[53], INV_COS_BIT), |
958 | half_btf(COSPI_INV[11], stg1[43], -COSPI_INV[53], stg1[52], INV_COS_BIT), |
959 | half_btf(COSPI_INV[51], stg1[44], -COSPI_INV[13], stg1[51], INV_COS_BIT), |
960 | half_btf(COSPI_INV[19], stg1[45], -COSPI_INV[45], stg1[50], INV_COS_BIT), |
961 | half_btf(COSPI_INV[35], stg1[46], -COSPI_INV[29], stg1[49], INV_COS_BIT), |
962 | half_btf(COSPI_INV[3], stg1[47], -COSPI_INV[61], stg1[48], INV_COS_BIT), |
963 | half_btf(COSPI_INV[61], stg1[47], COSPI_INV[3], stg1[48], INV_COS_BIT), |
964 | half_btf(COSPI_INV[29], stg1[46], COSPI_INV[35], stg1[49], INV_COS_BIT), |
965 | half_btf(COSPI_INV[45], stg1[45], COSPI_INV[19], stg1[50], INV_COS_BIT), |
966 | half_btf(COSPI_INV[13], stg1[44], COSPI_INV[51], stg1[51], INV_COS_BIT), |
967 | half_btf(COSPI_INV[53], stg1[43], COSPI_INV[11], stg1[52], INV_COS_BIT), |
968 | half_btf(COSPI_INV[21], stg1[42], COSPI_INV[43], stg1[53], INV_COS_BIT), |
969 | half_btf(COSPI_INV[37], stg1[41], COSPI_INV[27], stg1[54], INV_COS_BIT), |
970 | half_btf(COSPI_INV[5], stg1[40], COSPI_INV[59], stg1[55], INV_COS_BIT), |
971 | half_btf(COSPI_INV[57], stg1[39], COSPI_INV[7], stg1[56], INV_COS_BIT), |
972 | half_btf(COSPI_INV[25], stg1[38], COSPI_INV[39], stg1[57], INV_COS_BIT), |
973 | half_btf(COSPI_INV[41], stg1[37], COSPI_INV[23], stg1[58], INV_COS_BIT), |
974 | half_btf(COSPI_INV[9], stg1[36], COSPI_INV[55], stg1[59], INV_COS_BIT), |
975 | half_btf(COSPI_INV[49], stg1[35], COSPI_INV[15], stg1[60], INV_COS_BIT), |
976 | half_btf(COSPI_INV[17], stg1[34], COSPI_INV[47], stg1[61], INV_COS_BIT), |
977 | half_btf(COSPI_INV[33], stg1[33], COSPI_INV[31], stg1[62], INV_COS_BIT), |
978 | half_btf(COSPI_INV[1], stg1[32], COSPI_INV[63], stg1[63], INV_COS_BIT), |
979 | ]; |
980 | |
981 | // stage 3 |
982 | let stg3 = [ |
983 | stg2[0], |
984 | stg2[1], |
985 | stg2[2], |
986 | stg2[3], |
987 | stg2[4], |
988 | stg2[5], |
989 | stg2[6], |
990 | stg2[7], |
991 | stg2[8], |
992 | stg2[9], |
993 | stg2[10], |
994 | stg2[11], |
995 | stg2[12], |
996 | stg2[13], |
997 | stg2[14], |
998 | stg2[15], |
999 | half_btf(COSPI_INV[62], stg2[16], -COSPI_INV[2], stg2[31], INV_COS_BIT), |
1000 | half_btf(COSPI_INV[30], stg2[17], -COSPI_INV[34], stg2[30], INV_COS_BIT), |
1001 | half_btf(COSPI_INV[46], stg2[18], -COSPI_INV[18], stg2[29], INV_COS_BIT), |
1002 | half_btf(COSPI_INV[14], stg2[19], -COSPI_INV[50], stg2[28], INV_COS_BIT), |
1003 | half_btf(COSPI_INV[54], stg2[20], -COSPI_INV[10], stg2[27], INV_COS_BIT), |
1004 | half_btf(COSPI_INV[22], stg2[21], -COSPI_INV[42], stg2[26], INV_COS_BIT), |
1005 | half_btf(COSPI_INV[38], stg2[22], -COSPI_INV[26], stg2[25], INV_COS_BIT), |
1006 | half_btf(COSPI_INV[6], stg2[23], -COSPI_INV[58], stg2[24], INV_COS_BIT), |
1007 | half_btf(COSPI_INV[58], stg2[23], COSPI_INV[6], stg2[24], INV_COS_BIT), |
1008 | half_btf(COSPI_INV[26], stg2[22], COSPI_INV[38], stg2[25], INV_COS_BIT), |
1009 | half_btf(COSPI_INV[42], stg2[21], COSPI_INV[22], stg2[26], INV_COS_BIT), |
1010 | half_btf(COSPI_INV[10], stg2[20], COSPI_INV[54], stg2[27], INV_COS_BIT), |
1011 | half_btf(COSPI_INV[50], stg2[19], COSPI_INV[14], stg2[28], INV_COS_BIT), |
1012 | half_btf(COSPI_INV[18], stg2[18], COSPI_INV[46], stg2[29], INV_COS_BIT), |
1013 | half_btf(COSPI_INV[34], stg2[17], COSPI_INV[30], stg2[30], INV_COS_BIT), |
1014 | half_btf(COSPI_INV[2], stg2[16], COSPI_INV[62], stg2[31], INV_COS_BIT), |
1015 | clamp_value(stg2[32] + stg2[33], range), |
1016 | clamp_value(stg2[32] - stg2[33], range), |
1017 | clamp_value(-stg2[34] + stg2[35], range), |
1018 | clamp_value(stg2[34] + stg2[35], range), |
1019 | clamp_value(stg2[36] + stg2[37], range), |
1020 | clamp_value(stg2[36] - stg2[37], range), |
1021 | clamp_value(-stg2[38] + stg2[39], range), |
1022 | clamp_value(stg2[38] + stg2[39], range), |
1023 | clamp_value(stg2[40] + stg2[41], range), |
1024 | clamp_value(stg2[40] - stg2[41], range), |
1025 | clamp_value(-stg2[42] + stg2[43], range), |
1026 | clamp_value(stg2[42] + stg2[43], range), |
1027 | clamp_value(stg2[44] + stg2[45], range), |
1028 | clamp_value(stg2[44] - stg2[45], range), |
1029 | clamp_value(-stg2[46] + stg2[47], range), |
1030 | clamp_value(stg2[46] + stg2[47], range), |
1031 | clamp_value(stg2[48] + stg2[49], range), |
1032 | clamp_value(stg2[48] - stg2[49], range), |
1033 | clamp_value(-stg2[50] + stg2[51], range), |
1034 | clamp_value(stg2[50] + stg2[51], range), |
1035 | clamp_value(stg2[52] + stg2[53], range), |
1036 | clamp_value(stg2[52] - stg2[53], range), |
1037 | clamp_value(-stg2[54] + stg2[55], range), |
1038 | clamp_value(stg2[54] + stg2[55], range), |
1039 | clamp_value(stg2[56] + stg2[57], range), |
1040 | clamp_value(stg2[56] - stg2[57], range), |
1041 | clamp_value(-stg2[58] + stg2[59], range), |
1042 | clamp_value(stg2[58] + stg2[59], range), |
1043 | clamp_value(stg2[60] + stg2[61], range), |
1044 | clamp_value(stg2[60] - stg2[61], range), |
1045 | clamp_value(-stg2[62] + stg2[63], range), |
1046 | clamp_value(stg2[62] + stg2[63], range), |
1047 | ]; |
1048 | |
1049 | // stage 4 |
1050 | let stg4 = [ |
1051 | stg3[0], |
1052 | stg3[1], |
1053 | stg3[2], |
1054 | stg3[3], |
1055 | stg3[4], |
1056 | stg3[5], |
1057 | stg3[6], |
1058 | stg3[7], |
1059 | half_btf(COSPI_INV[60], stg3[8], -COSPI_INV[4], stg3[15], INV_COS_BIT), |
1060 | half_btf(COSPI_INV[28], stg3[9], -COSPI_INV[36], stg3[14], INV_COS_BIT), |
1061 | half_btf(COSPI_INV[44], stg3[10], -COSPI_INV[20], stg3[13], INV_COS_BIT), |
1062 | half_btf(COSPI_INV[12], stg3[11], -COSPI_INV[52], stg3[12], INV_COS_BIT), |
1063 | half_btf(COSPI_INV[52], stg3[11], COSPI_INV[12], stg3[12], INV_COS_BIT), |
1064 | half_btf(COSPI_INV[20], stg3[10], COSPI_INV[44], stg3[13], INV_COS_BIT), |
1065 | half_btf(COSPI_INV[36], stg3[9], COSPI_INV[28], stg3[14], INV_COS_BIT), |
1066 | half_btf(COSPI_INV[4], stg3[8], COSPI_INV[60], stg3[15], INV_COS_BIT), |
1067 | clamp_value(stg3[16] + stg3[17], range), |
1068 | clamp_value(stg3[16] - stg3[17], range), |
1069 | clamp_value(-stg3[18] + stg3[19], range), |
1070 | clamp_value(stg3[18] + stg3[19], range), |
1071 | clamp_value(stg3[20] + stg3[21], range), |
1072 | clamp_value(stg3[20] - stg3[21], range), |
1073 | clamp_value(-stg3[22] + stg3[23], range), |
1074 | clamp_value(stg3[22] + stg3[23], range), |
1075 | clamp_value(stg3[24] + stg3[25], range), |
1076 | clamp_value(stg3[24] - stg3[25], range), |
1077 | clamp_value(-stg3[26] + stg3[27], range), |
1078 | clamp_value(stg3[26] + stg3[27], range), |
1079 | clamp_value(stg3[28] + stg3[29], range), |
1080 | clamp_value(stg3[28] - stg3[29], range), |
1081 | clamp_value(-stg3[30] + stg3[31], range), |
1082 | clamp_value(stg3[30] + stg3[31], range), |
1083 | stg3[32], |
1084 | half_btf(-COSPI_INV[4], stg3[33], COSPI_INV[60], stg3[62], INV_COS_BIT), |
1085 | half_btf(-COSPI_INV[60], stg3[34], -COSPI_INV[4], stg3[61], INV_COS_BIT), |
1086 | stg3[35], |
1087 | stg3[36], |
1088 | half_btf(-COSPI_INV[36], stg3[37], COSPI_INV[28], stg3[58], INV_COS_BIT), |
1089 | half_btf(-COSPI_INV[28], stg3[38], -COSPI_INV[36], stg3[57], INV_COS_BIT), |
1090 | stg3[39], |
1091 | stg3[40], |
1092 | half_btf(-COSPI_INV[20], stg3[41], COSPI_INV[44], stg3[54], INV_COS_BIT), |
1093 | half_btf(-COSPI_INV[44], stg3[42], -COSPI_INV[20], stg3[53], INV_COS_BIT), |
1094 | stg3[43], |
1095 | stg3[44], |
1096 | half_btf(-COSPI_INV[52], stg3[45], COSPI_INV[12], stg3[50], INV_COS_BIT), |
1097 | half_btf(-COSPI_INV[12], stg3[46], -COSPI_INV[52], stg3[49], INV_COS_BIT), |
1098 | stg3[47], |
1099 | stg3[48], |
1100 | half_btf(-COSPI_INV[52], stg3[46], COSPI_INV[12], stg3[49], INV_COS_BIT), |
1101 | half_btf(COSPI_INV[12], stg3[45], COSPI_INV[52], stg3[50], INV_COS_BIT), |
1102 | stg3[51], |
1103 | stg3[52], |
1104 | half_btf(-COSPI_INV[20], stg3[42], COSPI_INV[44], stg3[53], INV_COS_BIT), |
1105 | half_btf(COSPI_INV[44], stg3[41], COSPI_INV[20], stg3[54], INV_COS_BIT), |
1106 | stg3[55], |
1107 | stg3[56], |
1108 | half_btf(-COSPI_INV[36], stg3[38], COSPI_INV[28], stg3[57], INV_COS_BIT), |
1109 | half_btf(COSPI_INV[28], stg3[37], COSPI_INV[36], stg3[58], INV_COS_BIT), |
1110 | stg3[59], |
1111 | stg3[60], |
1112 | half_btf(-COSPI_INV[4], stg3[34], COSPI_INV[60], stg3[61], INV_COS_BIT), |
1113 | half_btf(COSPI_INV[60], stg3[33], COSPI_INV[4], stg3[62], INV_COS_BIT), |
1114 | stg3[63], |
1115 | ]; |
1116 | |
1117 | // stage 5 |
1118 | let stg5 = [ |
1119 | stg4[0], |
1120 | stg4[1], |
1121 | stg4[2], |
1122 | stg4[3], |
1123 | half_btf(COSPI_INV[56], stg4[4], -COSPI_INV[8], stg4[7], INV_COS_BIT), |
1124 | half_btf(COSPI_INV[24], stg4[5], -COSPI_INV[40], stg4[6], INV_COS_BIT), |
1125 | half_btf(COSPI_INV[40], stg4[5], COSPI_INV[24], stg4[6], INV_COS_BIT), |
1126 | half_btf(COSPI_INV[8], stg4[4], COSPI_INV[56], stg4[7], INV_COS_BIT), |
1127 | clamp_value(stg4[8] + stg4[9], range), |
1128 | clamp_value(stg4[8] - stg4[9], range), |
1129 | clamp_value(-stg4[10] + stg4[11], range), |
1130 | clamp_value(stg4[10] + stg4[11], range), |
1131 | clamp_value(stg4[12] + stg4[13], range), |
1132 | clamp_value(stg4[12] - stg4[13], range), |
1133 | clamp_value(-stg4[14] + stg4[15], range), |
1134 | clamp_value(stg4[14] + stg4[15], range), |
1135 | stg4[16], |
1136 | half_btf(-COSPI_INV[8], stg4[17], COSPI_INV[56], stg4[30], INV_COS_BIT), |
1137 | half_btf(-COSPI_INV[56], stg4[18], -COSPI_INV[8], stg4[29], INV_COS_BIT), |
1138 | stg4[19], |
1139 | stg4[20], |
1140 | half_btf(-COSPI_INV[40], stg4[21], COSPI_INV[24], stg4[26], INV_COS_BIT), |
1141 | half_btf(-COSPI_INV[24], stg4[22], -COSPI_INV[40], stg4[25], INV_COS_BIT), |
1142 | stg4[23], |
1143 | stg4[24], |
1144 | half_btf(-COSPI_INV[40], stg4[22], COSPI_INV[24], stg4[25], INV_COS_BIT), |
1145 | half_btf(COSPI_INV[24], stg4[21], COSPI_INV[40], stg4[26], INV_COS_BIT), |
1146 | stg4[27], |
1147 | stg4[28], |
1148 | half_btf(-COSPI_INV[8], stg4[18], COSPI_INV[56], stg4[29], INV_COS_BIT), |
1149 | half_btf(COSPI_INV[56], stg4[17], COSPI_INV[8], stg4[30], INV_COS_BIT), |
1150 | stg4[31], |
1151 | clamp_value(stg4[32] + stg4[35], range), |
1152 | clamp_value(stg4[33] + stg4[34], range), |
1153 | clamp_value(stg4[33] - stg4[34], range), |
1154 | clamp_value(stg4[32] - stg4[35], range), |
1155 | clamp_value(-stg4[36] + stg4[39], range), |
1156 | clamp_value(-stg4[37] + stg4[38], range), |
1157 | clamp_value(stg4[37] + stg4[38], range), |
1158 | clamp_value(stg4[36] + stg4[39], range), |
1159 | clamp_value(stg4[40] + stg4[43], range), |
1160 | clamp_value(stg4[41] + stg4[42], range), |
1161 | clamp_value(stg4[41] - stg4[42], range), |
1162 | clamp_value(stg4[40] - stg4[43], range), |
1163 | clamp_value(-stg4[44] + stg4[47], range), |
1164 | clamp_value(-stg4[45] + stg4[46], range), |
1165 | clamp_value(stg4[45] + stg4[46], range), |
1166 | clamp_value(stg4[44] + stg4[47], range), |
1167 | clamp_value(stg4[48] + stg4[51], range), |
1168 | clamp_value(stg4[49] + stg4[50], range), |
1169 | clamp_value(stg4[49] - stg4[50], range), |
1170 | clamp_value(stg4[48] - stg4[51], range), |
1171 | clamp_value(-stg4[52] + stg4[55], range), |
1172 | clamp_value(-stg4[53] + stg4[54], range), |
1173 | clamp_value(stg4[53] + stg4[54], range), |
1174 | clamp_value(stg4[52] + stg4[55], range), |
1175 | clamp_value(stg4[56] + stg4[59], range), |
1176 | clamp_value(stg4[57] + stg4[58], range), |
1177 | clamp_value(stg4[57] - stg4[58], range), |
1178 | clamp_value(stg4[56] - stg4[59], range), |
1179 | clamp_value(-stg4[60] + stg4[63], range), |
1180 | clamp_value(-stg4[61] + stg4[62], range), |
1181 | clamp_value(stg4[61] + stg4[62], range), |
1182 | clamp_value(stg4[60] + stg4[63], range), |
1183 | ]; |
1184 | |
1185 | // stage 6 |
1186 | let stg6 = [ |
1187 | half_btf(COSPI_INV[32], stg5[0], COSPI_INV[32], stg5[1], INV_COS_BIT), |
1188 | half_btf(COSPI_INV[32], stg5[0], -COSPI_INV[32], stg5[1], INV_COS_BIT), |
1189 | half_btf(COSPI_INV[48], stg5[2], -COSPI_INV[16], stg5[3], INV_COS_BIT), |
1190 | half_btf(COSPI_INV[16], stg5[2], COSPI_INV[48], stg5[3], INV_COS_BIT), |
1191 | clamp_value(stg5[4] + stg5[5], range), |
1192 | clamp_value(stg5[4] - stg5[5], range), |
1193 | clamp_value(-stg5[6] + stg5[7], range), |
1194 | clamp_value(stg5[6] + stg5[7], range), |
1195 | stg5[8], |
1196 | half_btf(-COSPI_INV[16], stg5[9], COSPI_INV[48], stg5[14], INV_COS_BIT), |
1197 | half_btf(-COSPI_INV[48], stg5[10], -COSPI_INV[16], stg5[13], INV_COS_BIT), |
1198 | stg5[11], |
1199 | stg5[12], |
1200 | half_btf(-COSPI_INV[16], stg5[10], COSPI_INV[48], stg5[13], INV_COS_BIT), |
1201 | half_btf(COSPI_INV[48], stg5[9], COSPI_INV[16], stg5[14], INV_COS_BIT), |
1202 | stg5[15], |
1203 | clamp_value(stg5[16] + stg5[19], range), |
1204 | clamp_value(stg5[17] + stg5[18], range), |
1205 | clamp_value(stg5[17] - stg5[18], range), |
1206 | clamp_value(stg5[16] - stg5[19], range), |
1207 | clamp_value(-stg5[20] + stg5[23], range), |
1208 | clamp_value(-stg5[21] + stg5[22], range), |
1209 | clamp_value(stg5[21] + stg5[22], range), |
1210 | clamp_value(stg5[20] + stg5[23], range), |
1211 | clamp_value(stg5[24] + stg5[27], range), |
1212 | clamp_value(stg5[25] + stg5[26], range), |
1213 | clamp_value(stg5[25] - stg5[26], range), |
1214 | clamp_value(stg5[24] - stg5[27], range), |
1215 | clamp_value(-stg5[28] + stg5[31], range), |
1216 | clamp_value(-stg5[29] + stg5[30], range), |
1217 | clamp_value(stg5[29] + stg5[30], range), |
1218 | clamp_value(stg5[28] + stg5[31], range), |
1219 | stg5[32], |
1220 | stg5[33], |
1221 | half_btf(-COSPI_INV[8], stg5[34], COSPI_INV[56], stg5[61], INV_COS_BIT), |
1222 | half_btf(-COSPI_INV[8], stg5[35], COSPI_INV[56], stg5[60], INV_COS_BIT), |
1223 | half_btf(-COSPI_INV[56], stg5[36], -COSPI_INV[8], stg5[59], INV_COS_BIT), |
1224 | half_btf(-COSPI_INV[56], stg5[37], -COSPI_INV[8], stg5[58], INV_COS_BIT), |
1225 | stg5[38], |
1226 | stg5[39], |
1227 | stg5[40], |
1228 | stg5[41], |
1229 | half_btf(-COSPI_INV[40], stg5[42], COSPI_INV[24], stg5[53], INV_COS_BIT), |
1230 | half_btf(-COSPI_INV[40], stg5[43], COSPI_INV[24], stg5[52], INV_COS_BIT), |
1231 | half_btf(-COSPI_INV[24], stg5[44], -COSPI_INV[40], stg5[51], INV_COS_BIT), |
1232 | half_btf(-COSPI_INV[24], stg5[45], -COSPI_INV[40], stg5[50], INV_COS_BIT), |
1233 | stg5[46], |
1234 | stg5[47], |
1235 | stg5[48], |
1236 | stg5[49], |
1237 | half_btf(-COSPI_INV[40], stg5[45], COSPI_INV[24], stg5[50], INV_COS_BIT), |
1238 | half_btf(-COSPI_INV[40], stg5[44], COSPI_INV[24], stg5[51], INV_COS_BIT), |
1239 | half_btf(COSPI_INV[24], stg5[43], COSPI_INV[40], stg5[52], INV_COS_BIT), |
1240 | half_btf(COSPI_INV[24], stg5[42], COSPI_INV[40], stg5[53], INV_COS_BIT), |
1241 | stg5[54], |
1242 | stg5[55], |
1243 | stg5[56], |
1244 | stg5[57], |
1245 | half_btf(-COSPI_INV[8], stg5[37], COSPI_INV[56], stg5[58], INV_COS_BIT), |
1246 | half_btf(-COSPI_INV[8], stg5[36], COSPI_INV[56], stg5[59], INV_COS_BIT), |
1247 | half_btf(COSPI_INV[56], stg5[35], COSPI_INV[8], stg5[60], INV_COS_BIT), |
1248 | half_btf(COSPI_INV[56], stg5[34], COSPI_INV[8], stg5[61], INV_COS_BIT), |
1249 | stg5[62], |
1250 | stg5[63], |
1251 | ]; |
1252 | |
1253 | // stage 7 |
1254 | let stg7 = [ |
1255 | clamp_value(stg6[0] + stg6[3], range), |
1256 | clamp_value(stg6[1] + stg6[2], range), |
1257 | clamp_value(stg6[1] - stg6[2], range), |
1258 | clamp_value(stg6[0] - stg6[3], range), |
1259 | stg6[4], |
1260 | half_btf(-COSPI_INV[32], stg6[5], COSPI_INV[32], stg6[6], INV_COS_BIT), |
1261 | half_btf(COSPI_INV[32], stg6[5], COSPI_INV[32], stg6[6], INV_COS_BIT), |
1262 | stg6[7], |
1263 | clamp_value(stg6[8] + stg6[11], range), |
1264 | clamp_value(stg6[9] + stg6[10], range), |
1265 | clamp_value(stg6[9] - stg6[10], range), |
1266 | clamp_value(stg6[8] - stg6[11], range), |
1267 | clamp_value(-stg6[12] + stg6[15], range), |
1268 | clamp_value(-stg6[13] + stg6[14], range), |
1269 | clamp_value(stg6[13] + stg6[14], range), |
1270 | clamp_value(stg6[12] + stg6[15], range), |
1271 | stg6[16], |
1272 | stg6[17], |
1273 | half_btf(-COSPI_INV[16], stg6[18], COSPI_INV[48], stg6[29], INV_COS_BIT), |
1274 | half_btf(-COSPI_INV[16], stg6[19], COSPI_INV[48], stg6[28], INV_COS_BIT), |
1275 | half_btf(-COSPI_INV[48], stg6[20], -COSPI_INV[16], stg6[27], INV_COS_BIT), |
1276 | half_btf(-COSPI_INV[48], stg6[21], -COSPI_INV[16], stg6[26], INV_COS_BIT), |
1277 | stg6[22], |
1278 | stg6[23], |
1279 | stg6[24], |
1280 | stg6[25], |
1281 | half_btf(-COSPI_INV[16], stg6[21], COSPI_INV[48], stg6[26], INV_COS_BIT), |
1282 | half_btf(-COSPI_INV[16], stg6[20], COSPI_INV[48], stg6[27], INV_COS_BIT), |
1283 | half_btf(COSPI_INV[48], stg6[19], COSPI_INV[16], stg6[28], INV_COS_BIT), |
1284 | half_btf(COSPI_INV[48], stg6[18], COSPI_INV[16], stg6[29], INV_COS_BIT), |
1285 | stg6[30], |
1286 | stg6[31], |
1287 | clamp_value(stg6[32] + stg6[39], range), |
1288 | clamp_value(stg6[33] + stg6[38], range), |
1289 | clamp_value(stg6[34] + stg6[37], range), |
1290 | clamp_value(stg6[35] + stg6[36], range), |
1291 | clamp_value(stg6[35] - stg6[36], range), |
1292 | clamp_value(stg6[34] - stg6[37], range), |
1293 | clamp_value(stg6[33] - stg6[38], range), |
1294 | clamp_value(stg6[32] - stg6[39], range), |
1295 | clamp_value(-stg6[40] + stg6[47], range), |
1296 | clamp_value(-stg6[41] + stg6[46], range), |
1297 | clamp_value(-stg6[42] + stg6[45], range), |
1298 | clamp_value(-stg6[43] + stg6[44], range), |
1299 | clamp_value(stg6[43] + stg6[44], range), |
1300 | clamp_value(stg6[42] + stg6[45], range), |
1301 | clamp_value(stg6[41] + stg6[46], range), |
1302 | clamp_value(stg6[40] + stg6[47], range), |
1303 | clamp_value(stg6[48] + stg6[55], range), |
1304 | clamp_value(stg6[49] + stg6[54], range), |
1305 | clamp_value(stg6[50] + stg6[53], range), |
1306 | clamp_value(stg6[51] + stg6[52], range), |
1307 | clamp_value(stg6[51] - stg6[52], range), |
1308 | clamp_value(stg6[50] - stg6[53], range), |
1309 | clamp_value(stg6[49] - stg6[54], range), |
1310 | clamp_value(stg6[48] - stg6[55], range), |
1311 | clamp_value(-stg6[56] + stg6[63], range), |
1312 | clamp_value(-stg6[57] + stg6[62], range), |
1313 | clamp_value(-stg6[58] + stg6[61], range), |
1314 | clamp_value(-stg6[59] + stg6[60], range), |
1315 | clamp_value(stg6[59] + stg6[60], range), |
1316 | clamp_value(stg6[58] + stg6[61], range), |
1317 | clamp_value(stg6[57] + stg6[62], range), |
1318 | clamp_value(stg6[56] + stg6[63], range), |
1319 | ]; |
1320 | |
1321 | // stage 8 |
1322 | let stg8 = [ |
1323 | clamp_value(stg7[0] + stg7[7], range), |
1324 | clamp_value(stg7[1] + stg7[6], range), |
1325 | clamp_value(stg7[2] + stg7[5], range), |
1326 | clamp_value(stg7[3] + stg7[4], range), |
1327 | clamp_value(stg7[3] - stg7[4], range), |
1328 | clamp_value(stg7[2] - stg7[5], range), |
1329 | clamp_value(stg7[1] - stg7[6], range), |
1330 | clamp_value(stg7[0] - stg7[7], range), |
1331 | stg7[8], |
1332 | stg7[9], |
1333 | half_btf(-COSPI_INV[32], stg7[10], COSPI_INV[32], stg7[13], INV_COS_BIT), |
1334 | half_btf(-COSPI_INV[32], stg7[11], COSPI_INV[32], stg7[12], INV_COS_BIT), |
1335 | half_btf(COSPI_INV[32], stg7[11], COSPI_INV[32], stg7[12], INV_COS_BIT), |
1336 | half_btf(COSPI_INV[32], stg7[10], COSPI_INV[32], stg7[13], INV_COS_BIT), |
1337 | stg7[14], |
1338 | stg7[15], |
1339 | clamp_value(stg7[16] + stg7[23], range), |
1340 | clamp_value(stg7[17] + stg7[22], range), |
1341 | clamp_value(stg7[18] + stg7[21], range), |
1342 | clamp_value(stg7[19] + stg7[20], range), |
1343 | clamp_value(stg7[19] - stg7[20], range), |
1344 | clamp_value(stg7[18] - stg7[21], range), |
1345 | clamp_value(stg7[17] - stg7[22], range), |
1346 | clamp_value(stg7[16] - stg7[23], range), |
1347 | clamp_value(-stg7[24] + stg7[31], range), |
1348 | clamp_value(-stg7[25] + stg7[30], range), |
1349 | clamp_value(-stg7[26] + stg7[29], range), |
1350 | clamp_value(-stg7[27] + stg7[28], range), |
1351 | clamp_value(stg7[27] + stg7[28], range), |
1352 | clamp_value(stg7[26] + stg7[29], range), |
1353 | clamp_value(stg7[25] + stg7[30], range), |
1354 | clamp_value(stg7[24] + stg7[31], range), |
1355 | stg7[32], |
1356 | stg7[33], |
1357 | stg7[34], |
1358 | stg7[35], |
1359 | half_btf(-COSPI_INV[16], stg7[36], COSPI_INV[48], stg7[59], INV_COS_BIT), |
1360 | half_btf(-COSPI_INV[16], stg7[37], COSPI_INV[48], stg7[58], INV_COS_BIT), |
1361 | half_btf(-COSPI_INV[16], stg7[38], COSPI_INV[48], stg7[57], INV_COS_BIT), |
1362 | half_btf(-COSPI_INV[16], stg7[39], COSPI_INV[48], stg7[56], INV_COS_BIT), |
1363 | half_btf(-COSPI_INV[48], stg7[40], -COSPI_INV[16], stg7[55], INV_COS_BIT), |
1364 | half_btf(-COSPI_INV[48], stg7[41], -COSPI_INV[16], stg7[54], INV_COS_BIT), |
1365 | half_btf(-COSPI_INV[48], stg7[42], -COSPI_INV[16], stg7[53], INV_COS_BIT), |
1366 | half_btf(-COSPI_INV[48], stg7[43], -COSPI_INV[16], stg7[52], INV_COS_BIT), |
1367 | stg7[44], |
1368 | stg7[45], |
1369 | stg7[46], |
1370 | stg7[47], |
1371 | stg7[48], |
1372 | stg7[49], |
1373 | stg7[50], |
1374 | stg7[51], |
1375 | half_btf(-COSPI_INV[16], stg7[43], COSPI_INV[48], stg7[52], INV_COS_BIT), |
1376 | half_btf(-COSPI_INV[16], stg7[42], COSPI_INV[48], stg7[53], INV_COS_BIT), |
1377 | half_btf(-COSPI_INV[16], stg7[41], COSPI_INV[48], stg7[54], INV_COS_BIT), |
1378 | half_btf(-COSPI_INV[16], stg7[40], COSPI_INV[48], stg7[55], INV_COS_BIT), |
1379 | half_btf(COSPI_INV[48], stg7[39], COSPI_INV[16], stg7[56], INV_COS_BIT), |
1380 | half_btf(COSPI_INV[48], stg7[38], COSPI_INV[16], stg7[57], INV_COS_BIT), |
1381 | half_btf(COSPI_INV[48], stg7[37], COSPI_INV[16], stg7[58], INV_COS_BIT), |
1382 | half_btf(COSPI_INV[48], stg7[36], COSPI_INV[16], stg7[59], INV_COS_BIT), |
1383 | stg7[60], |
1384 | stg7[61], |
1385 | stg7[62], |
1386 | stg7[63], |
1387 | ]; |
1388 | |
1389 | // stage 9 |
1390 | let stg9 = [ |
1391 | clamp_value(stg8[0] + stg8[15], range), |
1392 | clamp_value(stg8[1] + stg8[14], range), |
1393 | clamp_value(stg8[2] + stg8[13], range), |
1394 | clamp_value(stg8[3] + stg8[12], range), |
1395 | clamp_value(stg8[4] + stg8[11], range), |
1396 | clamp_value(stg8[5] + stg8[10], range), |
1397 | clamp_value(stg8[6] + stg8[9], range), |
1398 | clamp_value(stg8[7] + stg8[8], range), |
1399 | clamp_value(stg8[7] - stg8[8], range), |
1400 | clamp_value(stg8[6] - stg8[9], range), |
1401 | clamp_value(stg8[5] - stg8[10], range), |
1402 | clamp_value(stg8[4] - stg8[11], range), |
1403 | clamp_value(stg8[3] - stg8[12], range), |
1404 | clamp_value(stg8[2] - stg8[13], range), |
1405 | clamp_value(stg8[1] - stg8[14], range), |
1406 | clamp_value(stg8[0] - stg8[15], range), |
1407 | stg8[16], |
1408 | stg8[17], |
1409 | stg8[18], |
1410 | stg8[19], |
1411 | half_btf(-COSPI_INV[32], stg8[20], COSPI_INV[32], stg8[27], INV_COS_BIT), |
1412 | half_btf(-COSPI_INV[32], stg8[21], COSPI_INV[32], stg8[26], INV_COS_BIT), |
1413 | half_btf(-COSPI_INV[32], stg8[22], COSPI_INV[32], stg8[25], INV_COS_BIT), |
1414 | half_btf(-COSPI_INV[32], stg8[23], COSPI_INV[32], stg8[24], INV_COS_BIT), |
1415 | half_btf(COSPI_INV[32], stg8[23], COSPI_INV[32], stg8[24], INV_COS_BIT), |
1416 | half_btf(COSPI_INV[32], stg8[22], COSPI_INV[32], stg8[25], INV_COS_BIT), |
1417 | half_btf(COSPI_INV[32], stg8[21], COSPI_INV[32], stg8[26], INV_COS_BIT), |
1418 | half_btf(COSPI_INV[32], stg8[20], COSPI_INV[32], stg8[27], INV_COS_BIT), |
1419 | stg8[28], |
1420 | stg8[29], |
1421 | stg8[30], |
1422 | stg8[31], |
1423 | clamp_value(stg8[32] + stg8[47], range), |
1424 | clamp_value(stg8[33] + stg8[46], range), |
1425 | clamp_value(stg8[34] + stg8[45], range), |
1426 | clamp_value(stg8[35] + stg8[44], range), |
1427 | clamp_value(stg8[36] + stg8[43], range), |
1428 | clamp_value(stg8[37] + stg8[42], range), |
1429 | clamp_value(stg8[38] + stg8[41], range), |
1430 | clamp_value(stg8[39] + stg8[40], range), |
1431 | clamp_value(stg8[39] - stg8[40], range), |
1432 | clamp_value(stg8[38] - stg8[41], range), |
1433 | clamp_value(stg8[37] - stg8[42], range), |
1434 | clamp_value(stg8[36] - stg8[43], range), |
1435 | clamp_value(stg8[35] - stg8[44], range), |
1436 | clamp_value(stg8[34] - stg8[45], range), |
1437 | clamp_value(stg8[33] - stg8[46], range), |
1438 | clamp_value(stg8[32] - stg8[47], range), |
1439 | clamp_value(-stg8[48] + stg8[63], range), |
1440 | clamp_value(-stg8[49] + stg8[62], range), |
1441 | clamp_value(-stg8[50] + stg8[61], range), |
1442 | clamp_value(-stg8[51] + stg8[60], range), |
1443 | clamp_value(-stg8[52] + stg8[59], range), |
1444 | clamp_value(-stg8[53] + stg8[58], range), |
1445 | clamp_value(-stg8[54] + stg8[57], range), |
1446 | clamp_value(-stg8[55] + stg8[56], range), |
1447 | clamp_value(stg8[55] + stg8[56], range), |
1448 | clamp_value(stg8[54] + stg8[57], range), |
1449 | clamp_value(stg8[53] + stg8[58], range), |
1450 | clamp_value(stg8[52] + stg8[59], range), |
1451 | clamp_value(stg8[51] + stg8[60], range), |
1452 | clamp_value(stg8[50] + stg8[61], range), |
1453 | clamp_value(stg8[49] + stg8[62], range), |
1454 | clamp_value(stg8[48] + stg8[63], range), |
1455 | ]; |
1456 | |
1457 | // stage 10 |
1458 | let stg10 = [ |
1459 | clamp_value(stg9[0] + stg9[31], range), |
1460 | clamp_value(stg9[1] + stg9[30], range), |
1461 | clamp_value(stg9[2] + stg9[29], range), |
1462 | clamp_value(stg9[3] + stg9[28], range), |
1463 | clamp_value(stg9[4] + stg9[27], range), |
1464 | clamp_value(stg9[5] + stg9[26], range), |
1465 | clamp_value(stg9[6] + stg9[25], range), |
1466 | clamp_value(stg9[7] + stg9[24], range), |
1467 | clamp_value(stg9[8] + stg9[23], range), |
1468 | clamp_value(stg9[9] + stg9[22], range), |
1469 | clamp_value(stg9[10] + stg9[21], range), |
1470 | clamp_value(stg9[11] + stg9[20], range), |
1471 | clamp_value(stg9[12] + stg9[19], range), |
1472 | clamp_value(stg9[13] + stg9[18], range), |
1473 | clamp_value(stg9[14] + stg9[17], range), |
1474 | clamp_value(stg9[15] + stg9[16], range), |
1475 | clamp_value(stg9[15] - stg9[16], range), |
1476 | clamp_value(stg9[14] - stg9[17], range), |
1477 | clamp_value(stg9[13] - stg9[18], range), |
1478 | clamp_value(stg9[12] - stg9[19], range), |
1479 | clamp_value(stg9[11] - stg9[20], range), |
1480 | clamp_value(stg9[10] - stg9[21], range), |
1481 | clamp_value(stg9[9] - stg9[22], range), |
1482 | clamp_value(stg9[8] - stg9[23], range), |
1483 | clamp_value(stg9[7] - stg9[24], range), |
1484 | clamp_value(stg9[6] - stg9[25], range), |
1485 | clamp_value(stg9[5] - stg9[26], range), |
1486 | clamp_value(stg9[4] - stg9[27], range), |
1487 | clamp_value(stg9[3] - stg9[28], range), |
1488 | clamp_value(stg9[2] - stg9[29], range), |
1489 | clamp_value(stg9[1] - stg9[30], range), |
1490 | clamp_value(stg9[0] - stg9[31], range), |
1491 | stg9[32], |
1492 | stg9[33], |
1493 | stg9[34], |
1494 | stg9[35], |
1495 | stg9[36], |
1496 | stg9[37], |
1497 | stg9[38], |
1498 | stg9[39], |
1499 | half_btf(-COSPI_INV[32], stg9[40], COSPI_INV[32], stg9[55], INV_COS_BIT), |
1500 | half_btf(-COSPI_INV[32], stg9[41], COSPI_INV[32], stg9[54], INV_COS_BIT), |
1501 | half_btf(-COSPI_INV[32], stg9[42], COSPI_INV[32], stg9[53], INV_COS_BIT), |
1502 | half_btf(-COSPI_INV[32], stg9[43], COSPI_INV[32], stg9[52], INV_COS_BIT), |
1503 | half_btf(-COSPI_INV[32], stg9[44], COSPI_INV[32], stg9[51], INV_COS_BIT), |
1504 | half_btf(-COSPI_INV[32], stg9[45], COSPI_INV[32], stg9[50], INV_COS_BIT), |
1505 | half_btf(-COSPI_INV[32], stg9[46], COSPI_INV[32], stg9[49], INV_COS_BIT), |
1506 | half_btf(-COSPI_INV[32], stg9[47], COSPI_INV[32], stg9[48], INV_COS_BIT), |
1507 | half_btf(COSPI_INV[32], stg9[47], COSPI_INV[32], stg9[48], INV_COS_BIT), |
1508 | half_btf(COSPI_INV[32], stg9[46], COSPI_INV[32], stg9[49], INV_COS_BIT), |
1509 | half_btf(COSPI_INV[32], stg9[45], COSPI_INV[32], stg9[50], INV_COS_BIT), |
1510 | half_btf(COSPI_INV[32], stg9[44], COSPI_INV[32], stg9[51], INV_COS_BIT), |
1511 | half_btf(COSPI_INV[32], stg9[43], COSPI_INV[32], stg9[52], INV_COS_BIT), |
1512 | half_btf(COSPI_INV[32], stg9[42], COSPI_INV[32], stg9[53], INV_COS_BIT), |
1513 | half_btf(COSPI_INV[32], stg9[41], COSPI_INV[32], stg9[54], INV_COS_BIT), |
1514 | half_btf(COSPI_INV[32], stg9[40], COSPI_INV[32], stg9[55], INV_COS_BIT), |
1515 | stg9[56], |
1516 | stg9[57], |
1517 | stg9[58], |
1518 | stg9[59], |
1519 | stg9[60], |
1520 | stg9[61], |
1521 | stg9[62], |
1522 | stg9[63], |
1523 | ]; |
1524 | |
1525 | // stage 11 |
1526 | output[0] = clamp_value(stg10[0] + stg10[63], range); |
1527 | output[1] = clamp_value(stg10[1] + stg10[62], range); |
1528 | output[2] = clamp_value(stg10[2] + stg10[61], range); |
1529 | output[3] = clamp_value(stg10[3] + stg10[60], range); |
1530 | output[4] = clamp_value(stg10[4] + stg10[59], range); |
1531 | output[5] = clamp_value(stg10[5] + stg10[58], range); |
1532 | output[6] = clamp_value(stg10[6] + stg10[57], range); |
1533 | output[7] = clamp_value(stg10[7] + stg10[56], range); |
1534 | output[8] = clamp_value(stg10[8] + stg10[55], range); |
1535 | output[9] = clamp_value(stg10[9] + stg10[54], range); |
1536 | output[10] = clamp_value(stg10[10] + stg10[53], range); |
1537 | output[11] = clamp_value(stg10[11] + stg10[52], range); |
1538 | output[12] = clamp_value(stg10[12] + stg10[51], range); |
1539 | output[13] = clamp_value(stg10[13] + stg10[50], range); |
1540 | output[14] = clamp_value(stg10[14] + stg10[49], range); |
1541 | output[15] = clamp_value(stg10[15] + stg10[48], range); |
1542 | output[16] = clamp_value(stg10[16] + stg10[47], range); |
1543 | output[17] = clamp_value(stg10[17] + stg10[46], range); |
1544 | output[18] = clamp_value(stg10[18] + stg10[45], range); |
1545 | output[19] = clamp_value(stg10[19] + stg10[44], range); |
1546 | output[20] = clamp_value(stg10[20] + stg10[43], range); |
1547 | output[21] = clamp_value(stg10[21] + stg10[42], range); |
1548 | output[22] = clamp_value(stg10[22] + stg10[41], range); |
1549 | output[23] = clamp_value(stg10[23] + stg10[40], range); |
1550 | output[24] = clamp_value(stg10[24] + stg10[39], range); |
1551 | output[25] = clamp_value(stg10[25] + stg10[38], range); |
1552 | output[26] = clamp_value(stg10[26] + stg10[37], range); |
1553 | output[27] = clamp_value(stg10[27] + stg10[36], range); |
1554 | output[28] = clamp_value(stg10[28] + stg10[35], range); |
1555 | output[29] = clamp_value(stg10[29] + stg10[34], range); |
1556 | output[30] = clamp_value(stg10[30] + stg10[33], range); |
1557 | output[31] = clamp_value(stg10[31] + stg10[32], range); |
1558 | output[32] = clamp_value(stg10[31] - stg10[32], range); |
1559 | output[33] = clamp_value(stg10[30] - stg10[33], range); |
1560 | output[34] = clamp_value(stg10[29] - stg10[34], range); |
1561 | output[35] = clamp_value(stg10[28] - stg10[35], range); |
1562 | output[36] = clamp_value(stg10[27] - stg10[36], range); |
1563 | output[37] = clamp_value(stg10[26] - stg10[37], range); |
1564 | output[38] = clamp_value(stg10[25] - stg10[38], range); |
1565 | output[39] = clamp_value(stg10[24] - stg10[39], range); |
1566 | output[40] = clamp_value(stg10[23] - stg10[40], range); |
1567 | output[41] = clamp_value(stg10[22] - stg10[41], range); |
1568 | output[42] = clamp_value(stg10[21] - stg10[42], range); |
1569 | output[43] = clamp_value(stg10[20] - stg10[43], range); |
1570 | output[44] = clamp_value(stg10[19] - stg10[44], range); |
1571 | output[45] = clamp_value(stg10[18] - stg10[45], range); |
1572 | output[46] = clamp_value(stg10[17] - stg10[46], range); |
1573 | output[47] = clamp_value(stg10[16] - stg10[47], range); |
1574 | output[48] = clamp_value(stg10[15] - stg10[48], range); |
1575 | output[49] = clamp_value(stg10[14] - stg10[49], range); |
1576 | output[50] = clamp_value(stg10[13] - stg10[50], range); |
1577 | output[51] = clamp_value(stg10[12] - stg10[51], range); |
1578 | output[52] = clamp_value(stg10[11] - stg10[52], range); |
1579 | output[53] = clamp_value(stg10[10] - stg10[53], range); |
1580 | output[54] = clamp_value(stg10[9] - stg10[54], range); |
1581 | output[55] = clamp_value(stg10[8] - stg10[55], range); |
1582 | output[56] = clamp_value(stg10[7] - stg10[56], range); |
1583 | output[57] = clamp_value(stg10[6] - stg10[57], range); |
1584 | output[58] = clamp_value(stg10[5] - stg10[58], range); |
1585 | output[59] = clamp_value(stg10[4] - stg10[59], range); |
1586 | output[60] = clamp_value(stg10[3] - stg10[60], range); |
1587 | output[61] = clamp_value(stg10[2] - stg10[61], range); |
1588 | output[62] = clamp_value(stg10[1] - stg10[62], range); |
1589 | output[63] = clamp_value(stg10[0] - stg10[63], range); |
1590 | } |
1591 | |
/// Signature shared by every 1D inverse transform kernel in this file:
/// `(input, output, range)`, where `input` holds the coefficients, `output`
/// receives the transformed values and `range` is forwarded unchanged to the
/// kernel.
type InvTxfmFn = fn(&[i32], &mut [i32], usize);
1593 | |
1594 | static INV_TXFM_FNS: [[InvTxfmFn; 5]; 5] = [ |
1595 | [av1_idct4, av1_idct8, av1_idct16, av1_idct32, av1_idct64], |
1596 | [ |
1597 | av1_iadst4, |
1598 | av1_iadst8, |
1599 | av1_iadst16, |
1600 | |_, _, _| unimplemented!(), |
1601 | |_, _, _| unimplemented!(), |
1602 | ], |
1603 | [ |
1604 | av1_iflipadst4, |
1605 | av1_iflipadst8, |
1606 | av1_iflipadst16, |
1607 | |_, _, _| unimplemented!(), |
1608 | |_, _, _| unimplemented!(), |
1609 | ], |
1610 | [ |
1611 | av1_iidentity4, |
1612 | av1_iidentity8, |
1613 | av1_iidentity16, |
1614 | av1_iidentity32, |
1615 | |_, _, _| unimplemented!(), |
1616 | ], |
1617 | [ |
1618 | av1_iwht4, |
1619 | |_, _, _| unimplemented!(), |
1620 | |_, _, _| unimplemented!(), |
1621 | |_, _, _| unimplemented!(), |
1622 | |_, _, _| unimplemented!(), |
1623 | ], |
1624 | ]; |
1625 | |
1626 | pub(crate) mod rust { |
1627 | use super::*; |
1628 | use crate::cpu_features::CpuFeatureLevel; |
1629 | use crate::util::clamp; |
1630 | |
1631 | use simd_helpers::cold_for_target_arch; |
1632 | use std::cmp; |
1633 | |
1634 | #[cold_for_target_arch ("x86_64" , "aarch64" )] |
1635 | pub fn inverse_transform_add<T: Pixel>( |
1636 | input: &[T::Coeff], output: &mut PlaneRegionMut<'_, T>, _eob: u16, |
1637 | tx_size: TxSize, tx_type: TxType, bd: usize, _cpu: CpuFeatureLevel, |
1638 | ) { |
1639 | let width: usize = tx_size.width(); |
1640 | let height: usize = tx_size.height(); |
1641 | |
1642 | // Only use at most 32 columns and 32 rows of input coefficients. |
1643 | let input: &[T::Coeff] = &input[..width.min(32) * height.min(32)]; |
1644 | |
1645 | // For 64 point transforms, rely on the last 32 columns being initialized |
1646 | // to zero for filling out missing input coeffs. |
1647 | let mut buffer = vec![0i32; width * height].into_boxed_slice(); |
1648 | let rect_type = get_rect_tx_log_ratio(width, height); |
1649 | let tx_types_1d = get_1d_tx_types(tx_type); |
1650 | let lossless = tx_type == TxType::WHT_WHT; |
1651 | |
1652 | // perform inv txfm on every row |
1653 | let range = bd + 8; |
1654 | let txfm_fn = INV_TXFM_FNS[tx_types_1d.1 as usize][ILog::ilog(width) - 3]; |
1655 | // 64 point transforms only signal 32 coeffs. We only take chunks of 32 |
1656 | // and skip over the last 32 transforms here. |
1657 | for (r, buffer_slice) in (0..height.min(32)).zip(buffer.chunks_mut(width)) |
1658 | { |
1659 | // For 64 point transforms, rely on the last 32 elements being |
1660 | // initialized to zero for filling out the missing coeffs. |
1661 | let mut temp_in: [i32; 64] = [0; 64]; |
1662 | for (raw, clamped) in input[r..] |
1663 | .iter() |
1664 | .map(|a| i32::cast_from(*a)) |
1665 | .step_by(height.min(32)) |
1666 | .zip(temp_in.iter_mut()) |
1667 | { |
1668 | let val = if rect_type.abs() == 1 { |
1669 | round_shift(raw * INV_SQRT2, SQRT2_BITS) |
1670 | } else if lossless { |
1671 | raw >> 2 |
1672 | } else { |
1673 | raw |
1674 | }; |
1675 | *clamped = clamp_value(val, range); |
1676 | } |
1677 | txfm_fn(&temp_in, buffer_slice, range); |
1678 | } |
1679 | |
1680 | // perform inv txfm on every col |
1681 | let range = cmp::max(bd + 6, 16); |
1682 | let txfm_fn = INV_TXFM_FNS[tx_types_1d.0 as usize][ILog::ilog(height) - 3]; |
1683 | for c in 0..width { |
1684 | let mut temp_in: [i32; 64] = [0; 64]; |
1685 | let mut temp_out: [i32; 64] = [0; 64]; |
1686 | for (raw, clamped) in |
1687 | buffer[c..].iter().step_by(width).zip(temp_in.iter_mut()) |
1688 | { |
1689 | *clamped = clamp_value( |
1690 | round_shift(*raw, INV_INTERMEDIATE_SHIFTS[tx_size as usize]), |
1691 | range, |
1692 | ); |
1693 | } |
1694 | txfm_fn(&temp_in, &mut temp_out, range); |
1695 | for (temp, out) in temp_out |
1696 | .iter() |
1697 | .zip(output.rows_iter_mut().map(|row| &mut row[c]).take(height)) |
1698 | { |
1699 | let v: i32 = (*out).as_(); |
1700 | let r = if lossless { *temp } else { round_shift(*temp, 4) }; |
1701 | let v = clamp(v + r, 0, (1 << bd) - 1); |
1702 | *out = T::cast_from(v); |
1703 | } |
1704 | } |
1705 | } |
1706 | |
1707 | /* From AV1 Spec. |
1708 | https://aomediacodec.github.io/av1-spec/#2d-inverse-transform-process |
1709 | */ |
1710 | const INV_INTERMEDIATE_SHIFTS: [usize; TxSize::TX_SIZES_ALL] = |
1711 | [0, 1, 2, 2, 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2]; |
1712 | } |
1713 | |