1// Copyright (c) 2018-2022, The rav1e contributors. All rights reserved
2//
3// This source code is subject to the terms of the BSD 2 Clause License and
4// the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
5// was not distributed with this source code in the LICENSE file, you can
6// obtain it at www.aomedia.org/license/software. If the Alliance for Open
7// Media Patent License 1.0 was not distributed with this source code in the
8// PATENTS file, you can obtain it at www.aomedia.org/license/patent.
9
10use super::TxSize;
11use super::TxType;
12
13use super::HTX_TAB;
14use super::VTX_TAB;
15
/// Three shift amounts used by one forward 2D transform configuration.
///
/// The table comment below calls the first entry the first-stage shift; how
/// each entry is applied is decided by the code that consumes
/// `Txfm2DFlipCfg::shift`, which is not part of this section.
pub type TxfmShift = [i8; 3];
/// One `TxfmShift` per bit-depth step. `Txfm2DFlipCfg::fwd` selects the entry
/// with `(bd - 8) / 2`, so the three entries line up with 8-, 10- and 12-bit
/// input.
pub type TxfmShifts = [TxfmShift; 3];

// Shift so that the first shift is 4 - (bd - 8) to align with the initial
// design of daala_tx
// 8 bit 4x4 is an exception and only shifts by 3 in the first stage
const FWD_SHIFT_4X4: TxfmShifts = [[3, 0, 0], [2, 0, 1], [0, 0, 3]];
const FWD_SHIFT_8X8: TxfmShifts = [[4, -1, 0], [2, 0, 1], [0, 0, 3]];
const FWD_SHIFT_16X16: TxfmShifts = [[4, -1, 0], [2, 0, 1], [0, 0, 3]];
const FWD_SHIFT_32X32: TxfmShifts = [[4, -2, 0], [2, 0, 0], [0, 0, 2]];
const FWD_SHIFT_64X64: TxfmShifts = [[4, -1, -2], [2, 0, -1], [0, 0, 1]];
const FWD_SHIFT_4X8: TxfmShifts = [[4, -1, 0], [2, 0, 1], [0, 0, 3]];
const FWD_SHIFT_8X4: TxfmShifts = [[4, -1, 0], [2, 0, 1], [0, 0, 3]];
const FWD_SHIFT_8X16: TxfmShifts = [[4, -1, 0], [2, 0, 1], [0, 0, 3]];
const FWD_SHIFT_16X8: TxfmShifts = [[4, -1, 0], [2, 0, 1], [0, 0, 3]];
const FWD_SHIFT_16X32: TxfmShifts = [[4, -2, 0], [2, 0, 0], [0, 0, 2]];
const FWD_SHIFT_32X16: TxfmShifts = [[4, -2, 0], [2, 0, 0], [0, 0, 2]];
const FWD_SHIFT_32X64: TxfmShifts = [[4, -1, -2], [2, 0, -1], [0, 0, 1]];
const FWD_SHIFT_64X32: TxfmShifts = [[4, -1, -2], [2, 0, -1], [0, 0, 1]];
const FWD_SHIFT_4X16: TxfmShifts = [[4, -1, 0], [2, 0, 1], [0, 0, 3]];
const FWD_SHIFT_16X4: TxfmShifts = [[4, -1, 0], [2, 0, 1], [0, 0, 3]];
const FWD_SHIFT_8X32: TxfmShifts = [[4, -1, 0], [2, 0, 1], [0, 0, 3]];
const FWD_SHIFT_32X8: TxfmShifts = [[4, -1, 0], [2, 0, 1], [0, 0, 3]];
const FWD_SHIFT_16X64: TxfmShifts = [[4, -2, 0], [2, 0, 0], [0, 0, 2]];
const FWD_SHIFT_64X16: TxfmShifts = [[4, -2, 0], [2, 0, 0], [0, 0, 2]];

/// Shift triple that `Txfm2DFlipCfg::fwd` uses for `TxType::WHT_WHT`,
/// independent of bit depth.
const FWD_SHIFT_4X4_WHT: TxfmShift = [0, 0, 2];

/// Per-size shift tables, looked up as
/// `FWD_TXFM_SHIFT_LS[tx_size as usize][(bd - 8) / 2]`.
///
/// NOTE(review): the entry order must therefore match the discriminant order
/// of `TxSize`; that enum is declared elsewhere, so verify when editing.
pub const FWD_TXFM_SHIFT_LS: [TxfmShifts; TxSize::TX_SIZES_ALL] = [
  FWD_SHIFT_4X4,
  FWD_SHIFT_8X8,
  FWD_SHIFT_16X16,
  FWD_SHIFT_32X32,
  FWD_SHIFT_64X64,
  FWD_SHIFT_4X8,
  FWD_SHIFT_8X4,
  FWD_SHIFT_8X16,
  FWD_SHIFT_16X8,
  FWD_SHIFT_16X32,
  FWD_SHIFT_32X16,
  FWD_SHIFT_32X64,
  FWD_SHIFT_64X32,
  FWD_SHIFT_4X16,
  FWD_SHIFT_16X4,
  FWD_SHIFT_8X32,
  FWD_SHIFT_32X8,
  FWD_SHIFT_16X64,
  FWD_SHIFT_64X16,
];
65
/// Identifier of a one-dimensional forward transform kernel: the transform
/// family followed by its point count.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TxfmType {
  DCT4,
  DCT8,
  DCT16,
  DCT32,
  DCT64,
  ADST4,
  ADST8,
  ADST16,
  Identity4,
  Identity8,
  Identity16,
  Identity32,
  WHT4,
}

impl TxfmType {
  /// Number of columns (1D transform kinds) in each row of the lookup table.
  const TX_TYPES_1D: usize = 5;

  /// Kernel lookup table, indexed as `[size index][1D kind index]`.
  ///
  /// Rows go from 4-point (row 0) up to 64-point (row 4). Columns hold, in
  /// order: DCT, two ADST columns that share the same kernel, identity and
  /// WHT. `None` marks a size/kind pair that has no kernel;
  /// `Txfm2DFlipCfg::fwd` unwraps the entry and so panics on those.
  const AV1_TXFM_TYPE_LS: [[Option<Self>; Self::TX_TYPES_1D]; 5] = [
    // 4-point
    [
      Some(Self::DCT4),
      Some(Self::ADST4),
      Some(Self::ADST4),
      Some(Self::Identity4),
      Some(Self::WHT4),
    ],
    // 8-point
    [
      Some(Self::DCT8),
      Some(Self::ADST8),
      Some(Self::ADST8),
      Some(Self::Identity8),
      None,
    ],
    // 16-point
    [
      Some(Self::DCT16),
      Some(Self::ADST16),
      Some(Self::ADST16),
      Some(Self::Identity16),
      None,
    ],
    // 32-point
    [Some(Self::DCT32), None, None, Some(Self::Identity32), None],
    // 64-point
    [Some(Self::DCT64), None, None, None, None],
  ];
}
111
/// Configuration of one forward 2D transform, as assembled by
/// `Txfm2DFlipCfg::fwd`: block size, flip flags, shift triple and the 1D
/// kernels for the column and row passes.
#[derive(Debug, Clone, Copy)]
pub struct Txfm2DFlipCfg {
  /// Transform block size the configuration was built for
  pub tx_size: TxSize,
  /// Flip upside down
  pub ud_flip: bool,
  /// Flip left to right
  pub lr_flip: bool,
  /// Shift triple: the bit-depth specific entry of the size's shift table,
  /// or the fixed WHT triple for `TxType::WHT_WHT`
  pub shift: TxfmShift,
  /// 1D kernel for columns, looked up with the size's height index and the
  /// `VTX_TAB` entry of the transform type
  pub txfm_type_col: TxfmType,
  /// 1D kernel for rows, looked up with the size's width index and the
  /// `HTX_TAB` entry of the transform type
  pub txfm_type_row: TxfmType,
}
123
124impl Txfm2DFlipCfg {
125 /// # Panics
126 ///
127 /// - If called with an invalid combination of `tx_size` and `tx_type`
128 pub fn fwd(tx_type: TxType, tx_size: TxSize, bd: usize) -> Self {
129 let tx_type_1d_col = VTX_TAB[tx_type as usize];
130 let tx_type_1d_row = HTX_TAB[tx_type as usize];
131 let txw_idx = tx_size.width_index();
132 let txh_idx = tx_size.height_index();
133 let txfm_type_col =
134 TxfmType::AV1_TXFM_TYPE_LS[txh_idx][tx_type_1d_col as usize].unwrap();
135 let txfm_type_row =
136 TxfmType::AV1_TXFM_TYPE_LS[txw_idx][tx_type_1d_row as usize].unwrap();
137 let (ud_flip, lr_flip) = Self::get_flip_cfg(tx_type);
138 let shift = if tx_type == TxType::WHT_WHT {
139 FWD_SHIFT_4X4_WHT
140 } else {
141 FWD_TXFM_SHIFT_LS[tx_size as usize][(bd - 8) / 2]
142 };
143
144 Txfm2DFlipCfg {
145 tx_size,
146 ud_flip,
147 lr_flip,
148 shift,
149 txfm_type_col,
150 txfm_type_row,
151 }
152 }
153
154 /// Determine the flip config, returning `(ud_flip, lr_flip)`
155 const fn get_flip_cfg(tx_type: TxType) -> (bool, bool) {
156 use self::TxType::*;
157 match tx_type {
158 DCT_DCT | ADST_DCT | DCT_ADST | ADST_ADST | IDTX | V_DCT | H_DCT
159 | V_ADST | H_ADST | WHT_WHT => (false, false),
160 FLIPADST_DCT | FLIPADST_ADST | V_FLIPADST => (true, false),
161 DCT_FLIPADST | ADST_FLIPADST | H_FLIPADST => (false, true),
162 FLIPADST_FLIPADST => (true, true),
163 }
164 }
165}
166
/// Writes the listed expressions into consecutive slots of `$dst`, starting
/// at index 0: `store_coeffs!(out, a, b, c)` performs `out[0] = a`,
/// `out[1] = b`, `out[2] = c`, in that order.
macro_rules! store_coeffs {
  ($dst:expr, $($value:expr),*) => {{
    // Start one step before slot 0 so the pre-increment lands on 0; bumping
    // before each store avoids a trailing assignment that is never read.
    let mut slot = usize::MAX;
    $(
      slot = slot.wrapping_add(1);
      $dst[slot] = $value;
    )*
  }};
}
178
179macro_rules! impl_1d_tx {
180() => {
181 impl_1d_tx! {allow(unused_attributes), }
182};
183
184($m:meta, $($s:ident),*) => {
/// Arithmetic primitives the 1D transform kernels in this macro are written
/// against; every kernel is generic over a `Copy` type implementing this.
pub trait TxOperations: Copy {
  /// Value used to initialise scratch buffers before a kernel fills them.
  $($s)* fn zero() -> Self;

  /// Fixed-point multiply. Going by the constant comments in the kernels
  /// (e.g. `7021/16384` next to `tx_mul::<14>(7021)`), the result
  /// approximates `self * mul / 2^SHIFT`; rounding is up to the implementor.
  $($s)* fn tx_mul<const SHIFT: i32>(self, mul: i32) -> Self;
  /// NOTE(review): presumably a right shift by one (halving) — confirm
  /// against the implementations.
  $($s)* fn rshift1(self) -> Self;
  /// Sum of `self` and `b`.
  $($s)* fn add(self, b: Self) -> Self;
  /// Difference `self - b`.
  $($s)* fn sub(self, b: Self) -> Self;
  /// NOTE(review): presumably the halved sum `(self + b) / 2` — confirm.
  $($s)* fn add_avg(self, b: Self) -> Self;
  /// Halved difference; the comment in `daala_fdst_vii_4` treats
  /// `q0.sub_avg(t0)` as `(q0 - t0) / 2`.
  $($s)* fn sub_avg(self, b: Self) -> Self;

  /// Identity function, used as the `SHIFT` hook of rotation kernels that
  /// do not shift.
  $($s)* fn copy_fn(self) -> Self {
    self
  }
}
199
200 #[inline]
201 fn get_func(t: TxfmType) -> TxfmFunc {
202 use self::TxfmType::*;
203 match t {
204 DCT4 => daala_fdct4,
205 DCT8 => daala_fdct8,
206 DCT16 => daala_fdct16,
207 DCT32 => daala_fdct32,
208 DCT64 => daala_fdct64,
209 ADST4 => daala_fdst_vii_4,
210 ADST8 => daala_fdst8,
211 ADST16 => daala_fdst16,
212 Identity4 => fidentity,
213 Identity8 => fidentity,
214 Identity16 => fidentity,
215 Identity32 => fidentity,
216 WHT4 => fwht4,
217 }
218 }
219
220 trait RotateKernelPi4<T: TxOperations> {
221 const ADD: $($s)* fn(T, T) -> T;
222 const SUB: $($s)* fn(T, T) -> T;
223
224 #[$m]
225 $($s)* fn kernel<const SHIFT0: i32, const SHIFT1: i32>(p0: T, p1: T, m: (i32, i32)) -> (T, T) {
226 let t = Self::ADD(p1, p0);
227 let (a, out0) = (p0.tx_mul::<SHIFT0>(m.0), t.tx_mul::<SHIFT1>(m.1));
228 let out1 = Self::SUB(a, out0);
229 (out0, out1)
230 }
231}
232
// Marker types selecting the ADD/SUB hooks of `RotateKernelPi4`.
/// Combine with `add`, finish with `sub`.
struct RotatePi4Add;
/// Combine with `add_avg`, finish with `sub`.
struct RotatePi4AddAvg;
/// Combine with `sub`, finish with `add`.
struct RotatePi4Sub;
/// Combine with `sub_avg`, finish with `add`.
struct RotatePi4SubAvg;

impl<T: TxOperations> RotateKernelPi4<T> for RotatePi4Add {
  const ADD: $($s)* fn(T, T) -> T = T::add;
  const SUB: $($s)* fn(T, T) -> T = T::sub;
}

impl<T: TxOperations> RotateKernelPi4<T> for RotatePi4AddAvg {
  const ADD: $($s)* fn(T, T) -> T = T::add_avg;
  const SUB: $($s)* fn(T, T) -> T = T::sub;
}

impl<T: TxOperations> RotateKernelPi4<T> for RotatePi4Sub {
  const ADD: $($s)* fn(T, T) -> T = T::sub;
  const SUB: $($s)* fn(T, T) -> T = T::add;
}

impl<T: TxOperations> RotateKernelPi4<T> for RotatePi4SubAvg {
  const ADD: $($s)* fn(T, T) -> T = T::sub_avg;
  const SUB: $($s)* fn(T, T) -> T = T::add;
}
257
258trait RotateKernel<T: TxOperations> {
259 const ADD: $($s)* fn(T, T) -> T;
260 const SUB: $($s)* fn(T, T) -> T;
261 const SHIFT: $($s)* fn(T) -> T;
262
263 #[$m]
264 $($s)* fn half_kernel<const SHIFT0: i32, const SHIFT1: i32, const SHIFT2: i32>(
265 p0: (T, T), p1: T, m: (i32, i32, i32),
266 ) -> (T, T) {
267 let t = Self::ADD(p1, p0.0);
268 let (a, b, c) = (p0.1.tx_mul::<SHIFT0>(m.0), p1.tx_mul::<SHIFT1>(m.1), t.tx_mul::<SHIFT2>(m.2));
269 let out0 = b.add(c);
270 let shifted = Self::SHIFT(c);
271 let out1 = Self::SUB(a, shifted);
272 (out0, out1)
273 }
274
275 #[$m]
276 $($s)* fn kernel<const SHIFT0: i32, const SHIFT1: i32, const SHIFT2: i32>(p0: T, p1: T, m: (i32, i32, i32)) -> (T, T) {
277 Self::half_kernel::<SHIFT0, SHIFT1, SHIFT2>((p0, p0), p1, m)
278 }
279}
280
281trait RotateKernelNeg<T: TxOperations> {
282 const ADD: $($s)* fn(T, T) -> T;
283
284 #[$m]
285 $($s)* fn kernel<const SHIFT0: i32, const SHIFT1: i32, const SHIFT2: i32>(p0: T, p1: T, m: (i32, i32, i32)) -> (T, T) {
286 let t = Self::ADD(p0, p1);
287 let (a, b, c) = (p0.tx_mul::<SHIFT0>(m.0), p1.tx_mul::<SHIFT1>(m.1), t.tx_mul::<SHIFT2>(m.2));
288 let out0 = b.sub(c);
289 let out1 = c.sub(a);
290 (out0, out1)
291 }
292}
293
// Marker types selecting the hooks of `RotateKernel` (first six) and
// `RotateKernelNeg` (last two).
/// `ADD = add`, `SUB = sub`, no shift.
struct RotateAdd;
/// `ADD = add_avg`, `SUB = sub`, no shift.
struct RotateAddAvg;
/// `ADD = add`, `SUB = sub`, shared product passed through `rshift1`.
struct RotateAddShift;
/// `ADD = sub`, `SUB = add`, no shift.
struct RotateSub;
/// `ADD = sub_avg`, `SUB = add`, no shift.
struct RotateSubAvg;
/// `ADD = sub`, `SUB = add`, shared product passed through `rshift1`.
struct RotateSubShift;
/// `RotateKernelNeg` with `ADD = sub`.
struct RotateNeg;
/// `RotateKernelNeg` with `ADD = sub_avg`.
struct RotateNegAvg;

impl<T: TxOperations> RotateKernel<T> for RotateAdd {
  const ADD: $($s)* fn(T, T) -> T = T::add;
  const SUB: $($s)* fn(T, T) -> T = T::sub;
  const SHIFT: $($s)* fn(T) -> T = T::copy_fn;
}

impl<T: TxOperations> RotateKernel<T> for RotateAddAvg {
  const ADD: $($s)* fn(T, T) -> T = T::add_avg;
  const SUB: $($s)* fn(T, T) -> T = T::sub;
  const SHIFT: $($s)* fn(T) -> T = T::copy_fn;
}

impl<T: TxOperations> RotateKernel<T> for RotateAddShift {
  const ADD: $($s)* fn(T, T) -> T = T::add;
  const SUB: $($s)* fn(T, T) -> T = T::sub;
  const SHIFT: $($s)* fn(T) -> T = T::rshift1;
}

impl<T: TxOperations> RotateKernel<T> for RotateSub {
  const ADD: $($s)* fn(T, T) -> T = T::sub;
  const SUB: $($s)* fn(T, T) -> T = T::add;
  const SHIFT: $($s)* fn(T) -> T = T::copy_fn;
}

impl<T: TxOperations> RotateKernel<T> for RotateSubAvg {
  const ADD: $($s)* fn(T, T) -> T = T::sub_avg;
  const SUB: $($s)* fn(T, T) -> T = T::add;
  const SHIFT: $($s)* fn(T) -> T = T::copy_fn;
}

impl<T: TxOperations> RotateKernel<T> for RotateSubShift {
  const ADD: $($s)* fn(T, T) -> T = T::sub;
  const SUB: $($s)* fn(T, T) -> T = T::add;
  const SHIFT: $($s)* fn(T) -> T = T::rshift1;
}

impl<T: TxOperations> RotateKernelNeg<T> for RotateNeg {
  const ADD: $($s)* fn(T, T) -> T = T::sub;
}

impl<T: TxOperations> RotateKernelNeg<T> for RotateNegAvg {
  const ADD: $($s)* fn(T, T) -> T = T::sub_avg;
}
346
347#[inline]
348#[$m]
349$($s)* fn butterfly_add<T: TxOperations>(p0: T, p1: T) -> ((T, T), T) {
350 let p0 = p0.add(p1);
351 let p0h = p0.rshift1();
352 let p1h = p1.sub(p0h);
353 ((p0h, p0), p1h)
354}
355
356#[inline]
357#[$m]
358$($s)* fn butterfly_sub<T: TxOperations>(p0: T, p1: T) -> ((T, T), T) {
359 let p0 = p0.sub(p1);
360 let p0h = p0.rshift1();
361 let p1h = p1.add(p0h);
362 ((p0h, p0), p1h)
363}
364
365#[inline]
366#[$m]
367$($s)* fn butterfly_neg<T: TxOperations>(p0: T, p1: T) -> (T, (T, T)) {
368 let p1 = p0.sub(p1);
369 let p1h = p1.rshift1();
370 let p0h = p0.sub(p1h);
371 (p0h, (p1h, p1))
372}
373
374#[inline]
375#[$m]
376$($s)* fn butterfly_add_asym<T: TxOperations>(p0: (T, T), p1h: T) -> (T, T) {
377 let p1 = p1h.add(p0.0);
378 let p0 = p0.1.sub(p1);
379 (p0, p1)
380}
381
382#[inline]
383#[$m]
384$($s)* fn butterfly_sub_asym<T: TxOperations>(p0: (T, T), p1h: T) -> (T, T) {
385 let p1 = p1h.sub(p0.0);
386 let p0 = p0.1.add(p1);
387 (p0, p1)
388}
389
390#[inline]
391#[$m]
392$($s)* fn butterfly_neg_asym<T: TxOperations>(p0h: T, p1: (T, T)) -> (T, T) {
393 let p0 = p0h.add(p1.0);
394 let p1 = p0.sub(p1.1);
395 (p0, p1)
396}
397
/// 2-point transform with asymmetric input, used as the first half of
/// `daala_fdct_ii_4`; it is exactly `butterfly_neg_asym`.
#[$m]
$($s)* fn daala_fdct_ii_2_asym<T: TxOperations>(p0h: T, p1: (T, T)) -> (T, T) {
  butterfly_neg_asym(p0h, p1)
}
402
/// 2-point transform with asymmetric input, used as the second half of
/// `daala_fdct_ii_4`: a single `RotateAdd::half_kernel` rotation with the
/// 3*Pi/8 constants listed below.
#[$m]
$($s)* fn daala_fdst_iv_2_asym<T: TxOperations>(p0: (T, T), p1h: T) -> (T, T) {
  // 473/512 = (Sin[3*Pi/8] + Cos[3*Pi/8])/Sqrt[2] = 0.9238795325112867
  // 3135/4096 = (Sin[3*Pi/8] - Cos[3*Pi/8])*Sqrt[2] = 0.7653668647301795
  // 4433/8192 = Cos[3*Pi/8]*Sqrt[2] = 0.5411961001461971
  RotateAdd::half_kernel::<9, 12, 13>(p0, p1h, (473, 3135, 4433))
}
410
411#[$m]
412$($s)* fn daala_fdct_ii_4<T: TxOperations>(
413 q0: T, q1: T, q2: T, q3: T, output: &mut [T],
414) {
415 // +/- Butterflies with asymmetric output.
416 let (q0h, q3) = butterfly_neg(q0, q3);
417 let (q1, q2h) = butterfly_add(q1, q2);
418
419 // Embedded 2-point transforms with asymmetric input.
420 let (q0, q1) = daala_fdct_ii_2_asym(q0h, q1);
421 let (q3, q2) = daala_fdst_iv_2_asym(q3, q2h);
422
423 store_coeffs!(output, q0, q1, q2, q3);
424}
425
426#[$m]
427$($s)* fn daala_fdct4<T: TxOperations>(coeffs: &mut [T]) {
428 assert!(coeffs.len() >= 4);
429 let mut temp_out: [T; 4] = [T::zero(); 4];
430 daala_fdct_ii_4(coeffs[0], coeffs[1], coeffs[2], coeffs[3], &mut temp_out);
431
432 coeffs[0] = temp_out[0];
433 coeffs[1] = temp_out[2];
434 coeffs[2] = temp_out[1];
435 coeffs[3] = temp_out[3];
436}
437
438#[$m]
439$($s)* fn daala_fdst_vii_4<T: TxOperations>(coeffs: &mut [T]) {
440 assert!(coeffs.len() >= 4);
441
442 let q0 = coeffs[0];
443 let q1 = coeffs[1];
444 let q2 = coeffs[2];
445 let q3 = coeffs[3];
446 let t0 = q1.add(q3);
447 // t1 = (q0 + q1 - q3)/2
448 let t1 = q1.add(q0.sub_avg(t0));
449 let t2 = q0.sub(q1);
450 let t3 = q2;
451 let t4 = q0.add(q3);
452 // 7021/16384 ~= 2*Sin[2*Pi/9]/3 ~= 0.428525073124360
453 let t0 = t0.tx_mul::<14>(7021);
454 // 37837/32768 ~= 4*Sin[3*Pi/9]/3 ~= 1.154700538379252
455 let t1 = t1.tx_mul::<15>(37837);
456 // 21513/32768 ~= 2*Sin[4*Pi/9]/3 ~= 0.656538502008139
457 let t2 = t2.tx_mul::<15>(21513);
458 // 37837/32768 ~= 4*Sin[3*Pi/9]/3 ~= 1.154700538379252
459 let t3 = t3.tx_mul::<15>(37837);
460 // 467/2048 ~= 2*Sin[1*Pi/9]/3 ~= 0.228013428883779
461 let t4 = t4.tx_mul::<11>(467);
462 let t3h = t3.rshift1();
463 let u4 = t4.add(t3h);
464 coeffs[0] = t0.add(u4);
465 coeffs[1] = t1;
466 coeffs[2] = t0.add(t2.sub(t3h));
467 coeffs[3] = t2.add(t3.sub(u4));
468}
469
470#[$m]
471$($s)* fn daala_fdct_ii_2<T: TxOperations>(p0: T, p1: T) -> (T, T) {
472 // 11585/8192 = Sin[Pi/4] + Cos[Pi/4] = 1.4142135623730951
473 // 11585/8192 = 2*Cos[Pi/4] = 1.4142135623730951
474 let (p1, p0) = RotatePi4SubAvg::kernel::<13, 13>(p1, p0, (11585, 11585));
475 (p0, p1)
476}
477
/// 2-point transform used as the second half of `daala_fdct_ii_4_asym`: a
/// single `RotateAddAvg` rotation with the 3*Pi/8 constants listed below.
#[$m]
$($s)* fn daala_fdst_iv_2<T: TxOperations>(p0: T, p1: T) -> (T, T) {
  // 10703/8192 = Sin[3*Pi/8] + Cos[3*Pi/8] = 1.3065629648763766
  // 8867/16384 = Sin[3*Pi/8] - Cos[3*Pi/8] = 0.5411961001461971
  // 3135/4096 = 2*Cos[3*Pi/8] = 0.7653668647301796
  RotateAddAvg::kernel::<13, 14, 12>(p0, p1, (10703, 8867, 3135))
}
485
486#[$m]
487$($s)* fn daala_fdct_ii_4_asym<T: TxOperations>(
488 q0h: T, q1: (T, T), q2h: T, q3: (T, T), output: &mut [T],
489) {
490 // +/- Butterflies with asymmetric input.
491 let (q0, q3) = butterfly_neg_asym(q0h, q3);
492 let (q1, q2) = butterfly_sub_asym(q1, q2h);
493
494 // Embedded 2-point orthonormal transforms.
495 let (q0, q1) = daala_fdct_ii_2(q0, q1);
496 let (q3, q2) = daala_fdst_iv_2(q3, q2);
497
498 store_coeffs!(output, q0, q1, q2, q3);
499}
500
/// 4-point kernel taking asymmetric input (the pair/half values produced by
/// the butterflies of `daala_fdct_ii_8`). Three stages: two shifted
/// rotations, two butterflies, one Pi/4 rotation. Writes `q0..q3` to
/// `output[0..4]`.
#[$m]
$($s)* fn daala_fdst_iv_4_asym<T: TxOperations>(
  q0: (T, T), q1h: T, q2: (T, T), q3h: T, output: &mut [T],
) {
  // Stage 0
  // 9633/16384 = (Sin[7*Pi/16] + Cos[7*Pi/16])/2 = 0.5879378012096793
  // 12873/8192 = (Sin[7*Pi/16] - Cos[7*Pi/16])*2 = 1.5713899167742045
  // 12785/32768 = Cos[7*Pi/16]*2 = 0.3901806440322565
  let (q0, q3) = RotateAddShift::half_kernel::<14, 13, 15>(
    q0,
    q3h,
    (9633, 12873, 12785),
  );
  // 11363/16384 = (Sin[5*Pi/16] + Cos[5*Pi/16])/2 = 0.6935199226610738
  // 18081/32768 = (Sin[5*Pi/16] - Cos[5*Pi/16])*2 = 0.5517987585658861
  // 4551/4096 = Cos[5*Pi/16]*2 = 1.1111404660392044
  let (q2, q1) = RotateSubShift::half_kernel::<14, 15, 12>(
    q2,
    q1h,
    (11363, 18081, 4551),
  );

  // Stage 1
  // The rotation outputs are plain values, so the (half, full) pair the
  // asymmetric butterfly expects is rebuilt with rshift1.
  let (q2, q3) = butterfly_sub_asym((q2.rshift1(), q2), q3);
  let (q0, q1) = butterfly_sub_asym((q0.rshift1(), q0), q1);

  // Stage 2
  // 11585/8192 = Sin[Pi/4] + Cos[Pi/4] = 1.4142135623730951
  // 11585/8192 = 2*Cos[Pi/4] = 1.4142135623730951
  let (q2, q1) = RotatePi4AddAvg::kernel::<13, 13>(q2, q1, (11585, 11585));

  store_coeffs!(output, q0, q1, q2, q3);
}
534
535#[$m]
536$($s)* fn daala_fdct_ii_8<T: TxOperations>(
537 r0: T, r1: T, r2: T, r3: T, r4: T, r5: T, r6: T, r7: T, output: &mut [T],
538) {
539 // +/- Butterflies with asymmetric output.
540 let (r0h, r7) = butterfly_neg(r0, r7);
541 let (r1, r6h) = butterfly_add(r1, r6);
542 let (r2h, r5) = butterfly_neg(r2, r5);
543 let (r3, r4h) = butterfly_add(r3, r4);
544
545 // Embedded 4-point transforms with asymmetric input.
546 daala_fdct_ii_4_asym(r0h, r1, r2h, r3, &mut output[0..4]);
547 daala_fdst_iv_4_asym(r7, r6h, r5, r4h, &mut output[4..8]);
548 output[4..8].reverse();
549}
550
551#[$m]
552$($s)* fn daala_fdct8<T: TxOperations>(coeffs: &mut [T]) {
553 assert!(coeffs.len() >= 8);
554 let mut temp_out: [T; 8] = [T::zero(); 8];
555 daala_fdct_ii_8(
556 coeffs[0],
557 coeffs[1],
558 coeffs[2],
559 coeffs[3],
560 coeffs[4],
561 coeffs[5],
562 coeffs[6],
563 coeffs[7],
564 &mut temp_out,
565 );
566
567 coeffs[0] = temp_out[0];
568 coeffs[1] = temp_out[4];
569 coeffs[2] = temp_out[2];
570 coeffs[3] = temp_out[6];
571 coeffs[4] = temp_out[1];
572 coeffs[5] = temp_out[5];
573 coeffs[6] = temp_out[3];
574 coeffs[7] = temp_out[7];
575}
576
/// 8-point kernel behind `daala_fdst8`. Four stages: four rotations, two
/// butterfly stages, and three final rotations. Writes `r0..r7` to
/// `output[0..8]`.
#[$m]
$($s)* fn daala_fdst_iv_8<T: TxOperations>(
  r0: T, r1: T, r2: T, r3: T, r4: T, r5: T, r6: T, r7: T, output: &mut [T],
) {
  // Stage 0
  // 17911/16384 = Sin[15*Pi/32] + Cos[15*Pi/32] = 1.0932018670017576
  // 14699/16384 = Sin[15*Pi/32] - Cos[15*Pi/32] = 0.8971675863426363
  // 803/8192 = Cos[15*Pi/32] = 0.0980171403295606
  let (r0, r7) =
    RotateAdd::kernel::<14, 14, 13>(r0, r7, (17911, 14699, 803));
  // 20435/16384 = Sin[13*Pi/32] + Cos[13*Pi/32] = 1.24722501298667123
  // 21845/32768 = Sin[13*Pi/32] - Cos[13*Pi/32] = 0.66665565847774650
  // 1189/4096 = Cos[13*Pi/32] = 0.29028467725446233
  let (r6, r1) =
    RotateSub::kernel::<14, 15, 12>(r6, r1, (20435, 21845, 1189));
  // 22173/16384 = Sin[11*Pi/32] + Cos[11*Pi/32] = 1.3533180011743526
  // 3363/8192 = Sin[11*Pi/32] - Cos[11*Pi/32] = 0.4105245275223574
  // 15447/32768 = Cos[11*Pi/32] = 0.47139673682599764
  let (r2, r5) =
    RotateAdd::kernel::<14, 13, 15>(r2, r5, (22173, 3363, 15447));
  // 23059/16384 = Sin[9*Pi/32] + Cos[9*Pi/32] = 1.4074037375263826
  // 2271/16384 = Sin[9*Pi/32] - Cos[9*Pi/32] = 0.1386171691990915
  // 5197/8192 = Cos[9*Pi/32] = 0.6343932841636455
  let (r4, r3) =
    RotateSub::kernel::<14, 14, 13>(r4, r3, (23059, 2271, 5197));

  // Stage 1
  // Butterflies with asymmetric output: the first result of each call is a
  // (half, full) pair, the second a single value.
  let (r0, r3h) = butterfly_add(r0, r3);
  let (r2, r1h) = butterfly_sub(r2, r1);
  let (r5, r6h) = butterfly_add(r5, r6);
  let (r7, r4h) = butterfly_sub(r7, r4);

  // Stage 2
  let (r7, r6) = butterfly_add_asym(r7, r6h);
  let (r5, r3) = butterfly_add_asym(r5, r3h);
  let (r2, r4) = butterfly_add_asym(r2, r4h);
  let (r0, r1) = butterfly_sub_asym(r0, r1h);

  // Stage 3
  // 10703/8192 = Sin[3*Pi/8] + Cos[3*Pi/8] = 1.3065629648763766
  // 8867/16384 = Sin[3*Pi/8] - Cos[3*Pi/8] = 0.5411961001461969
  // 3135/4096 = 2*Cos[3*Pi/8] = 0.7653668647301796
  let (r3, r4) =
    RotateSubAvg::kernel::<13, 14, 12>(r3, r4, (10703, 8867, 3135));
  // 10703/8192 = Sin[3*Pi/8] + Cos[3*Pi/8] = 1.3065629648763766
  // 8867/16384 = Sin[3*Pi/8] - Cos[3*Pi/8] = 0.5411961001461969
  // 3135/4096 = 2*Cos[3*Pi/8] = 0.7653668647301796
  let (r2, r5) =
    RotateNegAvg::kernel::<13, 14, 12>(r2, r5, (10703, 8867, 3135));
  // 11585/8192 = Sin[Pi/4] + Cos[Pi/4] = 1.4142135623730951
  // 11585/8192 = 2*Cos[Pi/4] = 1.4142135623730951
  let (r1, r6) = RotatePi4SubAvg::kernel::<13, 13>(r1, r6, (11585, 11585));

  store_coeffs!(output, r0, r1, r2, r3, r4, r5, r6, r7);
}
632
633#[$m]
634$($s)* fn daala_fdst8<T: TxOperations>(coeffs: &mut [T]) {
635 assert!(coeffs.len() >= 8);
636 let mut temp_out: [T; 8] = [T::zero(); 8];
637 daala_fdst_iv_8(
638 coeffs[0],
639 coeffs[1],
640 coeffs[2],
641 coeffs[3],
642 coeffs[4],
643 coeffs[5],
644 coeffs[6],
645 coeffs[7],
646 &mut temp_out,
647 );
648
649 coeffs[0] = temp_out[0];
650 coeffs[1] = temp_out[4];
651 coeffs[2] = temp_out[2];
652 coeffs[3] = temp_out[6];
653 coeffs[4] = temp_out[1];
654 coeffs[5] = temp_out[5];
655 coeffs[6] = temp_out[3];
656 coeffs[7] = temp_out[7];
657}
658
/// 4-point kernel used as the second half of `daala_fdct_ii_8_asym`. Three
/// stages: two shifted rotations, two butterflies, one Pi/4 rotation. Writes
/// `q0..q3` to `output[0..4]`.
#[$m]
$($s)* fn daala_fdst_iv_4<T: TxOperations>(
  q0: T, q1: T, q2: T, q3: T, output: &mut [T],
) {
  // Stage 0
  // 13623/16384 = (Sin[7*Pi/16] + Cos[7*Pi/16])/Sqrt[2] = 0.831469612302545
  // 4551/4096 = (Sin[7*Pi/16] - Cos[7*Pi/16])*Sqrt[2] = 1.111140466039204
  // 565/2048 ~= Cos[7*Pi/16]*Sqrt[2] = 0.275899379282943
  let (q0, q3) =
    RotateAddShift::kernel::<14, 12, 11>(q0, q3, (13623, 4551, 565));
  // 16069/16384 = (Sin[5*Pi/16] + Cos[5*Pi/16])/Sqrt[2] = 0.9807852804032304
  // 12785/32768 = (Sin[5*Pi/16] - Cos[5*Pi/16])*Sqrt[2] = 0.3901806440322566
  // 1609/2048 = Cos[5*Pi/16]*Sqrt[2] = 0.7856949583871021
  let (q2, q1) =
    RotateSubShift::kernel::<14, 15, 11>(q2, q1, (16069, 12785, 1609));

  // Stage 1
  // The rotation outputs are plain values, so the (half, full) pair the
  // asymmetric butterfly expects is rebuilt with rshift1.
  let (q2, q3) = butterfly_sub_asym((q2.rshift1(), q2), q3);
  let (q0, q1) = butterfly_sub_asym((q0.rshift1(), q0), q1);

  // Stage 2
  // 11585/8192 = Sin[Pi/4] + Cos[Pi/4] = 1.4142135623730951
  // 11585/8192 = 2*Cos[Pi/4] = 1.4142135623730951
  let (q2, q1) = RotatePi4AddAvg::kernel::<13, 13>(q2, q1, (11585, 11585));

  store_coeffs!(output, q0, q1, q2, q3);
}
686
687
688#[$m]
689$($s)* fn daala_fdct_ii_8_asym<T: TxOperations>(
690 r0h: T, r1: (T, T), r2h: T, r3: (T, T), r4h: T, r5: (T, T), r6h: T,
691 r7: (T, T), output: &mut [T],
692) {
693 // +/- Butterflies with asymmetric input.
694 let (r0, r7) = butterfly_neg_asym(r0h, r7);
695 let (r1, r6) = butterfly_sub_asym(r1, r6h);
696 let (r2, r5) = butterfly_neg_asym(r2h, r5);
697 let (r3, r4) = butterfly_sub_asym(r3, r4h);
698
699 // Embedded 4-point orthonormal transforms.
700 daala_fdct_ii_4(r0, r1, r2, r3, &mut output[0..4]);
701 daala_fdst_iv_4(r7, r6, r5, r4, &mut output[4..8]);
702 output[4..8].reverse();
703}
704
/// 8-point kernel taking asymmetric input (the pair/half values produced by
/// the butterflies of `daala_fdct_ii_16`). Four stages: four half-kernel
/// rotations, two butterfly stages, and three final rotations. Writes
/// `r0..r7` to `output[0..8]`.
#[$m]
$($s)* fn daala_fdst_iv_8_asym<T: TxOperations>(
  r0: (T, T), r1h: T, r2: (T, T), r3h: T, r4: (T, T), r5h: T, r6: (T, T),
  r7h: T, output: &mut [T],
) {
  // Stage 0
  // 12665/16384 = (Sin[15*Pi/32] + Cos[15*Pi/32])/Sqrt[2] = 0.77301045336274
  // 5197/4096 = (Sin[15*Pi/32] - Cos[15*Pi/32])*Sqrt[2] = 1.26878656832729
  // 2271/16384 = Cos[15*Pi/32]*Sqrt[2] = 0.13861716919909
  let (r0, r7) =
    RotateAdd::half_kernel::<14, 12, 14>(r0, r7h, (12665, 5197, 2271));
  // 14449/16384 = (Sin[13*Pi/32] + Cos[13*Pi/32])/Sqrt[2] = 0.881921264348355
  // 30893/32768 = (Sin[13*Pi/32] - Cos[13*Pi/32])*Sqrt[2] = 0.942793473651995
  // 3363/8192 = Cos[13*Pi/32]*Sqrt[2] = 0.410524527522357
  let (r6, r1) =
    RotateSub::half_kernel::<14, 15, 13>(r6, r1h, (14449, 30893, 3363));
  // 15679/16384 = (Sin[11*Pi/32] + Cos[11*Pi/32])/Sqrt[2] = 0.956940335732209
  // 1189/2048 = (Sin[11*Pi/32] - Cos[11*Pi/32])*Sqrt[2] = 0.580569354508925
  // 5461/8192 = Cos[11*Pi/32]*Sqrt[2] = 0.666655658477747
  let (r2, r5) =
    RotateAdd::half_kernel::<14, 11, 13>(r2, r5h, (15679, 1189, 5461));
  // 16305/16384 = (Sin[9*Pi/32] + Cos[9*Pi/32])/Sqrt[2] = 0.9951847266721969
  // 803/4096 = (Sin[9*Pi/32] - Cos[9*Pi/32])*Sqrt[2] = 0.1960342806591213
  // 14699/16384 = Cos[9*Pi/32]*Sqrt[2] = 0.8971675863426364
  let (r4, r3) =
    RotateSub::half_kernel::<14, 12, 14>(r4, r3h, (16305, 803, 14699));

  // Stage 1
  // Butterflies with asymmetric output: the first result of each call is a
  // (half, full) pair, the second a single value.
  let (r0, r3h) = butterfly_add(r0, r3);
  let (r2, r1h) = butterfly_sub(r2, r1);
  let (r5, r6h) = butterfly_add(r5, r6);
  let (r7, r4h) = butterfly_sub(r7, r4);

  // Stage 2
  let (r7, r6) = butterfly_add_asym(r7, r6h);
  let (r5, r3) = butterfly_add_asym(r5, r3h);
  let (r2, r4) = butterfly_add_asym(r2, r4h);
  let (r0, r1) = butterfly_sub_asym(r0, r1h);

  // Stage 3
  // 669/512 = Sin[3*Pi/8] + Cos[3*Pi/8] = 1.3065629648763766
  // 8867/16384 = Sin[3*Pi/8] - Cos[3*Pi/8] = 0.5411961001461969
  // 3135/4096 = 2*Cos[3*Pi/8] = 0.7653668647301796
  let (r3, r4) =
    RotateSubAvg::kernel::<9, 14, 12>(r3, r4, (669, 8867, 3135));
  // 669/512 = Sin[3*Pi/8] + Cos[3*Pi/8] = 1.3065629648763766
  // 8867/16384 = Sin[3*Pi/8] - Cos[3*Pi/8] = 0.5411961001461969
  // 3135/4096 = 2*Cos[3*Pi/8] = 0.7653668647301796
  let (r2, r5) =
    RotateNegAvg::kernel::<9, 14, 12>(r2, r5, (669, 8867, 3135));
  // 5793/4096 = Sin[Pi/4] + Cos[Pi/4] = 1.4142135623730951
  // 11585/8192 = 2*Cos[Pi/4] = 1.4142135623730951
  let (r1, r6) = RotatePi4SubAvg::kernel::<12, 13>(r1, r6, (5793, 11585));

  store_coeffs!(output, r0, r1, r2, r3, r4, r5, r6, r7);
}
761
762#[$m]
763$($s)* fn daala_fdct_ii_16<T: TxOperations>(
764 s0: T, s1: T, s2: T, s3: T, s4: T, s5: T, s6: T, s7: T, s8: T, s9: T, sa: T,
765 sb: T, sc: T, sd: T, se: T, sf: T, output: &mut [T],
766) {
767 // +/- Butterflies with asymmetric output.
768 let (s0h, sf) = butterfly_neg(s0, sf);
769 let (s1, seh) = butterfly_add(s1, se);
770 let (s2h, sd) = butterfly_neg(s2, sd);
771 let (s3, sch) = butterfly_add(s3, sc);
772 let (s4h, sb) = butterfly_neg(s4, sb);
773 let (s5, sah) = butterfly_add(s5, sa);
774 let (s6h, s9) = butterfly_neg(s6, s9);
775 let (s7, s8h) = butterfly_add(s7, s8);
776
777 // Embedded 8-point transforms with asymmetric input.
778 daala_fdct_ii_8_asym(s0h, s1, s2h, s3, s4h, s5, s6h, s7, &mut output[0..8]);
779 daala_fdst_iv_8_asym(sf, seh, sd, sch, sb, sah, s9, s8h, &mut output[8..16]);
780 output[8..16].reverse();
781}
782
783#[$m]
784$($s)* fn daala_fdct16<T: TxOperations>(coeffs: &mut [T]) {
785 assert!(coeffs.len() >= 16);
786 let mut temp_out: [T; 16] = [T::zero(); 16];
787 daala_fdct_ii_16(
788 coeffs[0],
789 coeffs[1],
790 coeffs[2],
791 coeffs[3],
792 coeffs[4],
793 coeffs[5],
794 coeffs[6],
795 coeffs[7],
796 coeffs[8],
797 coeffs[9],
798 coeffs[10],
799 coeffs[11],
800 coeffs[12],
801 coeffs[13],
802 coeffs[14],
803 coeffs[15],
804 &mut temp_out,
805 );
806
807 coeffs[0] = temp_out[0];
808 coeffs[1] = temp_out[8];
809 coeffs[2] = temp_out[4];
810 coeffs[3] = temp_out[12];
811 coeffs[4] = temp_out[2];
812 coeffs[5] = temp_out[10];
813 coeffs[6] = temp_out[6];
814 coeffs[7] = temp_out[14];
815 coeffs[8] = temp_out[1];
816 coeffs[9] = temp_out[9];
817 coeffs[10] = temp_out[5];
818 coeffs[11] = temp_out[13];
819 coeffs[12] = temp_out[3];
820 coeffs[13] = temp_out[11];
821 coeffs[14] = temp_out[7];
822 coeffs[15] = temp_out[15];
823}
824
/// 16-point kernel called by `daala_fdst16`. Six stages: eight shifted
/// rotations, two butterfly stages, four rotations, another butterfly stage
/// and five final rotations. Writes `s0..sf` to `output[0..16]`.
#[$m]
$($s)* fn daala_fdst_iv_16<T: TxOperations>(
  s0: T, s1: T, s2: T, s3: T, s4: T, s5: T, s6: T, s7: T, s8: T, s9: T, sa: T,
  sb: T, sc: T, sd: T, se: T, sf: T, output: &mut [T],
) {
  // Stage 0
  // 24279/32768 = (Sin[31*Pi/64] + Cos[31*Pi/64])/Sqrt[2] = 0.74095112535496
  // 11003/8192 = (Sin[31*Pi/64] - Cos[31*Pi/64])*Sqrt[2] = 1.34311790969404
  // 1137/16384 = Cos[31*Pi/64]*Sqrt[2] = 0.06939217050794
  let (s0, sf) =
    RotateAddShift::kernel::<15, 13, 14>(s0, sf, (24279, 11003, 1137));
  // 1645/2048 = (Sin[29*Pi/64] + Cos[29*Pi/64])/Sqrt[2] = 0.8032075314806449
  // 305/256 = (Sin[29*Pi/64] - Cos[29*Pi/64])*Sqrt[2] = 1.1913986089848667
  // 425/2048 = Cos[29*Pi/64]*Sqrt[2] = 0.2075082269882116
  let (se, s1) =
    RotateSubShift::kernel::<11, 8, 11>(se, s1, (1645, 305, 425));
  // 14053/16384 = (Sin[27*Pi/64] + Cos[27*Pi/64])/Sqrt[2] = 0.85772861000027
  // 8423/8192 = (Sin[27*Pi/64] - Cos[27*Pi/64])*Sqrt[2] = 1.02820548838644
  // 2815/8192 = Cos[27*Pi/64]*Sqrt[2] = 0.34362586580705
  let (s2, sd) =
    RotateAddShift::kernel::<14, 13, 13>(s2, sd, (14053, 8423, 2815));
  // 14811/16384 = (Sin[25*Pi/64] + Cos[25*Pi/64])/Sqrt[2] = 0.90398929312344
  // 7005/8192 = (Sin[25*Pi/64] - Cos[25*Pi/64])*Sqrt[2] = 0.85511018686056
  // 3903/8192 = Cos[25*Pi/64]*Sqrt[2] = 0.47643419969316
  let (sc, s3) =
    RotateSubShift::kernel::<14, 13, 13>(sc, s3, (14811, 7005, 3903));
  // 30853/32768 = (Sin[23*Pi/64] + Cos[23*Pi/64])/Sqrt[2] = 0.94154406518302
  // 11039/16384 = (Sin[23*Pi/64] - Cos[23*Pi/64])*Sqrt[2] = 0.67377970678444
  // 9907/16384 = Cos[23*Pi/64]*Sqrt[2] = 0.60465421179080
  let (s4, sb) =
    RotateAddShift::kernel::<15, 14, 14>(s4, sb, (30853, 11039, 9907));
  // 15893/16384 = (Sin[21*Pi/64] + Cos[21*Pi/64])/Sqrt[2] = 0.97003125319454
  // 3981/8192 = (Sin[21*Pi/64] - Cos[21*Pi/64])*Sqrt[2] = 0.48596036
  // 1489/2048 = Cos[21*Pi/64]*Sqrt[2] = 0.72705107329128
  let (sa, s5) =
    RotateSubShift::kernel::<14, 13, 11>(sa, s5, (15893, 3981, 1489));
  // 32413/32768 = (Sin[19*Pi/64] + Cos[19*Pi/64])/Sqrt[2] = 0.98917650996478
  // 601/2048 = (Sin[19*Pi/64] - Cos[19*Pi/64])*Sqrt[2] = 0.29346094891072
  // 13803/16384 = Cos[19*Pi/64]*Sqrt[2] = 0.84244603550942
  let (s6, s9) =
    RotateAddShift::kernel::<15, 11, 14>(s6, s9, (32413, 601, 13803));
  // 32729/32768 = (Sin[17*Pi/64] + Cos[17*Pi/64])/Sqrt[2] = 0.99879545620517
  // 201/2048 = (Sin[17*Pi/64] - Cos[17*Pi/64])*Sqrt[2] = 0.09813534865484
  // 1945/2048 = Cos[17*Pi/64]*Sqrt[2] = 0.94972778187775
  let (s8, s7) =
    RotateSubShift::kernel::<15, 11, 11>(s8, s7, (32729, 201, 1945));

  // Stage 1
  // The rotation outputs are plain values, so the (half, full) pair the
  // asymmetric butterflies expect is rebuilt with rshift1.
  let (s0, s7) = butterfly_sub_asym((s0.rshift1(), s0), s7);
  let (s8, sf) = butterfly_sub_asym((s8.rshift1(), s8), sf);
  let (s4, s3) = butterfly_add_asym((s4.rshift1(), s4), s3);
  let (sc, sb) = butterfly_add_asym((sc.rshift1(), sc), sb);
  let (s2, s5) = butterfly_sub_asym((s2.rshift1(), s2), s5);
  let (sa, sd) = butterfly_sub_asym((sa.rshift1(), sa), sd);
  let (s6, s1) = butterfly_add_asym((s6.rshift1(), s6), s1);
  let (se, s9) = butterfly_add_asym((se.rshift1(), se), s9);

  // Stage 2
  // The first four butterflies keep only the full value of the returned
  // pair; the halved value is discarded.
  let ((_s8h, s8), s4h) = butterfly_add(s8, s4);
  let ((_s7h, s7), sbh) = butterfly_add(s7, sb);
  let ((_sah, sa), s6h) = butterfly_sub(sa, s6);
  let ((_s5h, s5), s9h) = butterfly_sub(s5, s9);
  let (s0, s3h) = butterfly_add(s0, s3);
  let (sd, seh) = butterfly_add(sd, se);
  let (s2, s1h) = butterfly_sub(s2, s1);
  let (sf, sch) = butterfly_sub(sf, sc);

  // Stage 3
  // 301/256 = Sin[7*Pi/16] + Cos[7*Pi/16] = 1.1758756024193586
  // 1609/2048 = Sin[7*Pi/16] - Cos[7*Pi/16] = 0.7856949583871022
  // 12785/32768 = 2*Cos[7*Pi/16] = 0.3901806440322565
  let (s8, s7) =
    RotateAddAvg::kernel::<8, 11, 15>(s8, s7, (301, 1609, 12785));
  // 11363/8192 = Sin[5*Pi/16] + Cos[5*Pi/16] = 1.3870398453221475
  // 9041/32768 = Sin[5*Pi/16] - Cos[5*Pi/16] = 0.2758993792829431
  // 4551/8192 = Cos[5*Pi/16] = 0.5555702330196022
  let (s9, s6) =
    RotateAdd::kernel::<13, 15, 13>(s9h, s6h, (11363, 9041, 4551));
  // 5681/4096 = Sin[5*Pi/16] + Cos[5*Pi/16] = 1.3870398453221475
  // 9041/32768 = Sin[5*Pi/16] - Cos[5*Pi/16] = 0.2758993792829431
  // 4551/4096 = 2*Cos[5*Pi/16] = 1.1111404660392044
  let (s5, sa) =
    RotateNegAvg::kernel::<12, 15, 12>(s5, sa, (5681, 9041, 4551));
  // 9633/8192 = Sin[7*Pi/16] + Cos[7*Pi/16] = 1.1758756024193586
  // 12873/16384 = Sin[7*Pi/16] - Cos[7*Pi/16] = 0.7856949583871022
  // 6393/32768 = Cos[7*Pi/16] = 0.1950903220161283
  let (s4, sb) =
    RotateNeg::kernel::<13, 14, 15>(s4h, sbh, (9633, 12873, 6393));

  // Stage 4
  let (s2, sc) = butterfly_add_asym(s2, sch);
  let (s0, s1) = butterfly_sub_asym(s0, s1h);
  let (sf, se) = butterfly_add_asym(sf, seh);
  let (sd, s3) = butterfly_add_asym(sd, s3h);
  let (s7, s6) = butterfly_add_asym((s7.rshift1(), s7), s6);
  let (s8, s9) = butterfly_sub_asym((s8.rshift1(), s8), s9);
  let (sa, sb) = butterfly_sub_asym((sa.rshift1(), sa), sb);
  let (s5, s4) = butterfly_add_asym((s5.rshift1(), s5), s4);

  // Stage 5
  // 669/512 = Sin[3*Pi/8] + Cos[3*Pi/8] = 1.3065629648763766
  // 8867/16384 = Sin[3*Pi/8] - Cos[3*Pi/8] = 0.5411961001461969
  // 3135/4096 = 2*Cos[3*Pi/8] = 0.7653668647301796
  let (sc, s3) =
    RotateAddAvg::kernel::<9, 14, 12>(sc, s3, (669, 8867, 3135));
  // 669/512 = Sin[3*Pi/8] + Cos[3*Pi/8] = 1.3065629648763766
  // 8867/16384 = Sin[3*Pi/8] - Cos[3*Pi/8] = 0.5411961001461969
  // 3135/4096 = 2*Cos[3*Pi/8] = 0.7653668647301796
  let (s2, sd) =
    RotateNegAvg::kernel::<9, 14, 12>(s2, sd, (669, 8867, 3135));
  // 5793/4096 = Sin[Pi/4] + Cos[Pi/4] = 1.4142135623730951
  // 11585/8192 = 2*Cos[Pi/4] = 1.4142135623730951
  let (sa, s5) = RotatePi4AddAvg::kernel::<12, 13>(sa, s5, (5793, 11585));
  // 5793/4096 = Sin[Pi/4] + Cos[Pi/4] = 1.4142135623730951
  // 11585/8192 = 2*Cos[Pi/4] = 1.4142135623730951
  let (s6, s9) = RotatePi4AddAvg::kernel::<12, 13>(s6, s9, (5793, 11585));
  // 5793/4096 = Sin[Pi/4] + Cos[Pi/4] = 1.4142135623730951
  // 11585/8192 = 2*Cos[Pi/4] = 1.4142135623730951
  let (se, s1) = RotatePi4AddAvg::kernel::<12, 13>(se, s1, (5793, 11585));

  store_coeffs!(
    output, s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sa, sb, sc, sd, se, sf
  );
}
949
950#[$m]
951$($s)* fn daala_fdst16<T: TxOperations>(coeffs: &mut [T]) {
952 assert!(coeffs.len() >= 16);
953 let mut temp_out: [T; 16] = [T::zero(); 16];
954 daala_fdst_iv_16(
955 coeffs[0],
956 coeffs[1],
957 coeffs[2],
958 coeffs[3],
959 coeffs[4],
960 coeffs[5],
961 coeffs[6],
962 coeffs[7],
963 coeffs[8],
964 coeffs[9],
965 coeffs[10],
966 coeffs[11],
967 coeffs[12],
968 coeffs[13],
969 coeffs[14],
970 coeffs[15],
971 &mut temp_out,
972 );
973
974 coeffs[0] = temp_out[0];
975 coeffs[1] = temp_out[8];
976 coeffs[2] = temp_out[4];
977 coeffs[3] = temp_out[12];
978 coeffs[4] = temp_out[2];
979 coeffs[5] = temp_out[10];
980 coeffs[6] = temp_out[6];
981 coeffs[7] = temp_out[14];
982 coeffs[8] = temp_out[1];
983 coeffs[9] = temp_out[9];
984 coeffs[10] = temp_out[5];
985 coeffs[11] = temp_out[13];
986 coeffs[12] = temp_out[3];
987 coeffs[13] = temp_out[11];
988 coeffs[14] = temp_out[7];
989 coeffs[15] = temp_out[15];
990}
991
// 16-point forward DCT-II kernel (per its name) for inputs in "asymmetric"
// form.
//
// Even-numbered inputs (`s0h`, `s2h`, ...) are single values and
// odd-numbered inputs (`s1`, `s3`, ...) are `(T, T)` pairs.
// NOTE(review): the `h` suffix and the pair layout presumably follow the
// (halved, full) convention of the `*_asym` butterflies -- confirm against
// their definitions.
//
// Results are written to `output[0..16]`; `output` must hold at least 16
// elements or the slicing below panics.
#[$m]
$($s)* fn daala_fdct_ii_16_asym<T: TxOperations>(
  s0h: T, s1: (T, T), s2h: T, s3: (T, T), s4h: T, s5: (T, T), s6h: T,
  s7: (T, T), s8h: T, s9: (T, T), sah: T, sb: (T, T), sch: T, sd: (T, T),
  seh: T, sf: (T, T), output: &mut [T],
) {
  // +/- Butterflies with asymmetric input.
  // Each butterfly combines input k with input 15 - k, pairing one single
  // value with one `(T, T)` pair.
  let (s0, sf) = butterfly_neg_asym(s0h, sf);
  let (s1, se) = butterfly_sub_asym(s1, seh);
  let (s2, sd) = butterfly_neg_asym(s2h, sd);
  let (s3, sc) = butterfly_sub_asym(s3, sch);
  let (s4, sb) = butterfly_neg_asym(s4h, sb);
  let (s5, sa) = butterfly_sub_asym(s5, sah);
  let (s6, s9) = butterfly_neg_asym(s6h, s9);
  let (s7, s8) = butterfly_sub_asym(s7, s8h);

  // Embedded 8-point orthonormal transforms.
  // The low halves feed the DCT into output[0..8]; the high halves feed the
  // DST in descending order (sf..s8) into output[8..16], which is then
  // reversed in place.
  daala_fdct_ii_8(s0, s1, s2, s3, s4, s5, s6, s7, &mut output[0..8]);
  daala_fdst_iv_8(sf, se, sd, sc, sb, sa, s9, s8, &mut output[8..16]);
  output[8..16].reverse();
}
1013
// 16-point forward DST-IV kernel (per its name) for inputs in "asymmetric"
// form: even-numbered inputs (`s0`, `s2`, ...) are `(T, T)` pairs and
// odd-numbered inputs (`s1h`, `s3h`, ...) are single values.
//
// The fractions quoted in the comments are the fixed-point constants handed
// to each rotation kernel: the tuple carries the numerators and the const
// generic arguments carry the matching power-of-two shifts, e.g.
// `<11, 15, 11>` with `(1073, 62241, 201)` reads as 1073/2^11, 62241/2^15
// and 201/2^11.
// NOTE(review): this mapping is inferred from the comment/argument
// correspondence; confirm against the rotation kernel definitions.
//
// The 16 results `s0`..`sf` are handed to `store_coeffs!` with `output`.
#[$m]
$($s)* fn daala_fdst_iv_16_asym<T: TxOperations>(
  s0: (T, T), s1h: T, s2: (T, T), s3h: T, s4: (T, T), s5h: T, s6: (T, T),
  s7h: T, s8: (T, T), s9h: T, sa: (T, T), sbh: T, sc: (T, T), sdh: T,
  se: (T, T), sfh: T, output: &mut [T],
) {
  // Stage 0
  // Each rotation pairs input k with input 15 - k.
  // 1073/2048 = (Sin[31*Pi/64] + Cos[31*Pi/64])/2 = 0.5239315652662953
  // 62241/32768 = (Sin[31*Pi/64] - Cos[31*Pi/64])*2 = 1.8994555637555088
  // 201/2048 = Cos[31*Pi/64]*2 = 0.0981353486548360
  let (s0, sf) =
    RotateAddShift::half_kernel::<11, 15, 11>(s0, sfh, (1073, 62241, 201));
  // 18611/32768 = (Sin[29*Pi/64] + Cos[29*Pi/64])/2 = 0.5679534922100714
  // 55211/32768 = (Sin[29*Pi/64] - Cos[29*Pi/64])*2 = 1.6848920710188384
  // 601/2048 = Cos[29*Pi/64]*2 = 0.2934609489107235
  let (se, s1) = RotateSubShift::half_kernel::<15, 15, 11>(
    se,
    s1h,
    (18611, 55211, 601),
  );
  // 9937/16384 = (Sin[27*Pi/64] + Cos[27*Pi/64])/2 = 0.6065057165489039
  // 1489/1024 = (Sin[27*Pi/64] - Cos[27*Pi/64])*2 = 1.4541021465825602
  // 3981/8192 = Cos[27*Pi/64]*2 = 0.4859603598065277
  let (s2, sd) =
    RotateAddShift::half_kernel::<14, 10, 13>(s2, sdh, (9937, 1489, 3981));
  // 10473/16384 = (Sin[25*Pi/64] + Cos[25*Pi/64])/2 = 0.6392169592876205
  // 39627/32768 = (Sin[25*Pi/64] - Cos[25*Pi/64])*2 = 1.2093084235816014
  // 11039/16384 = Cos[25*Pi/64]*2 = 0.6737797067844401
  let (sc, s3) = RotateSubShift::half_kernel::<14, 15, 14>(
    sc,
    s3h,
    (10473, 39627, 11039),
  );
  // 2727/4096 = (Sin[23*Pi/64] + Cos[23*Pi/64])/2 = 0.6657721932768628
  // 3903/4096 = (Sin[23*Pi/64] - Cos[23*Pi/64])*2 = 0.9528683993863225
  // 7005/8192 = Cos[23*Pi/64]*2 = 0.8551101868605642
  let (s4, sb) =
    RotateAddShift::half_kernel::<12, 12, 13>(s4, sbh, (2727, 3903, 7005));
  // 5619/8192 = (Sin[21*Pi/64] + Cos[21*Pi/64])/2 = 0.6859156770967569
  // 2815/4096 = (Sin[21*Pi/64] - Cos[21*Pi/64])*2 = 0.6872517316141069
  // 8423/8192 = Cos[21*Pi/64]*2 = 1.0282054883864433
  let (sa, s5) =
    RotateSubShift::half_kernel::<13, 12, 13>(sa, s5h, (5619, 2815, 8423));
  // 2865/4096 = (Sin[19*Pi/64] + Cos[19*Pi/64])/2 = 0.6994534179865391
  // 13599/32768 = (Sin[19*Pi/64] - Cos[19*Pi/64])*2 = 0.4150164539764232
  // 305/256 = Cos[19*Pi/64]*2 = 1.1913986089848667
  let (s6, s9) =
    RotateAddShift::half_kernel::<12, 15, 8>(s6, s9h, (2865, 13599, 305));
  // 23143/32768 = (Sin[17*Pi/64] + Cos[17*Pi/64])/2 = 0.7062550401009887
  // 1137/8192 = (Sin[17*Pi/64] - Cos[17*Pi/64])*2 = 0.1387843410158816
  // 11003/8192 = Cos[17*Pi/64]*2 = 1.3431179096940367
  let (s8, s7) = RotateSubShift::half_kernel::<15, 13, 13>(
    s8,
    s7h,
    (23143, 1137, 11003),
  );

  // Stage 1
  // The first operand is rebuilt as a (value.rshift1(), value) pair, the
  // form the `*_asym` butterflies take for their first argument.
  let (s0, s7) = butterfly_sub_asym((s0.rshift1(), s0), s7);
  let (s8, sf) = butterfly_sub_asym((s8.rshift1(), s8), sf);
  let (s4, s3) = butterfly_add_asym((s4.rshift1(), s4), s3);
  let (sc, sb) = butterfly_add_asym((sc.rshift1(), sc), sb);
  let (s2, s5) = butterfly_sub_asym((s2.rshift1(), s2), s5);
  let (sa, sd) = butterfly_sub_asym((sa.rshift1(), sa), sd);
  let (s6, s1) = butterfly_add_asym((s6.rshift1(), s6), s1);
  let (se, s9) = butterfly_add_asym((se.rshift1(), se), s9);

  // Stage 2
  // Where the pair result is destructured, only its second element is kept;
  // the underscore-prefixed first element is intentionally unused.
  let ((_s8h, s8), s4h) = butterfly_add(s8, s4);
  let ((_s7h, s7), sbh) = butterfly_add(s7, sb);
  let ((_sah, sa), s6h) = butterfly_sub(sa, s6);
  let ((_s5h, s5), s9h) = butterfly_sub(s5, s9);
  let (s0, s3h) = butterfly_add(s0, s3);
  let (sd, seh) = butterfly_add(sd, se);
  let (s2, s1h) = butterfly_sub(s2, s1);
  let (sf, sch) = butterfly_sub(sf, sc);

  // Stage 3
  // 9633/8192 = Sin[7*Pi/16] + Cos[7*Pi/16] = 1.1758756024193586
  // 12873/16384 = Sin[7*Pi/16] - Cos[7*Pi/16] = 0.7856949583871022
  // 6393/32768 = Cos[7*Pi/16] = 0.1950903220161283
  let (s8, s7) =
    RotateAdd::kernel::<13, 14, 15>(s8, s7, (9633, 12873, 6393));
  // 22725/16384 = Sin[5*Pi/16] + Cos[5*Pi/16] = 1.3870398453221475
  // 9041/32768 = Sin[5*Pi/16] - Cos[5*Pi/16] = 0.2758993792829431
  // 4551/8192 = Cos[5*Pi/16] = 0.5555702330196022
  let (s9, s6) =
    RotateAdd::kernel::<14, 15, 13>(s9h, s6h, (22725, 9041, 4551));
  // 11363/8192 = Sin[5*Pi/16] + Cos[5*Pi/16] = 1.3870398453221475
  // 9041/32768 = Sin[5*Pi/16] - Cos[5*Pi/16] = 0.2758993792829431
  // 4551/8192 = Cos[5*Pi/16] = 0.5555702330196022
  let (s5, sa) =
    RotateNeg::kernel::<13, 15, 13>(s5, sa, (11363, 9041, 4551));
  // 9633/8192 = Sin[7*Pi/16] + Cos[7*Pi/16] = 1.1758756024193586
  // 12873/16384 = Sin[7*Pi/16] - Cos[7*Pi/16] = 0.7856949583871022
  // 6393/32768 = Cos[7*Pi/16] = 0.1950903220161283
  let (s4, sb) =
    RotateNeg::kernel::<13, 14, 15>(s4h, sbh, (9633, 12873, 6393));

  // Stage 4
  let (s2, sc) = butterfly_add_asym(s2, sch);
  let (s0, s1) = butterfly_sub_asym(s0, s1h);
  let (sf, se) = butterfly_add_asym(sf, seh);
  let (sd, s3) = butterfly_add_asym(sd, s3h);
  let (s7, s6) = butterfly_add_asym((s7.rshift1(), s7), s6);
  let (s8, s9) = butterfly_sub_asym((s8.rshift1(), s8), s9);
  let (sa, sb) = butterfly_sub_asym((sa.rshift1(), sa), sb);
  let (s5, s4) = butterfly_add_asym((s5.rshift1(), s5), s4);

  // Stage 5
  // 10703/8192 = Sin[3*Pi/8] + Cos[3*Pi/8] = 1.3065629648763766
  // 8867/16384 = Sin[3*Pi/8] - Cos[3*Pi/8] = 0.5411961001461969
  // 3135/8192 = Cos[3*Pi/8] = 0.3826834323650898
  let (sc, s3) =
    RotateAdd::kernel::<13, 14, 13>(sc, s3, (10703, 8867, 3135));
  // 10703/8192 = Sin[3*Pi/8] + Cos[3*Pi/8] = 1.3065629648763766
  // 8867/16384 = Sin[3*Pi/8] - Cos[3*Pi/8] = 0.5411961001461969
  // 3135/8192 = Cos[3*Pi/8] = 0.3826834323650898
  let (s2, sd) =
    RotateNeg::kernel::<13, 14, 13>(s2, sd, (10703, 8867, 3135));
  // 11585/8192 = Sin[Pi/4] + Cos[Pi/4] = 1.4142135623730951
  // 5793/8192 = Cos[Pi/4] = 0.7071067811865475
  let (sa, s5) = RotatePi4Add::kernel::<13, 13>(sa, s5, (11585, 5793));
  // 11585/8192 = Sin[Pi/4] + Cos[Pi/4] = 1.4142135623730951
  // 5793/8192 = Cos[Pi/4] = 0.7071067811865475
  let (s6, s9) = RotatePi4Add::kernel::<13, 13>(s6, s9, (11585, 5793));
  // 11585/8192 = Sin[Pi/4] + Cos[Pi/4] = 1.4142135623730951
  // 5793/8192 = Cos[Pi/4] = 0.7071067811865475
  let (se, s1) = RotatePi4Add::kernel::<13, 13>(se, s1, (11585, 5793));

  store_coeffs!(
    output, s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sa, sb, sc, sd, se, sf
  );
}
1148
// 32-point forward DCT-II kernel (per its name) over 32 scalar inputs
// `t0`..`tv` (digits, then letters `a`..`v`).
//
// A first layer of butterflies combines input k with input 31 - k. Each
// butterfly yields one single value (bound to an `h`-suffixed name) and one
// `(T, T)` pair. These feed two 16-point "asymmetric input" kernels:
// `daala_fdct_ii_16_asym` writes `output[0..16]` and `daala_fdst_iv_16_asym`
// writes `output[16..32]`, which is then reversed in place.
//
// `output` must hold at least 32 elements or the slicing below panics.
#[$m]
$($s)* fn daala_fdct_ii_32<T: TxOperations>(
  t0: T, t1: T, t2: T, t3: T, t4: T, t5: T, t6: T, t7: T, t8: T, t9: T, ta: T,
  tb: T, tc: T, td: T, te: T, tf: T, tg: T, th: T, ti: T, tj: T, tk: T, tl: T,
  tm: T, tn: T, to: T, tp: T, tq: T, tr: T, ts: T, tt: T, tu: T, tv: T,
  output: &mut [T],
) {
  // +/- Butterflies with asymmetric output.
  // Even k uses butterfly_neg (single value first), odd k uses butterfly_add
  // (pair first), which yields the alternating single/pair layout the
  // 16-point asymmetric kernels take.
  let (t0h, tv) = butterfly_neg(t0, tv);
  let (t1, tuh) = butterfly_add(t1, tu);
  let (t2h, tt) = butterfly_neg(t2, tt);
  let (t3, tsh) = butterfly_add(t3, ts);
  let (t4h, tr) = butterfly_neg(t4, tr);
  let (t5, tqh) = butterfly_add(t5, tq);
  let (t6h, tp) = butterfly_neg(t6, tp);
  let (t7, toh) = butterfly_add(t7, to);
  let (t8h, tn) = butterfly_neg(t8, tn);
  let (t9, tmh) = butterfly_add(t9, tm);
  let (tah, tl) = butterfly_neg(ta, tl);
  let (tb, tkh) = butterfly_add(tb, tk);
  let (tch, tj) = butterfly_neg(tc, tj);
  let (td, tih) = butterfly_add(td, ti);
  let (teh, th) = butterfly_neg(te, th);
  let (tf, tgh) = butterfly_add(tf, tg);

  // Embedded 16-point transforms with asymmetric input.
  daala_fdct_ii_16_asym(
    t0h,
    t1,
    t2h,
    t3,
    t4h,
    t5,
    t6h,
    t7,
    t8h,
    t9,
    tah,
    tb,
    tch,
    td,
    teh,
    tf,
    &mut output[0..16],
  );
  // The upper half is passed in descending order (tv..tg) and its output is
  // reversed afterwards.
  daala_fdst_iv_16_asym(
    tv,
    tuh,
    tt,
    tsh,
    tr,
    tqh,
    tp,
    toh,
    tn,
    tmh,
    tl,
    tkh,
    tj,
    tih,
    th,
    tgh,
    &mut output[16..32],
  );
  output[16..32].reverse();
}
1215
1216#[$m]
1217$($s)* fn daala_fdct32<T: TxOperations>(coeffs: &mut [T]) {
1218 assert!(coeffs.len() >= 32);
1219 let mut temp_out: [T; 32] = [T::zero(); 32];
1220 daala_fdct_ii_32(
1221 coeffs[0],
1222 coeffs[1],
1223 coeffs[2],
1224 coeffs[3],
1225 coeffs[4],
1226 coeffs[5],
1227 coeffs[6],
1228 coeffs[7],
1229 coeffs[8],
1230 coeffs[9],
1231 coeffs[10],
1232 coeffs[11],
1233 coeffs[12],
1234 coeffs[13],
1235 coeffs[14],
1236 coeffs[15],
1237 coeffs[16],
1238 coeffs[17],
1239 coeffs[18],
1240 coeffs[19],
1241 coeffs[20],
1242 coeffs[21],
1243 coeffs[22],
1244 coeffs[23],
1245 coeffs[24],
1246 coeffs[25],
1247 coeffs[26],
1248 coeffs[27],
1249 coeffs[28],
1250 coeffs[29],
1251 coeffs[30],
1252 coeffs[31],
1253 &mut temp_out,
1254 );
1255
1256 coeffs[0] = temp_out[0];
1257 coeffs[1] = temp_out[16];
1258 coeffs[2] = temp_out[8];
1259 coeffs[3] = temp_out[24];
1260 coeffs[4] = temp_out[4];
1261 coeffs[5] = temp_out[20];
1262 coeffs[6] = temp_out[12];
1263 coeffs[7] = temp_out[28];
1264 coeffs[8] = temp_out[2];
1265 coeffs[9] = temp_out[18];
1266 coeffs[10] = temp_out[10];
1267 coeffs[11] = temp_out[26];
1268 coeffs[12] = temp_out[6];
1269 coeffs[13] = temp_out[22];
1270 coeffs[14] = temp_out[14];
1271 coeffs[15] = temp_out[30];
1272 coeffs[16] = temp_out[1];
1273 coeffs[17] = temp_out[17];
1274 coeffs[18] = temp_out[9];
1275 coeffs[19] = temp_out[25];
1276 coeffs[20] = temp_out[5];
1277 coeffs[21] = temp_out[21];
1278 coeffs[22] = temp_out[13];
1279 coeffs[23] = temp_out[29];
1280 coeffs[24] = temp_out[3];
1281 coeffs[25] = temp_out[19];
1282 coeffs[26] = temp_out[11];
1283 coeffs[27] = temp_out[27];
1284 coeffs[28] = temp_out[7];
1285 coeffs[29] = temp_out[23];
1286 coeffs[30] = temp_out[15];
1287 coeffs[31] = temp_out[31];
1288}
1289
// 32-point forward DCT-II kernel (per its name) for inputs in "asymmetric"
// form.
//
// Even-numbered inputs (`t0h`, `t2h`, ...) are single values and
// odd-numbered inputs (`t1`, `t3`, ...) are `(T, T)` pairs.
// NOTE(review): the `h` suffix and the pair layout presumably follow the
// (halved, full) convention of the `*_asym` butterflies -- confirm against
// their definitions.
//
// Results are written to `output[0..32]`; `output` must hold at least 32
// elements or the slicing below panics.
#[$m]
$($s)* fn daala_fdct_ii_32_asym<T: TxOperations>(
  t0h: T, t1: (T, T), t2h: T, t3: (T, T), t4h: T, t5: (T, T), t6h: T,
  t7: (T, T), t8h: T, t9: (T, T), tah: T, tb: (T, T), tch: T, td: (T, T),
  teh: T, tf: (T, T), tgh: T, th: (T, T), tih: T, tj: (T, T), tkh: T,
  tl: (T, T), tmh: T, tn: (T, T), toh: T, tp: (T, T), tqh: T, tr: (T, T),
  tsh: T, tt: (T, T), tuh: T, tv: (T, T), output: &mut [T],
) {
  // +/- Butterflies with asymmetric input.
  // Each butterfly combines input k with input 31 - k, pairing one single
  // value with one `(T, T)` pair.
  let (t0, tv) = butterfly_neg_asym(t0h, tv);
  let (t1, tu) = butterfly_sub_asym(t1, tuh);
  let (t2, tt) = butterfly_neg_asym(t2h, tt);
  let (t3, ts) = butterfly_sub_asym(t3, tsh);
  let (t4, tr) = butterfly_neg_asym(t4h, tr);
  let (t5, tq) = butterfly_sub_asym(t5, tqh);
  let (t6, tp) = butterfly_neg_asym(t6h, tp);
  let (t7, to) = butterfly_sub_asym(t7, toh);
  let (t8, tn) = butterfly_neg_asym(t8h, tn);
  let (t9, tm) = butterfly_sub_asym(t9, tmh);
  let (ta, tl) = butterfly_neg_asym(tah, tl);
  let (tb, tk) = butterfly_sub_asym(tb, tkh);
  let (tc, tj) = butterfly_neg_asym(tch, tj);
  let (td, ti) = butterfly_sub_asym(td, tih);
  let (te, th) = butterfly_neg_asym(teh, th);
  let (tf, tg) = butterfly_sub_asym(tf, tgh);

  // Embedded 16-point orthonormal transforms.
  daala_fdct_ii_16(
    t0,
    t1,
    t2,
    t3,
    t4,
    t5,
    t6,
    t7,
    t8,
    t9,
    ta,
    tb,
    tc,
    td,
    te,
    tf,
    &mut output[0..16],
  );
  // The upper half is passed in descending order (tv..tg) and its output is
  // reversed afterwards.
  daala_fdst_iv_16(
    tv,
    tu,
    tt,
    ts,
    tr,
    tq,
    tp,
    to,
    tn,
    tm,
    tl,
    tk,
    tj,
    ti,
    th,
    tg,
    &mut output[16..32],
  );
  output[16..32].reverse();
}
1357
// 32-point forward DST-IV kernel (per its name) for inputs in "asymmetric"
// form: even-numbered inputs (`t0`, `t2`, ...) are `(T, T)` pairs and
// odd-numbered inputs (`t1h`, `t3h`, ...) are single values.
//
// The computation runs as eight stages (0 through 7) that alternate rotation
// kernels with +/- butterflies; every binding is shadowed stage by stage, so
// statement order matters.
//
// The fractions quoted in the comments are the fixed-point constants handed
// to each rotation kernel: the tuple carries the numerators and the const
// generic arguments carry the matching power-of-two shifts, e.g.
// `<13, 14, 15>` with `(5933, 22595, 1137)` reads as 5933/2^13, 22595/2^14
// and 1137/2^15.
// NOTE(review): this mapping is inferred from the comment/argument
// correspondence; confirm against the rotation kernel definitions.
//
// The 32 results `t0`..`tv` are handed to `store_coeffs!` with `output`.
#[$m]
$($s)* fn daala_fdst_iv_32_asym<T: TxOperations>(
  t0: (T, T), t1h: T, t2: (T, T), t3h: T, t4: (T, T), t5h: T, t6: (T, T),
  t7h: T, t8: (T, T), t9h: T, ta: (T, T), tbh: T, tc: (T, T), tdh: T,
  te: (T, T), tfh: T, tg: (T, T), thh: T, ti: (T, T), tjh: T, tk: (T, T),
  tlh: T, tm: (T, T), tnh: T, to: (T, T), tph: T, tq: (T, T), trh: T,
  ts: (T, T), tth: T, tu: (T, T), tvh: T, output: &mut [T],
) {
  // Stage 0
  // Each rotation pairs input k with input 31 - k.
  // 5933/8192 = (Sin[63*Pi/128] + Cos[63*Pi/128])/Sqrt[2] = 0.72424708295147
  // 22595/16384 = (Sin[63*Pi/128] - Cos[63*Pi/128])*Sqrt[2] = 1.37908108947413
  // 1137/32768 = Cos[63*Pi/128]*Sqrt[2] = 0.03470653821440
  let (t0, tv) =
    RotateAdd::half_kernel::<13, 14, 15>(t0, tvh, (5933, 22595, 1137));
  // 6203/8192 = (Sin[61*Pi/128] + Cos[61*Pi/128])/Sqrt[2] = 0.75720884650648
  // 21403/16384 = (Sin[61*Pi/128] - Cos[61*Pi/128])*Sqrt[2] = 1.30634568590755
  // 3409/32768 = Cos[61*Pi/128]*Sqrt[2] = 0.10403600355271
  let (tu, t1) =
    RotateSub::half_kernel::<13, 14, 15>(tu, t1h, (6203, 21403, 3409));
  // 25833/32768 = (Sin[59*Pi/128] + Cos[59*Pi/128])/Sqrt[2] = 0.78834642762661
  // 315/256 = (Sin[59*Pi/128] - Cos[59*Pi/128])*Sqrt[2] = 1.23046318116125
  // 5673/32768 = Cos[59*Pi/128]*Sqrt[2] = 0.17311483704598
  let (t2, tt) =
    RotateAdd::half_kernel::<15, 8, 15>(t2, tth, (25833, 315, 5673));
  // 26791/32768 = (Sin[57*Pi/128] + Cos[57*Pi/128])/Sqrt[2] = 0.81758481315158
  // 4717/4096 = (Sin[57*Pi/128] - Cos[57*Pi/128])*Sqrt[2] = 1.15161638283569
  // 7923/32768 = Cos[57*Pi/128]*Sqrt[2] = 0.24177662173374
  let (ts, t3) =
    RotateSub::half_kernel::<15, 12, 15>(ts, t3h, (26791, 4717, 7923));
  // 6921/8192 = (Sin[55*Pi/128] + Cos[55*Pi/128])/Sqrt[2] = 0.84485356524971
  // 17531/16384 = (Sin[55*Pi/128] - Cos[55*Pi/128])*Sqrt[2] = 1.06999523977419
  // 10153/32768 = Cos[55*Pi/128]*Sqrt[2] = 0.30985594536261
  let (t4, tr) =
    RotateAdd::half_kernel::<13, 14, 15>(t4, trh, (6921, 17531, 10153));
  // 28511/32768 = (Sin[53*Pi/128] + Cos[53*Pi/128])/Sqrt[2] = 0.87008699110871
  // 32303/32768 = (Sin[53*Pi/128] - Cos[53*Pi/128])*Sqrt[2] = 0.98579638445957
  // 1545/4096 = Cos[53*Pi/128]*Sqrt[2] = 0.37718879887893
  let (tq, t5) =
    RotateSub::half_kernel::<15, 15, 12>(tq, t5h, (28511, 32303, 1545));
  // 29269/32768 = (Sin[51*Pi/128] + Cos[51*Pi/128])/Sqrt[2] = 0.89322430119552
  // 14733/16384 = (Sin[51*Pi/128] - Cos[51*Pi/128])*Sqrt[2] = 0.89922265930921
  // 1817/4096 = Cos[51*Pi/128]*Sqrt[2] = 0.44361297154091
  let (t6, tp) =
    RotateAdd::half_kernel::<15, 14, 12>(t6, tph, (29269, 14733, 1817));
  // 29957/32768 = (Sin[49*Pi/128] + Cos[49*Pi/128])/Sqrt[2] = 0.91420975570353
  // 13279/16384 = (Sin[49*Pi/128] - Cos[49*Pi/128])*Sqrt[2] = 0.81048262800998
  // 8339/16384 = Cos[49*Pi/128]*Sqrt[2] = 0.50896844169854
  let (to, t7) =
    RotateSub::half_kernel::<15, 14, 14>(to, t7h, (29957, 13279, 8339));
  // 7643/8192 = (Sin[47*Pi/128] + Cos[47*Pi/128])/Sqrt[2] = 0.93299279883474
  // 11793/16384 = (Sin[47*Pi/128] - Cos[47*Pi/128])*Sqrt[2] = 0.71979007306998
  // 18779/32768 = Cos[47*Pi/128]*Sqrt[2] = 0.57309776229975
  let (t8, tn) =
    RotateAdd::half_kernel::<13, 14, 15>(t8, tnh, (7643, 11793, 18779));
  // 15557/16384 = (Sin[45*Pi/128] + Cos[45*Pi/128])/Sqrt[2] = 0.94952818059304
  // 20557/32768 = (Sin[45*Pi/128] - Cos[45*Pi/128])*Sqrt[2] = 0.62736348079778
  // 20835/32768 = Cos[45*Pi/128]*Sqrt[2] = 0.63584644019415
  let (tm, t9) =
    RotateSub::half_kernel::<14, 15, 15>(tm, t9h, (15557, 20557, 20835));
  // 31581/32768 = (Sin[43*Pi/128] + Cos[43*Pi/128])/Sqrt[2] = 0.96377606579544
  // 17479/32768 = (Sin[43*Pi/128] - Cos[43*Pi/128])*Sqrt[2] = 0.53342551494980
  // 22841/32768 = Cos[43*Pi/128]*Sqrt[2] = 0.69706330832054
  let (ta, tl) =
    RotateAdd::half_kernel::<15, 15, 15>(ta, tlh, (31581, 17479, 22841));
  // 7993/8192 = (Sin[41*Pi/128] + Cos[41*Pi/128])/Sqrt[2] = 0.97570213003853
  // 14359/32768 = (Sin[41*Pi/128] - Cos[41*Pi/128])*Sqrt[2] = 0.43820248031374
  // 3099/4096 = Cos[41*Pi/128]*Sqrt[2] = 0.75660088988166
  let (tk, tb) =
    RotateSub::half_kernel::<13, 15, 12>(tk, tbh, (7993, 14359, 3099));
  // 16143/16384 = (Sin[39*Pi/128] + Cos[39*Pi/128])/Sqrt[2] = 0.98527764238894
  // 2801/8192 = (Sin[39*Pi/128] - Cos[39*Pi/128])*Sqrt[2] = 0.34192377752060
  // 26683/32768 = Cos[39*Pi/128]*Sqrt[2] = 0.81431575362864
  let (tc, tj) =
    RotateAdd::half_kernel::<14, 13, 15>(tc, tjh, (16143, 2801, 26683));
  // 16261/16384 = (Sin[37*Pi/128] + Cos[37*Pi/128])/Sqrt[2] = 0.99247953459871
  // 4011/16384 = (Sin[37*Pi/128] - Cos[37*Pi/128])*Sqrt[2] = 0.24482135039843
  // 14255/16384 = Cos[37*Pi/128]*Sqrt[2] = 0.87006885939949
  let (ti, td) =
    RotateSub::half_kernel::<14, 14, 14>(ti, tdh, (16261, 4011, 14255));
  // 32679/32768 = (Sin[35*Pi/128] + Cos[35*Pi/128])/Sqrt[2] = 0.99729045667869
  // 4821/32768 = (Sin[35*Pi/128] - Cos[35*Pi/128])*Sqrt[2] = 0.14712912719933
  // 30269/32768 = Cos[35*Pi/128]*Sqrt[2] = 0.92372589307902
  let (te, th) =
    RotateAdd::half_kernel::<15, 15, 15>(te, thh, (32679, 4821, 30269));
  // 16379/16384 = (Sin[33*Pi/128] + Cos[33*Pi/128])/Sqrt[2] = 0.99969881869620
  // 201/4096 = (Sin[33*Pi/128] - Cos[33*Pi/128])*Sqrt[2] = 0.04908245704582
  // 15977/16384 = Cos[33*Pi/128]*Sqrt[2] = 0.97515759017329
  let (tg, tf) =
    RotateSub::half_kernel::<14, 12, 14>(tg, tfh, (16379, 201, 15977));

  // Stage 1
  // Each butterfly's second result is bound to an `h`-suffixed name and is
  // consumed as the second operand of a `*_asym` butterfly in Stage 2.
  let (t0, tfh) = butterfly_add(t0, tf);
  let (tv, tgh) = butterfly_sub(tv, tg);
  let (th, tuh) = butterfly_add(th, tu);
  let (te, t1h) = butterfly_sub(te, t1);
  let (t2, tdh) = butterfly_add(t2, td);
  let (tt, tih) = butterfly_sub(tt, ti);
  let (tj, tsh) = butterfly_add(tj, ts);
  let (tc, t3h) = butterfly_sub(tc, t3);
  let (t4, tbh) = butterfly_add(t4, tb);
  let (tr, tkh) = butterfly_sub(tr, tk);
  let (tl, tqh) = butterfly_add(tl, tq);
  let (ta, t5h) = butterfly_sub(ta, t5);
  let (t6, t9h) = butterfly_add(t6, t9);
  let (tp, tmh) = butterfly_sub(tp, tm);
  let (tn, toh) = butterfly_add(tn, to);
  let (t8, t7h) = butterfly_sub(t8, t7);

  // Stage 2
  let (t0, t7) = butterfly_sub_asym(t0, t7h);
  let (tv, to) = butterfly_add_asym(tv, toh);
  let (tp, tu) = butterfly_sub_asym(tp, tuh);
  let (t6, t1) = butterfly_add_asym(t6, t1h);
  let (t2, t5) = butterfly_sub_asym(t2, t5h);
  let (tt, tq) = butterfly_add_asym(tt, tqh);
  let (tr, ts) = butterfly_sub_asym(tr, tsh);
  let (t4, t3) = butterfly_add_asym(t4, t3h);
  let (t8, tg) = butterfly_add_asym(t8, tgh);
  let (te, tm) = butterfly_sub_asym(te, tmh);
  let (tn, tf) = butterfly_add_asym(tn, tfh);
  let (th, t9) = butterfly_sub_asym(th, t9h);
  let (ta, ti) = butterfly_add_asym(ta, tih);
  let (tc, tk) = butterfly_sub_asym(tc, tkh);
  let (tl, td) = butterfly_add_asym(tl, tdh);
  let (tj, tb) = butterfly_sub_asym(tj, tbh);

  // Stage 3
  // 17911/16384 = Sin[15*Pi/32] + Cos[15*Pi/32] = 1.0932018670017576
  // 14699/16384 = Sin[15*Pi/32] - Cos[15*Pi/32] = 0.8971675863426363
  // 803/8192 = Cos[15*Pi/32] = 0.0980171403295606
  let (tf, tg) =
    RotateSub::kernel::<14, 14, 13>(tf, tg, (17911, 14699, 803));
  // 10217/8192 = Sin[13*Pi/32] + Cos[13*Pi/32] = 1.2472250129866712
  // 5461/8192 = Sin[13*Pi/32] - Cos[13*Pi/32] = 0.6666556584777465
  // 1189/4096 = Cos[13*Pi/32] = 0.2902846772544623
  let (th, te) =
    RotateAdd::kernel::<13, 13, 12>(th, te, (10217, 5461, 1189));
  // 5543/4096 = Sin[11*Pi/32] + Cos[11*Pi/32] = 1.3533180011743526
  // 3363/8192 = Sin[11*Pi/32] - Cos[11*Pi/32] = 0.4105245275223574
  // 7723/16384 = Cos[11*Pi/32] = 0.4713967368259976
  let (ti, td) =
    RotateAdd::kernel::<12, 13, 14>(ti, td, (5543, 3363, 7723));
  // 11529/8192 = Sin[9*Pi/32] + Cos[9*Pi/32] = 1.4074037375263826
  // 2271/16384 = Sin[9*Pi/32] - Cos[9*Pi/32] = 0.1386171691990915
  // 5197/8192 = Cos[9*Pi/32] = 0.6343932841636455
  let (tc, tj) =
    RotateSub::kernel::<13, 14, 13>(tc, tj, (11529, 2271, 5197));
  // 11529/8192 = Sin[9*Pi/32] + Cos[9*Pi/32] = 1.4074037375263826
  // 2271/16384 = Sin[9*Pi/32] - Cos[9*Pi/32] = 0.1386171691990915
  // 5197/8192 = Cos[9*Pi/32] = 0.6343932841636455
  let (tb, tk) =
    RotateNeg::kernel::<13, 14, 13>(tb, tk, (11529, 2271, 5197));
  // 5543/4096 = Sin[11*Pi/32] + Cos[11*Pi/32] = 1.3533180011743526
  // 3363/8192 = Sin[11*Pi/32] - Cos[11*Pi/32] = 0.4105245275223574
  // 7723/16384 = Cos[11*Pi/32] = 0.4713967368259976
  let (ta, tl) =
    RotateNeg::kernel::<12, 13, 14>(ta, tl, (5543, 3363, 7723));
  // 10217/8192 = Sin[13*Pi/32] + Cos[13*Pi/32] = 1.2472250129866712
  // 5461/8192 = Sin[13*Pi/32] - Cos[13*Pi/32] = 0.6666556584777465
  // 1189/4096 = Cos[13*Pi/32] = 0.2902846772544623
  let (t9, tm) =
    RotateNeg::kernel::<13, 13, 12>(t9, tm, (10217, 5461, 1189));
  // 17911/16384 = Sin[15*Pi/32] + Cos[15*Pi/32] = 1.0932018670017576
  // 14699/16384 = Sin[15*Pi/32] - Cos[15*Pi/32] = 0.8971675863426363
  // 803/8192 = Cos[15*Pi/32] = 0.0980171403295606
  let (t8, tn) =
    RotateNeg::kernel::<14, 14, 13>(t8, tn, (17911, 14699, 803));

  // Stage 4
  // Where the pair result is destructured, only its second element is kept;
  // the underscore-prefixed first element is intentionally unused.
  let (t3, t0h) = butterfly_sub(t3, t0);
  let (ts, tvh) = butterfly_add(ts, tv);
  let (tu, tth) = butterfly_sub(tu, tt);
  let (t1, t2h) = butterfly_add(t1, t2);
  let ((_toh, to), t4h) = butterfly_add(to, t4);
  let ((_tqh, tq), t6h) = butterfly_sub(tq, t6);
  let ((_t7h, t7), trh) = butterfly_add(t7, tr);
  let ((_t5h, t5), tph) = butterfly_sub(t5, tp);
  let (tb, t8h) = butterfly_sub(tb, t8);
  let (tk, tnh) = butterfly_add(tk, tn);
  let (tm, tlh) = butterfly_sub(tm, tl);
  let (t9, tah) = butterfly_add(t9, ta);
  let (tf, tch) = butterfly_sub(tf, tc);
  let (tg, tjh) = butterfly_add(tg, tj);
  let (ti, thh) = butterfly_sub(ti, th);
  let (td, teh) = butterfly_add(td, te);

  // Stage 5
  // 301/256 = Sin[7*Pi/16] + Cos[7*Pi/16] = 1.1758756024193586
  // 1609/2048 = Sin[7*Pi/16] - Cos[7*Pi/16] = 0.7856949583871022
  // 6393/32768 = Cos[7*Pi/16] = 0.1950903220161283
  let (to, t7) = RotateAdd::kernel::<8, 11, 15>(to, t7, (301, 1609, 6393));
  // 11363/8192 = Sin[5*Pi/16] + Cos[5*Pi/16] = 1.3870398453221475
  // 9041/32768 = Sin[5*Pi/16] - Cos[5*Pi/16] = 0.2758993792829431
  // 4551/8192 = Cos[5*Pi/16] = 0.5555702330196022
  let (tph, t6h) =
    RotateAdd::kernel::<13, 15, 13>(tph, t6h, (11363, 9041, 4551));
  // 5681/4096 = Sin[5*Pi/16] + Cos[5*Pi/16] = 1.3870398453221475
  // 9041/32768 = Sin[5*Pi/16] - Cos[5*Pi/16] = 0.2758993792829431
  // 4551/8192 = Cos[5*Pi/16] = 0.5555702330196022
  let (t5, tq) =
    RotateNeg::kernel::<12, 15, 13>(t5, tq, (5681, 9041, 4551));
  // 9633/8192 = Sin[7*Pi/16] + Cos[7*Pi/16] = 1.1758756024193586
  // 12873/16384 = Sin[7*Pi/16] - Cos[7*Pi/16] = 0.7856949583871022
  // 6393/32768 = Cos[7*Pi/16] = 0.1950903220161283
  let (t4h, trh) =
    RotateNeg::kernel::<13, 14, 15>(t4h, trh, (9633, 12873, 6393));

  // Stage 6
  // t5, tq, t7 and to are rebuilt as (value.rshift1(), value) pairs, the
  // form the `*_asym` butterflies take for their first argument.
  let (t1, t0) = butterfly_add_asym(t1, t0h);
  let (tu, tv) = butterfly_sub_asym(tu, tvh);
  let (ts, t2) = butterfly_sub_asym(ts, t2h);
  let (t3, tt) = butterfly_sub_asym(t3, tth);
  let (t5, t4) = butterfly_add_asym((t5.rshift1(), t5), t4h);
  let (tq, tr) = butterfly_sub_asym((tq.rshift1(), tq), trh);
  let (t7, t6) = butterfly_add_asym((t7.rshift1(), t7), t6h);
  let (to, tp) = butterfly_sub_asym((to.rshift1(), to), tph);
  let (t9, t8) = butterfly_add_asym(t9, t8h);
  let (tm, tn) = butterfly_sub_asym(tm, tnh);
  let (tk, ta) = butterfly_sub_asym(tk, tah);
  let (tb, tl) = butterfly_sub_asym(tb, tlh);
  let (ti, tc) = butterfly_add_asym(ti, tch);
  let (td, tj) = butterfly_add_asym(td, tjh);
  let (tf, te) = butterfly_add_asym(tf, teh);
  let (tg, th) = butterfly_sub_asym(tg, thh);

  // Stage 7
  // 669/512 = Sin[3*Pi/8] + Cos[3*Pi/8] = 1.3065629648763766
  // 8867/16384 = Sin[3*Pi/8] - Cos[3*Pi/8] = 0.5411961001461969
  // 3135/8192 = Cos[3*Pi/8] = 0.3826834323650898
  let (t2, tt) = RotateNeg::kernel::<9, 14, 13>(t2, tt, (669, 8867, 3135));
  // 669/512 = Sin[3*Pi/8] + Cos[3*Pi/8] = 1.3065629648763766
  // 8867/16384 = Sin[3*Pi/8] - Cos[3*Pi/8] = 0.5411961001461969
  // 3135/8192 = Cos[3*Pi/8] = 0.3826834323650898
  let (ts, t3) = RotateAdd::kernel::<9, 14, 13>(ts, t3, (669, 8867, 3135));
  // 669/512 = Sin[3*Pi/8] + Cos[3*Pi/8] = 1.3065629648763766
  // 8867/16384 = Sin[3*Pi/8] - Cos[3*Pi/8] = 0.5411961001461969
  // 3135/8192 = Cos[3*Pi/8] = 0.3826834323650898
  let (ta, tl) = RotateNeg::kernel::<9, 14, 13>(ta, tl, (669, 8867, 3135));
  // 669/512 = Sin[3*Pi/8] + Cos[3*Pi/8] = 1.3065629648763766
  // 8867/16384 = Sin[3*Pi/8] - Cos[3*Pi/8] = 0.5411961001461969
  // 3135/8192 = Cos[3*Pi/8] = 0.3826834323650898
  let (tk, tb) = RotateAdd::kernel::<9, 14, 13>(tk, tb, (669, 8867, 3135));
  // 669/512 = Sin[3*Pi/8] + Cos[3*Pi/8] = 1.3065629648763766
  // 8867/16384 = Sin[3*Pi/8] - Cos[3*Pi/8] = 0.5411961001461969
  // 3135/8192 = Cos[3*Pi/8] = 0.3826834323650898
  let (tc, tj) = RotateAdd::kernel::<9, 14, 13>(tc, tj, (669, 8867, 3135));
  // 669/512 = Sin[3*Pi/8] + Cos[3*Pi/8] = 1.3065629648763766
  // 8867/16384 = Sin[3*Pi/8] - Cos[3*Pi/8] = 0.5411961001461969
  // 3135/8192 = Cos[3*Pi/8] = 0.3826834323650898
  let (ti, td) = RotateNeg::kernel::<9, 14, 13>(ti, td, (669, 8867, 3135));
  // 5793/4096 = Sin[Pi/4] + Cos[Pi/4] = 1.4142135623730951
  // 5793/8192 = Cos[Pi/4] = 0.7071067811865475
  let (tu, t1) = RotatePi4Add::kernel::<12, 13>(tu, t1, (5793, 5793));
  // 5793/4096 = Sin[Pi/4] + Cos[Pi/4] = 1.4142135623730951
  // 5793/8192 = Cos[Pi/4] = 0.7071067811865475
  let (tq, t5) = RotatePi4Add::kernel::<12, 13>(tq, t5, (5793, 5793));
  // 5793/4096 = Sin[Pi/4] + Cos[Pi/4] = 1.4142135623730951
  // 5793/8192 = Cos[Pi/4] = 0.7071067811865475
  let (tp, t6) = RotatePi4Sub::kernel::<12, 13>(tp, t6, (5793, 5793));
  // 5793/4096 = Sin[Pi/4] + Cos[Pi/4] = 1.4142135623730951
  // 5793/8192 = Cos[Pi/4] = 0.7071067811865475
  let (tm, t9) = RotatePi4Add::kernel::<12, 13>(tm, t9, (5793, 5793));
  // 5793/4096 = Sin[Pi/4] + Cos[Pi/4] = 1.4142135623730951
  // 5793/8192 = Cos[Pi/4] = 0.7071067811865475
  let (te, th) = RotatePi4Add::kernel::<12, 13>(te, th, (5793, 5793));

  store_coeffs!(
    output, t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, ta, tb, tc, td, te, tf,
    tg, th, ti, tj, tk, tl, tm, tn, to, tp, tq, tr, ts, tt, tu, tv
  );
}
1629
1630#[allow(clippy::identity_op)]
1631#[$m]
1632$($s)* fn daala_fdct64<T: TxOperations>(coeffs: &mut [T]) {
1633 assert!(coeffs.len() >= 64);
1634 // Use arrays to avoid ridiculous variable names
1635 let mut asym: [(T, T); 32] = [(T::zero(), T::zero()); 32];
1636 let mut half: [T; 32] = [T::zero(); 32];
1637 // +/- Butterflies with asymmetric output.
1638 {
1639 #[$m]
1640 #[inline]
1641 $($s)* fn butterfly_pair<T: TxOperations>(
1642 half: &mut [T; 32], asym: &mut [(T, T); 32], input: &[T], i: usize
1643 ) {
1644 let j = i * 2;
1645 let (ah, c) = butterfly_neg(input[j], input[63 - j]);
1646 let (b, dh) = butterfly_add(input[j + 1], input[63 - j - 1]);
1647 half[i] = ah;
1648 half[31 - i] = dh;
1649 asym[i] = b;
1650 asym[31 - i] = c;
1651 }
1652 butterfly_pair(&mut half, &mut asym, coeffs, 0);
1653 butterfly_pair(&mut half, &mut asym, coeffs, 1);
1654 butterfly_pair(&mut half, &mut asym, coeffs, 2);
1655 butterfly_pair(&mut half, &mut asym, coeffs, 3);
1656 butterfly_pair(&mut half, &mut asym, coeffs, 4);
1657 butterfly_pair(&mut half, &mut asym, coeffs, 5);
1658 butterfly_pair(&mut half, &mut asym, coeffs, 6);
1659 butterfly_pair(&mut half, &mut asym, coeffs, 7);
1660 butterfly_pair(&mut half, &mut asym, coeffs, 8);
1661 butterfly_pair(&mut half, &mut asym, coeffs, 9);
1662 butterfly_pair(&mut half, &mut asym, coeffs, 10);
1663 butterfly_pair(&mut half, &mut asym, coeffs, 11);
1664 butterfly_pair(&mut half, &mut asym, coeffs, 12);
1665 butterfly_pair(&mut half, &mut asym, coeffs, 13);
1666 butterfly_pair(&mut half, &mut asym, coeffs, 14);
1667 butterfly_pair(&mut half, &mut asym, coeffs, 15);
1668 }
1669
1670 let mut temp_out: [T; 64] = [T::zero(); 64];
1671 // Embedded 2-point transforms with asymmetric input.
1672 daala_fdct_ii_32_asym(
1673 half[0],
1674 asym[0],
1675 half[1],
1676 asym[1],
1677 half[2],
1678 asym[2],
1679 half[3],
1680 asym[3],
1681 half[4],
1682 asym[4],
1683 half[5],
1684 asym[5],
1685 half[6],
1686 asym[6],
1687 half[7],
1688 asym[7],
1689 half[8],
1690 asym[8],
1691 half[9],
1692 asym[9],
1693 half[10],
1694 asym[10],
1695 half[11],
1696 asym[11],
1697 half[12],
1698 asym[12],
1699 half[13],
1700 asym[13],
1701 half[14],
1702 asym[14],
1703 half[15],
1704 asym[15],
1705 &mut temp_out[0..32],
1706 );
1707 daala_fdst_iv_32_asym(
1708 asym[31],
1709 half[31],
1710 asym[30],
1711 half[30],
1712 asym[29],
1713 half[29],
1714 asym[28],
1715 half[28],
1716 asym[27],
1717 half[27],
1718 asym[26],
1719 half[26],
1720 asym[25],
1721 half[25],
1722 asym[24],
1723 half[24],
1724 asym[23],
1725 half[23],
1726 asym[22],
1727 half[22],
1728 asym[21],
1729 half[21],
1730 asym[20],
1731 half[20],
1732 asym[19],
1733 half[19],
1734 asym[18],
1735 half[18],
1736 asym[17],
1737 half[17],
1738 asym[16],
1739 half[16],
1740 &mut temp_out[32..64],
1741 );
1742 temp_out[32..64].reverse();
1743
 // Write the contents of temp_out back into coeffs in reordered form
1745 #[$m]
1746 #[inline]
1747 $($s)* fn reorder_4<T: TxOperations>(
1748 output: &mut [T], i: usize, tmp: [T; 64], j: usize
1749 ) {
1750 output[0 + i * 4] = tmp[0 + j];
1751 output[1 + i * 4] = tmp[32 + j];
1752 output[2 + i * 4] = tmp[16 + j];
1753 output[3 + i * 4] = tmp[48 + j];
1754 }
1755 reorder_4(coeffs, 0, temp_out, 0);
1756 reorder_4(coeffs, 1, temp_out, 8);
1757 reorder_4(coeffs, 2, temp_out, 4);
1758 reorder_4(coeffs, 3, temp_out, 12);
1759 reorder_4(coeffs, 4, temp_out, 2);
1760 reorder_4(coeffs, 5, temp_out, 10);
1761 reorder_4(coeffs, 6, temp_out, 6);
1762 reorder_4(coeffs, 7, temp_out, 14);
1763
1764 reorder_4(coeffs, 8, temp_out, 1);
1765 reorder_4(coeffs, 9, temp_out, 9);
1766 reorder_4(coeffs, 10, temp_out, 5);
1767 reorder_4(coeffs, 11, temp_out, 13);
1768 reorder_4(coeffs, 12, temp_out, 3);
1769 reorder_4(coeffs, 13, temp_out, 11);
1770 reorder_4(coeffs, 14, temp_out, 7);
1771 reorder_4(coeffs, 15, temp_out, 15);
1772}
1773
/// Forward identity transform: a deliberate no-op.
///
/// The body is empty, so every element of `_coeffs` is left exactly as it
/// was passed in; no scaling of any kind is applied by this function.
#[$m]
$($s)* fn fidentity<T: TxOperations>(_coeffs: &mut [T]) {}
1776
1777#[$m]
1778$($s)* fn fwht4<T: TxOperations>(coeffs: &mut [T]) {
1779 assert!(coeffs.len() >= 4);
1780 let x0 = coeffs[0];
1781 let x1 = coeffs[1];
1782 let x2 = coeffs[2];
1783 let x3 = coeffs[3];
1784
1785 let s0 = x0.add(x1);
1786 let s1 = x3.sub(x2);
1787 let s2 = s0.sub_avg(s1);
1788
1789 let q1 = s2.sub(x2);
1790 let q0 = s0.sub(q1);
1791 let q3 = s2.sub(x1);
1792 let q2 = s1.add(q3);
1793
1794 store_coeffs!(coeffs, q0, q1, q2, q3);
1795}
1796
1797}
1798
1799}
1800