idct.rs source code [crates/zune_jpeg/src/idct.rs]

1	/*
2	* Copyright (c) 2023.
3	*
4	* This software is free software;
5	*
6	* You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license
7	*/
8
9	//! Routines for IDCT
10	//!
11	//! Essentially we provide 2 routines for IDCT, a scalar implementation and a not super optimized
12	//! AVX2 one, i'll talk about them here.
13	//!
14	//! There are 2 reasons why we have the avx one
//! 1. Hardly anyone compiles with `-C target-feature=+avx2`, so binaries probably won't take
//! advantage of AVX2 (even when it is available).
17	//! 2. AVX employs zero short circuit in a way the scalar code cannot employ it.
18	//! - AVX does this by checking for MCU's whose 63 AC coefficients are zero and if true, it writes
19	//! values directly, if false, it goes the long way of calculating.
//! - Although this can be trivially implemented in the scalar version, it generates code
//! I'm not happy with (the scalar version basically loops, and that is too many branches for me).
//! The AVX one does a better job by using bitwise ORs (`_mm256_or_si256`), which is orders of
//! magnitude faster than anything I could come up with.
24	//!
//! The AVX code also has some cool transpose_u16 instructions which look complicated enough to be cool
//! (spoiler alert: I barely understand how it works, that's why I credited the owner).
27	//!
28	#![allow(
29	clippy::excessive_precision,
30	clippy::unreadable_literal,
31	clippy::module_name_repetitions,
32	unused_parens,
33	clippy::wildcard_imports
34	)]
35
36	use zune_core::log::debug;
37	use zune_core::options::DecoderOptions;
38
39	use crate::decoder::IDCTPtr;
40	use crate::idct::scalar::idct_int;
41
42	#[cfg(feature = "x86")]
43	pub mod avx2;
44	#[cfg(feature = "neon")]
45	pub mod neon;
46
47	pub mod scalar;
48
49	/// Choose an appropriate IDCT function
50	#[allow(unused_variables)]
51	pub fn choose_idct_func(options: &DecoderOptions) -> IDCTPtr {
52	#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
53	#[cfg(feature = "x86")]
54	{
55	if options.use_avx2() {
56	debug!("Using vector integer IDCT");
57	// use avx one
58	return crate::idct::avx2::idct_avx2;
59	}
60	}
61	#[cfg(target_arch = "aarch64")]
62	#[cfg(feature = "neon")]
63	{
64	if options.use_neon() {
65	debug!("Using vector integer IDCT");
66	return crate::idct::neon::idct_neon;
67	}
68	}
69	debug!("Using scalar integer IDCT");
70	// use generic one
71	return idct_int;
72	}
73
#[cfg(test)]
// `idct_fnc` returns early from whichever vector branch is compiled in, which
// leaves its scalar fallback unreachable on those targets.
#[allow(unreachable_code)]
#[allow(dead_code)]
mod tests {
    use super::*;

    /// The IDCT returned by `idct_fnc` must agree with the scalar one on a
    /// block whose 64 coefficients are all 10.
    #[test]
    fn idct_test0() {
        let stride = 8;
        let mut coeff = [10; 64];
        // Each routine gets its own copy of the input since both take it mutably.
        let mut coeff2 = coeff;
        let mut output_scalar = [0; 64];
        let mut output_vector = [0; 64];
        idct_fnc()(&mut coeff, &mut output_vector, stride);
        idct_int(&mut coeff2, &mut output_scalar, stride);
        assert_eq!(output_scalar, output_vector, "IDCT and scalar do not match");
    }

    /// Same comparison on a block whose 64 coefficients are all 14.
    #[test]
    fn do_idct_test1() {
        let stride = 8;
        let mut coeff = [14; 64];
        let mut coeff2 = coeff;
        let mut output_scalar = [0; 64];
        let mut output_vector = [0; 64];
        idct_fnc()(&mut coeff, &mut output_vector, stride);
        idct_int(&mut coeff2, &mut output_scalar, stride);
        assert_eq!(output_scalar, output_vector, "IDCT and scalar do not match");
    }

    /// Same comparison on a block that is zero except for the first (255) and
    /// the last (-256) coefficient.
    #[test]
    fn do_idct_test2() {
        let stride = 8;
        let mut coeff = [0; 64];
        coeff[0] = 255;
        coeff[63] = -256;
        let mut coeff2 = coeff;
        let mut output_scalar = [0; 64];
        let mut output_vector = [0; 64];
        idct_fnc()(&mut coeff, &mut output_vector, stride);
        idct_int(&mut coeff2, &mut output_scalar, stride);
        assert_eq!(output_scalar, output_vector, "IDCT and scalar do not match");
    }

    /// Same comparison on an all-zero block.
    #[test]
    fn do_idct_zeros() {
        let stride = 8;
        let mut coeff = [0; 64];
        let mut coeff2 = coeff;
        let mut output_scalar = [0; 64];
        let mut output_vector = [0; 64];
        idct_fnc()(&mut coeff, &mut output_vector, stride);
        idct_int(&mut coeff2, &mut output_scalar, stride);
        assert_eq!(output_scalar, output_vector, "IDCT and scalar do not match");
    }

    /// Returns the IDCT under test: the NEON or AVX2 routine when the matching
    /// feature and target architecture are compiled in, otherwise the scalar
    /// one (in which case the tests compare the scalar routine with itself).
    fn idct_fnc() -> IDCTPtr {
        #[cfg(feature = "neon")]
        #[cfg(target_arch = "aarch64")]
        {
            use crate::idct::neon::idct_neon;
            return idct_neon;
        }

        #[cfg(feature = "x86")]
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        {
            // NOTE(review): this branch is chosen purely at compile time, so it
            // presumably expects the test host CPU to support AVX2 - confirm.
            use crate::idct::avx2::idct_avx2;
            return idct_avx2;
        }

        idct_int
    }
}
148