logexp.rs source code [crates/rav1e/src/util/logexp.rs]

1	// Copyright (c) 2019-2022, The rav1e contributors. All rights reserved
2	//
3	// This source code is subject to the terms of the BSD 2 Clause License and
4	// the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
5	// was not distributed with this source code in the LICENSE file, you can
6	// obtain it at www.aomedia.org/license/software. If the Alliance for Open
7	// Media Patent License 1.0 was not distributed with this source code in the
8	// PATENTS file, you can obtain it at www.aomedia.org/license/patent.
9
10	/// Convert an integer into a Q57 fixed-point fraction.
11	pub const fn q57(v: i32) -> i64 {
12	debug_assert!(v >= `-64` && v <= `63`);
13	(v as i64) << `57`
14	}
15
16	#[rustfmt::skip]
17	const ATANH_LOG2: &[i64; `32`] = &[
18	`0x32B803473F7AD0F4`, `0x2F2A71BD4E25E916`, `0x2E68B244BB93BA06`,
19	`0x2E39FB9198CE62E4`, `0x2E2E683F68565C8F`, `0x2E2B850BE2077FC1`,
20	`0x2E2ACC58FE7B78DB`, `0x2E2A9E2DE52FD5F2`, `0x2E2A92A338D53EEC`,
21	`0x2E2A8FC08F5E19B6`, `0x2E2A8F07E51A485E`, `0x2E2A8ED9BA8AF388`,
22	`0x2E2A8ECE2FE7384A`, `0x2E2A8ECB4D3E4B1A`, `0x2E2A8ECA94940FE8`,
23	`0x2E2A8ECA6669811D`, `0x2E2A8ECA5ADEDD6A`, `0x2E2A8ECA57FC347E`,
24	`0x2E2A8ECA57438A43`, `0x2E2A8ECA57155FB4`, `0x2E2A8ECA5709D510`,
25	`0x2E2A8ECA5706F267`, `0x2E2A8ECA570639BD`, `0x2E2A8ECA57060B92`,
26	`0x2E2A8ECA57060008`, `0x2E2A8ECA5705FD25`, `0x2E2A8ECA5705FC6C`,
27	`0x2E2A8ECA5705FC3E`, `0x2E2A8ECA5705FC33`, `0x2E2A8ECA5705FC30`,
28	`0x2E2A8ECA5705FC2F`, `0x2E2A8ECA5705FC2F`
29	];
30
31	/// Computes the binary exponential of `logq57`.
32	/// `logq57`: a log base 2 in Q57 format.
33	/// Returns a 64 bit integer in Q0 (no fraction).
34	pub const fn bexp64(logq57: i64) -> i64 {
35	let ipart = (logq57 >> `57`) as i32;
36	if ipart < `0` {
37	return `0`;
38	}
39	if ipart >= `63` {
40	return `0x7FFFFFFFFFFFFFFF`;
41	}
42	// z is the fractional part of the log in Q62 format.
43	// We need 1 bit of headroom since the magnitude can get larger than 1
44	// during the iteration, and a sign bit.
45	let mut z = logq57 - q57(ipart);
46	let mut w: i64;
47	if z != `0` {
48	// Rust has 128 bit multiplies, so it should be possible to do this
49	// faster without losing accuracy.
50	z <<= `5`;
51	// w is the exponential in Q61 format (since it also needs headroom and can
52	// get as large as 2.0); we could get another bit if we dropped the sign,
53	// but we'll recover that bit later anyway.
54	// Ideally this should start out as
55	// \lim_{n->\infty} 2^{61}/\product_{i=1}^n \sqrt{1-2^{-2i}}
56	// but in order to guarantee convergence we have to repeat iterations 4,
57	// 13 (=34+1), and 40 (=313+1, etc.), so it winds up somewhat larger.
58	w = `0x26A3D0E401DD846D`;
59	let mut i: i64 = `0`;
60	loop {
61	let mask = -((z < `0`) as i64);
62	w += ((w >> (i + `1`)) + mask) ^ mask;
63	z -= (ATANH_LOG2[i as usize] + mask) ^ mask;
64	// Repeat iteration 4.
65	if i >= `3` {
66	break;
67	}
68	z *= `2`;
69	i += `1`;
70	}
71	loop {
72	let mask = -((z < `0`) as i64);
73	w += ((w >> (i + `1`)) + mask) ^ mask;
74	z -= (ATANH_LOG2[i as usize] + mask) ^ mask;
75	// Repeat iteration 13.
76	if i >= `12` {
77	break;
78	}
79	z *= `2`;
80	i += `1`;
81	}
82	while i < `32` {
83	let mask = -((z < `0`) as i64);
84	w += ((w >> (i + `1`)) + mask) ^ mask;
85	z = (z - ((ATANH_LOG2[i as usize] + mask) ^ mask)) * `2`;
86	i += `1`;
87	}
88	// Skip the remaining iterations unless we really require that much
89	// precision.
90	// We could have bailed out earlier for smaller iparts, but that would
91	// require initializing w from a table, as the limit doesn't converge to
92	// 61-bit precision until n=30.
93	let mut wlo: i32 = `0`;
94	if ipart > `30` {
95	// For these iterations, we just update the low bits, as the high bits
96	// can't possibly be affected.
97	// OD_ATANH_LOG2 has also converged (it actually did so one iteration
98	// earlier, but that's no reason for an extra special case).
99	loop {
100	let mask = -((z < `0`) as i64);
101	wlo += (((w >> i) + mask) ^ mask) as i32;
102	z -= (ATANH_LOG2[`31`] + mask) ^ mask;
103	// Repeat iteration 40.
104	if i >= `39` {
105	break;
106	}
107	z *= `2`;
108	i += `1`;
109	}
110	while i < `61` {
111	let mask = -((z < `0`) as i64);
112	wlo += (((w >> i) + mask) ^ mask) as i32;
113	z = (z - ((ATANH_LOG2[`31`] + mask) ^ mask)) * `2`;
114	i += `1`;
115	}
116	}
117	w = (w << `1`) + (wlo as i64);
118	} else {
119	w = `1i64` << `62`;
120	}
121	if ipart < `62` {
122	w = ((w >> (`61` - ipart)) + `1`) >> `1`;
123	}
124	w
125	}
126
127	/// Computes the binary log of `n`.
128	/// `n`: a 64-bit integer in Q0 (no fraction).
129	/// Returns a 64-bit log in Q57.
130	pub const fn blog64(n: i64) -> i64 {
131	if n <= `0` {
132	return `-1`;
133	}
134	let ipart = `63` - n.leading_zeros() as i32;
135	let w = if ipart > `61` { n >> (ipart - `61`) } else { n << (`61` - ipart) };
136	if (w & (w - `1`)) == `0` {
137	return q57(ipart);
138	}
139	// z is the fractional part of the log in Q61 format.
140	let mut z: i64 = `0`;
141	// Rust has 128 bit multiplies, so it should be possible to do this
142	// faster without losing accuracy.
143	// x and y are the cosh() and sinh(), respectively, in Q61 format.
144	// We are computing z = 2atanh(y/x) = 2atanh((w - 1)/(w + 1)).
145	let mut x = w + (`1i64` << `61`);
146	let mut y = w - (`1i64` << `61`);
147	// Repeat iteration 4.
148	// Repeat iteration 13.
149	// Repeat iteration 40.
150	let bounds = [`3`, `12`, `39`, `61`];
151	let mut i = `0`;
152	let mut j = `0`;
153	loop {
154	let end = bounds[j];
155	loop {
156	let mask = -((y < `0`) as i64);
157	// ATANH_LOG2 has converged at iteration 32.
158	z += ((ATANH_LOG2[if i < `31` { i } else { `31` }] >> i) + mask) ^ mask;
159	let u = x >> (i + `1`);
160	x -= ((y >> (i + `1`)) + mask) ^ mask;
161	y -= (u + mask) ^ mask;
162	if i == end {
163	break;
164	}
165	i += `1`;
166	}
167	j += `1`;
168	if j == bounds.len() {
169	break;
170	}
171	}
172	z = (z + `8`) >> `4`;
173	q57(ipart) + z
174	}
175
176	/// Computes the binary log of `n`.
177	/// `n`: an unsigned 32-bit integer in Q0 (no fraction).
178	/// Returns a signed 32-bit log in Q24.
179	#[allow(unused)]
180	pub const fn blog32(n: u32) -> i32 {
181	if n == `0` {
182	return `-1`;
183	}
184	let ipart = `31` - n.leading_zeros() as i32;
185	let n = n as i64;
186	let w = if ipart > `61` { n >> (ipart - `61`) } else { n << (`61` - ipart) };
187	if (w & (w - `1`)) == `0` {
188	return ipart << `24`;
189	}
190	// z is the fractional part of the log in Q61 format.
191	let mut z: i64 = `0`;
192	// Rust has 128 bit multiplies, so it should be possible to do this
193	// faster without losing accuracy.
194	// x and y are the cosh() and sinh(), respectively, in Q61 format.
195	// We are computing z = 2atanh(y/x) = 2atanh((w - 1)/(w + 1)).
196	let mut x = w + (`1i64` << `61`);
197	let mut y = w - (`1i64` << `61`);
198	// Repeat iteration 4.
199	// Repeat iteration 13.
200	let bounds = [`3`, `12`, `29`];
201	let mut i = `0`;
202	let mut j = `0`;
203	loop {
204	let end = bounds[j];
205	loop {
206	let mask = -((y < `0`) as i64);
207	z += ((ATANH_LOG2[i] >> i) + mask) ^ mask;
208	let u = x >> (i + `1`);
209	x -= ((y >> (i + `1`)) + mask) ^ mask;
210	y -= (u + mask) ^ mask;
211	if i == end {
212	break;
213	}
214	i += `1`;
215	}
216	j += `1`;
217	if j == bounds.len() {
218	break;
219	}
220	}
221	const SHIFT: usize = `61` - `24`;
222	z = (z + (`1` << SHIFT >> `1`)) >> SHIFT;
223	(ipart << `24`) + z as i32
224	}
225
226	/// Converts a Q57 fixed-point fraction to Q24 by rounding.
227	pub const fn q57_to_q24(v: i64) -> i32 {
228	(((v >> `32`) + `1`) >> `1`) as i32
229	}
230
231	/// Converts a Q24 fixed-point fraction to Q57.
232	pub const fn q24_to_q57(v: i32) -> i64 {
233	(v as i64) << `33`
234	}
235
236	/// Binary exponentiation of a `log_scale` with 24-bit fractional precision and
237	/// saturation.
238	/// `log_scale`: A binary logarithm in Q24 format.
239	/// Returns the binary exponential in Q24 format, saturated to 247 - 1 if
240	/// `log_scale` was too large.
241	pub const fn bexp_q24(log_scale: i32) -> i64 {
242	if log_scale < `23` << `24` {
243	let ret: i64 = bexp64(((log_scale as i64) << `33`) + q57(`24`));
244	if ret < (`1i64` << `47`) - `1` {
245	return ret;
246	}
247	}
248	(`1i64` << `47`) - `1`
249	}
250
251	/// Polynomial approximation of a binary exponential.
252	/// Q10 input, Q0 output.
253	#[allow(unused)]
254	pub const fn bexp32_q10(z: i32) -> u32 {
255	let ipart: i32 = z >> `10`;
256	let mut n: u32 = ((z & ((`1` << `10`) - `1`)) << `4`) as u32;
257	n = ({
258	n * (((n * (((n * (((n * `3548`) >> `15`) + `6817`)) >> `15`) + `15823`)) >> `15`)
259	+ `22708`)
260	} >> `15`)
261	+ `16384`;
262	if `14` - ipart > `0` {
263	(n + (`1` << (`13` - ipart))) >> (`14` - ipart)
264	} else {
265	n << (ipart - `14`)
266	}
267	}
268
269	/// Polynomial approximation of a binary logarithm.
270	/// Q0 input, Q11 output.
271	pub const fn blog32_q11(w: u32) -> i32 {
272	if w == `0` {
273	return `-1`;
274	}
275	let ipart: i32 = `32` - w.leading_zeros() as i32;
276	let n: i32 = if ipart - `16` > `0` { w >> (ipart - `16`) } else { w << (`16` - ipart) }
277	as i32
278	- `32768`
279	- `16384`;
280	let fpart: i32 = ({
281	n * (((n * (((n * (((n * `-1402`) >> `15`) + `2546`)) >> `15`) - `5216`)) >> `15`)
282	+ `15745`)
283	} >> `15`)
284	- `6797`;
285	(ipart << `11`) + (fpart >> `3`)
286	}
287
288	#[cfg(test)]
289	mod test {
290	use super::*;
291
292	#[test]
293	fn blog64_vectors() {
294	assert!(blog64(`1793`) == `0x159dc71e24d32daf`);
295	assert!(blog64(`0x678dde6e5fd29f05`) == `0x7d6373ad151ca685`);
296	}
297
298	#[test]
299	fn bexp64_vectors() {
300	assert!(bexp64(`0x159dc71e24d32daf`) == `1793`);
301	assert!((bexp64(`0x7d6373ad151ca685`) - `0x678dde6e5fd29f05`).abs() < `29`);
302	}
303
304	#[test]
305	fn blog64_bexp64_round_trip() {
306	for a in `1`..=std::u16::MAX as i64 {
307	let b = std::i64::MAX / a;
308	let (log_a, log_b, log_ab) = (blog64(a), blog64(b), blog64(a * b));
309	assert!((log_a + log_b - log_ab).abs() < `4`);
310	assert!(bexp64(log_a) == a);
311	assert!((bexp64(log_b) - b).abs() < `128`);
312	assert!((bexp64(log_ab) - a * b).abs() < `128`);
313	}
314	}
315
316	#[test]
317	fn blog32_vectors() {
318	assert_eq!(blog32(`0`), -`1`);
319	assert_eq!(blog32(`1793`), q57_to_q24(`0x159dc71e24d32daf`));
320	}
321
322	#[test]
323	fn bexp_q24_vectors() {
324	assert_eq!(bexp_q24(i32::MAX), (`1i64` << `47`) - `1`);
325	assert_eq!(
326	(bexp_q24(q57_to_q24(`0x159dc71e24d32daf`)) + (`1` << `24` >> `1`)) >> `24`,
327	`1793`
328	);
329	}
330
331	#[test]
332	fn blog32_bexp_q24_round_trip() {
333	for a in `1`..=std::u16::MAX as u32 {
334	let b = (std::u32::MAX >> `9`) / a;
335	let (log_a, log_b, log_ab) = (blog32(a), blog32(b), blog32(a * b));
336	assert!((log_a + log_b - log_ab).abs() < `4`);
337	assert!((bexp_q24(log_a) - (i64::from(a) << `24`)).abs() < (`1` << `24` >> `1`));
338	assert!(((bexp_q24(log_b) >> `24`) - i64::from(b)).abs() < `128`);
339	assert!(
340	((bexp_q24(log_ab) >> `24`) - i64::from(a) * i64::from(b)).abs() < `128`
341	);
342	}
343	}
344
345	#[test]
346	fn blog32_q11_bexp32_q10_round_trip() {
347	for a in `1`..=std::i16::MAX as i32 {
348	let b = std::i16::MAX as i32 / a;
349	let (log_a, log_b, log_ab) = (
350	blog32_q11(a as u32),
351	blog32_q11(b as u32),
352	blog32_q11(a as u32 * b as u32),
353	);
354	assert!((log_a + log_b - log_ab).abs() < `4`);
355	assert!((bexp32_q10((log_a + `1`) >> `1`) as i32 - a).abs() < `18`);
356	assert!((bexp32_q10((log_b + `1`) >> `1`) as i32 - b).abs() < `2`);
357	assert!((bexp32_q10((log_ab + `1`) >> `1`) as i32 - a * b).abs() < `18`);
358	}
359	}
360	}
361