1// Copyright (c) 2019-2022, The rav1e contributors. All rights reserved
2//
3// This source code is subject to the terms of the BSD 2 Clause License and
4// the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
5// was not distributed with this source code in the LICENSE file, you can
6// obtain it at www.aomedia.org/license/software. If the Alliance for Open
7// Media Patent License 1.0 was not distributed with this source code in the
8// PATENTS file, you can obtain it at www.aomedia.org/license/patent.
9
10/// Convert an integer into a Q57 fixed-point fraction.
11pub const fn q57(v: i32) -> i64 {
12 debug_assert!(v >= -64 && v <= 63);
13 (v as i64) << 57
14}
15
16#[rustfmt::skip]
17const ATANH_LOG2: &[i64; 32] = &[
18 0x32B803473F7AD0F4, 0x2F2A71BD4E25E916, 0x2E68B244BB93BA06,
19 0x2E39FB9198CE62E4, 0x2E2E683F68565C8F, 0x2E2B850BE2077FC1,
20 0x2E2ACC58FE7B78DB, 0x2E2A9E2DE52FD5F2, 0x2E2A92A338D53EEC,
21 0x2E2A8FC08F5E19B6, 0x2E2A8F07E51A485E, 0x2E2A8ED9BA8AF388,
22 0x2E2A8ECE2FE7384A, 0x2E2A8ECB4D3E4B1A, 0x2E2A8ECA94940FE8,
23 0x2E2A8ECA6669811D, 0x2E2A8ECA5ADEDD6A, 0x2E2A8ECA57FC347E,
24 0x2E2A8ECA57438A43, 0x2E2A8ECA57155FB4, 0x2E2A8ECA5709D510,
25 0x2E2A8ECA5706F267, 0x2E2A8ECA570639BD, 0x2E2A8ECA57060B92,
26 0x2E2A8ECA57060008, 0x2E2A8ECA5705FD25, 0x2E2A8ECA5705FC6C,
27 0x2E2A8ECA5705FC3E, 0x2E2A8ECA5705FC33, 0x2E2A8ECA5705FC30,
28 0x2E2A8ECA5705FC2F, 0x2E2A8ECA5705FC2F
29];
30
31/// Computes the binary exponential of `logq57`.
32/// `logq57`: a log base 2 in Q57 format.
33/// Returns a 64 bit integer in Q0 (no fraction).
34pub const fn bexp64(logq57: i64) -> i64 {
35 let ipart = (logq57 >> 57) as i32;
36 if ipart < 0 {
37 return 0;
38 }
39 if ipart >= 63 {
40 return 0x7FFFFFFFFFFFFFFF;
41 }
42 // z is the fractional part of the log in Q62 format.
43 // We need 1 bit of headroom since the magnitude can get larger than 1
44 // during the iteration, and a sign bit.
45 let mut z = logq57 - q57(ipart);
46 let mut w: i64;
47 if z != 0 {
48 // Rust has 128 bit multiplies, so it should be possible to do this
49 // faster without losing accuracy.
50 z <<= 5;
51 // w is the exponential in Q61 format (since it also needs headroom and can
52 // get as large as 2.0); we could get another bit if we dropped the sign,
53 // but we'll recover that bit later anyway.
54 // Ideally this should start out as
55 // \lim_{n->\infty} 2^{61}/\product_{i=1}^n \sqrt{1-2^{-2i}}
56 // but in order to guarantee convergence we have to repeat iterations 4,
57 // 13 (=3*4+1), and 40 (=3*13+1, etc.), so it winds up somewhat larger.
58 w = 0x26A3D0E401DD846D;
59 let mut i: i64 = 0;
60 loop {
61 let mask = -((z < 0) as i64);
62 w += ((w >> (i + 1)) + mask) ^ mask;
63 z -= (ATANH_LOG2[i as usize] + mask) ^ mask;
64 // Repeat iteration 4.
65 if i >= 3 {
66 break;
67 }
68 z *= 2;
69 i += 1;
70 }
71 loop {
72 let mask = -((z < 0) as i64);
73 w += ((w >> (i + 1)) + mask) ^ mask;
74 z -= (ATANH_LOG2[i as usize] + mask) ^ mask;
75 // Repeat iteration 13.
76 if i >= 12 {
77 break;
78 }
79 z *= 2;
80 i += 1;
81 }
82 while i < 32 {
83 let mask = -((z < 0) as i64);
84 w += ((w >> (i + 1)) + mask) ^ mask;
85 z = (z - ((ATANH_LOG2[i as usize] + mask) ^ mask)) * 2;
86 i += 1;
87 }
88 // Skip the remaining iterations unless we really require that much
89 // precision.
90 // We could have bailed out earlier for smaller iparts, but that would
91 // require initializing w from a table, as the limit doesn't converge to
92 // 61-bit precision until n=30.
93 let mut wlo: i32 = 0;
94 if ipart > 30 {
95 // For these iterations, we just update the low bits, as the high bits
96 // can't possibly be affected.
97 // OD_ATANH_LOG2 has also converged (it actually did so one iteration
98 // earlier, but that's no reason for an extra special case).
99 loop {
100 let mask = -((z < 0) as i64);
101 wlo += (((w >> i) + mask) ^ mask) as i32;
102 z -= (ATANH_LOG2[31] + mask) ^ mask;
103 // Repeat iteration 40.
104 if i >= 39 {
105 break;
106 }
107 z *= 2;
108 i += 1;
109 }
110 while i < 61 {
111 let mask = -((z < 0) as i64);
112 wlo += (((w >> i) + mask) ^ mask) as i32;
113 z = (z - ((ATANH_LOG2[31] + mask) ^ mask)) * 2;
114 i += 1;
115 }
116 }
117 w = (w << 1) + (wlo as i64);
118 } else {
119 w = 1i64 << 62;
120 }
121 if ipart < 62 {
122 w = ((w >> (61 - ipart)) + 1) >> 1;
123 }
124 w
125}
126
127/// Computes the binary log of `n`.
128/// `n`: a 64-bit integer in Q0 (no fraction).
129/// Returns a 64-bit log in Q57.
130pub const fn blog64(n: i64) -> i64 {
131 if n <= 0 {
132 return -1;
133 }
134 let ipart = 63 - n.leading_zeros() as i32;
135 let w = if ipart > 61 { n >> (ipart - 61) } else { n << (61 - ipart) };
136 if (w & (w - 1)) == 0 {
137 return q57(ipart);
138 }
139 // z is the fractional part of the log in Q61 format.
140 let mut z: i64 = 0;
141 // Rust has 128 bit multiplies, so it should be possible to do this
142 // faster without losing accuracy.
143 // x and y are the cosh() and sinh(), respectively, in Q61 format.
144 // We are computing z = 2*atanh(y/x) = 2*atanh((w - 1)/(w + 1)).
145 let mut x = w + (1i64 << 61);
146 let mut y = w - (1i64 << 61);
147 // Repeat iteration 4.
148 // Repeat iteration 13.
149 // Repeat iteration 40.
150 let bounds = [3, 12, 39, 61];
151 let mut i = 0;
152 let mut j = 0;
153 loop {
154 let end = bounds[j];
155 loop {
156 let mask = -((y < 0) as i64);
157 // ATANH_LOG2 has converged at iteration 32.
158 z += ((ATANH_LOG2[if i < 31 { i } else { 31 }] >> i) + mask) ^ mask;
159 let u = x >> (i + 1);
160 x -= ((y >> (i + 1)) + mask) ^ mask;
161 y -= (u + mask) ^ mask;
162 if i == end {
163 break;
164 }
165 i += 1;
166 }
167 j += 1;
168 if j == bounds.len() {
169 break;
170 }
171 }
172 z = (z + 8) >> 4;
173 q57(ipart) + z
174}
175
176/// Computes the binary log of `n`.
177/// `n`: an unsigned 32-bit integer in Q0 (no fraction).
178/// Returns a signed 32-bit log in Q24.
179#[allow(unused)]
180pub const fn blog32(n: u32) -> i32 {
181 if n == 0 {
182 return -1;
183 }
184 let ipart = 31 - n.leading_zeros() as i32;
185 let n = n as i64;
186 let w = if ipart > 61 { n >> (ipart - 61) } else { n << (61 - ipart) };
187 if (w & (w - 1)) == 0 {
188 return ipart << 24;
189 }
190 // z is the fractional part of the log in Q61 format.
191 let mut z: i64 = 0;
192 // Rust has 128 bit multiplies, so it should be possible to do this
193 // faster without losing accuracy.
194 // x and y are the cosh() and sinh(), respectively, in Q61 format.
195 // We are computing z = 2*atanh(y/x) = 2*atanh((w - 1)/(w + 1)).
196 let mut x = w + (1i64 << 61);
197 let mut y = w - (1i64 << 61);
198 // Repeat iteration 4.
199 // Repeat iteration 13.
200 let bounds = [3, 12, 29];
201 let mut i = 0;
202 let mut j = 0;
203 loop {
204 let end = bounds[j];
205 loop {
206 let mask = -((y < 0) as i64);
207 z += ((ATANH_LOG2[i] >> i) + mask) ^ mask;
208 let u = x >> (i + 1);
209 x -= ((y >> (i + 1)) + mask) ^ mask;
210 y -= (u + mask) ^ mask;
211 if i == end {
212 break;
213 }
214 i += 1;
215 }
216 j += 1;
217 if j == bounds.len() {
218 break;
219 }
220 }
221 const SHIFT: usize = 61 - 24;
222 z = (z + (1 << SHIFT >> 1)) >> SHIFT;
223 (ipart << 24) + z as i32
224}
225
226/// Converts a Q57 fixed-point fraction to Q24 by rounding.
227pub const fn q57_to_q24(v: i64) -> i32 {
228 (((v >> 32) + 1) >> 1) as i32
229}
230
231/// Converts a Q24 fixed-point fraction to Q57.
232pub const fn q24_to_q57(v: i32) -> i64 {
233 (v as i64) << 33
234}
235
236/// Binary exponentiation of a `log_scale` with 24-bit fractional precision and
237/// saturation.
238/// `log_scale`: A binary logarithm in Q24 format.
239/// Returns the binary exponential in Q24 format, saturated to 2**47 - 1 if
240/// `log_scale` was too large.
241pub const fn bexp_q24(log_scale: i32) -> i64 {
242 if log_scale < 23 << 24 {
243 let ret: i64 = bexp64(((log_scale as i64) << 33) + q57(24));
244 if ret < (1i64 << 47) - 1 {
245 return ret;
246 }
247 }
248 (1i64 << 47) - 1
249}
250
251/// Polynomial approximation of a binary exponential.
252/// Q10 input, Q0 output.
253#[allow(unused)]
254pub const fn bexp32_q10(z: i32) -> u32 {
255 let ipart: i32 = z >> 10;
256 let mut n: u32 = ((z & ((1 << 10) - 1)) << 4) as u32;
257 n = ({
258 n * (((n * (((n * (((n * 3548) >> 15) + 6817)) >> 15) + 15823)) >> 15)
259 + 22708)
260 } >> 15)
261 + 16384;
262 if 14 - ipart > 0 {
263 (n + (1 << (13 - ipart))) >> (14 - ipart)
264 } else {
265 n << (ipart - 14)
266 }
267}
268
269/// Polynomial approximation of a binary logarithm.
270/// Q0 input, Q11 output.
271pub const fn blog32_q11(w: u32) -> i32 {
272 if w == 0 {
273 return -1;
274 }
275 let ipart: i32 = 32 - w.leading_zeros() as i32;
276 let n: i32 = if ipart - 16 > 0 { w >> (ipart - 16) } else { w << (16 - ipart) }
277 as i32
278 - 32768
279 - 16384;
280 let fpart: i32 = ({
281 n * (((n * (((n * (((n * -1402) >> 15) + 2546)) >> 15) - 5216)) >> 15)
282 + 15745)
283 } >> 15)
284 - 6797;
285 (ipart << 11) + (fpart >> 3)
286}
287
288#[cfg(test)]
289mod test {
290 use super::*;
291
292 #[test]
293 fn blog64_vectors() {
294 assert!(blog64(1793) == 0x159dc71e24d32daf);
295 assert!(blog64(0x678dde6e5fd29f05) == 0x7d6373ad151ca685);
296 }
297
298 #[test]
299 fn bexp64_vectors() {
300 assert!(bexp64(0x159dc71e24d32daf) == 1793);
301 assert!((bexp64(0x7d6373ad151ca685) - 0x678dde6e5fd29f05).abs() < 29);
302 }
303
304 #[test]
305 fn blog64_bexp64_round_trip() {
306 for a in 1..=std::u16::MAX as i64 {
307 let b = std::i64::MAX / a;
308 let (log_a, log_b, log_ab) = (blog64(a), blog64(b), blog64(a * b));
309 assert!((log_a + log_b - log_ab).abs() < 4);
310 assert!(bexp64(log_a) == a);
311 assert!((bexp64(log_b) - b).abs() < 128);
312 assert!((bexp64(log_ab) - a * b).abs() < 128);
313 }
314 }
315
316 #[test]
317 fn blog32_vectors() {
318 assert_eq!(blog32(0), -1);
319 assert_eq!(blog32(1793), q57_to_q24(0x159dc71e24d32daf));
320 }
321
322 #[test]
323 fn bexp_q24_vectors() {
324 assert_eq!(bexp_q24(i32::MAX), (1i64 << 47) - 1);
325 assert_eq!(
326 (bexp_q24(q57_to_q24(0x159dc71e24d32daf)) + (1 << 24 >> 1)) >> 24,
327 1793
328 );
329 }
330
331 #[test]
332 fn blog32_bexp_q24_round_trip() {
333 for a in 1..=std::u16::MAX as u32 {
334 let b = (std::u32::MAX >> 9) / a;
335 let (log_a, log_b, log_ab) = (blog32(a), blog32(b), blog32(a * b));
336 assert!((log_a + log_b - log_ab).abs() < 4);
337 assert!((bexp_q24(log_a) - (i64::from(a) << 24)).abs() < (1 << 24 >> 1));
338 assert!(((bexp_q24(log_b) >> 24) - i64::from(b)).abs() < 128);
339 assert!(
340 ((bexp_q24(log_ab) >> 24) - i64::from(a) * i64::from(b)).abs() < 128
341 );
342 }
343 }
344
345 #[test]
346 fn blog32_q11_bexp32_q10_round_trip() {
347 for a in 1..=std::i16::MAX as i32 {
348 let b = std::i16::MAX as i32 / a;
349 let (log_a, log_b, log_ab) = (
350 blog32_q11(a as u32),
351 blog32_q11(b as u32),
352 blog32_q11(a as u32 * b as u32),
353 );
354 assert!((log_a + log_b - log_ab).abs() < 4);
355 assert!((bexp32_q10((log_a + 1) >> 1) as i32 - a).abs() < 18);
356 assert!((bexp32_q10((log_b + 1) >> 1) as i32 - b).abs() < 2);
357 assert!((bexp32_q10((log_ab + 1) >> 1) as i32 - a * b).abs() < 18);
358 }
359 }
360}
361