| 1 | use std::ptr; |
| 2 | |
| 3 | use v_frame::plane::Plane; |
| 4 | |
| 5 | use crate::diff::BLOCK_SIZE; |
| 6 | |
| 7 | /// Solves Ax = b, where x and b are column vectors of size nx1 and A is nxn |
| 8 | #[allow (clippy::many_single_char_names)] |
| 9 | pub(super) fn linsolve( |
| 10 | n: usize, |
| 11 | a: &mut [f64], |
| 12 | stride: usize, |
| 13 | b: &mut [f64], |
| 14 | x: &mut [f64], |
| 15 | ) -> bool { |
| 16 | // SAFETY: We need to ensure that `n` doesn't exceed the bounds of these arrays. |
| 17 | // But this is a crate-private function, so we control all input. |
| 18 | unsafe { |
| 19 | // Forward elimination |
| 20 | for k in 0..(n - 1) { |
| 21 | // Bring the largest magnitude to the diagonal position |
| 22 | ((k + 1)..n).rev().for_each(|i| { |
| 23 | if a.get_unchecked((i - 1) * stride + k).abs() |
| 24 | < a.get_unchecked(i * stride + k).abs() |
| 25 | { |
| 26 | (0..n).for_each(|j| { |
| 27 | swap_unchecked(a, i * stride + j, (i - 1) * stride + j); |
| 28 | }); |
| 29 | swap_unchecked(b, i, i - 1); |
| 30 | } |
| 31 | }); |
| 32 | |
| 33 | for i in k..(n - 1) { |
| 34 | if a.get_unchecked(k * stride + k).abs() < f64::EPSILON { |
| 35 | return false; |
| 36 | } |
| 37 | let c = *a.get_unchecked((i + 1) * stride + k) / *a.get_unchecked(k * stride + k); |
| 38 | (0..n).for_each(|j| { |
| 39 | let a2_val = *a.get_unchecked(k * stride + j); |
| 40 | let a_val = a.get_unchecked_mut((i + 1) * stride + j); |
| 41 | *a_val = c.mul_add(-a2_val, *a_val); |
| 42 | }); |
| 43 | let b2_val = *b.get_unchecked(k); |
| 44 | let b_val = b.get_unchecked_mut(i + 1); |
| 45 | *b_val = c.mul_add(-b2_val, *b_val); |
| 46 | } |
| 47 | } |
| 48 | |
| 49 | // Backward substitution |
| 50 | for i in (0..n).rev() { |
| 51 | if a.get_unchecked(i * stride + i).abs() < f64::EPSILON { |
| 52 | return false; |
| 53 | } |
| 54 | let mut c = 0.0f64; |
| 55 | for j in (i + 1)..n { |
| 56 | c = a |
| 57 | .get_unchecked(i * stride + j) |
| 58 | .mul_add(*x.get_unchecked(j), c); |
| 59 | } |
| 60 | *x.get_unchecked_mut(i) = (*b.get_unchecked(i) - c) / *a.get_unchecked(i * stride + i); |
| 61 | } |
| 62 | } |
| 63 | |
| 64 | true |
| 65 | } |
| 66 | |
| 67 | // TODO: This is unstable upstream. Once it's stable upstream, use that. |
| 68 | unsafe fn swap_unchecked<T>(slice: &mut [T], a: usize, b: usize) { |
| 69 | let ptr: *mut T = slice.as_mut_ptr(); |
| 70 | // SAFETY: caller has to guarantee that `a < self.len()` and `b < self.len()` |
| 71 | unsafe { |
| 72 | ptr::swap(x:ptr.add(a), y:ptr.add(count:b)); |
| 73 | } |
| 74 | } |
| 75 | |
| 76 | pub(super) fn multiply_mat( |
| 77 | m1: &[f64], |
| 78 | m2: &[f64], |
| 79 | res: &mut [f64], |
| 80 | m1_rows: usize, |
| 81 | inner_dim: usize, |
| 82 | m2_cols: usize, |
| 83 | ) { |
| 84 | assert!(res.len() >= m1_rows * m2_cols); |
| 85 | assert!(m1.len() >= m1_rows * inner_dim); |
| 86 | assert!(m2.len() >= m2_cols * inner_dim); |
| 87 | let mut idx: usize = 0; |
| 88 | for row: usize in 0..m1_rows { |
| 89 | for col: usize in 0..m2_cols { |
| 90 | let mut sum: f64 = 0f64; |
| 91 | for inner: usize in 0..inner_dim { |
| 92 | // SAFETY: We do the bounds checks once at the top to improve performance. |
| 93 | unsafe { |
| 94 | sum += m1.get_unchecked(index:row * inner_dim + inner) |
| 95 | * m2.get_unchecked(index:inner * m2_cols + col); |
| 96 | } |
| 97 | } |
| 98 | // SAFETY: We do the bounds checks once at the top to improve performance. |
| 99 | unsafe { |
| 100 | *res.get_unchecked_mut(index:idx) = sum; |
| 101 | } |
| 102 | idx += 1; |
| 103 | } |
| 104 | } |
| 105 | } |
| 106 | |
| 107 | #[must_use ] |
| 108 | pub(super) fn normalized_cross_correlation(a: &[f64], b: &[f64], n: usize) -> f64 { |
| 109 | let mut c: f64 = 0f64; |
| 110 | let mut a_len: f64 = 0f64; |
| 111 | let mut b_len: f64 = 0f64; |
| 112 | for (a: &f64, b: &f64) in a.iter().zip(b.iter()).take(n) { |
| 113 | a_len = (*a).mul_add(*a, b:a_len); |
| 114 | b_len = (*b).mul_add(*b, b_len); |
| 115 | c = (*a).mul_add(*b, b:c); |
| 116 | } |
| 117 | c / (a_len.sqrt() * b_len.sqrt()) |
| 118 | } |
| 119 | |
| 120 | #[allow (clippy::too_many_arguments)] |
| 121 | pub(super) fn extract_ar_row( |
| 122 | coords: &[[isize; 2]], |
| 123 | num_coords: usize, |
| 124 | source_origin: &[u8], |
| 125 | denoised_origin: &[u8], |
| 126 | stride: usize, |
| 127 | dec: (usize, usize), |
| 128 | alt_source_origin: Option<&[u8]>, |
| 129 | alt_denoised_origin: Option<&[u8]>, |
| 130 | alt_stride: usize, |
| 131 | x: usize, |
| 132 | y: usize, |
| 133 | buffer: &mut [f64], |
| 134 | ) -> f64 { |
| 135 | debug_assert!(buffer.len() > num_coords); |
| 136 | debug_assert!(coords.len() >= num_coords); |
| 137 | |
| 138 | // SAFETY: We know the indexes we provide do not overflow the data bounds |
| 139 | unsafe { |
| 140 | for i in 0..num_coords { |
| 141 | let x_i = x as isize + coords.get_unchecked(i)[0]; |
| 142 | let y_i = y as isize + coords.get_unchecked(i)[1]; |
| 143 | debug_assert!(x_i >= 0); |
| 144 | debug_assert!(y_i >= 0); |
| 145 | let index = y_i as usize * stride + x_i as usize; |
| 146 | *buffer.get_unchecked_mut(i) = f64::from(*source_origin.get_unchecked(index)) |
| 147 | - f64::from(*denoised_origin.get_unchecked(index)); |
| 148 | } |
| 149 | let val = f64::from(*source_origin.get_unchecked(y * stride + x)) |
| 150 | - f64::from(*denoised_origin.get_unchecked(y * stride + x)); |
| 151 | |
| 152 | if let Some(alt_source_origin) = alt_source_origin { |
| 153 | if let Some(alt_denoised_origin) = alt_denoised_origin { |
| 154 | let mut source_sum = 0u64; |
| 155 | let mut denoised_sum = 0u64; |
| 156 | let mut num_samples = 0usize; |
| 157 | |
| 158 | for dy_i in 0..(1 << dec.1) { |
| 159 | let y_up = (y << dec.1) + dy_i; |
| 160 | for dx_i in 0..(1 << dec.0) { |
| 161 | let x_up = (x << dec.0) + dx_i; |
| 162 | let index = y_up * alt_stride + x_up; |
| 163 | source_sum += u64::from(*alt_source_origin.get_unchecked(index)); |
| 164 | denoised_sum += u64::from(*alt_denoised_origin.get_unchecked(index)); |
| 165 | num_samples += 1; |
| 166 | } |
| 167 | } |
| 168 | *buffer.get_unchecked_mut(num_coords) = |
| 169 | (source_sum as f64 - denoised_sum as f64) / num_samples as f64; |
| 170 | } |
| 171 | } |
| 172 | |
| 173 | val |
| 174 | } |
| 175 | } |
| 176 | |
| 177 | #[must_use ] |
| 178 | pub(super) fn get_block_mean( |
| 179 | source: &Plane<u8>, |
| 180 | frame_dims: (usize, usize), |
| 181 | x_o: usize, |
| 182 | y_o: usize, |
| 183 | ) -> f64 { |
| 184 | let max_h: usize = (frame_dims.1 - y_o).min(BLOCK_SIZE); |
| 185 | let max_w: usize = (frame_dims.0 - x_o).min(BLOCK_SIZE); |
| 186 | |
| 187 | let data_origin: &[u8] = source.data_origin(); |
| 188 | let mut block_sum: u64 = 0u64; |
| 189 | for y: usize in 0..max_h { |
| 190 | for x: usize in 0..max_w { |
| 191 | let index: usize = (y_o + y) * source.cfg.stride + x_o + x; |
| 192 | // SAFETY: We know the index cannot exceed the dimensions of the plane data |
| 193 | unsafe { |
| 194 | block_sum += u64::from(*data_origin.get_unchecked(index)); |
| 195 | } |
| 196 | } |
| 197 | } |
| 198 | |
| 199 | block_sum as f64 / (max_w * max_h) as f64 |
| 200 | } |
| 201 | |
| 202 | #[must_use ] |
| 203 | pub(super) fn get_noise_var( |
| 204 | source: &Plane<u8>, |
| 205 | denoised: &Plane<u8>, |
| 206 | frame_dims: (usize, usize), |
| 207 | x_o: usize, |
| 208 | y_o: usize, |
| 209 | block_w: usize, |
| 210 | block_h: usize, |
| 211 | ) -> f64 { |
| 212 | let max_h: usize = (frame_dims.1 - y_o).min(block_h); |
| 213 | let max_w: usize = (frame_dims.0 - x_o).min(block_w); |
| 214 | |
| 215 | let source_origin: &[u8] = source.data_origin(); |
| 216 | let denoised_origin: &[u8] = denoised.data_origin(); |
| 217 | let mut noise_var_sum: u64 = 0u64; |
| 218 | let mut noise_sum: i64 = 0i64; |
| 219 | for y: usize in 0..max_h { |
| 220 | for x: usize in 0..max_w { |
| 221 | let index: usize = (y_o + y) * source.cfg.stride + x_o + x; |
| 222 | // SAFETY: We know the index cannot exceed the dimensions of the plane data |
| 223 | unsafe { |
| 224 | let noise: i64 = i64::from(*source_origin.get_unchecked(index)) |
| 225 | - i64::from(*denoised_origin.get_unchecked(index)); |
| 226 | noise_sum += noise; |
| 227 | noise_var_sum += noise.pow(exp:2) as u64; |
| 228 | } |
| 229 | } |
| 230 | } |
| 231 | |
| 232 | let noise_mean: f64 = noise_sum as f64 / (max_w * max_h) as f64; |
| 233 | noise_mean.mul_add(-noise_mean, b:noise_var_sum as f64 / (max_w * max_h) as f64) |
| 234 | } |
| 235 | |