1use alloc::vec::*;
2
3#[cfg(not(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd")))]
4pub fn get_bitmap(a: &Vec<f32>, length: usize) -> Vec<u8> {
5 use crate::platform::{abs, clamp};
6 use alloc::vec;
7 let mut height = 0.0;
8 assert!(length <= a.len());
9 let mut output = vec![0; length];
10 for i in 0..length {
11 unsafe {
12 height += a.get_unchecked(i);
13 // Clamping because as u8 is undefined outside of its range in rustc.
14 *(output.get_unchecked_mut(i)) = clamp(abs(height) * 255.9, 0.0, 255.0) as u8;
15 }
16 }
17 output
18}
19
/// SSE2 implementation: turns the first `length` values of `a` into bytes, four at a time.
///
/// Each output byte is derived from the running (prefix) sum of `a` up to and including that
/// index: the sum is multiplied by 255.9, its sign bit is cleared, it is truncated to an `i32`
/// and then narrowed to a byte with saturating packs.
///
/// `a` does not need to be padded to a multiple of four; a short final group is copied into a
/// stack buffer before it is loaded.
///
/// # Panics
///
/// Panics if `length` is greater than `a.len()`.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))]
pub fn get_bitmap(a: &Vec<f32>, length: usize) -> Vec<u8> {
    #[cfg(target_arch = "x86")]
    use core::arch::x86::*;
    #[cfg(target_arch = "x86_64")]
    use core::arch::x86_64::*;

    // Same precondition as the scalar implementation; also bounds every read below.
    assert!(length <= a.len());
    let available = a.len();
    // The loop produces whole groups of four bytes, so reserve `length` rounded up to 4.
    let padded_len = (length + 3) & !3;
    // Allocated directly as bytes so the allocation layout matches the returned `Vec<u8>`.
    // The buffer is deliberately not zeroed: every byte is overwritten by the loop.
    let mut output: Vec<u8> = Vec::with_capacity(padded_len);
    let src = a.as_ptr();
    let dst = output.as_mut_ptr();

    // SAFETY:
    // * Reads: `i < padded_len` and `i` is a multiple of 4, hence `i < length <= available`.
    //   A full 4-float load only happens when `available - i >= 4`; otherwise exactly the
    //   `available - i` remaining floats are copied into a local array first.
    // * Writes: each iteration stores 4 bytes at `dst + i` with `i + 4 <= padded_len`, which is
    //   within the capacity requested above. The store is unaligned, so no alignment of the
    //   byte buffer is assumed.
    // * `set_len(length)` runs after all `padded_len >= length` bytes have been initialised.
    unsafe {
        // Running sum carried over from the previous group, broadcast to all four lanes.
        let mut offset = _mm_setzero_ps();
        // -0.0 has only the sign bit set; and-not with it clears the sign, i.e. abs().
        let sign_bits = _mm_set1_ps(-0.0);
        let scale = _mm_set1_ps(255.9);
        // Filler for the lanes of the packs whose result is discarded.
        let zero = _mm_setzero_si128();

        for i in (0..padded_len).step_by(4) {
            // x = the next 4 floats of `a` (missing tail entries read as 0.0; the bytes they
            // produce lie beyond `length` and are never exposed).
            let mut x = if available - i >= 4 {
                _mm_loadu_ps(src.add(i))
            } else {
                let mut tail = [0.0f32; 4];
                core::ptr::copy_nonoverlapping(src.add(i), tail.as_mut_ptr(), available - i);
                _mm_loadu_ps(tail.as_ptr())
            };
            // In-register prefix sum:
            // x += (0.0, x[0], x[1], x[2])
            x = _mm_add_ps(x, _mm_castsi128_ps(_mm_slli_si128(_mm_castps_si128(x), 4)));
            // x += (0.0, 0.0, x[0], x[1])
            x = _mm_add_ps(x, _mm_castsi128_ps(_mm_slli_si128(_mm_castps_si128(x), 8)));
            // x += offset
            x = _mm_add_ps(x, offset);

            // y = abs(x * 255.9)
            let y = _mm_andnot_ps(sign_bits, _mm_mul_ps(x, scale));
            // Truncate to i32, then narrow i32 -> i16 -> u8 with saturation so anything above
            // 255 becomes 255. NOTE: magnitudes that do not fit in an i32 convert to i32::MIN
            // and therefore pack to 0 rather than 255.
            let y = _mm_cvttps_epi32(y);
            let y = _mm_packus_epi16(_mm_packs_epi32(y, zero), zero);

            // The low 32 bits of y now hold the four result bytes; store them at output[i..i+4].
            core::ptr::write_unaligned(dst.add(i) as *mut i32, _mm_cvtsi128_si32(y));

            // offset = (x[3], x[3], x[3], x[3])
            offset = _mm_set1_ps(core::mem::transmute::<__m128, [f32; 4]>(x)[3]);
        }
        output.set_len(length);
    }
    output
}
74