get_bitmap.rs source code [crates/fontdue-0.8.0/src/platform/float/get_bitmap.rs]

1	use alloc::vec::*;
2
3	#[cfg(not(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd")))]
4	pub fn get_bitmap(a: &Vec<f32>, length: usize) -> Vec<u8> {
5	use crate::platform::{abs, clamp};
6	use alloc::vec;
7	let mut height = `0.0`;
8	assert!(length <= a.len());
9	let mut output = vec![`0`; length];
10	for i in `0`..length {
11	unsafe {
12	height += a.get_unchecked(i);
13	// Clamping because as u8 is undefined outside of its range in rustc.
14	(output.get_unchecked_mut(i)) = clamp(abs(height) `255.9`, `0.0`, `255.0`) as u8;
15	}
16	}
17	output
18	}
19
/// Converts accumulated coverage deltas into an 8-bit bitmap (x86 SIMD path).
///
/// Computes a running sum over the first `length` entries of `a`, four lanes at a time. Each
/// output byte is the absolute value of the running sum at that position, scaled by 255.9,
/// truncated to an integer and saturated into `0..=255` by the pack instructions.
///
/// # Panics
///
/// Panics if `length > a.len()`.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))]
pub fn get_bitmap(a: &Vec<f32>, length: usize) -> Vec<u8> {
    #[cfg(target_arch = "x86")]
    use core::arch::x86::*;
    #[cfg(target_arch = "x86_64")]
    use core::arch::x86_64::*;

    assert!(length <= a.len());
    // Round up to a whole number of 4-lane groups. This cannot overflow because `length` is
    // bounded by the length of a `Vec<f32>`.
    let padded_length = (length + 3) & !3;
    // Reserve room for the padded output but leave the length at zero: the buffer is filled
    // through a raw pointer below and only then given its final length. Allocating it as a
    // `Vec<u8>` keeps the allocation layout consistent with the layout used on drop.
    let mut output: Vec<u8> = Vec::with_capacity(padded_length);

    // SAFETY:
    // * Every load reads either four in-bounds floats of `a` (guarded by `i + 4 <= a.len()`)
    //   or a local four-element buffer.
    // * Every store writes four bytes at `i..i + 4` with `i + 4 <= padded_length`, which is
    //   within the capacity reserved above; `write_unaligned` imposes no alignment need.
    // * `set_len(length)` runs after bytes `0..padded_length` have been written, and
    //   `length <= padded_length <= capacity`.
    // * The intrinsics require SSE2; as before, this code path assumes the target provides it
    //   whenever the `simd` feature is enabled.
    unsafe {
        let out = output.as_mut_ptr();
        // Running sum carried over from the previous group, broadcast to every lane.
        let mut offset = _mm_setzero_ps();
        // Negative zero has only the sign bit set, which makes it the mask for abs().
        let sign_mask = _mm_set1_ps(-0.0);
        let scale = _mm_set1_ps(255.9);
        let zero = _mm_setzero_si128();

        for i in (0..padded_length).step_by(4) {
            // x = four floats starting at a[i]. The final group may extend past the end of
            // `a`; in that case the missing lanes are zero filled. Those lanes only produce
            // bytes at positions >= length, which are never exposed.
            let mut x = if i + 4 <= a.len() {
                _mm_loadu_ps(a.as_ptr().add(i))
            } else {
                let rest = &a[i..];
                let mut tail = [0.0_f32; 4];
                tail[..rest.len()].copy_from_slice(rest);
                _mm_loadu_ps(tail.as_ptr())
            };
            // x += (0.0, x[0], x[1], x[2])
            x = _mm_add_ps(x, _mm_castsi128_ps(_mm_slli_si128(_mm_castps_si128(x), 4)));
            // x += (0.0, 0.0, x[0], x[1])
            x = _mm_add_ps(x, _mm_castsi128_ps(_mm_slli_si128(_mm_castps_si128(x), 8)));
            // x += offset
            x = _mm_add_ps(x, offset);

            // y = abs(x * 255.9)
            let y = _mm_andnot_ps(sign_mask, _mm_mul_ps(x, scale));
            // Convert to i32 with truncation.
            let y = _mm_cvttps_epi32(y);
            // Narrow i32 -> i16 -> u8 with saturation; the four results end up in the low
            // four bytes of the register.
            let packed = _mm_packus_epi16(_mm_packs_epi32(y, zero), zero);

            // Store the low four bytes into output[i..i + 4].
            (out.add(i) as *mut i32).write_unaligned(_mm_cvtsi128_si32(packed));
            // offset = (x[3], x[3], x[3], x[3])
            offset = _mm_shuffle_ps(x, x, 0b1111_1111);
        }
        // Expose only the requested bytes; the padding bytes stay beyond the length.
        output.set_len(length);
    }
    output
}
74