1 | use super::bitmask::BitMask; |
2 | use super::EMPTY; |
3 | use core::mem; |
4 | |
5 | #[cfg (target_arch = "x86" )] |
6 | use core::arch::x86; |
7 | #[cfg (target_arch = "x86_64" )] |
8 | use core::arch::x86_64 as x86; |
9 | |
10 | pub type BitMaskWord = u16; |
11 | pub const BITMASK_STRIDE: usize = 1; |
12 | pub const BITMASK_MASK: BitMaskWord = 0xffff; |
13 | |
14 | /// Abstraction over a group of control bytes which can be scanned in |
15 | /// parallel. |
16 | /// |
17 | /// This implementation uses a 128-bit SSE value. |
18 | #[derive (Copy, Clone)] |
19 | pub struct Group(x86::__m128i); |
20 | |
21 | // FIXME: https://github.com/rust-lang/rust-clippy/issues/3859 |
22 | #[allow (clippy::use_self)] |
23 | impl Group { |
24 | /// Number of bytes in the group. |
25 | pub const WIDTH: usize = mem::size_of::<Self>(); |
26 | |
27 | /// Returns a full group of empty bytes, suitable for use as the initial |
28 | /// value for an empty hash table. |
29 | /// |
30 | /// This is guaranteed to be aligned to the group size. |
31 | #[inline ] |
32 | #[allow (clippy::items_after_statements)] |
33 | pub const fn static_empty() -> &'static [u8; Group::WIDTH] { |
34 | #[repr (C)] |
35 | struct AlignedBytes { |
36 | _align: [Group; 0], |
37 | bytes: [u8; Group::WIDTH], |
38 | } |
39 | const ALIGNED_BYTES: AlignedBytes = AlignedBytes { |
40 | _align: [], |
41 | bytes: [EMPTY; Group::WIDTH], |
42 | }; |
43 | &ALIGNED_BYTES.bytes |
44 | } |
45 | |
46 | /// Loads a group of bytes starting at the given address. |
47 | #[inline ] |
48 | #[allow (clippy::cast_ptr_alignment)] // unaligned load |
49 | pub unsafe fn load(ptr: *const u8) -> Self { |
50 | Group(x86::_mm_loadu_si128(ptr.cast())) |
51 | } |
52 | |
53 | /// Loads a group of bytes starting at the given address, which must be |
54 | /// aligned to `mem::align_of::<Group>()`. |
55 | #[inline ] |
56 | #[allow (clippy::cast_ptr_alignment)] |
57 | pub unsafe fn load_aligned(ptr: *const u8) -> Self { |
58 | // FIXME: use align_offset once it stabilizes |
59 | debug_assert_eq!(ptr as usize & (mem::align_of::<Self>() - 1), 0); |
60 | Group(x86::_mm_load_si128(ptr.cast())) |
61 | } |
62 | |
63 | /// Stores the group of bytes to the given address, which must be |
64 | /// aligned to `mem::align_of::<Group>()`. |
65 | #[inline ] |
66 | #[allow (clippy::cast_ptr_alignment)] |
67 | pub unsafe fn store_aligned(self, ptr: *mut u8) { |
68 | // FIXME: use align_offset once it stabilizes |
69 | debug_assert_eq!(ptr as usize & (mem::align_of::<Self>() - 1), 0); |
70 | x86::_mm_store_si128(ptr.cast(), self.0); |
71 | } |
72 | |
73 | /// Returns a `BitMask` indicating all bytes in the group which have |
74 | /// the given value. |
75 | #[inline ] |
76 | pub fn match_byte(self, byte: u8) -> BitMask { |
77 | #[allow ( |
78 | clippy::cast_possible_wrap, // byte: u8 as i8 |
79 | // byte: i32 as u16 |
80 | // note: _mm_movemask_epi8 returns a 16-bit mask in a i32, the |
81 | // upper 16-bits of the i32 are zeroed: |
82 | clippy::cast_sign_loss, |
83 | clippy::cast_possible_truncation |
84 | )] |
85 | unsafe { |
86 | let cmp = x86::_mm_cmpeq_epi8(self.0, x86::_mm_set1_epi8(byte as i8)); |
87 | BitMask(x86::_mm_movemask_epi8(cmp) as u16) |
88 | } |
89 | } |
90 | |
91 | /// Returns a `BitMask` indicating all bytes in the group which are |
92 | /// `EMPTY`. |
93 | #[inline ] |
94 | pub fn match_empty(self) -> BitMask { |
95 | self.match_byte(EMPTY) |
96 | } |
97 | |
98 | /// Returns a `BitMask` indicating all bytes in the group which are |
99 | /// `EMPTY` or `DELETED`. |
100 | #[inline ] |
101 | pub fn match_empty_or_deleted(self) -> BitMask { |
102 | #[allow ( |
103 | // byte: i32 as u16 |
104 | // note: _mm_movemask_epi8 returns a 16-bit mask in a i32, the |
105 | // upper 16-bits of the i32 are zeroed: |
106 | clippy::cast_sign_loss, |
107 | clippy::cast_possible_truncation |
108 | )] |
109 | unsafe { |
110 | // A byte is EMPTY or DELETED iff the high bit is set |
111 | BitMask(x86::_mm_movemask_epi8(self.0) as u16) |
112 | } |
113 | } |
114 | |
115 | /// Returns a `BitMask` indicating all bytes in the group which are full. |
116 | #[inline ] |
117 | pub fn match_full(&self) -> BitMask { |
118 | self.match_empty_or_deleted().invert() |
119 | } |
120 | |
121 | /// Performs the following transformation on all bytes in the group: |
122 | /// - `EMPTY => EMPTY` |
123 | /// - `DELETED => EMPTY` |
124 | /// - `FULL => DELETED` |
125 | #[inline ] |
126 | pub fn convert_special_to_empty_and_full_to_deleted(self) -> Self { |
127 | // Map high_bit = 1 (EMPTY or DELETED) to 1111_1111 |
128 | // and high_bit = 0 (FULL) to 1000_0000 |
129 | // |
130 | // Here's this logic expanded to concrete values: |
131 | // let special = 0 > byte = 1111_1111 (true) or 0000_0000 (false) |
132 | // 1111_1111 | 1000_0000 = 1111_1111 |
133 | // 0000_0000 | 1000_0000 = 1000_0000 |
134 | #[allow ( |
135 | clippy::cast_possible_wrap, // byte: 0x80_u8 as i8 |
136 | )] |
137 | unsafe { |
138 | let zero = x86::_mm_setzero_si128(); |
139 | let special = x86::_mm_cmpgt_epi8(zero, self.0); |
140 | Group(x86::_mm_or_si128( |
141 | special, |
142 | x86::_mm_set1_epi8(0x80_u8 as i8), |
143 | )) |
144 | } |
145 | } |
146 | } |
147 | |