1 | use super::super::{BitMask, Tag}; |
2 | use core::mem; |
3 | use core::num::NonZeroU16; |
4 | |
5 | #[cfg (target_arch = "x86" )] |
6 | use core::arch::x86; |
7 | #[cfg (target_arch = "x86_64" )] |
8 | use core::arch::x86_64 as x86; |
9 | |
10 | pub(crate) type BitMaskWord = u16; |
11 | pub(crate) type NonZeroBitMaskWord = NonZeroU16; |
12 | pub(crate) const BITMASK_STRIDE: usize = 1; |
13 | pub(crate) const BITMASK_MASK: BitMaskWord = 0xffff; |
14 | pub(crate) const BITMASK_ITER_MASK: BitMaskWord = !0; |
15 | |
16 | /// Abstraction over a group of control tags which can be scanned in |
17 | /// parallel. |
18 | /// |
19 | /// This implementation uses a 128-bit SSE value. |
20 | #[derive (Copy, Clone)] |
21 | pub(crate) struct Group(x86::__m128i); |
22 | |
23 | // FIXME: https://github.com/rust-lang/rust-clippy/issues/3859 |
24 | #[allow (clippy::use_self)] |
25 | impl Group { |
26 | /// Number of bytes in the group. |
27 | pub(crate) const WIDTH: usize = mem::size_of::<Self>(); |
28 | |
29 | /// Returns a full group of empty tags, suitable for use as the initial |
30 | /// value for an empty hash table. |
31 | /// |
32 | /// This is guaranteed to be aligned to the group size. |
33 | #[inline ] |
34 | #[allow (clippy::items_after_statements)] |
35 | pub(crate) const fn static_empty() -> &'static [Tag; Group::WIDTH] { |
36 | #[repr (C)] |
37 | struct AlignedTags { |
38 | _align: [Group; 0], |
39 | tags: [Tag; Group::WIDTH], |
40 | } |
41 | const ALIGNED_TAGS: AlignedTags = AlignedTags { |
42 | _align: [], |
43 | tags: [Tag::EMPTY; Group::WIDTH], |
44 | }; |
45 | &ALIGNED_TAGS.tags |
46 | } |
47 | |
48 | /// Loads a group of tags starting at the given address. |
49 | #[inline ] |
50 | #[allow (clippy::cast_ptr_alignment)] // unaligned load |
51 | pub(crate) unsafe fn load(ptr: *const Tag) -> Self { |
52 | Group(x86::_mm_loadu_si128(ptr.cast())) |
53 | } |
54 | |
55 | /// Loads a group of tags starting at the given address, which must be |
56 | /// aligned to `mem::align_of::<Group>()`. |
57 | #[inline ] |
58 | #[allow (clippy::cast_ptr_alignment)] |
59 | pub(crate) unsafe fn load_aligned(ptr: *const Tag) -> Self { |
60 | debug_assert_eq!(ptr.align_offset(mem::align_of::<Self>()), 0); |
61 | Group(x86::_mm_load_si128(ptr.cast())) |
62 | } |
63 | |
64 | /// Stores the group of tags to the given address, which must be |
65 | /// aligned to `mem::align_of::<Group>()`. |
66 | #[inline ] |
67 | #[allow (clippy::cast_ptr_alignment)] |
68 | pub(crate) unsafe fn store_aligned(self, ptr: *mut Tag) { |
69 | debug_assert_eq!(ptr.align_offset(mem::align_of::<Self>()), 0); |
70 | x86::_mm_store_si128(ptr.cast(), self.0); |
71 | } |
72 | |
73 | /// Returns a `BitMask` indicating all tags in the group which have |
74 | /// the given value. |
75 | #[inline ] |
76 | pub(crate) fn match_tag(self, tag: Tag) -> BitMask { |
77 | #[allow ( |
78 | clippy::cast_possible_wrap, // tag.0: Tag as i8 |
79 | // tag: i32 as u16 |
80 | // note: _mm_movemask_epi8 returns a 16-bit mask in a i32, the |
81 | // upper 16-bits of the i32 are zeroed: |
82 | clippy::cast_sign_loss, |
83 | clippy::cast_possible_truncation |
84 | )] |
85 | unsafe { |
86 | let cmp = x86::_mm_cmpeq_epi8(self.0, x86::_mm_set1_epi8(tag.0 as i8)); |
87 | BitMask(x86::_mm_movemask_epi8(cmp) as u16) |
88 | } |
89 | } |
90 | |
91 | /// Returns a `BitMask` indicating all tags in the group which are |
92 | /// `EMPTY`. |
93 | #[inline ] |
94 | pub(crate) fn match_empty(self) -> BitMask { |
95 | self.match_tag(Tag::EMPTY) |
96 | } |
97 | |
98 | /// Returns a `BitMask` indicating all tags in the group which are |
99 | /// `EMPTY` or `DELETED`. |
100 | #[inline ] |
101 | pub(crate) fn match_empty_or_deleted(self) -> BitMask { |
102 | #[allow ( |
103 | // tag: i32 as u16 |
104 | // note: _mm_movemask_epi8 returns a 16-bit mask in a i32, the |
105 | // upper 16-bits of the i32 are zeroed: |
106 | clippy::cast_sign_loss, |
107 | clippy::cast_possible_truncation |
108 | )] |
109 | unsafe { |
110 | // A tag is EMPTY or DELETED iff the high bit is set |
111 | BitMask(x86::_mm_movemask_epi8(self.0) as u16) |
112 | } |
113 | } |
114 | |
115 | /// Returns a `BitMask` indicating all tags in the group which are full. |
116 | #[inline ] |
117 | pub(crate) fn match_full(&self) -> BitMask { |
118 | self.match_empty_or_deleted().invert() |
119 | } |
120 | |
121 | /// Performs the following transformation on all tags in the group: |
122 | /// - `EMPTY => EMPTY` |
123 | /// - `DELETED => EMPTY` |
124 | /// - `FULL => DELETED` |
125 | #[inline ] |
126 | pub(crate) fn convert_special_to_empty_and_full_to_deleted(self) -> Self { |
127 | // Map high_bit = 1 (EMPTY or DELETED) to 1111_1111 |
128 | // and high_bit = 0 (FULL) to 1000_0000 |
129 | // |
130 | // Here's this logic expanded to concrete values: |
131 | // let special = 0 > tag = 1111_1111 (true) or 0000_0000 (false) |
132 | // 1111_1111 | 1000_0000 = 1111_1111 |
133 | // 0000_0000 | 1000_0000 = 1000_0000 |
134 | #[allow ( |
135 | clippy::cast_possible_wrap, // tag: Tag::DELETED.0 as i8 |
136 | )] |
137 | unsafe { |
138 | let zero = x86::_mm_setzero_si128(); |
139 | let special = x86::_mm_cmpgt_epi8(zero, self.0); |
140 | Group(x86::_mm_or_si128( |
141 | special, |
142 | x86::_mm_set1_epi8(Tag::DELETED.0 as i8), |
143 | )) |
144 | } |
145 | } |
146 | } |
147 | |