1 | use crate::attribute::OwnedAttribute; |
2 | use crate::name::OwnedName; |
3 | |
4 | use std::collections::hash_map::RandomState; |
5 | use std::collections::HashSet; |
6 | use std::hash::BuildHasher; |
7 | use std::hash::Hash; |
8 | use std::hash::Hasher; |
9 | |
10 | /// An ordered set |
11 | pub(crate) struct AttributesSet { |
12 | vec: Vec<OwnedAttribute>, |
13 | /// Uses a no-op hasher, because these u64s are hashes already |
14 | may_contain: HashSet<u64, U64HasherBuilder>, |
15 | /// This is real hasher for the `OwnedName` |
16 | hasher: RandomState, |
17 | } |
18 | |
19 | /// Use linear search and don't allocate `HashSet` if there are few attributes, |
20 | /// because allocation costs more than a few comparisons. |
21 | const HASH_THRESHOLD: usize = 8; |
22 | |
23 | impl AttributesSet { |
24 | pub fn new() -> Self { |
25 | Self { |
26 | vec: Vec::new(), |
27 | hasher: RandomState::new(), |
28 | may_contain: HashSet::default(), |
29 | } |
30 | } |
31 | |
32 | fn hash(&self, val: &OwnedName) -> u64 { |
33 | let mut h = self.hasher.build_hasher(); |
34 | val.hash(&mut h); |
35 | h.finish() |
36 | } |
37 | |
38 | pub fn len(&self) -> usize { |
39 | self.vec.len() |
40 | } |
41 | |
42 | pub fn contains(&self, name: &OwnedName) -> bool { |
43 | // fall back to linear search only on duplicate or hash collision |
44 | (self.vec.len() < HASH_THRESHOLD || self.may_contain.contains(&self.hash(name))) && |
45 | self.vec.iter().any(move |a| &a.name == name) |
46 | } |
47 | |
48 | pub fn push(&mut self, attr: OwnedAttribute) { |
49 | if self.vec.len() >= HASH_THRESHOLD { |
50 | if self.vec.len() == HASH_THRESHOLD { |
51 | self.may_contain.reserve(HASH_THRESHOLD * 2); |
52 | for attr in &self.vec { |
53 | self.may_contain.insert(self.hash(&attr.name)); |
54 | } |
55 | } |
56 | self.may_contain.insert(self.hash(&attr.name)); |
57 | } |
58 | self.vec.push(attr); |
59 | } |
60 | |
61 | pub fn into_vec(self) -> Vec<OwnedAttribute> { |
62 | self.vec |
63 | } |
64 | } |
65 | |
66 | #[test ] |
67 | fn indexset() { |
68 | let mut s = AttributesSet::new(); |
69 | let not_here = OwnedName { |
70 | local_name: "attr1000" .into(), |
71 | namespace: Some("test" .into()), |
72 | prefix: None, |
73 | }; |
74 | |
75 | // this test will take a lot of time if the `contains()` is linear, and the loop is quadratic |
76 | for i in 0..50000 { |
77 | let name = OwnedName { |
78 | local_name: format!("attr {i}" ), namespace: None, prefix: None, |
79 | }; |
80 | assert!(!s.contains(&name)); |
81 | |
82 | s.push(OwnedAttribute { name, value: String::new() }); |
83 | assert!(!s.contains(¬_here)); |
84 | } |
85 | |
86 | assert!(s.contains(&OwnedName { |
87 | local_name: "attr1234" .into(), namespace: None, prefix: None, |
88 | })); |
89 | assert!(s.contains(&OwnedName { |
90 | local_name: "attr0" .into(), namespace: None, prefix: None, |
91 | })); |
92 | assert!(s.contains(&OwnedName { |
93 | local_name: "attr49999" .into(), namespace: None, prefix: None, |
94 | })); |
95 | } |
96 | |
97 | /// Hashser that does nothing except passing u64 through |
98 | struct U64Hasher(u64); |
99 | |
100 | impl Hasher for U64Hasher { |
101 | fn finish(&self) -> u64 { self.0 } |
102 | fn write(&mut self, slice: &[u8]) { |
103 | for &v: u8 in slice { self.0 ^= u64::from(v) } // unused in practice |
104 | } |
105 | fn write_u64(&mut self, i: u64) { |
106 | self.0 ^= i; |
107 | } |
108 | } |
109 | |
110 | #[derive (Default)] |
111 | struct U64HasherBuilder; |
112 | |
113 | impl BuildHasher for U64HasherBuilder { |
114 | type Hasher = U64Hasher; |
115 | fn build_hasher(&self) -> U64Hasher { U64Hasher(0) } |
116 | } |
117 | |