1 | use crate::attribute::OwnedAttribute; |
2 | use crate::name::OwnedName; |
3 | |
4 | use std::collections::hash_map::RandomState; |
5 | use std::collections::HashSet; |
6 | use std::hash::{BuildHasher, Hash, Hasher}; |
7 | |
8 | /// An ordered set |
9 | pub struct AttributesSet { |
10 | vec: Vec<OwnedAttribute>, |
11 | /// Uses a no-op hasher, because these u64s are hashes already |
12 | may_contain: HashSet<u64, U64HasherBuilder>, |
13 | /// This is real hasher for the `OwnedName` |
14 | hasher: RandomState, |
15 | } |
16 | |
17 | /// Use linear search and don't allocate `HashSet` if there are few attributes, |
18 | /// because allocation costs more than a few comparisons. |
19 | const HASH_THRESHOLD: usize = 8; |
20 | |
21 | impl AttributesSet { |
22 | pub fn new() -> Self { |
23 | Self { |
24 | vec: Vec::new(), |
25 | hasher: RandomState::new(), |
26 | may_contain: HashSet::default(), |
27 | } |
28 | } |
29 | |
30 | fn hash(&self, val: &OwnedName) -> u64 { |
31 | let mut h = self.hasher.build_hasher(); |
32 | val.hash(&mut h); |
33 | h.finish() |
34 | } |
35 | |
36 | pub fn len(&self) -> usize { |
37 | self.vec.len() |
38 | } |
39 | |
40 | pub fn contains(&self, name: &OwnedName) -> bool { |
41 | // fall back to linear search only on duplicate or hash collision |
42 | (self.vec.len() < HASH_THRESHOLD || self.may_contain.contains(&self.hash(name))) && |
43 | self.vec.iter().any(move |a| &a.name == name) |
44 | } |
45 | |
46 | pub fn push(&mut self, attr: OwnedAttribute) { |
47 | if self.vec.len() >= HASH_THRESHOLD { |
48 | if self.vec.len() == HASH_THRESHOLD { |
49 | self.may_contain.reserve(HASH_THRESHOLD * 2); |
50 | for attr in &self.vec { |
51 | self.may_contain.insert(self.hash(&attr.name)); |
52 | } |
53 | } |
54 | self.may_contain.insert(self.hash(&attr.name)); |
55 | } |
56 | self.vec.push(attr); |
57 | } |
58 | |
59 | pub fn into_vec(self) -> Vec<OwnedAttribute> { |
60 | self.vec |
61 | } |
62 | } |
63 | |
64 | #[test ] |
65 | fn indexset() { |
66 | let mut s = AttributesSet::new(); |
67 | let not_here = OwnedName { |
68 | local_name: "attr1000" .into(), |
69 | namespace: Some("test" .into()), |
70 | prefix: None, |
71 | }; |
72 | |
73 | // this test will take a lot of time if the `contains()` is linear, and the loop is quadratic |
74 | for i in 0..50000 { |
75 | let name = OwnedName { |
76 | local_name: format!("attr{i}" ), namespace: None, prefix: None, |
77 | }; |
78 | assert!(!s.contains(&name)); |
79 | |
80 | s.push(OwnedAttribute { name, value: String::new() }); |
81 | assert!(!s.contains(¬_here)); |
82 | } |
83 | |
84 | assert!(s.contains(&OwnedName { |
85 | local_name: "attr1234" .into(), namespace: None, prefix: None, |
86 | })); |
87 | assert!(s.contains(&OwnedName { |
88 | local_name: "attr0" .into(), namespace: None, prefix: None, |
89 | })); |
90 | assert!(s.contains(&OwnedName { |
91 | local_name: "attr49999" .into(), namespace: None, prefix: None, |
92 | })); |
93 | } |
94 | |
95 | /// Hashser that does nothing except passing u64 through |
96 | struct U64Hasher(u64); |
97 | |
98 | impl Hasher for U64Hasher { |
99 | fn finish(&self) -> u64 { self.0 } |
100 | fn write(&mut self, slice: &[u8]) { |
101 | for &v: u8 in slice { self.0 ^= u64::from(v) } // unused in practice |
102 | } |
103 | fn write_u64(&mut self, i: u64) { |
104 | self.0 ^= i; |
105 | } |
106 | } |
107 | |
108 | #[derive (Default)] |
109 | struct U64HasherBuilder; |
110 | |
111 | impl BuildHasher for U64HasherBuilder { |
112 | type Hasher = U64Hasher; |
113 | fn build_hasher(&self) -> U64Hasher { U64Hasher(0) } |
114 | } |
115 | |