| 1 | use crate::attribute::OwnedAttribute; |
| 2 | use crate::name::OwnedName; |
| 3 | |
| 4 | use std::collections::hash_map::RandomState; |
| 5 | use std::collections::HashSet; |
| 6 | use std::hash::{BuildHasher, Hash, Hasher}; |
| 7 | |
| 8 | /// An ordered set |
| 9 | pub struct AttributesSet { |
| 10 | vec: Vec<OwnedAttribute>, |
| 11 | /// Uses a no-op hasher, because these u64s are hashes already |
| 12 | may_contain: HashSet<u64, U64HasherBuilder>, |
| 13 | /// This is real hasher for the `OwnedName` |
| 14 | hasher: RandomState, |
| 15 | } |
| 16 | |
| 17 | /// Use linear search and don't allocate `HashSet` if there are few attributes, |
| 18 | /// because allocation costs more than a few comparisons. |
| 19 | const HASH_THRESHOLD: usize = 8; |
| 20 | |
| 21 | impl AttributesSet { |
| 22 | pub fn new() -> Self { |
| 23 | Self { |
| 24 | vec: Vec::new(), |
| 25 | hasher: RandomState::new(), |
| 26 | may_contain: HashSet::default(), |
| 27 | } |
| 28 | } |
| 29 | |
| 30 | fn hash(&self, val: &OwnedName) -> u64 { |
| 31 | let mut h = self.hasher.build_hasher(); |
| 32 | val.hash(&mut h); |
| 33 | h.finish() |
| 34 | } |
| 35 | |
| 36 | pub fn len(&self) -> usize { |
| 37 | self.vec.len() |
| 38 | } |
| 39 | |
| 40 | pub fn contains(&self, name: &OwnedName) -> bool { |
| 41 | // fall back to linear search only on duplicate or hash collision |
| 42 | (self.vec.len() < HASH_THRESHOLD || self.may_contain.contains(&self.hash(name))) && |
| 43 | self.vec.iter().any(move |a| &a.name == name) |
| 44 | } |
| 45 | |
| 46 | pub fn push(&mut self, attr: OwnedAttribute) { |
| 47 | if self.vec.len() >= HASH_THRESHOLD { |
| 48 | if self.vec.len() == HASH_THRESHOLD { |
| 49 | self.may_contain.reserve(HASH_THRESHOLD * 2); |
| 50 | for attr in &self.vec { |
| 51 | self.may_contain.insert(self.hash(&attr.name)); |
| 52 | } |
| 53 | } |
| 54 | self.may_contain.insert(self.hash(&attr.name)); |
| 55 | } |
| 56 | self.vec.push(attr); |
| 57 | } |
| 58 | |
| 59 | pub fn into_vec(self) -> Vec<OwnedAttribute> { |
| 60 | self.vec |
| 61 | } |
| 62 | } |
| 63 | |
| 64 | #[test ] |
| 65 | fn indexset() { |
| 66 | let mut s = AttributesSet::new(); |
| 67 | let not_here = OwnedName { |
| 68 | local_name: "attr1000" .into(), |
| 69 | namespace: Some("test" .into()), |
| 70 | prefix: None, |
| 71 | }; |
| 72 | |
| 73 | // this test will take a lot of time if the `contains()` is linear, and the loop is quadratic |
| 74 | for i in 0..50000 { |
| 75 | let name = OwnedName { |
| 76 | local_name: format!("attr{i}" ), namespace: None, prefix: None, |
| 77 | }; |
| 78 | assert!(!s.contains(&name)); |
| 79 | |
| 80 | s.push(OwnedAttribute { name, value: String::new() }); |
| 81 | assert!(!s.contains(¬_here)); |
| 82 | } |
| 83 | |
| 84 | assert!(s.contains(&OwnedName { |
| 85 | local_name: "attr1234" .into(), namespace: None, prefix: None, |
| 86 | })); |
| 87 | assert!(s.contains(&OwnedName { |
| 88 | local_name: "attr0" .into(), namespace: None, prefix: None, |
| 89 | })); |
| 90 | assert!(s.contains(&OwnedName { |
| 91 | local_name: "attr49999" .into(), namespace: None, prefix: None, |
| 92 | })); |
| 93 | } |
| 94 | |
| 95 | /// Hashser that does nothing except passing u64 through |
| 96 | struct U64Hasher(u64); |
| 97 | |
| 98 | impl Hasher for U64Hasher { |
| 99 | fn finish(&self) -> u64 { self.0 } |
| 100 | fn write(&mut self, slice: &[u8]) { |
| 101 | for &v: u8 in slice { self.0 ^= u64::from(v) } // unused in practice |
| 102 | } |
| 103 | fn write_u64(&mut self, i: u64) { |
| 104 | self.0 ^= i; |
| 105 | } |
| 106 | } |
| 107 | |
| 108 | #[derive (Default)] |
| 109 | struct U64HasherBuilder; |
| 110 | |
| 111 | impl BuildHasher for U64HasherBuilder { |
| 112 | type Hasher = U64Hasher; |
| 113 | fn build_hasher(&self) -> U64Hasher { U64Hasher(0) } |
| 114 | } |
| 115 | |