| 1 | //! See [the `phf` crate's documentation][phf] for details. |
| 2 | //! |
| 3 | //! [phf]: https://docs.rs/phf |
| 4 | |
| 5 | #![doc (html_root_url = "https://docs.rs/phf_generator/0.11" )] |
| 6 | use phf_shared::{HashKey, Hashes, PhfHash}; |
| 7 | use rand::distributions::Standard; |
| 8 | use rand::rngs::SmallRng; |
| 9 | use rand::{Rng, SeedableRng}; |
| 10 | |
| 11 | const DEFAULT_LAMBDA: usize = 5; |
| 12 | |
| 13 | const FIXED_SEED: u64 = 1234567890; |
| 14 | |
| 15 | pub struct HashState { |
| 16 | pub key: HashKey, |
| 17 | pub disps: Vec<(u32, u32)>, |
| 18 | pub map: Vec<usize>, |
| 19 | } |
| 20 | |
| 21 | pub fn generate_hash<H: PhfHash>(entries: &[H]) -> HashState { |
| 22 | generate_hash_with_hash_fn(entries, hash_fn:phf_shared::hash) |
| 23 | } |
| 24 | |
| 25 | pub fn generate_hash_with_hash_fn<T, F>(entries: &[T], hash_fn: F) -> HashState |
| 26 | where |
| 27 | F: Fn(&T, &HashKey) -> Hashes, |
| 28 | { |
| 29 | let mut generator: Generator = Generator::new(table_len:entries.len()); |
| 30 | |
| 31 | SmallRng::seed_from_u64(FIXED_SEED) |
| 32 | .sample_iter(Standard) |
| 33 | .find(|key| { |
| 34 | let hashes = entries.iter().map(|entry| hash_fn(entry, key)); |
| 35 | generator.reset(hashes); |
| 36 | |
| 37 | generator.try_generate_hash() |
| 38 | }) |
| 39 | .map(|key| HashState { |
| 40 | key, |
| 41 | disps: generator.disps, |
| 42 | map: generator.map.into_iter().map(|i| i.unwrap()).collect(), |
| 43 | }) |
| 44 | .expect(msg:"failed to solve PHF" ) |
| 45 | } |
| 46 | |
| 47 | struct Bucket { |
| 48 | idx: usize, |
| 49 | keys: Vec<usize>, |
| 50 | } |
| 51 | |
| 52 | struct Generator { |
| 53 | hashes: Vec<Hashes>, |
| 54 | buckets: Vec<Bucket>, |
| 55 | disps: Vec<(u32, u32)>, |
| 56 | map: Vec<Option<usize>>, |
| 57 | try_map: Vec<u64>, |
| 58 | } |
| 59 | |
| 60 | impl Generator { |
| 61 | fn new(table_len: usize) -> Self { |
| 62 | let hashes = Vec::with_capacity(table_len); |
| 63 | |
| 64 | let buckets_len = (table_len + DEFAULT_LAMBDA - 1) / DEFAULT_LAMBDA; |
| 65 | let buckets: Vec<_> = (0..buckets_len) |
| 66 | .map(|i| Bucket { |
| 67 | idx: i, |
| 68 | keys: vec![], |
| 69 | }) |
| 70 | .collect(); |
| 71 | let disps = vec![(0u32, 0u32); buckets_len]; |
| 72 | |
| 73 | let map = vec![None; table_len]; |
| 74 | let try_map = vec![0u64; table_len]; |
| 75 | |
| 76 | Self { |
| 77 | hashes, |
| 78 | buckets, |
| 79 | disps, |
| 80 | map, |
| 81 | try_map, |
| 82 | } |
| 83 | } |
| 84 | |
| 85 | fn reset<I>(&mut self, hashes: I) |
| 86 | where |
| 87 | I: Iterator<Item = Hashes>, |
| 88 | { |
| 89 | self.buckets.iter_mut().for_each(|b| b.keys.clear()); |
| 90 | self.buckets.sort_by_key(|b| b.idx); |
| 91 | self.disps.iter_mut().for_each(|d| *d = (0, 0)); |
| 92 | self.map.iter_mut().for_each(|m| *m = None); |
| 93 | self.try_map.iter_mut().for_each(|m| *m = 0); |
| 94 | |
| 95 | self.hashes.clear(); |
| 96 | self.hashes.extend(hashes); |
| 97 | } |
| 98 | |
| 99 | fn try_generate_hash(&mut self) -> bool { |
| 100 | let buckets_len = self.buckets.len() as u32; |
| 101 | for (i, hash) in self.hashes.iter().enumerate() { |
| 102 | self.buckets[(hash.g % buckets_len) as usize].keys.push(i); |
| 103 | } |
| 104 | |
| 105 | // Sort descending |
| 106 | self.buckets |
| 107 | .sort_by(|a, b| a.keys.len().cmp(&b.keys.len()).reverse()); |
| 108 | |
| 109 | let table_len = self.hashes.len(); |
| 110 | |
| 111 | // store whether an element from the bucket being placed is |
| 112 | // located at a certain position, to allow for efficient overlap |
| 113 | // checks. It works by storing the generation in each cell and |
| 114 | // each new placement-attempt is a new generation, so you can tell |
| 115 | // if this is legitimately full by checking that the generations |
| 116 | // are equal. (A u64 is far too large to overflow in a reasonable |
| 117 | // time for current hardware.) |
| 118 | let mut generation = 0u64; |
| 119 | |
| 120 | // the actual values corresponding to the markers above, as |
| 121 | // (index, key) pairs, for adding to the main map once we've |
| 122 | // chosen the right disps. |
| 123 | let mut values_to_add = vec![]; |
| 124 | |
| 125 | 'buckets: for bucket in &self.buckets { |
| 126 | for d1 in 0..(table_len as u32) { |
| 127 | 'disps: for d2 in 0..(table_len as u32) { |
| 128 | values_to_add.clear(); |
| 129 | generation += 1; |
| 130 | |
| 131 | for &key in &bucket.keys { |
| 132 | let idx = |
| 133 | (phf_shared::displace(self.hashes[key].f1, self.hashes[key].f2, d1, d2) |
| 134 | % (table_len as u32)) as usize; |
| 135 | if self.map[idx].is_some() || self.try_map[idx] == generation { |
| 136 | continue 'disps; |
| 137 | } |
| 138 | self.try_map[idx] = generation; |
| 139 | values_to_add.push((idx, key)); |
| 140 | } |
| 141 | |
| 142 | // We've picked a good set of disps |
| 143 | self.disps[bucket.idx] = (d1, d2); |
| 144 | for &(idx, key) in &values_to_add { |
| 145 | self.map[idx] = Some(key); |
| 146 | } |
| 147 | continue 'buckets; |
| 148 | } |
| 149 | } |
| 150 | |
| 151 | // Unable to find displacements for a bucket |
| 152 | return false; |
| 153 | } |
| 154 | true |
| 155 | } |
| 156 | } |
| 157 | |