1 | //! See [the `phf` crate's documentation][phf] for details. |
2 | //! |
3 | //! [phf]: https://docs.rs/phf |
4 | |
5 | #![doc (html_root_url = "https://docs.rs/phf_generator/0.11" )] |
6 | use phf_shared::{HashKey, Hashes, PhfHash}; |
7 | use rand::distributions::Standard; |
8 | use rand::rngs::SmallRng; |
9 | use rand::{Rng, SeedableRng}; |
10 | |
11 | const DEFAULT_LAMBDA: usize = 5; |
12 | |
13 | const FIXED_SEED: u64 = 1234567890; |
14 | |
15 | pub struct HashState { |
16 | pub key: HashKey, |
17 | pub disps: Vec<(u32, u32)>, |
18 | pub map: Vec<usize>, |
19 | } |
20 | |
21 | pub fn generate_hash<H: PhfHash>(entries: &[H]) -> HashState { |
22 | generate_hash_with_hash_fn(entries, hash_fn:phf_shared::hash) |
23 | } |
24 | |
25 | pub fn generate_hash_with_hash_fn<T, F>(entries: &[T], hash_fn: F) -> HashState |
26 | where |
27 | F: Fn(&T, &HashKey) -> Hashes, |
28 | { |
29 | let mut generator: Generator = Generator::new(table_len:entries.len()); |
30 | |
31 | SmallRng::seed_from_u64(FIXED_SEED) |
32 | .sample_iter(Standard) |
33 | .find(|key| { |
34 | let hashes = entries.iter().map(|entry| hash_fn(entry, key)); |
35 | generator.reset(hashes); |
36 | |
37 | generator.try_generate_hash() |
38 | }) |
39 | .map(|key| HashState { |
40 | key, |
41 | disps: generator.disps, |
42 | map: generator.map.into_iter().map(|i| i.unwrap()).collect(), |
43 | }) |
44 | .expect(msg:"failed to solve PHF" ) |
45 | } |
46 | |
47 | struct Bucket { |
48 | idx: usize, |
49 | keys: Vec<usize>, |
50 | } |
51 | |
52 | struct Generator { |
53 | hashes: Vec<Hashes>, |
54 | buckets: Vec<Bucket>, |
55 | disps: Vec<(u32, u32)>, |
56 | map: Vec<Option<usize>>, |
57 | try_map: Vec<u64>, |
58 | } |
59 | |
60 | impl Generator { |
61 | fn new(table_len: usize) -> Self { |
62 | let hashes = Vec::with_capacity(table_len); |
63 | |
64 | let buckets_len = (table_len + DEFAULT_LAMBDA - 1) / DEFAULT_LAMBDA; |
65 | let buckets: Vec<_> = (0..buckets_len) |
66 | .map(|i| Bucket { |
67 | idx: i, |
68 | keys: vec![], |
69 | }) |
70 | .collect(); |
71 | let disps = vec![(0u32, 0u32); buckets_len]; |
72 | |
73 | let map = vec![None; table_len]; |
74 | let try_map = vec![0u64; table_len]; |
75 | |
76 | Self { |
77 | hashes, |
78 | buckets, |
79 | disps, |
80 | map, |
81 | try_map, |
82 | } |
83 | } |
84 | |
85 | fn reset<I>(&mut self, hashes: I) |
86 | where |
87 | I: Iterator<Item = Hashes>, |
88 | { |
89 | self.buckets.iter_mut().for_each(|b| b.keys.clear()); |
90 | self.buckets.sort_by_key(|b| b.idx); |
91 | self.disps.iter_mut().for_each(|d| *d = (0, 0)); |
92 | self.map.iter_mut().for_each(|m| *m = None); |
93 | self.try_map.iter_mut().for_each(|m| *m = 0); |
94 | |
95 | self.hashes.clear(); |
96 | self.hashes.extend(hashes); |
97 | } |
98 | |
99 | fn try_generate_hash(&mut self) -> bool { |
100 | let buckets_len = self.buckets.len() as u32; |
101 | for (i, hash) in self.hashes.iter().enumerate() { |
102 | self.buckets[(hash.g % buckets_len) as usize].keys.push(i); |
103 | } |
104 | |
105 | // Sort descending |
106 | self.buckets |
107 | .sort_by(|a, b| a.keys.len().cmp(&b.keys.len()).reverse()); |
108 | |
109 | let table_len = self.hashes.len(); |
110 | |
111 | // store whether an element from the bucket being placed is |
112 | // located at a certain position, to allow for efficient overlap |
113 | // checks. It works by storing the generation in each cell and |
114 | // each new placement-attempt is a new generation, so you can tell |
115 | // if this is legitimately full by checking that the generations |
116 | // are equal. (A u64 is far too large to overflow in a reasonable |
117 | // time for current hardware.) |
118 | let mut generation = 0u64; |
119 | |
120 | // the actual values corresponding to the markers above, as |
121 | // (index, key) pairs, for adding to the main map once we've |
122 | // chosen the right disps. |
123 | let mut values_to_add = vec![]; |
124 | |
125 | 'buckets: for bucket in &self.buckets { |
126 | for d1 in 0..(table_len as u32) { |
127 | 'disps: for d2 in 0..(table_len as u32) { |
128 | values_to_add.clear(); |
129 | generation += 1; |
130 | |
131 | for &key in &bucket.keys { |
132 | let idx = |
133 | (phf_shared::displace(self.hashes[key].f1, self.hashes[key].f2, d1, d2) |
134 | % (table_len as u32)) as usize; |
135 | if self.map[idx].is_some() || self.try_map[idx] == generation { |
136 | continue 'disps; |
137 | } |
138 | self.try_map[idx] = generation; |
139 | values_to_add.push((idx, key)); |
140 | } |
141 | |
142 | // We've picked a good set of disps |
143 | self.disps[bucket.idx] = (d1, d2); |
144 | for &(idx, key) in &values_to_add { |
145 | self.map[idx] = Some(key); |
146 | } |
147 | continue 'buckets; |
148 | } |
149 | } |
150 | |
151 | // Unable to find displacements for a bucket |
152 | return false; |
153 | } |
154 | true |
155 | } |
156 | } |
157 | |