1 | #![doc (html_root_url = "https://docs.rs/phf_generator/0.9" )] |
2 | use phf_shared::{HashKey, PhfHash}; |
3 | use rand::distributions::Standard; |
4 | use rand::rngs::SmallRng; |
5 | use rand::{Rng, SeedableRng}; |
6 | |
7 | const DEFAULT_LAMBDA: usize = 5; |
8 | |
9 | const FIXED_SEED: u64 = 1234567890; |
10 | |
11 | pub struct HashState { |
12 | pub key: HashKey, |
13 | pub disps: Vec<(u32, u32)>, |
14 | pub map: Vec<usize>, |
15 | } |
16 | |
17 | pub fn generate_hash<H: PhfHash>(entries: &[H]) -> HashState { |
18 | SmallRng::seed_from_u64(FIXED_SEED) |
19 | .sample_iter(Standard) |
20 | .find_map(|key| try_generate_hash(entries, key)) |
21 | .expect(msg:"failed to solve PHF" ) |
22 | } |
23 | |
24 | fn try_generate_hash<H: PhfHash>(entries: &[H], key: HashKey) -> Option<HashState> { |
25 | struct Bucket { |
26 | idx: usize, |
27 | keys: Vec<usize>, |
28 | } |
29 | |
30 | let hashes: Vec<_> = entries |
31 | .iter() |
32 | .map(|entry| phf_shared::hash(entry, &key)) |
33 | .collect(); |
34 | |
35 | let buckets_len = (hashes.len() + DEFAULT_LAMBDA - 1) / DEFAULT_LAMBDA; |
36 | let mut buckets = (0..buckets_len) |
37 | .map(|i| Bucket { |
38 | idx: i, |
39 | keys: vec![], |
40 | }) |
41 | .collect::<Vec<_>>(); |
42 | |
43 | for (i, hash) in hashes.iter().enumerate() { |
44 | buckets[(hash.g % (buckets_len as u32)) as usize] |
45 | .keys |
46 | .push(i); |
47 | } |
48 | |
49 | // Sort descending |
50 | buckets.sort_by(|a, b| a.keys.len().cmp(&b.keys.len()).reverse()); |
51 | |
52 | let table_len = hashes.len(); |
53 | let mut map = vec![None; table_len]; |
54 | let mut disps = vec![(0u32, 0u32); buckets_len]; |
55 | |
56 | // store whether an element from the bucket being placed is |
57 | // located at a certain position, to allow for efficient overlap |
58 | // checks. It works by storing the generation in each cell and |
59 | // each new placement-attempt is a new generation, so you can tell |
60 | // if this is legitimately full by checking that the generations |
61 | // are equal. (A u64 is far too large to overflow in a reasonable |
62 | // time for current hardware.) |
63 | let mut try_map = vec![0u64; table_len]; |
64 | let mut generation = 0u64; |
65 | |
66 | // the actual values corresponding to the markers above, as |
67 | // (index, key) pairs, for adding to the main map once we've |
68 | // chosen the right disps. |
69 | let mut values_to_add = vec![]; |
70 | |
71 | 'buckets: for bucket in &buckets { |
72 | for d1 in 0..(table_len as u32) { |
73 | 'disps: for d2 in 0..(table_len as u32) { |
74 | values_to_add.clear(); |
75 | generation += 1; |
76 | |
77 | for &key in &bucket.keys { |
78 | let idx = (phf_shared::displace(hashes[key].f1, hashes[key].f2, d1, d2) |
79 | % (table_len as u32)) as usize; |
80 | if map[idx].is_some() || try_map[idx] == generation { |
81 | continue 'disps; |
82 | } |
83 | try_map[idx] = generation; |
84 | values_to_add.push((idx, key)); |
85 | } |
86 | |
87 | // We've picked a good set of disps |
88 | disps[bucket.idx] = (d1, d2); |
89 | for &(idx, key) in &values_to_add { |
90 | map[idx] = Some(key); |
91 | } |
92 | continue 'buckets; |
93 | } |
94 | } |
95 | |
96 | // Unable to find displacements for a bucket |
97 | return None; |
98 | } |
99 | |
100 | Some(HashState { |
101 | key, |
102 | disps, |
103 | map: map.into_iter().map(|i| i.unwrap()).collect(), |
104 | }) |
105 | } |
106 | |