1 | #ifndef NU_MPH_H |
---|---|
2 | #define NU_MPH_H |
3 | |
4 | /* Intentionally undocumented |
5 | * |
6 | * http://iswsa.acm.org/mphf/index.html |
7 | */ |
8 | |
9 | #include <stdint.h> |
10 | #include <sys/types.h> |
11 | |
12 | #include <libnu/config.h> |
13 | |
14 | #if defined (__cplusplus) || defined (c_plusplus) |
15 | extern "C"{ |
16 | #endif |
17 | |
18 | #ifdef NU_WITH_UDB |
19 | |
20 | /* those need to be the same values as used in MPH generation */ |
21 | #define PRIME 0x01000193 |
22 | |
23 | /** Calculate G offset from codepoint |
24 | */ |
25 | static inline |
26 | uint32_t _nu_hash(uint32_t hash, uint32_t codepoint) { |
27 | if (hash == 0) { |
28 | hash = PRIME; |
29 | } |
30 | |
31 | return hash ^ codepoint; |
32 | } |
33 | |
34 | /** Get hash value of Unicode codepoint |
35 | */ |
36 | static inline |
37 | uint32_t nu_mph_hash(const int16_t *G, size_t G_SIZE, |
38 | uint32_t codepoint) { |
39 | |
40 | uint32_t h = _nu_hash(hash: 0, codepoint); |
41 | int16_t offset = G[h % G_SIZE]; |
42 | if (offset < 0) { |
43 | return (uint32_t)(-offset - 1); |
44 | } |
45 | return (_nu_hash(hash: offset, codepoint) % G_SIZE); |
46 | } |
47 | |
48 | /** Lookup value in MPH |
49 | */ |
50 | static inline |
51 | uint32_t nu_mph_lookup(const uint32_t *V_C, const uint16_t *V_I, |
52 | uint32_t codepoint, uint32_t hash) { |
53 | |
54 | const uint32_t *c = (V_C + hash); |
55 | const uint16_t *i = (V_I + hash); |
56 | |
57 | /* due to nature of minimal perfect hash, it will always |
58 | * produce collision for codepoints outside of MPH original set. |
59 | * thus VALUES_C contain original codepoint to check if |
60 | * collision occurred */ |
61 | |
62 | return (*c != codepoint ? 0 : *i); |
63 | } |
64 | |
65 | #endif /* NU_WITH_UDB */ |
66 | |
67 | #if defined (__cplusplus) || defined (c_plusplus) |
68 | } |
69 | #endif |
70 | |
71 | #endif /* NU_MPH_H */ |
72 |