1 | /* |
2 | * Copyright © 2020 Benjamin Otte |
3 | * |
4 | * This library is free software; you can redistribute it and/or |
5 | * modify it under the terms of the GNU Lesser General Public |
6 | * License as published by the Free Software Foundation; either |
7 | * version 2.1 of the License, or (at your option) any later version. |
8 | * |
9 | * This library is distributed in the hope that it will be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
12 | * Lesser General Public License for more details. |
13 | * |
14 | * You should have received a copy of the GNU Lesser General Public |
15 | * License along with this library. If not, see <http://www.gnu.org/licenses/>. |
16 | * |
17 | * Authors: Benjamin Otte <otte@gnome.org> |
18 | */ |
19 | |
20 | #ifndef __GTK_COUNTING_BLOOM_FILTER_PRIVATE_H__ |
21 | #define __GTK_COUNTING_BLOOM_FILTER_PRIVATE_H__ |
22 | |
23 | #include <glib.h> |
24 | |
25 | |
26 | G_BEGIN_DECLS |
27 | |
28 | /* |
29 | * GtkCountingBloomFilter: |
30 | * |
31 | * This implements a counting bloom filter. A bloom filter is a space-efficient |
32 | * probabilistic data structure that is used to test whether an element may be |
33 | * a member of a set. |
34 | * The Wikipedia links provide a lot more details into how and why this data |
35 | * structure works and when to use it. |
36 | * |
37 | * This implementation is based on similar implementations in web browsers, because it's |
38 | * original use case is the same: Making CSS lookups fast. |
39 | * |
40 | * As such, the number of bits is hardcoded to 12 and the elements in the set |
41 | * are 16bit hash values. |
42 | * It is possible to use 32bit hash values or a different number of bits, should this |
43 | * be considered useful. |
44 | * |
45 | * See: [Bloom filter](https://en.wikipedia.org/wiki/Bloom_filter), |
46 | * [Counting Bloom filter](https://en.wikipedia.org/wiki/Counting_Bloom_filter) |
47 | */ |
48 | |
49 | /* The number of bits from the hash we care about */ |
50 | #define GTK_COUNTING_BLOOM_FILTER_BITS (12) |
51 | |
52 | /* The necessary size of the filter */ |
53 | #define GTK_COUNTING_BLOOM_FILTER_SIZE (1 << GTK_COUNTING_BLOOM_FILTER_BITS) |
54 | |
55 | typedef struct _GtkCountingBloomFilter GtkCountingBloomFilter; |
56 | |
57 | struct _GtkCountingBloomFilter |
58 | { |
59 | guint8 buckets[GTK_COUNTING_BLOOM_FILTER_SIZE]; |
60 | }; |
61 | |
62 | static inline void gtk_counting_bloom_filter_add (GtkCountingBloomFilter *self, |
63 | guint16 hash); |
64 | static inline void gtk_counting_bloom_filter_remove (GtkCountingBloomFilter *self, |
65 | guint16 hash); |
66 | static inline gboolean gtk_counting_bloom_filter_may_contain (const GtkCountingBloomFilter *self, |
67 | guint16 hash); |
68 | |
69 | |
70 | /* |
71 | * GTK_COUNTING_BLOOM_FILTER_INIT: |
72 | * |
73 | * Initialize the bloom filter. As bloom filters are always stack-allocated, |
74 | * initialization should happen when defining them, like: |
75 | * ```c |
76 | * GtkCountingBloomFilter filter = GTK_COUNTING_BLOOM_FILTER_INIT; |
77 | * ``` |
78 | * |
79 | * The filter does not need to be freed. |
80 | */ |
81 | #define GTK_COUNTING_BLOOM_FILTER_INIT {{0}} |
82 | |
83 | /* |
84 | * gtk_counting_bloom_filter_add: |
85 | * @self: a `GtkCountingBloomFilter` |
86 | * @hash: a hash value to add to the filter |
87 | * |
88 | * Adds the hash value to the filter. |
89 | * |
90 | * If the same hash value gets added multiple times, it will |
91 | * be considered as contained in the hash until it has been |
92 | * removed as many times. |
93 | **/ |
94 | static inline void |
95 | gtk_counting_bloom_filter_add (GtkCountingBloomFilter *self, |
96 | guint16 hash) |
97 | { |
98 | guint16 bucket = hash % GTK_COUNTING_BLOOM_FILTER_SIZE; |
99 | |
100 | if (self->buckets[bucket] == 255) |
101 | return; |
102 | |
103 | self->buckets[bucket]++; |
104 | } |
105 | |
106 | /* |
107 | * gtk_counting_bloom_filter_remove: |
108 | * @self: a `GtkCountingBloomFilter` |
109 | * @hash: a hash value to remove from the filter |
110 | * |
111 | * Removes a hash value from the filter that has previously |
112 | * been added via gtk_counting_bloom_filter_add(). |
113 | **/ |
114 | static inline void |
115 | gtk_counting_bloom_filter_remove (GtkCountingBloomFilter *self, |
116 | guint16 hash) |
117 | { |
118 | guint16 bucket = hash % GTK_COUNTING_BLOOM_FILTER_SIZE; |
119 | |
120 | if (self->buckets[bucket] == 255) |
121 | return; |
122 | |
123 | g_assert (self->buckets[bucket] > 0); |
124 | |
125 | self->buckets[bucket]--; |
126 | } |
127 | |
128 | /* |
129 | * gtk_counting_bloom_filter_may_contain: |
130 | * @self: a `GtkCountingBloomFilter` |
131 | * @hash: the hash value to check |
132 | * |
133 | * Checks if @hash may be contained in @self. |
134 | * |
135 | * A return value of %FALSE means that @hash is definitely not part |
136 | * of @self. |
137 | * |
138 | * A return value of %TRUE means that @hash may or may not have been |
139 | * added to @self. In that case a different method must be used to |
140 | * confirm that @hash is indeed part of the set. |
141 | * |
142 | * Returns: %FALSE if @hash is not part of @self. |
143 | **/ |
144 | static inline gboolean |
145 | gtk_counting_bloom_filter_may_contain (const GtkCountingBloomFilter *self, |
146 | guint16 hash) |
147 | { |
148 | guint16 bucket = hash % GTK_COUNTING_BLOOM_FILTER_SIZE; |
149 | |
150 | return self->buckets[bucket] != 0; |
151 | } |
152 | |
153 | |
154 | G_END_DECLS |
155 | |
156 | |
157 | #endif /* __GTK_COUNTING_BLOOM_FILTER_PRIVATE_H_ */ |
158 | |