1/*
2 * Copyright © 2020 Benjamin Otte
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library. If not, see <http://www.gnu.org/licenses/>.
16 *
17 * Authors: Benjamin Otte <otte@gnome.org>
18 */
19
20#ifndef __GTK_COUNTING_BLOOM_FILTER_PRIVATE_H__
21#define __GTK_COUNTING_BLOOM_FILTER_PRIVATE_H__
22
23#include <glib.h>
24
25
26G_BEGIN_DECLS
27
28/*
29 * GtkCountingBloomFilter:
30 *
31 * This implements a counting bloom filter. A bloom filter is a space-efficient
32 * probabilistic data structure that is used to test whether an element may be
33 * a member of a set.
34 * The Wikipedia links provide a lot more details into how and why this data
35 * structure works and when to use it.
36 *
37 * This implementation is based on similar implementations in web browsers, because it's
38 * original use case is the same: Making CSS lookups fast.
39 *
40 * As such, the number of bits is hardcoded to 12 and the elements in the set
41 * are 16bit hash values.
42 * It is possible to use 32bit hash values or a different number of bits, should this
43 * be considered useful.
44 *
45 * See: [Bloom filter](https://en.wikipedia.org/wiki/Bloom_filter),
46 * [Counting Bloom filter](https://en.wikipedia.org/wiki/Counting_Bloom_filter)
47 */
48
49/* The number of bits from the hash we care about */
50#define GTK_COUNTING_BLOOM_FILTER_BITS (12)
51
52/* The necessary size of the filter */
53#define GTK_COUNTING_BLOOM_FILTER_SIZE (1 << GTK_COUNTING_BLOOM_FILTER_BITS)
54
55typedef struct _GtkCountingBloomFilter GtkCountingBloomFilter;
56
57struct _GtkCountingBloomFilter
58{
59 guint8 buckets[GTK_COUNTING_BLOOM_FILTER_SIZE];
60};
61
62static inline void gtk_counting_bloom_filter_add (GtkCountingBloomFilter *self,
63 guint16 hash);
64static inline void gtk_counting_bloom_filter_remove (GtkCountingBloomFilter *self,
65 guint16 hash);
66static inline gboolean gtk_counting_bloom_filter_may_contain (const GtkCountingBloomFilter *self,
67 guint16 hash);
68
69
70/*
71 * GTK_COUNTING_BLOOM_FILTER_INIT:
72 *
73 * Initialize the bloom filter. As bloom filters are always stack-allocated,
74 * initialization should happen when defining them, like:
75 * ```c
76 * GtkCountingBloomFilter filter = GTK_COUNTING_BLOOM_FILTER_INIT;
77 * ```
78 *
79 * The filter does not need to be freed.
80 */
81#define GTK_COUNTING_BLOOM_FILTER_INIT {{0}}
82
83/*
84 * gtk_counting_bloom_filter_add:
85 * @self: a `GtkCountingBloomFilter`
86 * @hash: a hash value to add to the filter
87 *
88 * Adds the hash value to the filter.
89 *
90 * If the same hash value gets added multiple times, it will
91 * be considered as contained in the hash until it has been
92 * removed as many times.
93 **/
94static inline void
95gtk_counting_bloom_filter_add (GtkCountingBloomFilter *self,
96 guint16 hash)
97{
98 guint16 bucket = hash % GTK_COUNTING_BLOOM_FILTER_SIZE;
99
100 if (self->buckets[bucket] == 255)
101 return;
102
103 self->buckets[bucket]++;
104}
105
106/*
107 * gtk_counting_bloom_filter_remove:
108 * @self: a `GtkCountingBloomFilter`
109 * @hash: a hash value to remove from the filter
110 *
111 * Removes a hash value from the filter that has previously
112 * been added via gtk_counting_bloom_filter_add().
113 **/
114static inline void
115gtk_counting_bloom_filter_remove (GtkCountingBloomFilter *self,
116 guint16 hash)
117{
118 guint16 bucket = hash % GTK_COUNTING_BLOOM_FILTER_SIZE;
119
120 if (self->buckets[bucket] == 255)
121 return;
122
123 g_assert (self->buckets[bucket] > 0);
124
125 self->buckets[bucket]--;
126}
127
128/*
129 * gtk_counting_bloom_filter_may_contain:
130 * @self: a `GtkCountingBloomFilter`
131 * @hash: the hash value to check
132 *
133 * Checks if @hash may be contained in @self.
134 *
135 * A return value of %FALSE means that @hash is definitely not part
136 * of @self.
137 *
138 * A return value of %TRUE means that @hash may or may not have been
139 * added to @self. In that case a different method must be used to
140 * confirm that @hash is indeed part of the set.
141 *
142 * Returns: %FALSE if @hash is not part of @self.
143 **/
144static inline gboolean
145gtk_counting_bloom_filter_may_contain (const GtkCountingBloomFilter *self,
146 guint16 hash)
147{
148 guint16 bucket = hash % GTK_COUNTING_BLOOM_FILTER_SIZE;
149
150 return self->buckets[bucket] != 0;
151}
152
153
154G_END_DECLS
155
156
157#endif /* __GTK_COUNTING_BLOOM_FILTER_PRIVATE_H_ */
158

source code of gtk/gtk/gtkcountingbloomfilterprivate.h