| 1 | // SPDX-License-Identifier: GPL-2.0-only |
| 2 | /* |
| 3 | * Copyright (c) 2017 Pablo Neira Ayuso <pablo@netfilter.org> |
| 4 | */ |
| 5 | |
| 6 | #include <linux/kernel.h> |
| 7 | #include <linux/init.h> |
| 8 | #include <linux/module.h> |
| 9 | #include <linux/list.h> |
| 10 | #include <linux/netlink.h> |
| 11 | #include <linux/netfilter.h> |
| 12 | #include <linux/netfilter/nf_tables.h> |
| 13 | #include <net/netfilter/nf_tables_core.h> |
| 14 | |
| 15 | struct nft_bitmap_elem { |
| 16 | struct nft_elem_priv priv; |
| 17 | struct list_head head; |
| 18 | struct nft_set_ext ext; |
| 19 | }; |
| 20 | |
| 21 | /* This bitmap uses two bits to represent one element. These two bits determine |
| 22 | * the element state in the current and the future generation. |
| 23 | * |
| 24 | * An element can be in three states. The generation cursor is represented using |
| 25 | * the ^ character, note that this cursor shifts on every successful transaction. |
| 26 | * If no transaction is going on, we observe all elements are in the following |
| 27 | * state: |
| 28 | * |
| 29 | * 11 = this element is active in the current generation. In case of no updates, |
| 30 | * ^ it stays active in the next generation. |
| 31 | * 00 = this element is inactive in the current generation. In case of no |
| 32 | * ^ updates, it stays inactive in the next generation. |
| 33 | * |
| 34 | * On transaction handling, we observe these two temporary states: |
| 35 | * |
| 36 | * 01 = this element is inactive in the current generation and it becomes active |
| 37 | * ^ in the next one. This happens when the element is inserted but commit |
| 38 | * path has not yet been executed yet, so activation is still pending. On |
| 39 | * transaction abortion, the element is removed. |
| 40 | * 10 = this element is active in the current generation and it becomes inactive |
| 41 | * ^ in the next one. This happens when the element is deactivated but commit |
| 42 | * path has not yet been executed yet, so removal is still pending. On |
| 43 | * transaction abortion, the next generation bit is reset to go back to |
| 44 | * restore its previous state. |
| 45 | */ |
| 46 | struct nft_bitmap { |
| 47 | struct list_head list; |
| 48 | u16 bitmap_size; |
| 49 | u8 bitmap[]; |
| 50 | }; |
| 51 | |
| 52 | static inline void nft_bitmap_location(const struct nft_set *set, |
| 53 | const void *key, |
| 54 | u32 *idx, u32 *off) |
| 55 | { |
| 56 | u32 k; |
| 57 | |
| 58 | if (set->klen == 2) |
| 59 | k = *(u16 *)key; |
| 60 | else |
| 61 | k = *(u8 *)key; |
| 62 | k <<= 1; |
| 63 | |
| 64 | *idx = k / BITS_PER_BYTE; |
| 65 | *off = k % BITS_PER_BYTE; |
| 66 | } |
| 67 | |
| 68 | /* Fetch the two bits that represent the element and check if it is active based |
| 69 | * on the generation mask. |
| 70 | */ |
| 71 | static inline bool |
| 72 | nft_bitmap_active(const u8 *bitmap, u32 idx, u32 off, u8 genmask) |
| 73 | { |
| 74 | return (bitmap[idx] & (0x3 << off)) & (genmask << off); |
| 75 | } |
| 76 | |
| 77 | INDIRECT_CALLABLE_SCOPE |
| 78 | const struct nft_set_ext * |
| 79 | nft_bitmap_lookup(const struct net *net, const struct nft_set *set, |
| 80 | const u32 *key) |
| 81 | { |
| 82 | const struct nft_bitmap *priv = nft_set_priv(set); |
| 83 | static const struct nft_set_ext found; |
| 84 | u8 genmask = nft_genmask_cur(net); |
| 85 | u32 idx, off; |
| 86 | |
| 87 | nft_bitmap_location(set, key, idx: &idx, off: &off); |
| 88 | |
| 89 | if (nft_bitmap_active(bitmap: priv->bitmap, idx, off, genmask)) |
| 90 | return &found; |
| 91 | |
| 92 | return NULL; |
| 93 | } |
| 94 | |
| 95 | static struct nft_bitmap_elem * |
| 96 | nft_bitmap_elem_find(const struct net *net, |
| 97 | const struct nft_set *set, struct nft_bitmap_elem *this, |
| 98 | u8 genmask) |
| 99 | { |
| 100 | const struct nft_bitmap *priv = nft_set_priv(set); |
| 101 | struct nft_bitmap_elem *be; |
| 102 | |
| 103 | list_for_each_entry_rcu(be, &priv->list, head, |
| 104 | lockdep_is_held(&nft_pernet(net)->commit_mutex)) { |
| 105 | if (memcmp(p: nft_set_ext_key(ext: &be->ext), |
| 106 | q: nft_set_ext_key(ext: &this->ext), size: set->klen) || |
| 107 | !nft_set_elem_active(ext: &be->ext, genmask)) |
| 108 | continue; |
| 109 | |
| 110 | return be; |
| 111 | } |
| 112 | return NULL; |
| 113 | } |
| 114 | |
| 115 | static struct nft_elem_priv * |
| 116 | nft_bitmap_get(const struct net *net, const struct nft_set *set, |
| 117 | const struct nft_set_elem *elem, unsigned int flags) |
| 118 | { |
| 119 | const struct nft_bitmap *priv = nft_set_priv(set); |
| 120 | u8 genmask = nft_genmask_cur(net); |
| 121 | struct nft_bitmap_elem *be; |
| 122 | |
| 123 | list_for_each_entry_rcu(be, &priv->list, head) { |
| 124 | if (memcmp(p: nft_set_ext_key(ext: &be->ext), q: elem->key.val.data, size: set->klen) || |
| 125 | !nft_set_elem_active(ext: &be->ext, genmask)) |
| 126 | continue; |
| 127 | |
| 128 | return &be->priv; |
| 129 | } |
| 130 | return ERR_PTR(error: -ENOENT); |
| 131 | } |
| 132 | |
| 133 | static int nft_bitmap_insert(const struct net *net, const struct nft_set *set, |
| 134 | const struct nft_set_elem *elem, |
| 135 | struct nft_elem_priv **elem_priv) |
| 136 | { |
| 137 | struct nft_bitmap_elem *new = nft_elem_priv_cast(priv: elem->priv), *be; |
| 138 | struct nft_bitmap *priv = nft_set_priv(set); |
| 139 | u8 genmask = nft_genmask_next(net); |
| 140 | u32 idx, off; |
| 141 | |
| 142 | be = nft_bitmap_elem_find(net, set, this: new, genmask); |
| 143 | if (be) { |
| 144 | *elem_priv = &be->priv; |
| 145 | return -EEXIST; |
| 146 | } |
| 147 | |
| 148 | nft_bitmap_location(set, key: nft_set_ext_key(ext: &new->ext), idx: &idx, off: &off); |
| 149 | /* Enter 01 state. */ |
| 150 | priv->bitmap[idx] |= (genmask << off); |
| 151 | list_add_tail_rcu(new: &new->head, head: &priv->list); |
| 152 | |
| 153 | return 0; |
| 154 | } |
| 155 | |
| 156 | static void nft_bitmap_remove(const struct net *net, const struct nft_set *set, |
| 157 | struct nft_elem_priv *elem_priv) |
| 158 | { |
| 159 | struct nft_bitmap_elem *be = nft_elem_priv_cast(priv: elem_priv); |
| 160 | struct nft_bitmap *priv = nft_set_priv(set); |
| 161 | u8 genmask = nft_genmask_next(net); |
| 162 | u32 idx, off; |
| 163 | |
| 164 | nft_bitmap_location(set, key: nft_set_ext_key(ext: &be->ext), idx: &idx, off: &off); |
| 165 | /* Enter 00 state. */ |
| 166 | priv->bitmap[idx] &= ~(genmask << off); |
| 167 | list_del_rcu(entry: &be->head); |
| 168 | } |
| 169 | |
| 170 | static void nft_bitmap_activate(const struct net *net, |
| 171 | const struct nft_set *set, |
| 172 | struct nft_elem_priv *elem_priv) |
| 173 | { |
| 174 | struct nft_bitmap_elem *be = nft_elem_priv_cast(priv: elem_priv); |
| 175 | struct nft_bitmap *priv = nft_set_priv(set); |
| 176 | u8 genmask = nft_genmask_next(net); |
| 177 | u32 idx, off; |
| 178 | |
| 179 | nft_bitmap_location(set, key: nft_set_ext_key(ext: &be->ext), idx: &idx, off: &off); |
| 180 | /* Enter 11 state. */ |
| 181 | priv->bitmap[idx] |= (genmask << off); |
| 182 | nft_clear(net, &be->ext); |
| 183 | } |
| 184 | |
| 185 | static void nft_bitmap_flush(const struct net *net, |
| 186 | const struct nft_set *set, |
| 187 | struct nft_elem_priv *elem_priv) |
| 188 | { |
| 189 | struct nft_bitmap_elem *be = nft_elem_priv_cast(priv: elem_priv); |
| 190 | struct nft_bitmap *priv = nft_set_priv(set); |
| 191 | u8 genmask = nft_genmask_next(net); |
| 192 | u32 idx, off; |
| 193 | |
| 194 | nft_bitmap_location(set, key: nft_set_ext_key(ext: &be->ext), idx: &idx, off: &off); |
| 195 | /* Enter 10 state, similar to deactivation. */ |
| 196 | priv->bitmap[idx] &= ~(genmask << off); |
| 197 | nft_set_elem_change_active(net, set, ext: &be->ext); |
| 198 | } |
| 199 | |
| 200 | static struct nft_elem_priv * |
| 201 | nft_bitmap_deactivate(const struct net *net, const struct nft_set *set, |
| 202 | const struct nft_set_elem *elem) |
| 203 | { |
| 204 | struct nft_bitmap_elem *this = nft_elem_priv_cast(priv: elem->priv), *be; |
| 205 | struct nft_bitmap *priv = nft_set_priv(set); |
| 206 | u8 genmask = nft_genmask_next(net); |
| 207 | u32 idx, off; |
| 208 | |
| 209 | nft_bitmap_location(set, key: elem->key.val.data, idx: &idx, off: &off); |
| 210 | |
| 211 | be = nft_bitmap_elem_find(net, set, this, genmask); |
| 212 | if (!be) |
| 213 | return NULL; |
| 214 | |
| 215 | /* Enter 10 state. */ |
| 216 | priv->bitmap[idx] &= ~(genmask << off); |
| 217 | nft_set_elem_change_active(net, set, ext: &be->ext); |
| 218 | |
| 219 | return &be->priv; |
| 220 | } |
| 221 | |
| 222 | static void nft_bitmap_walk(const struct nft_ctx *ctx, |
| 223 | struct nft_set *set, |
| 224 | struct nft_set_iter *iter) |
| 225 | { |
| 226 | const struct nft_bitmap *priv = nft_set_priv(set); |
| 227 | struct nft_bitmap_elem *be; |
| 228 | |
| 229 | list_for_each_entry_rcu(be, &priv->list, head, |
| 230 | lockdep_is_held(&nft_pernet(ctx->net)->commit_mutex)) { |
| 231 | if (iter->count < iter->skip) |
| 232 | goto cont; |
| 233 | |
| 234 | iter->err = iter->fn(ctx, set, iter, &be->priv); |
| 235 | |
| 236 | if (iter->err < 0) |
| 237 | return; |
| 238 | cont: |
| 239 | iter->count++; |
| 240 | } |
| 241 | } |
| 242 | |
| 243 | /* The bitmap size is pow(2, key length in bits) / bits per byte. This is |
| 244 | * multiplied by two since each element takes two bits. For 8 bit keys, the |
| 245 | * bitmap consumes 66 bytes. For 16 bit keys, 16388 bytes. |
| 246 | */ |
| 247 | static inline u32 nft_bitmap_size(u32 klen) |
| 248 | { |
| 249 | return ((2 << ((klen * BITS_PER_BYTE) - 1)) / BITS_PER_BYTE) << 1; |
| 250 | } |
| 251 | |
| 252 | static inline u64 nft_bitmap_total_size(u32 klen) |
| 253 | { |
| 254 | return sizeof(struct nft_bitmap) + nft_bitmap_size(klen); |
| 255 | } |
| 256 | |
| 257 | static u64 nft_bitmap_privsize(const struct nlattr * const nla[], |
| 258 | const struct nft_set_desc *desc) |
| 259 | { |
| 260 | u32 klen = ntohl(nla_get_be32(nla[NFTA_SET_KEY_LEN])); |
| 261 | |
| 262 | return nft_bitmap_total_size(klen); |
| 263 | } |
| 264 | |
| 265 | static int nft_bitmap_init(const struct nft_set *set, |
| 266 | const struct nft_set_desc *desc, |
| 267 | const struct nlattr * const nla[]) |
| 268 | { |
| 269 | struct nft_bitmap *priv = nft_set_priv(set); |
| 270 | |
| 271 | BUILD_BUG_ON(offsetof(struct nft_bitmap_elem, priv) != 0); |
| 272 | |
| 273 | INIT_LIST_HEAD(list: &priv->list); |
| 274 | priv->bitmap_size = nft_bitmap_size(klen: set->klen); |
| 275 | |
| 276 | return 0; |
| 277 | } |
| 278 | |
| 279 | static void nft_bitmap_destroy(const struct nft_ctx *ctx, |
| 280 | const struct nft_set *set) |
| 281 | { |
| 282 | struct nft_bitmap *priv = nft_set_priv(set); |
| 283 | struct nft_bitmap_elem *be, *n; |
| 284 | |
| 285 | list_for_each_entry_safe(be, n, &priv->list, head) |
| 286 | nf_tables_set_elem_destroy(ctx, set, elem_priv: &be->priv); |
| 287 | } |
| 288 | |
| 289 | static bool nft_bitmap_estimate(const struct nft_set_desc *desc, u32 features, |
| 290 | struct nft_set_estimate *est) |
| 291 | { |
| 292 | /* Make sure bitmaps we don't get bitmaps larger than 16 Kbytes. */ |
| 293 | if (desc->klen > 2) |
| 294 | return false; |
| 295 | else if (desc->expr) |
| 296 | return false; |
| 297 | |
| 298 | est->size = nft_bitmap_total_size(klen: desc->klen); |
| 299 | est->lookup = NFT_SET_CLASS_O_1; |
| 300 | est->space = NFT_SET_CLASS_O_1; |
| 301 | |
| 302 | return true; |
| 303 | } |
| 304 | |
| 305 | const struct nft_set_type nft_set_bitmap_type = { |
| 306 | .ops = { |
| 307 | .privsize = nft_bitmap_privsize, |
| 308 | .elemsize = offsetof(struct nft_bitmap_elem, ext), |
| 309 | .estimate = nft_bitmap_estimate, |
| 310 | .init = nft_bitmap_init, |
| 311 | .destroy = nft_bitmap_destroy, |
| 312 | .insert = nft_bitmap_insert, |
| 313 | .remove = nft_bitmap_remove, |
| 314 | .deactivate = nft_bitmap_deactivate, |
| 315 | .flush = nft_bitmap_flush, |
| 316 | .activate = nft_bitmap_activate, |
| 317 | .lookup = nft_bitmap_lookup, |
| 318 | .walk = nft_bitmap_walk, |
| 319 | .get = nft_bitmap_get, |
| 320 | }, |
| 321 | }; |
| 322 | |