| 1 | /* SPDX-License-Identifier: GPL-2.0-only */ |
| 2 | /* Copyright (C) 2013 Jozsef Kadlecsik <kadlec@netfilter.org> */ |
| 3 | |
| 4 | #ifndef _IP_SET_HASH_GEN_H |
| 5 | #define _IP_SET_HASH_GEN_H |
| 6 | |
| 7 | #include <linux/rcupdate.h> |
| 8 | #include <linux/rcupdate_wait.h> |
| 9 | #include <linux/jhash.h> |
| 10 | #include <linux/types.h> |
| 11 | #include <linux/netfilter/nfnetlink.h> |
| 12 | #include <linux/netfilter/ipset/ip_set.h> |
| 13 | |
/* RCU pointer accessors used by the hash types.  The variants differ
 * only in the condition handed to the RCU checking primitive.
 */

/* Unconditional: the check expression is the constant 1 */
#define __ipset_dereference(p)					\
	rcu_dereference_protected(p, 1)

/* Checked against the nfnl lock of the ipset subsystem */
#define ipset_dereference_nfnl(p)				\
	rcu_dereference_protected(p,				\
		lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET))

/* Checked against the ipset nfnl lock or the lock of the given set */
#define ipset_dereference_set(p, set)				\
	rcu_dereference_protected(p,				\
		lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET) ||	\
		lockdep_is_held(&(set)->lock))

/* The _bh reader flavour, also accepting the ipset nfnl lock */
#define ipset_dereference_bh_nfnl(p)				\
	rcu_dereference_bh_check(p,				\
		lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET))
| 26 | |
| 27 | /* Hashing which uses arrays to resolve clashing. The hash table is resized |
| 28 | * (doubled) when searching becomes too long. |
| 29 | * Internally jhash is used with the assumption that the size of the |
| 30 | * stored data is a multiple of sizeof(u32). |
| 31 | * |
| 32 | * Readers and resizing |
| 33 | * |
| 34 | * Resizing can be triggered by userspace command only, and those |
| 35 | * are serialized by the nfnl mutex. During resizing the set is |
| 36 | * read-locked, so the only possible concurrent operations are |
| 37 | * the kernel side readers. Those must be protected by proper RCU locking. |
| 38 | */ |
| 39 | |
/* Number of element slots a freshly allocated array block has */
#define AHASH_INIT_SIZE			2
/* Upper limit of element slots in an array block */
#define AHASH_MAX_SIZE			(6 * AHASH_INIT_SIZE)
/* Upper limit of element slots in an array block when tuned */
#define AHASH_MAX_TUNED			64
/* Per-set limit: the bucketsize member of the hash structure h */
#define AHASH_MAX(h)			((h)->bucketsize)
| 47 | |
| 48 | /* A hash bucket */ |
| 49 | struct hbucket { |
| 50 | struct rcu_head rcu; /* for call_rcu */ |
| 51 | /* Which positions are used in the array */ |
| 52 | DECLARE_BITMAP(used, AHASH_MAX_TUNED); |
| 53 | u8 size; /* size of the array */ |
| 54 | u8 pos; /* position of the first free entry */ |
| 55 | unsigned char value[] /* the array of the values */ |
| 56 | __aligned(__alignof__(u64)); |
| 57 | }; |
| 58 | |
/* A lock region spans 2^HTABLE_REGION_BITS buckets */
#define HTABLE_REGION_BITS	10

/* Number of regions (and locks) of a table with 2^htable_bits buckets */
#define ahash_numof_locks(htable_bits)				\
	((htable_bits) < HTABLE_REGION_BITS ? 1			\
		: jhash_size((htable_bits) - HTABLE_REGION_BITS))

/* Bytes needed for the region array of such a table */
#define ahash_sizeof_regions(htable_bits)			\
	(ahash_numof_locks(htable_bits) * sizeof(struct ip_set_region))

/* Region index of bucket number n */
#define ahash_region(n)						\
	((n) / jhash_size(HTABLE_REGION_BITS))

/* First bucket index of region h */
#define ahash_bucket_start(h, htable_bits)			\
	((htable_bits) < HTABLE_REGION_BITS ? 0			\
		: (h) * jhash_size(HTABLE_REGION_BITS))

/* One past the last bucket index of region h */
#define ahash_bucket_end(h, htable_bits)			\
	((htable_bits) < HTABLE_REGION_BITS ? jhash_size(htable_bits)	\
		: ((h) + 1) * jhash_size(HTABLE_REGION_BITS))
| 74 | |
| 75 | struct htable_gc { |
| 76 | struct delayed_work dwork; |
| 77 | struct ip_set *set; /* Set the gc belongs to */ |
| 78 | u32 region; /* Last gc run position */ |
| 79 | }; |
| 80 | |
| 81 | /* The hash table: the table size stored here in order to make resizing easy */ |
| 82 | struct htable { |
| 83 | atomic_t ref; /* References for resizing */ |
| 84 | atomic_t uref; /* References for dumping and gc */ |
| 85 | u8 htable_bits; /* size of hash table == 2^htable_bits */ |
| 86 | u32 maxelem; /* Maxelem per region */ |
| 87 | struct ip_set_region *hregion; /* Region locks and ext sizes */ |
| 88 | struct hbucket __rcu *bucket[]; /* hashtable buckets */ |
| 89 | }; |
| 90 | |
/* The i-th bucket pointer of table h */
#define hbucket(h, i)		((h)->bucket[i])
/* Bytes taken by a bucket header plus n slots of dsize bytes each */
#define ext_size(n, dsize)					\
	(sizeof(struct hbucket) + (n) * (dsize))

/* Falls back to one when the including file did not define it */
#ifndef IPSET_NET_COUNT
#define IPSET_NET_COUNT		1
#endif
| 98 | |
| 99 | /* Book-keeping of the prefixes added to the set */ |
| 100 | struct net_prefixes { |
| 101 | u32 nets[IPSET_NET_COUNT]; /* number of elements for this cidr */ |
| 102 | u8 cidr[IPSET_NET_COUNT]; /* the cidr value */ |
| 103 | }; |
| 104 | |
| 105 | /* Compute the hash table size */ |
| 106 | static size_t |
| 107 | htable_size(u8 hbits) |
| 108 | { |
| 109 | size_t hsize; |
| 110 | |
| 111 | /* We must fit both into u32 in jhash and INT_MAX in kvmalloc_node() */ |
| 112 | if (hbits > 31) |
| 113 | return 0; |
| 114 | hsize = jhash_size(hbits); |
| 115 | if ((INT_MAX - sizeof(struct htable)) / sizeof(struct hbucket *) |
| 116 | < hsize) |
| 117 | return 0; |
| 118 | |
| 119 | return hsize * sizeof(struct hbucket *) + sizeof(struct htable); |
| 120 | } |
| 121 | |
#ifdef IP_SET_HASH_WITH_NETS
/* With several nets per element the cidr is an array, else a scalar */
#if IPSET_NET_COUNT > 1
#define __CIDR(cidr, i)		(cidr[i])
#else
#define __CIDR(cidr, i)		(cidr)
#endif

/* net_prefixes keeps cidr + 1, so that /0 can be represented */
#define NCIDR_PUT(cidr)		((cidr) + 1)
#define NCIDR_GET(cidr)		((cidr) - 1)

#ifdef IP_SET_HASH_WITH_NETS_PACKED
/* Packed together with nomatch, the data entry keeps cidr - 1 */
#define DCIDR_PUT(cidr)		((cidr) - 1)
#define DCIDR_GET(cidr, i)	(__CIDR(cidr, i) + 1)
#else
/* Unpacked: the data entry keeps the cidr unchanged */
#define DCIDR_PUT(cidr)		(cidr)
#define DCIDR_GET(cidr, i)	__CIDR(cidr, i)
#endif

/* Data-entry cidr for an initial value: host_mask when cidr is zero */
#define INIT_CIDR(cidr, host_mask)				\
	DCIDR_PUT(((cidr) ? NCIDR_GET(cidr) : host_mask))

#ifdef IP_SET_HASH_WITH_NET0
/* cidr runs from 0 to HOST_MASK and c = cidr + 1 */
#define NLEN			(HOST_MASK + 1)
#define CIDR_POS(c)		((c) - 1)
#else
/* cidr runs from 1 to HOST_MASK and c = cidr + 1 */
#define NLEN			HOST_MASK
#define CIDR_POS(c)		((c) - 2)
#endif

#else
/* No prefix book-keeping without IP_SET_HASH_WITH_NETS */
#define NLEN			0
#endif /* IP_SET_HASH_WITH_NETS */
| 158 | |
/* True when the set has the timeout extension and entry d has expired */
#define SET_ELEM_EXPIRED(set, d)				\
	(SET_WITH_TIMEOUT(set) &&				\
	 ip_set_timeout_expired(ext_timeout(d, set)))
| 162 | |
#if defined(IP_SET_HASH_WITH_NETMASK) || defined(IP_SET_HASH_WITH_BITMASK)
/* Address mask with the first four 32-bit words set to all ones */
static const union nf_inet_addr onesmask = {
	.all = { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff },
};

/* Address mask with every bit cleared */
static const union nf_inet_addr zeromask = {};
#endif
| 173 | |
| 174 | #endif /* _IP_SET_HASH_GEN_H */ |
| 175 | |
| 176 | #ifndef MTYPE |
| 177 | #error "MTYPE is not defined!" |
| 178 | #endif |
| 179 | |
| 180 | #ifndef HTYPE |
| 181 | #error "HTYPE is not defined!" |
| 182 | #endif |
| 183 | |
| 184 | #ifndef HOST_MASK |
| 185 | #error "HOST_MASK is not defined!" |
| 186 | #endif |
| 187 | |
| 188 | /* Family dependent templates */ |
| 189 | |
/* Drop the template names first, so each one can be defined again
 * below for the current MTYPE.
 */

/* Element level helpers */
#undef ahash_data
#undef mtype_data_equal
#undef mtype_do_data_match
#undef mtype_data_set_flags
#undef mtype_data_reset_elem
#undef mtype_data_reset_flags
#undef mtype_data_netmask
#undef mtype_data_list
#undef mtype_data_next
#undef mtype_elem

/* Set level helpers */
#undef mtype_ahash_destroy
#undef mtype_ext_cleanup
#undef mtype_add_cidr
#undef mtype_del_cidr
#undef mtype_ahash_memsize
#undef mtype_flush
#undef mtype_destroy
#undef mtype_same_set
#undef mtype_kadt
#undef mtype_uadt

/* Operations */
#undef mtype_add
#undef mtype_del
#undef mtype_test_cidrs
#undef mtype_test
#undef mtype_uref
#undef mtype_resize
#undef mtype_ext_size
#undef mtype_resize_ad
#undef mtype_head
#undef mtype_list
#undef mtype_gc_do
#undef mtype_gc
#undef mtype_gc_init
#undef mtype_cancel_gc
#undef mtype_variant
#undef mtype_data_match

/* Hash structure name and key macro */
#undef htype
#undef HKEY
| 231 | |
/* Build every template name by pasting a suffix onto MTYPE */

/* Element level helpers */
#define mtype_data_equal	IPSET_TOKEN(MTYPE, _data_equal)
#ifdef IP_SET_HASH_WITH_NETS
#define mtype_do_data_match	IPSET_TOKEN(MTYPE, _do_data_match)
#else
/* Without nets every entry counts as a match */
#define mtype_do_data_match(d)	1
#endif
#define mtype_data_set_flags	IPSET_TOKEN(MTYPE, _data_set_flags)
#define mtype_data_reset_elem	IPSET_TOKEN(MTYPE, _data_reset_elem)
#define mtype_data_reset_flags	IPSET_TOKEN(MTYPE, _data_reset_flags)
#define mtype_data_netmask	IPSET_TOKEN(MTYPE, _data_netmask)
#define mtype_data_list		IPSET_TOKEN(MTYPE, _data_list)
#define mtype_data_next		IPSET_TOKEN(MTYPE, _data_next)
#define mtype_elem		IPSET_TOKEN(MTYPE, _elem)

/* Set level helpers */
#define mtype_ahash_destroy	IPSET_TOKEN(MTYPE, _ahash_destroy)
#define mtype_ext_cleanup	IPSET_TOKEN(MTYPE, _ext_cleanup)
#define mtype_add_cidr		IPSET_TOKEN(MTYPE, _add_cidr)
#define mtype_del_cidr		IPSET_TOKEN(MTYPE, _del_cidr)
#define mtype_ahash_memsize	IPSET_TOKEN(MTYPE, _ahash_memsize)
#define mtype_flush		IPSET_TOKEN(MTYPE, _flush)
#define mtype_destroy		IPSET_TOKEN(MTYPE, _destroy)
#define mtype_same_set		IPSET_TOKEN(MTYPE, _same_set)
#define mtype_kadt		IPSET_TOKEN(MTYPE, _kadt)
#define mtype_uadt		IPSET_TOKEN(MTYPE, _uadt)

/* Operations */
#define mtype_add		IPSET_TOKEN(MTYPE, _add)
#define mtype_del		IPSET_TOKEN(MTYPE, _del)
#define mtype_test_cidrs	IPSET_TOKEN(MTYPE, _test_cidrs)
#define mtype_test		IPSET_TOKEN(MTYPE, _test)
#define mtype_uref		IPSET_TOKEN(MTYPE, _uref)
#define mtype_resize		IPSET_TOKEN(MTYPE, _resize)
#define mtype_ext_size		IPSET_TOKEN(MTYPE, _ext_size)
#define mtype_resize_ad		IPSET_TOKEN(MTYPE, _resize_ad)
#define mtype_head		IPSET_TOKEN(MTYPE, _head)
#define mtype_list		IPSET_TOKEN(MTYPE, _list)
#define mtype_gc_do		IPSET_TOKEN(MTYPE, _gc_do)
#define mtype_gc		IPSET_TOKEN(MTYPE, _gc)
#define mtype_gc_init		IPSET_TOKEN(MTYPE, _gc_init)
#define mtype_cancel_gc		IPSET_TOKEN(MTYPE, _cancel_gc)
#define mtype_variant		IPSET_TOKEN(MTYPE, _variant)
#define mtype_data_match	IPSET_TOKEN(MTYPE, _data_match)
| 273 | |
/* Number of bytes hashed; the whole element unless overridden */
#ifndef HKEY_DATALEN
#define HKEY_DATALEN		sizeof(struct mtype_elem)
#endif

/* The hash structure is named after MTYPE */
#define htype			MTYPE

/* Bucket index of data: jhash2 over HKEY_DATALEN bytes, read as u32
 * words, masked to htable_bits bits.  The build fails when
 * HKEY_DATALEN is not a multiple of sizeof(u32).
 */
#define HKEY(data, initval, htable_bits)			\
({								\
	const u32 *__key = (const u32 *)data;			\
	u32 __words = HKEY_DATALEN / sizeof(u32);		\
								\
	BUILD_BUG_ON(HKEY_DATALEN % sizeof(u32) != 0);		\
								\
	jhash2(__key, __words, initval) & jhash_mask(htable_bits); \
})
| 289 | |
| 290 | /* The generic hash structure */ |
| 291 | struct htype { |
| 292 | struct htable __rcu *table; /* the hash table */ |
| 293 | struct htable_gc gc; /* gc workqueue */ |
| 294 | u32 maxelem; /* max elements in the hash */ |
| 295 | u32 initval; /* random jhash init value */ |
| 296 | #ifdef IP_SET_HASH_WITH_MARKMASK |
| 297 | u32 markmask; /* markmask value for mark mask to store */ |
| 298 | #endif |
| 299 | u8 bucketsize; /* max elements in an array block */ |
| 300 | #if defined(IP_SET_HASH_WITH_NETMASK) || defined(IP_SET_HASH_WITH_BITMASK) |
| 301 | u8 netmask; /* netmask value for subnets to store */ |
| 302 | union nf_inet_addr bitmask; /* stores bitmask */ |
| 303 | #endif |
| 304 | struct list_head ad; /* Resize add|del backlist */ |
| 305 | struct mtype_elem next; /* temporary storage for uadd */ |
| 306 | #ifdef IP_SET_HASH_WITH_NETS |
| 307 | struct net_prefixes nets[NLEN]; /* book-keeping of prefixes */ |
| 308 | #endif |
| 309 | }; |
| 310 | |
| 311 | /* ADD|DEL entries saved during resize */ |
| 312 | struct mtype_resize_ad { |
| 313 | struct list_head list; |
| 314 | enum ipset_adt ad; /* ADD|DEL element */ |
| 315 | struct mtype_elem d; /* Element value */ |
| 316 | struct ip_set_ext ext; /* Extensions for ADD */ |
| 317 | struct ip_set_ext mext; /* Target extensions for ADD */ |
| 318 | u32 flags; /* Flags for ADD */ |
| 319 | }; |
| 320 | |
| 321 | #ifdef IP_SET_HASH_WITH_NETS |
| 322 | /* Network cidr size book keeping when the hash stores different |
| 323 | * sized networks. cidr == real cidr + 1 to support /0. |
| 324 | */ |
| 325 | static void |
| 326 | mtype_add_cidr(struct ip_set *set, struct htype *h, u8 cidr, u8 n) |
| 327 | { |
| 328 | int i, j; |
| 329 | |
| 330 | spin_lock_bh(&set->lock); |
| 331 | /* Add in increasing prefix order, so larger cidr first */ |
| 332 | for (i = 0, j = -1; i < NLEN && h->nets[i].cidr[n]; i++) { |
| 333 | if (j != -1) { |
| 334 | continue; |
| 335 | } else if (h->nets[i].cidr[n] < cidr) { |
| 336 | j = i; |
| 337 | } else if (h->nets[i].cidr[n] == cidr) { |
| 338 | h->nets[CIDR_POS(cidr)].nets[n]++; |
| 339 | goto unlock; |
| 340 | } |
| 341 | } |
| 342 | if (j != -1) { |
| 343 | for (; i > j; i--) |
| 344 | h->nets[i].cidr[n] = h->nets[i - 1].cidr[n]; |
| 345 | } |
| 346 | h->nets[i].cidr[n] = cidr; |
| 347 | h->nets[CIDR_POS(cidr)].nets[n] = 1; |
| 348 | unlock: |
| 349 | spin_unlock_bh(&set->lock); |
| 350 | } |
| 351 | |
| 352 | static void |
| 353 | mtype_del_cidr(struct ip_set *set, struct htype *h, u8 cidr, u8 n) |
| 354 | { |
| 355 | u8 i, j, net_end = NLEN - 1; |
| 356 | |
| 357 | spin_lock_bh(&set->lock); |
| 358 | for (i = 0; i < NLEN; i++) { |
| 359 | if (h->nets[i].cidr[n] != cidr) |
| 360 | continue; |
| 361 | h->nets[CIDR_POS(cidr)].nets[n]--; |
| 362 | if (h->nets[CIDR_POS(cidr)].nets[n] > 0) |
| 363 | goto unlock; |
| 364 | for (j = i; j < net_end && h->nets[j].cidr[n]; j++) |
| 365 | h->nets[j].cidr[n] = h->nets[j + 1].cidr[n]; |
| 366 | h->nets[j].cidr[n] = 0; |
| 367 | goto unlock; |
| 368 | } |
| 369 | unlock: |
| 370 | spin_unlock_bh(&set->lock); |
| 371 | } |
| 372 | #endif |
| 373 | |
| 374 | /* Calculate the actual memory size of the set data */ |
| 375 | static size_t |
| 376 | mtype_ahash_memsize(const struct htype *h, const struct htable *t) |
| 377 | { |
| 378 | return sizeof(*h) + sizeof(*t) + ahash_sizeof_regions(t->htable_bits); |
| 379 | } |
| 380 | |
/* Pointer to slot i of array block n, with slots of dsize bytes */
#define ahash_data(n, i, dsize)					\
	((struct mtype_elem *)((n)->value + ((i) * (dsize))))
| 384 | |
| 385 | static void |
| 386 | mtype_ext_cleanup(struct ip_set *set, struct hbucket *n) |
| 387 | { |
| 388 | int i; |
| 389 | |
| 390 | for (i = 0; i < n->pos; i++) |
| 391 | if (test_bit(i, n->used)) |
| 392 | ip_set_ext_destroy(set, ahash_data(n, i, set->dsize)); |
| 393 | } |
| 394 | |
| 395 | /* Flush a hash type of set: destroy all elements */ |
| 396 | static void |
| 397 | mtype_flush(struct ip_set *set) |
| 398 | { |
| 399 | struct htype *h = set->data; |
| 400 | struct htable *t; |
| 401 | struct hbucket *n; |
| 402 | u32 r, i; |
| 403 | |
| 404 | t = ipset_dereference_nfnl(h->table); |
| 405 | for (r = 0; r < ahash_numof_locks(t->htable_bits); r++) { |
| 406 | spin_lock_bh(lock: &t->hregion[r].lock); |
| 407 | for (i = ahash_bucket_start(r, t->htable_bits); |
| 408 | i < ahash_bucket_end(r, t->htable_bits); i++) { |
| 409 | n = __ipset_dereference(hbucket(t, i)); |
| 410 | if (!n) |
| 411 | continue; |
| 412 | if (set->extensions & IPSET_EXT_DESTROY) |
| 413 | mtype_ext_cleanup(set, n); |
| 414 | /* FIXME: use slab cache */ |
| 415 | rcu_assign_pointer(hbucket(t, i), NULL); |
| 416 | kfree_rcu(n, rcu); |
| 417 | } |
| 418 | t->hregion[r].ext_size = 0; |
| 419 | t->hregion[r].elements = 0; |
| 420 | spin_unlock_bh(lock: &t->hregion[r].lock); |
| 421 | } |
| 422 | #ifdef IP_SET_HASH_WITH_NETS |
| 423 | memset(h->nets, 0, sizeof(h->nets)); |
| 424 | #endif |
| 425 | } |
| 426 | |
| 427 | /* Destroy the hashtable part of the set */ |
| 428 | static void |
| 429 | mtype_ahash_destroy(struct ip_set *set, struct htable *t, bool ext_destroy) |
| 430 | { |
| 431 | struct hbucket *n; |
| 432 | u32 i; |
| 433 | |
| 434 | for (i = 0; i < jhash_size(t->htable_bits); i++) { |
| 435 | n = (__force struct hbucket *)hbucket(t, i); |
| 436 | if (!n) |
| 437 | continue; |
| 438 | if (set->extensions & IPSET_EXT_DESTROY && ext_destroy) |
| 439 | mtype_ext_cleanup(set, n); |
| 440 | /* FIXME: use slab cache */ |
| 441 | kfree(objp: n); |
| 442 | } |
| 443 | |
| 444 | ip_set_free(members: t->hregion); |
| 445 | ip_set_free(members: t); |
| 446 | } |
| 447 | |
| 448 | /* Destroy a hash type of set */ |
| 449 | static void |
| 450 | mtype_destroy(struct ip_set *set) |
| 451 | { |
| 452 | struct htype *h = set->data; |
| 453 | struct list_head *l, *lt; |
| 454 | |
| 455 | mtype_ahash_destroy(set, t: (__force struct htable *)h->table, ext_destroy: true); |
| 456 | list_for_each_safe(l, lt, &h->ad) { |
| 457 | list_del(entry: l); |
| 458 | kfree(objp: l); |
| 459 | } |
| 460 | kfree(objp: h); |
| 461 | |
| 462 | set->data = NULL; |
| 463 | } |
| 464 | |
| 465 | static bool |
| 466 | mtype_same_set(const struct ip_set *a, const struct ip_set *b) |
| 467 | { |
| 468 | const struct htype *x = a->data; |
| 469 | const struct htype *y = b->data; |
| 470 | |
| 471 | /* Resizing changes htable_bits, so we ignore it */ |
| 472 | return x->maxelem == y->maxelem && |
| 473 | a->timeout == b->timeout && |
| 474 | #if defined(IP_SET_HASH_WITH_NETMASK) || defined(IP_SET_HASH_WITH_BITMASK) |
| 475 | nf_inet_addr_cmp(a1: &x->bitmask, a2: &y->bitmask) && |
| 476 | #endif |
| 477 | #ifdef IP_SET_HASH_WITH_MARKMASK |
| 478 | x->markmask == y->markmask && |
| 479 | #endif |
| 480 | a->extensions == b->extensions; |
| 481 | } |
| 482 | |
| 483 | static void |
| 484 | mtype_gc_do(struct ip_set *set, struct htype *h, struct htable *t, u32 r) |
| 485 | { |
| 486 | struct hbucket *n, *tmp; |
| 487 | struct mtype_elem *data; |
| 488 | u32 i, j, d; |
| 489 | size_t dsize = set->dsize; |
| 490 | #ifdef IP_SET_HASH_WITH_NETS |
| 491 | u8 k; |
| 492 | #endif |
| 493 | u8 htable_bits = t->htable_bits; |
| 494 | |
| 495 | spin_lock_bh(lock: &t->hregion[r].lock); |
| 496 | for (i = ahash_bucket_start(r, htable_bits); |
| 497 | i < ahash_bucket_end(r, htable_bits); i++) { |
| 498 | n = __ipset_dereference(hbucket(t, i)); |
| 499 | if (!n) |
| 500 | continue; |
| 501 | for (j = 0, d = 0; j < n->pos; j++) { |
| 502 | if (!test_bit(j, n->used)) { |
| 503 | d++; |
| 504 | continue; |
| 505 | } |
| 506 | data = ahash_data(n, j, dsize); |
| 507 | if (!ip_set_timeout_expired(ext_timeout(data, set))) |
| 508 | continue; |
| 509 | pr_debug("expired %u/%u\n" , i, j); |
| 510 | clear_bit(nr: j, addr: n->used); |
| 511 | smp_mb__after_atomic(); |
| 512 | #ifdef IP_SET_HASH_WITH_NETS |
| 513 | for (k = 0; k < IPSET_NET_COUNT; k++) |
| 514 | mtype_del_cidr(set, h, |
| 515 | NCIDR_PUT(DCIDR_GET(data->cidr, k)), |
| 516 | k); |
| 517 | #endif |
| 518 | t->hregion[r].elements--; |
| 519 | ip_set_ext_destroy(set, data); |
| 520 | d++; |
| 521 | } |
| 522 | if (d >= AHASH_INIT_SIZE) { |
| 523 | if (d >= n->size) { |
| 524 | t->hregion[r].ext_size -= |
| 525 | ext_size(n->size, dsize); |
| 526 | rcu_assign_pointer(hbucket(t, i), NULL); |
| 527 | kfree_rcu(n, rcu); |
| 528 | continue; |
| 529 | } |
| 530 | tmp = kzalloc(sizeof(*tmp) + |
| 531 | (n->size - AHASH_INIT_SIZE) * dsize, |
| 532 | GFP_ATOMIC); |
| 533 | if (!tmp) |
| 534 | /* Still try to delete expired elements. */ |
| 535 | continue; |
| 536 | tmp->size = n->size - AHASH_INIT_SIZE; |
| 537 | for (j = 0, d = 0; j < n->pos; j++) { |
| 538 | if (!test_bit(j, n->used)) |
| 539 | continue; |
| 540 | data = ahash_data(n, j, dsize); |
| 541 | memcpy(tmp->value + d * dsize, |
| 542 | data, dsize); |
| 543 | set_bit(nr: d, addr: tmp->used); |
| 544 | d++; |
| 545 | } |
| 546 | tmp->pos = d; |
| 547 | t->hregion[r].ext_size -= |
| 548 | ext_size(AHASH_INIT_SIZE, dsize); |
| 549 | rcu_assign_pointer(hbucket(t, i), tmp); |
| 550 | kfree_rcu(n, rcu); |
| 551 | } |
| 552 | } |
| 553 | spin_unlock_bh(lock: &t->hregion[r].lock); |
| 554 | } |
| 555 | |
| 556 | static void |
| 557 | mtype_gc(struct work_struct *work) |
| 558 | { |
| 559 | struct htable_gc *gc; |
| 560 | struct ip_set *set; |
| 561 | struct htype *h; |
| 562 | struct htable *t; |
| 563 | u32 r, numof_locks; |
| 564 | unsigned int next_run; |
| 565 | |
| 566 | gc = container_of(work, struct htable_gc, dwork.work); |
| 567 | set = gc->set; |
| 568 | h = set->data; |
| 569 | |
| 570 | spin_lock_bh(lock: &set->lock); |
| 571 | t = ipset_dereference_set(h->table, set); |
| 572 | atomic_inc(v: &t->uref); |
| 573 | numof_locks = ahash_numof_locks(t->htable_bits); |
| 574 | r = gc->region++; |
| 575 | if (r >= numof_locks) { |
| 576 | r = gc->region = 0; |
| 577 | } |
| 578 | next_run = (IPSET_GC_PERIOD(set->timeout) * HZ) / numof_locks; |
| 579 | if (next_run < HZ/10) |
| 580 | next_run = HZ/10; |
| 581 | spin_unlock_bh(lock: &set->lock); |
| 582 | |
| 583 | mtype_gc_do(set, h, t, r); |
| 584 | |
| 585 | if (atomic_dec_and_test(v: &t->uref) && atomic_read(v: &t->ref)) { |
| 586 | pr_debug("Table destroy after resize by expire: %p\n" , t); |
| 587 | mtype_ahash_destroy(set, t, ext_destroy: false); |
| 588 | } |
| 589 | |
| 590 | queue_delayed_work(wq: system_power_efficient_wq, dwork: &gc->dwork, delay: next_run); |
| 591 | |
| 592 | } |
| 593 | |
| 594 | static void |
| 595 | mtype_gc_init(struct htable_gc *gc) |
| 596 | { |
| 597 | INIT_DEFERRABLE_WORK(&gc->dwork, mtype_gc); |
| 598 | queue_delayed_work(wq: system_power_efficient_wq, dwork: &gc->dwork, HZ); |
| 599 | } |
| 600 | |
| 601 | static void |
| 602 | mtype_cancel_gc(struct ip_set *set) |
| 603 | { |
| 604 | struct htype *h = set->data; |
| 605 | |
| 606 | if (SET_WITH_TIMEOUT(set)) |
| 607 | cancel_delayed_work_sync(dwork: &h->gc.dwork); |
| 608 | } |
| 609 | |
| 610 | static int |
| 611 | mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, |
| 612 | struct ip_set_ext *mext, u32 flags); |
| 613 | static int |
| 614 | mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, |
| 615 | struct ip_set_ext *mext, u32 flags); |
| 616 | |
| 617 | /* Resize a hash: create a new hash table with doubling the hashsize |
| 618 | * and inserting the elements to it. Repeat until we succeed or |
| 619 | * fail due to memory pressures. |
| 620 | */ |
| 621 | static int |
| 622 | mtype_resize(struct ip_set *set, bool retried) |
| 623 | { |
| 624 | struct htype *h = set->data; |
| 625 | struct htable *t, *orig; |
| 626 | u8 htable_bits; |
| 627 | size_t hsize, dsize = set->dsize; |
| 628 | #ifdef IP_SET_HASH_WITH_NETS |
| 629 | u8 flags; |
| 630 | struct mtype_elem *tmp; |
| 631 | #endif |
| 632 | struct mtype_elem *data; |
| 633 | struct mtype_elem *d; |
| 634 | struct hbucket *n, *m; |
| 635 | struct list_head *l, *lt; |
| 636 | struct mtype_resize_ad *x; |
| 637 | u32 i, j, r, nr, key; |
| 638 | int ret; |
| 639 | |
| 640 | #ifdef IP_SET_HASH_WITH_NETS |
| 641 | tmp = kmalloc(dsize, GFP_KERNEL); |
| 642 | if (!tmp) |
| 643 | return -ENOMEM; |
| 644 | #endif |
| 645 | orig = ipset_dereference_bh_nfnl(h->table); |
| 646 | htable_bits = orig->htable_bits; |
| 647 | |
| 648 | retry: |
| 649 | ret = 0; |
| 650 | htable_bits++; |
| 651 | if (!htable_bits) |
| 652 | goto hbwarn; |
| 653 | hsize = htable_size(hbits: htable_bits); |
| 654 | if (!hsize) |
| 655 | goto hbwarn; |
| 656 | t = ip_set_alloc(size: hsize); |
| 657 | if (!t) { |
| 658 | ret = -ENOMEM; |
| 659 | goto out; |
| 660 | } |
| 661 | t->hregion = ip_set_alloc(ahash_sizeof_regions(htable_bits)); |
| 662 | if (!t->hregion) { |
| 663 | ip_set_free(members: t); |
| 664 | ret = -ENOMEM; |
| 665 | goto out; |
| 666 | } |
| 667 | t->htable_bits = htable_bits; |
| 668 | t->maxelem = h->maxelem / ahash_numof_locks(htable_bits); |
| 669 | for (i = 0; i < ahash_numof_locks(htable_bits); i++) |
| 670 | spin_lock_init(&t->hregion[i].lock); |
| 671 | |
| 672 | /* There can't be another parallel resizing, |
| 673 | * but dumping, gc, kernel side add/del are possible |
| 674 | */ |
| 675 | orig = ipset_dereference_bh_nfnl(h->table); |
| 676 | atomic_set(v: &orig->ref, i: 1); |
| 677 | atomic_inc(v: &orig->uref); |
| 678 | pr_debug("attempt to resize set %s from %u to %u, t %p\n" , |
| 679 | set->name, orig->htable_bits, htable_bits, orig); |
| 680 | for (r = 0; r < ahash_numof_locks(orig->htable_bits); r++) { |
| 681 | /* Expire may replace a hbucket with another one */ |
| 682 | rcu_read_lock_bh(); |
| 683 | for (i = ahash_bucket_start(r, orig->htable_bits); |
| 684 | i < ahash_bucket_end(r, orig->htable_bits); i++) { |
| 685 | n = __ipset_dereference(hbucket(orig, i)); |
| 686 | if (!n) |
| 687 | continue; |
| 688 | for (j = 0; j < n->pos; j++) { |
| 689 | if (!test_bit(j, n->used)) |
| 690 | continue; |
| 691 | data = ahash_data(n, j, dsize); |
| 692 | if (SET_ELEM_EXPIRED(set, data)) |
| 693 | continue; |
| 694 | #ifdef IP_SET_HASH_WITH_NETS |
| 695 | /* We have readers running parallel with us, |
| 696 | * so the live data cannot be modified. |
| 697 | */ |
| 698 | flags = 0; |
| 699 | memcpy(tmp, data, dsize); |
| 700 | data = tmp; |
| 701 | mtype_data_reset_flags(data, &flags); |
| 702 | #endif |
| 703 | key = HKEY(data, h->initval, htable_bits); |
| 704 | m = __ipset_dereference(hbucket(t, key)); |
| 705 | nr = ahash_region(key); |
| 706 | if (!m) { |
| 707 | m = kzalloc(sizeof(*m) + |
| 708 | AHASH_INIT_SIZE * dsize, |
| 709 | GFP_ATOMIC); |
| 710 | if (!m) { |
| 711 | ret = -ENOMEM; |
| 712 | goto cleanup; |
| 713 | } |
| 714 | m->size = AHASH_INIT_SIZE; |
| 715 | t->hregion[nr].ext_size += |
| 716 | ext_size(AHASH_INIT_SIZE, |
| 717 | dsize); |
| 718 | RCU_INIT_POINTER(hbucket(t, key), m); |
| 719 | } else if (m->pos >= m->size) { |
| 720 | struct hbucket *ht; |
| 721 | |
| 722 | if (m->size >= AHASH_MAX(h)) { |
| 723 | ret = -EAGAIN; |
| 724 | } else { |
| 725 | ht = kzalloc(sizeof(*ht) + |
| 726 | (m->size + AHASH_INIT_SIZE) |
| 727 | * dsize, |
| 728 | GFP_ATOMIC); |
| 729 | if (!ht) |
| 730 | ret = -ENOMEM; |
| 731 | } |
| 732 | if (ret < 0) |
| 733 | goto cleanup; |
| 734 | memcpy(ht, m, sizeof(struct hbucket) + |
| 735 | m->size * dsize); |
| 736 | ht->size = m->size + AHASH_INIT_SIZE; |
| 737 | t->hregion[nr].ext_size += |
| 738 | ext_size(AHASH_INIT_SIZE, |
| 739 | dsize); |
| 740 | kfree(objp: m); |
| 741 | m = ht; |
| 742 | RCU_INIT_POINTER(hbucket(t, key), ht); |
| 743 | } |
| 744 | d = ahash_data(m, m->pos, dsize); |
| 745 | memcpy(d, data, dsize); |
| 746 | set_bit(nr: m->pos++, addr: m->used); |
| 747 | t->hregion[nr].elements++; |
| 748 | #ifdef IP_SET_HASH_WITH_NETS |
| 749 | mtype_data_reset_flags(d, &flags); |
| 750 | #endif |
| 751 | } |
| 752 | } |
| 753 | rcu_read_unlock_bh(); |
| 754 | } |
| 755 | |
| 756 | /* There can't be any other writer. */ |
| 757 | rcu_assign_pointer(h->table, t); |
| 758 | |
| 759 | /* Give time to other readers of the set */ |
| 760 | synchronize_rcu(); |
| 761 | |
| 762 | pr_debug("set %s resized from %u (%p) to %u (%p)\n" , set->name, |
| 763 | orig->htable_bits, orig, t->htable_bits, t); |
| 764 | /* Add/delete elements processed by the SET target during resize. |
| 765 | * Kernel-side add cannot trigger a resize and userspace actions |
| 766 | * are serialized by the mutex. |
| 767 | */ |
| 768 | list_for_each_safe(l, lt, &h->ad) { |
| 769 | x = list_entry(l, struct mtype_resize_ad, list); |
| 770 | if (x->ad == IPSET_ADD) { |
| 771 | mtype_add(set, value: &x->d, ext: &x->ext, mext: &x->mext, flags: x->flags); |
| 772 | } else { |
| 773 | mtype_del(set, value: &x->d, NULL, NULL, flags: 0); |
| 774 | } |
| 775 | list_del(entry: l); |
| 776 | kfree(objp: l); |
| 777 | } |
| 778 | /* If there's nobody else using the table, destroy it */ |
| 779 | if (atomic_dec_and_test(v: &orig->uref)) { |
| 780 | pr_debug("Table destroy by resize %p\n" , orig); |
| 781 | mtype_ahash_destroy(set, t: orig, ext_destroy: false); |
| 782 | } |
| 783 | |
| 784 | out: |
| 785 | #ifdef IP_SET_HASH_WITH_NETS |
| 786 | kfree(tmp); |
| 787 | #endif |
| 788 | return ret; |
| 789 | |
| 790 | cleanup: |
| 791 | rcu_read_unlock_bh(); |
| 792 | atomic_set(v: &orig->ref, i: 0); |
| 793 | atomic_dec(v: &orig->uref); |
| 794 | mtype_ahash_destroy(set, t, ext_destroy: false); |
| 795 | if (ret == -EAGAIN) |
| 796 | goto retry; |
| 797 | goto out; |
| 798 | |
| 799 | hbwarn: |
| 800 | /* In case we have plenty of memory :-) */ |
| 801 | pr_warn("Cannot increase the hashsize of set %s further\n" , set->name); |
| 802 | ret = -IPSET_ERR_HASH_FULL; |
| 803 | goto out; |
| 804 | } |
| 805 | |
| 806 | /* Get the current number of elements and ext_size in the set */ |
| 807 | static void |
| 808 | mtype_ext_size(struct ip_set *set, u32 *elements, size_t *ext_size) |
| 809 | { |
| 810 | struct htype *h = set->data; |
| 811 | const struct htable *t; |
| 812 | u32 i, j, r; |
| 813 | struct hbucket *n; |
| 814 | struct mtype_elem *data; |
| 815 | |
| 816 | t = rcu_dereference_bh(h->table); |
| 817 | for (r = 0; r < ahash_numof_locks(t->htable_bits); r++) { |
| 818 | for (i = ahash_bucket_start(r, t->htable_bits); |
| 819 | i < ahash_bucket_end(r, t->htable_bits); i++) { |
| 820 | n = rcu_dereference_bh(hbucket(t, i)); |
| 821 | if (!n) |
| 822 | continue; |
| 823 | for (j = 0; j < n->pos; j++) { |
| 824 | if (!test_bit(j, n->used)) |
| 825 | continue; |
| 826 | data = ahash_data(n, j, set->dsize); |
| 827 | if (!SET_ELEM_EXPIRED(set, data)) |
| 828 | (*elements)++; |
| 829 | } |
| 830 | } |
| 831 | *ext_size += t->hregion[r].ext_size; |
| 832 | } |
| 833 | } |
| 834 | |
| 835 | /* Add an element to a hash and update the internal counters when succeeded, |
| 836 | * otherwise report the proper error code. |
| 837 | */ |
| 838 | static int |
| 839 | mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, |
| 840 | struct ip_set_ext *mext, u32 flags) |
| 841 | { |
| 842 | struct htype *h = set->data; |
| 843 | struct htable *t; |
| 844 | const struct mtype_elem *d = value; |
| 845 | struct mtype_elem *data; |
| 846 | struct hbucket *n, *old = ERR_PTR(error: -ENOENT); |
| 847 | int i, j = -1, ret; |
| 848 | bool flag_exist = flags & IPSET_FLAG_EXIST; |
| 849 | bool deleted = false, forceadd = false, reuse = false; |
| 850 | u32 r, key, multi = 0, elements, maxelem; |
| 851 | |
| 852 | rcu_read_lock_bh(); |
| 853 | t = rcu_dereference_bh(h->table); |
| 854 | key = HKEY(value, h->initval, t->htable_bits); |
| 855 | r = ahash_region(key); |
| 856 | atomic_inc(v: &t->uref); |
| 857 | elements = t->hregion[r].elements; |
| 858 | maxelem = t->maxelem; |
| 859 | if (elements >= maxelem) { |
| 860 | u32 e; |
| 861 | if (SET_WITH_TIMEOUT(set)) { |
| 862 | rcu_read_unlock_bh(); |
| 863 | mtype_gc_do(set, h, t, r); |
| 864 | rcu_read_lock_bh(); |
| 865 | } |
| 866 | maxelem = h->maxelem; |
| 867 | elements = 0; |
| 868 | for (e = 0; e < ahash_numof_locks(t->htable_bits); e++) |
| 869 | elements += t->hregion[e].elements; |
| 870 | if (elements >= maxelem && SET_WITH_FORCEADD(set)) |
| 871 | forceadd = true; |
| 872 | } |
| 873 | rcu_read_unlock_bh(); |
| 874 | |
| 875 | spin_lock_bh(lock: &t->hregion[r].lock); |
| 876 | n = rcu_dereference_bh(hbucket(t, key)); |
| 877 | if (!n) { |
| 878 | if (forceadd || elements >= maxelem) |
| 879 | goto set_full; |
| 880 | old = NULL; |
| 881 | n = kzalloc(sizeof(*n) + AHASH_INIT_SIZE * set->dsize, |
| 882 | GFP_ATOMIC); |
| 883 | if (!n) { |
| 884 | ret = -ENOMEM; |
| 885 | goto unlock; |
| 886 | } |
| 887 | n->size = AHASH_INIT_SIZE; |
| 888 | t->hregion[r].ext_size += |
| 889 | ext_size(AHASH_INIT_SIZE, set->dsize); |
| 890 | goto copy_elem; |
| 891 | } |
| 892 | for (i = 0; i < n->pos; i++) { |
| 893 | if (!test_bit(i, n->used)) { |
| 894 | /* Reuse first deleted entry */ |
| 895 | if (j == -1) { |
| 896 | deleted = reuse = true; |
| 897 | j = i; |
| 898 | } |
| 899 | continue; |
| 900 | } |
| 901 | data = ahash_data(n, i, set->dsize); |
| 902 | if (mtype_data_equal(e1: data, e2: d, multi: &multi)) { |
| 903 | if (flag_exist || SET_ELEM_EXPIRED(set, data)) { |
| 904 | /* Just the extensions could be overwritten */ |
| 905 | j = i; |
| 906 | goto overwrite_extensions; |
| 907 | } |
| 908 | ret = -IPSET_ERR_EXIST; |
| 909 | goto unlock; |
| 910 | } |
| 911 | /* Reuse first timed out entry */ |
| 912 | if (SET_ELEM_EXPIRED(set, data) && j == -1) { |
| 913 | j = i; |
| 914 | reuse = true; |
| 915 | } |
| 916 | } |
| 917 | if (reuse || forceadd) { |
| 918 | if (j == -1) |
| 919 | j = 0; |
| 920 | data = ahash_data(n, j, set->dsize); |
| 921 | if (!deleted) { |
| 922 | #ifdef IP_SET_HASH_WITH_NETS |
| 923 | for (i = 0; i < IPSET_NET_COUNT; i++) |
| 924 | mtype_del_cidr(set, h, |
| 925 | NCIDR_PUT(DCIDR_GET(data->cidr, i)), |
| 926 | i); |
| 927 | #endif |
| 928 | ip_set_ext_destroy(set, data); |
| 929 | t->hregion[r].elements--; |
| 930 | } |
| 931 | goto copy_data; |
| 932 | } |
| 933 | if (elements >= maxelem) |
| 934 | goto set_full; |
| 935 | /* Create a new slot */ |
| 936 | if (n->pos >= n->size) { |
| 937 | #ifdef IP_SET_HASH_WITH_MULTI |
| 938 | if (h->bucketsize >= AHASH_MAX_TUNED) |
| 939 | goto set_full; |
| 940 | else if (h->bucketsize <= multi) |
| 941 | h->bucketsize += AHASH_INIT_SIZE; |
| 942 | #endif |
| 943 | if (n->size >= AHASH_MAX(h)) { |
| 944 | /* Trigger rehashing */ |
| 945 | mtype_data_next(next: &h->next, e: d); |
| 946 | ret = -EAGAIN; |
| 947 | goto resize; |
| 948 | } |
| 949 | old = n; |
| 950 | n = kzalloc(sizeof(*n) + |
| 951 | (old->size + AHASH_INIT_SIZE) * set->dsize, |
| 952 | GFP_ATOMIC); |
| 953 | if (!n) { |
| 954 | ret = -ENOMEM; |
| 955 | goto unlock; |
| 956 | } |
| 957 | memcpy(n, old, sizeof(struct hbucket) + |
| 958 | old->size * set->dsize); |
| 959 | n->size = old->size + AHASH_INIT_SIZE; |
| 960 | t->hregion[r].ext_size += |
| 961 | ext_size(AHASH_INIT_SIZE, set->dsize); |
| 962 | } |
| 963 | |
| 964 | copy_elem: |
| 965 | j = n->pos++; |
| 966 | data = ahash_data(n, j, set->dsize); |
| 967 | copy_data: |
| 968 | t->hregion[r].elements++; |
| 969 | #ifdef IP_SET_HASH_WITH_NETS |
| 970 | for (i = 0; i < IPSET_NET_COUNT; i++) |
| 971 | mtype_add_cidr(set, h, NCIDR_PUT(DCIDR_GET(d->cidr, i)), i); |
| 972 | #endif |
| 973 | memcpy(data, d, sizeof(struct mtype_elem)); |
| 974 | overwrite_extensions: |
| 975 | #ifdef IP_SET_HASH_WITH_NETS |
| 976 | mtype_data_set_flags(data, flags); |
| 977 | #endif |
| 978 | if (SET_WITH_COUNTER(set)) |
| 979 | ip_set_init_counter(ext_counter(data, set), ext); |
| 980 | if (SET_WITH_COMMENT(set)) |
| 981 | ip_set_init_comment(set, ext_comment(data, set), ext); |
| 982 | if (SET_WITH_SKBINFO(set)) |
| 983 | ip_set_init_skbinfo(ext_skbinfo(data, set), ext); |
| 984 | /* Must come last for the case when timed out entry is reused */ |
| 985 | if (SET_WITH_TIMEOUT(set)) |
| 986 | ip_set_timeout_set(ext_timeout(data, set), value: ext->timeout); |
| 987 | smp_mb__before_atomic(); |
| 988 | set_bit(nr: j, addr: n->used); |
| 989 | if (old != ERR_PTR(error: -ENOENT)) { |
| 990 | rcu_assign_pointer(hbucket(t, key), n); |
| 991 | if (old) |
| 992 | kfree_rcu(old, rcu); |
| 993 | } |
| 994 | ret = 0; |
| 995 | resize: |
| 996 | spin_unlock_bh(lock: &t->hregion[r].lock); |
| 997 | if (atomic_read(v: &t->ref) && ext->target) { |
| 998 | /* Resize is in process and kernel side add, save values */ |
| 999 | struct mtype_resize_ad *x; |
| 1000 | |
| 1001 | x = kzalloc(sizeof(struct mtype_resize_ad), GFP_ATOMIC); |
| 1002 | if (!x) |
| 1003 | /* Don't bother */ |
| 1004 | goto out; |
| 1005 | x->ad = IPSET_ADD; |
| 1006 | memcpy(&x->d, value, sizeof(struct mtype_elem)); |
| 1007 | memcpy(&x->ext, ext, sizeof(struct ip_set_ext)); |
| 1008 | memcpy(&x->mext, mext, sizeof(struct ip_set_ext)); |
| 1009 | x->flags = flags; |
| 1010 | spin_lock_bh(lock: &set->lock); |
| 1011 | list_add_tail(new: &x->list, head: &h->ad); |
| 1012 | spin_unlock_bh(lock: &set->lock); |
| 1013 | } |
| 1014 | goto out; |
| 1015 | |
| 1016 | set_full: |
| 1017 | if (net_ratelimit()) |
| 1018 | pr_warn("Set %s is full, maxelem %u reached\n" , |
| 1019 | set->name, maxelem); |
| 1020 | ret = -IPSET_ERR_HASH_FULL; |
| 1021 | unlock: |
| 1022 | spin_unlock_bh(lock: &t->hregion[r].lock); |
| 1023 | out: |
| 1024 | if (atomic_dec_and_test(v: &t->uref) && atomic_read(v: &t->ref)) { |
| 1025 | pr_debug("Table destroy after resize by add: %p\n" , t); |
| 1026 | mtype_ahash_destroy(set, t, ext_destroy: false); |
| 1027 | } |
| 1028 | return ret; |
| 1029 | } |
| 1030 | |
| 1031 | /* Delete an element from the hash and free up space if possible. |
| 1032 | */ |
| 1033 | static int |
| 1034 | mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, |
| 1035 | struct ip_set_ext *mext, u32 flags) |
| 1036 | { |
| 1037 | struct htype *h = set->data; |
| 1038 | struct htable *t; |
| 1039 | const struct mtype_elem *d = value; |
| 1040 | struct mtype_elem *data; |
| 1041 | struct hbucket *n; |
| 1042 | struct mtype_resize_ad *x = NULL; |
| 1043 | int i, j, k, r, ret = -IPSET_ERR_EXIST; |
| 1044 | u32 key, multi = 0; |
| 1045 | size_t dsize = set->dsize; |
| 1046 | |
| 1047 | /* Userspace add and resize is excluded by the mutex. |
| 1048 | * Kernespace add does not trigger resize. |
| 1049 | */ |
| 1050 | rcu_read_lock_bh(); |
| 1051 | t = rcu_dereference_bh(h->table); |
| 1052 | key = HKEY(value, h->initval, t->htable_bits); |
| 1053 | r = ahash_region(key); |
| 1054 | atomic_inc(v: &t->uref); |
| 1055 | rcu_read_unlock_bh(); |
| 1056 | |
| 1057 | spin_lock_bh(lock: &t->hregion[r].lock); |
| 1058 | n = rcu_dereference_bh(hbucket(t, key)); |
| 1059 | if (!n) |
| 1060 | goto out; |
| 1061 | for (i = 0, k = 0; i < n->pos; i++) { |
| 1062 | if (!test_bit(i, n->used)) { |
| 1063 | k++; |
| 1064 | continue; |
| 1065 | } |
| 1066 | data = ahash_data(n, i, dsize); |
| 1067 | if (!mtype_data_equal(e1: data, e2: d, multi: &multi)) |
| 1068 | continue; |
| 1069 | if (SET_ELEM_EXPIRED(set, data)) |
| 1070 | goto out; |
| 1071 | |
| 1072 | ret = 0; |
| 1073 | clear_bit(nr: i, addr: n->used); |
| 1074 | smp_mb__after_atomic(); |
| 1075 | if (i + 1 == n->pos) |
| 1076 | n->pos--; |
| 1077 | t->hregion[r].elements--; |
| 1078 | #ifdef IP_SET_HASH_WITH_NETS |
| 1079 | for (j = 0; j < IPSET_NET_COUNT; j++) |
| 1080 | mtype_del_cidr(set, h, |
| 1081 | NCIDR_PUT(DCIDR_GET(d->cidr, j)), j); |
| 1082 | #endif |
| 1083 | ip_set_ext_destroy(set, data); |
| 1084 | |
| 1085 | if (atomic_read(v: &t->ref) && ext->target) { |
| 1086 | /* Resize is in process and kernel side del, |
| 1087 | * save values |
| 1088 | */ |
| 1089 | x = kzalloc(sizeof(struct mtype_resize_ad), |
| 1090 | GFP_ATOMIC); |
| 1091 | if (x) { |
| 1092 | x->ad = IPSET_DEL; |
| 1093 | memcpy(&x->d, value, |
| 1094 | sizeof(struct mtype_elem)); |
| 1095 | x->flags = flags; |
| 1096 | } |
| 1097 | } |
| 1098 | for (; i < n->pos; i++) { |
| 1099 | if (!test_bit(i, n->used)) |
| 1100 | k++; |
| 1101 | } |
| 1102 | if (n->pos == 0 && k == 0) { |
| 1103 | t->hregion[r].ext_size -= ext_size(n->size, dsize); |
| 1104 | rcu_assign_pointer(hbucket(t, key), NULL); |
| 1105 | kfree_rcu(n, rcu); |
| 1106 | } else if (k >= AHASH_INIT_SIZE) { |
| 1107 | struct hbucket *tmp = kzalloc(sizeof(*tmp) + |
| 1108 | (n->size - AHASH_INIT_SIZE) * dsize, |
| 1109 | GFP_ATOMIC); |
| 1110 | if (!tmp) |
| 1111 | goto out; |
| 1112 | tmp->size = n->size - AHASH_INIT_SIZE; |
| 1113 | for (j = 0, k = 0; j < n->pos; j++) { |
| 1114 | if (!test_bit(j, n->used)) |
| 1115 | continue; |
| 1116 | data = ahash_data(n, j, dsize); |
| 1117 | memcpy(tmp->value + k * dsize, data, dsize); |
| 1118 | set_bit(nr: k, addr: tmp->used); |
| 1119 | k++; |
| 1120 | } |
| 1121 | tmp->pos = k; |
| 1122 | t->hregion[r].ext_size -= |
| 1123 | ext_size(AHASH_INIT_SIZE, dsize); |
| 1124 | rcu_assign_pointer(hbucket(t, key), tmp); |
| 1125 | kfree_rcu(n, rcu); |
| 1126 | } |
| 1127 | goto out; |
| 1128 | } |
| 1129 | |
| 1130 | out: |
| 1131 | spin_unlock_bh(lock: &t->hregion[r].lock); |
| 1132 | if (x) { |
| 1133 | spin_lock_bh(lock: &set->lock); |
| 1134 | list_add(new: &x->list, head: &h->ad); |
| 1135 | spin_unlock_bh(lock: &set->lock); |
| 1136 | } |
| 1137 | if (atomic_dec_and_test(v: &t->uref) && atomic_read(v: &t->ref)) { |
| 1138 | pr_debug("Table destroy after resize by del: %p\n" , t); |
| 1139 | mtype_ahash_destroy(set, t, ext_destroy: false); |
| 1140 | } |
| 1141 | return ret; |
| 1142 | } |
| 1143 | |
| 1144 | static int |
| 1145 | mtype_data_match(struct mtype_elem *data, const struct ip_set_ext *ext, |
| 1146 | struct ip_set_ext *mext, struct ip_set *set, u32 flags) |
| 1147 | { |
| 1148 | if (!ip_set_match_extensions(set, ext, mext, flags, data)) |
| 1149 | return 0; |
| 1150 | /* nomatch entries return -ENOTEMPTY */ |
| 1151 | return mtype_do_data_match(data); |
| 1152 | } |
| 1153 | |
#ifdef IP_SET_HASH_WITH_NETS
/* Test variant for sets storing networks: the tested element is masked
 * with every network size recorded in h->nets (for two network fields
 * with every combination of them) and each masked value is looked up.
 * The first non-zero verdict of mtype_data_match() is returned,
 * 0 if there was none.
 */
static int
mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d,
		 const struct ip_set_ext *ext,
		 struct ip_set_ext *mext, u32 flags)
{
	struct htype *h = set->data;
	struct htable *t = rcu_dereference_bh(h->table);
	struct mtype_elem *entry;
	struct hbucket *bucket;
	u32 multi = 0, key;
	int verdict, pos, j;
#if IPSET_NET_COUNT == 2
	/* Masking modifies *d: keep a copy to restart from */
	struct mtype_elem orig = *d;
	int k;
#endif

	pr_debug("test by nets\n");
	for (j = 0; j < NLEN && h->nets[j].cidr[0] && !multi; j++) {
#if IPSET_NET_COUNT == 2
		mtype_data_reset_elem(d, &orig);
		mtype_data_netmask(d, NCIDR_GET(h->nets[j].cidr[0]), false);
		for (k = 0; k < NLEN && h->nets[k].cidr[1] && !multi;
		     k++) {
			mtype_data_netmask(d, NCIDR_GET(h->nets[k].cidr[1]),
					   true);
#else
		mtype_data_netmask(d, NCIDR_GET(h->nets[j].cidr[0]));
#endif
			key = HKEY(d, h->initval, t->htable_bits);
			bucket = rcu_dereference_bh(hbucket(t, key));
			if (!bucket)
				continue;
			for (pos = 0; pos < bucket->pos; pos++) {
				if (!test_bit(pos, bucket->used))
					continue;
				entry = ahash_data(bucket, pos, set->dsize);
				if (!mtype_data_equal(entry, d, &multi))
					continue;
				verdict = mtype_data_match(entry, ext, mext,
							   set, flags);
				if (verdict)
					return verdict;
#ifdef IP_SET_HASH_WITH_MULTI
				/* No match, reset multiple match flag */
				multi = 0;
#endif
			}
#if IPSET_NET_COUNT == 2
		}
#endif
	}
	return 0;
}
#endif
| 1212 | |
| 1213 | /* Test whether the element is added to the set */ |
| 1214 | static int |
| 1215 | mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext, |
| 1216 | struct ip_set_ext *mext, u32 flags) |
| 1217 | { |
| 1218 | struct htype *h = set->data; |
| 1219 | struct htable *t; |
| 1220 | struct mtype_elem *d = value; |
| 1221 | struct hbucket *n; |
| 1222 | struct mtype_elem *data; |
| 1223 | int i, ret = 0; |
| 1224 | u32 key, multi = 0; |
| 1225 | |
| 1226 | rcu_read_lock_bh(); |
| 1227 | t = rcu_dereference_bh(h->table); |
| 1228 | #ifdef IP_SET_HASH_WITH_NETS |
| 1229 | /* If we test an IP address and not a network address, |
| 1230 | * try all possible network sizes |
| 1231 | */ |
| 1232 | for (i = 0; i < IPSET_NET_COUNT; i++) |
| 1233 | if (DCIDR_GET(d->cidr, i) != HOST_MASK) |
| 1234 | break; |
| 1235 | if (i == IPSET_NET_COUNT) { |
| 1236 | ret = mtype_test_cidrs(set, d, ext, mext, flags); |
| 1237 | goto out; |
| 1238 | } |
| 1239 | #endif |
| 1240 | |
| 1241 | key = HKEY(d, h->initval, t->htable_bits); |
| 1242 | n = rcu_dereference_bh(hbucket(t, key)); |
| 1243 | if (!n) { |
| 1244 | ret = 0; |
| 1245 | goto out; |
| 1246 | } |
| 1247 | for (i = 0; i < n->pos; i++) { |
| 1248 | if (!test_bit(i, n->used)) |
| 1249 | continue; |
| 1250 | data = ahash_data(n, i, set->dsize); |
| 1251 | if (!mtype_data_equal(e1: data, e2: d, multi: &multi)) |
| 1252 | continue; |
| 1253 | ret = mtype_data_match(data, ext, mext, set, flags); |
| 1254 | if (ret != 0) |
| 1255 | goto out; |
| 1256 | } |
| 1257 | out: |
| 1258 | rcu_read_unlock_bh(); |
| 1259 | return ret; |
| 1260 | } |
| 1261 | |
| 1262 | /* Reply a HEADER request: fill out the header part of the set */ |
| 1263 | static int |
| 1264 | mtype_head(struct ip_set *set, struct sk_buff *skb) |
| 1265 | { |
| 1266 | struct htype *h = set->data; |
| 1267 | const struct htable *t; |
| 1268 | struct nlattr *nested; |
| 1269 | size_t memsize; |
| 1270 | u32 elements = 0; |
| 1271 | size_t ext_size = 0; |
| 1272 | u8 htable_bits; |
| 1273 | |
| 1274 | rcu_read_lock_bh(); |
| 1275 | t = rcu_dereference_bh(h->table); |
| 1276 | mtype_ext_size(set, elements: &elements, ext_size: &ext_size); |
| 1277 | memsize = mtype_ahash_memsize(h, t) + ext_size + set->ext_size; |
| 1278 | htable_bits = t->htable_bits; |
| 1279 | rcu_read_unlock_bh(); |
| 1280 | |
| 1281 | nested = nla_nest_start(skb, attrtype: IPSET_ATTR_DATA); |
| 1282 | if (!nested) |
| 1283 | goto nla_put_failure; |
| 1284 | if (nla_put_net32(skb, attrtype: IPSET_ATTR_HASHSIZE, |
| 1285 | htonl(jhash_size(htable_bits))) || |
| 1286 | nla_put_net32(skb, attrtype: IPSET_ATTR_MAXELEM, htonl(h->maxelem))) |
| 1287 | goto nla_put_failure; |
| 1288 | #ifdef IP_SET_HASH_WITH_BITMASK |
| 1289 | /* if netmask is set to anything other than HOST_MASK we know that the user supplied netmask |
| 1290 | * and not bitmask. These two are mutually exclusive. */ |
| 1291 | if (h->netmask == HOST_MASK && !nf_inet_addr_cmp(a1: &onesmask, a2: &h->bitmask)) { |
| 1292 | if (set->family == NFPROTO_IPV4) { |
| 1293 | if (nla_put_ipaddr4(skb, type: IPSET_ATTR_BITMASK, ipaddr: h->bitmask.ip)) |
| 1294 | goto nla_put_failure; |
| 1295 | } else if (set->family == NFPROTO_IPV6) { |
| 1296 | if (nla_put_ipaddr6(skb, type: IPSET_ATTR_BITMASK, ipaddrptr: &h->bitmask.in6)) |
| 1297 | goto nla_put_failure; |
| 1298 | } |
| 1299 | } |
| 1300 | #endif |
| 1301 | #ifdef IP_SET_HASH_WITH_NETMASK |
| 1302 | if (h->netmask != HOST_MASK && nla_put_u8(skb, attrtype: IPSET_ATTR_NETMASK, value: h->netmask)) |
| 1303 | goto nla_put_failure; |
| 1304 | #endif |
| 1305 | #ifdef IP_SET_HASH_WITH_MARKMASK |
| 1306 | if (nla_put_u32(skb, IPSET_ATTR_MARKMASK, h->markmask)) |
| 1307 | goto nla_put_failure; |
| 1308 | #endif |
| 1309 | if (set->flags & IPSET_CREATE_FLAG_BUCKETSIZE) { |
| 1310 | if (nla_put_u8(skb, attrtype: IPSET_ATTR_BUCKETSIZE, value: h->bucketsize) || |
| 1311 | nla_put_net32(skb, attrtype: IPSET_ATTR_INITVAL, htonl(h->initval))) |
| 1312 | goto nla_put_failure; |
| 1313 | } |
| 1314 | if (nla_put_net32(skb, attrtype: IPSET_ATTR_REFERENCES, htonl(set->ref)) || |
| 1315 | nla_put_net32(skb, attrtype: IPSET_ATTR_MEMSIZE, htonl(memsize)) || |
| 1316 | nla_put_net32(skb, attrtype: IPSET_ATTR_ELEMENTS, htonl(elements))) |
| 1317 | goto nla_put_failure; |
| 1318 | if (unlikely(ip_set_put_flags(skb, set))) |
| 1319 | goto nla_put_failure; |
| 1320 | nla_nest_end(skb, start: nested); |
| 1321 | |
| 1322 | return 0; |
| 1323 | nla_put_failure: |
| 1324 | return -EMSGSIZE; |
| 1325 | } |
| 1326 | |
| 1327 | /* Make possible to run dumping parallel with resizing */ |
| 1328 | static void |
| 1329 | mtype_uref(struct ip_set *set, struct netlink_callback *cb, bool start) |
| 1330 | { |
| 1331 | struct htype *h = set->data; |
| 1332 | struct htable *t; |
| 1333 | |
| 1334 | if (start) { |
| 1335 | rcu_read_lock_bh(); |
| 1336 | t = ipset_dereference_bh_nfnl(h->table); |
| 1337 | atomic_inc(v: &t->uref); |
| 1338 | cb->args[IPSET_CB_PRIVATE] = (unsigned long)t; |
| 1339 | rcu_read_unlock_bh(); |
| 1340 | } else if (cb->args[IPSET_CB_PRIVATE]) { |
| 1341 | t = (struct htable *)cb->args[IPSET_CB_PRIVATE]; |
| 1342 | if (atomic_dec_and_test(v: &t->uref) && atomic_read(v: &t->ref)) { |
| 1343 | pr_debug("Table destroy after resize " |
| 1344 | " by dump: %p\n" , t); |
| 1345 | mtype_ahash_destroy(set, t, ext_destroy: false); |
| 1346 | } |
| 1347 | cb->args[IPSET_CB_PRIVATE] = 0; |
| 1348 | } |
| 1349 | } |
| 1350 | |
| 1351 | /* Reply a LIST/SAVE request: dump the elements of the specified set */ |
| 1352 | static int |
| 1353 | mtype_list(const struct ip_set *set, |
| 1354 | struct sk_buff *skb, struct netlink_callback *cb) |
| 1355 | { |
| 1356 | const struct htable *t; |
| 1357 | struct nlattr *atd, *nested; |
| 1358 | const struct hbucket *n; |
| 1359 | const struct mtype_elem *e; |
| 1360 | u32 first = cb->args[IPSET_CB_ARG0]; |
| 1361 | /* We assume that one hash bucket fills into one page */ |
| 1362 | void *incomplete; |
| 1363 | int i, ret = 0; |
| 1364 | |
| 1365 | atd = nla_nest_start(skb, attrtype: IPSET_ATTR_ADT); |
| 1366 | if (!atd) |
| 1367 | return -EMSGSIZE; |
| 1368 | |
| 1369 | pr_debug("list hash set %s\n" , set->name); |
| 1370 | t = (const struct htable *)cb->args[IPSET_CB_PRIVATE]; |
| 1371 | /* Expire may replace a hbucket with another one */ |
| 1372 | rcu_read_lock(); |
| 1373 | for (; cb->args[IPSET_CB_ARG0] < jhash_size(t->htable_bits); |
| 1374 | cb->args[IPSET_CB_ARG0]++) { |
| 1375 | cond_resched_rcu(); |
| 1376 | incomplete = skb_tail_pointer(skb); |
| 1377 | n = rcu_dereference(hbucket(t, cb->args[IPSET_CB_ARG0])); |
| 1378 | pr_debug("cb->arg bucket: %lu, t %p n %p\n" , |
| 1379 | cb->args[IPSET_CB_ARG0], t, n); |
| 1380 | if (!n) |
| 1381 | continue; |
| 1382 | for (i = 0; i < n->pos; i++) { |
| 1383 | if (!test_bit(i, n->used)) |
| 1384 | continue; |
| 1385 | e = ahash_data(n, i, set->dsize); |
| 1386 | if (SET_ELEM_EXPIRED(set, e)) |
| 1387 | continue; |
| 1388 | pr_debug("list hash %lu hbucket %p i %u, data %p\n" , |
| 1389 | cb->args[IPSET_CB_ARG0], n, i, e); |
| 1390 | nested = nla_nest_start(skb, attrtype: IPSET_ATTR_DATA); |
| 1391 | if (!nested) { |
| 1392 | if (cb->args[IPSET_CB_ARG0] == first) { |
| 1393 | nla_nest_cancel(skb, start: atd); |
| 1394 | ret = -EMSGSIZE; |
| 1395 | goto out; |
| 1396 | } |
| 1397 | goto nla_put_failure; |
| 1398 | } |
| 1399 | if (mtype_data_list(skb, e)) |
| 1400 | goto nla_put_failure; |
| 1401 | if (ip_set_put_extensions(skb, set, e, active: true)) |
| 1402 | goto nla_put_failure; |
| 1403 | nla_nest_end(skb, start: nested); |
| 1404 | } |
| 1405 | } |
| 1406 | nla_nest_end(skb, start: atd); |
| 1407 | /* Set listing finished */ |
| 1408 | cb->args[IPSET_CB_ARG0] = 0; |
| 1409 | |
| 1410 | goto out; |
| 1411 | |
| 1412 | nla_put_failure: |
| 1413 | nlmsg_trim(skb, mark: incomplete); |
| 1414 | if (unlikely(first == cb->args[IPSET_CB_ARG0])) { |
| 1415 | pr_warn("Can't list set %s: one bucket does not fit into a message. Please report it!\n" , |
| 1416 | set->name); |
| 1417 | cb->args[IPSET_CB_ARG0] = 0; |
| 1418 | ret = -EMSGSIZE; |
| 1419 | } else { |
| 1420 | nla_nest_end(skb, start: atd); |
| 1421 | } |
| 1422 | out: |
| 1423 | rcu_read_unlock(); |
| 1424 | return ret; |
| 1425 | } |
| 1426 | |
| 1427 | static int |
| 1428 | IPSET_TOKEN(MTYPE, _kadt)(struct ip_set *set, const struct sk_buff *skb, |
| 1429 | const struct xt_action_param *par, |
| 1430 | enum ipset_adt adt, struct ip_set_adt_opt *opt); |
| 1431 | |
| 1432 | static int |
| 1433 | IPSET_TOKEN(MTYPE, _uadt)(struct ip_set *set, struct nlattr *tb[], |
| 1434 | enum ipset_adt adt, u32 *lineno, u32 flags, |
| 1435 | bool retried); |
| 1436 | |
| 1437 | static const struct ip_set_type_variant mtype_variant = { |
| 1438 | .kadt = mtype_kadt, |
| 1439 | .uadt = mtype_uadt, |
| 1440 | .adt = { |
| 1441 | [IPSET_ADD] = mtype_add, |
| 1442 | [IPSET_DEL] = mtype_del, |
| 1443 | [IPSET_TEST] = mtype_test, |
| 1444 | }, |
| 1445 | .destroy = mtype_destroy, |
| 1446 | .flush = mtype_flush, |
| 1447 | .head = mtype_head, |
| 1448 | .list = mtype_list, |
| 1449 | .uref = mtype_uref, |
| 1450 | .resize = mtype_resize, |
| 1451 | .same_set = mtype_same_set, |
| 1452 | .cancel_gc = mtype_cancel_gc, |
| 1453 | .region_lock = true, |
| 1454 | }; |
| 1455 | |
#ifdef IP_SET_EMIT_CREATE
/* Create a set of this hash type: validate the netlink attributes,
 * allocate the type private data with an empty hash table, store the
 * tunables and select the IPv4 or IPv6 variant.
 *
 * Returns 0 on success, -IPSET_ERR_INVALID_FAMILY, -IPSET_ERR_PROTOCOL,
 * -IPSET_ERR_INVALID_MARKMASK, -IPSET_ERR_INVALID_NETMASK or
 * -IPSET_ERR_BITMASK_NETMASK_EXCL on bad input and -ENOMEM when an
 * allocation fails or the table size cannot be computed.
 */
static int
IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
			    struct nlattr *tb[], u32 flags)
{
	u32 maxelem = IPSET_DEFAULT_MAXELEM;
	u32 hashsize = IPSET_DEFAULT_HASHSIZE;
#ifdef IP_SET_HASH_WITH_MARKMASK
	u32 markmask = 0xffffffff;
#endif
#if defined(IP_SET_HASH_WITH_NETMASK) || defined(IP_SET_HASH_WITH_BITMASK)
	int ret __attribute__((unused)) = 0;
	u8 netmask = set->family == NFPROTO_IPV4 ? 32 : 128;
	union nf_inet_addr bitmask = onesmask;
#endif
	struct htable *tbl;
	struct htype *h;
	size_t tbl_bytes;
	u32 idx;
	u8 nbits;

	pr_debug("Create set %s with family %s\n",
		 set->name, set->family == NFPROTO_IPV4 ? "inet" : "inet6");

#ifdef IP_SET_PROTO_UNDEF
	if (set->family != NFPROTO_UNSPEC)
		return -IPSET_ERR_INVALID_FAMILY;
#else
	if (set->family != NFPROTO_IPV4 && set->family != NFPROTO_IPV6)
		return -IPSET_ERR_INVALID_FAMILY;
#endif

	if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) ||
		     !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) ||
		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
		return -IPSET_ERR_PROTOCOL;

#ifdef IP_SET_HASH_WITH_MARKMASK
	/* Separated condition in order to avoid directive in argument list */
	if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_MARKMASK)))
		return -IPSET_ERR_PROTOCOL;

	if (tb[IPSET_ATTR_MARKMASK]) {
		markmask = ntohl(nla_get_be32(tb[IPSET_ATTR_MARKMASK]));
		if (!markmask)
			return -IPSET_ERR_INVALID_MARKMASK;
	}
#endif

#ifdef IP_SET_HASH_WITH_NETMASK
	if (tb[IPSET_ATTR_NETMASK]) {
		netmask = nla_get_u8(tb[IPSET_ATTR_NETMASK]);

		if (netmask == 0 ||
		    (set->family == NFPROTO_IPV4 && netmask > 32) ||
		    (set->family == NFPROTO_IPV6 && netmask > 128))
			return -IPSET_ERR_INVALID_NETMASK;

		/* we convert netmask to bitmask and store it */
		if (set->family == NFPROTO_IPV4)
			bitmask.ip = ip_set_netmask(netmask);
		else
			ip6_netmask(&bitmask, netmask);
	}
#endif

#ifdef IP_SET_HASH_WITH_BITMASK
	if (tb[IPSET_ATTR_BITMASK]) {
		/* bitmask and netmask do the same thing, allow only one of
		 * these options
		 */
		if (tb[IPSET_ATTR_NETMASK])
			return -IPSET_ERR_BITMASK_NETMASK_EXCL;

		if (set->family == NFPROTO_IPV4) {
			ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_BITMASK],
						 &bitmask.ip);
			if (ret || !bitmask.ip)
				return -IPSET_ERR_INVALID_NETMASK;
		} else if (set->family == NFPROTO_IPV6) {
			ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_BITMASK],
						 &bitmask);
			if (ret || ipv6_addr_any(&bitmask.in6))
				return -IPSET_ERR_INVALID_NETMASK;
		}

		if (nf_inet_addr_cmp(&bitmask, &zeromask))
			return -IPSET_ERR_INVALID_NETMASK;
	}
#endif

	if (tb[IPSET_ATTR_HASHSIZE]) {
		hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]);
		/* Too small values are silently raised to the minimum */
		if (hashsize < IPSET_MIMINAL_HASHSIZE)
			hashsize = IPSET_MIMINAL_HASHSIZE;
	}

	if (tb[IPSET_ATTR_MAXELEM])
		maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]);

	h = kzalloc(sizeof(*h), GFP_KERNEL);
	if (!h)
		return -ENOMEM;

	/* Compute htable_bits from the user input parameter hashsize.
	 * Assume that hashsize == 2^htable_bits,
	 * otherwise round up to the first 2^n value.
	 */
	nbits = fls(hashsize - 1);
	tbl_bytes = htable_size(nbits);
	if (!tbl_bytes)
		goto free_h;
	tbl = ip_set_alloc(tbl_bytes);
	if (!tbl)
		goto free_h;
	tbl->hregion = ip_set_alloc(ahash_sizeof_regions(nbits));
	if (!tbl->hregion)
		goto free_tbl;

	h->gc.set = set;
	for (idx = 0; idx < ahash_numof_locks(nbits); idx++)
		spin_lock_init(&tbl->hregion[idx].lock);
	h->maxelem = maxelem;
#if defined(IP_SET_HASH_WITH_NETMASK) || defined(IP_SET_HASH_WITH_BITMASK)
	h->bitmask = bitmask;
	h->netmask = netmask;
#endif
#ifdef IP_SET_HASH_WITH_MARKMASK
	h->markmask = markmask;
#endif
	/* Use the supplied hash seed if there is one, a random one otherwise */
	if (tb[IPSET_ATTR_INITVAL])
		h->initval = ntohl(nla_get_be32(tb[IPSET_ATTR_INITVAL]));
	else
		get_random_bytes(&h->initval, sizeof(h->initval));

	h->bucketsize = AHASH_MAX_SIZE;
	if (tb[IPSET_ATTR_BUCKETSIZE]) {
		/* Clamp into [AHASH_INIT_SIZE, AHASH_MAX_SIZE], round odd
		 * values in between up to the next even one
		 */
		h->bucketsize = nla_get_u8(tb[IPSET_ATTR_BUCKETSIZE]);
		if (h->bucketsize < AHASH_INIT_SIZE) {
			h->bucketsize = AHASH_INIT_SIZE;
		} else if (h->bucketsize > AHASH_MAX_SIZE) {
			h->bucketsize = AHASH_MAX_SIZE;
		} else if (h->bucketsize & 1) {
			h->bucketsize++;
		}
	}
	tbl->htable_bits = nbits;
	tbl->maxelem = h->maxelem / ahash_numof_locks(nbits);
	RCU_INIT_POINTER(h->table, tbl);

	INIT_LIST_HEAD(&h->ad);
	set->data = h;
#ifndef IP_SET_PROTO_UNDEF
	if (set->family == NFPROTO_IPV4) {
#endif
		set->variant = &IPSET_TOKEN(HTYPE, 4_variant);
		set->dsize = ip_set_elem_len(set, tb,
			sizeof(struct IPSET_TOKEN(HTYPE, 4_elem)),
			__alignof__(struct IPSET_TOKEN(HTYPE, 4_elem)));
#ifndef IP_SET_PROTO_UNDEF
	} else {
		set->variant = &IPSET_TOKEN(HTYPE, 6_variant);
		set->dsize = ip_set_elem_len(set, tb,
			sizeof(struct IPSET_TOKEN(HTYPE, 6_elem)),
			__alignof__(struct IPSET_TOKEN(HTYPE, 6_elem)));
	}
#endif
	set->timeout = IPSET_NO_TIMEOUT;
	if (tb[IPSET_ATTR_TIMEOUT]) {
		set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
#ifndef IP_SET_PROTO_UNDEF
		if (set->family == NFPROTO_IPV4)
#endif
			IPSET_TOKEN(HTYPE, 4_gc_init)(&h->gc);
#ifndef IP_SET_PROTO_UNDEF
		else
			IPSET_TOKEN(HTYPE, 6_gc_init)(&h->gc);
#endif
	}
	pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n",
		 set->name, jhash_size(tbl->htable_bits),
		 tbl->htable_bits, h->maxelem, set->data, tbl);

	return 0;

free_tbl:
	ip_set_free(tbl);
free_h:
	kfree(h);
	return -ENOMEM;
}
#endif /* IP_SET_EMIT_CREATE */
| 1644 | |
| 1645 | #undef HKEY_DATALEN |
| 1646 | |