1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* |
3 | * net/sched/cls_u32.c Ugly (or Universal) 32bit key Packet Classifier. |
4 | * |
5 | * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> |
6 | * |
7 | * The filters are packed to hash tables of key nodes |
8 | * with a set of 32bit key/mask pairs at every node. |
9 | * Nodes reference next level hash tables etc. |
10 | * |
11 | * This scheme is the best universal classifier I managed to |
12 | * invent; it is not super-fast, but it is not slow (provided you |
13 | * program it correctly), and general enough. And its relative |
14 | * speed grows as the number of rules becomes larger. |
15 | * |
16 | * It seems that it represents the best middle point between |
17 | * speed and manageability both by human and by machine. |
18 | * |
19 | * It is especially useful for link sharing combined with QoS; |
20 | * pure RSVP doesn't need such a general approach and can use |
21 | * much simpler (and faster) schemes, sort of cls_rsvp.c. |
22 | * |
23 | * nfmark match added by Catalin(ux aka Dino) BOIE <catab at umbrella.ro> |
24 | */ |
25 | |
26 | #include <linux/module.h> |
27 | #include <linux/slab.h> |
28 | #include <linux/types.h> |
29 | #include <linux/kernel.h> |
30 | #include <linux/string.h> |
31 | #include <linux/errno.h> |
32 | #include <linux/percpu.h> |
33 | #include <linux/rtnetlink.h> |
34 | #include <linux/skbuff.h> |
35 | #include <linux/bitmap.h> |
36 | #include <linux/netdevice.h> |
37 | #include <linux/hash.h> |
38 | #include <net/netlink.h> |
39 | #include <net/act_api.h> |
40 | #include <net/pkt_cls.h> |
41 | #include <linux/idr.h> |
42 | #include <net/tc_wrapper.h> |
43 | |
44 | struct tc_u_knode { |
45 | struct tc_u_knode __rcu *next; |
46 | u32 handle; |
47 | struct tc_u_hnode __rcu *ht_up; |
48 | struct tcf_exts exts; |
49 | int ifindex; |
50 | u8 fshift; |
51 | struct tcf_result res; |
52 | struct tc_u_hnode __rcu *ht_down; |
53 | #ifdef CONFIG_CLS_U32_PERF |
54 | struct tc_u32_pcnt __percpu *pf; |
55 | #endif |
56 | u32 flags; |
57 | unsigned int in_hw_count; |
58 | #ifdef CONFIG_CLS_U32_MARK |
59 | u32 val; |
60 | u32 mask; |
61 | u32 __percpu *pcpu_success; |
62 | #endif |
63 | struct rcu_work rwork; |
64 | /* The 'sel' field MUST be the last field in structure to allow for |
65 | * tc_u32_keys allocated at end of structure. |
66 | */ |
67 | struct tc_u32_sel sel; |
68 | }; |
69 | |
70 | struct tc_u_hnode { |
71 | struct tc_u_hnode __rcu *next; |
72 | u32 handle; |
73 | u32 prio; |
74 | refcount_t refcnt; |
75 | unsigned int divisor; |
76 | struct idr handle_idr; |
77 | bool is_root; |
78 | struct rcu_head rcu; |
79 | u32 flags; |
80 | /* The 'ht' field MUST be the last field in structure to allow for |
81 | * more entries allocated at end of structure. |
82 | */ |
83 | struct tc_u_knode __rcu *ht[]; |
84 | }; |
85 | |
86 | struct tc_u_common { |
87 | struct tc_u_hnode __rcu *hlist; |
88 | void *ptr; |
89 | refcount_t refcnt; |
90 | struct idr handle_idr; |
91 | struct hlist_node hnode; |
92 | long knodes; |
93 | }; |
94 | |
95 | static inline unsigned int u32_hash_fold(__be32 key, |
96 | const struct tc_u32_sel *sel, |
97 | u8 fshift) |
98 | { |
99 | unsigned int h = ntohl(key & sel->hmask) >> fshift; |
100 | |
101 | return h; |
102 | } |
103 | |
104 | TC_INDIRECT_SCOPE int u32_classify(struct sk_buff *skb, |
105 | const struct tcf_proto *tp, |
106 | struct tcf_result *res) |
107 | { |
108 | struct { |
109 | struct tc_u_knode *knode; |
110 | unsigned int off; |
111 | } stack[TC_U32_MAXDEPTH]; |
112 | |
113 | struct tc_u_hnode *ht = rcu_dereference_bh(tp->root); |
114 | unsigned int off = skb_network_offset(skb); |
115 | struct tc_u_knode *n; |
116 | int sdepth = 0; |
117 | int off2 = 0; |
118 | int sel = 0; |
119 | #ifdef CONFIG_CLS_U32_PERF |
120 | int j; |
121 | #endif |
122 | int i, r; |
123 | |
124 | next_ht: |
125 | n = rcu_dereference_bh(ht->ht[sel]); |
126 | |
127 | next_knode: |
128 | if (n) { |
129 | struct tc_u32_key *key = n->sel.keys; |
130 | |
131 | #ifdef CONFIG_CLS_U32_PERF |
132 | __this_cpu_inc(n->pf->rcnt); |
133 | j = 0; |
134 | #endif |
135 | |
		if (tc_skip_sw(n->flags)) {
137 | n = rcu_dereference_bh(n->next); |
138 | goto next_knode; |
139 | } |
140 | |
141 | #ifdef CONFIG_CLS_U32_MARK |
142 | if ((skb->mark & n->mask) != n->val) { |
143 | n = rcu_dereference_bh(n->next); |
144 | goto next_knode; |
145 | } else { |
146 | __this_cpu_inc(*n->pcpu_success); |
147 | } |
148 | #endif |
149 | |
150 | for (i = n->sel.nkeys; i > 0; i--, key++) { |
151 | int toff = off + key->off + (off2 & key->offmask); |
152 | __be32 *data, hdata; |
153 | |
154 | if (skb_headroom(skb) + toff > INT_MAX) |
155 | goto out; |
156 | |
			data = skb_header_pointer(skb, toff, 4, &hdata);
158 | if (!data) |
159 | goto out; |
160 | if ((*data ^ key->val) & key->mask) { |
161 | n = rcu_dereference_bh(n->next); |
162 | goto next_knode; |
163 | } |
164 | #ifdef CONFIG_CLS_U32_PERF |
165 | __this_cpu_inc(n->pf->kcnts[j]); |
166 | j++; |
167 | #endif |
168 | } |
169 | |
170 | ht = rcu_dereference_bh(n->ht_down); |
171 | if (!ht) { |
172 | check_terminal: |
173 | if (n->sel.flags & TC_U32_TERMINAL) { |
174 | |
175 | *res = n->res; |
				if (!tcf_match_indev(skb, n->ifindex)) {
177 | n = rcu_dereference_bh(n->next); |
178 | goto next_knode; |
179 | } |
180 | #ifdef CONFIG_CLS_U32_PERF |
181 | __this_cpu_inc(n->pf->rhit); |
182 | #endif |
				r = tcf_exts_exec(skb, &n->exts, res);
184 | if (r < 0) { |
185 | n = rcu_dereference_bh(n->next); |
186 | goto next_knode; |
187 | } |
188 | |
189 | return r; |
190 | } |
191 | n = rcu_dereference_bh(n->next); |
192 | goto next_knode; |
193 | } |
194 | |
195 | /* PUSH */ |
196 | if (sdepth >= TC_U32_MAXDEPTH) |
197 | goto deadloop; |
198 | stack[sdepth].knode = n; |
199 | stack[sdepth].off = off; |
200 | sdepth++; |
201 | |
202 | ht = rcu_dereference_bh(n->ht_down); |
203 | sel = 0; |
204 | if (ht->divisor) { |
205 | __be32 *data, hdata; |
206 | |
			data = skb_header_pointer(skb, off + n->sel.hoff, 4,
						  &hdata);
209 | if (!data) |
210 | goto out; |
			sel = ht->divisor & u32_hash_fold(*data, &n->sel,
							  n->fshift);
213 | } |
214 | if (!(n->sel.flags & (TC_U32_VAROFFSET | TC_U32_OFFSET | TC_U32_EAT))) |
215 | goto next_ht; |
216 | |
217 | if (n->sel.flags & (TC_U32_OFFSET | TC_U32_VAROFFSET)) { |
218 | off2 = n->sel.off + 3; |
219 | if (n->sel.flags & TC_U32_VAROFFSET) { |
220 | __be16 *data, hdata; |
221 | |
				data = skb_header_pointer(skb,
							  off + n->sel.offoff,
							  2, &hdata);
225 | if (!data) |
226 | goto out; |
227 | off2 += ntohs(n->sel.offmask & *data) >> |
228 | n->sel.offshift; |
229 | } |
230 | off2 &= ~3; |
231 | } |
232 | if (n->sel.flags & TC_U32_EAT) { |
233 | off += off2; |
234 | off2 = 0; |
235 | } |
236 | |
237 | if (off < skb->len) |
238 | goto next_ht; |
239 | } |
240 | |
241 | /* POP */ |
242 | if (sdepth--) { |
243 | n = stack[sdepth].knode; |
244 | ht = rcu_dereference_bh(n->ht_up); |
245 | off = stack[sdepth].off; |
246 | goto check_terminal; |
247 | } |
248 | out: |
249 | return -1; |
250 | |
251 | deadloop: |
	net_warn_ratelimited("cls_u32: dead loop\n");
253 | return -1; |
254 | } |
255 | |
256 | static struct tc_u_hnode *u32_lookup_ht(struct tc_u_common *tp_c, u32 handle) |
257 | { |
258 | struct tc_u_hnode *ht; |
259 | |
260 | for (ht = rtnl_dereference(tp_c->hlist); |
261 | ht; |
262 | ht = rtnl_dereference(ht->next)) |
263 | if (ht->handle == handle) |
264 | break; |
265 | |
266 | return ht; |
267 | } |
268 | |
269 | static struct tc_u_knode *u32_lookup_key(struct tc_u_hnode *ht, u32 handle) |
270 | { |
271 | unsigned int sel; |
272 | struct tc_u_knode *n = NULL; |
273 | |
274 | sel = TC_U32_HASH(handle); |
275 | if (sel > ht->divisor) |
276 | goto out; |
277 | |
278 | for (n = rtnl_dereference(ht->ht[sel]); |
279 | n; |
280 | n = rtnl_dereference(n->next)) |
281 | if (n->handle == handle) |
282 | break; |
283 | out: |
284 | return n; |
285 | } |
286 | |
287 | |
288 | static void *u32_get(struct tcf_proto *tp, u32 handle) |
289 | { |
290 | struct tc_u_hnode *ht; |
291 | struct tc_u_common *tp_c = tp->data; |
292 | |
293 | if (TC_U32_HTID(handle) == TC_U32_ROOT) |
294 | ht = rtnl_dereference(tp->root); |
295 | else |
296 | ht = u32_lookup_ht(tp_c, TC_U32_HTID(handle)); |
297 | |
298 | if (!ht) |
299 | return NULL; |
300 | |
301 | if (TC_U32_KEY(handle) == 0) |
302 | return ht; |
303 | |
304 | return u32_lookup_key(ht, handle); |
305 | } |
306 | |
307 | /* Protected by rtnl lock */ |
308 | static u32 gen_new_htid(struct tc_u_common *tp_c, struct tc_u_hnode *ptr) |
309 | { |
	int id = idr_alloc_cyclic(&tp_c->handle_idr, ptr, 1, 0x7FF, GFP_KERNEL);

311 | if (id < 0) |
312 | return 0; |
313 | return (id | 0x800U) << 20; |
314 | } |
315 | |
316 | static struct hlist_head *tc_u_common_hash; |
317 | |
318 | #define U32_HASH_SHIFT 10 |
319 | #define U32_HASH_SIZE (1 << U32_HASH_SHIFT) |
320 | |
321 | static void *tc_u_common_ptr(const struct tcf_proto *tp) |
322 | { |
323 | struct tcf_block *block = tp->chain->block; |
324 | |
325 | /* The block sharing is currently supported only |
326 | * for classless qdiscs. In that case we use block |
327 | * for tc_u_common identification. In case the |
328 | * block is not shared, block->q is a valid pointer |
329 | * and we can use that. That works for classful qdiscs. |
330 | */ |
331 | if (tcf_block_shared(block)) |
332 | return block; |
333 | else |
334 | return block->q; |
335 | } |
336 | |
337 | static struct hlist_head *tc_u_hash(void *key) |
338 | { |
	return tc_u_common_hash + hash_ptr(key, U32_HASH_SHIFT);
340 | } |
341 | |
342 | static struct tc_u_common *tc_u_common_find(void *key) |
343 | { |
344 | struct tc_u_common *tc; |
345 | hlist_for_each_entry(tc, tc_u_hash(key), hnode) { |
346 | if (tc->ptr == key) |
347 | return tc; |
348 | } |
349 | return NULL; |
350 | } |
351 | |
352 | static int u32_init(struct tcf_proto *tp) |
353 | { |
354 | struct tc_u_hnode *root_ht; |
355 | void *key = tc_u_common_ptr(tp); |
356 | struct tc_u_common *tp_c = tc_u_common_find(key); |
357 | |
358 | root_ht = kzalloc(struct_size(root_ht, ht, 1), GFP_KERNEL); |
359 | if (root_ht == NULL) |
360 | return -ENOBUFS; |
361 | |
	refcount_set(&root_ht->refcnt, 1);
	root_ht->handle = tp_c ? gen_new_htid(tp_c, root_ht) : 0x80000000;
364 | root_ht->prio = tp->prio; |
365 | root_ht->is_root = true; |
	idr_init(&root_ht->handle_idr);
367 | |
368 | if (tp_c == NULL) { |
		tp_c = kzalloc(sizeof(*tp_c), GFP_KERNEL);
		if (tp_c == NULL) {
			kfree(root_ht);
			return -ENOBUFS;
		}
		refcount_set(&tp_c->refcnt, 1);
		tp_c->ptr = key;
		INIT_HLIST_NODE(&tp_c->hnode);
		idr_init(&tp_c->handle_idr);

		hlist_add_head(&tp_c->hnode, tc_u_hash(key));
	} else {
		refcount_inc(&tp_c->refcnt);
382 | } |
383 | |
384 | RCU_INIT_POINTER(root_ht->next, tp_c->hlist); |
385 | rcu_assign_pointer(tp_c->hlist, root_ht); |
386 | |
387 | /* root_ht must be destroyed when tcf_proto is destroyed */ |
388 | rcu_assign_pointer(tp->root, root_ht); |
389 | tp->data = tp_c; |
390 | return 0; |
391 | } |
392 | |
393 | static void __u32_destroy_key(struct tc_u_knode *n) |
394 | { |
395 | struct tc_u_hnode *ht = rtnl_dereference(n->ht_down); |
396 | |
	tcf_exts_destroy(&n->exts);
	if (ht && refcount_dec_and_test(&ht->refcnt))
		kfree(ht);
	kfree(n);
401 | } |
402 | |
403 | static void u32_destroy_key(struct tc_u_knode *n, bool free_pf) |
404 | { |
	tcf_exts_put_net(&n->exts);
#ifdef CONFIG_CLS_U32_PERF
	if (free_pf)
		free_percpu(n->pf);
#endif
#ifdef CONFIG_CLS_U32_MARK
	if (free_pf)
		free_percpu(n->pcpu_success);
413 | #endif |
414 | __u32_destroy_key(n); |
415 | } |
416 | |
/* u32_delete_key_work() should be called when freeing a copied
 * version of a tc_u_knode obtained from u32_init_knode(). When
 * copies are obtained from u32_init_knode() the statistics are
 * shared between the old and new copies to allow readers to
 * continue to update the statistics during the copy. To support
 * this the u32_delete_key_work() variant does not free the percpu
 * statistics.
424 | */ |
425 | static void u32_delete_key_work(struct work_struct *work) |
426 | { |
427 | struct tc_u_knode *key = container_of(to_rcu_work(work), |
428 | struct tc_u_knode, |
429 | rwork); |
430 | rtnl_lock(); |
	u32_destroy_key(key, false);
432 | rtnl_unlock(); |
433 | } |
434 | |
/* u32_delete_key_freepf_work() is the deferred-work variant
 * that frees the entire structure including the statistics
 * percpu variables. Only use this if the key is not a copy
 * returned by u32_init_knode(). See u32_delete_key_work()
 * for the variant that should be used with keys returned from
 * u32_init_knode().
441 | */ |
442 | static void u32_delete_key_freepf_work(struct work_struct *work) |
443 | { |
444 | struct tc_u_knode *key = container_of(to_rcu_work(work), |
445 | struct tc_u_knode, |
446 | rwork); |
447 | rtnl_lock(); |
	u32_destroy_key(key, true);
449 | rtnl_unlock(); |
450 | } |
451 | |
452 | static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key) |
453 | { |
454 | struct tc_u_common *tp_c = tp->data; |
455 | struct tc_u_knode __rcu **kp; |
456 | struct tc_u_knode *pkp; |
457 | struct tc_u_hnode *ht = rtnl_dereference(key->ht_up); |
458 | |
459 | if (ht) { |
460 | kp = &ht->ht[TC_U32_HASH(key->handle)]; |
461 | for (pkp = rtnl_dereference(*kp); pkp; |
462 | kp = &pkp->next, pkp = rtnl_dereference(*kp)) { |
463 | if (pkp == key) { |
464 | RCU_INIT_POINTER(*kp, key->next); |
465 | tp_c->knodes--; |
466 | |
				tcf_unbind_filter(tp, &key->res);
				idr_remove(&ht->handle_idr, key->handle);
				tcf_exts_get_net(&key->exts);
				tcf_queue_work(&key->rwork, u32_delete_key_freepf_work);
471 | return 0; |
472 | } |
473 | } |
474 | } |
475 | WARN_ON(1); |
476 | return 0; |
477 | } |
478 | |
479 | static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h, |
480 | struct netlink_ext_ack *extack) |
481 | { |
482 | struct tcf_block *block = tp->chain->block; |
483 | struct tc_cls_u32_offload cls_u32 = {}; |
484 | |
	tc_cls_common_offload_init(&cls_u32.common, tp, h->flags, extack);
486 | cls_u32.command = TC_CLSU32_DELETE_HNODE; |
487 | cls_u32.hnode.divisor = h->divisor; |
488 | cls_u32.hnode.handle = h->handle; |
489 | cls_u32.hnode.prio = h->prio; |
490 | |
	tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, false, true);
492 | } |
493 | |
494 | static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h, |
495 | u32 flags, struct netlink_ext_ack *extack) |
496 | { |
497 | struct tcf_block *block = tp->chain->block; |
498 | struct tc_cls_u32_offload cls_u32 = {}; |
499 | bool skip_sw = tc_skip_sw(flags); |
500 | bool offloaded = false; |
501 | int err; |
502 | |
	tc_cls_common_offload_init(&cls_u32.common, tp, flags, extack);
504 | cls_u32.command = TC_CLSU32_NEW_HNODE; |
505 | cls_u32.hnode.divisor = h->divisor; |
506 | cls_u32.hnode.handle = h->handle; |
507 | cls_u32.hnode.prio = h->prio; |
508 | |
	err = tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, skip_sw, true);
510 | if (err < 0) { |
511 | u32_clear_hw_hnode(tp, h, NULL); |
512 | return err; |
513 | } else if (err > 0) { |
514 | offloaded = true; |
515 | } |
516 | |
517 | if (skip_sw && !offloaded) |
518 | return -EINVAL; |
519 | |
520 | return 0; |
521 | } |
522 | |
523 | static void u32_remove_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, |
524 | struct netlink_ext_ack *extack) |
525 | { |
526 | struct tcf_block *block = tp->chain->block; |
527 | struct tc_cls_u32_offload cls_u32 = {}; |
528 | |
	tc_cls_common_offload_init(&cls_u32.common, tp, n->flags, extack);
530 | cls_u32.command = TC_CLSU32_DELETE_KNODE; |
531 | cls_u32.knode.handle = n->handle; |
532 | |
	tc_setup_cb_destroy(block, tp, TC_SETUP_CLSU32, &cls_u32, false,
			    &n->flags, &n->in_hw_count, true);
535 | } |
536 | |
537 | static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, |
538 | u32 flags, struct netlink_ext_ack *extack) |
539 | { |
540 | struct tc_u_hnode *ht = rtnl_dereference(n->ht_down); |
541 | struct tcf_block *block = tp->chain->block; |
542 | struct tc_cls_u32_offload cls_u32 = {}; |
543 | bool skip_sw = tc_skip_sw(flags); |
544 | int err; |
545 | |
	tc_cls_common_offload_init(&cls_u32.common, tp, flags, extack);
547 | cls_u32.command = TC_CLSU32_REPLACE_KNODE; |
548 | cls_u32.knode.handle = n->handle; |
549 | cls_u32.knode.fshift = n->fshift; |
550 | #ifdef CONFIG_CLS_U32_MARK |
551 | cls_u32.knode.val = n->val; |
552 | cls_u32.knode.mask = n->mask; |
553 | #else |
554 | cls_u32.knode.val = 0; |
555 | cls_u32.knode.mask = 0; |
556 | #endif |
557 | cls_u32.knode.sel = &n->sel; |
558 | cls_u32.knode.res = &n->res; |
559 | cls_u32.knode.exts = &n->exts; |
560 | if (n->ht_down) |
561 | cls_u32.knode.link_handle = ht->handle; |
562 | |
	err = tc_setup_cb_add(block, tp, TC_SETUP_CLSU32, &cls_u32, skip_sw,
			      &n->flags, &n->in_hw_count, true);
565 | if (err) { |
566 | u32_remove_hw_knode(tp, n, NULL); |
567 | return err; |
568 | } |
569 | |
570 | if (skip_sw && !(n->flags & TCA_CLS_FLAGS_IN_HW)) |
571 | return -EINVAL; |
572 | |
573 | return 0; |
574 | } |
575 | |
576 | static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht, |
577 | struct netlink_ext_ack *extack) |
578 | { |
579 | struct tc_u_common *tp_c = tp->data; |
580 | struct tc_u_knode *n; |
581 | unsigned int h; |
582 | |
583 | for (h = 0; h <= ht->divisor; h++) { |
584 | while ((n = rtnl_dereference(ht->ht[h])) != NULL) { |
585 | RCU_INIT_POINTER(ht->ht[h], |
586 | rtnl_dereference(n->next)); |
587 | tp_c->knodes--; |
			tcf_unbind_filter(tp, &n->res);
			u32_remove_hw_knode(tp, n, extack);
			idr_remove(&ht->handle_idr, n->handle);
			if (tcf_exts_get_net(&n->exts))
				tcf_queue_work(&n->rwork, u32_delete_key_freepf_work);
			else
				u32_destroy_key(n, true);
595 | } |
596 | } |
597 | } |
598 | |
599 | static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht, |
600 | struct netlink_ext_ack *extack) |
601 | { |
602 | struct tc_u_common *tp_c = tp->data; |
603 | struct tc_u_hnode __rcu **hn; |
604 | struct tc_u_hnode *phn; |
605 | |
606 | u32_clear_hnode(tp, ht, extack); |
607 | |
608 | hn = &tp_c->hlist; |
609 | for (phn = rtnl_dereference(*hn); |
610 | phn; |
611 | hn = &phn->next, phn = rtnl_dereference(*hn)) { |
612 | if (phn == ht) { |
			u32_clear_hw_hnode(tp, ht, extack);
			idr_destroy(&ht->handle_idr);
			idr_remove(&tp_c->handle_idr, ht->handle);
616 | RCU_INIT_POINTER(*hn, ht->next); |
617 | kfree_rcu(ht, rcu); |
618 | return 0; |
619 | } |
620 | } |
621 | |
622 | return -ENOENT; |
623 | } |
624 | |
625 | static void u32_destroy(struct tcf_proto *tp, bool rtnl_held, |
626 | struct netlink_ext_ack *extack) |
627 | { |
628 | struct tc_u_common *tp_c = tp->data; |
629 | struct tc_u_hnode *root_ht = rtnl_dereference(tp->root); |
630 | |
631 | WARN_ON(root_ht == NULL); |
632 | |
	if (root_ht && refcount_dec_and_test(&root_ht->refcnt))
		u32_destroy_hnode(tp, root_ht, extack);

	if (refcount_dec_and_test(&tp_c->refcnt)) {
		struct tc_u_hnode *ht;

		hlist_del(&tp_c->hnode);
640 | |
641 | while ((ht = rtnl_dereference(tp_c->hlist)) != NULL) { |
642 | u32_clear_hnode(tp, ht, extack); |
643 | RCU_INIT_POINTER(tp_c->hlist, ht->next); |
644 | |
645 | /* u32_destroy_key() will later free ht for us, if it's |
646 | * still referenced by some knode |
647 | */ |
			if (refcount_dec_and_test(&ht->refcnt))
649 | kfree_rcu(ht, rcu); |
650 | } |
651 | |
		idr_destroy(&tp_c->handle_idr);
		kfree(tp_c);
654 | } |
655 | |
656 | tp->data = NULL; |
657 | } |
658 | |
659 | static int u32_delete(struct tcf_proto *tp, void *arg, bool *last, |
660 | bool rtnl_held, struct netlink_ext_ack *extack) |
661 | { |
662 | struct tc_u_hnode *ht = arg; |
663 | struct tc_u_common *tp_c = tp->data; |
664 | int ret = 0; |
665 | |
666 | if (TC_U32_KEY(ht->handle)) { |
		u32_remove_hw_knode(tp, (struct tc_u_knode *)ht, extack);
		ret = u32_delete_key(tp, (struct tc_u_knode *)ht);
669 | goto out; |
670 | } |
671 | |
672 | if (ht->is_root) { |
673 | NL_SET_ERR_MSG_MOD(extack, "Not allowed to delete root node" ); |
674 | return -EINVAL; |
675 | } |
676 | |
	if (refcount_dec_if_one(&ht->refcnt)) {
		u32_destroy_hnode(tp, ht, extack);
	} else {
		NL_SET_ERR_MSG_MOD(extack, "Can not delete in-use filter");
		return -EBUSY;
	}

out:
	*last = refcount_read(&tp_c->refcnt) == 1 && tp_c->knodes == 0;
686 | return ret; |
687 | } |
688 | |
689 | static u32 gen_new_kid(struct tc_u_hnode *ht, u32 htid) |
690 | { |
691 | u32 index = htid | 0x800; |
692 | u32 max = htid | 0xFFF; |
693 | |
	if (idr_alloc_u32(&ht->handle_idr, NULL, &index, max, GFP_KERNEL)) {
		index = htid + 1;
		if (idr_alloc_u32(&ht->handle_idr, NULL, &index, max,
				  GFP_KERNEL))
698 | index = max; |
699 | } |
700 | |
701 | return index; |
702 | } |
703 | |
704 | static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = { |
705 | [TCA_U32_CLASSID] = { .type = NLA_U32 }, |
706 | [TCA_U32_HASH] = { .type = NLA_U32 }, |
707 | [TCA_U32_LINK] = { .type = NLA_U32 }, |
708 | [TCA_U32_DIVISOR] = { .type = NLA_U32 }, |
709 | [TCA_U32_SEL] = { .len = sizeof(struct tc_u32_sel) }, |
710 | [TCA_U32_INDEV] = { .type = NLA_STRING, .len = IFNAMSIZ }, |
711 | [TCA_U32_MARK] = { .len = sizeof(struct tc_u32_mark) }, |
712 | [TCA_U32_FLAGS] = { .type = NLA_U32 }, |
713 | }; |
714 | |
715 | static void u32_unbind_filter(struct tcf_proto *tp, struct tc_u_knode *n, |
716 | struct nlattr **tb) |
717 | { |
718 | if (tb[TCA_U32_CLASSID]) |
		tcf_unbind_filter(tp, &n->res);
720 | } |
721 | |
722 | static void u32_bind_filter(struct tcf_proto *tp, struct tc_u_knode *n, |
723 | unsigned long base, struct nlattr **tb) |
724 | { |
725 | if (tb[TCA_U32_CLASSID]) { |
		n->res.classid = nla_get_u32(tb[TCA_U32_CLASSID]);
		tcf_bind_filter(tp, &n->res, base);
728 | } |
729 | } |
730 | |
731 | static int u32_set_parms(struct net *net, struct tcf_proto *tp, |
732 | struct tc_u_knode *n, struct nlattr **tb, |
733 | struct nlattr *est, u32 flags, u32 fl_flags, |
734 | struct netlink_ext_ack *extack) |
735 | { |
736 | int err, ifindex = -1; |
737 | |
	err = tcf_exts_validate_ex(net, tp, tb, est, &n->exts, flags,
				   fl_flags, extack);
740 | if (err < 0) |
741 | return err; |
742 | |
743 | if (tb[TCA_U32_INDEV]) { |
		ifindex = tcf_change_indev(net, tb[TCA_U32_INDEV], extack);
745 | if (ifindex < 0) |
746 | return -EINVAL; |
747 | } |
748 | |
749 | if (tb[TCA_U32_LINK]) { |
		u32 handle = nla_get_u32(tb[TCA_U32_LINK]);
		struct tc_u_hnode *ht_down = NULL, *ht_old;

		if (TC_U32_KEY(handle)) {
			NL_SET_ERR_MSG_MOD(extack, "u32 Link handle must be a hash table");
			return -EINVAL;
		}

		if (handle) {
			ht_down = u32_lookup_ht(tp->data, handle);

			if (!ht_down) {
				NL_SET_ERR_MSG_MOD(extack, "Link hash table not found");
				return -EINVAL;
			}
			if (ht_down->is_root) {
				NL_SET_ERR_MSG_MOD(extack, "Not linking to root node");
				return -EINVAL;
			}
			refcount_inc(&ht_down->refcnt);
		}

		ht_old = rtnl_dereference(n->ht_down);
		rcu_assign_pointer(n->ht_down, ht_down);

		if (ht_old)
			refcount_dec(&ht_old->refcnt);
777 | } |
778 | |
779 | if (ifindex >= 0) |
780 | n->ifindex = ifindex; |
781 | |
782 | return 0; |
783 | } |
784 | |
785 | static void u32_replace_knode(struct tcf_proto *tp, struct tc_u_common *tp_c, |
786 | struct tc_u_knode *n) |
787 | { |
788 | struct tc_u_knode __rcu **ins; |
789 | struct tc_u_knode *pins; |
790 | struct tc_u_hnode *ht; |
791 | |
792 | if (TC_U32_HTID(n->handle) == TC_U32_ROOT) |
793 | ht = rtnl_dereference(tp->root); |
794 | else |
795 | ht = u32_lookup_ht(tp_c, TC_U32_HTID(n->handle)); |
796 | |
797 | ins = &ht->ht[TC_U32_HASH(n->handle)]; |
798 | |
	/* The node must always exist for it to be replaced; if this is not
	 * the case then something went very wrong elsewhere.
801 | */ |
802 | for (pins = rtnl_dereference(*ins); ; |
803 | ins = &pins->next, pins = rtnl_dereference(*ins)) |
804 | if (pins->handle == n->handle) |
805 | break; |
806 | |
	idr_replace(&ht->handle_idr, n, n->handle);
808 | RCU_INIT_POINTER(n->next, pins->next); |
809 | rcu_assign_pointer(*ins, n); |
810 | } |
811 | |
812 | static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp, |
813 | struct tc_u_knode *n) |
814 | { |
815 | struct tc_u_hnode *ht = rtnl_dereference(n->ht_down); |
816 | struct tc_u32_sel *s = &n->sel; |
817 | struct tc_u_knode *new; |
818 | |
819 | new = kzalloc(struct_size(new, sel.keys, s->nkeys), GFP_KERNEL); |
820 | if (!new) |
821 | return NULL; |
822 | |
823 | RCU_INIT_POINTER(new->next, n->next); |
824 | new->handle = n->handle; |
825 | RCU_INIT_POINTER(new->ht_up, n->ht_up); |
826 | |
827 | new->ifindex = n->ifindex; |
828 | new->fshift = n->fshift; |
829 | new->flags = n->flags; |
830 | RCU_INIT_POINTER(new->ht_down, ht); |
831 | |
832 | #ifdef CONFIG_CLS_U32_PERF |
833 | /* Statistics may be incremented by readers during update |
	 * so we must keep them intact. When the node is later destroyed
835 | * a special destroy call must be made to not free the pf memory. |
836 | */ |
837 | new->pf = n->pf; |
838 | #endif |
839 | |
840 | #ifdef CONFIG_CLS_U32_MARK |
841 | new->val = n->val; |
842 | new->mask = n->mask; |
843 | /* Similarly success statistics must be moved as pointers */ |
844 | new->pcpu_success = n->pcpu_success; |
845 | #endif |
846 | memcpy(&new->sel, s, struct_size(s, keys, s->nkeys)); |
847 | |
	if (tcf_exts_init(&new->exts, net, TCA_U32_ACT, TCA_U32_POLICE)) {
		kfree(new);
850 | return NULL; |
851 | } |
852 | |
853 | /* bump reference count as long as we hold pointer to structure */ |
854 | if (ht) |
		refcount_inc(&ht->refcnt);
856 | |
857 | return new; |
858 | } |
859 | |
860 | static int u32_change(struct net *net, struct sk_buff *in_skb, |
861 | struct tcf_proto *tp, unsigned long base, u32 handle, |
862 | struct nlattr **tca, void **arg, u32 flags, |
863 | struct netlink_ext_ack *extack) |
864 | { |
865 | struct tc_u_common *tp_c = tp->data; |
866 | struct tc_u_hnode *ht; |
867 | struct tc_u_knode *n; |
868 | struct tc_u32_sel *s; |
869 | struct nlattr *opt = tca[TCA_OPTIONS]; |
870 | struct nlattr *tb[TCA_U32_MAX + 1]; |
871 | u32 htid, userflags = 0; |
872 | size_t sel_size; |
873 | int err; |
874 | |
875 | if (!opt) { |
876 | if (handle) { |
877 | NL_SET_ERR_MSG_MOD(extack, "Filter handle requires options" ); |
878 | return -EINVAL; |
879 | } else { |
880 | return 0; |
881 | } |
882 | } |
883 | |
	err = nla_parse_nested_deprecated(tb, TCA_U32_MAX, opt, u32_policy,
					  extack);
886 | if (err < 0) |
887 | return err; |
888 | |
889 | if (tb[TCA_U32_FLAGS]) { |
		userflags = nla_get_u32(tb[TCA_U32_FLAGS]);
		if (!tc_flags_valid(userflags)) {
			NL_SET_ERR_MSG_MOD(extack, "Invalid filter flags");
893 | return -EINVAL; |
894 | } |
895 | } |
896 | |
897 | n = *arg; |
898 | if (n) { |
899 | struct tc_u_knode *new; |
900 | |
901 | if (TC_U32_KEY(n->handle) == 0) { |
902 | NL_SET_ERR_MSG_MOD(extack, "Key node id cannot be zero" ); |
903 | return -EINVAL; |
904 | } |
905 | |
906 | if ((n->flags ^ userflags) & |
907 | ~(TCA_CLS_FLAGS_IN_HW | TCA_CLS_FLAGS_NOT_IN_HW)) { |
908 | NL_SET_ERR_MSG_MOD(extack, "Key node flags do not match passed flags" ); |
909 | return -EINVAL; |
910 | } |
911 | |
912 | new = u32_init_knode(net, tp, n); |
913 | if (!new) |
914 | return -ENOMEM; |
915 | |
		err = u32_set_parms(net, tp, new, tb, tca[TCA_RATE],
				    flags, new->flags, extack);

		if (err) {
			__u32_destroy_key(new);
			return err;
		}

		u32_bind_filter(tp, new, base, tb);

		err = u32_replace_hw_knode(tp, new, flags, extack);
		if (err) {
			u32_unbind_filter(tp, new, tb);

			if (tb[TCA_U32_LINK]) {
				struct tc_u_hnode *ht_old;

				ht_old = rtnl_dereference(n->ht_down);
				if (ht_old)
					refcount_inc(&ht_old->refcnt);
			}
			__u32_destroy_key(new);
			return err;
		}

		if (!tc_in_hw(new->flags))
			new->flags |= TCA_CLS_FLAGS_NOT_IN_HW;

		u32_replace_knode(tp, tp_c, new);
		tcf_unbind_filter(tp, &n->res);
		tcf_exts_get_net(&n->exts);
		tcf_queue_work(&n->rwork, u32_delete_key_work);
		return 0;
949 | } |
950 | |
951 | if (tb[TCA_U32_DIVISOR]) { |
		unsigned int divisor = nla_get_u32(tb[TCA_U32_DIVISOR]);

		if (!is_power_of_2(divisor)) {
			NL_SET_ERR_MSG_MOD(extack, "Divisor is not a power of 2");
			return -EINVAL;
		}
		if (divisor-- > 0x100) {
			NL_SET_ERR_MSG_MOD(extack, "Exceeded maximum 256 hash buckets");
			return -EINVAL;
		}
		if (TC_U32_KEY(handle)) {
			NL_SET_ERR_MSG_MOD(extack, "Divisor can only be used on a hash table");
			return -EINVAL;
		}
		ht = kzalloc(struct_size(ht, ht, divisor + 1), GFP_KERNEL);
		if (ht == NULL)
			return -ENOBUFS;
		if (handle == 0) {
			handle = gen_new_htid(tp->data, ht);
			if (handle == 0) {
				kfree(ht);
				return -ENOMEM;
			}
		} else {
			err = idr_alloc_u32(&tp_c->handle_idr, ht, &handle,
					    handle, GFP_KERNEL);
			if (err) {
				kfree(ht);
				return err;
			}
		}
		refcount_set(&ht->refcnt, 1);
		ht->divisor = divisor;
		ht->handle = handle;
		ht->prio = tp->prio;
		idr_init(&ht->handle_idr);
		ht->flags = userflags;

		err = u32_replace_hw_hnode(tp, ht, userflags, extack);
		if (err) {
			idr_remove(&tp_c->handle_idr, handle);
			kfree(ht);
			return err;
		}

		RCU_INIT_POINTER(ht->next, tp_c->hlist);
		rcu_assign_pointer(tp_c->hlist, ht);
999 | *arg = ht; |
1000 | |
1001 | return 0; |
1002 | } |
1003 | |
1004 | if (tb[TCA_U32_HASH]) { |
		htid = nla_get_u32(tb[TCA_U32_HASH]);
1006 | if (TC_U32_HTID(htid) == TC_U32_ROOT) { |
1007 | ht = rtnl_dereference(tp->root); |
1008 | htid = ht->handle; |
1009 | } else { |
			ht = u32_lookup_ht(tp->data, TC_U32_HTID(htid));
			if (!ht) {
				NL_SET_ERR_MSG_MOD(extack, "Specified hash table not found");
1013 | return -EINVAL; |
1014 | } |
1015 | } |
1016 | } else { |
1017 | ht = rtnl_dereference(tp->root); |
1018 | htid = ht->handle; |
1019 | } |
1020 | |
1021 | if (ht->divisor < TC_U32_HASH(htid)) { |
1022 | NL_SET_ERR_MSG_MOD(extack, "Specified hash table buckets exceed configured value" ); |
1023 | return -EINVAL; |
1024 | } |
1025 | |
1026 | /* At this point, we need to derive the new handle that will be used to |
1027 | * uniquely map the identity of this table match entry. The |
1028 | * identity of the entry that we need to construct is 32 bits made of: |
1029 | * htid(12b):bucketid(8b):node/entryid(12b) |
1030 | * |
1031 | * At this point _we have the table(ht)_ in which we will insert this |
1032 | * entry. We carry the table's id in variable "htid". |
	 * Note that earlier code picked the ht selection either by a) the user
	 * providing the htid specified via the TCA_U32_HASH attribute or b)
	 * when no such attribute is passed, defaulting to the root ht at ID
	 * 0x[800][00][000]. Rule: the root table has a single bucket with ID 0.
	 * If OTOH the user passed us the htid, they may also pass a bucketid of
	 * choice. 0 is fine. For example, a user htid of 0x[600][01][000]
	 * indicates hash bucketid 1. Rule: the entry/node ID _cannot_ be
	 * passed via the htid, so even if it was non-zero it will be ignored.
1041 | * |
1042 | * We may also have a handle, if the user passed one. The handle also |
1043 | * carries the same addressing of htid(12b):bucketid(8b):node/entryid(12b). |
1044 | * Rule: the bucketid on the handle is ignored even if one was passed; |
1045 | * rather the value on "htid" is always assumed to be the bucketid. |
1046 | */ |
1047 | if (handle) { |
1048 | /* Rule: The htid from handle and tableid from htid must match */ |
1049 | if (TC_U32_HTID(handle) && TC_U32_HTID(handle ^ htid)) { |
1050 | NL_SET_ERR_MSG_MOD(extack, "Handle specified hash table address mismatch" ); |
1051 | return -EINVAL; |
1052 | } |
		/* Ok, so far we have a valid htid(12b):bucketid(8b) but we
		 * need to finalize the table entry identification with the last
		 * part - the node/entryid(12b). Rule: Nodeid _cannot be 0_ for
		 * entries. Rule: nodeid of 0 is reserved only for tables (see
		 * the earlier code which processes the TCA_U32_DIVISOR
		 * attribute).
		 * Rule: The nodeid can only be derived from the handle (and not
		 * htid).
		 * Rule: if the handle specified zero for the node id, for
		 * example 0x60000000, then pick a new nodeid from the pool of
		 * IDs this hash table has been allocating from.
		 * If OTOH it is specified (i.e. for example the user passed a
		 * handle such as 0x60000123), then we use it to generate our
		 * final handle which is used to uniquely identify the match entry.
		 */
1067 | if (!TC_U32_NODE(handle)) { |
1068 | handle = gen_new_kid(ht, htid); |
1069 | } else { |
1070 | handle = htid | TC_U32_NODE(handle); |
			err = idr_alloc_u32(&ht->handle_idr, NULL, &handle,
					    handle, GFP_KERNEL);
1073 | if (err) |
1074 | return err; |
1075 | } |
1076 | } else { |
1077 | /* The user did not give us a handle; lets just generate one |
1078 | * from the table's pool of nodeids. |
1079 | */ |
1080 | handle = gen_new_kid(ht, htid); |
1081 | } |
1082 | |
1083 | if (tb[TCA_U32_SEL] == NULL) { |
1084 | NL_SET_ERR_MSG_MOD(extack, "Selector not specified" ); |
1085 | err = -EINVAL; |
1086 | goto erridr; |
1087 | } |
1088 | |
	s = nla_data(tb[TCA_U32_SEL]);
	sel_size = struct_size(s, keys, s->nkeys);
	if (nla_len(tb[TCA_U32_SEL]) < sel_size) {
1092 | err = -EINVAL; |
1093 | goto erridr; |
1094 | } |
1095 | |
1096 | n = kzalloc(struct_size(n, sel.keys, s->nkeys), GFP_KERNEL); |
1097 | if (n == NULL) { |
1098 | err = -ENOBUFS; |
1099 | goto erridr; |
1100 | } |
1101 | |
1102 | #ifdef CONFIG_CLS_U32_PERF |
	n->pf = __alloc_percpu(struct_size(n->pf, kcnts, s->nkeys),
			       __alignof__(struct tc_u32_pcnt));
1105 | if (!n->pf) { |
1106 | err = -ENOBUFS; |
1107 | goto errfree; |
1108 | } |
1109 | #endif |
1110 | |
1111 | unsafe_memcpy(&n->sel, s, sel_size, |
1112 | /* A composite flex-array structure destination, |
1113 | * which was correctly sized with struct_size(), |
1114 | * bounds-checked against nla_len(), and allocated |
1115 | * above. */); |
1116 | RCU_INIT_POINTER(n->ht_up, ht); |
1117 | n->handle = handle; |
1118 | n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0; |
1119 | n->flags = userflags; |
1120 | |
	err = tcf_exts_init(&n->exts, net, TCA_U32_ACT, TCA_U32_POLICE);
1122 | if (err < 0) |
1123 | goto errout; |
1124 | |
1125 | #ifdef CONFIG_CLS_U32_MARK |
1126 | n->pcpu_success = alloc_percpu(u32); |
1127 | if (!n->pcpu_success) { |
1128 | err = -ENOMEM; |
1129 | goto errout; |
1130 | } |
1131 | |
1132 | if (tb[TCA_U32_MARK]) { |
1133 | struct tc_u32_mark *mark; |
1134 | |
		mark = nla_data(tb[TCA_U32_MARK]);
1136 | n->val = mark->val; |
1137 | n->mask = mark->mask; |
1138 | } |
1139 | #endif |
1140 | |
	err = u32_set_parms(net, tp, n, tb, tca[TCA_RATE],
			    flags, n->flags, extack);
1143 | |
1144 | u32_bind_filter(tp, n, base, tb); |
1145 | |
1146 | if (err == 0) { |
1147 | struct tc_u_knode __rcu **ins; |
1148 | struct tc_u_knode *pins; |
1149 | |
1150 | err = u32_replace_hw_knode(tp, n, flags, extack); |
1151 | if (err) |
1152 | goto errunbind; |
1153 | |
		if (!tc_in_hw(n->flags))
1155 | n->flags |= TCA_CLS_FLAGS_NOT_IN_HW; |
1156 | |
1157 | ins = &ht->ht[TC_U32_HASH(handle)]; |
1158 | for (pins = rtnl_dereference(*ins); pins; |
1159 | ins = &pins->next, pins = rtnl_dereference(*ins)) |
1160 | if (TC_U32_NODE(handle) < TC_U32_NODE(pins->handle)) |
1161 | break; |
1162 | |
1163 | RCU_INIT_POINTER(n->next, pins); |
1164 | rcu_assign_pointer(*ins, n); |
1165 | tp_c->knodes++; |
1166 | *arg = n; |
1167 | return 0; |
1168 | } |
1169 | |
1170 | errunbind: |
1171 | u32_unbind_filter(tp, n, tb); |
1172 | |
1173 | #ifdef CONFIG_CLS_U32_MARK |
	free_percpu(n->pcpu_success);
1175 | #endif |
1176 | |
1177 | errout: |
	tcf_exts_destroy(&n->exts);
1179 | #ifdef CONFIG_CLS_U32_PERF |
1180 | errfree: |
	free_percpu(n->pf);
#endif
	kfree(n);
erridr:
	idr_remove(&ht->handle_idr, handle);
1186 | return err; |
1187 | } |
1188 | |
1189 | static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg, |
1190 | bool rtnl_held) |
1191 | { |
1192 | struct tc_u_common *tp_c = tp->data; |
1193 | struct tc_u_hnode *ht; |
1194 | struct tc_u_knode *n; |
1195 | unsigned int h; |
1196 | |
1197 | if (arg->stop) |
1198 | return; |
1199 | |
1200 | for (ht = rtnl_dereference(tp_c->hlist); |
1201 | ht; |
1202 | ht = rtnl_dereference(ht->next)) { |
1203 | if (ht->prio != tp->prio) |
1204 | continue; |
1205 | |
		if (!tc_cls_stats_dump(tp, arg, ht))
1207 | return; |
1208 | |
1209 | for (h = 0; h <= ht->divisor; h++) { |
1210 | for (n = rtnl_dereference(ht->ht[h]); |
1211 | n; |
1212 | n = rtnl_dereference(n->next)) { |
				if (!tc_cls_stats_dump(tp, arg, n))
1214 | return; |
1215 | } |
1216 | } |
1217 | } |
1218 | } |
1219 | |
1220 | static int u32_reoffload_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht, |
1221 | bool add, flow_setup_cb_t *cb, void *cb_priv, |
1222 | struct netlink_ext_ack *extack) |
1223 | { |
1224 | struct tc_cls_u32_offload cls_u32 = {}; |
1225 | int err; |
1226 | |
	tc_cls_common_offload_init(&cls_u32.common, tp, ht->flags, extack);
1228 | cls_u32.command = add ? TC_CLSU32_NEW_HNODE : TC_CLSU32_DELETE_HNODE; |
1229 | cls_u32.hnode.divisor = ht->divisor; |
1230 | cls_u32.hnode.handle = ht->handle; |
1231 | cls_u32.hnode.prio = ht->prio; |
1232 | |
1233 | err = cb(TC_SETUP_CLSU32, &cls_u32, cb_priv); |
	if (err && add && tc_skip_sw(ht->flags))
1235 | return err; |
1236 | |
1237 | return 0; |
1238 | } |
1239 | |
1240 | static int u32_reoffload_knode(struct tcf_proto *tp, struct tc_u_knode *n, |
1241 | bool add, flow_setup_cb_t *cb, void *cb_priv, |
1242 | struct netlink_ext_ack *extack) |
1243 | { |
1244 | struct tc_u_hnode *ht = rtnl_dereference(n->ht_down); |
1245 | struct tcf_block *block = tp->chain->block; |
1246 | struct tc_cls_u32_offload cls_u32 = {}; |
1247 | |
	tc_cls_common_offload_init(&cls_u32.common, tp, n->flags, extack);
1249 | cls_u32.command = add ? |
1250 | TC_CLSU32_REPLACE_KNODE : TC_CLSU32_DELETE_KNODE; |
1251 | cls_u32.knode.handle = n->handle; |
1252 | |
1253 | if (add) { |
1254 | cls_u32.knode.fshift = n->fshift; |
1255 | #ifdef CONFIG_CLS_U32_MARK |
1256 | cls_u32.knode.val = n->val; |
1257 | cls_u32.knode.mask = n->mask; |
1258 | #else |
1259 | cls_u32.knode.val = 0; |
1260 | cls_u32.knode.mask = 0; |
1261 | #endif |
1262 | cls_u32.knode.sel = &n->sel; |
1263 | cls_u32.knode.res = &n->res; |
1264 | cls_u32.knode.exts = &n->exts; |
1265 | if (n->ht_down) |
1266 | cls_u32.knode.link_handle = ht->handle; |
1267 | } |
1268 | |
	return tc_setup_cb_reoffload(block, tp, add, cb, TC_SETUP_CLSU32,
				     &cls_u32, cb_priv, &n->flags,
				     &n->in_hw_count);
1272 | } |
1273 | |
1274 | static int u32_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb, |
1275 | void *cb_priv, struct netlink_ext_ack *extack) |
1276 | { |
1277 | struct tc_u_common *tp_c = tp->data; |
1278 | struct tc_u_hnode *ht; |
1279 | struct tc_u_knode *n; |
1280 | unsigned int h; |
1281 | int err; |
1282 | |
1283 | for (ht = rtnl_dereference(tp_c->hlist); |
1284 | ht; |
1285 | ht = rtnl_dereference(ht->next)) { |
1286 | if (ht->prio != tp->prio) |
1287 | continue; |
1288 | |
1289 | /* When adding filters to a new dev, try to offload the |
1290 | * hashtable first. When removing, do the filters before the |
1291 | * hashtable. |
1292 | */ |
		if (add && !tc_skip_hw(ht->flags)) {
1294 | err = u32_reoffload_hnode(tp, ht, add, cb, cb_priv, |
1295 | extack); |
1296 | if (err) |
1297 | return err; |
1298 | } |
1299 | |
1300 | for (h = 0; h <= ht->divisor; h++) { |
1301 | for (n = rtnl_dereference(ht->ht[h]); |
1302 | n; |
1303 | n = rtnl_dereference(n->next)) { |
				if (tc_skip_hw(n->flags))
1305 | continue; |
1306 | |
1307 | err = u32_reoffload_knode(tp, n, add, cb, |
1308 | cb_priv, extack); |
1309 | if (err) |
1310 | return err; |
1311 | } |
1312 | } |
1313 | |
		if (!add && !tc_skip_hw(ht->flags))
1315 | u32_reoffload_hnode(tp, ht, add, cb, cb_priv, extack); |
1316 | } |
1317 | |
1318 | return 0; |
1319 | } |
1320 | |
1321 | static void u32_bind_class(void *fh, u32 classid, unsigned long cl, void *q, |
1322 | unsigned long base) |
1323 | { |
1324 | struct tc_u_knode *n = fh; |
1325 | |
	tc_cls_bind_class(classid, cl, q, &n->res, base);
1327 | } |
1328 | |
1329 | static int u32_dump(struct net *net, struct tcf_proto *tp, void *fh, |
1330 | struct sk_buff *skb, struct tcmsg *t, bool rtnl_held) |
1331 | { |
1332 | struct tc_u_knode *n = fh; |
1333 | struct tc_u_hnode *ht_up, *ht_down; |
1334 | struct nlattr *nest; |
1335 | |
1336 | if (n == NULL) |
1337 | return skb->len; |
1338 | |
1339 | t->tcm_handle = n->handle; |
1340 | |
	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
1342 | if (nest == NULL) |
1343 | goto nla_put_failure; |
1344 | |
1345 | if (TC_U32_KEY(n->handle) == 0) { |
1346 | struct tc_u_hnode *ht = fh; |
1347 | u32 divisor = ht->divisor + 1; |
1348 | |
		if (nla_put_u32(skb, TCA_U32_DIVISOR, divisor))
1350 | goto nla_put_failure; |
1351 | } else { |
1352 | #ifdef CONFIG_CLS_U32_PERF |
1353 | struct tc_u32_pcnt *gpf; |
1354 | int cpu; |
1355 | #endif |
1356 | |
		if (nla_put(skb, TCA_U32_SEL,
			    struct_size(&n->sel, keys, n->sel.nkeys),
			    &n->sel))
1359 | goto nla_put_failure; |
1360 | |
1361 | ht_up = rtnl_dereference(n->ht_up); |
		if (ht_up) {
			u32 htid = n->handle & 0xFFFFF000;

			if (nla_put_u32(skb, TCA_U32_HASH, htid))
1365 | goto nla_put_failure; |
1366 | } |
		if (n->res.classid &&
		    nla_put_u32(skb, TCA_U32_CLASSID, n->res.classid))
1369 | goto nla_put_failure; |
1370 | |
1371 | ht_down = rtnl_dereference(n->ht_down); |
		if (ht_down &&
		    nla_put_u32(skb, TCA_U32_LINK, ht_down->handle))
1374 | goto nla_put_failure; |
1375 | |
		if (n->flags && nla_put_u32(skb, TCA_U32_FLAGS, n->flags))
1377 | goto nla_put_failure; |
1378 | |
1379 | #ifdef CONFIG_CLS_U32_MARK |
1380 | if ((n->val || n->mask)) { |
1381 | struct tc_u32_mark mark = {.val = n->val, |
1382 | .mask = n->mask, |
1383 | .success = 0}; |
1384 | int cpum; |
1385 | |
1386 | for_each_possible_cpu(cpum) { |
1387 | __u32 cnt = *per_cpu_ptr(n->pcpu_success, cpum); |
1388 | |
1389 | mark.success += cnt; |
1390 | } |
1391 | |
			if (nla_put(skb, TCA_U32_MARK, sizeof(mark), &mark))
1393 | goto nla_put_failure; |
1394 | } |
1395 | #endif |
1396 | |
		if (tcf_exts_dump(skb, &n->exts) < 0)
1398 | goto nla_put_failure; |
1399 | |
		if (n->ifindex) {
			struct net_device *dev;

			dev = __dev_get_by_index(net, n->ifindex);
			if (dev && nla_put_string(skb, TCA_U32_INDEV, dev->name))
1404 | goto nla_put_failure; |
1405 | } |
1406 | #ifdef CONFIG_CLS_U32_PERF |
1407 | gpf = kzalloc(struct_size(gpf, kcnts, n->sel.nkeys), GFP_KERNEL); |
1408 | if (!gpf) |
1409 | goto nla_put_failure; |
1410 | |
1411 | for_each_possible_cpu(cpu) { |
1412 | int i; |
1413 | struct tc_u32_pcnt *pf = per_cpu_ptr(n->pf, cpu); |
1414 | |
1415 | gpf->rcnt += pf->rcnt; |
1416 | gpf->rhit += pf->rhit; |
1417 | for (i = 0; i < n->sel.nkeys; i++) |
1418 | gpf->kcnts[i] += pf->kcnts[i]; |
1419 | } |
1420 | |
		if (nla_put_64bit(skb, TCA_U32_PCNT,
				  struct_size(gpf, kcnts, n->sel.nkeys),
				  gpf, TCA_U32_PAD)) {
			kfree(gpf);
			goto nla_put_failure;
		}
		kfree(gpf);
1427 | #endif |
1428 | } |
1429 | |
	nla_nest_end(skb, nest);
1431 | |
1432 | if (TC_U32_KEY(n->handle)) |
		if (tcf_exts_dump_stats(skb, &n->exts) < 0)
1434 | goto nla_put_failure; |
1435 | return skb->len; |
1436 | |
1437 | nla_put_failure: |
	nla_nest_cancel(skb, nest);
1439 | return -1; |
1440 | } |
1441 | |
1442 | static struct tcf_proto_ops cls_u32_ops __read_mostly = { |
	.kind		= "u32",
1444 | .classify = u32_classify, |
1445 | .init = u32_init, |
1446 | .destroy = u32_destroy, |
1447 | .get = u32_get, |
1448 | .change = u32_change, |
1449 | .delete = u32_delete, |
1450 | .walk = u32_walk, |
1451 | .reoffload = u32_reoffload, |
1452 | .dump = u32_dump, |
1453 | .bind_class = u32_bind_class, |
1454 | .owner = THIS_MODULE, |
1455 | }; |
MODULE_ALIAS_NET_CLS("u32");
1457 | |
1458 | static int __init init_u32(void) |
1459 | { |
1460 | int i, ret; |
1461 | |
1462 | pr_info("u32 classifier\n" ); |
1463 | #ifdef CONFIG_CLS_U32_PERF |
1464 | pr_info(" Performance counters on\n" ); |
1465 | #endif |
1466 | pr_info(" input device check on\n" ); |
1467 | #ifdef CONFIG_NET_CLS_ACT |
1468 | pr_info(" Actions configured\n" ); |
1469 | #endif |
	tc_u_common_hash = kvmalloc_array(U32_HASH_SIZE,
					  sizeof(struct hlist_head),
					  GFP_KERNEL);
1473 | if (!tc_u_common_hash) |
1474 | return -ENOMEM; |
1475 | |
1476 | for (i = 0; i < U32_HASH_SIZE; i++) |
1477 | INIT_HLIST_HEAD(&tc_u_common_hash[i]); |
1478 | |
	ret = register_tcf_proto_ops(&cls_u32_ops);
1480 | if (ret) |
		kvfree(tc_u_common_hash);
1482 | return ret; |
1483 | } |
1484 | |
1485 | static void __exit exit_u32(void) |
1486 | { |
	unregister_tcf_proto_ops(&cls_u32_ops);
	kvfree(tc_u_common_hash);
1489 | } |
1490 | |
1491 | module_init(init_u32) |
1492 | module_exit(exit_u32) |
MODULE_DESCRIPTION("Universal 32bit based TC Classifier");
MODULE_LICENSE("GPL");
1495 | |