1// SPDX-License-Identifier: GPL-2.0-only
2#include <linux/kernel.h>
3#include <linux/init.h>
4#include <linux/module.h>
5#include <linux/netfilter.h>
6#include <linux/rhashtable.h>
7#include <linux/netdevice.h>
8#include <net/ip.h>
9#include <net/ip6_route.h>
10#include <net/netfilter/nf_tables.h>
11#include <net/netfilter/nf_flow_table.h>
12#include <net/netfilter/nf_conntrack.h>
13#include <net/netfilter/nf_conntrack_core.h>
14#include <net/netfilter/nf_conntrack_l4proto.h>
15#include <net/netfilter/nf_conntrack_tuple.h>
16
/* List of initialised flowtables; additions and removals in this file are
 * done with flowtable_lock held.
 */
static LIST_HEAD(flowtables);
static DEFINE_MUTEX(flowtable_lock);
19
20static void
21flow_offload_fill_dir(struct flow_offload *flow,
22 enum flow_offload_tuple_dir dir)
23{
24 struct flow_offload_tuple *ft = &flow->tuplehash[dir].tuple;
25 struct nf_conntrack_tuple *ctt = &flow->ct->tuplehash[dir].tuple;
26
27 ft->dir = dir;
28
29 switch (ctt->src.l3num) {
30 case NFPROTO_IPV4:
31 ft->src_v4 = ctt->src.u3.in;
32 ft->dst_v4 = ctt->dst.u3.in;
33 break;
34 case NFPROTO_IPV6:
35 ft->src_v6 = ctt->src.u3.in6;
36 ft->dst_v6 = ctt->dst.u3.in6;
37 break;
38 }
39
40 ft->l3proto = ctt->src.l3num;
41 ft->l4proto = ctt->dst.protonum;
42
43 switch (ctt->dst.protonum) {
44 case IPPROTO_TCP:
45 case IPPROTO_UDP:
46 ft->src_port = ctt->src.u.tcp.port;
47 ft->dst_port = ctt->dst.u.tcp.port;
48 break;
49 }
50}
51
52struct flow_offload *flow_offload_alloc(struct nf_conn *ct)
53{
54 struct flow_offload *flow;
55
56 if (unlikely(nf_ct_is_dying(ct)))
57 return NULL;
58
59 flow = kzalloc(sizeof(*flow), GFP_ATOMIC);
60 if (!flow)
61 return NULL;
62
63 refcount_inc(r: &ct->ct_general.use);
64 flow->ct = ct;
65
66 flow_offload_fill_dir(flow, dir: FLOW_OFFLOAD_DIR_ORIGINAL);
67 flow_offload_fill_dir(flow, dir: FLOW_OFFLOAD_DIR_REPLY);
68
69 if (ct->status & IPS_SRC_NAT)
70 __set_bit(NF_FLOW_SNAT, &flow->flags);
71 if (ct->status & IPS_DST_NAT)
72 __set_bit(NF_FLOW_DNAT, &flow->flags);
73
74 return flow;
75}
76EXPORT_SYMBOL_GPL(flow_offload_alloc);
77
78static u32 flow_offload_dst_cookie(struct flow_offload_tuple *flow_tuple)
79{
80 if (flow_tuple->l3proto == NFPROTO_IPV6)
81 return rt6_get_cookie(dst_rt6_info(flow_tuple->dst_cache));
82
83 return 0;
84}
85
86static struct dst_entry *nft_route_dst_fetch(struct nf_flow_route *route,
87 enum flow_offload_tuple_dir dir)
88{
89 struct dst_entry *dst = route->tuple[dir].dst;
90
91 route->tuple[dir].dst = NULL;
92
93 return dst;
94}
95
96static int flow_offload_fill_route(struct flow_offload *flow,
97 struct nf_flow_route *route,
98 enum flow_offload_tuple_dir dir)
99{
100 struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple;
101 struct dst_entry *dst = nft_route_dst_fetch(route, dir);
102 int i, j = 0;
103
104 switch (flow_tuple->l3proto) {
105 case NFPROTO_IPV4:
106 flow_tuple->mtu = ip_dst_mtu_maybe_forward(dst, forwarding: true);
107 break;
108 case NFPROTO_IPV6:
109 flow_tuple->mtu = ip6_dst_mtu_maybe_forward(dst, forwarding: true);
110 break;
111 }
112
113 flow_tuple->iifidx = route->tuple[dir].in.ifindex;
114 for (i = route->tuple[dir].in.num_encaps - 1; i >= 0; i--) {
115 flow_tuple->encap[j].id = route->tuple[dir].in.encap[i].id;
116 flow_tuple->encap[j].proto = route->tuple[dir].in.encap[i].proto;
117 if (route->tuple[dir].in.ingress_vlans & BIT(i))
118 flow_tuple->in_vlan_ingress |= BIT(j);
119 j++;
120 }
121
122 flow_tuple->tun = route->tuple[dir].in.tun;
123 flow_tuple->encap_num = route->tuple[dir].in.num_encaps;
124 flow_tuple->tun_num = route->tuple[dir].in.num_tuns;
125
126 switch (route->tuple[dir].xmit_type) {
127 case FLOW_OFFLOAD_XMIT_DIRECT:
128 memcpy(flow_tuple->out.h_dest, route->tuple[dir].out.h_dest,
129 ETH_ALEN);
130 memcpy(flow_tuple->out.h_source, route->tuple[dir].out.h_source,
131 ETH_ALEN);
132 flow_tuple->out.ifidx = route->tuple[dir].out.ifindex;
133 dst_release(dst);
134 break;
135 case FLOW_OFFLOAD_XMIT_XFRM:
136 case FLOW_OFFLOAD_XMIT_NEIGH:
137 flow_tuple->ifidx = route->tuple[dir].out.ifindex;
138 flow_tuple->dst_cache = dst;
139 flow_tuple->dst_cookie = flow_offload_dst_cookie(flow_tuple);
140 break;
141 default:
142 WARN_ON_ONCE(1);
143 break;
144 }
145 flow_tuple->xmit_type = route->tuple[dir].xmit_type;
146
147 return 0;
148}
149
150static void nft_flow_dst_release(struct flow_offload *flow,
151 enum flow_offload_tuple_dir dir)
152{
153 if (flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
154 flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)
155 dst_release(dst: flow->tuplehash[dir].tuple.dst_cache);
156}
157
158void flow_offload_route_init(struct flow_offload *flow,
159 struct nf_flow_route *route)
160{
161 flow_offload_fill_route(flow, route, dir: FLOW_OFFLOAD_DIR_ORIGINAL);
162 flow_offload_fill_route(flow, route, dir: FLOW_OFFLOAD_DIR_REPLY);
163 flow->type = NF_FLOW_OFFLOAD_ROUTE;
164}
165EXPORT_SYMBOL_GPL(flow_offload_route_init);
166
167static inline bool nf_flow_has_expired(const struct flow_offload *flow)
168{
169 return nf_flow_timeout_delta(timeout: flow->timeout) <= 0;
170}
171
172static void flow_offload_fixup_tcp(struct nf_conn *ct, u8 tcp_state)
173{
174 struct ip_ct_tcp *tcp = &ct->proto.tcp;
175
176 spin_lock_bh(lock: &ct->lock);
177 if (tcp->state != tcp_state)
178 tcp->state = tcp_state;
179
180 /* syn packet triggers the TCP reopen case from conntrack. */
181 if (tcp->state == TCP_CONNTRACK_CLOSE)
182 ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
183
184 /* Conntrack state is outdated due to offload bypass.
185 * Clear IP_CT_TCP_FLAG_MAXACK_SET, otherwise conntracks
186 * TCP reset validation will fail.
187 */
188 tcp->seen[0].td_maxwin = 0;
189 tcp->seen[0].flags &= ~IP_CT_TCP_FLAG_MAXACK_SET;
190 tcp->seen[1].td_maxwin = 0;
191 tcp->seen[1].flags &= ~IP_CT_TCP_FLAG_MAXACK_SET;
192 spin_unlock_bh(lock: &ct->lock);
193}
194
195static void flow_offload_fixup_ct(struct flow_offload *flow)
196{
197 struct nf_conn *ct = flow->ct;
198 struct net *net = nf_ct_net(ct);
199 int l4num = nf_ct_protonum(ct);
200 bool expired, closing = false;
201 u32 offload_timeout = 0;
202 s32 timeout;
203
204 if (l4num == IPPROTO_TCP) {
205 const struct nf_tcp_net *tn = nf_tcp_pernet(net);
206 u8 tcp_state;
207
208 /* Enter CLOSE state if fin/rst packet has been seen, this
209 * allows TCP reopen from conntrack. Otherwise, pick up from
210 * the last seen TCP state.
211 */
212 closing = test_bit(NF_FLOW_CLOSING, &flow->flags);
213 if (closing) {
214 flow_offload_fixup_tcp(ct, tcp_state: TCP_CONNTRACK_CLOSE);
215 timeout = READ_ONCE(tn->timeouts[TCP_CONNTRACK_CLOSE]);
216 expired = false;
217 } else {
218 tcp_state = READ_ONCE(ct->proto.tcp.state);
219 flow_offload_fixup_tcp(ct, tcp_state);
220 timeout = READ_ONCE(tn->timeouts[tcp_state]);
221 expired = nf_flow_has_expired(flow);
222 }
223 offload_timeout = READ_ONCE(tn->offload_timeout);
224
225 } else if (l4num == IPPROTO_UDP) {
226 const struct nf_udp_net *tn = nf_udp_pernet(net);
227 enum udp_conntrack state =
228 test_bit(IPS_SEEN_REPLY_BIT, &ct->status) ?
229 UDP_CT_REPLIED : UDP_CT_UNREPLIED;
230
231 timeout = READ_ONCE(tn->timeouts[state]);
232 expired = nf_flow_has_expired(flow);
233 offload_timeout = READ_ONCE(tn->offload_timeout);
234 } else {
235 return;
236 }
237
238 if (expired)
239 timeout -= offload_timeout;
240
241 if (timeout < 0)
242 timeout = 0;
243
244 if (closing ||
245 nf_flow_timeout_delta(READ_ONCE(ct->timeout)) > (__s32)timeout)
246 nf_ct_refresh(ct, extra_jiffies: timeout);
247}
248
249static void flow_offload_route_release(struct flow_offload *flow)
250{
251 nft_flow_dst_release(flow, dir: FLOW_OFFLOAD_DIR_ORIGINAL);
252 nft_flow_dst_release(flow, dir: FLOW_OFFLOAD_DIR_REPLY);
253}
254
255void flow_offload_free(struct flow_offload *flow)
256{
257 switch (flow->type) {
258 case NF_FLOW_OFFLOAD_ROUTE:
259 flow_offload_route_release(flow);
260 break;
261 default:
262 break;
263 }
264 nf_ct_put(ct: flow->ct);
265 kfree_rcu(flow, rcu_head);
266}
267EXPORT_SYMBOL_GPL(flow_offload_free);
268
269static u32 flow_offload_hash(const void *data, u32 len, u32 seed)
270{
271 const struct flow_offload_tuple *tuple = data;
272
273 return jhash(key: tuple, offsetof(struct flow_offload_tuple, __hash), initval: seed);
274}
275
276static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed)
277{
278 const struct flow_offload_tuple_rhash *tuplehash = data;
279
280 return jhash(key: &tuplehash->tuple, offsetof(struct flow_offload_tuple, __hash), initval: seed);
281}
282
283static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
284 const void *ptr)
285{
286 const struct flow_offload_tuple *tuple = arg->key;
287 const struct flow_offload_tuple_rhash *x = ptr;
288
289 if (memcmp(p: &x->tuple, q: tuple, offsetof(struct flow_offload_tuple, __hash)))
290 return 1;
291
292 return 0;
293}
294
295static const struct rhashtable_params nf_flow_offload_rhash_params = {
296 .head_offset = offsetof(struct flow_offload_tuple_rhash, node),
297 .hashfn = flow_offload_hash,
298 .obj_hashfn = flow_offload_hash_obj,
299 .obj_cmpfn = flow_offload_hash_cmp,
300 .automatic_shrinking = true,
301};
302
303unsigned long flow_offload_get_timeout(struct flow_offload *flow)
304{
305 unsigned long timeout = NF_FLOW_TIMEOUT;
306 struct net *net = nf_ct_net(ct: flow->ct);
307 int l4num = nf_ct_protonum(ct: flow->ct);
308
309 if (l4num == IPPROTO_TCP) {
310 struct nf_tcp_net *tn = nf_tcp_pernet(net);
311
312 timeout = tn->offload_timeout;
313 } else if (l4num == IPPROTO_UDP) {
314 struct nf_udp_net *tn = nf_udp_pernet(net);
315
316 timeout = tn->offload_timeout;
317 }
318
319 return timeout;
320}
321
322int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
323{
324 int err;
325
326 flow->timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
327
328 err = rhashtable_insert_fast(ht: &flow_table->rhashtable,
329 obj: &flow->tuplehash[0].node,
330 params: nf_flow_offload_rhash_params);
331 if (err < 0)
332 return err;
333
334 err = rhashtable_insert_fast(ht: &flow_table->rhashtable,
335 obj: &flow->tuplehash[1].node,
336 params: nf_flow_offload_rhash_params);
337 if (err < 0) {
338 rhashtable_remove_fast(ht: &flow_table->rhashtable,
339 obj: &flow->tuplehash[0].node,
340 params: nf_flow_offload_rhash_params);
341 return err;
342 }
343
344 nf_ct_refresh(ct: flow->ct, NF_CT_DAY);
345
346 if (nf_flowtable_hw_offload(flowtable: flow_table)) {
347 __set_bit(NF_FLOW_HW, &flow->flags);
348 nf_flow_offload_add(flowtable: flow_table, flow);
349 }
350
351 return 0;
352}
353EXPORT_SYMBOL_GPL(flow_offload_add);
354
355void flow_offload_refresh(struct nf_flowtable *flow_table,
356 struct flow_offload *flow, bool force)
357{
358 u32 timeout;
359
360 timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
361 if (force || timeout - READ_ONCE(flow->timeout) > HZ)
362 WRITE_ONCE(flow->timeout, timeout);
363 else
364 return;
365
366 if (likely(!nf_flowtable_hw_offload(flow_table)) ||
367 test_bit(NF_FLOW_CLOSING, &flow->flags))
368 return;
369
370 nf_flow_offload_add(flowtable: flow_table, flow);
371}
372EXPORT_SYMBOL_GPL(flow_offload_refresh);
373
374static void flow_offload_del(struct nf_flowtable *flow_table,
375 struct flow_offload *flow)
376{
377 rhashtable_remove_fast(ht: &flow_table->rhashtable,
378 obj: &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
379 params: nf_flow_offload_rhash_params);
380 rhashtable_remove_fast(ht: &flow_table->rhashtable,
381 obj: &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
382 params: nf_flow_offload_rhash_params);
383 flow_offload_free(flow);
384}
385
386void flow_offload_teardown(struct flow_offload *flow)
387{
388 clear_bit(nr: IPS_OFFLOAD_BIT, addr: &flow->ct->status);
389 if (!test_and_set_bit(nr: NF_FLOW_TEARDOWN, addr: &flow->flags))
390 flow_offload_fixup_ct(flow);
391}
392EXPORT_SYMBOL_GPL(flow_offload_teardown);
393
394struct flow_offload_tuple_rhash *
395flow_offload_lookup(struct nf_flowtable *flow_table,
396 struct flow_offload_tuple *tuple)
397{
398 struct flow_offload_tuple_rhash *tuplehash;
399 struct flow_offload *flow;
400 int dir;
401
402 tuplehash = rhashtable_lookup(ht: &flow_table->rhashtable, key: tuple,
403 params: nf_flow_offload_rhash_params);
404 if (!tuplehash)
405 return NULL;
406
407 dir = tuplehash->tuple.dir;
408 flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
409 if (test_bit(NF_FLOW_TEARDOWN, &flow->flags))
410 return NULL;
411
412 if (unlikely(nf_ct_is_dying(flow->ct)))
413 return NULL;
414
415 return tuplehash;
416}
417EXPORT_SYMBOL_GPL(flow_offload_lookup);
418
419static int
420nf_flow_table_iterate(struct nf_flowtable *flow_table,
421 void (*iter)(struct nf_flowtable *flowtable,
422 struct flow_offload *flow, void *data),
423 void *data)
424{
425 struct flow_offload_tuple_rhash *tuplehash;
426 struct rhashtable_iter hti;
427 struct flow_offload *flow;
428 int err = 0;
429
430 rhashtable_walk_enter(ht: &flow_table->rhashtable, iter: &hti);
431 rhashtable_walk_start(iter: &hti);
432
433 while ((tuplehash = rhashtable_walk_next(iter: &hti))) {
434 if (IS_ERR(ptr: tuplehash)) {
435 if (PTR_ERR(ptr: tuplehash) != -EAGAIN) {
436 err = PTR_ERR(ptr: tuplehash);
437 break;
438 }
439 continue;
440 }
441 if (tuplehash->tuple.dir)
442 continue;
443
444 flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);
445
446 iter(flow_table, flow, data);
447 }
448 rhashtable_walk_stop(iter: &hti);
449 rhashtable_walk_exit(iter: &hti);
450
451 return err;
452}
453
454static bool nf_flow_custom_gc(struct nf_flowtable *flow_table,
455 const struct flow_offload *flow)
456{
457 return flow_table->type->gc && flow_table->type->gc(flow);
458}
459
460/**
461 * nf_flow_table_tcp_timeout() - new timeout of offloaded tcp entry
462 * @ct: Flowtable offloaded tcp ct
463 *
464 * Return: number of seconds when ct entry should expire.
465 */
466static u32 nf_flow_table_tcp_timeout(const struct nf_conn *ct)
467{
468 u8 state = READ_ONCE(ct->proto.tcp.state);
469
470 switch (state) {
471 case TCP_CONNTRACK_SYN_SENT:
472 case TCP_CONNTRACK_SYN_RECV:
473 return 0;
474 case TCP_CONNTRACK_ESTABLISHED:
475 return NF_CT_DAY;
476 case TCP_CONNTRACK_FIN_WAIT:
477 case TCP_CONNTRACK_CLOSE_WAIT:
478 case TCP_CONNTRACK_LAST_ACK:
479 case TCP_CONNTRACK_TIME_WAIT:
480 return 5 * 60 * HZ;
481 case TCP_CONNTRACK_CLOSE:
482 return 0;
483 }
484
485 return 0;
486}
487
488/**
489 * nf_flow_table_extend_ct_timeout() - Extend ct timeout of offloaded conntrack entry
490 * @ct: Flowtable offloaded ct
491 *
492 * Datapath lookups in the conntrack table will evict nf_conn entries
493 * if they have expired.
494 *
495 * Once nf_conn entries have been offloaded, nf_conntrack might not see any
496 * packets anymore. Thus ct->timeout is no longer refreshed and ct can
497 * be evicted.
498 *
499 * To avoid the need for an additional check on the offload bit for every
500 * packet processed via nf_conntrack_in(), set an arbitrary timeout large
501 * enough not to ever expire, this save us a check for the IPS_OFFLOAD_BIT
502 * from the packet path via nf_ct_is_expired().
503 */
504static void nf_flow_table_extend_ct_timeout(struct nf_conn *ct)
505{
506 static const u32 min_timeout = 5 * 60 * HZ;
507 u32 expires = nf_ct_expires(ct);
508
509 /* normal case: large enough timeout, nothing to do. */
510 if (likely(expires >= min_timeout))
511 return;
512
513 /* must check offload bit after this, we do not hold any locks.
514 * flowtable and ct entries could have been removed on another CPU.
515 */
516 if (!refcount_inc_not_zero(r: &ct->ct_general.use))
517 return;
518
519 /* load ct->status after refcount increase */
520 smp_acquire__after_ctrl_dep();
521
522 if (nf_ct_is_confirmed(ct) &&
523 test_bit(IPS_OFFLOAD_BIT, &ct->status)) {
524 u8 l4proto = nf_ct_protonum(ct);
525 u32 new_timeout = true;
526
527 switch (l4proto) {
528 case IPPROTO_UDP:
529 new_timeout = NF_CT_DAY;
530 break;
531 case IPPROTO_TCP:
532 new_timeout = nf_flow_table_tcp_timeout(ct);
533 break;
534 default:
535 WARN_ON_ONCE(1);
536 break;
537 }
538
539 /* Update to ct->timeout from nf_conntrack happens
540 * without holding ct->lock.
541 *
542 * Use cmpxchg to ensure timeout extension doesn't
543 * happen when we race with conntrack datapath.
544 *
545 * The inverse -- datapath updating ->timeout right
546 * after this -- is fine, datapath is authoritative.
547 */
548 if (new_timeout) {
549 new_timeout += nfct_time_stamp;
550 cmpxchg(&ct->timeout, expires, new_timeout);
551 }
552 }
553
554 nf_ct_put(ct);
555}
556
557static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table,
558 struct flow_offload *flow, void *data)
559{
560 bool teardown = test_bit(NF_FLOW_TEARDOWN, &flow->flags);
561
562 if (nf_flow_has_expired(flow) ||
563 nf_ct_is_dying(ct: flow->ct) ||
564 nf_flow_custom_gc(flow_table, flow)) {
565 flow_offload_teardown(flow);
566 teardown = true;
567 } else if (!teardown) {
568 nf_flow_table_extend_ct_timeout(ct: flow->ct);
569 }
570
571 if (teardown) {
572 if (test_bit(NF_FLOW_HW, &flow->flags)) {
573 if (!test_bit(NF_FLOW_HW_DYING, &flow->flags))
574 nf_flow_offload_del(flowtable: flow_table, flow);
575 else if (test_bit(NF_FLOW_HW_DEAD, &flow->flags))
576 flow_offload_del(flow_table, flow);
577 } else {
578 flow_offload_del(flow_table, flow);
579 }
580 } else if (test_bit(NF_FLOW_CLOSING, &flow->flags) &&
581 test_bit(NF_FLOW_HW, &flow->flags) &&
582 !test_bit(NF_FLOW_HW_DYING, &flow->flags)) {
583 nf_flow_offload_del(flowtable: flow_table, flow);
584 } else if (test_bit(NF_FLOW_HW, &flow->flags)) {
585 nf_flow_offload_stats(flowtable: flow_table, flow);
586 }
587}
588
589void nf_flow_table_gc_run(struct nf_flowtable *flow_table)
590{
591 nf_flow_table_iterate(flow_table, iter: nf_flow_offload_gc_step, NULL);
592}
593
594static void nf_flow_offload_work_gc(struct work_struct *work)
595{
596 struct nf_flowtable *flow_table;
597
598 flow_table = container_of(work, struct nf_flowtable, gc_work.work);
599 nf_flow_table_gc_run(flow_table);
600 queue_delayed_work(wq: system_power_efficient_wq, dwork: &flow_table->gc_work, HZ);
601}
602
603static void nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
604 __be16 port, __be16 new_port)
605{
606 struct tcphdr *tcph;
607
608 tcph = (void *)(skb_network_header(skb) + thoff);
609 inet_proto_csum_replace2(sum: &tcph->check, skb, from: port, to: new_port, pseudohdr: false);
610}
611
612static void nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
613 __be16 port, __be16 new_port)
614{
615 struct udphdr *udph;
616
617 udph = (void *)(skb_network_header(skb) + thoff);
618 if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
619 inet_proto_csum_replace2(sum: &udph->check, skb, from: port,
620 to: new_port, pseudohdr: false);
621 if (!udph->check)
622 udph->check = CSUM_MANGLED_0;
623 }
624}
625
626static void nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff,
627 u8 protocol, __be16 port, __be16 new_port)
628{
629 switch (protocol) {
630 case IPPROTO_TCP:
631 nf_flow_nat_port_tcp(skb, thoff, port, new_port);
632 break;
633 case IPPROTO_UDP:
634 nf_flow_nat_port_udp(skb, thoff, port, new_port);
635 break;
636 }
637}
638
639void nf_flow_snat_port(const struct flow_offload *flow,
640 struct sk_buff *skb, unsigned int thoff,
641 u8 protocol, enum flow_offload_tuple_dir dir)
642{
643 struct flow_ports *hdr;
644 __be16 port, new_port;
645
646 hdr = (void *)(skb_network_header(skb) + thoff);
647
648 switch (dir) {
649 case FLOW_OFFLOAD_DIR_ORIGINAL:
650 port = hdr->source;
651 new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port;
652 hdr->source = new_port;
653 break;
654 case FLOW_OFFLOAD_DIR_REPLY:
655 port = hdr->dest;
656 new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port;
657 hdr->dest = new_port;
658 break;
659 }
660
661 nf_flow_nat_port(skb, thoff, protocol, port, new_port);
662}
663EXPORT_SYMBOL_GPL(nf_flow_snat_port);
664
665void nf_flow_dnat_port(const struct flow_offload *flow, struct sk_buff *skb,
666 unsigned int thoff, u8 protocol,
667 enum flow_offload_tuple_dir dir)
668{
669 struct flow_ports *hdr;
670 __be16 port, new_port;
671
672 hdr = (void *)(skb_network_header(skb) + thoff);
673
674 switch (dir) {
675 case FLOW_OFFLOAD_DIR_ORIGINAL:
676 port = hdr->dest;
677 new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port;
678 hdr->dest = new_port;
679 break;
680 case FLOW_OFFLOAD_DIR_REPLY:
681 port = hdr->source;
682 new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port;
683 hdr->source = new_port;
684 break;
685 }
686
687 nf_flow_nat_port(skb, thoff, protocol, port, new_port);
688}
689EXPORT_SYMBOL_GPL(nf_flow_dnat_port);
690
691int nf_flow_table_init(struct nf_flowtable *flowtable)
692{
693 int err;
694
695 INIT_DELAYED_WORK(&flowtable->gc_work, nf_flow_offload_work_gc);
696 flow_block_init(flow_block: &flowtable->flow_block);
697 init_rwsem(&flowtable->flow_block_lock);
698
699 err = rhashtable_init(&flowtable->rhashtable,
700 &nf_flow_offload_rhash_params);
701 if (err < 0)
702 return err;
703
704 queue_delayed_work(wq: system_power_efficient_wq,
705 dwork: &flowtable->gc_work, HZ);
706
707 mutex_lock(&flowtable_lock);
708 list_add(new: &flowtable->list, head: &flowtables);
709 mutex_unlock(lock: &flowtable_lock);
710
711 return 0;
712}
713EXPORT_SYMBOL_GPL(nf_flow_table_init);
714
715static void nf_flow_table_do_cleanup(struct nf_flowtable *flow_table,
716 struct flow_offload *flow, void *data)
717{
718 struct net_device *dev = data;
719
720 if (!dev) {
721 flow_offload_teardown(flow);
722 return;
723 }
724
725 if (net_eq(net1: nf_ct_net(ct: flow->ct), net2: dev_net(dev)) &&
726 (flow->tuplehash[0].tuple.iifidx == dev->ifindex ||
727 flow->tuplehash[1].tuple.iifidx == dev->ifindex))
728 flow_offload_teardown(flow);
729}
730
731void nf_flow_table_gc_cleanup(struct nf_flowtable *flowtable,
732 struct net_device *dev)
733{
734 nf_flow_table_iterate(flow_table: flowtable, iter: nf_flow_table_do_cleanup, data: dev);
735 flush_delayed_work(dwork: &flowtable->gc_work);
736 nf_flow_table_offload_flush(flowtable);
737}
738
739void nf_flow_table_cleanup(struct net_device *dev)
740{
741 struct nf_flowtable *flowtable;
742
743 mutex_lock(&flowtable_lock);
744 list_for_each_entry(flowtable, &flowtables, list)
745 nf_flow_table_gc_cleanup(flowtable, dev);
746 mutex_unlock(lock: &flowtable_lock);
747}
748EXPORT_SYMBOL_GPL(nf_flow_table_cleanup);
749
750void nf_flow_table_free(struct nf_flowtable *flow_table)
751{
752 mutex_lock(&flowtable_lock);
753 list_del(entry: &flow_table->list);
754 mutex_unlock(lock: &flowtable_lock);
755
756 cancel_delayed_work_sync(dwork: &flow_table->gc_work);
757 nf_flow_table_offload_flush(flowtable: flow_table);
758 /* ... no more pending work after this stage ... */
759 nf_flow_table_iterate(flow_table, iter: nf_flow_table_do_cleanup, NULL);
760 nf_flow_table_gc_run(flow_table);
761 nf_flow_table_offload_flush_cleanup(flowtable: flow_table);
762 rhashtable_destroy(ht: &flow_table->rhashtable);
763}
764EXPORT_SYMBOL_GPL(nf_flow_table_free);
765
766static int nf_flow_table_init_net(struct net *net)
767{
768 net->ft.stat = alloc_percpu(struct nf_flow_table_stat);
769 return net->ft.stat ? 0 : -ENOMEM;
770}
771
772static void nf_flow_table_fini_net(struct net *net)
773{
774 free_percpu(pdata: net->ft.stat);
775}
776
/*
 * Per-netns init: allocate the statistics, then set up the proc side.
 * If the proc setup fails the statistics are freed again.
 *
 * Returns 0 on success or the negative error of the failing step.
 */
static int nf_flow_table_pernet_init(struct net *net)
{
	int ret;

	ret = nf_flow_table_init_net(net);
	if (ret < 0)
		return ret;

	ret = nf_flow_table_init_proc(net);
	if (ret < 0) {
		/* undo the statistics allocation */
		nf_flow_table_fini_net(net);
		return ret;
	}

	return 0;
}
795
796static void nf_flow_table_pernet_exit(struct list_head *net_exit_list)
797{
798 struct net *net;
799
800 list_for_each_entry(net, net_exit_list, exit_list) {
801 nf_flow_table_fini_proc(net);
802 nf_flow_table_fini_net(net);
803 }
804}
805
806static struct pernet_operations nf_flow_table_net_ops = {
807 .init = nf_flow_table_pernet_init,
808 .exit_batch = nf_flow_table_pernet_exit,
809};
810
811static int __init nf_flow_table_module_init(void)
812{
813 int ret;
814
815 ret = register_pernet_subsys(&nf_flow_table_net_ops);
816 if (ret < 0)
817 return ret;
818
819 ret = nf_flow_table_offload_init();
820 if (ret)
821 goto out_offload;
822
823 ret = nf_flow_register_bpf();
824 if (ret)
825 goto out_bpf;
826
827 return 0;
828
829out_bpf:
830 nf_flow_table_offload_exit();
831out_offload:
832 unregister_pernet_subsys(&nf_flow_table_net_ops);
833 return ret;
834}
835
836static void __exit nf_flow_table_module_exit(void)
837{
838 nf_flow_table_offload_exit();
839 unregister_pernet_subsys(&nf_flow_table_net_ops);
840}
841
842module_init(nf_flow_table_module_init);
843module_exit(nf_flow_table_module_exit);
844
845MODULE_LICENSE("GPL");
846MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
847MODULE_DESCRIPTION("Netfilter flow table module");
848

/* Source: linux/net/netfilter/nf_flow_table_core.c */