1// SPDX-License-Identifier: GPL-2.0-only
2#include <linux/kernel.h>
3#include <linux/init.h>
4#include <linux/module.h>
5#include <linux/netfilter.h>
6#include <linux/rhashtable.h>
7#include <linux/ip.h>
8#include <linux/ipv6.h>
9#include <linux/netdevice.h>
10#include <linux/if_ether.h>
11#include <net/gso.h>
12#include <net/ip.h>
13#include <net/ipv6.h>
14#include <net/ip6_route.h>
15#include <net/neighbour.h>
16#include <net/netfilter/nf_flow_table.h>
17#include <net/netfilter/nf_conntrack_acct.h>
18/* For layer 4 checksum field offset. */
19#include <linux/tcp.h>
20#include <linux/udp.h>
21
22static int nf_flow_state_check(struct flow_offload *flow, int proto,
23 struct sk_buff *skb, unsigned int thoff)
24{
25 struct tcphdr *tcph;
26
27 if (proto != IPPROTO_TCP)
28 return 0;
29
30 tcph = (void *)(skb_network_header(skb) + thoff);
31 if (tcph->syn && test_bit(NF_FLOW_CLOSING, &flow->flags)) {
32 flow_offload_teardown(flow);
33 return -1;
34 }
35
36 if ((tcph->fin || tcph->rst) &&
37 !test_bit(NF_FLOW_CLOSING, &flow->flags))
38 set_bit(nr: NF_FLOW_CLOSING, addr: &flow->flags);
39
40 return 0;
41}
42
43static void nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
44 __be32 addr, __be32 new_addr)
45{
46 struct tcphdr *tcph;
47
48 tcph = (void *)(skb_network_header(skb) + thoff);
49 inet_proto_csum_replace4(sum: &tcph->check, skb, from: addr, to: new_addr, pseudohdr: true);
50}
51
52static void nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
53 __be32 addr, __be32 new_addr)
54{
55 struct udphdr *udph;
56
57 udph = (void *)(skb_network_header(skb) + thoff);
58 if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
59 inet_proto_csum_replace4(sum: &udph->check, skb, from: addr,
60 to: new_addr, pseudohdr: true);
61 if (!udph->check)
62 udph->check = CSUM_MANGLED_0;
63 }
64}
65
66static void nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
67 unsigned int thoff, __be32 addr,
68 __be32 new_addr)
69{
70 switch (iph->protocol) {
71 case IPPROTO_TCP:
72 nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr);
73 break;
74 case IPPROTO_UDP:
75 nf_flow_nat_ip_udp(skb, thoff, addr, new_addr);
76 break;
77 }
78}
79
80static void nf_flow_snat_ip(const struct flow_offload *flow,
81 struct sk_buff *skb, struct iphdr *iph,
82 unsigned int thoff, enum flow_offload_tuple_dir dir)
83{
84 __be32 addr, new_addr;
85
86 switch (dir) {
87 case FLOW_OFFLOAD_DIR_ORIGINAL:
88 addr = iph->saddr;
89 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
90 iph->saddr = new_addr;
91 break;
92 case FLOW_OFFLOAD_DIR_REPLY:
93 addr = iph->daddr;
94 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
95 iph->daddr = new_addr;
96 break;
97 }
98 csum_replace4(sum: &iph->check, from: addr, to: new_addr);
99
100 nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
101}
102
103static void nf_flow_dnat_ip(const struct flow_offload *flow,
104 struct sk_buff *skb, struct iphdr *iph,
105 unsigned int thoff, enum flow_offload_tuple_dir dir)
106{
107 __be32 addr, new_addr;
108
109 switch (dir) {
110 case FLOW_OFFLOAD_DIR_ORIGINAL:
111 addr = iph->daddr;
112 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
113 iph->daddr = new_addr;
114 break;
115 case FLOW_OFFLOAD_DIR_REPLY:
116 addr = iph->saddr;
117 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
118 iph->saddr = new_addr;
119 break;
120 }
121 csum_replace4(sum: &iph->check, from: addr, to: new_addr);
122
123 nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
124}
125
126static void nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
127 unsigned int thoff, enum flow_offload_tuple_dir dir,
128 struct iphdr *iph)
129{
130 if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
131 nf_flow_snat_port(flow, skb, thoff, protocol: iph->protocol, dir);
132 nf_flow_snat_ip(flow, skb, iph, thoff, dir);
133 }
134 if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
135 nf_flow_dnat_port(flow, skb, thoff, protocol: iph->protocol, dir);
136 nf_flow_dnat_ip(flow, skb, iph, thoff, dir);
137 }
138}
139
/* An option-less IPv4 header is exactly sizeof(struct iphdr) bytes;
 * any other transport header offset implies IP options (or a bogus ihl),
 * which the fast path does not handle.
 */
static bool ip_has_options(unsigned int thoff)
{
	if (thoff == sizeof(struct iphdr))
		return false;

	return true;
}
144
/* Record the encapsulation present on @skb (hardware-accelerated VLAN
 * tag, in-payload VLAN header, PPPoE session, and optionally the outer
 * IPIP tunnel endpoints) into @tuple so that lookup matches the packet
 * exactly as it was received.
 */
static void nf_flow_tuple_encap(struct sk_buff *skb,
				struct flow_offload_tuple *tuple)
{
	__be16 inner_proto = skb->protocol;
	struct vlan_ethhdr *veth;
	struct pppoe_hdr *phdr;
	struct iphdr *iph;
	u16 offset = 0;
	int i = 0;

	/* A hw-accelerated VLAN tag lives in skb metadata, not payload. */
	if (skb_vlan_tag_present(skb)) {
		tuple->encap[i].id = skb_vlan_tag_get(skb);
		tuple->encap[i].proto = skb->vlan_proto;
		i++;
	}
	switch (skb->protocol) {
	case htons(ETH_P_8021Q):
		veth = (struct vlan_ethhdr *)skb_mac_header(skb);
		tuple->encap[i].id = ntohs(veth->h_vlan_TCI);
		tuple->encap[i].proto = skb->protocol;
		inner_proto = veth->h_vlan_encapsulated_proto;
		offset += VLAN_HLEN;
		break;
	case htons(ETH_P_PPP_SES):
		phdr = (struct pppoe_hdr *)skb_network_header(skb);
		tuple->encap[i].id = ntohs(phdr->sid);
		tuple->encap[i].proto = skb->protocol;
		/* The PPP protocol field immediately follows the PPPoE header. */
		inner_proto = *((__be16 *)(phdr + 1));
		offset += PPPOE_SES_HLEN;
		break;
	}

	/* If the (possibly encapsulated) payload is IPv4 carrying IPIP,
	 * record the outer tunnel endpoints.
	 */
	if (inner_proto == htons(ETH_P_IP)) {
		iph = (struct iphdr *)(skb_network_header(skb) + offset);
		if (iph->protocol == IPPROTO_IPIP) {
			tuple->tun.dst_v4.s_addr = iph->daddr;
			tuple->tun.src_v4.s_addr = iph->saddr;
			tuple->tun.l3_proto = IPPROTO_IPIP;
		}
	}
}
186
/* Per-packet parsing state shared between lookup and forward. */
struct nf_flowtable_ctx {
	const struct net_device	*in;	/* ingress device */
	u32			offset;	/* bytes of encapsulation before the IP header */
	u32			hdrsize;	/* transport header size required writable */
};
192
193static int nf_flow_tuple_ip(struct nf_flowtable_ctx *ctx, struct sk_buff *skb,
194 struct flow_offload_tuple *tuple)
195{
196 struct flow_ports *ports;
197 unsigned int thoff;
198 struct iphdr *iph;
199 u8 ipproto;
200
201 if (!pskb_may_pull(skb, len: sizeof(*iph) + ctx->offset))
202 return -1;
203
204 iph = (struct iphdr *)(skb_network_header(skb) + ctx->offset);
205 thoff = (iph->ihl * 4);
206
207 if (ip_is_fragment(iph) ||
208 unlikely(ip_has_options(thoff)))
209 return -1;
210
211 thoff += ctx->offset;
212
213 ipproto = iph->protocol;
214 switch (ipproto) {
215 case IPPROTO_TCP:
216 ctx->hdrsize = sizeof(struct tcphdr);
217 break;
218 case IPPROTO_UDP:
219 ctx->hdrsize = sizeof(struct udphdr);
220 break;
221#ifdef CONFIG_NF_CT_PROTO_GRE
222 case IPPROTO_GRE:
223 ctx->hdrsize = sizeof(struct gre_base_hdr);
224 break;
225#endif
226 default:
227 return -1;
228 }
229
230 if (iph->ttl <= 1)
231 return -1;
232
233 if (!pskb_may_pull(skb, len: thoff + ctx->hdrsize))
234 return -1;
235
236 switch (ipproto) {
237 case IPPROTO_TCP:
238 case IPPROTO_UDP:
239 ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
240 tuple->src_port = ports->source;
241 tuple->dst_port = ports->dest;
242 break;
243 case IPPROTO_GRE: {
244 struct gre_base_hdr *greh;
245
246 greh = (struct gre_base_hdr *)(skb_network_header(skb) + thoff);
247 if ((greh->flags & GRE_VERSION) != GRE_VERSION_0)
248 return -1;
249 break;
250 }
251 }
252
253 iph = (struct iphdr *)(skb_network_header(skb) + ctx->offset);
254
255 tuple->src_v4.s_addr = iph->saddr;
256 tuple->dst_v4.s_addr = iph->daddr;
257 tuple->l3proto = AF_INET;
258 tuple->l4proto = ipproto;
259 tuple->iifidx = ctx->in->ifindex;
260 nf_flow_tuple_encap(skb, tuple);
261
262 return 0;
263}
264
265/* Based on ip_exceeds_mtu(). */
266static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
267{
268 if (skb->len <= mtu)
269 return false;
270
271 if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
272 return false;
273
274 return true;
275}
276
277static inline bool nf_flow_dst_check(struct flow_offload_tuple *tuple)
278{
279 if (tuple->xmit_type != FLOW_OFFLOAD_XMIT_NEIGH &&
280 tuple->xmit_type != FLOW_OFFLOAD_XMIT_XFRM)
281 return true;
282
283 return dst_check(dst: tuple->dst_cache, cookie: tuple->dst_cookie);
284}
285
286static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
287 const struct nf_hook_state *state,
288 struct dst_entry *dst)
289{
290 skb_orphan(skb);
291 skb_dst_set_noref(skb, dst);
292 dst_output(net: state->net, sk: state->sk, skb);
293 return NF_STOLEN;
294}
295
296static bool nf_flow_ip4_tunnel_proto(struct sk_buff *skb, u32 *psize)
297{
298 struct iphdr *iph;
299 u16 size;
300
301 if (!pskb_may_pull(skb, len: sizeof(*iph) + *psize))
302 return false;
303
304 iph = (struct iphdr *)(skb_network_header(skb) + *psize);
305 size = iph->ihl << 2;
306
307 if (ip_is_fragment(iph) || unlikely(ip_has_options(size)))
308 return false;
309
310 if (iph->ttl <= 1)
311 return false;
312
313 if (iph->protocol == IPPROTO_IPIP)
314 *psize += size;
315
316 return true;
317}
318
319static void nf_flow_ip4_tunnel_pop(struct sk_buff *skb)
320{
321 struct iphdr *iph = (struct iphdr *)skb_network_header(skb);
322
323 if (iph->protocol != IPPROTO_IPIP)
324 return;
325
326 skb_pull(skb, len: iph->ihl << 2);
327 skb_reset_network_header(skb);
328}
329
330static bool nf_flow_skb_encap_protocol(struct sk_buff *skb, __be16 proto,
331 u32 *offset)
332{
333 __be16 inner_proto = skb->protocol;
334 struct vlan_ethhdr *veth;
335 bool ret = false;
336
337 switch (skb->protocol) {
338 case htons(ETH_P_8021Q):
339 if (!pskb_may_pull(skb, len: skb_mac_offset(skb) + sizeof(*veth)))
340 return false;
341
342 veth = (struct vlan_ethhdr *)skb_mac_header(skb);
343 if (veth->h_vlan_encapsulated_proto == proto) {
344 *offset += VLAN_HLEN;
345 inner_proto = proto;
346 ret = true;
347 }
348 break;
349 case htons(ETH_P_PPP_SES):
350 if (nf_flow_pppoe_proto(skb, inner_proto: &inner_proto) &&
351 inner_proto == proto) {
352 *offset += PPPOE_SES_HLEN;
353 ret = true;
354 }
355 break;
356 }
357
358 if (inner_proto == htons(ETH_P_IP))
359 ret = nf_flow_ip4_tunnel_proto(skb, psize: offset);
360
361 return ret;
362}
363
364static void nf_flow_encap_pop(struct sk_buff *skb,
365 struct flow_offload_tuple_rhash *tuplehash)
366{
367 struct vlan_hdr *vlan_hdr;
368 int i;
369
370 for (i = 0; i < tuplehash->tuple.encap_num; i++) {
371 if (skb_vlan_tag_present(skb)) {
372 __vlan_hwaccel_clear_tag(skb);
373 continue;
374 }
375 switch (skb->protocol) {
376 case htons(ETH_P_8021Q):
377 vlan_hdr = (struct vlan_hdr *)skb->data;
378 __skb_pull(skb, VLAN_HLEN);
379 vlan_set_encap_proto(skb, vhdr: vlan_hdr);
380 skb_reset_network_header(skb);
381 break;
382 case htons(ETH_P_PPP_SES):
383 skb->protocol = __nf_flow_pppoe_proto(skb);
384 skb_pull(skb, PPPOE_SES_HLEN);
385 skb_reset_network_header(skb);
386 break;
387 }
388 }
389
390 if (skb->protocol == htons(ETH_P_IP))
391 nf_flow_ip4_tunnel_pop(skb);
392}
393
/* Link-layer transmit parameters resolved by the hook. */
struct nf_flow_xmit {
	const void		*dest;		/* destination MAC address */
	const void		*source;	/* source MAC address (may be NULL) */
	struct net_device	*outdev;	/* egress device */
};
399
400static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb,
401 struct nf_flow_xmit *xmit)
402{
403 skb->dev = xmit->outdev;
404 dev_hard_header(skb, dev: skb->dev, ntohs(skb->protocol),
405 daddr: xmit->dest, saddr: xmit->source, len: skb->len);
406 dev_queue_xmit(skb);
407
408 return NF_STOLEN;
409}
410
411static struct flow_offload_tuple_rhash *
412nf_flow_offload_lookup(struct nf_flowtable_ctx *ctx,
413 struct nf_flowtable *flow_table, struct sk_buff *skb)
414{
415 struct flow_offload_tuple tuple = {};
416
417 if (!nf_flow_skb_encap_protocol(skb, htons(ETH_P_IP), offset: &ctx->offset))
418 return NULL;
419
420 if (nf_flow_tuple_ip(ctx, skb, tuple: &tuple) < 0)
421 return NULL;
422
423 return flow_offload_lookup(flow_table, tuple: &tuple);
424}
425
426static int nf_flow_offload_forward(struct nf_flowtable_ctx *ctx,
427 struct nf_flowtable *flow_table,
428 struct flow_offload_tuple_rhash *tuplehash,
429 struct sk_buff *skb)
430{
431 enum flow_offload_tuple_dir dir;
432 struct flow_offload *flow;
433 unsigned int thoff, mtu;
434 struct iphdr *iph;
435
436 dir = tuplehash->tuple.dir;
437 flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
438
439 mtu = flow->tuplehash[dir].tuple.mtu + ctx->offset;
440 if (flow->tuplehash[!dir].tuple.tun_num)
441 mtu -= sizeof(*iph);
442
443 if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
444 return 0;
445
446 iph = (struct iphdr *)(skb_network_header(skb) + ctx->offset);
447 thoff = (iph->ihl * 4) + ctx->offset;
448 if (nf_flow_state_check(flow, proto: iph->protocol, skb, thoff))
449 return 0;
450
451 if (!nf_flow_dst_check(tuple: &tuplehash->tuple)) {
452 flow_offload_teardown(flow);
453 return 0;
454 }
455
456 if (skb_try_make_writable(skb, write_len: thoff + ctx->hdrsize))
457 return -1;
458
459 flow_offload_refresh(flow_table, flow, force: false);
460
461 nf_flow_encap_pop(skb, tuplehash);
462 thoff -= ctx->offset;
463
464 iph = ip_hdr(skb);
465 nf_flow_nat_ip(flow, skb, thoff, dir, iph);
466
467 ip_decrease_ttl(iph);
468 skb_clear_tstamp(skb);
469
470 if (flow_table->flags & NF_FLOWTABLE_COUNTER)
471 nf_ct_acct_update(ct: flow->ct, dir: tuplehash->tuple.dir, bytes: skb->len);
472
473 return 1;
474}
475
/* Prepend a PPPoE session header (plus the 2-byte PPP protocol field)
 * for session @id and switch skb->protocol to ETH_P_PPP_SES.
 *
 * Returns 0 on success, -1 if headroom cannot be obtained or the payload
 * protocol cannot be carried over PPP.
 */
static int nf_flow_pppoe_push(struct sk_buff *skb, u16 id)
{
	/* PPPoE "length" covers the PPP protocol field and the payload. */
	int data_len = skb->len + sizeof(__be16);
	struct ppp_hdr {
		struct pppoe_hdr hdr;
		__be16 proto;
	} *ph;
	__be16 proto;

	if (skb_cow_head(skb, PPPOE_SES_HLEN))
		return -1;

	switch (skb->protocol) {
	case htons(ETH_P_IP):
		proto = htons(PPP_IP);
		break;
	case htons(ETH_P_IPV6):
		proto = htons(PPP_IPV6);
		break;
	default:
		return -1;
	}

	__skb_push(skb, PPPOE_SES_HLEN);
	skb_reset_network_header(skb);

	ph = (struct ppp_hdr *)(skb->data);
	ph->hdr.ver = 1;
	ph->hdr.type = 1;
	ph->hdr.code = 0;
	ph->hdr.sid = htons(id);
	ph->hdr.length = htons(data_len);
	ph->proto = proto;
	skb->protocol = htons(ETH_P_PPP_SES);

	return 0;
}
513
514static int nf_flow_tunnel_ipip_push(struct net *net, struct sk_buff *skb,
515 struct flow_offload_tuple *tuple,
516 __be32 *ip_daddr)
517{
518 struct iphdr *iph = (struct iphdr *)skb_network_header(skb);
519 struct rtable *rt = dst_rtable(tuple->dst_cache);
520 u8 tos = iph->tos, ttl = iph->ttl;
521 __be16 frag_off = iph->frag_off;
522 u32 headroom = sizeof(*iph);
523 int err;
524
525 err = iptunnel_handle_offloads(skb, gso_type_mask: SKB_GSO_IPXIP4);
526 if (err)
527 return err;
528
529 skb_set_inner_ipproto(skb, IPPROTO_IPIP);
530 headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
531 err = skb_cow_head(skb, headroom);
532 if (err)
533 return err;
534
535 skb_scrub_packet(skb, xnet: true);
536 skb_clear_hash_if_not_l4(skb);
537
538 /* Push down and install the IP header. */
539 skb_push(skb, len: sizeof(*iph));
540 skb_reset_network_header(skb);
541
542 iph = ip_hdr(skb);
543 iph->version = 4;
544 iph->ihl = sizeof(*iph) >> 2;
545 iph->frag_off = ip_mtu_locked(dst: &rt->dst) ? 0 : frag_off;
546 iph->protocol = tuple->tun.l3_proto;
547 iph->tos = tos;
548 iph->daddr = tuple->tun.src_v4.s_addr;
549 iph->saddr = tuple->tun.dst_v4.s_addr;
550 iph->ttl = ttl;
551 iph->tot_len = htons(skb->len);
552 __ip_select_ident(net, iph, skb_shinfo(skb)->gso_segs ?: 1);
553 ip_send_check(ip: iph);
554
555 *ip_daddr = tuple->tun.src_v4.s_addr;
556
557 return 0;
558}
559
560static int nf_flow_tunnel_v4_push(struct net *net, struct sk_buff *skb,
561 struct flow_offload_tuple *tuple,
562 __be32 *ip_daddr)
563{
564 if (tuple->tun_num)
565 return nf_flow_tunnel_ipip_push(net, skb, tuple, ip_daddr);
566
567 return 0;
568}
569
570static int nf_flow_encap_push(struct sk_buff *skb,
571 struct flow_offload_tuple *tuple)
572{
573 int i;
574
575 for (i = 0; i < tuple->encap_num; i++) {
576 switch (tuple->encap[i].proto) {
577 case htons(ETH_P_8021Q):
578 case htons(ETH_P_8021AD):
579 if (skb_vlan_push(skb, vlan_proto: tuple->encap[i].proto,
580 vlan_tci: tuple->encap[i].id) < 0)
581 return -1;
582 break;
583 case htons(ETH_P_PPP_SES):
584 if (nf_flow_pppoe_push(skb, id: tuple->encap[i].id) < 0)
585 return -1;
586 break;
587 }
588 }
589
590 return 0;
591}
592
593unsigned int
594nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
595 const struct nf_hook_state *state)
596{
597 struct flow_offload_tuple_rhash *tuplehash;
598 struct nf_flowtable *flow_table = priv;
599 struct flow_offload_tuple *other_tuple;
600 enum flow_offload_tuple_dir dir;
601 struct nf_flowtable_ctx ctx = {
602 .in = state->in,
603 };
604 struct nf_flow_xmit xmit = {};
605 struct flow_offload *flow;
606 struct neighbour *neigh;
607 struct rtable *rt;
608 __be32 ip_daddr;
609 int ret;
610
611 tuplehash = nf_flow_offload_lookup(ctx: &ctx, flow_table, skb);
612 if (!tuplehash)
613 return NF_ACCEPT;
614
615 ret = nf_flow_offload_forward(ctx: &ctx, flow_table, tuplehash, skb);
616 if (ret < 0)
617 return NF_DROP;
618 else if (ret == 0)
619 return NF_ACCEPT;
620
621 if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
622 rt = dst_rtable(tuplehash->tuple.dst_cache);
623 memset(skb->cb, 0, sizeof(struct inet_skb_parm));
624 IPCB(skb)->iif = skb->dev->ifindex;
625 IPCB(skb)->flags = IPSKB_FORWARDED;
626 return nf_flow_xmit_xfrm(skb, state, dst: &rt->dst);
627 }
628
629 dir = tuplehash->tuple.dir;
630 flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
631 other_tuple = &flow->tuplehash[!dir].tuple;
632 ip_daddr = other_tuple->src_v4.s_addr;
633
634 if (nf_flow_tunnel_v4_push(net: state->net, skb, tuple: other_tuple, ip_daddr: &ip_daddr) < 0)
635 return NF_DROP;
636
637 if (nf_flow_encap_push(skb, tuple: other_tuple) < 0)
638 return NF_DROP;
639
640 switch (tuplehash->tuple.xmit_type) {
641 case FLOW_OFFLOAD_XMIT_NEIGH:
642 rt = dst_rtable(tuplehash->tuple.dst_cache);
643 xmit.outdev = dev_get_by_index_rcu(net: state->net, ifindex: tuplehash->tuple.ifidx);
644 if (!xmit.outdev) {
645 flow_offload_teardown(flow);
646 return NF_DROP;
647 }
648 neigh = ip_neigh_gw4(dev: rt->dst.dev, daddr: rt_nexthop(rt, daddr: ip_daddr));
649 if (IS_ERR(ptr: neigh)) {
650 flow_offload_teardown(flow);
651 return NF_DROP;
652 }
653 xmit.dest = neigh->ha;
654 skb_dst_set_noref(skb, dst: &rt->dst);
655 break;
656 case FLOW_OFFLOAD_XMIT_DIRECT:
657 xmit.outdev = dev_get_by_index_rcu(net: state->net, ifindex: tuplehash->tuple.out.ifidx);
658 if (!xmit.outdev) {
659 flow_offload_teardown(flow);
660 return NF_DROP;
661 }
662 xmit.dest = tuplehash->tuple.out.h_dest;
663 xmit.source = tuplehash->tuple.out.h_source;
664 break;
665 default:
666 WARN_ON_ONCE(1);
667 return NF_DROP;
668 }
669
670 return nf_flow_queue_xmit(net: state->net, skb, xmit: &xmit);
671}
672EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);
673
674static void nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
675 struct in6_addr *addr,
676 struct in6_addr *new_addr,
677 struct ipv6hdr *ip6h)
678{
679 struct tcphdr *tcph;
680
681 tcph = (void *)(skb_network_header(skb) + thoff);
682 inet_proto_csum_replace16(sum: &tcph->check, skb, from: addr->s6_addr32,
683 to: new_addr->s6_addr32, pseudohdr: true);
684}
685
686static void nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
687 struct in6_addr *addr,
688 struct in6_addr *new_addr)
689{
690 struct udphdr *udph;
691
692 udph = (void *)(skb_network_header(skb) + thoff);
693 if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
694 inet_proto_csum_replace16(sum: &udph->check, skb, from: addr->s6_addr32,
695 to: new_addr->s6_addr32, pseudohdr: true);
696 if (!udph->check)
697 udph->check = CSUM_MANGLED_0;
698 }
699}
700
701static void nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
702 unsigned int thoff, struct in6_addr *addr,
703 struct in6_addr *new_addr)
704{
705 switch (ip6h->nexthdr) {
706 case IPPROTO_TCP:
707 nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr, ip6h);
708 break;
709 case IPPROTO_UDP:
710 nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr);
711 break;
712 }
713}
714
715static void nf_flow_snat_ipv6(const struct flow_offload *flow,
716 struct sk_buff *skb, struct ipv6hdr *ip6h,
717 unsigned int thoff,
718 enum flow_offload_tuple_dir dir)
719{
720 struct in6_addr addr, new_addr;
721
722 switch (dir) {
723 case FLOW_OFFLOAD_DIR_ORIGINAL:
724 addr = ip6h->saddr;
725 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6;
726 ip6h->saddr = new_addr;
727 break;
728 case FLOW_OFFLOAD_DIR_REPLY:
729 addr = ip6h->daddr;
730 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6;
731 ip6h->daddr = new_addr;
732 break;
733 }
734
735 nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, addr: &addr, new_addr: &new_addr);
736}
737
738static void nf_flow_dnat_ipv6(const struct flow_offload *flow,
739 struct sk_buff *skb, struct ipv6hdr *ip6h,
740 unsigned int thoff,
741 enum flow_offload_tuple_dir dir)
742{
743 struct in6_addr addr, new_addr;
744
745 switch (dir) {
746 case FLOW_OFFLOAD_DIR_ORIGINAL:
747 addr = ip6h->daddr;
748 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6;
749 ip6h->daddr = new_addr;
750 break;
751 case FLOW_OFFLOAD_DIR_REPLY:
752 addr = ip6h->saddr;
753 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6;
754 ip6h->saddr = new_addr;
755 break;
756 }
757
758 nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, addr: &addr, new_addr: &new_addr);
759}
760
761static void nf_flow_nat_ipv6(const struct flow_offload *flow,
762 struct sk_buff *skb,
763 enum flow_offload_tuple_dir dir,
764 struct ipv6hdr *ip6h)
765{
766 unsigned int thoff = sizeof(*ip6h);
767
768 if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
769 nf_flow_snat_port(flow, skb, thoff, protocol: ip6h->nexthdr, dir);
770 nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir);
771 }
772 if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
773 nf_flow_dnat_port(flow, skb, thoff, protocol: ip6h->nexthdr, dir);
774 nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir);
775 }
776}
777
778static int nf_flow_tuple_ipv6(struct nf_flowtable_ctx *ctx, struct sk_buff *skb,
779 struct flow_offload_tuple *tuple)
780{
781 struct flow_ports *ports;
782 struct ipv6hdr *ip6h;
783 unsigned int thoff;
784 u8 nexthdr;
785
786 thoff = sizeof(*ip6h) + ctx->offset;
787 if (!pskb_may_pull(skb, len: thoff))
788 return -1;
789
790 ip6h = (struct ipv6hdr *)(skb_network_header(skb) + ctx->offset);
791
792 nexthdr = ip6h->nexthdr;
793 switch (nexthdr) {
794 case IPPROTO_TCP:
795 ctx->hdrsize = sizeof(struct tcphdr);
796 break;
797 case IPPROTO_UDP:
798 ctx->hdrsize = sizeof(struct udphdr);
799 break;
800#ifdef CONFIG_NF_CT_PROTO_GRE
801 case IPPROTO_GRE:
802 ctx->hdrsize = sizeof(struct gre_base_hdr);
803 break;
804#endif
805 default:
806 return -1;
807 }
808
809 if (ip6h->hop_limit <= 1)
810 return -1;
811
812 if (!pskb_may_pull(skb, len: thoff + ctx->hdrsize))
813 return -1;
814
815 switch (nexthdr) {
816 case IPPROTO_TCP:
817 case IPPROTO_UDP:
818 ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
819 tuple->src_port = ports->source;
820 tuple->dst_port = ports->dest;
821 break;
822 case IPPROTO_GRE: {
823 struct gre_base_hdr *greh;
824
825 greh = (struct gre_base_hdr *)(skb_network_header(skb) + thoff);
826 if ((greh->flags & GRE_VERSION) != GRE_VERSION_0)
827 return -1;
828 break;
829 }
830 }
831
832 ip6h = (struct ipv6hdr *)(skb_network_header(skb) + ctx->offset);
833
834 tuple->src_v6 = ip6h->saddr;
835 tuple->dst_v6 = ip6h->daddr;
836 tuple->l3proto = AF_INET6;
837 tuple->l4proto = nexthdr;
838 tuple->iifidx = ctx->in->ifindex;
839 nf_flow_tuple_encap(skb, tuple);
840
841 return 0;
842}
843
844static int nf_flow_offload_ipv6_forward(struct nf_flowtable_ctx *ctx,
845 struct nf_flowtable *flow_table,
846 struct flow_offload_tuple_rhash *tuplehash,
847 struct sk_buff *skb)
848{
849 enum flow_offload_tuple_dir dir;
850 struct flow_offload *flow;
851 unsigned int thoff, mtu;
852 struct ipv6hdr *ip6h;
853
854 dir = tuplehash->tuple.dir;
855 flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
856
857 mtu = flow->tuplehash[dir].tuple.mtu + ctx->offset;
858 if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
859 return 0;
860
861 ip6h = (struct ipv6hdr *)(skb_network_header(skb) + ctx->offset);
862 thoff = sizeof(*ip6h) + ctx->offset;
863 if (nf_flow_state_check(flow, proto: ip6h->nexthdr, skb, thoff))
864 return 0;
865
866 if (!nf_flow_dst_check(tuple: &tuplehash->tuple)) {
867 flow_offload_teardown(flow);
868 return 0;
869 }
870
871 if (skb_try_make_writable(skb, write_len: thoff + ctx->hdrsize))
872 return -1;
873
874 flow_offload_refresh(flow_table, flow, force: false);
875
876 nf_flow_encap_pop(skb, tuplehash);
877
878 ip6h = ipv6_hdr(skb);
879 nf_flow_nat_ipv6(flow, skb, dir, ip6h);
880
881 ip6h->hop_limit--;
882 skb_clear_tstamp(skb);
883
884 if (flow_table->flags & NF_FLOWTABLE_COUNTER)
885 nf_ct_acct_update(ct: flow->ct, dir: tuplehash->tuple.dir, bytes: skb->len);
886
887 return 1;
888}
889
890static struct flow_offload_tuple_rhash *
891nf_flow_offload_ipv6_lookup(struct nf_flowtable_ctx *ctx,
892 struct nf_flowtable *flow_table,
893 struct sk_buff *skb)
894{
895 struct flow_offload_tuple tuple = {};
896
897 if (skb->protocol != htons(ETH_P_IPV6) &&
898 !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IPV6), offset: &ctx->offset))
899 return NULL;
900
901 if (nf_flow_tuple_ipv6(ctx, skb, tuple: &tuple) < 0)
902 return NULL;
903
904 return flow_offload_lookup(flow_table, tuple: &tuple);
905}
906
907unsigned int
908nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
909 const struct nf_hook_state *state)
910{
911 struct flow_offload_tuple_rhash *tuplehash;
912 struct nf_flowtable *flow_table = priv;
913 struct flow_offload_tuple *other_tuple;
914 enum flow_offload_tuple_dir dir;
915 struct nf_flowtable_ctx ctx = {
916 .in = state->in,
917 };
918 struct nf_flow_xmit xmit = {};
919 struct in6_addr *ip6_daddr;
920 struct flow_offload *flow;
921 struct neighbour *neigh;
922 struct rt6_info *rt;
923 int ret;
924
925 tuplehash = nf_flow_offload_ipv6_lookup(ctx: &ctx, flow_table, skb);
926 if (tuplehash == NULL)
927 return NF_ACCEPT;
928
929 ret = nf_flow_offload_ipv6_forward(ctx: &ctx, flow_table, tuplehash, skb);
930 if (ret < 0)
931 return NF_DROP;
932 else if (ret == 0)
933 return NF_ACCEPT;
934
935 if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
936 rt = dst_rt6_info(tuplehash->tuple.dst_cache);
937 memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
938 IP6CB(skb)->iif = skb->dev->ifindex;
939 IP6CB(skb)->flags = IP6SKB_FORWARDED;
940 return nf_flow_xmit_xfrm(skb, state, dst: &rt->dst);
941 }
942
943 dir = tuplehash->tuple.dir;
944 flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
945 other_tuple = &flow->tuplehash[!dir].tuple;
946 ip6_daddr = &other_tuple->src_v6;
947
948 if (nf_flow_encap_push(skb, tuple: other_tuple) < 0)
949 return NF_DROP;
950
951 switch (tuplehash->tuple.xmit_type) {
952 case FLOW_OFFLOAD_XMIT_NEIGH:
953 rt = dst_rt6_info(tuplehash->tuple.dst_cache);
954 xmit.outdev = dev_get_by_index_rcu(net: state->net, ifindex: tuplehash->tuple.ifidx);
955 if (!xmit.outdev) {
956 flow_offload_teardown(flow);
957 return NF_DROP;
958 }
959 neigh = ip_neigh_gw6(dev: rt->dst.dev, addr: rt6_nexthop(rt, daddr: ip6_daddr));
960 if (IS_ERR(ptr: neigh)) {
961 flow_offload_teardown(flow);
962 return NF_DROP;
963 }
964 xmit.dest = neigh->ha;
965 skb_dst_set_noref(skb, dst: &rt->dst);
966 break;
967 case FLOW_OFFLOAD_XMIT_DIRECT:
968 xmit.outdev = dev_get_by_index_rcu(net: state->net, ifindex: tuplehash->tuple.out.ifidx);
969 if (!xmit.outdev) {
970 flow_offload_teardown(flow);
971 return NF_DROP;
972 }
973 xmit.dest = tuplehash->tuple.out.h_dest;
974 xmit.source = tuplehash->tuple.out.h_source;
975 break;
976 default:
977 WARN_ON_ONCE(1);
978 return NF_DROP;
979 }
980
981 return nf_flow_queue_xmit(net: state->net, skb, xmit: &xmit);
982}
983EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook);
984

/* Source: linux/net/netfilter/nf_flow_table_ip.c */