| 1 | // SPDX-License-Identifier: GPL-2.0-only |
| 2 | #include <linux/kernel.h> |
| 3 | #include <linux/init.h> |
| 4 | #include <linux/module.h> |
| 5 | #include <linux/netfilter.h> |
| 6 | #include <linux/rhashtable.h> |
| 7 | #include <linux/ip.h> |
| 8 | #include <linux/ipv6.h> |
| 9 | #include <linux/netdevice.h> |
| 10 | #include <linux/if_ether.h> |
| 11 | #include <net/gso.h> |
| 12 | #include <net/ip.h> |
| 13 | #include <net/ipv6.h> |
| 14 | #include <net/ip6_route.h> |
| 15 | #include <net/neighbour.h> |
| 16 | #include <net/netfilter/nf_flow_table.h> |
| 17 | #include <net/netfilter/nf_conntrack_acct.h> |
| 18 | /* For layer 4 checksum field offset. */ |
| 19 | #include <linux/tcp.h> |
| 20 | #include <linux/udp.h> |
| 21 | |
| 22 | static int nf_flow_state_check(struct flow_offload *flow, int proto, |
| 23 | struct sk_buff *skb, unsigned int thoff) |
| 24 | { |
| 25 | struct tcphdr *tcph; |
| 26 | |
| 27 | if (proto != IPPROTO_TCP) |
| 28 | return 0; |
| 29 | |
| 30 | tcph = (void *)(skb_network_header(skb) + thoff); |
| 31 | if (tcph->syn && test_bit(NF_FLOW_CLOSING, &flow->flags)) { |
| 32 | flow_offload_teardown(flow); |
| 33 | return -1; |
| 34 | } |
| 35 | |
| 36 | if ((tcph->fin || tcph->rst) && |
| 37 | !test_bit(NF_FLOW_CLOSING, &flow->flags)) |
| 38 | set_bit(nr: NF_FLOW_CLOSING, addr: &flow->flags); |
| 39 | |
| 40 | return 0; |
| 41 | } |
| 42 | |
| 43 | static void nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff, |
| 44 | __be32 addr, __be32 new_addr) |
| 45 | { |
| 46 | struct tcphdr *tcph; |
| 47 | |
| 48 | tcph = (void *)(skb_network_header(skb) + thoff); |
| 49 | inet_proto_csum_replace4(sum: &tcph->check, skb, from: addr, to: new_addr, pseudohdr: true); |
| 50 | } |
| 51 | |
| 52 | static void nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff, |
| 53 | __be32 addr, __be32 new_addr) |
| 54 | { |
| 55 | struct udphdr *udph; |
| 56 | |
| 57 | udph = (void *)(skb_network_header(skb) + thoff); |
| 58 | if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) { |
| 59 | inet_proto_csum_replace4(sum: &udph->check, skb, from: addr, |
| 60 | to: new_addr, pseudohdr: true); |
| 61 | if (!udph->check) |
| 62 | udph->check = CSUM_MANGLED_0; |
| 63 | } |
| 64 | } |
| 65 | |
| 66 | static void nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph, |
| 67 | unsigned int thoff, __be32 addr, |
| 68 | __be32 new_addr) |
| 69 | { |
| 70 | switch (iph->protocol) { |
| 71 | case IPPROTO_TCP: |
| 72 | nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr); |
| 73 | break; |
| 74 | case IPPROTO_UDP: |
| 75 | nf_flow_nat_ip_udp(skb, thoff, addr, new_addr); |
| 76 | break; |
| 77 | } |
| 78 | } |
| 79 | |
| 80 | static void nf_flow_snat_ip(const struct flow_offload *flow, |
| 81 | struct sk_buff *skb, struct iphdr *iph, |
| 82 | unsigned int thoff, enum flow_offload_tuple_dir dir) |
| 83 | { |
| 84 | __be32 addr, new_addr; |
| 85 | |
| 86 | switch (dir) { |
| 87 | case FLOW_OFFLOAD_DIR_ORIGINAL: |
| 88 | addr = iph->saddr; |
| 89 | new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr; |
| 90 | iph->saddr = new_addr; |
| 91 | break; |
| 92 | case FLOW_OFFLOAD_DIR_REPLY: |
| 93 | addr = iph->daddr; |
| 94 | new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr; |
| 95 | iph->daddr = new_addr; |
| 96 | break; |
| 97 | } |
| 98 | csum_replace4(sum: &iph->check, from: addr, to: new_addr); |
| 99 | |
| 100 | nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr); |
| 101 | } |
| 102 | |
| 103 | static void nf_flow_dnat_ip(const struct flow_offload *flow, |
| 104 | struct sk_buff *skb, struct iphdr *iph, |
| 105 | unsigned int thoff, enum flow_offload_tuple_dir dir) |
| 106 | { |
| 107 | __be32 addr, new_addr; |
| 108 | |
| 109 | switch (dir) { |
| 110 | case FLOW_OFFLOAD_DIR_ORIGINAL: |
| 111 | addr = iph->daddr; |
| 112 | new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr; |
| 113 | iph->daddr = new_addr; |
| 114 | break; |
| 115 | case FLOW_OFFLOAD_DIR_REPLY: |
| 116 | addr = iph->saddr; |
| 117 | new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr; |
| 118 | iph->saddr = new_addr; |
| 119 | break; |
| 120 | } |
| 121 | csum_replace4(sum: &iph->check, from: addr, to: new_addr); |
| 122 | |
| 123 | nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr); |
| 124 | } |
| 125 | |
| 126 | static void nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb, |
| 127 | unsigned int thoff, enum flow_offload_tuple_dir dir, |
| 128 | struct iphdr *iph) |
| 129 | { |
| 130 | if (test_bit(NF_FLOW_SNAT, &flow->flags)) { |
| 131 | nf_flow_snat_port(flow, skb, thoff, protocol: iph->protocol, dir); |
| 132 | nf_flow_snat_ip(flow, skb, iph, thoff, dir); |
| 133 | } |
| 134 | if (test_bit(NF_FLOW_DNAT, &flow->flags)) { |
| 135 | nf_flow_dnat_port(flow, skb, thoff, protocol: iph->protocol, dir); |
| 136 | nf_flow_dnat_ip(flow, skb, iph, thoff, dir); |
| 137 | } |
| 138 | } |
| 139 | |
| 140 | static bool ip_has_options(unsigned int thoff) |
| 141 | { |
| 142 | return thoff != sizeof(struct iphdr); |
| 143 | } |
| 144 | |
| 145 | static void nf_flow_tuple_encap(struct sk_buff *skb, |
| 146 | struct flow_offload_tuple *tuple) |
| 147 | { |
| 148 | __be16 inner_proto = skb->protocol; |
| 149 | struct vlan_ethhdr *veth; |
| 150 | struct pppoe_hdr *phdr; |
| 151 | struct iphdr *iph; |
| 152 | u16 offset = 0; |
| 153 | int i = 0; |
| 154 | |
| 155 | if (skb_vlan_tag_present(skb)) { |
| 156 | tuple->encap[i].id = skb_vlan_tag_get(skb); |
| 157 | tuple->encap[i].proto = skb->vlan_proto; |
| 158 | i++; |
| 159 | } |
| 160 | switch (skb->protocol) { |
| 161 | case htons(ETH_P_8021Q): |
| 162 | veth = (struct vlan_ethhdr *)skb_mac_header(skb); |
| 163 | tuple->encap[i].id = ntohs(veth->h_vlan_TCI); |
| 164 | tuple->encap[i].proto = skb->protocol; |
| 165 | inner_proto = veth->h_vlan_encapsulated_proto; |
| 166 | offset += VLAN_HLEN; |
| 167 | break; |
| 168 | case htons(ETH_P_PPP_SES): |
| 169 | phdr = (struct pppoe_hdr *)skb_network_header(skb); |
| 170 | tuple->encap[i].id = ntohs(phdr->sid); |
| 171 | tuple->encap[i].proto = skb->protocol; |
| 172 | inner_proto = *((__be16 *)(phdr + 1)); |
| 173 | offset += PPPOE_SES_HLEN; |
| 174 | break; |
| 175 | } |
| 176 | |
| 177 | if (inner_proto == htons(ETH_P_IP)) { |
| 178 | iph = (struct iphdr *)(skb_network_header(skb) + offset); |
| 179 | if (iph->protocol == IPPROTO_IPIP) { |
| 180 | tuple->tun.dst_v4.s_addr = iph->daddr; |
| 181 | tuple->tun.src_v4.s_addr = iph->saddr; |
| 182 | tuple->tun.l3_proto = IPPROTO_IPIP; |
| 183 | } |
| 184 | } |
| 185 | } |
| 186 | |
| 187 | struct nf_flowtable_ctx { |
| 188 | const struct net_device *in; |
| 189 | u32 offset; |
| 190 | u32 hdrsize; |
| 191 | }; |
| 192 | |
| 193 | static int nf_flow_tuple_ip(struct nf_flowtable_ctx *ctx, struct sk_buff *skb, |
| 194 | struct flow_offload_tuple *tuple) |
| 195 | { |
| 196 | struct flow_ports *ports; |
| 197 | unsigned int thoff; |
| 198 | struct iphdr *iph; |
| 199 | u8 ipproto; |
| 200 | |
| 201 | if (!pskb_may_pull(skb, len: sizeof(*iph) + ctx->offset)) |
| 202 | return -1; |
| 203 | |
| 204 | iph = (struct iphdr *)(skb_network_header(skb) + ctx->offset); |
| 205 | thoff = (iph->ihl * 4); |
| 206 | |
| 207 | if (ip_is_fragment(iph) || |
| 208 | unlikely(ip_has_options(thoff))) |
| 209 | return -1; |
| 210 | |
| 211 | thoff += ctx->offset; |
| 212 | |
| 213 | ipproto = iph->protocol; |
| 214 | switch (ipproto) { |
| 215 | case IPPROTO_TCP: |
| 216 | ctx->hdrsize = sizeof(struct tcphdr); |
| 217 | break; |
| 218 | case IPPROTO_UDP: |
| 219 | ctx->hdrsize = sizeof(struct udphdr); |
| 220 | break; |
| 221 | #ifdef CONFIG_NF_CT_PROTO_GRE |
| 222 | case IPPROTO_GRE: |
| 223 | ctx->hdrsize = sizeof(struct gre_base_hdr); |
| 224 | break; |
| 225 | #endif |
| 226 | default: |
| 227 | return -1; |
| 228 | } |
| 229 | |
| 230 | if (iph->ttl <= 1) |
| 231 | return -1; |
| 232 | |
| 233 | if (!pskb_may_pull(skb, len: thoff + ctx->hdrsize)) |
| 234 | return -1; |
| 235 | |
| 236 | switch (ipproto) { |
| 237 | case IPPROTO_TCP: |
| 238 | case IPPROTO_UDP: |
| 239 | ports = (struct flow_ports *)(skb_network_header(skb) + thoff); |
| 240 | tuple->src_port = ports->source; |
| 241 | tuple->dst_port = ports->dest; |
| 242 | break; |
| 243 | case IPPROTO_GRE: { |
| 244 | struct gre_base_hdr *greh; |
| 245 | |
| 246 | greh = (struct gre_base_hdr *)(skb_network_header(skb) + thoff); |
| 247 | if ((greh->flags & GRE_VERSION) != GRE_VERSION_0) |
| 248 | return -1; |
| 249 | break; |
| 250 | } |
| 251 | } |
| 252 | |
| 253 | iph = (struct iphdr *)(skb_network_header(skb) + ctx->offset); |
| 254 | |
| 255 | tuple->src_v4.s_addr = iph->saddr; |
| 256 | tuple->dst_v4.s_addr = iph->daddr; |
| 257 | tuple->l3proto = AF_INET; |
| 258 | tuple->l4proto = ipproto; |
| 259 | tuple->iifidx = ctx->in->ifindex; |
| 260 | nf_flow_tuple_encap(skb, tuple); |
| 261 | |
| 262 | return 0; |
| 263 | } |
| 264 | |
| 265 | /* Based on ip_exceeds_mtu(). */ |
| 266 | static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu) |
| 267 | { |
| 268 | if (skb->len <= mtu) |
| 269 | return false; |
| 270 | |
| 271 | if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu)) |
| 272 | return false; |
| 273 | |
| 274 | return true; |
| 275 | } |
| 276 | |
| 277 | static inline bool nf_flow_dst_check(struct flow_offload_tuple *tuple) |
| 278 | { |
| 279 | if (tuple->xmit_type != FLOW_OFFLOAD_XMIT_NEIGH && |
| 280 | tuple->xmit_type != FLOW_OFFLOAD_XMIT_XFRM) |
| 281 | return true; |
| 282 | |
| 283 | return dst_check(dst: tuple->dst_cache, cookie: tuple->dst_cookie); |
| 284 | } |
| 285 | |
| 286 | static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb, |
| 287 | const struct nf_hook_state *state, |
| 288 | struct dst_entry *dst) |
| 289 | { |
| 290 | skb_orphan(skb); |
| 291 | skb_dst_set_noref(skb, dst); |
| 292 | dst_output(net: state->net, sk: state->sk, skb); |
| 293 | return NF_STOLEN; |
| 294 | } |
| 295 | |
| 296 | static bool nf_flow_ip4_tunnel_proto(struct sk_buff *skb, u32 *psize) |
| 297 | { |
| 298 | struct iphdr *iph; |
| 299 | u16 size; |
| 300 | |
| 301 | if (!pskb_may_pull(skb, len: sizeof(*iph) + *psize)) |
| 302 | return false; |
| 303 | |
| 304 | iph = (struct iphdr *)(skb_network_header(skb) + *psize); |
| 305 | size = iph->ihl << 2; |
| 306 | |
| 307 | if (ip_is_fragment(iph) || unlikely(ip_has_options(size))) |
| 308 | return false; |
| 309 | |
| 310 | if (iph->ttl <= 1) |
| 311 | return false; |
| 312 | |
| 313 | if (iph->protocol == IPPROTO_IPIP) |
| 314 | *psize += size; |
| 315 | |
| 316 | return true; |
| 317 | } |
| 318 | |
| 319 | static void nf_flow_ip4_tunnel_pop(struct sk_buff *skb) |
| 320 | { |
| 321 | struct iphdr *iph = (struct iphdr *)skb_network_header(skb); |
| 322 | |
| 323 | if (iph->protocol != IPPROTO_IPIP) |
| 324 | return; |
| 325 | |
| 326 | skb_pull(skb, len: iph->ihl << 2); |
| 327 | skb_reset_network_header(skb); |
| 328 | } |
| 329 | |
| 330 | static bool nf_flow_skb_encap_protocol(struct sk_buff *skb, __be16 proto, |
| 331 | u32 *offset) |
| 332 | { |
| 333 | __be16 inner_proto = skb->protocol; |
| 334 | struct vlan_ethhdr *veth; |
| 335 | bool ret = false; |
| 336 | |
| 337 | switch (skb->protocol) { |
| 338 | case htons(ETH_P_8021Q): |
| 339 | if (!pskb_may_pull(skb, len: skb_mac_offset(skb) + sizeof(*veth))) |
| 340 | return false; |
| 341 | |
| 342 | veth = (struct vlan_ethhdr *)skb_mac_header(skb); |
| 343 | if (veth->h_vlan_encapsulated_proto == proto) { |
| 344 | *offset += VLAN_HLEN; |
| 345 | inner_proto = proto; |
| 346 | ret = true; |
| 347 | } |
| 348 | break; |
| 349 | case htons(ETH_P_PPP_SES): |
| 350 | if (nf_flow_pppoe_proto(skb, inner_proto: &inner_proto) && |
| 351 | inner_proto == proto) { |
| 352 | *offset += PPPOE_SES_HLEN; |
| 353 | ret = true; |
| 354 | } |
| 355 | break; |
| 356 | } |
| 357 | |
| 358 | if (inner_proto == htons(ETH_P_IP)) |
| 359 | ret = nf_flow_ip4_tunnel_proto(skb, psize: offset); |
| 360 | |
| 361 | return ret; |
| 362 | } |
| 363 | |
| 364 | static void nf_flow_encap_pop(struct sk_buff *skb, |
| 365 | struct flow_offload_tuple_rhash *tuplehash) |
| 366 | { |
| 367 | struct vlan_hdr *vlan_hdr; |
| 368 | int i; |
| 369 | |
| 370 | for (i = 0; i < tuplehash->tuple.encap_num; i++) { |
| 371 | if (skb_vlan_tag_present(skb)) { |
| 372 | __vlan_hwaccel_clear_tag(skb); |
| 373 | continue; |
| 374 | } |
| 375 | switch (skb->protocol) { |
| 376 | case htons(ETH_P_8021Q): |
| 377 | vlan_hdr = (struct vlan_hdr *)skb->data; |
| 378 | __skb_pull(skb, VLAN_HLEN); |
| 379 | vlan_set_encap_proto(skb, vhdr: vlan_hdr); |
| 380 | skb_reset_network_header(skb); |
| 381 | break; |
| 382 | case htons(ETH_P_PPP_SES): |
| 383 | skb->protocol = __nf_flow_pppoe_proto(skb); |
| 384 | skb_pull(skb, PPPOE_SES_HLEN); |
| 385 | skb_reset_network_header(skb); |
| 386 | break; |
| 387 | } |
| 388 | } |
| 389 | |
| 390 | if (skb->protocol == htons(ETH_P_IP)) |
| 391 | nf_flow_ip4_tunnel_pop(skb); |
| 392 | } |
| 393 | |
| 394 | struct nf_flow_xmit { |
| 395 | const void *dest; |
| 396 | const void *source; |
| 397 | struct net_device *outdev; |
| 398 | }; |
| 399 | |
| 400 | static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb, |
| 401 | struct nf_flow_xmit *xmit) |
| 402 | { |
| 403 | skb->dev = xmit->outdev; |
| 404 | dev_hard_header(skb, dev: skb->dev, ntohs(skb->protocol), |
| 405 | daddr: xmit->dest, saddr: xmit->source, len: skb->len); |
| 406 | dev_queue_xmit(skb); |
| 407 | |
| 408 | return NF_STOLEN; |
| 409 | } |
| 410 | |
| 411 | static struct flow_offload_tuple_rhash * |
| 412 | nf_flow_offload_lookup(struct nf_flowtable_ctx *ctx, |
| 413 | struct nf_flowtable *flow_table, struct sk_buff *skb) |
| 414 | { |
| 415 | struct flow_offload_tuple tuple = {}; |
| 416 | |
| 417 | if (!nf_flow_skb_encap_protocol(skb, htons(ETH_P_IP), offset: &ctx->offset)) |
| 418 | return NULL; |
| 419 | |
| 420 | if (nf_flow_tuple_ip(ctx, skb, tuple: &tuple) < 0) |
| 421 | return NULL; |
| 422 | |
| 423 | return flow_offload_lookup(flow_table, tuple: &tuple); |
| 424 | } |
| 425 | |
| 426 | static int nf_flow_offload_forward(struct nf_flowtable_ctx *ctx, |
| 427 | struct nf_flowtable *flow_table, |
| 428 | struct flow_offload_tuple_rhash *tuplehash, |
| 429 | struct sk_buff *skb) |
| 430 | { |
| 431 | enum flow_offload_tuple_dir dir; |
| 432 | struct flow_offload *flow; |
| 433 | unsigned int thoff, mtu; |
| 434 | struct iphdr *iph; |
| 435 | |
| 436 | dir = tuplehash->tuple.dir; |
| 437 | flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]); |
| 438 | |
| 439 | mtu = flow->tuplehash[dir].tuple.mtu + ctx->offset; |
| 440 | if (flow->tuplehash[!dir].tuple.tun_num) |
| 441 | mtu -= sizeof(*iph); |
| 442 | |
| 443 | if (unlikely(nf_flow_exceeds_mtu(skb, mtu))) |
| 444 | return 0; |
| 445 | |
| 446 | iph = (struct iphdr *)(skb_network_header(skb) + ctx->offset); |
| 447 | thoff = (iph->ihl * 4) + ctx->offset; |
| 448 | if (nf_flow_state_check(flow, proto: iph->protocol, skb, thoff)) |
| 449 | return 0; |
| 450 | |
| 451 | if (!nf_flow_dst_check(tuple: &tuplehash->tuple)) { |
| 452 | flow_offload_teardown(flow); |
| 453 | return 0; |
| 454 | } |
| 455 | |
| 456 | if (skb_try_make_writable(skb, write_len: thoff + ctx->hdrsize)) |
| 457 | return -1; |
| 458 | |
| 459 | flow_offload_refresh(flow_table, flow, force: false); |
| 460 | |
| 461 | nf_flow_encap_pop(skb, tuplehash); |
| 462 | thoff -= ctx->offset; |
| 463 | |
| 464 | iph = ip_hdr(skb); |
| 465 | nf_flow_nat_ip(flow, skb, thoff, dir, iph); |
| 466 | |
| 467 | ip_decrease_ttl(iph); |
| 468 | skb_clear_tstamp(skb); |
| 469 | |
| 470 | if (flow_table->flags & NF_FLOWTABLE_COUNTER) |
| 471 | nf_ct_acct_update(ct: flow->ct, dir: tuplehash->tuple.dir, bytes: skb->len); |
| 472 | |
| 473 | return 1; |
| 474 | } |
| 475 | |
| 476 | static int nf_flow_pppoe_push(struct sk_buff *skb, u16 id) |
| 477 | { |
| 478 | int data_len = skb->len + sizeof(__be16); |
| 479 | struct ppp_hdr { |
| 480 | struct pppoe_hdr hdr; |
| 481 | __be16 proto; |
| 482 | } *ph; |
| 483 | __be16 proto; |
| 484 | |
| 485 | if (skb_cow_head(skb, PPPOE_SES_HLEN)) |
| 486 | return -1; |
| 487 | |
| 488 | switch (skb->protocol) { |
| 489 | case htons(ETH_P_IP): |
| 490 | proto = htons(PPP_IP); |
| 491 | break; |
| 492 | case htons(ETH_P_IPV6): |
| 493 | proto = htons(PPP_IPV6); |
| 494 | break; |
| 495 | default: |
| 496 | return -1; |
| 497 | } |
| 498 | |
| 499 | __skb_push(skb, PPPOE_SES_HLEN); |
| 500 | skb_reset_network_header(skb); |
| 501 | |
| 502 | ph = (struct ppp_hdr *)(skb->data); |
| 503 | ph->hdr.ver = 1; |
| 504 | ph->hdr.type = 1; |
| 505 | ph->hdr.code = 0; |
| 506 | ph->hdr.sid = htons(id); |
| 507 | ph->hdr.length = htons(data_len); |
| 508 | ph->proto = proto; |
| 509 | skb->protocol = htons(ETH_P_PPP_SES); |
| 510 | |
| 511 | return 0; |
| 512 | } |
| 513 | |
| 514 | static int nf_flow_tunnel_ipip_push(struct net *net, struct sk_buff *skb, |
| 515 | struct flow_offload_tuple *tuple, |
| 516 | __be32 *ip_daddr) |
| 517 | { |
| 518 | struct iphdr *iph = (struct iphdr *)skb_network_header(skb); |
| 519 | struct rtable *rt = dst_rtable(tuple->dst_cache); |
| 520 | u8 tos = iph->tos, ttl = iph->ttl; |
| 521 | __be16 frag_off = iph->frag_off; |
| 522 | u32 headroom = sizeof(*iph); |
| 523 | int err; |
| 524 | |
| 525 | err = iptunnel_handle_offloads(skb, gso_type_mask: SKB_GSO_IPXIP4); |
| 526 | if (err) |
| 527 | return err; |
| 528 | |
| 529 | skb_set_inner_ipproto(skb, IPPROTO_IPIP); |
| 530 | headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len; |
| 531 | err = skb_cow_head(skb, headroom); |
| 532 | if (err) |
| 533 | return err; |
| 534 | |
| 535 | skb_scrub_packet(skb, xnet: true); |
| 536 | skb_clear_hash_if_not_l4(skb); |
| 537 | |
| 538 | /* Push down and install the IP header. */ |
| 539 | skb_push(skb, len: sizeof(*iph)); |
| 540 | skb_reset_network_header(skb); |
| 541 | |
| 542 | iph = ip_hdr(skb); |
| 543 | iph->version = 4; |
| 544 | iph->ihl = sizeof(*iph) >> 2; |
| 545 | iph->frag_off = ip_mtu_locked(dst: &rt->dst) ? 0 : frag_off; |
| 546 | iph->protocol = tuple->tun.l3_proto; |
| 547 | iph->tos = tos; |
| 548 | iph->daddr = tuple->tun.src_v4.s_addr; |
| 549 | iph->saddr = tuple->tun.dst_v4.s_addr; |
| 550 | iph->ttl = ttl; |
| 551 | iph->tot_len = htons(skb->len); |
| 552 | __ip_select_ident(net, iph, skb_shinfo(skb)->gso_segs ?: 1); |
| 553 | ip_send_check(ip: iph); |
| 554 | |
| 555 | *ip_daddr = tuple->tun.src_v4.s_addr; |
| 556 | |
| 557 | return 0; |
| 558 | } |
| 559 | |
| 560 | static int nf_flow_tunnel_v4_push(struct net *net, struct sk_buff *skb, |
| 561 | struct flow_offload_tuple *tuple, |
| 562 | __be32 *ip_daddr) |
| 563 | { |
| 564 | if (tuple->tun_num) |
| 565 | return nf_flow_tunnel_ipip_push(net, skb, tuple, ip_daddr); |
| 566 | |
| 567 | return 0; |
| 568 | } |
| 569 | |
| 570 | static int nf_flow_encap_push(struct sk_buff *skb, |
| 571 | struct flow_offload_tuple *tuple) |
| 572 | { |
| 573 | int i; |
| 574 | |
| 575 | for (i = 0; i < tuple->encap_num; i++) { |
| 576 | switch (tuple->encap[i].proto) { |
| 577 | case htons(ETH_P_8021Q): |
| 578 | case htons(ETH_P_8021AD): |
| 579 | if (skb_vlan_push(skb, vlan_proto: tuple->encap[i].proto, |
| 580 | vlan_tci: tuple->encap[i].id) < 0) |
| 581 | return -1; |
| 582 | break; |
| 583 | case htons(ETH_P_PPP_SES): |
| 584 | if (nf_flow_pppoe_push(skb, id: tuple->encap[i].id) < 0) |
| 585 | return -1; |
| 586 | break; |
| 587 | } |
| 588 | } |
| 589 | |
| 590 | return 0; |
| 591 | } |
| 592 | |
| 593 | unsigned int |
| 594 | nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, |
| 595 | const struct nf_hook_state *state) |
| 596 | { |
| 597 | struct flow_offload_tuple_rhash *tuplehash; |
| 598 | struct nf_flowtable *flow_table = priv; |
| 599 | struct flow_offload_tuple *other_tuple; |
| 600 | enum flow_offload_tuple_dir dir; |
| 601 | struct nf_flowtable_ctx ctx = { |
| 602 | .in = state->in, |
| 603 | }; |
| 604 | struct nf_flow_xmit xmit = {}; |
| 605 | struct flow_offload *flow; |
| 606 | struct neighbour *neigh; |
| 607 | struct rtable *rt; |
| 608 | __be32 ip_daddr; |
| 609 | int ret; |
| 610 | |
| 611 | tuplehash = nf_flow_offload_lookup(ctx: &ctx, flow_table, skb); |
| 612 | if (!tuplehash) |
| 613 | return NF_ACCEPT; |
| 614 | |
| 615 | ret = nf_flow_offload_forward(ctx: &ctx, flow_table, tuplehash, skb); |
| 616 | if (ret < 0) |
| 617 | return NF_DROP; |
| 618 | else if (ret == 0) |
| 619 | return NF_ACCEPT; |
| 620 | |
| 621 | if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) { |
| 622 | rt = dst_rtable(tuplehash->tuple.dst_cache); |
| 623 | memset(skb->cb, 0, sizeof(struct inet_skb_parm)); |
| 624 | IPCB(skb)->iif = skb->dev->ifindex; |
| 625 | IPCB(skb)->flags = IPSKB_FORWARDED; |
| 626 | return nf_flow_xmit_xfrm(skb, state, dst: &rt->dst); |
| 627 | } |
| 628 | |
| 629 | dir = tuplehash->tuple.dir; |
| 630 | flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]); |
| 631 | other_tuple = &flow->tuplehash[!dir].tuple; |
| 632 | ip_daddr = other_tuple->src_v4.s_addr; |
| 633 | |
| 634 | if (nf_flow_tunnel_v4_push(net: state->net, skb, tuple: other_tuple, ip_daddr: &ip_daddr) < 0) |
| 635 | return NF_DROP; |
| 636 | |
| 637 | if (nf_flow_encap_push(skb, tuple: other_tuple) < 0) |
| 638 | return NF_DROP; |
| 639 | |
| 640 | switch (tuplehash->tuple.xmit_type) { |
| 641 | case FLOW_OFFLOAD_XMIT_NEIGH: |
| 642 | rt = dst_rtable(tuplehash->tuple.dst_cache); |
| 643 | xmit.outdev = dev_get_by_index_rcu(net: state->net, ifindex: tuplehash->tuple.ifidx); |
| 644 | if (!xmit.outdev) { |
| 645 | flow_offload_teardown(flow); |
| 646 | return NF_DROP; |
| 647 | } |
| 648 | neigh = ip_neigh_gw4(dev: rt->dst.dev, daddr: rt_nexthop(rt, daddr: ip_daddr)); |
| 649 | if (IS_ERR(ptr: neigh)) { |
| 650 | flow_offload_teardown(flow); |
| 651 | return NF_DROP; |
| 652 | } |
| 653 | xmit.dest = neigh->ha; |
| 654 | skb_dst_set_noref(skb, dst: &rt->dst); |
| 655 | break; |
| 656 | case FLOW_OFFLOAD_XMIT_DIRECT: |
| 657 | xmit.outdev = dev_get_by_index_rcu(net: state->net, ifindex: tuplehash->tuple.out.ifidx); |
| 658 | if (!xmit.outdev) { |
| 659 | flow_offload_teardown(flow); |
| 660 | return NF_DROP; |
| 661 | } |
| 662 | xmit.dest = tuplehash->tuple.out.h_dest; |
| 663 | xmit.source = tuplehash->tuple.out.h_source; |
| 664 | break; |
| 665 | default: |
| 666 | WARN_ON_ONCE(1); |
| 667 | return NF_DROP; |
| 668 | } |
| 669 | |
| 670 | return nf_flow_queue_xmit(net: state->net, skb, xmit: &xmit); |
| 671 | } |
| 672 | EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook); |
| 673 | |
| 674 | static void nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff, |
| 675 | struct in6_addr *addr, |
| 676 | struct in6_addr *new_addr, |
| 677 | struct ipv6hdr *ip6h) |
| 678 | { |
| 679 | struct tcphdr *tcph; |
| 680 | |
| 681 | tcph = (void *)(skb_network_header(skb) + thoff); |
| 682 | inet_proto_csum_replace16(sum: &tcph->check, skb, from: addr->s6_addr32, |
| 683 | to: new_addr->s6_addr32, pseudohdr: true); |
| 684 | } |
| 685 | |
| 686 | static void nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff, |
| 687 | struct in6_addr *addr, |
| 688 | struct in6_addr *new_addr) |
| 689 | { |
| 690 | struct udphdr *udph; |
| 691 | |
| 692 | udph = (void *)(skb_network_header(skb) + thoff); |
| 693 | if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) { |
| 694 | inet_proto_csum_replace16(sum: &udph->check, skb, from: addr->s6_addr32, |
| 695 | to: new_addr->s6_addr32, pseudohdr: true); |
| 696 | if (!udph->check) |
| 697 | udph->check = CSUM_MANGLED_0; |
| 698 | } |
| 699 | } |
| 700 | |
| 701 | static void nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h, |
| 702 | unsigned int thoff, struct in6_addr *addr, |
| 703 | struct in6_addr *new_addr) |
| 704 | { |
| 705 | switch (ip6h->nexthdr) { |
| 706 | case IPPROTO_TCP: |
| 707 | nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr, ip6h); |
| 708 | break; |
| 709 | case IPPROTO_UDP: |
| 710 | nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr); |
| 711 | break; |
| 712 | } |
| 713 | } |
| 714 | |
| 715 | static void nf_flow_snat_ipv6(const struct flow_offload *flow, |
| 716 | struct sk_buff *skb, struct ipv6hdr *ip6h, |
| 717 | unsigned int thoff, |
| 718 | enum flow_offload_tuple_dir dir) |
| 719 | { |
| 720 | struct in6_addr addr, new_addr; |
| 721 | |
| 722 | switch (dir) { |
| 723 | case FLOW_OFFLOAD_DIR_ORIGINAL: |
| 724 | addr = ip6h->saddr; |
| 725 | new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6; |
| 726 | ip6h->saddr = new_addr; |
| 727 | break; |
| 728 | case FLOW_OFFLOAD_DIR_REPLY: |
| 729 | addr = ip6h->daddr; |
| 730 | new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6; |
| 731 | ip6h->daddr = new_addr; |
| 732 | break; |
| 733 | } |
| 734 | |
| 735 | nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, addr: &addr, new_addr: &new_addr); |
| 736 | } |
| 737 | |
| 738 | static void nf_flow_dnat_ipv6(const struct flow_offload *flow, |
| 739 | struct sk_buff *skb, struct ipv6hdr *ip6h, |
| 740 | unsigned int thoff, |
| 741 | enum flow_offload_tuple_dir dir) |
| 742 | { |
| 743 | struct in6_addr addr, new_addr; |
| 744 | |
| 745 | switch (dir) { |
| 746 | case FLOW_OFFLOAD_DIR_ORIGINAL: |
| 747 | addr = ip6h->daddr; |
| 748 | new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6; |
| 749 | ip6h->daddr = new_addr; |
| 750 | break; |
| 751 | case FLOW_OFFLOAD_DIR_REPLY: |
| 752 | addr = ip6h->saddr; |
| 753 | new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6; |
| 754 | ip6h->saddr = new_addr; |
| 755 | break; |
| 756 | } |
| 757 | |
| 758 | nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, addr: &addr, new_addr: &new_addr); |
| 759 | } |
| 760 | |
| 761 | static void nf_flow_nat_ipv6(const struct flow_offload *flow, |
| 762 | struct sk_buff *skb, |
| 763 | enum flow_offload_tuple_dir dir, |
| 764 | struct ipv6hdr *ip6h) |
| 765 | { |
| 766 | unsigned int thoff = sizeof(*ip6h); |
| 767 | |
| 768 | if (test_bit(NF_FLOW_SNAT, &flow->flags)) { |
| 769 | nf_flow_snat_port(flow, skb, thoff, protocol: ip6h->nexthdr, dir); |
| 770 | nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir); |
| 771 | } |
| 772 | if (test_bit(NF_FLOW_DNAT, &flow->flags)) { |
| 773 | nf_flow_dnat_port(flow, skb, thoff, protocol: ip6h->nexthdr, dir); |
| 774 | nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir); |
| 775 | } |
| 776 | } |
| 777 | |
| 778 | static int nf_flow_tuple_ipv6(struct nf_flowtable_ctx *ctx, struct sk_buff *skb, |
| 779 | struct flow_offload_tuple *tuple) |
| 780 | { |
| 781 | struct flow_ports *ports; |
| 782 | struct ipv6hdr *ip6h; |
| 783 | unsigned int thoff; |
| 784 | u8 nexthdr; |
| 785 | |
| 786 | thoff = sizeof(*ip6h) + ctx->offset; |
| 787 | if (!pskb_may_pull(skb, len: thoff)) |
| 788 | return -1; |
| 789 | |
| 790 | ip6h = (struct ipv6hdr *)(skb_network_header(skb) + ctx->offset); |
| 791 | |
| 792 | nexthdr = ip6h->nexthdr; |
| 793 | switch (nexthdr) { |
| 794 | case IPPROTO_TCP: |
| 795 | ctx->hdrsize = sizeof(struct tcphdr); |
| 796 | break; |
| 797 | case IPPROTO_UDP: |
| 798 | ctx->hdrsize = sizeof(struct udphdr); |
| 799 | break; |
| 800 | #ifdef CONFIG_NF_CT_PROTO_GRE |
| 801 | case IPPROTO_GRE: |
| 802 | ctx->hdrsize = sizeof(struct gre_base_hdr); |
| 803 | break; |
| 804 | #endif |
| 805 | default: |
| 806 | return -1; |
| 807 | } |
| 808 | |
| 809 | if (ip6h->hop_limit <= 1) |
| 810 | return -1; |
| 811 | |
| 812 | if (!pskb_may_pull(skb, len: thoff + ctx->hdrsize)) |
| 813 | return -1; |
| 814 | |
| 815 | switch (nexthdr) { |
| 816 | case IPPROTO_TCP: |
| 817 | case IPPROTO_UDP: |
| 818 | ports = (struct flow_ports *)(skb_network_header(skb) + thoff); |
| 819 | tuple->src_port = ports->source; |
| 820 | tuple->dst_port = ports->dest; |
| 821 | break; |
| 822 | case IPPROTO_GRE: { |
| 823 | struct gre_base_hdr *greh; |
| 824 | |
| 825 | greh = (struct gre_base_hdr *)(skb_network_header(skb) + thoff); |
| 826 | if ((greh->flags & GRE_VERSION) != GRE_VERSION_0) |
| 827 | return -1; |
| 828 | break; |
| 829 | } |
| 830 | } |
| 831 | |
| 832 | ip6h = (struct ipv6hdr *)(skb_network_header(skb) + ctx->offset); |
| 833 | |
| 834 | tuple->src_v6 = ip6h->saddr; |
| 835 | tuple->dst_v6 = ip6h->daddr; |
| 836 | tuple->l3proto = AF_INET6; |
| 837 | tuple->l4proto = nexthdr; |
| 838 | tuple->iifidx = ctx->in->ifindex; |
| 839 | nf_flow_tuple_encap(skb, tuple); |
| 840 | |
| 841 | return 0; |
| 842 | } |
| 843 | |
| 844 | static int nf_flow_offload_ipv6_forward(struct nf_flowtable_ctx *ctx, |
| 845 | struct nf_flowtable *flow_table, |
| 846 | struct flow_offload_tuple_rhash *tuplehash, |
| 847 | struct sk_buff *skb) |
| 848 | { |
| 849 | enum flow_offload_tuple_dir dir; |
| 850 | struct flow_offload *flow; |
| 851 | unsigned int thoff, mtu; |
| 852 | struct ipv6hdr *ip6h; |
| 853 | |
| 854 | dir = tuplehash->tuple.dir; |
| 855 | flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]); |
| 856 | |
| 857 | mtu = flow->tuplehash[dir].tuple.mtu + ctx->offset; |
| 858 | if (unlikely(nf_flow_exceeds_mtu(skb, mtu))) |
| 859 | return 0; |
| 860 | |
| 861 | ip6h = (struct ipv6hdr *)(skb_network_header(skb) + ctx->offset); |
| 862 | thoff = sizeof(*ip6h) + ctx->offset; |
| 863 | if (nf_flow_state_check(flow, proto: ip6h->nexthdr, skb, thoff)) |
| 864 | return 0; |
| 865 | |
| 866 | if (!nf_flow_dst_check(tuple: &tuplehash->tuple)) { |
| 867 | flow_offload_teardown(flow); |
| 868 | return 0; |
| 869 | } |
| 870 | |
| 871 | if (skb_try_make_writable(skb, write_len: thoff + ctx->hdrsize)) |
| 872 | return -1; |
| 873 | |
| 874 | flow_offload_refresh(flow_table, flow, force: false); |
| 875 | |
| 876 | nf_flow_encap_pop(skb, tuplehash); |
| 877 | |
| 878 | ip6h = ipv6_hdr(skb); |
| 879 | nf_flow_nat_ipv6(flow, skb, dir, ip6h); |
| 880 | |
| 881 | ip6h->hop_limit--; |
| 882 | skb_clear_tstamp(skb); |
| 883 | |
| 884 | if (flow_table->flags & NF_FLOWTABLE_COUNTER) |
| 885 | nf_ct_acct_update(ct: flow->ct, dir: tuplehash->tuple.dir, bytes: skb->len); |
| 886 | |
| 887 | return 1; |
| 888 | } |
| 889 | |
| 890 | static struct flow_offload_tuple_rhash * |
| 891 | nf_flow_offload_ipv6_lookup(struct nf_flowtable_ctx *ctx, |
| 892 | struct nf_flowtable *flow_table, |
| 893 | struct sk_buff *skb) |
| 894 | { |
| 895 | struct flow_offload_tuple tuple = {}; |
| 896 | |
| 897 | if (skb->protocol != htons(ETH_P_IPV6) && |
| 898 | !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IPV6), offset: &ctx->offset)) |
| 899 | return NULL; |
| 900 | |
| 901 | if (nf_flow_tuple_ipv6(ctx, skb, tuple: &tuple) < 0) |
| 902 | return NULL; |
| 903 | |
| 904 | return flow_offload_lookup(flow_table, tuple: &tuple); |
| 905 | } |
| 906 | |
| 907 | unsigned int |
| 908 | nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, |
| 909 | const struct nf_hook_state *state) |
| 910 | { |
| 911 | struct flow_offload_tuple_rhash *tuplehash; |
| 912 | struct nf_flowtable *flow_table = priv; |
| 913 | struct flow_offload_tuple *other_tuple; |
| 914 | enum flow_offload_tuple_dir dir; |
| 915 | struct nf_flowtable_ctx ctx = { |
| 916 | .in = state->in, |
| 917 | }; |
| 918 | struct nf_flow_xmit xmit = {}; |
| 919 | struct in6_addr *ip6_daddr; |
| 920 | struct flow_offload *flow; |
| 921 | struct neighbour *neigh; |
| 922 | struct rt6_info *rt; |
| 923 | int ret; |
| 924 | |
| 925 | tuplehash = nf_flow_offload_ipv6_lookup(ctx: &ctx, flow_table, skb); |
| 926 | if (tuplehash == NULL) |
| 927 | return NF_ACCEPT; |
| 928 | |
| 929 | ret = nf_flow_offload_ipv6_forward(ctx: &ctx, flow_table, tuplehash, skb); |
| 930 | if (ret < 0) |
| 931 | return NF_DROP; |
| 932 | else if (ret == 0) |
| 933 | return NF_ACCEPT; |
| 934 | |
| 935 | if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) { |
| 936 | rt = dst_rt6_info(tuplehash->tuple.dst_cache); |
| 937 | memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); |
| 938 | IP6CB(skb)->iif = skb->dev->ifindex; |
| 939 | IP6CB(skb)->flags = IP6SKB_FORWARDED; |
| 940 | return nf_flow_xmit_xfrm(skb, state, dst: &rt->dst); |
| 941 | } |
| 942 | |
| 943 | dir = tuplehash->tuple.dir; |
| 944 | flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]); |
| 945 | other_tuple = &flow->tuplehash[!dir].tuple; |
| 946 | ip6_daddr = &other_tuple->src_v6; |
| 947 | |
| 948 | if (nf_flow_encap_push(skb, tuple: other_tuple) < 0) |
| 949 | return NF_DROP; |
| 950 | |
| 951 | switch (tuplehash->tuple.xmit_type) { |
| 952 | case FLOW_OFFLOAD_XMIT_NEIGH: |
| 953 | rt = dst_rt6_info(tuplehash->tuple.dst_cache); |
| 954 | xmit.outdev = dev_get_by_index_rcu(net: state->net, ifindex: tuplehash->tuple.ifidx); |
| 955 | if (!xmit.outdev) { |
| 956 | flow_offload_teardown(flow); |
| 957 | return NF_DROP; |
| 958 | } |
| 959 | neigh = ip_neigh_gw6(dev: rt->dst.dev, addr: rt6_nexthop(rt, daddr: ip6_daddr)); |
| 960 | if (IS_ERR(ptr: neigh)) { |
| 961 | flow_offload_teardown(flow); |
| 962 | return NF_DROP; |
| 963 | } |
| 964 | xmit.dest = neigh->ha; |
| 965 | skb_dst_set_noref(skb, dst: &rt->dst); |
| 966 | break; |
| 967 | case FLOW_OFFLOAD_XMIT_DIRECT: |
| 968 | xmit.outdev = dev_get_by_index_rcu(net: state->net, ifindex: tuplehash->tuple.out.ifidx); |
| 969 | if (!xmit.outdev) { |
| 970 | flow_offload_teardown(flow); |
| 971 | return NF_DROP; |
| 972 | } |
| 973 | xmit.dest = tuplehash->tuple.out.h_dest; |
| 974 | xmit.source = tuplehash->tuple.out.h_source; |
| 975 | break; |
| 976 | default: |
| 977 | WARN_ON_ONCE(1); |
| 978 | return NF_DROP; |
| 979 | } |
| 980 | |
| 981 | return nf_flow_queue_xmit(net: state->net, skb, xmit: &xmit); |
| 982 | } |
| 983 | EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook); |
| 984 | |