| 1 | /* SPDX-License-Identifier: GPL-2.0 */ |
| 2 | #include <linux/types.h> |
| 3 | #include <linux/ip.h> |
| 4 | #include <linux/netfilter.h> |
| 5 | #include <linux/netfilter_ipv6.h> |
| 6 | #include <linux/netfilter_bridge.h> |
| 7 | #include <linux/module.h> |
| 8 | #include <linux/skbuff.h> |
| 9 | #include <linux/icmp.h> |
| 10 | #include <linux/sysctl.h> |
| 11 | #include <net/route.h> |
| 12 | #include <net/ip.h> |
| 13 | |
| 14 | #include <net/netfilter/nf_conntrack.h> |
| 15 | #include <net/netfilter/nf_conntrack_core.h> |
| 16 | #include <net/netfilter/nf_conntrack_helper.h> |
| 17 | #include <net/netfilter/nf_conntrack_bridge.h> |
| 18 | |
| 19 | #include <linux/netfilter/nf_tables.h> |
| 20 | #include <net/netfilter/nf_tables.h> |
| 21 | |
| 22 | #include "../br_private.h" |
| 23 | |
| 24 | /* Best effort variant of ip_do_fragment which preserves geometry, unless skbuff |
| 25 | * has been linearized or cloned. |
| 26 | */ |
| 27 | static int nf_br_ip_fragment(struct net *net, struct sock *sk, |
| 28 | struct sk_buff *skb, |
| 29 | struct nf_bridge_frag_data *data, |
| 30 | int (*output)(struct net *, struct sock *sk, |
| 31 | const struct nf_bridge_frag_data *data, |
| 32 | struct sk_buff *)) |
| 33 | { |
| 34 | int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size; |
| 35 | u8 tstamp_type = skb->tstamp_type; |
| 36 | unsigned int hlen, ll_rs, mtu; |
| 37 | ktime_t tstamp = skb->tstamp; |
| 38 | struct ip_frag_state state; |
| 39 | struct iphdr *iph; |
| 40 | int err = 0; |
| 41 | |
| 42 | /* for offloaded checksums cleanup checksum before fragmentation */ |
| 43 | if (skb->ip_summed == CHECKSUM_PARTIAL && |
| 44 | (err = skb_checksum_help(skb))) |
| 45 | goto blackhole; |
| 46 | |
| 47 | iph = ip_hdr(skb); |
| 48 | |
| 49 | /* |
| 50 | * Setup starting values |
| 51 | */ |
| 52 | |
| 53 | hlen = iph->ihl * 4; |
| 54 | frag_max_size -= hlen; |
| 55 | ll_rs = LL_RESERVED_SPACE(skb->dev); |
| 56 | mtu = skb->dev->mtu; |
| 57 | |
| 58 | if (skb_has_frag_list(skb)) { |
| 59 | unsigned int first_len = skb_pagelen(skb); |
| 60 | struct ip_fraglist_iter iter; |
| 61 | struct sk_buff *frag; |
| 62 | |
| 63 | if (first_len - hlen > mtu) |
| 64 | goto blackhole; |
| 65 | |
| 66 | if (skb_cloned(skb) || |
| 67 | skb_headroom(skb) < ll_rs) |
| 68 | goto slow_path; |
| 69 | |
| 70 | skb_walk_frags(skb, frag) { |
| 71 | if (frag->len > mtu) |
| 72 | goto blackhole; |
| 73 | |
| 74 | if (skb_shared(skb: frag) || |
| 75 | skb_headroom(skb: frag) < hlen + ll_rs) |
| 76 | goto slow_path; |
| 77 | } |
| 78 | |
| 79 | ip_fraglist_init(skb, iph, hlen, iter: &iter); |
| 80 | |
| 81 | for (;;) { |
| 82 | if (iter.frag) |
| 83 | ip_fraglist_prepare(skb, iter: &iter); |
| 84 | |
| 85 | skb_set_delivery_time(skb, kt: tstamp, tstamp_type); |
| 86 | err = output(net, sk, data, skb); |
| 87 | if (err || !iter.frag) |
| 88 | break; |
| 89 | |
| 90 | skb = ip_fraglist_next(iter: &iter); |
| 91 | } |
| 92 | |
| 93 | if (!err) |
| 94 | return 0; |
| 95 | |
| 96 | kfree_skb_list(segs: iter.frag); |
| 97 | |
| 98 | return err; |
| 99 | } |
| 100 | slow_path: |
| 101 | /* This is a linearized skbuff, the original geometry is lost for us. |
| 102 | * This may also be a clone skbuff, we could preserve the geometry for |
| 103 | * the copies but probably not worth the effort. |
| 104 | */ |
| 105 | ip_frag_init(skb, hlen, ll_rs, mtu: frag_max_size, DF: false, state: &state); |
| 106 | |
| 107 | while (state.left > 0) { |
| 108 | struct sk_buff *skb2; |
| 109 | |
| 110 | skb2 = ip_frag_next(skb, state: &state); |
| 111 | if (IS_ERR(ptr: skb2)) { |
| 112 | err = PTR_ERR(ptr: skb2); |
| 113 | goto blackhole; |
| 114 | } |
| 115 | |
| 116 | skb_set_delivery_time(skb: skb2, kt: tstamp, tstamp_type); |
| 117 | err = output(net, sk, data, skb2); |
| 118 | if (err) |
| 119 | goto blackhole; |
| 120 | } |
| 121 | consume_skb(skb); |
| 122 | return err; |
| 123 | |
| 124 | blackhole: |
| 125 | kfree_skb(skb); |
| 126 | return 0; |
| 127 | } |
| 128 | |
| 129 | /* ip_defrag() expects IPCB() in place. */ |
/* ip_defrag() expects IPCB() in place. */
static void br_skb_cb_save(struct sk_buff *skb, struct br_input_skb_cb *cb,
			   size_t inet_skb_parm_size)
{
	/* Stash the bridge control block, then zero the first
	 * inet_skb_parm_size bytes so the IPv4/IPv6 defrag code finds a
	 * pristine IPCB()/IP6CB().
	 */
	memcpy(cb, skb->cb, sizeof(*cb));
	memset(skb->cb, 0, inet_skb_parm_size);
}
| 136 | |
/* Undo br_skb_cb_save(): put the bridge control block back and record the
 * largest fragment size seen during reassembly for later refragmentation.
 */
static void br_skb_cb_restore(struct sk_buff *skb,
			      const struct br_input_skb_cb *cb,
			      u16 fragsz)
{
	memcpy(skb->cb, cb, sizeof(*cb));
	BR_INPUT_SKB_CB(skb)->frag_max_size = fragsz;
}
| 144 | |
| 145 | static unsigned int nf_ct_br_defrag4(struct sk_buff *skb, |
| 146 | const struct nf_hook_state *state) |
| 147 | { |
| 148 | u16 zone_id = NF_CT_DEFAULT_ZONE_ID; |
| 149 | enum ip_conntrack_info ctinfo; |
| 150 | struct br_input_skb_cb cb; |
| 151 | const struct nf_conn *ct; |
| 152 | int err; |
| 153 | |
| 154 | if (!ip_is_fragment(iph: ip_hdr(skb))) |
| 155 | return NF_ACCEPT; |
| 156 | |
| 157 | ct = nf_ct_get(skb, ctinfo: &ctinfo); |
| 158 | if (ct) |
| 159 | zone_id = nf_ct_zone_id(zone: nf_ct_zone(ct), CTINFO2DIR(ctinfo)); |
| 160 | |
| 161 | br_skb_cb_save(skb, cb: &cb, inet_skb_parm_size: sizeof(struct inet_skb_parm)); |
| 162 | local_bh_disable(); |
| 163 | err = ip_defrag(net: state->net, skb, |
| 164 | user: IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id); |
| 165 | local_bh_enable(); |
| 166 | if (!err) { |
| 167 | br_skb_cb_restore(skb, cb: &cb, IPCB(skb)->frag_max_size); |
| 168 | skb->ignore_df = 1; |
| 169 | return NF_ACCEPT; |
| 170 | } |
| 171 | |
| 172 | return NF_STOLEN; |
| 173 | } |
| 174 | |
| 175 | static unsigned int nf_ct_br_defrag6(struct sk_buff *skb, |
| 176 | const struct nf_hook_state *state) |
| 177 | { |
| 178 | #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) |
| 179 | u16 zone_id = NF_CT_DEFAULT_ZONE_ID; |
| 180 | enum ip_conntrack_info ctinfo; |
| 181 | struct br_input_skb_cb cb; |
| 182 | const struct nf_conn *ct; |
| 183 | int err; |
| 184 | |
| 185 | ct = nf_ct_get(skb, ctinfo: &ctinfo); |
| 186 | if (ct) |
| 187 | zone_id = nf_ct_zone_id(zone: nf_ct_zone(ct), CTINFO2DIR(ctinfo)); |
| 188 | |
| 189 | br_skb_cb_save(skb, cb: &cb, inet_skb_parm_size: sizeof(struct inet6_skb_parm)); |
| 190 | |
| 191 | err = nf_ct_frag6_gather(net: state->net, skb, |
| 192 | user: IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id); |
| 193 | /* queued */ |
| 194 | if (err == -EINPROGRESS) |
| 195 | return NF_STOLEN; |
| 196 | |
| 197 | br_skb_cb_restore(skb, cb: &cb, IP6CB(skb)->frag_max_size); |
| 198 | return err == 0 ? NF_ACCEPT : NF_DROP; |
| 199 | #else |
| 200 | return NF_ACCEPT; |
| 201 | #endif |
| 202 | } |
| 203 | |
| 204 | static int nf_ct_br_ip_check(const struct sk_buff *skb) |
| 205 | { |
| 206 | const struct iphdr *iph; |
| 207 | int nhoff, len; |
| 208 | |
| 209 | nhoff = skb_network_offset(skb); |
| 210 | iph = ip_hdr(skb); |
| 211 | if (iph->ihl < 5 || |
| 212 | iph->version != 4) |
| 213 | return -1; |
| 214 | |
| 215 | len = skb_ip_totlen(skb); |
| 216 | if (skb->len < nhoff + len || |
| 217 | len < (iph->ihl * 4)) |
| 218 | return -1; |
| 219 | |
| 220 | return 0; |
| 221 | } |
| 222 | |
| 223 | static int nf_ct_br_ipv6_check(const struct sk_buff *skb) |
| 224 | { |
| 225 | const struct ipv6hdr *hdr; |
| 226 | int nhoff, len; |
| 227 | |
| 228 | nhoff = skb_network_offset(skb); |
| 229 | hdr = ipv6_hdr(skb); |
| 230 | if (hdr->version != 6) |
| 231 | return -1; |
| 232 | |
| 233 | len = ntohs(hdr->payload_len) + sizeof(struct ipv6hdr) + nhoff; |
| 234 | if (skb->len < len) |
| 235 | return -1; |
| 236 | |
| 237 | return 0; |
| 238 | } |
| 239 | |
| 240 | static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb, |
| 241 | const struct nf_hook_state *state) |
| 242 | { |
| 243 | struct nf_hook_state bridge_state = *state; |
| 244 | enum ip_conntrack_info ctinfo; |
| 245 | struct nf_conn *ct; |
| 246 | u32 len; |
| 247 | int ret; |
| 248 | |
| 249 | ct = nf_ct_get(skb, ctinfo: &ctinfo); |
| 250 | if ((ct && !nf_ct_is_template(ct)) || |
| 251 | ctinfo == IP_CT_UNTRACKED) |
| 252 | return NF_ACCEPT; |
| 253 | |
| 254 | switch (skb->protocol) { |
| 255 | case htons(ETH_P_IP): |
| 256 | if (!pskb_may_pull(skb, len: sizeof(struct iphdr))) |
| 257 | return NF_ACCEPT; |
| 258 | |
| 259 | len = skb_ip_totlen(skb); |
| 260 | if (pskb_trim_rcsum(skb, len)) |
| 261 | return NF_ACCEPT; |
| 262 | |
| 263 | if (nf_ct_br_ip_check(skb)) |
| 264 | return NF_ACCEPT; |
| 265 | |
| 266 | bridge_state.pf = NFPROTO_IPV4; |
| 267 | ret = nf_ct_br_defrag4(skb, state: &bridge_state); |
| 268 | break; |
| 269 | case htons(ETH_P_IPV6): |
| 270 | if (!pskb_may_pull(skb, len: sizeof(struct ipv6hdr))) |
| 271 | return NF_ACCEPT; |
| 272 | |
| 273 | len = sizeof(struct ipv6hdr) + ntohs(ipv6_hdr(skb)->payload_len); |
| 274 | if (pskb_trim_rcsum(skb, len)) |
| 275 | return NF_ACCEPT; |
| 276 | |
| 277 | if (nf_ct_br_ipv6_check(skb)) |
| 278 | return NF_ACCEPT; |
| 279 | |
| 280 | bridge_state.pf = NFPROTO_IPV6; |
| 281 | ret = nf_ct_br_defrag6(skb, state: &bridge_state); |
| 282 | break; |
| 283 | default: |
| 284 | nf_ct_set(skb, NULL, info: IP_CT_UNTRACKED); |
| 285 | return NF_ACCEPT; |
| 286 | } |
| 287 | |
| 288 | if (ret != NF_ACCEPT) |
| 289 | return ret; |
| 290 | |
| 291 | return nf_conntrack_in(skb, state: &bridge_state); |
| 292 | } |
| 293 | |
| 294 | static unsigned int nf_ct_bridge_in(void *priv, struct sk_buff *skb, |
| 295 | const struct nf_hook_state *state) |
| 296 | { |
| 297 | bool promisc = BR_INPUT_SKB_CB(skb)->promisc; |
| 298 | struct nf_conntrack *nfct = skb_nfct(skb); |
| 299 | struct nf_conn *ct; |
| 300 | |
| 301 | if (promisc) { |
| 302 | nf_reset_ct(skb); |
| 303 | return NF_ACCEPT; |
| 304 | } |
| 305 | |
| 306 | if (!nfct || skb->pkt_type == PACKET_HOST) |
| 307 | return NF_ACCEPT; |
| 308 | |
| 309 | /* nf_conntrack_confirm() cannot handle concurrent clones, |
| 310 | * this happens for broad/multicast frames with e.g. macvlan on top |
| 311 | * of the bridge device. |
| 312 | */ |
| 313 | ct = container_of(nfct, struct nf_conn, ct_general); |
| 314 | if (nf_ct_is_confirmed(ct) || nf_ct_is_template(ct)) |
| 315 | return NF_ACCEPT; |
| 316 | |
| 317 | /* let inet prerouting call conntrack again */ |
| 318 | skb->_nfct = 0; |
| 319 | nf_ct_put(ct); |
| 320 | |
| 321 | return NF_ACCEPT; |
| 322 | } |
| 323 | |
| 324 | static void nf_ct_bridge_frag_save(struct sk_buff *skb, |
| 325 | struct nf_bridge_frag_data *data) |
| 326 | { |
| 327 | if (skb_vlan_tag_present(skb)) { |
| 328 | data->vlan_present = true; |
| 329 | data->vlan_tci = skb->vlan_tci; |
| 330 | data->vlan_proto = skb->vlan_proto; |
| 331 | } else { |
| 332 | data->vlan_present = false; |
| 333 | } |
| 334 | skb_copy_from_linear_data_offset(skb, offset: -ETH_HLEN, to: data->mac, ETH_HLEN); |
| 335 | } |
| 336 | |
| 337 | static unsigned int |
| 338 | nf_ct_bridge_refrag(struct sk_buff *skb, const struct nf_hook_state *state, |
| 339 | int (*output)(struct net *, struct sock *sk, |
| 340 | const struct nf_bridge_frag_data *data, |
| 341 | struct sk_buff *)) |
| 342 | { |
| 343 | struct nf_bridge_frag_data data; |
| 344 | |
| 345 | if (!BR_INPUT_SKB_CB(skb)->frag_max_size) |
| 346 | return NF_ACCEPT; |
| 347 | |
| 348 | nf_ct_bridge_frag_save(skb, data: &data); |
| 349 | switch (skb->protocol) { |
| 350 | case htons(ETH_P_IP): |
| 351 | nf_br_ip_fragment(net: state->net, sk: state->sk, skb, data: &data, output); |
| 352 | break; |
| 353 | case htons(ETH_P_IPV6): |
| 354 | nf_br_ip6_fragment(net: state->net, sk: state->sk, skb, data: &data, output); |
| 355 | break; |
| 356 | default: |
| 357 | WARN_ON_ONCE(1); |
| 358 | return NF_DROP; |
| 359 | } |
| 360 | |
| 361 | return NF_STOLEN; |
| 362 | } |
| 363 | |
| 364 | /* Actually only slow path refragmentation needs this. */ |
| 365 | static int nf_ct_bridge_frag_restore(struct sk_buff *skb, |
| 366 | const struct nf_bridge_frag_data *data) |
| 367 | { |
| 368 | int err; |
| 369 | |
| 370 | err = skb_cow_head(skb, ETH_HLEN); |
| 371 | if (err) { |
| 372 | kfree_skb(skb); |
| 373 | return -ENOMEM; |
| 374 | } |
| 375 | if (data->vlan_present) |
| 376 | __vlan_hwaccel_put_tag(skb, vlan_proto: data->vlan_proto, vlan_tci: data->vlan_tci); |
| 377 | else if (skb_vlan_tag_present(skb)) |
| 378 | __vlan_hwaccel_clear_tag(skb); |
| 379 | |
| 380 | skb_copy_to_linear_data_offset(skb, offset: -ETH_HLEN, from: data->mac, ETH_HLEN); |
| 381 | skb_reset_mac_header(skb); |
| 382 | |
| 383 | return 0; |
| 384 | } |
| 385 | |
/* Output callback for refragmentation: restore the saved Ethernet/VLAN
 * header on the fragment, then push it out through the bridge xmit path.
 */
static int nf_ct_bridge_refrag_post(struct net *net, struct sock *sk,
				    const struct nf_bridge_frag_data *data,
				    struct sk_buff *skb)
{
	int ret = nf_ct_bridge_frag_restore(skb, data);

	if (ret < 0)
		return ret;

	return br_dev_queue_push_xmit(net, sk, skb);
}
| 398 | |
| 399 | static unsigned int nf_ct_bridge_post(void *priv, struct sk_buff *skb, |
| 400 | const struct nf_hook_state *state) |
| 401 | { |
| 402 | int ret; |
| 403 | |
| 404 | ret = nf_confirm(priv, skb, state); |
| 405 | if (ret != NF_ACCEPT) |
| 406 | return ret; |
| 407 | |
| 408 | return nf_ct_bridge_refrag(skb, state, output: nf_ct_bridge_refrag_post); |
| 409 | } |
| 410 | |
/* Bridge-family netfilter hooks implementing conntrack for bridged traffic. */
static struct nf_hook_ops nf_ct_bridge_hook_ops[] __read_mostly = {
	{
		/* validate, defragment and track incoming frames */
		.hook		= nf_ct_bridge_pre,
		.pf		= NFPROTO_BRIDGE,
		.hooknum	= NF_BR_PRE_ROUTING,
		.priority	= NF_IP_PRI_CONNTRACK,
	},
	{
		/* drop the conntrack reference for locally delivered clones */
		.hook		= nf_ct_bridge_in,
		.pf		= NFPROTO_BRIDGE,
		.hooknum	= NF_BR_LOCAL_IN,
		.priority	= NF_IP_PRI_CONNTRACK_CONFIRM,
	},
	{
		/* confirm the entry and refragment on the way out */
		.hook		= nf_ct_bridge_post,
		.pf		= NFPROTO_BRIDGE,
		.hooknum	= NF_BR_POST_ROUTING,
		.priority	= NF_IP_PRI_CONNTRACK_CONFIRM,
	},
};
| 431 | |
/* Registration descriptor handed to the conntrack core. */
static struct nf_ct_bridge_info bridge_info = {
	.ops		= nf_ct_bridge_hook_ops,
	.ops_size	= ARRAY_SIZE(nf_ct_bridge_hook_ops),
	.me		= THIS_MODULE,
};
| 437 | |
| 438 | static int __init nf_conntrack_l3proto_bridge_init(void) |
| 439 | { |
| 440 | nf_ct_bridge_register(info: &bridge_info); |
| 441 | |
| 442 | return 0; |
| 443 | } |
| 444 | |
| 445 | static void __exit nf_conntrack_l3proto_bridge_fini(void) |
| 446 | { |
| 447 | nf_ct_bridge_unregister(info: &bridge_info); |
| 448 | } |
| 449 | |
| 450 | module_init(nf_conntrack_l3proto_bridge_init); |
| 451 | module_exit(nf_conntrack_l3proto_bridge_fini); |
| 452 | |
| 453 | MODULE_ALIAS("nf_conntrack-" __stringify(AF_BRIDGE)); |
| 454 | MODULE_LICENSE("GPL" ); |
| 455 | MODULE_DESCRIPTION("Bridge IPv4 and IPv6 connection tracking" ); |
| 456 | |