// SPDX-License-Identifier: GPL-2.0-or-later
/* Copyright (c) 2014 Mahesh Bandewar <maheshb@google.com>
 */

5 | #include "ipvlan.h" |
6 | |
7 | static u32 ipvlan_jhash_secret __read_mostly; |
8 | |
9 | void ipvlan_init_secret(void) |
10 | { |
11 | net_get_random_once(&ipvlan_jhash_secret, sizeof(ipvlan_jhash_secret)); |
12 | } |
13 | |
14 | void ipvlan_count_rx(const struct ipvl_dev *ipvlan, |
15 | unsigned int len, bool success, bool mcast) |
16 | { |
17 | if (likely(success)) { |
18 | struct ipvl_pcpu_stats *pcptr; |
19 | |
20 | pcptr = this_cpu_ptr(ipvlan->pcpu_stats); |
21 | u64_stats_update_begin(syncp: &pcptr->syncp); |
22 | u64_stats_inc(p: &pcptr->rx_pkts); |
23 | u64_stats_add(p: &pcptr->rx_bytes, val: len); |
24 | if (mcast) |
25 | u64_stats_inc(p: &pcptr->rx_mcast); |
26 | u64_stats_update_end(syncp: &pcptr->syncp); |
27 | } else { |
28 | this_cpu_inc(ipvlan->pcpu_stats->rx_errs); |
29 | } |
30 | } |
31 | EXPORT_SYMBOL_GPL(ipvlan_count_rx); |
32 | |
33 | #if IS_ENABLED(CONFIG_IPV6) |
34 | static u8 ipvlan_get_v6_hash(const void *iaddr) |
35 | { |
36 | const struct in6_addr *ip6_addr = iaddr; |
37 | |
38 | return __ipv6_addr_jhash(a: ip6_addr, initval: ipvlan_jhash_secret) & |
39 | IPVLAN_HASH_MASK; |
40 | } |
41 | #else |
42 | static u8 ipvlan_get_v6_hash(const void *iaddr) |
43 | { |
44 | return 0; |
45 | } |
46 | #endif |
47 | |
48 | static u8 ipvlan_get_v4_hash(const void *iaddr) |
49 | { |
50 | const struct in_addr *ip4_addr = iaddr; |
51 | |
52 | return jhash_1word(a: ip4_addr->s_addr, initval: ipvlan_jhash_secret) & |
53 | IPVLAN_HASH_MASK; |
54 | } |
55 | |
56 | static bool addr_equal(bool is_v6, struct ipvl_addr *addr, const void *iaddr) |
57 | { |
58 | if (!is_v6 && addr->atype == IPVL_IPV4) { |
59 | struct in_addr *i4addr = (struct in_addr *)iaddr; |
60 | |
61 | return addr->ip4addr.s_addr == i4addr->s_addr; |
62 | #if IS_ENABLED(CONFIG_IPV6) |
63 | } else if (is_v6 && addr->atype == IPVL_IPV6) { |
64 | struct in6_addr *i6addr = (struct in6_addr *)iaddr; |
65 | |
66 | return ipv6_addr_equal(a1: &addr->ip6addr, a2: i6addr); |
67 | #endif |
68 | } |
69 | |
70 | return false; |
71 | } |
72 | |
73 | static struct ipvl_addr *ipvlan_ht_addr_lookup(const struct ipvl_port *port, |
74 | const void *iaddr, bool is_v6) |
75 | { |
76 | struct ipvl_addr *addr; |
77 | u8 hash; |
78 | |
79 | hash = is_v6 ? ipvlan_get_v6_hash(iaddr) : |
80 | ipvlan_get_v4_hash(iaddr); |
81 | hlist_for_each_entry_rcu(addr, &port->hlhead[hash], hlnode) |
82 | if (addr_equal(is_v6, addr, iaddr)) |
83 | return addr; |
84 | return NULL; |
85 | } |
86 | |
87 | void ipvlan_ht_addr_add(struct ipvl_dev *ipvlan, struct ipvl_addr *addr) |
88 | { |
89 | struct ipvl_port *port = ipvlan->port; |
90 | u8 hash; |
91 | |
92 | hash = (addr->atype == IPVL_IPV6) ? |
93 | ipvlan_get_v6_hash(iaddr: &addr->ip6addr) : |
94 | ipvlan_get_v4_hash(iaddr: &addr->ip4addr); |
95 | if (hlist_unhashed(h: &addr->hlnode)) |
96 | hlist_add_head_rcu(n: &addr->hlnode, h: &port->hlhead[hash]); |
97 | } |
98 | |
99 | void ipvlan_ht_addr_del(struct ipvl_addr *addr) |
100 | { |
101 | hlist_del_init_rcu(n: &addr->hlnode); |
102 | } |
103 | |
104 | struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan, |
105 | const void *iaddr, bool is_v6) |
106 | { |
107 | struct ipvl_addr *addr, *ret = NULL; |
108 | |
109 | rcu_read_lock(); |
110 | list_for_each_entry_rcu(addr, &ipvlan->addrs, anode) { |
111 | if (addr_equal(is_v6, addr, iaddr)) { |
112 | ret = addr; |
113 | break; |
114 | } |
115 | } |
116 | rcu_read_unlock(); |
117 | return ret; |
118 | } |
119 | |
120 | bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6) |
121 | { |
122 | struct ipvl_dev *ipvlan; |
123 | bool ret = false; |
124 | |
125 | rcu_read_lock(); |
126 | list_for_each_entry_rcu(ipvlan, &port->ipvlans, pnode) { |
127 | if (ipvlan_find_addr(ipvlan, iaddr, is_v6)) { |
128 | ret = true; |
129 | break; |
130 | } |
131 | } |
132 | rcu_read_unlock(); |
133 | return ret; |
134 | } |
135 | |
136 | void *ipvlan_get_L3_hdr(struct ipvl_port *port, struct sk_buff *skb, int *type) |
137 | { |
138 | void *lyr3h = NULL; |
139 | |
140 | switch (skb->protocol) { |
141 | case htons(ETH_P_ARP): { |
142 | struct arphdr *arph; |
143 | |
144 | if (unlikely(!pskb_may_pull(skb, arp_hdr_len(port->dev)))) |
145 | return NULL; |
146 | |
147 | arph = arp_hdr(skb); |
148 | *type = IPVL_ARP; |
149 | lyr3h = arph; |
150 | break; |
151 | } |
152 | case htons(ETH_P_IP): { |
153 | u32 pktlen; |
154 | struct iphdr *ip4h; |
155 | |
156 | if (unlikely(!pskb_may_pull(skb, sizeof(*ip4h)))) |
157 | return NULL; |
158 | |
159 | ip4h = ip_hdr(skb); |
160 | pktlen = skb_ip_totlen(skb); |
161 | if (ip4h->ihl < 5 || ip4h->version != 4) |
162 | return NULL; |
163 | if (skb->len < pktlen || pktlen < (ip4h->ihl * 4)) |
164 | return NULL; |
165 | |
166 | *type = IPVL_IPV4; |
167 | lyr3h = ip4h; |
168 | break; |
169 | } |
170 | #if IS_ENABLED(CONFIG_IPV6) |
171 | case htons(ETH_P_IPV6): { |
172 | struct ipv6hdr *ip6h; |
173 | |
174 | if (unlikely(!pskb_may_pull(skb, sizeof(*ip6h)))) |
175 | return NULL; |
176 | |
177 | ip6h = ipv6_hdr(skb); |
178 | if (ip6h->version != 6) |
179 | return NULL; |
180 | |
181 | *type = IPVL_IPV6; |
182 | lyr3h = ip6h; |
183 | /* Only Neighbour Solicitation pkts need different treatment */ |
184 | if (ipv6_addr_any(a: &ip6h->saddr) && |
185 | ip6h->nexthdr == NEXTHDR_ICMP) { |
186 | struct icmp6hdr *icmph; |
187 | |
188 | if (unlikely(!pskb_may_pull(skb, sizeof(*ip6h) + sizeof(*icmph)))) |
189 | return NULL; |
190 | |
191 | ip6h = ipv6_hdr(skb); |
192 | icmph = (struct icmp6hdr *)(ip6h + 1); |
193 | |
194 | if (icmph->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION) { |
195 | /* Need to access the ipv6 address in body */ |
196 | if (unlikely(!pskb_may_pull(skb, sizeof(*ip6h) + sizeof(*icmph) |
197 | + sizeof(struct in6_addr)))) |
198 | return NULL; |
199 | |
200 | ip6h = ipv6_hdr(skb); |
201 | icmph = (struct icmp6hdr *)(ip6h + 1); |
202 | } |
203 | |
204 | *type = IPVL_ICMPV6; |
205 | lyr3h = icmph; |
206 | } |
207 | break; |
208 | } |
209 | #endif |
210 | default: |
211 | return NULL; |
212 | } |
213 | |
214 | return lyr3h; |
215 | } |
216 | |
217 | unsigned int ipvlan_mac_hash(const unsigned char *addr) |
218 | { |
219 | u32 hash = jhash_1word(a: __get_unaligned_cpu32(p: addr+2), |
220 | initval: ipvlan_jhash_secret); |
221 | |
222 | return hash & IPVLAN_MAC_FILTER_MASK; |
223 | } |
224 | |
225 | void ipvlan_process_multicast(struct work_struct *work) |
226 | { |
227 | struct ipvl_port *port = container_of(work, struct ipvl_port, wq); |
228 | struct ethhdr *ethh; |
229 | struct ipvl_dev *ipvlan; |
230 | struct sk_buff *skb, *nskb; |
231 | struct sk_buff_head list; |
232 | unsigned int len; |
233 | unsigned int mac_hash; |
234 | int ret; |
235 | u8 pkt_type; |
236 | bool tx_pkt; |
237 | |
238 | __skb_queue_head_init(list: &list); |
239 | |
240 | spin_lock_bh(lock: &port->backlog.lock); |
241 | skb_queue_splice_tail_init(list: &port->backlog, head: &list); |
242 | spin_unlock_bh(lock: &port->backlog.lock); |
243 | |
244 | while ((skb = __skb_dequeue(list: &list)) != NULL) { |
245 | struct net_device *dev = skb->dev; |
246 | bool consumed = false; |
247 | |
248 | ethh = eth_hdr(skb); |
249 | tx_pkt = IPVL_SKB_CB(skb)->tx_pkt; |
250 | mac_hash = ipvlan_mac_hash(addr: ethh->h_dest); |
251 | |
252 | if (ether_addr_equal(addr1: ethh->h_dest, addr2: port->dev->broadcast)) |
253 | pkt_type = PACKET_BROADCAST; |
254 | else |
255 | pkt_type = PACKET_MULTICAST; |
256 | |
257 | rcu_read_lock(); |
258 | list_for_each_entry_rcu(ipvlan, &port->ipvlans, pnode) { |
259 | if (tx_pkt && (ipvlan->dev == skb->dev)) |
260 | continue; |
261 | if (!test_bit(mac_hash, ipvlan->mac_filters)) |
262 | continue; |
263 | if (!(ipvlan->dev->flags & IFF_UP)) |
264 | continue; |
265 | ret = NET_RX_DROP; |
266 | len = skb->len + ETH_HLEN; |
267 | nskb = skb_clone(skb, GFP_ATOMIC); |
268 | local_bh_disable(); |
269 | if (nskb) { |
270 | consumed = true; |
271 | nskb->pkt_type = pkt_type; |
272 | nskb->dev = ipvlan->dev; |
273 | if (tx_pkt) |
274 | ret = dev_forward_skb(dev: ipvlan->dev, skb: nskb); |
275 | else |
276 | ret = netif_rx(skb: nskb); |
277 | } |
278 | ipvlan_count_rx(ipvlan, len, ret == NET_RX_SUCCESS, true); |
279 | local_bh_enable(); |
280 | } |
281 | rcu_read_unlock(); |
282 | |
283 | if (tx_pkt) { |
284 | /* If the packet originated here, send it out. */ |
285 | skb->dev = port->dev; |
286 | skb->pkt_type = pkt_type; |
287 | dev_queue_xmit(skb); |
288 | } else { |
289 | if (consumed) |
290 | consume_skb(skb); |
291 | else |
292 | kfree_skb(skb); |
293 | } |
294 | dev_put(dev); |
295 | cond_resched(); |
296 | } |
297 | } |
298 | |
299 | static void ipvlan_skb_crossing_ns(struct sk_buff *skb, struct net_device *dev) |
300 | { |
301 | bool xnet = true; |
302 | |
303 | if (dev) |
304 | xnet = !net_eq(net1: dev_net(dev: skb->dev), net2: dev_net(dev)); |
305 | |
306 | skb_scrub_packet(skb, xnet); |
307 | if (dev) |
308 | skb->dev = dev; |
309 | } |
310 | |
311 | static int ipvlan_rcv_frame(struct ipvl_addr *addr, struct sk_buff **pskb, |
312 | bool local) |
313 | { |
314 | struct ipvl_dev *ipvlan = addr->master; |
315 | struct net_device *dev = ipvlan->dev; |
316 | unsigned int len; |
317 | rx_handler_result_t ret = RX_HANDLER_CONSUMED; |
318 | bool success = false; |
319 | struct sk_buff *skb = *pskb; |
320 | |
321 | len = skb->len + ETH_HLEN; |
322 | /* Only packets exchanged between two local slaves need to have |
323 | * device-up check as well as skb-share check. |
324 | */ |
325 | if (local) { |
326 | if (unlikely(!(dev->flags & IFF_UP))) { |
327 | kfree_skb(skb); |
328 | goto out; |
329 | } |
330 | |
331 | skb = skb_share_check(skb, GFP_ATOMIC); |
332 | if (!skb) |
333 | goto out; |
334 | |
335 | *pskb = skb; |
336 | } |
337 | |
338 | if (local) { |
339 | skb->pkt_type = PACKET_HOST; |
340 | if (dev_forward_skb(dev: ipvlan->dev, skb) == NET_RX_SUCCESS) |
341 | success = true; |
342 | } else { |
343 | skb->dev = dev; |
344 | ret = RX_HANDLER_ANOTHER; |
345 | success = true; |
346 | } |
347 | |
348 | out: |
349 | ipvlan_count_rx(ipvlan, len, success, false); |
350 | return ret; |
351 | } |
352 | |
353 | struct ipvl_addr *ipvlan_addr_lookup(struct ipvl_port *port, void *lyr3h, |
354 | int addr_type, bool use_dest) |
355 | { |
356 | struct ipvl_addr *addr = NULL; |
357 | |
358 | switch (addr_type) { |
359 | #if IS_ENABLED(CONFIG_IPV6) |
360 | case IPVL_IPV6: { |
361 | struct ipv6hdr *ip6h; |
362 | struct in6_addr *i6addr; |
363 | |
364 | ip6h = (struct ipv6hdr *)lyr3h; |
365 | i6addr = use_dest ? &ip6h->daddr : &ip6h->saddr; |
366 | addr = ipvlan_ht_addr_lookup(port, iaddr: i6addr, is_v6: true); |
367 | break; |
368 | } |
369 | case IPVL_ICMPV6: { |
370 | struct nd_msg *ndmh; |
371 | struct in6_addr *i6addr; |
372 | |
373 | /* Make sure that the NeighborSolicitation ICMPv6 packets |
374 | * are handled to avoid DAD issue. |
375 | */ |
376 | ndmh = (struct nd_msg *)lyr3h; |
377 | if (ndmh->icmph.icmp6_type == NDISC_NEIGHBOUR_SOLICITATION) { |
378 | i6addr = &ndmh->target; |
379 | addr = ipvlan_ht_addr_lookup(port, iaddr: i6addr, is_v6: true); |
380 | } |
381 | break; |
382 | } |
383 | #endif |
384 | case IPVL_IPV4: { |
385 | struct iphdr *ip4h; |
386 | __be32 *i4addr; |
387 | |
388 | ip4h = (struct iphdr *)lyr3h; |
389 | i4addr = use_dest ? &ip4h->daddr : &ip4h->saddr; |
390 | addr = ipvlan_ht_addr_lookup(port, iaddr: i4addr, is_v6: false); |
391 | break; |
392 | } |
393 | case IPVL_ARP: { |
394 | struct arphdr *arph; |
395 | unsigned char *arp_ptr; |
396 | __be32 dip; |
397 | |
398 | arph = (struct arphdr *)lyr3h; |
399 | arp_ptr = (unsigned char *)(arph + 1); |
400 | if (use_dest) |
401 | arp_ptr += (2 * port->dev->addr_len) + 4; |
402 | else |
403 | arp_ptr += port->dev->addr_len; |
404 | |
405 | memcpy(&dip, arp_ptr, 4); |
406 | addr = ipvlan_ht_addr_lookup(port, iaddr: &dip, is_v6: false); |
407 | break; |
408 | } |
409 | } |
410 | |
411 | return addr; |
412 | } |
413 | |
414 | static noinline_for_stack int ipvlan_process_v4_outbound(struct sk_buff *skb) |
415 | { |
416 | const struct iphdr *ip4h = ip_hdr(skb); |
417 | struct net_device *dev = skb->dev; |
418 | struct net *net = dev_net(dev); |
419 | struct rtable *rt; |
420 | int err, ret = NET_XMIT_DROP; |
421 | struct flowi4 fl4 = { |
422 | .flowi4_oif = dev->ifindex, |
423 | .flowi4_tos = RT_TOS(ip4h->tos), |
424 | .flowi4_flags = FLOWI_FLAG_ANYSRC, |
425 | .flowi4_mark = skb->mark, |
426 | .daddr = ip4h->daddr, |
427 | .saddr = ip4h->saddr, |
428 | }; |
429 | |
430 | rt = ip_route_output_flow(net, flp: &fl4, NULL); |
431 | if (IS_ERR(ptr: rt)) |
432 | goto err; |
433 | |
434 | if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) { |
435 | ip_rt_put(rt); |
436 | goto err; |
437 | } |
438 | skb_dst_set(skb, dst: &rt->dst); |
439 | |
440 | memset(IPCB(skb), 0, sizeof(*IPCB(skb))); |
441 | |
442 | err = ip_local_out(net, sk: skb->sk, skb); |
443 | if (unlikely(net_xmit_eval(err))) |
444 | DEV_STATS_INC(dev, tx_errors); |
445 | else |
446 | ret = NET_XMIT_SUCCESS; |
447 | goto out; |
448 | err: |
449 | DEV_STATS_INC(dev, tx_errors); |
450 | kfree_skb(skb); |
451 | out: |
452 | return ret; |
453 | } |
454 | |
455 | #if IS_ENABLED(CONFIG_IPV6) |
456 | |
457 | static noinline_for_stack int |
458 | ipvlan_route_v6_outbound(struct net_device *dev, struct sk_buff *skb) |
459 | { |
460 | const struct ipv6hdr *ip6h = ipv6_hdr(skb); |
461 | struct flowi6 fl6 = { |
462 | .flowi6_oif = dev->ifindex, |
463 | .daddr = ip6h->daddr, |
464 | .saddr = ip6h->saddr, |
465 | .flowi6_flags = FLOWI_FLAG_ANYSRC, |
466 | .flowlabel = ip6_flowinfo(hdr: ip6h), |
467 | .flowi6_mark = skb->mark, |
468 | .flowi6_proto = ip6h->nexthdr, |
469 | }; |
470 | struct dst_entry *dst; |
471 | int err; |
472 | |
473 | dst = ip6_route_output(net: dev_net(dev), NULL, fl6: &fl6); |
474 | err = dst->error; |
475 | if (err) { |
476 | dst_release(dst); |
477 | return err; |
478 | } |
479 | skb_dst_set(skb, dst); |
480 | return 0; |
481 | } |
482 | |
483 | static int ipvlan_process_v6_outbound(struct sk_buff *skb) |
484 | { |
485 | struct net_device *dev = skb->dev; |
486 | int err, ret = NET_XMIT_DROP; |
487 | |
488 | err = ipvlan_route_v6_outbound(dev, skb); |
489 | if (unlikely(err)) { |
490 | DEV_STATS_INC(dev, tx_errors); |
491 | kfree_skb(skb); |
492 | return err; |
493 | } |
494 | |
495 | memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); |
496 | |
497 | err = ip6_local_out(net: dev_net(dev), sk: skb->sk, skb); |
498 | if (unlikely(net_xmit_eval(err))) |
499 | DEV_STATS_INC(dev, tx_errors); |
500 | else |
501 | ret = NET_XMIT_SUCCESS; |
502 | return ret; |
503 | } |
504 | #else |
505 | static int ipvlan_process_v6_outbound(struct sk_buff *skb) |
506 | { |
507 | return NET_XMIT_DROP; |
508 | } |
509 | #endif |
510 | |
511 | static int ipvlan_process_outbound(struct sk_buff *skb) |
512 | { |
513 | int ret = NET_XMIT_DROP; |
514 | |
515 | /* The ipvlan is a pseudo-L2 device, so the packets that we receive |
516 | * will have L2; which need to discarded and processed further |
517 | * in the net-ns of the main-device. |
518 | */ |
519 | if (skb_mac_header_was_set(skb)) { |
520 | /* In this mode we dont care about |
521 | * multicast and broadcast traffic */ |
522 | struct ethhdr *ethh = eth_hdr(skb); |
523 | |
524 | if (is_multicast_ether_addr(addr: ethh->h_dest)) { |
525 | pr_debug_ratelimited( |
526 | "Dropped {multi|broad}cast of type=[%x]\n" , |
527 | ntohs(skb->protocol)); |
528 | kfree_skb(skb); |
529 | goto out; |
530 | } |
531 | |
532 | skb_pull(skb, len: sizeof(*ethh)); |
533 | skb->mac_header = (typeof(skb->mac_header))~0U; |
534 | skb_reset_network_header(skb); |
535 | } |
536 | |
537 | if (skb->protocol == htons(ETH_P_IPV6)) |
538 | ret = ipvlan_process_v6_outbound(skb); |
539 | else if (skb->protocol == htons(ETH_P_IP)) |
540 | ret = ipvlan_process_v4_outbound(skb); |
541 | else { |
542 | pr_warn_ratelimited("Dropped outbound packet type=%x\n" , |
543 | ntohs(skb->protocol)); |
544 | kfree_skb(skb); |
545 | } |
546 | out: |
547 | return ret; |
548 | } |
549 | |
550 | static void ipvlan_multicast_enqueue(struct ipvl_port *port, |
551 | struct sk_buff *skb, bool tx_pkt) |
552 | { |
553 | if (skb->protocol == htons(ETH_P_PAUSE)) { |
554 | kfree_skb(skb); |
555 | return; |
556 | } |
557 | |
558 | /* Record that the deferred packet is from TX or RX path. By |
559 | * looking at mac-addresses on packet will lead to erronus decisions. |
560 | * (This would be true for a loopback-mode on master device or a |
561 | * hair-pin mode of the switch.) |
562 | */ |
563 | IPVL_SKB_CB(skb)->tx_pkt = tx_pkt; |
564 | |
565 | spin_lock(lock: &port->backlog.lock); |
566 | if (skb_queue_len(list_: &port->backlog) < IPVLAN_QBACKLOG_LIMIT) { |
567 | dev_hold(dev: skb->dev); |
568 | __skb_queue_tail(list: &port->backlog, newsk: skb); |
569 | spin_unlock(lock: &port->backlog.lock); |
570 | schedule_work(work: &port->wq); |
571 | } else { |
572 | spin_unlock(lock: &port->backlog.lock); |
573 | dev_core_stats_rx_dropped_inc(dev: skb->dev); |
574 | kfree_skb(skb); |
575 | } |
576 | } |
577 | |
578 | static int ipvlan_xmit_mode_l3(struct sk_buff *skb, struct net_device *dev) |
579 | { |
580 | const struct ipvl_dev *ipvlan = netdev_priv(dev); |
581 | void *lyr3h; |
582 | struct ipvl_addr *addr; |
583 | int addr_type; |
584 | |
585 | lyr3h = ipvlan_get_L3_hdr(port: ipvlan->port, skb, type: &addr_type); |
586 | if (!lyr3h) |
587 | goto out; |
588 | |
589 | if (!ipvlan_is_vepa(port: ipvlan->port)) { |
590 | addr = ipvlan_addr_lookup(port: ipvlan->port, lyr3h, addr_type, use_dest: true); |
591 | if (addr) { |
592 | if (ipvlan_is_private(port: ipvlan->port)) { |
593 | consume_skb(skb); |
594 | return NET_XMIT_DROP; |
595 | } |
596 | ipvlan_rcv_frame(addr, pskb: &skb, local: true); |
597 | return NET_XMIT_SUCCESS; |
598 | } |
599 | } |
600 | out: |
601 | ipvlan_skb_crossing_ns(skb, dev: ipvlan->phy_dev); |
602 | return ipvlan_process_outbound(skb); |
603 | } |
604 | |
605 | static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev) |
606 | { |
607 | const struct ipvl_dev *ipvlan = netdev_priv(dev); |
608 | struct ethhdr *eth = skb_eth_hdr(skb); |
609 | struct ipvl_addr *addr; |
610 | void *lyr3h; |
611 | int addr_type; |
612 | |
613 | if (!ipvlan_is_vepa(port: ipvlan->port) && |
614 | ether_addr_equal(addr1: eth->h_dest, addr2: eth->h_source)) { |
615 | lyr3h = ipvlan_get_L3_hdr(port: ipvlan->port, skb, type: &addr_type); |
616 | if (lyr3h) { |
617 | addr = ipvlan_addr_lookup(port: ipvlan->port, lyr3h, addr_type, use_dest: true); |
618 | if (addr) { |
619 | if (ipvlan_is_private(port: ipvlan->port)) { |
620 | consume_skb(skb); |
621 | return NET_XMIT_DROP; |
622 | } |
623 | ipvlan_rcv_frame(addr, pskb: &skb, local: true); |
624 | return NET_XMIT_SUCCESS; |
625 | } |
626 | } |
627 | skb = skb_share_check(skb, GFP_ATOMIC); |
628 | if (!skb) |
629 | return NET_XMIT_DROP; |
630 | |
631 | /* Packet definitely does not belong to any of the |
632 | * virtual devices, but the dest is local. So forward |
633 | * the skb for the main-dev. At the RX side we just return |
634 | * RX_PASS for it to be processed further on the stack. |
635 | */ |
636 | dev_forward_skb(dev: ipvlan->phy_dev, skb); |
637 | return NET_XMIT_SUCCESS; |
638 | |
639 | } else if (is_multicast_ether_addr(addr: eth->h_dest)) { |
640 | skb_reset_mac_header(skb); |
641 | ipvlan_skb_crossing_ns(skb, NULL); |
642 | ipvlan_multicast_enqueue(port: ipvlan->port, skb, tx_pkt: true); |
643 | return NET_XMIT_SUCCESS; |
644 | } |
645 | |
646 | skb->dev = ipvlan->phy_dev; |
647 | return dev_queue_xmit(skb); |
648 | } |
649 | |
650 | int ipvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev) |
651 | { |
652 | struct ipvl_dev *ipvlan = netdev_priv(dev); |
653 | struct ipvl_port *port = ipvlan_port_get_rcu_bh(d: ipvlan->phy_dev); |
654 | |
655 | if (!port) |
656 | goto out; |
657 | |
658 | if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr)))) |
659 | goto out; |
660 | |
661 | switch(port->mode) { |
662 | case IPVLAN_MODE_L2: |
663 | return ipvlan_xmit_mode_l2(skb, dev); |
664 | case IPVLAN_MODE_L3: |
665 | #ifdef CONFIG_IPVLAN_L3S |
666 | case IPVLAN_MODE_L3S: |
667 | #endif |
668 | return ipvlan_xmit_mode_l3(skb, dev); |
669 | } |
670 | |
671 | /* Should not reach here */ |
672 | WARN_ONCE(true, "%s called for mode = [%x]\n" , __func__, port->mode); |
673 | out: |
674 | kfree_skb(skb); |
675 | return NET_XMIT_DROP; |
676 | } |
677 | |
678 | static bool ipvlan_external_frame(struct sk_buff *skb, struct ipvl_port *port) |
679 | { |
680 | struct ethhdr *eth = eth_hdr(skb); |
681 | struct ipvl_addr *addr; |
682 | void *lyr3h; |
683 | int addr_type; |
684 | |
685 | if (ether_addr_equal(addr1: eth->h_source, addr2: skb->dev->dev_addr)) { |
686 | lyr3h = ipvlan_get_L3_hdr(port, skb, type: &addr_type); |
687 | if (!lyr3h) |
688 | return true; |
689 | |
690 | addr = ipvlan_addr_lookup(port, lyr3h, addr_type, use_dest: false); |
691 | if (addr) |
692 | return false; |
693 | } |
694 | |
695 | return true; |
696 | } |
697 | |
698 | static rx_handler_result_t ipvlan_handle_mode_l3(struct sk_buff **pskb, |
699 | struct ipvl_port *port) |
700 | { |
701 | void *lyr3h; |
702 | int addr_type; |
703 | struct ipvl_addr *addr; |
704 | struct sk_buff *skb = *pskb; |
705 | rx_handler_result_t ret = RX_HANDLER_PASS; |
706 | |
707 | lyr3h = ipvlan_get_L3_hdr(port, skb, type: &addr_type); |
708 | if (!lyr3h) |
709 | goto out; |
710 | |
711 | addr = ipvlan_addr_lookup(port, lyr3h, addr_type, use_dest: true); |
712 | if (addr) |
713 | ret = ipvlan_rcv_frame(addr, pskb, local: false); |
714 | |
715 | out: |
716 | return ret; |
717 | } |
718 | |
719 | static rx_handler_result_t ipvlan_handle_mode_l2(struct sk_buff **pskb, |
720 | struct ipvl_port *port) |
721 | { |
722 | struct sk_buff *skb = *pskb; |
723 | struct ethhdr *eth = eth_hdr(skb); |
724 | rx_handler_result_t ret = RX_HANDLER_PASS; |
725 | |
726 | if (is_multicast_ether_addr(addr: eth->h_dest)) { |
727 | if (ipvlan_external_frame(skb, port)) { |
728 | struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC); |
729 | |
730 | /* External frames are queued for device local |
731 | * distribution, but a copy is given to master |
732 | * straight away to avoid sending duplicates later |
733 | * when work-queue processes this frame. This is |
734 | * achieved by returning RX_HANDLER_PASS. |
735 | */ |
736 | if (nskb) { |
737 | ipvlan_skb_crossing_ns(skb: nskb, NULL); |
738 | ipvlan_multicast_enqueue(port, skb: nskb, tx_pkt: false); |
739 | } |
740 | } |
741 | } else { |
742 | /* Perform like l3 mode for non-multicast packet */ |
743 | ret = ipvlan_handle_mode_l3(pskb, port); |
744 | } |
745 | |
746 | return ret; |
747 | } |
748 | |
749 | rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb) |
750 | { |
751 | struct sk_buff *skb = *pskb; |
752 | struct ipvl_port *port = ipvlan_port_get_rcu(d: skb->dev); |
753 | |
754 | if (!port) |
755 | return RX_HANDLER_PASS; |
756 | |
757 | switch (port->mode) { |
758 | case IPVLAN_MODE_L2: |
759 | return ipvlan_handle_mode_l2(pskb, port); |
760 | case IPVLAN_MODE_L3: |
761 | return ipvlan_handle_mode_l3(pskb, port); |
762 | #ifdef CONFIG_IPVLAN_L3S |
763 | case IPVLAN_MODE_L3S: |
764 | return RX_HANDLER_PASS; |
765 | #endif |
766 | } |
767 | |
768 | /* Should not reach here */ |
769 | WARN_ONCE(true, "%s called for mode = [%x]\n" , __func__, port->mode); |
770 | kfree_skb(skb); |
771 | return RX_HANDLER_CONSUMED; |
772 | } |
773 | |