// SPDX-License-Identifier: GPL-2.0-only
/****************************************************************************
 * Driver for Solarflare network controllers and boards
 * Copyright 2023, Advanced Micro Devices, Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation, incorporated herein by reference.
 */
10
11#include "tc_encap_actions.h"
12#include "tc.h"
13#include "mae.h"
14#include <net/vxlan.h>
15#include <net/geneve.h>
16#include <net/netevent.h>
17#include <net/arp.h>
18
19static const struct rhashtable_params efx_neigh_ht_params = {
20 .key_len = offsetof(struct efx_neigh_binder, ha),
21 .key_offset = 0,
22 .head_offset = offsetof(struct efx_neigh_binder, linkage),
23};
24
25static const struct rhashtable_params efx_tc_encap_ht_params = {
26 .key_len = offsetofend(struct efx_tc_encap_action, key),
27 .key_offset = 0,
28 .head_offset = offsetof(struct efx_tc_encap_action, linkage),
29};
30
31static void efx_tc_encap_free(void *ptr, void *__unused)
32{
33 struct efx_tc_encap_action *enc = ptr;
34
35 WARN_ON(refcount_read(&enc->ref));
36 kfree(objp: enc);
37}
38
39static void efx_neigh_free(void *ptr, void *__unused)
40{
41 struct efx_neigh_binder *neigh = ptr;
42
43 WARN_ON(refcount_read(&neigh->ref));
44 WARN_ON(!list_empty(&neigh->users));
45 put_net_track(net: neigh->net, tracker: &neigh->ns_tracker);
46 netdev_put(dev: neigh->egdev, tracker: &neigh->dev_tracker);
47 kfree(objp: neigh);
48}
49
50int efx_tc_init_encap_actions(struct efx_nic *efx)
51{
52 int rc;
53
54 rc = rhashtable_init(ht: &efx->tc->neigh_ht, params: &efx_neigh_ht_params);
55 if (rc < 0)
56 goto fail_neigh_ht;
57 rc = rhashtable_init(ht: &efx->tc->encap_ht, params: &efx_tc_encap_ht_params);
58 if (rc < 0)
59 goto fail_encap_ht;
60 return 0;
61fail_encap_ht:
62 rhashtable_destroy(ht: &efx->tc->neigh_ht);
63fail_neigh_ht:
64 return rc;
65}
66
67/* Only call this in init failure teardown.
68 * Normal exit should fini instead as there may be entries in the table.
69 */
70void efx_tc_destroy_encap_actions(struct efx_nic *efx)
71{
72 rhashtable_destroy(ht: &efx->tc->encap_ht);
73 rhashtable_destroy(ht: &efx->tc->neigh_ht);
74}
75
76void efx_tc_fini_encap_actions(struct efx_nic *efx)
77{
78 rhashtable_free_and_destroy(ht: &efx->tc->encap_ht, free_fn: efx_tc_encap_free, NULL);
79 rhashtable_free_and_destroy(ht: &efx->tc->neigh_ht, free_fn: efx_neigh_free, NULL);
80}
81
82static void efx_neigh_update(struct work_struct *work);
83
84static int efx_bind_neigh(struct efx_nic *efx,
85 struct efx_tc_encap_action *encap, struct net *net,
86 struct netlink_ext_ack *extack)
87{
88 struct efx_neigh_binder *neigh, *old;
89 struct flowi6 flow6 = {};
90 struct flowi4 flow4 = {};
91 int rc;
92
93 /* GCC stupidly thinks that only values explicitly listed in the enum
94 * definition can _possibly_ be sensible case values, so without this
95 * cast it complains about the IPv6 versions.
96 */
97 switch ((int)encap->type) {
98 case EFX_ENCAP_TYPE_VXLAN:
99 case EFX_ENCAP_TYPE_GENEVE:
100 flow4.flowi4_proto = IPPROTO_UDP;
101 flow4.fl4_dport = encap->key.tp_dst;
102 flow4.flowi4_tos = encap->key.tos;
103 flow4.daddr = encap->key.u.ipv4.dst;
104 flow4.saddr = encap->key.u.ipv4.src;
105 break;
106 case EFX_ENCAP_TYPE_VXLAN | EFX_ENCAP_FLAG_IPV6:
107 case EFX_ENCAP_TYPE_GENEVE | EFX_ENCAP_FLAG_IPV6:
108 flow6.flowi6_proto = IPPROTO_UDP;
109 flow6.fl6_dport = encap->key.tp_dst;
110 flow6.flowlabel = ip6_make_flowinfo(tclass: encap->key.tos,
111 flowlabel: encap->key.label);
112 flow6.daddr = encap->key.u.ipv6.dst;
113 flow6.saddr = encap->key.u.ipv6.src;
114 break;
115 default:
116 NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported encap type %d",
117 (int)encap->type);
118 return -EOPNOTSUPP;
119 }
120
121 neigh = kzalloc(size: sizeof(*neigh), GFP_KERNEL_ACCOUNT);
122 if (!neigh)
123 return -ENOMEM;
124 neigh->net = get_net_track(net, tracker: &neigh->ns_tracker, GFP_KERNEL_ACCOUNT);
125 neigh->dst_ip = flow4.daddr;
126 neigh->dst_ip6 = flow6.daddr;
127
128 old = rhashtable_lookup_get_insert_fast(ht: &efx->tc->neigh_ht,
129 obj: &neigh->linkage,
130 params: efx_neigh_ht_params);
131 if (old) {
132 /* don't need our new entry */
133 put_net_track(net: neigh->net, tracker: &neigh->ns_tracker);
134 kfree(objp: neigh);
135 if (IS_ERR(ptr: old)) /* oh dear, it's actually an error */
136 return PTR_ERR(ptr: old);
137 if (!refcount_inc_not_zero(r: &old->ref))
138 return -EAGAIN;
139 /* existing entry found, ref taken */
140 neigh = old;
141 } else {
142 /* New entry. We need to initiate a lookup */
143 struct neighbour *n;
144 struct rtable *rt;
145
146 if (encap->type & EFX_ENCAP_FLAG_IPV6) {
147#if IS_ENABLED(CONFIG_IPV6)
148 struct dst_entry *dst;
149
150 dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &flow6,
151 NULL);
152 rc = PTR_ERR_OR_ZERO(ptr: dst);
153 if (rc) {
154 NL_SET_ERR_MSG_MOD(extack, "Failed to lookup route for IPv6 encap");
155 goto out_free;
156 }
157 neigh->egdev = dst->dev;
158 netdev_hold(dev: neigh->egdev, tracker: &neigh->dev_tracker,
159 GFP_KERNEL_ACCOUNT);
160 neigh->ttl = ip6_dst_hoplimit(dst);
161 n = dst_neigh_lookup(dst, daddr: &flow6.daddr);
162 dst_release(dst);
163#else
164 /* We shouldn't ever get here, because if IPv6 isn't
165 * enabled how did someone create an IPv6 tunnel_key?
166 */
167 rc = -EOPNOTSUPP;
168 NL_SET_ERR_MSG_MOD(extack, "No IPv6 support (neigh bind)");
169 goto out_free;
170#endif
171 } else {
172 rt = ip_route_output_key(net, flp: &flow4);
173 if (IS_ERR_OR_NULL(ptr: rt)) {
174 rc = PTR_ERR_OR_ZERO(ptr: rt);
175 if (!rc)
176 rc = -EIO;
177 NL_SET_ERR_MSG_MOD(extack, "Failed to lookup route for encap");
178 goto out_free;
179 }
180 neigh->egdev = rt->dst.dev;
181 netdev_hold(dev: neigh->egdev, tracker: &neigh->dev_tracker,
182 GFP_KERNEL_ACCOUNT);
183 neigh->ttl = ip4_dst_hoplimit(dst: &rt->dst);
184 n = dst_neigh_lookup(dst: &rt->dst, daddr: &flow4.daddr);
185 ip_rt_put(rt);
186 }
187 if (!n) {
188 rc = -ENETUNREACH;
189 NL_SET_ERR_MSG_MOD(extack, "Failed to lookup neighbour for encap");
190 netdev_put(dev: neigh->egdev, tracker: &neigh->dev_tracker);
191 goto out_free;
192 }
193 refcount_set(r: &neigh->ref, n: 1);
194 INIT_LIST_HEAD(list: &neigh->users);
195 read_lock_bh(&n->lock);
196 ether_addr_copy(dst: neigh->ha, src: n->ha);
197 neigh->n_valid = n->nud_state & NUD_VALID;
198 read_unlock_bh(&n->lock);
199 rwlock_init(&neigh->lock);
200 INIT_WORK(&neigh->work, efx_neigh_update);
201 neigh->efx = efx;
202 neigh->used = jiffies;
203 if (!neigh->n_valid)
204 /* Prod ARP to find us a neighbour */
205 neigh_event_send(neigh: n, NULL);
206 neigh_release(neigh: n);
207 }
208 /* Add us to this neigh */
209 encap->neigh = neigh;
210 list_add_tail(new: &encap->list, head: &neigh->users);
211 return 0;
212
213out_free:
214 /* cleanup common to several error paths */
215 rhashtable_remove_fast(ht: &efx->tc->neigh_ht, obj: &neigh->linkage,
216 params: efx_neigh_ht_params);
217 synchronize_rcu();
218 put_net_track(net, tracker: &neigh->ns_tracker);
219 kfree(objp: neigh);
220 return rc;
221}
222
223static void efx_free_neigh(struct efx_neigh_binder *neigh)
224{
225 struct efx_nic *efx = neigh->efx;
226
227 rhashtable_remove_fast(ht: &efx->tc->neigh_ht, obj: &neigh->linkage,
228 params: efx_neigh_ht_params);
229 synchronize_rcu();
230 netdev_put(dev: neigh->egdev, tracker: &neigh->dev_tracker);
231 put_net_track(net: neigh->net, tracker: &neigh->ns_tracker);
232 kfree(objp: neigh);
233}
234
235static void efx_release_neigh(struct efx_nic *efx,
236 struct efx_tc_encap_action *encap)
237{
238 struct efx_neigh_binder *neigh = encap->neigh;
239
240 if (!neigh)
241 return;
242 list_del(entry: &encap->list);
243 encap->neigh = NULL;
244 if (!refcount_dec_and_test(r: &neigh->ref))
245 return; /* still in use */
246 efx_free_neigh(neigh);
247}
248
249static void efx_gen_tun_header_eth(struct efx_tc_encap_action *encap, u16 proto)
250{
251 struct efx_neigh_binder *neigh = encap->neigh;
252 struct ethhdr *eth;
253
254 encap->encap_hdr_len = sizeof(*eth);
255 eth = (struct ethhdr *)encap->encap_hdr;
256
257 if (encap->neigh->n_valid)
258 ether_addr_copy(dst: eth->h_dest, src: neigh->ha);
259 else
260 eth_zero_addr(addr: eth->h_dest);
261 ether_addr_copy(dst: eth->h_source, src: neigh->egdev->dev_addr);
262 eth->h_proto = htons(proto);
263}
264
265static void efx_gen_tun_header_ipv4(struct efx_tc_encap_action *encap, u8 ipproto, u8 len)
266{
267 struct efx_neigh_binder *neigh = encap->neigh;
268 struct ip_tunnel_key *key = &encap->key;
269 struct iphdr *ip;
270
271 ip = (struct iphdr *)(encap->encap_hdr + encap->encap_hdr_len);
272 encap->encap_hdr_len += sizeof(*ip);
273
274 ip->daddr = key->u.ipv4.dst;
275 ip->saddr = key->u.ipv4.src;
276 ip->ttl = neigh->ttl;
277 ip->protocol = ipproto;
278 ip->version = 0x4;
279 ip->ihl = 0x5;
280 ip->tot_len = cpu_to_be16(ip->ihl * 4 + len);
281 ip_send_check(ip);
282}
283
284#ifdef CONFIG_IPV6
285static void efx_gen_tun_header_ipv6(struct efx_tc_encap_action *encap, u8 ipproto, u8 len)
286{
287 struct efx_neigh_binder *neigh = encap->neigh;
288 struct ip_tunnel_key *key = &encap->key;
289 struct ipv6hdr *ip;
290
291 ip = (struct ipv6hdr *)(encap->encap_hdr + encap->encap_hdr_len);
292 encap->encap_hdr_len += sizeof(*ip);
293
294 ip6_flow_hdr(hdr: ip, tclass: key->tos, flowlabel: key->label);
295 ip->daddr = key->u.ipv6.dst;
296 ip->saddr = key->u.ipv6.src;
297 ip->hop_limit = neigh->ttl;
298 ip->nexthdr = ipproto;
299 ip->version = 0x6;
300 ip->payload_len = cpu_to_be16(len);
301}
302#endif
303
304static void efx_gen_tun_header_udp(struct efx_tc_encap_action *encap, u8 len)
305{
306 struct ip_tunnel_key *key = &encap->key;
307 struct udphdr *udp;
308
309 udp = (struct udphdr *)(encap->encap_hdr + encap->encap_hdr_len);
310 encap->encap_hdr_len += sizeof(*udp);
311
312 udp->dest = key->tp_dst;
313 udp->len = cpu_to_be16(sizeof(*udp) + len);
314}
315
316static void efx_gen_tun_header_vxlan(struct efx_tc_encap_action *encap)
317{
318 struct ip_tunnel_key *key = &encap->key;
319 struct vxlanhdr *vxlan;
320
321 vxlan = (struct vxlanhdr *)(encap->encap_hdr + encap->encap_hdr_len);
322 encap->encap_hdr_len += sizeof(*vxlan);
323
324 vxlan->vx_flags = VXLAN_HF_VNI;
325 vxlan->vx_vni = vxlan_vni_field(vni: tunnel_id_to_key32(tun_id: key->tun_id));
326}
327
328static void efx_gen_tun_header_geneve(struct efx_tc_encap_action *encap)
329{
330 struct ip_tunnel_key *key = &encap->key;
331 struct genevehdr *geneve;
332 u32 vni;
333
334 geneve = (struct genevehdr *)(encap->encap_hdr + encap->encap_hdr_len);
335 encap->encap_hdr_len += sizeof(*geneve);
336
337 geneve->proto_type = htons(ETH_P_TEB);
338 /* convert tun_id to host-endian so we can use host arithmetic to
339 * extract individual bytes.
340 */
341 vni = ntohl(tunnel_id_to_key32(key->tun_id));
342 geneve->vni[0] = vni >> 16;
343 geneve->vni[1] = vni >> 8;
344 geneve->vni[2] = vni;
345}
346
347#define vxlan_header_l4_len (sizeof(struct udphdr) + sizeof(struct vxlanhdr))
348#define vxlan4_header_len (sizeof(struct ethhdr) + sizeof(struct iphdr) + vxlan_header_l4_len)
349static void efx_gen_vxlan_header_ipv4(struct efx_tc_encap_action *encap)
350{
351 BUILD_BUG_ON(sizeof(encap->encap_hdr) < vxlan4_header_len);
352 efx_gen_tun_header_eth(encap, ETH_P_IP);
353 efx_gen_tun_header_ipv4(encap, IPPROTO_UDP, vxlan_header_l4_len);
354 efx_gen_tun_header_udp(encap, len: sizeof(struct vxlanhdr));
355 efx_gen_tun_header_vxlan(encap);
356}
357
358#define geneve_header_l4_len (sizeof(struct udphdr) + sizeof(struct genevehdr))
359#define geneve4_header_len (sizeof(struct ethhdr) + sizeof(struct iphdr) + geneve_header_l4_len)
360static void efx_gen_geneve_header_ipv4(struct efx_tc_encap_action *encap)
361{
362 BUILD_BUG_ON(sizeof(encap->encap_hdr) < geneve4_header_len);
363 efx_gen_tun_header_eth(encap, ETH_P_IP);
364 efx_gen_tun_header_ipv4(encap, IPPROTO_UDP, geneve_header_l4_len);
365 efx_gen_tun_header_udp(encap, len: sizeof(struct genevehdr));
366 efx_gen_tun_header_geneve(encap);
367}
368
369#ifdef CONFIG_IPV6
370#define vxlan6_header_len (sizeof(struct ethhdr) + sizeof(struct ipv6hdr) + vxlan_header_l4_len)
371static void efx_gen_vxlan_header_ipv6(struct efx_tc_encap_action *encap)
372{
373 BUILD_BUG_ON(sizeof(encap->encap_hdr) < vxlan6_header_len);
374 efx_gen_tun_header_eth(encap, ETH_P_IPV6);
375 efx_gen_tun_header_ipv6(encap, IPPROTO_UDP, vxlan_header_l4_len);
376 efx_gen_tun_header_udp(encap, len: sizeof(struct vxlanhdr));
377 efx_gen_tun_header_vxlan(encap);
378}
379
380#define geneve6_header_len (sizeof(struct ethhdr) + sizeof(struct ipv6hdr) + geneve_header_l4_len)
381static void efx_gen_geneve_header_ipv6(struct efx_tc_encap_action *encap)
382{
383 BUILD_BUG_ON(sizeof(encap->encap_hdr) < geneve6_header_len);
384 efx_gen_tun_header_eth(encap, ETH_P_IPV6);
385 efx_gen_tun_header_ipv6(encap, IPPROTO_UDP, geneve_header_l4_len);
386 efx_gen_tun_header_udp(encap, len: sizeof(struct genevehdr));
387 efx_gen_tun_header_geneve(encap);
388}
389#endif
390
391static void efx_gen_encap_header(struct efx_nic *efx,
392 struct efx_tc_encap_action *encap)
393{
394 encap->n_valid = encap->neigh->n_valid;
395
396 /* GCC stupidly thinks that only values explicitly listed in the enum
397 * definition can _possibly_ be sensible case values, so without this
398 * cast it complains about the IPv6 versions.
399 */
400 switch ((int)encap->type) {
401 case EFX_ENCAP_TYPE_VXLAN:
402 efx_gen_vxlan_header_ipv4(encap);
403 break;
404 case EFX_ENCAP_TYPE_GENEVE:
405 efx_gen_geneve_header_ipv4(encap);
406 break;
407#ifdef CONFIG_IPV6
408 case EFX_ENCAP_TYPE_VXLAN | EFX_ENCAP_FLAG_IPV6:
409 efx_gen_vxlan_header_ipv6(encap);
410 break;
411 case EFX_ENCAP_TYPE_GENEVE | EFX_ENCAP_FLAG_IPV6:
412 efx_gen_geneve_header_ipv6(encap);
413 break;
414#endif
415 default:
416 /* unhandled encap type, can't happen */
417 if (net_ratelimit())
418 netif_err(efx, drv, efx->net_dev,
419 "Bogus encap type %d, can't generate\n",
420 encap->type);
421
422 /* Use fallback action. */
423 encap->n_valid = false;
424 break;
425 }
426}
427
428static void efx_tc_update_encap(struct efx_nic *efx,
429 struct efx_tc_encap_action *encap)
430{
431 struct efx_tc_action_set_list *acts, *fallback;
432 struct efx_tc_flow_rule *rule;
433 struct efx_tc_action_set *act;
434 int rc;
435
436 if (encap->n_valid) {
437 /* Make sure no rules are using this encap while we change it */
438 list_for_each_entry(act, &encap->users, encap_user) {
439 acts = act->user;
440 if (WARN_ON(!acts)) /* can't happen */
441 continue;
442 rule = container_of(acts, struct efx_tc_flow_rule, acts);
443 if (rule->fallback)
444 fallback = rule->fallback;
445 else /* fallback fallback: deliver to PF */
446 fallback = &efx->tc->facts.pf;
447 rc = efx_mae_update_rule(efx, acts_id: fallback->fw_id,
448 id: rule->fw_id);
449 if (rc)
450 netif_err(efx, drv, efx->net_dev,
451 "Failed to update (f) rule %08x rc %d\n",
452 rule->fw_id, rc);
453 else
454 netif_dbg(efx, drv, efx->net_dev, "Updated (f) rule %08x\n",
455 rule->fw_id);
456 }
457 }
458
459 /* Make sure we don't leak arbitrary bytes on the wire;
460 * set an all-0s ethernet header. A successful call to
461 * efx_gen_encap_header() will overwrite this.
462 */
463 memset(encap->encap_hdr, 0, sizeof(encap->encap_hdr));
464 encap->encap_hdr_len = ETH_HLEN;
465
466 if (encap->neigh) {
467 read_lock_bh(&encap->neigh->lock);
468 efx_gen_encap_header(efx, encap);
469 read_unlock_bh(&encap->neigh->lock);
470 } else {
471 encap->n_valid = false;
472 }
473
474 rc = efx_mae_update_encap_md(efx, encap);
475 if (rc) {
476 netif_err(efx, drv, efx->net_dev,
477 "Failed to update encap hdr %08x rc %d\n",
478 encap->fw_id, rc);
479 return;
480 }
481 netif_dbg(efx, drv, efx->net_dev, "Updated encap hdr %08x\n",
482 encap->fw_id);
483 if (!encap->n_valid)
484 return;
485 /* Update rule users: use the action if they are now ready */
486 list_for_each_entry(act, &encap->users, encap_user) {
487 acts = act->user;
488 if (WARN_ON(!acts)) /* can't happen */
489 continue;
490 rule = container_of(acts, struct efx_tc_flow_rule, acts);
491 if (!efx_tc_check_ready(efx, rule))
492 continue;
493 rc = efx_mae_update_rule(efx, acts_id: acts->fw_id, id: rule->fw_id);
494 if (rc)
495 netif_err(efx, drv, efx->net_dev,
496 "Failed to update rule %08x rc %d\n",
497 rule->fw_id, rc);
498 else
499 netif_dbg(efx, drv, efx->net_dev, "Updated rule %08x\n",
500 rule->fw_id);
501 }
502}
503
504static void efx_neigh_update(struct work_struct *work)
505{
506 struct efx_neigh_binder *neigh = container_of(work, struct efx_neigh_binder, work);
507 struct efx_tc_encap_action *encap;
508 struct efx_nic *efx = neigh->efx;
509
510 mutex_lock(&efx->tc->mutex);
511 list_for_each_entry(encap, &neigh->users, list)
512 efx_tc_update_encap(efx: neigh->efx, encap);
513 /* release ref taken in efx_neigh_event() */
514 if (refcount_dec_and_test(r: &neigh->ref))
515 efx_free_neigh(neigh);
516 mutex_unlock(lock: &efx->tc->mutex);
517}
518
519static int efx_neigh_event(struct efx_nic *efx, struct neighbour *n)
520{
521 struct efx_neigh_binder keys = {NULL}, *neigh;
522 bool n_valid, ipv6 = false;
523 char ha[ETH_ALEN];
524 size_t keysize;
525
526 if (WARN_ON(!efx->tc))
527 return NOTIFY_DONE;
528
529 if (n->tbl == &arp_tbl) {
530 keysize = sizeof(keys.dst_ip);
531#if IS_ENABLED(CONFIG_IPV6)
532 } else if (n->tbl == ipv6_stub->nd_tbl) {
533 ipv6 = true;
534 keysize = sizeof(keys.dst_ip6);
535#endif
536 } else {
537 return NOTIFY_DONE;
538 }
539 if (!n->parms) {
540 netif_warn(efx, drv, efx->net_dev, "neigh_event with no parms!\n");
541 return NOTIFY_DONE;
542 }
543 keys.net = read_pnet(pnet: &n->parms->net);
544 if (n->tbl->key_len != keysize) {
545 netif_warn(efx, drv, efx->net_dev, "neigh_event with bad key_len %u\n",
546 n->tbl->key_len);
547 return NOTIFY_DONE;
548 }
549 read_lock_bh(&n->lock); /* Get a consistent view */
550 memcpy(ha, n->ha, ETH_ALEN);
551 n_valid = (n->nud_state & NUD_VALID) && !n->dead;
552 read_unlock_bh(&n->lock);
553 if (ipv6)
554 memcpy(&keys.dst_ip6, n->primary_key, n->tbl->key_len);
555 else
556 memcpy(&keys.dst_ip, n->primary_key, n->tbl->key_len);
557 rcu_read_lock();
558 neigh = rhashtable_lookup_fast(ht: &efx->tc->neigh_ht, key: &keys,
559 params: efx_neigh_ht_params);
560 if (!neigh || neigh->dying)
561 /* We're not interested in this neighbour */
562 goto done;
563 write_lock_bh(&neigh->lock);
564 if (n_valid == neigh->n_valid && !memcmp(p: ha, q: neigh->ha, ETH_ALEN)) {
565 write_unlock_bh(&neigh->lock);
566 /* Nothing has changed; no work to do */
567 goto done;
568 }
569 neigh->n_valid = n_valid;
570 memcpy(neigh->ha, ha, ETH_ALEN);
571 write_unlock_bh(&neigh->lock);
572 if (refcount_inc_not_zero(r: &neigh->ref)) {
573 rcu_read_unlock();
574 if (!schedule_work(work: &neigh->work))
575 /* failed to schedule, release the ref we just took */
576 if (refcount_dec_and_test(r: &neigh->ref))
577 efx_free_neigh(neigh);
578 } else {
579done:
580 rcu_read_unlock();
581 }
582 return NOTIFY_DONE;
583}
584
585bool efx_tc_check_ready(struct efx_nic *efx, struct efx_tc_flow_rule *rule)
586{
587 struct efx_tc_action_set *act;
588
589 /* Encap actions can only be offloaded if they have valid
590 * neighbour info for the outer Ethernet header.
591 */
592 list_for_each_entry(act, &rule->acts.list, list)
593 if (act->encap_md && !act->encap_md->n_valid)
594 return false;
595 return true;
596}
597
598struct efx_tc_encap_action *efx_tc_flower_create_encap_md(
599 struct efx_nic *efx, const struct ip_tunnel_info *info,
600 struct net_device *egdev, struct netlink_ext_ack *extack)
601{
602 enum efx_encap_type type = efx_tc_indr_netdev_type(net_dev: egdev);
603 struct efx_tc_encap_action *encap, *old;
604 struct efx_rep *to_efv;
605 s64 rc;
606
607 if (type == EFX_ENCAP_TYPE_NONE) {
608 /* dest is not an encap device */
609 NL_SET_ERR_MSG_MOD(extack, "Not a (supported) tunnel device but tunnel_key is set");
610 return ERR_PTR(error: -EOPNOTSUPP);
611 }
612 rc = efx_mae_check_encap_type_supported(efx, typ: type);
613 if (rc < 0) {
614 NL_SET_ERR_MSG_MOD(extack, "Firmware reports no support for this tunnel type");
615 return ERR_PTR(error: rc);
616 }
617 /* No support yet for Geneve options */
618 if (info->options_len) {
619 NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel options");
620 return ERR_PTR(error: -EOPNOTSUPP);
621 }
622 switch (info->mode) {
623 case IP_TUNNEL_INFO_TX:
624 break;
625 case IP_TUNNEL_INFO_TX | IP_TUNNEL_INFO_IPV6:
626 type |= EFX_ENCAP_FLAG_IPV6;
627 break;
628 default:
629 NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported tunnel mode %u",
630 info->mode);
631 return ERR_PTR(error: -EOPNOTSUPP);
632 }
633 encap = kzalloc(size: sizeof(*encap), GFP_KERNEL_ACCOUNT);
634 if (!encap)
635 return ERR_PTR(error: -ENOMEM);
636 encap->type = type;
637 encap->key = info->key;
638 INIT_LIST_HEAD(list: &encap->users);
639 old = rhashtable_lookup_get_insert_fast(ht: &efx->tc->encap_ht,
640 obj: &encap->linkage,
641 params: efx_tc_encap_ht_params);
642 if (old) {
643 /* don't need our new entry */
644 kfree(objp: encap);
645 if (IS_ERR(ptr: old)) /* oh dear, it's actually an error */
646 return ERR_CAST(ptr: old);
647 if (!refcount_inc_not_zero(r: &old->ref))
648 return ERR_PTR(error: -EAGAIN);
649 /* existing entry found, ref taken */
650 return old;
651 }
652
653 rc = efx_bind_neigh(efx, encap, net: dev_net(dev: egdev), extack);
654 if (rc < 0)
655 goto out_remove;
656 to_efv = efx_tc_flower_lookup_efv(efx, dev: encap->neigh->egdev);
657 if (IS_ERR(ptr: to_efv)) {
658 /* neigh->egdev isn't ours */
659 NL_SET_ERR_MSG_MOD(extack, "Tunnel egress device not on switch");
660 rc = PTR_ERR(ptr: to_efv);
661 goto out_release;
662 }
663 rc = efx_tc_flower_external_mport(efx, efv: to_efv);
664 if (rc < 0) {
665 NL_SET_ERR_MSG_MOD(extack, "Failed to identify tunnel egress m-port");
666 goto out_release;
667 }
668 encap->dest_mport = rc;
669 read_lock_bh(&encap->neigh->lock);
670 efx_gen_encap_header(efx, encap);
671 read_unlock_bh(&encap->neigh->lock);
672
673 rc = efx_mae_allocate_encap_md(efx, encap);
674 if (rc < 0) {
675 NL_SET_ERR_MSG_MOD(extack, "Failed to write tunnel header to hw");
676 goto out_release;
677 }
678
679 /* ref and return */
680 refcount_set(r: &encap->ref, n: 1);
681 return encap;
682out_release:
683 efx_release_neigh(efx, encap);
684out_remove:
685 rhashtable_remove_fast(ht: &efx->tc->encap_ht, obj: &encap->linkage,
686 params: efx_tc_encap_ht_params);
687 kfree(objp: encap);
688 return ERR_PTR(error: rc);
689}
690
691void efx_tc_flower_release_encap_md(struct efx_nic *efx,
692 struct efx_tc_encap_action *encap)
693{
694 if (!refcount_dec_and_test(r: &encap->ref))
695 return; /* still in use */
696 efx_release_neigh(efx, encap);
697 rhashtable_remove_fast(ht: &efx->tc->encap_ht, obj: &encap->linkage,
698 params: efx_tc_encap_ht_params);
699 efx_mae_free_encap_md(efx, encap);
700 kfree(objp: encap);
701}
702
703static void efx_tc_remove_neigh_users(struct efx_nic *efx, struct efx_neigh_binder *neigh)
704{
705 struct efx_tc_encap_action *encap, *next;
706
707 list_for_each_entry_safe(encap, next, &neigh->users, list) {
708 /* Should cause neigh usage count to fall to zero, freeing it */
709 efx_release_neigh(efx, encap);
710 /* The encap has lost its neigh, so it's now unready */
711 efx_tc_update_encap(efx, encap);
712 }
713}
714
715void efx_tc_unregister_egdev(struct efx_nic *efx, struct net_device *net_dev)
716{
717 struct efx_neigh_binder *neigh;
718 struct rhashtable_iter walk;
719
720 mutex_lock(&efx->tc->mutex);
721 rhashtable_walk_enter(ht: &efx->tc->neigh_ht, iter: &walk);
722 rhashtable_walk_start(iter: &walk);
723 while ((neigh = rhashtable_walk_next(iter: &walk)) != NULL) {
724 if (IS_ERR(ptr: neigh))
725 continue;
726 if (neigh->egdev != net_dev)
727 continue;
728 neigh->dying = true;
729 rhashtable_walk_stop(iter: &walk);
730 synchronize_rcu(); /* Make sure any updates see dying flag */
731 efx_tc_remove_neigh_users(efx, neigh); /* might sleep */
732 rhashtable_walk_start(iter: &walk);
733 }
734 rhashtable_walk_stop(iter: &walk);
735 rhashtable_walk_exit(iter: &walk);
736 mutex_unlock(lock: &efx->tc->mutex);
737}
738
739int efx_tc_netevent_event(struct efx_nic *efx, unsigned long event,
740 void *ptr)
741{
742 if (efx->type->is_vf)
743 return NOTIFY_DONE;
744
745 switch (event) {
746 case NETEVENT_NEIGH_UPDATE:
747 return efx_neigh_event(efx, n: ptr);
748 default:
749 return NOTIFY_DONE;
750 }
751}
752

/* Source: linux/drivers/net/ethernet/sfc/tc_encap_actions.c */