// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2021 Mellanox Technologies. */

#include <net/fib_notifier.h>
#include <net/nexthop.h>
#include <net/ip_tunnels.h>
#include "tc_tun_encap.h"
#include "en_tc.h"
#include "tc_tun.h"
#include "rep/tc.h"
#include "diag/en_tc_tracepoint.h"

enum {
	MLX5E_ROUTE_ENTRY_VALID = BIT(0),
};

static int mlx5e_set_int_port_tunnel(struct mlx5e_priv *priv,
				     struct mlx5_flow_attr *attr,
				     struct mlx5e_encap_entry *e,
				     int out_index)
{
	struct net_device *route_dev;
	int err = 0;

	route_dev = dev_get_by_index(dev_net(e->out_dev), e->route_dev_ifindex);

	if (!route_dev || !netif_is_ovs_master(route_dev) ||
	    attr->parse_attr->filter_dev == e->out_dev)
		goto out;

	err = mlx5e_set_fwd_to_int_port_actions(priv, attr, e->route_dev_ifindex,
						MLX5E_TC_INT_PORT_EGRESS,
						&attr->action, out_index);

out:
	if (route_dev)
		dev_put(route_dev);

	return err;
}

struct mlx5e_route_key {
	int ip_version;
	union {
		__be32 v4;
		struct in6_addr v6;
	} endpoint_ip;
};

struct mlx5e_route_entry {
	struct mlx5e_route_key key;
	struct list_head encap_entries;
	struct list_head decap_flows;
	u32 flags;
	struct hlist_node hlist;
	refcount_t refcnt;
	int tunnel_dev_index;
	struct rcu_head rcu;
};

struct mlx5e_tc_tun_encap {
	struct mlx5e_priv *priv;
	struct notifier_block fib_nb;
	spinlock_t route_lock; /* protects route_tbl */
	unsigned long route_tbl_last_update;
	DECLARE_HASHTABLE(route_tbl, 8);
};

static bool mlx5e_route_entry_valid(struct mlx5e_route_entry *r)
{
	return r->flags & MLX5E_ROUTE_ENTRY_VALID;
}

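/* Copy the outer tunnel source/destination IP addresses from the flow spec
 * into the esw attribute's rx_tun_attr and mark the flow as a tunnel RX flow
 * when both addresses are present.
 */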
int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow,
			     struct mlx5_flow_spec *spec)
{
	struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
	struct mlx5_rx_tun_attr *tun_attr;
	void *daddr, *saddr;
	u8 ip_version;

	tun_attr = kvzalloc(sizeof(*tun_attr), GFP_KERNEL);
	if (!tun_attr)
		return -ENOMEM;

	esw_attr->rx_tun_attr = tun_attr;
	ip_version = mlx5e_tc_get_ip_version(spec, true);

	if (ip_version == 4) {
		daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				     outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
		saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				     outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4);
		tun_attr->dst_ip.v4 = *(__be32 *)daddr;
		tun_attr->src_ip.v4 = *(__be32 *)saddr;
		if (!tun_attr->dst_ip.v4 || !tun_attr->src_ip.v4)
			return 0;
	}
#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
	else if (ip_version == 6) {
		int ipv6_size = MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6);

		daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				     outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6);
		saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				     outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6);
		memcpy(&tun_attr->dst_ip.v6, daddr, ipv6_size);
		memcpy(&tun_attr->src_ip.v6, saddr, ipv6_size);
		if (ipv6_addr_any(&tun_attr->dst_ip.v6) ||
		    ipv6_addr_any(&tun_attr->src_ip.v6))
			return 0;
	}
#endif
	/* Only set the flag if both src and dst ip addresses exist. They are
	 * required to establish routing.
	 */
	flow_flag_set(flow, TUN_RX);
	flow->attr->tun_ip_version = ip_version;
	return 0;
}

static bool mlx5e_tc_flow_all_encaps_valid(struct mlx5_esw_flow_attr *esw_attr)
{
	bool all_flow_encaps_valid = true;
	int i;

	/* Flow can be associated with multiple encap entries.
	 * Before offloading the flow verify that all of them have
	 * a valid neighbour.
	 */
	for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
		if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP))
			continue;
		if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) {
			all_flow_encaps_valid = false;
			break;
		}
	}

	return all_flow_encaps_valid;
}

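/* Offload the pending encapsulation header of @e and move every flow on
 * @flow_list that currently uses the slow path rule back to its encap
 * (fast path) FDB rule, provided all of the flow's encap dests are valid.
 */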
void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
			      struct mlx5e_encap_entry *e,
			      struct list_head *flow_list)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_pkt_reformat_params reformat_params;
	struct mlx5_esw_flow_attr *esw_attr;
	struct mlx5_flow_handle *rule;
	struct mlx5_flow_attr *attr;
	struct mlx5_flow_spec *spec;
	struct mlx5e_tc_flow *flow;
	int err;

	if (e->flags & MLX5_ENCAP_ENTRY_NO_ROUTE)
		return;

	memset(&reformat_params, 0, sizeof(reformat_params));
	reformat_params.type = e->reformat_type;
	reformat_params.size = e->encap_size;
	reformat_params.data = e->encap_header;
	e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
						     &reformat_params,
						     MLX5_FLOW_NAMESPACE_FDB);
	if (IS_ERR(e->pkt_reformat)) {
		mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %lu\n",
			       PTR_ERR(e->pkt_reformat));
		return;
	}
	e->flags |= MLX5_ENCAP_ENTRY_VALID;
	mlx5e_rep_queue_neigh_stats_work(priv);

	list_for_each_entry(flow, flow_list, tmp_list) {
		if (!mlx5e_is_offloaded_flow(flow) || !flow_flag_test(flow, SLOW))
			continue;

		spec = &flow->attr->parse_attr->spec;

		attr = mlx5e_tc_get_encap_attr(flow);
		esw_attr = attr->esw_attr;
		esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
		esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;

		/* Do not offload flows with unresolved neighbors */
		if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
			continue;

		err = mlx5e_tc_offload_flow_post_acts(flow);
		if (err) {
			mlx5_core_warn(priv->mdev, "Failed to update flow post acts, %d\n",
				       err);
			continue;
		}

		/* update from slow path rule to encap rule */
		rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, flow->attr);
		if (IS_ERR(rule)) {
			mlx5e_tc_unoffload_flow_post_acts(flow);
			err = PTR_ERR(rule);
			mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
				       err);
			continue;
		}

		mlx5e_tc_unoffload_from_slow_path(esw, flow);
		flow->rule[0] = rule;
		/* was unset when slow path rule removed */
		flow_flag_set(flow, OFFLOADED);
	}
}

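/* Invalidate the encap dest of every offloaded flow on @flow_list, move flows
 * that are not already on the slow path to the slow path rule, and release
 * the encap entry's packet reformat object.
 */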
void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
			      struct mlx5e_encap_entry *e,
			      struct list_head *flow_list)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_esw_flow_attr *esw_attr;
	struct mlx5_flow_handle *rule;
	struct mlx5_flow_attr *attr;
	struct mlx5_flow_spec *spec;
	struct mlx5e_tc_flow *flow;
	int err;

	list_for_each_entry(flow, flow_list, tmp_list) {
		if (!mlx5e_is_offloaded_flow(flow))
			continue;

		attr = mlx5e_tc_get_encap_attr(flow);
		esw_attr = attr->esw_attr;
		/* mark the flow's encap dest as non-valid */
		esw_attr->dests[flow->tmp_entry_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;
		esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL;

		/* Clear pkt_reformat before checking the slow path flag. A flow
		 * processed on a later iteration may already have the slow path
		 * flag set, but its pkt_reformat still needs to be cleared.
		 */
		if (flow_flag_test(flow, SLOW))
			continue;

		/* update from encap rule to slow path rule */
		spec = &flow->attr->parse_attr->spec;
		rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);

		if (IS_ERR(rule)) {
			err = PTR_ERR(rule);
			mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
				       err);
			continue;
		}

		mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
		mlx5e_tc_unoffload_flow_post_acts(flow);
		flow->rule[0] = rule;
		/* was unset when fast path rule removed */
		flow_flag_set(flow, OFFLOADED);
	}

	/* we know that the encap is valid */
	e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
	mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
	e->pkt_reformat = NULL;
}

static void mlx5e_take_tmp_flow(struct mlx5e_tc_flow *flow,
				struct list_head *flow_list,
				int index)
{
	if (IS_ERR(mlx5e_flow_get(flow))) {
		/* Flow is being deleted concurrently. Wait for it to be
		 * unoffloaded from hardware, otherwise deleting encap will
		 * fail.
		 */
		wait_for_completion(&flow->del_hw_done);
		return;
	}
	wait_for_completion(&flow->init_done);

	flow->tmp_entry_index = index;
	list_add(&flow->tmp_list, flow_list);
}

/* Takes reference to all flows attached to encap and adds the flows to
 * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
 */
void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list)
{
	struct encap_flow_item *efi;
	struct mlx5e_tc_flow *flow;

	list_for_each_entry(efi, &e->flows, list) {
		flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
		mlx5e_take_tmp_flow(flow, flow_list, efi->index);
	}
}

/* Takes reference to all flows attached to route and adds the flows to
 * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
 */
static void mlx5e_take_all_route_decap_flows(struct mlx5e_route_entry *r,
					     struct list_head *flow_list)
{
	struct mlx5e_tc_flow *flow;

	list_for_each_entry(flow, &r->decap_flows, decap_routes)
		mlx5e_take_tmp_flow(flow, flow_list, 0);
}

typedef bool (match_cb)(struct mlx5e_encap_entry *);

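/* Walk nhe->encap_list under RCU, starting after @e (or from the list head
 * when @e is NULL). Take a reference on the next live entry, wait for its
 * initialization to complete and keep searching until @match accepts it.
 * The reference on the starting entry @e is released.
 */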
static struct mlx5e_encap_entry *
mlx5e_get_next_matching_encap(struct mlx5e_neigh_hash_entry *nhe,
			      struct mlx5e_encap_entry *e,
			      match_cb match)
{
	struct mlx5e_encap_entry *next = NULL;

retry:
	rcu_read_lock();

	/* find encap with non-zero reference counter value */
	for (next = e ?
		     list_next_or_null_rcu(&nhe->encap_list,
					   &e->encap_list,
					   struct mlx5e_encap_entry,
					   encap_list) :
		     list_first_or_null_rcu(&nhe->encap_list,
					    struct mlx5e_encap_entry,
					    encap_list);
	     next;
	     next = list_next_or_null_rcu(&nhe->encap_list,
					  &next->encap_list,
					  struct mlx5e_encap_entry,
					  encap_list))
		if (mlx5e_encap_take(next))
			break;

	rcu_read_unlock();

	/* release starting encap */
	if (e)
		mlx5e_encap_put(netdev_priv(e->out_dev), e);
	if (!next)
		return next;

	/* wait for encap to be fully initialized */
	wait_for_completion(&next->res_ready);
	/* continue searching if encap entry is not in valid state after completion */
	if (!match(next)) {
		e = next;
		goto retry;
	}

	return next;
}

static bool mlx5e_encap_valid(struct mlx5e_encap_entry *e)
{
	return e->flags & MLX5_ENCAP_ENTRY_VALID;
}

static struct mlx5e_encap_entry *
mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
			   struct mlx5e_encap_entry *e)
{
	return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_valid);
}

static bool mlx5e_encap_initialized(struct mlx5e_encap_entry *e)
{
	return e->compl_result >= 0;
}

struct mlx5e_encap_entry *
mlx5e_get_next_init_encap(struct mlx5e_neigh_hash_entry *nhe,
			  struct mlx5e_encap_entry *e)
{
	return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_initialized);
}

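/* Check whether any offloaded flow using a valid encap of @nhe has passed
 * traffic since the last report. If so, look up the corresponding kernel
 * neighbour entry and send a neigh event so the neighbour is kept refreshed.
 */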
void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
{
	struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
	struct mlx5e_encap_entry *e = NULL;
	struct mlx5e_tc_flow *flow;
	struct mlx5_fc *counter;
	struct neigh_table *tbl;
	bool neigh_used = false;
	struct neighbour *n;
	u64 lastuse;

	if (m_neigh->family == AF_INET)
		tbl = &arp_tbl;
#if IS_ENABLED(CONFIG_IPV6)
	else if (m_neigh->family == AF_INET6)
		tbl = ipv6_stub->nd_tbl;
#endif
	else
		return;

	/* mlx5e_get_next_valid_encap() releases previous encap before returning
	 * next one.
	 */
	while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) {
		struct mlx5e_priv *priv = netdev_priv(e->out_dev);
		struct encap_flow_item *efi, *tmp;
		struct mlx5_eswitch *esw;
		LIST_HEAD(flow_list);

		esw = priv->mdev->priv.eswitch;
		mutex_lock(&esw->offloads.encap_tbl_lock);
		list_for_each_entry_safe(efi, tmp, &e->flows, list) {
			flow = container_of(efi, struct mlx5e_tc_flow,
					    encaps[efi->index]);
			if (IS_ERR(mlx5e_flow_get(flow)))
				continue;
			list_add(&flow->tmp_list, &flow_list);

			if (mlx5e_is_offloaded_flow(flow)) {
				counter = mlx5e_tc_get_counter(flow);
				lastuse = mlx5_fc_query_lastuse(counter);
				if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
					neigh_used = true;
					break;
				}
			}
		}
		mutex_unlock(&esw->offloads.encap_tbl_lock);

		mlx5e_put_flow_list(priv, &flow_list);
		if (neigh_used) {
			/* release current encap before breaking the loop */
			mlx5e_encap_put(priv, e);
			break;
		}
	}

	trace_mlx5e_tc_update_neigh_used_value(nhe, neigh_used);

	if (neigh_used) {
		nhe->reported_lastuse = jiffies;

		/* find the relevant neigh according to the cached device and
		 * dst ip pair
		 */
		n = neigh_lookup(tbl, &m_neigh->dst_ip, READ_ONCE(nhe->neigh_dev));
		if (!n)
			return;

		neigh_event_send(n, NULL);
		neigh_release(n);
	}
}

static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
{
	WARN_ON(!list_empty(&e->flows));

	if (e->compl_result > 0) {
		mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);

		if (e->flags & MLX5_ENCAP_ENTRY_VALID)
			mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
	}

	kfree(e->tun_info);
	kfree(e->encap_header);
	kfree_rcu(e, rcu);
}

static void mlx5e_decap_dealloc(struct mlx5e_priv *priv,
				struct mlx5e_decap_entry *d)
{
	WARN_ON(!list_empty(&d->flows));

	if (!d->compl_result)
		mlx5_packet_reformat_dealloc(priv->mdev, d->pkt_reformat);

	kfree_rcu(d, rcu);
}

void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock))
		return;
	list_del(&e->route_list);
	hash_del_rcu(&e->encap_hlist);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	mlx5e_encap_dealloc(priv, e);
}

static void mlx5e_encap_put_locked(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	lockdep_assert_held(&esw->offloads.encap_tbl_lock);

	if (!refcount_dec_and_test(&e->refcnt))
		return;
	list_del(&e->route_list);
	hash_del_rcu(&e->encap_hlist);
	mlx5e_encap_dealloc(priv, e);
}

static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	if (!refcount_dec_and_mutex_lock(&d->refcnt, &esw->offloads.decap_tbl_lock))
		return;
	hash_del_rcu(&d->hlist);
	mutex_unlock(&esw->offloads.decap_tbl_lock);

	mlx5e_decap_dealloc(priv, d);
}

static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
				     struct mlx5e_tc_flow *flow,
				     int out_index);

void mlx5e_detach_encap(struct mlx5e_priv *priv,
			struct mlx5e_tc_flow *flow,
			struct mlx5_flow_attr *attr,
			int out_index)
{
	struct mlx5e_encap_entry *e = flow->encaps[out_index].e;
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	if (!mlx5e_is_eswitch_flow(flow))
		return;

	if (attr->esw_attr->dests[out_index].flags &
	    MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
		mlx5e_detach_encap_route(priv, flow, out_index);

	/* flow wasn't fully initialized */
	if (!e)
		return;

	mutex_lock(&esw->offloads.encap_tbl_lock);
	list_del(&flow->encaps[out_index].list);
	flow->encaps[out_index].e = NULL;
	if (!refcount_dec_and_test(&e->refcnt)) {
		mutex_unlock(&esw->offloads.encap_tbl_lock);
		return;
	}
	list_del(&e->route_list);
	hash_del_rcu(&e->encap_hlist);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	mlx5e_encap_dealloc(priv, e);
}

void mlx5e_detach_decap(struct mlx5e_priv *priv,
			struct mlx5e_tc_flow *flow)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_decap_entry *d = flow->decap_reformat;

	if (!d)
		return;

	mutex_lock(&esw->offloads.decap_tbl_lock);
	list_del(&flow->l3_to_l2_reformat);
	flow->decap_reformat = NULL;

	if (!refcount_dec_and_test(&d->refcnt)) {
		mutex_unlock(&esw->offloads.decap_tbl_lock);
		return;
	}
	hash_del_rcu(&d->hlist);
	mutex_unlock(&esw->offloads.decap_tbl_lock);

	mlx5e_decap_dealloc(priv, d);
}

bool mlx5e_tc_tun_encap_info_equal_generic(struct mlx5e_encap_key *a,
					   struct mlx5e_encap_key *b)
{
	return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) == 0 &&
	       a->tc_tunnel->tunnel_type == b->tc_tunnel->tunnel_type;
}

bool mlx5e_tc_tun_encap_info_equal_options(struct mlx5e_encap_key *a,
					   struct mlx5e_encap_key *b,
					   __be16 tun_flags)
{
	struct ip_tunnel_info *a_info;
	struct ip_tunnel_info *b_info;
	bool a_has_opts, b_has_opts;

	if (!mlx5e_tc_tun_encap_info_equal_generic(a, b))
		return false;

	a_has_opts = !!(a->ip_tun_key->tun_flags & tun_flags);
	b_has_opts = !!(b->ip_tun_key->tun_flags & tun_flags);

	/* keys are equal when both don't have any options attached */
	if (!a_has_opts && !b_has_opts)
		return true;

	if (a_has_opts != b_has_opts)
		return false;

	/* options stored in memory next to ip_tunnel_info struct */
	a_info = container_of(a->ip_tun_key, struct ip_tunnel_info, key);
	b_info = container_of(b->ip_tun_key, struct ip_tunnel_info, key);

	return a_info->options_len == b_info->options_len &&
	       !memcmp(ip_tunnel_info_opts(a_info),
		       ip_tunnel_info_opts(b_info),
		       a_info->options_len);
}

static int cmp_decap_info(struct mlx5e_decap_key *a,
			  struct mlx5e_decap_key *b)
{
	return memcmp(&a->key, &b->key, sizeof(b->key));
}

static int hash_encap_info(struct mlx5e_encap_key *key)
{
	return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key),
		     key->tc_tunnel->tunnel_type);
}

static int hash_decap_info(struct mlx5e_decap_key *key)
{
	return jhash(&key->key, sizeof(key->key), 0);
}

bool mlx5e_encap_take(struct mlx5e_encap_entry *e)
{
	return refcount_inc_not_zero(&e->refcnt);
}

static bool mlx5e_decap_take(struct mlx5e_decap_entry *e)
{
	return refcount_inc_not_zero(&e->refcnt);
}

static struct mlx5e_encap_entry *
mlx5e_encap_get(struct mlx5e_priv *priv, struct mlx5e_encap_key *key,
		uintptr_t hash_key)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_encap_key e_key;
	struct mlx5e_encap_entry *e;

	hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
				   encap_hlist, hash_key) {
		e_key.ip_tun_key = &e->tun_info->key;
		e_key.tc_tunnel = e->tunnel;
		if (e->tunnel->encap_info_equal(&e_key, key) &&
		    mlx5e_encap_take(e))
			return e;
	}

	return NULL;
}

static struct mlx5e_decap_entry *
mlx5e_decap_get(struct mlx5e_priv *priv, struct mlx5e_decap_key *key,
		uintptr_t hash_key)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_decap_key r_key;
	struct mlx5e_decap_entry *e;

	hash_for_each_possible_rcu(esw->offloads.decap_tbl, e,
				   hlist, hash_key) {
		r_key = e->key;
		if (!cmp_decap_info(&r_key, key) &&
		    mlx5e_decap_take(e))
			return e;
	}
	return NULL;
}

struct ip_tunnel_info *mlx5e_dup_tun_info(const struct ip_tunnel_info *tun_info)
{
	size_t tun_size = sizeof(*tun_info) + tun_info->options_len;

	return kmemdup(tun_info, tun_size, GFP_KERNEL);
}

static bool is_duplicated_encap_entry(struct mlx5e_priv *priv,
				      struct mlx5e_tc_flow *flow,
				      int out_index,
				      struct mlx5e_encap_entry *e,
				      struct netlink_ext_ack *extack)
{
	int i;

	for (i = 0; i < out_index; i++) {
		if (flow->encaps[i].e != e)
			continue;
		NL_SET_ERR_MSG_MOD(extack, "can't duplicate encap action");
		netdev_err(priv->netdev, "can't duplicate encap action\n");
		return true;
	}

	return false;
}

static int mlx5e_set_vf_tunnel(struct mlx5_eswitch *esw,
			       struct mlx5_flow_attr *attr,
			       struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
			       struct net_device *out_dev,
			       int route_dev_ifindex,
			       int out_index)
{
	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
	struct net_device *route_dev;
	u16 vport_num;
	int err = 0;
	u32 data;

	route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);

	if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
	    !mlx5e_tc_is_vf_tunnel(out_dev, route_dev))
		goto out;

	err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
	if (err)
		goto out;

	attr->dest_chain = 0;
	attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
	esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE;
	data = mlx5_eswitch_get_vport_metadata_for_set(esw_attr->in_mdev->priv.eswitch,
						       vport_num);
	err = mlx5e_tc_match_to_reg_set_and_get_id(esw->dev, mod_hdr_acts,
						   MLX5_FLOW_NAMESPACE_FDB,
						   VPORT_TO_REG, data);
	if (err >= 0) {
		esw_attr->dests[out_index].src_port_rewrite_act_id = err;
		err = 0;
	}

out:
	if (route_dev)
		dev_put(route_dev);
	return err;
}

static int mlx5e_update_vf_tunnel(struct mlx5_eswitch *esw,
				  struct mlx5_esw_flow_attr *attr,
				  struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
				  struct net_device *out_dev,
				  int route_dev_ifindex,
				  int out_index)
{
	int act_id = attr->dests[out_index].src_port_rewrite_act_id;
	struct net_device *route_dev;
	u16 vport_num;
	int err = 0;
	u32 data;

	route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);

	if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
	    !mlx5e_tc_is_vf_tunnel(out_dev, route_dev)) {
		err = -ENODEV;
		goto out;
	}

	err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
	if (err)
		goto out;

	data = mlx5_eswitch_get_vport_metadata_for_set(attr->in_mdev->priv.eswitch,
						       vport_num);
	mlx5e_tc_match_to_reg_mod_hdr_change(esw->dev, mod_hdr_acts, VPORT_TO_REG, act_id, data);

out:
	if (route_dev)
		dev_put(route_dev);
	return err;
}

static unsigned int mlx5e_route_tbl_get_last_update(struct mlx5e_priv *priv)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *uplink_rpriv;
	struct mlx5e_tc_tun_encap *encap;
	unsigned int ret;

	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	uplink_priv = &uplink_rpriv->uplink_priv;
	encap = uplink_priv->encap;

	spin_lock_bh(&encap->route_lock);
	ret = encap->route_tbl_last_update;
	spin_unlock_bh(&encap->route_lock);
	return ret;
}

static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
				    struct mlx5e_tc_flow *flow,
				    struct mlx5_flow_attr *attr,
				    struct mlx5e_encap_entry *e,
				    bool new_encap_entry,
				    unsigned long tbl_time_before,
				    int out_index);

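/* Look up an existing encap entry for the flow's tunnel key or create a new
 * one, build its encapsulation header, and attach the flow to it. Must be
 * called with the eswitch encap_tbl_lock held. If the encap entry is not yet
 * valid (neighbour unresolved), the flow is marked to use the slow path.
 */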
int mlx5e_attach_encap(struct mlx5e_priv *priv,
		       struct mlx5e_tc_flow *flow,
		       struct mlx5_flow_attr *attr,
		       struct net_device *mirred_dev,
		       int out_index,
		       struct netlink_ext_ack *extack,
		       struct net_device **encap_dev)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	const struct ip_tunnel_info *tun_info;
	const struct mlx5e_mpls_info *mpls_info;
	unsigned long tbl_time_before = 0;
	struct mlx5e_encap_entry *e;
	struct mlx5e_encap_key key;
	bool entry_created = false;
	unsigned short family;
	uintptr_t hash_key;
	int err = 0;

	lockdep_assert_held(&esw->offloads.encap_tbl_lock);

	parse_attr = attr->parse_attr;
	tun_info = parse_attr->tun_info[out_index];
	mpls_info = &parse_attr->mpls_info[out_index];
	family = ip_tunnel_info_af(tun_info);
	key.ip_tun_key = &tun_info->key;
	key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev);
	if (!key.tc_tunnel) {
		NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel");
		return -EOPNOTSUPP;
	}

	hash_key = hash_encap_info(&key);

	e = mlx5e_encap_get(priv, &key, hash_key);

	/* must verify if encap is valid or not */
	if (e) {
		/* Check that entry was not already attached to this flow */
		if (is_duplicated_encap_entry(priv, flow, out_index, e, extack)) {
			err = -EOPNOTSUPP;
			goto out_err;
		}

		goto attach_flow;
	}

	e = kzalloc(sizeof(*e), GFP_KERNEL);
	if (!e) {
		err = -ENOMEM;
		goto out_err;
	}

	refcount_set(&e->refcnt, 1);
	init_completion(&e->res_ready);
	entry_created = true;
	INIT_LIST_HEAD(&e->route_list);

	tun_info = mlx5e_dup_tun_info(tun_info);
	if (!tun_info) {
		err = -ENOMEM;
		goto out_err_init;
	}
	e->tun_info = tun_info;
	memcpy(&e->mpls_info, mpls_info, sizeof(*mpls_info));
	err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
	if (err)
		goto out_err_init;

	INIT_LIST_HEAD(&e->flows);
	hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
	tbl_time_before = mlx5e_route_tbl_get_last_update(priv);

	if (family == AF_INET)
		err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e);
	else if (family == AF_INET6)
		err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);

	complete_all(&e->res_ready);
	if (err) {
		e->compl_result = err;
		goto out_err;
	}
	e->compl_result = 1;

attach_flow:
	err = mlx5e_attach_encap_route(priv, flow, attr, e, entry_created,
				       tbl_time_before, out_index);
	if (err)
		goto out_err;

	err = mlx5e_set_int_port_tunnel(priv, attr, e, out_index);
	if (err == -EOPNOTSUPP) {
		/* If device doesn't support int port offload,
		 * redirect to uplink vport.
		 */
		mlx5_core_dbg(priv->mdev, "attaching int port as encap dev not supported, using uplink\n");
		err = 0;
	} else if (err) {
		goto out_err;
	}

	flow->encaps[out_index].e = e;
	list_add(&flow->encaps[out_index].list, &e->flows);
	flow->encaps[out_index].index = out_index;
	*encap_dev = e->out_dev;
	if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
		attr->esw_attr->dests[out_index].pkt_reformat = e->pkt_reformat;
		attr->esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
	} else {
		flow_flag_set(flow, SLOW);
	}

	return err;

out_err:
	if (e)
		mlx5e_encap_put_locked(priv, e);
	return err;

out_err_init:
	kfree(tun_info);
	kfree(e);
	return err;
}

int mlx5e_attach_decap(struct mlx5e_priv *priv,
		       struct mlx5e_tc_flow *flow,
		       struct netlink_ext_ack *extack)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
	struct mlx5_pkt_reformat_params reformat_params;
	struct mlx5e_decap_entry *d;
	struct mlx5e_decap_key key;
	uintptr_t hash_key;
	int err = 0;

	if (sizeof(attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) {
		NL_SET_ERR_MSG_MOD(extack,
				   "encap header larger than max supported");
		return -EOPNOTSUPP;
	}

	key.key = attr->eth;
	hash_key = hash_decap_info(&key);
	mutex_lock(&esw->offloads.decap_tbl_lock);
	d = mlx5e_decap_get(priv, &key, hash_key);
	if (d) {
		mutex_unlock(&esw->offloads.decap_tbl_lock);
		wait_for_completion(&d->res_ready);
		mutex_lock(&esw->offloads.decap_tbl_lock);
		if (d->compl_result) {
			err = -EREMOTEIO;
			goto out_free;
		}
		goto found;
	}

	d = kzalloc(sizeof(*d), GFP_KERNEL);
	if (!d) {
		err = -ENOMEM;
		goto out_err;
	}

	d->key = key;
	refcount_set(&d->refcnt, 1);
	init_completion(&d->res_ready);
	INIT_LIST_HEAD(&d->flows);
	hash_add_rcu(esw->offloads.decap_tbl, &d->hlist, hash_key);
	mutex_unlock(&esw->offloads.decap_tbl_lock);

	memset(&reformat_params, 0, sizeof(reformat_params));
	reformat_params.type = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2;
	reformat_params.size = sizeof(attr->eth);
	reformat_params.data = &attr->eth;
	d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
						     &reformat_params,
						     MLX5_FLOW_NAMESPACE_FDB);
	if (IS_ERR(d->pkt_reformat)) {
		err = PTR_ERR(d->pkt_reformat);
		d->compl_result = err;
	}
	mutex_lock(&esw->offloads.decap_tbl_lock);
	complete_all(&d->res_ready);
	if (err)
		goto out_free;

found:
	flow->decap_reformat = d;
	attr->decap_pkt_reformat = d->pkt_reformat;
	list_add(&flow->l3_to_l2_reformat, &d->flows);
	mutex_unlock(&esw->offloads.decap_tbl_lock);
	return 0;

out_free:
	mutex_unlock(&esw->offloads.decap_tbl_lock);
	mlx5e_decap_put(priv, d);
	return err;

out_err:
	mutex_unlock(&esw->offloads.decap_tbl_lock);
	return err;
}

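/* Attach an encap entry for every encap destination of the flow and fill in
 * the destination vport information. Sets *vf_tun when a destination requires
 * source port rewrite without an internal port; such a flow is rejected when
 * it also mirrors to more than one destination.
 */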
int mlx5e_tc_tun_encap_dests_set(struct mlx5e_priv *priv,
				 struct mlx5e_tc_flow *flow,
				 struct mlx5_flow_attr *attr,
				 struct netlink_ext_ack *extack,
				 bool *vf_tun)
{
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5_esw_flow_attr *esw_attr;
	struct net_device *encap_dev = NULL;
	struct mlx5e_rep_priv *rpriv;
	struct mlx5e_priv *out_priv;
	struct mlx5_eswitch *esw;
	int out_index;
	int err = 0;

	parse_attr = attr->parse_attr;
	esw_attr = attr->esw_attr;
	*vf_tun = false;

	esw = priv->mdev->priv.eswitch;
	mutex_lock(&esw->offloads.encap_tbl_lock);
	for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
		struct net_device *out_dev;
		int mirred_ifindex;

		if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
			continue;

		mirred_ifindex = parse_attr->mirred_ifindex[out_index];
		out_dev = dev_get_by_index(dev_net(priv->netdev), mirred_ifindex);
		if (!out_dev) {
			NL_SET_ERR_MSG_MOD(extack, "Requested mirred device not found");
			err = -ENODEV;
			goto out;
		}
		err = mlx5e_attach_encap(priv, flow, attr, out_dev, out_index,
					 extack, &encap_dev);
		dev_put(out_dev);
		if (err)
			goto out;

		if (esw_attr->dests[out_index].flags &
		    MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE &&
		    !esw_attr->dest_int_port)
			*vf_tun = true;

		out_priv = netdev_priv(encap_dev);
		rpriv = out_priv->ppriv;
		esw_attr->dests[out_index].vport_valid = true;
		esw_attr->dests[out_index].vport = rpriv->rep->vport;
		esw_attr->dests[out_index].mdev = out_priv->mdev;
	}

	if (*vf_tun && esw_attr->out_count > 1) {
		NL_SET_ERR_MSG_MOD(extack, "VF tunnel encap with mirroring is not supported");
		err = -EOPNOTSUPP;
		goto out;
	}

out:
	mutex_unlock(&esw->offloads.encap_tbl_lock);
	return err;
}

void mlx5e_tc_tun_encap_dests_unset(struct mlx5e_priv *priv,
				    struct mlx5e_tc_flow *flow,
				    struct mlx5_flow_attr *attr)
{
	struct mlx5_esw_flow_attr *esw_attr;
	int out_index;

	if (!mlx5e_is_eswitch_flow(flow))
		return;

	esw_attr = attr->esw_attr;

	for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
		if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
			continue;

		mlx5e_detach_encap(flow->priv, flow, attr, out_index);
		kfree(attr->parse_attr->tun_info[out_index]);
	}
}

static int cmp_route_info(struct mlx5e_route_key *a,
			  struct mlx5e_route_key *b)
{
	if (a->ip_version == 4 && b->ip_version == 4)
		return memcmp(&a->endpoint_ip.v4, &b->endpoint_ip.v4,
			      sizeof(a->endpoint_ip.v4));
	else if (a->ip_version == 6 && b->ip_version == 6)
		return memcmp(&a->endpoint_ip.v6, &b->endpoint_ip.v6,
			      sizeof(a->endpoint_ip.v6));
	return 1;
}

static u32 hash_route_info(struct mlx5e_route_key *key)
{
	if (key->ip_version == 4)
		return jhash(&key->endpoint_ip.v4, sizeof(key->endpoint_ip.v4), 0);
	return jhash(&key->endpoint_ip.v6, sizeof(key->endpoint_ip.v6), 0);
}

static void mlx5e_route_dealloc(struct mlx5e_priv *priv,
				struct mlx5e_route_entry *r)
{
	WARN_ON(!list_empty(&r->decap_flows));
	WARN_ON(!list_empty(&r->encap_entries));

	kfree_rcu(r, rcu);
}

static void mlx5e_route_put(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	if (!refcount_dec_and_mutex_lock(&r->refcnt, &esw->offloads.encap_tbl_lock))
		return;

	hash_del_rcu(&r->hlist);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	mlx5e_route_dealloc(priv, r);
}

static void mlx5e_route_put_locked(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	lockdep_assert_held(&esw->offloads.encap_tbl_lock);

	if (!refcount_dec_and_test(&r->refcnt))
		return;
	hash_del_rcu(&r->hlist);
	mlx5e_route_dealloc(priv, r);
}

static struct mlx5e_route_entry *
mlx5e_route_get(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key,
		u32 hash_key)
{
	struct mlx5e_route_key r_key;
	struct mlx5e_route_entry *r;

	hash_for_each_possible(encap->route_tbl, r, hlist, hash_key) {
		r_key = r->key;
		if (!cmp_route_info(&r_key, key) &&
		    refcount_inc_not_zero(&r->refcnt))
			return r;
	}
	return NULL;
}

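/* Return an existing valid route entry for @key with an extra reference, or
 * allocate a new one and add it to the uplink route table. On allocation the
 * current route table update timestamp is reported through
 * @route_tbl_change_time so callers can detect concurrent FIB changes.
 */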
static struct mlx5e_route_entry *
mlx5e_route_get_create(struct mlx5e_priv *priv,
		       struct mlx5e_route_key *key,
		       int tunnel_dev_index,
		       unsigned long *route_tbl_change_time)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *uplink_rpriv;
	struct mlx5e_tc_tun_encap *encap;
	struct mlx5e_route_entry *r;
	u32 hash_key;

	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	uplink_priv = &uplink_rpriv->uplink_priv;
	encap = uplink_priv->encap;

	hash_key = hash_route_info(key);
	spin_lock_bh(&encap->route_lock);
	r = mlx5e_route_get(encap, key, hash_key);
	spin_unlock_bh(&encap->route_lock);
	if (r) {
		if (!mlx5e_route_entry_valid(r)) {
			mlx5e_route_put_locked(priv, r);
			return ERR_PTR(-EINVAL);
		}
		return r;
	}

	r = kzalloc(sizeof(*r), GFP_KERNEL);
	if (!r)
		return ERR_PTR(-ENOMEM);

	r->key = *key;
	r->flags |= MLX5E_ROUTE_ENTRY_VALID;
	r->tunnel_dev_index = tunnel_dev_index;
	refcount_set(&r->refcnt, 1);
	INIT_LIST_HEAD(&r->decap_flows);
	INIT_LIST_HEAD(&r->encap_entries);

	spin_lock_bh(&encap->route_lock);
	*route_tbl_change_time = encap->route_tbl_last_update;
	hash_add(encap->route_tbl, &r->hlist, hash_key);
	spin_unlock_bh(&encap->route_lock);

	return r;
}

static struct mlx5e_route_entry *
mlx5e_route_lookup_for_update(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key)
{
	u32 hash_key = hash_route_info(key);
	struct mlx5e_route_entry *r;

	spin_lock_bh(&encap->route_lock);
	encap->route_tbl_last_update = jiffies;
	r = mlx5e_route_get(encap, key, hash_key);
	spin_unlock_bh(&encap->route_lock);

	return r;
}

struct mlx5e_tc_fib_event_data {
	struct work_struct work;
	unsigned long event;
	struct mlx5e_route_entry *r;
	struct net_device *ul_dev;
};

static void mlx5e_tc_fib_event_work(struct work_struct *work);
static struct mlx5e_tc_fib_event_data *
mlx5e_tc_init_fib_work(unsigned long event, struct net_device *ul_dev, gfp_t flags)
{
	struct mlx5e_tc_fib_event_data *fib_work;

	fib_work = kzalloc(sizeof(*fib_work), flags);
	if (WARN_ON(!fib_work))
		return NULL;

	INIT_WORK(&fib_work->work, mlx5e_tc_fib_event_work);
	fib_work->event = event;
	fib_work->ul_dev = ul_dev;

	return fib_work;
}

static int
mlx5e_route_enqueue_update(struct mlx5e_priv *priv,
			   struct mlx5e_route_entry *r,
			   unsigned long event)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_fib_event_data *fib_work;
	struct mlx5e_rep_priv *uplink_rpriv;
	struct net_device *ul_dev;

	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	ul_dev = uplink_rpriv->netdev;

	fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_KERNEL);
	if (!fib_work)
		return -ENOMEM;

	dev_hold(ul_dev);
	refcount_inc(&r->refcnt);
	fib_work->r = r;
	queue_work(priv->wq, &fib_work->work);

	return 0;
}

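/* Attach a tunnel decap flow to the route entry that matches its outer
 * destination IP, creating the entry if needed. If the route table changed
 * while the entry was being created, a FIB update work item is queued so the
 * new entry is re-evaluated.
 */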
int mlx5e_attach_decap_route(struct mlx5e_priv *priv,
			     struct mlx5e_tc_flow *flow)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	unsigned long tbl_time_before, tbl_time_after;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5_flow_attr *attr = flow->attr;
	struct mlx5_esw_flow_attr *esw_attr;
	struct mlx5e_route_entry *r;
	struct mlx5e_route_key key;
	int err = 0;

	esw_attr = attr->esw_attr;
	parse_attr = attr->parse_attr;
	mutex_lock(&esw->offloads.encap_tbl_lock);
	if (!esw_attr->rx_tun_attr)
		goto out;

	tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
	tbl_time_after = tbl_time_before;
	err = mlx5e_tc_tun_route_lookup(priv, &parse_attr->spec, attr, parse_attr->filter_dev);
	if (err || !esw_attr->rx_tun_attr->decap_vport)
		goto out;

	key.ip_version = attr->tun_ip_version;
	if (key.ip_version == 4)
		key.endpoint_ip.v4 = esw_attr->rx_tun_attr->dst_ip.v4;
	else
		key.endpoint_ip.v6 = esw_attr->rx_tun_attr->dst_ip.v6;

	r = mlx5e_route_get_create(priv, &key, parse_attr->filter_dev->ifindex,
				   &tbl_time_after);
	if (IS_ERR(r)) {
		err = PTR_ERR(r);
		goto out;
	}
	/* Routing changed concurrently. FIB event handler might have missed new
	 * entry, schedule update.
	 */
	if (tbl_time_before != tbl_time_after) {
		err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
		if (err) {
			mlx5e_route_put_locked(priv, r);
			goto out;
		}
	}

	flow->decap_route = r;
	list_add(&flow->decap_routes, &r->decap_flows);
	mutex_unlock(&esw->offloads.encap_tbl_lock);
	return 0;

out:
	mutex_unlock(&esw->offloads.encap_tbl_lock);
	return err;
}

static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
				    struct mlx5e_tc_flow *flow,
				    struct mlx5_flow_attr *attr,
				    struct mlx5e_encap_entry *e,
				    bool new_encap_entry,
				    unsigned long tbl_time_before,
				    int out_index)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	unsigned long tbl_time_after = tbl_time_before;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	const struct ip_tunnel_info *tun_info;
	struct mlx5_esw_flow_attr *esw_attr;
	struct mlx5e_route_entry *r;
	struct mlx5e_route_key key;
	unsigned short family;
	int err = 0;

	esw_attr = attr->esw_attr;
	parse_attr = attr->parse_attr;
	tun_info = parse_attr->tun_info[out_index];
	family = ip_tunnel_info_af(tun_info);

	if (family == AF_INET) {
		key.endpoint_ip.v4 = tun_info->key.u.ipv4.src;
		key.ip_version = 4;
	} else if (family == AF_INET6) {
		key.endpoint_ip.v6 = tun_info->key.u.ipv6.src;
		key.ip_version = 6;
	}

	err = mlx5e_set_vf_tunnel(esw, attr, &parse_attr->mod_hdr_acts, e->out_dev,
				  e->route_dev_ifindex, out_index);
	if (err || !(esw_attr->dests[out_index].flags &
		     MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE))
		return err;

	r = mlx5e_route_get_create(priv, &key, parse_attr->mirred_ifindex[out_index],
				   &tbl_time_after);
	if (IS_ERR(r))
		return PTR_ERR(r);
	/* Routing changed concurrently. FIB event handler might have missed new
	 * entry, schedule update.
	 */
	if (tbl_time_before != tbl_time_after) {
		err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
		if (err) {
			mlx5e_route_put_locked(priv, r);
			return err;
		}
	}

	flow->encap_routes[out_index].r = r;
	if (new_encap_entry)
		list_add(&e->route_list, &r->encap_entries);
	flow->encap_routes[out_index].index = out_index;
	return 0;
}

void mlx5e_detach_decap_route(struct mlx5e_priv *priv,
			      struct mlx5e_tc_flow *flow)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_route_entry *r = flow->decap_route;

	if (!r)
		return;

	mutex_lock(&esw->offloads.encap_tbl_lock);
	list_del(&flow->decap_routes);
	flow->decap_route = NULL;

	if (!refcount_dec_and_test(&r->refcnt)) {
		mutex_unlock(&esw->offloads.encap_tbl_lock);
		return;
	}
	hash_del_rcu(&r->hlist);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	mlx5e_route_dealloc(priv, r);
}

static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
				     struct mlx5e_tc_flow *flow,
				     int out_index)
{
	struct mlx5e_route_entry *r = flow->encap_routes[out_index].r;
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_encap_entry *e, *tmp;

	if (!r)
		return;

	mutex_lock(&esw->offloads.encap_tbl_lock);
	flow->encap_routes[out_index].r = NULL;

	if (!refcount_dec_and_test(&r->refcnt)) {
		mutex_unlock(&esw->offloads.encap_tbl_lock);
		return;
	}
	list_for_each_entry_safe(e, tmp, &r->encap_entries, route_list)
		list_del_init(&e->route_list);
	hash_del_rcu(&r->hlist);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	mlx5e_route_dealloc(priv, r);
}

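/* Tear down the hardware rules of every flow on @encap_flows, mark the encap
 * entry as having no route and release its packet reformat object. The flows
 * are left un-offloaded so a later route update can re-offload them.
 */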
static void mlx5e_invalidate_encap(struct mlx5e_priv *priv,
				   struct mlx5e_encap_entry *e,
				   struct list_head *encap_flows)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_flow *flow;

	list_for_each_entry(flow, encap_flows, tmp_list) {
		struct mlx5_esw_flow_attr *esw_attr;
		struct mlx5_flow_attr *attr;

		if (!mlx5e_is_offloaded_flow(flow))
			continue;

		attr = mlx5e_tc_get_encap_attr(flow);
		esw_attr = attr->esw_attr;

		if (flow_flag_test(flow, SLOW)) {
			mlx5e_tc_unoffload_from_slow_path(esw, flow);
		} else {
			mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
			mlx5e_tc_unoffload_flow_post_acts(flow);
		}

		mlx5e_tc_detach_mod_hdr(priv, flow, attr);
		attr->modify_hdr = NULL;

		esw_attr->dests[flow->tmp_entry_index].flags &=
			~MLX5_ESW_DEST_ENCAP_VALID;
		esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL;
	}

	e->flags |= MLX5_ENCAP_ENTRY_NO_ROUTE;
	if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
		e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
		mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
		e->pkt_reformat = NULL;
	}
}

static void mlx5e_reoffload_encap(struct mlx5e_priv *priv,
				  struct net_device *tunnel_dev,
				  struct mlx5e_encap_entry *e,
				  struct list_head *encap_flows)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_flow *flow;
	int err;

	err = ip_tunnel_info_af(e->tun_info) == AF_INET ?
		mlx5e_tc_tun_update_header_ipv4(priv, tunnel_dev, e) :
		mlx5e_tc_tun_update_header_ipv6(priv, tunnel_dev, e);
	if (err)
		mlx5_core_warn(priv->mdev, "Failed to update encap header, %d", err);
	e->flags &= ~MLX5_ENCAP_ENTRY_NO_ROUTE;

	list_for_each_entry(flow, encap_flows, tmp_list) {
		struct mlx5e_tc_flow_parse_attr *parse_attr;
		struct mlx5_esw_flow_attr *esw_attr;
		struct mlx5_flow_handle *rule;
		struct mlx5_flow_attr *attr;
		struct mlx5_flow_spec *spec;

		if (flow_flag_test(flow, FAILED))
			continue;

		spec = &flow->attr->parse_attr->spec;

		attr = mlx5e_tc_get_encap_attr(flow);
		esw_attr = attr->esw_attr;
		parse_attr = attr->parse_attr;

		err = mlx5e_update_vf_tunnel(esw, esw_attr, &parse_attr->mod_hdr_acts,
					     e->out_dev, e->route_dev_ifindex,
					     flow->tmp_entry_index);
		if (err) {
			mlx5_core_warn(priv->mdev, "Failed to update VF tunnel err=%d", err);
			continue;
		}

		err = mlx5e_tc_attach_mod_hdr(priv, flow, attr);
		if (err) {
			mlx5_core_warn(priv->mdev, "Failed to update flow mod_hdr err=%d",
				       err);
			continue;
		}

		if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
			esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
			esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
			if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
				goto offload_to_slow_path;

			err = mlx5e_tc_offload_flow_post_acts(flow);
			if (err) {
				mlx5_core_warn(priv->mdev, "Failed to update flow post acts, %d\n",
					       err);
				goto offload_to_slow_path;
			}

			/* update from slow path rule to encap rule */
			rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, flow->attr);
			if (IS_ERR(rule)) {
				mlx5e_tc_unoffload_flow_post_acts(flow);
				err = PTR_ERR(rule);
				mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
					       err);
			} else {
				flow->rule[0] = rule;
			}
		} else {
offload_to_slow_path:
			rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
			/* mark the flow's encap dest as non-valid */
			esw_attr->dests[flow->tmp_entry_index].flags &=
				~MLX5_ESW_DEST_ENCAP_VALID;

			if (IS_ERR(rule)) {
				err = PTR_ERR(rule);
				mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
					       err);
			} else {
				flow->rule[0] = rule;
			}
		}
		flow_flag_set(flow, OFFLOADED);
	}
}

static int mlx5e_update_route_encaps(struct mlx5e_priv *priv,
				     struct mlx5e_route_entry *r,
				     struct list_head *flow_list,
				     bool replace)
{
	struct net_device *tunnel_dev;
	struct mlx5e_encap_entry *e;

	tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
	if (!tunnel_dev)
		return -ENODEV;

	list_for_each_entry(e, &r->encap_entries, route_list) {
		LIST_HEAD(encap_flows);

		mlx5e_take_all_encap_flows(e, &encap_flows);
		if (list_empty(&encap_flows))
			continue;

		if (mlx5e_route_entry_valid(r))
			mlx5e_invalidate_encap(priv, e, &encap_flows);

		if (!replace) {
			list_splice(&encap_flows, flow_list);
			continue;
		}

		mlx5e_reoffload_encap(priv, tunnel_dev, e, &encap_flows);
		list_splice(&encap_flows, flow_list);
	}

	return 0;
}

static void mlx5e_unoffload_flow_list(struct mlx5e_priv *priv,
				      struct list_head *flow_list)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_flow *flow;

	list_for_each_entry(flow, flow_list, tmp_list)
		if (mlx5e_is_offloaded_flow(flow))
			mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
}

static void mlx5e_reoffload_decap(struct mlx5e_priv *priv,
				  struct list_head *decap_flows)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_flow *flow;

	list_for_each_entry(flow, decap_flows, tmp_list) {
		struct mlx5e_tc_flow_parse_attr *parse_attr;
		struct mlx5_flow_attr *attr = flow->attr;
		struct mlx5_flow_handle *rule;
		struct mlx5_flow_spec *spec;
		int err;

		if (flow_flag_test(flow, FAILED))
			continue;

		parse_attr = attr->parse_attr;
		spec = &parse_attr->spec;
		err = mlx5e_tc_tun_route_lookup(priv, spec, attr, parse_attr->filter_dev);
		if (err) {
			mlx5_core_warn(priv->mdev, "Failed to lookup route for flow, %d\n",
				       err);
			continue;
		}

		rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
		if (IS_ERR(rule)) {
			err = PTR_ERR(rule);
			mlx5_core_warn(priv->mdev, "Failed to update cached decap flow, %d\n",
				       err);
		} else {
			flow->rule[0] = rule;
			flow_flag_set(flow, OFFLOADED);
		}
	}
}

static int mlx5e_update_route_decap_flows(struct mlx5e_priv *priv,
					  struct mlx5e_route_entry *r,
					  struct list_head *flow_list,
					  bool replace)
{
	struct net_device *tunnel_dev;
	LIST_HEAD(decap_flows);

	tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
	if (!tunnel_dev)
		return -ENODEV;

	mlx5e_take_all_route_decap_flows(r, &decap_flows);
	if (mlx5e_route_entry_valid(r))
		mlx5e_unoffload_flow_list(priv, &decap_flows);
	if (replace)
		mlx5e_reoffload_decap(priv, &decap_flows);

	list_splice(&decap_flows, flow_list);

	return 0;
}

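/* Work item handler for FIB events. Under RTNL and the encap table lock it
 * invalidates or re-offloads all encap and decap flows that depend on the
 * affected route entry and updates the entry's VALID flag accordingly.
 */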
static void mlx5e_tc_fib_event_work(struct work_struct *work)
{
	struct mlx5e_tc_fib_event_data *event_data =
		container_of(work, struct mlx5e_tc_fib_event_data, work);
	struct net_device *ul_dev = event_data->ul_dev;
	struct mlx5e_priv *priv = netdev_priv(ul_dev);
	struct mlx5e_route_entry *r = event_data->r;
	struct mlx5_eswitch *esw;
	LIST_HEAD(flow_list);
	bool replace;
	int err;

	/* sync with concurrent neigh updates */
	rtnl_lock();
	esw = priv->mdev->priv.eswitch;
	mutex_lock(&esw->offloads.encap_tbl_lock);
	replace = event_data->event == FIB_EVENT_ENTRY_REPLACE;

	if (!mlx5e_route_entry_valid(r) && !replace)
		goto out;

	err = mlx5e_update_route_encaps(priv, r, &flow_list, replace);
	if (err)
		mlx5_core_warn(priv->mdev, "Failed to update route encaps, %d\n",
			       err);

	err = mlx5e_update_route_decap_flows(priv, r, &flow_list, replace);
	if (err)
		mlx5_core_warn(priv->mdev, "Failed to update route decap flows, %d\n",
			       err);

	if (replace)
		r->flags |= MLX5E_ROUTE_ENTRY_VALID;
out:
	mutex_unlock(&esw->offloads.encap_tbl_lock);
	rtnl_unlock();

	mlx5e_put_flow_list(priv, &flow_list);
	mlx5e_route_put(priv, event_data->r);
	dev_put(event_data->ul_dev);
	kfree(event_data);
}

static struct mlx5e_tc_fib_event_data *
mlx5e_init_fib_work_ipv4(struct mlx5e_priv *priv,
			 struct net_device *ul_dev,
			 struct mlx5e_tc_tun_encap *encap,
			 unsigned long event,
			 struct fib_notifier_info *info)
{
	struct fib_entry_notifier_info *fen_info;
	struct mlx5e_tc_fib_event_data *fib_work;
	struct mlx5e_route_entry *r;
	struct mlx5e_route_key key;
	struct net_device *fib_dev;

	fen_info = container_of(info, struct fib_entry_notifier_info, info);
	if (fen_info->fi->nh)
		return NULL;
	fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
	if (!fib_dev || fib_dev->netdev_ops != &mlx5e_netdev_ops ||
	    fen_info->dst_len != 32)
		return NULL;

	fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
	if (!fib_work)
		return ERR_PTR(-ENOMEM);

	key.endpoint_ip.v4 = htonl(fen_info->dst);
	key.ip_version = 4;

	/* Can't fail after this point because releasing reference to r
	 * requires obtaining sleeping mutex which we can't do in atomic
	 * context.
	 */
	r = mlx5e_route_lookup_for_update(encap, &key);
	if (!r)
		goto out;
	fib_work->r = r;
	dev_hold(ul_dev);

	return fib_work;

out:
	kfree(fib_work);
	return NULL;
}

static struct mlx5e_tc_fib_event_data *
mlx5e_init_fib_work_ipv6(struct mlx5e_priv *priv,
			 struct net_device *ul_dev,
			 struct mlx5e_tc_tun_encap *encap,
			 unsigned long event,
			 struct fib_notifier_info *info)
{
	struct fib6_entry_notifier_info *fen_info;
	struct mlx5e_tc_fib_event_data *fib_work;
	struct mlx5e_route_entry *r;
	struct mlx5e_route_key key;
	struct net_device *fib_dev;

	fen_info = container_of(info, struct fib6_entry_notifier_info, info);
	fib_dev = fib6_info_nh_dev(fen_info->rt);
	if (fib_dev->netdev_ops != &mlx5e_netdev_ops ||
	    fen_info->rt->fib6_dst.plen != 128)
		return NULL;

	fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
	if (!fib_work)
		return ERR_PTR(-ENOMEM);

	memcpy(&key.endpoint_ip.v6, &fen_info->rt->fib6_dst.addr,
	       sizeof(fen_info->rt->fib6_dst.addr));
	key.ip_version = 6;

	/* Can't fail after this point because releasing reference to r
	 * requires obtaining sleeping mutex which we can't do in atomic
	 * context.
	 */
	r = mlx5e_route_lookup_for_update(encap, &key);
	if (!r)
		goto out;
	fib_work->r = r;
	dev_hold(ul_dev);

	return fib_work;

out:
	kfree(fib_work);
	return NULL;
}

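/* FIB notifier callback. Runs in atomic context, so it only matches the event
 * against tracked /32 or /128 host routes and, on a hit, queues a work item
 * that performs the actual flow update.
 */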
static int mlx5e_tc_tun_fib_event(struct notifier_block *nb, unsigned long event, void *ptr)
{
	struct mlx5e_tc_fib_event_data *fib_work;
	struct fib_notifier_info *info = ptr;
	struct mlx5e_tc_tun_encap *encap;
	struct net_device *ul_dev;
	struct mlx5e_priv *priv;

	encap = container_of(nb, struct mlx5e_tc_tun_encap, fib_nb);
	priv = encap->priv;
	ul_dev = priv->netdev;
	priv = netdev_priv(ul_dev);

	switch (event) {
	case FIB_EVENT_ENTRY_REPLACE:
	case FIB_EVENT_ENTRY_DEL:
		if (info->family == AF_INET)
			fib_work = mlx5e_init_fib_work_ipv4(priv, ul_dev, encap, event, info);
		else if (info->family == AF_INET6)
			fib_work = mlx5e_init_fib_work_ipv6(priv, ul_dev, encap, event, info);
		else
			return NOTIFY_DONE;

		if (!IS_ERR_OR_NULL(fib_work)) {
			queue_work(priv->wq, &fib_work->work);
		} else if (IS_ERR(fib_work)) {
			NL_SET_ERR_MSG_MOD(info->extack, "Failed to init fib work");
			mlx5_core_warn(priv->mdev, "Failed to init fib work, %ld\n",
				       PTR_ERR(fib_work));
		}

		break;
	default:
		return NOTIFY_DONE;
	}

	return NOTIFY_DONE;
}

struct mlx5e_tc_tun_encap *mlx5e_tc_tun_init(struct mlx5e_priv *priv)
{
	struct mlx5e_tc_tun_encap *encap;
	int err;

	encap = kvzalloc(sizeof(*encap), GFP_KERNEL);
	if (!encap)
		return ERR_PTR(-ENOMEM);

	encap->priv = priv;
	encap->fib_nb.notifier_call = mlx5e_tc_tun_fib_event;
	spin_lock_init(&encap->route_lock);
	hash_init(encap->route_tbl);
	err = register_fib_notifier(dev_net(priv->netdev), &encap->fib_nb,
				    NULL, NULL);
	if (err) {
		kvfree(encap);
		return ERR_PTR(err);
	}

	return encap;
}

void mlx5e_tc_tun_cleanup(struct mlx5e_tc_tun_encap *encap)
{
	if (!encap)
		return;

	unregister_fib_notifier(dev_net(encap->priv->netdev), &encap->fib_nb);
	flush_workqueue(encap->priv->wq); /* flush fib event works */
	kvfree(encap);
}