1/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
2/* Copyright (c) 2018 Mellanox Technologies. */
3
4#include <net/inet_ecn.h>
5#include <net/vxlan.h>
6#include <net/gre.h>
7#include <net/geneve.h>
8#include <net/bareudp.h>
9#include "en/tc_tun.h"
10#include "en/tc_priv.h"
11#include "en_tc.h"
12#include "rep/tc.h"
13#include "rep/neigh.h"
14#include "lag/lag.h"
15#include "lag/mp.h"
16
17struct mlx5e_tc_tun_route_attr {
18 struct net_device *out_dev;
19 struct net_device *route_dev;
20 union {
21 struct flowi4 fl4;
22 struct flowi6 fl6;
23 } fl;
24 struct neighbour *n;
25 u8 ttl;
26};
27
/* Declare a zero-initialised struct mlx5e_tc_tun_route_attr called @name. */
#define TC_TUN_ROUTE_ATTR_INIT(name) \
	struct mlx5e_tc_tun_route_attr name = {}
29
30static void mlx5e_tc_tun_route_attr_cleanup(struct mlx5e_tc_tun_route_attr *attr)
31{
32 if (attr->n)
33 neigh_release(neigh: attr->n);
34 if (attr->route_dev)
35 dev_put(dev: attr->route_dev);
36}
37
38struct mlx5e_tc_tunnel *mlx5e_get_tc_tun(struct net_device *tunnel_dev)
39{
40 if (netif_is_vxlan(dev: tunnel_dev))
41 return &vxlan_tunnel;
42 else if (netif_is_geneve(dev: tunnel_dev))
43 return &geneve_tunnel;
44 else if (netif_is_gretap(dev: tunnel_dev) ||
45 netif_is_ip6gretap(dev: tunnel_dev))
46 return &gre_tunnel;
47 else if (netif_is_bareudp(dev: tunnel_dev))
48 return &mplsoudp_tunnel;
49 else
50 return NULL;
51}
52
53static int get_route_and_out_devs(struct mlx5e_priv *priv,
54 struct net_device *dev,
55 struct net_device **route_dev,
56 struct net_device **out_dev)
57{
58 struct net_device *uplink_dev, *uplink_upper, *real_dev;
59 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
60 bool dst_is_lag_dev;
61
62 real_dev = is_vlan_dev(dev) ? vlan_dev_real_dev(dev) : dev;
63 uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, rep_type: REP_ETH);
64
65 rcu_read_lock();
66 uplink_upper = netdev_master_upper_dev_get_rcu(dev: uplink_dev);
67 /* mlx5_lag_is_sriov() is a blocking function which can't be called
68 * while holding rcu read lock. Take the net_device for correctness
69 * sake.
70 */
71 if (uplink_upper)
72 dev_hold(dev: uplink_upper);
73 rcu_read_unlock();
74
75 dst_is_lag_dev = (uplink_upper &&
76 netif_is_lag_master(dev: uplink_upper) &&
77 real_dev == uplink_upper &&
78 mlx5_lag_is_sriov(dev: priv->mdev));
79 if (uplink_upper)
80 dev_put(dev: uplink_upper);
81
82 /* if the egress device isn't on the same HW e-switch or
83 * it's a LAG device, use the uplink
84 */
85 *route_dev = dev;
86 if (!netdev_port_same_parent_id(a: priv->netdev, b: real_dev) ||
87 dst_is_lag_dev || is_vlan_dev(dev: *route_dev) ||
88 netif_is_ovs_master(dev: *route_dev))
89 *out_dev = uplink_dev;
90 else if (mlx5e_eswitch_rep(netdev: dev) &&
91 mlx5e_is_valid_eswitch_fwd_dev(priv, out_dev: dev))
92 *out_dev = *route_dev;
93 else
94 return -EOPNOTSUPP;
95
96 if (!mlx5e_eswitch_uplink_rep(netdev: *out_dev))
97 return -EOPNOTSUPP;
98
99 if (mlx5e_eswitch_uplink_rep(netdev: priv->netdev) && *out_dev != priv->netdev &&
100 !mlx5_lag_is_mpesw(dev: priv->mdev))
101 return -EOPNOTSUPP;
102
103 return 0;
104}
105
106static int mlx5e_route_lookup_ipv4_get(struct mlx5e_priv *priv,
107 struct net_device *dev,
108 struct mlx5e_tc_tun_route_attr *attr)
109{
110 struct net_device *route_dev;
111 struct net_device *out_dev;
112 struct neighbour *n;
113 struct rtable *rt;
114
115#if IS_ENABLED(CONFIG_INET)
116 struct mlx5_core_dev *mdev = priv->mdev;
117 struct net_device *uplink_dev;
118 int ret;
119
120 if (mlx5_lag_is_multipath(dev: mdev)) {
121 struct mlx5_eswitch *esw = mdev->priv.eswitch;
122
123 uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, rep_type: REP_ETH);
124 attr->fl.fl4.flowi4_oif = uplink_dev->ifindex;
125 } else {
126 struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(tunnel_dev: dev);
127
128 if (tunnel && tunnel->get_remote_ifindex)
129 attr->fl.fl4.flowi4_oif = tunnel->get_remote_ifindex(dev);
130 }
131
132 rt = ip_route_output_key(net: dev_net(dev), flp: &attr->fl.fl4);
133 if (IS_ERR(ptr: rt))
134 return PTR_ERR(ptr: rt);
135
136 if (rt->rt_type != RTN_UNICAST) {
137 ret = -ENETUNREACH;
138 goto err_rt_release;
139 }
140
141 if (mlx5_lag_is_multipath(dev: mdev) && rt->rt_gw_family != AF_INET) {
142 ret = -ENETUNREACH;
143 goto err_rt_release;
144 }
145#else
146 return -EOPNOTSUPP;
147#endif
148
149 ret = get_route_and_out_devs(priv, dev: rt->dst.dev, route_dev: &route_dev, out_dev: &out_dev);
150 if (ret < 0)
151 goto err_rt_release;
152 dev_hold(dev: route_dev);
153
154 if (!attr->ttl)
155 attr->ttl = ip4_dst_hoplimit(dst: &rt->dst);
156 n = dst_neigh_lookup(dst: &rt->dst, daddr: &attr->fl.fl4.daddr);
157 if (!n) {
158 ret = -ENOMEM;
159 goto err_dev_release;
160 }
161
162 ip_rt_put(rt);
163 attr->route_dev = route_dev;
164 attr->out_dev = out_dev;
165 attr->n = n;
166 return 0;
167
168err_dev_release:
169 dev_put(dev: route_dev);
170err_rt_release:
171 ip_rt_put(rt);
172 return ret;
173}
174
/* Counterpart of mlx5e_route_lookup_ipv4_get(): hand @attr to the common
 * cleanup helper.
 */
static void mlx5e_route_lookup_ipv4_put(struct mlx5e_tc_tun_route_attr *attr)
{
	mlx5e_tc_tun_route_attr_cleanup(attr);
}
179
180static const char *mlx5e_netdev_kind(struct net_device *dev)
181{
182 if (dev->rtnl_link_ops)
183 return dev->rtnl_link_ops->kind;
184 else
185 return "unknown";
186}
187
188static int mlx5e_gen_ip_tunnel_header(char buf[], __u8 *ip_proto,
189 struct mlx5e_encap_entry *e)
190{
191 if (!e->tunnel) {
192 pr_warn("mlx5: Cannot generate tunnel header for this tunnel\n");
193 return -EOPNOTSUPP;
194 }
195
196 return e->tunnel->generate_ip_tun_hdr(buf, ip_proto, e);
197}
198
199static char *gen_eth_tnl_hdr(char *buf, struct net_device *dev,
200 struct mlx5e_encap_entry *e,
201 u16 proto)
202{
203 struct ethhdr *eth = (struct ethhdr *)buf;
204 char *ip;
205
206 ether_addr_copy(dst: eth->h_dest, src: e->h_dest);
207 ether_addr_copy(dst: eth->h_source, src: dev->dev_addr);
208 if (is_vlan_dev(dev)) {
209 struct vlan_hdr *vlan = (struct vlan_hdr *)
210 ((char *)eth + ETH_HLEN);
211 ip = (char *)vlan + VLAN_HLEN;
212 eth->h_proto = vlan_dev_vlan_proto(dev);
213 vlan->h_vlan_TCI = htons(vlan_dev_vlan_id(dev));
214 vlan->h_vlan_encapsulated_proto = htons(proto);
215 } else {
216 eth->h_proto = htons(proto);
217 ip = (char *)eth + ETH_HLEN;
218 }
219
220 return ip;
221}
222
223int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
224 struct net_device *mirred_dev,
225 struct mlx5e_encap_entry *e)
226{
227 int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
228 const struct ip_tunnel_key *tun_key = &e->tun_info->key;
229 struct mlx5_pkt_reformat_params reformat_params;
230 struct mlx5e_neigh m_neigh = {};
231 TC_TUN_ROUTE_ATTR_INIT(attr);
232 int ipv4_encap_size;
233 char *encap_header;
234 struct iphdr *ip;
235 u8 nud_state;
236 int err;
237
238 /* add the IP fields */
239 attr.fl.fl4.flowi4_tos = tun_key->tos & ~INET_ECN_MASK;
240 attr.fl.fl4.daddr = tun_key->u.ipv4.dst;
241 attr.fl.fl4.saddr = tun_key->u.ipv4.src;
242 attr.ttl = tun_key->ttl;
243
244 err = mlx5e_route_lookup_ipv4_get(priv, dev: mirred_dev, attr: &attr);
245 if (err)
246 return err;
247
248 ipv4_encap_size =
249 (is_vlan_dev(dev: attr.route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) +
250 sizeof(struct iphdr) +
251 e->tunnel->calc_hlen(e);
252
253 if (max_encap_size < ipv4_encap_size) {
254 mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
255 ipv4_encap_size, max_encap_size);
256 err = -EOPNOTSUPP;
257 goto release_neigh;
258 }
259
260 encap_header = kzalloc(size: ipv4_encap_size, GFP_KERNEL);
261 if (!encap_header) {
262 err = -ENOMEM;
263 goto release_neigh;
264 }
265
266 m_neigh.family = attr.n->ops->family;
267 memcpy(&m_neigh.dst_ip, attr.n->primary_key, attr.n->tbl->key_len);
268 e->out_dev = attr.out_dev;
269 e->route_dev_ifindex = attr.route_dev->ifindex;
270
271 /* It's important to add the neigh to the hash table before checking
272 * the neigh validity state. So if we'll get a notification, in case the
273 * neigh changes it's validity state, we would find the relevant neigh
274 * in the hash.
275 */
276 err = mlx5e_rep_encap_entry_attach(priv: netdev_priv(dev: attr.out_dev), e, m_neigh: &m_neigh, neigh_dev: attr.n->dev);
277 if (err)
278 goto free_encap;
279
280 read_lock_bh(&attr.n->lock);
281 nud_state = attr.n->nud_state;
282 ether_addr_copy(dst: e->h_dest, src: attr.n->ha);
283 read_unlock_bh(&attr.n->lock);
284
285 /* add ethernet header */
286 ip = (struct iphdr *)gen_eth_tnl_hdr(buf: encap_header, dev: attr.route_dev, e,
287 ETH_P_IP);
288
289 /* add ip header */
290 ip->tos = tun_key->tos;
291 ip->version = 0x4;
292 ip->ihl = 0x5;
293 ip->ttl = attr.ttl;
294 ip->daddr = attr.fl.fl4.daddr;
295 ip->saddr = attr.fl.fl4.saddr;
296
297 /* add tunneling protocol header */
298 err = mlx5e_gen_ip_tunnel_header(buf: (char *)ip + sizeof(struct iphdr),
299 ip_proto: &ip->protocol, e);
300 if (err)
301 goto destroy_neigh_entry;
302
303 e->encap_size = ipv4_encap_size;
304 e->encap_header = encap_header;
305 encap_header = NULL;
306
307 if (!(nud_state & NUD_VALID)) {
308 neigh_event_send(neigh: attr.n, NULL);
309 /* the encap entry will be made valid on neigh update event
310 * and not used before that.
311 */
312 goto release_neigh;
313 }
314
315 memset(&reformat_params, 0, sizeof(reformat_params));
316 reformat_params.type = e->reformat_type;
317 reformat_params.size = e->encap_size;
318 reformat_params.data = e->encap_header;
319 e->pkt_reformat = mlx5_packet_reformat_alloc(dev: priv->mdev, params: &reformat_params,
320 ns_type: MLX5_FLOW_NAMESPACE_FDB);
321 if (IS_ERR(ptr: e->pkt_reformat)) {
322 err = PTR_ERR(ptr: e->pkt_reformat);
323 goto destroy_neigh_entry;
324 }
325
326 e->flags |= MLX5_ENCAP_ENTRY_VALID;
327 mlx5e_rep_queue_neigh_stats_work(priv: netdev_priv(dev: attr.out_dev));
328 mlx5e_route_lookup_ipv4_put(attr: &attr);
329 return err;
330
331destroy_neigh_entry:
332 mlx5e_rep_encap_entry_detach(priv: netdev_priv(dev: e->out_dev), e);
333free_encap:
334 kfree(objp: encap_header);
335release_neigh:
336 mlx5e_route_lookup_ipv4_put(attr: &attr);
337 return err;
338}
339
340int mlx5e_tc_tun_update_header_ipv4(struct mlx5e_priv *priv,
341 struct net_device *mirred_dev,
342 struct mlx5e_encap_entry *e)
343{
344 int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
345 const struct ip_tunnel_key *tun_key = &e->tun_info->key;
346 struct mlx5_pkt_reformat_params reformat_params;
347 TC_TUN_ROUTE_ATTR_INIT(attr);
348 int ipv4_encap_size;
349 char *encap_header;
350 struct iphdr *ip;
351 u8 nud_state;
352 int err;
353
354 /* add the IP fields */
355 attr.fl.fl4.flowi4_tos = tun_key->tos & ~INET_ECN_MASK;
356 attr.fl.fl4.daddr = tun_key->u.ipv4.dst;
357 attr.fl.fl4.saddr = tun_key->u.ipv4.src;
358 attr.ttl = tun_key->ttl;
359
360 err = mlx5e_route_lookup_ipv4_get(priv, dev: mirred_dev, attr: &attr);
361 if (err)
362 return err;
363
364 ipv4_encap_size =
365 (is_vlan_dev(dev: attr.route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) +
366 sizeof(struct iphdr) +
367 e->tunnel->calc_hlen(e);
368
369 if (max_encap_size < ipv4_encap_size) {
370 mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
371 ipv4_encap_size, max_encap_size);
372 err = -EOPNOTSUPP;
373 goto release_neigh;
374 }
375
376 encap_header = kzalloc(size: ipv4_encap_size, GFP_KERNEL);
377 if (!encap_header) {
378 err = -ENOMEM;
379 goto release_neigh;
380 }
381
382 e->route_dev_ifindex = attr.route_dev->ifindex;
383
384 read_lock_bh(&attr.n->lock);
385 nud_state = attr.n->nud_state;
386 ether_addr_copy(dst: e->h_dest, src: attr.n->ha);
387 WRITE_ONCE(e->nhe->neigh_dev, attr.n->dev);
388 read_unlock_bh(&attr.n->lock);
389
390 /* add ethernet header */
391 ip = (struct iphdr *)gen_eth_tnl_hdr(buf: encap_header, dev: attr.route_dev, e,
392 ETH_P_IP);
393
394 /* add ip header */
395 ip->tos = tun_key->tos;
396 ip->version = 0x4;
397 ip->ihl = 0x5;
398 ip->ttl = attr.ttl;
399 ip->daddr = attr.fl.fl4.daddr;
400 ip->saddr = attr.fl.fl4.saddr;
401
402 /* add tunneling protocol header */
403 err = mlx5e_gen_ip_tunnel_header(buf: (char *)ip + sizeof(struct iphdr),
404 ip_proto: &ip->protocol, e);
405 if (err)
406 goto free_encap;
407
408 e->encap_size = ipv4_encap_size;
409 kfree(objp: e->encap_header);
410 e->encap_header = encap_header;
411 encap_header = NULL;
412
413 if (!(nud_state & NUD_VALID)) {
414 neigh_event_send(neigh: attr.n, NULL);
415 /* the encap entry will be made valid on neigh update event
416 * and not used before that.
417 */
418 goto release_neigh;
419 }
420
421 memset(&reformat_params, 0, sizeof(reformat_params));
422 reformat_params.type = e->reformat_type;
423 reformat_params.size = e->encap_size;
424 reformat_params.data = e->encap_header;
425 e->pkt_reformat = mlx5_packet_reformat_alloc(dev: priv->mdev, params: &reformat_params,
426 ns_type: MLX5_FLOW_NAMESPACE_FDB);
427 if (IS_ERR(ptr: e->pkt_reformat)) {
428 err = PTR_ERR(ptr: e->pkt_reformat);
429 goto free_encap;
430 }
431
432 e->flags |= MLX5_ENCAP_ENTRY_VALID;
433 mlx5e_rep_queue_neigh_stats_work(priv: netdev_priv(dev: attr.out_dev));
434 mlx5e_route_lookup_ipv4_put(attr: &attr);
435 return err;
436
437free_encap:
438 kfree(objp: encap_header);
439release_neigh:
440 mlx5e_route_lookup_ipv4_put(attr: &attr);
441 return err;
442}
443
444#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
445static int mlx5e_route_lookup_ipv6_get(struct mlx5e_priv *priv,
446 struct net_device *dev,
447 struct mlx5e_tc_tun_route_attr *attr)
448{
449 struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(tunnel_dev: dev);
450 struct net_device *route_dev;
451 struct net_device *out_dev;
452 struct dst_entry *dst;
453 struct neighbour *n;
454 int ret;
455
456 if (tunnel && tunnel->get_remote_ifindex)
457 attr->fl.fl6.flowi6_oif = tunnel->get_remote_ifindex(dev);
458 dst = ipv6_stub->ipv6_dst_lookup_flow(dev_net(dev), NULL, &attr->fl.fl6,
459 NULL);
460 if (IS_ERR(ptr: dst))
461 return PTR_ERR(ptr: dst);
462
463 if (!attr->ttl)
464 attr->ttl = ip6_dst_hoplimit(dst);
465
466 ret = get_route_and_out_devs(priv, dev: dst->dev, route_dev: &route_dev, out_dev: &out_dev);
467 if (ret < 0)
468 goto err_dst_release;
469
470 dev_hold(dev: route_dev);
471 n = dst_neigh_lookup(dst, daddr: &attr->fl.fl6.daddr);
472 if (!n) {
473 ret = -ENOMEM;
474 goto err_dev_release;
475 }
476
477 dst_release(dst);
478 attr->out_dev = out_dev;
479 attr->route_dev = route_dev;
480 attr->n = n;
481 return 0;
482
483err_dev_release:
484 dev_put(dev: route_dev);
485err_dst_release:
486 dst_release(dst);
487 return ret;
488}
489
/* Counterpart of mlx5e_route_lookup_ipv6_get(): hand @attr to the common
 * cleanup helper.
 */
static void mlx5e_route_lookup_ipv6_put(struct mlx5e_tc_tun_route_attr *attr)
{
	mlx5e_tc_tun_route_attr_cleanup(attr);
}
494
495int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
496 struct net_device *mirred_dev,
497 struct mlx5e_encap_entry *e)
498{
499 int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
500 const struct ip_tunnel_key *tun_key = &e->tun_info->key;
501 struct mlx5_pkt_reformat_params reformat_params;
502 struct mlx5e_neigh m_neigh = {};
503 TC_TUN_ROUTE_ATTR_INIT(attr);
504 struct ipv6hdr *ip6h;
505 int ipv6_encap_size;
506 char *encap_header;
507 u8 nud_state;
508 int err;
509
510 attr.ttl = tun_key->ttl;
511 attr.fl.fl6.flowlabel = ip6_make_flowinfo(tclass: tun_key->tos, flowlabel: tun_key->label);
512 attr.fl.fl6.daddr = tun_key->u.ipv6.dst;
513 attr.fl.fl6.saddr = tun_key->u.ipv6.src;
514
515 err = mlx5e_route_lookup_ipv6_get(priv, dev: mirred_dev, attr: &attr);
516 if (err)
517 return err;
518
519 ipv6_encap_size =
520 (is_vlan_dev(dev: attr.route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) +
521 sizeof(struct ipv6hdr) +
522 e->tunnel->calc_hlen(e);
523
524 if (max_encap_size < ipv6_encap_size) {
525 mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
526 ipv6_encap_size, max_encap_size);
527 err = -EOPNOTSUPP;
528 goto release_neigh;
529 }
530
531 encap_header = kzalloc(size: ipv6_encap_size, GFP_KERNEL);
532 if (!encap_header) {
533 err = -ENOMEM;
534 goto release_neigh;
535 }
536
537 m_neigh.family = attr.n->ops->family;
538 memcpy(&m_neigh.dst_ip, attr.n->primary_key, attr.n->tbl->key_len);
539 e->out_dev = attr.out_dev;
540 e->route_dev_ifindex = attr.route_dev->ifindex;
541
542 /* It's important to add the neigh to the hash table before checking
543 * the neigh validity state. So if we'll get a notification, in case the
544 * neigh changes it's validity state, we would find the relevant neigh
545 * in the hash.
546 */
547 err = mlx5e_rep_encap_entry_attach(priv: netdev_priv(dev: attr.out_dev), e, m_neigh: &m_neigh, neigh_dev: attr.n->dev);
548 if (err)
549 goto free_encap;
550
551 read_lock_bh(&attr.n->lock);
552 nud_state = attr.n->nud_state;
553 ether_addr_copy(dst: e->h_dest, src: attr.n->ha);
554 read_unlock_bh(&attr.n->lock);
555
556 /* add ethernet header */
557 ip6h = (struct ipv6hdr *)gen_eth_tnl_hdr(buf: encap_header, dev: attr.route_dev, e,
558 ETH_P_IPV6);
559
560 /* add ip header */
561 ip6_flow_hdr(hdr: ip6h, tclass: tun_key->tos, flowlabel: 0);
562 /* the HW fills up ipv6 payload len */
563 ip6h->hop_limit = attr.ttl;
564 ip6h->daddr = attr.fl.fl6.daddr;
565 ip6h->saddr = attr.fl.fl6.saddr;
566
567 /* add tunneling protocol header */
568 err = mlx5e_gen_ip_tunnel_header(buf: (char *)ip6h + sizeof(struct ipv6hdr),
569 ip_proto: &ip6h->nexthdr, e);
570 if (err)
571 goto destroy_neigh_entry;
572
573 e->encap_size = ipv6_encap_size;
574 e->encap_header = encap_header;
575 encap_header = NULL;
576
577 if (!(nud_state & NUD_VALID)) {
578 neigh_event_send(neigh: attr.n, NULL);
579 /* the encap entry will be made valid on neigh update event
580 * and not used before that.
581 */
582 goto release_neigh;
583 }
584
585 memset(&reformat_params, 0, sizeof(reformat_params));
586 reformat_params.type = e->reformat_type;
587 reformat_params.size = e->encap_size;
588 reformat_params.data = e->encap_header;
589 e->pkt_reformat = mlx5_packet_reformat_alloc(dev: priv->mdev, params: &reformat_params,
590 ns_type: MLX5_FLOW_NAMESPACE_FDB);
591 if (IS_ERR(ptr: e->pkt_reformat)) {
592 err = PTR_ERR(ptr: e->pkt_reformat);
593 goto destroy_neigh_entry;
594 }
595
596 e->flags |= MLX5_ENCAP_ENTRY_VALID;
597 mlx5e_rep_queue_neigh_stats_work(priv: netdev_priv(dev: attr.out_dev));
598 mlx5e_route_lookup_ipv6_put(attr: &attr);
599 return err;
600
601destroy_neigh_entry:
602 mlx5e_rep_encap_entry_detach(priv: netdev_priv(dev: e->out_dev), e);
603free_encap:
604 kfree(objp: encap_header);
605release_neigh:
606 mlx5e_route_lookup_ipv6_put(attr: &attr);
607 return err;
608}
609
610int mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv,
611 struct net_device *mirred_dev,
612 struct mlx5e_encap_entry *e)
613{
614 int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
615 const struct ip_tunnel_key *tun_key = &e->tun_info->key;
616 struct mlx5_pkt_reformat_params reformat_params;
617 TC_TUN_ROUTE_ATTR_INIT(attr);
618 struct ipv6hdr *ip6h;
619 int ipv6_encap_size;
620 char *encap_header;
621 u8 nud_state;
622 int err;
623
624 attr.ttl = tun_key->ttl;
625
626 attr.fl.fl6.flowlabel = ip6_make_flowinfo(tclass: tun_key->tos, flowlabel: tun_key->label);
627 attr.fl.fl6.daddr = tun_key->u.ipv6.dst;
628 attr.fl.fl6.saddr = tun_key->u.ipv6.src;
629
630 err = mlx5e_route_lookup_ipv6_get(priv, dev: mirred_dev, attr: &attr);
631 if (err)
632 return err;
633
634 ipv6_encap_size =
635 (is_vlan_dev(dev: attr.route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) +
636 sizeof(struct ipv6hdr) +
637 e->tunnel->calc_hlen(e);
638
639 if (max_encap_size < ipv6_encap_size) {
640 mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
641 ipv6_encap_size, max_encap_size);
642 err = -EOPNOTSUPP;
643 goto release_neigh;
644 }
645
646 encap_header = kzalloc(size: ipv6_encap_size, GFP_KERNEL);
647 if (!encap_header) {
648 err = -ENOMEM;
649 goto release_neigh;
650 }
651
652 e->route_dev_ifindex = attr.route_dev->ifindex;
653
654 read_lock_bh(&attr.n->lock);
655 nud_state = attr.n->nud_state;
656 ether_addr_copy(dst: e->h_dest, src: attr.n->ha);
657 WRITE_ONCE(e->nhe->neigh_dev, attr.n->dev);
658 read_unlock_bh(&attr.n->lock);
659
660 /* add ethernet header */
661 ip6h = (struct ipv6hdr *)gen_eth_tnl_hdr(buf: encap_header, dev: attr.route_dev, e,
662 ETH_P_IPV6);
663
664 /* add ip header */
665 ip6_flow_hdr(hdr: ip6h, tclass: tun_key->tos, flowlabel: 0);
666 /* the HW fills up ipv6 payload len */
667 ip6h->hop_limit = attr.ttl;
668 ip6h->daddr = attr.fl.fl6.daddr;
669 ip6h->saddr = attr.fl.fl6.saddr;
670
671 /* add tunneling protocol header */
672 err = mlx5e_gen_ip_tunnel_header(buf: (char *)ip6h + sizeof(struct ipv6hdr),
673 ip_proto: &ip6h->nexthdr, e);
674 if (err)
675 goto free_encap;
676
677 e->encap_size = ipv6_encap_size;
678 kfree(objp: e->encap_header);
679 e->encap_header = encap_header;
680 encap_header = NULL;
681
682 if (!(nud_state & NUD_VALID)) {
683 neigh_event_send(neigh: attr.n, NULL);
684 /* the encap entry will be made valid on neigh update event
685 * and not used before that.
686 */
687 goto release_neigh;
688 }
689
690 memset(&reformat_params, 0, sizeof(reformat_params));
691 reformat_params.type = e->reformat_type;
692 reformat_params.size = e->encap_size;
693 reformat_params.data = e->encap_header;
694 e->pkt_reformat = mlx5_packet_reformat_alloc(dev: priv->mdev, params: &reformat_params,
695 ns_type: MLX5_FLOW_NAMESPACE_FDB);
696 if (IS_ERR(ptr: e->pkt_reformat)) {
697 err = PTR_ERR(ptr: e->pkt_reformat);
698 goto free_encap;
699 }
700
701 e->flags |= MLX5_ENCAP_ENTRY_VALID;
702 mlx5e_rep_queue_neigh_stats_work(priv: netdev_priv(dev: attr.out_dev));
703 mlx5e_route_lookup_ipv6_put(attr: &attr);
704 return err;
705
706free_encap:
707 kfree(objp: encap_header);
708release_neigh:
709 mlx5e_route_lookup_ipv6_put(attr: &attr);
710 return err;
711}
712#endif
713
714int mlx5e_tc_tun_route_lookup(struct mlx5e_priv *priv,
715 struct mlx5_flow_spec *spec,
716 struct mlx5_flow_attr *flow_attr,
717 struct net_device *filter_dev)
718{
719 struct mlx5_esw_flow_attr *esw_attr = flow_attr->esw_attr;
720 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
721 struct mlx5e_tc_int_port *int_port;
722 TC_TUN_ROUTE_ATTR_INIT(attr);
723 u16 vport_num;
724 int err = 0;
725
726 if (flow_attr->tun_ip_version == 4) {
727 /* Addresses are swapped for decap */
728 attr.fl.fl4.saddr = esw_attr->rx_tun_attr->dst_ip.v4;
729 attr.fl.fl4.daddr = esw_attr->rx_tun_attr->src_ip.v4;
730 err = mlx5e_route_lookup_ipv4_get(priv, dev: filter_dev, attr: &attr);
731 }
732#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
733 else if (flow_attr->tun_ip_version == 6) {
734 /* Addresses are swapped for decap */
735 attr.fl.fl6.saddr = esw_attr->rx_tun_attr->dst_ip.v6;
736 attr.fl.fl6.daddr = esw_attr->rx_tun_attr->src_ip.v6;
737 err = mlx5e_route_lookup_ipv6_get(priv, dev: filter_dev, attr: &attr);
738 }
739#endif
740 else
741 return 0;
742
743 if (err)
744 return err;
745
746 if (attr.route_dev->netdev_ops == &mlx5e_netdev_ops &&
747 mlx5e_tc_is_vf_tunnel(out_dev: attr.out_dev, route_dev: attr.route_dev)) {
748 err = mlx5e_tc_query_route_vport(out_dev: attr.out_dev, route_dev: attr.route_dev, vport: &vport_num);
749 if (err)
750 goto out;
751
752 esw_attr->rx_tun_attr->decap_vport = vport_num;
753 } else if (netif_is_ovs_master(dev: attr.route_dev) && mlx5e_tc_int_port_supported(esw)) {
754 int_port = mlx5e_tc_int_port_get(priv: mlx5e_get_int_port_priv(priv),
755 ifindex: attr.route_dev->ifindex,
756 type: MLX5E_TC_INT_PORT_INGRESS);
757 if (IS_ERR(ptr: int_port)) {
758 err = PTR_ERR(ptr: int_port);
759 goto out;
760 }
761 esw_attr->int_port = int_port;
762 }
763
764out:
765 if (flow_attr->tun_ip_version == 4)
766 mlx5e_route_lookup_ipv4_put(attr: &attr);
767#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
768 else if (flow_attr->tun_ip_version == 6)
769 mlx5e_route_lookup_ipv6_put(attr: &attr);
770#endif
771 return err;
772}
773
774bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv,
775 struct net_device *netdev)
776{
777 struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(tunnel_dev: netdev);
778
779 if (tunnel && tunnel->can_offload(priv))
780 return true;
781 else
782 return false;
783}
784
785int mlx5e_tc_tun_init_encap_attr(struct net_device *tunnel_dev,
786 struct mlx5e_priv *priv,
787 struct mlx5e_encap_entry *e,
788 struct netlink_ext_ack *extack)
789{
790 struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(tunnel_dev);
791
792 if (!tunnel) {
793 e->reformat_type = -1;
794 return -EOPNOTSUPP;
795 }
796
797 return tunnel->init_encap_attr(tunnel_dev, priv, e, extack);
798}
799
800int mlx5e_tc_tun_parse(struct net_device *filter_dev,
801 struct mlx5e_priv *priv,
802 struct mlx5_flow_spec *spec,
803 struct flow_cls_offload *f,
804 u8 *match_level)
805{
806 struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(tunnel_dev: filter_dev);
807 struct flow_rule *rule = flow_cls_offload_flow_rule(flow_cmd: f);
808 void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
809 outer_headers);
810 void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
811 outer_headers);
812 struct netlink_ext_ack *extack = f->common.extack;
813 int err = 0;
814
815 if (!tunnel) {
816 netdev_warn(dev: priv->netdev,
817 format: "decapsulation offload is not supported for %s net device\n",
818 mlx5e_netdev_kind(dev: filter_dev));
819 err = -EOPNOTSUPP;
820 goto out;
821 }
822
823 *match_level = tunnel->match_level;
824
825 if (tunnel->parse_udp_ports) {
826 err = tunnel->parse_udp_ports(priv, spec, f,
827 headers_c, headers_v);
828 if (err)
829 goto out;
830 }
831
832 if (tunnel->parse_tunnel) {
833 err = tunnel->parse_tunnel(priv, spec, f,
834 headers_c, headers_v);
835 if (err)
836 goto out;
837 }
838
839 if (flow_rule_match_key(rule, key: FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
840 struct flow_dissector_key_basic key_basic = {};
841 struct flow_dissector_key_basic mask_basic = {
842 .n_proto = htons(0xFFFF),
843 };
844 struct flow_match_basic match_basic = {
845 .key = &key_basic, .mask = &mask_basic,
846 };
847 struct flow_match_control match;
848 u16 addr_type;
849
850 flow_rule_match_enc_control(rule, out: &match);
851 addr_type = match.key->addr_type;
852
853 /* For tunnel addr_type used same key id`s as for non-tunnel */
854 if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
855 struct flow_match_ipv4_addrs match;
856
857 flow_rule_match_enc_ipv4_addrs(rule, out: &match);
858 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
859 src_ipv4_src_ipv6.ipv4_layout.ipv4,
860 ntohl(match.mask->src));
861 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
862 src_ipv4_src_ipv6.ipv4_layout.ipv4,
863 ntohl(match.key->src));
864
865 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
866 dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
867 ntohl(match.mask->dst));
868 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
869 dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
870 ntohl(match.key->dst));
871
872 key_basic.n_proto = htons(ETH_P_IP);
873 mlx5e_tc_set_ethertype(mdev: priv->mdev, match: &match_basic, outer: true,
874 headers_c, headers_v);
875 } else if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
876 struct flow_match_ipv6_addrs match;
877
878 flow_rule_match_enc_ipv6_addrs(rule, out: &match);
879 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
880 src_ipv4_src_ipv6.ipv6_layout.ipv6),
881 &match.mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout,
882 ipv6));
883 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
884 src_ipv4_src_ipv6.ipv6_layout.ipv6),
885 &match.key->src, MLX5_FLD_SZ_BYTES(ipv6_layout,
886 ipv6));
887
888 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
889 dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
890 &match.mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout,
891 ipv6));
892 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
893 dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
894 &match.key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout,
895 ipv6));
896
897 key_basic.n_proto = htons(ETH_P_IPV6);
898 mlx5e_tc_set_ethertype(mdev: priv->mdev, match: &match_basic, outer: true,
899 headers_c, headers_v);
900 }
901 }
902
903 if (flow_rule_match_key(rule, key: FLOW_DISSECTOR_KEY_ENC_IP)) {
904 struct flow_match_ip match;
905
906 flow_rule_match_enc_ip(rule, out: &match);
907 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn,
908 match.mask->tos & 0x3);
909 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn,
910 match.key->tos & 0x3);
911
912 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp,
913 match.mask->tos >> 2);
914 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp,
915 match.key->tos >> 2);
916
917 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit,
918 match.mask->ttl);
919 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit,
920 match.key->ttl);
921
922 if (match.mask->ttl &&
923 !MLX5_CAP_ESW_FLOWTABLE_FDB
924 (priv->mdev,
925 ft_field_support.outer_ipv4_ttl)) {
926 NL_SET_ERR_MSG_MOD(extack,
927 "Matching on TTL is not supported");
928 err = -EOPNOTSUPP;
929 goto out;
930 }
931 }
932
933 /* let software handle IP fragments */
934 MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
935 MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, 0);
936
937 return 0;
938
939out:
940 return err;
941}
942
943int mlx5e_tc_tun_parse_udp_ports(struct mlx5e_priv *priv,
944 struct mlx5_flow_spec *spec,
945 struct flow_cls_offload *f,
946 void *headers_c,
947 void *headers_v)
948{
949 struct flow_rule *rule = flow_cls_offload_flow_rule(flow_cmd: f);
950 struct netlink_ext_ack *extack = f->common.extack;
951 struct flow_match_ports enc_ports;
952
953 /* Full udp dst port must be given */
954
955 if (!flow_rule_match_key(rule, key: FLOW_DISSECTOR_KEY_ENC_PORTS)) {
956 NL_SET_ERR_MSG_MOD(extack,
957 "UDP tunnel decap filter must include enc_dst_port condition");
958 netdev_warn(dev: priv->netdev,
959 format: "UDP tunnel decap filter must include enc_dst_port condition\n");
960 return -EOPNOTSUPP;
961 }
962
963 flow_rule_match_enc_ports(rule, out: &enc_ports);
964
965 if (memchr_inv(p: &enc_ports.mask->dst, c: 0xff,
966 size: sizeof(enc_ports.mask->dst))) {
967 NL_SET_ERR_MSG_MOD(extack,
968 "UDP tunnel decap filter must match enc_dst_port fully");
969 netdev_warn(dev: priv->netdev,
970 format: "UDP tunnel decap filter must match enc_dst_port fully\n");
971 return -EOPNOTSUPP;
972 }
973
974 /* match on UDP protocol and dst port number */
975
976 MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol);
977 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_UDP);
978
979 MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport,
980 ntohs(enc_ports.mask->dst));
981 MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport,
982 ntohs(enc_ports.key->dst));
983
984 /* UDP src port on outer header is generated by HW,
985 * so it is probably a bad idea to request matching it.
986 * Nonetheless, it is allowed.
987 */
988
989 MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport,
990 ntohs(enc_ports.mask->src));
991 MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport,
992 ntohs(enc_ports.key->src));
993
994 return 0;
995}
996

source code of linux/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c