1 | /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ |
2 | /* Copyright (c) 2018 Mellanox Technologies. */ |
3 | |
4 | #include <net/inet_ecn.h> |
5 | #include <net/vxlan.h> |
6 | #include <net/gre.h> |
7 | #include <net/geneve.h> |
8 | #include <net/bareudp.h> |
9 | #include "en/tc_tun.h" |
10 | #include "en/tc_priv.h" |
11 | #include "en_tc.h" |
12 | #include "rep/tc.h" |
13 | #include "rep/neigh.h" |
14 | #include "lag/lag.h" |
15 | #include "lag/mp.h" |
16 | |
17 | struct mlx5e_tc_tun_route_attr { |
18 | struct net_device *out_dev; |
19 | struct net_device *route_dev; |
20 | union { |
21 | struct flowi4 fl4; |
22 | struct flowi6 fl6; |
23 | } fl; |
24 | struct neighbour *n; |
25 | u8 ttl; |
26 | }; |
27 | |
/* Declare a zero-initialised struct mlx5e_tc_tun_route_attr called @name. */
#define TC_TUN_ROUTE_ATTR_INIT(name) struct mlx5e_tc_tun_route_attr name = {}
29 | |
30 | static void mlx5e_tc_tun_route_attr_cleanup(struct mlx5e_tc_tun_route_attr *attr) |
31 | { |
32 | if (attr->n) |
33 | neigh_release(neigh: attr->n); |
34 | if (attr->route_dev) |
35 | dev_put(dev: attr->route_dev); |
36 | } |
37 | |
38 | struct mlx5e_tc_tunnel *mlx5e_get_tc_tun(struct net_device *tunnel_dev) |
39 | { |
40 | if (netif_is_vxlan(dev: tunnel_dev)) |
41 | return &vxlan_tunnel; |
42 | else if (netif_is_geneve(dev: tunnel_dev)) |
43 | return &geneve_tunnel; |
44 | else if (netif_is_gretap(dev: tunnel_dev) || |
45 | netif_is_ip6gretap(dev: tunnel_dev)) |
46 | return &gre_tunnel; |
47 | else if (netif_is_bareudp(dev: tunnel_dev)) |
48 | return &mplsoudp_tunnel; |
49 | else |
50 | return NULL; |
51 | } |
52 | |
53 | static int get_route_and_out_devs(struct mlx5e_priv *priv, |
54 | struct net_device *dev, |
55 | struct net_device **route_dev, |
56 | struct net_device **out_dev) |
57 | { |
58 | struct net_device *uplink_dev, *uplink_upper, *real_dev; |
59 | struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; |
60 | bool dst_is_lag_dev; |
61 | |
62 | real_dev = is_vlan_dev(dev) ? vlan_dev_real_dev(dev) : dev; |
63 | uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, rep_type: REP_ETH); |
64 | |
65 | rcu_read_lock(); |
66 | uplink_upper = netdev_master_upper_dev_get_rcu(dev: uplink_dev); |
67 | /* mlx5_lag_is_sriov() is a blocking function which can't be called |
68 | * while holding rcu read lock. Take the net_device for correctness |
69 | * sake. |
70 | */ |
71 | if (uplink_upper) |
72 | dev_hold(dev: uplink_upper); |
73 | rcu_read_unlock(); |
74 | |
75 | dst_is_lag_dev = (uplink_upper && |
76 | netif_is_lag_master(dev: uplink_upper) && |
77 | real_dev == uplink_upper && |
78 | mlx5_lag_is_sriov(dev: priv->mdev)); |
79 | if (uplink_upper) |
80 | dev_put(dev: uplink_upper); |
81 | |
82 | /* if the egress device isn't on the same HW e-switch or |
83 | * it's a LAG device, use the uplink |
84 | */ |
85 | *route_dev = dev; |
86 | if (!netdev_port_same_parent_id(a: priv->netdev, b: real_dev) || |
87 | dst_is_lag_dev || is_vlan_dev(dev: *route_dev) || |
88 | netif_is_ovs_master(dev: *route_dev)) |
89 | *out_dev = uplink_dev; |
90 | else if (mlx5e_eswitch_rep(netdev: dev) && |
91 | mlx5e_is_valid_eswitch_fwd_dev(priv, out_dev: dev)) |
92 | *out_dev = *route_dev; |
93 | else |
94 | return -EOPNOTSUPP; |
95 | |
96 | if (!mlx5e_eswitch_uplink_rep(netdev: *out_dev)) |
97 | return -EOPNOTSUPP; |
98 | |
99 | if (mlx5e_eswitch_uplink_rep(netdev: priv->netdev) && *out_dev != priv->netdev && |
100 | !mlx5_lag_is_mpesw(dev: priv->mdev)) |
101 | return -EOPNOTSUPP; |
102 | |
103 | return 0; |
104 | } |
105 | |
106 | static int mlx5e_route_lookup_ipv4_get(struct mlx5e_priv *priv, |
107 | struct net_device *dev, |
108 | struct mlx5e_tc_tun_route_attr *attr) |
109 | { |
110 | struct net_device *route_dev; |
111 | struct net_device *out_dev; |
112 | struct neighbour *n; |
113 | struct rtable *rt; |
114 | |
115 | #if IS_ENABLED(CONFIG_INET) |
116 | struct mlx5_core_dev *mdev = priv->mdev; |
117 | struct net_device *uplink_dev; |
118 | int ret; |
119 | |
120 | if (mlx5_lag_is_multipath(dev: mdev)) { |
121 | struct mlx5_eswitch *esw = mdev->priv.eswitch; |
122 | |
123 | uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, rep_type: REP_ETH); |
124 | attr->fl.fl4.flowi4_oif = uplink_dev->ifindex; |
125 | } else { |
126 | struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(tunnel_dev: dev); |
127 | |
128 | if (tunnel && tunnel->get_remote_ifindex) |
129 | attr->fl.fl4.flowi4_oif = tunnel->get_remote_ifindex(dev); |
130 | } |
131 | |
132 | rt = ip_route_output_key(net: dev_net(dev), flp: &attr->fl.fl4); |
133 | if (IS_ERR(ptr: rt)) |
134 | return PTR_ERR(ptr: rt); |
135 | |
136 | if (rt->rt_type != RTN_UNICAST) { |
137 | ret = -ENETUNREACH; |
138 | goto err_rt_release; |
139 | } |
140 | |
141 | if (mlx5_lag_is_multipath(dev: mdev) && rt->rt_gw_family != AF_INET) { |
142 | ret = -ENETUNREACH; |
143 | goto err_rt_release; |
144 | } |
145 | #else |
146 | return -EOPNOTSUPP; |
147 | #endif |
148 | |
149 | ret = get_route_and_out_devs(priv, dev: rt->dst.dev, route_dev: &route_dev, out_dev: &out_dev); |
150 | if (ret < 0) |
151 | goto err_rt_release; |
152 | dev_hold(dev: route_dev); |
153 | |
154 | if (!attr->ttl) |
155 | attr->ttl = ip4_dst_hoplimit(dst: &rt->dst); |
156 | n = dst_neigh_lookup(dst: &rt->dst, daddr: &attr->fl.fl4.daddr); |
157 | if (!n) { |
158 | ret = -ENOMEM; |
159 | goto err_dev_release; |
160 | } |
161 | |
162 | ip_rt_put(rt); |
163 | attr->route_dev = route_dev; |
164 | attr->out_dev = out_dev; |
165 | attr->n = n; |
166 | return 0; |
167 | |
168 | err_dev_release: |
169 | dev_put(dev: route_dev); |
170 | err_rt_release: |
171 | ip_rt_put(rt); |
172 | return ret; |
173 | } |
174 | |
/* Release the references taken by mlx5e_route_lookup_ipv4_get(). */
static void mlx5e_route_lookup_ipv4_put(struct mlx5e_tc_tun_route_attr *attr)
{
	mlx5e_tc_tun_route_attr_cleanup(attr);
}
179 | |
180 | static const char *mlx5e_netdev_kind(struct net_device *dev) |
181 | { |
182 | if (dev->rtnl_link_ops) |
183 | return dev->rtnl_link_ops->kind; |
184 | else |
185 | return "unknown" ; |
186 | } |
187 | |
188 | static int (char buf[], __u8 *ip_proto, |
189 | struct mlx5e_encap_entry *e) |
190 | { |
191 | if (!e->tunnel) { |
192 | pr_warn("mlx5: Cannot generate tunnel header for this tunnel\n" ); |
193 | return -EOPNOTSUPP; |
194 | } |
195 | |
196 | return e->tunnel->generate_ip_tun_hdr(buf, ip_proto, e); |
197 | } |
198 | |
199 | static char *gen_eth_tnl_hdr(char *buf, struct net_device *dev, |
200 | struct mlx5e_encap_entry *e, |
201 | u16 proto) |
202 | { |
203 | struct ethhdr *eth = (struct ethhdr *)buf; |
204 | char *ip; |
205 | |
206 | ether_addr_copy(dst: eth->h_dest, src: e->h_dest); |
207 | ether_addr_copy(dst: eth->h_source, src: dev->dev_addr); |
208 | if (is_vlan_dev(dev)) { |
209 | struct vlan_hdr *vlan = (struct vlan_hdr *) |
210 | ((char *)eth + ETH_HLEN); |
211 | ip = (char *)vlan + VLAN_HLEN; |
212 | eth->h_proto = vlan_dev_vlan_proto(dev); |
213 | vlan->h_vlan_TCI = htons(vlan_dev_vlan_id(dev)); |
214 | vlan->h_vlan_encapsulated_proto = htons(proto); |
215 | } else { |
216 | eth->h_proto = htons(proto); |
217 | ip = (char *)eth + ETH_HLEN; |
218 | } |
219 | |
220 | return ip; |
221 | } |
222 | |
223 | int (struct mlx5e_priv *priv, |
224 | struct net_device *mirred_dev, |
225 | struct mlx5e_encap_entry *e) |
226 | { |
227 | int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); |
228 | const struct ip_tunnel_key *tun_key = &e->tun_info->key; |
229 | struct mlx5_pkt_reformat_params reformat_params; |
230 | struct mlx5e_neigh m_neigh = {}; |
231 | TC_TUN_ROUTE_ATTR_INIT(attr); |
232 | int ipv4_encap_size; |
233 | char *; |
234 | struct iphdr *ip; |
235 | u8 nud_state; |
236 | int err; |
237 | |
238 | /* add the IP fields */ |
239 | attr.fl.fl4.flowi4_tos = tun_key->tos & ~INET_ECN_MASK; |
240 | attr.fl.fl4.daddr = tun_key->u.ipv4.dst; |
241 | attr.fl.fl4.saddr = tun_key->u.ipv4.src; |
242 | attr.ttl = tun_key->ttl; |
243 | |
244 | err = mlx5e_route_lookup_ipv4_get(priv, dev: mirred_dev, attr: &attr); |
245 | if (err) |
246 | return err; |
247 | |
248 | ipv4_encap_size = |
249 | (is_vlan_dev(dev: attr.route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) + |
250 | sizeof(struct iphdr) + |
251 | e->tunnel->calc_hlen(e); |
252 | |
253 | if (max_encap_size < ipv4_encap_size) { |
254 | mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n" , |
255 | ipv4_encap_size, max_encap_size); |
256 | err = -EOPNOTSUPP; |
257 | goto release_neigh; |
258 | } |
259 | |
260 | encap_header = kzalloc(size: ipv4_encap_size, GFP_KERNEL); |
261 | if (!encap_header) { |
262 | err = -ENOMEM; |
263 | goto release_neigh; |
264 | } |
265 | |
266 | m_neigh.family = attr.n->ops->family; |
267 | memcpy(&m_neigh.dst_ip, attr.n->primary_key, attr.n->tbl->key_len); |
268 | e->out_dev = attr.out_dev; |
269 | e->route_dev_ifindex = attr.route_dev->ifindex; |
270 | |
271 | /* It's important to add the neigh to the hash table before checking |
272 | * the neigh validity state. So if we'll get a notification, in case the |
273 | * neigh changes it's validity state, we would find the relevant neigh |
274 | * in the hash. |
275 | */ |
276 | err = mlx5e_rep_encap_entry_attach(priv: netdev_priv(dev: attr.out_dev), e, m_neigh: &m_neigh, neigh_dev: attr.n->dev); |
277 | if (err) |
278 | goto free_encap; |
279 | |
280 | read_lock_bh(&attr.n->lock); |
281 | nud_state = attr.n->nud_state; |
282 | ether_addr_copy(dst: e->h_dest, src: attr.n->ha); |
283 | read_unlock_bh(&attr.n->lock); |
284 | |
285 | /* add ethernet header */ |
286 | ip = (struct iphdr *)gen_eth_tnl_hdr(buf: encap_header, dev: attr.route_dev, e, |
287 | ETH_P_IP); |
288 | |
289 | /* add ip header */ |
290 | ip->tos = tun_key->tos; |
291 | ip->version = 0x4; |
292 | ip->ihl = 0x5; |
293 | ip->ttl = attr.ttl; |
294 | ip->daddr = attr.fl.fl4.daddr; |
295 | ip->saddr = attr.fl.fl4.saddr; |
296 | |
297 | /* add tunneling protocol header */ |
298 | err = mlx5e_gen_ip_tunnel_header(buf: (char *)ip + sizeof(struct iphdr), |
299 | ip_proto: &ip->protocol, e); |
300 | if (err) |
301 | goto destroy_neigh_entry; |
302 | |
303 | e->encap_size = ipv4_encap_size; |
304 | e->encap_header = encap_header; |
305 | encap_header = NULL; |
306 | |
307 | if (!(nud_state & NUD_VALID)) { |
308 | neigh_event_send(neigh: attr.n, NULL); |
309 | /* the encap entry will be made valid on neigh update event |
310 | * and not used before that. |
311 | */ |
312 | goto release_neigh; |
313 | } |
314 | |
315 | memset(&reformat_params, 0, sizeof(reformat_params)); |
316 | reformat_params.type = e->reformat_type; |
317 | reformat_params.size = e->encap_size; |
318 | reformat_params.data = e->encap_header; |
319 | e->pkt_reformat = mlx5_packet_reformat_alloc(dev: priv->mdev, params: &reformat_params, |
320 | ns_type: MLX5_FLOW_NAMESPACE_FDB); |
321 | if (IS_ERR(ptr: e->pkt_reformat)) { |
322 | err = PTR_ERR(ptr: e->pkt_reformat); |
323 | goto destroy_neigh_entry; |
324 | } |
325 | |
326 | e->flags |= MLX5_ENCAP_ENTRY_VALID; |
327 | mlx5e_rep_queue_neigh_stats_work(priv: netdev_priv(dev: attr.out_dev)); |
328 | mlx5e_route_lookup_ipv4_put(attr: &attr); |
329 | return err; |
330 | |
331 | destroy_neigh_entry: |
332 | mlx5e_rep_encap_entry_detach(priv: netdev_priv(dev: e->out_dev), e); |
333 | free_encap: |
334 | kfree(objp: encap_header); |
335 | release_neigh: |
336 | mlx5e_route_lookup_ipv4_put(attr: &attr); |
337 | return err; |
338 | } |
339 | |
340 | int (struct mlx5e_priv *priv, |
341 | struct net_device *mirred_dev, |
342 | struct mlx5e_encap_entry *e) |
343 | { |
344 | int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); |
345 | const struct ip_tunnel_key *tun_key = &e->tun_info->key; |
346 | struct mlx5_pkt_reformat_params reformat_params; |
347 | TC_TUN_ROUTE_ATTR_INIT(attr); |
348 | int ipv4_encap_size; |
349 | char *; |
350 | struct iphdr *ip; |
351 | u8 nud_state; |
352 | int err; |
353 | |
354 | /* add the IP fields */ |
355 | attr.fl.fl4.flowi4_tos = tun_key->tos & ~INET_ECN_MASK; |
356 | attr.fl.fl4.daddr = tun_key->u.ipv4.dst; |
357 | attr.fl.fl4.saddr = tun_key->u.ipv4.src; |
358 | attr.ttl = tun_key->ttl; |
359 | |
360 | err = mlx5e_route_lookup_ipv4_get(priv, dev: mirred_dev, attr: &attr); |
361 | if (err) |
362 | return err; |
363 | |
364 | ipv4_encap_size = |
365 | (is_vlan_dev(dev: attr.route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) + |
366 | sizeof(struct iphdr) + |
367 | e->tunnel->calc_hlen(e); |
368 | |
369 | if (max_encap_size < ipv4_encap_size) { |
370 | mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n" , |
371 | ipv4_encap_size, max_encap_size); |
372 | err = -EOPNOTSUPP; |
373 | goto release_neigh; |
374 | } |
375 | |
376 | encap_header = kzalloc(size: ipv4_encap_size, GFP_KERNEL); |
377 | if (!encap_header) { |
378 | err = -ENOMEM; |
379 | goto release_neigh; |
380 | } |
381 | |
382 | e->route_dev_ifindex = attr.route_dev->ifindex; |
383 | |
384 | read_lock_bh(&attr.n->lock); |
385 | nud_state = attr.n->nud_state; |
386 | ether_addr_copy(dst: e->h_dest, src: attr.n->ha); |
387 | WRITE_ONCE(e->nhe->neigh_dev, attr.n->dev); |
388 | read_unlock_bh(&attr.n->lock); |
389 | |
390 | /* add ethernet header */ |
391 | ip = (struct iphdr *)gen_eth_tnl_hdr(buf: encap_header, dev: attr.route_dev, e, |
392 | ETH_P_IP); |
393 | |
394 | /* add ip header */ |
395 | ip->tos = tun_key->tos; |
396 | ip->version = 0x4; |
397 | ip->ihl = 0x5; |
398 | ip->ttl = attr.ttl; |
399 | ip->daddr = attr.fl.fl4.daddr; |
400 | ip->saddr = attr.fl.fl4.saddr; |
401 | |
402 | /* add tunneling protocol header */ |
403 | err = mlx5e_gen_ip_tunnel_header(buf: (char *)ip + sizeof(struct iphdr), |
404 | ip_proto: &ip->protocol, e); |
405 | if (err) |
406 | goto free_encap; |
407 | |
408 | e->encap_size = ipv4_encap_size; |
409 | kfree(objp: e->encap_header); |
410 | e->encap_header = encap_header; |
411 | encap_header = NULL; |
412 | |
413 | if (!(nud_state & NUD_VALID)) { |
414 | neigh_event_send(neigh: attr.n, NULL); |
415 | /* the encap entry will be made valid on neigh update event |
416 | * and not used before that. |
417 | */ |
418 | goto release_neigh; |
419 | } |
420 | |
421 | memset(&reformat_params, 0, sizeof(reformat_params)); |
422 | reformat_params.type = e->reformat_type; |
423 | reformat_params.size = e->encap_size; |
424 | reformat_params.data = e->encap_header; |
425 | e->pkt_reformat = mlx5_packet_reformat_alloc(dev: priv->mdev, params: &reformat_params, |
426 | ns_type: MLX5_FLOW_NAMESPACE_FDB); |
427 | if (IS_ERR(ptr: e->pkt_reformat)) { |
428 | err = PTR_ERR(ptr: e->pkt_reformat); |
429 | goto free_encap; |
430 | } |
431 | |
432 | e->flags |= MLX5_ENCAP_ENTRY_VALID; |
433 | mlx5e_rep_queue_neigh_stats_work(priv: netdev_priv(dev: attr.out_dev)); |
434 | mlx5e_route_lookup_ipv4_put(attr: &attr); |
435 | return err; |
436 | |
437 | free_encap: |
438 | kfree(objp: encap_header); |
439 | release_neigh: |
440 | mlx5e_route_lookup_ipv4_put(attr: &attr); |
441 | return err; |
442 | } |
443 | |
444 | #if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6) |
445 | static int mlx5e_route_lookup_ipv6_get(struct mlx5e_priv *priv, |
446 | struct net_device *dev, |
447 | struct mlx5e_tc_tun_route_attr *attr) |
448 | { |
449 | struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(tunnel_dev: dev); |
450 | struct net_device *route_dev; |
451 | struct net_device *out_dev; |
452 | struct dst_entry *dst; |
453 | struct neighbour *n; |
454 | int ret; |
455 | |
456 | if (tunnel && tunnel->get_remote_ifindex) |
457 | attr->fl.fl6.flowi6_oif = tunnel->get_remote_ifindex(dev); |
458 | dst = ipv6_stub->ipv6_dst_lookup_flow(dev_net(dev), NULL, &attr->fl.fl6, |
459 | NULL); |
460 | if (IS_ERR(ptr: dst)) |
461 | return PTR_ERR(ptr: dst); |
462 | |
463 | if (!attr->ttl) |
464 | attr->ttl = ip6_dst_hoplimit(dst); |
465 | |
466 | ret = get_route_and_out_devs(priv, dev: dst->dev, route_dev: &route_dev, out_dev: &out_dev); |
467 | if (ret < 0) |
468 | goto err_dst_release; |
469 | |
470 | dev_hold(dev: route_dev); |
471 | n = dst_neigh_lookup(dst, daddr: &attr->fl.fl6.daddr); |
472 | if (!n) { |
473 | ret = -ENOMEM; |
474 | goto err_dev_release; |
475 | } |
476 | |
477 | dst_release(dst); |
478 | attr->out_dev = out_dev; |
479 | attr->route_dev = route_dev; |
480 | attr->n = n; |
481 | return 0; |
482 | |
483 | err_dev_release: |
484 | dev_put(dev: route_dev); |
485 | err_dst_release: |
486 | dst_release(dst); |
487 | return ret; |
488 | } |
489 | |
/* Release the references taken by mlx5e_route_lookup_ipv6_get(). */
static void mlx5e_route_lookup_ipv6_put(struct mlx5e_tc_tun_route_attr *attr)
{
	mlx5e_tc_tun_route_attr_cleanup(attr);
}
494 | |
495 | int (struct mlx5e_priv *priv, |
496 | struct net_device *mirred_dev, |
497 | struct mlx5e_encap_entry *e) |
498 | { |
499 | int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); |
500 | const struct ip_tunnel_key *tun_key = &e->tun_info->key; |
501 | struct mlx5_pkt_reformat_params reformat_params; |
502 | struct mlx5e_neigh m_neigh = {}; |
503 | TC_TUN_ROUTE_ATTR_INIT(attr); |
504 | struct ipv6hdr *ip6h; |
505 | int ipv6_encap_size; |
506 | char *; |
507 | u8 nud_state; |
508 | int err; |
509 | |
510 | attr.ttl = tun_key->ttl; |
511 | attr.fl.fl6.flowlabel = ip6_make_flowinfo(tclass: tun_key->tos, flowlabel: tun_key->label); |
512 | attr.fl.fl6.daddr = tun_key->u.ipv6.dst; |
513 | attr.fl.fl6.saddr = tun_key->u.ipv6.src; |
514 | |
515 | err = mlx5e_route_lookup_ipv6_get(priv, dev: mirred_dev, attr: &attr); |
516 | if (err) |
517 | return err; |
518 | |
519 | ipv6_encap_size = |
520 | (is_vlan_dev(dev: attr.route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) + |
521 | sizeof(struct ipv6hdr) + |
522 | e->tunnel->calc_hlen(e); |
523 | |
524 | if (max_encap_size < ipv6_encap_size) { |
525 | mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n" , |
526 | ipv6_encap_size, max_encap_size); |
527 | err = -EOPNOTSUPP; |
528 | goto release_neigh; |
529 | } |
530 | |
531 | encap_header = kzalloc(size: ipv6_encap_size, GFP_KERNEL); |
532 | if (!encap_header) { |
533 | err = -ENOMEM; |
534 | goto release_neigh; |
535 | } |
536 | |
537 | m_neigh.family = attr.n->ops->family; |
538 | memcpy(&m_neigh.dst_ip, attr.n->primary_key, attr.n->tbl->key_len); |
539 | e->out_dev = attr.out_dev; |
540 | e->route_dev_ifindex = attr.route_dev->ifindex; |
541 | |
542 | /* It's important to add the neigh to the hash table before checking |
543 | * the neigh validity state. So if we'll get a notification, in case the |
544 | * neigh changes it's validity state, we would find the relevant neigh |
545 | * in the hash. |
546 | */ |
547 | err = mlx5e_rep_encap_entry_attach(priv: netdev_priv(dev: attr.out_dev), e, m_neigh: &m_neigh, neigh_dev: attr.n->dev); |
548 | if (err) |
549 | goto free_encap; |
550 | |
551 | read_lock_bh(&attr.n->lock); |
552 | nud_state = attr.n->nud_state; |
553 | ether_addr_copy(dst: e->h_dest, src: attr.n->ha); |
554 | read_unlock_bh(&attr.n->lock); |
555 | |
556 | /* add ethernet header */ |
557 | ip6h = (struct ipv6hdr *)gen_eth_tnl_hdr(buf: encap_header, dev: attr.route_dev, e, |
558 | ETH_P_IPV6); |
559 | |
560 | /* add ip header */ |
561 | ip6_flow_hdr(hdr: ip6h, tclass: tun_key->tos, flowlabel: 0); |
562 | /* the HW fills up ipv6 payload len */ |
563 | ip6h->hop_limit = attr.ttl; |
564 | ip6h->daddr = attr.fl.fl6.daddr; |
565 | ip6h->saddr = attr.fl.fl6.saddr; |
566 | |
567 | /* add tunneling protocol header */ |
568 | err = mlx5e_gen_ip_tunnel_header(buf: (char *)ip6h + sizeof(struct ipv6hdr), |
569 | ip_proto: &ip6h->nexthdr, e); |
570 | if (err) |
571 | goto destroy_neigh_entry; |
572 | |
573 | e->encap_size = ipv6_encap_size; |
574 | e->encap_header = encap_header; |
575 | encap_header = NULL; |
576 | |
577 | if (!(nud_state & NUD_VALID)) { |
578 | neigh_event_send(neigh: attr.n, NULL); |
579 | /* the encap entry will be made valid on neigh update event |
580 | * and not used before that. |
581 | */ |
582 | goto release_neigh; |
583 | } |
584 | |
585 | memset(&reformat_params, 0, sizeof(reformat_params)); |
586 | reformat_params.type = e->reformat_type; |
587 | reformat_params.size = e->encap_size; |
588 | reformat_params.data = e->encap_header; |
589 | e->pkt_reformat = mlx5_packet_reformat_alloc(dev: priv->mdev, params: &reformat_params, |
590 | ns_type: MLX5_FLOW_NAMESPACE_FDB); |
591 | if (IS_ERR(ptr: e->pkt_reformat)) { |
592 | err = PTR_ERR(ptr: e->pkt_reformat); |
593 | goto destroy_neigh_entry; |
594 | } |
595 | |
596 | e->flags |= MLX5_ENCAP_ENTRY_VALID; |
597 | mlx5e_rep_queue_neigh_stats_work(priv: netdev_priv(dev: attr.out_dev)); |
598 | mlx5e_route_lookup_ipv6_put(attr: &attr); |
599 | return err; |
600 | |
601 | destroy_neigh_entry: |
602 | mlx5e_rep_encap_entry_detach(priv: netdev_priv(dev: e->out_dev), e); |
603 | free_encap: |
604 | kfree(objp: encap_header); |
605 | release_neigh: |
606 | mlx5e_route_lookup_ipv6_put(attr: &attr); |
607 | return err; |
608 | } |
609 | |
610 | int (struct mlx5e_priv *priv, |
611 | struct net_device *mirred_dev, |
612 | struct mlx5e_encap_entry *e) |
613 | { |
614 | int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); |
615 | const struct ip_tunnel_key *tun_key = &e->tun_info->key; |
616 | struct mlx5_pkt_reformat_params reformat_params; |
617 | TC_TUN_ROUTE_ATTR_INIT(attr); |
618 | struct ipv6hdr *ip6h; |
619 | int ipv6_encap_size; |
620 | char *; |
621 | u8 nud_state; |
622 | int err; |
623 | |
624 | attr.ttl = tun_key->ttl; |
625 | |
626 | attr.fl.fl6.flowlabel = ip6_make_flowinfo(tclass: tun_key->tos, flowlabel: tun_key->label); |
627 | attr.fl.fl6.daddr = tun_key->u.ipv6.dst; |
628 | attr.fl.fl6.saddr = tun_key->u.ipv6.src; |
629 | |
630 | err = mlx5e_route_lookup_ipv6_get(priv, dev: mirred_dev, attr: &attr); |
631 | if (err) |
632 | return err; |
633 | |
634 | ipv6_encap_size = |
635 | (is_vlan_dev(dev: attr.route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) + |
636 | sizeof(struct ipv6hdr) + |
637 | e->tunnel->calc_hlen(e); |
638 | |
639 | if (max_encap_size < ipv6_encap_size) { |
640 | mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n" , |
641 | ipv6_encap_size, max_encap_size); |
642 | err = -EOPNOTSUPP; |
643 | goto release_neigh; |
644 | } |
645 | |
646 | encap_header = kzalloc(size: ipv6_encap_size, GFP_KERNEL); |
647 | if (!encap_header) { |
648 | err = -ENOMEM; |
649 | goto release_neigh; |
650 | } |
651 | |
652 | e->route_dev_ifindex = attr.route_dev->ifindex; |
653 | |
654 | read_lock_bh(&attr.n->lock); |
655 | nud_state = attr.n->nud_state; |
656 | ether_addr_copy(dst: e->h_dest, src: attr.n->ha); |
657 | WRITE_ONCE(e->nhe->neigh_dev, attr.n->dev); |
658 | read_unlock_bh(&attr.n->lock); |
659 | |
660 | /* add ethernet header */ |
661 | ip6h = (struct ipv6hdr *)gen_eth_tnl_hdr(buf: encap_header, dev: attr.route_dev, e, |
662 | ETH_P_IPV6); |
663 | |
664 | /* add ip header */ |
665 | ip6_flow_hdr(hdr: ip6h, tclass: tun_key->tos, flowlabel: 0); |
666 | /* the HW fills up ipv6 payload len */ |
667 | ip6h->hop_limit = attr.ttl; |
668 | ip6h->daddr = attr.fl.fl6.daddr; |
669 | ip6h->saddr = attr.fl.fl6.saddr; |
670 | |
671 | /* add tunneling protocol header */ |
672 | err = mlx5e_gen_ip_tunnel_header(buf: (char *)ip6h + sizeof(struct ipv6hdr), |
673 | ip_proto: &ip6h->nexthdr, e); |
674 | if (err) |
675 | goto free_encap; |
676 | |
677 | e->encap_size = ipv6_encap_size; |
678 | kfree(objp: e->encap_header); |
679 | e->encap_header = encap_header; |
680 | encap_header = NULL; |
681 | |
682 | if (!(nud_state & NUD_VALID)) { |
683 | neigh_event_send(neigh: attr.n, NULL); |
684 | /* the encap entry will be made valid on neigh update event |
685 | * and not used before that. |
686 | */ |
687 | goto release_neigh; |
688 | } |
689 | |
690 | memset(&reformat_params, 0, sizeof(reformat_params)); |
691 | reformat_params.type = e->reformat_type; |
692 | reformat_params.size = e->encap_size; |
693 | reformat_params.data = e->encap_header; |
694 | e->pkt_reformat = mlx5_packet_reformat_alloc(dev: priv->mdev, params: &reformat_params, |
695 | ns_type: MLX5_FLOW_NAMESPACE_FDB); |
696 | if (IS_ERR(ptr: e->pkt_reformat)) { |
697 | err = PTR_ERR(ptr: e->pkt_reformat); |
698 | goto free_encap; |
699 | } |
700 | |
701 | e->flags |= MLX5_ENCAP_ENTRY_VALID; |
702 | mlx5e_rep_queue_neigh_stats_work(priv: netdev_priv(dev: attr.out_dev)); |
703 | mlx5e_route_lookup_ipv6_put(attr: &attr); |
704 | return err; |
705 | |
706 | free_encap: |
707 | kfree(objp: encap_header); |
708 | release_neigh: |
709 | mlx5e_route_lookup_ipv6_put(attr: &attr); |
710 | return err; |
711 | } |
712 | #endif |
713 | |
714 | int mlx5e_tc_tun_route_lookup(struct mlx5e_priv *priv, |
715 | struct mlx5_flow_spec *spec, |
716 | struct mlx5_flow_attr *flow_attr, |
717 | struct net_device *filter_dev) |
718 | { |
719 | struct mlx5_esw_flow_attr *esw_attr = flow_attr->esw_attr; |
720 | struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; |
721 | struct mlx5e_tc_int_port *int_port; |
722 | TC_TUN_ROUTE_ATTR_INIT(attr); |
723 | u16 vport_num; |
724 | int err = 0; |
725 | |
726 | if (flow_attr->tun_ip_version == 4) { |
727 | /* Addresses are swapped for decap */ |
728 | attr.fl.fl4.saddr = esw_attr->rx_tun_attr->dst_ip.v4; |
729 | attr.fl.fl4.daddr = esw_attr->rx_tun_attr->src_ip.v4; |
730 | err = mlx5e_route_lookup_ipv4_get(priv, dev: filter_dev, attr: &attr); |
731 | } |
732 | #if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6) |
733 | else if (flow_attr->tun_ip_version == 6) { |
734 | /* Addresses are swapped for decap */ |
735 | attr.fl.fl6.saddr = esw_attr->rx_tun_attr->dst_ip.v6; |
736 | attr.fl.fl6.daddr = esw_attr->rx_tun_attr->src_ip.v6; |
737 | err = mlx5e_route_lookup_ipv6_get(priv, dev: filter_dev, attr: &attr); |
738 | } |
739 | #endif |
740 | else |
741 | return 0; |
742 | |
743 | if (err) |
744 | return err; |
745 | |
746 | if (attr.route_dev->netdev_ops == &mlx5e_netdev_ops && |
747 | mlx5e_tc_is_vf_tunnel(out_dev: attr.out_dev, route_dev: attr.route_dev)) { |
748 | err = mlx5e_tc_query_route_vport(out_dev: attr.out_dev, route_dev: attr.route_dev, vport: &vport_num); |
749 | if (err) |
750 | goto out; |
751 | |
752 | esw_attr->rx_tun_attr->decap_vport = vport_num; |
753 | } else if (netif_is_ovs_master(dev: attr.route_dev) && mlx5e_tc_int_port_supported(esw)) { |
754 | int_port = mlx5e_tc_int_port_get(priv: mlx5e_get_int_port_priv(priv), |
755 | ifindex: attr.route_dev->ifindex, |
756 | type: MLX5E_TC_INT_PORT_INGRESS); |
757 | if (IS_ERR(ptr: int_port)) { |
758 | err = PTR_ERR(ptr: int_port); |
759 | goto out; |
760 | } |
761 | esw_attr->int_port = int_port; |
762 | } |
763 | |
764 | out: |
765 | if (flow_attr->tun_ip_version == 4) |
766 | mlx5e_route_lookup_ipv4_put(attr: &attr); |
767 | #if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6) |
768 | else if (flow_attr->tun_ip_version == 6) |
769 | mlx5e_route_lookup_ipv6_put(attr: &attr); |
770 | #endif |
771 | return err; |
772 | } |
773 | |
774 | bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv, |
775 | struct net_device *netdev) |
776 | { |
777 | struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(tunnel_dev: netdev); |
778 | |
779 | if (tunnel && tunnel->can_offload(priv)) |
780 | return true; |
781 | else |
782 | return false; |
783 | } |
784 | |
785 | int mlx5e_tc_tun_init_encap_attr(struct net_device *tunnel_dev, |
786 | struct mlx5e_priv *priv, |
787 | struct mlx5e_encap_entry *e, |
788 | struct netlink_ext_ack *extack) |
789 | { |
790 | struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(tunnel_dev); |
791 | |
792 | if (!tunnel) { |
793 | e->reformat_type = -1; |
794 | return -EOPNOTSUPP; |
795 | } |
796 | |
797 | return tunnel->init_encap_attr(tunnel_dev, priv, e, extack); |
798 | } |
799 | |
800 | int mlx5e_tc_tun_parse(struct net_device *filter_dev, |
801 | struct mlx5e_priv *priv, |
802 | struct mlx5_flow_spec *spec, |
803 | struct flow_cls_offload *f, |
804 | u8 *match_level) |
805 | { |
806 | struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(tunnel_dev: filter_dev); |
807 | struct flow_rule *rule = flow_cls_offload_flow_rule(flow_cmd: f); |
808 | void * = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, |
809 | outer_headers); |
810 | void * = MLX5_ADDR_OF(fte_match_param, spec->match_value, |
811 | outer_headers); |
812 | struct netlink_ext_ack *extack = f->common.extack; |
813 | int err = 0; |
814 | |
815 | if (!tunnel) { |
816 | netdev_warn(dev: priv->netdev, |
817 | format: "decapsulation offload is not supported for %s net device\n" , |
818 | mlx5e_netdev_kind(dev: filter_dev)); |
819 | err = -EOPNOTSUPP; |
820 | goto out; |
821 | } |
822 | |
823 | *match_level = tunnel->match_level; |
824 | |
825 | if (tunnel->parse_udp_ports) { |
826 | err = tunnel->parse_udp_ports(priv, spec, f, |
827 | headers_c, headers_v); |
828 | if (err) |
829 | goto out; |
830 | } |
831 | |
832 | if (tunnel->parse_tunnel) { |
833 | err = tunnel->parse_tunnel(priv, spec, f, |
834 | headers_c, headers_v); |
835 | if (err) |
836 | goto out; |
837 | } |
838 | |
839 | if (flow_rule_match_key(rule, key: FLOW_DISSECTOR_KEY_ENC_CONTROL)) { |
840 | struct flow_dissector_key_basic key_basic = {}; |
841 | struct flow_dissector_key_basic mask_basic = { |
842 | .n_proto = htons(0xFFFF), |
843 | }; |
844 | struct flow_match_basic match_basic = { |
845 | .key = &key_basic, .mask = &mask_basic, |
846 | }; |
847 | struct flow_match_control match; |
848 | u16 addr_type; |
849 | |
850 | flow_rule_match_enc_control(rule, out: &match); |
851 | addr_type = match.key->addr_type; |
852 | |
853 | /* For tunnel addr_type used same key id`s as for non-tunnel */ |
854 | if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { |
855 | struct flow_match_ipv4_addrs match; |
856 | |
857 | flow_rule_match_enc_ipv4_addrs(rule, out: &match); |
858 | MLX5_SET(fte_match_set_lyr_2_4, headers_c, |
859 | src_ipv4_src_ipv6.ipv4_layout.ipv4, |
860 | ntohl(match.mask->src)); |
861 | MLX5_SET(fte_match_set_lyr_2_4, headers_v, |
862 | src_ipv4_src_ipv6.ipv4_layout.ipv4, |
863 | ntohl(match.key->src)); |
864 | |
865 | MLX5_SET(fte_match_set_lyr_2_4, headers_c, |
866 | dst_ipv4_dst_ipv6.ipv4_layout.ipv4, |
867 | ntohl(match.mask->dst)); |
868 | MLX5_SET(fte_match_set_lyr_2_4, headers_v, |
869 | dst_ipv4_dst_ipv6.ipv4_layout.ipv4, |
870 | ntohl(match.key->dst)); |
871 | |
872 | key_basic.n_proto = htons(ETH_P_IP); |
873 | mlx5e_tc_set_ethertype(mdev: priv->mdev, match: &match_basic, outer: true, |
874 | headers_c, headers_v); |
875 | } else if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { |
876 | struct flow_match_ipv6_addrs match; |
877 | |
878 | flow_rule_match_enc_ipv6_addrs(rule, out: &match); |
879 | memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, |
880 | src_ipv4_src_ipv6.ipv6_layout.ipv6), |
881 | &match.mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout, |
882 | ipv6)); |
883 | memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, |
884 | src_ipv4_src_ipv6.ipv6_layout.ipv6), |
885 | &match.key->src, MLX5_FLD_SZ_BYTES(ipv6_layout, |
886 | ipv6)); |
887 | |
888 | memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, |
889 | dst_ipv4_dst_ipv6.ipv6_layout.ipv6), |
890 | &match.mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, |
891 | ipv6)); |
892 | memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, |
893 | dst_ipv4_dst_ipv6.ipv6_layout.ipv6), |
894 | &match.key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, |
895 | ipv6)); |
896 | |
897 | key_basic.n_proto = htons(ETH_P_IPV6); |
898 | mlx5e_tc_set_ethertype(mdev: priv->mdev, match: &match_basic, outer: true, |
899 | headers_c, headers_v); |
900 | } |
901 | } |
902 | |
903 | if (flow_rule_match_key(rule, key: FLOW_DISSECTOR_KEY_ENC_IP)) { |
904 | struct flow_match_ip match; |
905 | |
906 | flow_rule_match_enc_ip(rule, out: &match); |
907 | MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn, |
908 | match.mask->tos & 0x3); |
909 | MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, |
910 | match.key->tos & 0x3); |
911 | |
912 | MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp, |
913 | match.mask->tos >> 2); |
914 | MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp, |
915 | match.key->tos >> 2); |
916 | |
917 | MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit, |
918 | match.mask->ttl); |
919 | MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit, |
920 | match.key->ttl); |
921 | |
922 | if (match.mask->ttl && |
923 | !MLX5_CAP_ESW_FLOWTABLE_FDB |
924 | (priv->mdev, |
925 | ft_field_support.outer_ipv4_ttl)) { |
926 | NL_SET_ERR_MSG_MOD(extack, |
927 | "Matching on TTL is not supported" ); |
928 | err = -EOPNOTSUPP; |
929 | goto out; |
930 | } |
931 | } |
932 | |
933 | /* let software handle IP fragments */ |
934 | MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1); |
935 | MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, 0); |
936 | |
937 | return 0; |
938 | |
939 | out: |
940 | return err; |
941 | } |
942 | |
943 | int mlx5e_tc_tun_parse_udp_ports(struct mlx5e_priv *priv, |
944 | struct mlx5_flow_spec *spec, |
945 | struct flow_cls_offload *f, |
946 | void *, |
947 | void *) |
948 | { |
949 | struct flow_rule *rule = flow_cls_offload_flow_rule(flow_cmd: f); |
950 | struct netlink_ext_ack *extack = f->common.extack; |
951 | struct flow_match_ports enc_ports; |
952 | |
953 | /* Full udp dst port must be given */ |
954 | |
955 | if (!flow_rule_match_key(rule, key: FLOW_DISSECTOR_KEY_ENC_PORTS)) { |
956 | NL_SET_ERR_MSG_MOD(extack, |
957 | "UDP tunnel decap filter must include enc_dst_port condition" ); |
958 | netdev_warn(dev: priv->netdev, |
959 | format: "UDP tunnel decap filter must include enc_dst_port condition\n" ); |
960 | return -EOPNOTSUPP; |
961 | } |
962 | |
963 | flow_rule_match_enc_ports(rule, out: &enc_ports); |
964 | |
965 | if (memchr_inv(p: &enc_ports.mask->dst, c: 0xff, |
966 | size: sizeof(enc_ports.mask->dst))) { |
967 | NL_SET_ERR_MSG_MOD(extack, |
968 | "UDP tunnel decap filter must match enc_dst_port fully" ); |
969 | netdev_warn(dev: priv->netdev, |
970 | format: "UDP tunnel decap filter must match enc_dst_port fully\n" ); |
971 | return -EOPNOTSUPP; |
972 | } |
973 | |
974 | /* match on UDP protocol and dst port number */ |
975 | |
976 | MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol); |
977 | MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_UDP); |
978 | |
979 | MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport, |
980 | ntohs(enc_ports.mask->dst)); |
981 | MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport, |
982 | ntohs(enc_ports.key->dst)); |
983 | |
984 | /* UDP src port on outer header is generated by HW, |
985 | * so it is probably a bad idea to request matching it. |
986 | * Nonetheless, it is allowed. |
987 | */ |
988 | |
989 | MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport, |
990 | ntohs(enc_ports.mask->src)); |
991 | MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport, |
992 | ntohs(enc_ports.key->src)); |
993 | |
994 | return 0; |
995 | } |
996 | |