// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2021 Mellanox Technologies. */

#include <net/fib_notifier.h>
#include <net/nexthop.h>
#include <net/ip_tunnels.h>
#include "tc_tun_encap.h"
#include "en_tc.h"
#include "tc_tun.h"
#include "rep/tc.h"
#include "diag/en_tc_tracepoint.h"

enum {
	MLX5E_ROUTE_ENTRY_VALID = BIT(0),
};

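/* When the route device is an OVS master device (and the flow was not
 * installed on the tunnel egress device itself), set up actions that
 * forward the packet to that internal port on egress.
 */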
static int mlx5e_set_int_port_tunnel(struct mlx5e_priv *priv,
				     struct mlx5_flow_attr *attr,
				     struct mlx5e_encap_entry *e,
				     int out_index)
{
	struct net_device *route_dev;
	int err = 0;

	route_dev = dev_get_by_index(dev_net(e->out_dev), e->route_dev_ifindex);

	if (!route_dev || !netif_is_ovs_master(route_dev) ||
	    attr->parse_attr->filter_dev == e->out_dev)
		goto out;

	err = mlx5e_set_fwd_to_int_port_actions(priv, attr, e->route_dev_ifindex,
						MLX5E_TC_INT_PORT_EGRESS,
						&attr->action, out_index);

out:
	if (route_dev)
		dev_put(route_dev);

	return err;
}

struct mlx5e_route_key {
	int ip_version;
	union {
		__be32 v4;
		struct in6_addr v6;
	} endpoint_ip;
};

struct mlx5e_route_entry {
	struct mlx5e_route_key key;
	struct list_head encap_entries;
	struct list_head decap_flows;
	u32 flags;
	struct hlist_node hlist;
	refcount_t refcnt;
	int tunnel_dev_index;
	struct rcu_head rcu;
};

struct mlx5e_tc_tun_encap {
	struct mlx5e_priv *priv;
	struct notifier_block fib_nb;
	spinlock_t route_lock; /* protects route_tbl */
	unsigned long route_tbl_last_update;
	DECLARE_HASHTABLE(route_tbl, 8);
};

static bool mlx5e_route_entry_valid(struct mlx5e_route_entry *r)
{
	return r->flags & MLX5E_ROUTE_ENTRY_VALID;
}

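/* Copy the outer source/destination IP addresses from the flow spec into
 * esw_attr->rx_tun_attr. The TUN_RX flag is only set when both addresses
 * are present, since both are required for the route lookup.
 */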
int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow,
			     struct mlx5_flow_spec *spec)
{
	struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
	struct mlx5_rx_tun_attr *tun_attr;
	void *daddr, *saddr;
	u8 ip_version;

	tun_attr = kvzalloc(sizeof(*tun_attr), GFP_KERNEL);
	if (!tun_attr)
		return -ENOMEM;

	esw_attr->rx_tun_attr = tun_attr;
	ip_version = mlx5e_tc_get_ip_version(spec, true);

	if (ip_version == 4) {
		daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				     outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
		saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				     outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4);
		tun_attr->dst_ip.v4 = *(__be32 *)daddr;
		tun_attr->src_ip.v4 = *(__be32 *)saddr;
		if (!tun_attr->dst_ip.v4 || !tun_attr->src_ip.v4)
			return 0;
	}
#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
	else if (ip_version == 6) {
		int ipv6_size = MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6);

		daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				     outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6);
		saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				     outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6);
		memcpy(&tun_attr->dst_ip.v6, daddr, ipv6_size);
		memcpy(&tun_attr->src_ip.v6, saddr, ipv6_size);
		if (ipv6_addr_any(&tun_attr->dst_ip.v6) ||
		    ipv6_addr_any(&tun_attr->src_ip.v6))
			return 0;
	}
#endif
	/* Only set the flag if both src and dst ip addresses exist. They are
	 * required to establish routing.
	 */
	flow_flag_set(flow, TUN_RX);
	flow->attr->tun_ip_version = ip_version;
	return 0;
}

static bool mlx5e_tc_flow_all_encaps_valid(struct mlx5_esw_flow_attr *esw_attr)
{
	bool all_flow_encaps_valid = true;
	int i;

	/* Flow can be associated with multiple encap entries.
	 * Before offloading the flow verify that all of them have
	 * a valid neighbour.
	 */
	for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
		if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP))
			continue;
		if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) {
			all_flow_encaps_valid = false;
			break;
		}
	}

	return all_flow_encaps_valid;
}

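/* Called when the neighbour of an encap entry becomes valid: offload the
 * cached encapsulation header, then move every flow on @flow_list that is
 * currently on the slow path over to its encap (fast path) rule.
 */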
void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
			      struct mlx5e_encap_entry *e,
			      struct list_head *flow_list)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_pkt_reformat_params reformat_params;
	struct mlx5_esw_flow_attr *esw_attr;
	struct mlx5_flow_handle *rule;
	struct mlx5_flow_attr *attr;
	struct mlx5_flow_spec *spec;
	struct mlx5e_tc_flow *flow;
	int err;

	if (e->flags & MLX5_ENCAP_ENTRY_NO_ROUTE)
		return;

	memset(&reformat_params, 0, sizeof(reformat_params));
	reformat_params.type = e->reformat_type;
	reformat_params.size = e->encap_size;
	reformat_params.data = e->encap_header;
	e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
						     &reformat_params,
						     MLX5_FLOW_NAMESPACE_FDB);
	if (IS_ERR(e->pkt_reformat)) {
		mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %lu\n",
			       PTR_ERR(e->pkt_reformat));
		return;
	}
	e->flags |= MLX5_ENCAP_ENTRY_VALID;
	mlx5e_rep_queue_neigh_stats_work(priv);

	list_for_each_entry(flow, flow_list, tmp_list) {
		if (!mlx5e_is_offloaded_flow(flow) || !flow_flag_test(flow, SLOW))
			continue;

		spec = &flow->attr->parse_attr->spec;

		attr = mlx5e_tc_get_encap_attr(flow);
		esw_attr = attr->esw_attr;
		esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
		esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;

		/* Do not offload flows with unresolved neighbors */
		if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
			continue;

		err = mlx5e_tc_offload_flow_post_acts(flow);
		if (err) {
			mlx5_core_warn(priv->mdev, "Failed to update flow post acts, %d\n",
				       err);
			continue;
		}

		/* update from slow path rule to encap rule */
		rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, flow->attr);
		if (IS_ERR(rule)) {
			mlx5e_tc_unoffload_flow_post_acts(flow);
			err = PTR_ERR(rule);
			mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
				       err);
			continue;
		}

		mlx5e_tc_unoffload_from_slow_path(esw, flow);
		flow->rule[0] = rule;
		/* was unset when slow path rule removed */
		flow_flag_set(flow, OFFLOADED);
	}
}

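/* Inverse of mlx5e_tc_encap_flows_add(): the neighbour became invalid, so
 * demote every offloaded flow on @flow_list to the slow path and release
 * the encap entry's packet reformat object.
 */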
void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
			      struct mlx5e_encap_entry *e,
			      struct list_head *flow_list)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_esw_flow_attr *esw_attr;
	struct mlx5_flow_handle *rule;
	struct mlx5_flow_attr *attr;
	struct mlx5_flow_spec *spec;
	struct mlx5e_tc_flow *flow;
	int err;

	list_for_each_entry(flow, flow_list, tmp_list) {
		if (!mlx5e_is_offloaded_flow(flow))
			continue;

		attr = mlx5e_tc_get_encap_attr(flow);
		esw_attr = attr->esw_attr;
		/* mark the flow's encap dest as non-valid */
		esw_attr->dests[flow->tmp_entry_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;
		esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL;

		/* Clear pkt_reformat before checking the slow path flag: the
		 * same flow may be visited again on a later iteration with the
		 * slow path flag already set, and its pkt_reformat must still
		 * be cleared.
		 */
		if (flow_flag_test(flow, SLOW))
			continue;

		/* update from encap rule to slow path rule */
		spec = &flow->attr->parse_attr->spec;
		rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);

		if (IS_ERR(rule)) {
			err = PTR_ERR(rule);
			mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
				       err);
			continue;
		}

		mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
		mlx5e_tc_unoffload_flow_post_acts(flow);
		flow->rule[0] = rule;
		/* was unset when fast path rule removed */
		flow_flag_set(flow, OFFLOADED);
	}

	/* we know that the encap is valid */
	e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
	mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
	e->pkt_reformat = NULL;
}

static void mlx5e_take_tmp_flow(struct mlx5e_tc_flow *flow,
				struct list_head *flow_list,
				int index)
{
	if (IS_ERR(mlx5e_flow_get(flow))) {
		/* Flow is being deleted concurrently. Wait for it to be
		 * unoffloaded from hardware, otherwise deleting encap will
		 * fail.
		 */
		wait_for_completion(&flow->del_hw_done);
		return;
	}
	wait_for_completion(&flow->init_done);

	flow->tmp_entry_index = index;
	list_add(&flow->tmp_list, flow_list);
}

/* Takes reference to all flows attached to encap and adds the flows to
 * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
 */
void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list)
{
	struct encap_flow_item *efi;
	struct mlx5e_tc_flow *flow;

	list_for_each_entry(efi, &e->flows, list) {
		flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
		mlx5e_take_tmp_flow(flow, flow_list, efi->index);
	}
}

/* Takes reference to all flows attached to route and adds the flows to
 * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
 */
static void mlx5e_take_all_route_decap_flows(struct mlx5e_route_entry *r,
					     struct list_head *flow_list)
{
	struct mlx5e_tc_flow *flow;

	list_for_each_entry(flow, &r->decap_flows, decap_routes)
		mlx5e_take_tmp_flow(flow, flow_list, 0);
}

typedef bool (match_cb)(struct mlx5e_encap_entry *);

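/* Iterate nhe->encap_list under RCU, returning the next encap entry (after
 * @e, or the first one when @e is NULL) that satisfies @match and whose
 * reference count could be taken. The reference to the starting entry @e
 * is released before returning.
 */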
static struct mlx5e_encap_entry *
mlx5e_get_next_matching_encap(struct mlx5e_neigh_hash_entry *nhe,
			      struct mlx5e_encap_entry *e,
			      match_cb match)
{
	struct mlx5e_encap_entry *next = NULL;

retry:
	rcu_read_lock();

	/* find encap with non-zero reference counter value */
	for (next = e ?
		     list_next_or_null_rcu(&nhe->encap_list,
					   &e->encap_list,
					   struct mlx5e_encap_entry,
					   encap_list) :
		     list_first_or_null_rcu(&nhe->encap_list,
					    struct mlx5e_encap_entry,
					    encap_list);
	     next;
	     next = list_next_or_null_rcu(&nhe->encap_list,
					  &next->encap_list,
					  struct mlx5e_encap_entry,
					  encap_list))
		if (mlx5e_encap_take(next))
			break;

	rcu_read_unlock();

	/* release starting encap */
	if (e)
		mlx5e_encap_put(netdev_priv(e->out_dev), e);
	if (!next)
		return next;

	/* wait for encap to be fully initialized */
	wait_for_completion(&next->res_ready);
	/* continue searching if encap entry is not in valid state after completion */
	if (!match(next)) {
		e = next;
		goto retry;
	}

	return next;
}

static bool mlx5e_encap_valid(struct mlx5e_encap_entry *e)
{
	return e->flags & MLX5_ENCAP_ENTRY_VALID;
}

static struct mlx5e_encap_entry *
mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
			   struct mlx5e_encap_entry *e)
{
	return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_valid);
}

static bool mlx5e_encap_initialized(struct mlx5e_encap_entry *e)
{
	return e->compl_result >= 0;
}

struct mlx5e_encap_entry *
mlx5e_get_next_init_encap(struct mlx5e_neigh_hash_entry *nhe,
			  struct mlx5e_encap_entry *e)
{
	return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_initialized);
}

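/* Query the hardware flow counters of all flows that use the neighbour and,
 * if any saw traffic since the last report, update nhe->reported_lastuse
 * and poke the kernel neighbour entry so it is not garbage collected while
 * offloaded traffic still flows through it.
 */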
void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
{
	struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
	struct mlx5e_encap_entry *e = NULL;
	struct mlx5e_tc_flow *flow;
	struct mlx5_fc *counter;
	struct neigh_table *tbl;
	bool neigh_used = false;
	struct neighbour *n;
	u64 lastuse;

	if (m_neigh->family == AF_INET)
		tbl = &arp_tbl;
#if IS_ENABLED(CONFIG_IPV6)
	else if (m_neigh->family == AF_INET6)
		tbl = ipv6_stub->nd_tbl;
#endif
	else
		return;

	/* mlx5e_get_next_valid_encap() releases previous encap before returning
	 * next one.
	 */
	while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) {
		struct mlx5e_priv *priv = netdev_priv(e->out_dev);
		struct encap_flow_item *efi, *tmp;
		struct mlx5_eswitch *esw;
		LIST_HEAD(flow_list);

		esw = priv->mdev->priv.eswitch;
		mutex_lock(&esw->offloads.encap_tbl_lock);
		list_for_each_entry_safe(efi, tmp, &e->flows, list) {
			flow = container_of(efi, struct mlx5e_tc_flow,
					    encaps[efi->index]);
			if (IS_ERR(mlx5e_flow_get(flow)))
				continue;
			list_add(&flow->tmp_list, &flow_list);

			if (mlx5e_is_offloaded_flow(flow)) {
				counter = mlx5e_tc_get_counter(flow);
				lastuse = mlx5_fc_query_lastuse(counter);
				if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
					neigh_used = true;
					break;
				}
			}
		}
		mutex_unlock(&esw->offloads.encap_tbl_lock);

		mlx5e_put_flow_list(priv, &flow_list);
		if (neigh_used) {
			/* release current encap before breaking the loop */
			mlx5e_encap_put(priv, e);
			break;
		}
	}

	trace_mlx5e_tc_update_neigh_used_value(nhe, neigh_used);

	if (neigh_used) {
		nhe->reported_lastuse = jiffies;

		/* find the relevant neigh according to the cached device and
		 * dst ip pair
		 */
		n = neigh_lookup(tbl, &m_neigh->dst_ip, READ_ONCE(nhe->neigh_dev));
		if (!n)
			return;

		neigh_event_send(n, NULL);
		neigh_release(n);
	}
}

static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
{
	WARN_ON(!list_empty(&e->flows));

	if (e->compl_result > 0) {
		mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);

		if (e->flags & MLX5_ENCAP_ENTRY_VALID)
			mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
	}

	kfree(e->tun_info);
	kfree(e->encap_header);
	kfree_rcu(e, rcu);
}

static void mlx5e_decap_dealloc(struct mlx5e_priv *priv,
				struct mlx5e_decap_entry *d)
{
	WARN_ON(!list_empty(&d->flows));

	if (!d->compl_result)
		mlx5_packet_reformat_dealloc(priv->mdev, d->pkt_reformat);

	kfree_rcu(d, rcu);
}

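/* Release a reference to an encap entry. The last reference removes the
 * entry from the encap table under encap_tbl_lock and frees it via RCU.
 */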
void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock))
		return;
	list_del(&e->route_list);
	hash_del_rcu(&e->encap_hlist);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	mlx5e_encap_dealloc(priv, e);
}

static void mlx5e_encap_put_locked(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	lockdep_assert_held(&esw->offloads.encap_tbl_lock);

	if (!refcount_dec_and_test(&e->refcnt))
		return;
	list_del(&e->route_list);
	hash_del_rcu(&e->encap_hlist);
	mlx5e_encap_dealloc(priv, e);
}

static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	if (!refcount_dec_and_mutex_lock(&d->refcnt, &esw->offloads.decap_tbl_lock))
		return;
	hash_del_rcu(&d->hlist);
	mutex_unlock(&esw->offloads.decap_tbl_lock);

	mlx5e_decap_dealloc(priv, d);
}

static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
				     struct mlx5e_tc_flow *flow,
				     int out_index);

void mlx5e_detach_encap(struct mlx5e_priv *priv,
			struct mlx5e_tc_flow *flow,
			struct mlx5_flow_attr *attr,
			int out_index)
{
	struct mlx5e_encap_entry *e = flow->encaps[out_index].e;
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	if (!mlx5e_is_eswitch_flow(flow))
		return;

	if (attr->esw_attr->dests[out_index].flags &
	    MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
		mlx5e_detach_encap_route(priv, flow, out_index);

	/* flow wasn't fully initialized */
	if (!e)
		return;

	mutex_lock(&esw->offloads.encap_tbl_lock);
	list_del(&flow->encaps[out_index].list);
	flow->encaps[out_index].e = NULL;
	if (!refcount_dec_and_test(&e->refcnt)) {
		mutex_unlock(&esw->offloads.encap_tbl_lock);
		return;
	}
	list_del(&e->route_list);
	hash_del_rcu(&e->encap_hlist);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	mlx5e_encap_dealloc(priv, e);
}

void mlx5e_detach_decap(struct mlx5e_priv *priv,
			struct mlx5e_tc_flow *flow)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_decap_entry *d = flow->decap_reformat;

	if (!d)
		return;

	mutex_lock(&esw->offloads.decap_tbl_lock);
	list_del(&flow->l3_to_l2_reformat);
	flow->decap_reformat = NULL;

	if (!refcount_dec_and_test(&d->refcnt)) {
		mutex_unlock(&esw->offloads.decap_tbl_lock);
		return;
	}
	hash_del_rcu(&d->hlist);
	mutex_unlock(&esw->offloads.decap_tbl_lock);

	mlx5e_decap_dealloc(priv, d);
}

bool mlx5e_tc_tun_encap_info_equal_generic(struct mlx5e_encap_key *a,
					   struct mlx5e_encap_key *b)
{
	return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) == 0 &&
	       a->tc_tunnel->tunnel_type == b->tc_tunnel->tunnel_type;
}

bool mlx5e_tc_tun_encap_info_equal_options(struct mlx5e_encap_key *a,
					   struct mlx5e_encap_key *b,
					   __be16 tun_flags)
{
	struct ip_tunnel_info *a_info;
	struct ip_tunnel_info *b_info;
	bool a_has_opts, b_has_opts;

	if (!mlx5e_tc_tun_encap_info_equal_generic(a, b))
		return false;

	a_has_opts = !!(a->ip_tun_key->tun_flags & tun_flags);
	b_has_opts = !!(b->ip_tun_key->tun_flags & tun_flags);

	/* keys are equal when both don't have any options attached */
	if (!a_has_opts && !b_has_opts)
		return true;

	if (a_has_opts != b_has_opts)
		return false;

	/* options stored in memory next to ip_tunnel_info struct */
	a_info = container_of(a->ip_tun_key, struct ip_tunnel_info, key);
	b_info = container_of(b->ip_tun_key, struct ip_tunnel_info, key);

	return a_info->options_len == b_info->options_len &&
	       !memcmp(ip_tunnel_info_opts(a_info),
		       ip_tunnel_info_opts(b_info),
		       a_info->options_len);
}

static int cmp_decap_info(struct mlx5e_decap_key *a,
			  struct mlx5e_decap_key *b)
{
	return memcmp(&a->key, &b->key, sizeof(b->key));
}

static int hash_encap_info(struct mlx5e_encap_key *key)
{
	return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key),
		     key->tc_tunnel->tunnel_type);
}

static int hash_decap_info(struct mlx5e_decap_key *key)
{
	return jhash(&key->key, sizeof(key->key), 0);
}

bool mlx5e_encap_take(struct mlx5e_encap_entry *e)
{
	return refcount_inc_not_zero(&e->refcnt);
}

static bool mlx5e_decap_take(struct mlx5e_decap_entry *e)
{
	return refcount_inc_not_zero(&e->refcnt);
}

static struct mlx5e_encap_entry *
mlx5e_encap_get(struct mlx5e_priv *priv, struct mlx5e_encap_key *key,
		uintptr_t hash_key)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_encap_key e_key;
	struct mlx5e_encap_entry *e;

	hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
				   encap_hlist, hash_key) {
		e_key.ip_tun_key = &e->tun_info->key;
		e_key.tc_tunnel = e->tunnel;
		if (e->tunnel->encap_info_equal(&e_key, key) &&
		    mlx5e_encap_take(e))
			return e;
	}

	return NULL;
}

static struct mlx5e_decap_entry *
mlx5e_decap_get(struct mlx5e_priv *priv, struct mlx5e_decap_key *key,
		uintptr_t hash_key)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_decap_key r_key;
	struct mlx5e_decap_entry *e;

	hash_for_each_possible_rcu(esw->offloads.decap_tbl, e,
				   hlist, hash_key) {
		r_key = e->key;
		if (!cmp_decap_info(&r_key, key) &&
		    mlx5e_decap_take(e))
			return e;
	}
	return NULL;
}

struct ip_tunnel_info *mlx5e_dup_tun_info(const struct ip_tunnel_info *tun_info)
{
	size_t tun_size = sizeof(*tun_info) + tun_info->options_len;

	return kmemdup(tun_info, tun_size, GFP_KERNEL);
}

static bool is_duplicated_encap_entry(struct mlx5e_priv *priv,
				      struct mlx5e_tc_flow *flow,
				      int out_index,
				      struct mlx5e_encap_entry *e,
				      struct netlink_ext_ack *extack)
{
	int i;

	for (i = 0; i < out_index; i++) {
		if (flow->encaps[i].e != e)
			continue;
		NL_SET_ERR_MSG_MOD(extack, "can't duplicate encap action");
		netdev_err(priv->netdev, "can't duplicate encap action\n");
		return true;
	}

	return false;
}

static int mlx5e_set_vf_tunnel(struct mlx5_eswitch *esw,
			       struct mlx5_flow_attr *attr,
			       struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
			       struct net_device *out_dev,
			       int route_dev_ifindex,
			       int out_index)
{
	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
	struct net_device *route_dev;
	u16 vport_num;
	int err = 0;
	u32 data;

	route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);

	if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
	    !mlx5e_tc_is_vf_tunnel(out_dev, route_dev))
		goto out;

	err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
	if (err)
		goto out;

	attr->dest_chain = 0;
	attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
	esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE;
	data = mlx5_eswitch_get_vport_metadata_for_set(esw_attr->in_mdev->priv.eswitch,
						       vport_num);
	err = mlx5e_tc_match_to_reg_set_and_get_id(esw->dev, mod_hdr_acts,
						   MLX5_FLOW_NAMESPACE_FDB,
						   VPORT_TO_REG, data);
	if (err >= 0) {
		esw_attr->dests[out_index].src_port_rewrite_act_id = err;
		err = 0;
	}

out:
	if (route_dev)
		dev_put(route_dev);
	return err;
}

static int mlx5e_update_vf_tunnel(struct mlx5_eswitch *esw,
				  struct mlx5_esw_flow_attr *attr,
				  struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
				  struct net_device *out_dev,
				  int route_dev_ifindex,
				  int out_index)
{
	int act_id = attr->dests[out_index].src_port_rewrite_act_id;
	struct net_device *route_dev;
	u16 vport_num;
	int err = 0;
	u32 data;

	route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);

	if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
	    !mlx5e_tc_is_vf_tunnel(out_dev, route_dev)) {
		err = -ENODEV;
		goto out;
	}

	err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
	if (err)
		goto out;

	data = mlx5_eswitch_get_vport_metadata_for_set(attr->in_mdev->priv.eswitch,
						       vport_num);
	mlx5e_tc_match_to_reg_mod_hdr_change(esw->dev, mod_hdr_acts, VPORT_TO_REG, act_id, data);

out:
	if (route_dev)
		dev_put(route_dev);
	return err;
}

static unsigned int mlx5e_route_tbl_get_last_update(struct mlx5e_priv *priv)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *uplink_rpriv;
	struct mlx5e_tc_tun_encap *encap;
	unsigned int ret;

	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	uplink_priv = &uplink_rpriv->uplink_priv;
	encap = uplink_priv->encap;

	spin_lock_bh(&encap->route_lock);
	ret = encap->route_tbl_last_update;
	spin_unlock_bh(&encap->route_lock);
	return ret;
}

static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
				    struct mlx5e_tc_flow *flow,
				    struct mlx5_flow_attr *attr,
				    struct mlx5e_encap_entry *e,
				    bool new_encap_entry,
				    unsigned long tbl_time_before,
				    int out_index);

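/* Attach a flow to an encap entry for the tunnel destination at @out_index.
 * An existing entry with a matching tunnel key is reused; otherwise a new
 * entry is allocated, hashed, and its encapsulation header resolved. Flows
 * whose encap is not yet valid (unresolved neighbour) are marked SLOW.
 */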
int mlx5e_attach_encap(struct mlx5e_priv *priv,
		       struct mlx5e_tc_flow *flow,
		       struct mlx5_flow_attr *attr,
		       struct net_device *mirred_dev,
		       int out_index,
		       struct netlink_ext_ack *extack,
		       struct net_device **encap_dev)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	const struct ip_tunnel_info *tun_info;
	const struct mlx5e_mpls_info *mpls_info;
	unsigned long tbl_time_before = 0;
	struct mlx5e_encap_entry *e;
	struct mlx5e_encap_key key;
	bool entry_created = false;
	unsigned short family;
	uintptr_t hash_key;
	int err = 0;

	lockdep_assert_held(&esw->offloads.encap_tbl_lock);

	parse_attr = attr->parse_attr;
	tun_info = parse_attr->tun_info[out_index];
	mpls_info = &parse_attr->mpls_info[out_index];
	family = ip_tunnel_info_af(tun_info);
	key.ip_tun_key = &tun_info->key;
	key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev);
	if (!key.tc_tunnel) {
		NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel");
		return -EOPNOTSUPP;
	}

	hash_key = hash_encap_info(&key);

	e = mlx5e_encap_get(priv, &key, hash_key);

	/* must verify if encap is valid or not */
	if (e) {
		/* Check that entry was not already attached to this flow */
		if (is_duplicated_encap_entry(priv, flow, out_index, e, extack)) {
			err = -EOPNOTSUPP;
			goto out_err;
		}

		goto attach_flow;
	}

	e = kzalloc(sizeof(*e), GFP_KERNEL);
	if (!e) {
		err = -ENOMEM;
		goto out_err;
	}

	refcount_set(&e->refcnt, 1);
	init_completion(&e->res_ready);
	entry_created = true;
	INIT_LIST_HEAD(&e->route_list);

	tun_info = mlx5e_dup_tun_info(tun_info);
	if (!tun_info) {
		err = -ENOMEM;
		goto out_err_init;
	}
	e->tun_info = tun_info;
	memcpy(&e->mpls_info, mpls_info, sizeof(*mpls_info));
	err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
	if (err)
		goto out_err_init;

	INIT_LIST_HEAD(&e->flows);
	hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
	tbl_time_before = mlx5e_route_tbl_get_last_update(priv);

	if (family == AF_INET)
		err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e);
	else if (family == AF_INET6)
		err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);

	complete_all(&e->res_ready);
	if (err) {
		e->compl_result = err;
		goto out_err;
	}
	e->compl_result = 1;

attach_flow:
	err = mlx5e_attach_encap_route(priv, flow, attr, e, entry_created,
				       tbl_time_before, out_index);
	if (err)
		goto out_err;

	err = mlx5e_set_int_port_tunnel(priv, attr, e, out_index);
	if (err == -EOPNOTSUPP) {
		/* If device doesn't support int port offload,
		 * redirect to uplink vport.
		 */
		mlx5_core_dbg(priv->mdev, "attaching int port as encap dev not supported, using uplink\n");
		err = 0;
	} else if (err) {
		goto out_err;
	}

	flow->encaps[out_index].e = e;
	list_add(&flow->encaps[out_index].list, &e->flows);
	flow->encaps[out_index].index = out_index;
	*encap_dev = e->out_dev;
	if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
		attr->esw_attr->dests[out_index].pkt_reformat = e->pkt_reformat;
		attr->esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
	} else {
		flow_flag_set(flow, SLOW);
	}

	return err;

out_err:
	if (e)
		mlx5e_encap_put_locked(priv, e);
	return err;

out_err_init:
	kfree(tun_info);
	kfree(e);
	return err;
}

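/* Attach a flow to a decap (L3-to-L2) entry keyed on the inner Ethernet
 * header. Entries are shared between flows; the first user allocates the
 * packet reformat object, later users wait on res_ready and reuse it.
 */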
int mlx5e_attach_decap(struct mlx5e_priv *priv,
		       struct mlx5e_tc_flow *flow,
		       struct netlink_ext_ack *extack)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
	struct mlx5_pkt_reformat_params reformat_params;
	struct mlx5e_decap_entry *d;
	struct mlx5e_decap_key key;
	uintptr_t hash_key;
	int err = 0;

	if (sizeof(attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) {
		NL_SET_ERR_MSG_MOD(extack,
				   "encap header larger than max supported");
		return -EOPNOTSUPP;
	}

	key.key = attr->eth;
	hash_key = hash_decap_info(&key);
	mutex_lock(&esw->offloads.decap_tbl_lock);
	d = mlx5e_decap_get(priv, &key, hash_key);
	if (d) {
		mutex_unlock(&esw->offloads.decap_tbl_lock);
		wait_for_completion(&d->res_ready);
		mutex_lock(&esw->offloads.decap_tbl_lock);
		if (d->compl_result) {
			err = -EREMOTEIO;
			goto out_free;
		}
		goto found;
	}

	d = kzalloc(sizeof(*d), GFP_KERNEL);
	if (!d) {
		err = -ENOMEM;
		goto out_err;
	}

	d->key = key;
	refcount_set(&d->refcnt, 1);
	init_completion(&d->res_ready);
	INIT_LIST_HEAD(&d->flows);
	hash_add_rcu(esw->offloads.decap_tbl, &d->hlist, hash_key);
	mutex_unlock(&esw->offloads.decap_tbl_lock);

	memset(&reformat_params, 0, sizeof(reformat_params));
	reformat_params.type = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2;
	reformat_params.size = sizeof(attr->eth);
	reformat_params.data = &attr->eth;
	d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
						     &reformat_params,
						     MLX5_FLOW_NAMESPACE_FDB);
	if (IS_ERR(d->pkt_reformat)) {
		err = PTR_ERR(d->pkt_reformat);
		d->compl_result = err;
	}
	mutex_lock(&esw->offloads.decap_tbl_lock);
	complete_all(&d->res_ready);
	if (err)
		goto out_free;

found:
	flow->decap_reformat = d;
	attr->decap_pkt_reformat = d->pkt_reformat;
	list_add(&flow->l3_to_l2_reformat, &d->flows);
	mutex_unlock(&esw->offloads.decap_tbl_lock);
	return 0;

out_free:
	mutex_unlock(&esw->offloads.decap_tbl_lock);
	mlx5e_decap_put(priv, d);
	return err;

out_err:
	mutex_unlock(&esw->offloads.decap_tbl_lock);
	return err;
}

int mlx5e_tc_tun_encap_dests_set(struct mlx5e_priv *priv,
				 struct mlx5e_tc_flow *flow,
				 struct mlx5_flow_attr *attr,
				 struct netlink_ext_ack *extack,
				 bool *vf_tun)
{
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5_esw_flow_attr *esw_attr;
	struct net_device *encap_dev = NULL;
	struct mlx5e_rep_priv *rpriv;
	struct mlx5e_priv *out_priv;
	struct mlx5_eswitch *esw;
	int out_index;
	int err = 0;

	parse_attr = attr->parse_attr;
	esw_attr = attr->esw_attr;
	*vf_tun = false;

	esw = priv->mdev->priv.eswitch;
	mutex_lock(&esw->offloads.encap_tbl_lock);
	for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
		struct net_device *out_dev;
		int mirred_ifindex;

		if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
			continue;

		mirred_ifindex = parse_attr->mirred_ifindex[out_index];
		out_dev = dev_get_by_index(dev_net(priv->netdev), mirred_ifindex);
		if (!out_dev) {
			NL_SET_ERR_MSG_MOD(extack, "Requested mirred device not found");
			err = -ENODEV;
			goto out;
		}
		err = mlx5e_attach_encap(priv, flow, attr, out_dev, out_index,
					 extack, &encap_dev);
		dev_put(out_dev);
		if (err)
			goto out;

		if (esw_attr->dests[out_index].flags &
		    MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE &&
		    !esw_attr->dest_int_port)
			*vf_tun = true;

		out_priv = netdev_priv(encap_dev);
		rpriv = out_priv->ppriv;
		esw_attr->dests[out_index].vport_valid = true;
		esw_attr->dests[out_index].vport = rpriv->rep->vport;
		esw_attr->dests[out_index].mdev = out_priv->mdev;
	}

	if (*vf_tun && esw_attr->out_count > 1) {
		NL_SET_ERR_MSG_MOD(extack, "VF tunnel encap with mirroring is not supported");
		err = -EOPNOTSUPP;
		goto out;
	}

out:
	mutex_unlock(&esw->offloads.encap_tbl_lock);
	return err;
}

void mlx5e_tc_tun_encap_dests_unset(struct mlx5e_priv *priv,
				    struct mlx5e_tc_flow *flow,
				    struct mlx5_flow_attr *attr)
{
	struct mlx5_esw_flow_attr *esw_attr;
	int out_index;

	if (!mlx5e_is_eswitch_flow(flow))
		return;

	esw_attr = attr->esw_attr;

	for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
		if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
			continue;

		mlx5e_detach_encap(flow->priv, flow, attr, out_index);
		kfree(attr->parse_attr->tun_info[out_index]);
	}
}

static int cmp_route_info(struct mlx5e_route_key *a,
			  struct mlx5e_route_key *b)
{
	if (a->ip_version == 4 && b->ip_version == 4)
		return memcmp(&a->endpoint_ip.v4, &b->endpoint_ip.v4,
			      sizeof(a->endpoint_ip.v4));
	else if (a->ip_version == 6 && b->ip_version == 6)
		return memcmp(&a->endpoint_ip.v6, &b->endpoint_ip.v6,
			      sizeof(a->endpoint_ip.v6));
	return 1;
}

static u32 hash_route_info(struct mlx5e_route_key *key)
{
	if (key->ip_version == 4)
		return jhash(&key->endpoint_ip.v4, sizeof(key->endpoint_ip.v4), 0);
	return jhash(&key->endpoint_ip.v6, sizeof(key->endpoint_ip.v6), 0);
}

static void mlx5e_route_dealloc(struct mlx5e_priv *priv,
				struct mlx5e_route_entry *r)
{
	WARN_ON(!list_empty(&r->decap_flows));
	WARN_ON(!list_empty(&r->encap_entries));

	kfree_rcu(r, rcu);
}

static void mlx5e_route_put(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	if (!refcount_dec_and_mutex_lock(&r->refcnt, &esw->offloads.encap_tbl_lock))
		return;

	hash_del_rcu(&r->hlist);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	mlx5e_route_dealloc(priv, r);
}

static void mlx5e_route_put_locked(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	lockdep_assert_held(&esw->offloads.encap_tbl_lock);

	if (!refcount_dec_and_test(&r->refcnt))
		return;
	hash_del_rcu(&r->hlist);
	mlx5e_route_dealloc(priv, r);
}

static struct mlx5e_route_entry *
mlx5e_route_get(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key,
		u32 hash_key)
{
	struct mlx5e_route_key r_key;
	struct mlx5e_route_entry *r;

	hash_for_each_possible(encap->route_tbl, r, hlist, hash_key) {
		r_key = r->key;
		if (!cmp_route_info(&r_key, key) &&
		    refcount_inc_not_zero(&r->refcnt))
			return r;
	}
	return NULL;
}

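/* Look up a route entry by tunnel endpoint IP, creating and hashing a new
 * one when no match exists. On creation, @route_tbl_change_time is set to
 * the route table's last-update stamp so callers can detect FIB changes
 * that raced with the creation.
 */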
static struct mlx5e_route_entry *
mlx5e_route_get_create(struct mlx5e_priv *priv,
		       struct mlx5e_route_key *key,
		       int tunnel_dev_index,
		       unsigned long *route_tbl_change_time)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *uplink_rpriv;
	struct mlx5e_tc_tun_encap *encap;
	struct mlx5e_route_entry *r;
	u32 hash_key;

	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	uplink_priv = &uplink_rpriv->uplink_priv;
	encap = uplink_priv->encap;

	hash_key = hash_route_info(key);
	spin_lock_bh(&encap->route_lock);
	r = mlx5e_route_get(encap, key, hash_key);
	spin_unlock_bh(&encap->route_lock);
	if (r) {
		if (!mlx5e_route_entry_valid(r)) {
			mlx5e_route_put_locked(priv, r);
			return ERR_PTR(-EINVAL);
		}
		return r;
	}

	r = kzalloc(sizeof(*r), GFP_KERNEL);
	if (!r)
		return ERR_PTR(-ENOMEM);

	r->key = *key;
	r->flags |= MLX5E_ROUTE_ENTRY_VALID;
	r->tunnel_dev_index = tunnel_dev_index;
	refcount_set(&r->refcnt, 1);
	INIT_LIST_HEAD(&r->decap_flows);
	INIT_LIST_HEAD(&r->encap_entries);

	spin_lock_bh(&encap->route_lock);
	*route_tbl_change_time = encap->route_tbl_last_update;
	hash_add(encap->route_tbl, &r->hlist, hash_key);
	spin_unlock_bh(&encap->route_lock);

	return r;
}

static struct mlx5e_route_entry *
mlx5e_route_lookup_for_update(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key)
{
	u32 hash_key = hash_route_info(key);
	struct mlx5e_route_entry *r;

	spin_lock_bh(&encap->route_lock);
	encap->route_tbl_last_update = jiffies;
	r = mlx5e_route_get(encap, key, hash_key);
	spin_unlock_bh(&encap->route_lock);

	return r;
}

struct mlx5e_tc_fib_event_data {
	struct work_struct work;
	unsigned long event;
	struct mlx5e_route_entry *r;
	struct net_device *ul_dev;
};

static void mlx5e_tc_fib_event_work(struct work_struct *work);
static struct mlx5e_tc_fib_event_data *
mlx5e_tc_init_fib_work(unsigned long event, struct net_device *ul_dev, gfp_t flags)
{
	struct mlx5e_tc_fib_event_data *fib_work;

	fib_work = kzalloc(sizeof(*fib_work), flags);
	if (WARN_ON(!fib_work))
		return NULL;

	INIT_WORK(&fib_work->work, mlx5e_tc_fib_event_work);
	fib_work->event = event;
	fib_work->ul_dev = ul_dev;

	return fib_work;
}

static int
mlx5e_route_enqueue_update(struct mlx5e_priv *priv,
			   struct mlx5e_route_entry *r,
			   unsigned long event)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_fib_event_data *fib_work;
	struct mlx5e_rep_priv *uplink_rpriv;
	struct net_device *ul_dev;

	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	ul_dev = uplink_rpriv->netdev;

	fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_KERNEL);
	if (!fib_work)
		return -ENOMEM;

	dev_hold(ul_dev);
	refcount_inc(&r->refcnt);
	fib_work->r = r;
	queue_work(priv->wq, &fib_work->work);

	return 0;
}

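/* Bind a decap flow to the route entry for its tunnel destination IP so
 * that FIB events on that route can re-evaluate the flow later.
 */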
int mlx5e_attach_decap_route(struct mlx5e_priv *priv,
			     struct mlx5e_tc_flow *flow)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	unsigned long tbl_time_before, tbl_time_after;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5_flow_attr *attr = flow->attr;
	struct mlx5_esw_flow_attr *esw_attr;
	struct mlx5e_route_entry *r;
	struct mlx5e_route_key key;
	int err = 0;

	esw_attr = attr->esw_attr;
	parse_attr = attr->parse_attr;
	mutex_lock(&esw->offloads.encap_tbl_lock);
	if (!esw_attr->rx_tun_attr)
		goto out;

	tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
	tbl_time_after = tbl_time_before;
	err = mlx5e_tc_tun_route_lookup(priv, &parse_attr->spec, attr, parse_attr->filter_dev);
	if (err || !esw_attr->rx_tun_attr->decap_vport)
		goto out;

	key.ip_version = attr->tun_ip_version;
	if (key.ip_version == 4)
		key.endpoint_ip.v4 = esw_attr->rx_tun_attr->dst_ip.v4;
	else
		key.endpoint_ip.v6 = esw_attr->rx_tun_attr->dst_ip.v6;

	r = mlx5e_route_get_create(priv, &key, parse_attr->filter_dev->ifindex,
				   &tbl_time_after);
	if (IS_ERR(r)) {
		err = PTR_ERR(r);
		goto out;
	}
	/* Routing changed concurrently. FIB event handler might have missed new
	 * entry, schedule update.
	 */
	if (tbl_time_before != tbl_time_after) {
		err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
		if (err) {
			mlx5e_route_put_locked(priv, r);
			goto out;
		}
	}

	flow->decap_route = r;
	list_add(&flow->decap_routes, &r->decap_flows);
	mutex_unlock(&esw->offloads.encap_tbl_lock);
	return 0;

out:
	mutex_unlock(&esw->offloads.encap_tbl_lock);
	return err;
}

static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
				    struct mlx5e_tc_flow *flow,
				    struct mlx5_flow_attr *attr,
				    struct mlx5e_encap_entry *e,
				    bool new_encap_entry,
				    unsigned long tbl_time_before,
				    int out_index)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	unsigned long tbl_time_after = tbl_time_before;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	const struct ip_tunnel_info *tun_info;
	struct mlx5_esw_flow_attr *esw_attr;
	struct mlx5e_route_entry *r;
	struct mlx5e_route_key key;
	unsigned short family;
	int err = 0;

	esw_attr = attr->esw_attr;
	parse_attr = attr->parse_attr;
	tun_info = parse_attr->tun_info[out_index];
	family = ip_tunnel_info_af(tun_info);

	if (family == AF_INET) {
		key.endpoint_ip.v4 = tun_info->key.u.ipv4.src;
		key.ip_version = 4;
	} else if (family == AF_INET6) {
		key.endpoint_ip.v6 = tun_info->key.u.ipv6.src;
		key.ip_version = 6;
	}

	err = mlx5e_set_vf_tunnel(esw, attr, &parse_attr->mod_hdr_acts, e->out_dev,
				  e->route_dev_ifindex, out_index);
	if (err || !(esw_attr->dests[out_index].flags &
		     MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE))
		return err;

	r = mlx5e_route_get_create(priv, &key, parse_attr->mirred_ifindex[out_index],
				   &tbl_time_after);
	if (IS_ERR(r))
		return PTR_ERR(r);
	/* Routing changed concurrently. FIB event handler might have missed new
	 * entry, schedule update.
	 */
	if (tbl_time_before != tbl_time_after) {
		err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
		if (err) {
			mlx5e_route_put_locked(priv, r);
			return err;
		}
	}

	flow->encap_routes[out_index].r = r;
	if (new_encap_entry)
		list_add(&e->route_list, &r->encap_entries);
	flow->encap_routes[out_index].index = out_index;
	return 0;
}

void mlx5e_detach_decap_route(struct mlx5e_priv *priv,
			      struct mlx5e_tc_flow *flow)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_route_entry *r = flow->decap_route;

	if (!r)
		return;

	mutex_lock(&esw->offloads.encap_tbl_lock);
	list_del(&flow->decap_routes);
	flow->decap_route = NULL;

	if (!refcount_dec_and_test(&r->refcnt)) {
		mutex_unlock(&esw->offloads.encap_tbl_lock);
		return;
	}
	hash_del_rcu(&r->hlist);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	mlx5e_route_dealloc(priv, r);
}

static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
				     struct mlx5e_tc_flow *flow,
				     int out_index)
{
	struct mlx5e_route_entry *r = flow->encap_routes[out_index].r;
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_encap_entry *e, *tmp;

	if (!r)
		return;

	mutex_lock(&esw->offloads.encap_tbl_lock);
	flow->encap_routes[out_index].r = NULL;

	if (!refcount_dec_and_test(&r->refcnt)) {
		mutex_unlock(&esw->offloads.encap_tbl_lock);
		return;
	}
	list_for_each_entry_safe(e, tmp, &r->encap_entries, route_list)
		list_del_init(&e->route_list);
	hash_del_rcu(&r->hlist);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	mlx5e_route_dealloc(priv, r);
}

static void mlx5e_invalidate_encap(struct mlx5e_priv *priv,
				   struct mlx5e_encap_entry *e,
				   struct list_head *encap_flows)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_flow *flow;

	list_for_each_entry(flow, encap_flows, tmp_list) {
		struct mlx5_esw_flow_attr *esw_attr;
		struct mlx5_flow_attr *attr;

		if (!mlx5e_is_offloaded_flow(flow))
			continue;

		attr = mlx5e_tc_get_encap_attr(flow);
		esw_attr = attr->esw_attr;

		if (flow_flag_test(flow, SLOW)) {
			mlx5e_tc_unoffload_from_slow_path(esw, flow);
		} else {
			mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
			mlx5e_tc_unoffload_flow_post_acts(flow);
		}

		mlx5e_tc_detach_mod_hdr(priv, flow, attr);
		attr->modify_hdr = NULL;

		esw_attr->dests[flow->tmp_entry_index].flags &=
			~MLX5_ESW_DEST_ENCAP_VALID;
		esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL;
	}

	e->flags |= MLX5_ENCAP_ENTRY_NO_ROUTE;
	if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
		e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
		mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
		e->pkt_reformat = NULL;
	}
}

static void mlx5e_reoffload_encap(struct mlx5e_priv *priv,
				  struct net_device *tunnel_dev,
				  struct mlx5e_encap_entry *e,
				  struct list_head *encap_flows)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_flow *flow;
	int err;

	err = ip_tunnel_info_af(e->tun_info) == AF_INET ?
		mlx5e_tc_tun_update_header_ipv4(priv, tunnel_dev, e) :
		mlx5e_tc_tun_update_header_ipv6(priv, tunnel_dev, e);
	if (err)
		mlx5_core_warn(priv->mdev, "Failed to update encap header, %d", err);
	e->flags &= ~MLX5_ENCAP_ENTRY_NO_ROUTE;

	list_for_each_entry(flow, encap_flows, tmp_list) {
		struct mlx5e_tc_flow_parse_attr *parse_attr;
		struct mlx5_esw_flow_attr *esw_attr;
		struct mlx5_flow_handle *rule;
		struct mlx5_flow_attr *attr;
		struct mlx5_flow_spec *spec;

		if (flow_flag_test(flow, FAILED))
			continue;

		spec = &flow->attr->parse_attr->spec;

		attr = mlx5e_tc_get_encap_attr(flow);
		esw_attr = attr->esw_attr;
		parse_attr = attr->parse_attr;

		err = mlx5e_update_vf_tunnel(esw, esw_attr, &parse_attr->mod_hdr_acts,
					     e->out_dev, e->route_dev_ifindex,
					     flow->tmp_entry_index);
		if (err) {
			mlx5_core_warn(priv->mdev, "Failed to update VF tunnel err=%d", err);
			continue;
		}

		err = mlx5e_tc_attach_mod_hdr(priv, flow, attr);
		if (err) {
			mlx5_core_warn(priv->mdev, "Failed to update flow mod_hdr err=%d",
				       err);
			continue;
		}

		if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
			esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
			esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
			if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
				goto offload_to_slow_path;

			err = mlx5e_tc_offload_flow_post_acts(flow);
			if (err) {
				mlx5_core_warn(priv->mdev, "Failed to update flow post acts, %d\n",
					       err);
				goto offload_to_slow_path;
			}

			/* update from slow path rule to encap rule */
			rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, flow->attr);
			if (IS_ERR(rule)) {
				mlx5e_tc_unoffload_flow_post_acts(flow);
				err = PTR_ERR(rule);
				mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
					       err);
			} else {
				flow->rule[0] = rule;
			}
		} else {
offload_to_slow_path:
			rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
			/* mark the flow's encap dest as non-valid */
			esw_attr->dests[flow->tmp_entry_index].flags &=
				~MLX5_ESW_DEST_ENCAP_VALID;

			if (IS_ERR(rule)) {
				err = PTR_ERR(rule);
				mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
					       err);
			} else {
				flow->rule[0] = rule;
			}
		}
		flow_flag_set(flow, OFFLOADED);
	}
}

1578 | static int mlx5e_update_route_encaps(struct mlx5e_priv *priv, |
1579 | struct mlx5e_route_entry *r, |
1580 | struct list_head *flow_list, |
1581 | bool replace) |
1582 | { |
1583 | struct net_device *tunnel_dev; |
1584 | struct mlx5e_encap_entry *e; |
1585 | |
1586 | tunnel_dev = __dev_get_by_index(net: dev_net(dev: priv->netdev), ifindex: r->tunnel_dev_index); |
1587 | if (!tunnel_dev) |
1588 | return -ENODEV; |
1589 | |
1590 | list_for_each_entry(e, &r->encap_entries, route_list) { |
1591 | LIST_HEAD(encap_flows); |
1592 | |
1593 | mlx5e_take_all_encap_flows(e, flow_list: &encap_flows); |
1594 | if (list_empty(head: &encap_flows)) |
1595 | continue; |
1596 | |
1597 | if (mlx5e_route_entry_valid(r)) |
1598 | mlx5e_invalidate_encap(priv, e, encap_flows: &encap_flows); |
1599 | |
1600 | if (!replace) { |
1601 | list_splice(list: &encap_flows, head: flow_list); |
1602 | continue; |
1603 | } |
1604 | |
1605 | mlx5e_reoffload_encap(priv, tunnel_dev, e, encap_flows: &encap_flows); |
1606 | list_splice(list: &encap_flows, head: flow_list); |
1607 | } |
1608 | |
1609 | return 0; |
1610 | } |
1611 | |
1612 | static void mlx5e_unoffload_flow_list(struct mlx5e_priv *priv, |
1613 | struct list_head *flow_list) |
1614 | { |
1615 | struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; |
1616 | struct mlx5e_tc_flow *flow; |
1617 | |
1618 | list_for_each_entry(flow, flow_list, tmp_list) |
1619 | if (mlx5e_is_offloaded_flow(flow)) |
1620 | mlx5e_tc_unoffload_fdb_rules(esw, flow, attr: flow->attr); |
1621 | } |
1622 | |
static void mlx5e_reoffload_decap(struct mlx5e_priv *priv,
				  struct list_head *decap_flows)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_flow *flow;

	list_for_each_entry(flow, decap_flows, tmp_list) {
		struct mlx5e_tc_flow_parse_attr *parse_attr;
		struct mlx5_flow_attr *attr = flow->attr;
		struct mlx5_flow_handle *rule;
		struct mlx5_flow_spec *spec;
		int err;

		if (flow_flag_test(flow, FAILED))
			continue;

		parse_attr = attr->parse_attr;
		spec = &parse_attr->spec;
		err = mlx5e_tc_tun_route_lookup(priv, spec, attr, parse_attr->filter_dev);
		if (err) {
			mlx5_core_warn(priv->mdev, "Failed to lookup route for flow, %d\n",
				       err);
			continue;
		}

		rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
		if (IS_ERR(rule)) {
			err = PTR_ERR(rule);
			mlx5_core_warn(priv->mdev, "Failed to update cached decap flow, %d\n",
				       err);
		} else {
			flow->rule[0] = rule;
			flow_flag_set(flow, OFFLOADED);
		}
	}
}

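/* Counterpart of mlx5e_update_route_encaps() for decap flows: take all
 * decap flows attached to route entry 'r', unoffload them if the entry
 * was valid, and re-offload them when the FIB event was a replace.
 */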
static int mlx5e_update_route_decap_flows(struct mlx5e_priv *priv,
					  struct mlx5e_route_entry *r,
					  struct list_head *flow_list,
					  bool replace)
{
	struct net_device *tunnel_dev;
	LIST_HEAD(decap_flows);

	tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
	if (!tunnel_dev)
		return -ENODEV;

	mlx5e_take_all_route_decap_flows(r, &decap_flows);
	if (mlx5e_route_entry_valid(r))
		mlx5e_unoffload_flow_list(priv, &decap_flows);
	if (replace)
		mlx5e_reoffload_decap(priv, &decap_flows);

	list_splice(&decap_flows, flow_list);

	return 0;
}

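/* Worker for FIB replace/delete events. Runs under RTNL and the eswitch
 * encap table lock, refreshes all encap and decap flows attached to the
 * affected route entry, and finally drops the references taken by the
 * notifier when it queued this work.
 */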
static void mlx5e_tc_fib_event_work(struct work_struct *work)
{
	struct mlx5e_tc_fib_event_data *event_data =
		container_of(work, struct mlx5e_tc_fib_event_data, work);
	struct net_device *ul_dev = event_data->ul_dev;
	struct mlx5e_priv *priv = netdev_priv(ul_dev);
	struct mlx5e_route_entry *r = event_data->r;
	struct mlx5_eswitch *esw;
	LIST_HEAD(flow_list);
	bool replace;
	int err;

	/* sync with concurrent neigh updates */
	rtnl_lock();
	esw = priv->mdev->priv.eswitch;
	mutex_lock(&esw->offloads.encap_tbl_lock);
	replace = event_data->event == FIB_EVENT_ENTRY_REPLACE;

	if (!mlx5e_route_entry_valid(r) && !replace)
		goto out;

	err = mlx5e_update_route_encaps(priv, r, &flow_list, replace);
	if (err)
		mlx5_core_warn(priv->mdev, "Failed to update route encaps, %d\n",
			       err);

	err = mlx5e_update_route_decap_flows(priv, r, &flow_list, replace);
	if (err)
		mlx5_core_warn(priv->mdev, "Failed to update route decap flows, %d\n",
			       err);

	if (replace)
		r->flags |= MLX5E_ROUTE_ENTRY_VALID;
out:
	mutex_unlock(&esw->offloads.encap_tbl_lock);
	rtnl_unlock();

	mlx5e_put_flow_list(priv, &flow_list);
	mlx5e_route_put(priv, event_data->r);
	dev_put(event_data->ul_dev);
	kfree(event_data);
}

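/* Called from the FIB notifier in atomic context: validate that the IPv4
 * route is a /32 host route via an mlx5e netdev (nexthop objects are not
 * handled) and, if a matching cached route entry exists, allocate the
 * deferred work that will update it.
 */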
static struct mlx5e_tc_fib_event_data *
mlx5e_init_fib_work_ipv4(struct mlx5e_priv *priv,
			 struct net_device *ul_dev,
			 struct mlx5e_tc_tun_encap *encap,
			 unsigned long event,
			 struct fib_notifier_info *info)
{
	struct fib_entry_notifier_info *fen_info;
	struct mlx5e_tc_fib_event_data *fib_work;
	struct mlx5e_route_entry *r;
	struct mlx5e_route_key key;
	struct net_device *fib_dev;

	fen_info = container_of(info, struct fib_entry_notifier_info, info);
	if (fen_info->fi->nh)
		return NULL;
	fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
	if (!fib_dev || fib_dev->netdev_ops != &mlx5e_netdev_ops ||
	    fen_info->dst_len != 32)
		return NULL;

	fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
	if (!fib_work)
		return ERR_PTR(-ENOMEM);

	key.endpoint_ip.v4 = htonl(fen_info->dst);
	key.ip_version = 4;

	/* Can't fail after this point because releasing the reference to
	 * 'r' requires taking a sleeping mutex, which we can't do in
	 * atomic context.
	 */
	r = mlx5e_route_lookup_for_update(encap, &key);
	if (!r)
		goto out;
	fib_work->r = r;
	dev_hold(ul_dev);

	return fib_work;

out:
	kfree(fib_work);
	return NULL;
}

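/* IPv6 counterpart of mlx5e_init_fib_work_ipv4(): only /128 host routes
 * via an mlx5e netdev are considered.
 */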
static struct mlx5e_tc_fib_event_data *
mlx5e_init_fib_work_ipv6(struct mlx5e_priv *priv,
			 struct net_device *ul_dev,
			 struct mlx5e_tc_tun_encap *encap,
			 unsigned long event,
			 struct fib_notifier_info *info)
{
	struct fib6_entry_notifier_info *fen_info;
	struct mlx5e_tc_fib_event_data *fib_work;
	struct mlx5e_route_entry *r;
	struct mlx5e_route_key key;
	struct net_device *fib_dev;

	fen_info = container_of(info, struct fib6_entry_notifier_info, info);
	fib_dev = fib6_info_nh_dev(fen_info->rt);
	if (fib_dev->netdev_ops != &mlx5e_netdev_ops ||
	    fen_info->rt->fib6_dst.plen != 128)
		return NULL;

	fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
	if (!fib_work)
		return ERR_PTR(-ENOMEM);

	memcpy(&key.endpoint_ip.v6, &fen_info->rt->fib6_dst.addr,
	       sizeof(fen_info->rt->fib6_dst.addr));
	key.ip_version = 6;

	/* Can't fail after this point because releasing the reference to
	 * 'r' requires taking a sleeping mutex, which we can't do in
	 * atomic context.
	 */
	r = mlx5e_route_lookup_for_update(encap, &key);
	if (!r)
		goto out;
	fib_work->r = r;
	dev_hold(ul_dev);

	return fib_work;

out:
	kfree(fib_work);
	return NULL;
}

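/* FIB notifier callback. Only FIB_EVENT_ENTRY_REPLACE and
 * FIB_EVENT_ENTRY_DEL are handled; the actual flow update is deferred to
 * a workqueue because it needs sleeping locks that can't be taken here.
 */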
static int mlx5e_tc_tun_fib_event(struct notifier_block *nb, unsigned long event, void *ptr)
{
	struct mlx5e_tc_fib_event_data *fib_work;
	struct fib_notifier_info *info = ptr;
	struct mlx5e_tc_tun_encap *encap;
	struct net_device *ul_dev;
	struct mlx5e_priv *priv;

	encap = container_of(nb, struct mlx5e_tc_tun_encap, fib_nb);
	priv = encap->priv;
	ul_dev = priv->netdev;
	priv = netdev_priv(ul_dev);

	switch (event) {
	case FIB_EVENT_ENTRY_REPLACE:
	case FIB_EVENT_ENTRY_DEL:
		if (info->family == AF_INET)
			fib_work = mlx5e_init_fib_work_ipv4(priv, ul_dev, encap, event, info);
		else if (info->family == AF_INET6)
			fib_work = mlx5e_init_fib_work_ipv6(priv, ul_dev, encap, event, info);
		else
			return NOTIFY_DONE;

		if (!IS_ERR_OR_NULL(fib_work)) {
			queue_work(priv->wq, &fib_work->work);
		} else if (IS_ERR(fib_work)) {
			NL_SET_ERR_MSG_MOD(info->extack, "Failed to init fib work");
			mlx5_core_warn(priv->mdev, "Failed to init fib work, %ld\n",
				       PTR_ERR(fib_work));
		}

		break;
	default:
		return NOTIFY_DONE;
	}

	return NOTIFY_DONE;
}

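/* Allocate the tunnel encap context and register the FIB notifier that
 * keeps offloaded tunnel routes in sync with the kernel FIB.
 */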
struct mlx5e_tc_tun_encap *mlx5e_tc_tun_init(struct mlx5e_priv *priv)
{
	struct mlx5e_tc_tun_encap *encap;
	int err;

	encap = kvzalloc(sizeof(*encap), GFP_KERNEL);
	if (!encap)
		return ERR_PTR(-ENOMEM);

	encap->priv = priv;
	encap->fib_nb.notifier_call = mlx5e_tc_tun_fib_event;
	spin_lock_init(&encap->route_lock);
	hash_init(encap->route_tbl);
	err = register_fib_notifier(dev_net(priv->netdev), &encap->fib_nb,
				    NULL, NULL);
	if (err) {
		kvfree(encap);
		return ERR_PTR(err);
	}

	return encap;
}

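/* Unregister the FIB notifier and flush any pending FIB event work before
 * freeing the tunnel encap context.
 */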
void mlx5e_tc_tun_cleanup(struct mlx5e_tc_tun_encap *encap)
{
	if (!encap)
		return;

	unregister_fib_notifier(dev_net(encap->priv->netdev), &encap->fib_nb);
	flush_workqueue(encap->priv->wq); /* flush fib event works */
	kvfree(encap);
}