// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_conntrack_labels.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_acct.h>
#include <uapi/linux/tc_act/tc_pedit.h>
#include <net/tc_act/tc_ct.h>
#include <net/flow_offload.h>
#include <net/netfilter/nf_flow_table.h>
#include <linux/workqueue.h>
#include <linux/refcount.h>
#include <linux/xarray.h>
#include <linux/if_macvlan.h>
#include <linux/debugfs.h>

#include "lib/fs_chains.h"
#include "en/tc_ct.h"
#include "en/tc/ct_fs.h"
#include "en/tc_priv.h"
#include "en/mod_hdr.h"
#include "en/mapping.h"
#include "en/tc/post_act.h"
#include "en.h"
#include "en_tc.h"
#include "en_rep.h"
#include "fs_core.h"

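/* ct_state bits tracked for a packet. They are written into the ct_state
 * register (CTSTATE_TO_REG) by the offloaded CT rules below and matched
 * against when a tc filter's ct_state key is offloaded.
 */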
#define MLX5_CT_STATE_ESTABLISHED_BIT BIT(1)
#define MLX5_CT_STATE_TRK_BIT BIT(2)
#define MLX5_CT_STATE_NAT_BIT BIT(3)
#define MLX5_CT_STATE_REPLY_BIT BIT(4)
#define MLX5_CT_STATE_RELATED_BIT BIT(5)
#define MLX5_CT_STATE_INVALID_BIT BIT(6)
#define MLX5_CT_STATE_NEW_BIT BIT(7)

#define MLX5_CT_LABELS_BITS MLX5_REG_MAPPING_MBITS(LABELS_TO_REG)
#define MLX5_CT_LABELS_MASK MLX5_REG_MAPPING_MASK(LABELS_TO_REG)

/* Statically allocate modify actions for
 * ipv6 and port nat (5) + tuple fields (4) + nic mode zone restore (1) = 10.
 * This will be increased dynamically if needed (for the ipv6 snat + dnat).
 */
#define MLX5_CT_MIN_MOD_ACTS 10

#define ct_dbg(fmt, args...)\
	netdev_dbg(ct_priv->netdev, "ct_debug: " fmt "\n", ##args)
51
52struct mlx5_tc_ct_debugfs {
53 struct {
54 atomic_t offloaded;
55 atomic_t rx_dropped;
56 } stats;
57
58 struct dentry *root;
59};
60
61struct mlx5_tc_ct_priv {
62 struct mlx5_core_dev *dev;
63 struct mlx5e_priv *priv;
64 const struct net_device *netdev;
65 struct mod_hdr_tbl *mod_hdr_tbl;
66 struct xarray tuple_ids;
67 struct rhashtable zone_ht;
68 struct rhashtable ct_tuples_ht;
69 struct rhashtable ct_tuples_nat_ht;
70 struct mlx5_flow_table *ct;
71 struct mlx5_flow_table *ct_nat;
72 struct mlx5e_post_act *post_act;
73 struct mutex control_lock; /* guards parallel adds/dels */
74 struct mapping_ctx *zone_mapping;
75 struct mapping_ctx *labels_mapping;
76 enum mlx5_flow_namespace_type ns_type;
77 struct mlx5_fs_chains *chains;
78 struct mlx5_ct_fs *fs;
79 struct mlx5_ct_fs_ops *fs_ops;
80 spinlock_t ht_lock; /* protects ft entries */
81 struct workqueue_struct *wq;
82
83 struct mlx5_tc_ct_debugfs debugfs;
84};
85
86struct mlx5_ct_zone_rule {
87 struct mlx5_ct_fs_rule *rule;
88 struct mlx5e_mod_hdr_handle *mh;
89 struct mlx5_flow_attr *attr;
90 bool nat;
91};
92
93struct mlx5_tc_ct_pre {
94 struct mlx5_flow_table *ft;
95 struct mlx5_flow_group *flow_grp;
96 struct mlx5_flow_group *miss_grp;
97 struct mlx5_flow_handle *flow_rule;
98 struct mlx5_flow_handle *miss_rule;
99 struct mlx5_modify_hdr *modify_hdr;
100};
101
102struct mlx5_ct_ft {
103 struct rhash_head node;
104 u16 zone;
105 u32 zone_restore_id;
106 refcount_t refcount;
107 struct nf_flowtable *nf_ft;
108 struct mlx5_tc_ct_priv *ct_priv;
109 struct rhashtable ct_entries_ht;
110 struct mlx5_tc_ct_pre pre_ct;
111 struct mlx5_tc_ct_pre pre_ct_nat;
112};
113
114struct mlx5_ct_tuple {
115 u16 addr_type;
116 __be16 n_proto;
117 u8 ip_proto;
118 struct {
119 union {
120 __be32 src_v4;
121 struct in6_addr src_v6;
122 };
123 union {
124 __be32 dst_v4;
125 struct in6_addr dst_v6;
126 };
127 } ip;
128 struct {
129 __be16 src;
130 __be16 dst;
131 } port;
132
133 u16 zone;
134};
135
136struct mlx5_ct_counter {
137 struct mlx5_fc *counter;
138 refcount_t refcount;
139 bool is_shared;
140};
141
142enum {
143 MLX5_CT_ENTRY_FLAG_VALID,
144};
145
146struct mlx5_ct_entry {
147 struct rhash_head node;
148 struct rhash_head tuple_node;
149 struct rhash_head tuple_nat_node;
150 struct mlx5_ct_counter *counter;
151 unsigned long cookie;
152 unsigned long restore_cookie;
153 struct mlx5_ct_tuple tuple;
154 struct mlx5_ct_tuple tuple_nat;
155 struct mlx5_ct_zone_rule zone_rules[2];
156
157 struct mlx5_tc_ct_priv *ct_priv;
158 struct work_struct work;
159
160 refcount_t refcnt;
161 unsigned long flags;
162};
163
164static void
165mlx5_tc_ct_entry_destroy_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
166 struct mlx5_flow_attr *attr,
167 struct mlx5e_mod_hdr_handle *mh);
168
169static const struct rhashtable_params cts_ht_params = {
170 .head_offset = offsetof(struct mlx5_ct_entry, node),
171 .key_offset = offsetof(struct mlx5_ct_entry, cookie),
172 .key_len = sizeof(((struct mlx5_ct_entry *)0)->cookie),
173 .automatic_shrinking = true,
174 .min_size = 16 * 1024,
175};
176
177static const struct rhashtable_params zone_params = {
178 .head_offset = offsetof(struct mlx5_ct_ft, node),
179 .key_offset = offsetof(struct mlx5_ct_ft, zone),
180 .key_len = sizeof(((struct mlx5_ct_ft *)0)->zone),
181 .automatic_shrinking = true,
182};
183
184static const struct rhashtable_params tuples_ht_params = {
185 .head_offset = offsetof(struct mlx5_ct_entry, tuple_node),
186 .key_offset = offsetof(struct mlx5_ct_entry, tuple),
187 .key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple),
188 .automatic_shrinking = true,
189 .min_size = 16 * 1024,
190};
191
192static const struct rhashtable_params tuples_nat_ht_params = {
193 .head_offset = offsetof(struct mlx5_ct_entry, tuple_nat_node),
194 .key_offset = offsetof(struct mlx5_ct_entry, tuple_nat),
195 .key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple_nat),
196 .automatic_shrinking = true,
197 .min_size = 16 * 1024,
198};
199
200static bool
201mlx5_tc_ct_entry_has_nat(struct mlx5_ct_entry *entry)
202{
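	/* tuple_nat_node.next is only populated once the entry has been
	 * inserted into ct_tuples_nat_ht, so a non-NULL value here means
	 * the entry carries a NAT rewrite.
	 */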
203 return !!(entry->tuple_nat_node.next);
204}
205
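/* ct labels are 128 bits wide and do not fit in a register, so map them to
 * a smaller id. The all-zero labels value keeps the reserved id 0 and does
 * not consume a mapping entry (see mlx5_put_label_mapping()).
 */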
206static int
207mlx5_get_label_mapping(struct mlx5_tc_ct_priv *ct_priv,
208 u32 *labels, u32 *id)
209{
210 if (!memchr_inv(p: labels, c: 0, size: sizeof(u32) * 4)) {
211 *id = 0;
212 return 0;
213 }
214
215 if (mapping_add(ctx: ct_priv->labels_mapping, data: labels, id))
216 return -EOPNOTSUPP;
217
218 return 0;
219}
220
221static void
222mlx5_put_label_mapping(struct mlx5_tc_ct_priv *ct_priv, u32 id)
223{
224 if (id)
225 mapping_remove(ctx: ct_priv->labels_mapping, id);
226}
227
228static int
229mlx5_tc_ct_rule_to_tuple(struct mlx5_ct_tuple *tuple, struct flow_rule *rule)
230{
231 struct flow_match_control control;
232 struct flow_match_basic basic;
233
234 flow_rule_match_basic(rule, out: &basic);
235 flow_rule_match_control(rule, out: &control);
236
237 tuple->n_proto = basic.key->n_proto;
238 tuple->ip_proto = basic.key->ip_proto;
239 tuple->addr_type = control.key->addr_type;
240
241 if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
242 struct flow_match_ipv4_addrs match;
243
244 flow_rule_match_ipv4_addrs(rule, out: &match);
245 tuple->ip.src_v4 = match.key->src;
246 tuple->ip.dst_v4 = match.key->dst;
247 } else if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
248 struct flow_match_ipv6_addrs match;
249
250 flow_rule_match_ipv6_addrs(rule, out: &match);
251 tuple->ip.src_v6 = match.key->src;
252 tuple->ip.dst_v6 = match.key->dst;
253 } else {
254 return -EOPNOTSUPP;
255 }
256
257 if (flow_rule_match_key(rule, key: FLOW_DISSECTOR_KEY_PORTS)) {
258 struct flow_match_ports match;
259
260 flow_rule_match_ports(rule, out: &match);
261 switch (tuple->ip_proto) {
262 case IPPROTO_TCP:
263 case IPPROTO_UDP:
264 tuple->port.src = match.key->src;
265 tuple->port.dst = match.key->dst;
266 break;
267 default:
268 return -EOPNOTSUPP;
269 }
270 } else {
271 if (tuple->ip_proto != IPPROTO_GRE)
272 return -EOPNOTSUPP;
273 }
274
275 return 0;
276}
277
278static int
279mlx5_tc_ct_rule_to_tuple_nat(struct mlx5_ct_tuple *tuple,
280 struct flow_rule *rule)
281{
282 struct flow_action *flow_action = &rule->action;
283 struct flow_action_entry *act;
284 u32 offset, val, ip6_offset;
285 int i;
286
287 flow_action_for_each(i, act, flow_action) {
288 if (act->id != FLOW_ACTION_MANGLE)
289 continue;
290
291 offset = act->mangle.offset;
292 val = act->mangle.val;
293 switch (act->mangle.htype) {
294 case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
295 if (offset == offsetof(struct iphdr, saddr))
296 tuple->ip.src_v4 = cpu_to_be32(val);
297 else if (offset == offsetof(struct iphdr, daddr))
298 tuple->ip.dst_v4 = cpu_to_be32(val);
299 else
300 return -EOPNOTSUPP;
301 break;
302
303 case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
304 ip6_offset = (offset - offsetof(struct ipv6hdr, saddr));
305 ip6_offset /= 4;
306 if (ip6_offset < 4)
307 tuple->ip.src_v6.s6_addr32[ip6_offset] = cpu_to_be32(val);
308 else if (ip6_offset < 8)
309 tuple->ip.dst_v6.s6_addr32[ip6_offset - 4] = cpu_to_be32(val);
310 else
311 return -EOPNOTSUPP;
312 break;
313
314 case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
315 if (offset == offsetof(struct tcphdr, source))
316 tuple->port.src = cpu_to_be16(val);
317 else if (offset == offsetof(struct tcphdr, dest))
318 tuple->port.dst = cpu_to_be16(val);
319 else
320 return -EOPNOTSUPP;
321 break;
322
323 case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
324 if (offset == offsetof(struct udphdr, source))
325 tuple->port.src = cpu_to_be16(val);
326 else if (offset == offsetof(struct udphdr, dest))
327 tuple->port.dst = cpu_to_be16(val);
328 else
329 return -EOPNOTSUPP;
330 break;
331
332 default:
333 return -EOPNOTSUPP;
334 }
335 }
336
337 return 0;
338}
339
340static int
341mlx5_tc_ct_get_flow_source_match(struct mlx5_tc_ct_priv *ct_priv,
342 struct net_device *ndev)
343{
344 struct mlx5e_priv *other_priv = netdev_priv(dev: ndev);
345 struct mlx5_core_dev *mdev = ct_priv->dev;
346 bool vf_rep, uplink_rep;
347
348 vf_rep = mlx5e_eswitch_vf_rep(netdev: ndev) && mlx5_same_hw_devs(dev: mdev, peer_dev: other_priv->mdev);
349 uplink_rep = mlx5e_eswitch_uplink_rep(netdev: ndev) && mlx5_same_hw_devs(dev: mdev, peer_dev: other_priv->mdev);
350
351 if (vf_rep)
352 return MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT;
353 if (uplink_rep)
354 return MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK;
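	/* For stacked devices, classify by the underlying real device. */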
355 if (is_vlan_dev(dev: ndev))
356 return mlx5_tc_ct_get_flow_source_match(ct_priv, ndev: vlan_dev_real_dev(dev: ndev));
357 if (netif_is_macvlan(dev: ndev))
358 return mlx5_tc_ct_get_flow_source_match(ct_priv, ndev: macvlan_dev_real_dev(dev: ndev));
359 if (mlx5e_get_tc_tun(tunnel_dev: ndev) || netif_is_lag_master(dev: ndev))
360 return MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK;
361
362 return MLX5_FLOW_CONTEXT_FLOW_SOURCE_ANY_VPORT;
363}
364
365static int
366mlx5_tc_ct_set_tuple_match(struct mlx5_tc_ct_priv *ct_priv,
367 struct mlx5_flow_spec *spec,
368 struct flow_rule *rule)
369{
370 void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
371 outer_headers);
372 void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
373 outer_headers);
374 u16 addr_type = 0;
375 u8 ip_proto = 0;
376
377 if (flow_rule_match_key(rule, key: FLOW_DISSECTOR_KEY_BASIC)) {
378 struct flow_match_basic match;
379
380 flow_rule_match_basic(rule, out: &match);
381
382 mlx5e_tc_set_ethertype(mdev: ct_priv->dev, match: &match, outer: true, headers_c, headers_v);
383 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
384 match.mask->ip_proto);
385 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
386 match.key->ip_proto);
387
388 ip_proto = match.key->ip_proto;
389 }
390
391 if (flow_rule_match_key(rule, key: FLOW_DISSECTOR_KEY_CONTROL)) {
392 struct flow_match_control match;
393
394 flow_rule_match_control(rule, out: &match);
395 addr_type = match.key->addr_type;
396 }
397
398 if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
399 struct flow_match_ipv4_addrs match;
400
401 flow_rule_match_ipv4_addrs(rule, out: &match);
402 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
403 src_ipv4_src_ipv6.ipv4_layout.ipv4),
404 &match.mask->src, sizeof(match.mask->src));
405 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
406 src_ipv4_src_ipv6.ipv4_layout.ipv4),
407 &match.key->src, sizeof(match.key->src));
408 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
409 dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
410 &match.mask->dst, sizeof(match.mask->dst));
411 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
412 dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
413 &match.key->dst, sizeof(match.key->dst));
414 }
415
416 if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
417 struct flow_match_ipv6_addrs match;
418
419 flow_rule_match_ipv6_addrs(rule, out: &match);
420 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
421 src_ipv4_src_ipv6.ipv6_layout.ipv6),
422 &match.mask->src, sizeof(match.mask->src));
423 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
424 src_ipv4_src_ipv6.ipv6_layout.ipv6),
425 &match.key->src, sizeof(match.key->src));
426
427 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
428 dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
429 &match.mask->dst, sizeof(match.mask->dst));
430 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
431 dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
432 &match.key->dst, sizeof(match.key->dst));
433 }
434
435 if (flow_rule_match_key(rule, key: FLOW_DISSECTOR_KEY_PORTS)) {
436 struct flow_match_ports match;
437
438 flow_rule_match_ports(rule, out: &match);
439 switch (ip_proto) {
440 case IPPROTO_TCP:
441 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
442 tcp_sport, ntohs(match.mask->src));
443 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
444 tcp_sport, ntohs(match.key->src));
445
446 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
447 tcp_dport, ntohs(match.mask->dst));
448 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
449 tcp_dport, ntohs(match.key->dst));
450 break;
451
452 case IPPROTO_UDP:
453 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
454 udp_sport, ntohs(match.mask->src));
455 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
456 udp_sport, ntohs(match.key->src));
457
458 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
459 udp_dport, ntohs(match.mask->dst));
460 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
461 udp_dport, ntohs(match.key->dst));
462 break;
463 default:
464 break;
465 }
466 }
467
468 if (flow_rule_match_key(rule, key: FLOW_DISSECTOR_KEY_TCP)) {
469 struct flow_match_tcp match;
470
471 flow_rule_match_tcp(rule, out: &match);
472 MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
473 ntohs(match.mask->flags));
474 MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags,
475 ntohs(match.key->flags));
476 }
477
478 if (flow_rule_match_key(rule, key: FLOW_DISSECTOR_KEY_META)) {
479 struct flow_match_meta match;
480
481 flow_rule_match_meta(rule, out: &match);
482
483 if (match.key->ingress_ifindex & match.mask->ingress_ifindex) {
484 struct net_device *dev;
485
486 dev = dev_get_by_index(net: &init_net, ifindex: match.key->ingress_ifindex);
487 if (dev && MLX5_CAP_ESW_FLOWTABLE(ct_priv->dev, flow_source))
488 spec->flow_context.flow_source =
489 mlx5_tc_ct_get_flow_source_match(ct_priv, ndev: dev);
490
491 dev_put(dev);
492 }
493 }
494
495 return 0;
496}
497
498static void
499mlx5_tc_ct_counter_put(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_entry *entry)
500{
501 if (entry->counter->is_shared &&
502 !refcount_dec_and_test(r: &entry->counter->refcount))
503 return;
504
505 mlx5_fc_destroy(dev: ct_priv->dev, counter: entry->counter->counter);
506 kfree(objp: entry->counter);
507}
508
509static void
510mlx5_tc_ct_entry_del_rule(struct mlx5_tc_ct_priv *ct_priv,
511 struct mlx5_ct_entry *entry,
512 bool nat)
513{
514 struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
515 struct mlx5_flow_attr *attr = zone_rule->attr;
516
517 ct_dbg("Deleting ct entry rule in zone %d", entry->tuple.zone);
518
519 ct_priv->fs_ops->ct_rule_del(ct_priv->fs, zone_rule->rule);
520 mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, attr: zone_rule->attr, mh: zone_rule->mh);
521 mlx5_put_label_mapping(ct_priv, id: attr->ct_attr.ct_labels_id);
522 kfree(objp: attr);
523}
524
525static void
526mlx5_tc_ct_entry_del_rules(struct mlx5_tc_ct_priv *ct_priv,
527 struct mlx5_ct_entry *entry)
528{
529 mlx5_tc_ct_entry_del_rule(ct_priv, entry, nat: true);
530 mlx5_tc_ct_entry_del_rule(ct_priv, entry, nat: false);
531
532 atomic_dec(v: &ct_priv->debugfs.stats.offloaded);
533}
534
535static struct flow_action_entry *
536mlx5_tc_ct_get_ct_metadata_action(struct flow_rule *flow_rule)
537{
538 struct flow_action *flow_action = &flow_rule->action;
539 struct flow_action_entry *act;
540 int i;
541
542 flow_action_for_each(i, act, flow_action) {
543 if (act->id == FLOW_ACTION_CT_METADATA)
544 return act;
545 }
546
547 return NULL;
548}
549
550static int
551mlx5_tc_ct_entry_set_registers(struct mlx5_tc_ct_priv *ct_priv,
552 struct mlx5e_tc_mod_hdr_acts *mod_acts,
553 u8 ct_state,
554 u32 mark,
555 u32 labels_id,
556 u8 zone_restore_id)
557{
558 enum mlx5_flow_namespace_type ns = ct_priv->ns_type;
559 struct mlx5_core_dev *dev = ct_priv->dev;
560 int err;
561
562 err = mlx5e_tc_match_to_reg_set(mdev: dev, mod_hdr_acts: mod_acts, ns,
563 type: CTSTATE_TO_REG, data: ct_state);
564 if (err)
565 return err;
566
567 err = mlx5e_tc_match_to_reg_set(mdev: dev, mod_hdr_acts: mod_acts, ns,
568 type: MARK_TO_REG, data: mark);
569 if (err)
570 return err;
571
572 err = mlx5e_tc_match_to_reg_set(mdev: dev, mod_hdr_acts: mod_acts, ns,
573 type: LABELS_TO_REG, data: labels_id);
574 if (err)
575 return err;
576
577 err = mlx5e_tc_match_to_reg_set(mdev: dev, mod_hdr_acts: mod_acts, ns,
578 type: ZONE_RESTORE_TO_REG, data: zone_restore_id);
579 if (err)
580 return err;
581
	/* Make another copy of the zone id in reg_b for
	 * NIC RX flows, since reg_c1 is not copied to
	 * reg_b upon miss.
	 */
586 if (ns != MLX5_FLOW_NAMESPACE_FDB) {
587 err = mlx5e_tc_match_to_reg_set(mdev: dev, mod_hdr_acts: mod_acts, ns,
588 type: NIC_ZONE_RESTORE_TO_REG, data: zone_restore_id);
589 if (err)
590 return err;
591 }
592 return 0;
593}
594
595static int
596mlx5_tc_ct_parse_mangle_to_mod_act(struct flow_action_entry *act,
597 char *modact)
598{
599 u32 offset = act->mangle.offset, field;
600
601 switch (act->mangle.htype) {
602 case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
603 MLX5_SET(set_action_in, modact, length, 0);
604 if (offset == offsetof(struct iphdr, saddr))
605 field = MLX5_ACTION_IN_FIELD_OUT_SIPV4;
606 else if (offset == offsetof(struct iphdr, daddr))
607 field = MLX5_ACTION_IN_FIELD_OUT_DIPV4;
608 else
609 return -EOPNOTSUPP;
610 break;
611
612 case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
613 MLX5_SET(set_action_in, modact, length, 0);
614 if (offset == offsetof(struct ipv6hdr, saddr) + 12)
615 field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0;
616 else if (offset == offsetof(struct ipv6hdr, saddr) + 8)
617 field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32;
618 else if (offset == offsetof(struct ipv6hdr, saddr) + 4)
619 field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64;
620 else if (offset == offsetof(struct ipv6hdr, saddr))
621 field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96;
622 else if (offset == offsetof(struct ipv6hdr, daddr) + 12)
623 field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0;
624 else if (offset == offsetof(struct ipv6hdr, daddr) + 8)
625 field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32;
626 else if (offset == offsetof(struct ipv6hdr, daddr) + 4)
627 field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64;
628 else if (offset == offsetof(struct ipv6hdr, daddr))
629 field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96;
630 else
631 return -EOPNOTSUPP;
632 break;
633
634 case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
635 MLX5_SET(set_action_in, modact, length, 16);
636 if (offset == offsetof(struct tcphdr, source))
637 field = MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT;
638 else if (offset == offsetof(struct tcphdr, dest))
639 field = MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT;
640 else
641 return -EOPNOTSUPP;
642 break;
643
644 case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
645 MLX5_SET(set_action_in, modact, length, 16);
646 if (offset == offsetof(struct udphdr, source))
647 field = MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT;
648 else if (offset == offsetof(struct udphdr, dest))
649 field = MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT;
650 else
651 return -EOPNOTSUPP;
652 break;
653
654 default:
655 return -EOPNOTSUPP;
656 }
657
658 MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
659 MLX5_SET(set_action_in, modact, offset, 0);
660 MLX5_SET(set_action_in, modact, field, field);
661 MLX5_SET(set_action_in, modact, data, act->mangle.val);
662
663 return 0;
664}
665
666static int
667mlx5_tc_ct_entry_create_nat(struct mlx5_tc_ct_priv *ct_priv,
668 struct flow_rule *flow_rule,
669 struct mlx5e_tc_mod_hdr_acts *mod_acts)
670{
671 struct flow_action *flow_action = &flow_rule->action;
672 struct mlx5_core_dev *mdev = ct_priv->dev;
673 struct flow_action_entry *act;
674 char *modact;
675 int err, i;
676
677 flow_action_for_each(i, act, flow_action) {
678 switch (act->id) {
679 case FLOW_ACTION_MANGLE: {
680 modact = mlx5e_mod_hdr_alloc(mdev, namespace: ct_priv->ns_type, mod_hdr_acts: mod_acts);
681 if (IS_ERR(ptr: modact))
682 return PTR_ERR(ptr: modact);
683
684 err = mlx5_tc_ct_parse_mangle_to_mod_act(act, modact);
685 if (err)
686 return err;
687
688 mod_acts->num_actions++;
689 }
690 break;
691
692 case FLOW_ACTION_CT_METADATA:
693 /* Handled earlier */
694 continue;
695 default:
696 return -EOPNOTSUPP;
697 }
698 }
699
700 return 0;
701}
702
703static int
704mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
705 struct mlx5_flow_attr *attr,
706 struct flow_rule *flow_rule,
707 struct mlx5e_mod_hdr_handle **mh,
708 u8 zone_restore_id, bool nat_table, bool has_nat)
709{
710 DECLARE_MOD_HDR_ACTS_ACTIONS(actions_arr, MLX5_CT_MIN_MOD_ACTS);
711 DECLARE_MOD_HDR_ACTS(mod_acts, actions_arr);
712 struct flow_action_entry *meta;
713 enum ip_conntrack_info ctinfo;
714 u16 ct_state = 0;
715 int err;
716
717 meta = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
718 if (!meta)
719 return -EOPNOTSUPP;
720 ctinfo = meta->ct_metadata.cookie & NFCT_INFOMASK;
721
722 err = mlx5_get_label_mapping(ct_priv, labels: meta->ct_metadata.labels,
723 id: &attr->ct_attr.ct_labels_id);
724 if (err)
725 return -EOPNOTSUPP;
726 if (nat_table) {
727 if (has_nat) {
728 err = mlx5_tc_ct_entry_create_nat(ct_priv, flow_rule, mod_acts: &mod_acts);
729 if (err)
730 goto err_mapping;
731 }
732
733 ct_state |= MLX5_CT_STATE_NAT_BIT;
734 }
735
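	/* All offloaded entries are tracked; NEW vs ESTABLISHED is taken from
	 * the conntrack info encoded in the metadata cookie, and REPLY is set
	 * for the non-original direction.
	 */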
736 ct_state |= MLX5_CT_STATE_TRK_BIT;
737 ct_state |= ctinfo == IP_CT_NEW ? MLX5_CT_STATE_NEW_BIT : MLX5_CT_STATE_ESTABLISHED_BIT;
738 ct_state |= meta->ct_metadata.orig_dir ? 0 : MLX5_CT_STATE_REPLY_BIT;
739 err = mlx5_tc_ct_entry_set_registers(ct_priv, mod_acts: &mod_acts,
740 ct_state,
741 mark: meta->ct_metadata.mark,
742 labels_id: attr->ct_attr.ct_labels_id,
743 zone_restore_id);
744 if (err)
745 goto err_mapping;
746
747 if (nat_table && has_nat) {
748 attr->modify_hdr = mlx5_modify_header_alloc(dev: ct_priv->dev, ns_type: ct_priv->ns_type,
749 num_actions: mod_acts.num_actions,
750 modify_actions: mod_acts.actions);
751 if (IS_ERR(ptr: attr->modify_hdr)) {
752 err = PTR_ERR(ptr: attr->modify_hdr);
753 goto err_mapping;
754 }
755
756 *mh = NULL;
757 } else {
758 *mh = mlx5e_mod_hdr_attach(mdev: ct_priv->dev,
759 tbl: ct_priv->mod_hdr_tbl,
760 namespace: ct_priv->ns_type,
761 mod_hdr_acts: &mod_acts);
762 if (IS_ERR(ptr: *mh)) {
763 err = PTR_ERR(ptr: *mh);
764 goto err_mapping;
765 }
766 attr->modify_hdr = mlx5e_mod_hdr_get(mh: *mh);
767 }
768
769 mlx5e_mod_hdr_dealloc(mod_hdr_acts: &mod_acts);
770 return 0;
771
772err_mapping:
773 mlx5e_mod_hdr_dealloc(mod_hdr_acts: &mod_acts);
774 mlx5_put_label_mapping(ct_priv, id: attr->ct_attr.ct_labels_id);
775 return err;
776}
777
778static void
779mlx5_tc_ct_entry_destroy_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
780 struct mlx5_flow_attr *attr,
781 struct mlx5e_mod_hdr_handle *mh)
782{
783 if (mh)
784 mlx5e_mod_hdr_detach(mdev: ct_priv->dev, tbl: ct_priv->mod_hdr_tbl, mh);
785 else
786 mlx5_modify_header_dealloc(dev: ct_priv->dev, modify_hdr: attr->modify_hdr);
787}
788
789static int
790mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
791 struct flow_rule *flow_rule,
792 struct mlx5_ct_entry *entry,
793 bool nat, u8 zone_restore_id)
794{
795 struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
796 struct mlx5e_priv *priv = netdev_priv(dev: ct_priv->netdev);
797 struct mlx5_flow_spec *spec = NULL;
798 struct mlx5_flow_attr *attr;
799 int err;
800
801 zone_rule->nat = nat;
802
803 spec = kvzalloc(size: sizeof(*spec), GFP_KERNEL);
804 if (!spec)
805 return -ENOMEM;
806
807 attr = mlx5_alloc_flow_attr(type: ct_priv->ns_type);
808 if (!attr) {
809 err = -ENOMEM;
810 goto err_attr;
811 }
812
813 err = mlx5_tc_ct_entry_create_mod_hdr(ct_priv, attr, flow_rule,
814 mh: &zone_rule->mh,
815 zone_restore_id,
816 nat_table: nat,
817 has_nat: mlx5_tc_ct_entry_has_nat(entry));
818 if (err) {
819 ct_dbg("Failed to create ct entry mod hdr");
820 goto err_mod_hdr;
821 }
822
823 attr->action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
824 MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
825 MLX5_FLOW_CONTEXT_ACTION_COUNT;
826 attr->dest_chain = 0;
827 attr->dest_ft = mlx5e_tc_post_act_get_ft(post_act: ct_priv->post_act);
828 attr->ft = nat ? ct_priv->ct_nat : ct_priv->ct;
829 if (entry->tuple.ip_proto == IPPROTO_TCP ||
830 entry->tuple.ip_proto == IPPROTO_UDP)
831 attr->outer_match_level = MLX5_MATCH_L4;
832 else
833 attr->outer_match_level = MLX5_MATCH_L3;
834 attr->counter = entry->counter->counter;
835 attr->flags |= MLX5_ATTR_FLAG_NO_IN_PORT;
836 if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB)
837 attr->esw_attr->in_mdev = priv->mdev;
838
839 mlx5_tc_ct_set_tuple_match(ct_priv, spec, rule: flow_rule);
840 mlx5e_tc_match_to_reg_match(spec, type: ZONE_TO_REG, data: entry->tuple.zone, MLX5_CT_ZONE_MASK);
841
842 zone_rule->rule = ct_priv->fs_ops->ct_rule_add(ct_priv->fs, spec, attr, flow_rule);
843 if (IS_ERR(ptr: zone_rule->rule)) {
844 err = PTR_ERR(ptr: zone_rule->rule);
845 ct_dbg("Failed to add ct entry rule, nat: %d", nat);
846 goto err_rule;
847 }
848
849 zone_rule->attr = attr;
850
851 kvfree(addr: spec);
852 ct_dbg("Offloaded ct entry rule in zone %d", entry->tuple.zone);
853
854 return 0;
855
856err_rule:
857 mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, attr: zone_rule->attr, mh: zone_rule->mh);
858 mlx5_put_label_mapping(ct_priv, id: attr->ct_attr.ct_labels_id);
859err_mod_hdr:
860 kfree(objp: attr);
861err_attr:
862 kvfree(addr: spec);
863 return err;
864}
865
866static int
867mlx5_tc_ct_entry_replace_rule(struct mlx5_tc_ct_priv *ct_priv,
868 struct flow_rule *flow_rule,
869 struct mlx5_ct_entry *entry,
870 bool nat, u8 zone_restore_id)
871{
872 struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
873 struct mlx5_flow_attr *attr = zone_rule->attr, *old_attr;
874 struct mlx5e_mod_hdr_handle *mh;
875 struct mlx5_ct_fs_rule *rule;
876 struct mlx5_flow_spec *spec;
877 int err;
878
879 spec = kvzalloc(size: sizeof(*spec), GFP_KERNEL);
880 if (!spec)
881 return -ENOMEM;
882
883 old_attr = mlx5_alloc_flow_attr(type: ct_priv->ns_type);
884 if (!old_attr) {
885 err = -ENOMEM;
886 goto err_attr;
887 }
888 *old_attr = *attr;
889
890 err = mlx5_tc_ct_entry_create_mod_hdr(ct_priv, attr, flow_rule, mh: &mh, zone_restore_id,
891 nat_table: nat, has_nat: mlx5_tc_ct_entry_has_nat(entry));
892 if (err) {
893 ct_dbg("Failed to create ct entry mod hdr");
894 goto err_mod_hdr;
895 }
896
897 mlx5_tc_ct_set_tuple_match(ct_priv, spec, rule: flow_rule);
898 mlx5e_tc_match_to_reg_match(spec, type: ZONE_TO_REG, data: entry->tuple.zone, MLX5_CT_ZONE_MASK);
899
900 rule = ct_priv->fs_ops->ct_rule_add(ct_priv->fs, spec, attr, flow_rule);
901 if (IS_ERR(ptr: rule)) {
902 err = PTR_ERR(ptr: rule);
903 ct_dbg("Failed to add replacement ct entry rule, nat: %d", nat);
904 goto err_rule;
905 }
906
907 ct_priv->fs_ops->ct_rule_del(ct_priv->fs, zone_rule->rule);
908 zone_rule->rule = rule;
909 mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, attr: old_attr, mh: zone_rule->mh);
910 zone_rule->mh = mh;
911 mlx5_put_label_mapping(ct_priv, id: old_attr->ct_attr.ct_labels_id);
912
913 kfree(objp: old_attr);
914 kvfree(addr: spec);
915 ct_dbg("Replaced ct entry rule in zone %d", entry->tuple.zone);
916
917 return 0;
918
919err_rule:
920 mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, attr: zone_rule->attr, mh);
921 mlx5_put_label_mapping(ct_priv, id: attr->ct_attr.ct_labels_id);
922err_mod_hdr:
923 kfree(objp: old_attr);
924err_attr:
925 kvfree(addr: spec);
926 return err;
927}
928
929static bool
930mlx5_tc_ct_entry_valid(struct mlx5_ct_entry *entry)
931{
932 return test_bit(MLX5_CT_ENTRY_FLAG_VALID, &entry->flags);
933}
934
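/* Look the tuple up in the plain tuples table first, then in the NAT tuples
 * table. Returns NULL when nothing matches and ERR_PTR(-EINVAL) when a
 * matching entry exists but is invalid or already going away.
 */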
935static struct mlx5_ct_entry *
936mlx5_tc_ct_entry_get(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_tuple *tuple)
937{
938 struct mlx5_ct_entry *entry;
939
940 entry = rhashtable_lookup_fast(ht: &ct_priv->ct_tuples_ht, key: tuple,
941 params: tuples_ht_params);
942 if (entry && mlx5_tc_ct_entry_valid(entry) &&
943 refcount_inc_not_zero(r: &entry->refcnt)) {
944 return entry;
945 } else if (!entry) {
946 entry = rhashtable_lookup_fast(ht: &ct_priv->ct_tuples_nat_ht,
947 key: tuple, params: tuples_nat_ht_params);
948 if (entry && mlx5_tc_ct_entry_valid(entry) &&
949 refcount_inc_not_zero(r: &entry->refcnt))
950 return entry;
951 }
952
953 return entry ? ERR_PTR(error: -EINVAL) : NULL;
954}
955
956static void mlx5_tc_ct_entry_remove_from_tuples(struct mlx5_ct_entry *entry)
957{
958 struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv;
959
960 rhashtable_remove_fast(ht: &ct_priv->ct_tuples_nat_ht,
961 obj: &entry->tuple_nat_node,
962 params: tuples_nat_ht_params);
963 rhashtable_remove_fast(ht: &ct_priv->ct_tuples_ht, obj: &entry->tuple_node,
964 params: tuples_ht_params);
965}
966
967static void mlx5_tc_ct_entry_del(struct mlx5_ct_entry *entry)
968{
969 struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv;
970
971 mlx5_tc_ct_entry_del_rules(ct_priv, entry);
972
973 spin_lock_bh(lock: &ct_priv->ht_lock);
974 mlx5_tc_ct_entry_remove_from_tuples(entry);
975 spin_unlock_bh(lock: &ct_priv->ht_lock);
976
977 mlx5_tc_ct_counter_put(ct_priv, entry);
978 kfree(objp: entry);
979}
980
981static void
982mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry)
983{
984 if (!refcount_dec_and_test(r: &entry->refcnt))
985 return;
986
987 mlx5_tc_ct_entry_del(entry);
988}
989
990static void mlx5_tc_ct_entry_del_work(struct work_struct *work)
991{
992 struct mlx5_ct_entry *entry = container_of(work, struct mlx5_ct_entry, work);
993
994 mlx5_tc_ct_entry_del(entry);
995}
996
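/* Same as mlx5_tc_ct_entry_put(), but defers the teardown to the ct
 * workqueue; deleting the offloaded rules sleeps, which the caller's
 * context may not allow.
 */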
997static void
998__mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry)
999{
1000 if (!refcount_dec_and_test(r: &entry->refcnt))
1001 return;
1002
1003 INIT_WORK(&entry->work, mlx5_tc_ct_entry_del_work);
1004 queue_work(wq: entry->ct_priv->wq, work: &entry->work);
1005}
1006
1007static struct mlx5_ct_counter *
1008mlx5_tc_ct_counter_create(struct mlx5_tc_ct_priv *ct_priv)
1009{
1010 struct mlx5_ct_counter *counter;
1011 int ret;
1012
1013 counter = kzalloc(size: sizeof(*counter), GFP_KERNEL);
1014 if (!counter)
1015 return ERR_PTR(error: -ENOMEM);
1016
1017 counter->is_shared = false;
1018 counter->counter = mlx5_fc_create_ex(dev: ct_priv->dev, aging: true);
1019 if (IS_ERR(ptr: counter->counter)) {
1020 ct_dbg("Failed to create counter for ct entry");
1021 ret = PTR_ERR(ptr: counter->counter);
1022 kfree(objp: counter);
1023 return ERR_PTR(error: ret);
1024 }
1025
1026 return counter;
1027}
1028
1029static struct mlx5_ct_counter *
1030mlx5_tc_ct_shared_counter_get(struct mlx5_tc_ct_priv *ct_priv,
1031 struct mlx5_ct_entry *entry)
1032{
1033 struct mlx5_ct_tuple rev_tuple = entry->tuple;
1034 struct mlx5_ct_counter *shared_counter;
1035 struct mlx5_ct_entry *rev_entry;
1036
1037 /* get the reversed tuple */
1038 swap(rev_tuple.port.src, rev_tuple.port.dst);
1039
1040 if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
1041 __be32 tmp_addr = rev_tuple.ip.src_v4;
1042
1043 rev_tuple.ip.src_v4 = rev_tuple.ip.dst_v4;
1044 rev_tuple.ip.dst_v4 = tmp_addr;
1045 } else if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
1046 struct in6_addr tmp_addr = rev_tuple.ip.src_v6;
1047
1048 rev_tuple.ip.src_v6 = rev_tuple.ip.dst_v6;
1049 rev_tuple.ip.dst_v6 = tmp_addr;
1050 } else {
1051 return ERR_PTR(error: -EOPNOTSUPP);
1052 }
1053
1054 /* Use the same counter as the reverse direction */
1055 spin_lock_bh(lock: &ct_priv->ht_lock);
1056 rev_entry = mlx5_tc_ct_entry_get(ct_priv, tuple: &rev_tuple);
1057
1058 if (IS_ERR(ptr: rev_entry)) {
1059 spin_unlock_bh(lock: &ct_priv->ht_lock);
1060 goto create_counter;
1061 }
1062
1063 if (rev_entry && refcount_inc_not_zero(r: &rev_entry->counter->refcount)) {
1064 ct_dbg("Using shared counter entry=0x%p rev=0x%p", entry, rev_entry);
1065 shared_counter = rev_entry->counter;
1066 spin_unlock_bh(lock: &ct_priv->ht_lock);
1067
1068 mlx5_tc_ct_entry_put(entry: rev_entry);
1069 return shared_counter;
1070 }
1071
1072 spin_unlock_bh(lock: &ct_priv->ht_lock);
1073
1074create_counter:
1075
1076 shared_counter = mlx5_tc_ct_counter_create(ct_priv);
1077 if (IS_ERR(ptr: shared_counter))
1078 return shared_counter;
1079
1080 shared_counter->is_shared = true;
1081 refcount_set(r: &shared_counter->refcount, n: 1);
1082 return shared_counter;
1083}
1084
1085static int
1086mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv,
1087 struct flow_rule *flow_rule,
1088 struct mlx5_ct_entry *entry,
1089 u8 zone_restore_id)
1090{
1091 int err;
1092
1093 if (nf_ct_acct_enabled(net: dev_net(dev: ct_priv->netdev)))
1094 entry->counter = mlx5_tc_ct_counter_create(ct_priv);
1095 else
1096 entry->counter = mlx5_tc_ct_shared_counter_get(ct_priv, entry);
1097
1098 if (IS_ERR(ptr: entry->counter)) {
1099 err = PTR_ERR(ptr: entry->counter);
1100 return err;
1101 }
1102
1103 err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, nat: false,
1104 zone_restore_id);
1105 if (err)
1106 goto err_orig;
1107
1108 err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, nat: true,
1109 zone_restore_id);
1110 if (err)
1111 goto err_nat;
1112
1113 atomic_inc(v: &ct_priv->debugfs.stats.offloaded);
1114 return 0;
1115
1116err_nat:
1117 mlx5_tc_ct_entry_del_rule(ct_priv, entry, nat: false);
1118err_orig:
1119 mlx5_tc_ct_counter_put(ct_priv, entry);
1120 return err;
1121}
1122
1123static int
1124mlx5_tc_ct_entry_replace_rules(struct mlx5_tc_ct_priv *ct_priv,
1125 struct flow_rule *flow_rule,
1126 struct mlx5_ct_entry *entry,
1127 u8 zone_restore_id)
1128{
1129 int err;
1130
1131 err = mlx5_tc_ct_entry_replace_rule(ct_priv, flow_rule, entry, nat: false,
1132 zone_restore_id);
1133 if (err)
1134 return err;
1135
1136 err = mlx5_tc_ct_entry_replace_rule(ct_priv, flow_rule, entry, nat: true,
1137 zone_restore_id);
1138 if (err)
1139 mlx5_tc_ct_entry_del_rule(ct_priv, entry, nat: false);
1140 return err;
1141}
1142
1143static int
1144mlx5_tc_ct_block_flow_offload_replace(struct mlx5_ct_ft *ft, struct flow_rule *flow_rule,
1145 struct mlx5_ct_entry *entry, unsigned long cookie)
1146{
1147 struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
1148 int err;
1149
1150 err = mlx5_tc_ct_entry_replace_rules(ct_priv, flow_rule, entry, zone_restore_id: ft->zone_restore_id);
1151 if (!err)
1152 return 0;
1153
	/* If updating the entry failed, look it up again under ht_lock
	 * protection and properly delete it.
	 */
1157 spin_lock_bh(lock: &ct_priv->ht_lock);
1158 entry = rhashtable_lookup_fast(ht: &ft->ct_entries_ht, key: &cookie, params: cts_ht_params);
1159 if (entry) {
1160 rhashtable_remove_fast(ht: &ft->ct_entries_ht, obj: &entry->node, params: cts_ht_params);
1161 spin_unlock_bh(lock: &ct_priv->ht_lock);
1162 mlx5_tc_ct_entry_put(entry);
1163 } else {
1164 spin_unlock_bh(lock: &ct_priv->ht_lock);
1165 }
1166 return err;
1167}
1168
1169static int
1170mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft,
1171 struct flow_cls_offload *flow)
1172{
1173 struct flow_rule *flow_rule = flow_cls_offload_flow_rule(flow_cmd: flow);
1174 struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
1175 struct flow_action_entry *meta_action;
1176 unsigned long cookie = flow->cookie;
1177 struct mlx5_ct_entry *entry;
1178 int err;
1179
1180 meta_action = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
1181 if (!meta_action)
1182 return -EOPNOTSUPP;
1183
1184 spin_lock_bh(lock: &ct_priv->ht_lock);
1185 entry = rhashtable_lookup_fast(ht: &ft->ct_entries_ht, key: &cookie, params: cts_ht_params);
1186 if (entry && refcount_inc_not_zero(r: &entry->refcnt)) {
1187 if (entry->restore_cookie == meta_action->ct_metadata.cookie) {
1188 spin_unlock_bh(lock: &ct_priv->ht_lock);
1189 mlx5_tc_ct_entry_put(entry);
1190 return -EEXIST;
1191 }
1192 entry->restore_cookie = meta_action->ct_metadata.cookie;
1193 spin_unlock_bh(lock: &ct_priv->ht_lock);
1194
1195 err = mlx5_tc_ct_block_flow_offload_replace(ft, flow_rule, entry, cookie);
1196 mlx5_tc_ct_entry_put(entry);
1197 return err;
1198 }
1199 spin_unlock_bh(lock: &ct_priv->ht_lock);
1200
1201 entry = kzalloc(size: sizeof(*entry), GFP_KERNEL);
1202 if (!entry)
1203 return -ENOMEM;
1204
1205 entry->tuple.zone = ft->zone;
1206 entry->cookie = flow->cookie;
1207 entry->restore_cookie = meta_action->ct_metadata.cookie;
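	/* One reference for ct_entries_ht and one for this function; the
	 * latter is dropped once the entry is fully set up (see below).
	 */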
1208 refcount_set(r: &entry->refcnt, n: 2);
1209 entry->ct_priv = ct_priv;
1210
1211 err = mlx5_tc_ct_rule_to_tuple(tuple: &entry->tuple, rule: flow_rule);
1212 if (err)
1213 goto err_set;
1214
1215 memcpy(&entry->tuple_nat, &entry->tuple, sizeof(entry->tuple));
1216 err = mlx5_tc_ct_rule_to_tuple_nat(tuple: &entry->tuple_nat, rule: flow_rule);
1217 if (err)
1218 goto err_set;
1219
1220 spin_lock_bh(lock: &ct_priv->ht_lock);
1221
1222 err = rhashtable_lookup_insert_fast(ht: &ft->ct_entries_ht, obj: &entry->node,
1223 params: cts_ht_params);
1224 if (err)
1225 goto err_entries;
1226
1227 err = rhashtable_lookup_insert_fast(ht: &ct_priv->ct_tuples_ht,
1228 obj: &entry->tuple_node,
1229 params: tuples_ht_params);
1230 if (err)
1231 goto err_tuple;
1232
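	/* Only hash the NAT tuple when the NAT rewrite actually changed it;
	 * otherwise tuple_nat_node stays unhashed and
	 * mlx5_tc_ct_entry_has_nat() reports false.
	 */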
1233 if (memcmp(p: &entry->tuple, q: &entry->tuple_nat, size: sizeof(entry->tuple))) {
1234 err = rhashtable_lookup_insert_fast(ht: &ct_priv->ct_tuples_nat_ht,
1235 obj: &entry->tuple_nat_node,
1236 params: tuples_nat_ht_params);
1237 if (err)
1238 goto err_tuple_nat;
1239 }
1240 spin_unlock_bh(lock: &ct_priv->ht_lock);
1241
1242 err = mlx5_tc_ct_entry_add_rules(ct_priv, flow_rule, entry,
1243 zone_restore_id: ft->zone_restore_id);
1244 if (err)
1245 goto err_rules;
1246
1247 set_bit(nr: MLX5_CT_ENTRY_FLAG_VALID, addr: &entry->flags);
1248 mlx5_tc_ct_entry_put(entry); /* this function reference */
1249
1250 return 0;
1251
1252err_rules:
1253 spin_lock_bh(lock: &ct_priv->ht_lock);
1254 if (mlx5_tc_ct_entry_has_nat(entry))
1255 rhashtable_remove_fast(ht: &ct_priv->ct_tuples_nat_ht,
1256 obj: &entry->tuple_nat_node, params: tuples_nat_ht_params);
1257err_tuple_nat:
1258 rhashtable_remove_fast(ht: &ct_priv->ct_tuples_ht,
1259 obj: &entry->tuple_node,
1260 params: tuples_ht_params);
1261err_tuple:
1262 rhashtable_remove_fast(ht: &ft->ct_entries_ht,
1263 obj: &entry->node,
1264 params: cts_ht_params);
1265err_entries:
1266 spin_unlock_bh(lock: &ct_priv->ht_lock);
1267err_set:
1268 kfree(objp: entry);
1269 if (err != -EEXIST)
1270 netdev_warn(dev: ct_priv->netdev, format: "Failed to offload ct entry, err: %d\n", err);
1271 return err;
1272}
1273
1274static int
1275mlx5_tc_ct_block_flow_offload_del(struct mlx5_ct_ft *ft,
1276 struct flow_cls_offload *flow)
1277{
1278 struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
1279 unsigned long cookie = flow->cookie;
1280 struct mlx5_ct_entry *entry;
1281
1282 spin_lock_bh(lock: &ct_priv->ht_lock);
1283 entry = rhashtable_lookup_fast(ht: &ft->ct_entries_ht, key: &cookie, params: cts_ht_params);
1284 if (!entry) {
1285 spin_unlock_bh(lock: &ct_priv->ht_lock);
1286 return -ENOENT;
1287 }
1288
1289 if (!mlx5_tc_ct_entry_valid(entry)) {
1290 spin_unlock_bh(lock: &ct_priv->ht_lock);
1291 return -EINVAL;
1292 }
1293
1294 rhashtable_remove_fast(ht: &ft->ct_entries_ht, obj: &entry->node, params: cts_ht_params);
1295 spin_unlock_bh(lock: &ct_priv->ht_lock);
1296
1297 mlx5_tc_ct_entry_put(entry);
1298
1299 return 0;
1300}
1301
1302static int
1303mlx5_tc_ct_block_flow_offload_stats(struct mlx5_ct_ft *ft,
1304 struct flow_cls_offload *f)
1305{
1306 struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
1307 unsigned long cookie = f->cookie;
1308 struct mlx5_ct_entry *entry;
1309 u64 lastuse, packets, bytes;
1310
1311 spin_lock_bh(lock: &ct_priv->ht_lock);
1312 entry = rhashtable_lookup_fast(ht: &ft->ct_entries_ht, key: &cookie, params: cts_ht_params);
1313 if (!entry) {
1314 spin_unlock_bh(lock: &ct_priv->ht_lock);
1315 return -ENOENT;
1316 }
1317
1318 if (!mlx5_tc_ct_entry_valid(entry) || !refcount_inc_not_zero(r: &entry->refcnt)) {
1319 spin_unlock_bh(lock: &ct_priv->ht_lock);
1320 return -EINVAL;
1321 }
1322
1323 spin_unlock_bh(lock: &ct_priv->ht_lock);
1324
1325 mlx5_fc_query_cached(counter: entry->counter->counter, bytes: &bytes, packets: &packets, lastuse: &lastuse);
1326 flow_stats_update(flow_stats: &f->stats, bytes, pkts: packets, drops: 0, lastused: lastuse,
1327 used_hw_stats: FLOW_ACTION_HW_STATS_DELAYED);
1328
1329 mlx5_tc_ct_entry_put(entry);
1330 return 0;
1331}
1332
1333static int
1334mlx5_tc_ct_block_flow_offload(enum tc_setup_type type, void *type_data,
1335 void *cb_priv)
1336{
1337 struct flow_cls_offload *f = type_data;
1338 struct mlx5_ct_ft *ft = cb_priv;
1339
1340 if (type != TC_SETUP_CLSFLOWER)
1341 return -EOPNOTSUPP;
1342
1343 switch (f->command) {
1344 case FLOW_CLS_REPLACE:
1345 return mlx5_tc_ct_block_flow_offload_add(ft, flow: f);
1346 case FLOW_CLS_DESTROY:
1347 return mlx5_tc_ct_block_flow_offload_del(ft, flow: f);
1348 case FLOW_CLS_STATS:
1349 return mlx5_tc_ct_block_flow_offload_stats(ft, f);
1350 default:
1351 break;
1352 }
1353
1354 return -EOPNOTSUPP;
1355}
1356
1357static bool
1358mlx5_tc_ct_skb_to_tuple(struct sk_buff *skb, struct mlx5_ct_tuple *tuple,
1359 u16 zone)
1360{
1361 struct flow_keys flow_keys;
1362
1363 skb_reset_network_header(skb);
1364 skb_flow_dissect_flow_keys(skb, flow: &flow_keys, FLOW_DISSECTOR_F_STOP_BEFORE_ENCAP);
1365
1366 tuple->zone = zone;
1367
1368 if (flow_keys.basic.ip_proto != IPPROTO_TCP &&
1369 flow_keys.basic.ip_proto != IPPROTO_UDP &&
1370 flow_keys.basic.ip_proto != IPPROTO_GRE)
1371 return false;
1372
1373 if (flow_keys.basic.ip_proto == IPPROTO_TCP ||
1374 flow_keys.basic.ip_proto == IPPROTO_UDP) {
1375 tuple->port.src = flow_keys.ports.src;
1376 tuple->port.dst = flow_keys.ports.dst;
1377 }
1378 tuple->n_proto = flow_keys.basic.n_proto;
1379 tuple->ip_proto = flow_keys.basic.ip_proto;
1380
1381 switch (flow_keys.basic.n_proto) {
1382 case htons(ETH_P_IP):
1383 tuple->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
1384 tuple->ip.src_v4 = flow_keys.addrs.v4addrs.src;
1385 tuple->ip.dst_v4 = flow_keys.addrs.v4addrs.dst;
1386 break;
1387
1388 case htons(ETH_P_IPV6):
1389 tuple->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1390 tuple->ip.src_v6 = flow_keys.addrs.v6addrs.src;
1391 tuple->ip.dst_v6 = flow_keys.addrs.v6addrs.dst;
1392 break;
1393 default:
1394 goto out;
1395 }
1396
1397 return true;
1398
1399out:
1400 return false;
1401}
1402
1403int mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec)
1404{
1405 u32 ctstate = 0, ctstate_mask = 0;
1406
1407 mlx5e_tc_match_to_reg_get_match(spec, type: CTSTATE_TO_REG,
1408 data: &ctstate, mask: &ctstate_mask);
1409
1410 if ((ctstate & ctstate_mask) == MLX5_CT_STATE_TRK_BIT)
1411 return -EOPNOTSUPP;
1412
1413 ctstate_mask |= MLX5_CT_STATE_TRK_BIT;
1414 mlx5e_tc_match_to_reg_match(spec, type: CTSTATE_TO_REG,
1415 data: ctstate, mask: ctstate_mask);
1416
1417 return 0;
1418}
1419
1420void mlx5_tc_ct_match_del(struct mlx5_tc_ct_priv *priv, struct mlx5_ct_attr *ct_attr)
1421{
1422 if (!priv || !ct_attr->ct_labels_id)
1423 return;
1424
1425 mlx5_put_label_mapping(ct_priv: priv, id: ct_attr->ct_labels_id);
1426}
1427
1428int
1429mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv,
1430 struct mlx5_flow_spec *spec,
1431 struct flow_cls_offload *f,
1432 struct mlx5_ct_attr *ct_attr,
1433 struct netlink_ext_ack *extack)
1434{
1435 bool trk, est, untrk, unnew, unest, new, rpl, unrpl, rel, unrel, inv, uninv;
1436 struct flow_rule *rule = flow_cls_offload_flow_rule(flow_cmd: f);
1437 struct flow_dissector_key_ct *mask, *key;
1438 u32 ctstate = 0, ctstate_mask = 0;
1439 u16 ct_state_on, ct_state_off;
1440 u16 ct_state, ct_state_mask;
1441 struct flow_match_ct match;
1442 u32 ct_labels[4];
1443
1444 if (!flow_rule_match_key(rule, key: FLOW_DISSECTOR_KEY_CT))
1445 return 0;
1446
1447 if (!priv) {
1448 NL_SET_ERR_MSG_MOD(extack,
1449 "offload of ct matching isn't available");
1450 return -EOPNOTSUPP;
1451 }
1452
1453 flow_rule_match_ct(rule, out: &match);
1454
1455 key = match.key;
1456 mask = match.mask;
1457
1458 ct_state = key->ct_state;
1459 ct_state_mask = mask->ct_state;
1460
1461 if (ct_state_mask & ~(TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
1462 TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED |
1463 TCA_FLOWER_KEY_CT_FLAGS_NEW |
1464 TCA_FLOWER_KEY_CT_FLAGS_REPLY |
1465 TCA_FLOWER_KEY_CT_FLAGS_RELATED |
1466 TCA_FLOWER_KEY_CT_FLAGS_INVALID)) {
1467 NL_SET_ERR_MSG_MOD(extack,
1468 "only ct_state trk, est, new and rpl are supported for offload");
1469 return -EOPNOTSUPP;
1470 }
1471
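	/* ct_state_on holds the flags the filter requires to be set,
	 * ct_state_off the flags it requires to be cleared.
	 */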
1472 ct_state_on = ct_state & ct_state_mask;
1473 ct_state_off = (ct_state & ct_state_mask) ^ ct_state_mask;
1474 trk = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
1475 new = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_NEW;
1476 est = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
1477 rpl = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_REPLY;
1478 rel = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_RELATED;
1479 inv = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_INVALID;
1480 untrk = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
1481 unnew = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_NEW;
1482 unest = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
1483 unrpl = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_REPLY;
1484 unrel = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_RELATED;
1485 uninv = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_INVALID;
1486
1487 ctstate |= trk ? MLX5_CT_STATE_TRK_BIT : 0;
1488 ctstate |= new ? MLX5_CT_STATE_NEW_BIT : 0;
1489 ctstate |= est ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;
1490 ctstate |= rpl ? MLX5_CT_STATE_REPLY_BIT : 0;
1491 ctstate_mask |= (untrk || trk) ? MLX5_CT_STATE_TRK_BIT : 0;
1492 ctstate_mask |= (unnew || new) ? MLX5_CT_STATE_NEW_BIT : 0;
1493 ctstate_mask |= (unest || est) ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;
1494 ctstate_mask |= (unrpl || rpl) ? MLX5_CT_STATE_REPLY_BIT : 0;
1495 ctstate_mask |= unrel ? MLX5_CT_STATE_RELATED_BIT : 0;
1496 ctstate_mask |= uninv ? MLX5_CT_STATE_INVALID_BIT : 0;
1497
1498 if (rel) {
1499 NL_SET_ERR_MSG_MOD(extack,
1500 "matching on ct_state +rel isn't supported");
1501 return -EOPNOTSUPP;
1502 }
1503
1504 if (inv) {
1505 NL_SET_ERR_MSG_MOD(extack,
1506 "matching on ct_state +inv isn't supported");
1507 return -EOPNOTSUPP;
1508 }
1509
1510 if (mask->ct_zone)
1511 mlx5e_tc_match_to_reg_match(spec, type: ZONE_TO_REG,
1512 data: key->ct_zone, MLX5_CT_ZONE_MASK);
1513 if (ctstate_mask)
1514 mlx5e_tc_match_to_reg_match(spec, type: CTSTATE_TO_REG,
1515 data: ctstate, mask: ctstate_mask);
1516 if (mask->ct_mark)
1517 mlx5e_tc_match_to_reg_match(spec, type: MARK_TO_REG,
1518 data: key->ct_mark, mask: mask->ct_mark);
1519 if (mask->ct_labels[0] || mask->ct_labels[1] || mask->ct_labels[2] ||
1520 mask->ct_labels[3]) {
1521 ct_labels[0] = key->ct_labels[0] & mask->ct_labels[0];
1522 ct_labels[1] = key->ct_labels[1] & mask->ct_labels[1];
1523 ct_labels[2] = key->ct_labels[2] & mask->ct_labels[2];
1524 ct_labels[3] = key->ct_labels[3] & mask->ct_labels[3];
1525 if (mlx5_get_label_mapping(ct_priv: priv, labels: ct_labels, id: &ct_attr->ct_labels_id))
1526 return -EOPNOTSUPP;
1527 mlx5e_tc_match_to_reg_match(spec, type: LABELS_TO_REG, data: ct_attr->ct_labels_id,
1528 MLX5_CT_LABELS_MASK);
1529 }
1530
1531 return 0;
1532}
1533
1534int
1535mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
1536 struct mlx5_flow_attr *attr,
1537 const struct flow_action_entry *act,
1538 struct netlink_ext_ack *extack)
1539{
1540 if (!priv) {
1541 NL_SET_ERR_MSG_MOD(extack,
1542 "offload of ct action isn't available");
1543 return -EOPNOTSUPP;
1544 }
1545
1546 attr->ct_attr.ct_action |= act->ct.action; /* So we can have clear + ct */
1547 attr->ct_attr.zone = act->ct.zone;
1548 if (!(act->ct.action & TCA_CT_ACT_CLEAR))
1549 attr->ct_attr.nf_ft = act->ct.flow_table;
1550 attr->ct_attr.act_miss_cookie = act->miss_cookie;
1551
1552 return 0;
1553}
1554
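/* Each pre_ct table holds exactly two rules, both of which set the zone
 * register: one matches packets already marked +trk (and +nat for the NAT
 * table) for this zone and forwards them directly to post_act, the other is
 * a miss rule that sends everything else to the CT (or CT NAT) table.
 */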
1555static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft,
1556 struct mlx5_tc_ct_pre *pre_ct,
1557 bool nat)
1558{
1559 struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
1560 struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {};
1561 struct mlx5_core_dev *dev = ct_priv->dev;
1562 struct mlx5_flow_table *ft = pre_ct->ft;
1563 struct mlx5_flow_destination dest = {};
1564 struct mlx5_flow_act flow_act = {};
1565 struct mlx5_modify_hdr *mod_hdr;
1566 struct mlx5_flow_handle *rule;
1567 struct mlx5_flow_spec *spec;
1568 u32 ctstate;
1569 u16 zone;
1570 int err;
1571
1572 spec = kvzalloc(size: sizeof(*spec), GFP_KERNEL);
1573 if (!spec)
1574 return -ENOMEM;
1575
1576 zone = ct_ft->zone & MLX5_CT_ZONE_MASK;
1577 err = mlx5e_tc_match_to_reg_set(mdev: dev, mod_hdr_acts: &pre_mod_acts, ns: ct_priv->ns_type,
1578 type: ZONE_TO_REG, data: zone);
1579 if (err) {
1580 ct_dbg("Failed to set zone register mapping");
1581 goto err_mapping;
1582 }
1583
1584 mod_hdr = mlx5_modify_header_alloc(dev, ns_type: ct_priv->ns_type,
1585 num_actions: pre_mod_acts.num_actions,
1586 modify_actions: pre_mod_acts.actions);
1587
1588 if (IS_ERR(ptr: mod_hdr)) {
1589 err = PTR_ERR(ptr: mod_hdr);
1590 ct_dbg("Failed to create pre ct mod hdr");
1591 goto err_mapping;
1592 }
1593 pre_ct->modify_hdr = mod_hdr;
1594
1595 flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
1596 MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
1597 flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
1598 flow_act.modify_hdr = mod_hdr;
1599 dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
1600
1601 /* add flow rule */
1602 mlx5e_tc_match_to_reg_match(spec, type: ZONE_TO_REG,
1603 data: zone, MLX5_CT_ZONE_MASK);
1604 ctstate = MLX5_CT_STATE_TRK_BIT;
1605 if (nat)
1606 ctstate |= MLX5_CT_STATE_NAT_BIT;
1607 mlx5e_tc_match_to_reg_match(spec, type: CTSTATE_TO_REG, data: ctstate, mask: ctstate);
1608
1609 dest.ft = mlx5e_tc_post_act_get_ft(post_act: ct_priv->post_act);
1610 rule = mlx5_add_flow_rules(ft, spec, flow_act: &flow_act, dest: &dest, num_dest: 1);
1611 if (IS_ERR(ptr: rule)) {
1612 err = PTR_ERR(ptr: rule);
1613 ct_dbg("Failed to add pre ct flow rule zone %d", zone);
1614 goto err_flow_rule;
1615 }
1616 pre_ct->flow_rule = rule;
1617
1618 /* add miss rule */
1619 dest.ft = nat ? ct_priv->ct_nat : ct_priv->ct;
1620 rule = mlx5_add_flow_rules(ft, NULL, flow_act: &flow_act, dest: &dest, num_dest: 1);
1621 if (IS_ERR(ptr: rule)) {
1622 err = PTR_ERR(ptr: rule);
1623 ct_dbg("Failed to add pre ct miss rule zone %d", zone);
1624 goto err_miss_rule;
1625 }
1626 pre_ct->miss_rule = rule;
1627
1628 mlx5e_mod_hdr_dealloc(mod_hdr_acts: &pre_mod_acts);
1629 kvfree(addr: spec);
1630 return 0;
1631
1632err_miss_rule:
1633 mlx5_del_flow_rules(fr: pre_ct->flow_rule);
1634err_flow_rule:
1635 mlx5_modify_header_dealloc(dev, modify_hdr: pre_ct->modify_hdr);
1636err_mapping:
1637 mlx5e_mod_hdr_dealloc(mod_hdr_acts: &pre_mod_acts);
1638 kvfree(addr: spec);
1639 return err;
1640}
1641
1642static void
1643tc_ct_pre_ct_del_rules(struct mlx5_ct_ft *ct_ft,
1644 struct mlx5_tc_ct_pre *pre_ct)
1645{
1646 struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
1647 struct mlx5_core_dev *dev = ct_priv->dev;
1648
1649 mlx5_del_flow_rules(fr: pre_ct->flow_rule);
1650 mlx5_del_flow_rules(fr: pre_ct->miss_rule);
1651 mlx5_modify_header_dealloc(dev, modify_hdr: pre_ct->modify_hdr);
1652}
1653
1654static int
1655mlx5_tc_ct_alloc_pre_ct(struct mlx5_ct_ft *ct_ft,
1656 struct mlx5_tc_ct_pre *pre_ct,
1657 bool nat)
1658{
1659 int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
1660 struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
1661 struct mlx5_core_dev *dev = ct_priv->dev;
1662 struct mlx5_flow_table_attr ft_attr = {};
1663 struct mlx5_flow_namespace *ns;
1664 struct mlx5_flow_table *ft;
1665 struct mlx5_flow_group *g;
1666 u32 metadata_reg_c_2_mask;
1667 u32 *flow_group_in;
1668 void *misc;
1669 int err;
1670
1671 ns = mlx5_get_flow_namespace(dev, type: ct_priv->ns_type);
1672 if (!ns) {
1673 err = -EOPNOTSUPP;
1674 ct_dbg("Failed to get flow namespace");
1675 return err;
1676 }
1677
1678 flow_group_in = kvzalloc(size: inlen, GFP_KERNEL);
1679 if (!flow_group_in)
1680 return -ENOMEM;
1681
1682 ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED;
1683 ft_attr.prio = ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB ?
1684 FDB_TC_OFFLOAD : MLX5E_TC_PRIO;
1685 ft_attr.max_fte = 2;
1686 ft_attr.level = 1;
1687 ft = mlx5_create_flow_table(ns, ft_attr: &ft_attr);
1688 if (IS_ERR(ptr: ft)) {
1689 err = PTR_ERR(ptr: ft);
1690 ct_dbg("Failed to create pre ct table");
1691 goto out_free;
1692 }
1693 pre_ct->ft = ft;
1694
1695 /* create flow group */
1696 MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
1697 MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);
1698 MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
1699 MLX5_MATCH_MISC_PARAMETERS_2);
1700
1701 misc = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
1702 match_criteria.misc_parameters_2);
1703
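	/* reg_c_2 carries the zone in its lower 16 bits and the ct_state bits
	 * in the upper 16 bits, so build the group match mask accordingly.
	 */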
1704 metadata_reg_c_2_mask = MLX5_CT_ZONE_MASK;
1705 metadata_reg_c_2_mask |= (MLX5_CT_STATE_TRK_BIT << 16);
1706 if (nat)
1707 metadata_reg_c_2_mask |= (MLX5_CT_STATE_NAT_BIT << 16);
1708
1709 MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_2,
1710 metadata_reg_c_2_mask);
1711
1712 g = mlx5_create_flow_group(ft, in: flow_group_in);
1713 if (IS_ERR(ptr: g)) {
1714 err = PTR_ERR(ptr: g);
1715 ct_dbg("Failed to create pre ct group");
1716 goto err_flow_grp;
1717 }
1718 pre_ct->flow_grp = g;
1719
1720 /* create miss group */
1721 memset(flow_group_in, 0, inlen);
1722 MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
1723 MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
1724 g = mlx5_create_flow_group(ft, in: flow_group_in);
1725 if (IS_ERR(ptr: g)) {
1726 err = PTR_ERR(ptr: g);
1727 ct_dbg("Failed to create pre ct miss group");
1728 goto err_miss_grp;
1729 }
1730 pre_ct->miss_grp = g;
1731
1732 err = tc_ct_pre_ct_add_rules(ct_ft, pre_ct, nat);
1733 if (err)
1734 goto err_add_rules;
1735
1736 kvfree(addr: flow_group_in);
1737 return 0;
1738
1739err_add_rules:
1740 mlx5_destroy_flow_group(fg: pre_ct->miss_grp);
1741err_miss_grp:
1742 mlx5_destroy_flow_group(fg: pre_ct->flow_grp);
1743err_flow_grp:
1744 mlx5_destroy_flow_table(ft);
1745out_free:
1746 kvfree(addr: flow_group_in);
1747 return err;
1748}
1749
1750static void
1751mlx5_tc_ct_free_pre_ct(struct mlx5_ct_ft *ct_ft,
1752 struct mlx5_tc_ct_pre *pre_ct)
1753{
1754 tc_ct_pre_ct_del_rules(ct_ft, pre_ct);
1755 mlx5_destroy_flow_group(fg: pre_ct->miss_grp);
1756 mlx5_destroy_flow_group(fg: pre_ct->flow_grp);
1757 mlx5_destroy_flow_table(ft: pre_ct->ft);
1758}
1759
1760static int
1761mlx5_tc_ct_alloc_pre_ct_tables(struct mlx5_ct_ft *ft)
1762{
1763 int err;
1764
1765 err = mlx5_tc_ct_alloc_pre_ct(ct_ft: ft, pre_ct: &ft->pre_ct, nat: false);
1766 if (err)
1767 return err;
1768
1769 err = mlx5_tc_ct_alloc_pre_ct(ct_ft: ft, pre_ct: &ft->pre_ct_nat, nat: true);
1770 if (err)
1771 goto err_pre_ct_nat;
1772
1773 return 0;
1774
1775err_pre_ct_nat:
1776 mlx5_tc_ct_free_pre_ct(ct_ft: ft, pre_ct: &ft->pre_ct);
1777 return err;
1778}
1779
1780static void
1781mlx5_tc_ct_free_pre_ct_tables(struct mlx5_ct_ft *ft)
1782{
1783 mlx5_tc_ct_free_pre_ct(ct_ft: ft, pre_ct: &ft->pre_ct_nat);
1784 mlx5_tc_ct_free_pre_ct(ct_ft: ft, pre_ct: &ft->pre_ct);
1785}
1786
/* To avoid a false lock dependency warning, give the ct_entries_ht lock a
 * class different from that of the hashtable used inside the flow steering
 * core: when the last flow of a group is deleted and the group itself is
 * then removed, del_sw_flow_group() calls rhashtable_destroy() on
 * fg->ftes_hash, which takes that table's ht->mutex. That mutex is not the
 * same as the ht->mutex used here.
 */
1793static struct lock_class_key ct_entries_ht_lock_key;
1794
static struct mlx5_ct_ft *
mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone,
		     struct nf_flowtable *nf_ft)
{
	struct mlx5_ct_ft *ft;
	int err;

	ft = rhashtable_lookup_fast(&ct_priv->zone_ht, &zone, zone_params);
	if (ft) {
		refcount_inc(&ft->refcount);
		return ft;
	}

	ft = kzalloc(sizeof(*ft), GFP_KERNEL);
	if (!ft)
		return ERR_PTR(-ENOMEM);

	err = mapping_add(ct_priv->zone_mapping, &zone, &ft->zone_restore_id);
	if (err)
		goto err_mapping;

	ft->zone = zone;
	ft->nf_ft = nf_ft;
	ft->ct_priv = ct_priv;
	refcount_set(&ft->refcount, 1);

	err = mlx5_tc_ct_alloc_pre_ct_tables(ft);
	if (err)
		goto err_alloc_pre_ct;

	err = rhashtable_init(&ft->ct_entries_ht, &cts_ht_params);
	if (err)
		goto err_init;

	lockdep_set_class(&ft->ct_entries_ht.mutex, &ct_entries_ht_lock_key);

	err = rhashtable_insert_fast(&ct_priv->zone_ht, &ft->node,
				     zone_params);
	if (err)
		goto err_insert;

	err = nf_flow_table_offload_add_cb(ft->nf_ft,
					   mlx5_tc_ct_block_flow_offload, ft);
	if (err)
		goto err_add_cb;

	return ft;

err_add_cb:
	rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params);
err_insert:
	rhashtable_destroy(&ft->ct_entries_ht);
err_init:
	mlx5_tc_ct_free_pre_ct_tables(ft);
err_alloc_pre_ct:
	mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id);
err_mapping:
	kfree(ft);
	return ERR_PTR(err);
}

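/* rhashtable_free_and_destroy() callback: drop the reference that
 * ct_entries_ht held on each remaining offloaded entry.
 */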
static void
mlx5_tc_ct_flush_ft_entry(void *ptr, void *arg)
{
	struct mlx5_ct_entry *entry = ptr;

	mlx5_tc_ct_entry_put(entry);
}

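/* Release a reference on the per-zone ft. On the last put, flush the ct
 * workqueue so in-flight add/del work completes before the offload callback
 * is unregistered and the remaining entries and pre_ct tables are freed.
 */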
static void
mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
{
	if (!refcount_dec_and_test(&ft->refcount))
		return;

	flush_workqueue(ct_priv->wq);
	nf_flow_table_offload_del_cb(ft->nf_ft,
				     mlx5_tc_ct_block_flow_offload, ft);
	rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params);
	rhashtable_free_and_destroy(&ft->ct_entries_ht,
				    mlx5_tc_ct_flush_ft_entry,
				    ct_priv);
	mlx5_tc_ct_free_pre_ct_tables(ft);
	mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id);
	kfree(ft);
}

/* We translate the tc filter with CT action to the following HW model:
 *
 *      +-----------------------+
 *      + rule (either original +
 *      + or post_act rule)     +
 *      +-----------------------+
 *               | set act_miss_cookie mapping
 *               | set fte_id
 *               | set tunnel_id
 *               | rest of actions before the CT action (for this orig/post_act rule)
 *               |
 * +-------------+
 * | Chain 0     |
 * | optimization|
 * |             v
 * |    +---------------------+
 * |    + pre_ct/pre_ct_nat   +  if matches     +-----------------------+
 * |    + zone+nat match      +---------------->+ post_act (see below)  +
 * |    +---------------------+  set zone       +-----------------------+
 * |             |
 * +-------------+ set zone
 *               |
 *               v
 *      +--------------------+
 *      + CT (nat or no nat) +
 *      + tuple + zone match +
 *      +--------------------+
 *               | set mark
 *               | set labels_id
 *               | set established
 *               | set zone_restore
 *               | do nat (if needed)
 *               v
 *      +--------------+
 *      + post_act     + rest of parsed filter's actions
 *      + fte_id match +------------------------>
 *      +--------------+
 *
 */
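/* As an illustrative example (device names and chain numbers are arbitrary),
 * a filter pair such as:
 *
 *   tc filter add dev $REP ingress chain 0 proto ip flower ct_state -trk \
 *       action ct zone 1 pipe action goto chain 1
 *   tc filter add dev $REP ingress chain 1 proto ip flower ct_state +trk+est \
 *       action mirred egress redirect dev $REP2
 *
 * maps to this model roughly as follows: the chain 0 rule sets the zone and
 * forwards to the CT table; an established entry there restores the ct
 * metadata and forwards to post_act, which executes the remaining actions
 * (goto chain 1), where the second rule matches on the restored ct state.
 */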
static int
__mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
			  struct mlx5_flow_attr *attr)
{
	bool nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT;
	struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
	int act_miss_mapping = 0, err;
	struct mlx5_ct_ft *ft;
	u16 zone;

	/* Register for CT established events */
	ft = mlx5_tc_ct_add_ft_cb(ct_priv, attr->ct_attr.zone,
				  attr->ct_attr.nf_ft);
	if (IS_ERR(ft)) {
		err = PTR_ERR(ft);
		ct_dbg("Failed to register to ft callback");
		goto err_ft;
	}
	attr->ct_attr.ft = ft;

	err = mlx5e_tc_action_miss_mapping_get(ct_priv->priv, attr, attr->ct_attr.act_miss_cookie,
					       &act_miss_mapping);
	if (err) {
		ct_dbg("Failed to get register mapping for act miss");
		goto err_get_act_miss;
	}

	err = mlx5e_tc_match_to_reg_set(priv->mdev, &attr->parse_attr->mod_hdr_acts,
					ct_priv->ns_type, MAPPED_OBJ_TO_REG, act_miss_mapping);
	if (err) {
		ct_dbg("Failed to set act miss register mapping");
		goto err_mapping;
	}

	/* Chain 0 sets the zone and jumps to ct table.
	 * Other chains jump to pre_ct table to align with act_ct cached logic.
	 */
	if (!attr->chain) {
		zone = ft->zone & MLX5_CT_ZONE_MASK;
		err = mlx5e_tc_match_to_reg_set(priv->mdev, &attr->parse_attr->mod_hdr_acts,
						ct_priv->ns_type, ZONE_TO_REG, zone);
		if (err) {
			ct_dbg("Failed to set zone register mapping");
			goto err_mapping;
		}

		attr->dest_ft = nat ? ct_priv->ct_nat : ct_priv->ct;
	} else {
		attr->dest_ft = nat ? ft->pre_ct_nat.ft : ft->pre_ct.ft;
	}

	attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
	attr->ct_attr.act_miss_mapping = act_miss_mapping;

	return 0;

err_mapping:
	mlx5e_tc_action_miss_mapping_put(ct_priv->priv, attr, act_miss_mapping);
err_get_act_miss:
	mlx5_tc_ct_del_ft_cb(ct_priv, ft);
err_ft:
	netdev_warn(priv->netdev, "Failed to offload ct flow, err %d\n", err);
	return err;
}

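/* Entry point for offloading a parsed tc CT action. A ct_clear action only
 * resets the ct metadata registers; a full ct action (nf_ft set) additionally
 * installs the zone/pre_ct forwarding under control_lock via
 * __mlx5_tc_ct_flow_offload().
 */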
int
mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv, struct mlx5_flow_attr *attr)
{
	int err;

	if (!priv)
		return -EOPNOTSUPP;

	if (attr->ct_attr.offloaded)
		return 0;

	if (attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR) {
		err = mlx5_tc_ct_entry_set_registers(priv, &attr->parse_attr->mod_hdr_acts,
						     0, 0, 0, 0);
		if (err)
			return err;

		attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
	}

	if (!attr->ct_attr.nf_ft) { /* means only ct clear action, and not ct_clear,ct() */
		attr->ct_attr.offloaded = true;
		return 0;
	}

	mutex_lock(&priv->control_lock);
	err = __mlx5_tc_ct_flow_offload(priv, attr);
	if (!err)
		attr->ct_attr.offloaded = true;
	mutex_unlock(&priv->control_lock);

	return err;
}

static void
__mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv,
			 struct mlx5_flow_attr *attr)
{
	mlx5e_tc_action_miss_mapping_put(ct_priv->priv, attr, attr->ct_attr.act_miss_mapping);
	mlx5_tc_ct_del_ft_cb(ct_priv, attr->ct_attr.ft);
}

void
mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv,
		       struct mlx5_flow_attr *attr)
{
	if (!attr->ct_attr.offloaded) /* no ct action, return */
		return;
	if (!attr->ct_attr.nf_ft) /* means only ct clear action, and not ct_clear,ct() */
		return;

	mutex_lock(&priv->control_lock);
	__mlx5_tc_ct_delete_flow(priv, attr);
	mutex_unlock(&priv->control_lock);
}

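/* Pick the ct flow steering provider: DMFS by default, or SMFS when the
 * device is in SMFS steering mode and offloading to the FDB namespace.
 */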
static int
mlx5_tc_ct_fs_init(struct mlx5_tc_ct_priv *ct_priv)
{
	struct mlx5_flow_table *post_ct = mlx5e_tc_post_act_get_ft(ct_priv->post_act);
	struct mlx5_ct_fs_ops *fs_ops = mlx5_ct_fs_dmfs_ops_get();
	int err;

	if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB &&
	    ct_priv->dev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_SMFS) {
		ct_dbg("Using SMFS ct flow steering provider");
		fs_ops = mlx5_ct_fs_smfs_ops_get();
	}

	ct_priv->fs = kzalloc(sizeof(*ct_priv->fs) + fs_ops->priv_size, GFP_KERNEL);
	if (!ct_priv->fs)
		return -ENOMEM;

	ct_priv->fs->netdev = ct_priv->netdev;
	ct_priv->fs->dev = ct_priv->dev;
	ct_priv->fs_ops = fs_ops;

	err = ct_priv->fs_ops->init(ct_priv->fs, ct_priv->ct, ct_priv->ct_nat, post_ct);
	if (err)
		goto err_init;

	return 0;

err_init:
	kfree(ct_priv->fs);
	return err;
}

static int
mlx5_tc_ct_init_check_esw_support(struct mlx5_eswitch *esw,
				  const char **err_msg)
{
	if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) {
		/* The vlan workaround should be avoided for multi-chain rules.
		 * This is just a sanity check, as the pop vlan action should
		 * be supported by any FW that supports ignore_flow_level.
		 */

		*err_msg = "firmware vlan actions support is missing";
		return -EOPNOTSUPP;
	}

	if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev,
				    fdb_modify_header_fwd_to_table)) {
		/* CT always writes to registers, which are mod header actions.
		 * Therefore, mod header and goto are required.
		 */

		*err_msg = "firmware fwd and modify support is missing";
		return -EOPNOTSUPP;
	}

	if (!mlx5_eswitch_reg_c1_loopback_enabled(esw)) {
		*err_msg = "register loopback isn't supported";
		return -EOPNOTSUPP;
	}

	return 0;
}

static int
mlx5_tc_ct_init_check_support(struct mlx5e_priv *priv,
			      enum mlx5_flow_namespace_type ns_type,
			      struct mlx5e_post_act *post_act)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	const char *err_msg = NULL;
	int err = 0;

	if (IS_ERR_OR_NULL(post_act)) {
		/* ignore_flow_level isn't available by default for VFs, so
		 * post_act won't be supported. Skip showing an error msg.
		 */
		if (priv->mdev->coredev_type == MLX5_COREDEV_PF)
			err_msg = "post action is missing";
		err = -EOPNOTSUPP;
		goto out_err;
	}

	if (ns_type == MLX5_FLOW_NAMESPACE_FDB)
		err = mlx5_tc_ct_init_check_esw_support(esw, &err_msg);

out_err:
	if (err && err_msg)
		netdev_dbg(priv->netdev, "tc ct offload not supported, %s\n", err_msg);
	return err;
}

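/* Expose read-only ct offload counters ("offloaded", "rx_dropped") in a "ct"
 * directory under the device's mlx5 debugfs root.
 */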
static void
mlx5_ct_tc_create_dbgfs(struct mlx5_tc_ct_priv *ct_priv)
{
	struct mlx5_tc_ct_debugfs *ct_dbgfs = &ct_priv->debugfs;

	ct_dbgfs->root = debugfs_create_dir("ct", mlx5_debugfs_get_dev_root(ct_priv->dev));
	debugfs_create_atomic_t("offloaded", 0400, ct_dbgfs->root,
				&ct_dbgfs->stats.offloaded);
	debugfs_create_atomic_t("rx_dropped", 0400, ct_dbgfs->root,
				&ct_dbgfs->stats.rx_dropped);
}

static void
mlx5_ct_tc_remove_dbgfs(struct mlx5_tc_ct_priv *ct_priv)
{
	debugfs_remove_recursive(ct_priv->debugfs.root);
}

#define INIT_ERR_PREFIX "tc ct offload init failed"

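/* Set up ct offload for a namespace (FDB or NIC): shared zone/labels register
 * mappings, the global CT and CT-NAT tables, the zone and tuple hashtables,
 * an ordered workqueue for conntrack offload events, the flow steering
 * provider and the debugfs counters. Returns NULL if ct offload isn't
 * supported or init fails; the other mlx5_tc_ct_* entry points handle a NULL
 * ct_priv gracefully.
 */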
struct mlx5_tc_ct_priv *
mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
		struct mod_hdr_tbl *mod_hdr,
		enum mlx5_flow_namespace_type ns_type,
		struct mlx5e_post_act *post_act)
{
	struct mlx5_tc_ct_priv *ct_priv;
	struct mlx5_core_dev *dev;
	u64 mapping_id;
	int err;

	dev = priv->mdev;
	err = mlx5_tc_ct_init_check_support(priv, ns_type, post_act);
	if (err)
		goto err_support;

	ct_priv = kzalloc(sizeof(*ct_priv), GFP_KERNEL);
	if (!ct_priv)
		goto err_alloc;

	mapping_id = mlx5_query_nic_system_image_guid(dev);

	ct_priv->zone_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_ZONE,
						      sizeof(u16), 0, true);
	if (IS_ERR(ct_priv->zone_mapping)) {
		err = PTR_ERR(ct_priv->zone_mapping);
		goto err_mapping_zone;
	}

	ct_priv->labels_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_LABELS,
							sizeof(u32) * 4, 0, true);
	if (IS_ERR(ct_priv->labels_mapping)) {
		err = PTR_ERR(ct_priv->labels_mapping);
		goto err_mapping_labels;
	}

	spin_lock_init(&ct_priv->ht_lock);
	ct_priv->priv = priv;
	ct_priv->ns_type = ns_type;
	ct_priv->chains = chains;
	ct_priv->netdev = priv->netdev;
	ct_priv->dev = priv->mdev;
	ct_priv->mod_hdr_tbl = mod_hdr;
	ct_priv->ct = mlx5_chains_create_global_table(chains);
	if (IS_ERR(ct_priv->ct)) {
		err = PTR_ERR(ct_priv->ct);
		mlx5_core_warn(dev,
			       "%s, failed to create ct table err: %d\n",
			       INIT_ERR_PREFIX, err);
		goto err_ct_tbl;
	}

	ct_priv->ct_nat = mlx5_chains_create_global_table(chains);
	if (IS_ERR(ct_priv->ct_nat)) {
		err = PTR_ERR(ct_priv->ct_nat);
		mlx5_core_warn(dev,
			       "%s, failed to create ct nat table err: %d\n",
			       INIT_ERR_PREFIX, err);
		goto err_ct_nat_tbl;
	}

	ct_priv->post_act = post_act;
	mutex_init(&ct_priv->control_lock);
	if (rhashtable_init(&ct_priv->zone_ht, &zone_params))
		goto err_ct_zone_ht;
	if (rhashtable_init(&ct_priv->ct_tuples_ht, &tuples_ht_params))
		goto err_ct_tuples_ht;
	if (rhashtable_init(&ct_priv->ct_tuples_nat_ht, &tuples_nat_ht_params))
		goto err_ct_tuples_nat_ht;

	ct_priv->wq = alloc_ordered_workqueue("mlx5e_ct_priv_wq", 0);
	if (!ct_priv->wq) {
		err = -ENOMEM;
		goto err_wq;
	}

	err = mlx5_tc_ct_fs_init(ct_priv);
	if (err)
		goto err_init_fs;

	mlx5_ct_tc_create_dbgfs(ct_priv);
	return ct_priv;

err_init_fs:
	destroy_workqueue(ct_priv->wq);
err_wq:
	rhashtable_destroy(&ct_priv->ct_tuples_nat_ht);
err_ct_tuples_nat_ht:
	rhashtable_destroy(&ct_priv->ct_tuples_ht);
err_ct_tuples_ht:
	rhashtable_destroy(&ct_priv->zone_ht);
err_ct_zone_ht:
	mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat);
err_ct_nat_tbl:
	mlx5_chains_destroy_global_table(chains, ct_priv->ct);
err_ct_tbl:
	mapping_destroy(ct_priv->labels_mapping);
err_mapping_labels:
	mapping_destroy(ct_priv->zone_mapping);
err_mapping_zone:
	kfree(ct_priv);
err_alloc:
err_support:

	return NULL;
}

void
mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv)
{
	struct mlx5_fs_chains *chains;

	if (!ct_priv)
		return;

	destroy_workqueue(ct_priv->wq);
	mlx5_ct_tc_remove_dbgfs(ct_priv);
	chains = ct_priv->chains;

	ct_priv->fs_ops->destroy(ct_priv->fs);
	kfree(ct_priv->fs);

	mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat);
	mlx5_chains_destroy_global_table(chains, ct_priv->ct);
	mapping_destroy(ct_priv->zone_mapping);
	mapping_destroy(ct_priv->labels_mapping);

	rhashtable_destroy(&ct_priv->ct_tuples_ht);
	rhashtable_destroy(&ct_priv->ct_tuples_nat_ht);
	rhashtable_destroy(&ct_priv->zone_ht);
	mutex_destroy(&ct_priv->control_lock);
	kfree(ct_priv);
}

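/* Called on the RX path for packets that missed in hardware after passing
 * through the CT table. Map zone_restore_id back to the zone, rebuild the
 * tuple from the skb, look up the offloaded entry and restore the conntrack
 * metadata via tcf_ct_flow_table_restore_skb(). Returns false (and bumps the
 * rx_dropped debugfs counter) if the flow can't be restored.
 */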
bool
mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv,
			 struct sk_buff *skb, u8 zone_restore_id)
{
	struct mlx5_ct_tuple tuple = {};
	struct mlx5_ct_entry *entry;
	u16 zone;

	if (!ct_priv || !zone_restore_id)
		return true;

	if (mapping_find(ct_priv->zone_mapping, zone_restore_id, &zone))
		goto out_inc_drop;

	if (!mlx5_tc_ct_skb_to_tuple(skb, &tuple, zone))
		goto out_inc_drop;

	spin_lock(&ct_priv->ht_lock);

	entry = mlx5_tc_ct_entry_get(ct_priv, &tuple);
	if (!entry) {
		spin_unlock(&ct_priv->ht_lock);
		goto out_inc_drop;
	}

	if (IS_ERR(entry)) {
		spin_unlock(&ct_priv->ht_lock);
		goto out_inc_drop;
	}
	spin_unlock(&ct_priv->ht_lock);

	tcf_ct_flow_table_restore_skb(skb, entry->restore_cookie);
	__mlx5_tc_ct_entry_put(entry);

	return true;

out_inc_drop:
	atomic_inc(&ct_priv->debugfs.stats.rx_dropped);
	return false;
}