1 | // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB |
2 | /* Copyright (c) 2019 Mellanox Technologies. */ |
3 | |
4 | #include <net/netfilter/nf_conntrack.h> |
5 | #include <net/netfilter/nf_conntrack_core.h> |
6 | #include <net/netfilter/nf_conntrack_zones.h> |
7 | #include <net/netfilter/nf_conntrack_labels.h> |
8 | #include <net/netfilter/nf_conntrack_helper.h> |
9 | #include <net/netfilter/nf_conntrack_acct.h> |
10 | #include <uapi/linux/tc_act/tc_pedit.h> |
11 | #include <net/tc_act/tc_ct.h> |
12 | #include <net/flow_offload.h> |
13 | #include <net/netfilter/nf_flow_table.h> |
14 | #include <linux/workqueue.h> |
15 | #include <linux/refcount.h> |
16 | #include <linux/xarray.h> |
17 | #include <linux/if_macvlan.h> |
18 | #include <linux/debugfs.h> |
19 | |
20 | #include "lib/fs_chains.h" |
21 | #include "en/tc_ct.h" |
22 | #include "en/tc/ct_fs.h" |
23 | #include "en/tc_priv.h" |
24 | #include "en/mod_hdr.h" |
25 | #include "en/mapping.h" |
26 | #include "en/tc/post_act.h" |
27 | #include "en.h" |
28 | #include "en_tc.h" |
29 | #include "en_rep.h" |
30 | #include "fs_core.h" |
31 | |
32 | #define MLX5_CT_STATE_ESTABLISHED_BIT BIT(1) |
33 | #define MLX5_CT_STATE_TRK_BIT BIT(2) |
34 | #define MLX5_CT_STATE_NAT_BIT BIT(3) |
35 | #define MLX5_CT_STATE_REPLY_BIT BIT(4) |
36 | #define MLX5_CT_STATE_RELATED_BIT BIT(5) |
37 | #define MLX5_CT_STATE_INVALID_BIT BIT(6) |
38 | #define MLX5_CT_STATE_NEW_BIT BIT(7) |
39 | |
40 | #define MLX5_CT_LABELS_BITS MLX5_REG_MAPPING_MBITS(LABELS_TO_REG) |
41 | #define MLX5_CT_LABELS_MASK MLX5_REG_MAPPING_MASK(LABELS_TO_REG) |
42 | |
43 | /* Statically allocate modify actions for |
44 | * ipv6 and port nat (5) + tuple fields (4) + nic mode zone restore (1) = 10. |
45 | * This will be increased dynamically if needed (for the ipv6 snat + dnat). |
46 | */ |
47 | #define MLX5_CT_MIN_MOD_ACTS 10 |
48 | |
49 | #define ct_dbg(fmt, args...)\ |
50 | netdev_dbg(ct_priv->netdev, "ct_debug: " fmt "\n", ##args) |
51 | |
/* Debugfs state for CT offload statistics. */
struct mlx5_tc_ct_debugfs {
	struct {
		atomic_t offloaded;  /* count of currently offloaded CT entries
				      * (decremented in mlx5_tc_ct_entry_del_rules)
				      */
		atomic_t rx_dropped; /* presumably CT packets dropped on the RX
				      * restore path - increment site not visible
				      * in this chunk, TODO confirm
				      */
	} stats;

	struct dentry *root; /* debugfs directory holding the stat files */
};
60 | |
/* Top-level CT offload state for one TC namespace (FDB or NIC rx). */
struct mlx5_tc_ct_priv {
	struct mlx5_core_dev *dev;
	struct mlx5e_priv *priv;
	const struct net_device *netdev;
	struct mod_hdr_tbl *mod_hdr_tbl;	/* shared modify-header cache */
	struct xarray tuple_ids;
	struct rhashtable zone_ht;		/* zone -> struct mlx5_ct_ft (zone_params) */
	struct rhashtable ct_tuples_ht;		/* pre-NAT tuples -> mlx5_ct_entry */
	struct rhashtable ct_tuples_nat_ht;	/* post-NAT tuples -> mlx5_ct_entry */
	struct mlx5_flow_table *ct;		/* CT table for non-NAT rules */
	struct mlx5_flow_table *ct_nat;		/* CT table for NAT rules */
	struct mlx5e_post_act *post_act;	/* post-action table rules forward to */
	struct mutex control_lock; /* guards parallel adds/dels */
	struct mapping_ctx *zone_mapping;
	struct mapping_ctx *labels_mapping;	/* 128-bit ct labels -> small id */
	enum mlx5_flow_namespace_type ns_type;	/* MLX5_FLOW_NAMESPACE_FDB or NIC */
	struct mlx5_fs_chains *chains;
	struct mlx5_ct_fs *fs;			/* steering backend private state */
	struct mlx5_ct_fs_ops *fs_ops;		/* backend ct_rule_add/ct_rule_del ops */
	spinlock_t ht_lock; /* protects ft entries */
	struct workqueue_struct *wq;

	struct mlx5_tc_ct_debugfs debugfs;
};
85 | |
/* One offloaded rule for a CT entry; each entry has two of these,
 * indexed by the nat flag (see mlx5_ct_entry::zone_rules).
 */
struct mlx5_ct_zone_rule {
	struct mlx5_ct_fs_rule *rule;		/* handle returned by fs_ops->ct_rule_add */
	struct mlx5e_mod_hdr_handle *mh;	/* shared mod-hdr handle; NULL when the
						 * modify header was allocated directly
						 */
	struct mlx5_flow_attr *attr;
	bool nat;				/* rule lives in the ct_nat table */
};
92 | |
/* Pre-CT table: a small table with one flow rule and one miss rule.
 * NOTE(review): the setup/use sites are outside this chunk; field
 * roles follow from the names - confirm against the table builders.
 */
struct mlx5_tc_ct_pre {
	struct mlx5_flow_table *ft;
	struct mlx5_flow_group *flow_grp;
	struct mlx5_flow_group *miss_grp;
	struct mlx5_flow_handle *flow_rule;
	struct mlx5_flow_handle *miss_rule;
	struct mlx5_modify_hdr *modify_hdr;
};
101 | |
/* Per-zone CT state, hashed by zone in ct_priv->zone_ht. */
struct mlx5_ct_ft {
	struct rhash_head node;
	u16 zone;			/* conntrack zone id, hash key */
	u32 zone_restore_id;
	refcount_t refcount;
	struct nf_flowtable *nf_ft;	/* backing netfilter flowtable */
	struct mlx5_tc_ct_priv *ct_priv;
	struct rhashtable ct_entries_ht; /* entries of this zone, keyed by
					  * cookie (see cts_ht_params)
					  */
	struct mlx5_tc_ct_pre pre_ct;
	struct mlx5_tc_ct_pre pre_ct_nat;
};
113 | |
/* Connection 5-tuple plus zone. Used bytewise as an rhashtable key
 * (key_len spans the whole struct in tuples_ht_params).
 */
struct mlx5_ct_tuple {
	u16 addr_type;		/* FLOW_DISSECTOR_KEY_IPV4_ADDRS or .._IPV6_ADDRS */
	__be16 n_proto;		/* ethertype */
	u8 ip_proto;		/* TCP/UDP/GRE */
	struct {
		union {
			__be32 src_v4;
			struct in6_addr src_v6;
		};
		union {
			__be32 dst_v4;
			struct in6_addr dst_v6;
		};
	} ip;
	struct {
		__be16 src;
		__be16 dst;
	} port;

	u16 zone;
};
135 | |
/* Flow counter attached to a CT entry. A shared counter is refcounted
 * and only destroyed on the last put (see mlx5_tc_ct_counter_put).
 */
struct mlx5_ct_counter {
	struct mlx5_fc *counter;
	refcount_t refcount;	/* used only when is_shared */
	bool is_shared;
};
141 | |
/* Bit numbers for mlx5_ct_entry::flags. */
enum {
	MLX5_CT_ENTRY_FLAG_VALID, /* entry is usable for lookups; set/cleared
				   * outside this chunk, gated in
				   * mlx5_tc_ct_entry_valid()
				   */
};
145 | |
/* One offloaded conntrack connection. Linked into the per-zone
 * ct_entries_ht (node, keyed by cookie), the global tuples table
 * (tuple_node) and, for NATed connections, the NAT tuples table
 * (tuple_nat_node).
 */
struct mlx5_ct_entry {
	struct rhash_head node;
	struct rhash_head tuple_node;
	struct rhash_head tuple_nat_node;
	struct mlx5_ct_counter *counter;
	unsigned long cookie;		/* conntrack cookie; cts_ht_params key */
	unsigned long restore_cookie;
	struct mlx5_ct_tuple tuple;	/* pre-NAT tuple */
	struct mlx5_ct_tuple tuple_nat;	/* tuple after NAT rewrites */
	struct mlx5_ct_zone_rule zone_rules[2];	/* indexed by nat: [0] plain, [1] nat */

	struct mlx5_tc_ct_priv *ct_priv;
	struct work_struct work;

	refcount_t refcnt;
	unsigned long flags;		/* MLX5_CT_ENTRY_FLAG_* bits */
};
163 | |
164 | static void |
165 | mlx5_tc_ct_entry_destroy_mod_hdr(struct mlx5_tc_ct_priv *ct_priv, |
166 | struct mlx5_flow_attr *attr, |
167 | struct mlx5e_mod_hdr_handle *mh); |
168 | |
/* Per-zone entries table: cookie -> mlx5_ct_entry. */
static const struct rhashtable_params cts_ht_params = {
	.head_offset = offsetof(struct mlx5_ct_entry, node),
	.key_offset = offsetof(struct mlx5_ct_entry, cookie),
	.key_len = sizeof(((struct mlx5_ct_entry *)0)->cookie),
	.automatic_shrinking = true,
	.min_size = 16 * 1024,	/* sized for many concurrent connections */
};

/* zone_ht: zone id -> mlx5_ct_ft. */
static const struct rhashtable_params zone_params = {
	.head_offset = offsetof(struct mlx5_ct_ft, node),
	.key_offset = offsetof(struct mlx5_ct_ft, zone),
	.key_len = sizeof(((struct mlx5_ct_ft *)0)->zone),
	.automatic_shrinking = true,
};

/* ct_tuples_ht: whole pre-NAT tuple (bytewise) -> mlx5_ct_entry. */
static const struct rhashtable_params tuples_ht_params = {
	.head_offset = offsetof(struct mlx5_ct_entry, tuple_node),
	.key_offset = offsetof(struct mlx5_ct_entry, tuple),
	.key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple),
	.automatic_shrinking = true,
	.min_size = 16 * 1024,
};

/* ct_tuples_nat_ht: whole post-NAT tuple (bytewise) -> mlx5_ct_entry. */
static const struct rhashtable_params tuples_nat_ht_params = {
	.head_offset = offsetof(struct mlx5_ct_entry, tuple_nat_node),
	.key_offset = offsetof(struct mlx5_ct_entry, tuple_nat),
	.key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple_nat),
	.automatic_shrinking = true,
	.min_size = 16 * 1024,
};
199 | |
200 | static bool |
201 | mlx5_tc_ct_entry_has_nat(struct mlx5_ct_entry *entry) |
202 | { |
203 | return !!(entry->tuple_nat_node.next); |
204 | } |
205 | |
206 | static int |
207 | mlx5_get_label_mapping(struct mlx5_tc_ct_priv *ct_priv, |
208 | u32 *labels, u32 *id) |
209 | { |
210 | if (!memchr_inv(p: labels, c: 0, size: sizeof(u32) * 4)) { |
211 | *id = 0; |
212 | return 0; |
213 | } |
214 | |
215 | if (mapping_add(ctx: ct_priv->labels_mapping, data: labels, id)) |
216 | return -EOPNOTSUPP; |
217 | |
218 | return 0; |
219 | } |
220 | |
221 | static void |
222 | mlx5_put_label_mapping(struct mlx5_tc_ct_priv *ct_priv, u32 id) |
223 | { |
224 | if (id) |
225 | mapping_remove(ctx: ct_priv->labels_mapping, id); |
226 | } |
227 | |
228 | static int |
229 | mlx5_tc_ct_rule_to_tuple(struct mlx5_ct_tuple *tuple, struct flow_rule *rule) |
230 | { |
231 | struct flow_match_control control; |
232 | struct flow_match_basic basic; |
233 | |
234 | flow_rule_match_basic(rule, out: &basic); |
235 | flow_rule_match_control(rule, out: &control); |
236 | |
237 | tuple->n_proto = basic.key->n_proto; |
238 | tuple->ip_proto = basic.key->ip_proto; |
239 | tuple->addr_type = control.key->addr_type; |
240 | |
241 | if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { |
242 | struct flow_match_ipv4_addrs match; |
243 | |
244 | flow_rule_match_ipv4_addrs(rule, out: &match); |
245 | tuple->ip.src_v4 = match.key->src; |
246 | tuple->ip.dst_v4 = match.key->dst; |
247 | } else if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { |
248 | struct flow_match_ipv6_addrs match; |
249 | |
250 | flow_rule_match_ipv6_addrs(rule, out: &match); |
251 | tuple->ip.src_v6 = match.key->src; |
252 | tuple->ip.dst_v6 = match.key->dst; |
253 | } else { |
254 | return -EOPNOTSUPP; |
255 | } |
256 | |
257 | if (flow_rule_match_key(rule, key: FLOW_DISSECTOR_KEY_PORTS)) { |
258 | struct flow_match_ports match; |
259 | |
260 | flow_rule_match_ports(rule, out: &match); |
261 | switch (tuple->ip_proto) { |
262 | case IPPROTO_TCP: |
263 | case IPPROTO_UDP: |
264 | tuple->port.src = match.key->src; |
265 | tuple->port.dst = match.key->dst; |
266 | break; |
267 | default: |
268 | return -EOPNOTSUPP; |
269 | } |
270 | } else { |
271 | if (tuple->ip_proto != IPPROTO_GRE) |
272 | return -EOPNOTSUPP; |
273 | } |
274 | |
275 | return 0; |
276 | } |
277 | |
278 | static int |
279 | mlx5_tc_ct_rule_to_tuple_nat(struct mlx5_ct_tuple *tuple, |
280 | struct flow_rule *rule) |
281 | { |
282 | struct flow_action *flow_action = &rule->action; |
283 | struct flow_action_entry *act; |
284 | u32 offset, val, ip6_offset; |
285 | int i; |
286 | |
287 | flow_action_for_each(i, act, flow_action) { |
288 | if (act->id != FLOW_ACTION_MANGLE) |
289 | continue; |
290 | |
291 | offset = act->mangle.offset; |
292 | val = act->mangle.val; |
293 | switch (act->mangle.htype) { |
294 | case FLOW_ACT_MANGLE_HDR_TYPE_IP4: |
295 | if (offset == offsetof(struct iphdr, saddr)) |
296 | tuple->ip.src_v4 = cpu_to_be32(val); |
297 | else if (offset == offsetof(struct iphdr, daddr)) |
298 | tuple->ip.dst_v4 = cpu_to_be32(val); |
299 | else |
300 | return -EOPNOTSUPP; |
301 | break; |
302 | |
303 | case FLOW_ACT_MANGLE_HDR_TYPE_IP6: |
304 | ip6_offset = (offset - offsetof(struct ipv6hdr, saddr)); |
305 | ip6_offset /= 4; |
306 | if (ip6_offset < 4) |
307 | tuple->ip.src_v6.s6_addr32[ip6_offset] = cpu_to_be32(val); |
308 | else if (ip6_offset < 8) |
309 | tuple->ip.dst_v6.s6_addr32[ip6_offset - 4] = cpu_to_be32(val); |
310 | else |
311 | return -EOPNOTSUPP; |
312 | break; |
313 | |
314 | case FLOW_ACT_MANGLE_HDR_TYPE_TCP: |
315 | if (offset == offsetof(struct tcphdr, source)) |
316 | tuple->port.src = cpu_to_be16(val); |
317 | else if (offset == offsetof(struct tcphdr, dest)) |
318 | tuple->port.dst = cpu_to_be16(val); |
319 | else |
320 | return -EOPNOTSUPP; |
321 | break; |
322 | |
323 | case FLOW_ACT_MANGLE_HDR_TYPE_UDP: |
324 | if (offset == offsetof(struct udphdr, source)) |
325 | tuple->port.src = cpu_to_be16(val); |
326 | else if (offset == offsetof(struct udphdr, dest)) |
327 | tuple->port.dst = cpu_to_be16(val); |
328 | else |
329 | return -EOPNOTSUPP; |
330 | break; |
331 | |
332 | default: |
333 | return -EOPNOTSUPP; |
334 | } |
335 | } |
336 | |
337 | return 0; |
338 | } |
339 | |
340 | static int |
341 | mlx5_tc_ct_get_flow_source_match(struct mlx5_tc_ct_priv *ct_priv, |
342 | struct net_device *ndev) |
343 | { |
344 | struct mlx5e_priv *other_priv = netdev_priv(dev: ndev); |
345 | struct mlx5_core_dev *mdev = ct_priv->dev; |
346 | bool vf_rep, uplink_rep; |
347 | |
348 | vf_rep = mlx5e_eswitch_vf_rep(netdev: ndev) && mlx5_same_hw_devs(dev: mdev, peer_dev: other_priv->mdev); |
349 | uplink_rep = mlx5e_eswitch_uplink_rep(netdev: ndev) && mlx5_same_hw_devs(dev: mdev, peer_dev: other_priv->mdev); |
350 | |
351 | if (vf_rep) |
352 | return MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT; |
353 | if (uplink_rep) |
354 | return MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; |
355 | if (is_vlan_dev(dev: ndev)) |
356 | return mlx5_tc_ct_get_flow_source_match(ct_priv, ndev: vlan_dev_real_dev(dev: ndev)); |
357 | if (netif_is_macvlan(dev: ndev)) |
358 | return mlx5_tc_ct_get_flow_source_match(ct_priv, ndev: macvlan_dev_real_dev(dev: ndev)); |
359 | if (mlx5e_get_tc_tun(tunnel_dev: ndev) || netif_is_lag_master(dev: ndev)) |
360 | return MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; |
361 | |
362 | return MLX5_FLOW_CONTEXT_FLOW_SOURCE_ANY_VPORT; |
363 | } |
364 | |
365 | static int |
366 | mlx5_tc_ct_set_tuple_match(struct mlx5_tc_ct_priv *ct_priv, |
367 | struct mlx5_flow_spec *spec, |
368 | struct flow_rule *rule) |
369 | { |
370 | void * = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, |
371 | outer_headers); |
372 | void * = MLX5_ADDR_OF(fte_match_param, spec->match_value, |
373 | outer_headers); |
374 | u16 addr_type = 0; |
375 | u8 ip_proto = 0; |
376 | |
377 | if (flow_rule_match_key(rule, key: FLOW_DISSECTOR_KEY_BASIC)) { |
378 | struct flow_match_basic match; |
379 | |
380 | flow_rule_match_basic(rule, out: &match); |
381 | |
382 | mlx5e_tc_set_ethertype(mdev: ct_priv->dev, match: &match, outer: true, headers_c, headers_v); |
383 | MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol, |
384 | match.mask->ip_proto); |
385 | MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, |
386 | match.key->ip_proto); |
387 | |
388 | ip_proto = match.key->ip_proto; |
389 | } |
390 | |
391 | if (flow_rule_match_key(rule, key: FLOW_DISSECTOR_KEY_CONTROL)) { |
392 | struct flow_match_control match; |
393 | |
394 | flow_rule_match_control(rule, out: &match); |
395 | addr_type = match.key->addr_type; |
396 | } |
397 | |
398 | if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { |
399 | struct flow_match_ipv4_addrs match; |
400 | |
401 | flow_rule_match_ipv4_addrs(rule, out: &match); |
402 | memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, |
403 | src_ipv4_src_ipv6.ipv4_layout.ipv4), |
404 | &match.mask->src, sizeof(match.mask->src)); |
405 | memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, |
406 | src_ipv4_src_ipv6.ipv4_layout.ipv4), |
407 | &match.key->src, sizeof(match.key->src)); |
408 | memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, |
409 | dst_ipv4_dst_ipv6.ipv4_layout.ipv4), |
410 | &match.mask->dst, sizeof(match.mask->dst)); |
411 | memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, |
412 | dst_ipv4_dst_ipv6.ipv4_layout.ipv4), |
413 | &match.key->dst, sizeof(match.key->dst)); |
414 | } |
415 | |
416 | if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { |
417 | struct flow_match_ipv6_addrs match; |
418 | |
419 | flow_rule_match_ipv6_addrs(rule, out: &match); |
420 | memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, |
421 | src_ipv4_src_ipv6.ipv6_layout.ipv6), |
422 | &match.mask->src, sizeof(match.mask->src)); |
423 | memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, |
424 | src_ipv4_src_ipv6.ipv6_layout.ipv6), |
425 | &match.key->src, sizeof(match.key->src)); |
426 | |
427 | memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, |
428 | dst_ipv4_dst_ipv6.ipv6_layout.ipv6), |
429 | &match.mask->dst, sizeof(match.mask->dst)); |
430 | memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, |
431 | dst_ipv4_dst_ipv6.ipv6_layout.ipv6), |
432 | &match.key->dst, sizeof(match.key->dst)); |
433 | } |
434 | |
435 | if (flow_rule_match_key(rule, key: FLOW_DISSECTOR_KEY_PORTS)) { |
436 | struct flow_match_ports match; |
437 | |
438 | flow_rule_match_ports(rule, out: &match); |
439 | switch (ip_proto) { |
440 | case IPPROTO_TCP: |
441 | MLX5_SET(fte_match_set_lyr_2_4, headers_c, |
442 | tcp_sport, ntohs(match.mask->src)); |
443 | MLX5_SET(fte_match_set_lyr_2_4, headers_v, |
444 | tcp_sport, ntohs(match.key->src)); |
445 | |
446 | MLX5_SET(fte_match_set_lyr_2_4, headers_c, |
447 | tcp_dport, ntohs(match.mask->dst)); |
448 | MLX5_SET(fte_match_set_lyr_2_4, headers_v, |
449 | tcp_dport, ntohs(match.key->dst)); |
450 | break; |
451 | |
452 | case IPPROTO_UDP: |
453 | MLX5_SET(fte_match_set_lyr_2_4, headers_c, |
454 | udp_sport, ntohs(match.mask->src)); |
455 | MLX5_SET(fte_match_set_lyr_2_4, headers_v, |
456 | udp_sport, ntohs(match.key->src)); |
457 | |
458 | MLX5_SET(fte_match_set_lyr_2_4, headers_c, |
459 | udp_dport, ntohs(match.mask->dst)); |
460 | MLX5_SET(fte_match_set_lyr_2_4, headers_v, |
461 | udp_dport, ntohs(match.key->dst)); |
462 | break; |
463 | default: |
464 | break; |
465 | } |
466 | } |
467 | |
468 | if (flow_rule_match_key(rule, key: FLOW_DISSECTOR_KEY_TCP)) { |
469 | struct flow_match_tcp match; |
470 | |
471 | flow_rule_match_tcp(rule, out: &match); |
472 | MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags, |
473 | ntohs(match.mask->flags)); |
474 | MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags, |
475 | ntohs(match.key->flags)); |
476 | } |
477 | |
478 | if (flow_rule_match_key(rule, key: FLOW_DISSECTOR_KEY_META)) { |
479 | struct flow_match_meta match; |
480 | |
481 | flow_rule_match_meta(rule, out: &match); |
482 | |
483 | if (match.key->ingress_ifindex & match.mask->ingress_ifindex) { |
484 | struct net_device *dev; |
485 | |
486 | dev = dev_get_by_index(net: &init_net, ifindex: match.key->ingress_ifindex); |
487 | if (dev && MLX5_CAP_ESW_FLOWTABLE(ct_priv->dev, flow_source)) |
488 | spec->flow_context.flow_source = |
489 | mlx5_tc_ct_get_flow_source_match(ct_priv, ndev: dev); |
490 | |
491 | dev_put(dev); |
492 | } |
493 | } |
494 | |
495 | return 0; |
496 | } |
497 | |
498 | static void |
499 | mlx5_tc_ct_counter_put(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_entry *entry) |
500 | { |
501 | if (entry->counter->is_shared && |
502 | !refcount_dec_and_test(r: &entry->counter->refcount)) |
503 | return; |
504 | |
505 | mlx5_fc_destroy(dev: ct_priv->dev, counter: entry->counter->counter); |
506 | kfree(objp: entry->counter); |
507 | } |
508 | |
509 | static void |
510 | mlx5_tc_ct_entry_del_rule(struct mlx5_tc_ct_priv *ct_priv, |
511 | struct mlx5_ct_entry *entry, |
512 | bool nat) |
513 | { |
514 | struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat]; |
515 | struct mlx5_flow_attr *attr = zone_rule->attr; |
516 | |
517 | ct_dbg("Deleting ct entry rule in zone %d" , entry->tuple.zone); |
518 | |
519 | ct_priv->fs_ops->ct_rule_del(ct_priv->fs, zone_rule->rule); |
520 | mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, attr: zone_rule->attr, mh: zone_rule->mh); |
521 | mlx5_put_label_mapping(ct_priv, id: attr->ct_attr.ct_labels_id); |
522 | kfree(objp: attr); |
523 | } |
524 | |
525 | static void |
526 | mlx5_tc_ct_entry_del_rules(struct mlx5_tc_ct_priv *ct_priv, |
527 | struct mlx5_ct_entry *entry) |
528 | { |
529 | mlx5_tc_ct_entry_del_rule(ct_priv, entry, nat: true); |
530 | mlx5_tc_ct_entry_del_rule(ct_priv, entry, nat: false); |
531 | |
532 | atomic_dec(v: &ct_priv->debugfs.stats.offloaded); |
533 | } |
534 | |
535 | static struct flow_action_entry * |
536 | mlx5_tc_ct_get_ct_metadata_action(struct flow_rule *flow_rule) |
537 | { |
538 | struct flow_action *flow_action = &flow_rule->action; |
539 | struct flow_action_entry *act; |
540 | int i; |
541 | |
542 | flow_action_for_each(i, act, flow_action) { |
543 | if (act->id == FLOW_ACTION_CT_METADATA) |
544 | return act; |
545 | } |
546 | |
547 | return NULL; |
548 | } |
549 | |
550 | static int |
551 | mlx5_tc_ct_entry_set_registers(struct mlx5_tc_ct_priv *ct_priv, |
552 | struct mlx5e_tc_mod_hdr_acts *mod_acts, |
553 | u8 ct_state, |
554 | u32 mark, |
555 | u32 labels_id, |
556 | u8 zone_restore_id) |
557 | { |
558 | enum mlx5_flow_namespace_type ns = ct_priv->ns_type; |
559 | struct mlx5_core_dev *dev = ct_priv->dev; |
560 | int err; |
561 | |
562 | err = mlx5e_tc_match_to_reg_set(mdev: dev, mod_hdr_acts: mod_acts, ns, |
563 | type: CTSTATE_TO_REG, data: ct_state); |
564 | if (err) |
565 | return err; |
566 | |
567 | err = mlx5e_tc_match_to_reg_set(mdev: dev, mod_hdr_acts: mod_acts, ns, |
568 | type: MARK_TO_REG, data: mark); |
569 | if (err) |
570 | return err; |
571 | |
572 | err = mlx5e_tc_match_to_reg_set(mdev: dev, mod_hdr_acts: mod_acts, ns, |
573 | type: LABELS_TO_REG, data: labels_id); |
574 | if (err) |
575 | return err; |
576 | |
577 | err = mlx5e_tc_match_to_reg_set(mdev: dev, mod_hdr_acts: mod_acts, ns, |
578 | type: ZONE_RESTORE_TO_REG, data: zone_restore_id); |
579 | if (err) |
580 | return err; |
581 | |
582 | /* Make another copy of zone id in reg_b for |
583 | * NIC rx flows since we don't copy reg_c1 to |
584 | * reg_b upon miss. |
585 | */ |
586 | if (ns != MLX5_FLOW_NAMESPACE_FDB) { |
587 | err = mlx5e_tc_match_to_reg_set(mdev: dev, mod_hdr_acts: mod_acts, ns, |
588 | type: NIC_ZONE_RESTORE_TO_REG, data: zone_restore_id); |
589 | if (err) |
590 | return err; |
591 | } |
592 | return 0; |
593 | } |
594 | |
/* Translate one FLOW_ACTION_MANGLE entry into an mlx5 set_action_in
 * modify-header action written at @modact.
 *
 * IP address rewrites use length 0, L4 port rewrites an explicit
 * 16-bit length - NOTE(review): length 0 appears to select the full
 * field width per mlx5 ifc convention; confirm against the PRM.
 * Returns 0 on success, -EOPNOTSUPP for any unsupported header type
 * or field offset.
 */
static int
mlx5_tc_ct_parse_mangle_to_mod_act(struct flow_action_entry *act,
				   char *modact)
{
	u32 offset = act->mangle.offset, field;

	switch (act->mangle.htype) {
	case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
		MLX5_SET(set_action_in, modact, length, 0);
		if (offset == offsetof(struct iphdr, saddr))
			field = MLX5_ACTION_IN_FIELD_OUT_SIPV4;
		else if (offset == offsetof(struct iphdr, daddr))
			field = MLX5_ACTION_IN_FIELD_OUT_DIPV4;
		else
			return -EOPNOTSUPP;
		break;

	case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
		MLX5_SET(set_action_in, modact, length, 0);
		/* IPv6 addresses are mangled one 32-bit word at a time;
		 * map the byte offset inside saddr/daddr onto the matching
		 * SIPV6/DIPV6 word field (127_96 is the lowest offset).
		 */
		if (offset == offsetof(struct ipv6hdr, saddr) + 12)
			field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0;
		else if (offset == offsetof(struct ipv6hdr, saddr) + 8)
			field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32;
		else if (offset == offsetof(struct ipv6hdr, saddr) + 4)
			field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64;
		else if (offset == offsetof(struct ipv6hdr, saddr))
			field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96;
		else if (offset == offsetof(struct ipv6hdr, daddr) + 12)
			field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0;
		else if (offset == offsetof(struct ipv6hdr, daddr) + 8)
			field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32;
		else if (offset == offsetof(struct ipv6hdr, daddr) + 4)
			field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64;
		else if (offset == offsetof(struct ipv6hdr, daddr))
			field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96;
		else
			return -EOPNOTSUPP;
		break;

	case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
		MLX5_SET(set_action_in, modact, length, 16);
		if (offset == offsetof(struct tcphdr, source))
			field = MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT;
		else if (offset == offsetof(struct tcphdr, dest))
			field = MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT;
		else
			return -EOPNOTSUPP;
		break;

	case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
		MLX5_SET(set_action_in, modact, length, 16);
		if (offset == offsetof(struct udphdr, source))
			field = MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT;
		else if (offset == offsetof(struct udphdr, dest))
			field = MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT;
		else
			return -EOPNOTSUPP;
		break;

	default:
		return -EOPNOTSUPP;
	}

	MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
	MLX5_SET(set_action_in, modact, offset, 0);
	MLX5_SET(set_action_in, modact, field, field);
	MLX5_SET(set_action_in, modact, data, act->mangle.val);

	return 0;
}
665 | |
666 | static int |
667 | mlx5_tc_ct_entry_create_nat(struct mlx5_tc_ct_priv *ct_priv, |
668 | struct flow_rule *flow_rule, |
669 | struct mlx5e_tc_mod_hdr_acts *mod_acts) |
670 | { |
671 | struct flow_action *flow_action = &flow_rule->action; |
672 | struct mlx5_core_dev *mdev = ct_priv->dev; |
673 | struct flow_action_entry *act; |
674 | char *modact; |
675 | int err, i; |
676 | |
677 | flow_action_for_each(i, act, flow_action) { |
678 | switch (act->id) { |
679 | case FLOW_ACTION_MANGLE: { |
680 | modact = mlx5e_mod_hdr_alloc(mdev, namespace: ct_priv->ns_type, mod_hdr_acts: mod_acts); |
681 | if (IS_ERR(ptr: modact)) |
682 | return PTR_ERR(ptr: modact); |
683 | |
684 | err = mlx5_tc_ct_parse_mangle_to_mod_act(act, modact); |
685 | if (err) |
686 | return err; |
687 | |
688 | mod_acts->num_actions++; |
689 | } |
690 | break; |
691 | |
692 | case FLOW_ACTION_CT_METADATA: |
693 | /* Handled earlier */ |
694 | continue; |
695 | default: |
696 | return -EOPNOTSUPP; |
697 | } |
698 | } |
699 | |
700 | return 0; |
701 | } |
702 | |
703 | static int |
704 | mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv, |
705 | struct mlx5_flow_attr *attr, |
706 | struct flow_rule *flow_rule, |
707 | struct mlx5e_mod_hdr_handle **mh, |
708 | u8 zone_restore_id, bool nat_table, bool has_nat) |
709 | { |
710 | DECLARE_MOD_HDR_ACTS_ACTIONS(actions_arr, MLX5_CT_MIN_MOD_ACTS); |
711 | DECLARE_MOD_HDR_ACTS(mod_acts, actions_arr); |
712 | struct flow_action_entry *meta; |
713 | enum ip_conntrack_info ctinfo; |
714 | u16 ct_state = 0; |
715 | int err; |
716 | |
717 | meta = mlx5_tc_ct_get_ct_metadata_action(flow_rule); |
718 | if (!meta) |
719 | return -EOPNOTSUPP; |
720 | ctinfo = meta->ct_metadata.cookie & NFCT_INFOMASK; |
721 | |
722 | err = mlx5_get_label_mapping(ct_priv, labels: meta->ct_metadata.labels, |
723 | id: &attr->ct_attr.ct_labels_id); |
724 | if (err) |
725 | return -EOPNOTSUPP; |
726 | if (nat_table) { |
727 | if (has_nat) { |
728 | err = mlx5_tc_ct_entry_create_nat(ct_priv, flow_rule, mod_acts: &mod_acts); |
729 | if (err) |
730 | goto err_mapping; |
731 | } |
732 | |
733 | ct_state |= MLX5_CT_STATE_NAT_BIT; |
734 | } |
735 | |
736 | ct_state |= MLX5_CT_STATE_TRK_BIT; |
737 | ct_state |= ctinfo == IP_CT_NEW ? MLX5_CT_STATE_NEW_BIT : MLX5_CT_STATE_ESTABLISHED_BIT; |
738 | ct_state |= meta->ct_metadata.orig_dir ? 0 : MLX5_CT_STATE_REPLY_BIT; |
739 | err = mlx5_tc_ct_entry_set_registers(ct_priv, mod_acts: &mod_acts, |
740 | ct_state, |
741 | mark: meta->ct_metadata.mark, |
742 | labels_id: attr->ct_attr.ct_labels_id, |
743 | zone_restore_id); |
744 | if (err) |
745 | goto err_mapping; |
746 | |
747 | if (nat_table && has_nat) { |
748 | attr->modify_hdr = mlx5_modify_header_alloc(dev: ct_priv->dev, ns_type: ct_priv->ns_type, |
749 | num_actions: mod_acts.num_actions, |
750 | modify_actions: mod_acts.actions); |
751 | if (IS_ERR(ptr: attr->modify_hdr)) { |
752 | err = PTR_ERR(ptr: attr->modify_hdr); |
753 | goto err_mapping; |
754 | } |
755 | |
756 | *mh = NULL; |
757 | } else { |
758 | *mh = mlx5e_mod_hdr_attach(mdev: ct_priv->dev, |
759 | tbl: ct_priv->mod_hdr_tbl, |
760 | namespace: ct_priv->ns_type, |
761 | mod_hdr_acts: &mod_acts); |
762 | if (IS_ERR(ptr: *mh)) { |
763 | err = PTR_ERR(ptr: *mh); |
764 | goto err_mapping; |
765 | } |
766 | attr->modify_hdr = mlx5e_mod_hdr_get(mh: *mh); |
767 | } |
768 | |
769 | mlx5e_mod_hdr_dealloc(mod_hdr_acts: &mod_acts); |
770 | return 0; |
771 | |
772 | err_mapping: |
773 | mlx5e_mod_hdr_dealloc(mod_hdr_acts: &mod_acts); |
774 | mlx5_put_label_mapping(ct_priv, id: attr->ct_attr.ct_labels_id); |
775 | return err; |
776 | } |
777 | |
778 | static void |
779 | mlx5_tc_ct_entry_destroy_mod_hdr(struct mlx5_tc_ct_priv *ct_priv, |
780 | struct mlx5_flow_attr *attr, |
781 | struct mlx5e_mod_hdr_handle *mh) |
782 | { |
783 | if (mh) |
784 | mlx5e_mod_hdr_detach(mdev: ct_priv->dev, tbl: ct_priv->mod_hdr_tbl, mh); |
785 | else |
786 | mlx5_modify_header_dealloc(dev: ct_priv->dev, modify_hdr: attr->modify_hdr); |
787 | } |
788 | |
789 | static int |
790 | mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv, |
791 | struct flow_rule *flow_rule, |
792 | struct mlx5_ct_entry *entry, |
793 | bool nat, u8 zone_restore_id) |
794 | { |
795 | struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat]; |
796 | struct mlx5e_priv *priv = netdev_priv(dev: ct_priv->netdev); |
797 | struct mlx5_flow_spec *spec = NULL; |
798 | struct mlx5_flow_attr *attr; |
799 | int err; |
800 | |
801 | zone_rule->nat = nat; |
802 | |
803 | spec = kvzalloc(size: sizeof(*spec), GFP_KERNEL); |
804 | if (!spec) |
805 | return -ENOMEM; |
806 | |
807 | attr = mlx5_alloc_flow_attr(type: ct_priv->ns_type); |
808 | if (!attr) { |
809 | err = -ENOMEM; |
810 | goto err_attr; |
811 | } |
812 | |
813 | err = mlx5_tc_ct_entry_create_mod_hdr(ct_priv, attr, flow_rule, |
814 | mh: &zone_rule->mh, |
815 | zone_restore_id, |
816 | nat_table: nat, |
817 | has_nat: mlx5_tc_ct_entry_has_nat(entry)); |
818 | if (err) { |
819 | ct_dbg("Failed to create ct entry mod hdr" ); |
820 | goto err_mod_hdr; |
821 | } |
822 | |
823 | attr->action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | |
824 | MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | |
825 | MLX5_FLOW_CONTEXT_ACTION_COUNT; |
826 | attr->dest_chain = 0; |
827 | attr->dest_ft = mlx5e_tc_post_act_get_ft(post_act: ct_priv->post_act); |
828 | attr->ft = nat ? ct_priv->ct_nat : ct_priv->ct; |
829 | if (entry->tuple.ip_proto == IPPROTO_TCP || |
830 | entry->tuple.ip_proto == IPPROTO_UDP) |
831 | attr->outer_match_level = MLX5_MATCH_L4; |
832 | else |
833 | attr->outer_match_level = MLX5_MATCH_L3; |
834 | attr->counter = entry->counter->counter; |
835 | attr->flags |= MLX5_ATTR_FLAG_NO_IN_PORT; |
836 | if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB) |
837 | attr->esw_attr->in_mdev = priv->mdev; |
838 | |
839 | mlx5_tc_ct_set_tuple_match(ct_priv, spec, rule: flow_rule); |
840 | mlx5e_tc_match_to_reg_match(spec, type: ZONE_TO_REG, data: entry->tuple.zone, MLX5_CT_ZONE_MASK); |
841 | |
842 | zone_rule->rule = ct_priv->fs_ops->ct_rule_add(ct_priv->fs, spec, attr, flow_rule); |
843 | if (IS_ERR(ptr: zone_rule->rule)) { |
844 | err = PTR_ERR(ptr: zone_rule->rule); |
845 | ct_dbg("Failed to add ct entry rule, nat: %d" , nat); |
846 | goto err_rule; |
847 | } |
848 | |
849 | zone_rule->attr = attr; |
850 | |
851 | kvfree(addr: spec); |
852 | ct_dbg("Offloaded ct entry rule in zone %d" , entry->tuple.zone); |
853 | |
854 | return 0; |
855 | |
856 | err_rule: |
857 | mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, attr: zone_rule->attr, mh: zone_rule->mh); |
858 | mlx5_put_label_mapping(ct_priv, id: attr->ct_attr.ct_labels_id); |
859 | err_mod_hdr: |
860 | kfree(objp: attr); |
861 | err_attr: |
862 | kvfree(addr: spec); |
863 | return err; |
864 | } |
865 | |
866 | static int |
867 | mlx5_tc_ct_entry_replace_rule(struct mlx5_tc_ct_priv *ct_priv, |
868 | struct flow_rule *flow_rule, |
869 | struct mlx5_ct_entry *entry, |
870 | bool nat, u8 zone_restore_id) |
871 | { |
872 | struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat]; |
873 | struct mlx5_flow_attr *attr = zone_rule->attr, *old_attr; |
874 | struct mlx5e_mod_hdr_handle *mh; |
875 | struct mlx5_ct_fs_rule *rule; |
876 | struct mlx5_flow_spec *spec; |
877 | int err; |
878 | |
879 | spec = kvzalloc(size: sizeof(*spec), GFP_KERNEL); |
880 | if (!spec) |
881 | return -ENOMEM; |
882 | |
883 | old_attr = mlx5_alloc_flow_attr(type: ct_priv->ns_type); |
884 | if (!old_attr) { |
885 | err = -ENOMEM; |
886 | goto err_attr; |
887 | } |
888 | *old_attr = *attr; |
889 | |
890 | err = mlx5_tc_ct_entry_create_mod_hdr(ct_priv, attr, flow_rule, mh: &mh, zone_restore_id, |
891 | nat_table: nat, has_nat: mlx5_tc_ct_entry_has_nat(entry)); |
892 | if (err) { |
893 | ct_dbg("Failed to create ct entry mod hdr" ); |
894 | goto err_mod_hdr; |
895 | } |
896 | |
897 | mlx5_tc_ct_set_tuple_match(ct_priv, spec, rule: flow_rule); |
898 | mlx5e_tc_match_to_reg_match(spec, type: ZONE_TO_REG, data: entry->tuple.zone, MLX5_CT_ZONE_MASK); |
899 | |
900 | rule = ct_priv->fs_ops->ct_rule_add(ct_priv->fs, spec, attr, flow_rule); |
901 | if (IS_ERR(ptr: rule)) { |
902 | err = PTR_ERR(ptr: rule); |
903 | ct_dbg("Failed to add replacement ct entry rule, nat: %d" , nat); |
904 | goto err_rule; |
905 | } |
906 | |
907 | ct_priv->fs_ops->ct_rule_del(ct_priv->fs, zone_rule->rule); |
908 | zone_rule->rule = rule; |
909 | mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, attr: old_attr, mh: zone_rule->mh); |
910 | zone_rule->mh = mh; |
911 | mlx5_put_label_mapping(ct_priv, id: old_attr->ct_attr.ct_labels_id); |
912 | |
913 | kfree(objp: old_attr); |
914 | kvfree(addr: spec); |
915 | ct_dbg("Replaced ct entry rule in zone %d" , entry->tuple.zone); |
916 | |
917 | return 0; |
918 | |
919 | err_rule: |
920 | mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, attr: zone_rule->attr, mh); |
921 | mlx5_put_label_mapping(ct_priv, id: attr->ct_attr.ct_labels_id); |
922 | err_mod_hdr: |
923 | kfree(objp: old_attr); |
924 | err_attr: |
925 | kvfree(addr: spec); |
926 | return err; |
927 | } |
928 | |
929 | static bool |
930 | mlx5_tc_ct_entry_valid(struct mlx5_ct_entry *entry) |
931 | { |
932 | return test_bit(MLX5_CT_ENTRY_FLAG_VALID, &entry->flags); |
933 | } |
934 | |
/* Look a ct entry up by tuple: first in the plain tuples table and, only
 * when no node exists there at all, in the NAT tuples table. On success a
 * reference is taken; the caller must drop it with mlx5_tc_ct_entry_put().
 *
 * Return: the referenced entry, NULL when the tuple is unknown in both
 * tables, or ERR_PTR(-EINVAL) when a matching node exists but is either
 * not yet valid (rules not offloaded) or already draining (refcount 0).
 *
 * NOTE(review): callers appear to hold ct_priv->ht_lock across this call
 * (see mlx5_tc_ct_shared_counter_get()) — confirm before adding callers.
 */
static struct mlx5_ct_entry *
mlx5_tc_ct_entry_get(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_tuple *tuple)
{
	struct mlx5_ct_entry *entry;

	entry = rhashtable_lookup_fast(ht: &ct_priv->ct_tuples_ht, key: tuple,
				       params: tuples_ht_params);
	if (entry && mlx5_tc_ct_entry_valid(entry) &&
	    refcount_inc_not_zero(r: &entry->refcnt)) {
		return entry;
	} else if (!entry) {
		/* The tuple may only be present in its post-NAT form. */
		entry = rhashtable_lookup_fast(ht: &ct_priv->ct_tuples_nat_ht,
					       key: tuple, params: tuples_nat_ht_params);
		if (entry && mlx5_tc_ct_entry_valid(entry) &&
		    refcount_inc_not_zero(r: &entry->refcnt))
			return entry;
	}

	return entry ? ERR_PTR(error: -EINVAL) : NULL;
}
955 | |
/* Unlink @entry from both global tuple tables. Called with
 * ct_priv->ht_lock held (see mlx5_tc_ct_entry_del()).
 *
 * NOTE(review): the NAT-table removal also runs for entries that were
 * never inserted there (tuple == tuple_nat at add time, see
 * mlx5_tc_ct_block_flow_offload_add()); presumably rhashtable_remove_fast()
 * simply fails to find the node in that case — confirm.
 */
static void mlx5_tc_ct_entry_remove_from_tuples(struct mlx5_ct_entry *entry)
{
	struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv;

	rhashtable_remove_fast(ht: &ct_priv->ct_tuples_nat_ht,
			       obj: &entry->tuple_nat_node,
			       params: tuples_nat_ht_params);
	rhashtable_remove_fast(ht: &ct_priv->ct_tuples_ht, obj: &entry->tuple_node,
			       params: tuples_ht_params);
}
966 | |
/* Final teardown of a ct entry after its refcount reached zero: delete the
 * offloaded rules, unlink the entry from the tuple tables under ht_lock,
 * release its flow counter and free the memory.
 */
static void mlx5_tc_ct_entry_del(struct mlx5_ct_entry *entry)
{
	struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv;

	/* Rules are removed before the entry leaves the lookup tables. */
	mlx5_tc_ct_entry_del_rules(ct_priv, entry);

	spin_lock_bh(lock: &ct_priv->ht_lock);
	mlx5_tc_ct_entry_remove_from_tuples(entry);
	spin_unlock_bh(lock: &ct_priv->ht_lock);

	mlx5_tc_ct_counter_put(ct_priv, entry);
	kfree(objp: entry);
}
980 | |
981 | static void |
982 | mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry) |
983 | { |
984 | if (!refcount_dec_and_test(r: &entry->refcnt)) |
985 | return; |
986 | |
987 | mlx5_tc_ct_entry_del(entry); |
988 | } |
989 | |
990 | static void mlx5_tc_ct_entry_del_work(struct work_struct *work) |
991 | { |
992 | struct mlx5_ct_entry *entry = container_of(work, struct mlx5_ct_entry, work); |
993 | |
994 | mlx5_tc_ct_entry_del(entry); |
995 | } |
996 | |
997 | static void |
998 | __mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry) |
999 | { |
1000 | if (!refcount_dec_and_test(r: &entry->refcnt)) |
1001 | return; |
1002 | |
1003 | INIT_WORK(&entry->work, mlx5_tc_ct_entry_del_work); |
1004 | queue_work(wq: entry->ct_priv->wq, work: &entry->work); |
1005 | } |
1006 | |
1007 | static struct mlx5_ct_counter * |
1008 | mlx5_tc_ct_counter_create(struct mlx5_tc_ct_priv *ct_priv) |
1009 | { |
1010 | struct mlx5_ct_counter *counter; |
1011 | int ret; |
1012 | |
1013 | counter = kzalloc(size: sizeof(*counter), GFP_KERNEL); |
1014 | if (!counter) |
1015 | return ERR_PTR(error: -ENOMEM); |
1016 | |
1017 | counter->is_shared = false; |
1018 | counter->counter = mlx5_fc_create_ex(dev: ct_priv->dev, aging: true); |
1019 | if (IS_ERR(ptr: counter->counter)) { |
1020 | ct_dbg("Failed to create counter for ct entry" ); |
1021 | ret = PTR_ERR(ptr: counter->counter); |
1022 | kfree(objp: counter); |
1023 | return ERR_PTR(error: ret); |
1024 | } |
1025 | |
1026 | return counter; |
1027 | } |
1028 | |
/* Get a counter for @entry, preferring to share the one of the
 * reverse-direction entry of the same connection: build the reversed tuple
 * (ports and addresses swapped), look it up and, if a live entry is found,
 * take a reference on its counter. Otherwise allocate a fresh counter and
 * mark it shared. Returns the counter or an ERR_PTR; tuples that are
 * neither IPv4 nor IPv6 yield -EOPNOTSUPP.
 */
static struct mlx5_ct_counter *
mlx5_tc_ct_shared_counter_get(struct mlx5_tc_ct_priv *ct_priv,
			      struct mlx5_ct_entry *entry)
{
	struct mlx5_ct_tuple rev_tuple = entry->tuple;
	struct mlx5_ct_counter *shared_counter;
	struct mlx5_ct_entry *rev_entry;

	/* get the reversed tuple */
	swap(rev_tuple.port.src, rev_tuple.port.dst);

	if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
		__be32 tmp_addr = rev_tuple.ip.src_v4;

		rev_tuple.ip.src_v4 = rev_tuple.ip.dst_v4;
		rev_tuple.ip.dst_v4 = tmp_addr;
	} else if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
		struct in6_addr tmp_addr = rev_tuple.ip.src_v6;

		rev_tuple.ip.src_v6 = rev_tuple.ip.dst_v6;
		rev_tuple.ip.dst_v6 = tmp_addr;
	} else {
		return ERR_PTR(error: -EOPNOTSUPP);
	}

	/* Use the same counter as the reverse direction */
	spin_lock_bh(lock: &ct_priv->ht_lock);
	rev_entry = mlx5_tc_ct_entry_get(ct_priv, tuple: &rev_tuple);

	if (IS_ERR(ptr: rev_entry)) {
		/* Reverse entry exists but is invalid/draining: don't share. */
		spin_unlock_bh(lock: &ct_priv->ht_lock);
		goto create_counter;
	}

	if (rev_entry && refcount_inc_not_zero(r: &rev_entry->counter->refcount)) {
		ct_dbg("Using shared counter entry=0x%p rev=0x%p" , entry, rev_entry);
		shared_counter = rev_entry->counter;
		spin_unlock_bh(lock: &ct_priv->ht_lock);

		/* Drop the lookup reference; the counter ref keeps it alive. */
		mlx5_tc_ct_entry_put(entry: rev_entry);
		return shared_counter;
	}

	spin_unlock_bh(lock: &ct_priv->ht_lock);

create_counter:

	shared_counter = mlx5_tc_ct_counter_create(ct_priv);
	if (IS_ERR(ptr: shared_counter))
		return shared_counter;

	shared_counter->is_shared = true;
	refcount_set(r: &shared_counter->refcount, n: 1);
	return shared_counter;
}
1084 | |
/* Offload both direction rules (plain and NAT) for @entry. A flow counter
 * is attached first: a private one when conntrack accounting is enabled,
 * otherwise one that may be shared with the reverse-direction entry.
 * On failure everything installed so far is rolled back.
 */
static int
mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv,
			   struct flow_rule *flow_rule,
			   struct mlx5_ct_entry *entry,
			   u8 zone_restore_id)
{
	int err;

	if (nf_ct_acct_enabled(net: dev_net(dev: ct_priv->netdev)))
		entry->counter = mlx5_tc_ct_counter_create(ct_priv);
	else
		entry->counter = mlx5_tc_ct_shared_counter_get(ct_priv, entry);

	if (IS_ERR(ptr: entry->counter)) {
		err = PTR_ERR(ptr: entry->counter);
		return err;
	}

	err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, nat: false,
					zone_restore_id);
	if (err)
		goto err_orig;

	err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, nat: true,
					zone_restore_id);
	if (err)
		goto err_nat;

	atomic_inc(v: &ct_priv->debugfs.stats.offloaded);
	return 0;

err_nat:
	mlx5_tc_ct_entry_del_rule(ct_priv, entry, nat: false);
err_orig:
	mlx5_tc_ct_counter_put(ct_priv, entry);
	return err;
}
1122 | |
/* Replace both offloaded rules (plain then NAT) of an existing entry with
 * ones derived from @flow_rule. If replacing the NAT rule fails, the
 * already-replaced plain rule is deleted; the caller is then expected to
 * release the whole entry (see mlx5_tc_ct_block_flow_offload_replace()).
 */
static int
mlx5_tc_ct_entry_replace_rules(struct mlx5_tc_ct_priv *ct_priv,
			       struct flow_rule *flow_rule,
			       struct mlx5_ct_entry *entry,
			       u8 zone_restore_id)
{
	int err;

	err = mlx5_tc_ct_entry_replace_rule(ct_priv, flow_rule, entry, nat: false,
					    zone_restore_id);
	if (err)
		return err;

	err = mlx5_tc_ct_entry_replace_rule(ct_priv, flow_rule, entry, nat: true,
					    zone_restore_id);
	if (err)
		mlx5_tc_ct_entry_del_rule(ct_priv, entry, nat: false);
	return err;
}
1142 | |
/* Update an existing entry in place from @flow_rule. If the update fails
 * the entry is left in an unusable state, so it is looked up again under
 * ht_lock, unlinked from the ft's entries table and released.
 */
static int
mlx5_tc_ct_block_flow_offload_replace(struct mlx5_ct_ft *ft, struct flow_rule *flow_rule,
				      struct mlx5_ct_entry *entry, unsigned long cookie)
{
	struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
	int err;

	err = mlx5_tc_ct_entry_replace_rules(ct_priv, flow_rule, entry, zone_restore_id: ft->zone_restore_id);
	if (!err)
		return 0;

	/* If failed to update the entry, then look it up again under ht_lock
	 * protection and properly delete it.
	 */
	spin_lock_bh(lock: &ct_priv->ht_lock);
	entry = rhashtable_lookup_fast(ht: &ft->ct_entries_ht, key: &cookie, params: cts_ht_params);
	if (entry) {
		rhashtable_remove_fast(ht: &ft->ct_entries_ht, obj: &entry->node, params: cts_ht_params);
		spin_unlock_bh(lock: &ct_priv->ht_lock);
		/* Drops the hashtable's reference taken at add time. */
		mlx5_tc_ct_entry_put(entry);
	} else {
		spin_unlock_bh(lock: &ct_priv->ht_lock);
	}
	return err;
}
1168 | |
/* FLOW_CLS_REPLACE handler for the nf flowtable offload callback.
 *
 * If an entry with this cookie already exists it is either rejected
 * (-EEXIST, same restore cookie — nothing changed) or its rules are
 * replaced. Otherwise a new entry is allocated, its tuples parsed from the
 * flower rule, inserted into the three hashtables (per-ft entries,
 * global tuples and — only when NAT actually rewrites the tuple — global
 * NAT tuples), and finally its rules are offloaded and the entry is
 * marked valid. All steps are rolled back in reverse order on failure.
 */
static int
mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft,
				  struct flow_cls_offload *flow)
{
	struct flow_rule *flow_rule = flow_cls_offload_flow_rule(flow_cmd: flow);
	struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
	struct flow_action_entry *meta_action;
	unsigned long cookie = flow->cookie;
	struct mlx5_ct_entry *entry;
	int err;

	meta_action = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
	if (!meta_action)
		return -EOPNOTSUPP;

	spin_lock_bh(lock: &ct_priv->ht_lock);
	entry = rhashtable_lookup_fast(ht: &ft->ct_entries_ht, key: &cookie, params: cts_ht_params);
	if (entry && refcount_inc_not_zero(r: &entry->refcnt)) {
		if (entry->restore_cookie == meta_action->ct_metadata.cookie) {
			/* Same metadata as before: nothing to update. */
			spin_unlock_bh(lock: &ct_priv->ht_lock);
			mlx5_tc_ct_entry_put(entry);
			return -EEXIST;
		}
		entry->restore_cookie = meta_action->ct_metadata.cookie;
		spin_unlock_bh(lock: &ct_priv->ht_lock);

		err = mlx5_tc_ct_block_flow_offload_replace(ft, flow_rule, entry, cookie);
		mlx5_tc_ct_entry_put(entry);
		return err;
	}
	spin_unlock_bh(lock: &ct_priv->ht_lock);

	entry = kzalloc(size: sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return -ENOMEM;

	entry->tuple.zone = ft->zone;
	entry->cookie = flow->cookie;
	entry->restore_cookie = meta_action->ct_metadata.cookie;
	/* One reference for the entries hashtable, one for this function. */
	refcount_set(r: &entry->refcnt, n: 2);
	entry->ct_priv = ct_priv;

	err = mlx5_tc_ct_rule_to_tuple(tuple: &entry->tuple, rule: flow_rule);
	if (err)
		goto err_set;

	/* tuple_nat starts as a copy of tuple; NAT mangle actions (if any)
	 * are applied on top of it.
	 */
	memcpy(&entry->tuple_nat, &entry->tuple, sizeof(entry->tuple));
	err = mlx5_tc_ct_rule_to_tuple_nat(tuple: &entry->tuple_nat, rule: flow_rule);
	if (err)
		goto err_set;

	spin_lock_bh(lock: &ct_priv->ht_lock);

	err = rhashtable_lookup_insert_fast(ht: &ft->ct_entries_ht, obj: &entry->node,
					    params: cts_ht_params);
	if (err)
		goto err_entries;

	err = rhashtable_lookup_insert_fast(ht: &ct_priv->ct_tuples_ht,
					    obj: &entry->tuple_node,
					    params: tuples_ht_params);
	if (err)
		goto err_tuple;

	/* Only insert into the NAT table when NAT actually changed the tuple. */
	if (memcmp(p: &entry->tuple, q: &entry->tuple_nat, size: sizeof(entry->tuple))) {
		err = rhashtable_lookup_insert_fast(ht: &ct_priv->ct_tuples_nat_ht,
						    obj: &entry->tuple_nat_node,
						    params: tuples_nat_ht_params);
		if (err)
			goto err_tuple_nat;
	}
	spin_unlock_bh(lock: &ct_priv->ht_lock);

	err = mlx5_tc_ct_entry_add_rules(ct_priv, flow_rule, entry,
					 zone_restore_id: ft->zone_restore_id);
	if (err)
		goto err_rules;

	set_bit(nr: MLX5_CT_ENTRY_FLAG_VALID, addr: &entry->flags);
	mlx5_tc_ct_entry_put(entry); /* this function reference */

	return 0;

err_rules:
	spin_lock_bh(lock: &ct_priv->ht_lock);
	if (mlx5_tc_ct_entry_has_nat(entry))
		rhashtable_remove_fast(ht: &ct_priv->ct_tuples_nat_ht,
				       obj: &entry->tuple_nat_node, params: tuples_nat_ht_params);
err_tuple_nat:
	rhashtable_remove_fast(ht: &ct_priv->ct_tuples_ht,
			       obj: &entry->tuple_node,
			       params: tuples_ht_params);
err_tuple:
	rhashtable_remove_fast(ht: &ft->ct_entries_ht,
			       obj: &entry->node,
			       params: cts_ht_params);
err_entries:
	spin_unlock_bh(lock: &ct_priv->ht_lock);
err_set:
	kfree(objp: entry);
	if (err != -EEXIST)
		netdev_warn(dev: ct_priv->netdev, format: "Failed to offload ct entry, err: %d\n" , err);
	return err;
}
1273 | |
1274 | static int |
1275 | mlx5_tc_ct_block_flow_offload_del(struct mlx5_ct_ft *ft, |
1276 | struct flow_cls_offload *flow) |
1277 | { |
1278 | struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv; |
1279 | unsigned long cookie = flow->cookie; |
1280 | struct mlx5_ct_entry *entry; |
1281 | |
1282 | spin_lock_bh(lock: &ct_priv->ht_lock); |
1283 | entry = rhashtable_lookup_fast(ht: &ft->ct_entries_ht, key: &cookie, params: cts_ht_params); |
1284 | if (!entry) { |
1285 | spin_unlock_bh(lock: &ct_priv->ht_lock); |
1286 | return -ENOENT; |
1287 | } |
1288 | |
1289 | if (!mlx5_tc_ct_entry_valid(entry)) { |
1290 | spin_unlock_bh(lock: &ct_priv->ht_lock); |
1291 | return -EINVAL; |
1292 | } |
1293 | |
1294 | rhashtable_remove_fast(ht: &ft->ct_entries_ht, obj: &entry->node, params: cts_ht_params); |
1295 | spin_unlock_bh(lock: &ct_priv->ht_lock); |
1296 | |
1297 | mlx5_tc_ct_entry_put(entry); |
1298 | |
1299 | return 0; |
1300 | } |
1301 | |
/* FLOW_CLS_STATS handler: report the cached hardware counter values for
 * the entry with @f->cookie. A reference is taken under ht_lock so the
 * entry (and its counter) cannot be freed while being queried.
 */
static int
mlx5_tc_ct_block_flow_offload_stats(struct mlx5_ct_ft *ft,
				    struct flow_cls_offload *f)
{
	struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
	unsigned long cookie = f->cookie;
	struct mlx5_ct_entry *entry;
	u64 lastuse, packets, bytes;

	spin_lock_bh(lock: &ct_priv->ht_lock);
	entry = rhashtable_lookup_fast(ht: &ft->ct_entries_ht, key: &cookie, params: cts_ht_params);
	if (!entry) {
		spin_unlock_bh(lock: &ct_priv->ht_lock);
		return -ENOENT;
	}

	if (!mlx5_tc_ct_entry_valid(entry) || !refcount_inc_not_zero(r: &entry->refcnt)) {
		spin_unlock_bh(lock: &ct_priv->ht_lock);
		return -EINVAL;
	}

	spin_unlock_bh(lock: &ct_priv->ht_lock);

	mlx5_fc_query_cached(counter: entry->counter->counter, bytes: &bytes, packets: &packets, lastuse: &lastuse);
	flow_stats_update(flow_stats: &f->stats, bytes, pkts: packets, drops: 0, lastused: lastuse,
			  used_hw_stats: FLOW_ACTION_HW_STATS_DELAYED);

	mlx5_tc_ct_entry_put(entry);
	return 0;
}
1332 | |
1333 | static int |
1334 | mlx5_tc_ct_block_flow_offload(enum tc_setup_type type, void *type_data, |
1335 | void *cb_priv) |
1336 | { |
1337 | struct flow_cls_offload *f = type_data; |
1338 | struct mlx5_ct_ft *ft = cb_priv; |
1339 | |
1340 | if (type != TC_SETUP_CLSFLOWER) |
1341 | return -EOPNOTSUPP; |
1342 | |
1343 | switch (f->command) { |
1344 | case FLOW_CLS_REPLACE: |
1345 | return mlx5_tc_ct_block_flow_offload_add(ft, flow: f); |
1346 | case FLOW_CLS_DESTROY: |
1347 | return mlx5_tc_ct_block_flow_offload_del(ft, flow: f); |
1348 | case FLOW_CLS_STATS: |
1349 | return mlx5_tc_ct_block_flow_offload_stats(ft, f); |
1350 | default: |
1351 | break; |
1352 | } |
1353 | |
1354 | return -EOPNOTSUPP; |
1355 | } |
1356 | |
1357 | static bool |
1358 | mlx5_tc_ct_skb_to_tuple(struct sk_buff *skb, struct mlx5_ct_tuple *tuple, |
1359 | u16 zone) |
1360 | { |
1361 | struct flow_keys flow_keys; |
1362 | |
1363 | skb_reset_network_header(skb); |
1364 | skb_flow_dissect_flow_keys(skb, flow: &flow_keys, FLOW_DISSECTOR_F_STOP_BEFORE_ENCAP); |
1365 | |
1366 | tuple->zone = zone; |
1367 | |
1368 | if (flow_keys.basic.ip_proto != IPPROTO_TCP && |
1369 | flow_keys.basic.ip_proto != IPPROTO_UDP && |
1370 | flow_keys.basic.ip_proto != IPPROTO_GRE) |
1371 | return false; |
1372 | |
1373 | if (flow_keys.basic.ip_proto == IPPROTO_TCP || |
1374 | flow_keys.basic.ip_proto == IPPROTO_UDP) { |
1375 | tuple->port.src = flow_keys.ports.src; |
1376 | tuple->port.dst = flow_keys.ports.dst; |
1377 | } |
1378 | tuple->n_proto = flow_keys.basic.n_proto; |
1379 | tuple->ip_proto = flow_keys.basic.ip_proto; |
1380 | |
1381 | switch (flow_keys.basic.n_proto) { |
1382 | case htons(ETH_P_IP): |
1383 | tuple->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; |
1384 | tuple->ip.src_v4 = flow_keys.addrs.v4addrs.src; |
1385 | tuple->ip.dst_v4 = flow_keys.addrs.v4addrs.dst; |
1386 | break; |
1387 | |
1388 | case htons(ETH_P_IPV6): |
1389 | tuple->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; |
1390 | tuple->ip.src_v6 = flow_keys.addrs.v6addrs.src; |
1391 | tuple->ip.dst_v6 = flow_keys.addrs.v6addrs.dst; |
1392 | break; |
1393 | default: |
1394 | goto out; |
1395 | } |
1396 | |
1397 | return true; |
1398 | |
1399 | out: |
1400 | return false; |
1401 | } |
1402 | |
1403 | int mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec) |
1404 | { |
1405 | u32 ctstate = 0, ctstate_mask = 0; |
1406 | |
1407 | mlx5e_tc_match_to_reg_get_match(spec, type: CTSTATE_TO_REG, |
1408 | data: &ctstate, mask: &ctstate_mask); |
1409 | |
1410 | if ((ctstate & ctstate_mask) == MLX5_CT_STATE_TRK_BIT) |
1411 | return -EOPNOTSUPP; |
1412 | |
1413 | ctstate_mask |= MLX5_CT_STATE_TRK_BIT; |
1414 | mlx5e_tc_match_to_reg_match(spec, type: CTSTATE_TO_REG, |
1415 | data: ctstate, mask: ctstate_mask); |
1416 | |
1417 | return 0; |
1418 | } |
1419 | |
1420 | void mlx5_tc_ct_match_del(struct mlx5_tc_ct_priv *priv, struct mlx5_ct_attr *ct_attr) |
1421 | { |
1422 | if (!priv || !ct_attr->ct_labels_id) |
1423 | return; |
1424 | |
1425 | mlx5_put_label_mapping(ct_priv: priv, id: ct_attr->ct_labels_id); |
1426 | } |
1427 | |
/* Translate a flower CT match (ct_state/ct_zone/ct_mark/ct_labels) into
 * matches on the driver's ct metadata registers. Returns 0 when the rule
 * has no CT key at all. On success with a labels match, a label mapping id
 * was allocated into ct_attr->ct_labels_id and must later be released via
 * mlx5_tc_ct_match_del().
 */
int
mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv,
		     struct mlx5_flow_spec *spec,
		     struct flow_cls_offload *f,
		     struct mlx5_ct_attr *ct_attr,
		     struct netlink_ext_ack *extack)
{
	bool trk, est, untrk, unnew, unest, new, rpl, unrpl, rel, unrel, inv, uninv;
	struct flow_rule *rule = flow_cls_offload_flow_rule(flow_cmd: f);
	struct flow_dissector_key_ct *mask, *key;
	u32 ctstate = 0, ctstate_mask = 0;
	u16 ct_state_on, ct_state_off;
	u16 ct_state, ct_state_mask;
	struct flow_match_ct match;
	u32 ct_labels[4];

	if (!flow_rule_match_key(rule, key: FLOW_DISSECTOR_KEY_CT))
		return 0;

	if (!priv) {
		NL_SET_ERR_MSG_MOD(extack,
				   "offload of ct matching isn't available" );
		return -EOPNOTSUPP;
	}

	flow_rule_match_ct(rule, out: &match);

	key = match.key;
	mask = match.mask;

	ct_state = key->ct_state;
	ct_state_mask = mask->ct_state;

	/* rel and inv are allowed in the mask but only as negative ('-rel',
	 * '-inv') matches; positive matches are rejected further below.
	 */
	if (ct_state_mask & ~(TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
			      TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED |
			      TCA_FLOWER_KEY_CT_FLAGS_NEW |
			      TCA_FLOWER_KEY_CT_FLAGS_REPLY |
			      TCA_FLOWER_KEY_CT_FLAGS_RELATED |
			      TCA_FLOWER_KEY_CT_FLAGS_INVALID)) {
		NL_SET_ERR_MSG_MOD(extack,
				   "only ct_state trk, est, new and rpl are supported for offload" );
		return -EOPNOTSUPP;
	}

	/* Split the masked key into flags that must be set ('+flag') and
	 * flags that must be clear ('-flag').
	 */
	ct_state_on = ct_state & ct_state_mask;
	ct_state_off = (ct_state & ct_state_mask) ^ ct_state_mask;
	trk = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
	new = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_NEW;
	est = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
	rpl = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_REPLY;
	rel = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_RELATED;
	inv = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_INVALID;
	untrk = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
	unnew = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_NEW;
	unest = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
	unrpl = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_REPLY;
	unrel = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_RELATED;
	uninv = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_INVALID;

	/* Map the flower flags onto the driver's register ctstate bits.
	 * rel/inv contribute only mask bits (value 0), i.e. negative match.
	 */
	ctstate |= trk ? MLX5_CT_STATE_TRK_BIT : 0;
	ctstate |= new ? MLX5_CT_STATE_NEW_BIT : 0;
	ctstate |= est ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;
	ctstate |= rpl ? MLX5_CT_STATE_REPLY_BIT : 0;
	ctstate_mask |= (untrk || trk) ? MLX5_CT_STATE_TRK_BIT : 0;
	ctstate_mask |= (unnew || new) ? MLX5_CT_STATE_NEW_BIT : 0;
	ctstate_mask |= (unest || est) ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;
	ctstate_mask |= (unrpl || rpl) ? MLX5_CT_STATE_REPLY_BIT : 0;
	ctstate_mask |= unrel ? MLX5_CT_STATE_RELATED_BIT : 0;
	ctstate_mask |= uninv ? MLX5_CT_STATE_INVALID_BIT : 0;

	if (rel) {
		NL_SET_ERR_MSG_MOD(extack,
				   "matching on ct_state +rel isn't supported" );
		return -EOPNOTSUPP;
	}

	if (inv) {
		NL_SET_ERR_MSG_MOD(extack,
				   "matching on ct_state +inv isn't supported" );
		return -EOPNOTSUPP;
	}

	if (mask->ct_zone)
		mlx5e_tc_match_to_reg_match(spec, type: ZONE_TO_REG,
					    data: key->ct_zone, MLX5_CT_ZONE_MASK);
	if (ctstate_mask)
		mlx5e_tc_match_to_reg_match(spec, type: CTSTATE_TO_REG,
					    data: ctstate, mask: ctstate_mask);
	if (mask->ct_mark)
		mlx5e_tc_match_to_reg_match(spec, type: MARK_TO_REG,
					    data: key->ct_mark, mask: mask->ct_mark);
	/* The 128-bit label doesn't fit a register; match on a mapping id
	 * allocated for the masked label value instead.
	 */
	if (mask->ct_labels[0] || mask->ct_labels[1] || mask->ct_labels[2] ||
	    mask->ct_labels[3]) {
		ct_labels[0] = key->ct_labels[0] & mask->ct_labels[0];
		ct_labels[1] = key->ct_labels[1] & mask->ct_labels[1];
		ct_labels[2] = key->ct_labels[2] & mask->ct_labels[2];
		ct_labels[3] = key->ct_labels[3] & mask->ct_labels[3];
		if (mlx5_get_label_mapping(ct_priv: priv, labels: ct_labels, id: &ct_attr->ct_labels_id))
			return -EOPNOTSUPP;
		mlx5e_tc_match_to_reg_match(spec, type: LABELS_TO_REG, data: ct_attr->ct_labels_id,
					    MLX5_CT_LABELS_MASK);
	}

	return 0;
}
1533 | |
1534 | int |
1535 | mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv, |
1536 | struct mlx5_flow_attr *attr, |
1537 | const struct flow_action_entry *act, |
1538 | struct netlink_ext_ack *extack) |
1539 | { |
1540 | if (!priv) { |
1541 | NL_SET_ERR_MSG_MOD(extack, |
1542 | "offload of ct action isn't available" ); |
1543 | return -EOPNOTSUPP; |
1544 | } |
1545 | |
1546 | attr->ct_attr.ct_action |= act->ct.action; /* So we can have clear + ct */ |
1547 | attr->ct_attr.zone = act->ct.zone; |
1548 | if (!(act->ct.action & TCA_CT_ACT_CLEAR)) |
1549 | attr->ct_attr.nf_ft = act->ct.flow_table; |
1550 | attr->ct_attr.act_miss_cookie = act->miss_cookie; |
1551 | |
1552 | return 0; |
1553 | } |
1554 | |
/* Populate a pre-ct table with its two rules:
 *  - a match rule on zone + trk (and the NAT bit for the NAT variant)
 *    forwarding straight to the post-action table, and
 *  - a catch-all miss rule forwarding to the ct (or ct_nat) table.
 * Both rules apply a modify header that writes the zone into its register
 * before forwarding.
 */
static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft,
				  struct mlx5_tc_ct_pre *pre_ct,
				  bool nat)
{
	struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
	struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {};
	struct mlx5_core_dev *dev = ct_priv->dev;
	struct mlx5_flow_table *ft = pre_ct->ft;
	struct mlx5_flow_destination dest = {};
	struct mlx5_flow_act flow_act = {};
	struct mlx5_modify_hdr *mod_hdr;
	struct mlx5_flow_handle *rule;
	struct mlx5_flow_spec *spec;
	u32 ctstate;
	u16 zone;
	int err;

	spec = kvzalloc(size: sizeof(*spec), GFP_KERNEL);
	if (!spec)
		return -ENOMEM;

	zone = ct_ft->zone & MLX5_CT_ZONE_MASK;
	err = mlx5e_tc_match_to_reg_set(mdev: dev, mod_hdr_acts: &pre_mod_acts, ns: ct_priv->ns_type,
					type: ZONE_TO_REG, data: zone);
	if (err) {
		ct_dbg("Failed to set zone register mapping" );
		goto err_mapping;
	}

	mod_hdr = mlx5_modify_header_alloc(dev, ns_type: ct_priv->ns_type,
					   num_actions: pre_mod_acts.num_actions,
					   modify_actions: pre_mod_acts.actions);

	if (IS_ERR(ptr: mod_hdr)) {
		err = PTR_ERR(ptr: mod_hdr);
		ct_dbg("Failed to create pre ct mod hdr" );
		goto err_mapping;
	}
	pre_ct->modify_hdr = mod_hdr;

	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
			  MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
	/* Allow forwarding to a table at an arbitrary level. */
	flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
	flow_act.modify_hdr = mod_hdr;
	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;

	/* add flow rule */
	mlx5e_tc_match_to_reg_match(spec, type: ZONE_TO_REG,
				    data: zone, MLX5_CT_ZONE_MASK);
	ctstate = MLX5_CT_STATE_TRK_BIT;
	if (nat)
		ctstate |= MLX5_CT_STATE_NAT_BIT;
	mlx5e_tc_match_to_reg_match(spec, type: CTSTATE_TO_REG, data: ctstate, mask: ctstate);

	dest.ft = mlx5e_tc_post_act_get_ft(post_act: ct_priv->post_act);
	rule = mlx5_add_flow_rules(ft, spec, flow_act: &flow_act, dest: &dest, num_dest: 1);
	if (IS_ERR(ptr: rule)) {
		err = PTR_ERR(ptr: rule);
		ct_dbg("Failed to add pre ct flow rule zone %d" , zone);
		goto err_flow_rule;
	}
	pre_ct->flow_rule = rule;

	/* add miss rule */
	dest.ft = nat ? ct_priv->ct_nat : ct_priv->ct;
	rule = mlx5_add_flow_rules(ft, NULL, flow_act: &flow_act, dest: &dest, num_dest: 1);
	if (IS_ERR(ptr: rule)) {
		err = PTR_ERR(ptr: rule);
		ct_dbg("Failed to add pre ct miss rule zone %d" , zone);
		goto err_miss_rule;
	}
	pre_ct->miss_rule = rule;

	/* The modify-header actions buffer is only needed for allocation. */
	mlx5e_mod_hdr_dealloc(mod_hdr_acts: &pre_mod_acts);
	kvfree(addr: spec);
	return 0;

err_miss_rule:
	mlx5_del_flow_rules(fr: pre_ct->flow_rule);
err_flow_rule:
	mlx5_modify_header_dealloc(dev, modify_hdr: pre_ct->modify_hdr);
err_mapping:
	mlx5e_mod_hdr_dealloc(mod_hdr_acts: &pre_mod_acts);
	kvfree(addr: spec);
	return err;
}
1641 | |
1642 | static void |
1643 | tc_ct_pre_ct_del_rules(struct mlx5_ct_ft *ct_ft, |
1644 | struct mlx5_tc_ct_pre *pre_ct) |
1645 | { |
1646 | struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv; |
1647 | struct mlx5_core_dev *dev = ct_priv->dev; |
1648 | |
1649 | mlx5_del_flow_rules(fr: pre_ct->flow_rule); |
1650 | mlx5_del_flow_rules(fr: pre_ct->miss_rule); |
1651 | mlx5_modify_header_dealloc(dev, modify_hdr: pre_ct->modify_hdr); |
1652 | } |
1653 | |
/* Create one pre-ct flow table (plain or NAT variant): an unmanaged
 * two-entry table with two groups — group 0 matching on metadata register
 * c2 (zone in the low 16 bits, trk/nat state bits in the high 16) and
 * group 1 as a catch-all for the miss rule — then install the rules via
 * tc_ct_pre_ct_add_rules().
 */
static int
mlx5_tc_ct_alloc_pre_ct(struct mlx5_ct_ft *ct_ft,
			struct mlx5_tc_ct_pre *pre_ct,
			bool nat)
{
	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
	struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
	struct mlx5_core_dev *dev = ct_priv->dev;
	struct mlx5_flow_table_attr ft_attr = {};
	struct mlx5_flow_namespace *ns;
	struct mlx5_flow_table *ft;
	struct mlx5_flow_group *g;
	u32 metadata_reg_c_2_mask;
	u32 *flow_group_in;
	void *misc;
	int err;

	ns = mlx5_get_flow_namespace(dev, type: ct_priv->ns_type);
	if (!ns) {
		err = -EOPNOTSUPP;
		ct_dbg("Failed to get flow namespace" );
		return err;
	}

	flow_group_in = kvzalloc(size: inlen, GFP_KERNEL);
	if (!flow_group_in)
		return -ENOMEM;

	ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED;
	ft_attr.prio = ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB ?
		       FDB_TC_OFFLOAD : MLX5E_TC_PRIO;
	/* Exactly two entries: the match rule and the miss rule. */
	ft_attr.max_fte = 2;
	ft_attr.level = 1;
	ft = mlx5_create_flow_table(ns, ft_attr: &ft_attr);
	if (IS_ERR(ptr: ft)) {
		err = PTR_ERR(ptr: ft);
		ct_dbg("Failed to create pre ct table" );
		goto out_free;
	}
	pre_ct->ft = ft;

	/* create flow group */
	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);
	MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
		 MLX5_MATCH_MISC_PARAMETERS_2);

	misc = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
			    match_criteria.misc_parameters_2);

	/* Zone in the low half, ct state bits shifted into the high half. */
	metadata_reg_c_2_mask = MLX5_CT_ZONE_MASK;
	metadata_reg_c_2_mask |= (MLX5_CT_STATE_TRK_BIT << 16);
	if (nat)
		metadata_reg_c_2_mask |= (MLX5_CT_STATE_NAT_BIT << 16);

	MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_2,
		 metadata_reg_c_2_mask);

	g = mlx5_create_flow_group(ft, in: flow_group_in);
	if (IS_ERR(ptr: g)) {
		err = PTR_ERR(ptr: g);
		ct_dbg("Failed to create pre ct group" );
		goto err_flow_grp;
	}
	pre_ct->flow_grp = g;

	/* create miss group */
	memset(flow_group_in, 0, inlen);
	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
	g = mlx5_create_flow_group(ft, in: flow_group_in);
	if (IS_ERR(ptr: g)) {
		err = PTR_ERR(ptr: g);
		ct_dbg("Failed to create pre ct miss group" );
		goto err_miss_grp;
	}
	pre_ct->miss_grp = g;

	err = tc_ct_pre_ct_add_rules(ct_ft, pre_ct, nat);
	if (err)
		goto err_add_rules;

	kvfree(addr: flow_group_in);
	return 0;

err_add_rules:
	mlx5_destroy_flow_group(fg: pre_ct->miss_grp);
err_miss_grp:
	mlx5_destroy_flow_group(fg: pre_ct->flow_grp);
err_flow_grp:
	mlx5_destroy_flow_table(ft);
out_free:
	kvfree(addr: flow_group_in);
	return err;
}
1749 | |
1750 | static void |
1751 | mlx5_tc_ct_free_pre_ct(struct mlx5_ct_ft *ct_ft, |
1752 | struct mlx5_tc_ct_pre *pre_ct) |
1753 | { |
1754 | tc_ct_pre_ct_del_rules(ct_ft, pre_ct); |
1755 | mlx5_destroy_flow_group(fg: pre_ct->miss_grp); |
1756 | mlx5_destroy_flow_group(fg: pre_ct->flow_grp); |
1757 | mlx5_destroy_flow_table(ft: pre_ct->ft); |
1758 | } |
1759 | |
1760 | static int |
1761 | mlx5_tc_ct_alloc_pre_ct_tables(struct mlx5_ct_ft *ft) |
1762 | { |
1763 | int err; |
1764 | |
1765 | err = mlx5_tc_ct_alloc_pre_ct(ct_ft: ft, pre_ct: &ft->pre_ct, nat: false); |
1766 | if (err) |
1767 | return err; |
1768 | |
1769 | err = mlx5_tc_ct_alloc_pre_ct(ct_ft: ft, pre_ct: &ft->pre_ct_nat, nat: true); |
1770 | if (err) |
1771 | goto err_pre_ct_nat; |
1772 | |
1773 | return 0; |
1774 | |
1775 | err_pre_ct_nat: |
1776 | mlx5_tc_ct_free_pre_ct(ct_ft: ft, pre_ct: &ft->pre_ct); |
1777 | return err; |
1778 | } |
1779 | |
1780 | static void |
1781 | mlx5_tc_ct_free_pre_ct_tables(struct mlx5_ct_ft *ft) |
1782 | { |
1783 | mlx5_tc_ct_free_pre_ct(ct_ft: ft, pre_ct: &ft->pre_ct_nat); |
1784 | mlx5_tc_ct_free_pre_ct(ct_ft: ft, pre_ct: &ft->pre_ct); |
1785 | } |
1786 | |
/* To avoid a false lock-dependency warning, give ct_entries_ht a lock
 * class of its own: deleting the last flow of a group and then the group
 * itself reaches del_sw_flow_group(), which calls rhashtable_destroy() on
 * fg->ftes_hash. That acquires an ht->mutex too, but a different one than
 * the ht->mutex used here, so the two must not share a lockdep class.
 */
1793 | static struct lock_class_key ct_entries_ht_lock_key; |
1794 | |
/* Get (or create) the per-zone ct flow table object. An existing object
 * found in zone_ht is simply referenced. Otherwise a new one is built:
 * the zone is mapped to a restore id, the pre-ct tables are created, the
 * entries hashtable is initialized, the object is inserted into zone_ht
 * and the nf flowtable offload callback is registered. Returns the object
 * or an ERR_PTR.
 */
static struct mlx5_ct_ft *
mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone,
		     struct nf_flowtable *nf_ft)
{
	struct mlx5_ct_ft *ft;
	int err;

	ft = rhashtable_lookup_fast(ht: &ct_priv->zone_ht, key: &zone, params: zone_params);
	if (ft) {
		refcount_inc(r: &ft->refcount);
		return ft;
	}

	ft = kzalloc(size: sizeof(*ft), GFP_KERNEL);
	if (!ft)
		return ERR_PTR(error: -ENOMEM);

	err = mapping_add(ctx: ct_priv->zone_mapping, data: &zone, id: &ft->zone_restore_id);
	if (err)
		goto err_mapping;

	ft->zone = zone;
	ft->nf_ft = nf_ft;
	ft->ct_priv = ct_priv;
	refcount_set(r: &ft->refcount, n: 1);

	err = mlx5_tc_ct_alloc_pre_ct_tables(ft);
	if (err)
		goto err_alloc_pre_ct;

	err = rhashtable_init(ht: &ft->ct_entries_ht, params: &cts_ht_params);
	if (err)
		goto err_init;

	/* See the comment on ct_entries_ht_lock_key above. */
	lockdep_set_class(&ft->ct_entries_ht.mutex, &ct_entries_ht_lock_key);

	err = rhashtable_insert_fast(ht: &ct_priv->zone_ht, obj: &ft->node,
				     params: zone_params);
	if (err)
		goto err_insert;

	err = nf_flow_table_offload_add_cb(flow_table: ft->nf_ft,
					   cb: mlx5_tc_ct_block_flow_offload, cb_priv: ft);
	if (err)
		goto err_add_cb;

	return ft;

err_add_cb:
	rhashtable_remove_fast(ht: &ct_priv->zone_ht, obj: &ft->node, params: zone_params);
err_insert:
	rhashtable_destroy(ht: &ft->ct_entries_ht);
err_init:
	mlx5_tc_ct_free_pre_ct_tables(ft);
err_alloc_pre_ct:
	mapping_remove(ctx: ct_priv->zone_mapping, id: ft->zone_restore_id);
err_mapping:
	kfree(objp: ft);
	return ERR_PTR(error: err);
}
1855 | |
/* rhashtable_free_and_destroy() callback: drop the table's reference on
 * each entry still present. @arg (ct_priv) is unused.
 */
static void
mlx5_tc_ct_flush_ft_entry(void *ptr, void *arg)
{
	mlx5_tc_ct_entry_put((struct mlx5_ct_entry *)ptr);
}
1863 | |
1864 | static void |
1865 | mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft) |
1866 | { |
1867 | if (!refcount_dec_and_test(r: &ft->refcount)) |
1868 | return; |
1869 | |
1870 | flush_workqueue(ct_priv->wq); |
1871 | nf_flow_table_offload_del_cb(flow_table: ft->nf_ft, |
1872 | cb: mlx5_tc_ct_block_flow_offload, cb_priv: ft); |
1873 | rhashtable_remove_fast(ht: &ct_priv->zone_ht, obj: &ft->node, params: zone_params); |
1874 | rhashtable_free_and_destroy(ht: &ft->ct_entries_ht, |
1875 | free_fn: mlx5_tc_ct_flush_ft_entry, |
1876 | arg: ct_priv); |
1877 | mlx5_tc_ct_free_pre_ct_tables(ft); |
1878 | mapping_remove(ctx: ct_priv->zone_mapping, id: ft->zone_restore_id); |
1879 | kfree(objp: ft); |
1880 | } |
1881 | |
1882 | /* We translate the tc filter with CT action to the following HW model: |
1883 | * |
1884 | * +-----------------------+ |
1885 | * + rule (either original + |
1886 | * + or post_act rule) + |
1887 | * +-----------------------+ |
1888 | * | set act_miss_cookie mapping |
1889 | * | set fte_id |
1890 | * | set tunnel_id |
1891 | * | rest of actions before the CT action (for this orig/post_act rule) |
1892 | * | |
1893 | * +-------------+ |
1894 | * | Chain 0 | |
1895 | * | optimization| |
1896 | * | v |
1897 | * | +---------------------+ |
1898 | * | + pre_ct/pre_ct_nat + if matches +----------------------+ |
1899 | * | + zone+nat match +---------------->+ post_act (see below) + |
1900 | * | +---------------------+ set zone +----------------------+ |
1901 | * | | |
1902 | * +-------------+ set zone |
1903 | * | |
1904 | * v |
1905 | * +--------------------+ |
1906 | * + CT (nat or no nat) + |
1907 | * + tuple + zone match + |
1908 | * +--------------------+ |
1909 | * | set mark |
1910 | * | set labels_id |
1911 | * | set established |
1912 | * | set zone_restore |
1913 | * | do nat (if needed) |
1914 | * v |
1915 | * +--------------+ |
1916 | * + post_act + rest of parsed filter's actions |
1917 | * + fte_id match +------------------------> |
1918 | * +--------------+ |
1919 | * |
1920 | */ |
/* Offload the CT portion of a tc filter: take a reference on (or create)
 * the per-zone ft, program the act-miss cookie into the mapped-object
 * register, and point the rule at the CT (chain 0) or pre_ct tables.
 * Called with ct_priv->control_lock held (see mlx5_tc_ct_flow_offload).
 */
static int
__mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
			  struct mlx5_flow_attr *attr)
{
	bool nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT;
	struct mlx5e_priv *priv = netdev_priv(dev: ct_priv->netdev);
	int act_miss_mapping = 0, err;
	struct mlx5_ct_ft *ft;
	u16 zone;

	/* Register for CT established events */
	ft = mlx5_tc_ct_add_ft_cb(ct_priv, zone: attr->ct_attr.zone,
				  nf_ft: attr->ct_attr.nf_ft);
	if (IS_ERR(ptr: ft)) {
		err = PTR_ERR(ptr: ft);
		ct_dbg("Failed to register to ft callback" );
		goto err_ft;
	}
	attr->ct_attr.ft = ft;

	/* Map the software act-miss cookie to a HW register value so a miss
	 * in the CT tables can be attributed back to this action.
	 */
	err = mlx5e_tc_action_miss_mapping_get(priv: ct_priv->priv, attr, act_miss_cookie: attr->ct_attr.act_miss_cookie,
					       act_miss_mapping: &act_miss_mapping);
	if (err) {
		ct_dbg("Failed to get register mapping for act miss" );
		goto err_get_act_miss;
	}

	err = mlx5e_tc_match_to_reg_set(mdev: priv->mdev, mod_hdr_acts: &attr->parse_attr->mod_hdr_acts,
					ns: ct_priv->ns_type, type: MAPPED_OBJ_TO_REG, data: act_miss_mapping);
	if (err) {
		ct_dbg("Failed to set act miss register mapping" );
		goto err_mapping;
	}

	/* Chain 0 sets the zone and jumps to ct table
	 * Other chains jump to pre_ct table to align with act_ct cached logic
	 */
	if (!attr->chain) {
		zone = ft->zone & MLX5_CT_ZONE_MASK;
		err = mlx5e_tc_match_to_reg_set(mdev: priv->mdev, mod_hdr_acts: &attr->parse_attr->mod_hdr_acts,
						ns: ct_priv->ns_type, type: ZONE_TO_REG, data: zone);
		if (err) {
			ct_dbg("Failed to set zone register mapping" );
			goto err_mapping;
		}

		attr->dest_ft = nat ? ct_priv->ct_nat : ct_priv->ct;
	} else {
		attr->dest_ft = nat ? ft->pre_ct_nat.ft : ft->pre_ct.ft;
	}

	attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
	attr->ct_attr.act_miss_mapping = act_miss_mapping;

	return 0;

err_mapping:
	mlx5e_tc_action_miss_mapping_put(priv: ct_priv->priv, attr, act_miss_mapping);
err_get_act_miss:
	mlx5_tc_ct_del_ft_cb(ct_priv, ft);
err_ft:
	netdev_warn(dev: priv->netdev, format: "Failed to offload ct flow, err %d\n" , err);
	return err;
}
1985 | |
1986 | int |
1987 | mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv, struct mlx5_flow_attr *attr) |
1988 | { |
1989 | int err; |
1990 | |
1991 | if (!priv) |
1992 | return -EOPNOTSUPP; |
1993 | |
1994 | if (attr->ct_attr.offloaded) |
1995 | return 0; |
1996 | |
1997 | if (attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR) { |
1998 | err = mlx5_tc_ct_entry_set_registers(ct_priv: priv, mod_acts: &attr->parse_attr->mod_hdr_acts, |
1999 | ct_state: 0, mark: 0, labels_id: 0, zone_restore_id: 0); |
2000 | if (err) |
2001 | return err; |
2002 | |
2003 | attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; |
2004 | } |
2005 | |
2006 | if (!attr->ct_attr.nf_ft) { /* means only ct clear action, and not ct_clear,ct() */ |
2007 | attr->ct_attr.offloaded = true; |
2008 | return 0; |
2009 | } |
2010 | |
2011 | mutex_lock(&priv->control_lock); |
2012 | err = __mlx5_tc_ct_flow_offload(ct_priv: priv, attr); |
2013 | if (!err) |
2014 | attr->ct_attr.offloaded = true; |
2015 | mutex_unlock(lock: &priv->control_lock); |
2016 | |
2017 | return err; |
2018 | } |
2019 | |
2020 | static void |
2021 | __mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv, |
2022 | struct mlx5_flow_attr *attr) |
2023 | { |
2024 | mlx5e_tc_action_miss_mapping_put(priv: ct_priv->priv, attr, act_miss_mapping: attr->ct_attr.act_miss_mapping); |
2025 | mlx5_tc_ct_del_ft_cb(ct_priv, ft: attr->ct_attr.ft); |
2026 | } |
2027 | |
2028 | void |
2029 | mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv, |
2030 | struct mlx5_flow_attr *attr) |
2031 | { |
2032 | if (!attr->ct_attr.offloaded) /* no ct action, return */ |
2033 | return; |
2034 | if (!attr->ct_attr.nf_ft) /* means only ct clear action, and not ct_clear,ct() */ |
2035 | return; |
2036 | |
2037 | mutex_lock(&priv->control_lock); |
2038 | __mlx5_tc_ct_delete_flow(ct_priv: priv, attr); |
2039 | mutex_unlock(lock: &priv->control_lock); |
2040 | } |
2041 | |
2042 | static int |
2043 | mlx5_tc_ct_fs_init(struct mlx5_tc_ct_priv *ct_priv) |
2044 | { |
2045 | struct mlx5_flow_table *post_ct = mlx5e_tc_post_act_get_ft(post_act: ct_priv->post_act); |
2046 | struct mlx5_ct_fs_ops *fs_ops = mlx5_ct_fs_dmfs_ops_get(); |
2047 | int err; |
2048 | |
2049 | if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB && |
2050 | ct_priv->dev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_SMFS) { |
2051 | ct_dbg("Using SMFS ct flow steering provider" ); |
2052 | fs_ops = mlx5_ct_fs_smfs_ops_get(); |
2053 | } |
2054 | |
2055 | ct_priv->fs = kzalloc(size: sizeof(*ct_priv->fs) + fs_ops->priv_size, GFP_KERNEL); |
2056 | if (!ct_priv->fs) |
2057 | return -ENOMEM; |
2058 | |
2059 | ct_priv->fs->netdev = ct_priv->netdev; |
2060 | ct_priv->fs->dev = ct_priv->dev; |
2061 | ct_priv->fs_ops = fs_ops; |
2062 | |
2063 | err = ct_priv->fs_ops->init(ct_priv->fs, ct_priv->ct, ct_priv->ct_nat, post_ct); |
2064 | if (err) |
2065 | goto err_init; |
2066 | |
2067 | return 0; |
2068 | |
2069 | err_init: |
2070 | kfree(objp: ct_priv->fs); |
2071 | return err; |
2072 | } |
2073 | |
2074 | static int |
2075 | mlx5_tc_ct_init_check_esw_support(struct mlx5_eswitch *esw, |
2076 | const char **err_msg) |
2077 | { |
2078 | if (!mlx5_eswitch_vlan_actions_supported(dev: esw->dev, vlan_depth: 1)) { |
2079 | /* vlan workaround should be avoided for multi chain rules. |
2080 | * This is just a sanity check as pop vlan action should |
2081 | * be supported by any FW that supports ignore_flow_level |
2082 | */ |
2083 | |
2084 | *err_msg = "firmware vlan actions support is missing" ; |
2085 | return -EOPNOTSUPP; |
2086 | } |
2087 | |
2088 | if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, |
2089 | fdb_modify_header_fwd_to_table)) { |
2090 | /* CT always writes to registers which are mod header actions. |
2091 | * Therefore, mod header and goto is required |
2092 | */ |
2093 | |
2094 | *err_msg = "firmware fwd and modify support is missing" ; |
2095 | return -EOPNOTSUPP; |
2096 | } |
2097 | |
2098 | if (!mlx5_eswitch_reg_c1_loopback_enabled(esw)) { |
2099 | *err_msg = "register loopback isn't supported" ; |
2100 | return -EOPNOTSUPP; |
2101 | } |
2102 | |
2103 | return 0; |
2104 | } |
2105 | |
2106 | static int |
2107 | mlx5_tc_ct_init_check_support(struct mlx5e_priv *priv, |
2108 | enum mlx5_flow_namespace_type ns_type, |
2109 | struct mlx5e_post_act *post_act) |
2110 | { |
2111 | struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; |
2112 | const char *err_msg = NULL; |
2113 | int err = 0; |
2114 | |
2115 | if (IS_ERR_OR_NULL(ptr: post_act)) { |
2116 | /* Ignore_flow_level support isn't supported by default for VFs and so post_act |
2117 | * won't be supported. Skip showing error msg. |
2118 | */ |
2119 | if (priv->mdev->coredev_type == MLX5_COREDEV_PF) |
2120 | err_msg = "post action is missing" ; |
2121 | err = -EOPNOTSUPP; |
2122 | goto out_err; |
2123 | } |
2124 | |
2125 | if (ns_type == MLX5_FLOW_NAMESPACE_FDB) |
2126 | err = mlx5_tc_ct_init_check_esw_support(esw, err_msg: &err_msg); |
2127 | |
2128 | out_err: |
2129 | if (err && err_msg) |
2130 | netdev_dbg(priv->netdev, "tc ct offload not supported, %s\n" , err_msg); |
2131 | return err; |
2132 | } |
2133 | |
2134 | static void |
2135 | mlx5_ct_tc_create_dbgfs(struct mlx5_tc_ct_priv *ct_priv) |
2136 | { |
2137 | struct mlx5_tc_ct_debugfs *ct_dbgfs = &ct_priv->debugfs; |
2138 | |
2139 | ct_dbgfs->root = debugfs_create_dir(name: "ct" , parent: mlx5_debugfs_get_dev_root(dev: ct_priv->dev)); |
2140 | debugfs_create_atomic_t(name: "offloaded" , mode: 0400, parent: ct_dbgfs->root, |
2141 | value: &ct_dbgfs->stats.offloaded); |
2142 | debugfs_create_atomic_t(name: "rx_dropped" , mode: 0400, parent: ct_dbgfs->root, |
2143 | value: &ct_dbgfs->stats.rx_dropped); |
2144 | } |
2145 | |
2146 | static void |
2147 | mlx5_ct_tc_remove_dbgfs(struct mlx5_tc_ct_priv *ct_priv) |
2148 | { |
2149 | debugfs_remove_recursive(dentry: ct_priv->debugfs.root); |
2150 | } |
2151 | |
2152 | #define INIT_ERR_PREFIX "tc ct offload init failed" |
2153 | |
/* Allocate and set up the tc CT offload context: zone/labels register
 * mappings, CT and CT-NAT global tables, tracking hashtables, the ordered
 * workqueue, and the flow-steering provider.
 *
 * Returns the new context, or NULL if CT offload is unsupported or any
 * step fails (callers treat NULL as "CT offload disabled").
 *
 * NOTE(review): the rhashtable_init() failure paths below do not assign
 * 'err', so it keeps its last value there; harmless today because every
 * failure path returns NULL and 'err' is only used in warn messages.
 */
struct mlx5_tc_ct_priv *
mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
		struct mod_hdr_tbl *mod_hdr,
		enum mlx5_flow_namespace_type ns_type,
		struct mlx5e_post_act *post_act)
{
	struct mlx5_tc_ct_priv *ct_priv;
	struct mlx5_core_dev *dev;
	u64 mapping_id;
	int err;

	dev = priv->mdev;
	err = mlx5_tc_ct_init_check_support(priv, ns_type, post_act);
	if (err)
		goto err_support;

	ct_priv = kzalloc(size: sizeof(*ct_priv), GFP_KERNEL);
	if (!ct_priv)
		goto err_alloc;

	/* Share mapping ids across PFs with the same system image GUID. */
	mapping_id = mlx5_query_nic_system_image_guid(mdev: dev);

	ct_priv->zone_mapping = mapping_create_for_id(id: mapping_id, type: MAPPING_TYPE_ZONE,
						      data_size: sizeof(u16), max_id: 0, delayed_removal: true);
	if (IS_ERR(ptr: ct_priv->zone_mapping)) {
		err = PTR_ERR(ptr: ct_priv->zone_mapping);
		goto err_mapping_zone;
	}

	/* conntrack labels are 128 bits (4 x u32). */
	ct_priv->labels_mapping = mapping_create_for_id(id: mapping_id, type: MAPPING_TYPE_LABELS,
							data_size: sizeof(u32) * 4, max_id: 0, delayed_removal: true);
	if (IS_ERR(ptr: ct_priv->labels_mapping)) {
		err = PTR_ERR(ptr: ct_priv->labels_mapping);
		goto err_mapping_labels;
	}

	spin_lock_init(&ct_priv->ht_lock);
	ct_priv->priv = priv;
	ct_priv->ns_type = ns_type;
	ct_priv->chains = chains;
	ct_priv->netdev = priv->netdev;
	ct_priv->dev = priv->mdev;
	ct_priv->mod_hdr_tbl = mod_hdr;
	ct_priv->ct = mlx5_chains_create_global_table(chains);
	if (IS_ERR(ptr: ct_priv->ct)) {
		err = PTR_ERR(ptr: ct_priv->ct);
		mlx5_core_warn(dev,
			       "%s, failed to create ct table err: %d\n" ,
			       INIT_ERR_PREFIX, err);
		goto err_ct_tbl;
	}

	ct_priv->ct_nat = mlx5_chains_create_global_table(chains);
	if (IS_ERR(ptr: ct_priv->ct_nat)) {
		err = PTR_ERR(ptr: ct_priv->ct_nat);
		mlx5_core_warn(dev,
			       "%s, failed to create ct nat table err: %d\n" ,
			       INIT_ERR_PREFIX, err);
		goto err_ct_nat_tbl;
	}

	ct_priv->post_act = post_act;
	mutex_init(&ct_priv->control_lock);
	if (rhashtable_init(ht: &ct_priv->zone_ht, params: &zone_params))
		goto err_ct_zone_ht;
	if (rhashtable_init(ht: &ct_priv->ct_tuples_ht, params: &tuples_ht_params))
		goto err_ct_tuples_ht;
	if (rhashtable_init(ht: &ct_priv->ct_tuples_nat_ht, params: &tuples_nat_ht_params))
		goto err_ct_tuples_nat_ht;

	/* Ordered workqueue: ct entry add/del work must not be reordered. */
	ct_priv->wq = alloc_ordered_workqueue("mlx5e_ct_priv_wq" , 0);
	if (!ct_priv->wq) {
		err = -ENOMEM;
		goto err_wq;
	}

	err = mlx5_tc_ct_fs_init(ct_priv);
	if (err)
		goto err_init_fs;

	mlx5_ct_tc_create_dbgfs(ct_priv);
	return ct_priv;

err_init_fs:
	destroy_workqueue(wq: ct_priv->wq);
err_wq:
	rhashtable_destroy(ht: &ct_priv->ct_tuples_nat_ht);
err_ct_tuples_nat_ht:
	rhashtable_destroy(ht: &ct_priv->ct_tuples_ht);
err_ct_tuples_ht:
	rhashtable_destroy(ht: &ct_priv->zone_ht);
err_ct_zone_ht:
	mlx5_chains_destroy_global_table(chains, ft: ct_priv->ct_nat);
err_ct_nat_tbl:
	mlx5_chains_destroy_global_table(chains, ft: ct_priv->ct);
err_ct_tbl:
	mapping_destroy(ctx: ct_priv->labels_mapping);
err_mapping_labels:
	mapping_destroy(ctx: ct_priv->zone_mapping);
err_mapping_zone:
	kfree(objp: ct_priv);
err_alloc:
err_support:

	return NULL;
}
2260 | |
2261 | void |
2262 | mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv) |
2263 | { |
2264 | struct mlx5_fs_chains *chains; |
2265 | |
2266 | if (!ct_priv) |
2267 | return; |
2268 | |
2269 | destroy_workqueue(wq: ct_priv->wq); |
2270 | mlx5_ct_tc_remove_dbgfs(ct_priv); |
2271 | chains = ct_priv->chains; |
2272 | |
2273 | ct_priv->fs_ops->destroy(ct_priv->fs); |
2274 | kfree(objp: ct_priv->fs); |
2275 | |
2276 | mlx5_chains_destroy_global_table(chains, ft: ct_priv->ct_nat); |
2277 | mlx5_chains_destroy_global_table(chains, ft: ct_priv->ct); |
2278 | mapping_destroy(ctx: ct_priv->zone_mapping); |
2279 | mapping_destroy(ctx: ct_priv->labels_mapping); |
2280 | |
2281 | rhashtable_destroy(ht: &ct_priv->ct_tuples_ht); |
2282 | rhashtable_destroy(ht: &ct_priv->ct_tuples_nat_ht); |
2283 | rhashtable_destroy(ht: &ct_priv->zone_ht); |
2284 | mutex_destroy(lock: &ct_priv->control_lock); |
2285 | kfree(objp: ct_priv); |
2286 | } |
2287 | |
2288 | bool |
2289 | mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv, |
2290 | struct sk_buff *skb, u8 zone_restore_id) |
2291 | { |
2292 | struct mlx5_ct_tuple tuple = {}; |
2293 | struct mlx5_ct_entry *entry; |
2294 | u16 zone; |
2295 | |
2296 | if (!ct_priv || !zone_restore_id) |
2297 | return true; |
2298 | |
2299 | if (mapping_find(ctx: ct_priv->zone_mapping, id: zone_restore_id, data: &zone)) |
2300 | goto out_inc_drop; |
2301 | |
2302 | if (!mlx5_tc_ct_skb_to_tuple(skb, tuple: &tuple, zone)) |
2303 | goto out_inc_drop; |
2304 | |
2305 | spin_lock(lock: &ct_priv->ht_lock); |
2306 | |
2307 | entry = mlx5_tc_ct_entry_get(ct_priv, tuple: &tuple); |
2308 | if (!entry) { |
2309 | spin_unlock(lock: &ct_priv->ht_lock); |
2310 | goto out_inc_drop; |
2311 | } |
2312 | |
2313 | if (IS_ERR(ptr: entry)) { |
2314 | spin_unlock(lock: &ct_priv->ht_lock); |
2315 | goto out_inc_drop; |
2316 | } |
2317 | spin_unlock(lock: &ct_priv->ht_lock); |
2318 | |
2319 | tcf_ct_flow_table_restore_skb(skb, cookie: entry->restore_cookie); |
2320 | __mlx5_tc_ct_entry_put(entry); |
2321 | |
2322 | return true; |
2323 | |
2324 | out_inc_drop: |
2325 | atomic_inc(v: &ct_priv->debugfs.stats.rx_dropped); |
2326 | return false; |
2327 | } |
2328 | |