// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */

#include "eswitch.h"
#include "lib/mlx5.h"
#include "esw/qos.h"
#include "en/port.h"
#define CREATE_TRACE_POINTS
#include "diag/qos_tracepoint.h"

/* Minimum supported BW share value by the HW is 1 Mbit/sec */
#define MLX5_MIN_BW_SHARE 1

/* Holds rate nodes associated with an E-Switch. */
struct mlx5_qos_domain {
	/* Serializes access to all qos changes in the qos domain. */
	struct mutex lock;
	/* List of all mlx5_esw_sched_nodes. */
	struct list_head nodes;
};

static void esw_qos_lock(struct mlx5_eswitch *esw)
{
	mutex_lock(&esw->qos.domain->lock);
}

static void esw_qos_unlock(struct mlx5_eswitch *esw)
{
	mutex_unlock(&esw->qos.domain->lock);
}

static void esw_assert_qos_lock_held(struct mlx5_eswitch *esw)
{
	lockdep_assert_held(&esw->qos.domain->lock);
}

static struct mlx5_qos_domain *esw_qos_domain_alloc(void)
{
	struct mlx5_qos_domain *qos_domain;

	qos_domain = kzalloc(sizeof(*qos_domain), GFP_KERNEL);
	if (!qos_domain)
		return NULL;

	mutex_init(&qos_domain->lock);
	INIT_LIST_HEAD(&qos_domain->nodes);

	return qos_domain;
}

static int esw_qos_domain_init(struct mlx5_eswitch *esw)
{
	esw->qos.domain = esw_qos_domain_alloc();

	return esw->qos.domain ? 0 : -ENOMEM;
}

static void esw_qos_domain_release(struct mlx5_eswitch *esw)
{
	kfree(esw->qos.domain);
	esw->qos.domain = NULL;
}

enum sched_node_type {
	SCHED_NODE_TYPE_VPORTS_TSAR,
	SCHED_NODE_TYPE_VPORT,
};

static const char * const sched_node_type_str[] = {
	[SCHED_NODE_TYPE_VPORTS_TSAR] = "vports TSAR",
	[SCHED_NODE_TYPE_VPORT] = "vport",
};

struct mlx5_esw_sched_node {
	u32 ix;
	/* Bandwidth parameters. */
	u32 max_rate;
	u32 min_rate;
	/* A computed value indicating relative min_rate between node's children. */
	u32 bw_share;
	/* The parent node in the rate hierarchy. */
	struct mlx5_esw_sched_node *parent;
	/* Entry in the parent node's children list. */
	struct list_head entry;
	/* The type of this node in the rate hierarchy. */
	enum sched_node_type type;
	/* The eswitch this node belongs to. */
	struct mlx5_eswitch *esw;
	/* The children nodes of this node, empty list for leaf nodes. */
	struct list_head children;
	/* Valid only if this node is associated with a vport. */
	struct mlx5_vport *vport;
	/* Level in the hierarchy. The root node level is 1. */
	u8 level;
};

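/* The rate hierarchy is kept in software as a tree of mlx5_esw_sched_nodes:
 * the root TSAR (level 1) is implicit, nodes without a parent are linked on
 * the QoS domain list and sit directly under the root at level 2, and every
 * other node hangs off its parent's children list one level deeper.
 */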
static void esw_qos_node_attach_to_parent(struct mlx5_esw_sched_node *node)
{
	if (!node->parent) {
		/* Root children are assigned a depth level of 2. */
		node->level = 2;
		list_add_tail(&node->entry, &node->esw->qos.domain->nodes);
	} else {
		node->level = node->parent->level + 1;
		list_add_tail(&node->entry, &node->parent->children);
	}
}

static void
esw_qos_node_set_parent(struct mlx5_esw_sched_node *node, struct mlx5_esw_sched_node *parent)
{
	list_del_init(&node->entry);
	node->parent = parent;
	if (parent)
		node->esw = parent->esw;
	esw_qos_node_attach_to_parent(node);
}

void mlx5_esw_qos_vport_qos_free(struct mlx5_vport *vport)
{
	kfree(vport->qos.sched_node);
	memset(&vport->qos, 0, sizeof(vport->qos));
}

u32 mlx5_esw_qos_vport_get_sched_elem_ix(const struct mlx5_vport *vport)
{
	if (!vport->qos.sched_node)
		return 0;

	return vport->qos.sched_node->ix;
}

struct mlx5_esw_sched_node *
mlx5_esw_qos_vport_get_parent(const struct mlx5_vport *vport)
{
	if (!vport->qos.sched_node)
		return NULL;

	return vport->qos.sched_node->parent;
}

static void esw_qos_sched_elem_warn(struct mlx5_esw_sched_node *node, int err, const char *op)
{
	if (node->vport) {
		esw_warn(node->esw->dev,
			 "E-Switch %s %s scheduling element failed (vport=%d,err=%d)\n",
			 op, sched_node_type_str[node->type], node->vport->vport, err);
		return;
	}

	esw_warn(node->esw->dev,
		 "E-Switch %s %s scheduling element failed (err=%d)\n",
		 op, sched_node_type_str[node->type], err);
}

static int esw_qos_node_create_sched_element(struct mlx5_esw_sched_node *node, void *ctx,
					     struct netlink_ext_ack *extack)
{
	int err;

	err = mlx5_create_scheduling_element_cmd(node->esw->dev, SCHEDULING_HIERARCHY_E_SWITCH,
						 ctx, &node->ix);
	if (err) {
		esw_qos_sched_elem_warn(node, err, "create");
		NL_SET_ERR_MSG_MOD(extack, "E-Switch create scheduling element failed");
	}

	return err;
}

static int esw_qos_node_destroy_sched_element(struct mlx5_esw_sched_node *node,
					      struct netlink_ext_ack *extack)
{
	int err;

	err = mlx5_destroy_scheduling_element_cmd(node->esw->dev,
						  SCHEDULING_HIERARCHY_E_SWITCH,
						  node->ix);
	if (err) {
		esw_qos_sched_elem_warn(node, err, "destroy");
		NL_SET_ERR_MSG_MOD(extack, "E-Switch destroying scheduling element failed.");
	}

	return err;
}

static int esw_qos_sched_elem_config(struct mlx5_esw_sched_node *node, u32 max_rate, u32 bw_share,
				     struct netlink_ext_ack *extack)
{
	u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	struct mlx5_core_dev *dev = node->esw->dev;
	u32 bitmask = 0;
	int err;

	if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
		return -EOPNOTSUPP;

	if (bw_share && (!MLX5_CAP_QOS(dev, esw_bw_share) ||
			 MLX5_CAP_QOS(dev, max_tsar_bw_share) < MLX5_MIN_BW_SHARE))
		return -EOPNOTSUPP;

	if (node->max_rate == max_rate && node->bw_share == bw_share)
		return 0;

	if (node->max_rate != max_rate) {
		MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate);
		bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW;
	}
	if (node->bw_share != bw_share) {
		MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
		bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE;
	}

	err = mlx5_modify_scheduling_element_cmd(dev,
						 SCHEDULING_HIERARCHY_E_SWITCH,
						 sched_ctx,
						 node->ix,
						 bitmask);
	if (err) {
		esw_qos_sched_elem_warn(node, err, "modify");
		NL_SET_ERR_MSG_MOD(extack, "E-Switch modify scheduling element failed");

		return err;
	}

	node->max_rate = max_rate;
	node->bw_share = bw_share;
	if (node->type == SCHED_NODE_TYPE_VPORTS_TSAR)
		trace_mlx5_esw_node_qos_config(dev, node, node->ix, bw_share, max_rate);
	else if (node->type == SCHED_NODE_TYPE_VPORT)
		trace_mlx5_esw_vport_qos_config(dev, node->vport, bw_share, max_rate);

	return 0;
}

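/* Worked example of the bw_share computation below, assuming three sibling
 * nodes with min_rate 5000, 2500 and 1000 Mbps and fw_max_bw_share of 100:
 * the divider is max(5000 / 100, 1) = 50, yielding bw_share weights of 100,
 * 50 and 20 respectively, so the largest guarantee maps to fw_max_bw_share
 * and the rest scale proportionally.
 */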
static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw,
					      struct mlx5_esw_sched_node *parent)
{
	struct list_head *nodes = parent ? &parent->children : &esw->qos.domain->nodes;
	u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
	struct mlx5_esw_sched_node *node;
	u32 max_guarantee = 0;

	/* Find max min_rate across all nodes.
	 * This will correspond to fw_max_bw_share in the final bw_share calculation.
	 */
	list_for_each_entry(node, nodes, entry) {
		if (node->esw == esw && node->ix != esw->qos.root_tsar_ix &&
		    node->min_rate > max_guarantee)
			max_guarantee = node->min_rate;
	}

	if (max_guarantee)
		return max_t(u32, max_guarantee / fw_max_bw_share, 1);

	/* If no node has min_rate configured but the parent has bw_share
	 * configured, then set bw_share for the nodes to the minimal value.
	 */
	if (parent && parent->bw_share)
		return 1;

	/* If no node has min_rate configured, a divider of 0 sets all
	 * nodes' bw_share to 0, effectively disabling min guarantees.
	 */
	return 0;
}

static u32 esw_qos_calc_bw_share(u32 min_rate, u32 divider, u32 fw_max)
{
	if (!divider)
		return 0;
	return min_t(u32, max_t(u32, DIV_ROUND_UP(min_rate, divider), MLX5_MIN_BW_SHARE), fw_max);
}

static void esw_qos_update_sched_node_bw_share(struct mlx5_esw_sched_node *node,
					       u32 divider,
					       struct netlink_ext_ack *extack)
{
	u32 fw_max_bw_share = MLX5_CAP_QOS(node->esw->dev, max_tsar_bw_share);
	u32 bw_share;

	bw_share = esw_qos_calc_bw_share(node->min_rate, divider, fw_max_bw_share);

	esw_qos_sched_elem_config(node, node->max_rate, bw_share, extack);
}

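/* Recompute bw_share for all nodes on the given level and recurse into any
 * node that itself has children, so a min_rate change anywhere in the tree
 * renormalizes the affected subtrees top-down.
 */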
static void esw_qos_normalize_min_rate(struct mlx5_eswitch *esw,
				       struct mlx5_esw_sched_node *parent,
				       struct netlink_ext_ack *extack)
{
	struct list_head *nodes = parent ? &parent->children : &esw->qos.domain->nodes;
	u32 divider = esw_qos_calculate_min_rate_divider(esw, parent);
	struct mlx5_esw_sched_node *node;

	list_for_each_entry(node, nodes, entry) {
		if (node->esw != esw || node->ix == esw->qos.root_tsar_ix)
			continue;

		esw_qos_update_sched_node_bw_share(node, divider, extack);

		if (list_empty(&node->children))
			continue;

		esw_qos_normalize_min_rate(node->esw, node, extack);
	}
}

static int esw_qos_set_node_min_rate(struct mlx5_esw_sched_node *node,
				     u32 min_rate, struct netlink_ext_ack *extack)
{
	struct mlx5_eswitch *esw = node->esw;

	if (min_rate == node->min_rate)
		return 0;

	node->min_rate = min_rate;
	esw_qos_normalize_min_rate(esw, node->parent, extack);

	return 0;
}

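/* A scheduling node is backed in firmware by a DWRR TSAR (Transmit
 * Scheduling ARbiter) element parented to an existing element, so both the
 * TSAR element type and the DWRR arbitration type must be supported.
 */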
static int
esw_qos_create_node_sched_elem(struct mlx5_core_dev *dev, u32 parent_element_id,
			       u32 max_rate, u32 bw_share, u32 *tsar_ix)
{
	u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	void *attr;

	if (!mlx5_qos_element_type_supported(dev,
					     SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR,
					     SCHEDULING_HIERARCHY_E_SWITCH) ||
	    !mlx5_qos_tsar_type_supported(dev,
					  TSAR_ELEMENT_TSAR_TYPE_DWRR,
					  SCHEDULING_HIERARCHY_E_SWITCH))
		return -EOPNOTSUPP;

	MLX5_SET(scheduling_context, tsar_ctx, element_type,
		 SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);
	MLX5_SET(scheduling_context, tsar_ctx, parent_element_id,
		 parent_element_id);
	MLX5_SET(scheduling_context, tsar_ctx, max_average_bw, max_rate);
	MLX5_SET(scheduling_context, tsar_ctx, bw_share, bw_share);
	attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes);
	MLX5_SET(tsar_element, attr, tsar_type, TSAR_ELEMENT_TSAR_TYPE_DWRR);

	return mlx5_create_scheduling_element_cmd(dev,
						  SCHEDULING_HIERARCHY_E_SWITCH,
						  tsar_ctx,
						  tsar_ix);
}

static int esw_qos_vport_create_sched_element(struct mlx5_esw_sched_node *vport_node,
					      struct netlink_ext_ack *extack)
{
	u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	struct mlx5_core_dev *dev = vport_node->esw->dev;
	void *attr;

	if (!mlx5_qos_element_type_supported(dev,
					     SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT,
					     SCHEDULING_HIERARCHY_E_SWITCH))
		return -EOPNOTSUPP;

	MLX5_SET(scheduling_context, sched_ctx, element_type,
		 SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
	attr = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes);
	MLX5_SET(vport_element, attr, vport_number, vport_node->vport->vport);
	MLX5_SET(scheduling_context, sched_ctx, parent_element_id, vport_node->parent->ix);
	MLX5_SET(scheduling_context, sched_ctx, max_average_bw, vport_node->max_rate);

	return esw_qos_node_create_sched_element(vport_node, sched_ctx, extack);
}

static struct mlx5_esw_sched_node *
__esw_qos_alloc_node(struct mlx5_eswitch *esw, u32 tsar_ix, enum sched_node_type type,
		     struct mlx5_esw_sched_node *parent)
{
	struct mlx5_esw_sched_node *node;

	node = kzalloc(sizeof(*node), GFP_KERNEL);
	if (!node)
		return NULL;

	node->esw = esw;
	node->ix = tsar_ix;
	node->type = type;
	node->parent = parent;
	INIT_LIST_HEAD(&node->children);
	esw_qos_node_attach_to_parent(node);

	return node;
}

static void __esw_qos_free_node(struct mlx5_esw_sched_node *node)
{
	list_del(&node->entry);
	kfree(node);
}

static void esw_qos_destroy_node(struct mlx5_esw_sched_node *node, struct netlink_ext_ack *extack)
{
	esw_qos_node_destroy_sched_element(node, extack);
	__esw_qos_free_node(node);
}

static struct mlx5_esw_sched_node *
__esw_qos_create_vports_sched_node(struct mlx5_eswitch *esw, struct mlx5_esw_sched_node *parent,
				   struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *node;
	u32 tsar_ix;
	int err;

	err = esw_qos_create_node_sched_elem(esw->dev, esw->qos.root_tsar_ix, 0,
					     0, &tsar_ix);
	if (err) {
		NL_SET_ERR_MSG_MOD(extack, "E-Switch create TSAR for node failed");
		return ERR_PTR(err);
	}

	node = __esw_qos_alloc_node(esw, tsar_ix, SCHED_NODE_TYPE_VPORTS_TSAR, parent);
	if (!node) {
		NL_SET_ERR_MSG_MOD(extack, "E-Switch alloc node failed");
		err = -ENOMEM;
		goto err_alloc_node;
	}

	esw_qos_normalize_min_rate(esw, NULL, extack);
	trace_mlx5_esw_node_qos_create(esw->dev, node, node->ix);

	return node;

err_alloc_node:
	if (mlx5_destroy_scheduling_element_cmd(esw->dev,
						SCHEDULING_HIERARCHY_E_SWITCH,
						tsar_ix))
		NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR for node failed");
	return ERR_PTR(err);
}

static int esw_qos_get(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack);
static void esw_qos_put(struct mlx5_eswitch *esw);

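/* Creating a rate node takes a reference on the QoS infrastructure via
 * esw_qos_get(); the matching esw_qos_put() happens when the node is
 * destroyed, so the root TSAR stays alive as long as any node exists.
 */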
static struct mlx5_esw_sched_node *
esw_qos_create_vports_sched_node(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *node;
	int err;

	esw_assert_qos_lock_held(esw);
	if (!MLX5_CAP_QOS(esw->dev, log_esw_max_sched_depth))
		return ERR_PTR(-EOPNOTSUPP);

	err = esw_qos_get(esw, extack);
	if (err)
		return ERR_PTR(err);

	node = __esw_qos_create_vports_sched_node(esw, NULL, extack);
	if (IS_ERR(node))
		esw_qos_put(esw);

	return node;
}

static void __esw_qos_destroy_node(struct mlx5_esw_sched_node *node, struct netlink_ext_ack *extack)
{
	struct mlx5_eswitch *esw = node->esw;

	trace_mlx5_esw_node_qos_destroy(esw->dev, node, node->ix);
	esw_qos_destroy_node(node, extack);
	esw_qos_normalize_min_rate(esw, NULL, extack);
}

static int esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
{
	struct mlx5_core_dev *dev = esw->dev;
	int err;

	if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
		return -EOPNOTSUPP;

	err = esw_qos_create_node_sched_elem(esw->dev, 0, 0, 0,
					     &esw->qos.root_tsar_ix);
	if (err) {
		esw_warn(dev, "E-Switch create root TSAR failed (%d)\n", err);
		return err;
	}

	if (MLX5_CAP_QOS(dev, log_esw_max_sched_depth)) {
		esw->qos.node0 = __esw_qos_create_vports_sched_node(esw, NULL, extack);
	} else {
		/* The eswitch doesn't support scheduling nodes.
		 * Create a software-only node0 using the root TSAR to attach vport QoS to.
		 */
		esw->qos.node0 = __esw_qos_alloc_node(esw,
						      esw->qos.root_tsar_ix,
						      SCHED_NODE_TYPE_VPORTS_TSAR,
						      NULL);
		if (!esw->qos.node0)
			esw->qos.node0 = ERR_PTR(-ENOMEM);
	}
	if (IS_ERR(esw->qos.node0)) {
		err = PTR_ERR(esw->qos.node0);
		esw_warn(dev, "E-Switch create rate node 0 failed (%d)\n", err);
		goto err_node0;
	}
	refcount_set(&esw->qos.refcnt, 1);

	return 0;

err_node0:
	if (mlx5_destroy_scheduling_element_cmd(esw->dev, SCHEDULING_HIERARCHY_E_SWITCH,
						esw->qos.root_tsar_ix))
		esw_warn(esw->dev, "E-Switch destroy root TSAR failed.\n");

	return err;
}

static void esw_qos_destroy(struct mlx5_eswitch *esw)
{
	int err;

	if (esw->qos.node0->ix != esw->qos.root_tsar_ix)
		__esw_qos_destroy_node(esw->qos.node0, NULL);
	else
		__esw_qos_free_node(esw->qos.node0);
	esw->qos.node0 = NULL;

	err = mlx5_destroy_scheduling_element_cmd(esw->dev,
						  SCHEDULING_HIERARCHY_E_SWITCH,
						  esw->qos.root_tsar_ix);
	if (err)
		esw_warn(esw->dev, "E-Switch destroy root TSAR failed (%d)\n", err);
}

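/* The QoS infrastructure (root TSAR and node0) is created lazily on first
 * use and torn down when the last reference is dropped, so eswitches with
 * no rate configuration pay no scheduling-element cost.
 */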
static int esw_qos_get(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
{
	int err = 0;

	esw_assert_qos_lock_held(esw);
	if (!refcount_inc_not_zero(&esw->qos.refcnt)) {
		/* esw_qos_create() sets the refcount to 1 only on success.
		 * No need to decrement on failure.
		 */
		err = esw_qos_create(esw, extack);
	}

	return err;
}

static void esw_qos_put(struct mlx5_eswitch *esw)
{
	esw_assert_qos_lock_held(esw);
	if (refcount_dec_and_test(&esw->qos.refcnt))
		esw_qos_destroy(esw);
}

static void esw_qos_vport_disable(struct mlx5_vport *vport, struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node;
	struct mlx5_esw_sched_node *parent = vport_node->parent;

	esw_qos_node_destroy_sched_element(vport_node, extack);

	vport_node->bw_share = 0;
	list_del_init(&vport_node->entry);
	esw_qos_normalize_min_rate(parent->esw, parent, extack);

	trace_mlx5_esw_vport_qos_destroy(vport_node->esw->dev, vport);
}

static int esw_qos_vport_enable(struct mlx5_vport *vport, struct mlx5_esw_sched_node *parent,
				struct netlink_ext_ack *extack)
{
	int err;

	esw_assert_qos_lock_held(vport->dev->priv.eswitch);

	esw_qos_node_set_parent(vport->qos.sched_node, parent);
	err = esw_qos_vport_create_sched_element(vport->qos.sched_node, extack);
	if (err)
		return err;

	esw_qos_normalize_min_rate(parent->esw, parent, extack);
	trace_mlx5_esw_vport_qos_create(vport->dev, vport,
					vport->qos.sched_node->max_rate,
					vport->qos.sched_node->bw_share);

	return 0;
}

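/* Enabling QoS on a vport allocates its software node, hooks it under the
 * requested parent (or node0 when none is given) and creates the firmware
 * vport scheduling element; on failure everything is rolled back, including
 * the reference taken on the QoS infrastructure.
 */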
static int mlx5_esw_qos_vport_enable(struct mlx5_vport *vport, enum sched_node_type type,
				     struct mlx5_esw_sched_node *parent, u32 max_rate,
				     u32 min_rate, struct netlink_ext_ack *extack)
{
	struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
	struct mlx5_esw_sched_node *sched_node;
	int err;

	esw_assert_qos_lock_held(esw);
	err = esw_qos_get(esw, extack);
	if (err)
		return err;

	parent = parent ?: esw->qos.node0;
	sched_node = __esw_qos_alloc_node(parent->esw, 0, type, parent);
	if (!sched_node)
		return -ENOMEM;

	sched_node->max_rate = max_rate;
	sched_node->min_rate = min_rate;
	sched_node->vport = vport;
	vport->qos.sched_node = sched_node;
	err = esw_qos_vport_enable(vport, parent, extack);
	if (err) {
		__esw_qos_free_node(sched_node);
		esw_qos_put(esw);
		vport->qos.sched_node = NULL;
	}

	return err;
}

void mlx5_esw_qos_vport_disable(struct mlx5_vport *vport)
{
	struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
	struct mlx5_esw_sched_node *parent;

	lockdep_assert_held(&esw->state_lock);
	esw_qos_lock(esw);
	if (!vport->qos.sched_node)
		goto unlock;

	parent = vport->qos.sched_node->parent;
	WARN(parent != esw->qos.node0, "Disabling QoS on port before detaching it from node");

	esw_qos_vport_disable(vport, NULL);
	mlx5_esw_qos_vport_qos_free(vport);
	esw_qos_put(esw);
unlock:
	esw_qos_unlock(esw);
}

static int mlx5_esw_qos_set_vport_max_rate(struct mlx5_vport *vport, u32 max_rate,
					   struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node;

	esw_assert_qos_lock_held(vport->dev->priv.eswitch);

	if (!vport_node)
		return mlx5_esw_qos_vport_enable(vport, SCHED_NODE_TYPE_VPORT, NULL, max_rate, 0,
						 extack);
	else
		return esw_qos_sched_elem_config(vport_node, max_rate, vport_node->bw_share,
						 extack);
}

static int mlx5_esw_qos_set_vport_min_rate(struct mlx5_vport *vport, u32 min_rate,
					   struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node;

	esw_assert_qos_lock_held(vport->dev->priv.eswitch);

	if (!vport_node)
		return mlx5_esw_qos_vport_enable(vport, SCHED_NODE_TYPE_VPORT, NULL, 0, min_rate,
						 extack);
	else
		return esw_qos_set_node_min_rate(vport_node, min_rate, extack);
}

int mlx5_esw_qos_set_vport_rate(struct mlx5_vport *vport, u32 max_rate, u32 min_rate)
{
	struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
	int err;

	esw_qos_lock(esw);
	err = mlx5_esw_qos_set_vport_min_rate(vport, min_rate, NULL);
	if (!err)
		err = mlx5_esw_qos_set_vport_max_rate(vport, max_rate, NULL);
	esw_qos_unlock(esw);
	return err;
}

bool mlx5_esw_qos_get_vport_rate(struct mlx5_vport *vport, u32 *max_rate, u32 *min_rate)
{
	struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
	bool enabled;

	esw_qos_lock(esw);
	enabled = !!vport->qos.sched_node;
	if (enabled) {
		*max_rate = vport->qos.sched_node->max_rate;
		*min_rate = vport->qos.sched_node->min_rate;
	}
	esw_qos_unlock(esw);
	return enabled;
}

static int esw_qos_vport_update_parent(struct mlx5_vport *vport, struct mlx5_esw_sched_node *parent,
				       struct netlink_ext_ack *extack)
{
	struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
	struct mlx5_esw_sched_node *curr_parent;
	int err;

	esw_assert_qos_lock_held(esw);
	curr_parent = vport->qos.sched_node->parent;
	parent = parent ?: esw->qos.node0;
	if (curr_parent == parent)
		return 0;

	esw_qos_vport_disable(vport, extack);

	err = esw_qos_vport_enable(vport, parent, extack);
	if (err) {
		if (esw_qos_vport_enable(vport, curr_parent, NULL))
			esw_warn(parent->esw->dev, "vport restore QoS failed (vport=%d)\n",
				 vport->vport);
	}

	return err;
}

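/* When the uplink is enslaved to a LAG bond, rate limits are validated
 * against the speed reported by the bond master netdevice rather than a
 * single port, since the bond's capacity is what the vport can actually use.
 */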
static u32 mlx5_esw_qos_lag_link_speed_get_locked(struct mlx5_core_dev *mdev)
{
	struct ethtool_link_ksettings lksettings;
	struct net_device *slave, *master;
	u32 speed = SPEED_UNKNOWN;

	/* Lock ensures a stable reference to master and slave netdevice
	 * while port speed of master is queried.
	 */
	ASSERT_RTNL();

	slave = mlx5_uplink_netdev_get(mdev);
	if (!slave)
		goto out;

	master = netdev_master_upper_dev_get(slave);
	if (master && !__ethtool_get_link_ksettings(master, &lksettings))
		speed = lksettings.base.speed;

out:
	return speed;
}

static int mlx5_esw_qos_max_link_speed_get(struct mlx5_core_dev *mdev, u32 *link_speed_max,
					   bool hold_rtnl_lock, struct netlink_ext_ack *extack)
{
	int err;

	if (!mlx5_lag_is_active(mdev))
		goto skip_lag;

	if (hold_rtnl_lock)
		rtnl_lock();

	*link_speed_max = mlx5_esw_qos_lag_link_speed_get_locked(mdev);

	if (hold_rtnl_lock)
		rtnl_unlock();

	if (*link_speed_max != (u32)SPEED_UNKNOWN)
		return 0;

skip_lag:
	err = mlx5_port_max_linkspeed(mdev, link_speed_max);
	if (err)
		NL_SET_ERR_MSG_MOD(extack, "Failed to get link maximum speed");

	return err;
}

static int mlx5_esw_qos_link_speed_verify(struct mlx5_core_dev *mdev,
					  const char *name, u32 link_speed_max,
					  u64 value, struct netlink_ext_ack *extack)
{
	if (value > link_speed_max) {
		pr_err("%s rate value %lluMbps exceeds link maximum speed %u.\n",
		       name, value, link_speed_max);
		NL_SET_ERR_MSG_MOD(extack, "TX rate value exceeds link maximum speed");
		return -EINVAL;
	}

	return 0;
}

int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 rate_mbps)
{
	struct mlx5_vport *vport;
	u32 link_speed_max;
	int err;

	vport = mlx5_eswitch_get_vport(esw, vport_num);
	if (IS_ERR(vport))
		return PTR_ERR(vport);

	if (rate_mbps) {
		err = mlx5_esw_qos_max_link_speed_get(esw->dev, &link_speed_max, false, NULL);
		if (err)
			return err;

		err = mlx5_esw_qos_link_speed_verify(esw->dev, "Police",
						     link_speed_max, rate_mbps, NULL);
		if (err)
			return err;
	}

	esw_qos_lock(esw);
	err = mlx5_esw_qos_set_vport_max_rate(vport, rate_mbps, NULL);
	esw_qos_unlock(esw);

	return err;
}

#define MLX5_LINKSPEED_UNIT 125000 /* 1Mbps in Bps */

/* Converts the bytes-per-second value passed in a pointer into megabits per
 * second, rewriting the value in place (e.g. 125000000 Bps becomes 1000
 * Mbps). Returns an error if the converted rate exceeds the link speed or
 * is not a whole multiple of 1 Mbps.
 */
static int esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev *mdev, const char *name,
					u64 *rate, struct netlink_ext_ack *extack)
{
	u32 link_speed_max, remainder;
	u64 value;
	int err;

	value = div_u64_rem(*rate, MLX5_LINKSPEED_UNIT, &remainder);
	if (remainder) {
		pr_err("%s rate value %lluBps not in link speed units of 1Mbps.\n",
		       name, *rate);
		NL_SET_ERR_MSG_MOD(extack, "TX rate value not in link speed units of 1Mbps");
		return -EINVAL;
	}

	err = mlx5_esw_qos_max_link_speed_get(mdev, &link_speed_max, true, extack);
	if (err)
		return err;

	err = mlx5_esw_qos_link_speed_verify(mdev, name, link_speed_max, value, extack);
	if (err)
		return err;

	*rate = value;
	return 0;
}

int mlx5_esw_qos_init(struct mlx5_eswitch *esw)
{
	if (esw->qos.domain)
		return 0; /* Nothing to change. */

	return esw_qos_domain_init(esw);
}

void mlx5_esw_qos_cleanup(struct mlx5_eswitch *esw)
{
	if (esw->qos.domain)
		esw_qos_domain_release(esw);
}

/* Eswitch devlink rate API */

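/* The devlink rate callbacks below map devlink's tx_share onto min_rate
 * (a bandwidth guarantee) and tx_max onto max_rate (a hard limit), after
 * converting the values from bytes per second to Mbps.
 */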
int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv,
					    u64 tx_share, struct netlink_ext_ack *extack)
{
	struct mlx5_vport *vport = priv;
	struct mlx5_eswitch *esw;
	int err;

	esw = vport->dev->priv.eswitch;
	if (!mlx5_esw_allowed(esw))
		return -EPERM;

	err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_share", &tx_share, extack);
	if (err)
		return err;

	esw_qos_lock(esw);
	err = mlx5_esw_qos_set_vport_min_rate(vport, tx_share, extack);
	esw_qos_unlock(esw);
	return err;
}

int mlx5_esw_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void *priv,
					  u64 tx_max, struct netlink_ext_ack *extack)
{
	struct mlx5_vport *vport = priv;
	struct mlx5_eswitch *esw;
	int err;

	esw = vport->dev->priv.eswitch;
	if (!mlx5_esw_allowed(esw))
		return -EPERM;

	err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_max", &tx_max, extack);
	if (err)
		return err;

	esw_qos_lock(esw);
	err = mlx5_esw_qos_set_vport_max_rate(vport, tx_max, extack);
	esw_qos_unlock(esw);
	return err;
}

int mlx5_esw_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void *priv,
					    u64 tx_share, struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *node = priv;
	struct mlx5_eswitch *esw = node->esw;
	int err;

	err = esw_qos_devlink_rate_to_mbps(esw->dev, "tx_share", &tx_share, extack);
	if (err)
		return err;

	esw_qos_lock(esw);
	err = esw_qos_set_node_min_rate(node, tx_share, extack);
	esw_qos_unlock(esw);
	return err;
}

int mlx5_esw_devlink_rate_node_tx_max_set(struct devlink_rate *rate_node, void *priv,
					  u64 tx_max, struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *node = priv;
	struct mlx5_eswitch *esw = node->esw;
	int err;

	err = esw_qos_devlink_rate_to_mbps(esw->dev, "tx_max", &tx_max, extack);
	if (err)
		return err;

	esw_qos_lock(esw);
	err = esw_qos_sched_elem_config(node, tx_max, node->bw_share, extack);
	esw_qos_unlock(esw);
	return err;
}

int mlx5_esw_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv,
				   struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *node;
	struct mlx5_eswitch *esw;
	int err = 0;

	esw = mlx5_devlink_eswitch_get(rate_node->devlink);
	if (IS_ERR(esw))
		return PTR_ERR(esw);

	esw_qos_lock(esw);
	if (esw->mode != MLX5_ESWITCH_OFFLOADS) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Rate node creation supported only in switchdev mode");
		err = -EOPNOTSUPP;
		goto unlock;
	}

	node = esw_qos_create_vports_sched_node(esw, extack);
	if (IS_ERR(node)) {
		err = PTR_ERR(node);
		goto unlock;
	}

	*priv = node;
unlock:
	esw_qos_unlock(esw);
	return err;
}

int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv,
				   struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *node = priv;
	struct mlx5_eswitch *esw = node->esw;

	esw_qos_lock(esw);
	__esw_qos_destroy_node(node, extack);
	esw_qos_put(esw);
	esw_qos_unlock(esw);
	return 0;
}

int mlx5_esw_qos_vport_update_parent(struct mlx5_vport *vport, struct mlx5_esw_sched_node *parent,
				     struct netlink_ext_ack *extack)
{
	struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
	int err = 0;

	if (parent && parent->esw != esw) {
		NL_SET_ERR_MSG_MOD(extack, "Cross E-Switch scheduling is not supported");
		return -EOPNOTSUPP;
	}

	esw_qos_lock(esw);
	if (!vport->qos.sched_node && parent)
		err = mlx5_esw_qos_vport_enable(vport, SCHED_NODE_TYPE_VPORT, parent, 0, 0, extack);
	else if (vport->qos.sched_node)
		err = esw_qos_vport_update_parent(vport, parent, extack);
	esw_qos_unlock(esw);
	return err;
}

int mlx5_esw_devlink_rate_leaf_parent_set(struct devlink_rate *devlink_rate,
					  struct devlink_rate *parent,
					  void *priv, void *parent_priv,
					  struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *node;
	struct mlx5_vport *vport = priv;

	if (!parent)
		return mlx5_esw_qos_vport_update_parent(vport, NULL, extack);

	node = parent_priv;
	return mlx5_esw_qos_vport_update_parent(vport, node, extack);
}

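/* Reparenting a node is only valid when it stays on the same E-Switch, has
 * no children of its own, and the new depth fits the firmware limit. With
 * log_esw_max_sched_depth of 3, for example, up to 2^3 = 8 levels are
 * allowed, so a reparented node may sit at most at level 8.
 */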
static int
mlx5_esw_qos_node_validate_set_parent(struct mlx5_esw_sched_node *node,
				      struct mlx5_esw_sched_node *parent,
				      struct netlink_ext_ack *extack)
{
	u8 new_level, max_level;

	if (parent && parent->esw != node->esw) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Cannot assign node to another E-Switch");
		return -EOPNOTSUPP;
	}

	if (!list_empty(&node->children)) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Cannot reassign a node that contains rate objects");
		return -EOPNOTSUPP;
	}

	new_level = parent ? parent->level + 1 : 2;
	max_level = 1 << MLX5_CAP_QOS(node->esw->dev, log_esw_max_sched_depth);
	if (new_level > max_level) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Node hierarchy depth exceeds the maximum supported level");
		return -EOPNOTSUPP;
	}

	return 0;
}

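/* Reparenting is done by destroying the node's scheduling element and
 * recreating it under the new parent, rather than modifying it in place;
 * on failure the element is recreated under the old parent.
 */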
static int esw_qos_vports_node_update_parent(struct mlx5_esw_sched_node *node,
					     struct mlx5_esw_sched_node *parent,
					     struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *curr_parent = node->parent;
	struct mlx5_eswitch *esw = node->esw;
	u32 parent_ix, curr_parent_ix;
	int err;

	parent_ix = parent ? parent->ix : node->esw->qos.root_tsar_ix;
	/* A root-level node has no parent; fall back to the root TSAR when
	 * restoring on error.
	 */
	curr_parent_ix = curr_parent ? curr_parent->ix : esw->qos.root_tsar_ix;
	mlx5_destroy_scheduling_element_cmd(esw->dev,
					    SCHEDULING_HIERARCHY_E_SWITCH,
					    node->ix);
	err = esw_qos_create_node_sched_elem(esw->dev, parent_ix,
					     node->max_rate, 0, &node->ix);
	if (err) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Failed to create a node under the new hierarchy.");
		if (esw_qos_create_node_sched_elem(esw->dev, curr_parent_ix,
						   node->max_rate,
						   node->bw_share,
						   &node->ix))
			esw_warn(esw->dev, "Node restore QoS failed\n");

		return err;
	}
	esw_qos_node_set_parent(node, parent);

	return 0;
}

static int mlx5_esw_qos_node_update_parent(struct mlx5_esw_sched_node *node,
					   struct mlx5_esw_sched_node *parent,
					   struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *curr_parent;
	struct mlx5_eswitch *esw = node->esw;
	int err;

	err = mlx5_esw_qos_node_validate_set_parent(node, parent, extack);
	if (err)
		return err;

	esw_qos_lock(esw);
	curr_parent = node->parent;
	err = esw_qos_vports_node_update_parent(node, parent, extack);
	if (err)
		goto out;

	esw_qos_normalize_min_rate(esw, curr_parent, extack);
	esw_qos_normalize_min_rate(esw, parent, extack);

out:
	esw_qos_unlock(esw);

	return err;
}

int mlx5_esw_devlink_rate_node_parent_set(struct devlink_rate *devlink_rate,
					  struct devlink_rate *parent,
					  void *priv, void *parent_priv,
					  struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *node = priv, *parent_node;

	if (!parent)
		return mlx5_esw_qos_node_update_parent(node, NULL, extack);

	parent_node = parent_priv;
	return mlx5_esw_qos_node_update_parent(node, parent_node, extack);
}
