// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */

#include "eswitch.h"
#include "lib/mlx5.h"
#include "esw/qos.h"
#include "en/port.h"
#define CREATE_TRACE_POINTS
#include "diag/qos_tracepoint.h"

/* Minimum supported BW share value by the HW is 1 Mbit/sec */
#define MLX5_MIN_BW_SHARE 1

#define MLX5_RATE_TO_BW_SHARE(rate, divider, limit) \
	min_t(u32, max_t(u32, DIV_ROUND_UP(rate, divider), MLX5_MIN_BW_SHARE), limit)

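/* Rate group: a TSAR scheduling element nested under the root TSAR that
 * vports can be attached to. Rates are in Mbps; bw_share is the relative
 * DWRR weight last programmed to the HW. Linked on esw->qos.groups.
 */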
struct mlx5_esw_rate_group {
	u32 tsar_ix;
	u32 max_rate;
	u32 min_rate;
	u32 bw_share;
	struct list_head list;
};

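/* Modify the max rate and BW share of an existing E-Switch scheduling
 * element. Both attributes are flagged in the modify bitmask so the
 * firmware applies them together.
 */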
static int esw_qos_tsar_config(struct mlx5_core_dev *dev, u32 *sched_ctx,
			       u32 tsar_ix, u32 max_rate, u32 bw_share)
{
	u32 bitmask = 0;

	if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
		return -EOPNOTSUPP;

	MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate);
	MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
	bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW;
	bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE;

	return mlx5_modify_scheduling_element_cmd(dev,
						  SCHEDULING_HIERARCHY_E_SWITCH,
						  sched_ctx,
						  tsar_ix,
						  bitmask);
}

static int esw_qos_group_config(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group,
				u32 max_rate, u32 bw_share, struct netlink_ext_ack *extack)
{
	u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	struct mlx5_core_dev *dev = esw->dev;
	int err;

	err = esw_qos_tsar_config(dev, sched_ctx,
				  group->tsar_ix,
				  max_rate, bw_share);
	if (err)
		NL_SET_ERR_MSG_MOD(extack, "E-Switch modify group TSAR element failed");

	trace_mlx5_esw_group_qos_config(dev, group, group->tsar_ix, bw_share, max_rate);

	return err;
}

static int esw_qos_vport_config(struct mlx5_eswitch *esw,
				struct mlx5_vport *vport,
				u32 max_rate, u32 bw_share,
				struct netlink_ext_ack *extack)
{
	u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	struct mlx5_core_dev *dev = esw->dev;
	int err;

	if (!vport->qos.enabled)
		return -EIO;

	err = esw_qos_tsar_config(dev, sched_ctx, vport->qos.esw_tsar_ix,
				  max_rate, bw_share);
	if (err) {
		esw_warn(esw->dev,
			 "E-Switch modify TSAR vport element failed (vport=%d,err=%d)\n",
			 vport->vport, err);
		NL_SET_ERR_MSG_MOD(extack, "E-Switch modify TSAR vport element failed");
		return err;
	}

	trace_mlx5_esw_vport_qos_config(vport, bw_share, max_rate);

	return 0;
}

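/* The divider scales min rates (in Mbps) down to the HW bw_share range.
 * It is the largest min_rate among the siblings divided by the firmware's
 * max_tsar_bw_share, so the largest guarantee maps to the maximum weight.
 * For example, with max_tsar_bw_share of 100 and sibling min rates of 5000
 * and 2500 Mbps, the divider is 50 and the resulting weights are 100 and 50.
 */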
static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw,
					      struct mlx5_esw_rate_group *group,
					      bool group_level)
{
	u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
	struct mlx5_vport *evport;
	u32 max_guarantee = 0;
	unsigned long i;

	if (group_level) {
		struct mlx5_esw_rate_group *group;

		list_for_each_entry(group, &esw->qos.groups, list) {
			if (group->min_rate < max_guarantee)
				continue;
			max_guarantee = group->min_rate;
		}
	} else {
		mlx5_esw_for_each_vport(esw, i, evport) {
			if (!evport->enabled || !evport->qos.enabled ||
			    evport->qos.group != group || evport->qos.min_rate < max_guarantee)
				continue;
			max_guarantee = evport->qos.min_rate;
		}
	}

	if (max_guarantee)
		return max_t(u32, max_guarantee / fw_max_bw_share, 1);

	/* If the vports' min rate divider is 0 but their group has bw_share
	 * configured, then set the vports' bw_share to the minimal value.
	 */
	if (!group_level && !max_guarantee && group && group->bw_share)
		return 1;
	return 0;
}

static u32 esw_qos_calc_bw_share(u32 min_rate, u32 divider, u32 fw_max)
{
	if (divider)
		return MLX5_RATE_TO_BW_SHARE(min_rate, divider, fw_max);

	return 0;
}

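/* Recompute bw_share for every enabled vport in @group after a sibling's
 * min_rate changes, pushing only the values that actually changed to HW.
 */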
static int esw_qos_normalize_vports_min_rate(struct mlx5_eswitch *esw,
					     struct mlx5_esw_rate_group *group,
					     struct netlink_ext_ack *extack)
{
	u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
	u32 divider = esw_qos_calculate_min_rate_divider(esw, group, false);
	struct mlx5_vport *evport;
	unsigned long i;
	u32 bw_share;
	int err;

	mlx5_esw_for_each_vport(esw, i, evport) {
		if (!evport->enabled || !evport->qos.enabled || evport->qos.group != group)
			continue;
		bw_share = esw_qos_calc_bw_share(evport->qos.min_rate, divider, fw_max_bw_share);

		if (bw_share == evport->qos.bw_share)
			continue;

		err = esw_qos_vport_config(esw, evport, evport->qos.max_rate, bw_share, extack);
		if (err)
			return err;

		evport->qos.bw_share = bw_share;
	}

	return 0;
}

static int esw_qos_normalize_groups_min_rate(struct mlx5_eswitch *esw, u32 divider,
					     struct netlink_ext_ack *extack)
{
	u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
	struct mlx5_esw_rate_group *group;
	u32 bw_share;
	int err;

	list_for_each_entry(group, &esw->qos.groups, list) {
		bw_share = esw_qos_calc_bw_share(group->min_rate, divider, fw_max_bw_share);

		if (bw_share == group->bw_share)
			continue;

		err = esw_qos_group_config(esw, group, group->max_rate, bw_share, extack);
		if (err)
			return err;

		group->bw_share = bw_share;

		/* All the group's vports need to be set with default bw_share
		 * to enable them with QOS.
		 */
		err = esw_qos_normalize_vports_min_rate(esw, group, extack);

		if (err)
			return err;
	}

	return 0;
}

static int esw_qos_set_vport_min_rate(struct mlx5_eswitch *esw, struct mlx5_vport *evport,
				      u32 min_rate, struct netlink_ext_ack *extack)
{
	u32 fw_max_bw_share, previous_min_rate;
	bool min_rate_supported;
	int err;

	lockdep_assert_held(&esw->state_lock);
	fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
	min_rate_supported = MLX5_CAP_QOS(esw->dev, esw_bw_share) &&
			     fw_max_bw_share >= MLX5_MIN_BW_SHARE;
	if (min_rate && !min_rate_supported)
		return -EOPNOTSUPP;
	if (min_rate == evport->qos.min_rate)
		return 0;

	previous_min_rate = evport->qos.min_rate;
	evport->qos.min_rate = min_rate;
	err = esw_qos_normalize_vports_min_rate(esw, evport->qos.group, extack);
	if (err)
		evport->qos.min_rate = previous_min_rate;

	return err;
}

static int esw_qos_set_vport_max_rate(struct mlx5_eswitch *esw, struct mlx5_vport *evport,
				      u32 max_rate, struct netlink_ext_ack *extack)
{
	u32 act_max_rate = max_rate;
	bool max_rate_supported;
	int err;

	lockdep_assert_held(&esw->state_lock);
	max_rate_supported = MLX5_CAP_QOS(esw->dev, esw_rate_limit);

	if (max_rate && !max_rate_supported)
		return -EOPNOTSUPP;
	if (max_rate == evport->qos.max_rate)
		return 0;

	/* If the parent group has a rate limit, fall back to the group's
	 * value when the new max rate is 0 (unlimited).
	 */
	if (evport->qos.group && !max_rate)
		act_max_rate = evport->qos.group->max_rate;

	err = esw_qos_vport_config(esw, evport, act_max_rate, evport->qos.bw_share, extack);

	if (!err)
		evport->qos.max_rate = max_rate;

	return err;
}

static int esw_qos_set_group_min_rate(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group,
				      u32 min_rate, struct netlink_ext_ack *extack)
{
	u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
	struct mlx5_core_dev *dev = esw->dev;
	u32 previous_min_rate, divider;
	int err;

	if (!(MLX5_CAP_QOS(dev, esw_bw_share) && fw_max_bw_share >= MLX5_MIN_BW_SHARE))
		return -EOPNOTSUPP;

	if (min_rate == group->min_rate)
		return 0;

	previous_min_rate = group->min_rate;
	group->min_rate = min_rate;
	divider = esw_qos_calculate_min_rate_divider(esw, group, true);
	err = esw_qos_normalize_groups_min_rate(esw, divider, extack);
	if (err) {
		group->min_rate = previous_min_rate;
		NL_SET_ERR_MSG_MOD(extack, "E-Switch group min rate setting failed");

		/* Attempt restoring previous configuration */
		divider = esw_qos_calculate_min_rate_divider(esw, group, true);
		if (esw_qos_normalize_groups_min_rate(esw, divider, extack))
			NL_SET_ERR_MSG_MOD(extack, "E-Switch BW share restore failed");
	}

	return err;
}

static int esw_qos_set_group_max_rate(struct mlx5_eswitch *esw,
				      struct mlx5_esw_rate_group *group,
				      u32 max_rate, struct netlink_ext_ack *extack)
{
	struct mlx5_vport *vport;
	unsigned long i;
	int err;

	if (group->max_rate == max_rate)
		return 0;

	err = esw_qos_group_config(esw, group, max_rate, group->bw_share, extack);
	if (err)
		return err;

	group->max_rate = max_rate;

	/* Any unlimited vports in the group should be set
	 * with the value of the group.
	 */
	mlx5_esw_for_each_vport(esw, i, vport) {
		if (!vport->enabled || !vport->qos.enabled ||
		    vport->qos.group != group || vport->qos.max_rate)
			continue;

		err = esw_qos_vport_config(esw, vport, max_rate, vport->qos.bw_share, extack);
		if (err)
			NL_SET_ERR_MSG_MOD(extack,
					   "E-Switch vport implicit rate limit setting failed");
	}

	return err;
}

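/* Create the vport's scheduling element under its group's TSAR (or the
 * root TSAR when the vport has no group) and record the element index in
 * vport->qos.esw_tsar_ix.
 */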
static int esw_qos_vport_create_sched_element(struct mlx5_eswitch *esw,
					      struct mlx5_vport *vport,
					      u32 max_rate, u32 bw_share)
{
	u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	struct mlx5_esw_rate_group *group = vport->qos.group;
	struct mlx5_core_dev *dev = esw->dev;
	u32 parent_tsar_ix;
	void *vport_elem;
	int err;

	parent_tsar_ix = group ? group->tsar_ix : esw->qos.root_tsar_ix;
	MLX5_SET(scheduling_context, sched_ctx, element_type,
		 SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
	vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes);
	MLX5_SET(vport_element, vport_elem, vport_number, vport->vport);
	MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_tsar_ix);
	MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate);
	MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);

	err = mlx5_create_scheduling_element_cmd(dev,
						 SCHEDULING_HIERARCHY_E_SWITCH,
						 sched_ctx,
						 &vport->qos.esw_tsar_ix);
	if (err) {
		esw_warn(esw->dev, "E-Switch create TSAR vport element failed (vport=%d,err=%d)\n",
			 vport->vport, err);
		return err;
	}

	return 0;
}

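/* Moving a vport between groups means re-parenting its scheduling element:
 * the old element is destroyed and a new one is created under the new
 * group's TSAR. On failure, recreate the element under the old group.
 */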
static int esw_qos_update_group_scheduling_element(struct mlx5_eswitch *esw,
						   struct mlx5_vport *vport,
						   struct mlx5_esw_rate_group *curr_group,
						   struct mlx5_esw_rate_group *new_group,
						   struct netlink_ext_ack *extack)
{
	u32 max_rate;
	int err;

	err = mlx5_destroy_scheduling_element_cmd(esw->dev,
						  SCHEDULING_HIERARCHY_E_SWITCH,
						  vport->qos.esw_tsar_ix);
	if (err) {
		NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR vport element failed");
		return err;
	}

	vport->qos.group = new_group;
	max_rate = vport->qos.max_rate ? vport->qos.max_rate : new_group->max_rate;

	/* If vport is unlimited, we set the group's value.
	 * Therefore, if the group is limited it will apply to
	 * the vport as well and if not, vport will remain unlimited.
	 */
	err = esw_qos_vport_create_sched_element(esw, vport, max_rate, vport->qos.bw_share);
	if (err) {
		NL_SET_ERR_MSG_MOD(extack, "E-Switch vport group set failed.");
		goto err_sched;
	}

	return 0;

err_sched:
	vport->qos.group = curr_group;
	max_rate = vport->qos.max_rate ? vport->qos.max_rate : curr_group->max_rate;
	if (esw_qos_vport_create_sched_element(esw, vport, max_rate, vport->qos.bw_share))
		esw_warn(esw->dev, "E-Switch vport group restore failed (vport=%d)\n",
			 vport->vport);

	return err;
}

static int esw_qos_vport_update_group(struct mlx5_eswitch *esw,
				      struct mlx5_vport *vport,
				      struct mlx5_esw_rate_group *group,
				      struct netlink_ext_ack *extack)
{
	struct mlx5_esw_rate_group *new_group, *curr_group;
	int err;

	if (!vport->enabled)
		return -EINVAL;

	curr_group = vport->qos.group;
	new_group = group ?: esw->qos.group0;
	if (curr_group == new_group)
		return 0;

	err = esw_qos_update_group_scheduling_element(esw, vport, curr_group, new_group, extack);
	if (err)
		return err;

	/* Recalculate bw share weights of old and new groups */
	if (vport->qos.bw_share || new_group->bw_share) {
		esw_qos_normalize_vports_min_rate(esw, curr_group, extack);
		esw_qos_normalize_vports_min_rate(esw, new_group, extack);
	}

	return 0;
}

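/* Allocate a new rate group, create its TSAR under the root TSAR, add it
 * to esw->qos.groups and renormalize the group weights. Cleans up the TSAR
 * and the allocation on failure.
 */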
static struct mlx5_esw_rate_group *
__esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
{
	u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	struct mlx5_esw_rate_group *group;
	u32 divider;
	int err;

	group = kzalloc(sizeof(*group), GFP_KERNEL);
	if (!group)
		return ERR_PTR(-ENOMEM);

	MLX5_SET(scheduling_context, tsar_ctx, parent_element_id,
		 esw->qos.root_tsar_ix);
	err = mlx5_create_scheduling_element_cmd(esw->dev,
						 SCHEDULING_HIERARCHY_E_SWITCH,
						 tsar_ctx,
						 &group->tsar_ix);
	if (err) {
		NL_SET_ERR_MSG_MOD(extack, "E-Switch create TSAR for group failed");
		goto err_sched_elem;
	}

	list_add_tail(&group->list, &esw->qos.groups);

	divider = esw_qos_calculate_min_rate_divider(esw, group, true);
	if (divider) {
		err = esw_qos_normalize_groups_min_rate(esw, divider, extack);
		if (err) {
			NL_SET_ERR_MSG_MOD(extack, "E-Switch groups normalization failed");
			goto err_min_rate;
		}
	}
	trace_mlx5_esw_group_qos_create(esw->dev, group, group->tsar_ix);

	return group;

err_min_rate:
	list_del(&group->list);
	if (mlx5_destroy_scheduling_element_cmd(esw->dev,
						SCHEDULING_HIERARCHY_E_SWITCH,
						group->tsar_ix))
		NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR for group failed");
err_sched_elem:
	kfree(group);
	return ERR_PTR(err);
}

static int esw_qos_get(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack);
static void esw_qos_put(struct mlx5_eswitch *esw);

static struct mlx5_esw_rate_group *
esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
{
	struct mlx5_esw_rate_group *group;
	int err;

	if (!MLX5_CAP_QOS(esw->dev, log_esw_max_sched_depth))
		return ERR_PTR(-EOPNOTSUPP);

	err = esw_qos_get(esw, extack);
	if (err)
		return ERR_PTR(err);

	group = __esw_qos_create_rate_group(esw, extack);
	if (IS_ERR(group))
		esw_qos_put(esw);

	return group;
}

static int __esw_qos_destroy_rate_group(struct mlx5_eswitch *esw,
					struct mlx5_esw_rate_group *group,
					struct netlink_ext_ack *extack)
{
	u32 divider;
	int err;

	list_del(&group->list);

	divider = esw_qos_calculate_min_rate_divider(esw, NULL, true);
	err = esw_qos_normalize_groups_min_rate(esw, divider, extack);
	if (err)
		NL_SET_ERR_MSG_MOD(extack, "E-Switch groups' normalization failed");

	err = mlx5_destroy_scheduling_element_cmd(esw->dev,
						  SCHEDULING_HIERARCHY_E_SWITCH,
						  group->tsar_ix);
	if (err)
		NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR_ID failed");

	trace_mlx5_esw_group_qos_destroy(esw->dev, group, group->tsar_ix);

	kfree(group);

	return err;
}

static int esw_qos_destroy_rate_group(struct mlx5_eswitch *esw,
				      struct mlx5_esw_rate_group *group,
				      struct netlink_ext_ack *extack)
{
	int err;

	err = __esw_qos_destroy_rate_group(esw, group, extack);
	esw_qos_put(esw);

	return err;
}

static bool esw_qos_element_type_supported(struct mlx5_core_dev *dev, int type)
{
	switch (type) {
	case SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR:
		return MLX5_CAP_QOS(dev, esw_element_type) &
		       ELEMENT_TYPE_CAP_MASK_TASR;
	case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT:
		return MLX5_CAP_QOS(dev, esw_element_type) &
		       ELEMENT_TYPE_CAP_MASK_VPORT;
	case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC:
		return MLX5_CAP_QOS(dev, esw_element_type) &
		       ELEMENT_TYPE_CAP_MASK_VPORT_TC;
	case SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC:
		return MLX5_CAP_QOS(dev, esw_element_type) &
		       ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC;
	}
	return false;
}

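/* Create the root DWRR TSAR of the E-Switch scheduling hierarchy. When the
 * firmware supports nested scheduling (log_esw_max_sched_depth), also create
 * group0, the default group for vports that are not assigned to any group.
 */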
static int esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
{
	u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	struct mlx5_core_dev *dev = esw->dev;
	__be32 *attr;
	int err;

	if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
		return -EOPNOTSUPP;

	if (!esw_qos_element_type_supported(dev, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR))
		return -EOPNOTSUPP;

	MLX5_SET(scheduling_context, tsar_ctx, element_type,
		 SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);

	attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes);
	*attr = cpu_to_be32(TSAR_ELEMENT_TSAR_TYPE_DWRR << 16);

	err = mlx5_create_scheduling_element_cmd(dev,
						 SCHEDULING_HIERARCHY_E_SWITCH,
						 tsar_ctx,
						 &esw->qos.root_tsar_ix);
	if (err) {
		esw_warn(dev, "E-Switch create root TSAR failed (%d)\n", err);
		return err;
	}

	INIT_LIST_HEAD(&esw->qos.groups);
	if (MLX5_CAP_QOS(dev, log_esw_max_sched_depth)) {
		esw->qos.group0 = __esw_qos_create_rate_group(esw, extack);
		if (IS_ERR(esw->qos.group0)) {
			esw_warn(dev, "E-Switch create rate group 0 failed (%ld)\n",
				 PTR_ERR(esw->qos.group0));
			err = PTR_ERR(esw->qos.group0);
			goto err_group0;
		}
	}
	refcount_set(&esw->qos.refcnt, 1);

	return 0;

err_group0:
	if (mlx5_destroy_scheduling_element_cmd(esw->dev, SCHEDULING_HIERARCHY_E_SWITCH,
						esw->qos.root_tsar_ix))
		esw_warn(esw->dev, "E-Switch destroy root TSAR failed.\n");

	return err;
}

static void esw_qos_destroy(struct mlx5_eswitch *esw)
{
	int err;

	if (esw->qos.group0)
		__esw_qos_destroy_rate_group(esw, esw->qos.group0, NULL);

	err = mlx5_destroy_scheduling_element_cmd(esw->dev,
						  SCHEDULING_HIERARCHY_E_SWITCH,
						  esw->qos.root_tsar_ix);
	if (err)
		esw_warn(esw->dev, "E-Switch destroy root TSAR failed (%d)\n", err);
}

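/* The QoS hierarchy is reference counted: the first user (vport or rate
 * group) triggers creation of the root TSAR and group0, and the last
 * esw_qos_put() tears them down. Both calls rely on esw->state_lock for
 * serialization.
 */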
static int esw_qos_get(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
{
	int err = 0;

	lockdep_assert_held(&esw->state_lock);

	if (!refcount_inc_not_zero(&esw->qos.refcnt)) {
		/* esw_qos_create() set refcount to 1 only on success.
		 * No need to decrement on failure.
		 */
		err = esw_qos_create(esw, extack);
	}

	return err;
}

static void esw_qos_put(struct mlx5_eswitch *esw)
{
	lockdep_assert_held(&esw->state_lock);
	if (refcount_dec_and_test(&esw->qos.refcnt))
		esw_qos_destroy(esw);
}

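/* Enable QoS for a vport: take a reference on the E-Switch QoS hierarchy,
 * attach the vport to group0 and create its scheduling element. A no-op if
 * QoS is already enabled on the vport.
 */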
static int esw_qos_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
				u32 max_rate, u32 bw_share, struct netlink_ext_ack *extack)
{
	int err;

	lockdep_assert_held(&esw->state_lock);
	if (vport->qos.enabled)
		return 0;

	err = esw_qos_get(esw, extack);
	if (err)
		return err;

	vport->qos.group = esw->qos.group0;

	err = esw_qos_vport_create_sched_element(esw, vport, max_rate, bw_share);
	if (err)
		goto err_out;

	vport->qos.enabled = true;
	trace_mlx5_esw_vport_qos_create(vport, bw_share, max_rate);

	return 0;

err_out:
	esw_qos_put(esw);

	return err;
}

void mlx5_esw_qos_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
{
	int err;

	lockdep_assert_held(&esw->state_lock);
	if (!vport->qos.enabled)
		return;
	WARN(vport->qos.group && vport->qos.group != esw->qos.group0,
	     "Disabling QoS on port before detaching it from group");

	err = mlx5_destroy_scheduling_element_cmd(esw->dev,
						  SCHEDULING_HIERARCHY_E_SWITCH,
						  vport->qos.esw_tsar_ix);
	if (err)
		esw_warn(esw->dev, "E-Switch destroy TSAR vport element failed (vport=%d,err=%d)\n",
			 vport->vport, err);

	memset(&vport->qos, 0, sizeof(vport->qos));
	trace_mlx5_esw_vport_qos_destroy(vport);

	esw_qos_put(esw);
}

int mlx5_esw_qos_set_vport_rate(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
				u32 max_rate, u32 min_rate)
{
	int err;

	lockdep_assert_held(&esw->state_lock);
	err = esw_qos_vport_enable(esw, vport, 0, 0, NULL);
	if (err)
		return err;

	err = esw_qos_set_vport_min_rate(esw, vport, min_rate, NULL);
	if (!err)
		err = esw_qos_set_vport_max_rate(esw, vport, max_rate, NULL);

	return err;
}

static u32 mlx5_esw_qos_lag_link_speed_get_locked(struct mlx5_core_dev *mdev)
{
	struct ethtool_link_ksettings lksettings;
	struct net_device *slave, *master;
	u32 speed = SPEED_UNKNOWN;

	/* Lock ensures a stable reference to master and slave netdevice
	 * while port speed of master is queried.
	 */
	ASSERT_RTNL();

	slave = mlx5_uplink_netdev_get(mdev);
	if (!slave)
		goto out;

	master = netdev_master_upper_dev_get(slave);
	if (master && !__ethtool_get_link_ksettings(master, &lksettings))
		speed = lksettings.base.speed;

out:
	return speed;
}

static int mlx5_esw_qos_max_link_speed_get(struct mlx5_core_dev *mdev, u32 *link_speed_max,
					   bool hold_rtnl_lock, struct netlink_ext_ack *extack)
{
	int err;

	if (!mlx5_lag_is_active(mdev))
		goto skip_lag;

	if (hold_rtnl_lock)
		rtnl_lock();

	*link_speed_max = mlx5_esw_qos_lag_link_speed_get_locked(mdev);

	if (hold_rtnl_lock)
		rtnl_unlock();

	if (*link_speed_max != (u32)SPEED_UNKNOWN)
		return 0;

skip_lag:
	err = mlx5_port_max_linkspeed(mdev, link_speed_max);
	if (err)
		NL_SET_ERR_MSG_MOD(extack, "Failed to get link maximum speed");

	return err;
}

static int mlx5_esw_qos_link_speed_verify(struct mlx5_core_dev *mdev,
					  const char *name, u32 link_speed_max,
					  u64 value, struct netlink_ext_ack *extack)
{
	if (value > link_speed_max) {
		pr_err("%s rate value %lluMbps exceeds link maximum speed %u.\n",
		       name, value, link_speed_max);
		NL_SET_ERR_MSG_MOD(extack, "TX rate value exceeds link maximum speed");
		return -EINVAL;
	}

	return 0;
}

int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 rate_mbps)
{
	u32 ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	struct mlx5_vport *vport;
	u32 link_speed_max;
	u32 bitmask;
	int err;

	vport = mlx5_eswitch_get_vport(esw, vport_num);
	if (IS_ERR(vport))
		return PTR_ERR(vport);

	if (rate_mbps) {
		err = mlx5_esw_qos_max_link_speed_get(esw->dev, &link_speed_max, false, NULL);
		if (err)
			return err;

		err = mlx5_esw_qos_link_speed_verify(esw->dev, "Police",
						     link_speed_max, rate_mbps, NULL);
		if (err)
			return err;
	}

	mutex_lock(&esw->state_lock);
	if (!vport->qos.enabled) {
		/* Eswitch QoS wasn't enabled yet. Enable it and vport QoS. */
		err = esw_qos_vport_enable(esw, vport, rate_mbps, vport->qos.bw_share, NULL);
	} else {
		MLX5_SET(scheduling_context, ctx, max_average_bw, rate_mbps);

		bitmask = MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW;
		err = mlx5_modify_scheduling_element_cmd(esw->dev,
							 SCHEDULING_HIERARCHY_E_SWITCH,
							 ctx,
							 vport->qos.esw_tsar_ix,
							 bitmask);
	}
	mutex_unlock(&esw->state_lock);

	return err;
}

811 | #define MLX5_LINKSPEED_UNIT 125000 /* 1Mbps in Bps */ |
812 | |
813 | /* Converts bytes per second value passed in a pointer into megabits per |
814 | * second, rewriting last. If converted rate exceed link speed or is not a |
815 | * fraction of Mbps - returns error. |
816 | */ |
static int esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev *mdev, const char *name,
					u64 *rate, struct netlink_ext_ack *extack)
{
	u32 link_speed_max, remainder;
	u64 value;
	int err;

	value = div_u64_rem(*rate, MLX5_LINKSPEED_UNIT, &remainder);
	if (remainder) {
		pr_err("%s rate value %lluBps not in link speed units of 1Mbps.\n",
		       name, *rate);
		NL_SET_ERR_MSG_MOD(extack, "TX rate value not in link speed units of 1Mbps");
		return -EINVAL;
	}

	err = mlx5_esw_qos_max_link_speed_get(mdev, &link_speed_max, true, extack);
	if (err)
		return err;

	err = mlx5_esw_qos_link_speed_verify(mdev, name, link_speed_max, value, extack);
	if (err)
		return err;

	*rate = value;
	return 0;
}

/* Eswitch devlink rate API */

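/* devlink hands tx_share and tx_max to these callbacks in bytes per second;
 * they are converted to Mbps before being programmed. tx_share maps to the
 * min rate (bw_share) guarantee and tx_max to the max_average_bw limit.
 */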
int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv,
					    u64 tx_share, struct netlink_ext_ack *extack)
{
	struct mlx5_vport *vport = priv;
	struct mlx5_eswitch *esw;
	int err;

	esw = vport->dev->priv.eswitch;
	if (!mlx5_esw_allowed(esw))
		return -EPERM;

	err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_share", &tx_share, extack);
	if (err)
		return err;

	mutex_lock(&esw->state_lock);
	err = esw_qos_vport_enable(esw, vport, 0, 0, extack);
	if (err)
		goto unlock;

	err = esw_qos_set_vport_min_rate(esw, vport, tx_share, extack);
unlock:
	mutex_unlock(&esw->state_lock);
	return err;
}

int mlx5_esw_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void *priv,
					  u64 tx_max, struct netlink_ext_ack *extack)
{
	struct mlx5_vport *vport = priv;
	struct mlx5_eswitch *esw;
	int err;

	esw = vport->dev->priv.eswitch;
	if (!mlx5_esw_allowed(esw))
		return -EPERM;

	err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_max", &tx_max, extack);
	if (err)
		return err;

	mutex_lock(&esw->state_lock);
	err = esw_qos_vport_enable(esw, vport, 0, 0, extack);
	if (err)
		goto unlock;

	err = esw_qos_set_vport_max_rate(esw, vport, tx_max, extack);
unlock:
	mutex_unlock(&esw->state_lock);
	return err;
}

int mlx5_esw_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void *priv,
					    u64 tx_share, struct netlink_ext_ack *extack)
{
	struct mlx5_core_dev *dev = devlink_priv(rate_node->devlink);
	struct mlx5_eswitch *esw = dev->priv.eswitch;
	struct mlx5_esw_rate_group *group = priv;
	int err;

	err = esw_qos_devlink_rate_to_mbps(dev, "tx_share", &tx_share, extack);
	if (err)
		return err;

	mutex_lock(&esw->state_lock);
	err = esw_qos_set_group_min_rate(esw, group, tx_share, extack);
	mutex_unlock(&esw->state_lock);
	return err;
}

int mlx5_esw_devlink_rate_node_tx_max_set(struct devlink_rate *rate_node, void *priv,
					  u64 tx_max, struct netlink_ext_ack *extack)
{
	struct mlx5_core_dev *dev = devlink_priv(rate_node->devlink);
	struct mlx5_eswitch *esw = dev->priv.eswitch;
	struct mlx5_esw_rate_group *group = priv;
	int err;

	err = esw_qos_devlink_rate_to_mbps(dev, "tx_max", &tx_max, extack);
	if (err)
		return err;

	mutex_lock(&esw->state_lock);
	err = esw_qos_set_group_max_rate(esw, group, tx_max, extack);
	mutex_unlock(&esw->state_lock);
	return err;
}

int mlx5_esw_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv,
				   struct netlink_ext_ack *extack)
{
	struct mlx5_esw_rate_group *group;
	struct mlx5_eswitch *esw;
	int err = 0;

	esw = mlx5_devlink_eswitch_get(rate_node->devlink);
	if (IS_ERR(esw))
		return PTR_ERR(esw);

	mutex_lock(&esw->state_lock);
	if (esw->mode != MLX5_ESWITCH_OFFLOADS) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Rate node creation supported only in switchdev mode");
		err = -EOPNOTSUPP;
		goto unlock;
	}

	group = esw_qos_create_rate_group(esw, extack);
	if (IS_ERR(group)) {
		err = PTR_ERR(group);
		goto unlock;
	}

	*priv = group;
unlock:
	mutex_unlock(&esw->state_lock);
	return err;
}

int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv,
				   struct netlink_ext_ack *extack)
{
	struct mlx5_esw_rate_group *group = priv;
	struct mlx5_eswitch *esw;
	int err;

	esw = mlx5_devlink_eswitch_get(rate_node->devlink);
	if (IS_ERR(esw))
		return PTR_ERR(esw);

	mutex_lock(&esw->state_lock);
	err = esw_qos_destroy_rate_group(esw, group, extack);
	mutex_unlock(&esw->state_lock);
	return err;
}

int mlx5_esw_qos_vport_update_group(struct mlx5_eswitch *esw,
				    struct mlx5_vport *vport,
				    struct mlx5_esw_rate_group *group,
				    struct netlink_ext_ack *extack)
{
	int err = 0;

	mutex_lock(&esw->state_lock);
	if (!vport->qos.enabled && !group)
		goto unlock;

	err = esw_qos_vport_enable(esw, vport, 0, 0, extack);
	if (!err)
		err = esw_qos_vport_update_group(esw, vport, group, extack);
unlock:
	mutex_unlock(&esw->state_lock);
	return err;
}

int mlx5_esw_devlink_rate_parent_set(struct devlink_rate *devlink_rate,
				     struct devlink_rate *parent,
				     void *priv, void *parent_priv,
				     struct netlink_ext_ack *extack)
{
	struct mlx5_esw_rate_group *group;
	struct mlx5_vport *vport = priv;

	if (!parent)
		return mlx5_esw_qos_vport_update_group(vport->dev->priv.eswitch,
						       vport, NULL, extack);

	group = parent_priv;
	return mlx5_esw_qos_vport_update_group(vport->dev->priv.eswitch, vport, group, extack);
}