1 | /* SPDX-License-Identifier: GPL-2.0 */ |
2 | /* Copyright (c) 2019 Mellanox Technologies. */ |
3 | |
4 | #include "health.h" |
5 | #include "en/ptp.h" |
6 | #include "en/devlink.h" |
7 | #include "lib/tout.h" |
8 | |
9 | /* Keep this string array consistent with the MLX5E_SQ_STATE_* enums in en.h */ |
/* Human-readable names for the SQ software state bits, indexed by bit
 * position. Emitted by mlx5e_health_sq_put_sw_state() into devlink fmsg.
 * Keep this string array consistent with the MLX5E_SQ_STATE_* enums in en.h
 * (enforced at build time by the BUILD_BUG_ON_MSG below).
 */
static const char * const sq_sw_state_type_name[] = {
	[MLX5E_SQ_STATE_ENABLED] = "enabled" ,
	[MLX5E_SQ_STATE_MPWQE] = "mpwqe" ,
	[MLX5E_SQ_STATE_RECOVERING] = "recovering" ,
	[MLX5E_SQ_STATE_IPSEC] = "ipsec" ,
	[MLX5E_SQ_STATE_DIM] = "dim" ,
	[MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE] = "vlan_need_l2_inline" ,
	[MLX5E_SQ_STATE_PENDING_XSK_TX] = "pending_xsk_tx" ,
	[MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC] = "pending_tls_rx_resync" ,
	[MLX5E_SQ_STATE_XDP_MULTIBUF] = "xdp_multibuf" ,
};
21 | |
22 | static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq) |
23 | { |
24 | struct mlx5_core_dev *dev = sq->mdev; |
25 | unsigned long exp_time; |
26 | |
27 | exp_time = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, FLUSH_ON_ERROR)); |
28 | |
29 | while (time_before(jiffies, exp_time)) { |
30 | if (sq->cc == sq->pc) |
31 | return 0; |
32 | |
33 | msleep(msecs: 20); |
34 | } |
35 | |
36 | netdev_err(dev: sq->netdev, |
37 | format: "Wait for SQ 0x%x flush timeout (sq cc = 0x%x, sq pc = 0x%x)\n" , |
38 | sq->sqn, sq->cc, sq->pc); |
39 | |
40 | return -ETIMEDOUT; |
41 | } |
42 | |
43 | static void mlx5e_reset_txqsq_cc_pc(struct mlx5e_txqsq *sq) |
44 | { |
45 | WARN_ONCE(sq->cc != sq->pc, |
46 | "SQ 0x%x: cc (0x%x) != pc (0x%x)\n" , |
47 | sq->sqn, sq->cc, sq->pc); |
48 | sq->cc = 0; |
49 | sq->dma_fifo_cc = 0; |
50 | sq->pc = 0; |
51 | } |
52 | |
53 | static void mlx5e_health_sq_put_sw_state(struct devlink_fmsg *fmsg, struct mlx5e_txqsq *sq) |
54 | { |
55 | int i; |
56 | |
57 | BUILD_BUG_ON_MSG(ARRAY_SIZE(sq_sw_state_type_name) != MLX5E_NUM_SQ_STATES, |
58 | "sq_sw_state_type_name string array must be consistent with MLX5E_SQ_STATE_* enum in en.h" ); |
59 | mlx5e_health_fmsg_named_obj_nest_start(fmsg, name: "SW State" ); |
60 | |
61 | for (i = 0; i < ARRAY_SIZE(sq_sw_state_type_name); ++i) |
62 | devlink_fmsg_u32_pair_put(fmsg, name: sq_sw_state_type_name[i], |
63 | test_bit(i, &sq->state)); |
64 | |
65 | mlx5e_health_fmsg_named_obj_nest_end(fmsg); |
66 | } |
67 | |
68 | static int mlx5e_tx_reporter_err_cqe_recover(void *ctx) |
69 | { |
70 | struct mlx5_core_dev *mdev; |
71 | struct net_device *dev; |
72 | struct mlx5e_txqsq *sq; |
73 | u8 state; |
74 | int err; |
75 | |
76 | sq = ctx; |
77 | mdev = sq->mdev; |
78 | dev = sq->netdev; |
79 | |
80 | if (!test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) |
81 | return 0; |
82 | |
83 | err = mlx5_core_query_sq_state(dev: mdev, sqn: sq->sqn, state: &state); |
84 | if (err) { |
85 | netdev_err(dev, format: "Failed to query SQ 0x%x state. err = %d\n" , |
86 | sq->sqn, err); |
87 | goto out; |
88 | } |
89 | |
90 | if (state != MLX5_SQC_STATE_ERR) |
91 | goto out; |
92 | |
93 | mlx5e_tx_disable_queue(txq: sq->txq); |
94 | |
95 | err = mlx5e_wait_for_sq_flush(sq); |
96 | if (err) |
97 | goto out; |
98 | |
99 | /* At this point, no new packets will arrive from the stack as TXQ is |
100 | * marked with QUEUE_STATE_DRV_XOFF. In addition, NAPI cleared all |
101 | * pending WQEs. SQ can safely reset the SQ. |
102 | */ |
103 | |
104 | err = mlx5e_health_sq_to_ready(mdev, dev, sqn: sq->sqn); |
105 | if (err) |
106 | goto out; |
107 | |
108 | mlx5e_reset_txqsq_cc_pc(sq); |
109 | sq->stats->recover++; |
110 | clear_bit(nr: MLX5E_SQ_STATE_RECOVERING, addr: &sq->state); |
111 | rtnl_lock(); |
112 | mlx5e_activate_txqsq(sq); |
113 | rtnl_unlock(); |
114 | |
115 | if (sq->channel) |
116 | mlx5e_trigger_napi_icosq(c: sq->channel); |
117 | else |
118 | mlx5e_trigger_napi_sched(napi: sq->cq.napi); |
119 | |
120 | return 0; |
121 | out: |
122 | clear_bit(nr: MLX5E_SQ_STATE_RECOVERING, addr: &sq->state); |
123 | return err; |
124 | } |
125 | |
/* Context passed through mlx5e_err_ctx for TX-timeout recovery. */
struct mlx5e_tx_timeout_ctx {
	struct mlx5e_txqsq *sq;	/* the SQ that timed out */
	signed int status;	/* recovery outcome: 0 = this SQ recovered,
				 * 1 = all channels reopened, <0 = errno
				 * (set by mlx5e_tx_reporter_timeout_recover)
				 */
};
130 | |
131 | static int mlx5e_tx_reporter_timeout_recover(void *ctx) |
132 | { |
133 | struct mlx5e_tx_timeout_ctx *to_ctx; |
134 | struct mlx5e_priv *priv; |
135 | struct mlx5_eq_comp *eq; |
136 | struct mlx5e_txqsq *sq; |
137 | int err; |
138 | |
139 | to_ctx = ctx; |
140 | sq = to_ctx->sq; |
141 | eq = sq->cq.mcq.eq; |
142 | priv = sq->priv; |
143 | err = mlx5e_health_channel_eq_recover(dev: sq->netdev, eq, stats: sq->cq.ch_stats); |
144 | if (!err) { |
145 | to_ctx->status = 0; /* this sq recovered */ |
146 | return err; |
147 | } |
148 | |
149 | err = mlx5e_safe_reopen_channels(priv); |
150 | if (!err) { |
151 | to_ctx->status = 1; /* all channels recovered */ |
152 | return err; |
153 | } |
154 | |
155 | to_ctx->status = err; |
156 | clear_bit(nr: MLX5E_SQ_STATE_ENABLED, addr: &sq->state); |
157 | netdev_err(dev: priv->netdev, |
158 | format: "mlx5e_safe_reopen_channels failed recovering from a tx_timeout, err(%d).\n" , |
159 | err); |
160 | |
161 | return err; |
162 | } |
163 | |
164 | static int mlx5e_tx_reporter_ptpsq_unhealthy_recover(void *ctx) |
165 | { |
166 | struct mlx5e_ptpsq *ptpsq = ctx; |
167 | struct mlx5e_channels *chs; |
168 | struct net_device *netdev; |
169 | struct mlx5e_priv *priv; |
170 | int carrier_ok; |
171 | int err; |
172 | |
173 | if (!test_bit(MLX5E_SQ_STATE_RECOVERING, &ptpsq->txqsq.state)) |
174 | return 0; |
175 | |
176 | priv = ptpsq->txqsq.priv; |
177 | |
178 | mutex_lock(&priv->state_lock); |
179 | chs = &priv->channels; |
180 | netdev = priv->netdev; |
181 | |
182 | carrier_ok = netif_carrier_ok(dev: netdev); |
183 | netif_carrier_off(dev: netdev); |
184 | |
185 | rtnl_lock(); |
186 | mlx5e_deactivate_priv_channels(priv); |
187 | rtnl_unlock(); |
188 | |
189 | mlx5e_ptp_close(c: chs->ptp); |
190 | err = mlx5e_ptp_open(priv, params: &chs->params, lag_port: chs->c[0]->lag_port, cp: &chs->ptp); |
191 | |
192 | rtnl_lock(); |
193 | mlx5e_activate_priv_channels(priv); |
194 | rtnl_unlock(); |
195 | |
196 | /* return carrier back if needed */ |
197 | if (carrier_ok) |
198 | netif_carrier_on(dev: netdev); |
199 | |
200 | mutex_unlock(lock: &priv->state_lock); |
201 | |
202 | return err; |
203 | } |
204 | |
/* Dispatch to the context-specific recovery callback.
 *
 * NOTE: the state lock cannot be grabbed within this function.
 * It can cause a deadlock or a read-after-free.
 */
static int mlx5e_tx_reporter_recover_from_ctx(struct mlx5e_err_ctx *err_ctx)
{
	return err_ctx->recover(err_ctx->ctx);
}
212 | |
/* devlink health .recover callback for the TX reporter.
 * With a context, run that context's specific recovery; without one
 * (devlink-initiated recovery), recover all channels.
 */
static int mlx5e_tx_reporter_recover(struct devlink_health_reporter *reporter,
				     void *context,
				     struct netlink_ext_ack *extack)
{
	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
	struct mlx5e_err_ctx *err_ctx = context;

	if (err_ctx)
		return mlx5e_tx_reporter_recover_from_ctx(err_ctx);

	return mlx5e_health_recover_channels(priv);
}
223 | |
224 | static void |
225 | mlx5e_tx_reporter_build_diagnose_output_sq_common(struct devlink_fmsg *fmsg, |
226 | struct mlx5e_txqsq *sq, int tc) |
227 | { |
228 | bool stopped = netif_xmit_stopped(dev_queue: sq->txq); |
229 | u8 state; |
230 | int err; |
231 | |
232 | devlink_fmsg_u32_pair_put(fmsg, name: "tc" , value: tc); |
233 | devlink_fmsg_u32_pair_put(fmsg, name: "txq ix" , value: sq->txq_ix); |
234 | devlink_fmsg_u32_pair_put(fmsg, name: "sqn" , value: sq->sqn); |
235 | |
236 | err = mlx5_core_query_sq_state(dev: sq->mdev, sqn: sq->sqn, state: &state); |
237 | if (!err) |
238 | devlink_fmsg_u8_pair_put(fmsg, name: "HW state" , value: state); |
239 | |
240 | devlink_fmsg_bool_pair_put(fmsg, name: "stopped" , value: stopped); |
241 | devlink_fmsg_u32_pair_put(fmsg, name: "cc" , value: sq->cc); |
242 | devlink_fmsg_u32_pair_put(fmsg, name: "pc" , value: sq->pc); |
243 | mlx5e_health_sq_put_sw_state(fmsg, sq); |
244 | mlx5e_health_cq_diag_fmsg(cq: &sq->cq, fmsg); |
245 | mlx5e_health_eq_diag_fmsg(eq: sq->cq.mcq.eq, fmsg); |
246 | } |
247 | |
248 | static void |
249 | mlx5e_tx_reporter_build_diagnose_output(struct devlink_fmsg *fmsg, |
250 | struct mlx5e_txqsq *sq, int tc) |
251 | { |
252 | devlink_fmsg_obj_nest_start(fmsg); |
253 | devlink_fmsg_u32_pair_put(fmsg, name: "channel ix" , value: sq->ch_ix); |
254 | mlx5e_tx_reporter_build_diagnose_output_sq_common(fmsg, sq, tc); |
255 | devlink_fmsg_obj_nest_end(fmsg); |
256 | } |
257 | |
258 | static void |
259 | mlx5e_tx_reporter_build_diagnose_output_ptpsq(struct devlink_fmsg *fmsg, |
260 | struct mlx5e_ptpsq *ptpsq, int tc) |
261 | { |
262 | devlink_fmsg_obj_nest_start(fmsg); |
263 | devlink_fmsg_string_pair_put(fmsg, name: "channel" , value: "ptp" ); |
264 | mlx5e_tx_reporter_build_diagnose_output_sq_common(fmsg, sq: &ptpsq->txqsq, tc); |
265 | mlx5e_health_fmsg_named_obj_nest_start(fmsg, name: "Port TS" ); |
266 | mlx5e_health_cq_diag_fmsg(cq: &ptpsq->ts_cq, fmsg); |
267 | mlx5e_health_fmsg_named_obj_nest_end(fmsg); |
268 | devlink_fmsg_obj_nest_end(fmsg); |
269 | } |
270 | |
271 | static void |
272 | mlx5e_tx_reporter_diagnose_generic_txqsq(struct devlink_fmsg *fmsg, |
273 | struct mlx5e_txqsq *txqsq) |
274 | { |
275 | bool real_time = mlx5_is_real_time_sq(mdev: txqsq->mdev); |
276 | u32 sq_sz = mlx5_wq_cyc_get_size(wq: &txqsq->wq); |
277 | u32 sq_stride = MLX5_SEND_WQE_BB; |
278 | |
279 | mlx5e_health_fmsg_named_obj_nest_start(fmsg, name: "SQ" ); |
280 | devlink_fmsg_u64_pair_put(fmsg, name: "stride size" , value: sq_stride); |
281 | devlink_fmsg_u32_pair_put(fmsg, name: "size" , value: sq_sz); |
282 | devlink_fmsg_string_pair_put(fmsg, name: "ts_format" , value: real_time ? "RT" : "FRC" ); |
283 | mlx5e_health_cq_common_diag_fmsg(cq: &txqsq->cq, fmsg); |
284 | mlx5e_health_fmsg_named_obj_nest_end(fmsg); |
285 | } |
286 | |
287 | static void |
288 | mlx5e_tx_reporter_diagnose_generic_tx_port_ts(struct devlink_fmsg *fmsg, |
289 | struct mlx5e_ptpsq *ptpsq) |
290 | { |
291 | mlx5e_health_fmsg_named_obj_nest_start(fmsg, name: "Port TS" ); |
292 | mlx5e_health_cq_common_diag_fmsg(cq: &ptpsq->ts_cq, fmsg); |
293 | mlx5e_health_fmsg_named_obj_nest_end(fmsg); |
294 | } |
295 | |
296 | static void |
297 | mlx5e_tx_reporter_diagnose_common_config(struct devlink_health_reporter *reporter, |
298 | struct devlink_fmsg *fmsg) |
299 | { |
300 | struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); |
301 | struct mlx5e_txqsq *generic_sq = priv->txq2sq[0]; |
302 | struct mlx5e_ptp *ptp_ch = priv->channels.ptp; |
303 | struct mlx5e_ptpsq *generic_ptpsq; |
304 | |
305 | mlx5e_health_fmsg_named_obj_nest_start(fmsg, name: "Common Config" ); |
306 | mlx5e_tx_reporter_diagnose_generic_txqsq(fmsg, txqsq: generic_sq); |
307 | |
308 | if (!ptp_ch || !test_bit(MLX5E_PTP_STATE_TX, ptp_ch->state)) |
309 | goto out; |
310 | |
311 | generic_ptpsq = &ptp_ch->ptpsq[0]; |
312 | mlx5e_health_fmsg_named_obj_nest_start(fmsg, name: "PTP" ); |
313 | mlx5e_tx_reporter_diagnose_generic_txqsq(fmsg, txqsq: &generic_ptpsq->txqsq); |
314 | mlx5e_tx_reporter_diagnose_generic_tx_port_ts(fmsg, ptpsq: generic_ptpsq); |
315 | mlx5e_health_fmsg_named_obj_nest_end(fmsg); |
316 | out: |
317 | mlx5e_health_fmsg_named_obj_nest_end(fmsg); |
318 | } |
319 | |
320 | static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter, |
321 | struct devlink_fmsg *fmsg, |
322 | struct netlink_ext_ack *extack) |
323 | { |
324 | struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); |
325 | struct mlx5e_ptp *ptp_ch = priv->channels.ptp; |
326 | |
327 | int i, tc; |
328 | |
329 | mutex_lock(&priv->state_lock); |
330 | |
331 | if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) |
332 | goto unlock; |
333 | |
334 | mlx5e_tx_reporter_diagnose_common_config(reporter, fmsg); |
335 | devlink_fmsg_arr_pair_nest_start(fmsg, name: "SQs" ); |
336 | |
337 | for (i = 0; i < priv->channels.num; i++) { |
338 | struct mlx5e_channel *c = priv->channels.c[i]; |
339 | |
340 | for (tc = 0; tc < mlx5e_get_dcb_num_tc(params: &priv->channels.params); tc++) { |
341 | struct mlx5e_txqsq *sq = &c->sq[tc]; |
342 | |
343 | mlx5e_tx_reporter_build_diagnose_output(fmsg, sq, tc); |
344 | } |
345 | } |
346 | |
347 | if (!ptp_ch || !test_bit(MLX5E_PTP_STATE_TX, ptp_ch->state)) |
348 | goto close_sqs_nest; |
349 | |
350 | for (tc = 0; tc < mlx5e_get_dcb_num_tc(params: &priv->channels.params); tc++) |
351 | mlx5e_tx_reporter_build_diagnose_output_ptpsq(fmsg, |
352 | ptpsq: &ptp_ch->ptpsq[tc], |
353 | tc); |
354 | |
355 | close_sqs_nest: |
356 | devlink_fmsg_arr_pair_nest_end(fmsg); |
357 | unlock: |
358 | mutex_unlock(lock: &priv->state_lock); |
359 | return 0; |
360 | } |
361 | |
362 | static int mlx5e_tx_reporter_dump_sq(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, |
363 | void *ctx) |
364 | { |
365 | struct mlx5_rsc_key key = {}; |
366 | struct mlx5e_txqsq *sq = ctx; |
367 | |
368 | if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) |
369 | return 0; |
370 | |
371 | mlx5e_health_fmsg_named_obj_nest_start(fmsg, name: "SX Slice" ); |
372 | key.size = PAGE_SIZE; |
373 | key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL; |
374 | mlx5e_health_rsc_fmsg_dump(priv, key: &key, fmsg); |
375 | mlx5e_health_fmsg_named_obj_nest_end(fmsg); |
376 | |
377 | mlx5e_health_fmsg_named_obj_nest_start(fmsg, name: "SQ" ); |
378 | mlx5e_health_fmsg_named_obj_nest_start(fmsg, name: "QPC" ); |
379 | key.rsc = MLX5_SGMT_TYPE_FULL_QPC; |
380 | key.index1 = sq->sqn; |
381 | key.num_of_obj1 = 1; |
382 | mlx5e_health_rsc_fmsg_dump(priv, key: &key, fmsg); |
383 | mlx5e_health_fmsg_named_obj_nest_end(fmsg); |
384 | |
385 | mlx5e_health_fmsg_named_obj_nest_start(fmsg, name: "send_buff" ); |
386 | key.rsc = MLX5_SGMT_TYPE_SND_BUFF; |
387 | key.num_of_obj2 = MLX5_RSC_DUMP_ALL; |
388 | mlx5e_health_rsc_fmsg_dump(priv, key: &key, fmsg); |
389 | mlx5e_health_fmsg_named_obj_nest_end(fmsg); |
390 | |
391 | mlx5e_health_fmsg_named_obj_nest_end(fmsg); |
392 | |
393 | return 0; |
394 | } |
395 | |
396 | static int mlx5e_tx_reporter_timeout_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, |
397 | void *ctx) |
398 | { |
399 | struct mlx5e_tx_timeout_ctx *to_ctx = ctx; |
400 | |
401 | return mlx5e_tx_reporter_dump_sq(priv, fmsg, ctx: to_ctx->sq); |
402 | } |
403 | |
404 | static int mlx5e_tx_reporter_ptpsq_unhealthy_dump(struct mlx5e_priv *priv, |
405 | struct devlink_fmsg *fmsg, |
406 | void *ctx) |
407 | { |
408 | struct mlx5e_ptpsq *ptpsq = ctx; |
409 | |
410 | return mlx5e_tx_reporter_dump_sq(priv, fmsg, ctx: &ptpsq->txqsq); |
411 | } |
412 | |
413 | static int mlx5e_tx_reporter_dump_all_sqs(struct mlx5e_priv *priv, |
414 | struct devlink_fmsg *fmsg) |
415 | { |
416 | struct mlx5e_ptp *ptp_ch = priv->channels.ptp; |
417 | struct mlx5_rsc_key key = {}; |
418 | int i, tc; |
419 | |
420 | if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) |
421 | return 0; |
422 | |
423 | mlx5e_health_fmsg_named_obj_nest_start(fmsg, name: "SX Slice" ); |
424 | key.size = PAGE_SIZE; |
425 | key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL; |
426 | mlx5e_health_rsc_fmsg_dump(priv, key: &key, fmsg); |
427 | mlx5e_health_fmsg_named_obj_nest_end(fmsg); |
428 | devlink_fmsg_arr_pair_nest_start(fmsg, name: "SQs" ); |
429 | |
430 | for (i = 0; i < priv->channels.num; i++) { |
431 | struct mlx5e_channel *c = priv->channels.c[i]; |
432 | |
433 | for (tc = 0; tc < mlx5e_get_dcb_num_tc(params: &priv->channels.params); tc++) { |
434 | struct mlx5e_txqsq *sq = &c->sq[tc]; |
435 | |
436 | mlx5e_health_queue_dump(priv, fmsg, queue_idx: sq->sqn, lbl: "SQ" ); |
437 | } |
438 | } |
439 | |
440 | if (ptp_ch && test_bit(MLX5E_PTP_STATE_TX, ptp_ch->state)) { |
441 | for (tc = 0; tc < mlx5e_get_dcb_num_tc(params: &priv->channels.params); tc++) { |
442 | struct mlx5e_txqsq *sq = &ptp_ch->ptpsq[tc].txqsq; |
443 | |
444 | mlx5e_health_queue_dump(priv, fmsg, queue_idx: sq->sqn, lbl: "PTP SQ" ); |
445 | } |
446 | } |
447 | |
448 | devlink_fmsg_arr_pair_nest_end(fmsg); |
449 | return 0; |
450 | } |
451 | |
/* Dispatch to the context-specific dump callback. */
static int mlx5e_tx_reporter_dump_from_ctx(struct mlx5e_priv *priv,
					   struct mlx5e_err_ctx *err_ctx,
					   struct devlink_fmsg *fmsg)
{
	return err_ctx->dump(priv, fmsg, err_ctx->ctx);
}
458 | |
/* devlink health .dump callback for the TX reporter.
 * With a context, dump only what that context describes; without one
 * (devlink-initiated dump), dump every SQ.
 */
static int mlx5e_tx_reporter_dump(struct devlink_health_reporter *reporter,
				  struct devlink_fmsg *fmsg, void *context,
				  struct netlink_ext_ack *extack)
{
	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
	struct mlx5e_err_ctx *err_ctx = context;

	if (!err_ctx)
		return mlx5e_tx_reporter_dump_all_sqs(priv, fmsg);

	return mlx5e_tx_reporter_dump_from_ctx(priv, err_ctx, fmsg);
}
469 | |
470 | void mlx5e_reporter_tx_err_cqe(struct mlx5e_txqsq *sq) |
471 | { |
472 | char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; |
473 | struct mlx5e_priv *priv = sq->priv; |
474 | struct mlx5e_err_ctx err_ctx = {}; |
475 | |
476 | err_ctx.ctx = sq; |
477 | err_ctx.recover = mlx5e_tx_reporter_err_cqe_recover; |
478 | err_ctx.dump = mlx5e_tx_reporter_dump_sq; |
479 | snprintf(buf: err_str, size: sizeof(err_str), fmt: "ERR CQE on SQ: 0x%x" , sq->sqn); |
480 | |
481 | mlx5e_health_report(priv, reporter: priv->tx_reporter, err_str, err_ctx: &err_ctx); |
482 | } |
483 | |
484 | int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq) |
485 | { |
486 | char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; |
487 | struct mlx5e_tx_timeout_ctx to_ctx = {}; |
488 | struct mlx5e_priv *priv = sq->priv; |
489 | struct mlx5e_err_ctx err_ctx = {}; |
490 | |
491 | to_ctx.sq = sq; |
492 | err_ctx.ctx = &to_ctx; |
493 | err_ctx.recover = mlx5e_tx_reporter_timeout_recover; |
494 | err_ctx.dump = mlx5e_tx_reporter_timeout_dump; |
495 | snprintf(buf: err_str, size: sizeof(err_str), |
496 | fmt: "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u" , |
497 | sq->ch_ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc, |
498 | jiffies_to_usecs(j: jiffies - READ_ONCE(sq->txq->trans_start))); |
499 | |
500 | mlx5e_health_report(priv, reporter: priv->tx_reporter, err_str, err_ctx: &err_ctx); |
501 | return to_ctx.status; |
502 | } |
503 | |
504 | void mlx5e_reporter_tx_ptpsq_unhealthy(struct mlx5e_ptpsq *ptpsq) |
505 | { |
506 | struct mlx5e_ptp_metadata_map *map = &ptpsq->metadata_map; |
507 | char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; |
508 | struct mlx5e_txqsq *txqsq = &ptpsq->txqsq; |
509 | struct mlx5e_cq *ts_cq = &ptpsq->ts_cq; |
510 | struct mlx5e_priv *priv = txqsq->priv; |
511 | struct mlx5e_err_ctx err_ctx = {}; |
512 | |
513 | err_ctx.ctx = ptpsq; |
514 | err_ctx.recover = mlx5e_tx_reporter_ptpsq_unhealthy_recover; |
515 | err_ctx.dump = mlx5e_tx_reporter_ptpsq_unhealthy_dump; |
516 | snprintf(buf: err_str, size: sizeof(err_str), |
517 | fmt: "Unhealthy TX port TS queue: %d, SQ: 0x%x, CQ: 0x%x, Undelivered CQEs: %u Map Capacity: %u" , |
518 | txqsq->ch_ix, txqsq->sqn, ts_cq->mcq.cqn, map->undelivered_counter, map->capacity); |
519 | |
520 | mlx5e_health_report(priv, reporter: priv->tx_reporter, err_str, err_ctx: &err_ctx); |
521 | } |
522 | |
/* devlink health reporter operations for the mlx5e "tx" reporter. */
static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops = {
	.name = "tx" ,
	.recover = mlx5e_tx_reporter_recover,
	.diagnose = mlx5e_tx_reporter_diagnose,
	.dump = mlx5e_tx_reporter_dump,
};
529 | |
530 | #define MLX5_REPORTER_TX_GRACEFUL_PERIOD 500 |
531 | |
532 | void mlx5e_reporter_tx_create(struct mlx5e_priv *priv) |
533 | { |
534 | struct devlink_health_reporter *reporter; |
535 | |
536 | reporter = devlink_port_health_reporter_create(port: priv->netdev->devlink_port, |
537 | ops: &mlx5_tx_reporter_ops, |
538 | MLX5_REPORTER_TX_GRACEFUL_PERIOD, priv); |
539 | if (IS_ERR(ptr: reporter)) { |
540 | netdev_warn(dev: priv->netdev, |
541 | format: "Failed to create tx reporter, err = %ld\n" , |
542 | PTR_ERR(ptr: reporter)); |
543 | return; |
544 | } |
545 | priv->tx_reporter = reporter; |
546 | } |
547 | |
548 | void mlx5e_reporter_tx_destroy(struct mlx5e_priv *priv) |
549 | { |
550 | if (!priv->tx_reporter) |
551 | return; |
552 | |
553 | devlink_health_reporter_destroy(reporter: priv->tx_reporter); |
554 | priv->tx_reporter = NULL; |
555 | } |
556 | |