1 | // SPDX-License-Identifier: GPL-2.0 |
2 | // Copyright (c) 2019 Mellanox Technologies. |
3 | |
4 | #include "health.h" |
5 | #include "params.h" |
6 | #include "txrx.h" |
7 | #include "devlink.h" |
8 | #include "ptp.h" |
9 | #include "lib/tout.h" |
10 | |
11 | /* Keep this string array consistent with the MLX5E_RQ_STATE_* enums in en.h */ |
12 | static const char * const rq_sw_state_type_name[] = { |
13 | [MLX5E_RQ_STATE_ENABLED] = "enabled" , |
14 | [MLX5E_RQ_STATE_RECOVERING] = "recovering" , |
15 | [MLX5E_RQ_STATE_DIM] = "dim" , |
16 | [MLX5E_RQ_STATE_NO_CSUM_COMPLETE] = "no_csum_complete" , |
17 | [MLX5E_RQ_STATE_CSUM_FULL] = "csum_full" , |
18 | [MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX] = "mini_cqe_hw_stridx" , |
19 | [MLX5E_RQ_STATE_SHAMPO] = "shampo" , |
20 | [MLX5E_RQ_STATE_MINI_CQE_ENHANCED] = "mini_cqe_enhanced" , |
21 | [MLX5E_RQ_STATE_XSK] = "xsk" , |
22 | }; |
23 | |
24 | static int mlx5e_query_rq_state(struct mlx5_core_dev *dev, u32 rqn, u8 *state) |
25 | { |
26 | int outlen = MLX5_ST_SZ_BYTES(query_rq_out); |
27 | void *out; |
28 | void *rqc; |
29 | int err; |
30 | |
31 | out = kvzalloc(size: outlen, GFP_KERNEL); |
32 | if (!out) |
33 | return -ENOMEM; |
34 | |
35 | err = mlx5_core_query_rq(dev, rqn, out); |
36 | if (err) |
37 | goto out; |
38 | |
39 | rqc = MLX5_ADDR_OF(query_rq_out, out, rq_context); |
40 | *state = MLX5_GET(rqc, rqc, state); |
41 | |
42 | out: |
43 | kvfree(addr: out); |
44 | return err; |
45 | } |
46 | |
47 | static int mlx5e_wait_for_icosq_flush(struct mlx5e_icosq *icosq) |
48 | { |
49 | struct mlx5_core_dev *dev = icosq->channel->mdev; |
50 | unsigned long exp_time; |
51 | |
52 | exp_time = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, FLUSH_ON_ERROR)); |
53 | |
54 | while (time_before(jiffies, exp_time)) { |
55 | if (icosq->cc == icosq->pc) |
56 | return 0; |
57 | |
58 | msleep(msecs: 20); |
59 | } |
60 | |
61 | netdev_err(dev: icosq->channel->netdev, |
62 | format: "Wait for ICOSQ 0x%x flush timeout (cc = 0x%x, pc = 0x%x)\n" , |
63 | icosq->sqn, icosq->cc, icosq->pc); |
64 | |
65 | return -ETIMEDOUT; |
66 | } |
67 | |
68 | static void mlx5e_reset_icosq_cc_pc(struct mlx5e_icosq *icosq) |
69 | { |
70 | WARN_ONCE(icosq->cc != icosq->pc, "ICOSQ 0x%x: cc (0x%x) != pc (0x%x)\n" , |
71 | icosq->sqn, icosq->cc, icosq->pc); |
72 | icosq->cc = 0; |
73 | icosq->pc = 0; |
74 | } |
75 | |
76 | static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx) |
77 | { |
78 | struct mlx5e_rq *xskrq = NULL; |
79 | struct mlx5_core_dev *mdev; |
80 | struct mlx5e_icosq *icosq; |
81 | struct net_device *dev; |
82 | struct mlx5e_rq *rq; |
83 | u8 state; |
84 | int err; |
85 | |
86 | icosq = ctx; |
87 | |
88 | mutex_lock(&icosq->channel->icosq_recovery_lock); |
89 | |
90 | /* mlx5e_close_rq cancels this work before RQ and ICOSQ are killed. */ |
91 | rq = &icosq->channel->rq; |
92 | if (test_bit(MLX5E_RQ_STATE_ENABLED, &icosq->channel->xskrq.state)) |
93 | xskrq = &icosq->channel->xskrq; |
94 | mdev = icosq->channel->mdev; |
95 | dev = icosq->channel->netdev; |
96 | err = mlx5_core_query_sq_state(dev: mdev, sqn: icosq->sqn, state: &state); |
97 | if (err) { |
98 | netdev_err(dev, format: "Failed to query ICOSQ 0x%x state. err = %d\n" , |
99 | icosq->sqn, err); |
100 | goto out; |
101 | } |
102 | |
103 | if (state != MLX5_SQC_STATE_ERR) |
104 | goto out; |
105 | |
106 | mlx5e_deactivate_rq(rq); |
107 | if (xskrq) |
108 | mlx5e_deactivate_rq(rq: xskrq); |
109 | |
110 | err = mlx5e_wait_for_icosq_flush(icosq); |
111 | if (err) |
112 | goto out; |
113 | |
114 | mlx5e_deactivate_icosq(icosq); |
115 | |
116 | /* At this point, both the rq and the icosq are disabled */ |
117 | |
118 | err = mlx5e_health_sq_to_ready(mdev, dev, sqn: icosq->sqn); |
119 | if (err) |
120 | goto out; |
121 | |
122 | mlx5e_reset_icosq_cc_pc(icosq); |
123 | |
124 | mlx5e_free_rx_missing_descs(rq); |
125 | if (xskrq) |
126 | mlx5e_free_rx_missing_descs(rq: xskrq); |
127 | |
128 | clear_bit(nr: MLX5E_SQ_STATE_RECOVERING, addr: &icosq->state); |
129 | mlx5e_activate_icosq(icosq); |
130 | |
131 | mlx5e_activate_rq(rq); |
132 | rq->stats->recover++; |
133 | |
134 | if (xskrq) { |
135 | mlx5e_activate_rq(rq: xskrq); |
136 | xskrq->stats->recover++; |
137 | } |
138 | |
139 | mlx5e_trigger_napi_icosq(c: icosq->channel); |
140 | |
141 | mutex_unlock(lock: &icosq->channel->icosq_recovery_lock); |
142 | |
143 | return 0; |
144 | out: |
145 | clear_bit(nr: MLX5E_SQ_STATE_RECOVERING, addr: &icosq->state); |
146 | mutex_unlock(lock: &icosq->channel->icosq_recovery_lock); |
147 | return err; |
148 | } |
149 | |
150 | static int mlx5e_rx_reporter_err_rq_cqe_recover(void *ctx) |
151 | { |
152 | struct mlx5e_rq *rq = ctx; |
153 | int err; |
154 | |
155 | mlx5e_deactivate_rq(rq); |
156 | err = mlx5e_flush_rq(rq, curr_state: MLX5_RQC_STATE_ERR); |
157 | clear_bit(nr: MLX5E_RQ_STATE_RECOVERING, addr: &rq->state); |
158 | if (err) |
159 | return err; |
160 | |
161 | mlx5e_activate_rq(rq); |
162 | rq->stats->recover++; |
163 | if (rq->channel) |
164 | mlx5e_trigger_napi_icosq(c: rq->channel); |
165 | else |
166 | mlx5e_trigger_napi_sched(napi: rq->cq.napi); |
167 | return 0; |
168 | } |
169 | |
170 | static int mlx5e_rx_reporter_timeout_recover(void *ctx) |
171 | { |
172 | struct mlx5_eq_comp *eq; |
173 | struct mlx5e_rq *rq; |
174 | int err; |
175 | |
176 | rq = ctx; |
177 | eq = rq->cq.mcq.eq; |
178 | |
179 | err = mlx5e_health_channel_eq_recover(dev: rq->netdev, eq, stats: rq->cq.ch_stats); |
180 | if (err && rq->icosq) |
181 | clear_bit(nr: MLX5E_SQ_STATE_ENABLED, addr: &rq->icosq->state); |
182 | |
183 | return err; |
184 | } |
185 | |
/* Dispatch to the recover callback captured in the error context. */
static int mlx5e_rx_reporter_recover_from_ctx(struct mlx5e_err_ctx *err_ctx)
{
	return err_ctx->recover(err_ctx->ctx);
}
190 | |
/* devlink health .recover callback: with a specific error context recover
 * just that queue, otherwise recover all channels.
 */
static int mlx5e_rx_reporter_recover(struct devlink_health_reporter *reporter,
				     void *context,
				     struct netlink_ext_ack *extack)
{
	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
	struct mlx5e_err_ctx *err_ctx = context;

	if (err_ctx)
		return mlx5e_rx_reporter_recover_from_ctx(err_ctx);

	return mlx5e_health_recover_channels(priv);
}
201 | |
202 | static void mlx5e_reporter_icosq_diagnose(struct mlx5e_icosq *icosq, u8 hw_state, |
203 | struct devlink_fmsg *fmsg) |
204 | { |
205 | mlx5e_health_fmsg_named_obj_nest_start(fmsg, name: "ICOSQ" ); |
206 | devlink_fmsg_u32_pair_put(fmsg, name: "sqn" , value: icosq->sqn); |
207 | devlink_fmsg_u8_pair_put(fmsg, name: "HW state" , value: hw_state); |
208 | devlink_fmsg_u32_pair_put(fmsg, name: "cc" , value: icosq->cc); |
209 | devlink_fmsg_u32_pair_put(fmsg, name: "pc" , value: icosq->pc); |
210 | devlink_fmsg_u32_pair_put(fmsg, name: "WQE size" , value: mlx5_wq_cyc_get_size(wq: &icosq->wq)); |
211 | |
212 | mlx5e_health_fmsg_named_obj_nest_start(fmsg, name: "CQ" ); |
213 | devlink_fmsg_u32_pair_put(fmsg, name: "cqn" , value: icosq->cq.mcq.cqn); |
214 | devlink_fmsg_u32_pair_put(fmsg, name: "cc" , value: icosq->cq.wq.cc); |
215 | devlink_fmsg_u32_pair_put(fmsg, name: "size" , value: mlx5_cqwq_get_size(wq: &icosq->cq.wq)); |
216 | mlx5e_health_fmsg_named_obj_nest_end(fmsg); |
217 | |
218 | mlx5e_health_fmsg_named_obj_nest_end(fmsg); |
219 | } |
220 | |
221 | static void mlx5e_health_rq_put_sw_state(struct devlink_fmsg *fmsg, struct mlx5e_rq *rq) |
222 | { |
223 | int i; |
224 | |
225 | BUILD_BUG_ON_MSG(ARRAY_SIZE(rq_sw_state_type_name) != MLX5E_NUM_RQ_STATES, |
226 | "rq_sw_state_type_name string array must be consistent with MLX5E_RQ_STATE_* enum in en.h" ); |
227 | mlx5e_health_fmsg_named_obj_nest_start(fmsg, name: "SW State" ); |
228 | |
229 | for (i = 0; i < ARRAY_SIZE(rq_sw_state_type_name); ++i) |
230 | devlink_fmsg_u32_pair_put(fmsg, name: rq_sw_state_type_name[i], |
231 | test_bit(i, &rq->state)); |
232 | |
233 | mlx5e_health_fmsg_named_obj_nest_end(fmsg); |
234 | } |
235 | |
236 | static int |
237 | mlx5e_rx_reporter_build_diagnose_output_rq_common(struct mlx5e_rq *rq, |
238 | struct devlink_fmsg *fmsg) |
239 | { |
240 | u16 wqe_counter; |
241 | int wqes_sz; |
242 | u8 hw_state; |
243 | u16 wq_head; |
244 | int err; |
245 | |
246 | err = mlx5e_query_rq_state(dev: rq->mdev, rqn: rq->rqn, state: &hw_state); |
247 | if (err) |
248 | return err; |
249 | |
250 | wqes_sz = mlx5e_rqwq_get_cur_sz(rq); |
251 | wq_head = mlx5e_rqwq_get_head(rq); |
252 | wqe_counter = mlx5e_rqwq_get_wqe_counter(rq); |
253 | |
254 | devlink_fmsg_u32_pair_put(fmsg, name: "rqn" , value: rq->rqn); |
255 | devlink_fmsg_u8_pair_put(fmsg, name: "HW state" , value: hw_state); |
256 | devlink_fmsg_u32_pair_put(fmsg, name: "WQE counter" , value: wqe_counter); |
257 | devlink_fmsg_u32_pair_put(fmsg, name: "posted WQEs" , value: wqes_sz); |
258 | devlink_fmsg_u32_pair_put(fmsg, name: "cc" , value: wq_head); |
259 | mlx5e_health_rq_put_sw_state(fmsg, rq); |
260 | mlx5e_health_cq_diag_fmsg(cq: &rq->cq, fmsg); |
261 | mlx5e_health_eq_diag_fmsg(eq: rq->cq.mcq.eq, fmsg); |
262 | |
263 | if (rq->icosq) { |
264 | struct mlx5e_icosq *icosq = rq->icosq; |
265 | u8 icosq_hw_state; |
266 | int err; |
267 | |
268 | err = mlx5_core_query_sq_state(dev: rq->mdev, sqn: icosq->sqn, state: &icosq_hw_state); |
269 | if (err) |
270 | return err; |
271 | |
272 | mlx5e_reporter_icosq_diagnose(icosq, hw_state: icosq_hw_state, fmsg); |
273 | } |
274 | |
275 | return 0; |
276 | } |
277 | |
278 | static void mlx5e_rx_reporter_build_diagnose_output(struct mlx5e_rq *rq, |
279 | struct devlink_fmsg *fmsg) |
280 | { |
281 | devlink_fmsg_obj_nest_start(fmsg); |
282 | devlink_fmsg_u32_pair_put(fmsg, name: "channel ix" , value: rq->ix); |
283 | mlx5e_rx_reporter_build_diagnose_output_rq_common(rq, fmsg); |
284 | devlink_fmsg_obj_nest_end(fmsg); |
285 | } |
286 | |
287 | static void mlx5e_rx_reporter_diagnose_generic_rq(struct mlx5e_rq *rq, |
288 | struct devlink_fmsg *fmsg) |
289 | { |
290 | struct mlx5e_priv *priv = rq->priv; |
291 | struct mlx5e_params *params; |
292 | u32 rq_stride, rq_sz; |
293 | bool real_time; |
294 | |
295 | params = &priv->channels.params; |
296 | rq_sz = mlx5e_rqwq_get_size(rq); |
297 | real_time = mlx5_is_real_time_rq(mdev: rq->mdev); |
298 | rq_stride = BIT(mlx5e_mpwqe_get_log_stride_size(rq->mdev, params, NULL)); |
299 | |
300 | mlx5e_health_fmsg_named_obj_nest_start(fmsg, name: "RQ" ); |
301 | devlink_fmsg_u8_pair_put(fmsg, name: "type" , value: params->rq_wq_type); |
302 | devlink_fmsg_u64_pair_put(fmsg, name: "stride size" , value: rq_stride); |
303 | devlink_fmsg_u32_pair_put(fmsg, name: "size" , value: rq_sz); |
304 | devlink_fmsg_string_pair_put(fmsg, name: "ts_format" , value: real_time ? "RT" : "FRC" ); |
305 | mlx5e_health_cq_common_diag_fmsg(cq: &rq->cq, fmsg); |
306 | mlx5e_health_fmsg_named_obj_nest_end(fmsg); |
307 | } |
308 | |
309 | static void |
310 | mlx5e_rx_reporter_diagnose_common_ptp_config(struct mlx5e_priv *priv, struct mlx5e_ptp *ptp_ch, |
311 | struct devlink_fmsg *fmsg) |
312 | { |
313 | mlx5e_health_fmsg_named_obj_nest_start(fmsg, name: "PTP" ); |
314 | devlink_fmsg_u32_pair_put(fmsg, name: "filter_type" , value: priv->tstamp.rx_filter); |
315 | mlx5e_rx_reporter_diagnose_generic_rq(rq: &ptp_ch->rq, fmsg); |
316 | mlx5e_health_fmsg_named_obj_nest_end(fmsg); |
317 | } |
318 | |
319 | static void |
320 | mlx5e_rx_reporter_diagnose_common_config(struct devlink_health_reporter *reporter, |
321 | struct devlink_fmsg *fmsg) |
322 | { |
323 | struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); |
324 | struct mlx5e_rq *generic_rq = &priv->channels.c[0]->rq; |
325 | struct mlx5e_ptp *ptp_ch = priv->channels.ptp; |
326 | |
327 | mlx5e_health_fmsg_named_obj_nest_start(fmsg, name: "Common config" ); |
328 | mlx5e_rx_reporter_diagnose_generic_rq(rq: generic_rq, fmsg); |
329 | if (ptp_ch && test_bit(MLX5E_PTP_STATE_RX, ptp_ch->state)) |
330 | mlx5e_rx_reporter_diagnose_common_ptp_config(priv, ptp_ch, fmsg); |
331 | mlx5e_health_fmsg_named_obj_nest_end(fmsg); |
332 | } |
333 | |
/* Like mlx5e_rx_reporter_build_diagnose_output() but labels the object as
 * the "ptp" channel instead of a numeric channel index.
 */
static void mlx5e_rx_reporter_build_diagnose_output_ptp_rq(struct mlx5e_rq *rq,
							   struct devlink_fmsg *fmsg)
{
	devlink_fmsg_obj_nest_start(fmsg);
	devlink_fmsg_string_pair_put(fmsg, "channel", "ptp");
	mlx5e_rx_reporter_build_diagnose_output_rq_common(rq, fmsg);
	devlink_fmsg_obj_nest_end(fmsg);
}
342 | |
343 | static int mlx5e_rx_reporter_diagnose(struct devlink_health_reporter *reporter, |
344 | struct devlink_fmsg *fmsg, |
345 | struct netlink_ext_ack *extack) |
346 | { |
347 | struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); |
348 | struct mlx5e_ptp *ptp_ch = priv->channels.ptp; |
349 | int i; |
350 | |
351 | mutex_lock(&priv->state_lock); |
352 | |
353 | if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) |
354 | goto unlock; |
355 | |
356 | mlx5e_rx_reporter_diagnose_common_config(reporter, fmsg); |
357 | devlink_fmsg_arr_pair_nest_start(fmsg, name: "RQs" ); |
358 | |
359 | for (i = 0; i < priv->channels.num; i++) { |
360 | struct mlx5e_channel *c = priv->channels.c[i]; |
361 | struct mlx5e_rq *rq; |
362 | |
363 | rq = test_bit(MLX5E_CHANNEL_STATE_XSK, c->state) ? |
364 | &c->xskrq : &c->rq; |
365 | |
366 | mlx5e_rx_reporter_build_diagnose_output(rq, fmsg); |
367 | } |
368 | if (ptp_ch && test_bit(MLX5E_PTP_STATE_RX, ptp_ch->state)) |
369 | mlx5e_rx_reporter_build_diagnose_output_ptp_rq(rq: &ptp_ch->rq, fmsg); |
370 | devlink_fmsg_arr_pair_nest_end(fmsg); |
371 | unlock: |
372 | mutex_unlock(lock: &priv->state_lock); |
373 | return 0; |
374 | } |
375 | |
376 | static int mlx5e_rx_reporter_dump_icosq(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, |
377 | void *ctx) |
378 | { |
379 | struct mlx5e_txqsq *icosq = ctx; |
380 | struct mlx5_rsc_key key = {}; |
381 | |
382 | if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) |
383 | return 0; |
384 | |
385 | mlx5e_health_fmsg_named_obj_nest_start(fmsg, name: "SX Slice" ); |
386 | key.size = PAGE_SIZE; |
387 | key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL; |
388 | mlx5e_health_rsc_fmsg_dump(priv, key: &key, fmsg); |
389 | mlx5e_health_fmsg_named_obj_nest_end(fmsg); |
390 | |
391 | mlx5e_health_fmsg_named_obj_nest_start(fmsg, name: "ICOSQ" ); |
392 | |
393 | mlx5e_health_fmsg_named_obj_nest_start(fmsg, name: "QPC" ); |
394 | key.rsc = MLX5_SGMT_TYPE_FULL_QPC; |
395 | key.index1 = icosq->sqn; |
396 | key.num_of_obj1 = 1; |
397 | mlx5e_health_rsc_fmsg_dump(priv, key: &key, fmsg); |
398 | mlx5e_health_fmsg_named_obj_nest_end(fmsg); |
399 | |
400 | mlx5e_health_fmsg_named_obj_nest_start(fmsg, name: "send_buff" ); |
401 | key.rsc = MLX5_SGMT_TYPE_SND_BUFF; |
402 | key.num_of_obj2 = MLX5_RSC_DUMP_ALL; |
403 | mlx5e_health_rsc_fmsg_dump(priv, key: &key, fmsg); |
404 | mlx5e_health_fmsg_named_obj_nest_end(fmsg); |
405 | |
406 | mlx5e_health_fmsg_named_obj_nest_end(fmsg); |
407 | |
408 | return 0; |
409 | } |
410 | |
411 | static int mlx5e_rx_reporter_dump_rq(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, |
412 | void *ctx) |
413 | { |
414 | struct mlx5_rsc_key key = {}; |
415 | struct mlx5e_rq *rq = ctx; |
416 | |
417 | if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) |
418 | return 0; |
419 | |
420 | mlx5e_health_fmsg_named_obj_nest_start(fmsg, name: "RX Slice" ); |
421 | key.size = PAGE_SIZE; |
422 | key.rsc = MLX5_SGMT_TYPE_RX_SLICE_ALL; |
423 | mlx5e_health_rsc_fmsg_dump(priv, key: &key, fmsg); |
424 | mlx5e_health_fmsg_named_obj_nest_end(fmsg); |
425 | |
426 | mlx5e_health_fmsg_named_obj_nest_start(fmsg, name: "RQ" ); |
427 | |
428 | mlx5e_health_fmsg_named_obj_nest_start(fmsg, name: "QPC" ); |
429 | key.rsc = MLX5_SGMT_TYPE_FULL_QPC; |
430 | key.index1 = rq->rqn; |
431 | key.num_of_obj1 = 1; |
432 | mlx5e_health_rsc_fmsg_dump(priv, key: &key, fmsg); |
433 | mlx5e_health_fmsg_named_obj_nest_end(fmsg); |
434 | |
435 | mlx5e_health_fmsg_named_obj_nest_start(fmsg, name: "receive_buff" ); |
436 | key.rsc = MLX5_SGMT_TYPE_RCV_BUFF; |
437 | key.num_of_obj2 = MLX5_RSC_DUMP_ALL; |
438 | mlx5e_health_rsc_fmsg_dump(priv, key: &key, fmsg); |
439 | mlx5e_health_fmsg_named_obj_nest_end(fmsg); |
440 | |
441 | mlx5e_health_fmsg_named_obj_nest_end(fmsg); |
442 | |
443 | return 0; |
444 | } |
445 | |
446 | static int mlx5e_rx_reporter_dump_all_rqs(struct mlx5e_priv *priv, |
447 | struct devlink_fmsg *fmsg) |
448 | { |
449 | struct mlx5e_ptp *ptp_ch = priv->channels.ptp; |
450 | struct mlx5_rsc_key key = {}; |
451 | |
452 | if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) |
453 | return 0; |
454 | |
455 | mlx5e_health_fmsg_named_obj_nest_start(fmsg, name: "RX Slice" ); |
456 | key.size = PAGE_SIZE; |
457 | key.rsc = MLX5_SGMT_TYPE_RX_SLICE_ALL; |
458 | mlx5e_health_rsc_fmsg_dump(priv, key: &key, fmsg); |
459 | mlx5e_health_fmsg_named_obj_nest_end(fmsg); |
460 | devlink_fmsg_arr_pair_nest_start(fmsg, name: "RQs" ); |
461 | |
462 | for (int i = 0; i < priv->channels.num; i++) { |
463 | struct mlx5e_rq *rq = &priv->channels.c[i]->rq; |
464 | |
465 | mlx5e_health_queue_dump(priv, fmsg, queue_idx: rq->rqn, lbl: "RQ" ); |
466 | } |
467 | |
468 | if (ptp_ch && test_bit(MLX5E_PTP_STATE_RX, ptp_ch->state)) |
469 | mlx5e_health_queue_dump(priv, fmsg, queue_idx: ptp_ch->rq.rqn, lbl: "PTP RQ" ); |
470 | |
471 | devlink_fmsg_arr_pair_nest_end(fmsg); |
472 | return 0; |
473 | } |
474 | |
/* Dispatch to the dump callback captured in the error context. */
static int mlx5e_rx_reporter_dump_from_ctx(struct mlx5e_priv *priv,
					   struct mlx5e_err_ctx *err_ctx,
					   struct devlink_fmsg *fmsg)
{
	return err_ctx->dump(priv, fmsg, err_ctx->ctx);
}
481 | |
/* devlink health .dump callback: with a specific error context dump just
 * that object, otherwise dump all RQs.
 */
static int mlx5e_rx_reporter_dump(struct devlink_health_reporter *reporter,
				  struct devlink_fmsg *fmsg, void *context,
				  struct netlink_ext_ack *extack)
{
	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
	struct mlx5e_err_ctx *err_ctx = context;

	if (err_ctx)
		return mlx5e_rx_reporter_dump_from_ctx(priv, err_ctx, fmsg);

	return mlx5e_rx_reporter_dump_all_rqs(priv, fmsg);
}
492 | |
493 | void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq) |
494 | { |
495 | char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; |
496 | struct mlx5e_icosq *icosq = rq->icosq; |
497 | struct mlx5e_priv *priv = rq->priv; |
498 | struct mlx5e_err_ctx err_ctx = {}; |
499 | char icosq_str[32] = {}; |
500 | |
501 | err_ctx.ctx = rq; |
502 | err_ctx.recover = mlx5e_rx_reporter_timeout_recover; |
503 | err_ctx.dump = mlx5e_rx_reporter_dump_rq; |
504 | |
505 | if (icosq) |
506 | snprintf(buf: icosq_str, size: sizeof(icosq_str), fmt: "ICOSQ: 0x%x, " , icosq->sqn); |
507 | snprintf(buf: err_str, size: sizeof(err_str), |
508 | fmt: "RX timeout on channel: %d, %s RQ: 0x%x, CQ: 0x%x" , |
509 | rq->ix, icosq_str, rq->rqn, rq->cq.mcq.cqn); |
510 | |
511 | mlx5e_health_report(priv, reporter: priv->rx_reporter, err_str, err_ctx: &err_ctx); |
512 | } |
513 | |
514 | void mlx5e_reporter_rq_cqe_err(struct mlx5e_rq *rq) |
515 | { |
516 | char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; |
517 | struct mlx5e_priv *priv = rq->priv; |
518 | struct mlx5e_err_ctx err_ctx = {}; |
519 | |
520 | err_ctx.ctx = rq; |
521 | err_ctx.recover = mlx5e_rx_reporter_err_rq_cqe_recover; |
522 | err_ctx.dump = mlx5e_rx_reporter_dump_rq; |
523 | snprintf(buf: err_str, size: sizeof(err_str), fmt: "ERR CQE on RQ: 0x%x" , rq->rqn); |
524 | |
525 | mlx5e_health_report(priv, reporter: priv->rx_reporter, err_str, err_ctx: &err_ctx); |
526 | } |
527 | |
528 | void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq) |
529 | { |
530 | struct mlx5e_priv *priv = icosq->channel->priv; |
531 | char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; |
532 | struct mlx5e_err_ctx err_ctx = {}; |
533 | |
534 | err_ctx.ctx = icosq; |
535 | err_ctx.recover = mlx5e_rx_reporter_err_icosq_cqe_recover; |
536 | err_ctx.dump = mlx5e_rx_reporter_dump_icosq; |
537 | snprintf(buf: err_str, size: sizeof(err_str), fmt: "ERR CQE on ICOSQ: 0x%x" , icosq->sqn); |
538 | |
539 | mlx5e_health_report(priv, reporter: priv->rx_reporter, err_str, err_ctx: &err_ctx); |
540 | } |
541 | |
/* Block ICOSQ CQE recovery on this channel until the matching
 * mlx5e_reporter_icosq_resume_recovery() call releases the lock.
 */
void mlx5e_reporter_icosq_suspend_recovery(struct mlx5e_channel *c)
{
	mutex_lock(&c->icosq_recovery_lock);
}
546 | |
547 | void mlx5e_reporter_icosq_resume_recovery(struct mlx5e_channel *c) |
548 | { |
549 | mutex_unlock(lock: &c->icosq_recovery_lock); |
550 | } |
551 | |
552 | static const struct devlink_health_reporter_ops mlx5_rx_reporter_ops = { |
553 | .name = "rx" , |
554 | .recover = mlx5e_rx_reporter_recover, |
555 | .diagnose = mlx5e_rx_reporter_diagnose, |
556 | .dump = mlx5e_rx_reporter_dump, |
557 | }; |
558 | |
559 | #define MLX5E_REPORTER_RX_GRACEFUL_PERIOD 500 |
560 | |
561 | void mlx5e_reporter_rx_create(struct mlx5e_priv *priv) |
562 | { |
563 | struct devlink_health_reporter *reporter; |
564 | |
565 | reporter = devlink_port_health_reporter_create(port: priv->netdev->devlink_port, |
566 | ops: &mlx5_rx_reporter_ops, |
567 | MLX5E_REPORTER_RX_GRACEFUL_PERIOD, priv); |
568 | if (IS_ERR(ptr: reporter)) { |
569 | netdev_warn(dev: priv->netdev, format: "Failed to create rx reporter, err = %ld\n" , |
570 | PTR_ERR(ptr: reporter)); |
571 | return; |
572 | } |
573 | priv->rx_reporter = reporter; |
574 | } |
575 | |
576 | void mlx5e_reporter_rx_destroy(struct mlx5e_priv *priv) |
577 | { |
578 | if (!priv->rx_reporter) |
579 | return; |
580 | |
581 | devlink_health_reporter_destroy(reporter: priv->rx_reporter); |
582 | priv->rx_reporter = NULL; |
583 | } |
584 | |