1 | // SPDX-License-Identifier: GPL-2.0 |
2 | // Copyright (c) 2019 Mellanox Technologies. |
3 | |
4 | #include "health.h" |
5 | #include "lib/eq.h" |
6 | #include "lib/mlx5.h" |
7 | |
/*
 * Open a named object in @fmsg: start a pair nest labelled @name and then
 * start an object nest inside it. Must be balanced by a call to
 * mlx5e_health_fmsg_named_obj_nest_end().
 */
void mlx5e_health_fmsg_named_obj_nest_start(struct devlink_fmsg *fmsg, char *name)
{
	/* Pair first, object second; the matching end helper unwinds in reverse. */
	devlink_fmsg_pair_nest_start(fmsg, name);
	devlink_fmsg_obj_nest_start(fmsg);
}
13 | |
/*
 * Close a named object previously opened with
 * mlx5e_health_fmsg_named_obj_nest_start(): end the object nest, then end
 * the pair nest that wraps it.
 */
void mlx5e_health_fmsg_named_obj_nest_end(struct devlink_fmsg *fmsg)
{
	/* Reverse order of the start helper: object first, pair second. */
	devlink_fmsg_obj_nest_end(fmsg);
	devlink_fmsg_pair_nest_end(fmsg);
}
19 | |
20 | void mlx5e_health_cq_diag_fmsg(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg) |
21 | { |
22 | u32 out[MLX5_ST_SZ_DW(query_cq_out)] = {}; |
23 | u8 hw_status; |
24 | void *cqc; |
25 | |
26 | mlx5_core_query_cq(dev: cq->mdev, cq: &cq->mcq, out); |
27 | cqc = MLX5_ADDR_OF(query_cq_out, out, cq_context); |
28 | hw_status = MLX5_GET(cqc, cqc, status); |
29 | |
30 | mlx5e_health_fmsg_named_obj_nest_start(fmsg, name: "CQ" ); |
31 | devlink_fmsg_u32_pair_put(fmsg, name: "cqn" , value: cq->mcq.cqn); |
32 | devlink_fmsg_u8_pair_put(fmsg, name: "HW status" , value: hw_status); |
33 | devlink_fmsg_u32_pair_put(fmsg, name: "ci" , value: mlx5_cqwq_get_ci(wq: &cq->wq)); |
34 | devlink_fmsg_u32_pair_put(fmsg, name: "size" , value: mlx5_cqwq_get_size(wq: &cq->wq)); |
35 | mlx5e_health_fmsg_named_obj_nest_end(fmsg); |
36 | } |
37 | |
38 | void mlx5e_health_cq_common_diag_fmsg(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg) |
39 | { |
40 | u8 cq_log_stride; |
41 | u32 cq_sz; |
42 | |
43 | cq_sz = mlx5_cqwq_get_size(wq: &cq->wq); |
44 | cq_log_stride = mlx5_cqwq_get_log_stride_size(wq: &cq->wq); |
45 | |
46 | mlx5e_health_fmsg_named_obj_nest_start(fmsg, name: "CQ" ); |
47 | devlink_fmsg_u64_pair_put(fmsg, name: "stride size" , BIT(cq_log_stride)); |
48 | devlink_fmsg_u32_pair_put(fmsg, name: "size" , value: cq_sz); |
49 | mlx5e_health_fmsg_named_obj_nest_end(fmsg); |
50 | } |
51 | |
52 | void mlx5e_health_eq_diag_fmsg(struct mlx5_eq_comp *eq, struct devlink_fmsg *fmsg) |
53 | { |
54 | mlx5e_health_fmsg_named_obj_nest_start(fmsg, name: "EQ" ); |
55 | devlink_fmsg_u8_pair_put(fmsg, name: "eqn" , value: eq->core.eqn); |
56 | devlink_fmsg_u32_pair_put(fmsg, name: "irqn" , value: eq->core.irqn); |
57 | devlink_fmsg_u32_pair_put(fmsg, name: "vecidx" , value: eq->core.vecidx); |
58 | devlink_fmsg_u32_pair_put(fmsg, name: "ci" , value: eq->core.cons_index); |
59 | devlink_fmsg_u32_pair_put(fmsg, name: "size" , value: eq_get_size(eq: &eq->core)); |
60 | mlx5e_health_fmsg_named_obj_nest_end(fmsg); |
61 | } |
62 | |
/* Create the health reporters for @priv: TX first, then RX. */
void mlx5e_health_create_reporters(struct mlx5e_priv *priv)
{
	mlx5e_reporter_tx_create(priv);
	mlx5e_reporter_rx_create(priv);
}
68 | |
/*
 * Destroy the health reporters for @priv in the reverse of their creation
 * order: RX first, then TX.
 */
void mlx5e_health_destroy_reporters(struct mlx5e_priv *priv)
{
	mlx5e_reporter_rx_destroy(priv);
	mlx5e_reporter_tx_destroy(priv);
}
74 | |
75 | void mlx5e_health_channels_update(struct mlx5e_priv *priv) |
76 | { |
77 | if (priv->tx_reporter) |
78 | devlink_health_reporter_state_update(reporter: priv->tx_reporter, |
79 | state: DEVLINK_HEALTH_REPORTER_STATE_HEALTHY); |
80 | if (priv->rx_reporter) |
81 | devlink_health_reporter_state_update(reporter: priv->rx_reporter, |
82 | state: DEVLINK_HEALTH_REPORTER_STATE_HEALTHY); |
83 | } |
84 | |
85 | int mlx5e_health_sq_to_ready(struct mlx5_core_dev *mdev, struct net_device *dev, u32 sqn) |
86 | { |
87 | struct mlx5e_modify_sq_param msp = {}; |
88 | int err; |
89 | |
90 | msp.curr_state = MLX5_SQC_STATE_ERR; |
91 | msp.next_state = MLX5_SQC_STATE_RST; |
92 | |
93 | err = mlx5e_modify_sq(mdev, sqn, p: &msp); |
94 | if (err) { |
95 | netdev_err(dev, format: "Failed to move sq 0x%x to reset\n" , sqn); |
96 | return err; |
97 | } |
98 | |
99 | memset(&msp, 0, sizeof(msp)); |
100 | msp.curr_state = MLX5_SQC_STATE_RST; |
101 | msp.next_state = MLX5_SQC_STATE_RDY; |
102 | |
103 | err = mlx5e_modify_sq(mdev, sqn, p: &msp); |
104 | if (err) { |
105 | netdev_err(dev, format: "Failed to move sq 0x%x to ready\n" , sqn); |
106 | return err; |
107 | } |
108 | |
109 | return 0; |
110 | } |
111 | |
112 | int mlx5e_health_recover_channels(struct mlx5e_priv *priv) |
113 | { |
114 | int err = 0; |
115 | |
116 | rtnl_lock(); |
117 | mutex_lock(&priv->state_lock); |
118 | |
119 | if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) |
120 | goto out; |
121 | |
122 | err = mlx5e_safe_reopen_channels(priv); |
123 | |
124 | out: |
125 | mutex_unlock(lock: &priv->state_lock); |
126 | rtnl_unlock(); |
127 | |
128 | return err; |
129 | } |
130 | |
131 | int mlx5e_health_channel_eq_recover(struct net_device *dev, struct mlx5_eq_comp *eq, |
132 | struct mlx5e_ch_stats *stats) |
133 | { |
134 | u32 eqe_count; |
135 | |
136 | netdev_err(dev, format: "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n" , |
137 | eq->core.eqn, eq->core.cons_index, eq->core.irqn); |
138 | |
139 | eqe_count = mlx5_eq_poll_irq_disabled(eq); |
140 | if (!eqe_count) |
141 | return -EIO; |
142 | |
143 | netdev_err(dev, format: "Recovered %d eqes on EQ 0x%x\n" , |
144 | eqe_count, eq->core.eqn); |
145 | |
146 | stats->eq_rearm++; |
147 | return 0; |
148 | } |
149 | |
150 | int mlx5e_health_report(struct mlx5e_priv *priv, |
151 | struct devlink_health_reporter *reporter, char *err_str, |
152 | struct mlx5e_err_ctx *err_ctx) |
153 | { |
154 | netdev_err(dev: priv->netdev, format: "%s\n" , err_str); |
155 | |
156 | if (!reporter) |
157 | return err_ctx->recover(err_ctx->ctx); |
158 | |
159 | return devlink_health_report(reporter, msg: err_str, priv_ctx: err_ctx); |
160 | } |
161 | |
162 | #define MLX5_HEALTH_DEVLINK_MAX_SIZE 1024 |
163 | static void mlx5e_health_rsc_fmsg_binary(struct devlink_fmsg *fmsg, |
164 | const void *value, u32 value_len) |
165 | |
166 | { |
167 | u32 data_size; |
168 | u32 offset; |
169 | |
170 | for (offset = 0; offset < value_len; offset += data_size) { |
171 | data_size = value_len - offset; |
172 | if (data_size > MLX5_HEALTH_DEVLINK_MAX_SIZE) |
173 | data_size = MLX5_HEALTH_DEVLINK_MAX_SIZE; |
174 | devlink_fmsg_binary_put(fmsg, value: value + offset, value_len: data_size); |
175 | } |
176 | } |
177 | |
178 | int mlx5e_health_rsc_fmsg_dump(struct mlx5e_priv *priv, struct mlx5_rsc_key *key, |
179 | struct devlink_fmsg *fmsg) |
180 | { |
181 | struct mlx5_core_dev *mdev = priv->mdev; |
182 | struct mlx5_rsc_dump_cmd *cmd; |
183 | int cmd_err, err = 0; |
184 | struct page *page; |
185 | int size; |
186 | |
187 | if (IS_ERR_OR_NULL(ptr: mdev->rsc_dump)) |
188 | return -EOPNOTSUPP; |
189 | |
190 | page = alloc_page(GFP_KERNEL); |
191 | if (!page) |
192 | return -ENOMEM; |
193 | |
194 | devlink_fmsg_binary_pair_nest_start(fmsg, name: "data" ); |
195 | |
196 | cmd = mlx5_rsc_dump_cmd_create(dev: mdev, key); |
197 | if (IS_ERR(ptr: cmd)) { |
198 | err = PTR_ERR(ptr: cmd); |
199 | goto free_page; |
200 | } |
201 | |
202 | do { |
203 | cmd_err = mlx5_rsc_dump_next(dev: mdev, cmd, page, size: &size); |
204 | if (cmd_err < 0) { |
205 | err = cmd_err; |
206 | goto destroy_cmd; |
207 | } |
208 | |
209 | mlx5e_health_rsc_fmsg_binary(fmsg, page_address(page), value_len: size); |
210 | } while (cmd_err > 0); |
211 | |
212 | destroy_cmd: |
213 | mlx5_rsc_dump_cmd_destroy(cmd); |
214 | devlink_fmsg_binary_pair_nest_end(fmsg); |
215 | free_page: |
216 | __free_page(page); |
217 | return err; |
218 | } |
219 | |
220 | void mlx5e_health_queue_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, |
221 | int queue_idx, char *lbl) |
222 | { |
223 | struct mlx5_rsc_key key = {}; |
224 | |
225 | key.rsc = MLX5_SGMT_TYPE_FULL_QPC; |
226 | key.index1 = queue_idx; |
227 | key.size = PAGE_SIZE; |
228 | key.num_of_obj1 = 1; |
229 | |
230 | devlink_fmsg_obj_nest_start(fmsg); |
231 | mlx5e_health_fmsg_named_obj_nest_start(fmsg, name: lbl); |
232 | devlink_fmsg_u32_pair_put(fmsg, name: "index" , value: queue_idx); |
233 | mlx5e_health_rsc_fmsg_dump(priv, key: &key, fmsg); |
234 | mlx5e_health_fmsg_named_obj_nest_end(fmsg); |
235 | devlink_fmsg_obj_nest_end(fmsg); |
236 | } |
237 | |