1// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2// Copyright (c) 2018 Mellanox Technologies
3
4#include <linux/mlx5/driver.h>
5
6#include "mlx5_core.h"
7#include "lib/eq.h"
8#include "lib/events.h"
9#include "hwmon.h"
10
11struct mlx5_event_nb {
12 struct mlx5_nb nb;
13 void *ctx;
14};
15
/* General event handlers for the low level mlx5_core driver.
 *
 * Other major feature-specific events such as clock/eswitch/fpga/FW trace
 * and many others are handled elsewhere, with separate notifier callbacks,
 * specifically by those mlx5 components.
 */
static int any_notifier(struct notifier_block *nb, unsigned long type, void *data);
static int temp_warn(struct notifier_block *nb, unsigned long type, void *data);
static int port_module(struct notifier_block *nb, unsigned long type, void *data);
static int pcie_core(struct notifier_block *nb, unsigned long type, void *data);

/* handler which forwards the event to events->fw_nh, driver notifiers */
static int forward_event(struct notifier_block *nb, unsigned long event, void *data);
29
30static struct mlx5_nb events_nbs_ref[] = {
31 /* Events to be processed by mlx5_core */
32 {.nb.notifier_call = any_notifier, .event_type = MLX5_EVENT_TYPE_NOTIFY_ANY },
33 {.nb.notifier_call = temp_warn, .event_type = MLX5_EVENT_TYPE_TEMP_WARN_EVENT },
34 {.nb.notifier_call = port_module, .event_type = MLX5_EVENT_TYPE_PORT_MODULE_EVENT },
35 {.nb.notifier_call = pcie_core, .event_type = MLX5_EVENT_TYPE_GENERAL_EVENT },
36
37 /* Events to be forwarded (as is) to mlx5 core interfaces (mlx5e/mlx5_ib) */
38 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PORT_CHANGE },
39 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_GENERAL_EVENT },
40 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_OBJECT_CHANGE },
41 /* QP/WQ resource events to forward */
42 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_DCT_DRAINED },
43 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PATH_MIG },
44 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_COMM_EST },
45 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SQ_DRAINED },
46 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SRQ_LAST_WQE },
47 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_WQ_CATAS_ERROR },
48 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PATH_MIG_FAILED },
49 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR },
50 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_WQ_ACCESS_ERROR },
51 /* SRQ events */
52 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SRQ_CATAS_ERROR },
53 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SRQ_RQ_LIMIT },
54};
55
56struct mlx5_events {
57 struct mlx5_core_dev *dev;
58 struct workqueue_struct *wq;
59 struct mlx5_event_nb notifiers[ARRAY_SIZE(events_nbs_ref)];
60 /* driver notifier chain for fw events */
61 struct atomic_notifier_head fw_nh;
62 /* port module events stats */
63 struct mlx5_pme_stats pme_stats;
64 /*pcie_core*/
65 struct work_struct pcie_core_work;
66 /* driver notifier chain for sw events */
67 struct blocking_notifier_head sw_nh;
68};
69
70static const char *eqe_type_str(u8 type)
71{
72 switch (type) {
73 case MLX5_EVENT_TYPE_COMP:
74 return "MLX5_EVENT_TYPE_COMP";
75 case MLX5_EVENT_TYPE_PATH_MIG:
76 return "MLX5_EVENT_TYPE_PATH_MIG";
77 case MLX5_EVENT_TYPE_COMM_EST:
78 return "MLX5_EVENT_TYPE_COMM_EST";
79 case MLX5_EVENT_TYPE_SQ_DRAINED:
80 return "MLX5_EVENT_TYPE_SQ_DRAINED";
81 case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
82 return "MLX5_EVENT_TYPE_SRQ_LAST_WQE";
83 case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
84 return "MLX5_EVENT_TYPE_SRQ_RQ_LIMIT";
85 case MLX5_EVENT_TYPE_CQ_ERROR:
86 return "MLX5_EVENT_TYPE_CQ_ERROR";
87 case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
88 return "MLX5_EVENT_TYPE_WQ_CATAS_ERROR";
89 case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
90 return "MLX5_EVENT_TYPE_PATH_MIG_FAILED";
91 case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
92 return "MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR";
93 case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
94 return "MLX5_EVENT_TYPE_WQ_ACCESS_ERROR";
95 case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
96 return "MLX5_EVENT_TYPE_SRQ_CATAS_ERROR";
97 case MLX5_EVENT_TYPE_INTERNAL_ERROR:
98 return "MLX5_EVENT_TYPE_INTERNAL_ERROR";
99 case MLX5_EVENT_TYPE_PORT_CHANGE:
100 return "MLX5_EVENT_TYPE_PORT_CHANGE";
101 case MLX5_EVENT_TYPE_GPIO_EVENT:
102 return "MLX5_EVENT_TYPE_GPIO_EVENT";
103 case MLX5_EVENT_TYPE_PORT_MODULE_EVENT:
104 return "MLX5_EVENT_TYPE_PORT_MODULE_EVENT";
105 case MLX5_EVENT_TYPE_TEMP_WARN_EVENT:
106 return "MLX5_EVENT_TYPE_TEMP_WARN_EVENT";
107 case MLX5_EVENT_TYPE_REMOTE_CONFIG:
108 return "MLX5_EVENT_TYPE_REMOTE_CONFIG";
109 case MLX5_EVENT_TYPE_DB_BF_CONGESTION:
110 return "MLX5_EVENT_TYPE_DB_BF_CONGESTION";
111 case MLX5_EVENT_TYPE_STALL_EVENT:
112 return "MLX5_EVENT_TYPE_STALL_EVENT";
113 case MLX5_EVENT_TYPE_CMD:
114 return "MLX5_EVENT_TYPE_CMD";
115 case MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED:
116 return "MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED";
117 case MLX5_EVENT_TYPE_VHCA_STATE_CHANGE:
118 return "MLX5_EVENT_TYPE_VHCA_STATE_CHANGE";
119 case MLX5_EVENT_TYPE_PAGE_REQUEST:
120 return "MLX5_EVENT_TYPE_PAGE_REQUEST";
121 case MLX5_EVENT_TYPE_PAGE_FAULT:
122 return "MLX5_EVENT_TYPE_PAGE_FAULT";
123 case MLX5_EVENT_TYPE_PPS_EVENT:
124 return "MLX5_EVENT_TYPE_PPS_EVENT";
125 case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE:
126 return "MLX5_EVENT_TYPE_NIC_VPORT_CHANGE";
127 case MLX5_EVENT_TYPE_FPGA_ERROR:
128 return "MLX5_EVENT_TYPE_FPGA_ERROR";
129 case MLX5_EVENT_TYPE_FPGA_QP_ERROR:
130 return "MLX5_EVENT_TYPE_FPGA_QP_ERROR";
131 case MLX5_EVENT_TYPE_GENERAL_EVENT:
132 return "MLX5_EVENT_TYPE_GENERAL_EVENT";
133 case MLX5_EVENT_TYPE_MONITOR_COUNTER:
134 return "MLX5_EVENT_TYPE_MONITOR_COUNTER";
135 case MLX5_EVENT_TYPE_DEVICE_TRACER:
136 return "MLX5_EVENT_TYPE_DEVICE_TRACER";
137 case MLX5_EVENT_TYPE_OBJECT_CHANGE:
138 return "MLX5_EVENT_TYPE_OBJECT_CHANGE";
139 default:
140 return "Unrecognized event";
141 }
142}
143
144/* handles all FW events, type == eqe->type */
145static int any_notifier(struct notifier_block *nb,
146 unsigned long type, void *data)
147{
148 struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
149 struct mlx5_events *events = event_nb->ctx;
150 struct mlx5_eqe *eqe = data;
151
152 mlx5_core_dbg(events->dev, "Async eqe type %s, subtype (%d)\n",
153 eqe_type_str(eqe->type), eqe->sub_type);
154 return NOTIFY_OK;
155}
156
157#if IS_ENABLED(CONFIG_HWMON)
158static void print_sensor_names_in_bit_set(struct mlx5_core_dev *dev, struct mlx5_hwmon *hwmon,
159 u64 bit_set, int bit_set_offset)
160{
161 unsigned long *bit_set_ptr = (unsigned long *)&bit_set;
162 int num_bits = sizeof(bit_set) * BITS_PER_BYTE;
163 int i;
164
165 for_each_set_bit(i, bit_set_ptr, num_bits) {
166 const char *sensor_name = hwmon_get_sensor_name(hwmon, channel: i + bit_set_offset);
167
168 mlx5_core_warn(dev, "Sensor name[%d]: %s\n", i + bit_set_offset, sensor_name);
169 }
170}
171#endif /* CONFIG_HWMON */
172
173/* type == MLX5_EVENT_TYPE_TEMP_WARN_EVENT */
174static int temp_warn(struct notifier_block *nb, unsigned long type, void *data)
175{
176 struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
177 struct mlx5_events *events = event_nb->ctx;
178 struct mlx5_core_dev *dev = events->dev;
179 struct mlx5_eqe *eqe = data;
180 u64 value_lsb;
181 u64 value_msb;
182
183 value_lsb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_lsb);
184 /* bit 1-63 are not supported for NICs,
185 * hence read only bit 0 (asic) from lsb.
186 */
187 value_lsb &= 0x1;
188 value_msb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_msb);
189
190 if (net_ratelimit()) {
191 mlx5_core_warn(dev, "High temperature on sensors with bit set %#llx %#llx.\n",
192 value_msb, value_lsb);
193#if IS_ENABLED(CONFIG_HWMON)
194 if (dev->hwmon) {
195 print_sensor_names_in_bit_set(dev, hwmon: dev->hwmon, bit_set: value_lsb, bit_set_offset: 0);
196 print_sensor_names_in_bit_set(dev, hwmon: dev->hwmon, bit_set: value_msb,
197 bit_set_offset: sizeof(value_lsb) * BITS_PER_BYTE);
198 }
199#endif
200 }
201
202 return NOTIFY_OK;
203}
204
205/* MLX5_EVENT_TYPE_PORT_MODULE_EVENT */
206static const char *mlx5_pme_status_to_string(enum port_module_event_status_type status)
207{
208 switch (status) {
209 case MLX5_MODULE_STATUS_PLUGGED:
210 return "Cable plugged";
211 case MLX5_MODULE_STATUS_UNPLUGGED:
212 return "Cable unplugged";
213 case MLX5_MODULE_STATUS_ERROR:
214 return "Cable error";
215 case MLX5_MODULE_STATUS_DISABLED:
216 return "Cable disabled";
217 default:
218 return "Unknown status";
219 }
220}
221
222static const char *mlx5_pme_error_to_string(enum port_module_event_error_type error)
223{
224 switch (error) {
225 case MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED:
226 return "Power budget exceeded";
227 case MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX:
228 return "Long Range for non MLNX cable";
229 case MLX5_MODULE_EVENT_ERROR_BUS_STUCK:
230 return "Bus stuck (I2C or data shorted)";
231 case MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT:
232 return "No EEPROM/retry timeout";
233 case MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST:
234 return "Enforce part number list";
235 case MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER:
236 return "Unknown identifier";
237 case MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE:
238 return "High Temperature";
239 case MLX5_MODULE_EVENT_ERROR_BAD_CABLE:
240 return "Bad or shorted cable/module";
241 case MLX5_MODULE_EVENT_ERROR_PCIE_POWER_SLOT_EXCEEDED:
242 return "One or more network ports have been powered down due to insufficient/unadvertised power on the PCIe slot";
243 default:
244 return "Unknown error";
245 }
246}
247
248/* type == MLX5_EVENT_TYPE_PORT_MODULE_EVENT */
249static int port_module(struct notifier_block *nb, unsigned long type, void *data)
250{
251 struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
252 struct mlx5_events *events = event_nb->ctx;
253 struct mlx5_eqe *eqe = data;
254
255 enum port_module_event_status_type module_status;
256 enum port_module_event_error_type error_type;
257 struct mlx5_eqe_port_module *module_event_eqe;
258 const char *status_str;
259 u8 module_num;
260
261 module_event_eqe = &eqe->data.port_module;
262 module_status = module_event_eqe->module_status &
263 PORT_MODULE_EVENT_MODULE_STATUS_MASK;
264 error_type = module_event_eqe->error_type &
265 PORT_MODULE_EVENT_ERROR_TYPE_MASK;
266
267 if (module_status < MLX5_MODULE_STATUS_NUM)
268 events->pme_stats.status_counters[module_status]++;
269
270 if (module_status == MLX5_MODULE_STATUS_ERROR)
271 if (error_type < MLX5_MODULE_EVENT_ERROR_NUM)
272 events->pme_stats.error_counters[error_type]++;
273
274 if (!printk_ratelimit())
275 return NOTIFY_OK;
276
277 module_num = module_event_eqe->module;
278 status_str = mlx5_pme_status_to_string(status: module_status);
279 if (module_status == MLX5_MODULE_STATUS_ERROR) {
280 const char *error_str = mlx5_pme_error_to_string(error: error_type);
281
282 mlx5_core_err(events->dev,
283 "Port module event[error]: module %u, %s, %s\n",
284 module_num, status_str, error_str);
285 } else {
286 mlx5_core_info(events->dev,
287 "Port module event: module %u, %s\n",
288 module_num, status_str);
289 }
290
291 return NOTIFY_OK;
292}
293
/* Values of the MPEIN register pwr_status field, as decoded by
 * mlx5_pcie_event().
 */
enum {
	MLX5_PCI_POWER_COULD_NOT_BE_READ	= 0x0,
	MLX5_PCI_POWER_SUFFICIENT_REPORTED	= 0x1,
	MLX5_PCI_POWER_INSUFFICIENT_REPORTED	= 0x2,
};
299
300static void mlx5_pcie_event(struct work_struct *work)
301{
302 u32 out[MLX5_ST_SZ_DW(mpein_reg)] = {0};
303 u32 in[MLX5_ST_SZ_DW(mpein_reg)] = {0};
304 struct mlx5_events *events;
305 struct mlx5_core_dev *dev;
306 u8 power_status;
307 u16 pci_power;
308
309 events = container_of(work, struct mlx5_events, pcie_core_work);
310 dev = events->dev;
311
312 if (!MLX5_CAP_MCAM_FEATURE(dev, pci_status_and_power))
313 return;
314
315 mlx5_core_access_reg(dev, data_in: in, size_in: sizeof(in), data_out: out, size_out: sizeof(out),
316 reg_num: MLX5_REG_MPEIN, arg: 0, write: 0);
317 power_status = MLX5_GET(mpein_reg, out, pwr_status);
318 pci_power = MLX5_GET(mpein_reg, out, pci_power);
319
320 switch (power_status) {
321 case MLX5_PCI_POWER_COULD_NOT_BE_READ:
322 mlx5_core_info_rl(dev,
323 "PCIe slot power capability was not advertised.\n");
324 break;
325 case MLX5_PCI_POWER_INSUFFICIENT_REPORTED:
326 mlx5_core_warn_rl(dev,
327 "Detected insufficient power on the PCIe slot (%uW).\n",
328 pci_power);
329 break;
330 case MLX5_PCI_POWER_SUFFICIENT_REPORTED:
331 mlx5_core_info_rl(dev,
332 "PCIe slot advertised sufficient power (%uW).\n",
333 pci_power);
334 break;
335 }
336}
337
338static int pcie_core(struct notifier_block *nb, unsigned long type, void *data)
339{
340 struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb,
341 struct mlx5_event_nb,
342 nb);
343 struct mlx5_events *events = event_nb->ctx;
344 struct mlx5_eqe *eqe = data;
345
346 switch (eqe->sub_type) {
347 case MLX5_GENERAL_SUBTYPE_PCI_POWER_CHANGE_EVENT:
348 queue_work(wq: events->wq, work: &events->pcie_core_work);
349 break;
350 default:
351 return NOTIFY_DONE;
352 }
353
354 return NOTIFY_OK;
355}
356
357void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats)
358{
359 *stats = dev->priv.events->pme_stats;
360}
361
362/* forward event as is to registered interfaces (mlx5e/mlx5_ib) */
363static int forward_event(struct notifier_block *nb, unsigned long event, void *data)
364{
365 struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
366 struct mlx5_events *events = event_nb->ctx;
367 struct mlx5_eqe *eqe = data;
368
369 mlx5_core_dbg(events->dev, "Async eqe type %s, subtype (%d) forward to interfaces\n",
370 eqe_type_str(eqe->type), eqe->sub_type);
371 atomic_notifier_call_chain(nh: &events->fw_nh, val: event, v: data);
372 return NOTIFY_OK;
373}
374
375int mlx5_events_init(struct mlx5_core_dev *dev)
376{
377 struct mlx5_events *events = kzalloc(sizeof(*events), GFP_KERNEL);
378
379 if (!events)
380 return -ENOMEM;
381
382 ATOMIC_INIT_NOTIFIER_HEAD(&events->fw_nh);
383 events->dev = dev;
384 dev->priv.events = events;
385 events->wq = create_singlethread_workqueue("mlx5_events");
386 if (!events->wq) {
387 kfree(objp: events);
388 return -ENOMEM;
389 }
390 INIT_WORK(&events->pcie_core_work, mlx5_pcie_event);
391 BLOCKING_INIT_NOTIFIER_HEAD(&events->sw_nh);
392
393 return 0;
394}
395
396void mlx5_events_cleanup(struct mlx5_core_dev *dev)
397{
398 destroy_workqueue(wq: dev->priv.events->wq);
399 kvfree(addr: dev->priv.events);
400}
401
402void mlx5_events_start(struct mlx5_core_dev *dev)
403{
404 struct mlx5_events *events = dev->priv.events;
405 int i;
406
407 for (i = 0; i < ARRAY_SIZE(events_nbs_ref); i++) {
408 events->notifiers[i].nb = events_nbs_ref[i];
409 events->notifiers[i].ctx = events;
410 mlx5_eq_notifier_register(dev, nb: &events->notifiers[i].nb);
411 }
412}
413
414void mlx5_events_stop(struct mlx5_core_dev *dev)
415{
416 struct mlx5_events *events = dev->priv.events;
417 int i;
418
419 for (i = ARRAY_SIZE(events_nbs_ref) - 1; i >= 0 ; i--)
420 mlx5_eq_notifier_unregister(dev, nb: &events->notifiers[i].nb);
421 flush_workqueue(events->wq);
422}
423
424/* This API is used only for processing and forwarding firmware
425 * events to mlx5 consumer.
426 */
427int mlx5_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb)
428{
429 struct mlx5_events *events = dev->priv.events;
430
431 return atomic_notifier_chain_register(nh: &events->fw_nh, nb);
432}
433EXPORT_SYMBOL(mlx5_notifier_register);
434
435int mlx5_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb)
436{
437 struct mlx5_events *events = dev->priv.events;
438
439 return atomic_notifier_chain_unregister(nh: &events->fw_nh, nb);
440}
441EXPORT_SYMBOL(mlx5_notifier_unregister);
442
443int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, void *data)
444{
445 return atomic_notifier_call_chain(nh: &events->fw_nh, val: event, v: data);
446}
447
448/* This API is used only for processing and forwarding driver-specific
449 * events to mlx5 consumers.
450 */
451int mlx5_blocking_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb)
452{
453 struct mlx5_events *events = dev->priv.events;
454
455 return blocking_notifier_chain_register(nh: &events->sw_nh, nb);
456}
457EXPORT_SYMBOL(mlx5_blocking_notifier_register);
458
459int mlx5_blocking_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb)
460{
461 struct mlx5_events *events = dev->priv.events;
462
463 return blocking_notifier_chain_unregister(nh: &events->sw_nh, nb);
464}
465EXPORT_SYMBOL(mlx5_blocking_notifier_unregister);
466
467int mlx5_blocking_notifier_call_chain(struct mlx5_core_dev *dev, unsigned int event,
468 void *data)
469{
470 struct mlx5_events *events = dev->priv.events;
471
472 return blocking_notifier_call_chain(nh: &events->sw_nh, val: event, v: data);
473}
474

/* Source code of linux/drivers/net/ethernet/mellanox/mlx5/core/events.c */