// SPDX-License-Identifier: GPL-2.0
/*
 * Implement the AER root port service driver. The driver registers an IRQ
 * handler. When a root port triggers an AER interrupt, the IRQ handler
 * collects Root Port status and schedules work.
 *
 * Copyright (C) 2006 Intel Corp.
 *	Tom Long Nguyen (tom.l.nguyen@intel.com)
 *	Zhang Yanmin (yanmin.zhang@intel.com)
 *
 * (C) Copyright 2009 Hewlett-Packard Development Company, L.P.
 *	Andrew Patterson <andrew.patterson@hp.com>
 */

#define pr_fmt(fmt) "AER: " fmt
#define dev_fmt pr_fmt

#include <linux/bitops.h>
#include <linux/cper.h>
#include <linux/dev_printk.h>
#include <linux/pci.h>
#include <linux/pci-acpi.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/pm.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/kfifo.h>
#include <linux/ratelimit.h>
#include <linux/slab.h>
#include <acpi/apei.h>
#include <acpi/ghes.h>
#include <ras/ras_event.h>

#include "../pci.h"
#include "portdrv.h"

#define aer_printk(level, pdev, fmt, arg...) \
	dev_printk(level, &(pdev)->dev, fmt, ##arg)

#define AER_ERROR_SOURCES_MAX		128

#define AER_MAX_TYPEOF_COR_ERRS		16	/* as per PCI_ERR_COR_STATUS */
#define AER_MAX_TYPEOF_UNCOR_ERRS	27	/* as per PCI_ERR_UNCOR_STATUS */

struct aer_err_source {
	u32 status;			/* PCI_ERR_ROOT_STATUS */
	u32 id;				/* PCI_ERR_ROOT_ERR_SRC */
};

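/*
 * Per-service state: the hard IRQ handler pushes error sources into
 * aer_fifo and the aer_isr() IRQ thread drains it.
 */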
struct aer_rpc {
	struct pci_dev *rpd;		/* Root Port device */
	DECLARE_KFIFO(aer_fifo, struct aer_err_source, AER_ERROR_SOURCES_MAX);
};

/* AER info for the device */
struct aer_info {

	/*
	 * Fields for all AER capable devices. They indicate the errors
	 * "as seen by this device". Note that this may mean that if an
	 * Endpoint is causing problems, the AER counters may increment
	 * at its link partner (e.g. Root Port) because the errors will be
	 * "seen" by the link partner and not the problematic Endpoint
	 * itself (which may report all counters as 0 as it never saw any
	 * problems).
	 */
	/* Counters for different types of correctable errors */
	u64 dev_cor_errs[AER_MAX_TYPEOF_COR_ERRS];
	/* Counters for different types of fatal uncorrectable errors */
	u64 dev_fatal_errs[AER_MAX_TYPEOF_UNCOR_ERRS];
	/* Counters for different types of nonfatal uncorrectable errors */
	u64 dev_nonfatal_errs[AER_MAX_TYPEOF_UNCOR_ERRS];
	/* Total number of ERR_COR sent by this device */
	u64 dev_total_cor_errs;
	/* Total number of ERR_FATAL sent by this device */
	u64 dev_total_fatal_errs;
	/* Total number of ERR_NONFATAL sent by this device */
	u64 dev_total_nonfatal_errs;

	/*
	 * Fields for Root Ports & Root Complex Event Collectors only; these
	 * indicate the total number of ERR_COR, ERR_FATAL, and ERR_NONFATAL
	 * messages received by the Root Port / Event Collector, INCLUDING the
	 * ones that are generated internally (by the Root Port itself)
	 */
	u64 rootport_total_cor_errs;
	u64 rootport_total_fatal_errs;
	u64 rootport_total_nonfatal_errs;

	/* Ratelimits for errors */
	struct ratelimit_state correctable_ratelimit;
	struct ratelimit_state nonfatal_ratelimit;
};

#define AER_LOG_TLP_MASKS		(PCI_ERR_UNC_POISON_TLP|	\
					PCI_ERR_UNC_ECRC|		\
					PCI_ERR_UNC_UNSUP|		\
					PCI_ERR_UNC_COMP_ABORT|		\
					PCI_ERR_UNC_UNX_COMP|		\
					PCI_ERR_UNC_MALF_TLP)

#define SYSTEM_ERROR_INTR_ON_MESG_MASK	(PCI_EXP_RTCTL_SECEE|	\
					PCI_EXP_RTCTL_SENFEE|	\
					PCI_EXP_RTCTL_SEFEE)
#define ROOT_PORT_INTR_ON_MESG_MASK	(PCI_ERR_ROOT_CMD_COR_EN|	\
					PCI_ERR_ROOT_CMD_NONFATAL_EN|	\
					PCI_ERR_ROOT_CMD_FATAL_EN)
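
/*
 * PCI_ERR_ROOT_ERR_SRC packs two Requester IDs: the source of the last
 * ERR_COR message in the low 16 bits and the source of the last
 * ERR_NONFATAL/ERR_FATAL message in the high 16 bits.
 */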
#define ERR_COR_ID(d)			(d & 0xffff)
#define ERR_UNCOR_ID(d)			(d >> 16)

#define AER_ERR_STATUS_MASK		(PCI_ERR_ROOT_UNCOR_RCV |	\
					PCI_ERR_ROOT_COR_RCV |		\
					PCI_ERR_ROOT_MULTI_COR_RCV |	\
					PCI_ERR_ROOT_MULTI_UNCOR_RCV)

static int pcie_aer_disable;
static pci_ers_result_t aer_root_reset(struct pci_dev *dev);

void pci_no_aer(void)
{
	pcie_aer_disable = 1;
}

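/*
 * AER is usable only when not disabled on the command line ("pci=noaer")
 * and when MSI/MSI-X is available to deliver the Root Port interrupt.
 */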
bool pci_aer_available(void)
{
	return !pcie_aer_disable && pci_msi_enabled();
}

#ifdef CONFIG_PCIE_ECRC

#define ECRC_POLICY_DEFAULT 0		/* ECRC set by BIOS */
#define ECRC_POLICY_OFF     1		/* ECRC off for performance */
#define ECRC_POLICY_ON      2		/* ECRC on for data integrity */

static int ecrc_policy = ECRC_POLICY_DEFAULT;

static const char * const ecrc_policy_str[] = {
	[ECRC_POLICY_DEFAULT] = "bios",
	[ECRC_POLICY_OFF] = "off",
	[ECRC_POLICY_ON] = "on"
};

/**
 * enable_ecrc_checking - enable PCIe ECRC checking for a device
 * @dev: the PCI device
 *
 * Return: 0 on success, or negative on failure.
 */
static int enable_ecrc_checking(struct pci_dev *dev)
{
	int aer = dev->aer_cap;
	u32 reg32;

	if (!aer)
		return -ENODEV;

	pci_read_config_dword(dev, aer + PCI_ERR_CAP, &reg32);
	if (reg32 & PCI_ERR_CAP_ECRC_GENC)
		reg32 |= PCI_ERR_CAP_ECRC_GENE;
	if (reg32 & PCI_ERR_CAP_ECRC_CHKC)
		reg32 |= PCI_ERR_CAP_ECRC_CHKE;
	pci_write_config_dword(dev, aer + PCI_ERR_CAP, reg32);

	return 0;
}

/**
 * disable_ecrc_checking - disable PCIe ECRC checking for a device
 * @dev: the PCI device
 *
 * Return: 0 on success, or negative on failure.
 */
static int disable_ecrc_checking(struct pci_dev *dev)
{
	int aer = dev->aer_cap;
	u32 reg32;

	if (!aer)
		return -ENODEV;

	pci_read_config_dword(dev, aer + PCI_ERR_CAP, &reg32);
	reg32 &= ~(PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE);
	pci_write_config_dword(dev, aer + PCI_ERR_CAP, reg32);

	return 0;
}

/**
 * pcie_set_ecrc_checking - set/unset PCIe ECRC checking for a device based
 * on global policy
 * @dev: the PCI device
 */
void pcie_set_ecrc_checking(struct pci_dev *dev)
{
	if (!pcie_aer_is_native(dev))
		return;

	switch (ecrc_policy) {
	case ECRC_POLICY_DEFAULT:
		return;
	case ECRC_POLICY_OFF:
		disable_ecrc_checking(dev);
		break;
	case ECRC_POLICY_ON:
		enable_ecrc_checking(dev);
		break;
	default:
		return;
	}
}

/**
 * pcie_ecrc_get_policy - parse kernel command-line ecrc option
 * @str: ECRC policy from kernel command line to use
 */
void pcie_ecrc_get_policy(char *str)
{
	int i;

	i = match_string(ecrc_policy_str, ARRAY_SIZE(ecrc_policy_str), str);
	if (i < 0)
		return;

	ecrc_policy = i;
}
#endif	/* CONFIG_PCIE_ECRC */

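/*
 * Device Control bits that enable error reporting: correctable, nonfatal,
 * fatal, and Unsupported Request.
 */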
#define PCI_EXP_AER_FLAGS	(PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE | \
				 PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE)

int pcie_aer_is_native(struct pci_dev *dev)
{
	struct pci_host_bridge *host = pci_find_host_bridge(dev->bus);

	if (!dev->aer_cap)
		return 0;

	return pcie_ports_native || host->native_aer;
}
EXPORT_SYMBOL_NS_GPL(pcie_aer_is_native, "CXL");

static int pci_enable_pcie_error_reporting(struct pci_dev *dev)
{
	int rc;

	if (!pcie_aer_is_native(dev))
		return -EIO;

	rc = pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_AER_FLAGS);
	return pcibios_err_to_errno(rc);
}

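/**
 * pci_aer_clear_nonfatal_status - clear uncorrectable, nonfatal error status
 * @dev: the PCI device
 *
 * Clear only the bits in PCI_ERR_UNCOR_STATUS whose severity is configured
 * as nonfatal in PCI_ERR_UNCOR_SEVER.
 *
 * Return: 0 on success, or negative on failure.
 */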
int pci_aer_clear_nonfatal_status(struct pci_dev *dev)
{
	int aer = dev->aer_cap;
	u32 status, sev;

	if (!pcie_aer_is_native(dev))
		return -EIO;

	/* Clear status bits for ERR_NONFATAL errors only */
	pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, &status);
	pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_SEVER, &sev);
	status &= ~sev;
	if (status)
		pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, status);

	return 0;
}
EXPORT_SYMBOL_GPL(pci_aer_clear_nonfatal_status);

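/**
 * pci_aer_clear_fatal_status - clear uncorrectable, fatal error status
 * @dev: the PCI device
 *
 * Clear only the bits in PCI_ERR_UNCOR_STATUS whose severity is configured
 * as fatal in PCI_ERR_UNCOR_SEVER.
 */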
void pci_aer_clear_fatal_status(struct pci_dev *dev)
{
	int aer = dev->aer_cap;
	u32 status, sev;

	if (!pcie_aer_is_native(dev))
		return;

	/* Clear status bits for ERR_FATAL errors only */
	pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, &status);
	pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_SEVER, &sev);
	status &= sev;
	if (status)
		pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, status);
}

/**
 * pci_aer_raw_clear_status - Clear AER error registers.
 * @dev: the PCI device
 *
 * Clear AER error status registers unconditionally, regardless of
 * whether they're owned by firmware or the OS.
 *
 * Return: 0 on success, or negative on failure.
 */
int pci_aer_raw_clear_status(struct pci_dev *dev)
{
	int aer = dev->aer_cap;
	u32 status;
	int port_type;

	if (!aer)
		return -EIO;

	port_type = pci_pcie_type(dev);
	if (port_type == PCI_EXP_TYPE_ROOT_PORT ||
	    port_type == PCI_EXP_TYPE_RC_EC) {
		pci_read_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, &status);
		pci_write_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, status);
	}

	pci_read_config_dword(dev, aer + PCI_ERR_COR_STATUS, &status);
	pci_write_config_dword(dev, aer + PCI_ERR_COR_STATUS, status);

	pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, &status);
	pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, status);

	return 0;
}

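/*
 * Like pci_aer_raw_clear_status(), but only when the OS owns the
 * AER Capability.
 */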
int pci_aer_clear_status(struct pci_dev *dev)
{
	if (!pcie_aer_is_native(dev))
		return -EIO;

	return pci_aer_raw_clear_status(dev);
}

void pci_save_aer_state(struct pci_dev *dev)
{
	int aer = dev->aer_cap;
	struct pci_cap_saved_state *save_state;
	u32 *cap;

	if (!aer)
		return;

	save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_ERR);
	if (!save_state)
		return;

	cap = &save_state->cap.data[0];
	pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, cap++);
	pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_SEVER, cap++);
	pci_read_config_dword(dev, aer + PCI_ERR_COR_MASK, cap++);
	pci_read_config_dword(dev, aer + PCI_ERR_CAP, cap++);
	if (pcie_cap_has_rtctl(dev))
		pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, cap++);
}

void pci_restore_aer_state(struct pci_dev *dev)
{
	int aer = dev->aer_cap;
	struct pci_cap_saved_state *save_state;
	u32 *cap;

	if (!aer)
		return;

	save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_ERR);
	if (!save_state)
		return;

	cap = &save_state->cap.data[0];
	pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, *cap++);
	pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_SEVER, *cap++);
	pci_write_config_dword(dev, aer + PCI_ERR_COR_MASK, *cap++);
	pci_write_config_dword(dev, aer + PCI_ERR_CAP, *cap++);
	if (pcie_cap_has_rtctl(dev))
		pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, *cap++);
}

void pci_aer_init(struct pci_dev *dev)
{
	int n;

	dev->aer_cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
	if (!dev->aer_cap)
		return;

	dev->aer_info = kzalloc(sizeof(*dev->aer_info), GFP_KERNEL);

	ratelimit_state_init(&dev->aer_info->correctable_ratelimit,
			     DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST);
	ratelimit_state_init(&dev->aer_info->nonfatal_ratelimit,
			     DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST);

	/*
	 * We save/restore PCI_ERR_UNCOR_MASK, PCI_ERR_UNCOR_SEVER,
	 * PCI_ERR_COR_MASK, and PCI_ERR_CAP. Root and Root Complex Event
	 * Collectors also implement PCI_ERR_ROOT_COMMAND (PCIe r6.0, sec
	 * 7.8.4.9).
	 */
	n = pcie_cap_has_rtctl(dev) ? 5 : 4;
	pci_add_ext_cap_save_buffer(dev, PCI_EXT_CAP_ID_ERR, sizeof(u32) * n);

	pci_aer_clear_status(dev);

	if (pci_aer_available())
		pci_enable_pcie_error_reporting(dev);

	pcie_set_ecrc_checking(dev);
}

void pci_aer_exit(struct pci_dev *dev)
{
	kfree(dev->aer_info);
	dev->aer_info = NULL;
}

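/*
 * Decode helpers: map an error bitmask to the PCIe agent (receiver,
 * requester, completer, transmitter) and layer that reported the error.
 */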
#define AER_AGENT_RECEIVER		0
#define AER_AGENT_REQUESTER		1
#define AER_AGENT_COMPLETER		2
#define AER_AGENT_TRANSMITTER		3

#define AER_AGENT_REQUESTER_MASK(t)	((t == AER_CORRECTABLE) ?	\
	0 : (PCI_ERR_UNC_COMP_TIME|PCI_ERR_UNC_UNSUP))
#define AER_AGENT_COMPLETER_MASK(t)	((t == AER_CORRECTABLE) ?	\
	0 : PCI_ERR_UNC_COMP_ABORT)
#define AER_AGENT_TRANSMITTER_MASK(t)	((t == AER_CORRECTABLE) ?	\
	(PCI_ERR_COR_REP_ROLL|PCI_ERR_COR_REP_TIMER) : 0)

#define AER_GET_AGENT(t, e)						\
	((e & AER_AGENT_COMPLETER_MASK(t)) ? AER_AGENT_COMPLETER :	\
	(e & AER_AGENT_REQUESTER_MASK(t)) ? AER_AGENT_REQUESTER :	\
	(e & AER_AGENT_TRANSMITTER_MASK(t)) ? AER_AGENT_TRANSMITTER :	\
	AER_AGENT_RECEIVER)

#define AER_PHYSICAL_LAYER_ERROR	0
#define AER_DATA_LINK_LAYER_ERROR	1
#define AER_TRANSACTION_LAYER_ERROR	2

#define AER_PHYSICAL_LAYER_ERROR_MASK(t) ((t == AER_CORRECTABLE) ?	\
	PCI_ERR_COR_RCVR : 0)
#define AER_DATA_LINK_LAYER_ERROR_MASK(t) ((t == AER_CORRECTABLE) ?	\
	(PCI_ERR_COR_BAD_TLP|						\
	PCI_ERR_COR_BAD_DLLP|						\
	PCI_ERR_COR_REP_ROLL|						\
	PCI_ERR_COR_REP_TIMER) : PCI_ERR_UNC_DLP)

#define AER_GET_LAYER_ERROR(t, e)					\
	((e & AER_PHYSICAL_LAYER_ERROR_MASK(t)) ? AER_PHYSICAL_LAYER_ERROR : \
	(e & AER_DATA_LINK_LAYER_ERROR_MASK(t)) ? AER_DATA_LINK_LAYER_ERROR : \
	AER_TRANSACTION_LAYER_ERROR)

/*
 * AER error strings
 */
static const char * const aer_error_severity_string[] = {
	"Uncorrectable (Non-Fatal)",
	"Uncorrectable (Fatal)",
	"Correctable"
};

static const char *aer_error_layer[] = {
	"Physical Layer",
	"Data Link Layer",
	"Transaction Layer"
};

static const char *aer_correctable_error_string[] = {
	"RxErr",		/* Bit Position 0	*/
	NULL,
	NULL,
	NULL,
	NULL,
	NULL,
	"BadTLP",		/* Bit Position 6	*/
	"BadDLLP",		/* Bit Position 7	*/
	"Rollover",		/* Bit Position 8	*/
	NULL,
	NULL,
	NULL,
	"Timeout",		/* Bit Position 12	*/
	"NonFatalErr",		/* Bit Position 13	*/
	"CorrIntErr",		/* Bit Position 14	*/
	"HeaderOF",		/* Bit Position 15	*/
	NULL,			/* Bit Position 16	*/
	NULL,			/* Bit Position 17	*/
	NULL,			/* Bit Position 18	*/
	NULL,			/* Bit Position 19	*/
	NULL,			/* Bit Position 20	*/
	NULL,			/* Bit Position 21	*/
	NULL,			/* Bit Position 22	*/
	NULL,			/* Bit Position 23	*/
	NULL,			/* Bit Position 24	*/
	NULL,			/* Bit Position 25	*/
	NULL,			/* Bit Position 26	*/
	NULL,			/* Bit Position 27	*/
	NULL,			/* Bit Position 28	*/
	NULL,			/* Bit Position 29	*/
	NULL,			/* Bit Position 30	*/
	NULL,			/* Bit Position 31	*/
};

static const char *aer_uncorrectable_error_string[] = {
	"Undefined",		/* Bit Position 0	*/
	NULL,
	NULL,
	NULL,
	"DLP",			/* Bit Position 4	*/
	"SDES",			/* Bit Position 5	*/
	NULL,
	NULL,
	NULL,
	NULL,
	NULL,
	NULL,
	"TLP",			/* Bit Position 12	*/
	"FCP",			/* Bit Position 13	*/
	"CmpltTO",		/* Bit Position 14	*/
	"CmpltAbrt",		/* Bit Position 15	*/
	"UnxCmplt",		/* Bit Position 16	*/
	"RxOF",			/* Bit Position 17	*/
	"MalfTLP",		/* Bit Position 18	*/
	"ECRC",			/* Bit Position 19	*/
	"UnsupReq",		/* Bit Position 20	*/
	"ACSViol",		/* Bit Position 21	*/
	"UncorrIntErr",		/* Bit Position 22	*/
	"BlockedTLP",		/* Bit Position 23	*/
	"AtomicOpBlocked",	/* Bit Position 24	*/
	"TLPBlockedErr",	/* Bit Position 25	*/
	"PoisonTLPBlocked",	/* Bit Position 26	*/
	NULL,			/* Bit Position 27	*/
	NULL,			/* Bit Position 28	*/
	NULL,			/* Bit Position 29	*/
	NULL,			/* Bit Position 30	*/
	NULL,			/* Bit Position 31	*/
};

static const char *aer_agent_string[] = {
	"Receiver ID",
	"Requester ID",
	"Completer ID",
	"Transmitter ID"
};

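/*
 * Generate a sysfs "show" callback that dumps one per-bit counter array
 * along with its running total, e.g. the aer_dev_correctable attribute.
 */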
#define aer_stats_dev_attr(name, stats_array, strings_array,		\
			   total_string, total_field)			\
static ssize_t								\
name##_show(struct device *dev, struct device_attribute *attr,		\
	    char *buf)							\
{									\
	unsigned int i;							\
	struct pci_dev *pdev = to_pci_dev(dev);				\
	u64 *stats = pdev->aer_info->stats_array;			\
	size_t len = 0;							\
									\
	for (i = 0; i < ARRAY_SIZE(pdev->aer_info->stats_array); i++) {	\
		if (strings_array[i])					\
			len += sysfs_emit_at(buf, len, "%s %llu\n",	\
					     strings_array[i],		\
					     stats[i]);			\
		else if (stats[i])					\
			len += sysfs_emit_at(buf, len,			\
					     #stats_array "_bit[%d] %llu\n",\
					     i, stats[i]);		\
	}								\
	len += sysfs_emit_at(buf, len, "TOTAL_%s %llu\n", total_string,	\
			     pdev->aer_info->total_field);		\
	return len;							\
}									\
static DEVICE_ATTR_RO(name)

aer_stats_dev_attr(aer_dev_correctable, dev_cor_errs,
		   aer_correctable_error_string, "ERR_COR",
		   dev_total_cor_errs);
aer_stats_dev_attr(aer_dev_fatal, dev_fatal_errs,
		   aer_uncorrectable_error_string, "ERR_FATAL",
		   dev_total_fatal_errs);
aer_stats_dev_attr(aer_dev_nonfatal, dev_nonfatal_errs,
		   aer_uncorrectable_error_string, "ERR_NONFATAL",
		   dev_total_nonfatal_errs);

#define aer_stats_rootport_attr(name, field)				\
static ssize_t								\
name##_show(struct device *dev, struct device_attribute *attr,		\
	    char *buf)							\
{									\
	struct pci_dev *pdev = to_pci_dev(dev);				\
	return sysfs_emit(buf, "%llu\n", pdev->aer_info->field);	\
}									\
static DEVICE_ATTR_RO(name)

aer_stats_rootport_attr(aer_rootport_total_err_cor,
			rootport_total_cor_errs);
aer_stats_rootport_attr(aer_rootport_total_err_fatal,
			rootport_total_fatal_errs);
aer_stats_rootport_attr(aer_rootport_total_err_nonfatal,
			rootport_total_nonfatal_errs);

static struct attribute *aer_stats_attrs[] __ro_after_init = {
	&dev_attr_aer_dev_correctable.attr,
	&dev_attr_aer_dev_fatal.attr,
	&dev_attr_aer_dev_nonfatal.attr,
	&dev_attr_aer_rootport_total_err_cor.attr,
	&dev_attr_aer_rootport_total_err_fatal.attr,
	&dev_attr_aer_rootport_total_err_nonfatal.attr,
	NULL
};

static umode_t aer_stats_attrs_are_visible(struct kobject *kobj,
					   struct attribute *a, int n)
{
	struct device *dev = kobj_to_dev(kobj);
	struct pci_dev *pdev = to_pci_dev(dev);

	if (!pdev->aer_info)
		return 0;

	if ((a == &dev_attr_aer_rootport_total_err_cor.attr ||
	     a == &dev_attr_aer_rootport_total_err_fatal.attr ||
	     a == &dev_attr_aer_rootport_total_err_nonfatal.attr) &&
	    ((pci_pcie_type(pdev) != PCI_EXP_TYPE_ROOT_PORT) &&
	     (pci_pcie_type(pdev) != PCI_EXP_TYPE_RC_EC)))
		return 0;

	return a->mode;
}

const struct attribute_group aer_stats_attr_group = {
	.attrs	= aer_stats_attrs,
	.is_visible = aer_stats_attrs_are_visible,
};

/*
 * Ratelimit interval
 * <=0: disabled with ratelimit.interval = 0
 * >0: enabled with ratelimit.interval in ms
 */
#define aer_ratelimit_interval_attr(name, ratelimit)			\
	static ssize_t							\
	name##_show(struct device *dev, struct device_attribute *attr,	\
		    char *buf)						\
	{								\
		struct pci_dev *pdev = to_pci_dev(dev);			\
									\
		return sysfs_emit(buf, "%d\n",				\
				  pdev->aer_info->ratelimit.interval);	\
	}								\
									\
	static ssize_t							\
	name##_store(struct device *dev, struct device_attribute *attr,\
		     const char *buf, size_t count)			\
	{								\
		struct pci_dev *pdev = to_pci_dev(dev);			\
		int interval;						\
									\
		if (!capable(CAP_SYS_ADMIN))				\
			return -EPERM;					\
									\
		if (kstrtoint(buf, 0, &interval) < 0)			\
			return -EINVAL;					\
									\
		if (interval <= 0)					\
			interval = 0;					\
		else							\
			interval = msecs_to_jiffies(interval);		\
									\
		pdev->aer_info->ratelimit.interval = interval;		\
									\
		return count;						\
	}								\
	static DEVICE_ATTR_RW(name);

#define aer_ratelimit_burst_attr(name, ratelimit)			\
	static ssize_t							\
	name##_show(struct device *dev, struct device_attribute *attr,	\
		    char *buf)						\
	{								\
		struct pci_dev *pdev = to_pci_dev(dev);			\
									\
		return sysfs_emit(buf, "%d\n",				\
				  pdev->aer_info->ratelimit.burst);	\
	}								\
									\
	static ssize_t							\
	name##_store(struct device *dev, struct device_attribute *attr,\
		     const char *buf, size_t count)			\
	{								\
		struct pci_dev *pdev = to_pci_dev(dev);			\
		int burst;						\
									\
		if (!capable(CAP_SYS_ADMIN))				\
			return -EPERM;					\
									\
		if (kstrtoint(buf, 0, &burst) < 0)			\
			return -EINVAL;					\
									\
		pdev->aer_info->ratelimit.burst = burst;		\
									\
		return count;						\
	}								\
	static DEVICE_ATTR_RW(name);

#define aer_ratelimit_attrs(name)					\
	aer_ratelimit_interval_attr(name##_ratelimit_interval_ms,	\
				    name##_ratelimit)			\
	aer_ratelimit_burst_attr(name##_ratelimit_burst,		\
				 name##_ratelimit)

aer_ratelimit_attrs(correctable)
aer_ratelimit_attrs(nonfatal)

static struct attribute *aer_attrs[] = {
	&dev_attr_correctable_ratelimit_interval_ms.attr,
	&dev_attr_correctable_ratelimit_burst.attr,
	&dev_attr_nonfatal_ratelimit_interval_ms.attr,
	&dev_attr_nonfatal_ratelimit_burst.attr,
	NULL
};

static umode_t aer_attrs_are_visible(struct kobject *kobj,
				     struct attribute *a, int n)
{
	struct device *dev = kobj_to_dev(kobj);
	struct pci_dev *pdev = to_pci_dev(dev);

	if (!pdev->aer_info)
		return 0;

	return a->mode;
}

const struct attribute_group aer_attr_group = {
	.name = "aer",
	.attrs = aer_attrs,
	.is_visible = aer_attrs_are_visible,
};

static void pci_dev_aer_stats_incr(struct pci_dev *pdev,
				   struct aer_err_info *info)
{
	unsigned long status = info->status & ~info->mask;
	int i, max = -1;
	u64 *counter = NULL;
	struct aer_info *aer_info = pdev->aer_info;

	if (!aer_info)
		return;

	switch (info->severity) {
	case AER_CORRECTABLE:
		aer_info->dev_total_cor_errs++;
		counter = &aer_info->dev_cor_errs[0];
		max = AER_MAX_TYPEOF_COR_ERRS;
		break;
	case AER_NONFATAL:
		aer_info->dev_total_nonfatal_errs++;
		counter = &aer_info->dev_nonfatal_errs[0];
		max = AER_MAX_TYPEOF_UNCOR_ERRS;
		break;
	case AER_FATAL:
		aer_info->dev_total_fatal_errs++;
		counter = &aer_info->dev_fatal_errs[0];
		max = AER_MAX_TYPEOF_UNCOR_ERRS;
		break;
	}

	for_each_set_bit(i, &status, max)
		counter[i]++;
}

static void pci_rootport_aer_stats_incr(struct pci_dev *pdev,
					struct aer_err_source *e_src)
{
	struct aer_info *aer_info = pdev->aer_info;

	if (!aer_info)
		return;

	if (e_src->status & PCI_ERR_ROOT_COR_RCV)
		aer_info->rootport_total_cor_errs++;

	if (e_src->status & PCI_ERR_ROOT_UNCOR_RCV) {
		if (e_src->status & PCI_ERR_ROOT_FATAL_RCV)
			aer_info->rootport_total_fatal_errs++;
		else
			aer_info->rootport_total_nonfatal_errs++;
	}
}

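/*
 * Return nonzero when a message of this severity may be printed;
 * ERR_FATAL is never ratelimited.
 */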
static int aer_ratelimit(struct pci_dev *dev, unsigned int severity)
{
	switch (severity) {
	case AER_NONFATAL:
		return __ratelimit(&dev->aer_info->nonfatal_ratelimit);
	case AER_CORRECTABLE:
		return __ratelimit(&dev->aer_info->correctable_ratelimit);
	default:
		return 1;	/* Don't ratelimit fatal errors */
	}
}

static void __aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
{
	const char **strings;
	unsigned long status = info->status & ~info->mask;
	const char *level = info->level;
	const char *errmsg;
	int i;

	if (info->severity == AER_CORRECTABLE)
		strings = aer_correctable_error_string;
	else
		strings = aer_uncorrectable_error_string;

	for_each_set_bit(i, &status, 32) {
		errmsg = strings[i];
		if (!errmsg)
			errmsg = "Unknown Error Bit";

		aer_printk(level, dev, " [%2d] %-22s%s\n", i, errmsg,
			   info->first_error == i ? " (First)" : "");
	}
}

static void aer_print_source(struct pci_dev *dev, struct aer_err_info *info,
			     bool found)
{
	u16 source = info->id;

	pci_info(dev, "%s%s error message received from %04x:%02x:%02x.%d%s\n",
		 info->multi_error_valid ? "Multiple " : "",
		 aer_error_severity_string[info->severity],
		 pci_domain_nr(dev->bus), PCI_BUS_NUM(source),
		 PCI_SLOT(source), PCI_FUNC(source),
		 found ? "" : " (no details found)");
}

void aer_print_error(struct aer_err_info *info, int i)
{
	struct pci_dev *dev;
	int layer, agent, id;
	const char *level = info->level;

	if (WARN_ON_ONCE(i >= AER_MAX_MULTI_ERR_DEVICES))
		return;

	dev = info->dev[i];
	id = pci_dev_id(dev);

	pci_dev_aer_stats_incr(dev, info);
	trace_aer_event(pci_name(dev), (info->status & ~info->mask),
			info->severity, info->tlp_header_valid, &info->tlp);

	if (!info->ratelimit_print[i])
		return;

	if (!info->status) {
		pci_err(dev, "PCIe Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n",
			aer_error_severity_string[info->severity]);
		goto out;
	}

	layer = AER_GET_LAYER_ERROR(info->severity, info->status);
	agent = AER_GET_AGENT(info->severity, info->status);

	aer_printk(level, dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n",
		   aer_error_severity_string[info->severity],
		   aer_error_layer[layer], aer_agent_string[agent]);

	aer_printk(level, dev, " device [%04x:%04x] error status/mask=%08x/%08x\n",
		   dev->vendor, dev->device, info->status, info->mask);

	__aer_print_error(dev, info);

	if (info->tlp_header_valid)
		pcie_print_tlp_log(dev, &info->tlp, level, dev_fmt(" "));

out:
	if (info->id && info->error_dev_num > 1 && info->id == id)
		pci_err(dev, " Error of this Agent is reported first\n");
}

#ifdef CONFIG_ACPI_APEI_PCIEAER
int cper_severity_to_aer(int cper_severity)
{
	switch (cper_severity) {
	case CPER_SEV_RECOVERABLE:
		return AER_NONFATAL;
	case CPER_SEV_FATAL:
		return AER_FATAL;
	default:
		return AER_CORRECTABLE;
	}
}
EXPORT_SYMBOL_GPL(cper_severity_to_aer);
#endif

void pci_print_aer(struct pci_dev *dev, int aer_severity,
		   struct aer_capability_regs *aer)
{
	int layer, agent, tlp_header_valid = 0;
	u32 status, mask;
	struct aer_err_info info = {
		.severity = aer_severity,
		.first_error = PCI_ERR_CAP_FEP(aer->cap_control),
	};

	if (aer_severity == AER_CORRECTABLE) {
		status = aer->cor_status;
		mask = aer->cor_mask;
		info.level = KERN_WARNING;
	} else {
		status = aer->uncor_status;
		mask = aer->uncor_mask;
		info.level = KERN_ERR;
		tlp_header_valid = status & AER_LOG_TLP_MASKS;
	}

	info.status = status;
	info.mask = mask;

	pci_dev_aer_stats_incr(dev, &info);
	trace_aer_event(pci_name(dev), (status & ~mask),
			aer_severity, tlp_header_valid, &aer->header_log);

	if (!aer_ratelimit(dev, info.severity))
		return;

	layer = AER_GET_LAYER_ERROR(aer_severity, status);
	agent = AER_GET_AGENT(aer_severity, status);

	aer_printk(info.level, dev, "aer_status: 0x%08x, aer_mask: 0x%08x\n",
		   status, mask);
	__aer_print_error(dev, &info);
	aer_printk(info.level, dev, "aer_layer=%s, aer_agent=%s\n",
		   aer_error_layer[layer], aer_agent_string[agent]);

	if (aer_severity != AER_CORRECTABLE)
		aer_printk(info.level, dev, "aer_uncor_severity: 0x%08x\n",
			   aer->uncor_severity);

	if (tlp_header_valid)
		pcie_print_tlp_log(dev, &aer->header_log, info.level,
				   dev_fmt(" "));
}
EXPORT_SYMBOL_NS_GPL(pci_print_aer, "CXL");

/**
 * add_error_device - list device to be handled
 * @e_info: pointer to error info
 * @dev: pointer to pci_dev to be added
 */
static int add_error_device(struct aer_err_info *e_info, struct pci_dev *dev)
{
	int i = e_info->error_dev_num;

	if (i >= AER_MAX_MULTI_ERR_DEVICES)
		return -ENOSPC;

	e_info->dev[i] = pci_dev_get(dev);
	e_info->error_dev_num++;

	/*
	 * Ratelimit AER log messages. "dev" is either the source
	 * identified by the root's Error Source ID or it has an unmasked
	 * error logged in its own AER Capability. Messages are emitted
	 * when "ratelimit_print[i]" is non-zero. If we will print detail
	 * for a downstream device, make sure we print the Error Source ID
	 * from the root as well.
	 */
	if (aer_ratelimit(dev, e_info->severity)) {
		e_info->ratelimit_print[i] = 1;
		e_info->root_ratelimit_print = 1;
	}
	return 0;
}

/**
 * is_error_source - check whether the device is source of reported error
 * @dev: pointer to pci_dev to be checked
 * @e_info: pointer to reported error info
 */
static bool is_error_source(struct pci_dev *dev, struct aer_err_info *e_info)
{
	int aer = dev->aer_cap;
	u32 status, mask;
	u16 reg16;

	/*
	 * When bus ID is equal to 0, it might be a bad ID
	 * reported by the Root Port.
	 */
	if ((PCI_BUS_NUM(e_info->id) != 0) &&
	    !(dev->bus->bus_flags & PCI_BUS_FLAGS_NO_AERSID)) {
		/* Device ID match? */
		if (e_info->id == pci_dev_id(dev))
			return true;

		/* Continue ID comparing if there is no multiple error */
		if (!e_info->multi_error_valid)
			return false;
	}

	/*
	 * Check the AER status registers to find the possible reporter when
	 * either:
	 * 1) bus ID is equal to 0 (some ports might lose the bus ID of
	 *    the error source);
	 * 2) the bus flag PCI_BUS_FLAGS_NO_AERSID is set;
	 * 3) there are multiple errors and the prior ID comparison fails.
	 */
	if (atomic_read(&dev->enable_cnt) == 0)
		return false;

	/* Check if AER is enabled */
	pcie_capability_read_word(dev, PCI_EXP_DEVCTL, &reg16);
	if (!(reg16 & PCI_EXP_AER_FLAGS))
		return false;

	if (!aer)
		return false;

	/* Check if error is recorded */
	if (e_info->severity == AER_CORRECTABLE) {
		pci_read_config_dword(dev, aer + PCI_ERR_COR_STATUS, &status);
		pci_read_config_dword(dev, aer + PCI_ERR_COR_MASK, &mask);
	} else {
		pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, &status);
		pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, &mask);
	}
	if (status & ~mask)
		return true;

	return false;
}

static int find_device_iter(struct pci_dev *dev, void *data)
{
	struct aer_err_info *e_info = (struct aer_err_info *)data;

	if (is_error_source(dev, e_info)) {
		/* List this device */
		if (add_error_device(e_info, dev)) {
			/* We cannot handle more... Stop iteration */
			/* TODO: Should print error message here? */
			return 1;
		}

		/* If there is only a single error, stop iteration */
		if (!e_info->multi_error_valid)
			return 1;
	}
	return 0;
}

/**
 * find_source_device - search through device hierarchy for source device
 * @parent: pointer to Root Port pci_dev data structure
 * @e_info: error info, including the ID of the error source
 *
 * Return: true if found.
 *
 * Invoked by DPC when error is detected at the Root Port.
 * Caller of this function must set id, severity, and multi_error_valid of
 * struct aer_err_info pointed by @e_info properly. This function must fill
 * e_info->error_dev_num and e_info->dev[], based on the given information.
 */
static bool find_source_device(struct pci_dev *parent,
			       struct aer_err_info *e_info)
{
	struct pci_dev *dev = parent;
	int result;

	/* Must reset in this function */
	e_info->error_dev_num = 0;

	/* Is Root Port an agent that sends error message? */
	result = find_device_iter(dev, e_info);
	if (result)
		return true;

	if (pci_pcie_type(parent) == PCI_EXP_TYPE_RC_EC)
		pcie_walk_rcec(parent, find_device_iter, e_info);
	else
		pci_walk_bus(parent->subordinate, find_device_iter, e_info);

	if (!e_info->error_dev_num)
		return false;
	return true;
}

#ifdef CONFIG_PCIEAER_CXL

/**
 * pci_aer_unmask_internal_errors - unmask internal errors
 * @dev: pointer to the pci_dev data structure
 *
 * Unmask internal errors in the Uncorrectable and Correctable Error
 * Mask registers.
 *
 * Note: AER must be enabled and supported by the device which must be
 * checked in advance, e.g. with pcie_aer_is_native().
 */
static void pci_aer_unmask_internal_errors(struct pci_dev *dev)
{
	int aer = dev->aer_cap;
	u32 mask;

	pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, &mask);
	mask &= ~PCI_ERR_UNC_INTN;
	pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, mask);

	pci_read_config_dword(dev, aer + PCI_ERR_COR_MASK, &mask);
	mask &= ~PCI_ERR_COR_INTERNAL;
	pci_write_config_dword(dev, aer + PCI_ERR_COR_MASK, mask);
}

static bool is_cxl_mem_dev(struct pci_dev *dev)
{
	/*
	 * The capability, status, and control fields in Device 0,
	 * Function 0 DVSEC control the CXL functionality of the
	 * entire device (CXL 3.0, 8.1.3).
	 */
	if (dev->devfn != PCI_DEVFN(0, 0))
		return false;

	/*
	 * CXL Memory Devices must have the 502h class code set (CXL
	 * 3.0, 8.1.12.1).
	 */
	if ((dev->class >> 8) != PCI_CLASS_MEMORY_CXL)
		return false;

	return true;
}

static bool cxl_error_is_native(struct pci_dev *dev)
{
	struct pci_host_bridge *host = pci_find_host_bridge(dev->bus);

	return (pcie_ports_native || host->native_aer);
}

static bool is_internal_error(struct aer_err_info *info)
{
	if (info->severity == AER_CORRECTABLE)
		return info->status & PCI_ERR_COR_INTERNAL;

	return info->status & PCI_ERR_UNC_INTN;
}

static int cxl_rch_handle_error_iter(struct pci_dev *dev, void *data)
{
	struct aer_err_info *info = (struct aer_err_info *)data;
	const struct pci_error_handlers *err_handler;

	if (!is_cxl_mem_dev(dev) || !cxl_error_is_native(dev))
		return 0;

	/* Protect dev->driver */
	device_lock(&dev->dev);

	err_handler = dev->driver ? dev->driver->err_handler : NULL;
	if (!err_handler)
		goto out;

	if (info->severity == AER_CORRECTABLE) {
		if (err_handler->cor_error_detected)
			err_handler->cor_error_detected(dev);
	} else if (err_handler->error_detected) {
		if (info->severity == AER_NONFATAL)
			err_handler->error_detected(dev, pci_channel_io_normal);
		else if (info->severity == AER_FATAL)
			err_handler->error_detected(dev, pci_channel_io_frozen);
	}
out:
	device_unlock(&dev->dev);
	return 0;
}

static void cxl_rch_handle_error(struct pci_dev *dev, struct aer_err_info *info)
{
	/*
	 * Internal errors of an RCEC indicate an AER error in an
	 * RCH's downstream port. Check and handle them in the CXL.mem
	 * device driver.
	 */
	if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_EC &&
	    is_internal_error(info))
		pcie_walk_rcec(dev, cxl_rch_handle_error_iter, info);
}

static int handles_cxl_error_iter(struct pci_dev *dev, void *data)
{
	bool *handles_cxl = data;

	if (!*handles_cxl)
		*handles_cxl = is_cxl_mem_dev(dev) && cxl_error_is_native(dev);

	/* Non-zero terminates iteration */
	return *handles_cxl;
}

static bool handles_cxl_errors(struct pci_dev *rcec)
{
	bool handles_cxl = false;

	if (pci_pcie_type(rcec) == PCI_EXP_TYPE_RC_EC &&
	    pcie_aer_is_native(rcec))
		pcie_walk_rcec(rcec, handles_cxl_error_iter, &handles_cxl);

	return handles_cxl;
}

static void cxl_rch_enable_rcec(struct pci_dev *rcec)
{
	if (!handles_cxl_errors(rcec))
		return;

	pci_aer_unmask_internal_errors(rcec);
	pci_info(rcec, "CXL: Internal errors unmasked\n");
}

#else
static inline void cxl_rch_enable_rcec(struct pci_dev *dev) { }
static inline void cxl_rch_handle_error(struct pci_dev *dev,
					struct aer_err_info *info) { }
#endif

/**
 * pci_aer_handle_error - handle logging error into an event log
 * @dev: pointer to pci_dev data structure of error source device
 * @info: comprehensive error information
 *
 * Invoked when an error is detected by the Root Port.
 */
static void pci_aer_handle_error(struct pci_dev *dev, struct aer_err_info *info)
{
	int aer = dev->aer_cap;

	if (info->severity == AER_CORRECTABLE) {
		/*
		 * Correctable error does not need software intervention.
		 * No need to go through error recovery process.
		 */
		if (aer)
			pci_write_config_dword(dev, aer + PCI_ERR_COR_STATUS,
					       info->status);
		if (pcie_aer_is_native(dev)) {
			struct pci_driver *pdrv = dev->driver;

			if (pdrv && pdrv->err_handler &&
			    pdrv->err_handler->cor_error_detected)
				pdrv->err_handler->cor_error_detected(dev);
			pcie_clear_device_status(dev);
		}
	} else if (info->severity == AER_NONFATAL)
		pcie_do_recovery(dev, pci_channel_io_normal, aer_root_reset);
	else if (info->severity == AER_FATAL)
		pcie_do_recovery(dev, pci_channel_io_frozen, aer_root_reset);
}

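/*
 * Handle one logged error source, then drop the device reference taken
 * in add_error_device().
 */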
static void handle_error_source(struct pci_dev *dev, struct aer_err_info *info)
{
	cxl_rch_handle_error(dev, info);
	pci_aer_handle_error(dev, info);
	pci_dev_put(dev);
}

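/*
 * For reference, a driver opts in to the callbacks invoked above by
 * filling in struct pci_error_handlers; the names below are from a
 * hypothetical "foo" driver:
 *
 *	static const struct pci_error_handlers foo_err_handler = {
 *		.error_detected		= foo_error_detected,
 *		.cor_error_detected	= foo_cor_error_detected,
 *		.slot_reset		= foo_slot_reset,
 *		.resume			= foo_resume,
 *	};
 */
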
#ifdef CONFIG_ACPI_APEI_PCIEAER

#define AER_RECOVER_RING_SIZE		16

struct aer_recover_entry {
	u8	bus;
	u8	devfn;
	u16	domain;
	int	severity;
	struct aer_capability_regs *regs;
};

static DEFINE_KFIFO(aer_recover_ring, struct aer_recover_entry,
		    AER_RECOVER_RING_SIZE);

static void aer_recover_work_func(struct work_struct *work)
{
	struct aer_recover_entry entry;
	struct pci_dev *pdev;

	while (kfifo_get(&aer_recover_ring, &entry)) {
		pdev = pci_get_domain_bus_and_slot(entry.domain, entry.bus,
						   entry.devfn);
		if (!pdev) {
			pr_err_ratelimited("%04x:%02x:%02x.%x: no pci_dev found\n",
					   entry.domain, entry.bus,
					   PCI_SLOT(entry.devfn),
					   PCI_FUNC(entry.devfn));
			continue;
		}
		pci_print_aer(pdev, entry.severity, entry.regs);

		/*
		 * Memory for aer_capability_regs (entry.regs) is allocated
		 * from the ghes_estatus_pool to protect it from being
		 * overwritten when multiple sections are present in the
		 * error status. Free it after processing the data.
		 */
		ghes_estatus_pool_region_free((unsigned long)entry.regs,
					      sizeof(struct aer_capability_regs));

		if (entry.severity == AER_NONFATAL)
			pcie_do_recovery(pdev, pci_channel_io_normal,
					 aer_root_reset);
		else if (entry.severity == AER_FATAL)
			pcie_do_recovery(pdev, pci_channel_io_frozen,
					 aer_root_reset);
		pci_dev_put(pdev);
	}
}

/*
 * Mutual exclusion for writers of aer_recover_ring; the reader side
 * doesn't need a lock because there is only one reader, and no lock is
 * needed between the reader and the writers.
 */
static DEFINE_SPINLOCK(aer_recover_ring_lock);
static DECLARE_WORK(aer_recover_work, aer_recover_work_func);

void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn,
		       int severity, struct aer_capability_regs *aer_regs)
{
	struct aer_recover_entry entry = {
		.bus		= bus,
		.devfn		= devfn,
		.domain		= domain,
		.severity	= severity,
		.regs		= aer_regs,
	};

	if (kfifo_in_spinlocked(&aer_recover_ring, &entry, 1,
				&aer_recover_ring_lock))
		schedule_work(&aer_recover_work);
	else
		pr_err("buffer overflow in recovery for %04x:%02x:%02x.%x\n",
		       domain, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
}
EXPORT_SYMBOL_GPL(aer_recover_queue);
#endif

/**
 * aer_get_device_error_info - read error status from dev and store it to info
 * @info: pointer to structure to store the error record
 * @i: index into info->dev[]
 *
 * Return: 1 on success, 0 on error.
 *
 * Note that @info is reused among all error devices. Clear fields properly.
 */
int aer_get_device_error_info(struct aer_err_info *info, int i)
{
	struct pci_dev *dev;
	int type, aer;
	u32 aercc;

	if (i >= AER_MAX_MULTI_ERR_DEVICES)
		return 0;

	dev = info->dev[i];
	aer = dev->aer_cap;
	type = pci_pcie_type(dev);

	/* Must reset in this function */
	info->status = 0;
	info->tlp_header_valid = 0;

	/* The device might not support AER */
	if (!aer)
		return 0;

	if (info->severity == AER_CORRECTABLE) {
		pci_read_config_dword(dev, aer + PCI_ERR_COR_STATUS,
				      &info->status);
		pci_read_config_dword(dev, aer + PCI_ERR_COR_MASK,
				      &info->mask);
		if (!(info->status & ~info->mask))
			return 0;
	} else if (type == PCI_EXP_TYPE_ROOT_PORT ||
		   type == PCI_EXP_TYPE_RC_EC ||
		   type == PCI_EXP_TYPE_DOWNSTREAM ||
		   info->severity == AER_NONFATAL) {

		/* Link is still healthy for IO reads */
		pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS,
				      &info->status);
		pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_MASK,
				      &info->mask);
		if (!(info->status & ~info->mask))
			return 0;

		/* Get First Error Pointer */
		pci_read_config_dword(dev, aer + PCI_ERR_CAP, &aercc);
		info->first_error = PCI_ERR_CAP_FEP(aercc);

		if (info->status & AER_LOG_TLP_MASKS) {
			info->tlp_header_valid = 1;
			pcie_read_tlp_log(dev, aer + PCI_ERR_HEADER_LOG,
					  aer + PCI_ERR_PREFIX_LOG,
					  aer_tlp_log_len(dev, aercc),
					  aercc & PCI_ERR_CAP_TLP_LOG_FLIT,
					  &info->tlp);
		}
	}

	return 1;
}

static inline void aer_process_err_devices(struct aer_err_info *e_info)
{
	int i;

	/* Report all before handling them, to not lose records by reset etc. */
	for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) {
		if (aer_get_device_error_info(e_info, i))
			aer_print_error(e_info, i);
	}
	for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) {
		if (aer_get_device_error_info(e_info, i))
			handle_error_source(e_info->dev[i], e_info);
	}
}

/**
 * aer_isr_one_error_type - consume a Correctable or Uncorrectable Error
 *			    detected by Root Port or RCEC
 * @root: pointer to Root Port or RCEC that signaled AER interrupt
 * @info: pointer to AER error info
 */
static void aer_isr_one_error_type(struct pci_dev *root,
				   struct aer_err_info *info)
{
	bool found;

	found = find_source_device(root, info);

	/*
	 * If we're going to log error messages, we've already set
	 * "info->root_ratelimit_print" and "info->ratelimit_print[i]" to
	 * non-zero (which enables printing) because this is either an
	 * ERR_FATAL or we found a device with an error logged in its AER
	 * Capability.
	 *
	 * If we didn't find the Error Source device, at least log the
	 * Requester ID from the ERR_* Message received by the Root Port or
	 * RCEC, ratelimited by the RP or RCEC.
	 */
	if (info->root_ratelimit_print ||
	    (!found && aer_ratelimit(root, info->severity)))
		aer_print_source(root, info, found);

	if (found)
		aer_process_err_devices(info);
}

/**
 * aer_isr_one_error - consume error(s) signaled by an AER interrupt from
 *		       Root Port or RCEC
 * @root: pointer to Root Port or RCEC that signaled AER interrupt
 * @e_src: pointer to an error source
 */
static void aer_isr_one_error(struct pci_dev *root,
			      struct aer_err_source *e_src)
{
	u32 status = e_src->status;

	pci_rootport_aer_stats_incr(root, e_src);

	/*
	 * Both a correctable error and an uncorrectable error may be
	 * logged at the same time. Report the correctable error first.
	 */
	if (status & PCI_ERR_ROOT_COR_RCV) {
		int multi = status & PCI_ERR_ROOT_MULTI_COR_RCV;
		struct aer_err_info e_info = {
			.id = ERR_COR_ID(e_src->id),
			.severity = AER_CORRECTABLE,
			.level = KERN_WARNING,
			.multi_error_valid = multi ? 1 : 0,
		};

		aer_isr_one_error_type(root, &e_info);
	}

	if (status & PCI_ERR_ROOT_UNCOR_RCV) {
		int fatal = status & PCI_ERR_ROOT_FATAL_RCV;
		int multi = status & PCI_ERR_ROOT_MULTI_UNCOR_RCV;
		struct aer_err_info e_info = {
			.id = ERR_UNCOR_ID(e_src->id),
			.severity = fatal ? AER_FATAL : AER_NONFATAL,
			.level = KERN_ERR,
			.multi_error_valid = multi ? 1 : 0,
		};

		aer_isr_one_error_type(root, &e_info);
	}
}

/**
 * aer_isr - consume errors detected by Root Port
 * @irq: IRQ assigned to Root Port
 * @context: pointer to Root Port data structure
 *
 * Invoked, as the IRQ thread, when the Root Port records a newly
 * detected error.
 */
static irqreturn_t aer_isr(int irq, void *context)
{
	struct pcie_device *dev = (struct pcie_device *)context;
	struct aer_rpc *rpc = get_service_data(dev);
	struct aer_err_source e_src;

	if (kfifo_is_empty(&rpc->aer_fifo))
		return IRQ_NONE;

	while (kfifo_get(&rpc->aer_fifo, &e_src))
		aer_isr_one_error(rpc->rpd, &e_src);
	return IRQ_HANDLED;
}

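/*
 * The hard IRQ handler below only snapshots PCI_ERR_ROOT_STATUS and
 * PCI_ERR_ROOT_ERR_SRC, clears the Root Status register, and queues the
 * snapshot; decoding and recovery happen in the aer_isr() thread, woken
 * via IRQ_WAKE_THREAD.
 */
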
/**
 * aer_irq - Root Port's ISR
 * @irq: IRQ assigned to Root Port
 * @context: pointer to Root Port data structure
 *
 * Invoked when Root Port detects AER messages.
 */
static irqreturn_t aer_irq(int irq, void *context)
{
	struct pcie_device *pdev = (struct pcie_device *)context;
	struct aer_rpc *rpc = get_service_data(pdev);
	struct pci_dev *rp = rpc->rpd;
	int aer = rp->aer_cap;
	struct aer_err_source e_src = {};

	pci_read_config_dword(rp, aer + PCI_ERR_ROOT_STATUS, &e_src.status);
	if (!(e_src.status & AER_ERR_STATUS_MASK))
		return IRQ_NONE;

	pci_read_config_dword(rp, aer + PCI_ERR_ROOT_ERR_SRC, &e_src.id);
	pci_write_config_dword(rp, aer + PCI_ERR_ROOT_STATUS, e_src.status);

	if (!kfifo_put(&rpc->aer_fifo, e_src))
		return IRQ_HANDLED;

	return IRQ_WAKE_THREAD;
}

static void aer_enable_irq(struct pci_dev *pdev)
{
	int aer = pdev->aer_cap;
	u32 reg32;

	/* Enable Root Port's interrupt in response to error messages */
	pci_read_config_dword(pdev, aer + PCI_ERR_ROOT_COMMAND, &reg32);
	reg32 |= ROOT_PORT_INTR_ON_MESG_MASK;
	pci_write_config_dword(pdev, aer + PCI_ERR_ROOT_COMMAND, reg32);
}

static void aer_disable_irq(struct pci_dev *pdev)
{
	int aer = pdev->aer_cap;
	u32 reg32;

	/* Disable Root Port's interrupt in response to error messages */
	pci_read_config_dword(pdev, aer + PCI_ERR_ROOT_COMMAND, &reg32);
	reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK;
	pci_write_config_dword(pdev, aer + PCI_ERR_ROOT_COMMAND, reg32);
}

/**
 * aer_enable_rootport - enable Root Port's interrupts when receiving messages
 * @rpc: pointer to a Root Port data structure
 *
 * Invoked when PCIe bus loads AER service driver.
 */
static void aer_enable_rootport(struct aer_rpc *rpc)
{
	struct pci_dev *pdev = rpc->rpd;
	int aer = pdev->aer_cap;
	u16 reg16;
	u32 reg32;

	/* Clear PCIe Capability's Device Status */
	pcie_capability_read_word(pdev, PCI_EXP_DEVSTA, &reg16);
	pcie_capability_write_word(pdev, PCI_EXP_DEVSTA, reg16);

	/* Disable system error generation in response to error messages */
	pcie_capability_clear_word(pdev, PCI_EXP_RTCTL,
				   SYSTEM_ERROR_INTR_ON_MESG_MASK);

	/* Clear error status */
	pci_read_config_dword(pdev, aer + PCI_ERR_ROOT_STATUS, &reg32);
	pci_write_config_dword(pdev, aer + PCI_ERR_ROOT_STATUS, reg32);
	pci_read_config_dword(pdev, aer + PCI_ERR_COR_STATUS, &reg32);
	pci_write_config_dword(pdev, aer + PCI_ERR_COR_STATUS, reg32);
	pci_read_config_dword(pdev, aer + PCI_ERR_UNCOR_STATUS, &reg32);
	pci_write_config_dword(pdev, aer + PCI_ERR_UNCOR_STATUS, reg32);

	aer_enable_irq(pdev);
}

/**
 * aer_disable_rootport - disable Root Port's interrupts when receiving messages
 * @rpc: pointer to a Root Port data structure
 *
 * Invoked when PCIe bus unloads AER service driver.
 */
static void aer_disable_rootport(struct aer_rpc *rpc)
{
	struct pci_dev *pdev = rpc->rpd;
	int aer = pdev->aer_cap;
	u32 reg32;

	aer_disable_irq(pdev);

	/* Clear Root's error status reg */
	pci_read_config_dword(pdev, aer + PCI_ERR_ROOT_STATUS, &reg32);
	pci_write_config_dword(pdev, aer + PCI_ERR_ROOT_STATUS, reg32);
}

/**
 * aer_remove - clean up resources
 * @dev: pointer to the pcie_dev data structure
 *
 * Invoked when PCI Express bus unloads or AER probe fails.
 */
static void aer_remove(struct pcie_device *dev)
{
	struct aer_rpc *rpc = get_service_data(dev);

	aer_disable_rootport(rpc);
}

/**
 * aer_probe - initialize resources
 * @dev: pointer to the pcie_dev data structure
 *
 * Invoked when PCI Express bus loads AER service driver.
 */
static int aer_probe(struct pcie_device *dev)
{
	int status;
	struct aer_rpc *rpc;
	struct device *device = &dev->device;
	struct pci_dev *port = dev->port;

	BUILD_BUG_ON(ARRAY_SIZE(aer_correctable_error_string) <
		     AER_MAX_TYPEOF_COR_ERRS);
	BUILD_BUG_ON(ARRAY_SIZE(aer_uncorrectable_error_string) <
		     AER_MAX_TYPEOF_UNCOR_ERRS);

	/* Limit to Root Ports or Root Complex Event Collectors */
	if ((pci_pcie_type(port) != PCI_EXP_TYPE_RC_EC) &&
	    (pci_pcie_type(port) != PCI_EXP_TYPE_ROOT_PORT))
		return -ENODEV;

	rpc = devm_kzalloc(device, sizeof(struct aer_rpc), GFP_KERNEL);
	if (!rpc)
		return -ENOMEM;

	rpc->rpd = port;
	INIT_KFIFO(rpc->aer_fifo);
	set_service_data(dev, rpc);

	status = devm_request_threaded_irq(device, dev->irq, aer_irq, aer_isr,
					   IRQF_SHARED, "aerdrv", dev);
	if (status) {
		pci_err(port, "request AER IRQ %d failed\n", dev->irq);
		return status;
	}

	cxl_rch_enable_rcec(port);
	aer_enable_rootport(rpc);
	pci_info(port, "enabled with IRQ %d\n", dev->irq);
	return 0;
}

static int aer_suspend(struct pcie_device *dev)
{
	struct aer_rpc *rpc = get_service_data(dev);

	aer_disable_rootport(rpc);
	return 0;
}

static int aer_resume(struct pcie_device *dev)
{
	struct aer_rpc *rpc = get_service_data(dev);

	aer_enable_rootport(rpc);
	return 0;
}

/**
 * aer_root_reset - reset Root Port hierarchy, RCEC, or RCiEP
 * @dev: pointer to Root Port, RCEC, or RCiEP
 *
 * Invoked by Port Bus driver when performing reset.
 */
static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
{
	int type = pci_pcie_type(dev);
	struct pci_dev *root;
	int aer;
	struct pci_host_bridge *host = pci_find_host_bridge(dev->bus);
	u32 reg32;
	int rc;

	/*
	 * Only Root Ports and RCECs have AER Root Command and Root Status
	 * registers. If "dev" is an RCiEP, the relevant registers are in
	 * the RCEC.
	 */
	if (type == PCI_EXP_TYPE_RC_END)
		root = dev->rcec;
	else
		root = pcie_find_root_port(dev);

	/*
	 * If the platform retained control of AER, an RCiEP may not have
	 * an RCEC visible to us, so dev->rcec ("root") may be NULL. In
	 * that case, firmware is responsible for these registers.
	 */
	aer = root ? root->aer_cap : 0;

	if ((host->native_aer || pcie_ports_native) && aer)
		aer_disable_irq(root);

	if (type == PCI_EXP_TYPE_RC_EC || type == PCI_EXP_TYPE_RC_END) {
		rc = pcie_reset_flr(dev, PCI_RESET_DO_RESET);
		if (!rc)
			pci_info(dev, "has been reset\n");
		else
			pci_info(dev, "not reset (no FLR support: %d)\n", rc);
	} else {
		rc = pci_bus_error_reset(dev);
		pci_info(dev, "%s Port link has been reset (%d)\n",
			 pci_is_root_bus(dev->bus) ? "Root" : "Downstream", rc);
	}

	if ((host->native_aer || pcie_ports_native) && aer) {
		/* Clear Root Error Status */
		pci_read_config_dword(root, aer + PCI_ERR_ROOT_STATUS, &reg32);
		pci_write_config_dword(root, aer + PCI_ERR_ROOT_STATUS, reg32);

		aer_enable_irq(root);
	}

	return rc ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
}

static struct pcie_port_service_driver aerdriver = {
	.name		= "aer",
	.port_type	= PCIE_ANY_PORT,
	.service	= PCIE_PORT_SERVICE_AER,

	.probe		= aer_probe,
	.suspend	= aer_suspend,
	.resume		= aer_resume,
	.remove		= aer_remove,
};

/**
 * pcie_aer_init - register AER service driver
 *
 * Invoked when AER service driver is loaded.
 */
int __init pcie_aer_init(void)
{
	if (!pci_aer_available())
		return -ENXIO;
	return pcie_port_service_register(&aerdriver);
}