1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* |
3 | * Copyright IBM Corporation 2001, 2005, 2006 |
4 | * Copyright Dave Engebretsen & Todd Inglett 2001 |
5 | * Copyright Linas Vepstas 2005, 2006 |
6 | * Copyright 2001-2012 IBM Corporation. |
7 | * |
8 | * Please address comments and feedback to Linas Vepstas <linas@austin.ibm.com> |
9 | */ |
10 | |
11 | #include <linux/delay.h> |
12 | #include <linux/sched.h> |
13 | #include <linux/init.h> |
14 | #include <linux/list.h> |
15 | #include <linux/pci.h> |
16 | #include <linux/iommu.h> |
17 | #include <linux/proc_fs.h> |
18 | #include <linux/rbtree.h> |
19 | #include <linux/reboot.h> |
20 | #include <linux/seq_file.h> |
21 | #include <linux/spinlock.h> |
22 | #include <linux/export.h> |
23 | #include <linux/of.h> |
24 | #include <linux/debugfs.h> |
25 | |
26 | #include <linux/atomic.h> |
27 | #include <asm/eeh.h> |
28 | #include <asm/eeh_event.h> |
29 | #include <asm/io.h> |
30 | #include <asm/iommu.h> |
31 | #include <asm/machdep.h> |
32 | #include <asm/ppc-pci.h> |
33 | #include <asm/rtas.h> |
34 | #include <asm/pte-walk.h> |
35 | |
36 | |
37 | /** Overview: |
38 | * EEH, or "Enhanced Error Handling" is a PCI bridge technology for |
39 | * dealing with PCI bus errors that can't be dealt with within the |
40 | * usual PCI framework, except by check-stopping the CPU. Systems |
41 | * that are designed for high-availability/reliability cannot afford |
42 | * to crash due to a "mere" PCI error, thus the need for EEH. |
43 | * An EEH-capable bridge operates by converting a detected error |
44 | * into a "slot freeze", taking the PCI adapter off-line, making |
45 | * the slot behave, from the OS'es point of view, as if the slot |
46 | * were "empty": all reads return 0xff's and all writes are silently |
47 | * ignored. EEH slot isolation events can be triggered by parity |
48 | * errors on the address or data busses (e.g. during posted writes), |
49 | * which in turn might be caused by low voltage on the bus, dust, |
50 | * vibration, humidity, radioactivity or plain-old failed hardware. |
51 | * |
 * Note, however, that some of the leading causes of EEH slot
53 | * freeze events are buggy device drivers, buggy device microcode, |
54 | * or buggy device hardware. This is because any attempt by the |
55 | * device to bus-master data to a memory address that is not |
56 | * assigned to the device will trigger a slot freeze. (The idea |
57 | * is to prevent devices-gone-wild from corrupting system memory). |
58 | * Buggy hardware/drivers will have a miserable time co-existing |
59 | * with EEH. |
60 | * |
61 | * Ideally, a PCI device driver, when suspecting that an isolation |
62 | * event has occurred (e.g. by reading 0xff's), will then ask EEH |
63 | * whether this is the case, and then take appropriate steps to |
64 | * reset the PCI slot, the PCI device, and then resume operations. |
65 | * However, until that day, the checking is done here, with the |
66 | * eeh_check_failure() routine embedded in the MMIO macros. If |
67 | * the slot is found to be isolated, an "EEH Event" is synthesized |
68 | * and sent out for processing. |
69 | */ |
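
/*
 * In practice the detection path below is: a driver's MMIO read
 * returns all 1's, the eeh_check_failure() hook in the MMIO accessors
 * looks up the owning PE and queries the platform for its state, and
 * if the PE turns out to be frozen an EEH event is queued for the
 * recovery thread. The details of each step live in the routines that
 * follow.
 */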
70 | |
71 | /* If a device driver keeps reading an MMIO register in an interrupt |
72 | * handler after a slot isolation event, it might be broken. |
73 | * This sets the threshold for how many read attempts we allow |
74 | * before printing an error message. |
75 | */ |
76 | #define EEH_MAX_FAILS 2100000 |
77 | |
78 | /* Time to wait for a PCI slot to report status, in milliseconds */ |
79 | #define PCI_BUS_RESET_WAIT_MSEC (5*60*1000) |
80 | |
81 | /* |
82 | * EEH probe mode support, which is part of the flags, |
 * is to support multiple platforms for EEH. Some platforms,
 * like pSeries, do PCI enumeration based on the device tree.
 * However, other platforms, like powernv, probe PCI devices
 * from hardware. The flag is used to distinguish between them.
 * In addition, struct eeh_ops::probe is invoked for a
 * particular OF node or PCI device so that the corresponding
 * PE can be created there.
90 | */ |
91 | int eeh_subsystem_flags; |
92 | EXPORT_SYMBOL(eeh_subsystem_flags); |
93 | |
94 | /* |
 * Maximum number of PE freezes allowed. If a particular PE's
 * frozen count in the last hour exceeds this limit, the PE will
 * be forced offline permanently.
98 | */ |
99 | u32 eeh_max_freezes = 5; |
100 | |
101 | /* |
102 | * Controls whether a recovery event should be scheduled when an |
103 | * isolated device is discovered. This is only really useful for |
104 | * debugging problems with the EEH core. |
105 | */ |
106 | bool eeh_debugfs_no_recover; |
107 | |
108 | /* Platform dependent EEH operations */ |
109 | struct eeh_ops *eeh_ops = NULL; |
110 | |
111 | /* Lock to avoid races due to multiple reports of an error */ |
112 | DEFINE_RAW_SPINLOCK(confirm_error_lock); |
113 | EXPORT_SYMBOL_GPL(confirm_error_lock); |
114 | |
115 | /* Lock to protect passed flags */ |
116 | static DEFINE_MUTEX(eeh_dev_mutex); |
117 | |
/* Buffer for reporting pci register dumps. It's here in BSS, and
 * not dynamically allocated, so that it ends up in the RMO where RTAS
120 | * can access it. |
121 | */ |
122 | #define EEH_PCI_REGS_LOG_LEN 8192 |
123 | static unsigned char pci_regs_buf[EEH_PCI_REGS_LOG_LEN]; |
124 | |
125 | /* |
 * This struct maintains the EEH global statistics, which are
 * also exported to user space through procfs.
129 | */ |
130 | struct eeh_stats { |
131 | u64 no_device; /* PCI device not found */ |
132 | u64 no_dn; /* OF node not found */ |
133 | u64 no_cfg_addr; /* Config address not found */ |
134 | u64 ignored_check; /* EEH check skipped */ |
135 | u64 total_mmio_ffs; /* Total EEH checks */ |
136 | u64 false_positives; /* Unnecessary EEH checks */ |
137 | u64 slot_resets; /* PE reset */ |
138 | }; |
139 | |
140 | static struct eeh_stats eeh_stats; |
141 | |
142 | static int __init eeh_setup(char *str) |
143 | { |
	if (!strcmp(str, "off"))
		eeh_add_flag(EEH_FORCE_DISABLED);
	else if (!strcmp(str, "early_log"))
		eeh_add_flag(EEH_EARLY_DUMP_LOG);
148 | |
149 | return 1; |
150 | } |
__setup("eeh=", eeh_setup);
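
/*
 * For example, booting with "eeh=off" on the kernel command line
 * disables EEH recovery entirely, while "eeh=early_log" requests an
 * early dump of the error log; any other value leaves the defaults
 * alone.
 */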
152 | |
153 | void eeh_show_enabled(void) |
154 | { |
	if (eeh_has_flag(EEH_FORCE_DISABLED))
		pr_info("EEH: Recovery disabled by kernel parameter.\n");
	else if (eeh_has_flag(EEH_ENABLED))
		pr_info("EEH: Capable adapter found: recovery enabled.\n");
	else
		pr_info("EEH: No capable adapters found: recovery disabled.\n");
161 | } |
162 | |
163 | /* |
164 | * This routine captures assorted PCI configuration space data |
165 | * for the indicated PCI device, and puts them into a buffer |
166 | * for RTAS error logging. |
167 | */ |
168 | static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len) |
169 | { |
170 | u32 cfg; |
171 | int cap, i; |
172 | int n = 0, l = 0; |
173 | char buffer[128]; |
174 | |
	n += scnprintf(buf+n, len-n, "%04x:%02x:%02x.%01x\n",
		       edev->pe->phb->global_number, edev->bdfn >> 8,
		       PCI_SLOT(edev->bdfn), PCI_FUNC(edev->bdfn));
	pr_warn("EEH: of node=%04x:%02x:%02x.%01x\n",
		edev->pe->phb->global_number, edev->bdfn >> 8,
		PCI_SLOT(edev->bdfn), PCI_FUNC(edev->bdfn));

	eeh_ops->read_config(edev, PCI_VENDOR_ID, 4, &cfg);
	n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg);
	pr_warn("EEH: PCI device/vendor: %08x\n", cfg);

	eeh_ops->read_config(edev, PCI_COMMAND, 4, &cfg);
	n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg);
	pr_warn("EEH: PCI cmd/status register: %08x\n", cfg);

	/* Gather bridge-specific registers */
	if (edev->mode & EEH_DEV_BRIDGE) {
		eeh_ops->read_config(edev, PCI_SEC_STATUS, 2, &cfg);
		n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg);
		pr_warn("EEH: Bridge secondary status: %04x\n", cfg);

		eeh_ops->read_config(edev, PCI_BRIDGE_CONTROL, 2, &cfg);
		n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg);
		pr_warn("EEH: Bridge control: %04x\n", cfg);
199 | } |
200 | |
201 | /* Dump out the PCI-X command and status regs */ |
202 | cap = edev->pcix_cap; |
203 | if (cap) { |
		eeh_ops->read_config(edev, cap, 4, &cfg);
		n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg);
		pr_warn("EEH: PCI-X cmd: %08x\n", cfg);

		eeh_ops->read_config(edev, cap+4, 4, &cfg);
		n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg);
		pr_warn("EEH: PCI-X status: %08x\n", cfg);
	}

	/* If PCI-E capable, dump PCI-E cap 10 */
	cap = edev->pcie_cap;
	if (cap) {
		n += scnprintf(buf+n, len-n, "pci-e cap10:\n");
		pr_warn("EEH: PCI-E capabilities and status follow:\n");

		for (i=0; i<=8; i++) {
			eeh_ops->read_config(edev, cap+4*i, 4, &cfg);
			n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);

			if ((i % 4) == 0) {
				if (i != 0)
					pr_warn("%s\n", buffer);

				l = scnprintf(buffer, sizeof(buffer),
					      "EEH: PCI-E %02x: %08x ",
					      4*i, cfg);
			} else {
				l += scnprintf(buffer+l, sizeof(buffer)-l,
					       "%08x ", cfg);
			}

		}

		pr_warn("%s\n", buffer);
238 | } |
239 | |
240 | /* If AER capable, dump it */ |
241 | cap = edev->aer_cap; |
242 | if (cap) { |
		n += scnprintf(buf+n, len-n, "pci-e AER:\n");
		pr_warn("EEH: PCI-E AER capability register set follows:\n");

		for (i=0; i<=13; i++) {
			eeh_ops->read_config(edev, cap+4*i, 4, &cfg);
			n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);

			if ((i % 4) == 0) {
				if (i != 0)
					pr_warn("%s\n", buffer);

				l = scnprintf(buffer, sizeof(buffer),
					      "EEH: PCI-E AER %02x: %08x ",
					      4*i, cfg);
			} else {
				l += scnprintf(buffer+l, sizeof(buffer)-l,
					       "%08x ", cfg);
			}
		}

		pr_warn("%s\n", buffer);
264 | } |
265 | |
266 | return n; |
267 | } |
268 | |
269 | static void *eeh_dump_pe_log(struct eeh_pe *pe, void *flag) |
270 | { |
271 | struct eeh_dev *edev, *tmp; |
272 | size_t *plen = flag; |
273 | |
274 | eeh_pe_for_each_dev(pe, edev, tmp) |
		*plen += eeh_dump_dev_log(edev, pci_regs_buf + *plen,
276 | EEH_PCI_REGS_LOG_LEN - *plen); |
277 | |
278 | return NULL; |
279 | } |
280 | |
281 | /** |
282 | * eeh_slot_error_detail - Generate combined log including driver log and error log |
283 | * @pe: EEH PE |
284 | * @severity: temporary or permanent error log |
285 | * |
286 | * This routine should be called to generate the combined log, which |
287 | * is comprised of driver log and error log. The driver log is figured |
288 | * out from the config space of the corresponding PCI device, while |
289 | * the error log is fetched through platform dependent function call. |
290 | */ |
291 | void eeh_slot_error_detail(struct eeh_pe *pe, int severity) |
292 | { |
293 | size_t loglen = 0; |
294 | |
295 | /* |
296 | * When the PHB is fenced or dead, it's pointless to collect |
297 | * the data from PCI config space because it should return |
298 | * 0xFF's. For ER, we still retrieve the data from the PCI |
299 | * config space. |
300 | * |
	 * For pHyp, we have to enable IO for log retrieval. Otherwise,
	 * 0xFF's are always returned from PCI config space.
	 *
	 * When the @severity is EEH_LOG_PERM, the PE is going to be
	 * removed. Prior to that, the drivers for devices included in
	 * the PE will be closed. The drivers rely on a working IO path
	 * to bring the devices to a quiet state. Otherwise, PCI traffic
	 * from those devices after they are removed is likely to cause
	 * another unexpected EEH error.
310 | */ |
311 | if (!(pe->type & EEH_PE_PHB)) { |
312 | if (eeh_has_flag(EEH_ENABLE_IO_FOR_LOG) || |
313 | severity == EEH_LOG_PERM) |
314 | eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); |
315 | |
316 | /* |
317 | * The config space of some PCI devices can't be accessed |
318 | * when their PEs are in frozen state. Otherwise, fenced |
319 | * PHB might be seen. Those PEs are identified with flag |
320 | * EEH_PE_CFG_RESTRICTED, indicating EEH_PE_CFG_BLOCKED |
321 | * is set automatically when the PE is put to EEH_PE_ISOLATED. |
322 | * |
323 | * Restoring BARs possibly triggers PCI config access in |
324 | * (OPAL) firmware and then causes fenced PHB. If the |
325 | * PCI config is blocked with flag EEH_PE_CFG_BLOCKED, it's |
326 | * pointless to restore BARs and dump config space. |
327 | */ |
328 | eeh_ops->configure_bridge(pe); |
329 | if (!(pe->state & EEH_PE_CFG_BLOCKED)) { |
330 | eeh_pe_restore_bars(pe); |
331 | |
332 | pci_regs_buf[0] = 0; |
333 | eeh_pe_traverse(pe, eeh_dump_pe_log, &loglen); |
334 | } |
335 | } |
336 | |
337 | eeh_ops->get_log(pe, severity, pci_regs_buf, loglen); |
338 | } |
339 | |
340 | /** |
341 | * eeh_token_to_phys - Convert EEH address token to phys address |
342 | * @token: I/O token, should be address in the form 0xA.... |
343 | * |
344 | * This routine should be called to convert virtual I/O address |
345 | * to physical one. |
346 | */ |
347 | static inline unsigned long eeh_token_to_phys(unsigned long token) |
348 | { |
349 | return ppc_find_vmap_phys(token); |
350 | } |
351 | |
352 | /* |
 * On the PowerNV platform, we might already have a fenced PHB
 * there. In that case, it's meaningless to recover the frozen PE.
 * Instead, we have to handle the fenced PHB first.
356 | */ |
357 | static int eeh_phb_check_failure(struct eeh_pe *pe) |
358 | { |
359 | struct eeh_pe *phb_pe; |
360 | unsigned long flags; |
361 | int ret; |
362 | |
363 | if (!eeh_has_flag(EEH_PROBE_MODE_DEV)) |
364 | return -EPERM; |
365 | |
366 | /* Find the PHB PE */ |
367 | phb_pe = eeh_phb_pe_get(pe->phb); |
368 | if (!phb_pe) { |
369 | pr_warn("%s Can't find PE for PHB#%x\n" , |
370 | __func__, pe->phb->global_number); |
371 | return -EEXIST; |
372 | } |
373 | |
374 | /* If the PHB has been in problematic state */ |
375 | eeh_serialize_lock(&flags); |
376 | if (phb_pe->state & EEH_PE_ISOLATED) { |
377 | ret = 0; |
378 | goto out; |
379 | } |
380 | |
381 | /* Check PHB state */ |
382 | ret = eeh_ops->get_state(phb_pe, NULL); |
383 | if ((ret < 0) || |
384 | (ret == EEH_STATE_NOT_SUPPORT) || eeh_state_active(ret)) { |
385 | ret = 0; |
386 | goto out; |
387 | } |
388 | |
389 | /* Isolate the PHB and send event */ |
390 | eeh_pe_mark_isolated(phb_pe); |
391 | eeh_serialize_unlock(flags); |
392 | |
393 | pr_debug("EEH: PHB#%x failure detected, location: %s\n" , |
394 | phb_pe->phb->global_number, eeh_pe_loc_get(phb_pe)); |
395 | eeh_send_failure_event(phb_pe); |
396 | return 1; |
397 | out: |
398 | eeh_serialize_unlock(flags); |
399 | return ret; |
400 | } |
401 | |
402 | static inline const char *eeh_driver_name(struct pci_dev *pdev) |
403 | { |
404 | if (pdev) |
		return dev_driver_string(&pdev->dev);
406 | |
407 | return "<null>" ; |
408 | } |
409 | |
410 | /** |
411 | * eeh_dev_check_failure - Check if all 1's data is due to EEH slot freeze |
412 | * @edev: eeh device |
413 | * |
414 | * Check for an EEH failure for the given device node. Call this |
415 | * routine if the result of a read was all 0xff's and you want to |
416 | * find out if this is due to an EEH slot freeze. This routine |
417 | * will query firmware for the EEH status. |
418 | * |
419 | * Returns 0 if there has not been an EEH error; otherwise returns |
420 | * a non-zero value and queues up a slot isolation event notification. |
421 | * |
422 | * It is safe to call this routine in an interrupt context. |
423 | */ |
424 | int eeh_dev_check_failure(struct eeh_dev *edev) |
425 | { |
426 | int ret; |
427 | unsigned long flags; |
428 | struct device_node *dn; |
429 | struct pci_dev *dev; |
430 | struct eeh_pe *pe, *parent_pe; |
431 | int rc = 0; |
432 | const char *location = NULL; |
433 | |
434 | eeh_stats.total_mmio_ffs++; |
435 | |
436 | if (!eeh_enabled()) |
437 | return 0; |
438 | |
439 | if (!edev) { |
440 | eeh_stats.no_dn++; |
441 | return 0; |
442 | } |
443 | dev = eeh_dev_to_pci_dev(edev); |
444 | pe = eeh_dev_to_pe(edev); |
445 | |
446 | /* Access to IO BARs might get this far and still not want checking. */ |
447 | if (!pe) { |
448 | eeh_stats.ignored_check++; |
449 | eeh_edev_dbg(edev, "Ignored check\n" ); |
450 | return 0; |
451 | } |
452 | |
453 | /* |
	 * On the PowerNV platform, we might already have a fenced PHB
	 * there and we need to take care of that first.
456 | */ |
457 | ret = eeh_phb_check_failure(pe); |
458 | if (ret > 0) |
459 | return ret; |
460 | |
461 | /* |
462 | * If the PE isn't owned by us, we shouldn't check the |
463 | * state. Instead, let the owner handle it if the PE has |
464 | * been frozen. |
465 | */ |
466 | if (eeh_pe_passed(pe)) |
467 | return 0; |
468 | |
469 | /* If we already have a pending isolation event for this |
470 | * slot, we know it's bad already, we don't need to check. |
471 | * Do this checking under a lock; as multiple PCI devices |
472 | * in one slot might report errors simultaneously, and we |
473 | * only want one error recovery routine running. |
474 | */ |
475 | eeh_serialize_lock(&flags); |
476 | rc = 1; |
477 | if (pe->state & EEH_PE_ISOLATED) { |
478 | pe->check_count++; |
479 | if (pe->check_count == EEH_MAX_FAILS) { |
			dn = pci_device_to_OF_node(dev);
			if (dn)
				location = of_get_property(dn, "ibm,loc-code",
						NULL);
			eeh_edev_err(edev, "%d reads ignored for recovering device at location=%s driver=%s\n",
				pe->check_count,
				location ? location : "unknown",
				eeh_driver_name(dev));
			eeh_edev_err(edev, "Might be infinite loop in %s driver\n",
				eeh_driver_name(dev));
490 | dump_stack(); |
491 | } |
492 | goto dn_unlock; |
493 | } |
494 | |
495 | /* |
496 | * Now test for an EEH failure. This is VERY expensive. |
497 | * Note that the eeh_config_addr may be a parent device |
498 | * in the case of a device behind a bridge, or it may be |
499 | * function zero of a multi-function device. |
500 | * In any case they must share a common PHB. |
501 | */ |
502 | ret = eeh_ops->get_state(pe, NULL); |
503 | |
	/* Note that config-io to empty slots may fail;
	 * they are empty when they don't have children.
	 * We will punt under any of the following conditions: failure
	 * to get the PE's state, EEH not supported, a permanently
	 * unavailable state, or the PE being in a good state.
	 */
510 | if ((ret < 0) || |
511 | (ret == EEH_STATE_NOT_SUPPORT) || eeh_state_active(ret)) { |
512 | eeh_stats.false_positives++; |
513 | pe->false_positives++; |
514 | rc = 0; |
515 | goto dn_unlock; |
516 | } |
517 | |
518 | /* |
	 * It is a corner case that the parent PE has been put into
	 * the frozen state as well. We should take care of that
	 * first.
522 | */ |
523 | parent_pe = pe->parent; |
524 | while (parent_pe) { |
525 | /* Hit the ceiling ? */ |
526 | if (parent_pe->type & EEH_PE_PHB) |
527 | break; |
528 | |
529 | /* Frozen parent PE ? */ |
530 | ret = eeh_ops->get_state(parent_pe, NULL); |
531 | if (ret > 0 && !eeh_state_active(ret)) { |
532 | pe = parent_pe; |
533 | pr_err("EEH: Failure of PHB#%x-PE#%x will be handled at parent PHB#%x-PE#%x.\n" , |
534 | pe->phb->global_number, pe->addr, |
535 | pe->phb->global_number, parent_pe->addr); |
536 | } |
537 | |
538 | /* Next parent level */ |
539 | parent_pe = parent_pe->parent; |
540 | } |
541 | |
542 | eeh_stats.slot_resets++; |
543 | |
544 | /* Avoid repeated reports of this failure, including problems |
545 | * with other functions on this device, and functions under |
546 | * bridges. |
547 | */ |
548 | eeh_pe_mark_isolated(pe); |
549 | eeh_serialize_unlock(flags); |
550 | |
551 | /* Most EEH events are due to device driver bugs. Having |
552 | * a stack trace will help the device-driver authors figure |
553 | * out what happened. So print that out. |
554 | */ |
555 | pr_debug("EEH: %s: Frozen PHB#%x-PE#%x detected\n" , |
556 | __func__, pe->phb->global_number, pe->addr); |
557 | eeh_send_failure_event(pe); |
558 | |
559 | return 1; |
560 | |
561 | dn_unlock: |
562 | eeh_serialize_unlock(flags); |
563 | return rc; |
564 | } |
565 | |
566 | EXPORT_SYMBOL_GPL(eeh_dev_check_failure); |
567 | |
568 | /** |
569 | * eeh_check_failure - Check if all 1's data is due to EEH slot freeze |
570 | * @token: I/O address |
571 | * |
572 | * Check for an EEH failure at the given I/O address. Call this |
573 | * routine if the result of a read was all 0xff's and you want to |
574 | * find out if this is due to an EEH slot freeze event. This routine |
575 | * will query firmware for the EEH status. |
576 | * |
577 | * Note this routine is safe to call in an interrupt context. |
578 | */ |
579 | int eeh_check_failure(const volatile void __iomem *token) |
580 | { |
581 | unsigned long addr; |
582 | struct eeh_dev *edev; |
583 | |
584 | /* Finding the phys addr + pci device; this is pretty quick. */ |
	addr = eeh_token_to_phys((unsigned long __force) token);
586 | edev = eeh_addr_cache_get_dev(addr); |
587 | if (!edev) { |
588 | eeh_stats.no_device++; |
589 | return 0; |
590 | } |
591 | |
592 | return eeh_dev_check_failure(edev); |
593 | } |
594 | EXPORT_SYMBOL(eeh_check_failure); |
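
/*
 * A driver that suspects a freeze can also call this directly; a
 * minimal sketch (hypothetical driver code, STATUS_REG and priv are
 * the driver's own definitions):
 *
 *	val = ioread32(priv->regs + STATUS_REG);
 *	if (val == 0xffffffff && eeh_check_failure(priv->regs + STATUS_REG))
 *		return;
 *
 * Once eeh_check_failure() reports a freeze, recovery has been
 * scheduled and the driver should stop touching MMIO.
 */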
595 | |
596 | |
597 | /** |
598 | * eeh_pci_enable - Enable MMIO or DMA transfers for this slot |
599 | * @pe: EEH PE |
600 | * @function: EEH option |
601 | * |
602 | * This routine should be called to reenable frozen MMIO or DMA |
603 | * so that it would work correctly again. It's useful while doing |
604 | * recovery or log collection on the indicated device. |
605 | */ |
606 | int eeh_pci_enable(struct eeh_pe *pe, int function) |
607 | { |
608 | int active_flag, rc; |
609 | |
610 | /* |
	 * pHyp doesn't allow enabling IO or DMA on an unfrozen PE.
	 * Also, it's pointless to enable them on an unfrozen PE. So
	 * we have to check before enabling IO or DMA.
614 | */ |
615 | switch (function) { |
616 | case EEH_OPT_THAW_MMIO: |
617 | active_flag = EEH_STATE_MMIO_ACTIVE | EEH_STATE_MMIO_ENABLED; |
618 | break; |
619 | case EEH_OPT_THAW_DMA: |
620 | active_flag = EEH_STATE_DMA_ACTIVE; |
621 | break; |
622 | case EEH_OPT_DISABLE: |
623 | case EEH_OPT_ENABLE: |
624 | case EEH_OPT_FREEZE_PE: |
625 | active_flag = 0; |
626 | break; |
627 | default: |
628 | pr_warn("%s: Invalid function %d\n" , |
629 | __func__, function); |
630 | return -EINVAL; |
631 | } |
632 | |
633 | /* |
634 | * Check if IO or DMA has been enabled before |
635 | * enabling them. |
636 | */ |
637 | if (active_flag) { |
638 | rc = eeh_ops->get_state(pe, NULL); |
639 | if (rc < 0) |
640 | return rc; |
641 | |
642 | /* Needn't enable it at all */ |
643 | if (rc == EEH_STATE_NOT_SUPPORT) |
644 | return 0; |
645 | |
646 | /* It's already enabled */ |
647 | if (rc & active_flag) |
648 | return 0; |
649 | } |
650 | |
651 | |
652 | /* Issue the request */ |
653 | rc = eeh_ops->set_option(pe, function); |
654 | if (rc) |
655 | pr_warn("%s: Unexpected state change %d on " |
656 | "PHB#%x-PE#%x, err=%d\n" , |
657 | __func__, function, pe->phb->global_number, |
658 | pe->addr, rc); |
659 | |
660 | /* Check if the request is finished successfully */ |
661 | if (active_flag) { |
662 | rc = eeh_wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); |
663 | if (rc < 0) |
664 | return rc; |
665 | |
666 | if (rc & active_flag) |
667 | return 0; |
668 | |
669 | return -EIO; |
670 | } |
671 | |
672 | return rc; |
673 | } |
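
/*
 * For instance, eeh_slot_error_detail() above uses
 * eeh_pci_enable(pe, EEH_OPT_THAW_MMIO) to thaw MMIO before dumping
 * config space, and eeh_unfreeze_pe() below thaws MMIO and then DMA
 * with two consecutive calls.
 */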
674 | |
675 | static void eeh_disable_and_save_dev_state(struct eeh_dev *edev, |
676 | void *userdata) |
677 | { |
678 | struct pci_dev *pdev = eeh_dev_to_pci_dev(edev); |
679 | struct pci_dev *dev = userdata; |
680 | |
681 | /* |
682 | * The caller should have disabled and saved the |
683 | * state for the specified device |
684 | */ |
685 | if (!pdev || pdev == dev) |
686 | return; |
687 | |
688 | /* Ensure we have D0 power state */ |
	pci_set_power_state(pdev, PCI_D0);
690 | |
691 | /* Save device state */ |
	pci_save_state(pdev);
693 | |
694 | /* |
695 | * Disable device to avoid any DMA traffic and |
696 | * interrupt from the device |
697 | */ |
	pci_write_config_word(pdev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE);
699 | } |
700 | |
701 | static void eeh_restore_dev_state(struct eeh_dev *edev, void *userdata) |
702 | { |
703 | struct pci_dev *pdev = eeh_dev_to_pci_dev(edev); |
704 | struct pci_dev *dev = userdata; |
705 | |
706 | if (!pdev) |
707 | return; |
708 | |
709 | /* Apply customization from firmware */ |
710 | if (eeh_ops->restore_config) |
711 | eeh_ops->restore_config(edev); |
712 | |
713 | /* The caller should restore state for the specified device */ |
714 | if (pdev != dev) |
		pci_restore_state(pdev);
716 | } |
717 | |
718 | /** |
719 | * pcibios_set_pcie_reset_state - Set PCI-E reset state |
720 | * @dev: pci device struct |
721 | * @state: reset state to enter |
722 | * |
723 | * Return value: |
724 | * 0 if success |
725 | */ |
726 | int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state) |
727 | { |
728 | struct eeh_dev *edev = pci_dev_to_eeh_dev(dev); |
729 | struct eeh_pe *pe = eeh_dev_to_pe(edev); |
730 | |
731 | if (!pe) { |
732 | pr_err("%s: No PE found on PCI device %s\n" , |
733 | __func__, pci_name(dev)); |
734 | return -EINVAL; |
735 | } |
736 | |
737 | switch (state) { |
738 | case pcie_deassert_reset: |
739 | eeh_ops->reset(pe, EEH_RESET_DEACTIVATE); |
740 | eeh_unfreeze_pe(pe); |
741 | if (!(pe->type & EEH_PE_VF)) |
742 | eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED, true); |
743 | eeh_pe_dev_traverse(pe, eeh_restore_dev_state, dev); |
744 | eeh_pe_state_clear(pe, EEH_PE_ISOLATED, true); |
745 | break; |
746 | case pcie_hot_reset: |
747 | eeh_pe_mark_isolated(pe); |
748 | eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED, true); |
749 | eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE); |
750 | eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev); |
751 | if (!(pe->type & EEH_PE_VF)) |
752 | eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED); |
753 | eeh_ops->reset(pe, EEH_RESET_HOT); |
754 | break; |
755 | case pcie_warm_reset: |
756 | eeh_pe_mark_isolated(pe); |
757 | eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED, true); |
758 | eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE); |
759 | eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev); |
760 | if (!(pe->type & EEH_PE_VF)) |
761 | eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED); |
762 | eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL); |
763 | break; |
764 | default: |
765 | eeh_pe_state_clear(pe, EEH_PE_ISOLATED | EEH_PE_CFG_BLOCKED, true); |
766 | return -EINVAL; |
767 | } |
768 | |
769 | return 0; |
770 | } |
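
/*
 * This is reached through the generic pci_set_pcie_reset_state()
 * helper; a driver-side sketch (hypothetical, error handling omitted)
 * might look like:
 *
 *	pci_set_pcie_reset_state(pdev, pcie_hot_reset);
 *	msleep(100);
 *	pci_set_pcie_reset_state(pdev, pcie_deassert_reset);
 *
 * where the settle time between assert and deassert is device specific.
 */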
771 | |
772 | /** |
773 | * eeh_set_dev_freset - Check the required reset for the indicated device |
774 | * @edev: EEH device |
775 | * @flag: return value |
776 | * |
777 | * Each device might have its preferred reset type: fundamental or |
 * hot reset. The routine is used to collect the information for
 * the indicated device and its children so that the whole group
 * of devices can be reset properly.
781 | */ |
782 | static void eeh_set_dev_freset(struct eeh_dev *edev, void *flag) |
783 | { |
784 | struct pci_dev *dev; |
785 | unsigned int *freset = (unsigned int *)flag; |
786 | |
787 | dev = eeh_dev_to_pci_dev(edev); |
788 | if (dev) |
789 | *freset |= dev->needs_freset; |
790 | } |
791 | |
792 | static void eeh_pe_refreeze_passed(struct eeh_pe *root) |
793 | { |
794 | struct eeh_pe *pe; |
795 | int state; |
796 | |
797 | eeh_for_each_pe(root, pe) { |
798 | if (eeh_pe_passed(pe)) { |
799 | state = eeh_ops->get_state(pe, NULL); |
800 | if (state & |
801 | (EEH_STATE_MMIO_ACTIVE | EEH_STATE_MMIO_ENABLED)) { |
802 | pr_info("EEH: Passed-through PE PHB#%x-PE#%x was thawed by reset, re-freezing for safety.\n" , |
803 | pe->phb->global_number, pe->addr); |
804 | eeh_pe_set_option(pe, EEH_OPT_FREEZE_PE); |
805 | } |
806 | } |
807 | } |
808 | } |
809 | |
810 | /** |
811 | * eeh_pe_reset_full - Complete a full reset process on the indicated PE |
812 | * @pe: EEH PE |
813 | * @include_passed: include passed-through devices? |
814 | * |
815 | * This function executes a full reset procedure on a PE, including setting |
816 | * the appropriate flags, performing a fundamental or hot reset, and then |
817 | * deactivating the reset status. It is designed to be used within the EEH |
818 | * subsystem, as opposed to eeh_pe_reset which is exported to drivers and |
819 | * only performs a single operation at a time. |
820 | * |
821 | * This function will attempt to reset a PE three times before failing. |
822 | */ |
823 | int eeh_pe_reset_full(struct eeh_pe *pe, bool include_passed) |
824 | { |
825 | int reset_state = (EEH_PE_RESET | EEH_PE_CFG_BLOCKED); |
826 | int type = EEH_RESET_HOT; |
827 | unsigned int freset = 0; |
828 | int i, state = 0, ret; |
829 | |
830 | /* |
831 | * Determine the type of reset to perform - hot or fundamental. |
832 | * Hot reset is the default operation, unless any device under the |
833 | * PE requires a fundamental reset. |
834 | */ |
835 | eeh_pe_dev_traverse(pe, eeh_set_dev_freset, &freset); |
836 | |
837 | if (freset) |
838 | type = EEH_RESET_FUNDAMENTAL; |
839 | |
840 | /* Mark the PE as in reset state and block config space accesses */ |
841 | eeh_pe_state_mark(pe, reset_state); |
842 | |
843 | /* Make three attempts at resetting the bus */ |
844 | for (i = 0; i < 3; i++) { |
845 | ret = eeh_pe_reset(pe, type, include_passed); |
846 | if (!ret) |
847 | ret = eeh_pe_reset(pe, EEH_RESET_DEACTIVATE, |
848 | include_passed); |
849 | if (ret) { |
850 | ret = -EIO; |
851 | pr_warn("EEH: Failure %d resetting PHB#%x-PE#%x (attempt %d)\n\n" , |
852 | state, pe->phb->global_number, pe->addr, i + 1); |
853 | continue; |
854 | } |
855 | if (i) |
856 | pr_warn("EEH: PHB#%x-PE#%x: Successful reset (attempt %d)\n" , |
857 | pe->phb->global_number, pe->addr, i + 1); |
858 | |
859 | /* Wait until the PE is in a functioning state */ |
860 | state = eeh_wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); |
861 | if (state < 0) { |
862 | pr_warn("EEH: Unrecoverable slot failure on PHB#%x-PE#%x" , |
863 | pe->phb->global_number, pe->addr); |
864 | ret = -ENOTRECOVERABLE; |
865 | break; |
866 | } |
867 | if (eeh_state_active(state)) |
868 | break; |
869 | else |
870 | pr_warn("EEH: PHB#%x-PE#%x: Slot inactive after reset: 0x%x (attempt %d)\n" , |
871 | pe->phb->global_number, pe->addr, state, i + 1); |
872 | } |
873 | |
874 | /* Resetting the PE may have unfrozen child PEs. If those PEs have been |
875 | * (potentially) passed through to a guest, re-freeze them: |
876 | */ |
877 | if (!include_passed) |
		eeh_pe_refreeze_passed(pe);
879 | |
880 | eeh_pe_state_clear(pe, reset_state, true); |
881 | return ret; |
882 | } |
883 | |
884 | /** |
885 | * eeh_save_bars - Save device bars |
886 | * @edev: PCI device associated EEH device |
887 | * |
888 | * Save the values of the device bars. Unlike the restore |
889 | * routine, this routine is *not* recursive. This is because |
890 | * PCI devices are added individually; but, for the restore, |
891 | * an entire slot is reset at a time. |
892 | */ |
893 | void eeh_save_bars(struct eeh_dev *edev) |
894 | { |
895 | int i; |
896 | |
897 | if (!edev) |
898 | return; |
899 | |
900 | for (i = 0; i < 16; i++) |
901 | eeh_ops->read_config(edev, i * 4, 4, &edev->config_space[i]); |
902 | |
903 | /* |
	 * For PCI bridges, including the root port, we need to enable
	 * bus mastering explicitly. Otherwise, it can't fetch IODA table
	 * entries correctly. So we cache the bit in advance so that
	 * we can restore it after reset, either PHB range or PE range.
908 | */ |
909 | if (edev->mode & EEH_DEV_BRIDGE) |
910 | edev->config_space[1] |= PCI_COMMAND_MASTER; |
911 | } |
912 | |
913 | static int eeh_reboot_notifier(struct notifier_block *nb, |
914 | unsigned long action, void *unused) |
915 | { |
916 | eeh_clear_flag(EEH_ENABLED); |
917 | return NOTIFY_DONE; |
918 | } |
919 | |
920 | static struct notifier_block eeh_reboot_nb = { |
921 | .notifier_call = eeh_reboot_notifier, |
922 | }; |
923 | |
924 | static int eeh_device_notifier(struct notifier_block *nb, |
925 | unsigned long action, void *data) |
926 | { |
927 | struct device *dev = data; |
928 | |
929 | switch (action) { |
930 | /* |
931 | * Note: It's not possible to perform EEH device addition (i.e. |
932 | * {pseries,pnv}_pcibios_bus_add_device()) here because it depends on |
933 | * the device's resources, which have not yet been set up. |
934 | */ |
935 | case BUS_NOTIFY_DEL_DEVICE: |
936 | eeh_remove_device(to_pci_dev(dev)); |
937 | break; |
938 | default: |
939 | break; |
940 | } |
941 | return NOTIFY_DONE; |
942 | } |
943 | |
944 | static struct notifier_block eeh_device_nb = { |
945 | .notifier_call = eeh_device_notifier, |
946 | }; |
947 | |
948 | /** |
949 | * eeh_init - System wide EEH initialization |
950 | * @ops: struct to trace EEH operation callback functions |
951 | * |
952 | * It's the platform's job to call this from an arch_initcall(). |
953 | */ |
954 | int eeh_init(struct eeh_ops *ops) |
955 | { |
956 | struct pci_controller *hose, *tmp; |
957 | int ret = 0; |
958 | |
959 | /* the platform should only initialise EEH once */ |
960 | if (WARN_ON(eeh_ops)) |
961 | return -EEXIST; |
962 | if (WARN_ON(!ops)) |
963 | return -ENOENT; |
964 | eeh_ops = ops; |
965 | |
966 | /* Register reboot notifier */ |
967 | ret = register_reboot_notifier(&eeh_reboot_nb); |
968 | if (ret) { |
969 | pr_warn("%s: Failed to register reboot notifier (%d)\n" , |
970 | __func__, ret); |
971 | return ret; |
972 | } |
973 | |
	ret = bus_register_notifier(&pci_bus_type, &eeh_device_nb);
975 | if (ret) { |
976 | pr_warn("%s: Failed to register bus notifier (%d)\n" , |
977 | __func__, ret); |
978 | return ret; |
979 | } |
980 | |
981 | /* Initialize PHB PEs */ |
982 | list_for_each_entry_safe(hose, tmp, &hose_list, list_node) |
983 | eeh_phb_pe_create(hose); |
984 | |
985 | eeh_addr_cache_init(); |
986 | |
987 | /* Initialize EEH event */ |
988 | return eeh_event_init(); |
989 | } |
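
/*
 * A platform typically wires this up from its own init path; a rough
 * sketch (hypothetical platform code, "foo" names invented purely for
 * illustration) would be:
 *
 *	static struct eeh_ops foo_eeh_ops = {
 *		.name			= "foo",
 *		.probe			= foo_eeh_probe,
 *		.set_option		= foo_eeh_set_option,
 *		.get_state		= foo_eeh_get_state,
 *		.reset			= foo_eeh_reset,
 *		.get_log		= foo_eeh_get_log,
 *		.configure_bridge	= foo_eeh_configure_bridge,
 *		.read_config		= foo_eeh_read_config,
 *		.write_config		= foo_eeh_write_config,
 *	};
 *
 *	static int __init foo_eeh_init(void)
 *	{
 *		return eeh_init(&foo_eeh_ops);
 *	}
 *	arch_initcall(foo_eeh_init);
 */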
990 | |
991 | /** |
992 | * eeh_probe_device() - Perform EEH initialization for the indicated pci device |
993 | * @dev: pci device for which to set up EEH |
994 | * |
995 | * This routine must be used to complete EEH initialization for PCI |
996 | * devices that were added after system boot (e.g. hotplug, dlpar). |
997 | */ |
998 | void eeh_probe_device(struct pci_dev *dev) |
999 | { |
1000 | struct eeh_dev *edev; |
1001 | |
1002 | pr_debug("EEH: Adding device %s\n" , pci_name(dev)); |
1003 | |
1004 | /* |
1005 | * pci_dev_to_eeh_dev() can only work if eeh_probe_dev() was |
1006 | * already called for this device. |
1007 | */ |
1008 | if (WARN_ON_ONCE(pci_dev_to_eeh_dev(dev))) { |
1009 | pci_dbg(dev, "Already bound to an eeh_dev!\n" ); |
1010 | return; |
1011 | } |
1012 | |
1013 | edev = eeh_ops->probe(dev); |
1014 | if (!edev) { |
1015 | pr_debug("EEH: Adding device failed\n" ); |
1016 | return; |
1017 | } |
1018 | |
1019 | /* |
1020 | * FIXME: We rely on pcibios_release_device() to remove the |
1021 | * existing EEH state. The release function is only called if |
1022 | * the pci_dev's refcount drops to zero so if something is |
1023 | * keeping a ref to a device (e.g. a filesystem) we need to |
1024 | * remove the old EEH state. |
1025 | * |
1026 | * FIXME: HEY MA, LOOK AT ME, NO LOCKING! |
1027 | */ |
1028 | if (edev->pdev && edev->pdev != dev) { |
1029 | eeh_pe_tree_remove(edev); |
1030 | eeh_addr_cache_rmv_dev(edev->pdev); |
1031 | eeh_sysfs_remove_device(edev->pdev); |
1032 | |
1033 | /* |
		 * The PCI device should have been removed by now, even
		 * though that didn't happen cleanly. So we needn't call
		 * into the error handler afterwards.
1037 | */ |
1038 | edev->mode |= EEH_DEV_NO_HANDLER; |
1039 | } |
1040 | |
1041 | /* bind the pdev and the edev together */ |
1042 | edev->pdev = dev; |
1043 | dev->dev.archdata.edev = edev; |
1044 | eeh_addr_cache_insert_dev(dev); |
1045 | eeh_sysfs_add_device(dev); |
1046 | } |
1047 | |
1048 | /** |
1049 | * eeh_remove_device - Undo EEH setup for the indicated pci device |
1050 | * @dev: pci device to be removed |
1051 | * |
1052 | * This routine should be called when a device is removed from |
1053 | * a running system (e.g. by hotplug or dlpar). It unregisters |
1054 | * the PCI device from the EEH subsystem. I/O errors affecting |
1055 | * this device will no longer be detected after this call; thus, |
1056 | * i/o errors affecting this slot may leave this device unusable. |
1057 | */ |
1058 | void eeh_remove_device(struct pci_dev *dev) |
1059 | { |
1060 | struct eeh_dev *edev; |
1061 | |
1062 | if (!dev || !eeh_enabled()) |
1063 | return; |
1064 | edev = pci_dev_to_eeh_dev(dev); |
1065 | |
1066 | /* Unregister the device with the EEH/PCI address search system */ |
1067 | dev_dbg(&dev->dev, "EEH: Removing device\n" ); |
1068 | |
1069 | if (!edev || !edev->pdev || !edev->pe) { |
1070 | dev_dbg(&dev->dev, "EEH: Device not referenced!\n" ); |
1071 | return; |
1072 | } |
1073 | |
1074 | /* |
	 * During the hotplug for EEH error recovery, we need the EEH
	 * device attached to the parent PE in order to restore the
	 * BARs a bit later. So we keep it for BAR restore and remove
	 * it from the parent PE during the BAR restore.
1079 | */ |
1080 | edev->pdev = NULL; |
1081 | |
1082 | /* |
1083 | * eeh_sysfs_remove_device() uses pci_dev_to_eeh_dev() so we need to |
1084 | * remove the sysfs files before clearing dev.archdata.edev |
1085 | */ |
1086 | if (edev->mode & EEH_DEV_SYSFS) |
1087 | eeh_sysfs_remove_device(dev); |
1088 | |
1089 | /* |
	 * We're removing from the PCI subsystem, which means
	 * the PCI device driver doesn't support EEH, or doesn't
	 * support it well. So we rely completely on hotplug to do
	 * recovery for the specific PCI device.
1094 | */ |
1095 | edev->mode |= EEH_DEV_NO_HANDLER; |
1096 | |
1097 | eeh_addr_cache_rmv_dev(dev); |
1098 | |
1099 | /* |
1100 | * The flag "in_error" is used to trace EEH devices for VFs |
1101 | * in error state or not. It's set in eeh_report_error(). If |
1102 | * it's not set, eeh_report_{reset,resume}() won't be called |
1103 | * for the VF EEH device. |
1104 | */ |
1105 | edev->in_error = false; |
1106 | dev->dev.archdata.edev = NULL; |
1107 | if (!(edev->pe->state & EEH_PE_KEEP)) |
1108 | eeh_pe_tree_remove(edev); |
1109 | else |
1110 | edev->mode |= EEH_DEV_DISCONNECTED; |
1111 | } |
1112 | |
1113 | int eeh_unfreeze_pe(struct eeh_pe *pe) |
1114 | { |
1115 | int ret; |
1116 | |
1117 | ret = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); |
1118 | if (ret) { |
1119 | pr_warn("%s: Failure %d enabling IO on PHB#%x-PE#%x\n" , |
1120 | __func__, ret, pe->phb->global_number, pe->addr); |
1121 | return ret; |
1122 | } |
1123 | |
1124 | ret = eeh_pci_enable(pe, EEH_OPT_THAW_DMA); |
1125 | if (ret) { |
1126 | pr_warn("%s: Failure %d enabling DMA on PHB#%x-PE#%x\n" , |
1127 | __func__, ret, pe->phb->global_number, pe->addr); |
1128 | return ret; |
1129 | } |
1130 | |
1131 | return ret; |
1132 | } |
1133 | |
1134 | |
1135 | static struct pci_device_id eeh_reset_ids[] = { |
1136 | { PCI_DEVICE(0x19a2, 0x0710) }, /* Emulex, BE */ |
1137 | { PCI_DEVICE(0x10df, 0xe220) }, /* Emulex, Lancer */ |
1138 | { PCI_DEVICE(0x14e4, 0x1657) }, /* Broadcom BCM5719 */ |
1139 | { 0 } |
1140 | }; |
1141 | |
1142 | static int eeh_pe_change_owner(struct eeh_pe *pe) |
1143 | { |
1144 | struct eeh_dev *edev, *tmp; |
1145 | struct pci_dev *pdev; |
1146 | struct pci_device_id *id; |
1147 | int ret; |
1148 | |
1149 | /* Check PE state */ |
1150 | ret = eeh_ops->get_state(pe, NULL); |
1151 | if (ret < 0 || ret == EEH_STATE_NOT_SUPPORT) |
1152 | return 0; |
1153 | |
1154 | /* Unfrozen PE, nothing to do */ |
1155 | if (eeh_state_active(ret)) |
1156 | return 0; |
1157 | |
1158 | /* Frozen PE, check if it needs PE level reset */ |
1159 | eeh_pe_for_each_dev(pe, edev, tmp) { |
1160 | pdev = eeh_dev_to_pci_dev(edev); |
1161 | if (!pdev) |
1162 | continue; |
1163 | |
1164 | for (id = &eeh_reset_ids[0]; id->vendor != 0; id++) { |
1165 | if (id->vendor != PCI_ANY_ID && |
1166 | id->vendor != pdev->vendor) |
1167 | continue; |
1168 | if (id->device != PCI_ANY_ID && |
1169 | id->device != pdev->device) |
1170 | continue; |
1171 | if (id->subvendor != PCI_ANY_ID && |
1172 | id->subvendor != pdev->subsystem_vendor) |
1173 | continue; |
1174 | if (id->subdevice != PCI_ANY_ID && |
1175 | id->subdevice != pdev->subsystem_device) |
1176 | continue; |
1177 | |
1178 | return eeh_pe_reset_and_recover(pe); |
1179 | } |
1180 | } |
1181 | |
1182 | ret = eeh_unfreeze_pe(pe); |
1183 | if (!ret) |
1184 | eeh_pe_state_clear(pe, EEH_PE_ISOLATED, true); |
1185 | return ret; |
1186 | } |
1187 | |
1188 | /** |
1189 | * eeh_dev_open - Increase count of pass through devices for PE |
1190 | * @pdev: PCI device |
1191 | * |
1192 | * Increase count of passed through devices for the indicated |
 * PE. As a result, the EEH errors detected on the PE won't be
1194 | * reported. The PE owner will be responsible for detection |
1195 | * and recovery. |
1196 | */ |
1197 | int eeh_dev_open(struct pci_dev *pdev) |
1198 | { |
1199 | struct eeh_dev *edev; |
1200 | int ret = -ENODEV; |
1201 | |
1202 | mutex_lock(&eeh_dev_mutex); |
1203 | |
1204 | /* No PCI device ? */ |
1205 | if (!pdev) |
1206 | goto out; |
1207 | |
1208 | /* No EEH device or PE ? */ |
1209 | edev = pci_dev_to_eeh_dev(pdev); |
1210 | if (!edev || !edev->pe) |
1211 | goto out; |
1212 | |
1213 | /* |
1214 | * The PE might have been put into frozen state, but we |
1215 | * didn't detect that yet. The passed through PCI devices |
1216 | * in frozen PE won't work properly. Clear the frozen state |
1217 | * in advance. |
1218 | */ |
	ret = eeh_pe_change_owner(edev->pe);
1220 | if (ret) |
1221 | goto out; |
1222 | |
1223 | /* Increase PE's pass through count */ |
	atomic_inc(&edev->pe->pass_dev_cnt);
	mutex_unlock(&eeh_dev_mutex);
1226 | |
1227 | return 0; |
1228 | out: |
	mutex_unlock(&eeh_dev_mutex);
1230 | return ret; |
1231 | } |
1232 | EXPORT_SYMBOL_GPL(eeh_dev_open); |
1233 | |
1234 | /** |
1235 | * eeh_dev_release - Decrease count of pass through devices for PE |
1236 | * @pdev: PCI device |
1237 | * |
1238 | * Decrease count of pass through devices for the indicated PE. If |
1239 | * there is no passed through device in PE, the EEH errors detected |
1240 | * on the PE will be reported and handled as usual. |
1241 | */ |
1242 | void eeh_dev_release(struct pci_dev *pdev) |
1243 | { |
1244 | struct eeh_dev *edev; |
1245 | |
1246 | mutex_lock(&eeh_dev_mutex); |
1247 | |
1248 | /* No PCI device ? */ |
1249 | if (!pdev) |
1250 | goto out; |
1251 | |
1252 | /* No EEH device ? */ |
1253 | edev = pci_dev_to_eeh_dev(pdev); |
1254 | if (!edev || !edev->pe || !eeh_pe_passed(edev->pe)) |
1255 | goto out; |
1256 | |
1257 | /* Decrease PE's pass through count */ |
1258 | WARN_ON(atomic_dec_if_positive(&edev->pe->pass_dev_cnt) < 0); |
	eeh_pe_change_owner(edev->pe);
1260 | out: |
	mutex_unlock(&eeh_dev_mutex);
1262 | } |
1263 | EXPORT_SYMBOL(eeh_dev_release); |
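
/*
 * eeh_dev_open() and eeh_dev_release() are intended to be used as a
 * pair by whatever passes the device through to a guest (VFIO, for
 * example): open when the device is handed out, release when it is
 * given back. While the count is non-zero the PE is treated as passed
 * (eeh_pe_passed()) and the core leaves error handling to the new
 * owner.
 */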
1264 | |
1265 | #ifdef CONFIG_IOMMU_API |
1266 | |
1267 | static int dev_has_iommu_table(struct device *dev, void *data) |
1268 | { |
1269 | struct pci_dev *pdev = to_pci_dev(dev); |
1270 | struct pci_dev **ppdev = data; |
1271 | |
1272 | if (!dev) |
1273 | return 0; |
1274 | |
1275 | if (device_iommu_mapped(dev)) { |
1276 | *ppdev = pdev; |
1277 | return 1; |
1278 | } |
1279 | |
1280 | return 0; |
1281 | } |
1282 | |
1283 | /** |
1284 | * eeh_iommu_group_to_pe - Convert IOMMU group to EEH PE |
1285 | * @group: IOMMU group |
1286 | * |
1287 | * The routine is called to convert IOMMU group to EEH PE. |
1288 | */ |
1289 | struct eeh_pe *eeh_iommu_group_to_pe(struct iommu_group *group) |
1290 | { |
1291 | struct pci_dev *pdev = NULL; |
1292 | struct eeh_dev *edev; |
1293 | int ret; |
1294 | |
1295 | /* No IOMMU group ? */ |
1296 | if (!group) |
1297 | return NULL; |
1298 | |
	ret = iommu_group_for_each_dev(group, &pdev, dev_has_iommu_table);
1300 | if (!ret || !pdev) |
1301 | return NULL; |
1302 | |
1303 | /* No EEH device or PE ? */ |
1304 | edev = pci_dev_to_eeh_dev(pdev); |
1305 | if (!edev || !edev->pe) |
1306 | return NULL; |
1307 | |
1308 | return edev->pe; |
1309 | } |
1310 | EXPORT_SYMBOL_GPL(eeh_iommu_group_to_pe); |
1311 | |
1312 | #endif /* CONFIG_IOMMU_API */ |
1313 | |
1314 | /** |
1315 | * eeh_pe_set_option - Set options for the indicated PE |
1316 | * @pe: EEH PE |
1317 | * @option: requested option |
1318 | * |
 * The routine is called to enable or disable EEH functionality
 * on the indicated PE, or to enable IO or DMA for a frozen PE.
1321 | */ |
1322 | int eeh_pe_set_option(struct eeh_pe *pe, int option) |
1323 | { |
1324 | int ret = 0; |
1325 | |
1326 | /* Invalid PE ? */ |
1327 | if (!pe) |
1328 | return -ENODEV; |
1329 | |
1330 | /* |
	 * EEH functionality could possibly be disabled, in which
	 * case we just return an error. Note that EEH functionality
	 * isn't expected to be disabled on one specific PE.
1334 | */ |
1335 | switch (option) { |
1336 | case EEH_OPT_ENABLE: |
1337 | if (eeh_enabled()) { |
1338 | ret = eeh_pe_change_owner(pe); |
1339 | break; |
1340 | } |
1341 | ret = -EIO; |
1342 | break; |
1343 | case EEH_OPT_DISABLE: |
1344 | break; |
1345 | case EEH_OPT_THAW_MMIO: |
1346 | case EEH_OPT_THAW_DMA: |
1347 | case EEH_OPT_FREEZE_PE: |
1348 | if (!eeh_ops || !eeh_ops->set_option) { |
1349 | ret = -ENOENT; |
1350 | break; |
1351 | } |
1352 | |
		ret = eeh_pci_enable(pe, option);
1354 | break; |
1355 | default: |
1356 | pr_debug("%s: Option %d out of range (%d, %d)\n" , |
1357 | __func__, option, EEH_OPT_DISABLE, EEH_OPT_THAW_DMA); |
1358 | ret = -EINVAL; |
1359 | } |
1360 | |
1361 | return ret; |
1362 | } |
1363 | EXPORT_SYMBOL_GPL(eeh_pe_set_option); |
1364 | |
1365 | /** |
1366 | * eeh_pe_get_state - Retrieve PE's state |
1367 | * @pe: EEH PE |
1368 | * |
1369 | * Retrieve the PE's state, which includes 3 aspects: enabled |
1370 | * DMA, enabled IO and asserted reset. |
1371 | */ |
1372 | int eeh_pe_get_state(struct eeh_pe *pe) |
1373 | { |
1374 | int result, ret = 0; |
1375 | bool rst_active, dma_en, mmio_en; |
1376 | |
1377 | /* Existing PE ? */ |
1378 | if (!pe) |
1379 | return -ENODEV; |
1380 | |
1381 | if (!eeh_ops || !eeh_ops->get_state) |
1382 | return -ENOENT; |
1383 | |
1384 | /* |
1385 | * If the parent PE is owned by the host kernel and is undergoing |
1386 | * error recovery, we should return the PE state as temporarily |
1387 | * unavailable so that the error recovery on the guest is suspended |
1388 | * until the recovery completes on the host. |
1389 | */ |
1390 | if (pe->parent && |
1391 | !(pe->state & EEH_PE_REMOVED) && |
1392 | (pe->parent->state & (EEH_PE_ISOLATED | EEH_PE_RECOVERING))) |
1393 | return EEH_PE_STATE_UNAVAIL; |
1394 | |
1395 | result = eeh_ops->get_state(pe, NULL); |
1396 | rst_active = !!(result & EEH_STATE_RESET_ACTIVE); |
1397 | dma_en = !!(result & EEH_STATE_DMA_ENABLED); |
1398 | mmio_en = !!(result & EEH_STATE_MMIO_ENABLED); |
1399 | |
1400 | if (rst_active) |
1401 | ret = EEH_PE_STATE_RESET; |
1402 | else if (dma_en && mmio_en) |
1403 | ret = EEH_PE_STATE_NORMAL; |
1404 | else if (!dma_en && !mmio_en) |
1405 | ret = EEH_PE_STATE_STOPPED_IO_DMA; |
1406 | else if (!dma_en && mmio_en) |
1407 | ret = EEH_PE_STATE_STOPPED_DMA; |
1408 | else |
1409 | ret = EEH_PE_STATE_UNAVAIL; |
1410 | |
1411 | return ret; |
1412 | } |
1413 | EXPORT_SYMBOL_GPL(eeh_pe_get_state); |
1414 | |
1415 | static int eeh_pe_reenable_devices(struct eeh_pe *pe, bool include_passed) |
1416 | { |
1417 | struct eeh_dev *edev, *tmp; |
1418 | struct pci_dev *pdev; |
1419 | int ret = 0; |
1420 | |
1421 | eeh_pe_restore_bars(pe); |
1422 | |
1423 | /* |
1424 | * Reenable PCI devices as the devices passed |
1425 | * through are always enabled before the reset. |
1426 | */ |
1427 | eeh_pe_for_each_dev(pe, edev, tmp) { |
1428 | pdev = eeh_dev_to_pci_dev(edev); |
1429 | if (!pdev) |
1430 | continue; |
1431 | |
1432 | ret = pci_reenable_device(pdev); |
1433 | if (ret) { |
1434 | pr_warn("%s: Failure %d reenabling %s\n" , |
1435 | __func__, ret, pci_name(pdev)); |
1436 | return ret; |
1437 | } |
1438 | } |
1439 | |
1440 | /* The PE is still in frozen state */ |
1441 | if (include_passed || !eeh_pe_passed(pe)) { |
1442 | ret = eeh_unfreeze_pe(pe); |
1443 | } else |
1444 | pr_info("EEH: Note: Leaving passthrough PHB#%x-PE#%x frozen.\n" , |
1445 | pe->phb->global_number, pe->addr); |
1446 | if (!ret) |
1447 | eeh_pe_state_clear(pe, EEH_PE_ISOLATED, include_passed); |
1448 | return ret; |
1449 | } |
1450 | |
1451 | |
1452 | /** |
1453 | * eeh_pe_reset - Issue PE reset according to specified type |
1454 | * @pe: EEH PE |
1455 | * @option: reset type |
1456 | * @include_passed: include passed-through devices? |
1457 | * |
1458 | * The routine is called to reset the specified PE with the |
1459 | * indicated type, either fundamental reset or hot reset. |
1460 | * PE reset is the most important part for error recovery. |
1461 | */ |
1462 | int eeh_pe_reset(struct eeh_pe *pe, int option, bool include_passed) |
1463 | { |
1464 | int ret = 0; |
1465 | |
1466 | /* Invalid PE ? */ |
1467 | if (!pe) |
1468 | return -ENODEV; |
1469 | |
1470 | if (!eeh_ops || !eeh_ops->set_option || !eeh_ops->reset) |
1471 | return -ENOENT; |
1472 | |
1473 | switch (option) { |
1474 | case EEH_RESET_DEACTIVATE: |
1475 | ret = eeh_ops->reset(pe, option); |
1476 | eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED, include_passed); |
1477 | if (ret) |
1478 | break; |
1479 | |
1480 | ret = eeh_pe_reenable_devices(pe, include_passed); |
1481 | break; |
1482 | case EEH_RESET_HOT: |
1483 | case EEH_RESET_FUNDAMENTAL: |
1484 | /* |
		 * Proactively freeze the PE to drop all MMIO access
		 * during reset, which should be banned as it always
		 * causes a recursive EEH error.
1488 | */ |
1489 | eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE); |
1490 | |
1491 | eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED); |
1492 | ret = eeh_ops->reset(pe, option); |
1493 | break; |
1494 | default: |
1495 | pr_debug("%s: Unsupported option %d\n" , |
1496 | __func__, option); |
1497 | ret = -EINVAL; |
1498 | } |
1499 | |
1500 | return ret; |
1501 | } |
1502 | EXPORT_SYMBOL_GPL(eeh_pe_reset); |
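
/*
 * A full recovery pass (see eeh_pe_reset_full() above) strings these
 * together roughly as a hot (or fundamental) reset followed by a
 * deactivate:
 *
 *	eeh_pe_reset(pe, EEH_RESET_HOT, false);
 *	eeh_pe_reset(pe, EEH_RESET_DEACTIVATE, false);
 *
 * with the deactivate step also re-enabling the devices under the PE.
 */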
1503 | |
1504 | /** |
1505 | * eeh_pe_configure - Configure PCI bridges after PE reset |
1506 | * @pe: EEH PE |
1507 | * |
1508 | * The routine is called to restore the PCI config space for |
1509 | * those PCI devices, especially PCI bridges affected by PE |
1510 | * reset issued previously. |
1511 | */ |
1512 | int eeh_pe_configure(struct eeh_pe *pe) |
1513 | { |
1514 | int ret = 0; |
1515 | |
1516 | /* Invalid PE ? */ |
1517 | if (!pe) |
1518 | return -ENODEV; |
1519 | |
1520 | return ret; |
1521 | } |
1522 | EXPORT_SYMBOL_GPL(eeh_pe_configure); |
1523 | |
1524 | /** |
1525 | * eeh_pe_inject_err - Injecting the specified PCI error to the indicated PE |
1526 | * @pe: the indicated PE |
1527 | * @type: error type |
1528 | * @func: error function |
1529 | * @addr: address |
1530 | * @mask: address mask |
1531 | * |
1532 | * The routine is called to inject the specified PCI error, which |
1533 | * is determined by @type and @func, to the indicated PE for |
1534 | * testing purpose. |
1535 | */ |
1536 | int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func, |
1537 | unsigned long addr, unsigned long mask) |
1538 | { |
1539 | /* Invalid PE ? */ |
1540 | if (!pe) |
1541 | return -ENODEV; |
1542 | |
1543 | /* Unsupported operation ? */ |
1544 | if (!eeh_ops || !eeh_ops->err_inject) |
1545 | return -ENOENT; |
1546 | |
1547 | /* Check on PCI error type */ |
1548 | if (type != EEH_ERR_TYPE_32 && type != EEH_ERR_TYPE_64) |
1549 | return -EINVAL; |
1550 | |
1551 | /* Check on PCI error function */ |
1552 | if (func < EEH_ERR_FUNC_MIN || func > EEH_ERR_FUNC_MAX) |
1553 | return -EINVAL; |
1554 | |
1555 | return eeh_ops->err_inject(pe, type, func, addr, mask); |
1556 | } |
1557 | EXPORT_SYMBOL_GPL(eeh_pe_inject_err); |
1558 | |
1559 | #ifdef CONFIG_PROC_FS |
1560 | static int proc_eeh_show(struct seq_file *m, void *v) |
1561 | { |
1562 | if (!eeh_enabled()) { |
		seq_printf(m, "EEH Subsystem is globally disabled\n");
		seq_printf(m, "eeh_total_mmio_ffs=%llu\n", eeh_stats.total_mmio_ffs);
	} else {
		seq_printf(m, "EEH Subsystem is enabled\n");
		seq_printf(m,
			   "no device=%llu\n"
			   "no device node=%llu\n"
			   "no config address=%llu\n"
			   "check not wanted=%llu\n"
			   "eeh_total_mmio_ffs=%llu\n"
			   "eeh_false_positives=%llu\n"
			   "eeh_slot_resets=%llu\n",
1575 | eeh_stats.no_device, |
1576 | eeh_stats.no_dn, |
1577 | eeh_stats.no_cfg_addr, |
1578 | eeh_stats.ignored_check, |
1579 | eeh_stats.total_mmio_ffs, |
1580 | eeh_stats.false_positives, |
1581 | eeh_stats.slot_resets); |
1582 | } |
1583 | |
1584 | return 0; |
1585 | } |
1586 | #endif /* CONFIG_PROC_FS */ |
1587 | |
1588 | #ifdef CONFIG_DEBUG_FS |
1589 | |
1590 | |
1591 | static struct pci_dev *eeh_debug_lookup_pdev(struct file *filp, |
1592 | const char __user *user_buf, |
1593 | size_t count, loff_t *ppos) |
1594 | { |
1595 | uint32_t domain, bus, dev, fn; |
1596 | struct pci_dev *pdev; |
1597 | char buf[20]; |
1598 | int ret; |
1599 | |
1600 | memset(buf, 0, sizeof(buf)); |
	ret = simple_write_to_buffer(buf, sizeof(buf)-1, ppos, user_buf, count);
	if (!ret)
		return ERR_PTR(-EFAULT);

	ret = sscanf(buf, "%x:%x:%x.%x", &domain, &bus, &dev, &fn);
	if (ret != 4) {
		pr_err("%s: expected 4 args, got %d\n", __func__, ret);
		return ERR_PTR(-EINVAL);
	}

	pdev = pci_get_domain_bus_and_slot(domain, bus, (dev << 3) | fn);
	if (!pdev)
		return ERR_PTR(-ENODEV);
1614 | |
1615 | return pdev; |
1616 | } |
1617 | |
1618 | static int eeh_enable_dbgfs_set(void *data, u64 val) |
1619 | { |
1620 | if (val) |
1621 | eeh_clear_flag(EEH_FORCE_DISABLED); |
1622 | else |
1623 | eeh_add_flag(EEH_FORCE_DISABLED); |
1624 | |
1625 | return 0; |
1626 | } |
1627 | |
1628 | static int eeh_enable_dbgfs_get(void *data, u64 *val) |
1629 | { |
1630 | if (eeh_enabled()) |
1631 | *val = 0x1ul; |
1632 | else |
1633 | *val = 0x0ul; |
1634 | return 0; |
1635 | } |
1636 | |
1637 | DEFINE_DEBUGFS_ATTRIBUTE(eeh_enable_dbgfs_ops, eeh_enable_dbgfs_get, |
			 eeh_enable_dbgfs_set, "0x%llx\n");
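
/*
 * The attribute above is expected to back an "eeh_enable" debugfs
 * file (typically under the powerpc debugfs directory), so something
 * like "echo 1 > .../eeh_enable" clears EEH_FORCE_DISABLED and
 * "echo 0" sets it; reading it reports whether EEH is currently
 * enabled.
 */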
1639 | |
1640 | static ssize_t eeh_force_recover_write(struct file *filp, |
1641 | const char __user *user_buf, |
1642 | size_t count, loff_t *ppos) |
1643 | { |
1644 | struct pci_controller *hose; |
1645 | uint32_t phbid, pe_no; |
1646 | struct eeh_pe *pe; |
1647 | char buf[20]; |
1648 | int ret; |
1649 | |
	ret = simple_write_to_buffer(buf, sizeof(buf), ppos, user_buf, count);
1651 | if (!ret) |
1652 | return -EFAULT; |
1653 | |
1654 | /* |
1655 | * When PE is NULL the event is a "special" event. Rather than |
1656 | * recovering a specific PE it forces the EEH core to scan for failed |
1657 | * PHBs and recovers each. This needs to be done before any device |
1658 | * recoveries can occur. |
1659 | */ |
	if (!strncmp(buf, "hwcheck", 7)) {
1661 | __eeh_send_failure_event(NULL); |
1662 | return count; |
1663 | } |
1664 | |
	ret = sscanf(buf, "%x:%x", &phbid, &pe_no);
1666 | if (ret != 2) |
1667 | return -EINVAL; |
1668 | |
1669 | hose = pci_find_controller_for_domain(phbid); |
1670 | if (!hose) |
1671 | return -ENODEV; |
1672 | |
1673 | /* Retrieve PE */ |
1674 | pe = eeh_pe_get(hose, pe_no); |
1675 | if (!pe) |
1676 | return -ENODEV; |
1677 | |
1678 | /* |
1679 | * We don't do any state checking here since the detection |
1680 | * process is async to the recovery process. The recovery |
1681 | * thread *should* not break even if we schedule a recovery |
1682 | * from an odd state (e.g. PE removed, or recovery of a |
1683 | * non-isolated PE) |
1684 | */ |
1685 | __eeh_send_failure_event(pe); |
1686 | |
1687 | return ret < 0 ? ret : count; |
1688 | } |
1689 | |
1690 | static const struct file_operations eeh_force_recover_fops = { |
1691 | .open = simple_open, |
1692 | .llseek = no_llseek, |
1693 | .write = eeh_force_recover_write, |
1694 | }; |
1695 | |
1696 | static ssize_t eeh_debugfs_dev_usage(struct file *filp, |
1697 | char __user *user_buf, |
1698 | size_t count, loff_t *ppos) |
1699 | { |
	static const char usage[] = "input format: <domain>:<bus>:<dev>.<fn>\n";

	return simple_read_from_buffer(user_buf, count, ppos,
				       usage, sizeof(usage) - 1);
1704 | } |
1705 | |
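/*
 * Write handler for the "eeh_dev_check" debugfs file: run
 * eeh_dev_check_failure() against the device named in the write and
 * log the result. Handy for poking a device you suspect is frozen,
 * e.g. (the address below is only an example):
 *
 *	echo 0000:01:00.0 > /sys/kernel/debug/powerpc/eeh_dev_check
 */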
1706 | static ssize_t eeh_dev_check_write(struct file *filp, |
1707 | const char __user *user_buf, |
1708 | size_t count, loff_t *ppos) |
1709 | { |
1710 | struct pci_dev *pdev; |
1711 | struct eeh_dev *edev; |
1712 | int ret; |
1713 | |
1714 | pdev = eeh_debug_lookup_pdev(filp, user_buf, count, ppos); |
	if (IS_ERR(pdev))
		return PTR_ERR(pdev);
1717 | |
1718 | edev = pci_dev_to_eeh_dev(pdev); |
1719 | if (!edev) { |
1720 | pci_err(pdev, "No eeh_dev for this device!\n" ); |
1721 | pci_dev_put(dev: pdev); |
1722 | return -ENODEV; |
1723 | } |
1724 | |
1725 | ret = eeh_dev_check_failure(edev); |
1726 | pci_info(pdev, "eeh_dev_check_failure(%s) = %d\n" , |
1727 | pci_name(pdev), ret); |
1728 | |
1729 | pci_dev_put(dev: pdev); |
1730 | |
1731 | return count; |
1732 | } |
1733 | |
1734 | static const struct file_operations eeh_dev_check_fops = { |
1735 | .open = simple_open, |
1736 | .llseek = no_llseek, |
1737 | .write = eeh_dev_check_write, |
1738 | .read = eeh_debugfs_dev_usage, |
1739 | }; |
1740 | |
1741 | static int eeh_debugfs_break_device(struct pci_dev *pdev) |
1742 | { |
1743 | struct resource *bar = NULL; |
1744 | void __iomem *mapped; |
1745 | u16 old, bit; |
1746 | int i, pos; |
1747 | |
1748 | /* Do we have an MMIO BAR to disable? */ |
1749 | for (i = 0; i <= PCI_STD_RESOURCE_END; i++) { |
1750 | struct resource *r = &pdev->resource[i]; |
1751 | |
1752 | if (!r->flags || !r->start) |
1753 | continue; |
1754 | if (r->flags & IORESOURCE_IO) |
1755 | continue; |
1756 | if (r->flags & IORESOURCE_UNSET) |
1757 | continue; |
1758 | |
1759 | bar = r; |
1760 | break; |
1761 | } |
1762 | |
1763 | if (!bar) { |
1764 | pci_err(pdev, "Unable to find Memory BAR to cause EEH with\n" ); |
1765 | return -ENXIO; |
1766 | } |
1767 | |
1768 | pci_err(pdev, "Going to break: %pR\n" , bar); |
1769 | |
1770 | if (pdev->is_virtfn) { |
1771 | #ifndef CONFIG_PCI_IOV |
1772 | return -ENXIO; |
1773 | #else |
1774 | /* |
1775 | * VFs don't have a per-function COMMAND register, so the best |
1776 | * we can do is clear the Memory Space Enable bit in the PF's |
1777 | * SRIOV control reg. |
1778 | * |
		 * Unfortunately, this requires that we have a PF (i.e. it doesn't
1780 | * work for a passed-through VF) and it has the potential side |
1781 | * effect of also causing an EEH on every other VF under the |
1782 | * PF. Oh well. |
1783 | */ |
1784 | pdev = pdev->physfn; |
1785 | if (!pdev) |
1786 | return -ENXIO; /* passed through VFs have no PF */ |
1787 | |
		pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV);
1789 | pos += PCI_SRIOV_CTRL; |
1790 | bit = PCI_SRIOV_CTRL_MSE; |
1791 | #endif /* !CONFIG_PCI_IOV */ |
1792 | } else { |
1793 | bit = PCI_COMMAND_MEMORY; |
1794 | pos = PCI_COMMAND; |
1795 | } |
1796 | |
1797 | /* |
1798 | * Process here is: |
1799 | * |
1800 | * 1. Disable Memory space. |
1801 | * |
	 * 2. Perform an MMIO to the device. This should result in an error
	 *    (CA / UR) being raised by the device, which results in an EEH
	 *    PE freeze. Using the in_8() accessor skips the EEH detection
	 *    hook in the MMIO macros, so the EEH detection machinery won't
	 *    be triggered here. This matches the usual behaviour of EEH,
	 *    where the HW asynchronously freezes a PE and it's up to the
	 *    kernel to notice and deal with it.
	 *
	 * 3. Turn Memory space back on. This is more important for VFs
	 *    since recovery will probably fail if we don't. For normal
	 *    devices the COMMAND register is reset as a part of
	 *    re-initialising the device.
1814 | * |
1815 | * Breaking stuff is the point so who cares if it's racy ;) |
1816 | */ |
	pci_read_config_word(pdev, pos, &old);

	mapped = ioremap(bar->start, PAGE_SIZE);
	if (!mapped) {
		pci_err(pdev, "Unable to map MMIO BAR %pR\n", bar);
		return -ENXIO;
	}

	pci_write_config_word(pdev, pos, old & ~bit);
	in_8(mapped);
	pci_write_config_word(pdev, pos, old);

	iounmap(mapped);
1830 | |
1831 | return 0; |
1832 | } |
1833 | |
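/*
 * Write handler for the "eeh_dev_break" debugfs file: deliberately
 * break the named device via eeh_debugfs_break_device() above so the
 * EEH recovery path can be exercised without genuinely faulty
 * hardware, e.g. (example address, debugfs assumed at
 * /sys/kernel/debug):
 *
 *	echo 0000:01:00.0 > /sys/kernel/debug/powerpc/eeh_dev_break
 */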
1834 | static ssize_t eeh_dev_break_write(struct file *filp, |
1835 | const char __user *user_buf, |
1836 | size_t count, loff_t *ppos) |
1837 | { |
1838 | struct pci_dev *pdev; |
1839 | int ret; |
1840 | |
1841 | pdev = eeh_debug_lookup_pdev(filp, user_buf, count, ppos); |
	if (IS_ERR(pdev))
		return PTR_ERR(pdev);

	ret = eeh_debugfs_break_device(pdev);
	pci_dev_put(pdev);
1847 | |
1848 | if (ret < 0) |
1849 | return ret; |
1850 | |
1851 | return count; |
1852 | } |
1853 | |
1854 | static const struct file_operations eeh_dev_break_fops = { |
1855 | .open = simple_open, |
1856 | .llseek = no_llseek, |
1857 | .write = eeh_dev_break_write, |
1858 | .read = eeh_debugfs_dev_usage, |
1859 | }; |
1860 | |
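/*
 * Write handler for the "eeh_dev_can_recover" debugfs file: report,
 * via the return value, whether the driver bound to the named device
 * implements enough of the PCI error handling callbacks for EEH
 * recovery to work. A driver opts in by wiring up a struct
 * pci_error_handlers; a minimal sketch for a hypothetical driver
 * "foo" (with .resume being recommended but optional, see the comment
 * in the function body) might look like:
 *
 *	static const struct pci_error_handlers foo_err_handler = {
 *		.error_detected = foo_error_detected,
 *		.slot_reset     = foo_slot_reset,
 *		.resume         = foo_resume,
 *	};
 *
 *	static struct pci_driver foo_driver = {
 *		...
 *		.err_handler	= &foo_err_handler,
 *	};
 */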
1861 | static ssize_t eeh_dev_can_recover(struct file *filp, |
1862 | const char __user *user_buf, |
1863 | size_t count, loff_t *ppos) |
1864 | { |
1865 | struct pci_driver *drv; |
1866 | struct pci_dev *pdev; |
	ssize_t ret;
1868 | |
1869 | pdev = eeh_debug_lookup_pdev(filp, user_buf, count, ppos); |
	if (IS_ERR(pdev))
		return PTR_ERR(pdev);
1872 | |
1873 | /* |
1874 | * In order for error recovery to work the driver needs to implement |
1875 | * .error_detected(), so it can quiesce IO to the device, and |
1876 | * .slot_reset() so it can re-initialise the device after a reset. |
1877 | * |
1878 | * Ideally they'd implement .resume() too, but some drivers which |
1879 | * we need to support (notably IPR) don't so I guess we can tolerate |
1880 | * that. |
1881 | * |
1882 | * .mmio_enabled() is mostly there as a work-around for devices which |
1883 | * take forever to re-init after a hot reset. Implementing that is |
1884 | * strictly optional. |
1885 | */ |
	drv = pci_dev_driver(pdev);
1887 | if (drv && |
1888 | drv->err_handler && |
1889 | drv->err_handler->error_detected && |
1890 | drv->err_handler->slot_reset) { |
1891 | ret = count; |
1892 | } else { |
1893 | ret = -EOPNOTSUPP; |
1894 | } |
1895 | |
	pci_dev_put(pdev);
1897 | |
1898 | return ret; |
1899 | } |
1900 | |
1901 | static const struct file_operations eeh_dev_can_recover_fops = { |
1902 | .open = simple_open, |
1903 | .llseek = no_llseek, |
1904 | .write = eeh_dev_can_recover, |
1905 | .read = eeh_debugfs_dev_usage, |
1906 | }; |
1907 | |
1908 | #endif |
1909 | |
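/*
 * Register the procfs and debugfs interfaces. On pseries and powernv
 * this creates /proc/powerpc/eeh with the statistics above plus, when
 * CONFIG_DEBUG_FS is enabled, the eeh_* control files under
 * arch_debugfs_dir (normally /sys/kernel/debug/powerpc/).
 */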
1910 | static int __init eeh_init_proc(void) |
1911 | { |
1912 | if (machine_is(pseries) || machine_is(powernv)) { |
1913 | proc_create_single("powerpc/eeh" , 0, NULL, proc_eeh_show); |
1914 | #ifdef CONFIG_DEBUG_FS |
1915 | debugfs_create_file_unsafe(name: "eeh_enable" , mode: 0600, |
1916 | parent: arch_debugfs_dir, NULL, |
1917 | fops: &eeh_enable_dbgfs_ops); |
1918 | debugfs_create_u32(name: "eeh_max_freezes" , mode: 0600, |
1919 | parent: arch_debugfs_dir, value: &eeh_max_freezes); |
1920 | debugfs_create_bool(name: "eeh_disable_recovery" , mode: 0600, |
1921 | parent: arch_debugfs_dir, |
1922 | value: &eeh_debugfs_no_recover); |
1923 | debugfs_create_file_unsafe(name: "eeh_dev_check" , mode: 0600, |
1924 | parent: arch_debugfs_dir, NULL, |
1925 | fops: &eeh_dev_check_fops); |
1926 | debugfs_create_file_unsafe(name: "eeh_dev_break" , mode: 0600, |
1927 | parent: arch_debugfs_dir, NULL, |
1928 | fops: &eeh_dev_break_fops); |
1929 | debugfs_create_file_unsafe(name: "eeh_force_recover" , mode: 0600, |
1930 | parent: arch_debugfs_dir, NULL, |
1931 | fops: &eeh_force_recover_fops); |
1932 | debugfs_create_file_unsafe(name: "eeh_dev_can_recover" , mode: 0600, |
1933 | parent: arch_debugfs_dir, NULL, |
1934 | fops: &eeh_dev_can_recover_fops); |
1935 | eeh_cache_debugfs_init(); |
1936 | #endif |
1937 | } |
1938 | |
1939 | return 0; |
1940 | } |
1941 | __initcall(eeh_init_proc); |
1942 | |