1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * PCIe AER software error injection support. |
4 | * |
5 | * Debugging PCIe AER code is quite difficult because it is hard to |
6 | * trigger various real hardware errors. Software based error |
7 | * injection can fake almost all kinds of errors with the help of a |
8 | * user space helper tool aer-inject, which can be gotten from: |
9 | * https://git.kernel.org/cgit/linux/kernel/git/gong.chen/aer-inject.git/ |
10 | * |
11 | * Copyright 2009 Intel Corporation. |
12 | * Huang Ying <ying.huang@intel.com> |
13 | */ |
14 | |
/*
 * Prefix prepended to format strings passed through dev_fmt().
 * NOTE(review): presumably consumed by the dev_*()/pci_*() logging helpers,
 * in which case it must stay ahead of the #include lines - confirm.
 */
#define dev_fmt(fmt) "aer_inject: " fmt
16 | |
17 | #include <linux/module.h> |
18 | #include <linux/init.h> |
19 | #include <linux/interrupt.h> |
20 | #include <linux/miscdevice.h> |
21 | #include <linux/pci.h> |
22 | #include <linux/slab.h> |
23 | #include <linux/fs.h> |
24 | #include <linux/uaccess.h> |
25 | #include <linux/stddef.h> |
26 | #include <linux/device.h> |
27 | |
28 | #include "portdrv.h" |
29 | |
30 | /* Override the existing corrected and uncorrected error masks */ |
31 | static bool aer_mask_override; |
32 | module_param(aer_mask_override, bool, 0); |
33 | |
34 | struct aer_error_inj { |
35 | u8 bus; |
36 | u8 dev; |
37 | u8 fn; |
38 | u32 uncor_status; |
39 | u32 cor_status; |
40 | u32 ; |
41 | u32 ; |
42 | u32 ; |
43 | u32 ; |
44 | u32 domain; |
45 | }; |
46 | |
47 | struct aer_error { |
48 | struct list_head list; |
49 | u32 domain; |
50 | unsigned int bus; |
51 | unsigned int devfn; |
52 | int pos_cap_err; |
53 | |
54 | u32 uncor_status; |
55 | u32 cor_status; |
56 | u32 ; |
57 | u32 ; |
58 | u32 ; |
59 | u32 ; |
60 | u32 root_status; |
61 | u32 source_id; |
62 | }; |
63 | |
64 | struct pci_bus_ops { |
65 | struct list_head list; |
66 | struct pci_bus *bus; |
67 | struct pci_ops *ops; |
68 | }; |
69 | |
/* Protects both einjected and pci_bus_ops_list */
static DEFINE_SPINLOCK(inject_lock);

/* Simulated per-function AER state (struct aer_error) */
static LIST_HEAD(einjected);

/* Saved original config accessors, one struct pci_bus_ops per hooked bus */
static LIST_HEAD(pci_bus_ops_list);
76 | |
77 | static void aer_error_init(struct aer_error *err, u32 domain, |
78 | unsigned int bus, unsigned int devfn, |
79 | int pos_cap_err) |
80 | { |
81 | INIT_LIST_HEAD(list: &err->list); |
82 | err->domain = domain; |
83 | err->bus = bus; |
84 | err->devfn = devfn; |
85 | err->pos_cap_err = pos_cap_err; |
86 | } |
87 | |
88 | /* inject_lock must be held before calling */ |
89 | static struct aer_error *__find_aer_error(u32 domain, unsigned int bus, |
90 | unsigned int devfn) |
91 | { |
92 | struct aer_error *err; |
93 | |
94 | list_for_each_entry(err, &einjected, list) { |
95 | if (domain == err->domain && |
96 | bus == err->bus && |
97 | devfn == err->devfn) |
98 | return err; |
99 | } |
100 | return NULL; |
101 | } |
102 | |
103 | /* inject_lock must be held before calling */ |
104 | static struct aer_error *__find_aer_error_by_dev(struct pci_dev *dev) |
105 | { |
106 | int domain = pci_domain_nr(bus: dev->bus); |
107 | if (domain < 0) |
108 | return NULL; |
109 | return __find_aer_error(domain, bus: dev->bus->number, devfn: dev->devfn); |
110 | } |
111 | |
112 | /* inject_lock must be held before calling */ |
113 | static struct pci_ops *__find_pci_bus_ops(struct pci_bus *bus) |
114 | { |
115 | struct pci_bus_ops *bus_ops; |
116 | |
117 | list_for_each_entry(bus_ops, &pci_bus_ops_list, list) { |
118 | if (bus_ops->bus == bus) |
119 | return bus_ops->ops; |
120 | } |
121 | return NULL; |
122 | } |
123 | |
124 | static struct pci_bus_ops *pci_bus_ops_pop(void) |
125 | { |
126 | unsigned long flags; |
127 | struct pci_bus_ops *bus_ops; |
128 | |
129 | spin_lock_irqsave(&inject_lock, flags); |
130 | bus_ops = list_first_entry_or_null(&pci_bus_ops_list, |
131 | struct pci_bus_ops, list); |
132 | if (bus_ops) |
133 | list_del(entry: &bus_ops->list); |
134 | spin_unlock_irqrestore(lock: &inject_lock, flags); |
135 | return bus_ops; |
136 | } |
137 | |
138 | static u32 *find_pci_config_dword(struct aer_error *err, int where, |
139 | int *prw1cs) |
140 | { |
141 | int rw1cs = 0; |
142 | u32 *target = NULL; |
143 | |
144 | if (err->pos_cap_err == -1) |
145 | return NULL; |
146 | |
147 | switch (where - err->pos_cap_err) { |
148 | case PCI_ERR_UNCOR_STATUS: |
149 | target = &err->uncor_status; |
150 | rw1cs = 1; |
151 | break; |
152 | case PCI_ERR_COR_STATUS: |
153 | target = &err->cor_status; |
154 | rw1cs = 1; |
155 | break; |
156 | case PCI_ERR_HEADER_LOG: |
157 | target = &err->header_log0; |
158 | break; |
159 | case PCI_ERR_HEADER_LOG+4: |
160 | target = &err->header_log1; |
161 | break; |
162 | case PCI_ERR_HEADER_LOG+8: |
163 | target = &err->header_log2; |
164 | break; |
165 | case PCI_ERR_HEADER_LOG+12: |
166 | target = &err->header_log3; |
167 | break; |
168 | case PCI_ERR_ROOT_STATUS: |
169 | target = &err->root_status; |
170 | rw1cs = 1; |
171 | break; |
172 | case PCI_ERR_ROOT_ERR_SRC: |
173 | target = &err->source_id; |
174 | break; |
175 | } |
176 | if (prw1cs) |
177 | *prw1cs = rw1cs; |
178 | return target; |
179 | } |
180 | |
181 | static int aer_inj_read(struct pci_bus *bus, unsigned int devfn, int where, |
182 | int size, u32 *val) |
183 | { |
184 | struct pci_ops *ops, *my_ops; |
185 | int rv; |
186 | |
187 | ops = __find_pci_bus_ops(bus); |
188 | if (!ops) |
189 | return -1; |
190 | |
191 | my_ops = bus->ops; |
192 | bus->ops = ops; |
193 | rv = ops->read(bus, devfn, where, size, val); |
194 | bus->ops = my_ops; |
195 | |
196 | return rv; |
197 | } |
198 | |
199 | static int aer_inj_write(struct pci_bus *bus, unsigned int devfn, int where, |
200 | int size, u32 val) |
201 | { |
202 | struct pci_ops *ops, *my_ops; |
203 | int rv; |
204 | |
205 | ops = __find_pci_bus_ops(bus); |
206 | if (!ops) |
207 | return -1; |
208 | |
209 | my_ops = bus->ops; |
210 | bus->ops = ops; |
211 | rv = ops->write(bus, devfn, where, size, val); |
212 | bus->ops = my_ops; |
213 | |
214 | return rv; |
215 | } |
216 | |
217 | static int aer_inj_read_config(struct pci_bus *bus, unsigned int devfn, |
218 | int where, int size, u32 *val) |
219 | { |
220 | u32 *sim; |
221 | struct aer_error *err; |
222 | unsigned long flags; |
223 | int domain; |
224 | int rv; |
225 | |
226 | spin_lock_irqsave(&inject_lock, flags); |
227 | if (size != sizeof(u32)) |
228 | goto out; |
229 | domain = pci_domain_nr(bus); |
230 | if (domain < 0) |
231 | goto out; |
232 | err = __find_aer_error(domain, bus: bus->number, devfn); |
233 | if (!err) |
234 | goto out; |
235 | |
236 | sim = find_pci_config_dword(err, where, NULL); |
237 | if (sim) { |
238 | *val = *sim; |
239 | spin_unlock_irqrestore(lock: &inject_lock, flags); |
240 | return 0; |
241 | } |
242 | out: |
243 | rv = aer_inj_read(bus, devfn, where, size, val); |
244 | spin_unlock_irqrestore(lock: &inject_lock, flags); |
245 | return rv; |
246 | } |
247 | |
248 | static int aer_inj_write_config(struct pci_bus *bus, unsigned int devfn, |
249 | int where, int size, u32 val) |
250 | { |
251 | u32 *sim; |
252 | struct aer_error *err; |
253 | unsigned long flags; |
254 | int rw1cs; |
255 | int domain; |
256 | int rv; |
257 | |
258 | spin_lock_irqsave(&inject_lock, flags); |
259 | if (size != sizeof(u32)) |
260 | goto out; |
261 | domain = pci_domain_nr(bus); |
262 | if (domain < 0) |
263 | goto out; |
264 | err = __find_aer_error(domain, bus: bus->number, devfn); |
265 | if (!err) |
266 | goto out; |
267 | |
268 | sim = find_pci_config_dword(err, where, prw1cs: &rw1cs); |
269 | if (sim) { |
270 | if (rw1cs) |
271 | *sim ^= val; |
272 | else |
273 | *sim = val; |
274 | spin_unlock_irqrestore(lock: &inject_lock, flags); |
275 | return 0; |
276 | } |
277 | out: |
278 | rv = aer_inj_write(bus, devfn, where, size, val); |
279 | spin_unlock_irqrestore(lock: &inject_lock, flags); |
280 | return rv; |
281 | } |
282 | |
283 | static struct pci_ops aer_inj_pci_ops = { |
284 | .read = aer_inj_read_config, |
285 | .write = aer_inj_write_config, |
286 | }; |
287 | |
288 | static void pci_bus_ops_init(struct pci_bus_ops *bus_ops, |
289 | struct pci_bus *bus, |
290 | struct pci_ops *ops) |
291 | { |
292 | INIT_LIST_HEAD(list: &bus_ops->list); |
293 | bus_ops->bus = bus; |
294 | bus_ops->ops = ops; |
295 | } |
296 | |
297 | static int pci_bus_set_aer_ops(struct pci_bus *bus) |
298 | { |
299 | struct pci_ops *ops; |
300 | struct pci_bus_ops *bus_ops; |
301 | unsigned long flags; |
302 | |
303 | bus_ops = kmalloc(size: sizeof(*bus_ops), GFP_KERNEL); |
304 | if (!bus_ops) |
305 | return -ENOMEM; |
306 | ops = pci_bus_set_ops(bus, ops: &aer_inj_pci_ops); |
307 | spin_lock_irqsave(&inject_lock, flags); |
308 | if (ops == &aer_inj_pci_ops) |
309 | goto out; |
310 | pci_bus_ops_init(bus_ops, bus, ops); |
311 | list_add(new: &bus_ops->list, head: &pci_bus_ops_list); |
312 | bus_ops = NULL; |
313 | out: |
314 | spin_unlock_irqrestore(lock: &inject_lock, flags); |
315 | kfree(objp: bus_ops); |
316 | return 0; |
317 | } |
318 | |
319 | static int aer_inject(struct aer_error_inj *einj) |
320 | { |
321 | struct aer_error *err, *rperr; |
322 | struct aer_error *err_alloc = NULL, *rperr_alloc = NULL; |
323 | struct pci_dev *dev, *rpdev; |
324 | struct pcie_device *edev; |
325 | struct device *device; |
326 | unsigned long flags; |
327 | unsigned int devfn = PCI_DEVFN(einj->dev, einj->fn); |
328 | int pos_cap_err, rp_pos_cap_err; |
329 | u32 sever, cor_mask, uncor_mask, cor_mask_orig = 0, uncor_mask_orig = 0; |
330 | int ret = 0; |
331 | |
332 | dev = pci_get_domain_bus_and_slot(domain: einj->domain, bus: einj->bus, devfn); |
333 | if (!dev) |
334 | return -ENODEV; |
335 | rpdev = pcie_find_root_port(dev); |
336 | /* If Root Port not found, try to find an RCEC */ |
337 | if (!rpdev) |
338 | rpdev = dev->rcec; |
339 | if (!rpdev) { |
340 | pci_err(dev, "Neither Root Port nor RCEC found\n" ); |
341 | ret = -ENODEV; |
342 | goto out_put; |
343 | } |
344 | |
345 | pos_cap_err = dev->aer_cap; |
346 | if (!pos_cap_err) { |
347 | pci_err(dev, "Device doesn't support AER\n" ); |
348 | ret = -EPROTONOSUPPORT; |
349 | goto out_put; |
350 | } |
351 | pci_read_config_dword(dev, where: pos_cap_err + PCI_ERR_UNCOR_SEVER, val: &sever); |
352 | pci_read_config_dword(dev, where: pos_cap_err + PCI_ERR_COR_MASK, val: &cor_mask); |
353 | pci_read_config_dword(dev, where: pos_cap_err + PCI_ERR_UNCOR_MASK, |
354 | val: &uncor_mask); |
355 | |
356 | rp_pos_cap_err = rpdev->aer_cap; |
357 | if (!rp_pos_cap_err) { |
358 | pci_err(rpdev, "Root port doesn't support AER\n" ); |
359 | ret = -EPROTONOSUPPORT; |
360 | goto out_put; |
361 | } |
362 | |
363 | err_alloc = kzalloc(size: sizeof(struct aer_error), GFP_KERNEL); |
364 | if (!err_alloc) { |
365 | ret = -ENOMEM; |
366 | goto out_put; |
367 | } |
368 | rperr_alloc = kzalloc(size: sizeof(struct aer_error), GFP_KERNEL); |
369 | if (!rperr_alloc) { |
370 | ret = -ENOMEM; |
371 | goto out_put; |
372 | } |
373 | |
374 | if (aer_mask_override) { |
375 | cor_mask_orig = cor_mask; |
376 | cor_mask &= !(einj->cor_status); |
377 | pci_write_config_dword(dev, where: pos_cap_err + PCI_ERR_COR_MASK, |
378 | val: cor_mask); |
379 | |
380 | uncor_mask_orig = uncor_mask; |
381 | uncor_mask &= !(einj->uncor_status); |
382 | pci_write_config_dword(dev, where: pos_cap_err + PCI_ERR_UNCOR_MASK, |
383 | val: uncor_mask); |
384 | } |
385 | |
386 | spin_lock_irqsave(&inject_lock, flags); |
387 | |
388 | err = __find_aer_error_by_dev(dev); |
389 | if (!err) { |
390 | err = err_alloc; |
391 | err_alloc = NULL; |
392 | aer_error_init(err, domain: einj->domain, bus: einj->bus, devfn, |
393 | pos_cap_err); |
394 | list_add(new: &err->list, head: &einjected); |
395 | } |
396 | err->uncor_status |= einj->uncor_status; |
397 | err->cor_status |= einj->cor_status; |
398 | err->header_log0 = einj->header_log0; |
399 | err->header_log1 = einj->header_log1; |
400 | err->header_log2 = einj->header_log2; |
401 | err->header_log3 = einj->header_log3; |
402 | |
403 | if (!aer_mask_override && einj->cor_status && |
404 | !(einj->cor_status & ~cor_mask)) { |
405 | ret = -EINVAL; |
406 | pci_warn(dev, "The correctable error(s) is masked by device\n" ); |
407 | spin_unlock_irqrestore(lock: &inject_lock, flags); |
408 | goto out_put; |
409 | } |
410 | if (!aer_mask_override && einj->uncor_status && |
411 | !(einj->uncor_status & ~uncor_mask)) { |
412 | ret = -EINVAL; |
413 | pci_warn(dev, "The uncorrectable error(s) is masked by device\n" ); |
414 | spin_unlock_irqrestore(lock: &inject_lock, flags); |
415 | goto out_put; |
416 | } |
417 | |
418 | rperr = __find_aer_error_by_dev(dev: rpdev); |
419 | if (!rperr) { |
420 | rperr = rperr_alloc; |
421 | rperr_alloc = NULL; |
422 | aer_error_init(err: rperr, domain: pci_domain_nr(bus: rpdev->bus), |
423 | bus: rpdev->bus->number, devfn: rpdev->devfn, |
424 | pos_cap_err: rp_pos_cap_err); |
425 | list_add(new: &rperr->list, head: &einjected); |
426 | } |
427 | if (einj->cor_status) { |
428 | if (rperr->root_status & PCI_ERR_ROOT_COR_RCV) |
429 | rperr->root_status |= PCI_ERR_ROOT_MULTI_COR_RCV; |
430 | else |
431 | rperr->root_status |= PCI_ERR_ROOT_COR_RCV; |
432 | rperr->source_id &= 0xffff0000; |
433 | rperr->source_id |= (einj->bus << 8) | devfn; |
434 | } |
435 | if (einj->uncor_status) { |
436 | if (rperr->root_status & PCI_ERR_ROOT_UNCOR_RCV) |
437 | rperr->root_status |= PCI_ERR_ROOT_MULTI_UNCOR_RCV; |
438 | if (sever & einj->uncor_status) { |
439 | rperr->root_status |= PCI_ERR_ROOT_FATAL_RCV; |
440 | if (!(rperr->root_status & PCI_ERR_ROOT_UNCOR_RCV)) |
441 | rperr->root_status |= PCI_ERR_ROOT_FIRST_FATAL; |
442 | } else |
443 | rperr->root_status |= PCI_ERR_ROOT_NONFATAL_RCV; |
444 | rperr->root_status |= PCI_ERR_ROOT_UNCOR_RCV; |
445 | rperr->source_id &= 0x0000ffff; |
446 | rperr->source_id |= ((einj->bus << 8) | devfn) << 16; |
447 | } |
448 | spin_unlock_irqrestore(lock: &inject_lock, flags); |
449 | |
450 | if (aer_mask_override) { |
451 | pci_write_config_dword(dev, where: pos_cap_err + PCI_ERR_COR_MASK, |
452 | val: cor_mask_orig); |
453 | pci_write_config_dword(dev, where: pos_cap_err + PCI_ERR_UNCOR_MASK, |
454 | val: uncor_mask_orig); |
455 | } |
456 | |
457 | ret = pci_bus_set_aer_ops(bus: dev->bus); |
458 | if (ret) |
459 | goto out_put; |
460 | ret = pci_bus_set_aer_ops(bus: rpdev->bus); |
461 | if (ret) |
462 | goto out_put; |
463 | |
464 | device = pcie_port_find_device(dev: rpdev, PCIE_PORT_SERVICE_AER); |
465 | if (device) { |
466 | edev = to_pcie_device(device); |
467 | if (!get_service_data(dev: edev)) { |
468 | pci_warn(edev->port, "AER service is not initialized\n" ); |
469 | ret = -EPROTONOSUPPORT; |
470 | goto out_put; |
471 | } |
472 | pci_info(edev->port, "Injecting errors %08x/%08x into device %s\n" , |
473 | einj->cor_status, einj->uncor_status, pci_name(dev)); |
474 | ret = irq_inject_interrupt(irq: edev->irq); |
475 | } else { |
476 | pci_err(rpdev, "AER device not found\n" ); |
477 | ret = -ENODEV; |
478 | } |
479 | out_put: |
480 | kfree(objp: err_alloc); |
481 | kfree(objp: rperr_alloc); |
482 | pci_dev_put(dev); |
483 | return ret; |
484 | } |
485 | |
486 | static ssize_t aer_inject_write(struct file *filp, const char __user *ubuf, |
487 | size_t usize, loff_t *off) |
488 | { |
489 | struct aer_error_inj einj; |
490 | int ret; |
491 | |
492 | if (!capable(CAP_SYS_ADMIN)) |
493 | return -EPERM; |
494 | if (usize < offsetof(struct aer_error_inj, domain) || |
495 | usize > sizeof(einj)) |
496 | return -EINVAL; |
497 | |
498 | memset(&einj, 0, sizeof(einj)); |
499 | if (copy_from_user(to: &einj, from: ubuf, n: usize)) |
500 | return -EFAULT; |
501 | |
502 | ret = aer_inject(einj: &einj); |
503 | return ret ? ret : usize; |
504 | } |
505 | |
506 | static const struct file_operations aer_inject_fops = { |
507 | .write = aer_inject_write, |
508 | .owner = THIS_MODULE, |
509 | .llseek = noop_llseek, |
510 | }; |
511 | |
512 | static struct miscdevice aer_inject_device = { |
513 | .minor = MISC_DYNAMIC_MINOR, |
514 | .name = "aer_inject" , |
515 | .fops = &aer_inject_fops, |
516 | }; |
517 | |
518 | static int __init aer_inject_init(void) |
519 | { |
520 | return misc_register(misc: &aer_inject_device); |
521 | } |
522 | |
523 | static void __exit aer_inject_exit(void) |
524 | { |
525 | struct aer_error *err, *err_next; |
526 | unsigned long flags; |
527 | struct pci_bus_ops *bus_ops; |
528 | |
529 | misc_deregister(misc: &aer_inject_device); |
530 | |
531 | while ((bus_ops = pci_bus_ops_pop())) { |
532 | pci_bus_set_ops(bus: bus_ops->bus, ops: bus_ops->ops); |
533 | kfree(objp: bus_ops); |
534 | } |
535 | |
536 | spin_lock_irqsave(&inject_lock, flags); |
537 | list_for_each_entry_safe(err, err_next, &einjected, list) { |
538 | list_del(entry: &err->list); |
539 | kfree(objp: err); |
540 | } |
541 | spin_unlock_irqrestore(lock: &inject_lock, flags); |
542 | } |
543 | |
544 | module_init(aer_inject_init); |
545 | module_exit(aer_inject_exit); |
546 | |
547 | MODULE_DESCRIPTION("PCIe AER software error injector" ); |
548 | MODULE_LICENSE("GPL" ); |
549 | |