1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* |
3 | * Copyright (C) 2001 Dave Engebretsen IBM Corporation |
4 | */ |
5 | |
6 | #include <linux/sched.h> |
7 | #include <linux/interrupt.h> |
8 | #include <linux/irq.h> |
9 | #include <linux/of.h> |
10 | #include <linux/fs.h> |
11 | #include <linux/reboot.h> |
12 | #include <linux/irq_work.h> |
13 | |
14 | #include <asm/machdep.h> |
15 | #include <asm/rtas.h> |
16 | #include <asm/firmware.h> |
17 | #include <asm/mce.h> |
18 | |
19 | #include "pseries.h" |
20 | |
/* Buffer handed to RTAS check-exception; it receives the event's error log. */
static unsigned char ras_log_buf[RTAS_ERROR_LOG_MAX];
/* Held by the interrupt handlers below for as long as they use ras_log_buf. */
static DEFINE_SPINLOCK(ras_log_buf_lock);

/* RTAS token for check-exception, looked up once in init_ras_IRQ(). */
static int ras_check_exception_token;

/* Sensor token and index passed to rtas_get_sensor_fast() by the EPOW handler. */
#define EPOW_SENSOR_TOKEN	9
#define EPOW_SENSOR_INDEX	0

/* EPOW events counter variable */
static int num_epow_events;

/* Interrupt handlers defined later in this file; registered by the initcalls. */
static irqreturn_t ras_hotplug_interrupt(int irq, void *dev_id);
static irqreturn_t ras_epow_interrupt(int irq, void *dev_id);
static irqreturn_t ras_error_interrupt(int irq, void *dev_id);
35 | |
/*
 * RTAS pseries MCE errorlog section.
 *
 * The structure is packed and its multi-byte fields are big-endian, so
 * readers convert them with be64_to_cpu() before use.
 */
struct pseries_mc_errorlog {
	__be32	fru_id;
	__be32	proc_id;
	u8	error_type;		/* one of MC_ERROR_TYPE_* */
	/*
	 * sub_err_type (1 byte). Bit fields depends on error_type
	 *
	 *   MSB0
	 *   |
	 *   V
	 *   01234567
	 *   XXXXXXXX
	 *
	 * For error_type == MC_ERROR_TYPE_UE
	 *   XXXXXXXX
	 *   X          1: Permanent or Transient UE.
	 *    X         1: Effective address provided.
	 *     X        1: Logical address provided.
	 *      XX      2: Reserved.
	 *        XXX   3: Type of UE error.
	 *
	 * For error_type == MC_ERROR_TYPE_SLB/ERAT/TLB
	 *   XXXXXXXX
	 *   X          1: Effective address provided.
	 *    XXXXX     5: Reserved.
	 *         XX   2: Type of SLB/ERAT/TLB error.
	 *
	 * For error_type == MC_ERROR_TYPE_CTRL_MEM_ACCESS
	 *   XXXXXXXX
	 *   X          1: Error causing address provided.
	 *    XXX       3: Type of error.
	 *       XXXX   4: Reserved.
	 */
	u8	sub_err_type;
	u8	reserved_1[6];
	__be64	effective_address;	/* meaningful only if flagged in sub_err_type */
	__be64	logical_address;	/* meaningful only if flagged in sub_err_type */
} __packed;
75 | |
/* RTAS pseries MCE error types (values of pseries_mc_errorlog.error_type) */
#define MC_ERROR_TYPE_UE		0x00
#define MC_ERROR_TYPE_SLB		0x01
#define MC_ERROR_TYPE_ERAT		0x02
#define MC_ERROR_TYPE_UNKNOWN		0x03
#define MC_ERROR_TYPE_TLB		0x04
#define MC_ERROR_TYPE_D_CACHE		0x05
#define MC_ERROR_TYPE_I_CACHE		0x07
#define MC_ERROR_TYPE_CTRL_MEM_ACCESS	0x08

/*
 * RTAS pseries MCE error sub types, as returned by rtas_mc_error_sub_type()
 * for the matching error type.
 */
#define MC_ERROR_UE_INDETERMINATE		0
#define MC_ERROR_UE_IFETCH			1
#define MC_ERROR_UE_PAGE_TABLE_WALK_IFETCH	2
#define MC_ERROR_UE_LOAD_STORE			3
#define MC_ERROR_UE_PAGE_TABLE_WALK_LOAD_STORE	4

/*
 * "Address provided" flags tested directly against sub_err_type.
 * The UE_* masks apply to MC_ERROR_TYPE_UE; MC_EFFECTIVE_ADDR_PROVIDED is
 * used for the SLB, ERAT, TLB and CTRL_MEM_ACCESS types.
 */
#define UE_EFFECTIVE_ADDR_PROVIDED		0x40
#define UE_LOGICAL_ADDR_PROVIDED		0x20
#define MC_EFFECTIVE_ADDR_PROVIDED		0x80

/* SLB sub types (note: numbering starts at 0, unlike ERAT/TLB below) */
#define MC_ERROR_SLB_PARITY		0
#define MC_ERROR_SLB_MULTIHIT		1
#define MC_ERROR_SLB_INDETERMINATE	2

/* ERAT sub types */
#define MC_ERROR_ERAT_PARITY		1
#define MC_ERROR_ERAT_MULTIHIT		2
#define MC_ERROR_ERAT_INDETERMINATE	3

/* TLB sub types */
#define MC_ERROR_TLB_PARITY		1
#define MC_ERROR_TLB_MULTIHIT		2
#define MC_ERROR_TLB_INDETERMINATE	3

/* Control memory access sub types */
#define MC_ERROR_CTRL_MEM_ACCESS_PTABLE_WALK	0
#define MC_ERROR_CTRL_MEM_ACCESS_OP_ACCESS	1
111 | |
112 | static inline u8 rtas_mc_error_sub_type(const struct pseries_mc_errorlog *mlog) |
113 | { |
114 | switch (mlog->error_type) { |
115 | case MC_ERROR_TYPE_UE: |
116 | return (mlog->sub_err_type & 0x07); |
117 | case MC_ERROR_TYPE_SLB: |
118 | case MC_ERROR_TYPE_ERAT: |
119 | case MC_ERROR_TYPE_TLB: |
120 | return (mlog->sub_err_type & 0x03); |
121 | case MC_ERROR_TYPE_CTRL_MEM_ACCESS: |
122 | return (mlog->sub_err_type & 0x70) >> 4; |
123 | default: |
124 | return 0; |
125 | } |
126 | } |
127 | |
128 | /* |
129 | * Enable the hotplug interrupt late because processing them may touch other |
130 | * devices or systems (e.g. hugepages) that have not been initialized at the |
131 | * subsys stage. |
132 | */ |
133 | static int __init init_ras_hotplug_IRQ(void) |
134 | { |
135 | struct device_node *np; |
136 | |
137 | /* Hotplug Events */ |
138 | np = of_find_node_by_path(path: "/event-sources/hot-plug-events" ); |
139 | if (np != NULL) { |
140 | if (dlpar_workqueue_init() == 0) |
141 | request_event_sources_irqs(np, handler: ras_hotplug_interrupt, |
142 | name: "RAS_HOTPLUG" ); |
143 | of_node_put(node: np); |
144 | } |
145 | |
146 | return 0; |
147 | } |
148 | machine_late_initcall(pseries, init_ras_hotplug_IRQ); |
149 | |
150 | /* |
151 | * Initialize handlers for the set of interrupts caused by hardware errors |
152 | * and power system events. |
153 | */ |
154 | static int __init init_ras_IRQ(void) |
155 | { |
156 | struct device_node *np; |
157 | |
158 | ras_check_exception_token = rtas_function_token(RTAS_FN_CHECK_EXCEPTION); |
159 | |
160 | /* Internal Errors */ |
161 | np = of_find_node_by_path(path: "/event-sources/internal-errors" ); |
162 | if (np != NULL) { |
163 | request_event_sources_irqs(np, handler: ras_error_interrupt, |
164 | name: "RAS_ERROR" ); |
165 | of_node_put(node: np); |
166 | } |
167 | |
168 | /* EPOW Events */ |
169 | np = of_find_node_by_path(path: "/event-sources/epow-events" ); |
170 | if (np != NULL) { |
171 | request_event_sources_irqs(np, handler: ras_epow_interrupt, name: "RAS_EPOW" ); |
172 | of_node_put(node: np); |
173 | } |
174 | |
175 | return 0; |
176 | } |
177 | machine_subsys_initcall(pseries, init_ras_IRQ); |
178 | |
/* Event modifier values handled by handle_system_shutdown(). */
#define EPOW_SHUTDOWN_NORMAL				1
#define EPOW_SHUTDOWN_ON_UPS				2
#define EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS	3
#define EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH	4
183 | |
184 | static void handle_system_shutdown(char event_modifier) |
185 | { |
186 | switch (event_modifier) { |
187 | case EPOW_SHUTDOWN_NORMAL: |
188 | pr_emerg("Power off requested\n" ); |
189 | orderly_poweroff(force: true); |
190 | break; |
191 | |
192 | case EPOW_SHUTDOWN_ON_UPS: |
193 | pr_emerg("Loss of system power detected. System is running on" |
194 | " UPS/battery. Check RTAS error log for details\n" ); |
195 | break; |
196 | |
197 | case EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS: |
198 | pr_emerg("Loss of system critical functions detected. Check" |
199 | " RTAS error log for details\n" ); |
200 | orderly_poweroff(force: true); |
201 | break; |
202 | |
203 | case EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH: |
204 | pr_emerg("High ambient temperature detected. Check RTAS" |
205 | " error log for details\n" ); |
206 | orderly_poweroff(force: true); |
207 | break; |
208 | |
209 | default: |
210 | pr_err("Unknown power/cooling shutdown event (modifier = %d)\n" , |
211 | event_modifier); |
212 | } |
213 | } |
214 | |
/* Layout of the EPOW section data as read by rtas_parse_epow_errlog(). */
struct epow_errorlog {
	unsigned char sensor_value;	/* low 4 bits: EPOW action code */
	unsigned char event_modifier;	/* low 4 bits: EPOW_SHUTDOWN_* modifier */
	unsigned char extended_modifier;
	unsigned char reserved;
	unsigned char platform_reason;
};

/* EPOW action codes (low 4 bits of epow_errorlog.sensor_value). */
#define EPOW_RESET			0
#define EPOW_WARN_COOLING		1
#define EPOW_WARN_POWER			2
#define EPOW_SYSTEM_SHUTDOWN		3
#define EPOW_SYSTEM_HALT		4
#define EPOW_MAIN_ENCLOSURE		5
#define EPOW_POWER_OFF			7
230 | |
231 | static void rtas_parse_epow_errlog(struct rtas_error_log *log) |
232 | { |
233 | struct pseries_errorlog *pseries_log; |
234 | struct epow_errorlog *epow_log; |
235 | char action_code; |
236 | char modifier; |
237 | |
238 | pseries_log = get_pseries_errorlog(log, PSERIES_ELOG_SECT_ID_EPOW); |
239 | if (pseries_log == NULL) |
240 | return; |
241 | |
242 | epow_log = (struct epow_errorlog *)pseries_log->data; |
243 | action_code = epow_log->sensor_value & 0xF; /* bottom 4 bits */ |
244 | modifier = epow_log->event_modifier & 0xF; /* bottom 4 bits */ |
245 | |
246 | switch (action_code) { |
247 | case EPOW_RESET: |
248 | if (num_epow_events) { |
249 | pr_info("Non critical power/cooling issue cleared\n" ); |
250 | num_epow_events--; |
251 | } |
252 | break; |
253 | |
254 | case EPOW_WARN_COOLING: |
255 | pr_info("Non-critical cooling issue detected. Check RTAS error" |
256 | " log for details\n" ); |
257 | break; |
258 | |
259 | case EPOW_WARN_POWER: |
260 | pr_info("Non-critical power issue detected. Check RTAS error" |
261 | " log for details\n" ); |
262 | break; |
263 | |
264 | case EPOW_SYSTEM_SHUTDOWN: |
265 | handle_system_shutdown(event_modifier: modifier); |
266 | break; |
267 | |
268 | case EPOW_SYSTEM_HALT: |
269 | pr_emerg("Critical power/cooling issue detected. Check RTAS" |
270 | " error log for details. Powering off.\n" ); |
271 | orderly_poweroff(force: true); |
272 | break; |
273 | |
274 | case EPOW_MAIN_ENCLOSURE: |
275 | case EPOW_POWER_OFF: |
276 | pr_emerg("System about to lose power. Check RTAS error log " |
277 | " for details. Powering off immediately.\n" ); |
278 | emergency_sync(); |
279 | kernel_power_off(); |
280 | break; |
281 | |
282 | default: |
283 | pr_err("Unknown power/cooling event (action code = %d)\n" , |
284 | action_code); |
285 | } |
286 | |
287 | /* Increment epow events counter variable */ |
288 | if (action_code != EPOW_RESET) |
289 | num_epow_events++; |
290 | } |
291 | |
292 | static irqreturn_t ras_hotplug_interrupt(int irq, void *dev_id) |
293 | { |
294 | struct pseries_errorlog *pseries_log; |
295 | struct pseries_hp_errorlog *hp_elog; |
296 | |
297 | spin_lock(lock: &ras_log_buf_lock); |
298 | |
299 | rtas_call(ras_check_exception_token, 6, 1, NULL, |
300 | RTAS_VECTOR_EXTERNAL_INTERRUPT, virq_to_hw(irq), |
301 | RTAS_HOTPLUG_EVENTS, 0, __pa(&ras_log_buf), |
302 | rtas_get_error_log_max()); |
303 | |
304 | pseries_log = get_pseries_errorlog((struct rtas_error_log *)ras_log_buf, |
305 | PSERIES_ELOG_SECT_ID_HOTPLUG); |
306 | hp_elog = (struct pseries_hp_errorlog *)pseries_log->data; |
307 | |
308 | /* |
309 | * Since PCI hotplug is not currently supported on pseries, put PCI |
310 | * hotplug events on the ras_log_buf to be handled by rtas_errd. |
311 | */ |
312 | if (hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_MEM || |
313 | hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_CPU || |
314 | hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_PMEM) |
315 | queue_hotplug_event(hp_errlog: hp_elog); |
316 | else |
317 | log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0); |
318 | |
319 | spin_unlock(lock: &ras_log_buf_lock); |
320 | return IRQ_HANDLED; |
321 | } |
322 | |
323 | /* Handle environmental and power warning (EPOW) interrupts. */ |
324 | static irqreturn_t ras_epow_interrupt(int irq, void *dev_id) |
325 | { |
326 | int state; |
327 | int critical; |
328 | |
329 | rtas_get_sensor_fast(EPOW_SENSOR_TOKEN, EPOW_SENSOR_INDEX, &state); |
330 | |
331 | if (state > 3) |
332 | critical = 1; /* Time Critical */ |
333 | else |
334 | critical = 0; |
335 | |
336 | spin_lock(lock: &ras_log_buf_lock); |
337 | |
338 | rtas_call(ras_check_exception_token, 6, 1, NULL, RTAS_VECTOR_EXTERNAL_INTERRUPT, |
339 | virq_to_hw(irq), RTAS_EPOW_WARNING, critical, __pa(&ras_log_buf), |
340 | rtas_get_error_log_max()); |
341 | |
342 | log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0); |
343 | |
344 | rtas_parse_epow_errlog(log: (struct rtas_error_log *)ras_log_buf); |
345 | |
346 | spin_unlock(lock: &ras_log_buf_lock); |
347 | return IRQ_HANDLED; |
348 | } |
349 | |
350 | /* |
351 | * Handle hardware error interrupts. |
352 | * |
353 | * RTAS check-exception is called to collect data on the exception. If |
354 | * the error is deemed recoverable, we log a warning and return. |
355 | * For nonrecoverable errors, an error is logged and we stop all processing |
356 | * as quickly as possible in order to prevent propagation of the failure. |
357 | */ |
358 | static irqreturn_t ras_error_interrupt(int irq, void *dev_id) |
359 | { |
360 | struct rtas_error_log *rtas_elog; |
361 | int status; |
362 | int fatal; |
363 | |
364 | spin_lock(lock: &ras_log_buf_lock); |
365 | |
366 | status = rtas_call(ras_check_exception_token, 6, 1, NULL, |
367 | RTAS_VECTOR_EXTERNAL_INTERRUPT, |
368 | virq_to_hw(irq), |
369 | RTAS_INTERNAL_ERROR, 1 /* Time Critical */, |
370 | __pa(&ras_log_buf), |
371 | rtas_get_error_log_max()); |
372 | |
373 | rtas_elog = (struct rtas_error_log *)ras_log_buf; |
374 | |
375 | if (status == 0 && |
376 | rtas_error_severity(rtas_elog) >= RTAS_SEVERITY_ERROR_SYNC) |
377 | fatal = 1; |
378 | else |
379 | fatal = 0; |
380 | |
381 | /* format and print the extended information */ |
382 | log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, fatal); |
383 | |
384 | if (fatal) { |
385 | pr_emerg("Fatal hardware error detected. Check RTAS error" |
386 | " log for details. Powering off immediately\n" ); |
387 | emergency_sync(); |
388 | kernel_power_off(); |
389 | } else { |
390 | pr_err("Recoverable hardware error detected\n" ); |
391 | } |
392 | |
393 | spin_unlock(lock: &ras_log_buf_lock); |
394 | return IRQ_HANDLED; |
395 | } |
396 | |
/*
 * Some versions of FWNMI place the buffer inside the 4kB page starting at
 * 0x7000. Other versions place it inside the rtas buffer. We check both.
 * Minimum size of the buffer is 16 bytes.
 *
 * Both upper bounds are inclusive and sit 16 bytes below the end of the
 * respective region, so an accepted address always has room for that
 * minimum-size buffer.  Note that the argument is evaluated several times;
 * pass a side-effect-free expression.
 */
#define VALID_FWNMI_BUFFER(A) \
	((((A) >= 0x7000) && ((A) <= 0x8000 - 16)) || \
	(((A) >= rtas.base) && ((A) <= (rtas.base + rtas.size - 16))))
405 | |
406 | static inline struct rtas_error_log *fwnmi_get_errlog(void) |
407 | { |
408 | return (struct rtas_error_log *)local_paca->mce_data_buf; |
409 | } |
410 | |
411 | static __be64 *fwnmi_get_savep(struct pt_regs *regs) |
412 | { |
413 | unsigned long savep_ra; |
414 | |
415 | /* Mask top two bits */ |
416 | savep_ra = regs->gpr[3] & ~(0x3UL << 62); |
417 | if (!VALID_FWNMI_BUFFER(savep_ra)) { |
418 | printk(KERN_ERR "FWNMI: corrupt r3 0x%016lx\n" , regs->gpr[3]); |
419 | return NULL; |
420 | } |
421 | |
422 | return __va(savep_ra); |
423 | } |
424 | |
425 | /* |
426 | * Get the error information for errors coming through the |
427 | * FWNMI vectors. The pt_regs' r3 will be updated to reflect |
428 | * the actual r3 if possible, and a ptr to the error log entry |
429 | * will be returned if found. |
430 | * |
431 | * Use one buffer mce_data_buf per cpu to store RTAS error. |
432 | * |
433 | * The mce_data_buf does not have any locks or protection around it, |
434 | * if a second machine check comes in, or a system reset is done |
435 | * before we have logged the error, then we will get corruption in the |
436 | * error log. This is preferable over holding off on calling |
437 | * ibm,nmi-interlock which would result in us checkstopping if a |
438 | * second machine check did come in. |
439 | */ |
440 | static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs) |
441 | { |
442 | struct rtas_error_log *h; |
443 | __be64 *savep; |
444 | |
445 | savep = fwnmi_get_savep(regs); |
446 | if (!savep) |
447 | return NULL; |
448 | |
449 | regs->gpr[3] = be64_to_cpu(savep[0]); /* restore original r3 */ |
450 | |
451 | h = (struct rtas_error_log *)&savep[1]; |
452 | /* Use the per cpu buffer from paca to store rtas error log */ |
453 | memset(local_paca->mce_data_buf, 0, RTAS_ERROR_LOG_MAX); |
454 | if (!rtas_error_extended(h)) { |
455 | memcpy(local_paca->mce_data_buf, h, sizeof(__u64)); |
456 | } else { |
457 | int len, error_log_length; |
458 | |
459 | error_log_length = 8 + rtas_error_extended_log_length(h); |
460 | len = min_t(int, error_log_length, RTAS_ERROR_LOG_MAX); |
461 | memcpy(local_paca->mce_data_buf, h, len); |
462 | } |
463 | |
464 | return (struct rtas_error_log *)local_paca->mce_data_buf; |
465 | } |
466 | |
467 | /* Call this when done with the data returned by FWNMI_get_errinfo. |
468 | * It will release the saved data area for other CPUs in the |
469 | * partition to receive FWNMI errors. |
470 | */ |
471 | static void fwnmi_release_errinfo(void) |
472 | { |
473 | struct rtas_args rtas_args; |
474 | int ret; |
475 | |
476 | /* |
477 | * On pseries, the machine check stack is limited to under 4GB, so |
478 | * args can be on-stack. |
479 | */ |
480 | rtas_call_unlocked(&rtas_args, ibm_nmi_interlock_token, 0, 1, NULL); |
481 | ret = be32_to_cpu(rtas_args.rets[0]); |
482 | if (ret != 0) |
483 | printk(KERN_ERR "FWNMI: nmi-interlock failed: %d\n" , ret); |
484 | } |
485 | |
486 | int pSeries_system_reset_exception(struct pt_regs *regs) |
487 | { |
488 | #ifdef __LITTLE_ENDIAN__ |
489 | /* |
490 | * Some firmware byteswaps SRR registers and gives incorrect SRR1. Try |
491 | * to detect the bad SRR1 pattern here. Flip the NIP back to correct |
492 | * endian for reporting purposes. Unfortunately the MSR can't be fixed, |
493 | * so clear it. It will be missing MSR_RI so we won't try to recover. |
494 | */ |
495 | if ((be64_to_cpu(regs->msr) & |
496 | (MSR_LE|MSR_RI|MSR_DR|MSR_IR|MSR_ME|MSR_PR| |
497 | MSR_ILE|MSR_HV|MSR_SF)) == (MSR_DR|MSR_SF)) { |
498 | regs_set_return_ip(regs, be64_to_cpu((__be64)regs->nip)); |
499 | regs_set_return_msr(regs, 0); |
500 | } |
501 | #endif |
502 | |
503 | if (fwnmi_active) { |
504 | __be64 *savep; |
505 | |
506 | /* |
507 | * Firmware (PowerVM and KVM) saves r3 to a save area like |
508 | * machine check, which is not exactly what PAPR (2.9) |
509 | * suggests but there is no way to detect otherwise, so this |
510 | * is the interface now. |
511 | * |
512 | * System resets do not save any error log or require an |
513 | * "ibm,nmi-interlock" rtas call to release. |
514 | */ |
515 | |
516 | savep = fwnmi_get_savep(regs); |
517 | if (savep) |
518 | regs->gpr[3] = be64_to_cpu(savep[0]); /* restore original r3 */ |
519 | } |
520 | |
521 | if (smp_handle_nmi_ipi(regs)) |
522 | return 1; |
523 | |
524 | return 0; /* need to perform reset */ |
525 | } |
526 | |
527 | static int mce_handle_err_realmode(int disposition, u8 error_type) |
528 | { |
529 | #ifdef CONFIG_PPC_BOOK3S_64 |
530 | if (disposition == RTAS_DISP_NOT_RECOVERED) { |
531 | switch (error_type) { |
532 | case MC_ERROR_TYPE_ERAT: |
533 | flush_erat(); |
534 | disposition = RTAS_DISP_FULLY_RECOVERED; |
535 | break; |
536 | case MC_ERROR_TYPE_SLB: |
537 | #ifdef CONFIG_PPC_64S_HASH_MMU |
538 | /* |
539 | * Store the old slb content in paca before flushing. |
540 | * Print this when we go to virtual mode. |
541 | * There are chances that we may hit MCE again if there |
542 | * is a parity error on the SLB entry we trying to read |
543 | * for saving. Hence limit the slb saving to single |
544 | * level of recursion. |
545 | */ |
546 | if (local_paca->in_mce == 1) |
547 | slb_save_contents(local_paca->mce_faulty_slbs); |
548 | flush_and_reload_slb(); |
549 | disposition = RTAS_DISP_FULLY_RECOVERED; |
550 | #endif |
551 | break; |
552 | default: |
553 | break; |
554 | } |
555 | } else if (disposition == RTAS_DISP_LIMITED_RECOVERY) { |
556 | /* Platform corrected itself but could be degraded */ |
557 | pr_err("MCE: limited recovery, system may be degraded\n" ); |
558 | disposition = RTAS_DISP_FULLY_RECOVERED; |
559 | } |
560 | #endif |
561 | return disposition; |
562 | } |
563 | |
564 | static int mce_handle_err_virtmode(struct pt_regs *regs, |
565 | struct rtas_error_log *errp, |
566 | struct pseries_mc_errorlog *mce_log, |
567 | int disposition) |
568 | { |
569 | struct mce_error_info mce_err = { 0 }; |
570 | int initiator = rtas_error_initiator(errp); |
571 | int severity = rtas_error_severity(errp); |
572 | unsigned long eaddr = 0, paddr = 0; |
573 | u8 error_type, err_sub_type; |
574 | |
575 | if (!mce_log) |
576 | goto out; |
577 | |
578 | error_type = mce_log->error_type; |
579 | err_sub_type = rtas_mc_error_sub_type(mlog: mce_log); |
580 | |
581 | if (initiator == RTAS_INITIATOR_UNKNOWN) |
582 | mce_err.initiator = MCE_INITIATOR_UNKNOWN; |
583 | else if (initiator == RTAS_INITIATOR_CPU) |
584 | mce_err.initiator = MCE_INITIATOR_CPU; |
585 | else if (initiator == RTAS_INITIATOR_PCI) |
586 | mce_err.initiator = MCE_INITIATOR_PCI; |
587 | else if (initiator == RTAS_INITIATOR_ISA) |
588 | mce_err.initiator = MCE_INITIATOR_ISA; |
589 | else if (initiator == RTAS_INITIATOR_MEMORY) |
590 | mce_err.initiator = MCE_INITIATOR_MEMORY; |
591 | else if (initiator == RTAS_INITIATOR_POWERMGM) |
592 | mce_err.initiator = MCE_INITIATOR_POWERMGM; |
593 | else |
594 | mce_err.initiator = MCE_INITIATOR_UNKNOWN; |
595 | |
596 | if (severity == RTAS_SEVERITY_NO_ERROR) |
597 | mce_err.severity = MCE_SEV_NO_ERROR; |
598 | else if (severity == RTAS_SEVERITY_EVENT) |
599 | mce_err.severity = MCE_SEV_WARNING; |
600 | else if (severity == RTAS_SEVERITY_WARNING) |
601 | mce_err.severity = MCE_SEV_WARNING; |
602 | else if (severity == RTAS_SEVERITY_ERROR_SYNC) |
603 | mce_err.severity = MCE_SEV_SEVERE; |
604 | else if (severity == RTAS_SEVERITY_ERROR) |
605 | mce_err.severity = MCE_SEV_SEVERE; |
606 | else |
607 | mce_err.severity = MCE_SEV_FATAL; |
608 | |
609 | if (severity <= RTAS_SEVERITY_ERROR_SYNC) |
610 | mce_err.sync_error = true; |
611 | else |
612 | mce_err.sync_error = false; |
613 | |
614 | mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN; |
615 | mce_err.error_class = MCE_ECLASS_UNKNOWN; |
616 | |
617 | switch (error_type) { |
618 | case MC_ERROR_TYPE_UE: |
619 | mce_err.error_type = MCE_ERROR_TYPE_UE; |
620 | mce_common_process_ue(regs, &mce_err); |
621 | if (mce_err.ignore_event) |
622 | disposition = RTAS_DISP_FULLY_RECOVERED; |
623 | switch (err_sub_type) { |
624 | case MC_ERROR_UE_IFETCH: |
625 | mce_err.u.ue_error_type = MCE_UE_ERROR_IFETCH; |
626 | break; |
627 | case MC_ERROR_UE_PAGE_TABLE_WALK_IFETCH: |
628 | mce_err.u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH; |
629 | break; |
630 | case MC_ERROR_UE_LOAD_STORE: |
631 | mce_err.u.ue_error_type = MCE_UE_ERROR_LOAD_STORE; |
632 | break; |
633 | case MC_ERROR_UE_PAGE_TABLE_WALK_LOAD_STORE: |
634 | mce_err.u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE; |
635 | break; |
636 | case MC_ERROR_UE_INDETERMINATE: |
637 | default: |
638 | mce_err.u.ue_error_type = MCE_UE_ERROR_INDETERMINATE; |
639 | break; |
640 | } |
641 | if (mce_log->sub_err_type & UE_EFFECTIVE_ADDR_PROVIDED) |
642 | eaddr = be64_to_cpu(mce_log->effective_address); |
643 | |
644 | if (mce_log->sub_err_type & UE_LOGICAL_ADDR_PROVIDED) { |
645 | paddr = be64_to_cpu(mce_log->logical_address); |
646 | } else if (mce_log->sub_err_type & UE_EFFECTIVE_ADDR_PROVIDED) { |
647 | unsigned long pfn; |
648 | |
649 | pfn = addr_to_pfn(regs, eaddr); |
650 | if (pfn != ULONG_MAX) |
651 | paddr = pfn << PAGE_SHIFT; |
652 | } |
653 | |
654 | break; |
655 | case MC_ERROR_TYPE_SLB: |
656 | mce_err.error_type = MCE_ERROR_TYPE_SLB; |
657 | switch (err_sub_type) { |
658 | case MC_ERROR_SLB_PARITY: |
659 | mce_err.u.slb_error_type = MCE_SLB_ERROR_PARITY; |
660 | break; |
661 | case MC_ERROR_SLB_MULTIHIT: |
662 | mce_err.u.slb_error_type = MCE_SLB_ERROR_MULTIHIT; |
663 | break; |
664 | case MC_ERROR_SLB_INDETERMINATE: |
665 | default: |
666 | mce_err.u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE; |
667 | break; |
668 | } |
669 | if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED) |
670 | eaddr = be64_to_cpu(mce_log->effective_address); |
671 | break; |
672 | case MC_ERROR_TYPE_ERAT: |
673 | mce_err.error_type = MCE_ERROR_TYPE_ERAT; |
674 | switch (err_sub_type) { |
675 | case MC_ERROR_ERAT_PARITY: |
676 | mce_err.u.erat_error_type = MCE_ERAT_ERROR_PARITY; |
677 | break; |
678 | case MC_ERROR_ERAT_MULTIHIT: |
679 | mce_err.u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT; |
680 | break; |
681 | case MC_ERROR_ERAT_INDETERMINATE: |
682 | default: |
683 | mce_err.u.erat_error_type = MCE_ERAT_ERROR_INDETERMINATE; |
684 | break; |
685 | } |
686 | if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED) |
687 | eaddr = be64_to_cpu(mce_log->effective_address); |
688 | break; |
689 | case MC_ERROR_TYPE_TLB: |
690 | mce_err.error_type = MCE_ERROR_TYPE_TLB; |
691 | switch (err_sub_type) { |
692 | case MC_ERROR_TLB_PARITY: |
693 | mce_err.u.tlb_error_type = MCE_TLB_ERROR_PARITY; |
694 | break; |
695 | case MC_ERROR_TLB_MULTIHIT: |
696 | mce_err.u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT; |
697 | break; |
698 | case MC_ERROR_TLB_INDETERMINATE: |
699 | default: |
700 | mce_err.u.tlb_error_type = MCE_TLB_ERROR_INDETERMINATE; |
701 | break; |
702 | } |
703 | if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED) |
704 | eaddr = be64_to_cpu(mce_log->effective_address); |
705 | break; |
706 | case MC_ERROR_TYPE_D_CACHE: |
707 | mce_err.error_type = MCE_ERROR_TYPE_DCACHE; |
708 | break; |
709 | case MC_ERROR_TYPE_I_CACHE: |
710 | mce_err.error_type = MCE_ERROR_TYPE_ICACHE; |
711 | break; |
712 | case MC_ERROR_TYPE_CTRL_MEM_ACCESS: |
713 | mce_err.error_type = MCE_ERROR_TYPE_RA; |
714 | switch (err_sub_type) { |
715 | case MC_ERROR_CTRL_MEM_ACCESS_PTABLE_WALK: |
716 | mce_err.u.ra_error_type = |
717 | MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN; |
718 | break; |
719 | case MC_ERROR_CTRL_MEM_ACCESS_OP_ACCESS: |
720 | mce_err.u.ra_error_type = |
721 | MCE_RA_ERROR_LOAD_STORE_FOREIGN; |
722 | break; |
723 | } |
724 | if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED) |
725 | eaddr = be64_to_cpu(mce_log->effective_address); |
726 | break; |
727 | case MC_ERROR_TYPE_UNKNOWN: |
728 | default: |
729 | mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN; |
730 | break; |
731 | } |
732 | out: |
733 | save_mce_event(regs, disposition == RTAS_DISP_FULLY_RECOVERED, |
734 | &mce_err, regs->nip, eaddr, paddr); |
735 | return disposition; |
736 | } |
737 | |
738 | static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp) |
739 | { |
740 | struct pseries_errorlog *pseries_log; |
741 | struct pseries_mc_errorlog *mce_log = NULL; |
742 | int disposition = rtas_error_disposition(errp); |
743 | u8 error_type; |
744 | |
745 | if (!rtas_error_extended(errp)) |
746 | goto out; |
747 | |
748 | pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE); |
749 | if (!pseries_log) |
750 | goto out; |
751 | |
752 | mce_log = (struct pseries_mc_errorlog *)pseries_log->data; |
753 | error_type = mce_log->error_type; |
754 | |
755 | disposition = mce_handle_err_realmode(disposition, error_type); |
756 | out: |
757 | disposition = mce_handle_err_virtmode(regs, errp, mce_log, |
758 | disposition); |
759 | return disposition; |
760 | } |
761 | |
762 | /* |
763 | * Process MCE rtas errlog event. |
764 | */ |
765 | void pSeries_machine_check_log_err(void) |
766 | { |
767 | struct rtas_error_log *err; |
768 | |
769 | err = fwnmi_get_errlog(); |
770 | log_error((char *)err, ERR_TYPE_RTAS_LOG, 0); |
771 | } |
772 | |
773 | /* |
774 | * See if we can recover from a machine check exception. |
775 | * This is only called on power4 (or above) and only via |
776 | * the Firmware Non-Maskable Interrupts (fwnmi) handler |
777 | * which provides the error analysis for us. |
778 | * |
779 | * Return 1 if corrected (or delivered a signal). |
780 | * Return 0 if there is nothing we can do. |
781 | */ |
782 | static int recover_mce(struct pt_regs *regs, struct machine_check_event *evt) |
783 | { |
784 | int recovered = 0; |
785 | |
786 | if (regs_is_unrecoverable(regs)) { |
787 | /* If MSR_RI isn't set, we cannot recover */ |
788 | pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n" ); |
789 | recovered = 0; |
790 | } else if (evt->disposition == MCE_DISPOSITION_RECOVERED) { |
791 | /* Platform corrected itself */ |
792 | recovered = 1; |
793 | } else if (evt->severity == MCE_SEV_FATAL) { |
794 | /* Fatal machine check */ |
795 | pr_err("Machine check interrupt is fatal\n" ); |
796 | recovered = 0; |
797 | } |
798 | |
799 | if (!recovered && evt->sync_error) { |
800 | /* |
801 | * Try to kill processes if we get a synchronous machine check |
802 | * (e.g., one caused by execution of this instruction). This |
803 | * will devolve into a panic if we try to kill init or are in |
804 | * an interrupt etc. |
805 | * |
806 | * TODO: Queue up this address for hwpoisioning later. |
807 | * TODO: This is not quite right for d-side machine |
808 | * checks ->nip is not necessarily the important |
809 | * address. |
810 | */ |
811 | if ((user_mode(regs))) { |
812 | _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip); |
813 | recovered = 1; |
814 | } else if (die_will_crash()) { |
815 | /* |
816 | * die() would kill the kernel, so better to go via |
817 | * the platform reboot code that will log the |
818 | * machine check. |
819 | */ |
820 | recovered = 0; |
821 | } else { |
822 | die_mce("Machine check" , regs, SIGBUS); |
823 | recovered = 1; |
824 | } |
825 | } |
826 | |
827 | return recovered; |
828 | } |
829 | |
830 | /* |
831 | * Handle a machine check. |
832 | * |
833 | * Note that on Power 4 and beyond Firmware Non-Maskable Interrupts (fwnmi) |
834 | * should be present. If so the handler which called us tells us if the |
835 | * error was recovered (never true if RI=0). |
836 | * |
837 | * On hardware prior to Power 4 these exceptions were asynchronous which |
838 | * means we can't tell exactly where it occurred and so we can't recover. |
839 | */ |
840 | int pSeries_machine_check_exception(struct pt_regs *regs) |
841 | { |
842 | struct machine_check_event evt; |
843 | |
844 | if (!get_mce_event(&evt, MCE_EVENT_RELEASE)) |
845 | return 0; |
846 | |
847 | /* Print things out */ |
848 | if (evt.version != MCE_V1) { |
849 | pr_err("Machine Check Exception, Unknown event version %d !\n" , |
850 | evt.version); |
851 | return 0; |
852 | } |
853 | machine_check_print_event_info(&evt, user_mode(regs), false); |
854 | |
855 | if (recover_mce(regs, evt: &evt)) |
856 | return 1; |
857 | |
858 | return 0; |
859 | } |
860 | |
861 | long pseries_machine_check_realmode(struct pt_regs *regs) |
862 | { |
863 | struct rtas_error_log *errp; |
864 | int disposition; |
865 | |
866 | if (fwnmi_active) { |
867 | errp = fwnmi_get_errinfo(regs); |
868 | /* |
869 | * Call to fwnmi_release_errinfo() in real mode causes kernel |
870 | * to panic. Hence we will call it as soon as we go into |
871 | * virtual mode. |
872 | */ |
873 | disposition = mce_handle_error(regs, errp); |
874 | |
875 | fwnmi_release_errinfo(); |
876 | |
877 | if (disposition == RTAS_DISP_FULLY_RECOVERED) |
878 | return 1; |
879 | } |
880 | |
881 | return 0; |
882 | } |
883 | |