1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* |
3 | * Machine check exception handling. |
4 | * |
5 | * Copyright 2013 IBM Corporation |
6 | * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> |
7 | */ |
8 | |
9 | #undef DEBUG |
10 | #define pr_fmt(fmt) "mce: " fmt |
11 | |
12 | #include <linux/hardirq.h> |
13 | #include <linux/types.h> |
14 | #include <linux/ptrace.h> |
15 | #include <linux/percpu.h> |
16 | #include <linux/export.h> |
17 | #include <linux/irq_work.h> |
18 | #include <linux/extable.h> |
19 | #include <linux/ftrace.h> |
20 | #include <linux/memblock.h> |
21 | #include <linux/of.h> |
22 | |
23 | #include <asm/interrupt.h> |
24 | #include <asm/machdep.h> |
25 | #include <asm/mce.h> |
26 | #include <asm/nmi.h> |
27 | |
28 | #include "setup.h" |
29 | |
/* Forward declarations for the UE (uncorrectable error) helpers defined below. */
static void machine_process_ue_event(struct work_struct *work);
static void machine_check_ue_event(struct machine_check_event *evt);

/* Work item whose handler is machine_process_ue_event(). */
static DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);

/* Chain called with each queued UE event from machine_process_ue_event(). */
static BLOCKING_NOTIFIER_HEAD(mce_notifier_list);
36 | |
37 | int mce_register_notifier(struct notifier_block *nb) |
38 | { |
39 | return blocking_notifier_chain_register(nh: &mce_notifier_list, nb); |
40 | } |
41 | EXPORT_SYMBOL_GPL(mce_register_notifier); |
42 | |
43 | int mce_unregister_notifier(struct notifier_block *nb) |
44 | { |
45 | return blocking_notifier_chain_unregister(nh: &mce_notifier_list, nb); |
46 | } |
47 | EXPORT_SYMBOL_GPL(mce_unregister_notifier); |
48 | |
49 | static void mce_set_error_info(struct machine_check_event *mce, |
50 | struct mce_error_info *mce_err) |
51 | { |
52 | mce->error_type = mce_err->error_type; |
53 | switch (mce_err->error_type) { |
54 | case MCE_ERROR_TYPE_UE: |
55 | mce->u.ue_error.ue_error_type = mce_err->u.ue_error_type; |
56 | break; |
57 | case MCE_ERROR_TYPE_SLB: |
58 | mce->u.slb_error.slb_error_type = mce_err->u.slb_error_type; |
59 | break; |
60 | case MCE_ERROR_TYPE_ERAT: |
61 | mce->u.erat_error.erat_error_type = mce_err->u.erat_error_type; |
62 | break; |
63 | case MCE_ERROR_TYPE_TLB: |
64 | mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type; |
65 | break; |
66 | case MCE_ERROR_TYPE_USER: |
67 | mce->u.user_error.user_error_type = mce_err->u.user_error_type; |
68 | break; |
69 | case MCE_ERROR_TYPE_RA: |
70 | mce->u.ra_error.ra_error_type = mce_err->u.ra_error_type; |
71 | break; |
72 | case MCE_ERROR_TYPE_LINK: |
73 | mce->u.link_error.link_error_type = mce_err->u.link_error_type; |
74 | break; |
75 | case MCE_ERROR_TYPE_UNKNOWN: |
76 | default: |
77 | break; |
78 | } |
79 | } |
80 | |
/*
 * Request deferred processing of queued MCE events.
 *
 * First raises the irq-work (decrementer) interrupt, then sets the per-CPU
 * pending flag that mce_run_irq_context_handlers() tests before it does
 * any work.
 */
void mce_irq_work_queue(void)
{
	/* Raise decrementer interrupt */
	arch_irq_work_raise();

	/* Tell mce_run_irq_context_handlers() there is something to do. */
	set_mce_pending_irq_work();
}
87 | |
88 | /* |
89 | * Decode and save high level MCE information into per cpu buffer which |
90 | * is an array of machine_check_event structure. |
91 | */ |
92 | void save_mce_event(struct pt_regs *regs, long handled, |
93 | struct mce_error_info *mce_err, |
94 | uint64_t nip, uint64_t addr, uint64_t phys_addr) |
95 | { |
96 | int index = local_paca->mce_info->mce_nest_count++; |
97 | struct machine_check_event *mce; |
98 | |
99 | mce = &local_paca->mce_info->mce_event[index]; |
100 | /* |
101 | * Return if we don't have enough space to log mce event. |
102 | * mce_nest_count may go beyond MAX_MC_EVT but that's ok, |
103 | * the check below will stop buffer overrun. |
104 | */ |
105 | if (index >= MAX_MC_EVT) |
106 | return; |
107 | |
108 | /* Populate generic machine check info */ |
109 | mce->version = MCE_V1; |
110 | mce->srr0 = nip; |
111 | mce->srr1 = regs->msr; |
112 | mce->gpr3 = regs->gpr[3]; |
113 | mce->in_use = 1; |
114 | mce->cpu = get_paca()->paca_index; |
115 | |
116 | /* Mark it recovered if we have handled it and MSR(RI=1). */ |
117 | if (handled && (regs->msr & MSR_RI)) |
118 | mce->disposition = MCE_DISPOSITION_RECOVERED; |
119 | else |
120 | mce->disposition = MCE_DISPOSITION_NOT_RECOVERED; |
121 | |
122 | mce->initiator = mce_err->initiator; |
123 | mce->severity = mce_err->severity; |
124 | mce->sync_error = mce_err->sync_error; |
125 | mce->error_class = mce_err->error_class; |
126 | |
127 | /* |
128 | * Populate the mce error_type and type-specific error_type. |
129 | */ |
130 | mce_set_error_info(mce, mce_err); |
131 | if (mce->error_type == MCE_ERROR_TYPE_UE) |
132 | mce->u.ue_error.ignore_event = mce_err->ignore_event; |
133 | |
134 | /* |
135 | * Raise irq work, So that we don't miss to log the error for |
136 | * unrecoverable errors. |
137 | */ |
138 | if (mce->disposition == MCE_DISPOSITION_NOT_RECOVERED) |
139 | mce_irq_work_queue(); |
140 | |
141 | if (!addr) |
142 | return; |
143 | |
144 | if (mce->error_type == MCE_ERROR_TYPE_TLB) { |
145 | mce->u.tlb_error.effective_address_provided = true; |
146 | mce->u.tlb_error.effective_address = addr; |
147 | } else if (mce->error_type == MCE_ERROR_TYPE_SLB) { |
148 | mce->u.slb_error.effective_address_provided = true; |
149 | mce->u.slb_error.effective_address = addr; |
150 | } else if (mce->error_type == MCE_ERROR_TYPE_ERAT) { |
151 | mce->u.erat_error.effective_address_provided = true; |
152 | mce->u.erat_error.effective_address = addr; |
153 | } else if (mce->error_type == MCE_ERROR_TYPE_USER) { |
154 | mce->u.user_error.effective_address_provided = true; |
155 | mce->u.user_error.effective_address = addr; |
156 | } else if (mce->error_type == MCE_ERROR_TYPE_RA) { |
157 | mce->u.ra_error.effective_address_provided = true; |
158 | mce->u.ra_error.effective_address = addr; |
159 | } else if (mce->error_type == MCE_ERROR_TYPE_LINK) { |
160 | mce->u.link_error.effective_address_provided = true; |
161 | mce->u.link_error.effective_address = addr; |
162 | } else if (mce->error_type == MCE_ERROR_TYPE_UE) { |
163 | mce->u.ue_error.effective_address_provided = true; |
164 | mce->u.ue_error.effective_address = addr; |
165 | if (phys_addr != ULONG_MAX) { |
166 | mce->u.ue_error.physical_address_provided = true; |
167 | mce->u.ue_error.physical_address = phys_addr; |
168 | machine_check_ue_event(evt: mce); |
169 | } |
170 | } |
171 | return; |
172 | } |
173 | |
174 | /* |
175 | * get_mce_event: |
176 | * mce Pointer to machine_check_event structure to be filled. |
177 | * release Flag to indicate whether to free the event slot or not. |
178 | * 0 <= do not release the mce event. Caller will invoke |
179 | * release_mce_event() once event has been consumed. |
180 | * 1 <= release the slot. |
181 | * |
182 | * return 1 = success |
183 | * 0 = failure |
184 | * |
185 | * get_mce_event() will be called by platform specific machine check |
186 | * handle routine and in KVM. |
187 | * When we call get_mce_event(), we are still in interrupt context and |
188 | * preemption will not be scheduled until ret_from_expect() routine |
189 | * is called. |
190 | */ |
191 | int get_mce_event(struct machine_check_event *mce, bool release) |
192 | { |
193 | int index = local_paca->mce_info->mce_nest_count - 1; |
194 | struct machine_check_event *mc_evt; |
195 | int ret = 0; |
196 | |
197 | /* Sanity check */ |
198 | if (index < 0) |
199 | return ret; |
200 | |
201 | /* Check if we have MCE info to process. */ |
202 | if (index < MAX_MC_EVT) { |
203 | mc_evt = &local_paca->mce_info->mce_event[index]; |
204 | /* Copy the event structure and release the original */ |
205 | if (mce) |
206 | *mce = *mc_evt; |
207 | if (release) |
208 | mc_evt->in_use = 0; |
209 | ret = 1; |
210 | } |
211 | /* Decrement the count to free the slot. */ |
212 | if (release) |
213 | local_paca->mce_info->mce_nest_count--; |
214 | |
215 | return ret; |
216 | } |
217 | |
218 | void release_mce_event(void) |
219 | { |
220 | get_mce_event(NULL, release: true); |
221 | } |
222 | |
223 | static void machine_check_ue_work(void) |
224 | { |
225 | schedule_work(work: &mce_ue_event_work); |
226 | } |
227 | |
228 | /* |
229 | * Queue up the MCE event which then can be handled later. |
230 | */ |
231 | static void machine_check_ue_event(struct machine_check_event *evt) |
232 | { |
233 | int index; |
234 | |
235 | index = local_paca->mce_info->mce_ue_count++; |
236 | /* If queue is full, just return for now. */ |
237 | if (index >= MAX_MC_EVT) { |
238 | local_paca->mce_info->mce_ue_count--; |
239 | return; |
240 | } |
241 | memcpy(&local_paca->mce_info->mce_ue_event_queue[index], |
242 | evt, sizeof(*evt)); |
243 | } |
244 | |
245 | /* |
246 | * Queue up the MCE event which then can be handled later. |
247 | */ |
248 | void machine_check_queue_event(void) |
249 | { |
250 | int index; |
251 | struct machine_check_event evt; |
252 | |
253 | if (!get_mce_event(mce: &evt, release: MCE_EVENT_RELEASE)) |
254 | return; |
255 | |
256 | index = local_paca->mce_info->mce_queue_count++; |
257 | /* If queue is full, just return for now. */ |
258 | if (index >= MAX_MC_EVT) { |
259 | local_paca->mce_info->mce_queue_count--; |
260 | return; |
261 | } |
262 | memcpy(&local_paca->mce_info->mce_event_queue[index], |
263 | &evt, sizeof(evt)); |
264 | |
265 | mce_irq_work_queue(); |
266 | } |
267 | |
268 | void mce_common_process_ue(struct pt_regs *regs, |
269 | struct mce_error_info *mce_err) |
270 | { |
271 | const struct exception_table_entry *entry; |
272 | |
273 | entry = search_kernel_exception_table(addr: regs->nip); |
274 | if (entry) { |
275 | mce_err->ignore_event = true; |
276 | regs_set_return_ip(regs, extable_fixup(entry)); |
277 | } |
278 | } |
279 | |
280 | /* |
281 | * process pending MCE event from the mce event queue. This function will be |
282 | * called during syscall exit. |
283 | */ |
284 | static void machine_process_ue_event(struct work_struct *work) |
285 | { |
286 | int index; |
287 | struct machine_check_event *evt; |
288 | |
289 | while (local_paca->mce_info->mce_ue_count > 0) { |
290 | index = local_paca->mce_info->mce_ue_count - 1; |
291 | evt = &local_paca->mce_info->mce_ue_event_queue[index]; |
292 | blocking_notifier_call_chain(nh: &mce_notifier_list, val: 0, v: evt); |
293 | #ifdef CONFIG_MEMORY_FAILURE |
294 | /* |
295 | * This should probably queued elsewhere, but |
296 | * oh! well |
297 | * |
298 | * Don't report this machine check because the caller has a |
299 | * asked us to ignore the event, it has a fixup handler which |
300 | * will do the appropriate error handling and reporting. |
301 | */ |
302 | if (evt->error_type == MCE_ERROR_TYPE_UE) { |
303 | if (evt->u.ue_error.ignore_event) { |
304 | local_paca->mce_info->mce_ue_count--; |
305 | continue; |
306 | } |
307 | |
308 | if (evt->u.ue_error.physical_address_provided) { |
309 | unsigned long pfn; |
310 | |
311 | pfn = evt->u.ue_error.physical_address >> |
312 | PAGE_SHIFT; |
313 | memory_failure(pfn, flags: 0); |
314 | } else |
315 | pr_warn("Failed to identify bad address from " |
316 | "where the uncorrectable error (UE) " |
317 | "was generated\n" ); |
318 | } |
319 | #endif |
320 | local_paca->mce_info->mce_ue_count--; |
321 | } |
322 | } |
323 | /* |
324 | * process pending MCE event from the mce event queue. This function will be |
325 | * called during syscall exit. |
326 | */ |
327 | static void machine_check_process_queued_event(void) |
328 | { |
329 | int index; |
330 | struct machine_check_event *evt; |
331 | |
332 | add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); |
333 | |
334 | /* |
335 | * For now just print it to console. |
336 | * TODO: log this error event to FSP or nvram. |
337 | */ |
338 | while (local_paca->mce_info->mce_queue_count > 0) { |
339 | index = local_paca->mce_info->mce_queue_count - 1; |
340 | evt = &local_paca->mce_info->mce_event_queue[index]; |
341 | |
342 | if (evt->error_type == MCE_ERROR_TYPE_UE && |
343 | evt->u.ue_error.ignore_event) { |
344 | local_paca->mce_info->mce_queue_count--; |
345 | continue; |
346 | } |
347 | machine_check_print_event_info(evt, false, false); |
348 | local_paca->mce_info->mce_queue_count--; |
349 | } |
350 | } |
351 | |
352 | void set_mce_pending_irq_work(void) |
353 | { |
354 | local_paca->mce_pending_irq_work = 1; |
355 | } |
356 | |
357 | void clear_mce_pending_irq_work(void) |
358 | { |
359 | local_paca->mce_pending_irq_work = 0; |
360 | } |
361 | |
362 | void mce_run_irq_context_handlers(void) |
363 | { |
364 | if (unlikely(local_paca->mce_pending_irq_work)) { |
365 | if (ppc_md.machine_check_log_err) |
366 | ppc_md.machine_check_log_err(); |
367 | machine_check_process_queued_event(); |
368 | machine_check_ue_work(); |
369 | clear_mce_pending_irq_work(); |
370 | } |
371 | } |
372 | |
373 | void machine_check_print_event_info(struct machine_check_event *evt, |
374 | bool user_mode, bool in_guest) |
375 | { |
376 | const char *level, *sevstr, *subtype, *err_type, *initiator; |
377 | uint64_t ea = 0, pa = 0; |
378 | int n = 0; |
379 | char dar_str[50]; |
380 | char pa_str[50]; |
381 | static const char *mc_ue_types[] = { |
382 | "Indeterminate" , |
383 | "Instruction fetch" , |
384 | "Page table walk ifetch" , |
385 | "Load/Store" , |
386 | "Page table walk Load/Store" , |
387 | }; |
388 | static const char *mc_slb_types[] = { |
389 | "Indeterminate" , |
390 | "Parity" , |
391 | "Multihit" , |
392 | }; |
393 | static const char *mc_erat_types[] = { |
394 | "Indeterminate" , |
395 | "Parity" , |
396 | "Multihit" , |
397 | }; |
398 | static const char *mc_tlb_types[] = { |
399 | "Indeterminate" , |
400 | "Parity" , |
401 | "Multihit" , |
402 | }; |
403 | static const char *mc_user_types[] = { |
404 | "Indeterminate" , |
405 | "tlbie(l) invalid" , |
406 | "scv invalid" , |
407 | }; |
408 | static const char *mc_ra_types[] = { |
409 | "Indeterminate" , |
410 | "Instruction fetch (bad)" , |
411 | "Instruction fetch (foreign/control memory)" , |
412 | "Page table walk ifetch (bad)" , |
413 | "Page table walk ifetch (foreign/control memory)" , |
414 | "Load (bad)" , |
415 | "Store (bad)" , |
416 | "Page table walk Load/Store (bad)" , |
417 | "Page table walk Load/Store (foreign/control memory)" , |
418 | "Load/Store (foreign/control memory)" , |
419 | }; |
420 | static const char *mc_link_types[] = { |
421 | "Indeterminate" , |
422 | "Instruction fetch (timeout)" , |
423 | "Page table walk ifetch (timeout)" , |
424 | "Load (timeout)" , |
425 | "Store (timeout)" , |
426 | "Page table walk Load/Store (timeout)" , |
427 | }; |
428 | static const char *mc_error_class[] = { |
429 | "Unknown" , |
430 | "Hardware error" , |
431 | "Probable Hardware error (some chance of software cause)" , |
432 | "Software error" , |
433 | "Probable Software error (some chance of hardware cause)" , |
434 | }; |
435 | |
436 | /* Print things out */ |
437 | if (evt->version != MCE_V1) { |
438 | pr_err("Machine Check Exception, Unknown event version %d !\n" , |
439 | evt->version); |
440 | return; |
441 | } |
442 | switch (evt->severity) { |
443 | case MCE_SEV_NO_ERROR: |
444 | level = KERN_INFO; |
445 | sevstr = "Harmless" ; |
446 | break; |
447 | case MCE_SEV_WARNING: |
448 | level = KERN_WARNING; |
449 | sevstr = "Warning" ; |
450 | break; |
451 | case MCE_SEV_SEVERE: |
452 | level = KERN_ERR; |
453 | sevstr = "Severe" ; |
454 | break; |
455 | case MCE_SEV_FATAL: |
456 | default: |
457 | level = KERN_ERR; |
458 | sevstr = "Fatal" ; |
459 | break; |
460 | } |
461 | |
462 | switch(evt->initiator) { |
463 | case MCE_INITIATOR_CPU: |
464 | initiator = "CPU" ; |
465 | break; |
466 | case MCE_INITIATOR_PCI: |
467 | initiator = "PCI" ; |
468 | break; |
469 | case MCE_INITIATOR_ISA: |
470 | initiator = "ISA" ; |
471 | break; |
472 | case MCE_INITIATOR_MEMORY: |
473 | initiator = "Memory" ; |
474 | break; |
475 | case MCE_INITIATOR_POWERMGM: |
476 | initiator = "Power Management" ; |
477 | break; |
478 | case MCE_INITIATOR_UNKNOWN: |
479 | default: |
480 | initiator = "Unknown" ; |
481 | break; |
482 | } |
483 | |
484 | switch (evt->error_type) { |
485 | case MCE_ERROR_TYPE_UE: |
486 | err_type = "UE" ; |
487 | subtype = evt->u.ue_error.ue_error_type < |
488 | ARRAY_SIZE(mc_ue_types) ? |
489 | mc_ue_types[evt->u.ue_error.ue_error_type] |
490 | : "Unknown" ; |
491 | if (evt->u.ue_error.effective_address_provided) |
492 | ea = evt->u.ue_error.effective_address; |
493 | if (evt->u.ue_error.physical_address_provided) |
494 | pa = evt->u.ue_error.physical_address; |
495 | break; |
496 | case MCE_ERROR_TYPE_SLB: |
497 | err_type = "SLB" ; |
498 | subtype = evt->u.slb_error.slb_error_type < |
499 | ARRAY_SIZE(mc_slb_types) ? |
500 | mc_slb_types[evt->u.slb_error.slb_error_type] |
501 | : "Unknown" ; |
502 | if (evt->u.slb_error.effective_address_provided) |
503 | ea = evt->u.slb_error.effective_address; |
504 | break; |
505 | case MCE_ERROR_TYPE_ERAT: |
506 | err_type = "ERAT" ; |
507 | subtype = evt->u.erat_error.erat_error_type < |
508 | ARRAY_SIZE(mc_erat_types) ? |
509 | mc_erat_types[evt->u.erat_error.erat_error_type] |
510 | : "Unknown" ; |
511 | if (evt->u.erat_error.effective_address_provided) |
512 | ea = evt->u.erat_error.effective_address; |
513 | break; |
514 | case MCE_ERROR_TYPE_TLB: |
515 | err_type = "TLB" ; |
516 | subtype = evt->u.tlb_error.tlb_error_type < |
517 | ARRAY_SIZE(mc_tlb_types) ? |
518 | mc_tlb_types[evt->u.tlb_error.tlb_error_type] |
519 | : "Unknown" ; |
520 | if (evt->u.tlb_error.effective_address_provided) |
521 | ea = evt->u.tlb_error.effective_address; |
522 | break; |
523 | case MCE_ERROR_TYPE_USER: |
524 | err_type = "User" ; |
525 | subtype = evt->u.user_error.user_error_type < |
526 | ARRAY_SIZE(mc_user_types) ? |
527 | mc_user_types[evt->u.user_error.user_error_type] |
528 | : "Unknown" ; |
529 | if (evt->u.user_error.effective_address_provided) |
530 | ea = evt->u.user_error.effective_address; |
531 | break; |
532 | case MCE_ERROR_TYPE_RA: |
533 | err_type = "Real address" ; |
534 | subtype = evt->u.ra_error.ra_error_type < |
535 | ARRAY_SIZE(mc_ra_types) ? |
536 | mc_ra_types[evt->u.ra_error.ra_error_type] |
537 | : "Unknown" ; |
538 | if (evt->u.ra_error.effective_address_provided) |
539 | ea = evt->u.ra_error.effective_address; |
540 | break; |
541 | case MCE_ERROR_TYPE_LINK: |
542 | err_type = "Link" ; |
543 | subtype = evt->u.link_error.link_error_type < |
544 | ARRAY_SIZE(mc_link_types) ? |
545 | mc_link_types[evt->u.link_error.link_error_type] |
546 | : "Unknown" ; |
547 | if (evt->u.link_error.effective_address_provided) |
548 | ea = evt->u.link_error.effective_address; |
549 | break; |
550 | case MCE_ERROR_TYPE_DCACHE: |
551 | err_type = "D-Cache" ; |
552 | subtype = "Unknown" ; |
553 | break; |
554 | case MCE_ERROR_TYPE_ICACHE: |
555 | err_type = "I-Cache" ; |
556 | subtype = "Unknown" ; |
557 | break; |
558 | default: |
559 | case MCE_ERROR_TYPE_UNKNOWN: |
560 | err_type = "Unknown" ; |
561 | subtype = "" ; |
562 | break; |
563 | } |
564 | |
565 | dar_str[0] = pa_str[0] = '\0'; |
566 | if (ea && evt->srr0 != ea) { |
567 | /* Load/Store address */ |
568 | n = sprintf(buf: dar_str, fmt: "DAR: %016llx " , ea); |
569 | if (pa) |
570 | sprintf(buf: dar_str + n, fmt: "paddr: %016llx " , pa); |
571 | } else if (pa) { |
572 | sprintf(buf: pa_str, fmt: " paddr: %016llx" , pa); |
573 | } |
574 | |
575 | printk("%sMCE: CPU%d: machine check (%s) %s %s %s %s[%s]\n" , |
576 | level, evt->cpu, sevstr, in_guest ? "Guest" : "" , |
577 | err_type, subtype, dar_str, |
578 | evt->disposition == MCE_DISPOSITION_RECOVERED ? |
579 | "Recovered" : "Not recovered" ); |
580 | |
581 | if (in_guest || user_mode) { |
582 | printk("%sMCE: CPU%d: PID: %d Comm: %s %sNIP: [%016llx]%s\n" , |
583 | level, evt->cpu, current->pid, current->comm, |
584 | in_guest ? "Guest " : "" , evt->srr0, pa_str); |
585 | } else { |
586 | printk("%sMCE: CPU%d: NIP: [%016llx] %pS%s\n" , |
587 | level, evt->cpu, evt->srr0, (void *)evt->srr0, pa_str); |
588 | } |
589 | |
590 | printk("%sMCE: CPU%d: Initiator %s\n" , level, evt->cpu, initiator); |
591 | |
592 | subtype = evt->error_class < ARRAY_SIZE(mc_error_class) ? |
593 | mc_error_class[evt->error_class] : "Unknown" ; |
594 | printk("%sMCE: CPU%d: %s\n" , level, evt->cpu, subtype); |
595 | |
596 | #ifdef CONFIG_PPC_64S_HASH_MMU |
597 | /* Display faulty slb contents for SLB errors. */ |
598 | if (evt->error_type == MCE_ERROR_TYPE_SLB && !in_guest) |
599 | slb_dump_contents(local_paca->mce_faulty_slbs); |
600 | #endif |
601 | } |
602 | EXPORT_SYMBOL_GPL(machine_check_print_event_info); |
603 | |
604 | /* |
605 | * This function is called in real mode. Strictly no printk's please. |
606 | * |
607 | * regs->nip and regs->msr contains srr0 and ssr1. |
608 | */ |
609 | DEFINE_INTERRUPT_HANDLER_NMI(machine_check_early) |
610 | { |
611 | long handled = 0; |
612 | |
613 | hv_nmi_check_nonrecoverable(regs); |
614 | |
615 | /* |
616 | * See if platform is capable of handling machine check. |
617 | */ |
618 | if (ppc_md.machine_check_early) |
619 | handled = ppc_md.machine_check_early(regs); |
620 | |
621 | return handled; |
622 | } |
623 | |
/*
 * Possible meanings for HMER_DEBUG_TRIG bit being set on POWER9.
 *
 * The variable has static storage, so it starts out as DTRIG_UNKNOWN (the
 * first enumerator) until init_debug_trig_function() picks a value.
 */
static enum {
	DTRIG_UNKNOWN,
	/* need to emulate vector CI load instr */
	DTRIG_VECTOR_CI,
	/* need to escape from TM suspend mode */
	DTRIG_SUSPEND_ESCAPE,
} hmer_debug_trig_function;
630 | |
631 | static int init_debug_trig_function(void) |
632 | { |
633 | int pvr; |
634 | struct device_node *cpun; |
635 | struct property *prop = NULL; |
636 | const char *str; |
637 | |
638 | /* First look in the device tree */ |
639 | preempt_disable(); |
640 | cpun = of_get_cpu_node(smp_processor_id(), NULL); |
641 | if (cpun) { |
642 | of_property_for_each_string(cpun, "ibm,hmi-special-triggers" , |
643 | prop, str) { |
644 | if (strcmp(str, "bit17-vector-ci-load" ) == 0) |
645 | hmer_debug_trig_function = DTRIG_VECTOR_CI; |
646 | else if (strcmp(str, "bit17-tm-suspend-escape" ) == 0) |
647 | hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE; |
648 | } |
649 | of_node_put(node: cpun); |
650 | } |
651 | preempt_enable(); |
652 | |
653 | /* If we found the property, don't look at PVR */ |
654 | if (prop) |
655 | goto out; |
656 | |
657 | pvr = mfspr(SPRN_PVR); |
658 | /* Check for POWER9 Nimbus (scale-out) */ |
659 | if ((PVR_VER(pvr) == PVR_POWER9) && (pvr & 0xe000) == 0) { |
660 | /* DD2.2 and later */ |
661 | if ((pvr & 0xfff) >= 0x202) |
662 | hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE; |
663 | /* DD2.0 and DD2.1 - used for vector CI load emulation */ |
664 | else if ((pvr & 0xfff) >= 0x200) |
665 | hmer_debug_trig_function = DTRIG_VECTOR_CI; |
666 | } |
667 | |
668 | out: |
669 | switch (hmer_debug_trig_function) { |
670 | case DTRIG_VECTOR_CI: |
671 | pr_debug("HMI debug trigger used for vector CI load\n" ); |
672 | break; |
673 | case DTRIG_SUSPEND_ESCAPE: |
674 | pr_debug("HMI debug trigger used for TM suspend escape\n" ); |
675 | break; |
676 | default: |
677 | break; |
678 | } |
679 | return 0; |
680 | } |
681 | __initcall(init_debug_trig_function); |
682 | |
683 | /* |
684 | * Handle HMIs that occur as a result of a debug trigger. |
685 | * Return values: |
686 | * -1 means this is not a HMI cause that we know about |
687 | * 0 means no further handling is required |
688 | * 1 means further handling is required |
689 | */ |
690 | long hmi_handle_debugtrig(struct pt_regs *regs) |
691 | { |
692 | unsigned long hmer = mfspr(SPRN_HMER); |
693 | long ret = 0; |
694 | |
695 | /* HMER_DEBUG_TRIG bit is used for various workarounds on P9 */ |
696 | if (!((hmer & HMER_DEBUG_TRIG) |
697 | && hmer_debug_trig_function != DTRIG_UNKNOWN)) |
698 | return -1; |
699 | |
700 | hmer &= ~HMER_DEBUG_TRIG; |
701 | /* HMER is a write-AND register */ |
702 | mtspr(SPRN_HMER, ~HMER_DEBUG_TRIG); |
703 | |
704 | switch (hmer_debug_trig_function) { |
705 | case DTRIG_VECTOR_CI: |
706 | /* |
707 | * Now to avoid problems with soft-disable we |
708 | * only do the emulation if we are coming from |
709 | * host user space |
710 | */ |
711 | if (regs && user_mode(regs)) |
712 | ret = local_paca->hmi_p9_special_emu = 1; |
713 | |
714 | break; |
715 | |
716 | default: |
717 | break; |
718 | } |
719 | |
720 | /* |
721 | * See if any other HMI causes remain to be handled |
722 | */ |
723 | if (hmer & mfspr(SPRN_HMEER)) |
724 | return -1; |
725 | |
726 | return ret; |
727 | } |
728 | |
729 | /* |
730 | * Return values: |
731 | */ |
732 | DEFINE_INTERRUPT_HANDLER_NMI(hmi_exception_realmode) |
733 | { |
734 | int ret; |
735 | |
736 | local_paca->hmi_irqs++; |
737 | |
738 | ret = hmi_handle_debugtrig(regs); |
739 | if (ret >= 0) |
740 | return ret; |
741 | |
742 | wait_for_subcore_guest_exit(); |
743 | |
744 | if (ppc_md.hmi_exception_early) |
745 | ppc_md.hmi_exception_early(regs); |
746 | |
747 | wait_for_tb_resync(); |
748 | |
749 | return 1; |
750 | } |
751 | |
752 | void __init mce_init(void) |
753 | { |
754 | struct mce_info *mce_info; |
755 | u64 limit; |
756 | int i; |
757 | |
758 | limit = min(ppc64_bolted_size(), ppc64_rma_size); |
759 | for_each_possible_cpu(i) { |
760 | mce_info = memblock_alloc_try_nid(sizeof(*mce_info), |
761 | __alignof__(*mce_info), |
762 | MEMBLOCK_LOW_LIMIT, |
763 | limit, early_cpu_to_node(i)); |
764 | if (!mce_info) |
765 | goto err; |
766 | paca_ptrs[i]->mce_info = mce_info; |
767 | } |
768 | return; |
769 | err: |
770 | panic(fmt: "Failed to allocate memory for MCE event data\n" ); |
771 | } |
772 | |