traps.c source code [linux/arch/powerpc/kernel/traps.c]

1	// SPDX-License-Identifier: GPL-2.0-or-later
2	/*
3	* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
4	* Copyright 2007-2010 Freescale Semiconductor, Inc.
5	*
6	* Modified by Cort Dougan (cort@cs.nmt.edu)
7	* and Paul Mackerras (paulus@samba.org)
8	*/
9
10	/*
11	* This file handles the architecture-dependent parts of hardware exceptions
12	*/
13
14	#include <linux/errno.h>
15	#include <linux/sched.h>
16	#include <linux/sched/debug.h>
17	#include <linux/kernel.h>
18	#include <linux/mm.h>
19	#include <linux/pkeys.h>
20	#include <linux/stddef.h>
21	#include <linux/unistd.h>
22	#include <linux/ptrace.h>
23	#include <linux/user.h>
24	#include <linux/interrupt.h>
25	#include <linux/init.h>
26	#include <linux/extable.h>
27	#include <linux/module.h> /* print_modules */
28	#include <linux/prctl.h>
29	#include <linux/delay.h>
30	#include <linux/kprobes.h>
31	#include <linux/kexec.h>
32	#include <linux/backlight.h>
33	#include <linux/bug.h>
34	#include <linux/kdebug.h>
35	#include <linux/ratelimit.h>
36	#include <linux/context_tracking.h>
37	#include <linux/smp.h>
38	#include <linux/console.h>
39	#include <linux/kmsg_dump.h>
40	#include <linux/debugfs.h>
41
42	#include <asm/emulated_ops.h>
43	#include <linux/uaccess.h>
44	#include <asm/interrupt.h>
45	#include <asm/io.h>
46	#include <asm/machdep.h>
47	#include <asm/rtas.h>
48	#include <asm/pmc.h>
49	#include <asm/reg.h>
50	#ifdef CONFIG_PMAC_BACKLIGHT
51	#include <asm/backlight.h>
52	#endif
53	#ifdef CONFIG_PPC64
54	#include <asm/firmware.h>
55	#include <asm/processor.h>
56	#endif
57	#include <asm/kexec.h>
58	#include <asm/ppc-opcode.h>
59	#include <asm/rio.h>
60	#include <asm/fadump.h>
61	#include <asm/switch_to.h>
62	#include <asm/tm.h>
63	#include <asm/debug.h>
64	#include <asm/asm-prototypes.h>
65	#include <asm/hmi.h>
66	#include <sysdev/fsl_pci.h>
67	#include <asm/kprobes.h>
68	#include <asm/stacktrace.h>
69	#include <asm/nmi.h>
70	#include <asm/disassemble.h>
71	#include <asm/udbg.h>
72
#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC_CORE)
/*
 * Debugger hook function pointers. Each hook receives the register frame
 * of the interrupted context. They are exported below so that code
 * outside this file can reference them.
 */
int (*__debugger)(struct pt_regs *regs) __read_mostly;
int (*__debugger_ipi)(struct pt_regs *regs) __read_mostly;
int (*__debugger_bpt)(struct pt_regs *regs) __read_mostly;
int (*__debugger_sstep)(struct pt_regs *regs) __read_mostly;
int (*__debugger_iabr_match)(struct pt_regs *regs) __read_mostly;
int (*__debugger_break_match)(struct pt_regs *regs) __read_mostly;
int (*__debugger_fault_handler)(struct pt_regs *regs) __read_mostly;

EXPORT_SYMBOL(__debugger);
EXPORT_SYMBOL(__debugger_ipi);
EXPORT_SYMBOL(__debugger_bpt);
EXPORT_SYMBOL(__debugger_sstep);
EXPORT_SYMBOL(__debugger_iabr_match);
EXPORT_SYMBOL(__debugger_break_match);
EXPORT_SYMBOL(__debugger_fault_handler);
#endif
90
/*
 * Transactional Memory trap debug.
 *
 * TM_DEBUG() forwards its arguments to printk() at KERN_INFO level when
 * TM_DEBUG_SW is defined, and expands to an empty statement otherwise.
 */
#ifdef TM_DEBUG_SW
#define TM_DEBUG(x...) printk(KERN_INFO x)
#else
#define TM_DEBUG(x...) do { } while(0)
#endif
97
/*
 * Map a signal number to a short human-readable description.
 *
 * Returns a pointer to a string literal; any signal number not listed
 * in the switch yields "unknown signal".
 */
static const char *signame(int signr)
{
	switch (signr) {
	case SIGBUS:	return "bus error";
	case SIGFPE:	return "floating point exception";
	case SIGILL:	return "illegal instruction";
	case SIGSEGV:	return "segfault";
	case SIGTRAP:	return "unhandled trap";
	}

	return "unknown signal";
}
110
111	/*
112	* Trap & Exception support
113	*/
114
#ifdef CONFIG_PMAC_BACKLIGHT
/*
 * If a PowerMac backlight device is registered, set it to maximum
 * brightness and unblank it. The update is done under
 * pmac_backlight_mutex.
 */
static void pmac_backlight_unblank(void)
{
	mutex_lock(&pmac_backlight_mutex);
	if (pmac_backlight) {
		struct backlight_properties *props;

		props = &pmac_backlight->props;
		props->brightness = props->max_brightness;
		props->power = FB_BLANK_UNBLANK;
		backlight_update_status(pmac_backlight);
	}
	mutex_unlock(&pmac_backlight_mutex);
}
#else
/* No PowerMac backlight support configured: nothing to do. */
static inline void pmac_backlight_unblank(void) { }
#endif
132
133	/*
134	* If oops/die is expected to crash the machine, return true here.
135	*
136	* This should not be expected to be 100% accurate, there may be
137	* notifiers registered or other unexpected conditions that may bring
138	* down the kernel. Or if the current process in the kernel is holding
139	* locks or has other critical state, the kernel may become effectively
140	* unusable anyway.
141	*/
142	bool die_will_crash(void)
143	{
144	if (should_fadump_crash())
145	return true;
146	if (kexec_should_crash(current))
147	return true;
148	if (in_interrupt() \|\| panic_on_oops \|\|
149	!current->pid \|\| is_global_init(current))
150	return true;
151
152	return false;
153	}
154
155	static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED;
156	static int die_owner = -`1`;
157	static unsigned int die_nest_count;
158	static int die_counter;
159
/*
 * Prepare the console for emitting panic messages: raise console
 * verbosity and bust spinlocks. Paired with panic_flush_kmsg_end().
 */
void panic_flush_kmsg_start(void)
{
	/*
	 * These are mostly taken from kernel/panic.c, but tries to do
	 * relatively minimal work. Don't use delay functions (TB may
	 * be broken), don't crash dump (need to set a firmware log),
	 * don't run notifiers. We do want to get some information to
	 * Linux console.
	 */
	console_verbose();
	bust_spinlocks(1);
}
172
173	void panic_flush_kmsg_end(void)
174	{
175	kmsg_dump(reason: KMSG_DUMP_PANIC);
176	bust_spinlocks(yes: `0`);
177	debug_locks_off();
178	console_flush_on_panic(mode: CONSOLE_FLUSH_PENDING);
179	}
180
181	static unsigned long oops_begin(struct pt_regs *regs)
182	{
183	int cpu;
184	unsigned long flags;
185
186	oops_enter();
187
188	/ racy, but better than risking deadlock. /
189	raw_local_irq_save(flags);
190	cpu = smp_processor_id();
191	if (!arch_spin_trylock(&die_lock)) {
192	if (cpu == die_owner)
193	/ nested oops. should stop eventually /;
194	else
195	arch_spin_lock(&die_lock);
196	}
197	die_nest_count++;
198	die_owner = cpu;
199	console_verbose();
200	bust_spinlocks(yes: `1`);
201	if (machine_is(powermac))
202	pmac_backlight_unblank();
203	return flags;
204	}
205	NOKPROBE_SYMBOL(oops_begin);
206
207	static void oops_end(unsigned long flags, struct pt_regs *regs,
208	int signr)
209	{
210	bust_spinlocks(yes: `0`);
211	add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
212	die_nest_count--;
213	oops_exit();
214	printk("\n");
215	if (!die_nest_count) {
216	/ Nest count reaches zero, release the lock. /
217	die_owner = -`1`;
218	arch_spin_unlock(&die_lock);
219	}
220	raw_local_irq_restore(flags);
221
222	/*
223	* system_reset_excption handles debugger, crash dump, panic, for 0x100
224	*/
225	if (TRAP(regs) == INTERRUPT_SYSTEM_RESET)
226	return;
227
228	crash_fadump(regs, "die oops");
229
230	if (kexec_should_crash(current))
231	crash_kexec(regs);
232
233	if (!signr)
234	return;
235
236	/*
237	* While our oops output is serialised by a spinlock, output
238	* from panic() called below can race and corrupt it. If we
239	* know we are going to panic, delay for 1 second so we have a
240	* chance to get clean backtraces from all CPUs that are oopsing.
241	*/
242	if (in_interrupt() \|\| panic_on_oops \|\| !current->pid \|\|
243	is_global_init(current)) {
244	mdelay(MSEC_PER_SEC);
245	}
246
247	if (panic_on_oops)
248	panic(fmt: "Fatal exception");
249	make_task_dead(signr);
250	}
251	NOKPROBE_SYMBOL(oops_end);
252
253	static char get_mmu_str(void*)
254	{
255	if (early_radix_enabled())
256	return " MMU=Radix";
257	if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE))
258	return " MMU=Hash";
259	return "";
260	}
261
262	static int __die(const char str, struct* pt_regs regs, long* err)
263	{
264	printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter);
265
266	printk("%s PAGE_SIZE=%luK%s%s%s%s%s%s %s\n",
267	IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN) ? "LE" : "BE",
268	PAGE_SIZE / `1024`, get_mmu_str(),
269	IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "",
270	IS_ENABLED(CONFIG_SMP) ? " SMP" : "",
271	IS_ENABLED(CONFIG_SMP) ? (" NR_CPUS=" __stringify(NR_CPUS)) : "",
272	debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "",
273	IS_ENABLED(CONFIG_NUMA) ? " NUMA" : "",
274	ppc_md.name ? ppc_md.name : "");
275
276	if (notify_die(val: DIE_OOPS, str, regs, err, trap: `255`, SIGSEGV) == NOTIFY_STOP)
277	return `1`;
278
279	print_modules();
280	show_regs(regs);
281
282	return `0`;
283	}
284	NOKPROBE_SYMBOL(__die);
285
286	void die(const char str, struct* pt_regs regs, long* err)
287	{
288	unsigned long flags;
289
290	/*
291	* system_reset_excption handles debugger, crash dump, panic, for 0x100
292	*/
293	if (TRAP(regs) != INTERRUPT_SYSTEM_RESET) {
294	if (debugger(regs))
295	return;
296	}
297
298	flags = oops_begin(regs);
299	if (__die(str, regs, err))
300	err = `0`;
301	oops_end(flags, regs, signr: err);
302	}
303	NOKPROBE_SYMBOL(die);
304
305	void user_single_step_report(struct pt_regs *regs)
306	{
307	force_sig_fault(SIGTRAP, TRAP_TRACE, addr: (void __user *)regs->nip);
308	}
309
310	static void show_signal_msg(int signr, struct pt_regs regs, int* code,
311	unsigned long addr)
312	{
313	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
314	DEFAULT_RATELIMIT_BURST);
315
316	if (!show_unhandled_signals)
317	return;
318
319	if (!unhandled_signal(current, sig: signr))
320	return;
321
322	if (!__ratelimit(&rs))
323	return;
324
325	pr_info("%s[%d]: %s (%d) at %lx nip %lx lr %lx code %x",
326	current->comm, current->pid, signame(signr), signr,
327	addr, regs->nip, regs->link, code);
328
329	print_vma_addr(KERN_CONT " in ", rip: regs->nip);
330
331	pr_cont("\n");
332
333	show_user_instructions(regs);
334	}
335
336	static bool exception_common(int signr, struct pt_regs regs, int* code,
337	unsigned long addr)
338	{
339	if (!user_mode(regs)) {
340	die(str: "Exception in kernel mode", regs, err: signr);
341	return false;
342	}
343
344	/*
345	* Must not enable interrupts even for user-mode exception, because
346	* this can be called from machine check, which may be a NMI or IRQ
347	* which don't like interrupts being enabled. Could check for
348	* in_hardirq \|\| in_nmi perhaps, but there doesn't seem to be a good
349	* reason why _exception() should enable irqs for an exception handler,
350	* the handlers themselves do that directly.
351	*/
352
353	show_signal_msg(signr, regs, code, addr);
354
355	current->thread.trap_nr = code;
356
357	return true;
358	}
359
360	void _exception_pkey(struct pt_regs regs, unsigned* long addr, int key)
361	{
362	if (!exception_common(SIGSEGV, regs, SEGV_PKUERR, addr))
363	return;
364
365	force_sig_pkuerr(addr: (void __user *) addr, pkey: key);
366	}
367
368	void _exception(int signr, struct pt_regs regs, int* code, unsigned long addr)
369	{
370	if (!exception_common(signr, regs, code, addr))
371	return;
372
373	force_sig_fault(sig: signr, code, addr: (void __user *)addr);
374	}
375
376	/*
377	* The interrupt architecture has a quirk in that the HV interrupts excluding
378	* the NMIs (0x100 and 0x200) do not clear MSR[RI] at entry. The first thing
379	* that an interrupt handler must do is save off a GPR into a scratch register,
380	* and all interrupts on POWERNV (HV=1) use the HSPRG1 register as scratch.
381	* Therefore an NMI can clobber an HV interrupt's live HSPRG1 without noticing
382	* that it is non-reentrant, which leads to random data corruption.
383	*
384	* The solution is for NMI interrupts in HV mode to check if they originated
385	* from these critical HV interrupt regions. If so, then mark them not
386	* recoverable.
387	*
388	* An alternative would be for HV NMIs to use SPRG for scratch to avoid the
389	* HSPRG1 clobber, however this would cause guest SPRG to be clobbered. Linux
390	* guests should always have MSR[RI]=0 when its scratch SPRG is in use, so
391	* that would work. However any other guest OS that may have the SPRG live
392	* and MSR[RI]=1 could encounter silent corruption.
393	*
394	* Builds that do not support KVM could take this second option to increase
395	* the recoverability of NMIs.
396	*/
397	noinstr void hv_nmi_check_nonrecoverable(struct pt_regs *regs)
398	{
399	#ifdef CONFIG_PPC_POWERNV
400	unsigned long kbase = (unsigned long)_stext;
401	unsigned long nip = regs->nip;
402
403	if (!(regs->msr & MSR_RI))
404	return;
405	if (!(regs->msr & MSR_HV))
406	return;
407	if (user_mode(regs))
408	return;
409
410	/*
411	* Now test if the interrupt has hit a range that may be using
412	* HSPRG1 without having RI=0 (i.e., an HSRR interrupt). The
413	* problem ranges all run un-relocated. Test real and virt modes
414	* at the same time by dropping the high bit of the nip (virt mode
415	* entry points still have the +0x4000 offset).
416	*/
417	nip &= ~`0xc000000000000000ULL`;
418	if ((nip >= `0x500` && nip < `0x600`) \|\| (nip >= `0x4500` && nip < `0x4600`))
419	goto nonrecoverable;
420	if ((nip >= `0x980` && nip < `0xa00`) \|\| (nip >= `0x4980` && nip < `0x4a00`))
421	goto nonrecoverable;
422	if ((nip >= `0xe00` && nip < `0xec0`) \|\| (nip >= `0x4e00` && nip < `0x4ec0`))
423	goto nonrecoverable;
424	if ((nip >= `0xf80` && nip < `0xfa0`) \|\| (nip >= `0x4f80` && nip < `0x4fa0`))
425	goto nonrecoverable;
426
427	/ Trampoline code runs un-relocated so subtract kbase. /
428	if (nip >= (unsigned long)(start_real_trampolines - kbase) &&
429	nip < (unsigned long)(end_real_trampolines - kbase))
430	goto nonrecoverable;
431	if (nip >= (unsigned long)(start_virt_trampolines - kbase) &&
432	nip < (unsigned long)(end_virt_trampolines - kbase))
433	goto nonrecoverable;
434	return;
435
436	nonrecoverable:
437	regs->msr &= ~MSR_RI;
438	local_paca->hsrr_valid = `0`;
439	local_paca->srr_valid = `0`;
440	#endif
441	}
442	DEFINE_INTERRUPT_HANDLER_NMI(system_reset_exception)
443	{
444	unsigned long hsrr0, hsrr1;
445	bool saved_hsrrs = false;
446
447	/*
448	* System reset can interrupt code where HSRRs are live and MSR[RI]=1.
449	* The system reset interrupt itself may clobber HSRRs (e.g., to call
450	* OPAL), so save them here and restore them before returning.
451	*
452	* Machine checks don't need to save HSRRs, as the real mode handler
453	* is careful to avoid them, and the regular handler is not delivered
454	* as an NMI.
455	*/
456	if (cpu_has_feature(CPU_FTR_HVMODE)) {
457	hsrr0 = mfspr(SPRN_HSRR0);
458	hsrr1 = mfspr(SPRN_HSRR1);
459	saved_hsrrs = true;
460	}
461
462	hv_nmi_check_nonrecoverable(regs: regs);
463
464	__this_cpu_inc(irq_stat.sreset_irqs);
465
466	/ See if any machine dependent calls /
467	if (ppc_md.system_reset_exception) {
468	if (ppc_md.system_reset_exception(regs))
469	goto out;
470	}
471
472	if (debugger(regs))
473	goto out;
474
475	kmsg_dump(reason: KMSG_DUMP_OOPS);
476	/*
477	* A system reset is a request to dump, so we always send
478	* it through the crashdump code (if fadump or kdump are
479	* registered).
480	*/
481	crash_fadump(regs, "System Reset");
482
483	crash_kexec(regs);
484
485	/*
486	* We aren't the primary crash CPU. We need to send it
487	* to a holding pattern to avoid it ending up in the panic
488	* code.
489	*/
490	crash_kexec_secondary(regs);
491
492	/*
493	* No debugger or crash dump registered, print logs then
494	* panic.
495	*/
496	die("System Reset", regs, SIGABRT);
497
498	mdelay(`2`MSEC_PER_SEC); /* Wait a little while for others to print /
499	add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
500	nmi_panic(regs, "System Reset");
501
502	out:
503	#ifdef CONFIG_PPC_BOOK3S_64
504	BUG_ON(get_paca()->in_nmi == `0`);
505	if (get_paca()->in_nmi > `1`)
506	die("Unrecoverable nested System Reset", regs, SIGABRT);
507	#endif
508	/ Must die if the interrupt is not recoverable /
509	if (regs_is_unrecoverable(regs)) {
510	/ For the reason explained in die_mce, nmi_exit before die /
511	nmi_exit();
512	die("Unrecoverable System Reset", regs, SIGABRT);
513	}
514
515	if (saved_hsrrs) {
516	mtspr(SPRN_HSRR0, hsrr0);
517	mtspr(SPRN_HSRR1, hsrr1);
518	}
519
520	/ What should we do here? We could issue a shutdown or hard reset. /
521
522	return `0`;
523	}
524
525	/*
526	* I/O accesses can cause machine checks on powermacs.
527	* Check if the NIP corresponds to the address of a sync
528	* instruction for which there is an entry in the exception
529	* table.
530	* -- paulus.
531	*/
/*
 * Returns 1 when the machine check was caused by an I/O access that has
 * an exception-table fixup (the return address is then redirected to the
 * fixup and the frame marked recoverable), 0 otherwise. Always 0 when
 * CONFIG_PPC32 is not set.
 */
static inline int check_io_access(struct pt_regs *regs)
{
#ifdef CONFIG_PPC32
	unsigned long msr = regs->msr;
	const struct exception_table_entry *entry;
	unsigned int *nip = (unsigned int *)regs->nip;

	if (((msr & 0xffff0000) == 0 || (msr & (0x80000 | 0x40000)))
	    && (entry = search_exception_tables(regs->nip)) != NULL) {
		/*
		 * Check that it's a sync instruction, or somewhere
		 * in the twi; isync; nop sequence that inb/inw/inl uses.
		 * As the address is in the exception table
		 * we should be able to read the instr there.
		 * For the debug message, we look at the preceding
		 * load or store.
		 */
		if (*nip == PPC_RAW_NOP())
			nip -= 2;
		else if (*nip == PPC_RAW_ISYNC())
			--nip;
		if (*nip == PPC_RAW_SYNC() || get_op(*nip) == OP_TRAP) {
			unsigned int rb;

			--nip;
			/* RB field of the preceding load/store instruction. */
			rb = (*nip >> 11) & 0x1f;
			printk(KERN_DEBUG "%s bad port %lx at %p\n",
			       (*nip & 0x100)? "OUT to": "IN from",
			       regs->gpr[rb] - _IO_BASE, nip);
			regs_set_recoverable(regs);
			regs_set_return_ip(regs, extable_fixup(entry));
			return 1;
		}
	}
#endif /* CONFIG_PPC32 */
	return 0;
}
569
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
/* On 4xx, the reason for the machine check or program exception
   is in the ESR. */
#define get_reason(regs)	((regs)->esr)
#define REASON_FP		ESR_FP
#define REASON_ILLEGAL		(ESR_PIL | ESR_PUO)
#define REASON_PRIVILEGED	ESR_PPR
#define REASON_TRAP		ESR_PTR
#define REASON_PREFIXED		0
#define REASON_BOUNDARY		0

/* single-step stuff */
#define single_stepping(regs)	(current->thread.debug.dbcr0 & DBCR0_IC)
#define clear_single_step(regs)	(current->thread.debug.dbcr0 &= ~DBCR0_IC)
#define clear_br_trace(regs)	do {} while(0)
#else
/* On non-4xx, the reason for the machine check or program
   exception is in the MSR. */
#define get_reason(regs)	((regs)->msr)
#define REASON_TM		SRR1_PROGTM
#define REASON_FP		SRR1_PROGFPE
#define REASON_ILLEGAL		SRR1_PROGILL
#define REASON_PRIVILEGED	SRR1_PROGPRIV
#define REASON_TRAP		SRR1_PROGTRAP
#define REASON_PREFIXED		SRR1_PREFIXED
#define REASON_BOUNDARY		SRR1_BOUNDARY

#define single_stepping(regs)	((regs)->msr & MSR_SE)
#define clear_single_step(regs)	(regs_set_return_msr((regs), (regs)->msr & ~MSR_SE))
#define clear_br_trace(regs)	(regs_set_return_msr((regs), (regs)->msr & ~MSR_BE))
#endif

/* Instruction length in bytes: 8 when REASON_PREFIXED is set in @reason, else 4. */
#define inst_length(reason)	(((reason) & REASON_PREFIXED) ? 8 : 4)
603
604	#if defined(CONFIG_PPC_E500)
605	int machine_check_e500mc(struct pt_regs *regs)
606	{
607	unsigned long mcsr = mfspr(SPRN_MCSR);
608	unsigned long pvr = mfspr(SPRN_PVR);
609	unsigned long reason = mcsr;
610	int recoverable = `1`;
611
612	if (reason & MCSR_LD) {
613	recoverable = fsl_rio_mcheck_exception(regs);
614	if (recoverable == `1`)
615	goto silent_out;
616	}
617
618	printk("Machine check in kernel mode.\n");
619	printk("Caused by (from MCSR=%lx): ", reason);
620
621	if (reason & MCSR_MCP)
622	pr_cont("Machine Check Signal\n");
623
624	if (reason & MCSR_ICPERR) {
625	pr_cont("Instruction Cache Parity Error\n");
626
627	/*
628	* This is recoverable by invalidating the i-cache.
629	*/
630	mtspr(SPRN_L1CSR1, mfspr(SPRN_L1CSR1) \| L1CSR1_ICFI);
631	while (mfspr(SPRN_L1CSR1) & L1CSR1_ICFI)
632	;
633
634	/*
635	* This will generally be accompanied by an instruction
636	* fetch error report -- only treat MCSR_IF as fatal
637	* if it wasn't due to an L1 parity error.
638	*/
639	reason &= ~MCSR_IF;
640	}
641
642	if (reason & MCSR_DCPERR_MC) {
643	pr_cont("Data Cache Parity Error\n");
644
645	/*
646	* In write shadow mode we auto-recover from the error, but it
647	* may still get logged and cause a machine check. We should
648	* only treat the non-write shadow case as non-recoverable.
649	*/
650	/ On e6500 core, L1 DCWS (Data cache write shadow mode) bit*
651	* is not implemented but L1 data cache always runs in write
652	* shadow mode. Hence on data cache parity errors HW will
653	* automatically invalidate the L1 Data Cache.
654	*/
655	if (PVR_VER(pvr) != PVR_VER_E6500) {
656	if (!(mfspr(SPRN_L1CSR2) & L1CSR2_DCWS))
657	recoverable = `0`;
658	}
659	}
660
661	if (reason & MCSR_L2MMU_MHIT) {
662	pr_cont("Hit on multiple TLB entries\n");
663	recoverable = `0`;
664	}
665
666	if (reason & MCSR_NMI)
667	pr_cont("Non-maskable interrupt\n");
668
669	if (reason & MCSR_IF) {
670	pr_cont("Instruction Fetch Error Report\n");
671	recoverable = `0`;
672	}
673
674	if (reason & MCSR_LD) {
675	pr_cont("Load Error Report\n");
676	recoverable = `0`;
677	}
678
679	if (reason & MCSR_ST) {
680	pr_cont("Store Error Report\n");
681	recoverable = `0`;
682	}
683
684	if (reason & MCSR_LDG) {
685	pr_cont("Guarded Load Error Report\n");
686	recoverable = `0`;
687	}
688
689	if (reason & MCSR_TLBSYNC)
690	pr_cont("Simultaneous tlbsync operations\n");
691
692	if (reason & MCSR_BSL2_ERR) {
693	pr_cont("Level 2 Cache Error\n");
694	recoverable = `0`;
695	}
696
697	if (reason & MCSR_MAV) {
698	u64 addr;
699
700	addr = mfspr(SPRN_MCAR);
701	addr \|= (u64)mfspr(SPRN_MCARU) << `32`;
702
703	pr_cont("Machine Check %s Address: %#llx\n",
704	reason & MCSR_MEA ? "Effective" : "Physical", addr);
705	}
706
707	silent_out:
708	mtspr(SPRN_MCSR, mcsr);
709	return mfspr(SPRN_MCSR) == `0` && recoverable;
710	}
711
712	int machine_check_e500(struct pt_regs *regs)
713	{
714	unsigned long reason = mfspr(SPRN_MCSR);
715
716	if (reason & MCSR_BUS_RBERR) {
717	if (fsl_rio_mcheck_exception(regs))
718	return `1`;
719	if (fsl_pci_mcheck_exception(regs))
720	return `1`;
721	}
722
723	printk("Machine check in kernel mode.\n");
724	printk("Caused by (from MCSR=%lx): ", reason);
725
726	if (reason & MCSR_MCP)
727	pr_cont("Machine Check Signal\n");
728	if (reason & MCSR_ICPERR)
729	pr_cont("Instruction Cache Parity Error\n");
730	if (reason & MCSR_DCP_PERR)
731	pr_cont("Data Cache Push Parity Error\n");
732	if (reason & MCSR_DCPERR)
733	pr_cont("Data Cache Parity Error\n");
734	if (reason & MCSR_BUS_IAERR)
735	pr_cont("Bus - Instruction Address Error\n");
736	if (reason & MCSR_BUS_RAERR)
737	pr_cont("Bus - Read Address Error\n");
738	if (reason & MCSR_BUS_WAERR)
739	pr_cont("Bus - Write Address Error\n");
740	if (reason & MCSR_BUS_IBERR)
741	pr_cont("Bus - Instruction Data Error\n");
742	if (reason & MCSR_BUS_RBERR)
743	pr_cont("Bus - Read Data Bus Error\n");
744	if (reason & MCSR_BUS_WBERR)
745	pr_cont("Bus - Write Data Bus Error\n");
746	if (reason & MCSR_BUS_IPERR)
747	pr_cont("Bus - Instruction Parity Error\n");
748	if (reason & MCSR_BUS_RPERR)
749	pr_cont("Bus - Read Parity Error\n");
750
751	return `0`;
752	}
753
/* E500 build: the generic machine check handler does nothing and returns 0. */
int machine_check_generic(struct pt_regs *regs)
{
	return 0;
}
758	#elif defined(CONFIG_PPC32)
759	int machine_check_generic(struct pt_regs *regs)
760	{
761	unsigned long reason = regs->msr;
762
763	printk("Machine check in kernel mode.\n");
764	printk("Caused by (from SRR1=%lx): ", reason);
765	switch (reason & `0x601F0000`) {
766	case `0x80000`:
767	pr_cont("Machine check signal\n");
768	break;
769	case `0x40000`:
770	case `0x140000`: / 7450 MSS error and TEA /
771	pr_cont("Transfer error ack signal\n");
772	break;
773	case `0x20000`:
774	pr_cont("Data parity error signal\n");
775	break;
776	case `0x10000`:
777	pr_cont("Address parity error signal\n");
778	break;
779	case `0x20000000`:
780	pr_cont("L1 Data Cache error\n");
781	break;
782	case `0x40000000`:
783	pr_cont("L1 Instruction Cache error\n");
784	break;
785	case `0x00100000`:
786	pr_cont("L2 data cache parity error\n");
787	break;
788	default:
789	pr_cont("Unknown values in msr\n");
790	}
791	return `0`;
792	}
793	#endif /* everything else */
794
/*
 * die() wrapper for machine checks: leaves NMI or IRQ context first, then
 * calls die() with the same arguments.
 */
void die_mce(const char *str, struct pt_regs *regs, long err)
{
	/*
	 * The machine check wants to kill the interrupted context,
	 * but make_task_dead() checks for in_interrupt() and panics
	 * in that case, so exit the irq/nmi before calling die.
	 */
	if (in_nmi())
		nmi_exit();
	else
		irq_exit();
	die(str, regs, err);
}
808
809	/*
810	* BOOK3S_64 does not usually call this handler as a non-maskable interrupt
811	* (it uses its own early real-mode handler to handle the MCE proper
812	* and then raises irq_work to call this handler when interrupts are
813	* enabled). The only time when this is not true is if the early handler
814	* is unrecoverable, then it does call this directly to try to get a
815	* message out.
816	*/
817	static void __machine_check_exception(struct pt_regs *regs)
818	{
819	int recover = `0`;
820
821	__this_cpu_inc(irq_stat.mce_exceptions);
822
823	add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
824
825	/ See if any machine dependent calls. In theory, we would want*
826	* to call the CPU first, and call the ppc_md. one if the CPU
827	* one returns a positive number. However there is existing code
828	* that assumes the board gets a first chance, so let's keep it
829	* that way for now and fix things later. --BenH.
830	*/
831	if (ppc_md.machine_check_exception)
832	recover = ppc_md.machine_check_exception(regs);
833	else if (cur_cpu_spec->machine_check)
834	recover = cur_cpu_spec->machine_check(regs);
835
836	if (recover > `0`)
837	goto bail;
838
839	if (debugger_fault_handler(regs))
840	goto bail;
841
842	if (check_io_access(regs))
843	goto bail;
844
845	die_mce(str: "Machine check", regs, SIGBUS);
846
847	bail:
848	/ Must die if the interrupt is not recoverable /
849	if (regs_is_unrecoverable(regs))
850	die_mce(str: "Unrecoverable Machine check", regs, SIGBUS);
851	}
852
853	#ifdef CONFIG_PPC_BOOK3S_64
854	DEFINE_INTERRUPT_HANDLER_RAW(machine_check_early_boot)
855	{
856	udbg_printf("Machine check (early boot)\n");
857	udbg_printf("SRR0=0x%016lx SRR1=0x%016lx\n", regs->nip, regs->msr);
858	udbg_printf(" DAR=0x%016lx DSISR=0x%08lx\n", regs->dar, regs->dsisr);
859	udbg_printf(" LR=0x%016lx R1=0x%08lx\n", regs->link, regs->gpr[`1`]);
860	udbg_printf("------\n");
861	die("Machine check (early boot)", regs, SIGBUS);
862	for (;;)
863	;
864	return `0`;
865	}
866
867	DEFINE_INTERRUPT_HANDLER_ASYNC(machine_check_exception_async)
868	{
869	__machine_check_exception(regs);
870	}
871	#endif
872	DEFINE_INTERRUPT_HANDLER_NMI(machine_check_exception)
873	{
874	__machine_check_exception(regs);
875
876	return `0`;
877	}
878
879	DEFINE_INTERRUPT_HANDLER(SMIException) / async? /
880	{
881	die("System Management Interrupt", regs, SIGABRT);
882	}
883
884	#ifdef CONFIG_VSX
885	static void p9_hmi_special_emu(struct pt_regs *regs)
886	{
887	unsigned int ra, rb, t, i, sel, instr, rc;
888	const void __user *addr;
889	u8 vbuf[`16`] __aligned(`16`), *vdst;
890	unsigned long ea, msr, msr_mask;
891	bool swap;
892
893	if (__get_user(instr, (unsigned int __user *)regs->nip))
894	return;
895
896	/*
897	* lxvb16x opcode: 0x7c0006d8
898	* lxvd2x opcode: 0x7c000698
899	* lxvh8x opcode: 0x7c000658
900	* lxvw4x opcode: 0x7c000618
901	*/
902	if ((instr & `0xfc00073e`) != `0x7c000618`) {
903	pr_devel("HMI vec emu: not vector CI %i:%s[%d] nip=%016lx"
904	" instr=%08x\n",
905	smp_processor_id(), current->comm, current->pid,
906	regs->nip, instr);
907	return;
908	}
909
910	/ Grab vector registers into the task struct /
911	msr = regs->msr; / Grab msr before we flush the bits /
912	flush_vsx_to_thread(current);
913	enable_kernel_altivec();
914
915	/*
916	* Is userspace running with a different endian (this is rare but
917	* not impossible)
918	*/
919	swap = (msr & MSR_LE) != (MSR_KERNEL & MSR_LE);
920
921	/ Decode the instruction /
922	ra = (instr >> `16`) & `0x1f`;
923	rb = (instr >> `11`) & `0x1f`;
924	t = (instr >> `21`) & `0x1f`;
925	if (instr & `1`)
926	vdst = (u8 *)&current->thread.vr_state.vr[t];
927	else
928	vdst = (u8 *)&current->thread.fp_state.fpr[t][`0`];
929
930	/ Grab the vector address /
931	ea = regs->gpr[rb] + (ra ? regs->gpr[ra] : `0`);
932	if (is_32bit_task())
933	ea &= `0xfffffffful`;
934	addr = (__force const void __user *)ea;
935
936	/ Check it /
937	if (!access_ok(addr, `16`)) {
938	pr_devel("HMI vec emu: bad access %i:%s[%d] nip=%016lx"
939	" instr=%08x addr=%016lx\n",
940	smp_processor_id(), current->comm, current->pid,
941	regs->nip, instr, (unsigned long)addr);
942	return;
943	}
944
945	/ Read the vector /
946	rc = `0`;
947	if ((unsigned long)addr & `0xfUL`)
948	/ unaligned case /
949	rc = __copy_from_user_inatomic(vbuf, addr, `16`);
950	else
951	__get_user_atomic_128_aligned(vbuf, addr, rc);
952	if (rc) {
953	pr_devel("HMI vec emu: page fault %i:%s[%d] nip=%016lx"
954	" instr=%08x addr=%016lx\n",
955	smp_processor_id(), current->comm, current->pid,
956	regs->nip, instr, (unsigned long)addr);
957	return;
958	}
959
960	pr_devel("HMI vec emu: emulated vector CI %i:%s[%d] nip=%016lx"
961	" instr=%08x addr=%016lx\n",
962	smp_processor_id(), current->comm, current->pid, regs->nip,
963	instr, (unsigned long) addr);
964
965	/ Grab instruction "selector" /
966	sel = (instr >> `6`) & `3`;
967
968	/*
969	* Check to make sure the facility is actually enabled. This
970	* could happen if we get a false positive hit.
971	*
972	* lxvd2x/lxvw4x always check MSR VSX sel = 0,2
973	* lxvh8x/lxvb16x check MSR VSX or VEC depending on VSR used sel = 1,3
974	*/
975	msr_mask = MSR_VSX;
976	if ((sel & `1`) && (instr & `1`)) / lxvh8x & lxvb16x + VSR >= 32 /
977	msr_mask = MSR_VEC;
978	if (!(msr & msr_mask)) {
979	pr_devel("HMI vec emu: MSR fac clear %i:%s[%d] nip=%016lx"
980	" instr=%08x msr:%016lx\n",
981	smp_processor_id(), current->comm, current->pid,
982	regs->nip, instr, msr);
983	return;
984	}
985
986	/ Do logging here before we modify sel based on endian /
987	switch (sel) {
988	case `0`: / lxvw4x /
989	PPC_WARN_EMULATED(lxvw4x, regs);
990	break;
991	case `1`: / lxvh8x /
992	PPC_WARN_EMULATED(lxvh8x, regs);
993	break;
994	case `2`: / lxvd2x /
995	PPC_WARN_EMULATED(lxvd2x, regs);
996	break;
997	case `3`: / lxvb16x /
998	PPC_WARN_EMULATED(lxvb16x, regs);
999	break;
1000	}
1001
1002	#ifdef __LITTLE_ENDIAN__
1003	/*
1004	* An LE kernel stores the vector in the task struct as an LE
1005	* byte array (effectively swapping both the components and
1006	* the content of the components). Those instructions expect
1007	* the components to remain in ascending address order, so we
1008	* swap them back.
1009	*
1010	* If we are running a BE user space, the expectation is that
1011	* of a simple memcpy, so forcing the emulation to look like
1012	* a lxvb16x should do the trick.
1013	*/
1014	if (swap)
1015	sel = `3`;
1016
1017	switch (sel) {
1018	case `0`: / lxvw4x /
1019	for (i = `0`; i < `4`; i++)
1020	((u32 )vdst)[i] = ((u32 )vbuf)[`3`-i];
1021	break;
1022	case `1`: / lxvh8x /
1023	for (i = `0`; i < `8`; i++)
1024	((u16 )vdst)[i] = ((u16 )vbuf)[`7`-i];
1025	break;
1026	case `2`: / lxvd2x /
1027	for (i = `0`; i < `2`; i++)
1028	((u64 )vdst)[i] = ((u64 )vbuf)[`1`-i];
1029	break;
1030	case `3`: / lxvb16x /
1031	for (i = `0`; i < `16`; i++)
1032	vdst[i] = vbuf[`15`-i];
1033	break;
1034	}
1035	#else /* __LITTLE_ENDIAN__ */
1036	/ On a big endian kernel, a BE userspace only needs a memcpy /
1037	if (!swap)
1038	sel = `3`;
1039
1040	/ Otherwise, we need to swap the content of the components /
1041	switch (sel) {
1042	case `0`: / lxvw4x /
1043	for (i = `0`; i < `4`; i++)
1044	((u32 )vdst)[i] = cpu_to_le32(((u32 )vbuf)[i]);
1045	break;
1046	case `1`: / lxvh8x /
1047	for (i = `0`; i < `8`; i++)
1048	((u16 )vdst)[i] = cpu_to_le16(((u16 )vbuf)[i]);
1049	break;
1050	case `2`: / lxvd2x /
1051	for (i = `0`; i < `2`; i++)
1052	((u64 )vdst)[i] = cpu_to_le64(((u64 )vbuf)[i]);
1053	break;
1054	case `3`: / lxvb16x /
1055	memcpy(vdst, vbuf, `16`);
1056	break;
1057	}
1058	#endif /* !__LITTLE_ENDIAN__ */
1059
1060	/ Go to next instruction /
1061	regs_add_return_ip(regs, `4`);
1062	}
1063	#endif /* CONFIG_VSX */
1064
1065	DEFINE_INTERRUPT_HANDLER_ASYNC(handle_hmi_exception)
1066	{
1067	struct pt_regs *old_regs;
1068
1069	old_regs = set_irq_regs(regs);
1070
1071	#ifdef CONFIG_VSX
1072	/ Real mode flagged P9 special emu is needed /
1073	if (local_paca->hmi_p9_special_emu) {
1074	local_paca->hmi_p9_special_emu = `0`;
1075
1076	/*
1077	* We don't want to take page faults while doing the
1078	* emulation, we just replay the instruction if necessary.
1079	*/
1080	pagefault_disable();
1081	p9_hmi_special_emu(regs);
1082	pagefault_enable();
1083	}
1084	#endif /* CONFIG_VSX */
1085
1086	if (ppc_md.handle_hmi_exception)
1087	ppc_md.handle_hmi_exception(regs);
1088
1089	set_irq_regs(old_regs);
1090	}
1091
1092	DEFINE_INTERRUPT_HANDLER(unknown_exception)
1093	{
1094	printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n",
1095	regs->nip, regs->msr, regs->trap);
1096
1097	_exception(SIGTRAP, regs, TRAP_UNK, `0`);
1098	}
1099
1100	DEFINE_INTERRUPT_HANDLER_ASYNC(unknown_async_exception)
1101	{
1102	printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n",
1103	regs->nip, regs->msr, regs->trap);
1104
1105	_exception(SIGTRAP, regs, TRAP_UNK, `0`);
1106	}
1107
1108	DEFINE_INTERRUPT_HANDLER_NMI(unknown_nmi_exception)
1109	{
1110	printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n",
1111	regs->nip, regs->msr, regs->trap);
1112
1113	_exception(SIGTRAP, regs, TRAP_UNK, `0`);
1114
1115	return `0`;
1116	}
1117
1118	DEFINE_INTERRUPT_HANDLER(instruction_breakpoint_exception)
1119	{
1120	if (notify_die(DIE_IABR_MATCH, "iabr_match", regs, `5`,
1121	`5`, SIGTRAP) == NOTIFY_STOP)
1122	return;
1123	if (debugger_iabr_match(regs))
1124	return;
1125	_exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip);
1126	}
1127
1128	DEFINE_INTERRUPT_HANDLER(RunModeException)
1129	{
1130	_exception(SIGTRAP, regs, TRAP_UNK, `0`);
1131	}
1132
1133	static void __single_step_exception(struct pt_regs *regs)
1134	{
1135	clear_single_step(regs);
1136	clear_br_trace(regs);
1137
1138	if (kprobe_post_handler(regs))
1139	return;
1140
1141	if (notify_die(DIE_SSTEP, "single_step", regs, `5`,
1142	`5`, SIGTRAP) == NOTIFY_STOP)
1143	return;
1144	if (debugger_sstep(regs))
1145	return;
1146
1147	_exception(SIGTRAP, regs, TRAP_TRACE, addr: regs->nip);
1148	}
1149
1150	DEFINE_INTERRUPT_HANDLER(single_step_exception)
1151	{
1152	__single_step_exception(regs);
1153	}
1154
/*
 * Called after an instruction has been emulated successfully.  If that
 * instruction was being single-stepped, behave as though a single-step
 * exception had been taken for it.  This was pointed out by Kumar Gala.
 * -- paulus
 */
void emulate_single_step(struct pt_regs *regs)
{
	/* Nothing to do unless the task is being single-stepped. */
	if (!single_stepping(regs))
		return;

	__single_step_exception(regs);
}
1166
#ifdef CONFIG_PPC_FPU_REGS
/*
 * Translate an FPSCR value into a SIGFPE si_code.
 *
 * Each exception class is reported only when both its enable bit and its
 * status bit are set.  The classes are tested in a fixed priority order and
 * the first match wins; FPE_FLTUNK is returned when none matches.
 */
static inline int __parse_fpscr(unsigned long fpscr)
{
	/* Invalid operation */
	if ((fpscr & FPSCR_VE) && (fpscr & FPSCR_VX))
		return FPE_FLTINV;

	/* Overflow */
	if ((fpscr & FPSCR_OE) && (fpscr & FPSCR_OX))
		return FPE_FLTOVF;

	/* Underflow */
	if ((fpscr & FPSCR_UE) && (fpscr & FPSCR_UX))
		return FPE_FLTUND;

	/* Divide by zero */
	if ((fpscr & FPSCR_ZE) && (fpscr & FPSCR_ZX))
		return FPE_FLTDIV;

	/* Inexact result */
	if ((fpscr & FPSCR_XE) && (fpscr & FPSCR_XX))
		return FPE_FLTRES;

	return FPE_FLTUNK;
}
#endif
1195
1196	static void parse_fpe(struct pt_regs *regs)
1197	{
1198	int code = `0`;
1199
1200	flush_fp_to_thread(current);
1201
1202	#ifdef CONFIG_PPC_FPU_REGS
1203	code = __parse_fpscr(current->thread.fp_state.fpscr);
1204	#endif
1205
1206	_exception(SIGFPE, regs, code, addr: regs->nip);
1207	}
1208
1209	/*
1210	* Illegal instruction emulation support. Originally written to
1211	* provide the PVR to user applications using the mfspr rd, PVR.
1212	* Return non-zero if we can't emulate, or -EFAULT if the associated
1213	* memory access caused an access fault. Return zero on success.
1214	*
1215	* There are a couple of ways to do this, either "decode" the instruction
1216	* or directly match lots of bits. In this case, matching lots of
1217	* bits is faster and easier.
1218	*
1219	*/
1220	static int emulate_string_inst(struct pt_regs *regs, u32 instword)
1221	{
1222	u8 rT = (instword >> `21`) & `0x1f`;
1223	u8 rA = (instword >> `16`) & `0x1f`;
1224	u8 NB_RB = (instword >> `11`) & `0x1f`;
1225	u32 num_bytes;
1226	unsigned long EA;
1227	int pos = `0`;
1228
1229	/ Early out if we are an invalid form of lswx /
1230	if ((instword & PPC_INST_STRING_MASK) == PPC_INST_LSWX)
1231	if ((rT == rA) \|\| (rT == NB_RB))
1232	return -EINVAL;
1233
1234	EA = (rA == `0`) ? `0` : regs->gpr[rA];
1235
1236	switch (instword & PPC_INST_STRING_MASK) {
1237	case PPC_INST_LSWX:
1238	case PPC_INST_STSWX:
1239	EA += NB_RB;
1240	num_bytes = regs->xer & `0x7f`;
1241	break;
1242	case PPC_INST_LSWI:
1243	case PPC_INST_STSWI:
1244	num_bytes = (NB_RB == `0`) ? `32` : NB_RB;
1245	break;
1246	default:
1247	return -EINVAL;
1248	}
1249
1250	while (num_bytes != `0`)
1251	{
1252	u8 val;
1253	u32 shift = `8` * (`3` - (pos & `0x3`));
1254
1255	/ if process is 32-bit, clear upper 32 bits of EA /
1256	if ((regs->msr & MSR_64BIT) == `0`)
1257	EA &= `0xFFFFFFFF`;
1258
1259	switch ((instword & PPC_INST_STRING_MASK)) {
1260	case PPC_INST_LSWX:
1261	case PPC_INST_LSWI:
1262	if (get_user(val, (u8 __user *)EA))
1263	return -EFAULT;
1264	/ first time updating this reg,*
1265	* zero it out */
1266	if (pos == `0`)
1267	regs->gpr[rT] = `0`;
1268	regs->gpr[rT] \|= val << shift;
1269	break;
1270	case PPC_INST_STSWI:
1271	case PPC_INST_STSWX:
1272	val = regs->gpr[rT] >> shift;
1273	if (put_user(val, (u8 __user *)EA))
1274	return -EFAULT;
1275	break;
1276	}
1277	/ move EA to next address /
1278	EA += `1`;
1279	num_bytes--;
1280
1281	/ manage our position within the register /
1282	if (++pos == `4`) {
1283	pos = `0`;
1284	if (++rT == `32`)
1285	rT = `0`;
1286	}
1287	}
1288
1289	return `0`;
1290	}
1291
1292	static int emulate_popcntb_inst(struct pt_regs *regs, u32 instword)
1293	{
1294	u32 ra,rs;
1295	unsigned long tmp;
1296
1297	ra = (instword >> `16`) & `0x1f`;
1298	rs = (instword >> `21`) & `0x1f`;
1299
1300	tmp = regs->gpr[rs];
1301	tmp = tmp - ((tmp >> `1`) & `0x5555555555555555ULL`);
1302	tmp = (tmp & `0x3333333333333333ULL`) + ((tmp >> `2`) & `0x3333333333333333ULL`);
1303	tmp = (tmp + (tmp >> `4`)) & `0x0f0f0f0f0f0f0f0fULL`;
1304	regs->gpr[ra] = tmp;
1305
1306	return `0`;
1307	}
1308
1309	static int emulate_isel(struct pt_regs *regs, u32 instword)
1310	{
1311	u8 rT = (instword >> `21`) & `0x1f`;
1312	u8 rA = (instword >> `16`) & `0x1f`;
1313	u8 rB = (instword >> `11`) & `0x1f`;
1314	u8 BC = (instword >> `6`) & `0x1f`;
1315	u8 bit;
1316	unsigned long tmp;
1317
1318	tmp = (rA == `0`) ? `0` : regs->gpr[rA];
1319	bit = (regs->ccr >> (`31` - BC)) & `0x1`;
1320
1321	regs->gpr[rT] = bit ? tmp : regs->gpr[rB];
1322
1323	return `0`;
1324	}
1325
1326	#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
1327	static inline bool tm_abort_check(struct pt_regs regs, int* cause)
1328	{
1329	/ If we're emulating a load/store in an active transaction, we cannot*
1330	* emulate it as the kernel operates in transaction suspended context.
1331	* We need to abort the transaction. This creates a persistent TM
1332	* abort so tell the user what caused it with a new code.
1333	*/
1334	if (MSR_TM_TRANSACTIONAL(regs->msr)) {
1335	tm_enable();
1336	tm_abort(cause);
1337	return true;
1338	}
1339	return false;
1340	}
1341	#else
1342	static inline bool tm_abort_check(struct pt_regs regs, int* reason)
1343	{
1344	return false;
1345	}
1346	#endif
1347
1348	static int emulate_instruction(struct pt_regs *regs)
1349	{
1350	u32 instword;
1351	u32 rd;
1352
1353	if (!user_mode(regs))
1354	return -EINVAL;
1355
1356	if (get_user(instword, (u32 __user *)(regs->nip)))
1357	return -EFAULT;
1358
1359	/ Emulate the mfspr rD, PVR. /
1360	if ((instword & PPC_INST_MFSPR_PVR_MASK) == PPC_INST_MFSPR_PVR) {
1361	PPC_WARN_EMULATED(mfpvr, regs);
1362	rd = (instword >> `21`) & `0x1f`;
1363	regs->gpr[rd] = mfspr(SPRN_PVR);
1364	return `0`;
1365	}
1366
1367	/ Emulating the dcba insn is just a no-op. /
1368	if ((instword & PPC_INST_DCBA_MASK) == PPC_INST_DCBA) {
1369	PPC_WARN_EMULATED(dcba, regs);
1370	return `0`;
1371	}
1372
1373	/ Emulate the mcrxr insn. /
1374	if ((instword & PPC_INST_MCRXR_MASK) == PPC_INST_MCRXR) {
1375	int shift = (instword >> `21`) & `0x1c`;
1376	unsigned long msk = `0xf0000000UL` >> shift;
1377
1378	PPC_WARN_EMULATED(mcrxr, regs);
1379	regs->ccr = (regs->ccr & ~msk) \| ((regs->xer >> shift) & msk);
1380	regs->xer &= ~`0xf0000000UL`;
1381	return `0`;
1382	}
1383
1384	/ Emulate load/store string insn. /
1385	if ((instword & PPC_INST_STRING_GEN_MASK) == PPC_INST_STRING) {
1386	if (tm_abort_check(regs,
1387	TM_CAUSE_EMULATE \| TM_CAUSE_PERSISTENT))
1388	return -EINVAL;
1389	PPC_WARN_EMULATED(string, regs);
1390	return emulate_string_inst(regs, instword);
1391	}
1392
1393	/ Emulate the popcntb (Population Count Bytes) instruction. /
1394	if ((instword & PPC_INST_POPCNTB_MASK) == PPC_INST_POPCNTB) {
1395	PPC_WARN_EMULATED(popcntb, regs);
1396	return emulate_popcntb_inst(regs, instword);
1397	}
1398
1399	/ Emulate isel (Integer Select) instruction /
1400	if ((instword & PPC_INST_ISEL_MASK) == PPC_INST_ISEL) {
1401	PPC_WARN_EMULATED(isel, regs);
1402	return emulate_isel(regs, instword);
1403	}
1404
1405	/ Emulate sync instruction variants /
1406	if ((instword & PPC_INST_SYNC_MASK) == PPC_INST_SYNC) {
1407	PPC_WARN_EMULATED(sync, regs);
1408	asm volatile("sync");
1409	return `0`;
1410	}
1411
1412	#ifdef CONFIG_PPC64
1413	/ Emulate the mfspr rD, DSCR. /
1414	if ((((instword & PPC_INST_MFSPR_DSCR_USER_MASK) ==
1415	PPC_INST_MFSPR_DSCR_USER) \|\|
1416	((instword & PPC_INST_MFSPR_DSCR_MASK) ==
1417	PPC_INST_MFSPR_DSCR)) &&
1418	cpu_has_feature(CPU_FTR_DSCR)) {
1419	PPC_WARN_EMULATED(mfdscr, regs);
1420	rd = (instword >> `21`) & `0x1f`;
1421	regs->gpr[rd] = mfspr(SPRN_DSCR);
1422	return `0`;
1423	}
1424	/ Emulate the mtspr DSCR, rD. /
1425	if ((((instword & PPC_INST_MTSPR_DSCR_USER_MASK) ==
1426	PPC_INST_MTSPR_DSCR_USER) \|\|
1427	((instword & PPC_INST_MTSPR_DSCR_MASK) ==
1428	PPC_INST_MTSPR_DSCR)) &&
1429	cpu_has_feature(CPU_FTR_DSCR)) {
1430	PPC_WARN_EMULATED(mtdscr, regs);
1431	rd = (instword >> `21`) & `0x1f`;
1432	current->thread.dscr = regs->gpr[rd];
1433	current->thread.dscr_inherit = `1`;
1434	mtspr(SPRN_DSCR, current->thread.dscr);
1435	return `0`;
1436	}
1437	#endif
1438
1439	return -EINVAL;
1440	}
1441
#ifdef CONFIG_GENERIC_BUG
/*
 * Hook for the generic BUG() support: an address is an acceptable bug
 * address exactly when is_kernel_addr() accepts it.
 */
int is_valid_bugaddr(unsigned long addr)
{
	return is_kernel_addr(addr);
}
#endif
1448
#ifdef CONFIG_MATH_EMULATION
/*
 * Try to emulate a floating-point instruction with do_mathemu().
 *
 * Returns 0 when the event has been fully dealt with here: the instruction
 * was emulated, or a SIGFPE/SIGSEGV was raised on its behalf.  Returns -1
 * when do_mathemu() gave any other result, so the caller should carry on
 * with its own handling.
 */
static int emulate_math(struct pt_regs *regs)
{
	int ret;

	ret = do_mathemu(regs);
	/* Any non-negative result counts as an emulation attempt. */
	if (ret >= 0)
		PPC_WARN_EMULATED(math, regs);

	switch (ret) {
	case 0:
		/* Emulated cleanly; honour a pending single-step. */
		emulate_single_step(regs);
		return 0;
	case 1: {
		/* A result of 1 is reported as SIGFPE, coded from the FPSCR. */
		int code = __parse_fpscr(current->thread.fp_state.fpscr);

		_exception(SIGFPE, regs, code, regs->nip);
		return 0;
	}
	case -EFAULT:
		_exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip);
		return 0;
	}

	return -1;
}
#else
/* Math emulation not configured: never handles anything. */
static inline int emulate_math(struct pt_regs *regs) { return -1; }
#endif
1478
1479	static void do_program_check(struct pt_regs *regs)
1480	{
1481	unsigned int reason = get_reason(regs);
1482
1483	/ We can now get here via a FP Unavailable exception if the core*
1484	* has no FPU, in that case the reason flags will be 0 */
1485
1486	if (reason & REASON_FP) {
1487	/ IEEE FP exception /
1488	parse_fpe(regs);
1489	return;
1490	}
1491	if (reason & REASON_TRAP) {
1492	unsigned long bugaddr;
1493	/ Debugger is first in line to stop recursive faults in*
1494	* rcu_lock, notify_die, or atomic_notifier_call_chain */
1495	if (debugger_bpt(regs))
1496	return;
1497
1498	if (kprobe_handler(regs))
1499	return;
1500
1501	/ trap exception /
1502	if (notify_die(DIE_BPT, "breakpoint", regs, `5`, `5`, SIGTRAP)
1503	== NOTIFY_STOP)
1504	return;
1505
1506	bugaddr = regs->nip;
1507	/*
1508	* Fixup bugaddr for BUG_ON() in real mode
1509	*/
1510	if (!is_kernel_addr(bugaddr) && !(regs->msr & MSR_IR))
1511	bugaddr += PAGE_OFFSET;
1512
1513	if (!user_mode(regs) &&
1514	report_bug(bug_addr: bugaddr, regs) == BUG_TRAP_TYPE_WARN) {
1515	regs_add_return_ip(regs, `4`);
1516	return;
1517	}
1518
1519	/ User mode considers other cases after enabling IRQs /
1520	if (!user_mode(regs)) {
1521	_exception(SIGTRAP, regs, TRAP_BRKPT, addr: regs->nip);
1522	return;
1523	}
1524	}
1525	#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
1526	if (reason & REASON_TM) {
1527	/ This is a TM "Bad Thing Exception" program check.*
1528	* This occurs when:
1529	* - An rfid/hrfid/mtmsrd attempts to cause an illegal
1530	* transition in TM states.
1531	* - A trechkpt is attempted when transactional.
1532	* - A treclaim is attempted when non transactional.
1533	* - A tend is illegally attempted.
1534	* - writing a TM SPR when transactional.
1535	*
1536	* If usermode caused this, it's done something illegal and
1537	* gets a SIGILL slap on the wrist. We call it an illegal
1538	* operand to distinguish from the instruction just being bad
1539	* (e.g. executing a 'tend' on a CPU without TM!); it's an
1540	* illegal /placement/ of a valid instruction.
1541	*/
1542	if (user_mode(regs)) {
1543	_exception(SIGILL, regs, ILL_ILLOPN, regs->nip);
1544	return;
1545	} else {
1546	printk(KERN_EMERG "Unexpected TM Bad Thing exception "
1547	"at %lx (msr 0x%lx) tm_scratch=%llx\n",
1548	regs->nip, regs->msr, get_paca()->tm_scratch);
1549	die("Unrecoverable exception", regs, SIGABRT);
1550	}
1551	}
1552	#endif
1553
1554	/*
1555	* If we took the program check in the kernel skip down to sending a
1556	* SIGILL. The subsequent cases all relate to user space, such as
1557	* emulating instructions which we should only do for user space. We
1558	* also do not want to enable interrupts for kernel faults because that
1559	* might lead to further faults, and loose the context of the original
1560	* exception.
1561	*/
1562	if (!user_mode(regs))
1563	goto sigill;
1564
1565	interrupt_cond_local_irq_enable(regs);
1566
1567	/*
1568	* (reason & REASON_TRAP) is mostly handled before enabling IRQs,
1569	* except get_user_instr() can sleep so we cannot reliably inspect the
1570	* current instruction in that context. Now that we know we are
1571	* handling a user space trap and can sleep, we can check if the trap
1572	* was a hashchk failure.
1573	*/
1574	if (reason & REASON_TRAP) {
1575	if (cpu_has_feature(CPU_FTR_DEXCR_NPHIE)) {
1576	ppc_inst_t insn;
1577
1578	if (get_user_instr(insn, (void __user *)regs->nip)) {
1579	_exception(SIGSEGV, regs, SEGV_MAPERR, addr: regs->nip);
1580	return;
1581	}
1582
1583	if (ppc_inst_primary_opcode(insn) == `31` &&
1584	get_xop(ppc_inst_val(insn)) == OP_31_XOP_HASHCHK) {
1585	_exception(SIGILL, regs, ILL_ILLOPN, addr: regs->nip);
1586	return;
1587	}
1588	}
1589
1590	_exception(SIGTRAP, regs, TRAP_BRKPT, addr: regs->nip);
1591	return;
1592	}
1593
1594	/ (reason & REASON_ILLEGAL) would be the obvious thing here,*
1595	* but there seems to be a hardware bug on the 405GP (RevD)
1596	* that means ESR is sometimes set incorrectly - either to
1597	* ESR_DST (!?) or 0. In the process of chasing this with the
1598	* hardware people - not sure if it can happen on any illegal
1599	* instruction or only on FP instructions, whether there is a
1600	* pattern to occurrences etc. -dgibson 31/Mar/2003
1601	*/
1602	if (!emulate_math(regs))
1603	return;
1604
1605	/ Try to emulate it if we should. /
1606	if (reason & (REASON_ILLEGAL \| REASON_PRIVILEGED)) {
1607	switch (emulate_instruction(regs)) {
1608	case `0`:
1609	regs_add_return_ip(regs, `4`);
1610	emulate_single_step(regs);
1611	return;
1612	case -EFAULT:
1613	_exception(SIGSEGV, regs, SEGV_MAPERR, addr: regs->nip);
1614	return;
1615	}
1616	}
1617
1618	sigill:
1619	if (reason & REASON_PRIVILEGED)
1620	_exception(SIGILL, regs, ILL_PRVOPC, addr: regs->nip);
1621	else
1622	_exception(SIGILL, regs, ILL_ILLOPC, addr: regs->nip);
1623
1624	}
1625
1626	DEFINE_INTERRUPT_HANDLER(program_check_exception)
1627	{
1628	do_program_check(regs);
1629	}
1630
1631	/*
1632	* This occurs when running in hypervisor mode on POWER6 or later
1633	* and an illegal instruction is encountered.
1634	*/
1635	DEFINE_INTERRUPT_HANDLER(emulation_assist_interrupt)
1636	{
1637	regs_set_return_msr(regs, regs->msr \| REASON_ILLEGAL);
1638	do_program_check(regs);
1639	}
1640
1641	DEFINE_INTERRUPT_HANDLER(alignment_exception)
1642	{
1643	int sig, code, fixed = `0`;
1644	unsigned long reason;
1645
1646	interrupt_cond_local_irq_enable(regs);
1647
1648	reason = get_reason(regs);
1649	if (reason & REASON_BOUNDARY) {
1650	sig = SIGBUS;
1651	code = BUS_ADRALN;
1652	goto bad;
1653	}
1654
1655	if (tm_abort_check(regs, TM_CAUSE_ALIGNMENT \| TM_CAUSE_PERSISTENT))
1656	return;
1657
1658	/ we don't implement logging of alignment exceptions /
1659	if (!(current->thread.align_ctl & PR_UNALIGN_SIGBUS))
1660	fixed = fix_alignment(regs);
1661
1662	if (fixed == `1`) {
1663	/ skip over emulated instruction /
1664	regs_add_return_ip(regs, inst_length(reason));
1665	emulate_single_step(regs);
1666	return;
1667	}
1668
1669	/ Operand address was bad /
1670	if (fixed == -EFAULT) {
1671	sig = SIGSEGV;
1672	code = SEGV_ACCERR;
1673	} else {
1674	sig = SIGBUS;
1675	code = BUS_ADRALN;
1676	}
1677	bad:
1678	if (user_mode(regs))
1679	_exception(sig, regs, code, regs->dar);
1680	else
1681	bad_page_fault(regs, sig);
1682	}
1683
1684	DEFINE_INTERRUPT_HANDLER(stack_overflow_exception)
1685	{
1686	die("Kernel stack overflow", regs, SIGSEGV);
1687	}
1688
1689	DEFINE_INTERRUPT_HANDLER(kernel_fp_unavailable_exception)
1690	{
1691	printk(KERN_EMERG "Unrecoverable FP Unavailable Exception "
1692	"%lx at %lx\n", regs->trap, regs->nip);
1693	die("Unrecoverable FP Unavailable Exception", regs, SIGABRT);
1694	}
1695
1696	DEFINE_INTERRUPT_HANDLER(altivec_unavailable_exception)
1697	{
1698	if (user_mode(regs)) {
1699	/ A user program has executed an altivec instruction,*
1700	but this kernel doesn't support altivec. /*
1701	_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
1702	return;
1703	}
1704
1705	printk(KERN_EMERG "Unrecoverable VMX/Altivec Unavailable Exception "
1706	"%lx at %lx\n", regs->trap, regs->nip);
1707	die("Unrecoverable VMX/Altivec Unavailable Exception", regs, SIGABRT);
1708	}
1709
1710	DEFINE_INTERRUPT_HANDLER(vsx_unavailable_exception)
1711	{
1712	if (user_mode(regs)) {
1713	/ A user program has executed an vsx instruction,*
1714	but this kernel doesn't support vsx. /*
1715	_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
1716	return;
1717	}
1718
1719	printk(KERN_EMERG "Unrecoverable VSX Unavailable Exception "
1720	"%lx at %lx\n", regs->trap, regs->nip);
1721	die("Unrecoverable VSX Unavailable Exception", regs, SIGABRT);
1722	}
1723
1724	#ifdef CONFIG_PPC_BOOK3S_64
1725	static void tm_unavailable(struct pt_regs *regs)
1726	{
1727	#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
1728	if (user_mode(regs)) {
1729	current->thread.load_tm++;
1730	regs_set_return_msr(regs, regs->msr \| MSR_TM);
1731	tm_enable();
1732	tm_restore_sprs(&current->thread);
1733	return;
1734	}
1735	#endif
1736	pr_emerg("Unrecoverable TM Unavailable Exception "
1737	"%lx at %lx\n", regs->trap, regs->nip);
1738	die("Unrecoverable TM Unavailable Exception", regs, SIGABRT);
1739	}
1740
1741	DEFINE_INTERRUPT_HANDLER(facility_unavailable_exception)
1742	{
1743	static char *facility_strings[] = {
1744	[FSCR_FP_LG] = "FPU",
1745	[FSCR_VECVSX_LG] = "VMX/VSX",
1746	[FSCR_DSCR_LG] = "DSCR",
1747	[FSCR_PM_LG] = "PMU SPRs",
1748	[FSCR_BHRB_LG] = "BHRB",
1749	[FSCR_TM_LG] = "TM",
1750	[FSCR_EBB_LG] = "EBB",
1751	[FSCR_TAR_LG] = "TAR",
1752	[FSCR_MSGP_LG] = "MSGP",
1753	[FSCR_SCV_LG] = "SCV",
1754	[FSCR_PREFIX_LG] = "PREFIX",
1755	};
1756	char *facility = "unknown";
1757	u64 value;
1758	u32 instword, rd;
1759	u8 status;
1760	bool hv;
1761
1762	hv = (TRAP(regs) == INTERRUPT_H_FAC_UNAVAIL);
1763	if (hv)
1764	value = mfspr(SPRN_HFSCR);
1765	else
1766	value = mfspr(SPRN_FSCR);
1767
1768	status = value >> `56`;
1769	if ((hv \|\| status >= `2`) &&
1770	(status < ARRAY_SIZE(facility_strings)) &&
1771	facility_strings[status])
1772	facility = facility_strings[status];
1773
1774	/ We should not have taken this interrupt in kernel /
1775	if (!user_mode(regs)) {
1776	pr_emerg("Facility '%s' unavailable (%d) exception in kernel mode at %lx\n",
1777	facility, status, regs->nip);
1778	die("Unexpected facility unavailable exception", regs, SIGABRT);
1779	}
1780
1781	interrupt_cond_local_irq_enable(regs);
1782
1783	if (status == FSCR_DSCR_LG) {
1784	/*
1785	* User is accessing the DSCR register using the problem
1786	* state only SPR number (0x03) either through a mfspr or
1787	* a mtspr instruction. If it is a write attempt through
1788	* a mtspr, then we set the inherit bit. This also allows
1789	* the user to write or read the register directly in the
1790	* future by setting via the FSCR DSCR bit. But in case it
1791	* is a read DSCR attempt through a mfspr instruction, we
1792	* just emulate the instruction instead. This code path will
1793	* always emulate all the mfspr instructions till the user
1794	* has attempted at least one mtspr instruction. This way it
1795	* preserves the same behaviour when the user is accessing
1796	* the DSCR through privilege level only SPR number (0x11)
1797	* which is emulated through illegal instruction exception.
1798	* We always leave HFSCR DSCR set.
1799	*/
1800	if (get_user(instword, (u32 __user *)(regs->nip))) {
1801	pr_err("Failed to fetch the user instruction\n");
1802	return;
1803	}
1804
1805	/ Write into DSCR (mtspr 0x03, RS) /
1806	if ((instword & PPC_INST_MTSPR_DSCR_USER_MASK)
1807	== PPC_INST_MTSPR_DSCR_USER) {
1808	rd = (instword >> `21`) & `0x1f`;
1809	current->thread.dscr = regs->gpr[rd];
1810	current->thread.dscr_inherit = `1`;
1811	current->thread.fscr \|= FSCR_DSCR;
1812	mtspr(SPRN_FSCR, current->thread.fscr);
1813	}
1814
1815	/ Read from DSCR (mfspr RT, 0x03) /
1816	if ((instword & PPC_INST_MFSPR_DSCR_USER_MASK)
1817	== PPC_INST_MFSPR_DSCR_USER) {
1818	if (emulate_instruction(regs)) {
1819	pr_err("DSCR based mfspr emulation failed\n");
1820	return;
1821	}
1822	regs_add_return_ip(regs, `4`);
1823	emulate_single_step(regs);
1824	}
1825	return;
1826	}
1827
1828	if (status == FSCR_TM_LG) {
1829	/*
1830	* If we're here then the hardware is TM aware because it
1831	* generated an exception with FSRM_TM set.
1832	*
1833	* If cpu_has_feature(CPU_FTR_TM) is false, then either firmware
1834	* told us not to do TM, or the kernel is not built with TM
1835	* support.
1836	*
1837	* If both of those things are true, then userspace can spam the
1838	* console by triggering the printk() below just by continually
1839	* doing tbegin (or any TM instruction). So in that case just
1840	* send the process a SIGILL immediately.
1841	*/
1842	if (!cpu_has_feature(CPU_FTR_TM))
1843	goto out;
1844
1845	tm_unavailable(regs);
1846	return;
1847	}
1848
1849	pr_err_ratelimited("%sFacility '%s' unavailable (%d), exception at 0x%lx, MSR=%lx\n",
1850	hv ? "Hypervisor " : "", facility, status, regs->nip, regs->msr);
1851
1852	out:
1853	_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
1854	}
1855	#endif
1856
1857	#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
1858
1859	DEFINE_INTERRUPT_HANDLER(fp_unavailable_tm)
1860	{
1861	/ Note: This does not handle any kind of FP laziness. /
1862
1863	TM_DEBUG("FP Unavailable trap whilst transactional at 0x%lx, MSR=%lx\n",
1864	regs->nip, regs->msr);
1865
1866	/ We can only have got here if the task started using FP after*
1867	* beginning the transaction. So, the transactional regs are just a
1868	* copy of the checkpointed ones. But, we still need to recheckpoint
1869	* as we're enabling FP for the process; it will return, abort the
1870	* transaction, and probably retry but now with FP enabled. So the
1871	* checkpointed FP registers need to be loaded.
1872	*/
1873	tm_reclaim_current(TM_CAUSE_FAC_UNAV);
1874
1875	/*
1876	* Reclaim initially saved out bogus (lazy) FPRs to ckfp_state, and
1877	* then it was overwrite by the thr->fp_state by tm_reclaim_thread().
1878	*
1879	* At this point, ck{fp,vr}_state contains the exact values we want to
1880	* recheckpoint.
1881	*/
1882
1883	/ Enable FP for the task: /
1884	current->thread.load_fp = `1`;
1885
1886	/*
1887	* Recheckpoint all the checkpointed ckpt, ck{fp, vr}_state registers.
1888	*/
1889	tm_recheckpoint(&current->thread);
1890	}
1891
1892	DEFINE_INTERRUPT_HANDLER(altivec_unavailable_tm)
1893	{
1894	/ See the comments in fp_unavailable_tm(). This function operates*
1895	* the same way.
1896	*/
1897
1898	TM_DEBUG("Vector Unavailable trap whilst transactional at 0x%lx,"
1899	"MSR=%lx\n",
1900	regs->nip, regs->msr);
1901	tm_reclaim_current(TM_CAUSE_FAC_UNAV);
1902	current->thread.load_vec = `1`;
1903	tm_recheckpoint(&current->thread);
1904	current->thread.used_vr = `1`;
1905	}
1906
1907	DEFINE_INTERRUPT_HANDLER(vsx_unavailable_tm)
1908	{
1909	/ See the comments in fp_unavailable_tm(). This works similarly,*
1910	* though we're loading both FP and VEC registers in here.
1911	*
1912	* If FP isn't in use, load FP regs. If VEC isn't in use, load VEC
1913	* regs. Either way, set MSR_VSX.
1914	*/
1915
1916	TM_DEBUG("VSX Unavailable trap whilst transactional at 0x%lx,"
1917	"MSR=%lx\n",
1918	regs->nip, regs->msr);
1919
1920	current->thread.used_vsr = `1`;
1921
1922	/ This reclaims FP and/or VR regs if they're already enabled /
1923	tm_reclaim_current(TM_CAUSE_FAC_UNAV);
1924
1925	current->thread.load_vec = `1`;
1926	current->thread.load_fp = `1`;
1927
1928	tm_recheckpoint(&current->thread);
1929	}
1930	#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
1931
#ifdef CONFIG_PPC64
DECLARE_INTERRUPT_HANDLER_NMI(performance_monitor_exception_nmi);

/*
 * NMI flavour of the performance monitor handler: count the interrupt in
 * the per-CPU statistics, call perf_irq(), and always return 0.
 */
DEFINE_INTERRUPT_HANDLER_NMI(performance_monitor_exception_nmi)
{
	__this_cpu_inc(irq_stat.pmu_irqs);

	perf_irq(regs);

	return 0;
}
#endif
1943
1944	DECLARE_INTERRUPT_HANDLER_ASYNC(performance_monitor_exception_async);
1945	DEFINE_INTERRUPT_HANDLER_ASYNC(performance_monitor_exception_async)
1946	{
1947	__this_cpu_inc(irq_stat.pmu_irqs);
1948
1949	perf_irq(regs);
1950	}
1951
1952	DEFINE_INTERRUPT_HANDLER_RAW(performance_monitor_exception)
1953	{
1954	/*
1955	* On 64-bit, if perf interrupts hit in a local_irq_disable
1956	* (soft-masked) region, we consider them as NMIs. This is required to
1957	* prevent hash faults on user addresses when reading callchains (and
1958	* looks better from an irq tracing perspective).
1959	*/
1960	if (IS_ENABLED(CONFIG_PPC64) && unlikely(arch_irq_disabled_regs(regs)))
1961	performance_monitor_exception_nmi(regs);
1962	else
1963	performance_monitor_exception_async(regs);
1964
1965	return `0`;
1966	}
1967
1968	#ifdef CONFIG_PPC_ADV_DEBUG_REGS
1969	static void handle_debug(struct pt_regs regs, unsigned* long debug_status)
1970	{
1971	int changed = `0`;
1972	/*
1973	* Determine the cause of the debug event, clear the
1974	* event flags and send a trap to the handler. Torez
1975	*/
1976	if (debug_status & (DBSR_DAC1R \| DBSR_DAC1W)) {
1977	dbcr_dac(current) &= ~(DBCR_DAC1R \| DBCR_DAC1W);
1978	#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
1979	current->thread.debug.dbcr2 &= ~DBCR2_DAC12MODE;
1980	#endif
1981	do_send_trap(regs, mfspr(SPRN_DAC1), debug_status,
1982	`5`);
1983	changed \|= `0x01`;
1984	} else if (debug_status & (DBSR_DAC2R \| DBSR_DAC2W)) {
1985	dbcr_dac(current) &= ~(DBCR_DAC2R \| DBCR_DAC2W);
1986	do_send_trap(regs, mfspr(SPRN_DAC2), debug_status,
1987	`6`);
1988	changed \|= `0x01`;
1989	} else if (debug_status & DBSR_IAC1) {
1990	current->thread.debug.dbcr0 &= ~DBCR0_IAC1;
1991	dbcr_iac_range(current) &= ~DBCR_IAC12MODE;
1992	do_send_trap(regs, mfspr(SPRN_IAC1), debug_status,
1993	`1`);
1994	changed \|= `0x01`;
1995	} else if (debug_status & DBSR_IAC2) {
1996	current->thread.debug.dbcr0 &= ~DBCR0_IAC2;
1997	do_send_trap(regs, mfspr(SPRN_IAC2), debug_status,
1998	`2`);
1999	changed \|= `0x01`;
2000	} else if (debug_status & DBSR_IAC3) {
2001	current->thread.debug.dbcr0 &= ~DBCR0_IAC3;
2002	dbcr_iac_range(current) &= ~DBCR_IAC34MODE;
2003	do_send_trap(regs, mfspr(SPRN_IAC3), debug_status,
2004	`3`);
2005	changed \|= `0x01`;
2006	} else if (debug_status & DBSR_IAC4) {
2007	current->thread.debug.dbcr0 &= ~DBCR0_IAC4;
2008	do_send_trap(regs, mfspr(SPRN_IAC4), debug_status,
2009	`4`);
2010	changed \|= `0x01`;
2011	}
2012	/*
2013	* At the point this routine was called, the MSR(DE) was turned off.
2014	* Check all other debug flags and see if that bit needs to be turned
2015	* back on or not.
2016	*/
2017	if (DBCR_ACTIVE_EVENTS(current->thread.debug.dbcr0,
2018	current->thread.debug.dbcr1))
2019	regs_set_return_msr(regs, regs->msr \| MSR_DE);
2020	else
2021	/ Make sure the IDM flag is off /
2022	current->thread.debug.dbcr0 &= ~DBCR0_IDM;
2023
2024	if (changed & `0x01`)
2025	mtspr(SPRN_DBCR0, current->thread.debug.dbcr0);
2026	}
2027
2028	DEFINE_INTERRUPT_HANDLER(DebugException)
2029	{
2030	unsigned long debug_status = regs->dsisr;
2031
2032	current->thread.debug.dbsr = debug_status;
2033
2034	/ Hack alert: On BookE, Branch Taken stops on the branch itself, while*
2035	* on server, it stops on the target of the branch. In order to simulate
2036	* the server behaviour, we thus restart right away with a single step
2037	* instead of stopping here when hitting a BT
2038	*/
2039	if (debug_status & DBSR_BT) {
2040	regs_set_return_msr(regs, regs->msr & ~MSR_DE);
2041
2042	/ Disable BT /
2043	mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~DBCR0_BT);
2044	/ Clear the BT event /
2045	mtspr(SPRN_DBSR, DBSR_BT);
2046
2047	/ Do the single step trick only when coming from userspace /
2048	if (user_mode(regs)) {
2049	current->thread.debug.dbcr0 &= ~DBCR0_BT;
2050	current->thread.debug.dbcr0 \|= DBCR0_IDM \| DBCR0_IC;
2051	regs_set_return_msr(regs, regs->msr \| MSR_DE);
2052	return;
2053	}
2054
2055	if (kprobe_post_handler(regs))
2056	return;
2057
2058	if (notify_die(DIE_SSTEP, "block_step", regs, `5`,
2059	`5`, SIGTRAP) == NOTIFY_STOP) {
2060	return;
2061	}
2062	if (debugger_sstep(regs))
2063	return;
2064	} else if (debug_status & DBSR_IC) { / Instruction complete /
2065	regs_set_return_msr(regs, regs->msr & ~MSR_DE);
2066
2067	/ Disable instruction completion /
2068	mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~DBCR0_IC);
2069	/ Clear the instruction completion event /
2070	mtspr(SPRN_DBSR, DBSR_IC);
2071
2072	if (kprobe_post_handler(regs))
2073	return;
2074
2075	if (notify_die(DIE_SSTEP, "single_step", regs, `5`,
2076	`5`, SIGTRAP) == NOTIFY_STOP) {
2077	return;
2078	}
2079
2080	if (debugger_sstep(regs))
2081	return;
2082
2083	if (user_mode(regs)) {
2084	current->thread.debug.dbcr0 &= ~DBCR0_IC;
2085	if (DBCR_ACTIVE_EVENTS(current->thread.debug.dbcr0,
2086	current->thread.debug.dbcr1))
2087	regs_set_return_msr(regs, regs->msr \| MSR_DE);
2088	else
2089	/ Make sure the IDM bit is off /
2090	current->thread.debug.dbcr0 &= ~DBCR0_IDM;
2091	}
2092
2093	_exception(SIGTRAP, regs, TRAP_TRACE, regs->nip);
2094	} else
2095	handle_debug(regs, debug_status);
2096	}
2097	#endif /* CONFIG_PPC_ADV_DEBUG_REGS */
2098
#ifdef CONFIG_ALTIVEC
/*
 * VMX/Altivec assist handler.
 *
 * Kernel-mode occurrences are fatal.  For user mode the altivec state is
 * flushed to the thread struct and emulate_altivec() is tried.  On success
 * the instruction is stepped over.  On -EFAULT the task gets SIGSEGV.  For
 * any other failure a rate-limited message is logged and a bit is set in
 * the saved VSCR.
 */
DEFINE_INTERRUPT_HANDLER(altivec_assist_exception)
{
	int err;

	if (!user_mode(regs)) {
		printk(KERN_EMERG "VMX/Altivec assist exception in kernel mode"
		       " at %lx\n", regs->nip);
		die("Kernel VMX/Altivec assist exception", regs, SIGILL);
	}

	flush_altivec_to_thread(current);

	PPC_WARN_EMULATED(altivec, regs);
	err = emulate_altivec(regs);

	switch (err) {
	case 0:
		regs_add_return_ip(regs, 4); /* skip emulated instruction */
		emulate_single_step(regs);
		break;
	case -EFAULT:
		/* got an error reading the instruction */
		_exception(SIGSEGV, regs, SEGV_ACCERR, regs->nip);
		break;
	default:
		/* didn't recognize the instruction */
		/* XXX quick hack for now: set the non-Java bit in the VSCR */
		printk_ratelimited(KERN_ERR "Unrecognized altivec instruction "
				   "in %s at %lx\n", current->comm, regs->nip);
		current->thread.vr_state.vscr.u[3] |= 0x10000;
		break;
	}
}
#endif /* CONFIG_ALTIVEC */
2132
2133	#ifdef CONFIG_PPC_85xx
2134	DEFINE_INTERRUPT_HANDLER(CacheLockingException)
2135	{
2136	unsigned long error_code = regs->dsisr;
2137
2138	/ We treat cache locking instructions from the user*
2139	* as priv ops, in the future we could try to do
2140	* something smarter
2141	*/
2142	if (error_code & (ESR_DLK\|ESR_ILK))
2143	_exception(SIGILL, regs, ILL_PRVOPC, regs->nip);
2144	return;
2145	}
2146	#endif /* CONFIG_PPC_85xx */
2147
2148	#ifdef CONFIG_SPE
2149	DEFINE_INTERRUPT_HANDLER(SPEFloatingPointException)
2150	{
2151	unsigned long spefscr;
2152	int fpexc_mode;
2153	int code = FPE_FLTUNK;
2154	int err;
2155
2156	interrupt_cond_local_irq_enable(regs);
2157
2158	flush_spe_to_thread(current);
2159
2160	spefscr = current->thread.spefscr;
2161	fpexc_mode = current->thread.fpexc_mode;
2162
2163	if ((spefscr & SPEFSCR_FOVF) && (fpexc_mode & PR_FP_EXC_OVF)) {
2164	code = FPE_FLTOVF;
2165	}
2166	else if ((spefscr & SPEFSCR_FUNF) && (fpexc_mode & PR_FP_EXC_UND)) {
2167	code = FPE_FLTUND;
2168	}
2169	else if ((spefscr & SPEFSCR_FDBZ) && (fpexc_mode & PR_FP_EXC_DIV))
2170	code = FPE_FLTDIV;
2171	else if ((spefscr & SPEFSCR_FINV) && (fpexc_mode & PR_FP_EXC_INV)) {
2172	code = FPE_FLTINV;
2173	}
2174	else if ((spefscr & (SPEFSCR_FG \| SPEFSCR_FX)) && (fpexc_mode & PR_FP_EXC_RES))
2175	code = FPE_FLTRES;
2176
2177	err = do_spe_mathemu(regs);
2178	if (err == `0`) {
2179	regs_add_return_ip(regs, `4`); / skip emulated instruction /
2180	emulate_single_step(regs);
2181	return;
2182	}
2183
2184	if (err == -EFAULT) {
2185	/ got an error reading the instruction /
2186	_exception(SIGSEGV, regs, SEGV_ACCERR, regs->nip);
2187	} else if (err == -EINVAL) {
2188	/ didn't recognize the instruction /
2189	printk(KERN_ERR "unrecognized spe instruction "
2190	"in %s at %lx\n", current->comm, regs->nip);
2191	} else {
2192	_exception(SIGFPE, regs, code, regs->nip);
2193	}
2194
2195	return;
2196	}
2197
2198	DEFINE_INTERRUPT_HANDLER(SPEFloatingPointRoundException)
2199	{
2200	int err;
2201
2202	interrupt_cond_local_irq_enable(regs);
2203
2204	preempt_disable();
2205	if (regs->msr & MSR_SPE)
2206	giveup_spe(current);
2207	preempt_enable();
2208
2209	regs_add_return_ip(regs, -`4`);
2210	err = speround_handler(regs);
2211	if (err == `0`) {
2212	regs_add_return_ip(regs, `4`); / skip emulated instruction /
2213	emulate_single_step(regs);
2214	return;
2215	}
2216
2217	if (err == -EFAULT) {
2218	/ got an error reading the instruction /
2219	_exception(SIGSEGV, regs, SEGV_ACCERR, regs->nip);
2220	} else if (err == -EINVAL) {
2221	/ didn't recognize the instruction /
2222	printk(KERN_ERR "unrecognized spe instruction "
2223	"in %s at %lx\n", current->comm, regs->nip);
2224	} else {
2225	_exception(SIGFPE, regs, FPE_FLTUNK, regs->nip);
2226	return;
2227	}
2228	}
2229	#endif
2230
2231	/*
2232	* We enter here if we get an unrecoverable exception, that is, one
2233	* that happened at a point where the RI (recoverable interrupt) bit
2234	* in the MSR is 0. This indicates that SRR0/1 are live, and that
2235	* we therefore lost state by taking this exception.
2236	*/
2237	void __noreturn unrecoverable_exception(struct pt_regs *regs)
2238	{
2239	pr_emerg("Unrecoverable exception %lx at %lx (msr=%lx)\n",
2240	regs->trap, regs->nip, regs->msr);
2241	die(str: "Unrecoverable exception", regs, SIGABRT);
2242	/ die() should not return /
2243	for (;;)
2244	;
2245	}
2246
#if defined(CONFIG_BOOKE_WDT) || defined(CONFIG_40x)
/*
 * Watchdog exception (NMI-style handler): log the event at emergency
 * level, then clear the TCR_WIE bit in SPRN_TCR.  Always returns 0.
 */
DEFINE_INTERRUPT_HANDLER_NMI(WatchdogException)
{
	unsigned long tcr;

	printk(KERN_EMERG "PowerPC Book-E Watchdog Exception\n");

	/* Read-modify-write of the TCR with TCR_WIE cleared. */
	tcr = mfspr(SPRN_TCR);
	mtspr(SPRN_TCR, tcr & ~TCR_WIE);

	return 0;
}
#endif
2255
2256	/*
2257	* We enter here if we discover during exception entry that we are
2258	* running in supervisor mode with a userspace value in the stack pointer.
2259	*/
2260	DEFINE_INTERRUPT_HANDLER(kernel_bad_stack)
2261	{
2262	printk(KERN_EMERG "Bad kernel stack pointer %lx at %lx\n",
2263	regs->gpr[`1`], regs->nip);
2264	die("Bad kernel stack pointer", regs, SIGABRT);
2265	}
2266
2267	#ifdef CONFIG_PPC_EMULATED_STATS
2268
/*
 * Designated initializer for one member of struct ppc_emulated: the
 * member called 'insn' gets its .name set to the string "insn".
 */
#define WARN_EMULATED_SETUP(insn)	.insn = { .name = #insn }
2270
2271	struct ppc_emulated ppc_emulated = {
2272	#ifdef CONFIG_ALTIVEC
2273	WARN_EMULATED_SETUP(altivec),
2274	#endif
2275	WARN_EMULATED_SETUP(dcba),
2276	WARN_EMULATED_SETUP(dcbz),
2277	WARN_EMULATED_SETUP(fp_pair),
2278	WARN_EMULATED_SETUP(isel),
2279	WARN_EMULATED_SETUP(mcrxr),
2280	WARN_EMULATED_SETUP(mfpvr),
2281	WARN_EMULATED_SETUP(multiple),
2282	WARN_EMULATED_SETUP(popcntb),
2283	WARN_EMULATED_SETUP(spe),
2284	WARN_EMULATED_SETUP(string),
2285	WARN_EMULATED_SETUP(sync),
2286	WARN_EMULATED_SETUP(unaligned),
2287	#ifdef CONFIG_MATH_EMULATION
2288	WARN_EMULATED_SETUP(math),
2289	#endif
2290	#ifdef CONFIG_VSX
2291	WARN_EMULATED_SETUP(vsx),
2292	#endif
2293	#ifdef CONFIG_PPC64
2294	WARN_EMULATED_SETUP(mfdscr),
2295	WARN_EMULATED_SETUP(mtdscr),
2296	WARN_EMULATED_SETUP(lq_stq),
2297	WARN_EMULATED_SETUP(lxvw4x),
2298	WARN_EMULATED_SETUP(lxvh8x),
2299	WARN_EMULATED_SETUP(lxvd2x),
2300	WARN_EMULATED_SETUP(lxvb16x),
2301	#endif
2302	};
2303
/*
 * Exposed read/write (0644) as the debugfs file "do_warn" under
 * "emulated_instructions" by ppc_warn_emulated_init().
 * NOTE(review): presumably gates whether emulation warnings are printed;
 * the consumer is not in this file section -- confirm.
 */
u32 ppc_warn_emulated;
2305
2306	void ppc_warn_emulated_print(const char *type)
2307	{
2308	pr_warn_ratelimited("%s used emulated %s instruction\n", current->comm,
2309	type);
2310	}
2311
2312	static int __init ppc_warn_emulated_init(void)
2313	{
2314	struct dentry *dir;
2315	unsigned int i;
2316	struct ppc_emulated_entry entries = (void* *)&ppc_emulated;
2317
2318	dir = debugfs_create_dir("emulated_instructions",
2319	arch_debugfs_dir);
2320
2321	debugfs_create_u32("do_warn", `0644`, dir, &ppc_warn_emulated);
2322
2323	for (i = `0`; i < sizeof(ppc_emulated)/sizeof(*entries); i++)
2324	debugfs_create_u32(entries[i].name, `0644`, dir,
2325	(u32 *)&entries[i].val.counter);
2326
2327	return `0`;
2328	}
2329
2330	device_initcall(ppc_warn_emulated_init);
2331
2332	#endif /* CONFIG_PPC_EMULATED_STATS */
2333

source code of linux/arch/powerpc/kernel/traps.c