// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Common time routines among all ppc machines.
 *
 * Written by Cort Dougan (cort@cs.nmt.edu) to merge
 * Paul Mackerras' version and mine for PReP and Pmac.
 * MPC8xx/MBX changes by Dan Malek (dmalek@jlc.net).
 * Converted for 64-bit by Mike Corrigan (mikejc@us.ibm.com)
 *
 * First round of bugfixes by Gabriel Paubert (paubert@iram.es)
 * to make clock more stable (2.4.0-test5). The only thing
 * that this code assumes is that the timebases have been synchronized
 * by firmware on SMP and are never stopped (never do sleep
 * on SMP then, nap and doze are OK).
 *
 * Speeded up do_gettimeofday by getting rid of references to
 * xtime (which required locks for consistency). (mikejc@us.ibm.com)
 *
 * TODO (not necessarily in this file):
 * - improve precision and reproducibility of timebase frequency
 *   measurement at boot time.
 * - for astronomical applications: add a new function to get
 *   non ambiguous timestamps even around leap seconds. This needs
 *   a new timestamp format and a good name.
 *
 * 1997-09-10  Updated NTP code according to technical memorandum Jan '96
 *             "A Kernel Model for Precision Timekeeping" by Dave Mills
 */

#include <linux/errno.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/sched/clock.h>
#include <linux/sched/cputime.h>
#include <linux/kernel.h>
#include <linux/param.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/timex.h>
#include <linux/kernel_stat.h>
#include <linux/time.h>
#include <linux/init.h>
#include <linux/profile.h>
#include <linux/cpu.h>
#include <linux/security.h>
#include <linux/percpu.h>
#include <linux/rtc.h>
#include <linux/jiffies.h>
#include <linux/posix-timers.h>
#include <linux/irq.h>
#include <linux/delay.h>
#include <linux/irq_work.h>
#include <linux/of_clk.h>
#include <linux/suspend.h>
#include <linux/processor.h>
#include <linux/mc146818rtc.h>
#include <linux/platform_device.h>

#include <asm/trace.h>
#include <asm/interrupt.h>
#include <asm/io.h>
#include <asm/nvram.h>
#include <asm/cache.h>
#include <asm/machdep.h>
#include <linux/uaccess.h>
#include <asm/time.h>
#include <asm/irq.h>
#include <asm/div64.h>
#include <asm/smp.h>
#include <asm/vdso_datapage.h>
#include <asm/firmware.h>
#include <asm/mce.h>

/* powerpc clocksource/clockevent code */

#include <linux/clockchips.h>
#include <linux/timekeeper_internal.h>

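/*
 * The timebase (TB) is a 64-bit counter ticking at ppc_tb_freq on every
 * CPU and, per the assumptions in the header comment, kept synchronized
 * across the system by firmware.  VDSO_CLOCKMODE_ARCHTIMER lets the vDSO
 * read it directly, so clock_gettime() and friends need no syscall.
 */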
static u64 timebase_read(struct clocksource *);
static struct clocksource clocksource_timebase = {
	.name			= "timebase",
	.rating			= 400,
	.flags			= CLOCK_SOURCE_IS_CONTINUOUS,
	.mask			= CLOCKSOURCE_MASK(64),
	.read			= timebase_read,
	.vdso_clock_mode	= VDSO_CLOCKMODE_ARCHTIMER,
};

#define DECREMENTER_DEFAULT_MAX 0x7FFFFFFF
u64 decrementer_max = DECREMENTER_DEFAULT_MAX;
EXPORT_SYMBOL_GPL(decrementer_max);	/* for KVM HDEC */

static int decrementer_set_next_event(unsigned long evt,
				      struct clock_event_device *dev);
static int decrementer_shutdown(struct clock_event_device *evt);

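/*
 * CLOCK_EVT_FEAT_C3STOP: the decrementer may not be able to wake the
 * CPU from deep power-saving states, in which case the generic tick
 * code falls back to the broadcast path (see
 * timer_broadcast_interrupt() below).
 */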
struct clock_event_device decrementer_clockevent = {
	.name			= "decrementer",
	.rating			= 200,
	.irq			= 0,
	.set_next_event		= decrementer_set_next_event,
	.set_state_oneshot_stopped = decrementer_shutdown,
	.set_state_shutdown	= decrementer_shutdown,
	.tick_resume		= decrementer_shutdown,
	.features		= CLOCK_EVT_FEAT_ONESHOT |
				  CLOCK_EVT_FEAT_C3STOP,
};
EXPORT_SYMBOL(decrementer_clockevent);

/*
 * This always puts next_tb beyond now, so the clock event will never fire
 * with the usual comparison, no need for a separate test for stopped.
 */
#define DEC_CLOCKEVENT_STOPPED ~0ULL
DEFINE_PER_CPU(u64, decrementers_next_tb) = DEC_CLOCKEVENT_STOPPED;
EXPORT_SYMBOL_GPL(decrementers_next_tb);
static DEFINE_PER_CPU(struct clock_event_device, decrementers);

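/*
 * "xsec" is a fixed-point time unit of 1/2^20 second (XSEC_PER_SEC of
 * them per second), consumed by the SCALE_XSEC() conversion below.
 */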
#define XSEC_PER_SEC (1024*1024)

#ifdef CONFIG_PPC64
#define SCALE_XSEC(xsec, max)	(((xsec) * max) / XSEC_PER_SEC)
#else
/* compute ((xsec << 12) * max) >> 32 */
#define SCALE_XSEC(xsec, max)	mulhwu((xsec) << 12, max)
#endif
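/*
 * Both variants compute xsec * max / 2^20.  On 32-bit, shifting xsec
 * left by 12 and taking the high word of the 32x32 multiply divides by
 * 2^32, and 2^32 >> 12 = 2^20, so the result is the same without a
 * 64-bit division.  E.g. SCALE_XSEC(XSEC_PER_SEC / 2, 1000) == 500.
 */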

unsigned long tb_ticks_per_jiffy;
unsigned long tb_ticks_per_usec = 100; /* sane default */
EXPORT_SYMBOL(tb_ticks_per_usec);
unsigned long tb_ticks_per_sec;
EXPORT_SYMBOL(tb_ticks_per_sec);	/* for cputime conversions */

DEFINE_SPINLOCK(rtc_lock);
EXPORT_SYMBOL_GPL(rtc_lock);

static u64 tb_to_ns_scale __read_mostly;
static unsigned tb_to_ns_shift __read_mostly;
static u64 boot_tb __read_mostly;

extern struct timezone sys_tz;
static long timezone_offset;

unsigned long ppc_proc_freq;
EXPORT_SYMBOL_GPL(ppc_proc_freq);
unsigned long ppc_tb_freq;
EXPORT_SYMBOL_GPL(ppc_tb_freq);

bool tb_invalid;

#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
/*
 * Read the SPURR on systems that have it, otherwise the PURR,
 * or if that doesn't exist return the timebase value passed in.
 */
static inline unsigned long read_spurr(unsigned long tb)
{
	if (cpu_has_feature(CPU_FTR_SPURR))
		return mfspr(SPRN_SPURR);
	if (cpu_has_feature(CPU_FTR_PURR))
		return mfspr(SPRN_PURR);
	return tb;
}

/*
 * Account time for a transition between system, hard irq
 * or soft irq state.
 */
static unsigned long vtime_delta_scaled(struct cpu_accounting_data *acct,
					unsigned long now, unsigned long stime)
{
	unsigned long stime_scaled = 0;
#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
	unsigned long nowscaled, deltascaled;
	unsigned long utime, utime_scaled;

	nowscaled = read_spurr(now);
	deltascaled = nowscaled - acct->startspurr;
	acct->startspurr = nowscaled;
	utime = acct->utime - acct->utime_sspurr;
	acct->utime_sspurr = acct->utime;

	/*
	 * Because we don't read the SPURR on every kernel entry/exit,
	 * deltascaled includes both user and system SPURR ticks.
	 * Apportion these ticks to system SPURR ticks and user
	 * SPURR ticks in the same ratio as the system time (delta)
	 * and user time (udelta) values obtained from the timebase
	 * over the same interval. The system ticks get accounted here;
	 * the user ticks get saved up in paca->user_time_scaled to be
	 * used by account_process_tick.
	 */
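	/*
	 * For example: if the timebase says stime = 300 and utime = 100
	 * ticks but the SPURR only advanced by deltascaled = 200 (the
	 * CPU ran throttled), the split below gives stime_scaled = 150
	 * and utime_scaled = 50, preserving the 3:1 ratio.
	 */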
	stime_scaled = stime;
	utime_scaled = utime;
	if (deltascaled != stime + utime) {
		if (utime) {
			stime_scaled = deltascaled * stime / (stime + utime);
			utime_scaled = deltascaled - stime_scaled;
		} else {
			stime_scaled = deltascaled;
		}
	}
	acct->utime_scaled += utime_scaled;
#endif

	return stime_scaled;
}

static unsigned long vtime_delta(struct cpu_accounting_data *acct,
				 unsigned long *stime_scaled,
				 unsigned long *steal_time)
{
	unsigned long now, stime;

	WARN_ON_ONCE(!irqs_disabled());

	now = mftb();
	stime = now - acct->starttime;
	acct->starttime = now;

	*stime_scaled = vtime_delta_scaled(acct, now, stime);

	if (IS_ENABLED(CONFIG_PPC_SPLPAR) &&
	    firmware_has_feature(FW_FEATURE_SPLPAR))
		*steal_time = pseries_calculate_stolen_time(now);
	else
		*steal_time = 0;

	return stime;
}

static void vtime_delta_kernel(struct cpu_accounting_data *acct,
			       unsigned long *stime, unsigned long *stime_scaled)
{
	unsigned long steal_time;

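	/*
	 * Steal time (ticks during which the hypervisor ran something
	 * else) is subtracted from kernel time so it isn't charged to
	 * the task; it accumulates in acct->steal_time and is reported
	 * via account_steal_time() in vtime_flush().
	 */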
	*stime = vtime_delta(acct, stime_scaled, &steal_time);
	*stime -= min(*stime, steal_time);
	acct->steal_time += steal_time;
}

void vtime_account_kernel(struct task_struct *tsk)
{
	struct cpu_accounting_data *acct = get_accounting(tsk);
	unsigned long stime, stime_scaled;

	vtime_delta_kernel(acct, &stime, &stime_scaled);

	if (tsk->flags & PF_VCPU) {
		acct->gtime += stime;
#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
		acct->utime_scaled += stime_scaled;
#endif
	} else {
		acct->stime += stime;
#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
		acct->stime_scaled += stime_scaled;
#endif
	}
}
EXPORT_SYMBOL_GPL(vtime_account_kernel);

void vtime_account_idle(struct task_struct *tsk)
{
	unsigned long stime, stime_scaled, steal_time;
	struct cpu_accounting_data *acct = get_accounting(tsk);

	stime = vtime_delta(acct, &stime_scaled, &steal_time);
	acct->idle_time += stime + steal_time;
}

static void vtime_account_irq_field(struct cpu_accounting_data *acct,
				    unsigned long *field)
{
	unsigned long stime, stime_scaled;

	vtime_delta_kernel(acct, &stime, &stime_scaled);
	*field += stime;
#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
	acct->stime_scaled += stime_scaled;
#endif
}

void vtime_account_softirq(struct task_struct *tsk)
{
	struct cpu_accounting_data *acct = get_accounting(tsk);
	vtime_account_irq_field(acct, &acct->softirq_time);
}

void vtime_account_hardirq(struct task_struct *tsk)
{
	struct cpu_accounting_data *acct = get_accounting(tsk);
	vtime_account_irq_field(acct, &acct->hardirq_time);
}

static void vtime_flush_scaled(struct task_struct *tsk,
			       struct cpu_accounting_data *acct)
{
#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
	if (acct->utime_scaled)
		tsk->utimescaled += cputime_to_nsecs(acct->utime_scaled);
	if (acct->stime_scaled)
		tsk->stimescaled += cputime_to_nsecs(acct->stime_scaled);

	acct->utime_scaled = 0;
	acct->utime_sspurr = 0;
	acct->stime_scaled = 0;
#endif
}

/*
 * Account the whole cputime accumulated in the paca
 * Must be called with interrupts disabled.
 * Assumes that vtime_account_kernel/idle() has been called
 * recently (i.e. since the last entry from usermode) so that
 * get_paca()->user_time_scaled is up to date.
 */
void vtime_flush(struct task_struct *tsk)
{
	struct cpu_accounting_data *acct = get_accounting(tsk);

	if (acct->utime)
		account_user_time(tsk, cputime_to_nsecs(acct->utime));

	if (acct->gtime)
		account_guest_time(tsk, cputime_to_nsecs(acct->gtime));

	if (IS_ENABLED(CONFIG_PPC_SPLPAR) && acct->steal_time) {
		account_steal_time(cputime_to_nsecs(acct->steal_time));
		acct->steal_time = 0;
	}

	if (acct->idle_time)
		account_idle_time(cputime_to_nsecs(acct->idle_time));

	if (acct->stime)
		account_system_index_time(tsk, cputime_to_nsecs(acct->stime),
					  CPUTIME_SYSTEM);

	if (acct->hardirq_time)
		account_system_index_time(tsk, cputime_to_nsecs(acct->hardirq_time),
					  CPUTIME_IRQ);
	if (acct->softirq_time)
		account_system_index_time(tsk, cputime_to_nsecs(acct->softirq_time),
					  CPUTIME_SOFTIRQ);

	vtime_flush_scaled(tsk, acct);

	acct->utime = 0;
	acct->gtime = 0;
	acct->idle_time = 0;
	acct->stime = 0;
	acct->hardirq_time = 0;
	acct->softirq_time = 0;
}
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */

void __no_kcsan __delay(unsigned long loops)
{
	unsigned long start;

	spin_begin();
	if (tb_invalid) {
		/*
		 * TB is in error state and isn't ticking anymore.
		 * HMI handler was unable to recover from TB error.
		 * Return immediately, so that kernel won't get stuck here.
		 */
		spin_cpu_relax();
	} else {
		start = mftb();
		while (mftb() - start < loops)
			spin_cpu_relax();
	}
	spin_end();
}
EXPORT_SYMBOL(__delay);

void __no_kcsan udelay(unsigned long usecs)
{
	__delay(tb_ticks_per_usec * usecs);
}
EXPORT_SYMBOL(udelay);
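
/*
 * udelay() busy-waits on the timebase, so its accuracy depends on
 * tb_ticks_per_usec: that starts out at the default of 100 above and is
 * recalculated from the calibrated timebase frequency in time_init().
 */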

#ifdef CONFIG_SMP
unsigned long profile_pc(struct pt_regs *regs)
{
	unsigned long pc = instruction_pointer(regs);

	if (in_lock_functions(pc))
		return regs->link;

	return pc;
}
EXPORT_SYMBOL(profile_pc);
#endif

#ifdef CONFIG_IRQ_WORK

/*
 * 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable...
 */
#ifdef CONFIG_PPC64
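/*
 * On 64-bit, r13 always holds the PACA pointer, so the flag can be read
 * and written with a single load/store off r13, which is safe in any
 * context, including NMIs.
 */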
static inline unsigned long test_irq_work_pending(void)
{
	unsigned long x;

	asm volatile("lbz %0,%1(13)"
		     : "=r" (x)
		     : "i" (offsetof(struct paca_struct, irq_work_pending)));
	return x;
}

static inline void set_irq_work_pending_flag(void)
{
	asm volatile("stb %0,%1(13)" : :
		     "r" (1),
		     "i" (offsetof(struct paca_struct, irq_work_pending)));
}

static inline void clear_irq_work_pending(void)
{
	asm volatile("stb %0,%1(13)" : :
		     "r" (0),
		     "i" (offsetof(struct paca_struct, irq_work_pending)));
}

#else /* 32-bit */

DEFINE_PER_CPU(u8, irq_work_pending);

#define set_irq_work_pending_flag()	__this_cpu_write(irq_work_pending, 1)
#define test_irq_work_pending()		__this_cpu_read(irq_work_pending)
#define clear_irq_work_pending()	__this_cpu_write(irq_work_pending, 0)

#endif /* 32 vs 64 bit */

void arch_irq_work_raise(void)
{
	/*
	 * 64-bit code that uses irq soft-mask can just cause an immediate
	 * interrupt here that gets soft masked, if this is called under
	 * local_irq_disable(). It might be possible to prevent that happening
	 * by noticing interrupts are disabled and setting decrementer pending
	 * to be replayed when irqs are enabled. The problem there is that
	 * tracing can call irq_work_raise, including in code that does low
	 * level manipulations of irq soft-mask state (e.g., trace_hardirqs_on)
	 * which could get tangled up if we're messing with the same state
	 * here.
	 */
	preempt_disable();
	set_irq_work_pending_flag();
	set_dec(1);
	preempt_enable();
}

static void set_dec_or_work(u64 val)
{
	set_dec(val);
	/* We may have raced with new irq work */
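	/*
	 * arch_irq_work_raise() sets the pending flag before poking the
	 * decrementer, so if new work was raised (e.g. from NMI context)
	 * after the set_dec() above, the test below sees the flag and
	 * re-arms for an immediate interrupt; the work cannot be lost.
	 */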
	if (unlikely(test_irq_work_pending()))
		set_dec(1);
}

#else  /* CONFIG_IRQ_WORK */

#define test_irq_work_pending()	0
#define clear_irq_work_pending()

static void set_dec_or_work(u64 val)
{
	set_dec(val);
}
#endif /* CONFIG_IRQ_WORK */

#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
void timer_rearm_host_dec(u64 now)
{
	u64 *next_tb = this_cpu_ptr(&decrementers_next_tb);

	WARN_ON_ONCE(!arch_irqs_disabled());
	WARN_ON_ONCE(mfmsr() & MSR_EE);

	if (now >= *next_tb) {
		local_paca->irq_happened |= PACA_IRQ_DEC;
	} else {
		now = *next_tb - now;
		if (now > decrementer_max)
			now = decrementer_max;
		set_dec_or_work(now);
	}
}
EXPORT_SYMBOL_GPL(timer_rearm_host_dec);
#endif

/*
 * timer_interrupt - gets called when the decrementer overflows,
 * with interrupts disabled.
 */
DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt)
{
	struct clock_event_device *evt = this_cpu_ptr(&decrementers);
	u64 *next_tb = this_cpu_ptr(&decrementers_next_tb);
	struct pt_regs *old_regs;
	u64 now;

	/*
	 * Some implementations of hotplug will get timer interrupts while
	 * offline, just ignore these.
	 */
	if (unlikely(!cpu_online(smp_processor_id()))) {
		set_dec(decrementer_max);
		return;
	}

	/* Conditionally hard-enable interrupts. */
	if (should_hard_irq_enable(regs)) {
		/*
		 * Ensure a positive value is written to the decrementer, or
		 * else some CPUs will continue to take decrementer exceptions.
		 * When the PPC_WATCHDOG (decrementer based) is configured,
		 * keep this at most 31 bits, which is about 4 seconds on most
		 * systems, which gives the watchdog a chance of catching timer
		 * interrupt hard lockups.
		 */
		if (IS_ENABLED(CONFIG_PPC_WATCHDOG))
			set_dec(0x7fffffff);
		else
			set_dec(decrementer_max);

		do_hard_irq_enable();
	}

#if defined(CONFIG_PPC32) && defined(CONFIG_PPC_PMAC)
	if (atomic_read(&ppc_n_lost_interrupts) != 0)
		__do_IRQ(regs);
#endif

	old_regs = set_irq_regs(regs);

	trace_timer_interrupt_entry(regs);

	if (test_irq_work_pending()) {
		clear_irq_work_pending();
		mce_run_irq_context_handlers();
		irq_work_run();
	}

	now = get_tb();
	if (now >= *next_tb) {
		evt->event_handler(evt);
		__this_cpu_inc(irq_stat.timer_irqs_event);
	} else {
		now = *next_tb - now;
		if (now > decrementer_max)
			now = decrementer_max;
		set_dec_or_work(now);
		__this_cpu_inc(irq_stat.timer_irqs_others);
	}

	trace_timer_interrupt_exit(regs);

	set_irq_regs(old_regs);
}
EXPORT_SYMBOL(timer_interrupt);

#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
void timer_broadcast_interrupt(void)
{
	tick_receive_broadcast();
	__this_cpu_inc(irq_stat.broadcast_irqs_event);
}
#endif

#ifdef CONFIG_SUSPEND
/* Overrides the weak version in kernel/power/main.c */
void arch_suspend_disable_irqs(void)
{
	if (ppc_md.suspend_disable_irqs)
		ppc_md.suspend_disable_irqs();

	/* Disable the decrementer, so that it doesn't interfere
	 * with suspending.
	 */

	set_dec(decrementer_max);
	local_irq_disable();
	set_dec(decrementer_max);
}

/* Overrides the weak version in kernel/power/main.c */
void arch_suspend_enable_irqs(void)
{
	local_irq_enable();

	if (ppc_md.suspend_enable_irqs)
		ppc_md.suspend_enable_irqs();
}
#endif

unsigned long long tb_to_ns(unsigned long long ticks)
{
	return mulhdu(ticks, tb_to_ns_scale) << tb_to_ns_shift;
}
EXPORT_SYMBOL_GPL(tb_to_ns);

/*
 * Scheduler clock - returns current time in nanosec units.
 *
 * Note: mulhdu(a, b) (multiply high double unsigned) returns
 * the high 64 bits of a * b, i.e. (a * b) >> 64, where a and b
 * are 64-bit unsigned numbers.
 */
notrace unsigned long long sched_clock(void)
{
	return mulhdu(get_tb() - boot_tb, tb_to_ns_scale) << tb_to_ns_shift;
}

#ifdef CONFIG_PPC_PSERIES

/*
 * Running clock - attempts to give a view of time passing for a
 * virtualised kernel.
 * Uses the VTB register if available, otherwise a next best guess.
 */
unsigned long long running_clock(void)
{
	/*
	 * Don't read the VTB as a host, since KVM does not switch the host
	 * timebase into the VTB when it takes a guest off the CPU; reading
	 * the VTB would return the VTB of the last guest switched out.
	 *
	 * Host kernels are often compiled with CONFIG_PPC_PSERIES enabled,
	 * so it would be unsafe to rely only on the #ifdef above.
	 */
	if (firmware_has_feature(FW_FEATURE_LPAR) &&
	    cpu_has_feature(CPU_FTR_ARCH_207S))
		return mulhdu(get_vtb() - boot_tb, tb_to_ns_scale) << tb_to_ns_shift;

	/*
	 * This is the next best approximation without a VTB.
	 * On a host running bare metal there should never be any stolen
	 * time, and on a host which doesn't do any virtualisation TB
	 * *should* equal the VTB, so it makes no difference anyway.
	 */
	return local_clock() - kcpustat_this_cpu->cpustat[CPUTIME_STEAL];
}
#endif

static int __init get_freq(char *name, int cells, unsigned long *val)
{
	struct device_node *cpu;
	const __be32 *fp;
	int found = 0;

	/* The cpu node should have timebase and clock frequency properties */
	cpu = of_find_node_by_type(NULL, "cpu");

	if (cpu) {
		fp = of_get_property(cpu, name, NULL);
		if (fp) {
			found = 1;
			*val = of_read_ulong(fp, cells);
		}

		of_node_put(cpu);
	}

	return found;
}

static void start_cpu_decrementer(void)
{
#ifdef CONFIG_BOOKE_OR_40x
	unsigned int tcr;

	/* Clear any pending timer interrupts */
	mtspr(SPRN_TSR, TSR_ENW | TSR_WIS | TSR_DIS | TSR_FIS);

	tcr = mfspr(SPRN_TCR);
	/*
	 * The watchdog may have already been enabled by u-boot. So leave
	 * TCR[WP] (Watchdog Period) alone.
	 */
	tcr &= TCR_WP_MASK;	/* Clear all bits except for TCR[WP] */
	tcr |= TCR_DIE;		/* Enable decrementer */
	mtspr(SPRN_TCR, tcr);
#endif
}

void __init generic_calibrate_decr(void)
{
	ppc_tb_freq = DEFAULT_TB_FREQ;		/* hardcoded default */

	if (!get_freq("ibm,extended-timebase-frequency", 2, &ppc_tb_freq) &&
	    !get_freq("timebase-frequency", 1, &ppc_tb_freq)) {

		printk(KERN_ERR "WARNING: Estimating decrementer frequency (not found)\n");
	}

	ppc_proc_freq = DEFAULT_PROC_FREQ;	/* hardcoded default */

	if (!get_freq("ibm,extended-clock-frequency", 2, &ppc_proc_freq) &&
	    !get_freq("clock-frequency", 1, &ppc_proc_freq)) {

		printk(KERN_ERR "WARNING: Estimating processor frequency (not found)\n");
	}
}
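
/*
 * An illustrative device tree fragment that get_freq() would parse
 * (property values below are examples only, not real hardware):
 *
 *	cpus {
 *		cpu@0 {
 *			device_type = "cpu";
 *			timebase-frequency = <512000000>;	// 512 MHz TB
 *			clock-frequency = <3000000000>;		// 3 GHz core
 *		};
 *	};
 */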

int update_persistent_clock64(struct timespec64 now)
{
	struct rtc_time tm;

	if (!ppc_md.set_rtc_time)
		return -ENODEV;

	rtc_time64_to_tm(now.tv_sec + 1 + timezone_offset, &tm);

	return ppc_md.set_rtc_time(&tm);
}

static void __read_persistent_clock(struct timespec64 *ts)
{
	struct rtc_time tm;
	static int first = 1;

	ts->tv_nsec = 0;
	/* XXX this is a little fragile but will work okay in the short term */
	if (first) {
		first = 0;
		if (ppc_md.time_init)
			timezone_offset = ppc_md.time_init();

		/* get_boot_time() isn't guaranteed to be safe to call late */
		if (ppc_md.get_boot_time) {
			ts->tv_sec = ppc_md.get_boot_time() - timezone_offset;
			return;
		}
	}
	if (!ppc_md.get_rtc_time) {
		ts->tv_sec = 0;
		return;
	}
	ppc_md.get_rtc_time(&tm);

	ts->tv_sec = rtc_tm_to_time64(&tm);
}

void read_persistent_clock64(struct timespec64 *ts)
{
	__read_persistent_clock(ts);

	/* Sanitize it in case real time clock is set below EPOCH */
	if (ts->tv_sec < 0) {
		ts->tv_sec = 0;
		ts->tv_nsec = 0;
	}
}

/* clocksource code */
static notrace u64 timebase_read(struct clocksource *cs)
{
	return (u64)get_tb();
}

static void __init clocksource_init(void)
{
	struct clocksource *clock = &clocksource_timebase;

	if (clocksource_register_hz(clock, tb_ticks_per_sec)) {
		printk(KERN_ERR "clocksource: %s is already registered\n",
		       clock->name);
		return;
	}

	printk(KERN_INFO "clocksource: %s mult[%x] shift[%d] registered\n",
	       clock->name, clock->mult, clock->shift);
}

static int decrementer_set_next_event(unsigned long evt,
				      struct clock_event_device *dev)
{
	__this_cpu_write(decrementers_next_tb, get_tb() + evt);
	set_dec_or_work(evt);

	return 0;
}

static int decrementer_shutdown(struct clock_event_device *dev)
{
	__this_cpu_write(decrementers_next_tb, DEC_CLOCKEVENT_STOPPED);
	set_dec_or_work(decrementer_max);

	return 0;
}

static void register_decrementer_clockevent(int cpu)
{
	struct clock_event_device *dec = &per_cpu(decrementers, cpu);

	*dec = decrementer_clockevent;
	dec->cpumask = cpumask_of(cpu);

	clockevents_config_and_register(dec, ppc_tb_freq, 2, decrementer_max);

	printk_once(KERN_DEBUG "clockevent: %s mult[%x] shift[%d] cpu[%d]\n",
		    dec->name, dec->mult, dec->shift, cpu);

	/* Set values for KVM, see kvm_emulate_dec() */
	decrementer_clockevent.mult = dec->mult;
	decrementer_clockevent.shift = dec->shift;
}

static void enable_large_decrementer(void)
{
	if (!cpu_has_feature(CPU_FTR_ARCH_300))
		return;

	if (decrementer_max <= DECREMENTER_DEFAULT_MAX)
		return;

	/*
	 * If we're running as the hypervisor we need to enable the LD
	 * manually, otherwise firmware should have done it for us.
	 */
	if (cpu_has_feature(CPU_FTR_HVMODE))
		mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_LD);
}

static void __init set_decrementer_max(void)
{
	struct device_node *cpu;
	u32 bits = 32;

	/* Prior to ISAv3 the decrementer is always 32 bit */
	if (!cpu_has_feature(CPU_FTR_ARCH_300))
		return;

	cpu = of_find_node_by_type(NULL, "cpu");

	if (of_property_read_u32(cpu, "ibm,dec-bits", &bits) == 0) {
		if (bits > 64 || bits < 32) {
			pr_warn("time_init: firmware supplied invalid ibm,dec-bits");
			bits = 32;
		}

		/* calculate the signed maximum given this many bits */
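		/* e.g. ibm,dec-bits = 56 gives (1ul << 55) - 1 = 0x7fffffffffffff */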
		decrementer_max = (1ul << (bits - 1)) - 1;
	}

	of_node_put(cpu);

	pr_info("time_init: %u bit decrementer (max: %llx)\n",
		bits, decrementer_max);
}

static void __init init_decrementer_clockevent(void)
{
	register_decrementer_clockevent(smp_processor_id());
}

void secondary_cpu_time_init(void)
{
	/* Enable and test the large decrementer for this cpu */
	enable_large_decrementer();

	/* Start the decrementer on CPUs that have manual control
	 * such as BookE
	 */
	start_cpu_decrementer();

	/* FIXME: Should make an unrelated change to move the
	 * snapshot_timebase call here!
	 */
	register_decrementer_clockevent(smp_processor_id());
}

/* This function is only called on the boot processor */
void __init time_init(void)
{
	struct div_result res;
	u64 scale;
	unsigned shift;

	/* Normal PowerPC with timebase register */
	if (ppc_md.calibrate_decr)
		ppc_md.calibrate_decr();
	else
		generic_calibrate_decr();

	printk(KERN_DEBUG "time_init: decrementer frequency = %lu.%.6lu MHz\n",
	       ppc_tb_freq / 1000000, ppc_tb_freq % 1000000);
	printk(KERN_DEBUG "time_init: processor frequency = %lu.%.6lu MHz\n",
	       ppc_proc_freq / 1000000, ppc_proc_freq % 1000000);

	tb_ticks_per_jiffy = ppc_tb_freq / HZ;
	tb_ticks_per_sec = ppc_tb_freq;
	tb_ticks_per_usec = ppc_tb_freq / 1000000;

	/*
	 * Compute scale factor for sched_clock.
	 * The calibrate_decr() function has set tb_ticks_per_sec,
	 * which is the timebase frequency.
	 * We compute 1e9 * 2^64 / tb_ticks_per_sec and interpret
	 * the 128-bit result as a 64.64 fixed-point number.
	 * We then shift that number right until it is less than 1.0,
	 * giving us the scale factor and shift count to use in
	 * sched_clock().
	 */
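	/*
	 * Worked example (hypothetical 512 MHz timebase):
	 * 1e9 * 2^64 / 512e6 = 1.953125 * 2^64, so res.result_high = 1;
	 * one shift right gives scale = 0.9765625 * 2^64
	 * = 0xfa00000000000000 and shift = 1, and sched_clock() then
	 * computes ((tb * scale) >> 64) << 1 = tb * 1.953125 ns/tick.
	 */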
	div128_by_32(1000000000, 0, tb_ticks_per_sec, &res);
	scale = res.result_low;
	for (shift = 0; res.result_high != 0; ++shift) {
		scale = (scale >> 1) | (res.result_high << 63);
		res.result_high >>= 1;
	}
	tb_to_ns_scale = scale;
	tb_to_ns_shift = shift;
	/* Save the current timebase to pretty up CONFIG_PRINTK_TIME */
	boot_tb = get_tb();

	/* If platform provided a timezone (pmac), we correct the time */
	if (timezone_offset) {
		sys_tz.tz_minuteswest = -timezone_offset / 60;
		sys_tz.tz_dsttime = 0;
	}

	vdso_data->tb_ticks_per_sec = tb_ticks_per_sec;

	/* initialise and enable the large decrementer (if we have one) */
	set_decrementer_max();
	enable_large_decrementer();

	/* Start the decrementer on CPUs that have manual control
	 * such as BookE
	 */
	start_cpu_decrementer();

	/* Register the clocksource */
	clocksource_init();

	init_decrementer_clockevent();
	tick_setup_hrtimer_broadcast();

	of_clk_init(NULL);
	enable_sched_clock_irqtime();
}

/*
 * Divide a 128-bit dividend by a 32-bit divisor, leaving a 128 bit
 * result.
 */
void div128_by_32(u64 dividend_high, u64 dividend_low,
		  unsigned divisor, struct div_result *dr)
{
	unsigned long a, b, c, d;
	unsigned long w, x, y, z;
	u64 ra, rb, rc;
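
	/*
	 * Schoolbook long division in base 2^32: a:b:c:d are the four
	 * 32-bit "digits" of the dividend, w:x:y:z become the digits of
	 * the quotient, and ra/rb/rc carry each step's remainder (times
	 * 2^32, plus the next dividend digit) into the next division.
	 */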
	a = dividend_high >> 32;
	b = dividend_high & 0xffffffff;
	c = dividend_low >> 32;
	d = dividend_low & 0xffffffff;

	w = a / divisor;
	ra = ((u64)(a - (w * divisor)) << 32) + b;

	rb = ((u64) do_div(ra, divisor) << 32) + c;
	x = ra;

	rc = ((u64) do_div(rb, divisor) << 32) + d;
	y = rb;

	do_div(rc, divisor);
	z = rc;

	dr->result_high = ((u64)w << 32) + x;
	dr->result_low = ((u64)y << 32) + z;
}

/* We don't need to calibrate delay, we use the CPU timebase for that */
void calibrate_delay(void)
{
	/* Some generic code (such as spinlock debug) use loops_per_jiffy
	 * as the number of __delay(1) in a jiffy, so make it so
	 */
	loops_per_jiffy = tb_ticks_per_jiffy;
}

#if IS_ENABLED(CONFIG_RTC_DRV_GENERIC)
static int rtc_generic_get_time(struct device *dev, struct rtc_time *tm)
{
	ppc_md.get_rtc_time(tm);
	return 0;
}

static int rtc_generic_set_time(struct device *dev, struct rtc_time *tm)
{
	if (!ppc_md.set_rtc_time)
		return -EOPNOTSUPP;

	if (ppc_md.set_rtc_time(tm) < 0)
		return -EOPNOTSUPP;

	return 0;
}

static const struct rtc_class_ops rtc_generic_ops = {
	.read_time = rtc_generic_get_time,
	.set_time = rtc_generic_set_time,
};

static int __init rtc_init(void)
{
	struct platform_device *pdev;

	if (!ppc_md.get_rtc_time)
		return -ENODEV;

	pdev = platform_device_register_data(NULL, "rtc-generic", -1,
					     &rtc_generic_ops,
					     sizeof(rtc_generic_ops));

	return PTR_ERR_OR_ZERO(pdev);
}

device_initcall(rtc_init);
#endif