// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Common time routines among all ppc machines.
 *
 * Written by Cort Dougan (cort@cs.nmt.edu) to merge
 * Paul Mackerras' version and mine for PReP and Pmac.
 * MPC8xx/MBX changes by Dan Malek (dmalek@jlc.net).
 * Converted for 64-bit by Mike Corrigan (mikejc@us.ibm.com)
 *
 * First round of bugfixes by Gabriel Paubert (paubert@iram.es)
 * to make clock more stable (2.4.0-test5). The only thing
 * that this code assumes is that the timebases have been synchronized
 * by firmware on SMP and are never stopped (never do sleep
 * on SMP then, nap and doze are OK).
 *
 * Speeded up do_gettimeofday by getting rid of references to
 * xtime (which required locks for consistency). (mikejc@us.ibm.com)
 *
 * TODO (not necessarily in this file):
 * - improve precision and reproducibility of timebase frequency
 *   measurement at boot time.
 * - for astronomical applications: add a new function to get
 *   non ambiguous timestamps even around leap seconds. This needs
 *   a new timestamp format and a good name.
 *
 * 1997-09-10  Updated NTP code according to technical memorandum Jan '96
 *             "A Kernel Model for Precision Timekeeping" by Dave Mills
 */

#include <linux/errno.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/sched/clock.h>
#include <linux/sched/cputime.h>
#include <linux/kernel.h>
#include <linux/param.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/timex.h>
#include <linux/kernel_stat.h>
#include <linux/time.h>
#include <linux/init.h>
#include <linux/profile.h>
#include <linux/cpu.h>
#include <linux/security.h>
#include <linux/percpu.h>
#include <linux/rtc.h>
#include <linux/jiffies.h>
#include <linux/posix-timers.h>
#include <linux/irq.h>
#include <linux/delay.h>
#include <linux/irq_work.h>
#include <linux/of_clk.h>
#include <linux/suspend.h>
#include <linux/processor.h>
#include <linux/mc146818rtc.h>
#include <linux/platform_device.h>

#include <asm/trace.h>
#include <asm/interrupt.h>
#include <asm/io.h>
#include <asm/nvram.h>
#include <asm/cache.h>
#include <asm/machdep.h>
#include <linux/uaccess.h>
#include <asm/time.h>
#include <asm/irq.h>
#include <asm/div64.h>
#include <asm/smp.h>
#include <asm/vdso_datapage.h>
#include <asm/firmware.h>
#include <asm/mce.h>

/* powerpc clocksource/clockevent code */

#include <linux/clockchips.h>
#include <linux/timekeeper_internal.h>

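/*
 * The timebase (TB) is a 64-bit counter ticking at ppc_tb_freq on every
 * CPU and, per the assumptions in the header comment, kept synchronized
 * across the system by firmware.  VDSO_CLOCKMODE_ARCHTIMER lets the vDSO
 * read it directly, so clock_gettime() and friends need no syscall.
 */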
static u64 timebase_read(struct clocksource *);
static struct clocksource clocksource_timebase = {
	.name			= "timebase",
	.rating			= 400,
	.flags			= CLOCK_SOURCE_IS_CONTINUOUS,
	.mask			= CLOCKSOURCE_MASK(64),
	.read			= timebase_read,
	.vdso_clock_mode	= VDSO_CLOCKMODE_ARCHTIMER,
};

#define DECREMENTER_DEFAULT_MAX 0x7FFFFFFF
u64 decrementer_max = DECREMENTER_DEFAULT_MAX;
EXPORT_SYMBOL_GPL(decrementer_max);	/* for KVM HDEC */

static int decrementer_set_next_event(unsigned long evt,
				      struct clock_event_device *dev);
static int decrementer_shutdown(struct clock_event_device *evt);

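/*
 * CLOCK_EVT_FEAT_C3STOP: the decrementer may not be able to wake the
 * CPU from deep power-saving states, in which case the generic tick
 * code falls back to the broadcast path (see
 * timer_broadcast_interrupt() below).
 */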
struct clock_event_device decrementer_clockevent = {
	.name			= "decrementer",
	.rating			= 200,
	.irq			= 0,
	.set_next_event		= decrementer_set_next_event,
	.set_state_oneshot_stopped = decrementer_shutdown,
	.set_state_shutdown	= decrementer_shutdown,
	.tick_resume		= decrementer_shutdown,
	.features		= CLOCK_EVT_FEAT_ONESHOT |
				  CLOCK_EVT_FEAT_C3STOP,
};
EXPORT_SYMBOL(decrementer_clockevent);

/*
 * This always puts next_tb beyond now, so the clock event will never fire
 * with the usual comparison, no need for a separate test for stopped.
 */
#define DEC_CLOCKEVENT_STOPPED ~0ULL
DEFINE_PER_CPU(u64, decrementers_next_tb) = DEC_CLOCKEVENT_STOPPED;
EXPORT_SYMBOL_GPL(decrementers_next_tb);
static DEFINE_PER_CPU(struct clock_event_device, decrementers);

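/*
 * "xsec" is a fixed-point time unit of 1/2^20 second (XSEC_PER_SEC of
 * them per second), consumed by the SCALE_XSEC() conversion below.
 */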
#define XSEC_PER_SEC (1024*1024)

#ifdef CONFIG_PPC64
#define SCALE_XSEC(xsec, max)	(((xsec) * max) / XSEC_PER_SEC)
#else
/* compute ((xsec << 12) * max) >> 32 */
#define SCALE_XSEC(xsec, max)	mulhwu((xsec) << 12, max)
#endif
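/*
 * Both variants compute xsec * max / 2^20.  On 32-bit, shifting xsec
 * left by 12 and taking the high word of the 32x32 multiply divides by
 * 2^32, and 2^32 >> 12 = 2^20, so the result is the same without a
 * 64-bit division.  E.g. SCALE_XSEC(XSEC_PER_SEC / 2, 1000) == 500.
 */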

unsigned long tb_ticks_per_jiffy;
unsigned long tb_ticks_per_usec = 100; /* sane default */
EXPORT_SYMBOL(tb_ticks_per_usec);
unsigned long tb_ticks_per_sec;
EXPORT_SYMBOL(tb_ticks_per_sec);	/* for cputime conversions */

DEFINE_SPINLOCK(rtc_lock);
EXPORT_SYMBOL_GPL(rtc_lock);

static u64 tb_to_ns_scale __read_mostly;
static unsigned tb_to_ns_shift __read_mostly;
static u64 boot_tb __read_mostly;

extern struct timezone sys_tz;
static long timezone_offset;

unsigned long ppc_proc_freq;
EXPORT_SYMBOL_GPL(ppc_proc_freq);
unsigned long ppc_tb_freq;
EXPORT_SYMBOL_GPL(ppc_tb_freq);

bool tb_invalid;

#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
/*
 * Read the SPURR on systems that have it, otherwise the PURR,
 * or if that doesn't exist return the timebase value passed in.
 */
static inline unsigned long read_spurr(unsigned long tb)
{
	if (cpu_has_feature(CPU_FTR_SPURR))
		return mfspr(SPRN_SPURR);
	if (cpu_has_feature(CPU_FTR_PURR))
		return mfspr(SPRN_PURR);
	return tb;
}

/*
 * Account time for a transition between system, hard irq
 * or soft irq state.
 */
static unsigned long vtime_delta_scaled(struct cpu_accounting_data *acct,
					unsigned long now, unsigned long stime)
{
	unsigned long stime_scaled = 0;
#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
	unsigned long nowscaled, deltascaled;
	unsigned long utime, utime_scaled;

	nowscaled = read_spurr(now);
	deltascaled = nowscaled - acct->startspurr;
	acct->startspurr = nowscaled;
	utime = acct->utime - acct->utime_sspurr;
	acct->utime_sspurr = acct->utime;

	/*
	 * Because we don't read the SPURR on every kernel entry/exit,
	 * deltascaled includes both user and system SPURR ticks.
	 * Apportion these ticks to system SPURR ticks and user
	 * SPURR ticks in the same ratio as the system time (delta)
	 * and user time (udelta) values obtained from the timebase
	 * over the same interval. The system ticks get accounted here;
	 * the user ticks get saved up in paca->user_time_scaled to be
	 * used by account_process_tick.
	 */
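	/*
	 * For example: if the timebase says stime = 300 and utime = 100
	 * ticks but the SPURR only advanced by deltascaled = 200 (the
	 * CPU ran throttled), the split below gives stime_scaled = 150
	 * and utime_scaled = 50, preserving the 3:1 ratio.
	 */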
	stime_scaled = stime;
	utime_scaled = utime;
	if (deltascaled != stime + utime) {
		if (utime) {
			stime_scaled = deltascaled * stime / (stime + utime);
			utime_scaled = deltascaled - stime_scaled;
		} else {
			stime_scaled = deltascaled;
		}
	}
	acct->utime_scaled += utime_scaled;
#endif

	return stime_scaled;
}

static unsigned long vtime_delta(struct cpu_accounting_data *acct,
				 unsigned long *stime_scaled,
				 unsigned long *steal_time)
{
	unsigned long now, stime;

	WARN_ON_ONCE(!irqs_disabled());

	now = mftb();
	stime = now - acct->starttime;
	acct->starttime = now;

	*stime_scaled = vtime_delta_scaled(acct, now, stime);

	if (IS_ENABLED(CONFIG_PPC_SPLPAR) &&
	    firmware_has_feature(FW_FEATURE_SPLPAR))
		*steal_time = pseries_calculate_stolen_time(now);
	else
		*steal_time = 0;

	return stime;
}

static void vtime_delta_kernel(struct cpu_accounting_data *acct,
			       unsigned long *stime, unsigned long *stime_scaled)
{
	unsigned long steal_time;

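	/*
	 * Steal time (ticks during which the hypervisor ran something
	 * else) is subtracted from kernel time so it isn't charged to
	 * the task; it accumulates in acct->steal_time and is reported
	 * via account_steal_time() in vtime_flush().
	 */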
	*stime = vtime_delta(acct, stime_scaled, &steal_time);
	*stime -= min(*stime, steal_time);
	acct->steal_time += steal_time;
}

void vtime_account_kernel(struct task_struct *tsk)
{
	struct cpu_accounting_data *acct = get_accounting(tsk);
	unsigned long stime, stime_scaled;

	vtime_delta_kernel(acct, &stime, &stime_scaled);

	if (tsk->flags & PF_VCPU) {
		acct->gtime += stime;
#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
		acct->utime_scaled += stime_scaled;
#endif
	} else {
		acct->stime += stime;
#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
		acct->stime_scaled += stime_scaled;
#endif
	}
}
EXPORT_SYMBOL_GPL(vtime_account_kernel);

void vtime_account_idle(struct task_struct *tsk)
{
	unsigned long stime, stime_scaled, steal_time;
	struct cpu_accounting_data *acct = get_accounting(tsk);

	stime = vtime_delta(acct, &stime_scaled, &steal_time);
	acct->idle_time += stime + steal_time;
}

static void vtime_account_irq_field(struct cpu_accounting_data *acct,
				    unsigned long *field)
{
	unsigned long stime, stime_scaled;

	vtime_delta_kernel(acct, &stime, &stime_scaled);
	*field += stime;
#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
	acct->stime_scaled += stime_scaled;
#endif
}

void vtime_account_softirq(struct task_struct *tsk)
{
	struct cpu_accounting_data *acct = get_accounting(tsk);
	vtime_account_irq_field(acct, &acct->softirq_time);
}

void vtime_account_hardirq(struct task_struct *tsk)
{
	struct cpu_accounting_data *acct = get_accounting(tsk);
	vtime_account_irq_field(acct, &acct->hardirq_time);
}

static void vtime_flush_scaled(struct task_struct *tsk,
			       struct cpu_accounting_data *acct)
{
#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
	if (acct->utime_scaled)
		tsk->utimescaled += cputime_to_nsecs(acct->utime_scaled);
	if (acct->stime_scaled)
		tsk->stimescaled += cputime_to_nsecs(acct->stime_scaled);

	acct->utime_scaled = 0;
	acct->utime_sspurr = 0;
	acct->stime_scaled = 0;
#endif
}

/*
 * Account the whole cputime accumulated in the paca
 * Must be called with interrupts disabled.
 * Assumes that vtime_account_kernel/idle() has been called
 * recently (i.e. since the last entry from usermode) so that
 * get_paca()->user_time_scaled is up to date.
 */
void vtime_flush(struct task_struct *tsk)
{
	struct cpu_accounting_data *acct = get_accounting(tsk);

	if (acct->utime)
		account_user_time(tsk, cputime_to_nsecs(acct->utime));

	if (acct->gtime)
		account_guest_time(tsk, cputime_to_nsecs(acct->gtime));

	if (IS_ENABLED(CONFIG_PPC_SPLPAR) && acct->steal_time) {
		account_steal_time(cputime_to_nsecs(acct->steal_time));
		acct->steal_time = 0;
	}

	if (acct->idle_time)
		account_idle_time(cputime_to_nsecs(acct->idle_time));

	if (acct->stime)
		account_system_index_time(tsk, cputime_to_nsecs(acct->stime),
					  CPUTIME_SYSTEM);

	if (acct->hardirq_time)
		account_system_index_time(tsk, cputime_to_nsecs(acct->hardirq_time),
					  CPUTIME_IRQ);
	if (acct->softirq_time)
		account_system_index_time(tsk, cputime_to_nsecs(acct->softirq_time),
					  CPUTIME_SOFTIRQ);

	vtime_flush_scaled(tsk, acct);

	acct->utime = 0;
	acct->gtime = 0;
	acct->idle_time = 0;
	acct->stime = 0;
	acct->hardirq_time = 0;
	acct->softirq_time = 0;
}
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */

void __no_kcsan __delay(unsigned long loops)
{
	unsigned long start;

	spin_begin();
	if (tb_invalid) {
		/*
		 * TB is in error state and isn't ticking anymore.
		 * HMI handler was unable to recover from TB error.
		 * Return immediately, so that kernel won't get stuck here.
		 */
		spin_cpu_relax();
	} else {
		start = mftb();
		while (mftb() - start < loops)
			spin_cpu_relax();
	}
	spin_end();
}
EXPORT_SYMBOL(__delay);

void __no_kcsan udelay(unsigned long usecs)
{
	__delay(tb_ticks_per_usec * usecs);
}
EXPORT_SYMBOL(udelay);
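
/*
 * udelay() busy-waits on the timebase, so its accuracy depends on
 * tb_ticks_per_usec: that starts out at the default of 100 above and is
 * recalculated from the calibrated timebase frequency in time_init().
 */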

#ifdef CONFIG_SMP
unsigned long profile_pc(struct pt_regs *regs)
{
	unsigned long pc = instruction_pointer(regs);

	if (in_lock_functions(pc))
		return regs->link;

	return pc;
}
EXPORT_SYMBOL(profile_pc);
#endif

#ifdef CONFIG_IRQ_WORK

/*
 * 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable...
 */
#ifdef CONFIG_PPC64
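/*
 * On 64-bit, r13 always holds the PACA pointer, so the flag can be read
 * and written with a single load/store off r13, which is safe in any
 * context, including NMIs.
 */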
static inline unsigned long test_irq_work_pending(void)
{
	unsigned long x;

	asm volatile("lbz %0,%1(13)"
		     : "=r" (x)
		     : "i" (offsetof(struct paca_struct, irq_work_pending)));
	return x;
}

static inline void set_irq_work_pending_flag(void)
{
	asm volatile("stb %0,%1(13)" : :
		     "r" (1),
		     "i" (offsetof(struct paca_struct, irq_work_pending)));
}

static inline void clear_irq_work_pending(void)
{
	asm volatile("stb %0,%1(13)" : :
		     "r" (0),
		     "i" (offsetof(struct paca_struct, irq_work_pending)));
}

#else /* 32-bit */

DEFINE_PER_CPU(u8, irq_work_pending);

#define set_irq_work_pending_flag()	__this_cpu_write(irq_work_pending, 1)
#define test_irq_work_pending()		__this_cpu_read(irq_work_pending)
#define clear_irq_work_pending()	__this_cpu_write(irq_work_pending, 0)

#endif /* 32 vs 64 bit */

void arch_irq_work_raise(void)
{
	/*
	 * 64-bit code that uses irq soft-mask can just cause an immediate
	 * interrupt here that gets soft masked, if this is called under
	 * local_irq_disable(). It might be possible to prevent that happening
	 * by noticing interrupts are disabled and setting decrementer pending
	 * to be replayed when irqs are enabled. The problem there is that
	 * tracing can call irq_work_raise, including in code that does low
	 * level manipulations of irq soft-mask state (e.g., trace_hardirqs_on)
	 * which could get tangled up if we're messing with the same state
	 * here.
	 */
	preempt_disable();
	set_irq_work_pending_flag();
	set_dec(1);
	preempt_enable();
}

static void set_dec_or_work(u64 val)
{
	set_dec(val);
	/* We may have raced with new irq work */
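	/*
	 * arch_irq_work_raise() sets the pending flag before poking the
	 * decrementer, so if new work was raised (e.g. from NMI context)
	 * after the set_dec() above, the test below sees the flag and
	 * re-arms for an immediate interrupt; the work cannot be lost.
	 */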
	if (unlikely(test_irq_work_pending()))
		set_dec(1);
}

#else  /* CONFIG_IRQ_WORK */

#define test_irq_work_pending()	0
#define clear_irq_work_pending()

static void set_dec_or_work(u64 val)
{
	set_dec(val);
}
#endif /* CONFIG_IRQ_WORK */

#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
void timer_rearm_host_dec(u64 now)
{
	u64 *next_tb = this_cpu_ptr(&decrementers_next_tb);

	WARN_ON_ONCE(!arch_irqs_disabled());
	WARN_ON_ONCE(mfmsr() & MSR_EE);

	if (now >= *next_tb) {
		local_paca->irq_happened |= PACA_IRQ_DEC;
	} else {
		now = *next_tb - now;
		if (now > decrementer_max)
			now = decrementer_max;
		set_dec_or_work(now);
	}
}
EXPORT_SYMBOL_GPL(timer_rearm_host_dec);
#endif

/*
 * timer_interrupt - gets called when the decrementer overflows,
 * with interrupts disabled.
 */
DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt)
{
	struct clock_event_device *evt = this_cpu_ptr(&decrementers);
	u64 *next_tb = this_cpu_ptr(&decrementers_next_tb);
	struct pt_regs *old_regs;
	u64 now;

	/*
	 * Some implementations of hotplug will get timer interrupts while
	 * offline, just ignore these.
	 */
	if (unlikely(!cpu_online(smp_processor_id()))) {
		set_dec(decrementer_max);
		return;
	}

	/* Conditionally hard-enable interrupts. */
	if (should_hard_irq_enable(regs)) {
		/*
		 * Ensure a positive value is written to the decrementer, or
		 * else some CPUs will continue to take decrementer exceptions.
		 * When the PPC_WATCHDOG (decrementer based) is configured,
		 * keep this at most 31 bits, which is about 4 seconds on most
		 * systems, which gives the watchdog a chance of catching timer
		 * interrupt hard lockups.
		 */
		if (IS_ENABLED(CONFIG_PPC_WATCHDOG))
			set_dec(0x7fffffff);
		else
			set_dec(decrementer_max);

		do_hard_irq_enable();
	}

#if defined(CONFIG_PPC32) && defined(CONFIG_PPC_PMAC)
	if (atomic_read(&ppc_n_lost_interrupts) != 0)
		__do_IRQ(regs);
#endif

	old_regs = set_irq_regs(regs);

	trace_timer_interrupt_entry(regs);

	if (test_irq_work_pending()) {
		clear_irq_work_pending();
		mce_run_irq_context_handlers();
		irq_work_run();
	}

	now = get_tb();
	if (now >= *next_tb) {
		evt->event_handler(evt);
		__this_cpu_inc(irq_stat.timer_irqs_event);
	} else {
		now = *next_tb - now;
		if (now > decrementer_max)
			now = decrementer_max;
		set_dec_or_work(now);
		__this_cpu_inc(irq_stat.timer_irqs_others);
	}

	trace_timer_interrupt_exit(regs);

	set_irq_regs(old_regs);
}
EXPORT_SYMBOL(timer_interrupt);

#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
void timer_broadcast_interrupt(void)
{
	tick_receive_broadcast();
	__this_cpu_inc(irq_stat.broadcast_irqs_event);
}
#endif

#ifdef CONFIG_SUSPEND
/* Overrides the weak version in kernel/power/main.c */
void arch_suspend_disable_irqs(void)
{
	if (ppc_md.suspend_disable_irqs)
		ppc_md.suspend_disable_irqs();

	/* Disable the decrementer, so that it doesn't interfere
	 * with suspending.
	 */

	set_dec(decrementer_max);
	local_irq_disable();
	set_dec(decrementer_max);
}

/* Overrides the weak version in kernel/power/main.c */
void arch_suspend_enable_irqs(void)
{
	local_irq_enable();

	if (ppc_md.suspend_enable_irqs)
		ppc_md.suspend_enable_irqs();
}
#endif

unsigned long long tb_to_ns(unsigned long long ticks)
{
	return mulhdu(ticks, tb_to_ns_scale) << tb_to_ns_shift;
}
EXPORT_SYMBOL_GPL(tb_to_ns);

/*
 * Scheduler clock - returns current time in nanosec units.
 *
 * Note: mulhdu(a, b) (multiply high double unsigned) returns
 * the high 64 bits of a * b, i.e. (a * b) >> 64, where a and b
 * are 64-bit unsigned numbers.
 */
notrace unsigned long long sched_clock(void)
{
	return mulhdu(get_tb() - boot_tb, tb_to_ns_scale) << tb_to_ns_shift;
}

#ifdef CONFIG_PPC_PSERIES

/*
 * Running clock - attempts to give a view of time passing for a
 * virtualised kernel.
 * Uses the VTB register if available, otherwise a next best guess.
 */
unsigned long long running_clock(void)
{
	/*
	 * Don't read the VTB as a host, since KVM does not switch the host
	 * timebase into the VTB when it takes a guest off the CPU; reading
	 * the VTB would return the VTB of the last guest switched out.
	 *
	 * Host kernels are often compiled with CONFIG_PPC_PSERIES enabled,
	 * so it would be unsafe to rely only on the #ifdef above.
	 */
	if (firmware_has_feature(FW_FEATURE_LPAR) &&
	    cpu_has_feature(CPU_FTR_ARCH_207S))
		return mulhdu(get_vtb() - boot_tb, tb_to_ns_scale) << tb_to_ns_shift;

	/*
	 * This is the next best approximation without a VTB.
	 * On a host running bare metal there should never be any stolen
	 * time, and on a host which doesn't do any virtualisation TB
	 * *should* equal the VTB, so it makes no difference anyway.
	 */
	return local_clock() - kcpustat_this_cpu->cpustat[CPUTIME_STEAL];
}
#endif

static int __init get_freq(char *name, int cells, unsigned long *val)
{
	struct device_node *cpu;
	const __be32 *fp;
	int found = 0;

	/* The cpu node should have timebase and clock frequency properties */
	cpu = of_find_node_by_type(NULL, "cpu");

	if (cpu) {
		fp = of_get_property(cpu, name, NULL);
		if (fp) {
			found = 1;
			*val = of_read_ulong(fp, cells);
		}

		of_node_put(cpu);
	}

	return found;
}

static void start_cpu_decrementer(void)
{
#ifdef CONFIG_BOOKE_OR_40x
	unsigned int tcr;

	/* Clear any pending timer interrupts */
	mtspr(SPRN_TSR, TSR_ENW | TSR_WIS | TSR_DIS | TSR_FIS);

	tcr = mfspr(SPRN_TCR);
	/*
	 * The watchdog may have already been enabled by u-boot. So leave
	 * TCR[WP] (Watchdog Period) alone.
	 */
	tcr &= TCR_WP_MASK;	/* Clear all bits except for TCR[WP] */
	tcr |= TCR_DIE;		/* Enable decrementer */
	mtspr(SPRN_TCR, tcr);
#endif
}

void __init generic_calibrate_decr(void)
{
	ppc_tb_freq = DEFAULT_TB_FREQ;		/* hardcoded default */

	if (!get_freq("ibm,extended-timebase-frequency", 2, &ppc_tb_freq) &&
	    !get_freq("timebase-frequency", 1, &ppc_tb_freq)) {

		printk(KERN_ERR "WARNING: Estimating decrementer frequency (not found)\n");
	}

	ppc_proc_freq = DEFAULT_PROC_FREQ;	/* hardcoded default */

	if (!get_freq("ibm,extended-clock-frequency", 2, &ppc_proc_freq) &&
	    !get_freq("clock-frequency", 1, &ppc_proc_freq)) {

		printk(KERN_ERR "WARNING: Estimating processor frequency (not found)\n");
	}
}
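
/*
 * An illustrative device tree fragment that get_freq() would parse
 * (property values below are examples only, not real hardware):
 *
 *	cpus {
 *		cpu@0 {
 *			device_type = "cpu";
 *			timebase-frequency = <512000000>;	// 512 MHz TB
 *			clock-frequency = <3000000000>;		// 3 GHz core
 *		};
 *	};
 */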

int update_persistent_clock64(struct timespec64 now)
{
	struct rtc_time tm;

	if (!ppc_md.set_rtc_time)
		return -ENODEV;

	rtc_time64_to_tm(now.tv_sec + 1 + timezone_offset, &tm);

	return ppc_md.set_rtc_time(&tm);
}

static void __read_persistent_clock(struct timespec64 *ts)
{
	struct rtc_time tm;
	static int first = 1;

	ts->tv_nsec = 0;
	/* XXX this is a little fragile but will work okay in the short term */
	if (first) {
		first = 0;
		if (ppc_md.time_init)
			timezone_offset = ppc_md.time_init();

		/* get_boot_time() isn't guaranteed to be safe to call late */
		if (ppc_md.get_boot_time) {
			ts->tv_sec = ppc_md.get_boot_time() - timezone_offset;
			return;
		}
	}
	if (!ppc_md.get_rtc_time) {
		ts->tv_sec = 0;
		return;
	}
	ppc_md.get_rtc_time(&tm);

	ts->tv_sec = rtc_tm_to_time64(&tm);
}

void read_persistent_clock64(struct timespec64 *ts)
{
	__read_persistent_clock(ts);

	/* Sanitize it in case real time clock is set below EPOCH */
	if (ts->tv_sec < 0) {
		ts->tv_sec = 0;
		ts->tv_nsec = 0;
	}
}

/* clocksource code */
static notrace u64 timebase_read(struct clocksource *cs)
{
	return (u64)get_tb();
}

static void __init clocksource_init(void)
{
	struct clocksource *clock = &clocksource_timebase;

	if (clocksource_register_hz(clock, tb_ticks_per_sec)) {
		printk(KERN_ERR "clocksource: %s is already registered\n",
		       clock->name);
		return;
	}

	printk(KERN_INFO "clocksource: %s mult[%x] shift[%d] registered\n",
	       clock->name, clock->mult, clock->shift);
}

static int decrementer_set_next_event(unsigned long evt,
				      struct clock_event_device *dev)
{
	__this_cpu_write(decrementers_next_tb, get_tb() + evt);
	set_dec_or_work(evt);

	return 0;
}

static int decrementer_shutdown(struct clock_event_device *dev)
{
	__this_cpu_write(decrementers_next_tb, DEC_CLOCKEVENT_STOPPED);
	set_dec_or_work(decrementer_max);

	return 0;
}

static void register_decrementer_clockevent(int cpu)
{
	struct clock_event_device *dec = &per_cpu(decrementers, cpu);

	*dec = decrementer_clockevent;
	dec->cpumask = cpumask_of(cpu);

	clockevents_config_and_register(dec, ppc_tb_freq, 2, decrementer_max);

	printk_once(KERN_DEBUG "clockevent: %s mult[%x] shift[%d] cpu[%d]\n",
		    dec->name, dec->mult, dec->shift, cpu);

	/* Set values for KVM, see kvm_emulate_dec() */
	decrementer_clockevent.mult = dec->mult;
	decrementer_clockevent.shift = dec->shift;
}

static void enable_large_decrementer(void)
{
	if (!cpu_has_feature(CPU_FTR_ARCH_300))
		return;

	if (decrementer_max <= DECREMENTER_DEFAULT_MAX)
		return;

	/*
	 * If we're running as the hypervisor we need to enable the LD
	 * manually, otherwise firmware should have done it for us.
	 */
	if (cpu_has_feature(CPU_FTR_HVMODE))
		mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_LD);
}

static void __init set_decrementer_max(void)
{
	struct device_node *cpu;
	u32 bits = 32;

	/* Prior to ISAv3 the decrementer is always 32 bit */
	if (!cpu_has_feature(CPU_FTR_ARCH_300))
		return;

	cpu = of_find_node_by_type(NULL, "cpu");

	if (of_property_read_u32(cpu, "ibm,dec-bits", &bits) == 0) {
		if (bits > 64 || bits < 32) {
			pr_warn("time_init: firmware supplied invalid ibm,dec-bits");
			bits = 32;
		}

		/* calculate the signed maximum given this many bits */
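		/* e.g. ibm,dec-bits = 56 gives (1ul << 55) - 1 = 0x7fffffffffffff */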
		decrementer_max = (1ul << (bits - 1)) - 1;
	}

	of_node_put(cpu);

	pr_info("time_init: %u bit decrementer (max: %llx)\n",
		bits, decrementer_max);
}

static void __init init_decrementer_clockevent(void)
{
	register_decrementer_clockevent(smp_processor_id());
}

void secondary_cpu_time_init(void)
{
	/* Enable and test the large decrementer for this cpu */
	enable_large_decrementer();

	/* Start the decrementer on CPUs that have manual control
	 * such as BookE
	 */
	start_cpu_decrementer();

	/* FIXME: Should make an unrelated change to move the
	 * snapshot_timebase call here!
	 */
	register_decrementer_clockevent(smp_processor_id());
}

/* This function is only called on the boot processor */
void __init time_init(void)
{
	struct div_result res;
	u64 scale;
	unsigned shift;

	/* Normal PowerPC with timebase register */
	if (ppc_md.calibrate_decr)
		ppc_md.calibrate_decr();
	else
		generic_calibrate_decr();

	printk(KERN_DEBUG "time_init: decrementer frequency = %lu.%.6lu MHz\n",
	       ppc_tb_freq / 1000000, ppc_tb_freq % 1000000);
	printk(KERN_DEBUG "time_init: processor frequency = %lu.%.6lu MHz\n",
	       ppc_proc_freq / 1000000, ppc_proc_freq % 1000000);

	tb_ticks_per_jiffy = ppc_tb_freq / HZ;
	tb_ticks_per_sec = ppc_tb_freq;
	tb_ticks_per_usec = ppc_tb_freq / 1000000;

	/*
	 * Compute scale factor for sched_clock.
	 * The calibrate_decr() function has set tb_ticks_per_sec,
	 * which is the timebase frequency.
	 * We compute 1e9 * 2^64 / tb_ticks_per_sec and interpret
	 * the 128-bit result as a 64.64 fixed-point number.
	 * We then shift that number right until it is less than 1.0,
	 * giving us the scale factor and shift count to use in
	 * sched_clock().
	 */
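	/*
	 * Worked example (hypothetical 512 MHz timebase):
	 * 1e9 * 2^64 / 512e6 = 1.953125 * 2^64, so res.result_high = 1;
	 * one shift right gives scale = 0.9765625 * 2^64
	 * = 0xfa00000000000000 and shift = 1, and sched_clock() then
	 * computes ((tb * scale) >> 64) << 1 = tb * 1.953125 ns/tick.
	 */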
	div128_by_32(1000000000, 0, tb_ticks_per_sec, &res);
	scale = res.result_low;
	for (shift = 0; res.result_high != 0; ++shift) {
		scale = (scale >> 1) | (res.result_high << 63);
		res.result_high >>= 1;
	}
	tb_to_ns_scale = scale;
	tb_to_ns_shift = shift;
	/* Save the current timebase to pretty up CONFIG_PRINTK_TIME */
	boot_tb = get_tb();

	/* If platform provided a timezone (pmac), we correct the time */
	if (timezone_offset) {
		sys_tz.tz_minuteswest = -timezone_offset / 60;
		sys_tz.tz_dsttime = 0;
	}

	vdso_data->tb_ticks_per_sec = tb_ticks_per_sec;

	/* initialise and enable the large decrementer (if we have one) */
	set_decrementer_max();
	enable_large_decrementer();

	/* Start the decrementer on CPUs that have manual control
	 * such as BookE
	 */
	start_cpu_decrementer();

	/* Register the clocksource */
	clocksource_init();

	init_decrementer_clockevent();
	tick_setup_hrtimer_broadcast();

	of_clk_init(NULL);
	enable_sched_clock_irqtime();
}

/*
 * Divide a 128-bit dividend by a 32-bit divisor, leaving a 128 bit
 * result.
 */
void div128_by_32(u64 dividend_high, u64 dividend_low,
		  unsigned divisor, struct div_result *dr)
{
	unsigned long a, b, c, d;
	unsigned long w, x, y, z;
	u64 ra, rb, rc;
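
	/*
	 * Schoolbook long division in base 2^32: a:b:c:d are the four
	 * 32-bit "digits" of the dividend, w:x:y:z become the digits of
	 * the quotient, and ra/rb/rc carry each step's remainder (times
	 * 2^32, plus the next dividend digit) into the next division.
	 */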
	a = dividend_high >> 32;
	b = dividend_high & 0xffffffff;
	c = dividend_low >> 32;
	d = dividend_low & 0xffffffff;

	w = a / divisor;
	ra = ((u64)(a - (w * divisor)) << 32) + b;

	rb = ((u64) do_div(ra, divisor) << 32) + c;
	x = ra;

	rc = ((u64) do_div(rb, divisor) << 32) + d;
	y = rb;

	do_div(rc, divisor);
	z = rc;

	dr->result_high = ((u64)w << 32) + x;
	dr->result_low = ((u64)y << 32) + z;
}

/* We don't need to calibrate delay, we use the CPU timebase for that */
void calibrate_delay(void)
{
	/* Some generic code (such as spinlock debug) use loops_per_jiffy
	 * as the number of __delay(1) in a jiffy, so make it so
	 */
	loops_per_jiffy = tb_ticks_per_jiffy;
}

#if IS_ENABLED(CONFIG_RTC_DRV_GENERIC)
static int rtc_generic_get_time(struct device *dev, struct rtc_time *tm)
{
	ppc_md.get_rtc_time(tm);
	return 0;
}

static int rtc_generic_set_time(struct device *dev, struct rtc_time *tm)
{
	if (!ppc_md.set_rtc_time)
		return -EOPNOTSUPP;

	if (ppc_md.set_rtc_time(tm) < 0)
		return -EOPNOTSUPP;

	return 0;
}

static const struct rtc_class_ops rtc_generic_ops = {
	.read_time = rtc_generic_get_time,
	.set_time = rtc_generic_set_time,
};

static int __init rtc_init(void)
{
	struct platform_device *pdev;

	if (!ppc_md.get_rtc_time)
		return -ENODEV;

	pdev = platform_device_register_data(NULL, "rtc-generic", -1,
					     &rtc_generic_ops,
					     sizeof(rtc_generic_ops));

	return PTR_ERR_OR_ZERO(pdev);
}

device_initcall(rtc_init);
#endif