hrtimer.c source code [linux/kernel/time/hrtimer.c]

1	// SPDX-License-Identifier: GPL-2.0
2	/*
3	* Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
4	* Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
5	* Copyright(C) 2006-2007 Timesys Corp., Thomas Gleixner
6	*
7	* High-resolution kernel timers
8	*
9	* In contrast to the low-resolution timeout API, aka timer wheel,
10	* hrtimers provide finer resolution and accuracy depending on system
11	* configuration and capabilities.
12	*
13	* Started by: Thomas Gleixner and Ingo Molnar
14	*
15	* Credits:
16	* Based on the original timer wheel code
17	*
18	* Help, testing, suggestions, bugfixes, improvements were
19	* provided by:
20	*
21	* George Anzinger, Andrew Morton, Steven Rostedt, Roman Zippel
22	* et. al.
23	*/
24
25	#include <linux/cpu.h>
26	#include <linux/export.h>
27	#include <linux/percpu.h>
28	#include <linux/hrtimer.h>
29	#include <linux/notifier.h>
30	#include <linux/syscalls.h>
31	#include <linux/interrupt.h>
32	#include <linux/tick.h>
33	#include <linux/err.h>
34	#include <linux/debugobjects.h>
35	#include <linux/sched/signal.h>
36	#include <linux/sched/sysctl.h>
37	#include <linux/sched/rt.h>
38	#include <linux/sched/deadline.h>
39	#include <linux/sched/nohz.h>
40	#include <linux/sched/debug.h>
41	#include <linux/sched/isolation.h>
42	#include <linux/timer.h>
43	#include <linux/freezer.h>
44	#include <linux/compat.h>
45
46	#include <linux/uaccess.h>
47
48	#include <trace/events/timer.h>
49
50	#include "tick-internal.h"
51
/*
 * Masks for selecting the soft and hard context timers from
 * cpu_base->active_bases.
 *
 * The hard context bases occupy the bits below MASK_SHIFT, the soft context
 * bases the same number of bits directly above them.
 */
#define MASK_SHIFT		(HRTIMER_BASE_MONOTONIC_SOFT)
#define HRTIMER_ACTIVE_HARD	((1U << MASK_SHIFT) - 1)
#define HRTIMER_ACTIVE_SOFT	(HRTIMER_ACTIVE_HARD << MASK_SHIFT)
#define HRTIMER_ACTIVE_ALL	(HRTIMER_ACTIVE_SOFT | HRTIMER_ACTIVE_HARD)

/* Defined further down; needed here for the per CPU csd initializer. */
static void retrigger_next_event(void *arg);
62
63	/*
64	* The timer bases:
65	*
66	* There are more clockids than hrtimer bases. Thus, we index
67	* into the timer bases by the hrtimer_base_type enum. When trying
68	* to reach a base using a clockid, hrtimer_clockid_to_base()
69	* is used to convert from clockid to the proper hrtimer_base_type.
70	*/
71	DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
72	{
73	.lock = __RAW_SPIN_LOCK_UNLOCKED(hrtimer_bases.lock),
74	.clock_base =
75	{
76	{
77	.index = HRTIMER_BASE_MONOTONIC,
78	.clockid = CLOCK_MONOTONIC,
79	.get_time = &ktime_get,
80	},
81	{
82	.index = HRTIMER_BASE_REALTIME,
83	.clockid = CLOCK_REALTIME,
84	.get_time = &ktime_get_real,
85	},
86	{
87	.index = HRTIMER_BASE_BOOTTIME,
88	.clockid = CLOCK_BOOTTIME,
89	.get_time = &ktime_get_boottime,
90	},
91	{
92	.index = HRTIMER_BASE_TAI,
93	.clockid = CLOCK_TAI,
94	.get_time = &ktime_get_clocktai,
95	},
96	{
97	.index = HRTIMER_BASE_MONOTONIC_SOFT,
98	.clockid = CLOCK_MONOTONIC,
99	.get_time = &ktime_get,
100	},
101	{
102	.index = HRTIMER_BASE_REALTIME_SOFT,
103	.clockid = CLOCK_REALTIME,
104	.get_time = &ktime_get_real,
105	},
106	{
107	.index = HRTIMER_BASE_BOOTTIME_SOFT,
108	.clockid = CLOCK_BOOTTIME,
109	.get_time = &ktime_get_boottime,
110	},
111	{
112	.index = HRTIMER_BASE_TAI_SOFT,
113	.clockid = CLOCK_TAI,
114	.get_time = &ktime_get_clocktai,
115	},
116	},
117	.csd = CSD_INIT(retrigger_next_event, NULL)
118	};
119
120	static inline bool hrtimer_base_is_online(struct hrtimer_cpu_base *base)
121	{
122	if (!IS_ENABLED(CONFIG_HOTPLUG_CPU))
123	return true;
124	else
125	return likely(base->online);
126	}
127
128	/*
129	* Functions and macros which are different for UP/SMP systems are kept in a
130	* single place
131	*/
132	#ifdef CONFIG_SMP
133
134	/*
135	* We require the migration_base for lock_hrtimer_base()/switch_hrtimer_base()
136	* such that hrtimer_callback_running() can unconditionally dereference
137	* timer->base->cpu_base
138	*/
139	static struct hrtimer_cpu_base migration_cpu_base = {
140	.clock_base = { {
141	.cpu_base = &migration_cpu_base,
142	.seq = SEQCNT_RAW_SPINLOCK_ZERO(migration_cpu_base.seq,
143	&migration_cpu_base.lock),
144	}, },
145	};
146
147	#define migration_base migration_cpu_base.clock_base[0]
148
149	/*
150	* We are using hashed locking: holding per_cpu(hrtimer_bases)[n].lock
151	* means that all timers which are tied to this base via timer->base are
152	* locked, and the base itself is locked too.
153	*
154	* So __run_timers/migrate_timers can safely modify all timers which could
155	* be found on the lists/queues.
156	*
157	* When the timer's base is locked, and the timer removed from list, it is
158	* possible to set timer->base = &migration_base and drop the lock: the timer
159	* remains locked.
160	*/
161	static
162	struct hrtimer_clock_base lock_hrtimer_base(const* struct hrtimer *timer,
163	unsigned long *flags)
164	__acquires(&timer->base->lock)
165	{
166	struct hrtimer_clock_base *base;
167
168	for (;;) {
169	base = READ_ONCE(timer->base);
170	if (likely(base != &migration_base)) {
171	raw_spin_lock_irqsave(&base->cpu_base->lock, *flags);
172	if (likely(base == timer->base))
173	return base;
174	/ The timer has migrated to another CPU: /
175	raw_spin_unlock_irqrestore(&base->cpu_base->lock, *flags);
176	}
177	cpu_relax();
178	}
179	}
180
181	/*
182	* Check if the elected target is suitable considering its next
183	* event and the hotplug state of the current CPU.
184	*
185	* If the elected target is remote and its next event is after the timer
186	* to queue, then a remote reprogram is necessary. However there is no
187	* guarantee the IPI handling the operation would arrive in time to meet
188	* the high resolution deadline. In this case the local CPU becomes a
189	* preferred target, unless it is offline.
190	*
191	* High and low resolution modes are handled the same way for simplicity.
192	*
193	* Called with cpu_base->lock of target cpu held.
194	*/
195	static bool hrtimer_suitable_target(struct hrtimer timer, struct* hrtimer_clock_base *new_base,
196	struct hrtimer_cpu_base *new_cpu_base,
197	struct hrtimer_cpu_base *this_cpu_base)
198	{
199	ktime_t expires;
200
201	/*
202	* The local CPU clockevent can be reprogrammed. Also get_target_base()
203	* guarantees it is online.
204	*/
205	if (new_cpu_base == this_cpu_base)
206	return true;
207
208	/*
209	* The offline local CPU can't be the default target if the
210	* next remote target event is after this timer. Keep the
211	* elected new base. An IPI will we issued to reprogram
212	* it as a last resort.
213	*/
214	if (!hrtimer_base_is_online(base: this_cpu_base))
215	return true;
216
217	expires = ktime_sub(hrtimer_get_expires(timer), new_base->offset);
218
219	return expires >= new_base->cpu_base->expires_next;
220	}
221
222	static inline struct hrtimer_cpu_base get_target_base(struct* hrtimer_cpu_base base, int* pinned)
223	{
224	if (!hrtimer_base_is_online(base)) {
225	int cpu = cpumask_any_and(cpu_online_mask, housekeeping_cpumask(HK_TYPE_TIMER));
226
227	return &per_cpu(hrtimer_bases, cpu);
228	}
229
230	#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
231	if (static_branch_likely(&timers_migration_enabled) && !pinned)
232	return &per_cpu(hrtimer_bases, get_nohz_timer_target());
233	#endif
234	return base;
235	}
236
237	/*
238	* We switch the timer base to a power-optimized selected CPU target,
239	* if:
240	* - NO_HZ_COMMON is enabled
241	* - timer migration is enabled
242	* - the timer callback is not running
243	* - the timer is not the first expiring timer on the new target
244	*
245	* If one of the above requirements is not fulfilled we move the timer
246	* to the current CPU or leave it on the previously assigned CPU if
247	* the timer callback is currently running.
248	*/
249	static inline struct hrtimer_clock_base *
250	switch_hrtimer_base(struct hrtimer timer, struct* hrtimer_clock_base *base,
251	int pinned)
252	{
253	struct hrtimer_cpu_base new_cpu_base, this_cpu_base;
254	struct hrtimer_clock_base *new_base;
255	int basenum = base->index;
256
257	this_cpu_base = this_cpu_ptr(&hrtimer_bases);
258	new_cpu_base = get_target_base(base: this_cpu_base, pinned);
259	again:
260	new_base = &new_cpu_base->clock_base[basenum];
261
262	if (base != new_base) {
263	/*
264	* We are trying to move timer to new_base.
265	* However we can't change timer's base while it is running,
266	* so we keep it on the same CPU. No hassle vs. reprogramming
267	* the event source in the high resolution case. The softirq
268	* code will take care of this when the timer function has
269	* completed. There is no conflict as we hold the lock until
270	* the timer is enqueued.
271	*/
272	if (unlikely(hrtimer_callback_running(timer)))
273	return base;
274
275	/ See the comment in lock_hrtimer_base() /
276	WRITE_ONCE(timer->base, &migration_base);
277	raw_spin_unlock(&base->cpu_base->lock);
278	raw_spin_lock(&new_base->cpu_base->lock);
279
280	if (!hrtimer_suitable_target(timer, new_base, new_cpu_base,
281	this_cpu_base)) {
282	raw_spin_unlock(&new_base->cpu_base->lock);
283	raw_spin_lock(&base->cpu_base->lock);
284	new_cpu_base = this_cpu_base;
285	WRITE_ONCE(timer->base, base);
286	goto again;
287	}
288	WRITE_ONCE(timer->base, new_base);
289	} else {
290	if (!hrtimer_suitable_target(timer, new_base, new_cpu_base, this_cpu_base)) {
291	new_cpu_base = this_cpu_base;
292	goto again;
293	}
294	}
295	return new_base;
296	}
297
298	#else /* CONFIG_SMP */
299
300	static inline struct hrtimer_clock_base *
301	lock_hrtimer_base(const struct hrtimer timer, unsigned* long *flags)
302	__acquires(&timer->base->cpu_base->lock)
303	{
304	struct hrtimer_clock_base *base = timer->base;
305
306	raw_spin_lock_irqsave(&base->cpu_base->lock, *flags);
307
308	return base;
309	}
310
311	# define switch_hrtimer_base(t, b, p) (b)
312
313	#endif /* !CONFIG_SMP */
314
315	/*
316	* Functions for the union type storage format of ktime_t which are
317	* too large for inlining:
318	*/
319	#if BITS_PER_LONG < 64
320	/*
321	* Divide a ktime value by a nanosecond value
322	*/
323	s64 __ktime_divns(const ktime_t kt, s64 div)
324	{
325	int sft = `0`;
326	s64 dclc;
327	u64 tmp;
328
329	dclc = ktime_to_ns(kt);
330	tmp = dclc < `0` ? -dclc : dclc;
331
332	/ Make sure the divisor is less than 2^32: /
333	while (div >> `32`) {
334	sft++;
335	div >>= `1`;
336	}
337	tmp >>= sft;
338	do_div(tmp, (u32) div);
339	return dclc < `0` ? -tmp : tmp;
340	}
341	EXPORT_SYMBOL_GPL(__ktime_divns);
342	#endif /* BITS_PER_LONG >= 64 */
343
344	/*
345	* Add two ktime values and do a safety check for overflow:
346	*/
347	ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs)
348	{
349	ktime_t res = ktime_add_unsafe(lhs, rhs);
350
351	/*
352	* We use KTIME_SEC_MAX here, the maximum timeout which we can
353	* return to user space in a timespec:
354	*/
355	if (res < `0` \|\| res < lhs \|\| res < rhs)
356	res = ktime_set(KTIME_SEC_MAX, nsecs: `0`);
357
358	return res;
359	}
360
361	EXPORT_SYMBOL_GPL(ktime_add_safe);
362
363	#ifdef CONFIG_DEBUG_OBJECTS_TIMERS
364
365	static const struct debug_obj_descr hrtimer_debug_descr;
366
367	static void hrtimer_debug_hint(void* *addr)
368	{
369	return ACCESS_PRIVATE((struct hrtimer *)addr, function);
370	}
371
372	/*
373	* fixup_init is called when:
374	* - an active object is initialized
375	*/
376	static bool hrtimer_fixup_init(void addr, enum* debug_obj_state state)
377	{
378	struct hrtimer *timer = addr;
379
380	switch (state) {
381	case ODEBUG_STATE_ACTIVE:
382	hrtimer_cancel(timer);
383	debug_object_init(addr: timer, descr: &hrtimer_debug_descr);
384	return true;
385	default:
386	return false;
387	}
388	}
389
390	/*
391	* fixup_activate is called when:
392	* - an active object is activated
393	* - an unknown non-static object is activated
394	*/
395	static bool hrtimer_fixup_activate(void addr, enum* debug_obj_state state)
396	{
397	switch (state) {
398	case ODEBUG_STATE_ACTIVE:
399	WARN_ON(`1`);
400	fallthrough;
401	default:
402	return false;
403	}
404	}
405
406	/*
407	* fixup_free is called when:
408	* - an active object is freed
409	*/
410	static bool hrtimer_fixup_free(void addr, enum* debug_obj_state state)
411	{
412	struct hrtimer *timer = addr;
413
414	switch (state) {
415	case ODEBUG_STATE_ACTIVE:
416	hrtimer_cancel(timer);
417	debug_object_free(addr: timer, descr: &hrtimer_debug_descr);
418	return true;
419	default:
420	return false;
421	}
422	}
423
424	static const struct debug_obj_descr hrtimer_debug_descr = {
425	.name = "hrtimer",
426	.debug_hint = hrtimer_debug_hint,
427	.fixup_init = hrtimer_fixup_init,
428	.fixup_activate = hrtimer_fixup_activate,
429	.fixup_free = hrtimer_fixup_free,
430	};
431
432	static inline void debug_hrtimer_init(struct hrtimer *timer)
433	{
434	debug_object_init(addr: timer, descr: &hrtimer_debug_descr);
435	}
436
437	static inline void debug_hrtimer_init_on_stack(struct hrtimer *timer)
438	{
439	debug_object_init_on_stack(addr: timer, descr: &hrtimer_debug_descr);
440	}
441
442	static inline void debug_hrtimer_activate(struct hrtimer *timer,
443	enum hrtimer_mode mode)
444	{
445	debug_object_activate(addr: timer, descr: &hrtimer_debug_descr);
446	}
447
448	static inline void debug_hrtimer_deactivate(struct hrtimer *timer)
449	{
450	debug_object_deactivate(addr: timer, descr: &hrtimer_debug_descr);
451	}
452
453	void destroy_hrtimer_on_stack(struct hrtimer *timer)
454	{
455	debug_object_free(addr: timer, descr: &hrtimer_debug_descr);
456	}
457	EXPORT_SYMBOL_GPL(destroy_hrtimer_on_stack);
458
459	#else
460
461	static inline void debug_hrtimer_init(struct hrtimer *timer) { }
462	static inline void debug_hrtimer_init_on_stack(struct hrtimer *timer) { }
463	static inline void debug_hrtimer_activate(struct hrtimer *timer,
464	enum hrtimer_mode mode) { }
465	static inline void debug_hrtimer_deactivate(struct hrtimer *timer) { }
466	#endif
467
468	static inline void debug_setup(struct hrtimer timer, clockid_t clockid, enum* hrtimer_mode mode)
469	{
470	debug_hrtimer_init(timer);
471	trace_hrtimer_setup(hrtimer: timer, clockid, mode);
472	}
473
474	static inline void debug_setup_on_stack(struct hrtimer *timer, clockid_t clockid,
475	enum hrtimer_mode mode)
476	{
477	debug_hrtimer_init_on_stack(timer);
478	trace_hrtimer_setup(hrtimer: timer, clockid, mode);
479	}
480
481	static inline void debug_activate(struct hrtimer *timer,
482	enum hrtimer_mode mode)
483	{
484	debug_hrtimer_activate(timer, mode);
485	trace_hrtimer_start(hrtimer: timer, mode);
486	}
487
/* Debug object and tracepoint bookkeeping for deactivating @timer */
static inline void debug_deactivate(struct hrtimer *timer)
{
	debug_hrtimer_deactivate(timer);
	trace_hrtimer_cancel(timer);
}
493
494	static struct hrtimer_clock_base *
495	__next_base(struct hrtimer_cpu_base cpu_base, unsigned* int *active)
496	{
497	unsigned int idx;
498
499	if (!*active)
500	return NULL;
501
502	idx = __ffs(*active);
503	*active &= ~(`1U` << idx);
504
505	return &cpu_base->clock_base[idx];
506	}
507
508	#define for_each_active_base(base, cpu_base, active) \
509	while ((base = __next_base((cpu_base), &(active))))
510
511	static ktime_t __hrtimer_next_event_base(struct hrtimer_cpu_base *cpu_base,
512	const struct hrtimer *exclude,
513	unsigned int active,
514	ktime_t expires_next)
515	{
516	struct hrtimer_clock_base *base;
517	ktime_t expires;
518
519	for_each_active_base(base, cpu_base, active) {
520	struct timerqueue_node *next;
521	struct hrtimer *timer;
522
523	next = timerqueue_getnext(head: &base->active);
524	timer = container_of(next, struct hrtimer, node);
525	if (timer == exclude) {
526	/ Get to the next timer in the queue. /
527	next = timerqueue_iterate_next(node: next);
528	if (!next)
529	continue;
530
531	timer = container_of(next, struct hrtimer, node);
532	}
533	expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
534	if (expires < expires_next) {
535	expires_next = expires;
536
537	/ Skip cpu_base update if a timer is being excluded. /
538	if (exclude)
539	continue;
540
541	if (timer->is_soft)
542	cpu_base->softirq_next_timer = timer;
543	else
544	cpu_base->next_timer = timer;
545	}
546	}
547	/*
548	* clock_was_set() might have changed base->offset of any of
549	* the clock bases so the result might be negative. Fix it up
550	* to prevent a false positive in clockevents_program_event().
551	*/
552	if (expires_next < `0`)
553	expires_next = `0`;
554	return expires_next;
555	}
556
557	/*
558	* Recomputes cpu_base::*next_timer and returns the earliest expires_next
559	* but does not set cpu_base::*expires_next, that is done by
560	* hrtimer[_force]_reprogram and hrtimer_interrupt only. When updating
561	* cpu_base::*expires_next right away, reprogramming logic would no longer
562	* work.
563	*
564	* When a softirq is pending, we can ignore the HRTIMER_ACTIVE_SOFT bases,
565	* those timers will get run whenever the softirq gets handled, at the end of
566	* hrtimer_run_softirq(), hrtimer_update_softirq_timer() will re-add these bases.
567	*
568	* Therefore softirq values are those from the HRTIMER_ACTIVE_SOFT clock bases.
569	* The !softirq values are the minima across HRTIMER_ACTIVE_ALL, unless an actual
570	* softirq is pending, in which case they're the minima of HRTIMER_ACTIVE_HARD.
571	*
572	* @active_mask must be one of:
573	* - HRTIMER_ACTIVE_ALL,
574	* - HRTIMER_ACTIVE_SOFT, or
575	* - HRTIMER_ACTIVE_HARD.
576	*/
577	static ktime_t
578	__hrtimer_get_next_event(struct hrtimer_cpu_base cpu_base, unsigned* int active_mask)
579	{
580	unsigned int active;
581	struct hrtimer *next_timer = NULL;
582	ktime_t expires_next = KTIME_MAX;
583
584	if (!cpu_base->softirq_activated && (active_mask & HRTIMER_ACTIVE_SOFT)) {
585	active = cpu_base->active_bases & HRTIMER_ACTIVE_SOFT;
586	cpu_base->softirq_next_timer = NULL;
587	expires_next = __hrtimer_next_event_base(cpu_base, NULL,
588	active, KTIME_MAX);
589
590	next_timer = cpu_base->softirq_next_timer;
591	}
592
593	if (active_mask & HRTIMER_ACTIVE_HARD) {
594	active = cpu_base->active_bases & HRTIMER_ACTIVE_HARD;
595	cpu_base->next_timer = next_timer;
596	expires_next = __hrtimer_next_event_base(cpu_base, NULL, active,
597	expires_next);
598	}
599
600	return expires_next;
601	}
602
603	static ktime_t hrtimer_update_next_event(struct hrtimer_cpu_base *cpu_base)
604	{
605	ktime_t expires_next, soft = KTIME_MAX;
606
607	/*
608	* If the soft interrupt has already been activated, ignore the
609	* soft bases. They will be handled in the already raised soft
610	* interrupt.
611	*/
612	if (!cpu_base->softirq_activated) {
613	soft = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_SOFT);
614	/*
615	* Update the soft expiry time. clock_settime() might have
616	* affected it.
617	*/
618	cpu_base->softirq_expires_next = soft;
619	}
620
621	expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_HARD);
622	/*
623	* If a softirq timer is expiring first, update cpu_base->next_timer
624	* and program the hardware with the soft expiry time.
625	*/
626	if (expires_next > soft) {
627	cpu_base->next_timer = cpu_base->softirq_next_timer;
628	expires_next = soft;
629	}
630
631	return expires_next;
632	}
633
634	static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base)
635	{
636	ktime_t *offs_real = &base->clock_base[HRTIMER_BASE_REALTIME].offset;
637	ktime_t *offs_boot = &base->clock_base[HRTIMER_BASE_BOOTTIME].offset;
638	ktime_t *offs_tai = &base->clock_base[HRTIMER_BASE_TAI].offset;
639
640	ktime_t now = ktime_get_update_offsets_now(cwsseq: &base->clock_was_set_seq,
641	offs_real, offs_boot, offs_tai);
642
643	base->clock_base[HRTIMER_BASE_REALTIME_SOFT].offset = *offs_real;
644	base->clock_base[HRTIMER_BASE_BOOTTIME_SOFT].offset = *offs_boot;
645	base->clock_base[HRTIMER_BASE_TAI_SOFT].offset = *offs_tai;
646
647	return now;
648	}
649
650	/*
651	* Is the high resolution mode active ?
652	*/
653	static inline int hrtimer_hres_active(struct hrtimer_cpu_base *cpu_base)
654	{
655	return IS_ENABLED(CONFIG_HIGH_RES_TIMERS) ?
656	cpu_base->hres_active : `0`;
657	}
658
659	static void __hrtimer_reprogram(struct hrtimer_cpu_base *cpu_base,
660	struct hrtimer *next_timer,
661	ktime_t expires_next)
662	{
663	cpu_base->expires_next = expires_next;
664
665	/*
666	* If hres is not active, hardware does not have to be
667	* reprogrammed yet.
668	*
669	* If a hang was detected in the last timer interrupt then we
670	* leave the hang delay active in the hardware. We want the
671	* system to make progress. That also prevents the following
672	* scenario:
673	* T1 expires 50ms from now
674	* T2 expires 5s from now
675	*
676	* T1 is removed, so this code is called and would reprogram
677	* the hardware to 5s from now. Any hrtimer_start after that
678	* will not reprogram the hardware due to hang_detected being
679	* set. So we'd effectively block all timers until the T2 event
680	* fires.
681	*/
682	if (!hrtimer_hres_active(cpu_base) \|\| cpu_base->hang_detected)
683	return;
684
685	tick_program_event(expires: expires_next, force: `1`);
686	}
687
688	/*
689	* Reprogram the event source with checking both queues for the
690	* next event
691	* Called with interrupts disabled and base->lock held
692	*/
693	static void
694	hrtimer_force_reprogram(struct hrtimer_cpu_base cpu_base, int* skip_equal)
695	{
696	ktime_t expires_next;
697
698	expires_next = hrtimer_update_next_event(cpu_base);
699
700	if (skip_equal && expires_next == cpu_base->expires_next)
701	return;
702
703	__hrtimer_reprogram(cpu_base, next_timer: cpu_base->next_timer, expires_next);
704	}
705
/* High resolution timer related functions */
707	#ifdef CONFIG_HIGH_RES_TIMERS
708
709	/*
710	* High resolution timer enabled ?
711	*/
712	static bool hrtimer_hres_enabled __read_mostly = true;
713	unsigned int hrtimer_resolution __read_mostly = LOW_RES_NSEC;
714	EXPORT_SYMBOL_GPL(hrtimer_resolution);
715
716	/*
717	* Enable / Disable high resolution mode
718	*/
719	static int __init setup_hrtimer_hres(char *str)
720	{
721	return (kstrtobool(s: str, res: &hrtimer_hres_enabled) == `0`);
722	}
723
724	__setup("highres=", setup_hrtimer_hres);
725
726	/*
727	* hrtimer_high_res_enabled - query, if the highres mode is enabled
728	*/
729	static inline int hrtimer_is_hres_enabled(void)
730	{
731	return hrtimer_hres_enabled;
732	}
733
734	/*
735	* Switch to high resolution mode
736	*/
737	static void hrtimer_switch_to_hres(void)
738	{
739	struct hrtimer_cpu_base *base = this_cpu_ptr(&hrtimer_bases);
740
741	if (tick_init_highres()) {
742	pr_warn("Could not switch to high resolution mode on CPU %u\n",
743	base->cpu);
744	return;
745	}
746	base->hres_active = `1`;
747	hrtimer_resolution = HIGH_RES_NSEC;
748
749	tick_setup_sched_timer(hrtimer: true);
750	/ "Retrigger" the interrupt to get things going /
751	retrigger_next_event(NULL);
752	}
753
754	#else
755
/* Stubs used when CONFIG_HIGH_RES_TIMERS is not set */
static inline int hrtimer_is_hres_enabled(void) { return 0; }
static inline void hrtimer_switch_to_hres(void) { }
758
759	#endif /* CONFIG_HIGH_RES_TIMERS */
760	/*
761	* Retrigger next event is called after clock was set with interrupts
762	* disabled through an SMP function call or directly from low level
763	* resume code.
764	*
765	* This is only invoked when:
766	* - CONFIG_HIGH_RES_TIMERS is enabled.
767	* - CONFIG_NOHZ_COMMON is enabled
768	*
769	* For the other cases this function is empty and because the call sites
770	* are optimized out it vanishes as well, i.e. no need for lots of
771	* #ifdeffery.
772	*/
773	static void retrigger_next_event(void *arg)
774	{
775	struct hrtimer_cpu_base *base = this_cpu_ptr(&hrtimer_bases);
776
777	/*
778	* When high resolution mode or nohz is active, then the offsets of
779	* CLOCK_REALTIME/TAI/BOOTTIME have to be updated. Otherwise the
780	* next tick will take care of that.
781	*
782	* If high resolution mode is active then the next expiring timer
783	* must be reevaluated and the clock event device reprogrammed if
784	* necessary.
785	*
786	* In the NOHZ case the update of the offset and the reevaluation
787	* of the next expiring timer is enough. The return from the SMP
788	* function call will take care of the reprogramming in case the
789	* CPU was in a NOHZ idle sleep.
790	*/
791	if (!hrtimer_hres_active(cpu_base: base) && !tick_nohz_active)
792	return;
793
794	raw_spin_lock(&base->lock);
795	hrtimer_update_base(base);
796	if (hrtimer_hres_active(cpu_base: base))
797	hrtimer_force_reprogram(cpu_base: base, skip_equal: `0`);
798	else
799	hrtimer_update_next_event(cpu_base: base);
800	raw_spin_unlock(&base->lock);
801	}
802
803	/*
804	* When a timer is enqueued and expires earlier than the already enqueued
805	* timers, we have to check, whether it expires earlier than the timer for
806	* which the clock event device was armed.
807	*
808	* Called with interrupts disabled and base->cpu_base.lock held
809	*/
810	static void hrtimer_reprogram(struct hrtimer *timer, bool reprogram)
811	{
812	struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
813	struct hrtimer_clock_base *base = timer->base;
814	ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
815
816	WARN_ON_ONCE(hrtimer_get_expires_tv64(timer) < `0`);
817
818	/*
819	* CLOCK_REALTIME timer might be requested with an absolute
820	* expiry time which is less than base->offset. Set it to 0.
821	*/
822	if (expires < `0`)
823	expires = `0`;
824
825	if (timer->is_soft) {
826	/*
827	* soft hrtimer could be started on a remote CPU. In this
828	* case softirq_expires_next needs to be updated on the
829	* remote CPU. The soft hrtimer will not expire before the
830	* first hard hrtimer on the remote CPU -
831	* hrtimer_check_target() prevents this case.
832	*/
833	struct hrtimer_cpu_base *timer_cpu_base = base->cpu_base;
834
835	if (timer_cpu_base->softirq_activated)
836	return;
837
838	if (!ktime_before(cmp1: expires, cmp2: timer_cpu_base->softirq_expires_next))
839	return;
840
841	timer_cpu_base->softirq_next_timer = timer;
842	timer_cpu_base->softirq_expires_next = expires;
843
844	if (!ktime_before(cmp1: expires, cmp2: timer_cpu_base->expires_next) \|\|
845	!reprogram)
846	return;
847	}
848
849	/*
850	* If the timer is not on the current cpu, we cannot reprogram
851	* the other cpus clock event device.
852	*/
853	if (base->cpu_base != cpu_base)
854	return;
855
856	if (expires >= cpu_base->expires_next)
857	return;
858
859	/*
860	* If the hrtimer interrupt is running, then it will reevaluate the
861	* clock bases and reprogram the clock event device.
862	*/
863	if (cpu_base->in_hrtirq)
864	return;
865
866	cpu_base->next_timer = timer;
867
868	__hrtimer_reprogram(cpu_base, next_timer: timer, expires_next: expires);
869	}
870
871	static bool update_needs_ipi(struct hrtimer_cpu_base *cpu_base,
872	unsigned int active)
873	{
874	struct hrtimer_clock_base *base;
875	unsigned int seq;
876	ktime_t expires;
877
878	/*
879	* Update the base offsets unconditionally so the following
880	* checks whether the SMP function call is required works.
881	*
882	* The update is safe even when the remote CPU is in the hrtimer
883	* interrupt or the hrtimer soft interrupt and expiring affected
884	* bases. Either it will see the update before handling a base or
885	* it will see it when it finishes the processing and reevaluates
886	* the next expiring timer.
887	*/
888	seq = cpu_base->clock_was_set_seq;
889	hrtimer_update_base(base: cpu_base);
890
891	/*
892	* If the sequence did not change over the update then the
893	* remote CPU already handled it.
894	*/
895	if (seq == cpu_base->clock_was_set_seq)
896	return false;
897
898	/*
899	* If the remote CPU is currently handling an hrtimer interrupt, it
900	* will reevaluate the first expiring timer of all clock bases
901	* before reprogramming. Nothing to do here.
902	*/
903	if (cpu_base->in_hrtirq)
904	return false;
905
906	/*
907	* Walk the affected clock bases and check whether the first expiring
908	* timer in a clock base is moving ahead of the first expiring timer of
909	* @cpu_base. If so, the IPI must be invoked because per CPU clock
910	* event devices cannot be remotely reprogrammed.
911	*/
912	active &= cpu_base->active_bases;
913
914	for_each_active_base(base, cpu_base, active) {
915	struct timerqueue_node *next;
916
917	next = timerqueue_getnext(head: &base->active);
918	expires = ktime_sub(next->expires, base->offset);
919	if (expires < cpu_base->expires_next)
920	return true;
921
922	/ Extra check for softirq clock bases /
923	if (base->clockid < HRTIMER_BASE_MONOTONIC_SOFT)
924	continue;
925	if (cpu_base->softirq_activated)
926	continue;
927	if (expires < cpu_base->softirq_expires_next)
928	return true;
929	}
930	return false;
931	}
932
933	/*
934	* Clock was set. This might affect CLOCK_REALTIME, CLOCK_TAI and
935	* CLOCK_BOOTTIME (for late sleep time injection).
936	*
937	* This requires to update the offsets for these clocks
938	* vs. CLOCK_MONOTONIC. When high resolution timers are enabled, then this
939	* also requires to eventually reprogram the per CPU clock event devices
940	* when the change moves an affected timer ahead of the first expiring
941	* timer on that CPU. Obviously remote per CPU clock event devices cannot
942	* be reprogrammed. The other reason why an IPI has to be sent is when the
943	* system is in !HIGH_RES and NOHZ mode. The NOHZ mode updates the offsets
944	* in the tick, which obviously might be stopped, so this has to bring out
945	* the remote CPU which might sleep in idle to get this sorted.
946	*/
947	void clock_was_set(unsigned int bases)
948	{
949	struct hrtimer_cpu_base *cpu_base = raw_cpu_ptr(&hrtimer_bases);
950	cpumask_var_t mask;
951	int cpu;
952
953	if (!hrtimer_hres_active(cpu_base) && !tick_nohz_active)
954	goto out_timerfd;
955
956	if (!zalloc_cpumask_var(mask: &mask, GFP_KERNEL)) {
957	on_each_cpu(func: retrigger_next_event, NULL, wait: `1`);
958	goto out_timerfd;
959	}
960
961	/ Avoid interrupting CPUs if possible /
962	cpus_read_lock();
963	for_each_online_cpu(cpu) {
964	unsigned long flags;
965
966	cpu_base = &per_cpu(hrtimer_bases, cpu);
967	raw_spin_lock_irqsave(&cpu_base->lock, flags);
968
969	if (update_needs_ipi(cpu_base, active: bases))
970	cpumask_set_cpu(cpu, dstp: mask);
971
972	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
973	}
974
975	preempt_disable();
976	smp_call_function_many(mask, func: retrigger_next_event, NULL, wait: `1`);
977	preempt_enable();
978	cpus_read_unlock();
979	free_cpumask_var(mask);
980
981	out_timerfd:
982	timerfd_clock_was_set();
983	}
984
985	static void clock_was_set_work(struct work_struct *work)
986	{
987	clock_was_set(CLOCK_SET_WALL);
988	}
989
990	static DECLARE_WORK(hrtimer_work, clock_was_set_work);
991
992	/*
993	* Called from timekeeping code to reprogram the hrtimer interrupt device
994	* on all cpus and to notify timerfd.
995	*/
996	void clock_was_set_delayed(void)
997	{
998	schedule_work(work: &hrtimer_work);
999	}
1000
1001	/*
1002	* Called during resume either directly from via timekeeping_resume()
1003	* or in the case of s2idle from tick_unfreeze() to ensure that the
1004	* hrtimers are up to date.
1005	*/
1006	void hrtimers_resume_local(void)
1007	{
1008	lockdep_assert_irqs_disabled();
1009	/ Retrigger on the local CPU /
1010	retrigger_next_event(NULL);
1011	}
1012
1013	/*
1014	* Counterpart to lock_hrtimer_base above:
1015	*/
1016	static inline
1017	void unlock_hrtimer_base(const struct hrtimer timer, unsigned* long *flags)
1018	__releases(&timer->base->cpu_base->lock)
1019	{
1020	raw_spin_unlock_irqrestore(&timer->base->cpu_base->lock, *flags);
1021	}
1022
1023	/**
1024	* hrtimer_forward() - forward the timer expiry
1025	* @timer: hrtimer to forward
1026	* @now: forward past this time
1027	* @interval: the interval to forward
1028	*
1029	* Forward the timer expiry so it will expire in the future.
1030	*
1031	* .. note::
1032	* This only updates the timer expiry value and does not requeue the timer.
1033	*
1034	* There is also a variant of the function hrtimer_forward_now().
1035	*
1036	* Context: Can be safely called from the callback function of @timer. If called
1037	* from other contexts @timer must neither be enqueued nor running the
1038	* callback and the caller needs to take care of serialization.
1039	*
1040	* Return: The number of overruns are returned.
1041	*/
1042	u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval)
1043	{
1044	u64 orun = `1`;
1045	ktime_t delta;
1046
1047	delta = ktime_sub(now, hrtimer_get_expires(timer));
1048
1049	if (delta < `0`)
1050	return `0`;
1051
1052	if (WARN_ON(timer->state & HRTIMER_STATE_ENQUEUED))
1053	return `0`;
1054
1055	if (interval < hrtimer_resolution)
1056	interval = hrtimer_resolution;
1057
1058	if (unlikely(delta >= interval)) {
1059	s64 incr = ktime_to_ns(kt: interval);
1060
1061	orun = ktime_divns(kt: delta, div: incr);
1062	hrtimer_add_expires_ns(timer, ns: incr * orun);
1063	if (hrtimer_get_expires_tv64(timer) > now)
1064	return orun;
1065	/*
1066	* This (and the ktime_add() below) is the
1067	* correction for exact:
1068	*/
1069	orun++;
1070	}
1071	hrtimer_add_expires(timer, time: interval);
1072
1073	return orun;
1074	}
1075	EXPORT_SYMBOL_GPL(hrtimer_forward);
1076
1077	/*
1078	* enqueue_hrtimer - internal function to (re)start a timer
1079	*
1080	* The timer is inserted in expiry order. Insertion into the
1081	* red black tree is O(log(n)). Must hold the base lock.
1082	*
1083	* Returns true when the new timer is the leftmost timer in the tree.
1084	*/
1085	static bool enqueue_hrtimer(struct hrtimer timer, struct* hrtimer_clock_base *base,
1086	enum hrtimer_mode mode)
1087	{
1088	debug_activate(timer, mode);
1089	WARN_ON_ONCE(!base->cpu_base->online);
1090
1091	base->cpu_base->active_bases \|= `1` << base->index;
1092
1093	/ Pairs with the lockless read in hrtimer_is_queued() /
1094	WRITE_ONCE(timer->state, HRTIMER_STATE_ENQUEUED);
1095
1096	return timerqueue_add(head: &base->active, node: &timer->node);
1097	}
1098
1099	/*
1100	* __remove_hrtimer - internal function to remove a timer
1101	*
1102	* Caller must hold the base lock.
1103	*
1104	* High resolution timer mode reprograms the clock event device when the
1105	* timer is the one which expires next. The caller can disable this by setting
1106	* reprogram to zero. This is useful, when the context does a reprogramming
1107	* anyway (e.g. timer interrupt)
1108	*/
1109	static void __remove_hrtimer(struct hrtimer *timer,
1110	struct hrtimer_clock_base *base,
1111	u8 newstate, int reprogram)
1112	{
1113	struct hrtimer_cpu_base *cpu_base = base->cpu_base;
1114	u8 state = timer->state;
1115
1116	/ Pairs with the lockless read in hrtimer_is_queued() /
1117	WRITE_ONCE(timer->state, newstate);
1118	if (!(state & HRTIMER_STATE_ENQUEUED))
1119	return;
1120
1121	if (!timerqueue_del(head: &base->active, node: &timer->node))
1122	cpu_base->active_bases &= ~(`1` << base->index);
1123
1124	/*
1125	* Note: If reprogram is false we do not update
1126	* cpu_base->next_timer. This happens when we remove the first
1127	* timer on a remote cpu. No harm as we never dereference
1128	* cpu_base->next_timer. So the worst thing what can happen is
1129	* an superfluous call to hrtimer_force_reprogram() on the
1130	* remote cpu later on if the same timer gets enqueued again.
1131	*/
1132	if (reprogram && timer == cpu_base->next_timer)
1133	hrtimer_force_reprogram(cpu_base, skip_equal: `1`);
1134	}
1135
1136	/*
1137	* remove hrtimer, called with base lock held
1138	*/
1139	static inline int
1140	remove_hrtimer(struct hrtimer timer, struct* hrtimer_clock_base *base,
1141	bool restart, bool keep_local)
1142	{
1143	u8 state = timer->state;
1144
1145	if (state & HRTIMER_STATE_ENQUEUED) {
1146	bool reprogram;
1147
1148	/*
1149	* Remove the timer and force reprogramming when high
1150	* resolution mode is active and the timer is on the current
1151	* CPU. If we remove a timer on another CPU, reprogramming is
1152	* skipped. The interrupt event on this CPU is fired and
1153	* reprogramming happens in the interrupt handler. This is a
1154	* rare case and less expensive than a smp call.
1155	*/
1156	debug_deactivate(timer);
1157	reprogram = base->cpu_base == this_cpu_ptr(&hrtimer_bases);
1158
1159	/*
1160	* If the timer is not restarted then reprogramming is
1161	* required if the timer is local. If it is local and about
1162	* to be restarted, avoid programming it twice (on removal
1163	* and a moment later when it's requeued).
1164	*/
1165	if (!restart)
1166	state = HRTIMER_STATE_INACTIVE;
1167	else
1168	reprogram &= !keep_local;
1169
1170	__remove_hrtimer(timer, base, newstate: state, reprogram);
1171	return `1`;
1172	}
1173	return `0`;
1174	}
1175
1176	static inline ktime_t hrtimer_update_lowres(struct hrtimer *timer, ktime_t tim,
1177	const enum hrtimer_mode mode)
1178	{
1179	#ifdef CONFIG_TIME_LOW_RES
1180	/*
1181	* CONFIG_TIME_LOW_RES indicates that the system has no way to return
1182	* granular time values. For relative timers we add hrtimer_resolution
1183	* (i.e. one jiffy) to prevent short timeouts.
1184	*/
1185	timer->is_rel = mode & HRTIMER_MODE_REL;
1186	if (timer->is_rel)
1187	tim = ktime_add_safe(tim, hrtimer_resolution);
1188	#endif
1189	return tim;
1190	}
1191
1192	static void
1193	hrtimer_update_softirq_timer(struct hrtimer_cpu_base *cpu_base, bool reprogram)
1194	{
1195	ktime_t expires;
1196
1197	/*
1198	* Find the next SOFT expiration.
1199	*/
1200	expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_SOFT);
1201
1202	/*
1203	* reprogramming needs to be triggered, even if the next soft
1204	* hrtimer expires at the same time than the next hard
1205	* hrtimer. cpu_base->softirq_expires_next needs to be updated!
1206	*/
1207	if (expires == KTIME_MAX)
1208	return;
1209
1210	/*
1211	* cpu_base->*next_timer is recomputed by __hrtimer_get_next_event()
1212	* cpu_base->*expires_next is only set by hrtimer_reprogram()
1213	*/
1214	hrtimer_reprogram(timer: cpu_base->softirq_next_timer, reprogram);
1215	}
1216
1217	static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
1218	u64 delta_ns, const enum hrtimer_mode mode,
1219	struct hrtimer_clock_base *base)
1220	{
1221	struct hrtimer_cpu_base *this_cpu_base = this_cpu_ptr(&hrtimer_bases);
1222	struct hrtimer_clock_base *new_base;
1223	bool force_local, first;
1224
1225	/*
1226	* If the timer is on the local cpu base and is the first expiring
1227	* timer then this might end up reprogramming the hardware twice
1228	* (on removal and on enqueue). To avoid that by prevent the
1229	* reprogram on removal, keep the timer local to the current CPU
1230	* and enforce reprogramming after it is queued no matter whether
1231	* it is the new first expiring timer again or not.
1232	*/
1233	force_local = base->cpu_base == this_cpu_base;
1234	force_local &= base->cpu_base->next_timer == timer;
1235
1236	/*
1237	* Don't force local queuing if this enqueue happens on a unplugged
1238	* CPU after hrtimer_cpu_dying() has been invoked.
1239	*/
1240	force_local &= this_cpu_base->online;
1241
1242	/*
1243	* Remove an active timer from the queue. In case it is not queued
1244	* on the current CPU, make sure that remove_hrtimer() updates the
1245	* remote data correctly.
1246	*
1247	* If it's on the current CPU and the first expiring timer, then
1248	* skip reprogramming, keep the timer local and enforce
1249	* reprogramming later if it was the first expiring timer. This
1250	* avoids programming the underlying clock event twice (once at
1251	* removal and once after enqueue).
1252	*/
1253	remove_hrtimer(timer, base, restart: true, keep_local: force_local);
1254
1255	if (mode & HRTIMER_MODE_REL)
1256	tim = ktime_add_safe(tim, base->get_time());
1257
1258	tim = hrtimer_update_lowres(timer, tim, mode);
1259
1260	hrtimer_set_expires_range_ns(timer, time: tim, delta: delta_ns);
1261
1262	/ Switch the timer base, if necessary: /
1263	if (!force_local) {
1264	new_base = switch_hrtimer_base(timer, base,
1265	pinned: mode & HRTIMER_MODE_PINNED);
1266	} else {
1267	new_base = base;
1268	}
1269
1270	first = enqueue_hrtimer(timer, base: new_base, mode);
1271	if (!force_local) {
1272	/*
1273	* If the current CPU base is online, then the timer is
1274	* never queued on a remote CPU if it would be the first
1275	* expiring timer there.
1276	*/
1277	if (hrtimer_base_is_online(base: this_cpu_base))
1278	return first;
1279
1280	/*
1281	* Timer was enqueued remote because the current base is
1282	* already offline. If the timer is the first to expire,
1283	* kick the remote CPU to reprogram the clock event.
1284	*/
1285	if (first) {
1286	struct hrtimer_cpu_base *new_cpu_base = new_base->cpu_base;
1287
1288	smp_call_function_single_async(cpu: new_cpu_base->cpu, csd: &new_cpu_base->csd);
1289	}
1290	return `0`;
1291	}
1292
1293	/*
1294	* Timer was forced to stay on the current CPU to avoid
1295	* reprogramming on removal and enqueue. Force reprogram the
1296	* hardware by evaluating the new first expiring timer.
1297	*/
1298	hrtimer_force_reprogram(cpu_base: new_base->cpu_base, skip_equal: `1`);
1299	return `0`;
1300	}
1301
1302	/**
1303	* hrtimer_start_range_ns - (re)start an hrtimer
1304	* @timer: the timer to be added
1305	* @tim: expiry time
1306	* @delta_ns: "slack" range for the timer
1307	* @mode: timer mode: absolute (HRTIMER_MODE_ABS) or
1308	* relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED);
1309	* softirq based mode is considered for debug purpose only!
1310	*/
1311	void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
1312	u64 delta_ns, const enum hrtimer_mode mode)
1313	{
1314	struct hrtimer_clock_base *base;
1315	unsigned long flags;
1316
1317	/*
1318	* Check whether the HRTIMER_MODE_SOFT bit and hrtimer.is_soft
1319	* match on CONFIG_PREEMPT_RT = n. With PREEMPT_RT check the hard
1320	* expiry mode because unmarked timers are moved to softirq expiry.
1321	*/
1322	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
1323	WARN_ON_ONCE(!(mode & HRTIMER_MODE_SOFT) ^ !timer->is_soft);
1324	else
1325	WARN_ON_ONCE(!(mode & HRTIMER_MODE_HARD) ^ !timer->is_hard);
1326
1327	base = lock_hrtimer_base(timer, flags: &flags);
1328
1329	if (__hrtimer_start_range_ns(timer, tim, delta_ns, mode, base))
1330	hrtimer_reprogram(timer, reprogram: true);
1331
1332	unlock_hrtimer_base(timer, flags: &flags);
1333	}
1334	EXPORT_SYMBOL_GPL(hrtimer_start_range_ns);
1335
1336	/**
1337	* hrtimer_try_to_cancel - try to deactivate a timer
1338	* @timer: hrtimer to stop
1339	*
1340	* Returns:
1341	*
1342	* * 0 when the timer was not active
1343	* * 1 when the timer was active
1344	* * -1 when the timer is currently executing the callback function and
1345	* cannot be stopped
1346	*/
1347	int hrtimer_try_to_cancel(struct hrtimer *timer)
1348	{
1349	struct hrtimer_clock_base *base;
1350	unsigned long flags;
1351	int ret = -`1`;
1352
1353	/*
1354	* Check lockless first. If the timer is not active (neither
1355	* enqueued nor running the callback, nothing to do here. The
1356	* base lock does not serialize against a concurrent enqueue,
1357	* so we can avoid taking it.
1358	*/
1359	if (!hrtimer_active(timer))
1360	return `0`;
1361
1362	base = lock_hrtimer_base(timer, flags: &flags);
1363
1364	if (!hrtimer_callback_running(timer))
1365	ret = remove_hrtimer(timer, base, restart: false, keep_local: false);
1366
1367	unlock_hrtimer_base(timer, flags: &flags);
1368
1369	return ret;
1370
1371	}
1372	EXPORT_SYMBOL_GPL(hrtimer_try_to_cancel);
1373
1374	#ifdef CONFIG_PREEMPT_RT
1375	static void hrtimer_cpu_base_init_expiry_lock(struct hrtimer_cpu_base *base)
1376	{
1377	spin_lock_init(&base->softirq_expiry_lock);
1378	}
1379
1380	static void hrtimer_cpu_base_lock_expiry(struct hrtimer_cpu_base *base)
1381	__acquires(&base->softirq_expiry_lock)
1382	{
1383	spin_lock(&base->softirq_expiry_lock);
1384	}
1385
1386	static void hrtimer_cpu_base_unlock_expiry(struct hrtimer_cpu_base *base)
1387	__releases(&base->softirq_expiry_lock)
1388	{
1389	spin_unlock(&base->softirq_expiry_lock);
1390	}
1391
1392	/*
1393	* The counterpart to hrtimer_cancel_wait_running().
1394	*
1395	* If there is a waiter for cpu_base->expiry_lock, then it was waiting for
1396	* the timer callback to finish. Drop expiry_lock and reacquire it. That
1397	* allows the waiter to acquire the lock and make progress.
1398	*/
1399	static void hrtimer_sync_wait_running(struct hrtimer_cpu_base *cpu_base,
1400	unsigned long flags)
1401	{
1402	if (atomic_read(&cpu_base->timer_waiters)) {
1403	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
1404	spin_unlock(&cpu_base->softirq_expiry_lock);
1405	spin_lock(&cpu_base->softirq_expiry_lock);
1406	raw_spin_lock_irq(&cpu_base->lock);
1407	}
1408	}
1409
1410	#ifdef CONFIG_SMP
1411	static __always_inline bool is_migration_base(struct hrtimer_clock_base *base)
1412	{
1413	return base == &migration_base;
1414	}
1415	#else
1416	static __always_inline bool is_migration_base(struct hrtimer_clock_base *base)
1417	{
1418	return false;
1419	}
1420	#endif
1421
1422	/*
1423	* This function is called on PREEMPT_RT kernels when the fast path
1424	* deletion of a timer failed because the timer callback function was
1425	* running.
1426	*
1427	* This prevents priority inversion: if the soft irq thread is preempted
1428	* in the middle of a timer callback, then calling hrtimer_cancel() can
1429	* lead to two issues:
1430	*
1431	* - If the caller is on a remote CPU then it has to spin wait for the timer
1432	* handler to complete. This can result in unbound priority inversion.
1433	*
1434	* - If the caller originates from the task which preempted the timer
1435	* handler on the same CPU, then spin waiting for the timer handler to
1436	* complete is never going to end.
1437	*/
1438	void hrtimer_cancel_wait_running(const struct hrtimer *timer)
1439	{
1440	/ Lockless read. Prevent the compiler from reloading it below /
1441	struct hrtimer_clock_base *base = READ_ONCE(timer->base);
1442
1443	/*
1444	* Just relax if the timer expires in hard interrupt context or if
1445	* it is currently on the migration base.
1446	*/
1447	if (!timer->is_soft \|\| is_migration_base(base)) {
1448	cpu_relax();
1449	return;
1450	}
1451
1452	/*
1453	* Mark the base as contended and grab the expiry lock, which is
1454	* held by the softirq across the timer callback. Drop the lock
1455	* immediately so the softirq can expire the next timer. In theory
1456	* the timer could already be running again, but that's more than
1457	* unlikely and just causes another wait loop.
1458	*/
1459	atomic_inc(&base->cpu_base->timer_waiters);
1460	spin_lock_bh(&base->cpu_base->softirq_expiry_lock);
1461	atomic_dec(&base->cpu_base->timer_waiters);
1462	spin_unlock_bh(&base->cpu_base->softirq_expiry_lock);
1463	}
1464	#else
/* !CONFIG_PREEMPT_RT: the expiry lock helpers are empty stubs */
static inline void hrtimer_cpu_base_init_expiry_lock(struct hrtimer_cpu_base *base)
{
}

static inline void hrtimer_cpu_base_lock_expiry(struct hrtimer_cpu_base *base)
{
}

static inline void hrtimer_cpu_base_unlock_expiry(struct hrtimer_cpu_base *base)
{
}

static inline void hrtimer_sync_wait_running(struct hrtimer_cpu_base *base,
					     unsigned long flags)
{
}
1473	#endif
1474
/**
 * hrtimer_cancel - cancel a timer and wait for the handler to finish.
 * @timer:	the timer to be cancelled
 *
 * Retries hrtimer_try_to_cancel() for as long as it reports that the
 * callback is running, calling hrtimer_cancel_wait_running() between
 * attempts.
 *
 * Returns:
 *  0 when the timer was not active
 *  1 when the timer was active
 */
int hrtimer_cancel(struct hrtimer *timer)
{
	int ret;

	do {
		ret = hrtimer_try_to_cancel(timer);

		if (ret < 0)
			hrtimer_cancel_wait_running(timer);
	} while (ret < 0);
	return ret;
}
EXPORT_SYMBOL_GPL(hrtimer_cancel);
1496
1497	/**
1498	* __hrtimer_get_remaining - get remaining time for the timer
1499	* @timer: the timer to read
1500	* @adjust: adjust relative timers when CONFIG_TIME_LOW_RES=y
1501	*/
1502	ktime_t __hrtimer_get_remaining(const struct hrtimer *timer, bool adjust)
1503	{
1504	unsigned long flags;
1505	ktime_t rem;
1506
1507	lock_hrtimer_base(timer, flags: &flags);
1508	if (IS_ENABLED(CONFIG_TIME_LOW_RES) && adjust)
1509	rem = hrtimer_expires_remaining_adjusted(timer);
1510	else
1511	rem = hrtimer_expires_remaining(timer);
1512	unlock_hrtimer_base(timer, flags: &flags);
1513
1514	return rem;
1515	}
1516	EXPORT_SYMBOL_GPL(__hrtimer_get_remaining);
1517
1518	#ifdef CONFIG_NO_HZ_COMMON
1519	/**
1520	* hrtimer_get_next_event - get the time until next expiry event
1521	*
1522	* Returns the next expiry time or KTIME_MAX if no timer is pending.
1523	*/
1524	u64 hrtimer_get_next_event(void)
1525	{
1526	struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
1527	u64 expires = KTIME_MAX;
1528	unsigned long flags;
1529
1530	raw_spin_lock_irqsave(&cpu_base->lock, flags);
1531
1532	if (!hrtimer_hres_active(cpu_base))
1533	expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_ALL);
1534
1535	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
1536
1537	return expires;
1538	}
1539
1540	/**
1541	* hrtimer_next_event_without - time until next expiry event w/o one timer
1542	* @exclude: timer to exclude
1543	*
1544	* Returns the next expiry time over all timers except for the @exclude one or
1545	* KTIME_MAX if none of them is pending.
1546	*/
1547	u64 hrtimer_next_event_without(const struct hrtimer *exclude)
1548	{
1549	struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
1550	u64 expires = KTIME_MAX;
1551	unsigned long flags;
1552
1553	raw_spin_lock_irqsave(&cpu_base->lock, flags);
1554
1555	if (hrtimer_hres_active(cpu_base)) {
1556	unsigned int active;
1557
1558	if (!cpu_base->softirq_activated) {
1559	active = cpu_base->active_bases & HRTIMER_ACTIVE_SOFT;
1560	expires = __hrtimer_next_event_base(cpu_base, exclude,
1561	active, KTIME_MAX);
1562	}
1563	active = cpu_base->active_bases & HRTIMER_ACTIVE_HARD;
1564	expires = __hrtimer_next_event_base(cpu_base, exclude, active,
1565	expires_next: expires);
1566	}
1567
1568	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
1569
1570	return expires;
1571	}
1572	#endif
1573
1574	static inline int hrtimer_clockid_to_base(clockid_t clock_id)
1575	{
1576	switch (clock_id) {
1577	case CLOCK_REALTIME:
1578	return HRTIMER_BASE_REALTIME;
1579	case CLOCK_MONOTONIC:
1580	return HRTIMER_BASE_MONOTONIC;
1581	case CLOCK_BOOTTIME:
1582	return HRTIMER_BASE_BOOTTIME;
1583	case CLOCK_TAI:
1584	return HRTIMER_BASE_TAI;
1585	default:
1586	WARN(`1`, "Invalid clockid %d. Using MONOTONIC\n", clock_id);
1587	return HRTIMER_BASE_MONOTONIC;
1588	}
1589	}
1590
1591	static void __hrtimer_setup(struct hrtimer *timer,
1592	enum hrtimer_restart (function)(struct* hrtimer *),
1593	clockid_t clock_id, enum hrtimer_mode mode)
1594	{
1595	bool softtimer = !!(mode & HRTIMER_MODE_SOFT);
1596	struct hrtimer_cpu_base *cpu_base;
1597	int base;
1598
1599	/*
1600	* On PREEMPT_RT enabled kernels hrtimers which are not explicitly
1601	* marked for hard interrupt expiry mode are moved into soft
1602	* interrupt context for latency reasons and because the callbacks
1603	* can invoke functions which might sleep on RT, e.g. spin_lock().
1604	*/
1605	if (IS_ENABLED(CONFIG_PREEMPT_RT) && !(mode & HRTIMER_MODE_HARD))
1606	softtimer = true;
1607
1608	memset(timer, `0`, sizeof(struct hrtimer));
1609
1610	cpu_base = raw_cpu_ptr(&hrtimer_bases);
1611
1612	/*
1613	* POSIX magic: Relative CLOCK_REALTIME timers are not affected by
1614	* clock modifications, so they needs to become CLOCK_MONOTONIC to
1615	* ensure POSIX compliance.
1616	*/
1617	if (clock_id == CLOCK_REALTIME && mode & HRTIMER_MODE_REL)
1618	clock_id = CLOCK_MONOTONIC;
1619
1620	base = softtimer ? HRTIMER_MAX_CLOCK_BASES / `2` : `0`;
1621	base += hrtimer_clockid_to_base(clock_id);
1622	timer->is_soft = softtimer;
1623	timer->is_hard = !!(mode & HRTIMER_MODE_HARD);
1624	timer->base = &cpu_base->clock_base[base];
1625	timerqueue_init(node: &timer->node);
1626
1627	if (WARN_ON_ONCE(!function))
1628	ACCESS_PRIVATE(timer, function) = hrtimer_dummy_timeout;
1629	else
1630	ACCESS_PRIVATE(timer, function) = function;
1631	}
1632
1633	/**
1634	* hrtimer_setup - initialize a timer to the given clock
1635	* @timer: the timer to be initialized
1636	* @function: the callback function
1637	* @clock_id: the clock to be used
1638	* @mode: The modes which are relevant for initialization:
1639	* HRTIMER_MODE_ABS, HRTIMER_MODE_REL, HRTIMER_MODE_ABS_SOFT,
1640	* HRTIMER_MODE_REL_SOFT
1641	*
1642	* The PINNED variants of the above can be handed in,
1643	* but the PINNED bit is ignored as pinning happens
1644	* when the hrtimer is started
1645	*/
1646	void hrtimer_setup(struct hrtimer timer, enum* hrtimer_restart (function)(struct* hrtimer *),
1647	clockid_t clock_id, enum hrtimer_mode mode)
1648	{
1649	debug_setup(timer, clockid: clock_id, mode);
1650	__hrtimer_setup(timer, function, clock_id, mode);
1651	}
1652	EXPORT_SYMBOL_GPL(hrtimer_setup);
1653
1654	/**
1655	* hrtimer_setup_on_stack - initialize a timer on stack memory
1656	* @timer: The timer to be initialized
1657	* @function: the callback function
1658	* @clock_id: The clock to be used
1659	* @mode: The timer mode
1660	*
1661	* Similar to hrtimer_setup(), except that this one must be used if struct hrtimer is in stack
1662	* memory.
1663	*/
1664	void hrtimer_setup_on_stack(struct hrtimer *timer,
1665	enum hrtimer_restart (function)(struct* hrtimer *),
1666	clockid_t clock_id, enum hrtimer_mode mode)
1667	{
1668	debug_setup_on_stack(timer, clockid: clock_id, mode);
1669	__hrtimer_setup(timer, function, clock_id, mode);
1670	}
1671	EXPORT_SYMBOL_GPL(hrtimer_setup_on_stack);
1672
1673	/*
1674	* A timer is active, when it is enqueued into the rbtree or the
1675	* callback function is running or it's in the state of being migrated
1676	* to another cpu.
1677	*
1678	* It is important for this function to not return a false negative.
1679	*/
1680	bool hrtimer_active(const struct hrtimer *timer)
1681	{
1682	struct hrtimer_clock_base *base;
1683	unsigned int seq;
1684
1685	do {
1686	base = READ_ONCE(timer->base);
1687	seq = raw_read_seqcount_begin(&base->seq);
1688
1689	if (timer->state != HRTIMER_STATE_INACTIVE \|\|
1690	base->running == timer)
1691	return true;
1692
1693	} while (read_seqcount_retry(&base->seq, seq) \|\|
1694	base != READ_ONCE(timer->base));
1695
1696	return false;
1697	}
1698	EXPORT_SYMBOL_GPL(hrtimer_active);
1699
1700	/*
1701	* The write_seqcount_barrier()s in __run_hrtimer() split the thing into 3
1702	* distinct sections:
1703	*
1704	* - queued: the timer is queued
1705	* - callback: the timer is being ran
1706	* - post: the timer is inactive or (re)queued
1707	*
1708	* On the read side we ensure we observe timer->state and cpu_base->running
1709	* from the same section, if anything changed while we looked at it, we retry.
1710	* This includes timer->base changing because sequence numbers alone are
1711	* insufficient for that.
1712	*
1713	* The sequence numbers are required because otherwise we could still observe
1714	* a false negative if the read side got smeared over multiple consecutive
1715	* __run_hrtimer() invocations.
1716	*/
1717
1718	static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base,
1719	struct hrtimer_clock_base *base,
1720	struct hrtimer timer, ktime_t now,
1721	unsigned long flags) __must_hold(&cpu_base->lock)
1722	{
1723	enum hrtimer_restart (fn)(struct* hrtimer *);
1724	bool expires_in_hardirq;
1725	int restart;
1726
1727	lockdep_assert_held(&cpu_base->lock);
1728
1729	debug_deactivate(timer);
1730	base->running = timer;
1731
1732	/*
1733	* Separate the ->running assignment from the ->state assignment.
1734	*
1735	* As with a regular write barrier, this ensures the read side in
1736	* hrtimer_active() cannot observe base->running == NULL &&
1737	* timer->state == INACTIVE.
1738	*/
1739	raw_write_seqcount_barrier(&base->seq);
1740
1741	__remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, reprogram: `0`);
1742	fn = ACCESS_PRIVATE(timer, function);
1743
1744	/*
1745	* Clear the 'is relative' flag for the TIME_LOW_RES case. If the
1746	* timer is restarted with a period then it becomes an absolute
1747	* timer. If its not restarted it does not matter.
1748	*/
1749	if (IS_ENABLED(CONFIG_TIME_LOW_RES))
1750	timer->is_rel = false;
1751
1752	/*
1753	* The timer is marked as running in the CPU base, so it is
1754	* protected against migration to a different CPU even if the lock
1755	* is dropped.
1756	*/
1757	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
1758	trace_hrtimer_expire_entry(hrtimer: timer, now);
1759	expires_in_hardirq = lockdep_hrtimer_enter(timer);
1760
1761	restart = fn(timer);
1762
1763	lockdep_hrtimer_exit(expires_in_hardirq);
1764	trace_hrtimer_expire_exit(hrtimer: timer);
1765	raw_spin_lock_irq(&cpu_base->lock);
1766
1767	/*
1768	* Note: We clear the running state after enqueue_hrtimer and
1769	* we do not reprogram the event hardware. Happens either in
1770	* hrtimer_start_range_ns() or in hrtimer_interrupt()
1771	*
1772	* Note: Because we dropped the cpu_base->lock above,
1773	* hrtimer_start_range_ns() can have popped in and enqueued the timer
1774	* for us already.
1775	*/
1776	if (restart != HRTIMER_NORESTART &&
1777	!(timer->state & HRTIMER_STATE_ENQUEUED))
1778	enqueue_hrtimer(timer, base, mode: HRTIMER_MODE_ABS);
1779
1780	/*
1781	* Separate the ->running assignment from the ->state assignment.
1782	*
1783	* As with a regular write barrier, this ensures the read side in
1784	* hrtimer_active() cannot observe base->running.timer == NULL &&
1785	* timer->state == INACTIVE.
1786	*/
1787	raw_write_seqcount_barrier(&base->seq);
1788
1789	WARN_ON_ONCE(base->running != timer);
1790	base->running = NULL;
1791	}
1792
1793	static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now,
1794	unsigned long flags, unsigned int active_mask)
1795	{
1796	struct hrtimer_clock_base *base;
1797	unsigned int active = cpu_base->active_bases & active_mask;
1798
1799	for_each_active_base(base, cpu_base, active) {
1800	struct timerqueue_node *node;
1801	ktime_t basenow;
1802
1803	basenow = ktime_add(now, base->offset);
1804
1805	while ((node = timerqueue_getnext(head: &base->active))) {
1806	struct hrtimer *timer;
1807
1808	timer = container_of(node, struct hrtimer, node);
1809
1810	/*
1811	* The immediate goal for using the softexpires is
1812	* minimizing wakeups, not running timers at the
1813	* earliest interrupt after their soft expiration.
1814	* This allows us to avoid using a Priority Search
1815	* Tree, which can answer a stabbing query for
1816	* overlapping intervals and instead use the simple
1817	* BST we already have.
1818	* We don't add extra wakeups by delaying timers that
1819	* are right-of a not yet expired timer, because that
1820	* timer will have to trigger a wakeup anyway.
1821	*/
1822	if (basenow < hrtimer_get_softexpires_tv64(timer))
1823	break;
1824
1825	__run_hrtimer(cpu_base, base, timer, now: &basenow, flags);
1826	if (active_mask == HRTIMER_ACTIVE_SOFT)
1827	hrtimer_sync_wait_running(base: cpu_base, flags);
1828	}
1829	}
1830	}
1831
1832	static __latent_entropy void hrtimer_run_softirq(void)
1833	{
1834	struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
1835	unsigned long flags;
1836	ktime_t now;
1837
1838	hrtimer_cpu_base_lock_expiry(base: cpu_base);
1839	raw_spin_lock_irqsave(&cpu_base->lock, flags);
1840
1841	now = hrtimer_update_base(base: cpu_base);
1842	__hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_SOFT);
1843
1844	cpu_base->softirq_activated = `0`;
1845	hrtimer_update_softirq_timer(cpu_base, reprogram: true);
1846
1847	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
1848	hrtimer_cpu_base_unlock_expiry(base: cpu_base);
1849	}
1850
#ifdef CONFIG_HIGH_RES_TIMERS

/*
 * High resolution timer interrupt
 * Called with interrupts disabled
 *
 * Runs the expired hard timers, raises the hrtimer softirq when the soft
 * expiry time has been reached and programs @dev's next event. If the
 * programming fails because the next expiry is already in the past, the
 * run is retried a limited number of times before a hang is recorded
 * and the next event is pushed out.
 */
void hrtimer_interrupt(struct clock_event_device *dev)
{
	struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
	ktime_t expires_next, now, entry_time, delta;
	unsigned long flags;
	int retries = 0;

	BUG_ON(!cpu_base->hres_active);
	cpu_base->nr_events++;
	dev->next_event = KTIME_MAX;

	raw_spin_lock_irqsave(&cpu_base->lock, flags);
	entry_time = now = hrtimer_update_base(cpu_base);
retry:
	cpu_base->in_hrtirq = 1;
	/*
	 * We set expires_next to KTIME_MAX here with cpu_base->lock
	 * held to prevent that a timer is enqueued in our queue via
	 * the migration code. This does not affect enqueueing of
	 * timers which run their callback and need to be requeued on
	 * this CPU.
	 */
	cpu_base->expires_next = KTIME_MAX;

	if (!ktime_before(now, cpu_base->softirq_expires_next)) {
		cpu_base->softirq_expires_next = KTIME_MAX;
		cpu_base->softirq_activated = 1;
		raise_timer_softirq(HRTIMER_SOFTIRQ);
	}

	__hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD);

	/* Reevaluate the clock bases for the [soft] next expiry */
	expires_next = hrtimer_update_next_event(cpu_base);
	/*
	 * Store the new expiry value so the migration code can verify
	 * against it.
	 */
	cpu_base->expires_next = expires_next;
	cpu_base->in_hrtirq = 0;
	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);

	/* Reprogramming necessary ? */
	if (!tick_program_event(expires_next, 0)) {
		cpu_base->hang_detected = 0;
		return;
	}

	/*
	 * The next timer was already expired due to:
	 * - tracing
	 * - long lasting callbacks
	 * - being scheduled away when running in a VM
	 *
	 * We need to prevent that we loop forever in the hrtimer
	 * interrupt routine. We give it 3 attempts to avoid
	 * overreacting on some spurious event.
	 *
	 * Acquire base lock for updating the offsets and retrieving
	 * the current time.
	 */
	raw_spin_lock_irqsave(&cpu_base->lock, flags);
	now = hrtimer_update_base(cpu_base);
	cpu_base->nr_retries++;
	if (++retries < 3)
		goto retry;
	/*
	 * Give the system a chance to do something else than looping
	 * here. We stored the entry time, so we know exactly how long
	 * we spent here. We schedule the next event this amount of
	 * time away.
	 */
	cpu_base->nr_hangs++;
	cpu_base->hang_detected = 1;
	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);

	delta = ktime_sub(now, entry_time);
	if ((unsigned int)delta > cpu_base->max_hang_time)
		cpu_base->max_hang_time = (unsigned int) delta;
	/*
	 * Limit it to a sensible value as we enforce a longer
	 * delay. Give the CPU at least 100ms to catch up.
	 */
	if (delta > 100 * NSEC_PER_MSEC)
		expires_next = ktime_add_ns(now, 100 * NSEC_PER_MSEC);
	else
		expires_next = ktime_add(now, delta);
	tick_program_event(expires_next, 1);
	pr_warn_once("hrtimer: interrupt took %llu ns\n", ktime_to_ns(delta));
}
#endif /* CONFIG_HIGH_RES_TIMERS */
1948
1949	/*
1950	* Called from run_local_timers in hardirq context every jiffy
1951	*/
1952	void hrtimer_run_queues(void)
1953	{
1954	struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
1955	unsigned long flags;
1956	ktime_t now;
1957
1958	if (hrtimer_hres_active(cpu_base))
1959	return;
1960
1961	/*
1962	* This _is_ ugly: We have to check periodically, whether we
1963	* can switch to highres and / or nohz mode. The clocksource
1964	* switch happens with xtime_lock held. Notification from
1965	* there only sets the check bit in the tick_oneshot code,
1966	* otherwise we might deadlock vs. xtime_lock.
1967	*/
1968	if (tick_check_oneshot_change(allow_nohz: !hrtimer_is_hres_enabled())) {
1969	hrtimer_switch_to_hres();
1970	return;
1971	}
1972
1973	raw_spin_lock_irqsave(&cpu_base->lock, flags);
1974	now = hrtimer_update_base(base: cpu_base);
1975
1976	if (!ktime_before(cmp1: now, cmp2: cpu_base->softirq_expires_next)) {
1977	cpu_base->softirq_expires_next = KTIME_MAX;
1978	cpu_base->softirq_activated = `1`;
1979	raise_timer_softirq(nr: HRTIMER_SOFTIRQ);
1980	}
1981
1982	__hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD);
1983	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
1984	}
1985
1986	/*
1987	* Sleep related functions:
1988	*/
1989	static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer)
1990	{
1991	struct hrtimer_sleeper *t =
1992	container_of(timer, struct hrtimer_sleeper, timer);
1993	struct task_struct *task = t->task;
1994
1995	t->task = NULL;
1996	if (task)
1997	wake_up_process(tsk: task);
1998
1999	return HRTIMER_NORESTART;
2000	}
2001
2002	/**
2003	* hrtimer_sleeper_start_expires - Start a hrtimer sleeper timer
2004	* @sl: sleeper to be started
2005	* @mode: timer mode abs/rel
2006	*
2007	* Wrapper around hrtimer_start_expires() for hrtimer_sleeper based timers
2008	* to allow PREEMPT_RT to tweak the delivery mode (soft/hardirq context)
2009	*/
2010	void hrtimer_sleeper_start_expires(struct hrtimer_sleeper *sl,
2011	enum hrtimer_mode mode)
2012	{
2013	/*
2014	* Make the enqueue delivery mode check work on RT. If the sleeper
2015	* was initialized for hard interrupt delivery, force the mode bit.
2016	* This is a special case for hrtimer_sleepers because
2017	* __hrtimer_setup_sleeper() determines the delivery mode on RT so the
2018	* fiddling with this decision is avoided at the call sites.
2019	*/
2020	if (IS_ENABLED(CONFIG_PREEMPT_RT) && sl->timer.is_hard)
2021	mode \|= HRTIMER_MODE_HARD;
2022
2023	hrtimer_start_expires(timer: &sl->timer, mode);
2024	}
2025	EXPORT_SYMBOL_GPL(hrtimer_sleeper_start_expires);
2026
2027	static void __hrtimer_setup_sleeper(struct hrtimer_sleeper *sl,
2028	clockid_t clock_id, enum hrtimer_mode mode)
2029	{
2030	/*
2031	* On PREEMPT_RT enabled kernels hrtimers which are not explicitly
2032	* marked for hard interrupt expiry mode are moved into soft
2033	* interrupt context either for latency reasons or because the
2034	* hrtimer callback takes regular spinlocks or invokes other
2035	* functions which are not suitable for hard interrupt context on
2036	* PREEMPT_RT.
2037	*
2038	* The hrtimer_sleeper callback is RT compatible in hard interrupt
2039	* context, but there is a latency concern: Untrusted userspace can
2040	* spawn many threads which arm timers for the same expiry time on
2041	* the same CPU. That causes a latency spike due to the wakeup of
2042	* a gazillion threads.
2043	*
2044	* OTOH, privileged real-time user space applications rely on the
2045	* low latency of hard interrupt wakeups. If the current task is in
2046	* a real-time scheduling class, mark the mode for hard interrupt
2047	* expiry.
2048	*/
2049	if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
2050	if (rt_or_dl_task_policy(current) && !(mode & HRTIMER_MODE_SOFT))
2051	mode \|= HRTIMER_MODE_HARD;
2052	}
2053
2054	__hrtimer_setup(timer: &sl->timer, function: hrtimer_wakeup, clock_id, mode);
2055	sl->task = current;
2056	}
2057
2058	/**
2059	* hrtimer_setup_sleeper_on_stack - initialize a sleeper in stack memory
2060	* @sl: sleeper to be initialized
2061	* @clock_id: the clock to be used
2062	* @mode: timer mode abs/rel
2063	*/
2064	void hrtimer_setup_sleeper_on_stack(struct hrtimer_sleeper *sl,
2065	clockid_t clock_id, enum hrtimer_mode mode)
2066	{
2067	debug_setup_on_stack(timer: &sl->timer, clockid: clock_id, mode);
2068	__hrtimer_setup_sleeper(sl, clock_id, mode);
2069	}
2070	EXPORT_SYMBOL_GPL(hrtimer_setup_sleeper_on_stack);
2071
2072	int nanosleep_copyout(struct restart_block restart, struct* timespec64 *ts)
2073	{
2074	switch(restart->nanosleep.type) {
2075	#ifdef CONFIG_COMPAT_32BIT_TIME
2076	case TT_COMPAT:
2077	if (put_old_timespec32(ts, restart->nanosleep.compat_rmtp))
2078	return -EFAULT;
2079	break;
2080	#endif
2081	case TT_NATIVE:
2082	if (put_timespec64(ts, uts: restart->nanosleep.rmtp))
2083	return -EFAULT;
2084	break;
2085	default:
2086	BUG();
2087	}
2088	return -ERESTART_RESTARTBLOCK;
2089	}
2090
2091	static int __sched do_nanosleep(struct hrtimer_sleeper t, enum* hrtimer_mode mode)
2092	{
2093	struct restart_block *restart;
2094
2095	do {
2096	set_current_state(TASK_INTERRUPTIBLE\|TASK_FREEZABLE);
2097	hrtimer_sleeper_start_expires(t, mode);
2098
2099	if (likely(t->task))
2100	schedule();
2101
2102	hrtimer_cancel(&t->timer);
2103	mode = HRTIMER_MODE_ABS;
2104
2105	} while (t->task && !signal_pending(current));
2106
2107	__set_current_state(TASK_RUNNING);
2108
2109	if (!t->task)
2110	return `0`;
2111
2112	restart = &current->restart_block;
2113	if (restart->nanosleep.type != TT_NONE) {
2114	ktime_t rem = hrtimer_expires_remaining(timer: &t->timer);
2115	struct timespec64 rmt;
2116
2117	if (rem <= `0`)
2118	return `0`;
2119	rmt = ktime_to_timespec64(rem);
2120
2121	return nanosleep_copyout(restart, ts: &rmt);
2122	}
2123	return -ERESTART_RESTARTBLOCK;
2124	}
2125
2126	static long __sched hrtimer_nanosleep_restart(struct restart_block *restart)
2127	{
2128	struct hrtimer_sleeper t;
2129	int ret;
2130
2131	hrtimer_setup_sleeper_on_stack(&t, restart->nanosleep.clockid, HRTIMER_MODE_ABS);
2132	hrtimer_set_expires_tv64(timer: &t.timer, tv64: restart->nanosleep.expires);
2133	ret = do_nanosleep(t: &t, mode: HRTIMER_MODE_ABS);
2134	destroy_hrtimer_on_stack(&t.timer);
2135	return ret;
2136	}
2137
2138	long hrtimer_nanosleep(ktime_t rqtp, const enum hrtimer_mode mode,
2139	const clockid_t clockid)
2140	{
2141	struct restart_block *restart;
2142	struct hrtimer_sleeper t;
2143	int ret = `0`;
2144
2145	hrtimer_setup_sleeper_on_stack(&t, clockid, mode);
2146	hrtimer_set_expires_range_ns(timer: &t.timer, time: rqtp, current->timer_slack_ns);
2147	ret = do_nanosleep(t: &t, mode);
2148	if (ret != -ERESTART_RESTARTBLOCK)
2149	goto out;
2150
2151	/ Absolute timers do not update the rmtp value and restart: /
2152	if (mode == HRTIMER_MODE_ABS) {
2153	ret = -ERESTARTNOHAND;
2154	goto out;
2155	}
2156
2157	restart = &current->restart_block;
2158	restart->nanosleep.clockid = t.timer.base->clockid;
2159	restart->nanosleep.expires = hrtimer_get_expires_tv64(timer: &t.timer);
2160	set_restart_fn(restart, fn: hrtimer_nanosleep_restart);
2161	out:
2162	destroy_hrtimer_on_stack(&t.timer);
2163	return ret;
2164	}
2165
#ifdef CONFIG_64BIT

/*
 * nanosleep syscall: copy the requested interval from user space, reject
 * invalid values with -EINVAL and sleep relative on CLOCK_MONOTONIC.
 * The restart block records whether and where the remaining time has to
 * be written (TT_NATIVE with @rmtp, TT_NONE when @rmtp is NULL).
 */
SYSCALL_DEFINE2(nanosleep, struct __kernel_timespec __user *, rqtp,
		struct __kernel_timespec __user *, rmtp)
{
	struct timespec64 tu;

	if (get_timespec64(&tu, rqtp))
		return -EFAULT;

	if (!timespec64_valid(&tu))
		return -EINVAL;

	current->restart_block.fn = do_no_restart_syscall;
	current->restart_block.nanosleep.type = rmtp ? TT_NATIVE : TT_NONE;
	current->restart_block.nanosleep.rmtp = rmtp;
	return hrtimer_nanosleep(timespec64_to_ktime(tu), HRTIMER_MODE_REL,
				 CLOCK_MONOTONIC);
}

#endif
2187
#ifdef CONFIG_COMPAT_32BIT_TIME

/*
 * nanosleep syscall variant taking struct old_timespec32: copy the
 * requested interval from user space, reject invalid values with -EINVAL
 * and sleep relative on CLOCK_MONOTONIC. The restart block records
 * whether and where the remaining time has to be written (TT_COMPAT with
 * @rmtp, TT_NONE when @rmtp is NULL).
 */
SYSCALL_DEFINE2(nanosleep_time32, struct old_timespec32 __user *, rqtp,
		struct old_timespec32 __user *, rmtp)
{
	struct timespec64 tu;

	if (get_old_timespec32(&tu, rqtp))
		return -EFAULT;

	if (!timespec64_valid(&tu))
		return -EINVAL;

	current->restart_block.fn = do_no_restart_syscall;
	current->restart_block.nanosleep.type = rmtp ? TT_COMPAT : TT_NONE;
	current->restart_block.nanosleep.compat_rmtp = rmtp;
	return hrtimer_nanosleep(timespec64_to_ktime(tu), HRTIMER_MODE_REL,
				 CLOCK_MONOTONIC);
}
#endif
2208
2209	/*
2210	* Functions related to boot-time initialization:
2211	*/
2212	int hrtimers_prepare_cpu(unsigned int cpu)
2213	{
2214	struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu);
2215	int i;
2216
2217	for (i = `0`; i < HRTIMER_MAX_CLOCK_BASES; i++) {
2218	struct hrtimer_clock_base *clock_b = &cpu_base->clock_base[i];
2219
2220	clock_b->cpu_base = cpu_base;
2221	seqcount_raw_spinlock_init(&clock_b->seq, &cpu_base->lock);
2222	timerqueue_init_head(head: &clock_b->active);
2223	}
2224
2225	cpu_base->cpu = cpu;
2226	hrtimer_cpu_base_init_expiry_lock(base: cpu_base);
2227	return `0`;
2228	}
2229
2230	int hrtimers_cpu_starting(unsigned int cpu)
2231	{
2232	struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
2233
2234	/ Clear out any left over state from a CPU down operation /
2235	cpu_base->active_bases = `0`;
2236	cpu_base->hres_active = `0`;
2237	cpu_base->hang_detected = `0`;
2238	cpu_base->next_timer = NULL;
2239	cpu_base->softirq_next_timer = NULL;
2240	cpu_base->expires_next = KTIME_MAX;
2241	cpu_base->softirq_expires_next = KTIME_MAX;
2242	cpu_base->online = `1`;
2243	return `0`;
2244	}
2245
2246	#ifdef CONFIG_HOTPLUG_CPU
2247
2248	static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
2249	struct hrtimer_clock_base *new_base)
2250	{
2251	struct hrtimer *timer;
2252	struct timerqueue_node *node;
2253
2254	while ((node = timerqueue_getnext(head: &old_base->active))) {
2255	timer = container_of(node, struct hrtimer, node);
2256	BUG_ON(hrtimer_callback_running(timer));
2257	debug_deactivate(timer);
2258
2259	/*
2260	* Mark it as ENQUEUED not INACTIVE otherwise the
2261	* timer could be seen as !active and just vanish away
2262	* under us on another CPU
2263	*/
2264	__remove_hrtimer(timer, base: old_base, HRTIMER_STATE_ENQUEUED, reprogram: `0`);
2265	timer->base = new_base;
2266	/*
2267	* Enqueue the timers on the new cpu. This does not
2268	* reprogram the event device in case the timer
2269	* expires before the earliest on this CPU, but we run
2270	* hrtimer_interrupt after we migrated everything to
2271	* sort out already expired timers and reprogram the
2272	* event device.
2273	*/
2274	enqueue_hrtimer(timer, base: new_base, mode: HRTIMER_MODE_ABS);
2275	}
2276	}
2277
2278	int hrtimers_cpu_dying(unsigned int dying_cpu)
2279	{
2280	int i, ncpu = cpumask_any_and(cpu_active_mask, housekeeping_cpumask(HK_TYPE_TIMER));
2281	struct hrtimer_cpu_base old_base, new_base;
2282
2283	old_base = this_cpu_ptr(&hrtimer_bases);
2284	new_base = &per_cpu(hrtimer_bases, ncpu);
2285
2286	/*
2287	* The caller is globally serialized and nobody else
2288	* takes two locks at once, deadlock is not possible.
2289	*/
2290	raw_spin_lock(&old_base->lock);
2291	raw_spin_lock_nested(&new_base->lock, SINGLE_DEPTH_NESTING);
2292
2293	for (i = `0`; i < HRTIMER_MAX_CLOCK_BASES; i++) {
2294	migrate_hrtimer_list(old_base: &old_base->clock_base[i],
2295	new_base: &new_base->clock_base[i]);
2296	}
2297
2298	/*
2299	* The migration might have changed the first expiring softirq
2300	* timer on this CPU. Update it.
2301	*/
2302	__hrtimer_get_next_event(cpu_base: new_base, HRTIMER_ACTIVE_SOFT);
2303	/ Tell the other CPU to retrigger the next event /
2304	smp_call_function_single(cpuid: ncpu, func: retrigger_next_event, NULL, wait: `0`);
2305
2306	raw_spin_unlock(&new_base->lock);
2307	old_base->online = `0`;
2308	raw_spin_unlock(&old_base->lock);
2309
2310	return `0`;
2311	}
2312
2313	#endif /* CONFIG_HOTPLUG_CPU */
2314
2315	void __init hrtimers_init(void)
2316	{
2317	hrtimers_prepare_cpu(smp_processor_id());
2318	hrtimers_cpu_starting(smp_processor_id());
2319	open_softirq(nr: HRTIMER_SOFTIRQ, action: hrtimer_run_softirq);
2320	}
2321

/* source code of linux/kernel/time/hrtimer.c */