// SPDX-License-Identifier: GPL-2.0+
/*
 * 2002-10-15  Posix Clocks & timers
 *	by George Anzinger george@mvista.com
 *	Copyright (C) 2002 2003 by MontaVista Software.
 *
 * 2004-06-01  Fix CLOCK_REALTIME clock/timer TIMER_ABSTIME bug.
 *	Copyright (C) 2004 Boris Hu
 *
 * These are all the functions necessary to implement POSIX clocks & timers
 */
#include <linux/compat.h>
#include <linux/compiler.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/interrupt.h>
#include <linux/list.h>
#include <linux/memblock.h>
#include <linux/nospec.h>
#include <linux/posix-clock.h>
#include <linux/posix-timers.h>
#include <linux/prctl.h>
#include <linux/sched/task.h>
#include <linux/slab.h>
#include <linux/syscalls.h>
#include <linux/time.h>
#include <linux/time_namespace.h>
#include <linux/uaccess.h>

#include "timekeeping.h"
#include "posix-timers.h"

/*
 * Timers are managed in a hash table for lockless lookup. The hash key is
 * constructed from current::signal and the timer ID and the timer is
 * matched against current::signal and the timer ID when walking the hash
 * bucket list.
 *
 * This allows checkpoint/restore to reconstruct the exact timer IDs for
 * a process.
 */
struct timer_hash_bucket {
	spinlock_t lock;
	struct hlist_head head;
};

static struct {
	struct timer_hash_bucket *buckets;
	unsigned long mask;
	struct kmem_cache *cache;
} __timer_data __ro_after_init __aligned(4*sizeof(long));

#define timer_buckets		(__timer_data.buckets)
#define timer_hashmask		(__timer_data.mask)
#define posix_timers_cache	(__timer_data.cache)

static const struct k_clock * const posix_clocks[];
static const struct k_clock *clockid_to_kclock(const clockid_t id);
static const struct k_clock clock_realtime, clock_monotonic;

#define TIMER_ANY_ID		INT_MIN

/* SIGEV_THREAD_ID cannot share a bit with the other SIGEV values. */
#if SIGEV_THREAD_ID != (SIGEV_THREAD_ID & \
			~(SIGEV_SIGNAL | SIGEV_NONE | SIGEV_THREAD))
#error "SIGEV_THREAD_ID must not share bit with other SIGEV values!"
#endif

static struct k_itimer *__lock_timer(timer_t timer_id);

#define lock_timer(tid)							\
({	struct k_itimer *__timr;					\
	__cond_lock(&__timr->it_lock, __timr = __lock_timer(tid));	\
	__timr;								\
})

static inline void unlock_timer(struct k_itimer *timr)
{
	if (likely(timr))
		spin_unlock_irq(&timr->it_lock);
}

#define scoped_timer_get_or_fail(_id)	\
	scoped_cond_guard(lock_timer, return -EINVAL, _id)

#define scoped_timer	(scope)

DEFINE_CLASS(lock_timer, struct k_itimer *, unlock_timer(_T), __lock_timer(id), timer_t id);
DEFINE_CLASS_IS_COND_GUARD(lock_timer);

static struct timer_hash_bucket *hash_bucket(struct signal_struct *sig, unsigned int nr)
{
	return &timer_buckets[jhash2((u32 *)&sig, sizeof(sig) / sizeof(u32), nr) & timer_hashmask];
}

static struct k_itimer *posix_timer_by_id(timer_t id)
{
	struct signal_struct *sig = current->signal;
	struct timer_hash_bucket *bucket = hash_bucket(sig, id);
	struct k_itimer *timer;

	hlist_for_each_entry_rcu(timer, &bucket->head, t_hash) {
		/* timer->it_signal can be set concurrently */
		if ((READ_ONCE(timer->it_signal) == sig) && (timer->it_id == id))
			return timer;
	}
	return NULL;
}

static inline struct signal_struct *posix_sig_owner(const struct k_itimer *timer)
{
	unsigned long val = (unsigned long)timer->it_signal;

	/*
	 * Mask out bit 0, which acts as invalid marker to prevent
	 * posix_timer_by_id() detecting it as valid.
	 */
	return (struct signal_struct *)(val & ~1UL);
}

static bool posix_timer_hashed(struct timer_hash_bucket *bucket, struct signal_struct *sig,
			       timer_t id)
{
	struct hlist_head *head = &bucket->head;
	struct k_itimer *timer;

	hlist_for_each_entry_rcu(timer, head, t_hash, lockdep_is_held(&bucket->lock)) {
		if ((posix_sig_owner(timer) == sig) && (timer->it_id == id))
			return true;
	}
	return false;
}

static bool posix_timer_add_at(struct k_itimer *timer, struct signal_struct *sig, unsigned int id)
{
	struct timer_hash_bucket *bucket = hash_bucket(sig, id);

	scoped_guard (spinlock, &bucket->lock) {
		/*
		 * Validate under the lock as this could have raced against
		 * another thread ending up with the same ID, which is
		 * highly unlikely, but possible.
		 */
		if (!posix_timer_hashed(bucket, sig, id)) {
			/*
			 * Set the timer ID and the signal pointer to make
			 * it identifiable in the hash table. The signal
			 * pointer has bit 0 set to indicate that it is not
			 * yet fully initialized. posix_timer_hashed()
			 * masks this bit out, but the syscall lookup fails
			 * to match due to it being set. This guarantees
			 * that there can't be duplicate timer IDs handed
			 * out.
			 */
			timer->it_id = (timer_t)id;
			timer->it_signal = (struct signal_struct *)((unsigned long)sig | 1UL);
			hlist_add_head_rcu(&timer->t_hash, &bucket->head);
			return true;
		}
	}
	return false;
}

static int posix_timer_add(struct k_itimer *timer, int req_id)
{
	struct signal_struct *sig = current->signal;

	if (unlikely(req_id != TIMER_ANY_ID)) {
		if (!posix_timer_add_at(timer, sig, req_id))
			return -EBUSY;

		/*
		 * Move the ID counter past the requested ID, so that after
		 * switching back to normal mode the IDs are outside of the
		 * exact allocated region. That avoids ID collisions on the
		 * next regular timer_create() invocations.
		 */
		atomic_set(&sig->next_posix_timer_id, req_id + 1);
		return req_id;
	}

	for (unsigned int cnt = 0; cnt <= INT_MAX; cnt++) {
		/* Get the next timer ID and clamp it to positive space */
		unsigned int id = atomic_fetch_inc(&sig->next_posix_timer_id) & INT_MAX;

		if (posix_timer_add_at(timer, sig, id))
			return id;
		cond_resched();
	}
	/* POSIX return code when no timer ID could be allocated */
	return -EAGAIN;
}

static int posix_get_realtime_timespec(clockid_t which_clock, struct timespec64 *tp)
{
	ktime_get_real_ts64(tp);
	return 0;
}

static ktime_t posix_get_realtime_ktime(clockid_t which_clock)
{
	return ktime_get_real();
}

static int posix_clock_realtime_set(const clockid_t which_clock,
				    const struct timespec64 *tp)
{
	return do_sys_settimeofday64(tp, NULL);
}

static int posix_clock_realtime_adj(const clockid_t which_clock,
				    struct __kernel_timex *t)
{
	return do_adjtimex(t);
}

static int posix_get_monotonic_timespec(clockid_t which_clock, struct timespec64 *tp)
{
	ktime_get_ts64(tp);
	timens_add_monotonic(tp);
	return 0;
}

static ktime_t posix_get_monotonic_ktime(clockid_t which_clock)
{
	return ktime_get();
}

static int posix_get_monotonic_raw(clockid_t which_clock, struct timespec64 *tp)
{
	ktime_get_raw_ts64(tp);
	timens_add_monotonic(tp);
	return 0;
}

static int posix_get_realtime_coarse(clockid_t which_clock, struct timespec64 *tp)
{
	ktime_get_coarse_real_ts64(tp);
	return 0;
}

static int posix_get_monotonic_coarse(clockid_t which_clock,
				      struct timespec64 *tp)
{
	ktime_get_coarse_ts64(tp);
	timens_add_monotonic(tp);
	return 0;
}

static int posix_get_coarse_res(const clockid_t which_clock, struct timespec64 *tp)
{
	*tp = ktime_to_timespec64(KTIME_LOW_RES);
	return 0;
}

static int posix_get_boottime_timespec(const clockid_t which_clock, struct timespec64 *tp)
{
	ktime_get_boottime_ts64(tp);
	timens_add_boottime(tp);
	return 0;
}

static ktime_t posix_get_boottime_ktime(const clockid_t which_clock)
{
	return ktime_get_boottime();
}

static int posix_get_tai_timespec(clockid_t which_clock, struct timespec64 *tp)
{
	ktime_get_clocktai_ts64(tp);
	return 0;
}

static ktime_t posix_get_tai_ktime(clockid_t which_clock)
{
	return ktime_get_clocktai();
}

static int posix_get_hrtimer_res(clockid_t which_clock, struct timespec64 *tp)
{
	tp->tv_sec = 0;
	tp->tv_nsec = hrtimer_resolution;
	return 0;
}

/*
 * The siginfo si_overrun field and the return value of timer_getoverrun(2)
 * are of type int. Clamp the overrun value to INT_MAX.
 */
static inline int timer_overrun_to_int(struct k_itimer *timr)
{
	if (timr->it_overrun_last > (s64)INT_MAX)
		return INT_MAX;

	return (int)timr->it_overrun_last;
}

static void common_hrtimer_rearm(struct k_itimer *timr)
{
	struct hrtimer *timer = &timr->it.real.timer;

	timr->it_overrun += hrtimer_forward(timer, timer->base->get_time(),
					    timr->it_interval);
	hrtimer_restart(timer);
}

static bool __posixtimer_deliver_signal(struct kernel_siginfo *info, struct k_itimer *timr)
{
	guard(spinlock)(&timr->it_lock);

	/*
	 * Check if the timer is still alive or whether it got modified
	 * since the signal was queued. In either case, don't rearm and
	 * drop the signal.
	 */
	if (timr->it_signal_seq != timr->it_sigqueue_seq || WARN_ON_ONCE(!posixtimer_valid(timr)))
		return false;

	if (!timr->it_interval || WARN_ON_ONCE(timr->it_status != POSIX_TIMER_REQUEUE_PENDING))
		return true;

	timr->kclock->timer_rearm(timr);
	timr->it_status = POSIX_TIMER_ARMED;
	timr->it_overrun_last = timr->it_overrun;
	timr->it_overrun = -1LL;
	++timr->it_signal_seq;
	info->si_overrun = timer_overrun_to_int(timr);
	return true;
}

/*
 * This function is called from the signal delivery code. It decides
 * whether the signal should be dropped and rearms interval timers. The
 * timer can be unconditionally accessed as there is a reference held on
 * it.
 */
bool posixtimer_deliver_signal(struct kernel_siginfo *info, struct sigqueue *timer_sigq)
{
	struct k_itimer *timr = container_of(timer_sigq, struct k_itimer, sigq);
	bool ret;

	/*
	 * Release siglock to ensure proper locking order versus
	 * timr::it_lock. Keep interrupts disabled.
	 */
	spin_unlock(&current->sighand->siglock);

	ret = __posixtimer_deliver_signal(info, timr);

	/* Drop the reference which was acquired when the signal was queued */
	posixtimer_putref(timr);

	spin_lock(&current->sighand->siglock);
	return ret;
}

void posix_timer_queue_signal(struct k_itimer *timr)
{
	lockdep_assert_held(&timr->it_lock);

	if (!posixtimer_valid(timr))
		return;

	timr->it_status = timr->it_interval ? POSIX_TIMER_REQUEUE_PENDING : POSIX_TIMER_DISARMED;
	posixtimer_send_sigqueue(timr);
}

/*
 * This function gets called when a POSIX.1b interval timer expires from
 * the HRTIMER interrupt (soft interrupt on RT kernels).
 *
 * Handles CLOCK_REALTIME, CLOCK_MONOTONIC, CLOCK_BOOTTIME and CLOCK_TAI
 * based timers.
 */
static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer)
{
	struct k_itimer *timr = container_of(timer, struct k_itimer, it.real.timer);

	guard(spinlock_irqsave)(&timr->it_lock);
	posix_timer_queue_signal(timr);
	return HRTIMER_NORESTART;
}

long posixtimer_create_prctl(unsigned long ctrl)
{
	switch (ctrl) {
	case PR_TIMER_CREATE_RESTORE_IDS_OFF:
		current->signal->timer_create_restore_ids = 0;
		return 0;
	case PR_TIMER_CREATE_RESTORE_IDS_ON:
		current->signal->timer_create_restore_ids = 1;
		return 0;
	case PR_TIMER_CREATE_RESTORE_IDS_GET:
		return current->signal->timer_create_restore_ids;
	}
	return -EINVAL;
}

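/*
 * Illustrative user space sketch (not part of this file): how a
 * checkpoint/restore tool can use the prctl() modes handled above to
 * recreate a timer with a pre-determined ID. Assumes a libc or raw
 * syscall which passes the ID buffer through unmodified; error
 * handling omitted.
 *
 *	prctl(PR_TIMER_CREATE_RESTORE_IDS_ON, 0, 0, 0, 0);
 *	timer_t id = (timer_t)42;	// pre-load the requested timer ID
 *	timer_create(CLOCK_MONOTONIC, NULL, &id);
 *	prctl(PR_TIMER_CREATE_RESTORE_IDS_OFF, 0, 0, 0, 0);
 */
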
static struct pid *good_sigevent(sigevent_t *event)
{
	struct pid *pid = task_tgid(current);
	struct task_struct *rtn;

	switch (event->sigev_notify) {
	case SIGEV_SIGNAL | SIGEV_THREAD_ID:
		pid = find_vpid(event->sigev_notify_thread_id);
		rtn = pid_task(pid, PIDTYPE_PID);
		if (!rtn || !same_thread_group(rtn, current))
			return NULL;
		fallthrough;
	case SIGEV_SIGNAL:
	case SIGEV_THREAD:
		if (event->sigev_signo <= 0 || event->sigev_signo > SIGRTMAX)
			return NULL;
		fallthrough;
	case SIGEV_NONE:
		return pid;
	default:
		return NULL;
	}
}

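/*
 * Illustrative user space sketch (not part of this file): a sigevent
 * which takes the SIGEV_SIGNAL | SIGEV_THREAD_ID branch above by
 * directing the signal at one thread of the calling process. Field
 * names follow the kernel UAPI sigevent layout.
 *
 *	struct sigevent sev = {
 *		.sigev_notify		 = SIGEV_SIGNAL | SIGEV_THREAD_ID,
 *		.sigev_signo		 = SIGRTMIN,
 *		.sigev_notify_thread_id	 = gettid(),
 *	};
 *	timer_t id;
 *	timer_create(CLOCK_MONOTONIC, &sev, &id);
 */
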
static struct k_itimer *alloc_posix_timer(void)
{
	struct k_itimer *tmr;

	if (unlikely(!posix_timers_cache))
		return NULL;

	tmr = kmem_cache_zalloc(posix_timers_cache, GFP_KERNEL);
	if (!tmr)
		return tmr;

	if (unlikely(!posixtimer_init_sigqueue(&tmr->sigq))) {
		kmem_cache_free(posix_timers_cache, tmr);
		return NULL;
	}
	rcuref_init(&tmr->rcuref, 1);
	return tmr;
}

void posixtimer_free_timer(struct k_itimer *tmr)
{
	put_pid(tmr->it_pid);
	if (tmr->sigq.ucounts)
		dec_rlimit_put_ucounts(tmr->sigq.ucounts, UCOUNT_RLIMIT_SIGPENDING);
	kfree_rcu(tmr, rcu);
}

static void posix_timer_unhash_and_free(struct k_itimer *tmr)
{
	struct timer_hash_bucket *bucket = hash_bucket(posix_sig_owner(tmr), tmr->it_id);

	scoped_guard (spinlock, &bucket->lock)
		hlist_del_rcu(&tmr->t_hash);
	posixtimer_putref(tmr);
}

static int common_timer_create(struct k_itimer *new_timer)
{
	hrtimer_setup(&new_timer->it.real.timer, posix_timer_fn, new_timer->it_clock, 0);
	return 0;
}

/* Create a POSIX.1b interval timer. */
static int do_timer_create(clockid_t which_clock, struct sigevent *event,
			   timer_t __user *created_timer_id)
{
	const struct k_clock *kc = clockid_to_kclock(which_clock);
	timer_t req_id = TIMER_ANY_ID;
	struct k_itimer *new_timer;
	int error, new_timer_id;

	if (!kc)
		return -EINVAL;
	if (!kc->timer_create)
		return -EOPNOTSUPP;

	new_timer = alloc_posix_timer();
	if (unlikely(!new_timer))
		return -EAGAIN;

	spin_lock_init(&new_timer->it_lock);

	/* Special case for CRIU to restore timers with a given timer ID. */
	if (unlikely(current->signal->timer_create_restore_ids)) {
		if (copy_from_user(&req_id, created_timer_id, sizeof(req_id)))
			return -EFAULT;
		/* Valid IDs are 0..INT_MAX */
		if ((unsigned int)req_id > INT_MAX)
			return -EINVAL;
	}

	/*
	 * Add the timer to the hash table. The timer is not yet valid
	 * after insertion, but has a unique ID allocated.
	 */
	new_timer_id = posix_timer_add(new_timer, req_id);
	if (new_timer_id < 0) {
		posixtimer_free_timer(new_timer);
		return new_timer_id;
	}

	new_timer->it_clock = which_clock;
	new_timer->kclock = kc;
	new_timer->it_overrun = -1LL;

	if (event) {
		scoped_guard (rcu)
			new_timer->it_pid = get_pid(good_sigevent(event));
		if (!new_timer->it_pid) {
			error = -EINVAL;
			goto out;
		}
		new_timer->it_sigev_notify = event->sigev_notify;
		new_timer->sigq.info.si_signo = event->sigev_signo;
		new_timer->sigq.info.si_value = event->sigev_value;
	} else {
		new_timer->it_sigev_notify = SIGEV_SIGNAL;
		new_timer->sigq.info.si_signo = SIGALRM;
		new_timer->sigq.info.si_value.sival_int = new_timer->it_id;
		new_timer->it_pid = get_pid(task_tgid(current));
	}

	if (new_timer->it_sigev_notify & SIGEV_THREAD_ID)
		new_timer->it_pid_type = PIDTYPE_PID;
	else
		new_timer->it_pid_type = PIDTYPE_TGID;

	new_timer->sigq.info.si_tid = new_timer->it_id;
	new_timer->sigq.info.si_code = SI_TIMER;

	if (copy_to_user(created_timer_id, &new_timer_id, sizeof(new_timer_id))) {
		error = -EFAULT;
		goto out;
	}
	/*
	 * After successful copy out, the timer ID is visible to user space
	 * now but not yet valid because new_timer::it_signal has the low
	 * order bit set.
	 *
	 * Complete the initialization with the clock specific create
	 * callback.
	 */
	error = kc->timer_create(new_timer);
	if (error)
		goto out;

	/*
	 * timer::it_lock ensures that __lock_timer() observes a fully
	 * initialized timer when it observes a valid timer::it_signal.
	 *
	 * sighand::siglock is required to protect signal::posix_timers.
	 */
	scoped_guard (spinlock_irq, &new_timer->it_lock) {
		guard(spinlock)(&current->sighand->siglock);
		/*
		 * new_timer::it_signal contains the signal pointer with
		 * bit 0 set, which makes it invalid for syscall operations.
		 * Store the unmodified signal pointer to make it valid.
		 */
		WRITE_ONCE(new_timer->it_signal, current->signal);
		hlist_add_head_rcu(&new_timer->list, &current->signal->posix_timers);
	}
	/*
	 * After unlocking @new_timer is subject to concurrent removal and
	 * cannot be touched anymore
	 */
	return 0;
out:
	posix_timer_unhash_and_free(new_timer);
	return error;
}

SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock,
		struct sigevent __user *, timer_event_spec,
		timer_t __user *, created_timer_id)
{
	if (timer_event_spec) {
		sigevent_t event;

		if (copy_from_user(&event, timer_event_spec, sizeof(event)))
			return -EFAULT;
		return do_timer_create(which_clock, &event, created_timer_id);
	}
	return do_timer_create(which_clock, NULL, created_timer_id);
}

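/*
 * Illustrative user space sketch (not part of this file): passing a
 * NULL sigevent takes the default branch of do_timer_create() above,
 * i.e. SIGALRM is sent to the process and si_value carries the timer
 * ID.
 *
 *	timer_t id;
 *	timer_create(CLOCK_REALTIME, NULL, &id);
 */
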
#ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE3(timer_create, clockid_t, which_clock,
		       struct compat_sigevent __user *, timer_event_spec,
		       timer_t __user *, created_timer_id)
{
	if (timer_event_spec) {
		sigevent_t event;

		if (get_compat_sigevent(&event, timer_event_spec))
			return -EFAULT;
		return do_timer_create(which_clock, &event, created_timer_id);
	}
	return do_timer_create(which_clock, NULL, created_timer_id);
}
#endif

static struct k_itimer *__lock_timer(timer_t timer_id)
{
	struct k_itimer *timr;

	/*
	 * timer_t could be any type >= int and we want to make sure any
	 * @timer_id outside positive int range fails lookup.
	 */
	if ((unsigned long long)timer_id > INT_MAX)
		return NULL;

	/*
	 * The hash lookup and the timers are RCU protected.
	 *
	 * Timers are added to the hash in invalid state where
	 * timr::it_signal is marked invalid. timer::it_signal is only set
	 * after the rest of the initialization succeeded.
	 *
	 * Timer destruction happens in steps:
	 *  1) Set timr::it_signal marked invalid with timr::it_lock held
	 *  2) Release timr::it_lock
	 *  3) Remove from the hash under hash_lock
	 *  4) Put the reference count.
	 *
	 * The reference count might not drop to zero if timr::sigq is
	 * queued. In that case the signal delivery or flush will put the
	 * last reference count.
	 *
	 * When the reference count reaches zero, the timer is scheduled
	 * for RCU removal after the grace period.
	 *
	 * Holding rcu_read_lock() across the lookup ensures that
	 * the timer cannot be freed.
	 *
	 * The lookup validates locklessly that timr::it_signal ==
	 * current::it_signal and timr::it_id == @timer_id. timr::it_id
	 * can't change, but timr::it_signal can become invalid during
	 * destruction, which makes the locked check fail.
	 */
	guard(rcu)();
	timr = posix_timer_by_id(timer_id);
	if (timr) {
		spin_lock_irq(&timr->it_lock);
		/*
		 * Validate under timr::it_lock that timr::it_signal is
		 * still valid. Pairs with #1 above.
		 */
		if (timr->it_signal == current->signal)
			return timr;
		spin_unlock_irq(&timr->it_lock);
	}
	return NULL;
}

static ktime_t common_hrtimer_remaining(struct k_itimer *timr, ktime_t now)
{
	struct hrtimer *timer = &timr->it.real.timer;

	return __hrtimer_expires_remaining_adjusted(timer, now);
}

static s64 common_hrtimer_forward(struct k_itimer *timr, ktime_t now)
{
	struct hrtimer *timer = &timr->it.real.timer;

	return hrtimer_forward(timer, now, timr->it_interval);
}

/*
 * Get the time remaining on a POSIX.1b interval timer.
 *
 * Two issues to handle here:
 *
 *  1) The timer has a requeue pending. The return value must appear as
 *     if the timer has been requeued right now.
 *
 *  2) The timer is a SIGEV_NONE timer. These timers are never enqueued
 *     into the hrtimer queue and therefore never expired. Emulate expiry
 *     here taking #1 into account.
 */
void common_timer_get(struct k_itimer *timr, struct itimerspec64 *cur_setting)
{
	const struct k_clock *kc = timr->kclock;
	ktime_t now, remaining, iv;
	bool sig_none;

	sig_none = timr->it_sigev_notify == SIGEV_NONE;
	iv = timr->it_interval;

	/* interval timer ? */
	if (iv) {
		cur_setting->it_interval = ktime_to_timespec64(iv);
	} else if (timr->it_status == POSIX_TIMER_DISARMED) {
		/*
		 * SIGEV_NONE oneshot timers are never queued and therefore
		 * timr->it_status is always DISARMED. The check below
		 * vs. remaining time will handle this case.
		 *
		 * For all other timers there is nothing to update here, so
		 * return.
		 */
		if (!sig_none)
			return;
	}

	now = kc->clock_get_ktime(timr->it_clock);

	/*
	 * If this is an interval timer and either has requeue pending or
	 * is a SIGEV_NONE timer move the expiry time forward by intervals,
	 * so expiry is > now.
	 */
	if (iv && timr->it_status != POSIX_TIMER_ARMED)
		timr->it_overrun += kc->timer_forward(timr, now);

	remaining = kc->timer_remaining(timr, now);
	/*
	 * As @now is retrieved before a possible timer_forward() and
	 * cannot be reevaluated by the compiler @remaining is based on the
	 * same @now value. Therefore @remaining is consistent vs. @now.
	 *
	 * Consequently all interval timers, i.e. @iv > 0, cannot have a
	 * remaining time <= 0 because timer_forward() guarantees to move
	 * them forward so that the next timer expiry is > @now.
	 */
	if (remaining <= 0) {
		/*
		 * A single shot SIGEV_NONE timer must return 0, when it is
		 * expired! Timers which have a real signal delivery mode
		 * must return a remaining time greater than 0 because the
		 * signal has not yet been delivered.
		 */
		if (!sig_none)
			cur_setting->it_value.tv_nsec = 1;
	} else {
		cur_setting->it_value = ktime_to_timespec64(remaining);
	}
}

static int do_timer_gettime(timer_t timer_id, struct itimerspec64 *setting)
{
	memset(setting, 0, sizeof(*setting));
	scoped_timer_get_or_fail(timer_id)
		scoped_timer->kclock->timer_get(scoped_timer, setting);
	return 0;
}

/* Get the time remaining on a POSIX.1b interval timer. */
SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id,
		struct __kernel_itimerspec __user *, setting)
{
	struct itimerspec64 cur_setting;

	int ret = do_timer_gettime(timer_id, &cur_setting);
	if (!ret) {
		if (put_itimerspec64(&cur_setting, setting))
			ret = -EFAULT;
	}
	return ret;
}

#ifdef CONFIG_COMPAT_32BIT_TIME

SYSCALL_DEFINE2(timer_gettime32, timer_t, timer_id,
		struct old_itimerspec32 __user *, setting)
{
	struct itimerspec64 cur_setting;

	int ret = do_timer_gettime(timer_id, &cur_setting);
	if (!ret) {
		if (put_old_itimerspec32(&cur_setting, setting))
			ret = -EFAULT;
	}
	return ret;
}

#endif

/**
 * sys_timer_getoverrun - Get the number of overruns of a POSIX.1b interval timer
 * @timer_id:	The timer ID which identifies the timer
 *
 * The "overrun count" of a timer is one plus the number of expiration
 * intervals which have elapsed between the first expiry, which queues the
 * signal and the actual signal delivery. On signal delivery the "overrun
 * count" is calculated and cached, so it can be returned directly here.
 *
 * As this is relative to the last queued signal the returned overrun count
 * is meaningless outside of the signal delivery path and even there it
 * does not accurately reflect the current state when user space evaluates
 * it.
 *
 * Returns:
 *	-EINVAL		@timer_id is invalid
 *	1..INT_MAX	The number of overruns related to the last delivered signal
 */
SYSCALL_DEFINE1(timer_getoverrun, timer_t, timer_id)
{
	scoped_timer_get_or_fail(timer_id)
		return timer_overrun_to_int(scoped_timer);
}

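/*
 * Illustrative user space sketch (not part of this file): the cached
 * overrun count is also delivered in siginfo, so a handler for a
 * periodic timer can account for missed intervals without an extra
 * syscall.
 *
 *	static volatile sig_atomic_t missed;
 *
 *	static void handler(int sig, siginfo_t *si, void *ctx)
 *	{
 *		if (si->si_code == SI_TIMER)
 *			missed += si->si_overrun;
 *	}
 */
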
static void common_hrtimer_arm(struct k_itimer *timr, ktime_t expires,
			       bool absolute, bool sigev_none)
{
	struct hrtimer *timer = &timr->it.real.timer;
	enum hrtimer_mode mode;

	mode = absolute ? HRTIMER_MODE_ABS : HRTIMER_MODE_REL;
	/*
	 * Posix magic: Relative CLOCK_REALTIME timers are not affected by
	 * clock modifications, so they become CLOCK_MONOTONIC based under the
	 * hood. See hrtimer_setup(). Update timr->kclock, so the generic
	 * functions which use timr->kclock->clock_get_*() work.
	 *
	 * Note: it_clock stays unmodified, because the next timer_set() might
	 * use ABSTIME, so it needs to switch back.
	 */
	if (timr->it_clock == CLOCK_REALTIME)
		timr->kclock = absolute ? &clock_realtime : &clock_monotonic;

	hrtimer_setup(&timr->it.real.timer, posix_timer_fn, timr->it_clock, mode);

	if (!absolute)
		expires = ktime_add_safe(expires, timer->base->get_time());
	hrtimer_set_expires(timer, expires);

	if (!sigev_none)
		hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
}

static int common_hrtimer_try_to_cancel(struct k_itimer *timr)
{
	return hrtimer_try_to_cancel(&timr->it.real.timer);
}

static void common_timer_wait_running(struct k_itimer *timer)
{
	hrtimer_cancel_wait_running(&timer->it.real.timer);
}

/*
 * On PREEMPT_RT this prevents priority inversion and a potential livelock
 * against the ksoftirqd thread in case that ksoftirqd gets preempted while
 * executing a hrtimer callback.
 *
 * See the comments in hrtimer_cancel_wait_running(). For PREEMPT_RT=n this
 * just results in a cpu_relax().
 *
 * For POSIX CPU timers with CONFIG_POSIX_CPU_TIMERS_TASK_WORK=n this is
 * just a cpu_relax(). With CONFIG_POSIX_CPU_TIMERS_TASK_WORK=y this
 * prevents spinning on an eventually scheduled out task and a livelock
 * when the task which tries to delete or disarm the timer has preempted
 * the task which runs the expiry in task work context.
 */
static void timer_wait_running(struct k_itimer *timer)
{
	/*
	 * kc->timer_wait_running() might drop RCU lock. So @timer
	 * cannot be touched anymore after the function returns!
	 */
	timer->kclock->timer_wait_running(timer);
}

/*
 * Set up the new interval and reset the signal delivery data
 */
void posix_timer_set_common(struct k_itimer *timer, struct itimerspec64 *new_setting)
{
	if (new_setting->it_value.tv_sec || new_setting->it_value.tv_nsec)
		timer->it_interval = timespec64_to_ktime(new_setting->it_interval);
	else
		timer->it_interval = 0;

	/* Reset overrun accounting */
	timer->it_overrun_last = 0;
	timer->it_overrun = -1LL;
}

/* Set a POSIX.1b interval timer. */
int common_timer_set(struct k_itimer *timr, int flags,
		     struct itimerspec64 *new_setting,
		     struct itimerspec64 *old_setting)
{
	const struct k_clock *kc = timr->kclock;
	bool sigev_none;
	ktime_t expires;

	if (old_setting)
		common_timer_get(timr, old_setting);

	/*
	 * Careful here. On SMP systems the timer expiry function could be
	 * active and spinning on timr->it_lock.
	 */
	if (kc->timer_try_to_cancel(timr) < 0)
		return TIMER_RETRY;

	timr->it_status = POSIX_TIMER_DISARMED;
	posix_timer_set_common(timr, new_setting);

	/* Keep timer disarmed when it_value is zero */
	if (!new_setting->it_value.tv_sec && !new_setting->it_value.tv_nsec)
		return 0;

	expires = timespec64_to_ktime(new_setting->it_value);
	if (flags & TIMER_ABSTIME)
		expires = timens_ktime_to_host(timr->it_clock, expires);
	sigev_none = timr->it_sigev_notify == SIGEV_NONE;

	kc->timer_arm(timr, expires, flags & TIMER_ABSTIME, sigev_none);
	if (!sigev_none)
		timr->it_status = POSIX_TIMER_ARMED;
	return 0;
}

static int do_timer_settime(timer_t timer_id, int tmr_flags, struct itimerspec64 *new_spec64,
			    struct itimerspec64 *old_spec64)
{
	if (!timespec64_valid(&new_spec64->it_interval) ||
	    !timespec64_valid(&new_spec64->it_value))
		return -EINVAL;

	if (old_spec64)
		memset(old_spec64, 0, sizeof(*old_spec64));

	for (; ; old_spec64 = NULL) {
		struct k_itimer *timr;

		scoped_timer_get_or_fail(timer_id) {
			timr = scoped_timer;

			if (old_spec64)
				old_spec64->it_interval = ktime_to_timespec64(timr->it_interval);

			/* Prevent signal delivery and rearming. */
			timr->it_signal_seq++;

			int ret = timr->kclock->timer_set(timr, tmr_flags, new_spec64, old_spec64);
			if (ret != TIMER_RETRY)
				return ret;

			/* Protect the timer from being freed when leaving the lock scope */
			rcu_read_lock();
		}
		timer_wait_running(timr);
		rcu_read_unlock();
	}
}

/* Set a POSIX.1b interval timer */
SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags,
		const struct __kernel_itimerspec __user *, new_setting,
		struct __kernel_itimerspec __user *, old_setting)
{
	struct itimerspec64 new_spec, old_spec, *rtn;
	int error = 0;

	if (!new_setting)
		return -EINVAL;

	if (get_itimerspec64(&new_spec, new_setting))
		return -EFAULT;

	rtn = old_setting ? &old_spec : NULL;
	error = do_timer_settime(timer_id, flags, &new_spec, rtn);
	if (!error && old_setting) {
		if (put_itimerspec64(&old_spec, old_setting))
			error = -EFAULT;
	}
	return error;
}

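/*
 * Illustrative user space sketch (not part of this file): arming a
 * timer with an initial expiry and a 100ms period. A zero it_value
 * would disarm it instead, as handled in common_timer_set() above.
 *
 *	struct itimerspec its = {
 *		.it_value	= { .tv_sec = 0, .tv_nsec = 100000000 },
 *		.it_interval	= { .tv_sec = 0, .tv_nsec = 100000000 },
 *	};
 *	timer_settime(id, 0, &its, NULL);
 */
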
#ifdef CONFIG_COMPAT_32BIT_TIME
SYSCALL_DEFINE4(timer_settime32, timer_t, timer_id, int, flags,
		struct old_itimerspec32 __user *, new,
		struct old_itimerspec32 __user *, old)
{
	struct itimerspec64 new_spec, old_spec;
	struct itimerspec64 *rtn = old ? &old_spec : NULL;
	int error = 0;

	if (!new)
		return -EINVAL;
	if (get_old_itimerspec32(&new_spec, new))
		return -EFAULT;

	error = do_timer_settime(timer_id, flags, &new_spec, rtn);
	if (!error && old) {
		if (put_old_itimerspec32(&old_spec, old))
			error = -EFAULT;
	}
	return error;
}
#endif

int common_timer_del(struct k_itimer *timer)
{
	const struct k_clock *kc = timer->kclock;

	if (kc->timer_try_to_cancel(timer) < 0)
		return TIMER_RETRY;
	timer->it_status = POSIX_TIMER_DISARMED;
	return 0;
}

/*
 * If the deleted timer is on the ignored list, remove it and
 * drop the associated reference.
 */
static inline void posix_timer_cleanup_ignored(struct k_itimer *tmr)
{
	if (!hlist_unhashed(&tmr->ignored_list)) {
		hlist_del_init(&tmr->ignored_list);
		posixtimer_putref(tmr);
	}
}

static void posix_timer_delete(struct k_itimer *timer)
{
	/*
	 * Invalidate the timer, remove it from the linked list and remove
	 * it from the ignored list if pending.
	 *
	 * The invalidation must be written with siglock held so that the
	 * signal code observes the invalidated timer::it_signal in
	 * do_sigaction(), which prevents it from moving a pending signal
	 * of a deleted timer to the ignore list.
	 *
	 * The invalidation also prevents signal queueing, signal delivery
	 * and therefore rearming from the signal delivery path.
	 *
	 * A concurrent lookup can still find the timer in the hash, but it
	 * will check timer::it_signal with timer::it_lock held and observe
	 * bit 0 set, which invalidates it. That also prevents the timer ID
	 * from being handed out before this timer is completely gone.
	 */
	timer->it_signal_seq++;

	scoped_guard (spinlock, &current->sighand->siglock) {
		unsigned long sig = (unsigned long)timer->it_signal | 1UL;

		WRITE_ONCE(timer->it_signal, (struct signal_struct *)sig);
		hlist_del_rcu(&timer->list);
		posix_timer_cleanup_ignored(timer);
	}

	while (timer->kclock->timer_del(timer) == TIMER_RETRY) {
		guard(rcu)();
		spin_unlock_irq(&timer->it_lock);
		timer_wait_running(timer);
		spin_lock_irq(&timer->it_lock);
	}
}

/* Delete a POSIX.1b interval timer. */
SYSCALL_DEFINE1(timer_delete, timer_t, timer_id)
{
	struct k_itimer *timer;

	scoped_timer_get_or_fail(timer_id) {
		timer = scoped_timer;
		posix_timer_delete(timer);
	}
	/* Remove it from the hash, which frees up the timer ID */
	posix_timer_unhash_and_free(timer);
	return 0;
}

/*
 * Invoked from do_exit() when the last thread of a thread group exits.
 * At that point no other task can access the timers of the dying
 * task anymore.
 */
void exit_itimers(struct task_struct *tsk)
{
	struct hlist_head timers;
	struct hlist_node *next;
	struct k_itimer *timer;

	/* Clear restore mode for exec() */
	tsk->signal->timer_create_restore_ids = 0;

	if (hlist_empty(&tsk->signal->posix_timers))
		return;

	/* Protect against concurrent read via /proc/$PID/timers */
	scoped_guard (spinlock_irq, &tsk->sighand->siglock)
		hlist_move_list(&tsk->signal->posix_timers, &timers);

	/* The timers are no longer accessible via tsk::signal */
	hlist_for_each_entry_safe(timer, next, &timers, list) {
		scoped_guard (spinlock_irq, &timer->it_lock)
			posix_timer_delete(timer);
		posix_timer_unhash_and_free(timer);
		cond_resched();
	}

	/*
	 * There should be no timers on the ignored list. itimer_delete() has
	 * mopped them up.
	 */
	if (!WARN_ON_ONCE(!hlist_empty(&tsk->signal->ignored_posix_timers)))
		return;

	hlist_move_list(&tsk->signal->ignored_posix_timers, &timers);
	while (!hlist_empty(&timers)) {
		posix_timer_cleanup_ignored(hlist_entry(timers.first, struct k_itimer,
							ignored_list));
	}
}

SYSCALL_DEFINE2(clock_settime, const clockid_t, which_clock,
		const struct __kernel_timespec __user *, tp)
{
	const struct k_clock *kc = clockid_to_kclock(which_clock);
	struct timespec64 new_tp;

	if (!kc || !kc->clock_set)
		return -EINVAL;

	if (get_timespec64(&new_tp, tp))
		return -EFAULT;

	/*
	 * Permission checks have to be done inside the clock specific
	 * setter callback.
	 */
	return kc->clock_set(which_clock, &new_tp);
}

SYSCALL_DEFINE2(clock_gettime, const clockid_t, which_clock,
		struct __kernel_timespec __user *, tp)
{
	const struct k_clock *kc = clockid_to_kclock(which_clock);
	struct timespec64 kernel_tp;
	int error;

	if (!kc)
		return -EINVAL;

	error = kc->clock_get_timespec(which_clock, &kernel_tp);

	if (!error && put_timespec64(&kernel_tp, tp))
		error = -EFAULT;

	return error;
}

int do_clock_adjtime(const clockid_t which_clock, struct __kernel_timex *ktx)
{
	const struct k_clock *kc = clockid_to_kclock(which_clock);

	if (!kc)
		return -EINVAL;
	if (!kc->clock_adj)
		return -EOPNOTSUPP;

	return kc->clock_adj(which_clock, ktx);
}

SYSCALL_DEFINE2(clock_adjtime, const clockid_t, which_clock,
		struct __kernel_timex __user *, utx)
{
	struct __kernel_timex ktx;
	int err;

	if (copy_from_user(&ktx, utx, sizeof(ktx)))
		return -EFAULT;

	err = do_clock_adjtime(which_clock, &ktx);

	if (err >= 0 && copy_to_user(utx, &ktx, sizeof(ktx)))
		return -EFAULT;

	return err;
}

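/*
 * Illustrative user space sketch (not part of this file): slewing the
 * frequency of CLOCK_REALTIME via clock_adjtime(). The modes and the
 * 16-bit fractional ppm scaling of the freq field come from the
 * adjtimex(2) API; the value is made up for illustration.
 *
 *	struct timex tx = {
 *		.modes	= ADJ_FREQUENCY,
 *		.freq	= 100 << 16,	// +100 ppm, scaled by 2^16
 *	};
 *	clock_adjtime(CLOCK_REALTIME, &tx);
 */
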
/**
 * sys_clock_getres - Get the resolution of a clock
 * @which_clock:	The clock to get the resolution for
 * @tp:			Pointer to a user space timespec64 for storage
 *
 * POSIX defines:
 *
 * "The clock_getres() function shall return the resolution of any
 * clock. Clock resolutions are implementation-defined and cannot be set by
 * a process. If the argument res is not NULL, the resolution of the
 * specified clock shall be stored in the location pointed to by res. If
 * res is NULL, the clock resolution is not returned. If the time argument
 * of clock_settime() is not a multiple of res, then the value is truncated
 * to a multiple of res."
 *
 * Due to the various hardware constraints the real resolution can vary
 * wildly and even change during runtime when the underlying devices are
 * replaced. The kernel also can use hardware devices with different
 * resolutions for reading the time and for arming timers.
 *
 * The kernel therefore deviates from the POSIX spec in various aspects:
 *
 * 1) The resolution returned to user space
 *
 *    For CLOCK_REALTIME, CLOCK_MONOTONIC, CLOCK_BOOTTIME, CLOCK_TAI,
 *    CLOCK_REALTIME_ALARM, CLOCK_BOOTTIME_ALARM and CLOCK_MONOTONIC_RAW
 *    the kernel differentiates only two cases:
 *
 *    I)  Low resolution mode:
 *
 *	  When high resolution timers are disabled at compile or runtime
 *	  the resolution returned is nanoseconds per tick, which represents
 *	  the precision at which timers expire.
 *
 *    II) High resolution mode:
 *
 *	  When high resolution timers are enabled the resolution returned
 *	  is always one nanosecond independent of the actual resolution of
 *	  the underlying hardware devices.
 *
 *	  For CLOCK_*_ALARM the actual resolution depends on system
 *	  state. When the system is running the resolution is the same as
 *	  the resolution of the other clocks. During suspend the actual
 *	  resolution is the resolution of the underlying RTC device which
 *	  might be way less precise than the clockevent device used during
 *	  running state.
 *
 *    For CLOCK_REALTIME_COARSE and CLOCK_MONOTONIC_COARSE the resolution
 *    returned is always nanoseconds per tick.
 *
 *    For CLOCK_PROCESS_CPUTIME and CLOCK_THREAD_CPUTIME the resolution
 *    returned is always one nanosecond under the assumption that the
 *    underlying scheduler clock has a better resolution than nanoseconds
 *    per tick.
 *
 *    For dynamic POSIX clocks (PTP devices) the resolution returned is
 *    always one nanosecond.
 *
 * 2) Effect on sys_clock_settime()
 *
 *    The kernel does not truncate the time which is handed in to
 *    sys_clock_settime(). The kernel internal timekeeping is always using
 *    nanoseconds precision independent of the clocksource device which is
 *    used to read the time from. The resolution of that device only
 *    affects the precision of the time returned by sys_clock_gettime().
 *
 * Returns:
 *	0		Success. @tp contains the resolution
 *	-EINVAL		@which_clock is not a valid clock ID
 *	-EFAULT		Copying the resolution to @tp faulted
 *	-ENODEV		Dynamic POSIX clock is not backed by a device
 *	-EOPNOTSUPP	Dynamic POSIX clock does not support getres()
 */
SYSCALL_DEFINE2(clock_getres, const clockid_t, which_clock,
		struct __kernel_timespec __user *, tp)
{
	const struct k_clock *kc = clockid_to_kclock(which_clock);
	struct timespec64 rtn_tp;
	int error;

	if (!kc)
		return -EINVAL;

	error = kc->clock_getres(which_clock, &rtn_tp);

	if (!error && tp && put_timespec64(&rtn_tp, tp))
		error = -EFAULT;

	return error;
}

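/*
 * Illustrative user space sketch (not part of this file): on a kernel
 * with high resolution timers enabled this reports 1ns for
 * CLOCK_MONOTONIC, while the coarse clocks report nanoseconds per tick
 * as described above.
 *
 *	struct timespec res;
 *	clock_getres(CLOCK_MONOTONIC, &res);
 *	printf("resolution: %ld ns\n", res.tv_nsec);
 */
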
#ifdef CONFIG_COMPAT_32BIT_TIME

SYSCALL_DEFINE2(clock_settime32, clockid_t, which_clock,
		struct old_timespec32 __user *, tp)
{
	const struct k_clock *kc = clockid_to_kclock(which_clock);
	struct timespec64 ts;

	if (!kc || !kc->clock_set)
		return -EINVAL;

	if (get_old_timespec32(&ts, tp))
		return -EFAULT;

	return kc->clock_set(which_clock, &ts);
}

SYSCALL_DEFINE2(clock_gettime32, clockid_t, which_clock,
		struct old_timespec32 __user *, tp)
{
	const struct k_clock *kc = clockid_to_kclock(which_clock);
	struct timespec64 ts;
	int err;

	if (!kc)
		return -EINVAL;

	err = kc->clock_get_timespec(which_clock, &ts);

	if (!err && put_old_timespec32(&ts, tp))
		err = -EFAULT;

	return err;
}

SYSCALL_DEFINE2(clock_adjtime32, clockid_t, which_clock,
		struct old_timex32 __user *, utp)
{
	struct __kernel_timex ktx;
	int err;

	err = get_old_timex32(&ktx, utp);
	if (err)
		return err;

	err = do_clock_adjtime(which_clock, &ktx);

	if (err >= 0 && put_old_timex32(utp, &ktx))
		return -EFAULT;

	return err;
}

SYSCALL_DEFINE2(clock_getres_time32, clockid_t, which_clock,
		struct old_timespec32 __user *, tp)
{
	const struct k_clock *kc = clockid_to_kclock(which_clock);
	struct timespec64 ts;
	int err;

	if (!kc)
		return -EINVAL;

	err = kc->clock_getres(which_clock, &ts);
	if (!err && tp && put_old_timespec32(&ts, tp))
		return -EFAULT;

	return err;
}

#endif

/*
 * sys_clock_nanosleep() for CLOCK_REALTIME and CLOCK_TAI
 */
static int common_nsleep(const clockid_t which_clock, int flags,
			 const struct timespec64 *rqtp)
{
	ktime_t texp = timespec64_to_ktime(*rqtp);

	return hrtimer_nanosleep(texp, flags & TIMER_ABSTIME ?
				 HRTIMER_MODE_ABS : HRTIMER_MODE_REL,
				 which_clock);
}

/*
 * sys_clock_nanosleep() for CLOCK_MONOTONIC and CLOCK_BOOTTIME
 *
 * Absolute nanosleeps for these clocks are time-namespace adjusted.
 */
static int common_nsleep_timens(const clockid_t which_clock, int flags,
				const struct timespec64 *rqtp)
{
	ktime_t texp = timespec64_to_ktime(*rqtp);

	if (flags & TIMER_ABSTIME)
		texp = timens_ktime_to_host(which_clock, texp);

	return hrtimer_nanosleep(texp, flags & TIMER_ABSTIME ?
				 HRTIMER_MODE_ABS : HRTIMER_MODE_REL,
				 which_clock);
}

SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags,
		const struct __kernel_timespec __user *, rqtp,
		struct __kernel_timespec __user *, rmtp)
{
	const struct k_clock *kc = clockid_to_kclock(which_clock);
	struct timespec64 t;

	if (!kc)
		return -EINVAL;
	if (!kc->nsleep)
		return -EOPNOTSUPP;

	if (get_timespec64(&t, rqtp))
		return -EFAULT;

	if (!timespec64_valid(&t))
		return -EINVAL;
	if (flags & TIMER_ABSTIME)
		rmtp = NULL;
	current->restart_block.fn = do_no_restart_syscall;
	current->restart_block.nanosleep.type = rmtp ? TT_NATIVE : TT_NONE;
	current->restart_block.nanosleep.rmtp = rmtp;

	return kc->nsleep(which_clock, flags, &t);
}

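/*
 * Illustrative user space sketch (not part of this file): an absolute
 * sleep until a deadline 500ms from now. With TIMER_ABSTIME the
 * remaining time pointer is ignored, as enforced above.
 *
 *	struct timespec deadline;
 *
 *	clock_gettime(CLOCK_MONOTONIC, &deadline);
 *	deadline.tv_nsec += 500000000;
 *	if (deadline.tv_nsec >= 1000000000) {
 *		deadline.tv_sec++;
 *		deadline.tv_nsec -= 1000000000;
 *	}
 *	clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME, &deadline, NULL);
 */
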
#ifdef CONFIG_COMPAT_32BIT_TIME

SYSCALL_DEFINE4(clock_nanosleep_time32, clockid_t, which_clock, int, flags,
		struct old_timespec32 __user *, rqtp,
		struct old_timespec32 __user *, rmtp)
{
	const struct k_clock *kc = clockid_to_kclock(which_clock);
	struct timespec64 t;

	if (!kc)
		return -EINVAL;
	if (!kc->nsleep)
		return -EOPNOTSUPP;

	if (get_old_timespec32(&t, rqtp))
		return -EFAULT;

	if (!timespec64_valid(&t))
		return -EINVAL;
	if (flags & TIMER_ABSTIME)
		rmtp = NULL;
	current->restart_block.fn = do_no_restart_syscall;
	current->restart_block.nanosleep.type = rmtp ? TT_COMPAT : TT_NONE;
	current->restart_block.nanosleep.compat_rmtp = rmtp;

	return kc->nsleep(which_clock, flags, &t);
}

#endif

static const struct k_clock clock_realtime = {
	.clock_getres		= posix_get_hrtimer_res,
	.clock_get_timespec	= posix_get_realtime_timespec,
	.clock_get_ktime	= posix_get_realtime_ktime,
	.clock_set		= posix_clock_realtime_set,
	.clock_adj		= posix_clock_realtime_adj,
	.nsleep			= common_nsleep,
	.timer_create		= common_timer_create,
	.timer_set		= common_timer_set,
	.timer_get		= common_timer_get,
	.timer_del		= common_timer_del,
	.timer_rearm		= common_hrtimer_rearm,
	.timer_forward		= common_hrtimer_forward,
	.timer_remaining	= common_hrtimer_remaining,
	.timer_try_to_cancel	= common_hrtimer_try_to_cancel,
	.timer_wait_running	= common_timer_wait_running,
	.timer_arm		= common_hrtimer_arm,
};

static const struct k_clock clock_monotonic = {
	.clock_getres		= posix_get_hrtimer_res,
	.clock_get_timespec	= posix_get_monotonic_timespec,
	.clock_get_ktime	= posix_get_monotonic_ktime,
	.nsleep			= common_nsleep_timens,
	.timer_create		= common_timer_create,
	.timer_set		= common_timer_set,
	.timer_get		= common_timer_get,
	.timer_del		= common_timer_del,
	.timer_rearm		= common_hrtimer_rearm,
	.timer_forward		= common_hrtimer_forward,
	.timer_remaining	= common_hrtimer_remaining,
	.timer_try_to_cancel	= common_hrtimer_try_to_cancel,
	.timer_wait_running	= common_timer_wait_running,
	.timer_arm		= common_hrtimer_arm,
};

static const struct k_clock clock_monotonic_raw = {
	.clock_getres		= posix_get_hrtimer_res,
	.clock_get_timespec	= posix_get_monotonic_raw,
};

static const struct k_clock clock_realtime_coarse = {
	.clock_getres		= posix_get_coarse_res,
	.clock_get_timespec	= posix_get_realtime_coarse,
};

static const struct k_clock clock_monotonic_coarse = {
	.clock_getres		= posix_get_coarse_res,
	.clock_get_timespec	= posix_get_monotonic_coarse,
};

static const struct k_clock clock_tai = {
	.clock_getres		= posix_get_hrtimer_res,
	.clock_get_ktime	= posix_get_tai_ktime,
	.clock_get_timespec	= posix_get_tai_timespec,
	.nsleep			= common_nsleep,
	.timer_create		= common_timer_create,
	.timer_set		= common_timer_set,
	.timer_get		= common_timer_get,
	.timer_del		= common_timer_del,
	.timer_rearm		= common_hrtimer_rearm,
	.timer_forward		= common_hrtimer_forward,
	.timer_remaining	= common_hrtimer_remaining,
	.timer_try_to_cancel	= common_hrtimer_try_to_cancel,
	.timer_wait_running	= common_timer_wait_running,
	.timer_arm		= common_hrtimer_arm,
};

static const struct k_clock clock_boottime = {
	.clock_getres		= posix_get_hrtimer_res,
	.clock_get_ktime	= posix_get_boottime_ktime,
	.clock_get_timespec	= posix_get_boottime_timespec,
	.nsleep			= common_nsleep_timens,
	.timer_create		= common_timer_create,
	.timer_set		= common_timer_set,
	.timer_get		= common_timer_get,
	.timer_del		= common_timer_del,
	.timer_rearm		= common_hrtimer_rearm,
	.timer_forward		= common_hrtimer_forward,
	.timer_remaining	= common_hrtimer_remaining,
	.timer_try_to_cancel	= common_hrtimer_try_to_cancel,
	.timer_wait_running	= common_timer_wait_running,
	.timer_arm		= common_hrtimer_arm,
};

static const struct k_clock * const posix_clocks[] = {
	[CLOCK_REALTIME]		= &clock_realtime,
	[CLOCK_MONOTONIC]		= &clock_monotonic,
	[CLOCK_PROCESS_CPUTIME_ID]	= &clock_process,
	[CLOCK_THREAD_CPUTIME_ID]	= &clock_thread,
	[CLOCK_MONOTONIC_RAW]		= &clock_monotonic_raw,
	[CLOCK_REALTIME_COARSE]		= &clock_realtime_coarse,
	[CLOCK_MONOTONIC_COARSE]	= &clock_monotonic_coarse,
	[CLOCK_BOOTTIME]		= &clock_boottime,
	[CLOCK_REALTIME_ALARM]		= &alarm_clock,
	[CLOCK_BOOTTIME_ALARM]		= &alarm_clock,
	[CLOCK_TAI]			= &clock_tai,
};

static const struct k_clock *clockid_to_kclock(const clockid_t id)
{
	clockid_t idx = id;

	if (id < 0) {
		return (id & CLOCKFD_MASK) == CLOCKFD ?
			&clock_posix_dynamic : &clock_posix_cpu;
	}

	if (id >= ARRAY_SIZE(posix_clocks))
		return NULL;

	return posix_clocks[array_index_nospec(idx, ARRAY_SIZE(posix_clocks))];
}

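/*
 * Illustrative sketch: negative clock IDs encode either a dynamic
 * clock file descriptor or a CPU clock in the low bits checked above.
 * User space builds a dynamic clock ID from an open character device,
 * e.g. a PTP clock, with the well known encoding:
 *
 *	#define FD_TO_CLOCKID(fd)	((~(clockid_t)(fd) << 3) | CLOCKFD)
 *
 *	int fd = open("/dev/ptp0", O_RDWR);
 *	struct timespec ts;
 *	clock_gettime(FD_TO_CLOCKID(fd), &ts);
 */
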
static int __init posixtimer_init(void)
{
	unsigned long i, size;
	unsigned int shift;

	posix_timers_cache = kmem_cache_create("posix_timers_cache",
					       sizeof(struct k_itimer),
					       __alignof__(struct k_itimer),
					       SLAB_ACCOUNT, NULL);

	if (IS_ENABLED(CONFIG_BASE_SMALL))
		size = 512;
	else
		size = roundup_pow_of_two(512 * num_possible_cpus());

	timer_buckets = alloc_large_system_hash("posixtimers", sizeof(*timer_buckets),
						size, 0, 0, &shift, NULL, size, size);
	size = 1UL << shift;
	timer_hashmask = size - 1;

	for (i = 0; i < size; i++) {
		spin_lock_init(&timer_buckets[i].lock);
		INIT_HLIST_HEAD(&timer_buckets[i].head);
	}
	return 0;
}
core_initcall(posixtimer_init);