// SPDX-License-Identifier: GPL-2.0-or-later

#include <linux/sched/task.h>
#include <linux/sched/signal.h>
#include <linux/freezer.h>

#include "futex.h"

/*
 * READ this before attempting to hack on futexes!
 *
 * Basic futex operation and ordering guarantees
 * =============================================
 *
 * The waiter reads the futex value in user space and calls
 * futex_wait(). This function computes the hash bucket and acquires
 * the hash bucket lock. After that it reads the futex user space value
 * again and verifies that the data has not changed. If it has not changed
 * it enqueues itself into the hash bucket, releases the hash bucket lock
 * and schedules.
 *
 * The waker side modifies the user space value of the futex and calls
 * futex_wake(). This function computes the hash bucket and acquires the
 * hash bucket lock. Then it looks for waiters on that futex in the hash
 * bucket and wakes them.
 *
 * In futex wake up scenarios where no tasks are blocked on a futex, taking
 * the hb spinlock can be avoided and the syscall can simply return. For this
 * optimization to work, ordering guarantees must exist so that the waiter
 * being added to the list is acknowledged when the list is concurrently being
 * checked by the waker, avoiding scenarios like the following:
 *
 * CPU 0                               CPU 1
 * val = *futex;
 * sys_futex(WAIT, futex, val);
 *   futex_wait(futex, val);
 *   uval = *futex;
 *                                     *futex = newval;
 *                                     sys_futex(WAKE, futex);
 *                                       futex_wake(futex);
 *                                       if (queue_empty())
 *                                         return;
 *   if (uval == val)
 *     lock(hash_bucket(futex));
 *     queue();
 *     unlock(hash_bucket(futex));
 *     schedule();
 *
 * This would cause the waiter on CPU 0 to wait forever because it
 * missed the transition of the user space value from val to newval
 * and the waker did not find the waiter in the hash bucket queue.
 *
 * The correct serialization ensures that a waiter either observes
 * the changed user space value before blocking or is woken by a
 * concurrent waker:
 *
 * CPU 0                                 CPU 1
 * val = *futex;
 * sys_futex(WAIT, futex, val);
 *   futex_wait(futex, val);
 *
 *   waiters++; (a)
 *   smp_mb(); (A) <-- paired with -.
 *                                  |
 *   lock(hash_bucket(futex));      |
 *                                  |
 *   uval = *futex;                 |
 *                                  |     *futex = newval;
 *                                  |     sys_futex(WAKE, futex);
 *                                  |       futex_wake(futex);
 *                                  |
 *                                  `---> smp_mb(); (B)
 *   if (uval == val)
 *     queue();
 *     unlock(hash_bucket(futex));
 *     schedule();                        if (waiters)
 *                                          lock(hash_bucket(futex));
 *   else                                   wake_waiters(futex);
 *     waiters--; (b)                       unlock(hash_bucket(futex));
 *
 * Where (A) orders the waiters increment and the futex value read through
 * atomic operations (see futex_hb_waiters_inc) and where (B) orders the write
 * to futex and the waiters read (see futex_hb_waiters_pending()).
 *
 * This yields the following case (where X:=waiters, Y:=futex):
 *
 *	X = Y = 0
 *
 *	w[X]=1		w[Y]=1
 *	MB		MB
 *	r[Y]=y		r[X]=x
 *
 * Which guarantees that x==0 && y==0 is impossible; which translates back into
 * the guarantee that we cannot both miss the futex variable change and the
 * enqueue.
 *
 * Note that a new waiter is accounted for in (a) even when it is possible that
 * the wait call can return an error, in which case we backtrack from it in (b).
 * Refer to the comment in futex_q_lock().
 *
 * Similarly, in order to account for waiters being requeued on another
 * address we always increment the waiters for the destination bucket before
 * acquiring the lock, and decrement them again after releasing it - the
 * code that actually moves the futex(es) between hash buckets (requeue_futex)
 * will do the additional required waiter count housekeeping. This is done by
 * double_lock_hb() and double_unlock_hb(), respectively.
 */
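
/*
 * For illustration only: a minimal userspace sketch of the wait/wake
 * protocol described above, using the raw futex syscall. Names such as
 * futex_word are hypothetical and error handling is omitted:
 *
 *	#include <linux/futex.h>
 *	#include <sys/syscall.h>
 *	#include <unistd.h>
 *
 *	static unsigned int futex_word;
 *
 *	static void waiter(void)
 *	{
 *		unsigned int val = __atomic_load_n(&futex_word, __ATOMIC_ACQUIRE);
 *
 *		// Block only while the value is still 'val'; the kernel
 *		// re-reads *uaddr under the hash bucket lock (see above).
 *		if (val == 0)
 *			syscall(SYS_futex, &futex_word, FUTEX_WAIT, val, NULL);
 *	}
 *
 *	static void waker(void)
 *	{
 *		// Change the value first, then wake: the waker-side order
 *		// that barrier (B) above relies on.
 *		__atomic_store_n(&futex_word, 1, __ATOMIC_RELEASE);
 *		syscall(SYS_futex, &futex_word, FUTEX_WAKE, 1);
 *	}
 */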

bool __futex_wake_mark(struct futex_q *q)
{
	if (WARN(q->pi_state || q->rt_waiter, "refusing to wake PI futex\n"))
		return false;

	__futex_unqueue(q);
	/*
	 * The waiting task can free the futex_q as soon as q->lock_ptr = NULL
	 * is written, without taking any locks. This is possible in the event
	 * of a spurious wakeup, for example. A memory barrier is required here
	 * to prevent the following store to lock_ptr from getting ahead of the
	 * plist_del in __futex_unqueue().
	 */
	smp_store_release(&q->lock_ptr, NULL);

	return true;
}
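
/*
 * The release store above pairs with the waiter side in futex_unqueue(),
 * which, roughly (a simplified sketch of the existing logic in core.c),
 * re-checks lock_ptr after taking the lock because a waker may have
 * changed it concurrently:
 *
 *	lock_ptr = READ_ONCE(q->lock_ptr);
 *	if (lock_ptr != NULL) {
 *		spin_lock(lock_ptr);
 *		if (unlikely(lock_ptr != q->lock_ptr)) {
 *			spin_unlock(lock_ptr);
 *			goto retry;
 *		}
 *		...
 *	}
 */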

/*
 * The hash bucket lock must be held when this is called.
 * Afterwards, the futex_q must not be accessed. Callers
 * must ensure to later call wake_up_q() for the actual
 * wakeups to occur.
 */
void futex_wake_mark(struct wake_q_head *wake_q, struct futex_q *q)
{
	struct task_struct *p = q->task;

	get_task_struct(p);

	if (!__futex_wake_mark(q)) {
		put_task_struct(p);
		return;
	}

	/*
	 * Queue the task for later wakeup, after we've released
	 * the hb->lock.
	 */
	wake_q_add_safe(wake_q, p);
}

/*
 * Wake up waiters matching bitset queued on this futex (uaddr).
 */
int futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
{
	struct futex_hash_bucket *hb;
	struct futex_q *this, *next;
	union futex_key key = FUTEX_KEY_INIT;
	DEFINE_WAKE_Q(wake_q);
	int ret;

	if (!bitset)
		return -EINVAL;

	ret = get_futex_key(uaddr, flags, &key, FUTEX_READ);
	if (unlikely(ret != 0))
		return ret;

	if ((flags & FLAGS_STRICT) && !nr_wake)
		return 0;

	hb = futex_hash(&key);

	/* Make sure we really have tasks to wakeup */
	if (!futex_hb_waiters_pending(hb))
		return ret;

	spin_lock(&hb->lock);

	plist_for_each_entry_safe(this, next, &hb->chain, list) {
		if (futex_match(&this->key, &key)) {
			if (this->pi_state || this->rt_waiter) {
				ret = -EINVAL;
				break;
			}

			/* Check if one of the bits is set in both bitsets */
			if (!(this->bitset & bitset))
				continue;

			this->wake(&wake_q, this);
			if (++ret >= nr_wake)
				break;
		}
	}

	spin_unlock(&hb->lock);
	wake_up_q(&wake_q);
	return ret;
}

static int futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *uaddr)
{
	unsigned int op = (encoded_op & 0x70000000) >> 28;
	unsigned int cmp = (encoded_op & 0x0f000000) >> 24;
	int oparg = sign_extend32((encoded_op & 0x00fff000) >> 12, 11);
	int cmparg = sign_extend32(encoded_op & 0x00000fff, 11);
	int oldval, ret;

	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) {
		if (oparg < 0 || oparg > 31) {
			char comm[sizeof(current->comm)];
			/*
			 * kill this print and return -EINVAL when userspace
			 * is sane again
			 */
			pr_info_ratelimited("futex_wake_op: %s tries to shift op by %d; fix this program\n",
					    get_task_comm(comm, current), oparg);
			oparg &= 31;
		}
		oparg = 1 << oparg;
	}

	pagefault_disable();
	ret = arch_futex_atomic_op_inuser(op, oparg, &oldval, uaddr);
	pagefault_enable();
	if (ret)
		return ret;

	switch (cmp) {
	case FUTEX_OP_CMP_EQ:
		return oldval == cmparg;
	case FUTEX_OP_CMP_NE:
		return oldval != cmparg;
	case FUTEX_OP_CMP_LT:
		return oldval < cmparg;
	case FUTEX_OP_CMP_GE:
		return oldval >= cmparg;
	case FUTEX_OP_CMP_LE:
		return oldval <= cmparg;
	case FUTEX_OP_CMP_GT:
		return oldval > cmparg;
	default:
		return -ENOSYS;
	}
}
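
/*
 * For reference, userspace packs encoded_op via the FUTEX_OP() macro from
 * <linux/futex.h>. A hypothetical FUTEX_WAKE_OP call that atomically sets
 * *uaddr2 to 1, wakes nr_wake waiters on uaddr1, and additionally wakes
 * nr_wake2 waiters on uaddr2 if the old value of *uaddr2 was 0, could
 * look like:
 *
 *	int op = FUTEX_OP(FUTEX_OP_SET, 1, FUTEX_OP_CMP_EQ, 0);
 *
 *	syscall(SYS_futex, uaddr1, FUTEX_WAKE_OP, nr_wake, nr_wake2,
 *		uaddr2, op);
 */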

/*
 * Wake up all waiters hashed on the physical page that is mapped
 * to this virtual address:
 */
int futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
		  int nr_wake, int nr_wake2, int op)
{
	union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
	struct futex_hash_bucket *hb1, *hb2;
	struct futex_q *this, *next;
	int ret, op_ret;
	DEFINE_WAKE_Q(wake_q);

retry:
	ret = get_futex_key(uaddr1, flags, &key1, FUTEX_READ);
	if (unlikely(ret != 0))
		return ret;
	ret = get_futex_key(uaddr2, flags, &key2, FUTEX_WRITE);
	if (unlikely(ret != 0))
		return ret;

	hb1 = futex_hash(&key1);
	hb2 = futex_hash(&key2);

retry_private:
	double_lock_hb(hb1, hb2);
	op_ret = futex_atomic_op_inuser(op, uaddr2);
	if (unlikely(op_ret < 0)) {
		double_unlock_hb(hb1, hb2);

		if (!IS_ENABLED(CONFIG_MMU) ||
		    unlikely(op_ret != -EFAULT && op_ret != -EAGAIN)) {
			/*
			 * we don't get EFAULT from MMU faults if we don't have
			 * an MMU, but we might get them from range checking
			 */
			ret = op_ret;
			return ret;
		}

		if (op_ret == -EFAULT) {
			ret = fault_in_user_writeable(uaddr2);
			if (ret)
				return ret;
		}

		cond_resched();
		if (!(flags & FLAGS_SHARED))
			goto retry_private;
		goto retry;
	}

	plist_for_each_entry_safe(this, next, &hb1->chain, list) {
		if (futex_match(&this->key, &key1)) {
			if (this->pi_state || this->rt_waiter) {
				ret = -EINVAL;
				goto out_unlock;
			}
			this->wake(&wake_q, this);
			if (++ret >= nr_wake)
				break;
		}
	}

	if (op_ret > 0) {
		op_ret = 0;
		plist_for_each_entry_safe(this, next, &hb2->chain, list) {
			if (futex_match(&this->key, &key2)) {
				if (this->pi_state || this->rt_waiter) {
					ret = -EINVAL;
					goto out_unlock;
				}
				this->wake(&wake_q, this);
				if (++op_ret >= nr_wake2)
					break;
			}
		}
		ret += op_ret;
	}

out_unlock:
	double_unlock_hb(hb1, hb2);
	wake_up_q(&wake_q);
	return ret;
}

static long futex_wait_restart(struct restart_block *restart);

/**
 * futex_wait_queue() - futex_queue() and wait for wakeup, timeout, or signal
 * @hb:		the futex hash bucket, must be locked by the caller
 * @q:		the futex_q to queue up on
 * @timeout:	the prepared hrtimer_sleeper, or null for no timeout
 */
void futex_wait_queue(struct futex_hash_bucket *hb, struct futex_q *q,
		      struct hrtimer_sleeper *timeout)
{
	/*
	 * The task state is guaranteed to be set before another task can
	 * wake it. set_current_state() is implemented using smp_store_mb() and
	 * futex_queue() calls spin_unlock() upon completion, both serializing
	 * access to the hash list and forcing another memory barrier.
	 */
	set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE);
	futex_queue(q, hb);

	/* Arm the timer */
	if (timeout)
		hrtimer_sleeper_start_expires(timeout, HRTIMER_MODE_ABS);

	/*
	 * If we have been removed from the hash list, then another task
	 * has tried to wake us, and we can skip the call to schedule().
	 */
	if (likely(!plist_node_empty(&q->list))) {
		/*
		 * If the timer has already expired, current will already be
		 * flagged for rescheduling. Only call schedule if there
		 * is no timeout, or if it has yet to expire.
		 */
		if (!timeout || timeout->task)
			schedule();
	}
	__set_current_state(TASK_RUNNING);
}

/**
 * futex_unqueue_multiple - Remove various futexes from their hash bucket
 * @v:	   The list of futexes to unqueue
 * @count: Number of futexes in the list
 *
 * Helper to unqueue a list of futexes. This can't fail.
 *
 * Return:
 *  - >=0 - Index of the last futex that was awoken;
 *  - -1  - No futex was awoken
 */
int futex_unqueue_multiple(struct futex_vector *v, int count)
{
	int ret = -1, i;

	for (i = 0; i < count; i++) {
		if (!futex_unqueue(&v[i].q))
			ret = i;
	}

	return ret;
}

/**
 * futex_wait_multiple_setup - Prepare to wait and enqueue multiple futexes
 * @vs:		The futex list to wait on
 * @count:	The size of the list
 * @woken:	Index of the last woken futex, if any. Used to notify the
 *		caller that it can return this index to userspace (return parameter)
 *
 * Prepare multiple futexes in a single step and enqueue them. This may fail if
 * the futex list is invalid or if any futex was already awoken. On success the
 * task is ready for an interruptible sleep.
 *
 * Return:
 *  -  1 - One of the futexes was woken by another thread
 *  -  0 - Success
 *  - <0 - -EFAULT, -EWOULDBLOCK or -EINVAL
 */
int futex_wait_multiple_setup(struct futex_vector *vs, int count, int *woken)
{
	struct futex_hash_bucket *hb;
	bool retry = false;
	int ret, i;
	u32 uval;

	/*
	 * Enqueuing multiple futexes is tricky, because we need to enqueue
	 * each futex on the list before dealing with the next one to avoid
	 * deadlocking on the hash bucket. But, before enqueuing, we need to
	 * make sure that current->state is TASK_INTERRUPTIBLE, so we don't
	 * lose any wake events, which cannot be done before the get_futex_key
	 * of the next key, because it calls get_user_pages, which can sleep.
	 * Thus, we fetch the list of futex keys in two steps: first we pin
	 * all the memory keys in the futex keys, and only then do we read
	 * each key and queue the corresponding futex.
	 *
	 * Private futexes don't need to recalculate the hash on retry, so
	 * skip get_futex_key() when retrying.
	 */
retry:
	for (i = 0; i < count; i++) {
		if (!(vs[i].w.flags & FLAGS_SHARED) && retry)
			continue;

		ret = get_futex_key(u64_to_user_ptr(vs[i].w.uaddr),
				    vs[i].w.flags,
				    &vs[i].q.key, FUTEX_READ);

		if (unlikely(ret))
			return ret;
	}

	set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE);

	for (i = 0; i < count; i++) {
		u32 __user *uaddr = (u32 __user *)(unsigned long)vs[i].w.uaddr;
		struct futex_q *q = &vs[i].q;
		u32 val = vs[i].w.val;

		hb = futex_q_lock(q);
		ret = futex_get_value_locked(&uval, uaddr);

		if (!ret && uval == val) {
			/*
			 * The bucket lock can't be held while dealing with the
			 * next futex. Queue each futex at this moment so hb can
			 * be unlocked.
			 */
			futex_queue(q, hb);
			continue;
		}

		futex_q_unlock(hb);
		__set_current_state(TASK_RUNNING);

		/*
		 * Even if something went wrong, if we find out that a futex
		 * was woken, we don't return an error and instead return
		 * that index to userspace.
		 */
		*woken = futex_unqueue_multiple(vs, i);
		if (*woken >= 0)
			return 1;

		if (ret) {
			/*
			 * If we need to handle a page fault, we need to do so
			 * without any lock and any enqueued futex (otherwise
			 * we could lose some wakeup). So we do it here, after
			 * undoing all the work done so far. On success, we
			 * retry all the work.
			 */
			if (get_user(uval, uaddr))
				return -EFAULT;

			retry = true;
			goto retry;
		}

		if (uval != val)
			return -EWOULDBLOCK;
	}

	return 0;
}

/**
 * futex_sleep_multiple - Check sleeping conditions and sleep
 * @vs:    List of futexes to wait for
 * @count: Length of vs
 * @to:    Timeout
 *
 * Sleep if and only if the timeout hasn't expired and no futex on the list has
 * been woken up.
 */
static void futex_sleep_multiple(struct futex_vector *vs, unsigned int count,
				 struct hrtimer_sleeper *to)
{
	if (to && !to->task)
		return;

	for (; count; count--, vs++) {
		if (!READ_ONCE(vs->q.lock_ptr))
			return;
	}

	schedule();
}

/**
 * futex_wait_multiple - Prepare to wait on and enqueue several futexes
 * @vs:		The list of futexes to wait on
 * @count:	The number of objects
 * @to:		Timeout before giving up and returning to userspace
 *
 * Entry point for the FUTEX_WAIT_MULTIPLE futex operation, this function
 * sleeps on a group of futexes and returns on the first futex that was
 * woken, or after the timeout has elapsed.
 *
 * Return:
 *  - >=0 - Index of the futex that was awoken
 *  - <0  - On error
 */
int futex_wait_multiple(struct futex_vector *vs, unsigned int count,
			struct hrtimer_sleeper *to)
{
	int ret, hint = 0;

	if (to)
		hrtimer_sleeper_start_expires(to, HRTIMER_MODE_ABS);

	while (1) {
		ret = futex_wait_multiple_setup(vs, count, &hint);
		if (ret) {
			if (ret > 0) {
				/* A futex was woken during setup */
				ret = hint;
			}
			return ret;
		}

		futex_sleep_multiple(vs, count, to);

		__set_current_state(TASK_RUNNING);

		ret = futex_unqueue_multiple(vs, count);
		if (ret >= 0)
			return ret;

		if (to && !to->task)
			return -ETIMEDOUT;
		else if (signal_pending(current))
			return -ERESTARTSYS;
		/*
		 * The only remaining case is a spurious wakeup, for
		 * which we just retry.
		 */
	}
}
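
/*
 * In current kernels this path is reached via the futex_waitv(2) syscall.
 * A minimal hypothetical userspace sketch (one entry, no timeout, error
 * handling omitted; futex_word and expected_value are placeholder names):
 *
 *	#include <linux/futex.h>
 *	#include <stdint.h>
 *	#include <sys/syscall.h>
 *	#include <unistd.h>
 *
 *	struct futex_waitv waiters[1] = {
 *		{
 *			.val	= expected_value,
 *			.uaddr	= (uintptr_t)&futex_word,
 *			.flags	= FUTEX_32,
 *		},
 *	};
 *
 *	// Returns the index of the woken futex, or a negative error.
 *	long ret = syscall(SYS_futex_waitv, waiters, 1, 0, NULL, 0);
 */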

/**
 * futex_wait_setup() - Prepare to wait on a futex
 * @uaddr:	the futex userspace address
 * @val:	the expected value
 * @flags:	futex flags (FLAGS_SHARED, etc.)
 * @q:		the associated futex_q
 * @hb:		storage for hash_bucket pointer to be returned to caller
 *
 * Setup the futex_q and locate the hash_bucket. Get the futex value and
 * compare it with the expected value. Handle atomic faults internally.
 * Return with the hb lock held on success, and unlocked on failure.
 *
 * Return:
 *  - 0  - uaddr contains val and hb has been locked;
 *  - <0 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlocked
 */
int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
		     struct futex_q *q, struct futex_hash_bucket **hb)
{
	u32 uval;
	int ret;

	/*
	 * Access the page AFTER the hash-bucket is locked.
	 * Order is important:
	 *
	 *   Userspace waiter: val = var; if (cond(val)) futex_wait(&var, val);
	 *   Userspace waker:  if (cond(var)) { var = new; futex_wake(&var); }
	 *
	 * The basic logical guarantee of a futex is that it blocks ONLY
	 * if cond(var) is known to be true at the time of blocking, for
	 * any cond. If we locked the hash-bucket after testing *uaddr, that
	 * would open a race condition where we could block indefinitely with
	 * cond(var) false, which would violate the guarantee.
	 *
	 * On the other hand, we insert q and release the hash-bucket only
	 * after testing *uaddr. This guarantees that futex_wait() will NOT
	 * absorb a wakeup if *uaddr does not match the desired value
	 * while the syscall executes.
	 */
retry:
	ret = get_futex_key(uaddr, flags, &q->key, FUTEX_READ);
	if (unlikely(ret != 0))
		return ret;

retry_private:
	*hb = futex_q_lock(q);

	ret = futex_get_value_locked(&uval, uaddr);

	if (ret) {
		futex_q_unlock(*hb);

		ret = get_user(uval, uaddr);
		if (ret)
			return ret;

		if (!(flags & FLAGS_SHARED))
			goto retry_private;

		goto retry;
	}

	if (uval != val) {
		futex_q_unlock(*hb);
		ret = -EWOULDBLOCK;
	}

	return ret;
}

int __futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
		 struct hrtimer_sleeper *to, u32 bitset)
{
	struct futex_q q = futex_q_init;
	struct futex_hash_bucket *hb;
	int ret;

	if (!bitset)
		return -EINVAL;

	q.bitset = bitset;

retry:
	/*
	 * Prepare to wait on uaddr. On success, it holds hb->lock and q
	 * is initialized.
	 */
	ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
	if (ret)
		return ret;

	/* futex_queue and wait for wakeup, timeout, or a signal. */
	futex_wait_queue(hb, &q, to);

	/* If we were woken (and unqueued), we succeeded, whatever. */
	if (!futex_unqueue(&q))
		return 0;

	if (to && !to->task)
		return -ETIMEDOUT;

	/*
	 * We expect signal_pending(current), but we might be the
	 * victim of a spurious wakeup as well.
	 */
	if (!signal_pending(current))
		goto retry;

	return -ERESTARTSYS;
}

int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, ktime_t *abs_time, u32 bitset)
{
	struct hrtimer_sleeper timeout, *to;
	struct restart_block *restart;
	int ret;

	to = futex_setup_timer(abs_time, &timeout, flags,
			       current->timer_slack_ns);

	ret = __futex_wait(uaddr, flags, val, to, bitset);

	/* No timeout, nothing to clean up. */
	if (!to)
		return ret;

	hrtimer_cancel(&to->timer);
	destroy_hrtimer_on_stack(&to->timer);

	if (ret == -ERESTARTSYS) {
		restart = &current->restart_block;
		restart->futex.uaddr = uaddr;
		restart->futex.val = val;
		restart->futex.time = *abs_time;
		restart->futex.bitset = bitset;
		restart->futex.flags = flags | FLAGS_HAS_TIMEOUT;

		return set_restart_fn(restart, futex_wait_restart);
	}

	return ret;
}
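
/*
 * For illustration, the bitset-aware wait served by this path can be
 * reached from userspace as FUTEX_WAIT_BITSET, which takes an absolute
 * timeout (futex_word, expected and deadline_sec are placeholder names):
 *
 *	struct timespec ts = { .tv_sec = deadline_sec, .tv_nsec = 0 };
 *
 *	syscall(SYS_futex, &futex_word, FUTEX_WAIT_BITSET, expected, &ts,
 *		NULL, FUTEX_BITSET_MATCH_ANY);
 */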

static long futex_wait_restart(struct restart_block *restart)
{
	u32 __user *uaddr = restart->futex.uaddr;
	ktime_t t, *tp = NULL;

	if (restart->futex.flags & FLAGS_HAS_TIMEOUT) {
		t = restart->futex.time;
		tp = &t;
	}
	restart->fn = do_no_restart_syscall;

	return (long)futex_wait(uaddr, restart->futex.flags,
				restart->futex.val, tp, restart->futex.bitset);
}