| 1 | /* SPDX-License-Identifier: GPL-2.0 */ |
| 2 | #ifndef _FUTEX_H |
| 3 | #define _FUTEX_H |
| 4 | |
| 5 | #include <linux/futex.h> |
| 6 | #include <linux/rtmutex.h> |
| 7 | #include <linux/sched/wake_q.h> |
| 8 | #include <linux/compat.h> |
| 9 | #include <linux/uaccess.h> |
| 10 | #include <linux/cleanup.h> |
| 11 | |
| 12 | #ifdef CONFIG_PREEMPT_RT |
| 13 | #include <linux/rcuwait.h> |
| 14 | #endif |
| 15 | |
| 16 | #include <asm/futex.h> |
| 17 | |
/*
 * Internal futex flags. They encode the options handed to the futex
 * functions and are preserved across restarts.
 *
 * The two low bits select the futex word size, see FLAGS_SIZE_MASK.
 */
#define FLAGS_SIZE_8		0x0000
#define FLAGS_SIZE_16		0x0001
#define FLAGS_SIZE_32		0x0002
#define FLAGS_SIZE_64		0x0003

#define FLAGS_SIZE_MASK		0x0003

#ifndef CONFIG_MMU
/*
 * NOMMU does not have per process address space. Let the compiler optimize
 * code away.
 */
# define FLAGS_SHARED		0x0000
#else
# define FLAGS_SHARED		0x0010
#endif
#define FLAGS_CLOCKRT		0x0020
#define FLAGS_HAS_TIMEOUT	0x0040
#define FLAGS_NUMA		0x0080
#define FLAGS_STRICT		0x0100
#define FLAGS_MPOL		0x0200
| 43 | |
| 44 | /* FUTEX_ to FLAGS_ */ |
| 45 | static inline unsigned int futex_to_flags(unsigned int op) |
| 46 | { |
| 47 | unsigned int flags = FLAGS_SIZE_32; |
| 48 | |
| 49 | if (!(op & FUTEX_PRIVATE_FLAG)) |
| 50 | flags |= FLAGS_SHARED; |
| 51 | |
| 52 | if (op & FUTEX_CLOCK_REALTIME) |
| 53 | flags |= FLAGS_CLOCKRT; |
| 54 | |
| 55 | return flags; |
| 56 | } |
| 57 | |
| 58 | #define FUTEX2_VALID_MASK (FUTEX2_SIZE_MASK | FUTEX2_NUMA | FUTEX2_MPOL | FUTEX2_PRIVATE) |
| 59 | |
| 60 | /* FUTEX2_ to FLAGS_ */ |
| 61 | static inline unsigned int futex2_to_flags(unsigned int flags2) |
| 62 | { |
| 63 | unsigned int flags = flags2 & FUTEX2_SIZE_MASK; |
| 64 | |
| 65 | if (!(flags2 & FUTEX2_PRIVATE)) |
| 66 | flags |= FLAGS_SHARED; |
| 67 | |
| 68 | if (flags2 & FUTEX2_NUMA) |
| 69 | flags |= FLAGS_NUMA; |
| 70 | |
| 71 | if (flags2 & FUTEX2_MPOL) |
| 72 | flags |= FLAGS_MPOL; |
| 73 | |
| 74 | return flags; |
| 75 | } |
| 76 | |
| 77 | static inline unsigned int futex_size(unsigned int flags) |
| 78 | { |
| 79 | return 1 << (flags & FLAGS_SIZE_MASK); |
| 80 | } |
| 81 | |
| 82 | static inline bool futex_flags_valid(unsigned int flags) |
| 83 | { |
| 84 | /* Only 64bit futexes for 64bit code */ |
| 85 | if (!IS_ENABLED(CONFIG_64BIT) || in_compat_syscall()) { |
| 86 | if ((flags & FLAGS_SIZE_MASK) == FLAGS_SIZE_64) |
| 87 | return false; |
| 88 | } |
| 89 | |
| 90 | /* Only 32bit futexes are implemented -- for now */ |
| 91 | if ((flags & FLAGS_SIZE_MASK) != FLAGS_SIZE_32) |
| 92 | return false; |
| 93 | |
| 94 | /* |
| 95 | * Must be able to represent both FUTEX_NO_NODE and every valid nodeid |
| 96 | * in a futex word. |
| 97 | */ |
| 98 | if (flags & FLAGS_NUMA) { |
| 99 | int bits = 8 * futex_size(flags); |
| 100 | u64 max = ~0ULL; |
| 101 | |
| 102 | max >>= 64 - bits; |
| 103 | if (nr_node_ids >= max) |
| 104 | return false; |
| 105 | } |
| 106 | |
| 107 | return true; |
| 108 | } |
| 109 | |
| 110 | static inline bool futex_validate_input(unsigned int flags, u64 val) |
| 111 | { |
| 112 | int bits = 8 * futex_size(flags); |
| 113 | |
| 114 | if (bits < 64 && (val >> bits)) |
| 115 | return false; |
| 116 | |
| 117 | return true; |
| 118 | } |
| 119 | |
#ifndef CONFIG_FAIL_FUTEX
/* CONFIG_FAIL_FUTEX is off: this stub never reports a failure */
static inline bool should_fail_futex(bool fshared)
{
	return false;
}
#else
bool should_fail_futex(bool fshared);
#endif
| 128 | |
/*
 * Hash buckets are shared by all the futex_keys that hash to the same
 * location. Each key may have multiple futex_q structures, one for each task
 * waiting on a futex.
 *
 * @waiters is the counter maintained by futex_hb_waiters_inc() and
 * futex_hb_waiters_dec() and read by futex_hb_waiters_pending(). @lock is
 * the spinlock taken by double_lock_hb() and released by futex_queue().
 */
struct futex_hash_bucket {
	atomic_t waiters;
	spinlock_t lock;
	struct plist_head chain;
	struct futex_private_hash *priv;
} ____cacheline_aligned_in_smp;
| 140 | |
/*
 * Priority Inheritance state:
 */
struct futex_pi_state {
	/*
	 * List of 'owned' pi_state instances - these have to be
	 * cleaned up in do_exit() if the task exits prematurely:
	 */
	struct list_head list;

	/*
	 * The PI object:
	 */
	struct rt_mutex_base pi_mutex;

	/* Owning task and reference count of this pi_state */
	struct task_struct *owner;
	refcount_t refcount;

	/* Futex key stored with this pi_state */
	union futex_key key;
} __randomize_layout;
| 161 | |
struct futex_q;
/* Signature of a futex_q wake handler, see @wake in struct futex_q */
typedef void (futex_wake_fn)(struct wake_q_head *wake_q, struct futex_q *q);

/**
 * struct futex_q - The hashed futex queue entry, one per waiting task
 * @list:		priority-sorted list of tasks waiting on this futex
 * @task:		the task waiting on the futex
 * @lock_ptr:		the hash bucket lock
 * @wake:		the wake handler for this queue
 * @wake_data:		data associated with the wake handler
 * @key:		the key the futex is hashed on
 * @pi_state:		optional priority inheritance state
 * @rt_waiter:		rt_waiter storage for use with requeue_pi
 * @requeue_pi_key:	the requeue_pi target futex key
 * @bitset:		bitset for the optional bitmasked wakeup
 * @requeue_state:	State field for futex_requeue_pi()
 * @drop_hb_ref:	Waiter should drop the extra hash bucket reference if true
 * @requeue_wait:	RCU wait for futex_requeue_pi() (RT only)
 *
 * We use this hashed waitqueue, instead of a normal wait_queue_entry_t, so
 * we can wake only the relevant ones (hashed queues may be shared).
 *
 * A futex_q has a woken state, just like tasks have TASK_RUNNING.
 * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0.
 * The order of wakeup is always to make the first condition true, then
 * the second.
 *
 * PI futexes are typically woken before they are removed from the hash list
 * via the rt_mutex code. See futex_unqueue_pi().
 */
struct futex_q {
	struct plist_node list;

	struct task_struct *task;
	spinlock_t *lock_ptr;
	futex_wake_fn *wake;
	void *wake_data;
	union futex_key key;
	struct futex_pi_state *pi_state;
	struct rt_mutex_waiter *rt_waiter;
	union futex_key *requeue_pi_key;
	u32 bitset;
	atomic_t requeue_state;
	bool drop_hb_ref;
#ifdef CONFIG_PREEMPT_RT
	struct rcuwait requeue_wait;
#endif
} __randomize_layout;
| 210 | |
| 211 | extern const struct futex_q futex_q_init; |
| 212 | |
| 213 | enum futex_access { |
| 214 | FUTEX_READ, |
| 215 | FUTEX_WRITE |
| 216 | }; |
| 217 | |
| 218 | extern int get_futex_key(u32 __user *uaddr, unsigned int flags, union futex_key *key, |
| 219 | enum futex_access rw); |
| 220 | extern void futex_q_lockptr_lock(struct futex_q *q); |
| 221 | extern struct hrtimer_sleeper * |
| 222 | futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout, |
| 223 | int flags, u64 range_ns); |
| 224 | |
| 225 | extern struct futex_hash_bucket *futex_hash(union futex_key *key); |
| 226 | #ifdef CONFIG_FUTEX_PRIVATE_HASH |
| 227 | extern void futex_hash_get(struct futex_hash_bucket *hb); |
| 228 | extern void futex_hash_put(struct futex_hash_bucket *hb); |
| 229 | |
| 230 | extern struct futex_private_hash *futex_private_hash(void); |
| 231 | extern void futex_private_hash_put(struct futex_private_hash *fph); |
| 232 | |
| 233 | #else /* !CONFIG_FUTEX_PRIVATE_HASH */ |
| 234 | static inline void futex_hash_get(struct futex_hash_bucket *hb) { } |
| 235 | static inline void futex_hash_put(struct futex_hash_bucket *hb) { } |
| 236 | static inline struct futex_private_hash *futex_private_hash(void) { return NULL; } |
| 237 | static inline void futex_private_hash_put(struct futex_private_hash *fph) { } |
| 238 | #endif |
| 239 | |
/*
 * Class "hb": the value is obtained with futex_hash(key) and, when it is
 * non-NULL, handed to futex_hash_put().
 * NOTE(review): assumes the DEFINE_CLASS() argument order is
 * (name, type, exit expression, init expression, init args) - confirm
 * against linux/cleanup.h.
 */
DEFINE_CLASS(hb, struct futex_hash_bucket *,
	     if (_T) futex_hash_put(_T),
	     futex_hash(key), union futex_key *key);

/*
 * Class "private_hash": the value is obtained with futex_private_hash()
 * and, when it is non-NULL, handed to futex_private_hash_put().
 */
DEFINE_CLASS(private_hash, struct futex_private_hash *,
	     if (_T) futex_private_hash_put(_T),
	     futex_private_hash(), void);
| 247 | |
| 248 | /** |
| 249 | * futex_match - Check whether two futex keys are equal |
| 250 | * @key1: Pointer to key1 |
| 251 | * @key2: Pointer to key2 |
| 252 | * |
| 253 | * Return 1 if two futex_keys are equal, 0 otherwise. |
| 254 | */ |
| 255 | static inline int futex_match(union futex_key *key1, union futex_key *key2) |
| 256 | { |
| 257 | return (key1 && key2 |
| 258 | && key1->both.word == key2->both.word |
| 259 | && key1->both.ptr == key2->both.ptr |
| 260 | && key1->both.offset == key2->both.offset); |
| 261 | } |
| 262 | |
| 263 | extern int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags, |
| 264 | struct futex_q *q, union futex_key *key2, |
| 265 | struct task_struct *task); |
| 266 | extern void futex_do_wait(struct futex_q *q, struct hrtimer_sleeper *timeout); |
| 267 | extern bool __futex_wake_mark(struct futex_q *q); |
| 268 | extern void futex_wake_mark(struct wake_q_head *wake_q, struct futex_q *q); |
| 269 | |
| 270 | extern int fault_in_user_writeable(u32 __user *uaddr); |
| 271 | extern struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb, union futex_key *key); |
| 272 | |
| 273 | static inline int futex_cmpxchg_value_locked(u32 *curval, u32 __user *uaddr, u32 uval, u32 newval) |
| 274 | { |
| 275 | int ret; |
| 276 | |
| 277 | pagefault_disable(); |
| 278 | ret = futex_atomic_cmpxchg_inatomic(uval: curval, uaddr, oldval: uval, newval); |
| 279 | pagefault_enable(); |
| 280 | |
| 281 | return ret; |
| 282 | } |
| 283 | |
| 284 | /* Read from user memory with pagefaults disabled */ |
| 285 | static inline int futex_get_value_locked(u32 *dest, u32 __user *from) |
| 286 | { |
| 287 | guard(pagefault)(); |
| 288 | return get_user_inline(*dest, from); |
| 289 | } |
| 290 | |
void __futex_unqueue(struct futex_q *q);
void __futex_queue(struct futex_q *q, struct futex_hash_bucket *hb,
		   struct task_struct *task);
int futex_unqueue(struct futex_q *q);
| 295 | |
| 296 | /** |
| 297 | * futex_queue() - Enqueue the futex_q on the futex_hash_bucket |
| 298 | * @q: The futex_q to enqueue |
| 299 | * @hb: The destination hash bucket |
| 300 | * @task: Task queueing this futex |
| 301 | * |
| 302 | * The hb->lock must be held by the caller, and is released here. A call to |
| 303 | * futex_queue() is typically paired with exactly one call to futex_unqueue(). The |
| 304 | * exceptions involve the PI related operations, which may use futex_unqueue_pi() |
| 305 | * or nothing if the unqueue is done as part of the wake process and the unqueue |
| 306 | * state is implicit in the state of woken task (see futex_wait_requeue_pi() for |
| 307 | * an example). |
| 308 | * |
| 309 | * Note that @task may be NULL, for async usage of futexes. |
| 310 | */ |
| 311 | static inline void futex_queue(struct futex_q *q, struct futex_hash_bucket *hb, |
| 312 | struct task_struct *task) |
| 313 | __releases(&hb->lock) |
| 314 | { |
| 315 | __futex_queue(q, hb, task); |
| 316 | spin_unlock(lock: &hb->lock); |
| 317 | } |
| 318 | |
void futex_unqueue_pi(struct futex_q *q);

void wait_for_owner_exiting(int ret, struct task_struct *exiting);
| 322 | |
/*
 * Reflects a new waiter being added to the waitqueue.
 *
 * With CONFIG_SMP hb->waiters is incremented and followed by a full barrier;
 * without it this function does nothing.
 */
static inline void futex_hb_waiters_inc(struct futex_hash_bucket *hb)
{
#ifdef CONFIG_SMP
	atomic_inc(&hb->waiters);
	/*
	 * Full barrier (A), see the futex ordering comment.
	 * NOTE(review): that comment is not in this header; it is believed
	 * to live in kernel/futex/waitwake.c - confirm.
	 */
	smp_mb__after_atomic();
#endif
}
| 336 | |
/*
 * Reflects a waiter being removed from the waitqueue by wakeup
 * paths.
 *
 * With CONFIG_SMP hb->waiters is decremented; without it this function
 * does nothing.
 */
static inline void futex_hb_waiters_dec(struct futex_hash_bucket *hb)
{
#ifdef CONFIG_SMP
	atomic_dec(&hb->waiters);
#endif
}
| 347 | |
/*
 * futex_hb_waiters_pending - read the waiter count of a hash bucket
 * @hb:	hash bucket to inspect
 *
 * Return: with CONFIG_SMP the current value of hb->waiters, read after a
 * full barrier; without it always 1.
 */
static inline int futex_hb_waiters_pending(struct futex_hash_bucket *hb)
{
#ifdef CONFIG_SMP
	/*
	 * Full barrier (B), see the futex ordering comment.
	 * NOTE(review): that comment is not in this header; it is believed
	 * to live in kernel/futex/waitwake.c - confirm.
	 */
	smp_mb();
	return atomic_read(&hb->waiters);
#else
	return 1;
#endif
}
| 360 | |
| 361 | extern void futex_q_lock(struct futex_q *q, struct futex_hash_bucket *hb); |
| 362 | extern void futex_q_unlock(struct futex_hash_bucket *hb); |
| 363 | |
| 364 | |
| 365 | extern int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb, |
| 366 | union futex_key *key, |
| 367 | struct futex_pi_state **ps, |
| 368 | struct task_struct *task, |
| 369 | struct task_struct **exiting, |
| 370 | int set_waiters); |
| 371 | |
| 372 | extern int refill_pi_state_cache(void); |
| 373 | extern void get_pi_state(struct futex_pi_state *pi_state); |
| 374 | extern void put_pi_state(struct futex_pi_state *pi_state); |
| 375 | extern int fixup_pi_owner(u32 __user *uaddr, struct futex_q *q, int locked); |
| 376 | |
| 377 | /* |
| 378 | * Express the locking dependencies for lockdep: |
| 379 | */ |
| 380 | static inline void |
| 381 | double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2) |
| 382 | { |
| 383 | if (hb1 > hb2) |
| 384 | swap(hb1, hb2); |
| 385 | |
| 386 | spin_lock(lock: &hb1->lock); |
| 387 | if (hb1 != hb2) |
| 388 | spin_lock_nested(&hb2->lock, SINGLE_DEPTH_NESTING); |
| 389 | } |
| 390 | |
| 391 | static inline void |
| 392 | double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2) |
| 393 | { |
| 394 | spin_unlock(lock: &hb1->lock); |
| 395 | if (hb1 != hb2) |
| 396 | spin_unlock(lock: &hb2->lock); |
| 397 | } |
| 398 | |
| 399 | /* syscalls */ |
| 400 | |
| 401 | extern int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, u32 |
| 402 | val, ktime_t *abs_time, u32 bitset, u32 __user |
| 403 | *uaddr2); |
| 404 | |
| 405 | extern int futex_requeue(u32 __user *uaddr1, unsigned int flags1, |
| 406 | u32 __user *uaddr2, unsigned int flags2, |
| 407 | int nr_wake, int nr_requeue, |
| 408 | u32 *cmpval, int requeue_pi); |
| 409 | |
| 410 | extern int __futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, |
| 411 | struct hrtimer_sleeper *to, u32 bitset); |
| 412 | |
| 413 | extern int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, |
| 414 | ktime_t *abs_time, u32 bitset); |
| 415 | |
/**
 * struct futex_vector - Auxiliary struct for futex_waitv()
 * @w:	Userspace provided data
 * @q:	Kernel side data
 *
 * Struct used to build an array with all data needed for futex_waitv()
 */
struct futex_vector {
	struct futex_waitv w;
	struct futex_q q;
};
| 427 | |
| 428 | extern int futex_parse_waitv(struct futex_vector *futexv, |
| 429 | struct futex_waitv __user *uwaitv, |
| 430 | unsigned int nr_futexes, futex_wake_fn *wake, |
| 431 | void *wake_data); |
| 432 | |
| 433 | extern int futex_wait_multiple_setup(struct futex_vector *vs, int count, |
| 434 | int *woken); |
| 435 | |
| 436 | extern int futex_unqueue_multiple(struct futex_vector *v, int count); |
| 437 | |
| 438 | extern int futex_wait_multiple(struct futex_vector *vs, unsigned int count, |
| 439 | struct hrtimer_sleeper *to); |
| 440 | |
| 441 | extern int futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset); |
| 442 | |
| 443 | extern int futex_wake_op(u32 __user *uaddr1, unsigned int flags, |
| 444 | u32 __user *uaddr2, int nr_wake, int nr_wake2, int op); |
| 445 | |
| 446 | extern int futex_unlock_pi(u32 __user *uaddr, unsigned int flags); |
| 447 | |
| 448 | extern int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int trylock); |
| 449 | |
| 450 | #endif /* _FUTEX_H */ |
| 451 | |