/* Copyright (C) 2003-2024 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <endian.h>
#include <errno.h>
#include <sysdep.h>
#include <futex-internal.h>
#include <pthread.h>
#include <pthreadP.h>
#include <sys/time.h>
#include <atomic.h>
#include <stdint.h>
#include <stdbool.h>

#include <shlib-compat.h>
#include <stap-probe.h>
#include <time.h>

#include "pthread_cond_common.c"


struct _condvar_cleanup_buffer
{
  uint64_t wseq;
  pthread_cond_t *cond;
  pthread_mutex_t *mutex;
  int private;
};


/* Decrease the waiter reference count.  */
static void
__condvar_confirm_wakeup (pthread_cond_t *cond, int private)
{
  /* If destruction is pending (i.e., the wake-request flag is nonzero) and we
     are the last waiter (prior value of __wrefs was 1 << 3), then wake any
     threads waiting in pthread_cond_destroy.  Release MO to synchronize with
     these threads.  Don't bother clearing the wake-up request flag.  */
  if ((atomic_fetch_add_release (&cond->__data.__wrefs, -8) >> 2) == 3)
    futex_wake (&cond->__data.__wrefs, INT_MAX, private);
}
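
/* Editorial illustration (not part of the original glibc source): the check
   above relies on the layout of __wrefs described in the algorithm comment
   further below: the waiter reference count lives in bits 3 and up, the
   destruction wake-request flag in bit 2.  A minimal sketch of that decoding,
   with a hypothetical helper name:  */
#if 0
static bool
last_waiter_with_destroy_pending (unsigned int prior_wrefs)
{
  unsigned int refcount = prior_wrefs >> 3;       /* Bits 3 and up.  */
  bool destroy_pending = (prior_wrefs & 4) != 0;  /* Bit 2.  */
  /* (prior_wrefs >> 2) == 3 holds exactly when refcount == 1 and the
     wake-request flag is set, i.e., we were the last waiter and
     pthread_cond_destroy is waiting for us.  */
  return refcount == 1 && destroy_pending;
}
#endif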


/* Cancel waiting after having registered as a waiter previously.  SEQ is our
   position and G is our group index.
   The goal of cancellation is to make our group smaller if that is still
   possible.  If we are in a closed group, this is not possible anymore; in
   this case, we need to send a replacement signal for the one we effectively
   consumed because the signal should have gotten consumed by another waiter
   instead; we must not both cancel waiting and consume a signal.

   Must not be called while still holding a reference on the group.

   If we effectively consumed a signal, we send a replacement signal before
   returning (see below).

   On some kinds of timeouts, we may be able to pretend that a signal we
   effectively consumed happened before the timeout (i.e., similarly to first
   spinning on signals before actually checking whether the timeout has
   passed already).  Doing this would allow us to skip sending a replacement
   signal, but this case might happen rarely because the end of the timeout
   must race with someone else sending a signal.  Therefore, we don't bother
   trying to optimize this.  */
static void
__condvar_cancel_waiting (pthread_cond_t *cond, uint64_t seq, unsigned int g,
                          int private)
{
  bool consumed_signal = false;

  /* No deadlock with group switching is possible here because we do
     not hold a reference on the group.  */
  __condvar_acquire_lock (cond, private);

  uint64_t g1_start = __condvar_load_g1_start_relaxed (cond);
  if (g1_start > seq)
    {
      /* Our group is closed, so someone provided enough signals for it.
         Thus, we effectively consumed a signal.  */
      consumed_signal = true;
    }
  else
    {
      if (g1_start + __condvar_get_orig_size (cond) <= seq)
        {
          /* We are in the current G2 and thus cannot have consumed a signal.
             Reduce its effective size or handle overflow.  Remember that in
             G2, the unsigned int size holds zero or an effectively negative
             value.  */
          if (cond->__data.__g_size[g] + __PTHREAD_COND_MAX_GROUP_SIZE > 0)
            {
              cond->__data.__g_size[g]--;
            }
          else
            {
              /* Cancellations would overflow the maximum group size.  Just
                 wake up everyone spuriously to create a clean state.  This
                 also means we do not consume a signal someone else sent.  */
              __condvar_release_lock (cond, private);
              __pthread_cond_broadcast (cond);
              return;
            }
        }
      else
        {
          /* We are in the current G1.  If the group's size is zero, someone
             put a signal in the group that nobody else but us can consume.  */
          if (cond->__data.__g_size[g] == 0)
            consumed_signal = true;
          else
            {
              /* Otherwise, we decrease the size of the group.  This is
                 equivalent to atomically putting in a signal just for us and
                 consuming it right away.  We do not consume a signal sent
                 by someone else.  We also cannot have consumed a futex
                 wake-up because if we were cancelled or timed out in a futex
                 call, the futex will wake another waiter.  */
              cond->__data.__g_size[g]--;
            }
        }
    }

  __condvar_release_lock (cond, private);

  if (consumed_signal)
    {
      /* We effectively consumed a signal even though we didn't want to.
         Therefore, we need to send a replacement signal.
         If we would want to optimize this, we could do what
         pthread_cond_signal does right in the critical section above.  */
      __pthread_cond_signal (cond);
    }
}
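
/* Editorial illustration (not part of the original glibc source): as noted in
   the comment above, the effective size of G2 is zero or negative, stored in
   an unsigned counter that is allowed to wrap around.  A tiny sketch of that
   representation (hypothetical demo code):  */
#if 0
static void
demo_wrapped_group_size (void)
{
  unsigned int g_size = 0;  /* G2 starts with an effective size of zero.  */
  g_size--;                 /* One cancellation: wraps to UINT_MAX, i.e. -1.  */
  g_size--;                 /* Another one: UINT_MAX - 1, i.e. -2.  */
  /* Reinterpreting the bits as a signed value recovers the logical size.  */
  int logical_size = (int) g_size;  /* -2 */
  (void) logical_size;
}
#endif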

/* Clean-up for cancellation of waiters waiting for normal signals.  We cancel
   our registration as a waiter, confirm we have woken up, and re-acquire the
   mutex.  */
static void
__condvar_cleanup_waiting (void *arg)
{
  struct _condvar_cleanup_buffer *cbuffer =
    (struct _condvar_cleanup_buffer *) arg;
  pthread_cond_t *cond = cbuffer->cond;
  unsigned g = cbuffer->wseq & 1;

  __condvar_cancel_waiting (cond, cbuffer->wseq >> 1, g, cbuffer->private);
  /* FIXME With the current cancellation implementation, it is possible that
     a thread is cancelled after it has returned from a syscall.  This could
     result in a cancelled waiter consuming a futex wake-up that is then
     causing another waiter in the same group to not wake up.  To work around
     this issue until we have fixed cancellation, just add a futex wake-up
     conservatively.  */
  futex_wake (cond->__data.__g_signals + g, 1, cbuffer->private);

  __condvar_confirm_wakeup (cond, cbuffer->private);

  /* XXX If locking the mutex fails, should we just stop execution?  This
     might be better than silently ignoring the error.  */
  __pthread_mutex_cond_lock (cbuffer->mutex);
}
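
/* Editorial sketch (hypothetical caller code, not part of glibc): the final
   __pthread_mutex_cond_lock call above is what lets a cancelled caller rely
   on owning the mutex when its own cancellation cleanup handlers run, as in
   this typical pattern:  */
#if 0
static pthread_mutex_t demo_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t demo_cond = PTHREAD_COND_INITIALIZER;
static bool demo_ready;

static void
demo_unlock_on_cancel (void *arg)
{
  /* Runs with DEMO_LOCK owned because the condvar re-acquired it.  */
  pthread_mutex_unlock (arg);
}

static void *
demo_cancellable_waiter (void *arg)
{
  pthread_mutex_lock (&demo_lock);
  pthread_cleanup_push (demo_unlock_on_cancel, &demo_lock);
  while (!demo_ready)
    pthread_cond_wait (&demo_cond, &demo_lock);  /* Cancellation point.  */
  pthread_cleanup_pop (1);  /* Run the handler: unlocks DEMO_LOCK.  */
  return NULL;
}
#endif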

/* This condvar implementation guarantees that all calls to signal and
   broadcast and all of the three virtually atomic parts of each call to wait
   (i.e., (1) releasing the mutex and blocking, (2) unblocking, and (3) re-
   acquiring the mutex) happen in some total order that is consistent with the
   happens-before relations in the calling program.  However, this order does
   not necessarily result in additional happens-before relations being
   established (which aligns well with spurious wake-ups being allowed).
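
   As an editorial illustration (not part of the original comment), the
   caller-side pattern that these guarantees are designed around is:

     pthread_mutex_lock (&m);
     while (!predicate)
       pthread_cond_wait (&cond, &m);
     pthread_mutex_unlock (&m);

   Because spurious wake-ups are allowed, callers must re-check their
   predicate after every return from pthread_cond_wait.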

   All waiters acquire a certain position in a 64b waiter sequence (__wseq).
   This sequence determines which waiters are allowed to consume signals.
   A broadcast is equal to sending as many signals as there are unblocked
   waiters.  When a signal arrives, it samples the current value of __wseq
   with a relaxed-MO load (i.e., the position the next waiter would get).
   (This is sufficient because it is consistent with happens-before; the
   caller can enforce stronger ordering constraints by calling signal while
   holding the mutex.)  Only waiters with a position less than the __wseq
   value observed by the signal are eligible to consume this signal.

   This would be straightforward to implement if waiters would just spin, but
   we need to let them block using futexes.  Futexes give no guarantee of
   waking in FIFO order, so we cannot reliably wake eligible waiters if we
   just use a single futex.  Also, futex words are 32b in size, but we need
   to distinguish more than 1<<32 states because we need to represent the
   order of wake-up (and thus which waiters are eligible to consume signals);
   blocking in a futex is not atomic with a waiter determining its position in
   the waiter sequence, so we need the futex word to reliably notify waiters
   that they should not attempt to block anymore because they have already
   been signaled in the meantime.  While an ABA issue on a 32b value will
   be rare, ignoring it when we are aware of it is not the right thing to do
   either.

   Therefore, we use a 64b counter to represent the waiter sequence (on
   architectures which only support 32b atomics, we use a few bits less).
   To deal with the blocking using futexes, we maintain two groups of waiters:
   * Group G1 consists of waiters that are all eligible to consume signals;
     incoming signals will always signal waiters in this group until all
     waiters in G1 have been signaled.
   * Group G2 consists of waiters that arrive when a G1 is present and still
     contains waiters that have not been signaled.  When all waiters in G1
     are signaled and a new signal arrives, the new signal will convert G2
     into the new G1 and create a new G2 for future waiters.
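
   As an editorial illustration (not part of the original comment), assume
   waiters W1, W2, and W3 block while the current G1 has no unsignaled waiters
   left: all three enter G2.  The next signal converts this G2 into the new
   G1, which now holds {W1, W2, W3} with one signal available; two further
   signals complete that G1.  A waiter W4 arriving after the switch enters the
   new G2 and can only be woken once the groups are switched again.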

   We cannot allocate new memory because of process-shared condvars, so we
   have just two slots of groups that change their role between G1 and G2.
   Each has a separate futex word, a number of signals available for
   consumption, a size (number of waiters in the group that have not been
   signaled), and a reference count.

   The group reference count is used to maintain the number of waiters that
   are using the group's futex.

   To represent which intervals in the waiter sequence the groups cover (and
   thus also which group slot contains G1 or G2), we use a 64b counter to
   designate the start position of G1 (inclusive), and a single bit in the
   waiter sequence counter to represent which group slot currently contains
   G2.  This allows us to switch group roles atomically wrt. waiters obtaining
   a position in the waiter sequence.  The G1 start position allows waiters to
   figure out whether they are in a group that has already been completely
   signaled (i.e., if the current G1 starts at a later position than the
   waiter's position).  Waiters cannot determine whether they are currently
   in G2 or G1 -- but they do not have to because all they are interested in
   is whether there are available signals, and they always start in G2 (whose
   group slot they know because of the bit in the waiter sequence).  Signalers
   will simply fill the right group until it is completely signaled and can
   be closed (they do not switch group roles until they really have to, to
   decrease the likelihood of having to wait for waiters still holding a
   reference on the now-closed G1).

   Signalers maintain the initial size of G1 to be able to determine where
   G2 starts (G2 is always open-ended until it becomes G1).  They track the
   remaining size of a group; when waiters cancel waiting (due to PThreads
   cancellation or timeouts), they will decrease this remaining size as well.

   To implement condvar destruction requirements (i.e., that
   pthread_cond_destroy can be called as soon as all waiters have been
   signaled), waiters increment a reference count before starting to wait and
   decrement it after they stopped waiting but right before they acquire the
   mutex associated with the condvar.

   pthread_cond_t thus consists of the following fields (some bits are used
   for flags and are not part of the primary value of each field, but are
   necessary to make some things atomic or because there was no space for
   them elsewhere in the data structure):

   __wseq: Waiter sequence counter
   * LSB is index of current G2 (see the worked example after this field
     list).
   * Waiters fetch-add while having acquired the mutex associated with the
     condvar.  Signalers load it and fetch-xor it concurrently.
   __g1_start: Starting position of G1 (inclusive)
   * Modified by signalers while having acquired the condvar-internal lock
     and observed concurrently by waiters.
   __g1_orig_size: Initial size of G1
   * The two least-significant bits represent the condvar-internal lock.
   * Only accessed while having acquired the condvar-internal lock.
   __wrefs: Waiter reference counter.
   * Bit 2 is true if waiters should run futex_wake when they remove the
     last reference.  pthread_cond_destroy uses this as futex word.
   * Bit 1 is the clock ID (0 == CLOCK_REALTIME, 1 == CLOCK_MONOTONIC).
   * Bit 0 is true iff this is a process-shared condvar.
   * Simple reference count used by both waiters and pthread_cond_destroy.
   (If the format of __wrefs is changed, update nptl_lock_constants.pysym
   and the pretty printers.)
   For each of the two groups, we have:
   __g_refs: Futex waiter reference count.
   * LSB is true if waiters should run futex_wake when they remove the
     last reference.
   * Reference count used by waiters concurrently with signalers that have
     acquired the condvar-internal lock.
   __g_signals: The number of signals that can still be consumed, relative
     to the current g1_start (i.e., g1_start with the signal count added).
   * Used as a futex word by waiters.  Used concurrently by waiters and
     signalers.
   __g_size: Waiters remaining in this group (i.e., which have not been
     signaled yet).
   * Accessed by signalers and waiters that cancel waiting (both do so only
     when having acquired the condvar-internal lock).
   * The size of G2 is always zero because it cannot be determined until
     the group becomes G1.
   * Although this is of unsigned type, we rely on using unsigned overflow
     rules to make this hold effectively negative values too (in
     particular, when waiters in G2 cancel waiting).
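
   As an editorial illustration (not part of the original comment), consider
   __wseq == 13 (binary 1101): the next waiter would get position
   13 >> 1 == 6 and would enter group slot 13 & 1 == 1, which currently acts
   as G2.  Likewise, __wrefs == 42 (binary 101010) encodes a waiter reference
   count of 42 >> 3 == 5, no pending destruction (bit 2 clear),
   CLOCK_MONOTONIC as the clock (bit 1 set), and a process-private condvar
   (bit 0 clear).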

   A PTHREAD_COND_INITIALIZER condvar has all fields set to zero, which yields
   a condvar that has G2 starting at position 0 and a G1 that is closed.

   Because waiters do not claim ownership of a group right when obtaining a
   position in __wseq but only reference count the group when using futexes
   to block, it can happen that a group gets closed before a waiter can
   increment the reference count.  Therefore, waiters have to check whether
   their group is already closed using __g1_start.  They also have to perform
   this check while spinning when trying to grab a signal from __g_signals.
   Note that for these checks, using relaxed MO to load __g1_start is
   sufficient because if a waiter can see a sufficiently large value, it could
   also have consumed a signal in the waiter's group.


   Limitations:
   * This condvar isn't designed to allow for more than
     __PTHREAD_COND_MAX_GROUP_SIZE * (1 << 31) calls to __pthread_cond_wait.
   * More than __PTHREAD_COND_MAX_GROUP_SIZE concurrent waiters are not
     supported.
   * Beyond what is allowed as errors by POSIX or documented, we can also
     return the following errors:
     * EPERM if MUTEX is a recursive mutex and the caller doesn't own it.
     * EOWNERDEAD or ENOTRECOVERABLE when using robust mutexes.  Unlike
       for other errors, this can happen when we re-acquire the mutex; this
       isn't allowed by POSIX (which requires all errors to virtually happen
       before we release the mutex or change the condvar state), but there's
       nothing we can do really.
     * When using PTHREAD_MUTEX_PP_* mutexes, we can also return all errors
       returned by __pthread_tpp_change_priority.  We will already have
       released the mutex in such cases, so the caller cannot expect to own
       MUTEX.

   Other notes:
   * Instead of the normal mutex unlock / lock functions, we use
     __pthread_mutex_unlock_usercnt (m, 0) / __pthread_mutex_cond_lock (m)
     because those will not change the mutex-internal users count, so that it
     can be detected when a condvar is still associated with a particular
     mutex because there is a waiter blocked on this condvar using this
     mutex.  */
static __always_inline int
__pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex,
                            clockid_t clockid,
                            const struct __timespec64 *abstime)
{
  int err;
  int result = 0;

  LIBC_PROBE (cond_wait, 2, cond, mutex);

  /* clockid will already have been checked by
     __pthread_cond_clockwait or pthread_condattr_setclock, or we
     don't use it if abstime is NULL, so we don't need to check it
     here.  */

  /* Acquire a position (SEQ) in the waiter sequence (WSEQ).  We use an
     atomic operation because signals and broadcasts may update the group
     switch without acquiring the mutex.  We do not need release MO here
     because we do not need to establish any happens-before relation with
     signalers (see __pthread_cond_signal); modification order alone
     establishes a total order of waiters/signals.  We do need acquire MO
     to synchronize with group reinitialization in __condvar_switch_g1.  */
  uint64_t wseq = __condvar_fetch_add_wseq_acquire (cond, 2);
  /* Find our group's index.  We always go into what was G2 when we acquired
     our position.  */
  unsigned int g = wseq & 1;
  uint64_t seq = wseq >> 1;

  /* Increase the waiter reference count.  Relaxed MO is sufficient because
     we only need to synchronize when decrementing the reference count.  */
  unsigned int flags = atomic_fetch_add_relaxed (&cond->__data.__wrefs, 8);
  int private = __condvar_get_private (flags);

  /* Now that we are registered as a waiter, we can release the mutex.
     Waiting on the condvar must be atomic with releasing the mutex, so if
     the mutex is used to establish a happens-before relation with any
     signaler, the waiter must be visible to the latter; thus, we release the
     mutex after registering as waiter.
     If releasing the mutex fails, we just cancel our registration as a
     waiter and confirm that we have woken up.  */
  err = __pthread_mutex_unlock_usercnt (mutex, 0);
  if (__glibc_unlikely (err != 0))
    {
      __condvar_cancel_waiting (cond, seq, g, private);
      __condvar_confirm_wakeup (cond, private);
      return err;
    }


  while (1)
    {
      /* Now wait until a signal is available in our group or it is closed.
         Acquire MO so that if we observe (signals == lowseq) after group
         switching in __condvar_switch_g1, we synchronize with that store and
         will see the prior update of __g1_start done while switching groups
         too.  */
      unsigned int signals
        = atomic_load_acquire (cond->__data.__g_signals + g);
      uint64_t g1_start = __condvar_load_g1_start_relaxed (cond);

      if (seq < g1_start)
        {
          /* If the group is closed already,
             then this waiter originally had enough extra signals to
             consume, up until the time its group was closed.  */
          break;
        }

      /* If there is an available signal, don't block.
         If __g1_start has advanced at all, then we must be in G1
         by now, perhaps in the process of switching back to an older
         G2, but in either case we're allowed to consume the available
         signal and should not block anymore.  */
      if ((int) (signals - (unsigned int) g1_start) > 0)
        {
          /* Try to grab a signal.  See above for MO.  (If we do another loop
             iteration, we need to see the correct value of g1_start.)  */
          if (atomic_compare_exchange_weak_acquire (
                cond->__data.__g_signals + g,
                &signals, signals - 1))
            break;
          else
            continue;
        }
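
      /* Editorial note (illustration, not part of the original source): the
         comparison above is wrap-around safe because both operands are
         truncated to 32 bits before subtracting.  For example, if the low 32
         bits of g1_start are 0xfffffffe and signals has wrapped to 2, then
         signals - (unsigned int) g1_start == 4, which is positive as an int,
         so a signal is available even though the raw 32b values wrapped.  */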

      // Now block.
      struct _pthread_cleanup_buffer buffer;
      struct _condvar_cleanup_buffer cbuffer;
      cbuffer.wseq = wseq;
      cbuffer.cond = cond;
      cbuffer.mutex = mutex;
      cbuffer.private = private;
      __pthread_cleanup_push (&buffer, __condvar_cleanup_waiting, &cbuffer);

      err = __futex_abstimed_wait_cancelable64 (
        cond->__data.__g_signals + g, signals, clockid, abstime, private);

      __pthread_cleanup_pop (&buffer, 0);

      if (__glibc_unlikely (err == ETIMEDOUT || err == EOVERFLOW))
        {
          /* If we timed out, we effectively cancel waiting.  */
          __condvar_cancel_waiting (cond, seq, g, private);
          result = err;
          break;
        }
    }

  /* Confirm that we have been woken.  We do that before acquiring the mutex
     to allow for execution of pthread_cond_destroy while having acquired the
     mutex.  */
  __condvar_confirm_wakeup (cond, private);

  /* Woken up; now re-acquire the mutex.  If this doesn't fail, return RESULT,
     which is set to ETIMEDOUT if a timeout occurred, or zero otherwise.  */
  err = __pthread_mutex_cond_lock (mutex);
  /* XXX Abort on errors that are disallowed by POSIX?  */
  return (err != 0) ? err : result;
}


/* See __pthread_cond_wait_common.  */
int
___pthread_cond_wait (pthread_cond_t *cond, pthread_mutex_t *mutex)
{
  /* clockid is unused when abstime is NULL.  */
  return __pthread_cond_wait_common (cond, mutex, 0, NULL);
}

versioned_symbol (libc, ___pthread_cond_wait, pthread_cond_wait,
                  GLIBC_2_3_2);
libc_hidden_ver (___pthread_cond_wait, __pthread_cond_wait)
#ifndef SHARED
strong_alias (___pthread_cond_wait, __pthread_cond_wait)
#endif
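
/* Editorial sketch (hypothetical caller code, not part of glibc): as noted in
   the algorithm comment above, waiting on a robust mutex can report
   EOWNERDEAD when the mutex is re-acquired; the waiter then owns the mutex
   and must make the protected state consistent again:  */
#if 0
static int
demo_robust_wait (pthread_cond_t *cv, pthread_mutex_t *robust_m, bool *ready)
{
  int err = 0;
  while (!*ready && err == 0)
    {
      err = pthread_cond_wait (cv, robust_m);
      if (err == EOWNERDEAD)
        {
          /* The previous owner died while holding ROBUST_M; repair the
             application state (application specific), then mark the mutex
             consistent and keep going.  */
          pthread_mutex_consistent (robust_m);
          err = 0;
        }
    }
  return err;
}
#endif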

/* See __pthread_cond_wait_common.  */
int
___pthread_cond_timedwait64 (pthread_cond_t *cond, pthread_mutex_t *mutex,
                             const struct __timespec64 *abstime)
{
  /* Check parameter validity.  This should also tell the compiler that
     it can assume that abstime is not NULL.  */
  if (! valid_nanoseconds (abstime->tv_nsec))
    return EINVAL;

  /* Relaxed MO suffices because the clock ID bit is only modified at
     condvar creation time.  */
  unsigned int flags = atomic_load_relaxed (&cond->__data.__wrefs);
  clockid_t clockid = (flags & __PTHREAD_COND_CLOCK_MONOTONIC_MASK)
                      ? CLOCK_MONOTONIC : CLOCK_REALTIME;
  return __pthread_cond_wait_common (cond, mutex, clockid, abstime);
}

#if __TIMESIZE == 64
strong_alias (___pthread_cond_timedwait64, ___pthread_cond_timedwait)
#else
strong_alias (___pthread_cond_timedwait64, __pthread_cond_timedwait64)
libc_hidden_def (__pthread_cond_timedwait64)

int
___pthread_cond_timedwait (pthread_cond_t *cond, pthread_mutex_t *mutex,
                           const struct timespec *abstime)
{
  struct __timespec64 ts64 = valid_timespec_to_timespec64 (*abstime);

  return __pthread_cond_timedwait64 (cond, mutex, &ts64);
}
#endif /* __TIMESIZE == 64 */
versioned_symbol (libc, ___pthread_cond_timedwait,
                  pthread_cond_timedwait, GLIBC_2_3_2);
libc_hidden_ver (___pthread_cond_timedwait, __pthread_cond_timedwait)
#ifndef SHARED
strong_alias (___pthread_cond_timedwait, __pthread_cond_timedwait)
#endif
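
/* Editorial sketch (hypothetical caller code, not part of glibc): abstime is
   an absolute deadline measured against the clock selected with
   pthread_condattr_setclock (CLOCK_REALTIME by default), which is exactly the
   clock bit consulted in ___pthread_cond_timedwait64 above.  Called with
   *M locked:  */
#if 0
static int
demo_wait_until (pthread_cond_t *cv, pthread_mutex_t *m, bool *ready,
                 unsigned int seconds)
{
  struct timespec deadline;
  clock_gettime (CLOCK_REALTIME, &deadline);  /* Must match the condvar's clock.  */
  deadline.tv_sec += seconds;

  int err = 0;
  while (!*ready && err == 0)
    /* Returns 0 or ETIMEDOUT; the mutex is owned again in either case.  */
    err = pthread_cond_timedwait (cv, m, &deadline);
  return err;
}
#endif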

/* See __pthread_cond_wait_common.  */
int
___pthread_cond_clockwait64 (pthread_cond_t *cond, pthread_mutex_t *mutex,
                             clockid_t clockid,
                             const struct __timespec64 *abstime)
{
  /* Check parameter validity.  This should also tell the compiler that
     it can assume that abstime is not NULL.  */
  if (! valid_nanoseconds (abstime->tv_nsec))
    return EINVAL;

  if (!futex_abstimed_supported_clockid (clockid))
    return EINVAL;

  return __pthread_cond_wait_common (cond, mutex, clockid, abstime);
}

#if __TIMESIZE == 64
strong_alias (___pthread_cond_clockwait64, ___pthread_cond_clockwait)
#else
strong_alias (___pthread_cond_clockwait64, __pthread_cond_clockwait64);
libc_hidden_def (__pthread_cond_clockwait64)

int
___pthread_cond_clockwait (pthread_cond_t *cond, pthread_mutex_t *mutex,
                           clockid_t clockid,
                           const struct timespec *abstime)
{
  struct __timespec64 ts64 = valid_timespec_to_timespec64 (*abstime);

  return __pthread_cond_clockwait64 (cond, mutex, clockid, &ts64);
}
#endif /* __TIMESIZE == 64 */
libc_hidden_ver (___pthread_cond_clockwait, __pthread_cond_clockwait)
#ifndef SHARED
strong_alias (___pthread_cond_clockwait, __pthread_cond_clockwait)
#endif
versioned_symbol (libc, ___pthread_cond_clockwait,
                  pthread_cond_clockwait, GLIBC_2_34);
#if OTHER_SHLIB_COMPAT (libpthread, GLIBC_2_30, GLIBC_2_34)
compat_symbol (libpthread, ___pthread_cond_clockwait,
               pthread_cond_clockwait, GLIBC_2_30);
#endif
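
/* Editorial sketch (hypothetical caller code, not part of glibc):
   pthread_cond_clockwait lets the caller pick the clock per call rather than
   per condvar, e.g. CLOCK_MONOTONIC so the deadline is immune to wall-clock
   adjustments.  Called with *M locked:  */
#if 0
static int
demo_clockwait_monotonic (pthread_cond_t *cv, pthread_mutex_t *m, bool *ready,
                          unsigned int seconds)
{
  struct timespec deadline;
  clock_gettime (CLOCK_MONOTONIC, &deadline);
  deadline.tv_sec += seconds;

  int err = 0;
  while (!*ready && err == 0)
    err = pthread_cond_clockwait (cv, m, CLOCK_MONOTONIC, &deadline);
  return err;
}
#endif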