1/* Thread-local storage handling in the ELF dynamic linker. Generic version.
2 Copyright (C) 2002-2024 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
18
19#include <assert.h>
20#include <errno.h>
21#include <libintl.h>
22#include <signal.h>
23#include <stdlib.h>
24#include <unistd.h>
25#include <sys/param.h>
26#include <atomic.h>
27
28#include <tls.h>
29#include <dl-tls.h>
30#include <ldsodefs.h>
31
32#if PTHREAD_IN_LIBC
33# include <list.h>
34#endif
35
36#define TUNABLE_NAMESPACE rtld
37#include <dl-tunables.h>
38
39/* Surplus static TLS, GLRO(dl_tls_static_surplus), is used for
40
41 - IE TLS in libc.so for all dlmopen namespaces except in the initial
42 one where libc.so is not loaded dynamically but at startup time,
43 - IE TLS in other libraries which may be dynamically loaded even in the
44 initial namespace,
45 - and optionally for optimizing dynamic TLS access.
46
47 The maximum number of namespaces is DL_NNS, but to support that many
48 namespaces correctly the static TLS allocation should be significantly
49 increased, which may cause problems with small thread stacks due to the
50 way static TLS is accounted (bug 11787).
51
52 So there is a rtld.nns tunable limit on the number of supported namespaces
53 that affects the size of the static TLS and by default it's small enough
54 not to cause problems with existing applications. The limit is not
55 enforced or checked: it is the user's responsibility to increase rtld.nns
56 if more dlmopen namespaces are used.
57
58 Audit modules use their own namespaces, they are not included in rtld.nns,
59 but come on top when computing the number of namespaces. */
60
/* Size of initial-exec TLS in libc.so.  This should be the maximum of
   observed PT_GNU_TLS sizes across all architectures.  Some
   architectures have lower values due to differences in type sizes
   and link editor capabilities.  */
#define LIBC_IE_TLS 144

/* Size of initial-exec TLS in libraries other than libc.so.
   This should be large enough to cover runtime libraries of the
   compiler such as libgomp and libraries in libc other than libc.so.  */
#define OTHER_IE_TLS 144

/* Default number of namespaces.  */
#define DEFAULT_NNS 4

/* Default for dl_tls_static_optional.  */
#define OPTIONAL_TLS 512

/* Compute the static TLS surplus for NNS namespaces plus OPT_TLS bytes
   reserved for dynamic TLS access optimizations.  Every namespace but
   the initial one needs IE TLS space for a dynamically loaded libc.so;
   every namespace (including the initial one) may additionally load
   other libraries with IE TLS.  */
static inline int
tls_static_surplus (int nns, int opt_tls)
{
  /* libc.so is loaded dynamically in all but the initial namespace.  */
  int libc_ie = (nns - 1) * LIBC_IE_TLS;
  /* Non-libc IE TLS can appear in every namespace.  */
  int other_ie = nns * OTHER_IE_TLS;
  return libc_ie + other_ie + opt_tls;
}

/* This value is chosen so that with default values for the tunables,
   the computation of dl_tls_static_surplus in
   _dl_tls_static_surplus_init yields the historic value 1664, for
   backwards compatibility.  */
#define LEGACY_TLS (1664 - tls_static_surplus (DEFAULT_NNS, OPTIONAL_TLS))
116
117/* Calculate the size of the static TLS surplus, when the given
118 number of audit modules are loaded. Must be called after the
119 number of audit modules is known and before static TLS allocation. */
120void
121_dl_tls_static_surplus_init (size_t naudit)
122{
123 size_t nns, opt_tls;
124
125 nns = TUNABLE_GET (nns, size_t, NULL);
126 opt_tls = TUNABLE_GET (optional_static_tls, size_t, NULL);
127 if (nns > DL_NNS)
128 nns = DL_NNS;
129 if (DL_NNS - nns < naudit)
130 _dl_fatal_printf (fmt: "Failed loading %lu audit modules, %lu are supported.\n",
131 (unsigned long) naudit, (unsigned long) (DL_NNS - nns));
132 nns += naudit;
133
134 GL(dl_tls_static_optional) = opt_tls;
135 assert (LEGACY_TLS >= 0);
136 GLRO(dl_tls_static_surplus) = tls_static_surplus (nns, opt_tls) + LEGACY_TLS;
137}
138
/* Out-of-memory handler: terminate the process with a diagnostic.
   Used where TLS allocation failure cannot be reported to the caller
   (e.g. DTV resizing, bug 16134).  */
static void
__attribute__ ((__noreturn__))
oom (void)
{
  _dl_fatal_printf ("cannot allocate memory for thread-local data: ABORT\n");
}
146
147
/* Assign a TLS module ID to the newly loaded object L and store it in
   l_tls_modid.  Reuses a gap left by a previous dlclose/failed dlopen
   if one exists, otherwise extends the current maximum.  Caller must
   hold the loader lock protecting the slotinfo list.  */
void
_dl_assign_tls_modid (struct link_map *l)
{
  size_t result;

  if (__builtin_expect (GL(dl_tls_dtv_gaps), false))
    {
      size_t disp = 0;
      struct dtv_slotinfo_list *runp = GL(dl_tls_dtv_slotinfo_list);

      /* Note that this branch will never be executed during program
	 start since there are no gaps at that time.  Therefore it
	 does not matter that the dl_tls_dtv_slotinfo is not allocated
	 yet when the function is called for the first times.

	 NB: the offset +1 is due to the fact that DTV[0] is used
	 for something else.  */
      result = GL(dl_tls_static_nelem) + 1;
      if (result <= GL(dl_tls_max_dtv_idx))
	do
	  {
	    /* Scan this list node for the first unused slot.  DISP is
	       the module-id offset of RUNP's first slot.  */
	    while (result - disp < runp->len)
	      {
		if (runp->slotinfo[result - disp].map == NULL)
		  break;

		++result;
		assert (result <= GL(dl_tls_max_dtv_idx) + 1);
	      }

	    if (result - disp < runp->len)
	      {
		/* Mark the entry as used, so any dependency sees it.  */
		atomic_store_relaxed (&runp->slotinfo[result - disp].map, l);
		atomic_store_relaxed (&runp->slotinfo[result - disp].gen, 0);
		break;
	      }

	    disp += runp->len;
	  }
	while ((runp = runp->next) != NULL);

      if (result > GL(dl_tls_max_dtv_idx))
	{
	  /* The new index must indeed be exactly one higher than the
	     previous high.  */
	  assert (result == GL(dl_tls_max_dtv_idx) + 1);
	  /* There is no gap anymore.  */
	  GL(dl_tls_dtv_gaps) = false;

	  goto nogaps;
	}
    }
  else
    {
      /* No gaps, allocate a new entry.  */
    nogaps:

      result = GL(dl_tls_max_dtv_idx) + 1;
      /* Can be read concurrently.  */
      atomic_store_relaxed (&GL(dl_tls_max_dtv_idx), result);
    }

  l->l_tls_modid = result;
}
213
214
215size_t
216_dl_count_modids (void)
217{
218 /* The count is the max unless dlclose or failed dlopen created gaps. */
219 if (__glibc_likely (!GL(dl_tls_dtv_gaps)))
220 return GL(dl_tls_max_dtv_idx);
221
222 /* We have gaps and are forced to count the non-NULL entries. */
223 size_t n = 0;
224 struct dtv_slotinfo_list *runp = GL(dl_tls_dtv_slotinfo_list);
225 while (runp != NULL)
226 {
227 for (size_t i = 0; i < runp->len; ++i)
228 if (runp->slotinfo[i].map != NULL)
229 ++n;
230
231 runp = runp->next;
232 }
233
234 return n;
235}
236
237
#ifdef SHARED
/* Lay out the static TLS block: assign l_tls_offset for every module
   present at startup and compute the total static TLS size and
   alignment (GLRO(dl_tls_static_size), GLRO(dl_tls_static_align)).
   Called once during startup, before any thread TLS is allocated.  */
void
_dl_determine_tlsoffset (void)
{
  size_t max_align = TCB_ALIGNMENT;
  /* [freebottom, freetop) style bookkeeping of the most recent
     alignment gap, so a later small block can be placed in it.  The
     roles of the two variables differ between the two ABI layouts
     below.  */
  size_t freetop = 0;
  size_t freebottom = 0;

  /* The first element of the dtv slot info list is allocated.  */
  assert (GL(dl_tls_dtv_slotinfo_list) != NULL);
  /* There is at this point only one element in the
     dl_tls_dtv_slotinfo_list list.  */
  assert (GL(dl_tls_dtv_slotinfo_list)->next == NULL);

  struct dtv_slotinfo *slotinfo = GL(dl_tls_dtv_slotinfo_list)->slotinfo;

  /* Determining the offset of the various parts of the static TLS
     block has several dependencies.  In addition we have to work
     around bugs in some toolchains.

     Each TLS block from the objects available at link time has a size
     and an alignment requirement.  The GNU ld computes the alignment
     requirements for the data at the positions *in the file*, though.
     I.e, it is not simply possible to allocate a block with the size
     of the TLS program header entry.  The data is laid out assuming
     that the first byte of the TLS block fulfills

       p_vaddr mod p_align == &TLS_BLOCK mod p_align

     This means we have to add artificial padding at the beginning of
     the TLS block.  These bytes are never used for the TLS data in
     this module but the first byte allocated must be aligned
     according to mod p_align == 0 so that the first byte of the TLS
     block is aligned according to p_vaddr mod p_align.  This is ugly
     and the linker can help by computing the offsets in the TLS block
     assuming the first byte of the TLS block is aligned according to
     p_align.

     The extra space which might be allocated before the first byte of
     the TLS block need not go unused.  The code below tries to use
     that memory for the next TLS block.  This can work if the total
     memory requirement for the next TLS block is smaller than the
     gap.  */

#if TLS_TCB_AT_TP
  /* TLS blocks grow downward from the thread pointer; offsets are
     positive distances below the TCB.  We simply start with zero.  */
  size_t offset = 0;

  for (size_t cnt = 0; slotinfo[cnt].map != NULL; ++cnt)
    {
      assert (cnt < GL(dl_tls_dtv_slotinfo_list)->len);

      /* Padding needed so that the block start has the file-relative
	 alignment phase described above.  */
      size_t firstbyte = (-slotinfo[cnt].map->l_tls_firstbyte_offset
			  & (slotinfo[cnt].map->l_tls_align - 1));
      size_t off;
      max_align = MAX (max_align, slotinfo[cnt].map->l_tls_align);

      /* Try to place this block in the gap left by a previous,
	 larger-aligned block.  */
      if (freebottom - freetop >= slotinfo[cnt].map->l_tls_blocksize)
	{
	  off = roundup (freetop + slotinfo[cnt].map->l_tls_blocksize
			 - firstbyte, slotinfo[cnt].map->l_tls_align)
		+ firstbyte;
	  if (off <= freebottom)
	    {
	      freetop = off;

	      /* XXX For some architectures we perhaps should store the
		 negative offset.  */
	      slotinfo[cnt].map->l_tls_offset = off;
	      continue;
	    }
	}

      /* Gap reuse failed; append below the blocks placed so far.  */
      off = roundup (offset + slotinfo[cnt].map->l_tls_blocksize - firstbyte,
		     slotinfo[cnt].map->l_tls_align) + firstbyte;
      if (off > offset + slotinfo[cnt].map->l_tls_blocksize
		+ (freebottom - freetop))
	{
	  /* Remember the newly created alignment gap for later reuse.  */
	  freetop = offset;
	  freebottom = off - slotinfo[cnt].map->l_tls_blocksize;
	}
      offset = off;

      /* XXX For some architectures we perhaps should store the
	 negative offset.  */
      slotinfo[cnt].map->l_tls_offset = off;
    }

  GL(dl_tls_static_used) = offset;
  GLRO (dl_tls_static_size) = (roundup (offset + GLRO(dl_tls_static_surplus),
					max_align)
			       + TLS_TCB_SIZE);
#elif TLS_DTV_AT_TP
  /* The TLS blocks start right after the TCB.  */
  size_t offset = TLS_TCB_SIZE;

  for (size_t cnt = 0; slotinfo[cnt].map != NULL; ++cnt)
    {
      assert (cnt < GL(dl_tls_dtv_slotinfo_list)->len);

      size_t firstbyte = (-slotinfo[cnt].map->l_tls_firstbyte_offset
			  & (slotinfo[cnt].map->l_tls_align - 1));
      size_t off;
      max_align = MAX (max_align, slotinfo[cnt].map->l_tls_align);

      /* Try to place this block in a previously recorded gap.  */
      if (slotinfo[cnt].map->l_tls_blocksize <= freetop - freebottom)
	{
	  off = roundup (freebottom, slotinfo[cnt].map->l_tls_align);
	  if (off - freebottom < firstbyte)
	    off += slotinfo[cnt].map->l_tls_align;
	  if (off + slotinfo[cnt].map->l_tls_blocksize - firstbyte <= freetop)
	    {
	      slotinfo[cnt].map->l_tls_offset = off - firstbyte;
	      freebottom = (off + slotinfo[cnt].map->l_tls_blocksize
			    - firstbyte);
	      continue;
	    }
	}

      /* Gap reuse failed; append after the blocks placed so far.  */
      off = roundup (offset, slotinfo[cnt].map->l_tls_align);
      if (off - offset < firstbyte)
	off += slotinfo[cnt].map->l_tls_align;

      slotinfo[cnt].map->l_tls_offset = off - firstbyte;
      if (off - firstbyte - offset > freetop - freebottom)
	{
	  /* Remember the newly created alignment gap for later reuse.  */
	  freebottom = offset;
	  freetop = off - firstbyte;
	}

      offset = off + slotinfo[cnt].map->l_tls_blocksize - firstbyte;
    }

  GL(dl_tls_static_used) = offset;
  GLRO (dl_tls_static_size) = roundup (offset + GLRO(dl_tls_static_surplus),
				       TCB_ALIGNMENT);
#else
# error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
#endif

  /* The alignment requirement for the static TLS block.  */
  GLRO (dl_tls_static_align) = max_align;
}
#endif /* SHARED */
382
383static void *
384allocate_dtv (void *result)
385{
386 dtv_t *dtv;
387 size_t dtv_length;
388
389 /* Relaxed MO, because the dtv size is later rechecked, not relied on. */
390 size_t max_modid = atomic_load_relaxed (&GL(dl_tls_max_dtv_idx));
391 /* We allocate a few more elements in the dtv than are needed for the
392 initial set of modules. This should avoid in most cases expansions
393 of the dtv. */
394 dtv_length = max_modid + DTV_SURPLUS;
395 dtv = calloc (nmemb: dtv_length + 2, size: sizeof (dtv_t));
396 if (dtv != NULL)
397 {
398 /* This is the initial length of the dtv. */
399 dtv[0].counter = dtv_length;
400
401 /* The rest of the dtv (including the generation counter) is
402 Initialize with zero to indicate nothing there. */
403
404 /* Add the dtv to the thread data structures. */
405 INSTALL_DTV (result, dtv);
406 }
407 else
408 result = NULL;
409
410 return result;
411}
412
/* Get size and alignment requirements of the static TLS block.  This
   function is no longer used by glibc itself, but the GCC sanitizers
   use it despite the GLIBC_PRIVATE status.  */
void
_dl_get_tls_static_info (size_t *sizep, size_t *alignp)
{
  /* Total static TLS size, including the surplus and the TCB.  */
  *sizep = GLRO (dl_tls_static_size);
  /* Strictest alignment of any statically placed TLS block.  */
  *alignp = GLRO (dl_tls_static_align);
}
422
/* Derive the location of the pointer to the start of the original
   allocation (before alignment) from the pointer to the TCB.  The
   pointer is stored there by _dl_allocate_tls_storage and read back
   by _dl_deallocate_tls.  */
static inline void **
tcb_to_pointer_to_free_location (void *tcb)
{
#if TLS_TCB_AT_TP
  /* The TCB follows the TLS blocks, and the pointer to the front
     follows the TCB.  */
  void **original_pointer_location = tcb + TLS_TCB_SIZE;
#elif TLS_DTV_AT_TP
  /* The TCB comes first, preceded by the pre-TCB, and the pointer is
     before that.  */
  void **original_pointer_location = tcb - TLS_PRE_TCB_SIZE - sizeof (void *);
#endif
  return original_pointer_location;
}
439
440void *
441_dl_allocate_tls_storage (void)
442{
443 void *result;
444 size_t size = GLRO (dl_tls_static_size);
445
446#if TLS_DTV_AT_TP
447 /* Memory layout is:
448 [ TLS_PRE_TCB_SIZE ] [ TLS_TCB_SIZE ] [ TLS blocks ]
449 ^ This should be returned. */
450 size += TLS_PRE_TCB_SIZE;
451#endif
452
453 /* Reserve space for the required alignment and the pointer to the
454 original allocation. */
455 size_t alignment = GLRO (dl_tls_static_align);
456
457 /* Perform the allocation. */
458 _dl_tls_allocate_begin ();
459 void *allocated = malloc (size: size + alignment + sizeof (void *));
460 if (__glibc_unlikely (allocated == NULL))
461 {
462 _dl_tls_allocate_end ();
463 return NULL;
464 }
465
466 /* Perform alignment and allocate the DTV. */
467#if TLS_TCB_AT_TP
468 /* The TCB follows the TLS blocks, which determine the alignment.
469 (TCB alignment requirements have been taken into account when
470 calculating GLRO (dl_tls_static_align).) */
471 void *aligned = (void *) roundup ((uintptr_t) allocated, alignment);
472 result = aligned + size - TLS_TCB_SIZE;
473
474 /* Clear the TCB data structure. We can't ask the caller (i.e.
475 libpthread) to do it, because we will initialize the DTV et al. */
476 memset (result, '\0', TLS_TCB_SIZE);
477#elif TLS_DTV_AT_TP
478 /* Pre-TCB and TCB come before the TLS blocks. The layout computed
479 in _dl_determine_tlsoffset assumes that the TCB is aligned to the
480 TLS block alignment, and not just the TLS blocks after it. This
481 can leave an unused alignment gap between the TCB and the TLS
482 blocks. */
483 result = (void *) roundup
484 (sizeof (void *) + TLS_PRE_TCB_SIZE + (uintptr_t) allocated,
485 alignment);
486
487 /* Clear the TCB data structure and TLS_PRE_TCB_SIZE bytes before
488 it. We can't ask the caller (i.e. libpthread) to do it, because
489 we will initialize the DTV et al. */
490 memset (result - TLS_PRE_TCB_SIZE, '\0', TLS_PRE_TCB_SIZE + TLS_TCB_SIZE);
491#endif
492
493 /* Record the value of the original pointer for later
494 deallocation. */
495 *tcb_to_pointer_to_free_location (tcb: result) = allocated;
496
497 result = allocate_dtv (result);
498 if (result == NULL)
499 free (ptr: allocated);
500
501 _dl_tls_allocate_end ();
502 return result;
503}
504
505
506#ifndef SHARED
507extern dtv_t _dl_static_dtv[];
508# define _dl_initial_dtv (&_dl_static_dtv[1])
509#endif
510
511static dtv_t *
512_dl_resize_dtv (dtv_t *dtv, size_t max_modid)
513{
514 /* Resize the dtv. */
515 dtv_t *newp;
516 size_t newsize = max_modid + DTV_SURPLUS;
517 size_t oldsize = dtv[-1].counter;
518
519 _dl_tls_allocate_begin ();
520 if (dtv == GL(dl_initial_dtv))
521 {
522 /* This is the initial dtv that was either statically allocated in
523 __libc_setup_tls or allocated during rtld startup using the
524 dl-minimal.c malloc instead of the real malloc. We can't free
525 it, we have to abandon the old storage. */
526
527 newp = malloc (size: (2 + newsize) * sizeof (dtv_t));
528 if (newp == NULL)
529 oom ();
530 memcpy (newp, &dtv[-1], (2 + oldsize) * sizeof (dtv_t));
531#ifdef SHARED
532 /* Auditors can trigger a DTV resize event while the full malloc
533 is not yet in use. Mark the new DTV allocation as the
534 initial allocation. */
535 if (!__rtld_malloc_is_complete ())
536 GL(dl_initial_dtv) = &newp[1];
537#endif
538 }
539 else
540 {
541 newp = realloc (ptr: &dtv[-1],
542 size: (2 + newsize) * sizeof (dtv_t));
543 if (newp == NULL)
544 oom ();
545 }
546 _dl_tls_allocate_end ();
547
548 newp[0].counter = newsize;
549
550 /* Clear the newly allocated part. */
551 memset (newp + 2 + oldsize, '\0',
552 (newsize - oldsize) * sizeof (dtv_t));
553
554 /* Return the generation counter. */
555 return &newp[1];
556}
557
558
/* Allocate initial TLS.  RESULT should be a non-NULL pointer to storage
   for the TLS space.  The DTV may be resized, and so this function may
   call malloc to allocate that space.  The loader's GL(dl_load_tls_lock)
   is taken when manipulating global TLS-related data in the loader.

   If MAIN_THREAD, this is the first call during process
   initialization.  In this case, TLS initialization for secondary
   (audit) namespaces is skipped because that has already been handled
   by dlopen.  */
void *
_dl_allocate_tls_init (void *result, bool main_thread)
{
  if (result == NULL)
    /* The memory allocation failed.  */
    return NULL;

  dtv_t *dtv = GET_DTV (result);
  struct dtv_slotinfo_list *listp;
  size_t total = 0;
  size_t maxgen = 0;

  /* Protects global dynamic TLS related state.  */
  __rtld_lock_lock_recursive (GL(dl_load_tls_lock));

  /* Check if the current dtv is big enough.  */
  if (dtv[-1].counter < GL(dl_tls_max_dtv_idx))
    {
      /* Resize the dtv.  */
      dtv = _dl_resize_dtv (dtv, GL(dl_tls_max_dtv_idx));

      /* Install this new dtv in the thread data structures.  */
      INSTALL_DTV (result, &dtv[-1]);
    }

  /* We have to prepare the dtv for all currently loaded modules using
     TLS.  For those which are dynamically loaded we add the values
     indicating deferred allocation.  */
  listp = GL(dl_tls_dtv_slotinfo_list);
  while (1)
    {
      size_t cnt;

      /* Module IDs start at 1, so skip slot 0 of the first node.  */
      for (cnt = total == 0 ? 1 : 0; cnt < listp->len; ++cnt)
	{
	  struct link_map *map;
	  void *dest;

	  /* Check for the total number of used slots.  */
	  if (total + cnt > GL(dl_tls_max_dtv_idx))
	    break;

	  map = listp->slotinfo[cnt].map;
	  if (map == NULL)
	    /* Unused entry.  */
	    continue;

	  /* Keep track of the maximum generation number.  This might
	     not be the generation counter.  */
	  assert (listp->slotinfo[cnt].gen <= GL(dl_tls_generation));
	  maxgen = MAX (maxgen, listp->slotinfo[cnt].gen);

	  /* Default to deferred (dynamic) allocation for this module.  */
	  dtv[map->l_tls_modid].pointer.val = TLS_DTV_UNALLOCATED;
	  dtv[map->l_tls_modid].pointer.to_free = NULL;

	  /* Modules without a static TLS offset keep the deferred
	     marker; their block is allocated on first access.  */
	  if (map->l_tls_offset == NO_TLS_OFFSET
	      || map->l_tls_offset == FORCED_DYNAMIC_TLS_OFFSET)
	    continue;

	  assert (map->l_tls_modid == total + cnt);
	  assert (map->l_tls_blocksize >= map->l_tls_initimage_size);
#if TLS_TCB_AT_TP
	  assert ((size_t) map->l_tls_offset >= map->l_tls_blocksize);
	  dest = (char *) result - map->l_tls_offset;
#elif TLS_DTV_AT_TP
	  dest = (char *) result + map->l_tls_offset;
#else
# error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
#endif

	  /* Set up the DTV entry.  The simplified __tls_get_addr that
	     some platforms use in static programs requires it.  */
	  dtv[map->l_tls_modid].pointer.val = dest;

	  /* Copy the initialization image and clear the BSS part.
	     For audit modules or dependencies with initial-exec TLS,
	     we can not set the initial TLS image on default loader
	     initialization because it would already be set by the
	     audit setup, which uses the dlopen code and already
	     clears l_need_tls_init.  Calls with !main_thread from
	     pthread_create need to initialize TLS for the current
	     thread regardless of namespace.  */
	  if (map->l_ns != LM_ID_BASE && main_thread)
	    continue;
	  memset (__mempcpy (dest, map->l_tls_initimage,
			     map->l_tls_initimage_size), '\0',
		  map->l_tls_blocksize - map->l_tls_initimage_size);
	  if (main_thread)
	    map->l_need_tls_init = 0;
	}

      total += cnt;
      if (total > GL(dl_tls_max_dtv_idx))
	break;

      listp = listp->next;
      assert (listp != NULL);
    }
  __rtld_lock_unlock_recursive (GL(dl_load_tls_lock));

  /* The DTV version is up-to-date now.  */
  dtv[0].counter = maxgen;

  return result;
}
rtld_hidden_def (_dl_allocate_tls_init)
674
675void *
676_dl_allocate_tls (void *mem)
677{
678 return _dl_allocate_tls_init (result: mem == NULL
679 ? _dl_allocate_tls_storage ()
680 : allocate_dtv (result: mem), false);
681}
682rtld_hidden_def (_dl_allocate_tls)
683
684
685void
686_dl_deallocate_tls (void *tcb, bool dealloc_tcb)
687{
688 dtv_t *dtv = GET_DTV (tcb);
689
690 /* We need to free the memory allocated for non-static TLS. */
691 for (size_t cnt = 0; cnt < dtv[-1].counter; ++cnt)
692 free (ptr: dtv[1 + cnt].pointer.to_free);
693
694 /* The array starts with dtv[-1]. */
695 if (dtv != GL(dl_initial_dtv))
696 free (ptr: dtv - 1);
697
698 if (dealloc_tcb)
699 free (ptr: *tcb_to_pointer_to_free_location (tcb));
700}
701rtld_hidden_def (_dl_deallocate_tls)
702
703
704#ifdef SHARED
/* The __tls_get_addr function has two basic forms which differ in the
   arguments.  The IA-64 form takes two parameters, the module ID and
   offset.  The form used, among others, on IA-32 takes a reference to
   a special structure which contains the same information.  The second
   form seems to be more often used (at the moment) so we default to
   it.  Users of the IA-64 form have to provide adequate definitions
   of the following macros.  */
712# ifndef GET_ADDR_ARGS
713# define GET_ADDR_ARGS tls_index *ti
714# define GET_ADDR_PARAM ti
715# endif
716# ifndef GET_ADDR_MODULE
717# define GET_ADDR_MODULE ti->ti_module
718# endif
719# ifndef GET_ADDR_OFFSET
720# define GET_ADDR_OFFSET ti->ti_offset
721# endif
722
723/* Allocate one DTV entry. */
724static struct dtv_pointer
725allocate_dtv_entry (size_t alignment, size_t size)
726{
727 if (powerof2 (alignment) && alignment <= _Alignof (max_align_t))
728 {
729 /* The alignment is supported by malloc. */
730 _dl_tls_allocate_begin ();
731 void *ptr = malloc (size);
732 _dl_tls_allocate_end ();
733 return (struct dtv_pointer) { ptr, ptr };
734 }
735
736 /* Emulate memalign to by manually aligning a pointer returned by
737 malloc. First compute the size with an overflow check. */
738 size_t alloc_size = size + alignment;
739 if (alloc_size < size)
740 return (struct dtv_pointer) {};
741
742 /* Perform the allocation. This is the pointer we need to free
743 later. */
744 _dl_tls_allocate_begin ();
745 void *start = malloc (alloc_size);
746 _dl_tls_allocate_end ();
747
748 if (start == NULL)
749 return (struct dtv_pointer) {};
750
751 /* Find the aligned position within the larger allocation. */
752 void *aligned = (void *) roundup ((uintptr_t) start, alignment);
753
754 return (struct dtv_pointer) { .val = aligned, .to_free = start };
755}
756
757static struct dtv_pointer
758allocate_and_init (struct link_map *map)
759{
760 struct dtv_pointer result = allocate_dtv_entry
761 (map->l_tls_align, map->l_tls_blocksize);
762 if (result.val == NULL)
763 oom ();
764
765 /* Initialize the memory. */
766 memset (__mempcpy (result.val, map->l_tls_initimage,
767 map->l_tls_initimage_size),
768 '\0', map->l_tls_blocksize - map->l_tls_initimage_size);
769
770 return result;
771}
772
773
/* Bring the calling thread's DTV up to generation NEW_GEN, clearing or
   (re)initializing entries whose slotinfo changed since the DTV's
   current generation.  Returns the link map recorded for REQ_MODID in
   the slotinfo list, or NULL if none was observed.  Runs without the
   loader lock; see the concurrency notes below.  */
struct link_map *
_dl_update_slotinfo (unsigned long int req_modid, size_t new_gen)
{
  struct link_map *the_map = NULL;
  dtv_t *dtv = THREAD_DTV ();

  /* CONCURRENCY NOTES:

     The global dl_tls_dtv_slotinfo_list array contains for each module
     index the generation counter current when that entry was updated.
     This array never shrinks so that all module indices which were
     valid at some time can be used to access it.  Concurrent loading
     and unloading of modules can update slotinfo entries or extend
     the array.  The updates happen under the GL(dl_load_tls_lock) and
     finish with the release store of the generation counter to
     GL(dl_tls_generation) which is synchronized with the load of
     new_gen in the caller.  So updates up to new_gen are synchronized
     but updates for later generations may not be.

     Here we update the thread dtv from old_gen (== dtv[0].counter) to
     new_gen generation.  For this, each dtv[i] entry is either set to
     an unallocated state (set), or left unmodified (nop).  Where (set)
     may resize the dtv first if modid i >= dtv[-1].counter.  The rules
     for the decision between (set) and (nop) are

     (1) If slotinfo entry i is concurrently updated then either (set)
	 or (nop) is valid: TLS access cannot use dtv[i] unless it is
	 synchronized with a generation > new_gen.

     Otherwise, if the generation of slotinfo entry i is gen and the
     loaded module for this entry is map then

     (2) If gen <= old_gen then do (nop).

     (3) If old_gen < gen <= new_gen then
	 (3.1) if map != 0 then (set)
	 (3.2) if map == 0 then either (set) or (nop).

     Note that (1) cannot be reliably detected, but since both actions
     are valid it does not have to be.  Only (2) and (3.1) cases need
     to be distinguished for which relaxed mo access of gen and map is
     enough: their value is synchronized when it matters.

     Note that a relaxed mo load may give an out-of-thin-air value since
     it is used in decisions that can affect concurrent stores.  But this
     should only happen if the OOTA value causes UB that justifies the
     concurrent store of the value.  This is not expected to be an issue
     in practice.  */
  struct dtv_slotinfo_list *listp = GL(dl_tls_dtv_slotinfo_list);

  if (dtv[0].counter < new_gen)
    {
      size_t total = 0;
      size_t max_modid = atomic_load_relaxed (&GL(dl_tls_max_dtv_idx));
      assert (max_modid >= req_modid);

      /* We have to look through the entire dtv slotinfo list.  */
      listp = GL(dl_tls_dtv_slotinfo_list);
      do
	{
	  /* Module IDs start at 1, so skip slot 0 of the first node.  */
	  for (size_t cnt = total == 0 ? 1 : 0; cnt < listp->len; ++cnt)
	    {
	      size_t modid = total + cnt;

	      /* Case (1) for all later modids.  */
	      if (modid > max_modid)
		break;

	      size_t gen = atomic_load_relaxed (&listp->slotinfo[cnt].gen);

	      /* Case (1).  */
	      if (gen > new_gen)
		continue;

	      /* Case (2) or (1).  */
	      if (gen <= dtv[0].counter)
		continue;

	      /* Case (3) or (1).  */

	      /* If there is no map this means the entry is empty.  */
	      struct link_map *map
		= atomic_load_relaxed (&listp->slotinfo[cnt].map);
	      /* Check whether the current dtv array is large enough.  */
	      if (dtv[-1].counter < modid)
		{
		  /* Case (3.2) or (1).  */
		  if (map == NULL)
		    continue;

		  /* Resizing the dtv aborts on failure: bug 16134.  */
		  dtv = _dl_resize_dtv (dtv, max_modid);

		  assert (modid <= dtv[-1].counter);

		  /* Install this new dtv in the thread data
		     structures.  */
		  INSTALL_NEW_DTV (dtv);
		}

	      /* If there is currently memory allocate for this
		 dtv entry free it.  Note: this is not AS-safe.  */
	      /* XXX Ideally we will at some point create a memory
		 pool.  */
	      /* Avoid calling free on a null pointer.  Some mallocs
		 incorrectly use dynamic TLS, and depending on how the
		 free function was compiled, it could call
		 __tls_get_addr before the null pointer check in the
		 free implementation.  Checking here papers over at
		 least some dynamic TLS usage by interposed mallocs.  */
	      if (dtv[modid].pointer.to_free != NULL)
		{
		  _dl_tls_allocate_begin ();
		  free (dtv[modid].pointer.to_free);
		  _dl_tls_allocate_end ();
		}
	      dtv[modid].pointer.val = TLS_DTV_UNALLOCATED;
	      dtv[modid].pointer.to_free = NULL;

	      if (modid == req_modid)
		the_map = map;
	    }

	  total += listp->len;
	  if (total > max_modid)
	    break;

	  /* Synchronize with _dl_add_to_slotinfo.  Ideally this would
	     be consume MO since we only need to order the accesses to
	     the next node after the read of the address and on most
	     hardware (other than alpha) a normal load would do that
	     because of the address dependency.  */
	  listp = atomic_load_acquire (&listp->next);
	}
      while (listp != NULL);

      /* This will be the new maximum generation counter.  */
      dtv[0].counter = new_gen;
    }

  return the_map;
}
916
917
/* Slow path of __tls_get_addr: the DTV entry for the requested module
   is unallocated.  Either the module was meanwhile given static TLS
   (use that block), or allocate a dynamic TLS block now.  THE_MAP may
   be NULL, in which case the link map is looked up from the slotinfo
   list.  Returns the address of the requested TLS variable.  */
static void *
__attribute_noinline__
tls_get_addr_tail (GET_ADDR_ARGS, dtv_t *dtv, struct link_map *the_map)
{
  /* The allocation was deferred.  Do it now.  */
  if (the_map == NULL)
    {
      /* Find the link map for this module.  */
      size_t idx = GET_ADDR_MODULE;
      struct dtv_slotinfo_list *listp = GL(dl_tls_dtv_slotinfo_list);

      /* Walk the chunked slotinfo list to the node containing IDX.  */
      while (idx >= listp->len)
	{
	  idx -= listp->len;
	  listp = listp->next;
	}

      the_map = listp->slotinfo[idx].map;
    }

  /* Make sure that, if a dlopen running in parallel forces the
     variable into static storage, we'll wait until the address in the
     static TLS block is set up, and use that.  If we're undecided
     yet, make sure we make the decision holding the lock as well.  */
  if (__glibc_unlikely (the_map->l_tls_offset
			!= FORCED_DYNAMIC_TLS_OFFSET))
    {
      __rtld_lock_lock_recursive (GL(dl_load_tls_lock));
      if (__glibc_likely (the_map->l_tls_offset == NO_TLS_OFFSET))
	{
	  /* Undecided: commit this module to dynamic TLS.  */
	  the_map->l_tls_offset = FORCED_DYNAMIC_TLS_OFFSET;
	  __rtld_lock_unlock_recursive (GL(dl_load_tls_lock));
	}
      else if (__glibc_likely (the_map->l_tls_offset
			       != FORCED_DYNAMIC_TLS_OFFSET))
	{
	  /* The module was placed in static TLS; compute the address
	     of its block relative to the thread pointer.  */
#if TLS_TCB_AT_TP
	  void *p = (char *) THREAD_SELF - the_map->l_tls_offset;
#elif TLS_DTV_AT_TP
	  void *p = (char *) THREAD_SELF + the_map->l_tls_offset + TLS_PRE_TCB_SIZE;
#else
# error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
#endif
	  __rtld_lock_unlock_recursive (GL(dl_load_tls_lock));

	  /* Static TLS block: nothing to free for this entry.  */
	  dtv[GET_ADDR_MODULE].pointer.to_free = NULL;
	  dtv[GET_ADDR_MODULE].pointer.val = p;

	  return (char *) p + GET_ADDR_OFFSET;
	}
      else
	__rtld_lock_unlock_recursive (GL(dl_load_tls_lock));
    }
  /* Dynamic TLS: allocate and initialize the block now.  */
  struct dtv_pointer result = allocate_and_init (the_map);
  dtv[GET_ADDR_MODULE].pointer = result;
  assert (result.to_free != NULL);

  return (char *) result.val + GET_ADDR_OFFSET;
}
977
978
979static struct link_map *
980__attribute_noinline__
981update_get_addr (GET_ADDR_ARGS, size_t gen)
982{
983 struct link_map *the_map = _dl_update_slotinfo (GET_ADDR_MODULE, gen);
984 dtv_t *dtv = THREAD_DTV ();
985
986 void *p = dtv[GET_ADDR_MODULE].pointer.val;
987
988 if (__glibc_unlikely (p == TLS_DTV_UNALLOCATED))
989 return tls_get_addr_tail (GET_ADDR_PARAM, dtv, the_map);
990
991 return (void *) p + GET_ADDR_OFFSET;
992}
993
994/* For all machines that have a non-macro version of __tls_get_addr, we
995 want to use rtld_hidden_proto/rtld_hidden_def in order to call the
996 internal alias for __tls_get_addr from ld.so. This avoids a PLT entry
997 in ld.so for __tls_get_addr. */
998
999#ifndef __tls_get_addr
1000extern void * __tls_get_addr (GET_ADDR_ARGS);
1001rtld_hidden_proto (__tls_get_addr)
1002rtld_hidden_def (__tls_get_addr)
1003#endif
1004
/* The generic dynamic and local dynamic model cannot be used in
   statically linked applications.  Fast path: if this thread's DTV is
   current and the module's block is allocated, the lookup is a pair of
   array accesses; otherwise fall into the update/allocation paths.  */
void *
__tls_get_addr (GET_ADDR_ARGS)
{
  dtv_t *dtv = THREAD_DTV ();

  /* Update is needed if dtv[0].counter < the generation of the accessed
     module, but the global generation counter is easier to check (which
     must be synchronized up to the generation of the accessed module by
     user code doing the TLS access so relaxed mo read is enough).  */
  size_t gen = atomic_load_relaxed (&GL(dl_tls_generation));
  if (__glibc_unlikely (dtv[0].counter != gen))
    {
      if (_dl_tls_allocate_active ()
	  && GET_ADDR_MODULE < _dl_tls_initial_modid_limit)
	/* This is a reentrant __tls_get_addr call, but we can
	   satisfy it because it's an initially-loaded module ID.
	   These TLS slotinfo slots do not change, so the
	   out-of-date generation counter does not matter.  However,
	   if not in a TLS update, still update_get_addr below, to
	   get off the slow path eventually.  */
	;
      else
	{
	  /* Update DTV up to the global generation, see CONCURRENCY NOTES
	     in _dl_update_slotinfo.  */
	  gen = atomic_load_acquire (&GL(dl_tls_generation));
	  return update_get_addr (GET_ADDR_PARAM, gen);
	}
    }

  void *p = dtv[GET_ADDR_MODULE].pointer.val;

  /* DTV is current but this thread has not touched the module yet:
     allocate its TLS block now.  */
  if (__glibc_unlikely (p == TLS_DTV_UNALLOCATED))
    return tls_get_addr_tail (GET_ADDR_PARAM, dtv, NULL);

  return (char *) p + GET_ADDR_OFFSET;
}
1044#endif /* SHARED */
1045
1046
/* Look up the module's TLS block as for __tls_get_addr,
   but never touch anything.  Return null if it's not allocated yet.
   May be called without GL(dl_load_tls_lock); all reads here are
   best-effort and a stale answer only means returning NULL.  */
void *
_dl_tls_get_addr_soft (struct link_map *l)
{
  if (__glibc_unlikely (l->l_tls_modid == 0))
    /* This module has no TLS segment.  */
    return NULL;

  dtv_t *dtv = THREAD_DTV ();
  /* This may be called without holding the GL(dl_load_tls_lock).  Reading
     arbitrary gen value is fine since this is best effort code.  */
  size_t gen = atomic_load_relaxed (&GL(dl_tls_generation));
  if (__glibc_unlikely (dtv[0].counter != gen))
    {
      /* This thread's DTV is not completely current,
	 but it might already cover this module.  */

      /* dtv[-1].counter is the number of entries in this DTV
	 (NOTE(review): per the DTV layout convention; the slot is not
	 set up in this chunk of the file).  */
      if (l->l_tls_modid >= dtv[-1].counter)
	/* Nope.  */
	return NULL;

      /* Walk the chunked slotinfo list to the chunk containing this
	 module ID.  */
      size_t idx = l->l_tls_modid;
      struct dtv_slotinfo_list *listp = GL(dl_tls_dtv_slotinfo_list);
      while (idx >= listp->len)
	{
	  idx -= listp->len;
	  listp = listp->next;
	}

      /* We've reached the slot for this module.
	 If its generation counter is higher than the DTV's,
	 this thread does not know about this module yet.  */
      if (dtv[0].counter < listp->slotinfo[idx].gen)
	return NULL;
    }

  void *data = dtv[l->l_tls_modid].pointer.val;
  if (__glibc_unlikely (data == TLS_DTV_UNALLOCATED))
    /* The DTV is current, but this thread has not yet needed
       to allocate this module's segment.  */
    data = NULL;

  return data;
}
1092
/* Exclusive upper bound on the TLS module IDs of initially loaded
   objects, set by _dl_tls_initial_modid_limit_setup.  Slotinfo slots
   below this limit never change, which lets __tls_get_addr service
   reentrant calls for these modules without a DTV update.  */
size_t _dl_tls_initial_modid_limit;
1094
1095void
1096_dl_tls_initial_modid_limit_setup (void)
1097{
1098 struct dtv_slotinfo_list *listp = GL(dl_tls_dtv_slotinfo_list);
1099 size_t idx;
1100 for (idx = 0; idx < listp->len; ++idx)
1101 {
1102 struct link_map *l = listp->slotinfo[idx].map;
1103 if (l == NULL
1104 /* The object can be unloaded, so its modid can be
1105 reassociated. */
1106 || !(l->l_type == lt_executable || l->l_type == lt_library))
1107 break;
1108 }
1109 _dl_tls_initial_modid_limit = idx;
1110}
1111
1112
1113/* Add module to slot information data. If DO_ADD is false, only the
1114 required memory is allocated. Must be called with
1115 GL (dl_load_tls_lock) acquired. If the function has already been
1116 called for the link map L with !DO_ADD, then this function will not
1117 raise an exception, otherwise it is possible that it encounters a
1118 memory allocation failure.
1119
1120 Return false if L has already been added to the slotinfo data, or
1121 if L has no TLS data. If the returned value is true, L has been
1122 added with this call (DO_ADD), or has been added in a previous call
1123 (!DO_ADD).
1124
1125 The expected usage is as follows: Call _dl_add_to_slotinfo for
1126 several link maps with DO_ADD set to false, and record if any calls
1127 result in a true result. If there was a true result, call
1128 _dl_add_to_slotinfo again, this time with DO_ADD set to true. (For
1129 simplicity, it's possible to call the function for link maps where
1130 the previous result was false.) The return value from the second
1131 round of calls can be ignored. If there was true result initially,
1132 call _dl_update_slotinfo to update the TLS generation counter. */
1133bool
1134_dl_add_to_slotinfo (struct link_map *l, bool do_add)
1135{
1136 if (l->l_tls_blocksize == 0 || l->l_tls_in_slotinfo)
1137 return false;
1138
1139 /* Now that we know the object is loaded successfully add
1140 modules containing TLS data to the dtv info table. We
1141 might have to increase its size. */
1142 struct dtv_slotinfo_list *listp;
1143 struct dtv_slotinfo_list *prevp;
1144 size_t idx = l->l_tls_modid;
1145
1146 /* Find the place in the dtv slotinfo list. */
1147 listp = GL(dl_tls_dtv_slotinfo_list);
1148 prevp = NULL; /* Needed to shut up gcc. */
1149 do
1150 {
1151 /* Does it fit in the array of this list element? */
1152 if (idx < listp->len)
1153 break;
1154 idx -= listp->len;
1155 prevp = listp;
1156 listp = listp->next;
1157 }
1158 while (listp != NULL);
1159
1160 if (listp == NULL)
1161 {
1162 /* When we come here it means we have to add a new element
1163 to the slotinfo list. And the new module must be in
1164 the first slot. */
1165 assert (idx == 0);
1166
1167 _dl_tls_allocate_begin ();
1168 listp = (struct dtv_slotinfo_list *)
1169 malloc (size: sizeof (struct dtv_slotinfo_list)
1170 + TLS_SLOTINFO_SURPLUS * sizeof (struct dtv_slotinfo));
1171 _dl_tls_allocate_end ();
1172 if (listp == NULL)
1173 {
1174 /* We ran out of memory while resizing the dtv slotinfo list. */
1175 _dl_signal_error (ENOMEM, object: "dlopen", NULL, N_("\
1176cannot create TLS data structures"));
1177 }
1178
1179 listp->len = TLS_SLOTINFO_SURPLUS;
1180 listp->next = NULL;
1181 memset (listp->slotinfo, '\0',
1182 TLS_SLOTINFO_SURPLUS * sizeof (struct dtv_slotinfo));
1183 /* Synchronize with _dl_update_slotinfo. */
1184 atomic_store_release (&prevp->next, listp);
1185 }
1186
1187 /* Add the information into the slotinfo data structure. */
1188 if (do_add)
1189 {
1190 /* Can be read concurrently. See _dl_update_slotinfo. */
1191 atomic_store_relaxed (&listp->slotinfo[idx].map, l);
1192 atomic_store_relaxed (&listp->slotinfo[idx].gen,
1193 GL(dl_tls_generation) + 1);
1194 l->l_tls_in_slotinfo = true;
1195 }
1196
1197 return true;
1198}
1199
1200#if PTHREAD_IN_LIBC
/* Copy MAP's TLS initialization image into thread CURP's static TLS
   block and zero the remainder (the tbss part).  MAP must have a
   static TLS offset assigned (l_tls_offset).  */
static inline void __attribute__((always_inline))
init_one_static_tls (struct pthread *curp, struct link_map *map)
{
  /* Locate the block inside the thread area; the direction of the
     offset from the thread pointer depends on the TLS variant.  */
# if TLS_TCB_AT_TP
  void *dest = (char *) curp - map->l_tls_offset;
# elif TLS_DTV_AT_TP
  void *dest = (char *) curp + map->l_tls_offset + TLS_PRE_TCB_SIZE;
# else
#  error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
# endif

  /* Initialize the memory.  */
  memset (__mempcpy (dest, map->l_tls_initimage, map->l_tls_initimage_size),
	  '\0', map->l_tls_blocksize - map->l_tls_initimage_size);
}
1216
1217void
1218_dl_init_static_tls (struct link_map *map)
1219{
1220 lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE);
1221
1222 /* Iterate over the list with system-allocated threads first. */
1223 list_t *runp;
1224 list_for_each (runp, &GL (dl_stack_used))
1225 init_one_static_tls (list_entry (runp, struct pthread, list), map);
1226
1227 /* Now the list with threads using user-allocated stacks. */
1228 list_for_each (runp, &GL (dl_stack_user))
1229 init_one_static_tls (list_entry (runp, struct pthread, list), map);
1230
1231 lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE);
1232}
1233#endif /* PTHREAD_IN_LIBC */
1234

source code of glibc/elf/dl-tls.c