//===--- Implementation of a Linux thread class -----------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "src/__support/threads/thread.h"
#include "config/linux/app.h"
#include "src/__support/CPP/atomic.h"
#include "src/__support/CPP/string_view.h"
#include "src/__support/CPP/stringstream.h"
#include "src/__support/OSUtil/syscall.h" // For syscall functions.
#include "src/__support/common.h"
#include "src/__support/error_or.h"
#include "src/__support/threads/linux/futex_word.h" // For FutexWordType
#include "src/errno/libc_errno.h"                   // For error macros

#ifdef LIBC_TARGET_ARCH_IS_AARCH64
#include <arm_acle.h>
#endif

#include <fcntl.h>
#include <linux/futex.h>
#include <linux/param.h> // For EXEC_PAGESIZE.
#include <linux/prctl.h> // For PR_SET_NAME
#include <linux/sched.h> // For CLONE_* flags.
#include <stdint.h>
#include <sys/mman.h>    // For PROT_* and MAP_* definitions.
#include <sys/syscall.h> // For syscall numbers.

namespace LIBC_NAMESPACE {

#ifdef SYS_mmap2
static constexpr long MMAP_SYSCALL_NUMBER = SYS_mmap2;
#elif defined(SYS_mmap)
static constexpr long MMAP_SYSCALL_NUMBER = SYS_mmap;
#else
#error "mmap or mmap2 syscalls not available."
#endif

static constexpr size_t NAME_SIZE_MAX = 16; // Includes the null terminator
static constexpr uint32_t CLEAR_TID_VALUE = 0xABCD1234;
static constexpr unsigned CLONE_SYSCALL_FLAGS =
    CLONE_VM        // Share the memory space with the parent.
    | CLONE_FS      // Share the file system with the parent.
    | CLONE_FILES   // Share the files with the parent.
    | CLONE_SIGHAND // Share the signal handlers with the parent.
    | CLONE_THREAD  // Same thread group as the parent.
    | CLONE_SYSVSEM // Share a single list of System V semaphore adjustment
                    // values.
    | CLONE_PARENT_SETTID  // Set child thread ID in |ptid| of the parent.
    | CLONE_CHILD_CLEARTID // Let the kernel clear the tid address and wake
                           // the joining thread.
    | CLONE_SETTLS; // Set up the thread pointer of the new thread.

#ifdef LIBC_TARGET_ARCH_IS_AARCH64
#define CLONE_RESULT_REGISTER "x0"
#elif defined(LIBC_TARGET_ARCH_IS_ANY_RISCV)
#define CLONE_RESULT_REGISTER "t0"
#elif defined(LIBC_TARGET_ARCH_IS_X86_64)
#define CLONE_RESULT_REGISTER "rax"
#else
#error "CLONE_RESULT_REGISTER not defined for your target architecture"
#endif

static constexpr ErrorOr<size_t> add_no_overflow(size_t lhs, size_t rhs) {
  if (lhs > SIZE_MAX - rhs)
    return Error{EINVAL};
  if (rhs > SIZE_MAX - lhs)
    return Error{EINVAL};
  return lhs + rhs;
}

static constexpr ErrorOr<size_t> round_to_page(size_t v) {
  auto vp_or_err = add_no_overflow(v, EXEC_PAGESIZE - 1);
  if (!vp_or_err)
    return vp_or_err;

  return vp_or_err.value() & -EXEC_PAGESIZE;
}
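
// A worked example of the rounding above (illustrative only): with the usual
// EXEC_PAGESIZE of 4096, round_to_page(1) yields 4096 and round_to_page(5000)
// yields 8192, because adding 4095 and masking with -4096 (i.e. clearing the
// low 12 bits) rounds up to the next page multiple.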

LIBC_INLINE ErrorOr<void *> alloc_stack(size_t stacksize, size_t guardsize) {

  // Guard needs to be mapped with PROT_NONE.
  int prot = guardsize ? PROT_NONE : PROT_READ | PROT_WRITE;
  auto size_or_err = add_no_overflow(stacksize, guardsize);
  if (!size_or_err)
    return Error{int(size_or_err.error())};
  size_t size = size_or_err.value();

  // TODO: Maybe add MAP_STACK? Currently unimplemented on linux but helps
  // future-proof.
  long mmap_result = LIBC_NAMESPACE::syscall_impl<long>(
      MMAP_SYSCALL_NUMBER,
      0, // No special address
      size, prot,
      MAP_ANONYMOUS | MAP_PRIVATE, // Process private.
      -1,                          // Not backed by any file
      0                            // No offset
  );
  if (mmap_result < 0 && (uintptr_t(mmap_result) >= UINTPTR_MAX - size))
    return Error{int(-mmap_result)};

  if (guardsize) {
    // Give read/write permissions to the actual stack.
    // TODO: We are assuming the stack grows down here.
    long result = LIBC_NAMESPACE::syscall_impl<long>(
        SYS_mprotect, mmap_result + guardsize, stacksize,
        PROT_READ | PROT_WRITE);

    if (result != 0)
      return Error{int(-result)};
  }
  mmap_result += guardsize;
  return reinterpret_cast<void *>(mmap_result);
}
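
// For reference, the mapping created above is laid out as sketched below
// (assuming, as the code does, a downward-growing stack):
//
//   low address                                            high address
//   +--------------------+-------------------------------------------+
//   | guard (PROT_NONE)  | usable stack (PROT_READ | PROT_WRITE)     |
//   +--------------------+-------------------------------------------+
//   ^ start of mapping    ^ pointer returned to the caller
//                           (start of mapping + guardsize)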

// This must always be inlined as we may be freeing the calling thread's stack,
// in which case a normal return from the top of the stack would cause an
// invalid memory read.
[[gnu::always_inline]] LIBC_INLINE void
free_stack(void *stack, size_t stacksize, size_t guardsize) {
  uintptr_t stackaddr = reinterpret_cast<uintptr_t>(stack);
  stackaddr -= guardsize;
  stack = reinterpret_cast<void *>(stackaddr);
  LIBC_NAMESPACE::syscall_impl<long>(SYS_munmap, stack, stacksize + guardsize);
}

struct Thread;

// We align the start args to a 16-byte boundary as we adjust the allocated
// stack memory with its size. We want the adjusted address to be at a
// 16-byte boundary to satisfy the x86_64 and aarch64 ABI requirements.
// If a different architecture requires higher alignment in the future, we
// can add a platform-specific alignment spec.
struct alignas(STACK_ALIGNMENT) StartArgs {
  ThreadAttributes *thread_attrib;
  ThreadRunner runner;
  void *arg;
};

// This must always be inlined as we may be freeing the calling thread's stack,
// in which case a normal return from the top of the stack would cause an
// invalid memory read.
[[gnu::always_inline]] LIBC_INLINE void
cleanup_thread_resources(ThreadAttributes *attrib) {
  // Clean up the TLS before the stack as the TLS information is stored on
  // the stack.
  cleanup_tls(attrib->tls, attrib->tls_size);
  if (attrib->owned_stack)
    free_stack(attrib->stack, attrib->stacksize, attrib->guardsize);
}

[[gnu::always_inline]] LIBC_INLINE uintptr_t get_start_args_addr() {
// NOTE: For __builtin_frame_address to work reliably across compilers,
// architectures and various optimization levels, the TU including this file
// should be compiled with -fno-omit-frame-pointer.
#ifdef LIBC_TARGET_ARCH_IS_X86_64
  return reinterpret_cast<uintptr_t>(__builtin_frame_address(0))
         // The x86_64 call instruction pushes the resume address on to the
         // stack. Next, the x86_64 SysV ABI requires that the frame pointer be
         // pushed on to the stack. So, we have to step past two 64-bit values
         // to get to the start args.
         + sizeof(uintptr_t) * 2;
#elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
  // The frame pointer after cloning the new thread in the Thread::run method
  // is set to the stack pointer where start args are stored. So, we fetch
  // from there.
  return reinterpret_cast<uintptr_t>(__builtin_frame_address(1));
#elif defined(LIBC_TARGET_ARCH_IS_ANY_RISCV)
  // The current frame pointer is the previous stack pointer where the start
  // args are stored.
  return reinterpret_cast<uintptr_t>(__builtin_frame_address(0));
#endif
}

[[gnu::noinline]] void start_thread() {
  auto *start_args = reinterpret_cast<StartArgs *>(get_start_args_addr());
  auto *attrib = start_args->thread_attrib;
  self.attrib = attrib;
  self.attrib->atexit_callback_mgr = internal::get_thread_atexit_callback_mgr();

  if (attrib->style == ThreadStyle::POSIX) {
    attrib->retval.posix_retval =
        start_args->runner.posix_runner(start_args->arg);
    thread_exit(ThreadReturnValue(attrib->retval.posix_retval),
                ThreadStyle::POSIX);
  } else {
    attrib->retval.stdc_retval =
        start_args->runner.stdc_runner(start_args->arg);
    thread_exit(ThreadReturnValue(attrib->retval.stdc_retval),
                ThreadStyle::STDC);
  }
}

int Thread::run(ThreadStyle style, ThreadRunner runner, void *arg, void *stack,
                size_t stacksize, size_t guardsize, bool detached) {
  bool owned_stack = false;
  if (stack == nullptr) {
    // TODO: Should we return EINVAL here? Should we have a generic concept of
    // a minimum stacksize (like 16384 for pthread)?
    if (stacksize == 0)
      stacksize = DEFAULT_STACKSIZE;
    // Round up stacksize/guardsize to the page size.
    // TODO: Should we also add sizeof(ThreadAttributes) and other internal
    // metadata?
    auto round_or_err = round_to_page(guardsize);
    if (!round_or_err)
      return round_or_err.error();
    guardsize = round_or_err.value();

    round_or_err = round_to_page(stacksize);
    if (!round_or_err)
      return round_or_err.error();

    stacksize = round_or_err.value();
    auto alloc = alloc_stack(stacksize, guardsize);
    if (!alloc)
      return alloc.error();
    else
      stack = alloc.value();
    owned_stack = true;
  }

  // Validate that stack/stacksize are suitably aligned.
  uintptr_t stackaddr = reinterpret_cast<uintptr_t>(stack);
  if ((stackaddr % STACK_ALIGNMENT != 0) ||
      ((stackaddr + stacksize) % STACK_ALIGNMENT != 0)) {
    if (owned_stack)
      free_stack(stack, stacksize, guardsize);
    return EINVAL;
  }

  TLSDescriptor tls;
  init_tls(tls);

  // When the new thread is spawned by the kernel, the new thread gets the
  // stack we pass to the clone syscall. However, this stack is empty and does
  // not have any local vars present in this function. Hence, one cannot
  // pass arguments to the thread start function, or use any local vars from
  // here. So, we pack them into the new stack from where the thread can sniff
  // them out.
  //
  // Likewise, the actual thread state information is also stored on the
  // stack memory.

  static constexpr size_t INTERNAL_STACK_DATA_SIZE =
      sizeof(StartArgs) + sizeof(ThreadAttributes) +
      sizeof(cpp::Atomic<FutexWordType>);

  // This is pretty arbitrary, but at the moment we don't adjust the
  // user-provided stacksize (or the default) to account for this data as it's
  // assumed to be minimal. If this assert starts failing we probably should.
  // Likewise, if we can't bound this we may overflow when we subtract it from
  // the top of the stack.
  static_assert(INTERNAL_STACK_DATA_SIZE < EXEC_PAGESIZE);

  // TODO: We are assuming the stack grows down here.
  auto adjusted_stack_or_err =
      add_no_overflow(reinterpret_cast<uintptr_t>(stack), stacksize);
  if (!adjusted_stack_or_err) {
    cleanup_tls(tls.addr, tls.size);
    if (owned_stack)
      free_stack(stack, stacksize, guardsize);
    return adjusted_stack_or_err.error();
  }

  uintptr_t adjusted_stack =
      adjusted_stack_or_err.value() - INTERNAL_STACK_DATA_SIZE;
  adjusted_stack &= ~(uintptr_t(STACK_ALIGNMENT) - 1);

  auto *start_args = reinterpret_cast<StartArgs *>(adjusted_stack);

  attrib =
      reinterpret_cast<ThreadAttributes *>(adjusted_stack + sizeof(StartArgs));
  attrib->style = style;
  attrib->detach_state =
      uint32_t(detached ? DetachState::DETACHED : DetachState::JOINABLE);
  attrib->stack = stack;
  attrib->stacksize = stacksize;
  attrib->guardsize = guardsize;
  attrib->owned_stack = owned_stack;
  attrib->tls = tls.addr;
  attrib->tls_size = tls.size;

  start_args->thread_attrib = attrib;
  start_args->runner = runner;
  start_args->arg = arg;

  auto clear_tid = reinterpret_cast<cpp::Atomic<FutexWordType> *>(
      adjusted_stack + sizeof(StartArgs) + sizeof(ThreadAttributes));
  clear_tid->val = CLEAR_TID_VALUE;
  attrib->platform_data = clear_tid;
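
  // The top of the new thread's stack now holds the internal data, laid out
  // roughly as sketched below (illustrative; STACK_ALIGNMENT rounding may
  // leave a few padding bytes between the futex word and the original top):
  //
  //                    (original stack top, high address)
  //                    +------------------------------+
  //   clear_tid  ----> | cpp::Atomic<FutexWordType>   |
  //                    +------------------------------+
  //   attrib     ----> | ThreadAttributes             |
  //                    +------------------------------+
  //   start_args ----> | StartArgs                    | <- adjusted_stack
  //                    +------------------------------+
  //                    | stack usable by the thread   |
  //                    | (grows down, low address)    |
  //                    +------------------------------+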

  // The clone syscall takes arguments in an architecture specific order.
  // Also, we want the result of the syscall to be in a register as the child
  // thread gets a completely different stack after it is created. The stack
  // variables from this function will not be available to the child thread.
#if defined(LIBC_TARGET_ARCH_IS_X86_64)
  long register clone_result asm(CLONE_RESULT_REGISTER);
  clone_result = LIBC_NAMESPACE::syscall_impl<long>(
      SYS_clone, CLONE_SYSCALL_FLAGS, adjusted_stack,
      &attrib->tid,    // The address where the child tid is written
      &clear_tid->val, // The futex where the child thread status is signalled
      tls.tp           // The thread pointer value for the new thread.
  );
#elif defined(LIBC_TARGET_ARCH_IS_AARCH64) ||                                 \
    defined(LIBC_TARGET_ARCH_IS_ANY_RISCV)
  long register clone_result asm(CLONE_RESULT_REGISTER);
  clone_result = LIBC_NAMESPACE::syscall_impl<long>(
      SYS_clone, CLONE_SYSCALL_FLAGS, adjusted_stack,
      &attrib->tid,   // The address where the child tid is written
      tls.tp,         // The thread pointer value for the new thread.
      &clear_tid->val // The futex where the child thread status is signalled
  );
#else
#error "Unsupported architecture for the clone syscall."
#endif

  if (clone_result == 0) {
#ifdef LIBC_TARGET_ARCH_IS_AARCH64
    // We set the frame pointer to be the same as the "sp" so that start args
    // can be sniffed out from start_thread.
#ifdef __clang__
    // GCC does not currently implement __arm_wsr64/__arm_rsr64.
    __arm_wsr64("x29", __arm_rsr64("sp"));
#else
    asm volatile("mov x29, sp");
#endif
#elif defined(LIBC_TARGET_ARCH_IS_ANY_RISCV)
    asm volatile("mv fp, sp");
#endif
    start_thread();
  } else if (clone_result < 0) {
    cleanup_thread_resources(attrib);
    return static_cast<int>(-clone_result);
  }

  return 0;
}
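
// Illustrative usage of Thread::run (a sketch only; the exact ThreadRunner
// union and Thread members are defined in thread.h, and `func` here is a
// hypothetical `void *(*)(void *)` function):
//
//   ThreadRunner runner;
//   runner.posix_runner = func; // Assuming posix_runner is a plain pointer.
//   Thread t;
//   int err = t.run(ThreadStyle::POSIX, runner, /*arg=*/nullptr,
//                   /*stack=*/nullptr, /*stacksize=*/0, /*guardsize=*/0,
//                   /*detached=*/false);
//   // A nullptr stack with stacksize 0 makes run() allocate a
//   // DEFAULT_STACKSIZE stack owned by the thread.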

int Thread::join(ThreadReturnValue &retval) {
  wait();

  if (attrib->style == ThreadStyle::POSIX)
    retval.posix_retval = attrib->retval.posix_retval;
  else
    retval.stdc_retval = attrib->retval.stdc_retval;

  cleanup_thread_resources(attrib);

  return 0;
}

int Thread::detach() {
  uint32_t joinable_state = uint32_t(DetachState::JOINABLE);
  if (attrib->detach_state.compare_exchange_strong(
          joinable_state, uint32_t(DetachState::DETACHED))) {
    return int(DetachType::SIMPLE);
  }

  // If the thread was already detached, then the detach method should not
  // be called at all. If the thread is exiting, then we wait for it to exit
  // and free up resources.
  wait();

  cleanup_thread_resources(attrib);

  return int(DetachType::CLEANUP);
}

void Thread::wait() {
  // The kernel should set the value at the clear tid address to zero.
  // If not, it is a spurious wake and we should continue to wait on
  // the futex.
  auto *clear_tid =
      reinterpret_cast<cpp::Atomic<FutexWordType> *>(attrib->platform_data);
  while (clear_tid->load() != 0) {
    // We cannot do a FUTEX_WAIT_PRIVATE here as the kernel does a
    // FUTEX_WAKE and not a FUTEX_WAKE_PRIVATE.
    LIBC_NAMESPACE::syscall_impl<long>(FUTEX_SYSCALL_ID, &clear_tid->val,
                                       FUTEX_WAIT, CLEAR_TID_VALUE, nullptr);
  }
}

bool Thread::operator==(const Thread &thread) const {
  return attrib->tid == thread.attrib->tid;
}

static constexpr cpp::string_view THREAD_NAME_PATH_PREFIX("/proc/self/task/");
static constexpr size_t THREAD_NAME_PATH_SIZE =
    THREAD_NAME_PATH_PREFIX.size() +
    IntegerToString<int>::buffer_size() + // Size of tid.
    1 +                                   // For the '/' character.
    5; // For the file name "comm" and the null terminator.

static void construct_thread_name_file_path(cpp::StringStream &stream,
                                             int tid) {
  stream << THREAD_NAME_PATH_PREFIX << tid << '/' << cpp::string_view("comm")
         << cpp::StringStream::ENDS;
}
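
// For example, for a thread with tid 1234, the stream above produces the
// null-terminated path "/proc/self/task/1234/comm".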

int Thread::set_name(const cpp::string_view &name) {
  if (name.size() >= NAME_SIZE_MAX)
    return ERANGE;

  if (*this == self) {
    // If we are setting the name of the current thread, then we can
    // use the syscall to set the name.
    int retval =
        LIBC_NAMESPACE::syscall_impl<int>(SYS_prctl, PR_SET_NAME, name.data());
    if (retval < 0)
      return -retval;
    else
      return 0;
  }

  char path_name_buffer[THREAD_NAME_PATH_SIZE];
  cpp::StringStream path_stream(path_name_buffer);
  construct_thread_name_file_path(path_stream, attrib->tid);
#ifdef SYS_open
  int fd =
      LIBC_NAMESPACE::syscall_impl<int>(SYS_open, path_name_buffer, O_RDWR);
#else
  int fd = LIBC_NAMESPACE::syscall_impl<int>(SYS_openat, AT_FDCWD,
                                             path_name_buffer, O_RDWR);
#endif
  if (fd < 0)
    return -fd;

  int retval = LIBC_NAMESPACE::syscall_impl<int>(SYS_write, fd, name.data(),
                                                 name.size());
  LIBC_NAMESPACE::syscall_impl<long>(SYS_close, fd);

  if (retval < 0)
    return -retval;
  else if (retval != int(name.size()))
    return EIO;
  else
    return 0;
}

int Thread::get_name(cpp::StringStream &name) const {
  if (name.bufsize() < NAME_SIZE_MAX)
    return ERANGE;

  char name_buffer[NAME_SIZE_MAX];

  if (*this == self) {
    // If we are getting the name of the current thread, then we can
    // use the syscall to get the name.
    int retval =
        LIBC_NAMESPACE::syscall_impl<int>(SYS_prctl, PR_GET_NAME, name_buffer);
    if (retval < 0)
      return -retval;
    name << name_buffer << cpp::StringStream::ENDS;
    return 0;
  }

  char path_name_buffer[THREAD_NAME_PATH_SIZE];
  cpp::StringStream path_stream(path_name_buffer);
  construct_thread_name_file_path(path_stream, attrib->tid);
#ifdef SYS_open
  int fd =
      LIBC_NAMESPACE::syscall_impl<int>(SYS_open, path_name_buffer, O_RDONLY);
#else
  int fd = LIBC_NAMESPACE::syscall_impl<int>(SYS_openat, AT_FDCWD,
                                             path_name_buffer, O_RDONLY);
#endif
  if (fd < 0)
    return -fd;

  int retval = LIBC_NAMESPACE::syscall_impl<int>(SYS_read, fd, name_buffer,
                                                 NAME_SIZE_MAX);
  LIBC_NAMESPACE::syscall_impl<long>(SYS_close, fd);
  if (retval < 0)
    return -retval;
  if (retval == NAME_SIZE_MAX)
    return ERANGE;
  if (name_buffer[retval - 1] == '\n')
    name_buffer[retval - 1] = '\0';
  else
    name_buffer[retval] = '\0';
  name << name_buffer << cpp::StringStream::ENDS;
  return 0;
}

void thread_exit(ThreadReturnValue retval, ThreadStyle style) {
  auto attrib = self.attrib;

  // The very first thing we do is to call the thread's atexit callbacks.
  // These callbacks could be the ones registered by the language runtimes,
  // for example, the destructors of thread local objects. They can also
  // be destructors of the TSS objects set using APIs like pthread_setspecific.
  // NOTE: We cannot call the atexit callbacks as part of the
  // cleanup_thread_resources function as that function can be called from a
  // different thread. The destructors of thread local and TSS objects should
  // be called by the thread which owns them.
  internal::call_atexit_callbacks(attrib);

  uint32_t joinable_state = uint32_t(DetachState::JOINABLE);
  if (!attrib->detach_state.compare_exchange_strong(
          joinable_state, uint32_t(DetachState::EXITING))) {
    // The thread is detached, so clean up its resources.
    cleanup_thread_resources(attrib);

    // Set the CLEAR_TID address to nullptr to prevent the kernel
    // from signalling at a non-existent futex location.
    LIBC_NAMESPACE::syscall_impl<long>(SYS_set_tid_address, 0);
    // The return value of a detached thread should be unused. We need to avoid
    // referencing `style` or `retval.*` because they may be stored on the
    // stack and we have deallocated our stack!
    LIBC_NAMESPACE::syscall_impl<long>(SYS_exit, 0);
    __builtin_unreachable();
  }

  if (style == ThreadStyle::POSIX)
    LIBC_NAMESPACE::syscall_impl<long>(SYS_exit, retval.posix_retval);
  else
    LIBC_NAMESPACE::syscall_impl<long>(SYS_exit, retval.stdc_retval);
  __builtin_unreachable();
}

} // namespace LIBC_NAMESPACE