1//===- Unix/Threading.inc - Unix Threading Implementation ----- -*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file provides the Unix specific implementation of Threading functions.
10//
11//===----------------------------------------------------------------------===//
12
13#include "Unix.h"
14#include "llvm/ADT/ScopeExit.h"
15#include "llvm/ADT/SmallString.h"
16#include "llvm/ADT/SmallVector.h"
17#include "llvm/ADT/StringRef.h"
18#include "llvm/ADT/Twine.h"
19#include "llvm/Support/MemoryBuffer.h"
20#include "llvm/Support/raw_ostream.h"
21
22#if defined(__APPLE__)
23#include <mach/mach_init.h>
24#include <mach/mach_port.h>
25#include <pthread/qos.h>
26#include <sys/sysctl.h>
27#include <sys/types.h>
28#endif
29
30#include <pthread.h>
31
32#if defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__DragonFly__)
33#include <pthread_np.h> // For pthread_getthreadid_np() / pthread_set_name_np()
34#endif
35
36// Must be included after Threading.inc to provide definition for llvm::thread
37// because FreeBSD's condvar.h (included by user.h) misuses the "thread"
38// keyword.
39#ifndef __FreeBSD__
40#include "llvm/Support/thread.h"
41#endif
42
43#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
44#include <errno.h>
45#include <sys/cpuset.h>
46#include <sys/sysctl.h>
47#include <sys/user.h>
48#include <unistd.h>
49#endif
50
51#if defined(__NetBSD__)
52#include <lwp.h> // For _lwp_self()
53#endif
54
55#if defined(__OpenBSD__)
56#include <unistd.h> // For getthrid()
57#endif
58
59#if defined(__linux__)
60#include <sched.h> // For sched_getaffinity
61#include <sys/syscall.h> // For syscall codes
62#include <unistd.h> // For syscall()
63#endif
64
65#if defined(__HAIKU__)
66#include <OS.h> // For B_OS_NAME_LENGTH
67#endif
68
69namespace llvm {
70pthread_t
71llvm_execute_on_thread_impl(void *(*ThreadFunc)(void *), void *Arg,
72 std::optional<unsigned> StackSizeInBytes) {
73 int errnum;
74
75 // Construct the attributes object.
76 pthread_attr_t Attr;
77 if ((errnum = ::pthread_attr_init(attr: &Attr)) != 0) {
78 ReportErrnumFatal(Msg: "pthread_attr_init failed", errnum);
79 }
80
81 auto AttrGuard = llvm::make_scope_exit(F: [&] {
82 if ((errnum = ::pthread_attr_destroy(attr: &Attr)) != 0) {
83 ReportErrnumFatal(Msg: "pthread_attr_destroy failed", errnum);
84 }
85 });
86
87 // Set the requested stack size, if given.
88 if (StackSizeInBytes) {
89 if ((errnum = ::pthread_attr_setstacksize(attr: &Attr, stacksize: *StackSizeInBytes)) != 0) {
90 ReportErrnumFatal(Msg: "pthread_attr_setstacksize failed", errnum);
91 }
92 }
93
94 // Construct and execute the thread.
95 pthread_t Thread;
96 if ((errnum = ::pthread_create(newthread: &Thread, attr: &Attr, start_routine: ThreadFunc, arg: Arg)) != 0)
97 ReportErrnumFatal(Msg: "pthread_create failed", errnum);
98
99 return Thread;
100}
101
102void llvm_thread_detach_impl(pthread_t Thread) {
103 int errnum;
104
105 if ((errnum = ::pthread_detach(th: Thread)) != 0) {
106 ReportErrnumFatal(Msg: "pthread_detach failed", errnum);
107 }
108}
109
110void llvm_thread_join_impl(pthread_t Thread) {
111 int errnum;
112
113 if ((errnum = ::pthread_join(th: Thread, thread_return: nullptr)) != 0) {
114 ReportErrnumFatal(Msg: "pthread_join failed", errnum);
115 }
116}
117
118pthread_t llvm_thread_get_id_impl(pthread_t Thread) { return Thread; }
119
120pthread_t llvm_thread_get_current_id_impl() { return ::pthread_self(); }
121
122} // namespace llvm
123
124uint64_t llvm::get_threadid() {
125#if defined(__APPLE__)
126 // Calling "mach_thread_self()" bumps the reference count on the thread
127 // port, so we need to deallocate it. mach_task_self() doesn't bump the ref
128 // count.
129 static thread_local thread_port_t Self = [] {
130 thread_port_t InitSelf = mach_thread_self();
131 mach_port_deallocate(mach_task_self(), Self);
132 return InitSelf;
133 }();
134 return Self;
135#elif defined(__FreeBSD__) || defined(__DragonFly__)
136 return uint64_t(pthread_getthreadid_np());
137#elif defined(__NetBSD__)
138 return uint64_t(_lwp_self());
139#elif defined(__OpenBSD__)
140 return uint64_t(getthrid());
141#elif defined(__ANDROID__)
142 return uint64_t(gettid());
143#elif defined(__linux__)
144 return uint64_t(syscall(__NR_gettid));
145#elif defined(_AIX)
146 return uint64_t(thread_self());
147#else
148 return uint64_t(pthread_self());
149#endif
150}
151
/// Compile-time upper bound on a thread name for the target platform.
///
/// The first matching platform branch wins; when none matches the result is
/// 0. (set_thread_name() treats the limit as including the null terminator.)
static constexpr uint32_t get_max_thread_name_length_impl() {
  uint32_t MaxLength = 0;
#if defined(PTHREAD_MAX_NAMELEN_NP)
  // The platform headers publish the limit directly.
  MaxLength = PTHREAD_MAX_NAMELEN_NP;
#elif defined(__HAIKU__)
  MaxLength = B_OS_NAME_LENGTH;
#elif defined(__APPLE__)
  MaxLength = 64;
#elif defined(__sun__) && defined(__svr4__)
  MaxLength = 31;
#elif defined(__linux__) && HAVE_PTHREAD_SETNAME_NP
  MaxLength = 16;
#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) ||                  \
    defined(__DragonFly__)
  MaxLength = 16;
#elif defined(__OpenBSD__)
  MaxLength = 24;
#endif
  return MaxLength;
}
172
173uint32_t llvm::get_max_thread_name_length() {
174 return get_max_thread_name_length_impl();
175}
176
177void llvm::set_thread_name(const Twine &Name) {
178 // Make sure the input is null terminated.
179 SmallString<64> Storage;
180 StringRef NameStr = Name.toNullTerminatedStringRef(Out&: Storage);
181
182 // Truncate from the beginning, not the end, if the specified name is too
183 // long. For one, this ensures that the resulting string is still null
184 // terminated, but additionally the end of a long thread name will usually
185 // be more unique than the beginning, since a common pattern is for similar
186 // threads to share a common prefix.
187 // Note that the name length includes the null terminator.
188 if (get_max_thread_name_length() > 0)
189 NameStr = NameStr.take_back(N: get_max_thread_name_length() - 1);
190 (void)NameStr;
191#if defined(HAVE_PTHREAD_SET_NAME_NP)
192 ::pthread_set_name_np(::pthread_self(), NameStr.data());
193#elif defined(HAVE_PTHREAD_SETNAME_NP)
194#if defined(__NetBSD__)
195 ::pthread_setname_np(::pthread_self(), "%s",
196 const_cast<char *>(NameStr.data()));
197#elif defined(__APPLE__)
198 ::pthread_setname_np(NameStr.data());
199#else
200 ::pthread_setname_np(target_thread: ::pthread_self(), name: NameStr.data());
201#endif
202#endif
203}
204
205void llvm::get_thread_name(SmallVectorImpl<char> &Name) {
206 Name.clear();
207
208#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
209 int pid = ::getpid();
210 uint64_t tid = get_threadid();
211
212 struct kinfo_proc *kp = nullptr, *nkp;
213 size_t len = 0;
214 int error;
215 int ctl[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PID | KERN_PROC_INC_THREAD,
216 (int)pid};
217
218 while (1) {
219 error = sysctl(ctl, 4, kp, &len, nullptr, 0);
220 if (kp == nullptr || (error != 0 && errno == ENOMEM)) {
221 // Add extra space in case threads are added before next call.
222 len += sizeof(*kp) + len / 10;
223 nkp = (struct kinfo_proc *)::realloc(kp, len);
224 if (nkp == nullptr) {
225 free(kp);
226 return;
227 }
228 kp = nkp;
229 continue;
230 }
231 if (error != 0)
232 len = 0;
233 break;
234 }
235
236 for (size_t i = 0; i < len / sizeof(*kp); i++) {
237 if (kp[i].ki_tid == (lwpid_t)tid) {
238 Name.append(kp[i].ki_tdname, kp[i].ki_tdname + strlen(kp[i].ki_tdname));
239 break;
240 }
241 }
242 free(kp);
243 return;
244#elif defined(__linux__) && HAVE_PTHREAD_GETNAME_NP
245 constexpr uint32_t len = get_max_thread_name_length_impl();
246 char Buffer[len] = {'\0'}; // FIXME: working around MSan false positive.
247 if (0 == ::pthread_getname_np(target_thread: ::pthread_self(), buf: Buffer, buflen: len))
248 Name.append(in_start: Buffer, in_end: Buffer + strlen(s: Buffer));
249#elif defined(HAVE_PTHREAD_GET_NAME_NP) && HAVE_PTHREAD_GET_NAME_NP
250 constexpr uint32_t len = get_max_thread_name_length_impl();
251 char buf[len];
252 ::pthread_get_name_np(::pthread_self(), buf, len);
253
254 Name.append(buf, buf + strlen(buf));
255
256#elif defined(HAVE_PTHREAD_GETNAME_NP) && HAVE_PTHREAD_GETNAME_NP
257 constexpr uint32_t len = get_max_thread_name_length_impl();
258 char buf[len];
259 ::pthread_getname_np(::pthread_self(), buf, len);
260
261 Name.append(buf, buf + strlen(buf));
262#endif
263}
264
265SetThreadPriorityResult llvm::set_thread_priority(ThreadPriority Priority) {
266#if defined(__linux__) && defined(SCHED_IDLE)
267 // Some *really* old glibcs are missing SCHED_IDLE.
268 // http://man7.org/linux/man-pages/man3/pthread_setschedparam.3.html
269 // http://man7.org/linux/man-pages/man2/sched_setscheduler.2.html
270 sched_param priority;
271 // For each of the above policies, param->sched_priority must be 0.
272 priority.sched_priority = 0;
273 // SCHED_IDLE for running very low priority background jobs.
274 // SCHED_OTHER the standard round-robin time-sharing policy;
275 return !pthread_setschedparam(
276 target_thread: pthread_self(),
277 // FIXME: consider SCHED_BATCH for Low
278 policy: Priority == ThreadPriority::Default ? SCHED_OTHER : SCHED_IDLE,
279 param: &priority)
280 ? SetThreadPriorityResult::SUCCESS
281 : SetThreadPriorityResult::FAILURE;
282#elif defined(__APPLE__)
283 // https://developer.apple.com/documentation/apple-silicon/tuning-your-code-s-performance-for-apple-silicon
284 //
285 // Background - Applies to work that isn’t visible to the user and may take
286 // significant time to complete. Examples include indexing, backing up, or
287 // synchronizing data. This class emphasizes energy efficiency.
288 //
289 // Utility - Applies to work that takes anywhere from a few seconds to a few
290 // minutes to complete. Examples include downloading a document or importing
291 // data. This class offers a balance between responsiveness, performance, and
292 // energy efficiency.
293 const auto qosClass = [&]() {
294 switch (Priority) {
295 case ThreadPriority::Background:
296 return QOS_CLASS_BACKGROUND;
297 case ThreadPriority::Low:
298 return QOS_CLASS_UTILITY;
299 case ThreadPriority::Default:
300 return QOS_CLASS_DEFAULT;
301 }
302 }();
303 return !pthread_set_qos_class_self_np(qosClass, 0)
304 ? SetThreadPriorityResult::SUCCESS
305 : SetThreadPriorityResult::FAILURE;
306#endif
307 return SetThreadPriorityResult::FAILURE;
308}
309
310#include <thread>
311
/// Count the hardware threads usable by the caller.
///
/// FreeBSD and Linux count the CPUs in the affinity mask. If that query fails,
/// or on any other platform, the value of
/// std::thread::hardware_concurrency() is used, with 1 as the floor.
static int computeHostNumHardwareThreads() {
#if defined(__FreeBSD__)
  cpuset_t AffinityMask;
  CPU_ZERO(&AffinityMask);
  const int QueryResult =
      cpuset_getaffinity(CPU_LEVEL_WHICH, CPU_WHICH_TID, -1,
                         sizeof(AffinityMask), &AffinityMask);
  if (QueryResult == 0)
    return CPU_COUNT(&AffinityMask);
#elif defined(__linux__)
  cpu_set_t AffinityMask;
  CPU_ZERO(&AffinityMask);
  const int QueryResult =
      sched_getaffinity(0, sizeof(AffinityMask), &AffinityMask);
  if (QueryResult == 0)
    return CPU_COUNT(&AffinityMask);
#endif
  // Guard against std::thread::hardware_concurrency() returning 0.
  const unsigned Concurrency = std::thread::hardware_concurrency();
  if (Concurrency == 0)
    return 1;
  return Concurrency;
}
330
331void llvm::ThreadPoolStrategy::apply_thread_strategy(
332 unsigned ThreadPoolNum) const {}
333
334llvm::BitVector llvm::get_thread_affinity_mask() {
335 // FIXME: Implement
336 llvm_unreachable("Not implemented!");
337}
338
339unsigned llvm::get_cpus() { return 1; }
340
341#if defined(__linux__) && (defined(__i386__) || defined(__x86_64__))
342// On Linux, the number of physical cores can be computed from /proc/cpuinfo,
343// using the number of unique physical/core id pairs. The following
344// implementation reads the /proc/cpuinfo format on an x86_64 system.
345static int computeHostNumPhysicalCores() {
346 // Enabled represents the number of physical id/core id pairs with at least
347 // one processor id enabled by the CPU affinity mask.
348 cpu_set_t Affinity, Enabled;
349 if (sched_getaffinity(pid: 0, cpusetsize: sizeof(Affinity), cpuset: &Affinity) != 0)
350 return -1;
351 CPU_ZERO(&Enabled);
352
353 // Read /proc/cpuinfo as a stream (until EOF reached). It cannot be
354 // mmapped because it appears to have 0 size.
355 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
356 llvm::MemoryBuffer::getFileAsStream(Filename: "/proc/cpuinfo");
357 if (std::error_code EC = Text.getError()) {
358 llvm::errs() << "Can't read "
359 << "/proc/cpuinfo: " << EC.message() << "\n";
360 return -1;
361 }
362 SmallVector<StringRef, 8> strs;
363 (*Text)->getBuffer().split(A&: strs, Separator: "\n", /*MaxSplit=*/MaxSplit: -1,
364 /*KeepEmpty=*/KeepEmpty: false);
365 int CurProcessor = -1;
366 int CurPhysicalId = -1;
367 int CurSiblings = -1;
368 int CurCoreId = -1;
369 for (StringRef Line : strs) {
370 std::pair<StringRef, StringRef> Data = Line.split(Separator: ':');
371 auto Name = Data.first.trim();
372 auto Val = Data.second.trim();
373 // These fields are available if the kernel is configured with CONFIG_SMP.
374 if (Name == "processor")
375 Val.getAsInteger(Radix: 10, Result&: CurProcessor);
376 else if (Name == "physical id")
377 Val.getAsInteger(Radix: 10, Result&: CurPhysicalId);
378 else if (Name == "siblings")
379 Val.getAsInteger(Radix: 10, Result&: CurSiblings);
380 else if (Name == "core id") {
381 Val.getAsInteger(Radix: 10, Result&: CurCoreId);
382 // The processor id corresponds to an index into cpu_set_t.
383 if (CPU_ISSET(CurProcessor, &Affinity))
384 CPU_SET(CurPhysicalId * CurSiblings + CurCoreId, &Enabled);
385 }
386 }
387 return CPU_COUNT(&Enabled);
388}
389#elif (defined(__linux__) && defined(__s390x__)) || defined(_AIX)
// Linux on s390x and AIX: report the value of
// sysconf(_SC_NPROCESSORS_ONLN), converted to int.
static int computeHostNumPhysicalCores() {
  const long OnlineProcessors = sysconf(_SC_NPROCESSORS_ONLN);
  return static_cast<int>(OnlineProcessors);
}
393#elif defined(__linux__)
// Generic Linux: report the number of CPUs in the caller's affinity mask.
//
// Returns -1 when the mask cannot be obtained.
static int computeHostNumPhysicalCores() {
  cpu_set_t Affinity;
  if (sched_getaffinity(0, sizeof(Affinity), &Affinity) == 0)
    return CPU_COUNT(&Affinity);

  // The call to sched_getaffinity() may have failed because the Affinity
  // mask is too small for the number of CPU's on the system (i.e. the
  // system has more than 1024 CPUs). Allocate a mask large enough for
  // twice as many CPUs.
  constexpr int MaxCPUs = 2048;
  cpu_set_t *DynAffinity = CPU_ALLOC(MaxCPUs);
  if (DynAffinity == nullptr)
    return -1;

  const size_t DynAffinitySize = CPU_ALLOC_SIZE(MaxCPUs);
  int NumCPUs = -1;
  // A dynamically sized set must be counted with CPU_COUNT_S: plain
  // CPU_COUNT only looks at sizeof(cpu_set_t) bytes and would miss every CPU
  // beyond that.
  if (sched_getaffinity(0, DynAffinitySize, DynAffinity) == 0)
    NumCPUs = CPU_COUNT_S(DynAffinitySize, DynAffinity);

  // Release the set on both the success and the failure path.
  CPU_FREE(DynAffinity);
  return NumCPUs;
}
412#elif defined(__APPLE__)
413// Gets the number of *physical cores* on the machine.
414static int computeHostNumPhysicalCores() {
415 uint32_t count;
416 size_t len = sizeof(count);
417 sysctlbyname("hw.physicalcpu", &count, &len, NULL, 0);
418 if (count < 1) {
419 int nm[2];
420 nm[0] = CTL_HW;
421 nm[1] = HW_AVAILCPU;
422 sysctl(nm, 2, &count, &len, NULL, 0);
423 if (count < 1)
424 return -1;
425 }
426 return count;
427}
428#elif defined(__MVS__)
// z/OS: read the number of online standard CPs by following two table
// pointers (PSA -> CVT -> CSD) and loading a 32-bit counter from the CSD.
static int computeHostNumPhysicalCores() {
  enum {
    // Byte offset, inside the Prefixed Save Area (PSA), of the pointer to the
    // Communications Vector Table (CVT). The entry is a 31-bit pointer that
    // gets zero-extended to uintptr_t.
    FLCCVT = 16,
    // Byte offset, inside the CVT, of the pointer to the Common System Data
    // Area (CSD). The entry is a 31-bit pointer that gets zero-extended to
    // uintptr_t.
    CVTCSD = 660,
    // Byte offset, inside the CSD, of the number of live CPs in the LPAR,
    // stored as a signed 32-bit value.
    CSD_NUMBER_ONLINE_STANDARD_CPS = 264,
  };

  // The PSA is addressed from location zero.
  char *PSA = 0;

  // Load the CVT address from the PSA.
  const unsigned int CVTAddress =
      reinterpret_cast<unsigned int &>(PSA[FLCCVT]);
  char *CVT = reinterpret_cast<char *>(static_cast<uintptr_t>(CVTAddress));

  // Load the CSD address from the CVT.
  const unsigned int CSDAddress =
      reinterpret_cast<unsigned int &>(CVT[CVTCSD]);
  char *CSD = reinterpret_cast<char *>(static_cast<uintptr_t>(CSDAddress));

  // Finally read the CP count itself.
  const int OnlineCPs =
      reinterpret_cast<int &>(CSD[CSD_NUMBER_ONLINE_STANDARD_CPS]);
  return OnlineCPs;
}
450#else
// Fallback for every other system: the physical core count is unknown, which
// is signalled by -1.
static int computeHostNumPhysicalCores() {
  constexpr int Unknown = -1;
  return Unknown;
}
453#endif
454
455int llvm::get_physical_cores() {
456 static int NumCores = computeHostNumPhysicalCores();
457 return NumCores;
458}
459

// Source: llvm/lib/Support/Unix/Threading.inc