1// Copyright 2009-2021 Intel Corporation
2// SPDX-License-Identifier: Apache-2.0
3
4#include "thread.h"
5#include "sysinfo.h"
6#include "string.h"
7
8#include <iostream>
9#if defined(__ARM_NEON)
10#include "../simd/arm/emulation.h"
11#else
12#include <xmmintrin.h>
13#if defined(__EMSCRIPTEN__)
14#include "../simd/wasm/emulation.h"
15#endif
16#endif
17
18#if defined(PTHREADS_WIN32)
19#pragma comment (lib, "pthreadVC.lib")
20#endif
21
22////////////////////////////////////////////////////////////////////////////////
23/// Windows Platform
24////////////////////////////////////////////////////////////////////////////////
25
26#if defined(__WIN32__)
27
28#define WIN32_LEAN_AND_MEAN
29#include <windows.h>
30
31namespace embree
32{
33 /*! set the affinity of a given thread */
34 void setAffinity(HANDLE thread, ssize_t affinity)
35 {
36 typedef WORD (WINAPI *GetActiveProcessorGroupCountFunc)();
37 typedef DWORD (WINAPI *GetActiveProcessorCountFunc)(WORD);
38 typedef BOOL (WINAPI *SetThreadGroupAffinityFunc)(HANDLE, const GROUP_AFFINITY *, PGROUP_AFFINITY);
39 typedef BOOL (WINAPI *SetThreadIdealProcessorExFunc)(HANDLE, PPROCESSOR_NUMBER, PPROCESSOR_NUMBER);
40 HMODULE hlib = LoadLibrary("Kernel32");
41 GetActiveProcessorGroupCountFunc pGetActiveProcessorGroupCount = (GetActiveProcessorGroupCountFunc)GetProcAddress(hlib, "GetActiveProcessorGroupCount");
42 GetActiveProcessorCountFunc pGetActiveProcessorCount = (GetActiveProcessorCountFunc)GetProcAddress(hlib, "GetActiveProcessorCount");
43 SetThreadGroupAffinityFunc pSetThreadGroupAffinity = (SetThreadGroupAffinityFunc)GetProcAddress(hlib, "SetThreadGroupAffinity");
44 SetThreadIdealProcessorExFunc pSetThreadIdealProcessorEx = (SetThreadIdealProcessorExFunc)GetProcAddress(hlib, "SetThreadIdealProcessorEx");
45 if (pGetActiveProcessorGroupCount && pGetActiveProcessorCount && pSetThreadGroupAffinity && pSetThreadIdealProcessorEx)
46 {
47 int groups = pGetActiveProcessorGroupCount();
48 int totalProcessors = 0, group = 0, number = 0;
49 for (int i = 0; i<groups; i++) {
50 int processors = pGetActiveProcessorCount(i);
51 if (totalProcessors + processors > affinity) {
52 group = i;
53 number = (int)affinity - totalProcessors;
54 break;
55 }
56 totalProcessors += processors;
57 }
58
59 GROUP_AFFINITY groupAffinity;
60 groupAffinity.Group = (WORD)group;
61 groupAffinity.Mask = (KAFFINITY)(uint64_t(1) << number);
62 groupAffinity.Reserved[0] = 0;
63 groupAffinity.Reserved[1] = 0;
64 groupAffinity.Reserved[2] = 0;
65 if (!pSetThreadGroupAffinity(thread, &groupAffinity, nullptr))
66 WARNING("SetThreadGroupAffinity failed"); // on purpose only a warning
67
68 PROCESSOR_NUMBER processorNumber;
69 processorNumber.Group = group;
70 processorNumber.Number = number;
71 processorNumber.Reserved = 0;
72 if (!pSetThreadIdealProcessorEx(thread, &processorNumber, nullptr))
73 WARNING("SetThreadIdealProcessorEx failed"); // on purpose only a warning
74 }
75 else
76 {
77 if (!SetThreadAffinityMask(thread, DWORD_PTR(uint64_t(1) << affinity)))
78 WARNING("SetThreadAffinityMask failed"); // on purpose only a warning
79 if (SetThreadIdealProcessor(thread, (DWORD)affinity) == (DWORD)-1)
80 WARNING("SetThreadIdealProcessor failed"); // on purpose only a warning
81 }
82 }
83
84 /*! set affinity of the calling thread */
85 void setAffinity(ssize_t affinity) {
86 setAffinity(GetCurrentThread(), affinity);
87 }
88
89 struct ThreadStartupData
90 {
91 public:
92 ThreadStartupData (thread_func f, void* arg)
93 : f(f), arg(arg) {}
94 public:
95 thread_func f;
96 void* arg;
97 };
98
99 DWORD WINAPI threadStartup(LPVOID ptr)
100 {
101 ThreadStartupData* parg = (ThreadStartupData*) ptr;
102 _mm_setcsr(_mm_getcsr() | /*FTZ:*/ (1<<15) | /*DAZ:*/ (1<<6));
103 parg->f(parg->arg);
104 delete parg;
105 return 0;
106 }
107
108#if !defined(PTHREADS_WIN32)
109
110 /*! creates a hardware thread running on specific core */
111 thread_t createThread(thread_func f, void* arg, size_t stack_size, ssize_t threadID)
112 {
113 HANDLE thread = CreateThread(nullptr, stack_size, threadStartup, new ThreadStartupData(f,arg), 0, nullptr);
114 if (thread == nullptr) FATAL("CreateThread failed");
115 if (threadID >= 0) setAffinity(thread, threadID);
116 return thread_t(thread);
117 }
118
119 /*! the thread calling this function gets yielded */
120 void yield() {
121 SwitchToThread();
122 }
123
124 /*! waits until the given thread has terminated */
125 void join(thread_t tid) {
126 WaitForSingleObject(HANDLE(tid), INFINITE);
127 CloseHandle(HANDLE(tid));
128 }
129
130 /*! destroy a hardware thread by its handle */
131 void destroyThread(thread_t tid) {
132 TerminateThread(HANDLE(tid),0);
133 CloseHandle(HANDLE(tid));
134 }
135
136 /*! creates thread local storage */
137 tls_t createTls() {
138 return tls_t(size_t(TlsAlloc()));
139 }
140
141 /*! set the thread local storage pointer */
142 void setTls(tls_t tls, void* const ptr) {
143 TlsSetValue(DWORD(size_t(tls)), ptr);
144 }
145
146 /*! return the thread local storage pointer */
147 void* getTls(tls_t tls) {
148 return TlsGetValue(DWORD(size_t(tls)));
149 }
150
151 /*! destroys thread local storage identifier */
152 void destroyTls(tls_t tls) {
153 TlsFree(DWORD(size_t(tls)));
154 }
155#endif
156}
157
158#endif
159
160////////////////////////////////////////////////////////////////////////////////
161/// Linux Platform
162////////////////////////////////////////////////////////////////////////////////
163
164#if defined(__LINUX__) && !defined(__ANDROID__)
165
166#include <fstream>
167#include <sstream>
168#include <algorithm>
169
170namespace embree
171{
172 static MutexSys mutex;
173 static std::vector<size_t> threadIDs;
174
175 /* changes thread ID mapping such that we first fill up all thread on one core */
176 size_t mapThreadID(size_t threadID)
177 {
178 Lock<MutexSys> lock(mutex);
179
180 if (threadIDs.size() == 0)
181 {
182 /* parse thread/CPU topology */
183 for (size_t cpuID=0;;cpuID++)
184 {
185 std::fstream fs;
186 std::string cpu = std::string("/sys/devices/system/cpu/cpu") + std::to_string(val: (long long)cpuID) + std::string("/topology/thread_siblings_list");
187 fs.open (s: cpu.c_str(), mode: std::fstream::in);
188 if (fs.fail()) break;
189
190 int i;
191 while (fs >> i)
192 {
193 if (std::none_of(first: threadIDs.begin(),last: threadIDs.end(),pred: [&] (int id) { return id == i; }))
194 threadIDs.push_back(x: i);
195 if (fs.peek() == ',')
196 fs.ignore();
197 }
198 fs.close();
199 }
200
201#if 0
202 for (size_t i=0;i<threadIDs.size();i++)
203 std::cout << i << " -> " << threadIDs[i] << std::endl;
204#endif
205
206 /* verify the mapping and do not use it if the mapping has errors */
207 for (size_t i=0;i<threadIDs.size();i++) {
208 for (size_t j=0;j<threadIDs.size();j++) {
209 if (i != j && threadIDs[i] == threadIDs[j]) {
210 threadIDs.clear();
211 }
212 }
213 }
214 }
215
216 /* re-map threadIDs if mapping is available */
217 size_t ID = threadID;
218 if (threadID < threadIDs.size())
219 ID = threadIDs[threadID];
220
221 /* find correct thread to affinitize to */
222 cpu_set_t set;
223 CPU_ZERO(&set);
224
225 if (pthread_getaffinity_np(th: pthread_self(), cpusetsize: sizeof(set), cpuset: &set) == 0)
226 {
227 for (int i=0, j=0; i<CPU_SETSIZE; i++)
228 {
229 if (!CPU_ISSET(i,&set)) continue;
230
231 if (j == ID) {
232 ID = i;
233 break;
234 }
235 j++;
236 }
237 }
238
239 return ID;
240 }
241
242 /*! set affinity of the calling thread */
243 void setAffinity(ssize_t affinity)
244 {
245 cpu_set_t cset;
246 CPU_ZERO(&cset);
247 size_t threadID = mapThreadID(threadID: affinity);
248 CPU_SET(threadID, &cset);
249
250 pthread_setaffinity_np(th: pthread_self(), cpusetsize: sizeof(cset), cpuset: &cset);
251 }
252}
253#endif
254
255////////////////////////////////////////////////////////////////////////////////
256/// Android Platform
257////////////////////////////////////////////////////////////////////////////////
258
259#if defined(__ANDROID__)
260
261namespace embree
262{
/*! Pins the calling thread to the CPU with index `affinity` using
 *  sched_setaffinity with id 0. The result of the call is not checked. */
void setAffinity(ssize_t affinity)
{
  cpu_set_t mask;
  CPU_ZERO(&mask);
  CPU_SET(affinity, &mask);

  const pid_t self = 0;
  sched_setaffinity(self, sizeof(mask), &mask);
}
272}
273#endif
274
275////////////////////////////////////////////////////////////////////////////////
276/// FreeBSD Platform
277////////////////////////////////////////////////////////////////////////////////
278
279#if defined(__FreeBSD__)
280
281#include <pthread_np.h>
282
283namespace embree
284{
285 /*! set affinity of the calling thread */
286 void setAffinity(ssize_t affinity)
287 {
288 cpuset_t cset;
289 CPU_ZERO(&cset);
290 CPU_SET(affinity, &cset);
291
292 pthread_setaffinity_np(pthread_self(), sizeof(cset), &cset);
293 }
294}
295#endif
296
297////////////////////////////////////////////////////////////////////////////////
298/// WebAssembly Platform
299////////////////////////////////////////////////////////////////////////////////
300
301#if defined(__EMSCRIPTEN__)
302namespace embree
303{
/*! Does nothing: setting thread affinity is not supported in WASM. The
 *  function only exists so that callers compile on this platform. */
void setAffinity(ssize_t affinity)
{
  (void)affinity; // intentionally unused
}
309}
310#endif
311
312////////////////////////////////////////////////////////////////////////////////
313/// MacOSX Platform
314////////////////////////////////////////////////////////////////////////////////
315
316#if defined(__MACOSX__)
317
#include <mach/thread_act.h>
#include <mach/thread_policy.h>
#include <mach/mach_init.h>
#include <mach/mach_port.h>
321
322namespace embree
323{
324 /*! set affinity of the calling thread */
325 void setAffinity(ssize_t affinity)
326 {
327#if !defined(__ARM_NEON) // affinity seems not supported on M1 chip
328
329 thread_affinity_policy ap;
330 ap.affinity_tag = affinity;
331 if (thread_policy_set(mach_thread_self(),THREAD_AFFINITY_POLICY,(thread_policy_t)&ap,THREAD_AFFINITY_POLICY_COUNT) != KERN_SUCCESS)
332 WARNING("setting thread affinity failed"); // on purpose only a warning
333
334#endif
335 }
336}
337#endif
338
339////////////////////////////////////////////////////////////////////////////////
340/// Unix Platform
341////////////////////////////////////////////////////////////////////////////////
342
343#if defined(__UNIX__) || defined(PTHREADS_WIN32)
344
345#include <pthread.h>
346#include <sched.h>
347
348#if defined(__USE_NUMA__)
349#include <numa.h>
350#endif
351
352namespace embree
353{
354 struct ThreadStartupData
355 {
356 public:
357 ThreadStartupData (thread_func f, void* arg, int affinity)
358 : f(f), arg(arg), affinity(affinity) {}
359 public:
360 thread_func f;
361 void* arg;
362 ssize_t affinity;
363 };
364
365 static void* threadStartup(ThreadStartupData* parg)
366 {
367 _mm_setcsr(i: _mm_getcsr() | /*FTZ:*/ (1<<15) | /*DAZ:*/ (1<<6));
368
369 /*! Mac OS X does not support setting affinity at thread creation time */
370#if defined(__MACOSX__)
371 if (parg->affinity >= 0)
372 setAffinity(parg->affinity);
373#endif
374
375 parg->f(parg->arg);
376 delete parg;
377 return nullptr;
378 }
379
380 /*! creates a hardware thread running on specific core */
381 thread_t createThread(thread_func f, void* arg, size_t stack_size, ssize_t threadID)
382 {
383 /* set stack size */
384 pthread_attr_t attr;
385 pthread_attr_init(attr: &attr);
386 if (stack_size > 0) pthread_attr_setstacksize (attr: &attr, stacksize: stack_size);
387
388 /* create thread */
389 pthread_t* tid = new pthread_t;
390 if (pthread_create(newthread: tid,attr: &attr,start_routine: (void*(*)(void*))threadStartup,arg: new ThreadStartupData(f,arg,threadID)) != 0) {
391 pthread_attr_destroy(attr: &attr);
392 delete tid;
393 FATAL("pthread_create failed");
394 }
395 pthread_attr_destroy(attr: &attr);
396
397 /* set affinity */
398#if defined(__LINUX__) && !defined(__ANDROID__)
399 if (threadID >= 0) {
400 cpu_set_t cset;
401 CPU_ZERO(&cset);
402 threadID = mapThreadID(threadID);
403 CPU_SET(threadID, &cset);
404 pthread_setaffinity_np(th: *tid, cpusetsize: sizeof(cset), cpuset: &cset);
405 }
406#elif defined(__FreeBSD__)
407 if (threadID >= 0) {
408 cpuset_t cset;
409 CPU_ZERO(&cset);
410 CPU_SET(threadID, &cset);
411 pthread_setaffinity_np(*tid, sizeof(cset), &cset);
412 }
413#elif defined(__ANDROID__)
414 if (threadID >= 0) {
415 cpu_set_t cset;
416 CPU_ZERO(&cset);
417 CPU_SET(threadID, &cset);
418 sched_setaffinity(pthread_gettid_np(*tid), sizeof(cset), &cset);
419 }
420#endif
421
422 return thread_t(tid);
423 }
424
/*! Yields execution of the calling thread via sched_yield(); the result of
 *  that call is ignored. */
void yield()
{
  (void)sched_yield();
}
429
430 /*! waits until the given thread has terminated */
431 void join(thread_t tid) {
432 if (pthread_join(th: *(pthread_t*)tid, thread_return: nullptr) != 0)
433 FATAL("pthread_join failed");
434 delete (pthread_t*)tid;
435 }
436
437 /*! destroy a hardware thread by its handle */
438 void destroyThread(thread_t tid) {
439#if defined(__ANDROID__)
440 FATAL("Can't destroy threads on Android."); // pthread_cancel not implemented.
441#else
442 pthread_cancel(th: *(pthread_t*)tid);
443 delete (pthread_t*)tid;
444#endif
445 }
446
447 /*! creates thread local storage */
448 tls_t createTls()
449 {
450 pthread_key_t* key = new pthread_key_t;
451 if (pthread_key_create(key: key,destr_function: nullptr) != 0) {
452 delete key;
453 FATAL("pthread_key_create failed");
454 }
455
456 return tls_t(key);
457 }
458
459 /*! return the thread local storage pointer */
460 void* getTls(tls_t tls)
461 {
462 assert(tls);
463 return pthread_getspecific(key: *(pthread_key_t*)tls);
464 }
465
466 /*! set the thread local storage pointer */
467 void setTls(tls_t tls, void* const ptr)
468 {
469 assert(tls);
470 if (pthread_setspecific(key: *(pthread_key_t*)tls, pointer: ptr) != 0)
471 FATAL("pthread_setspecific failed");
472 }
473
474 /*! destroys thread local storage identifier */
475 void destroyTls(tls_t tls)
476 {
477 assert(tls);
478 if (pthread_key_delete(key: *(pthread_key_t*)tls) != 0)
479 FATAL("pthread_key_delete failed");
480 delete (pthread_key_t*)tls;
481 }
482}
483
484#endif
485

// Source: qtquick3d/src/3rdparty/embree/common/sys/thread.cpp