1 | // Copyright 2009-2021 Intel Corporation |
2 | // SPDX-License-Identifier: Apache-2.0 |
3 | |
4 | #include "thread.h" |
5 | #include "sysinfo.h" |
6 | #include "string.h" |
7 | |
8 | #include <iostream> |
9 | #if defined(__ARM_NEON) |
10 | #include "../simd/arm/emulation.h" |
11 | #else |
12 | #include <xmmintrin.h> |
13 | #if defined(__EMSCRIPTEN__) |
14 | #include "../simd/wasm/emulation.h" |
15 | #endif |
16 | #endif |
17 | |
18 | #if defined(PTHREADS_WIN32) |
19 | #pragma comment (lib, "pthreadVC.lib") |
20 | #endif |
21 | |
22 | //////////////////////////////////////////////////////////////////////////////// |
23 | /// Windows Platform |
24 | //////////////////////////////////////////////////////////////////////////////// |
25 | |
26 | #if defined(__WIN32__) |
27 | |
28 | #define WIN32_LEAN_AND_MEAN |
29 | #include <windows.h> |
30 | |
31 | namespace embree |
32 | { |
33 | /*! set the affinity of a given thread */ |
34 | void setAffinity(HANDLE thread, ssize_t affinity) |
35 | { |
36 | typedef WORD (WINAPI *GetActiveProcessorGroupCountFunc)(); |
37 | typedef DWORD (WINAPI *GetActiveProcessorCountFunc)(WORD); |
38 | typedef BOOL (WINAPI *SetThreadGroupAffinityFunc)(HANDLE, const GROUP_AFFINITY *, PGROUP_AFFINITY); |
39 | typedef BOOL (WINAPI *SetThreadIdealProcessorExFunc)(HANDLE, PPROCESSOR_NUMBER, PPROCESSOR_NUMBER); |
40 | HMODULE hlib = LoadLibrary("Kernel32" ); |
41 | GetActiveProcessorGroupCountFunc pGetActiveProcessorGroupCount = (GetActiveProcessorGroupCountFunc)GetProcAddress(hlib, "GetActiveProcessorGroupCount" ); |
42 | GetActiveProcessorCountFunc pGetActiveProcessorCount = (GetActiveProcessorCountFunc)GetProcAddress(hlib, "GetActiveProcessorCount" ); |
43 | SetThreadGroupAffinityFunc pSetThreadGroupAffinity = (SetThreadGroupAffinityFunc)GetProcAddress(hlib, "SetThreadGroupAffinity" ); |
44 | SetThreadIdealProcessorExFunc pSetThreadIdealProcessorEx = (SetThreadIdealProcessorExFunc)GetProcAddress(hlib, "SetThreadIdealProcessorEx" ); |
45 | if (pGetActiveProcessorGroupCount && pGetActiveProcessorCount && pSetThreadGroupAffinity && pSetThreadIdealProcessorEx) |
46 | { |
47 | int groups = pGetActiveProcessorGroupCount(); |
48 | int totalProcessors = 0, group = 0, number = 0; |
49 | for (int i = 0; i<groups; i++) { |
50 | int processors = pGetActiveProcessorCount(i); |
51 | if (totalProcessors + processors > affinity) { |
52 | group = i; |
53 | number = (int)affinity - totalProcessors; |
54 | break; |
55 | } |
56 | totalProcessors += processors; |
57 | } |
58 | |
59 | GROUP_AFFINITY groupAffinity; |
60 | groupAffinity.Group = (WORD)group; |
61 | groupAffinity.Mask = (KAFFINITY)(uint64_t(1) << number); |
62 | groupAffinity.Reserved[0] = 0; |
63 | groupAffinity.Reserved[1] = 0; |
64 | groupAffinity.Reserved[2] = 0; |
65 | if (!pSetThreadGroupAffinity(thread, &groupAffinity, nullptr)) |
66 | WARNING("SetThreadGroupAffinity failed" ); // on purpose only a warning |
67 | |
68 | PROCESSOR_NUMBER processorNumber; |
69 | processorNumber.Group = group; |
70 | processorNumber.Number = number; |
71 | processorNumber.Reserved = 0; |
72 | if (!pSetThreadIdealProcessorEx(thread, &processorNumber, nullptr)) |
73 | WARNING("SetThreadIdealProcessorEx failed" ); // on purpose only a warning |
74 | } |
75 | else |
76 | { |
77 | if (!SetThreadAffinityMask(thread, DWORD_PTR(uint64_t(1) << affinity))) |
78 | WARNING("SetThreadAffinityMask failed" ); // on purpose only a warning |
79 | if (SetThreadIdealProcessor(thread, (DWORD)affinity) == (DWORD)-1) |
80 | WARNING("SetThreadIdealProcessor failed" ); // on purpose only a warning |
81 | } |
82 | } |
83 | |
84 | /*! set affinity of the calling thread */ |
85 | void setAffinity(ssize_t affinity) { |
86 | setAffinity(GetCurrentThread(), affinity); |
87 | } |
88 | |
89 | struct ThreadStartupData |
90 | { |
91 | public: |
92 | ThreadStartupData (thread_func f, void* arg) |
93 | : f(f), arg(arg) {} |
94 | public: |
95 | thread_func f; |
96 | void* arg; |
97 | }; |
98 | |
99 | DWORD WINAPI threadStartup(LPVOID ptr) |
100 | { |
101 | ThreadStartupData* parg = (ThreadStartupData*) ptr; |
102 | _mm_setcsr(_mm_getcsr() | /*FTZ:*/ (1<<15) | /*DAZ:*/ (1<<6)); |
103 | parg->f(parg->arg); |
104 | delete parg; |
105 | return 0; |
106 | } |
107 | |
108 | #if !defined(PTHREADS_WIN32) |
109 | |
110 | /*! creates a hardware thread running on specific core */ |
111 | thread_t createThread(thread_func f, void* arg, size_t stack_size, ssize_t threadID) |
112 | { |
113 | HANDLE thread = CreateThread(nullptr, stack_size, threadStartup, new ThreadStartupData(f,arg), 0, nullptr); |
114 | if (thread == nullptr) FATAL("CreateThread failed" ); |
115 | if (threadID >= 0) setAffinity(thread, threadID); |
116 | return thread_t(thread); |
117 | } |
118 | |
119 | /*! the thread calling this function gets yielded */ |
120 | void yield() { |
121 | SwitchToThread(); |
122 | } |
123 | |
124 | /*! waits until the given thread has terminated */ |
125 | void join(thread_t tid) { |
126 | WaitForSingleObject(HANDLE(tid), INFINITE); |
127 | CloseHandle(HANDLE(tid)); |
128 | } |
129 | |
130 | /*! destroy a hardware thread by its handle */ |
131 | void destroyThread(thread_t tid) { |
132 | TerminateThread(HANDLE(tid),0); |
133 | CloseHandle(HANDLE(tid)); |
134 | } |
135 | |
136 | /*! creates thread local storage */ |
137 | tls_t createTls() { |
138 | return tls_t(size_t(TlsAlloc())); |
139 | } |
140 | |
141 | /*! set the thread local storage pointer */ |
142 | void setTls(tls_t tls, void* const ptr) { |
143 | TlsSetValue(DWORD(size_t(tls)), ptr); |
144 | } |
145 | |
146 | /*! return the thread local storage pointer */ |
147 | void* getTls(tls_t tls) { |
148 | return TlsGetValue(DWORD(size_t(tls))); |
149 | } |
150 | |
151 | /*! destroys thread local storage identifier */ |
152 | void destroyTls(tls_t tls) { |
153 | TlsFree(DWORD(size_t(tls))); |
154 | } |
155 | #endif |
156 | } |
157 | |
158 | #endif |
159 | |
160 | //////////////////////////////////////////////////////////////////////////////// |
161 | /// Linux Platform |
162 | //////////////////////////////////////////////////////////////////////////////// |
163 | |
164 | #if defined(__LINUX__) && !defined(__ANDROID__) |
165 | |
166 | #include <fstream> |
167 | #include <sstream> |
168 | #include <algorithm> |
169 | |
170 | namespace embree |
171 | { |
172 | static MutexSys mutex; |
173 | static std::vector<size_t> threadIDs; |
174 | |
175 | /* changes thread ID mapping such that we first fill up all thread on one core */ |
176 | size_t mapThreadID(size_t threadID) |
177 | { |
178 | Lock<MutexSys> lock(mutex); |
179 | |
180 | if (threadIDs.size() == 0) |
181 | { |
182 | /* parse thread/CPU topology */ |
183 | for (size_t cpuID=0;;cpuID++) |
184 | { |
185 | std::fstream fs; |
186 | std::string cpu = std::string("/sys/devices/system/cpu/cpu" ) + std::to_string(val: (long long)cpuID) + std::string("/topology/thread_siblings_list" ); |
187 | fs.open (s: cpu.c_str(), mode: std::fstream::in); |
188 | if (fs.fail()) break; |
189 | |
190 | int i; |
191 | while (fs >> i) |
192 | { |
193 | if (std::none_of(first: threadIDs.begin(),last: threadIDs.end(),pred: [&] (int id) { return id == i; })) |
194 | threadIDs.push_back(x: i); |
195 | if (fs.peek() == ',') |
196 | fs.ignore(); |
197 | } |
198 | fs.close(); |
199 | } |
200 | |
201 | #if 0 |
202 | for (size_t i=0;i<threadIDs.size();i++) |
203 | std::cout << i << " -> " << threadIDs[i] << std::endl; |
204 | #endif |
205 | |
206 | /* verify the mapping and do not use it if the mapping has errors */ |
207 | for (size_t i=0;i<threadIDs.size();i++) { |
208 | for (size_t j=0;j<threadIDs.size();j++) { |
209 | if (i != j && threadIDs[i] == threadIDs[j]) { |
210 | threadIDs.clear(); |
211 | } |
212 | } |
213 | } |
214 | } |
215 | |
216 | /* re-map threadIDs if mapping is available */ |
217 | size_t ID = threadID; |
218 | if (threadID < threadIDs.size()) |
219 | ID = threadIDs[threadID]; |
220 | |
221 | /* find correct thread to affinitize to */ |
222 | cpu_set_t set; |
223 | CPU_ZERO(&set); |
224 | |
225 | if (pthread_getaffinity_np(th: pthread_self(), cpusetsize: sizeof(set), cpuset: &set) == 0) |
226 | { |
227 | for (int i=0, j=0; i<CPU_SETSIZE; i++) |
228 | { |
229 | if (!CPU_ISSET(i,&set)) continue; |
230 | |
231 | if (j == ID) { |
232 | ID = i; |
233 | break; |
234 | } |
235 | j++; |
236 | } |
237 | } |
238 | |
239 | return ID; |
240 | } |
241 | |
242 | /*! set affinity of the calling thread */ |
243 | void setAffinity(ssize_t affinity) |
244 | { |
245 | cpu_set_t cset; |
246 | CPU_ZERO(&cset); |
247 | size_t threadID = mapThreadID(threadID: affinity); |
248 | CPU_SET(threadID, &cset); |
249 | |
250 | pthread_setaffinity_np(th: pthread_self(), cpusetsize: sizeof(cset), cpuset: &cset); |
251 | } |
252 | } |
253 | #endif |
254 | |
255 | //////////////////////////////////////////////////////////////////////////////// |
256 | /// Android Platform |
257 | //////////////////////////////////////////////////////////////////////////////// |
258 | |
259 | #if defined(__ANDROID__) |
260 | |
261 | namespace embree |
262 | { |
/*! Restricts the calling thread to the single CPU \p affinity. The result of
 *  sched_setaffinity is ignored. */
void setAffinity(ssize_t affinity)
{
  cpu_set_t mask;
  CPU_ZERO(&mask);
  CPU_SET(affinity, &mask);

  /* the original passes pid 0 to address the calling thread */
  const pid_t self = 0;
  sched_setaffinity(self, sizeof(mask), &mask);
}
272 | } |
273 | #endif |
274 | |
275 | //////////////////////////////////////////////////////////////////////////////// |
276 | /// FreeBSD Platform |
277 | //////////////////////////////////////////////////////////////////////////////// |
278 | |
279 | #if defined(__FreeBSD__) |
280 | |
281 | #include <pthread_np.h> |
282 | |
283 | namespace embree |
284 | { |
285 | /*! set affinity of the calling thread */ |
286 | void setAffinity(ssize_t affinity) |
287 | { |
288 | cpuset_t cset; |
289 | CPU_ZERO(&cset); |
290 | CPU_SET(affinity, &cset); |
291 | |
292 | pthread_setaffinity_np(pthread_self(), sizeof(cset), &cset); |
293 | } |
294 | } |
295 | #endif |
296 | |
297 | //////////////////////////////////////////////////////////////////////////////// |
298 | /// WebAssembly Platform |
299 | //////////////////////////////////////////////////////////////////////////////// |
300 | |
301 | #if defined(__EMSCRIPTEN__) |
302 | namespace embree |
303 | { |
/*! Placeholder for setting the affinity of the calling thread: WASM offers no
 *  way to do so, hence \p affinity is ignored and nothing happens. */
void setAffinity(ssize_t affinity)
{
  (void) affinity; // Setting thread affinity is not supported in WASM.
}
309 | } |
310 | #endif |
311 | |
312 | //////////////////////////////////////////////////////////////////////////////// |
313 | /// MacOSX Platform |
314 | //////////////////////////////////////////////////////////////////////////////// |
315 | |
316 | #if defined(__MACOSX__) |
317 | |
318 | #include <mach/thread_act.h> |
319 | #include <mach/thread_policy.h> |
320 | #include <mach/mach_init.h> |
321 | |
322 | namespace embree |
323 | { |
324 | /*! set affinity of the calling thread */ |
325 | void setAffinity(ssize_t affinity) |
326 | { |
327 | #if !defined(__ARM_NEON) // affinity seems not supported on M1 chip |
328 | |
329 | thread_affinity_policy ap; |
330 | ap.affinity_tag = affinity; |
331 | if (thread_policy_set(mach_thread_self(),THREAD_AFFINITY_POLICY,(thread_policy_t)&ap,THREAD_AFFINITY_POLICY_COUNT) != KERN_SUCCESS) |
332 | WARNING("setting thread affinity failed" ); // on purpose only a warning |
333 | |
334 | #endif |
335 | } |
336 | } |
337 | #endif |
338 | |
339 | //////////////////////////////////////////////////////////////////////////////// |
340 | /// Unix Platform |
341 | //////////////////////////////////////////////////////////////////////////////// |
342 | |
343 | #if defined(__UNIX__) || defined(PTHREADS_WIN32) |
344 | |
345 | #include <pthread.h> |
346 | #include <sched.h> |
347 | |
348 | #if defined(__USE_NUMA__) |
349 | #include <numa.h> |
350 | #endif |
351 | |
352 | namespace embree |
353 | { |
354 | struct ThreadStartupData |
355 | { |
356 | public: |
357 | ThreadStartupData (thread_func f, void* arg, int affinity) |
358 | : f(f), arg(arg), affinity(affinity) {} |
359 | public: |
360 | thread_func f; |
361 | void* arg; |
362 | ssize_t affinity; |
363 | }; |
364 | |
365 | static void* threadStartup(ThreadStartupData* parg) |
366 | { |
367 | _mm_setcsr(i: _mm_getcsr() | /*FTZ:*/ (1<<15) | /*DAZ:*/ (1<<6)); |
368 | |
369 | /*! Mac OS X does not support setting affinity at thread creation time */ |
370 | #if defined(__MACOSX__) |
371 | if (parg->affinity >= 0) |
372 | setAffinity(parg->affinity); |
373 | #endif |
374 | |
375 | parg->f(parg->arg); |
376 | delete parg; |
377 | return nullptr; |
378 | } |
379 | |
380 | /*! creates a hardware thread running on specific core */ |
381 | thread_t createThread(thread_func f, void* arg, size_t stack_size, ssize_t threadID) |
382 | { |
383 | /* set stack size */ |
384 | pthread_attr_t attr; |
385 | pthread_attr_init(attr: &attr); |
386 | if (stack_size > 0) pthread_attr_setstacksize (attr: &attr, stacksize: stack_size); |
387 | |
388 | /* create thread */ |
389 | pthread_t* tid = new pthread_t; |
390 | if (pthread_create(newthread: tid,attr: &attr,start_routine: (void*(*)(void*))threadStartup,arg: new ThreadStartupData(f,arg,threadID)) != 0) { |
391 | pthread_attr_destroy(attr: &attr); |
392 | delete tid; |
393 | FATAL("pthread_create failed" ); |
394 | } |
395 | pthread_attr_destroy(attr: &attr); |
396 | |
397 | /* set affinity */ |
398 | #if defined(__LINUX__) && !defined(__ANDROID__) |
399 | if (threadID >= 0) { |
400 | cpu_set_t cset; |
401 | CPU_ZERO(&cset); |
402 | threadID = mapThreadID(threadID); |
403 | CPU_SET(threadID, &cset); |
404 | pthread_setaffinity_np(th: *tid, cpusetsize: sizeof(cset), cpuset: &cset); |
405 | } |
406 | #elif defined(__FreeBSD__) |
407 | if (threadID >= 0) { |
408 | cpuset_t cset; |
409 | CPU_ZERO(&cset); |
410 | CPU_SET(threadID, &cset); |
411 | pthread_setaffinity_np(*tid, sizeof(cset), &cset); |
412 | } |
413 | #elif defined(__ANDROID__) |
414 | if (threadID >= 0) { |
415 | cpu_set_t cset; |
416 | CPU_ZERO(&cset); |
417 | CPU_SET(threadID, &cset); |
418 | sched_setaffinity(pthread_gettid_np(*tid), sizeof(cset), &cset); |
419 | } |
420 | #endif |
421 | |
422 | return thread_t(tid); |
423 | } |
424 | |
/*! Yields the calling thread by calling sched_yield; the result of the call
 *  is ignored. */
void yield()
{
  (void) sched_yield();
}
429 | |
430 | /*! waits until the given thread has terminated */ |
431 | void join(thread_t tid) { |
432 | if (pthread_join(th: *(pthread_t*)tid, thread_return: nullptr) != 0) |
433 | FATAL("pthread_join failed" ); |
434 | delete (pthread_t*)tid; |
435 | } |
436 | |
437 | /*! destroy a hardware thread by its handle */ |
438 | void destroyThread(thread_t tid) { |
439 | #if defined(__ANDROID__) |
440 | FATAL("Can't destroy threads on Android." ); // pthread_cancel not implemented. |
441 | #else |
442 | pthread_cancel(th: *(pthread_t*)tid); |
443 | delete (pthread_t*)tid; |
444 | #endif |
445 | } |
446 | |
447 | /*! creates thread local storage */ |
448 | tls_t createTls() |
449 | { |
450 | pthread_key_t* key = new pthread_key_t; |
451 | if (pthread_key_create(key: key,destr_function: nullptr) != 0) { |
452 | delete key; |
453 | FATAL("pthread_key_create failed" ); |
454 | } |
455 | |
456 | return tls_t(key); |
457 | } |
458 | |
459 | /*! return the thread local storage pointer */ |
460 | void* getTls(tls_t tls) |
461 | { |
462 | assert(tls); |
463 | return pthread_getspecific(key: *(pthread_key_t*)tls); |
464 | } |
465 | |
466 | /*! set the thread local storage pointer */ |
467 | void setTls(tls_t tls, void* const ptr) |
468 | { |
469 | assert(tls); |
470 | if (pthread_setspecific(key: *(pthread_key_t*)tls, pointer: ptr) != 0) |
471 | FATAL("pthread_setspecific failed" ); |
472 | } |
473 | |
474 | /*! destroys thread local storage identifier */ |
475 | void destroyTls(tls_t tls) |
476 | { |
477 | assert(tls); |
478 | if (pthread_key_delete(key: *(pthread_key_t*)tls) != 0) |
479 | FATAL("pthread_key_delete failed" ); |
480 | delete (pthread_key_t*)tls; |
481 | } |
482 | } |
483 | |
484 | #endif |
485 | |