| 1 | // Copyright 2009-2021 Intel Corporation | 
| 2 | // SPDX-License-Identifier: Apache-2.0 | 
| 3 |  | 
| 4 | #include "thread.h" | 
| 5 | #include "sysinfo.h" | 
| 6 | #include "string.h" | 
| 7 |  | 
| 8 | #include <iostream> | 
| 9 | #if defined(__ARM_NEON) | 
| 10 | #include "../simd/arm/emulation.h" | 
| 11 | #else | 
| 12 | #include <xmmintrin.h> | 
| 13 | #if defined(__EMSCRIPTEN__) | 
| 14 | #include "../simd/wasm/emulation.h" | 
| 15 | #endif | 
| 16 | #endif | 
| 17 |  | 
| 18 | #if defined(PTHREADS_WIN32) | 
| 19 | #pragma comment (lib, "pthreadVC.lib") | 
| 20 | #endif | 
| 21 |  | 
| 22 | //////////////////////////////////////////////////////////////////////////////// | 
| 23 | /// Windows Platform | 
| 24 | //////////////////////////////////////////////////////////////////////////////// | 
| 25 |  | 
| 26 | #if defined(__WIN32__) | 
| 27 |  | 
| 28 | #define WIN32_LEAN_AND_MEAN | 
| 29 | #include <windows.h> | 
| 30 |  | 
| 31 | namespace embree | 
| 32 | { | 
| 33 |   /*! set the affinity of a given thread */ | 
| 34 |   void setAffinity(HANDLE thread, ssize_t affinity) | 
| 35 |   { | 
| 36 |     typedef WORD (WINAPI *GetActiveProcessorGroupCountFunc)(); | 
| 37 |     typedef DWORD (WINAPI *GetActiveProcessorCountFunc)(WORD); | 
| 38 |     typedef BOOL (WINAPI *SetThreadGroupAffinityFunc)(HANDLE, const GROUP_AFFINITY *, PGROUP_AFFINITY); | 
| 39 |     typedef BOOL (WINAPI *SetThreadIdealProcessorExFunc)(HANDLE, PPROCESSOR_NUMBER, PPROCESSOR_NUMBER); | 
| 40 |     HMODULE hlib = LoadLibrary("Kernel32" ); | 
| 41 |     GetActiveProcessorGroupCountFunc pGetActiveProcessorGroupCount = (GetActiveProcessorGroupCountFunc)GetProcAddress(hlib, "GetActiveProcessorGroupCount" ); | 
| 42 |     GetActiveProcessorCountFunc pGetActiveProcessorCount = (GetActiveProcessorCountFunc)GetProcAddress(hlib, "GetActiveProcessorCount" ); | 
| 43 |     SetThreadGroupAffinityFunc pSetThreadGroupAffinity = (SetThreadGroupAffinityFunc)GetProcAddress(hlib, "SetThreadGroupAffinity" ); | 
| 44 |     SetThreadIdealProcessorExFunc pSetThreadIdealProcessorEx = (SetThreadIdealProcessorExFunc)GetProcAddress(hlib, "SetThreadIdealProcessorEx" ); | 
| 45 |     if (pGetActiveProcessorGroupCount && pGetActiveProcessorCount && pSetThreadGroupAffinity && pSetThreadIdealProcessorEx)  | 
| 46 |     { | 
| 47 |       int groups = pGetActiveProcessorGroupCount(); | 
| 48 |       int totalProcessors = 0, group = 0, number = 0; | 
| 49 |       for (int i = 0; i<groups; i++) { | 
| 50 |         int processors = pGetActiveProcessorCount(i); | 
| 51 |         if (totalProcessors + processors > affinity) { | 
| 52 |           group = i; | 
| 53 |           number = (int)affinity - totalProcessors; | 
| 54 |           break; | 
| 55 |         } | 
| 56 |         totalProcessors += processors; | 
| 57 |       } | 
| 58 |    | 
| 59 |       GROUP_AFFINITY groupAffinity; | 
| 60 |       groupAffinity.Group = (WORD)group; | 
| 61 |       groupAffinity.Mask = (KAFFINITY)(uint64_t(1) << number); | 
| 62 |       groupAffinity.Reserved[0] = 0; | 
| 63 |       groupAffinity.Reserved[1] = 0; | 
| 64 |       groupAffinity.Reserved[2] = 0; | 
| 65 |       if (!pSetThreadGroupAffinity(thread, &groupAffinity, nullptr)) | 
| 66 |         WARNING("SetThreadGroupAffinity failed" ); // on purpose only a warning | 
| 67 |    | 
| 68 |       PROCESSOR_NUMBER processorNumber; | 
| 69 |       processorNumber.Group = group; | 
| 70 |       processorNumber.Number = number; | 
| 71 |       processorNumber.Reserved = 0; | 
| 72 |       if (!pSetThreadIdealProcessorEx(thread, &processorNumber, nullptr)) | 
| 73 |         WARNING("SetThreadIdealProcessorEx failed" ); // on purpose only a warning | 
| 74 |     }  | 
| 75 |     else  | 
| 76 |     { | 
| 77 |       if (!SetThreadAffinityMask(thread, DWORD_PTR(uint64_t(1) << affinity))) | 
| 78 |         WARNING("SetThreadAffinityMask failed" ); // on purpose only a warning | 
| 79 |       if (SetThreadIdealProcessor(thread, (DWORD)affinity) == (DWORD)-1) | 
| 80 |         WARNING("SetThreadIdealProcessor failed" ); // on purpose only a warning | 
| 81 |       } | 
| 82 |   } | 
| 83 |  | 
| 84 |   /*! set affinity of the calling thread */ | 
| 85 |   void setAffinity(ssize_t affinity) { | 
| 86 |     setAffinity(GetCurrentThread(), affinity); | 
| 87 |   } | 
| 88 |  | 
| 89 |   struct ThreadStartupData  | 
| 90 |   { | 
| 91 |   public: | 
| 92 |     ThreadStartupData (thread_func f, void* arg)  | 
| 93 |       : f(f), arg(arg) {} | 
| 94 |   public: | 
| 95 |     thread_func f; | 
| 96 |     void* arg; | 
| 97 |   }; | 
| 98 |  | 
| 99 |   DWORD WINAPI threadStartup(LPVOID ptr) | 
| 100 |   { | 
| 101 |     ThreadStartupData* parg = (ThreadStartupData*) ptr; | 
| 102 |     _mm_setcsr(_mm_getcsr() | /*FTZ:*/ (1<<15) | /*DAZ:*/ (1<<6)); | 
| 103 |     parg->f(parg->arg); | 
| 104 |     delete parg; | 
| 105 |     return 0; | 
| 106 |   } | 
| 107 |  | 
| 108 | #if !defined(PTHREADS_WIN32) | 
| 109 |  | 
| 110 |   /*! creates a hardware thread running on specific core */ | 
| 111 |   thread_t createThread(thread_func f, void* arg, size_t stack_size, ssize_t threadID) | 
| 112 |   { | 
| 113 |     HANDLE thread = CreateThread(nullptr, stack_size, threadStartup, new ThreadStartupData(f,arg), 0, nullptr); | 
| 114 |     if (thread == nullptr) FATAL("CreateThread failed" ); | 
| 115 |     if (threadID >= 0) setAffinity(thread, threadID); | 
| 116 |     return thread_t(thread); | 
| 117 |   } | 
| 118 |  | 
| 119 |   /*! the thread calling this function gets yielded */ | 
| 120 |   void yield() { | 
| 121 |     SwitchToThread(); | 
| 122 |   } | 
| 123 |  | 
| 124 |   /*! waits until the given thread has terminated */ | 
| 125 |   void join(thread_t tid) { | 
| 126 |     WaitForSingleObject(HANDLE(tid), INFINITE); | 
| 127 |     CloseHandle(HANDLE(tid)); | 
| 128 |   } | 
| 129 |  | 
| 130 |   /*! destroy a hardware thread by its handle */ | 
| 131 |   void destroyThread(thread_t tid) { | 
| 132 |     TerminateThread(HANDLE(tid),0); | 
| 133 |     CloseHandle(HANDLE(tid)); | 
| 134 |   } | 
| 135 |  | 
| 136 |   /*! creates thread local storage */ | 
| 137 |   tls_t createTls() { | 
| 138 |     return tls_t(size_t(TlsAlloc())); | 
| 139 |   } | 
| 140 |  | 
| 141 |   /*! set the thread local storage pointer */ | 
| 142 |   void setTls(tls_t tls, void* const ptr) { | 
| 143 |     TlsSetValue(DWORD(size_t(tls)), ptr); | 
| 144 |   } | 
| 145 |  | 
| 146 |   /*! return the thread local storage pointer */ | 
| 147 |   void* getTls(tls_t tls) { | 
| 148 |     return TlsGetValue(DWORD(size_t(tls))); | 
| 149 |   } | 
| 150 |  | 
| 151 |   /*! destroys thread local storage identifier */ | 
| 152 |   void destroyTls(tls_t tls) { | 
| 153 |     TlsFree(DWORD(size_t(tls))); | 
| 154 |   } | 
| 155 | #endif | 
| 156 | } | 
| 157 |  | 
| 158 | #endif | 
| 159 |  | 
| 160 | //////////////////////////////////////////////////////////////////////////////// | 
| 161 | /// Linux Platform | 
| 162 | //////////////////////////////////////////////////////////////////////////////// | 
| 163 |  | 
| 164 | #if defined(__LINUX__) && !defined(__ANDROID__) | 
| 165 |  | 
| 166 | #include <fstream> | 
| 167 | #include <sstream> | 
| 168 | #include <algorithm> | 
| 169 |  | 
| 170 | namespace embree | 
| 171 | { | 
| 172 |   static MutexSys mutex; | 
| 173 |   static std::vector<size_t> threadIDs; | 
| 174 |    | 
| 175 |   /* changes thread ID mapping such that we first fill up all thread on one core */ | 
| 176 |   size_t mapThreadID(size_t threadID) | 
| 177 |   { | 
| 178 |     Lock<MutexSys> lock(mutex); | 
| 179 |      | 
| 180 |     if (threadIDs.size() == 0) | 
| 181 |     { | 
| 182 |       /* parse thread/CPU topology */ | 
| 183 |       for (size_t cpuID=0;;cpuID++) | 
| 184 |       { | 
| 185 |         std::fstream fs; | 
| 186 |         std::string cpu = std::string("/sys/devices/system/cpu/cpu" ) + std::to_string(val: (long long)cpuID) + std::string("/topology/thread_siblings_list" ); | 
| 187 |         fs.open (s: cpu.c_str(), mode: std::fstream::in); | 
| 188 |         if (fs.fail()) break; | 
| 189 |  | 
| 190 |         int i; | 
| 191 |         while (fs >> i)  | 
| 192 |         { | 
| 193 |           if (std::none_of(first: threadIDs.begin(),last: threadIDs.end(),pred: [&] (int id) { return id == i; })) | 
| 194 |             threadIDs.push_back(x: i); | 
| 195 |           if (fs.peek() == ',')  | 
| 196 |             fs.ignore(); | 
| 197 |         } | 
| 198 |         fs.close(); | 
| 199 |       } | 
| 200 |  | 
| 201 | #if 0 | 
| 202 |       for (size_t i=0;i<threadIDs.size();i++) | 
| 203 |         std::cout << i << " -> "  << threadIDs[i] << std::endl; | 
| 204 | #endif | 
| 205 |  | 
| 206 |       /* verify the mapping and do not use it if the mapping has errors */ | 
| 207 |       for (size_t i=0;i<threadIDs.size();i++) { | 
| 208 |         for (size_t j=0;j<threadIDs.size();j++) { | 
| 209 |           if (i != j && threadIDs[i] == threadIDs[j]) { | 
| 210 |             threadIDs.clear(); | 
| 211 |           } | 
| 212 |         } | 
| 213 |       } | 
| 214 |     } | 
| 215 |  | 
| 216 |     /* re-map threadIDs if mapping is available */ | 
| 217 |     size_t ID = threadID; | 
| 218 |     if (threadID < threadIDs.size()) | 
| 219 |       ID = threadIDs[threadID]; | 
| 220 |  | 
| 221 |     /* find correct thread to affinitize to */ | 
| 222 |     cpu_set_t set; | 
| 223 |     CPU_ZERO(&set); | 
| 224 |      | 
| 225 |     if (pthread_getaffinity_np(th: pthread_self(), cpusetsize: sizeof(set), cpuset: &set) == 0) | 
| 226 |     { | 
| 227 |       for (int i=0, j=0; i<CPU_SETSIZE; i++) | 
| 228 |       { | 
| 229 |         if (!CPU_ISSET(i,&set)) continue; | 
| 230 |  | 
| 231 |         if (j == ID) { | 
| 232 |           ID = i; | 
| 233 |           break; | 
| 234 |         } | 
| 235 |         j++; | 
| 236 |       } | 
| 237 |     } | 
| 238 |  | 
| 239 |     return ID; | 
| 240 |   } | 
| 241 |  | 
| 242 |   /*! set affinity of the calling thread */ | 
| 243 |   void setAffinity(ssize_t affinity) | 
| 244 |   { | 
| 245 |     cpu_set_t cset; | 
| 246 |     CPU_ZERO(&cset); | 
| 247 |     size_t threadID = mapThreadID(threadID: affinity); | 
| 248 |     CPU_SET(threadID, &cset); | 
| 249 |  | 
| 250 |     pthread_setaffinity_np(th: pthread_self(), cpusetsize: sizeof(cset), cpuset: &cset); | 
| 251 |   } | 
| 252 | } | 
| 253 | #endif | 
| 254 |  | 
| 255 | //////////////////////////////////////////////////////////////////////////////// | 
| 256 | /// Android Platform | 
| 257 | //////////////////////////////////////////////////////////////////////////////// | 
| 258 |  | 
| 259 | #if defined(__ANDROID__) | 
| 260 |  | 
| 261 | namespace embree | 
| 262 | { | 
| 263 |   /*! set affinity of the calling thread */ | 
| 264 |   void setAffinity(ssize_t affinity) | 
| 265 |   { | 
| 266 |     cpu_set_t cset; | 
| 267 |     CPU_ZERO(&cset); | 
| 268 |     CPU_SET(affinity, &cset); | 
| 269 |  | 
| 270 |     sched_setaffinity(0, sizeof(cset), &cset); | 
| 271 |   } | 
| 272 | } | 
| 273 | #endif | 
| 274 |  | 
| 275 | //////////////////////////////////////////////////////////////////////////////// | 
| 276 | /// FreeBSD Platform | 
| 277 | //////////////////////////////////////////////////////////////////////////////// | 
| 278 |  | 
| 279 | #if defined(__FreeBSD__) | 
| 280 |  | 
| 281 | #include <pthread_np.h> | 
| 282 |  | 
| 283 | namespace embree | 
| 284 | { | 
| 285 |   /*! set affinity of the calling thread */ | 
| 286 |   void setAffinity(ssize_t affinity) | 
| 287 |   { | 
| 288 |     cpuset_t cset; | 
| 289 |     CPU_ZERO(&cset); | 
| 290 |     CPU_SET(affinity, &cset); | 
| 291 |  | 
| 292 |     pthread_setaffinity_np(pthread_self(), sizeof(cset), &cset); | 
| 293 |   } | 
| 294 | } | 
| 295 | #endif | 
| 296 |  | 
| 297 | //////////////////////////////////////////////////////////////////////////////// | 
| 298 | /// WebAssembly Platform | 
| 299 | //////////////////////////////////////////////////////////////////////////////// | 
| 300 |  | 
| 301 | #if defined(__EMSCRIPTEN__) | 
| 302 | namespace embree | 
| 303 | { | 
| 304 |   /*! set affinity of the calling thread */ | 
| 305 |   void setAffinity(ssize_t affinity) | 
| 306 |   { | 
| 307 |       // Setting thread affinity is not supported in WASM. | 
| 308 |   } | 
| 309 | } | 
| 310 | #endif | 
| 311 |  | 
| 312 | //////////////////////////////////////////////////////////////////////////////// | 
| 313 | /// MacOSX Platform | 
| 314 | //////////////////////////////////////////////////////////////////////////////// | 
| 315 |  | 
| 316 | #if defined(__MACOSX__) | 
| 317 |  | 
#include <mach/mach_init.h>
#include <mach/mach_port.h>
#include <mach/thread_act.h>
#include <mach/thread_policy.h>
| 321 |  | 
| 322 | namespace embree | 
| 323 | { | 
| 324 |   /*! set affinity of the calling thread */ | 
| 325 |   void setAffinity(ssize_t affinity) | 
| 326 |   { | 
| 327 | #if !defined(__ARM_NEON) // affinity seems not supported on M1 chip | 
| 328 |      | 
| 329 |     thread_affinity_policy ap; | 
| 330 |     ap.affinity_tag = affinity; | 
| 331 |     if (thread_policy_set(mach_thread_self(),THREAD_AFFINITY_POLICY,(thread_policy_t)&ap,THREAD_AFFINITY_POLICY_COUNT) != KERN_SUCCESS) | 
| 332 |       WARNING("setting thread affinity failed" ); // on purpose only a warning | 
| 333 |      | 
| 334 | #endif | 
| 335 |   } | 
| 336 | } | 
| 337 | #endif | 
| 338 |  | 
| 339 | //////////////////////////////////////////////////////////////////////////////// | 
| 340 | /// Unix Platform | 
| 341 | //////////////////////////////////////////////////////////////////////////////// | 
| 342 |  | 
| 343 | #if defined(__UNIX__) || defined(PTHREADS_WIN32) | 
| 344 |  | 
| 345 | #include <pthread.h> | 
| 346 | #include <sched.h> | 
| 347 |  | 
| 348 | #if defined(__USE_NUMA__) | 
| 349 | #include <numa.h> | 
| 350 | #endif | 
| 351 |  | 
| 352 | namespace embree | 
| 353 | { | 
| 354 |   struct ThreadStartupData  | 
| 355 |   { | 
| 356 |   public: | 
| 357 |     ThreadStartupData (thread_func f, void* arg, int affinity)  | 
| 358 |       : f(f), arg(arg), affinity(affinity) {} | 
| 359 |   public:  | 
| 360 |     thread_func f; | 
| 361 |     void* arg; | 
| 362 |     ssize_t affinity; | 
| 363 |   }; | 
| 364 |    | 
| 365 |   static void* threadStartup(ThreadStartupData* parg) | 
| 366 |   { | 
| 367 |     _mm_setcsr(i: _mm_getcsr() | /*FTZ:*/ (1<<15) | /*DAZ:*/ (1<<6)); | 
| 368 |      | 
| 369 |     /*! Mac OS X does not support setting affinity at thread creation time */ | 
| 370 | #if defined(__MACOSX__) | 
| 371 |     if (parg->affinity >= 0) | 
| 372 | 	setAffinity(parg->affinity); | 
| 373 | #endif | 
| 374 |  | 
| 375 |     parg->f(parg->arg); | 
| 376 |     delete parg; | 
| 377 |     return nullptr; | 
| 378 |   } | 
| 379 |  | 
| 380 |   /*! creates a hardware thread running on specific core */ | 
| 381 |   thread_t createThread(thread_func f, void* arg, size_t stack_size, ssize_t threadID) | 
| 382 |   { | 
| 383 |     /* set stack size */ | 
| 384 |     pthread_attr_t attr; | 
| 385 |     pthread_attr_init(attr: &attr); | 
| 386 |     if (stack_size > 0) pthread_attr_setstacksize (attr: &attr, stacksize: stack_size); | 
| 387 |  | 
| 388 |     /* create thread */ | 
| 389 |     pthread_t* tid = new pthread_t; | 
| 390 |     if (pthread_create(newthread: tid,attr: &attr,start_routine: (void*(*)(void*))threadStartup,arg: new ThreadStartupData(f,arg,threadID)) != 0) { | 
| 391 |       pthread_attr_destroy(attr: &attr); | 
| 392 |       delete tid;  | 
| 393 |       FATAL("pthread_create failed" ); | 
| 394 |     } | 
| 395 |     pthread_attr_destroy(attr: &attr); | 
| 396 |  | 
| 397 |     /* set affinity */ | 
| 398 | #if defined(__LINUX__) && !defined(__ANDROID__) | 
| 399 |     if (threadID >= 0) { | 
| 400 |       cpu_set_t cset; | 
| 401 |       CPU_ZERO(&cset); | 
| 402 |       threadID = mapThreadID(threadID); | 
| 403 |       CPU_SET(threadID, &cset); | 
| 404 |       pthread_setaffinity_np(th: *tid, cpusetsize: sizeof(cset), cpuset: &cset); | 
| 405 |     } | 
| 406 | #elif defined(__FreeBSD__) | 
| 407 |     if (threadID >= 0) { | 
| 408 |       cpuset_t cset; | 
| 409 |       CPU_ZERO(&cset); | 
| 410 |       CPU_SET(threadID, &cset); | 
| 411 |       pthread_setaffinity_np(*tid, sizeof(cset), &cset); | 
| 412 |     } | 
| 413 | #elif defined(__ANDROID__) | 
| 414 |     if (threadID >= 0) { | 
| 415 |       cpu_set_t cset; | 
| 416 |       CPU_ZERO(&cset); | 
| 417 |       CPU_SET(threadID, &cset); | 
| 418 |       sched_setaffinity(pthread_gettid_np(*tid), sizeof(cset), &cset); | 
| 419 |     } | 
| 420 | #endif | 
| 421 |  | 
| 422 |     return thread_t(tid); | 
| 423 |   } | 
| 424 |  | 
| 425 |   /*! the thread calling this function gets yielded */ | 
| 426 |   void yield() { | 
| 427 |     sched_yield(); | 
| 428 |   } | 
| 429 |  | 
| 430 |   /*! waits until the given thread has terminated */ | 
| 431 |   void join(thread_t tid) { | 
| 432 |     if (pthread_join(th: *(pthread_t*)tid, thread_return: nullptr) != 0) | 
| 433 |       FATAL("pthread_join failed" ); | 
| 434 |     delete (pthread_t*)tid; | 
| 435 |   } | 
| 436 |  | 
| 437 |   /*! destroy a hardware thread by its handle */ | 
| 438 |   void destroyThread(thread_t tid) { | 
| 439 | #if defined(__ANDROID__) | 
| 440 |     FATAL("Can't destroy threads on Android." ); // pthread_cancel not implemented. | 
| 441 | #else | 
| 442 |     pthread_cancel(th: *(pthread_t*)tid); | 
| 443 |     delete (pthread_t*)tid; | 
| 444 | #endif | 
| 445 |   } | 
| 446 |  | 
| 447 |   /*! creates thread local storage */ | 
| 448 |   tls_t createTls()  | 
| 449 |   { | 
| 450 |     pthread_key_t* key = new pthread_key_t; | 
| 451 |     if (pthread_key_create(key: key,destr_function: nullptr) != 0) { | 
| 452 |       delete key; | 
| 453 |       FATAL("pthread_key_create failed" ); | 
| 454 |     } | 
| 455 |  | 
| 456 |     return tls_t(key); | 
| 457 |   } | 
| 458 |  | 
| 459 |   /*! return the thread local storage pointer */ | 
| 460 |   void* getTls(tls_t tls)  | 
| 461 |   { | 
| 462 |     assert(tls); | 
| 463 |     return pthread_getspecific(key: *(pthread_key_t*)tls); | 
| 464 |   } | 
| 465 |  | 
| 466 |   /*! set the thread local storage pointer */ | 
| 467 |   void setTls(tls_t tls, void* const ptr)  | 
| 468 |   { | 
| 469 |     assert(tls); | 
| 470 |     if (pthread_setspecific(key: *(pthread_key_t*)tls, pointer: ptr) != 0) | 
| 471 |       FATAL("pthread_setspecific failed" ); | 
| 472 |   } | 
| 473 |  | 
| 474 |   /*! destroys thread local storage identifier */ | 
| 475 |   void destroyTls(tls_t tls)  | 
| 476 |   { | 
| 477 |     assert(tls); | 
| 478 |     if (pthread_key_delete(key: *(pthread_key_t*)tls) != 0) | 
| 479 |       FATAL("pthread_key_delete failed" ); | 
| 480 |     delete (pthread_key_t*)tls; | 
| 481 |   } | 
| 482 | } | 
| 483 |  | 
| 484 | #endif | 
| 485 |  |