1 | /* |
2 | * z_Windows_NT_util.cpp -- platform specific routines. |
3 | */ |
4 | |
5 | //===----------------------------------------------------------------------===// |
6 | // |
7 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
8 | // See https://llvm.org/LICENSE.txt for license information. |
9 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "kmp.h" |
14 | #include "kmp_affinity.h" |
15 | #include "kmp_i18n.h" |
16 | #include "kmp_io.h" |
17 | #include "kmp_itt.h" |
18 | #include "kmp_wait_release.h" |
19 | |
/* This code is related to the NtQuerySystemInformation() function. This
   function is used in the load balance algorithm for OMP_DYNAMIC=true to find
   the number of running threads in the system. */
23 | |
24 | #include <ntsecapi.h> // UNICODE_STRING |
25 | #undef WIN32_NO_STATUS |
26 | #include <ntstatus.h> |
27 | #include <psapi.h> |
28 | #ifdef _MSC_VER |
29 | #pragma comment(lib, "psapi.lib") |
30 | #endif |
31 | |
32 | enum SYSTEM_INFORMATION_CLASS { |
33 | SystemProcessInformation = 5 |
34 | }; // SYSTEM_INFORMATION_CLASS |
35 | |
36 | struct CLIENT_ID { |
37 | HANDLE UniqueProcess; |
38 | HANDLE UniqueThread; |
39 | }; // struct CLIENT_ID |
40 | |
41 | enum THREAD_STATE { |
42 | StateInitialized, |
43 | StateReady, |
44 | StateRunning, |
45 | StateStandby, |
46 | StateTerminated, |
47 | StateWait, |
48 | StateTransition, |
49 | StateUnknown |
50 | }; // enum THREAD_STATE |
51 | |
52 | struct VM_COUNTERS { |
53 | SIZE_T PeakVirtualSize; |
54 | SIZE_T VirtualSize; |
55 | ULONG PageFaultCount; |
56 | SIZE_T PeakWorkingSetSize; |
57 | SIZE_T WorkingSetSize; |
58 | SIZE_T QuotaPeakPagedPoolUsage; |
59 | SIZE_T QuotaPagedPoolUsage; |
60 | SIZE_T QuotaPeakNonPagedPoolUsage; |
61 | SIZE_T QuotaNonPagedPoolUsage; |
62 | SIZE_T PagefileUsage; |
63 | SIZE_T PeakPagefileUsage; |
64 | SIZE_T PrivatePageCount; |
65 | }; // struct VM_COUNTERS |
66 | |
67 | struct SYSTEM_THREAD { |
68 | LARGE_INTEGER KernelTime; |
69 | LARGE_INTEGER UserTime; |
70 | LARGE_INTEGER CreateTime; |
71 | ULONG WaitTime; |
72 | LPVOID StartAddress; |
73 | CLIENT_ID ClientId; |
74 | DWORD Priority; |
75 | LONG BasePriority; |
76 | ULONG ContextSwitchCount; |
77 | THREAD_STATE State; |
78 | ULONG WaitReason; |
79 | }; // SYSTEM_THREAD |
80 | |
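/* The build asserts below (and the analogous ones after
   SYSTEM_PROCESS_INFORMATION) pin down the offsets of the fields this file
   actually reads. Since the structures above mirror undocumented NT
   internals, a layout change breaks the build instead of letting the runtime
   silently read garbage. */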
81 | KMP_BUILD_ASSERT(offsetof(SYSTEM_THREAD, KernelTime) == 0); |
82 | #if KMP_ARCH_X86 || KMP_ARCH_ARM |
83 | KMP_BUILD_ASSERT(offsetof(SYSTEM_THREAD, StartAddress) == 28); |
84 | KMP_BUILD_ASSERT(offsetof(SYSTEM_THREAD, State) == 52); |
85 | #else |
86 | KMP_BUILD_ASSERT(offsetof(SYSTEM_THREAD, StartAddress) == 32); |
87 | KMP_BUILD_ASSERT(offsetof(SYSTEM_THREAD, State) == 68); |
88 | #endif |
89 | |
90 | struct SYSTEM_PROCESS_INFORMATION { |
91 | ULONG NextEntryOffset; |
92 | ULONG NumberOfThreads; |
93 | LARGE_INTEGER Reserved[3]; |
94 | LARGE_INTEGER CreateTime; |
95 | LARGE_INTEGER UserTime; |
96 | LARGE_INTEGER KernelTime; |
97 | UNICODE_STRING ImageName; |
98 | DWORD BasePriority; |
99 | HANDLE ProcessId; |
100 | HANDLE ParentProcessId; |
101 | ULONG HandleCount; |
102 | ULONG Reserved2[2]; |
103 | VM_COUNTERS VMCounters; |
104 | IO_COUNTERS IOCounters; |
  SYSTEM_THREAD Threads[1]; // variable-length: NumberOfThreads entries follow
106 | }; // SYSTEM_PROCESS_INFORMATION |
107 | typedef SYSTEM_PROCESS_INFORMATION *PSYSTEM_PROCESS_INFORMATION; |
108 | |
109 | KMP_BUILD_ASSERT(offsetof(SYSTEM_PROCESS_INFORMATION, NextEntryOffset) == 0); |
110 | KMP_BUILD_ASSERT(offsetof(SYSTEM_PROCESS_INFORMATION, CreateTime) == 32); |
111 | KMP_BUILD_ASSERT(offsetof(SYSTEM_PROCESS_INFORMATION, ImageName) == 56); |
112 | #if KMP_ARCH_X86 || KMP_ARCH_ARM |
113 | KMP_BUILD_ASSERT(offsetof(SYSTEM_PROCESS_INFORMATION, ProcessId) == 68); |
114 | KMP_BUILD_ASSERT(offsetof(SYSTEM_PROCESS_INFORMATION, HandleCount) == 76); |
115 | KMP_BUILD_ASSERT(offsetof(SYSTEM_PROCESS_INFORMATION, VMCounters) == 88); |
116 | KMP_BUILD_ASSERT(offsetof(SYSTEM_PROCESS_INFORMATION, IOCounters) == 136); |
117 | KMP_BUILD_ASSERT(offsetof(SYSTEM_PROCESS_INFORMATION, Threads) == 184); |
118 | #else |
119 | KMP_BUILD_ASSERT(offsetof(SYSTEM_PROCESS_INFORMATION, ProcessId) == 80); |
120 | KMP_BUILD_ASSERT(offsetof(SYSTEM_PROCESS_INFORMATION, HandleCount) == 96); |
121 | KMP_BUILD_ASSERT(offsetof(SYSTEM_PROCESS_INFORMATION, VMCounters) == 112); |
122 | KMP_BUILD_ASSERT(offsetof(SYSTEM_PROCESS_INFORMATION, IOCounters) == 208); |
123 | KMP_BUILD_ASSERT(offsetof(SYSTEM_PROCESS_INFORMATION, Threads) == 256); |
124 | #endif |
125 | |
126 | typedef NTSTATUS(NTAPI *NtQuerySystemInformation_t)(SYSTEM_INFORMATION_CLASS, |
127 | PVOID, ULONG, PULONG); |
128 | NtQuerySystemInformation_t NtQuerySystemInformation = NULL; |
129 | |
130 | HMODULE ntdll = NULL; |
131 | |
132 | /* End of NtQuerySystemInformation()-related code */ |
133 | |
134 | static HMODULE kernel32 = NULL; |
135 | |
136 | #if KMP_HANDLE_SIGNALS |
137 | typedef void (*sig_func_t)(int); |
138 | static sig_func_t __kmp_sighldrs[NSIG]; |
139 | static int __kmp_siginstalled[NSIG]; |
140 | #endif |
141 | |
142 | #if KMP_USE_MONITOR |
143 | static HANDLE __kmp_monitor_ev; |
144 | #endif |
145 | static kmp_int64 __kmp_win32_time; |
146 | double __kmp_win32_tick; |
147 | |
148 | int __kmp_init_runtime = FALSE; |
149 | CRITICAL_SECTION __kmp_win32_section; |
150 | |
151 | void __kmp_win32_mutex_init(kmp_win32_mutex_t *mx) { |
152 | InitializeCriticalSection(&mx->cs); |
153 | #if USE_ITT_BUILD |
  __kmp_itt_system_object_created(&mx->cs, "Critical Section");
155 | #endif /* USE_ITT_BUILD */ |
156 | } |
157 | |
158 | void __kmp_win32_mutex_destroy(kmp_win32_mutex_t *mx) { |
159 | DeleteCriticalSection(&mx->cs); |
160 | } |
161 | |
162 | void __kmp_win32_mutex_lock(kmp_win32_mutex_t *mx) { |
163 | EnterCriticalSection(&mx->cs); |
164 | } |
165 | |
166 | int __kmp_win32_mutex_trylock(kmp_win32_mutex_t *mx) { |
167 | return TryEnterCriticalSection(&mx->cs); |
168 | } |
169 | |
170 | void __kmp_win32_mutex_unlock(kmp_win32_mutex_t *mx) { |
171 | LeaveCriticalSection(&mx->cs); |
172 | } |
173 | |
174 | void __kmp_win32_cond_init(kmp_win32_cond_t *cv) { |
175 | cv->waiters_count_ = 0; |
176 | cv->wait_generation_count_ = 0; |
177 | cv->release_count_ = 0; |
178 | |
179 | /* Initialize the critical section */ |
180 | __kmp_win32_mutex_init(&cv->waiters_count_lock_); |
181 | |
182 | /* Create a manual-reset event. */ |
183 | cv->event_ = CreateEvent(NULL, // no security |
184 | TRUE, // manual-reset |
185 | FALSE, // non-signaled initially |
186 | NULL); // unnamed |
187 | #if USE_ITT_BUILD |
  __kmp_itt_system_object_created(cv->event_, "Event");
189 | #endif /* USE_ITT_BUILD */ |
190 | } |
191 | |
192 | void __kmp_win32_cond_destroy(kmp_win32_cond_t *cv) { |
193 | __kmp_win32_mutex_destroy(&cv->waiters_count_lock_); |
194 | __kmp_free_handle(cv->event_); |
195 | memset(cv, '\0', sizeof(*cv)); |
196 | } |
197 | |
198 | /* TODO associate cv with a team instead of a thread so as to optimize |
199 | the case where we wake up a whole team */ |
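/* A note on the emulation below: a Win32 manual-reset event plus a
   generation counter stands in for a condition variable. Each broadcast
   releases only the waiters of the current generation (release_count_), so a
   thread that starts waiting after a broadcast cannot steal a wakeup meant
   for an earlier generation; the last waiter released resets the event. */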
200 | |
201 | template <class C> |
202 | static void __kmp_win32_cond_wait(kmp_win32_cond_t *cv, kmp_win32_mutex_t *mx, |
203 | kmp_info_t *th, C *flag) { |
204 | int my_generation; |
205 | int last_waiter; |
206 | |
207 | /* Avoid race conditions */ |
208 | __kmp_win32_mutex_lock(&cv->waiters_count_lock_); |
209 | |
210 | /* Increment count of waiters */ |
211 | cv->waiters_count_++; |
212 | |
213 | /* Store current generation in our activation record. */ |
214 | my_generation = cv->wait_generation_count_; |
215 | |
216 | __kmp_win32_mutex_unlock(&cv->waiters_count_lock_); |
217 | __kmp_win32_mutex_unlock(mx); |
218 | |
219 | for (;;) { |
220 | int wait_done = 0; |
    DWORD res, timeout = 5000; // just tried to guess an appropriate number
222 | /* Wait until the event is signaled */ |
223 | res = WaitForSingleObject(cv->event_, timeout); |
224 | |
225 | if (res == WAIT_OBJECT_0) { |
226 | // event signaled |
227 | __kmp_win32_mutex_lock(&cv->waiters_count_lock_); |
228 | /* Exit the loop when the <cv->event_> is signaled and there are still |
229 | waiting threads from this <wait_generation> that haven't been released |
230 | from this wait yet. */ |
231 | wait_done = (cv->release_count_ > 0) && |
232 | (cv->wait_generation_count_ != my_generation); |
233 | __kmp_win32_mutex_unlock(&cv->waiters_count_lock_); |
234 | } else if (res == WAIT_TIMEOUT || res == WAIT_FAILED) { |
      // Check if the flag and cv counters are in a consistent state, as MS
      // sent us a debug dump with an inconsistent state of data.
237 | __kmp_win32_mutex_lock(mx); |
238 | typename C::flag_t old_f = flag->set_sleeping(); |
239 | if (!flag->done_check_val(old_f & ~KMP_BARRIER_SLEEP_STATE)) { |
240 | __kmp_win32_mutex_unlock(mx); |
241 | continue; |
242 | } |
243 | // condition fulfilled, exiting |
244 | flag->unset_sleeping(); |
245 | TCW_PTR(th->th.th_sleep_loc, NULL); |
246 | th->th.th_sleep_loc_type = flag_unset; |
      KF_TRACE(50, ("__kmp_win32_cond_wait: exiting, condition "
                    "fulfilled: flag's loc(%p): %u\n",
249 | flag->get(), (unsigned int)flag->load())); |
250 | |
251 | __kmp_win32_mutex_lock(&cv->waiters_count_lock_); |
252 | KMP_DEBUG_ASSERT(cv->waiters_count_ > 0); |
253 | cv->release_count_ = cv->waiters_count_; |
254 | cv->wait_generation_count_++; |
255 | wait_done = 1; |
256 | __kmp_win32_mutex_unlock(&cv->waiters_count_lock_); |
257 | |
258 | __kmp_win32_mutex_unlock(mx); |
259 | } |
    /* A stray semicolon used to follow the if statement above; it looked
       like a bug, so it was removed. */
262 | if (wait_done) |
263 | break; |
264 | } |
265 | |
266 | __kmp_win32_mutex_lock(mx); |
267 | __kmp_win32_mutex_lock(&cv->waiters_count_lock_); |
268 | |
269 | cv->waiters_count_--; |
270 | cv->release_count_--; |
271 | |
272 | last_waiter = (cv->release_count_ == 0); |
273 | |
274 | __kmp_win32_mutex_unlock(&cv->waiters_count_lock_); |
275 | |
276 | if (last_waiter) { |
277 | /* We're the last waiter to be notified, so reset the manual event. */ |
278 | ResetEvent(cv->event_); |
279 | } |
280 | } |
281 | |
282 | void __kmp_win32_cond_broadcast(kmp_win32_cond_t *cv) { |
283 | __kmp_win32_mutex_lock(&cv->waiters_count_lock_); |
284 | |
285 | if (cv->waiters_count_ > 0) { |
286 | SetEvent(cv->event_); |
287 | /* Release all the threads in this generation. */ |
288 | |
289 | cv->release_count_ = cv->waiters_count_; |
290 | |
291 | /* Start a new generation. */ |
292 | cv->wait_generation_count_++; |
293 | } |
294 | |
295 | __kmp_win32_mutex_unlock(&cv->waiters_count_lock_); |
296 | } |
297 | |
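/* With the generation scheme above there is no way to release exactly one
   waiter, so signal degenerates to broadcast; waiters must (and do) tolerate
   the resulting spurious wakeups. */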
298 | void __kmp_win32_cond_signal(kmp_win32_cond_t *cv) { |
299 | __kmp_win32_cond_broadcast(cv); |
300 | } |
301 | |
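/* On Windows* OS, __kmp_enable / __kmp_disable just leave / enter
   __kmp_win32_section; new_state is ignored and *old_state is always set to
   0. The parameters exist to keep the interface shared with the other
   platforms. */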
302 | void __kmp_enable(int new_state) { |
303 | if (__kmp_init_runtime) |
304 | LeaveCriticalSection(&__kmp_win32_section); |
305 | } |
306 | |
307 | void __kmp_disable(int *old_state) { |
308 | *old_state = 0; |
309 | |
310 | if (__kmp_init_runtime) |
311 | EnterCriticalSection(&__kmp_win32_section); |
312 | } |
313 | |
314 | void __kmp_suspend_initialize(void) { /* do nothing */ |
315 | } |
316 | |
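/* th_suspend_init is a tri-state: FALSE (not initialized), TRUE
   (initialized), and -1 (initialization in progress). The compare-and-store
   below elects exactly one initializing thread; all others spin until the
   release store publishes the initialized cv/mutex pair. */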
317 | void __kmp_suspend_initialize_thread(kmp_info_t *th) { |
318 | int old_value = KMP_ATOMIC_LD_RLX(&th->th.th_suspend_init); |
319 | int new_value = TRUE; |
320 | // Return if already initialized |
321 | if (old_value == new_value) |
322 | return; |
323 | // Wait, then return if being initialized |
324 | if (old_value == -1 || |
325 | !__kmp_atomic_compare_store(&th->th.th_suspend_init, old_value, -1)) { |
326 | while (KMP_ATOMIC_LD_ACQ(&th->th.th_suspend_init) != new_value) { |
327 | KMP_CPU_PAUSE(); |
328 | } |
329 | } else { |
330 | // Claim to be the initializer and do initializations |
331 | __kmp_win32_cond_init(&th->th.th_suspend_cv); |
332 | __kmp_win32_mutex_init(&th->th.th_suspend_mx); |
333 | KMP_ATOMIC_ST_REL(&th->th.th_suspend_init, new_value); |
334 | } |
335 | } |
336 | |
337 | void __kmp_suspend_uninitialize_thread(kmp_info_t *th) { |
338 | if (KMP_ATOMIC_LD_ACQ(&th->th.th_suspend_init)) { |
    /* this means we have initialized the suspension objects (condition
       variable and mutex) for this thread in this instance of the process */
341 | __kmp_win32_cond_destroy(&th->th.th_suspend_cv); |
342 | __kmp_win32_mutex_destroy(&th->th.th_suspend_mx); |
343 | KMP_ATOMIC_ST_REL(&th->th.th_suspend_init, FALSE); |
344 | } |
345 | } |
346 | |
347 | int __kmp_try_suspend_mx(kmp_info_t *th) { |
348 | return __kmp_win32_mutex_trylock(&th->th.th_suspend_mx); |
349 | } |
350 | |
351 | void __kmp_lock_suspend_mx(kmp_info_t *th) { |
352 | __kmp_win32_mutex_lock(&th->th.th_suspend_mx); |
353 | } |
354 | |
355 | void __kmp_unlock_suspend_mx(kmp_info_t *th) { |
356 | __kmp_win32_mutex_unlock(&th->th.th_suspend_mx); |
357 | } |
358 | |
359 | /* This routine puts the calling thread to sleep after setting the |
360 | sleep bit for the indicated flag variable to true. */ |
361 | template <class C> |
362 | static inline void __kmp_suspend_template(int th_gtid, C *flag) { |
363 | kmp_info_t *th = __kmp_threads[th_gtid]; |
364 | typename C::flag_t old_spin; |
365 | |
  KF_TRACE(30, ("__kmp_suspend_template: T#%d enter for flag's loc(%p)\n",
367 | th_gtid, flag->get())); |
368 | |
369 | __kmp_suspend_initialize_thread(th); |
370 | __kmp_lock_suspend_mx(th); |
371 | |
  KF_TRACE(10, ("__kmp_suspend_template: T#%d setting sleep bit for flag's"
                " loc(%p)\n",
374 | th_gtid, flag->get())); |
375 | |
376 | /* TODO: shouldn't this use release semantics to ensure that |
377 | __kmp_suspend_initialize_thread gets called first? */ |
378 | old_spin = flag->set_sleeping(); |
379 | TCW_PTR(th->th.th_sleep_loc, (void *)flag); |
380 | th->th.th_sleep_loc_type = flag->get_type(); |
381 | if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME && |
382 | __kmp_pause_status != kmp_soft_paused) { |
383 | flag->unset_sleeping(); |
384 | TCW_PTR(th->th.th_sleep_loc, NULL); |
385 | th->th.th_sleep_loc_type = flag_unset; |
386 | __kmp_unlock_suspend_mx(th); |
387 | return; |
388 | } |
389 | |
  KF_TRACE(5, ("__kmp_suspend_template: T#%d set sleep bit for flag's"
               " loc(%p)==%u\n",
392 | th_gtid, flag->get(), (unsigned int)flag->load())); |
393 | |
394 | if (flag->done_check_val(old_spin) || flag->done_check()) { |
395 | flag->unset_sleeping(); |
396 | TCW_PTR(th->th.th_sleep_loc, NULL); |
397 | th->th.th_sleep_loc_type = flag_unset; |
    KF_TRACE(5, ("__kmp_suspend_template: T#%d false alarm, reset sleep bit "
                 "for flag's loc(%p)\n",
400 | th_gtid, flag->get())); |
401 | } else { |
402 | #ifdef DEBUG_SUSPEND |
403 | __kmp_suspend_count++; |
404 | #endif |
405 | /* Encapsulate in a loop as the documentation states that this may "with |
406 | low probability" return when the condition variable has not been signaled |
407 | or broadcast */ |
408 | int deactivated = FALSE; |
409 | |
410 | while (flag->is_sleeping()) { |
      KF_TRACE(15, ("__kmp_suspend_template: T#%d about to perform "
                    "kmp_win32_cond_wait()\n",
413 | th_gtid)); |
414 | // Mark the thread as no longer active (only in the first iteration of the |
415 | // loop). |
416 | if (!deactivated) { |
417 | th->th.th_active = FALSE; |
418 | if (th->th.th_active_in_pool) { |
419 | th->th.th_active_in_pool = FALSE; |
420 | KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth); |
421 | KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0); |
422 | } |
423 | deactivated = TRUE; |
424 | } |
425 | |
426 | KMP_DEBUG_ASSERT(th->th.th_sleep_loc); |
427 | KMP_DEBUG_ASSERT(th->th.th_sleep_loc_type == flag->get_type()); |
428 | |
429 | __kmp_win32_cond_wait(&th->th.th_suspend_cv, &th->th.th_suspend_mx, th, |
430 | flag); |
431 | |
432 | #ifdef KMP_DEBUG |
433 | if (flag->is_sleeping()) { |
        KF_TRACE(100,
                 ("__kmp_suspend_template: T#%d spurious wakeup\n", th_gtid));
436 | } |
437 | #endif /* KMP_DEBUG */ |
438 | |
439 | } // while |
440 | |
    // We may have had the loop variable set before entering the loop body,
    // so we need to reset sleep_loc here.
443 | TCW_PTR(th->th.th_sleep_loc, NULL); |
444 | th->th.th_sleep_loc_type = flag_unset; |
445 | |
446 | KMP_DEBUG_ASSERT(!flag->is_sleeping()); |
447 | KMP_DEBUG_ASSERT(!th->th.th_sleep_loc); |
448 | |
    // Mark the thread as active again (if it was previously marked inactive)
450 | if (deactivated) { |
451 | th->th.th_active = TRUE; |
452 | if (TCR_4(th->th.th_in_pool)) { |
453 | KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth); |
454 | th->th.th_active_in_pool = TRUE; |
455 | } |
456 | } |
457 | } |
458 | |
459 | __kmp_unlock_suspend_mx(th); |
  KF_TRACE(30, ("__kmp_suspend_template: T#%d exit\n", th_gtid));
461 | } |
462 | |
463 | template <bool C, bool S> |
464 | void __kmp_suspend_32(int th_gtid, kmp_flag_32<C, S> *flag) { |
465 | __kmp_suspend_template(th_gtid, flag); |
466 | } |
467 | template <bool C, bool S> |
468 | void __kmp_suspend_64(int th_gtid, kmp_flag_64<C, S> *flag) { |
469 | __kmp_suspend_template(th_gtid, flag); |
470 | } |
471 | template <bool C, bool S> |
472 | void __kmp_atomic_suspend_64(int th_gtid, kmp_atomic_flag_64<C, S> *flag) { |
473 | __kmp_suspend_template(th_gtid, flag); |
474 | } |
475 | void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag) { |
476 | __kmp_suspend_template(th_gtid, flag); |
477 | } |
478 | |
479 | template void __kmp_suspend_32<false, false>(int, kmp_flag_32<false, false> *); |
480 | template void __kmp_suspend_64<false, true>(int, kmp_flag_64<false, true> *); |
481 | template void __kmp_suspend_64<true, false>(int, kmp_flag_64<true, false> *); |
482 | template void |
483 | __kmp_atomic_suspend_64<false, true>(int, kmp_atomic_flag_64<false, true> *); |
484 | template void |
485 | __kmp_atomic_suspend_64<true, false>(int, kmp_atomic_flag_64<true, false> *); |
486 | |
487 | /* This routine signals the thread specified by target_gtid to wake up |
488 | after setting the sleep bit indicated by the flag argument to FALSE */ |
489 | template <class C> |
490 | static inline void __kmp_resume_template(int target_gtid, C *flag) { |
491 | kmp_info_t *th = __kmp_threads[target_gtid]; |
492 | |
493 | #ifdef KMP_DEBUG |
494 | int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1; |
495 | #endif |
496 | |
  KF_TRACE(30, ("__kmp_resume_template: T#%d wants to wakeup T#%d enter\n",
498 | gtid, target_gtid)); |
499 | |
500 | __kmp_suspend_initialize_thread(th); |
501 | __kmp_lock_suspend_mx(th); |
502 | |
503 | if (!flag || flag != th->th.th_sleep_loc) { |
504 | // coming from __kmp_null_resume_wrapper, or thread is now sleeping on a |
505 | // different location; wake up at new location |
506 | flag = (C *)th->th.th_sleep_loc; |
507 | } |
508 | |
509 | // First, check if the flag is null or its type has changed. If so, someone |
510 | // else woke it up. |
511 | if (!flag || flag->get_type() != th->th.th_sleep_loc_type) { |
512 | // simply shows what flag was cast to |
    KF_TRACE(5, ("__kmp_resume_template: T#%d exiting, thread T#%d already "
                 "awake: flag's loc(%p)\n",
515 | gtid, target_gtid, NULL)); |
516 | __kmp_unlock_suspend_mx(th); |
517 | return; |
518 | } else { |
519 | if (!flag->is_sleeping()) { |
      KF_TRACE(5, ("__kmp_resume_template: T#%d exiting, thread T#%d already "
                   "awake: flag's loc(%p): %u\n",
522 | gtid, target_gtid, flag->get(), (unsigned int)flag->load())); |
523 | __kmp_unlock_suspend_mx(th); |
524 | return; |
525 | } |
526 | } |
527 | KMP_DEBUG_ASSERT(flag); |
528 | flag->unset_sleeping(); |
529 | TCW_PTR(th->th.th_sleep_loc, NULL); |
530 | th->th.th_sleep_loc_type = flag_unset; |
531 | |
  KF_TRACE(5, ("__kmp_resume_template: T#%d about to wakeup T#%d, reset sleep "
               "bit for flag's loc(%p)\n",
534 | gtid, target_gtid, flag->get())); |
535 | |
536 | __kmp_win32_cond_signal(&th->th.th_suspend_cv); |
537 | __kmp_unlock_suspend_mx(th); |
538 | |
  KF_TRACE(30, ("__kmp_resume_template: T#%d exiting after signaling wake up"
                " for T#%d\n",
541 | gtid, target_gtid)); |
542 | } |
543 | |
544 | template <bool C, bool S> |
545 | void __kmp_resume_32(int target_gtid, kmp_flag_32<C, S> *flag) { |
546 | __kmp_resume_template(target_gtid, flag); |
547 | } |
548 | template <bool C, bool S> |
549 | void __kmp_resume_64(int target_gtid, kmp_flag_64<C, S> *flag) { |
550 | __kmp_resume_template(target_gtid, flag); |
551 | } |
552 | template <bool C, bool S> |
553 | void __kmp_atomic_resume_64(int target_gtid, kmp_atomic_flag_64<C, S> *flag) { |
554 | __kmp_resume_template(target_gtid, flag); |
555 | } |
556 | void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag) { |
557 | __kmp_resume_template(target_gtid, flag); |
558 | } |
559 | |
560 | template void __kmp_resume_32<false, true>(int, kmp_flag_32<false, true> *); |
561 | template void __kmp_resume_32<false, false>(int, kmp_flag_32<false, false> *); |
562 | template void __kmp_resume_64<false, true>(int, kmp_flag_64<false, true> *); |
563 | template void |
564 | __kmp_atomic_resume_64<false, true>(int, kmp_atomic_flag_64<false, true> *); |
565 | |
566 | void __kmp_yield() { Sleep(0); } |
567 | |
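/* The gtid is stored in TLS biased by +1, so that a stored gtid of 0 can be
   distinguished from the 0 that TlsGetValue() returns for "no value set". */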
568 | void __kmp_gtid_set_specific(int gtid) { |
569 | if (__kmp_init_gtid) { |
    KA_TRACE(50, ("__kmp_gtid_set_specific: T#%d key:%d\n", gtid,
571 | __kmp_gtid_threadprivate_key)); |
572 | kmp_intptr_t g = (kmp_intptr_t)gtid; |
573 | if (!TlsSetValue(__kmp_gtid_threadprivate_key, (LPVOID)(g + 1))) |
574 | KMP_FATAL(TLSSetValueFailed); |
575 | } else { |
    KA_TRACE(50, ("__kmp_gtid_set_specific: runtime shutdown, returning\n"));
577 | } |
578 | } |
579 | |
580 | int __kmp_gtid_get_specific() { |
581 | int gtid; |
582 | if (!__kmp_init_gtid) { |
    KA_TRACE(50, ("__kmp_gtid_get_specific: runtime shutdown, returning "
                  "KMP_GTID_SHUTDOWN\n"));
585 | return KMP_GTID_SHUTDOWN; |
586 | } |
587 | gtid = (int)(kmp_intptr_t)TlsGetValue(__kmp_gtid_threadprivate_key); |
588 | if (gtid == 0) { |
589 | gtid = KMP_GTID_DNE; |
590 | } else { |
591 | gtid--; |
592 | } |
  KA_TRACE(50, ("__kmp_gtid_get_specific: key:%d gtid:%d\n",
594 | __kmp_gtid_threadprivate_key, gtid)); |
595 | return gtid; |
596 | } |
597 | |
598 | void __kmp_affinity_bind_thread(int proc) { |
599 | if (__kmp_num_proc_groups > 1) { |
600 | // Form the GROUP_AFFINITY struct directly, rather than filling |
601 | // out a bit vector and calling __kmp_set_system_affinity(). |
602 | GROUP_AFFINITY ga; |
603 | KMP_DEBUG_ASSERT((proc >= 0) && (proc < (__kmp_num_proc_groups * CHAR_BIT * |
604 | sizeof(DWORD_PTR)))); |
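    // Split the flat proc id into (group, bit within group); each processor
    // group holds CHAR_BIT * sizeof(DWORD_PTR) (64 on 64-bit Windows)
    // logical processors.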
605 | ga.Group = proc / (CHAR_BIT * sizeof(DWORD_PTR)); |
606 | ga.Mask = (unsigned long long)1 << (proc % (CHAR_BIT * sizeof(DWORD_PTR))); |
607 | ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0; |
608 | |
609 | KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL); |
610 | if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) { |
611 | DWORD error = GetLastError(); |
612 | // AC: continue silently if not verbose |
613 | if (__kmp_affinity.flags.verbose) { |
614 | kmp_msg_t err_code = KMP_ERR(error); |
615 | __kmp_msg(kmp_ms_warning, KMP_MSG(CantSetThreadAffMask), err_code, |
616 | __kmp_msg_null); |
617 | if (__kmp_generate_warnings == kmp_warnings_off) { |
          __kmp_str_free(&err_code.str);
619 | } |
620 | } |
621 | } |
622 | } else { |
623 | kmp_affin_mask_t *mask; |
624 | KMP_CPU_ALLOC_ON_STACK(mask); |
625 | KMP_CPU_ZERO(mask); |
626 | KMP_CPU_SET(proc, mask); |
627 | __kmp_set_system_affinity(mask, TRUE); |
628 | KMP_CPU_FREE_FROM_STACK(mask); |
629 | } |
630 | } |
631 | |
632 | void __kmp_affinity_determine_capable(const char *env_var) { |
633 | // All versions of Windows* OS (since Win '95) support |
634 | // SetThreadAffinityMask(). |
635 | |
636 | #if KMP_GROUP_AFFINITY |
637 | KMP_AFFINITY_ENABLE(__kmp_num_proc_groups * sizeof(DWORD_PTR)); |
638 | #else |
639 | KMP_AFFINITY_ENABLE(sizeof(DWORD_PTR)); |
640 | #endif |
641 | |
  KA_TRACE(10, ("__kmp_affinity_determine_capable: "
                "Windows* OS affinity interface functional (mask size = "
                "%" KMP_SIZE_T_SPEC ").\n",
645 | __kmp_affin_mask_size)); |
646 | } |
647 | |
648 | double __kmp_read_cpu_time(void) { |
649 | FILETIME CreationTime, ExitTime, KernelTime, UserTime; |
650 | int status; |
651 | double cpu_time; |
652 | |
653 | cpu_time = 0; |
654 | |
655 | status = GetProcessTimes(GetCurrentProcess(), &CreationTime, &ExitTime, |
656 | &KernelTime, &UserTime); |
657 | |
658 | if (status) { |
659 | double sec = 0; |
660 | |
661 | sec += KernelTime.dwHighDateTime; |
662 | sec += UserTime.dwHighDateTime; |
663 | |
664 | /* Shift left by 32 bits */ |
665 | sec *= (double)(1 << 16) * (double)(1 << 16); |
666 | |
667 | sec += KernelTime.dwLowDateTime; |
668 | sec += UserTime.dwLowDateTime; |
669 | |
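    // FILETIME counts 100-nanosecond ticks, so scale the summed ticks by 100
    // and divide by nanoseconds-per-second to obtain seconds.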
670 | cpu_time += (sec * 100.0) / KMP_NSEC_PER_SEC; |
671 | } |
672 | |
673 | return cpu_time; |
674 | } |
675 | |
676 | int __kmp_read_system_info(struct kmp_sys_info *info) { |
677 | info->maxrss = 0; /* the maximum resident set size utilized (in kilobytes) */ |
678 | info->minflt = 0; /* the number of page faults serviced without any I/O */ |
679 | info->majflt = 0; /* the number of page faults serviced that required I/O */ |
680 | info->nswap = 0; // the number of times a process was "swapped" out of memory |
681 | info->inblock = 0; // the number of times the file system had to perform input |
682 | info->oublock = 0; // number of times the file system had to perform output |
  info->nvcsw = 0; /* the number of voluntary context switches */
  info->nivcsw = 0; /* the number of involuntary (forced) context switches */
685 | |
686 | return 1; |
687 | } |
688 | |
689 | void __kmp_runtime_initialize(void) { |
690 | SYSTEM_INFO info; |
691 | kmp_str_buf_t path; |
692 | UINT path_size; |
693 | |
694 | if (__kmp_init_runtime) { |
695 | return; |
696 | } |
697 | |
698 | #if KMP_DYNAMIC_LIB |
699 | /* Pin dynamic library for the lifetime of application */ |
700 | { |
701 | // First, turn off error message boxes |
702 | UINT err_mode = SetErrorMode(SEM_FAILCRITICALERRORS); |
703 | HMODULE h; |
704 | BOOL ret = GetModuleHandleEx(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | |
705 | GET_MODULE_HANDLE_EX_FLAG_PIN, |
706 | (LPCTSTR)&__kmp_serial_initialize, &h); |
707 | (void)ret; |
    KMP_DEBUG_ASSERT2(h && ret, "OpenMP RTL cannot find itself loaded");
    SetErrorMode(err_mode); // Restore error mode
    KA_TRACE(10, ("__kmp_runtime_initialize: dynamic library pinned\n"));
711 | } |
712 | #endif |
713 | |
714 | InitializeCriticalSection(&__kmp_win32_section); |
715 | #if USE_ITT_BUILD |
  __kmp_itt_system_object_created(&__kmp_win32_section, "Critical Section");
717 | #endif /* USE_ITT_BUILD */ |
718 | __kmp_initialize_system_tick(); |
719 | |
720 | #if (KMP_ARCH_X86 || KMP_ARCH_X86_64) |
721 | if (!__kmp_cpuinfo.initialized) { |
    __kmp_query_cpuid(&__kmp_cpuinfo);
723 | } |
724 | #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ |
725 | |
726 | /* Set up minimum number of threads to switch to TLS gtid */ |
727 | #if KMP_OS_WINDOWS && !KMP_DYNAMIC_LIB |
728 | // Windows* OS, static library. |
  /* A new thread may use stack space previously used by another thread that
     has since terminated. On Windows* OS, in the case of static linking, we
     do not know the moment of thread termination, and our structures
     (__kmp_threads and __kmp_root arrays) still keep info about dead
     threads. This leads to a problem in __kmp_get_global_thread_id(): it
     wrongly finds a gtid (by searching through the stack addresses of all
     known threads) for an unregistered foreign thread.

     Setting __kmp_tls_gtid_min to 0 works around this problem:
     __kmp_get_global_thread_id() does not search through stacks, but gets
     the gtid from TLS immediately.
     --ln
  */
742 | __kmp_tls_gtid_min = 0; |
743 | #else |
744 | __kmp_tls_gtid_min = KMP_TLS_GTID_MIN; |
745 | #endif |
746 | |
747 | /* for the static library */ |
748 | if (!__kmp_gtid_threadprivate_key) { |
749 | __kmp_gtid_threadprivate_key = TlsAlloc(); |
750 | if (__kmp_gtid_threadprivate_key == TLS_OUT_OF_INDEXES) { |
751 | KMP_FATAL(TLSOutOfIndexes); |
752 | } |
753 | } |
754 | |
755 | // Load ntdll.dll. |
  /* A simple GetModuleHandle("ntdll.dll") is not suitable due to a security
     issue (see http://www.microsoft.com/technet/security/advisory/2269637.mspx).
     We have to specify the full path to the library. */
759 | __kmp_str_buf_init(&path); |
760 | path_size = GetSystemDirectory(path.str, path.size); |
761 | KMP_DEBUG_ASSERT(path_size > 0); |
762 | if (path_size >= path.size) { |
763 | // Buffer is too short. Expand the buffer and try again. |
764 | __kmp_str_buf_reserve(&path, path_size); |
765 | path_size = GetSystemDirectory(path.str, path.size); |
766 | KMP_DEBUG_ASSERT(path_size > 0); |
767 | } |
768 | if (path_size > 0 && path_size < path.size) { |
    // Now we have the system directory name in the buffer.
    // Append a backslash and the dll name to form the full path.
    path.used = path_size;
    __kmp_str_buf_print(&path, "\\%s", "ntdll.dll");
773 | |
774 | // Now load ntdll using full path. |
775 | ntdll = GetModuleHandle(path.str); |
776 | } |
777 | |
778 | KMP_DEBUG_ASSERT(ntdll != NULL); |
779 | if (ntdll != NULL) { |
780 | NtQuerySystemInformation = (NtQuerySystemInformation_t)GetProcAddress( |
        ntdll, "NtQuerySystemInformation");
782 | } |
783 | KMP_DEBUG_ASSERT(NtQuerySystemInformation != NULL); |
784 | |
785 | #if KMP_GROUP_AFFINITY |
786 | // Load kernel32.dll. |
787 | // Same caveat - must use full system path name. |
788 | if (path_size > 0 && path_size < path.size) { |
789 | // Truncate the buffer back to just the system path length, |
790 | // discarding "\\ntdll.dll", and replacing it with "kernel32.dll". |
791 | path.used = path_size; |
    __kmp_str_buf_print(&path, "\\%s", "kernel32.dll");
793 | |
794 | // Load kernel32.dll using full path. |
795 | kernel32 = GetModuleHandle(path.str); |
    KA_TRACE(10, ("__kmp_runtime_initialize: kernel32.dll = %s\n", path.str));
797 | |
798 | // Load the function pointers to kernel32.dll routines |
799 | // that may or may not exist on this system. |
800 | if (kernel32 != NULL) { |
      __kmp_GetActiveProcessorCount =
          (kmp_GetActiveProcessorCount_t)GetProcAddress(
              kernel32, "GetActiveProcessorCount");
      __kmp_GetActiveProcessorGroupCount =
          (kmp_GetActiveProcessorGroupCount_t)GetProcAddress(
              kernel32, "GetActiveProcessorGroupCount");
      __kmp_GetThreadGroupAffinity =
          (kmp_GetThreadGroupAffinity_t)GetProcAddress(
              kernel32, "GetThreadGroupAffinity");
      __kmp_SetThreadGroupAffinity =
          (kmp_SetThreadGroupAffinity_t)GetProcAddress(
              kernel32, "SetThreadGroupAffinity");

      KA_TRACE(10, ("__kmp_runtime_initialize: __kmp_GetActiveProcessorCount"
                    " = %p\n",
                    __kmp_GetActiveProcessorCount));
      KA_TRACE(10, ("__kmp_runtime_initialize: "
                    "__kmp_GetActiveProcessorGroupCount = %p\n",
                    __kmp_GetActiveProcessorGroupCount));
      KA_TRACE(10, ("__kmp_runtime_initialize: __kmp_GetThreadGroupAffinity"
                    " = %p\n",
                    __kmp_GetThreadGroupAffinity));
      KA_TRACE(10, ("__kmp_runtime_initialize: __kmp_SetThreadGroupAffinity"
                    " = %p\n",
                    __kmp_SetThreadGroupAffinity));
      KA_TRACE(10, ("__kmp_runtime_initialize: sizeof(kmp_affin_mask_t) = %d\n",
                    sizeof(kmp_affin_mask_t)));
828 | |
829 | // See if group affinity is supported on this system. |
830 | // If so, calculate the #groups and #procs. |
831 | // |
832 | // Group affinity was introduced with Windows* 7 OS and |
833 | // Windows* Server 2008 R2 OS. |
834 | if ((__kmp_GetActiveProcessorCount != NULL) && |
835 | (__kmp_GetActiveProcessorGroupCount != NULL) && |
836 | (__kmp_GetThreadGroupAffinity != NULL) && |
837 | (__kmp_SetThreadGroupAffinity != NULL) && |
838 | ((__kmp_num_proc_groups = __kmp_GetActiveProcessorGroupCount()) > |
839 | 1)) { |
840 | // Calculate the total number of active OS procs. |
841 | int i; |
842 | |
        KA_TRACE(10, ("__kmp_runtime_initialize: %d processor groups"
                      " detected\n",
845 | __kmp_num_proc_groups)); |
846 | |
847 | __kmp_xproc = 0; |
848 | |
849 | for (i = 0; i < __kmp_num_proc_groups; i++) { |
850 | DWORD size = __kmp_GetActiveProcessorCount(i); |
851 | __kmp_xproc += size; |
          KA_TRACE(10, ("__kmp_runtime_initialize: proc group %d size = %d\n",
853 | i, size)); |
854 | } |
855 | } else { |
        KA_TRACE(10, ("__kmp_runtime_initialize: %d processor groups"
                      " detected\n",
858 | __kmp_num_proc_groups)); |
859 | } |
860 | } |
861 | } |
862 | if (__kmp_num_proc_groups <= 1) { |
863 | GetSystemInfo(&info); |
864 | __kmp_xproc = info.dwNumberOfProcessors; |
865 | } |
866 | #else |
867 | (void)kernel32; |
868 | GetSystemInfo(&info); |
869 | __kmp_xproc = info.dwNumberOfProcessors; |
870 | #endif /* KMP_GROUP_AFFINITY */ |
871 | |
872 | // If the OS said there were 0 procs, take a guess and use a value of 2. |
873 | // This is done for Linux* OS, also. Do we need error / warning? |
874 | if (__kmp_xproc <= 0) { |
875 | __kmp_xproc = 2; |
876 | } |
877 | |
  KA_TRACE(5,
           ("__kmp_runtime_initialize: total processors = %d\n", __kmp_xproc));
880 | |
  __kmp_str_buf_free(&path);
882 | |
883 | #if USE_ITT_BUILD |
884 | __kmp_itt_initialize(); |
885 | #endif /* USE_ITT_BUILD */ |
886 | |
887 | __kmp_init_runtime = TRUE; |
888 | } // __kmp_runtime_initialize |
889 | |
890 | void __kmp_runtime_destroy(void) { |
891 | if (!__kmp_init_runtime) { |
892 | return; |
893 | } |
894 | |
895 | #if USE_ITT_BUILD |
896 | __kmp_itt_destroy(); |
897 | #endif /* USE_ITT_BUILD */ |
898 | |
  /* We can't DeleteCriticalSection(&__kmp_win32_section) yet, */
  /* due to the KX_TRACE() commands */
  KA_TRACE(40, ("__kmp_runtime_destroy\n"));
902 | |
903 | if (__kmp_gtid_threadprivate_key) { |
904 | TlsFree(__kmp_gtid_threadprivate_key); |
905 | __kmp_gtid_threadprivate_key = 0; |
906 | } |
907 | |
908 | __kmp_affinity_uninitialize(); |
909 | DeleteCriticalSection(&__kmp_win32_section); |
910 | |
911 | ntdll = NULL; |
912 | NtQuerySystemInformation = NULL; |
913 | |
914 | #if KMP_ARCH_X86_64 |
915 | kernel32 = NULL; |
916 | __kmp_GetActiveProcessorCount = NULL; |
917 | __kmp_GetActiveProcessorGroupCount = NULL; |
918 | __kmp_GetThreadGroupAffinity = NULL; |
919 | __kmp_SetThreadGroupAffinity = NULL; |
920 | #endif // KMP_ARCH_X86_64 |
921 | |
922 | __kmp_init_runtime = FALSE; |
923 | } |
924 | |
925 | void __kmp_terminate_thread(int gtid) { |
926 | kmp_info_t *th = __kmp_threads[gtid]; |
927 | |
928 | if (!th) |
929 | return; |
930 | |
  KA_TRACE(10, ("__kmp_terminate_thread: kill (%d)\n", gtid));
932 | |
933 | if (TerminateThread(th->th.th_info.ds.ds_thread, (DWORD)-1) == FALSE) { |
934 | /* It's OK, the thread may have exited already */ |
935 | } |
936 | __kmp_free_handle(th->th.th_info.ds.ds_thread); |
937 | } |
938 | |
939 | void __kmp_clear_system_time(void) { |
940 | LARGE_INTEGER time; |
941 | QueryPerformanceCounter(&time); |
942 | __kmp_win32_time = (kmp_int64)time.QuadPart; |
943 | } |
944 | |
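/* Cache the QueryPerformanceCounter period (seconds per tick) once; the
   elapsed-time routines below scale raw counter values by __kmp_win32_tick. */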
945 | void __kmp_initialize_system_tick(void) { |
946 | { |
947 | BOOL status; |
948 | LARGE_INTEGER freq; |
949 | |
950 | status = QueryPerformanceFrequency(&freq); |
951 | if (!status) { |
952 | DWORD error = GetLastError(); |
      __kmp_fatal(KMP_MSG(FunctionError, "QueryPerformanceFrequency()"),
                  KMP_ERR(error), __kmp_msg_null);
956 | } else { |
957 | __kmp_win32_tick = ((double)1.0) / (double)freq.QuadPart; |
958 | } |
959 | } |
960 | } |
961 | |
962 | /* Calculate the elapsed wall clock time for the user */ |
963 | |
964 | void __kmp_elapsed(double *t) { |
965 | LARGE_INTEGER now; |
966 | QueryPerformanceCounter(&now); |
967 | *t = ((double)now.QuadPart) * __kmp_win32_tick; |
968 | } |
969 | |
970 | /* Calculate the elapsed wall clock tick for the user */ |
971 | |
972 | void __kmp_elapsed_tick(double *t) { *t = __kmp_win32_tick; } |
973 | |
974 | void __kmp_read_system_time(double *delta) { |
975 | if (delta != NULL) { |
976 | LARGE_INTEGER now; |
977 | QueryPerformanceCounter(&now); |
978 | *delta = ((double)(((kmp_int64)now.QuadPart) - __kmp_win32_time)) * |
979 | __kmp_win32_tick; |
980 | } |
981 | } |
982 | |
983 | /* Return the current time stamp in nsec */ |
984 | kmp_uint64 __kmp_now_nsec() { |
985 | LARGE_INTEGER now; |
986 | QueryPerformanceCounter(&now); |
987 | return 1e9 * __kmp_win32_tick * now.QuadPart; |
988 | } |
989 | |
990 | extern "C" void *__stdcall __kmp_launch_worker(void *arg) { |
991 | volatile void *stack_data; |
992 | void *exit_val; |
993 | void *padding = 0; |
994 | kmp_info_t *this_thr = (kmp_info_t *)arg; |
995 | int gtid; |
996 | |
997 | gtid = this_thr->th.th_info.ds.ds_gtid; |
998 | __kmp_gtid_set_specific(gtid); |
999 | #ifdef KMP_TDATA_GTID |
1000 | #error "This define causes problems with LoadLibrary() + declspec(thread) " \ |
1001 | "on Windows* OS. See CQ50564, tests kmp_load_library*.c and this MSDN " \ |
1002 | "reference: http://support.microsoft.com/kb/118816" |
1003 | //__kmp_gtid = gtid; |
1004 | #endif |
1005 | |
1006 | #if USE_ITT_BUILD |
1007 | __kmp_itt_thread_name(gtid); |
1008 | #endif /* USE_ITT_BUILD */ |
1009 | |
1010 | __kmp_affinity_bind_init_mask(gtid); |
1011 | |
1012 | #if KMP_ARCH_X86 || KMP_ARCH_X86_64 |
1013 | // Set FP control regs to be a copy of the parallel initialization thread's. |
1014 | __kmp_clear_x87_fpu_status_word(); |
  __kmp_load_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word);
  __kmp_load_mxcsr(&__kmp_init_mxcsr);
1017 | #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ |
1018 | |
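  // Stagger each worker's stack start address by gtid * __kmp_stkoffset
  // (settable via KMP_STACKOFFSET) using alloca, so that the hot stack
  // frames of different workers are less likely to alias in the cache.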
1019 | if (__kmp_stkoffset > 0 && gtid > 0) { |
1020 | padding = KMP_ALLOCA(gtid * __kmp_stkoffset); |
1021 | (void)padding; |
1022 | } |
1023 | |
1024 | KMP_FSYNC_RELEASING(&this_thr->th.th_info.ds.ds_alive); |
1025 | this_thr->th.th_info.ds.ds_thread_id = GetCurrentThreadId(); |
1026 | TCW_4(this_thr->th.th_info.ds.ds_alive, TRUE); |
1027 | |
1028 | if (TCR_4(__kmp_gtid_mode) < |
1029 | 2) { // check stack only if it is used to get gtid |
1030 | TCW_PTR(this_thr->th.th_info.ds.ds_stackbase, &stack_data); |
1031 | KMP_ASSERT(this_thr->th.th_info.ds.ds_stackgrow == FALSE); |
    __kmp_check_stack_overlap(this_thr);
  }
  KMP_MB();
  exit_val = __kmp_launch_thread(this_thr);
1036 | KMP_FSYNC_RELEASING(&this_thr->th.th_info.ds.ds_alive); |
1037 | TCW_4(this_thr->th.th_info.ds.ds_alive, FALSE); |
1038 | KMP_MB(); |
1039 | return exit_val; |
1040 | } |
1041 | |
1042 | #if KMP_USE_MONITOR |
1043 | /* The monitor thread controls all of the threads in the complex */ |
1044 | |
1045 | void *__stdcall __kmp_launch_monitor(void *arg) { |
1046 | DWORD wait_status; |
1047 | kmp_thread_t monitor; |
1048 | int status; |
1049 | int interval; |
1050 | kmp_info_t *this_thr = (kmp_info_t *)arg; |
1051 | |
1052 | KMP_DEBUG_ASSERT(__kmp_init_monitor); |
1053 | TCW_4(__kmp_init_monitor, 2); // AC: Signal library that monitor has started |
1054 | // TODO: hide "2" in enum (like {true,false,started}) |
1055 | this_thr->th.th_info.ds.ds_thread_id = GetCurrentThreadId(); |
1056 | TCW_4(this_thr->th.th_info.ds.ds_alive, TRUE); |
1057 | |
1058 | KMP_MB(); /* Flush all pending memory write invalidates. */ |
  KA_TRACE(10, ("__kmp_launch_monitor: launched\n"));
1060 | |
1061 | monitor = GetCurrentThread(); |
1062 | |
1063 | /* set thread priority */ |
1064 | status = SetThreadPriority(monitor, THREAD_PRIORITY_HIGHEST); |
1065 | if (!status) { |
1066 | DWORD error = GetLastError(); |
1067 | __kmp_fatal(KMP_MSG(CantSetThreadPriority), KMP_ERR(error), __kmp_msg_null); |
1068 | } |
1069 | |
1070 | /* register us as monitor */ |
1071 | __kmp_gtid_set_specific(KMP_GTID_MONITOR); |
1072 | #ifdef KMP_TDATA_GTID |
1073 | #error "This define causes problems with LoadLibrary() + declspec(thread) " \ |
1074 | "on Windows* OS. See CQ50564, tests kmp_load_library*.c and this MSDN " \ |
1075 | "reference: http://support.microsoft.com/kb/118816" |
1076 | //__kmp_gtid = KMP_GTID_MONITOR; |
1077 | #endif |
1078 | |
1079 | #if USE_ITT_BUILD |
1080 | __kmp_itt_thread_ignore(); // Instruct Intel(R) Threading Tools to ignore |
1081 | // monitor thread. |
1082 | #endif /* USE_ITT_BUILD */ |
1083 | |
1084 | KMP_MB(); /* Flush all pending memory write invalidates. */ |
1085 | |
1086 | interval = (1000 / __kmp_monitor_wakeups); /* in milliseconds */ |
1087 | |
1088 | while (!TCR_4(__kmp_global.g.g_done)) { |
1089 | /* This thread monitors the state of the system */ |
1090 | |
    KA_TRACE(15, ("__kmp_launch_monitor: update\n"));
1092 | |
1093 | wait_status = WaitForSingleObject(__kmp_monitor_ev, interval); |
1094 | |
1095 | if (wait_status == WAIT_TIMEOUT) { |
1096 | TCW_4(__kmp_global.g.g_time.dt.t_value, |
1097 | TCR_4(__kmp_global.g.g_time.dt.t_value) + 1); |
1098 | } |
1099 | |
1100 | KMP_MB(); /* Flush all pending memory write invalidates. */ |
1101 | } |
1102 | |
  KA_TRACE(10, ("__kmp_launch_monitor: finished\n"));
1104 | |
1105 | status = SetThreadPriority(monitor, THREAD_PRIORITY_NORMAL); |
1106 | if (!status) { |
1107 | DWORD error = GetLastError(); |
1108 | __kmp_fatal(KMP_MSG(CantSetThreadPriority), KMP_ERR(error), __kmp_msg_null); |
1109 | } |
1110 | |
1111 | if (__kmp_global.g.g_abort != 0) { |
1112 | /* now we need to terminate the worker threads */ |
1113 | /* the value of t_abort is the signal we caught */ |
1114 | int gtid; |
1115 | |
    KA_TRACE(10, ("__kmp_launch_monitor: terminate sig=%d\n",
1117 | (__kmp_global.g.g_abort))); |
1118 | |
1119 | /* terminate the OpenMP worker threads */ |
    /* TODO: this is not valid for sibling threads!
     * the uber master might not be 0 anymore. */
1122 | for (gtid = 1; gtid < __kmp_threads_capacity; ++gtid) |
1123 | __kmp_terminate_thread(gtid); |
1124 | |
1125 | __kmp_cleanup(); |
1126 | |
1127 | Sleep(0); |
1128 | |
    KA_TRACE(10,
             ("__kmp_launch_monitor: raise sig=%d\n", __kmp_global.g.g_abort));
1131 | |
1132 | if (__kmp_global.g.g_abort > 0) { |
1133 | raise(__kmp_global.g.g_abort); |
1134 | } |
1135 | } |
1136 | |
1137 | TCW_4(this_thr->th.th_info.ds.ds_alive, FALSE); |
1138 | |
1139 | KMP_MB(); |
1140 | return arg; |
1141 | } |
1142 | #endif |
1143 | |
1144 | void __kmp_create_worker(int gtid, kmp_info_t *th, size_t stack_size) { |
1145 | kmp_thread_t handle; |
1146 | DWORD idThread; |
1147 | |
  KA_TRACE(10, ("__kmp_create_worker: try to create thread (%d)\n", gtid));
1149 | |
1150 | th->th.th_info.ds.ds_gtid = gtid; |
1151 | |
1152 | if (KMP_UBER_GTID(gtid)) { |
1153 | int stack_data; |
1154 | |
1155 | /* TODO: GetCurrentThread() returns a pseudo-handle that is unsuitable for |
1156 | other threads to use. Is it appropriate to just use GetCurrentThread? |
1157 | When should we close this handle? When unregistering the root? */ |
1158 | { |
1159 | BOOL rc; |
1160 | rc = DuplicateHandle(GetCurrentProcess(), GetCurrentThread(), |
1161 | GetCurrentProcess(), &th->th.th_info.ds.ds_thread, 0, |
1162 | FALSE, DUPLICATE_SAME_ACCESS); |
1163 | KMP_ASSERT(rc); |
      KA_TRACE(10, (" __kmp_create_worker: ROOT Handle duplicated, th = %p, "
                    "handle = %" KMP_UINTPTR_SPEC "\n",
1166 | (LPVOID)th, th->th.th_info.ds.ds_thread)); |
1167 | th->th.th_info.ds.ds_thread_id = GetCurrentThreadId(); |
1168 | } |
1169 | if (TCR_4(__kmp_gtid_mode) < 2) { // check stack only if used to get gtid |
1170 | /* we will dynamically update the stack range if gtid_mode == 1 */ |
1171 | TCW_PTR(th->th.th_info.ds.ds_stackbase, &stack_data); |
1172 | TCW_PTR(th->th.th_info.ds.ds_stacksize, 0); |
1173 | TCW_4(th->th.th_info.ds.ds_stackgrow, TRUE); |
      __kmp_check_stack_overlap(th);
1175 | } |
1176 | } else { |
1177 | KMP_MB(); /* Flush all pending memory write invalidates. */ |
1178 | |
1179 | /* Set stack size for this thread now. */ |
    KA_TRACE(10,
             ("__kmp_create_worker: stack_size = %" KMP_SIZE_T_SPEC " bytes\n",
1182 | stack_size)); |
1183 | |
1184 | stack_size += gtid * __kmp_stkoffset; |
1185 | |
1186 | TCW_PTR(th->th.th_info.ds.ds_stacksize, stack_size); |
1187 | TCW_4(th->th.th_info.ds.ds_stackgrow, FALSE); |
1188 | |
    KA_TRACE(10,
             ("__kmp_create_worker: (before) stack_size = %" KMP_SIZE_T_SPEC
              " bytes, &__kmp_launch_worker = %p, th = %p, &idThread = %p\n",
1192 | (SIZE_T)stack_size, (LPTHREAD_START_ROUTINE)&__kmp_launch_worker, |
1193 | (LPVOID)th, &idThread)); |
1194 | |
1195 | handle = CreateThread( |
1196 | NULL, (SIZE_T)stack_size, (LPTHREAD_START_ROUTINE)__kmp_launch_worker, |
1197 | (LPVOID)th, STACK_SIZE_PARAM_IS_A_RESERVATION, &idThread); |
1198 | |
    KA_TRACE(10,
             ("__kmp_create_worker: (after) stack_size = %" KMP_SIZE_T_SPEC
              " bytes, &__kmp_launch_worker = %p, th = %p, "
              "idThread = %u, handle = %" KMP_UINTPTR_SPEC "\n",
1203 | (SIZE_T)stack_size, (LPTHREAD_START_ROUTINE)&__kmp_launch_worker, |
1204 | (LPVOID)th, idThread, handle)); |
1205 | |
1206 | if (handle == 0) { |
1207 | DWORD error = GetLastError(); |
1208 | __kmp_fatal(KMP_MSG(CantCreateThread), KMP_ERR(error), __kmp_msg_null); |
1209 | } else { |
1210 | th->th.th_info.ds.ds_thread = handle; |
1211 | } |
1212 | |
1213 | KMP_MB(); /* Flush all pending memory write invalidates. */ |
1214 | } |
1215 | |
  KA_TRACE(10, ("__kmp_create_worker: done creating thread (%d)\n", gtid));
1217 | } |
1218 | |
1219 | int __kmp_still_running(kmp_info_t *th) { |
1220 | return (WAIT_TIMEOUT == WaitForSingleObject(th->th.th_info.ds.ds_thread, 0)); |
1221 | } |
1222 | |
1223 | #if KMP_USE_MONITOR |
1224 | void __kmp_create_monitor(kmp_info_t *th) { |
1225 | kmp_thread_t handle; |
1226 | DWORD idThread; |
1227 | int ideal, new_ideal; |
1228 | |
1229 | if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) { |
1230 | // We don't need monitor thread in case of MAX_BLOCKTIME |
    KA_TRACE(10, ("__kmp_create_monitor: skipping monitor thread because of "
                  "MAX blocktime\n"));
1233 | th->th.th_info.ds.ds_tid = 0; // this makes reap_monitor no-op |
1234 | th->th.th_info.ds.ds_gtid = 0; |
1235 | TCW_4(__kmp_init_monitor, 2); // Signal to stop waiting for monitor creation |
1236 | return; |
1237 | } |
  KA_TRACE(10, ("__kmp_create_monitor: try to create monitor\n"));
1239 | |
1240 | KMP_MB(); /* Flush all pending memory write invalidates. */ |
1241 | |
1242 | __kmp_monitor_ev = CreateEvent(NULL, TRUE, FALSE, NULL); |
1243 | if (__kmp_monitor_ev == NULL) { |
1244 | DWORD error = GetLastError(); |
1245 | __kmp_fatal(KMP_MSG(CantCreateEvent), KMP_ERR(error), __kmp_msg_null); |
1246 | } |
1247 | #if USE_ITT_BUILD |
  __kmp_itt_system_object_created(__kmp_monitor_ev, "Event");
1249 | #endif /* USE_ITT_BUILD */ |
1250 | |
1251 | th->th.th_info.ds.ds_tid = KMP_GTID_MONITOR; |
1252 | th->th.th_info.ds.ds_gtid = KMP_GTID_MONITOR; |
1253 | |
1254 | // FIXME - on Windows* OS, if __kmp_monitor_stksize = 0, figure out how |
1255 | // to automatically expand stacksize based on CreateThread error code. |
1256 | if (__kmp_monitor_stksize == 0) { |
1257 | __kmp_monitor_stksize = KMP_DEFAULT_MONITOR_STKSIZE; |
1258 | } |
1259 | if (__kmp_monitor_stksize < __kmp_sys_min_stksize) { |
1260 | __kmp_monitor_stksize = __kmp_sys_min_stksize; |
1261 | } |
1262 | |
  KA_TRACE(10, ("__kmp_create_monitor: requested stacksize = %d bytes\n",
1264 | (int)__kmp_monitor_stksize)); |
1265 | |
1266 | TCW_4(__kmp_global.g.g_time.dt.t_value, 0); |
1267 | |
1268 | handle = |
1269 | CreateThread(NULL, (SIZE_T)__kmp_monitor_stksize, |
1270 | (LPTHREAD_START_ROUTINE)__kmp_launch_monitor, (LPVOID)th, |
1271 | STACK_SIZE_PARAM_IS_A_RESERVATION, &idThread); |
1272 | if (handle == 0) { |
1273 | DWORD error = GetLastError(); |
1274 | __kmp_fatal(KMP_MSG(CantCreateThread), KMP_ERR(error), __kmp_msg_null); |
1275 | } else |
1276 | th->th.th_info.ds.ds_thread = handle; |
1277 | |
1278 | KMP_MB(); /* Flush all pending memory write invalidates. */ |
1279 | |
  KA_TRACE(10, ("__kmp_create_monitor: monitor created %p\n",
1281 | (void *)th->th.th_info.ds.ds_thread)); |
1282 | } |
1283 | #endif |
1284 | |
/* Check to see if the thread is still alive.
   NOTE: The ExitProcess(code) system call causes all threads to terminate
   with an exit_val = code. Because of this we cannot rely on exit_val having
   any particular value. So this routine may return STILL_ACTIVE in exit_val
   even after the thread is dead. */
1290 | |
1291 | int __kmp_is_thread_alive(kmp_info_t *th, DWORD *exit_val) { |
1292 | DWORD rc; |
1293 | rc = GetExitCodeThread(th->th.th_info.ds.ds_thread, exit_val); |
1294 | if (rc == 0) { |
1295 | DWORD error = GetLastError(); |
    __kmp_fatal(KMP_MSG(FunctionError, "GetExitCodeThread()"), KMP_ERR(error),
1297 | __kmp_msg_null); |
1298 | } |
1299 | return (*exit_val == STILL_ACTIVE); |
1300 | } |
1301 | |
1302 | void __kmp_exit_thread(int exit_status) { |
1303 | ExitThread(exit_status); |
1304 | } // __kmp_exit_thread |
1305 | |
1306 | // This is a common part for both __kmp_reap_worker() and __kmp_reap_monitor(). |
1307 | static void __kmp_reap_common(kmp_info_t *th) { |
1308 | DWORD exit_val; |
1309 | |
1310 | KMP_MB(); /* Flush all pending memory write invalidates. */ |
1311 | |
  KA_TRACE(
      10, ("__kmp_reap_common: try to reap (%d)\n", th->th.th_info.ds.ds_gtid));
1314 | |
1315 | /* 2006-10-19: |
1316 | There are two opposite situations: |
1317 | 1. Windows* OS keep thread alive after it resets ds_alive flag and |
1318 | exits from thread function. (For example, see C70770/Q394281 "unloading of |
1319 | dll based on OMP is very slow".) |
1320 | 2. Windows* OS may kill thread before it resets ds_alive flag. |
1321 | |
1322 | Right solution seems to be waiting for *either* thread termination *or* |
1323 | ds_alive resetting. */ |
1324 | { |
1325 | // TODO: This code is very similar to KMP_WAIT. Need to generalize |
1326 | // KMP_WAIT to cover this usage also. |
1327 | void *obj = NULL; |
1328 | kmp_uint32 spins; |
1329 | kmp_uint64 time; |
1330 | #if USE_ITT_BUILD |
1331 | KMP_FSYNC_SPIN_INIT(obj, (void *)&th->th.th_info.ds.ds_alive); |
1332 | #endif /* USE_ITT_BUILD */ |
1333 | KMP_INIT_YIELD(spins); |
1334 | KMP_INIT_BACKOFF(time); |
1335 | do { |
1336 | #if USE_ITT_BUILD |
1337 | KMP_FSYNC_SPIN_PREPARE(obj); |
1338 | #endif /* USE_ITT_BUILD */ |
1339 | __kmp_is_thread_alive(th, &exit_val); |
1340 | KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time); |
1341 | } while (exit_val == STILL_ACTIVE && TCR_4(th->th.th_info.ds.ds_alive)); |
1342 | #if USE_ITT_BUILD |
1343 | if (exit_val == STILL_ACTIVE) { |
1344 | KMP_FSYNC_CANCEL(obj); |
1345 | } else { |
1346 | KMP_FSYNC_SPIN_ACQUIRED(obj); |
1347 | } |
1348 | #endif /* USE_ITT_BUILD */ |
1349 | } |
1350 | |
1351 | __kmp_free_handle(th->th.th_info.ds.ds_thread); |
1352 | |
  /* NOTE: The ExitProcess(code) system call causes all threads to terminate
     with an exit_val = code. Because of this we cannot rely on exit_val
     having any particular value. */
1356 | kmp_intptr_t e = (kmp_intptr_t)exit_val; |
1357 | if (exit_val == STILL_ACTIVE) { |
    KA_TRACE(1, ("__kmp_reap_common: thread still active.\n"));
1359 | } else if ((void *)e != (void *)th) { |
    KA_TRACE(1, ("__kmp_reap_common: ExitProcess / TerminateThread used?\n"));
1361 | } |
1362 | |
  KA_TRACE(10,
           ("__kmp_reap_common: done reaping (%d), handle = %" KMP_UINTPTR_SPEC
            "\n",
1366 | th->th.th_info.ds.ds_gtid, th->th.th_info.ds.ds_thread)); |
1367 | |
1368 | th->th.th_info.ds.ds_thread = 0; |
1369 | th->th.th_info.ds.ds_tid = KMP_GTID_DNE; |
1370 | th->th.th_info.ds.ds_gtid = KMP_GTID_DNE; |
1371 | th->th.th_info.ds.ds_thread_id = 0; |
1372 | |
1373 | KMP_MB(); /* Flush all pending memory write invalidates. */ |
1374 | } |
1375 | |
1376 | #if KMP_USE_MONITOR |
1377 | void __kmp_reap_monitor(kmp_info_t *th) { |
1378 | int status; |
1379 | |
  KA_TRACE(10, ("__kmp_reap_monitor: try to reap %p\n",
1381 | (void *)th->th.th_info.ds.ds_thread)); |
1382 | |
1383 | // If monitor has been created, its tid and gtid should be KMP_GTID_MONITOR. |
1384 | // If both tid and gtid are 0, it means the monitor did not ever start. |
1385 | // If both tid and gtid are KMP_GTID_DNE, the monitor has been shut down. |
1386 | KMP_DEBUG_ASSERT(th->th.th_info.ds.ds_tid == th->th.th_info.ds.ds_gtid); |
1387 | if (th->th.th_info.ds.ds_gtid != KMP_GTID_MONITOR) { |
    KA_TRACE(10, ("__kmp_reap_monitor: monitor did not start, returning\n"));
1389 | return; |
1390 | } |
1391 | |
1392 | KMP_MB(); /* Flush all pending memory write invalidates. */ |
1393 | |
1394 | status = SetEvent(__kmp_monitor_ev); |
1395 | if (status == FALSE) { |
1396 | DWORD error = GetLastError(); |
1397 | __kmp_fatal(KMP_MSG(CantSetEvent), KMP_ERR(error), __kmp_msg_null); |
1398 | } |
  KA_TRACE(10, ("__kmp_reap_monitor: reaping thread (%d)\n",
1400 | th->th.th_info.ds.ds_gtid)); |
1401 | __kmp_reap_common(th); |
1402 | |
1403 | __kmp_free_handle(__kmp_monitor_ev); |
1404 | |
1405 | KMP_MB(); /* Flush all pending memory write invalidates. */ |
1406 | } |
1407 | #endif |
1408 | |
1409 | void __kmp_reap_worker(kmp_info_t *th) { |
  KA_TRACE(10, ("__kmp_reap_worker: reaping thread (%d)\n",
1411 | th->th.th_info.ds.ds_gtid)); |
1412 | __kmp_reap_common(th); |
1413 | } |
1414 | |
1415 | #if KMP_HANDLE_SIGNALS |
1416 | |
1417 | static void __kmp_team_handler(int signo) { |
1418 | if (__kmp_global.g.g_abort == 0) { |
1419 | // Stage 1 signal handler, let's shut down all of the threads. |
1420 | if (__kmp_debug_buf) { |
1421 | __kmp_dump_debug_buffer(); |
1422 | } |
1423 | KMP_MB(); // Flush all pending memory write invalidates. |
1424 | TCW_4(__kmp_global.g.g_abort, signo); |
1425 | KMP_MB(); // Flush all pending memory write invalidates. |
1426 | TCW_4(__kmp_global.g.g_done, TRUE); |
1427 | KMP_MB(); // Flush all pending memory write invalidates. |
1428 | } |
1429 | } // __kmp_team_handler |
1430 | |
1431 | static sig_func_t __kmp_signal(int signum, sig_func_t handler) { |
  sig_func_t old = signal(signum, handler);
1433 | if (old == SIG_ERR) { |
1434 | int error = errno; |
    __kmp_fatal(KMP_MSG(FunctionError, "signal"), KMP_ERR(error),
1436 | __kmp_msg_null); |
1437 | } |
1438 | return old; |
1439 | } |
1440 | |
1441 | static void __kmp_install_one_handler(int sig, sig_func_t handler, |
1442 | int parallel_init) { |
1443 | sig_func_t old; |
1444 | KMP_MB(); /* Flush all pending memory write invalidates. */ |
  KB_TRACE(60, ("__kmp_install_one_handler: called: sig=%d\n", sig));
  if (parallel_init) {
    old = __kmp_signal(sig, handler);
    // SIG_DFL on Windows* OS is NULL or 0.
    if (old == __kmp_sighldrs[sig]) {
      __kmp_siginstalled[sig] = 1;
    } else { // Restore/keep user's handler if one was previously installed.
      old = __kmp_signal(sig, old);
    }
1454 | } else { |
    // Save initial/system signal handlers to see if user handlers installed.
    // 2009-09-23: This is dead code. On Windows* OS __kmp_install_signals
    // is called once with parallel_init == TRUE.
    old = __kmp_signal(sig, SIG_DFL);
    __kmp_sighldrs[sig] = old;
    __kmp_signal(sig, old);
1461 | } |
1462 | KMP_MB(); /* Flush all pending memory write invalidates. */ |
1463 | } // __kmp_install_one_handler |
1464 | |
1465 | static void __kmp_remove_one_handler(int sig) { |
1466 | if (__kmp_siginstalled[sig]) { |
1467 | sig_func_t old; |
1468 | KMP_MB(); // Flush all pending memory write invalidates. |
    KB_TRACE(60, ("__kmp_remove_one_handler: called: sig=%d\n", sig));
    old = __kmp_signal(sig, __kmp_sighldrs[sig]);
    if (old != __kmp_team_handler) {
      KB_TRACE(10, ("__kmp_remove_one_handler: oops, not our handler, "
                    "restoring: sig=%d\n",
                    sig));
      old = __kmp_signal(sig, old);
1476 | } |
1477 | __kmp_sighldrs[sig] = NULL; |
1478 | __kmp_siginstalled[sig] = 0; |
1479 | KMP_MB(); // Flush all pending memory write invalidates. |
1480 | } |
1481 | } // __kmp_remove_one_handler |
1482 | |
1483 | void __kmp_install_signals(int parallel_init) { |
  KB_TRACE(10, ("__kmp_install_signals: called\n"));
1485 | if (!__kmp_handle_signals) { |
    KB_TRACE(10, ("__kmp_install_signals: KMP_HANDLE_SIGNALS is false - "
                  "handlers not installed\n"));
1488 | return; |
1489 | } |
1490 | __kmp_install_one_handler(SIGINT, handler: __kmp_team_handler, parallel_init); |
1491 | __kmp_install_one_handler(SIGILL, handler: __kmp_team_handler, parallel_init); |
1492 | __kmp_install_one_handler(SIGABRT, handler: __kmp_team_handler, parallel_init); |
1493 | __kmp_install_one_handler(SIGFPE, handler: __kmp_team_handler, parallel_init); |
1494 | __kmp_install_one_handler(SIGSEGV, handler: __kmp_team_handler, parallel_init); |
1495 | __kmp_install_one_handler(SIGTERM, handler: __kmp_team_handler, parallel_init); |
1496 | } // __kmp_install_signals |
1497 | |
1498 | void __kmp_remove_signals(void) { |
1499 | int sig; |
1500 | KB_TRACE(10, ("__kmp_remove_signals: called\n" )); |
1501 | for (sig = 1; sig < NSIG; ++sig) { |
1502 | __kmp_remove_one_handler(sig); |
1503 | } |
1504 | } // __kmp_remove_signals |
1505 | |
1506 | #endif // KMP_HANDLE_SIGNALS |
1507 | |
/* Put the thread to sleep for the given number of milliseconds. */
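// SleepEx returns 0 when the timeout elapses; because the sleep is not
// alertable, any nonzero return is unexpected and treated as a fatal error.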
void __kmp_thread_sleep(int millis) {
  DWORD status;

  status = SleepEx((DWORD)millis, FALSE);
  if (status) {
    DWORD error = GetLastError();
    __kmp_fatal(KMP_MSG(FunctionError, "SleepEx()"), KMP_ERR(error),
                __kmp_msg_null);
  }
}

// Determine whether the given address is mapped into the current address
// space.
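// VirtualQuery fills in a MEMORY_BASIC_INFORMATION record for the region
// containing the address; the address counts as mapped only if that region
// is committed with a protection that allows data access.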
int __kmp_is_address_mapped(void *addr) {
  MEMORY_BASIC_INFORMATION lpBuffer;
  SIZE_T dwLength;

  dwLength = sizeof(MEMORY_BASIC_INFORMATION);

  // If VirtualQuery fails, lpBuffer is not filled in; treat the address as
  // unmapped in that case.
  if (VirtualQuery(addr, &lpBuffer, dwLength) == 0)
    return FALSE;

  return !(((lpBuffer.State == MEM_RESERVE) || (lpBuffer.State == MEM_FREE)) ||
           ((lpBuffer.Protect == PAGE_NOACCESS) ||
            (lpBuffer.Protect == PAGE_EXECUTE)));
}

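// Read the high-resolution performance counter. Note that the value is in
// QueryPerformanceCounter ticks, not CPU cycles.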
kmp_uint64 __kmp_hardware_timestamp(void) {
  kmp_uint64 r = 0;

  QueryPerformanceCounter((LARGE_INTEGER *)&r);
  return r;
}

/* Free handle and check the error code */
void __kmp_free_handle(kmp_thread_t tHandle) {
  /* Also called with an argument of type HANDLE, so kmp_thread_t is assumed
     to be defined as HANDLE. */
  BOOL rc;
  rc = CloseHandle(tHandle);
  if (!rc) {
    DWORD error = GetLastError();
    __kmp_fatal(KMP_MSG(CantCloseHandle), KMP_ERR(error), __kmp_msg_null);
  }
}

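/* Return the number of threads currently running in the system, capped at
   "max" (or at INT_MAX if max <= 0), or -1 on error. Results are cached: the
   process table is re-queried at most once per __kmp_load_balance_interval
   seconds. The snapshot is obtained from
   NtQuerySystemInformation(SystemProcessInformation), whose variable-length
   process records are chained via NextEntryOffset.

   Usage sketch (illustrative only, not the actual call site in the runtime):

     int nrunning = __kmp_get_load_balance(__kmp_avail_proc);
     if (nrunning != -1 && nrunning < __kmp_avail_proc) {
       // Some cores are idle; dynamic mode may allow more threads.
     }
*/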
int __kmp_get_load_balance(int max) {
  static ULONG glb_buff_size = 100 * 1024;

  // Saved count of the running threads for the thread balance algorithm
  static int glb_running_threads = 0;
  static double glb_call_time = 0; /* Thread balance algorithm call time */

  int running_threads = 0; // Number of running threads in the system.
  NTSTATUS status = 0;
  ULONG buff_size = 0;
  ULONG info_size = 0;
  void *buffer = NULL;
  PSYSTEM_PROCESS_INFORMATION spi = NULL;
  int first_time = 1;

  double call_time = 0.0;

  __kmp_elapsed(&call_time);

  if (glb_call_time &&
      (call_time - glb_call_time < __kmp_load_balance_interval)) {
    running_threads = glb_running_threads;
    goto finish;
  }
  glb_call_time = call_time;

  // Do not spend time running the algorithm if we have a permanent error.
  if (NtQuerySystemInformation == NULL) {
    running_threads = -1;
    goto finish;
  }

  if (max <= 0) {
    max = INT_MAX;
  }

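  // The size needed for the process snapshot is unknown in advance; retry
  // with a doubled buffer until NtQuerySystemInformation stops returning
  // STATUS_INFO_LENGTH_MISMATCH.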
  do {

    if (first_time) {
      buff_size = glb_buff_size;
    } else {
      buff_size = 2 * buff_size;
    }

    buffer = KMP_INTERNAL_REALLOC(buffer, buff_size);
    if (buffer == NULL) {
      running_threads = -1;
      goto finish;
    }
    status = NtQuerySystemInformation(SystemProcessInformation, buffer,
                                      buff_size, &info_size);
    first_time = 0;

  } while (status == STATUS_INFO_LENGTH_MISMATCH);
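  // Remember the buffer size that succeeded so the next call starts there.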
  glb_buff_size = buff_size;

#define CHECK(cond)                                                            \
  {                                                                            \
    KMP_DEBUG_ASSERT(cond);                                                    \
    if (!(cond)) {                                                             \
      running_threads = -1;                                                    \
      goto finish;                                                             \
    }                                                                          \
  }

  CHECK(buff_size >= info_size);
  spi = PSYSTEM_PROCESS_INFORMATION(buffer);
  for (;;) {
    ptrdiff_t offset = uintptr_t(spi) - uintptr_t(buffer);
    CHECK(0 <= offset &&
          offset + sizeof(SYSTEM_PROCESS_INFORMATION) < info_size);
    HANDLE pid = spi->ProcessId;
    ULONG num = spi->NumberOfThreads;
    CHECK(num >= 1);
    size_t spi_size =
        sizeof(SYSTEM_PROCESS_INFORMATION) + sizeof(SYSTEM_THREAD) * (num - 1);
    CHECK(offset + spi_size <
          info_size); // Make sure the process info record fits the buffer.
    if (spi->NextEntryOffset != 0) {
      CHECK(spi_size <=
            spi->NextEntryOffset); // And does not overlap the next record.
    }
    // pid == 0 corresponds to the System Idle Process. It always has running
    // threads on all cores, so its threads are not counted.
    if (pid != 0) {
      for (ULONG i = 0; i < num; ++i) {
        THREAD_STATE state = spi->Threads[i].State;
        // Count threads in the Running state. (TODO: an earlier comment said
        // "Ready or Running"; decide whether Ready threads should be counted
        // as well.)
        if (state == StateRunning) {
          ++running_threads;
          // Stop counting once the limit is reached; the caller only needs
          // to know whether the limit was hit.
          if (running_threads >= max) {
            goto finish;
          }
        }
      }
    }
    if (spi->NextEntryOffset == 0) {
      break;
    }
    spi = PSYSTEM_PROCESS_INFORMATION(uintptr_t(spi) + spi->NextEntryOffset);
  }

#undef CHECK

finish: // Clean up and exit.

  if (buffer != NULL) {
    KMP_INTERNAL_FREE(buffer);
  }

  glb_running_threads = running_threads;

  return running_threads;
} //__kmp_get_load_balance()

// Find a symbol in the loaded modules.
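// Roughly a Windows analogue of dlsym(): with next == false the first loaded
// module that exports "name" wins; with next == true the module containing
// this function is skipped, similar in spirit to RTLD_NEXT.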
void *__kmp_lookup_symbol(const char *name, bool next) {
  HANDLE process = GetCurrentProcess();
  DWORD needed;
  HMODULE *modules = nullptr;
  // The first call only obtains the size of the module list.
  if (!EnumProcessModules(process, modules, 0, &needed))
    return nullptr;
  DWORD num_modules = needed / sizeof(HMODULE);
  modules = (HMODULE *)malloc(num_modules * sizeof(HMODULE));
  if (!modules)
    return nullptr;
  if (!EnumProcessModules(process, modules, needed, &needed)) {
    free(modules);
    return nullptr;
  }
  HMODULE curr_module = nullptr;
  if (next) {
    // The current module needs to be skipped if the next flag is true.
    if (!GetModuleHandleEx(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS,
                           (LPCTSTR)&__kmp_lookup_symbol, &curr_module)) {
      free(modules);
      return nullptr;
    }
  }
  void *proc = nullptr;
  for (uint32_t i = 0; i < num_modules; i++) {
    if (next && modules[i] == curr_module)
      continue;
    proc = (void *)GetProcAddress(modules[i], name);
    if (proc)
      break;
  }
  free(modules);
  return proc;
}

// Hidden helper task support. Hidden helper threads are not implemented on
// Windows, so each of these entry points aborts if it is ever reached.
void __kmp_hidden_helper_worker_thread_wait() {
  KMP_ASSERT(0 && "Hidden helper task is not supported on Windows");
}

void __kmp_do_initialize_hidden_helper_threads() {
  KMP_ASSERT(0 && "Hidden helper task is not supported on Windows");
}

void __kmp_hidden_helper_threads_initz_wait() {
  KMP_ASSERT(0 && "Hidden helper task is not supported on Windows");
}

void __kmp_hidden_helper_initz_release() {
  KMP_ASSERT(0 && "Hidden helper task is not supported on Windows");
}

void __kmp_hidden_helper_main_thread_wait() {
  KMP_ASSERT(0 && "Hidden helper task is not supported on Windows");
}

void __kmp_hidden_helper_main_thread_release() {
  KMP_ASSERT(0 && "Hidden helper task is not supported on Windows");
}

void __kmp_hidden_helper_worker_thread_signal() {
  KMP_ASSERT(0 && "Hidden helper task is not supported on Windows");
}

void __kmp_hidden_helper_threads_deinitz_wait() {
  KMP_ASSERT(0 && "Hidden helper task is not supported on Windows");
}

void __kmp_hidden_helper_threads_deinitz_release() {
  KMP_ASSERT(0 && "Hidden helper task is not supported on Windows");
}