/*
 * kmp_wait_release.h -- Wait/Release implementation
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef KMP_WAIT_RELEASE_H
#define KMP_WAIT_RELEASE_H

#include "kmp.h"
#include "kmp_itt.h"
#include "kmp_stats.h"
#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

/*!
@defgroup WAIT_RELEASE Wait/Release operations

The definitions and functions here implement the lowest level thread
synchronizations of suspending a thread and awaking it. They are used to build
higher level operations such as barriers and fork/join.
*/
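
/*!
A minimal usage sketch (illustrative only -- real call sites live in the
barrier and tasking code, and `go_flag` / `this_thr` here are hypothetical
locals, not existing globals):
@code
// Waiter: block until the 64-bit location reaches the checker value.
kmp_flag_64<> flag(&go_flag, (kmp_uint64)KMP_BARRIER_STATE_BUMP);
flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));

// Releaser: bump the flag to its release state and wake any sleeping waiter.
kmp_flag_64<> go(&go_flag);
go.release();
@endcode
*/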

/*!
@ingroup WAIT_RELEASE
@{
*/

struct flag_properties {
  unsigned int type : 16;
  unsigned int reserved : 16;
};

template <enum flag_type FlagType> struct flag_traits {};

template <> struct flag_traits<flag32> {
  typedef kmp_uint32 flag_t;
  static const flag_type t = flag32;
  static inline flag_t tcr(flag_t f) { return TCR_4(f); }
  static inline flag_t test_then_add4(volatile flag_t *f) {
    return KMP_TEST_THEN_ADD4_32(RCAST(volatile kmp_int32 *, f));
  }
  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_OR32(f, v);
  }
  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_AND32(f, v);
  }
};

template <> struct flag_traits<atomic_flag64> {
  typedef kmp_uint64 flag_t;
  static const flag_type t = atomic_flag64;
  static inline flag_t tcr(flag_t f) { return TCR_8(f); }
  static inline flag_t test_then_add4(volatile flag_t *f) {
    return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
  }
  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_OR64(f, v);
  }
  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_AND64(f, v);
  }
};

template <> struct flag_traits<flag64> {
  typedef kmp_uint64 flag_t;
  static const flag_type t = flag64;
  static inline flag_t tcr(flag_t f) { return TCR_8(f); }
  static inline flag_t test_then_add4(volatile flag_t *f) {
    return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
  }
  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_OR64(f, v);
  }
  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_AND64(f, v);
  }
};

template <> struct flag_traits<flag_oncore> {
  typedef kmp_uint64 flag_t;
  static const flag_type t = flag_oncore;
  static inline flag_t tcr(flag_t f) { return TCR_8(f); }
  static inline flag_t test_then_add4(volatile flag_t *f) {
    return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
  }
  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_OR64(f, v);
  }
  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_AND64(f, v);
  }
};
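
// The traits above are consumed generically by the flag classes below, e.g.
// kmp_flag_native<>::internal_release() calls traits_type::test_then_add4()
// so one template body serves both 32-bit and 64-bit flags. A rough,
// hypothetical illustration (not an actual call site):
//
//   volatile kmp_uint64 f = 0;
//   (void)flag_traits<flag64>::test_then_add4(&f); // f is now 4
//   (void)flag_traits<flag64>::test_then_or(&f, KMP_BARRIER_SLEEP_STATE);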

/*! Base class for all flags */
template <flag_type FlagType> class kmp_flag {
protected:
  flag_properties t; /**< "Type" of the flag in loc */
  /**< Threads sleeping on this thread. */
  kmp_info_t *waiting_threads[1] = {nullptr};
  kmp_uint32 num_waiting_threads; /**< Num threads sleeping on this thread. */
  std::atomic<bool> *sleepLoc;

public:
  typedef flag_traits<FlagType> traits_type;
  kmp_flag() : t({FlagType, 0U}), num_waiting_threads(0), sleepLoc(nullptr) {}
  kmp_flag(int nwaiters)
      : t({FlagType, 0U}), num_waiting_threads(nwaiters), sleepLoc(nullptr) {}
  kmp_flag(std::atomic<bool> *sloc)
      : t({FlagType, 0U}), num_waiting_threads(0), sleepLoc(sloc) {}
  /*! @result the flag_type */
  flag_type get_type() { return (flag_type)(t.type); }

  /*! @param i in index into waiting_threads
   * @result the thread that is waiting at index i */
  kmp_info_t *get_waiter(kmp_uint32 i) {
    KMP_DEBUG_ASSERT(i < num_waiting_threads);
    return waiting_threads[i];
  }
  /*! @result num_waiting_threads */
  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
  /*! @param thr in the thread which is now waiting
   * Insert a waiting thread at index 0. */
  void set_waiter(kmp_info_t *thr) {
    waiting_threads[0] = thr;
    num_waiting_threads = 1;
  }
  enum barrier_type get_bt() { return bs_last_barrier; }
};

/*! Base class for wait/release volatile flag */
template <typename PtrType, flag_type FlagType, bool Sleepable>
class kmp_flag_native : public kmp_flag<FlagType> {
protected:
  volatile PtrType *loc;
  PtrType checker = (PtrType)0; /**< When flag==checker, it has been released */
  typedef flag_traits<FlagType> traits_type;

public:
  typedef PtrType flag_t;
  kmp_flag_native(volatile PtrType *p) : kmp_flag<FlagType>(), loc(p) {}
  kmp_flag_native(volatile PtrType *p, kmp_info_t *thr)
      : kmp_flag<FlagType>(1), loc(p) {
    this->waiting_threads[0] = thr;
  }
  kmp_flag_native(volatile PtrType *p, PtrType c)
      : kmp_flag<FlagType>(), loc(p), checker(c) {}
  kmp_flag_native(volatile PtrType *p, PtrType c, std::atomic<bool> *sloc)
      : kmp_flag<FlagType>(sloc), loc(p), checker(c) {}
  virtual ~kmp_flag_native() {}
  void *operator new(size_t size) { return __kmp_allocate(size); }
  void operator delete(void *p) { __kmp_free(p); }
  volatile PtrType *get() { return loc; }
  void *get_void_p() { return RCAST(void *, CCAST(PtrType *, loc)); }
  void set(volatile PtrType *new_loc) { loc = new_loc; }
  PtrType load() { return *loc; }
  void store(PtrType val) { *loc = val; }
  /*! @result true if the flag object has been released. */
  virtual bool done_check() {
    if (Sleepable && !(this->sleepLoc))
      return (traits_type::tcr(*(this->get())) & ~KMP_BARRIER_SLEEP_STATE) ==
             checker;
    else
      return traits_type::tcr(*(this->get())) == checker;
  }
  /*! @param old_loc in old value of flag
   * @result true if the flag's old value indicates it was released. */
  virtual bool done_check_val(PtrType old_loc) { return old_loc == checker; }
  /*! @result true if the flag object is not yet released.
   * Used in __kmp_wait_template like:
   * @code
   * while (flag.notdone_check()) { pause(); }
   * @endcode */
  virtual bool notdone_check() {
    return traits_type::tcr(*(this->get())) != checker;
  }
  /*! Trigger all waiting threads to run by modifying flag to release state. */
  void internal_release() {
    (void)traits_type::test_then_add4((volatile PtrType *)this->get());
  }
  /*! @result Actual flag value before sleep bit(s) set.
   * Notes that there is at least one thread sleeping on the flag by setting
   * sleep bit(s). */
  PtrType set_sleeping() {
    if (this->sleepLoc) {
      this->sleepLoc->store(true);
      return *(this->get());
    }
    return traits_type::test_then_or((volatile PtrType *)this->get(),
                                     KMP_BARRIER_SLEEP_STATE);
  }
  /*! Notes that there are no longer threads sleeping on the flag by clearing
   * sleep bit(s). */
  void unset_sleeping() {
    if (this->sleepLoc) {
      this->sleepLoc->store(false);
      return;
    }
    traits_type::test_then_and((volatile PtrType *)this->get(),
                               ~KMP_BARRIER_SLEEP_STATE);
  }
  /*! @param old_loc in old value of flag
   * Test if there are threads sleeping on the flag's old value in old_loc. */
  bool is_sleeping_val(PtrType old_loc) {
    if (this->sleepLoc)
      return this->sleepLoc->load();
    return old_loc & KMP_BARRIER_SLEEP_STATE;
  }
  /*! Test whether there are threads sleeping on the flag. */
  bool is_sleeping() {
    if (this->sleepLoc)
      return this->sleepLoc->load();
    return is_sleeping_val(*(this->get()));
  }
  bool is_any_sleeping() {
    if (this->sleepLoc)
      return this->sleepLoc->load();
    return is_sleeping_val(*(this->get()));
  }
  kmp_uint8 *get_stolen() { return NULL; }
};
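
// Rough sketch of how the sleep bit cooperates with suspend/resume (ordering
// only; the real suspend path, e.g. __kmp_suspend_template in the OS-specific
// utility code, also holds the suspend mutex and re-checks the flag):
//
//   Waiter                                   Releaser
//   ------                                   --------
//   old = flag->set_sleeping();              flag->internal_release();
//   if (!flag->done_check_val(old))          if (flag->is_any_sleeping())
//     block on condition variable;             flag->resume(wait_gtid);
//   flag->unset_sleeping();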

/*! Base class for wait/release atomic flag */
template <typename PtrType, flag_type FlagType, bool Sleepable>
class kmp_flag_atomic : public kmp_flag<FlagType> {
protected:
  std::atomic<PtrType> *loc; /**< Pointer to flag location to wait on */
  PtrType checker = (PtrType)0; /**< Flag==checker means it has been released */
public:
  typedef flag_traits<FlagType> traits_type;
  typedef PtrType flag_t;
  kmp_flag_atomic(std::atomic<PtrType> *p) : kmp_flag<FlagType>(), loc(p) {}
  kmp_flag_atomic(std::atomic<PtrType> *p, kmp_info_t *thr)
      : kmp_flag<FlagType>(1), loc(p) {
    this->waiting_threads[0] = thr;
  }
  kmp_flag_atomic(std::atomic<PtrType> *p, PtrType c)
      : kmp_flag<FlagType>(), loc(p), checker(c) {}
  kmp_flag_atomic(std::atomic<PtrType> *p, PtrType c, std::atomic<bool> *sloc)
      : kmp_flag<FlagType>(sloc), loc(p), checker(c) {}
  /*! @result the pointer to the actual flag */
  std::atomic<PtrType> *get() { return loc; }
  /*! @result void* pointer to the actual flag */
  void *get_void_p() { return RCAST(void *, loc); }
  /*! @param new_loc in set loc to point at new_loc */
  void set(std::atomic<PtrType> *new_loc) { loc = new_loc; }
  /*! @result flag value */
  PtrType load() { return loc->load(std::memory_order_acquire); }
  /*! @param val the new flag value to be stored */
  void store(PtrType val) { loc->store(val, std::memory_order_release); }
  /*! @result true if the flag object has been released. */
  bool done_check() {
    if (Sleepable && !(this->sleepLoc))
      return (this->load() & ~KMP_BARRIER_SLEEP_STATE) == checker;
    else
      return this->load() == checker;
  }
  /*! @param old_loc in old value of flag
   * @result true if the flag's old value indicates it was released. */
  bool done_check_val(PtrType old_loc) { return old_loc == checker; }
  /*! @result true if the flag object is not yet released.
   * Used in __kmp_wait_template like:
   * @code
   * while (flag.notdone_check()) { pause(); }
   * @endcode */
  bool notdone_check() { return this->load() != checker; }
  /*! Trigger all waiting threads to run by modifying flag to release state. */
  void internal_release() { KMP_ATOMIC_ADD(this->get(), 4); }
  /*! @result Actual flag value before sleep bit(s) set.
   * Notes that there is at least one thread sleeping on the flag by setting
   * sleep bit(s). */
  PtrType set_sleeping() {
    if (this->sleepLoc) {
      this->sleepLoc->store(true);
      return *(this->get());
    }
    return KMP_ATOMIC_OR(this->get(), KMP_BARRIER_SLEEP_STATE);
  }
  /*! Notes that there are no longer threads sleeping on the flag by clearing
   * sleep bit(s). */
  void unset_sleeping() {
    if (this->sleepLoc) {
      this->sleepLoc->store(false);
      return;
    }
    KMP_ATOMIC_AND(this->get(), ~KMP_BARRIER_SLEEP_STATE);
  }
  /*! @param old_loc in old value of flag
   * Test whether there are threads sleeping on flag's old value in old_loc. */
  bool is_sleeping_val(PtrType old_loc) {
    if (this->sleepLoc)
      return this->sleepLoc->load();
    return old_loc & KMP_BARRIER_SLEEP_STATE;
  }
  /*! Test whether there are threads sleeping on the flag. */
  bool is_sleeping() {
    if (this->sleepLoc)
      return this->sleepLoc->load();
    return is_sleeping_val(this->load());
  }
  bool is_any_sleeping() {
    if (this->sleepLoc)
      return this->sleepLoc->load();
    return is_sleeping_val(this->load());
  }
  kmp_uint8 *get_stolen() { return NULL; }
};

#if OMPT_SUPPORT
OMPT_NOINLINE
static void __ompt_implicit_task_end(kmp_info_t *this_thr,
                                     ompt_state_t ompt_state,
                                     ompt_data_t *tId) {
  int ds_tid = this_thr->th.th_info.ds.ds_tid;
  if (ompt_state == ompt_state_wait_barrier_implicit_parallel ||
      ompt_state == ompt_state_wait_barrier_teams) {
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
#if OMPT_OPTIONAL
    void *codeptr = NULL;
    ompt_sync_region_t sync_kind = ompt_sync_region_barrier_implicit_parallel;
    if (this_thr->th.ompt_thread_info.parallel_flags & ompt_parallel_league)
      sync_kind = ompt_sync_region_barrier_teams;
    if (ompt_enabled.ompt_callback_sync_region_wait) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          sync_kind, ompt_scope_end, NULL, tId, codeptr);
    }
    if (ompt_enabled.ompt_callback_sync_region) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
          sync_kind, ompt_scope_end, NULL, tId, codeptr);
    }
#endif
    if (!KMP_MASTER_TID(ds_tid)) {
      if (ompt_enabled.ompt_callback_implicit_task) {
        int flags = this_thr->th.ompt_thread_info.parallel_flags;
        flags = (flags & ompt_parallel_league) ? ompt_task_initial
                                               : ompt_task_implicit;
        ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
            ompt_scope_end, NULL, tId, 0, ds_tid, flags);
      }
      // return to idle state
      this_thr->th.ompt_thread_info.state = ompt_state_idle;
    } else {
      this_thr->th.ompt_thread_info.state = ompt_state_overhead;
    }
  }
}
#endif

/* Spin wait loop that first does pause/yield, then sleep. A thread that calls
   __kmp_wait_* must make certain that another thread calls __kmp_release
   to wake it back up to prevent deadlocks!

   NOTE: We may not belong to a team at this point. */
template <class C, bool final_spin, bool Cancellable = false,
          bool Sleepable = true>
static inline bool
__kmp_wait_template(kmp_info_t *this_thr,
                    C *flag USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
#if USE_ITT_BUILD && USE_ITT_NOTIFY
  volatile void *spin = flag->get();
#endif
  kmp_uint32 spins;
  int th_gtid;
  int tasks_completed = FALSE;
#if !KMP_USE_MONITOR
  kmp_uint64 poll_count;
  kmp_uint64 hibernate_goal;
#else
  kmp_uint32 hibernate;
#endif
  kmp_uint64 time;

  KMP_FSYNC_SPIN_INIT(spin, NULL);
  if (flag->done_check()) {
    KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
    return false;
  }
  th_gtid = this_thr->th.th_info.ds.ds_gtid;
  if (Cancellable) {
    kmp_team_t *team = this_thr->th.th_team;
    if (team && team->t.t_cancel_request == cancel_parallel)
      return true;
  }
#if KMP_OS_UNIX
  if (final_spin)
    KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
#endif
  KA_TRACE(20,
           ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag));
#if KMP_STATS_ENABLED
  stats_state_e thread_state = KMP_GET_THREAD_STATE();
#endif

  /* OMPT Behavior:
  THIS function is called from
  __kmp_barrier (2 times) (implicit or explicit barrier in parallel regions)
    these have join / fork behavior

    In these cases, we don't change the state or trigger events in THIS
    function. Events are triggered in the calling code (__kmp_barrier):

    state := ompt_state_overhead
      barrier-begin
      barrier-wait-begin
    state := ompt_state_wait_barrier
      call join-barrier-implementation (finally arrive here)
      {}
      call fork-barrier-implementation (finally arrive here)
      {}
    state := ompt_state_overhead
      barrier-wait-end
      barrier-end
    state := ompt_state_work_parallel

  __kmp_fork_barrier (after thread creation, before executing implicit task)
      call fork-barrier-implementation (finally arrive here)
      {} // worker arrive here with state = ompt_state_idle

  __kmp_join_barrier (implicit barrier at end of parallel region)
    state := ompt_state_barrier_implicit
      barrier-begin
      barrier-wait-begin
      call join-barrier-implementation (finally arrive here final_spin=FALSE)
      {
      }
  __kmp_fork_barrier (implicit barrier at end of parallel region)
      call fork-barrier-implementation (finally arrive here final_spin=TRUE)

    Worker after task-team is finished:
      barrier-wait-end
      barrier-end
      implicit-task-end
      idle-begin
    state := ompt_state_idle

    Before leaving, if state = ompt_state_idle
      idle-end
    state := ompt_state_overhead
  */
#if OMPT_SUPPORT
  ompt_state_t ompt_entry_state;
  ompt_data_t *tId;
  if (ompt_enabled.enabled) {
    ompt_entry_state = this_thr->th.ompt_thread_info.state;
    if (!final_spin ||
        (ompt_entry_state != ompt_state_wait_barrier_implicit_parallel &&
         ompt_entry_state != ompt_state_wait_barrier_teams) ||
        KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)) {
      ompt_lw_taskteam_t *team = NULL;
      if (this_thr->th.th_team)
        team = this_thr->th.th_team->t.ompt_serialized_team_info;
      if (team) {
        tId = &(team->ompt_task_info.task_data);
      } else {
        tId = OMPT_CUR_TASK_DATA(this_thr);
      }
    } else {
      tId = &(this_thr->th.ompt_thread_info.task_data);
    }
    if (final_spin && (__kmp_tasking_mode == tskm_immediate_exec ||
                       this_thr->th.th_task_team == NULL)) {
      // implicit task is done. Either no taskqueue, or task-team finished
      __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
    }
  }
#endif

  KMP_INIT_YIELD(spins); // Setup for waiting
  KMP_INIT_BACKOFF(time);

  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ||
      __kmp_pause_status == kmp_soft_paused) {
#if KMP_USE_MONITOR
// The worker threads cannot rely on the team struct existing at this point.
// Use the bt values cached in the thread struct instead.
#ifdef KMP_ADJUST_BLOCKTIME
    if (__kmp_pause_status == kmp_soft_paused ||
        (__kmp_zero_bt && !this_thr->th.th_team_bt_set))
      // Force immediate suspend if not set by user and more threads than
      // available procs
      hibernate = 0;
    else
      hibernate = this_thr->th.th_team_bt_intervals;
#else
    hibernate = this_thr->th.th_team_bt_intervals;
#endif /* KMP_ADJUST_BLOCKTIME */

    /* If the blocktime is nonzero, we want to make sure that we spin wait for
       the entirety of the specified #intervals, plus up to one interval more.
       This increment makes certain that this thread doesn't go to sleep too
       soon. */
    if (hibernate != 0)
      hibernate++;

    // Add in the current time value.
    hibernate += TCR_4(__kmp_global.g.g_time.dt.t_value);
    KF_TRACE(20, ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n",
                  th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate,
                  hibernate - __kmp_global.g.g_time.dt.t_value));
#else
    if (__kmp_pause_status == kmp_soft_paused) {
      // Force immediate suspend
      hibernate_goal = KMP_NOW();
    } else
      hibernate_goal = KMP_NOW() + this_thr->th.th_team_bt_intervals;
    poll_count = 0;
    (void)poll_count;
#endif // KMP_USE_MONITOR
  }

  KMP_MB();

  // Main wait spin loop
  while (flag->notdone_check()) {
    kmp_task_team_t *task_team = NULL;
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      task_team = this_thr->th.th_task_team;
      /* If the thread's task team pointer is NULL, it means one of 3 things:
         1) A newly-created thread is first being released by
         __kmp_fork_barrier(), and its task team has not been set up yet.
         2) All tasks have been executed to completion.
         3) Tasking is off for this region. This could be because we are in a
         serialized region (perhaps the outer one), or else tasking was manually
         disabled (KMP_TASKING=0). */
      if (task_team != NULL) {
        if (TCR_SYNC_4(task_team->tt.tt_active)) {
          if (KMP_TASKING_ENABLED(task_team)) {
            flag->execute_tasks(
                this_thr, th_gtid, final_spin,
                &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0);
          } else
            this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
        } else {
          KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid));
#if OMPT_SUPPORT
          // task-team is done now, other cases should be caught above
          if (final_spin && ompt_enabled.enabled)
            __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
#endif
          this_thr->th.th_task_team = NULL;
          this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
        }
      } else {
        this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
      } // if
    } // if

    KMP_FSYNC_SPIN_PREPARE(CCAST(void *, spin));
    if (TCR_4(__kmp_global.g.g_done)) {
      if (__kmp_global.g.g_abort)
        __kmp_abort_thread();
      break;
    }

    // If we are oversubscribed, or have waited a bit (and
    // KMP_LIBRARY=throughput), then yield
    KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time);

#if KMP_STATS_ENABLED
    // Check if thread has been signalled to idle state
    // This indicates that the logical "join-barrier" has finished
    if (this_thr->th.th_stats->isIdle() &&
        KMP_GET_THREAD_STATE() == FORK_JOIN_BARRIER) {
      KMP_SET_THREAD_STATE(IDLE);
      KMP_PUSH_PARTITIONED_TIMER(OMP_idle);
    }
#endif
    // Check if the barrier surrounding this wait loop has been cancelled
    if (Cancellable) {
      kmp_team_t *team = this_thr->th.th_team;
      if (team && team->t.t_cancel_request == cancel_parallel)
        break;
    }

    // For a hidden helper thread, a NULL task_team means the main thread has
    // not yet released the barrier. We must not sleep here: if we did, the
    // main thread could release all child barriers while the hidden helper
    // threads are still asleep, so follow-up setup such as task-team sync
    // would be skipped and this thread would be left without a task team.
    // That is usually harmless, but in the corner case where the first task
    // encountered is an untied task, __kmp_task_alloc dereferences the task
    // team pointer without checking it for NULL and would crash.
    if (task_team && KMP_HIDDEN_HELPER_WORKER_THREAD(th_gtid) &&
        !TCR_4(__kmp_hidden_helper_team_done)) {
      // If there are still hidden helper tasks to be executed, the hidden
      // helper thread will not enter a waiting status.
      if (KMP_ATOMIC_LD_ACQ(&__kmp_unexecuted_hidden_helper_tasks) == 0) {
        __kmp_hidden_helper_worker_thread_wait();
      }
      continue;
    }

    // Don't suspend if KMP_BLOCKTIME is set to "infinite"
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
        __kmp_pause_status != kmp_soft_paused)
      continue;

    // Don't suspend if there is a likelihood of new tasks being spawned.
    if (task_team != NULL && TCR_4(task_team->tt.tt_found_tasks) &&
        !__kmp_wpolicy_passive)
      continue;

#if KMP_USE_MONITOR
    // If we have waited a bit more, fall asleep
    if (TCR_4(__kmp_global.g.g_time.dt.t_value) < hibernate)
      continue;
#else
    if (KMP_BLOCKING(hibernate_goal, poll_count++))
      continue;
#endif
    // Don't suspend if wait loop designated non-sleepable
    // in template parameters
    if (!Sleepable)
      continue;

#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
    if (__kmp_mwait_enabled || __kmp_umwait_enabled) {
      KF_TRACE(50, ("__kmp_wait_sleep: T#%d using monitor/mwait\n", th_gtid));
      flag->mwait(th_gtid);
    } else {
#endif
      KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid));
#if KMP_OS_UNIX
      if (final_spin)
        KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
#endif
      flag->suspend(th_gtid);
#if KMP_OS_UNIX
      if (final_spin)
        KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
#endif
#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
    }
#endif

    if (TCR_4(__kmp_global.g.g_done)) {
      if (__kmp_global.g.g_abort)
        __kmp_abort_thread();
      break;
    } else if (__kmp_tasking_mode != tskm_immediate_exec &&
               this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) {
      this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
    }
    // TODO: If thread is done with work and times out, disband/free
  }

#if OMPT_SUPPORT
  ompt_state_t ompt_exit_state = this_thr->th.ompt_thread_info.state;
  if (ompt_enabled.enabled && ompt_exit_state != ompt_state_undefined) {
#if OMPT_OPTIONAL
    if (final_spin) {
      __ompt_implicit_task_end(this_thr, ompt_exit_state, tId);
      ompt_exit_state = this_thr->th.ompt_thread_info.state;
    }
#endif
    if (ompt_exit_state == ompt_state_idle) {
      this_thr->th.ompt_thread_info.state = ompt_state_overhead;
    }
  }
#endif
#if KMP_STATS_ENABLED
  // If we were put into idle state, pop that off the state stack
  if (KMP_GET_THREAD_STATE() == IDLE) {
    KMP_POP_PARTITIONED_TIMER();
    KMP_SET_THREAD_STATE(thread_state);
    this_thr->th.th_stats->resetIdleFlag();
  }
#endif

#if KMP_OS_UNIX
  if (final_spin)
    KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
#endif
  KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
  if (Cancellable) {
    kmp_team_t *team = this_thr->th.th_team;
    if (team && team->t.t_cancel_request == cancel_parallel) {
      if (tasks_completed) {
        // undo the previous decrement of unfinished_threads so that the
        // thread can decrement at the join barrier with no problem
        kmp_task_team_t *task_team = this_thr->th.th_task_team;
        std::atomic<kmp_int32> *unfinished_threads =
            &(task_team->tt.tt_unfinished_threads);
        KMP_ATOMIC_INC(unfinished_threads);
      }
      return true;
    }
  }
  return false;
}

#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
// Set up a monitor on the flag variable causing the calling thread to wait in
// a less active state until the flag variable is modified.
template <class C>
static inline void __kmp_mwait_template(int th_gtid, C *flag) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_mwait);
  kmp_info_t *th = __kmp_threads[th_gtid];

  KF_TRACE(30, ("__kmp_mwait_template: T#%d enter for flag = %p\n", th_gtid,
                flag->get()));

  // User-level mwait is available
  KMP_DEBUG_ASSERT(__kmp_mwait_enabled || __kmp_umwait_enabled);

  __kmp_suspend_initialize_thread(th);
  __kmp_lock_suspend_mx(th);

  volatile void *spin = flag->get();
  void *cacheline = (void *)(kmp_uintptr_t(spin) & ~(CACHE_LINE - 1));

  if (!flag->done_check()) {
    // Mark thread as no longer active
    th->th.th_active = FALSE;
    if (th->th.th_active_in_pool) {
      th->th.th_active_in_pool = FALSE;
      KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
      KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
    }
    flag->set_sleeping();
    KF_TRACE(50, ("__kmp_mwait_template: T#%d calling monitor\n", th_gtid));
#if KMP_HAVE_UMWAIT
    if (__kmp_umwait_enabled) {
      __kmp_umonitor(cacheline);
    }
#elif KMP_HAVE_MWAIT
    if (__kmp_mwait_enabled) {
      __kmp_mm_monitor(cacheline, 0, 0);
    }
#endif
    // To avoid a race, check flag between 'monitor' and 'mwait'. A write to
    // the address could happen after the last time we checked and before
    // monitoring started, in which case monitor can't detect the change.
    if (flag->done_check())
      flag->unset_sleeping();
    else {
      // if flag changes here, wake-up happens immediately
      TCW_PTR(th->th.th_sleep_loc, (void *)flag);
      th->th.th_sleep_loc_type = flag->get_type();
      __kmp_unlock_suspend_mx(th);
      KF_TRACE(50, ("__kmp_mwait_template: T#%d calling mwait\n", th_gtid));
#if KMP_HAVE_UMWAIT
      if (__kmp_umwait_enabled) {
        __kmp_umwait(1, 100); // to do: enable ctrl via hints, backoff counter
      }
#elif KMP_HAVE_MWAIT
      if (__kmp_mwait_enabled) {
        __kmp_mm_mwait(0, __kmp_mwait_hints);
      }
#endif
      KF_TRACE(50, ("__kmp_mwait_template: T#%d mwait done\n", th_gtid));
      __kmp_lock_suspend_mx(th);
      // Clean up sleep info; doesn't matter how/why this thread stopped waiting
      if (flag->is_sleeping())
        flag->unset_sleeping();
      TCW_PTR(th->th.th_sleep_loc, NULL);
      th->th.th_sleep_loc_type = flag_unset;
    }
    // Mark thread as active again
    th->th.th_active = TRUE;
    if (TCR_4(th->th.th_in_pool)) {
      KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
      th->th.th_active_in_pool = TRUE;
    }
  } // Drop out to main wait loop to check flag, handle tasks, etc.
  __kmp_unlock_suspend_mx(th);
  KF_TRACE(30, ("__kmp_mwait_template: T#%d exit\n", th_gtid));
}
#endif // KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT

/* Release any threads specified as waiting on the flag by releasing the flag
   and resuming the waiting thread if indicated by the sleep bit(s). A thread
   that calls __kmp_wait_template must call this function to wake up the
   potentially sleeping thread and prevent deadlocks! */
template <class C> static inline void __kmp_release_template(C *flag) {
#ifdef KMP_DEBUG
  int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
#endif
  KF_TRACE(20, ("__kmp_release: T#%d releasing flag(%x)\n", gtid, flag->get()));
  KMP_DEBUG_ASSERT(flag->get());
  KMP_FSYNC_RELEASING(flag->get_void_p());

  flag->internal_release();

  KF_TRACE(100, ("__kmp_release: T#%d set new spin=%d\n", gtid, flag->get(),
                 flag->load()));

  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
    // Only need to check sleep stuff if infinite block time not set.
    // Are *any* threads waiting on flag sleeping?
    if (flag->is_any_sleeping()) {
      for (unsigned int i = 0; i < flag->get_num_waiters(); ++i) {
        // if sleeping waiter exists at i, sets current_waiter to i inside flag
        kmp_info_t *waiter = flag->get_waiter(i);
        if (waiter) {
          int wait_gtid = waiter->th.th_info.ds.ds_gtid;
          // Wake up thread if needed
          KF_TRACE(50, ("__kmp_release: T#%d waking up thread T#%d since sleep "
                        "flag(%p) set\n",
                        gtid, wait_gtid, flag->get()));
          flag->resume(wait_gtid); // unsets flag's current_waiter when done
        }
      }
    }
  }
}

template <bool Cancellable, bool Sleepable>
class kmp_flag_32 : public kmp_flag_atomic<kmp_uint32, flag32, Sleepable> {
public:
  kmp_flag_32(std::atomic<kmp_uint32> *p)
      : kmp_flag_atomic<kmp_uint32, flag32, Sleepable>(p) {}
  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_info_t *thr)
      : kmp_flag_atomic<kmp_uint32, flag32, Sleepable>(p, thr) {}
  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_uint32 c)
      : kmp_flag_atomic<kmp_uint32, flag32, Sleepable>(p, c) {}
  void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); }
#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
  void mwait(int th_gtid) { __kmp_mwait_32(th_gtid, this); }
#endif
  void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
    return __kmp_execute_tasks_32(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  }
  bool wait(kmp_info_t *this_thr,
            int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
    if (final_spin)
      return __kmp_wait_template<kmp_flag_32, TRUE, Cancellable, Sleepable>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      return __kmp_wait_template<kmp_flag_32, FALSE, Cancellable, Sleepable>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  void release() { __kmp_release_template(this); }
  flag_type get_ptr_type() { return flag32; }
};

template <bool Cancellable, bool Sleepable>
class kmp_flag_64 : public kmp_flag_native<kmp_uint64, flag64, Sleepable> {
public:
  kmp_flag_64(volatile kmp_uint64 *p)
      : kmp_flag_native<kmp_uint64, flag64, Sleepable>(p) {}
  kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr)
      : kmp_flag_native<kmp_uint64, flag64, Sleepable>(p, thr) {}
  kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c)
      : kmp_flag_native<kmp_uint64, flag64, Sleepable>(p, c) {}
  kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c, std::atomic<bool> *loc)
      : kmp_flag_native<kmp_uint64, flag64, Sleepable>(p, c, loc) {}
  void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); }
#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
  void mwait(int th_gtid) { __kmp_mwait_64(th_gtid, this); }
#endif
  void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
    return __kmp_execute_tasks_64(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  }
  bool wait(kmp_info_t *this_thr,
            int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
    if (final_spin)
      return __kmp_wait_template<kmp_flag_64, TRUE, Cancellable, Sleepable>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      return __kmp_wait_template<kmp_flag_64, FALSE, Cancellable, Sleepable>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  void release() { __kmp_release_template(this); }
  flag_type get_ptr_type() { return flag64; }
};

template <bool Cancellable, bool Sleepable>
class kmp_atomic_flag_64
    : public kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable> {
public:
  kmp_atomic_flag_64(std::atomic<kmp_uint64> *p)
      : kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable>(p) {}
  kmp_atomic_flag_64(std::atomic<kmp_uint64> *p, kmp_info_t *thr)
      : kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable>(p, thr) {}
  kmp_atomic_flag_64(std::atomic<kmp_uint64> *p, kmp_uint64 c)
      : kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable>(p, c) {}
  kmp_atomic_flag_64(std::atomic<kmp_uint64> *p, kmp_uint64 c,
                     std::atomic<bool> *loc)
      : kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable>(p, c, loc) {}
  void suspend(int th_gtid) { __kmp_atomic_suspend_64(th_gtid, this); }
  void mwait(int th_gtid) { __kmp_atomic_mwait_64(th_gtid, this); }
  void resume(int th_gtid) { __kmp_atomic_resume_64(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
    return __kmp_atomic_execute_tasks_64(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  }
  bool wait(kmp_info_t *this_thr,
            int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
    if (final_spin)
      return __kmp_wait_template<kmp_atomic_flag_64, TRUE, Cancellable,
                                 Sleepable>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      return __kmp_wait_template<kmp_atomic_flag_64, FALSE, Cancellable,
                                 Sleepable>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  void release() { __kmp_release_template(this); }
  flag_type get_ptr_type() { return atomic_flag64; }
};

// Hierarchical 64-bit on-core barrier instantiation
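//
// The 64-bit flag word is treated as an array of 8 bytes: each participant is
// assigned one byte (selected by `offset`), releases by writing 1 into it, and
// done_check_val() compares only that byte with the checker. Illustrative
// sketch only (the shared word normally lives in the barrier structures):
//
//   kmp_uint64 word = 0;                       // shared flag storage
//   byteref(&word, offset) = 1;                // this thread signals its byte
//   bool done = (byteref(&word, offset) == checker);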
class kmp_flag_oncore : public kmp_flag_native<kmp_uint64, flag_oncore, false> {
  kmp_uint32 offset; /**< Portion of flag of interest for an operation. */
  bool flag_switch; /**< Indicates a switch in flag location. */
  enum barrier_type bt; /**< Barrier type. */
  /**< Thread to redirect to different flag location. */
  kmp_info_t *this_thr = nullptr;
#if USE_ITT_BUILD
  void *itt_sync_obj; /**< ITT object to pass to new flag location. */
#endif
  unsigned char &byteref(volatile kmp_uint64 *loc, size_t offset) {
    return (RCAST(unsigned char *, CCAST(kmp_uint64 *, loc)))[offset];
  }

public:
  kmp_flag_oncore(volatile kmp_uint64 *p)
      : kmp_flag_native<kmp_uint64, flag_oncore, false>(p), flag_switch(false) {
  }
  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx)
      : kmp_flag_native<kmp_uint64, flag_oncore, false>(p), offset(idx),
        flag_switch(false),
        bt(bs_last_barrier) USE_ITT_BUILD_ARG(itt_sync_obj(nullptr)) {}
  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx,
                  enum barrier_type bar_t,
                  kmp_info_t *thr USE_ITT_BUILD_ARG(void *itt))
      : kmp_flag_native<kmp_uint64, flag_oncore, false>(p, c), offset(idx),
        flag_switch(false), bt(bar_t),
        this_thr(thr) USE_ITT_BUILD_ARG(itt_sync_obj(itt)) {}
  virtual ~kmp_flag_oncore() override {}
  void *operator new(size_t size) { return __kmp_allocate(size); }
  void operator delete(void *p) { __kmp_free(p); }
  bool done_check_val(kmp_uint64 old_loc) override {
    return byteref(&old_loc, offset) == checker;
  }
  bool done_check() override { return done_check_val(*get()); }
  bool notdone_check() override {
    // Calculate flag_switch
    if (this_thr->th.th_bar[bt].bb.wait_flag == KMP_BARRIER_SWITCH_TO_OWN_FLAG)
      flag_switch = true;
    if (byteref(get(), offset) != 1 && !flag_switch)
      return true;
    else if (flag_switch) {
      this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING;
      kmp_flag_64<> flag(&this_thr->th.th_bar[bt].bb.b_go,
                         (kmp_uint64)KMP_BARRIER_STATE_BUMP);
      __kmp_wait_64(this_thr, &flag, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    }
    return false;
  }
  void internal_release() {
    // Other threads can write their own bytes simultaneously.
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
      byteref(get(), offset) = 1;
    } else {
      kmp_uint64 mask = 0;
      byteref(&mask, offset) = 1;
      KMP_TEST_THEN_OR64(get(), mask);
    }
  }
  void wait(kmp_info_t *this_thr, int final_spin) {
    if (final_spin)
      __kmp_wait_template<kmp_flag_oncore, TRUE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      __kmp_wait_template<kmp_flag_oncore, FALSE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  void release() { __kmp_release_template(this); }
  void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); }
#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
  void mwait(int th_gtid) { __kmp_mwait_oncore(th_gtid, this); }
#endif
  void resume(int th_gtid) { __kmp_resume_oncore(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
#if OMPD_SUPPORT
    int ret = __kmp_execute_tasks_oncore(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
    if (ompd_state & OMPD_ENABLE_BP)
      ompd_bp_task_end();
    return ret;
#else
    return __kmp_execute_tasks_oncore(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
#endif
  }
  enum barrier_type get_bt() { return bt; }
  flag_type get_ptr_type() { return flag_oncore; }
};

static inline void __kmp_null_resume_wrapper(kmp_info_t *thr) {
  int gtid = __kmp_gtid_from_thread(thr);
  void *flag = CCAST(void *, thr->th.th_sleep_loc);
  flag_type type = thr->th.th_sleep_loc_type;
  if (!flag)
    return;
  // Attempt to wake up a thread: examine its type and call appropriate template
  switch (type) {
  case flag32:
    __kmp_resume_32(gtid, RCAST(kmp_flag_32<> *, flag));
    break;
  case flag64:
    __kmp_resume_64(gtid, RCAST(kmp_flag_64<> *, flag));
    break;
  case atomic_flag64:
    __kmp_atomic_resume_64(gtid, RCAST(kmp_atomic_flag_64<> *, flag));
    break;
  case flag_oncore:
    __kmp_resume_oncore(gtid, RCAST(kmp_flag_oncore *, flag));
    break;
  case flag_unset:
    KF_TRACE(100, ("__kmp_null_resume_wrapper: flag type %d is unset\n", type));
    break;
  }
}

/*!
@}
*/

#endif // KMP_WAIT_RELEASE_H