1 | /* |
2 | * kmp_wait_release.h -- Wait/Release implementation |
3 | */ |
4 | |
5 | //===----------------------------------------------------------------------===// |
6 | // |
7 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
8 | // See https://llvm.org/LICENSE.txt for license information. |
9 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #ifndef KMP_WAIT_RELEASE_H |
14 | #define KMP_WAIT_RELEASE_H |
15 | |
16 | #include "kmp.h" |
17 | #include "kmp_itt.h" |
18 | #include "kmp_stats.h" |
19 | #if OMPT_SUPPORT |
20 | #include "ompt-specific.h" |
21 | #endif |
22 | |
23 | /*! |
24 | @defgroup WAIT_RELEASE Wait/Release operations |
25 | |
26 | The definitions and functions here implement the lowest level thread |
synchronizations of suspending a thread and waking it up. They are used to build
28 | higher level operations such as barriers and fork/join. |
29 | */ |
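/* Typical usage (an illustrative sketch, not code lifted from the runtime):
   a worker blocks on its own go flag through one of the kmp_flag classes
   below, while the releasing thread bumps that flag and wakes the worker if
   it went to sleep. The b_go field, the KMP_BARRIER_STATE_BUMP checker and
   the itt_sync_obj argument mirror how the barrier code drives these classes;
   treat the snippet as a usage outline rather than the exact implementation.

     // Waiting side: spin, then sleep, until the go flag reaches the checker.
     kmp_flag_64<> go_flag(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_go,
                           (kmp_uint64)KMP_BARRIER_STATE_BUMP);
     go_flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));

     // Releasing side: bump the waiter's go flag and resume it if sleeping.
     kmp_flag_64<> other_go(&other_thr->th.th_bar[bs_forkjoin_barrier].bb.b_go);
     other_go.release();
*/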
30 | |
31 | /*! |
32 | @ingroup WAIT_RELEASE |
33 | @{ |
34 | */ |
35 | |
36 | struct flag_properties { |
37 | unsigned int type : 16; |
38 | unsigned int reserved : 16; |
39 | }; |
40 | |
41 | template <enum flag_type FlagType> struct flag_traits {}; |
42 | |
43 | template <> struct flag_traits<flag32> { |
44 | typedef kmp_uint32 flag_t; |
45 | static const flag_type t = flag32; |
46 | static inline flag_t tcr(flag_t f) { return TCR_4(f); } |
47 | static inline flag_t test_then_add4(volatile flag_t *f) { |
48 | return KMP_TEST_THEN_ADD4_32(RCAST(volatile kmp_int32 *, f)); |
49 | } |
50 | static inline flag_t test_then_or(volatile flag_t *f, flag_t v) { |
51 | return KMP_TEST_THEN_OR32(f, v); |
52 | } |
53 | static inline flag_t test_then_and(volatile flag_t *f, flag_t v) { |
54 | return KMP_TEST_THEN_AND32(f, v); |
55 | } |
56 | }; |
57 | |
58 | template <> struct flag_traits<atomic_flag64> { |
59 | typedef kmp_uint64 flag_t; |
60 | static const flag_type t = atomic_flag64; |
61 | static inline flag_t tcr(flag_t f) { return TCR_8(f); } |
62 | static inline flag_t test_then_add4(volatile flag_t *f) { |
63 | return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f)); |
64 | } |
65 | static inline flag_t test_then_or(volatile flag_t *f, flag_t v) { |
66 | return KMP_TEST_THEN_OR64(f, v); |
67 | } |
68 | static inline flag_t test_then_and(volatile flag_t *f, flag_t v) { |
69 | return KMP_TEST_THEN_AND64(f, v); |
70 | } |
71 | }; |
72 | |
73 | template <> struct flag_traits<flag64> { |
74 | typedef kmp_uint64 flag_t; |
75 | static const flag_type t = flag64; |
76 | static inline flag_t tcr(flag_t f) { return TCR_8(f); } |
77 | static inline flag_t test_then_add4(volatile flag_t *f) { |
78 | return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f)); |
79 | } |
80 | static inline flag_t test_then_or(volatile flag_t *f, flag_t v) { |
81 | return KMP_TEST_THEN_OR64(f, v); |
82 | } |
83 | static inline flag_t test_then_and(volatile flag_t *f, flag_t v) { |
84 | return KMP_TEST_THEN_AND64(f, v); |
85 | } |
86 | }; |
87 | |
88 | template <> struct flag_traits<flag_oncore> { |
89 | typedef kmp_uint64 flag_t; |
90 | static const flag_type t = flag_oncore; |
91 | static inline flag_t tcr(flag_t f) { return TCR_8(f); } |
92 | static inline flag_t test_then_add4(volatile flag_t *f) { |
93 | return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f)); |
94 | } |
95 | static inline flag_t test_then_or(volatile flag_t *f, flag_t v) { |
96 | return KMP_TEST_THEN_OR64(f, v); |
97 | } |
98 | static inline flag_t test_then_and(volatile flag_t *f, flag_t v) { |
99 | return KMP_TEST_THEN_AND64(f, v); |
100 | } |
101 | }; |
102 | |
103 | /*! Base class for all flags */ |
104 | template <flag_type FlagType> class kmp_flag { |
105 | protected: |
106 | flag_properties t; /**< "Type" of the flag in loc */ |
107 | kmp_info_t *waiting_threads[1]; /**< Threads sleeping on this thread. */ |
108 | kmp_uint32 num_waiting_threads; /**< Num threads sleeping on this thread. */ |
109 | std::atomic<bool> *sleepLoc; |
110 | |
111 | public: |
112 | typedef flag_traits<FlagType> traits_type; |
  kmp_flag() : t({FlagType, 0U}), num_waiting_threads(0), sleepLoc(nullptr) {}
  kmp_flag(int nwaiters)
      : t({FlagType, 0U}), num_waiting_threads(nwaiters), sleepLoc(nullptr) {}
  kmp_flag(std::atomic<bool> *sloc)
      : t({FlagType, 0U}), num_waiting_threads(0), sleepLoc(sloc) {}
118 | /*! @result the flag_type */ |
119 | flag_type get_type() { return (flag_type)(t.type); } |
120 | |
  /*! @param i in index into waiting_threads
122 | * @result the thread that is waiting at index i */ |
123 | kmp_info_t *get_waiter(kmp_uint32 i) { |
124 | KMP_DEBUG_ASSERT(i < num_waiting_threads); |
125 | return waiting_threads[i]; |
126 | } |
127 | /*! @result num_waiting_threads */ |
128 | kmp_uint32 get_num_waiters() { return num_waiting_threads; } |
129 | /*! @param thr in the thread which is now waiting |
130 | * Insert a waiting thread at index 0. */ |
131 | void set_waiter(kmp_info_t *thr) { |
132 | waiting_threads[0] = thr; |
133 | num_waiting_threads = 1; |
134 | } |
135 | enum barrier_type get_bt() { return bs_last_barrier; } |
136 | }; |
137 | |
138 | /*! Base class for wait/release volatile flag */ |
139 | template <typename PtrType, flag_type FlagType, bool Sleepable> |
140 | class kmp_flag_native : public kmp_flag<FlagType> { |
141 | protected: |
142 | volatile PtrType *loc; |
143 | PtrType checker; /**< When flag==checker, it has been released. */ |
144 | typedef flag_traits<FlagType> traits_type; |
145 | |
146 | public: |
147 | typedef PtrType flag_t; |
148 | kmp_flag_native(volatile PtrType *p) : kmp_flag<FlagType>(), loc(p) {} |
149 | kmp_flag_native(volatile PtrType *p, kmp_info_t *thr) |
150 | : kmp_flag<FlagType>(1), loc(p) { |
151 | this->waiting_threads[0] = thr; |
152 | } |
153 | kmp_flag_native(volatile PtrType *p, PtrType c) |
154 | : kmp_flag<FlagType>(), loc(p), checker(c) {} |
155 | kmp_flag_native(volatile PtrType *p, PtrType c, std::atomic<bool> *sloc) |
156 | : kmp_flag<FlagType>(sloc), loc(p), checker(c) {} |
157 | virtual ~kmp_flag_native() {} |
158 | void *operator new(size_t size) { return __kmp_allocate(size); } |
159 | void operator delete(void *p) { __kmp_free(p); } |
160 | volatile PtrType *get() { return loc; } |
161 | void *get_void_p() { return RCAST(void *, CCAST(PtrType *, loc)); } |
162 | void set(volatile PtrType *new_loc) { loc = new_loc; } |
163 | PtrType load() { return *loc; } |
164 | void store(PtrType val) { *loc = val; } |
165 | /*! @result true if the flag object has been released. */ |
166 | virtual bool done_check() { |
167 | if (Sleepable && !(this->sleepLoc)) |
168 | return (traits_type::tcr(*(this->get())) & ~KMP_BARRIER_SLEEP_STATE) == |
169 | checker; |
170 | else |
171 | return traits_type::tcr(*(this->get())) == checker; |
172 | } |
173 | /*! @param old_loc in old value of flag |
174 | * @result true if the flag's old value indicates it was released. */ |
175 | virtual bool done_check_val(PtrType old_loc) { return old_loc == checker; } |
176 | /*! @result true if the flag object is not yet released. |
177 | * Used in __kmp_wait_template like: |
178 | * @code |
179 | * while (flag.notdone_check()) { pause(); } |
180 | * @endcode */ |
181 | virtual bool notdone_check() { |
182 | return traits_type::tcr(*(this->get())) != checker; |
183 | } |
184 | /*! @result Actual flag value before release was applied. |
185 | * Trigger all waiting threads to run by modifying flag to release state. */ |
186 | void internal_release() { |
187 | (void)traits_type::test_then_add4((volatile PtrType *)this->get()); |
188 | } |
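  // Note on the fixed "+4": under the usual kmp.h bit layout (an assumption
  // stated here for illustration, not defined in this file),
  // KMP_BARRIER_SLEEP_STATE occupies the low bit and KMP_BARRIER_STATE_BUMP
  // equals 1 << 2 == 4, so adding 4 advances the release state without
  // disturbing a concurrently set sleep bit. Worked example: flag == 0, a
  // waiter sets the sleep bit -> 1, release adds 4 -> 5; a sleep-aware
  // done_check() masks off the sleep bit and compares the remaining 4 against
  // the checker.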
189 | /*! @result Actual flag value before sleep bit(s) set. |
190 | * Notes that there is at least one thread sleeping on the flag by setting |
191 | * sleep bit(s). */ |
192 | PtrType set_sleeping() { |
193 | if (this->sleepLoc) { |
194 | this->sleepLoc->store(true); |
195 | return *(this->get()); |
196 | } |
197 | return traits_type::test_then_or((volatile PtrType *)this->get(), |
198 | KMP_BARRIER_SLEEP_STATE); |
199 | } |
200 | /*! @result Actual flag value before sleep bit(s) cleared. |
201 | * Notes that there are no longer threads sleeping on the flag by clearing |
202 | * sleep bit(s). */ |
203 | void unset_sleeping() { |
204 | if (this->sleepLoc) { |
205 | this->sleepLoc->store(false); |
206 | return; |
207 | } |
208 | traits_type::test_then_and((volatile PtrType *)this->get(), |
209 | ~KMP_BARRIER_SLEEP_STATE); |
210 | } |
211 | /*! @param old_loc in old value of flag |
212 | * Test if there are threads sleeping on the flag's old value in old_loc. */ |
213 | bool is_sleeping_val(PtrType old_loc) { |
214 | if (this->sleepLoc) |
215 | return this->sleepLoc->load(); |
216 | return old_loc & KMP_BARRIER_SLEEP_STATE; |
217 | } |
218 | /*! Test whether there are threads sleeping on the flag. */ |
219 | bool is_sleeping() { |
220 | if (this->sleepLoc) |
221 | return this->sleepLoc->load(); |
    return is_sleeping_val(*(this->get()));
223 | } |
224 | bool is_any_sleeping() { |
225 | if (this->sleepLoc) |
226 | return this->sleepLoc->load(); |
    return is_sleeping_val(*(this->get()));
228 | } |
229 | kmp_uint8 *get_stolen() { return NULL; } |
230 | }; |
231 | |
232 | /*! Base class for wait/release atomic flag */ |
233 | template <typename PtrType, flag_type FlagType, bool Sleepable> |
234 | class kmp_flag_atomic : public kmp_flag<FlagType> { |
235 | protected: |
236 | std::atomic<PtrType> *loc; /**< Pointer to flag location to wait on */ |
237 | PtrType checker; /**< Flag == checker means it has been released. */ |
238 | public: |
239 | typedef flag_traits<FlagType> traits_type; |
240 | typedef PtrType flag_t; |
241 | kmp_flag_atomic(std::atomic<PtrType> *p) : kmp_flag<FlagType>(), loc(p) {} |
242 | kmp_flag_atomic(std::atomic<PtrType> *p, kmp_info_t *thr) |
243 | : kmp_flag<FlagType>(1), loc(p) { |
244 | this->waiting_threads[0] = thr; |
245 | } |
246 | kmp_flag_atomic(std::atomic<PtrType> *p, PtrType c) |
247 | : kmp_flag<FlagType>(), loc(p), checker(c) {} |
248 | kmp_flag_atomic(std::atomic<PtrType> *p, PtrType c, std::atomic<bool> *sloc) |
249 | : kmp_flag<FlagType>(sloc), loc(p), checker(c) {} |
250 | /*! @result the pointer to the actual flag */ |
251 | std::atomic<PtrType> *get() { return loc; } |
252 | /*! @result void* pointer to the actual flag */ |
253 | void *get_void_p() { return RCAST(void *, loc); } |
254 | /*! @param new_loc in set loc to point at new_loc */ |
255 | void set(std::atomic<PtrType> *new_loc) { loc = new_loc; } |
256 | /*! @result flag value */ |
257 | PtrType load() { return loc->load(std::memory_order_acquire); } |
258 | /*! @param val the new flag value to be stored */ |
259 | void store(PtrType val) { loc->store(val, std::memory_order_release); } |
260 | /*! @result true if the flag object has been released. */ |
261 | bool done_check() { |
262 | if (Sleepable && !(this->sleepLoc)) |
263 | return (this->load() & ~KMP_BARRIER_SLEEP_STATE) == checker; |
264 | else |
265 | return this->load() == checker; |
266 | } |
267 | /*! @param old_loc in old value of flag |
268 | * @result true if the flag's old value indicates it was released. */ |
269 | bool done_check_val(PtrType old_loc) { return old_loc == checker; } |
270 | /*! @result true if the flag object is not yet released. |
271 | * Used in __kmp_wait_template like: |
272 | * @code |
273 | * while (flag.notdone_check()) { pause(); } |
274 | * @endcode */ |
275 | bool notdone_check() { return this->load() != checker; } |
276 | /*! @result Actual flag value before release was applied. |
277 | * Trigger all waiting threads to run by modifying flag to release state. */ |
278 | void internal_release() { KMP_ATOMIC_ADD(this->get(), 4); } |
279 | /*! @result Actual flag value before sleep bit(s) set. |
280 | * Notes that there is at least one thread sleeping on the flag by setting |
281 | * sleep bit(s). */ |
282 | PtrType set_sleeping() { |
283 | if (this->sleepLoc) { |
284 | this->sleepLoc->store(true); |
285 | return *(this->get()); |
286 | } |
287 | return KMP_ATOMIC_OR(this->get(), KMP_BARRIER_SLEEP_STATE); |
288 | } |
289 | /*! @result Actual flag value before sleep bit(s) cleared. |
290 | * Notes that there are no longer threads sleeping on the flag by clearing |
291 | * sleep bit(s). */ |
292 | void unset_sleeping() { |
293 | if (this->sleepLoc) { |
294 | this->sleepLoc->store(false); |
295 | return; |
296 | } |
297 | KMP_ATOMIC_AND(this->get(), ~KMP_BARRIER_SLEEP_STATE); |
298 | } |
299 | /*! @param old_loc in old value of flag |
300 | * Test whether there are threads sleeping on flag's old value in old_loc. */ |
301 | bool is_sleeping_val(PtrType old_loc) { |
302 | if (this->sleepLoc) |
303 | return this->sleepLoc->load(); |
304 | return old_loc & KMP_BARRIER_SLEEP_STATE; |
305 | } |
306 | /*! Test whether there are threads sleeping on the flag. */ |
307 | bool is_sleeping() { |
308 | if (this->sleepLoc) |
309 | return this->sleepLoc->load(); |
    return is_sleeping_val(this->load());
311 | } |
312 | bool is_any_sleeping() { |
313 | if (this->sleepLoc) |
314 | return this->sleepLoc->load(); |
    return is_sleeping_val(this->load());
316 | } |
317 | kmp_uint8 *get_stolen() { return NULL; } |
318 | }; |
319 | |
320 | #if OMPT_SUPPORT |
321 | OMPT_NOINLINE |
322 | static void __ompt_implicit_task_end(kmp_info_t *this_thr, |
323 | ompt_state_t ompt_state, |
324 | ompt_data_t *tId) { |
325 | int ds_tid = this_thr->th.th_info.ds.ds_tid; |
326 | if (ompt_state == ompt_state_wait_barrier_implicit) { |
327 | this_thr->th.ompt_thread_info.state = ompt_state_overhead; |
328 | #if OMPT_OPTIONAL |
329 | void *codeptr = NULL; |
330 | if (ompt_enabled.ompt_callback_sync_region_wait) { |
331 | ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)( |
332 | ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, tId, |
333 | codeptr); |
334 | } |
335 | if (ompt_enabled.ompt_callback_sync_region) { |
336 | ompt_callbacks.ompt_callback(ompt_callback_sync_region)( |
337 | ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, tId, |
338 | codeptr); |
339 | } |
340 | #endif |
341 | if (!KMP_MASTER_TID(ds_tid)) { |
342 | if (ompt_enabled.ompt_callback_implicit_task) { |
343 | int flags = this_thr->th.ompt_thread_info.parallel_flags; |
344 | flags = (flags & ompt_parallel_league) ? ompt_task_initial |
345 | : ompt_task_implicit; |
346 | ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( |
347 | ompt_scope_end, NULL, tId, 0, ds_tid, flags); |
348 | } |
349 | // return to idle state |
350 | this_thr->th.ompt_thread_info.state = ompt_state_idle; |
351 | } else { |
352 | this_thr->th.ompt_thread_info.state = ompt_state_overhead; |
353 | } |
354 | } |
355 | } |
356 | #endif |
357 | |
358 | /* Spin wait loop that first does pause/yield, then sleep. A thread that calls |
359 | __kmp_wait_* must make certain that another thread calls __kmp_release |
360 | to wake it back up to prevent deadlocks! |
361 | |
362 | NOTE: We may not belong to a team at this point. */ |
363 | template <class C, bool final_spin, bool Cancellable = false, |
364 | bool Sleepable = true> |
365 | static inline bool |
366 | __kmp_wait_template(kmp_info_t *this_thr, |
367 | C *flag USE_ITT_BUILD_ARG(void *itt_sync_obj)) { |
368 | #if USE_ITT_BUILD && USE_ITT_NOTIFY |
369 | volatile void *spin = flag->get(); |
370 | #endif |
371 | kmp_uint32 spins; |
372 | int th_gtid; |
373 | int tasks_completed = FALSE; |
374 | #if !KMP_USE_MONITOR |
375 | kmp_uint64 poll_count; |
376 | kmp_uint64 hibernate_goal; |
377 | #else |
378 | kmp_uint32 hibernate; |
379 | #endif |
380 | kmp_uint64 time; |
381 | |
382 | KMP_FSYNC_SPIN_INIT(spin, NULL); |
383 | if (flag->done_check()) { |
384 | KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin)); |
385 | return false; |
386 | } |
387 | th_gtid = this_thr->th.th_info.ds.ds_gtid; |
388 | if (Cancellable) { |
389 | kmp_team_t *team = this_thr->th.th_team; |
390 | if (team && team->t.t_cancel_request == cancel_parallel) |
391 | return true; |
392 | } |
393 | #if KMP_OS_UNIX |
394 | if (final_spin) |
395 | KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true); |
396 | #endif |
397 | KA_TRACE(20, |
398 | ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n" , th_gtid, flag)); |
399 | #if KMP_STATS_ENABLED |
400 | stats_state_e thread_state = KMP_GET_THREAD_STATE(); |
401 | #endif |
402 | |
403 | /* OMPT Behavior: |
404 | THIS function is called from |
405 | __kmp_barrier (2 times) (implicit or explicit barrier in parallel regions) |
406 | these have join / fork behavior |
407 | |
408 | In these cases, we don't change the state or trigger events in THIS |
409 | function. |
410 | Events are triggered in the calling code (__kmp_barrier): |
411 | |
412 | state := ompt_state_overhead |
413 | barrier-begin |
414 | barrier-wait-begin |
415 | state := ompt_state_wait_barrier |
416 | call join-barrier-implementation (finally arrive here) |
417 | {} |
418 | call fork-barrier-implementation (finally arrive here) |
419 | {} |
420 | state := ompt_state_overhead |
421 | barrier-wait-end |
422 | barrier-end |
423 | state := ompt_state_work_parallel |
424 | |
425 | |
426 | __kmp_fork_barrier (after thread creation, before executing implicit task) |
427 | call fork-barrier-implementation (finally arrive here) |
428 | {} // worker arrive here with state = ompt_state_idle |
429 | |
430 | |
431 | __kmp_join_barrier (implicit barrier at end of parallel region) |
432 | state := ompt_state_barrier_implicit |
433 | barrier-begin |
434 | barrier-wait-begin |
435 | call join-barrier-implementation (finally arrive here |
436 | final_spin=FALSE) |
437 | { |
438 | } |
439 | __kmp_fork_barrier (implicit barrier at end of parallel region) |
440 | call fork-barrier-implementation (finally arrive here final_spin=TRUE) |
441 | |
442 | Worker after task-team is finished: |
443 | barrier-wait-end |
444 | barrier-end |
445 | implicit-task-end |
446 | idle-begin |
447 | state := ompt_state_idle |
448 | |
449 | Before leaving, if state = ompt_state_idle |
450 | idle-end |
451 | state := ompt_state_overhead |
452 | */ |
453 | #if OMPT_SUPPORT |
454 | ompt_state_t ompt_entry_state; |
455 | ompt_data_t *tId; |
456 | if (ompt_enabled.enabled) { |
457 | ompt_entry_state = this_thr->th.ompt_thread_info.state; |
458 | if (!final_spin || ompt_entry_state != ompt_state_wait_barrier_implicit || |
459 | KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)) { |
460 | ompt_lw_taskteam_t *team = NULL; |
461 | if (this_thr->th.th_team) |
462 | team = this_thr->th.th_team->t.ompt_serialized_team_info; |
463 | if (team) { |
464 | tId = &(team->ompt_task_info.task_data); |
465 | } else { |
466 | tId = OMPT_CUR_TASK_DATA(this_thr); |
467 | } |
468 | } else { |
469 | tId = &(this_thr->th.ompt_thread_info.task_data); |
470 | } |
471 | if (final_spin && (__kmp_tasking_mode == tskm_immediate_exec || |
472 | this_thr->th.th_task_team == NULL)) { |
473 | // implicit task is done. Either no taskqueue, or task-team finished |
      __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
475 | } |
476 | } |
477 | #endif |
478 | |
479 | KMP_INIT_YIELD(spins); // Setup for waiting |
480 | KMP_INIT_BACKOFF(time); |
481 | |
482 | if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME || |
483 | __kmp_pause_status == kmp_soft_paused) { |
484 | #if KMP_USE_MONITOR |
485 | // The worker threads cannot rely on the team struct existing at this point. |
486 | // Use the bt values cached in the thread struct instead. |
487 | #ifdef KMP_ADJUST_BLOCKTIME |
488 | if (__kmp_pause_status == kmp_soft_paused || |
489 | (__kmp_zero_bt && !this_thr->th.th_team_bt_set)) |
490 | // Force immediate suspend if not set by user and more threads than |
491 | // available procs |
492 | hibernate = 0; |
493 | else |
494 | hibernate = this_thr->th.th_team_bt_intervals; |
495 | #else |
496 | hibernate = this_thr->th.th_team_bt_intervals; |
497 | #endif /* KMP_ADJUST_BLOCKTIME */ |
498 | |
499 | /* If the blocktime is nonzero, we want to make sure that we spin wait for |
500 | the entirety of the specified #intervals, plus up to one interval more. |
       This increment makes certain that this thread doesn't go to sleep too
502 | soon. */ |
503 | if (hibernate != 0) |
504 | hibernate++; |
505 | |
506 | // Add in the current time value. |
507 | hibernate += TCR_4(__kmp_global.g.g_time.dt.t_value); |
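    // Worked example: with th_team_bt_intervals == 2 and the monitor clock at
    // t_value == 10, hibernate becomes 2 + 1 + 10 = 13, so the loop below
    // keeps spinning while t_value is 10, 11 or 12 and only allows a suspend
    // once the monitor has ticked at least three more times.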
    KF_TRACE(20, ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n",
509 | th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate, |
510 | hibernate - __kmp_global.g.g_time.dt.t_value)); |
511 | #else |
512 | if (__kmp_pause_status == kmp_soft_paused) { |
513 | // Force immediate suspend |
514 | hibernate_goal = KMP_NOW(); |
515 | } else |
516 | hibernate_goal = KMP_NOW() + this_thr->th.th_team_bt_intervals; |
517 | poll_count = 0; |
518 | (void)poll_count; |
519 | #endif // KMP_USE_MONITOR |
520 | } |
521 | |
522 | KMP_MB(); |
523 | |
524 | // Main wait spin loop |
525 | while (flag->notdone_check()) { |
526 | kmp_task_team_t *task_team = NULL; |
527 | if (__kmp_tasking_mode != tskm_immediate_exec) { |
528 | task_team = this_thr->th.th_task_team; |
529 | /* If the thread's task team pointer is NULL, it means one of 3 things: |
530 | 1) A newly-created thread is first being released by |
531 | __kmp_fork_barrier(), and its task team has not been set up yet. |
532 | 2) All tasks have been executed to completion. |
533 | 3) Tasking is off for this region. This could be because we are in a |
534 | serialized region (perhaps the outer one), or else tasking was manually |
535 | disabled (KMP_TASKING=0). */ |
536 | if (task_team != NULL) { |
537 | if (TCR_SYNC_4(task_team->tt.tt_active)) { |
538 | if (KMP_TASKING_ENABLED(task_team)) { |
539 | flag->execute_tasks( |
540 | this_thr, th_gtid, final_spin, |
541 | &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0); |
542 | } else |
543 | this_thr->th.th_reap_state = KMP_SAFE_TO_REAP; |
544 | } else { |
545 | KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)); |
546 | #if OMPT_SUPPORT |
          // task-team is done now, other cases should be caught above
          if (final_spin && ompt_enabled.enabled)
            __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
550 | #endif |
551 | this_thr->th.th_task_team = NULL; |
552 | this_thr->th.th_reap_state = KMP_SAFE_TO_REAP; |
553 | } |
554 | } else { |
555 | this_thr->th.th_reap_state = KMP_SAFE_TO_REAP; |
556 | } // if |
557 | } // if |
558 | |
559 | KMP_FSYNC_SPIN_PREPARE(CCAST(void *, spin)); |
560 | if (TCR_4(__kmp_global.g.g_done)) { |
561 | if (__kmp_global.g.g_abort) |
562 | __kmp_abort_thread(); |
563 | break; |
564 | } |
565 | |
566 | // If we are oversubscribed, or have waited a bit (and |
567 | // KMP_LIBRARY=throughput), then yield |
568 | KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time); |
569 | |
570 | #if KMP_STATS_ENABLED |
571 | // Check if thread has been signalled to idle state |
572 | // This indicates that the logical "join-barrier" has finished |
573 | if (this_thr->th.th_stats->isIdle() && |
574 | KMP_GET_THREAD_STATE() == FORK_JOIN_BARRIER) { |
575 | KMP_SET_THREAD_STATE(IDLE); |
576 | KMP_PUSH_PARTITIONED_TIMER(OMP_idle); |
577 | } |
578 | #endif |
579 | // Check if the barrier surrounding this wait loop has been cancelled |
580 | if (Cancellable) { |
581 | kmp_team_t *team = this_thr->th.th_team; |
582 | if (team && team->t.t_cancel_request == cancel_parallel) |
583 | break; |
584 | } |
585 | |
    // For a hidden helper thread, a NULL task_team means the main thread has
    // not released the barrier yet. We must not sleep here: if we did, the
    // main thread could release all child barriers while the hidden helper
    // threads are still asleep, so the follow-up setup (such as task team
    // synchronization) would be skipped and this thread would be left without
    // a task team. Usually that is harmless, but in the corner case where the
    // first task encountered is an untied task, __kmp_task_alloc dereferences
    // the task team pointer without a NULL check and crashes.
596 | if (task_team && KMP_HIDDEN_HELPER_WORKER_THREAD(th_gtid) && |
597 | !TCR_4(__kmp_hidden_helper_team_done)) { |
      // If there are still hidden helper tasks to be executed, the hidden helper
599 | // thread will not enter a waiting status. |
600 | if (KMP_ATOMIC_LD_ACQ(&__kmp_unexecuted_hidden_helper_tasks) == 0) { |
601 | __kmp_hidden_helper_worker_thread_wait(); |
602 | } |
603 | continue; |
604 | } |
605 | |
606 | // Don't suspend if KMP_BLOCKTIME is set to "infinite" |
607 | if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME && |
608 | __kmp_pause_status != kmp_soft_paused) |
609 | continue; |
610 | |
611 | // Don't suspend if there is a likelihood of new tasks being spawned. |
612 | if (task_team != NULL && TCR_4(task_team->tt.tt_found_tasks) && |
613 | !__kmp_wpolicy_passive) |
614 | continue; |
615 | |
616 | #if KMP_USE_MONITOR |
617 | // If we have waited a bit more, fall asleep |
618 | if (TCR_4(__kmp_global.g.g_time.dt.t_value) < hibernate) |
619 | continue; |
620 | #else |
621 | if (KMP_BLOCKING(hibernate_goal, poll_count++)) |
622 | continue; |
623 | #endif |
624 | // Don't suspend if wait loop designated non-sleepable |
625 | // in template parameters |
626 | if (!Sleepable) |
627 | continue; |
628 | |
629 | #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT |
630 | if (__kmp_mwait_enabled || __kmp_umwait_enabled) { |
      KF_TRACE(50, ("__kmp_wait_sleep: T#%d using monitor/mwait\n", th_gtid));
632 | flag->mwait(th_gtid); |
633 | } else { |
634 | #endif |
      KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid));
636 | #if KMP_OS_UNIX |
637 | if (final_spin) |
638 | KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false); |
639 | #endif |
640 | flag->suspend(th_gtid); |
641 | #if KMP_OS_UNIX |
642 | if (final_spin) |
643 | KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true); |
644 | #endif |
645 | #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT |
646 | } |
647 | #endif |
648 | |
649 | if (TCR_4(__kmp_global.g.g_done)) { |
650 | if (__kmp_global.g.g_abort) |
651 | __kmp_abort_thread(); |
652 | break; |
653 | } else if (__kmp_tasking_mode != tskm_immediate_exec && |
654 | this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) { |
655 | this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP; |
656 | } |
657 | // TODO: If thread is done with work and times out, disband/free |
658 | } |
659 | |
660 | #if OMPT_SUPPORT |
661 | ompt_state_t ompt_exit_state = this_thr->th.ompt_thread_info.state; |
662 | if (ompt_enabled.enabled && ompt_exit_state != ompt_state_undefined) { |
663 | #if OMPT_OPTIONAL |
664 | if (final_spin) { |
      __ompt_implicit_task_end(this_thr, ompt_exit_state, tId);
666 | ompt_exit_state = this_thr->th.ompt_thread_info.state; |
667 | } |
668 | #endif |
669 | if (ompt_exit_state == ompt_state_idle) { |
670 | this_thr->th.ompt_thread_info.state = ompt_state_overhead; |
671 | } |
672 | } |
673 | #endif |
674 | #if KMP_STATS_ENABLED |
675 | // If we were put into idle state, pop that off the state stack |
676 | if (KMP_GET_THREAD_STATE() == IDLE) { |
677 | KMP_POP_PARTITIONED_TIMER(); |
678 | KMP_SET_THREAD_STATE(thread_state); |
679 | this_thr->th.th_stats->resetIdleFlag(); |
680 | } |
681 | #endif |
682 | |
683 | #if KMP_OS_UNIX |
684 | if (final_spin) |
685 | KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false); |
686 | #endif |
687 | KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin)); |
688 | if (Cancellable) { |
689 | kmp_team_t *team = this_thr->th.th_team; |
690 | if (team && team->t.t_cancel_request == cancel_parallel) { |
691 | if (tasks_completed) { |
692 | // undo the previous decrement of unfinished_threads so that the |
693 | // thread can decrement at the join barrier with no problem |
694 | kmp_task_team_t *task_team = this_thr->th.th_task_team; |
695 | std::atomic<kmp_int32> *unfinished_threads = |
696 | &(task_team->tt.tt_unfinished_threads); |
697 | KMP_ATOMIC_INC(unfinished_threads); |
698 | } |
699 | return true; |
700 | } |
701 | } |
702 | return false; |
703 | } |
704 | |
705 | #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT |
706 | // Set up a monitor on the flag variable causing the calling thread to wait in |
707 | // a less active state until the flag variable is modified. |
708 | template <class C> |
709 | static inline void __kmp_mwait_template(int th_gtid, C *flag) { |
710 | KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_mwait); |
711 | kmp_info_t *th = __kmp_threads[th_gtid]; |
712 | |
  KF_TRACE(30, ("__kmp_mwait_template: T#%d enter for flag = %p\n", th_gtid,
714 | flag->get())); |
715 | |
716 | // User-level mwait is available |
717 | KMP_DEBUG_ASSERT(__kmp_mwait_enabled || __kmp_umwait_enabled); |
718 | |
719 | __kmp_suspend_initialize_thread(th); |
720 | __kmp_lock_suspend_mx(th); |
721 | |
722 | volatile void *spin = flag->get(); |
723 | void *cacheline = (void *)(kmp_uintptr_t(spin) & ~(CACHE_LINE - 1)); |
724 | |
725 | if (!flag->done_check()) { |
726 | // Mark thread as no longer active |
727 | th->th.th_active = FALSE; |
728 | if (th->th.th_active_in_pool) { |
729 | th->th.th_active_in_pool = FALSE; |
730 | KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth); |
731 | KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0); |
732 | } |
733 | flag->set_sleeping(); |
    KF_TRACE(50, ("__kmp_mwait_template: T#%d calling monitor\n", th_gtid));
735 | #if KMP_HAVE_UMWAIT |
736 | if (__kmp_umwait_enabled) { |
737 | __kmp_umonitor(cacheline); |
738 | } |
739 | #elif KMP_HAVE_MWAIT |
740 | if (__kmp_mwait_enabled) { |
741 | __kmp_mm_monitor(cacheline, 0, 0); |
742 | } |
743 | #endif |
744 | // To avoid a race, check flag between 'monitor' and 'mwait'. A write to |
745 | // the address could happen after the last time we checked and before |
746 | // monitoring started, in which case monitor can't detect the change. |
747 | if (flag->done_check()) |
748 | flag->unset_sleeping(); |
749 | else { |
750 | // if flag changes here, wake-up happens immediately |
751 | TCW_PTR(th->th.th_sleep_loc, (void *)flag); |
752 | th->th.th_sleep_loc_type = flag->get_type(); |
753 | __kmp_unlock_suspend_mx(th); |
      KF_TRACE(50, ("__kmp_mwait_template: T#%d calling mwait\n", th_gtid));
#if KMP_HAVE_UMWAIT
      if (__kmp_umwait_enabled) {
        __kmp_umwait(1, 100); // to do: enable ctrl via hints, backoff counter
758 | } |
759 | #elif KMP_HAVE_MWAIT |
760 | if (__kmp_mwait_enabled) { |
761 | __kmp_mm_mwait(0, __kmp_mwait_hints); |
762 | } |
763 | #endif |
      KF_TRACE(50, ("__kmp_mwait_template: T#%d mwait done\n", th_gtid));
765 | __kmp_lock_suspend_mx(th); |
766 | // Clean up sleep info; doesn't matter how/why this thread stopped waiting |
767 | if (flag->is_sleeping()) |
768 | flag->unset_sleeping(); |
769 | TCW_PTR(th->th.th_sleep_loc, NULL); |
770 | th->th.th_sleep_loc_type = flag_unset; |
771 | } |
772 | // Mark thread as active again |
773 | th->th.th_active = TRUE; |
774 | if (TCR_4(th->th.th_in_pool)) { |
775 | KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth); |
776 | th->th.th_active_in_pool = TRUE; |
777 | } |
778 | } // Drop out to main wait loop to check flag, handle tasks, etc. |
779 | __kmp_unlock_suspend_mx(th); |
  KF_TRACE(30, ("__kmp_mwait_template: T#%d exit\n", th_gtid));
781 | } |
782 | #endif // KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT |
783 | |
784 | /* Release any threads specified as waiting on the flag by releasing the flag |
   and resuming the waiting thread if indicated by the sleep bit(s). A thread that
786 | calls __kmp_wait_template must call this function to wake up the potentially |
787 | sleeping thread and prevent deadlocks! */ |
788 | template <class C> static inline void __kmp_release_template(C *flag) { |
789 | #ifdef KMP_DEBUG |
790 | int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1; |
791 | #endif |
  KF_TRACE(20, ("__kmp_release: T#%d releasing flag(%x)\n", gtid, flag->get()));
793 | KMP_DEBUG_ASSERT(flag->get()); |
794 | KMP_FSYNC_RELEASING(flag->get_void_p()); |
795 | |
796 | flag->internal_release(); |
797 | |
  KF_TRACE(100, ("__kmp_release: T#%d set new spin=%d\n", gtid, flag->get(),
799 | flag->load())); |
800 | |
801 | if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { |
802 | // Only need to check sleep stuff if infinite block time not set. |
803 | // Are *any* threads waiting on flag sleeping? |
804 | if (flag->is_any_sleeping()) { |
805 | for (unsigned int i = 0; i < flag->get_num_waiters(); ++i) { |
806 | // if sleeping waiter exists at i, sets current_waiter to i inside flag |
807 | kmp_info_t *waiter = flag->get_waiter(i); |
808 | if (waiter) { |
809 | int wait_gtid = waiter->th.th_info.ds.ds_gtid; |
810 | // Wake up thread if needed |
811 | KF_TRACE(50, ("__kmp_release: T#%d waking up thread T#%d since sleep " |
812 | "flag(%p) set\n" , |
813 | gtid, wait_gtid, flag->get())); |
814 | flag->resume(wait_gtid); // unsets flag's current_waiter when done |
815 | } |
816 | } |
817 | } |
818 | } |
819 | } |
820 | |
821 | template <bool Cancellable, bool Sleepable> |
822 | class kmp_flag_32 : public kmp_flag_atomic<kmp_uint32, flag32, Sleepable> { |
823 | public: |
824 | kmp_flag_32(std::atomic<kmp_uint32> *p) |
825 | : kmp_flag_atomic<kmp_uint32, flag32, Sleepable>(p) {} |
826 | kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_info_t *thr) |
827 | : kmp_flag_atomic<kmp_uint32, flag32, Sleepable>(p, thr) {} |
828 | kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_uint32 c) |
829 | : kmp_flag_atomic<kmp_uint32, flag32, Sleepable>(p, c) {} |
830 | void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); } |
831 | #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT |
832 | void mwait(int th_gtid) { __kmp_mwait_32(th_gtid, this); } |
833 | #endif |
834 | void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); } |
835 | int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, |
836 | int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj), |
837 | kmp_int32 is_constrained) { |
838 | return __kmp_execute_tasks_32( |
839 | this_thr, gtid, this, final_spin, |
840 | thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained); |
841 | } |
842 | bool wait(kmp_info_t *this_thr, |
843 | int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) { |
844 | if (final_spin) |
845 | return __kmp_wait_template<kmp_flag_32, TRUE, Cancellable, Sleepable>( |
846 | this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj)); |
847 | else |
848 | return __kmp_wait_template<kmp_flag_32, FALSE, Cancellable, Sleepable>( |
849 | this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj)); |
850 | } |
851 | void release() { __kmp_release_template(this); } |
852 | flag_type get_ptr_type() { return flag32; } |
853 | }; |
854 | |
855 | template <bool Cancellable, bool Sleepable> |
856 | class kmp_flag_64 : public kmp_flag_native<kmp_uint64, flag64, Sleepable> { |
857 | public: |
858 | kmp_flag_64(volatile kmp_uint64 *p) |
859 | : kmp_flag_native<kmp_uint64, flag64, Sleepable>(p) {} |
860 | kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr) |
861 | : kmp_flag_native<kmp_uint64, flag64, Sleepable>(p, thr) {} |
862 | kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c) |
863 | : kmp_flag_native<kmp_uint64, flag64, Sleepable>(p, c) {} |
864 | kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c, std::atomic<bool> *loc) |
865 | : kmp_flag_native<kmp_uint64, flag64, Sleepable>(p, c, loc) {} |
866 | void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); } |
867 | #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT |
868 | void mwait(int th_gtid) { __kmp_mwait_64(th_gtid, this); } |
869 | #endif |
870 | void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); } |
871 | int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, |
872 | int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj), |
873 | kmp_int32 is_constrained) { |
874 | return __kmp_execute_tasks_64( |
875 | this_thr, gtid, this, final_spin, |
876 | thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained); |
877 | } |
878 | bool wait(kmp_info_t *this_thr, |
879 | int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) { |
880 | if (final_spin) |
881 | return __kmp_wait_template<kmp_flag_64, TRUE, Cancellable, Sleepable>( |
882 | this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj)); |
883 | else |
884 | return __kmp_wait_template<kmp_flag_64, FALSE, Cancellable, Sleepable>( |
885 | this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj)); |
886 | } |
887 | void release() { __kmp_release_template(this); } |
888 | flag_type get_ptr_type() { return flag64; } |
889 | }; |
890 | |
891 | template <bool Cancellable, bool Sleepable> |
892 | class kmp_atomic_flag_64 |
893 | : public kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable> { |
894 | public: |
895 | kmp_atomic_flag_64(std::atomic<kmp_uint64> *p) |
896 | : kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable>(p) {} |
897 | kmp_atomic_flag_64(std::atomic<kmp_uint64> *p, kmp_info_t *thr) |
898 | : kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable>(p, thr) {} |
899 | kmp_atomic_flag_64(std::atomic<kmp_uint64> *p, kmp_uint64 c) |
900 | : kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable>(p, c) {} |
901 | kmp_atomic_flag_64(std::atomic<kmp_uint64> *p, kmp_uint64 c, |
902 | std::atomic<bool> *loc) |
903 | : kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable>(p, c, loc) {} |
904 | void suspend(int th_gtid) { __kmp_atomic_suspend_64(th_gtid, this); } |
905 | void mwait(int th_gtid) { __kmp_atomic_mwait_64(th_gtid, this); } |
906 | void resume(int th_gtid) { __kmp_atomic_resume_64(th_gtid, this); } |
907 | int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, |
908 | int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj), |
909 | kmp_int32 is_constrained) { |
910 | return __kmp_atomic_execute_tasks_64( |
911 | this_thr, gtid, this, final_spin, |
912 | thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained); |
913 | } |
914 | bool wait(kmp_info_t *this_thr, |
915 | int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) { |
916 | if (final_spin) |
917 | return __kmp_wait_template<kmp_atomic_flag_64, TRUE, Cancellable, |
918 | Sleepable>( |
919 | this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj)); |
920 | else |
921 | return __kmp_wait_template<kmp_atomic_flag_64, FALSE, Cancellable, |
922 | Sleepable>( |
923 | this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj)); |
924 | } |
925 | void release() { __kmp_release_template(this); } |
926 | flag_type get_ptr_type() { return atomic_flag64; } |
927 | }; |
928 | |
929 | // Hierarchical 64-bit on-core barrier instantiation |
930 | class kmp_flag_oncore : public kmp_flag_native<kmp_uint64, flag_oncore, false> { |
931 | kmp_uint32 offset; /**< Portion of flag of interest for an operation. */ |
932 | bool flag_switch; /**< Indicates a switch in flag location. */ |
933 | enum barrier_type bt; /**< Barrier type. */ |
934 | kmp_info_t *this_thr; /**< Thread to redirect to different flag location. */ |
935 | #if USE_ITT_BUILD |
936 | void *itt_sync_obj; /**< ITT object to pass to new flag location. */ |
937 | #endif |
938 | unsigned char &byteref(volatile kmp_uint64 *loc, size_t offset) { |
939 | return (RCAST(unsigned char *, CCAST(kmp_uint64 *, loc)))[offset]; |
940 | } |
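  // Illustration (hypothetical offset value): byteref() aliases a single byte
  // of the 64-bit flag word, which is how each participant in the hierarchical
  // barrier gets its own slot to signal through. For example, with offset == 2:
  //   kmp_uint64 mask = 0;
  //   byteref(&mask, 2) = 1; // mask == 0x10000 on a little-endian target
  // which is exactly the mask internal_release() ORs into the shared word.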
941 | |
942 | public: |
943 | kmp_flag_oncore(volatile kmp_uint64 *p) |
944 | : kmp_flag_native<kmp_uint64, flag_oncore, false>(p), flag_switch(false) { |
945 | } |
946 | kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx) |
947 | : kmp_flag_native<kmp_uint64, flag_oncore, false>(p), offset(idx), |
948 | flag_switch(false), |
949 | bt(bs_last_barrier) USE_ITT_BUILD_ARG(itt_sync_obj(nullptr)) {} |
950 | kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx, |
951 | enum barrier_type bar_t, |
952 | kmp_info_t *thr USE_ITT_BUILD_ARG(void *itt)) |
953 | : kmp_flag_native<kmp_uint64, flag_oncore, false>(p, c), offset(idx), |
954 | flag_switch(false), bt(bar_t), |
955 | this_thr(thr) USE_ITT_BUILD_ARG(itt_sync_obj(itt)) {} |
956 | virtual ~kmp_flag_oncore() override {} |
957 | void *operator new(size_t size) { return __kmp_allocate(size); } |
958 | void operator delete(void *p) { __kmp_free(p); } |
959 | bool done_check_val(kmp_uint64 old_loc) override { |
    return byteref(&old_loc, offset) == checker;
961 | } |
  bool done_check() override { return done_check_val(*get()); }
963 | bool notdone_check() override { |
964 | // Calculate flag_switch |
965 | if (this_thr->th.th_bar[bt].bb.wait_flag == KMP_BARRIER_SWITCH_TO_OWN_FLAG) |
966 | flag_switch = true; |
    if (byteref(get(), offset) != 1 && !flag_switch)
968 | return true; |
969 | else if (flag_switch) { |
970 | this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING; |
971 | kmp_flag_64<> flag(&this_thr->th.th_bar[bt].bb.b_go, |
972 | (kmp_uint64)KMP_BARRIER_STATE_BUMP); |
      __kmp_wait_64(this_thr, &flag, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
974 | } |
975 | return false; |
976 | } |
977 | void internal_release() { |
978 | // Other threads can write their own bytes simultaneously. |
979 | if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) { |
      byteref(get(), offset) = 1;
981 | } else { |
982 | kmp_uint64 mask = 0; |
      byteref(&mask, offset) = 1;
984 | KMP_TEST_THEN_OR64(get(), mask); |
985 | } |
986 | } |
987 | void wait(kmp_info_t *this_thr, int final_spin) { |
988 | if (final_spin) |
989 | __kmp_wait_template<kmp_flag_oncore, TRUE>( |
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
991 | else |
992 | __kmp_wait_template<kmp_flag_oncore, FALSE>( |
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
994 | } |
  void release() { __kmp_release_template(this); }
  void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); }
#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
  void mwait(int th_gtid) { __kmp_mwait_oncore(th_gtid, this); }
#endif
  void resume(int th_gtid) { __kmp_resume_oncore(th_gtid, this); }
1001 | int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, |
1002 | int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj), |
1003 | kmp_int32 is_constrained) { |
1004 | #if OMPD_SUPPORT |
1005 | int ret = __kmp_execute_tasks_oncore( |
        this_thr, gtid, this, final_spin,
1007 | thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained); |
1008 | if (ompd_state & OMPD_ENABLE_BP) |
1009 | ompd_bp_task_end(); |
1010 | return ret; |
1011 | #else |
1012 | return __kmp_execute_tasks_oncore( |
1013 | this_thr, gtid, this, final_spin, |
1014 | thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained); |
1015 | #endif |
1016 | } |
1017 | enum barrier_type get_bt() { return bt; } |
1018 | flag_type get_ptr_type() { return flag_oncore; } |
1019 | }; |
1020 | |
1021 | static inline void __kmp_null_resume_wrapper(kmp_info_t *thr) { |
1022 | int gtid = __kmp_gtid_from_thread(thr); |
1023 | void *flag = CCAST(void *, thr->th.th_sleep_loc); |
1024 | flag_type type = thr->th.th_sleep_loc_type; |
1025 | if (!flag) |
1026 | return; |
1027 | // Attempt to wake up a thread: examine its type and call appropriate template |
1028 | switch (type) { |
1029 | case flag32: |
    __kmp_resume_32(gtid, RCAST(kmp_flag_32<> *, flag));
    break;
  case flag64:
    __kmp_resume_64(gtid, RCAST(kmp_flag_64<> *, flag));
    break;
  case atomic_flag64:
    __kmp_atomic_resume_64(gtid, RCAST(kmp_atomic_flag_64<> *, flag));
    break;
  case flag_oncore:
    __kmp_resume_oncore(gtid, RCAST(kmp_flag_oncore *, flag));
    break;
  case flag_unset:
    KF_TRACE(100, ("__kmp_null_resume_wrapper: flag type %d is unset\n", type));
1043 | break; |
1044 | } |
1045 | } |
1046 | |
1047 | /*! |
1048 | @} |
1049 | */ |
1050 | |
1051 | #endif // KMP_WAIT_RELEASE_H |
1052 | |