1/*
2 * kmp_csupport.cpp -- kfront linkage support for OpenMP.
3 */
4
5//===----------------------------------------------------------------------===//
6//
7// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8// See https://llvm.org/LICENSE.txt for license information.
9// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10//
11//===----------------------------------------------------------------------===//
12
13#define __KMP_IMP
14#include "omp.h" /* extern "C" declarations of user-visible routines */
15#include "kmp.h"
16#include "kmp_error.h"
17#include "kmp_i18n.h"
18#include "kmp_itt.h"
19#include "kmp_lock.h"
20#include "kmp_stats.h"
21#include "kmp_utils.h"
22#include "ompt-specific.h"
23
24#define MAX_MESSAGE 512
25
26// flags will be used in future, e.g. to implement openmp_strict library
27// restrictions
28
29/*!
30 * @ingroup STARTUP_SHUTDOWN
31 * @param loc in source location information
32 * @param flags in for future use (currently ignored)
33 *
34 * Initialize the runtime library. This call is optional; if it is not made then
35 * it will be implicitly called by attempts to use other library functions.
36 */
37void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
38 // By default __kmpc_begin() is no-op.
39 char *env;
  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
      __kmp_str_match_true(env)) {
42 __kmp_middle_initialize();
43 __kmp_assign_root_init_mask();
44 KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
45 } else if (__kmp_ignore_mppbeg() == FALSE) {
46 // By default __kmp_ignore_mppbeg() returns TRUE.
47 __kmp_internal_begin();
48 KC_TRACE(10, ("__kmpc_begin: called\n"));
49 }
50}
51
52/*!
53 * @ingroup STARTUP_SHUTDOWN
54 * @param loc source location information
55 *
56 * Shutdown the runtime library. This is also optional, and even if called will
57 * not do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to
58 * zero.
59 */
60void __kmpc_end(ident_t *loc) {
61 // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end()
62 // call no-op. However, this can be overridden with KMP_IGNORE_MPPEND
63 // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
64 // returns FALSE and __kmpc_end() will unregister this root (it can cause
65 // library shut down).
66 if (__kmp_ignore_mppend() == FALSE) {
67 KC_TRACE(10, ("__kmpc_end: called\n"));
68 KA_TRACE(30, ("__kmpc_end\n"));
69
    __kmp_internal_end_thread(-1);
71 }
72#if KMP_OS_WINDOWS && OMPT_SUPPORT
73 // Normal exit process on Windows does not allow worker threads of the final
74 // parallel region to finish reporting their events, so shutting down the
75 // library here fixes the issue at least for the cases where __kmpc_end() is
76 // placed properly.
77 if (ompt_enabled.enabled)
78 __kmp_internal_end_library(__kmp_gtid_get_specific());
79#endif
80}
81
82/*!
83@ingroup THREAD_STATES
84@param loc Source location information.
85@return The global thread index of the active thread.
86
87This function can be called in any context.
88
If the runtime has only been entered at the outermost level from a
90single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is
91that which would be returned by omp_get_thread_num() in the outermost
92active parallel construct. (Or zero if there is no active parallel
93construct, since the primary thread is necessarily thread zero).
94
95If multiple non-OpenMP threads all enter an OpenMP construct then this
96will be a unique thread identifier among all the threads created by
97the OpenMP runtime (but the value cannot be defined in terms of
98OpenMP thread ids returned by omp_get_thread_num()).
99*/
100kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
101 kmp_int32 gtid = __kmp_entry_gtid();
102
103 KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));
104
105 return gtid;
106}
107
108/*!
109@ingroup THREAD_STATES
110@param loc Source location information.
111@return The number of threads under control of the OpenMP<sup>*</sup> runtime
112
113This function can be called in any context.
114It returns the total number of threads under the control of the OpenMP runtime.
115That is not a number that can be determined by any OpenMP standard calls, since
116the library may be called from more than one non-OpenMP thread, and this
reflects the total over all such calls. Similarly, since the runtime maintains
underlying threads even when they are not active (because the cost of creating
and destroying OS threads is high), this call counts all such threads even if
they are not waiting for work.
121*/
122kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
123 KC_TRACE(10,
124 ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));
125
126 return TCR_4(__kmp_all_nth);
127}
128
129/*!
130@ingroup THREAD_STATES
131@param loc Source location information.
132@return The thread number of the calling thread in the innermost active parallel
133construct.
134*/
135kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
136 KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
137 return __kmp_tid_from_gtid(__kmp_entry_gtid());
138}
139
140/*!
141@ingroup THREAD_STATES
142@param loc Source location information.
143@return The number of threads in the innermost active parallel construct.
144*/
145kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
146 KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));
147
148 return __kmp_entry_thread()->th.th_team->t.t_nproc;
149}
150
151/*!
152 * @ingroup DEPRECATED
153 * @param loc location description
154 *
155 * This function need not be called. It always returns TRUE.
156 */
157kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
158#ifndef KMP_DEBUG
159
160 return TRUE;
161
162#else
163
164 const char *semi2;
165 const char *semi3;
166 int line_no;
167
168 if (__kmp_par_range == 0) {
169 return TRUE;
170 }
171 semi2 = loc->psource;
172 if (semi2 == NULL) {
173 return TRUE;
174 }
  semi2 = strchr(semi2, ';');
176 if (semi2 == NULL) {
177 return TRUE;
178 }
  semi2 = strchr(semi2 + 1, ';');
180 if (semi2 == NULL) {
181 return TRUE;
182 }
183 if (__kmp_par_range_filename[0]) {
184 const char *name = semi2 - 1;
185 while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
186 name--;
187 }
188 if ((*name == '/') || (*name == ';')) {
189 name++;
190 }
    if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
192 return __kmp_par_range < 0;
193 }
194 }
  semi3 = strchr(semi2 + 1, ';');
196 if (__kmp_par_range_routine[0]) {
197 if ((semi3 != NULL) && (semi3 > semi2) &&
        (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
199 return __kmp_par_range < 0;
200 }
201 }
  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
203 if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
204 return __kmp_par_range > 0;
205 }
206 return __kmp_par_range < 0;
207 }
208 return TRUE;
209
210#endif /* KMP_DEBUG */
211}
212
213/*!
214@ingroup THREAD_STATES
215@param loc Source location information.
216@return 1 if this thread is executing inside an active parallel region, zero if
217not.
218*/
219kmp_int32 __kmpc_in_parallel(ident_t *loc) {
220 return __kmp_entry_thread()->th.th_root->r.r_active;
221}
222
223/*!
224@ingroup PARALLEL
225@param loc source location information
226@param global_tid global thread number
227@param num_threads number of threads requested for this parallel construct
228
229Set the number of threads to be used by the next fork spawned by this thread.
230This call is only required if the parallel construct has a `num_threads` clause.
231*/
232void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
233 kmp_int32 num_threads) {
234 KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
235 global_tid, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_threads(loc, global_tid, num_threads);
238}
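
/* Illustrative sketch (editorial, not part of the runtime): for a directive
   such as
     #pragma omp parallel num_threads(4)
   a compiler targeting this entry point typically emits a sequence along the
   lines of
     kmp_int32 gtid = __kmpc_global_thread_num(&loc);
     __kmpc_push_num_threads(&loc, gtid, 4);
     __kmpc_fork_call(&loc, ...);
   where `loc` stands for the compiler-generated ident_t of the construct. */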
239
240void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
241 KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
242 /* the num_threads are automatically popped */
243}
244
245void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
246 kmp_int32 proc_bind) {
247 KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
248 proc_bind));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
251}
252
253/*!
254@ingroup PARALLEL
255@param loc source location information
256@param argc total number of arguments in the ellipsis
257@param microtask pointer to callback routine consisting of outlined parallel
258construct
259@param ... pointers to shared variables that aren't global
260
261Do the actual fork and call the microtask in the relevant number of threads.
262*/
263void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
264 int gtid = __kmp_entry_gtid();
265
266#if (KMP_STATS_ENABLED)
267 // If we were in a serial region, then stop the serial timer, record
268 // the event, and start parallel region timer
269 stats_state_e previous_state = KMP_GET_THREAD_STATE();
270 if (previous_state == stats_state_e::SERIAL_REGION) {
271 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
272 } else {
273 KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
274 }
275 int inParallel = __kmpc_in_parallel(loc);
276 if (inParallel) {
277 KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
278 } else {
279 KMP_COUNT_BLOCK(OMP_PARALLEL);
280 }
281#endif
282
283 // maybe to save thr_state is enough here
284 {
285 va_list ap;
286 va_start(ap, microtask);
287
288#if OMPT_SUPPORT
289 ompt_frame_t *ompt_frame;
290 if (ompt_enabled.enabled) {
291 kmp_info_t *master_th = __kmp_threads[gtid];
292 ompt_frame = &master_th->th.th_current_task->ompt_task_info.frame;
293 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
294 }
295 OMPT_STORE_RETURN_ADDRESS(gtid);
296#endif
297
298#if INCLUDE_SSC_MARKS
299 SSC_MARK_FORKING();
300#endif
    __kmp_fork_call(loc, gtid, fork_context_intel, argc,
302 VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
303 VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
304 kmp_va_addr_of(ap));
305#if INCLUDE_SSC_MARKS
306 SSC_MARK_JOINING();
307#endif
308 __kmp_join_call(loc, gtid
309#if OMPT_SUPPORT
310 ,
                    fork_context_intel
312#endif
313 );
314
315 va_end(ap);
316
317#if OMPT_SUPPORT
318 if (ompt_enabled.enabled) {
319 ompt_frame->enter_frame = ompt_data_none;
320 }
321#endif
322 }
323
324#if KMP_STATS_ENABLED
325 if (previous_state == stats_state_e::SERIAL_REGION) {
326 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
327 KMP_SET_THREAD_STATE(previous_state);
328 } else {
329 KMP_POP_PARTITIONED_TIMER();
330 }
331#endif // KMP_STATS_ENABLED
332}
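
/* Illustrative sketch (editorial): the microtask passed to __kmpc_fork_call()
   is the compiler-outlined body of the parallel region, with the kmpc_micro
   signature. For a region that shares two variables, the emitted code is
   roughly
     void outlined(kmp_int32 *global_tid, kmp_int32 *bound_tid,
                   int *a, int *b) {
       // ... body of the parallel region ...
     }
     __kmpc_fork_call(&loc, 2, (kmpc_micro)outlined, &a, &b);
   `outlined`, `a` and `b` are hypothetical names; argc (here 2) counts the
   trailing shared-variable pointers. */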
333
334/*!
335@ingroup PARALLEL
336@param loc source location information
337@param microtask pointer to callback routine consisting of outlined parallel
338construct
339@param cond condition for running in parallel
340@param args struct of pointers to shared variables that aren't global
341
342Perform a fork only if the condition is true.
343*/
344void __kmpc_fork_call_if(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
345 kmp_int32 cond, void *args) {
346 int gtid = __kmp_entry_gtid();
347 if (cond) {
348 if (args)
349 __kmpc_fork_call(loc, argc, microtask, args);
350 else
351 __kmpc_fork_call(loc, argc, microtask);
352 } else {
    __kmpc_serialized_parallel(loc, gtid);
354
355#if OMPT_SUPPORT
356 void *exit_frame_ptr;
357#endif
358
359 if (args)
360 __kmp_invoke_microtask(VOLATILE_CAST(microtask_t) microtask, gtid,
361 /*npr=*/0,
                             /*argc=*/1, &args
363#if OMPT_SUPPORT
364 ,
                             &exit_frame_ptr
366#endif
367 );
368 else
369 __kmp_invoke_microtask(VOLATILE_CAST(microtask_t) microtask, gtid,
370 /*npr=*/0,
371 /*argc=*/0,
                             /*args=*/nullptr
373#if OMPT_SUPPORT
374 ,
                             &exit_frame_ptr
376#endif
377 );
378
    __kmpc_end_serialized_parallel(loc, gtid);
380 }
381}
382
383/*!
384@ingroup PARALLEL
385@param loc source location information
386@param global_tid global thread number
387@param num_teams number of teams requested for the teams construct
388@param num_threads number of threads per team requested for the teams construct
389
390Set the number of teams to be used by the teams construct.
391This call is only required if the teams construct has a `num_teams` clause
392or a `thread_limit` clause (or both).
393*/
394void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
395 kmp_int32 num_teams, kmp_int32 num_threads) {
396 KA_TRACE(20,
397 ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
398 global_tid, num_teams, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
401}
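
/* Illustrative sketch (editorial): for
     #pragma omp teams num_teams(4) thread_limit(8)
   the expected sequence is roughly
     __kmpc_push_num_teams(&loc, gtid, 4, 8);
     __kmpc_fork_teams(&loc, 1, (kmpc_micro)teams_outlined, &x);
   where `teams_outlined` and `x` are hypothetical placeholders for the
   outlined teams body and a shared variable. */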
402
403/*!
404@ingroup PARALLEL
405@param loc source location information
406@param global_tid global thread number
407@param thread_limit limit on number of threads which can be created within the
408current task
409
Set the thread_limit for the current task.
This call exists to support the `thread_limit` clause on the `target` construct.
412*/
413void __kmpc_set_thread_limit(ident_t *loc, kmp_int32 global_tid,
414 kmp_int32 thread_limit) {
  __kmp_assert_valid_gtid(global_tid);
416 kmp_info_t *thread = __kmp_threads[global_tid];
417 if (thread_limit > 0)
418 thread->th.th_current_task->td_icvs.task_thread_limit = thread_limit;
419}
420
421/*!
422@ingroup PARALLEL
423@param loc source location information
424@param global_tid global thread number
425@param num_teams_lb lower bound on number of teams requested for the teams
426construct
427@param num_teams_ub upper bound on number of teams requested for the teams
428construct
429@param num_threads number of threads per team requested for the teams construct
430
431Set the number of teams to be used by the teams construct. The number of initial
teams created will be greater than or equal to the lower bound and less than or
433equal to the upper bound.
434This call is only required if the teams construct has a `num_teams` clause
435or a `thread_limit` clause (or both).
436*/
437void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid,
438 kmp_int32 num_teams_lb, kmp_int32 num_teams_ub,
439 kmp_int32 num_threads) {
440 KA_TRACE(20, ("__kmpc_push_num_teams_51: enter T#%d num_teams_lb=%d"
441 " num_teams_ub=%d num_threads=%d\n",
442 global_tid, num_teams_lb, num_teams_ub, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_teams_51(loc, global_tid, num_teams_lb, num_teams_ub,
                          num_threads);
446}
447
448/*!
449@ingroup PARALLEL
450@param loc source location information
451@param argc total number of arguments in the ellipsis
452@param microtask pointer to callback routine consisting of outlined teams
453construct
454@param ... pointers to shared variables that aren't global
455
456Do the actual fork and call the microtask in the relevant number of threads.
457*/
458void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
459 ...) {
460 int gtid = __kmp_entry_gtid();
461 kmp_info_t *this_thr = __kmp_threads[gtid];
462 va_list ap;
463 va_start(ap, microtask);
464
465#if KMP_STATS_ENABLED
466 KMP_COUNT_BLOCK(OMP_TEAMS);
467 stats_state_e previous_state = KMP_GET_THREAD_STATE();
468 if (previous_state == stats_state_e::SERIAL_REGION) {
469 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
470 } else {
471 KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
472 }
473#endif
474
475 // remember teams entry point and nesting level
476 this_thr->th.th_teams_microtask = microtask;
477 this_thr->th.th_teams_level =
478 this_thr->th.th_team->t.t_level; // AC: can be >0 on host
479
480#if OMPT_SUPPORT
481 kmp_team_t *parent_team = this_thr->th.th_team;
482 int tid = __kmp_tid_from_gtid(gtid);
483 if (ompt_enabled.enabled) {
484 parent_team->t.t_implicit_task_taskdata[tid]
485 .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
486 }
487 OMPT_STORE_RETURN_ADDRESS(gtid);
488#endif
489
490 // check if __kmpc_push_num_teams called, set default number of teams
491 // otherwise
492 if (this_thr->th.th_teams_size.nteams == 0) {
    __kmp_push_num_teams(loc, gtid, 0, 0);
494 }
495 KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
496 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
497 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);
498
499 __kmp_fork_call(
      loc, gtid, fork_context_intel, argc,
501 VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task
502 VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, kmp_va_addr_of(ap));
503 __kmp_join_call(loc, gtid
504#if OMPT_SUPPORT
505 ,
                  fork_context_intel
507#endif
508 );
509
510 // Pop current CG root off list
511 KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
512 kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
513 this_thr->th.th_cg_roots = tmp->up;
514 KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
515 " to node %p. cg_nthreads was %d\n",
516 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
517 KMP_DEBUG_ASSERT(tmp->cg_nthreads);
518 int i = tmp->cg_nthreads--;
  if (i == 1) { // check if we are the last thread in CG (not always the case)
520 __kmp_free(tmp);
521 }
522 // Restore current task's thread_limit from CG root
523 KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
524 this_thr->th.th_current_task->td_icvs.thread_limit =
525 this_thr->th.th_cg_roots->cg_thread_limit;
526
527 this_thr->th.th_teams_microtask = NULL;
528 this_thr->th.th_teams_level = 0;
529 *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
530 va_end(ap);
531#if KMP_STATS_ENABLED
532 if (previous_state == stats_state_e::SERIAL_REGION) {
533 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
534 KMP_SET_THREAD_STATE(previous_state);
535 } else {
536 KMP_POP_PARTITIONED_TIMER();
537 }
538#endif // KMP_STATS_ENABLED
539}
540
541// I don't think this function should ever have been exported.
542// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
543// openmp code ever called it, but it's been exported from the RTL for so
544// long that I'm afraid to remove the definition.
545int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }
546
547/*!
548@ingroup PARALLEL
549@param loc source location information
550@param global_tid global thread number
551
552Enter a serialized parallel construct. This interface is used to handle a
553conditional parallel region, like this,
554@code
555#pragma omp parallel if (condition)
556@endcode
557when the condition is false.
558*/
559void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
560 // The implementation is now in kmp_runtime.cpp so that it can share static
561 // functions with kmp_fork_call since the tasks to be done are similar in
562 // each case.
  __kmp_assert_valid_gtid(global_tid);
564#if OMPT_SUPPORT
565 OMPT_STORE_RETURN_ADDRESS(global_tid);
566#endif
  __kmp_serialized_parallel(loc, global_tid);
568}
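
/* Illustrative sketch (editorial): for
     #pragma omp parallel if (cond)
   the false branch of the condition is typically lowered to
     __kmpc_serialized_parallel(&loc, gtid);
     outlined(&gtid, &bound_tid, ...); // run the region body on this thread
     __kmpc_end_serialized_parallel(&loc, gtid);
   mirroring what __kmpc_fork_call_if() above does internally; `outlined` and
   `bound_tid` are hypothetical names. */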
569
570/*!
571@ingroup PARALLEL
572@param loc source location information
573@param global_tid global thread number
574
575Leave a serialized parallel construct.
576*/
577void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
578 kmp_internal_control_t *top;
579 kmp_info_t *this_thr;
580 kmp_team_t *serial_team;
581
582 KC_TRACE(10,
583 ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));
584
585 /* skip all this code for autopar serialized loops since it results in
586 unacceptable overhead */
587 if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
588 return;
589
590 // Not autopar code
  __kmp_assert_valid_gtid(global_tid);
592 if (!TCR_4(__kmp_init_parallel))
593 __kmp_parallel_initialize();
594
595 __kmp_resume_if_soft_paused();
596
597 this_thr = __kmp_threads[global_tid];
598 serial_team = this_thr->th.th_serial_team;
599
600 kmp_task_team_t *task_team = this_thr->th.th_task_team;
601 // we need to wait for the proxy tasks before finishing the thread
602 if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks ||
603 task_team->tt.tt_hidden_helper_task_encountered))
    __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
605
606 KMP_MB();
607 KMP_DEBUG_ASSERT(serial_team);
608 KMP_ASSERT(serial_team->t.t_serialized);
609 KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
610 KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
611 KMP_DEBUG_ASSERT(serial_team->t.t_threads);
612 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
613
614#if OMPT_SUPPORT
615 if (ompt_enabled.enabled &&
616 this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
617 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
618 if (ompt_enabled.ompt_callback_implicit_task) {
619 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
620 ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
621 OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
622 }
623
    // clear the task id only after unlinking the task
    ompt_data_t *parent_task_data;
    __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);
627
628 if (ompt_enabled.ompt_callback_parallel_end) {
629 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
630 &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
631 ompt_parallel_invoker_program | ompt_parallel_team,
632 OMPT_LOAD_RETURN_ADDRESS(global_tid));
633 }
    __ompt_lw_taskteam_unlink(this_thr);
635 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
636 }
637#endif
638
639 /* If necessary, pop the internal control stack values and replace the team
640 * values */
641 top = serial_team->t.t_control_stack_top;
642 if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
    copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
644 serial_team->t.t_control_stack_top = top->next;
645 __kmp_free(top);
646 }
647
648 /* pop dispatch buffers stack */
649 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
650 {
651 dispatch_private_info_t *disp_buffer =
652 serial_team->t.t_dispatch->th_disp_buffer;
653 serial_team->t.t_dispatch->th_disp_buffer =
654 serial_team->t.t_dispatch->th_disp_buffer->next;
655 __kmp_free(disp_buffer);
656 }
657 this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore
658
659 --serial_team->t.t_serialized;
660 if (serial_team->t.t_serialized == 0) {
661
662 /* return to the parallel section */
663
664#if KMP_ARCH_X86 || KMP_ARCH_X86_64
665 if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
666 __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
      __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
669 }
670#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
671
672 __kmp_pop_current_task_from_thread(this_thr);
673#if OMPD_SUPPORT
674 if (ompd_state & OMPD_ENABLE_BP)
675 ompd_bp_parallel_end();
676#endif
677
678 this_thr->th.th_team = serial_team->t.t_parent;
679 this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;
680
681 /* restore values cached in the thread */
682 this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
683 this_thr->th.th_team_master =
684 serial_team->t.t_parent->t.t_threads[0]; /* JPH */
685 this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;
686
687 /* TODO the below shouldn't need to be adjusted for serialized teams */
688 this_thr->th.th_dispatch =
689 &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];
690
691 KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
692 this_thr->th.th_current_task->td_flags.executing = 1;
693
694 if (__kmp_tasking_mode != tskm_immediate_exec) {
695 // Copy the task team from the new child / old parent team to the thread.
696 this_thr->th.th_task_team =
697 this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
698 KA_TRACE(20,
699 ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
700 "team %p\n",
701 global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
702 }
703#if KMP_AFFINITY_SUPPORTED
704 if (this_thr->th.th_team->t.t_level == 0 && __kmp_affinity.flags.reset) {
      __kmp_reset_root_init_mask(global_tid);
706 }
707#endif
708 } else {
709 if (__kmp_tasking_mode != tskm_immediate_exec) {
710 KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
711 "depth of serial team %p to %d\n",
712 global_tid, serial_team, serial_team->t.t_serialized));
713 }
714 }
715
716 serial_team->t.t_level--;
717 if (__kmp_env_consistency_check)
    __kmp_pop_parallel(global_tid, NULL);
719#if OMPT_SUPPORT
720 if (ompt_enabled.enabled)
721 this_thr->th.ompt_thread_info.state =
722 ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
723 : ompt_state_work_parallel);
724#endif
725}
726
727/*!
728@ingroup SYNCHRONIZATION
729@param loc source location information.
730
731Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though
732depending on the memory ordering convention obeyed by the compiler
733even that may not be necessary).
734*/
735void __kmpc_flush(ident_t *loc) {
736 KC_TRACE(10, ("__kmpc_flush: called\n"));
737
738 /* need explicit __mf() here since use volatile instead in library */
739 KMP_MFENCE(); /* Flush all pending memory write invalidates. */
740
741#if OMPT_SUPPORT && OMPT_OPTIONAL
742 if (ompt_enabled.ompt_callback_flush) {
743 ompt_callbacks.ompt_callback(ompt_callback_flush)(
744 __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
745 }
746#endif
747}
748
749/* -------------------------------------------------------------------------- */
750/*!
751@ingroup SYNCHRONIZATION
752@param loc source location information
753@param global_tid thread id.
754
755Execute a barrier.
756*/
757void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
758 KMP_COUNT_BLOCK(OMP_BARRIER);
759 KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);
761
762 if (!TCR_4(__kmp_init_parallel))
763 __kmp_parallel_initialize();
764
765 __kmp_resume_if_soft_paused();
766
767 if (__kmp_env_consistency_check) {
768 if (loc == 0) {
769 KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
770 }
    __kmp_check_barrier(global_tid, ct_barrier, loc);
772 }
773
774#if OMPT_SUPPORT
775 ompt_frame_t *ompt_frame;
776 if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
778 if (ompt_frame->enter_frame.ptr == NULL)
779 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
780 }
781 OMPT_STORE_RETURN_ADDRESS(global_tid);
782#endif
783 __kmp_threads[global_tid]->th.th_ident = loc;
784 // TODO: explicit barrier_wait_id:
785 // this function is called when 'barrier' directive is present or
786 // implicit barrier at the end of a worksharing construct.
787 // 1) better to add a per-thread barrier counter to a thread data structure
788 // 2) set to 0 when a new team is created
789 // 4) no sync is required
790
  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
792#if OMPT_SUPPORT && OMPT_OPTIONAL
793 if (ompt_enabled.enabled) {
794 ompt_frame->enter_frame = ompt_data_none;
795 }
796#endif
797}
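
/* Illustrative sketch (editorial): a bare
     #pragma omp barrier
   lowers to a single call,
     __kmpc_barrier(&loc, __kmpc_global_thread_num(&loc));
   with `loc` describing the source position of the directive. */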
798
799/* The BARRIER for a MASTER section is always explicit */
800/*!
801@ingroup WORK_SHARING
802@param loc source location information.
@param global_tid global thread number.
804@return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise.
805*/
806kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
807 int status = 0;
808
809 KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);
811
812 if (!TCR_4(__kmp_init_parallel))
813 __kmp_parallel_initialize();
814
815 __kmp_resume_if_soft_paused();
816
817 if (KMP_MASTER_GTID(global_tid)) {
818 KMP_COUNT_BLOCK(OMP_MASTER);
819 KMP_PUSH_PARTITIONED_TIMER(OMP_master);
820 status = 1;
821 }
822
823#if OMPT_SUPPORT && OMPT_OPTIONAL
824 if (status) {
825 if (ompt_enabled.ompt_callback_masked) {
826 kmp_info_t *this_thr = __kmp_threads[global_tid];
827 kmp_team_t *team = this_thr->th.th_team;
828
      int tid = __kmp_tid_from_gtid(global_tid);
830 ompt_callbacks.ompt_callback(ompt_callback_masked)(
831 ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
832 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
833 OMPT_GET_RETURN_ADDRESS(0));
834 }
835 }
836#endif
837
838 if (__kmp_env_consistency_check) {
839#if KMP_USE_DYNAMIC_LOCK
840 if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
844#else
845 if (status)
846 __kmp_push_sync(global_tid, ct_master, loc, NULL);
847 else
848 __kmp_check_sync(global_tid, ct_master, loc, NULL);
849#endif
850 }
851
852 return status;
853}
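
/* Illustrative sketch (editorial): for
     #pragma omp master
   the emitted pattern is roughly
     if (__kmpc_master(&loc, gtid)) {
       // ... master block ...
       __kmpc_end_master(&loc, gtid);
     }
   so only the thread for which __kmpc_master() returns 1 executes the block
   and calls __kmpc_end_master(). */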
854
855/*!
856@ingroup WORK_SHARING
857@param loc source location information.
@param global_tid global thread number.
859
860Mark the end of a <tt>master</tt> region. This should only be called by the
861thread that executes the <tt>master</tt> region.
862*/
863void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
864 KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);
866 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
867 KMP_POP_PARTITIONED_TIMER();
868
869#if OMPT_SUPPORT && OMPT_OPTIONAL
870 kmp_info_t *this_thr = __kmp_threads[global_tid];
871 kmp_team_t *team = this_thr->th.th_team;
872 if (ompt_enabled.ompt_callback_masked) {
    int tid = __kmp_tid_from_gtid(global_tid);
874 ompt_callbacks.ompt_callback(ompt_callback_masked)(
875 ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
876 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
877 OMPT_GET_RETURN_ADDRESS(0));
878 }
879#endif
880
881 if (__kmp_env_consistency_check) {
882 if (KMP_MASTER_GTID(global_tid))
      __kmp_pop_sync(global_tid, ct_master, loc);
884 }
885}
886
887/*!
888@ingroup WORK_SHARING
889@param loc source location information.
890@param global_tid global thread number.
891@param filter result of evaluating filter clause on thread global_tid, or zero
892if no filter clause present
893@return 1 if this thread should execute the <tt>masked</tt> block, 0 otherwise.
894*/
895kmp_int32 __kmpc_masked(ident_t *loc, kmp_int32 global_tid, kmp_int32 filter) {
896 int status = 0;
897 int tid;
898 KC_TRACE(10, ("__kmpc_masked: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);
900
901 if (!TCR_4(__kmp_init_parallel))
902 __kmp_parallel_initialize();
903
904 __kmp_resume_if_soft_paused();
905
  tid = __kmp_tid_from_gtid(global_tid);
907 if (tid == filter) {
908 KMP_COUNT_BLOCK(OMP_MASKED);
909 KMP_PUSH_PARTITIONED_TIMER(OMP_masked);
910 status = 1;
911 }
912
913#if OMPT_SUPPORT && OMPT_OPTIONAL
914 if (status) {
915 if (ompt_enabled.ompt_callback_masked) {
916 kmp_info_t *this_thr = __kmp_threads[global_tid];
917 kmp_team_t *team = this_thr->th.th_team;
918 ompt_callbacks.ompt_callback(ompt_callback_masked)(
919 ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
920 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
921 OMPT_GET_RETURN_ADDRESS(0));
922 }
923 }
924#endif
925
926 if (__kmp_env_consistency_check) {
927#if KMP_USE_DYNAMIC_LOCK
928 if (status)
      __kmp_push_sync(global_tid, ct_masked, loc, NULL, 0);
    else
      __kmp_check_sync(global_tid, ct_masked, loc, NULL, 0);
932#else
933 if (status)
934 __kmp_push_sync(global_tid, ct_masked, loc, NULL);
935 else
936 __kmp_check_sync(global_tid, ct_masked, loc, NULL);
937#endif
938 }
939
940 return status;
941}
942
943/*!
944@ingroup WORK_SHARING
945@param loc source location information.
@param global_tid global thread number.
947
948Mark the end of a <tt>masked</tt> region. This should only be called by the
949thread that executes the <tt>masked</tt> region.
950*/
951void __kmpc_end_masked(ident_t *loc, kmp_int32 global_tid) {
952 KC_TRACE(10, ("__kmpc_end_masked: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);
954 KMP_POP_PARTITIONED_TIMER();
955
956#if OMPT_SUPPORT && OMPT_OPTIONAL
957 kmp_info_t *this_thr = __kmp_threads[global_tid];
958 kmp_team_t *team = this_thr->th.th_team;
959 if (ompt_enabled.ompt_callback_masked) {
    int tid = __kmp_tid_from_gtid(global_tid);
961 ompt_callbacks.ompt_callback(ompt_callback_masked)(
962 ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
963 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
964 OMPT_GET_RETURN_ADDRESS(0));
965 }
966#endif
967
968 if (__kmp_env_consistency_check) {
    __kmp_pop_sync(global_tid, ct_masked, loc);
970 }
971}
972
973/*!
974@ingroup WORK_SHARING
975@param loc source location information.
976@param gtid global thread number.
977
978Start execution of an <tt>ordered</tt> construct.
979*/
980void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
981 int cid = 0;
982 kmp_info_t *th;
983 KMP_DEBUG_ASSERT(__kmp_init_serial);
984
985 KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
986 __kmp_assert_valid_gtid(gtid);
987
988 if (!TCR_4(__kmp_init_parallel))
989 __kmp_parallel_initialize();
990
991 __kmp_resume_if_soft_paused();
992
993#if USE_ITT_BUILD
994 __kmp_itt_ordered_prep(gtid);
995// TODO: ordered_wait_id
996#endif /* USE_ITT_BUILD */
997
998 th = __kmp_threads[gtid];
999
1000#if OMPT_SUPPORT && OMPT_OPTIONAL
1001 kmp_team_t *team;
1002 ompt_wait_id_t lck;
1003 void *codeptr_ra;
1004 OMPT_STORE_RETURN_ADDRESS(gtid);
1005 if (ompt_enabled.enabled) {
1006 team = __kmp_team_from_gtid(gtid);
1007 lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
1008 /* OMPT state update */
1009 th->th.ompt_thread_info.wait_id = lck;
1010 th->th.ompt_thread_info.state = ompt_state_wait_ordered;
1011
1012 /* OMPT event callback */
1013 codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
1014 if (ompt_enabled.ompt_callback_mutex_acquire) {
1015 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1016 ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
1017 codeptr_ra);
1018 }
1019 }
1020#endif
1021
1022 if (th->th.th_dispatch->th_deo_fcn != 0)
1023 (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
1024 else
    __kmp_parallel_deo(&gtid, &cid, loc);
1026
1027#if OMPT_SUPPORT && OMPT_OPTIONAL
1028 if (ompt_enabled.enabled) {
1029 /* OMPT state update */
1030 th->th.ompt_thread_info.state = ompt_state_work_parallel;
1031 th->th.ompt_thread_info.wait_id = 0;
1032
1033 /* OMPT event callback */
1034 if (ompt_enabled.ompt_callback_mutex_acquired) {
1035 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1036 ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1037 }
1038 }
1039#endif
1040
1041#if USE_ITT_BUILD
1042 __kmp_itt_ordered_start(gtid);
1043#endif /* USE_ITT_BUILD */
1044}
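
/* Illustrative sketch (editorial): inside a loop with an ordered clause, each
     #pragma omp ordered
   block is bracketed roughly as
     __kmpc_ordered(&loc, gtid);
     // ... ordered block ...
     __kmpc_end_ordered(&loc, gtid);
   so that iterations enter the block in sequential iteration order. */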
1045
1046/*!
1047@ingroup WORK_SHARING
1048@param loc source location information.
1049@param gtid global thread number.
1050
1051End execution of an <tt>ordered</tt> construct.
1052*/
1053void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
1054 int cid = 0;
1055 kmp_info_t *th;
1056
1057 KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
1058 __kmp_assert_valid_gtid(gtid);
1059
1060#if USE_ITT_BUILD
1061 __kmp_itt_ordered_end(gtid);
1062// TODO: ordered_wait_id
1063#endif /* USE_ITT_BUILD */
1064
1065 th = __kmp_threads[gtid];
1066
1067 if (th->th.th_dispatch->th_dxo_fcn != 0)
1068 (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
1069 else
    __kmp_parallel_dxo(&gtid, &cid, loc);
1071
1072#if OMPT_SUPPORT && OMPT_OPTIONAL
1073 OMPT_STORE_RETURN_ADDRESS(gtid);
1074 if (ompt_enabled.ompt_callback_mutex_released) {
1075 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1076 ompt_mutex_ordered,
1077 (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
1078 ->t.t_ordered.dt.t_value,
1079 OMPT_LOAD_RETURN_ADDRESS(gtid));
1080 }
1081#endif
1082}
1083
1084#if KMP_USE_DYNAMIC_LOCK
1085
1086static __forceinline void
1087__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
1088 kmp_int32 gtid, kmp_indirect_locktag_t tag) {
1089 // Pointer to the allocated indirect lock is written to crit, while indexing
1090 // is ignored.
1091 void *idx;
1092 kmp_indirect_lock_t **lck;
1093 lck = (kmp_indirect_lock_t **)crit;
1094 kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
1095 KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
1096 KMP_SET_I_LOCK_LOCATION(ilk, loc);
1097 KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
1098 KA_TRACE(20,
1099 ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
1100#if USE_ITT_BUILD
  __kmp_itt_critical_creating(ilk->lock, loc);
1102#endif
1103 int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
1104 if (status == 0) {
1105#if USE_ITT_BUILD
    __kmp_itt_critical_destroyed(ilk->lock);
1107#endif
1108 // We don't really need to destroy the unclaimed lock here since it will be
1109 // cleaned up at program exit.
1110 // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
1111 }
1112 KMP_DEBUG_ASSERT(*lck != NULL);
1113}
1114
1115// Fast-path acquire tas lock
1116#define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \
1117 { \
1118 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
1119 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
1120 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
1121 if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
1122 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
1123 kmp_uint32 spins; \
1124 KMP_FSYNC_PREPARE(l); \
1125 KMP_INIT_YIELD(spins); \
1126 kmp_backoff_t backoff = __kmp_spin_backoff_params; \
1127 do { \
1128 if (TCR_4(__kmp_nth) > \
1129 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
1130 KMP_YIELD(TRUE); \
1131 } else { \
1132 KMP_YIELD_SPIN(spins); \
1133 } \
1134 __kmp_spin_backoff(&backoff); \
1135 } while ( \
1136 KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
1137 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \
1138 } \
1139 KMP_FSYNC_ACQUIRED(l); \
1140 }
1141
1142// Fast-path test tas lock
1143#define KMP_TEST_TAS_LOCK(lock, gtid, rc) \
1144 { \
1145 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
1146 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
1147 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
1148 rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \
1149 __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \
1150 }
1151
1152// Fast-path release tas lock
1153#define KMP_RELEASE_TAS_LOCK(lock, gtid) \
1154 { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }
1155
1156#if KMP_USE_FUTEX
1157
1158#include <sys/syscall.h>
1159#include <unistd.h>
1160#ifndef FUTEX_WAIT
1161#define FUTEX_WAIT 0
1162#endif
1163#ifndef FUTEX_WAKE
1164#define FUTEX_WAKE 1
1165#endif
1166
1167// Fast-path acquire futex lock
1168#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \
1169 { \
1170 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1171 kmp_int32 gtid_code = (gtid + 1) << 1; \
1172 KMP_MB(); \
1173 KMP_FSYNC_PREPARE(ftx); \
1174 kmp_int32 poll_val; \
1175 while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \
1176 &(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1177 KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
1178 kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
1179 if (!cond) { \
1180 if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \
1181 poll_val | \
1182 KMP_LOCK_BUSY(1, futex))) { \
1183 continue; \
1184 } \
1185 poll_val |= KMP_LOCK_BUSY(1, futex); \
1186 } \
1187 kmp_int32 rc; \
1188 if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \
1189 NULL, NULL, 0)) != 0) { \
1190 continue; \
1191 } \
1192 gtid_code |= 1; \
1193 } \
1194 KMP_FSYNC_ACQUIRED(ftx); \
1195 }
1196
1197// Fast-path test futex lock
1198#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \
1199 { \
1200 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1201 if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1202 KMP_LOCK_BUSY(gtid + 1 << 1, futex))) { \
1203 KMP_FSYNC_ACQUIRED(ftx); \
1204 rc = TRUE; \
1205 } else { \
1206 rc = FALSE; \
1207 } \
1208 }
1209
1210// Fast-path release futex lock
1211#define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
1212 { \
1213 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1214 KMP_MB(); \
1215 KMP_FSYNC_RELEASING(ftx); \
1216 kmp_int32 poll_val = \
1217 KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
1218 if (KMP_LOCK_STRIP(poll_val) & 1) { \
1219 syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
1220 KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
1221 } \
1222 KMP_MB(); \
1223 KMP_YIELD_OVERSUB(); \
1224 }
1225
1226#endif // KMP_USE_FUTEX
1227
1228#else // KMP_USE_DYNAMIC_LOCK
1229
1230static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
1231 ident_t const *loc,
1232 kmp_int32 gtid) {
1233 kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
1234
1235 // Because of the double-check, the following load doesn't need to be volatile
1236 kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1237
1238 if (lck == NULL) {
1239 void *idx;
1240
1241 // Allocate & initialize the lock.
1242 // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
1243 lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
1244 __kmp_init_user_lock_with_checks(lck);
1245 __kmp_set_user_lock_location(lck, loc);
1246#if USE_ITT_BUILD
1247 __kmp_itt_critical_creating(lck);
// __kmp_itt_critical_creating() should be called *before* the first usage
// of the underlying lock. It is the only place where we can guarantee it.
// There is a chance the lock will be destroyed without ever being used, but
// that is not a problem, because this is not a real event seen by the user but
// rather the setting of a name for the object (lock). See kmp_itt.h for
// details.
1253#endif /* USE_ITT_BUILD */
1254
1255 // Use a cmpxchg instruction to slam the start of the critical section with
1256 // the lock pointer. If another thread beat us to it, deallocate the lock,
1257 // and use the lock that the other thread allocated.
1258 int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);
1259
1260 if (status == 0) {
1261// Deallocate the lock and reload the value.
1262#if USE_ITT_BUILD
1263 __kmp_itt_critical_destroyed(lck);
1264// Let ITT know the lock is destroyed and the same memory location may be reused
1265// for another purpose.
1266#endif /* USE_ITT_BUILD */
1267 __kmp_destroy_user_lock_with_checks(lck);
1268 __kmp_user_lock_free(&idx, gtid, lck);
1269 lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1270 KMP_DEBUG_ASSERT(lck != NULL);
1271 }
1272 }
1273 return lck;
1274}
1275
1276#endif // KMP_USE_DYNAMIC_LOCK
1277
1278/*!
1279@ingroup WORK_SHARING
1280@param loc source location information.
1281@param global_tid global thread number.
1282@param crit identity of the critical section. This could be a pointer to a lock
1283associated with the critical section, or some other suitably unique value.
1284
1285Enter code protected by a `critical` construct.
1286This function blocks until the executing thread can enter the critical section.
1287*/
1288void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1289 kmp_critical_name *crit) {
1290#if KMP_USE_DYNAMIC_LOCK
1291#if OMPT_SUPPORT && OMPT_OPTIONAL
1292 OMPT_STORE_RETURN_ADDRESS(global_tid);
1293#endif // OMPT_SUPPORT
  __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
1295#else
1296 KMP_COUNT_BLOCK(OMP_CRITICAL);
1297#if OMPT_SUPPORT && OMPT_OPTIONAL
1298 ompt_state_t prev_state = ompt_state_undefined;
1299 ompt_thread_info_t ti;
1300#endif
1301 kmp_user_lock_p lck;
1302
1303 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1304 __kmp_assert_valid_gtid(global_tid);
1305
1306 // TODO: add THR_OVHD_STATE
1307
1308 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1309 KMP_CHECK_USER_LOCK_INIT();
1310
1311 if ((__kmp_user_lock_kind == lk_tas) &&
1312 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1313 lck = (kmp_user_lock_p)crit;
1314 }
1315#if KMP_USE_FUTEX
1316 else if ((__kmp_user_lock_kind == lk_futex) &&
1317 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1318 lck = (kmp_user_lock_p)crit;
1319 }
1320#endif
1321 else { // ticket, queuing or drdpa
1322 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
1323 }
1324
1325 if (__kmp_env_consistency_check)
1326 __kmp_push_sync(global_tid, ct_critical, loc, lck);
1327
  // Since the critical directive binds to all threads, not just the current
  // team, we have to check this even if we are in a serialized team.
  // Also, even if we are the uber thread, we still have to acquire the lock,
  // as we have to contend with sibling threads.
1332
1333#if USE_ITT_BUILD
1334 __kmp_itt_critical_acquiring(lck);
1335#endif /* USE_ITT_BUILD */
1336#if OMPT_SUPPORT && OMPT_OPTIONAL
1337 OMPT_STORE_RETURN_ADDRESS(gtid);
1338 void *codeptr_ra = NULL;
1339 if (ompt_enabled.enabled) {
1340 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1341 /* OMPT state update */
1342 prev_state = ti.state;
1343 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1344 ti.state = ompt_state_wait_critical;
1345
1346 /* OMPT event callback */
1347 codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
1348 if (ompt_enabled.ompt_callback_mutex_acquire) {
1349 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1350 ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
1351 (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1352 }
1353 }
1354#endif
1355 // Value of 'crit' should be good for using as a critical_id of the critical
1356 // section directive.
1357 __kmp_acquire_user_lock_with_checks(lck, global_tid);
1358
1359#if USE_ITT_BUILD
1360 __kmp_itt_critical_acquired(lck);
1361#endif /* USE_ITT_BUILD */
1362#if OMPT_SUPPORT && OMPT_OPTIONAL
1363 if (ompt_enabled.enabled) {
1364 /* OMPT state update */
1365 ti.state = prev_state;
1366 ti.wait_id = 0;
1367
1368 /* OMPT event callback */
1369 if (ompt_enabled.ompt_callback_mutex_acquired) {
1370 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1371 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1372 }
1373 }
1374#endif
1375 KMP_POP_PARTITIONED_TIMER();
1376
1377 KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1378 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1379#endif // KMP_USE_DYNAMIC_LOCK
1380}
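
/* Illustrative sketch (editorial): for
     #pragma omp critical (name)
   a compiler typically materializes one zero-initialized kmp_critical_name
   object shared by all uses of that name and emits roughly
     static kmp_critical_name crit_name_lock; // hypothetical symbol
     __kmpc_critical(&loc, gtid, &crit_name_lock);
     // ... critical section body ...
     __kmpc_end_critical(&loc, gtid, &crit_name_lock);
   The runtime lazily installs the actual lock into that storage on first use,
   as the code above shows. */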
1381
1382#if KMP_USE_DYNAMIC_LOCK
1383
1384// Converts the given hint to an internal lock implementation
1385static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
1386#if KMP_USE_TSX
1387#define KMP_TSX_LOCK(seq) lockseq_##seq
1388#else
1389#define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
1390#endif
1391
1392#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1393#define KMP_CPUINFO_RTM (__kmp_cpuinfo.flags.rtm)
1394#else
1395#define KMP_CPUINFO_RTM 0
1396#endif
1397
1398 // Hints that do not require further logic
1399 if (hint & kmp_lock_hint_hle)
1400 return KMP_TSX_LOCK(hle);
1401 if (hint & kmp_lock_hint_rtm)
1402 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_queuing) : __kmp_user_lock_seq;
1403 if (hint & kmp_lock_hint_adaptive)
1404 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;
1405
1406 // Rule out conflicting hints first by returning the default lock
1407 if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
1408 return __kmp_user_lock_seq;
1409 if ((hint & omp_lock_hint_speculative) &&
1410 (hint & omp_lock_hint_nonspeculative))
1411 return __kmp_user_lock_seq;
1412
1413 // Do not even consider speculation when it appears to be contended
1414 if (hint & omp_lock_hint_contended)
1415 return lockseq_queuing;
1416
1417 // Uncontended lock without speculation
1418 if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
1419 return lockseq_tas;
1420
1421 // Use RTM lock for speculation
1422 if (hint & omp_lock_hint_speculative)
1423 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_spin) : __kmp_user_lock_seq;
1424
1425 return __kmp_user_lock_seq;
1426}
1427
1428#if OMPT_SUPPORT && OMPT_OPTIONAL
1429#if KMP_USE_DYNAMIC_LOCK
1430static kmp_mutex_impl_t
1431__ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
1432 if (user_lock) {
1433 switch (KMP_EXTRACT_D_TAG(user_lock)) {
1434 case 0:
1435 break;
1436#if KMP_USE_FUTEX
1437 case locktag_futex:
1438 return kmp_mutex_impl_queuing;
1439#endif
1440 case locktag_tas:
1441 return kmp_mutex_impl_spin;
1442#if KMP_USE_TSX
1443 case locktag_hle:
1444 case locktag_rtm_spin:
1445 return kmp_mutex_impl_speculative;
1446#endif
1447 default:
1448 return kmp_mutex_impl_none;
1449 }
1450 ilock = KMP_LOOKUP_I_LOCK(user_lock);
1451 }
1452 KMP_ASSERT(ilock);
1453 switch (ilock->type) {
1454#if KMP_USE_TSX
1455 case locktag_adaptive:
1456 case locktag_rtm_queuing:
1457 return kmp_mutex_impl_speculative;
1458#endif
1459 case locktag_nested_tas:
1460 return kmp_mutex_impl_spin;
1461#if KMP_USE_FUTEX
1462 case locktag_nested_futex:
1463#endif
1464 case locktag_ticket:
1465 case locktag_queuing:
1466 case locktag_drdpa:
1467 case locktag_nested_ticket:
1468 case locktag_nested_queuing:
1469 case locktag_nested_drdpa:
1470 return kmp_mutex_impl_queuing;
1471 default:
1472 return kmp_mutex_impl_none;
1473 }
1474}
1475#else
1476// For locks without dynamic binding
1477static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
1478 switch (__kmp_user_lock_kind) {
1479 case lk_tas:
1480 return kmp_mutex_impl_spin;
1481#if KMP_USE_FUTEX
1482 case lk_futex:
1483#endif
1484 case lk_ticket:
1485 case lk_queuing:
1486 case lk_drdpa:
1487 return kmp_mutex_impl_queuing;
1488#if KMP_USE_TSX
1489 case lk_hle:
1490 case lk_rtm_queuing:
1491 case lk_rtm_spin:
1492 case lk_adaptive:
1493 return kmp_mutex_impl_speculative;
1494#endif
1495 default:
1496 return kmp_mutex_impl_none;
1497 }
1498}
1499#endif // KMP_USE_DYNAMIC_LOCK
1500#endif // OMPT_SUPPORT && OMPT_OPTIONAL
1501
1502/*!
1503@ingroup WORK_SHARING
1504@param loc source location information.
1505@param global_tid global thread number.
1506@param crit identity of the critical section. This could be a pointer to a lock
1507associated with the critical section, or some other suitably unique value.
1508@param hint the lock hint.
1509
1510Enter code protected by a `critical` construct with a hint. The hint value is
1511used to suggest a lock implementation. This function blocks until the executing
1512thread can enter the critical section unless the hint suggests use of
1513speculative execution and the hardware supports it.
1514*/
1515void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1516 kmp_critical_name *crit, uint32_t hint) {
1517 KMP_COUNT_BLOCK(OMP_CRITICAL);
1518 kmp_user_lock_p lck;
1519#if OMPT_SUPPORT && OMPT_OPTIONAL
1520 ompt_state_t prev_state = ompt_state_undefined;
1521 ompt_thread_info_t ti;
  // This is the case if called from __kmpc_critical:
1523 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1524 if (!codeptr)
1525 codeptr = OMPT_GET_RETURN_ADDRESS(0);
1526#endif
1527
1528 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);
1530
1531 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
1532 // Check if it is initialized.
1533 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1534 kmp_dyna_lockseq_t lockseq = __kmp_map_hint_to_lock(hint);
1535 if (*lk == 0) {
1536 if (KMP_IS_D_LOCK(lockseq)) {
1537 KMP_COMPARE_AND_STORE_ACQ32(
1538 (volatile kmp_int32 *)&((kmp_base_tas_lock_t *)crit)->poll, 0,
1539 KMP_GET_D_TAG(lockseq));
1540 } else {
      __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lockseq));
1542 }
1543 }
1544 // Branch for accessing the actual lock object and set operation. This
1545 // branching is inevitable since this lock initialization does not follow the
1546 // normal dispatch path (lock table is not used).
1547 if (KMP_EXTRACT_D_TAG(lk) != 0) {
1548 lck = (kmp_user_lock_p)lk;
1549 if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
1551 __kmp_map_hint_to_lock(hint));
1552 }
1553#if USE_ITT_BUILD
    __kmp_itt_critical_acquiring(lck);
1555#endif
1556#if OMPT_SUPPORT && OMPT_OPTIONAL
1557 if (ompt_enabled.enabled) {
1558 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1559 /* OMPT state update */
1560 prev_state = ti.state;
1561 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1562 ti.state = ompt_state_wait_critical;
1563
1564 /* OMPT event callback */
1565 if (ompt_enabled.ompt_callback_mutex_acquire) {
1566 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1567 ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
1569 codeptr);
1570 }
1571 }
1572#endif
1573#if KMP_USE_INLINED_TAS
1574 if (lockseq == lockseq_tas && !__kmp_env_consistency_check) {
1575 KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
1576 } else
1577#elif KMP_USE_INLINED_FUTEX
1578 if (lockseq == lockseq_futex && !__kmp_env_consistency_check) {
1579 KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
1580 } else
1581#endif
1582 {
1583 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
1584 }
1585 } else {
1586 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
1587 lck = ilk->lock;
1588 if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
1590 __kmp_map_hint_to_lock(hint));
1591 }
1592#if USE_ITT_BUILD
    __kmp_itt_critical_acquiring(lck);
1594#endif
1595#if OMPT_SUPPORT && OMPT_OPTIONAL
1596 if (ompt_enabled.enabled) {
1597 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1598 /* OMPT state update */
1599 prev_state = ti.state;
1600 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1601 ti.state = ompt_state_wait_critical;
1602
1603 /* OMPT event callback */
1604 if (ompt_enabled.ompt_callback_mutex_acquire) {
1605 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1606 ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
1608 codeptr);
1609 }
1610 }
1611#endif
1612 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
1613 }
1614 KMP_POP_PARTITIONED_TIMER();
1615
1616#if USE_ITT_BUILD
  __kmp_itt_critical_acquired(lck);
1618#endif /* USE_ITT_BUILD */
1619#if OMPT_SUPPORT && OMPT_OPTIONAL
1620 if (ompt_enabled.enabled) {
1621 /* OMPT state update */
1622 ti.state = prev_state;
1623 ti.wait_id = 0;
1624
1625 /* OMPT event callback */
1626 if (ompt_enabled.ompt_callback_mutex_acquired) {
1627 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1628 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
1629 }
1630 }
1631#endif
1632
1633 KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1634 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1635} // __kmpc_critical_with_hint
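
/* Illustrative sketch (editorial): when the critical construct carries a hint,
   e.g.
     #pragma omp critical (name) hint(omp_sync_hint_contended)
   entry is expected to go through
     __kmpc_critical_with_hint(&loc, gtid, &crit_name_lock, /*hint=*/...);
   with the same hypothetical crit_name_lock object as for __kmpc_critical(),
   and __kmp_map_hint_to_lock() above selects the lock implementation (e.g. a
   queuing lock for contended hints). The exit call remains
   __kmpc_end_critical(). */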
1636
1637#endif // KMP_USE_DYNAMIC_LOCK
1638
1639/*!
1640@ingroup WORK_SHARING
1641@param loc source location information.
@param global_tid global thread number.
1643@param crit identity of the critical section. This could be a pointer to a lock
1644associated with the critical section, or some other suitably unique value.
1645
1646Leave a critical section, releasing any lock that was held during its execution.
1647*/
1648void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1649 kmp_critical_name *crit) {
1650 kmp_user_lock_p lck;
1651
1652 KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));
1653
1654#if KMP_USE_DYNAMIC_LOCK
1655 int locktag = KMP_EXTRACT_D_TAG(crit);
1656 if (locktag) {
1657 lck = (kmp_user_lock_p)crit;
1658 KMP_ASSERT(lck != NULL);
1659 if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);
1661 }
1662#if USE_ITT_BUILD
    __kmp_itt_critical_releasing(lck);
1664#endif
1665#if KMP_USE_INLINED_TAS
1666 if (locktag == locktag_tas && !__kmp_env_consistency_check) {
1667 KMP_RELEASE_TAS_LOCK(lck, global_tid);
1668 } else
1669#elif KMP_USE_INLINED_FUTEX
1670 if (locktag == locktag_futex && !__kmp_env_consistency_check) {
1671 KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
1672 } else
1673#endif
1674 {
1675 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
1676 }
1677 } else {
1678 kmp_indirect_lock_t *ilk =
1679 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
1680 KMP_ASSERT(ilk != NULL);
1681 lck = ilk->lock;
1682 if (__kmp_env_consistency_check) {
1683 __kmp_pop_sync(gtid: global_tid, ct: ct_critical, ident: loc);
1684 }
1685#if USE_ITT_BUILD
1686 __kmp_itt_critical_releasing(lock: lck);
1687#endif
1688 KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
1689 }
1690
1691#else // KMP_USE_DYNAMIC_LOCK
1692
1693 if ((__kmp_user_lock_kind == lk_tas) &&
1694 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1695 lck = (kmp_user_lock_p)crit;
1696 }
1697#if KMP_USE_FUTEX
1698 else if ((__kmp_user_lock_kind == lk_futex) &&
1699 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1700 lck = (kmp_user_lock_p)crit;
1701 }
1702#endif
1703 else { // ticket, queuing or drdpa
1704 lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
1705 }
1706
1707 KMP_ASSERT(lck != NULL);
1708
1709 if (__kmp_env_consistency_check)
1710 __kmp_pop_sync(global_tid, ct_critical, loc);
1711
1712#if USE_ITT_BUILD
1713 __kmp_itt_critical_releasing(lck);
1714#endif /* USE_ITT_BUILD */
1715 // Value of 'crit' should be good for using as a critical_id of the critical
1716 // section directive.
1717 __kmp_release_user_lock_with_checks(lck, global_tid);
1718
1719#endif // KMP_USE_DYNAMIC_LOCK
1720
1721#if OMPT_SUPPORT && OMPT_OPTIONAL
1722 /* OMPT release event triggers after lock is released; place here to trigger
1723 * for all #if branches */
1724 OMPT_STORE_RETURN_ADDRESS(global_tid);
1725 if (ompt_enabled.ompt_callback_mutex_released) {
1726 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1727 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
1728 OMPT_LOAD_RETURN_ADDRESS(0));
1729 }
1730#endif
1731
1732 KMP_POP_PARTITIONED_TIMER();
1733 KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
1734}
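
/* Illustrative pairing (a sketch, not part of the runtime): roughly what a
   compiler might emit for a named critical region.  The wrapper function and
   user_body() are hypothetical; __kmpc_critical/__kmpc_end_critical are the
   entry points defined in this file, and the kmp_critical_name word is the
   zero-initialized per-name storage the compiler allocates.

     static kmp_critical_name crit_foo = {0}; // one per critical name "foo"

     static void emitted_critical_region(ident_t *loc, kmp_int32 gtid) {
       __kmpc_critical(loc, gtid, &crit_foo);     // blocks until acquired
       user_body();                               // user code in the region
       __kmpc_end_critical(loc, gtid, &crit_foo); // releases the lock
     }
*/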
1735
1736/*!
1737@ingroup SYNCHRONIZATION
1738@param loc source location information
1739@param global_tid thread id.
1740@return one if the thread should execute the master block, zero otherwise
1741
1742Start execution of a combined barrier and master. The barrier is executed inside
1743this function.
1744*/
1745kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1746 int status;
1747 KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
1748 __kmp_assert_valid_gtid(gtid: global_tid);
1749
1750 if (!TCR_4(__kmp_init_parallel))
1751 __kmp_parallel_initialize();
1752
1753 __kmp_resume_if_soft_paused();
1754
1755 if (__kmp_env_consistency_check)
1756 __kmp_check_barrier(gtid: global_tid, ct: ct_barrier, ident: loc);
1757
1758#if OMPT_SUPPORT
1759 ompt_frame_t *ompt_frame;
1760 if (ompt_enabled.enabled) {
1761 __ompt_get_task_info_internal(ancestor_level: 0, NULL, NULL, task_frame: &ompt_frame, NULL, NULL);
1762 if (ompt_frame->enter_frame.ptr == NULL)
1763 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1764 }
1765 OMPT_STORE_RETURN_ADDRESS(global_tid);
1766#endif
1767#if USE_ITT_NOTIFY
1768 __kmp_threads[global_tid]->th.th_ident = loc;
1769#endif
1770 status = __kmp_barrier(bt: bs_plain_barrier, gtid: global_tid, TRUE, reduce_size: 0, NULL, NULL);
1771#if OMPT_SUPPORT && OMPT_OPTIONAL
1772 if (ompt_enabled.enabled) {
1773 ompt_frame->enter_frame = ompt_data_none;
1774 }
1775#endif
1776
1777 return (status != 0) ? 0 : 1;
1778}
1779
1780/*!
1781@ingroup SYNCHRONIZATION
1782@param loc source location information
1783@param global_tid thread id.
1784
1785Complete the execution of a combined barrier and master. This function should
1786only be called at the completion of the <tt>master</tt> code. Other threads will
1787still be waiting at the barrier and this call releases them.
1788*/
1789void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1790 KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
1791 __kmp_assert_valid_gtid(gtid: global_tid);
1792 __kmp_end_split_barrier(bt: bs_plain_barrier, gtid: global_tid);
1793}
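
/* Usage sketch (illustrative): the compiler pairs the two entry points above
   so that only the thread selected by __kmpc_barrier_master() executes the
   master block and then releases the rest of the team.

     if (__kmpc_barrier_master(loc, gtid)) {
       // ... code executed by the primary thread only ...
       __kmpc_end_barrier_master(loc, gtid); // releases the waiting threads
     }
*/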
1794
1795/*!
1796@ingroup SYNCHRONIZATION
1797@param loc source location information
1798@param global_tid thread id.
1799@return one if the thread should execute the master block, zero otherwise
1800
1801Start execution of a combined barrier and master(nowait) construct.
1802The barrier is executed inside this function.
1803There is no equivalent "end" function; the actions that __kmpc_end_master would normally perform (such as the consistency-check pop) are done inside this function instead.
1804*/
1805kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
1806 kmp_int32 ret;
1807 KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
1808 __kmp_assert_valid_gtid(gtid: global_tid);
1809
1810 if (!TCR_4(__kmp_init_parallel))
1811 __kmp_parallel_initialize();
1812
1813 __kmp_resume_if_soft_paused();
1814
1815 if (__kmp_env_consistency_check) {
1816 if (loc == 0) {
1817 KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
1818 }
1819 __kmp_check_barrier(gtid: global_tid, ct: ct_barrier, ident: loc);
1820 }
1821
1822#if OMPT_SUPPORT
1823 ompt_frame_t *ompt_frame;
1824 if (ompt_enabled.enabled) {
1825 __ompt_get_task_info_internal(ancestor_level: 0, NULL, NULL, task_frame: &ompt_frame, NULL, NULL);
1826 if (ompt_frame->enter_frame.ptr == NULL)
1827 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1828 }
1829 OMPT_STORE_RETURN_ADDRESS(global_tid);
1830#endif
1831#if USE_ITT_NOTIFY
1832 __kmp_threads[global_tid]->th.th_ident = loc;
1833#endif
1834 __kmp_barrier(bt: bs_plain_barrier, gtid: global_tid, FALSE, reduce_size: 0, NULL, NULL);
1835#if OMPT_SUPPORT && OMPT_OPTIONAL
1836 if (ompt_enabled.enabled) {
1837 ompt_frame->enter_frame = ompt_data_none;
1838 }
1839#endif
1840
1841 ret = __kmpc_master(loc, global_tid);
1842
1843 if (__kmp_env_consistency_check) {
1844 /* there's no __kmpc_end_master called; so the (stats) */
1845 /* actions of __kmpc_end_master are done here */
1846 if (ret) {
1847 /* only one thread should do the pop since only */
1848 /* one did the push (see __kmpc_master()) */
1849 __kmp_pop_sync(gtid: global_tid, ct: ct_master, ident: loc);
1850 }
1851 }
1852
1853 return (ret);
1854}
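
/* Usage sketch (illustrative) for the nowait variant: there is no "end" call,
   so the other threads are already past the barrier while the master block
   runs.

     if (__kmpc_barrier_master_nowait(loc, gtid)) {
       // ... master block; the other threads do not wait for it ...
     }
*/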
1855
1856/* The BARRIER for a SINGLE process section is always explicit */
1857/*!
1858@ingroup WORK_SHARING
1859@param loc source location information
1860@param global_tid global thread number
1861@return One if this thread should execute the single construct, zero otherwise.
1862
1863Test whether to execute a <tt>single</tt> construct.
1864There are no implicit barriers in either of the two "single" calls; rather, the
1865compiler should introduce an explicit barrier if one is required.
1866*/
1867
1868kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
1869 __kmp_assert_valid_gtid(gtid: global_tid);
1870 kmp_int32 rc = __kmp_enter_single(gtid: global_tid, id_ref: loc, TRUE);
1871
1872 if (rc) {
1873 // We are going to execute the single statement, so we should count it.
1874 KMP_COUNT_BLOCK(OMP_SINGLE);
1875 KMP_PUSH_PARTITIONED_TIMER(OMP_single);
1876 }
1877
1878#if OMPT_SUPPORT && OMPT_OPTIONAL
1879 kmp_info_t *this_thr = __kmp_threads[global_tid];
1880 kmp_team_t *team = this_thr->th.th_team;
1881 int tid = __kmp_tid_from_gtid(gtid: global_tid);
1882
1883 if (ompt_enabled.enabled) {
1884 if (rc) {
1885 if (ompt_enabled.ompt_callback_work) {
1886 ompt_callbacks.ompt_callback(ompt_callback_work)(
1887 ompt_work_single_executor, ompt_scope_begin,
1888 &(team->t.ompt_team_info.parallel_data),
1889 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1890 1, OMPT_GET_RETURN_ADDRESS(0));
1891 }
1892 } else {
1893 if (ompt_enabled.ompt_callback_work) {
1894 ompt_callbacks.ompt_callback(ompt_callback_work)(
1895 ompt_work_single_other, ompt_scope_begin,
1896 &(team->t.ompt_team_info.parallel_data),
1897 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1898 1, OMPT_GET_RETURN_ADDRESS(0));
1899 ompt_callbacks.ompt_callback(ompt_callback_work)(
1900 ompt_work_single_other, ompt_scope_end,
1901 &(team->t.ompt_team_info.parallel_data),
1902 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1903 1, OMPT_GET_RETURN_ADDRESS(0));
1904 }
1905 }
1906 }
1907#endif
1908
1909 return rc;
1910}
1911
1912/*!
1913@ingroup WORK_SHARING
1914@param loc source location information
1915@param global_tid global thread number
1916
1917Mark the end of a <tt>single</tt> construct. This function should
1918only be called by the thread that executed the block of code protected
1919by the `single` construct.
1920*/
1921void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
1922 __kmp_assert_valid_gtid(gtid: global_tid);
1923 __kmp_exit_single(gtid: global_tid);
1924 KMP_POP_PARTITIONED_TIMER();
1925
1926#if OMPT_SUPPORT && OMPT_OPTIONAL
1927 kmp_info_t *this_thr = __kmp_threads[global_tid];
1928 kmp_team_t *team = this_thr->th.th_team;
1929 int tid = __kmp_tid_from_gtid(gtid: global_tid);
1930
1931 if (ompt_enabled.ompt_callback_work) {
1932 ompt_callbacks.ompt_callback(ompt_callback_work)(
1933 ompt_work_single_executor, ompt_scope_end,
1934 &(team->t.ompt_team_info.parallel_data),
1935 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
1936 OMPT_GET_RETURN_ADDRESS(0));
1937 }
1938#endif
1939}
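
/* Lowering sketch (illustrative) for "#pragma omp single": since the runtime
   calls carry no implicit barrier, the compiler emits __kmpc_barrier() itself
   unless the construct has a nowait clause.

     if (__kmpc_single(loc, gtid)) {
       // ... block executed by exactly one thread ...
       __kmpc_end_single(loc, gtid);
     }
     __kmpc_barrier(loc, gtid); // omitted when "nowait" is present
*/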
1940
1941/*!
1942@ingroup WORK_SHARING
1943@param loc Source location
1944@param global_tid Global thread id
1945
1946Mark the end of a statically scheduled loop.
1947*/
1948void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
1949 KMP_POP_PARTITIONED_TIMER();
1950 KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
1951
1952#if OMPT_SUPPORT && OMPT_OPTIONAL
1953 if (ompt_enabled.ompt_callback_work) {
1954 ompt_work_t ompt_work_type = ompt_work_loop;
1955 ompt_team_info_t *team_info = __ompt_get_teaminfo(depth: 0, NULL);
1956 ompt_task_info_t *task_info = __ompt_get_task_info_object(depth: 0);
1957 // Determine workshare type
1958 if (loc != NULL) {
1959 if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
1960 ompt_work_type = ompt_work_loop;
1961 } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
1962 ompt_work_type = ompt_work_sections;
1963 } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
1964 ompt_work_type = ompt_work_distribute;
1965 } else {
1966 // use default set above.
1967 // a warning about this case is provided in __kmpc_for_static_init
1968 }
1969 KMP_DEBUG_ASSERT(ompt_work_type);
1970 }
1971 ompt_callbacks.ompt_callback(ompt_callback_work)(
1972 ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
1973 &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
1974 }
1975#endif
1976 if (__kmp_env_consistency_check)
1977 __kmp_pop_workshare(gtid: global_tid, ct: ct_pdo, ident: loc);
1978}
1979
1980// User routines which take C-style arguments (call by value)
1981// different from the Fortran equivalent routines
1982
1983void ompc_set_num_threads(int arg) {
1984 // !!!!! TODO: check the per-task binding
1985 __kmp_set_num_threads(new_nth: arg, __kmp_entry_gtid());
1986}
1987
1988void ompc_set_dynamic(int flag) {
1989 kmp_info_t *thread;
1990
1991 /* For the thread-private implementation of the internal controls */
1992 thread = __kmp_entry_thread();
1993
1994 __kmp_save_internal_controls(thread);
1995
1996 set__dynamic(thread, flag ? true : false);
1997}
1998
1999void ompc_set_nested(int flag) {
2000 kmp_info_t *thread;
2001
2002 /* For the thread-private internal controls implementation */
2003 thread = __kmp_entry_thread();
2004
2005 __kmp_save_internal_controls(thread);
2006
2007 set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1);
2008}
2009
2010void ompc_set_max_active_levels(int max_active_levels) {
2011 /* TO DO */
2012 /* we want per-task implementation of this internal control */
2013
2014 /* For the per-thread internal controls implementation */
2015 __kmp_set_max_active_levels(__kmp_entry_gtid(), new_max_active_levels: max_active_levels);
2016}
2017
2018void ompc_set_schedule(omp_sched_t kind, int modifier) {
2019 // !!!!! TODO: check the per-task binding
2020 __kmp_set_schedule(__kmp_entry_gtid(), new_sched: (kmp_sched_t)kind, chunk: modifier);
2021}
2022
2023int ompc_get_ancestor_thread_num(int level) {
2024 return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
2025}
2026
2027int ompc_get_team_size(int level) {
2028 return __kmp_get_team_size(__kmp_entry_gtid(), level);
2029}
2030
2031/* OpenMP 5.0 Affinity Format API */
2032void KMP_EXPAND_NAME(ompc_set_affinity_format)(char const *format) {
2033 if (!__kmp_init_serial) {
2034 __kmp_serial_initialize();
2035 }
2036 __kmp_strncpy_truncate(buffer: __kmp_affinity_format, buf_size: KMP_AFFINITY_FORMAT_SIZE,
2037 src: format, KMP_STRLEN(s: format) + 1);
2038}
2039
2040size_t KMP_EXPAND_NAME(ompc_get_affinity_format)(char *buffer, size_t size) {
2041 size_t format_size;
2042 if (!__kmp_init_serial) {
2043 __kmp_serial_initialize();
2044 }
2045 format_size = KMP_STRLEN(s: __kmp_affinity_format);
2046 if (buffer && size) {
2047 __kmp_strncpy_truncate(buffer, buf_size: size, src: __kmp_affinity_format,
2048 src_size: format_size + 1);
2049 }
2050 return format_size;
2051}
2052
2053void KMP_EXPAND_NAME(ompc_display_affinity)(char const *format) {
2054 int gtid;
2055 if (!TCR_4(__kmp_init_middle)) {
2056 __kmp_middle_initialize();
2057 }
2058 __kmp_assign_root_init_mask();
2059 gtid = __kmp_get_gtid();
2060#if KMP_AFFINITY_SUPPORTED
2061 if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 &&
2062 __kmp_affinity.flags.reset) {
2063 __kmp_reset_root_init_mask(gtid);
2064 }
2065#endif
2066 __kmp_aux_display_affinity(gtid, format);
2067}
2068
2069size_t KMP_EXPAND_NAME(ompc_capture_affinity)(char *buffer, size_t buf_size,
2070 char const *format) {
2071 int gtid;
2072 size_t num_required;
2073 kmp_str_buf_t capture_buf;
2074 if (!TCR_4(__kmp_init_middle)) {
2075 __kmp_middle_initialize();
2076 }
2077 __kmp_assign_root_init_mask();
2078 gtid = __kmp_get_gtid();
2079#if KMP_AFFINITY_SUPPORTED
2080 if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 &&
2081 __kmp_affinity.flags.reset) {
2082 __kmp_reset_root_init_mask(gtid);
2083 }
2084#endif
2085 __kmp_str_buf_init(&capture_buf);
2086 num_required = __kmp_aux_capture_affinity(gtid, format, buffer: &capture_buf);
2087 if (buffer && buf_size) {
2088 __kmp_strncpy_truncate(buffer, buf_size, src: capture_buf.str,
2089 src_size: capture_buf.used + 1);
2090 }
2091 __kmp_str_buf_free(buffer: &capture_buf);
2092 return num_required;
2093}
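
/* Sizing sketch (illustrative): like omp_capture_affinity(), the function
   returns the number of characters required independent of the buffer passed
   in, so callers can detect truncation.  Whether the returned count includes
   the terminating NUL is an assumption here, hence the conservative check.

     char buf[64];
     size_t needed = ompc_capture_affinity(buf, sizeof(buf), "%H tid=%n");
     if (needed >= sizeof(buf)) {
       // output was truncated; retry with a buffer of at least needed + 1
     }
*/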
2094
2095void kmpc_set_stacksize(int arg) {
2096 // __kmp_aux_set_stacksize initializes the library if needed
2097 __kmp_aux_set_stacksize(arg);
2098}
2099
2100void kmpc_set_stacksize_s(size_t arg) {
2101 // __kmp_aux_set_stacksize initializes the library if needed
2102 __kmp_aux_set_stacksize(arg);
2103}
2104
2105void kmpc_set_blocktime(int arg) {
2106 int gtid, tid, bt = arg;
2107 kmp_info_t *thread;
2108
2109 gtid = __kmp_entry_gtid();
2110 tid = __kmp_tid_from_gtid(gtid);
2111 thread = __kmp_thread_from_gtid(gtid);
2112
2113 __kmp_aux_convert_blocktime(bt: &bt);
2114 __kmp_aux_set_blocktime(arg: bt, thread, tid);
2115}
2116
2117void kmpc_set_library(int arg) {
2118 // __kmp_user_set_library initializes the library if needed
2119 __kmp_user_set_library(arg: (enum library_type)arg);
2120}
2121
2122void kmpc_set_defaults(char const *str) {
2123 // __kmp_aux_set_defaults initializes the library if needed
2124 __kmp_aux_set_defaults(str, KMP_STRLEN(s: str));
2125}
2126
2127void kmpc_set_disp_num_buffers(int arg) {
2128 // ignore after initialization because some teams have already
2129 // allocated dispatch buffers
2130 if (__kmp_init_serial == FALSE && arg >= KMP_MIN_DISP_NUM_BUFF &&
2131 arg <= KMP_MAX_DISP_NUM_BUFF) {
2132 __kmp_dispatch_num_buffers = arg;
2133 }
2134}
2135
2136int kmpc_set_affinity_mask_proc(int proc, void **mask) {
2137#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2138 return -1;
2139#else
2140 if (!TCR_4(__kmp_init_middle)) {
2141 __kmp_middle_initialize();
2142 }
2143 __kmp_assign_root_init_mask();
2144 return __kmp_aux_set_affinity_mask_proc(proc, mask);
2145#endif
2146}
2147
2148int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
2149#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2150 return -1;
2151#else
2152 if (!TCR_4(__kmp_init_middle)) {
2153 __kmp_middle_initialize();
2154 }
2155 __kmp_assign_root_init_mask();
2156 return __kmp_aux_unset_affinity_mask_proc(proc, mask);
2157#endif
2158}
2159
2160int kmpc_get_affinity_mask_proc(int proc, void **mask) {
2161#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2162 return -1;
2163#else
2164 if (!TCR_4(__kmp_init_middle)) {
2165 __kmp_middle_initialize();
2166 }
2167 __kmp_assign_root_init_mask();
2168 return __kmp_aux_get_affinity_mask_proc(proc, mask);
2169#endif
2170}
2171
2172/* -------------------------------------------------------------------------- */
2173/*!
2174@ingroup THREADPRIVATE
2175@param loc source location information
2176@param gtid global thread number
2177@param cpy_size size of the cpy_data buffer
2178@param cpy_data pointer to data to be copied
2179@param cpy_func helper function to call for copying data
2180@param didit flag variable: 1=single thread; 0=not single thread
2181
2182__kmpc_copyprivate implements the interface for the private data broadcast
2183needed for the copyprivate clause associated with a single region in an
2184OpenMP<sup>*</sup> program (both C and Fortran).
2185All threads participating in the parallel region call this routine.
2186One of the threads (called the single thread) should have the <tt>didit</tt>
2187variable set to 1 and all other threads should have that variable set to 0.
2188All threads pass a pointer to a data buffer (cpy_data) that they have built.
2189
2190The OpenMP specification forbids the use of nowait on the single region when a
2191copyprivate clause is present. However, @ref __kmpc_copyprivate implements a
2192barrier internally to avoid race conditions, so the code generation for the
2193single region should avoid generating a barrier after the call to @ref
2194__kmpc_copyprivate.
2195
2196The <tt>gtid</tt> parameter is the global thread id for the current thread.
2197The <tt>loc</tt> parameter is a pointer to source location information.
2198
2199Internal implementation: the single thread first publishes its descriptor
2200address (cpy_data) in a team-private location; each of the other threads then
2201calls the function pointed to by the parameter cpy_func, which performs the
2202copy using the two threads' cpy_data buffers.
2203
2204The cpy_func routine used for the copy and the contents of the data area defined
2205by cpy_data and cpy_size may be built in any fashion that will allow the copy
2206to be done. For instance, the cpy_data buffer can hold the actual data to be
2207copied or it may hold a list of pointers to the data. The cpy_func routine must
2208interpret the cpy_data buffer appropriately.
2209
2210The interface to cpy_func is as follows:
2211@code
2212void cpy_func( void *destination, void *source )
2213@endcode
2214where void *destination is the cpy_data pointer for the thread being copied to
2215and void *source is the cpy_data pointer for the thread being copied from.
2216*/
2217void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
2218 void *cpy_data, void (*cpy_func)(void *, void *),
2219 kmp_int32 didit) {
2220 void **data_ptr;
2221 KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));
2222 __kmp_assert_valid_gtid(gtid);
2223
2224 KMP_MB();
2225
2226 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
2227
2228 if (__kmp_env_consistency_check) {
2229 if (loc == 0) {
2230 KMP_WARNING(ConstructIdentInvalid);
2231 }
2232 }
2233
2234 // ToDo: Optimize the following two barriers into some kind of split barrier
2235
2236 if (didit)
2237 *data_ptr = cpy_data;
2238
2239#if OMPT_SUPPORT
2240 ompt_frame_t *ompt_frame;
2241 if (ompt_enabled.enabled) {
2242 __ompt_get_task_info_internal(ancestor_level: 0, NULL, NULL, task_frame: &ompt_frame, NULL, NULL);
2243 if (ompt_frame->enter_frame.ptr == NULL)
2244 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
2245 }
2246 OMPT_STORE_RETURN_ADDRESS(gtid);
2247#endif
2248/* This barrier is not a barrier region boundary */
2249#if USE_ITT_NOTIFY
2250 __kmp_threads[gtid]->th.th_ident = loc;
2251#endif
2252 __kmp_barrier(bt: bs_plain_barrier, gtid, FALSE, reduce_size: 0, NULL, NULL);
2253
2254 if (!didit)
2255 (*cpy_func)(cpy_data, *data_ptr);
2256
2257 // Consider next barrier a user-visible barrier for barrier region boundaries
2258 // Nesting checks are already handled by the single construct checks
2259 {
2260#if OMPT_SUPPORT
2261 OMPT_STORE_RETURN_ADDRESS(gtid);
2262#endif
2263#if USE_ITT_NOTIFY
2264 __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g.
2265// tasks can overwrite the location)
2266#endif
2267 __kmp_barrier(bt: bs_plain_barrier, gtid, FALSE, reduce_size: 0, NULL, NULL);
2268#if OMPT_SUPPORT && OMPT_OPTIONAL
2269 if (ompt_enabled.enabled) {
2270 ompt_frame->enter_frame = ompt_data_none;
2271 }
2272#endif
2273 }
2274}
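
/* Lowering sketch (illustrative) for
       #pragma omp single copyprivate(x)
   The copy helper, compute() and the variable names are hypothetical; the
   runtime entry points are the ones defined in this file.

     static void copy_x(void *dst, void *src) { // cpy_func
       *(int *)dst = *(int *)src;
     }

     // inside the outlined parallel body, with a thread-private int x:
     kmp_int32 didit = __kmpc_single(loc, gtid);
     if (didit) {
       x = compute();            // executed by the single thread only
       __kmpc_end_single(loc, gtid);
     }
     __kmpc_copyprivate(loc, gtid, sizeof(int), &x, copy_x, didit);
     // on return, every thread's x holds the single thread's value
*/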
2275
2276/* --------------------------------------------------------------------------*/
2277/*!
2278@ingroup THREADPRIVATE
2279@param loc source location information
2280@param gtid global thread number
2281@param cpy_data pointer to the data to be saved/copied or 0
2282@return the saved pointer to the data
2283
2284__kmpc_copyprivate_light is a lighter-weight version of __kmpc_copyprivate:
2285it only saves the pointer it is given (when that pointer is non-zero, i.e. on
2286the thread that executed the single block) and returns the saved pointer to
2287every caller (the single thread itself does not need it). This version performs
2288no actual data copying; the copy has to be done elsewhere, e.g. inline in the
2289generated code. Consequently, unlike __kmpc_copyprivate, this function does not
2290end with a barrier, so the generated code must issue a barrier after all of the
2291data has been copied.
2292*/
2293void *__kmpc_copyprivate_light(ident_t *loc, kmp_int32 gtid, void *cpy_data) {
2294 void **data_ptr;
2295
2296 KC_TRACE(10, ("__kmpc_copyprivate_light: called T#%d\n", gtid));
2297
2298 KMP_MB();
2299
2300 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
2301
2302 if (__kmp_env_consistency_check) {
2303 if (loc == 0) {
2304 KMP_WARNING(ConstructIdentInvalid);
2305 }
2306 }
2307
2308 // ToDo: Optimize the following barrier
2309
2310 if (cpy_data)
2311 *data_ptr = cpy_data;
2312
2313#if OMPT_SUPPORT
2314 ompt_frame_t *ompt_frame;
2315 if (ompt_enabled.enabled) {
2316 __ompt_get_task_info_internal(ancestor_level: 0, NULL, NULL, task_frame: &ompt_frame, NULL, NULL);
2317 if (ompt_frame->enter_frame.ptr == NULL)
2318 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
2319 OMPT_STORE_RETURN_ADDRESS(gtid);
2320 }
2321#endif
2322/* This barrier is not a barrier region boundary */
2323#if USE_ITT_NOTIFY
2324 __kmp_threads[gtid]->th.th_ident = loc;
2325#endif
2326 __kmp_barrier(bt: bs_plain_barrier, gtid, FALSE, reduce_size: 0, NULL, NULL);
2327
2328 return *data_ptr;
2329}
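
/* Lowering sketch (illustrative) for the light variant: the runtime only
   broadcasts the pointer, the copy itself is inlined by the compiler, and the
   trailing barrier must be emitted explicitly.  compute() and x are
   hypothetical.

     kmp_int32 didit = __kmpc_single(loc, gtid);
     if (didit) {
       x = compute();
       __kmpc_end_single(loc, gtid);
     }
     int *src = (int *)__kmpc_copyprivate_light(loc, gtid, didit ? &x : NULL);
     if (!didit)
       x = *src;                // inlined copy of the broadcast data
     __kmpc_barrier(loc, gtid); // required: no barrier inside the light call
*/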
2330
2331/* -------------------------------------------------------------------------- */
2332
2333#define INIT_LOCK __kmp_init_user_lock_with_checks
2334#define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
2335#define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
2336#define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
2337#define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
2338#define ACQUIRE_NESTED_LOCK_TIMED \
2339 __kmp_acquire_nested_user_lock_with_checks_timed
2340#define RELEASE_LOCK __kmp_release_user_lock_with_checks
2341#define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
2342#define TEST_LOCK __kmp_test_user_lock_with_checks
2343#define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
2344#define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
2345#define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
2346
2347// TODO: Make check abort messages use location info & pass it into
2348// with_checks routines
2349
2350#if KMP_USE_DYNAMIC_LOCK
2351
2352// internal lock initializer
2353static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
2354 kmp_dyna_lockseq_t seq) {
2355 if (KMP_IS_D_LOCK(seq)) {
2356 KMP_INIT_D_LOCK(lock, seq);
2357#if USE_ITT_BUILD
2358 __kmp_itt_lock_creating(lock: (kmp_user_lock_p)lock, NULL);
2359#endif
2360 } else {
2361 KMP_INIT_I_LOCK(lock, seq);
2362#if USE_ITT_BUILD
2363 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2364 __kmp_itt_lock_creating(lock: ilk->lock, loc);
2365#endif
2366 }
2367}
2368
2369// internal nest lock initializer
2370static __forceinline void
2371__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
2372 kmp_dyna_lockseq_t seq) {
2373#if KMP_USE_TSX
2374 // Don't have nested lock implementation for speculative locks
2375 if (seq == lockseq_hle || seq == lockseq_rtm_queuing ||
2376 seq == lockseq_rtm_spin || seq == lockseq_adaptive)
2377 seq = __kmp_user_lock_seq;
2378#endif
2379 switch (seq) {
2380 case lockseq_tas:
2381 seq = lockseq_nested_tas;
2382 break;
2383#if KMP_USE_FUTEX
2384 case lockseq_futex:
2385 seq = lockseq_nested_futex;
2386 break;
2387#endif
2388 case lockseq_ticket:
2389 seq = lockseq_nested_ticket;
2390 break;
2391 case lockseq_queuing:
2392 seq = lockseq_nested_queuing;
2393 break;
2394 case lockseq_drdpa:
2395 seq = lockseq_nested_drdpa;
2396 break;
2397 default:
2398 seq = lockseq_nested_queuing;
2399 }
2400 KMP_INIT_I_LOCK(lock, seq);
2401#if USE_ITT_BUILD
2402 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2403 __kmp_itt_lock_creating(lock: ilk->lock, loc);
2404#endif
2405}
2406
2407/* initialize the lock with a hint */
2408void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
2409 uintptr_t hint) {
2410 KMP_DEBUG_ASSERT(__kmp_init_serial);
2411 if (__kmp_env_consistency_check && user_lock == NULL) {
2412 KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
2413 }
2414
2415 __kmp_init_lock_with_hint(loc, lock: user_lock, seq: __kmp_map_hint_to_lock(hint));
2416
2417#if OMPT_SUPPORT && OMPT_OPTIONAL
2418 // This is the case, if called from omp_init_lock_with_hint:
2419 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2420 if (!codeptr)
2421 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2422 if (ompt_enabled.ompt_callback_lock_init) {
2423 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2424 ompt_mutex_lock, (omp_lock_hint_t)hint,
2425 __ompt_get_mutex_impl_type(user_lock),
2426 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2427 }
2428#endif
2429}
2430
2431/* initialize the lock with a hint */
2432void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
2433 void **user_lock, uintptr_t hint) {
2434 KMP_DEBUG_ASSERT(__kmp_init_serial);
2435 if (__kmp_env_consistency_check && user_lock == NULL) {
2436 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
2437 }
2438
2439 __kmp_init_nest_lock_with_hint(loc, lock: user_lock, seq: __kmp_map_hint_to_lock(hint));
2440
2441#if OMPT_SUPPORT && OMPT_OPTIONAL
2442 // This is the case, if called from omp_init_lock_with_hint:
2443 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2444 if (!codeptr)
2445 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2446 if (ompt_enabled.ompt_callback_lock_init) {
2447 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2448 ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
2449 __ompt_get_mutex_impl_type(user_lock),
2450 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2451 }
2452#endif
2453}
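
/* User-level view (a sketch): omp_init_lock_with_hint() is expected to reach
   the entry points above with the hint encoded as a uintptr_t, and
   __kmp_map_hint_to_lock() then selects a lock implementation for it.

     #include <omp.h>

     omp_lock_t l;
     omp_init_lock_with_hint(&l, omp_lock_hint_contended);
     omp_set_lock(&l);
     // ... contended critical work ...
     omp_unset_lock(&l);
     omp_destroy_lock(&l);
*/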
2454
2455#endif // KMP_USE_DYNAMIC_LOCK
2456
2457/* initialize the lock */
2458void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2459#if KMP_USE_DYNAMIC_LOCK
2460
2461 KMP_DEBUG_ASSERT(__kmp_init_serial);
2462 if (__kmp_env_consistency_check && user_lock == NULL) {
2463 KMP_FATAL(LockIsUninitialized, "omp_init_lock");
2464 }
2465 __kmp_init_lock_with_hint(loc, lock: user_lock, seq: __kmp_user_lock_seq);
2466
2467#if OMPT_SUPPORT && OMPT_OPTIONAL
2468 // This is the case, if called from omp_init_lock_with_hint:
2469 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2470 if (!codeptr)
2471 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2472 if (ompt_enabled.ompt_callback_lock_init) {
2473 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2474 ompt_mutex_lock, omp_lock_hint_none,
2475 __ompt_get_mutex_impl_type(user_lock),
2476 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2477 }
2478#endif
2479
2480#else // KMP_USE_DYNAMIC_LOCK
2481
2482 static char const *const func = "omp_init_lock";
2483 kmp_user_lock_p lck;
2484 KMP_DEBUG_ASSERT(__kmp_init_serial);
2485
2486 if (__kmp_env_consistency_check) {
2487 if (user_lock == NULL) {
2488 KMP_FATAL(LockIsUninitialized, func);
2489 }
2490 }
2491
2492 KMP_CHECK_USER_LOCK_INIT();
2493
2494 if ((__kmp_user_lock_kind == lk_tas) &&
2495 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2496 lck = (kmp_user_lock_p)user_lock;
2497 }
2498#if KMP_USE_FUTEX
2499 else if ((__kmp_user_lock_kind == lk_futex) &&
2500 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2501 lck = (kmp_user_lock_p)user_lock;
2502 }
2503#endif
2504 else {
2505 lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2506 }
2507 INIT_LOCK(lck);
2508 __kmp_set_user_lock_location(lck, loc);
2509
2510#if OMPT_SUPPORT && OMPT_OPTIONAL
2511 // This is the case, if called from omp_init_lock_with_hint:
2512 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2513 if (!codeptr)
2514 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2515 if (ompt_enabled.ompt_callback_lock_init) {
2516 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2517 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2518 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2519 }
2520#endif
2521
2522#if USE_ITT_BUILD
2523 __kmp_itt_lock_creating(lck);
2524#endif /* USE_ITT_BUILD */
2525
2526#endif // KMP_USE_DYNAMIC_LOCK
2527} // __kmpc_init_lock
2528
2529/* initialize the lock */
2530void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2531#if KMP_USE_DYNAMIC_LOCK
2532
2533 KMP_DEBUG_ASSERT(__kmp_init_serial);
2534 if (__kmp_env_consistency_check && user_lock == NULL) {
2535 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
2536 }
2537 __kmp_init_nest_lock_with_hint(loc, lock: user_lock, seq: __kmp_user_lock_seq);
2538
2539#if OMPT_SUPPORT && OMPT_OPTIONAL
2540 // This is the case, if called from omp_init_lock_with_hint:
2541 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2542 if (!codeptr)
2543 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2544 if (ompt_enabled.ompt_callback_lock_init) {
2545 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2546 ompt_mutex_nest_lock, omp_lock_hint_none,
2547 __ompt_get_mutex_impl_type(user_lock),
2548 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2549 }
2550#endif
2551
2552#else // KMP_USE_DYNAMIC_LOCK
2553
2554 static char const *const func = "omp_init_nest_lock";
2555 kmp_user_lock_p lck;
2556 KMP_DEBUG_ASSERT(__kmp_init_serial);
2557
2558 if (__kmp_env_consistency_check) {
2559 if (user_lock == NULL) {
2560 KMP_FATAL(LockIsUninitialized, func);
2561 }
2562 }
2563
2564 KMP_CHECK_USER_LOCK_INIT();
2565
2566 if ((__kmp_user_lock_kind == lk_tas) &&
2567 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2568 OMP_NEST_LOCK_T_SIZE)) {
2569 lck = (kmp_user_lock_p)user_lock;
2570 }
2571#if KMP_USE_FUTEX
2572 else if ((__kmp_user_lock_kind == lk_futex) &&
2573 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2574 OMP_NEST_LOCK_T_SIZE)) {
2575 lck = (kmp_user_lock_p)user_lock;
2576 }
2577#endif
2578 else {
2579 lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2580 }
2581
2582 INIT_NESTED_LOCK(lck);
2583 __kmp_set_user_lock_location(lck, loc);
2584
2585#if OMPT_SUPPORT && OMPT_OPTIONAL
2586 // This is the case, if called from omp_init_lock_with_hint:
2587 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2588 if (!codeptr)
2589 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2590 if (ompt_enabled.ompt_callback_lock_init) {
2591 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2592 ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2593 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2594 }
2595#endif
2596
2597#if USE_ITT_BUILD
2598 __kmp_itt_lock_creating(lck);
2599#endif /* USE_ITT_BUILD */
2600
2601#endif // KMP_USE_DYNAMIC_LOCK
2602} // __kmpc_init_nest_lock
2603
2604void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2605#if KMP_USE_DYNAMIC_LOCK
2606
2607#if USE_ITT_BUILD
2608 kmp_user_lock_p lck;
2609 if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2610 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2611 } else {
2612 lck = (kmp_user_lock_p)user_lock;
2613 }
2614 __kmp_itt_lock_destroyed(lock: lck);
2615#endif
2616#if OMPT_SUPPORT && OMPT_OPTIONAL
2617 // This is the case, if called from omp_init_lock_with_hint:
2618 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2619 if (!codeptr)
2620 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2621 if (ompt_enabled.ompt_callback_lock_destroy) {
2622 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2623 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2624 }
2625#endif
2626 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2627#else
2628 kmp_user_lock_p lck;
2629
2630 if ((__kmp_user_lock_kind == lk_tas) &&
2631 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2632 lck = (kmp_user_lock_p)user_lock;
2633 }
2634#if KMP_USE_FUTEX
2635 else if ((__kmp_user_lock_kind == lk_futex) &&
2636 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2637 lck = (kmp_user_lock_p)user_lock;
2638 }
2639#endif
2640 else {
2641 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
2642 }
2643
2644#if OMPT_SUPPORT && OMPT_OPTIONAL
2645 // This is the case, if called from omp_init_lock_with_hint:
2646 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2647 if (!codeptr)
2648 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2649 if (ompt_enabled.ompt_callback_lock_destroy) {
2650 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2651 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2652 }
2653#endif
2654
2655#if USE_ITT_BUILD
2656 __kmp_itt_lock_destroyed(lck);
2657#endif /* USE_ITT_BUILD */
2658 DESTROY_LOCK(lck);
2659
2660 if ((__kmp_user_lock_kind == lk_tas) &&
2661 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2662 ;
2663 }
2664#if KMP_USE_FUTEX
2665 else if ((__kmp_user_lock_kind == lk_futex) &&
2666 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2667 ;
2668 }
2669#endif
2670 else {
2671 __kmp_user_lock_free(user_lock, gtid, lck);
2672 }
2673#endif // KMP_USE_DYNAMIC_LOCK
2674} // __kmpc_destroy_lock
2675
2676/* destroy the lock */
2677void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2678#if KMP_USE_DYNAMIC_LOCK
2679
2680#if USE_ITT_BUILD
2681 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
2682 __kmp_itt_lock_destroyed(lock: ilk->lock);
2683#endif
2684#if OMPT_SUPPORT && OMPT_OPTIONAL
2685 // This is the case, if called from omp_init_lock_with_hint:
2686 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2687 if (!codeptr)
2688 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2689 if (ompt_enabled.ompt_callback_lock_destroy) {
2690 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2691 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2692 }
2693#endif
2694 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2695
2696#else // KMP_USE_DYNAMIC_LOCK
2697
2698 kmp_user_lock_p lck;
2699
2700 if ((__kmp_user_lock_kind == lk_tas) &&
2701 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2702 OMP_NEST_LOCK_T_SIZE)) {
2703 lck = (kmp_user_lock_p)user_lock;
2704 }
2705#if KMP_USE_FUTEX
2706 else if ((__kmp_user_lock_kind == lk_futex) &&
2707 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2708 OMP_NEST_LOCK_T_SIZE)) {
2709 lck = (kmp_user_lock_p)user_lock;
2710 }
2711#endif
2712 else {
2713 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
2714 }
2715
2716#if OMPT_SUPPORT && OMPT_OPTIONAL
2717 // This is the case, if called from omp_init_lock_with_hint:
2718 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2719 if (!codeptr)
2720 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2721 if (ompt_enabled.ompt_callback_lock_destroy) {
2722 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2723 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2724 }
2725#endif
2726
2727#if USE_ITT_BUILD
2728 __kmp_itt_lock_destroyed(lck);
2729#endif /* USE_ITT_BUILD */
2730
2731 DESTROY_NESTED_LOCK(lck);
2732
2733 if ((__kmp_user_lock_kind == lk_tas) &&
2734 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2735 OMP_NEST_LOCK_T_SIZE)) {
2736 ;
2737 }
2738#if KMP_USE_FUTEX
2739 else if ((__kmp_user_lock_kind == lk_futex) &&
2740 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2741 OMP_NEST_LOCK_T_SIZE)) {
2742 ;
2743 }
2744#endif
2745 else {
2746 __kmp_user_lock_free(user_lock, gtid, lck);
2747 }
2748#endif // KMP_USE_DYNAMIC_LOCK
2749} // __kmpc_destroy_nest_lock
2750
2751void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2752 KMP_COUNT_BLOCK(OMP_set_lock);
2753#if KMP_USE_DYNAMIC_LOCK
2754 int tag = KMP_EXTRACT_D_TAG(user_lock);
2755#if USE_ITT_BUILD
2756 __kmp_itt_lock_acquiring(
2757 lock: (kmp_user_lock_p)
2758 user_lock); // itt function will get to the right lock object.
2759#endif
2760#if OMPT_SUPPORT && OMPT_OPTIONAL
2761 // This is the case, if called from omp_init_lock_with_hint:
2762 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2763 if (!codeptr)
2764 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2765 if (ompt_enabled.ompt_callback_mutex_acquire) {
2766 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2767 ompt_mutex_lock, omp_lock_hint_none,
2768 __ompt_get_mutex_impl_type(user_lock),
2769 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2770 }
2771#endif
2772#if KMP_USE_INLINED_TAS
2773 if (tag == locktag_tas && !__kmp_env_consistency_check) {
2774 KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
2775 } else
2776#elif KMP_USE_INLINED_FUTEX
2777 if (tag == locktag_futex && !__kmp_env_consistency_check) {
2778 KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
2779 } else
2780#endif
2781 {
2782 __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2783 }
2784#if USE_ITT_BUILD
2785 __kmp_itt_lock_acquired(lock: (kmp_user_lock_p)user_lock);
2786#endif
2787#if OMPT_SUPPORT && OMPT_OPTIONAL
2788 if (ompt_enabled.ompt_callback_mutex_acquired) {
2789 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2790 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2791 }
2792#endif
2793
2794#else // KMP_USE_DYNAMIC_LOCK
2795
2796 kmp_user_lock_p lck;
2797
2798 if ((__kmp_user_lock_kind == lk_tas) &&
2799 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2800 lck = (kmp_user_lock_p)user_lock;
2801 }
2802#if KMP_USE_FUTEX
2803 else if ((__kmp_user_lock_kind == lk_futex) &&
2804 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2805 lck = (kmp_user_lock_p)user_lock;
2806 }
2807#endif
2808 else {
2809 lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
2810 }
2811
2812#if USE_ITT_BUILD
2813 __kmp_itt_lock_acquiring(lck);
2814#endif /* USE_ITT_BUILD */
2815#if OMPT_SUPPORT && OMPT_OPTIONAL
2816 // This is the case, if called from omp_init_lock_with_hint:
2817 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2818 if (!codeptr)
2819 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2820 if (ompt_enabled.ompt_callback_mutex_acquire) {
2821 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2822 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2823 (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2824 }
2825#endif
2826
2827 ACQUIRE_LOCK(lck, gtid);
2828
2829#if USE_ITT_BUILD
2830 __kmp_itt_lock_acquired(lck);
2831#endif /* USE_ITT_BUILD */
2832
2833#if OMPT_SUPPORT && OMPT_OPTIONAL
2834 if (ompt_enabled.ompt_callback_mutex_acquired) {
2835 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2836 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2837 }
2838#endif
2839
2840#endif // KMP_USE_DYNAMIC_LOCK
2841}
2842
2843void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2844#if KMP_USE_DYNAMIC_LOCK
2845
2846#if USE_ITT_BUILD
2847 __kmp_itt_lock_acquiring(lock: (kmp_user_lock_p)user_lock);
2848#endif
2849#if OMPT_SUPPORT && OMPT_OPTIONAL
2850 // This is the case, if called from omp_init_lock_with_hint:
2851 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2852 if (!codeptr)
2853 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2854 if (ompt_enabled.enabled) {
2855 if (ompt_enabled.ompt_callback_mutex_acquire) {
2856 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2857 ompt_mutex_nest_lock, omp_lock_hint_none,
2858 __ompt_get_mutex_impl_type(user_lock),
2859 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2860 }
2861 }
2862#endif
2863 int acquire_status =
2864 KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
2865 (void)acquire_status;
2866#if USE_ITT_BUILD
2867 __kmp_itt_lock_acquired(lock: (kmp_user_lock_p)user_lock);
2868#endif
2869
2870#if OMPT_SUPPORT && OMPT_OPTIONAL
2871 if (ompt_enabled.enabled) {
2872 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2873 if (ompt_enabled.ompt_callback_mutex_acquired) {
2874 // lock_first
2875 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2876 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
2877 codeptr);
2878 }
2879 } else {
2880 if (ompt_enabled.ompt_callback_nest_lock) {
2881 // lock_next
2882 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2883 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2884 }
2885 }
2886 }
2887#endif
2888
2889#else // KMP_USE_DYNAMIC_LOCK
2890 int acquire_status;
2891 kmp_user_lock_p lck;
2892
2893 if ((__kmp_user_lock_kind == lk_tas) &&
2894 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2895 OMP_NEST_LOCK_T_SIZE)) {
2896 lck = (kmp_user_lock_p)user_lock;
2897 }
2898#if KMP_USE_FUTEX
2899 else if ((__kmp_user_lock_kind == lk_futex) &&
2900 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2901 OMP_NEST_LOCK_T_SIZE)) {
2902 lck = (kmp_user_lock_p)user_lock;
2903 }
2904#endif
2905 else {
2906 lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
2907 }
2908
2909#if USE_ITT_BUILD
2910 __kmp_itt_lock_acquiring(lck);
2911#endif /* USE_ITT_BUILD */
2912#if OMPT_SUPPORT && OMPT_OPTIONAL
2913 // This is the case, if called from omp_init_lock_with_hint:
2914 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2915 if (!codeptr)
2916 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2917 if (ompt_enabled.enabled) {
2918 if (ompt_enabled.ompt_callback_mutex_acquire) {
2919 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2920 ompt_mutex_nest_lock, omp_lock_hint_none,
2921 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
2922 codeptr);
2923 }
2924 }
2925#endif
2926
2927 ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);
2928
2929#if USE_ITT_BUILD
2930 __kmp_itt_lock_acquired(lck);
2931#endif /* USE_ITT_BUILD */
2932
2933#if OMPT_SUPPORT && OMPT_OPTIONAL
2934 if (ompt_enabled.enabled) {
2935 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2936 if (ompt_enabled.ompt_callback_mutex_acquired) {
2937 // lock_first
2938 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2939 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2940 }
2941 } else {
2942 if (ompt_enabled.ompt_callback_nest_lock) {
2943 // lock_next
2944 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2945 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2946 }
2947 }
2948 }
2949#endif
2950
2951#endif // KMP_USE_DYNAMIC_LOCK
2952}
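
/* User-level counterpart (illustrative): a nest lock may be re-acquired by
   the owning thread, and each omp_set_nest_lock() must be matched by an
   omp_unset_nest_lock().

     omp_nest_lock_t l;
     omp_init_nest_lock(&l);
     omp_set_nest_lock(&l);
     omp_set_nest_lock(&l);   // same thread: nesting depth becomes 2
     omp_unset_nest_lock(&l);
     omp_unset_nest_lock(&l); // depth back to 0; the lock is released
     omp_destroy_nest_lock(&l);
*/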
2953
2954void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2955#if KMP_USE_DYNAMIC_LOCK
2956
2957 int tag = KMP_EXTRACT_D_TAG(user_lock);
2958#if USE_ITT_BUILD
2959 __kmp_itt_lock_releasing(lock: (kmp_user_lock_p)user_lock);
2960#endif
2961#if KMP_USE_INLINED_TAS
2962 if (tag == locktag_tas && !__kmp_env_consistency_check) {
2963 KMP_RELEASE_TAS_LOCK(user_lock, gtid);
2964 } else
2965#elif KMP_USE_INLINED_FUTEX
2966 if (tag == locktag_futex && !__kmp_env_consistency_check) {
2967 KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
2968 } else
2969#endif
2970 {
2971 __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2972 }
2973
2974#if OMPT_SUPPORT && OMPT_OPTIONAL
2975 // This is the case, if called from omp_init_lock_with_hint:
2976 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2977 if (!codeptr)
2978 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2979 if (ompt_enabled.ompt_callback_mutex_released) {
2980 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2981 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2982 }
2983#endif
2984
2985#else // KMP_USE_DYNAMIC_LOCK
2986
2987 kmp_user_lock_p lck;
2988
2989 /* Can't use serial interval since not block structured */
2990 /* release the lock */
2991
2992 if ((__kmp_user_lock_kind == lk_tas) &&
2993 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2994#if KMP_OS_LINUX && \
2995 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2996// "fast" path implemented to fix customer performance issue
2997#if USE_ITT_BUILD
2998 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2999#endif /* USE_ITT_BUILD */
3000 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
3001 KMP_MB();
3002
3003#if OMPT_SUPPORT && OMPT_OPTIONAL
3004 // This is the case, if called from omp_init_lock_with_hint:
3005 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3006 if (!codeptr)
3007 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3008 if (ompt_enabled.ompt_callback_mutex_released) {
3009 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3010 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3011 }
3012#endif
3013
3014 return;
3015#else
3016 lck = (kmp_user_lock_p)user_lock;
3017#endif
3018 }
3019#if KMP_USE_FUTEX
3020 else if ((__kmp_user_lock_kind == lk_futex) &&
3021 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
3022 lck = (kmp_user_lock_p)user_lock;
3023 }
3024#endif
3025 else {
3026 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
3027 }
3028
3029#if USE_ITT_BUILD
3030 __kmp_itt_lock_releasing(lck);
3031#endif /* USE_ITT_BUILD */
3032
3033 RELEASE_LOCK(lck, gtid);
3034
3035#if OMPT_SUPPORT && OMPT_OPTIONAL
3036 // This is the case, if called from omp_init_lock_with_hint:
3037 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3038 if (!codeptr)
3039 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3040 if (ompt_enabled.ompt_callback_mutex_released) {
3041 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3042 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3043 }
3044#endif
3045
3046#endif // KMP_USE_DYNAMIC_LOCK
3047}
3048
3049/* release the lock */
3050void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3051#if KMP_USE_DYNAMIC_LOCK
3052
3053#if USE_ITT_BUILD
3054 __kmp_itt_lock_releasing(lock: (kmp_user_lock_p)user_lock);
3055#endif
3056 int release_status =
3057 KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
3058 (void)release_status;
3059
3060#if OMPT_SUPPORT && OMPT_OPTIONAL
3061 // This is the case, if called from omp_init_lock_with_hint:
3062 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3063 if (!codeptr)
3064 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3065 if (ompt_enabled.enabled) {
3066 if (release_status == KMP_LOCK_RELEASED) {
3067 if (ompt_enabled.ompt_callback_mutex_released) {
3068 // release_lock_last
3069 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3070 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
3071 codeptr);
3072 }
3073 } else if (ompt_enabled.ompt_callback_nest_lock) {
3074 // release_lock_prev
3075 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3076 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3077 }
3078 }
3079#endif
3080
3081#else // KMP_USE_DYNAMIC_LOCK
3082
3083 kmp_user_lock_p lck;
3084
3085 /* Can't use serial interval since not block structured */
3086
3087 if ((__kmp_user_lock_kind == lk_tas) &&
3088 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3089 OMP_NEST_LOCK_T_SIZE)) {
3090#if KMP_OS_LINUX && \
3091 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
3092 // "fast" path implemented to fix customer performance issue
3093 kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
3094#if USE_ITT_BUILD
3095 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
3096#endif /* USE_ITT_BUILD */
3097
3098#if OMPT_SUPPORT && OMPT_OPTIONAL
3099 int release_status = KMP_LOCK_STILL_HELD;
3100#endif
3101
3102 if (--(tl->lk.depth_locked) == 0) {
3103 TCW_4(tl->lk.poll, 0);
3104#if OMPT_SUPPORT && OMPT_OPTIONAL
3105 release_status = KMP_LOCK_RELEASED;
3106#endif
3107 }
3108 KMP_MB();
3109
3110#if OMPT_SUPPORT && OMPT_OPTIONAL
3111 // This is the case, if called from omp_init_lock_with_hint:
3112 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3113 if (!codeptr)
3114 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3115 if (ompt_enabled.enabled) {
3116 if (release_status == KMP_LOCK_RELEASED) {
3117 if (ompt_enabled.ompt_callback_mutex_released) {
3118 // release_lock_last
3119 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3120 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3121 }
3122 } else if (ompt_enabled.ompt_callback_nest_lock) {
3123 // release_lock_previous
3124 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3125            ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3126 }
3127 }
3128#endif
3129
3130 return;
3131#else
3132 lck = (kmp_user_lock_p)user_lock;
3133#endif
3134 }
3135#if KMP_USE_FUTEX
3136 else if ((__kmp_user_lock_kind == lk_futex) &&
3137 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3138 OMP_NEST_LOCK_T_SIZE)) {
3139 lck = (kmp_user_lock_p)user_lock;
3140 }
3141#endif
3142 else {
3143 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
3144 }
3145
3146#if USE_ITT_BUILD
3147 __kmp_itt_lock_releasing(lck);
3148#endif /* USE_ITT_BUILD */
3149
3150 int release_status;
3151 release_status = RELEASE_NESTED_LOCK(lck, gtid);
3152#if OMPT_SUPPORT && OMPT_OPTIONAL
3153 // This is the case, if called from omp_init_lock_with_hint:
3154 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3155 if (!codeptr)
3156 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3157 if (ompt_enabled.enabled) {
3158 if (release_status == KMP_LOCK_RELEASED) {
3159 if (ompt_enabled.ompt_callback_mutex_released) {
3160 // release_lock_last
3161 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3162 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3163 }
3164 } else if (ompt_enabled.ompt_callback_nest_lock) {
3165 // release_lock_previous
3166 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3167          ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3168 }
3169 }
3170#endif
3171
3172#endif // KMP_USE_DYNAMIC_LOCK
3173}
3174
3175/* try to acquire the lock */
3176int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3177 KMP_COUNT_BLOCK(OMP_test_lock);
3178
3179#if KMP_USE_DYNAMIC_LOCK
3180 int rc;
3181 int tag = KMP_EXTRACT_D_TAG(user_lock);
3182#if USE_ITT_BUILD
3183 __kmp_itt_lock_acquiring(lock: (kmp_user_lock_p)user_lock);
3184#endif
3185#if OMPT_SUPPORT && OMPT_OPTIONAL
3186 // This is the case, if called from omp_init_lock_with_hint:
3187 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3188 if (!codeptr)
3189 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3190 if (ompt_enabled.ompt_callback_mutex_acquire) {
3191 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3192 ompt_mutex_test_lock, omp_lock_hint_none,
3193 __ompt_get_mutex_impl_type(user_lock),
3194 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3195 }
3196#endif
3197#if KMP_USE_INLINED_TAS
3198 if (tag == locktag_tas && !__kmp_env_consistency_check) {
3199 KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
3200 } else
3201#elif KMP_USE_INLINED_FUTEX
3202 if (tag == locktag_futex && !__kmp_env_consistency_check) {
3203 KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
3204 } else
3205#endif
3206 {
3207 rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
3208 }
3209 if (rc) {
3210#if USE_ITT_BUILD
3211 __kmp_itt_lock_acquired(lock: (kmp_user_lock_p)user_lock);
3212#endif
3213#if OMPT_SUPPORT && OMPT_OPTIONAL
3214 if (ompt_enabled.ompt_callback_mutex_acquired) {
3215 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3216 ompt_mutex_test_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3217 }
3218#endif
3219 return FTN_TRUE;
3220 } else {
3221#if USE_ITT_BUILD
3222 __kmp_itt_lock_cancelled(lock: (kmp_user_lock_p)user_lock);
3223#endif
3224 return FTN_FALSE;
3225 }
3226
3227#else // KMP_USE_DYNAMIC_LOCK
3228
3229 kmp_user_lock_p lck;
3230 int rc;
3231
3232 if ((__kmp_user_lock_kind == lk_tas) &&
3233 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
3234 lck = (kmp_user_lock_p)user_lock;
3235 }
3236#if KMP_USE_FUTEX
3237 else if ((__kmp_user_lock_kind == lk_futex) &&
3238 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
3239 lck = (kmp_user_lock_p)user_lock;
3240 }
3241#endif
3242 else {
3243 lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
3244 }
3245
3246#if USE_ITT_BUILD
3247 __kmp_itt_lock_acquiring(lck);
3248#endif /* USE_ITT_BUILD */
3249#if OMPT_SUPPORT && OMPT_OPTIONAL
3250 // This is the case, if called from omp_init_lock_with_hint:
3251 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3252 if (!codeptr)
3253 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3254 if (ompt_enabled.ompt_callback_mutex_acquire) {
3255 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3256 ompt_mutex_test_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
3257 (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3258 }
3259#endif
3260
3261 rc = TEST_LOCK(lck, gtid);
3262#if USE_ITT_BUILD
3263 if (rc) {
3264 __kmp_itt_lock_acquired(lck);
3265 } else {
3266 __kmp_itt_lock_cancelled(lck);
3267 }
3268#endif /* USE_ITT_BUILD */
3269#if OMPT_SUPPORT && OMPT_OPTIONAL
3270 if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
3271 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3272 ompt_mutex_test_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3273 }
3274#endif
3275
3276 return (rc ? FTN_TRUE : FTN_FALSE);
3277
3278 /* Can't use serial interval since not block structured */
3279
3280#endif // KMP_USE_DYNAMIC_LOCK
3281}
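
/* User-level counterpart (illustrative): omp_test_lock() returns nonzero on
   success, mirroring the FTN_TRUE/FTN_FALSE values returned here.

     omp_lock_t l;
     omp_init_lock(&l);
     if (omp_test_lock(&l)) {
       // acquired without blocking
       omp_unset_lock(&l);
     } else {
       // the lock was busy; do other work instead of waiting
     }
     omp_destroy_lock(&l);
*/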
3282
3283/* try to acquire the lock */
3284int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3285#if KMP_USE_DYNAMIC_LOCK
3286 int rc;
3287#if USE_ITT_BUILD
3288 __kmp_itt_lock_acquiring(lock: (kmp_user_lock_p)user_lock);
3289#endif
3290#if OMPT_SUPPORT && OMPT_OPTIONAL
3291 // This is the case, if called from omp_init_lock_with_hint:
3292 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3293 if (!codeptr)
3294 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3295 if (ompt_enabled.ompt_callback_mutex_acquire) {
3296 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3297 ompt_mutex_test_nest_lock, omp_lock_hint_none,
3298 __ompt_get_mutex_impl_type(user_lock),
3299 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3300 }
3301#endif
3302 rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
3303#if USE_ITT_BUILD
3304 if (rc) {
3305 __kmp_itt_lock_acquired(lock: (kmp_user_lock_p)user_lock);
3306 } else {
3307 __kmp_itt_lock_cancelled(lock: (kmp_user_lock_p)user_lock);
3308 }
3309#endif
3310#if OMPT_SUPPORT && OMPT_OPTIONAL
3311 if (ompt_enabled.enabled && rc) {
3312 if (rc == 1) {
3313 if (ompt_enabled.ompt_callback_mutex_acquired) {
3314 // lock_first
3315 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3316 ompt_mutex_test_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
3317 codeptr);
3318 }
3319 } else {
3320 if (ompt_enabled.ompt_callback_nest_lock) {
3321 // lock_next
3322 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3323 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3324 }
3325 }
3326 }
3327#endif
3328 return rc;
3329
3330#else // KMP_USE_DYNAMIC_LOCK
3331
3332 kmp_user_lock_p lck;
3333 int rc;
3334
3335 if ((__kmp_user_lock_kind == lk_tas) &&
3336 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3337 OMP_NEST_LOCK_T_SIZE)) {
3338 lck = (kmp_user_lock_p)user_lock;
3339 }
3340#if KMP_USE_FUTEX
3341 else if ((__kmp_user_lock_kind == lk_futex) &&
3342 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3343 OMP_NEST_LOCK_T_SIZE)) {
3344 lck = (kmp_user_lock_p)user_lock;
3345 }
3346#endif
3347 else {
3348 lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
3349 }
3350
3351#if USE_ITT_BUILD
3352 __kmp_itt_lock_acquiring(lck);
3353#endif /* USE_ITT_BUILD */
3354
3355#if OMPT_SUPPORT && OMPT_OPTIONAL
3356 // This is the case, if called from omp_init_lock_with_hint:
3357 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3358 if (!codeptr)
3359 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3360 if (ompt_enabled.enabled &&
3361 ompt_enabled.ompt_callback_mutex_acquire) {
3362 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3363 ompt_mutex_test_nest_lock, omp_lock_hint_none,
3364 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
3365 codeptr);
3366 }
3367#endif
3368
3369 rc = TEST_NESTED_LOCK(lck, gtid);
3370#if USE_ITT_BUILD
3371 if (rc) {
3372 __kmp_itt_lock_acquired(lck);
3373 } else {
3374 __kmp_itt_lock_cancelled(lck);
3375 }
3376#endif /* USE_ITT_BUILD */
3377#if OMPT_SUPPORT && OMPT_OPTIONAL
3378 if (ompt_enabled.enabled && rc) {
3379 if (rc == 1) {
3380 if (ompt_enabled.ompt_callback_mutex_acquired) {
3381 // lock_first
3382 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3383 ompt_mutex_test_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3384 }
3385 } else {
3386 if (ompt_enabled.ompt_callback_nest_lock) {
3387 // lock_next
3388 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3389 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3390 }
3391 }
3392 }
3393#endif
3394 return rc;
3395
3396 /* Can't use serial interval since not block structured */
3397
3398#endif // KMP_USE_DYNAMIC_LOCK
3399}
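/* Illustrative usage sketch (not part of the runtime): user code reaches
   __kmpc_test_nest_lock through the standard omp_test_nest_lock() API, which
   returns the new nesting count on success and 0 on failure. The
   do_protected_work() call below is a hypothetical placeholder.

     #include <omp.h>

     omp_nest_lock_t l;
     omp_init_nest_lock(&l);
     #pragma omp parallel
     {
       if (omp_test_nest_lock(&l)) { // non-blocking; may nest in the owner
         do_protected_work();
         omp_unset_nest_lock(&l);
       }
     }
     omp_destroy_nest_lock(&l);
*/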
3400
3401// Interface to fast scalable reduce methods routines
3402
3403// keep the selected method in a thread local structure for cross-function
3404// usage: will be used in __kmpc_end_reduce* functions;
3405// another solution: to re-determine the method one more time in
3406// __kmpc_end_reduce* functions (new prototype required then)
3407// AT: which solution is better?
3408#define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \
3409 ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))
3410
3411#define __KMP_GET_REDUCTION_METHOD(gtid) \
3412 (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
3413
3414// description of the packed_reduction_method variable: look at the macros in
3415// kmp.h
3416
3417// used in a critical section reduce block
3418static __forceinline void
3419__kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3420 kmp_critical_name *crit) {
3421
3422 // this lock was visible to a customer and to the threading profile tool as a
3423 // serial overhead span (although it's used for an internal purpose only)
3424 // why was it visible in previous implementation?
3425 // should we keep it visible in new reduce block?
3426 kmp_user_lock_p lck;
3427
3428#if KMP_USE_DYNAMIC_LOCK
3429
3430 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
3431 // Check if it is initialized.
3432 if (*lk == 0) {
3433 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3434 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
3435 KMP_GET_D_TAG(__kmp_user_lock_seq));
3436 } else {
3437 __kmp_init_indirect_csptr(crit, loc, global_tid,
3438 KMP_GET_I_TAG(__kmp_user_lock_seq));
3439 }
3440 }
3441 // Branch for accessing the actual lock object and set operation. This
3442 // branching is inevitable since this lock initialization does not follow the
3443 // normal dispatch path (lock table is not used).
3444 if (KMP_EXTRACT_D_TAG(lk) != 0) {
3445 lck = (kmp_user_lock_p)lk;
3446 KMP_DEBUG_ASSERT(lck != NULL);
3447 if (__kmp_env_consistency_check) {
3448 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3449 }
3450 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
3451 } else {
3452 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
3453 lck = ilk->lock;
3454 KMP_DEBUG_ASSERT(lck != NULL);
3455 if (__kmp_env_consistency_check) {
3456 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3457 }
3458 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
3459 }
3460
3461#else // KMP_USE_DYNAMIC_LOCK
3462
3463 // We know that the fast reduction code is only emitted by Intel compilers
3464 // with 32 byte critical sections. If there isn't enough space, then we
3465 // have to use a pointer.
3466 if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
3467 lck = (kmp_user_lock_p)crit;
3468 } else {
3469 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
3470 }
3471 KMP_DEBUG_ASSERT(lck != NULL);
3472
3473 if (__kmp_env_consistency_check)
3474 __kmp_push_sync(global_tid, ct_critical, loc, lck);
3475
3476 __kmp_acquire_user_lock_with_checks(lck, global_tid);
3477
3478#endif // KMP_USE_DYNAMIC_LOCK
3479}
3480
3481// used in a critical section reduce block
3482static __forceinline void
3483__kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3484 kmp_critical_name *crit) {
3485
3486 kmp_user_lock_p lck;
3487
3488#if KMP_USE_DYNAMIC_LOCK
3489
3490 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3491 lck = (kmp_user_lock_p)crit;
3492 if (__kmp_env_consistency_check)
3493 __kmp_pop_sync(global_tid, ct_critical, loc);
3494 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
3495 } else {
3496 kmp_indirect_lock_t *ilk =
3497 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
3498 if (__kmp_env_consistency_check)
3499 __kmp_pop_sync(global_tid, ct_critical, loc);
3500 KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
3501 }
3502
3503#else // KMP_USE_DYNAMIC_LOCK
3504
3505 // We know that the fast reduction code is only emitted by Intel compilers
3506 // with 32 byte critical sections. If there isn't enough space, then we have
3507 // to use a pointer.
3508 if (__kmp_base_user_lock_size > 32) {
3509 lck = *((kmp_user_lock_p *)crit);
3510 KMP_ASSERT(lck != NULL);
3511 } else {
3512 lck = (kmp_user_lock_p)crit;
3513 }
3514
3515 if (__kmp_env_consistency_check)
3516 __kmp_pop_sync(global_tid, ct_critical, loc);
3517
3518 __kmp_release_user_lock_with_checks(lck, global_tid);
3519
3520#endif // KMP_USE_DYNAMIC_LOCK
3521} // __kmp_end_critical_section_reduce_block
3522
3523static __forceinline int
3524__kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
3525 int *task_state) {
3526 kmp_team_t *team;
3527
3528 // Check whether we are inside a teams construct.
3529 if (th->th.th_teams_microtask) {
3530 *team_p = team = th->th.th_team;
3531 if (team->t.t_level == th->th.th_teams_level) {
3532 // This is reduction at teams construct.
3533 KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
3534 // Let's swap teams temporarily for the reduction.
3535 th->th.th_info.ds.ds_tid = team->t.t_master_tid;
3536 th->th.th_team = team->t.t_parent;
3537 th->th.th_team_nproc = th->th.th_team->t.t_nproc;
3538 th->th.th_task_team = th->th.th_team->t.t_task_team[0];
3539 *task_state = th->th.th_task_state;
3540 th->th.th_task_state = 0;
3541
3542 return 1;
3543 }
3544 }
3545 return 0;
3546}
3547
3548static __forceinline void
3549__kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
3550 // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction.
3551 th->th.th_info.ds.ds_tid = 0;
3552 th->th.th_team = team;
3553 th->th.th_team_nproc = team->t.t_nproc;
3554 th->th.th_task_team = team->t.t_task_team[task_state];
3555 __kmp_type_convert(task_state, &(th->th.th_task_state));
3556}
3557
3558/* 2.a.i. Reduce Block without a terminating barrier */
3559/*!
3560@ingroup SYNCHRONIZATION
3561@param loc source location information
3562@param global_tid global thread number
3563@param num_vars number of items (variables) to be reduced
3564@param reduce_size size of data in bytes to be reduced
3565@param reduce_data pointer to data to be reduced
3566@param reduce_func callback function providing reduction operation on two
3567operands and returning result of reduction in lhs_data
3568@param lck pointer to the unique lock data structure
3569@result 1 for the primary thread, 0 for all other team threads, 2 for all team
3570threads if atomic reduction needed
3571
3572The nowait version is used for a reduce clause with the nowait argument.
3573*/
3574kmp_int32
3575__kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3576 size_t reduce_size, void *reduce_data,
3577 void (*reduce_func)(void *lhs_data, void *rhs_data),
3578 kmp_critical_name *lck) {
3579
3580 KMP_COUNT_BLOCK(REDUCE_nowait);
3581 int retval = 0;
3582 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3583 kmp_info_t *th;
3584 kmp_team_t *team;
3585 int teams_swapped = 0, task_state;
3586 KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
3587 __kmp_assert_valid_gtid(global_tid);
3588
3589 // why do we need this initialization here at all?
3590 // Reduction clause can not be used as a stand-alone directive.
3591
3592 // do not call __kmp_serial_initialize(), it will be called by
3593 // __kmp_parallel_initialize() if needed
3594 // possible detection of false-positive race by the threadchecker ???
3595 if (!TCR_4(__kmp_init_parallel))
3596 __kmp_parallel_initialize();
3597
3598 __kmp_resume_if_soft_paused();
3599
3600// check correctness of reduce block nesting
3601#if KMP_USE_DYNAMIC_LOCK
3602 if (__kmp_env_consistency_check)
3603 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3604#else
3605 if (__kmp_env_consistency_check)
3606 __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3607#endif
3608
3609 th = __kmp_thread_from_gtid(global_tid);
3610 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3611
3612 // packed_reduction_method value will be reused by __kmp_end_reduce* function,
3613 // the value should be kept in a variable
3614 // the variable should be either a construct-specific or thread-specific
3615 // property, not a team specific property
3616 // (a thread can reach the next reduce block on the next construct, reduce
3617 // method may differ on the next construct)
3618 // an ident_t "loc" parameter could be used as a construct-specific property
3619 // (what if loc == 0?)
3620 // (if both construct-specific and team-specific variables were shared,
3621 // then unnecessary extra syncs would be needed)
3622 // a thread-specific variable is better regarding two issues above (next
3623 // construct and extra syncs)
3624 // a thread-specific "th_local.reduction_method" variable is used currently
3625 // each thread executes 'determine' and 'set' lines (no need to execute by one
3626 // thread, to avoid unnecessary extra syncs)
3627
3628 packed_reduction_method = __kmp_determine_reduction_method(
3629 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3630 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3631
3632 OMPT_REDUCTION_DECL(th, global_tid);
3633 if (packed_reduction_method == critical_reduce_block) {
3634
3635 OMPT_REDUCTION_BEGIN;
3636
3637 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3638 retval = 1;
3639
3640 } else if (packed_reduction_method == empty_reduce_block) {
3641
3642 OMPT_REDUCTION_BEGIN;
3643
3644 // usage: if team size == 1, no synchronization is required ( Intel
3645 // platforms only )
3646 retval = 1;
3647
3648 } else if (packed_reduction_method == atomic_reduce_block) {
3649
3650 retval = 2;
3651
3652 // all threads should do this pop here (because __kmpc_end_reduce_nowait()
3653 // won't be called by the code gen)
3654 // (it's not quite good, because the checking block has been closed by
3655 // this 'pop',
3656 // but atomic operation has not been executed yet, will be executed
3657 // slightly later, literally on next instruction)
3658 if (__kmp_env_consistency_check)
3659 __kmp_pop_sync(global_tid, ct_reduce, loc);
3660
3661 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3662 tree_reduce_block)) {
3663
3664// AT: performance issue: a real barrier here
3665// AT: (if primary thread is slow, other threads are blocked here waiting for
3666// the primary thread to come and release them)
3667// AT: (it's not what a customer might expect specifying NOWAIT clause)
3668// AT: (specifying NOWAIT won't result in improvement of performance, it'll
3669// be confusing to a customer)
3670// AT: another implementation of *barrier_gather*nowait() (or some other design)
3671// might go faster and be more in line with sense of NOWAIT
3672// AT: TO DO: do epcc test and compare times
3673
3674// this barrier should be invisible to a customer and to the threading profile
3675// tool (it's neither a terminating barrier nor customer's code, it's
3676// used for an internal purpose)
3677#if OMPT_SUPPORT
3678 // JP: can this barrier potentially lead to task scheduling?
3679 // JP: as long as there is a barrier in the implementation, OMPT should and
3680 // will provide the barrier events
3681 // so we set-up the necessary frame/return addresses.
3682 ompt_frame_t *ompt_frame;
3683 if (ompt_enabled.enabled) {
3684 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3685 if (ompt_frame->enter_frame.ptr == NULL)
3686 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3687 }
3688 OMPT_STORE_RETURN_ADDRESS(global_tid);
3689#endif
3690#if USE_ITT_NOTIFY
3691 __kmp_threads[global_tid]->th.th_ident = loc;
3692#endif
3693 retval =
3694 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3695 global_tid, FALSE, reduce_size, reduce_data, reduce_func);
3696 retval = (retval != 0) ? (0) : (1);
3697#if OMPT_SUPPORT && OMPT_OPTIONAL
3698 if (ompt_enabled.enabled) {
3699 ompt_frame->enter_frame = ompt_data_none;
3700 }
3701#endif
3702
3703 // all other workers except primary thread should do this pop here
3704 // ( none of other workers will get to __kmpc_end_reduce_nowait() )
3705 if (__kmp_env_consistency_check) {
3706 if (retval == 0) {
3707 __kmp_pop_sync(global_tid, ct_reduce, loc);
3708 }
3709 }
3710
3711 } else {
3712
3713 // should never reach this block
3714 KMP_ASSERT(0); // "unexpected method"
3715 }
3716 if (teams_swapped) {
3717 __kmp_restore_swapped_teams(th, team, task_state);
3718 }
3719 KA_TRACE(
3720 10,
3721 ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
3722 global_tid, packed_reduction_method, retval));
3723
3724 return retval;
3725}
3726
3727/*!
3728@ingroup SYNCHRONIZATION
3729@param loc source location information
3730@param global_tid global thread id.
3731@param lck pointer to the unique lock data structure
3732
3733Finish the execution of a reduce nowait.
3734*/
3735void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
3736 kmp_critical_name *lck) {
3737
3738 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3739
3740 KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
3741 __kmp_assert_valid_gtid(global_tid);
3742
3743 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3744
3745 OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid);
3746
3747 if (packed_reduction_method == critical_reduce_block) {
3748
3749 __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3750 OMPT_REDUCTION_END;
3751
3752 } else if (packed_reduction_method == empty_reduce_block) {
3753
3754 // usage: if team size == 1, no synchronization is required ( on Intel
3755 // platforms only )
3756
3757 OMPT_REDUCTION_END;
3758
3759 } else if (packed_reduction_method == atomic_reduce_block) {
3760
3761 // neither primary thread nor other workers should get here
3762 // (code gen does not generate this call in case 2: atomic reduce block)
3763 // actually it would be better to remove this else-if entirely;
3764 // after removal this value would be checked by the 'else' and would assert
3765
3766 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3767 tree_reduce_block)) {
3768
3769 // only primary thread gets here
3770 // OMPT: tree reduction is annotated in the barrier code
3771
3772 } else {
3773
3774 // should never reach this block
3775 KMP_ASSERT(0); // "unexpected method"
3776 }
3777
3778 if (__kmp_env_consistency_check)
3779 __kmp_pop_sync(global_tid, ct_reduce, loc);
3780
3781 KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
3782 global_tid, packed_reduction_method));
3783
3784 return;
3785}
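/* Rough sketch of the code a compiler might emit for a worksharing loop with
   "reduction(+:sum) nowait"; the reduce callback, the kmp_critical_name
   object, and all variable names here are hypothetical, and loc/gtid stand
   for the usual ident_t* and global thread id arguments.

     static kmp_critical_name reduce_crit; // zero-initialized lock space
     static void sum_reduce_fn(void *lhs, void *rhs) {
       *(int *)lhs += *(int *)rhs;
     }

     int sum_priv = 0; // thread-private partial result, filled by the loop
     switch (__kmpc_reduce_nowait(loc, gtid, 1, sizeof(int), &sum_priv,
                                  sum_reduce_fn, &reduce_crit)) {
     case 1: // this thread updates the shared result (inside the critical
             // section, or as the primary thread after a tree reduction)
       sum += sum_priv;
       __kmpc_end_reduce_nowait(loc, gtid, &reduce_crit);
       break;
     case 2: // atomic method: every thread updates the shared result atomically
       __kmpc_atomic_fixed4_add(loc, gtid, &sum, sum_priv);
       break;
     default: // 0: partial value was already combined by the tree barrier
       break;
     }
*/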
3786
3787/* 2.a.ii. Reduce Block with a terminating barrier */
3788
3789/*!
3790@ingroup SYNCHRONIZATION
3791@param loc source location information
3792@param global_tid global thread number
3793@param num_vars number of items (variables) to be reduced
3794@param reduce_size size of data in bytes to be reduced
3795@param reduce_data pointer to data to be reduced
3796@param reduce_func callback function providing reduction operation on two
3797operands and returning result of reduction in lhs_data
3798@param lck pointer to the unique lock data structure
3799@result 1 for the primary thread, 0 for all other team threads, 2 for all team
3800threads if atomic reduction needed
3801
3802A blocking reduce that includes an implicit barrier.
3803*/
3804kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3805 size_t reduce_size, void *reduce_data,
3806 void (*reduce_func)(void *lhs_data, void *rhs_data),
3807 kmp_critical_name *lck) {
3808 KMP_COUNT_BLOCK(REDUCE_wait);
3809 int retval = 0;
3810 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3811 kmp_info_t *th;
3812 kmp_team_t *team;
3813 int teams_swapped = 0, task_state;
3814
3815 KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
3816 __kmp_assert_valid_gtid(global_tid);
3817
3818 // why do we need this initialization here at all?
3819 // Reduction clause can not be a stand-alone directive.
3820
3821 // do not call __kmp_serial_initialize(), it will be called by
3822 // __kmp_parallel_initialize() if needed
3823 // possible detection of false-positive race by the threadchecker ???
3824 if (!TCR_4(__kmp_init_parallel))
3825 __kmp_parallel_initialize();
3826
3827 __kmp_resume_if_soft_paused();
3828
3829// check correctness of reduce block nesting
3830#if KMP_USE_DYNAMIC_LOCK
3831 if (__kmp_env_consistency_check)
3832 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3833#else
3834 if (__kmp_env_consistency_check)
3835 __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3836#endif
3837
3838 th = __kmp_thread_from_gtid(global_tid);
3839 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3840
3841 packed_reduction_method = __kmp_determine_reduction_method(
3842 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3843 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3844
3845 OMPT_REDUCTION_DECL(th, global_tid);
3846
3847 if (packed_reduction_method == critical_reduce_block) {
3848
3849 OMPT_REDUCTION_BEGIN;
3850 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3851 retval = 1;
3852
3853 } else if (packed_reduction_method == empty_reduce_block) {
3854
3855 OMPT_REDUCTION_BEGIN;
3856 // usage: if team size == 1, no synchronization is required ( Intel
3857 // platforms only )
3858 retval = 1;
3859
3860 } else if (packed_reduction_method == atomic_reduce_block) {
3861
3862 retval = 2;
3863
3864 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3865 tree_reduce_block)) {
3866
3867// case tree_reduce_block:
3868// this barrier should be visible to a customer and to the threading profile
3869// tool (it's a terminating barrier on constructs if NOWAIT not specified)
3870#if OMPT_SUPPORT
3871 ompt_frame_t *ompt_frame;
3872 if (ompt_enabled.enabled) {
3873 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3874 if (ompt_frame->enter_frame.ptr == NULL)
3875 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3876 }
3877 OMPT_STORE_RETURN_ADDRESS(global_tid);
3878#endif
3879#if USE_ITT_NOTIFY
3880 __kmp_threads[global_tid]->th.th_ident =
3881 loc; // needed for correct notification of frames
3882#endif
3883 retval =
3884 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3885 global_tid, TRUE, reduce_size, reduce_data, reduce_func);
3886 retval = (retval != 0) ? (0) : (1);
3887#if OMPT_SUPPORT && OMPT_OPTIONAL
3888 if (ompt_enabled.enabled) {
3889 ompt_frame->enter_frame = ompt_data_none;
3890 }
3891#endif
3892
3893 // all other workers except primary thread should do this pop here
3894 // (none of other workers except primary will enter __kmpc_end_reduce())
3895 if (__kmp_env_consistency_check) {
3896 if (retval == 0) { // 0: all other workers; 1: primary thread
3897 __kmp_pop_sync(global_tid, ct_reduce, loc);
3898 }
3899 }
3900
3901 } else {
3902
3903 // should never reach this block
3904 KMP_ASSERT(0); // "unexpected method"
3905 }
3906 if (teams_swapped) {
3907 __kmp_restore_swapped_teams(th, team, task_state);
3908 }
3909
3910 KA_TRACE(10,
3911 ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
3912 global_tid, packed_reduction_method, retval));
3913 return retval;
3914}
3915
3916/*!
3917@ingroup SYNCHRONIZATION
3918@param loc source location information
3919@param global_tid global thread id.
3920@param lck pointer to the unique lock data structure
3921
3922Finish the execution of a blocking reduce.
3923The <tt>lck</tt> pointer must be the same as that used in the corresponding
3924start function.
3925*/
3926void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
3927 kmp_critical_name *lck) {
3928
3929 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3930 kmp_info_t *th;
3931 kmp_team_t *team;
3932 int teams_swapped = 0, task_state;
3933
3934 KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
3935 __kmp_assert_valid_gtid(global_tid);
3936
3937 th = __kmp_thread_from_gtid(global_tid);
3938 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3939
3940 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3941
3942 // this barrier should be visible to a customer and to the threading profile
3943 // tool (it's a terminating barrier on constructs if NOWAIT not specified)
3944 OMPT_REDUCTION_DECL(th, global_tid);
3945
3946 if (packed_reduction_method == critical_reduce_block) {
3947 __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3948
3949 OMPT_REDUCTION_END;
3950
3951// TODO: implicit barrier: should be exposed
3952#if OMPT_SUPPORT
3953 ompt_frame_t *ompt_frame;
3954 if (ompt_enabled.enabled) {
3955 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3956 if (ompt_frame->enter_frame.ptr == NULL)
3957 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3958 }
3959 OMPT_STORE_RETURN_ADDRESS(global_tid);
3960#endif
3961#if USE_ITT_NOTIFY
3962 __kmp_threads[global_tid]->th.th_ident = loc;
3963#endif
3964 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3965#if OMPT_SUPPORT && OMPT_OPTIONAL
3966 if (ompt_enabled.enabled) {
3967 ompt_frame->enter_frame = ompt_data_none;
3968 }
3969#endif
3970
3971 } else if (packed_reduction_method == empty_reduce_block) {
3972
3973 OMPT_REDUCTION_END;
3974
3975// usage: if team size==1, no synchronization is required (Intel platforms only)
3976
3977// TODO: implicit barrier: should be exposed
3978#if OMPT_SUPPORT
3979 ompt_frame_t *ompt_frame;
3980 if (ompt_enabled.enabled) {
3981 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3982 if (ompt_frame->enter_frame.ptr == NULL)
3983 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3984 }
3985 OMPT_STORE_RETURN_ADDRESS(global_tid);
3986#endif
3987#if USE_ITT_NOTIFY
3988 __kmp_threads[global_tid]->th.th_ident = loc;
3989#endif
3990 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3991#if OMPT_SUPPORT && OMPT_OPTIONAL
3992 if (ompt_enabled.enabled) {
3993 ompt_frame->enter_frame = ompt_data_none;
3994 }
3995#endif
3996
3997 } else if (packed_reduction_method == atomic_reduce_block) {
3998
3999#if OMPT_SUPPORT
4000 ompt_frame_t *ompt_frame;
4001 if (ompt_enabled.enabled) {
4002 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
4003 if (ompt_frame->enter_frame.ptr == NULL)
4004 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
4005 }
4006 OMPT_STORE_RETURN_ADDRESS(global_tid);
4007#endif
4008// TODO: implicit barrier: should be exposed
4009#if USE_ITT_NOTIFY
4010 __kmp_threads[global_tid]->th.th_ident = loc;
4011#endif
4012 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
4013#if OMPT_SUPPORT && OMPT_OPTIONAL
4014 if (ompt_enabled.enabled) {
4015 ompt_frame->enter_frame = ompt_data_none;
4016 }
4017#endif
4018
4019 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
4020 tree_reduce_block)) {
4021
4022 // only primary thread executes here (primary releases all other workers)
4023 __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
4024 global_tid);
4025
4026 } else {
4027
4028 // should never reach this block
4029 KMP_ASSERT(0); // "unexpected method"
4030 }
4031 if (teams_swapped) {
4032 __kmp_restore_swapped_teams(th, team, task_state);
4033 }
4034
4035 if (__kmp_env_consistency_check)
4036 __kmp_pop_sync(global_tid, ct_reduce, loc);
4037
4038 KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
4039 global_tid, packed_reduction_method));
4040
4041 return;
4042}
4043
4044#undef __KMP_GET_REDUCTION_METHOD
4045#undef __KMP_SET_REDUCTION_METHOD
4046
4047/* end of interface to fast scalable reduce routines */
4048
4049kmp_uint64 __kmpc_get_taskid() {
4050
4051 kmp_int32 gtid;
4052 kmp_info_t *thread;
4053
4054 gtid = __kmp_get_gtid();
4055 if (gtid < 0) {
4056 return 0;
4057 }
4058 thread = __kmp_thread_from_gtid(gtid);
4059 return thread->th.th_current_task->td_task_id;
4060
4061} // __kmpc_get_taskid
4062
4063kmp_uint64 __kmpc_get_parent_taskid() {
4064
4065 kmp_int32 gtid;
4066 kmp_info_t *thread;
4067 kmp_taskdata_t *parent_task;
4068
4069 gtid = __kmp_get_gtid();
4070 if (gtid < 0) {
4071 return 0;
4072 }
4073 thread = __kmp_thread_from_gtid(gtid);
4074 parent_task = thread->th.th_current_task->td_parent;
4075 return (parent_task == NULL ? 0 : parent_task->td_task_id);
4076
4077} // __kmpc_get_parent_taskid
4078
4079/*!
4080@ingroup WORK_SHARING
4081@param loc source location information.
4082@param gtid global thread number.
4083@param num_dims number of associated doacross loops.
4084@param dims info on loops bounds.
4085
4086Initialize doacross loop information.
4087The compiler is expected to send us inclusive bounds,
4088e.g. for(i=2;i<9;i+=2) lo=2, up=8, st=2.
4089*/
4090void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
4091 const struct kmp_dim *dims) {
4092 __kmp_assert_valid_gtid(gtid);
4093 int j, idx;
4094 kmp_int64 last, trace_count;
4095 kmp_info_t *th = __kmp_threads[gtid];
4096 kmp_team_t *team = th->th.th_team;
4097 kmp_uint32 *flags;
4098 kmp_disp_t *pr_buf = th->th.th_dispatch;
4099 dispatch_shared_info_t *sh_buf;
4100
4101 KA_TRACE(
4102 20,
4103 ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
4104 gtid, num_dims, !team->t.t_serialized));
4105 KMP_DEBUG_ASSERT(dims != NULL);
4106 KMP_DEBUG_ASSERT(num_dims > 0);
4107
4108 if (team->t.t_serialized) {
4109 KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
4110 return; // no dependencies if team is serialized
4111 }
4112 KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
4113 idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for
4114 // the next loop
4115 sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4116
4117 // Save bounds info into allocated private buffer
4118 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
4119 pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
4120 th, sizeof(kmp_int64) * (4 * num_dims + 1));
4121 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4122 pr_buf->th_doacross_info[0] =
4123 (kmp_int64)num_dims; // first element is number of dimensions
4124 // Also save the address of num_done so it can be accessed later without
4125 // knowing the buffer index
4126 pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
4127 pr_buf->th_doacross_info[2] = dims[0].lo;
4128 pr_buf->th_doacross_info[3] = dims[0].up;
4129 pr_buf->th_doacross_info[4] = dims[0].st;
4130 last = 5;
4131 for (j = 1; j < num_dims; ++j) {
4132 kmp_int64
4133 range_length; // To keep ranges of all dimensions but the first dims[0]
4134 if (dims[j].st == 1) { // most common case
4135 // AC: should we care of ranges bigger than LLONG_MAX? (not for now)
4136 range_length = dims[j].up - dims[j].lo + 1;
4137 } else {
4138 if (dims[j].st > 0) {
4139 KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
4140 range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
4141 } else { // negative increment
4142 KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
4143 range_length =
4144 (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
4145 }
4146 }
4147 pr_buf->th_doacross_info[last++] = range_length;
4148 pr_buf->th_doacross_info[last++] = dims[j].lo;
4149 pr_buf->th_doacross_info[last++] = dims[j].up;
4150 pr_buf->th_doacross_info[last++] = dims[j].st;
4151 }
4152
4153 // Compute total trip count.
4154 // Start with range of dims[0] which we don't need to keep in the buffer.
4155 if (dims[0].st == 1) { // most common case
4156 trace_count = dims[0].up - dims[0].lo + 1;
4157 } else if (dims[0].st > 0) {
4158 KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
4159 trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
4160 } else { // negative increment
4161 KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
4162 trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
4163 }
4164 for (j = 1; j < num_dims; ++j) {
4165 trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
4166 }
4167 KMP_DEBUG_ASSERT(trace_count > 0);
4168
4169 // Check that the shared buffer is not still occupied by a previous loop
4170 // (the one that used buffer index idx - __kmp_dispatch_num_buffers)
4171 if (idx != sh_buf->doacross_buf_idx) {
4172 // Shared buffer is occupied, wait for it to be free
4173 __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
4174 __kmp_eq_4, NULL);
4175 }
4176#if KMP_32_BIT_ARCH
4177 // Check if we are the first thread. After the CAS the first thread gets 0,
4178 // others get 1 if initialization is in progress, allocated pointer otherwise.
4179 // Treat pointer as volatile integer (value 0 or 1) until memory is allocated.
4180 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
4181 (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
4182#else
4183 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
4184 (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
4185#endif
4186 if (flags == NULL) {
4187 // we are the first thread, allocate the array of flags
4188 size_t size =
4189 (size_t)trace_count / 8 + 8; // in bytes, use single bit per iteration
4190 flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
4191 KMP_MB();
4192 sh_buf->doacross_flags = flags;
4193 } else if (flags == (kmp_uint32 *)1) {
4194#if KMP_32_BIT_ARCH
4195 // initialization is still in progress, need to wait
4196 while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
4197#else
4198 while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
4199#endif
4200 KMP_YIELD(TRUE);
4201 KMP_MB();
4202 } else {
4203 KMP_MB();
4204 }
4205 KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value
4206 pr_buf->th_doacross_flags =
4207 sh_buf->doacross_flags; // save private copy in order to not
4208 // touch shared buffer on each iteration
4209 KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
4210}
4211
4212void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
4213 __kmp_assert_valid_gtid(gtid);
4214 kmp_int64 shft;
4215 size_t num_dims, i;
4216 kmp_uint32 flag;
4217 kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4218 kmp_info_t *th = __kmp_threads[gtid];
4219 kmp_team_t *team = th->th.th_team;
4220 kmp_disp_t *pr_buf;
4221 kmp_int64 lo, up, st;
4222
4223 KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
4224 if (team->t.t_serialized) {
4225 KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
4226 return; // no dependencies if team is serialized
4227 }
4228
4229 // calculate sequential iteration number and check out-of-bounds condition
4230 pr_buf = th->th.th_dispatch;
4231 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4232 num_dims = (size_t)pr_buf->th_doacross_info[0];
4233 lo = pr_buf->th_doacross_info[2];
4234 up = pr_buf->th_doacross_info[3];
4235 st = pr_buf->th_doacross_info[4];
4236#if OMPT_SUPPORT && OMPT_OPTIONAL
4237 SimpleVLA<ompt_dependence_t> deps(num_dims);
4238#endif
4239 if (st == 1) { // most common case
4240 if (vec[0] < lo || vec[0] > up) {
4241 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4242 "bounds [%lld,%lld]\n",
4243 gtid, vec[0], lo, up));
4244 return;
4245 }
4246 iter_number = vec[0] - lo;
4247 } else if (st > 0) {
4248 if (vec[0] < lo || vec[0] > up) {
4249 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4250 "bounds [%lld,%lld]\n",
4251 gtid, vec[0], lo, up));
4252 return;
4253 }
4254 iter_number = (kmp_uint64)(vec[0] - lo) / st;
4255 } else { // negative increment
4256 if (vec[0] > lo || vec[0] < up) {
4257 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4258 "bounds [%lld,%lld]\n",
4259 gtid, vec[0], lo, up));
4260 return;
4261 }
4262 iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4263 }
4264#if OMPT_SUPPORT && OMPT_OPTIONAL
4265 deps[0].variable.value = iter_number;
4266 deps[0].dependence_type = ompt_dependence_type_sink;
4267#endif
4268 for (i = 1; i < num_dims; ++i) {
4269 kmp_int64 iter, ln;
4270 size_t j = i * 4;
4271 ln = pr_buf->th_doacross_info[j + 1];
4272 lo = pr_buf->th_doacross_info[j + 2];
4273 up = pr_buf->th_doacross_info[j + 3];
4274 st = pr_buf->th_doacross_info[j + 4];
4275 if (st == 1) {
4276 if (vec[i] < lo || vec[i] > up) {
4277 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4278 "bounds [%lld,%lld]\n",
4279 gtid, vec[i], lo, up));
4280 return;
4281 }
4282 iter = vec[i] - lo;
4283 } else if (st > 0) {
4284 if (vec[i] < lo || vec[i] > up) {
4285 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4286 "bounds [%lld,%lld]\n",
4287 gtid, vec[i], lo, up));
4288 return;
4289 }
4290 iter = (kmp_uint64)(vec[i] - lo) / st;
4291 } else { // st < 0
4292 if (vec[i] > lo || vec[i] < up) {
4293 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4294 "bounds [%lld,%lld]\n",
4295 gtid, vec[i], lo, up));
4296 return;
4297 }
4298 iter = (kmp_uint64)(lo - vec[i]) / (-st);
4299 }
4300 iter_number = iter + ln * iter_number;
4301#if OMPT_SUPPORT && OMPT_OPTIONAL
4302 deps[i].variable.value = iter;
4303 deps[i].dependence_type = ompt_dependence_type_sink;
4304#endif
4305 }
4306 shft = iter_number % 32; // use 32-bit granularity
4307 iter_number >>= 5; // divided by 32
4308 flag = 1 << shft;
4309 while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
4310 KMP_YIELD(TRUE);
4311 }
4312 KMP_MB();
4313#if OMPT_SUPPORT && OMPT_OPTIONAL
4314 if (ompt_enabled.ompt_callback_dependences) {
4315 ompt_callbacks.ompt_callback(ompt_callback_dependences)(
4316 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
4317 }
4318#endif
4319 KA_TRACE(20,
4320 ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
4321 gtid, (iter_number << 5) + shft));
4322}
4323
4324void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
4325 __kmp_assert_valid_gtid(gtid);
4326 kmp_int64 shft;
4327 size_t num_dims, i;
4328 kmp_uint32 flag;
4329 kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4330 kmp_info_t *th = __kmp_threads[gtid];
4331 kmp_team_t *team = th->th.th_team;
4332 kmp_disp_t *pr_buf;
4333 kmp_int64 lo, st;
4334
4335 KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
4336 if (team->t.t_serialized) {
4337 KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
4338 return; // no dependencies if team is serialized
4339 }
4340
4341 // calculate sequential iteration number (same as in "wait" but no
4342 // out-of-bounds checks)
4343 pr_buf = th->th.th_dispatch;
4344 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4345 num_dims = (size_t)pr_buf->th_doacross_info[0];
4346 lo = pr_buf->th_doacross_info[2];
4347 st = pr_buf->th_doacross_info[4];
4348#if OMPT_SUPPORT && OMPT_OPTIONAL
4349 SimpleVLA<ompt_dependence_t> deps(num_dims);
4350#endif
4351 if (st == 1) { // most common case
4352 iter_number = vec[0] - lo;
4353 } else if (st > 0) {
4354 iter_number = (kmp_uint64)(vec[0] - lo) / st;
4355 } else { // negative increment
4356 iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4357 }
4358#if OMPT_SUPPORT && OMPT_OPTIONAL
4359 deps[0].variable.value = iter_number;
4360 deps[0].dependence_type = ompt_dependence_type_source;
4361#endif
4362 for (i = 1; i < num_dims; ++i) {
4363 kmp_int64 iter, ln;
4364 size_t j = i * 4;
4365 ln = pr_buf->th_doacross_info[j + 1];
4366 lo = pr_buf->th_doacross_info[j + 2];
4367 st = pr_buf->th_doacross_info[j + 4];
4368 if (st == 1) {
4369 iter = vec[i] - lo;
4370 } else if (st > 0) {
4371 iter = (kmp_uint64)(vec[i] - lo) / st;
4372 } else { // st < 0
4373 iter = (kmp_uint64)(lo - vec[i]) / (-st);
4374 }
4375 iter_number = iter + ln * iter_number;
4376#if OMPT_SUPPORT && OMPT_OPTIONAL
4377 deps[i].variable.value = iter;
4378 deps[i].dependence_type = ompt_dependence_type_source;
4379#endif
4380 }
4381#if OMPT_SUPPORT && OMPT_OPTIONAL
4382 if (ompt_enabled.ompt_callback_dependences) {
4383 ompt_callbacks.ompt_callback(ompt_callback_dependences)(
4384 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
4385 }
4386#endif
4387 shft = iter_number % 32; // use 32-bit granularity
4388 iter_number >>= 5; // divided by 32
4389 flag = 1 << shft;
4390 KMP_MB();
4391 if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
4392 KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
4393 KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
4394 (iter_number << 5) + shft));
4395}
4396
4397void __kmpc_doacross_fini(ident_t *loc, int gtid) {
4398 __kmp_assert_valid_gtid(gtid);
4399 kmp_int32 num_done;
4400 kmp_info_t *th = __kmp_threads[gtid];
4401 kmp_team_t *team = th->th.th_team;
4402 kmp_disp_t *pr_buf = th->th.th_dispatch;
4403
4404 KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
4405 if (team->t.t_serialized) {
4406 KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
4407 return; // nothing to do
4408 }
4409 num_done =
4410 KMP_TEST_THEN_INC32((kmp_uintptr_t)(pr_buf->th_doacross_info[1])) + 1;
4411 if (num_done == th->th.th_team_nproc) {
4412 // we are the last thread, need to free shared resources
4413 int idx = pr_buf->th_doacross_buf_idx - 1;
4414 dispatch_shared_info_t *sh_buf =
4415 &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4416 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
4417 (kmp_int64)&sh_buf->doacross_num_done);
4418 KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
4419 KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
4420 __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
4421 sh_buf->doacross_flags = NULL;
4422 sh_buf->doacross_num_done = 0;
4423 sh_buf->doacross_buf_idx +=
4424 __kmp_dispatch_num_buffers; // free buffer for future re-use
4425 }
4426 // free private resources (need to keep buffer index forever)
4427 pr_buf->th_doacross_flags = NULL;
4428 __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
4429 pr_buf->th_doacross_info = NULL;
4430 KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
4431}
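/* Illustrative lowering sketch (not emitted by this file) for a doacross
   loop such as
     #pragma omp for ordered(1)
     for (i = 2; i < 9; i += 2) { ... }
   with "ordered depend(sink: i-2)" before and "ordered depend(source)" after
   the body. The chunk bounds lb/ub are hypothetical (whatever the
   worksharing schedule assigned to this thread), and loc/gtid stand for the
   usual ident_t* and global thread id arguments.

     struct kmp_dim dim = {2, 8, 2}; // inclusive bounds: lo=2, up=8, st=2
     __kmpc_doacross_init(loc, gtid, 1, &dim);
     for (kmp_int64 i = lb; i <= ub; i += 2) {
       kmp_int64 vec = i - 2;
       __kmpc_doacross_wait(loc, gtid, &vec); // depend(sink: i-2)
       // ... loop body ...
       vec = i;
       __kmpc_doacross_post(loc, gtid, &vec); // depend(source)
     }
     __kmpc_doacross_fini(loc, gtid);
*/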
4432
4433/* OpenMP 5.1 Memory Management routines */
4434void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
4435 return __kmp_alloc(__kmp_entry_gtid(), 0, size, allocator);
4436}
4437
4438void *omp_aligned_alloc(size_t align, size_t size,
4439 omp_allocator_handle_t allocator) {
4440 return __kmp_alloc(__kmp_entry_gtid(), align, size, allocator);
4441}
4442
4443void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator) {
4444 return __kmp_calloc(__kmp_entry_gtid(), 0, nmemb, size, allocator);
4445}
4446
4447void *omp_aligned_calloc(size_t align, size_t nmemb, size_t size,
4448 omp_allocator_handle_t allocator) {
4449 return __kmp_calloc(__kmp_entry_gtid(), align, nmemb, size, allocator);
4450}
4451
4452void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator,
4453 omp_allocator_handle_t free_allocator) {
4454 return __kmp_realloc(__kmp_entry_gtid(), ptr, size, allocator,
4455 free_allocator);
4456}
4457
4458void omp_free(void *ptr, omp_allocator_handle_t allocator) {
4459 ___kmpc_free(__kmp_entry_gtid(), ptr, allocator);
4460}
4461/* end of OpenMP 5.1 Memory Management routines */
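/* Minimal user-side sketch of these OpenMP 5.x allocation entry points,
   assuming only the standard <omp.h> allocator API and the predefined
   omp_default_mem_alloc allocator:

     #include <omp.h>

     double *a = (double *)omp_aligned_alloc(64, 1024 * sizeof(double),
                                             omp_default_mem_alloc);
     if (a != NULL) {
       a[0] = 1.0;
       omp_free(a, omp_default_mem_alloc);
     }
*/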
4462
4463int __kmpc_get_target_offload(void) {
4464 if (!__kmp_init_serial) {
4465 __kmp_serial_initialize();
4466 }
4467 return __kmp_target_offload;
4468}
4469
4470int __kmpc_pause_resource(kmp_pause_status_t level) {
4471 if (!__kmp_init_serial) {
4472 return 1; // Can't pause if runtime is not initialized
4473 }
4474 return __kmp_pause_resource(level);
4475}
4476
4477void __kmpc_error(ident_t *loc, int severity, const char *message) {
4478 if (!__kmp_init_serial)
4479 __kmp_serial_initialize();
4480
4481 KMP_ASSERT(severity == severity_warning || severity == severity_fatal);
4482
4483#if OMPT_SUPPORT
4484 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_error) {
4485 ompt_callbacks.ompt_callback(ompt_callback_error)(
4486 (ompt_severity_t)severity, message, KMP_STRLEN(message),
4487 OMPT_GET_RETURN_ADDRESS(0));
4488 }
4489#endif // OMPT_SUPPORT
4490
4491 char *src_loc;
4492 if (loc && loc->psource) {
4493 kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false);
4494 src_loc =
4495 __kmp_str_format("%s:%d:%d", str_loc.file, str_loc.line, str_loc.col);
4496 __kmp_str_loc_free(&str_loc);
4497 } else {
4498 src_loc = __kmp_str_format("unknown");
4499 }
4500
4501 if (severity == severity_warning)
4502 KMP_WARNING(UserDirectedWarning, src_loc, message);
4503 else
4504 KMP_FATAL(UserDirectedError, src_loc, message);
4505
4506 __kmp_str_free(&src_loc);
4507}
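/* Sketch of the call a compiler may emit for the OpenMP 5.1 error directive;
   the ident_t construction is omitted and the message text is arbitrary.

     // #pragma omp error at(execution) severity(warning) message("bad input")
     __kmpc_error(loc, severity_warning, "bad input");
*/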
4508
4509// Mark begin of scope directive.
4510void __kmpc_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
4511// reserved is for extension of scope directive and not used.
4512#if OMPT_SUPPORT && OMPT_OPTIONAL
4513 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
4514 kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
4515 int tid = __kmp_tid_from_gtid(gtid);
4516 ompt_callbacks.ompt_callback(ompt_callback_work)(
4517 ompt_work_scope, ompt_scope_begin,
4518 &(team->t.ompt_team_info.parallel_data),
4519 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
4520 OMPT_GET_RETURN_ADDRESS(0));
4521 }
4522#endif // OMPT_SUPPORT && OMPT_OPTIONAL
4523}
4524
4525// Mark end of scope directive
4526void __kmpc_end_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
4527// reserved is for extension of scope directive and not used.
4528#if OMPT_SUPPORT && OMPT_OPTIONAL
4529 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
4530 kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
4531 int tid = __kmp_tid_from_gtid(gtid);
4532 ompt_callbacks.ompt_callback(ompt_callback_work)(
4533 ompt_work_scope, ompt_scope_end,
4534 &(team->t.ompt_team_info.parallel_data),
4535 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
4536 OMPT_GET_RETURN_ADDRESS(0));
4537 }
4538#endif // OMPT_SUPPORT && OMPT_OPTIONAL
4539}
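/* Illustrative lowering sketch for the scope directive in the simple
   (reduction-free, nowait) case: the structured block is bracketed by the
   two entry points above, with the reserved argument passed as NULL.

     // #pragma omp scope nowait
     __kmpc_scope(loc, gtid, NULL);
     // ... structured block ...
     __kmpc_end_scope(loc, gtid, NULL);
*/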
4540
4541#ifdef KMP_USE_VERSION_SYMBOLS
4542// For GOMP compatibility there are two versions of each omp_* API.
4543// One is the plain C symbol and one is the Fortran symbol with an appended
4544// underscore. When we implement a specific ompc_* version of an omp_*
4545// function, we want the plain GOMP versioned symbol to alias the ompc_* version
4546// instead of the Fortran versions in kmp_ftn_entry.h
4547extern "C" {
4548// Have to undef these from omp.h so they aren't translated into
4549// their ompc counterparts in the KMP_VERSION_OMPC_SYMBOL macros below
4550#ifdef omp_set_affinity_format
4551#undef omp_set_affinity_format
4552#endif
4553#ifdef omp_get_affinity_format
4554#undef omp_get_affinity_format
4555#endif
4556#ifdef omp_display_affinity
4557#undef omp_display_affinity
4558#endif
4559#ifdef omp_capture_affinity
4560#undef omp_capture_affinity
4561#endif
4562KMP_VERSION_OMPC_SYMBOL(ompc_set_affinity_format, omp_set_affinity_format, 50,
4563 "OMP_5.0");
4564KMP_VERSION_OMPC_SYMBOL(ompc_get_affinity_format, omp_get_affinity_format, 50,
4565 "OMP_5.0");
4566KMP_VERSION_OMPC_SYMBOL(ompc_display_affinity, omp_display_affinity, 50,
4567 "OMP_5.0");
4568KMP_VERSION_OMPC_SYMBOL(ompc_capture_affinity, omp_capture_affinity, 50,
4569 "OMP_5.0");
4570} // extern "C"
4571#endif
4572
